Index: stable/10/sys/contrib/rdma/krping/krping.c =================================================================== --- stable/10/sys/contrib/rdma/krping/krping.c (revision 271126) +++ stable/10/sys/contrib/rdma/krping/krping.c (revision 271127) @@ -1,2330 +1,2328 @@ /* * Copyright (c) 2005 Ammasso, Inc. All rights reserved. * Copyright (c) 2006-2009 Open Grid Computing, Inc. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU * General Public License (GPL) Version 2, available from the file * COPYING in the main directory of this source tree, or the * OpenIB.org BSD license below: * * Redistribution and use in source and binary forms, with or * without modification, are permitted provided that the following * conditions are met: * * - Redistributions of source code must retain the above * copyright notice, this list of conditions and the following * disclaimer. * * - Redistributions in binary form must reproduce the above * copyright notice, this list of conditions and the following * disclaimer in the documentation and/or other materials * provided with the distribution. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ #include __FBSDID("$FreeBSD$"); #include #include -#include #include #include #include #include #include #include #include #include #include -#include #include #include #include #include "krping.h" #include "getopt.h" extern int krping_debug; #define DEBUG_LOG(cb, x...) if (krping_debug) krping_printf((cb)->cookie, x) #define PRINTF(cb, x...) krping_printf((cb)->cookie, x) MODULE_AUTHOR("Steve Wise"); MODULE_DESCRIPTION("RDMA ping client/server"); MODULE_LICENSE("Dual BSD/GPL"); static __inline uint64_t get_cycles(void) { uint32_t low, high; __asm __volatile("rdtsc" : "=a" (low), "=d" (high)); return (low | ((u_int64_t)high << 32)); } typedef uint64_t cycles_t; enum mem_type { DMA = 1, FASTREG = 2, MW = 3, MR = 4 }; static const struct krping_option krping_opts[] = { {"count", OPT_INT, 'C'}, {"size", OPT_INT, 'S'}, {"addr", OPT_STRING, 'a'}, {"port", OPT_INT, 'p'}, {"verbose", OPT_NOPARAM, 'v'}, {"validate", OPT_NOPARAM, 'V'}, {"server", OPT_NOPARAM, 's'}, {"client", OPT_NOPARAM, 'c'}, {"mem_mode", OPT_STRING, 'm'}, {"server_inv", OPT_NOPARAM, 'I'}, {"wlat", OPT_NOPARAM, 'l'}, {"rlat", OPT_NOPARAM, 'L'}, {"bw", OPT_NOPARAM, 'B'}, {"duplex", OPT_NOPARAM, 'd'}, {"txdepth", OPT_INT, 'T'}, {"poll", OPT_NOPARAM, 'P'}, {"local_dma_lkey", OPT_NOPARAM, 'Z'}, {"read_inv", OPT_NOPARAM, 'R'}, {"fr", OPT_NOPARAM, 'f'}, {NULL, 0, 0} }; #define htonll(x) cpu_to_be64((x)) #define ntohll(x) cpu_to_be64((x)) static struct mutex krping_mutex; /* * List of running krping threads. */ static LIST_HEAD(krping_cbs); /* * krping "ping/pong" loop: * client sends source rkey/addr/len * server receives source rkey/add/len * server rdma reads "ping" data from source * server sends "go ahead" on rdma read completion * client sends sink rkey/addr/len * server receives sink rkey/addr/len * server rdma writes "pong" data to sink * server sends "go ahead" on rdma write completion * */ /* * These states are used to signal events between the completion handler * and the main client or server thread. * * Once CONNECTED, they cycle through RDMA_READ_ADV, RDMA_WRITE_ADV, * and RDMA_WRITE_COMPLETE for each ping. */ enum test_state { IDLE = 1, CONNECT_REQUEST, ADDR_RESOLVED, ROUTE_RESOLVED, CONNECTED, RDMA_READ_ADV, RDMA_READ_COMPLETE, RDMA_WRITE_ADV, RDMA_WRITE_COMPLETE, ERROR }; struct krping_rdma_info { uint64_t buf; uint32_t rkey; uint32_t size; }; /* * Default max buffer size for IO... */ #define RPING_BUFSIZE 128*1024 #define RPING_SQ_DEPTH 64 /* * Control block struct. */ struct krping_cb { void *cookie; int server; /* 0 iff client */ struct ib_cq *cq; struct ib_pd *pd; struct ib_qp *qp; enum mem_type mem; struct ib_mr *dma_mr; struct ib_fast_reg_page_list *page_list; int page_list_len; struct ib_send_wr fastreg_wr; struct ib_send_wr invalidate_wr; struct ib_mr *fastreg_mr; int server_invalidate; int read_inv; u8 key; struct ib_mw *mw; struct ib_mw_bind bind_attr; struct ib_recv_wr rq_wr; /* recv work request record */ struct ib_sge recv_sgl; /* recv single SGE */ struct krping_rdma_info recv_buf;/* malloc'd buffer */ u64 recv_dma_addr; DECLARE_PCI_UNMAP_ADDR(recv_mapping) struct ib_mr *recv_mr; struct ib_send_wr sq_wr; /* send work requrest record */ struct ib_sge send_sgl; struct krping_rdma_info send_buf;/* single send buf */ u64 send_dma_addr; DECLARE_PCI_UNMAP_ADDR(send_mapping) struct ib_mr *send_mr; struct ib_send_wr rdma_sq_wr; /* rdma work request record */ struct ib_sge rdma_sgl; /* rdma single SGE */ char *rdma_buf; /* used as rdma sink */ u64 rdma_dma_addr; DECLARE_PCI_UNMAP_ADDR(rdma_mapping) struct ib_mr *rdma_mr; uint32_t remote_rkey; /* remote guys RKEY */ uint64_t remote_addr; /* remote guys TO */ uint32_t remote_len; /* remote guys LEN */ char *start_buf; /* rdma read src */ u64 start_dma_addr; DECLARE_PCI_UNMAP_ADDR(start_mapping) struct ib_mr *start_mr; enum test_state state; /* used for cond/signalling */ wait_queue_head_t sem; struct krping_stats stats; uint16_t port; /* dst port in NBO */ struct in_addr addr; /* dst addr in NBO */ char *addr_str; /* dst addr string */ int verbose; /* verbose logging */ int count; /* ping count */ int size; /* ping data size */ int validate; /* validate ping data */ int wlat; /* run wlat test */ int rlat; /* run rlat test */ int bw; /* run bw test */ int duplex; /* run bw full duplex test */ int poll; /* poll or block for rlat test */ int txdepth; /* SQ depth */ int local_dma_lkey; /* use 0 for lkey */ int frtest; /* fastreg test */ /* CM stuff */ struct rdma_cm_id *cm_id; /* connection on client side,*/ /* listener on server side. */ struct rdma_cm_id *child_cm_id; /* connection on server side */ struct list_head list; }; static int krping_cma_event_handler(struct rdma_cm_id *cma_id, struct rdma_cm_event *event) { int ret; struct krping_cb *cb = cma_id->context; DEBUG_LOG(cb, "cma_event type %d cma_id %p (%s)\n", event->event, cma_id, (cma_id == cb->cm_id) ? "parent" : "child"); switch (event->event) { case RDMA_CM_EVENT_ADDR_RESOLVED: cb->state = ADDR_RESOLVED; ret = rdma_resolve_route(cma_id, 2000); if (ret) { PRINTF(cb, "rdma_resolve_route error %d\n", ret); wake_up_interruptible(&cb->sem); } break; case RDMA_CM_EVENT_ROUTE_RESOLVED: cb->state = ROUTE_RESOLVED; wake_up_interruptible(&cb->sem); break; case RDMA_CM_EVENT_CONNECT_REQUEST: cb->state = CONNECT_REQUEST; cb->child_cm_id = cma_id; DEBUG_LOG(cb, "child cma %p\n", cb->child_cm_id); wake_up_interruptible(&cb->sem); break; case RDMA_CM_EVENT_ESTABLISHED: DEBUG_LOG(cb, "ESTABLISHED\n"); if (!cb->server) { cb->state = CONNECTED; } wake_up_interruptible(&cb->sem); break; case RDMA_CM_EVENT_ADDR_ERROR: case RDMA_CM_EVENT_ROUTE_ERROR: case RDMA_CM_EVENT_CONNECT_ERROR: case RDMA_CM_EVENT_UNREACHABLE: case RDMA_CM_EVENT_REJECTED: PRINTF(cb, "cma event %d, error %d\n", event->event, event->status); cb->state = ERROR; wake_up_interruptible(&cb->sem); break; case RDMA_CM_EVENT_DISCONNECTED: PRINTF(cb, "DISCONNECT EVENT...\n"); cb->state = ERROR; wake_up_interruptible(&cb->sem); break; case RDMA_CM_EVENT_DEVICE_REMOVAL: PRINTF(cb, "cma detected device removal!!!!\n"); break; default: PRINTF(cb, "oof bad type!\n"); wake_up_interruptible(&cb->sem); break; } return 0; } static int server_recv(struct krping_cb *cb, struct ib_wc *wc) { if (wc->byte_len != sizeof(cb->recv_buf)) { PRINTF(cb, "Received bogus data, size %d\n", wc->byte_len); return -1; } cb->remote_rkey = ntohl(cb->recv_buf.rkey); cb->remote_addr = ntohll(cb->recv_buf.buf); cb->remote_len = ntohl(cb->recv_buf.size); DEBUG_LOG(cb, "Received rkey %x addr %llx len %d from peer\n", cb->remote_rkey, (unsigned long long)cb->remote_addr, cb->remote_len); if (cb->state <= CONNECTED || cb->state == RDMA_WRITE_COMPLETE) cb->state = RDMA_READ_ADV; else cb->state = RDMA_WRITE_ADV; return 0; } static int client_recv(struct krping_cb *cb, struct ib_wc *wc) { if (wc->byte_len != sizeof(cb->recv_buf)) { PRINTF(cb, "Received bogus data, size %d\n", wc->byte_len); return -1; } if (cb->state == RDMA_READ_ADV) cb->state = RDMA_WRITE_ADV; else cb->state = RDMA_WRITE_COMPLETE; return 0; } static void krping_cq_event_handler(struct ib_cq *cq, void *ctx) { struct krping_cb *cb = ctx; struct ib_wc wc; struct ib_recv_wr *bad_wr; int ret; BUG_ON(cb->cq != cq); if (cb->state == ERROR) { PRINTF(cb, "cq completion in ERROR state\n"); return; } if (cb->frtest) { PRINTF(cb, "cq completion event in frtest!\n"); return; } if (!cb->wlat && !cb->rlat && !cb->bw) ib_req_notify_cq(cb->cq, IB_CQ_NEXT_COMP); while ((ret = ib_poll_cq(cb->cq, 1, &wc)) == 1) { if (wc.status) { if (wc.status == IB_WC_WR_FLUSH_ERR) { DEBUG_LOG(cb, "cq flushed\n"); continue; } else { PRINTF(cb, "cq completion failed with " "wr_id %Lx status %d opcode %d vender_err %x\n", wc.wr_id, wc.status, wc.opcode, wc.vendor_err); goto error; } } switch (wc.opcode) { case IB_WC_SEND: DEBUG_LOG(cb, "send completion\n"); cb->stats.send_bytes += cb->send_sgl.length; cb->stats.send_msgs++; break; case IB_WC_RDMA_WRITE: DEBUG_LOG(cb, "rdma write completion\n"); cb->stats.write_bytes += cb->rdma_sq_wr.sg_list->length; cb->stats.write_msgs++; cb->state = RDMA_WRITE_COMPLETE; wake_up_interruptible(&cb->sem); break; case IB_WC_RDMA_READ: DEBUG_LOG(cb, "rdma read completion\n"); cb->stats.read_bytes += cb->rdma_sq_wr.sg_list->length; cb->stats.read_msgs++; cb->state = RDMA_READ_COMPLETE; wake_up_interruptible(&cb->sem); break; case IB_WC_RECV: DEBUG_LOG(cb, "recv completion\n"); cb->stats.recv_bytes += sizeof(cb->recv_buf); cb->stats.recv_msgs++; if (cb->wlat || cb->rlat || cb->bw) ret = server_recv(cb, &wc); else ret = cb->server ? server_recv(cb, &wc) : client_recv(cb, &wc); if (ret) { PRINTF(cb, "recv wc error: %d\n", ret); goto error; } ret = ib_post_recv(cb->qp, &cb->rq_wr, &bad_wr); if (ret) { PRINTF(cb, "post recv error: %d\n", ret); goto error; } wake_up_interruptible(&cb->sem); break; default: PRINTF(cb, "%s:%d Unexpected opcode %d, Shutting down\n", __func__, __LINE__, wc.opcode); goto error; } } if (ret) { PRINTF(cb, "poll error %d\n", ret); goto error; } return; error: cb->state = ERROR; wake_up_interruptible(&cb->sem); } static int krping_accept(struct krping_cb *cb) { struct rdma_conn_param conn_param; int ret; DEBUG_LOG(cb, "accepting client connection request\n"); memset(&conn_param, 0, sizeof conn_param); conn_param.responder_resources = 1; conn_param.initiator_depth = 1; ret = rdma_accept(cb->child_cm_id, &conn_param); if (ret) { PRINTF(cb, "rdma_accept error: %d\n", ret); return ret; } if (!cb->wlat && !cb->rlat && !cb->bw) { wait_event_interruptible(cb->sem, cb->state >= CONNECTED); if (cb->state == ERROR) { PRINTF(cb, "wait for CONNECTED state %d\n", cb->state); return -1; } } return 0; } static void krping_setup_wr(struct krping_cb *cb) { cb->recv_sgl.addr = cb->recv_dma_addr; cb->recv_sgl.length = sizeof cb->recv_buf; if (cb->local_dma_lkey) cb->recv_sgl.lkey = cb->qp->device->local_dma_lkey; else if (cb->mem == DMA) cb->recv_sgl.lkey = cb->dma_mr->lkey; else cb->recv_sgl.lkey = cb->recv_mr->lkey; cb->rq_wr.sg_list = &cb->recv_sgl; cb->rq_wr.num_sge = 1; cb->send_sgl.addr = cb->send_dma_addr; cb->send_sgl.length = sizeof cb->send_buf; if (cb->local_dma_lkey) cb->send_sgl.lkey = cb->qp->device->local_dma_lkey; else if (cb->mem == DMA) cb->send_sgl.lkey = cb->dma_mr->lkey; else cb->send_sgl.lkey = cb->send_mr->lkey; cb->sq_wr.opcode = IB_WR_SEND; cb->sq_wr.send_flags = IB_SEND_SIGNALED; cb->sq_wr.sg_list = &cb->send_sgl; cb->sq_wr.num_sge = 1; if (cb->server || cb->wlat || cb->rlat || cb->bw) { cb->rdma_sgl.addr = cb->rdma_dma_addr; if (cb->mem == MR) cb->rdma_sgl.lkey = cb->rdma_mr->lkey; cb->rdma_sq_wr.send_flags = IB_SEND_SIGNALED; cb->rdma_sq_wr.sg_list = &cb->rdma_sgl; cb->rdma_sq_wr.num_sge = 1; } switch(cb->mem) { case FASTREG: /* * A chain of 2 WRs, INVALDATE_MR + FAST_REG_MR. * both unsignaled. The client uses them to reregister * the rdma buffers with a new key each iteration. */ cb->fastreg_wr.opcode = IB_WR_FAST_REG_MR; cb->fastreg_wr.wr.fast_reg.page_shift = PAGE_SHIFT; cb->fastreg_wr.wr.fast_reg.length = cb->size; cb->fastreg_wr.wr.fast_reg.page_list = cb->page_list; cb->fastreg_wr.wr.fast_reg.page_list_len = cb->page_list_len; cb->invalidate_wr.next = &cb->fastreg_wr; cb->invalidate_wr.opcode = IB_WR_LOCAL_INV; break; case MW: cb->bind_attr.wr_id = 0xabbaabba; cb->bind_attr.send_flags = 0; /* unsignaled */ cb->bind_attr.length = cb->size; break; default: break; } } static int krping_setup_buffers(struct krping_cb *cb) { int ret; struct ib_phys_buf buf; u64 iovbase; DEBUG_LOG(cb, "krping_setup_buffers called on cb %p\n", cb); cb->recv_dma_addr = dma_map_single(cb->pd->device->dma_device, &cb->recv_buf, sizeof(cb->recv_buf), DMA_BIDIRECTIONAL); pci_unmap_addr_set(cb, recv_mapping, cb->recv_dma_addr); cb->send_dma_addr = dma_map_single(cb->pd->device->dma_device, &cb->send_buf, sizeof(cb->send_buf), DMA_BIDIRECTIONAL); pci_unmap_addr_set(cb, send_mapping, cb->send_dma_addr); if (cb->mem == DMA) { cb->dma_mr = ib_get_dma_mr(cb->pd, IB_ACCESS_LOCAL_WRITE| IB_ACCESS_REMOTE_READ| IB_ACCESS_REMOTE_WRITE); if (IS_ERR(cb->dma_mr)) { DEBUG_LOG(cb, "reg_dmamr failed\n"); ret = PTR_ERR(cb->dma_mr); goto bail; } } else { if (!cb->local_dma_lkey) { buf.addr = cb->recv_dma_addr; buf.size = sizeof cb->recv_buf; DEBUG_LOG(cb, "recv buf dma_addr %llx size %d\n", buf.addr, (int)buf.size); iovbase = cb->recv_dma_addr; cb->recv_mr = ib_reg_phys_mr(cb->pd, &buf, 1, IB_ACCESS_LOCAL_WRITE, &iovbase); if (IS_ERR(cb->recv_mr)) { DEBUG_LOG(cb, "recv_buf reg_mr failed\n"); ret = PTR_ERR(cb->recv_mr); goto bail; } buf.addr = cb->send_dma_addr; buf.size = sizeof cb->send_buf; DEBUG_LOG(cb, "send buf dma_addr %llx size %d\n", buf.addr, (int)buf.size); iovbase = cb->send_dma_addr; cb->send_mr = ib_reg_phys_mr(cb->pd, &buf, 1, 0, &iovbase); if (IS_ERR(cb->send_mr)) { DEBUG_LOG(cb, "send_buf reg_mr failed\n"); ret = PTR_ERR(cb->send_mr); goto bail; } } } cb->rdma_buf = kmalloc(cb->size, GFP_KERNEL); if (!cb->rdma_buf) { DEBUG_LOG(cb, "rdma_buf malloc failed\n"); ret = -ENOMEM; goto bail; } cb->rdma_dma_addr = dma_map_single(cb->pd->device->dma_device, cb->rdma_buf, cb->size, DMA_BIDIRECTIONAL); pci_unmap_addr_set(cb, rdma_mapping, cb->rdma_dma_addr); if (cb->mem != DMA) { switch (cb->mem) { case FASTREG: cb->page_list_len = (((cb->size - 1) & PAGE_MASK) + PAGE_SIZE) >> PAGE_SHIFT; cb->page_list = ib_alloc_fast_reg_page_list( cb->pd->device, cb->page_list_len); if (IS_ERR(cb->page_list)) { DEBUG_LOG(cb, "recv_buf reg_mr failed\n"); ret = PTR_ERR(cb->page_list); goto bail; } cb->fastreg_mr = ib_alloc_fast_reg_mr(cb->pd, cb->page_list->max_page_list_len); if (IS_ERR(cb->fastreg_mr)) { DEBUG_LOG(cb, "recv_buf reg_mr failed\n"); ret = PTR_ERR(cb->fastreg_mr); goto bail; } DEBUG_LOG(cb, "fastreg rkey 0x%x page_list %p" " page_list_len %u\n", cb->fastreg_mr->rkey, cb->page_list, cb->page_list_len); break; case MW: cb->mw = ib_alloc_mw(cb->pd); if (IS_ERR(cb->mw)) { DEBUG_LOG(cb, "recv_buf alloc_mw failed\n"); ret = PTR_ERR(cb->mw); goto bail; } DEBUG_LOG(cb, "mw rkey 0x%x\n", cb->mw->rkey); /*FALLTHROUGH*/ case MR: buf.addr = cb->rdma_dma_addr; buf.size = cb->size; iovbase = cb->rdma_dma_addr; cb->rdma_mr = ib_reg_phys_mr(cb->pd, &buf, 1, IB_ACCESS_REMOTE_READ| IB_ACCESS_REMOTE_WRITE, &iovbase); if (IS_ERR(cb->rdma_mr)) { DEBUG_LOG(cb, "rdma_buf reg_mr failed\n"); ret = PTR_ERR(cb->rdma_mr); goto bail; } DEBUG_LOG(cb, "rdma buf dma_addr %llx size %d mr rkey 0x%x\n", buf.addr, (int)buf.size, cb->rdma_mr->rkey); break; default: ret = -EINVAL; goto bail; break; } } if (!cb->server || cb->wlat || cb->rlat || cb->bw) { cb->start_buf = kmalloc(cb->size, GFP_KERNEL); if (!cb->start_buf) { DEBUG_LOG(cb, "start_buf malloc failed\n"); ret = -ENOMEM; goto bail; } cb->start_dma_addr = dma_map_single(cb->pd->device->dma_device, cb->start_buf, cb->size, DMA_BIDIRECTIONAL); pci_unmap_addr_set(cb, start_mapping, cb->start_dma_addr); if (cb->mem == MR || cb->mem == MW) { unsigned flags = IB_ACCESS_REMOTE_READ; if (cb->wlat || cb->rlat || cb->bw) flags |= IB_ACCESS_REMOTE_WRITE; buf.addr = cb->start_dma_addr; buf.size = cb->size; DEBUG_LOG(cb, "start buf dma_addr %llx size %d\n", buf.addr, (int)buf.size); iovbase = cb->start_dma_addr; cb->start_mr = ib_reg_phys_mr(cb->pd, &buf, 1, flags, &iovbase); if (IS_ERR(cb->start_mr)) { DEBUG_LOG(cb, "start_buf reg_mr failed\n"); ret = PTR_ERR(cb->start_mr); goto bail; } } } krping_setup_wr(cb); DEBUG_LOG(cb, "allocated & registered buffers...\n"); return 0; bail: if (cb->fastreg_mr && !IS_ERR(cb->fastreg_mr)) ib_dereg_mr(cb->fastreg_mr); if (cb->mw && !IS_ERR(cb->mw)) ib_dealloc_mw(cb->mw); if (cb->rdma_mr && !IS_ERR(cb->rdma_mr)) ib_dereg_mr(cb->rdma_mr); if (cb->page_list && !IS_ERR(cb->page_list)) ib_free_fast_reg_page_list(cb->page_list); if (cb->dma_mr && !IS_ERR(cb->dma_mr)) ib_dereg_mr(cb->dma_mr); if (cb->recv_mr && !IS_ERR(cb->recv_mr)) ib_dereg_mr(cb->recv_mr); if (cb->send_mr && !IS_ERR(cb->send_mr)) ib_dereg_mr(cb->send_mr); if (cb->rdma_buf) kfree(cb->rdma_buf); if (cb->start_buf) kfree(cb->start_buf); return ret; } static void krping_free_buffers(struct krping_cb *cb) { DEBUG_LOG(cb, "krping_free_buffers called on cb %p\n", cb); if (cb->dma_mr) ib_dereg_mr(cb->dma_mr); if (cb->send_mr) ib_dereg_mr(cb->send_mr); if (cb->recv_mr) ib_dereg_mr(cb->recv_mr); if (cb->rdma_mr) ib_dereg_mr(cb->rdma_mr); if (cb->start_mr) ib_dereg_mr(cb->start_mr); if (cb->fastreg_mr) ib_dereg_mr(cb->fastreg_mr); if (cb->mw) ib_dealloc_mw(cb->mw); dma_unmap_single(cb->pd->device->dma_device, pci_unmap_addr(cb, recv_mapping), sizeof(cb->recv_buf), DMA_BIDIRECTIONAL); dma_unmap_single(cb->pd->device->dma_device, pci_unmap_addr(cb, send_mapping), sizeof(cb->send_buf), DMA_BIDIRECTIONAL); dma_unmap_single(cb->pd->device->dma_device, pci_unmap_addr(cb, rdma_mapping), cb->size, DMA_BIDIRECTIONAL); kfree(cb->rdma_buf); if (cb->start_buf) { dma_unmap_single(cb->pd->device->dma_device, pci_unmap_addr(cb, start_mapping), cb->size, DMA_BIDIRECTIONAL); kfree(cb->start_buf); } } static int krping_create_qp(struct krping_cb *cb) { struct ib_qp_init_attr init_attr; int ret; memset(&init_attr, 0, sizeof(init_attr)); init_attr.cap.max_send_wr = cb->txdepth; init_attr.cap.max_recv_wr = 2; init_attr.cap.max_recv_sge = 1; init_attr.cap.max_send_sge = 1; init_attr.qp_type = IB_QPT_RC; init_attr.send_cq = cb->cq; init_attr.recv_cq = cb->cq; init_attr.sq_sig_type = IB_SIGNAL_REQ_WR; if (cb->server) { ret = rdma_create_qp(cb->child_cm_id, cb->pd, &init_attr); if (!ret) cb->qp = cb->child_cm_id->qp; } else { ret = rdma_create_qp(cb->cm_id, cb->pd, &init_attr); if (!ret) cb->qp = cb->cm_id->qp; } return ret; } static void krping_free_qp(struct krping_cb *cb) { ib_destroy_qp(cb->qp); ib_destroy_cq(cb->cq); ib_dealloc_pd(cb->pd); } static int krping_setup_qp(struct krping_cb *cb, struct rdma_cm_id *cm_id) { int ret; cb->pd = ib_alloc_pd(cm_id->device); if (IS_ERR(cb->pd)) { PRINTF(cb, "ib_alloc_pd failed\n"); return PTR_ERR(cb->pd); } DEBUG_LOG(cb, "created pd %p\n", cb->pd); strlcpy(cb->stats.name, cb->pd->device->name, sizeof(cb->stats.name)); cb->cq = ib_create_cq(cm_id->device, krping_cq_event_handler, NULL, cb, cb->txdepth * 2, 0); if (IS_ERR(cb->cq)) { PRINTF(cb, "ib_create_cq failed\n"); ret = PTR_ERR(cb->cq); goto err1; } DEBUG_LOG(cb, "created cq %p\n", cb->cq); if (!cb->wlat && !cb->rlat && !cb->bw && !cb->frtest) { ret = ib_req_notify_cq(cb->cq, IB_CQ_NEXT_COMP); if (ret) { PRINTF(cb, "ib_create_cq failed\n"); goto err2; } } ret = krping_create_qp(cb); if (ret) { PRINTF(cb, "krping_create_qp failed: %d\n", ret); goto err2; } DEBUG_LOG(cb, "created qp %p\n", cb->qp); return 0; err2: ib_destroy_cq(cb->cq); err1: ib_dealloc_pd(cb->pd); return ret; } /* * return the (possibly rebound) rkey for the rdma buffer. * FASTREG mode: invalidate and rebind via fastreg wr. * MW mode: rebind the MW. * other modes: just return the mr rkey. */ static u32 krping_rdma_rkey(struct krping_cb *cb, u64 buf, int post_inv) { u32 rkey = 0xffffffff; u64 p; struct ib_send_wr *bad_wr; int i; int ret; switch (cb->mem) { case FASTREG: cb->invalidate_wr.ex.invalidate_rkey = cb->fastreg_mr->rkey; /* * Update the fastreg key. */ ib_update_fast_reg_key(cb->fastreg_mr, ++cb->key); cb->fastreg_wr.wr.fast_reg.rkey = cb->fastreg_mr->rkey; /* * Update the fastreg WR with new buf info. */ if (buf == (u64)cb->start_dma_addr) cb->fastreg_wr.wr.fast_reg.access_flags = IB_ACCESS_REMOTE_READ; else cb->fastreg_wr.wr.fast_reg.access_flags = IB_ACCESS_REMOTE_WRITE | IB_ACCESS_LOCAL_WRITE; cb->fastreg_wr.wr.fast_reg.iova_start = buf; p = (u64)(buf & PAGE_MASK); for (i=0; i < cb->fastreg_wr.wr.fast_reg.page_list_len; i++, p += PAGE_SIZE) { cb->page_list->page_list[i] = p; DEBUG_LOG(cb, "page_list[%d] 0x%llx\n", i, p); } DEBUG_LOG(cb, "post_inv = %d, fastreg new rkey 0x%x shift %u len %u" " iova_start %llx page_list_len %u\n", post_inv, cb->fastreg_wr.wr.fast_reg.rkey, cb->fastreg_wr.wr.fast_reg.page_shift, cb->fastreg_wr.wr.fast_reg.length, cb->fastreg_wr.wr.fast_reg.iova_start, cb->fastreg_wr.wr.fast_reg.page_list_len); if (post_inv) ret = ib_post_send(cb->qp, &cb->invalidate_wr, &bad_wr); else ret = ib_post_send(cb->qp, &cb->fastreg_wr, &bad_wr); if (ret) { PRINTF(cb, "post send error %d\n", ret); cb->state = ERROR; } rkey = cb->fastreg_mr->rkey; break; case MW: /* * Update the MW with new buf info. */ if (buf == (u64)cb->start_dma_addr) { cb->bind_attr.mw_access_flags = IB_ACCESS_REMOTE_READ; cb->bind_attr.mr = cb->start_mr; } else { cb->bind_attr.mw_access_flags = IB_ACCESS_REMOTE_WRITE; cb->bind_attr.mr = cb->rdma_mr; } cb->bind_attr.addr = buf; DEBUG_LOG(cb, "binding mw rkey 0x%x to buf %llx mr rkey 0x%x\n", cb->mw->rkey, buf, cb->bind_attr.mr->rkey); ret = ib_bind_mw(cb->qp, cb->mw, &cb->bind_attr); if (ret) { PRINTF(cb, "bind mw error %d\n", ret); cb->state = ERROR; } else rkey = cb->mw->rkey; break; case MR: if (buf == (u64)cb->start_dma_addr) rkey = cb->start_mr->rkey; else rkey = cb->rdma_mr->rkey; break; case DMA: rkey = cb->dma_mr->rkey; break; default: PRINTF(cb, "%s:%d case ERROR\n", __func__, __LINE__); cb->state = ERROR; break; } return rkey; } static void krping_format_send(struct krping_cb *cb, u64 buf) { struct krping_rdma_info *info = &cb->send_buf; u32 rkey; /* * Client side will do fastreg or mw bind before * advertising the rdma buffer. Server side * sends have no data. */ if (!cb->server || cb->wlat || cb->rlat || cb->bw) { rkey = krping_rdma_rkey(cb, buf, !cb->server_invalidate); info->buf = htonll(buf); info->rkey = htonl(rkey); info->size = htonl(cb->size); DEBUG_LOG(cb, "RDMA addr %llx rkey %x len %d\n", (unsigned long long)buf, rkey, cb->size); } } static void krping_test_server(struct krping_cb *cb) { struct ib_send_wr *bad_wr, inv; int ret; while (1) { /* Wait for client's Start STAG/TO/Len */ wait_event_interruptible(cb->sem, cb->state >= RDMA_READ_ADV); if (cb->state != RDMA_READ_ADV) { PRINTF(cb, "wait for RDMA_READ_ADV state %d\n", cb->state); break; } DEBUG_LOG(cb, "server received sink adv\n"); cb->rdma_sq_wr.wr.rdma.rkey = cb->remote_rkey; cb->rdma_sq_wr.wr.rdma.remote_addr = cb->remote_addr; cb->rdma_sq_wr.sg_list->length = cb->remote_len; cb->rdma_sgl.lkey = krping_rdma_rkey(cb, cb->rdma_dma_addr, 1); cb->rdma_sq_wr.next = NULL; /* Issue RDMA Read. */ if (cb->read_inv) cb->rdma_sq_wr.opcode = IB_WR_RDMA_READ_WITH_INV; else { cb->rdma_sq_wr.opcode = IB_WR_RDMA_READ; if (cb->mem == FASTREG) { /* * Immediately follow the read with a * fenced LOCAL_INV. */ cb->rdma_sq_wr.next = &inv; memset(&inv, 0, sizeof inv); inv.opcode = IB_WR_LOCAL_INV; inv.ex.invalidate_rkey = cb->fastreg_mr->rkey; inv.send_flags = IB_SEND_FENCE; } } ret = ib_post_send(cb->qp, &cb->rdma_sq_wr, &bad_wr); if (ret) { PRINTF(cb, "post send error %d\n", ret); break; } cb->rdma_sq_wr.next = NULL; DEBUG_LOG(cb, "server posted rdma read req \n"); /* Wait for read completion */ wait_event_interruptible(cb->sem, cb->state >= RDMA_READ_COMPLETE); if (cb->state != RDMA_READ_COMPLETE) { PRINTF(cb, "wait for RDMA_READ_COMPLETE state %d\n", cb->state); break; } DEBUG_LOG(cb, "server received read complete\n"); /* Display data in recv buf */ if (cb->verbose) PRINTF(cb, "server ping data: %s\n", cb->rdma_buf); /* Tell client to continue */ if (cb->server && cb->server_invalidate) { cb->sq_wr.ex.invalidate_rkey = cb->remote_rkey; cb->sq_wr.opcode = IB_WR_SEND_WITH_INV; DEBUG_LOG(cb, "send-w-inv rkey 0x%x\n", cb->remote_rkey); } ret = ib_post_send(cb->qp, &cb->sq_wr, &bad_wr); if (ret) { PRINTF(cb, "post send error %d\n", ret); break; } DEBUG_LOG(cb, "server posted go ahead\n"); /* Wait for client's RDMA STAG/TO/Len */ wait_event_interruptible(cb->sem, cb->state >= RDMA_WRITE_ADV); if (cb->state != RDMA_WRITE_ADV) { PRINTF(cb, "wait for RDMA_WRITE_ADV state %d\n", cb->state); break; } DEBUG_LOG(cb, "server received sink adv\n"); /* RDMA Write echo data */ cb->rdma_sq_wr.opcode = IB_WR_RDMA_WRITE; cb->rdma_sq_wr.wr.rdma.rkey = cb->remote_rkey; cb->rdma_sq_wr.wr.rdma.remote_addr = cb->remote_addr; cb->rdma_sq_wr.sg_list->length = strlen(cb->rdma_buf) + 1; if (cb->local_dma_lkey) cb->rdma_sgl.lkey = cb->qp->device->local_dma_lkey; else cb->rdma_sgl.lkey = krping_rdma_rkey(cb, cb->rdma_dma_addr, 0); DEBUG_LOG(cb, "rdma write from lkey %x laddr %llx len %d\n", cb->rdma_sq_wr.sg_list->lkey, (unsigned long long)cb->rdma_sq_wr.sg_list->addr, cb->rdma_sq_wr.sg_list->length); ret = ib_post_send(cb->qp, &cb->rdma_sq_wr, &bad_wr); if (ret) { PRINTF(cb, "post send error %d\n", ret); break; } /* Wait for completion */ ret = wait_event_interruptible(cb->sem, cb->state >= RDMA_WRITE_COMPLETE); if (cb->state != RDMA_WRITE_COMPLETE) { PRINTF(cb, "wait for RDMA_WRITE_COMPLETE state %d\n", cb->state); break; } DEBUG_LOG(cb, "server rdma write complete \n"); cb->state = CONNECTED; /* Tell client to begin again */ if (cb->server && cb->server_invalidate) { cb->sq_wr.ex.invalidate_rkey = cb->remote_rkey; cb->sq_wr.opcode = IB_WR_SEND_WITH_INV; DEBUG_LOG(cb, "send-w-inv rkey 0x%x\n", cb->remote_rkey); } ret = ib_post_send(cb->qp, &cb->sq_wr, &bad_wr); if (ret) { PRINTF(cb, "post send error %d\n", ret); break; } DEBUG_LOG(cb, "server posted go ahead\n"); } } static void rlat_test(struct krping_cb *cb) { int scnt; int iters = cb->count; struct timeval start_tv, stop_tv; int ret; struct ib_wc wc; struct ib_send_wr *bad_wr; int ne; scnt = 0; cb->rdma_sq_wr.opcode = IB_WR_RDMA_READ; cb->rdma_sq_wr.wr.rdma.rkey = cb->remote_rkey; cb->rdma_sq_wr.wr.rdma.remote_addr = cb->remote_addr; cb->rdma_sq_wr.sg_list->length = cb->size; microtime(&start_tv); if (!cb->poll) { cb->state = RDMA_READ_ADV; ib_req_notify_cq(cb->cq, IB_CQ_NEXT_COMP); } while (scnt < iters) { cb->state = RDMA_READ_ADV; ret = ib_post_send(cb->qp, &cb->rdma_sq_wr, &bad_wr); if (ret) { PRINTF(cb, "Couldn't post send: ret=%d scnt %d\n", ret, scnt); return; } do { if (!cb->poll) { wait_event_interruptible(cb->sem, cb->state != RDMA_READ_ADV); if (cb->state == RDMA_READ_COMPLETE) { ne = 1; ib_req_notify_cq(cb->cq, IB_CQ_NEXT_COMP); } else { ne = -1; } } else ne = ib_poll_cq(cb->cq, 1, &wc); if (cb->state == ERROR) { PRINTF(cb, "state == ERROR...bailing scnt %d\n", scnt); return; } } while (ne == 0); if (ne < 0) { PRINTF(cb, "poll CQ failed %d\n", ne); return; } if (cb->poll && wc.status != IB_WC_SUCCESS) { PRINTF(cb, "Completion wth error at %s:\n", cb->server ? "server" : "client"); PRINTF(cb, "Failed status %d: wr_id %d\n", wc.status, (int) wc.wr_id); return; } ++scnt; } microtime(&stop_tv); if (stop_tv.tv_usec < start_tv.tv_usec) { stop_tv.tv_usec += 1000000; stop_tv.tv_sec -= 1; } PRINTF(cb, "delta sec %lu delta usec %lu iter %d size %d\n", stop_tv.tv_sec - start_tv.tv_sec, stop_tv.tv_usec - start_tv.tv_usec, scnt, cb->size); } static void wlat_test(struct krping_cb *cb) { int ccnt, scnt, rcnt; int iters=cb->count; volatile char *poll_buf = (char *) cb->start_buf; char *buf = (char *)cb->rdma_buf; struct timeval start_tv, stop_tv; cycles_t *post_cycles_start, *post_cycles_stop; cycles_t *poll_cycles_start, *poll_cycles_stop; cycles_t *last_poll_cycles_start; cycles_t sum_poll = 0, sum_post = 0, sum_last_poll = 0; int i; int cycle_iters = 1000; ccnt = 0; scnt = 0; rcnt = 0; post_cycles_start = kmalloc(cycle_iters * sizeof(cycles_t), GFP_KERNEL); if (!post_cycles_start) { PRINTF(cb, "%s kmalloc failed\n", __FUNCTION__); return; } post_cycles_stop = kmalloc(cycle_iters * sizeof(cycles_t), GFP_KERNEL); if (!post_cycles_stop) { PRINTF(cb, "%s kmalloc failed\n", __FUNCTION__); return; } poll_cycles_start = kmalloc(cycle_iters * sizeof(cycles_t), GFP_KERNEL); if (!poll_cycles_start) { PRINTF(cb, "%s kmalloc failed\n", __FUNCTION__); return; } poll_cycles_stop = kmalloc(cycle_iters * sizeof(cycles_t), GFP_KERNEL); if (!poll_cycles_stop) { PRINTF(cb, "%s kmalloc failed\n", __FUNCTION__); return; } last_poll_cycles_start = kmalloc(cycle_iters * sizeof(cycles_t), GFP_KERNEL); if (!last_poll_cycles_start) { PRINTF(cb, "%s kmalloc failed\n", __FUNCTION__); return; } cb->rdma_sq_wr.opcode = IB_WR_RDMA_WRITE; cb->rdma_sq_wr.wr.rdma.rkey = cb->remote_rkey; cb->rdma_sq_wr.wr.rdma.remote_addr = cb->remote_addr; cb->rdma_sq_wr.sg_list->length = cb->size; if (cycle_iters > iters) cycle_iters = iters; microtime(&start_tv); while (scnt < iters || ccnt < iters || rcnt < iters) { /* Wait till buffer changes. */ if (rcnt < iters && !(scnt < 1 && !cb->server)) { ++rcnt; while (*poll_buf != (char)rcnt) { if (cb->state == ERROR) { PRINTF(cb, "state = ERROR, bailing\n"); return; } } } if (scnt < iters) { struct ib_send_wr *bad_wr; *buf = (char)scnt+1; if (scnt < cycle_iters) post_cycles_start[scnt] = get_cycles(); if (ib_post_send(cb->qp, &cb->rdma_sq_wr, &bad_wr)) { PRINTF(cb, "Couldn't post send: scnt=%d\n", scnt); return; } if (scnt < cycle_iters) post_cycles_stop[scnt] = get_cycles(); scnt++; } if (ccnt < iters) { struct ib_wc wc; int ne; if (ccnt < cycle_iters) poll_cycles_start[ccnt] = get_cycles(); do { if (ccnt < cycle_iters) last_poll_cycles_start[ccnt] = get_cycles(); ne = ib_poll_cq(cb->cq, 1, &wc); } while (ne == 0); if (ccnt < cycle_iters) poll_cycles_stop[ccnt] = get_cycles(); ++ccnt; if (ne < 0) { PRINTF(cb, "poll CQ failed %d\n", ne); return; } if (wc.status != IB_WC_SUCCESS) { PRINTF(cb, "Completion wth error at %s:\n", cb->server ? "server" : "client"); PRINTF(cb, "Failed status %d: wr_id %d\n", wc.status, (int) wc.wr_id); PRINTF(cb, "scnt=%d, rcnt=%d, ccnt=%d\n", scnt, rcnt, ccnt); return; } } } microtime(&stop_tv); if (stop_tv.tv_usec < start_tv.tv_usec) { stop_tv.tv_usec += 1000000; stop_tv.tv_sec -= 1; } for (i=0; i < cycle_iters; i++) { sum_post += post_cycles_stop[i] - post_cycles_start[i]; sum_poll += poll_cycles_stop[i] - poll_cycles_start[i]; sum_last_poll += poll_cycles_stop[i]-last_poll_cycles_start[i]; } PRINTF(cb, "delta sec %lu delta usec %lu iter %d size %d cycle_iters %d" " sum_post %llu sum_poll %llu sum_last_poll %llu\n", stop_tv.tv_sec - start_tv.tv_sec, stop_tv.tv_usec - start_tv.tv_usec, scnt, cb->size, cycle_iters, (unsigned long long)sum_post, (unsigned long long)sum_poll, (unsigned long long)sum_last_poll); kfree(post_cycles_start); kfree(post_cycles_stop); kfree(poll_cycles_start); kfree(poll_cycles_stop); kfree(last_poll_cycles_start); } static void bw_test(struct krping_cb *cb) { int ccnt, scnt, rcnt; int iters=cb->count; struct timeval start_tv, stop_tv; cycles_t *post_cycles_start, *post_cycles_stop; cycles_t *poll_cycles_start, *poll_cycles_stop; cycles_t *last_poll_cycles_start; cycles_t sum_poll = 0, sum_post = 0, sum_last_poll = 0; int i; int cycle_iters = 1000; ccnt = 0; scnt = 0; rcnt = 0; post_cycles_start = kmalloc(cycle_iters * sizeof(cycles_t), GFP_KERNEL); if (!post_cycles_start) { PRINTF(cb, "%s kmalloc failed\n", __FUNCTION__); return; } post_cycles_stop = kmalloc(cycle_iters * sizeof(cycles_t), GFP_KERNEL); if (!post_cycles_stop) { PRINTF(cb, "%s kmalloc failed\n", __FUNCTION__); return; } poll_cycles_start = kmalloc(cycle_iters * sizeof(cycles_t), GFP_KERNEL); if (!poll_cycles_start) { PRINTF(cb, "%s kmalloc failed\n", __FUNCTION__); return; } poll_cycles_stop = kmalloc(cycle_iters * sizeof(cycles_t), GFP_KERNEL); if (!poll_cycles_stop) { PRINTF(cb, "%s kmalloc failed\n", __FUNCTION__); return; } last_poll_cycles_start = kmalloc(cycle_iters * sizeof(cycles_t), GFP_KERNEL); if (!last_poll_cycles_start) { PRINTF(cb, "%s kmalloc failed\n", __FUNCTION__); return; } cb->rdma_sq_wr.opcode = IB_WR_RDMA_WRITE; cb->rdma_sq_wr.wr.rdma.rkey = cb->remote_rkey; cb->rdma_sq_wr.wr.rdma.remote_addr = cb->remote_addr; cb->rdma_sq_wr.sg_list->length = cb->size; if (cycle_iters > iters) cycle_iters = iters; microtime(&start_tv); while (scnt < iters || ccnt < iters) { while (scnt < iters && scnt - ccnt < cb->txdepth) { struct ib_send_wr *bad_wr; if (scnt < cycle_iters) post_cycles_start[scnt] = get_cycles(); if (ib_post_send(cb->qp, &cb->rdma_sq_wr, &bad_wr)) { PRINTF(cb, "Couldn't post send: scnt=%d\n", scnt); return; } if (scnt < cycle_iters) post_cycles_stop[scnt] = get_cycles(); ++scnt; } if (ccnt < iters) { int ne; struct ib_wc wc; if (ccnt < cycle_iters) poll_cycles_start[ccnt] = get_cycles(); do { if (ccnt < cycle_iters) last_poll_cycles_start[ccnt] = get_cycles(); ne = ib_poll_cq(cb->cq, 1, &wc); } while (ne == 0); if (ccnt < cycle_iters) poll_cycles_stop[ccnt] = get_cycles(); ccnt += 1; if (ne < 0) { PRINTF(cb, "poll CQ failed %d\n", ne); return; } if (wc.status != IB_WC_SUCCESS) { PRINTF(cb, "Completion wth error at %s:\n", cb->server ? "server" : "client"); PRINTF(cb, "Failed status %d: wr_id %d\n", wc.status, (int) wc.wr_id); return; } } } microtime(&stop_tv); if (stop_tv.tv_usec < start_tv.tv_usec) { stop_tv.tv_usec += 1000000; stop_tv.tv_sec -= 1; } for (i=0; i < cycle_iters; i++) { sum_post += post_cycles_stop[i] - post_cycles_start[i]; sum_poll += poll_cycles_stop[i] - poll_cycles_start[i]; sum_last_poll += poll_cycles_stop[i]-last_poll_cycles_start[i]; } PRINTF(cb, "delta sec %lu delta usec %lu iter %d size %d cycle_iters %d" " sum_post %llu sum_poll %llu sum_last_poll %llu\n", stop_tv.tv_sec - start_tv.tv_sec, stop_tv.tv_usec - start_tv.tv_usec, scnt, cb->size, cycle_iters, (unsigned long long)sum_post, (unsigned long long)sum_poll, (unsigned long long)sum_last_poll); kfree(post_cycles_start); kfree(post_cycles_stop); kfree(poll_cycles_start); kfree(poll_cycles_stop); kfree(last_poll_cycles_start); } static void krping_rlat_test_server(struct krping_cb *cb) { struct ib_send_wr *bad_wr; struct ib_wc wc; int ret; /* Spin waiting for client's Start STAG/TO/Len */ while (cb->state < RDMA_READ_ADV) { krping_cq_event_handler(cb->cq, cb); } /* Send STAG/TO/Len to client */ krping_format_send(cb, cb->start_dma_addr); ret = ib_post_send(cb->qp, &cb->sq_wr, &bad_wr); if (ret) { PRINTF(cb, "post send error %d\n", ret); return; } /* Spin waiting for send completion */ while ((ret = ib_poll_cq(cb->cq, 1, &wc) == 0)); if (ret < 0) { PRINTF(cb, "poll error %d\n", ret); return; } if (wc.status) { PRINTF(cb, "send completiong error %d\n", wc.status); return; } wait_event_interruptible(cb->sem, cb->state == ERROR); } static void krping_wlat_test_server(struct krping_cb *cb) { struct ib_send_wr *bad_wr; struct ib_wc wc; int ret; /* Spin waiting for client's Start STAG/TO/Len */ while (cb->state < RDMA_READ_ADV) { krping_cq_event_handler(cb->cq, cb); } /* Send STAG/TO/Len to client */ krping_format_send(cb, cb->start_dma_addr); ret = ib_post_send(cb->qp, &cb->sq_wr, &bad_wr); if (ret) { PRINTF(cb, "post send error %d\n", ret); return; } /* Spin waiting for send completion */ while ((ret = ib_poll_cq(cb->cq, 1, &wc) == 0)); if (ret < 0) { PRINTF(cb, "poll error %d\n", ret); return; } if (wc.status) { PRINTF(cb, "send completiong error %d\n", wc.status); return; } wlat_test(cb); wait_event_interruptible(cb->sem, cb->state == ERROR); } static void krping_bw_test_server(struct krping_cb *cb) { struct ib_send_wr *bad_wr; struct ib_wc wc; int ret; /* Spin waiting for client's Start STAG/TO/Len */ while (cb->state < RDMA_READ_ADV) { krping_cq_event_handler(cb->cq, cb); } /* Send STAG/TO/Len to client */ krping_format_send(cb, cb->start_dma_addr); ret = ib_post_send(cb->qp, &cb->sq_wr, &bad_wr); if (ret) { PRINTF(cb, "post send error %d\n", ret); return; } /* Spin waiting for send completion */ while ((ret = ib_poll_cq(cb->cq, 1, &wc) == 0)); if (ret < 0) { PRINTF(cb, "poll error %d\n", ret); return; } if (wc.status) { PRINTF(cb, "send completiong error %d\n", wc.status); return; } if (cb->duplex) bw_test(cb); wait_event_interruptible(cb->sem, cb->state == ERROR); } static int fastreg_supported(struct krping_cb *cb) { struct ib_device *dev = cb->child_cm_id->device; struct ib_device_attr attr; int ret; ret = ib_query_device(dev, &attr); if (ret) { PRINTF(cb, "ib_query_device failed ret %d\n", ret); return 0; } if (!(attr.device_cap_flags & IB_DEVICE_MEM_MGT_EXTENSIONS)) { PRINTF(cb, "Fastreg not supported - device_cap_flags 0x%x\n", attr.device_cap_flags); return 0; } DEBUG_LOG(cb, "Fastreg supported - device_cap_flags 0x%x\n", attr.device_cap_flags); return 1; } static int krping_bind_server(struct krping_cb *cb) { struct sockaddr_in sin; int ret; memset(&sin, 0, sizeof(sin)); sin.sin_len = sizeof sin; sin.sin_family = AF_INET; sin.sin_addr.s_addr = cb->addr.s_addr; sin.sin_port = cb->port; ret = rdma_bind_addr(cb->cm_id, (struct sockaddr *) &sin); if (ret) { PRINTF(cb, "rdma_bind_addr error %d\n", ret); return ret; } DEBUG_LOG(cb, "rdma_bind_addr successful\n"); DEBUG_LOG(cb, "rdma_listen\n"); ret = rdma_listen(cb->cm_id, 3); if (ret) { PRINTF(cb, "rdma_listen failed: %d\n", ret); return ret; } wait_event_interruptible(cb->sem, cb->state >= CONNECT_REQUEST); if (cb->state != CONNECT_REQUEST) { PRINTF(cb, "wait for CONNECT_REQUEST state %d\n", cb->state); return -1; } if (cb->mem == FASTREG && !fastreg_supported(cb)) return -EINVAL; return 0; } static void krping_run_server(struct krping_cb *cb) { struct ib_recv_wr *bad_wr; int ret; ret = krping_bind_server(cb); if (ret) return; ret = krping_setup_qp(cb, cb->child_cm_id); if (ret) { PRINTF(cb, "setup_qp failed: %d\n", ret); goto err0; } ret = krping_setup_buffers(cb); if (ret) { PRINTF(cb, "krping_setup_buffers failed: %d\n", ret); goto err1; } ret = ib_post_recv(cb->qp, &cb->rq_wr, &bad_wr); if (ret) { PRINTF(cb, "ib_post_recv failed: %d\n", ret); goto err2; } ret = krping_accept(cb); if (ret) { PRINTF(cb, "connect error %d\n", ret); goto err2; } if (cb->wlat) krping_wlat_test_server(cb); else if (cb->rlat) krping_rlat_test_server(cb); else if (cb->bw) krping_bw_test_server(cb); else krping_test_server(cb); rdma_disconnect(cb->child_cm_id); err2: krping_free_buffers(cb); err1: krping_free_qp(cb); err0: rdma_destroy_id(cb->child_cm_id); } static void krping_test_client(struct krping_cb *cb) { int ping, start, cc, i, ret; struct ib_send_wr *bad_wr; unsigned char c; start = 65; for (ping = 0; !cb->count || ping < cb->count; ping++) { cb->state = RDMA_READ_ADV; /* Put some ascii text in the buffer. */ cc = sprintf(cb->start_buf, "rdma-ping-%d: ", ping); for (i = cc, c = start; i < cb->size; i++) { cb->start_buf[i] = c; c++; if (c > 122) c = 65; } start++; if (start > 122) start = 65; cb->start_buf[cb->size - 1] = 0; krping_format_send(cb, cb->start_dma_addr); if (cb->state == ERROR) { PRINTF(cb, "krping_format_send failed\n"); break; } ret = ib_post_send(cb->qp, &cb->sq_wr, &bad_wr); if (ret) { PRINTF(cb, "post send error %d\n", ret); break; } /* Wait for server to ACK */ wait_event_interruptible(cb->sem, cb->state >= RDMA_WRITE_ADV); if (cb->state != RDMA_WRITE_ADV) { PRINTF(cb, "wait for RDMA_WRITE_ADV state %d\n", cb->state); break; } krping_format_send(cb, cb->rdma_dma_addr); ret = ib_post_send(cb->qp, &cb->sq_wr, &bad_wr); if (ret) { PRINTF(cb, "post send error %d\n", ret); break; } /* Wait for the server to say the RDMA Write is complete. */ wait_event_interruptible(cb->sem, cb->state >= RDMA_WRITE_COMPLETE); if (cb->state != RDMA_WRITE_COMPLETE) { PRINTF(cb, "wait for RDMA_WRITE_COMPLETE state %d\n", cb->state); break; } if (cb->validate) if (memcmp(cb->start_buf, cb->rdma_buf, cb->size)) { PRINTF(cb, "data mismatch!\n"); break; } if (cb->verbose) PRINTF(cb, "ping data: %s\n", cb->rdma_buf); #ifdef SLOW_KRPING wait_event_interruptible_timeout(cb->sem, cb->state == ERROR, HZ); #endif } } static void krping_rlat_test_client(struct krping_cb *cb) { struct ib_send_wr *bad_wr; struct ib_wc wc; int ret; cb->state = RDMA_READ_ADV; /* Send STAG/TO/Len to client */ krping_format_send(cb, cb->start_dma_addr); if (cb->state == ERROR) { PRINTF(cb, "krping_format_send failed\n"); return; } ret = ib_post_send(cb->qp, &cb->sq_wr, &bad_wr); if (ret) { PRINTF(cb, "post send error %d\n", ret); return; } /* Spin waiting for send completion */ while ((ret = ib_poll_cq(cb->cq, 1, &wc) == 0)); if (ret < 0) { PRINTF(cb, "poll error %d\n", ret); return; } if (wc.status) { PRINTF(cb, "send completion error %d\n", wc.status); return; } /* Spin waiting for server's Start STAG/TO/Len */ while (cb->state < RDMA_WRITE_ADV) { krping_cq_event_handler(cb->cq, cb); } #if 0 { int i; struct timeval start, stop; time_t sec; suseconds_t usec; unsigned long long elapsed; struct ib_wc wc; struct ib_send_wr *bad_wr; int ne; cb->rdma_sq_wr.opcode = IB_WR_RDMA_WRITE; cb->rdma_sq_wr.wr.rdma.rkey = cb->remote_rkey; cb->rdma_sq_wr.wr.rdma.remote_addr = cb->remote_addr; cb->rdma_sq_wr.sg_list->length = 0; cb->rdma_sq_wr.num_sge = 0; microtime(&start); for (i=0; i < 100000; i++) { if (ib_post_send(cb->qp, &cb->rdma_sq_wr, &bad_wr)) { PRINTF(cb, "Couldn't post send\n"); return; } do { ne = ib_poll_cq(cb->cq, 1, &wc); } while (ne == 0); if (ne < 0) { PRINTF(cb, "poll CQ failed %d\n", ne); return; } if (wc.status != IB_WC_SUCCESS) { PRINTF(cb, "Completion wth error at %s:\n", cb->server ? "server" : "client"); PRINTF(cb, "Failed status %d: wr_id %d\n", wc.status, (int) wc.wr_id); return; } } microtime(&stop); if (stop.tv_usec < start.tv_usec) { stop.tv_usec += 1000000; stop.tv_sec -= 1; } sec = stop.tv_sec - start.tv_sec; usec = stop.tv_usec - start.tv_usec; elapsed = sec * 1000000 + usec; PRINTF(cb, "0B-write-lat iters 100000 usec %llu\n", elapsed); } #endif rlat_test(cb); } static void krping_wlat_test_client(struct krping_cb *cb) { struct ib_send_wr *bad_wr; struct ib_wc wc; int ret; cb->state = RDMA_READ_ADV; /* Send STAG/TO/Len to client */ krping_format_send(cb, cb->start_dma_addr); if (cb->state == ERROR) { PRINTF(cb, "krping_format_send failed\n"); return; } ret = ib_post_send(cb->qp, &cb->sq_wr, &bad_wr); if (ret) { PRINTF(cb, "post send error %d\n", ret); return; } /* Spin waiting for send completion */ while ((ret = ib_poll_cq(cb->cq, 1, &wc) == 0)); if (ret < 0) { PRINTF(cb, "poll error %d\n", ret); return; } if (wc.status) { PRINTF(cb, "send completion error %d\n", wc.status); return; } /* Spin waiting for server's Start STAG/TO/Len */ while (cb->state < RDMA_WRITE_ADV) { krping_cq_event_handler(cb->cq, cb); } wlat_test(cb); } static void krping_bw_test_client(struct krping_cb *cb) { struct ib_send_wr *bad_wr; struct ib_wc wc; int ret; cb->state = RDMA_READ_ADV; /* Send STAG/TO/Len to client */ krping_format_send(cb, cb->start_dma_addr); if (cb->state == ERROR) { PRINTF(cb, "krping_format_send failed\n"); return; } ret = ib_post_send(cb->qp, &cb->sq_wr, &bad_wr); if (ret) { PRINTF(cb, "post send error %d\n", ret); return; } /* Spin waiting for send completion */ while ((ret = ib_poll_cq(cb->cq, 1, &wc) == 0)); if (ret < 0) { PRINTF(cb, "poll error %d\n", ret); return; } if (wc.status) { PRINTF(cb, "send completion error %d\n", wc.status); return; } /* Spin waiting for server's Start STAG/TO/Len */ while (cb->state < RDMA_WRITE_ADV) { krping_cq_event_handler(cb->cq, cb); } bw_test(cb); } static void krping_fr_test(struct krping_cb *cb) { struct ib_fast_reg_page_list *pl; struct ib_send_wr fr, inv, *bad; struct ib_wc wc; u8 key = 0; struct ib_mr *mr; int i; int ret; int size = cb->size; int plen = (((size - 1) & PAGE_MASK) + PAGE_SIZE) >> PAGE_SHIFT; time_t start; int count = 0; int scnt = 0; pl = ib_alloc_fast_reg_page_list(cb->qp->device, plen); if (IS_ERR(pl)) { PRINTF(cb, "ib_alloc_fast_reg_page_list failed %ld\n", PTR_ERR(pl)); return; } mr = ib_alloc_fast_reg_mr(cb->pd, plen); if (IS_ERR(mr)) { PRINTF(cb, "ib_alloc_fast_reg_mr failed %ld\n", PTR_ERR(pl)); goto err1; } for (i=0; ipage_list[i] = 0xcafebabe | i; memset(&fr, 0, sizeof fr); fr.opcode = IB_WR_FAST_REG_MR; fr.wr.fast_reg.page_shift = PAGE_SHIFT; fr.wr.fast_reg.length = size; fr.wr.fast_reg.page_list = pl; fr.wr.fast_reg.page_list_len = plen; fr.wr.fast_reg.iova_start = 0; fr.wr.fast_reg.access_flags = IB_ACCESS_REMOTE_WRITE | IB_ACCESS_LOCAL_WRITE; fr.next = &inv; memset(&inv, 0, sizeof inv); inv.opcode = IB_WR_LOCAL_INV; inv.send_flags = IB_SEND_SIGNALED; DEBUG_LOG(cb, "fr_test: stag index 0x%x plen %u size %u depth %u\n", mr->rkey >> 8, plen, cb->size, cb->txdepth); start = time_uptime; while (1) { if ((time_uptime - start) >= 9) { DEBUG_LOG(cb, "fr_test: pausing 1 second! count %u latest size %u plen %u\n", count, size, plen); wait_event_interruptible(cb->sem, cb->state == ERROR); if (cb->state == ERROR) break; start = time_uptime; } while (scnt < (cb->txdepth>>1)) { ib_update_fast_reg_key(mr, ++key); fr.wr.fast_reg.rkey = mr->rkey; inv.ex.invalidate_rkey = mr->rkey; size = arc4random() % cb->size; if (size == 0) size = cb->size; plen = (((size - 1) & PAGE_MASK) + PAGE_SIZE) >> PAGE_SHIFT; fr.wr.fast_reg.length = size; fr.wr.fast_reg.page_list_len = plen; ret = ib_post_send(cb->qp, &fr, &bad); if (ret) { PRINTF(cb, "ib_post_send failed %d\n", ret); goto err2; } scnt++; } do { ret = ib_poll_cq(cb->cq, 1, &wc); if (ret < 0) { PRINTF(cb, "ib_poll_cq failed %d\n", ret); goto err2; } if (ret == 1) { if (wc.status) { PRINTF(cb, "completion error %u\n", wc.status); goto err2; } count++; scnt--; } else if (krping_sigpending()) { PRINTF(cb, "signal!\n"); goto err2; } } while (ret == 1); } err2: #if 0 DEBUG_LOG(cb, "sleeping 1 second\n"); wait_event_interruptible_timeout(cb->sem, cb->state == ERROR, HZ); #endif DEBUG_LOG(cb, "draining the cq...\n"); do { ret = ib_poll_cq(cb->cq, 1, &wc); if (ret < 0) { PRINTF(cb, "ib_poll_cq failed %d\n", ret); break; } if (ret == 1) { if (wc.status) { PRINTF(cb, "completion error %u opcode %u\n", wc.status, wc.opcode); } } } while (ret == 1); DEBUG_LOG(cb, "fr_test: done!\n"); ib_dereg_mr(mr); err1: ib_free_fast_reg_page_list(pl); } static int krping_connect_client(struct krping_cb *cb) { struct rdma_conn_param conn_param; int ret; memset(&conn_param, 0, sizeof conn_param); conn_param.responder_resources = 1; conn_param.initiator_depth = 1; conn_param.retry_count = 10; ret = rdma_connect(cb->cm_id, &conn_param); if (ret) { PRINTF(cb, "rdma_connect error %d\n", ret); return ret; } wait_event_interruptible(cb->sem, cb->state >= CONNECTED); if (cb->state == ERROR) { PRINTF(cb, "wait for CONNECTED state %d\n", cb->state); return -1; } DEBUG_LOG(cb, "rdma_connect successful\n"); return 0; } static int krping_bind_client(struct krping_cb *cb) { struct sockaddr_in sin; int ret; memset(&sin, 0, sizeof(sin)); sin.sin_len = sizeof sin; sin.sin_family = AF_INET; sin.sin_addr.s_addr = cb->addr.s_addr; sin.sin_port = cb->port; ret = rdma_resolve_addr(cb->cm_id, NULL, (struct sockaddr *) &sin, 2000); if (ret) { PRINTF(cb, "rdma_resolve_addr error %d\n", ret); return ret; } wait_event_interruptible(cb->sem, cb->state >= ROUTE_RESOLVED); if (cb->state != ROUTE_RESOLVED) { PRINTF(cb, "addr/route resolution did not resolve: state %d\n", cb->state); return -EINTR; } if (cb->mem == FASTREG && !fastreg_supported(cb)) return -EINVAL; DEBUG_LOG(cb, "rdma_resolve_addr - rdma_resolve_route successful\n"); return 0; } static void krping_run_client(struct krping_cb *cb) { struct ib_recv_wr *bad_wr; int ret; ret = krping_bind_client(cb); if (ret) return; ret = krping_setup_qp(cb, cb->cm_id); if (ret) { PRINTF(cb, "setup_qp failed: %d\n", ret); return; } ret = krping_setup_buffers(cb); if (ret) { PRINTF(cb, "krping_setup_buffers failed: %d\n", ret); goto err1; } ret = ib_post_recv(cb->qp, &cb->rq_wr, &bad_wr); if (ret) { PRINTF(cb, "ib_post_recv failed: %d\n", ret); goto err2; } ret = krping_connect_client(cb); if (ret) { PRINTF(cb, "connect error %d\n", ret); goto err2; } if (cb->wlat) krping_wlat_test_client(cb); else if (cb->rlat) krping_rlat_test_client(cb); else if (cb->bw) krping_bw_test_client(cb); else if (cb->frtest) krping_fr_test(cb); else krping_test_client(cb); rdma_disconnect(cb->cm_id); err2: krping_free_buffers(cb); err1: krping_free_qp(cb); } int krping_doit(char *cmd, void *cookie) { struct krping_cb *cb; int op; int ret = 0; char *optarg; unsigned long optint; cb = kzalloc(sizeof(*cb), GFP_KERNEL); if (!cb) return -ENOMEM; mutex_lock(&krping_mutex); list_add_tail(&cb->list, &krping_cbs); mutex_unlock(&krping_mutex); cb->cookie = cookie; cb->server = -1; cb->state = IDLE; cb->size = 64; cb->txdepth = RPING_SQ_DEPTH; cb->mem = DMA; init_waitqueue_head(&cb->sem); while ((op = krping_getopt("krping", &cmd, krping_opts, NULL, &optarg, &optint)) != 0) { switch (op) { case 'a': cb->addr_str = optarg; DEBUG_LOG(cb, "ipaddr (%s)\n", optarg); if (!inet_aton(optarg, &cb->addr)) { PRINTF(cb, "bad addr string %s\n", optarg); ret = EINVAL; } break; case 'p': cb->port = htons(optint); DEBUG_LOG(cb, "port %d\n", (int)optint); break; case 'P': cb->poll = 1; DEBUG_LOG(cb, "server\n"); break; case 's': cb->server = 1; DEBUG_LOG(cb, "server\n"); break; case 'c': cb->server = 0; DEBUG_LOG(cb, "client\n"); break; case 'S': cb->size = optint; if ((cb->size < 1) || (cb->size > RPING_BUFSIZE)) { PRINTF(cb, "Invalid size %d " "(valid range is 1 to %d)\n", cb->size, RPING_BUFSIZE); ret = EINVAL; } else DEBUG_LOG(cb, "size %d\n", (int)optint); break; case 'C': cb->count = optint; if (cb->count < 0) { PRINTF(cb, "Invalid count %d\n", cb->count); ret = EINVAL; } else DEBUG_LOG(cb, "count %d\n", (int) cb->count); break; case 'v': cb->verbose++; DEBUG_LOG(cb, "verbose\n"); break; case 'V': cb->validate++; DEBUG_LOG(cb, "validate data\n"); break; case 'l': cb->wlat++; break; case 'L': cb->rlat++; break; case 'B': cb->bw++; break; case 'd': cb->duplex++; break; case 'm': if (!strncmp(optarg, "dma", 3)) cb->mem = DMA; else if (!strncmp(optarg, "fastreg", 7)) cb->mem = FASTREG; else if (!strncmp(optarg, "mw", 2)) cb->mem = MW; else if (!strncmp(optarg, "mr", 2)) cb->mem = MR; else { PRINTF(cb, "unknown mem mode %s. " "Must be dma, fastreg, mw, or mr\n", optarg); ret = -EINVAL; break; } break; case 'I': cb->server_invalidate = 1; break; case 'T': cb->txdepth = optint; DEBUG_LOG(cb, "txdepth %d\n", (int) cb->txdepth); break; case 'Z': cb->local_dma_lkey = 1; DEBUG_LOG(cb, "using local dma lkey\n"); break; case 'R': cb->read_inv = 1; DEBUG_LOG(cb, "using read-with-inv\n"); break; case 'f': cb->frtest = 1; DEBUG_LOG(cb, "fast-reg test!\n"); break; default: PRINTF(cb, "unknown opt %s\n", optarg); ret = -EINVAL; break; } } if (ret) goto out; if (cb->server == -1) { PRINTF(cb, "must be either client or server\n"); ret = -EINVAL; goto out; } if (cb->server && cb->frtest) { PRINTF(cb, "must be client to run frtest\n"); ret = -EINVAL; goto out; } if ((cb->frtest + cb->bw + cb->rlat + cb->wlat) > 1) { PRINTF(cb, "Pick only one test: fr, bw, rlat, wlat\n"); ret = -EINVAL; goto out; } if (cb->server_invalidate && cb->mem != FASTREG) { PRINTF(cb, "server_invalidate only valid with fastreg mem_mode\n"); ret = -EINVAL; goto out; } if (cb->read_inv && cb->mem != FASTREG) { PRINTF(cb, "read_inv only valid with fastreg mem_mode\n"); ret = -EINVAL; goto out; } if (cb->mem != MR && (cb->wlat || cb->rlat || cb->bw)) { PRINTF(cb, "wlat, rlat, and bw tests only support mem_mode MR\n"); ret = -EINVAL; goto out; } cb->cm_id = rdma_create_id(krping_cma_event_handler, cb, RDMA_PS_TCP); if (IS_ERR(cb->cm_id)) { ret = PTR_ERR(cb->cm_id); PRINTF(cb, "rdma_create_id error %d\n", ret); goto out; } DEBUG_LOG(cb, "created cm_id %p\n", cb->cm_id); if (cb->server) krping_run_server(cb); else krping_run_client(cb); DEBUG_LOG(cb, "destroy cm_id %p\n", cb->cm_id); rdma_destroy_id(cb->cm_id); out: mutex_lock(&krping_mutex); list_del(&cb->list); mutex_unlock(&krping_mutex); kfree(cb); return ret; } void krping_walk_cb_list(void (*f)(struct krping_stats *, void *), void *arg) { struct krping_cb *cb; mutex_lock(&krping_mutex); list_for_each_entry(cb, &krping_cbs, list) (*f)(cb->pd ? &cb->stats : NULL, arg); mutex_unlock(&krping_mutex); } void krping_init(void) { mutex_init(&krping_mutex); } Index: stable/10/sys/dev/cxgb/cxgb_osdep.h =================================================================== --- stable/10/sys/dev/cxgb/cxgb_osdep.h (revision 271126) +++ stable/10/sys/dev/cxgb/cxgb_osdep.h (revision 271127) @@ -1,310 +1,308 @@ /************************************************************************** Copyright (c) 2007, Chelsio Inc. All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 2. Neither the name of the Chelsio Corporation nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. $FreeBSD$ ***************************************************************************/ #include #include #include #include #include #include #include #include #ifndef _CXGB_OSDEP_H_ #define _CXGB_OSDEP_H_ typedef struct adapter adapter_t; typedef struct port_info pinfo_t; struct sge_rspq; enum { TP_TMR_RES = 200, /* TP timer resolution in usec */ MAX_NPORTS = 4, /* max # of ports */ TP_SRAM_OFFSET = 4096, /* TP SRAM content offset in eeprom */ TP_SRAM_LEN = 2112, /* TP SRAM content offset in eeprom */ }; struct t3_mbuf_hdr { struct mbuf *mh_head; struct mbuf *mh_tail; }; #ifndef PANIC_IF #define PANIC_IF(exp) do { \ if (exp) \ panic("BUG: %s", #exp); \ } while (0) #endif #if __FreeBSD_version < 800054 #if defined (__GNUC__) #if #cpu(i386) || defined __i386 || defined i386 || defined __i386__ || #cpu(x86_64) || defined __x86_64__ #define mb() __asm__ __volatile__ ("mfence;": : :"memory") #define wmb() __asm__ __volatile__ ("sfence;": : :"memory") #define rmb() __asm__ __volatile__ ("lfence;": : :"memory") #elif #cpu(sparc64) || defined sparc64 || defined __sparcv9 #define mb() __asm__ __volatile__ ("membar #MemIssue": : :"memory") #define wmb() mb() #define rmb() mb() #elif #cpu(sparc) || defined sparc || defined __sparc__ #define mb() __asm__ __volatile__ ("stbar;": : :"memory") #define wmb() mb() #define rmb() mb() #else #define wmb() mb() #define rmb() mb() #define mb() /* XXX just to make this compile */ #endif #else #error "unknown compiler" #endif #endif -#define __read_mostly __attribute__((__section__(".data.read_mostly"))) - /* * Workaround for weird Chelsio issue */ #if __FreeBSD_version > 700029 #define PRIV_SUPPORTED #endif #define CXGB_TX_CLEANUP_THRESHOLD 32 #define TX_MAX_SIZE (1 << 16) /* 64KB */ #define TX_MAX_SEGS 36 /* maximum supported by card */ #define TX_MAX_DESC 4 /* max descriptors per packet */ #define TX_START_MAX_DESC (TX_MAX_DESC << 2) /* maximum number of descriptors * call to start used per */ #define TX_CLEAN_MAX_DESC (TX_MAX_DESC << 4) /* maximum tx descriptors * to clean per iteration */ #define TX_WR_SIZE_MAX 11*1024 /* the maximum total size of packets aggregated into a single * TX WR */ #define TX_WR_COUNT_MAX 7 /* the maximum total number of packets that can be * aggregated into a single TX WR */ #if defined(__i386__) || defined(__amd64__) static __inline void prefetch(void *x) { __asm volatile("prefetcht0 %0" :: "m" (*(unsigned long *)x)); } #define smp_mb() mb() #define L1_CACHE_BYTES 128 extern void kdb_backtrace(void); #define WARN_ON(condition) do { \ if (__predict_false((condition)!=0)) { \ log(LOG_WARNING, "BUG: warning at %s:%d/%s()\n", __FILE__, __LINE__, __FUNCTION__); \ kdb_backtrace(); \ } \ } while (0) #else #define smp_mb() #define prefetch(x) #define L1_CACHE_BYTES 32 #endif #define DBG_RX (1 << 0) static const int debug_flags = DBG_RX; #ifdef DEBUG_PRINT #define DBG(flag, msg) do { \ if ((flag & debug_flags)) \ printf msg; \ } while (0) #else #define DBG(...) #endif #include #define promisc_rx_mode(rm) ((rm)->port->ifp->if_flags & IFF_PROMISC) #define allmulti_rx_mode(rm) ((rm)->port->ifp->if_flags & IFF_ALLMULTI) #define CH_ERR(adap, fmt, ...) log(LOG_ERR, fmt, ##__VA_ARGS__) #define CH_WARN(adap, fmt, ...) log(LOG_WARNING, fmt, ##__VA_ARGS__) #define CH_ALERT(adap, fmt, ...) log(LOG_ALERT, fmt, ##__VA_ARGS__) #define t3_os_sleep(x) DELAY((x) * 1000) #define test_and_clear_bit(bit, p) atomic_cmpset_int((p), ((*(p)) | (1< __FBSDID("$FreeBSD$"); #include "opt_inet.h" #ifdef TCP_OFFLOAD #include #include #include #include #include #include #include -#include #include #include #include #include #include #include #include #include #include struct sge_iq; struct rss_header; #include #include "offload.h" #include "tom/t4_tom.h" #define TOEPCB(so) ((struct toepcb *)(so_sototcpcb((so))->t_toe)) #include "iw_cxgbe.h" #include #include #include #include #include #include static spinlock_t req_lock; static TAILQ_HEAD(c4iw_ep_list, c4iw_ep_common) req_list; static struct work_struct c4iw_task; static struct workqueue_struct *c4iw_taskq; static LIST_HEAD(timeout_list); static spinlock_t timeout_lock; static void process_req(struct work_struct *ctx); static void start_ep_timer(struct c4iw_ep *ep); static void stop_ep_timer(struct c4iw_ep *ep); static int set_tcpinfo(struct c4iw_ep *ep); static enum c4iw_ep_state state_read(struct c4iw_ep_common *epc); static void __state_set(struct c4iw_ep_common *epc, enum c4iw_ep_state tostate); static void state_set(struct c4iw_ep_common *epc, enum c4iw_ep_state tostate); static void *alloc_ep(int size, gfp_t flags); void __free_ep(struct c4iw_ep_common *epc); static struct rtentry * find_route(__be32 local_ip, __be32 peer_ip, __be16 local_port, __be16 peer_port, u8 tos); static int close_socket(struct c4iw_ep_common *epc, int close); static int shutdown_socket(struct c4iw_ep_common *epc); static void abort_socket(struct c4iw_ep *ep); static void send_mpa_req(struct c4iw_ep *ep); static int send_mpa_reject(struct c4iw_ep *ep, const void *pdata, u8 plen); static int send_mpa_reply(struct c4iw_ep *ep, const void *pdata, u8 plen); static void close_complete_upcall(struct c4iw_ep *ep); static int abort_connection(struct c4iw_ep *ep); static void peer_close_upcall(struct c4iw_ep *ep); static void peer_abort_upcall(struct c4iw_ep *ep); static void connect_reply_upcall(struct c4iw_ep *ep, int status); static void connect_request_upcall(struct c4iw_ep *ep); static void established_upcall(struct c4iw_ep *ep); static void process_mpa_reply(struct c4iw_ep *ep); static void process_mpa_request(struct c4iw_ep *ep); static void process_peer_close(struct c4iw_ep *ep); static void process_conn_error(struct c4iw_ep *ep); static void process_close_complete(struct c4iw_ep *ep); static void ep_timeout(unsigned long arg); static void init_sock(struct c4iw_ep_common *epc); static void process_data(struct c4iw_ep *ep); static void process_connected(struct c4iw_ep *ep); static struct socket * dequeue_socket(struct socket *head, struct sockaddr_in **remote, struct c4iw_ep *child_ep); static void process_newconn(struct c4iw_ep *parent_ep); static int c4iw_so_upcall(struct socket *so, void *arg, int waitflag); static void process_socket_event(struct c4iw_ep *ep); static void release_ep_resources(struct c4iw_ep *ep); #define START_EP_TIMER(ep) \ do { \ CTR3(KTR_IW_CXGBE, "start_ep_timer (%s:%d) ep %p", \ __func__, __LINE__, (ep)); \ start_ep_timer(ep); \ } while (0) #define STOP_EP_TIMER(ep) \ do { \ CTR3(KTR_IW_CXGBE, "stop_ep_timer (%s:%d) ep %p", \ __func__, __LINE__, (ep)); \ stop_ep_timer(ep); \ } while (0) #ifdef KTR static char *states[] = { "idle", "listen", "connecting", "mpa_wait_req", "mpa_req_sent", "mpa_req_rcvd", "mpa_rep_sent", "fpdu_mode", "aborting", "closing", "moribund", "dead", NULL, }; #endif static void process_req(struct work_struct *ctx) { struct c4iw_ep_common *epc; spin_lock(&req_lock); while (!TAILQ_EMPTY(&req_list)) { epc = TAILQ_FIRST(&req_list); TAILQ_REMOVE(&req_list, epc, entry); epc->entry.tqe_prev = NULL; spin_unlock(&req_lock); if (epc->so) process_socket_event((struct c4iw_ep *)epc); c4iw_put_ep(epc); spin_lock(&req_lock); } spin_unlock(&req_lock); } /* * XXX: doesn't belong here in the iWARP driver. * XXX: assumes that the connection was offloaded by cxgbe/t4_tom if TF_TOE is * set. Is this a valid assumption for active open? */ static int set_tcpinfo(struct c4iw_ep *ep) { struct socket *so = ep->com.so; struct inpcb *inp = sotoinpcb(so); struct tcpcb *tp; struct toepcb *toep; int rc = 0; INP_WLOCK(inp); tp = intotcpcb(inp); if ((tp->t_flags & TF_TOE) == 0) { rc = EINVAL; log(LOG_ERR, "%s: connection not offloaded (so %p, ep %p)\n", __func__, so, ep); goto done; } toep = TOEPCB(so); ep->hwtid = toep->tid; ep->snd_seq = tp->snd_nxt; ep->rcv_seq = tp->rcv_nxt; ep->emss = max(tp->t_maxseg, 128); done: INP_WUNLOCK(inp); return (rc); } static struct rtentry * find_route(__be32 local_ip, __be32 peer_ip, __be16 local_port, __be16 peer_port, u8 tos) { struct route iproute; struct sockaddr_in *dst = (struct sockaddr_in *)&iproute.ro_dst; CTR5(KTR_IW_CXGBE, "%s:frtB %x, %x, %d, %d", __func__, local_ip, peer_ip, ntohs(local_port), ntohs(peer_port)); bzero(&iproute, sizeof iproute); dst->sin_family = AF_INET; dst->sin_len = sizeof *dst; dst->sin_addr.s_addr = peer_ip; rtalloc(&iproute); CTR2(KTR_IW_CXGBE, "%s:frtE %p", __func__, (uint64_t)iproute.ro_rt); return iproute.ro_rt; } static int close_socket(struct c4iw_ep_common *epc, int close) { struct socket *so = epc->so; int rc; CTR4(KTR_IW_CXGBE, "%s: so %p, ep %p, state %s", __func__, epc, so, states[epc->state]); SOCK_LOCK(so); soupcall_clear(so, SO_RCV); SOCK_UNLOCK(so); if (close) rc = soclose(so); else rc = soshutdown(so, SHUT_WR | SHUT_RD); epc->so = NULL; return (rc); } static int shutdown_socket(struct c4iw_ep_common *epc) { CTR4(KTR_IW_CXGBE, "%s: so %p, ep %p, state %s", __func__, epc->so, epc, states[epc->state]); return (soshutdown(epc->so, SHUT_WR)); } static void abort_socket(struct c4iw_ep *ep) { struct sockopt sopt; int rc; struct linger l; CTR4(KTR_IW_CXGBE, "%s ep %p so %p state %s", __func__, ep, ep->com.so, states[ep->com.state]); l.l_onoff = 1; l.l_linger = 0; /* linger_time of 0 forces RST to be sent */ sopt.sopt_dir = SOPT_SET; sopt.sopt_level = SOL_SOCKET; sopt.sopt_name = SO_LINGER; sopt.sopt_val = (caddr_t)&l; sopt.sopt_valsize = sizeof l; sopt.sopt_td = NULL; rc = sosetopt(ep->com.so, &sopt); if (rc) { log(LOG_ERR, "%s: can't set linger to 0, no RST! err %d\n", __func__, rc); } } static void process_peer_close(struct c4iw_ep *ep) { struct c4iw_qp_attributes attrs; int disconnect = 1; int release = 0; CTR4(KTR_IW_CXGBE, "%s:ppcB ep %p so %p state %s", __func__, ep, ep->com.so, states[ep->com.state]); mutex_lock(&ep->com.mutex); switch (ep->com.state) { case MPA_REQ_WAIT: CTR2(KTR_IW_CXGBE, "%s:ppc1 %p MPA_REQ_WAIT CLOSING", __func__, ep); __state_set(&ep->com, CLOSING); break; case MPA_REQ_SENT: CTR2(KTR_IW_CXGBE, "%s:ppc2 %p MPA_REQ_SENT CLOSING", __func__, ep); __state_set(&ep->com, DEAD); connect_reply_upcall(ep, -ECONNABORTED); disconnect = 0; STOP_EP_TIMER(ep); close_socket(&ep->com, 0); ep->com.cm_id->rem_ref(ep->com.cm_id); ep->com.cm_id = NULL; ep->com.qp = NULL; release = 1; break; case MPA_REQ_RCVD: /* * We're gonna mark this puppy DEAD, but keep * the reference on it until the ULP accepts or * rejects the CR. */ CTR2(KTR_IW_CXGBE, "%s:ppc3 %p MPA_REQ_RCVD CLOSING", __func__, ep); __state_set(&ep->com, CLOSING); c4iw_get_ep(&ep->com); break; case MPA_REP_SENT: CTR2(KTR_IW_CXGBE, "%s:ppc4 %p MPA_REP_SENT CLOSING", __func__, ep); __state_set(&ep->com, CLOSING); break; case FPDU_MODE: CTR2(KTR_IW_CXGBE, "%s:ppc5 %p FPDU_MODE CLOSING", __func__, ep); START_EP_TIMER(ep); __state_set(&ep->com, CLOSING); attrs.next_state = C4IW_QP_STATE_CLOSING; c4iw_modify_qp(ep->com.dev, ep->com.qp, C4IW_QP_ATTR_NEXT_STATE, &attrs, 1); peer_close_upcall(ep); break; case ABORTING: CTR2(KTR_IW_CXGBE, "%s:ppc6 %p ABORTING (disconn)", __func__, ep); disconnect = 0; break; case CLOSING: CTR2(KTR_IW_CXGBE, "%s:ppc7 %p CLOSING MORIBUND", __func__, ep); __state_set(&ep->com, MORIBUND); disconnect = 0; break; case MORIBUND: CTR2(KTR_IW_CXGBE, "%s:ppc8 %p MORIBUND DEAD", __func__, ep); STOP_EP_TIMER(ep); if (ep->com.cm_id && ep->com.qp) { attrs.next_state = C4IW_QP_STATE_IDLE; c4iw_modify_qp(ep->com.qp->rhp, ep->com.qp, C4IW_QP_ATTR_NEXT_STATE, &attrs, 1); } close_socket(&ep->com, 0); close_complete_upcall(ep); __state_set(&ep->com, DEAD); release = 1; disconnect = 0; break; case DEAD: CTR2(KTR_IW_CXGBE, "%s:ppc9 %p DEAD (disconn)", __func__, ep); disconnect = 0; break; default: panic("%s: ep %p state %d", __func__, ep, ep->com.state); break; } mutex_unlock(&ep->com.mutex); if (disconnect) { CTR2(KTR_IW_CXGBE, "%s:ppca %p", __func__, ep); c4iw_ep_disconnect(ep, 0, M_NOWAIT); } if (release) { CTR2(KTR_IW_CXGBE, "%s:ppcb %p", __func__, ep); c4iw_put_ep(&ep->com); } CTR2(KTR_IW_CXGBE, "%s:ppcE %p", __func__, ep); return; } static void process_conn_error(struct c4iw_ep *ep) { struct c4iw_qp_attributes attrs; int ret; int state; state = state_read(&ep->com); CTR5(KTR_IW_CXGBE, "%s:pceB ep %p so %p so->so_error %u state %s", __func__, ep, ep->com.so, ep->com.so->so_error, states[ep->com.state]); switch (state) { case MPA_REQ_WAIT: STOP_EP_TIMER(ep); break; case MPA_REQ_SENT: STOP_EP_TIMER(ep); connect_reply_upcall(ep, -ECONNRESET); break; case MPA_REP_SENT: ep->com.rpl_err = ECONNRESET; CTR1(KTR_IW_CXGBE, "waking up ep %p", ep); break; case MPA_REQ_RCVD: /* * We're gonna mark this puppy DEAD, but keep * the reference on it until the ULP accepts or * rejects the CR. */ c4iw_get_ep(&ep->com); break; case MORIBUND: case CLOSING: STOP_EP_TIMER(ep); /*FALLTHROUGH*/ case FPDU_MODE: if (ep->com.cm_id && ep->com.qp) { attrs.next_state = C4IW_QP_STATE_ERROR; ret = c4iw_modify_qp(ep->com.qp->rhp, ep->com.qp, C4IW_QP_ATTR_NEXT_STATE, &attrs, 1); if (ret) log(LOG_ERR, "%s - qp <- error failed!\n", __func__); } peer_abort_upcall(ep); break; case ABORTING: break; case DEAD: CTR2(KTR_IW_CXGBE, "%s so_error %d IN DEAD STATE!!!!", __func__, ep->com.so->so_error); return; default: panic("%s: ep %p state %d", __func__, ep, state); break; } if (state != ABORTING) { CTR2(KTR_IW_CXGBE, "%s:pce1 %p", __func__, ep); close_socket(&ep->com, 0); state_set(&ep->com, DEAD); c4iw_put_ep(&ep->com); } CTR2(KTR_IW_CXGBE, "%s:pceE %p", __func__, ep); return; } static void process_close_complete(struct c4iw_ep *ep) { struct c4iw_qp_attributes attrs; int release = 0; CTR4(KTR_IW_CXGBE, "%s:pccB ep %p so %p state %s", __func__, ep, ep->com.so, states[ep->com.state]); /* The cm_id may be null if we failed to connect */ mutex_lock(&ep->com.mutex); switch (ep->com.state) { case CLOSING: CTR2(KTR_IW_CXGBE, "%s:pcc1 %p CLOSING MORIBUND", __func__, ep); __state_set(&ep->com, MORIBUND); break; case MORIBUND: CTR2(KTR_IW_CXGBE, "%s:pcc1 %p MORIBUND DEAD", __func__, ep); STOP_EP_TIMER(ep); if ((ep->com.cm_id) && (ep->com.qp)) { CTR2(KTR_IW_CXGBE, "%s:pcc2 %p QP_STATE_IDLE", __func__, ep); attrs.next_state = C4IW_QP_STATE_IDLE; c4iw_modify_qp(ep->com.dev, ep->com.qp, C4IW_QP_ATTR_NEXT_STATE, &attrs, 1); } if (ep->parent_ep) { CTR2(KTR_IW_CXGBE, "%s:pcc3 %p", __func__, ep); close_socket(&ep->com, 1); } else { CTR2(KTR_IW_CXGBE, "%s:pcc4 %p", __func__, ep); close_socket(&ep->com, 0); } close_complete_upcall(ep); __state_set(&ep->com, DEAD); release = 1; break; case ABORTING: CTR2(KTR_IW_CXGBE, "%s:pcc5 %p ABORTING", __func__, ep); break; case DEAD: default: CTR2(KTR_IW_CXGBE, "%s:pcc6 %p DEAD", __func__, ep); panic("%s:pcc6 %p DEAD", __func__, ep); break; } mutex_unlock(&ep->com.mutex); if (release) { CTR2(KTR_IW_CXGBE, "%s:pcc7 %p", __func__, ep); c4iw_put_ep(&ep->com); } CTR2(KTR_IW_CXGBE, "%s:pccE %p", __func__, ep); return; } static void init_sock(struct c4iw_ep_common *epc) { int rc; struct sockopt sopt; struct socket *so = epc->so; int on = 1; SOCK_LOCK(so); soupcall_set(so, SO_RCV, c4iw_so_upcall, epc); so->so_state |= SS_NBIO; SOCK_UNLOCK(so); sopt.sopt_dir = SOPT_SET; sopt.sopt_level = IPPROTO_TCP; sopt.sopt_name = TCP_NODELAY; sopt.sopt_val = (caddr_t)&on; sopt.sopt_valsize = sizeof on; sopt.sopt_td = NULL; rc = sosetopt(so, &sopt); if (rc) { log(LOG_ERR, "%s: can't set TCP_NODELAY on so %p (%d)\n", __func__, so, rc); } } static void process_data(struct c4iw_ep *ep) { struct sockaddr_in *local, *remote; CTR5(KTR_IW_CXGBE, "%s: so %p, ep %p, state %s, sb_cc %d", __func__, ep->com.so, ep, states[ep->com.state], ep->com.so->so_rcv.sb_cc); switch (state_read(&ep->com)) { case MPA_REQ_SENT: process_mpa_reply(ep); break; case MPA_REQ_WAIT: in_getsockaddr(ep->com.so, (struct sockaddr **)&local); in_getpeeraddr(ep->com.so, (struct sockaddr **)&remote); ep->com.local_addr = *local; ep->com.remote_addr = *remote; free(local, M_SONAME); free(remote, M_SONAME); process_mpa_request(ep); break; default: if (ep->com.so->so_rcv.sb_cc) log(LOG_ERR, "%s: Unexpected streaming data. " "ep %p, state %d, so %p, so_state 0x%x, sb_cc %u\n", __func__, ep, state_read(&ep->com), ep->com.so, ep->com.so->so_state, ep->com.so->so_rcv.sb_cc); break; } } static void process_connected(struct c4iw_ep *ep) { if ((ep->com.so->so_state & SS_ISCONNECTED) && !ep->com.so->so_error) send_mpa_req(ep); else { connect_reply_upcall(ep, -ep->com.so->so_error); close_socket(&ep->com, 0); state_set(&ep->com, DEAD); c4iw_put_ep(&ep->com); } } static struct socket * dequeue_socket(struct socket *head, struct sockaddr_in **remote, struct c4iw_ep *child_ep) { struct socket *so; ACCEPT_LOCK(); so = TAILQ_FIRST(&head->so_comp); if (!so) { ACCEPT_UNLOCK(); return (NULL); } TAILQ_REMOVE(&head->so_comp, so, so_list); head->so_qlen--; SOCK_LOCK(so); so->so_qstate &= ~SQ_COMP; so->so_head = NULL; soref(so); soupcall_set(so, SO_RCV, c4iw_so_upcall, child_ep); so->so_state |= SS_NBIO; SOCK_UNLOCK(so); ACCEPT_UNLOCK(); soaccept(so, (struct sockaddr **)remote); return (so); } static void process_newconn(struct c4iw_ep *parent_ep) { struct socket *child_so; struct c4iw_ep *child_ep; struct sockaddr_in *remote; child_ep = alloc_ep(sizeof(*child_ep), M_NOWAIT); if (!child_ep) { CTR3(KTR_IW_CXGBE, "%s: parent so %p, parent ep %p, ENOMEM", __func__, parent_ep->com.so, parent_ep); log(LOG_ERR, "%s: failed to allocate ep entry\n", __func__); return; } child_so = dequeue_socket(parent_ep->com.so, &remote, child_ep); if (!child_so) { CTR4(KTR_IW_CXGBE, "%s: parent so %p, parent ep %p, child ep %p, dequeue err", __func__, parent_ep->com.so, parent_ep, child_ep); log(LOG_ERR, "%s: failed to dequeue child socket\n", __func__); __free_ep(&child_ep->com); return; } CTR5(KTR_IW_CXGBE, "%s: parent so %p, parent ep %p, child so %p, child ep %p", __func__, parent_ep->com.so, parent_ep, child_so, child_ep); child_ep->com.local_addr = parent_ep->com.local_addr; child_ep->com.remote_addr = *remote; child_ep->com.dev = parent_ep->com.dev; child_ep->com.so = child_so; child_ep->com.cm_id = NULL; child_ep->com.thread = parent_ep->com.thread; child_ep->parent_ep = parent_ep; free(remote, M_SONAME); c4iw_get_ep(&parent_ep->com); child_ep->parent_ep = parent_ep; init_timer(&child_ep->timer); state_set(&child_ep->com, MPA_REQ_WAIT); START_EP_TIMER(child_ep); /* maybe the request has already been queued up on the socket... */ process_mpa_request(child_ep); } static int c4iw_so_upcall(struct socket *so, void *arg, int waitflag) { struct c4iw_ep *ep = arg; spin_lock(&req_lock); CTR6(KTR_IW_CXGBE, "%s: so %p, so_state 0x%x, ep %p, ep_state %s, tqe_prev %p", __func__, so, so->so_state, ep, states[ep->com.state], ep->com.entry.tqe_prev); if (ep && ep->com.so && !ep->com.entry.tqe_prev) { KASSERT(ep->com.so == so, ("%s: XXX review.", __func__)); c4iw_get_ep(&ep->com); TAILQ_INSERT_TAIL(&req_list, &ep->com, entry); queue_work(c4iw_taskq, &c4iw_task); } spin_unlock(&req_lock); return (SU_OK); } static void process_socket_event(struct c4iw_ep *ep) { int state = state_read(&ep->com); struct socket *so = ep->com.so; CTR6(KTR_IW_CXGBE, "process_socket_event: so %p, so_state 0x%x, " "so_err %d, sb_state 0x%x, ep %p, ep_state %s", so, so->so_state, so->so_error, so->so_rcv.sb_state, ep, states[state]); if (state == CONNECTING) { process_connected(ep); return; } if (state == LISTEN) { process_newconn(ep); return; } /* connection error */ if (so->so_error) { process_conn_error(ep); return; } /* peer close */ if ((so->so_rcv.sb_state & SBS_CANTRCVMORE) && state < CLOSING) { process_peer_close(ep); return; } /* close complete */ if (so->so_state & SS_ISDISCONNECTED) { process_close_complete(ep); return; } /* rx data */ process_data(ep); } SYSCTL_NODE(_hw, OID_AUTO, iw_cxgbe, CTLFLAG_RD, 0, "iw_cxgbe driver parameters"); int db_delay_usecs = 1; TUNABLE_INT("hw.iw_cxgbe.db_delay_usecs", &db_delay_usecs); SYSCTL_INT(_hw_iw_cxgbe, OID_AUTO, db_delay_usecs, CTLFLAG_RW, &db_delay_usecs, 0, "Usecs to delay awaiting db fifo to drain"); static int dack_mode = 1; TUNABLE_INT("hw.iw_cxgbe.dack_mode", &dack_mode); SYSCTL_INT(_hw_iw_cxgbe, OID_AUTO, dack_mode, CTLFLAG_RW, &dack_mode, 0, "Delayed ack mode (default = 1)"); int c4iw_max_read_depth = 8; TUNABLE_INT("hw.iw_cxgbe.c4iw_max_read_depth", &c4iw_max_read_depth); SYSCTL_INT(_hw_iw_cxgbe, OID_AUTO, c4iw_max_read_depth, CTLFLAG_RW, &c4iw_max_read_depth, 0, "Per-connection max ORD/IRD (default = 8)"); static int enable_tcp_timestamps; TUNABLE_INT("hw.iw_cxgbe.enable_tcp_timestamps", &enable_tcp_timestamps); SYSCTL_INT(_hw_iw_cxgbe, OID_AUTO, enable_tcp_timestamps, CTLFLAG_RW, &enable_tcp_timestamps, 0, "Enable tcp timestamps (default = 0)"); static int enable_tcp_sack; TUNABLE_INT("hw.iw_cxgbe.enable_tcp_sack", &enable_tcp_sack); SYSCTL_INT(_hw_iw_cxgbe, OID_AUTO, enable_tcp_sack, CTLFLAG_RW, &enable_tcp_sack, 0, "Enable tcp SACK (default = 0)"); static int enable_tcp_window_scaling = 1; TUNABLE_INT("hw.iw_cxgbe.enable_tcp_window_scaling", &enable_tcp_window_scaling); SYSCTL_INT(_hw_iw_cxgbe, OID_AUTO, enable_tcp_window_scaling, CTLFLAG_RW, &enable_tcp_window_scaling, 0, "Enable tcp window scaling (default = 1)"); int c4iw_debug = 1; TUNABLE_INT("hw.iw_cxgbe.c4iw_debug", &c4iw_debug); SYSCTL_INT(_hw_iw_cxgbe, OID_AUTO, c4iw_debug, CTLFLAG_RW, &c4iw_debug, 0, "Enable debug logging (default = 0)"); static int peer2peer; TUNABLE_INT("hw.iw_cxgbe.peer2peer", &peer2peer); SYSCTL_INT(_hw_iw_cxgbe, OID_AUTO, peer2peer, CTLFLAG_RW, &peer2peer, 0, "Support peer2peer ULPs (default = 0)"); static int p2p_type = FW_RI_INIT_P2PTYPE_READ_REQ; TUNABLE_INT("hw.iw_cxgbe.p2p_type", &p2p_type); SYSCTL_INT(_hw_iw_cxgbe, OID_AUTO, p2p_type, CTLFLAG_RW, &p2p_type, 0, "RDMAP opcode to use for the RTR message: 1 = RDMA_READ 0 = RDMA_WRITE (default 1)"); static int ep_timeout_secs = 60; TUNABLE_INT("hw.iw_cxgbe.ep_timeout_secs", &ep_timeout_secs); SYSCTL_INT(_hw_iw_cxgbe, OID_AUTO, ep_timeout_secs, CTLFLAG_RW, &ep_timeout_secs, 0, "CM Endpoint operation timeout in seconds (default = 60)"); static int mpa_rev = 1; TUNABLE_INT("hw.iw_cxgbe.mpa_rev", &mpa_rev); #ifdef IW_CM_MPAV2 SYSCTL_INT(_hw_iw_cxgbe, OID_AUTO, mpa_rev, CTLFLAG_RW, &mpa_rev, 0, "MPA Revision, 0 supports amso1100, 1 is RFC0544 spec compliant, 2 is IETF MPA Peer Connect Draft compliant (default = 1)"); #else SYSCTL_INT(_hw_iw_cxgbe, OID_AUTO, mpa_rev, CTLFLAG_RW, &mpa_rev, 0, "MPA Revision, 0 supports amso1100, 1 is RFC0544 spec compliant (default = 1)"); #endif static int markers_enabled; TUNABLE_INT("hw.iw_cxgbe.markers_enabled", &markers_enabled); SYSCTL_INT(_hw_iw_cxgbe, OID_AUTO, markers_enabled, CTLFLAG_RW, &markers_enabled, 0, "Enable MPA MARKERS (default(0) = disabled)"); static int crc_enabled = 1; TUNABLE_INT("hw.iw_cxgbe.crc_enabled", &crc_enabled); SYSCTL_INT(_hw_iw_cxgbe, OID_AUTO, crc_enabled, CTLFLAG_RW, &crc_enabled, 0, "Enable MPA CRC (default(1) = enabled)"); static int rcv_win = 256 * 1024; TUNABLE_INT("hw.iw_cxgbe.rcv_win", &rcv_win); SYSCTL_INT(_hw_iw_cxgbe, OID_AUTO, rcv_win, CTLFLAG_RW, &rcv_win, 0, "TCP receive window in bytes (default = 256KB)"); static int snd_win = 128 * 1024; TUNABLE_INT("hw.iw_cxgbe.snd_win", &snd_win); SYSCTL_INT(_hw_iw_cxgbe, OID_AUTO, snd_win, CTLFLAG_RW, &snd_win, 0, "TCP send window in bytes (default = 128KB)"); int db_fc_threshold = 2000; TUNABLE_INT("hw.iw_cxgbe.db_fc_threshold", &db_fc_threshold); SYSCTL_INT(_hw_iw_cxgbe, OID_AUTO, db_fc_threshold, CTLFLAG_RW, &db_fc_threshold, 0, "QP count/threshold that triggers automatic"); static void start_ep_timer(struct c4iw_ep *ep) { if (timer_pending(&ep->timer)) { CTR2(KTR_IW_CXGBE, "%s: ep %p, already started", __func__, ep); printk(KERN_ERR "%s timer already started! ep %p\n", __func__, ep); return; } clear_bit(TIMEOUT, &ep->com.flags); c4iw_get_ep(&ep->com); ep->timer.expires = jiffies + ep_timeout_secs * HZ; ep->timer.data = (unsigned long)ep; ep->timer.function = ep_timeout; add_timer(&ep->timer); } static void stop_ep_timer(struct c4iw_ep *ep) { del_timer_sync(&ep->timer); if (!test_and_set_bit(TIMEOUT, &ep->com.flags)) { c4iw_put_ep(&ep->com); } } static enum c4iw_ep_state state_read(struct c4iw_ep_common *epc) { enum c4iw_ep_state state; mutex_lock(&epc->mutex); state = epc->state; mutex_unlock(&epc->mutex); return (state); } static void __state_set(struct c4iw_ep_common *epc, enum c4iw_ep_state new) { epc->state = new; } static void state_set(struct c4iw_ep_common *epc, enum c4iw_ep_state new) { mutex_lock(&epc->mutex); __state_set(epc, new); mutex_unlock(&epc->mutex); } static void * alloc_ep(int size, gfp_t gfp) { struct c4iw_ep_common *epc; epc = kzalloc(size, gfp); if (epc == NULL) return (NULL); kref_init(&epc->kref); mutex_init(&epc->mutex); c4iw_init_wr_wait(&epc->wr_wait); return (epc); } void __free_ep(struct c4iw_ep_common *epc) { CTR2(KTR_IW_CXGBE, "%s:feB %p", __func__, epc); KASSERT(!epc->so, ("%s warning ep->so %p \n", __func__, epc->so)); KASSERT(!epc->entry.tqe_prev, ("%s epc %p still on req list!\n", __func__, epc)); free(epc, M_DEVBUF); CTR2(KTR_IW_CXGBE, "%s:feE %p", __func__, epc); } void _c4iw_free_ep(struct kref *kref) { struct c4iw_ep *ep; struct c4iw_ep_common *epc; ep = container_of(kref, struct c4iw_ep, com.kref); epc = &ep->com; KASSERT(!epc->so, ("%s ep->so %p", __func__, epc->so)); KASSERT(!epc->entry.tqe_prev, ("%s epc %p still on req list", __func__, epc)); kfree(ep); } static void release_ep_resources(struct c4iw_ep *ep) { CTR2(KTR_IW_CXGBE, "%s:rerB %p", __func__, ep); set_bit(RELEASE_RESOURCES, &ep->com.flags); c4iw_put_ep(&ep->com); CTR2(KTR_IW_CXGBE, "%s:rerE %p", __func__, ep); } static void send_mpa_req(struct c4iw_ep *ep) { int mpalen; struct mpa_message *mpa; struct mpa_v2_conn_params mpa_v2_params; struct mbuf *m; char mpa_rev_to_use = mpa_rev; int err; if (ep->retry_with_mpa_v1) mpa_rev_to_use = 1; mpalen = sizeof(*mpa) + ep->plen; if (mpa_rev_to_use == 2) mpalen += sizeof(struct mpa_v2_conn_params); if (mpalen > MHLEN) CXGBE_UNIMPLEMENTED(__func__); m = m_gethdr(M_NOWAIT, MT_DATA); if (m == NULL) { connect_reply_upcall(ep, -ENOMEM); return; } mpa = mtod(m, struct mpa_message *); m->m_len = mpalen; m->m_pkthdr.len = mpalen; memcpy(mpa->key, MPA_KEY_REQ, sizeof(mpa->key)); mpa->flags = (crc_enabled ? MPA_CRC : 0) | (markers_enabled ? MPA_MARKERS : 0) | (mpa_rev_to_use == 2 ? MPA_ENHANCED_RDMA_CONN : 0); mpa->private_data_size = htons(ep->plen); mpa->revision = mpa_rev_to_use; if (mpa_rev_to_use == 1) { ep->tried_with_mpa_v1 = 1; ep->retry_with_mpa_v1 = 0; } if (mpa_rev_to_use == 2) { mpa->private_data_size += htons(sizeof(struct mpa_v2_conn_params)); mpa_v2_params.ird = htons((u16)ep->ird); mpa_v2_params.ord = htons((u16)ep->ord); if (peer2peer) { mpa_v2_params.ird |= htons(MPA_V2_PEER2PEER_MODEL); if (p2p_type == FW_RI_INIT_P2PTYPE_RDMA_WRITE) { mpa_v2_params.ord |= htons(MPA_V2_RDMA_WRITE_RTR); } else if (p2p_type == FW_RI_INIT_P2PTYPE_READ_REQ) { mpa_v2_params.ord |= htons(MPA_V2_RDMA_READ_RTR); } } memcpy(mpa->private_data, &mpa_v2_params, sizeof(struct mpa_v2_conn_params)); if (ep->plen) { memcpy(mpa->private_data + sizeof(struct mpa_v2_conn_params), ep->mpa_pkt + sizeof(*mpa), ep->plen); } } else { if (ep->plen) memcpy(mpa->private_data, ep->mpa_pkt + sizeof(*mpa), ep->plen); CTR2(KTR_IW_CXGBE, "%s:smr7 %p", __func__, ep); } err = sosend(ep->com.so, NULL, NULL, m, NULL, MSG_DONTWAIT, ep->com.thread); if (err) { connect_reply_upcall(ep, -ENOMEM); return; } START_EP_TIMER(ep); state_set(&ep->com, MPA_REQ_SENT); ep->mpa_attr.initiator = 1; } static int send_mpa_reject(struct c4iw_ep *ep, const void *pdata, u8 plen) { int mpalen ; struct mpa_message *mpa; struct mpa_v2_conn_params mpa_v2_params; struct mbuf *m; int err; CTR4(KTR_IW_CXGBE, "%s:smrejB %p %u %d", __func__, ep, ep->hwtid, ep->plen); mpalen = sizeof(*mpa) + plen; if (ep->mpa_attr.version == 2 && ep->mpa_attr.enhanced_rdma_conn) { mpalen += sizeof(struct mpa_v2_conn_params); CTR4(KTR_IW_CXGBE, "%s:smrej1 %p %u %d", __func__, ep, ep->mpa_attr.version, mpalen); } if (mpalen > MHLEN) CXGBE_UNIMPLEMENTED(__func__); m = m_gethdr(M_NOWAIT, MT_DATA); if (m == NULL) { printf("%s - cannot alloc mbuf!\n", __func__); CTR2(KTR_IW_CXGBE, "%s:smrej2 %p", __func__, ep); return (-ENOMEM); } mpa = mtod(m, struct mpa_message *); m->m_len = mpalen; m->m_pkthdr.len = mpalen; memset(mpa, 0, sizeof(*mpa)); memcpy(mpa->key, MPA_KEY_REP, sizeof(mpa->key)); mpa->flags = MPA_REJECT; mpa->revision = mpa_rev; mpa->private_data_size = htons(plen); if (ep->mpa_attr.version == 2 && ep->mpa_attr.enhanced_rdma_conn) { mpa->flags |= MPA_ENHANCED_RDMA_CONN; mpa->private_data_size += htons(sizeof(struct mpa_v2_conn_params)); mpa_v2_params.ird = htons(((u16)ep->ird) | (peer2peer ? MPA_V2_PEER2PEER_MODEL : 0)); mpa_v2_params.ord = htons(((u16)ep->ord) | (peer2peer ? (p2p_type == FW_RI_INIT_P2PTYPE_RDMA_WRITE ? MPA_V2_RDMA_WRITE_RTR : p2p_type == FW_RI_INIT_P2PTYPE_READ_REQ ? MPA_V2_RDMA_READ_RTR : 0) : 0)); memcpy(mpa->private_data, &mpa_v2_params, sizeof(struct mpa_v2_conn_params)); if (ep->plen) memcpy(mpa->private_data + sizeof(struct mpa_v2_conn_params), pdata, plen); CTR5(KTR_IW_CXGBE, "%s:smrej3 %p %d %d %d", __func__, ep, mpa_v2_params.ird, mpa_v2_params.ord, ep->plen); } else if (plen) memcpy(mpa->private_data, pdata, plen); err = sosend(ep->com.so, NULL, NULL, m, NULL, MSG_DONTWAIT, ep->com.thread); if (!err) ep->snd_seq += mpalen; CTR4(KTR_IW_CXGBE, "%s:smrejE %p %u %d", __func__, ep, ep->hwtid, err); return err; } static int send_mpa_reply(struct c4iw_ep *ep, const void *pdata, u8 plen) { int mpalen; struct mpa_message *mpa; struct mbuf *m; struct mpa_v2_conn_params mpa_v2_params; int err; CTR2(KTR_IW_CXGBE, "%s:smrepB %p", __func__, ep); mpalen = sizeof(*mpa) + plen; if (ep->mpa_attr.version == 2 && ep->mpa_attr.enhanced_rdma_conn) { CTR3(KTR_IW_CXGBE, "%s:smrep1 %p %d", __func__, ep, ep->mpa_attr.version); mpalen += sizeof(struct mpa_v2_conn_params); } if (mpalen > MHLEN) CXGBE_UNIMPLEMENTED(__func__); m = m_gethdr(M_NOWAIT, MT_DATA); if (m == NULL) { CTR2(KTR_IW_CXGBE, "%s:smrep2 %p", __func__, ep); printf("%s - cannot alloc mbuf!\n", __func__); return (-ENOMEM); } mpa = mtod(m, struct mpa_message *); m->m_len = mpalen; m->m_pkthdr.len = mpalen; memset(mpa, 0, sizeof(*mpa)); memcpy(mpa->key, MPA_KEY_REP, sizeof(mpa->key)); mpa->flags = (ep->mpa_attr.crc_enabled ? MPA_CRC : 0) | (markers_enabled ? MPA_MARKERS : 0); mpa->revision = ep->mpa_attr.version; mpa->private_data_size = htons(plen); if (ep->mpa_attr.version == 2 && ep->mpa_attr.enhanced_rdma_conn) { mpa->flags |= MPA_ENHANCED_RDMA_CONN; mpa->private_data_size += htons(sizeof(struct mpa_v2_conn_params)); mpa_v2_params.ird = htons((u16)ep->ird); mpa_v2_params.ord = htons((u16)ep->ord); CTR5(KTR_IW_CXGBE, "%s:smrep3 %p %d %d %d", __func__, ep, ep->mpa_attr.version, mpa_v2_params.ird, mpa_v2_params.ord); if (peer2peer && (ep->mpa_attr.p2p_type != FW_RI_INIT_P2PTYPE_DISABLED)) { mpa_v2_params.ird |= htons(MPA_V2_PEER2PEER_MODEL); if (p2p_type == FW_RI_INIT_P2PTYPE_RDMA_WRITE) { mpa_v2_params.ord |= htons(MPA_V2_RDMA_WRITE_RTR); CTR5(KTR_IW_CXGBE, "%s:smrep4 %p %d %d %d", __func__, ep, p2p_type, mpa_v2_params.ird, mpa_v2_params.ord); } else if (p2p_type == FW_RI_INIT_P2PTYPE_READ_REQ) { mpa_v2_params.ord |= htons(MPA_V2_RDMA_READ_RTR); CTR5(KTR_IW_CXGBE, "%s:smrep5 %p %d %d %d", __func__, ep, p2p_type, mpa_v2_params.ird, mpa_v2_params.ord); } } memcpy(mpa->private_data, &mpa_v2_params, sizeof(struct mpa_v2_conn_params)); if (ep->plen) memcpy(mpa->private_data + sizeof(struct mpa_v2_conn_params), pdata, plen); } else if (plen) memcpy(mpa->private_data, pdata, plen); state_set(&ep->com, MPA_REP_SENT); ep->snd_seq += mpalen; err = sosend(ep->com.so, NULL, NULL, m, NULL, MSG_DONTWAIT, ep->com.thread); CTR3(KTR_IW_CXGBE, "%s:smrepE %p %d", __func__, ep, err); return err; } static void close_complete_upcall(struct c4iw_ep *ep) { struct iw_cm_event event; CTR2(KTR_IW_CXGBE, "%s:ccuB %p", __func__, ep); memset(&event, 0, sizeof(event)); event.event = IW_CM_EVENT_CLOSE; if (ep->com.cm_id) { CTR2(KTR_IW_CXGBE, "%s:ccu1 %1", __func__, ep); ep->com.cm_id->event_handler(ep->com.cm_id, &event); ep->com.cm_id->rem_ref(ep->com.cm_id); ep->com.cm_id = NULL; ep->com.qp = NULL; set_bit(CLOSE_UPCALL, &ep->com.history); } CTR2(KTR_IW_CXGBE, "%s:ccuE %p", __func__, ep); } static int abort_connection(struct c4iw_ep *ep) { int err; CTR2(KTR_IW_CXGBE, "%s:abB %p", __func__, ep); close_complete_upcall(ep); state_set(&ep->com, ABORTING); abort_socket(ep); err = close_socket(&ep->com, 0); set_bit(ABORT_CONN, &ep->com.history); CTR2(KTR_IW_CXGBE, "%s:abE %p", __func__, ep); return err; } static void peer_close_upcall(struct c4iw_ep *ep) { struct iw_cm_event event; CTR2(KTR_IW_CXGBE, "%s:pcuB %p", __func__, ep); memset(&event, 0, sizeof(event)); event.event = IW_CM_EVENT_DISCONNECT; if (ep->com.cm_id) { CTR2(KTR_IW_CXGBE, "%s:pcu1 %p", __func__, ep); ep->com.cm_id->event_handler(ep->com.cm_id, &event); set_bit(DISCONN_UPCALL, &ep->com.history); } CTR2(KTR_IW_CXGBE, "%s:pcuE %p", __func__, ep); } static void peer_abort_upcall(struct c4iw_ep *ep) { struct iw_cm_event event; CTR2(KTR_IW_CXGBE, "%s:pauB %p", __func__, ep); memset(&event, 0, sizeof(event)); event.event = IW_CM_EVENT_CLOSE; event.status = -ECONNRESET; if (ep->com.cm_id) { CTR2(KTR_IW_CXGBE, "%s:pau1 %p", __func__, ep); ep->com.cm_id->event_handler(ep->com.cm_id, &event); ep->com.cm_id->rem_ref(ep->com.cm_id); ep->com.cm_id = NULL; ep->com.qp = NULL; set_bit(ABORT_UPCALL, &ep->com.history); } CTR2(KTR_IW_CXGBE, "%s:pauE %p", __func__, ep); } static void connect_reply_upcall(struct c4iw_ep *ep, int status) { struct iw_cm_event event; CTR3(KTR_IW_CXGBE, "%s:cruB %p", __func__, ep, status); memset(&event, 0, sizeof(event)); event.event = IW_CM_EVENT_CONNECT_REPLY; event.status = (status ==-ECONNABORTED)?-ECONNRESET: status; event.local_addr = ep->com.local_addr; event.remote_addr = ep->com.remote_addr; if ((status == 0) || (status == -ECONNREFUSED)) { if (!ep->tried_with_mpa_v1) { CTR2(KTR_IW_CXGBE, "%s:cru1 %p", __func__, ep); /* this means MPA_v2 is used */ event.private_data_len = ep->plen - sizeof(struct mpa_v2_conn_params); event.private_data = ep->mpa_pkt + sizeof(struct mpa_message) + sizeof(struct mpa_v2_conn_params); } else { CTR2(KTR_IW_CXGBE, "%s:cru2 %p", __func__, ep); /* this means MPA_v1 is used */ event.private_data_len = ep->plen; event.private_data = ep->mpa_pkt + sizeof(struct mpa_message); } } if (ep->com.cm_id) { CTR2(KTR_IW_CXGBE, "%s:cru3 %p", __func__, ep); set_bit(CONN_RPL_UPCALL, &ep->com.history); ep->com.cm_id->event_handler(ep->com.cm_id, &event); } if(status == -ECONNABORTED) { CTR3(KTR_IW_CXGBE, "%s:cruE %p %d", __func__, ep, status); return; } if (status < 0) { CTR3(KTR_IW_CXGBE, "%s:cru4 %p %d", __func__, ep, status); ep->com.cm_id->rem_ref(ep->com.cm_id); ep->com.cm_id = NULL; ep->com.qp = NULL; } CTR2(KTR_IW_CXGBE, "%s:cruE %p", __func__, ep); } static void connect_request_upcall(struct c4iw_ep *ep) { struct iw_cm_event event; CTR3(KTR_IW_CXGBE, "%s: ep %p, mpa_v1 %d", __func__, ep, ep->tried_with_mpa_v1); memset(&event, 0, sizeof(event)); event.event = IW_CM_EVENT_CONNECT_REQUEST; event.local_addr = ep->com.local_addr; event.remote_addr = ep->com.remote_addr; event.provider_data = ep; event.so = ep->com.so; if (!ep->tried_with_mpa_v1) { /* this means MPA_v2 is used */ #ifdef IW_CM_MPAV2 event.ord = ep->ord; event.ird = ep->ird; #endif event.private_data_len = ep->plen - sizeof(struct mpa_v2_conn_params); event.private_data = ep->mpa_pkt + sizeof(struct mpa_message) + sizeof(struct mpa_v2_conn_params); } else { /* this means MPA_v1 is used. Send max supported */ #ifdef IW_CM_MPAV2 event.ord = c4iw_max_read_depth; event.ird = c4iw_max_read_depth; #endif event.private_data_len = ep->plen; event.private_data = ep->mpa_pkt + sizeof(struct mpa_message); } c4iw_get_ep(&ep->com); ep->parent_ep->com.cm_id->event_handler(ep->parent_ep->com.cm_id, &event); set_bit(CONNREQ_UPCALL, &ep->com.history); c4iw_put_ep(&ep->parent_ep->com); } static void established_upcall(struct c4iw_ep *ep) { struct iw_cm_event event; CTR2(KTR_IW_CXGBE, "%s:euB %p", __func__, ep); memset(&event, 0, sizeof(event)); event.event = IW_CM_EVENT_ESTABLISHED; #ifdef IW_CM_MPAV2 event.ird = ep->ird; event.ord = ep->ord; #endif if (ep->com.cm_id) { CTR2(KTR_IW_CXGBE, "%s:eu1 %p", __func__, ep); ep->com.cm_id->event_handler(ep->com.cm_id, &event); set_bit(ESTAB_UPCALL, &ep->com.history); } CTR2(KTR_IW_CXGBE, "%s:euE %p", __func__, ep); } static void process_mpa_reply(struct c4iw_ep *ep) { struct mpa_message *mpa; struct mpa_v2_conn_params *mpa_v2_params; u16 plen; u16 resp_ird, resp_ord; u8 rtr_mismatch = 0, insuff_ird = 0; struct c4iw_qp_attributes attrs; enum c4iw_qp_attr_mask mask; int err; struct mbuf *top, *m; int flags = MSG_DONTWAIT; struct uio uio; CTR2(KTR_IW_CXGBE, "%s:pmrB %p", __func__, ep); /* * Stop mpa timer. If it expired, then the state has * changed and we bail since ep_timeout already aborted * the connection. */ STOP_EP_TIMER(ep); if (state_read(&ep->com) != MPA_REQ_SENT) return; uio.uio_resid = 1000000; uio.uio_td = ep->com.thread; err = soreceive(ep->com.so, NULL, &uio, &top, NULL, &flags); if (err) { if (err == EWOULDBLOCK) { CTR2(KTR_IW_CXGBE, "%s:pmr1 %p", __func__, ep); START_EP_TIMER(ep); return; } err = -err; CTR2(KTR_IW_CXGBE, "%s:pmr2 %p", __func__, ep); goto err; } if (ep->com.so->so_rcv.sb_mb) { CTR2(KTR_IW_CXGBE, "%s:pmr3 %p", __func__, ep); printf("%s data after soreceive called! so %p sb_mb %p top %p\n", __func__, ep->com.so, ep->com.so->so_rcv.sb_mb, top); } m = top; do { CTR2(KTR_IW_CXGBE, "%s:pmr4 %p", __func__, ep); /* * If we get more than the supported amount of private data * then we must fail this connection. */ if (ep->mpa_pkt_len + m->m_len > sizeof(ep->mpa_pkt)) { CTR3(KTR_IW_CXGBE, "%s:pmr5 %p %d", __func__, ep, ep->mpa_pkt_len + m->m_len); err = (-EINVAL); goto err; } /* * copy the new data into our accumulation buffer. */ m_copydata(m, 0, m->m_len, &(ep->mpa_pkt[ep->mpa_pkt_len])); ep->mpa_pkt_len += m->m_len; if (!m->m_next) m = m->m_nextpkt; else m = m->m_next; } while (m); m_freem(top); /* * if we don't even have the mpa message, then bail. */ if (ep->mpa_pkt_len < sizeof(*mpa)) return; mpa = (struct mpa_message *) ep->mpa_pkt; /* Validate MPA header. */ if (mpa->revision > mpa_rev) { CTR4(KTR_IW_CXGBE, "%s:pmr6 %p %d %d", __func__, ep, mpa->revision, mpa_rev); printk(KERN_ERR MOD "%s MPA version mismatch. Local = %d, " " Received = %d\n", __func__, mpa_rev, mpa->revision); err = -EPROTO; goto err; } if (memcmp(mpa->key, MPA_KEY_REP, sizeof(mpa->key))) { CTR2(KTR_IW_CXGBE, "%s:pmr7 %p", __func__, ep); err = -EPROTO; goto err; } plen = ntohs(mpa->private_data_size); /* * Fail if there's too much private data. */ if (plen > MPA_MAX_PRIVATE_DATA) { CTR2(KTR_IW_CXGBE, "%s:pmr8 %p", __func__, ep); err = -EPROTO; goto err; } /* * If plen does not account for pkt size */ if (ep->mpa_pkt_len > (sizeof(*mpa) + plen)) { CTR2(KTR_IW_CXGBE, "%s:pmr9 %p", __func__, ep); err = -EPROTO; goto err; } ep->plen = (u8) plen; /* * If we don't have all the pdata yet, then bail. * We'll continue process when more data arrives. */ if (ep->mpa_pkt_len < (sizeof(*mpa) + plen)) { CTR2(KTR_IW_CXGBE, "%s:pmra %p", __func__, ep); return; } if (mpa->flags & MPA_REJECT) { CTR2(KTR_IW_CXGBE, "%s:pmrb %p", __func__, ep); err = -ECONNREFUSED; goto err; } /* * If we get here we have accumulated the entire mpa * start reply message including private data. And * the MPA header is valid. */ state_set(&ep->com, FPDU_MODE); ep->mpa_attr.crc_enabled = (mpa->flags & MPA_CRC) | crc_enabled ? 1 : 0; ep->mpa_attr.recv_marker_enabled = markers_enabled; ep->mpa_attr.xmit_marker_enabled = mpa->flags & MPA_MARKERS ? 1 : 0; ep->mpa_attr.version = mpa->revision; ep->mpa_attr.p2p_type = FW_RI_INIT_P2PTYPE_DISABLED; if (mpa->revision == 2) { CTR2(KTR_IW_CXGBE, "%s:pmrc %p", __func__, ep); ep->mpa_attr.enhanced_rdma_conn = mpa->flags & MPA_ENHANCED_RDMA_CONN ? 1 : 0; if (ep->mpa_attr.enhanced_rdma_conn) { CTR2(KTR_IW_CXGBE, "%s:pmrd %p", __func__, ep); mpa_v2_params = (struct mpa_v2_conn_params *) (ep->mpa_pkt + sizeof(*mpa)); resp_ird = ntohs(mpa_v2_params->ird) & MPA_V2_IRD_ORD_MASK; resp_ord = ntohs(mpa_v2_params->ord) & MPA_V2_IRD_ORD_MASK; /* * This is a double-check. Ideally, below checks are * not required since ird/ord stuff has been taken * care of in c4iw_accept_cr */ if ((ep->ird < resp_ord) || (ep->ord > resp_ird)) { CTR2(KTR_IW_CXGBE, "%s:pmre %p", __func__, ep); err = -ENOMEM; ep->ird = resp_ord; ep->ord = resp_ird; insuff_ird = 1; } if (ntohs(mpa_v2_params->ird) & MPA_V2_PEER2PEER_MODEL) { CTR2(KTR_IW_CXGBE, "%s:pmrf %p", __func__, ep); if (ntohs(mpa_v2_params->ord) & MPA_V2_RDMA_WRITE_RTR) { CTR2(KTR_IW_CXGBE, "%s:pmrg %p", __func__, ep); ep->mpa_attr.p2p_type = FW_RI_INIT_P2PTYPE_RDMA_WRITE; } else if (ntohs(mpa_v2_params->ord) & MPA_V2_RDMA_READ_RTR) { CTR2(KTR_IW_CXGBE, "%s:pmrh %p", __func__, ep); ep->mpa_attr.p2p_type = FW_RI_INIT_P2PTYPE_READ_REQ; } } } } else { CTR2(KTR_IW_CXGBE, "%s:pmri %p", __func__, ep); if (mpa->revision == 1) { CTR2(KTR_IW_CXGBE, "%s:pmrj %p", __func__, ep); if (peer2peer) { CTR2(KTR_IW_CXGBE, "%s:pmrk %p", __func__, ep); ep->mpa_attr.p2p_type = p2p_type; } } } if (set_tcpinfo(ep)) { CTR2(KTR_IW_CXGBE, "%s:pmrl %p", __func__, ep); printf("%s set_tcpinfo error\n", __func__); goto err; } CTR6(KTR_IW_CXGBE, "%s - crc_enabled = %d, recv_marker_enabled = %d, " "xmit_marker_enabled = %d, version = %d p2p_type = %d", __func__, ep->mpa_attr.crc_enabled, ep->mpa_attr.recv_marker_enabled, ep->mpa_attr.xmit_marker_enabled, ep->mpa_attr.version, ep->mpa_attr.p2p_type); /* * If responder's RTR does not match with that of initiator, assign * FW_RI_INIT_P2PTYPE_DISABLED in mpa attributes so that RTR is not * generated when moving QP to RTS state. * A TERM message will be sent after QP has moved to RTS state */ if ((ep->mpa_attr.version == 2) && peer2peer && (ep->mpa_attr.p2p_type != p2p_type)) { CTR2(KTR_IW_CXGBE, "%s:pmrm %p", __func__, ep); ep->mpa_attr.p2p_type = FW_RI_INIT_P2PTYPE_DISABLED; rtr_mismatch = 1; } //ep->ofld_txq = TOEPCB(ep->com.so)->ofld_txq; attrs.mpa_attr = ep->mpa_attr; attrs.max_ird = ep->ird; attrs.max_ord = ep->ord; attrs.llp_stream_handle = ep; attrs.next_state = C4IW_QP_STATE_RTS; mask = C4IW_QP_ATTR_NEXT_STATE | C4IW_QP_ATTR_LLP_STREAM_HANDLE | C4IW_QP_ATTR_MPA_ATTR | C4IW_QP_ATTR_MAX_IRD | C4IW_QP_ATTR_MAX_ORD; /* bind QP and TID with INIT_WR */ err = c4iw_modify_qp(ep->com.qp->rhp, ep->com.qp, mask, &attrs, 1); if (err) { CTR2(KTR_IW_CXGBE, "%s:pmrn %p", __func__, ep); goto err; } /* * If responder's RTR requirement did not match with what initiator * supports, generate TERM message */ if (rtr_mismatch) { CTR2(KTR_IW_CXGBE, "%s:pmro %p", __func__, ep); printk(KERN_ERR "%s: RTR mismatch, sending TERM\n", __func__); attrs.layer_etype = LAYER_MPA | DDP_LLP; attrs.ecode = MPA_NOMATCH_RTR; attrs.next_state = C4IW_QP_STATE_TERMINATE; err = c4iw_modify_qp(ep->com.qp->rhp, ep->com.qp, C4IW_QP_ATTR_NEXT_STATE, &attrs, 0); err = -ENOMEM; goto out; } /* * Generate TERM if initiator IRD is not sufficient for responder * provided ORD. Currently, we do the same behaviour even when * responder provided IRD is also not sufficient as regards to * initiator ORD. */ if (insuff_ird) { CTR2(KTR_IW_CXGBE, "%s:pmrp %p", __func__, ep); printk(KERN_ERR "%s: Insufficient IRD, sending TERM\n", __func__); attrs.layer_etype = LAYER_MPA | DDP_LLP; attrs.ecode = MPA_INSUFF_IRD; attrs.next_state = C4IW_QP_STATE_TERMINATE; err = c4iw_modify_qp(ep->com.qp->rhp, ep->com.qp, C4IW_QP_ATTR_NEXT_STATE, &attrs, 0); err = -ENOMEM; goto out; } goto out; err: state_set(&ep->com, ABORTING); abort_connection(ep); out: connect_reply_upcall(ep, err); CTR2(KTR_IW_CXGBE, "%s:pmrE %p", __func__, ep); return; } static void process_mpa_request(struct c4iw_ep *ep) { struct mpa_message *mpa; u16 plen; int flags = MSG_DONTWAIT; int rc; struct iovec iov; struct uio uio; enum c4iw_ep_state state = state_read(&ep->com); CTR3(KTR_IW_CXGBE, "%s: ep %p, state %s", __func__, ep, states[state]); if (state != MPA_REQ_WAIT) return; iov.iov_base = &ep->mpa_pkt[ep->mpa_pkt_len]; iov.iov_len = sizeof(ep->mpa_pkt) - ep->mpa_pkt_len; uio.uio_iov = &iov; uio.uio_iovcnt = 1; uio.uio_offset = 0; uio.uio_resid = sizeof(ep->mpa_pkt) - ep->mpa_pkt_len; uio.uio_segflg = UIO_SYSSPACE; uio.uio_rw = UIO_READ; uio.uio_td = NULL; /* uio.uio_td = ep->com.thread; */ rc = soreceive(ep->com.so, NULL, &uio, NULL, NULL, &flags); if (rc == EAGAIN) return; else if (rc) { abort: STOP_EP_TIMER(ep); abort_connection(ep); return; } KASSERT(uio.uio_offset > 0, ("%s: sorecieve on so %p read no data", __func__, ep->com.so)); ep->mpa_pkt_len += uio.uio_offset; /* * If we get more than the supported amount of private data then we must * fail this connection. XXX: check so_rcv->sb_cc, or peek with another * soreceive, or increase the size of mpa_pkt by 1 and abort if the last * byte is filled by the soreceive above. */ /* Don't even have the MPA message. Wait for more data to arrive. */ if (ep->mpa_pkt_len < sizeof(*mpa)) return; mpa = (struct mpa_message *) ep->mpa_pkt; /* * Validate MPA Header. */ if (mpa->revision > mpa_rev) { log(LOG_ERR, "%s: MPA version mismatch. Local = %d," " Received = %d\n", __func__, mpa_rev, mpa->revision); goto abort; } if (memcmp(mpa->key, MPA_KEY_REQ, sizeof(mpa->key))) goto abort; /* * Fail if there's too much private data. */ plen = ntohs(mpa->private_data_size); if (plen > MPA_MAX_PRIVATE_DATA) goto abort; /* * If plen does not account for pkt size */ if (ep->mpa_pkt_len > (sizeof(*mpa) + plen)) goto abort; ep->plen = (u8) plen; /* * If we don't have all the pdata yet, then bail. */ if (ep->mpa_pkt_len < (sizeof(*mpa) + plen)) return; /* * If we get here we have accumulated the entire mpa * start reply message including private data. */ ep->mpa_attr.initiator = 0; ep->mpa_attr.crc_enabled = (mpa->flags & MPA_CRC) | crc_enabled ? 1 : 0; ep->mpa_attr.recv_marker_enabled = markers_enabled; ep->mpa_attr.xmit_marker_enabled = mpa->flags & MPA_MARKERS ? 1 : 0; ep->mpa_attr.version = mpa->revision; if (mpa->revision == 1) ep->tried_with_mpa_v1 = 1; ep->mpa_attr.p2p_type = FW_RI_INIT_P2PTYPE_DISABLED; if (mpa->revision == 2) { ep->mpa_attr.enhanced_rdma_conn = mpa->flags & MPA_ENHANCED_RDMA_CONN ? 1 : 0; if (ep->mpa_attr.enhanced_rdma_conn) { struct mpa_v2_conn_params *mpa_v2_params; u16 ird, ord; mpa_v2_params = (void *)&ep->mpa_pkt[sizeof(*mpa)]; ird = ntohs(mpa_v2_params->ird); ord = ntohs(mpa_v2_params->ord); ep->ird = ird & MPA_V2_IRD_ORD_MASK; ep->ord = ord & MPA_V2_IRD_ORD_MASK; if (ird & MPA_V2_PEER2PEER_MODEL && peer2peer) { if (ord & MPA_V2_RDMA_WRITE_RTR) { ep->mpa_attr.p2p_type = FW_RI_INIT_P2PTYPE_RDMA_WRITE; } else if (ord & MPA_V2_RDMA_READ_RTR) { ep->mpa_attr.p2p_type = FW_RI_INIT_P2PTYPE_READ_REQ; } } } } else if (mpa->revision == 1 && peer2peer) ep->mpa_attr.p2p_type = p2p_type; if (set_tcpinfo(ep)) goto abort; CTR5(KTR_IW_CXGBE, "%s: crc_enabled = %d, recv_marker_enabled = %d, " "xmit_marker_enabled = %d, version = %d", __func__, ep->mpa_attr.crc_enabled, ep->mpa_attr.recv_marker_enabled, ep->mpa_attr.xmit_marker_enabled, ep->mpa_attr.version); state_set(&ep->com, MPA_REQ_RCVD); STOP_EP_TIMER(ep); /* drive upcall */ mutex_lock(&ep->parent_ep->com.mutex); if (ep->parent_ep->com.state != DEAD) connect_request_upcall(ep); else abort_connection(ep); mutex_unlock(&ep->parent_ep->com.mutex); } /* * Upcall from the adapter indicating data has been transmitted. * For us its just the single MPA request or reply. We can now free * the skb holding the mpa message. */ int c4iw_reject_cr(struct iw_cm_id *cm_id, const void *pdata, u8 pdata_len) { int err; struct c4iw_ep *ep = to_ep(cm_id); CTR2(KTR_IW_CXGBE, "%s:crcB %p", __func__, ep); if (state_read(&ep->com) == DEAD) { CTR2(KTR_IW_CXGBE, "%s:crc1 %p", __func__, ep); c4iw_put_ep(&ep->com); return -ECONNRESET; } set_bit(ULP_REJECT, &ep->com.history); BUG_ON(state_read(&ep->com) != MPA_REQ_RCVD); if (mpa_rev == 0) { CTR2(KTR_IW_CXGBE, "%s:crc2 %p", __func__, ep); abort_connection(ep); } else { CTR2(KTR_IW_CXGBE, "%s:crc3 %p", __func__, ep); err = send_mpa_reject(ep, pdata, pdata_len); err = soshutdown(ep->com.so, 3); } c4iw_put_ep(&ep->com); CTR2(KTR_IW_CXGBE, "%s:crc4 %p", __func__, ep); return 0; } int c4iw_accept_cr(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) { int err; struct c4iw_qp_attributes attrs; enum c4iw_qp_attr_mask mask; struct c4iw_ep *ep = to_ep(cm_id); struct c4iw_dev *h = to_c4iw_dev(cm_id->device); struct c4iw_qp *qp = get_qhp(h, conn_param->qpn); CTR2(KTR_IW_CXGBE, "%s:cacB %p", __func__, ep); if (state_read(&ep->com) == DEAD) { CTR2(KTR_IW_CXGBE, "%s:cac1 %p", __func__, ep); err = -ECONNRESET; goto err; } BUG_ON(state_read(&ep->com) != MPA_REQ_RCVD); BUG_ON(!qp); set_bit(ULP_ACCEPT, &ep->com.history); if ((conn_param->ord > c4iw_max_read_depth) || (conn_param->ird > c4iw_max_read_depth)) { CTR2(KTR_IW_CXGBE, "%s:cac2 %p", __func__, ep); abort_connection(ep); err = -EINVAL; goto err; } if (ep->mpa_attr.version == 2 && ep->mpa_attr.enhanced_rdma_conn) { CTR2(KTR_IW_CXGBE, "%s:cac3 %p", __func__, ep); if (conn_param->ord > ep->ird) { CTR2(KTR_IW_CXGBE, "%s:cac4 %p", __func__, ep); ep->ird = conn_param->ird; ep->ord = conn_param->ord; send_mpa_reject(ep, conn_param->private_data, conn_param->private_data_len); abort_connection(ep); err = -ENOMEM; goto err; } if (conn_param->ird > ep->ord) { CTR2(KTR_IW_CXGBE, "%s:cac5 %p", __func__, ep); if (!ep->ord) { CTR2(KTR_IW_CXGBE, "%s:cac6 %p", __func__, ep); conn_param->ird = 1; } else { CTR2(KTR_IW_CXGBE, "%s:cac7 %p", __func__, ep); abort_connection(ep); err = -ENOMEM; goto err; } } } ep->ird = conn_param->ird; ep->ord = conn_param->ord; if (ep->mpa_attr.version != 2) { CTR2(KTR_IW_CXGBE, "%s:cac8 %p", __func__, ep); if (peer2peer && ep->ird == 0) { CTR2(KTR_IW_CXGBE, "%s:cac9 %p", __func__, ep); ep->ird = 1; } } cm_id->add_ref(cm_id); ep->com.cm_id = cm_id; ep->com.qp = qp; //ep->ofld_txq = TOEPCB(ep->com.so)->ofld_txq; /* bind QP to EP and move to RTS */ attrs.mpa_attr = ep->mpa_attr; attrs.max_ird = ep->ird; attrs.max_ord = ep->ord; attrs.llp_stream_handle = ep; attrs.next_state = C4IW_QP_STATE_RTS; /* bind QP and TID with INIT_WR */ mask = C4IW_QP_ATTR_NEXT_STATE | C4IW_QP_ATTR_LLP_STREAM_HANDLE | C4IW_QP_ATTR_MPA_ATTR | C4IW_QP_ATTR_MAX_IRD | C4IW_QP_ATTR_MAX_ORD; err = c4iw_modify_qp(ep->com.qp->rhp, ep->com.qp, mask, &attrs, 1); if (err) { CTR2(KTR_IW_CXGBE, "%s:caca %p", __func__, ep); goto err1; } err = send_mpa_reply(ep, conn_param->private_data, conn_param->private_data_len); if (err) { CTR2(KTR_IW_CXGBE, "%s:caca %p", __func__, ep); goto err1; } state_set(&ep->com, FPDU_MODE); established_upcall(ep); c4iw_put_ep(&ep->com); CTR2(KTR_IW_CXGBE, "%s:cacE %p", __func__, ep); return 0; err1: ep->com.cm_id = NULL; ep->com.qp = NULL; cm_id->rem_ref(cm_id); err: c4iw_put_ep(&ep->com); CTR2(KTR_IW_CXGBE, "%s:cacE err %p", __func__, ep); return err; } int c4iw_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) { int err = 0; struct c4iw_dev *dev = to_c4iw_dev(cm_id->device); struct c4iw_ep *ep = NULL; struct rtentry *rt; struct toedev *tdev; CTR2(KTR_IW_CXGBE, "%s:ccB %p", __func__, cm_id); if ((conn_param->ord > c4iw_max_read_depth) || (conn_param->ird > c4iw_max_read_depth)) { CTR2(KTR_IW_CXGBE, "%s:cc1 %p", __func__, cm_id); err = -EINVAL; goto out; } ep = alloc_ep(sizeof(*ep), M_NOWAIT); if (!ep) { CTR2(KTR_IW_CXGBE, "%s:cc2 %p", __func__, cm_id); printk(KERN_ERR MOD "%s - cannot alloc ep.\n", __func__); err = -ENOMEM; goto out; } init_timer(&ep->timer); ep->plen = conn_param->private_data_len; if (ep->plen) { CTR2(KTR_IW_CXGBE, "%s:cc3 %p", __func__, ep); memcpy(ep->mpa_pkt + sizeof(struct mpa_message), conn_param->private_data, ep->plen); } ep->ird = conn_param->ird; ep->ord = conn_param->ord; if (peer2peer && ep->ord == 0) { CTR2(KTR_IW_CXGBE, "%s:cc4 %p", __func__, ep); ep->ord = 1; } cm_id->add_ref(cm_id); ep->com.dev = dev; ep->com.cm_id = cm_id; ep->com.qp = get_qhp(dev, conn_param->qpn); if (!ep->com.qp) { CTR2(KTR_IW_CXGBE, "%s:cc5 %p", __func__, ep); err = -EINVAL; goto fail2; } ep->com.thread = curthread; ep->com.so = cm_id->so; init_sock(&ep->com); /* find a route */ rt = find_route( cm_id->local_addr.sin_addr.s_addr, cm_id->remote_addr.sin_addr.s_addr, cm_id->local_addr.sin_port, cm_id->remote_addr.sin_port, 0); if (!rt) { CTR2(KTR_IW_CXGBE, "%s:cc7 %p", __func__, ep); printk(KERN_ERR MOD "%s - cannot find route.\n", __func__); err = -EHOSTUNREACH; goto fail3; } if (!(rt->rt_ifp->if_flags & IFCAP_TOE)) { CTR2(KTR_IW_CXGBE, "%s:cc8 %p", __func__, ep); printf("%s - interface not TOE capable.\n", __func__); goto fail3; } tdev = TOEDEV(rt->rt_ifp); if (tdev == NULL) { CTR2(KTR_IW_CXGBE, "%s:cc9 %p", __func__, ep); printf("%s - No toedev for interface.\n", __func__); goto fail3; } RTFREE(rt); state_set(&ep->com, CONNECTING); ep->tos = 0; ep->com.local_addr = cm_id->local_addr; ep->com.remote_addr = cm_id->remote_addr; err = soconnect(ep->com.so, (struct sockaddr *)&ep->com.remote_addr, ep->com.thread); if (!err) { CTR2(KTR_IW_CXGBE, "%s:cca %p", __func__, ep); goto out; } fail3: CTR2(KTR_IW_CXGBE, "%s:ccb %p", __func__, ep); RTFREE(rt); fail2: cm_id->rem_ref(cm_id); c4iw_put_ep(&ep->com); out: CTR2(KTR_IW_CXGBE, "%s:ccE %p", __func__, ep); return err; } /* * iwcm->create_listen. Returns -errno on failure. */ int c4iw_create_listen(struct iw_cm_id *cm_id, int backlog) { int rc; struct c4iw_dev *dev = to_c4iw_dev(cm_id->device); struct c4iw_listen_ep *ep; struct socket *so = cm_id->so; ep = alloc_ep(sizeof(*ep), GFP_KERNEL); CTR5(KTR_IW_CXGBE, "%s: cm_id %p, lso %p, ep %p, inp %p", __func__, cm_id, so, ep, so->so_pcb); if (ep == NULL) { log(LOG_ERR, "%s: failed to alloc memory for endpoint\n", __func__); rc = ENOMEM; goto failed; } cm_id->add_ref(cm_id); ep->com.cm_id = cm_id; ep->com.dev = dev; ep->backlog = backlog; ep->com.local_addr = cm_id->local_addr; ep->com.thread = curthread; state_set(&ep->com, LISTEN); ep->com.so = so; init_sock(&ep->com); rc = solisten(so, ep->backlog, ep->com.thread); if (rc != 0) { log(LOG_ERR, "%s: failed to start listener: %d\n", __func__, rc); close_socket(&ep->com, 0); cm_id->rem_ref(cm_id); c4iw_put_ep(&ep->com); goto failed; } cm_id->provider_data = ep; return (0); failed: CTR3(KTR_IW_CXGBE, "%s: cm_id %p, FAILED (%d)", __func__, cm_id, rc); return (-rc); } int c4iw_destroy_listen(struct iw_cm_id *cm_id) { int rc; struct c4iw_listen_ep *ep = to_listen_ep(cm_id); CTR4(KTR_IW_CXGBE, "%s: cm_id %p, so %p, inp %p", __func__, cm_id, cm_id->so, cm_id->so->so_pcb); state_set(&ep->com, DEAD); rc = close_socket(&ep->com, 0); cm_id->rem_ref(cm_id); c4iw_put_ep(&ep->com); return (rc); } int c4iw_ep_disconnect(struct c4iw_ep *ep, int abrupt, gfp_t gfp) { int ret = 0; int close = 0; int fatal = 0; struct c4iw_rdev *rdev; mutex_lock(&ep->com.mutex); CTR2(KTR_IW_CXGBE, "%s:cedB %p", __func__, ep); rdev = &ep->com.dev->rdev; if (c4iw_fatal_error(rdev)) { CTR2(KTR_IW_CXGBE, "%s:ced1 %p", __func__, ep); fatal = 1; close_complete_upcall(ep); ep->com.state = DEAD; } CTR3(KTR_IW_CXGBE, "%s:ced2 %p %s", __func__, ep, states[ep->com.state]); switch (ep->com.state) { case MPA_REQ_WAIT: case MPA_REQ_SENT: case MPA_REQ_RCVD: case MPA_REP_SENT: case FPDU_MODE: close = 1; if (abrupt) ep->com.state = ABORTING; else { ep->com.state = CLOSING; START_EP_TIMER(ep); } set_bit(CLOSE_SENT, &ep->com.flags); break; case CLOSING: if (!test_and_set_bit(CLOSE_SENT, &ep->com.flags)) { close = 1; if (abrupt) { STOP_EP_TIMER(ep); ep->com.state = ABORTING; } else ep->com.state = MORIBUND; } break; case MORIBUND: case ABORTING: case DEAD: CTR3(KTR_IW_CXGBE, "%s ignoring disconnect ep %p state %u", __func__, ep, ep->com.state); break; default: BUG(); break; } mutex_unlock(&ep->com.mutex); if (close) { CTR2(KTR_IW_CXGBE, "%s:ced3 %p", __func__, ep); if (abrupt) { CTR2(KTR_IW_CXGBE, "%s:ced4 %p", __func__, ep); set_bit(EP_DISC_ABORT, &ep->com.history); ret = abort_connection(ep); } else { CTR2(KTR_IW_CXGBE, "%s:ced5 %p", __func__, ep); set_bit(EP_DISC_CLOSE, &ep->com.history); if (!ep->parent_ep) __state_set(&ep->com, MORIBUND); ret = shutdown_socket(&ep->com); } if (ret) { fatal = 1; } } if (fatal) { release_ep_resources(ep); CTR2(KTR_IW_CXGBE, "%s:ced6 %p", __func__, ep); } CTR2(KTR_IW_CXGBE, "%s:cedE %p", __func__, ep); return ret; } #ifdef C4IW_EP_REDIRECT int c4iw_ep_redirect(void *ctx, struct dst_entry *old, struct dst_entry *new, struct l2t_entry *l2t) { struct c4iw_ep *ep = ctx; if (ep->dst != old) return 0; PDBG("%s ep %p redirect to dst %p l2t %p\n", __func__, ep, new, l2t); dst_hold(new); cxgb4_l2t_release(ep->l2t); ep->l2t = l2t; dst_release(old); ep->dst = new; return 1; } #endif static void ep_timeout(unsigned long arg) { struct c4iw_ep *ep = (struct c4iw_ep *)arg; int kickit = 0; CTR2(KTR_IW_CXGBE, "%s:etB %p", __func__, ep); spin_lock(&timeout_lock); if (!test_and_set_bit(TIMEOUT, &ep->com.flags)) { list_add_tail(&ep->entry, &timeout_list); kickit = 1; } spin_unlock(&timeout_lock); if (kickit) { CTR2(KTR_IW_CXGBE, "%s:et1 %p", __func__, ep); queue_work(c4iw_taskq, &c4iw_task); } CTR2(KTR_IW_CXGBE, "%s:etE %p", __func__, ep); } static int fw6_wr_rpl(struct adapter *sc, const __be64 *rpl) { uint64_t val = be64toh(*rpl); int ret; struct c4iw_wr_wait *wr_waitp; ret = (int)((val >> 8) & 0xff); wr_waitp = (struct c4iw_wr_wait *)rpl[1]; CTR3(KTR_IW_CXGBE, "%s wr_waitp %p ret %u", __func__, wr_waitp, ret); if (wr_waitp) c4iw_wake_up(wr_waitp, ret ? -ret : 0); return (0); } static int fw6_cqe_handler(struct adapter *sc, const __be64 *rpl) { struct t4_cqe cqe =*(const struct t4_cqe *)(&rpl[0]); CTR2(KTR_IW_CXGBE, "%s rpl %p", __func__, rpl); c4iw_ev_dispatch(sc->iwarp_softc, &cqe); return (0); } static int terminate(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m) { struct adapter *sc = iq->adapter; const struct cpl_rdma_terminate *rpl = (const void *)(rss + 1); unsigned int tid = GET_TID(rpl); struct c4iw_qp_attributes attrs; struct toepcb *toep = lookup_tid(sc, tid); struct socket *so = inp_inpcbtosocket(toep->inp); struct c4iw_ep *ep = so->so_rcv.sb_upcallarg; CTR2(KTR_IW_CXGBE, "%s:tB %p %d", __func__, ep); if (ep && ep->com.qp) { printk(KERN_WARNING MOD "TERM received tid %u qpid %u\n", tid, ep->com.qp->wq.sq.qid); attrs.next_state = C4IW_QP_STATE_TERMINATE; c4iw_modify_qp(ep->com.dev, ep->com.qp, C4IW_QP_ATTR_NEXT_STATE, &attrs, 1); } else printk(KERN_WARNING MOD "TERM received tid %u no ep/qp\n", tid); CTR2(KTR_IW_CXGBE, "%s:tE %p %d", __func__, ep); return 0; } void c4iw_cm_init_cpl(struct adapter *sc) { t4_register_cpl_handler(sc, CPL_RDMA_TERMINATE, terminate); t4_register_fw_msg_handler(sc, FW6_TYPE_WR_RPL, fw6_wr_rpl); t4_register_fw_msg_handler(sc, FW6_TYPE_CQE, fw6_cqe_handler); t4_register_an_handler(sc, c4iw_ev_handler); } void c4iw_cm_term_cpl(struct adapter *sc) { t4_register_cpl_handler(sc, CPL_RDMA_TERMINATE, NULL); t4_register_fw_msg_handler(sc, FW6_TYPE_WR_RPL, NULL); t4_register_fw_msg_handler(sc, FW6_TYPE_CQE, NULL); } int __init c4iw_cm_init(void) { TAILQ_INIT(&req_list); spin_lock_init(&req_lock); INIT_LIST_HEAD(&timeout_list); spin_lock_init(&timeout_lock); INIT_WORK(&c4iw_task, process_req); c4iw_taskq = create_singlethread_workqueue("iw_cxgbe"); if (!c4iw_taskq) return -ENOMEM; return 0; } void __exit c4iw_cm_term(void) { WARN_ON(!TAILQ_EMPTY(&req_list)); WARN_ON(!list_empty(&timeout_list)); flush_workqueue(c4iw_taskq); destroy_workqueue(c4iw_taskq); } #endif Index: stable/10/sys/dev/cxgbe/iw_cxgbe/qp.c =================================================================== --- stable/10/sys/dev/cxgbe/iw_cxgbe/qp.c (revision 271126) +++ stable/10/sys/dev/cxgbe/iw_cxgbe/qp.c (revision 271127) @@ -1,1707 +1,1706 @@ /* * Copyright (c) 2009-2013 Chelsio, Inc. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU * General Public License (GPL) Version 2, available from the file * COPYING in the main directory of this source tree, or the * OpenIB.org BSD license below: * * Redistribution and use in source and binary forms, with or * without modification, are permitted provided that the following * conditions are met: * * - Redistributions of source code must retain the above * copyright notice, this list of conditions and the following * disclaimer. * * - Redistributions in binary form must reproduce the above * copyright notice, this list of conditions and the following * disclaimer in the documentation and/or other materials * provided with the distribution. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ #include __FBSDID("$FreeBSD$"); #include "opt_inet.h" #ifdef TCP_OFFLOAD #include #include #include #include #include #include #include -#include #include #include #include #include #include #include #include #include #include struct sge_iq; struct rss_header; #include #include "offload.h" #include "tom/t4_tom.h" #include "iw_cxgbe.h" #include "user.h" extern int db_delay_usecs; extern int db_fc_threshold; static void creds(struct toepcb *toep, size_t wrsize); static void set_state(struct c4iw_qp *qhp, enum c4iw_qp_state state) { unsigned long flag; spin_lock_irqsave(&qhp->lock, flag); qhp->attr.state = state; spin_unlock_irqrestore(&qhp->lock, flag); } static void dealloc_host_sq(struct c4iw_rdev *rdev, struct t4_sq *sq) { contigfree(sq->queue, sq->memsize, M_DEVBUF); } static void dealloc_sq(struct c4iw_rdev *rdev, struct t4_sq *sq) { dealloc_host_sq(rdev, sq); } static int alloc_host_sq(struct c4iw_rdev *rdev, struct t4_sq *sq) { sq->queue = contigmalloc(sq->memsize, M_DEVBUF, M_NOWAIT, 0ul, ~0ul, 4096, 0); if (sq->queue) sq->dma_addr = vtophys(sq->queue); else return -ENOMEM; sq->phys_addr = vtophys(sq->queue); pci_unmap_addr_set(sq, mapping, sq->dma_addr); CTR4(KTR_IW_CXGBE, "%s sq %p dma_addr %p phys_addr %p", __func__, sq->queue, sq->dma_addr, sq->phys_addr); return 0; } static int destroy_qp(struct c4iw_rdev *rdev, struct t4_wq *wq, struct c4iw_dev_ucontext *uctx) { /* * uP clears EQ contexts when the connection exits rdma mode, * so no need to post a RESET WR for these EQs. */ contigfree(wq->rq.queue, wq->rq.memsize, M_DEVBUF); dealloc_sq(rdev, &wq->sq); c4iw_rqtpool_free(rdev, wq->rq.rqt_hwaddr, wq->rq.rqt_size); kfree(wq->rq.sw_rq); kfree(wq->sq.sw_sq); c4iw_put_qpid(rdev, wq->rq.qid, uctx); c4iw_put_qpid(rdev, wq->sq.qid, uctx); return 0; } static int create_qp(struct c4iw_rdev *rdev, struct t4_wq *wq, struct t4_cq *rcq, struct t4_cq *scq, struct c4iw_dev_ucontext *uctx) { struct adapter *sc = rdev->adap; int user = (uctx != &rdev->uctx); struct fw_ri_res_wr *res_wr; struct fw_ri_res *res; int wr_len; struct c4iw_wr_wait wr_wait; int ret; int eqsize; struct wrqe *wr; wq->sq.qid = c4iw_get_qpid(rdev, uctx); if (!wq->sq.qid) return -ENOMEM; wq->rq.qid = c4iw_get_qpid(rdev, uctx); if (!wq->rq.qid) goto err1; if (!user) { wq->sq.sw_sq = kzalloc(wq->sq.size * sizeof *wq->sq.sw_sq, GFP_KERNEL); if (!wq->sq.sw_sq) goto err2; wq->rq.sw_rq = kzalloc(wq->rq.size * sizeof *wq->rq.sw_rq, GFP_KERNEL); if (!wq->rq.sw_rq) goto err3; } /* RQT must be a power of 2. */ wq->rq.rqt_size = roundup_pow_of_two(wq->rq.size); wq->rq.rqt_hwaddr = c4iw_rqtpool_alloc(rdev, wq->rq.rqt_size); if (!wq->rq.rqt_hwaddr) goto err4; if (alloc_host_sq(rdev, &wq->sq)) goto err5; memset(wq->sq.queue, 0, wq->sq.memsize); pci_unmap_addr_set(&wq->sq, mapping, wq->sq.dma_addr); wq->rq.queue = contigmalloc(wq->rq.memsize, M_DEVBUF, M_NOWAIT, 0ul, ~0ul, 4096, 0); if (wq->rq.queue) wq->rq.dma_addr = vtophys(wq->rq.queue); else goto err6; CTR5(KTR_IW_CXGBE, "%s sq base va 0x%p pa 0x%llx rq base va 0x%p pa 0x%llx", __func__, wq->sq.queue, (unsigned long long)vtophys(wq->sq.queue), wq->rq.queue, (unsigned long long)vtophys(wq->rq.queue)); memset(wq->rq.queue, 0, wq->rq.memsize); pci_unmap_addr_set(&wq->rq, mapping, wq->rq.dma_addr); wq->db = (void *)((unsigned long)rman_get_virtual(sc->regs_res) + MYPF_REG(SGE_PF_KDOORBELL)); wq->gts = (void *)((unsigned long)rman_get_virtual(rdev->adap->regs_res) + MYPF_REG(SGE_PF_GTS)); if (user) { wq->sq.udb = (u64)((char*)rman_get_virtual(rdev->adap->udbs_res) + (wq->sq.qid << rdev->qpshift)); wq->sq.udb &= PAGE_MASK; wq->rq.udb = (u64)((char*)rman_get_virtual(rdev->adap->udbs_res) + (wq->rq.qid << rdev->qpshift)); wq->rq.udb &= PAGE_MASK; } wq->rdev = rdev; wq->rq.msn = 1; /* build fw_ri_res_wr */ wr_len = sizeof *res_wr + 2 * sizeof *res; wr = alloc_wrqe(wr_len, &sc->sge.mgmtq); if (wr == NULL) return (0); res_wr = wrtod(wr); memset(res_wr, 0, wr_len); res_wr->op_nres = cpu_to_be32( V_FW_WR_OP(FW_RI_RES_WR) | V_FW_RI_RES_WR_NRES(2) | F_FW_WR_COMPL); res_wr->len16_pkd = cpu_to_be32(DIV_ROUND_UP(wr_len, 16)); res_wr->cookie = (unsigned long) &wr_wait; res = res_wr->res; res->u.sqrq.restype = FW_RI_RES_TYPE_SQ; res->u.sqrq.op = FW_RI_RES_OP_WRITE; /* eqsize is the number of 64B entries plus the status page size. */ eqsize = wq->sq.size * T4_SQ_NUM_SLOTS + spg_creds; res->u.sqrq.fetchszm_to_iqid = cpu_to_be32( V_FW_RI_RES_WR_HOSTFCMODE(0) | /* no host cidx updates */ V_FW_RI_RES_WR_CPRIO(0) | /* don't keep in chip cache */ V_FW_RI_RES_WR_PCIECHN(0) | /* set by uP at ri_init time */ V_FW_RI_RES_WR_IQID(scq->cqid)); res->u.sqrq.dcaen_to_eqsize = cpu_to_be32( V_FW_RI_RES_WR_DCAEN(0) | V_FW_RI_RES_WR_DCACPU(0) | V_FW_RI_RES_WR_FBMIN(2) | V_FW_RI_RES_WR_FBMAX(2) | V_FW_RI_RES_WR_CIDXFTHRESHO(0) | V_FW_RI_RES_WR_CIDXFTHRESH(0) | V_FW_RI_RES_WR_EQSIZE(eqsize)); res->u.sqrq.eqid = cpu_to_be32(wq->sq.qid); res->u.sqrq.eqaddr = cpu_to_be64(wq->sq.dma_addr); res++; res->u.sqrq.restype = FW_RI_RES_TYPE_RQ; res->u.sqrq.op = FW_RI_RES_OP_WRITE; /* eqsize is the number of 64B entries plus the status page size. */ eqsize = wq->rq.size * T4_RQ_NUM_SLOTS + spg_creds ; res->u.sqrq.fetchszm_to_iqid = cpu_to_be32( V_FW_RI_RES_WR_HOSTFCMODE(0) | /* no host cidx updates */ V_FW_RI_RES_WR_CPRIO(0) | /* don't keep in chip cache */ V_FW_RI_RES_WR_PCIECHN(0) | /* set by uP at ri_init time */ V_FW_RI_RES_WR_IQID(rcq->cqid)); res->u.sqrq.dcaen_to_eqsize = cpu_to_be32( V_FW_RI_RES_WR_DCAEN(0) | V_FW_RI_RES_WR_DCACPU(0) | V_FW_RI_RES_WR_FBMIN(2) | V_FW_RI_RES_WR_FBMAX(2) | V_FW_RI_RES_WR_CIDXFTHRESHO(0) | V_FW_RI_RES_WR_CIDXFTHRESH(0) | V_FW_RI_RES_WR_EQSIZE(eqsize)); res->u.sqrq.eqid = cpu_to_be32(wq->rq.qid); res->u.sqrq.eqaddr = cpu_to_be64(wq->rq.dma_addr); c4iw_init_wr_wait(&wr_wait); t4_wrq_tx(sc, wr); ret = c4iw_wait_for_reply(rdev, &wr_wait, 0, wq->sq.qid, __func__); if (ret) goto err7; CTR6(KTR_IW_CXGBE, "%s sqid 0x%x rqid 0x%x kdb 0x%p squdb 0x%llx rqudb 0x%llx", __func__, wq->sq.qid, wq->rq.qid, wq->db, (unsigned long long)wq->sq.udb, (unsigned long long)wq->rq.udb); return 0; err7: contigfree(wq->rq.queue, wq->rq.memsize, M_DEVBUF); err6: dealloc_sq(rdev, &wq->sq); err5: c4iw_rqtpool_free(rdev, wq->rq.rqt_hwaddr, wq->rq.rqt_size); err4: kfree(wq->rq.sw_rq); err3: kfree(wq->sq.sw_sq); err2: c4iw_put_qpid(rdev, wq->rq.qid, uctx); err1: c4iw_put_qpid(rdev, wq->sq.qid, uctx); return -ENOMEM; } static int build_immd(struct t4_sq *sq, struct fw_ri_immd *immdp, struct ib_send_wr *wr, int max, u32 *plenp) { u8 *dstp, *srcp; u32 plen = 0; int i; int rem, len; dstp = (u8 *)immdp->data; for (i = 0; i < wr->num_sge; i++) { if ((plen + wr->sg_list[i].length) > max) return -EMSGSIZE; srcp = (u8 *)(unsigned long)wr->sg_list[i].addr; plen += wr->sg_list[i].length; rem = wr->sg_list[i].length; while (rem) { if (dstp == (u8 *)&sq->queue[sq->size]) dstp = (u8 *)sq->queue; if (rem <= (u8 *)&sq->queue[sq->size] - dstp) len = rem; else len = (u8 *)&sq->queue[sq->size] - dstp; memcpy(dstp, srcp, len); dstp += len; srcp += len; rem -= len; } } len = roundup(plen + sizeof *immdp, 16) - (plen + sizeof *immdp); if (len) memset(dstp, 0, len); immdp->op = FW_RI_DATA_IMMD; immdp->r1 = 0; immdp->r2 = 0; immdp->immdlen = cpu_to_be32(plen); *plenp = plen; return 0; } static int build_isgl(__be64 *queue_start, __be64 *queue_end, struct fw_ri_isgl *isglp, struct ib_sge *sg_list, int num_sge, u32 *plenp) { int i; u32 plen = 0; __be64 *flitp = (__be64 *)isglp->sge; for (i = 0; i < num_sge; i++) { if ((plen + sg_list[i].length) < plen) return -EMSGSIZE; plen += sg_list[i].length; *flitp = cpu_to_be64(((u64)sg_list[i].lkey << 32) | sg_list[i].length); if (++flitp == queue_end) flitp = queue_start; *flitp = cpu_to_be64(sg_list[i].addr); if (++flitp == queue_end) flitp = queue_start; } *flitp = (__force __be64)0; isglp->op = FW_RI_DATA_ISGL; isglp->r1 = 0; isglp->nsge = cpu_to_be16(num_sge); isglp->r2 = 0; if (plenp) *plenp = plen; return 0; } static int build_rdma_send(struct t4_sq *sq, union t4_wr *wqe, struct ib_send_wr *wr, u8 *len16) { u32 plen; int size; int ret; if (wr->num_sge > T4_MAX_SEND_SGE) return -EINVAL; switch (wr->opcode) { case IB_WR_SEND: if (wr->send_flags & IB_SEND_SOLICITED) wqe->send.sendop_pkd = cpu_to_be32( V_FW_RI_SEND_WR_SENDOP(FW_RI_SEND_WITH_SE)); else wqe->send.sendop_pkd = cpu_to_be32( V_FW_RI_SEND_WR_SENDOP(FW_RI_SEND)); wqe->send.stag_inv = 0; break; case IB_WR_SEND_WITH_INV: if (wr->send_flags & IB_SEND_SOLICITED) wqe->send.sendop_pkd = cpu_to_be32( V_FW_RI_SEND_WR_SENDOP(FW_RI_SEND_WITH_SE_INV)); else wqe->send.sendop_pkd = cpu_to_be32( V_FW_RI_SEND_WR_SENDOP(FW_RI_SEND_WITH_INV)); wqe->send.stag_inv = cpu_to_be32(wr->ex.invalidate_rkey); break; default: return -EINVAL; } plen = 0; if (wr->num_sge) { if (wr->send_flags & IB_SEND_INLINE) { ret = build_immd(sq, wqe->send.u.immd_src, wr, T4_MAX_SEND_INLINE, &plen); if (ret) return ret; size = sizeof wqe->send + sizeof(struct fw_ri_immd) + plen; } else { ret = build_isgl((__be64 *)sq->queue, (__be64 *)&sq->queue[sq->size], wqe->send.u.isgl_src, wr->sg_list, wr->num_sge, &plen); if (ret) return ret; size = sizeof wqe->send + sizeof(struct fw_ri_isgl) + wr->num_sge * sizeof(struct fw_ri_sge); } } else { wqe->send.u.immd_src[0].op = FW_RI_DATA_IMMD; wqe->send.u.immd_src[0].r1 = 0; wqe->send.u.immd_src[0].r2 = 0; wqe->send.u.immd_src[0].immdlen = 0; size = sizeof wqe->send + sizeof(struct fw_ri_immd); plen = 0; } *len16 = DIV_ROUND_UP(size, 16); wqe->send.plen = cpu_to_be32(plen); return 0; } static int build_rdma_write(struct t4_sq *sq, union t4_wr *wqe, struct ib_send_wr *wr, u8 *len16) { u32 plen; int size; int ret; if (wr->num_sge > T4_MAX_SEND_SGE) return -EINVAL; wqe->write.r2 = 0; wqe->write.stag_sink = cpu_to_be32(wr->wr.rdma.rkey); wqe->write.to_sink = cpu_to_be64(wr->wr.rdma.remote_addr); if (wr->num_sge) { if (wr->send_flags & IB_SEND_INLINE) { ret = build_immd(sq, wqe->write.u.immd_src, wr, T4_MAX_WRITE_INLINE, &plen); if (ret) return ret; size = sizeof wqe->write + sizeof(struct fw_ri_immd) + plen; } else { ret = build_isgl((__be64 *)sq->queue, (__be64 *)&sq->queue[sq->size], wqe->write.u.isgl_src, wr->sg_list, wr->num_sge, &plen); if (ret) return ret; size = sizeof wqe->write + sizeof(struct fw_ri_isgl) + wr->num_sge * sizeof(struct fw_ri_sge); } } else { wqe->write.u.immd_src[0].op = FW_RI_DATA_IMMD; wqe->write.u.immd_src[0].r1 = 0; wqe->write.u.immd_src[0].r2 = 0; wqe->write.u.immd_src[0].immdlen = 0; size = sizeof wqe->write + sizeof(struct fw_ri_immd); plen = 0; } *len16 = DIV_ROUND_UP(size, 16); wqe->write.plen = cpu_to_be32(plen); return 0; } static int build_rdma_read(union t4_wr *wqe, struct ib_send_wr *wr, u8 *len16) { if (wr->num_sge > 1) return -EINVAL; if (wr->num_sge) { wqe->read.stag_src = cpu_to_be32(wr->wr.rdma.rkey); wqe->read.to_src_hi = cpu_to_be32((u32)(wr->wr.rdma.remote_addr >> 32)); wqe->read.to_src_lo = cpu_to_be32((u32)wr->wr.rdma.remote_addr); wqe->read.stag_sink = cpu_to_be32(wr->sg_list[0].lkey); wqe->read.plen = cpu_to_be32(wr->sg_list[0].length); wqe->read.to_sink_hi = cpu_to_be32((u32)(wr->sg_list[0].addr >> 32)); wqe->read.to_sink_lo = cpu_to_be32((u32)(wr->sg_list[0].addr)); } else { wqe->read.stag_src = cpu_to_be32(2); wqe->read.to_src_hi = 0; wqe->read.to_src_lo = 0; wqe->read.stag_sink = cpu_to_be32(2); wqe->read.plen = 0; wqe->read.to_sink_hi = 0; wqe->read.to_sink_lo = 0; } wqe->read.r2 = 0; wqe->read.r5 = 0; *len16 = DIV_ROUND_UP(sizeof wqe->read, 16); return 0; } static int build_rdma_recv(struct c4iw_qp *qhp, union t4_recv_wr *wqe, struct ib_recv_wr *wr, u8 *len16) { int ret; ret = build_isgl((__be64 *)qhp->wq.rq.queue, (__be64 *)&qhp->wq.rq.queue[qhp->wq.rq.size], &wqe->recv.isgl, wr->sg_list, wr->num_sge, NULL); if (ret) return ret; *len16 = DIV_ROUND_UP(sizeof wqe->recv + wr->num_sge * sizeof(struct fw_ri_sge), 16); return 0; } static int build_fastreg(struct t4_sq *sq, union t4_wr *wqe, struct ib_send_wr *wr, u8 *len16) { struct fw_ri_immd *imdp; __be64 *p; int i; int pbllen = roundup(wr->wr.fast_reg.page_list_len * sizeof(u64), 32); int rem; if (wr->wr.fast_reg.page_list_len > T4_MAX_FR_DEPTH) return -EINVAL; wqe->fr.qpbinde_to_dcacpu = 0; wqe->fr.pgsz_shift = wr->wr.fast_reg.page_shift - 12; wqe->fr.addr_type = FW_RI_VA_BASED_TO; wqe->fr.mem_perms = c4iw_ib_to_tpt_access(wr->wr.fast_reg.access_flags); wqe->fr.len_hi = 0; wqe->fr.len_lo = cpu_to_be32(wr->wr.fast_reg.length); wqe->fr.stag = cpu_to_be32(wr->wr.fast_reg.rkey); wqe->fr.va_hi = cpu_to_be32(wr->wr.fast_reg.iova_start >> 32); wqe->fr.va_lo_fbo = cpu_to_be32(wr->wr.fast_reg.iova_start & 0xffffffff); WARN_ON(pbllen > T4_MAX_FR_IMMD); imdp = (struct fw_ri_immd *)(&wqe->fr + 1); imdp->op = FW_RI_DATA_IMMD; imdp->r1 = 0; imdp->r2 = 0; imdp->immdlen = cpu_to_be32(pbllen); p = (__be64 *)(imdp + 1); rem = pbllen; for (i = 0; i < wr->wr.fast_reg.page_list_len; i++) { *p = cpu_to_be64((u64)wr->wr.fast_reg.page_list->page_list[i]); rem -= sizeof *p; if (++p == (__be64 *)&sq->queue[sq->size]) p = (__be64 *)sq->queue; } BUG_ON(rem < 0); while (rem) { *p = 0; rem -= sizeof *p; if (++p == (__be64 *)&sq->queue[sq->size]) p = (__be64 *)sq->queue; } *len16 = DIV_ROUND_UP(sizeof wqe->fr + sizeof *imdp + pbllen, 16); return 0; } static int build_inv_stag(union t4_wr *wqe, struct ib_send_wr *wr, u8 *len16) { wqe->inv.stag_inv = cpu_to_be32(wr->ex.invalidate_rkey); wqe->inv.r2 = 0; *len16 = DIV_ROUND_UP(sizeof wqe->inv, 16); return 0; } void c4iw_qp_add_ref(struct ib_qp *qp) { CTR2(KTR_IW_CXGBE, "%s ib_qp %p", __func__, qp); atomic_inc(&(to_c4iw_qp(qp)->refcnt)); } void c4iw_qp_rem_ref(struct ib_qp *qp) { CTR2(KTR_IW_CXGBE, "%s ib_qp %p", __func__, qp); if (atomic_dec_and_test(&(to_c4iw_qp(qp)->refcnt))) wake_up(&(to_c4iw_qp(qp)->wait)); } int c4iw_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, struct ib_send_wr **bad_wr) { int err = 0; u8 len16 = 0; enum fw_wr_opcodes fw_opcode = 0; enum fw_ri_wr_flags fw_flags; struct c4iw_qp *qhp; union t4_wr *wqe; u32 num_wrs; struct t4_swsqe *swsqe; unsigned long flag; u16 idx = 0; qhp = to_c4iw_qp(ibqp); spin_lock_irqsave(&qhp->lock, flag); if (t4_wq_in_error(&qhp->wq)) { spin_unlock_irqrestore(&qhp->lock, flag); return -EINVAL; } num_wrs = t4_sq_avail(&qhp->wq); if (num_wrs == 0) { spin_unlock_irqrestore(&qhp->lock, flag); return -ENOMEM; } while (wr) { if (num_wrs == 0) { err = -ENOMEM; *bad_wr = wr; break; } wqe = (union t4_wr *)((u8 *)qhp->wq.sq.queue + qhp->wq.sq.wq_pidx * T4_EQ_ENTRY_SIZE); fw_flags = 0; if (wr->send_flags & IB_SEND_SOLICITED) fw_flags |= FW_RI_SOLICITED_EVENT_FLAG; if (wr->send_flags & IB_SEND_SIGNALED) fw_flags |= FW_RI_COMPLETION_FLAG; swsqe = &qhp->wq.sq.sw_sq[qhp->wq.sq.pidx]; switch (wr->opcode) { case IB_WR_SEND_WITH_INV: case IB_WR_SEND: if (wr->send_flags & IB_SEND_FENCE) fw_flags |= FW_RI_READ_FENCE_FLAG; fw_opcode = FW_RI_SEND_WR; if (wr->opcode == IB_WR_SEND) swsqe->opcode = FW_RI_SEND; else swsqe->opcode = FW_RI_SEND_WITH_INV; err = build_rdma_send(&qhp->wq.sq, wqe, wr, &len16); break; case IB_WR_RDMA_WRITE: fw_opcode = FW_RI_RDMA_WRITE_WR; swsqe->opcode = FW_RI_RDMA_WRITE; err = build_rdma_write(&qhp->wq.sq, wqe, wr, &len16); break; case IB_WR_RDMA_READ: case IB_WR_RDMA_READ_WITH_INV: fw_opcode = FW_RI_RDMA_READ_WR; swsqe->opcode = FW_RI_READ_REQ; if (wr->opcode == IB_WR_RDMA_READ_WITH_INV) fw_flags = FW_RI_RDMA_READ_INVALIDATE; else fw_flags = 0; err = build_rdma_read(wqe, wr, &len16); if (err) break; swsqe->read_len = wr->sg_list[0].length; if (!qhp->wq.sq.oldest_read) qhp->wq.sq.oldest_read = swsqe; break; case IB_WR_FAST_REG_MR: fw_opcode = FW_RI_FR_NSMR_WR; swsqe->opcode = FW_RI_FAST_REGISTER; err = build_fastreg(&qhp->wq.sq, wqe, wr, &len16); break; case IB_WR_LOCAL_INV: if (wr->send_flags & IB_SEND_FENCE) fw_flags |= FW_RI_LOCAL_FENCE_FLAG; fw_opcode = FW_RI_INV_LSTAG_WR; swsqe->opcode = FW_RI_LOCAL_INV; err = build_inv_stag(wqe, wr, &len16); break; default: CTR2(KTR_IW_CXGBE, "%s post of type =%d TBD!", __func__, wr->opcode); err = -EINVAL; } if (err) { *bad_wr = wr; break; } swsqe->idx = qhp->wq.sq.pidx; swsqe->complete = 0; swsqe->signaled = (wr->send_flags & IB_SEND_SIGNALED); swsqe->wr_id = wr->wr_id; init_wr_hdr(wqe, qhp->wq.sq.pidx, fw_opcode, fw_flags, len16); CTR5(KTR_IW_CXGBE, "%s cookie 0x%llx pidx 0x%x opcode 0x%x read_len %u", __func__, (unsigned long long)wr->wr_id, qhp->wq.sq.pidx, swsqe->opcode, swsqe->read_len); wr = wr->next; num_wrs--; t4_sq_produce(&qhp->wq, len16); idx += DIV_ROUND_UP(len16*16, T4_EQ_ENTRY_SIZE); } if (t4_wq_db_enabled(&qhp->wq)) t4_ring_sq_db(&qhp->wq, idx); spin_unlock_irqrestore(&qhp->lock, flag); return err; } int c4iw_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr, struct ib_recv_wr **bad_wr) { int err = 0; struct c4iw_qp *qhp; union t4_recv_wr *wqe; u32 num_wrs; u8 len16 = 0; unsigned long flag; u16 idx = 0; qhp = to_c4iw_qp(ibqp); spin_lock_irqsave(&qhp->lock, flag); if (t4_wq_in_error(&qhp->wq)) { spin_unlock_irqrestore(&qhp->lock, flag); return -EINVAL; } num_wrs = t4_rq_avail(&qhp->wq); if (num_wrs == 0) { spin_unlock_irqrestore(&qhp->lock, flag); return -ENOMEM; } while (wr) { if (wr->num_sge > T4_MAX_RECV_SGE) { err = -EINVAL; *bad_wr = wr; break; } wqe = (union t4_recv_wr *)((u8 *)qhp->wq.rq.queue + qhp->wq.rq.wq_pidx * T4_EQ_ENTRY_SIZE); if (num_wrs) err = build_rdma_recv(qhp, wqe, wr, &len16); else err = -ENOMEM; if (err) { *bad_wr = wr; break; } qhp->wq.rq.sw_rq[qhp->wq.rq.pidx].wr_id = wr->wr_id; wqe->recv.opcode = FW_RI_RECV_WR; wqe->recv.r1 = 0; wqe->recv.wrid = qhp->wq.rq.pidx; wqe->recv.r2[0] = 0; wqe->recv.r2[1] = 0; wqe->recv.r2[2] = 0; wqe->recv.len16 = len16; CTR3(KTR_IW_CXGBE, "%s cookie 0x%llx pidx %u", __func__, (unsigned long long) wr->wr_id, qhp->wq.rq.pidx); t4_rq_produce(&qhp->wq, len16); idx += DIV_ROUND_UP(len16*16, T4_EQ_ENTRY_SIZE); wr = wr->next; num_wrs--; } if (t4_wq_db_enabled(&qhp->wq)) t4_ring_rq_db(&qhp->wq, idx); spin_unlock_irqrestore(&qhp->lock, flag); return err; } int c4iw_bind_mw(struct ib_qp *qp, struct ib_mw *mw, struct ib_mw_bind *mw_bind) { return -ENOSYS; } static inline void build_term_codes(struct t4_cqe *err_cqe, u8 *layer_type, u8 *ecode) { int status; int tagged; int opcode; int rqtype; int send_inv; if (!err_cqe) { *layer_type = LAYER_RDMAP|DDP_LOCAL_CATA; *ecode = 0; return; } status = CQE_STATUS(err_cqe); opcode = CQE_OPCODE(err_cqe); rqtype = RQ_TYPE(err_cqe); send_inv = (opcode == FW_RI_SEND_WITH_INV) || (opcode == FW_RI_SEND_WITH_SE_INV); tagged = (opcode == FW_RI_RDMA_WRITE) || (rqtype && (opcode == FW_RI_READ_RESP)); switch (status) { case T4_ERR_STAG: if (send_inv) { *layer_type = LAYER_RDMAP|RDMAP_REMOTE_OP; *ecode = RDMAP_CANT_INV_STAG; } else { *layer_type = LAYER_RDMAP|RDMAP_REMOTE_PROT; *ecode = RDMAP_INV_STAG; } break; case T4_ERR_PDID: *layer_type = LAYER_RDMAP|RDMAP_REMOTE_PROT; if ((opcode == FW_RI_SEND_WITH_INV) || (opcode == FW_RI_SEND_WITH_SE_INV)) *ecode = RDMAP_CANT_INV_STAG; else *ecode = RDMAP_STAG_NOT_ASSOC; break; case T4_ERR_QPID: *layer_type = LAYER_RDMAP|RDMAP_REMOTE_PROT; *ecode = RDMAP_STAG_NOT_ASSOC; break; case T4_ERR_ACCESS: *layer_type = LAYER_RDMAP|RDMAP_REMOTE_PROT; *ecode = RDMAP_ACC_VIOL; break; case T4_ERR_WRAP: *layer_type = LAYER_RDMAP|RDMAP_REMOTE_PROT; *ecode = RDMAP_TO_WRAP; break; case T4_ERR_BOUND: if (tagged) { *layer_type = LAYER_DDP|DDP_TAGGED_ERR; *ecode = DDPT_BASE_BOUNDS; } else { *layer_type = LAYER_RDMAP|RDMAP_REMOTE_PROT; *ecode = RDMAP_BASE_BOUNDS; } break; case T4_ERR_INVALIDATE_SHARED_MR: case T4_ERR_INVALIDATE_MR_WITH_MW_BOUND: *layer_type = LAYER_RDMAP|RDMAP_REMOTE_OP; *ecode = RDMAP_CANT_INV_STAG; break; case T4_ERR_ECC: case T4_ERR_ECC_PSTAG: case T4_ERR_INTERNAL_ERR: *layer_type = LAYER_RDMAP|RDMAP_LOCAL_CATA; *ecode = 0; break; case T4_ERR_OUT_OF_RQE: *layer_type = LAYER_DDP|DDP_UNTAGGED_ERR; *ecode = DDPU_INV_MSN_NOBUF; break; case T4_ERR_PBL_ADDR_BOUND: *layer_type = LAYER_DDP|DDP_TAGGED_ERR; *ecode = DDPT_BASE_BOUNDS; break; case T4_ERR_CRC: *layer_type = LAYER_MPA|DDP_LLP; *ecode = MPA_CRC_ERR; break; case T4_ERR_MARKER: *layer_type = LAYER_MPA|DDP_LLP; *ecode = MPA_MARKER_ERR; break; case T4_ERR_PDU_LEN_ERR: *layer_type = LAYER_DDP|DDP_UNTAGGED_ERR; *ecode = DDPU_MSG_TOOBIG; break; case T4_ERR_DDP_VERSION: if (tagged) { *layer_type = LAYER_DDP|DDP_TAGGED_ERR; *ecode = DDPT_INV_VERS; } else { *layer_type = LAYER_DDP|DDP_UNTAGGED_ERR; *ecode = DDPU_INV_VERS; } break; case T4_ERR_RDMA_VERSION: *layer_type = LAYER_RDMAP|RDMAP_REMOTE_OP; *ecode = RDMAP_INV_VERS; break; case T4_ERR_OPCODE: *layer_type = LAYER_RDMAP|RDMAP_REMOTE_OP; *ecode = RDMAP_INV_OPCODE; break; case T4_ERR_DDP_QUEUE_NUM: *layer_type = LAYER_DDP|DDP_UNTAGGED_ERR; *ecode = DDPU_INV_QN; break; case T4_ERR_MSN: case T4_ERR_MSN_GAP: case T4_ERR_MSN_RANGE: case T4_ERR_IRD_OVERFLOW: *layer_type = LAYER_DDP|DDP_UNTAGGED_ERR; *ecode = DDPU_INV_MSN_RANGE; break; case T4_ERR_TBIT: *layer_type = LAYER_DDP|DDP_LOCAL_CATA; *ecode = 0; break; case T4_ERR_MO: *layer_type = LAYER_DDP|DDP_UNTAGGED_ERR; *ecode = DDPU_INV_MO; break; default: *layer_type = LAYER_RDMAP|DDP_LOCAL_CATA; *ecode = 0; break; } } static void post_terminate(struct c4iw_qp *qhp, struct t4_cqe *err_cqe, gfp_t gfp) { struct fw_ri_wr *wqe; struct terminate_message *term; struct wrqe *wr; struct socket *so = qhp->ep->com.so; struct inpcb *inp = sotoinpcb(so); struct tcpcb *tp = intotcpcb(inp); struct toepcb *toep = tp->t_toe; CTR4(KTR_IW_CXGBE, "%s qhp %p qid 0x%x tid %u", __func__, qhp, qhp->wq.sq.qid, qhp->ep->hwtid); wr = alloc_wrqe(sizeof(*wqe), toep->ofld_txq); if (wr == NULL) return; wqe = wrtod(wr); memset(wqe, 0, sizeof *wqe); wqe->op_compl = cpu_to_be32(V_FW_WR_OP(FW_RI_WR)); wqe->flowid_len16 = cpu_to_be32( V_FW_WR_FLOWID(qhp->ep->hwtid) | V_FW_WR_LEN16(DIV_ROUND_UP(sizeof *wqe, 16))); wqe->u.terminate.type = FW_RI_TYPE_TERMINATE; wqe->u.terminate.immdlen = cpu_to_be32(sizeof *term); term = (struct terminate_message *)wqe->u.terminate.termmsg; if (qhp->attr.layer_etype == (LAYER_MPA|DDP_LLP)) { term->layer_etype = qhp->attr.layer_etype; term->ecode = qhp->attr.ecode; } else build_term_codes(err_cqe, &term->layer_etype, &term->ecode); creds(toep, sizeof(*wqe)); t4_wrq_tx(qhp->rhp->rdev.adap, wr); } /* Assumes qhp lock is held. */ static void __flush_qp(struct c4iw_qp *qhp, struct c4iw_cq *rchp, struct c4iw_cq *schp) { int count; int flushed; unsigned long flag; CTR4(KTR_IW_CXGBE, "%s qhp %p rchp %p schp %p", __func__, qhp, rchp, schp); /* locking hierarchy: cq lock first, then qp lock. */ spin_lock_irqsave(&rchp->lock, flag); spin_lock(&qhp->lock); c4iw_flush_hw_cq(&rchp->cq); c4iw_count_rcqes(&rchp->cq, &qhp->wq, &count); flushed = c4iw_flush_rq(&qhp->wq, &rchp->cq, count); spin_unlock(&qhp->lock); spin_unlock_irqrestore(&rchp->lock, flag); if (flushed) { spin_lock_irqsave(&rchp->comp_handler_lock, flag); (*rchp->ibcq.comp_handler)(&rchp->ibcq, rchp->ibcq.cq_context); spin_unlock_irqrestore(&rchp->comp_handler_lock, flag); } /* locking hierarchy: cq lock first, then qp lock. */ spin_lock_irqsave(&schp->lock, flag); spin_lock(&qhp->lock); c4iw_flush_hw_cq(&schp->cq); c4iw_count_scqes(&schp->cq, &qhp->wq, &count); flushed = c4iw_flush_sq(&qhp->wq, &schp->cq, count); spin_unlock(&qhp->lock); spin_unlock_irqrestore(&schp->lock, flag); if (flushed) { spin_lock_irqsave(&schp->comp_handler_lock, flag); (*schp->ibcq.comp_handler)(&schp->ibcq, schp->ibcq.cq_context); spin_unlock_irqrestore(&schp->comp_handler_lock, flag); } } static void flush_qp(struct c4iw_qp *qhp) { struct c4iw_cq *rchp, *schp; unsigned long flag; rchp = get_chp(qhp->rhp, qhp->attr.rcq); schp = get_chp(qhp->rhp, qhp->attr.scq); if (qhp->ibqp.uobject) { t4_set_wq_in_error(&qhp->wq); t4_set_cq_in_error(&rchp->cq); spin_lock_irqsave(&rchp->comp_handler_lock, flag); (*rchp->ibcq.comp_handler)(&rchp->ibcq, rchp->ibcq.cq_context); spin_unlock_irqrestore(&rchp->comp_handler_lock, flag); if (schp != rchp) { t4_set_cq_in_error(&schp->cq); spin_lock_irqsave(&schp->comp_handler_lock, flag); (*schp->ibcq.comp_handler)(&schp->ibcq, schp->ibcq.cq_context); spin_unlock_irqrestore(&schp->comp_handler_lock, flag); } return; } __flush_qp(qhp, rchp, schp); } static int rdma_fini(struct c4iw_dev *rhp, struct c4iw_qp *qhp, struct c4iw_ep *ep) { struct c4iw_rdev *rdev = &rhp->rdev; struct adapter *sc = rdev->adap; struct fw_ri_wr *wqe; int ret; struct wrqe *wr; struct socket *so = ep->com.so; struct inpcb *inp = sotoinpcb(so); struct tcpcb *tp = intotcpcb(inp); struct toepcb *toep = tp->t_toe; KASSERT(rhp == qhp->rhp && ep == qhp->ep, ("%s: EDOOFUS", __func__)); CTR4(KTR_IW_CXGBE, "%s qhp %p qid 0x%x tid %u", __func__, qhp, qhp->wq.sq.qid, ep->hwtid); wr = alloc_wrqe(sizeof(*wqe), toep->ofld_txq); if (wr == NULL) return (0); wqe = wrtod(wr); memset(wqe, 0, sizeof *wqe); wqe->op_compl = cpu_to_be32(V_FW_WR_OP(FW_RI_WR) | F_FW_WR_COMPL); wqe->flowid_len16 = cpu_to_be32(V_FW_WR_FLOWID(ep->hwtid) | V_FW_WR_LEN16(DIV_ROUND_UP(sizeof *wqe, 16))); wqe->cookie = (unsigned long) &ep->com.wr_wait; wqe->u.fini.type = FW_RI_TYPE_FINI; c4iw_init_wr_wait(&ep->com.wr_wait); creds(toep, sizeof(*wqe)); t4_wrq_tx(sc, wr); ret = c4iw_wait_for_reply(rdev, &ep->com.wr_wait, ep->hwtid, qhp->wq.sq.qid, __func__); return ret; } static void build_rtr_msg(u8 p2p_type, struct fw_ri_init *init) { CTR2(KTR_IW_CXGBE, "%s p2p_type = %d", __func__, p2p_type); memset(&init->u, 0, sizeof init->u); switch (p2p_type) { case FW_RI_INIT_P2PTYPE_RDMA_WRITE: init->u.write.opcode = FW_RI_RDMA_WRITE_WR; init->u.write.stag_sink = cpu_to_be32(1); init->u.write.to_sink = cpu_to_be64(1); init->u.write.u.immd_src[0].op = FW_RI_DATA_IMMD; init->u.write.len16 = DIV_ROUND_UP(sizeof init->u.write + sizeof(struct fw_ri_immd), 16); break; case FW_RI_INIT_P2PTYPE_READ_REQ: init->u.write.opcode = FW_RI_RDMA_READ_WR; init->u.read.stag_src = cpu_to_be32(1); init->u.read.to_src_lo = cpu_to_be32(1); init->u.read.stag_sink = cpu_to_be32(1); init->u.read.to_sink_lo = cpu_to_be32(1); init->u.read.len16 = DIV_ROUND_UP(sizeof init->u.read, 16); break; } } static void creds(struct toepcb *toep, size_t wrsize) { struct ofld_tx_sdesc *txsd; CTR3(KTR_IW_CXGBE, "%s:creB %p %u", __func__, toep , wrsize); INP_WLOCK(toep->inp); txsd = &toep->txsd[toep->txsd_pidx]; txsd->tx_credits = howmany(wrsize, 16); txsd->plen = 0; KASSERT(toep->tx_credits >= txsd->tx_credits && toep->txsd_avail > 0, ("%s: not enough credits (%d)", __func__, toep->tx_credits)); toep->tx_credits -= txsd->tx_credits; if (__predict_false(++toep->txsd_pidx == toep->txsd_total)) toep->txsd_pidx = 0; toep->txsd_avail--; INP_WUNLOCK(toep->inp); CTR5(KTR_IW_CXGBE, "%s:creE %p %u %u %u", __func__, toep , txsd->tx_credits, toep->tx_credits, toep->txsd_pidx); } static int rdma_init(struct c4iw_dev *rhp, struct c4iw_qp *qhp) { struct fw_ri_wr *wqe; int ret; struct wrqe *wr; struct c4iw_ep *ep = qhp->ep; struct c4iw_rdev *rdev = &qhp->rhp->rdev; struct adapter *sc = rdev->adap; struct socket *so = ep->com.so; struct inpcb *inp = sotoinpcb(so); struct tcpcb *tp = intotcpcb(inp); struct toepcb *toep = tp->t_toe; CTR4(KTR_IW_CXGBE, "%s qhp %p qid 0x%x tid %u", __func__, qhp, qhp->wq.sq.qid, ep->hwtid); wr = alloc_wrqe(sizeof(*wqe), toep->ofld_txq); if (wr == NULL) return (0); wqe = wrtod(wr); memset(wqe, 0, sizeof *wqe); wqe->op_compl = cpu_to_be32( V_FW_WR_OP(FW_RI_WR) | F_FW_WR_COMPL); wqe->flowid_len16 = cpu_to_be32(V_FW_WR_FLOWID(ep->hwtid) | V_FW_WR_LEN16(DIV_ROUND_UP(sizeof *wqe, 16))); wqe->cookie = (unsigned long) &ep->com.wr_wait; wqe->u.init.type = FW_RI_TYPE_INIT; wqe->u.init.mpareqbit_p2ptype = V_FW_RI_WR_MPAREQBIT(qhp->attr.mpa_attr.initiator) | V_FW_RI_WR_P2PTYPE(qhp->attr.mpa_attr.p2p_type); wqe->u.init.mpa_attrs = FW_RI_MPA_IETF_ENABLE; if (qhp->attr.mpa_attr.recv_marker_enabled) wqe->u.init.mpa_attrs |= FW_RI_MPA_RX_MARKER_ENABLE; if (qhp->attr.mpa_attr.xmit_marker_enabled) wqe->u.init.mpa_attrs |= FW_RI_MPA_TX_MARKER_ENABLE; if (qhp->attr.mpa_attr.crc_enabled) wqe->u.init.mpa_attrs |= FW_RI_MPA_CRC_ENABLE; wqe->u.init.qp_caps = FW_RI_QP_RDMA_READ_ENABLE | FW_RI_QP_RDMA_WRITE_ENABLE | FW_RI_QP_BIND_ENABLE; if (!qhp->ibqp.uobject) wqe->u.init.qp_caps |= FW_RI_QP_FAST_REGISTER_ENABLE | FW_RI_QP_STAG0_ENABLE; wqe->u.init.nrqe = cpu_to_be16(t4_rqes_posted(&qhp->wq)); wqe->u.init.pdid = cpu_to_be32(qhp->attr.pd); wqe->u.init.qpid = cpu_to_be32(qhp->wq.sq.qid); wqe->u.init.sq_eqid = cpu_to_be32(qhp->wq.sq.qid); wqe->u.init.rq_eqid = cpu_to_be32(qhp->wq.rq.qid); wqe->u.init.scqid = cpu_to_be32(qhp->attr.scq); wqe->u.init.rcqid = cpu_to_be32(qhp->attr.rcq); wqe->u.init.ord_max = cpu_to_be32(qhp->attr.max_ord); wqe->u.init.ird_max = cpu_to_be32(qhp->attr.max_ird); wqe->u.init.iss = cpu_to_be32(ep->snd_seq); wqe->u.init.irs = cpu_to_be32(ep->rcv_seq); wqe->u.init.hwrqsize = cpu_to_be32(qhp->wq.rq.rqt_size); wqe->u.init.hwrqaddr = cpu_to_be32(qhp->wq.rq.rqt_hwaddr - sc->vres.rq.start); if (qhp->attr.mpa_attr.initiator) build_rtr_msg(qhp->attr.mpa_attr.p2p_type, &wqe->u.init); c4iw_init_wr_wait(&ep->com.wr_wait); creds(toep, sizeof(*wqe)); t4_wrq_tx(sc, wr); ret = c4iw_wait_for_reply(rdev, &ep->com.wr_wait, ep->hwtid, qhp->wq.sq.qid, __func__); toep->ulp_mode = ULP_MODE_RDMA; return ret; } int c4iw_modify_qp(struct c4iw_dev *rhp, struct c4iw_qp *qhp, enum c4iw_qp_attr_mask mask, struct c4iw_qp_attributes *attrs, int internal) { int ret = 0; struct c4iw_qp_attributes newattr = qhp->attr; int disconnect = 0; int terminate = 0; int abort = 0; int free = 0; struct c4iw_ep *ep = NULL; CTR5(KTR_IW_CXGBE, "%s qhp %p sqid 0x%x rqid 0x%x ep %p", __func__, qhp, qhp->wq.sq.qid, qhp->wq.rq.qid, qhp->ep); CTR3(KTR_IW_CXGBE, "%s state %d -> %d", __func__, qhp->attr.state, (mask & C4IW_QP_ATTR_NEXT_STATE) ? attrs->next_state : -1); mutex_lock(&qhp->mutex); /* Process attr changes if in IDLE */ if (mask & C4IW_QP_ATTR_VALID_MODIFY) { if (qhp->attr.state != C4IW_QP_STATE_IDLE) { ret = -EIO; goto out; } if (mask & C4IW_QP_ATTR_ENABLE_RDMA_READ) newattr.enable_rdma_read = attrs->enable_rdma_read; if (mask & C4IW_QP_ATTR_ENABLE_RDMA_WRITE) newattr.enable_rdma_write = attrs->enable_rdma_write; if (mask & C4IW_QP_ATTR_ENABLE_RDMA_BIND) newattr.enable_bind = attrs->enable_bind; if (mask & C4IW_QP_ATTR_MAX_ORD) { if (attrs->max_ord > c4iw_max_read_depth) { ret = -EINVAL; goto out; } newattr.max_ord = attrs->max_ord; } if (mask & C4IW_QP_ATTR_MAX_IRD) { if (attrs->max_ird > c4iw_max_read_depth) { ret = -EINVAL; goto out; } newattr.max_ird = attrs->max_ird; } qhp->attr = newattr; } if (!(mask & C4IW_QP_ATTR_NEXT_STATE)) goto out; if (qhp->attr.state == attrs->next_state) goto out; switch (qhp->attr.state) { case C4IW_QP_STATE_IDLE: switch (attrs->next_state) { case C4IW_QP_STATE_RTS: if (!(mask & C4IW_QP_ATTR_LLP_STREAM_HANDLE)) { ret = -EINVAL; goto out; } if (!(mask & C4IW_QP_ATTR_MPA_ATTR)) { ret = -EINVAL; goto out; } qhp->attr.mpa_attr = attrs->mpa_attr; qhp->attr.llp_stream_handle = attrs->llp_stream_handle; qhp->ep = qhp->attr.llp_stream_handle; set_state(qhp, C4IW_QP_STATE_RTS); /* * Ref the endpoint here and deref when we * disassociate the endpoint from the QP. This * happens in CLOSING->IDLE transition or *->ERROR * transition. */ c4iw_get_ep(&qhp->ep->com); ret = rdma_init(rhp, qhp); if (ret) goto err; break; case C4IW_QP_STATE_ERROR: set_state(qhp, C4IW_QP_STATE_ERROR); flush_qp(qhp); break; default: ret = -EINVAL; goto out; } break; case C4IW_QP_STATE_RTS: switch (attrs->next_state) { case C4IW_QP_STATE_CLOSING: //Fixme: Use atomic_read as same as Linux BUG_ON(qhp->ep->com.kref.count < 2); set_state(qhp, C4IW_QP_STATE_CLOSING); ep = qhp->ep; if (!internal) { abort = 0; disconnect = 1; c4iw_get_ep(&qhp->ep->com); } if (qhp->ibqp.uobject) t4_set_wq_in_error(&qhp->wq); ret = rdma_fini(rhp, qhp, ep); if (ret) goto err; break; case C4IW_QP_STATE_TERMINATE: set_state(qhp, C4IW_QP_STATE_TERMINATE); qhp->attr.layer_etype = attrs->layer_etype; qhp->attr.ecode = attrs->ecode; if (qhp->ibqp.uobject) t4_set_wq_in_error(&qhp->wq); ep = qhp->ep; if (!internal) terminate = 1; disconnect = 1; c4iw_get_ep(&qhp->ep->com); break; case C4IW_QP_STATE_ERROR: set_state(qhp, C4IW_QP_STATE_ERROR); if (qhp->ibqp.uobject) t4_set_wq_in_error(&qhp->wq); if (!internal) { abort = 1; disconnect = 1; ep = qhp->ep; c4iw_get_ep(&qhp->ep->com); } goto err; break; default: ret = -EINVAL; goto out; } break; case C4IW_QP_STATE_CLOSING: if (!internal) { ret = -EINVAL; goto out; } switch (attrs->next_state) { case C4IW_QP_STATE_IDLE: flush_qp(qhp); set_state(qhp, C4IW_QP_STATE_IDLE); qhp->attr.llp_stream_handle = NULL; c4iw_put_ep(&qhp->ep->com); qhp->ep = NULL; wake_up(&qhp->wait); break; case C4IW_QP_STATE_ERROR: goto err; default: ret = -EINVAL; goto err; } break; case C4IW_QP_STATE_ERROR: if (attrs->next_state != C4IW_QP_STATE_IDLE) { ret = -EINVAL; goto out; } if (!t4_sq_empty(&qhp->wq) || !t4_rq_empty(&qhp->wq)) { ret = -EINVAL; goto out; } set_state(qhp, C4IW_QP_STATE_IDLE); break; case C4IW_QP_STATE_TERMINATE: if (!internal) { ret = -EINVAL; goto out; } goto err; break; default: printf("%s in a bad state %d\n", __func__, qhp->attr.state); ret = -EINVAL; goto err; break; } goto out; err: CTR3(KTR_IW_CXGBE, "%s disassociating ep %p qpid 0x%x", __func__, qhp->ep, qhp->wq.sq.qid); /* disassociate the LLP connection */ qhp->attr.llp_stream_handle = NULL; if (!ep) ep = qhp->ep; qhp->ep = NULL; set_state(qhp, C4IW_QP_STATE_ERROR); free = 1; wake_up(&qhp->wait); BUG_ON(!ep); flush_qp(qhp); out: mutex_unlock(&qhp->mutex); if (terminate) post_terminate(qhp, NULL, internal ? GFP_ATOMIC : GFP_KERNEL); /* * If disconnect is 1, then we need to initiate a disconnect * on the EP. This can be a normal close (RTS->CLOSING) or * an abnormal close (RTS/CLOSING->ERROR). */ if (disconnect) { c4iw_ep_disconnect(ep, abort, internal ? GFP_ATOMIC : GFP_KERNEL); c4iw_put_ep(&ep->com); } /* * If free is 1, then we've disassociated the EP from the QP * and we need to dereference the EP. */ if (free) c4iw_put_ep(&ep->com); CTR2(KTR_IW_CXGBE, "%s exit state %d", __func__, qhp->attr.state); return ret; } static int enable_qp_db(int id, void *p, void *data) { struct c4iw_qp *qp = p; t4_enable_wq_db(&qp->wq); return 0; } int c4iw_destroy_qp(struct ib_qp *ib_qp) { struct c4iw_dev *rhp; struct c4iw_qp *qhp; struct c4iw_qp_attributes attrs; struct c4iw_ucontext *ucontext; CTR2(KTR_IW_CXGBE, "%s ib_qp %p", __func__, ib_qp); qhp = to_c4iw_qp(ib_qp); rhp = qhp->rhp; attrs.next_state = C4IW_QP_STATE_ERROR; if (qhp->attr.state == C4IW_QP_STATE_TERMINATE) c4iw_modify_qp(rhp, qhp, C4IW_QP_ATTR_NEXT_STATE, &attrs, 1); else c4iw_modify_qp(rhp, qhp, C4IW_QP_ATTR_NEXT_STATE, &attrs, 0); wait_event(qhp->wait, !qhp->ep); spin_lock_irq(&rhp->lock); remove_handle_nolock(rhp, &rhp->qpidr, qhp->wq.sq.qid); rhp->qpcnt--; BUG_ON(rhp->qpcnt < 0); if (rhp->qpcnt <= db_fc_threshold && rhp->db_state == FLOW_CONTROL) { rhp->rdev.stats.db_state_transitions++; rhp->db_state = NORMAL; idr_for_each(&rhp->qpidr, enable_qp_db, NULL); } spin_unlock_irq(&rhp->lock); atomic_dec(&qhp->refcnt); wait_event(qhp->wait, !atomic_read(&qhp->refcnt)); ucontext = ib_qp->uobject ? to_c4iw_ucontext(ib_qp->uobject->context) : NULL; destroy_qp(&rhp->rdev, &qhp->wq, ucontext ? &ucontext->uctx : &rhp->rdev.uctx); CTR3(KTR_IW_CXGBE, "%s ib_qp %p qpid 0x%0x", __func__, ib_qp, qhp->wq.sq.qid); kfree(qhp); return 0; } static int disable_qp_db(int id, void *p, void *data) { struct c4iw_qp *qp = p; t4_disable_wq_db(&qp->wq); return 0; } struct ib_qp * c4iw_create_qp(struct ib_pd *pd, struct ib_qp_init_attr *attrs, struct ib_udata *udata) { struct c4iw_dev *rhp; struct c4iw_qp *qhp; struct c4iw_pd *php; struct c4iw_cq *schp; struct c4iw_cq *rchp; struct c4iw_create_qp_resp uresp; int sqsize, rqsize; struct c4iw_ucontext *ucontext; int ret; struct c4iw_mm_entry *mm1, *mm2, *mm3, *mm4; CTR2(KTR_IW_CXGBE, "%s ib_pd %p", __func__, pd); if (attrs->qp_type != IB_QPT_RC) return ERR_PTR(-EINVAL); php = to_c4iw_pd(pd); rhp = php->rhp; schp = get_chp(rhp, ((struct c4iw_cq *)attrs->send_cq)->cq.cqid); rchp = get_chp(rhp, ((struct c4iw_cq *)attrs->recv_cq)->cq.cqid); if (!schp || !rchp) return ERR_PTR(-EINVAL); if (attrs->cap.max_inline_data > T4_MAX_SEND_INLINE) return ERR_PTR(-EINVAL); rqsize = roundup(attrs->cap.max_recv_wr + 1, 16); if (rqsize > T4_MAX_RQ_SIZE) return ERR_PTR(-E2BIG); sqsize = roundup(attrs->cap.max_send_wr + 1, 16); if (sqsize > T4_MAX_SQ_SIZE) return ERR_PTR(-E2BIG); ucontext = pd->uobject ? to_c4iw_ucontext(pd->uobject->context) : NULL; qhp = kzalloc(sizeof(*qhp), GFP_KERNEL); if (!qhp) return ERR_PTR(-ENOMEM); qhp->wq.sq.size = sqsize; qhp->wq.sq.memsize = (sqsize + 1) * sizeof *qhp->wq.sq.queue; qhp->wq.rq.size = rqsize; qhp->wq.rq.memsize = (rqsize + 1) * sizeof *qhp->wq.rq.queue; if (ucontext) { qhp->wq.sq.memsize = roundup(qhp->wq.sq.memsize, PAGE_SIZE); qhp->wq.rq.memsize = roundup(qhp->wq.rq.memsize, PAGE_SIZE); } CTR5(KTR_IW_CXGBE, "%s sqsize %u sqmemsize %zu rqsize %u rqmemsize %zu", __func__, sqsize, qhp->wq.sq.memsize, rqsize, qhp->wq.rq.memsize); ret = create_qp(&rhp->rdev, &qhp->wq, &schp->cq, &rchp->cq, ucontext ? &ucontext->uctx : &rhp->rdev.uctx); if (ret) goto err1; attrs->cap.max_recv_wr = rqsize - 1; attrs->cap.max_send_wr = sqsize - 1; attrs->cap.max_inline_data = T4_MAX_SEND_INLINE; qhp->rhp = rhp; qhp->attr.pd = php->pdid; qhp->attr.scq = ((struct c4iw_cq *) attrs->send_cq)->cq.cqid; qhp->attr.rcq = ((struct c4iw_cq *) attrs->recv_cq)->cq.cqid; qhp->attr.sq_num_entries = attrs->cap.max_send_wr; qhp->attr.rq_num_entries = attrs->cap.max_recv_wr; qhp->attr.sq_max_sges = attrs->cap.max_send_sge; qhp->attr.sq_max_sges_rdma_write = attrs->cap.max_send_sge; qhp->attr.rq_max_sges = attrs->cap.max_recv_sge; qhp->attr.state = C4IW_QP_STATE_IDLE; qhp->attr.next_state = C4IW_QP_STATE_IDLE; qhp->attr.enable_rdma_read = 1; qhp->attr.enable_rdma_write = 1; qhp->attr.enable_bind = 1; qhp->attr.max_ord = 1; qhp->attr.max_ird = 1; spin_lock_init(&qhp->lock); mutex_init(&qhp->mutex); init_waitqueue_head(&qhp->wait); atomic_set(&qhp->refcnt, 1); spin_lock_irq(&rhp->lock); if (rhp->db_state != NORMAL) t4_disable_wq_db(&qhp->wq); if (++rhp->qpcnt > db_fc_threshold && rhp->db_state == NORMAL) { rhp->rdev.stats.db_state_transitions++; rhp->db_state = FLOW_CONTROL; idr_for_each(&rhp->qpidr, disable_qp_db, NULL); } ret = insert_handle_nolock(rhp, &rhp->qpidr, qhp, qhp->wq.sq.qid); spin_unlock_irq(&rhp->lock); if (ret) goto err2; if (udata) { mm1 = kmalloc(sizeof *mm1, GFP_KERNEL); if (!mm1) { ret = -ENOMEM; goto err3; } mm2 = kmalloc(sizeof *mm2, GFP_KERNEL); if (!mm2) { ret = -ENOMEM; goto err4; } mm3 = kmalloc(sizeof *mm3, GFP_KERNEL); if (!mm3) { ret = -ENOMEM; goto err5; } mm4 = kmalloc(sizeof *mm4, GFP_KERNEL); if (!mm4) { ret = -ENOMEM; goto err6; } uresp.flags = 0; uresp.qid_mask = rhp->rdev.qpmask; uresp.sqid = qhp->wq.sq.qid; uresp.sq_size = qhp->wq.sq.size; uresp.sq_memsize = qhp->wq.sq.memsize; uresp.rqid = qhp->wq.rq.qid; uresp.rq_size = qhp->wq.rq.size; uresp.rq_memsize = qhp->wq.rq.memsize; spin_lock(&ucontext->mmap_lock); uresp.sq_key = ucontext->key; ucontext->key += PAGE_SIZE; uresp.rq_key = ucontext->key; ucontext->key += PAGE_SIZE; uresp.sq_db_gts_key = ucontext->key; ucontext->key += PAGE_SIZE; uresp.rq_db_gts_key = ucontext->key; ucontext->key += PAGE_SIZE; spin_unlock(&ucontext->mmap_lock); ret = ib_copy_to_udata(udata, &uresp, sizeof uresp); if (ret) goto err7; mm1->key = uresp.sq_key; mm1->addr = qhp->wq.sq.phys_addr; mm1->len = PAGE_ALIGN(qhp->wq.sq.memsize); CTR4(KTR_IW_CXGBE, "%s mm1 %x, %x, %d", __func__, mm1->key, mm1->addr, mm1->len); insert_mmap(ucontext, mm1); mm2->key = uresp.rq_key; mm2->addr = vtophys(qhp->wq.rq.queue); mm2->len = PAGE_ALIGN(qhp->wq.rq.memsize); CTR4(KTR_IW_CXGBE, "%s mm2 %x, %x, %d", __func__, mm2->key, mm2->addr, mm2->len); insert_mmap(ucontext, mm2); mm3->key = uresp.sq_db_gts_key; mm3->addr = qhp->wq.sq.udb; mm3->len = PAGE_SIZE; CTR4(KTR_IW_CXGBE, "%s mm3 %x, %x, %d", __func__, mm3->key, mm3->addr, mm3->len); insert_mmap(ucontext, mm3); mm4->key = uresp.rq_db_gts_key; mm4->addr = qhp->wq.rq.udb; mm4->len = PAGE_SIZE; CTR4(KTR_IW_CXGBE, "%s mm4 %x, %x, %d", __func__, mm4->key, mm4->addr, mm4->len); insert_mmap(ucontext, mm4); } qhp->ibqp.qp_num = qhp->wq.sq.qid; init_timer(&(qhp->timer)); CTR5(KTR_IW_CXGBE, "%s qhp %p sq_num_entries %d, rq_num_entries %d qpid 0x%0x", __func__, qhp, qhp->attr.sq_num_entries, qhp->attr.rq_num_entries, qhp->wq.sq.qid); return &qhp->ibqp; err7: kfree(mm4); err6: kfree(mm3); err5: kfree(mm2); err4: kfree(mm1); err3: remove_handle(rhp, &rhp->qpidr, qhp->wq.sq.qid); err2: destroy_qp(&rhp->rdev, &qhp->wq, ucontext ? &ucontext->uctx : &rhp->rdev.uctx); err1: kfree(qhp); return ERR_PTR(ret); } int c4iw_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask, struct ib_udata *udata) { struct c4iw_dev *rhp; struct c4iw_qp *qhp; enum c4iw_qp_attr_mask mask = 0; struct c4iw_qp_attributes attrs; CTR2(KTR_IW_CXGBE, "%s ib_qp %p", __func__, ibqp); /* iwarp does not support the RTR state */ if ((attr_mask & IB_QP_STATE) && (attr->qp_state == IB_QPS_RTR)) attr_mask &= ~IB_QP_STATE; /* Make sure we still have something left to do */ if (!attr_mask) return 0; memset(&attrs, 0, sizeof attrs); qhp = to_c4iw_qp(ibqp); rhp = qhp->rhp; attrs.next_state = c4iw_convert_state(attr->qp_state); attrs.enable_rdma_read = (attr->qp_access_flags & IB_ACCESS_REMOTE_READ) ? 1 : 0; attrs.enable_rdma_write = (attr->qp_access_flags & IB_ACCESS_REMOTE_WRITE) ? 1 : 0; attrs.enable_bind = (attr->qp_access_flags & IB_ACCESS_MW_BIND) ? 1 : 0; mask |= (attr_mask & IB_QP_STATE) ? C4IW_QP_ATTR_NEXT_STATE : 0; mask |= (attr_mask & IB_QP_ACCESS_FLAGS) ? (C4IW_QP_ATTR_ENABLE_RDMA_READ | C4IW_QP_ATTR_ENABLE_RDMA_WRITE | C4IW_QP_ATTR_ENABLE_RDMA_BIND) : 0; /* * Use SQ_PSN and RQ_PSN to pass in IDX_INC values for * ringing the queue db when we're in DB_FULL mode. */ attrs.sq_db_inc = attr->sq_psn; attrs.rq_db_inc = attr->rq_psn; mask |= (attr_mask & IB_QP_SQ_PSN) ? C4IW_QP_ATTR_SQ_DB : 0; mask |= (attr_mask & IB_QP_RQ_PSN) ? C4IW_QP_ATTR_RQ_DB : 0; return c4iw_modify_qp(rhp, qhp, mask, &attrs, 0); } struct ib_qp *c4iw_get_qp(struct ib_device *dev, int qpn) { CTR3(KTR_IW_CXGBE, "%s ib_dev %p qpn 0x%x", __func__, dev, qpn); return (struct ib_qp *)get_qhp(to_c4iw_dev(dev), qpn); } int c4iw_ib_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask, struct ib_qp_init_attr *init_attr) { struct c4iw_qp *qhp = to_c4iw_qp(ibqp); memset(attr, 0, sizeof *attr); memset(init_attr, 0, sizeof *init_attr); attr->qp_state = to_ib_qp_state(qhp->attr.state); return 0; } #endif Index: stable/10/sys/modules/mlx4/Makefile =================================================================== --- stable/10/sys/modules/mlx4/Makefile (revision 271126) +++ stable/10/sys/modules/mlx4/Makefile (revision 271127) @@ -1,32 +1,33 @@ # $FreeBSD$ .PATH: ${.CURDIR}/../../ofed/drivers/net/mlx4 .include .PATH: ${.CURDIR}/../../ofed/include/linux KMOD = mlx4 SRCS = device_if.h bus_if.h pci_if.h vnode_if.h SRCS+= alloc.c catas.c cmd.c cq.c eq.c fw.c icm.c intf.c main.c mcg.c mr.c linux_compat.c linux_radix.c linux_idr.c SRCS+= pd.c port.c profile.c qp.c reset.c sense.c srq.c resource_tracker.c sys_tune.c CFLAGS+= -I${.CURDIR}/../../ofed/drivers/net/mlx4 CFLAGS+= -I${.CURDIR}/../../ofed/include/ .if !defined(KERNBUILDDIR) .if ${MK_INET_SUPPORT} != "no" opt_inet.h: @echo "#define INET 1" > ${.TARGET} .endif .if ${MK_INET6_SUPPORT} != "no" opt_inet6.h: @echo "#define INET6 1" > ${.TARGET} .endif .endif .include CFLAGS+= -Wno-cast-qual -Wno-pointer-arith ${GCC_MS_EXTENSIONS} +CFLAGS+= -fms-extensions CWARNFLAGS.mcg.c= -Wno-unused CWARNFLAGS+= ${CWARNFLAGS.${.IMPSRC:T}} Index: stable/10/sys/modules/mlx4ib/Makefile =================================================================== --- stable/10/sys/modules/mlx4ib/Makefile (revision 271126) +++ stable/10/sys/modules/mlx4ib/Makefile (revision 271127) @@ -1,34 +1,35 @@ # $FreeBSD$ .PATH: ${.CURDIR}/../../ofed/drivers/infiniband/hw/mlx4 .PATH: ${.CURDIR}/../../ofed/include/linux .include KMOD = mlx4ib SRCS = device_if.h bus_if.h pci_if.h vnode_if.h SRCS+= linux_compat.c linux_radix.c linux_idr.c SRCS+= alias_GUID.c mcg.c sysfs.c ah.c cq.c doorbell.c mad.c main.c mr.c qp.c srq.c wc.c cm.c SRCS+= opt_inet.h opt_inet6.h #CFLAGS+= -I${.CURDIR}/../../ofed/include/ #CFLAGS+= -I${.CURDIR}/../../../../include CFLAGS+= -I${.CURDIR}/../../ofed/drivers/infiniband/hw/mlx4 CFLAGS+= -I${.CURDIR}/../../ofed/include/ CFLAGS+= -DCONFIG_INFINIBAND_USER_MEM CFLAGS+= -DINET6 -DINET -DOFED +CFLAGS+= -fms-extensions .if !defined(KERNBUILDDIR) .if ${MK_INET_SUPPORT} != "no" opt_inet.h: @echo "#define INET 1" > ${.TARGET} .endif .if ${MK_INET6_SUPPORT} != "no" opt_inet6.h: @echo "#define INET6 1" > ${.TARGET} .endif .endif .include CFLAGS+= -Wno-cast-qual -Wno-pointer-arith ${GCC_MS_EXTENSIONS} Index: stable/10/sys/modules/mlxen/Makefile =================================================================== --- stable/10/sys/modules/mlxen/Makefile (revision 271126) +++ stable/10/sys/modules/mlxen/Makefile (revision 271127) @@ -1,28 +1,29 @@ # $FreeBSD$ .PATH: ${.CURDIR}/../../ofed/drivers/net/mlx4 .include KMOD = mlxen SRCS = device_if.h bus_if.h pci_if.h vnode_if.h SRCS += en_cq.c en_frag.c en_main.c en_netdev.c en_port.c en_resources.c SRCS += en_rx.c en_tx.c SRCS += opt_inet.h opt_inet6.h CFLAGS+= -I${.CURDIR}/../../ofed/drivers/net/mlx4 CFLAGS+= -I${.CURDIR}/../../ofed/include/ +CFLAGS+= -fms-extensions .if !defined(KERNBUILDDIR) .if ${MK_INET_SUPPORT} != "no" opt_inet.h: @echo "#define INET 1" > ${.TARGET} .endif .if ${MK_INET6_SUPPORT} != "no" opt_inet6.h: @echo "#define INET6 1" > ${.TARGET} .endif .endif .include CFLAGS+= -Wno-cast-qual -Wno-pointer-arith ${GCC_MS_EXTENSIONS} Index: stable/10/sys/ofed/drivers/infiniband/core/addr.c =================================================================== --- stable/10/sys/ofed/drivers/infiniband/core/addr.c (revision 271126) +++ stable/10/sys/ofed/drivers/infiniband/core/addr.c (revision 271127) @@ -1,645 +1,641 @@ /* * Copyright (c) 2005 Voltaire Inc. All rights reserved. * Copyright (c) 2002-2005, Network Appliance, Inc. All rights reserved. * Copyright (c) 1999-2005, Mellanox Technologies, Inc. All rights reserved. * Copyright (c) 2005 Intel Corporation. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU * General Public License (GPL) Version 2, available from the file * COPYING in the main directory of this source tree, or the * OpenIB.org BSD license below: * * Redistribution and use in source and binary forms, with or * without modification, are permitted provided that the following * conditions are met: * * - Redistributions of source code must retain the above * copyright notice, this list of conditions and the following * disclaimer. * * - Redistributions in binary form must reproduce the above * copyright notice, this list of conditions and the following * disclaimer in the documentation and/or other materials * provided with the distribution. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ #include #include #include -#include -#include #include #include -#include -#include #include MODULE_AUTHOR("Sean Hefty"); MODULE_DESCRIPTION("IB Address Translation"); MODULE_LICENSE("Dual BSD/GPL"); struct addr_req { struct list_head list; struct sockaddr_storage src_addr; struct sockaddr_storage dst_addr; struct rdma_dev_addr *addr; struct rdma_addr_client *client; void *context; void (*callback)(int status, struct sockaddr *src_addr, struct rdma_dev_addr *addr, void *context); unsigned long timeout; int status; }; static void process_req(struct work_struct *work); static DEFINE_MUTEX(lock); static LIST_HEAD(req_list); static struct delayed_work work; static struct workqueue_struct *addr_wq; void rdma_addr_register_client(struct rdma_addr_client *client) { atomic_set(&client->refcount, 1); init_completion(&client->comp); } EXPORT_SYMBOL(rdma_addr_register_client); static inline void put_client(struct rdma_addr_client *client) { if (atomic_dec_and_test(&client->refcount)) complete(&client->comp); } void rdma_addr_unregister_client(struct rdma_addr_client *client) { put_client(client); wait_for_completion(&client->comp); } EXPORT_SYMBOL(rdma_addr_unregister_client); #ifdef __linux__ int rdma_copy_addr(struct rdma_dev_addr *dev_addr, struct net_device *dev, const unsigned char *dst_dev_addr) { dev_addr->dev_type = dev->type; memcpy(dev_addr->src_dev_addr, dev->dev_addr, MAX_ADDR_LEN); memcpy(dev_addr->broadcast, dev->broadcast, MAX_ADDR_LEN); if (dst_dev_addr) memcpy(dev_addr->dst_dev_addr, dst_dev_addr, MAX_ADDR_LEN); dev_addr->bound_dev_if = dev->ifindex; return 0; } #else int rdma_copy_addr(struct rdma_dev_addr *dev_addr, struct ifnet *dev, const unsigned char *dst_dev_addr) { if (dev->if_type == IFT_INFINIBAND) dev_addr->dev_type = ARPHRD_INFINIBAND; else if (dev->if_type == IFT_ETHER) dev_addr->dev_type = ARPHRD_ETHER; else dev_addr->dev_type = 0; memcpy(dev_addr->src_dev_addr, IF_LLADDR(dev), dev->if_addrlen); memcpy(dev_addr->broadcast, __DECONST(char *, dev->if_broadcastaddr), dev->if_addrlen); if (dst_dev_addr) memcpy(dev_addr->dst_dev_addr, dst_dev_addr, dev->if_addrlen); dev_addr->bound_dev_if = dev->if_index; return 0; } #endif EXPORT_SYMBOL(rdma_copy_addr); int rdma_translate_ip(struct sockaddr *addr, struct rdma_dev_addr *dev_addr) { struct net_device *dev; int ret = -EADDRNOTAVAIL; if (dev_addr->bound_dev_if) { dev = dev_get_by_index(&init_net, dev_addr->bound_dev_if); if (!dev) return -ENODEV; ret = rdma_copy_addr(dev_addr, dev, NULL); dev_put(dev); return ret; } switch (addr->sa_family) { #ifdef INET case AF_INET: dev = ip_dev_find(NULL, ((struct sockaddr_in *) addr)->sin_addr.s_addr); if (!dev) return ret; ret = rdma_copy_addr(dev_addr, dev, NULL); dev_put(dev); break; #endif #if defined(INET6) case AF_INET6: #ifdef __linux__ read_lock(&dev_base_lock); for_each_netdev(&init_net, dev) { if (ipv6_chk_addr(&init_net, &((struct sockaddr_in6 *) addr)->sin6_addr, dev, 1)) { ret = rdma_copy_addr(dev_addr, dev, NULL); break; } } read_unlock(&dev_base_lock); #else { struct sockaddr_in6 *sin6; struct ifaddr *ifa; in_port_t port; sin6 = (struct sockaddr_in6 *)addr; port = sin6->sin6_port; sin6->sin6_port = 0; ifa = ifa_ifwithaddr(addr); sin6->sin6_port = port; if (ifa == NULL) { ret = -ENODEV; break; } ret = rdma_copy_addr(dev_addr, ifa->ifa_ifp, NULL); ifa_free(ifa); break; } #endif break; #endif } return ret; } EXPORT_SYMBOL(rdma_translate_ip); static void set_timeout(unsigned long time) { unsigned long delay; cancel_delayed_work(&work); delay = time - jiffies; if ((long)delay <= 0) delay = 1; queue_delayed_work(addr_wq, &work, delay); } static void queue_req(struct addr_req *req) { struct addr_req *temp_req; mutex_lock(&lock); list_for_each_entry_reverse(temp_req, &req_list, list) { if (time_after_eq(req->timeout, temp_req->timeout)) break; } list_add(&req->list, &temp_req->list); if (req_list.next == &req->list) set_timeout(req->timeout); mutex_unlock(&lock); } #ifdef __linux__ static int addr4_resolve(struct sockaddr_in *src_in, struct sockaddr_in *dst_in, struct rdma_dev_addr *addr) { __be32 src_ip = src_in->sin_addr.s_addr; __be32 dst_ip = dst_in->sin_addr.s_addr; struct flowi fl; struct rtable *rt; struct neighbour *neigh; int ret; memset(&fl, 0, sizeof fl); fl.nl_u.ip4_u.daddr = dst_ip; fl.nl_u.ip4_u.saddr = src_ip; fl.oif = addr->bound_dev_if; ret = ip_route_output_key(&init_net, &rt, &fl); if (ret) goto out; src_in->sin_family = AF_INET; src_in->sin_addr.s_addr = rt->rt_src; if (rt->idev->dev->flags & IFF_LOOPBACK) { ret = rdma_translate_ip((struct sockaddr *) dst_in, addr); if (!ret) memcpy(addr->dst_dev_addr, addr->src_dev_addr, MAX_ADDR_LEN); goto put; } /* If the device does ARP internally, return 'done' */ if (rt->idev->dev->flags & IFF_NOARP) { rdma_copy_addr(addr, rt->idev->dev, NULL); goto put; } neigh = neigh_lookup(&arp_tbl, &rt->rt_gateway, rt->idev->dev); if (!neigh || !(neigh->nud_state & NUD_VALID)) { neigh_event_send(rt->u.dst.neighbour, NULL); ret = -ENODATA; if (neigh) goto release; goto put; } ret = rdma_copy_addr(addr, neigh->dev, neigh->ha); release: neigh_release(neigh); put: ip_rt_put(rt); out: return ret; } #if defined(INET6) static int addr6_resolve(struct sockaddr_in6 *src_in, struct sockaddr_in6 *dst_in, struct rdma_dev_addr *addr) { struct flowi fl; struct neighbour *neigh; struct dst_entry *dst; int ret; memset(&fl, 0, sizeof fl); ipv6_addr_copy(&fl.fl6_dst, &dst_in->sin6_addr); ipv6_addr_copy(&fl.fl6_src, &src_in->sin6_addr); fl.oif = addr->bound_dev_if; dst = ip6_route_output(&init_net, NULL, &fl); if ((ret = dst->error)) goto put; if (ipv6_addr_any(&fl.fl6_src)) { ret = ipv6_dev_get_saddr(&init_net, ip6_dst_idev(dst)->dev, &fl.fl6_dst, 0, &fl.fl6_src); if (ret) goto put; src_in->sin6_family = AF_INET6; ipv6_addr_copy(&src_in->sin6_addr, &fl.fl6_src); } if (dst->dev->flags & IFF_LOOPBACK) { ret = rdma_translate_ip((struct sockaddr *) dst_in, addr); if (!ret) memcpy(addr->dst_dev_addr, addr->src_dev_addr, MAX_ADDR_LEN); goto put; } /* If the device does ARP internally, return 'done' */ if (dst->dev->flags & IFF_NOARP) { ret = rdma_copy_addr(addr, dst->dev, NULL); goto put; } neigh = dst->neighbour; if (!neigh || !(neigh->nud_state & NUD_VALID)) { neigh_event_send(dst->neighbour, NULL); ret = -ENODATA; goto put; } ret = rdma_copy_addr(addr, dst->dev, neigh->ha); put: dst_release(dst); return ret; } #else static int addr6_resolve(struct sockaddr_in6 *src_in, struct sockaddr_in6 *dst_in, struct rdma_dev_addr *addr) { return -EADDRNOTAVAIL; } #endif #else #include static int addr_resolve(struct sockaddr *src_in, struct sockaddr *dst_in, struct rdma_dev_addr *addr) { struct sockaddr_in *sin; struct sockaddr_in6 *sin6; struct ifaddr *ifa; struct ifnet *ifp; #if defined(INET) || defined(INET6) struct llentry *lle; #endif struct rtentry *rte; in_port_t port; u_char edst[MAX_ADDR_LEN]; int multi; int bcast; int error = 0; /* * Determine whether the address is unicast, multicast, or broadcast * and whether the source interface is valid. */ multi = 0; bcast = 0; sin = NULL; sin6 = NULL; ifp = NULL; rte = NULL; switch (dst_in->sa_family) { #ifdef INET case AF_INET: sin = (struct sockaddr_in *)dst_in; if (sin->sin_addr.s_addr == INADDR_BROADCAST) bcast = 1; if (IN_MULTICAST(ntohl(sin->sin_addr.s_addr))) multi = 1; sin = (struct sockaddr_in *)src_in; if (sin->sin_addr.s_addr != INADDR_ANY) { /* * Address comparison fails if the port is set * cache it here to be restored later. */ port = sin->sin_port; sin->sin_port = 0; memset(&sin->sin_zero, 0, sizeof(sin->sin_zero)); } else src_in = NULL; break; #endif #ifdef INET6 case AF_INET6: sin6 = (struct sockaddr_in6 *)dst_in; if (IN6_IS_ADDR_MULTICAST(&sin6->sin6_addr)) multi = 1; sin6 = (struct sockaddr_in6 *)src_in; if (!IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) { port = sin6->sin6_port; sin6->sin6_port = 0; } else src_in = NULL; break; #endif default: return -EINVAL; } /* * If we have a source address to use look it up first and verify * that it is a local interface. */ if (src_in) { ifa = ifa_ifwithaddr(src_in); if (sin) sin->sin_port = port; if (sin6) sin6->sin6_port = port; if (ifa == NULL) return -ENETUNREACH; ifp = ifa->ifa_ifp; ifa_free(ifa); if (bcast || multi) goto mcast; } /* * Make sure the route exists and has a valid link. */ rte = rtalloc1(dst_in, 1, 0); if (rte == NULL || rte->rt_ifp == NULL || !RT_LINK_IS_UP(rte->rt_ifp)) { if (rte) RTFREE_LOCKED(rte); return -EHOSTUNREACH; } /* * If it's not multicast or broadcast and the route doesn't match the * requested interface return unreachable. Otherwise fetch the * correct interface pointer and unlock the route. */ if (multi || bcast) { if (ifp == NULL) ifp = rte->rt_ifp; RTFREE_LOCKED(rte); } else if (ifp && ifp != rte->rt_ifp) { RTFREE_LOCKED(rte); return -ENETUNREACH; } else { if (ifp == NULL) ifp = rte->rt_ifp; RT_UNLOCK(rte); } mcast: if (bcast) return rdma_copy_addr(addr, ifp, ifp->if_broadcastaddr); if (multi) { struct sockaddr *llsa; error = ifp->if_resolvemulti(ifp, &llsa, dst_in); if (error) return -error; error = rdma_copy_addr(addr, ifp, LLADDR((struct sockaddr_dl *)llsa)); free(llsa, M_IFMADDR); return error; } /* * Resolve the link local address. */ switch (dst_in->sa_family) { #ifdef INET case AF_INET: error = arpresolve(ifp, rte, NULL, dst_in, edst, &lle); break; #endif #ifdef INET6 case AF_INET6: error = nd6_storelladdr(ifp, NULL, dst_in, (u_char *)edst, &lle); break; #endif default: /* XXX: Shouldn't happen. */ error = -EINVAL; } RTFREE(rte); if (error == 0) return rdma_copy_addr(addr, ifp, edst); if (error == EWOULDBLOCK) return -ENODATA; return -error; } #endif static void process_req(struct work_struct *work) { struct addr_req *req, *temp_req; struct sockaddr *src_in, *dst_in; struct list_head done_list; INIT_LIST_HEAD(&done_list); mutex_lock(&lock); list_for_each_entry_safe(req, temp_req, &req_list, list) { if (req->status == -ENODATA) { src_in = (struct sockaddr *) &req->src_addr; dst_in = (struct sockaddr *) &req->dst_addr; req->status = addr_resolve(src_in, dst_in, req->addr); if (req->status && time_after_eq(jiffies, req->timeout)) req->status = -ETIMEDOUT; else if (req->status == -ENODATA) continue; } list_move_tail(&req->list, &done_list); } if (!list_empty(&req_list)) { req = list_entry(req_list.next, struct addr_req, list); set_timeout(req->timeout); } mutex_unlock(&lock); list_for_each_entry_safe(req, temp_req, &done_list, list) { list_del(&req->list); req->callback(req->status, (struct sockaddr *) &req->src_addr, req->addr, req->context); put_client(req->client); kfree(req); } } int rdma_resolve_ip(struct rdma_addr_client *client, struct sockaddr *src_addr, struct sockaddr *dst_addr, struct rdma_dev_addr *addr, int timeout_ms, void (*callback)(int status, struct sockaddr *src_addr, struct rdma_dev_addr *addr, void *context), void *context) { struct sockaddr *src_in, *dst_in; struct addr_req *req; int ret = 0; req = kzalloc(sizeof *req, GFP_KERNEL); if (!req) return -ENOMEM; src_in = (struct sockaddr *) &req->src_addr; dst_in = (struct sockaddr *) &req->dst_addr; if (src_addr) { if (src_addr->sa_family != dst_addr->sa_family) { ret = -EINVAL; goto err; } memcpy(src_in, src_addr, ip_addr_size(src_addr)); } else { src_in->sa_family = dst_addr->sa_family; } memcpy(dst_in, dst_addr, ip_addr_size(dst_addr)); req->addr = addr; req->callback = callback; req->context = context; req->client = client; atomic_inc(&client->refcount); req->status = addr_resolve(src_in, dst_in, addr); switch (req->status) { case 0: req->timeout = jiffies; queue_req(req); break; case -ENODATA: req->timeout = msecs_to_jiffies(timeout_ms) + jiffies; queue_req(req); break; default: ret = req->status; atomic_dec(&client->refcount); goto err; } return ret; err: kfree(req); return ret; } EXPORT_SYMBOL(rdma_resolve_ip); void rdma_addr_cancel(struct rdma_dev_addr *addr) { struct addr_req *req, *temp_req; mutex_lock(&lock); list_for_each_entry_safe(req, temp_req, &req_list, list) { if (req->addr == addr) { req->status = -ECANCELED; req->timeout = jiffies; list_move(&req->list, &req_list); set_timeout(req->timeout); break; } } mutex_unlock(&lock); } EXPORT_SYMBOL(rdma_addr_cancel); static int netevent_callback(struct notifier_block *self, unsigned long event, void *ctx) { if (event == NETEVENT_NEIGH_UPDATE) { #ifdef __linux__ struct neighbour *neigh = ctx; if (neigh->nud_state & NUD_VALID) { set_timeout(jiffies); } #else set_timeout(jiffies); #endif } return 0; } static struct notifier_block nb = { .notifier_call = netevent_callback }; static int addr_init(void) { INIT_DELAYED_WORK(&work, process_req); addr_wq = create_singlethread_workqueue("ib_addr"); if (!addr_wq) return -ENOMEM; register_netevent_notifier(&nb); return 0; } static void addr_cleanup(void) { unregister_netevent_notifier(&nb); destroy_workqueue(addr_wq); } module_init(addr_init); module_exit(addr_cleanup); Index: stable/10/sys/ofed/drivers/infiniband/core/cm.c =================================================================== --- stable/10/sys/ofed/drivers/infiniband/core/cm.c (revision 271126) +++ stable/10/sys/ofed/drivers/infiniband/core/cm.c (revision 271127) @@ -1,3894 +1,3897 @@ /* * Copyright (c) 2004-2007 Intel Corporation. All rights reserved. * Copyright (c) 2004 Topspin Corporation. All rights reserved. * Copyright (c) 2004, 2005 Voltaire Corporation. All rights reserved. * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU * General Public License (GPL) Version 2, available from the file * COPYING in the main directory of this source tree, or the * OpenIB.org BSD license below: * * Redistribution and use in source and binary forms, with or * without modification, are permitted provided that the following * conditions are met: * * - Redistributions of source code must retain the above * copyright notice, this list of conditions and the following * disclaimer. * * - Redistributions in binary form must reproduce the above * copyright notice, this list of conditions and the following * disclaimer in the documentation and/or other materials * provided with the distribution. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ #include #include #include #include #include #include #include #include #include #include #include #include +#include +#include + #include #include #include "cm_msgs.h" MODULE_AUTHOR("Sean Hefty"); MODULE_DESCRIPTION("InfiniBand CM"); MODULE_LICENSE("Dual BSD/GPL"); #define PFX "ib_cm: " /* * Limit CM message timeouts to something reasonable: * 8 seconds per message, with up to 15 retries */ static int max_timeout = 21; module_param(max_timeout, int, 0644); MODULE_PARM_DESC(max_timeout, "Maximum IB CM per message timeout " "(default=21, or ~8 seconds)"); static void cm_add_one(struct ib_device *device); static void cm_remove_one(struct ib_device *device); static struct ib_client cm_client = { .name = "cm", .add = cm_add_one, .remove = cm_remove_one }; static struct ib_cm { spinlock_t lock; struct list_head device_list; rwlock_t device_lock; struct rb_root listen_service_table; u64 listen_service_id; /* struct rb_root peer_service_table; todo: fix peer to peer */ struct rb_root remote_qp_table; struct rb_root remote_id_table; struct rb_root remote_sidr_table; struct idr local_id_table; __be32 random_id_operand; struct list_head timewait_list; struct workqueue_struct *wq; } cm; /* Counter indexes ordered by attribute ID */ enum { CM_REQ_COUNTER, CM_MRA_COUNTER, CM_REJ_COUNTER, CM_REP_COUNTER, CM_RTU_COUNTER, CM_DREQ_COUNTER, CM_DREP_COUNTER, CM_SIDR_REQ_COUNTER, CM_SIDR_REP_COUNTER, CM_LAP_COUNTER, CM_APR_COUNTER, CM_ATTR_COUNT, CM_ATTR_ID_OFFSET = 0x0010, }; enum { CM_XMIT, CM_XMIT_RETRIES, CM_RECV, CM_RECV_DUPLICATES, CM_COUNTER_GROUPS }; static char const counter_group_names[CM_COUNTER_GROUPS] [sizeof("cm_rx_duplicates")] = { "cm_tx_msgs", "cm_tx_retries", "cm_rx_msgs", "cm_rx_duplicates" }; struct cm_counter_group { struct kobject obj; atomic_long_t counter[CM_ATTR_COUNT]; }; struct cm_counter_attribute { struct attribute attr; int index; }; #define CM_COUNTER_ATTR(_name, _index) \ struct cm_counter_attribute cm_##_name##_counter_attr = { \ .attr = { .name = __stringify(_name), .mode = 0444 }, \ .index = _index \ } static CM_COUNTER_ATTR(req, CM_REQ_COUNTER); static CM_COUNTER_ATTR(mra, CM_MRA_COUNTER); static CM_COUNTER_ATTR(rej, CM_REJ_COUNTER); static CM_COUNTER_ATTR(rep, CM_REP_COUNTER); static CM_COUNTER_ATTR(rtu, CM_RTU_COUNTER); static CM_COUNTER_ATTR(dreq, CM_DREQ_COUNTER); static CM_COUNTER_ATTR(drep, CM_DREP_COUNTER); static CM_COUNTER_ATTR(sidr_req, CM_SIDR_REQ_COUNTER); static CM_COUNTER_ATTR(sidr_rep, CM_SIDR_REP_COUNTER); static CM_COUNTER_ATTR(lap, CM_LAP_COUNTER); static CM_COUNTER_ATTR(apr, CM_APR_COUNTER); static struct attribute *cm_counter_default_attrs[] = { &cm_req_counter_attr.attr, &cm_mra_counter_attr.attr, &cm_rej_counter_attr.attr, &cm_rep_counter_attr.attr, &cm_rtu_counter_attr.attr, &cm_dreq_counter_attr.attr, &cm_drep_counter_attr.attr, &cm_sidr_req_counter_attr.attr, &cm_sidr_rep_counter_attr.attr, &cm_lap_counter_attr.attr, &cm_apr_counter_attr.attr, NULL }; struct cm_port { struct cm_device *cm_dev; struct ib_mad_agent *mad_agent; struct kobject port_obj; u8 port_num; struct cm_counter_group counter_group[CM_COUNTER_GROUPS]; }; struct cm_device { struct list_head list; struct ib_device *ib_device; struct device *device; u8 ack_delay; struct cm_port *port[0]; }; struct cm_av { struct cm_port *port; union ib_gid dgid; struct ib_ah_attr ah_attr; u16 pkey_index; u8 timeout; }; struct cm_work { struct delayed_work work; struct list_head list; struct cm_port *port; struct ib_mad_recv_wc *mad_recv_wc; /* Received MADs */ __be32 local_id; /* Established / timewait */ __be32 remote_id; struct ib_cm_event cm_event; struct ib_sa_path_rec path[0]; }; struct cm_timewait_info { struct cm_work work; /* Must be first. */ struct list_head list; struct rb_node remote_qp_node; struct rb_node remote_id_node; __be64 remote_ca_guid; __be32 remote_qpn; u8 inserted_remote_qp; u8 inserted_remote_id; }; struct cm_id_private { struct ib_cm_id id; struct rb_node service_node; struct rb_node sidr_id_node; spinlock_t lock; /* Do not acquire inside cm.lock */ struct completion comp; atomic_t refcount; struct ib_mad_send_buf *msg; struct cm_timewait_info *timewait_info; /* todo: use alternate port on send failure */ struct cm_av av; struct cm_av alt_av; struct ib_cm_compare_data *compare_data; void *private_data; __be64 tid; __be32 local_qpn; __be32 remote_qpn; enum ib_qp_type qp_type; __be32 sq_psn; __be32 rq_psn; int timeout_ms; enum ib_mtu path_mtu; __be16 pkey; u8 private_data_len; u8 max_cm_retries; u8 peer_to_peer; u8 responder_resources; u8 initiator_depth; u8 retry_count; u8 rnr_retry_count; u8 service_timeout; u8 target_ack_delay; struct list_head work_list; atomic_t work_count; }; static void cm_work_handler(struct work_struct *work); static inline void cm_deref_id(struct cm_id_private *cm_id_priv) { if (atomic_dec_and_test(&cm_id_priv->refcount)) complete(&cm_id_priv->comp); } static int cm_alloc_msg(struct cm_id_private *cm_id_priv, struct ib_mad_send_buf **msg) { struct ib_mad_agent *mad_agent; struct ib_mad_send_buf *m; struct ib_ah *ah; mad_agent = cm_id_priv->av.port->mad_agent; ah = ib_create_ah(mad_agent->qp->pd, &cm_id_priv->av.ah_attr); if (IS_ERR(ah)) return PTR_ERR(ah); m = ib_create_send_mad(mad_agent, cm_id_priv->id.remote_cm_qpn, cm_id_priv->av.pkey_index, 0, IB_MGMT_MAD_HDR, IB_MGMT_MAD_DATA, GFP_ATOMIC); if (IS_ERR(m)) { ib_destroy_ah(ah); return PTR_ERR(m); } /* Timeout set by caller if response is expected. */ m->ah = ah; m->retries = cm_id_priv->max_cm_retries; atomic_inc(&cm_id_priv->refcount); m->context[0] = cm_id_priv; *msg = m; return 0; } static int cm_alloc_response_msg(struct cm_port *port, struct ib_mad_recv_wc *mad_recv_wc, struct ib_mad_send_buf **msg) { struct ib_mad_send_buf *m; struct ib_ah *ah; ah = ib_create_ah_from_wc(port->mad_agent->qp->pd, mad_recv_wc->wc, mad_recv_wc->recv_buf.grh, port->port_num); if (IS_ERR(ah)) return PTR_ERR(ah); m = ib_create_send_mad(port->mad_agent, 1, mad_recv_wc->wc->pkey_index, 0, IB_MGMT_MAD_HDR, IB_MGMT_MAD_DATA, GFP_ATOMIC); if (IS_ERR(m)) { ib_destroy_ah(ah); return PTR_ERR(m); } m->ah = ah; *msg = m; return 0; } static void cm_free_msg(struct ib_mad_send_buf *msg) { ib_destroy_ah(msg->ah); if (msg->context[0]) cm_deref_id(msg->context[0]); ib_free_send_mad(msg); } static void * cm_copy_private_data(const void *private_data, u8 private_data_len) { void *data; if (!private_data || !private_data_len) return NULL; data = kmemdup(private_data, private_data_len, GFP_KERNEL); if (!data) return ERR_PTR(-ENOMEM); return data; } static void cm_set_private_data(struct cm_id_private *cm_id_priv, void *private_data, u8 private_data_len) { if (cm_id_priv->private_data && cm_id_priv->private_data_len) kfree(cm_id_priv->private_data); cm_id_priv->private_data = private_data; cm_id_priv->private_data_len = private_data_len; } static void cm_init_av_for_response(struct cm_port *port, struct ib_wc *wc, struct ib_grh *grh, struct cm_av *av) { av->port = port; av->pkey_index = wc->pkey_index; ib_init_ah_from_wc(port->cm_dev->ib_device, port->port_num, wc, grh, &av->ah_attr); } static int cm_init_av_by_path(struct ib_sa_path_rec *path, struct cm_av *av) { struct cm_device *cm_dev; struct cm_port *port = NULL; unsigned long flags; int ret; u8 p; read_lock_irqsave(&cm.device_lock, flags); list_for_each_entry(cm_dev, &cm.device_list, list) { if (!ib_find_cached_gid(cm_dev->ib_device, &path->sgid, &p, NULL)) { port = cm_dev->port[p-1]; break; } } read_unlock_irqrestore(&cm.device_lock, flags); if (!port) return -EINVAL; ret = ib_find_cached_pkey(cm_dev->ib_device, port->port_num, be16_to_cpu(path->pkey), &av->pkey_index); if (ret) return ret; av->port = port; ib_init_ah_from_path(cm_dev->ib_device, port->port_num, path, &av->ah_attr); av->timeout = path->packet_life_time + 1; return 0; } static int cm_alloc_id(struct cm_id_private *cm_id_priv) { unsigned long flags; int ret, id; static int next_id; do { spin_lock_irqsave(&cm.lock, flags); ret = idr_get_new_above(&cm.local_id_table, cm_id_priv, next_id, &id); if (!ret) next_id = ((unsigned) id + 1) & MAX_ID_MASK; spin_unlock_irqrestore(&cm.lock, flags); } while( (ret == -EAGAIN) && idr_pre_get(&cm.local_id_table, GFP_KERNEL) ); cm_id_priv->id.local_id = (__force __be32)id ^ cm.random_id_operand; return ret; } static void cm_free_id(__be32 local_id) { spin_lock_irq(&cm.lock); idr_remove(&cm.local_id_table, (__force int) (local_id ^ cm.random_id_operand)); spin_unlock_irq(&cm.lock); } static struct cm_id_private * cm_get_id(__be32 local_id, __be32 remote_id) { struct cm_id_private *cm_id_priv; cm_id_priv = idr_find(&cm.local_id_table, (__force int) (local_id ^ cm.random_id_operand)); if (cm_id_priv) { if (cm_id_priv->id.remote_id == remote_id) atomic_inc(&cm_id_priv->refcount); else cm_id_priv = NULL; } return cm_id_priv; } static struct cm_id_private * cm_acquire_id(__be32 local_id, __be32 remote_id) { struct cm_id_private *cm_id_priv; spin_lock_irq(&cm.lock); cm_id_priv = cm_get_id(local_id, remote_id); spin_unlock_irq(&cm.lock); return cm_id_priv; } static void cm_mask_copy(u8 *dst, u8 *src, u8 *mask) { int i; for (i = 0; i < IB_CM_COMPARE_SIZE / sizeof(unsigned long); i++) ((unsigned long *) dst)[i] = ((unsigned long *) src)[i] & ((unsigned long *) mask)[i]; } static int cm_compare_data(struct ib_cm_compare_data *src_data, struct ib_cm_compare_data *dst_data) { u8 src[IB_CM_COMPARE_SIZE]; u8 dst[IB_CM_COMPARE_SIZE]; if (!src_data || !dst_data) return 0; cm_mask_copy(src, src_data->data, dst_data->mask); cm_mask_copy(dst, dst_data->data, src_data->mask); return memcmp(src, dst, IB_CM_COMPARE_SIZE); } static int cm_compare_private_data(u8 *private_data, struct ib_cm_compare_data *dst_data) { u8 src[IB_CM_COMPARE_SIZE]; if (!dst_data) return 0; cm_mask_copy(src, private_data, dst_data->mask); return memcmp(src, dst_data->data, IB_CM_COMPARE_SIZE); } /* * Trivial helpers to strip endian annotation and compare; the * endianness doesn't actually matter since we just need a stable * order for the RB tree. */ static int be32_lt(__be32 a, __be32 b) { return (__force u32) a < (__force u32) b; } static int be32_gt(__be32 a, __be32 b) { return (__force u32) a > (__force u32) b; } static int be64_lt(__be64 a, __be64 b) { return (__force u64) a < (__force u64) b; } static int be64_gt(__be64 a, __be64 b) { return (__force u64) a > (__force u64) b; } static struct cm_id_private * cm_insert_listen(struct cm_id_private *cm_id_priv) { struct rb_node **link = &cm.listen_service_table.rb_node; struct rb_node *parent = NULL; struct cm_id_private *cur_cm_id_priv; __be64 service_id = cm_id_priv->id.service_id; __be64 service_mask = cm_id_priv->id.service_mask; int data_cmp; while (*link) { parent = *link; cur_cm_id_priv = rb_entry(parent, struct cm_id_private, service_node); data_cmp = cm_compare_data(cm_id_priv->compare_data, cur_cm_id_priv->compare_data); if ((cur_cm_id_priv->id.service_mask & service_id) == (service_mask & cur_cm_id_priv->id.service_id) && (cm_id_priv->id.device == cur_cm_id_priv->id.device) && !data_cmp) return cur_cm_id_priv; if (cm_id_priv->id.device < cur_cm_id_priv->id.device) link = &(*link)->rb_left; else if (cm_id_priv->id.device > cur_cm_id_priv->id.device) link = &(*link)->rb_right; else if (be64_lt(service_id, cur_cm_id_priv->id.service_id)) link = &(*link)->rb_left; else if (be64_gt(service_id, cur_cm_id_priv->id.service_id)) link = &(*link)->rb_right; else if (data_cmp < 0) link = &(*link)->rb_left; else link = &(*link)->rb_right; } rb_link_node(&cm_id_priv->service_node, parent, link); rb_insert_color(&cm_id_priv->service_node, &cm.listen_service_table); return NULL; } static struct cm_id_private * cm_find_listen(struct ib_device *device, __be64 service_id, u8 *private_data) { struct rb_node *node = cm.listen_service_table.rb_node; struct cm_id_private *cm_id_priv; int data_cmp; while (node) { cm_id_priv = rb_entry(node, struct cm_id_private, service_node); data_cmp = cm_compare_private_data(private_data, cm_id_priv->compare_data); if ((cm_id_priv->id.service_mask & service_id) == cm_id_priv->id.service_id && (cm_id_priv->id.device == device) && !data_cmp) return cm_id_priv; if (device < cm_id_priv->id.device) node = node->rb_left; else if (device > cm_id_priv->id.device) node = node->rb_right; else if (be64_lt(service_id, cm_id_priv->id.service_id)) node = node->rb_left; else if (be64_gt(service_id, cm_id_priv->id.service_id)) node = node->rb_right; else if (data_cmp < 0) node = node->rb_left; else node = node->rb_right; } return NULL; } static struct cm_timewait_info * cm_insert_remote_id(struct cm_timewait_info *timewait_info) { struct rb_node **link = &cm.remote_id_table.rb_node; struct rb_node *parent = NULL; struct cm_timewait_info *cur_timewait_info; __be64 remote_ca_guid = timewait_info->remote_ca_guid; __be32 remote_id = timewait_info->work.remote_id; while (*link) { parent = *link; cur_timewait_info = rb_entry(parent, struct cm_timewait_info, remote_id_node); if (be32_lt(remote_id, cur_timewait_info->work.remote_id)) link = &(*link)->rb_left; else if (be32_gt(remote_id, cur_timewait_info->work.remote_id)) link = &(*link)->rb_right; else if (be64_lt(remote_ca_guid, cur_timewait_info->remote_ca_guid)) link = &(*link)->rb_left; else if (be64_gt(remote_ca_guid, cur_timewait_info->remote_ca_guid)) link = &(*link)->rb_right; else return cur_timewait_info; } timewait_info->inserted_remote_id = 1; rb_link_node(&timewait_info->remote_id_node, parent, link); rb_insert_color(&timewait_info->remote_id_node, &cm.remote_id_table); return NULL; } static struct cm_timewait_info * cm_find_remote_id(__be64 remote_ca_guid, __be32 remote_id) { struct rb_node *node = cm.remote_id_table.rb_node; struct cm_timewait_info *timewait_info; while (node) { timewait_info = rb_entry(node, struct cm_timewait_info, remote_id_node); if (be32_lt(remote_id, timewait_info->work.remote_id)) node = node->rb_left; else if (be32_gt(remote_id, timewait_info->work.remote_id)) node = node->rb_right; else if (be64_lt(remote_ca_guid, timewait_info->remote_ca_guid)) node = node->rb_left; else if (be64_gt(remote_ca_guid, timewait_info->remote_ca_guid)) node = node->rb_right; else return timewait_info; } return NULL; } static struct cm_timewait_info * cm_insert_remote_qpn(struct cm_timewait_info *timewait_info) { struct rb_node **link = &cm.remote_qp_table.rb_node; struct rb_node *parent = NULL; struct cm_timewait_info *cur_timewait_info; __be64 remote_ca_guid = timewait_info->remote_ca_guid; __be32 remote_qpn = timewait_info->remote_qpn; while (*link) { parent = *link; cur_timewait_info = rb_entry(parent, struct cm_timewait_info, remote_qp_node); if (be32_lt(remote_qpn, cur_timewait_info->remote_qpn)) link = &(*link)->rb_left; else if (be32_gt(remote_qpn, cur_timewait_info->remote_qpn)) link = &(*link)->rb_right; else if (be64_lt(remote_ca_guid, cur_timewait_info->remote_ca_guid)) link = &(*link)->rb_left; else if (be64_gt(remote_ca_guid, cur_timewait_info->remote_ca_guid)) link = &(*link)->rb_right; else return cur_timewait_info; } timewait_info->inserted_remote_qp = 1; rb_link_node(&timewait_info->remote_qp_node, parent, link); rb_insert_color(&timewait_info->remote_qp_node, &cm.remote_qp_table); return NULL; } static struct cm_id_private * cm_insert_remote_sidr(struct cm_id_private *cm_id_priv) { struct rb_node **link = &cm.remote_sidr_table.rb_node; struct rb_node *parent = NULL; struct cm_id_private *cur_cm_id_priv; union ib_gid *port_gid = &cm_id_priv->av.dgid; __be32 remote_id = cm_id_priv->id.remote_id; while (*link) { parent = *link; cur_cm_id_priv = rb_entry(parent, struct cm_id_private, sidr_id_node); if (be32_lt(remote_id, cur_cm_id_priv->id.remote_id)) link = &(*link)->rb_left; else if (be32_gt(remote_id, cur_cm_id_priv->id.remote_id)) link = &(*link)->rb_right; else { int cmp; cmp = memcmp(port_gid, &cur_cm_id_priv->av.dgid, sizeof *port_gid); if (cmp < 0) link = &(*link)->rb_left; else if (cmp > 0) link = &(*link)->rb_right; else return cur_cm_id_priv; } } rb_link_node(&cm_id_priv->sidr_id_node, parent, link); rb_insert_color(&cm_id_priv->sidr_id_node, &cm.remote_sidr_table); return NULL; } static void cm_reject_sidr_req(struct cm_id_private *cm_id_priv, enum ib_cm_sidr_status status) { struct ib_cm_sidr_rep_param param; memset(¶m, 0, sizeof param); param.status = status; ib_send_cm_sidr_rep(&cm_id_priv->id, ¶m); } struct ib_cm_id *ib_create_cm_id(struct ib_device *device, ib_cm_handler cm_handler, void *context) { struct cm_id_private *cm_id_priv; int ret; cm_id_priv = kzalloc(sizeof *cm_id_priv, GFP_KERNEL); if (!cm_id_priv) return ERR_PTR(-ENOMEM); cm_id_priv->id.state = IB_CM_IDLE; cm_id_priv->id.device = device; cm_id_priv->id.cm_handler = cm_handler; cm_id_priv->id.context = context; cm_id_priv->id.remote_cm_qpn = 1; ret = cm_alloc_id(cm_id_priv); if (ret) goto error; spin_lock_init(&cm_id_priv->lock); init_completion(&cm_id_priv->comp); INIT_LIST_HEAD(&cm_id_priv->work_list); atomic_set(&cm_id_priv->work_count, -1); atomic_set(&cm_id_priv->refcount, 1); return &cm_id_priv->id; error: kfree(cm_id_priv); return ERR_PTR(-ENOMEM); } EXPORT_SYMBOL(ib_create_cm_id); static struct cm_work * cm_dequeue_work(struct cm_id_private *cm_id_priv) { struct cm_work *work; if (list_empty(&cm_id_priv->work_list)) return NULL; work = list_entry(cm_id_priv->work_list.next, struct cm_work, list); list_del(&work->list); return work; } static void cm_free_work(struct cm_work *work) { if (work->mad_recv_wc) ib_free_recv_mad(work->mad_recv_wc); kfree(work); } static inline int cm_convert_to_ms(int iba_time) { /* approximate conversion to ms from 4.096us x 2^iba_time */ return 1 << max(iba_time - 8, 0); } /* * calculate: 4.096x2^ack_timeout = 4.096x2^ack_delay + 2x4.096x2^life_time * Because of how ack_timeout is stored, adding one doubles the timeout. * To avoid large timeouts, select the max(ack_delay, life_time + 1), and * increment it (round up) only if the other is within 50%. */ static u8 cm_ack_timeout(u8 ca_ack_delay, u8 packet_life_time) { int ack_timeout = packet_life_time + 1; if (ack_timeout >= ca_ack_delay) ack_timeout += (ca_ack_delay >= (ack_timeout - 1)); else ack_timeout = ca_ack_delay + (ack_timeout >= (ca_ack_delay - 1)); return min(31, ack_timeout); } static void cm_cleanup_timewait(struct cm_timewait_info *timewait_info) { if (timewait_info->inserted_remote_id) { rb_erase(&timewait_info->remote_id_node, &cm.remote_id_table); timewait_info->inserted_remote_id = 0; } if (timewait_info->inserted_remote_qp) { rb_erase(&timewait_info->remote_qp_node, &cm.remote_qp_table); timewait_info->inserted_remote_qp = 0; } } static struct cm_timewait_info * cm_create_timewait_info(__be32 local_id) { struct cm_timewait_info *timewait_info; timewait_info = kzalloc(sizeof *timewait_info, GFP_KERNEL); if (!timewait_info) return ERR_PTR(-ENOMEM); timewait_info->work.local_id = local_id; INIT_DELAYED_WORK(&timewait_info->work.work, cm_work_handler); timewait_info->work.cm_event.event = IB_CM_TIMEWAIT_EXIT; return timewait_info; } static void cm_enter_timewait(struct cm_id_private *cm_id_priv) { int wait_time; unsigned long flags; spin_lock_irqsave(&cm.lock, flags); cm_cleanup_timewait(cm_id_priv->timewait_info); list_add_tail(&cm_id_priv->timewait_info->list, &cm.timewait_list); spin_unlock_irqrestore(&cm.lock, flags); /* * The cm_id could be destroyed by the user before we exit timewait. * To protect against this, we search for the cm_id after exiting * timewait before notifying the user that we've exited timewait. */ cm_id_priv->id.state = IB_CM_TIMEWAIT; wait_time = cm_convert_to_ms(cm_id_priv->av.timeout); queue_delayed_work(cm.wq, &cm_id_priv->timewait_info->work.work, msecs_to_jiffies(wait_time)); cm_id_priv->timewait_info = NULL; } static void cm_reset_to_idle(struct cm_id_private *cm_id_priv) { unsigned long flags; cm_id_priv->id.state = IB_CM_IDLE; if (cm_id_priv->timewait_info) { spin_lock_irqsave(&cm.lock, flags); cm_cleanup_timewait(cm_id_priv->timewait_info); spin_unlock_irqrestore(&cm.lock, flags); kfree(cm_id_priv->timewait_info); cm_id_priv->timewait_info = NULL; } } static void cm_destroy_id(struct ib_cm_id *cm_id, int err) { struct cm_id_private *cm_id_priv; struct cm_work *work; cm_id_priv = container_of(cm_id, struct cm_id_private, id); retest: spin_lock_irq(&cm_id_priv->lock); switch (cm_id->state) { case IB_CM_LISTEN: cm_id->state = IB_CM_IDLE; spin_unlock_irq(&cm_id_priv->lock); spin_lock_irq(&cm.lock); rb_erase(&cm_id_priv->service_node, &cm.listen_service_table); spin_unlock_irq(&cm.lock); break; case IB_CM_SIDR_REQ_SENT: cm_id->state = IB_CM_IDLE; ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg); spin_unlock_irq(&cm_id_priv->lock); break; case IB_CM_SIDR_REQ_RCVD: spin_unlock_irq(&cm_id_priv->lock); cm_reject_sidr_req(cm_id_priv, IB_SIDR_REJECT); break; case IB_CM_REQ_SENT: ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg); spin_unlock_irq(&cm_id_priv->lock); ib_send_cm_rej(cm_id, IB_CM_REJ_TIMEOUT, &cm_id_priv->id.device->node_guid, sizeof cm_id_priv->id.device->node_guid, NULL, 0); break; case IB_CM_REQ_RCVD: if (err == -ENOMEM) { /* Do not reject to allow future retries. */ cm_reset_to_idle(cm_id_priv); spin_unlock_irq(&cm_id_priv->lock); } else { spin_unlock_irq(&cm_id_priv->lock); ib_send_cm_rej(cm_id, IB_CM_REJ_CONSUMER_DEFINED, NULL, 0, NULL, 0); } break; case IB_CM_MRA_REQ_RCVD: case IB_CM_REP_SENT: case IB_CM_MRA_REP_RCVD: ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg); /* Fall through */ case IB_CM_MRA_REQ_SENT: case IB_CM_REP_RCVD: case IB_CM_MRA_REP_SENT: spin_unlock_irq(&cm_id_priv->lock); ib_send_cm_rej(cm_id, IB_CM_REJ_CONSUMER_DEFINED, NULL, 0, NULL, 0); break; case IB_CM_ESTABLISHED: spin_unlock_irq(&cm_id_priv->lock); ib_send_cm_dreq(cm_id, NULL, 0); goto retest; case IB_CM_DREQ_SENT: ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg); cm_enter_timewait(cm_id_priv); spin_unlock_irq(&cm_id_priv->lock); break; case IB_CM_DREQ_RCVD: spin_unlock_irq(&cm_id_priv->lock); ib_send_cm_drep(cm_id, NULL, 0); break; default: spin_unlock_irq(&cm_id_priv->lock); break; } cm_free_id(cm_id->local_id); cm_deref_id(cm_id_priv); wait_for_completion(&cm_id_priv->comp); while ((work = cm_dequeue_work(cm_id_priv)) != NULL) cm_free_work(work); kfree(cm_id_priv->compare_data); kfree(cm_id_priv->private_data); kfree(cm_id_priv); } void ib_destroy_cm_id(struct ib_cm_id *cm_id) { cm_destroy_id(cm_id, 0); } EXPORT_SYMBOL(ib_destroy_cm_id); int ib_cm_listen(struct ib_cm_id *cm_id, __be64 service_id, __be64 service_mask, struct ib_cm_compare_data *compare_data) { struct cm_id_private *cm_id_priv, *cur_cm_id_priv; unsigned long flags; int ret = 0; service_mask = service_mask ? service_mask : ~cpu_to_be64(0); service_id &= service_mask; if ((service_id & IB_SERVICE_ID_AGN_MASK) == IB_CM_ASSIGN_SERVICE_ID && (service_id != IB_CM_ASSIGN_SERVICE_ID)) return -EINVAL; cm_id_priv = container_of(cm_id, struct cm_id_private, id); if (cm_id->state != IB_CM_IDLE) return -EINVAL; if (compare_data) { cm_id_priv->compare_data = kzalloc(sizeof *compare_data, GFP_KERNEL); if (!cm_id_priv->compare_data) return -ENOMEM; cm_mask_copy(cm_id_priv->compare_data->data, compare_data->data, compare_data->mask); memcpy(cm_id_priv->compare_data->mask, compare_data->mask, IB_CM_COMPARE_SIZE); } cm_id->state = IB_CM_LISTEN; spin_lock_irqsave(&cm.lock, flags); if (service_id == IB_CM_ASSIGN_SERVICE_ID) { cm_id->service_id = cpu_to_be64(cm.listen_service_id++); cm_id->service_mask = ~cpu_to_be64(0); } else { cm_id->service_id = service_id; cm_id->service_mask = service_mask; } cur_cm_id_priv = cm_insert_listen(cm_id_priv); spin_unlock_irqrestore(&cm.lock, flags); if (cur_cm_id_priv) { cm_id->state = IB_CM_IDLE; kfree(cm_id_priv->compare_data); cm_id_priv->compare_data = NULL; ret = -EBUSY; } return ret; } EXPORT_SYMBOL(ib_cm_listen); static __be64 cm_form_tid(struct cm_id_private *cm_id_priv, enum cm_msg_sequence msg_seq) { u64 hi_tid, low_tid; hi_tid = ((u64) cm_id_priv->av.port->mad_agent->hi_tid) << 32; low_tid = (u64) ((__force u32)cm_id_priv->id.local_id | (msg_seq << 30)); return cpu_to_be64(hi_tid | low_tid); } static void cm_format_mad_hdr(struct ib_mad_hdr *hdr, __be16 attr_id, __be64 tid) { hdr->base_version = IB_MGMT_BASE_VERSION; hdr->mgmt_class = IB_MGMT_CLASS_CM; hdr->class_version = IB_CM_CLASS_VERSION; hdr->method = IB_MGMT_METHOD_SEND; hdr->attr_id = attr_id; hdr->tid = tid; } static void cm_format_req(struct cm_req_msg *req_msg, struct cm_id_private *cm_id_priv, struct ib_cm_req_param *param) { struct ib_sa_path_rec *pri_path = param->primary_path; struct ib_sa_path_rec *alt_path = param->alternate_path; cm_format_mad_hdr(&req_msg->hdr, CM_REQ_ATTR_ID, cm_form_tid(cm_id_priv, CM_MSG_SEQUENCE_REQ)); req_msg->local_comm_id = cm_id_priv->id.local_id; req_msg->service_id = param->service_id; req_msg->local_ca_guid = cm_id_priv->id.device->node_guid; cm_req_set_local_qpn(req_msg, cpu_to_be32(param->qp_num)); cm_req_set_resp_res(req_msg, param->responder_resources); cm_req_set_init_depth(req_msg, param->initiator_depth); cm_req_set_remote_resp_timeout(req_msg, param->remote_cm_response_timeout); if (param->remote_cm_response_timeout > (u8) max_timeout) { printk(KERN_WARNING PFX "req remote_cm_response_timeout %d > " "%d, decreasing\n", param->remote_cm_response_timeout, max_timeout); cm_req_set_remote_resp_timeout(req_msg, (u8) max_timeout); } cm_req_set_qp_type(req_msg, param->qp_type); cm_req_set_flow_ctrl(req_msg, param->flow_control); cm_req_set_starting_psn(req_msg, cpu_to_be32(param->starting_psn)); cm_req_set_local_resp_timeout(req_msg, param->local_cm_response_timeout); if (param->local_cm_response_timeout > (u8) max_timeout) { printk(KERN_WARNING PFX "req local_cm_response_timeout %d > " "%d, decreasing\n", param->local_cm_response_timeout, max_timeout); cm_req_set_local_resp_timeout(req_msg, (u8) max_timeout); } cm_req_set_retry_count(req_msg, param->retry_count); req_msg->pkey = param->primary_path->pkey; cm_req_set_path_mtu(req_msg, param->primary_path->mtu); cm_req_set_rnr_retry_count(req_msg, param->rnr_retry_count); cm_req_set_max_cm_retries(req_msg, param->max_cm_retries); cm_req_set_srq(req_msg, param->srq); if (pri_path->hop_limit <= 1) { req_msg->primary_local_lid = pri_path->slid; req_msg->primary_remote_lid = pri_path->dlid; } else { /* Work-around until there's a way to obtain remote LID info */ req_msg->primary_local_lid = IB_LID_PERMISSIVE; req_msg->primary_remote_lid = IB_LID_PERMISSIVE; } req_msg->primary_local_gid = pri_path->sgid; req_msg->primary_remote_gid = pri_path->dgid; cm_req_set_primary_flow_label(req_msg, pri_path->flow_label); cm_req_set_primary_packet_rate(req_msg, pri_path->rate); req_msg->primary_traffic_class = pri_path->traffic_class; req_msg->primary_hop_limit = pri_path->hop_limit; cm_req_set_primary_sl(req_msg, pri_path->sl); cm_req_set_primary_subnet_local(req_msg, (pri_path->hop_limit <= 1)); cm_req_set_primary_local_ack_timeout(req_msg, cm_ack_timeout(cm_id_priv->av.port->cm_dev->ack_delay, pri_path->packet_life_time)); if (alt_path) { if (alt_path->hop_limit <= 1) { req_msg->alt_local_lid = alt_path->slid; req_msg->alt_remote_lid = alt_path->dlid; } else { req_msg->alt_local_lid = IB_LID_PERMISSIVE; req_msg->alt_remote_lid = IB_LID_PERMISSIVE; } req_msg->alt_local_gid = alt_path->sgid; req_msg->alt_remote_gid = alt_path->dgid; cm_req_set_alt_flow_label(req_msg, alt_path->flow_label); cm_req_set_alt_packet_rate(req_msg, alt_path->rate); req_msg->alt_traffic_class = alt_path->traffic_class; req_msg->alt_hop_limit = alt_path->hop_limit; cm_req_set_alt_sl(req_msg, alt_path->sl); cm_req_set_alt_subnet_local(req_msg, (alt_path->hop_limit <= 1)); cm_req_set_alt_local_ack_timeout(req_msg, cm_ack_timeout(cm_id_priv->av.port->cm_dev->ack_delay, alt_path->packet_life_time)); } if (param->private_data && param->private_data_len) memcpy(req_msg->private_data, param->private_data, param->private_data_len); } static int cm_validate_req_param(struct ib_cm_req_param *param) { /* peer-to-peer not supported */ if (param->peer_to_peer) return -EINVAL; if (!param->primary_path) return -EINVAL; if (param->qp_type != IB_QPT_RC && param->qp_type != IB_QPT_UC) return -EINVAL; if (param->private_data && param->private_data_len > IB_CM_REQ_PRIVATE_DATA_SIZE) return -EINVAL; if (param->alternate_path && (param->alternate_path->pkey != param->primary_path->pkey || param->alternate_path->mtu != param->primary_path->mtu)) return -EINVAL; return 0; } int ib_send_cm_req(struct ib_cm_id *cm_id, struct ib_cm_req_param *param) { struct cm_id_private *cm_id_priv; struct cm_req_msg *req_msg; unsigned long flags; int ret; ret = cm_validate_req_param(param); if (ret) return ret; /* Verify that we're not in timewait. */ cm_id_priv = container_of(cm_id, struct cm_id_private, id); spin_lock_irqsave(&cm_id_priv->lock, flags); if (cm_id->state != IB_CM_IDLE) { spin_unlock_irqrestore(&cm_id_priv->lock, flags); ret = -EINVAL; goto out; } spin_unlock_irqrestore(&cm_id_priv->lock, flags); cm_id_priv->timewait_info = cm_create_timewait_info(cm_id_priv-> id.local_id); if (IS_ERR(cm_id_priv->timewait_info)) { ret = PTR_ERR(cm_id_priv->timewait_info); goto out; } ret = cm_init_av_by_path(param->primary_path, &cm_id_priv->av); if (ret) goto error1; if (param->alternate_path) { ret = cm_init_av_by_path(param->alternate_path, &cm_id_priv->alt_av); if (ret) goto error1; } cm_id->service_id = param->service_id; cm_id->service_mask = ~cpu_to_be64(0); cm_id_priv->timeout_ms = cm_convert_to_ms( param->primary_path->packet_life_time) * 2 + cm_convert_to_ms( param->remote_cm_response_timeout); if (cm_id_priv->timeout_ms > cm_convert_to_ms(max_timeout)) { printk(KERN_WARNING PFX "req timeout_ms %d > %d, decreasing\n", cm_id_priv->timeout_ms, cm_convert_to_ms(max_timeout)); cm_id_priv->timeout_ms = cm_convert_to_ms(max_timeout); } cm_id_priv->max_cm_retries = param->max_cm_retries; cm_id_priv->initiator_depth = param->initiator_depth; cm_id_priv->responder_resources = param->responder_resources; cm_id_priv->retry_count = param->retry_count; cm_id_priv->path_mtu = param->primary_path->mtu; cm_id_priv->pkey = param->primary_path->pkey; cm_id_priv->qp_type = param->qp_type; ret = cm_alloc_msg(cm_id_priv, &cm_id_priv->msg); if (ret) goto error1; req_msg = (struct cm_req_msg *) cm_id_priv->msg->mad; cm_format_req(req_msg, cm_id_priv, param); cm_id_priv->tid = req_msg->hdr.tid; cm_id_priv->msg->timeout_ms = cm_id_priv->timeout_ms; cm_id_priv->msg->context[1] = (void *) (unsigned long) IB_CM_REQ_SENT; cm_id_priv->local_qpn = cm_req_get_local_qpn(req_msg); cm_id_priv->rq_psn = cm_req_get_starting_psn(req_msg); spin_lock_irqsave(&cm_id_priv->lock, flags); ret = ib_post_send_mad(cm_id_priv->msg, NULL); if (ret) { spin_unlock_irqrestore(&cm_id_priv->lock, flags); goto error2; } BUG_ON(cm_id->state != IB_CM_IDLE); cm_id->state = IB_CM_REQ_SENT; spin_unlock_irqrestore(&cm_id_priv->lock, flags); return 0; error2: cm_free_msg(cm_id_priv->msg); error1: kfree(cm_id_priv->timewait_info); out: return ret; } EXPORT_SYMBOL(ib_send_cm_req); static int cm_issue_rej(struct cm_port *port, struct ib_mad_recv_wc *mad_recv_wc, enum ib_cm_rej_reason reason, enum cm_msg_response msg_rejected, void *ari, u8 ari_length) { struct ib_mad_send_buf *msg = NULL; struct cm_rej_msg *rej_msg, *rcv_msg; int ret; ret = cm_alloc_response_msg(port, mad_recv_wc, &msg); if (ret) return ret; /* We just need common CM header information. Cast to any message. */ rcv_msg = (struct cm_rej_msg *) mad_recv_wc->recv_buf.mad; rej_msg = (struct cm_rej_msg *) msg->mad; cm_format_mad_hdr(&rej_msg->hdr, CM_REJ_ATTR_ID, rcv_msg->hdr.tid); rej_msg->remote_comm_id = rcv_msg->local_comm_id; rej_msg->local_comm_id = rcv_msg->remote_comm_id; cm_rej_set_msg_rejected(rej_msg, msg_rejected); rej_msg->reason = cpu_to_be16(reason); if (ari && ari_length) { cm_rej_set_reject_info_len(rej_msg, ari_length); memcpy(rej_msg->ari, ari, ari_length); } ret = ib_post_send_mad(msg, NULL); if (ret) cm_free_msg(msg); return ret; } static inline int cm_is_active_peer(__be64 local_ca_guid, __be64 remote_ca_guid, __be32 local_qpn, __be32 remote_qpn) { return (be64_to_cpu(local_ca_guid) > be64_to_cpu(remote_ca_guid) || ((local_ca_guid == remote_ca_guid) && (be32_to_cpu(local_qpn) > be32_to_cpu(remote_qpn)))); } static void cm_format_paths_from_req(struct cm_req_msg *req_msg, struct ib_sa_path_rec *primary_path, struct ib_sa_path_rec *alt_path) { memset(primary_path, 0, sizeof *primary_path); primary_path->dgid = req_msg->primary_local_gid; primary_path->sgid = req_msg->primary_remote_gid; primary_path->dlid = req_msg->primary_local_lid; primary_path->slid = req_msg->primary_remote_lid; primary_path->flow_label = cm_req_get_primary_flow_label(req_msg); primary_path->hop_limit = req_msg->primary_hop_limit; primary_path->traffic_class = req_msg->primary_traffic_class; primary_path->reversible = 1; primary_path->pkey = req_msg->pkey; primary_path->sl = cm_req_get_primary_sl(req_msg); primary_path->mtu_selector = IB_SA_EQ; primary_path->mtu = cm_req_get_path_mtu(req_msg); primary_path->rate_selector = IB_SA_EQ; primary_path->rate = cm_req_get_primary_packet_rate(req_msg); primary_path->packet_life_time_selector = IB_SA_EQ; primary_path->packet_life_time = cm_req_get_primary_local_ack_timeout(req_msg); primary_path->packet_life_time -= (primary_path->packet_life_time > 0); if (req_msg->alt_local_lid) { memset(alt_path, 0, sizeof *alt_path); alt_path->dgid = req_msg->alt_local_gid; alt_path->sgid = req_msg->alt_remote_gid; alt_path->dlid = req_msg->alt_local_lid; alt_path->slid = req_msg->alt_remote_lid; alt_path->flow_label = cm_req_get_alt_flow_label(req_msg); alt_path->hop_limit = req_msg->alt_hop_limit; alt_path->traffic_class = req_msg->alt_traffic_class; alt_path->reversible = 1; alt_path->pkey = req_msg->pkey; alt_path->sl = cm_req_get_alt_sl(req_msg); alt_path->mtu_selector = IB_SA_EQ; alt_path->mtu = cm_req_get_path_mtu(req_msg); alt_path->rate_selector = IB_SA_EQ; alt_path->rate = cm_req_get_alt_packet_rate(req_msg); alt_path->packet_life_time_selector = IB_SA_EQ; alt_path->packet_life_time = cm_req_get_alt_local_ack_timeout(req_msg); alt_path->packet_life_time -= (alt_path->packet_life_time > 0); } } static void cm_format_req_event(struct cm_work *work, struct cm_id_private *cm_id_priv, struct ib_cm_id *listen_id) { struct cm_req_msg *req_msg; struct ib_cm_req_event_param *param; req_msg = (struct cm_req_msg *)work->mad_recv_wc->recv_buf.mad; param = &work->cm_event.param.req_rcvd; param->listen_id = listen_id; param->port = cm_id_priv->av.port->port_num; param->primary_path = &work->path[0]; if (req_msg->alt_local_lid) param->alternate_path = &work->path[1]; else param->alternate_path = NULL; param->remote_ca_guid = req_msg->local_ca_guid; param->remote_qkey = be32_to_cpu(req_msg->local_qkey); param->remote_qpn = be32_to_cpu(cm_req_get_local_qpn(req_msg)); param->qp_type = cm_req_get_qp_type(req_msg); param->starting_psn = be32_to_cpu(cm_req_get_starting_psn(req_msg)); param->responder_resources = cm_req_get_init_depth(req_msg); param->initiator_depth = cm_req_get_resp_res(req_msg); param->local_cm_response_timeout = cm_req_get_remote_resp_timeout(req_msg); param->flow_control = cm_req_get_flow_ctrl(req_msg); param->remote_cm_response_timeout = cm_req_get_local_resp_timeout(req_msg); param->retry_count = cm_req_get_retry_count(req_msg); param->rnr_retry_count = cm_req_get_rnr_retry_count(req_msg); param->srq = cm_req_get_srq(req_msg); work->cm_event.private_data = &req_msg->private_data; } static void cm_process_work(struct cm_id_private *cm_id_priv, struct cm_work *work) { int ret; /* We will typically only have the current event to report. */ ret = cm_id_priv->id.cm_handler(&cm_id_priv->id, &work->cm_event); cm_free_work(work); while (!ret && !atomic_add_negative(-1, &cm_id_priv->work_count)) { spin_lock_irq(&cm_id_priv->lock); work = cm_dequeue_work(cm_id_priv); spin_unlock_irq(&cm_id_priv->lock); BUG_ON(!work); ret = cm_id_priv->id.cm_handler(&cm_id_priv->id, &work->cm_event); cm_free_work(work); } cm_deref_id(cm_id_priv); if (ret) cm_destroy_id(&cm_id_priv->id, ret); } static void cm_format_mra(struct cm_mra_msg *mra_msg, struct cm_id_private *cm_id_priv, enum cm_msg_response msg_mraed, u8 service_timeout, const void *private_data, u8 private_data_len) { cm_format_mad_hdr(&mra_msg->hdr, CM_MRA_ATTR_ID, cm_id_priv->tid); cm_mra_set_msg_mraed(mra_msg, msg_mraed); mra_msg->local_comm_id = cm_id_priv->id.local_id; mra_msg->remote_comm_id = cm_id_priv->id.remote_id; cm_mra_set_service_timeout(mra_msg, service_timeout); if (private_data && private_data_len) memcpy(mra_msg->private_data, private_data, private_data_len); } static void cm_format_rej(struct cm_rej_msg *rej_msg, struct cm_id_private *cm_id_priv, enum ib_cm_rej_reason reason, void *ari, u8 ari_length, const void *private_data, u8 private_data_len) { cm_format_mad_hdr(&rej_msg->hdr, CM_REJ_ATTR_ID, cm_id_priv->tid); rej_msg->remote_comm_id = cm_id_priv->id.remote_id; switch(cm_id_priv->id.state) { case IB_CM_REQ_RCVD: rej_msg->local_comm_id = 0; cm_rej_set_msg_rejected(rej_msg, CM_MSG_RESPONSE_REQ); break; case IB_CM_MRA_REQ_SENT: rej_msg->local_comm_id = cm_id_priv->id.local_id; cm_rej_set_msg_rejected(rej_msg, CM_MSG_RESPONSE_REQ); break; case IB_CM_REP_RCVD: case IB_CM_MRA_REP_SENT: rej_msg->local_comm_id = cm_id_priv->id.local_id; cm_rej_set_msg_rejected(rej_msg, CM_MSG_RESPONSE_REP); break; default: rej_msg->local_comm_id = cm_id_priv->id.local_id; cm_rej_set_msg_rejected(rej_msg, CM_MSG_RESPONSE_OTHER); break; } rej_msg->reason = cpu_to_be16(reason); if (ari && ari_length) { cm_rej_set_reject_info_len(rej_msg, ari_length); memcpy(rej_msg->ari, ari, ari_length); } if (private_data && private_data_len) memcpy(rej_msg->private_data, private_data, private_data_len); } static void cm_dup_req_handler(struct cm_work *work, struct cm_id_private *cm_id_priv) { struct ib_mad_send_buf *msg = NULL; int ret; atomic_long_inc(&work->port->counter_group[CM_RECV_DUPLICATES]. counter[CM_REQ_COUNTER]); /* Quick state check to discard duplicate REQs. */ if (cm_id_priv->id.state == IB_CM_REQ_RCVD) return; ret = cm_alloc_response_msg(work->port, work->mad_recv_wc, &msg); if (ret) return; spin_lock_irq(&cm_id_priv->lock); switch (cm_id_priv->id.state) { case IB_CM_MRA_REQ_SENT: cm_format_mra((struct cm_mra_msg *) msg->mad, cm_id_priv, CM_MSG_RESPONSE_REQ, cm_id_priv->service_timeout, cm_id_priv->private_data, cm_id_priv->private_data_len); break; case IB_CM_TIMEWAIT: cm_format_rej((struct cm_rej_msg *) msg->mad, cm_id_priv, IB_CM_REJ_STALE_CONN, NULL, 0, NULL, 0); break; default: goto unlock; } spin_unlock_irq(&cm_id_priv->lock); ret = ib_post_send_mad(msg, NULL); if (ret) goto free; return; unlock: spin_unlock_irq(&cm_id_priv->lock); free: cm_free_msg(msg); } static struct cm_id_private * cm_match_req(struct cm_work *work, struct cm_id_private *cm_id_priv) { struct cm_id_private *listen_cm_id_priv, *cur_cm_id_priv; struct cm_timewait_info *timewait_info; struct cm_req_msg *req_msg; req_msg = (struct cm_req_msg *)work->mad_recv_wc->recv_buf.mad; /* Check for possible duplicate REQ. */ spin_lock_irq(&cm.lock); timewait_info = cm_insert_remote_id(cm_id_priv->timewait_info); if (timewait_info) { cur_cm_id_priv = cm_get_id(timewait_info->work.local_id, timewait_info->work.remote_id); spin_unlock_irq(&cm.lock); if (cur_cm_id_priv) { cm_dup_req_handler(work, cur_cm_id_priv); cm_deref_id(cur_cm_id_priv); } return NULL; } /* Check for stale connections. */ timewait_info = cm_insert_remote_qpn(cm_id_priv->timewait_info); if (timewait_info) { cm_cleanup_timewait(cm_id_priv->timewait_info); spin_unlock_irq(&cm.lock); cm_issue_rej(work->port, work->mad_recv_wc, IB_CM_REJ_STALE_CONN, CM_MSG_RESPONSE_REQ, NULL, 0); return NULL; } /* Find matching listen request. */ listen_cm_id_priv = cm_find_listen(cm_id_priv->id.device, req_msg->service_id, req_msg->private_data); if (!listen_cm_id_priv) { cm_cleanup_timewait(cm_id_priv->timewait_info); spin_unlock_irq(&cm.lock); cm_issue_rej(work->port, work->mad_recv_wc, IB_CM_REJ_INVALID_SERVICE_ID, CM_MSG_RESPONSE_REQ, NULL, 0); goto out; } atomic_inc(&listen_cm_id_priv->refcount); atomic_inc(&cm_id_priv->refcount); cm_id_priv->id.state = IB_CM_REQ_RCVD; atomic_inc(&cm_id_priv->work_count); spin_unlock_irq(&cm.lock); out: return listen_cm_id_priv; } /* * Work-around for inter-subnet connections. If the LIDs are permissive, * we need to override the LID/SL data in the REQ with the LID information * in the work completion. */ static void cm_process_routed_req(struct cm_req_msg *req_msg, struct ib_wc *wc) { if (!cm_req_get_primary_subnet_local(req_msg)) { if (req_msg->primary_local_lid == IB_LID_PERMISSIVE) { req_msg->primary_local_lid = cpu_to_be16(wc->slid); cm_req_set_primary_sl(req_msg, wc->sl); } if (req_msg->primary_remote_lid == IB_LID_PERMISSIVE) req_msg->primary_remote_lid = cpu_to_be16(wc->dlid_path_bits); } if (!cm_req_get_alt_subnet_local(req_msg)) { if (req_msg->alt_local_lid == IB_LID_PERMISSIVE) { req_msg->alt_local_lid = cpu_to_be16(wc->slid); cm_req_set_alt_sl(req_msg, wc->sl); } if (req_msg->alt_remote_lid == IB_LID_PERMISSIVE) req_msg->alt_remote_lid = cpu_to_be16(wc->dlid_path_bits); } } static int cm_req_handler(struct cm_work *work) { struct ib_cm_id *cm_id; struct cm_id_private *cm_id_priv, *listen_cm_id_priv; struct cm_req_msg *req_msg; int ret; req_msg = (struct cm_req_msg *)work->mad_recv_wc->recv_buf.mad; cm_id = ib_create_cm_id(work->port->cm_dev->ib_device, NULL, NULL); if (IS_ERR(cm_id)) return PTR_ERR(cm_id); cm_id_priv = container_of(cm_id, struct cm_id_private, id); cm_id_priv->id.remote_id = req_msg->local_comm_id; cm_init_av_for_response(work->port, work->mad_recv_wc->wc, work->mad_recv_wc->recv_buf.grh, &cm_id_priv->av); cm_id_priv->timewait_info = cm_create_timewait_info(cm_id_priv-> id.local_id); if (IS_ERR(cm_id_priv->timewait_info)) { ret = PTR_ERR(cm_id_priv->timewait_info); goto destroy; } cm_id_priv->timewait_info->work.remote_id = req_msg->local_comm_id; cm_id_priv->timewait_info->remote_ca_guid = req_msg->local_ca_guid; cm_id_priv->timewait_info->remote_qpn = cm_req_get_local_qpn(req_msg); listen_cm_id_priv = cm_match_req(work, cm_id_priv); if (!listen_cm_id_priv) { ret = -EINVAL; kfree(cm_id_priv->timewait_info); goto destroy; } cm_id_priv->id.cm_handler = listen_cm_id_priv->id.cm_handler; cm_id_priv->id.context = listen_cm_id_priv->id.context; cm_id_priv->id.service_id = req_msg->service_id; cm_id_priv->id.service_mask = ~cpu_to_be64(0); cm_process_routed_req(req_msg, work->mad_recv_wc->wc); cm_format_paths_from_req(req_msg, &work->path[0], &work->path[1]); ret = cm_init_av_by_path(&work->path[0], &cm_id_priv->av); if (ret) { ib_get_cached_gid(work->port->cm_dev->ib_device, work->port->port_num, 0, &work->path[0].sgid); ib_send_cm_rej(cm_id, IB_CM_REJ_INVALID_GID, &work->path[0].sgid, sizeof work->path[0].sgid, NULL, 0); goto rejected; } if (req_msg->alt_local_lid) { ret = cm_init_av_by_path(&work->path[1], &cm_id_priv->alt_av); if (ret) { ib_send_cm_rej(cm_id, IB_CM_REJ_INVALID_ALT_GID, &work->path[0].sgid, sizeof work->path[0].sgid, NULL, 0); goto rejected; } } cm_id_priv->tid = req_msg->hdr.tid; cm_id_priv->timeout_ms = cm_convert_to_ms( cm_req_get_local_resp_timeout(req_msg)); if (cm_req_get_local_resp_timeout(req_msg) > (u8) max_timeout) { printk(KERN_WARNING PFX "rcvd cm_local_resp_timeout %d > %d, " "decreasing used timeout_ms\n", cm_req_get_local_resp_timeout(req_msg), max_timeout); cm_id_priv->timeout_ms = cm_convert_to_ms(max_timeout); } cm_id_priv->max_cm_retries = cm_req_get_max_cm_retries(req_msg); cm_id_priv->remote_qpn = cm_req_get_local_qpn(req_msg); cm_id_priv->initiator_depth = cm_req_get_resp_res(req_msg); cm_id_priv->responder_resources = cm_req_get_init_depth(req_msg); cm_id_priv->path_mtu = cm_req_get_path_mtu(req_msg); cm_id_priv->pkey = req_msg->pkey; cm_id_priv->sq_psn = cm_req_get_starting_psn(req_msg); cm_id_priv->retry_count = cm_req_get_retry_count(req_msg); cm_id_priv->rnr_retry_count = cm_req_get_rnr_retry_count(req_msg); cm_id_priv->qp_type = cm_req_get_qp_type(req_msg); cm_format_req_event(work, cm_id_priv, &listen_cm_id_priv->id); cm_process_work(cm_id_priv, work); cm_deref_id(listen_cm_id_priv); return 0; rejected: atomic_dec(&cm_id_priv->refcount); cm_deref_id(listen_cm_id_priv); destroy: ib_destroy_cm_id(cm_id); return ret; } static void cm_format_rep(struct cm_rep_msg *rep_msg, struct cm_id_private *cm_id_priv, struct ib_cm_rep_param *param) { cm_format_mad_hdr(&rep_msg->hdr, CM_REP_ATTR_ID, cm_id_priv->tid); rep_msg->local_comm_id = cm_id_priv->id.local_id; rep_msg->remote_comm_id = cm_id_priv->id.remote_id; cm_rep_set_local_qpn(rep_msg, cpu_to_be32(param->qp_num)); cm_rep_set_starting_psn(rep_msg, cpu_to_be32(param->starting_psn)); rep_msg->resp_resources = param->responder_resources; rep_msg->initiator_depth = param->initiator_depth; cm_rep_set_target_ack_delay(rep_msg, cm_id_priv->av.port->cm_dev->ack_delay); cm_rep_set_failover(rep_msg, param->failover_accepted); cm_rep_set_flow_ctrl(rep_msg, param->flow_control); cm_rep_set_rnr_retry_count(rep_msg, param->rnr_retry_count); cm_rep_set_srq(rep_msg, param->srq); rep_msg->local_ca_guid = cm_id_priv->id.device->node_guid; if (param->private_data && param->private_data_len) memcpy(rep_msg->private_data, param->private_data, param->private_data_len); } int ib_send_cm_rep(struct ib_cm_id *cm_id, struct ib_cm_rep_param *param) { struct cm_id_private *cm_id_priv; struct ib_mad_send_buf *msg; struct cm_rep_msg *rep_msg; unsigned long flags; int ret; if (param->private_data && param->private_data_len > IB_CM_REP_PRIVATE_DATA_SIZE) return -EINVAL; cm_id_priv = container_of(cm_id, struct cm_id_private, id); spin_lock_irqsave(&cm_id_priv->lock, flags); if (cm_id->state != IB_CM_REQ_RCVD && cm_id->state != IB_CM_MRA_REQ_SENT) { ret = -EINVAL; goto out; } ret = cm_alloc_msg(cm_id_priv, &msg); if (ret) goto out; rep_msg = (struct cm_rep_msg *) msg->mad; cm_format_rep(rep_msg, cm_id_priv, param); msg->timeout_ms = cm_id_priv->timeout_ms; msg->context[1] = (void *) (unsigned long) IB_CM_REP_SENT; ret = ib_post_send_mad(msg, NULL); if (ret) { spin_unlock_irqrestore(&cm_id_priv->lock, flags); cm_free_msg(msg); return ret; } cm_id->state = IB_CM_REP_SENT; cm_id_priv->msg = msg; cm_id_priv->initiator_depth = param->initiator_depth; cm_id_priv->responder_resources = param->responder_resources; cm_id_priv->rq_psn = cm_rep_get_starting_psn(rep_msg); cm_id_priv->local_qpn = cm_rep_get_local_qpn(rep_msg); out: spin_unlock_irqrestore(&cm_id_priv->lock, flags); return ret; } EXPORT_SYMBOL(ib_send_cm_rep); static void cm_format_rtu(struct cm_rtu_msg *rtu_msg, struct cm_id_private *cm_id_priv, const void *private_data, u8 private_data_len) { cm_format_mad_hdr(&rtu_msg->hdr, CM_RTU_ATTR_ID, cm_id_priv->tid); rtu_msg->local_comm_id = cm_id_priv->id.local_id; rtu_msg->remote_comm_id = cm_id_priv->id.remote_id; if (private_data && private_data_len) memcpy(rtu_msg->private_data, private_data, private_data_len); } int ib_send_cm_rtu(struct ib_cm_id *cm_id, const void *private_data, u8 private_data_len) { struct cm_id_private *cm_id_priv; struct ib_mad_send_buf *msg; unsigned long flags; void *data; int ret; if (private_data && private_data_len > IB_CM_RTU_PRIVATE_DATA_SIZE) return -EINVAL; data = cm_copy_private_data(private_data, private_data_len); if (IS_ERR(data)) return PTR_ERR(data); cm_id_priv = container_of(cm_id, struct cm_id_private, id); spin_lock_irqsave(&cm_id_priv->lock, flags); if (cm_id->state != IB_CM_REP_RCVD && cm_id->state != IB_CM_MRA_REP_SENT) { ret = -EINVAL; goto error; } ret = cm_alloc_msg(cm_id_priv, &msg); if (ret) goto error; cm_format_rtu((struct cm_rtu_msg *) msg->mad, cm_id_priv, private_data, private_data_len); ret = ib_post_send_mad(msg, NULL); if (ret) { spin_unlock_irqrestore(&cm_id_priv->lock, flags); cm_free_msg(msg); kfree(data); return ret; } cm_id->state = IB_CM_ESTABLISHED; cm_set_private_data(cm_id_priv, data, private_data_len); spin_unlock_irqrestore(&cm_id_priv->lock, flags); return 0; error: spin_unlock_irqrestore(&cm_id_priv->lock, flags); kfree(data); return ret; } EXPORT_SYMBOL(ib_send_cm_rtu); static void cm_format_rep_event(struct cm_work *work) { struct cm_rep_msg *rep_msg; struct ib_cm_rep_event_param *param; rep_msg = (struct cm_rep_msg *)work->mad_recv_wc->recv_buf.mad; param = &work->cm_event.param.rep_rcvd; param->remote_ca_guid = rep_msg->local_ca_guid; param->remote_qkey = be32_to_cpu(rep_msg->local_qkey); param->remote_qpn = be32_to_cpu(cm_rep_get_local_qpn(rep_msg)); param->starting_psn = be32_to_cpu(cm_rep_get_starting_psn(rep_msg)); param->responder_resources = rep_msg->initiator_depth; param->initiator_depth = rep_msg->resp_resources; param->target_ack_delay = cm_rep_get_target_ack_delay(rep_msg); param->failover_accepted = cm_rep_get_failover(rep_msg); param->flow_control = cm_rep_get_flow_ctrl(rep_msg); param->rnr_retry_count = cm_rep_get_rnr_retry_count(rep_msg); param->srq = cm_rep_get_srq(rep_msg); work->cm_event.private_data = &rep_msg->private_data; } static void cm_dup_rep_handler(struct cm_work *work) { struct cm_id_private *cm_id_priv; struct cm_rep_msg *rep_msg; struct ib_mad_send_buf *msg = NULL; int ret; rep_msg = (struct cm_rep_msg *) work->mad_recv_wc->recv_buf.mad; cm_id_priv = cm_acquire_id(rep_msg->remote_comm_id, rep_msg->local_comm_id); if (!cm_id_priv) return; atomic_long_inc(&work->port->counter_group[CM_RECV_DUPLICATES]. counter[CM_REP_COUNTER]); ret = cm_alloc_response_msg(work->port, work->mad_recv_wc, &msg); if (ret) goto deref; spin_lock_irq(&cm_id_priv->lock); if (cm_id_priv->id.state == IB_CM_ESTABLISHED) cm_format_rtu((struct cm_rtu_msg *) msg->mad, cm_id_priv, cm_id_priv->private_data, cm_id_priv->private_data_len); else if (cm_id_priv->id.state == IB_CM_MRA_REP_SENT) cm_format_mra((struct cm_mra_msg *) msg->mad, cm_id_priv, CM_MSG_RESPONSE_REP, cm_id_priv->service_timeout, cm_id_priv->private_data, cm_id_priv->private_data_len); else goto unlock; spin_unlock_irq(&cm_id_priv->lock); ret = ib_post_send_mad(msg, NULL); if (ret) goto free; goto deref; unlock: spin_unlock_irq(&cm_id_priv->lock); free: cm_free_msg(msg); deref: cm_deref_id(cm_id_priv); } static int cm_rep_handler(struct cm_work *work) { struct cm_id_private *cm_id_priv; struct cm_rep_msg *rep_msg; int ret; rep_msg = (struct cm_rep_msg *)work->mad_recv_wc->recv_buf.mad; cm_id_priv = cm_acquire_id(rep_msg->remote_comm_id, 0); if (!cm_id_priv) { cm_dup_rep_handler(work); return -EINVAL; } cm_format_rep_event(work); spin_lock_irq(&cm_id_priv->lock); switch (cm_id_priv->id.state) { case IB_CM_REQ_SENT: case IB_CM_MRA_REQ_RCVD: break; default: spin_unlock_irq(&cm_id_priv->lock); ret = -EINVAL; goto error; } cm_id_priv->timewait_info->work.remote_id = rep_msg->local_comm_id; cm_id_priv->timewait_info->remote_ca_guid = rep_msg->local_ca_guid; cm_id_priv->timewait_info->remote_qpn = cm_rep_get_local_qpn(rep_msg); spin_lock(&cm.lock); /* Check for duplicate REP. */ if (cm_insert_remote_id(cm_id_priv->timewait_info)) { spin_unlock(&cm.lock); spin_unlock_irq(&cm_id_priv->lock); ret = -EINVAL; goto error; } /* Check for a stale connection. */ if (cm_insert_remote_qpn(cm_id_priv->timewait_info)) { rb_erase(&cm_id_priv->timewait_info->remote_id_node, &cm.remote_id_table); cm_id_priv->timewait_info->inserted_remote_id = 0; spin_unlock(&cm.lock); spin_unlock_irq(&cm_id_priv->lock); cm_issue_rej(work->port, work->mad_recv_wc, IB_CM_REJ_STALE_CONN, CM_MSG_RESPONSE_REP, NULL, 0); ret = -EINVAL; goto error; } spin_unlock(&cm.lock); cm_id_priv->id.state = IB_CM_REP_RCVD; cm_id_priv->id.remote_id = rep_msg->local_comm_id; cm_id_priv->remote_qpn = cm_rep_get_local_qpn(rep_msg); cm_id_priv->initiator_depth = rep_msg->resp_resources; cm_id_priv->responder_resources = rep_msg->initiator_depth; cm_id_priv->sq_psn = cm_rep_get_starting_psn(rep_msg); cm_id_priv->rnr_retry_count = cm_rep_get_rnr_retry_count(rep_msg); cm_id_priv->target_ack_delay = cm_rep_get_target_ack_delay(rep_msg); cm_id_priv->av.timeout = cm_ack_timeout(cm_id_priv->target_ack_delay, cm_id_priv->av.timeout - 1); cm_id_priv->alt_av.timeout = cm_ack_timeout(cm_id_priv->target_ack_delay, cm_id_priv->alt_av.timeout - 1); /* todo: handle peer_to_peer */ ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg); ret = atomic_inc_and_test(&cm_id_priv->work_count); if (!ret) list_add_tail(&work->list, &cm_id_priv->work_list); spin_unlock_irq(&cm_id_priv->lock); if (ret) cm_process_work(cm_id_priv, work); else cm_deref_id(cm_id_priv); return 0; error: cm_deref_id(cm_id_priv); return ret; } static int cm_establish_handler(struct cm_work *work) { struct cm_id_private *cm_id_priv; int ret; /* See comment in cm_establish about lookup. */ cm_id_priv = cm_acquire_id(work->local_id, work->remote_id); if (!cm_id_priv) return -EINVAL; spin_lock_irq(&cm_id_priv->lock); if (cm_id_priv->id.state != IB_CM_ESTABLISHED) { spin_unlock_irq(&cm_id_priv->lock); goto out; } ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg); ret = atomic_inc_and_test(&cm_id_priv->work_count); if (!ret) list_add_tail(&work->list, &cm_id_priv->work_list); spin_unlock_irq(&cm_id_priv->lock); if (ret) cm_process_work(cm_id_priv, work); else cm_deref_id(cm_id_priv); return 0; out: cm_deref_id(cm_id_priv); return -EINVAL; } static int cm_rtu_handler(struct cm_work *work) { struct cm_id_private *cm_id_priv; struct cm_rtu_msg *rtu_msg; int ret; rtu_msg = (struct cm_rtu_msg *)work->mad_recv_wc->recv_buf.mad; cm_id_priv = cm_acquire_id(rtu_msg->remote_comm_id, rtu_msg->local_comm_id); if (!cm_id_priv) return -EINVAL; work->cm_event.private_data = &rtu_msg->private_data; spin_lock_irq(&cm_id_priv->lock); if (cm_id_priv->id.state != IB_CM_REP_SENT && cm_id_priv->id.state != IB_CM_MRA_REP_RCVD) { spin_unlock_irq(&cm_id_priv->lock); atomic_long_inc(&work->port->counter_group[CM_RECV_DUPLICATES]. counter[CM_RTU_COUNTER]); goto out; } cm_id_priv->id.state = IB_CM_ESTABLISHED; ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg); ret = atomic_inc_and_test(&cm_id_priv->work_count); if (!ret) list_add_tail(&work->list, &cm_id_priv->work_list); spin_unlock_irq(&cm_id_priv->lock); if (ret) cm_process_work(cm_id_priv, work); else cm_deref_id(cm_id_priv); return 0; out: cm_deref_id(cm_id_priv); return -EINVAL; } static void cm_format_dreq(struct cm_dreq_msg *dreq_msg, struct cm_id_private *cm_id_priv, const void *private_data, u8 private_data_len) { cm_format_mad_hdr(&dreq_msg->hdr, CM_DREQ_ATTR_ID, cm_form_tid(cm_id_priv, CM_MSG_SEQUENCE_DREQ)); dreq_msg->local_comm_id = cm_id_priv->id.local_id; dreq_msg->remote_comm_id = cm_id_priv->id.remote_id; cm_dreq_set_remote_qpn(dreq_msg, cm_id_priv->remote_qpn); if (private_data && private_data_len) memcpy(dreq_msg->private_data, private_data, private_data_len); } int ib_send_cm_dreq(struct ib_cm_id *cm_id, const void *private_data, u8 private_data_len) { struct cm_id_private *cm_id_priv; struct ib_mad_send_buf *msg; unsigned long flags; int ret; if (private_data && private_data_len > IB_CM_DREQ_PRIVATE_DATA_SIZE) return -EINVAL; cm_id_priv = container_of(cm_id, struct cm_id_private, id); spin_lock_irqsave(&cm_id_priv->lock, flags); if (cm_id->state != IB_CM_ESTABLISHED) { ret = -EINVAL; goto out; } ret = cm_alloc_msg(cm_id_priv, &msg); if (ret) { cm_enter_timewait(cm_id_priv); goto out; } cm_format_dreq((struct cm_dreq_msg *) msg->mad, cm_id_priv, private_data, private_data_len); msg->timeout_ms = cm_id_priv->timeout_ms; msg->context[1] = (void *) (unsigned long) IB_CM_DREQ_SENT; ret = ib_post_send_mad(msg, NULL); if (ret) { cm_enter_timewait(cm_id_priv); spin_unlock_irqrestore(&cm_id_priv->lock, flags); cm_free_msg(msg); return ret; } cm_id->state = IB_CM_DREQ_SENT; cm_id_priv->msg = msg; out: spin_unlock_irqrestore(&cm_id_priv->lock, flags); return ret; } EXPORT_SYMBOL(ib_send_cm_dreq); static void cm_format_drep(struct cm_drep_msg *drep_msg, struct cm_id_private *cm_id_priv, const void *private_data, u8 private_data_len) { cm_format_mad_hdr(&drep_msg->hdr, CM_DREP_ATTR_ID, cm_id_priv->tid); drep_msg->local_comm_id = cm_id_priv->id.local_id; drep_msg->remote_comm_id = cm_id_priv->id.remote_id; if (private_data && private_data_len) memcpy(drep_msg->private_data, private_data, private_data_len); } int ib_send_cm_drep(struct ib_cm_id *cm_id, const void *private_data, u8 private_data_len) { struct cm_id_private *cm_id_priv; struct ib_mad_send_buf *msg; unsigned long flags; void *data; int ret; if (private_data && private_data_len > IB_CM_DREP_PRIVATE_DATA_SIZE) return -EINVAL; data = cm_copy_private_data(private_data, private_data_len); if (IS_ERR(data)) return PTR_ERR(data); cm_id_priv = container_of(cm_id, struct cm_id_private, id); spin_lock_irqsave(&cm_id_priv->lock, flags); if (cm_id->state != IB_CM_DREQ_RCVD) { spin_unlock_irqrestore(&cm_id_priv->lock, flags); kfree(data); return -EINVAL; } cm_set_private_data(cm_id_priv, data, private_data_len); cm_enter_timewait(cm_id_priv); ret = cm_alloc_msg(cm_id_priv, &msg); if (ret) goto out; cm_format_drep((struct cm_drep_msg *) msg->mad, cm_id_priv, private_data, private_data_len); ret = ib_post_send_mad(msg, NULL); if (ret) { spin_unlock_irqrestore(&cm_id_priv->lock, flags); cm_free_msg(msg); return ret; } out: spin_unlock_irqrestore(&cm_id_priv->lock, flags); return ret; } EXPORT_SYMBOL(ib_send_cm_drep); static int cm_issue_drep(struct cm_port *port, struct ib_mad_recv_wc *mad_recv_wc) { struct ib_mad_send_buf *msg = NULL; struct cm_dreq_msg *dreq_msg; struct cm_drep_msg *drep_msg; int ret; ret = cm_alloc_response_msg(port, mad_recv_wc, &msg); if (ret) return ret; dreq_msg = (struct cm_dreq_msg *) mad_recv_wc->recv_buf.mad; drep_msg = (struct cm_drep_msg *) msg->mad; cm_format_mad_hdr(&drep_msg->hdr, CM_DREP_ATTR_ID, dreq_msg->hdr.tid); drep_msg->remote_comm_id = dreq_msg->local_comm_id; drep_msg->local_comm_id = dreq_msg->remote_comm_id; ret = ib_post_send_mad(msg, NULL); if (ret) cm_free_msg(msg); return ret; } static int cm_dreq_handler(struct cm_work *work) { struct cm_id_private *cm_id_priv; struct cm_dreq_msg *dreq_msg; struct ib_mad_send_buf *msg = NULL; int ret; dreq_msg = (struct cm_dreq_msg *)work->mad_recv_wc->recv_buf.mad; cm_id_priv = cm_acquire_id(dreq_msg->remote_comm_id, dreq_msg->local_comm_id); if (!cm_id_priv) { atomic_long_inc(&work->port->counter_group[CM_RECV_DUPLICATES]. counter[CM_DREQ_COUNTER]); cm_issue_drep(work->port, work->mad_recv_wc); return -EINVAL; } work->cm_event.private_data = &dreq_msg->private_data; spin_lock_irq(&cm_id_priv->lock); if (cm_id_priv->local_qpn != cm_dreq_get_remote_qpn(dreq_msg)) goto unlock; switch (cm_id_priv->id.state) { case IB_CM_REP_SENT: case IB_CM_DREQ_SENT: ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg); break; case IB_CM_ESTABLISHED: case IB_CM_MRA_REP_RCVD: break; case IB_CM_TIMEWAIT: atomic_long_inc(&work->port->counter_group[CM_RECV_DUPLICATES]. counter[CM_DREQ_COUNTER]); if (cm_alloc_response_msg(work->port, work->mad_recv_wc, &msg)) goto unlock; cm_format_drep((struct cm_drep_msg *) msg->mad, cm_id_priv, cm_id_priv->private_data, cm_id_priv->private_data_len); spin_unlock_irq(&cm_id_priv->lock); if (ib_post_send_mad(msg, NULL)) cm_free_msg(msg); goto deref; case IB_CM_DREQ_RCVD: atomic_long_inc(&work->port->counter_group[CM_RECV_DUPLICATES]. counter[CM_DREQ_COUNTER]); goto unlock; default: goto unlock; } cm_id_priv->id.state = IB_CM_DREQ_RCVD; cm_id_priv->tid = dreq_msg->hdr.tid; ret = atomic_inc_and_test(&cm_id_priv->work_count); if (!ret) list_add_tail(&work->list, &cm_id_priv->work_list); spin_unlock_irq(&cm_id_priv->lock); if (ret) cm_process_work(cm_id_priv, work); else cm_deref_id(cm_id_priv); return 0; unlock: spin_unlock_irq(&cm_id_priv->lock); deref: cm_deref_id(cm_id_priv); return -EINVAL; } static int cm_drep_handler(struct cm_work *work) { struct cm_id_private *cm_id_priv; struct cm_drep_msg *drep_msg; int ret; drep_msg = (struct cm_drep_msg *)work->mad_recv_wc->recv_buf.mad; cm_id_priv = cm_acquire_id(drep_msg->remote_comm_id, drep_msg->local_comm_id); if (!cm_id_priv) return -EINVAL; work->cm_event.private_data = &drep_msg->private_data; spin_lock_irq(&cm_id_priv->lock); if (cm_id_priv->id.state != IB_CM_DREQ_SENT && cm_id_priv->id.state != IB_CM_DREQ_RCVD) { spin_unlock_irq(&cm_id_priv->lock); goto out; } cm_enter_timewait(cm_id_priv); ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg); ret = atomic_inc_and_test(&cm_id_priv->work_count); if (!ret) list_add_tail(&work->list, &cm_id_priv->work_list); spin_unlock_irq(&cm_id_priv->lock); if (ret) cm_process_work(cm_id_priv, work); else cm_deref_id(cm_id_priv); return 0; out: cm_deref_id(cm_id_priv); return -EINVAL; } int ib_send_cm_rej(struct ib_cm_id *cm_id, enum ib_cm_rej_reason reason, void *ari, u8 ari_length, const void *private_data, u8 private_data_len) { struct cm_id_private *cm_id_priv; struct ib_mad_send_buf *msg; unsigned long flags; int ret; if ((private_data && private_data_len > IB_CM_REJ_PRIVATE_DATA_SIZE) || (ari && ari_length > IB_CM_REJ_ARI_LENGTH)) return -EINVAL; cm_id_priv = container_of(cm_id, struct cm_id_private, id); spin_lock_irqsave(&cm_id_priv->lock, flags); switch (cm_id->state) { case IB_CM_REQ_SENT: case IB_CM_MRA_REQ_RCVD: case IB_CM_REQ_RCVD: case IB_CM_MRA_REQ_SENT: case IB_CM_REP_RCVD: case IB_CM_MRA_REP_SENT: ret = cm_alloc_msg(cm_id_priv, &msg); if (!ret) cm_format_rej((struct cm_rej_msg *) msg->mad, cm_id_priv, reason, ari, ari_length, private_data, private_data_len); cm_reset_to_idle(cm_id_priv); break; case IB_CM_REP_SENT: case IB_CM_MRA_REP_RCVD: ret = cm_alloc_msg(cm_id_priv, &msg); if (!ret) cm_format_rej((struct cm_rej_msg *) msg->mad, cm_id_priv, reason, ari, ari_length, private_data, private_data_len); cm_enter_timewait(cm_id_priv); break; default: ret = -EINVAL; goto out; } if (ret) goto out; ret = ib_post_send_mad(msg, NULL); if (ret) cm_free_msg(msg); out: spin_unlock_irqrestore(&cm_id_priv->lock, flags); return ret; } EXPORT_SYMBOL(ib_send_cm_rej); static void cm_format_rej_event(struct cm_work *work) { struct cm_rej_msg *rej_msg; struct ib_cm_rej_event_param *param; rej_msg = (struct cm_rej_msg *)work->mad_recv_wc->recv_buf.mad; param = &work->cm_event.param.rej_rcvd; param->ari = rej_msg->ari; param->ari_length = cm_rej_get_reject_info_len(rej_msg); param->reason = __be16_to_cpu(rej_msg->reason); work->cm_event.private_data = &rej_msg->private_data; } static struct cm_id_private * cm_acquire_rejected_id(struct cm_rej_msg *rej_msg) { struct cm_timewait_info *timewait_info; struct cm_id_private *cm_id_priv; __be32 remote_id; remote_id = rej_msg->local_comm_id; if (__be16_to_cpu(rej_msg->reason) == IB_CM_REJ_TIMEOUT) { spin_lock_irq(&cm.lock); timewait_info = cm_find_remote_id( *((__be64 *) rej_msg->ari), remote_id); if (!timewait_info) { spin_unlock_irq(&cm.lock); return NULL; } cm_id_priv = idr_find(&cm.local_id_table, (__force int) (timewait_info->work.local_id ^ cm.random_id_operand)); if (cm_id_priv) { if (cm_id_priv->id.remote_id == remote_id) atomic_inc(&cm_id_priv->refcount); else cm_id_priv = NULL; } spin_unlock_irq(&cm.lock); } else if (cm_rej_get_msg_rejected(rej_msg) == CM_MSG_RESPONSE_REQ) cm_id_priv = cm_acquire_id(rej_msg->remote_comm_id, 0); else cm_id_priv = cm_acquire_id(rej_msg->remote_comm_id, remote_id); return cm_id_priv; } static int cm_rej_handler(struct cm_work *work) { struct cm_id_private *cm_id_priv; struct cm_rej_msg *rej_msg; int ret; rej_msg = (struct cm_rej_msg *)work->mad_recv_wc->recv_buf.mad; cm_id_priv = cm_acquire_rejected_id(rej_msg); if (!cm_id_priv) return -EINVAL; cm_format_rej_event(work); spin_lock_irq(&cm_id_priv->lock); switch (cm_id_priv->id.state) { case IB_CM_REQ_SENT: case IB_CM_MRA_REQ_RCVD: case IB_CM_REP_SENT: case IB_CM_MRA_REP_RCVD: ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg); /* fall through */ case IB_CM_REQ_RCVD: case IB_CM_MRA_REQ_SENT: if (__be16_to_cpu(rej_msg->reason) == IB_CM_REJ_STALE_CONN) cm_enter_timewait(cm_id_priv); else cm_reset_to_idle(cm_id_priv); break; case IB_CM_DREQ_SENT: ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg); /* fall through */ case IB_CM_REP_RCVD: case IB_CM_MRA_REP_SENT: case IB_CM_ESTABLISHED: cm_enter_timewait(cm_id_priv); break; default: spin_unlock_irq(&cm_id_priv->lock); ret = -EINVAL; goto out; } ret = atomic_inc_and_test(&cm_id_priv->work_count); if (!ret) list_add_tail(&work->list, &cm_id_priv->work_list); spin_unlock_irq(&cm_id_priv->lock); if (ret) cm_process_work(cm_id_priv, work); else cm_deref_id(cm_id_priv); return 0; out: cm_deref_id(cm_id_priv); return -EINVAL; } int ib_send_cm_mra(struct ib_cm_id *cm_id, u8 service_timeout, const void *private_data, u8 private_data_len) { struct cm_id_private *cm_id_priv; struct ib_mad_send_buf *msg; enum ib_cm_state cm_state; enum ib_cm_lap_state lap_state; enum cm_msg_response msg_response; void *data; unsigned long flags; int ret; if (private_data && private_data_len > IB_CM_MRA_PRIVATE_DATA_SIZE) return -EINVAL; data = cm_copy_private_data(private_data, private_data_len); if (IS_ERR(data)) return PTR_ERR(data); cm_id_priv = container_of(cm_id, struct cm_id_private, id); spin_lock_irqsave(&cm_id_priv->lock, flags); switch(cm_id_priv->id.state) { case IB_CM_REQ_RCVD: cm_state = IB_CM_MRA_REQ_SENT; lap_state = cm_id->lap_state; msg_response = CM_MSG_RESPONSE_REQ; break; case IB_CM_REP_RCVD: cm_state = IB_CM_MRA_REP_SENT; lap_state = cm_id->lap_state; msg_response = CM_MSG_RESPONSE_REP; break; case IB_CM_ESTABLISHED: if (cm_id->lap_state == IB_CM_LAP_RCVD) { cm_state = cm_id->state; lap_state = IB_CM_MRA_LAP_SENT; msg_response = CM_MSG_RESPONSE_OTHER; break; } default: ret = -EINVAL; goto error1; } if (!(service_timeout & IB_CM_MRA_FLAG_DELAY)) { ret = cm_alloc_msg(cm_id_priv, &msg); if (ret) goto error1; cm_format_mra((struct cm_mra_msg *) msg->mad, cm_id_priv, msg_response, service_timeout, private_data, private_data_len); ret = ib_post_send_mad(msg, NULL); if (ret) goto error2; } cm_id->state = cm_state; cm_id->lap_state = lap_state; cm_id_priv->service_timeout = service_timeout; cm_set_private_data(cm_id_priv, data, private_data_len); spin_unlock_irqrestore(&cm_id_priv->lock, flags); return 0; error1: spin_unlock_irqrestore(&cm_id_priv->lock, flags); kfree(data); return ret; error2: spin_unlock_irqrestore(&cm_id_priv->lock, flags); kfree(data); cm_free_msg(msg); return ret; } EXPORT_SYMBOL(ib_send_cm_mra); static struct cm_id_private * cm_acquire_mraed_id(struct cm_mra_msg *mra_msg) { switch (cm_mra_get_msg_mraed(mra_msg)) { case CM_MSG_RESPONSE_REQ: return cm_acquire_id(mra_msg->remote_comm_id, 0); case CM_MSG_RESPONSE_REP: case CM_MSG_RESPONSE_OTHER: return cm_acquire_id(mra_msg->remote_comm_id, mra_msg->local_comm_id); default: return NULL; } } static int cm_mra_handler(struct cm_work *work) { struct cm_id_private *cm_id_priv; struct cm_mra_msg *mra_msg; int timeout, ret; mra_msg = (struct cm_mra_msg *)work->mad_recv_wc->recv_buf.mad; cm_id_priv = cm_acquire_mraed_id(mra_msg); if (!cm_id_priv) return -EINVAL; work->cm_event.private_data = &mra_msg->private_data; work->cm_event.param.mra_rcvd.service_timeout = cm_mra_get_service_timeout(mra_msg); timeout = cm_convert_to_ms(cm_mra_get_service_timeout(mra_msg)) + cm_convert_to_ms(cm_id_priv->av.timeout); if (timeout > cm_convert_to_ms(max_timeout)) { printk(KERN_WARNING PFX "calculated mra timeout %d > %d, " "decreasing used timeout_ms\n", timeout, cm_convert_to_ms(max_timeout)); timeout = cm_convert_to_ms(max_timeout); } spin_lock_irq(&cm_id_priv->lock); switch (cm_id_priv->id.state) { case IB_CM_REQ_SENT: if (cm_mra_get_msg_mraed(mra_msg) != CM_MSG_RESPONSE_REQ || ib_modify_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg, timeout)) goto out; cm_id_priv->id.state = IB_CM_MRA_REQ_RCVD; break; case IB_CM_REP_SENT: if (cm_mra_get_msg_mraed(mra_msg) != CM_MSG_RESPONSE_REP || ib_modify_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg, timeout)) goto out; cm_id_priv->id.state = IB_CM_MRA_REP_RCVD; break; case IB_CM_ESTABLISHED: if (cm_mra_get_msg_mraed(mra_msg) != CM_MSG_RESPONSE_OTHER || cm_id_priv->id.lap_state != IB_CM_LAP_SENT || ib_modify_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg, timeout)) { if (cm_id_priv->id.lap_state == IB_CM_MRA_LAP_RCVD) atomic_long_inc(&work->port-> counter_group[CM_RECV_DUPLICATES]. counter[CM_MRA_COUNTER]); goto out; } cm_id_priv->id.lap_state = IB_CM_MRA_LAP_RCVD; break; case IB_CM_MRA_REQ_RCVD: case IB_CM_MRA_REP_RCVD: atomic_long_inc(&work->port->counter_group[CM_RECV_DUPLICATES]. counter[CM_MRA_COUNTER]); /* fall through */ default: goto out; } cm_id_priv->msg->context[1] = (void *) (unsigned long) cm_id_priv->id.state; ret = atomic_inc_and_test(&cm_id_priv->work_count); if (!ret) list_add_tail(&work->list, &cm_id_priv->work_list); spin_unlock_irq(&cm_id_priv->lock); if (ret) cm_process_work(cm_id_priv, work); else cm_deref_id(cm_id_priv); return 0; out: spin_unlock_irq(&cm_id_priv->lock); cm_deref_id(cm_id_priv); return -EINVAL; } static void cm_format_lap(struct cm_lap_msg *lap_msg, struct cm_id_private *cm_id_priv, struct ib_sa_path_rec *alternate_path, const void *private_data, u8 private_data_len) { cm_format_mad_hdr(&lap_msg->hdr, CM_LAP_ATTR_ID, cm_form_tid(cm_id_priv, CM_MSG_SEQUENCE_LAP)); lap_msg->local_comm_id = cm_id_priv->id.local_id; lap_msg->remote_comm_id = cm_id_priv->id.remote_id; cm_lap_set_remote_qpn(lap_msg, cm_id_priv->remote_qpn); /* todo: need remote CM response timeout */ cm_lap_set_remote_resp_timeout(lap_msg, 0x1F); lap_msg->alt_local_lid = alternate_path->slid; lap_msg->alt_remote_lid = alternate_path->dlid; lap_msg->alt_local_gid = alternate_path->sgid; lap_msg->alt_remote_gid = alternate_path->dgid; cm_lap_set_flow_label(lap_msg, alternate_path->flow_label); cm_lap_set_traffic_class(lap_msg, alternate_path->traffic_class); lap_msg->alt_hop_limit = alternate_path->hop_limit; cm_lap_set_packet_rate(lap_msg, alternate_path->rate); cm_lap_set_sl(lap_msg, alternate_path->sl); cm_lap_set_subnet_local(lap_msg, 1); /* local only... */ cm_lap_set_local_ack_timeout(lap_msg, cm_ack_timeout(cm_id_priv->av.port->cm_dev->ack_delay, alternate_path->packet_life_time)); if (private_data && private_data_len) memcpy(lap_msg->private_data, private_data, private_data_len); } int ib_send_cm_lap(struct ib_cm_id *cm_id, struct ib_sa_path_rec *alternate_path, const void *private_data, u8 private_data_len) { struct cm_id_private *cm_id_priv; struct ib_mad_send_buf *msg; unsigned long flags; int ret; if (private_data && private_data_len > IB_CM_LAP_PRIVATE_DATA_SIZE) return -EINVAL; cm_id_priv = container_of(cm_id, struct cm_id_private, id); spin_lock_irqsave(&cm_id_priv->lock, flags); if (cm_id->state != IB_CM_ESTABLISHED || (cm_id->lap_state != IB_CM_LAP_UNINIT && cm_id->lap_state != IB_CM_LAP_IDLE)) { ret = -EINVAL; goto out; } ret = cm_init_av_by_path(alternate_path, &cm_id_priv->alt_av); if (ret) goto out; cm_id_priv->alt_av.timeout = cm_ack_timeout(cm_id_priv->target_ack_delay, cm_id_priv->alt_av.timeout - 1); ret = cm_alloc_msg(cm_id_priv, &msg); if (ret) goto out; cm_format_lap((struct cm_lap_msg *) msg->mad, cm_id_priv, alternate_path, private_data, private_data_len); msg->timeout_ms = cm_id_priv->timeout_ms; msg->context[1] = (void *) (unsigned long) IB_CM_ESTABLISHED; ret = ib_post_send_mad(msg, NULL); if (ret) { spin_unlock_irqrestore(&cm_id_priv->lock, flags); cm_free_msg(msg); return ret; } cm_id->lap_state = IB_CM_LAP_SENT; cm_id_priv->msg = msg; out: spin_unlock_irqrestore(&cm_id_priv->lock, flags); return ret; } EXPORT_SYMBOL(ib_send_cm_lap); static void cm_format_path_from_lap(struct cm_id_private *cm_id_priv, struct ib_sa_path_rec *path, struct cm_lap_msg *lap_msg) { memset(path, 0, sizeof *path); path->dgid = lap_msg->alt_local_gid; path->sgid = lap_msg->alt_remote_gid; path->dlid = lap_msg->alt_local_lid; path->slid = lap_msg->alt_remote_lid; path->flow_label = cm_lap_get_flow_label(lap_msg); path->hop_limit = lap_msg->alt_hop_limit; path->traffic_class = cm_lap_get_traffic_class(lap_msg); path->reversible = 1; path->pkey = cm_id_priv->pkey; path->sl = cm_lap_get_sl(lap_msg); path->mtu_selector = IB_SA_EQ; path->mtu = cm_id_priv->path_mtu; path->rate_selector = IB_SA_EQ; path->rate = cm_lap_get_packet_rate(lap_msg); path->packet_life_time_selector = IB_SA_EQ; path->packet_life_time = cm_lap_get_local_ack_timeout(lap_msg); path->packet_life_time -= (path->packet_life_time > 0); } static int cm_lap_handler(struct cm_work *work) { struct cm_id_private *cm_id_priv; struct cm_lap_msg *lap_msg; struct ib_cm_lap_event_param *param; struct ib_mad_send_buf *msg = NULL; int ret; /* todo: verify LAP request and send reject APR if invalid. */ lap_msg = (struct cm_lap_msg *)work->mad_recv_wc->recv_buf.mad; cm_id_priv = cm_acquire_id(lap_msg->remote_comm_id, lap_msg->local_comm_id); if (!cm_id_priv) return -EINVAL; param = &work->cm_event.param.lap_rcvd; param->alternate_path = &work->path[0]; cm_format_path_from_lap(cm_id_priv, param->alternate_path, lap_msg); work->cm_event.private_data = &lap_msg->private_data; spin_lock_irq(&cm_id_priv->lock); if (cm_id_priv->id.state != IB_CM_ESTABLISHED) goto unlock; switch (cm_id_priv->id.lap_state) { case IB_CM_LAP_UNINIT: case IB_CM_LAP_IDLE: break; case IB_CM_MRA_LAP_SENT: atomic_long_inc(&work->port->counter_group[CM_RECV_DUPLICATES]. counter[CM_LAP_COUNTER]); if (cm_alloc_response_msg(work->port, work->mad_recv_wc, &msg)) goto unlock; cm_format_mra((struct cm_mra_msg *) msg->mad, cm_id_priv, CM_MSG_RESPONSE_OTHER, cm_id_priv->service_timeout, cm_id_priv->private_data, cm_id_priv->private_data_len); spin_unlock_irq(&cm_id_priv->lock); if (ib_post_send_mad(msg, NULL)) cm_free_msg(msg); goto deref; case IB_CM_LAP_RCVD: atomic_long_inc(&work->port->counter_group[CM_RECV_DUPLICATES]. counter[CM_LAP_COUNTER]); goto unlock; default: goto unlock; } cm_id_priv->id.lap_state = IB_CM_LAP_RCVD; cm_id_priv->tid = lap_msg->hdr.tid; cm_init_av_for_response(work->port, work->mad_recv_wc->wc, work->mad_recv_wc->recv_buf.grh, &cm_id_priv->av); cm_init_av_by_path(param->alternate_path, &cm_id_priv->alt_av); ret = atomic_inc_and_test(&cm_id_priv->work_count); if (!ret) list_add_tail(&work->list, &cm_id_priv->work_list); spin_unlock_irq(&cm_id_priv->lock); if (ret) cm_process_work(cm_id_priv, work); else cm_deref_id(cm_id_priv); return 0; unlock: spin_unlock_irq(&cm_id_priv->lock); deref: cm_deref_id(cm_id_priv); return -EINVAL; } static void cm_format_apr(struct cm_apr_msg *apr_msg, struct cm_id_private *cm_id_priv, enum ib_cm_apr_status status, void *info, u8 info_length, const void *private_data, u8 private_data_len) { cm_format_mad_hdr(&apr_msg->hdr, CM_APR_ATTR_ID, cm_id_priv->tid); apr_msg->local_comm_id = cm_id_priv->id.local_id; apr_msg->remote_comm_id = cm_id_priv->id.remote_id; apr_msg->ap_status = (u8) status; if (info && info_length) { apr_msg->info_length = info_length; memcpy(apr_msg->info, info, info_length); } if (private_data && private_data_len) memcpy(apr_msg->private_data, private_data, private_data_len); } int ib_send_cm_apr(struct ib_cm_id *cm_id, enum ib_cm_apr_status status, void *info, u8 info_length, const void *private_data, u8 private_data_len) { struct cm_id_private *cm_id_priv; struct ib_mad_send_buf *msg; unsigned long flags; int ret; if ((private_data && private_data_len > IB_CM_APR_PRIVATE_DATA_SIZE) || (info && info_length > IB_CM_APR_INFO_LENGTH)) return -EINVAL; cm_id_priv = container_of(cm_id, struct cm_id_private, id); spin_lock_irqsave(&cm_id_priv->lock, flags); if (cm_id->state != IB_CM_ESTABLISHED || (cm_id->lap_state != IB_CM_LAP_RCVD && cm_id->lap_state != IB_CM_MRA_LAP_SENT)) { ret = -EINVAL; goto out; } ret = cm_alloc_msg(cm_id_priv, &msg); if (ret) goto out; cm_format_apr((struct cm_apr_msg *) msg->mad, cm_id_priv, status, info, info_length, private_data, private_data_len); ret = ib_post_send_mad(msg, NULL); if (ret) { spin_unlock_irqrestore(&cm_id_priv->lock, flags); cm_free_msg(msg); return ret; } cm_id->lap_state = IB_CM_LAP_IDLE; out: spin_unlock_irqrestore(&cm_id_priv->lock, flags); return ret; } EXPORT_SYMBOL(ib_send_cm_apr); static int cm_apr_handler(struct cm_work *work) { struct cm_id_private *cm_id_priv; struct cm_apr_msg *apr_msg; int ret; apr_msg = (struct cm_apr_msg *)work->mad_recv_wc->recv_buf.mad; cm_id_priv = cm_acquire_id(apr_msg->remote_comm_id, apr_msg->local_comm_id); if (!cm_id_priv) return -EINVAL; /* Unmatched reply. */ work->cm_event.param.apr_rcvd.ap_status = apr_msg->ap_status; work->cm_event.param.apr_rcvd.apr_info = &apr_msg->info; work->cm_event.param.apr_rcvd.info_len = apr_msg->info_length; work->cm_event.private_data = &apr_msg->private_data; spin_lock_irq(&cm_id_priv->lock); if (cm_id_priv->id.state != IB_CM_ESTABLISHED || (cm_id_priv->id.lap_state != IB_CM_LAP_SENT && cm_id_priv->id.lap_state != IB_CM_MRA_LAP_RCVD)) { spin_unlock_irq(&cm_id_priv->lock); goto out; } cm_id_priv->id.lap_state = IB_CM_LAP_IDLE; ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg); cm_id_priv->msg = NULL; ret = atomic_inc_and_test(&cm_id_priv->work_count); if (!ret) list_add_tail(&work->list, &cm_id_priv->work_list); spin_unlock_irq(&cm_id_priv->lock); if (ret) cm_process_work(cm_id_priv, work); else cm_deref_id(cm_id_priv); return 0; out: cm_deref_id(cm_id_priv); return -EINVAL; } static int cm_timewait_handler(struct cm_work *work) { struct cm_timewait_info *timewait_info; struct cm_id_private *cm_id_priv; int ret; timewait_info = (struct cm_timewait_info *)work; spin_lock_irq(&cm.lock); list_del(&timewait_info->list); spin_unlock_irq(&cm.lock); cm_id_priv = cm_acquire_id(timewait_info->work.local_id, timewait_info->work.remote_id); if (!cm_id_priv) return -EINVAL; spin_lock_irq(&cm_id_priv->lock); if (cm_id_priv->id.state != IB_CM_TIMEWAIT || cm_id_priv->remote_qpn != timewait_info->remote_qpn) { spin_unlock_irq(&cm_id_priv->lock); goto out; } cm_id_priv->id.state = IB_CM_IDLE; ret = atomic_inc_and_test(&cm_id_priv->work_count); if (!ret) list_add_tail(&work->list, &cm_id_priv->work_list); spin_unlock_irq(&cm_id_priv->lock); if (ret) cm_process_work(cm_id_priv, work); else cm_deref_id(cm_id_priv); return 0; out: cm_deref_id(cm_id_priv); return -EINVAL; } static void cm_format_sidr_req(struct cm_sidr_req_msg *sidr_req_msg, struct cm_id_private *cm_id_priv, struct ib_cm_sidr_req_param *param) { cm_format_mad_hdr(&sidr_req_msg->hdr, CM_SIDR_REQ_ATTR_ID, cm_form_tid(cm_id_priv, CM_MSG_SEQUENCE_SIDR)); sidr_req_msg->request_id = cm_id_priv->id.local_id; sidr_req_msg->pkey = param->path->pkey; sidr_req_msg->service_id = param->service_id; if (param->private_data && param->private_data_len) memcpy(sidr_req_msg->private_data, param->private_data, param->private_data_len); } int ib_send_cm_sidr_req(struct ib_cm_id *cm_id, struct ib_cm_sidr_req_param *param) { struct cm_id_private *cm_id_priv; struct ib_mad_send_buf *msg; unsigned long flags; int ret; if (!param->path || (param->private_data && param->private_data_len > IB_CM_SIDR_REQ_PRIVATE_DATA_SIZE)) return -EINVAL; cm_id_priv = container_of(cm_id, struct cm_id_private, id); ret = cm_init_av_by_path(param->path, &cm_id_priv->av); if (ret) goto out; cm_id->service_id = param->service_id; cm_id->service_mask = ~cpu_to_be64(0); cm_id_priv->timeout_ms = param->timeout_ms; if (cm_id_priv->timeout_ms > cm_convert_to_ms(max_timeout)) { printk(KERN_WARNING PFX "sidr req timeout_ms %d > %d, " "decreasing used timeout_ms\n", param->timeout_ms, cm_convert_to_ms(max_timeout)); cm_id_priv->timeout_ms = cm_convert_to_ms(max_timeout); } cm_id_priv->max_cm_retries = param->max_cm_retries; ret = cm_alloc_msg(cm_id_priv, &msg); if (ret) goto out; cm_format_sidr_req((struct cm_sidr_req_msg *) msg->mad, cm_id_priv, param); msg->timeout_ms = cm_id_priv->timeout_ms; msg->context[1] = (void *) (unsigned long) IB_CM_SIDR_REQ_SENT; spin_lock_irqsave(&cm_id_priv->lock, flags); if (cm_id->state == IB_CM_IDLE) ret = ib_post_send_mad(msg, NULL); else ret = -EINVAL; if (ret) { spin_unlock_irqrestore(&cm_id_priv->lock, flags); cm_free_msg(msg); goto out; } cm_id->state = IB_CM_SIDR_REQ_SENT; cm_id_priv->msg = msg; spin_unlock_irqrestore(&cm_id_priv->lock, flags); out: return ret; } EXPORT_SYMBOL(ib_send_cm_sidr_req); static void cm_format_sidr_req_event(struct cm_work *work, struct ib_cm_id *listen_id) { struct cm_sidr_req_msg *sidr_req_msg; struct ib_cm_sidr_req_event_param *param; sidr_req_msg = (struct cm_sidr_req_msg *) work->mad_recv_wc->recv_buf.mad; param = &work->cm_event.param.sidr_req_rcvd; param->pkey = __be16_to_cpu(sidr_req_msg->pkey); param->listen_id = listen_id; param->port = work->port->port_num; work->cm_event.private_data = &sidr_req_msg->private_data; } static int cm_sidr_req_handler(struct cm_work *work) { struct ib_cm_id *cm_id; struct cm_id_private *cm_id_priv, *cur_cm_id_priv; struct cm_sidr_req_msg *sidr_req_msg; struct ib_wc *wc; cm_id = ib_create_cm_id(work->port->cm_dev->ib_device, NULL, NULL); if (IS_ERR(cm_id)) return PTR_ERR(cm_id); cm_id_priv = container_of(cm_id, struct cm_id_private, id); /* Record SGID/SLID and request ID for lookup. */ sidr_req_msg = (struct cm_sidr_req_msg *) work->mad_recv_wc->recv_buf.mad; wc = work->mad_recv_wc->wc; cm_id_priv->av.dgid.global.subnet_prefix = cpu_to_be64(wc->slid); cm_id_priv->av.dgid.global.interface_id = 0; cm_init_av_for_response(work->port, work->mad_recv_wc->wc, work->mad_recv_wc->recv_buf.grh, &cm_id_priv->av); cm_id_priv->id.remote_id = sidr_req_msg->request_id; cm_id_priv->tid = sidr_req_msg->hdr.tid; atomic_inc(&cm_id_priv->work_count); spin_lock_irq(&cm.lock); cur_cm_id_priv = cm_insert_remote_sidr(cm_id_priv); if (cur_cm_id_priv) { spin_unlock_irq(&cm.lock); atomic_long_inc(&work->port->counter_group[CM_RECV_DUPLICATES]. counter[CM_SIDR_REQ_COUNTER]); goto out; /* Duplicate message. */ } cm_id_priv->id.state = IB_CM_SIDR_REQ_RCVD; cur_cm_id_priv = cm_find_listen(cm_id->device, sidr_req_msg->service_id, sidr_req_msg->private_data); if (!cur_cm_id_priv) { spin_unlock_irq(&cm.lock); cm_reject_sidr_req(cm_id_priv, IB_SIDR_UNSUPPORTED); goto out; /* No match. */ } atomic_inc(&cur_cm_id_priv->refcount); spin_unlock_irq(&cm.lock); cm_id_priv->id.cm_handler = cur_cm_id_priv->id.cm_handler; cm_id_priv->id.context = cur_cm_id_priv->id.context; cm_id_priv->id.service_id = sidr_req_msg->service_id; cm_id_priv->id.service_mask = ~cpu_to_be64(0); cm_format_sidr_req_event(work, &cur_cm_id_priv->id); cm_process_work(cm_id_priv, work); cm_deref_id(cur_cm_id_priv); return 0; out: ib_destroy_cm_id(&cm_id_priv->id); return -EINVAL; } static void cm_format_sidr_rep(struct cm_sidr_rep_msg *sidr_rep_msg, struct cm_id_private *cm_id_priv, struct ib_cm_sidr_rep_param *param) { cm_format_mad_hdr(&sidr_rep_msg->hdr, CM_SIDR_REP_ATTR_ID, cm_id_priv->tid); sidr_rep_msg->request_id = cm_id_priv->id.remote_id; sidr_rep_msg->status = param->status; cm_sidr_rep_set_qpn(sidr_rep_msg, cpu_to_be32(param->qp_num)); sidr_rep_msg->service_id = cm_id_priv->id.service_id; sidr_rep_msg->qkey = cpu_to_be32(param->qkey); if (param->info && param->info_length) memcpy(sidr_rep_msg->info, param->info, param->info_length); if (param->private_data && param->private_data_len) memcpy(sidr_rep_msg->private_data, param->private_data, param->private_data_len); } int ib_send_cm_sidr_rep(struct ib_cm_id *cm_id, struct ib_cm_sidr_rep_param *param) { struct cm_id_private *cm_id_priv; struct ib_mad_send_buf *msg; unsigned long flags; int ret; if ((param->info && param->info_length > IB_CM_SIDR_REP_INFO_LENGTH) || (param->private_data && param->private_data_len > IB_CM_SIDR_REP_PRIVATE_DATA_SIZE)) return -EINVAL; cm_id_priv = container_of(cm_id, struct cm_id_private, id); spin_lock_irqsave(&cm_id_priv->lock, flags); if (cm_id->state != IB_CM_SIDR_REQ_RCVD) { ret = -EINVAL; goto error; } ret = cm_alloc_msg(cm_id_priv, &msg); if (ret) goto error; cm_format_sidr_rep((struct cm_sidr_rep_msg *) msg->mad, cm_id_priv, param); ret = ib_post_send_mad(msg, NULL); if (ret) { spin_unlock_irqrestore(&cm_id_priv->lock, flags); cm_free_msg(msg); return ret; } cm_id->state = IB_CM_IDLE; spin_unlock_irqrestore(&cm_id_priv->lock, flags); spin_lock_irqsave(&cm.lock, flags); rb_erase(&cm_id_priv->sidr_id_node, &cm.remote_sidr_table); spin_unlock_irqrestore(&cm.lock, flags); return 0; error: spin_unlock_irqrestore(&cm_id_priv->lock, flags); return ret; } EXPORT_SYMBOL(ib_send_cm_sidr_rep); static void cm_format_sidr_rep_event(struct cm_work *work) { struct cm_sidr_rep_msg *sidr_rep_msg; struct ib_cm_sidr_rep_event_param *param; sidr_rep_msg = (struct cm_sidr_rep_msg *) work->mad_recv_wc->recv_buf.mad; param = &work->cm_event.param.sidr_rep_rcvd; param->status = sidr_rep_msg->status; param->qkey = be32_to_cpu(sidr_rep_msg->qkey); param->qpn = be32_to_cpu(cm_sidr_rep_get_qpn(sidr_rep_msg)); param->info = &sidr_rep_msg->info; param->info_len = sidr_rep_msg->info_length; work->cm_event.private_data = &sidr_rep_msg->private_data; } static int cm_sidr_rep_handler(struct cm_work *work) { struct cm_sidr_rep_msg *sidr_rep_msg; struct cm_id_private *cm_id_priv; sidr_rep_msg = (struct cm_sidr_rep_msg *) work->mad_recv_wc->recv_buf.mad; cm_id_priv = cm_acquire_id(sidr_rep_msg->request_id, 0); if (!cm_id_priv) return -EINVAL; /* Unmatched reply. */ spin_lock_irq(&cm_id_priv->lock); if (cm_id_priv->id.state != IB_CM_SIDR_REQ_SENT) { spin_unlock_irq(&cm_id_priv->lock); goto out; } cm_id_priv->id.state = IB_CM_IDLE; ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg); spin_unlock_irq(&cm_id_priv->lock); cm_format_sidr_rep_event(work); cm_process_work(cm_id_priv, work); return 0; out: cm_deref_id(cm_id_priv); return -EINVAL; } static void cm_process_send_error(struct ib_mad_send_buf *msg, enum ib_wc_status wc_status) { struct cm_id_private *cm_id_priv; struct ib_cm_event cm_event; enum ib_cm_state state; int ret; memset(&cm_event, 0, sizeof cm_event); cm_id_priv = msg->context[0]; /* Discard old sends or ones without a response. */ spin_lock_irq(&cm_id_priv->lock); state = (enum ib_cm_state) (unsigned long) msg->context[1]; if (msg != cm_id_priv->msg || state != cm_id_priv->id.state) goto discard; switch (state) { case IB_CM_REQ_SENT: case IB_CM_MRA_REQ_RCVD: cm_reset_to_idle(cm_id_priv); cm_event.event = IB_CM_REQ_ERROR; break; case IB_CM_REP_SENT: case IB_CM_MRA_REP_RCVD: cm_reset_to_idle(cm_id_priv); cm_event.event = IB_CM_REP_ERROR; break; case IB_CM_DREQ_SENT: cm_enter_timewait(cm_id_priv); cm_event.event = IB_CM_DREQ_ERROR; break; case IB_CM_SIDR_REQ_SENT: cm_id_priv->id.state = IB_CM_IDLE; cm_event.event = IB_CM_SIDR_REQ_ERROR; break; default: goto discard; } spin_unlock_irq(&cm_id_priv->lock); cm_event.param.send_status = wc_status; /* No other events can occur on the cm_id at this point. */ ret = cm_id_priv->id.cm_handler(&cm_id_priv->id, &cm_event); cm_free_msg(msg); if (ret) ib_destroy_cm_id(&cm_id_priv->id); return; discard: spin_unlock_irq(&cm_id_priv->lock); cm_free_msg(msg); } static void cm_send_handler(struct ib_mad_agent *mad_agent, struct ib_mad_send_wc *mad_send_wc) { struct ib_mad_send_buf *msg = mad_send_wc->send_buf; struct cm_port *port; u16 attr_index; port = mad_agent->context; attr_index = be16_to_cpu(((struct ib_mad_hdr *) msg->mad)->attr_id) - CM_ATTR_ID_OFFSET; /* * If the send was in response to a received message (context[0] is not * set to a cm_id), and is not a REJ, then it is a send that was * manually retried. */ if (!msg->context[0] && (attr_index != CM_REJ_COUNTER)) msg->retries = 1; atomic_long_add(1 + msg->retries, &port->counter_group[CM_XMIT].counter[attr_index]); if (msg->retries) atomic_long_add(msg->retries, &port->counter_group[CM_XMIT_RETRIES]. counter[attr_index]); switch (mad_send_wc->status) { case IB_WC_SUCCESS: case IB_WC_WR_FLUSH_ERR: cm_free_msg(msg); break; default: if (msg->context[0] && msg->context[1]) cm_process_send_error(msg, mad_send_wc->status); else cm_free_msg(msg); break; } } static void cm_work_handler(struct work_struct *_work) { struct cm_work *work = container_of(_work, struct cm_work, work.work); int ret; switch (work->cm_event.event) { case IB_CM_REQ_RECEIVED: ret = cm_req_handler(work); break; case IB_CM_MRA_RECEIVED: ret = cm_mra_handler(work); break; case IB_CM_REJ_RECEIVED: ret = cm_rej_handler(work); break; case IB_CM_REP_RECEIVED: ret = cm_rep_handler(work); break; case IB_CM_RTU_RECEIVED: ret = cm_rtu_handler(work); break; case IB_CM_USER_ESTABLISHED: ret = cm_establish_handler(work); break; case IB_CM_DREQ_RECEIVED: ret = cm_dreq_handler(work); break; case IB_CM_DREP_RECEIVED: ret = cm_drep_handler(work); break; case IB_CM_SIDR_REQ_RECEIVED: ret = cm_sidr_req_handler(work); break; case IB_CM_SIDR_REP_RECEIVED: ret = cm_sidr_rep_handler(work); break; case IB_CM_LAP_RECEIVED: ret = cm_lap_handler(work); break; case IB_CM_APR_RECEIVED: ret = cm_apr_handler(work); break; case IB_CM_TIMEWAIT_EXIT: ret = cm_timewait_handler(work); break; default: ret = -EINVAL; break; } if (ret) cm_free_work(work); } static int cm_establish(struct ib_cm_id *cm_id) { struct cm_id_private *cm_id_priv; struct cm_work *work; unsigned long flags; int ret = 0; work = kmalloc(sizeof *work, GFP_ATOMIC); if (!work) return -ENOMEM; cm_id_priv = container_of(cm_id, struct cm_id_private, id); spin_lock_irqsave(&cm_id_priv->lock, flags); switch (cm_id->state) { case IB_CM_REP_SENT: case IB_CM_MRA_REP_RCVD: cm_id->state = IB_CM_ESTABLISHED; break; case IB_CM_ESTABLISHED: ret = -EISCONN; break; default: ret = -EINVAL; break; } spin_unlock_irqrestore(&cm_id_priv->lock, flags); if (ret) { kfree(work); goto out; } /* * The CM worker thread may try to destroy the cm_id before it * can execute this work item. To prevent potential deadlock, * we need to find the cm_id once we're in the context of the * worker thread, rather than holding a reference on it. */ INIT_DELAYED_WORK(&work->work, cm_work_handler); work->local_id = cm_id->local_id; work->remote_id = cm_id->remote_id; work->mad_recv_wc = NULL; work->cm_event.event = IB_CM_USER_ESTABLISHED; queue_delayed_work(cm.wq, &work->work, 0); out: return ret; } static int cm_migrate(struct ib_cm_id *cm_id) { struct cm_id_private *cm_id_priv; unsigned long flags; int ret = 0; cm_id_priv = container_of(cm_id, struct cm_id_private, id); spin_lock_irqsave(&cm_id_priv->lock, flags); if (cm_id->state == IB_CM_ESTABLISHED && (cm_id->lap_state == IB_CM_LAP_UNINIT || cm_id->lap_state == IB_CM_LAP_IDLE)) { cm_id->lap_state = IB_CM_LAP_IDLE; cm_id_priv->av = cm_id_priv->alt_av; } else ret = -EINVAL; spin_unlock_irqrestore(&cm_id_priv->lock, flags); return ret; } int ib_cm_notify(struct ib_cm_id *cm_id, enum ib_event_type event) { int ret; switch (event) { case IB_EVENT_COMM_EST: ret = cm_establish(cm_id); break; case IB_EVENT_PATH_MIG: ret = cm_migrate(cm_id); break; default: ret = -EINVAL; } return ret; } EXPORT_SYMBOL(ib_cm_notify); static void cm_recv_handler(struct ib_mad_agent *mad_agent, struct ib_mad_recv_wc *mad_recv_wc) { struct cm_port *port = mad_agent->context; struct cm_work *work; enum ib_cm_event_type event; u16 attr_id; int paths = 0; switch (mad_recv_wc->recv_buf.mad->mad_hdr.attr_id) { case CM_REQ_ATTR_ID: paths = 1 + (((struct cm_req_msg *) mad_recv_wc->recv_buf.mad)-> alt_local_lid != 0); event = IB_CM_REQ_RECEIVED; break; case CM_MRA_ATTR_ID: event = IB_CM_MRA_RECEIVED; break; case CM_REJ_ATTR_ID: event = IB_CM_REJ_RECEIVED; break; case CM_REP_ATTR_ID: event = IB_CM_REP_RECEIVED; break; case CM_RTU_ATTR_ID: event = IB_CM_RTU_RECEIVED; break; case CM_DREQ_ATTR_ID: event = IB_CM_DREQ_RECEIVED; break; case CM_DREP_ATTR_ID: event = IB_CM_DREP_RECEIVED; break; case CM_SIDR_REQ_ATTR_ID: event = IB_CM_SIDR_REQ_RECEIVED; break; case CM_SIDR_REP_ATTR_ID: event = IB_CM_SIDR_REP_RECEIVED; break; case CM_LAP_ATTR_ID: paths = 1; event = IB_CM_LAP_RECEIVED; break; case CM_APR_ATTR_ID: event = IB_CM_APR_RECEIVED; break; default: ib_free_recv_mad(mad_recv_wc); return; } attr_id = be16_to_cpu(mad_recv_wc->recv_buf.mad->mad_hdr.attr_id); atomic_long_inc(&port->counter_group[CM_RECV]. counter[attr_id - CM_ATTR_ID_OFFSET]); work = kmalloc(sizeof *work + sizeof(struct ib_sa_path_rec) * paths, GFP_KERNEL); if (!work) { ib_free_recv_mad(mad_recv_wc); return; } INIT_DELAYED_WORK(&work->work, cm_work_handler); work->cm_event.event = event; work->mad_recv_wc = mad_recv_wc; work->port = port; queue_delayed_work(cm.wq, &work->work, 0); } static int cm_init_qp_init_attr(struct cm_id_private *cm_id_priv, struct ib_qp_attr *qp_attr, int *qp_attr_mask) { unsigned long flags; int ret; spin_lock_irqsave(&cm_id_priv->lock, flags); switch (cm_id_priv->id.state) { case IB_CM_REQ_SENT: case IB_CM_MRA_REQ_RCVD: case IB_CM_REQ_RCVD: case IB_CM_MRA_REQ_SENT: case IB_CM_REP_RCVD: case IB_CM_MRA_REP_SENT: case IB_CM_REP_SENT: case IB_CM_MRA_REP_RCVD: case IB_CM_ESTABLISHED: *qp_attr_mask = IB_QP_STATE | IB_QP_ACCESS_FLAGS | IB_QP_PKEY_INDEX | IB_QP_PORT; qp_attr->qp_access_flags = IB_ACCESS_REMOTE_WRITE; if (cm_id_priv->responder_resources) qp_attr->qp_access_flags |= IB_ACCESS_REMOTE_READ | IB_ACCESS_REMOTE_ATOMIC; qp_attr->pkey_index = cm_id_priv->av.pkey_index; qp_attr->port_num = cm_id_priv->av.port->port_num; ret = 0; break; default: ret = -EINVAL; break; } spin_unlock_irqrestore(&cm_id_priv->lock, flags); return ret; } static int cm_init_qp_rtr_attr(struct cm_id_private *cm_id_priv, struct ib_qp_attr *qp_attr, int *qp_attr_mask) { unsigned long flags; int ret; spin_lock_irqsave(&cm_id_priv->lock, flags); switch (cm_id_priv->id.state) { case IB_CM_REQ_RCVD: case IB_CM_MRA_REQ_SENT: case IB_CM_REP_RCVD: case IB_CM_MRA_REP_SENT: case IB_CM_REP_SENT: case IB_CM_MRA_REP_RCVD: case IB_CM_ESTABLISHED: *qp_attr_mask = IB_QP_STATE | IB_QP_AV | IB_QP_PATH_MTU | IB_QP_DEST_QPN | IB_QP_RQ_PSN; qp_attr->ah_attr = cm_id_priv->av.ah_attr; qp_attr->path_mtu = cm_id_priv->path_mtu; qp_attr->dest_qp_num = be32_to_cpu(cm_id_priv->remote_qpn); qp_attr->rq_psn = be32_to_cpu(cm_id_priv->rq_psn); if (cm_id_priv->qp_type == IB_QPT_RC) { *qp_attr_mask |= IB_QP_MAX_DEST_RD_ATOMIC | IB_QP_MIN_RNR_TIMER; qp_attr->max_dest_rd_atomic = cm_id_priv->responder_resources; qp_attr->min_rnr_timer = 0; } if (cm_id_priv->alt_av.ah_attr.dlid) { *qp_attr_mask |= IB_QP_ALT_PATH; qp_attr->alt_port_num = cm_id_priv->alt_av.port->port_num; qp_attr->alt_pkey_index = cm_id_priv->alt_av.pkey_index; qp_attr->alt_timeout = cm_id_priv->alt_av.timeout; qp_attr->alt_ah_attr = cm_id_priv->alt_av.ah_attr; } ret = 0; break; default: ret = -EINVAL; break; } spin_unlock_irqrestore(&cm_id_priv->lock, flags); return ret; } static int cm_init_qp_rts_attr(struct cm_id_private *cm_id_priv, struct ib_qp_attr *qp_attr, int *qp_attr_mask) { unsigned long flags; int ret; spin_lock_irqsave(&cm_id_priv->lock, flags); switch (cm_id_priv->id.state) { /* Allow transition to RTS before sending REP */ case IB_CM_REQ_RCVD: case IB_CM_MRA_REQ_SENT: case IB_CM_REP_RCVD: case IB_CM_MRA_REP_SENT: case IB_CM_REP_SENT: case IB_CM_MRA_REP_RCVD: case IB_CM_ESTABLISHED: if (cm_id_priv->id.lap_state == IB_CM_LAP_UNINIT) { *qp_attr_mask = IB_QP_STATE | IB_QP_SQ_PSN; qp_attr->sq_psn = be32_to_cpu(cm_id_priv->sq_psn); if (cm_id_priv->qp_type == IB_QPT_RC) { *qp_attr_mask |= IB_QP_TIMEOUT | IB_QP_RETRY_CNT | IB_QP_RNR_RETRY | IB_QP_MAX_QP_RD_ATOMIC; qp_attr->timeout = cm_id_priv->av.timeout; qp_attr->retry_cnt = cm_id_priv->retry_count; qp_attr->rnr_retry = cm_id_priv->rnr_retry_count; qp_attr->max_rd_atomic = cm_id_priv->initiator_depth; } if (cm_id_priv->alt_av.ah_attr.dlid) { *qp_attr_mask |= IB_QP_PATH_MIG_STATE; qp_attr->path_mig_state = IB_MIG_REARM; } } else { *qp_attr_mask = IB_QP_ALT_PATH | IB_QP_PATH_MIG_STATE; qp_attr->alt_port_num = cm_id_priv->alt_av.port->port_num; qp_attr->alt_pkey_index = cm_id_priv->alt_av.pkey_index; qp_attr->alt_timeout = cm_id_priv->alt_av.timeout; qp_attr->alt_ah_attr = cm_id_priv->alt_av.ah_attr; qp_attr->path_mig_state = IB_MIG_REARM; } ret = 0; break; default: ret = -EINVAL; break; } spin_unlock_irqrestore(&cm_id_priv->lock, flags); return ret; } int ib_cm_init_qp_attr(struct ib_cm_id *cm_id, struct ib_qp_attr *qp_attr, int *qp_attr_mask) { struct cm_id_private *cm_id_priv; int ret; cm_id_priv = container_of(cm_id, struct cm_id_private, id); switch (qp_attr->qp_state) { case IB_QPS_INIT: ret = cm_init_qp_init_attr(cm_id_priv, qp_attr, qp_attr_mask); break; case IB_QPS_RTR: ret = cm_init_qp_rtr_attr(cm_id_priv, qp_attr, qp_attr_mask); break; case IB_QPS_RTS: ret = cm_init_qp_rts_attr(cm_id_priv, qp_attr, qp_attr_mask); break; default: ret = -EINVAL; break; } return ret; } EXPORT_SYMBOL(ib_cm_init_qp_attr); static void cm_get_ack_delay(struct cm_device *cm_dev) { struct ib_device_attr attr; if (ib_query_device(cm_dev->ib_device, &attr)) cm_dev->ack_delay = 0; /* acks will rely on packet life time */ else cm_dev->ack_delay = attr.local_ca_ack_delay; } static ssize_t cm_show_counter(struct kobject *obj, struct attribute *attr, char *buf) { struct cm_counter_group *group; struct cm_counter_attribute *cm_attr; group = container_of(obj, struct cm_counter_group, obj); cm_attr = container_of(attr, struct cm_counter_attribute, attr); return sprintf(buf, "%ld\n", atomic_long_read(&group->counter[cm_attr->index])); } static struct sysfs_ops cm_counter_ops = { .show = cm_show_counter }; static struct kobj_type cm_counter_obj_type = { .sysfs_ops = &cm_counter_ops, .default_attrs = cm_counter_default_attrs }; static void cm_release_port_obj(struct kobject *obj) { struct cm_port *cm_port; cm_port = container_of(obj, struct cm_port, port_obj); kfree(cm_port); } static struct kobj_type cm_port_obj_type = { .release = cm_release_port_obj }; struct class cm_class = { .name = "infiniband_cm", }; EXPORT_SYMBOL(cm_class); static int cm_create_port_fs(struct cm_port *port) { int i, ret; ret = kobject_init_and_add(&port->port_obj, &cm_port_obj_type, &port->cm_dev->device->kobj, "%d", port->port_num); if (ret) { kfree(port); return ret; } for (i = 0; i < CM_COUNTER_GROUPS; i++) { ret = kobject_init_and_add(&port->counter_group[i].obj, &cm_counter_obj_type, &port->port_obj, "%s", counter_group_names[i]); if (ret) goto error; } return 0; error: while (i--) kobject_put(&port->counter_group[i].obj); kobject_put(&port->port_obj); return ret; } static void cm_remove_port_fs(struct cm_port *port) { int i; for (i = 0; i < CM_COUNTER_GROUPS; i++) kobject_put(&port->counter_group[i].obj); kobject_put(&port->port_obj); } static void cm_add_one(struct ib_device *ib_device) { struct cm_device *cm_dev; struct cm_port *port; struct ib_mad_reg_req reg_req = { .mgmt_class = IB_MGMT_CLASS_CM, .mgmt_class_version = IB_CM_CLASS_VERSION }; struct ib_port_modify port_modify = { .set_port_cap_mask = IB_PORT_CM_SUP }; unsigned long flags; int ret; u8 i; if (rdma_node_get_transport(ib_device->node_type) != RDMA_TRANSPORT_IB) return; cm_dev = kzalloc(sizeof(*cm_dev) + sizeof(*port) * ib_device->phys_port_cnt, GFP_KERNEL); if (!cm_dev) return; cm_dev->ib_device = ib_device; cm_get_ack_delay(cm_dev); cm_dev->device = device_create(&cm_class, &ib_device->dev, MKDEV(0, 0), NULL, "%s", ib_device->name); if (!cm_dev->device) { kfree(cm_dev); return; } set_bit(IB_MGMT_METHOD_SEND, reg_req.method_mask); for (i = 1; i <= ib_device->phys_port_cnt; i++) { port = kzalloc(sizeof *port, GFP_KERNEL); if (!port) goto error1; cm_dev->port[i-1] = port; port->cm_dev = cm_dev; port->port_num = i; ret = cm_create_port_fs(port); if (ret) goto error1; port->mad_agent = ib_register_mad_agent(ib_device, i, IB_QPT_GSI, ®_req, 0, cm_send_handler, cm_recv_handler, port); if (IS_ERR(port->mad_agent)) goto error2; ret = ib_modify_port(ib_device, i, 0, &port_modify); if (ret) goto error3; } ib_set_client_data(ib_device, &cm_client, cm_dev); write_lock_irqsave(&cm.device_lock, flags); list_add_tail(&cm_dev->list, &cm.device_list); write_unlock_irqrestore(&cm.device_lock, flags); return; error3: ib_unregister_mad_agent(port->mad_agent); error2: cm_remove_port_fs(port); error1: port_modify.set_port_cap_mask = 0; port_modify.clr_port_cap_mask = IB_PORT_CM_SUP; while (--i) { port = cm_dev->port[i-1]; ib_modify_port(ib_device, port->port_num, 0, &port_modify); ib_unregister_mad_agent(port->mad_agent); cm_remove_port_fs(port); } device_unregister(cm_dev->device); kfree(cm_dev); } static void cm_remove_one(struct ib_device *ib_device) { struct cm_device *cm_dev; struct cm_port *port; struct ib_port_modify port_modify = { .clr_port_cap_mask = IB_PORT_CM_SUP }; unsigned long flags; int i; cm_dev = ib_get_client_data(ib_device, &cm_client); if (!cm_dev) return; write_lock_irqsave(&cm.device_lock, flags); list_del(&cm_dev->list); write_unlock_irqrestore(&cm.device_lock, flags); for (i = 1; i <= ib_device->phys_port_cnt; i++) { port = cm_dev->port[i-1]; ib_modify_port(ib_device, port->port_num, 0, &port_modify); ib_unregister_mad_agent(port->mad_agent); flush_workqueue(cm.wq); cm_remove_port_fs(port); } device_unregister(cm_dev->device); kfree(cm_dev); } static int __init ib_cm_init(void) { int ret; memset(&cm, 0, sizeof cm); INIT_LIST_HEAD(&cm.device_list); rwlock_init(&cm.device_lock); spin_lock_init(&cm.lock); cm.listen_service_table = RB_ROOT; cm.listen_service_id = be64_to_cpu(IB_CM_ASSIGN_SERVICE_ID); cm.remote_id_table = RB_ROOT; cm.remote_qp_table = RB_ROOT; cm.remote_sidr_table = RB_ROOT; idr_init(&cm.local_id_table); get_random_bytes(&cm.random_id_operand, sizeof cm.random_id_operand); idr_pre_get(&cm.local_id_table, GFP_KERNEL); INIT_LIST_HEAD(&cm.timewait_list); ret = class_register(&cm_class); if (ret) return -ENOMEM; cm.wq = create_workqueue("ib_cm"); if (!cm.wq) { ret = -ENOMEM; goto error1; } ret = ib_register_client(&cm_client); if (ret) goto error2; return 0; error2: destroy_workqueue(cm.wq); error1: class_unregister(&cm_class); return ret; } static void __exit ib_cm_cleanup(void) { struct cm_timewait_info *timewait_info, *tmp; spin_lock_irq(&cm.lock); list_for_each_entry(timewait_info, &cm.timewait_list, list) cancel_delayed_work(&timewait_info->work.work); spin_unlock_irq(&cm.lock); ib_unregister_client(&cm_client); destroy_workqueue(cm.wq); list_for_each_entry_safe(timewait_info, tmp, &cm.timewait_list, list) { list_del(&timewait_info->list); kfree(timewait_info); } class_unregister(&cm_class); idr_destroy(&cm.local_id_table); } module_init_order(ib_cm_init, SI_ORDER_SECOND); -module_exit(ib_cm_cleanup); +module_exit_order(ib_cm_cleanup, SI_ORDER_FIRST); Index: stable/10/sys/ofed/drivers/infiniband/core/device.c =================================================================== --- stable/10/sys/ofed/drivers/infiniband/core/device.c (revision 271126) +++ stable/10/sys/ofed/drivers/infiniband/core/device.c (revision 271127) @@ -1,772 +1,771 @@ /* * Copyright (c) 2004 Topspin Communications. All rights reserved. * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU * General Public License (GPL) Version 2, available from the file * COPYING in the main directory of this source tree, or the * OpenIB.org BSD license below: * * Redistribution and use in source and binary forms, with or * without modification, are permitted provided that the following * conditions are met: * * - Redistributions of source code must retain the above * copyright notice, this list of conditions and the following * disclaimer. * * - Redistributions in binary form must reproduce the above * copyright notice, this list of conditions and the following * disclaimer in the documentation and/or other materials * provided with the distribution. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ #include #include #include #include #include -#include #include #include #include "core_priv.h" MODULE_AUTHOR("Roland Dreier"); MODULE_DESCRIPTION("core kernel InfiniBand API"); MODULE_LICENSE("Dual BSD/GPL"); #ifdef __ia64__ /* workaround for a bug in hp chipset that would cause kernel panic when dma resources are exhaused */ int dma_map_sg_hp_wa = 0; #endif struct ib_client_data { struct list_head list; struct ib_client *client; void * data; }; static LIST_HEAD(device_list); static LIST_HEAD(client_list); /* * device_mutex protects access to both device_list and client_list. * There's no real point to using multiple locks or something fancier * like an rwsem: we always access both lists, and we're always * modifying one list or the other list. In any case this is not a * hot path so there's no point in trying to optimize. */ static DEFINE_MUTEX(device_mutex); static int ib_device_check_mandatory(struct ib_device *device) { #define IB_MANDATORY_FUNC(x) { offsetof(struct ib_device, x), #x } static const struct { size_t offset; char *name; } mandatory_table[] = { IB_MANDATORY_FUNC(query_device), IB_MANDATORY_FUNC(query_port), IB_MANDATORY_FUNC(query_pkey), IB_MANDATORY_FUNC(query_gid), IB_MANDATORY_FUNC(alloc_pd), IB_MANDATORY_FUNC(dealloc_pd), IB_MANDATORY_FUNC(create_ah), IB_MANDATORY_FUNC(destroy_ah), IB_MANDATORY_FUNC(create_qp), IB_MANDATORY_FUNC(modify_qp), IB_MANDATORY_FUNC(destroy_qp), IB_MANDATORY_FUNC(post_send), IB_MANDATORY_FUNC(post_recv), IB_MANDATORY_FUNC(create_cq), IB_MANDATORY_FUNC(destroy_cq), IB_MANDATORY_FUNC(poll_cq), IB_MANDATORY_FUNC(req_notify_cq), IB_MANDATORY_FUNC(get_dma_mr), IB_MANDATORY_FUNC(dereg_mr) }; int i; for (i = 0; i < ARRAY_SIZE(mandatory_table); ++i) { if (!*(void **) ((u_char *) device + mandatory_table[i].offset)) { printk(KERN_WARNING "Device %s is missing mandatory function %s\n", device->name, mandatory_table[i].name); return -EINVAL; } } return 0; } static struct ib_device *__ib_device_get_by_name(const char *name) { struct ib_device *device; list_for_each_entry(device, &device_list, core_list) if (!strncmp(name, device->name, IB_DEVICE_NAME_MAX)) return device; return NULL; } static int alloc_name(char *name) { unsigned long *inuse; char buf[IB_DEVICE_NAME_MAX]; struct ib_device *device; int i; inuse = (unsigned long *) get_zeroed_page(GFP_KERNEL); if (!inuse) return -ENOMEM; list_for_each_entry(device, &device_list, core_list) { if (!sscanf(device->name, name, &i)) continue; if (i < 0 || i >= PAGE_SIZE * 8) continue; snprintf(buf, sizeof buf, name, i); if (!strncmp(buf, device->name, IB_DEVICE_NAME_MAX)) set_bit(i, inuse); } i = find_first_zero_bit(inuse, PAGE_SIZE * 8); free_page((unsigned long) inuse); snprintf(buf, sizeof buf, name, i); if (__ib_device_get_by_name(buf)) return -ENFILE; strlcpy(name, buf, IB_DEVICE_NAME_MAX); return 0; } static int start_port(struct ib_device *device) { return (device->node_type == RDMA_NODE_IB_SWITCH) ? 0 : 1; } static int end_port(struct ib_device *device) { return (device->node_type == RDMA_NODE_IB_SWITCH) ? 0 : device->phys_port_cnt; } /** * ib_alloc_device - allocate an IB device struct * @size:size of structure to allocate * * Low-level drivers should use ib_alloc_device() to allocate &struct * ib_device. @size is the size of the structure to be allocated, * including any private data used by the low-level driver. * ib_dealloc_device() must be used to free structures allocated with * ib_alloc_device(). */ struct ib_device *ib_alloc_device(size_t size) { BUG_ON(size < sizeof (struct ib_device)); return kzalloc(size, GFP_KERNEL); } EXPORT_SYMBOL(ib_alloc_device); /** * ib_dealloc_device - free an IB device struct * @device:structure to free * * Free a structure allocated with ib_alloc_device(). */ void ib_dealloc_device(struct ib_device *device) { if (device->reg_state == IB_DEV_UNINITIALIZED) { kfree(device); return; } BUG_ON(device->reg_state != IB_DEV_UNREGISTERED); kobject_put(&device->dev.kobj); } EXPORT_SYMBOL(ib_dealloc_device); static int add_client_context(struct ib_device *device, struct ib_client *client) { struct ib_client_data *context; unsigned long flags; context = kmalloc(sizeof *context, GFP_KERNEL); if (!context) { printk(KERN_WARNING "Couldn't allocate client context for %s/%s\n", device->name, client->name); return -ENOMEM; } context->client = client; context->data = NULL; spin_lock_irqsave(&device->client_data_lock, flags); list_add(&context->list, &device->client_data_list); spin_unlock_irqrestore(&device->client_data_lock, flags); return 0; } static int read_port_table_lengths(struct ib_device *device) { struct ib_port_attr *tprops = NULL; int num_ports, ret = -ENOMEM; u8 port_index; tprops = kmalloc(sizeof *tprops, GFP_KERNEL); if (!tprops) goto out; num_ports = end_port(device) - start_port(device) + 1; device->pkey_tbl_len = kmalloc(sizeof *device->pkey_tbl_len * num_ports, GFP_KERNEL); device->gid_tbl_len = kmalloc(sizeof *device->gid_tbl_len * num_ports, GFP_KERNEL); if (!device->pkey_tbl_len || !device->gid_tbl_len) goto err; for (port_index = 0; port_index < num_ports; ++port_index) { ret = ib_query_port(device, port_index + start_port(device), tprops); if (ret) goto err; device->pkey_tbl_len[port_index] = tprops->pkey_tbl_len; device->gid_tbl_len[port_index] = tprops->gid_tbl_len; } ret = 0; goto out; err: kfree(device->gid_tbl_len); kfree(device->pkey_tbl_len); out: kfree(tprops); return ret; } /** * ib_register_device - Register an IB device with IB core * @device:Device to register * * Low-level drivers use ib_register_device() to register their * devices with the IB core. All registered clients will receive a * callback for each device that is added. @device must be allocated * with ib_alloc_device(). */ int ib_register_device(struct ib_device *device, int (*port_callback)(struct ib_device *, u8, struct kobject *)) { int ret; mutex_lock(&device_mutex); if (strchr(device->name, '%')) { ret = alloc_name(device->name); if (ret) goto out; } if (ib_device_check_mandatory(device)) { ret = -EINVAL; goto out; } INIT_LIST_HEAD(&device->event_handler_list); INIT_LIST_HEAD(&device->client_data_list); spin_lock_init(&device->event_handler_lock); spin_lock_init(&device->client_data_lock); device->ib_uverbs_xrcd_table = RB_ROOT; mutex_init(&device->xrcd_table_mutex); ret = read_port_table_lengths(device); if (ret) { printk(KERN_WARNING "Couldn't create table lengths cache for device %s\n", device->name); goto out; } ret = ib_device_register_sysfs(device, port_callback); if (ret) { printk(KERN_WARNING "Couldn't register device %s with driver model\n", device->name); kfree(device->gid_tbl_len); kfree(device->pkey_tbl_len); goto out; } list_add_tail(&device->core_list, &device_list); device->reg_state = IB_DEV_REGISTERED; { struct ib_client *client; list_for_each_entry(client, &client_list, list) if (client->add && !add_client_context(device, client)) client->add(device); } out: mutex_unlock(&device_mutex); return ret; } EXPORT_SYMBOL(ib_register_device); /** * ib_unregister_device - Unregister an IB device * @device:Device to unregister * * Unregister an IB device. All clients will receive a remove callback. */ void ib_unregister_device(struct ib_device *device) { struct ib_client *client; struct ib_client_data *context, *tmp; unsigned long flags; mutex_lock(&device_mutex); list_for_each_entry_reverse(client, &client_list, list) if (client->remove) client->remove(device); list_del(&device->core_list); kfree(device->gid_tbl_len); kfree(device->pkey_tbl_len); mutex_unlock(&device_mutex); ib_device_unregister_sysfs(device); spin_lock_irqsave(&device->client_data_lock, flags); list_for_each_entry_safe(context, tmp, &device->client_data_list, list) kfree(context); spin_unlock_irqrestore(&device->client_data_lock, flags); device->reg_state = IB_DEV_UNREGISTERED; } EXPORT_SYMBOL(ib_unregister_device); /** * ib_register_client - Register an IB client * @client:Client to register * * Upper level users of the IB drivers can use ib_register_client() to * register callbacks for IB device addition and removal. When an IB * device is added, each registered client's add method will be called * (in the order the clients were registered), and when a device is * removed, each client's remove method will be called (in the reverse * order that clients were registered). In addition, when * ib_register_client() is called, the client will receive an add * callback for all devices already registered. */ int ib_register_client(struct ib_client *client) { struct ib_device *device; mutex_lock(&device_mutex); list_add_tail(&client->list, &client_list); list_for_each_entry(device, &device_list, core_list) if (client->add && !add_client_context(device, client)) client->add(device); mutex_unlock(&device_mutex); return 0; } EXPORT_SYMBOL(ib_register_client); /** * ib_unregister_client - Unregister an IB client * @client:Client to unregister * * Upper level users use ib_unregister_client() to remove their client * registration. When ib_unregister_client() is called, the client * will receive a remove callback for each IB device still registered. */ void ib_unregister_client(struct ib_client *client) { struct ib_client_data *context, *tmp; struct ib_device *device; unsigned long flags; mutex_lock(&device_mutex); list_for_each_entry(device, &device_list, core_list) { if (client->remove) client->remove(device); spin_lock_irqsave(&device->client_data_lock, flags); list_for_each_entry_safe(context, tmp, &device->client_data_list, list) if (context->client == client) { list_del(&context->list); kfree(context); } spin_unlock_irqrestore(&device->client_data_lock, flags); } list_del(&client->list); mutex_unlock(&device_mutex); } EXPORT_SYMBOL(ib_unregister_client); /** * ib_get_client_data - Get IB client context * @device:Device to get context for * @client:Client to get context for * * ib_get_client_data() returns client context set with * ib_set_client_data(). */ void *ib_get_client_data(struct ib_device *device, struct ib_client *client) { struct ib_client_data *context; void *ret = NULL; unsigned long flags; spin_lock_irqsave(&device->client_data_lock, flags); list_for_each_entry(context, &device->client_data_list, list) if (context->client == client) { ret = context->data; break; } spin_unlock_irqrestore(&device->client_data_lock, flags); return ret; } EXPORT_SYMBOL(ib_get_client_data); /** * ib_set_client_data - Set IB client context * @device:Device to set context for * @client:Client to set context for * @data:Context to set * * ib_set_client_data() sets client context that can be retrieved with * ib_get_client_data(). */ void ib_set_client_data(struct ib_device *device, struct ib_client *client, void *data) { struct ib_client_data *context; unsigned long flags; spin_lock_irqsave(&device->client_data_lock, flags); list_for_each_entry(context, &device->client_data_list, list) if (context->client == client) { context->data = data; goto out; } printk(KERN_WARNING "No client context found for %s/%s\n", device->name, client->name); out: spin_unlock_irqrestore(&device->client_data_lock, flags); } EXPORT_SYMBOL(ib_set_client_data); /** * ib_register_event_handler - Register an IB event handler * @event_handler:Handler to register * * ib_register_event_handler() registers an event handler that will be * called back when asynchronous IB events occur (as defined in * chapter 11 of the InfiniBand Architecture Specification). This * callback may occur in interrupt context. */ int ib_register_event_handler (struct ib_event_handler *event_handler) { unsigned long flags; spin_lock_irqsave(&event_handler->device->event_handler_lock, flags); list_add_tail(&event_handler->list, &event_handler->device->event_handler_list); spin_unlock_irqrestore(&event_handler->device->event_handler_lock, flags); return 0; } EXPORT_SYMBOL(ib_register_event_handler); /** * ib_unregister_event_handler - Unregister an event handler * @event_handler:Handler to unregister * * Unregister an event handler registered with * ib_register_event_handler(). */ int ib_unregister_event_handler(struct ib_event_handler *event_handler) { unsigned long flags; spin_lock_irqsave(&event_handler->device->event_handler_lock, flags); list_del(&event_handler->list); spin_unlock_irqrestore(&event_handler->device->event_handler_lock, flags); return 0; } EXPORT_SYMBOL(ib_unregister_event_handler); /** * ib_dispatch_event - Dispatch an asynchronous event * @event:Event to dispatch * * Low-level drivers must call ib_dispatch_event() to dispatch the * event to all registered event handlers when an asynchronous event * occurs. */ void ib_dispatch_event(struct ib_event *event) { unsigned long flags; struct ib_event_handler *handler; spin_lock_irqsave(&event->device->event_handler_lock, flags); list_for_each_entry(handler, &event->device->event_handler_list, list) handler->handler(handler, event); spin_unlock_irqrestore(&event->device->event_handler_lock, flags); } EXPORT_SYMBOL(ib_dispatch_event); /** * ib_query_device - Query IB device attributes * @device:Device to query * @device_attr:Device attributes * * ib_query_device() returns the attributes of a device through the * @device_attr pointer. */ int ib_query_device(struct ib_device *device, struct ib_device_attr *device_attr) { return device->query_device(device, device_attr); } EXPORT_SYMBOL(ib_query_device); /** * ib_query_port - Query IB port attributes * @device:Device to query * @port_num:Port number to query * @port_attr:Port attributes * * ib_query_port() returns the attributes of a port through the * @port_attr pointer. */ int ib_query_port(struct ib_device *device, u8 port_num, struct ib_port_attr *port_attr) { if (port_num < start_port(device) || port_num > end_port(device)) return -EINVAL; return device->query_port(device, port_num, port_attr); } EXPORT_SYMBOL(ib_query_port); /** * ib_query_gid - Get GID table entry * @device:Device to query * @port_num:Port number to query * @index:GID table index to query * @gid:Returned GID * * ib_query_gid() fetches the specified GID table entry. */ int ib_query_gid(struct ib_device *device, u8 port_num, int index, union ib_gid *gid) { return device->query_gid(device, port_num, index, gid); } EXPORT_SYMBOL(ib_query_gid); /** * ib_query_pkey - Get P_Key table entry * @device:Device to query * @port_num:Port number to query * @index:P_Key table index to query * @pkey:Returned P_Key * * ib_query_pkey() fetches the specified P_Key table entry. */ int ib_query_pkey(struct ib_device *device, u8 port_num, u16 index, u16 *pkey) { return device->query_pkey(device, port_num, index, pkey); } EXPORT_SYMBOL(ib_query_pkey); /** * ib_modify_device - Change IB device attributes * @device:Device to modify * @device_modify_mask:Mask of attributes to change * @device_modify:New attribute values * * ib_modify_device() changes a device's attributes as specified by * the @device_modify_mask and @device_modify structure. */ int ib_modify_device(struct ib_device *device, int device_modify_mask, struct ib_device_modify *device_modify) { return device->modify_device(device, device_modify_mask, device_modify); } EXPORT_SYMBOL(ib_modify_device); /** * ib_modify_port - Modifies the attributes for the specified port. * @device: The device to modify. * @port_num: The number of the port to modify. * @port_modify_mask: Mask used to specify which attributes of the port * to change. * @port_modify: New attribute values for the port. * * ib_modify_port() changes a port's attributes as specified by the * @port_modify_mask and @port_modify structure. */ int ib_modify_port(struct ib_device *device, u8 port_num, int port_modify_mask, struct ib_port_modify *port_modify) { if (port_num < start_port(device) || port_num > end_port(device)) return -EINVAL; return device->modify_port(device, port_num, port_modify_mask, port_modify); } EXPORT_SYMBOL(ib_modify_port); /** * ib_find_gid - Returns the port number and GID table index where * a specified GID value occurs. * @device: The device to query. * @gid: The GID value to search for. * @port_num: The port number of the device where the GID value was found. * @index: The index into the GID table where the GID was found. This * parameter may be NULL. */ int ib_find_gid(struct ib_device *device, union ib_gid *gid, u8 *port_num, u16 *index) { union ib_gid tmp_gid; int ret, port, i; for (port = start_port(device); port <= end_port(device); ++port) { for (i = 0; i < device->gid_tbl_len[port - start_port(device)]; ++i) { ret = ib_query_gid(device, port, i, &tmp_gid); if (ret) return ret; if (!memcmp(&tmp_gid, gid, sizeof *gid)) { *port_num = port; if (index) *index = i; return 0; } } } return -ENOENT; } EXPORT_SYMBOL(ib_find_gid); /** * ib_find_pkey - Returns the PKey table index where a specified * PKey value occurs. * @device: The device to query. * @port_num: The port number of the device to search for the PKey. * @pkey: The PKey value to search for. * @index: The index into the PKey table where the PKey was found. */ int ib_find_pkey(struct ib_device *device, u8 port_num, u16 pkey, u16 *index) { int ret, i; u16 tmp_pkey; for (i = 0; i < device->pkey_tbl_len[port_num - start_port(device)]; ++i) { ret = ib_query_pkey(device, port_num, i, &tmp_pkey); if (ret) return ret; if ((pkey & 0x7fff) == (tmp_pkey & 0x7fff)) { *index = i; return 0; } } return -ENOENT; } EXPORT_SYMBOL(ib_find_pkey); static int __init ib_core_init(void) { int ret; #ifdef __ia64__ if (ia64_platform_is("hpzx1")) dma_map_sg_hp_wa = 1; #endif ret = ib_sysfs_setup(); if (ret) printk(KERN_WARNING "Couldn't create InfiniBand device class\n"); ret = ib_cache_setup(); if (ret) { printk(KERN_WARNING "Couldn't set up InfiniBand P_Key/GID cache\n"); ib_sysfs_cleanup(); } return ret; } static void __exit ib_core_cleanup(void) { ib_cache_cleanup(); ib_sysfs_cleanup(); /* Make sure that any pending umem accounting work is done. */ flush_scheduled_work(); } module_init(ib_core_init); module_exit(ib_core_cleanup); #undef MODULE_VERSION #include static int ibcore_evhand(module_t mod, int event, void *arg) { return (0); } static moduledata_t ibcore_mod = { .name = "ibcore", .evhand = ibcore_evhand, }; MODULE_VERSION(ibcore, 1); DECLARE_MODULE(ibcore, ibcore_mod, SI_SUB_SMP, SI_ORDER_ANY); Index: stable/10/sys/ofed/drivers/infiniband/core/iwcm.c =================================================================== --- stable/10/sys/ofed/drivers/infiniband/core/iwcm.c (revision 271126) +++ stable/10/sys/ofed/drivers/infiniband/core/iwcm.c (revision 271127) @@ -1,1028 +1,1029 @@ /* * Copyright (c) 2004, 2005 Intel Corporation. All rights reserved. * Copyright (c) 2004 Topspin Corporation. All rights reserved. * Copyright (c) 2004, 2005 Voltaire Corporation. All rights reserved. * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved. * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved. * Copyright (c) 2005 Network Appliance, Inc. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU * General Public License (GPL) Version 2, available from the file * COPYING in the main directory of this source tree, or the * OpenIB.org BSD license below: * * Redistribution and use in source and binary forms, with or * without modification, are permitted provided that the following * conditions are met: * * - Redistributions of source code must retain the above * copyright notice, this list of conditions and the following * disclaimer. * * - Redistributions in binary form must reproduce the above * copyright notice, this list of conditions and the following * disclaimer in the documentation and/or other materials * provided with the distribution. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * */ #include #include #include #include #include #include #include #include +#include #include #include #include "iwcm.h" MODULE_AUTHOR("Tom Tucker"); MODULE_DESCRIPTION("iWARP CM"); MODULE_LICENSE("Dual BSD/GPL"); static struct workqueue_struct *iwcm_wq; struct iwcm_work { struct work_struct work; struct iwcm_id_private *cm_id; struct list_head list; struct iw_cm_event event; struct list_head free_list; }; /* * The following services provide a mechanism for pre-allocating iwcm_work * elements. The design pre-allocates them based on the cm_id type: * LISTENING IDS: Get enough elements preallocated to handle the * listen backlog. * ACTIVE IDS: 4: CONNECT_REPLY, ESTABLISHED, DISCONNECT, CLOSE * PASSIVE IDS: 3: ESTABLISHED, DISCONNECT, CLOSE * * Allocating them in connect and listen avoids having to deal * with allocation failures on the event upcall from the provider (which * is called in the interrupt context). * * One exception is when creating the cm_id for incoming connection requests. * There are two cases: * 1) in the event upcall, cm_event_handler(), for a listening cm_id. If * the backlog is exceeded, then no more connection request events will * be processed. cm_event_handler() returns -ENOMEM in this case. Its up * to the provider to reject the connection request. * 2) in the connection request workqueue handler, cm_conn_req_handler(). * If work elements cannot be allocated for the new connect request cm_id, * then IWCM will call the provider reject method. This is ok since * cm_conn_req_handler() runs in the workqueue thread context. */ static struct iwcm_work *get_work(struct iwcm_id_private *cm_id_priv) { struct iwcm_work *work; if (list_empty(&cm_id_priv->work_free_list)) return NULL; work = list_entry(cm_id_priv->work_free_list.next, struct iwcm_work, free_list); list_del_init(&work->free_list); return work; } static void put_work(struct iwcm_work *work) { list_add(&work->free_list, &work->cm_id->work_free_list); } static void dealloc_work_entries(struct iwcm_id_private *cm_id_priv) { struct list_head *e, *tmp; list_for_each_safe(e, tmp, &cm_id_priv->work_free_list) kfree(list_entry(e, struct iwcm_work, free_list)); } static int alloc_work_entries(struct iwcm_id_private *cm_id_priv, int count) { struct iwcm_work *work; BUG_ON(!list_empty(&cm_id_priv->work_free_list)); while (count--) { work = kmalloc(sizeof(struct iwcm_work), GFP_KERNEL); if (!work) { dealloc_work_entries(cm_id_priv); return -ENOMEM; } work->cm_id = cm_id_priv; INIT_LIST_HEAD(&work->list); put_work(work); } return 0; } /* * Save private data from incoming connection requests to * iw_cm_event, so the low level driver doesn't have to. Adjust * the event ptr to point to the local copy. */ static int copy_private_data(struct iw_cm_event *event) { void *p; p = kmemdup(event->private_data, event->private_data_len, GFP_ATOMIC); if (!p) return -ENOMEM; event->private_data = p; return 0; } static void free_cm_id(struct iwcm_id_private *cm_id_priv) { dealloc_work_entries(cm_id_priv); kfree(cm_id_priv); } /* * Release a reference on cm_id. If the last reference is being * released, enable the waiting thread (in iw_destroy_cm_id) to * get woken up, and return 1 if a thread is already waiting. */ static int iwcm_deref_id(struct iwcm_id_private *cm_id_priv) { BUG_ON(atomic_read(&cm_id_priv->refcount)==0); if (atomic_dec_and_test(&cm_id_priv->refcount)) { BUG_ON(!list_empty(&cm_id_priv->work_list)); complete(&cm_id_priv->destroy_comp); return 1; } return 0; } static void add_ref(struct iw_cm_id *cm_id) { struct iwcm_id_private *cm_id_priv; cm_id_priv = container_of(cm_id, struct iwcm_id_private, id); atomic_inc(&cm_id_priv->refcount); } static void rem_ref(struct iw_cm_id *cm_id) { struct iwcm_id_private *cm_id_priv; cm_id_priv = container_of(cm_id, struct iwcm_id_private, id); if (iwcm_deref_id(cm_id_priv) && test_bit(IWCM_F_CALLBACK_DESTROY, &cm_id_priv->flags)) { BUG_ON(!list_empty(&cm_id_priv->work_list)); free_cm_id(cm_id_priv); } } static int cm_event_handler(struct iw_cm_id *cm_id, struct iw_cm_event *event); struct iw_cm_id *iw_create_cm_id(struct ib_device *device, struct socket *so, iw_cm_handler cm_handler, void *context) { struct iwcm_id_private *cm_id_priv; cm_id_priv = kzalloc(sizeof(*cm_id_priv), GFP_KERNEL); if (!cm_id_priv) return ERR_PTR(-ENOMEM); cm_id_priv->state = IW_CM_STATE_IDLE; cm_id_priv->id.device = device; cm_id_priv->id.cm_handler = cm_handler; cm_id_priv->id.context = context; cm_id_priv->id.event_handler = cm_event_handler; cm_id_priv->id.add_ref = add_ref; cm_id_priv->id.rem_ref = rem_ref; cm_id_priv->id.so = so; spin_lock_init(&cm_id_priv->lock); atomic_set(&cm_id_priv->refcount, 1); init_waitqueue_head(&cm_id_priv->connect_wait); init_completion(&cm_id_priv->destroy_comp); INIT_LIST_HEAD(&cm_id_priv->work_list); INIT_LIST_HEAD(&cm_id_priv->work_free_list); return &cm_id_priv->id; } EXPORT_SYMBOL(iw_create_cm_id); static int iwcm_modify_qp_err(struct ib_qp *qp) { struct ib_qp_attr qp_attr; if (!qp) return -EINVAL; qp_attr.qp_state = IB_QPS_ERR; return ib_modify_qp(qp, &qp_attr, IB_QP_STATE); } /* * This is really the RDMAC CLOSING state. It is most similar to the * IB SQD QP state. */ static int iwcm_modify_qp_sqd(struct ib_qp *qp) { struct ib_qp_attr qp_attr; BUG_ON(qp == NULL); qp_attr.qp_state = IB_QPS_SQD; return ib_modify_qp(qp, &qp_attr, IB_QP_STATE); } /* * CM_ID <-- CLOSING * * Block if a passive or active connection is currently being processed. Then * process the event as follows: * - If we are ESTABLISHED, move to CLOSING and modify the QP state * based on the abrupt flag * - If the connection is already in the CLOSING or IDLE state, the peer is * disconnecting concurrently with us and we've already seen the * DISCONNECT event -- ignore the request and return 0 * - Disconnect on a listening endpoint returns -EINVAL */ int iw_cm_disconnect(struct iw_cm_id *cm_id, int abrupt) { struct iwcm_id_private *cm_id_priv; unsigned long flags; int ret = 0; struct ib_qp *qp = NULL; cm_id_priv = container_of(cm_id, struct iwcm_id_private, id); /* Wait if we're currently in a connect or accept downcall */ wait_event(cm_id_priv->connect_wait, !test_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags)); spin_lock_irqsave(&cm_id_priv->lock, flags); switch (cm_id_priv->state) { case IW_CM_STATE_ESTABLISHED: cm_id_priv->state = IW_CM_STATE_CLOSING; /* QP could be for user-mode client */ if (cm_id_priv->qp) qp = cm_id_priv->qp; else ret = -EINVAL; break; case IW_CM_STATE_LISTEN: ret = -EINVAL; break; case IW_CM_STATE_CLOSING: /* remote peer closed first */ case IW_CM_STATE_IDLE: /* accept or connect returned !0 */ break; case IW_CM_STATE_CONN_RECV: /* * App called disconnect before/without calling accept after * connect_request event delivered. */ break; case IW_CM_STATE_CONN_SENT: /* Can only get here if wait above fails */ default: BUG(); } spin_unlock_irqrestore(&cm_id_priv->lock, flags); if (qp) { if (abrupt) ret = iwcm_modify_qp_err(qp); else ret = iwcm_modify_qp_sqd(qp); /* * If both sides are disconnecting the QP could * already be in ERR or SQD states */ ret = 0; } return ret; } EXPORT_SYMBOL(iw_cm_disconnect); /* * CM_ID <-- DESTROYING * * Clean up all resources associated with the connection and release * the initial reference taken by iw_create_cm_id. */ static void destroy_cm_id(struct iw_cm_id *cm_id) { struct iwcm_id_private *cm_id_priv; unsigned long flags; int ret; cm_id_priv = container_of(cm_id, struct iwcm_id_private, id); /* * Wait if we're currently in a connect or accept downcall. A * listening endpoint should never block here. */ wait_event(cm_id_priv->connect_wait, !test_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags)); spin_lock_irqsave(&cm_id_priv->lock, flags); switch (cm_id_priv->state) { case IW_CM_STATE_LISTEN: cm_id_priv->state = IW_CM_STATE_DESTROYING; spin_unlock_irqrestore(&cm_id_priv->lock, flags); /* destroy the listening endpoint */ ret = cm_id->device->iwcm->destroy_listen(cm_id); spin_lock_irqsave(&cm_id_priv->lock, flags); break; case IW_CM_STATE_ESTABLISHED: cm_id_priv->state = IW_CM_STATE_DESTROYING; spin_unlock_irqrestore(&cm_id_priv->lock, flags); /* Abrupt close of the connection */ (void)iwcm_modify_qp_err(cm_id_priv->qp); spin_lock_irqsave(&cm_id_priv->lock, flags); break; case IW_CM_STATE_IDLE: case IW_CM_STATE_CLOSING: cm_id_priv->state = IW_CM_STATE_DESTROYING; break; case IW_CM_STATE_CONN_RECV: /* * App called destroy before/without calling accept after * receiving connection request event notification or * returned non zero from the event callback function. * In either case, must tell the provider to reject. */ cm_id_priv->state = IW_CM_STATE_DESTROYING; spin_unlock_irqrestore(&cm_id_priv->lock, flags); cm_id->device->iwcm->reject(cm_id, NULL, 0); spin_lock_irqsave(&cm_id_priv->lock, flags); break; case IW_CM_STATE_CONN_SENT: case IW_CM_STATE_DESTROYING: default: BUG(); break; } if (cm_id_priv->qp) { cm_id_priv->id.device->iwcm->rem_ref(cm_id_priv->qp); cm_id_priv->qp = NULL; } spin_unlock_irqrestore(&cm_id_priv->lock, flags); (void)iwcm_deref_id(cm_id_priv); } /* * This function is only called by the application thread and cannot * be called by the event thread. The function will wait for all * references to be released on the cm_id and then kfree the cm_id * object. */ void iw_destroy_cm_id(struct iw_cm_id *cm_id) { struct iwcm_id_private *cm_id_priv; cm_id_priv = container_of(cm_id, struct iwcm_id_private, id); BUG_ON(test_bit(IWCM_F_CALLBACK_DESTROY, &cm_id_priv->flags)); destroy_cm_id(cm_id); wait_for_completion(&cm_id_priv->destroy_comp); free_cm_id(cm_id_priv); } EXPORT_SYMBOL(iw_destroy_cm_id); /* * CM_ID <-- LISTEN * * Start listening for connect requests. Generates one CONNECT_REQUEST * event for each inbound connect request. */ int iw_cm_listen(struct iw_cm_id *cm_id, int backlog) { struct iwcm_id_private *cm_id_priv; unsigned long flags; int ret; cm_id_priv = container_of(cm_id, struct iwcm_id_private, id); ret = alloc_work_entries(cm_id_priv, backlog); if (ret) return ret; spin_lock_irqsave(&cm_id_priv->lock, flags); switch (cm_id_priv->state) { case IW_CM_STATE_IDLE: cm_id_priv->state = IW_CM_STATE_LISTEN; spin_unlock_irqrestore(&cm_id_priv->lock, flags); ret = cm_id->device->iwcm->create_listen(cm_id, backlog); if (ret) cm_id_priv->state = IW_CM_STATE_IDLE; spin_lock_irqsave(&cm_id_priv->lock, flags); break; default: ret = -EINVAL; } spin_unlock_irqrestore(&cm_id_priv->lock, flags); return ret; } EXPORT_SYMBOL(iw_cm_listen); /* * CM_ID <-- IDLE * * Rejects an inbound connection request. No events are generated. */ int iw_cm_reject(struct iw_cm_id *cm_id, const void *private_data, u8 private_data_len) { struct iwcm_id_private *cm_id_priv; unsigned long flags; int ret; cm_id_priv = container_of(cm_id, struct iwcm_id_private, id); set_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags); spin_lock_irqsave(&cm_id_priv->lock, flags); if (cm_id_priv->state != IW_CM_STATE_CONN_RECV) { spin_unlock_irqrestore(&cm_id_priv->lock, flags); clear_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags); wake_up_all(&cm_id_priv->connect_wait); return -EINVAL; } cm_id_priv->state = IW_CM_STATE_IDLE; spin_unlock_irqrestore(&cm_id_priv->lock, flags); ret = cm_id->device->iwcm->reject(cm_id, private_data, private_data_len); clear_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags); wake_up_all(&cm_id_priv->connect_wait); return ret; } EXPORT_SYMBOL(iw_cm_reject); /* * CM_ID <-- ESTABLISHED * * Accepts an inbound connection request and generates an ESTABLISHED * event. Callers of iw_cm_disconnect and iw_destroy_cm_id will block * until the ESTABLISHED event is received from the provider. */ int iw_cm_accept(struct iw_cm_id *cm_id, struct iw_cm_conn_param *iw_param) { struct iwcm_id_private *cm_id_priv; struct ib_qp *qp; unsigned long flags; int ret; cm_id_priv = container_of(cm_id, struct iwcm_id_private, id); set_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags); spin_lock_irqsave(&cm_id_priv->lock, flags); if (cm_id_priv->state != IW_CM_STATE_CONN_RECV) { spin_unlock_irqrestore(&cm_id_priv->lock, flags); clear_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags); wake_up_all(&cm_id_priv->connect_wait); return -EINVAL; } /* Get the ib_qp given the QPN */ qp = cm_id->device->iwcm->get_qp(cm_id->device, iw_param->qpn); if (!qp) { spin_unlock_irqrestore(&cm_id_priv->lock, flags); return -EINVAL; } cm_id->device->iwcm->add_ref(qp); cm_id_priv->qp = qp; spin_unlock_irqrestore(&cm_id_priv->lock, flags); ret = cm_id->device->iwcm->accept(cm_id, iw_param); if (ret) { /* An error on accept precludes provider events */ BUG_ON(cm_id_priv->state != IW_CM_STATE_CONN_RECV); cm_id_priv->state = IW_CM_STATE_IDLE; spin_lock_irqsave(&cm_id_priv->lock, flags); if (cm_id_priv->qp) { cm_id->device->iwcm->rem_ref(qp); cm_id_priv->qp = NULL; } spin_unlock_irqrestore(&cm_id_priv->lock, flags); clear_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags); wake_up_all(&cm_id_priv->connect_wait); } return ret; } EXPORT_SYMBOL(iw_cm_accept); /* * Active Side: CM_ID <-- CONN_SENT * * If successful, results in the generation of a CONNECT_REPLY * event. iw_cm_disconnect and iw_cm_destroy will block until the * CONNECT_REPLY event is received from the provider. */ int iw_cm_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *iw_param) { struct iwcm_id_private *cm_id_priv; int ret; unsigned long flags; struct ib_qp *qp; cm_id_priv = container_of(cm_id, struct iwcm_id_private, id); ret = alloc_work_entries(cm_id_priv, 4); if (ret) return ret; set_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags); spin_lock_irqsave(&cm_id_priv->lock, flags); if (cm_id_priv->state != IW_CM_STATE_IDLE) { spin_unlock_irqrestore(&cm_id_priv->lock, flags); clear_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags); wake_up_all(&cm_id_priv->connect_wait); return -EINVAL; } /* Get the ib_qp given the QPN */ qp = cm_id->device->iwcm->get_qp(cm_id->device, iw_param->qpn); if (!qp) { spin_unlock_irqrestore(&cm_id_priv->lock, flags); return -EINVAL; } cm_id->device->iwcm->add_ref(qp); cm_id_priv->qp = qp; cm_id_priv->state = IW_CM_STATE_CONN_SENT; spin_unlock_irqrestore(&cm_id_priv->lock, flags); ret = cm_id->device->iwcm->connect(cm_id, iw_param); if (ret) { spin_lock_irqsave(&cm_id_priv->lock, flags); if (cm_id_priv->qp) { cm_id->device->iwcm->rem_ref(qp); cm_id_priv->qp = NULL; } spin_unlock_irqrestore(&cm_id_priv->lock, flags); BUG_ON(cm_id_priv->state != IW_CM_STATE_CONN_SENT); cm_id_priv->state = IW_CM_STATE_IDLE; clear_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags); wake_up_all(&cm_id_priv->connect_wait); } return ret; } EXPORT_SYMBOL(iw_cm_connect); /* * Passive Side: new CM_ID <-- CONN_RECV * * Handles an inbound connect request. The function creates a new * iw_cm_id to represent the new connection and inherits the client * callback function and other attributes from the listening parent. * * The work item contains a pointer to the listen_cm_id and the event. The * listen_cm_id contains the client cm_handler, context and * device. These are copied when the device is cloned. The event * contains the new four tuple. * * An error on the child should not affect the parent, so this * function does not return a value. */ static void cm_conn_req_handler(struct iwcm_id_private *listen_id_priv, struct iw_cm_event *iw_event) { unsigned long flags; struct iw_cm_id *cm_id; struct iwcm_id_private *cm_id_priv; int ret; /* * The provider should never generate a connection request * event with a bad status. */ BUG_ON(iw_event->status); /* * We could be destroying the listening id. If so, ignore this * upcall. */ spin_lock_irqsave(&listen_id_priv->lock, flags); if (listen_id_priv->state != IW_CM_STATE_LISTEN) { spin_unlock_irqrestore(&listen_id_priv->lock, flags); goto out; } spin_unlock_irqrestore(&listen_id_priv->lock, flags); cm_id = iw_create_cm_id(listen_id_priv->id.device, iw_event->so, listen_id_priv->id.cm_handler, listen_id_priv->id.context); /* If the cm_id could not be created, ignore the request */ if (IS_ERR(cm_id)) goto out; cm_id->provider_data = iw_event->provider_data; cm_id->local_addr = iw_event->local_addr; cm_id->remote_addr = iw_event->remote_addr; cm_id_priv = container_of(cm_id, struct iwcm_id_private, id); cm_id_priv->state = IW_CM_STATE_CONN_RECV; ret = alloc_work_entries(cm_id_priv, 3); if (ret) { iw_cm_reject(cm_id, NULL, 0); iw_destroy_cm_id(cm_id); goto out; } /* Call the client CM handler */ ret = cm_id->cm_handler(cm_id, iw_event); if (ret) { iw_cm_reject(cm_id, NULL, 0); set_bit(IWCM_F_CALLBACK_DESTROY, &cm_id_priv->flags); destroy_cm_id(cm_id); if (atomic_read(&cm_id_priv->refcount)==0) free_cm_id(cm_id_priv); } out: if (iw_event->private_data_len) kfree(iw_event->private_data); } /* * Passive Side: CM_ID <-- ESTABLISHED * * The provider generated an ESTABLISHED event which means that * the MPA negotion has completed successfully and we are now in MPA * FPDU mode. * * This event can only be received in the CONN_RECV state. If the * remote peer closed, the ESTABLISHED event would be received followed * by the CLOSE event. If the app closes, it will block until we wake * it up after processing this event. */ static int cm_conn_est_handler(struct iwcm_id_private *cm_id_priv, struct iw_cm_event *iw_event) { unsigned long flags; int ret; spin_lock_irqsave(&cm_id_priv->lock, flags); /* * We clear the CONNECT_WAIT bit here to allow the callback * function to call iw_cm_disconnect. Calling iw_destroy_cm_id * from a callback handler is not allowed. */ clear_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags); BUG_ON(cm_id_priv->state != IW_CM_STATE_CONN_RECV); cm_id_priv->state = IW_CM_STATE_ESTABLISHED; spin_unlock_irqrestore(&cm_id_priv->lock, flags); ret = cm_id_priv->id.cm_handler(&cm_id_priv->id, iw_event); wake_up_all(&cm_id_priv->connect_wait); return ret; } /* * Active Side: CM_ID <-- ESTABLISHED * * The app has called connect and is waiting for the established event to * post it's requests to the server. This event will wake up anyone * blocked in iw_cm_disconnect or iw_destroy_id. */ static int cm_conn_rep_handler(struct iwcm_id_private *cm_id_priv, struct iw_cm_event *iw_event) { unsigned long flags; int ret; spin_lock_irqsave(&cm_id_priv->lock, flags); /* * Clear the connect wait bit so a callback function calling * iw_cm_disconnect will not wait and deadlock this thread */ clear_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags); BUG_ON(cm_id_priv->state != IW_CM_STATE_CONN_SENT); if (iw_event->status == IW_CM_EVENT_STATUS_ACCEPTED) { cm_id_priv->id.local_addr = iw_event->local_addr; cm_id_priv->id.remote_addr = iw_event->remote_addr; cm_id_priv->state = IW_CM_STATE_ESTABLISHED; } else { /* REJECTED or RESET */ cm_id_priv->id.device->iwcm->rem_ref(cm_id_priv->qp); cm_id_priv->qp = NULL; cm_id_priv->state = IW_CM_STATE_IDLE; } spin_unlock_irqrestore(&cm_id_priv->lock, flags); ret = cm_id_priv->id.cm_handler(&cm_id_priv->id, iw_event); if (iw_event->private_data_len) kfree(iw_event->private_data); /* Wake up waiters on connect complete */ wake_up_all(&cm_id_priv->connect_wait); return ret; } /* * CM_ID <-- CLOSING * * If in the ESTABLISHED state, move to CLOSING. */ static void cm_disconnect_handler(struct iwcm_id_private *cm_id_priv, struct iw_cm_event *iw_event) { unsigned long flags; spin_lock_irqsave(&cm_id_priv->lock, flags); if (cm_id_priv->state == IW_CM_STATE_ESTABLISHED) cm_id_priv->state = IW_CM_STATE_CLOSING; spin_unlock_irqrestore(&cm_id_priv->lock, flags); } /* * CM_ID <-- IDLE * * If in the ESTBLISHED or CLOSING states, the QP will have have been * moved by the provider to the ERR state. Disassociate the CM_ID from * the QP, move to IDLE, and remove the 'connected' reference. * * If in some other state, the cm_id was destroyed asynchronously. * This is the last reference that will result in waking up * the app thread blocked in iw_destroy_cm_id. */ static int cm_close_handler(struct iwcm_id_private *cm_id_priv, struct iw_cm_event *iw_event) { unsigned long flags; int ret = 0; spin_lock_irqsave(&cm_id_priv->lock, flags); if (cm_id_priv->qp) { cm_id_priv->id.device->iwcm->rem_ref(cm_id_priv->qp); cm_id_priv->qp = NULL; } switch (cm_id_priv->state) { case IW_CM_STATE_ESTABLISHED: case IW_CM_STATE_CLOSING: cm_id_priv->state = IW_CM_STATE_IDLE; spin_unlock_irqrestore(&cm_id_priv->lock, flags); ret = cm_id_priv->id.cm_handler(&cm_id_priv->id, iw_event); spin_lock_irqsave(&cm_id_priv->lock, flags); break; case IW_CM_STATE_DESTROYING: break; default: BUG(); } spin_unlock_irqrestore(&cm_id_priv->lock, flags); return ret; } static int process_event(struct iwcm_id_private *cm_id_priv, struct iw_cm_event *iw_event) { int ret = 0; switch (iw_event->event) { case IW_CM_EVENT_CONNECT_REQUEST: cm_conn_req_handler(cm_id_priv, iw_event); break; case IW_CM_EVENT_CONNECT_REPLY: ret = cm_conn_rep_handler(cm_id_priv, iw_event); break; case IW_CM_EVENT_ESTABLISHED: ret = cm_conn_est_handler(cm_id_priv, iw_event); break; case IW_CM_EVENT_DISCONNECT: cm_disconnect_handler(cm_id_priv, iw_event); break; case IW_CM_EVENT_CLOSE: ret = cm_close_handler(cm_id_priv, iw_event); break; default: BUG(); } return ret; } /* * Process events on the work_list for the cm_id. If the callback * function requests that the cm_id be deleted, a flag is set in the * cm_id flags to indicate that when the last reference is * removed, the cm_id is to be destroyed. This is necessary to * distinguish between an object that will be destroyed by the app * thread asleep on the destroy_comp list vs. an object destroyed * here synchronously when the last reference is removed. */ static void cm_work_handler(struct work_struct *_work) { struct iwcm_work *work = container_of(_work, struct iwcm_work, work); struct iw_cm_event levent; struct iwcm_id_private *cm_id_priv = work->cm_id; unsigned long flags; int empty; int ret = 0; int destroy_id; spin_lock_irqsave(&cm_id_priv->lock, flags); empty = list_empty(&cm_id_priv->work_list); while (!empty) { work = list_entry(cm_id_priv->work_list.next, struct iwcm_work, list); list_del_init(&work->list); empty = list_empty(&cm_id_priv->work_list); levent = work->event; put_work(work); spin_unlock_irqrestore(&cm_id_priv->lock, flags); ret = process_event(cm_id_priv, &levent); if (ret) { set_bit(IWCM_F_CALLBACK_DESTROY, &cm_id_priv->flags); destroy_cm_id(&cm_id_priv->id); } BUG_ON(atomic_read(&cm_id_priv->refcount)==0); destroy_id = test_bit(IWCM_F_CALLBACK_DESTROY, &cm_id_priv->flags); if (iwcm_deref_id(cm_id_priv)) { if (destroy_id) { BUG_ON(!list_empty(&cm_id_priv->work_list)); free_cm_id(cm_id_priv); } return; } spin_lock_irqsave(&cm_id_priv->lock, flags); } spin_unlock_irqrestore(&cm_id_priv->lock, flags); } /* * This function is called on interrupt context. Schedule events on * the iwcm_wq thread to allow callback functions to downcall into * the CM and/or block. Events are queued to a per-CM_ID * work_list. If this is the first event on the work_list, the work * element is also queued on the iwcm_wq thread. * * Each event holds a reference on the cm_id. Until the last posted * event has been delivered and processed, the cm_id cannot be * deleted. * * Returns: * 0 - the event was handled. * -ENOMEM - the event was not handled due to lack of resources. */ static int cm_event_handler(struct iw_cm_id *cm_id, struct iw_cm_event *iw_event) { struct iwcm_work *work; struct iwcm_id_private *cm_id_priv; unsigned long flags; int ret = 0; cm_id_priv = container_of(cm_id, struct iwcm_id_private, id); spin_lock_irqsave(&cm_id_priv->lock, flags); work = get_work(cm_id_priv); if (!work) { ret = -ENOMEM; goto out; } INIT_WORK(&work->work, cm_work_handler); work->cm_id = cm_id_priv; work->event = *iw_event; if ((work->event.event == IW_CM_EVENT_CONNECT_REQUEST || work->event.event == IW_CM_EVENT_CONNECT_REPLY) && work->event.private_data_len) { ret = copy_private_data(&work->event); if (ret) { put_work(work); goto out; } } atomic_inc(&cm_id_priv->refcount); if (list_empty(&cm_id_priv->work_list)) { list_add_tail(&work->list, &cm_id_priv->work_list); queue_work(iwcm_wq, &work->work); } else list_add_tail(&work->list, &cm_id_priv->work_list); out: spin_unlock_irqrestore(&cm_id_priv->lock, flags); return ret; } static int iwcm_init_qp_init_attr(struct iwcm_id_private *cm_id_priv, struct ib_qp_attr *qp_attr, int *qp_attr_mask) { unsigned long flags; int ret; spin_lock_irqsave(&cm_id_priv->lock, flags); switch (cm_id_priv->state) { case IW_CM_STATE_IDLE: case IW_CM_STATE_CONN_SENT: case IW_CM_STATE_CONN_RECV: case IW_CM_STATE_ESTABLISHED: *qp_attr_mask = IB_QP_STATE | IB_QP_ACCESS_FLAGS; qp_attr->qp_access_flags = IB_ACCESS_REMOTE_WRITE| IB_ACCESS_REMOTE_READ; ret = 0; break; default: ret = -EINVAL; break; } spin_unlock_irqrestore(&cm_id_priv->lock, flags); return ret; } static int iwcm_init_qp_rts_attr(struct iwcm_id_private *cm_id_priv, struct ib_qp_attr *qp_attr, int *qp_attr_mask) { unsigned long flags; int ret; spin_lock_irqsave(&cm_id_priv->lock, flags); switch (cm_id_priv->state) { case IW_CM_STATE_IDLE: case IW_CM_STATE_CONN_SENT: case IW_CM_STATE_CONN_RECV: case IW_CM_STATE_ESTABLISHED: *qp_attr_mask = 0; ret = 0; break; default: ret = -EINVAL; break; } spin_unlock_irqrestore(&cm_id_priv->lock, flags); return ret; } int iw_cm_init_qp_attr(struct iw_cm_id *cm_id, struct ib_qp_attr *qp_attr, int *qp_attr_mask) { struct iwcm_id_private *cm_id_priv; int ret; cm_id_priv = container_of(cm_id, struct iwcm_id_private, id); switch (qp_attr->qp_state) { case IB_QPS_INIT: case IB_QPS_RTR: ret = iwcm_init_qp_init_attr(cm_id_priv, qp_attr, qp_attr_mask); break; case IB_QPS_RTS: ret = iwcm_init_qp_rts_attr(cm_id_priv, qp_attr, qp_attr_mask); break; default: ret = -EINVAL; break; } return ret; } EXPORT_SYMBOL(iw_cm_init_qp_attr); static int __init iw_cm_init(void) { iwcm_wq = create_singlethread_workqueue("iw_cm_wq"); if (!iwcm_wq) return -ENOMEM; return 0; } static void __exit iw_cm_cleanup(void) { destroy_workqueue(iwcm_wq); } module_init(iw_cm_init); module_exit(iw_cm_cleanup); Index: stable/10/sys/ofed/drivers/infiniband/core/sa_query.c =================================================================== --- stable/10/sys/ofed/drivers/infiniband/core/sa_query.c (revision 271126) +++ stable/10/sys/ofed/drivers/infiniband/core/sa_query.c (revision 271127) @@ -1,1501 +1,1500 @@ /* * Copyright (c) 2004 Topspin Communications. All rights reserved. * Copyright (c) 2005 Voltaire, Inc. All rights reserved. * Copyright (c) 2006 Intel Corporation. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU * General Public License (GPL) Version 2, available from the file * COPYING in the main directory of this source tree, or the * OpenIB.org BSD license below: * * Redistribution and use in source and binary forms, with or * without modification, are permitted provided that the following * conditions are met: * * - Redistributions of source code must retain the above * copyright notice, this list of conditions and the following * disclaimer. * * - Redistributions in binary form must reproduce the above * copyright notice, this list of conditions and the following * disclaimer in the documentation and/or other materials * provided with the distribution. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ #include -#include #include #include #include #include #include #include #include #include #include #include #include "sa.h" MODULE_AUTHOR("Roland Dreier"); MODULE_DESCRIPTION("InfiniBand subnet administration query support"); MODULE_LICENSE("Dual BSD/GPL"); struct ib_sa_sm_ah { struct ib_ah *ah; struct kref ref; u16 pkey_index; u8 src_path_mask; }; struct ib_sa_port { struct ib_mad_agent *agent; struct ib_mad_agent *notice_agent; struct ib_sa_sm_ah *sm_ah; struct work_struct update_task; spinlock_t ah_lock; u8 port_num; struct ib_device *device; }; struct ib_sa_device { int start_port, end_port; struct ib_event_handler event_handler; struct ib_sa_port port[0]; }; struct ib_sa_query { void (*callback)(struct ib_sa_query *, int, struct ib_sa_mad *); void (*release)(struct ib_sa_query *); struct ib_sa_client *client; struct ib_sa_port *port; struct ib_mad_send_buf *mad_buf; struct ib_sa_sm_ah *sm_ah; int id; }; struct ib_sa_service_query { void (*callback)(int, struct ib_sa_service_rec *, void *); void *context; struct ib_sa_query sa_query; }; struct ib_sa_path_query { void (*callback)(int, struct ib_sa_path_rec *, void *); void *context; struct ib_sa_query sa_query; }; struct ib_sa_mcmember_query { void (*callback)(int, struct ib_sa_mcmember_rec *, void *); void *context; struct ib_sa_query sa_query; }; struct ib_sa_inform_query { void (*callback)(int, struct ib_sa_inform *, void *); void *context; struct ib_sa_query sa_query; }; static void ib_sa_add_one(struct ib_device *device); static void ib_sa_remove_one(struct ib_device *device); static struct ib_client sa_client = { .name = "sa", .add = ib_sa_add_one, .remove = ib_sa_remove_one }; static spinlock_t idr_lock; static DEFINE_IDR(query_idr); static spinlock_t tid_lock; static u32 tid; #define PATH_REC_FIELD(field) \ .struct_offset_bytes = offsetof(struct ib_sa_path_rec, field), \ .struct_size_bytes = sizeof ((struct ib_sa_path_rec *) 0)->field, \ .field_name = "sa_path_rec:" #field static const struct ib_field path_rec_table[] = { { PATH_REC_FIELD(service_id), .offset_words = 0, .offset_bits = 0, .size_bits = 64 }, { PATH_REC_FIELD(dgid), .offset_words = 2, .offset_bits = 0, .size_bits = 128 }, { PATH_REC_FIELD(sgid), .offset_words = 6, .offset_bits = 0, .size_bits = 128 }, { PATH_REC_FIELD(dlid), .offset_words = 10, .offset_bits = 0, .size_bits = 16 }, { PATH_REC_FIELD(slid), .offset_words = 10, .offset_bits = 16, .size_bits = 16 }, { PATH_REC_FIELD(raw_traffic), .offset_words = 11, .offset_bits = 0, .size_bits = 1 }, { RESERVED, .offset_words = 11, .offset_bits = 1, .size_bits = 3 }, { PATH_REC_FIELD(flow_label), .offset_words = 11, .offset_bits = 4, .size_bits = 20 }, { PATH_REC_FIELD(hop_limit), .offset_words = 11, .offset_bits = 24, .size_bits = 8 }, { PATH_REC_FIELD(traffic_class), .offset_words = 12, .offset_bits = 0, .size_bits = 8 }, { PATH_REC_FIELD(reversible), .offset_words = 12, .offset_bits = 8, .size_bits = 1 }, { PATH_REC_FIELD(numb_path), .offset_words = 12, .offset_bits = 9, .size_bits = 7 }, { PATH_REC_FIELD(pkey), .offset_words = 12, .offset_bits = 16, .size_bits = 16 }, { PATH_REC_FIELD(qos_class), .offset_words = 13, .offset_bits = 0, .size_bits = 12 }, { PATH_REC_FIELD(sl), .offset_words = 13, .offset_bits = 12, .size_bits = 4 }, { PATH_REC_FIELD(mtu_selector), .offset_words = 13, .offset_bits = 16, .size_bits = 2 }, { PATH_REC_FIELD(mtu), .offset_words = 13, .offset_bits = 18, .size_bits = 6 }, { PATH_REC_FIELD(rate_selector), .offset_words = 13, .offset_bits = 24, .size_bits = 2 }, { PATH_REC_FIELD(rate), .offset_words = 13, .offset_bits = 26, .size_bits = 6 }, { PATH_REC_FIELD(packet_life_time_selector), .offset_words = 14, .offset_bits = 0, .size_bits = 2 }, { PATH_REC_FIELD(packet_life_time), .offset_words = 14, .offset_bits = 2, .size_bits = 6 }, { PATH_REC_FIELD(preference), .offset_words = 14, .offset_bits = 8, .size_bits = 8 }, { RESERVED, .offset_words = 14, .offset_bits = 16, .size_bits = 48 }, }; #define MCMEMBER_REC_FIELD(field) \ .struct_offset_bytes = offsetof(struct ib_sa_mcmember_rec, field), \ .struct_size_bytes = sizeof ((struct ib_sa_mcmember_rec *) 0)->field, \ .field_name = "sa_mcmember_rec:" #field static const struct ib_field mcmember_rec_table[] = { { MCMEMBER_REC_FIELD(mgid), .offset_words = 0, .offset_bits = 0, .size_bits = 128 }, { MCMEMBER_REC_FIELD(port_gid), .offset_words = 4, .offset_bits = 0, .size_bits = 128 }, { MCMEMBER_REC_FIELD(qkey), .offset_words = 8, .offset_bits = 0, .size_bits = 32 }, { MCMEMBER_REC_FIELD(mlid), .offset_words = 9, .offset_bits = 0, .size_bits = 16 }, { MCMEMBER_REC_FIELD(mtu_selector), .offset_words = 9, .offset_bits = 16, .size_bits = 2 }, { MCMEMBER_REC_FIELD(mtu), .offset_words = 9, .offset_bits = 18, .size_bits = 6 }, { MCMEMBER_REC_FIELD(traffic_class), .offset_words = 9, .offset_bits = 24, .size_bits = 8 }, { MCMEMBER_REC_FIELD(pkey), .offset_words = 10, .offset_bits = 0, .size_bits = 16 }, { MCMEMBER_REC_FIELD(rate_selector), .offset_words = 10, .offset_bits = 16, .size_bits = 2 }, { MCMEMBER_REC_FIELD(rate), .offset_words = 10, .offset_bits = 18, .size_bits = 6 }, { MCMEMBER_REC_FIELD(packet_life_time_selector), .offset_words = 10, .offset_bits = 24, .size_bits = 2 }, { MCMEMBER_REC_FIELD(packet_life_time), .offset_words = 10, .offset_bits = 26, .size_bits = 6 }, { MCMEMBER_REC_FIELD(sl), .offset_words = 11, .offset_bits = 0, .size_bits = 4 }, { MCMEMBER_REC_FIELD(flow_label), .offset_words = 11, .offset_bits = 4, .size_bits = 20 }, { MCMEMBER_REC_FIELD(hop_limit), .offset_words = 11, .offset_bits = 24, .size_bits = 8 }, { MCMEMBER_REC_FIELD(scope), .offset_words = 12, .offset_bits = 0, .size_bits = 4 }, { MCMEMBER_REC_FIELD(join_state), .offset_words = 12, .offset_bits = 4, .size_bits = 4 }, { MCMEMBER_REC_FIELD(proxy_join), .offset_words = 12, .offset_bits = 8, .size_bits = 1 }, { RESERVED, .offset_words = 12, .offset_bits = 9, .size_bits = 23 }, }; #define SERVICE_REC_FIELD(field) \ .struct_offset_bytes = offsetof(struct ib_sa_service_rec, field), \ .struct_size_bytes = sizeof ((struct ib_sa_service_rec *) 0)->field, \ .field_name = "sa_service_rec:" #field static const struct ib_field service_rec_table[] = { { SERVICE_REC_FIELD(id), .offset_words = 0, .offset_bits = 0, .size_bits = 64 }, { SERVICE_REC_FIELD(gid), .offset_words = 2, .offset_bits = 0, .size_bits = 128 }, { SERVICE_REC_FIELD(pkey), .offset_words = 6, .offset_bits = 0, .size_bits = 16 }, { SERVICE_REC_FIELD(lease), .offset_words = 7, .offset_bits = 0, .size_bits = 32 }, { SERVICE_REC_FIELD(key), .offset_words = 8, .offset_bits = 0, .size_bits = 128 }, { SERVICE_REC_FIELD(name), .offset_words = 12, .offset_bits = 0, .size_bits = 64*8 }, { SERVICE_REC_FIELD(data8), .offset_words = 28, .offset_bits = 0, .size_bits = 16*8 }, { SERVICE_REC_FIELD(data16), .offset_words = 32, .offset_bits = 0, .size_bits = 8*16 }, { SERVICE_REC_FIELD(data32), .offset_words = 36, .offset_bits = 0, .size_bits = 4*32 }, { SERVICE_REC_FIELD(data64), .offset_words = 40, .offset_bits = 0, .size_bits = 2*64 }, }; #define INFORM_FIELD(field) \ .struct_offset_bytes = offsetof(struct ib_sa_inform, field), \ .struct_size_bytes = sizeof ((struct ib_sa_inform *) 0)->field, \ .field_name = "sa_inform:" #field static const struct ib_field inform_table[] = { { INFORM_FIELD(gid), .offset_words = 0, .offset_bits = 0, .size_bits = 128 }, { INFORM_FIELD(lid_range_begin), .offset_words = 4, .offset_bits = 0, .size_bits = 16 }, { INFORM_FIELD(lid_range_end), .offset_words = 4, .offset_bits = 16, .size_bits = 16 }, { RESERVED, .offset_words = 5, .offset_bits = 0, .size_bits = 16 }, { INFORM_FIELD(is_generic), .offset_words = 5, .offset_bits = 16, .size_bits = 8 }, { INFORM_FIELD(subscribe), .offset_words = 5, .offset_bits = 24, .size_bits = 8 }, { INFORM_FIELD(type), .offset_words = 6, .offset_bits = 0, .size_bits = 16 }, { INFORM_FIELD(trap.generic.trap_num), .offset_words = 6, .offset_bits = 16, .size_bits = 16 }, { INFORM_FIELD(trap.generic.qpn), .offset_words = 7, .offset_bits = 0, .size_bits = 24 }, { RESERVED, .offset_words = 7, .offset_bits = 24, .size_bits = 3 }, { INFORM_FIELD(trap.generic.resp_time), .offset_words = 7, .offset_bits = 27, .size_bits = 5 }, { RESERVED, .offset_words = 8, .offset_bits = 0, .size_bits = 8 }, { INFORM_FIELD(trap.generic.producer_type), .offset_words = 8, .offset_bits = 8, .size_bits = 24 }, }; #define NOTICE_FIELD(field) \ .struct_offset_bytes = offsetof(struct ib_sa_notice, field), \ .struct_size_bytes = sizeof ((struct ib_sa_notice *) 0)->field, \ .field_name = "sa_notice:" #field static const struct ib_field notice_table[] = { { NOTICE_FIELD(is_generic), .offset_words = 0, .offset_bits = 0, .size_bits = 1 }, { NOTICE_FIELD(type), .offset_words = 0, .offset_bits = 1, .size_bits = 7 }, { NOTICE_FIELD(trap.generic.producer_type), .offset_words = 0, .offset_bits = 8, .size_bits = 24 }, { NOTICE_FIELD(trap.generic.trap_num), .offset_words = 1, .offset_bits = 0, .size_bits = 16 }, { NOTICE_FIELD(issuer_lid), .offset_words = 1, .offset_bits = 16, .size_bits = 16 }, { NOTICE_FIELD(notice_toggle), .offset_words = 2, .offset_bits = 0, .size_bits = 1 }, { NOTICE_FIELD(notice_count), .offset_words = 2, .offset_bits = 1, .size_bits = 15 }, { NOTICE_FIELD(data_details), .offset_words = 2, .offset_bits = 16, .size_bits = 432 }, { NOTICE_FIELD(issuer_gid), .offset_words = 16, .offset_bits = 0, .size_bits = 128 }, }; int ib_sa_check_selector(ib_sa_comp_mask comp_mask, ib_sa_comp_mask selector_mask, ib_sa_comp_mask value_mask, u8 selector, u8 src_value, u8 dst_value) { int err; if (!(comp_mask & selector_mask) || !(comp_mask & value_mask)) return 0; switch (selector) { case IB_SA_GT: err = (src_value <= dst_value); break; case IB_SA_LT: err = (src_value >= dst_value); break; case IB_SA_EQ: err = (src_value != dst_value); break; default: err = 0; break; } return err; } int ib_sa_pack_attr(void *dst, void *src, int attr_id) { switch (attr_id) { case IB_SA_ATTR_PATH_REC: ib_pack(path_rec_table, ARRAY_SIZE(path_rec_table), src, dst); break; default: return -EINVAL; } return 0; } int ib_sa_unpack_attr(void *dst, void *src, int attr_id) { switch (attr_id) { case IB_SA_ATTR_PATH_REC: ib_unpack(path_rec_table, ARRAY_SIZE(path_rec_table), src, dst); break; default: return -EINVAL; } return 0; } static void free_sm_ah(struct kref *kref) { struct ib_sa_sm_ah *sm_ah = container_of(kref, struct ib_sa_sm_ah, ref); ib_destroy_ah(sm_ah->ah); kfree(sm_ah); } static void update_sm_ah(struct work_struct *work) { struct ib_sa_port *port = container_of(work, struct ib_sa_port, update_task); struct ib_sa_sm_ah *new_ah; struct ib_port_attr port_attr; struct ib_ah_attr ah_attr; if (ib_query_port(port->agent->device, port->port_num, &port_attr)) { printk(KERN_WARNING "Couldn't query port\n"); return; } new_ah = kmalloc(sizeof *new_ah, GFP_KERNEL); if (!new_ah) { printk(KERN_WARNING "Couldn't allocate new SM AH\n"); return; } kref_init(&new_ah->ref); new_ah->src_path_mask = (1 << port_attr.lmc) - 1; new_ah->pkey_index = 0; if (ib_find_pkey(port->agent->device, port->port_num, IB_DEFAULT_PKEY_FULL, &new_ah->pkey_index)) printk(KERN_ERR "Couldn't find index for default PKey\n"); memset(&ah_attr, 0, sizeof ah_attr); ah_attr.dlid = port_attr.sm_lid; ah_attr.sl = port_attr.sm_sl; ah_attr.port_num = port->port_num; new_ah->ah = ib_create_ah(port->agent->qp->pd, &ah_attr); if (IS_ERR(new_ah->ah)) { printk(KERN_WARNING "Couldn't create new SM AH\n"); kfree(new_ah); return; } spin_lock_irq(&port->ah_lock); if (port->sm_ah) kref_put(&port->sm_ah->ref, free_sm_ah); port->sm_ah = new_ah; spin_unlock_irq(&port->ah_lock); } static void ib_sa_event(struct ib_event_handler *handler, struct ib_event *event) { if (event->event == IB_EVENT_PORT_ERR || event->event == IB_EVENT_PORT_ACTIVE || event->event == IB_EVENT_LID_CHANGE || event->event == IB_EVENT_PKEY_CHANGE || event->event == IB_EVENT_SM_CHANGE || event->event == IB_EVENT_CLIENT_REREGISTER) { unsigned long flags; struct ib_sa_device *sa_dev = container_of(handler, typeof(*sa_dev), event_handler); struct ib_sa_port *port = &sa_dev->port[event->element.port_num - sa_dev->start_port]; if (rdma_port_get_link_layer(handler->device, port->port_num) != IB_LINK_LAYER_INFINIBAND) return; spin_lock_irqsave(&port->ah_lock, flags); if (port->sm_ah) kref_put(&port->sm_ah->ref, free_sm_ah); port->sm_ah = NULL; spin_unlock_irqrestore(&port->ah_lock, flags); schedule_work(&sa_dev->port[event->element.port_num - sa_dev->start_port].update_task); } } void ib_sa_register_client(struct ib_sa_client *client) { atomic_set(&client->users, 1); init_completion(&client->comp); } EXPORT_SYMBOL(ib_sa_register_client); void ib_sa_unregister_client(struct ib_sa_client *client) { ib_sa_client_put(client); wait_for_completion(&client->comp); } EXPORT_SYMBOL(ib_sa_unregister_client); /** * ib_sa_cancel_query - try to cancel an SA query * @id:ID of query to cancel * @query:query pointer to cancel * * Try to cancel an SA query. If the id and query don't match up or * the query has already completed, nothing is done. Otherwise the * query is canceled and will complete with a status of -EINTR. */ void ib_sa_cancel_query(int id, struct ib_sa_query *query) { unsigned long flags; struct ib_mad_agent *agent; struct ib_mad_send_buf *mad_buf; spin_lock_irqsave(&idr_lock, flags); if (idr_find(&query_idr, id) != query) { spin_unlock_irqrestore(&idr_lock, flags); return; } agent = query->port->agent; mad_buf = query->mad_buf; spin_unlock_irqrestore(&idr_lock, flags); ib_cancel_mad(agent, mad_buf); } EXPORT_SYMBOL(ib_sa_cancel_query); static u8 get_src_path_mask(struct ib_device *device, u8 port_num) { struct ib_sa_device *sa_dev; struct ib_sa_port *port; unsigned long flags; u8 src_path_mask; sa_dev = ib_get_client_data(device, &sa_client); if (!sa_dev) return 0x7f; port = &sa_dev->port[port_num - sa_dev->start_port]; spin_lock_irqsave(&port->ah_lock, flags); src_path_mask = port->sm_ah ? port->sm_ah->src_path_mask : 0x7f; spin_unlock_irqrestore(&port->ah_lock, flags); return src_path_mask; } int ib_init_ah_from_path(struct ib_device *device, u8 port_num, struct ib_sa_path_rec *rec, struct ib_ah_attr *ah_attr) { int ret; u16 gid_index; int force_grh; memset(ah_attr, 0, sizeof *ah_attr); ah_attr->dlid = be16_to_cpu(rec->dlid); ah_attr->sl = rec->sl; ah_attr->src_path_bits = be16_to_cpu(rec->slid) & get_src_path_mask(device, port_num); ah_attr->port_num = port_num; ah_attr->static_rate = rec->rate; force_grh = rdma_port_get_link_layer(device, port_num) == IB_LINK_LAYER_ETHERNET; if (rec->hop_limit > 1 || force_grh) { ah_attr->ah_flags = IB_AH_GRH; ah_attr->grh.dgid = rec->dgid; ret = ib_find_cached_gid(device, &rec->sgid, &port_num, &gid_index); if (ret) return ret; ah_attr->grh.sgid_index = gid_index; ah_attr->grh.flow_label = be32_to_cpu(rec->flow_label); ah_attr->grh.hop_limit = rec->hop_limit; ah_attr->grh.traffic_class = rec->traffic_class; } return 0; } EXPORT_SYMBOL(ib_init_ah_from_path); static int alloc_mad(struct ib_sa_query *query, gfp_t gfp_mask) { unsigned long flags; spin_lock_irqsave(&query->port->ah_lock, flags); if (!query->port->sm_ah) { spin_unlock_irqrestore(&query->port->ah_lock, flags); return -EAGAIN; } kref_get(&query->port->sm_ah->ref); query->sm_ah = query->port->sm_ah; spin_unlock_irqrestore(&query->port->ah_lock, flags); query->mad_buf = ib_create_send_mad(query->port->agent, 1, query->sm_ah->pkey_index, 0, IB_MGMT_SA_HDR, IB_MGMT_SA_DATA, gfp_mask); if (IS_ERR(query->mad_buf)) { kref_put(&query->sm_ah->ref, free_sm_ah); return -ENOMEM; } query->mad_buf->ah = query->sm_ah->ah; return 0; } static void free_mad(struct ib_sa_query *query) { ib_free_send_mad(query->mad_buf); kref_put(&query->sm_ah->ref, free_sm_ah); } static void init_mad(struct ib_sa_mad *mad, struct ib_mad_agent *agent) { unsigned long flags; memset(mad, 0, sizeof *mad); mad->mad_hdr.base_version = IB_MGMT_BASE_VERSION; mad->mad_hdr.mgmt_class = IB_MGMT_CLASS_SUBN_ADM; mad->mad_hdr.class_version = IB_SA_CLASS_VERSION; spin_lock_irqsave(&tid_lock, flags); mad->mad_hdr.tid = cpu_to_be64(((u64) agent->hi_tid) << 32 | tid++); spin_unlock_irqrestore(&tid_lock, flags); } static int send_mad(struct ib_sa_query *query, int timeout_ms, gfp_t gfp_mask) { unsigned long flags; int ret, id; retry: if (!idr_pre_get(&query_idr, gfp_mask)) return -ENOMEM; spin_lock_irqsave(&idr_lock, flags); ret = idr_get_new(&query_idr, query, &id); spin_unlock_irqrestore(&idr_lock, flags); if (ret == -EAGAIN) goto retry; if (ret) return ret; query->mad_buf->timeout_ms = timeout_ms; query->mad_buf->context[0] = query; query->id = id; ret = ib_post_send_mad(query->mad_buf, NULL); if (ret) { spin_lock_irqsave(&idr_lock, flags); idr_remove(&query_idr, id); spin_unlock_irqrestore(&idr_lock, flags); } /* * It's not safe to dereference query any more, because the * send may already have completed and freed the query in * another context. */ return ret ? ret : id; } void ib_sa_unpack_path(void *attribute, struct ib_sa_path_rec *rec) { ib_unpack(path_rec_table, ARRAY_SIZE(path_rec_table), attribute, rec); } EXPORT_SYMBOL(ib_sa_unpack_path); static void ib_sa_path_rec_callback(struct ib_sa_query *sa_query, int status, struct ib_sa_mad *mad) { struct ib_sa_path_query *query = container_of(sa_query, struct ib_sa_path_query, sa_query); if (mad) { struct ib_sa_path_rec rec; ib_unpack(path_rec_table, ARRAY_SIZE(path_rec_table), mad->data, &rec); query->callback(status, &rec, query->context); } else query->callback(status, NULL, query->context); } static void ib_sa_path_rec_release(struct ib_sa_query *sa_query) { kfree(container_of(sa_query, struct ib_sa_path_query, sa_query)); } int ib_sa_path_rec_query(struct ib_sa_client *client, struct ib_device *device, u8 port_num, struct ib_sa_path_rec *rec, ib_sa_comp_mask comp_mask, int timeout_ms, gfp_t gfp_mask, void (*callback)(int status, struct ib_sa_path_rec *resp, void *context), void *context, struct ib_sa_query **sa_query) { struct ib_sa_path_query *query; struct ib_sa_device *sa_dev = ib_get_client_data(device, &sa_client); struct ib_sa_port *port; struct ib_mad_agent *agent; struct ib_sa_mad *mad; int ret; if (!sa_dev) return -ENODEV; port = &sa_dev->port[port_num - sa_dev->start_port]; agent = port->agent; query = kmalloc(sizeof *query, gfp_mask); if (!query) return -ENOMEM; query->sa_query.port = port; ret = alloc_mad(&query->sa_query, gfp_mask); if (ret) goto err1; ib_sa_client_get(client); query->sa_query.client = client; query->callback = callback; query->context = context; mad = query->sa_query.mad_buf->mad; init_mad(mad, agent); query->sa_query.callback = callback ? ib_sa_path_rec_callback : NULL; query->sa_query.release = ib_sa_path_rec_release; mad->mad_hdr.method = IB_MGMT_METHOD_GET; mad->mad_hdr.attr_id = cpu_to_be16(IB_SA_ATTR_PATH_REC); mad->sa_hdr.comp_mask = comp_mask; ib_pack(path_rec_table, ARRAY_SIZE(path_rec_table), rec, mad->data); *sa_query = &query->sa_query; ret = send_mad(&query->sa_query, timeout_ms, gfp_mask); if (ret < 0) goto err2; return ret; err2: *sa_query = NULL; ib_sa_client_put(query->sa_query.client); free_mad(&query->sa_query); err1: kfree(query); return ret; } static void ib_sa_service_rec_callback(struct ib_sa_query *sa_query, int status, struct ib_sa_mad *mad) { struct ib_sa_service_query *query = container_of(sa_query, struct ib_sa_service_query, sa_query); if (mad) { struct ib_sa_service_rec rec; ib_unpack(service_rec_table, ARRAY_SIZE(service_rec_table), mad->data, &rec); query->callback(status, &rec, query->context); } else query->callback(status, NULL, query->context); } static void ib_sa_service_rec_release(struct ib_sa_query *sa_query) { kfree(container_of(sa_query, struct ib_sa_service_query, sa_query)); } /** * ib_sa_service_rec_query - Start Service Record operation * @client:SA client * @device:device to send request on * @port_num: port number to send request on * @method:SA method - should be get, set, or delete * @rec:Service Record to send in request * @comp_mask:component mask to send in request * @timeout_ms:time to wait for response * @gfp_mask:GFP mask to use for internal allocations * @callback:function called when request completes, times out or is * canceled * @context:opaque user context passed to callback * @sa_query:request context, used to cancel request * * Send a Service Record set/get/delete to the SA to register, * unregister or query a service record. * The callback function will be called when the request completes (or * fails); status is 0 for a successful response, -EINTR if the query * is canceled, -ETIMEDOUT is the query timed out, or -EIO if an error * occurred sending the query. The resp parameter of the callback is * only valid if status is 0. * * If the return value of ib_sa_service_rec_query() is negative, it is an * error code. Otherwise it is a request ID that can be used to cancel * the query. */ int ib_sa_service_rec_query(struct ib_sa_client *client, struct ib_device *device, u8 port_num, u8 method, struct ib_sa_service_rec *rec, ib_sa_comp_mask comp_mask, int timeout_ms, gfp_t gfp_mask, void (*callback)(int status, struct ib_sa_service_rec *resp, void *context), void *context, struct ib_sa_query **sa_query) { struct ib_sa_service_query *query; struct ib_sa_device *sa_dev = ib_get_client_data(device, &sa_client); struct ib_sa_port *port; struct ib_mad_agent *agent; struct ib_sa_mad *mad; int ret; if (!sa_dev) return -ENODEV; port = &sa_dev->port[port_num - sa_dev->start_port]; agent = port->agent; if (method != IB_MGMT_METHOD_GET && method != IB_MGMT_METHOD_SET && method != IB_SA_METHOD_DELETE) return -EINVAL; query = kmalloc(sizeof *query, gfp_mask); if (!query) return -ENOMEM; query->sa_query.port = port; ret = alloc_mad(&query->sa_query, gfp_mask); if (ret) goto err1; ib_sa_client_get(client); query->sa_query.client = client; query->callback = callback; query->context = context; mad = query->sa_query.mad_buf->mad; init_mad(mad, agent); query->sa_query.callback = callback ? ib_sa_service_rec_callback : NULL; query->sa_query.release = ib_sa_service_rec_release; mad->mad_hdr.method = method; mad->mad_hdr.attr_id = cpu_to_be16(IB_SA_ATTR_SERVICE_REC); mad->sa_hdr.comp_mask = comp_mask; ib_pack(service_rec_table, ARRAY_SIZE(service_rec_table), rec, mad->data); *sa_query = &query->sa_query; ret = send_mad(&query->sa_query, timeout_ms, gfp_mask); if (ret < 0) goto err2; return ret; err2: *sa_query = NULL; ib_sa_client_put(query->sa_query.client); free_mad(&query->sa_query); err1: kfree(query); return ret; } EXPORT_SYMBOL(ib_sa_service_rec_query); static void ib_sa_mcmember_rec_callback(struct ib_sa_query *sa_query, int status, struct ib_sa_mad *mad) { struct ib_sa_mcmember_query *query = container_of(sa_query, struct ib_sa_mcmember_query, sa_query); if (mad) { struct ib_sa_mcmember_rec rec; ib_unpack(mcmember_rec_table, ARRAY_SIZE(mcmember_rec_table), mad->data, &rec); query->callback(status, &rec, query->context); } else query->callback(status, NULL, query->context); } static void ib_sa_mcmember_rec_release(struct ib_sa_query *sa_query) { kfree(container_of(sa_query, struct ib_sa_mcmember_query, sa_query)); } int ib_sa_mcmember_rec_query(struct ib_sa_client *client, struct ib_device *device, u8 port_num, u8 method, struct ib_sa_mcmember_rec *rec, ib_sa_comp_mask comp_mask, int timeout_ms, gfp_t gfp_mask, void (*callback)(int status, struct ib_sa_mcmember_rec *resp, void *context), void *context, struct ib_sa_query **sa_query) { struct ib_sa_mcmember_query *query; struct ib_sa_device *sa_dev = ib_get_client_data(device, &sa_client); struct ib_sa_port *port; struct ib_mad_agent *agent; struct ib_sa_mad *mad; int ret; if (!sa_dev) return -ENODEV; port = &sa_dev->port[port_num - sa_dev->start_port]; agent = port->agent; query = kmalloc(sizeof *query, gfp_mask); if (!query) return -ENOMEM; query->sa_query.port = port; ret = alloc_mad(&query->sa_query, gfp_mask); if (ret) goto err1; ib_sa_client_get(client); query->sa_query.client = client; query->callback = callback; query->context = context; mad = query->sa_query.mad_buf->mad; init_mad(mad, agent); query->sa_query.callback = callback ? ib_sa_mcmember_rec_callback : NULL; query->sa_query.release = ib_sa_mcmember_rec_release; mad->mad_hdr.method = method; mad->mad_hdr.attr_id = cpu_to_be16(IB_SA_ATTR_MC_MEMBER_REC); mad->sa_hdr.comp_mask = comp_mask; ib_pack(mcmember_rec_table, ARRAY_SIZE(mcmember_rec_table), rec, mad->data); *sa_query = &query->sa_query; ret = send_mad(&query->sa_query, timeout_ms, gfp_mask); if (ret < 0) goto err2; return ret; err2: *sa_query = NULL; ib_sa_client_put(query->sa_query.client); free_mad(&query->sa_query); err1: kfree(query); return ret; } static void ib_sa_inform_callback(struct ib_sa_query *sa_query, int status, struct ib_sa_mad *mad) { struct ib_sa_inform_query *query = container_of(sa_query, struct ib_sa_inform_query, sa_query); if (mad) { struct ib_sa_inform rec; ib_unpack(inform_table, ARRAY_SIZE(inform_table), mad->data, &rec); query->callback(status, &rec, query->context); } else query->callback(status, NULL, query->context); } static void ib_sa_inform_release(struct ib_sa_query *sa_query) { kfree(container_of(sa_query, struct ib_sa_inform_query, sa_query)); } int ib_sa_guid_info_rec_query(struct ib_sa_client *client, struct ib_device *device, u8 port_num, struct ib_sa_guidinfo_rec *rec, ib_sa_comp_mask comp_mask, u8 method, int timeout_ms, gfp_t gfp_mask, void (*callback)(int status, struct ib_sa_guidinfo_rec *resp, void *context), void *context, struct ib_sa_query **sa_query) { // stub function - // called originally from mad.c under mlx4_ib_init_sriov() // which calls mlx4_ib_init_alias_guid_service() in alias_GUID.c // which goes down to this function printk("ERROR: function should be called only in SRIOV flow!!!"); return 0; } /** * ib_sa_informinfo_query - Start an InformInfo registration. * @client:SA client * @device:device to send query on * @port_num: port number to send query on * @rec:Inform record to send in query * @timeout_ms:time to wait for response * @gfp_mask:GFP mask to use for internal allocations * @callback:function called when notice handler registration completes, * times out or is canceled * @context:opaque user context passed to callback * @sa_query:query context, used to cancel query * * This function sends inform info to register with SA to receive * in-service notice. * The callback function will be called when the query completes (or * fails); status is 0 for a successful response, -EINTR if the query * is canceled, -ETIMEDOUT is the query timed out, or -EIO if an error * occurred sending the query. The resp parameter of the callback is * only valid if status is 0. * * If the return value of ib_sa_inform_query() is negative, it is an * error code. Otherwise it is a query ID that can be used to cancel * the query. */ int ib_sa_informinfo_query(struct ib_sa_client *client, struct ib_device *device, u8 port_num, struct ib_sa_inform *rec, int timeout_ms, gfp_t gfp_mask, void (*callback)(int status, struct ib_sa_inform *resp, void *context), void *context, struct ib_sa_query **sa_query) { struct ib_sa_inform_query *query; struct ib_sa_device *sa_dev = ib_get_client_data(device, &sa_client); struct ib_sa_port *port; struct ib_mad_agent *agent; struct ib_sa_mad *mad; int ret; if (!sa_dev) return -ENODEV; port = &sa_dev->port[port_num - sa_dev->start_port]; agent = port->agent; query = kmalloc(sizeof *query, gfp_mask); if (!query) return -ENOMEM; query->sa_query.port = port; ret = alloc_mad(&query->sa_query, gfp_mask); if (ret) goto err1; ib_sa_client_get(client); query->sa_query.client = client; query->callback = callback; query->context = context; mad = query->sa_query.mad_buf->mad; init_mad(mad, agent); query->sa_query.callback = callback ? ib_sa_inform_callback : NULL; query->sa_query.release = ib_sa_inform_release; query->sa_query.port = port; mad->mad_hdr.method = IB_MGMT_METHOD_SET; mad->mad_hdr.attr_id = cpu_to_be16(IB_SA_ATTR_INFORM_INFO); ib_pack(inform_table, ARRAY_SIZE(inform_table), rec, mad->data); *sa_query = &query->sa_query; ret = send_mad(&query->sa_query, timeout_ms, gfp_mask); if (ret < 0) goto err2; return ret; err2: *sa_query = NULL; ib_sa_client_put(query->sa_query.client); free_mad(&query->sa_query); err1: kfree(query); return ret; } static void ib_sa_notice_resp(struct ib_sa_port *port, struct ib_mad_recv_wc *mad_recv_wc) { struct ib_mad_send_buf *mad_buf; struct ib_sa_mad *mad; int ret; unsigned long flags; mad_buf = ib_create_send_mad(port->notice_agent, 1, 0, 0, IB_MGMT_SA_HDR, IB_MGMT_SA_DATA, GFP_KERNEL); if (IS_ERR(mad_buf)) return; mad = mad_buf->mad; memcpy(mad, mad_recv_wc->recv_buf.mad, sizeof *mad); mad->mad_hdr.method = IB_MGMT_METHOD_REPORT_RESP; spin_lock_irqsave(&port->ah_lock, flags); if (!port->sm_ah) { spin_unlock_irqrestore(&port->ah_lock, flags); ib_free_send_mad(mad_buf); return; } kref_get(&port->sm_ah->ref); mad_buf->context[0] = &port->sm_ah->ref; mad_buf->ah = port->sm_ah->ah; spin_unlock_irqrestore(&port->ah_lock, flags); ret = ib_post_send_mad(mad_buf, NULL); if (ret) goto err; return; err: kref_put(mad_buf->context[0], free_sm_ah); ib_free_send_mad(mad_buf); } static void send_handler(struct ib_mad_agent *agent, struct ib_mad_send_wc *mad_send_wc) { struct ib_sa_query *query = mad_send_wc->send_buf->context[0]; unsigned long flags; if (query->callback) switch (mad_send_wc->status) { case IB_WC_SUCCESS: /* No callback -- already got recv */ break; case IB_WC_RESP_TIMEOUT_ERR: query->callback(query, -ETIMEDOUT, NULL); break; case IB_WC_WR_FLUSH_ERR: query->callback(query, -EINTR, NULL); break; default: query->callback(query, -EIO, NULL); break; } spin_lock_irqsave(&idr_lock, flags); idr_remove(&query_idr, query->id); spin_unlock_irqrestore(&idr_lock, flags); free_mad(query); ib_sa_client_put(query->client); query->release(query); } static void recv_handler(struct ib_mad_agent *mad_agent, struct ib_mad_recv_wc *mad_recv_wc) { struct ib_sa_query *query; struct ib_mad_send_buf *mad_buf; mad_buf = (void *) (unsigned long) mad_recv_wc->wc->wr_id; query = mad_buf->context[0]; if (query->callback) { if (mad_recv_wc->wc->status == IB_WC_SUCCESS) query->callback(query, mad_recv_wc->recv_buf.mad->mad_hdr.status ? -EINVAL : 0, (struct ib_sa_mad *) mad_recv_wc->recv_buf.mad); else query->callback(query, -EIO, NULL); } ib_free_recv_mad(mad_recv_wc); } static void notice_resp_handler(struct ib_mad_agent *agent, struct ib_mad_send_wc *mad_send_wc) { kref_put(mad_send_wc->send_buf->context[0], free_sm_ah); ib_free_send_mad(mad_send_wc->send_buf); } static void notice_handler(struct ib_mad_agent *mad_agent, struct ib_mad_recv_wc *mad_recv_wc) { struct ib_sa_port *port; struct ib_sa_mad *mad; struct ib_sa_notice notice; port = mad_agent->context; mad = (struct ib_sa_mad *) mad_recv_wc->recv_buf.mad; ib_unpack(notice_table, ARRAY_SIZE(notice_table), mad->data, ¬ice); if (!notice_dispatch(port->device, port->port_num, ¬ice)) ib_sa_notice_resp(port, mad_recv_wc); ib_free_recv_mad(mad_recv_wc); } static void ib_sa_add_one(struct ib_device *device) { struct ib_sa_device *sa_dev; struct ib_mad_reg_req reg_req = { .mgmt_class = IB_MGMT_CLASS_SUBN_ADM, .mgmt_class_version = 2 }; int s, e, i; if (rdma_node_get_transport(device->node_type) != RDMA_TRANSPORT_IB) return; if (device->node_type == RDMA_NODE_IB_SWITCH) s = e = 0; else { s = 1; e = device->phys_port_cnt; } sa_dev = kzalloc(sizeof *sa_dev + (e - s + 1) * sizeof (struct ib_sa_port), GFP_KERNEL); if (!sa_dev) return; sa_dev->start_port = s; sa_dev->end_port = e; for (i = 0; i <= e - s; ++i) { spin_lock_init(&sa_dev->port[i].ah_lock); if (rdma_port_get_link_layer(device, i + 1) != IB_LINK_LAYER_INFINIBAND) continue; sa_dev->port[i].sm_ah = NULL; sa_dev->port[i].port_num = i + s; sa_dev->port[i].agent = ib_register_mad_agent(device, i + s, IB_QPT_GSI, NULL, 0, send_handler, recv_handler, sa_dev); if (IS_ERR(sa_dev->port[i].agent)) goto err; sa_dev->port[i].device = device; set_bit(IB_MGMT_METHOD_REPORT, reg_req.method_mask); sa_dev->port[i].notice_agent = ib_register_mad_agent(device, i + s, IB_QPT_GSI, ®_req, 0, notice_resp_handler, notice_handler, &sa_dev->port[i]); if (IS_ERR(sa_dev->port[i].notice_agent)) goto err; INIT_WORK(&sa_dev->port[i].update_task, update_sm_ah); } ib_set_client_data(device, &sa_client, sa_dev); /* * We register our event handler after everything is set up, * and then update our cached info after the event handler is * registered to avoid any problems if a port changes state * during our initialization. */ INIT_IB_EVENT_HANDLER(&sa_dev->event_handler, device, ib_sa_event); if (ib_register_event_handler(&sa_dev->event_handler)) goto err; for (i = 0; i <= e - s; ++i) if (rdma_port_get_link_layer(device, i + 1) == IB_LINK_LAYER_INFINIBAND) update_sm_ah(&sa_dev->port[i].update_task); return; err: while (--i >= 0) if (rdma_port_get_link_layer(device, i + 1) == IB_LINK_LAYER_INFINIBAND) { if (!IS_ERR(sa_dev->port[i].notice_agent)) ib_unregister_mad_agent(sa_dev->port[i].notice_agent); if (!IS_ERR(sa_dev->port[i].agent)) ib_unregister_mad_agent(sa_dev->port[i].agent); } kfree(sa_dev); return; } static void ib_sa_remove_one(struct ib_device *device) { struct ib_sa_device *sa_dev = ib_get_client_data(device, &sa_client); int i; if (!sa_dev) return; ib_unregister_event_handler(&sa_dev->event_handler); flush_scheduled_work(); for (i = 0; i <= sa_dev->end_port - sa_dev->start_port; ++i) { if (rdma_port_get_link_layer(device, i + 1) == IB_LINK_LAYER_INFINIBAND) { ib_unregister_mad_agent(sa_dev->port[i].notice_agent); ib_unregister_mad_agent(sa_dev->port[i].agent); if (sa_dev->port[i].sm_ah) kref_put(&sa_dev->port[i].sm_ah->ref, free_sm_ah); } } kfree(sa_dev); } static int __init ib_sa_init(void) { int ret; spin_lock_init(&idr_lock); spin_lock_init(&tid_lock); get_random_bytes(&tid, sizeof tid); ret = ib_register_client(&sa_client); if (ret) { printk(KERN_ERR "Couldn't register ib_sa client\n"); goto err1; } ret = mcast_init(); if (ret) { printk(KERN_ERR "Couldn't initialize multicast handling\n"); goto err2; } ret = notice_init(); if (ret) { printk(KERN_ERR "Couldn't initialize notice handling\n"); goto err3; } ret = sa_db_init(); if (ret) { printk(KERN_ERR "Couldn't initialize local SA\n"); goto err4; } return 0; err4: notice_cleanup(); err3: mcast_cleanup(); err2: ib_unregister_client(&sa_client); err1: return ret; } static void __exit ib_sa_cleanup(void) { sa_db_cleanup(); mcast_cleanup(); notice_cleanup(); ib_unregister_client(&sa_client); idr_destroy(&query_idr); } module_init_order(ib_sa_init, SI_ORDER_SECOND); module_exit(ib_sa_cleanup); Index: stable/10/sys/ofed/drivers/infiniband/core/sysfs.c =================================================================== --- stable/10/sys/ofed/drivers/infiniband/core/sysfs.c (revision 271126) +++ stable/10/sys/ofed/drivers/infiniband/core/sysfs.c (revision 271127) @@ -1,980 +1,981 @@ /* * Copyright (c) 2004, 2005 Topspin Communications. All rights reserved. * Copyright (c) 2005 Mellanox Technologies Ltd. All rights reserved. * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU * General Public License (GPL) Version 2, available from the file * COPYING in the main directory of this source tree, or the * OpenIB.org BSD license below: * * Redistribution and use in source and binary forms, with or * without modification, are permitted provided that the following * conditions are met: * * - Redistributions of source code must retain the above * copyright notice, this list of conditions and the following * disclaimer. * * - Redistributions in binary form must reproduce the above * copyright notice, this list of conditions and the following * disclaimer in the documentation and/or other materials * provided with the distribution. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ #include "core_priv.h" #include #include +#include #include #include struct ib_port { struct kobject kobj; struct ib_device *ibdev; struct attribute_group gid_group; struct attribute_group pkey_group; u8 port_num; }; struct port_attribute { struct attribute attr; ssize_t (*show)(struct ib_port *, struct port_attribute *, char *buf); ssize_t (*store)(struct ib_port *, struct port_attribute *, const char *buf, size_t count); }; #define PORT_ATTR(_name, _mode, _show, _store) \ struct port_attribute port_attr_##_name = __ATTR(_name, _mode, _show, _store) #define PORT_ATTR_RO(_name) \ struct port_attribute port_attr_##_name = __ATTR_RO(_name) struct port_table_attribute { struct port_attribute attr; char name[8]; int index; }; static ssize_t port_attr_show(struct kobject *kobj, struct attribute *attr, char *buf) { struct port_attribute *port_attr = container_of(attr, struct port_attribute, attr); struct ib_port *p = container_of(kobj, struct ib_port, kobj); if (!port_attr->show) return -EIO; return port_attr->show(p, port_attr, buf); } static const struct sysfs_ops port_sysfs_ops = { .show = port_attr_show }; static ssize_t state_show(struct ib_port *p, struct port_attribute *unused, char *buf) { struct ib_port_attr attr; ssize_t ret; static const char *state_name[] = { [IB_PORT_NOP] = "NOP", [IB_PORT_DOWN] = "DOWN", [IB_PORT_INIT] = "INIT", [IB_PORT_ARMED] = "ARMED", [IB_PORT_ACTIVE] = "ACTIVE", [IB_PORT_ACTIVE_DEFER] = "ACTIVE_DEFER" }; ret = ib_query_port(p->ibdev, p->port_num, &attr); if (ret) return ret; return sprintf(buf, "%d: %s\n", attr.state, attr.state < ARRAY_SIZE(state_name) ? state_name[attr.state] : "UNKNOWN"); } static ssize_t lid_show(struct ib_port *p, struct port_attribute *unused, char *buf) { struct ib_port_attr attr; ssize_t ret; ret = ib_query_port(p->ibdev, p->port_num, &attr); if (ret) return ret; return sprintf(buf, "0x%x\n", attr.lid); } static ssize_t lid_mask_count_show(struct ib_port *p, struct port_attribute *unused, char *buf) { struct ib_port_attr attr; ssize_t ret; ret = ib_query_port(p->ibdev, p->port_num, &attr); if (ret) return ret; return sprintf(buf, "%d\n", attr.lmc); } static ssize_t sm_lid_show(struct ib_port *p, struct port_attribute *unused, char *buf) { struct ib_port_attr attr; ssize_t ret; ret = ib_query_port(p->ibdev, p->port_num, &attr); if (ret) return ret; return sprintf(buf, "0x%x\n", attr.sm_lid); } static ssize_t sm_sl_show(struct ib_port *p, struct port_attribute *unused, char *buf) { struct ib_port_attr attr; ssize_t ret; ret = ib_query_port(p->ibdev, p->port_num, &attr); if (ret) return ret; return sprintf(buf, "%d\n", attr.sm_sl); } static ssize_t cap_mask_show(struct ib_port *p, struct port_attribute *unused, char *buf) { struct ib_port_attr attr; ssize_t ret; ret = ib_query_port(p->ibdev, p->port_num, &attr); if (ret) return ret; return sprintf(buf, "0x%08x\n", attr.port_cap_flags); } static ssize_t rate_show(struct ib_port *p, struct port_attribute *unused, char *buf) { struct ib_port_attr attr; char *speed = ""; int rate; ssize_t ret; ret = ib_query_port(p->ibdev, p->port_num, &attr); if (ret) return ret; switch (attr.active_speed) { case 2: speed = " DDR"; break; case 4: speed = " QDR"; break; } rate = 25 * ib_width_enum_to_int(attr.active_width) * attr.active_speed; if (rate < 0) return -EINVAL; return sprintf(buf, "%d%s Gb/sec (%dX%s)\n", rate / 10, rate % 10 ? ".5" : "", ib_width_enum_to_int(attr.active_width), speed); } static ssize_t phys_state_show(struct ib_port *p, struct port_attribute *unused, char *buf) { struct ib_port_attr attr; ssize_t ret; ret = ib_query_port(p->ibdev, p->port_num, &attr); if (ret) return ret; switch (attr.phys_state) { case 1: return sprintf(buf, "1: Sleep\n"); case 2: return sprintf(buf, "2: Polling\n"); case 3: return sprintf(buf, "3: Disabled\n"); case 4: return sprintf(buf, "4: PortConfigurationTraining\n"); case 5: return sprintf(buf, "5: LinkUp\n"); case 6: return sprintf(buf, "6: LinkErrorRecovery\n"); case 7: return sprintf(buf, "7: Phy Test\n"); default: return sprintf(buf, "%d: \n", attr.phys_state); } } static ssize_t link_layer_show(struct ib_port *p, struct port_attribute *unused, char *buf) { switch (rdma_port_get_link_layer(p->ibdev, p->port_num)) { case IB_LINK_LAYER_INFINIBAND: return sprintf(buf, "%s\n", "IB"); case IB_LINK_LAYER_ETHERNET: return sprintf(buf, "%s\n", "Ethernet"); default: return sprintf(buf, "%s\n", "Unknown"); } } static PORT_ATTR_RO(state); static PORT_ATTR_RO(lid); static PORT_ATTR_RO(lid_mask_count); static PORT_ATTR_RO(sm_lid); static PORT_ATTR_RO(sm_sl); static PORT_ATTR_RO(cap_mask); static PORT_ATTR_RO(rate); static PORT_ATTR_RO(phys_state); static PORT_ATTR_RO(link_layer); static struct attribute *port_default_attrs[] = { &port_attr_state.attr, &port_attr_lid.attr, &port_attr_lid_mask_count.attr, &port_attr_sm_lid.attr, &port_attr_sm_sl.attr, &port_attr_cap_mask.attr, &port_attr_rate.attr, &port_attr_phys_state.attr, &port_attr_link_layer.attr, NULL }; static ssize_t show_port_gid(struct ib_port *p, struct port_attribute *attr, char *buf) { struct port_table_attribute *tab_attr = container_of(attr, struct port_table_attribute, attr); union ib_gid gid; ssize_t ret; u16 *raw; ret = ib_query_gid(p->ibdev, p->port_num, tab_attr->index, &gid); if (ret) return ret; raw = (u16 *)gid.raw; return sprintf(buf, "%.4x:%.4x:%.4x:%.4x:%.4x:%.4x:%.4x:%.4x\n", htons(raw[0]), htons(raw[1]), htons(raw[2]), htons(raw[3]), htons(raw[4]), htons(raw[5]), htons(raw[6]), htons(raw[7])); } static ssize_t show_port_pkey(struct ib_port *p, struct port_attribute *attr, char *buf) { struct port_table_attribute *tab_attr = container_of(attr, struct port_table_attribute, attr); u16 pkey; ssize_t ret; ret = ib_query_pkey(p->ibdev, p->port_num, tab_attr->index, &pkey); if (ret) return ret; return sprintf(buf, "0x%04x\n", pkey); } static ssize_t get_pma_counters(struct ib_port *p, struct port_attribute *attr, char *buf, int c_ext) { struct port_table_attribute *tab_attr = container_of(attr, struct port_table_attribute, attr); int offset = tab_attr->index & 0xffff; int width = (tab_attr->index >> 16) & 0xff; struct ib_mad *in_mad = NULL; struct ib_mad *out_mad = NULL; ssize_t ret; if (!p->ibdev->process_mad) return -ENXIO; in_mad = kzalloc(sizeof *in_mad, GFP_KERNEL); out_mad = kmalloc(sizeof *out_mad, GFP_KERNEL); if (!in_mad || !out_mad) { ret = -ENOMEM; goto out; } in_mad->mad_hdr.base_version = 1; in_mad->mad_hdr.mgmt_class = IB_MGMT_CLASS_PERF_MGMT; in_mad->mad_hdr.class_version = 1; in_mad->mad_hdr.method = IB_MGMT_METHOD_GET; if (c_ext) in_mad->mad_hdr.attr_id = IB_PMA_PORT_COUNTERS_EXT; else in_mad->mad_hdr.attr_id = IB_PMA_PORT_COUNTERS; in_mad->data[41] = p->port_num; /* PortSelect field */ if ((p->ibdev->process_mad(p->ibdev, IB_MAD_IGNORE_MKEY, p->port_num, NULL, NULL, in_mad, out_mad) & (IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_REPLY)) != (IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_REPLY)) { ret = -EINVAL; goto out; } switch (width) { case 4: ret = sprintf(buf, "%u\n", (out_mad->data[40 + offset / 8] >> (4 - (offset % 8))) & 0xf); break; case 8: ret = sprintf(buf, "%u\n", out_mad->data[40 + offset / 8]); break; case 16: ret = sprintf(buf, "%u\n", be16_to_cpup((__be16 *)(out_mad->data + 40 + offset / 8))); break; case 32: ret = sprintf(buf, "%u\n", be32_to_cpup((__be32 *)(out_mad->data + 40 + offset / 8))); break; case 64: ret = sprintf(buf, "%llu\n", (unsigned long long) be64_to_cpup((__be64 *)(out_mad->data + 40 + offset / 8))); break; default: ret = 0; } out: kfree(in_mad); kfree(out_mad); return ret; } #define PORT_PMA_ATTR(_name, _counter, _width, _offset) \ struct port_table_attribute port_pma_attr_##_name = { \ .attr = __ATTR(_name, S_IRUGO, show_pma_counter, NULL), \ .index = (_offset) | ((_width) << 16) | ((_counter) << 24) \ } static ssize_t show_pma_counter(struct ib_port *p, struct port_attribute *attr, char *buf) { return get_pma_counters(p, attr, buf, 0); } static PORT_PMA_ATTR(symbol_error , 0, 16, 32); static PORT_PMA_ATTR(link_error_recovery , 1, 8, 48); static PORT_PMA_ATTR(link_downed , 2, 8, 56); static PORT_PMA_ATTR(port_rcv_errors , 3, 16, 64); static PORT_PMA_ATTR(port_rcv_remote_physical_errors, 4, 16, 80); static PORT_PMA_ATTR(port_rcv_switch_relay_errors , 5, 16, 96); static PORT_PMA_ATTR(port_xmit_discards , 6, 16, 112); static PORT_PMA_ATTR(port_xmit_constraint_errors , 7, 8, 128); static PORT_PMA_ATTR(port_rcv_constraint_errors , 8, 8, 136); static PORT_PMA_ATTR(local_link_integrity_errors , 9, 4, 152); static PORT_PMA_ATTR(excessive_buffer_overrun_errors, 10, 4, 156); static PORT_PMA_ATTR(VL15_dropped , 11, 16, 176); static PORT_PMA_ATTR(port_xmit_data , 12, 32, 192); static PORT_PMA_ATTR(port_rcv_data , 13, 32, 224); static PORT_PMA_ATTR(port_xmit_packets , 14, 32, 256); static PORT_PMA_ATTR(port_rcv_packets , 15, 32, 288); static struct attribute *pma_attrs[] = { &port_pma_attr_symbol_error.attr.attr, &port_pma_attr_link_error_recovery.attr.attr, &port_pma_attr_link_downed.attr.attr, &port_pma_attr_port_rcv_errors.attr.attr, &port_pma_attr_port_rcv_remote_physical_errors.attr.attr, &port_pma_attr_port_rcv_switch_relay_errors.attr.attr, &port_pma_attr_port_xmit_discards.attr.attr, &port_pma_attr_port_xmit_constraint_errors.attr.attr, &port_pma_attr_port_rcv_constraint_errors.attr.attr, &port_pma_attr_local_link_integrity_errors.attr.attr, &port_pma_attr_excessive_buffer_overrun_errors.attr.attr, &port_pma_attr_VL15_dropped.attr.attr, &port_pma_attr_port_xmit_data.attr.attr, &port_pma_attr_port_rcv_data.attr.attr, &port_pma_attr_port_xmit_packets.attr.attr, &port_pma_attr_port_rcv_packets.attr.attr, NULL }; static struct attribute_group pma_group = { .name = "counters", .attrs = pma_attrs }; #define PORT_PMA_ATTR_EXT(_name, _counter, _width, _offset) \ struct port_table_attribute port_pma_attr_ext_##_name = { \ .attr = __ATTR(_name, S_IRUGO, show_pma_counter_ext, NULL), \ .index = (_offset) | ((_width) << 16) | ((_counter) << 24) \ } static ssize_t show_pma_counter_ext(struct ib_port *p, struct port_attribute *attr, char *buf) { return get_pma_counters(p, attr, buf, 1); } static PORT_PMA_ATTR_EXT(port_xmit_data_64 , 0, 64, 64); static PORT_PMA_ATTR_EXT(port_rcv_data_64 , 0, 64, 128); static PORT_PMA_ATTR_EXT(port_xmit_packets_64 , 0, 64, 192); static PORT_PMA_ATTR_EXT(port_rcv_packets_64 , 0, 64, 256); static PORT_PMA_ATTR_EXT(port_unicast_xmit_packets , 0, 64, 320); static PORT_PMA_ATTR_EXT(port_unicast_rcv_packets , 0, 64, 384); static PORT_PMA_ATTR_EXT(port_multicast_xmit_packets , 0, 64, 448); static PORT_PMA_ATTR_EXT(port_multicast_rcv_packets , 0, 64, 512); static struct attribute *pma_attrs_ext[] = { &port_pma_attr_ext_port_xmit_data_64.attr.attr, &port_pma_attr_ext_port_rcv_data_64.attr.attr, &port_pma_attr_ext_port_xmit_packets_64.attr.attr, &port_pma_attr_ext_port_rcv_packets_64.attr.attr, &port_pma_attr_ext_port_unicast_xmit_packets.attr.attr, &port_pma_attr_ext_port_unicast_rcv_packets.attr.attr, &port_pma_attr_ext_port_multicast_xmit_packets.attr.attr, &port_pma_attr_ext_port_multicast_rcv_packets.attr.attr, NULL }; static struct attribute_group pma_ext_group = { .name = "counters_ext", .attrs = pma_attrs_ext }; static void ib_port_release(struct kobject *kobj) { struct ib_port *p = container_of(kobj, struct ib_port, kobj); struct attribute *a; int i; for (i = 0; (a = p->gid_group.attrs[i]); ++i) kfree(a); kfree(p->gid_group.attrs); for (i = 0; (a = p->pkey_group.attrs[i]); ++i) kfree(a); kfree(p->pkey_group.attrs); kfree(p); } static struct kobj_type port_type = { .release = ib_port_release, .sysfs_ops = &port_sysfs_ops, .default_attrs = port_default_attrs }; static void ib_device_release(struct device *device) { struct ib_device *dev = container_of(device, struct ib_device, dev); kfree(dev); } #ifdef __linux__ /* BSD supports this through devfs(5) and devd(8). */ static int ib_device_uevent(struct device *device, struct kobj_uevent_env *env) { struct ib_device *dev = container_of(device, struct ib_device, dev); if (add_uevent_var(env, "NAME=%s", dev->name)) return -ENOMEM; /* * It would be nice to pass the node GUID with the event... */ return 0; } #endif static struct attribute ** alloc_group_attrs(ssize_t (*show)(struct ib_port *, struct port_attribute *, char *buf), int len) { struct attribute **tab_attr; struct port_table_attribute *element; int i; tab_attr = kcalloc(1 + len, sizeof(struct attribute *), GFP_KERNEL); if (!tab_attr) return NULL; for (i = 0; i < len; i++) { element = kzalloc(sizeof(struct port_table_attribute), GFP_KERNEL); if (!element) goto err; if (snprintf(element->name, sizeof(element->name), "%d", i) >= sizeof(element->name)) { kfree(element); goto err; } element->attr.attr.name = element->name; element->attr.attr.mode = S_IRUGO; element->attr.show = show; element->index = i; tab_attr[i] = &element->attr.attr; } return tab_attr; err: while (--i >= 0) kfree(tab_attr[i]); kfree(tab_attr); return NULL; } static int add_port(struct ib_device *device, int port_num, int (*port_callback)(struct ib_device *, u8, struct kobject *)) { struct ib_port *p; struct ib_port_attr attr; int i; int ret; ret = ib_query_port(device, port_num, &attr); if (ret) return ret; p = kzalloc(sizeof *p, GFP_KERNEL); if (!p) return -ENOMEM; p->ibdev = device; p->port_num = port_num; ret = kobject_init_and_add(&p->kobj, &port_type, kobject_get(device->ports_parent), "%d", port_num); if (ret) goto err_put; ret = sysfs_create_group(&p->kobj, &pma_group); if (ret) goto err_put; ret = sysfs_create_group(&p->kobj, &pma_ext_group); if (ret) goto err_remove_pma; p->gid_group.name = "gids"; p->gid_group.attrs = alloc_group_attrs(show_port_gid, attr.gid_tbl_len); if (!p->gid_group.attrs) goto err_remove_pma_ext; ret = sysfs_create_group(&p->kobj, &p->gid_group); if (ret) goto err_free_gid; p->pkey_group.name = "pkeys"; p->pkey_group.attrs = alloc_group_attrs(show_port_pkey, attr.pkey_tbl_len); if (!p->pkey_group.attrs) goto err_remove_gid; ret = sysfs_create_group(&p->kobj, &p->pkey_group); if (ret) goto err_free_pkey; if (port_callback) { ret = port_callback(device, port_num, &p->kobj); if (ret) goto err_remove_pkey; } list_add_tail(&p->kobj.entry, &device->port_list); #ifdef __linux__ kobject_uevent(&p->kobj, KOBJ_ADD); #endif return 0; err_remove_pkey: sysfs_remove_group(&p->kobj, &p->pkey_group); err_free_pkey: for (i = 0; i < attr.pkey_tbl_len; ++i) kfree(p->pkey_group.attrs[i]); kfree(p->pkey_group.attrs); err_remove_gid: sysfs_remove_group(&p->kobj, &p->gid_group); err_free_gid: for (i = 0; i < attr.gid_tbl_len; ++i) kfree(p->gid_group.attrs[i]); kfree(p->gid_group.attrs); err_remove_pma_ext: sysfs_remove_group(&p->kobj, &pma_ext_group); err_remove_pma: sysfs_remove_group(&p->kobj, &pma_group); err_put: kobject_put(device->ports_parent); kfree(p); return ret; } static ssize_t show_node_type(struct device *device, struct device_attribute *attr, char *buf) { struct ib_device *dev = container_of(device, struct ib_device, dev); switch (dev->node_type) { case RDMA_NODE_IB_CA: return sprintf(buf, "%d: CA\n", dev->node_type); case RDMA_NODE_RNIC: return sprintf(buf, "%d: RNIC\n", dev->node_type); case RDMA_NODE_IB_SWITCH: return sprintf(buf, "%d: switch\n", dev->node_type); case RDMA_NODE_IB_ROUTER: return sprintf(buf, "%d: router\n", dev->node_type); default: return sprintf(buf, "%d: \n", dev->node_type); } } static ssize_t show_sys_image_guid(struct device *device, struct device_attribute *dev_attr, char *buf) { struct ib_device *dev = container_of(device, struct ib_device, dev); struct ib_device_attr attr; ssize_t ret; ret = ib_query_device(dev, &attr); if (ret) return ret; return sprintf(buf, "%04x:%04x:%04x:%04x\n", be16_to_cpu(((__be16 *) &attr.sys_image_guid)[0]), be16_to_cpu(((__be16 *) &attr.sys_image_guid)[1]), be16_to_cpu(((__be16 *) &attr.sys_image_guid)[2]), be16_to_cpu(((__be16 *) &attr.sys_image_guid)[3])); } static ssize_t show_node_guid(struct device *device, struct device_attribute *attr, char *buf) { struct ib_device *dev = container_of(device, struct ib_device, dev); return sprintf(buf, "%04x:%04x:%04x:%04x\n", be16_to_cpu(((__be16 *) &dev->node_guid)[0]), be16_to_cpu(((__be16 *) &dev->node_guid)[1]), be16_to_cpu(((__be16 *) &dev->node_guid)[2]), be16_to_cpu(((__be16 *) &dev->node_guid)[3])); } static ssize_t show_node_desc(struct device *device, struct device_attribute *attr, char *buf) { struct ib_device *dev = container_of(device, struct ib_device, dev); return sprintf(buf, "%.64s\n", dev->node_desc); } static ssize_t set_node_desc(struct device *device, struct device_attribute *attr, const char *buf, size_t count) { struct ib_device *dev = container_of(device, struct ib_device, dev); struct ib_device_modify desc = {}; int ret; if (!dev->modify_device) return -EIO; memcpy(desc.node_desc, buf, min_t(int, count, 64)); ret = ib_modify_device(dev, IB_DEVICE_MODIFY_NODE_DESC, &desc); if (ret) return ret; return count; } static DEVICE_ATTR(node_type, S_IRUGO, show_node_type, NULL); static DEVICE_ATTR(sys_image_guid, S_IRUGO, show_sys_image_guid, NULL); static DEVICE_ATTR(node_guid, S_IRUGO, show_node_guid, NULL); static DEVICE_ATTR(node_desc, S_IRUGO | S_IWUSR, show_node_desc, set_node_desc); static struct device_attribute *ib_class_attributes[] = { &dev_attr_node_type, &dev_attr_sys_image_guid, &dev_attr_node_guid, &dev_attr_node_desc }; static struct class ib_class = { .name = "infiniband", .dev_release = ib_device_release, #ifdef __linux__ .dev_uevent = ib_device_uevent, #endif }; /* Show a given an attribute in the statistics group */ static ssize_t show_protocol_stat(const struct device *device, struct device_attribute *attr, char *buf, unsigned offset) { struct ib_device *dev = container_of(__DECONST(struct device *, device), struct ib_device, dev); union rdma_protocol_stats stats; ssize_t ret; ret = dev->get_protocol_stats(dev, &stats); if (ret) return ret; return sprintf(buf, "%llu\n", (unsigned long long) ((u64 *) &stats)[offset]); } /* generate a read-only iwarp statistics attribute */ #define IW_STATS_ENTRY(name) \ static ssize_t show_##name(struct device *device, \ struct device_attribute *attr, char *buf) \ { \ return show_protocol_stat(device, attr, buf, \ offsetof(struct iw_protocol_stats, name) / \ sizeof (u64)); \ } \ static DEVICE_ATTR(name, S_IRUGO, show_##name, NULL) IW_STATS_ENTRY(ipInReceives); IW_STATS_ENTRY(ipInHdrErrors); IW_STATS_ENTRY(ipInTooBigErrors); IW_STATS_ENTRY(ipInNoRoutes); IW_STATS_ENTRY(ipInAddrErrors); IW_STATS_ENTRY(ipInUnknownProtos); IW_STATS_ENTRY(ipInTruncatedPkts); IW_STATS_ENTRY(ipInDiscards); IW_STATS_ENTRY(ipInDelivers); IW_STATS_ENTRY(ipOutForwDatagrams); IW_STATS_ENTRY(ipOutRequests); IW_STATS_ENTRY(ipOutDiscards); IW_STATS_ENTRY(ipOutNoRoutes); IW_STATS_ENTRY(ipReasmTimeout); IW_STATS_ENTRY(ipReasmReqds); IW_STATS_ENTRY(ipReasmOKs); IW_STATS_ENTRY(ipReasmFails); IW_STATS_ENTRY(ipFragOKs); IW_STATS_ENTRY(ipFragFails); IW_STATS_ENTRY(ipFragCreates); IW_STATS_ENTRY(ipInMcastPkts); IW_STATS_ENTRY(ipOutMcastPkts); IW_STATS_ENTRY(ipInBcastPkts); IW_STATS_ENTRY(ipOutBcastPkts); IW_STATS_ENTRY(tcpRtoAlgorithm); IW_STATS_ENTRY(tcpRtoMin); IW_STATS_ENTRY(tcpRtoMax); IW_STATS_ENTRY(tcpMaxConn); IW_STATS_ENTRY(tcpActiveOpens); IW_STATS_ENTRY(tcpPassiveOpens); IW_STATS_ENTRY(tcpAttemptFails); IW_STATS_ENTRY(tcpEstabResets); IW_STATS_ENTRY(tcpCurrEstab); IW_STATS_ENTRY(tcpInSegs); IW_STATS_ENTRY(tcpOutSegs); IW_STATS_ENTRY(tcpRetransSegs); IW_STATS_ENTRY(tcpInErrs); IW_STATS_ENTRY(tcpOutRsts); static struct attribute *iw_proto_stats_attrs[] = { &dev_attr_ipInReceives.attr, &dev_attr_ipInHdrErrors.attr, &dev_attr_ipInTooBigErrors.attr, &dev_attr_ipInNoRoutes.attr, &dev_attr_ipInAddrErrors.attr, &dev_attr_ipInUnknownProtos.attr, &dev_attr_ipInTruncatedPkts.attr, &dev_attr_ipInDiscards.attr, &dev_attr_ipInDelivers.attr, &dev_attr_ipOutForwDatagrams.attr, &dev_attr_ipOutRequests.attr, &dev_attr_ipOutDiscards.attr, &dev_attr_ipOutNoRoutes.attr, &dev_attr_ipReasmTimeout.attr, &dev_attr_ipReasmReqds.attr, &dev_attr_ipReasmOKs.attr, &dev_attr_ipReasmFails.attr, &dev_attr_ipFragOKs.attr, &dev_attr_ipFragFails.attr, &dev_attr_ipFragCreates.attr, &dev_attr_ipInMcastPkts.attr, &dev_attr_ipOutMcastPkts.attr, &dev_attr_ipInBcastPkts.attr, &dev_attr_ipOutBcastPkts.attr, &dev_attr_tcpRtoAlgorithm.attr, &dev_attr_tcpRtoMin.attr, &dev_attr_tcpRtoMax.attr, &dev_attr_tcpMaxConn.attr, &dev_attr_tcpActiveOpens.attr, &dev_attr_tcpPassiveOpens.attr, &dev_attr_tcpAttemptFails.attr, &dev_attr_tcpEstabResets.attr, &dev_attr_tcpCurrEstab.attr, &dev_attr_tcpInSegs.attr, &dev_attr_tcpOutSegs.attr, &dev_attr_tcpRetransSegs.attr, &dev_attr_tcpInErrs.attr, &dev_attr_tcpOutRsts.attr, NULL }; static struct attribute_group iw_stats_group = { .name = "proto_stats", .attrs = iw_proto_stats_attrs, }; int ib_device_register_sysfs(struct ib_device *device, int (*port_callback)(struct ib_device *, u8, struct kobject *)) { struct device *class_dev = &device->dev; int ret; int i; class_dev->class = &ib_class; class_dev->parent = device->dma_device; dev_set_name(class_dev, device->name); dev_set_drvdata(class_dev, device); INIT_LIST_HEAD(&device->port_list); ret = device_register(class_dev); if (ret) goto err; for (i = 0; i < ARRAY_SIZE(ib_class_attributes); ++i) { ret = device_create_file(class_dev, ib_class_attributes[i]); if (ret) goto err_unregister; } device->ports_parent = kobject_create_and_add("ports", kobject_get(&class_dev->kobj)); if (!device->ports_parent) { ret = -ENOMEM; goto err_put; } if (device->node_type == RDMA_NODE_IB_SWITCH) { ret = add_port(device, 0, port_callback); if (ret) goto err_put; } else { for (i = 1; i <= device->phys_port_cnt; ++i) { ret = add_port(device, i, port_callback); if (ret) goto err_put; } } if (device->node_type == RDMA_NODE_RNIC && device->get_protocol_stats) { ret = sysfs_create_group(&class_dev->kobj, &iw_stats_group); if (ret) goto err_put; } return 0; err_put: { struct kobject *p, *t; struct ib_port *port; list_for_each_entry_safe(p, t, &device->port_list, entry) { list_del(&p->entry); port = container_of(p, struct ib_port, kobj); sysfs_remove_group(p, &pma_group); sysfs_remove_group(p, &port->pkey_group); sysfs_remove_group(p, &port->gid_group); kobject_put(p); } } kobject_put(&class_dev->kobj); err_unregister: device_unregister(class_dev); err: return ret; } void ib_device_unregister_sysfs(struct ib_device *device) { struct kobject *p, *t; struct ib_port *port; int i; /* Hold kobject until ib_dealloc_device() */ kobject_get(&device->dev.kobj); for (i = 0; i < ARRAY_SIZE(ib_class_attributes); ++i) { device_remove_file(&device->dev, ib_class_attributes[i]); } list_for_each_entry_safe(p, t, &device->port_list, entry) { list_del(&p->entry); port = container_of(p, struct ib_port, kobj); sysfs_remove_group(p, &pma_group); sysfs_remove_group(p, &port->pkey_group); sysfs_remove_group(p, &port->gid_group); kobject_put(p); } kobject_put(device->ports_parent); device_unregister(&device->dev); } int ib_sysfs_setup(void) { return class_register(&ib_class); } void ib_sysfs_cleanup(void) { class_unregister(&ib_class); } /*int ib_sysfs_create_port_files(struct ib_device *device, int (*create)(struct ib_device *dev, u8 port_num, struct kobject *kobj)) { struct kobject *p; struct ib_port *port; int ret = 0; list_for_each_entry(p, &device->port_list, entry) { port = container_of(p, struct ib_port, kobj); ret = create(device, port->port_num, &port->kobj); if (ret) break; } return ret; } EXPORT_SYMBOL(ib_sysfs_create_port_files);*/ Index: stable/10/sys/ofed/drivers/infiniband/core/ucm.c =================================================================== --- stable/10/sys/ofed/drivers/infiniband/core/ucm.c (revision 271126) +++ stable/10/sys/ofed/drivers/infiniband/core/ucm.c (revision 271127) @@ -1,1345 +1,1345 @@ /* * Copyright (c) 2005 Topspin Communications. All rights reserved. * Copyright (c) 2005 Intel Corporation. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU * General Public License (GPL) Version 2, available from the file * COPYING in the main directory of this source tree, or the * OpenIB.org BSD license below: * * Redistribution and use in source and binary forms, with or * without modification, are permitted provided that the following * conditions are met: * * - Redistributions of source code must retain the above * copyright notice, this list of conditions and the following * disclaimer. * * - Redistributions in binary form must reproduce the above * copyright notice, this list of conditions and the following * disclaimer in the documentation and/or other materials * provided with the distribution. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ #include -#include #include #include #include #include #include #include #include #include #include #include +#include #include #include #include #include MODULE_AUTHOR("Libor Michalek"); MODULE_DESCRIPTION("InfiniBand userspace Connection Manager access"); MODULE_LICENSE("Dual BSD/GPL"); struct ib_ucm_device { int devnum; struct cdev cdev; struct device dev; struct ib_device *ib_dev; }; struct ib_ucm_file { struct mutex file_mutex; struct file *filp; struct ib_ucm_device *device; struct list_head ctxs; struct list_head events; wait_queue_head_t poll_wait; }; struct ib_ucm_context { int id; struct completion comp; atomic_t ref; int events_reported; struct ib_ucm_file *file; struct ib_cm_id *cm_id; __u64 uid; struct list_head events; /* list of pending events. */ struct list_head file_list; /* member in file ctx list */ }; struct ib_ucm_event { struct ib_ucm_context *ctx; struct list_head file_list; /* member in file event list */ struct list_head ctx_list; /* member in ctx event list */ struct ib_cm_id *cm_id; struct ib_ucm_event_resp resp; void *data; void *info; int data_len; int info_len; }; enum { IB_UCM_MAJOR = 231, IB_UCM_BASE_MINOR = 224, IB_UCM_MAX_DEVICES = 32 }; #define IB_UCM_BASE_DEV MKDEV(IB_UCM_MAJOR, IB_UCM_BASE_MINOR) static void ib_ucm_add_one(struct ib_device *device); static void ib_ucm_remove_one(struct ib_device *device); static struct ib_client ucm_client = { .name = "ucm", .add = ib_ucm_add_one, .remove = ib_ucm_remove_one }; static DEFINE_MUTEX(ctx_id_mutex); static DEFINE_IDR(ctx_id_table); static DECLARE_BITMAP(dev_map, IB_UCM_MAX_DEVICES); static struct ib_ucm_context *ib_ucm_ctx_get(struct ib_ucm_file *file, int id) { struct ib_ucm_context *ctx; mutex_lock(&ctx_id_mutex); ctx = idr_find(&ctx_id_table, id); if (!ctx) ctx = ERR_PTR(-ENOENT); else if (ctx->file != file) ctx = ERR_PTR(-EINVAL); else atomic_inc(&ctx->ref); mutex_unlock(&ctx_id_mutex); return ctx; } static void ib_ucm_ctx_put(struct ib_ucm_context *ctx) { if (atomic_dec_and_test(&ctx->ref)) complete(&ctx->comp); } static inline int ib_ucm_new_cm_id(int event) { return event == IB_CM_REQ_RECEIVED || event == IB_CM_SIDR_REQ_RECEIVED; } static void ib_ucm_cleanup_events(struct ib_ucm_context *ctx) { struct ib_ucm_event *uevent; mutex_lock(&ctx->file->file_mutex); list_del(&ctx->file_list); while (!list_empty(&ctx->events)) { uevent = list_entry(ctx->events.next, struct ib_ucm_event, ctx_list); list_del(&uevent->file_list); list_del(&uevent->ctx_list); mutex_unlock(&ctx->file->file_mutex); /* clear incoming connections. */ if (ib_ucm_new_cm_id(uevent->resp.event)) ib_destroy_cm_id(uevent->cm_id); kfree(uevent); mutex_lock(&ctx->file->file_mutex); } mutex_unlock(&ctx->file->file_mutex); } static struct ib_ucm_context *ib_ucm_ctx_alloc(struct ib_ucm_file *file) { struct ib_ucm_context *ctx; int result; ctx = kzalloc(sizeof *ctx, GFP_KERNEL); if (!ctx) return NULL; atomic_set(&ctx->ref, 1); init_completion(&ctx->comp); ctx->file = file; INIT_LIST_HEAD(&ctx->events); do { result = idr_pre_get(&ctx_id_table, GFP_KERNEL); if (!result) goto error; mutex_lock(&ctx_id_mutex); result = idr_get_new(&ctx_id_table, ctx, &ctx->id); mutex_unlock(&ctx_id_mutex); } while (result == -EAGAIN); if (result) goto error; list_add_tail(&ctx->file_list, &file->ctxs); return ctx; error: kfree(ctx); return NULL; } static void ib_ucm_event_req_get(struct ib_ucm_req_event_resp *ureq, struct ib_cm_req_event_param *kreq) { ureq->remote_ca_guid = kreq->remote_ca_guid; ureq->remote_qkey = kreq->remote_qkey; ureq->remote_qpn = kreq->remote_qpn; ureq->qp_type = kreq->qp_type; ureq->starting_psn = kreq->starting_psn; ureq->responder_resources = kreq->responder_resources; ureq->initiator_depth = kreq->initiator_depth; ureq->local_cm_response_timeout = kreq->local_cm_response_timeout; ureq->flow_control = kreq->flow_control; ureq->remote_cm_response_timeout = kreq->remote_cm_response_timeout; ureq->retry_count = kreq->retry_count; ureq->rnr_retry_count = kreq->rnr_retry_count; ureq->srq = kreq->srq; ureq->port = kreq->port; ib_copy_path_rec_to_user(&ureq->primary_path, kreq->primary_path); if (kreq->alternate_path) ib_copy_path_rec_to_user(&ureq->alternate_path, kreq->alternate_path); } static void ib_ucm_event_rep_get(struct ib_ucm_rep_event_resp *urep, struct ib_cm_rep_event_param *krep) { urep->remote_ca_guid = krep->remote_ca_guid; urep->remote_qkey = krep->remote_qkey; urep->remote_qpn = krep->remote_qpn; urep->starting_psn = krep->starting_psn; urep->responder_resources = krep->responder_resources; urep->initiator_depth = krep->initiator_depth; urep->target_ack_delay = krep->target_ack_delay; urep->failover_accepted = krep->failover_accepted; urep->flow_control = krep->flow_control; urep->rnr_retry_count = krep->rnr_retry_count; urep->srq = krep->srq; } static void ib_ucm_event_sidr_rep_get(struct ib_ucm_sidr_rep_event_resp *urep, struct ib_cm_sidr_rep_event_param *krep) { urep->status = krep->status; urep->qkey = krep->qkey; urep->qpn = krep->qpn; }; static int ib_ucm_event_process(struct ib_cm_event *evt, struct ib_ucm_event *uvt) { void *info = NULL; switch (evt->event) { case IB_CM_REQ_RECEIVED: ib_ucm_event_req_get(&uvt->resp.u.req_resp, &evt->param.req_rcvd); uvt->data_len = IB_CM_REQ_PRIVATE_DATA_SIZE; uvt->resp.present = IB_UCM_PRES_PRIMARY; uvt->resp.present |= (evt->param.req_rcvd.alternate_path ? IB_UCM_PRES_ALTERNATE : 0); break; case IB_CM_REP_RECEIVED: ib_ucm_event_rep_get(&uvt->resp.u.rep_resp, &evt->param.rep_rcvd); uvt->data_len = IB_CM_REP_PRIVATE_DATA_SIZE; break; case IB_CM_RTU_RECEIVED: uvt->data_len = IB_CM_RTU_PRIVATE_DATA_SIZE; uvt->resp.u.send_status = evt->param.send_status; break; case IB_CM_DREQ_RECEIVED: uvt->data_len = IB_CM_DREQ_PRIVATE_DATA_SIZE; uvt->resp.u.send_status = evt->param.send_status; break; case IB_CM_DREP_RECEIVED: uvt->data_len = IB_CM_DREP_PRIVATE_DATA_SIZE; uvt->resp.u.send_status = evt->param.send_status; break; case IB_CM_MRA_RECEIVED: uvt->resp.u.mra_resp.timeout = evt->param.mra_rcvd.service_timeout; uvt->data_len = IB_CM_MRA_PRIVATE_DATA_SIZE; break; case IB_CM_REJ_RECEIVED: uvt->resp.u.rej_resp.reason = evt->param.rej_rcvd.reason; uvt->data_len = IB_CM_REJ_PRIVATE_DATA_SIZE; uvt->info_len = evt->param.rej_rcvd.ari_length; info = evt->param.rej_rcvd.ari; break; case IB_CM_LAP_RECEIVED: ib_copy_path_rec_to_user(&uvt->resp.u.lap_resp.path, evt->param.lap_rcvd.alternate_path); uvt->data_len = IB_CM_LAP_PRIVATE_DATA_SIZE; uvt->resp.present = IB_UCM_PRES_ALTERNATE; break; case IB_CM_APR_RECEIVED: uvt->resp.u.apr_resp.status = evt->param.apr_rcvd.ap_status; uvt->data_len = IB_CM_APR_PRIVATE_DATA_SIZE; uvt->info_len = evt->param.apr_rcvd.info_len; info = evt->param.apr_rcvd.apr_info; break; case IB_CM_SIDR_REQ_RECEIVED: uvt->resp.u.sidr_req_resp.pkey = evt->param.sidr_req_rcvd.pkey; uvt->resp.u.sidr_req_resp.port = evt->param.sidr_req_rcvd.port; uvt->data_len = IB_CM_SIDR_REQ_PRIVATE_DATA_SIZE; break; case IB_CM_SIDR_REP_RECEIVED: ib_ucm_event_sidr_rep_get(&uvt->resp.u.sidr_rep_resp, &evt->param.sidr_rep_rcvd); uvt->data_len = IB_CM_SIDR_REP_PRIVATE_DATA_SIZE; uvt->info_len = evt->param.sidr_rep_rcvd.info_len; info = evt->param.sidr_rep_rcvd.info; break; default: uvt->resp.u.send_status = evt->param.send_status; break; } if (uvt->data_len) { uvt->data = kmemdup(evt->private_data, uvt->data_len, GFP_KERNEL); if (!uvt->data) goto err1; uvt->resp.present |= IB_UCM_PRES_DATA; } if (uvt->info_len) { uvt->info = kmemdup(info, uvt->info_len, GFP_KERNEL); if (!uvt->info) goto err2; uvt->resp.present |= IB_UCM_PRES_INFO; } return 0; err2: kfree(uvt->data); err1: return -ENOMEM; } static int ib_ucm_event_handler(struct ib_cm_id *cm_id, struct ib_cm_event *event) { struct ib_ucm_event *uevent; struct ib_ucm_context *ctx; int result = 0; ctx = cm_id->context; uevent = kzalloc(sizeof *uevent, GFP_KERNEL); if (!uevent) goto err1; uevent->ctx = ctx; uevent->cm_id = cm_id; uevent->resp.uid = ctx->uid; uevent->resp.id = ctx->id; uevent->resp.event = event->event; result = ib_ucm_event_process(event, uevent); if (result) goto err2; mutex_lock(&ctx->file->file_mutex); list_add_tail(&uevent->file_list, &ctx->file->events); list_add_tail(&uevent->ctx_list, &ctx->events); wake_up_interruptible(&ctx->file->poll_wait); if (ctx->file->filp) selwakeup(&ctx->file->filp->f_selinfo); mutex_unlock(&ctx->file->file_mutex); return 0; err2: kfree(uevent); err1: /* Destroy new cm_id's */ return ib_ucm_new_cm_id(event->event); } static ssize_t ib_ucm_event(struct ib_ucm_file *file, const char __user *inbuf, int in_len, int out_len) { struct ib_ucm_context *ctx; struct ib_ucm_event_get cmd; struct ib_ucm_event *uevent; int result = 0; DEFINE_WAIT(wait); if (out_len < sizeof(struct ib_ucm_event_resp)) return -ENOSPC; if (copy_from_user(&cmd, inbuf, sizeof(cmd))) return -EFAULT; mutex_lock(&file->file_mutex); while (list_empty(&file->events)) { mutex_unlock(&file->file_mutex); if (file->filp->f_flags & O_NONBLOCK) return -EAGAIN; if (wait_event_interruptible(file->poll_wait, !list_empty(&file->events))) return -ERESTARTSYS; mutex_lock(&file->file_mutex); } uevent = list_entry(file->events.next, struct ib_ucm_event, file_list); if (ib_ucm_new_cm_id(uevent->resp.event)) { ctx = ib_ucm_ctx_alloc(file); if (!ctx) { result = -ENOMEM; goto done; } ctx->cm_id = uevent->cm_id; ctx->cm_id->context = ctx; uevent->resp.id = ctx->id; } if (copy_to_user((void __user *)(unsigned long)cmd.response, &uevent->resp, sizeof(uevent->resp))) { result = -EFAULT; goto done; } if (uevent->data) { if (cmd.data_len < uevent->data_len) { result = -ENOMEM; goto done; } if (copy_to_user((void __user *)(unsigned long)cmd.data, uevent->data, uevent->data_len)) { result = -EFAULT; goto done; } } if (uevent->info) { if (cmd.info_len < uevent->info_len) { result = -ENOMEM; goto done; } if (copy_to_user((void __user *)(unsigned long)cmd.info, uevent->info, uevent->info_len)) { result = -EFAULT; goto done; } } list_del(&uevent->file_list); list_del(&uevent->ctx_list); uevent->ctx->events_reported++; kfree(uevent->data); kfree(uevent->info); kfree(uevent); done: mutex_unlock(&file->file_mutex); return result; } static ssize_t ib_ucm_create_id(struct ib_ucm_file *file, const char __user *inbuf, int in_len, int out_len) { struct ib_ucm_create_id cmd; struct ib_ucm_create_id_resp resp; struct ib_ucm_context *ctx; int result; if (out_len < sizeof(resp)) return -ENOSPC; if (copy_from_user(&cmd, inbuf, sizeof(cmd))) return -EFAULT; mutex_lock(&file->file_mutex); ctx = ib_ucm_ctx_alloc(file); mutex_unlock(&file->file_mutex); if (!ctx) return -ENOMEM; ctx->uid = cmd.uid; ctx->cm_id = ib_create_cm_id(file->device->ib_dev, ib_ucm_event_handler, ctx); if (IS_ERR(ctx->cm_id)) { result = PTR_ERR(ctx->cm_id); goto err1; } resp.id = ctx->id; if (copy_to_user((void __user *)(unsigned long)cmd.response, &resp, sizeof(resp))) { result = -EFAULT; goto err2; } return 0; err2: ib_destroy_cm_id(ctx->cm_id); err1: mutex_lock(&ctx_id_mutex); idr_remove(&ctx_id_table, ctx->id); mutex_unlock(&ctx_id_mutex); kfree(ctx); return result; } static ssize_t ib_ucm_destroy_id(struct ib_ucm_file *file, const char __user *inbuf, int in_len, int out_len) { struct ib_ucm_destroy_id cmd; struct ib_ucm_destroy_id_resp resp; struct ib_ucm_context *ctx; int result = 0; if (out_len < sizeof(resp)) return -ENOSPC; if (copy_from_user(&cmd, inbuf, sizeof(cmd))) return -EFAULT; mutex_lock(&ctx_id_mutex); ctx = idr_find(&ctx_id_table, cmd.id); if (!ctx) ctx = ERR_PTR(-ENOENT); else if (ctx->file != file) ctx = ERR_PTR(-EINVAL); else idr_remove(&ctx_id_table, ctx->id); mutex_unlock(&ctx_id_mutex); if (IS_ERR(ctx)) return PTR_ERR(ctx); ib_ucm_ctx_put(ctx); wait_for_completion(&ctx->comp); /* No new events will be generated after destroying the cm_id. */ ib_destroy_cm_id(ctx->cm_id); /* Cleanup events not yet reported to the user. */ ib_ucm_cleanup_events(ctx); resp.events_reported = ctx->events_reported; if (copy_to_user((void __user *)(unsigned long)cmd.response, &resp, sizeof(resp))) result = -EFAULT; kfree(ctx); return result; } static ssize_t ib_ucm_attr_id(struct ib_ucm_file *file, const char __user *inbuf, int in_len, int out_len) { struct ib_ucm_attr_id_resp resp; struct ib_ucm_attr_id cmd; struct ib_ucm_context *ctx; int result = 0; if (out_len < sizeof(resp)) return -ENOSPC; if (copy_from_user(&cmd, inbuf, sizeof(cmd))) return -EFAULT; ctx = ib_ucm_ctx_get(file, cmd.id); if (IS_ERR(ctx)) return PTR_ERR(ctx); resp.service_id = ctx->cm_id->service_id; resp.service_mask = ctx->cm_id->service_mask; resp.local_id = ctx->cm_id->local_id; resp.remote_id = ctx->cm_id->remote_id; if (copy_to_user((void __user *)(unsigned long)cmd.response, &resp, sizeof(resp))) result = -EFAULT; ib_ucm_ctx_put(ctx); return result; } static ssize_t ib_ucm_init_qp_attr(struct ib_ucm_file *file, const char __user *inbuf, int in_len, int out_len) { struct ib_uverbs_qp_attr resp; struct ib_ucm_init_qp_attr cmd; struct ib_ucm_context *ctx; struct ib_qp_attr qp_attr; int result = 0; if (out_len < sizeof(resp)) return -ENOSPC; if (copy_from_user(&cmd, inbuf, sizeof(cmd))) return -EFAULT; ctx = ib_ucm_ctx_get(file, cmd.id); if (IS_ERR(ctx)) return PTR_ERR(ctx); resp.qp_attr_mask = 0; memset(&qp_attr, 0, sizeof qp_attr); qp_attr.qp_state = cmd.qp_state; result = ib_cm_init_qp_attr(ctx->cm_id, &qp_attr, &resp.qp_attr_mask); if (result) goto out; ib_copy_qp_attr_to_user(&resp, &qp_attr); if (copy_to_user((void __user *)(unsigned long)cmd.response, &resp, sizeof(resp))) result = -EFAULT; out: ib_ucm_ctx_put(ctx); return result; } static int ucm_validate_listen(__be64 service_id, __be64 service_mask) { service_id &= service_mask; if (((service_id & IB_CMA_SERVICE_ID_MASK) == IB_CMA_SERVICE_ID) || ((service_id & IB_SDP_SERVICE_ID_MASK) == IB_SDP_SERVICE_ID)) return -EINVAL; return 0; } static ssize_t ib_ucm_listen(struct ib_ucm_file *file, const char __user *inbuf, int in_len, int out_len) { struct ib_ucm_listen cmd; struct ib_ucm_context *ctx; int result; if (copy_from_user(&cmd, inbuf, sizeof(cmd))) return -EFAULT; ctx = ib_ucm_ctx_get(file, cmd.id); if (IS_ERR(ctx)) return PTR_ERR(ctx); result = ucm_validate_listen(cmd.service_id, cmd.service_mask); if (result) goto out; result = ib_cm_listen(ctx->cm_id, cmd.service_id, cmd.service_mask, NULL); out: ib_ucm_ctx_put(ctx); return result; } static ssize_t ib_ucm_notify(struct ib_ucm_file *file, const char __user *inbuf, int in_len, int out_len) { struct ib_ucm_notify cmd; struct ib_ucm_context *ctx; int result; if (copy_from_user(&cmd, inbuf, sizeof(cmd))) return -EFAULT; ctx = ib_ucm_ctx_get(file, cmd.id); if (IS_ERR(ctx)) return PTR_ERR(ctx); result = ib_cm_notify(ctx->cm_id, (enum ib_event_type) cmd.event); ib_ucm_ctx_put(ctx); return result; } static int ib_ucm_alloc_data(const void **dest, u64 src, u32 len) { void *data; *dest = NULL; if (!len) return 0; data = kmalloc(len, GFP_KERNEL); if (!data) return -ENOMEM; if (copy_from_user(data, (void __user *)(unsigned long)src, len)) { kfree(data); return -EFAULT; } *dest = data; return 0; } static int ib_ucm_path_get(struct ib_sa_path_rec **path, u64 src) { struct ib_user_path_rec upath; struct ib_sa_path_rec *sa_path; *path = NULL; if (!src) return 0; sa_path = kmalloc(sizeof(*sa_path), GFP_KERNEL); if (!sa_path) return -ENOMEM; if (copy_from_user(&upath, (void __user *)(unsigned long)src, sizeof(upath))) { kfree(sa_path); return -EFAULT; } ib_copy_path_rec_from_user(sa_path, &upath); *path = sa_path; return 0; } static ssize_t ib_ucm_send_req(struct ib_ucm_file *file, const char __user *inbuf, int in_len, int out_len) { struct ib_cm_req_param param; struct ib_ucm_context *ctx; struct ib_ucm_req cmd; int result; param.private_data = NULL; param.primary_path = NULL; param.alternate_path = NULL; if (copy_from_user(&cmd, inbuf, sizeof(cmd))) return -EFAULT; result = ib_ucm_alloc_data(¶m.private_data, cmd.data, cmd.len); if (result) goto done; result = ib_ucm_path_get(¶m.primary_path, cmd.primary_path); if (result) goto done; result = ib_ucm_path_get(¶m.alternate_path, cmd.alternate_path); if (result) goto done; param.private_data_len = cmd.len; param.service_id = cmd.sid; param.qp_num = cmd.qpn; param.qp_type = cmd.qp_type; param.starting_psn = cmd.psn; param.peer_to_peer = cmd.peer_to_peer; param.responder_resources = cmd.responder_resources; param.initiator_depth = cmd.initiator_depth; param.remote_cm_response_timeout = cmd.remote_cm_response_timeout; param.flow_control = cmd.flow_control; param.local_cm_response_timeout = cmd.local_cm_response_timeout; param.retry_count = cmd.retry_count; param.rnr_retry_count = cmd.rnr_retry_count; param.max_cm_retries = cmd.max_cm_retries; param.srq = cmd.srq; ctx = ib_ucm_ctx_get(file, cmd.id); if (!IS_ERR(ctx)) { result = ib_send_cm_req(ctx->cm_id, ¶m); ib_ucm_ctx_put(ctx); } else result = PTR_ERR(ctx); done: kfree(param.private_data); kfree(param.primary_path); kfree(param.alternate_path); return result; } static ssize_t ib_ucm_send_rep(struct ib_ucm_file *file, const char __user *inbuf, int in_len, int out_len) { struct ib_cm_rep_param param; struct ib_ucm_context *ctx; struct ib_ucm_rep cmd; int result; param.private_data = NULL; if (copy_from_user(&cmd, inbuf, sizeof(cmd))) return -EFAULT; result = ib_ucm_alloc_data(¶m.private_data, cmd.data, cmd.len); if (result) return result; param.qp_num = cmd.qpn; param.starting_psn = cmd.psn; param.private_data_len = cmd.len; param.responder_resources = cmd.responder_resources; param.initiator_depth = cmd.initiator_depth; param.failover_accepted = cmd.failover_accepted; param.flow_control = cmd.flow_control; param.rnr_retry_count = cmd.rnr_retry_count; param.srq = cmd.srq; ctx = ib_ucm_ctx_get(file, cmd.id); if (!IS_ERR(ctx)) { ctx->uid = cmd.uid; result = ib_send_cm_rep(ctx->cm_id, ¶m); ib_ucm_ctx_put(ctx); } else result = PTR_ERR(ctx); kfree(param.private_data); return result; } static ssize_t ib_ucm_send_private_data(struct ib_ucm_file *file, const char __user *inbuf, int in_len, int (*func)(struct ib_cm_id *cm_id, const void *private_data, u8 private_data_len)) { struct ib_ucm_private_data cmd; struct ib_ucm_context *ctx; const void *private_data = NULL; int result; if (copy_from_user(&cmd, inbuf, sizeof(cmd))) return -EFAULT; result = ib_ucm_alloc_data(&private_data, cmd.data, cmd.len); if (result) return result; ctx = ib_ucm_ctx_get(file, cmd.id); if (!IS_ERR(ctx)) { result = func(ctx->cm_id, private_data, cmd.len); ib_ucm_ctx_put(ctx); } else result = PTR_ERR(ctx); kfree(private_data); return result; } static ssize_t ib_ucm_send_rtu(struct ib_ucm_file *file, const char __user *inbuf, int in_len, int out_len) { return ib_ucm_send_private_data(file, inbuf, in_len, ib_send_cm_rtu); } static ssize_t ib_ucm_send_dreq(struct ib_ucm_file *file, const char __user *inbuf, int in_len, int out_len) { return ib_ucm_send_private_data(file, inbuf, in_len, ib_send_cm_dreq); } static ssize_t ib_ucm_send_drep(struct ib_ucm_file *file, const char __user *inbuf, int in_len, int out_len) { return ib_ucm_send_private_data(file, inbuf, in_len, ib_send_cm_drep); } static ssize_t ib_ucm_send_info(struct ib_ucm_file *file, const char __user *inbuf, int in_len, int (*func)(struct ib_cm_id *cm_id, int status, const void *info, u8 info_len, const void *data, u8 data_len)) { struct ib_ucm_context *ctx; struct ib_ucm_info cmd; const void *data = NULL; const void *info = NULL; int result; if (copy_from_user(&cmd, inbuf, sizeof(cmd))) return -EFAULT; result = ib_ucm_alloc_data(&data, cmd.data, cmd.data_len); if (result) goto done; result = ib_ucm_alloc_data(&info, cmd.info, cmd.info_len); if (result) goto done; ctx = ib_ucm_ctx_get(file, cmd.id); if (!IS_ERR(ctx)) { result = func(ctx->cm_id, cmd.status, info, cmd.info_len, data, cmd.data_len); ib_ucm_ctx_put(ctx); } else result = PTR_ERR(ctx); done: kfree(data); kfree(info); return result; } static ssize_t ib_ucm_send_rej(struct ib_ucm_file *file, const char __user *inbuf, int in_len, int out_len) { return ib_ucm_send_info(file, inbuf, in_len, (void *)ib_send_cm_rej); } static ssize_t ib_ucm_send_apr(struct ib_ucm_file *file, const char __user *inbuf, int in_len, int out_len) { return ib_ucm_send_info(file, inbuf, in_len, (void *)ib_send_cm_apr); } static ssize_t ib_ucm_send_mra(struct ib_ucm_file *file, const char __user *inbuf, int in_len, int out_len) { struct ib_ucm_context *ctx; struct ib_ucm_mra cmd; const void *data = NULL; int result; if (copy_from_user(&cmd, inbuf, sizeof(cmd))) return -EFAULT; result = ib_ucm_alloc_data(&data, cmd.data, cmd.len); if (result) return result; ctx = ib_ucm_ctx_get(file, cmd.id); if (!IS_ERR(ctx)) { result = ib_send_cm_mra(ctx->cm_id, cmd.timeout, data, cmd.len); ib_ucm_ctx_put(ctx); } else result = PTR_ERR(ctx); kfree(data); return result; } static ssize_t ib_ucm_send_lap(struct ib_ucm_file *file, const char __user *inbuf, int in_len, int out_len) { struct ib_ucm_context *ctx; struct ib_sa_path_rec *path = NULL; struct ib_ucm_lap cmd; const void *data = NULL; int result; if (copy_from_user(&cmd, inbuf, sizeof(cmd))) return -EFAULT; result = ib_ucm_alloc_data(&data, cmd.data, cmd.len); if (result) goto done; result = ib_ucm_path_get(&path, cmd.path); if (result) goto done; ctx = ib_ucm_ctx_get(file, cmd.id); if (!IS_ERR(ctx)) { result = ib_send_cm_lap(ctx->cm_id, path, data, cmd.len); ib_ucm_ctx_put(ctx); } else result = PTR_ERR(ctx); done: kfree(data); kfree(path); return result; } static ssize_t ib_ucm_send_sidr_req(struct ib_ucm_file *file, const char __user *inbuf, int in_len, int out_len) { struct ib_cm_sidr_req_param param; struct ib_ucm_context *ctx; struct ib_ucm_sidr_req cmd; int result; param.private_data = NULL; param.path = NULL; if (copy_from_user(&cmd, inbuf, sizeof(cmd))) return -EFAULT; result = ib_ucm_alloc_data(¶m.private_data, cmd.data, cmd.len); if (result) goto done; result = ib_ucm_path_get(¶m.path, cmd.path); if (result) goto done; param.private_data_len = cmd.len; param.service_id = cmd.sid; param.timeout_ms = cmd.timeout; param.max_cm_retries = cmd.max_cm_retries; ctx = ib_ucm_ctx_get(file, cmd.id); if (!IS_ERR(ctx)) { result = ib_send_cm_sidr_req(ctx->cm_id, ¶m); ib_ucm_ctx_put(ctx); } else result = PTR_ERR(ctx); done: kfree(param.private_data); kfree(param.path); return result; } static ssize_t ib_ucm_send_sidr_rep(struct ib_ucm_file *file, const char __user *inbuf, int in_len, int out_len) { struct ib_cm_sidr_rep_param param; struct ib_ucm_sidr_rep cmd; struct ib_ucm_context *ctx; int result; param.info = NULL; if (copy_from_user(&cmd, inbuf, sizeof(cmd))) return -EFAULT; result = ib_ucm_alloc_data(¶m.private_data, cmd.data, cmd.data_len); if (result) goto done; result = ib_ucm_alloc_data(¶m.info, cmd.info, cmd.info_len); if (result) goto done; param.qp_num = cmd.qpn; param.qkey = cmd.qkey; param.status = cmd.status; param.info_length = cmd.info_len; param.private_data_len = cmd.data_len; ctx = ib_ucm_ctx_get(file, cmd.id); if (!IS_ERR(ctx)) { result = ib_send_cm_sidr_rep(ctx->cm_id, ¶m); ib_ucm_ctx_put(ctx); } else result = PTR_ERR(ctx); done: kfree(param.private_data); kfree(param.info); return result; } static ssize_t (*ucm_cmd_table[])(struct ib_ucm_file *file, const char __user *inbuf, int in_len, int out_len) = { [IB_USER_CM_CMD_CREATE_ID] = ib_ucm_create_id, [IB_USER_CM_CMD_DESTROY_ID] = ib_ucm_destroy_id, [IB_USER_CM_CMD_ATTR_ID] = ib_ucm_attr_id, [IB_USER_CM_CMD_LISTEN] = ib_ucm_listen, [IB_USER_CM_CMD_NOTIFY] = ib_ucm_notify, [IB_USER_CM_CMD_SEND_REQ] = ib_ucm_send_req, [IB_USER_CM_CMD_SEND_REP] = ib_ucm_send_rep, [IB_USER_CM_CMD_SEND_RTU] = ib_ucm_send_rtu, [IB_USER_CM_CMD_SEND_DREQ] = ib_ucm_send_dreq, [IB_USER_CM_CMD_SEND_DREP] = ib_ucm_send_drep, [IB_USER_CM_CMD_SEND_REJ] = ib_ucm_send_rej, [IB_USER_CM_CMD_SEND_MRA] = ib_ucm_send_mra, [IB_USER_CM_CMD_SEND_LAP] = ib_ucm_send_lap, [IB_USER_CM_CMD_SEND_APR] = ib_ucm_send_apr, [IB_USER_CM_CMD_SEND_SIDR_REQ] = ib_ucm_send_sidr_req, [IB_USER_CM_CMD_SEND_SIDR_REP] = ib_ucm_send_sidr_rep, [IB_USER_CM_CMD_EVENT] = ib_ucm_event, [IB_USER_CM_CMD_INIT_QP_ATTR] = ib_ucm_init_qp_attr, }; static ssize_t ib_ucm_write(struct file *filp, const char __user *buf, size_t len, loff_t *pos) { struct ib_ucm_file *file = filp->private_data; struct ib_ucm_cmd_hdr hdr; ssize_t result; if (len < sizeof(hdr)) return -EINVAL; if (copy_from_user(&hdr, buf, sizeof(hdr))) return -EFAULT; if (hdr.cmd < 0 || hdr.cmd >= ARRAY_SIZE(ucm_cmd_table)) return -EINVAL; if (hdr.in + sizeof(hdr) > len) return -EINVAL; result = ucm_cmd_table[hdr.cmd](file, buf + sizeof(hdr), hdr.in, hdr.out); if (!result) result = len; return result; } static unsigned int ib_ucm_poll(struct file *filp, struct poll_table_struct *wait) { struct ib_ucm_file *file = filp->private_data; unsigned int mask = 0; poll_wait(filp, &file->poll_wait, wait); if (!list_empty(&file->events)) mask = POLLIN | POLLRDNORM; return mask; } /* * ib_ucm_open() does not need the BKL: * * - no global state is referred to; * - there is no ioctl method to race against; * - no further module initialization is required for open to work * after the device is registered. */ static int ib_ucm_open(struct inode *inode, struct file *filp) { struct ib_ucm_file *file; file = kzalloc(sizeof(*file), GFP_KERNEL); if (!file) return -ENOMEM; INIT_LIST_HEAD(&file->events); INIT_LIST_HEAD(&file->ctxs); init_waitqueue_head(&file->poll_wait); mutex_init(&file->file_mutex); filp->private_data = file; file->filp = filp; file->device = container_of(inode->i_cdev->si_drv1, struct ib_ucm_device, cdev); return 0; } static int ib_ucm_close(struct inode *inode, struct file *filp) { struct ib_ucm_file *file = filp->private_data; struct ib_ucm_context *ctx; mutex_lock(&file->file_mutex); while (!list_empty(&file->ctxs)) { ctx = list_entry(file->ctxs.next, struct ib_ucm_context, file_list); mutex_unlock(&file->file_mutex); mutex_lock(&ctx_id_mutex); idr_remove(&ctx_id_table, ctx->id); mutex_unlock(&ctx_id_mutex); ib_destroy_cm_id(ctx->cm_id); ib_ucm_cleanup_events(ctx); kfree(ctx); mutex_lock(&file->file_mutex); } mutex_unlock(&file->file_mutex); kfree(file); return 0; } static void ib_ucm_release_dev(struct device *dev) { struct ib_ucm_device *ucm_dev; ucm_dev = container_of(dev, struct ib_ucm_device, dev); cdev_del(&ucm_dev->cdev); clear_bit(ucm_dev->devnum, dev_map); kfree(ucm_dev); } static const struct file_operations ucm_fops = { .owner = THIS_MODULE, .open = ib_ucm_open, .release = ib_ucm_close, .write = ib_ucm_write, .poll = ib_ucm_poll, }; static ssize_t show_ibdev(struct device *dev, struct device_attribute *attr, char *buf) { struct ib_ucm_device *ucm_dev; ucm_dev = container_of(dev, struct ib_ucm_device, dev); return sprintf(buf, "%s\n", ucm_dev->ib_dev->name); } static DEVICE_ATTR(ibdev, S_IRUGO, show_ibdev, NULL); static void ib_ucm_add_one(struct ib_device *device) { struct ib_ucm_device *ucm_dev; if (!device->alloc_ucontext || rdma_node_get_transport(device->node_type) != RDMA_TRANSPORT_IB) return; ucm_dev = kzalloc(sizeof *ucm_dev, GFP_KERNEL); if (!ucm_dev) return; ucm_dev->ib_dev = device; ucm_dev->devnum = find_first_zero_bit(dev_map, IB_UCM_MAX_DEVICES); if (ucm_dev->devnum >= IB_UCM_MAX_DEVICES) goto err; set_bit(ucm_dev->devnum, dev_map); cdev_init(&ucm_dev->cdev, &ucm_fops); ucm_dev->cdev.owner = THIS_MODULE; kobject_set_name(&ucm_dev->cdev.kobj, "ucm%d", ucm_dev->devnum); if (cdev_add(&ucm_dev->cdev, IB_UCM_BASE_DEV + ucm_dev->devnum, 1)) goto err; ucm_dev->dev.class = &cm_class; ucm_dev->dev.parent = device->dma_device; ucm_dev->dev.devt = ucm_dev->cdev.dev; ucm_dev->dev.release = ib_ucm_release_dev; dev_set_name(&ucm_dev->dev, "ucm%d", ucm_dev->devnum); if (device_register(&ucm_dev->dev)) goto err_cdev; if (device_create_file(&ucm_dev->dev, &dev_attr_ibdev)) goto err_dev; ib_set_client_data(device, &ucm_client, ucm_dev); return; err_dev: device_unregister(&ucm_dev->dev); err_cdev: cdev_del(&ucm_dev->cdev); clear_bit(ucm_dev->devnum, dev_map); err: kfree(ucm_dev); return; } static void ib_ucm_remove_one(struct ib_device *device) { struct ib_ucm_device *ucm_dev = ib_get_client_data(device, &ucm_client); if (!ucm_dev) return; device_unregister(&ucm_dev->dev); } -static ssize_t show_abi_version(struct class *class, char *buf) +static ssize_t show_abi_version(struct class *class, struct class_attribute *attr, char *buf) { return sprintf(buf, "%d\n", IB_USER_CM_ABI_VERSION); } static CLASS_ATTR(abi_version, S_IRUGO, show_abi_version, NULL); static int __init ib_ucm_init(void) { int ret; ret = register_chrdev_region(IB_UCM_BASE_DEV, IB_UCM_MAX_DEVICES, "infiniband_cm"); if (ret) { printk(KERN_ERR "ucm: couldn't register device number\n"); goto error1; } ret = class_create_file(&cm_class, &class_attr_abi_version); if (ret) { printk(KERN_ERR "ucm: couldn't create abi_version attribute\n"); goto error2; } ret = ib_register_client(&ucm_client); if (ret) { printk(KERN_ERR "ucm: couldn't register client\n"); goto error3; } return 0; error3: class_remove_file(&cm_class, &class_attr_abi_version); error2: unregister_chrdev_region(IB_UCM_BASE_DEV, IB_UCM_MAX_DEVICES); error1: return ret; } static void __exit ib_ucm_cleanup(void) { ib_unregister_client(&ucm_client); class_remove_file(&cm_class, &class_attr_abi_version); unregister_chrdev_region(IB_UCM_BASE_DEV, IB_UCM_MAX_DEVICES); idr_destroy(&ctx_id_table); } module_init_order(ib_ucm_init, SI_ORDER_THIRD); module_exit(ib_ucm_cleanup); Index: stable/10/sys/ofed/drivers/infiniband/core/user_mad.c =================================================================== --- stable/10/sys/ofed/drivers/infiniband/core/user_mad.c (revision 271126) +++ stable/10/sys/ofed/drivers/infiniband/core/user_mad.c (revision 271127) @@ -1,1225 +1,1224 @@ /* * Copyright (c) 2004 Topspin Communications. All rights reserved. * Copyright (c) 2005 Voltaire, Inc. All rights reserved. * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved. * Copyright (c) 2008 Cisco. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU * General Public License (GPL) Version 2, available from the file * COPYING in the main directory of this source tree, or the * OpenIB.org BSD license below: * * Redistribution and use in source and binary forms, with or * without modification, are permitted provided that the following * conditions are met: * * - Redistributions of source code must retain the above * copyright notice, this list of conditions and the following * disclaimer. * * - Redistributions in binary form must reproduce the above * copyright notice, this list of conditions and the following * disclaimer in the documentation and/or other materials * provided with the distribution. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ #include -#include #include #include #include #include #include #include #include #include #include #include #include #include #include MODULE_AUTHOR("Roland Dreier"); MODULE_DESCRIPTION("InfiniBand userspace MAD packet access"); MODULE_LICENSE("Dual BSD/GPL"); enum { IB_UMAD_MAX_PORTS = 64, IB_UMAD_MAX_AGENTS = 32, IB_UMAD_MAJOR = 231, IB_UMAD_MINOR_BASE = 0 }; /* * Our lifetime rules for these structs are the following: each time a * device special file is opened, we look up the corresponding struct * ib_umad_port by minor in the umad_port[] table while holding the * port_lock. If this lookup succeeds, we take a reference on the * ib_umad_port's struct ib_umad_device while still holding the * port_lock; if the lookup fails, we fail the open(). We drop these * references in the corresponding close(). * * In addition to references coming from open character devices, there * is one more reference to each ib_umad_device representing the * module's reference taken when allocating the ib_umad_device in * ib_umad_add_one(). * * When destroying an ib_umad_device, we clear all of its * ib_umad_ports from umad_port[] while holding port_lock before * dropping the module's reference to the ib_umad_device. This is * always safe because any open() calls will either succeed and obtain * a reference before we clear the umad_port[] entries, or fail after * we clear the umad_port[] entries. */ struct ib_umad_port { struct cdev *cdev; struct device *dev; struct cdev *sm_cdev; struct device *sm_dev; struct semaphore sm_sem; struct mutex file_mutex; struct list_head file_list; struct ib_device *ib_dev; struct ib_umad_device *umad_dev; int dev_num; u8 port_num; }; struct ib_umad_device { int start_port, end_port; struct kref ref; struct ib_umad_port port[0]; }; struct ib_umad_file { struct mutex mutex; struct ib_umad_port *port; struct file *filp; struct list_head recv_list; struct list_head send_list; struct list_head port_list; spinlock_t send_lock; wait_queue_head_t recv_wait; struct ib_mad_agent *agent[IB_UMAD_MAX_AGENTS]; int agents_dead; u8 use_pkey_index; u8 already_used; }; struct ib_umad_packet { struct ib_mad_send_buf *msg; struct ib_mad_recv_wc *recv_wc; struct list_head list; int length; struct ib_user_mad mad; }; static struct class *umad_class; static const dev_t base_dev = MKDEV(IB_UMAD_MAJOR, IB_UMAD_MINOR_BASE); static DEFINE_SPINLOCK(port_lock); static struct ib_umad_port *umad_port[IB_UMAD_MAX_PORTS]; static DECLARE_BITMAP(dev_map, IB_UMAD_MAX_PORTS); static void ib_umad_add_one(struct ib_device *device); static void ib_umad_remove_one(struct ib_device *device); static void ib_umad_release_dev(struct kref *ref) { struct ib_umad_device *dev = container_of(ref, struct ib_umad_device, ref); kfree(dev); } static int hdr_size(struct ib_umad_file *file) { return file->use_pkey_index ? sizeof (struct ib_user_mad_hdr) : sizeof (struct ib_user_mad_hdr_old); } /* caller must hold file->mutex */ static struct ib_mad_agent *__get_agent(struct ib_umad_file *file, int id) { return file->agents_dead ? NULL : file->agent[id]; } static int queue_packet(struct ib_umad_file *file, struct ib_mad_agent *agent, struct ib_umad_packet *packet) { int ret = 1; mutex_lock(&file->mutex); for (packet->mad.hdr.id = 0; packet->mad.hdr.id < IB_UMAD_MAX_AGENTS; packet->mad.hdr.id++) if (agent == __get_agent(file, packet->mad.hdr.id)) { list_add_tail(&packet->list, &file->recv_list); selwakeup(&file->filp->f_selinfo); wake_up_interruptible(&file->recv_wait); ret = 0; break; } mutex_unlock(&file->mutex); return ret; } static void dequeue_send(struct ib_umad_file *file, struct ib_umad_packet *packet) { spin_lock_irq(&file->send_lock); list_del(&packet->list); spin_unlock_irq(&file->send_lock); } static void send_handler(struct ib_mad_agent *agent, struct ib_mad_send_wc *send_wc) { struct ib_umad_file *file = agent->context; struct ib_umad_packet *packet = send_wc->send_buf->context[0]; dequeue_send(file, packet); ib_destroy_ah(packet->msg->ah); ib_free_send_mad(packet->msg); if (send_wc->status == IB_WC_RESP_TIMEOUT_ERR) { packet->length = IB_MGMT_MAD_HDR; packet->mad.hdr.status = ETIMEDOUT; if (!queue_packet(file, agent, packet)) return; } kfree(packet); } static void recv_handler(struct ib_mad_agent *agent, struct ib_mad_recv_wc *mad_recv_wc) { struct ib_umad_file *file = agent->context; struct ib_umad_packet *packet; if (mad_recv_wc->wc->status != IB_WC_SUCCESS) goto err1; packet = kzalloc(sizeof *packet, GFP_KERNEL); if (!packet) goto err1; packet->length = mad_recv_wc->mad_len; packet->recv_wc = mad_recv_wc; packet->mad.hdr.status = 0; packet->mad.hdr.length = hdr_size(file) + mad_recv_wc->mad_len; packet->mad.hdr.qpn = cpu_to_be32(mad_recv_wc->wc->src_qp); packet->mad.hdr.lid = cpu_to_be16(mad_recv_wc->wc->slid); packet->mad.hdr.sl = mad_recv_wc->wc->sl; packet->mad.hdr.path_bits = mad_recv_wc->wc->dlid_path_bits; packet->mad.hdr.pkey_index = mad_recv_wc->wc->pkey_index; packet->mad.hdr.grh_present = !!(mad_recv_wc->wc->wc_flags & IB_WC_GRH); if (packet->mad.hdr.grh_present) { struct ib_ah_attr ah_attr; ib_init_ah_from_wc(agent->device, agent->port_num, mad_recv_wc->wc, mad_recv_wc->recv_buf.grh, &ah_attr); packet->mad.hdr.gid_index = ah_attr.grh.sgid_index; packet->mad.hdr.hop_limit = ah_attr.grh.hop_limit; packet->mad.hdr.traffic_class = ah_attr.grh.traffic_class; memcpy(packet->mad.hdr.gid, &ah_attr.grh.dgid, 16); packet->mad.hdr.flow_label = cpu_to_be32(ah_attr.grh.flow_label); } if (queue_packet(file, agent, packet)) goto err2; return; err2: kfree(packet); err1: ib_free_recv_mad(mad_recv_wc); } static ssize_t copy_recv_mad(struct ib_umad_file *file, char __user *buf, struct ib_umad_packet *packet, size_t count) { struct ib_mad_recv_buf *recv_buf; int left, seg_payload, offset, max_seg_payload; /* We need enough room to copy the first (or only) MAD segment. */ recv_buf = &packet->recv_wc->recv_buf; if ((packet->length <= sizeof (*recv_buf->mad) && count < hdr_size(file) + packet->length) || (packet->length > sizeof (*recv_buf->mad) && count < hdr_size(file) + sizeof (*recv_buf->mad))) return -EINVAL; if (copy_to_user(buf, &packet->mad, hdr_size(file))) return -EFAULT; buf += hdr_size(file); seg_payload = min_t(int, packet->length, sizeof (*recv_buf->mad)); if (copy_to_user(buf, recv_buf->mad, seg_payload)) return -EFAULT; if (seg_payload < packet->length) { /* * Multipacket RMPP MAD message. Copy remainder of message. * Note that last segment may have a shorter payload. */ if (count < hdr_size(file) + packet->length) { /* * The buffer is too small, return the first RMPP segment, * which includes the RMPP message length. */ return -ENOSPC; } offset = ib_get_mad_data_offset(recv_buf->mad->mad_hdr.mgmt_class); max_seg_payload = sizeof (struct ib_mad) - offset; for (left = packet->length - seg_payload, buf += seg_payload; left; left -= seg_payload, buf += seg_payload) { recv_buf = container_of(recv_buf->list.next, struct ib_mad_recv_buf, list); seg_payload = min(left, max_seg_payload); if (copy_to_user(buf, ((void *) recv_buf->mad) + offset, seg_payload)) return -EFAULT; } } return hdr_size(file) + packet->length; } static ssize_t copy_send_mad(struct ib_umad_file *file, char __user *buf, struct ib_umad_packet *packet, size_t count) { ssize_t size = hdr_size(file) + packet->length; if (count < size) return -EINVAL; if (copy_to_user(buf, &packet->mad, hdr_size(file))) return -EFAULT; buf += hdr_size(file); if (copy_to_user(buf, packet->mad.data, packet->length)) return -EFAULT; return size; } static ssize_t ib_umad_read(struct file *filp, char __user *buf, size_t count, loff_t *pos) { struct ib_umad_file *file = filp->private_data; struct ib_umad_packet *packet; ssize_t ret; if (count < hdr_size(file)) return -EINVAL; mutex_lock(&file->mutex); while (list_empty(&file->recv_list)) { mutex_unlock(&file->mutex); if (filp->f_flags & O_NONBLOCK) return -EAGAIN; if (wait_event_interruptible(file->recv_wait, !list_empty(&file->recv_list))) return -ERESTARTSYS; mutex_lock(&file->mutex); } packet = list_entry(file->recv_list.next, struct ib_umad_packet, list); list_del(&packet->list); mutex_unlock(&file->mutex); if (packet->recv_wc) ret = copy_recv_mad(file, buf, packet, count); else ret = copy_send_mad(file, buf, packet, count); if (ret < 0) { /* Requeue packet */ mutex_lock(&file->mutex); list_add(&packet->list, &file->recv_list); mutex_unlock(&file->mutex); } else { if (packet->recv_wc) ib_free_recv_mad(packet->recv_wc); kfree(packet); } return ret; } static int copy_rmpp_mad(struct ib_mad_send_buf *msg, const char __user *buf) { int left, seg; /* Copy class specific header */ if ((msg->hdr_len > IB_MGMT_RMPP_HDR) && copy_from_user(msg->mad + IB_MGMT_RMPP_HDR, buf + IB_MGMT_RMPP_HDR, msg->hdr_len - IB_MGMT_RMPP_HDR)) return -EFAULT; /* All headers are in place. Copy data segments. */ for (seg = 1, left = msg->data_len, buf += msg->hdr_len; left > 0; seg++, left -= msg->seg_size, buf += msg->seg_size) { if (copy_from_user(ib_get_rmpp_segment(msg, seg), buf, min(left, msg->seg_size))) return -EFAULT; } return 0; } static int same_destination(struct ib_user_mad_hdr *hdr1, struct ib_user_mad_hdr *hdr2) { if (!hdr1->grh_present && !hdr2->grh_present) return (hdr1->lid == hdr2->lid); if (hdr1->grh_present && hdr2->grh_present) return !memcmp(hdr1->gid, hdr2->gid, 16); return 0; } static int is_duplicate(struct ib_umad_file *file, struct ib_umad_packet *packet) { struct ib_umad_packet *sent_packet; struct ib_mad_hdr *sent_hdr, *hdr; hdr = (struct ib_mad_hdr *) packet->mad.data; list_for_each_entry(sent_packet, &file->send_list, list) { sent_hdr = (struct ib_mad_hdr *) sent_packet->mad.data; if ((hdr->tid != sent_hdr->tid) || (hdr->mgmt_class != sent_hdr->mgmt_class)) continue; /* * No need to be overly clever here. If two new operations have * the same TID, reject the second as a duplicate. This is more * restrictive than required by the spec. */ if (!ib_response_mad((struct ib_mad *) hdr)) { if (!ib_response_mad((struct ib_mad *) sent_hdr)) return 1; continue; } else if (!ib_response_mad((struct ib_mad *) sent_hdr)) continue; if (same_destination(&packet->mad.hdr, &sent_packet->mad.hdr)) return 1; } return 0; } static ssize_t ib_umad_write(struct file *filp, const char __user *buf, size_t count, loff_t *pos) { struct ib_umad_file *file = filp->private_data; struct ib_umad_packet *packet; struct ib_mad_agent *agent; struct ib_ah_attr ah_attr; struct ib_ah *ah; struct ib_rmpp_mad *rmpp_mad; __be64 *tid; int ret, data_len, hdr_len, copy_offset, rmpp_active; if (count < hdr_size(file) + IB_MGMT_RMPP_HDR) return -EINVAL; packet = kzalloc(sizeof *packet + IB_MGMT_RMPP_HDR, GFP_KERNEL); if (!packet) return -ENOMEM; if (copy_from_user(&packet->mad, buf, hdr_size(file))) { ret = -EFAULT; goto err; } if (packet->mad.hdr.id < 0 || packet->mad.hdr.id >= IB_UMAD_MAX_AGENTS) { ret = -EINVAL; goto err; } buf += hdr_size(file); if (copy_from_user(packet->mad.data, buf, IB_MGMT_RMPP_HDR)) { ret = -EFAULT; goto err; } mutex_lock(&file->mutex); agent = __get_agent(file, packet->mad.hdr.id); if (!agent) { ret = -EINVAL; goto err_up; } memset(&ah_attr, 0, sizeof ah_attr); ah_attr.dlid = be16_to_cpu(packet->mad.hdr.lid); ah_attr.sl = packet->mad.hdr.sl; ah_attr.src_path_bits = packet->mad.hdr.path_bits; ah_attr.port_num = file->port->port_num; if (packet->mad.hdr.grh_present) { ah_attr.ah_flags = IB_AH_GRH; memcpy(ah_attr.grh.dgid.raw, packet->mad.hdr.gid, 16); ah_attr.grh.sgid_index = packet->mad.hdr.gid_index; ah_attr.grh.flow_label = be32_to_cpu(packet->mad.hdr.flow_label); ah_attr.grh.hop_limit = packet->mad.hdr.hop_limit; ah_attr.grh.traffic_class = packet->mad.hdr.traffic_class; } ah = ib_create_ah(agent->qp->pd, &ah_attr); if (IS_ERR(ah)) { ret = PTR_ERR(ah); goto err_up; } rmpp_mad = (struct ib_rmpp_mad *) packet->mad.data; hdr_len = ib_get_mad_data_offset(rmpp_mad->mad_hdr.mgmt_class); if (!ib_is_mad_class_rmpp(rmpp_mad->mad_hdr.mgmt_class)) { copy_offset = IB_MGMT_MAD_HDR; rmpp_active = 0; } else { copy_offset = IB_MGMT_RMPP_HDR; rmpp_active = ib_get_rmpp_flags(&rmpp_mad->rmpp_hdr) & IB_MGMT_RMPP_FLAG_ACTIVE; } data_len = count - hdr_size(file) - hdr_len; packet->msg = ib_create_send_mad(agent, be32_to_cpu(packet->mad.hdr.qpn), packet->mad.hdr.pkey_index, rmpp_active, hdr_len, data_len, GFP_KERNEL); if (IS_ERR(packet->msg)) { ret = PTR_ERR(packet->msg); goto err_ah; } packet->msg->ah = ah; packet->msg->timeout_ms = packet->mad.hdr.timeout_ms; packet->msg->retries = packet->mad.hdr.retries; packet->msg->context[0] = packet; /* Copy MAD header. Any RMPP header is already in place. */ memcpy(packet->msg->mad, packet->mad.data, IB_MGMT_MAD_HDR); if (!rmpp_active) { if (copy_from_user(packet->msg->mad + copy_offset, buf + copy_offset, hdr_len + data_len - copy_offset)) { ret = -EFAULT; goto err_msg; } } else { ret = copy_rmpp_mad(packet->msg, buf); if (ret) goto err_msg; } /* * Set the high-order part of the transaction ID to make MADs from * different agents unique, and allow routing responses back to the * original requestor. */ if (!ib_response_mad(packet->msg->mad)) { tid = &((struct ib_mad_hdr *) packet->msg->mad)->tid; *tid = cpu_to_be64(((u64) agent->hi_tid) << 32 | (be64_to_cpup(tid) & 0xffffffff)); rmpp_mad->mad_hdr.tid = *tid; } spin_lock_irq(&file->send_lock); ret = is_duplicate(file, packet); if (!ret) list_add_tail(&packet->list, &file->send_list); spin_unlock_irq(&file->send_lock); if (ret) { ret = -EINVAL; goto err_msg; } ret = ib_post_send_mad(packet->msg, NULL); if (ret) goto err_send; mutex_unlock(&file->mutex); return count; err_send: dequeue_send(file, packet); err_msg: ib_free_send_mad(packet->msg); err_ah: ib_destroy_ah(ah); err_up: mutex_unlock(&file->mutex); err: kfree(packet); return ret; } static unsigned int ib_umad_poll(struct file *filp, struct poll_table_struct *wait) { struct ib_umad_file *file = filp->private_data; /* we will always be able to post a MAD send */ unsigned int mask = POLLOUT | POLLWRNORM; poll_wait(filp, &file->recv_wait, wait); if (!list_empty(&file->recv_list)) mask |= POLLIN | POLLRDNORM; return mask; } static int ib_umad_reg_agent(struct ib_umad_file *file, void __user *arg, int compat_method_mask) { struct ib_user_mad_reg_req ureq; struct ib_mad_reg_req req; struct ib_mad_agent *agent = NULL; int agent_id; int ret; mutex_lock(&file->port->file_mutex); mutex_lock(&file->mutex); if (!file->port->ib_dev) { ret = -EPIPE; goto out; } if (copy_from_user(&ureq, arg, sizeof ureq)) { ret = -EFAULT; goto out; } if (ureq.qpn != 0 && ureq.qpn != 1) { ret = -EINVAL; goto out; } for (agent_id = 0; agent_id < IB_UMAD_MAX_AGENTS; ++agent_id) if (!__get_agent(file, agent_id)) goto found; ret = -ENOMEM; goto out; found: if (ureq.mgmt_class) { req.mgmt_class = ureq.mgmt_class; req.mgmt_class_version = ureq.mgmt_class_version; memcpy(req.oui, ureq.oui, sizeof req.oui); if (compat_method_mask) { u32 *umm = (u32 *) ureq.method_mask; int i; for (i = 0; i < BITS_TO_LONGS(IB_MGMT_MAX_METHODS); ++i) req.method_mask[i] = umm[i * 2] | ((u64) umm[i * 2 + 1] << 32); } else memcpy(req.method_mask, ureq.method_mask, sizeof req.method_mask); } agent = ib_register_mad_agent(file->port->ib_dev, file->port->port_num, ureq.qpn ? IB_QPT_GSI : IB_QPT_SMI, ureq.mgmt_class ? &req : NULL, ureq.rmpp_version, send_handler, recv_handler, file); if (IS_ERR(agent)) { ret = PTR_ERR(agent); agent = NULL; goto out; } if (put_user(agent_id, (u32 __user *) (arg + offsetof(struct ib_user_mad_reg_req, id)))) { ret = -EFAULT; goto out; } if (!file->already_used) { file->already_used = 1; if (!file->use_pkey_index) { printk(KERN_WARNING "user_mad: process %s did not enable " "P_Key index support.\n", curproc->p_comm); printk(KERN_WARNING "user_mad: Documentation/infiniband/user_mad.txt " "has info on the new ABI.\n"); } } file->agent[agent_id] = agent; ret = 0; out: mutex_unlock(&file->mutex); if (ret && agent) ib_unregister_mad_agent(agent); mutex_unlock(&file->port->file_mutex); return ret; } static int ib_umad_unreg_agent(struct ib_umad_file *file, u32 __user *arg) { struct ib_mad_agent *agent = NULL; u32 id; int ret = 0; if (get_user(id, arg)) return -EFAULT; mutex_lock(&file->port->file_mutex); mutex_lock(&file->mutex); if (id < 0 || id >= IB_UMAD_MAX_AGENTS || !__get_agent(file, id)) { ret = -EINVAL; goto out; } agent = file->agent[id]; file->agent[id] = NULL; out: mutex_unlock(&file->mutex); if (agent) ib_unregister_mad_agent(agent); mutex_unlock(&file->port->file_mutex); return ret; } static long ib_umad_enable_pkey(struct ib_umad_file *file) { int ret = 0; mutex_lock(&file->mutex); if (file->already_used) ret = -EINVAL; else file->use_pkey_index = 1; mutex_unlock(&file->mutex); return ret; } static long ib_umad_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) { switch (cmd) { case IB_USER_MAD_REGISTER_AGENT: return ib_umad_reg_agent(filp->private_data, (void __user *) arg, 0); case IB_USER_MAD_UNREGISTER_AGENT: return ib_umad_unreg_agent(filp->private_data, (__u32 __user *) arg); case IB_USER_MAD_ENABLE_PKEY: return ib_umad_enable_pkey(filp->private_data); default: return -ENOIOCTLCMD; } } #ifdef CONFIG_COMPAT static long ib_umad_compat_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) { switch (cmd) { case IB_USER_MAD_REGISTER_AGENT: return ib_umad_reg_agent(filp->private_data, compat_ptr(arg), 1); case IB_USER_MAD_UNREGISTER_AGENT: return ib_umad_unreg_agent(filp->private_data, compat_ptr(arg)); case IB_USER_MAD_ENABLE_PKEY: return ib_umad_enable_pkey(filp->private_data); default: return -ENOIOCTLCMD; } } #endif /* * ib_umad_open() does not need the BKL: * * - umad_port[] accesses are protected by port_lock, the * ib_umad_port structures are properly reference counted, and * everything else is purely local to the file being created, so * races against other open calls are not a problem; * - the ioctl method does not affect any global state outside of the * file structure being operated on; * - the port is added to umad_port[] as the last part of module * initialization so the open method will either immediately run * -ENXIO, or all required initialization will be done. */ static int ib_umad_open(struct inode *inode, struct file *filp) { struct ib_umad_port *port; struct ib_umad_file *file; int ret = 0; spin_lock(&port_lock); port = umad_port[iminor(inode) - IB_UMAD_MINOR_BASE]; if (port) kref_get(&port->umad_dev->ref); spin_unlock(&port_lock); if (!port) return -ENXIO; mutex_lock(&port->file_mutex); if (!port->ib_dev) { ret = -ENXIO; goto out; } file = kzalloc(sizeof *file, GFP_KERNEL); if (!file) { kref_put(&port->umad_dev->ref, ib_umad_release_dev); ret = -ENOMEM; goto out; } mutex_init(&file->mutex); spin_lock_init(&file->send_lock); INIT_LIST_HEAD(&file->recv_list); INIT_LIST_HEAD(&file->send_list); init_waitqueue_head(&file->recv_wait); file->port = port; file->filp = filp; filp->private_data = file; list_add_tail(&file->port_list, &port->file_list); out: mutex_unlock(&port->file_mutex); return ret; } static int ib_umad_close(struct inode *inode, struct file *filp) { struct ib_umad_file *file = filp->private_data; struct ib_umad_device *dev = file->port->umad_dev; struct ib_umad_packet *packet, *tmp; int already_dead; int i; mutex_lock(&file->port->file_mutex); mutex_lock(&file->mutex); already_dead = file->agents_dead; file->agents_dead = 1; list_for_each_entry_safe(packet, tmp, &file->recv_list, list) { if (packet->recv_wc) ib_free_recv_mad(packet->recv_wc); kfree(packet); } list_del(&file->port_list); mutex_unlock(&file->mutex); if (!already_dead) for (i = 0; i < IB_UMAD_MAX_AGENTS; ++i) if (file->agent[i]) ib_unregister_mad_agent(file->agent[i]); mutex_unlock(&file->port->file_mutex); kfree(file); kref_put(&dev->ref, ib_umad_release_dev); return 0; } static const struct file_operations umad_fops = { .owner = THIS_MODULE, .read = ib_umad_read, .write = ib_umad_write, .poll = ib_umad_poll, .unlocked_ioctl = ib_umad_ioctl, #ifdef CONFIG_COMPAT .compat_ioctl = ib_umad_compat_ioctl, #endif .open = ib_umad_open, .release = ib_umad_close }; static int ib_umad_sm_open(struct inode *inode, struct file *filp) { struct ib_umad_port *port; struct ib_port_modify props = { .set_port_cap_mask = IB_PORT_SM }; int ret; spin_lock(&port_lock); port = umad_port[iminor(inode) - IB_UMAD_MINOR_BASE - IB_UMAD_MAX_PORTS]; if (port) kref_get(&port->umad_dev->ref); spin_unlock(&port_lock); if (!port) return -ENXIO; if (filp->f_flags & O_NONBLOCK) { if (down_trylock(&port->sm_sem)) { ret = -EAGAIN; goto fail; } } else { if (down_interruptible(&port->sm_sem)) { ret = -ERESTARTSYS; goto fail; } } ret = ib_modify_port(port->ib_dev, port->port_num, 0, &props); if (ret) { up(&port->sm_sem); goto fail; } filp->private_data = port; return 0; fail: kref_put(&port->umad_dev->ref, ib_umad_release_dev); return ret; } static int ib_umad_sm_close(struct inode *inode, struct file *filp) { struct ib_umad_port *port = filp->private_data; struct ib_port_modify props = { .clr_port_cap_mask = IB_PORT_SM }; int ret = 0; mutex_lock(&port->file_mutex); if (port->ib_dev) ret = ib_modify_port(port->ib_dev, port->port_num, 0, &props); mutex_unlock(&port->file_mutex); up(&port->sm_sem); kref_put(&port->umad_dev->ref, ib_umad_release_dev); return ret; } static const struct file_operations umad_sm_fops = { .owner = THIS_MODULE, .open = ib_umad_sm_open, .release = ib_umad_sm_close }; static struct ib_client umad_client = { .name = "umad", .add = ib_umad_add_one, .remove = ib_umad_remove_one }; static ssize_t show_ibdev(struct device *dev, struct device_attribute *attr, char *buf) { struct ib_umad_port *port = dev_get_drvdata(dev); if (!port) return -ENODEV; return sprintf(buf, "%s\n", port->ib_dev->name); } static DEVICE_ATTR(ibdev, S_IRUGO, show_ibdev, NULL); static ssize_t show_port(struct device *dev, struct device_attribute *attr, char *buf) { struct ib_umad_port *port = dev_get_drvdata(dev); if (!port) return -ENODEV; return sprintf(buf, "%d\n", port->port_num); } static DEVICE_ATTR(port, S_IRUGO, show_port, NULL); -static ssize_t show_abi_version(struct class *class, char *buf) +static ssize_t show_abi_version(struct class *class, struct class_attribute *attr, char *buf) { return sprintf(buf, "%d\n", IB_USER_MAD_ABI_VERSION); } static CLASS_ATTR(abi_version, S_IRUGO, show_abi_version, NULL); static int ib_umad_init_port(struct ib_device *device, int port_num, struct ib_umad_port *port) { spin_lock(&port_lock); port->dev_num = find_first_zero_bit(dev_map, IB_UMAD_MAX_PORTS); if (port->dev_num >= IB_UMAD_MAX_PORTS) { spin_unlock(&port_lock); return -1; } set_bit(port->dev_num, dev_map); spin_unlock(&port_lock); port->ib_dev = device; port->port_num = port_num; init_MUTEX(&port->sm_sem); mutex_init(&port->file_mutex); INIT_LIST_HEAD(&port->file_list); port->cdev = cdev_alloc(); if (!port->cdev) return -1; port->cdev->owner = THIS_MODULE; port->cdev->ops = &umad_fops; kobject_set_name(&port->cdev->kobj, "umad%d", port->dev_num); if (cdev_add(port->cdev, base_dev + port->dev_num, 1)) goto err_cdev; port->dev = device_create(umad_class, device->dma_device, port->cdev->dev, port, "umad%d", port->dev_num); if (IS_ERR(port->dev)) goto err_cdev; if (device_create_file(port->dev, &dev_attr_ibdev)) goto err_dev; if (device_create_file(port->dev, &dev_attr_port)) goto err_dev; port->sm_cdev = cdev_alloc(); if (!port->sm_cdev) goto err_dev; port->sm_cdev->owner = THIS_MODULE; port->sm_cdev->ops = &umad_sm_fops; kobject_set_name(&port->sm_cdev->kobj, "issm%d", port->dev_num); if (cdev_add(port->sm_cdev, base_dev + port->dev_num + IB_UMAD_MAX_PORTS, 1)) goto err_sm_cdev; port->sm_dev = device_create(umad_class, device->dma_device, port->sm_cdev->dev, port, "issm%d", port->dev_num); if (IS_ERR(port->sm_dev)) goto err_sm_cdev; if (device_create_file(port->sm_dev, &dev_attr_ibdev)) goto err_sm_dev; if (device_create_file(port->sm_dev, &dev_attr_port)) goto err_sm_dev; spin_lock(&port_lock); umad_port[port->dev_num] = port; spin_unlock(&port_lock); return 0; err_sm_dev: device_destroy(umad_class, port->sm_cdev->dev); err_sm_cdev: cdev_del(port->sm_cdev); err_dev: device_destroy(umad_class, port->cdev->dev); err_cdev: cdev_del(port->cdev); clear_bit(port->dev_num, dev_map); return -1; } static void ib_umad_kill_port(struct ib_umad_port *port) { struct ib_umad_file *file; int already_dead; int id; dev_set_drvdata(port->dev, NULL); dev_set_drvdata(port->sm_dev, NULL); device_destroy(umad_class, port->cdev->dev); device_destroy(umad_class, port->sm_cdev->dev); cdev_del(port->cdev); cdev_del(port->sm_cdev); spin_lock(&port_lock); umad_port[port->dev_num] = NULL; spin_unlock(&port_lock); mutex_lock(&port->file_mutex); port->ib_dev = NULL; list_for_each_entry(file, &port->file_list, port_list) { mutex_lock(&file->mutex); already_dead = file->agents_dead; file->agents_dead = 1; mutex_unlock(&file->mutex); for (id = 0; id < IB_UMAD_MAX_AGENTS; ++id) if (file->agent[id]) ib_unregister_mad_agent(file->agent[id]); } mutex_unlock(&port->file_mutex); clear_bit(port->dev_num, dev_map); } static void ib_umad_add_one(struct ib_device *device) { struct ib_umad_device *umad_dev; int s, e, i; if (rdma_node_get_transport(device->node_type) != RDMA_TRANSPORT_IB) return; if (device->node_type == RDMA_NODE_IB_SWITCH) s = e = 0; else { s = 1; e = device->phys_port_cnt; } umad_dev = kzalloc(sizeof *umad_dev + (e - s + 1) * sizeof (struct ib_umad_port), GFP_KERNEL); if (!umad_dev) return; kref_init(&umad_dev->ref); umad_dev->start_port = s; umad_dev->end_port = e; for (i = s; i <= e; ++i) { umad_dev->port[i - s].umad_dev = umad_dev; if (rdma_port_get_link_layer(device, i) == IB_LINK_LAYER_INFINIBAND) if (ib_umad_init_port(device, i, &umad_dev->port[i - s])) goto err; } ib_set_client_data(device, &umad_client, umad_dev); return; err: while (--i >= s) if (rdma_port_get_link_layer(device, i) == IB_LINK_LAYER_INFINIBAND) ib_umad_kill_port(&umad_dev->port[i - s]); kref_put(&umad_dev->ref, ib_umad_release_dev); } static void ib_umad_remove_one(struct ib_device *device) { struct ib_umad_device *umad_dev = ib_get_client_data(device, &umad_client); int i; if (!umad_dev) return; for (i = 0; i <= umad_dev->end_port - umad_dev->start_port; ++i) if (rdma_port_get_link_layer(device, i + 1) == IB_LINK_LAYER_INFINIBAND) ib_umad_kill_port(&umad_dev->port[i]); kref_put(&umad_dev->ref, ib_umad_release_dev); } static int __init ib_umad_init(void) { int ret; ret = register_chrdev_region(base_dev, IB_UMAD_MAX_PORTS * 2, "infiniband_mad"); if (ret) { printk(KERN_ERR "user_mad: couldn't register device number\n"); goto out; } umad_class = class_create(THIS_MODULE, "infiniband_mad"); if (IS_ERR(umad_class)) { ret = PTR_ERR(umad_class); printk(KERN_ERR "user_mad: couldn't create class infiniband_mad\n"); goto out_chrdev; } ret = class_create_file(umad_class, &class_attr_abi_version); if (ret) { printk(KERN_ERR "user_mad: couldn't create abi_version attribute\n"); goto out_class; } ret = ib_register_client(&umad_client); if (ret) { printk(KERN_ERR "user_mad: couldn't register ib_umad client\n"); goto out_class; } return 0; out_class: class_destroy(umad_class); out_chrdev: unregister_chrdev_region(base_dev, IB_UMAD_MAX_PORTS * 2); out: return ret; } static void __exit ib_umad_cleanup(void) { ib_unregister_client(&umad_client); class_destroy(umad_class); unregister_chrdev_region(base_dev, IB_UMAD_MAX_PORTS * 2); } module_init(ib_umad_init); module_exit(ib_umad_cleanup); Index: stable/10/sys/ofed/drivers/infiniband/core/uverbs_cmd.c =================================================================== --- stable/10/sys/ofed/drivers/infiniband/core/uverbs_cmd.c (revision 271126) +++ stable/10/sys/ofed/drivers/infiniband/core/uverbs_cmd.c (revision 271127) @@ -1,3022 +1,3023 @@ /* * Copyright (c) 2005 Topspin Communications. All rights reserved. * Copyright (c) 2005, 2006, 2007 Cisco Systems. All rights reserved. * Copyright (c) 2005 PathScale, Inc. All rights reserved. * Copyright (c) 2006 Mellanox Technologies. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU * General Public License (GPL) Version 2, available from the file * COPYING in the main directory of this source tree, or the * OpenIB.org BSD license below: * * Redistribution and use in source and binary forms, with or * without modification, are permitted provided that the following * conditions are met: * * - Redistributions of source code must retain the above * copyright notice, this list of conditions and the following * disclaimer. * * - Redistributions in binary form must reproduce the above * copyright notice, this list of conditions and the following * disclaimer in the documentation and/or other materials * provided with the distribution. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ #include #include +#include #include #include #include "uverbs.h" static struct lock_class_key pd_lock_key; static struct lock_class_key mr_lock_key; static struct lock_class_key cq_lock_key; static struct lock_class_key qp_lock_key; static struct lock_class_key ah_lock_key; static struct lock_class_key srq_lock_key; #define INIT_UDATA(udata, ibuf, obuf, ilen, olen) \ do { \ (udata)->inbuf = (void __user *) (ibuf); \ (udata)->outbuf = (void __user *) (obuf); \ (udata)->inlen = (ilen); \ (udata)->outlen = (olen); \ } while (0) /* * The ib_uobject locking scheme is as follows: * * - ib_uverbs_idr_lock protects the uverbs idrs themselves, so it * needs to be held during all idr operations. When an object is * looked up, a reference must be taken on the object's kref before * dropping this lock. * * - Each object also has an rwsem. This rwsem must be held for * reading while an operation that uses the object is performed. * For example, while registering an MR, the associated PD's * uobject.mutex must be held for reading. The rwsem must be held * for writing while initializing or destroying an object. * * - In addition, each object has a "live" flag. If this flag is not * set, then lookups of the object will fail even if it is found in * the idr. This handles a reader that blocks and does not acquire * the rwsem until after the object is destroyed. The destroy * operation will set the live flag to 0 and then drop the rwsem; * this will allow the reader to acquire the rwsem, see that the * live flag is 0, and then drop the rwsem and its reference to * object. The underlying storage will not be freed until the last * reference to the object is dropped. */ static void init_uobj(struct ib_uobject *uobj, u64 user_handle, struct ib_ucontext *context, struct lock_class_key *key) { uobj->user_handle = user_handle; uobj->context = context; kref_init(&uobj->ref); init_rwsem(&uobj->mutex); lockdep_set_class(&uobj->mutex, key); uobj->live = 0; } static void release_uobj(struct kref *kref) { kfree(container_of(kref, struct ib_uobject, ref)); } static void put_uobj(struct ib_uobject *uobj) { kref_put(&uobj->ref, release_uobj); } static void put_uobj_read(struct ib_uobject *uobj) { up_read(&uobj->mutex); put_uobj(uobj); } static void put_uobj_write(struct ib_uobject *uobj) { up_write(&uobj->mutex); put_uobj(uobj); } static int idr_add_uobj(struct idr *idr, struct ib_uobject *uobj) { int ret; retry: if (!idr_pre_get(idr, GFP_KERNEL)) return -ENOMEM; spin_lock(&ib_uverbs_idr_lock); ret = idr_get_new(idr, uobj, &uobj->id); spin_unlock(&ib_uverbs_idr_lock); if (ret == -EAGAIN) goto retry; return ret; } void idr_remove_uobj(struct idr *idr, struct ib_uobject *uobj) { spin_lock(&ib_uverbs_idr_lock); idr_remove(idr, uobj->id); spin_unlock(&ib_uverbs_idr_lock); } static struct ib_uobject *__idr_get_uobj(struct idr *idr, int id, struct ib_ucontext *context) { struct ib_uobject *uobj; spin_lock(&ib_uverbs_idr_lock); uobj = idr_find(idr, id); if (uobj) { if (uobj->context == context) kref_get(&uobj->ref); else uobj = NULL; } spin_unlock(&ib_uverbs_idr_lock); return uobj; } static struct ib_uobject *idr_read_uobj(struct idr *idr, int id, struct ib_ucontext *context, int nested) { struct ib_uobject *uobj; uobj = __idr_get_uobj(idr, id, context); if (!uobj) return NULL; if (nested) down_read_nested(&uobj->mutex, SINGLE_DEPTH_NESTING); else down_read(&uobj->mutex); if (!uobj->live) { put_uobj_read(uobj); return NULL; } return uobj; } static struct ib_uobject *idr_write_uobj(struct idr *idr, int id, struct ib_ucontext *context) { struct ib_uobject *uobj; uobj = __idr_get_uobj(idr, id, context); if (!uobj) return NULL; down_write(&uobj->mutex); if (!uobj->live) { put_uobj_write(uobj); return NULL; } return uobj; } static void *idr_read_obj(struct idr *idr, int id, struct ib_ucontext *context, int nested) { struct ib_uobject *uobj; uobj = idr_read_uobj(idr, id, context, nested); return uobj ? uobj->object : NULL; } static struct ib_pd *idr_read_pd(int pd_handle, struct ib_ucontext *context) { return idr_read_obj(&ib_uverbs_pd_idr, pd_handle, context, 0); } static void put_pd_read(struct ib_pd *pd) { put_uobj_read(pd->uobject); } static struct ib_cq *idr_read_cq(int cq_handle, struct ib_ucontext *context, int nested) { return idr_read_obj(&ib_uverbs_cq_idr, cq_handle, context, nested); } static void put_cq_read(struct ib_cq *cq) { put_uobj_read(cq->uobject); } static struct ib_ah *idr_read_ah(int ah_handle, struct ib_ucontext *context) { return idr_read_obj(&ib_uverbs_ah_idr, ah_handle, context, 0); } static void put_ah_read(struct ib_ah *ah) { put_uobj_read(ah->uobject); } static struct ib_qp *idr_read_qp(int qp_handle, struct ib_ucontext *context) { return idr_read_obj(&ib_uverbs_qp_idr, qp_handle, context, 0); } static void put_qp_read(struct ib_qp *qp) { put_uobj_read(qp->uobject); } static struct ib_srq *idr_read_srq(int srq_handle, struct ib_ucontext *context) { return idr_read_obj(&ib_uverbs_srq_idr, srq_handle, context, 0); } static void put_srq_read(struct ib_srq *srq) { put_uobj_read(srq->uobject); } static struct ib_xrcd *idr_read_xrcd(int xrcd_handle, struct ib_ucontext *context, struct ib_uobject **uobj) { *uobj = idr_read_uobj(&ib_uverbs_xrc_domain_idr, xrcd_handle, context, 0); return *uobj ? (*uobj)->object : NULL; } static void put_xrcd_read(struct ib_uobject *uobj) { put_uobj_read(uobj); } ssize_t ib_uverbs_get_context(struct ib_uverbs_file *file, const char __user *buf, int in_len, int out_len) { struct ib_uverbs_get_context cmd; struct ib_uverbs_get_context_resp resp; struct ib_udata udata; struct ib_device *ibdev = file->device->ib_dev; struct ib_ucontext *ucontext; struct file *filp; int ret; if (out_len < sizeof resp) return -ENOSPC; if (copy_from_user(&cmd, buf, sizeof cmd)) return -EFAULT; mutex_lock(&file->mutex); if (file->ucontext) { ret = -EINVAL; goto err; } INIT_UDATA(&udata, buf + sizeof cmd, (unsigned long) cmd.response + sizeof resp, in_len - sizeof cmd, out_len - sizeof resp); ucontext = ibdev->alloc_ucontext(ibdev, &udata); if (IS_ERR(ucontext)) { ret = PTR_ERR(file->ucontext); goto err; } ucontext->device = ibdev; INIT_LIST_HEAD(&ucontext->pd_list); INIT_LIST_HEAD(&ucontext->mr_list); INIT_LIST_HEAD(&ucontext->mw_list); INIT_LIST_HEAD(&ucontext->cq_list); INIT_LIST_HEAD(&ucontext->qp_list); INIT_LIST_HEAD(&ucontext->srq_list); INIT_LIST_HEAD(&ucontext->ah_list); INIT_LIST_HEAD(&ucontext->xrcd_list); ucontext->closing = 0; resp.num_comp_vectors = file->device->num_comp_vectors; filp = ib_uverbs_alloc_event_file(file, 1, &resp.async_fd); if (IS_ERR(filp)) { ret = PTR_ERR(filp); goto err_free; } if (copy_to_user((void __user *) (unsigned long) cmd.response, &resp, sizeof resp)) { ret = -EFAULT; goto err_file; } file->async_file = filp->private_data; INIT_IB_EVENT_HANDLER(&file->event_handler, file->device->ib_dev, ib_uverbs_event_handler); ret = ib_register_event_handler(&file->event_handler); if (ret) goto err_file; kref_get(&file->async_file->ref); kref_get(&file->ref); file->ucontext = ucontext; fd_install(resp.async_fd, filp); mutex_unlock(&file->mutex); return in_len; err_file: put_unused_fd(resp.async_fd); fput(filp); err_free: ibdev->dealloc_ucontext(ucontext); err: mutex_unlock(&file->mutex); return ret; } ssize_t ib_uverbs_query_device(struct ib_uverbs_file *file, const char __user *buf, int in_len, int out_len) { struct ib_uverbs_query_device cmd; struct ib_uverbs_query_device_resp resp; struct ib_device_attr attr; int ret; if (out_len < sizeof resp) return -ENOSPC; if (copy_from_user(&cmd, buf, sizeof cmd)) return -EFAULT; ret = ib_query_device(file->device->ib_dev, &attr); if (ret) return ret; memset(&resp, 0, sizeof resp); resp.fw_ver = attr.fw_ver; resp.node_guid = file->device->ib_dev->node_guid; resp.sys_image_guid = attr.sys_image_guid; resp.max_mr_size = attr.max_mr_size; resp.page_size_cap = attr.page_size_cap; resp.vendor_id = attr.vendor_id; resp.vendor_part_id = attr.vendor_part_id; resp.hw_ver = attr.hw_ver; resp.max_qp = attr.max_qp; resp.max_qp_wr = attr.max_qp_wr; resp.device_cap_flags = attr.device_cap_flags; resp.max_sge = attr.max_sge; resp.max_sge_rd = attr.max_sge_rd; resp.max_cq = attr.max_cq; resp.max_cqe = attr.max_cqe; resp.max_mr = attr.max_mr; resp.max_pd = attr.max_pd; resp.max_qp_rd_atom = attr.max_qp_rd_atom; resp.max_ee_rd_atom = attr.max_ee_rd_atom; resp.max_res_rd_atom = attr.max_res_rd_atom; resp.max_qp_init_rd_atom = attr.max_qp_init_rd_atom; resp.max_ee_init_rd_atom = attr.max_ee_init_rd_atom; resp.atomic_cap = attr.atomic_cap; resp.max_ee = attr.max_ee; resp.max_rdd = attr.max_rdd; resp.max_mw = attr.max_mw; resp.max_raw_ipv6_qp = attr.max_raw_ipv6_qp; resp.max_raw_ethy_qp = attr.max_raw_ethy_qp; resp.max_mcast_grp = attr.max_mcast_grp; resp.max_mcast_qp_attach = attr.max_mcast_qp_attach; resp.max_total_mcast_qp_attach = attr.max_total_mcast_qp_attach; resp.max_ah = attr.max_ah; resp.max_fmr = attr.max_fmr; resp.max_map_per_fmr = attr.max_map_per_fmr; resp.max_srq = attr.max_srq; resp.max_srq_wr = attr.max_srq_wr; resp.max_srq_sge = attr.max_srq_sge; resp.max_pkeys = attr.max_pkeys; resp.local_ca_ack_delay = attr.local_ca_ack_delay; resp.phys_port_cnt = file->device->ib_dev->phys_port_cnt; if (copy_to_user((void __user *) (unsigned long) cmd.response, &resp, sizeof resp)) return -EFAULT; return in_len; } ssize_t ib_uverbs_query_port(struct ib_uverbs_file *file, const char __user *buf, int in_len, int out_len) { struct ib_uverbs_query_port cmd; struct ib_uverbs_query_port_resp resp; struct ib_port_attr attr; int ret; if (out_len < sizeof resp) return -ENOSPC; if (copy_from_user(&cmd, buf, sizeof cmd)) return -EFAULT; ret = ib_query_port(file->device->ib_dev, cmd.port_num, &attr); if (ret) return ret; memset(&resp, 0, sizeof resp); resp.state = attr.state; resp.max_mtu = attr.max_mtu; resp.active_mtu = attr.active_mtu; resp.gid_tbl_len = attr.gid_tbl_len; resp.port_cap_flags = attr.port_cap_flags; resp.max_msg_sz = attr.max_msg_sz; resp.bad_pkey_cntr = attr.bad_pkey_cntr; resp.qkey_viol_cntr = attr.qkey_viol_cntr; resp.pkey_tbl_len = attr.pkey_tbl_len; resp.lid = attr.lid; resp.sm_lid = attr.sm_lid; resp.lmc = attr.lmc; resp.max_vl_num = attr.max_vl_num; resp.sm_sl = attr.sm_sl; resp.subnet_timeout = attr.subnet_timeout; resp.init_type_reply = attr.init_type_reply; resp.active_width = attr.active_width; resp.active_speed = attr.active_speed; resp.phys_state = attr.phys_state; resp.link_layer = attr.link_layer; if (copy_to_user((void __user *) (unsigned long) cmd.response, &resp, sizeof resp)) return -EFAULT; return in_len; } ssize_t ib_uverbs_alloc_pd(struct ib_uverbs_file *file, const char __user *buf, int in_len, int out_len) { struct ib_uverbs_alloc_pd cmd; struct ib_uverbs_alloc_pd_resp resp; struct ib_udata udata; struct ib_uobject *uobj; struct ib_pd *pd; int ret; if (out_len < sizeof resp) return -ENOSPC; if (copy_from_user(&cmd, buf, sizeof cmd)) return -EFAULT; INIT_UDATA(&udata, buf + sizeof cmd, (unsigned long) cmd.response + sizeof resp, in_len - sizeof cmd, out_len - sizeof resp); uobj = kmalloc(sizeof *uobj, GFP_KERNEL); if (!uobj) return -ENOMEM; init_uobj(uobj, 0, file->ucontext, &pd_lock_key); down_write(&uobj->mutex); pd = file->device->ib_dev->alloc_pd(file->device->ib_dev, file->ucontext, &udata); if (IS_ERR(pd)) { ret = PTR_ERR(pd); goto err; } pd->device = file->device->ib_dev; pd->uobject = uobj; atomic_set(&pd->usecnt, 0); uobj->object = pd; ret = idr_add_uobj(&ib_uverbs_pd_idr, uobj); if (ret) goto err_idr; memset(&resp, 0, sizeof resp); resp.pd_handle = uobj->id; if (copy_to_user((void __user *) (unsigned long) cmd.response, &resp, sizeof resp)) { ret = -EFAULT; goto err_copy; } mutex_lock(&file->mutex); list_add_tail(&uobj->list, &file->ucontext->pd_list); mutex_unlock(&file->mutex); uobj->live = 1; up_write(&uobj->mutex); return in_len; err_copy: idr_remove_uobj(&ib_uverbs_pd_idr, uobj); err_idr: ib_dealloc_pd(pd); err: put_uobj_write(uobj); return ret; } ssize_t ib_uverbs_dealloc_pd(struct ib_uverbs_file *file, const char __user *buf, int in_len, int out_len) { struct ib_uverbs_dealloc_pd cmd; struct ib_uobject *uobj; int ret; if (copy_from_user(&cmd, buf, sizeof cmd)) return -EFAULT; uobj = idr_write_uobj(&ib_uverbs_pd_idr, cmd.pd_handle, file->ucontext); if (!uobj) return -EINVAL; ret = ib_dealloc_pd(uobj->object); if (!ret) uobj->live = 0; put_uobj_write(uobj); if (ret) return ret; idr_remove_uobj(&ib_uverbs_pd_idr, uobj); mutex_lock(&file->mutex); list_del(&uobj->list); mutex_unlock(&file->mutex); put_uobj(uobj); return in_len; } ssize_t ib_uverbs_reg_mr(struct ib_uverbs_file *file, const char __user *buf, int in_len, int out_len) { struct ib_uverbs_reg_mr cmd; struct ib_uverbs_reg_mr_resp resp; struct ib_udata udata; struct ib_uobject *uobj; struct ib_pd *pd; struct ib_mr *mr; int ret; if (out_len < sizeof resp) return -ENOSPC; if (copy_from_user(&cmd, buf, sizeof cmd)) return -EFAULT; INIT_UDATA(&udata, buf + sizeof cmd, (unsigned long) cmd.response + sizeof resp, in_len - sizeof cmd, out_len - sizeof resp); if ((cmd.start & ~PAGE_MASK) != (cmd.hca_va & ~PAGE_MASK)) return -EINVAL; /* * Local write permission is required if remote write or * remote atomic permission is also requested. */ if (cmd.access_flags & (IB_ACCESS_REMOTE_ATOMIC | IB_ACCESS_REMOTE_WRITE) && !(cmd.access_flags & IB_ACCESS_LOCAL_WRITE)) return -EINVAL; uobj = kmalloc(sizeof *uobj, GFP_KERNEL); if (!uobj) return -ENOMEM; init_uobj(uobj, 0, file->ucontext, &mr_lock_key); down_write(&uobj->mutex); pd = idr_read_pd(cmd.pd_handle, file->ucontext); if (!pd) { ret = -EINVAL; goto err_free; } mr = pd->device->reg_user_mr(pd, cmd.start, cmd.length, cmd.hca_va, cmd.access_flags, &udata, 0); if (IS_ERR(mr)) { ret = PTR_ERR(mr); goto err_put; } mr->device = pd->device; mr->pd = pd; mr->uobject = uobj; atomic_inc(&pd->usecnt); atomic_set(&mr->usecnt, 0); uobj->object = mr; ret = idr_add_uobj(&ib_uverbs_mr_idr, uobj); if (ret) goto err_unreg; memset(&resp, 0, sizeof resp); resp.lkey = mr->lkey; resp.rkey = mr->rkey; resp.mr_handle = uobj->id; if (copy_to_user((void __user *) (unsigned long) cmd.response, &resp, sizeof resp)) { ret = -EFAULT; goto err_copy; } put_pd_read(pd); mutex_lock(&file->mutex); list_add_tail(&uobj->list, &file->ucontext->mr_list); mutex_unlock(&file->mutex); uobj->live = 1; up_write(&uobj->mutex); return in_len; err_copy: idr_remove_uobj(&ib_uverbs_mr_idr, uobj); err_unreg: ib_dereg_mr(mr); err_put: put_pd_read(pd); err_free: put_uobj_write(uobj); return ret; } ssize_t ib_uverbs_dereg_mr(struct ib_uverbs_file *file, const char __user *buf, int in_len, int out_len) { struct ib_uverbs_dereg_mr cmd; struct ib_mr *mr; struct ib_uobject *uobj; int ret = -EINVAL; if (copy_from_user(&cmd, buf, sizeof cmd)) return -EFAULT; uobj = idr_write_uobj(&ib_uverbs_mr_idr, cmd.mr_handle, file->ucontext); if (!uobj) return -EINVAL; mr = uobj->object; ret = ib_dereg_mr(mr); if (!ret) uobj->live = 0; put_uobj_write(uobj); if (ret) return ret; idr_remove_uobj(&ib_uverbs_mr_idr, uobj); mutex_lock(&file->mutex); list_del(&uobj->list); mutex_unlock(&file->mutex); put_uobj(uobj); return in_len; } ssize_t ib_uverbs_create_comp_channel(struct ib_uverbs_file *file, const char __user *buf, int in_len, int out_len) { struct ib_uverbs_create_comp_channel cmd; struct ib_uverbs_create_comp_channel_resp resp; struct file *filp; if (out_len < sizeof resp) return -ENOSPC; if (copy_from_user(&cmd, buf, sizeof cmd)) return -EFAULT; filp = ib_uverbs_alloc_event_file(file, 0, &resp.fd); if (IS_ERR(filp)) return PTR_ERR(filp); if (copy_to_user((void __user *) (unsigned long) cmd.response, &resp, sizeof resp)) { put_unused_fd(resp.fd); fput(filp); return -EFAULT; } fd_install(resp.fd, filp); return in_len; } ssize_t ib_uverbs_create_cq(struct ib_uverbs_file *file, const char __user *buf, int in_len, int out_len) { struct ib_uverbs_create_cq cmd; struct ib_uverbs_create_cq_resp resp; struct ib_udata udata; struct ib_ucq_object *obj; struct ib_uverbs_event_file *ev_file = NULL; struct ib_cq *cq; int ret; if (out_len < sizeof resp) return -ENOSPC; if (copy_from_user(&cmd, buf, sizeof cmd)) return -EFAULT; INIT_UDATA(&udata, buf + sizeof cmd, (unsigned long) cmd.response + sizeof resp, in_len - sizeof cmd, out_len - sizeof resp); if (cmd.comp_vector >= file->device->num_comp_vectors) return -EINVAL; obj = kmalloc(sizeof *obj, GFP_KERNEL); if (!obj) return -ENOMEM; init_uobj(&obj->uobject, cmd.user_handle, file->ucontext, &cq_lock_key); down_write(&obj->uobject.mutex); if (cmd.comp_channel >= 0) { ev_file = ib_uverbs_lookup_comp_file(cmd.comp_channel); if (!ev_file) { ret = -EINVAL; goto err; } } obj->uverbs_file = file; obj->comp_events_reported = 0; obj->async_events_reported = 0; INIT_LIST_HEAD(&obj->comp_list); INIT_LIST_HEAD(&obj->async_list); cq = file->device->ib_dev->create_cq(file->device->ib_dev, cmd.cqe, cmd.comp_vector, file->ucontext, &udata); if (IS_ERR(cq)) { ret = PTR_ERR(cq); goto err_file; } cq->device = file->device->ib_dev; cq->uobject = &obj->uobject; cq->comp_handler = ib_uverbs_comp_handler; cq->event_handler = ib_uverbs_cq_event_handler; cq->cq_context = ev_file; atomic_set(&cq->usecnt, 0); obj->uobject.object = cq; ret = idr_add_uobj(&ib_uverbs_cq_idr, &obj->uobject); if (ret) goto err_free; memset(&resp, 0, sizeof resp); resp.cq_handle = obj->uobject.id; resp.cqe = cq->cqe; if (copy_to_user((void __user *) (unsigned long) cmd.response, &resp, sizeof resp)) { ret = -EFAULT; goto err_copy; } mutex_lock(&file->mutex); list_add_tail(&obj->uobject.list, &file->ucontext->cq_list); mutex_unlock(&file->mutex); obj->uobject.live = 1; up_write(&obj->uobject.mutex); return in_len; err_copy: idr_remove_uobj(&ib_uverbs_cq_idr, &obj->uobject); err_free: ib_destroy_cq(cq); err_file: if (ev_file) ib_uverbs_release_ucq(file, ev_file, obj); err: put_uobj_write(&obj->uobject); return ret; } ssize_t ib_uverbs_resize_cq(struct ib_uverbs_file *file, const char __user *buf, int in_len, int out_len) { struct ib_uverbs_resize_cq cmd; struct ib_uverbs_resize_cq_resp resp; struct ib_udata udata; struct ib_cq *cq; int ret = -EINVAL; if (copy_from_user(&cmd, buf, sizeof cmd)) return -EFAULT; INIT_UDATA(&udata, buf + sizeof cmd, (unsigned long) cmd.response + sizeof resp, in_len - sizeof cmd, out_len - sizeof resp); cq = idr_read_cq(cmd.cq_handle, file->ucontext, 0); if (!cq) return -EINVAL; ret = cq->device->resize_cq(cq, cmd.cqe, &udata); if (ret) goto out; resp.cqe = cq->cqe; if (copy_to_user((void __user *) (unsigned long) cmd.response, &resp, sizeof resp.cqe)) ret = -EFAULT; out: put_cq_read(cq); return ret ? ret : in_len; } ssize_t ib_uverbs_poll_cq(struct ib_uverbs_file *file, const char __user *buf, int in_len, int out_len) { struct ib_uverbs_poll_cq cmd; struct ib_uverbs_poll_cq_resp *resp; struct ib_cq *cq; struct ib_wc *wc; int ret = 0; int i; int rsize; if (copy_from_user(&cmd, buf, sizeof cmd)) return -EFAULT; wc = kmalloc(cmd.ne * sizeof *wc, GFP_KERNEL); if (!wc) return -ENOMEM; rsize = sizeof *resp + cmd.ne * sizeof(struct ib_uverbs_wc); resp = kmalloc(rsize, GFP_KERNEL); if (!resp) { ret = -ENOMEM; goto out_wc; } cq = idr_read_cq(cmd.cq_handle, file->ucontext, 0); if (!cq) { ret = -EINVAL; goto out; } resp->count = ib_poll_cq(cq, cmd.ne, wc); put_cq_read(cq); for (i = 0; i < resp->count; i++) { resp->wc[i].wr_id = wc[i].wr_id; resp->wc[i].status = wc[i].status; resp->wc[i].opcode = wc[i].opcode; resp->wc[i].vendor_err = wc[i].vendor_err; resp->wc[i].byte_len = wc[i].byte_len; resp->wc[i].ex.imm_data = (__u32 __force) wc[i].ex.imm_data; resp->wc[i].qp_num = wc[i].qp->qp_num; resp->wc[i].src_qp = wc[i].src_qp; resp->wc[i].wc_flags = wc[i].wc_flags; resp->wc[i].pkey_index = wc[i].pkey_index; resp->wc[i].slid = wc[i].slid; resp->wc[i].sl = wc[i].sl; resp->wc[i].dlid_path_bits = wc[i].dlid_path_bits; resp->wc[i].port_num = wc[i].port_num; } if (copy_to_user((void __user *) (unsigned long) cmd.response, resp, rsize)) ret = -EFAULT; out: kfree(resp); out_wc: kfree(wc); return ret ? ret : in_len; } ssize_t ib_uverbs_req_notify_cq(struct ib_uverbs_file *file, const char __user *buf, int in_len, int out_len) { struct ib_uverbs_req_notify_cq cmd; struct ib_cq *cq; if (copy_from_user(&cmd, buf, sizeof cmd)) return -EFAULT; cq = idr_read_cq(cmd.cq_handle, file->ucontext, 0); if (!cq) return -EINVAL; ib_req_notify_cq(cq, cmd.solicited_only ? IB_CQ_SOLICITED : IB_CQ_NEXT_COMP); put_cq_read(cq); return in_len; } ssize_t ib_uverbs_destroy_cq(struct ib_uverbs_file *file, const char __user *buf, int in_len, int out_len) { struct ib_uverbs_destroy_cq cmd; struct ib_uverbs_destroy_cq_resp resp; struct ib_uobject *uobj; struct ib_cq *cq; struct ib_ucq_object *obj; struct ib_uverbs_event_file *ev_file; int ret = -EINVAL; if (copy_from_user(&cmd, buf, sizeof cmd)) return -EFAULT; uobj = idr_write_uobj(&ib_uverbs_cq_idr, cmd.cq_handle, file->ucontext); if (!uobj) return -EINVAL; cq = uobj->object; ev_file = cq->cq_context; obj = container_of(cq->uobject, struct ib_ucq_object, uobject); ret = ib_destroy_cq(cq); if (!ret) uobj->live = 0; put_uobj_write(uobj); if (ret) return ret; idr_remove_uobj(&ib_uverbs_cq_idr, uobj); mutex_lock(&file->mutex); list_del(&uobj->list); mutex_unlock(&file->mutex); ib_uverbs_release_ucq(file, ev_file, obj); memset(&resp, 0, sizeof resp); resp.comp_events_reported = obj->comp_events_reported; resp.async_events_reported = obj->async_events_reported; put_uobj(uobj); if (copy_to_user((void __user *) (unsigned long) cmd.response, &resp, sizeof resp)) return -EFAULT; return in_len; } ssize_t ib_uverbs_create_qp(struct ib_uverbs_file *file, const char __user *buf, int in_len, int out_len) { struct ib_uverbs_create_qp cmd; struct ib_uverbs_create_qp_resp resp; struct ib_udata udata; struct ib_uqp_object *obj; struct ib_pd *pd; struct ib_cq *scq, *rcq; struct ib_srq *srq; struct ib_qp *qp; struct ib_qp_init_attr attr; struct ib_xrcd *xrcd; struct ib_uobject *xrcd_uobj; int ret; if (out_len < sizeof resp) return -ENOSPC; if (copy_from_user(&cmd, buf, sizeof cmd)) return -EFAULT; INIT_UDATA(&udata, buf + sizeof cmd, (unsigned long) cmd.response + sizeof resp, in_len - sizeof cmd, out_len - sizeof resp); obj = kmalloc(sizeof *obj, GFP_KERNEL); if (!obj) return -ENOMEM; init_uobj(&obj->uevent.uobject, cmd.user_handle, file->ucontext, &qp_lock_key); down_write(&obj->uevent.uobject.mutex); srq = (cmd.is_srq && cmd.qp_type != IB_QPT_XRC) ? idr_read_srq(cmd.srq_handle, file->ucontext) : NULL; xrcd = cmd.qp_type == IB_QPT_XRC ? idr_read_xrcd(cmd.srq_handle, file->ucontext, &xrcd_uobj) : NULL; pd = idr_read_pd(cmd.pd_handle, file->ucontext); scq = idr_read_cq(cmd.send_cq_handle, file->ucontext, 0); rcq = cmd.recv_cq_handle == cmd.send_cq_handle ? scq : idr_read_cq(cmd.recv_cq_handle, file->ucontext, 1); if (!pd || !scq || !rcq || (cmd.is_srq && !srq) || (cmd.qp_type == IB_QPT_XRC && !xrcd)) { ret = -EINVAL; goto err_put; } attr.create_flags = 0; attr.event_handler = ib_uverbs_qp_event_handler; attr.qp_context = file; attr.send_cq = scq; attr.recv_cq = rcq; attr.srq = srq; attr.sq_sig_type = cmd.sq_sig_all ? IB_SIGNAL_ALL_WR : IB_SIGNAL_REQ_WR; attr.qp_type = cmd.qp_type; attr.xrcd = xrcd; attr.create_flags = 0; attr.cap.max_send_wr = cmd.max_send_wr; attr.cap.max_recv_wr = cmd.max_recv_wr; attr.cap.max_send_sge = cmd.max_send_sge; attr.cap.max_recv_sge = cmd.max_recv_sge; attr.cap.max_inline_data = cmd.max_inline_data; obj->uevent.events_reported = 0; INIT_LIST_HEAD(&obj->uevent.event_list); INIT_LIST_HEAD(&obj->mcast_list); qp = pd->device->create_qp(pd, &attr, &udata); if (IS_ERR(qp)) { ret = PTR_ERR(qp); goto err_put; } qp->device = pd->device; qp->pd = pd; qp->send_cq = attr.send_cq; qp->recv_cq = attr.recv_cq; qp->srq = attr.srq; qp->uobject = &obj->uevent.uobject; qp->event_handler = attr.event_handler; qp->qp_context = attr.qp_context; qp->qp_type = attr.qp_type; qp->xrcd = attr.xrcd; atomic_inc(&pd->usecnt); atomic_inc(&attr.send_cq->usecnt); atomic_inc(&attr.recv_cq->usecnt); if (attr.srq) atomic_inc(&attr.srq->usecnt); else if (attr.xrcd) atomic_inc(&attr.xrcd->usecnt); obj->uevent.uobject.object = qp; ret = idr_add_uobj(&ib_uverbs_qp_idr, &obj->uevent.uobject); if (ret) goto err_destroy; memset(&resp, 0, sizeof resp); resp.qpn = qp->qp_num; resp.qp_handle = obj->uevent.uobject.id; resp.max_recv_sge = attr.cap.max_recv_sge; resp.max_send_sge = attr.cap.max_send_sge; resp.max_recv_wr = attr.cap.max_recv_wr; resp.max_send_wr = attr.cap.max_send_wr; resp.max_inline_data = attr.cap.max_inline_data; if (copy_to_user((void __user *) (unsigned long) cmd.response, &resp, sizeof resp)) { ret = -EFAULT; goto err_copy; } put_pd_read(pd); put_cq_read(scq); if (rcq != scq) put_cq_read(rcq); if (srq) put_srq_read(srq); if (xrcd) put_xrcd_read(xrcd_uobj); mutex_lock(&file->mutex); list_add_tail(&obj->uevent.uobject.list, &file->ucontext->qp_list); mutex_unlock(&file->mutex); obj->uevent.uobject.live = 1; up_write(&obj->uevent.uobject.mutex); return in_len; err_copy: idr_remove_uobj(&ib_uverbs_qp_idr, &obj->uevent.uobject); err_destroy: ib_destroy_qp(qp); err_put: if (pd) put_pd_read(pd); if (scq) put_cq_read(scq); if (rcq && rcq != scq) put_cq_read(rcq); if (srq) put_srq_read(srq); if (xrcd) put_xrcd_read(xrcd_uobj); put_uobj_write(&obj->uevent.uobject); return ret; } ssize_t ib_uverbs_query_qp(struct ib_uverbs_file *file, const char __user *buf, int in_len, int out_len) { struct ib_uverbs_query_qp cmd; struct ib_uverbs_query_qp_resp resp; struct ib_qp *qp; struct ib_qp_attr *attr; struct ib_qp_init_attr *init_attr; int ret; if (copy_from_user(&cmd, buf, sizeof cmd)) return -EFAULT; attr = kmalloc(sizeof *attr, GFP_KERNEL); init_attr = kmalloc(sizeof *init_attr, GFP_KERNEL); if (!attr || !init_attr) { ret = -ENOMEM; goto out; } qp = idr_read_qp(cmd.qp_handle, file->ucontext); if (!qp) { ret = -EINVAL; goto out; } ret = ib_query_qp(qp, attr, cmd.attr_mask, init_attr); put_qp_read(qp); if (ret) goto out; memset(&resp, 0, sizeof resp); resp.qp_state = attr->qp_state; resp.cur_qp_state = attr->cur_qp_state; resp.path_mtu = attr->path_mtu; resp.path_mig_state = attr->path_mig_state; resp.qkey = attr->qkey; resp.rq_psn = attr->rq_psn; resp.sq_psn = attr->sq_psn; resp.dest_qp_num = attr->dest_qp_num; resp.qp_access_flags = attr->qp_access_flags; resp.pkey_index = attr->pkey_index; resp.alt_pkey_index = attr->alt_pkey_index; resp.sq_draining = attr->sq_draining; resp.max_rd_atomic = attr->max_rd_atomic; resp.max_dest_rd_atomic = attr->max_dest_rd_atomic; resp.min_rnr_timer = attr->min_rnr_timer; resp.port_num = attr->port_num; resp.timeout = attr->timeout; resp.retry_cnt = attr->retry_cnt; resp.rnr_retry = attr->rnr_retry; resp.alt_port_num = attr->alt_port_num; resp.alt_timeout = attr->alt_timeout; memcpy(resp.dest.dgid, attr->ah_attr.grh.dgid.raw, 16); resp.dest.flow_label = attr->ah_attr.grh.flow_label; resp.dest.sgid_index = attr->ah_attr.grh.sgid_index; resp.dest.hop_limit = attr->ah_attr.grh.hop_limit; resp.dest.traffic_class = attr->ah_attr.grh.traffic_class; resp.dest.dlid = attr->ah_attr.dlid; resp.dest.sl = attr->ah_attr.sl; resp.dest.src_path_bits = attr->ah_attr.src_path_bits; resp.dest.static_rate = attr->ah_attr.static_rate; resp.dest.is_global = !!(attr->ah_attr.ah_flags & IB_AH_GRH); resp.dest.port_num = attr->ah_attr.port_num; memcpy(resp.alt_dest.dgid, attr->alt_ah_attr.grh.dgid.raw, 16); resp.alt_dest.flow_label = attr->alt_ah_attr.grh.flow_label; resp.alt_dest.sgid_index = attr->alt_ah_attr.grh.sgid_index; resp.alt_dest.hop_limit = attr->alt_ah_attr.grh.hop_limit; resp.alt_dest.traffic_class = attr->alt_ah_attr.grh.traffic_class; resp.alt_dest.dlid = attr->alt_ah_attr.dlid; resp.alt_dest.sl = attr->alt_ah_attr.sl; resp.alt_dest.src_path_bits = attr->alt_ah_attr.src_path_bits; resp.alt_dest.static_rate = attr->alt_ah_attr.static_rate; resp.alt_dest.is_global = !!(attr->alt_ah_attr.ah_flags & IB_AH_GRH); resp.alt_dest.port_num = attr->alt_ah_attr.port_num; resp.max_send_wr = init_attr->cap.max_send_wr; resp.max_recv_wr = init_attr->cap.max_recv_wr; resp.max_send_sge = init_attr->cap.max_send_sge; resp.max_recv_sge = init_attr->cap.max_recv_sge; resp.max_inline_data = init_attr->cap.max_inline_data; resp.sq_sig_all = init_attr->sq_sig_type == IB_SIGNAL_ALL_WR; if (copy_to_user((void __user *) (unsigned long) cmd.response, &resp, sizeof resp)) ret = -EFAULT; out: kfree(attr); kfree(init_attr); return ret ? ret : in_len; } ssize_t ib_uverbs_modify_qp(struct ib_uverbs_file *file, const char __user *buf, int in_len, int out_len) { struct ib_uverbs_modify_qp cmd; struct ib_udata udata; struct ib_qp *qp; struct ib_qp_attr *attr; int ret; if (copy_from_user(&cmd, buf, sizeof cmd)) return -EFAULT; INIT_UDATA(&udata, buf + sizeof cmd, NULL, in_len - sizeof cmd, out_len); attr = kmalloc(sizeof *attr, GFP_KERNEL); if (!attr) return -ENOMEM; qp = idr_read_qp(cmd.qp_handle, file->ucontext); if (!qp) { ret = -EINVAL; goto out; } attr->qp_state = cmd.qp_state; attr->cur_qp_state = cmd.cur_qp_state; attr->path_mtu = cmd.path_mtu; attr->path_mig_state = cmd.path_mig_state; attr->qkey = cmd.qkey; attr->rq_psn = cmd.rq_psn; attr->sq_psn = cmd.sq_psn; attr->dest_qp_num = cmd.dest_qp_num; attr->qp_access_flags = cmd.qp_access_flags; attr->pkey_index = cmd.pkey_index; attr->alt_pkey_index = cmd.alt_pkey_index; attr->en_sqd_async_notify = cmd.en_sqd_async_notify; attr->max_rd_atomic = cmd.max_rd_atomic; attr->max_dest_rd_atomic = cmd.max_dest_rd_atomic; attr->min_rnr_timer = cmd.min_rnr_timer; attr->port_num = cmd.port_num; attr->timeout = cmd.timeout; attr->retry_cnt = cmd.retry_cnt; attr->rnr_retry = cmd.rnr_retry; attr->alt_port_num = cmd.alt_port_num; attr->alt_timeout = cmd.alt_timeout; memcpy(attr->ah_attr.grh.dgid.raw, cmd.dest.dgid, 16); attr->ah_attr.grh.flow_label = cmd.dest.flow_label; attr->ah_attr.grh.sgid_index = cmd.dest.sgid_index; attr->ah_attr.grh.hop_limit = cmd.dest.hop_limit; attr->ah_attr.grh.traffic_class = cmd.dest.traffic_class; attr->ah_attr.dlid = cmd.dest.dlid; attr->ah_attr.sl = cmd.dest.sl; attr->ah_attr.src_path_bits = cmd.dest.src_path_bits; attr->ah_attr.static_rate = cmd.dest.static_rate; attr->ah_attr.ah_flags = cmd.dest.is_global ? IB_AH_GRH : 0; attr->ah_attr.port_num = cmd.dest.port_num; memcpy(attr->alt_ah_attr.grh.dgid.raw, cmd.alt_dest.dgid, 16); attr->alt_ah_attr.grh.flow_label = cmd.alt_dest.flow_label; attr->alt_ah_attr.grh.sgid_index = cmd.alt_dest.sgid_index; attr->alt_ah_attr.grh.hop_limit = cmd.alt_dest.hop_limit; attr->alt_ah_attr.grh.traffic_class = cmd.alt_dest.traffic_class; attr->alt_ah_attr.dlid = cmd.alt_dest.dlid; attr->alt_ah_attr.sl = cmd.alt_dest.sl; attr->alt_ah_attr.src_path_bits = cmd.alt_dest.src_path_bits; attr->alt_ah_attr.static_rate = cmd.alt_dest.static_rate; attr->alt_ah_attr.ah_flags = cmd.alt_dest.is_global ? IB_AH_GRH : 0; attr->alt_ah_attr.port_num = cmd.alt_dest.port_num; ret = qp->device->modify_qp(qp, attr, cmd.attr_mask, &udata); put_qp_read(qp); if (ret) goto out; ret = in_len; out: kfree(attr); return ret; } ssize_t ib_uverbs_destroy_qp(struct ib_uverbs_file *file, const char __user *buf, int in_len, int out_len) { struct ib_uverbs_destroy_qp cmd; struct ib_uverbs_destroy_qp_resp resp; struct ib_uobject *uobj; struct ib_qp *qp; struct ib_uqp_object *obj; int ret = -EINVAL; if (copy_from_user(&cmd, buf, sizeof cmd)) return -EFAULT; memset(&resp, 0, sizeof resp); uobj = idr_write_uobj(&ib_uverbs_qp_idr, cmd.qp_handle, file->ucontext); if (!uobj) return -EINVAL; qp = uobj->object; obj = container_of(uobj, struct ib_uqp_object, uevent.uobject); if (!list_empty(&obj->mcast_list)) { put_uobj_write(uobj); return -EBUSY; } ret = ib_destroy_qp(qp); if (!ret) uobj->live = 0; put_uobj_write(uobj); if (ret) return ret; idr_remove_uobj(&ib_uverbs_qp_idr, uobj); mutex_lock(&file->mutex); list_del(&uobj->list); mutex_unlock(&file->mutex); ib_uverbs_release_uevent(file, &obj->uevent); resp.events_reported = obj->uevent.events_reported; put_uobj(uobj); if (copy_to_user((void __user *) (unsigned long) cmd.response, &resp, sizeof resp)) return -EFAULT; return in_len; } ssize_t ib_uverbs_post_send(struct ib_uverbs_file *file, const char __user *buf, int in_len, int out_len) { struct ib_uverbs_post_send cmd; struct ib_uverbs_post_send_resp resp; struct ib_uverbs_send_wr *user_wr; struct ib_send_wr *wr = NULL, *last, *next, *bad_wr; struct ib_qp *qp; int i, sg_ind; int is_ud; ssize_t ret = -EINVAL; if (copy_from_user(&cmd, buf, sizeof cmd)) return -EFAULT; if (in_len < sizeof cmd + cmd.wqe_size * cmd.wr_count + cmd.sge_count * sizeof (struct ib_uverbs_sge)) return -EINVAL; if (cmd.wqe_size < sizeof (struct ib_uverbs_send_wr)) return -EINVAL; user_wr = kmalloc(cmd.wqe_size, GFP_KERNEL); if (!user_wr) return -ENOMEM; qp = idr_read_qp(cmd.qp_handle, file->ucontext); if (!qp) goto out; is_ud = qp->qp_type == IB_QPT_UD; sg_ind = 0; last = NULL; for (i = 0; i < cmd.wr_count; ++i) { if (copy_from_user(user_wr, buf + sizeof cmd + i * cmd.wqe_size, cmd.wqe_size)) { ret = -EFAULT; goto out_put; } if (user_wr->num_sge + sg_ind > cmd.sge_count) { ret = -EINVAL; goto out_put; } next = kmalloc(ALIGN(sizeof *next, sizeof (struct ib_sge)) + user_wr->num_sge * sizeof (struct ib_sge), GFP_KERNEL); if (!next) { ret = -ENOMEM; goto out_put; } if (!last) wr = next; else last->next = next; last = next; next->next = NULL; next->wr_id = user_wr->wr_id; next->num_sge = user_wr->num_sge; next->opcode = user_wr->opcode; next->send_flags = user_wr->send_flags; if (is_ud) { next->wr.ud.ah = idr_read_ah(user_wr->wr.ud.ah, file->ucontext); if (!next->wr.ud.ah) { ret = -EINVAL; goto out_put; } next->wr.ud.remote_qpn = user_wr->wr.ud.remote_qpn; next->wr.ud.remote_qkey = user_wr->wr.ud.remote_qkey; } else { switch (next->opcode) { case IB_WR_RDMA_WRITE_WITH_IMM: next->ex.imm_data = (__be32 __force) user_wr->ex.imm_data; case IB_WR_RDMA_WRITE: case IB_WR_RDMA_READ: next->wr.rdma.remote_addr = user_wr->wr.rdma.remote_addr; next->wr.rdma.rkey = user_wr->wr.rdma.rkey; break; case IB_WR_SEND_WITH_IMM: next->ex.imm_data = (__be32 __force) user_wr->ex.imm_data; break; case IB_WR_SEND_WITH_INV: next->ex.invalidate_rkey = user_wr->ex.invalidate_rkey; break; case IB_WR_ATOMIC_CMP_AND_SWP: case IB_WR_ATOMIC_FETCH_AND_ADD: next->wr.atomic.remote_addr = user_wr->wr.atomic.remote_addr; next->wr.atomic.compare_add = user_wr->wr.atomic.compare_add; next->wr.atomic.swap = user_wr->wr.atomic.swap; next->wr.atomic.rkey = user_wr->wr.atomic.rkey; break; default: break; } } if (next->num_sge) { next->sg_list = (void *) next + ALIGN(sizeof *next, sizeof (struct ib_sge)); if (copy_from_user(next->sg_list, buf + sizeof cmd + cmd.wr_count * cmd.wqe_size + sg_ind * sizeof (struct ib_sge), next->num_sge * sizeof (struct ib_sge))) { ret = -EFAULT; goto out_put; } sg_ind += next->num_sge; } else next->sg_list = NULL; } resp.bad_wr = 0; ret = qp->device->post_send(qp, wr, &bad_wr); if (ret) for (next = wr; next; next = next->next) { ++resp.bad_wr; if (next == bad_wr) break; } if (copy_to_user((void __user *) (unsigned long) cmd.response, &resp, sizeof resp)) ret = -EFAULT; out_put: put_qp_read(qp); while (wr) { if (is_ud && wr->wr.ud.ah) put_ah_read(wr->wr.ud.ah); next = wr->next; kfree(wr); wr = next; } out: kfree(user_wr); return ret ? ret : in_len; } static struct ib_recv_wr *ib_uverbs_unmarshall_recv(const char __user *buf, int in_len, u32 wr_count, u32 sge_count, u32 wqe_size) { struct ib_uverbs_recv_wr *user_wr; struct ib_recv_wr *wr = NULL, *last, *next; int sg_ind; int i; int ret; if (in_len < wqe_size * wr_count + sge_count * sizeof (struct ib_uverbs_sge)) return ERR_PTR(-EINVAL); if (wqe_size < sizeof (struct ib_uverbs_recv_wr)) return ERR_PTR(-EINVAL); user_wr = kmalloc(wqe_size, GFP_KERNEL); if (!user_wr) return ERR_PTR(-ENOMEM); sg_ind = 0; last = NULL; for (i = 0; i < wr_count; ++i) { if (copy_from_user(user_wr, buf + i * wqe_size, wqe_size)) { ret = -EFAULT; goto err; } if (user_wr->num_sge + sg_ind > sge_count) { ret = -EINVAL; goto err; } next = kmalloc(ALIGN(sizeof *next, sizeof (struct ib_sge)) + user_wr->num_sge * sizeof (struct ib_sge), GFP_KERNEL); if (!next) { ret = -ENOMEM; goto err; } if (!last) wr = next; else last->next = next; last = next; next->next = NULL; next->wr_id = user_wr->wr_id; next->num_sge = user_wr->num_sge; if (next->num_sge) { next->sg_list = (void *) next + ALIGN(sizeof *next, sizeof (struct ib_sge)); if (copy_from_user(next->sg_list, buf + wr_count * wqe_size + sg_ind * sizeof (struct ib_sge), next->num_sge * sizeof (struct ib_sge))) { ret = -EFAULT; goto err; } sg_ind += next->num_sge; } else next->sg_list = NULL; } kfree(user_wr); return wr; err: kfree(user_wr); while (wr) { next = wr->next; kfree(wr); wr = next; } return ERR_PTR(ret); } ssize_t ib_uverbs_post_recv(struct ib_uverbs_file *file, const char __user *buf, int in_len, int out_len) { struct ib_uverbs_post_recv cmd; struct ib_uverbs_post_recv_resp resp; struct ib_recv_wr *wr, *next, *bad_wr; struct ib_qp *qp; ssize_t ret = -EINVAL; if (copy_from_user(&cmd, buf, sizeof cmd)) return -EFAULT; wr = ib_uverbs_unmarshall_recv(buf + sizeof cmd, in_len - sizeof cmd, cmd.wr_count, cmd.sge_count, cmd.wqe_size); if (IS_ERR(wr)) return PTR_ERR(wr); qp = idr_read_qp(cmd.qp_handle, file->ucontext); if (!qp) goto out; resp.bad_wr = 0; ret = qp->device->post_recv(qp, wr, &bad_wr); put_qp_read(qp); if (ret) for (next = wr; next; next = next->next) { ++resp.bad_wr; if (next == bad_wr) break; } if (copy_to_user((void __user *) (unsigned long) cmd.response, &resp, sizeof resp)) ret = -EFAULT; out: while (wr) { next = wr->next; kfree(wr); wr = next; } return ret ? ret : in_len; } ssize_t ib_uverbs_post_srq_recv(struct ib_uverbs_file *file, const char __user *buf, int in_len, int out_len) { struct ib_uverbs_post_srq_recv cmd; struct ib_uverbs_post_srq_recv_resp resp; struct ib_recv_wr *wr, *next, *bad_wr; struct ib_srq *srq; ssize_t ret = -EINVAL; if (copy_from_user(&cmd, buf, sizeof cmd)) return -EFAULT; wr = ib_uverbs_unmarshall_recv(buf + sizeof cmd, in_len - sizeof cmd, cmd.wr_count, cmd.sge_count, cmd.wqe_size); if (IS_ERR(wr)) return PTR_ERR(wr); srq = idr_read_srq(cmd.srq_handle, file->ucontext); if (!srq) goto out; resp.bad_wr = 0; ret = srq->device->post_srq_recv(srq, wr, &bad_wr); put_srq_read(srq); if (ret) for (next = wr; next; next = next->next) { ++resp.bad_wr; if (next == bad_wr) break; } if (copy_to_user((void __user *) (unsigned long) cmd.response, &resp, sizeof resp)) ret = -EFAULT; out: while (wr) { next = wr->next; kfree(wr); wr = next; } return ret ? ret : in_len; } ssize_t ib_uverbs_create_ah(struct ib_uverbs_file *file, const char __user *buf, int in_len, int out_len) { struct ib_uverbs_create_ah cmd; struct ib_uverbs_create_ah_resp resp; struct ib_uobject *uobj; struct ib_pd *pd; struct ib_ah *ah; struct ib_ah_attr attr; int ret; if (out_len < sizeof resp) return -ENOSPC; if (copy_from_user(&cmd, buf, sizeof cmd)) return -EFAULT; uobj = kmalloc(sizeof *uobj, GFP_KERNEL); if (!uobj) return -ENOMEM; init_uobj(uobj, cmd.user_handle, file->ucontext, &ah_lock_key); down_write(&uobj->mutex); pd = idr_read_pd(cmd.pd_handle, file->ucontext); if (!pd) { ret = -EINVAL; goto err; } attr.dlid = cmd.attr.dlid; attr.sl = cmd.attr.sl; attr.src_path_bits = cmd.attr.src_path_bits; attr.static_rate = cmd.attr.static_rate; attr.ah_flags = cmd.attr.is_global ? IB_AH_GRH : 0; attr.port_num = cmd.attr.port_num; attr.grh.flow_label = cmd.attr.grh.flow_label; attr.grh.sgid_index = cmd.attr.grh.sgid_index; attr.grh.hop_limit = cmd.attr.grh.hop_limit; attr.grh.traffic_class = cmd.attr.grh.traffic_class; memcpy(attr.grh.dgid.raw, cmd.attr.grh.dgid, 16); ah = ib_create_ah(pd, &attr); if (IS_ERR(ah)) { ret = PTR_ERR(ah); goto err_put; } ah->uobject = uobj; uobj->object = ah; ret = idr_add_uobj(&ib_uverbs_ah_idr, uobj); if (ret) goto err_destroy; resp.ah_handle = uobj->id; if (copy_to_user((void __user *) (unsigned long) cmd.response, &resp, sizeof resp)) { ret = -EFAULT; goto err_copy; } put_pd_read(pd); mutex_lock(&file->mutex); list_add_tail(&uobj->list, &file->ucontext->ah_list); mutex_unlock(&file->mutex); uobj->live = 1; up_write(&uobj->mutex); return in_len; err_copy: idr_remove_uobj(&ib_uverbs_ah_idr, uobj); err_destroy: ib_destroy_ah(ah); err_put: put_pd_read(pd); err: put_uobj_write(uobj); return ret; } ssize_t ib_uverbs_destroy_ah(struct ib_uverbs_file *file, const char __user *buf, int in_len, int out_len) { struct ib_uverbs_destroy_ah cmd; struct ib_ah *ah; struct ib_uobject *uobj; int ret; if (copy_from_user(&cmd, buf, sizeof cmd)) return -EFAULT; uobj = idr_write_uobj(&ib_uverbs_ah_idr, cmd.ah_handle, file->ucontext); if (!uobj) return -EINVAL; ah = uobj->object; ret = ib_destroy_ah(ah); if (!ret) uobj->live = 0; put_uobj_write(uobj); if (ret) return ret; idr_remove_uobj(&ib_uverbs_ah_idr, uobj); mutex_lock(&file->mutex); list_del(&uobj->list); mutex_unlock(&file->mutex); put_uobj(uobj); return in_len; } ssize_t ib_uverbs_attach_mcast(struct ib_uverbs_file *file, const char __user *buf, int in_len, int out_len) { struct ib_uverbs_attach_mcast cmd; struct ib_qp *qp; struct ib_uqp_object *obj; struct ib_uverbs_mcast_entry *mcast; int ret; if (copy_from_user(&cmd, buf, sizeof cmd)) return -EFAULT; qp = idr_read_qp(cmd.qp_handle, file->ucontext); if (!qp) return -EINVAL; obj = container_of(qp->uobject, struct ib_uqp_object, uevent.uobject); list_for_each_entry(mcast, &obj->mcast_list, list) if (cmd.mlid == mcast->lid && !memcmp(cmd.gid, mcast->gid.raw, sizeof mcast->gid.raw)) { ret = 0; goto out_put; } mcast = kmalloc(sizeof *mcast, GFP_KERNEL); if (!mcast) { ret = -ENOMEM; goto out_put; } mcast->lid = cmd.mlid; memcpy(mcast->gid.raw, cmd.gid, sizeof mcast->gid.raw); ret = ib_attach_mcast(qp, &mcast->gid, cmd.mlid); if (!ret) list_add_tail(&mcast->list, &obj->mcast_list); else kfree(mcast); out_put: put_qp_read(qp); return ret ? ret : in_len; } ssize_t ib_uverbs_detach_mcast(struct ib_uverbs_file *file, const char __user *buf, int in_len, int out_len) { struct ib_uverbs_detach_mcast cmd; struct ib_uqp_object *obj; struct ib_qp *qp; struct ib_uverbs_mcast_entry *mcast; int ret = -EINVAL; if (copy_from_user(&cmd, buf, sizeof cmd)) return -EFAULT; qp = idr_read_qp(cmd.qp_handle, file->ucontext); if (!qp) return -EINVAL; ret = ib_detach_mcast(qp, (union ib_gid *) cmd.gid, cmd.mlid); if (ret) goto out_put; obj = container_of(qp->uobject, struct ib_uqp_object, uevent.uobject); list_for_each_entry(mcast, &obj->mcast_list, list) if (cmd.mlid == mcast->lid && !memcmp(cmd.gid, mcast->gid.raw, sizeof mcast->gid.raw)) { list_del(&mcast->list); kfree(mcast); break; } out_put: put_qp_read(qp); return ret ? ret : in_len; } ssize_t ib_uverbs_create_srq(struct ib_uverbs_file *file, const char __user *buf, int in_len, int out_len) { struct ib_uverbs_create_srq cmd; struct ib_uverbs_create_srq_resp resp; struct ib_udata udata; struct ib_uevent_object *obj; struct ib_pd *pd; struct ib_srq *srq; struct ib_srq_init_attr attr; int ret; if (out_len < sizeof resp) return -ENOSPC; if (copy_from_user(&cmd, buf, sizeof cmd)) return -EFAULT; INIT_UDATA(&udata, buf + sizeof cmd, (unsigned long) cmd.response + sizeof resp, in_len - sizeof cmd, out_len - sizeof resp); obj = kmalloc(sizeof *obj, GFP_KERNEL); if (!obj) return -ENOMEM; init_uobj(&obj->uobject, cmd.user_handle, file->ucontext, &srq_lock_key); down_write(&obj->uobject.mutex); pd = idr_read_pd(cmd.pd_handle, file->ucontext); if (!pd) { ret = -EINVAL; goto err; } attr.event_handler = ib_uverbs_srq_event_handler; attr.srq_context = file; attr.attr.max_wr = cmd.max_wr; attr.attr.max_sge = cmd.max_sge; attr.attr.srq_limit = cmd.srq_limit; obj->events_reported = 0; INIT_LIST_HEAD(&obj->event_list); srq = pd->device->create_srq(pd, &attr, &udata); if (IS_ERR(srq)) { ret = PTR_ERR(srq); goto err_put; } srq->device = pd->device; srq->pd = pd; srq->uobject = &obj->uobject; srq->event_handler = attr.event_handler; srq->srq_context = attr.srq_context; srq->ext.xrc.cq = NULL; srq->ext.xrc.xrcd = NULL; atomic_inc(&pd->usecnt); atomic_set(&srq->usecnt, 0); obj->uobject.object = srq; ret = idr_add_uobj(&ib_uverbs_srq_idr, &obj->uobject); if (ret) goto err_destroy; memset(&resp, 0, sizeof resp); resp.srq_handle = obj->uobject.id; resp.max_wr = attr.attr.max_wr; resp.max_sge = attr.attr.max_sge; if (copy_to_user((void __user *) (unsigned long) cmd.response, &resp, sizeof resp)) { ret = -EFAULT; goto err_copy; } put_pd_read(pd); mutex_lock(&file->mutex); list_add_tail(&obj->uobject.list, &file->ucontext->srq_list); mutex_unlock(&file->mutex); obj->uobject.live = 1; up_write(&obj->uobject.mutex); return in_len; err_copy: idr_remove_uobj(&ib_uverbs_srq_idr, &obj->uobject); err_destroy: ib_destroy_srq(srq); err_put: put_pd_read(pd); err: put_uobj_write(&obj->uobject); return ret; } ssize_t ib_uverbs_create_xrc_srq(struct ib_uverbs_file *file, const char __user *buf, int in_len, int out_len) { struct ib_uverbs_create_xsrq cmd; struct ib_uverbs_create_srq_resp resp; struct ib_udata udata; struct ib_uevent_object *obj; struct ib_pd *pd; struct ib_srq *srq; struct ib_cq *xrc_cq; struct ib_xrcd *xrcd; struct ib_srq_init_attr attr; struct ib_uobject *xrcd_uobj; int ret; if (out_len < sizeof resp) return -ENOSPC; if (copy_from_user(&cmd, buf, sizeof cmd)) return -EFAULT; INIT_UDATA(&udata, buf + sizeof cmd, (unsigned long) cmd.response + sizeof resp, in_len - sizeof cmd, out_len - sizeof resp); obj = kmalloc(sizeof *obj, GFP_KERNEL); if (!obj) return -ENOMEM; init_uobj(&obj->uobject, cmd.user_handle, file->ucontext, &srq_lock_key); down_write(&obj->uobject.mutex); pd = idr_read_pd(cmd.pd_handle, file->ucontext); if (!pd) { ret = -EINVAL; goto err; } xrc_cq = idr_read_cq(cmd.cq_handle, file->ucontext, 0); if (!xrc_cq) { ret = -EINVAL; goto err_put_pd; } xrcd = idr_read_xrcd(cmd.xrcd_handle, file->ucontext, &xrcd_uobj); if (!xrcd) { ret = -EINVAL; goto err_put_cq; } attr.event_handler = ib_uverbs_srq_event_handler; attr.srq_context = file; attr.attr.max_wr = cmd.max_wr; attr.attr.max_sge = cmd.max_sge; attr.attr.srq_limit = cmd.srq_limit; obj->events_reported = 0; INIT_LIST_HEAD(&obj->event_list); srq = pd->device->create_xrc_srq(pd, xrc_cq, xrcd, &attr, &udata); if (IS_ERR(srq)) { ret = PTR_ERR(srq); goto err_put; } srq->device = pd->device; srq->pd = pd; srq->uobject = &obj->uobject; srq->event_handler = attr.event_handler; srq->srq_context = attr.srq_context; srq->ext.xrc.cq = xrc_cq; srq->ext.xrc.xrcd = xrcd; atomic_inc(&pd->usecnt); atomic_inc(&xrc_cq->usecnt); atomic_inc(&xrcd->usecnt); atomic_set(&srq->usecnt, 0); obj->uobject.object = srq; ret = idr_add_uobj(&ib_uverbs_srq_idr, &obj->uobject); if (ret) goto err_destroy; memset(&resp, 0, sizeof resp); resp.srq_handle = obj->uobject.id; resp.max_wr = attr.attr.max_wr; resp.max_sge = attr.attr.max_sge; if (copy_to_user((void __user *) (unsigned long) cmd.response, &resp, sizeof resp)) { ret = -EFAULT; goto err_copy; } put_xrcd_read(xrcd_uobj); put_cq_read(xrc_cq); put_pd_read(pd); mutex_lock(&file->mutex); list_add_tail(&obj->uobject.list, &file->ucontext->srq_list); mutex_unlock(&file->mutex); obj->uobject.live = 1; up_write(&obj->uobject.mutex); return in_len; err_copy: idr_remove_uobj(&ib_uverbs_srq_idr, &obj->uobject); err_destroy: ib_destroy_srq(srq); err_put: put_xrcd_read(xrcd_uobj); err_put_cq: put_cq_read(xrc_cq); err_put_pd: put_pd_read(pd); err: put_uobj_write(&obj->uobject); return ret; } ssize_t ib_uverbs_modify_srq(struct ib_uverbs_file *file, const char __user *buf, int in_len, int out_len) { struct ib_uverbs_modify_srq cmd; struct ib_udata udata; struct ib_srq *srq; struct ib_srq_attr attr; int ret; if (copy_from_user(&cmd, buf, sizeof cmd)) return -EFAULT; INIT_UDATA(&udata, buf + sizeof cmd, NULL, in_len - sizeof cmd, out_len); srq = idr_read_srq(cmd.srq_handle, file->ucontext); if (!srq) return -EINVAL; attr.max_wr = cmd.max_wr; attr.srq_limit = cmd.srq_limit; ret = srq->device->modify_srq(srq, &attr, cmd.attr_mask, &udata); put_srq_read(srq); return ret ? ret : in_len; } ssize_t ib_uverbs_query_srq(struct ib_uverbs_file *file, const char __user *buf, int in_len, int out_len) { struct ib_uverbs_query_srq cmd; struct ib_uverbs_query_srq_resp resp; struct ib_srq_attr attr; struct ib_srq *srq; int ret; if (out_len < sizeof resp) return -ENOSPC; if (copy_from_user(&cmd, buf, sizeof cmd)) return -EFAULT; srq = idr_read_srq(cmd.srq_handle, file->ucontext); if (!srq) return -EINVAL; ret = ib_query_srq(srq, &attr); put_srq_read(srq); if (ret) return ret; memset(&resp, 0, sizeof resp); resp.max_wr = attr.max_wr; resp.max_sge = attr.max_sge; resp.srq_limit = attr.srq_limit; if (copy_to_user((void __user *) (unsigned long) cmd.response, &resp, sizeof resp)) return -EFAULT; return in_len; } ssize_t ib_uverbs_destroy_srq(struct ib_uverbs_file *file, const char __user *buf, int in_len, int out_len) { struct ib_uverbs_destroy_srq cmd; struct ib_uverbs_destroy_srq_resp resp; struct ib_uobject *uobj; struct ib_srq *srq; struct ib_uevent_object *obj; int ret = -EINVAL; if (copy_from_user(&cmd, buf, sizeof cmd)) return -EFAULT; uobj = idr_write_uobj(&ib_uverbs_srq_idr, cmd.srq_handle, file->ucontext); if (!uobj) return -EINVAL; srq = uobj->object; obj = container_of(uobj, struct ib_uevent_object, uobject); ret = ib_destroy_srq(srq); if (!ret) uobj->live = 0; put_uobj_write(uobj); if (ret) return ret; idr_remove_uobj(&ib_uverbs_srq_idr, uobj); mutex_lock(&file->mutex); list_del(&uobj->list); mutex_unlock(&file->mutex); ib_uverbs_release_uevent(file, obj); memset(&resp, 0, sizeof resp); resp.events_reported = obj->events_reported; put_uobj(uobj); if (copy_to_user((void __user *) (unsigned long) cmd.response, &resp, sizeof resp)) ret = -EFAULT; return ret ? ret : in_len; } static struct inode *xrc_file2inode(struct file *f) { return f->f_dentry->d_inode; } struct xrcd_table_entry { struct rb_node node; struct inode *inode; struct ib_xrcd *xrcd; }; static int xrcd_table_insert(struct ib_device *dev, struct inode *i_n, struct ib_xrcd *xrcd) { struct xrcd_table_entry *entry, *scan; struct rb_node **p = &dev->ib_uverbs_xrcd_table.rb_node; struct rb_node *parent = NULL; entry = kmalloc(sizeof(struct xrcd_table_entry), GFP_KERNEL); if (!entry) return -ENOMEM; entry->inode = i_n; entry->xrcd = xrcd; while (*p) { parent = *p; scan = rb_entry(parent, struct xrcd_table_entry, node); if (i_n < scan->inode) p = &(*p)->rb_left; else if (i_n > scan->inode) p = &(*p)->rb_right; else { kfree(entry); return -EEXIST; } } rb_link_node(&entry->node, parent, p); rb_insert_color(&entry->node, &dev->ib_uverbs_xrcd_table); igrab(i_n); return 0; } static struct xrcd_table_entry *xrcd_table_search(struct ib_device *dev, struct inode *i_n) { struct xrcd_table_entry *scan; struct rb_node **p = &dev->ib_uverbs_xrcd_table.rb_node; struct rb_node *parent = NULL; while (*p) { parent = *p; scan = rb_entry(parent, struct xrcd_table_entry, node); if (i_n < scan->inode) p = &(*p)->rb_left; else if (i_n > scan->inode) p = &(*p)->rb_right; else return scan; } return NULL; } static int find_xrcd(struct ib_device *dev, struct inode *i_n, struct ib_xrcd **xrcd) { struct xrcd_table_entry *entry; entry = xrcd_table_search(dev, i_n); if (!entry) return -EINVAL; *xrcd = entry->xrcd; return 0; } static void xrcd_table_delete(struct ib_device *dev, struct inode *i_n) { struct xrcd_table_entry *entry = xrcd_table_search(dev, i_n); if (entry) { iput(i_n); rb_erase(&entry->node, &dev->ib_uverbs_xrcd_table); kfree(entry); } } ssize_t ib_uverbs_open_xrc_domain(struct ib_uverbs_file *file, const char __user *buf, int in_len, int out_len) { struct ib_uverbs_open_xrc_domain cmd; struct ib_uverbs_open_xrc_domain_resp resp; struct ib_udata udata; struct ib_uobject *uobj; struct ib_uxrcd_object *xrcd_uobj; struct ib_xrcd *xrcd = NULL; struct file *f = NULL; struct inode *inode = NULL; int ret = 0; int new_xrcd = 0; if (out_len < sizeof resp) return -ENOSPC; if (copy_from_user(&cmd, buf, sizeof cmd)) return -EFAULT; INIT_UDATA(&udata, buf + sizeof cmd, (unsigned long) cmd.response + sizeof resp, in_len - sizeof cmd, out_len - sizeof resp); mutex_lock(&file->device->ib_dev->xrcd_table_mutex); if (cmd.fd != (u32) (-1)) { /* search for file descriptor */ f = fget(cmd.fd); if (!f) { ret = -EBADF; goto err_table_mutex_unlock; } inode = xrc_file2inode(f); if (!inode) { ret = -EBADF; goto err_table_mutex_unlock; } ret = find_xrcd(file->device->ib_dev, inode, &xrcd); if (ret && !(cmd.oflags & O_CREAT)) { /* no file descriptor. Need CREATE flag */ ret = -EAGAIN; goto err_table_mutex_unlock; } if (xrcd && cmd.oflags & O_EXCL) { ret = -EINVAL; goto err_table_mutex_unlock; } } xrcd_uobj = kmalloc(sizeof *xrcd_uobj, GFP_KERNEL); if (!xrcd_uobj) { ret = -ENOMEM; goto err_table_mutex_unlock; } uobj = &xrcd_uobj->uobject; init_uobj(uobj, 0, file->ucontext, &pd_lock_key); down_write(&uobj->mutex); if (!xrcd) { xrcd = file->device->ib_dev->alloc_xrcd(file->device->ib_dev, file->ucontext, &udata); if (IS_ERR(xrcd)) { ret = PTR_ERR(xrcd); goto err; } xrcd->uobject = (cmd.fd == -1) ? uobj : NULL; xrcd->inode = inode; xrcd->device = file->device->ib_dev; atomic_set(&xrcd->usecnt, 0); new_xrcd = 1; } uobj->object = xrcd; ret = idr_add_uobj(&ib_uverbs_xrc_domain_idr, uobj); if (ret) goto err_idr; memset(&resp, 0, sizeof resp); resp.xrcd_handle = uobj->id; if (inode) { if (new_xrcd) { /* create new inode/xrcd table entry */ ret = xrcd_table_insert(file->device->ib_dev, inode, xrcd); if (ret) goto err_insert_xrcd; } atomic_inc(&xrcd->usecnt); } if (f) fput(f); if (copy_to_user((void __user *) (unsigned long) cmd.response, &resp, sizeof resp)) { ret = -EFAULT; goto err_copy; } INIT_LIST_HEAD(&xrcd_uobj->xrc_reg_qp_list); mutex_lock(&file->mutex); list_add_tail(&uobj->list, &file->ucontext->xrcd_list); mutex_unlock(&file->mutex); uobj->live = 1; up_write(&uobj->mutex); mutex_unlock(&file->device->ib_dev->xrcd_table_mutex); return in_len; err_copy: if (inode) { if (new_xrcd) xrcd_table_delete(file->device->ib_dev, inode); atomic_dec(&xrcd->usecnt); } err_insert_xrcd: idr_remove_uobj(&ib_uverbs_xrc_domain_idr, uobj); err_idr: ib_dealloc_xrcd(xrcd); err: put_uobj_write(uobj); err_table_mutex_unlock: if (f) fput(f); mutex_unlock(&file->device->ib_dev->xrcd_table_mutex); return ret; } ssize_t ib_uverbs_close_xrc_domain(struct ib_uverbs_file *file, const char __user *buf, int in_len, int out_len) { struct ib_uverbs_close_xrc_domain cmd; struct ib_uobject *uobj, *t_uobj; struct ib_uxrcd_object *xrcd_uobj; struct ib_xrcd *xrcd = NULL; struct inode *inode = NULL; int ret = 0; if (copy_from_user(&cmd, buf, sizeof cmd)) return -EFAULT; mutex_lock(&file->device->ib_dev->xrcd_table_mutex); uobj = idr_write_uobj(&ib_uverbs_xrc_domain_idr, cmd.xrcd_handle, file->ucontext); if (!uobj) { ret = -EINVAL; goto err_unlock_mutex; } mutex_lock(&file->mutex); if (!ret) { list_for_each_entry(t_uobj, &file->ucontext->qp_list, list) { struct ib_qp *qp = t_uobj->object; if (qp->xrcd && qp->xrcd == uobj->object) { ret = -EBUSY; break; } } } if (!ret) { list_for_each_entry(t_uobj, &file->ucontext->srq_list, list) { struct ib_srq *srq = t_uobj->object; if (srq->ext.xrc.xrcd && srq->ext.xrc.xrcd == uobj->object) { ret = -EBUSY; break; } } } mutex_unlock(&file->mutex); if (ret) { put_uobj_write(uobj); goto err_unlock_mutex; } xrcd_uobj = container_of(uobj, struct ib_uxrcd_object, uobject); if (!list_empty(&xrcd_uobj->xrc_reg_qp_list)) { ret = -EBUSY; put_uobj_write(uobj); goto err_unlock_mutex; } xrcd = (struct ib_xrcd *) (uobj->object); inode = xrcd->inode; if (inode) atomic_dec(&xrcd->usecnt); ret = ib_dealloc_xrcd(uobj->object); if (!ret) uobj->live = 0; put_uobj_write(uobj); if (ret && !inode) goto err_unlock_mutex; if (!ret && inode) xrcd_table_delete(file->device->ib_dev, inode); idr_remove_uobj(&ib_uverbs_xrc_domain_idr, uobj); mutex_lock(&file->mutex); list_del(&uobj->list); mutex_unlock(&file->mutex); put_uobj(uobj); mutex_unlock(&file->device->ib_dev->xrcd_table_mutex); return in_len; err_unlock_mutex: mutex_unlock(&file->device->ib_dev->xrcd_table_mutex); return ret; } void ib_uverbs_dealloc_xrcd(struct ib_device *ib_dev, struct ib_xrcd *xrcd) { struct inode *inode = NULL; int ret = 0; inode = xrcd->inode; if (inode) atomic_dec(&xrcd->usecnt); ret = ib_dealloc_xrcd(xrcd); if (!ret && inode) xrcd_table_delete(ib_dev, inode); } ssize_t ib_uverbs_create_xrc_rcv_qp(struct ib_uverbs_file *file, const char __user *buf, int in_len, int out_len) { struct ib_uverbs_create_xrc_rcv_qp cmd; struct ib_uverbs_create_xrc_rcv_qp_resp resp; struct ib_uxrc_rcv_object *obj; struct ib_qp_init_attr init_attr; struct ib_xrcd *xrcd; struct ib_uobject *uobj; struct ib_uxrcd_object *xrcd_uobj; u32 qp_num; int err; if (out_len < sizeof resp) return -ENOSPC; if (copy_from_user(&cmd, buf, sizeof cmd)) return -EFAULT; obj = kzalloc(sizeof *obj, GFP_KERNEL); if (!obj) return -ENOMEM; xrcd = idr_read_xrcd(cmd.xrc_domain_handle, file->ucontext, &uobj); if (!xrcd) { err = -EINVAL; goto err_out; } init_attr.event_handler = ib_uverbs_xrc_rcv_qp_event_handler; init_attr.qp_context = file; init_attr.srq = NULL; init_attr.sq_sig_type = cmd.sq_sig_all ? IB_SIGNAL_ALL_WR : IB_SIGNAL_REQ_WR; init_attr.qp_type = IB_QPT_XRC; init_attr.xrcd = xrcd; init_attr.cap.max_send_wr = 1; init_attr.cap.max_recv_wr = 0; init_attr.cap.max_send_sge = 1; init_attr.cap.max_recv_sge = 0; init_attr.cap.max_inline_data = 0; err = xrcd->device->create_xrc_rcv_qp(&init_attr, &qp_num); if (err) goto err_put; memset(&resp, 0, sizeof resp); resp.qpn = qp_num; if (copy_to_user((void __user *) (unsigned long) cmd.response, &resp, sizeof resp)) { err = -EFAULT; goto err_destroy; } atomic_inc(&xrcd->usecnt); put_xrcd_read(uobj); obj->qp_num = qp_num; obj->domain_handle = cmd.xrc_domain_handle; xrcd_uobj = container_of(uobj, struct ib_uxrcd_object, uobject); mutex_lock(&file->device->ib_dev->xrcd_table_mutex); list_add_tail(&obj->list, &xrcd_uobj->xrc_reg_qp_list); mutex_unlock(&file->device->ib_dev->xrcd_table_mutex); return in_len; err_destroy: xrcd->device->unreg_xrc_rcv_qp(xrcd, file, qp_num); err_put: put_xrcd_read(uobj); err_out: kfree(obj); return err; } ssize_t ib_uverbs_modify_xrc_rcv_qp(struct ib_uverbs_file *file, const char __user *buf, int in_len, int out_len) { struct ib_uverbs_modify_xrc_rcv_qp cmd; struct ib_qp_attr *attr; struct ib_xrcd *xrcd; struct ib_uobject *uobj; int err; if (copy_from_user(&cmd, buf, sizeof cmd)) return -EFAULT; attr = kzalloc(sizeof *attr, GFP_KERNEL); if (!attr) return -ENOMEM; xrcd = idr_read_xrcd(cmd.xrc_domain_handle, file->ucontext, &uobj); if (!xrcd) { kfree(attr); return -EINVAL; } attr->qp_state = cmd.qp_state; attr->cur_qp_state = cmd.cur_qp_state; attr->qp_access_flags = cmd.qp_access_flags; attr->pkey_index = cmd.pkey_index; attr->port_num = cmd.port_num; attr->path_mtu = cmd.path_mtu; attr->path_mig_state = cmd.path_mig_state; attr->qkey = cmd.qkey; attr->rq_psn = cmd.rq_psn; attr->sq_psn = cmd.sq_psn; attr->dest_qp_num = cmd.dest_qp_num; attr->alt_pkey_index = cmd.alt_pkey_index; attr->en_sqd_async_notify = cmd.en_sqd_async_notify; attr->max_rd_atomic = cmd.max_rd_atomic; attr->max_dest_rd_atomic = cmd.max_dest_rd_atomic; attr->min_rnr_timer = cmd.min_rnr_timer; attr->port_num = cmd.port_num; attr->timeout = cmd.timeout; attr->retry_cnt = cmd.retry_cnt; attr->rnr_retry = cmd.rnr_retry; attr->alt_port_num = cmd.alt_port_num; attr->alt_timeout = cmd.alt_timeout; memcpy(attr->ah_attr.grh.dgid.raw, cmd.dest.dgid, 16); attr->ah_attr.grh.flow_label = cmd.dest.flow_label; attr->ah_attr.grh.sgid_index = cmd.dest.sgid_index; attr->ah_attr.grh.hop_limit = cmd.dest.hop_limit; attr->ah_attr.grh.traffic_class = cmd.dest.traffic_class; attr->ah_attr.dlid = cmd.dest.dlid; attr->ah_attr.sl = cmd.dest.sl; attr->ah_attr.src_path_bits = cmd.dest.src_path_bits; attr->ah_attr.static_rate = cmd.dest.static_rate; attr->ah_attr.ah_flags = cmd.dest.is_global ? IB_AH_GRH : 0; attr->ah_attr.port_num = cmd.dest.port_num; memcpy(attr->alt_ah_attr.grh.dgid.raw, cmd.alt_dest.dgid, 16); attr->alt_ah_attr.grh.flow_label = cmd.alt_dest.flow_label; attr->alt_ah_attr.grh.sgid_index = cmd.alt_dest.sgid_index; attr->alt_ah_attr.grh.hop_limit = cmd.alt_dest.hop_limit; attr->alt_ah_attr.grh.traffic_class = cmd.alt_dest.traffic_class; attr->alt_ah_attr.dlid = cmd.alt_dest.dlid; attr->alt_ah_attr.sl = cmd.alt_dest.sl; attr->alt_ah_attr.src_path_bits = cmd.alt_dest.src_path_bits; attr->alt_ah_attr.static_rate = cmd.alt_dest.static_rate; attr->alt_ah_attr.ah_flags = cmd.alt_dest.is_global ? IB_AH_GRH : 0; attr->alt_ah_attr.port_num = cmd.alt_dest.port_num; err = xrcd->device->modify_xrc_rcv_qp(xrcd, cmd.qp_num, attr, cmd.attr_mask); put_xrcd_read(uobj); kfree(attr); return err ? err : in_len; } ssize_t ib_uverbs_query_xrc_rcv_qp(struct ib_uverbs_file *file, const char __user *buf, int in_len, int out_len) { struct ib_uverbs_query_xrc_rcv_qp cmd; struct ib_uverbs_query_qp_resp resp; struct ib_qp_attr *attr; struct ib_qp_init_attr *init_attr; struct ib_xrcd *xrcd; struct ib_uobject *uobj; int ret; if (copy_from_user(&cmd, buf, sizeof cmd)) return -EFAULT; attr = kmalloc(sizeof *attr, GFP_KERNEL); init_attr = kmalloc(sizeof *init_attr, GFP_KERNEL); if (!attr || !init_attr) { ret = -ENOMEM; goto out; } xrcd = idr_read_xrcd(cmd.xrc_domain_handle, file->ucontext, &uobj); if (!xrcd) { ret = -EINVAL; goto out; } ret = xrcd->device->query_xrc_rcv_qp(xrcd, cmd.qp_num, attr, cmd.attr_mask, init_attr); put_xrcd_read(uobj); if (ret) goto out; memset(&resp, 0, sizeof resp); resp.qp_state = attr->qp_state; resp.cur_qp_state = attr->cur_qp_state; resp.path_mtu = attr->path_mtu; resp.path_mig_state = attr->path_mig_state; resp.qkey = attr->qkey; resp.rq_psn = attr->rq_psn; resp.sq_psn = attr->sq_psn; resp.dest_qp_num = attr->dest_qp_num; resp.qp_access_flags = attr->qp_access_flags; resp.pkey_index = attr->pkey_index; resp.alt_pkey_index = attr->alt_pkey_index; resp.sq_draining = attr->sq_draining; resp.max_rd_atomic = attr->max_rd_atomic; resp.max_dest_rd_atomic = attr->max_dest_rd_atomic; resp.min_rnr_timer = attr->min_rnr_timer; resp.port_num = attr->port_num; resp.timeout = attr->timeout; resp.retry_cnt = attr->retry_cnt; resp.rnr_retry = attr->rnr_retry; resp.alt_port_num = attr->alt_port_num; resp.alt_timeout = attr->alt_timeout; memcpy(resp.dest.dgid, attr->ah_attr.grh.dgid.raw, 16); resp.dest.flow_label = attr->ah_attr.grh.flow_label; resp.dest.sgid_index = attr->ah_attr.grh.sgid_index; resp.dest.hop_limit = attr->ah_attr.grh.hop_limit; resp.dest.traffic_class = attr->ah_attr.grh.traffic_class; resp.dest.dlid = attr->ah_attr.dlid; resp.dest.sl = attr->ah_attr.sl; resp.dest.src_path_bits = attr->ah_attr.src_path_bits; resp.dest.static_rate = attr->ah_attr.static_rate; resp.dest.is_global = !!(attr->ah_attr.ah_flags & IB_AH_GRH); resp.dest.port_num = attr->ah_attr.port_num; memcpy(resp.alt_dest.dgid, attr->alt_ah_attr.grh.dgid.raw, 16); resp.alt_dest.flow_label = attr->alt_ah_attr.grh.flow_label; resp.alt_dest.sgid_index = attr->alt_ah_attr.grh.sgid_index; resp.alt_dest.hop_limit = attr->alt_ah_attr.grh.hop_limit; resp.alt_dest.traffic_class = attr->alt_ah_attr.grh.traffic_class; resp.alt_dest.dlid = attr->alt_ah_attr.dlid; resp.alt_dest.sl = attr->alt_ah_attr.sl; resp.alt_dest.src_path_bits = attr->alt_ah_attr.src_path_bits; resp.alt_dest.static_rate = attr->alt_ah_attr.static_rate; resp.alt_dest.is_global = !!(attr->alt_ah_attr.ah_flags & IB_AH_GRH); resp.alt_dest.port_num = attr->alt_ah_attr.port_num; resp.max_send_wr = init_attr->cap.max_send_wr; resp.max_recv_wr = init_attr->cap.max_recv_wr; resp.max_send_sge = init_attr->cap.max_send_sge; resp.max_recv_sge = init_attr->cap.max_recv_sge; resp.max_inline_data = init_attr->cap.max_inline_data; resp.sq_sig_all = init_attr->sq_sig_type == IB_SIGNAL_ALL_WR; if (copy_to_user((void __user *) (unsigned long) cmd.response, &resp, sizeof resp)) ret = -EFAULT; out: kfree(attr); kfree(init_attr); return ret ? ret : in_len; } ssize_t ib_uverbs_reg_xrc_rcv_qp(struct ib_uverbs_file *file, const char __user *buf, int in_len, int out_len) { struct ib_uverbs_reg_xrc_rcv_qp cmd; struct ib_uxrc_rcv_object *qp_obj, *tmp; struct ib_xrcd *xrcd; struct ib_uobject *uobj; struct ib_uxrcd_object *xrcd_uobj; int ret; if (copy_from_user(&cmd, buf, sizeof cmd)) return -EFAULT; qp_obj = kmalloc(sizeof *qp_obj, GFP_KERNEL); if (!qp_obj) return -ENOMEM; xrcd = idr_read_xrcd(cmd.xrc_domain_handle, file->ucontext, &uobj); if (!xrcd) { ret = -EINVAL; goto err_out; } ret = xrcd->device->reg_xrc_rcv_qp(xrcd, file, cmd.qp_num); if (ret) goto err_put; xrcd_uobj = container_of(uobj, struct ib_uxrcd_object, uobject); mutex_lock(&file->device->ib_dev->xrcd_table_mutex); list_for_each_entry(tmp, &xrcd_uobj->xrc_reg_qp_list, list) if (cmd.qp_num == tmp->qp_num) { kfree(qp_obj); mutex_unlock(&file->device->ib_dev->xrcd_table_mutex); put_xrcd_read(uobj); return in_len; } qp_obj->qp_num = cmd.qp_num; qp_obj->domain_handle = cmd.xrc_domain_handle; list_add_tail(&qp_obj->list, &xrcd_uobj->xrc_reg_qp_list); mutex_unlock(&file->device->ib_dev->xrcd_table_mutex); atomic_inc(&xrcd->usecnt); put_xrcd_read(uobj); return in_len; err_put: put_xrcd_read(uobj); err_out: kfree(qp_obj); return ret; } int ib_uverbs_cleanup_xrc_rcv_qp(struct ib_uverbs_file *file, struct ib_xrcd *xrcd, u32 qp_num) { int err; err = xrcd->device->unreg_xrc_rcv_qp(xrcd, file, qp_num); if (!err) atomic_dec(&xrcd->usecnt); return err; } ssize_t ib_uverbs_unreg_xrc_rcv_qp(struct ib_uverbs_file *file, const char __user *buf, int in_len, int out_len) { struct ib_uverbs_unreg_xrc_rcv_qp cmd; struct ib_uxrc_rcv_object *qp_obj, *tmp; struct ib_xrcd *xrcd; struct ib_uobject *uobj; struct ib_uxrcd_object *xrcd_uobj; int ret; if (copy_from_user(&cmd, buf, sizeof cmd)) return -EFAULT; xrcd = idr_read_xrcd(cmd.xrc_domain_handle, file->ucontext, &uobj); if (!xrcd) return -EINVAL; ret = xrcd->device->unreg_xrc_rcv_qp(xrcd, file, cmd.qp_num); if (ret) { put_xrcd_read(uobj); return -EINVAL; } atomic_dec(&xrcd->usecnt); xrcd_uobj = container_of(uobj, struct ib_uxrcd_object, uobject); mutex_lock(&file->device->ib_dev->xrcd_table_mutex); list_for_each_entry_safe(qp_obj, tmp, &xrcd_uobj->xrc_reg_qp_list, list) if (cmd.qp_num == qp_obj->qp_num) { list_del(&qp_obj->list); kfree(qp_obj); break; } mutex_unlock(&file->device->ib_dev->xrcd_table_mutex); put_xrcd_read(uobj); return in_len; } Index: stable/10/sys/ofed/drivers/infiniband/core/uverbs_main.c =================================================================== --- stable/10/sys/ofed/drivers/infiniband/core/uverbs_main.c (revision 271126) +++ stable/10/sys/ofed/drivers/infiniband/core/uverbs_main.c (revision 271127) @@ -1,1011 +1,1014 @@ /* * Copyright (c) 2005 Topspin Communications. All rights reserved. * Copyright (c) 2005, 2006 Cisco Systems. All rights reserved. * Copyright (c) 2005 Mellanox Technologies. All rights reserved. * Copyright (c) 2005 Voltaire, Inc. All rights reserved. * Copyright (c) 2005 PathScale, Inc. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU * General Public License (GPL) Version 2, available from the file * COPYING in the main directory of this source tree, or the * OpenIB.org BSD license below: * * Redistribution and use in source and binary forms, with or * without modification, are permitted provided that the following * conditions are met: * * - Redistributions of source code must retain the above * copyright notice, this list of conditions and the following * disclaimer. * * - Redistributions in binary form must reproduce the above * copyright notice, this list of conditions and the following * disclaimer in the documentation and/or other materials * provided with the distribution. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ #include -#include #include #include #include #include #include #include #include #include #include "uverbs.h" MODULE_AUTHOR("Roland Dreier"); MODULE_DESCRIPTION("InfiniBand userspace verbs access"); MODULE_LICENSE("Dual BSD/GPL"); #define INFINIBANDEVENTFS_MAGIC 0x49426576 /* "IBev" */ enum { IB_UVERBS_MAJOR = 231, IB_UVERBS_BASE_MINOR = 192, IB_UVERBS_MAX_DEVICES = 32 }; #define IB_UVERBS_BASE_DEV MKDEV(IB_UVERBS_MAJOR, IB_UVERBS_BASE_MINOR) static struct class *uverbs_class; DEFINE_SPINLOCK(ib_uverbs_idr_lock); DEFINE_IDR(ib_uverbs_pd_idr); DEFINE_IDR(ib_uverbs_mr_idr); DEFINE_IDR(ib_uverbs_mw_idr); DEFINE_IDR(ib_uverbs_ah_idr); DEFINE_IDR(ib_uverbs_cq_idr); DEFINE_IDR(ib_uverbs_qp_idr); DEFINE_IDR(ib_uverbs_srq_idr); DEFINE_IDR(ib_uverbs_xrc_domain_idr); static spinlock_t map_lock; static struct ib_uverbs_device *dev_table[IB_UVERBS_MAX_DEVICES]; static DECLARE_BITMAP(dev_map, IB_UVERBS_MAX_DEVICES); static ssize_t (*uverbs_cmd_table[])(struct ib_uverbs_file *file, const char __user *buf, int in_len, int out_len) = { [IB_USER_VERBS_CMD_GET_CONTEXT] = ib_uverbs_get_context, [IB_USER_VERBS_CMD_QUERY_DEVICE] = ib_uverbs_query_device, [IB_USER_VERBS_CMD_QUERY_PORT] = ib_uverbs_query_port, [IB_USER_VERBS_CMD_ALLOC_PD] = ib_uverbs_alloc_pd, [IB_USER_VERBS_CMD_DEALLOC_PD] = ib_uverbs_dealloc_pd, [IB_USER_VERBS_CMD_REG_MR] = ib_uverbs_reg_mr, [IB_USER_VERBS_CMD_DEREG_MR] = ib_uverbs_dereg_mr, [IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL] = ib_uverbs_create_comp_channel, [IB_USER_VERBS_CMD_CREATE_CQ] = ib_uverbs_create_cq, [IB_USER_VERBS_CMD_RESIZE_CQ] = ib_uverbs_resize_cq, [IB_USER_VERBS_CMD_POLL_CQ] = ib_uverbs_poll_cq, [IB_USER_VERBS_CMD_REQ_NOTIFY_CQ] = ib_uverbs_req_notify_cq, [IB_USER_VERBS_CMD_DESTROY_CQ] = ib_uverbs_destroy_cq, [IB_USER_VERBS_CMD_CREATE_QP] = ib_uverbs_create_qp, [IB_USER_VERBS_CMD_QUERY_QP] = ib_uverbs_query_qp, [IB_USER_VERBS_CMD_MODIFY_QP] = ib_uverbs_modify_qp, [IB_USER_VERBS_CMD_DESTROY_QP] = ib_uverbs_destroy_qp, [IB_USER_VERBS_CMD_POST_SEND] = ib_uverbs_post_send, [IB_USER_VERBS_CMD_POST_RECV] = ib_uverbs_post_recv, [IB_USER_VERBS_CMD_POST_SRQ_RECV] = ib_uverbs_post_srq_recv, [IB_USER_VERBS_CMD_CREATE_AH] = ib_uverbs_create_ah, [IB_USER_VERBS_CMD_DESTROY_AH] = ib_uverbs_destroy_ah, [IB_USER_VERBS_CMD_ATTACH_MCAST] = ib_uverbs_attach_mcast, [IB_USER_VERBS_CMD_DETACH_MCAST] = ib_uverbs_detach_mcast, [IB_USER_VERBS_CMD_CREATE_SRQ] = ib_uverbs_create_srq, [IB_USER_VERBS_CMD_MODIFY_SRQ] = ib_uverbs_modify_srq, [IB_USER_VERBS_CMD_QUERY_SRQ] = ib_uverbs_query_srq, [IB_USER_VERBS_CMD_DESTROY_SRQ] = ib_uverbs_destroy_srq, [IB_USER_VERBS_CMD_CREATE_XRC_SRQ] = ib_uverbs_create_xrc_srq, [IB_USER_VERBS_CMD_OPEN_XRCD] = ib_uverbs_open_xrc_domain, [IB_USER_VERBS_CMD_CLOSE_XRCD] = ib_uverbs_close_xrc_domain, [IB_USER_VERBS_CMD_CREATE_XRC_RCV_QP] = ib_uverbs_create_xrc_rcv_qp, [IB_USER_VERBS_CMD_MODIFY_XRC_RCV_QP] = ib_uverbs_modify_xrc_rcv_qp, [IB_USER_VERBS_CMD_QUERY_XRC_RCV_QP] = ib_uverbs_query_xrc_rcv_qp, [IB_USER_VERBS_CMD_REG_XRC_RCV_QP] = ib_uverbs_reg_xrc_rcv_qp, [IB_USER_VERBS_CMD_UNREG_XRC_RCV_QP] = ib_uverbs_unreg_xrc_rcv_qp, }; #ifdef __linux__ /* BSD Does not require a fake mountpoint for all files. */ static struct vfsmount *uverbs_event_mnt; #endif static void ib_uverbs_add_one(struct ib_device *device); static void ib_uverbs_remove_one(struct ib_device *device); static void ib_uverbs_release_dev(struct kref *ref) { struct ib_uverbs_device *dev = container_of(ref, struct ib_uverbs_device, ref); complete(&dev->comp); } static void ib_uverbs_release_event_file(struct kref *ref) { struct ib_uverbs_event_file *file = container_of(ref, struct ib_uverbs_event_file, ref); kfree(file); } void ib_uverbs_release_ucq(struct ib_uverbs_file *file, struct ib_uverbs_event_file *ev_file, struct ib_ucq_object *uobj) { struct ib_uverbs_event *evt, *tmp; if (ev_file) { spin_lock_irq(&ev_file->lock); list_for_each_entry_safe(evt, tmp, &uobj->comp_list, obj_list) { list_del(&evt->list); kfree(evt); } spin_unlock_irq(&ev_file->lock); kref_put(&ev_file->ref, ib_uverbs_release_event_file); } spin_lock_irq(&file->async_file->lock); list_for_each_entry_safe(evt, tmp, &uobj->async_list, obj_list) { list_del(&evt->list); kfree(evt); } spin_unlock_irq(&file->async_file->lock); } void ib_uverbs_release_uevent(struct ib_uverbs_file *file, struct ib_uevent_object *uobj) { struct ib_uverbs_event *evt, *tmp; spin_lock_irq(&file->async_file->lock); list_for_each_entry_safe(evt, tmp, &uobj->event_list, obj_list) { list_del(&evt->list); kfree(evt); } spin_unlock_irq(&file->async_file->lock); } static void ib_uverbs_detach_umcast(struct ib_qp *qp, struct ib_uqp_object *uobj) { struct ib_uverbs_mcast_entry *mcast, *tmp; list_for_each_entry_safe(mcast, tmp, &uobj->mcast_list, list) { ib_detach_mcast(qp, &mcast->gid, mcast->lid); list_del(&mcast->list); kfree(mcast); } } static int ib_uverbs_cleanup_ucontext(struct ib_uverbs_file *file, struct ib_ucontext *context) { struct ib_uobject *uobj, *tmp; if (!context) return 0; context->closing = 1; list_for_each_entry_safe(uobj, tmp, &context->ah_list, list) { struct ib_ah *ah = uobj->object; idr_remove_uobj(&ib_uverbs_ah_idr, uobj); ib_destroy_ah(ah); kfree(uobj); } list_for_each_entry_safe(uobj, tmp, &context->qp_list, list) { struct ib_qp *qp = uobj->object; struct ib_uqp_object *uqp = container_of(uobj, struct ib_uqp_object, uevent.uobject); idr_remove_uobj(&ib_uverbs_qp_idr, uobj); ib_uverbs_detach_umcast(qp, uqp); ib_destroy_qp(qp); ib_uverbs_release_uevent(file, &uqp->uevent); kfree(uqp); } list_for_each_entry_safe(uobj, tmp, &context->srq_list, list) { struct ib_srq *srq = uobj->object; struct ib_uevent_object *uevent = container_of(uobj, struct ib_uevent_object, uobject); idr_remove_uobj(&ib_uverbs_srq_idr, uobj); ib_destroy_srq(srq); ib_uverbs_release_uevent(file, uevent); kfree(uevent); } list_for_each_entry_safe(uobj, tmp, &context->cq_list, list) { struct ib_cq *cq = uobj->object; struct ib_uverbs_event_file *ev_file = cq->cq_context; struct ib_ucq_object *ucq = container_of(uobj, struct ib_ucq_object, uobject); idr_remove_uobj(&ib_uverbs_cq_idr, uobj); ib_destroy_cq(cq); ib_uverbs_release_ucq(file, ev_file, ucq); kfree(ucq); } /* XXX Free MWs */ list_for_each_entry_safe(uobj, tmp, &context->mr_list, list) { struct ib_mr *mr = uobj->object; idr_remove_uobj(&ib_uverbs_mr_idr, uobj); ib_dereg_mr(mr); kfree(uobj); } mutex_lock(&file->device->ib_dev->xrcd_table_mutex); list_for_each_entry_safe(uobj, tmp, &context->xrcd_list, list) { struct ib_xrcd *xrcd = uobj->object; struct ib_uxrc_rcv_object *xrc_qp_obj, *tmp1; struct ib_uxrcd_object *xrcd_uobj = container_of(uobj, struct ib_uxrcd_object, uobject); list_for_each_entry_safe(xrc_qp_obj, tmp1, &xrcd_uobj->xrc_reg_qp_list, list) { list_del(&xrc_qp_obj->list); ib_uverbs_cleanup_xrc_rcv_qp(file, xrcd, xrc_qp_obj->qp_num); kfree(xrc_qp_obj); } idr_remove_uobj(&ib_uverbs_xrc_domain_idr, uobj); ib_uverbs_dealloc_xrcd(file->device->ib_dev, xrcd); kfree(uobj); } mutex_unlock(&file->device->ib_dev->xrcd_table_mutex); list_for_each_entry_safe(uobj, tmp, &context->pd_list, list) { struct ib_pd *pd = uobj->object; idr_remove_uobj(&ib_uverbs_pd_idr, uobj); ib_dealloc_pd(pd); kfree(uobj); } return context->device->dealloc_ucontext(context); } static void ib_uverbs_release_file(struct kref *ref) { struct ib_uverbs_file *file = container_of(ref, struct ib_uverbs_file, ref); module_put(file->device->ib_dev->owner); kref_put(&file->device->ref, ib_uverbs_release_dev); kfree(file); } static ssize_t ib_uverbs_event_read(struct file *filp, char __user *buf, size_t count, loff_t *pos) { struct ib_uverbs_event_file *file = filp->private_data; struct ib_uverbs_event *event; int eventsz; int ret = 0; spin_lock_irq(&file->lock); while (list_empty(&file->event_list)) { spin_unlock_irq(&file->lock); if (filp->f_flags & O_NONBLOCK) return -EAGAIN; if (wait_event_interruptible(file->poll_wait, !list_empty(&file->event_list))) return -ERESTARTSYS; spin_lock_irq(&file->lock); } event = list_entry(file->event_list.next, struct ib_uverbs_event, list); if (file->is_async) eventsz = sizeof (struct ib_uverbs_async_event_desc); else eventsz = sizeof (struct ib_uverbs_comp_event_desc); if (eventsz > count) { ret = -EINVAL; event = NULL; } else { list_del(file->event_list.next); if (event->counter) { ++(*event->counter); list_del(&event->obj_list); } } spin_unlock_irq(&file->lock); if (event) { if (copy_to_user(buf, event, eventsz)) ret = -EFAULT; else ret = eventsz; } kfree(event); return ret; } static unsigned int ib_uverbs_event_poll(struct file *filp, struct poll_table_struct *wait) { unsigned int pollflags = 0; struct ib_uverbs_event_file *file = filp->private_data; file->filp = filp; poll_wait(filp, &file->poll_wait, wait); spin_lock_irq(&file->lock); if (!list_empty(&file->event_list)) pollflags = POLLIN | POLLRDNORM; spin_unlock_irq(&file->lock); return pollflags; } static int ib_uverbs_event_fasync(int fd, struct file *filp, int on) { struct ib_uverbs_event_file *file = filp->private_data; return fasync_helper(fd, filp, on, &file->async_queue); } static int ib_uverbs_event_close(struct inode *inode, struct file *filp) { struct ib_uverbs_event_file *file = filp->private_data; struct ib_uverbs_event *entry, *tmp; spin_lock_irq(&file->lock); file->is_closed = 1; list_for_each_entry_safe(entry, tmp, &file->event_list, list) { if (entry->counter) list_del(&entry->obj_list); kfree(entry); } spin_unlock_irq(&file->lock); if (file->is_async) { ib_unregister_event_handler(&file->uverbs_file->event_handler); kref_put(&file->uverbs_file->ref, ib_uverbs_release_file); } kref_put(&file->ref, ib_uverbs_release_event_file); return 0; } static const struct file_operations uverbs_event_fops = { .owner = THIS_MODULE, .read = ib_uverbs_event_read, .poll = ib_uverbs_event_poll, .release = ib_uverbs_event_close, .fasync = ib_uverbs_event_fasync }; void ib_uverbs_comp_handler(struct ib_cq *cq, void *cq_context) { struct ib_uverbs_event_file *file = cq_context; struct ib_ucq_object *uobj; struct ib_uverbs_event *entry; unsigned long flags; if (!file) return; spin_lock_irqsave(&file->lock, flags); if (file->is_closed) { spin_unlock_irqrestore(&file->lock, flags); return; } entry = kmalloc(sizeof *entry, GFP_ATOMIC); if (!entry) { spin_unlock_irqrestore(&file->lock, flags); return; } uobj = container_of(cq->uobject, struct ib_ucq_object, uobject); entry->desc.comp.cq_handle = cq->uobject->user_handle; entry->counter = &uobj->comp_events_reported; list_add_tail(&entry->list, &file->event_list); list_add_tail(&entry->obj_list, &uobj->comp_list); spin_unlock_irqrestore(&file->lock, flags); wake_up_interruptible(&file->poll_wait); if (file->filp) selwakeup(&file->filp->f_selinfo); kill_fasync(&file->async_queue, SIGIO, POLL_IN); } static void ib_uverbs_async_handler(struct ib_uverbs_file *file, __u64 element, __u64 event, struct list_head *obj_list, u32 *counter) { struct ib_uverbs_event *entry; unsigned long flags; spin_lock_irqsave(&file->async_file->lock, flags); if (file->async_file->is_closed) { spin_unlock_irqrestore(&file->async_file->lock, flags); return; } entry = kmalloc(sizeof *entry, GFP_ATOMIC); if (!entry) { spin_unlock_irqrestore(&file->async_file->lock, flags); return; } entry->desc.async.element = element; entry->desc.async.event_type = event; entry->counter = counter; list_add_tail(&entry->list, &file->async_file->event_list); if (obj_list) list_add_tail(&entry->obj_list, obj_list); spin_unlock_irqrestore(&file->async_file->lock, flags); wake_up_interruptible(&file->async_file->poll_wait); if (file->async_file->filp) selwakeup(&file->async_file->filp->f_selinfo); kill_fasync(&file->async_file->async_queue, SIGIO, POLL_IN); } void ib_uverbs_cq_event_handler(struct ib_event *event, void *context_ptr) { struct ib_ucq_object *uobj = container_of(event->element.cq->uobject, struct ib_ucq_object, uobject); ib_uverbs_async_handler(uobj->uverbs_file, uobj->uobject.user_handle, event->event, &uobj->async_list, &uobj->async_events_reported); } void ib_uverbs_qp_event_handler(struct ib_event *event, void *context_ptr) { struct ib_uevent_object *uobj; uobj = container_of(event->element.qp->uobject, struct ib_uevent_object, uobject); ib_uverbs_async_handler(context_ptr, uobj->uobject.user_handle, event->event, &uobj->event_list, &uobj->events_reported); } void ib_uverbs_srq_event_handler(struct ib_event *event, void *context_ptr) { struct ib_uevent_object *uobj; uobj = container_of(event->element.srq->uobject, struct ib_uevent_object, uobject); ib_uverbs_async_handler(context_ptr, uobj->uobject.user_handle, event->event, &uobj->event_list, &uobj->events_reported); } void ib_uverbs_event_handler(struct ib_event_handler *handler, struct ib_event *event) { struct ib_uverbs_file *file = container_of(handler, struct ib_uverbs_file, event_handler); ib_uverbs_async_handler(file, event->element.port_num, event->event, NULL, NULL); } void ib_uverbs_xrc_rcv_qp_event_handler(struct ib_event *event, void *context_ptr) { ib_uverbs_async_handler(context_ptr, event->element.xrc_qp_num, event->event, NULL, NULL); } struct file *ib_uverbs_alloc_event_file(struct ib_uverbs_file *uverbs_file, int is_async, int *fd) { struct ib_uverbs_event_file *ev_file; struct file *filp; int ret; ev_file = kmalloc(sizeof *ev_file, GFP_KERNEL); if (!ev_file) return ERR_PTR(-ENOMEM); kref_init(&ev_file->ref); spin_lock_init(&ev_file->lock); INIT_LIST_HEAD(&ev_file->event_list); init_waitqueue_head(&ev_file->poll_wait); ev_file->uverbs_file = uverbs_file; ev_file->async_queue = NULL; ev_file->is_async = is_async; ev_file->is_closed = 0; ev_file->filp = NULL; *fd = get_unused_fd(); if (*fd < 0) { ret = *fd; goto err; } /* * fops_get() can't fail here, because we're coming from a * system call on a uverbs file, which will already have a * module reference. */ +#ifdef __linux__ filp = alloc_file(uverbs_event_mnt, dget(uverbs_event_mnt->mnt_root), FMODE_READ, fops_get(&uverbs_event_fops)); +#else + filp = alloc_file(FMODE_READ, fops_get(&uverbs_event_fops)); +#endif if (!filp) { ret = -ENFILE; goto err_fd; } filp->private_data = ev_file; return filp; err_fd: put_unused_fd(*fd); err: kfree(ev_file); return ERR_PTR(ret); } /* * Look up a completion event file by FD. If lookup is successful, * takes a ref to the event file struct that it returns; if * unsuccessful, returns NULL. */ struct ib_uverbs_event_file *ib_uverbs_lookup_comp_file(int fd) { struct ib_uverbs_event_file *ev_file = NULL; struct file *filp; filp = fget(fd); if (!filp) return NULL; if (filp->f_op != &uverbs_event_fops) goto out; ev_file = filp->private_data; if (ev_file->is_async) { ev_file = NULL; goto out; } kref_get(&ev_file->ref); out: fput(filp); return ev_file; } static ssize_t ib_uverbs_write(struct file *filp, const char __user *buf, size_t count, loff_t *pos) { struct ib_uverbs_file *file = filp->private_data; struct ib_uverbs_cmd_hdr hdr; if (count < sizeof hdr) return -EINVAL; if (copy_from_user(&hdr, buf, sizeof hdr)) return -EFAULT; if (hdr.in_words * 4 != count) return -EINVAL; if (hdr.command >= ARRAY_SIZE(uverbs_cmd_table) || !uverbs_cmd_table[hdr.command] || !(file->device->ib_dev->uverbs_cmd_mask & (1ull << hdr.command))) return -EINVAL; if (!file->ucontext && hdr.command != IB_USER_VERBS_CMD_GET_CONTEXT) return -EINVAL; return uverbs_cmd_table[hdr.command](file, buf + sizeof hdr, hdr.in_words * 4, hdr.out_words * 4); } static int ib_uverbs_mmap(struct file *filp, struct vm_area_struct *vma) { struct ib_uverbs_file *file = filp->private_data; if (!file->ucontext) return -ENODEV; else return file->device->ib_dev->mmap(file->ucontext, vma); } /* * ib_uverbs_open() does not need the BKL: * * - dev_table[] accesses are protected by map_lock, the * ib_uverbs_device structures are properly reference counted, and * everything else is purely local to the file being created, so * races against other open calls are not a problem; * - there is no ioctl method to race against; * - the device is added to dev_table[] as the last part of module * initialization, the open method will either immediately run * -ENXIO, or all required initialization will be done. */ static int ib_uverbs_open(struct inode *inode, struct file *filp) { struct ib_uverbs_device *dev; struct ib_uverbs_file *file; int ret; spin_lock(&map_lock); dev = dev_table[iminor(inode) - IB_UVERBS_BASE_MINOR]; if (dev) kref_get(&dev->ref); spin_unlock(&map_lock); if (!dev) return -ENXIO; if (!try_module_get(dev->ib_dev->owner)) { ret = -ENODEV; goto err; } file = kmalloc(sizeof *file, GFP_KERNEL); if (!file) { ret = -ENOMEM; goto err_module; } file->device = dev; file->ucontext = NULL; file->async_file = NULL; kref_init(&file->ref); mutex_init(&file->mutex); filp->private_data = file; return 0; err_module: module_put(dev->ib_dev->owner); err: kref_put(&dev->ref, ib_uverbs_release_dev); return ret; } static int ib_uverbs_close(struct inode *inode, struct file *filp) { struct ib_uverbs_file *file = filp->private_data; ib_uverbs_cleanup_ucontext(file, file->ucontext); if (file->async_file) kref_put(&file->async_file->ref, ib_uverbs_release_event_file); kref_put(&file->ref, ib_uverbs_release_file); return 0; } static const struct file_operations uverbs_fops = { .owner = THIS_MODULE, .write = ib_uverbs_write, .open = ib_uverbs_open, .release = ib_uverbs_close }; static const struct file_operations uverbs_mmap_fops = { .owner = THIS_MODULE, .write = ib_uverbs_write, .mmap = ib_uverbs_mmap, .open = ib_uverbs_open, .release = ib_uverbs_close }; static struct ib_client uverbs_client = { .name = "uverbs", .add = ib_uverbs_add_one, .remove = ib_uverbs_remove_one }; static ssize_t show_ibdev(struct device *device, struct device_attribute *attr, char *buf) { struct ib_uverbs_device *dev = dev_get_drvdata(device); if (!dev) return -ENODEV; return sprintf(buf, "%s\n", dev->ib_dev->name); } static DEVICE_ATTR(ibdev, S_IRUGO, show_ibdev, NULL); static ssize_t show_dev_abi_version(struct device *device, struct device_attribute *attr, char *buf) { struct ib_uverbs_device *dev = dev_get_drvdata(device); if (!dev) return -ENODEV; return sprintf(buf, "%d\n", dev->ib_dev->uverbs_abi_ver); } static DEVICE_ATTR(abi_version, S_IRUGO, show_dev_abi_version, NULL); -static ssize_t show_abi_version(struct class *class, char *buf) +static ssize_t show_abi_version(struct class *class, struct class_attribute *attr, char *buf) { return sprintf(buf, "%d\n", IB_USER_VERBS_ABI_VERSION); } static CLASS_ATTR(abi_version, S_IRUGO, show_abi_version, NULL); #include static ssize_t show_dev_device(struct device *device, struct device_attribute *attr, char *buf) { struct ib_uverbs_device *dev = dev_get_drvdata(device); if (!dev) return -ENODEV; return sprintf(buf, "0x%04x\n", ((struct pci_dev *)dev->ib_dev->dma_device)->device); } static DEVICE_ATTR(device, S_IRUGO, show_dev_device, NULL); static ssize_t show_dev_vendor(struct device *device, struct device_attribute *attr, char *buf) { struct ib_uverbs_device *dev = dev_get_drvdata(device); if (!dev) return -ENODEV; return sprintf(buf, "0x%04x\n", ((struct pci_dev *)dev->ib_dev->dma_device)->vendor); } static DEVICE_ATTR(vendor, S_IRUGO, show_dev_vendor, NULL); struct attribute *device_attrs[] = { &dev_attr_device.attr, &dev_attr_vendor.attr, NULL }; static struct attribute_group device_group = { .name = "device", .attrs = device_attrs }; static void ib_uverbs_add_one(struct ib_device *device) { struct ib_uverbs_device *uverbs_dev; if (!device->alloc_ucontext) return; uverbs_dev = kzalloc(sizeof *uverbs_dev, GFP_KERNEL); if (!uverbs_dev) return; kref_init(&uverbs_dev->ref); init_completion(&uverbs_dev->comp); spin_lock(&map_lock); uverbs_dev->devnum = find_first_zero_bit(dev_map, IB_UVERBS_MAX_DEVICES); if (uverbs_dev->devnum >= IB_UVERBS_MAX_DEVICES) { spin_unlock(&map_lock); goto err; } set_bit(uverbs_dev->devnum, dev_map); spin_unlock(&map_lock); uverbs_dev->ib_dev = device; uverbs_dev->num_comp_vectors = device->num_comp_vectors; uverbs_dev->cdev = cdev_alloc(); if (!uverbs_dev->cdev) goto err; uverbs_dev->cdev->owner = THIS_MODULE; uverbs_dev->cdev->ops = device->mmap ? &uverbs_mmap_fops : &uverbs_fops; kobject_set_name(&uverbs_dev->cdev->kobj, "uverbs%d", uverbs_dev->devnum); if (cdev_add(uverbs_dev->cdev, IB_UVERBS_BASE_DEV + uverbs_dev->devnum, 1)) goto err_cdev; uverbs_dev->dev = device_create(uverbs_class, device->dma_device, uverbs_dev->cdev->dev, uverbs_dev, "uverbs%d", uverbs_dev->devnum); if (IS_ERR(uverbs_dev->dev)) goto err_cdev; if (device_create_file(uverbs_dev->dev, &dev_attr_ibdev)) goto err_class; if (device_create_file(uverbs_dev->dev, &dev_attr_abi_version)) goto err_class; if (sysfs_create_group(&uverbs_dev->dev->kobj, &device_group)) goto err_class; spin_lock(&map_lock); dev_table[uverbs_dev->devnum] = uverbs_dev; spin_unlock(&map_lock); ib_set_client_data(device, &uverbs_client, uverbs_dev); return; err_class: device_destroy(uverbs_class, uverbs_dev->cdev->dev); err_cdev: cdev_del(uverbs_dev->cdev); clear_bit(uverbs_dev->devnum, dev_map); err: kref_put(&uverbs_dev->ref, ib_uverbs_release_dev); wait_for_completion(&uverbs_dev->comp); kfree(uverbs_dev); return; } static void ib_uverbs_remove_one(struct ib_device *device) { struct ib_uverbs_device *uverbs_dev = ib_get_client_data(device, &uverbs_client); if (!uverbs_dev) return; sysfs_remove_group(&uverbs_dev->dev->kobj, &device_group); dev_set_drvdata(uverbs_dev->dev, NULL); device_destroy(uverbs_class, uverbs_dev->cdev->dev); cdev_del(uverbs_dev->cdev); spin_lock(&map_lock); dev_table[uverbs_dev->devnum] = NULL; spin_unlock(&map_lock); clear_bit(uverbs_dev->devnum, dev_map); kref_put(&uverbs_dev->ref, ib_uverbs_release_dev); wait_for_completion(&uverbs_dev->comp); kfree(uverbs_dev); } #ifdef __linux__ static int uverbs_event_get_sb(struct file_system_type *fs_type, int flags, const char *dev_name, void *data, struct vfsmount *mnt) { return get_sb_pseudo(fs_type, "infinibandevent:", NULL, INFINIBANDEVENTFS_MAGIC, mnt); } static struct file_system_type uverbs_event_fs = { /* No owner field so module can be unloaded */ .name = "infinibandeventfs", .get_sb = uverbs_event_get_sb, .kill_sb = kill_litter_super }; #endif static int __init ib_uverbs_init(void) { int ret; spin_lock_init(&map_lock); ret = register_chrdev_region(IB_UVERBS_BASE_DEV, IB_UVERBS_MAX_DEVICES, "infiniband_verbs"); if (ret) { printk(KERN_ERR "user_verbs: couldn't register device number\n"); goto out; } uverbs_class = class_create(THIS_MODULE, "infiniband_verbs"); if (IS_ERR(uverbs_class)) { ret = PTR_ERR(uverbs_class); printk(KERN_ERR "user_verbs: couldn't create class infiniband_verbs\n"); goto out_chrdev; } ret = class_create_file(uverbs_class, &class_attr_abi_version); if (ret) { printk(KERN_ERR "user_verbs: couldn't create abi_version attribute\n"); goto out_class; } #ifdef __linux__ ret = register_filesystem(&uverbs_event_fs); if (ret) { printk(KERN_ERR "user_verbs: couldn't register infinibandeventfs\n"); goto out_class; } uverbs_event_mnt = kern_mount(&uverbs_event_fs); if (IS_ERR(uverbs_event_mnt)) { ret = PTR_ERR(uverbs_event_mnt); printk(KERN_ERR "user_verbs: couldn't mount infinibandeventfs\n"); goto out_fs; } #endif ret = ib_register_client(&uverbs_client); if (ret) { printk(KERN_ERR "user_verbs: couldn't register client\n"); goto out_mnt; } return 0; out_mnt: #ifdef __linux__ mntput(uverbs_event_mnt); out_fs: unregister_filesystem(&uverbs_event_fs); #endif out_class: class_destroy(uverbs_class); out_chrdev: unregister_chrdev_region(IB_UVERBS_BASE_DEV, IB_UVERBS_MAX_DEVICES); out: return ret; } static void __exit ib_uverbs_cleanup(void) { ib_unregister_client(&uverbs_client); #ifdef __linux__ mntput(uverbs_event_mnt); unregister_filesystem(&uverbs_event_fs); #endif class_destroy(uverbs_class); unregister_chrdev_region(IB_UVERBS_BASE_DEV, IB_UVERBS_MAX_DEVICES); idr_destroy(&ib_uverbs_pd_idr); idr_destroy(&ib_uverbs_mr_idr); idr_destroy(&ib_uverbs_mw_idr); idr_destroy(&ib_uverbs_ah_idr); idr_destroy(&ib_uverbs_cq_idr); idr_destroy(&ib_uverbs_qp_idr); idr_destroy(&ib_uverbs_srq_idr); } module_init(ib_uverbs_init); module_exit(ib_uverbs_cleanup); Index: stable/10/sys/ofed/drivers/infiniband/hw/mlx4/alias_GUID.c =================================================================== --- stable/10/sys/ofed/drivers/infiniband/hw/mlx4/alias_GUID.c (revision 271126) +++ stable/10/sys/ofed/drivers/infiniband/hw/mlx4/alias_GUID.c (revision 271127) @@ -1,688 +1,687 @@ /* * Copyright (c) 2012 Mellanox Technologies. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU * General Public License (GPL) Version 2, available from the file * COPYING in the main directory of this source tree, or the * OpenIB.org BSD license below: * * Redistribution and use in source and binary forms, with or * without modification, are permitted provided that the following * conditions are met: * * - Redistributions of source code must retain the above * copyright notice, this list of conditions and the following * disclaimer. * * - Redistributions in binary form must reproduce the above * copyright notice, this list of conditions and the following * disclaimer in the documentation and/or other materials * provided with the distribution. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ /***********************************************************/ /*This file support the handling of the Alias GUID feature. */ /***********************************************************/ #include #include #include #include #include #include #include -#include #include #include #include #include "mlx4_ib.h" /* The driver keeps the current state of all guids, as they are in the HW. Whenever we receive an smp mad GUIDInfo record, the data will be cached. */ struct mlx4_alias_guid_work_context { u8 port; struct mlx4_ib_dev *dev ; struct ib_sa_query *sa_query; struct completion done; int query_id; struct list_head list; int block_num; }; struct mlx4_next_alias_guid_work { u8 port; u8 block_num; struct mlx4_sriov_alias_guid_info_rec_det rec_det; }; void mlx4_ib_update_cache_on_guid_change(struct mlx4_ib_dev *dev, int block_num, u8 port_num, u8 *p_data) { int i; u64 guid_indexes; int slave_id; int port_index = port_num - 1; if (!mlx4_is_master(dev->dev)) return; guid_indexes = be64_to_cpu((__force __be64) dev->sriov.alias_guid. ports_guid[port_num - 1]. all_rec_per_port[block_num].guid_indexes); - pr_debug("port: %d, guid_indexes: 0x%llx\n", port_num, guid_indexes); + pr_debug("port: %d, guid_indexes: 0x%llx\n", port_num, (long long)guid_indexes); for (i = 0; i < NUM_ALIAS_GUID_IN_REC; i++) { /* The location of the specific index starts from bit number 4 * until bit num 11 */ if (test_bit(i + 4, (unsigned long *)&guid_indexes)) { slave_id = (block_num * NUM_ALIAS_GUID_IN_REC) + i ; if (slave_id >= dev->dev->num_slaves) { pr_debug("The last slave: %d\n", slave_id); return; } /* cache the guid: */ memcpy(&dev->sriov.demux[port_index].guid_cache[slave_id], &p_data[i * GUID_REC_SIZE], GUID_REC_SIZE); } else pr_debug("Guid number: %d in block: %d" " was not updated\n", i, block_num); } } static __be64 get_cached_alias_guid(struct mlx4_ib_dev *dev, int port, int index) { if (index >= NUM_ALIAS_GUID_PER_PORT) { pr_err("%s: ERROR: asked for index:%d\n", __func__, index); return (__force __be64) -1; } return *(__be64 *)&dev->sriov.demux[port - 1].guid_cache[index]; } ib_sa_comp_mask mlx4_ib_get_aguid_comp_mask_from_ix(int index) { return IB_SA_COMP_MASK(4 + index); } /* * Whenever new GUID is set/unset (guid table change) create event and * notify the relevant slave (master also should be notified). * If the GUID value is not as we have in the cache the slave will not be * updated; in this case it waits for the smp_snoop or the port management * event to call the function and to update the slave. * block_number - the index of the block (16 blocks available) * port_number - 1 or 2 */ void mlx4_ib_notify_slaves_on_guid_change(struct mlx4_ib_dev *dev, int block_num, u8 port_num, u8 *p_data) { int i; u64 guid_indexes; int slave_id; enum slave_port_state new_state; enum slave_port_state prev_state; __be64 tmp_cur_ag, form_cache_ag; enum slave_port_gen_event gen_event; if (!mlx4_is_master(dev->dev)) return; guid_indexes = be64_to_cpu((__force __be64) dev->sriov.alias_guid. ports_guid[port_num - 1]. all_rec_per_port[block_num].guid_indexes); - pr_debug("port: %d, guid_indexes: 0x%llx\n", port_num, guid_indexes); + pr_debug("port: %d, guid_indexes: 0x%llx\n", port_num, (long long)guid_indexes); /*calculate the slaves and notify them*/ for (i = 0; i < NUM_ALIAS_GUID_IN_REC; i++) { /* the location of the specific index runs from bits 4..11 */ if (!(test_bit(i + 4, (unsigned long *)&guid_indexes))) continue; slave_id = (block_num * NUM_ALIAS_GUID_IN_REC) + i ; if (slave_id >= dev->dev->num_slaves) return; tmp_cur_ag = *(__be64 *)&p_data[i * GUID_REC_SIZE]; form_cache_ag = get_cached_alias_guid(dev, port_num, (NUM_ALIAS_GUID_IN_REC * block_num) + i); /* * Check if guid is not the same as in the cache, * If it is different, wait for the snoop_smp or the port mgmt * change event to update the slave on its port state change */ if (tmp_cur_ag != form_cache_ag) continue; mlx4_gen_guid_change_eqe(dev->dev, slave_id, port_num); /*2 cases: Valid GUID, and Invalid Guid*/ if (tmp_cur_ag != MLX4_NOT_SET_GUID) { /*valid GUID*/ prev_state = mlx4_get_slave_port_state(dev->dev, slave_id, port_num); new_state = set_and_calc_slave_port_state(dev->dev, slave_id, port_num, MLX4_PORT_STATE_IB_PORT_STATE_EVENT_GID_VALID, &gen_event); pr_debug("slave: %d, port: %d prev_port_state: %d," " new_port_state: %d, gen_event: %d\n", slave_id, port_num, prev_state, new_state, gen_event); if (gen_event == SLAVE_PORT_GEN_EVENT_UP) { pr_debug("sending PORT_UP event to slave: %d, port: %d\n", slave_id, port_num); mlx4_gen_port_state_change_eqe(dev->dev, slave_id, port_num, MLX4_PORT_CHANGE_SUBTYPE_ACTIVE); } } else { /* request to invalidate GUID */ set_and_calc_slave_port_state(dev->dev, slave_id, port_num, MLX4_PORT_STATE_IB_EVENT_GID_INVALID, &gen_event); pr_debug("sending PORT DOWN event to slave: %d, port: %d\n", slave_id, port_num); mlx4_gen_port_state_change_eqe(dev->dev, slave_id, port_num, MLX4_PORT_CHANGE_SUBTYPE_DOWN); } } } static void aliasguid_query_handler(int status, struct ib_sa_guidinfo_rec *guid_rec, void *context) { struct mlx4_ib_dev *dev; struct mlx4_alias_guid_work_context *cb_ctx = context; u8 port_index ; int i; struct mlx4_sriov_alias_guid_info_rec_det *rec; unsigned long flags, flags1; if (!context) return; dev = cb_ctx->dev; port_index = cb_ctx->port - 1; rec = &dev->sriov.alias_guid.ports_guid[port_index]. all_rec_per_port[cb_ctx->block_num]; if (status) { rec->status = MLX4_GUID_INFO_STATUS_IDLE; pr_debug("(port: %d) failed: status = %d\n", cb_ctx->port, status); goto out; } if (guid_rec->block_num != cb_ctx->block_num) { pr_err("block num mismatch: %d != %d\n", cb_ctx->block_num, guid_rec->block_num); goto out; } pr_debug("lid/port: %d/%d, block_num: %d\n", be16_to_cpu(guid_rec->lid), cb_ctx->port, guid_rec->block_num); rec = &dev->sriov.alias_guid.ports_guid[port_index]. all_rec_per_port[guid_rec->block_num]; rec->status = MLX4_GUID_INFO_STATUS_SET; rec->method = MLX4_GUID_INFO_RECORD_SET; for (i = 0 ; i < NUM_ALIAS_GUID_IN_REC; i++) { __be64 tmp_cur_ag; tmp_cur_ag = *(__be64 *)&guid_rec->guid_info_list[i * GUID_REC_SIZE]; /* check if the SM didn't assign one of the records. * if it didn't, if it was not sysadmin request: * ask the SM to give a new GUID, (instead of the driver request). */ if (tmp_cur_ag == MLX4_NOT_SET_GUID) { mlx4_ib_warn(&dev->ib_dev, "%s:Record num %d in " "block_num: %d was declined by SM, " "ownership by %d (0 = driver, 1=sysAdmin," " 2=None)\n", __func__, i, guid_rec->block_num, rec->ownership); if (rec->ownership == MLX4_GUID_DRIVER_ASSIGN) { /* if it is driver assign, asks for new GUID from SM*/ *(__be64 *)&rec->all_recs[i * GUID_REC_SIZE] = MLX4_NOT_SET_GUID; /* Mark the record as not assigned, and let it * be sent again in the next work sched.*/ rec->status = MLX4_GUID_INFO_STATUS_IDLE; rec->guid_indexes |= mlx4_ib_get_aguid_comp_mask_from_ix(i); } } else { /* properly assigned record. */ /* We save the GUID we just got from the SM in the * admin_guid in order to be persistent, and in the * request from the sm the process will ask for the same GUID */ if (rec->ownership == MLX4_GUID_SYSADMIN_ASSIGN && tmp_cur_ag != *(__be64 *)&rec->all_recs[i * GUID_REC_SIZE]) { /* the sysadmin assignment failed.*/ mlx4_ib_warn(&dev->ib_dev, "%s: Failed to set" " admin guid after SysAdmin " "configuration. " "Record num %d in block_num:%d " "was declined by SM, " "new val(0x%llx) was kept\n", __func__, i, guid_rec->block_num, (long long)be64_to_cpu(*(__be64 *) & rec->all_recs[i * GUID_REC_SIZE])); } else { memcpy(&rec->all_recs[i * GUID_REC_SIZE], &guid_rec->guid_info_list[i * GUID_REC_SIZE], GUID_REC_SIZE); } } } /* The func is call here to close the cases when the sm doesn't send smp, so in the sa response the driver notifies the slave. */ mlx4_ib_notify_slaves_on_guid_change(dev, guid_rec->block_num, cb_ctx->port, guid_rec->guid_info_list); out: spin_lock_irqsave(&dev->sriov.going_down_lock, flags); spin_lock_irqsave(&dev->sriov.alias_guid.ag_work_lock, flags1); if (!dev->sriov.is_going_down) queue_delayed_work(dev->sriov.alias_guid.ports_guid[port_index].wq, &dev->sriov.alias_guid.ports_guid[port_index]. alias_guid_work, 0); if (cb_ctx->sa_query) { list_del(&cb_ctx->list); kfree(cb_ctx); } else complete(&cb_ctx->done); spin_unlock_irqrestore(&dev->sriov.alias_guid.ag_work_lock, flags1); spin_unlock_irqrestore(&dev->sriov.going_down_lock, flags); } static void invalidate_guid_record(struct mlx4_ib_dev *dev, u8 port, int index) { int i; u64 cur_admin_val; ib_sa_comp_mask comp_mask = 0; dev->sriov.alias_guid.ports_guid[port - 1].all_rec_per_port[index].status = MLX4_GUID_INFO_STATUS_IDLE; dev->sriov.alias_guid.ports_guid[port - 1].all_rec_per_port[index].method = MLX4_GUID_INFO_RECORD_SET; /* calculate the comp_mask for that record.*/ for (i = 0; i < NUM_ALIAS_GUID_IN_REC; i++) { cur_admin_val = *(u64 *)&dev->sriov.alias_guid.ports_guid[port - 1]. all_rec_per_port[index].all_recs[GUID_REC_SIZE * i]; /* check the admin value: if it's for delete (~00LL) or it is the first guid of the first record (hw guid) or the records is not in ownership of the sysadmin and the sm doesn't need to assign GUIDs, then don't put it up for assignment. */ if (MLX4_GUID_FOR_DELETE_VAL == cur_admin_val || (!index && !i) || MLX4_GUID_NONE_ASSIGN == dev->sriov.alias_guid. ports_guid[port - 1].all_rec_per_port[index].ownership) continue; comp_mask |= mlx4_ib_get_aguid_comp_mask_from_ix(i); } dev->sriov.alias_guid.ports_guid[port - 1]. all_rec_per_port[index].guid_indexes = comp_mask; } static int set_guid_rec(struct ib_device *ibdev, u8 port, int index, struct mlx4_sriov_alias_guid_info_rec_det *rec_det) { int err; struct mlx4_ib_dev *dev = to_mdev(ibdev); struct ib_sa_guidinfo_rec guid_info_rec; ib_sa_comp_mask comp_mask; struct ib_port_attr attr; struct mlx4_alias_guid_work_context *callback_context; unsigned long resched_delay, flags, flags1; struct list_head *head = &dev->sriov.alias_guid.ports_guid[port - 1].cb_list; err = __mlx4_ib_query_port(ibdev, port, &attr, 1); if (err) { pr_debug("mlx4_ib_query_port failed (err: %d), port: %d\n", err, port); return err; } /*check the port was configured by the sm, otherwise no need to send */ if (attr.state != IB_PORT_ACTIVE) { pr_debug("port %d not active...rescheduling\n", port); resched_delay = 5 * HZ; err = -EAGAIN; goto new_schedule; } callback_context = kmalloc(sizeof *callback_context, GFP_KERNEL); if (!callback_context) { err = -ENOMEM; resched_delay = HZ * 5; goto new_schedule; } callback_context->port = port; callback_context->dev = dev; callback_context->block_num = index; memset(&guid_info_rec, 0, sizeof (struct ib_sa_guidinfo_rec)); guid_info_rec.lid = cpu_to_be16(attr.lid); guid_info_rec.block_num = index; memcpy(guid_info_rec.guid_info_list, rec_det->all_recs, GUID_REC_SIZE * NUM_ALIAS_GUID_IN_REC); comp_mask = IB_SA_GUIDINFO_REC_LID | IB_SA_GUIDINFO_REC_BLOCK_NUM | rec_det->guid_indexes; init_completion(&callback_context->done); spin_lock_irqsave(&dev->sriov.alias_guid.ag_work_lock, flags1); list_add_tail(&callback_context->list, head); spin_unlock_irqrestore(&dev->sriov.alias_guid.ag_work_lock, flags1); callback_context->query_id = ib_sa_guid_info_rec_query(dev->sriov.alias_guid.sa_client, ibdev, port, &guid_info_rec, comp_mask, rec_det->method, 1000, GFP_KERNEL, aliasguid_query_handler, callback_context, &callback_context->sa_query); if (callback_context->query_id < 0) { pr_debug("ib_sa_guid_info_rec_query failed, query_id: " "%d. will reschedule to the next 1 sec.\n", callback_context->query_id); spin_lock_irqsave(&dev->sriov.alias_guid.ag_work_lock, flags1); list_del(&callback_context->list); kfree(callback_context); spin_unlock_irqrestore(&dev->sriov.alias_guid.ag_work_lock, flags1); resched_delay = 1 * HZ; err = -EAGAIN; goto new_schedule; } err = 0; goto out; new_schedule: spin_lock_irqsave(&dev->sriov.going_down_lock, flags); spin_lock_irqsave(&dev->sriov.alias_guid.ag_work_lock, flags1); invalidate_guid_record(dev, port, index); if (!dev->sriov.is_going_down) { queue_delayed_work(dev->sriov.alias_guid.ports_guid[port - 1].wq, &dev->sriov.alias_guid.ports_guid[port - 1].alias_guid_work, resched_delay); } spin_unlock_irqrestore(&dev->sriov.alias_guid.ag_work_lock, flags1); spin_unlock_irqrestore(&dev->sriov.going_down_lock, flags); out: return err; } void mlx4_ib_invalidate_all_guid_record(struct mlx4_ib_dev *dev, int port) { int i; unsigned long flags, flags1; pr_debug("port %d\n", port); spin_lock_irqsave(&dev->sriov.going_down_lock, flags); spin_lock_irqsave(&dev->sriov.alias_guid.ag_work_lock, flags1); for (i = 0; i < NUM_ALIAS_GUID_REC_IN_PORT; i++) invalidate_guid_record(dev, port, i); if (mlx4_is_master(dev->dev) && !dev->sriov.is_going_down) { /* make sure no work waits in the queue, if the work is already queued(not on the timer) the cancel will fail. That is not a problem because we just want the work started. */ cancel_delayed_work(&dev->sriov.alias_guid. ports_guid[port - 1].alias_guid_work); queue_delayed_work(dev->sriov.alias_guid.ports_guid[port - 1].wq, &dev->sriov.alias_guid.ports_guid[port - 1].alias_guid_work, 0); } spin_unlock_irqrestore(&dev->sriov.alias_guid.ag_work_lock, flags1); spin_unlock_irqrestore(&dev->sriov.going_down_lock, flags); } /* The function returns the next record that was * not configured (or failed to be configured) */ static int get_next_record_to_update(struct mlx4_ib_dev *dev, u8 port, struct mlx4_next_alias_guid_work *rec) { int j; unsigned long flags; for (j = 0; j < NUM_ALIAS_GUID_REC_IN_PORT; j++) { spin_lock_irqsave(&dev->sriov.alias_guid.ag_work_lock, flags); if (dev->sriov.alias_guid.ports_guid[port].all_rec_per_port[j].status == MLX4_GUID_INFO_STATUS_IDLE) { memcpy(&rec->rec_det, &dev->sriov.alias_guid.ports_guid[port].all_rec_per_port[j], sizeof (struct mlx4_sriov_alias_guid_info_rec_det)); rec->port = port; rec->block_num = j; dev->sriov.alias_guid.ports_guid[port].all_rec_per_port[j].status = MLX4_GUID_INFO_STATUS_PENDING; spin_unlock_irqrestore(&dev->sriov.alias_guid.ag_work_lock, flags); return 0; } spin_unlock_irqrestore(&dev->sriov.alias_guid.ag_work_lock, flags); } return -ENOENT; } static void set_administratively_guid_record(struct mlx4_ib_dev *dev, int port, int rec_index, struct mlx4_sriov_alias_guid_info_rec_det *rec_det) { dev->sriov.alias_guid.ports_guid[port].all_rec_per_port[rec_index].guid_indexes = rec_det->guid_indexes; memcpy(dev->sriov.alias_guid.ports_guid[port].all_rec_per_port[rec_index].all_recs, rec_det->all_recs, NUM_ALIAS_GUID_IN_REC * GUID_REC_SIZE); dev->sriov.alias_guid.ports_guid[port].all_rec_per_port[rec_index].status = rec_det->status; } static void set_all_slaves_guids(struct mlx4_ib_dev *dev, int port) { int j; struct mlx4_sriov_alias_guid_info_rec_det rec_det ; for (j = 0 ; j < NUM_ALIAS_GUID_REC_IN_PORT ; j++) { memset(rec_det.all_recs, 0, NUM_ALIAS_GUID_IN_REC * GUID_REC_SIZE); rec_det.guid_indexes = (!j ? 0 : IB_SA_GUIDINFO_REC_GID0) | IB_SA_GUIDINFO_REC_GID1 | IB_SA_GUIDINFO_REC_GID2 | IB_SA_GUIDINFO_REC_GID3 | IB_SA_GUIDINFO_REC_GID4 | IB_SA_GUIDINFO_REC_GID5 | IB_SA_GUIDINFO_REC_GID6 | IB_SA_GUIDINFO_REC_GID7; rec_det.status = MLX4_GUID_INFO_STATUS_IDLE; set_administratively_guid_record(dev, port, j, &rec_det); } } static void alias_guid_work(struct work_struct *work) { struct delayed_work *delay = to_delayed_work(work); int ret = 0; struct mlx4_next_alias_guid_work *rec; struct mlx4_sriov_alias_guid_port_rec_det *sriov_alias_port = container_of(delay, struct mlx4_sriov_alias_guid_port_rec_det, alias_guid_work); struct mlx4_sriov_alias_guid *sriov_alias_guid = sriov_alias_port->parent; struct mlx4_ib_sriov *ib_sriov = container_of(sriov_alias_guid, struct mlx4_ib_sriov, alias_guid); struct mlx4_ib_dev *dev = container_of(ib_sriov, struct mlx4_ib_dev, sriov); rec = kzalloc(sizeof *rec, GFP_KERNEL); if (!rec) { pr_err("alias_guid_work: No Memory\n"); return; } pr_debug("starting [port: %d]...\n", sriov_alias_port->port + 1); ret = get_next_record_to_update(dev, sriov_alias_port->port, rec); if (ret) { pr_debug("No more records to update.\n"); goto out; } set_guid_rec(&dev->ib_dev, rec->port + 1, rec->block_num, &rec->rec_det); out: kfree(rec); } void mlx4_ib_init_alias_guid_work(struct mlx4_ib_dev *dev, int port) { unsigned long flags, flags1; if (!mlx4_is_master(dev->dev)) return; spin_lock_irqsave(&dev->sriov.going_down_lock, flags); spin_lock_irqsave(&dev->sriov.alias_guid.ag_work_lock, flags1); if (!dev->sriov.is_going_down) { queue_delayed_work(dev->sriov.alias_guid.ports_guid[port].wq, &dev->sriov.alias_guid.ports_guid[port].alias_guid_work, 0); } spin_unlock_irqrestore(&dev->sriov.alias_guid.ag_work_lock, flags1); spin_unlock_irqrestore(&dev->sriov.going_down_lock, flags); } void mlx4_ib_destroy_alias_guid_service(struct mlx4_ib_dev *dev) { int i; struct mlx4_ib_sriov *sriov = &dev->sriov; struct mlx4_alias_guid_work_context *cb_ctx; struct mlx4_sriov_alias_guid_port_rec_det *det; struct ib_sa_query *sa_query; unsigned long flags; for (i = 0 ; i < dev->num_ports; i++) { cancel_delayed_work(&dev->sriov.alias_guid.ports_guid[i].alias_guid_work); det = &sriov->alias_guid.ports_guid[i]; spin_lock_irqsave(&sriov->alias_guid.ag_work_lock, flags); while (!list_empty(&det->cb_list)) { cb_ctx = list_entry(det->cb_list.next, struct mlx4_alias_guid_work_context, list); sa_query = cb_ctx->sa_query; cb_ctx->sa_query = NULL; list_del(&cb_ctx->list); spin_unlock_irqrestore(&sriov->alias_guid.ag_work_lock, flags); ib_sa_cancel_query(cb_ctx->query_id, sa_query); wait_for_completion(&cb_ctx->done); kfree(cb_ctx); spin_lock_irqsave(&sriov->alias_guid.ag_work_lock, flags); } spin_unlock_irqrestore(&sriov->alias_guid.ag_work_lock, flags); } for (i = 0 ; i < dev->num_ports; i++) { flush_workqueue(dev->sriov.alias_guid.ports_guid[i].wq); destroy_workqueue(dev->sriov.alias_guid.ports_guid[i].wq); } ib_sa_unregister_client(dev->sriov.alias_guid.sa_client); kfree(dev->sriov.alias_guid.sa_client); } int mlx4_ib_init_alias_guid_service(struct mlx4_ib_dev *dev) { char alias_wq_name[15]; int ret = 0; int i, j, k; union ib_gid gid; if (!mlx4_is_master(dev->dev)) return 0; dev->sriov.alias_guid.sa_client = kzalloc(sizeof *dev->sriov.alias_guid.sa_client, GFP_KERNEL); if (!dev->sriov.alias_guid.sa_client) return -ENOMEM; ib_sa_register_client(dev->sriov.alias_guid.sa_client); spin_lock_init(&dev->sriov.alias_guid.ag_work_lock); for (i = 1; i <= dev->num_ports; ++i) { if (dev->ib_dev.query_gid(&dev->ib_dev , i, 0, &gid)) { ret = -EFAULT; goto err_unregister; } } for (i = 0 ; i < dev->num_ports; i++) { memset(&dev->sriov.alias_guid.ports_guid[i], 0, sizeof (struct mlx4_sriov_alias_guid_port_rec_det)); /*Check if the SM doesn't need to assign the GUIDs*/ for (j = 0; j < NUM_ALIAS_GUID_REC_IN_PORT; j++) { if (mlx4_ib_sm_guid_assign) { dev->sriov.alias_guid.ports_guid[i]. all_rec_per_port[j]. ownership = MLX4_GUID_DRIVER_ASSIGN; continue; } dev->sriov.alias_guid.ports_guid[i].all_rec_per_port[j]. ownership = MLX4_GUID_NONE_ASSIGN; /*mark each val as it was deleted, till the sysAdmin will give it valid val*/ for (k = 0; k < NUM_ALIAS_GUID_IN_REC; k++) { *(__be64 *)&dev->sriov.alias_guid.ports_guid[i]. all_rec_per_port[j].all_recs[GUID_REC_SIZE * k] = cpu_to_be64(MLX4_GUID_FOR_DELETE_VAL); } } INIT_LIST_HEAD(&dev->sriov.alias_guid.ports_guid[i].cb_list); /*prepare the records, set them to be allocated by sm*/ for (j = 0 ; j < NUM_ALIAS_GUID_REC_IN_PORT; j++) invalidate_guid_record(dev, i + 1, j); dev->sriov.alias_guid.ports_guid[i].parent = &dev->sriov.alias_guid; dev->sriov.alias_guid.ports_guid[i].port = i; if (mlx4_ib_sm_guid_assign) set_all_slaves_guids(dev, i); snprintf(alias_wq_name, sizeof alias_wq_name, "alias_guid%d", i); dev->sriov.alias_guid.ports_guid[i].wq = create_singlethread_workqueue(alias_wq_name); if (!dev->sriov.alias_guid.ports_guid[i].wq) { ret = -ENOMEM; goto err_thread; } INIT_DELAYED_WORK(&dev->sriov.alias_guid.ports_guid[i].alias_guid_work, alias_guid_work); } return 0; err_thread: for (--i; i >= 0; i--) { destroy_workqueue(dev->sriov.alias_guid.ports_guid[i].wq); dev->sriov.alias_guid.ports_guid[i].wq = NULL; } err_unregister: ib_sa_unregister_client(dev->sriov.alias_guid.sa_client); kfree(dev->sriov.alias_guid.sa_client); dev->sriov.alias_guid.sa_client = NULL; pr_err("init_alias_guid_service: Failed. (ret:%d)\n", ret); return ret; } Index: stable/10/sys/ofed/drivers/infiniband/hw/mlx4/cm.c =================================================================== --- stable/10/sys/ofed/drivers/infiniband/hw/mlx4/cm.c (revision 271126) +++ stable/10/sys/ofed/drivers/infiniband/hw/mlx4/cm.c (revision 271127) @@ -1,440 +1,440 @@ /* * Copyright (c) 2012 Mellanox Technologies. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU * General Public License (GPL) Version 2, available from the file * COPYING in the main directory of this source tree, or the * OpenIB.org BSD license below: * * Redistribution and use in source and binary forms, with or * without modification, are permitted provided that the following * conditions are met: * * - Redistributions of source code must retain the above * copyright notice, this list of conditions and the following * disclaimer. * * - Redistributions in binary form must reproduce the above * copyright notice, this list of conditions and the following * disclaimer in the documentation and/or other materials * provided with the distribution. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ #include #include #include #include #include "mlx4_ib.h" #define CM_CLEANUP_CACHE_TIMEOUT (5 * HZ) struct id_map_entry { struct rb_node node; u32 sl_cm_id; u32 pv_cm_id; int slave_id; int scheduled_delete; struct mlx4_ib_dev *dev; struct list_head list; struct delayed_work timeout; }; struct cm_generic_msg { struct ib_mad_hdr hdr; __be32 local_comm_id; __be32 remote_comm_id; }; struct cm_req_msg { unsigned char unused[0x60]; union ib_gid primary_path_sgid; }; static void set_local_comm_id(struct ib_mad *mad, u32 cm_id) { struct cm_generic_msg *msg = (struct cm_generic_msg *)mad; msg->local_comm_id = cpu_to_be32(cm_id); } static u32 get_local_comm_id(struct ib_mad *mad) { struct cm_generic_msg *msg = (struct cm_generic_msg *)mad; return be32_to_cpu(msg->local_comm_id); } static void set_remote_comm_id(struct ib_mad *mad, u32 cm_id) { struct cm_generic_msg *msg = (struct cm_generic_msg *)mad; msg->remote_comm_id = cpu_to_be32(cm_id); } static u32 get_remote_comm_id(struct ib_mad *mad) { struct cm_generic_msg *msg = (struct cm_generic_msg *)mad; return be32_to_cpu(msg->remote_comm_id); } static union ib_gid gid_from_req_msg(struct ib_device *ibdev, struct ib_mad *mad) { struct cm_req_msg *msg = (struct cm_req_msg *)mad; return msg->primary_path_sgid; } /* Lock should be taken before called */ static struct id_map_entry * id_map_find_by_sl_id(struct ib_device *ibdev, u32 slave_id, u32 sl_cm_id) { struct rb_root *sl_id_map = &to_mdev(ibdev)->sriov.sl_id_map; struct rb_node *node = sl_id_map->rb_node; while (node) { struct id_map_entry *id_map_entry = rb_entry(node, struct id_map_entry, node); if (id_map_entry->sl_cm_id > sl_cm_id) node = node->rb_left; else if (id_map_entry->sl_cm_id < sl_cm_id) node = node->rb_right; else if (id_map_entry->slave_id > slave_id) node = node->rb_left; else if (id_map_entry->slave_id < slave_id) node = node->rb_right; else return id_map_entry; } return NULL; } static void id_map_ent_timeout(struct work_struct *work) { struct delayed_work *delay = to_delayed_work(work); struct id_map_entry *ent = container_of(delay, struct id_map_entry, timeout); struct id_map_entry *db_ent, *found_ent; struct mlx4_ib_dev *dev = ent->dev; struct mlx4_ib_sriov *sriov = &dev->sriov; struct rb_root *sl_id_map = &sriov->sl_id_map; int pv_id = (int) ent->pv_cm_id; spin_lock(&sriov->id_map_lock); db_ent = (struct id_map_entry *)idr_find(&sriov->pv_id_table, pv_id); if (!db_ent) goto out; found_ent = id_map_find_by_sl_id(&dev->ib_dev, ent->slave_id, ent->sl_cm_id); if (found_ent && found_ent == ent) rb_erase(&found_ent->node, sl_id_map); idr_remove(&sriov->pv_id_table, pv_id); out: list_del(&ent->list); spin_unlock(&sriov->id_map_lock); kfree(ent); } static void id_map_find_del(struct ib_device *ibdev, int pv_cm_id) { struct mlx4_ib_sriov *sriov = &to_mdev(ibdev)->sriov; struct rb_root *sl_id_map = &sriov->sl_id_map; struct id_map_entry *ent, *found_ent; spin_lock(&sriov->id_map_lock); ent = (struct id_map_entry *)idr_find(&sriov->pv_id_table, pv_cm_id); if (!ent) goto out; found_ent = id_map_find_by_sl_id(ibdev, ent->slave_id, ent->sl_cm_id); if (found_ent && found_ent == ent) rb_erase(&found_ent->node, sl_id_map); idr_remove(&sriov->pv_id_table, pv_cm_id); out: spin_unlock(&sriov->id_map_lock); } static void sl_id_map_add(struct ib_device *ibdev, struct id_map_entry *new) { struct rb_root *sl_id_map = &to_mdev(ibdev)->sriov.sl_id_map; struct rb_node **link = &sl_id_map->rb_node, *parent = NULL; struct id_map_entry *ent; int slave_id = new->slave_id; int sl_cm_id = new->sl_cm_id; ent = id_map_find_by_sl_id(ibdev, slave_id, sl_cm_id); if (ent) { pr_debug("overriding existing sl_id_map entry (cm_id = %x)\n", sl_cm_id); rb_replace_node(&ent->node, &new->node, sl_id_map); return; } /* Go to the bottom of the tree */ while (*link) { parent = *link; ent = rb_entry(parent, struct id_map_entry, node); if (ent->sl_cm_id > sl_cm_id || (ent->sl_cm_id == sl_cm_id && ent->slave_id > slave_id)) link = &(*link)->rb_left; else link = &(*link)->rb_right; } rb_link_node(&new->node, parent, link); rb_insert_color(&new->node, sl_id_map); } static struct id_map_entry * id_map_alloc(struct ib_device *ibdev, int slave_id, u32 sl_cm_id) { int ret, id; static int next_id; struct id_map_entry *ent; struct mlx4_ib_sriov *sriov = &to_mdev(ibdev)->sriov; ent = kmalloc(sizeof (struct id_map_entry), GFP_KERNEL); if (!ent) { mlx4_ib_warn(ibdev, "Couldn't allocate id cache entry - out of memory\n"); return ERR_PTR(-ENOMEM); } ent->sl_cm_id = sl_cm_id; ent->slave_id = slave_id; ent->scheduled_delete = 0; ent->dev = to_mdev(ibdev); INIT_DELAYED_WORK(&ent->timeout, id_map_ent_timeout); do { spin_lock(&to_mdev(ibdev)->sriov.id_map_lock); ret = idr_get_new_above(&sriov->pv_id_table, ent, next_id, &id); if (!ret) { next_id = ((unsigned) id + 1) & MAX_IDR_MASK; ent->pv_cm_id = (u32)id; sl_id_map_add(ibdev, ent); } spin_unlock(&sriov->id_map_lock); } while (ret == -EAGAIN && idr_pre_get(&sriov->pv_id_table, GFP_KERNEL)); /*the function idr_get_new_above can return -ENOSPC, so don't insert in that case.*/ if (!ret) { spin_lock(&sriov->id_map_lock); list_add_tail(&ent->list, &sriov->cm_list); spin_unlock(&sriov->id_map_lock); return ent; } /*error flow*/ kfree(ent); mlx4_ib_warn(ibdev, "No more space in the idr (err:0x%x)\n", ret); return ERR_PTR(-ENOMEM); } static struct id_map_entry * id_map_get(struct ib_device *ibdev, int *pv_cm_id, int sl_cm_id, int slave_id) { struct id_map_entry *ent; struct mlx4_ib_sriov *sriov = &to_mdev(ibdev)->sriov; spin_lock(&sriov->id_map_lock); if (*pv_cm_id == -1) { ent = id_map_find_by_sl_id(ibdev, sl_cm_id, slave_id); if (ent) *pv_cm_id = (int) ent->pv_cm_id; } else ent = (struct id_map_entry *)idr_find(&sriov->pv_id_table, *pv_cm_id); spin_unlock(&sriov->id_map_lock); return ent; } static void schedule_delayed(struct ib_device *ibdev, struct id_map_entry *id) { struct mlx4_ib_sriov *sriov = &to_mdev(ibdev)->sriov; unsigned long flags; spin_lock(&sriov->id_map_lock); spin_lock_irqsave(&sriov->going_down_lock, flags); /*make sure that there is no schedule inside the scheduled work.*/ if (!sriov->is_going_down) { id->scheduled_delete = 1; schedule_delayed_work(&id->timeout, CM_CLEANUP_CACHE_TIMEOUT); } spin_unlock_irqrestore(&sriov->going_down_lock, flags); spin_unlock(&sriov->id_map_lock); } int mlx4_ib_multiplex_cm_handler(struct ib_device *ibdev, int port, int slave_id, struct ib_mad *mad) { struct id_map_entry *id; u32 sl_cm_id; int pv_cm_id = -1; sl_cm_id = get_local_comm_id(mad); if (mad->mad_hdr.attr_id == CM_REQ_ATTR_ID || mad->mad_hdr.attr_id == CM_REP_ATTR_ID) { id = id_map_alloc(ibdev, slave_id, sl_cm_id); if (IS_ERR(id)) { mlx4_ib_warn(ibdev, "%s: id{slave: %d, sl_cm_id: 0x%x} Failed to id_map_alloc\n", __func__, slave_id, sl_cm_id); return PTR_ERR(id); } } else if (mad->mad_hdr.attr_id == CM_REJ_ATTR_ID) { return 0; } else { id = id_map_get(ibdev, &pv_cm_id, slave_id, sl_cm_id); } if (!id) { pr_debug("id{slave: %d, sl_cm_id: 0x%x} is NULL!\n", slave_id, sl_cm_id); return -EINVAL; } set_local_comm_id(mad, id->pv_cm_id); if (mad->mad_hdr.attr_id == CM_DREQ_ATTR_ID) schedule_delayed(ibdev, id); else if (mad->mad_hdr.attr_id == CM_DREP_ATTR_ID) id_map_find_del(ibdev, pv_cm_id); return 0; } int mlx4_ib_demux_cm_handler(struct ib_device *ibdev, int port, int *slave, struct ib_mad *mad, int is_eth) { u32 pv_cm_id; struct id_map_entry *id; if (mad->mad_hdr.attr_id == CM_REQ_ATTR_ID) { union ib_gid gid; if (is_eth) return 0; gid = gid_from_req_msg(ibdev, mad); *slave = mlx4_ib_find_real_gid(ibdev, port, gid.global.interface_id); if (*slave < 0) { mlx4_ib_warn(ibdev, "failed matching slave_id by gid (0x%llx)\n", - gid.global.interface_id); + (long long)gid.global.interface_id); return -ENOENT; } return 0; } pv_cm_id = get_remote_comm_id(mad); id = id_map_get(ibdev, (int *)&pv_cm_id, -1, -1); if (!id) { pr_debug("Couldn't find an entry for pv_cm_id 0x%x\n", pv_cm_id); return -ENOENT; } if (!is_eth) *slave = id->slave_id; set_remote_comm_id(mad, id->sl_cm_id); if (mad->mad_hdr.attr_id == CM_DREQ_ATTR_ID) schedule_delayed(ibdev, id); else if (mad->mad_hdr.attr_id == CM_REJ_ATTR_ID || mad->mad_hdr.attr_id == CM_DREP_ATTR_ID) { id_map_find_del(ibdev, (int) pv_cm_id); } return 0; } void mlx4_ib_cm_paravirt_init(struct mlx4_ib_dev *dev) { spin_lock_init(&dev->sriov.id_map_lock); INIT_LIST_HEAD(&dev->sriov.cm_list); dev->sriov.sl_id_map = RB_ROOT; idr_init(&dev->sriov.pv_id_table); idr_pre_get(&dev->sriov.pv_id_table, GFP_KERNEL); } /* slave = -1 ==> all slaves */ /* TBD -- call paravirt clean for single slave. Need for slave RESET event */ void mlx4_ib_cm_paravirt_clean(struct mlx4_ib_dev *dev, int slave) { struct mlx4_ib_sriov *sriov = &dev->sriov; struct rb_root *sl_id_map = &sriov->sl_id_map; struct list_head lh; struct rb_node *nd; int need_flush = 1; struct id_map_entry *map, *tmp_map; /* cancel all delayed work queue entries */ INIT_LIST_HEAD(&lh); spin_lock(&sriov->id_map_lock); list_for_each_entry_safe(map, tmp_map, &dev->sriov.cm_list, list) { if (slave < 0 || slave == map->slave_id) { if (map->scheduled_delete) need_flush &= !!cancel_delayed_work(&map->timeout); } } spin_unlock(&sriov->id_map_lock); if (!need_flush) flush_scheduled_work(); /* make sure all timers were flushed */ /* now, remove all leftover entries from databases*/ spin_lock(&sriov->id_map_lock); if (slave < 0) { while (rb_first(sl_id_map)) { struct id_map_entry *ent = rb_entry(rb_first(sl_id_map), struct id_map_entry, node); rb_erase(&ent->node, sl_id_map); idr_remove(&sriov->pv_id_table, (int) ent->pv_cm_id); } list_splice_init(&dev->sriov.cm_list, &lh); } else { /* first, move nodes belonging to slave to db remove list */ nd = rb_first(sl_id_map); while (nd) { struct id_map_entry *ent = rb_entry(nd, struct id_map_entry, node); nd = rb_next(nd); if (ent->slave_id == slave) list_move_tail(&ent->list, &lh); } /* remove those nodes from databases */ list_for_each_entry_safe(map, tmp_map, &lh, list) { rb_erase(&map->node, sl_id_map); idr_remove(&sriov->pv_id_table, (int) map->pv_cm_id); } /* add remaining nodes from cm_list */ list_for_each_entry_safe(map, tmp_map, &dev->sriov.cm_list, list) { if (slave == map->slave_id) list_move_tail(&map->list, &lh); } } spin_unlock(&sriov->id_map_lock); /* free any map entries left behind due to cancel_delayed_work above */ list_for_each_entry_safe(map, tmp_map, &lh, list) { list_del(&map->list); kfree(map); } } Index: stable/10/sys/ofed/drivers/infiniband/hw/mlx4/mad.c =================================================================== --- stable/10/sys/ofed/drivers/infiniband/hw/mlx4/mad.c (revision 271126) +++ stable/10/sys/ofed/drivers/infiniband/hw/mlx4/mad.c (revision 271127) @@ -1,2294 +1,2294 @@ /* * Copyright (c) 2007 Cisco Systems, Inc. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU * General Public License (GPL) Version 2, available from the file * COPYING in the main directory of this source tree, or the * OpenIB.org BSD license below: * * Redistribution and use in source and binary forms, with or * without modification, are permitted provided that the following * conditions are met: * * - Redistributions of source code must retain the above * copyright notice, this list of conditions and the following * disclaimer. * * - Redistributions in binary form must reproduce the above * copyright notice, this list of conditions and the following * disclaimer in the documentation and/or other materials * provided with the distribution. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ #include #include #include #include #include #include #include #include #include "mlx4_ib.h" enum { MLX4_IB_VENDOR_CLASS1 = 0x9, MLX4_IB_VENDOR_CLASS2 = 0xa }; #define MLX4_TUN_SEND_WRID_SHIFT 34 #define MLX4_TUN_QPN_SHIFT 32 #define MLX4_TUN_WRID_RECV (((u64) 1) << MLX4_TUN_SEND_WRID_SHIFT) #define MLX4_TUN_SET_WRID_QPN(a) (((u64) ((a) & 0x3)) << MLX4_TUN_QPN_SHIFT) #define MLX4_TUN_IS_RECV(a) (((a) >> MLX4_TUN_SEND_WRID_SHIFT) & 0x1) #define MLX4_TUN_WRID_QPN(a) (((a) >> MLX4_TUN_QPN_SHIFT) & 0x3) /* Port mgmt change event handling */ #define GET_BLK_PTR_FROM_EQE(eqe) be32_to_cpu(eqe->event.port_mgmt_change.params.tbl_change_info.block_ptr) #define GET_MASK_FROM_EQE(eqe) be32_to_cpu(eqe->event.port_mgmt_change.params.tbl_change_info.tbl_entries_mask) #define NUM_IDX_IN_PKEY_TBL_BLK 32 #define GUID_TBL_ENTRY_SIZE 8 /* size in bytes */ #define GUID_TBL_BLK_NUM_ENTRIES 8 #define GUID_TBL_BLK_SIZE (GUID_TBL_ENTRY_SIZE * GUID_TBL_BLK_NUM_ENTRIES) struct mlx4_mad_rcv_buf { struct ib_grh grh; u8 payload[256]; } __packed; struct mlx4_mad_snd_buf { u8 payload[256]; } __packed; struct mlx4_tunnel_mad { struct ib_grh grh; struct mlx4_ib_tunnel_header hdr; struct ib_mad mad; } __packed; struct mlx4_rcv_tunnel_mad { struct mlx4_rcv_tunnel_hdr hdr; struct ib_grh grh; struct ib_mad mad; } __packed; static void handle_client_rereg_event(struct mlx4_ib_dev *dev, u8 port_num); static void handle_lid_change_event(struct mlx4_ib_dev *dev, u8 port_num); static void __propagate_pkey_ev(struct mlx4_ib_dev *dev, int port_num, int block, u32 change_bitmap); __be64 mlx4_ib_gen_node_guid(void) { #define NODE_GUID_HI ((u64) (((u64)IB_OPENIB_OUI) << 40)) return cpu_to_be64(NODE_GUID_HI | random()); } __be64 mlx4_ib_get_new_demux_tid(struct mlx4_ib_demux_ctx *ctx) { return cpu_to_be64(atomic_inc_return(&ctx->tid)) | cpu_to_be64(0xff00000000000000LL); } int mlx4_MAD_IFC(struct mlx4_ib_dev *dev, int mad_ifc_flags, int port, struct ib_wc *in_wc, struct ib_grh *in_grh, void *in_mad, void *response_mad) { struct mlx4_cmd_mailbox *inmailbox, *outmailbox; void *inbox; int err; u32 in_modifier = port; u8 op_modifier = 0; inmailbox = mlx4_alloc_cmd_mailbox(dev->dev); if (IS_ERR(inmailbox)) return PTR_ERR(inmailbox); inbox = inmailbox->buf; outmailbox = mlx4_alloc_cmd_mailbox(dev->dev); if (IS_ERR(outmailbox)) { mlx4_free_cmd_mailbox(dev->dev, inmailbox); return PTR_ERR(outmailbox); } memcpy(inbox, in_mad, 256); /* * Key check traps can't be generated unless we have in_wc to * tell us where to send the trap. */ if ((mad_ifc_flags & MLX4_MAD_IFC_IGNORE_MKEY) || !in_wc) op_modifier |= 0x1; if ((mad_ifc_flags & MLX4_MAD_IFC_IGNORE_BKEY) || !in_wc) op_modifier |= 0x2; if (mlx4_is_mfunc(dev->dev) && (mad_ifc_flags & MLX4_MAD_IFC_NET_VIEW || in_wc)) op_modifier |= 0x8; if (in_wc) { struct { __be32 my_qpn; u32 reserved1; __be32 rqpn; u8 sl; u8 g_path; u16 reserved2[2]; __be16 pkey; u32 reserved3[11]; u8 grh[40]; } *ext_info; memset(inbox + 256, 0, 256); ext_info = inbox + 256; ext_info->my_qpn = cpu_to_be32(in_wc->qp->qp_num); ext_info->rqpn = cpu_to_be32(in_wc->src_qp); ext_info->sl = in_wc->sl << 4; ext_info->g_path = in_wc->dlid_path_bits | (in_wc->wc_flags & IB_WC_GRH ? 0x80 : 0); ext_info->pkey = cpu_to_be16(in_wc->pkey_index); if (in_grh) memcpy(ext_info->grh, in_grh, 40); op_modifier |= 0x4; in_modifier |= in_wc->slid << 16; } err = mlx4_cmd_box(dev->dev, inmailbox->dma, outmailbox->dma, in_modifier, mlx4_is_master(dev->dev) ? (op_modifier & ~0x8) : op_modifier, MLX4_CMD_MAD_IFC, MLX4_CMD_TIME_CLASS_C, (op_modifier & 0x8) ? MLX4_CMD_NATIVE : MLX4_CMD_WRAPPED); if (!err) memcpy(response_mad, outmailbox->buf, 256); mlx4_free_cmd_mailbox(dev->dev, inmailbox); mlx4_free_cmd_mailbox(dev->dev, outmailbox); return err; } static void update_sm_ah(struct mlx4_ib_dev *dev, u8 port_num, u16 lid, u8 sl) { struct ib_ah *new_ah; struct ib_ah_attr ah_attr; unsigned long flags; if (!dev->send_agent[port_num - 1][0]) return; memset(&ah_attr, 0, sizeof ah_attr); ah_attr.dlid = lid; ah_attr.sl = sl; ah_attr.port_num = port_num; new_ah = ib_create_ah(dev->send_agent[port_num - 1][0]->qp->pd, &ah_attr); if (IS_ERR(new_ah)) return; spin_lock_irqsave(&dev->sm_lock, flags); if (dev->sm_ah[port_num - 1]) ib_destroy_ah(dev->sm_ah[port_num - 1]); dev->sm_ah[port_num - 1] = new_ah; spin_unlock_irqrestore(&dev->sm_lock, flags); } /* * Snoop SM MADs for port info, GUID info, and P_Key table sets, so we can * synthesize LID change, Client-Rereg, GID change, and P_Key change events. */ static void smp_snoop(struct ib_device *ibdev, u8 port_num, struct ib_mad *mad, u16 prev_lid) { struct ib_port_info *pinfo; u16 lid; __be16 *base; u32 bn, pkey_change_bitmap; int i; struct mlx4_ib_dev *dev = to_mdev(ibdev); if ((mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_SUBN_LID_ROUTED || mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) && mad->mad_hdr.method == IB_MGMT_METHOD_SET) switch (mad->mad_hdr.attr_id) { case IB_SMP_ATTR_PORT_INFO: pinfo = (struct ib_port_info *) ((struct ib_smp *) mad)->data; lid = be16_to_cpu(pinfo->lid); update_sm_ah(dev, port_num, be16_to_cpu(pinfo->sm_lid), pinfo->neighbormtu_mastersmsl & 0xf); if (pinfo->clientrereg_resv_subnetto & 0x80) handle_client_rereg_event(dev, port_num); if (prev_lid != lid) handle_lid_change_event(dev, port_num); break; case IB_SMP_ATTR_PKEY_TABLE: if (!mlx4_is_mfunc(dev->dev)) { mlx4_ib_dispatch_event(dev, port_num, IB_EVENT_PKEY_CHANGE); break; } /* at this point, we are running in the master. * Slaves do not receive SMPs. */ bn = be32_to_cpu(((struct ib_smp *)mad)->attr_mod) & 0xFFFF; base = (__be16 *) &(((struct ib_smp *)mad)->data[0]); pkey_change_bitmap = 0; for (i = 0; i < 32; i++) { pr_debug("PKEY[%d] = x%x\n", i + bn*32, be16_to_cpu(base[i])); if (be16_to_cpu(base[i]) != dev->pkeys.phys_pkey_cache[port_num - 1][i + bn*32]) { pkey_change_bitmap |= (1 << i); dev->pkeys.phys_pkey_cache[port_num - 1][i + bn*32] = be16_to_cpu(base[i]); } } pr_debug("PKEY Change event: port=%d, " "block=0x%x, change_bitmap=0x%x\n", port_num, bn, pkey_change_bitmap); if (pkey_change_bitmap) { mlx4_ib_dispatch_event(dev, port_num, IB_EVENT_PKEY_CHANGE); if (!dev->sriov.is_going_down) __propagate_pkey_ev(dev, port_num, bn, pkey_change_bitmap); } break; case IB_SMP_ATTR_GUID_INFO: /* paravirtualized master's guid is guid 0 -- does not change */ if (!mlx4_is_master(dev->dev)) mlx4_ib_dispatch_event(dev, port_num, IB_EVENT_GID_CHANGE); /*if master, notify relevant slaves*/ if (mlx4_is_master(dev->dev) && !dev->sriov.is_going_down) { bn = be32_to_cpu(((struct ib_smp *)mad)->attr_mod); mlx4_ib_update_cache_on_guid_change(dev, bn, port_num, (u8 *)(&((struct ib_smp *)mad)->data)); mlx4_ib_notify_slaves_on_guid_change(dev, bn, port_num, (u8 *)(&((struct ib_smp *)mad)->data)); } break; default: break; } } static void __propagate_pkey_ev(struct mlx4_ib_dev *dev, int port_num, int block, u32 change_bitmap) { int i, ix, slave, err; int have_event = 0; for (slave = 0; slave < dev->dev->caps.sqp_demux; slave++) { if (slave == mlx4_master_func_num(dev->dev)) continue; if (!mlx4_is_slave_active(dev->dev, slave)) continue; have_event = 0; for (i = 0; i < 32; i++) { if (!(change_bitmap & (1 << i))) continue; for (ix = 0; ix < dev->dev->caps.pkey_table_len[port_num]; ix++) { if (dev->pkeys.virt2phys_pkey[slave][port_num - 1] [ix] == i + 32 * block) { err = mlx4_gen_pkey_eqe(dev->dev, slave, port_num); pr_debug("propagate_pkey_ev: slave %d," " port %d, ix %d (%d)\n", slave, port_num, ix, err); have_event = 1; break; } } if (have_event) break; } } } static void node_desc_override(struct ib_device *dev, struct ib_mad *mad) { unsigned long flags; if ((mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_SUBN_LID_ROUTED || mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) && mad->mad_hdr.method == IB_MGMT_METHOD_GET_RESP && mad->mad_hdr.attr_id == IB_SMP_ATTR_NODE_DESC) { spin_lock_irqsave(&to_mdev(dev)->sm_lock, flags); memcpy(((struct ib_smp *) mad)->data, dev->node_desc, 64); spin_unlock_irqrestore(&to_mdev(dev)->sm_lock, flags); } } static void forward_trap(struct mlx4_ib_dev *dev, u8 port_num, struct ib_mad *mad) { int qpn = mad->mad_hdr.mgmt_class != IB_MGMT_CLASS_SUBN_LID_ROUTED; struct ib_mad_send_buf *send_buf; struct ib_mad_agent *agent = dev->send_agent[port_num - 1][qpn]; int ret; unsigned long flags; if (agent) { send_buf = ib_create_send_mad(agent, qpn, 0, 0, IB_MGMT_MAD_HDR, IB_MGMT_MAD_DATA, GFP_ATOMIC); if (IS_ERR(send_buf)) return; /* * We rely here on the fact that MLX QPs don't use the * address handle after the send is posted (this is * wrong following the IB spec strictly, but we know * it's OK for our devices). */ spin_lock_irqsave(&dev->sm_lock, flags); memcpy(send_buf->mad, mad, sizeof *mad); if ((send_buf->ah = dev->sm_ah[port_num - 1])) ret = ib_post_send_mad(send_buf, NULL); else ret = -EINVAL; spin_unlock_irqrestore(&dev->sm_lock, flags); if (ret) ib_free_send_mad(send_buf); } } static int mlx4_ib_demux_sa_handler(struct ib_device *ibdev, int port, int slave, struct ib_sa_mad *sa_mad) { int ret = 0; /* dispatch to different sa handlers */ switch (be16_to_cpu(sa_mad->mad_hdr.attr_id)) { case IB_SA_ATTR_MC_MEMBER_REC: ret = mlx4_ib_mcg_demux_handler(ibdev, port, slave, sa_mad); break; default: break; } return ret; } int mlx4_ib_find_real_gid(struct ib_device *ibdev, u8 port, __be64 guid) { struct mlx4_ib_dev *dev = to_mdev(ibdev); int i; for (i = 0; i < dev->dev->caps.sqp_demux; i++) { if (dev->sriov.demux[port - 1].guid_cache[i] == guid) return i; } return -1; } static int find_slave_port_pkey_ix(struct mlx4_ib_dev *dev, int slave, u8 port, u16 pkey, u16 *ix) { int i, ret; u8 unassigned_pkey_ix, pkey_ix, partial_ix = 0xFF; u16 slot_pkey; if (slave == mlx4_master_func_num(dev->dev)) return ib_find_cached_pkey(&dev->ib_dev, port, pkey, ix); unassigned_pkey_ix = dev->dev->phys_caps.pkey_phys_table_len[port] - 1; for (i = 0; i < dev->dev->caps.pkey_table_len[port]; i++) { if (dev->pkeys.virt2phys_pkey[slave][port - 1][i] == unassigned_pkey_ix) continue; pkey_ix = dev->pkeys.virt2phys_pkey[slave][port - 1][i]; ret = ib_get_cached_pkey(&dev->ib_dev, port, pkey_ix, &slot_pkey); if (ret) continue; if ((slot_pkey & 0x7FFF) == (pkey & 0x7FFF)) { if (slot_pkey & 0x8000) { *ix = (u16) pkey_ix; return 0; } else { /* take first partial pkey index found */ if (partial_ix == 0xFF) partial_ix = pkey_ix; } } } if (partial_ix < 0xFF) { *ix = (u16) partial_ix; return 0; } return -EINVAL; } int mlx4_ib_send_to_slave(struct mlx4_ib_dev *dev, int slave, u8 port, enum ib_qp_type dest_qpt, struct ib_wc *wc, struct ib_grh *grh, struct ib_mad *mad) { struct ib_sge list; struct ib_send_wr wr, *bad_wr; struct mlx4_ib_demux_pv_ctx *tun_ctx; struct mlx4_ib_demux_pv_qp *tun_qp; struct mlx4_rcv_tunnel_mad *tun_mad; struct ib_ah_attr attr; struct ib_ah *ah; struct ib_qp *src_qp = NULL; unsigned tun_tx_ix = 0; int dqpn; int ret = 0; u16 tun_pkey_ix; u16 cached_pkey; u8 is_eth = dev->dev->caps.port_type[port] == MLX4_PORT_TYPE_ETH; if (dest_qpt > IB_QPT_GSI) return -EINVAL; tun_ctx = dev->sriov.demux[port-1].tun[slave]; /* check if proxy qp created */ if (!tun_ctx || tun_ctx->state != DEMUX_PV_STATE_ACTIVE) return -EAGAIN; /* QP0 forwarding only for Dom0 */ if (!dest_qpt && (mlx4_master_func_num(dev->dev) != slave)) return -EINVAL; if (!dest_qpt) tun_qp = &tun_ctx->qp[0]; else tun_qp = &tun_ctx->qp[1]; /* compute P_Key index to put in tunnel header for slave */ if (dest_qpt) { u16 pkey_ix; ret = ib_get_cached_pkey(&dev->ib_dev, port, wc->pkey_index, &cached_pkey); if (ret) return -EINVAL; ret = find_slave_port_pkey_ix(dev, slave, port, cached_pkey, &pkey_ix); if (ret) return -EINVAL; tun_pkey_ix = pkey_ix; } else tun_pkey_ix = dev->pkeys.virt2phys_pkey[slave][port - 1][0]; dqpn = dev->dev->phys_caps.base_proxy_sqpn + 8 * slave + port + (dest_qpt * 2) - 1; /* get tunnel tx data buf for slave */ src_qp = tun_qp->qp; /* create ah. Just need an empty one with the port num for the post send. * The driver will set the force loopback bit in post_send */ memset(&attr, 0, sizeof attr); attr.port_num = port; if (is_eth) { memcpy(&attr.grh.dgid.raw[0], &grh->dgid.raw[0], 16); attr.ah_flags = IB_AH_GRH; } ah = ib_create_ah(tun_ctx->pd, &attr); if (IS_ERR(ah)) return -ENOMEM; /* allocate tunnel tx buf after pass failure returns */ spin_lock(&tun_qp->tx_lock); if (tun_qp->tx_ix_head - tun_qp->tx_ix_tail >= (MLX4_NUM_TUNNEL_BUFS - 1)) ret = -EAGAIN; else tun_tx_ix = (++tun_qp->tx_ix_head) & (MLX4_NUM_TUNNEL_BUFS - 1); spin_unlock(&tun_qp->tx_lock); if (ret) goto out; tun_mad = (struct mlx4_rcv_tunnel_mad *) (tun_qp->tx_ring[tun_tx_ix].buf.addr); if (tun_qp->tx_ring[tun_tx_ix].ah) ib_destroy_ah(tun_qp->tx_ring[tun_tx_ix].ah); tun_qp->tx_ring[tun_tx_ix].ah = ah; ib_dma_sync_single_for_cpu(&dev->ib_dev, tun_qp->tx_ring[tun_tx_ix].buf.map, sizeof (struct mlx4_rcv_tunnel_mad), DMA_TO_DEVICE); /* copy over to tunnel buffer */ if (grh) memcpy(&tun_mad->grh, grh, sizeof *grh); memcpy(&tun_mad->mad, mad, sizeof *mad); /* adjust tunnel data */ tun_mad->hdr.pkey_index = cpu_to_be16(tun_pkey_ix); tun_mad->hdr.sl_vid = cpu_to_be16(((u16)(wc->sl)) << 12); tun_mad->hdr.slid_mac_47_32 = cpu_to_be16(wc->slid); tun_mad->hdr.flags_src_qp = cpu_to_be32(wc->src_qp & 0xFFFFFF); tun_mad->hdr.g_ml_path = (grh && (wc->wc_flags & IB_WC_GRH)) ? 0x80 : 0; ib_dma_sync_single_for_device(&dev->ib_dev, tun_qp->tx_ring[tun_tx_ix].buf.map, sizeof (struct mlx4_rcv_tunnel_mad), DMA_TO_DEVICE); list.addr = tun_qp->tx_ring[tun_tx_ix].buf.map; list.length = sizeof (struct mlx4_rcv_tunnel_mad); list.lkey = tun_ctx->mr->lkey; wr.wr.ud.ah = ah; wr.wr.ud.port_num = port; wr.wr.ud.remote_qkey = IB_QP_SET_QKEY; wr.wr.ud.remote_qpn = dqpn; wr.next = NULL; wr.wr_id = ((u64) tun_tx_ix) | MLX4_TUN_SET_WRID_QPN(dest_qpt); wr.sg_list = &list; wr.num_sge = 1; wr.opcode = IB_WR_SEND; wr.send_flags = IB_SEND_SIGNALED; ret = ib_post_send(src_qp, &wr, &bad_wr); out: if (ret) ib_destroy_ah(ah); return ret; } static int mlx4_ib_demux_mad(struct ib_device *ibdev, u8 port, struct ib_wc *wc, struct ib_grh *grh, struct ib_mad *mad) { struct mlx4_ib_dev *dev = to_mdev(ibdev); int err; int slave; u8 *slave_id; int is_eth = 0; if (rdma_port_get_link_layer(ibdev, port) == IB_LINK_LAYER_INFINIBAND) is_eth = 0; else is_eth = 1; if (is_eth) { if (!wc->wc_flags & IB_WC_GRH) { mlx4_ib_warn(ibdev, "RoCE grh not present.\n"); return -EINVAL; } if (mad->mad_hdr.mgmt_class != IB_MGMT_CLASS_CM) { mlx4_ib_warn(ibdev, "RoCE mgmt class is not CM\n"); return -EINVAL; } if (mlx4_get_slave_from_roce_gid(dev->dev, port, grh->dgid.raw, &slave)) { mlx4_ib_warn(ibdev, "failed matching grh\n"); return -ENOENT; } if (slave >= dev->dev->caps.sqp_demux) { mlx4_ib_warn(ibdev, "slave id: %d is bigger than allowed:%d\n", slave, dev->dev->caps.sqp_demux); return -ENOENT; } if (mlx4_ib_demux_cm_handler(ibdev, port, &slave, mad, is_eth)) return 0; err = mlx4_ib_send_to_slave(dev, slave, port, wc->qp->qp_type, wc, grh, mad); if (err) pr_debug("failed sending to slave %d via tunnel qp (%d)\n", slave, err); return 0; } /* Initially assume that this mad is for us */ slave = mlx4_master_func_num(dev->dev); /* See if the slave id is encoded in a response mad */ if (mad->mad_hdr.method & 0x80) { slave_id = (u8 *) &mad->mad_hdr.tid; slave = *slave_id; if (slave != 255) /*255 indicates the dom0*/ *slave_id = 0; /* remap tid */ } /* If a grh is present, we demux according to it */ if (wc->wc_flags & IB_WC_GRH) { slave = mlx4_ib_find_real_gid(ibdev, port, grh->dgid.global.interface_id); if (slave < 0) { mlx4_ib_warn(ibdev, "failed matching grh\n"); return -ENOENT; } } /* Class-specific handling */ switch (mad->mad_hdr.mgmt_class) { case IB_MGMT_CLASS_SUBN_ADM: if (mlx4_ib_demux_sa_handler(ibdev, port, slave, (struct ib_sa_mad *) mad)) return 0; break; case IB_MGMT_CLASS_CM: if (mlx4_ib_demux_cm_handler(ibdev, port, &slave, mad, is_eth)) return 0; break; case IB_MGMT_CLASS_DEVICE_MGMT: if (mad->mad_hdr.method != IB_MGMT_METHOD_GET_RESP) return 0; break; default: /* Drop unsupported classes for slaves in tunnel mode */ if (slave != mlx4_master_func_num(dev->dev)) { pr_debug("dropping unsupported ingress mad from class:%d " "for slave:%d\n", mad->mad_hdr.mgmt_class, slave); return 0; } } /*make sure that no slave==255 was not handled yet.*/ if (slave >= dev->dev->caps.sqp_demux) { mlx4_ib_warn(ibdev, "slave id: %d is bigger than allowed:%d\n", slave, dev->dev->caps.sqp_demux); return -ENOENT; } err = mlx4_ib_send_to_slave(dev, slave, port, wc->qp->qp_type, wc, grh, mad); if (err) pr_debug("failed sending to slave %d via tunnel qp (%d)\n", slave, err); return 0; } static int ib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num, struct ib_wc *in_wc, struct ib_grh *in_grh, struct ib_mad *in_mad, struct ib_mad *out_mad) { u16 slid, prev_lid = 0; int err; struct ib_port_attr pattr; if (in_wc && in_wc->qp->qp_num) { pr_debug("received MAD: slid:%d sqpn:%d " "dlid_bits:%d dqpn:%d wc_flags:0x%x, cls %x, mtd %x, atr %x\n", in_wc->slid, in_wc->src_qp, in_wc->dlid_path_bits, in_wc->qp->qp_num, in_wc->wc_flags, in_mad->mad_hdr.mgmt_class, in_mad->mad_hdr.method, be16_to_cpu(in_mad->mad_hdr.attr_id)); if (in_wc->wc_flags & IB_WC_GRH) { pr_debug("sgid_hi:0x%016llx sgid_lo:0x%016llx\n", (long long)be64_to_cpu(in_grh->sgid.global.subnet_prefix), (long long) be64_to_cpu(in_grh->sgid.global.interface_id)); pr_debug("dgid_hi:0x%016llx dgid_lo:0x%016llx\n", (long long)be64_to_cpu(in_grh->dgid.global.subnet_prefix), (long long)be64_to_cpu(in_grh->dgid.global.interface_id)); } } slid = in_wc ? in_wc->slid : be16_to_cpu(IB_LID_PERMISSIVE); if (in_mad->mad_hdr.method == IB_MGMT_METHOD_TRAP && slid == 0) { forward_trap(to_mdev(ibdev), port_num, in_mad); return IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_CONSUMED; } if (in_mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_SUBN_LID_ROUTED || in_mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) { if (in_mad->mad_hdr.method != IB_MGMT_METHOD_GET && in_mad->mad_hdr.method != IB_MGMT_METHOD_SET && in_mad->mad_hdr.method != IB_MGMT_METHOD_TRAP_REPRESS) return IB_MAD_RESULT_SUCCESS; /* * Don't process SMInfo queries -- the SMA can't handle them. */ if (in_mad->mad_hdr.attr_id == IB_SMP_ATTR_SM_INFO) return IB_MAD_RESULT_SUCCESS; } else if (in_mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_PERF_MGMT || in_mad->mad_hdr.mgmt_class == MLX4_IB_VENDOR_CLASS1 || in_mad->mad_hdr.mgmt_class == MLX4_IB_VENDOR_CLASS2 || in_mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_CONG_MGMT) { if (in_mad->mad_hdr.method != IB_MGMT_METHOD_GET && in_mad->mad_hdr.method != IB_MGMT_METHOD_SET) return IB_MAD_RESULT_SUCCESS; } else return IB_MAD_RESULT_SUCCESS; if ((in_mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_SUBN_LID_ROUTED || in_mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) && in_mad->mad_hdr.method == IB_MGMT_METHOD_SET && in_mad->mad_hdr.attr_id == IB_SMP_ATTR_PORT_INFO && !ib_query_port(ibdev, port_num, &pattr)) prev_lid = pattr.lid; err = mlx4_MAD_IFC(to_mdev(ibdev), (mad_flags & IB_MAD_IGNORE_MKEY ? MLX4_MAD_IFC_IGNORE_MKEY : 0) | (mad_flags & IB_MAD_IGNORE_BKEY ? MLX4_MAD_IFC_IGNORE_BKEY : 0) | MLX4_MAD_IFC_NET_VIEW, port_num, in_wc, in_grh, in_mad, out_mad); if (err) return IB_MAD_RESULT_FAILURE; if (!out_mad->mad_hdr.status) { if (!(to_mdev(ibdev)->dev->caps.flags & MLX4_DEV_CAP_FLAG_PORT_MNG_CHG_EV)) smp_snoop(ibdev, port_num, in_mad, prev_lid); /* slaves get node desc from FW */ if (!mlx4_is_slave(to_mdev(ibdev)->dev)) node_desc_override(ibdev, out_mad); } /* set return bit in status of directed route responses */ if (in_mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) out_mad->mad_hdr.status |= cpu_to_be16(1 << 15); if (in_mad->mad_hdr.method == IB_MGMT_METHOD_TRAP_REPRESS) /* no response for trap repress */ return IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_CONSUMED; return IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_REPLY; } static void edit_counter_ext(struct mlx4_if_stat_extended *cnt, void *counters, __be16 attr_id) { switch (attr_id) { case IB_PMA_PORT_COUNTERS: { struct ib_pma_portcounters *pma_cnt = (struct ib_pma_portcounters *)counters; pma_cnt->port_xmit_data = cpu_to_be32((be64_to_cpu(cnt->counters[0]. IfTxUnicastOctets) + be64_to_cpu(cnt->counters[0]. IfTxMulticastOctets) + be64_to_cpu(cnt->counters[0]. IfTxBroadcastOctets) + be64_to_cpu(cnt->counters[0]. IfTxDroppedOctets)) >> 2); pma_cnt->port_rcv_data = cpu_to_be32((be64_to_cpu(cnt->counters[0]. IfRxUnicastOctets) + be64_to_cpu(cnt->counters[0]. IfRxMulticastOctets) + be64_to_cpu(cnt->counters[0]. IfRxBroadcastOctets) + be64_to_cpu(cnt->counters[0]. IfRxNoBufferOctets) + be64_to_cpu(cnt->counters[0]. IfRxErrorOctets)) >> 2); pma_cnt->port_xmit_packets = cpu_to_be32(be64_to_cpu(cnt->counters[0]. IfTxUnicastFrames) + be64_to_cpu(cnt->counters[0]. IfTxMulticastFrames) + be64_to_cpu(cnt->counters[0]. IfTxBroadcastFrames) + be64_to_cpu(cnt->counters[0]. IfTxDroppedFrames)); pma_cnt->port_rcv_packets = cpu_to_be32(be64_to_cpu(cnt->counters[0]. IfRxUnicastFrames) + be64_to_cpu(cnt->counters[0]. IfRxMulticastFrames) + be64_to_cpu(cnt->counters[0]. IfRxBroadcastFrames) + be64_to_cpu(cnt->counters[0]. IfRxNoBufferFrames) + be64_to_cpu(cnt->counters[0]. IfRxErrorFrames)); pma_cnt->port_rcv_errors = cpu_to_be32(be64_to_cpu(cnt-> counters[0]. IfRxErrorFrames)); break; } case IB_PMA_PORT_COUNTERS_EXT: { struct ib_pma_portcounters_ext *pma_cnt_ext = (struct ib_pma_portcounters_ext *)counters; pma_cnt_ext->port_xmit_data = cpu_to_be64((be64_to_cpu(cnt->counters[0]. IfTxUnicastOctets) + be64_to_cpu(cnt->counters[0]. IfTxMulticastOctets) + be64_to_cpu(cnt->counters[0]. IfTxBroadcastOctets) + be64_to_cpu(cnt->counters[0]. IfTxDroppedOctets)) >> 2); pma_cnt_ext->port_rcv_data = cpu_to_be64((be64_to_cpu(cnt->counters[0]. IfRxUnicastOctets) + be64_to_cpu(cnt->counters[0]. IfRxMulticastOctets) + be64_to_cpu(cnt->counters[0]. IfRxBroadcastOctets) + be64_to_cpu(cnt->counters[0]. IfRxNoBufferOctets) + be64_to_cpu(cnt->counters[0]. IfRxErrorOctets)) >> 2); pma_cnt_ext->port_xmit_packets = cpu_to_be64(be64_to_cpu(cnt->counters[0]. IfTxUnicastFrames) + be64_to_cpu(cnt->counters[0]. IfTxMulticastFrames) + be64_to_cpu(cnt->counters[0]. IfTxBroadcastFrames) + be64_to_cpu(cnt->counters[0]. IfTxDroppedFrames)); pma_cnt_ext->port_rcv_packets = cpu_to_be64(be64_to_cpu(cnt->counters[0]. IfRxUnicastFrames) + be64_to_cpu(cnt->counters[0]. IfRxMulticastFrames) + be64_to_cpu(cnt->counters[0]. IfRxBroadcastFrames) + be64_to_cpu(cnt->counters[0]. IfRxNoBufferFrames) + be64_to_cpu(cnt->counters[0]. IfRxErrorFrames)); pma_cnt_ext->port_unicast_xmit_packets = cnt->counters[0]. IfTxUnicastFrames; pma_cnt_ext->port_unicast_rcv_packets = cnt->counters[0]. IfRxUnicastFrames; pma_cnt_ext->port_multicast_xmit_packets = cpu_to_be64(be64_to_cpu(cnt->counters[0]. IfTxMulticastFrames) + be64_to_cpu(cnt->counters[0]. IfTxBroadcastFrames)); pma_cnt_ext->port_multicast_rcv_packets = cpu_to_be64(be64_to_cpu(cnt->counters[0]. IfTxMulticastFrames) + be64_to_cpu(cnt->counters[0]. IfTxBroadcastFrames)); break; } default: pr_warn("Unsupported attr_id 0x%x\n", attr_id); break; } } static void edit_counter(struct mlx4_if_stat_basic *cnt, void *counters, __be16 attr_id) { switch (attr_id) { case IB_PMA_PORT_COUNTERS: { struct ib_pma_portcounters *pma_cnt = (struct ib_pma_portcounters *) counters; pma_cnt->port_xmit_data = cpu_to_be32(be64_to_cpu( cnt->counters[0].IfTxOctets) >> 2); pma_cnt->port_rcv_data = cpu_to_be32(be64_to_cpu( cnt->counters[0].IfRxOctets) >> 2); pma_cnt->port_xmit_packets = cpu_to_be32(be64_to_cpu(cnt->counters[0].IfTxFrames)); pma_cnt->port_rcv_packets = cpu_to_be32(be64_to_cpu(cnt->counters[0].IfRxFrames)); break; } case IB_PMA_PORT_COUNTERS_EXT: { struct ib_pma_portcounters_ext *pma_cnt_ext = (struct ib_pma_portcounters_ext *) counters; pma_cnt_ext->port_xmit_data = cpu_to_be64((be64_to_cpu(cnt->counters[0]. IfTxOctets) >> 2)); pma_cnt_ext->port_rcv_data = cpu_to_be64((be64_to_cpu(cnt->counters[0]. IfRxOctets) >> 2)); pma_cnt_ext->port_xmit_packets = cnt->counters[0].IfTxFrames; pma_cnt_ext->port_rcv_packets = cnt->counters[0].IfRxFrames; break; } default: pr_warn("Unsupported attr_id 0x%x\n", attr_id); break; } } int mlx4_ib_query_if_stat(struct mlx4_ib_dev *dev, u32 counter_index, union mlx4_counter *counter, u8 clear) { struct mlx4_cmd_mailbox *mailbox; int err; u32 inmod = counter_index | ((clear & 1) << 31); mailbox = mlx4_alloc_cmd_mailbox(dev->dev); if (IS_ERR(mailbox)) return IB_MAD_RESULT_FAILURE; err = mlx4_cmd_box(dev->dev, 0, mailbox->dma, inmod, 0, MLX4_CMD_QUERY_IF_STAT, MLX4_CMD_TIME_CLASS_C, MLX4_CMD_WRAPPED); if (!err) memcpy(counter, mailbox->buf, MLX4_IF_STAT_SZ(1)); mlx4_free_cmd_mailbox(dev->dev, mailbox); return err; } static int iboe_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num, struct ib_wc *in_wc, struct ib_grh *in_grh, struct ib_mad *in_mad, struct ib_mad *out_mad) { struct mlx4_ib_dev *dev = to_mdev(ibdev); int err; u32 counter_index = dev->counters[port_num - 1] & 0xffff; u8 mode; char counter_buf[MLX4_IF_STAT_SZ(1)]; union mlx4_counter *counter = (union mlx4_counter *) counter_buf; if (in_mad->mad_hdr.mgmt_class != IB_MGMT_CLASS_PERF_MGMT) return -EINVAL; if (mlx4_ib_query_if_stat(dev, counter_index, counter, 0)) { err = IB_MAD_RESULT_FAILURE; } else { memset(out_mad->data, 0, sizeof out_mad->data); mode = counter->control.cnt_mode & 0xFF; err = IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_REPLY; switch (mode & 0xf) { case 0: edit_counter((void *)counter, (void *)(out_mad->data + 40), in_mad->mad_hdr.attr_id); break; case 1: edit_counter_ext((void *)counter, (void *)(out_mad->data + 40), in_mad->mad_hdr.attr_id); break; default: err = IB_MAD_RESULT_FAILURE; } } return err; } int mlx4_ib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num, struct ib_wc *in_wc, struct ib_grh *in_grh, struct ib_mad *in_mad, struct ib_mad *out_mad) { switch (rdma_port_get_link_layer(ibdev, port_num)) { case IB_LINK_LAYER_INFINIBAND: return ib_process_mad(ibdev, mad_flags, port_num, in_wc, in_grh, in_mad, out_mad); case IB_LINK_LAYER_ETHERNET: return iboe_process_mad(ibdev, mad_flags, port_num, in_wc, in_grh, in_mad, out_mad); default: return -EINVAL; } } static void send_handler(struct ib_mad_agent *agent, struct ib_mad_send_wc *mad_send_wc) { if (mad_send_wc->send_buf->context[0]) ib_destroy_ah(mad_send_wc->send_buf->context[0]); ib_free_send_mad(mad_send_wc->send_buf); } int mlx4_ib_mad_init(struct mlx4_ib_dev *dev) { struct ib_mad_agent *agent; int p, q; int ret; enum rdma_link_layer ll; for (p = 0; p < dev->num_ports; ++p) { ll = rdma_port_get_link_layer(&dev->ib_dev, p + 1); for (q = 0; q <= 1; ++q) { if (ll == IB_LINK_LAYER_INFINIBAND) { agent = ib_register_mad_agent(&dev->ib_dev, p + 1, q ? IB_QPT_GSI : IB_QPT_SMI, NULL, 0, send_handler, NULL, NULL); if (IS_ERR(agent)) { ret = PTR_ERR(agent); goto err; } dev->send_agent[p][q] = agent; } else dev->send_agent[p][q] = NULL; } } return 0; err: for (p = 0; p < dev->num_ports; ++p) for (q = 0; q <= 1; ++q) if (dev->send_agent[p][q]) ib_unregister_mad_agent(dev->send_agent[p][q]); return ret; } void mlx4_ib_mad_cleanup(struct mlx4_ib_dev *dev) { struct ib_mad_agent *agent; int p, q; for (p = 0; p < dev->num_ports; ++p) { for (q = 0; q <= 1; ++q) { agent = dev->send_agent[p][q]; if (agent) { dev->send_agent[p][q] = NULL; ib_unregister_mad_agent(agent); } } if (dev->sm_ah[p]) ib_destroy_ah(dev->sm_ah[p]); } } static void handle_lid_change_event(struct mlx4_ib_dev *dev, u8 port_num) { mlx4_ib_dispatch_event(dev, port_num, IB_EVENT_LID_CHANGE); if (mlx4_is_master(dev->dev) && !dev->sriov.is_going_down) mlx4_gen_slaves_port_mgt_ev(dev->dev, port_num, MLX4_EQ_PORT_INFO_LID_CHANGE_MASK); } static void handle_client_rereg_event(struct mlx4_ib_dev *dev, u8 port_num) { /* re-configure the alias-guid and mcg's */ if (mlx4_is_master(dev->dev)) { mlx4_ib_invalidate_all_guid_record(dev, port_num); if (!dev->sriov.is_going_down) { mlx4_ib_mcg_port_cleanup(&dev->sriov.demux[port_num - 1], 0); mlx4_gen_slaves_port_mgt_ev(dev->dev, port_num, MLX4_EQ_PORT_INFO_CLIENT_REREG_MASK); } } mlx4_ib_dispatch_event(dev, port_num, IB_EVENT_CLIENT_REREGISTER); } static void propagate_pkey_ev(struct mlx4_ib_dev *dev, int port_num, struct mlx4_eqe *eqe) { __propagate_pkey_ev(dev, port_num, GET_BLK_PTR_FROM_EQE(eqe), GET_MASK_FROM_EQE(eqe)); } static void handle_slaves_guid_change(struct mlx4_ib_dev *dev, u8 port_num, u32 guid_tbl_blk_num, u32 change_bitmap) { struct ib_smp *in_mad = NULL; struct ib_smp *out_mad = NULL; u16 i; if (!mlx4_is_mfunc(dev->dev) || !mlx4_is_master(dev->dev)) return; in_mad = kmalloc(sizeof *in_mad, GFP_KERNEL); out_mad = kmalloc(sizeof *out_mad, GFP_KERNEL); if (!in_mad || !out_mad) { mlx4_ib_warn(&dev->ib_dev, "failed to allocate memory for guid info mads\n"); goto out; } guid_tbl_blk_num *= 4; for (i = 0; i < 4; i++) { if (change_bitmap && (!((change_bitmap >> (8 * i)) & 0xff))) continue; memset(in_mad, 0, sizeof *in_mad); memset(out_mad, 0, sizeof *out_mad); in_mad->base_version = 1; in_mad->mgmt_class = IB_MGMT_CLASS_SUBN_LID_ROUTED; in_mad->class_version = 1; in_mad->method = IB_MGMT_METHOD_GET; in_mad->attr_id = IB_SMP_ATTR_GUID_INFO; in_mad->attr_mod = cpu_to_be32(guid_tbl_blk_num + i); if (mlx4_MAD_IFC(dev, MLX4_MAD_IFC_IGNORE_KEYS | MLX4_MAD_IFC_NET_VIEW, port_num, NULL, NULL, in_mad, out_mad)) { mlx4_ib_warn(&dev->ib_dev, "Failed in get GUID INFO MAD_IFC\n"); goto out; } mlx4_ib_update_cache_on_guid_change(dev, guid_tbl_blk_num + i, port_num, (u8 *)(&((struct ib_smp *)out_mad)->data)); mlx4_ib_notify_slaves_on_guid_change(dev, guid_tbl_blk_num + i, port_num, (u8 *)(&((struct ib_smp *)out_mad)->data)); } out: kfree(in_mad); kfree(out_mad); return; } void handle_port_mgmt_change_event(struct work_struct *work) { struct ib_event_work *ew = container_of(work, struct ib_event_work, work); struct mlx4_ib_dev *dev = ew->ib_dev; struct mlx4_eqe *eqe = &(ew->ib_eqe); u8 port = eqe->event.port_mgmt_change.port; u32 changed_attr; u32 tbl_block; u32 change_bitmap; switch (eqe->subtype) { case MLX4_DEV_PMC_SUBTYPE_PORT_INFO: changed_attr = be32_to_cpu(eqe->event.port_mgmt_change.params.port_info.changed_attr); /* Update the SM ah - This should be done before handling the other changed attributes so that MADs can be sent to the SM */ if (changed_attr & MSTR_SM_CHANGE_MASK) { u16 lid = be16_to_cpu(eqe->event.port_mgmt_change.params.port_info.mstr_sm_lid); u8 sl = eqe->event.port_mgmt_change.params.port_info.mstr_sm_sl & 0xf; update_sm_ah(dev, port, lid, sl); } /* Check if it is a lid change event */ if (changed_attr & MLX4_EQ_PORT_INFO_LID_CHANGE_MASK) handle_lid_change_event(dev, port); /* Generate GUID changed event */ if (changed_attr & MLX4_EQ_PORT_INFO_GID_PFX_CHANGE_MASK) { mlx4_ib_dispatch_event(dev, port, IB_EVENT_GID_CHANGE); /*if master, notify all slaves*/ if (mlx4_is_master(dev->dev)) mlx4_gen_slaves_port_mgt_ev(dev->dev, port, MLX4_EQ_PORT_INFO_GID_PFX_CHANGE_MASK); } if (changed_attr & MLX4_EQ_PORT_INFO_CLIENT_REREG_MASK) handle_client_rereg_event(dev, port); break; case MLX4_DEV_PMC_SUBTYPE_PKEY_TABLE: mlx4_ib_dispatch_event(dev, port, IB_EVENT_PKEY_CHANGE); if (mlx4_is_master(dev->dev) && !dev->sriov.is_going_down) propagate_pkey_ev(dev, port, eqe); break; case MLX4_DEV_PMC_SUBTYPE_GUID_INFO: /* paravirtualized master's guid is guid 0 -- does not change */ if (!mlx4_is_master(dev->dev)) mlx4_ib_dispatch_event(dev, port, IB_EVENT_GID_CHANGE); /*if master, notify relevant slaves*/ else if (!dev->sriov.is_going_down) { tbl_block = GET_BLK_PTR_FROM_EQE(eqe); change_bitmap = GET_MASK_FROM_EQE(eqe); handle_slaves_guid_change(dev, port, tbl_block, change_bitmap); } break; default: pr_warn("Unsupported subtype 0x%x for " "Port Management Change event\n", eqe->subtype); } kfree(ew); } void mlx4_ib_dispatch_event(struct mlx4_ib_dev *dev, u8 port_num, enum ib_event_type type) { struct ib_event event; event.device = &dev->ib_dev; event.element.port_num = port_num; event.event = type; ib_dispatch_event(&event); } static void mlx4_ib_tunnel_comp_handler(struct ib_cq *cq, void *arg) { unsigned long flags; struct mlx4_ib_demux_pv_ctx *ctx = cq->cq_context; struct mlx4_ib_dev *dev = to_mdev(ctx->ib_dev); spin_lock_irqsave(&dev->sriov.going_down_lock, flags); if (!dev->sriov.is_going_down && ctx->state == DEMUX_PV_STATE_ACTIVE) queue_work(ctx->wq, &ctx->work); spin_unlock_irqrestore(&dev->sriov.going_down_lock, flags); } static int mlx4_ib_post_pv_qp_buf(struct mlx4_ib_demux_pv_ctx *ctx, struct mlx4_ib_demux_pv_qp *tun_qp, int index) { struct ib_sge sg_list; struct ib_recv_wr recv_wr, *bad_recv_wr; int size; size = (tun_qp->qp->qp_type == IB_QPT_UD) ? sizeof (struct mlx4_tunnel_mad) : sizeof (struct mlx4_mad_rcv_buf); sg_list.addr = tun_qp->ring[index].map; sg_list.length = size; sg_list.lkey = ctx->mr->lkey; recv_wr.next = NULL; recv_wr.sg_list = &sg_list; recv_wr.num_sge = 1; recv_wr.wr_id = (u64) index | MLX4_TUN_WRID_RECV | MLX4_TUN_SET_WRID_QPN(tun_qp->proxy_qpt); ib_dma_sync_single_for_device(ctx->ib_dev, tun_qp->ring[index].map, size, DMA_FROM_DEVICE); return ib_post_recv(tun_qp->qp, &recv_wr, &bad_recv_wr); } static int mlx4_ib_multiplex_sa_handler(struct ib_device *ibdev, int port, int slave, struct ib_sa_mad *sa_mad) { int ret = 0; /* dispatch to different sa handlers */ switch (be16_to_cpu(sa_mad->mad_hdr.attr_id)) { case IB_SA_ATTR_MC_MEMBER_REC: ret = mlx4_ib_mcg_multiplex_handler(ibdev, port, slave, sa_mad); break; default: break; } return ret; } static int is_proxy_qp0(struct mlx4_ib_dev *dev, int qpn, int slave) { int proxy_start = dev->dev->phys_caps.base_proxy_sqpn + 8 * slave; return (qpn >= proxy_start && qpn <= proxy_start + 1); } int mlx4_ib_send_to_wire(struct mlx4_ib_dev *dev, int slave, u8 port, enum ib_qp_type dest_qpt, u16 pkey_index, u32 remote_qpn, u32 qkey, struct ib_ah_attr *attr, struct ib_mad *mad) { struct ib_sge list; struct ib_send_wr wr, *bad_wr; struct mlx4_ib_demux_pv_ctx *sqp_ctx; struct mlx4_ib_demux_pv_qp *sqp; struct mlx4_mad_snd_buf *sqp_mad; struct ib_ah *ah; struct ib_qp *send_qp = NULL; unsigned wire_tx_ix = 0; int ret = 0; u16 wire_pkey_ix; int src_qpnum; u8 sgid_index; sqp_ctx = dev->sriov.sqps[port-1]; /* check if proxy qp created */ if (!sqp_ctx || sqp_ctx->state != DEMUX_PV_STATE_ACTIVE) return -EAGAIN; /* QP0 forwarding only for Dom0 */ if (dest_qpt == IB_QPT_SMI && (mlx4_master_func_num(dev->dev) != slave)) return -EINVAL; if (dest_qpt == IB_QPT_SMI) { src_qpnum = 0; sqp = &sqp_ctx->qp[0]; wire_pkey_ix = dev->pkeys.virt2phys_pkey[slave][port - 1][0]; } else { src_qpnum = 1; sqp = &sqp_ctx->qp[1]; wire_pkey_ix = dev->pkeys.virt2phys_pkey[slave][port - 1][pkey_index]; } send_qp = sqp->qp; /* create ah */ sgid_index = attr->grh.sgid_index; attr->grh.sgid_index = 0; ah = ib_create_ah(sqp_ctx->pd, attr); if (IS_ERR(ah)) return -ENOMEM; attr->grh.sgid_index = sgid_index; to_mah(ah)->av.ib.gid_index = sgid_index; /* get rid of force-loopback bit */ to_mah(ah)->av.ib.port_pd &= cpu_to_be32(0x7FFFFFFF); spin_lock(&sqp->tx_lock); if (sqp->tx_ix_head - sqp->tx_ix_tail >= (MLX4_NUM_TUNNEL_BUFS - 1)) ret = -EAGAIN; else wire_tx_ix = (++sqp->tx_ix_head) & (MLX4_NUM_TUNNEL_BUFS - 1); spin_unlock(&sqp->tx_lock); if (ret) goto out; sqp_mad = (struct mlx4_mad_snd_buf *) (sqp->tx_ring[wire_tx_ix].buf.addr); if (sqp->tx_ring[wire_tx_ix].ah) ib_destroy_ah(sqp->tx_ring[wire_tx_ix].ah); sqp->tx_ring[wire_tx_ix].ah = ah; ib_dma_sync_single_for_cpu(&dev->ib_dev, sqp->tx_ring[wire_tx_ix].buf.map, sizeof (struct mlx4_mad_snd_buf), DMA_TO_DEVICE); memcpy(&sqp_mad->payload, mad, sizeof *mad); ib_dma_sync_single_for_device(&dev->ib_dev, sqp->tx_ring[wire_tx_ix].buf.map, sizeof (struct mlx4_mad_snd_buf), DMA_TO_DEVICE); list.addr = sqp->tx_ring[wire_tx_ix].buf.map; list.length = sizeof (struct mlx4_mad_snd_buf); list.lkey = sqp_ctx->mr->lkey; wr.wr.ud.ah = ah; wr.wr.ud.port_num = port; wr.wr.ud.pkey_index = wire_pkey_ix; wr.wr.ud.remote_qkey = qkey; wr.wr.ud.remote_qpn = remote_qpn; wr.next = NULL; wr.wr_id = ((u64) wire_tx_ix) | MLX4_TUN_SET_WRID_QPN(src_qpnum); wr.sg_list = &list; wr.num_sge = 1; wr.opcode = IB_WR_SEND; wr.send_flags = IB_SEND_SIGNALED; ret = ib_post_send(send_qp, &wr, &bad_wr); out: if (ret) ib_destroy_ah(ah); return ret; } static int get_slave_base_gid_ix(struct mlx4_ib_dev *dev, int slave, int port) { int gids; int vfs; if (rdma_port_get_link_layer(&dev->ib_dev, port) == IB_LINK_LAYER_INFINIBAND) return slave; gids = MLX4_ROCE_MAX_GIDS - MLX4_ROCE_PF_GIDS; vfs = dev->dev->num_vfs; if (slave == 0) return 0; if (slave <= gids % vfs) return MLX4_ROCE_PF_GIDS + ((gids / vfs) + 1) * (slave - 1); return MLX4_ROCE_PF_GIDS + (gids % vfs) + ((gids / vfs) * (slave - 1)); } static int get_real_sgid_index(struct mlx4_ib_dev *dev, int slave, int port, struct ib_ah_attr *ah_attr) { if (rdma_port_get_link_layer(&dev->ib_dev, port) == IB_LINK_LAYER_INFINIBAND) { ah_attr->grh.sgid_index = slave; return 0; } ah_attr->grh.sgid_index += get_slave_base_gid_ix(dev, slave, port); return 0; } static void mlx4_ib_multiplex_mad(struct mlx4_ib_demux_pv_ctx *ctx, struct ib_wc *wc) { struct mlx4_ib_dev *dev = to_mdev(ctx->ib_dev); struct mlx4_ib_demux_pv_qp *tun_qp = &ctx->qp[MLX4_TUN_WRID_QPN(wc->wr_id)]; int wr_ix = wc->wr_id & (MLX4_NUM_TUNNEL_BUFS - 1); struct mlx4_tunnel_mad *tunnel = tun_qp->ring[wr_ix].addr; struct mlx4_ib_ah ah; struct ib_ah_attr ah_attr; u8 *slave_id; int slave; /* Get slave that sent this packet */ if (wc->src_qp < dev->dev->phys_caps.base_proxy_sqpn || wc->src_qp >= dev->dev->phys_caps.base_proxy_sqpn + 8 * MLX4_MFUNC_MAX || (wc->src_qp & 0x1) != ctx->port - 1 || wc->src_qp & 0x4) { mlx4_ib_warn(ctx->ib_dev, "can't multiplex bad sqp:%d\n", wc->src_qp); return; } slave = ((wc->src_qp & ~0x7) - dev->dev->phys_caps.base_proxy_sqpn) / 8; if (slave != ctx->slave) { mlx4_ib_warn(ctx->ib_dev, "can't multiplex bad sqp:%d: " "belongs to another slave\n", wc->src_qp); return; } if (slave != mlx4_master_func_num(dev->dev) && !(wc->src_qp & 0x2)) { mlx4_ib_warn(ctx->ib_dev, "can't multiplex bad sqp:%d: " "non-master trying to send QP0 packets\n", wc->src_qp); return; } /* Map transaction ID */ ib_dma_sync_single_for_cpu(ctx->ib_dev, tun_qp->ring[wr_ix].map, sizeof (struct mlx4_tunnel_mad), DMA_FROM_DEVICE); switch (tunnel->mad.mad_hdr.method) { case IB_MGMT_METHOD_SET: case IB_MGMT_METHOD_GET: case IB_MGMT_METHOD_REPORT: case IB_SA_METHOD_GET_TABLE: case IB_SA_METHOD_DELETE: case IB_SA_METHOD_GET_MULTI: case IB_SA_METHOD_GET_TRACE_TBL: slave_id = (u8 *) &tunnel->mad.mad_hdr.tid; if (*slave_id) { mlx4_ib_warn(ctx->ib_dev, "egress mad has non-null tid msb:%d " "class:%d slave:%d\n", *slave_id, tunnel->mad.mad_hdr.mgmt_class, slave); return; } else *slave_id = slave; default: /* nothing */; } /* Class-specific handling */ switch (tunnel->mad.mad_hdr.mgmt_class) { case IB_MGMT_CLASS_SUBN_ADM: if (mlx4_ib_multiplex_sa_handler(ctx->ib_dev, ctx->port, slave, (struct ib_sa_mad *) &tunnel->mad)) return; break; case IB_MGMT_CLASS_CM: if (mlx4_ib_multiplex_cm_handler(ctx->ib_dev, ctx->port, slave, (struct ib_mad *) &tunnel->mad)) return; break; case IB_MGMT_CLASS_DEVICE_MGMT: if (tunnel->mad.mad_hdr.method != IB_MGMT_METHOD_GET && tunnel->mad.mad_hdr.method != IB_MGMT_METHOD_SET) return; break; default: /* Drop unsupported classes for slaves in tunnel mode */ if (slave != mlx4_master_func_num(dev->dev)) { mlx4_ib_warn(ctx->ib_dev, "dropping unsupported egress mad from class:%d " "for slave:%d\n", tunnel->mad.mad_hdr.mgmt_class, slave); return; } } /* We are using standard ib_core services to send the mad, so generate a * stadard address handle by decoding the tunnelled mlx4_ah fields */ memcpy(&ah.av, &tunnel->hdr.av, sizeof (struct mlx4_av)); ah.ibah.device = ctx->ib_dev; mlx4_ib_query_ah(&ah.ibah, &ah_attr); if (ah_attr.ah_flags & IB_AH_GRH) if (get_real_sgid_index(dev, slave, ctx->port, &ah_attr)) return; mlx4_ib_send_to_wire(dev, slave, ctx->port, is_proxy_qp0(dev, wc->src_qp, slave) ? IB_QPT_SMI : IB_QPT_GSI, be16_to_cpu(tunnel->hdr.pkey_index), be32_to_cpu(tunnel->hdr.remote_qpn), be32_to_cpu(tunnel->hdr.qkey), &ah_attr, &tunnel->mad); } static int mlx4_ib_alloc_pv_bufs(struct mlx4_ib_demux_pv_ctx *ctx, enum ib_qp_type qp_type, int is_tun) { int i; struct mlx4_ib_demux_pv_qp *tun_qp; int rx_buf_size, tx_buf_size; if (qp_type > IB_QPT_GSI) return -EINVAL; tun_qp = &ctx->qp[qp_type]; tun_qp->ring = kzalloc(sizeof (struct mlx4_ib_buf) * MLX4_NUM_TUNNEL_BUFS, GFP_KERNEL); if (!tun_qp->ring) return -ENOMEM; tun_qp->tx_ring = kcalloc(MLX4_NUM_TUNNEL_BUFS, sizeof (struct mlx4_ib_tun_tx_buf), GFP_KERNEL); if (!tun_qp->tx_ring) { kfree(tun_qp->ring); tun_qp->ring = NULL; return -ENOMEM; } if (is_tun) { rx_buf_size = sizeof (struct mlx4_tunnel_mad); tx_buf_size = sizeof (struct mlx4_rcv_tunnel_mad); } else { rx_buf_size = sizeof (struct mlx4_mad_rcv_buf); tx_buf_size = sizeof (struct mlx4_mad_snd_buf); } for (i = 0; i < MLX4_NUM_TUNNEL_BUFS; i++) { tun_qp->ring[i].addr = kmalloc(rx_buf_size, GFP_KERNEL); if (!tun_qp->ring[i].addr) goto err; tun_qp->ring[i].map = ib_dma_map_single(ctx->ib_dev, tun_qp->ring[i].addr, rx_buf_size, DMA_FROM_DEVICE); } for (i = 0; i < MLX4_NUM_TUNNEL_BUFS; i++) { tun_qp->tx_ring[i].buf.addr = kmalloc(tx_buf_size, GFP_KERNEL); if (!tun_qp->tx_ring[i].buf.addr) goto tx_err; tun_qp->tx_ring[i].buf.map = ib_dma_map_single(ctx->ib_dev, tun_qp->tx_ring[i].buf.addr, tx_buf_size, DMA_TO_DEVICE); tun_qp->tx_ring[i].ah = NULL; } spin_lock_init(&tun_qp->tx_lock); tun_qp->tx_ix_head = 0; tun_qp->tx_ix_tail = 0; tun_qp->proxy_qpt = qp_type; return 0; tx_err: while (i > 0) { --i; ib_dma_unmap_single(ctx->ib_dev, tun_qp->tx_ring[i].buf.map, tx_buf_size, DMA_TO_DEVICE); kfree(tun_qp->tx_ring[i].buf.addr); } kfree(tun_qp->tx_ring); tun_qp->tx_ring = NULL; i = MLX4_NUM_TUNNEL_BUFS; err: while (i > 0) { --i; ib_dma_unmap_single(ctx->ib_dev, tun_qp->ring[i].map, rx_buf_size, DMA_FROM_DEVICE); kfree(tun_qp->ring[i].addr); } kfree(tun_qp->ring); tun_qp->ring = NULL; return -ENOMEM; } static void mlx4_ib_free_pv_qp_bufs(struct mlx4_ib_demux_pv_ctx *ctx, enum ib_qp_type qp_type, int is_tun) { int i; struct mlx4_ib_demux_pv_qp *tun_qp; int rx_buf_size, tx_buf_size; if (qp_type > IB_QPT_GSI) return; tun_qp = &ctx->qp[qp_type]; if (is_tun) { rx_buf_size = sizeof (struct mlx4_tunnel_mad); tx_buf_size = sizeof (struct mlx4_rcv_tunnel_mad); } else { rx_buf_size = sizeof (struct mlx4_mad_rcv_buf); tx_buf_size = sizeof (struct mlx4_mad_snd_buf); } for (i = 0; i < MLX4_NUM_TUNNEL_BUFS; i++) { ib_dma_unmap_single(ctx->ib_dev, tun_qp->ring[i].map, rx_buf_size, DMA_FROM_DEVICE); kfree(tun_qp->ring[i].addr); } for (i = 0; i < MLX4_NUM_TUNNEL_BUFS; i++) { ib_dma_unmap_single(ctx->ib_dev, tun_qp->tx_ring[i].buf.map, tx_buf_size, DMA_TO_DEVICE); kfree(tun_qp->tx_ring[i].buf.addr); if (tun_qp->tx_ring[i].ah) ib_destroy_ah(tun_qp->tx_ring[i].ah); } kfree(tun_qp->tx_ring); kfree(tun_qp->ring); } static void mlx4_ib_tunnel_comp_worker(struct work_struct *work) { struct mlx4_ib_demux_pv_ctx *ctx; struct mlx4_ib_demux_pv_qp *tun_qp; struct ib_wc wc; int ret; ctx = container_of(work, struct mlx4_ib_demux_pv_ctx, work); ib_req_notify_cq(ctx->cq, IB_CQ_NEXT_COMP); while (ib_poll_cq(ctx->cq, 1, &wc) == 1) { tun_qp = &ctx->qp[MLX4_TUN_WRID_QPN(wc.wr_id)]; if (wc.status == IB_WC_SUCCESS) { switch (wc.opcode) { case IB_WC_RECV: mlx4_ib_multiplex_mad(ctx, &wc); ret = mlx4_ib_post_pv_qp_buf(ctx, tun_qp, wc.wr_id & (MLX4_NUM_TUNNEL_BUFS - 1)); if (ret) pr_err("Failed reposting tunnel " - "buf:%lld\n", wc.wr_id); + "buf:%lld\n", (long long)wc.wr_id); break; case IB_WC_SEND: pr_debug("received tunnel send completion:" "wrid=0x%llx, status=0x%x\n", - wc.wr_id, wc.status); + (long long)wc.wr_id, wc.status); ib_destroy_ah(tun_qp->tx_ring[wc.wr_id & (MLX4_NUM_TUNNEL_BUFS - 1)].ah); tun_qp->tx_ring[wc.wr_id & (MLX4_NUM_TUNNEL_BUFS - 1)].ah = NULL; spin_lock(&tun_qp->tx_lock); tun_qp->tx_ix_tail++; spin_unlock(&tun_qp->tx_lock); break; default: break; } } else { pr_debug("mlx4_ib: completion error in tunnel: %d." " status = %d, wrid = 0x%llx\n", - ctx->slave, wc.status, wc.wr_id); + ctx->slave, wc.status, (long long)wc.wr_id); if (!MLX4_TUN_IS_RECV(wc.wr_id)) { ib_destroy_ah(tun_qp->tx_ring[wc.wr_id & (MLX4_NUM_TUNNEL_BUFS - 1)].ah); tun_qp->tx_ring[wc.wr_id & (MLX4_NUM_TUNNEL_BUFS - 1)].ah = NULL; spin_lock(&tun_qp->tx_lock); tun_qp->tx_ix_tail++; spin_unlock(&tun_qp->tx_lock); } } } } static void pv_qp_event_handler(struct ib_event *event, void *qp_context) { struct mlx4_ib_demux_pv_ctx *sqp = qp_context; /* It's worse than that! He's dead, Jim! */ pr_err("Fatal error (%d) on a MAD QP on port %d\n", event->event, sqp->port); } static int create_pv_sqp(struct mlx4_ib_demux_pv_ctx *ctx, enum ib_qp_type qp_type, int create_tun) { int i, ret; struct mlx4_ib_demux_pv_qp *tun_qp; struct mlx4_ib_qp_tunnel_init_attr qp_init_attr; struct ib_qp_attr attr; int qp_attr_mask_INIT; if (qp_type > IB_QPT_GSI) return -EINVAL; tun_qp = &ctx->qp[qp_type]; memset(&qp_init_attr, 0, sizeof qp_init_attr); qp_init_attr.init_attr.send_cq = ctx->cq; qp_init_attr.init_attr.recv_cq = ctx->cq; qp_init_attr.init_attr.sq_sig_type = IB_SIGNAL_ALL_WR; qp_init_attr.init_attr.cap.max_send_wr = MLX4_NUM_TUNNEL_BUFS; qp_init_attr.init_attr.cap.max_recv_wr = MLX4_NUM_TUNNEL_BUFS; qp_init_attr.init_attr.cap.max_send_sge = 1; qp_init_attr.init_attr.cap.max_recv_sge = 1; if (create_tun) { qp_init_attr.init_attr.qp_type = IB_QPT_UD; qp_init_attr.init_attr.create_flags = (enum ib_qp_create_flags)MLX4_IB_SRIOV_TUNNEL_QP; qp_init_attr.port = ctx->port; qp_init_attr.slave = ctx->slave; qp_init_attr.proxy_qp_type = qp_type; qp_attr_mask_INIT = IB_QP_STATE | IB_QP_PKEY_INDEX | IB_QP_QKEY | IB_QP_PORT; } else { qp_init_attr.init_attr.qp_type = qp_type; qp_init_attr.init_attr.create_flags = (enum ib_qp_create_flags)MLX4_IB_SRIOV_SQP; qp_attr_mask_INIT = IB_QP_STATE | IB_QP_PKEY_INDEX | IB_QP_QKEY; } qp_init_attr.init_attr.port_num = ctx->port; qp_init_attr.init_attr.qp_context = ctx; qp_init_attr.init_attr.event_handler = pv_qp_event_handler; tun_qp->qp = ib_create_qp(ctx->pd, &qp_init_attr.init_attr); if (IS_ERR(tun_qp->qp)) { ret = PTR_ERR(tun_qp->qp); tun_qp->qp = NULL; pr_err("Couldn't create %s QP (%d)\n", create_tun ? "tunnel" : "special", ret); return ret; } memset(&attr, 0, sizeof attr); attr.qp_state = IB_QPS_INIT; attr.pkey_index = to_mdev(ctx->ib_dev)->pkeys.virt2phys_pkey[ctx->slave][ctx->port - 1][0]; attr.qkey = IB_QP1_QKEY; attr.port_num = ctx->port; ret = ib_modify_qp(tun_qp->qp, &attr, qp_attr_mask_INIT); if (ret) { pr_err("Couldn't change %s qp state to INIT (%d)\n", create_tun ? "tunnel" : "special", ret); goto err_qp; } attr.qp_state = IB_QPS_RTR; ret = ib_modify_qp(tun_qp->qp, &attr, IB_QP_STATE); if (ret) { pr_err("Couldn't change %s qp state to RTR (%d)\n", create_tun ? "tunnel" : "special", ret); goto err_qp; } attr.qp_state = IB_QPS_RTS; attr.sq_psn = 0; ret = ib_modify_qp(tun_qp->qp, &attr, IB_QP_STATE | IB_QP_SQ_PSN); if (ret) { pr_err("Couldn't change %s qp state to RTS (%d)\n", create_tun ? "tunnel" : "special", ret); goto err_qp; } for (i = 0; i < MLX4_NUM_TUNNEL_BUFS; i++) { ret = mlx4_ib_post_pv_qp_buf(ctx, tun_qp, i); if (ret) { pr_err(" mlx4_ib_post_pv_buf error" " (err = %d, i = %d)\n", ret, i); goto err_qp; } } return 0; err_qp: ib_destroy_qp(tun_qp->qp); tun_qp->qp = NULL; return ret; } /* * IB MAD completion callback for real SQPs */ static void mlx4_ib_sqp_comp_worker(struct work_struct *work) { struct mlx4_ib_demux_pv_ctx *ctx; struct mlx4_ib_demux_pv_qp *sqp; struct ib_wc wc; struct ib_grh *grh; struct ib_mad *mad; ctx = container_of(work, struct mlx4_ib_demux_pv_ctx, work); ib_req_notify_cq(ctx->cq, IB_CQ_NEXT_COMP); while (mlx4_ib_poll_cq(ctx->cq, 1, &wc) == 1) { sqp = &ctx->qp[MLX4_TUN_WRID_QPN(wc.wr_id)]; if (wc.status == IB_WC_SUCCESS) { switch (wc.opcode) { case IB_WC_SEND: ib_destroy_ah(sqp->tx_ring[wc.wr_id & (MLX4_NUM_TUNNEL_BUFS - 1)].ah); sqp->tx_ring[wc.wr_id & (MLX4_NUM_TUNNEL_BUFS - 1)].ah = NULL; spin_lock(&sqp->tx_lock); sqp->tx_ix_tail++; spin_unlock(&sqp->tx_lock); break; case IB_WC_RECV: mad = (struct ib_mad *) &(((struct mlx4_mad_rcv_buf *) (sqp->ring[wc.wr_id & (MLX4_NUM_TUNNEL_BUFS - 1)].addr))->payload); grh = &(((struct mlx4_mad_rcv_buf *) (sqp->ring[wc.wr_id & (MLX4_NUM_TUNNEL_BUFS - 1)].addr))->grh); mlx4_ib_demux_mad(ctx->ib_dev, ctx->port, &wc, grh, mad); if (mlx4_ib_post_pv_qp_buf(ctx, sqp, wc.wr_id & (MLX4_NUM_TUNNEL_BUFS - 1))) pr_err("Failed reposting SQP " - "buf:%lld\n", wc.wr_id); + "buf:%lld\n", (long long)wc.wr_id); break; default: BUG_ON(1); break; } } else { pr_debug("mlx4_ib: completion error in tunnel: %d." " status = %d, wrid = 0x%llx\n", - ctx->slave, wc.status, wc.wr_id); + ctx->slave, wc.status, (long long)wc.wr_id); if (!MLX4_TUN_IS_RECV(wc.wr_id)) { ib_destroy_ah(sqp->tx_ring[wc.wr_id & (MLX4_NUM_TUNNEL_BUFS - 1)].ah); sqp->tx_ring[wc.wr_id & (MLX4_NUM_TUNNEL_BUFS - 1)].ah = NULL; spin_lock(&sqp->tx_lock); sqp->tx_ix_tail++; spin_unlock(&sqp->tx_lock); } } } } static int alloc_pv_object(struct mlx4_ib_dev *dev, int slave, int port, struct mlx4_ib_demux_pv_ctx **ret_ctx) { struct mlx4_ib_demux_pv_ctx *ctx; *ret_ctx = NULL; ctx = kzalloc(sizeof (struct mlx4_ib_demux_pv_ctx), GFP_KERNEL); if (!ctx) { pr_err("failed allocating pv resource context " "for port %d, slave %d\n", port, slave); return -ENOMEM; } ctx->ib_dev = &dev->ib_dev; ctx->port = port; ctx->slave = slave; *ret_ctx = ctx; return 0; } static void free_pv_object(struct mlx4_ib_dev *dev, int slave, int port) { if (dev->sriov.demux[port - 1].tun[slave]) { kfree(dev->sriov.demux[port - 1].tun[slave]); dev->sriov.demux[port - 1].tun[slave] = NULL; } } static int create_pv_resources(struct ib_device *ibdev, int slave, int port, int create_tun, struct mlx4_ib_demux_pv_ctx *ctx) { int ret, cq_size; if (ctx->state != DEMUX_PV_STATE_DOWN) return -EEXIST; ctx->state = DEMUX_PV_STATE_STARTING; /* have QP0 only on port owner, and only if link layer is IB */ if (ctx->slave == mlx4_master_func_num(to_mdev(ctx->ib_dev)->dev) && rdma_port_get_link_layer(ibdev, ctx->port) == IB_LINK_LAYER_INFINIBAND) ctx->has_smi = 1; if (ctx->has_smi) { ret = mlx4_ib_alloc_pv_bufs(ctx, IB_QPT_SMI, create_tun); if (ret) { pr_err("Failed allocating qp0 tunnel bufs (%d)\n", ret); goto err_out; } } ret = mlx4_ib_alloc_pv_bufs(ctx, IB_QPT_GSI, create_tun); if (ret) { pr_err("Failed allocating qp1 tunnel bufs (%d)\n", ret); goto err_out_qp0; } cq_size = 2 * MLX4_NUM_TUNNEL_BUFS; if (ctx->has_smi) cq_size *= 2; ctx->cq = ib_create_cq(ctx->ib_dev, mlx4_ib_tunnel_comp_handler, NULL, ctx, cq_size, 0); if (IS_ERR(ctx->cq)) { ret = PTR_ERR(ctx->cq); pr_err("Couldn't create tunnel CQ (%d)\n", ret); goto err_buf; } ctx->pd = ib_alloc_pd(ctx->ib_dev); if (IS_ERR(ctx->pd)) { ret = PTR_ERR(ctx->pd); pr_err("Couldn't create tunnel PD (%d)\n", ret); goto err_cq; } ctx->mr = ib_get_dma_mr(ctx->pd, IB_ACCESS_LOCAL_WRITE); if (IS_ERR(ctx->mr)) { ret = PTR_ERR(ctx->mr); pr_err("Couldn't get tunnel DMA MR (%d)\n", ret); goto err_pd; } if (ctx->has_smi) { ret = create_pv_sqp(ctx, IB_QPT_SMI, create_tun); if (ret) { pr_err("Couldn't create %s QP0 (%d)\n", create_tun ? "tunnel for" : "", ret); goto err_mr; } } ret = create_pv_sqp(ctx, IB_QPT_GSI, create_tun); if (ret) { pr_err("Couldn't create %s QP1 (%d)\n", create_tun ? "tunnel for" : "", ret); goto err_qp0; } if (create_tun) INIT_WORK(&ctx->work, mlx4_ib_tunnel_comp_worker); else INIT_WORK(&ctx->work, mlx4_ib_sqp_comp_worker); ctx->wq = to_mdev(ibdev)->sriov.demux[port - 1].wq; ret = ib_req_notify_cq(ctx->cq, IB_CQ_NEXT_COMP); if (ret) { pr_err("Couldn't arm tunnel cq (%d)\n", ret); goto err_wq; } ctx->state = DEMUX_PV_STATE_ACTIVE; return 0; err_wq: ctx->wq = NULL; ib_destroy_qp(ctx->qp[1].qp); ctx->qp[1].qp = NULL; err_qp0: if (ctx->has_smi) ib_destroy_qp(ctx->qp[0].qp); ctx->qp[0].qp = NULL; err_mr: ib_dereg_mr(ctx->mr); ctx->mr = NULL; err_pd: ib_dealloc_pd(ctx->pd); ctx->pd = NULL; err_cq: ib_destroy_cq(ctx->cq); ctx->cq = NULL; err_buf: mlx4_ib_free_pv_qp_bufs(ctx, IB_QPT_GSI, create_tun); err_out_qp0: if (ctx->has_smi) mlx4_ib_free_pv_qp_bufs(ctx, IB_QPT_SMI, create_tun); err_out: ctx->state = DEMUX_PV_STATE_DOWN; return ret; } static void destroy_pv_resources(struct mlx4_ib_dev *dev, int slave, int port, struct mlx4_ib_demux_pv_ctx *ctx, int flush) { if (!ctx) return; if (ctx->state > DEMUX_PV_STATE_DOWN) { ctx->state = DEMUX_PV_STATE_DOWNING; if (flush) flush_workqueue(ctx->wq); if (ctx->has_smi) { ib_destroy_qp(ctx->qp[0].qp); ctx->qp[0].qp = NULL; mlx4_ib_free_pv_qp_bufs(ctx, IB_QPT_SMI, 1); } ib_destroy_qp(ctx->qp[1].qp); ctx->qp[1].qp = NULL; mlx4_ib_free_pv_qp_bufs(ctx, IB_QPT_GSI, 1); ib_dereg_mr(ctx->mr); ctx->mr = NULL; ib_dealloc_pd(ctx->pd); ctx->pd = NULL; ib_destroy_cq(ctx->cq); ctx->cq = NULL; ctx->state = DEMUX_PV_STATE_DOWN; } } static int mlx4_ib_tunnels_update(struct mlx4_ib_dev *dev, int slave, int port, int do_init) { int ret = 0; if (!do_init) { clean_vf_mcast(&dev->sriov.demux[port - 1], slave); /* for master, destroy real sqp resources */ if (slave == mlx4_master_func_num(dev->dev)) destroy_pv_resources(dev, slave, port, dev->sriov.sqps[port - 1], 1); /* destroy the tunnel qp resources */ destroy_pv_resources(dev, slave, port, dev->sriov.demux[port - 1].tun[slave], 1); return 0; } /* create the tunnel qp resources */ ret = create_pv_resources(&dev->ib_dev, slave, port, 1, dev->sriov.demux[port - 1].tun[slave]); /* for master, create the real sqp resources */ if (!ret && slave == mlx4_master_func_num(dev->dev)) ret = create_pv_resources(&dev->ib_dev, slave, port, 0, dev->sriov.sqps[port - 1]); return ret; } void mlx4_ib_tunnels_update_work(struct work_struct *work) { struct mlx4_ib_demux_work *dmxw; dmxw = container_of(work, struct mlx4_ib_demux_work, work); mlx4_ib_tunnels_update(dmxw->dev, dmxw->slave, (int) dmxw->port, dmxw->do_init); kfree(dmxw); return; } static int mlx4_ib_alloc_demux_ctx(struct mlx4_ib_dev *dev, struct mlx4_ib_demux_ctx *ctx, int port) { char name[12]; int ret = 0; int i; ctx->tun = kcalloc(dev->dev->caps.sqp_demux, sizeof (struct mlx4_ib_demux_pv_ctx *), GFP_KERNEL); if (!ctx->tun) return -ENOMEM; ctx->dev = dev; ctx->port = port; ctx->ib_dev = &dev->ib_dev; for (i = 0; i < dev->dev->caps.sqp_demux; i++) { ret = alloc_pv_object(dev, i, port, &ctx->tun[i]); if (ret) { ret = -ENOMEM; goto err_mcg; } } ret = mlx4_ib_mcg_port_init(ctx); if (ret) { pr_err("Failed initializing mcg para-virt (%d)\n", ret); goto err_mcg; } snprintf(name, sizeof name, "mlx4_ibt%d", port); ctx->wq = create_singlethread_workqueue(name); if (!ctx->wq) { pr_err("Failed to create tunnelling WQ for port %d\n", port); ret = -ENOMEM; goto err_wq; } snprintf(name, sizeof name, "mlx4_ibud%d", port); ctx->ud_wq = create_singlethread_workqueue(name); if (!ctx->ud_wq) { pr_err("Failed to create up/down WQ for port %d\n", port); ret = -ENOMEM; goto err_udwq; } return 0; err_udwq: destroy_workqueue(ctx->wq); ctx->wq = NULL; err_wq: mlx4_ib_mcg_port_cleanup(ctx, 1); err_mcg: for (i = 0; i < dev->dev->caps.sqp_demux; i++) free_pv_object(dev, i, port); kfree(ctx->tun); ctx->tun = NULL; return ret; } static void mlx4_ib_free_sqp_ctx(struct mlx4_ib_demux_pv_ctx *sqp_ctx) { if (sqp_ctx->state > DEMUX_PV_STATE_DOWN) { sqp_ctx->state = DEMUX_PV_STATE_DOWNING; flush_workqueue(sqp_ctx->wq); if (sqp_ctx->has_smi) { ib_destroy_qp(sqp_ctx->qp[0].qp); sqp_ctx->qp[0].qp = NULL; mlx4_ib_free_pv_qp_bufs(sqp_ctx, IB_QPT_SMI, 0); } ib_destroy_qp(sqp_ctx->qp[1].qp); sqp_ctx->qp[1].qp = NULL; mlx4_ib_free_pv_qp_bufs(sqp_ctx, IB_QPT_GSI, 0); ib_dereg_mr(sqp_ctx->mr); sqp_ctx->mr = NULL; ib_dealloc_pd(sqp_ctx->pd); sqp_ctx->pd = NULL; ib_destroy_cq(sqp_ctx->cq); sqp_ctx->cq = NULL; sqp_ctx->state = DEMUX_PV_STATE_DOWN; } } static void mlx4_ib_free_demux_ctx(struct mlx4_ib_demux_ctx *ctx) { int i; if (ctx) { struct mlx4_ib_dev *dev = to_mdev(ctx->ib_dev); mlx4_ib_mcg_port_cleanup(ctx, 1); for (i = 0; i < dev->dev->caps.sqp_demux; i++) { if (!ctx->tun[i]) continue; if (ctx->tun[i]->state > DEMUX_PV_STATE_DOWN) ctx->tun[i]->state = DEMUX_PV_STATE_DOWNING; } flush_workqueue(ctx->wq); for (i = 0; i < dev->dev->caps.sqp_demux; i++) { destroy_pv_resources(dev, i, ctx->port, ctx->tun[i], 0); free_pv_object(dev, i, ctx->port); } kfree(ctx->tun); destroy_workqueue(ctx->ud_wq); destroy_workqueue(ctx->wq); } } static void mlx4_ib_master_tunnels(struct mlx4_ib_dev *dev, int do_init) { int i; if (!mlx4_is_master(dev->dev)) return; /* initialize or tear down tunnel QPs for the master */ for (i = 0; i < dev->dev->caps.num_ports; i++) mlx4_ib_tunnels_update(dev, mlx4_master_func_num(dev->dev), i + 1, do_init); return; } int mlx4_ib_init_sriov(struct mlx4_ib_dev *dev) { int i = 0; int err; if (!mlx4_is_mfunc(dev->dev)) return 0; dev->sriov.is_going_down = 0; spin_lock_init(&dev->sriov.going_down_lock); mlx4_ib_cm_paravirt_init(dev); mlx4_ib_warn(&dev->ib_dev, "multi-function enabled\n"); if (mlx4_is_slave(dev->dev)) { mlx4_ib_warn(&dev->ib_dev, "operating in qp1 tunnel mode\n"); return 0; } for (i = 0; i < dev->dev->caps.sqp_demux; i++) { if (i == mlx4_master_func_num(dev->dev)) mlx4_put_slave_node_guid(dev->dev, i, dev->ib_dev.node_guid); else mlx4_put_slave_node_guid(dev->dev, i, mlx4_ib_gen_node_guid()); } err = mlx4_ib_init_alias_guid_service(dev); if (err) { mlx4_ib_warn(&dev->ib_dev, "Failed init alias guid process.\n"); goto paravirt_err; } err = mlx4_ib_device_register_sysfs(dev); if (err) { mlx4_ib_warn(&dev->ib_dev, "Failed to register sysfs\n"); goto sysfs_err; } mlx4_ib_warn(&dev->ib_dev, "initializing demux service for %d qp1 clients\n", dev->dev->caps.sqp_demux); for (i = 0; i < dev->num_ports; i++) { union ib_gid gid; err = __mlx4_ib_query_gid(&dev->ib_dev, i + 1, 0, &gid, 1); if (err) goto demux_err; dev->sriov.demux[i].guid_cache[0] = gid.global.interface_id; err = alloc_pv_object(dev, mlx4_master_func_num(dev->dev), i + 1, &dev->sriov.sqps[i]); if (err) goto demux_err; err = mlx4_ib_alloc_demux_ctx(dev, &dev->sriov.demux[i], i + 1); if (err) goto demux_err; } mlx4_ib_master_tunnels(dev, 1); return 0; demux_err: while (i > 0) { free_pv_object(dev, mlx4_master_func_num(dev->dev), i + 1); mlx4_ib_free_demux_ctx(&dev->sriov.demux[i]); --i; } mlx4_ib_device_unregister_sysfs(dev); sysfs_err: mlx4_ib_destroy_alias_guid_service(dev); paravirt_err: mlx4_ib_cm_paravirt_clean(dev, -1); return err; } void mlx4_ib_close_sriov(struct mlx4_ib_dev *dev) { int i; unsigned long flags; if (!mlx4_is_mfunc(dev->dev)) return; spin_lock_irqsave(&dev->sriov.going_down_lock, flags); dev->sriov.is_going_down = 1; spin_unlock_irqrestore(&dev->sriov.going_down_lock, flags); if (mlx4_is_master(dev->dev)) { for (i = 0; i < dev->num_ports; i++) { flush_workqueue(dev->sriov.demux[i].ud_wq); mlx4_ib_free_sqp_ctx(dev->sriov.sqps[i]); kfree(dev->sriov.sqps[i]); dev->sriov.sqps[i] = NULL; mlx4_ib_free_demux_ctx(&dev->sriov.demux[i]); } mlx4_ib_cm_paravirt_clean(dev, -1); mlx4_ib_destroy_alias_guid_service(dev); mlx4_ib_device_unregister_sysfs(dev); } } Index: stable/10/sys/ofed/drivers/infiniband/hw/mlx4/main.c =================================================================== --- stable/10/sys/ofed/drivers/infiniband/hw/mlx4/main.c (revision 271126) +++ stable/10/sys/ofed/drivers/infiniband/hw/mlx4/main.c (revision 271127) @@ -1,2419 +1,2418 @@ /* * Copyright (c) 2006, 2007 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2007, 2008 Mellanox Technologies. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU * General Public License (GPL) Version 2, available from the file * COPYING in the main directory of this source tree, or the * OpenIB.org BSD license below: * * Redistribution and use in source and binary forms, with or * without modification, are permitted provided that the following * conditions are met: * * - Redistributions of source code must retain the above * copyright notice, this list of conditions and the following * disclaimer. * * - Redistributions in binary form must reproduce the above * copyright notice, this list of conditions and the following * disclaimer in the documentation and/or other materials * provided with the distribution. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ #include #ifdef __linux__ #include #endif -#include #include #include #include #include -#include #include #include #include +#include #include #include #include #include #include #include #include "mlx4_ib.h" #include "user.h" #include "wc.h" #define DRV_NAME MLX4_IB_DRV_NAME #define DRV_VERSION "1.0" #define DRV_RELDATE "April 4, 2008" #define MLX4_IB_DRIVER_PROC_DIR_NAME "driver/mlx4_ib" #define MLX4_IB_MRS_PROC_DIR_NAME "mrs" MODULE_AUTHOR("Roland Dreier"); MODULE_DESCRIPTION("Mellanox ConnectX HCA InfiniBand driver"); MODULE_LICENSE("Dual BSD/GPL"); MODULE_VERSION(DRV_VERSION); int mlx4_ib_sm_guid_assign = 1; #ifdef __linux__ struct proc_dir_entry *mlx4_mrs_dir_entry; static struct proc_dir_entry *mlx4_ib_driver_dir_entry; #endif module_param_named(sm_guid_assign, mlx4_ib_sm_guid_assign, int, 0444); MODULE_PARM_DESC(sm_guid_assign, "Enable SM alias_GUID assignment if sm_guid_assign > 0 (Default: 1)"); static char dev_assign_str[512]; //module_param_string(dev_assign_str, dev_assign_str, sizeof(dev_assign_str), 0644); MODULE_PARM_DESC(dev_assign_str, "Map all device function numbers to " "IB device numbers following the pattern: " "bb:dd.f-0,bb:dd.f-1,... (all numbers are hexadecimals)." " Max supported devices - 32"); static const char mlx4_ib_version[] = DRV_NAME ": Mellanox ConnectX InfiniBand driver v" DRV_VERSION " (" DRV_RELDATE ")\n"; struct update_gid_work { struct work_struct work; union ib_gid gids[128]; struct mlx4_ib_dev *dev; int port; }; struct dev_rec { int bus; int dev; int func; int nr; }; #define MAX_DR 32 static struct dev_rec dr[MAX_DR]; static void do_slave_init(struct mlx4_ib_dev *ibdev, int slave, int do_init); static struct workqueue_struct *wq; static void init_query_mad(struct ib_smp *mad) { mad->base_version = 1; mad->mgmt_class = IB_MGMT_CLASS_SUBN_LID_ROUTED; mad->class_version = 1; mad->method = IB_MGMT_METHOD_GET; } static union ib_gid zgid; static int mlx4_ib_query_device(struct ib_device *ibdev, struct ib_device_attr *props) { struct mlx4_ib_dev *dev = to_mdev(ibdev); struct ib_smp *in_mad = NULL; struct ib_smp *out_mad = NULL; int err = -ENOMEM; in_mad = kzalloc(sizeof *in_mad, GFP_KERNEL); out_mad = kmalloc(sizeof *out_mad, GFP_KERNEL); if (!in_mad || !out_mad) goto out; init_query_mad(in_mad); in_mad->attr_id = IB_SMP_ATTR_NODE_INFO; err = mlx4_MAD_IFC(to_mdev(ibdev), MLX4_MAD_IFC_IGNORE_KEYS, 1, NULL, NULL, in_mad, out_mad); if (err) goto out; memset(props, 0, sizeof *props); props->fw_ver = dev->dev->caps.fw_ver; props->device_cap_flags = IB_DEVICE_CHANGE_PHY_PORT | IB_DEVICE_PORT_ACTIVE_EVENT | IB_DEVICE_SYS_IMAGE_GUID | IB_DEVICE_RC_RNR_NAK_GEN | IB_DEVICE_BLOCK_MULTICAST_LOOPBACK | IB_DEVICE_SHARED_MR; if (dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_BAD_PKEY_CNTR) props->device_cap_flags |= IB_DEVICE_BAD_PKEY_CNTR; if (dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_BAD_QKEY_CNTR) props->device_cap_flags |= IB_DEVICE_BAD_QKEY_CNTR; if (dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_APM) props->device_cap_flags |= IB_DEVICE_AUTO_PATH_MIG; if (dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_UD_AV_PORT) props->device_cap_flags |= IB_DEVICE_UD_AV_PORT_ENFORCE; if (dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_IPOIB_CSUM) props->device_cap_flags |= IB_DEVICE_UD_IP_CSUM; if (dev->dev->caps.max_gso_sz && dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_BLH) props->device_cap_flags |= IB_DEVICE_UD_TSO; if (dev->dev->caps.bmme_flags & MLX4_BMME_FLAG_RESERVED_LKEY) props->device_cap_flags |= IB_DEVICE_LOCAL_DMA_LKEY; if ((dev->dev->caps.bmme_flags & MLX4_BMME_FLAG_LOCAL_INV) && (dev->dev->caps.bmme_flags & MLX4_BMME_FLAG_REMOTE_INV) && (dev->dev->caps.bmme_flags & MLX4_BMME_FLAG_FAST_REG_WR)) props->device_cap_flags |= IB_DEVICE_MEM_MGT_EXTENSIONS; if (dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_XRC) props->device_cap_flags |= IB_DEVICE_XRC; props->device_cap_flags |= IB_DEVICE_QPG; if (dev->dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_RSS) { props->device_cap_flags |= IB_DEVICE_UD_RSS; props->max_rss_tbl_sz = dev->dev->caps.max_rss_tbl_sz; } props->vendor_id = be32_to_cpup((__be32 *) (out_mad->data + 36)) & 0xffffff; props->vendor_part_id = dev->dev->pdev->device; props->hw_ver = be32_to_cpup((__be32 *) (out_mad->data + 32)); memcpy(&props->sys_image_guid, out_mad->data + 4, 8); props->max_mr_size = ~0ull; props->page_size_cap = dev->dev->caps.page_size_cap; props->max_qp = dev->dev->quotas.qp; props->max_qp_wr = dev->dev->caps.max_wqes - MLX4_IB_SQ_MAX_SPARE; props->max_sge = min(dev->dev->caps.max_sq_sg, dev->dev->caps.max_rq_sg); props->max_cq = dev->dev->quotas.cq; props->max_cqe = dev->dev->caps.max_cqes; props->max_mr = dev->dev->quotas.mpt; props->max_pd = dev->dev->caps.num_pds - dev->dev->caps.reserved_pds; props->max_qp_rd_atom = dev->dev->caps.max_qp_dest_rdma; props->max_qp_init_rd_atom = dev->dev->caps.max_qp_init_rdma; props->max_res_rd_atom = props->max_qp_rd_atom * props->max_qp; props->max_srq = dev->dev->quotas.srq; props->max_srq_wr = dev->dev->caps.max_srq_wqes - 1; props->max_srq_sge = dev->dev->caps.max_srq_sge; props->max_fast_reg_page_list_len = MLX4_MAX_FAST_REG_PAGES; props->local_ca_ack_delay = dev->dev->caps.local_ca_ack_delay; props->atomic_cap = dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_ATOMIC ? IB_ATOMIC_HCA : IB_ATOMIC_NONE; props->masked_atomic_cap = props->atomic_cap; props->max_pkeys = dev->dev->caps.pkey_table_len[1]; props->max_mcast_grp = dev->dev->caps.num_mgms + dev->dev->caps.num_amgms; props->max_mcast_qp_attach = dev->dev->caps.num_qp_per_mgm; props->max_total_mcast_qp_attach = props->max_mcast_qp_attach * props->max_mcast_grp; props->max_map_per_fmr = dev->dev->caps.max_fmr_maps; out: kfree(in_mad); kfree(out_mad); return err; } static enum rdma_link_layer mlx4_ib_port_link_layer(struct ib_device *device, u8 port_num) { struct mlx4_dev *dev = to_mdev(device)->dev; return dev->caps.port_mask[port_num] == MLX4_PORT_TYPE_IB ? IB_LINK_LAYER_INFINIBAND : IB_LINK_LAYER_ETHERNET; } static int ib_link_query_port(struct ib_device *ibdev, u8 port, struct ib_port_attr *props, int netw_view) { struct ib_smp *in_mad = NULL; struct ib_smp *out_mad = NULL; int ext_active_speed; int mad_ifc_flags = MLX4_MAD_IFC_IGNORE_KEYS; int err = -ENOMEM; in_mad = kzalloc(sizeof *in_mad, GFP_KERNEL); out_mad = kmalloc(sizeof *out_mad, GFP_KERNEL); if (!in_mad || !out_mad) goto out; init_query_mad(in_mad); in_mad->attr_id = IB_SMP_ATTR_PORT_INFO; in_mad->attr_mod = cpu_to_be32(port); if (mlx4_is_mfunc(to_mdev(ibdev)->dev) && netw_view) mad_ifc_flags |= MLX4_MAD_IFC_NET_VIEW; err = mlx4_MAD_IFC(to_mdev(ibdev), mad_ifc_flags, port, NULL, NULL, in_mad, out_mad); if (err) goto out; props->lid = be16_to_cpup((__be16 *) (out_mad->data + 16)); props->lmc = out_mad->data[34] & 0x7; props->sm_lid = be16_to_cpup((__be16 *) (out_mad->data + 18)); props->sm_sl = out_mad->data[36] & 0xf; props->state = out_mad->data[32] & 0xf; props->phys_state = out_mad->data[33] >> 4; props->port_cap_flags = be32_to_cpup((__be32 *) (out_mad->data + 20)); if (netw_view) props->gid_tbl_len = out_mad->data[50]; else props->gid_tbl_len = to_mdev(ibdev)->dev->caps.gid_table_len[port]; props->max_msg_sz = to_mdev(ibdev)->dev->caps.max_msg_sz; props->pkey_tbl_len = to_mdev(ibdev)->dev->caps.pkey_table_len[port]; props->bad_pkey_cntr = be16_to_cpup((__be16 *) (out_mad->data + 46)); props->qkey_viol_cntr = be16_to_cpup((__be16 *) (out_mad->data + 48)); props->active_width = out_mad->data[31] & 0xf; props->active_speed = out_mad->data[35] >> 4; props->max_mtu = out_mad->data[41] & 0xf; props->active_mtu = out_mad->data[36] >> 4; props->subnet_timeout = out_mad->data[51] & 0x1f; props->max_vl_num = out_mad->data[37] >> 4; props->init_type_reply = out_mad->data[41] >> 4; /* Check if extended speeds (EDR/FDR/...) are supported */ if (props->port_cap_flags & IB_PORT_EXTENDED_SPEEDS_SUP) { ext_active_speed = out_mad->data[62] >> 4; switch (ext_active_speed) { case 1: props->active_speed = IB_SPEED_FDR; break; case 2: props->active_speed = IB_SPEED_EDR; break; } } /* If reported active speed is QDR, check if is FDR-10 */ if (props->active_speed == IB_SPEED_QDR) { init_query_mad(in_mad); in_mad->attr_id = MLX4_ATTR_EXTENDED_PORT_INFO; in_mad->attr_mod = cpu_to_be32(port); err = mlx4_MAD_IFC(to_mdev(ibdev), mad_ifc_flags, port, NULL, NULL, in_mad, out_mad); if (err) goto out; /* Checking LinkSpeedActive for FDR-10 */ if (out_mad->data[15] & 0x1) props->active_speed = IB_SPEED_FDR10; } /* Avoid wrong speed value returned by FW if the IB link is down. */ if (props->state == IB_PORT_DOWN) props->active_speed = IB_SPEED_SDR; out: kfree(in_mad); kfree(out_mad); return err; } static u8 state_to_phys_state(enum ib_port_state state) { return state == IB_PORT_ACTIVE ? 5 : 3; } static int eth_link_query_port(struct ib_device *ibdev, u8 port, struct ib_port_attr *props, int netw_view) { struct mlx4_ib_dev *mdev = to_mdev(ibdev); struct mlx4_ib_iboe *iboe = &mdev->iboe; struct net_device *ndev; enum ib_mtu tmp; struct mlx4_cmd_mailbox *mailbox; int err = 0; mailbox = mlx4_alloc_cmd_mailbox(mdev->dev); if (IS_ERR(mailbox)) return PTR_ERR(mailbox); err = mlx4_cmd_box(mdev->dev, 0, mailbox->dma, port, 0, MLX4_CMD_QUERY_PORT, MLX4_CMD_TIME_CLASS_B, MLX4_CMD_WRAPPED); if (err) goto out; props->active_width = (((u8 *)mailbox->buf)[5] == 0x40) ? IB_WIDTH_4X : IB_WIDTH_1X; props->active_speed = IB_SPEED_QDR; props->port_cap_flags = IB_PORT_CM_SUP; if (netw_view) props->gid_tbl_len = MLX4_ROCE_MAX_GIDS; else props->gid_tbl_len = mdev->dev->caps.gid_table_len[port]; props->max_msg_sz = mdev->dev->caps.max_msg_sz; props->pkey_tbl_len = 1; props->max_mtu = IB_MTU_4096; props->max_vl_num = 2; props->state = IB_PORT_DOWN; props->phys_state = state_to_phys_state(props->state); props->active_mtu = IB_MTU_256; spin_lock(&iboe->lock); ndev = iboe->netdevs[port - 1]; if (!ndev) goto out_unlock; tmp = iboe_get_mtu(ndev->if_mtu); props->active_mtu = tmp ? min(props->max_mtu, tmp) : IB_MTU_256; props->state = (netif_running(ndev) && netif_carrier_ok(ndev)) ? IB_PORT_ACTIVE : IB_PORT_DOWN; props->phys_state = state_to_phys_state(props->state); out_unlock: spin_unlock(&iboe->lock); out: mlx4_free_cmd_mailbox(mdev->dev, mailbox); return err; } int __mlx4_ib_query_port(struct ib_device *ibdev, u8 port, struct ib_port_attr *props, int netw_view) { int err; memset(props, 0, sizeof *props); err = mlx4_ib_port_link_layer(ibdev, port) == IB_LINK_LAYER_INFINIBAND ? ib_link_query_port(ibdev, port, props, netw_view) : eth_link_query_port(ibdev, port, props, netw_view); return err; } static int mlx4_ib_query_port(struct ib_device *ibdev, u8 port, struct ib_port_attr *props) { /* returns host view */ return __mlx4_ib_query_port(ibdev, port, props, 0); } int __mlx4_ib_query_gid(struct ib_device *ibdev, u8 port, int index, union ib_gid *gid, int netw_view) { struct ib_smp *in_mad = NULL; struct ib_smp *out_mad = NULL; int err = -ENOMEM; struct mlx4_ib_dev *dev = to_mdev(ibdev); int clear = 0; int mad_ifc_flags = MLX4_MAD_IFC_IGNORE_KEYS; in_mad = kzalloc(sizeof *in_mad, GFP_KERNEL); out_mad = kmalloc(sizeof *out_mad, GFP_KERNEL); if (!in_mad || !out_mad) goto out; init_query_mad(in_mad); in_mad->attr_id = IB_SMP_ATTR_PORT_INFO; in_mad->attr_mod = cpu_to_be32(port); if (mlx4_is_mfunc(dev->dev) && netw_view) mad_ifc_flags |= MLX4_MAD_IFC_NET_VIEW; err = mlx4_MAD_IFC(dev, mad_ifc_flags, port, NULL, NULL, in_mad, out_mad); if (err) goto out; memcpy(gid->raw, out_mad->data + 8, 8); if (mlx4_is_mfunc(dev->dev) && !netw_view) { if (index) { /* For any index > 0, return the null guid */ err = 0; clear = 1; goto out; } } init_query_mad(in_mad); in_mad->attr_id = IB_SMP_ATTR_GUID_INFO; in_mad->attr_mod = cpu_to_be32(index / 8); err = mlx4_MAD_IFC(dev, mad_ifc_flags, port, NULL, NULL, in_mad, out_mad); if (err) goto out; memcpy(gid->raw + 8, out_mad->data + (index % 8) * 8, 8); out: if (clear) memset(gid->raw + 8, 0, 8); kfree(in_mad); kfree(out_mad); return err; } static int iboe_query_gid(struct ib_device *ibdev, u8 port, int index, union ib_gid *gid) { struct mlx4_ib_dev *dev = to_mdev(ibdev); *gid = dev->iboe.gid_table[port - 1][index]; return 0; } static int mlx4_ib_query_gid(struct ib_device *ibdev, u8 port, int index, union ib_gid *gid) { if (rdma_port_get_link_layer(ibdev, port) == IB_LINK_LAYER_INFINIBAND) return __mlx4_ib_query_gid(ibdev, port, index, gid, 0); else return iboe_query_gid(ibdev, port, index, gid); } int __mlx4_ib_query_pkey(struct ib_device *ibdev, u8 port, u16 index, u16 *pkey, int netw_view) { struct ib_smp *in_mad = NULL; struct ib_smp *out_mad = NULL; int mad_ifc_flags = MLX4_MAD_IFC_IGNORE_KEYS; int err = -ENOMEM; in_mad = kzalloc(sizeof *in_mad, GFP_KERNEL); out_mad = kmalloc(sizeof *out_mad, GFP_KERNEL); if (!in_mad || !out_mad) goto out; init_query_mad(in_mad); in_mad->attr_id = IB_SMP_ATTR_PKEY_TABLE; in_mad->attr_mod = cpu_to_be32(index / 32); if (mlx4_is_mfunc(to_mdev(ibdev)->dev) && netw_view) mad_ifc_flags |= MLX4_MAD_IFC_NET_VIEW; err = mlx4_MAD_IFC(to_mdev(ibdev), mad_ifc_flags, port, NULL, NULL, in_mad, out_mad); if (err) goto out; *pkey = be16_to_cpu(((__be16 *) out_mad->data)[index % 32]); out: kfree(in_mad); kfree(out_mad); return err; } static int mlx4_ib_query_pkey(struct ib_device *ibdev, u8 port, u16 index, u16 *pkey) { return __mlx4_ib_query_pkey(ibdev, port, index, pkey, 0); } static int mlx4_ib_modify_device(struct ib_device *ibdev, int mask, struct ib_device_modify *props) { struct mlx4_cmd_mailbox *mailbox; unsigned long flags; if (mask & ~IB_DEVICE_MODIFY_NODE_DESC) return -EOPNOTSUPP; if (!(mask & IB_DEVICE_MODIFY_NODE_DESC)) return 0; if (mlx4_is_slave(to_mdev(ibdev)->dev)) return -EOPNOTSUPP; spin_lock_irqsave(&to_mdev(ibdev)->sm_lock, flags); memcpy(ibdev->node_desc, props->node_desc, 64); spin_unlock_irqrestore(&to_mdev(ibdev)->sm_lock, flags); /* * If possible, pass node desc to FW, so it can generate * a 144 trap. If cmd fails, just ignore. */ mailbox = mlx4_alloc_cmd_mailbox(to_mdev(ibdev)->dev); if (IS_ERR(mailbox)) return 0; memset(mailbox->buf, 0, 256); memcpy(mailbox->buf, props->node_desc, 64); mlx4_cmd(to_mdev(ibdev)->dev, mailbox->dma, 1, 0, MLX4_CMD_SET_NODE, MLX4_CMD_TIME_CLASS_A, MLX4_CMD_NATIVE); mlx4_free_cmd_mailbox(to_mdev(ibdev)->dev, mailbox); return 0; } static int mlx4_SET_PORT(struct mlx4_ib_dev *dev, u8 port, int reset_qkey_viols, u32 cap_mask) { struct mlx4_cmd_mailbox *mailbox; int err; u8 is_eth = dev->dev->caps.port_type[port] == MLX4_PORT_TYPE_ETH; mailbox = mlx4_alloc_cmd_mailbox(dev->dev); if (IS_ERR(mailbox)) return PTR_ERR(mailbox); memset(mailbox->buf, 0, 256); if (dev->dev->flags & MLX4_FLAG_OLD_PORT_CMDS) { *(u8 *) mailbox->buf = !!reset_qkey_viols << 6; ((__be32 *) mailbox->buf)[2] = cpu_to_be32(cap_mask); } else { ((u8 *) mailbox->buf)[3] = !!reset_qkey_viols; ((__be32 *) mailbox->buf)[1] = cpu_to_be32(cap_mask); } err = mlx4_cmd(dev->dev, mailbox->dma, port, is_eth, MLX4_CMD_SET_PORT, MLX4_CMD_TIME_CLASS_B, MLX4_CMD_NATIVE); mlx4_free_cmd_mailbox(dev->dev, mailbox); return err; } static int mlx4_ib_modify_port(struct ib_device *ibdev, u8 port, int mask, struct ib_port_modify *props) { struct ib_port_attr attr; u32 cap_mask; int err; mutex_lock(&to_mdev(ibdev)->cap_mask_mutex); err = mlx4_ib_query_port(ibdev, port, &attr); if (err) goto out; cap_mask = (attr.port_cap_flags | props->set_port_cap_mask) & ~props->clr_port_cap_mask; err = mlx4_SET_PORT(to_mdev(ibdev), port, !!(mask & IB_PORT_RESET_QKEY_CNTR), cap_mask); out: mutex_unlock(&to_mdev(ibdev)->cap_mask_mutex); return err; } static struct ib_ucontext *mlx4_ib_alloc_ucontext(struct ib_device *ibdev, struct ib_udata *udata) { struct mlx4_ib_dev *dev = to_mdev(ibdev); struct mlx4_ib_ucontext *context; struct mlx4_ib_alloc_ucontext_resp_v3 resp_v3; struct mlx4_ib_alloc_ucontext_resp resp; int err; if (!dev->ib_active) return ERR_PTR(-EAGAIN); if (ibdev->uverbs_abi_ver == MLX4_IB_UVERBS_NO_DEV_CAPS_ABI_VERSION) { resp_v3.qp_tab_size = dev->dev->caps.num_qps; if (mlx4_wc_enabled()) { resp_v3.bf_reg_size = dev->dev->caps.bf_reg_size; resp_v3.bf_regs_per_page = dev->dev->caps.bf_regs_per_page; } else { resp_v3.bf_reg_size = 0; resp_v3.bf_regs_per_page = 0; } } else { resp.dev_caps = dev->dev->caps.userspace_caps; resp.qp_tab_size = dev->dev->caps.num_qps; if (mlx4_wc_enabled()) { resp.bf_reg_size = dev->dev->caps.bf_reg_size; resp.bf_regs_per_page = dev->dev->caps.bf_regs_per_page; } else { resp.bf_reg_size = 0; resp.bf_regs_per_page = 0; } resp.cqe_size = dev->dev->caps.cqe_size; } context = kmalloc(sizeof *context, GFP_KERNEL); if (!context) return ERR_PTR(-ENOMEM); err = mlx4_uar_alloc(to_mdev(ibdev)->dev, &context->uar); if (err) { kfree(context); return ERR_PTR(err); } INIT_LIST_HEAD(&context->db_page_list); mutex_init(&context->db_page_mutex); if (ibdev->uverbs_abi_ver == MLX4_IB_UVERBS_NO_DEV_CAPS_ABI_VERSION) err = ib_copy_to_udata(udata, &resp_v3, sizeof(resp_v3)); else err = ib_copy_to_udata(udata, &resp, sizeof(resp)); if (err) { mlx4_uar_free(to_mdev(ibdev)->dev, &context->uar); kfree(context); return ERR_PTR(-EFAULT); } return &context->ibucontext; } static int mlx4_ib_dealloc_ucontext(struct ib_ucontext *ibcontext) { struct mlx4_ib_ucontext *context = to_mucontext(ibcontext); mlx4_uar_free(to_mdev(ibcontext->device)->dev, &context->uar); kfree(context); return 0; } #ifdef __linux__ static unsigned long mlx4_ib_get_unmapped_area(struct file *file, unsigned long addr, unsigned long len, unsigned long pgoff, unsigned long flags) { struct mm_struct *mm; struct vm_area_struct *vma; unsigned long start_addr; unsigned long page_size_order; unsigned long command; mm = current->mm; if (addr) return current->mm->get_unmapped_area(file, addr, len, pgoff, flags); /* Last 8 bits hold the command others are data per that command */ command = pgoff & MLX4_IB_MMAP_CMD_MASK; if (command != MLX4_IB_MMAP_GET_CONTIGUOUS_PAGES) return current->mm->get_unmapped_area(file, addr, len, pgoff, flags); page_size_order = pgoff >> MLX4_IB_MMAP_CMD_BITS; /* code is based on the huge-pages get_unmapped_area code */ start_addr = mm->free_area_cache; if (len <= mm->cached_hole_size) start_addr = TASK_UNMAPPED_BASE; full_search: addr = ALIGN(start_addr, 1 << page_size_order); for (vma = find_vma(mm, addr); ; vma = vma->vm_next) { /* At this point: (!vma || addr < vma->vm_end). */ if (TASK_SIZE - len < addr) { /* * Start a new search - just in case we missed * some holes. */ if (start_addr != TASK_UNMAPPED_BASE) { start_addr = TASK_UNMAPPED_BASE; goto full_search; } return -ENOMEM; } if (!vma || addr + len <= vma->vm_start) return addr; addr = ALIGN(vma->vm_end, 1 << page_size_order); } } #endif static int mlx4_ib_mmap(struct ib_ucontext *context, struct vm_area_struct *vma) { struct mlx4_ib_dev *dev = to_mdev(context->device); int err; /* Last 8 bits hold the command others are data per that command */ unsigned long command = vma->vm_pgoff & MLX4_IB_MMAP_CMD_MASK; if (command < MLX4_IB_MMAP_GET_CONTIGUOUS_PAGES) { /* compatability handling for commands 0 & 1*/ if (vma->vm_end - vma->vm_start != PAGE_SIZE) return -EINVAL; } if (command == MLX4_IB_MMAP_UAR_PAGE) { vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); if (io_remap_pfn_range(vma, vma->vm_start, to_mucontext(context)->uar.pfn, PAGE_SIZE, vma->vm_page_prot)) return -EAGAIN; } else if (command == MLX4_IB_MMAP_BLUE_FLAME_PAGE && dev->dev->caps.bf_reg_size != 0) { vma->vm_page_prot = pgprot_wc(vma->vm_page_prot); if (io_remap_pfn_range(vma, vma->vm_start, to_mucontext(context)->uar.pfn + dev->dev->caps.num_uars, PAGE_SIZE, vma->vm_page_prot)) return -EAGAIN; } else if (command == MLX4_IB_MMAP_GET_CONTIGUOUS_PAGES) { /* Getting contiguous physical pages */ unsigned long total_size = vma->vm_end - vma->vm_start; unsigned long page_size_order = (vma->vm_pgoff) >> MLX4_IB_MMAP_CMD_BITS; struct ib_cmem *ib_cmem; ib_cmem = ib_cmem_alloc_contiguous_pages(context, total_size, page_size_order); if (IS_ERR(ib_cmem)) { err = PTR_ERR(ib_cmem); return err; } err = ib_cmem_map_contiguous_pages_to_vma(ib_cmem, vma); if (err) { ib_cmem_release_contiguous_pages(ib_cmem); return err; } return 0; } else return -EINVAL; return 0; } static struct ib_pd *mlx4_ib_alloc_pd(struct ib_device *ibdev, struct ib_ucontext *context, struct ib_udata *udata) { struct mlx4_ib_pd *pd; int err; pd = kmalloc(sizeof *pd, GFP_KERNEL); if (!pd) return ERR_PTR(-ENOMEM); err = mlx4_pd_alloc(to_mdev(ibdev)->dev, &pd->pdn); if (err) { kfree(pd); return ERR_PTR(err); } if (context) if (ib_copy_to_udata(udata, &pd->pdn, sizeof (__u32))) { mlx4_pd_free(to_mdev(ibdev)->dev, pd->pdn); kfree(pd); return ERR_PTR(-EFAULT); } return &pd->ibpd; } static int mlx4_ib_dealloc_pd(struct ib_pd *pd) { mlx4_pd_free(to_mdev(pd->device)->dev, to_mpd(pd)->pdn); kfree(pd); return 0; } static struct ib_xrcd *mlx4_ib_alloc_xrcd(struct ib_device *ibdev, struct ib_ucontext *context, struct ib_udata *udata) { struct mlx4_ib_xrcd *xrcd; int err; if (!(to_mdev(ibdev)->dev->caps.flags & MLX4_DEV_CAP_FLAG_XRC)) return ERR_PTR(-ENOSYS); xrcd = kmalloc(sizeof *xrcd, GFP_KERNEL); if (!xrcd) return ERR_PTR(-ENOMEM); err = mlx4_xrcd_alloc(to_mdev(ibdev)->dev, &xrcd->xrcdn); if (err) goto err1; xrcd->pd = ib_alloc_pd(ibdev); if (IS_ERR(xrcd->pd)) { err = PTR_ERR(xrcd->pd); goto err2; } xrcd->cq = ib_create_cq(ibdev, NULL, NULL, xrcd, 1, 0); if (IS_ERR(xrcd->cq)) { err = PTR_ERR(xrcd->cq); goto err3; } return &xrcd->ibxrcd; err3: ib_dealloc_pd(xrcd->pd); err2: mlx4_xrcd_free(to_mdev(ibdev)->dev, xrcd->xrcdn); err1: kfree(xrcd); return ERR_PTR(err); } static int mlx4_ib_dealloc_xrcd(struct ib_xrcd *xrcd) { ib_destroy_cq(to_mxrcd(xrcd)->cq); ib_dealloc_pd(to_mxrcd(xrcd)->pd); mlx4_xrcd_free(to_mdev(xrcd->device)->dev, to_mxrcd(xrcd)->xrcdn); kfree(xrcd); return 0; } static int add_gid_entry(struct ib_qp *ibqp, union ib_gid *gid) { struct mlx4_ib_qp *mqp = to_mqp(ibqp); struct mlx4_ib_dev *mdev = to_mdev(ibqp->device); struct mlx4_ib_gid_entry *ge; ge = kzalloc(sizeof *ge, GFP_KERNEL); if (!ge) return -ENOMEM; ge->gid = *gid; if (mlx4_ib_add_mc(mdev, mqp, gid)) { ge->port = mqp->port; ge->added = 1; } mutex_lock(&mqp->mutex); list_add_tail(&ge->list, &mqp->gid_list); mutex_unlock(&mqp->mutex); return 0; } int mlx4_ib_add_mc(struct mlx4_ib_dev *mdev, struct mlx4_ib_qp *mqp, union ib_gid *gid) { u8 mac[6]; struct net_device *ndev; int ret = 0; if (!mqp->port) return 0; spin_lock(&mdev->iboe.lock); ndev = mdev->iboe.netdevs[mqp->port - 1]; if (ndev) dev_hold(ndev); spin_unlock(&mdev->iboe.lock); if (ndev) { rdma_get_mcast_mac((struct in6_addr *)gid, mac); rtnl_lock(); dev_mc_add(mdev->iboe.netdevs[mqp->port - 1], mac, 6, 0); ret = 1; rtnl_unlock(); dev_put(ndev); } return ret; } struct mlx4_ib_steering { struct list_head list; u64 reg_id; union ib_gid gid; }; static int mlx4_ib_mcg_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid) { int err; struct mlx4_ib_dev *mdev = to_mdev(ibqp->device); struct mlx4_ib_qp *mqp = to_mqp(ibqp); u64 reg_id; struct mlx4_ib_steering *ib_steering = NULL; if (mdev->dev->caps.steering_mode == MLX4_STEERING_MODE_DEVICE_MANAGED) { ib_steering = kmalloc(sizeof(*ib_steering), GFP_KERNEL); if (!ib_steering) return -ENOMEM; } err = mlx4_multicast_attach(mdev->dev, &mqp->mqp, gid->raw, mqp->port, !!(mqp->flags & MLX4_IB_QP_BLOCK_MULTICAST_LOOPBACK), MLX4_PROT_IB_IPV6, ®_id); if (err) goto err_malloc; err = add_gid_entry(ibqp, gid); if (err) goto err_add; if (ib_steering) { memcpy(ib_steering->gid.raw, gid->raw, 16); ib_steering->reg_id = reg_id; mutex_lock(&mqp->mutex); list_add(&ib_steering->list, &mqp->steering_rules); mutex_unlock(&mqp->mutex); } return 0; err_add: mlx4_multicast_detach(mdev->dev, &mqp->mqp, gid->raw, MLX4_PROT_IB_IPV6, reg_id); err_malloc: kfree(ib_steering); return err; } enum { IBV_FLOW_L4_NONE = 0, IBV_FLOW_L4_OTHER = 3, IBV_FLOW_L4_UDP = 5, IBV_FLOW_L4_TCP = 6 }; struct mlx4_cm_steering { struct list_head list; u64 reg_id; struct ib_flow_spec spec; }; static int flow_spec_to_net_rule(struct ib_device *dev, struct ib_flow_spec *flow_spec, struct list_head *rule_list_h) { struct mlx4_spec_list *spec_l2, *spec_l3, *spec_l4; u64 mac_msk = cpu_to_be64(MLX4_MAC_MASK << 16); spec_l2 = kzalloc(sizeof *spec_l2, GFP_KERNEL); if (!spec_l2) return -ENOMEM; switch (flow_spec->type) { case IB_FLOW_ETH: spec_l2->id = MLX4_NET_TRANS_RULE_ID_ETH; memcpy(spec_l2->eth.dst_mac, flow_spec->l2_id.eth.mac, ETH_ALEN); memcpy(spec_l2->eth.dst_mac_msk, &mac_msk, ETH_ALEN); spec_l2->eth.ether_type = flow_spec->l2_id.eth.ethertype; if (flow_spec->l2_id.eth.vlan_present) { spec_l2->eth.vlan_id = flow_spec->l2_id.eth.vlan; spec_l2->eth.vlan_id_msk = cpu_to_be16(0x0fff); } break; case IB_FLOW_IB_UC: spec_l2->id = MLX4_NET_TRANS_RULE_ID_IB; if(flow_spec->l2_id.ib_uc.qpn) { spec_l2->ib.r_u_qpn = cpu_to_be32(flow_spec->l2_id.ib_uc.qpn); spec_l2->ib.qpn_msk = cpu_to_be32(0xffffff); } break; case IB_FLOW_IB_MC_IPV4: case IB_FLOW_IB_MC_IPV6: spec_l2->id = MLX4_NET_TRANS_RULE_ID_IB; memcpy(spec_l2->ib.dst_gid, flow_spec->l2_id.ib_mc.mgid, 16); memset(spec_l2->ib.dst_gid_msk, 0xff, 16); break; } list_add_tail(&spec_l2->list, rule_list_h); if (flow_spec->l2_id.eth.ethertype == cpu_to_be16(ETH_P_IP) || flow_spec->type != IB_FLOW_ETH) { spec_l3 = kzalloc(sizeof *spec_l3, GFP_KERNEL); if (!spec_l3) return -ENOMEM; spec_l3->id = MLX4_NET_TRANS_RULE_ID_IPV4; spec_l3->ipv4.src_ip = flow_spec->src_ip; if (flow_spec->type != IB_FLOW_IB_MC_IPV4 && flow_spec->type != IB_FLOW_IB_MC_IPV6) spec_l3->ipv4.dst_ip = flow_spec->dst_ip; if (spec_l3->ipv4.src_ip) spec_l3->ipv4.src_ip_msk = MLX4_BE_WORD_MASK; if (spec_l3->ipv4.dst_ip) spec_l3->ipv4.dst_ip_msk = MLX4_BE_WORD_MASK; list_add_tail(&spec_l3->list, rule_list_h); } if (flow_spec->l4_protocol) { spec_l4 = kzalloc(sizeof(*spec_l4), GFP_KERNEL); if (!spec_l4) return -ENOMEM; spec_l4->tcp_udp.src_port = flow_spec->src_port; spec_l4->tcp_udp.dst_port = flow_spec->dst_port; if (spec_l4->tcp_udp.src_port) spec_l4->tcp_udp.src_port_msk = MLX4_BE_SHORT_MASK; if (spec_l4->tcp_udp.dst_port) spec_l4->tcp_udp.dst_port_msk = MLX4_BE_SHORT_MASK; switch (flow_spec->l4_protocol) { case IBV_FLOW_L4_UDP: spec_l4->id = MLX4_NET_TRANS_RULE_ID_UDP; break; case IBV_FLOW_L4_TCP: spec_l4->id = MLX4_NET_TRANS_RULE_ID_TCP; break; default: dev_err(dev->dma_device, "Unsupported l4 protocol.\n"); kfree(spec_l4); return -EPROTONOSUPPORT; } list_add_tail(&spec_l4->list, rule_list_h); } return 0; } static int __mlx4_ib_flow_attach(struct mlx4_ib_dev *mdev, struct mlx4_ib_qp *mqp, struct ib_flow_spec *flow_spec, int priority, int lock_qp) { u64 reg_id = 0; int err = 0; struct mlx4_cm_steering *cm_flow; struct mlx4_spec_list *spec, *tmp_spec; struct mlx4_net_trans_rule rule = { .queue_mode = MLX4_NET_TRANS_Q_FIFO, .exclusive = 0, }; rule.promisc_mode = flow_spec->rule_type; rule.port = mqp->port; rule.qpn = mqp->mqp.qpn; INIT_LIST_HEAD(&rule.list); cm_flow = kmalloc(sizeof(*cm_flow), GFP_KERNEL); if (!cm_flow) return -ENOMEM; if (rule.promisc_mode == MLX4_FS_REGULAR) { rule.allow_loopback = !flow_spec->block_mc_loopback; rule.priority = MLX4_DOMAIN_UVERBS | priority; err = flow_spec_to_net_rule(&mdev->ib_dev, flow_spec, &rule.list); if (err) goto free_list; } err = mlx4_flow_attach(mdev->dev, &rule, ®_id); if (err) goto free_list; memcpy(&cm_flow->spec, flow_spec, sizeof(*flow_spec)); cm_flow->reg_id = reg_id; if (lock_qp) mutex_lock(&mqp->mutex); list_add(&cm_flow->list, &mqp->rules_list); if (lock_qp) mutex_unlock(&mqp->mutex); free_list: list_for_each_entry_safe(spec, tmp_spec, &rule.list, list) { list_del(&spec->list); kfree(spec); } if (err) { kfree(cm_flow); dev_err(mdev->ib_dev.dma_device, "Fail to attach flow steering rule\n"); } return err; } static int __mlx4_ib_flow_detach(struct mlx4_ib_dev *mdev, struct mlx4_ib_qp *mqp, struct ib_flow_spec *spec, int priority, int lock_qp) { struct mlx4_cm_steering *cm_flow; int ret; if (lock_qp) mutex_lock(&mqp->mutex); list_for_each_entry(cm_flow, &mqp->rules_list, list) { if (!memcmp(&cm_flow->spec, spec, sizeof(*spec))) { list_del(&cm_flow->list); break; } } if (lock_qp) mutex_unlock(&mqp->mutex); if (&cm_flow->list == &mqp->rules_list) { dev_err(mdev->ib_dev.dma_device, "Couldn't find reg_id for flow spec. " "Steering rule is left attached\n"); return -EINVAL; } ret = mlx4_flow_detach(mdev->dev, cm_flow->reg_id); kfree(cm_flow); return ret; } static int mlx4_ib_flow_attach(struct ib_qp *qp, struct ib_flow_spec *flow_spec, int priority) { return __mlx4_ib_flow_attach(to_mdev(qp->device), to_mqp(qp), flow_spec, priority, 1); } static int mlx4_ib_flow_detach(struct ib_qp *qp, struct ib_flow_spec *spec, int priority) { return __mlx4_ib_flow_detach(to_mdev(qp->device), to_mqp(qp), spec, priority, 1); } static struct mlx4_ib_gid_entry *find_gid_entry(struct mlx4_ib_qp *qp, u8 *raw) { struct mlx4_ib_gid_entry *ge; struct mlx4_ib_gid_entry *tmp; struct mlx4_ib_gid_entry *ret = NULL; list_for_each_entry_safe(ge, tmp, &qp->gid_list, list) { if (!memcmp(raw, ge->gid.raw, 16)) { ret = ge; break; } } return ret; } static int mlx4_ib_mcg_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid) { int err; struct mlx4_ib_dev *mdev = to_mdev(ibqp->device); struct mlx4_ib_qp *mqp = to_mqp(ibqp); u8 mac[6]; struct net_device *ndev; struct mlx4_ib_gid_entry *ge; u64 reg_id = 0; if (mdev->dev->caps.steering_mode == MLX4_STEERING_MODE_DEVICE_MANAGED) { struct mlx4_ib_steering *ib_steering; mutex_lock(&mqp->mutex); list_for_each_entry(ib_steering, &mqp->steering_rules, list) { if (!memcmp(ib_steering->gid.raw, gid->raw, 16)) { list_del(&ib_steering->list); break; } } mutex_unlock(&mqp->mutex); if (&ib_steering->list == &mqp->steering_rules) { pr_err("Couldn't find reg_id for mgid. Steering rule is left attached\n"); return -EINVAL; } reg_id = ib_steering->reg_id; kfree(ib_steering); } err = mlx4_multicast_detach(mdev->dev, &mqp->mqp, gid->raw, MLX4_PROT_IB_IPV6, reg_id); if (err) return err; mutex_lock(&mqp->mutex); ge = find_gid_entry(mqp, gid->raw); if (ge) { spin_lock(&mdev->iboe.lock); ndev = ge->added ? mdev->iboe.netdevs[ge->port - 1] : NULL; if (ndev) dev_hold(ndev); spin_unlock(&mdev->iboe.lock); rdma_get_mcast_mac((struct in6_addr *)gid, mac); if (ndev) { rtnl_lock(); dev_mc_delete(mdev->iboe.netdevs[ge->port - 1], mac, 6, 0); rtnl_unlock(); dev_put(ndev); } list_del(&ge->list); kfree(ge); } else pr_warn("could not find mgid entry\n"); mutex_unlock(&mqp->mutex); return 0; } static int init_node_data(struct mlx4_ib_dev *dev) { struct ib_smp *in_mad = NULL; struct ib_smp *out_mad = NULL; int mad_ifc_flags = MLX4_MAD_IFC_IGNORE_KEYS; int err = -ENOMEM; in_mad = kzalloc(sizeof *in_mad, GFP_KERNEL); out_mad = kmalloc(sizeof *out_mad, GFP_KERNEL); if (!in_mad || !out_mad) goto out; init_query_mad(in_mad); in_mad->attr_id = IB_SMP_ATTR_NODE_DESC; if (mlx4_is_master(dev->dev)) mad_ifc_flags |= MLX4_MAD_IFC_NET_VIEW; err = mlx4_MAD_IFC(dev, mad_ifc_flags, 1, NULL, NULL, in_mad, out_mad); if (err) goto out; memcpy(dev->ib_dev.node_desc, out_mad->data, 64); in_mad->attr_id = IB_SMP_ATTR_NODE_INFO; err = mlx4_MAD_IFC(dev, mad_ifc_flags, 1, NULL, NULL, in_mad, out_mad); if (err) goto out; dev->dev->rev_id = be32_to_cpup((__be32 *) (out_mad->data + 32)); memcpy(&dev->ib_dev.node_guid, out_mad->data + 12, 8); out: kfree(in_mad); kfree(out_mad); return err; } static ssize_t show_hca(struct device *device, struct device_attribute *attr, char *buf) { struct mlx4_ib_dev *dev = container_of(device, struct mlx4_ib_dev, ib_dev.dev); return sprintf(buf, "MT%d\n", dev->dev->pdev->device); } static ssize_t show_fw_ver(struct device *device, struct device_attribute *attr, char *buf) { struct mlx4_ib_dev *dev = container_of(device, struct mlx4_ib_dev, ib_dev.dev); return sprintf(buf, "%d.%d.%d\n", (int) (dev->dev->caps.fw_ver >> 32), (int) (dev->dev->caps.fw_ver >> 16) & 0xffff, (int) dev->dev->caps.fw_ver & 0xffff); } static ssize_t show_rev(struct device *device, struct device_attribute *attr, char *buf) { struct mlx4_ib_dev *dev = container_of(device, struct mlx4_ib_dev, ib_dev.dev); return sprintf(buf, "%x\n", dev->dev->rev_id); } static ssize_t show_board(struct device *device, struct device_attribute *attr, char *buf) { struct mlx4_ib_dev *dev = container_of(device, struct mlx4_ib_dev, ib_dev.dev); return sprintf(buf, "%.*s\n", MLX4_BOARD_ID_LEN, dev->dev->board_id); } static DEVICE_ATTR(hw_rev, S_IRUGO, show_rev, NULL); static DEVICE_ATTR(fw_ver, S_IRUGO, show_fw_ver, NULL); static DEVICE_ATTR(hca_type, S_IRUGO, show_hca, NULL); static DEVICE_ATTR(board_id, S_IRUGO, show_board, NULL); static struct device_attribute *mlx4_class_attributes[] = { &dev_attr_hw_rev, &dev_attr_fw_ver, &dev_attr_hca_type, &dev_attr_board_id }; static void mlx4_addrconf_ifid_eui48(u8 *eui, u16 vlan_id, struct net_device *dev) { #ifdef __linux__ memcpy(eui, dev->dev_addr, 3); memcpy(eui + 5, dev->dev_addr + 3, 3); #else memcpy(eui, IF_LLADDR(dev), 3); memcpy(eui + 5, IF_LLADDR(dev) + 3, 3); #endif if (vlan_id < 0x1000) { eui[3] = vlan_id >> 8; eui[4] = vlan_id & 0xff; } else { eui[3] = 0xff; eui[4] = 0xfe; } eui[0] ^= 2; } static void update_gids_task(struct work_struct *work) { struct update_gid_work *gw = container_of(work, struct update_gid_work, work); struct mlx4_cmd_mailbox *mailbox; union ib_gid *gids; int err; struct mlx4_dev *dev = gw->dev->dev; mailbox = mlx4_alloc_cmd_mailbox(dev); if (IS_ERR(mailbox)) { pr_warn("update gid table failed %ld\n", PTR_ERR(mailbox)); return; } gids = mailbox->buf; memcpy(gids, gw->gids, sizeof gw->gids); err = mlx4_cmd(dev, mailbox->dma, MLX4_SET_PORT_GID_TABLE << 8 | gw->port, 1, MLX4_CMD_SET_PORT, MLX4_CMD_TIME_CLASS_B, MLX4_CMD_WRAPPED); if (err) pr_warn("set port command failed\n"); else { memcpy(gw->dev->iboe.gid_table[gw->port - 1], gw->gids, sizeof gw->gids); mlx4_ib_dispatch_event(gw->dev, gw->port, IB_EVENT_GID_CHANGE); } mlx4_free_cmd_mailbox(dev, mailbox); kfree(gw); } static int update_ipv6_gids(struct mlx4_ib_dev *dev, int port, int clear) { struct net_device *ndev = dev->iboe.netdevs[port - 1]; struct update_gid_work *work; struct net_device *tmp; int i; u8 *hits; union ib_gid gid; int index_free; int found; int need_update = 0; int max_gids; u16 vid; work = kzalloc(sizeof *work, GFP_ATOMIC); if (!work) return -ENOMEM; hits = kzalloc(128, GFP_ATOMIC); if (!hits) { kfree(work); return -ENOMEM; } max_gids = dev->dev->caps.gid_table_len[port]; #ifdef __linux__ rcu_read_lock(); for_each_netdev_rcu(&init_net, tmp) { #else IFNET_RLOCK(); TAILQ_FOREACH(tmp, &V_ifnet, if_link) { #endif if (ndev && (tmp == ndev || rdma_vlan_dev_real_dev(tmp) == ndev)) { gid.global.subnet_prefix = cpu_to_be64(0xfe80000000000000LL); vid = rdma_vlan_dev_vlan_id(tmp); mlx4_addrconf_ifid_eui48(&gid.raw[8], vid, ndev); found = 0; index_free = -1; for (i = 0; i < max_gids; ++i) { if (index_free < 0 && !memcmp(&dev->iboe.gid_table[port - 1][i], &zgid, sizeof zgid)) index_free = i; if (!memcmp(&dev->iboe.gid_table[port - 1][i], &gid, sizeof gid)) { hits[i] = 1; found = 1; break; } } if (!found) { if (tmp == ndev && (memcmp(&dev->iboe.gid_table[port - 1][0], &gid, sizeof gid) || !memcmp(&dev->iboe.gid_table[port - 1][0], &zgid, sizeof gid))) { dev->iboe.gid_table[port - 1][0] = gid; ++need_update; hits[0] = 1; } else if (index_free >= 0) { dev->iboe.gid_table[port - 1][index_free] = gid; hits[index_free] = 1; ++need_update; } } } #ifdef __linux__ } rcu_read_unlock(); #else } IFNET_RUNLOCK(); #endif for (i = 0; i < max_gids; ++i) if (!hits[i]) { if (memcmp(&dev->iboe.gid_table[port - 1][i], &zgid, sizeof zgid)) ++need_update; dev->iboe.gid_table[port - 1][i] = zgid; } if (need_update) { memcpy(work->gids, dev->iboe.gid_table[port - 1], sizeof work->gids); INIT_WORK(&work->work, update_gids_task); work->port = port; work->dev = dev; queue_work(wq, &work->work); } else kfree(work); kfree(hits); return 0; } static void handle_en_event(struct mlx4_ib_dev *dev, int port, unsigned long event) { switch (event) { case NETDEV_UP: #ifdef __linux__ case NETDEV_CHANGEADDR: #endif update_ipv6_gids(dev, port, 0); break; case NETDEV_DOWN: update_ipv6_gids(dev, port, 1); dev->iboe.netdevs[port - 1] = NULL; } } static void netdev_added(struct mlx4_ib_dev *dev, int port) { update_ipv6_gids(dev, port, 0); } static void netdev_removed(struct mlx4_ib_dev *dev, int port) { update_ipv6_gids(dev, port, 1); } static int mlx4_ib_netdev_event(struct notifier_block *this, unsigned long event, void *ptr) { struct net_device *dev = ptr; struct mlx4_ib_dev *ibdev; struct net_device *oldnd; struct mlx4_ib_iboe *iboe; int port; #ifdef __linux__ if (!net_eq(dev_net(dev), &init_net)) return NOTIFY_DONE; #endif ibdev = container_of(this, struct mlx4_ib_dev, iboe.nb); iboe = &ibdev->iboe; spin_lock(&iboe->lock); mlx4_foreach_ib_transport_port(port, ibdev->dev) { oldnd = iboe->netdevs[port - 1]; iboe->netdevs[port - 1] = mlx4_get_protocol_dev(ibdev->dev, MLX4_PROT_ETH, port); if (oldnd != iboe->netdevs[port - 1]) { if (iboe->netdevs[port - 1]) netdev_added(ibdev, port); else netdev_removed(ibdev, port); } } if (dev == iboe->netdevs[0] || (iboe->netdevs[0] && rdma_vlan_dev_real_dev(dev) == iboe->netdevs[0])) handle_en_event(ibdev, 1, event); else if (dev == iboe->netdevs[1] || (iboe->netdevs[1] && rdma_vlan_dev_real_dev(dev) == iboe->netdevs[1])) handle_en_event(ibdev, 2, event); spin_unlock(&iboe->lock); return NOTIFY_DONE; } static void init_pkeys(struct mlx4_ib_dev *ibdev) { int port; int slave; int i; if (mlx4_is_master(ibdev->dev)) { for (slave = 0; slave <= ibdev->dev->num_vfs; ++slave) { for (port = 1; port <= ibdev->dev->caps.num_ports; ++port) { for (i = 0; i < ibdev->dev->phys_caps.pkey_phys_table_len[port]; ++i) { ibdev->pkeys.virt2phys_pkey[slave][port - 1][i] = /* master has the identity virt2phys pkey mapping */ (slave == mlx4_master_func_num(ibdev->dev) || !i) ? i : ibdev->dev->phys_caps.pkey_phys_table_len[port] - 1; mlx4_sync_pkey_table(ibdev->dev, slave, port, i, ibdev->pkeys.virt2phys_pkey[slave][port - 1][i]); } } } /* initialize pkey cache */ for (port = 1; port <= ibdev->dev->caps.num_ports; ++port) { for (i = 0; i < ibdev->dev->phys_caps.pkey_phys_table_len[port]; ++i) ibdev->pkeys.phys_pkey_cache[port-1][i] = (i) ? 0 : 0xFFFF; } } } static void mlx4_ib_alloc_eqs(struct mlx4_dev *dev, struct mlx4_ib_dev *ibdev) { char name[32]; int eq_per_port = 0; int added_eqs = 0; int total_eqs = 0; int i, j, eq; /* Legacy mode or comp_pool is not large enough */ if (dev->caps.comp_pool == 0 || dev->caps.num_ports > dev->caps.comp_pool) return; eq_per_port = rounddown_pow_of_two(dev->caps.comp_pool/ dev->caps.num_ports); /* Init eq table */ added_eqs = 0; mlx4_foreach_port(i, dev, MLX4_PORT_TYPE_IB) added_eqs += eq_per_port; total_eqs = dev->caps.num_comp_vectors + added_eqs; ibdev->eq_table = kzalloc(total_eqs * sizeof(int), GFP_KERNEL); if (!ibdev->eq_table) return; ibdev->eq_added = added_eqs; eq = 0; mlx4_foreach_port(i, dev, MLX4_PORT_TYPE_IB) { for (j = 0; j < eq_per_port; j++) { //sprintf(name, "mlx4-ib-%d-%d@%s", // i, j, dev->pdev->bus->conf.pd_name); /* Set IRQ for specific name (per ring) */ if (mlx4_assign_eq(dev, name, &ibdev->eq_table[eq])) { /* Use legacy (same as mlx4_en driver) */ pr_warn("Can't allocate EQ %d; reverting to legacy\n", eq); ibdev->eq_table[eq] = (eq % dev->caps.num_comp_vectors); } eq++; } } /* Fill the reset of the vector with legacy EQ */ for (i = 0, eq = added_eqs; i < dev->caps.num_comp_vectors; i++) ibdev->eq_table[eq++] = i; /* Advertise the new number of EQs to clients */ ibdev->ib_dev.num_comp_vectors = total_eqs; } static void mlx4_ib_free_eqs(struct mlx4_dev *dev, struct mlx4_ib_dev *ibdev) { int i; /* no additional eqs were added */ if (!ibdev->eq_table) return; /* Reset the advertised EQ number */ ibdev->ib_dev.num_comp_vectors = dev->caps.num_comp_vectors; /* Free only the added eqs */ for (i = 0; i < ibdev->eq_added; i++) { /* Don't free legacy eqs if used */ if (ibdev->eq_table[i] <= dev->caps.num_comp_vectors) continue; mlx4_release_eq(dev, ibdev->eq_table[i]); } kfree(ibdev->eq_table); } /* * create show function and a device_attribute struct pointing to * the function for _name */ #define DEVICE_DIAG_RPRT_ATTR(_name, _offset, _op_mod) \ static ssize_t show_rprt_##_name(struct device *dev, \ struct device_attribute *attr, \ char *buf){ \ return show_diag_rprt(dev, buf, _offset, _op_mod); \ } \ static DEVICE_ATTR(_name, S_IRUGO, show_rprt_##_name, NULL); #define MLX4_DIAG_RPRT_CLEAR_DIAGS 3 static size_t show_diag_rprt(struct device *device, char *buf, u32 offset, u8 op_modifier) { size_t ret; u32 counter_offset = offset; u32 diag_counter = 0; struct mlx4_ib_dev *dev = container_of(device, struct mlx4_ib_dev, ib_dev.dev); ret = mlx4_query_diag_counters(dev->dev, 1, op_modifier, &counter_offset, &diag_counter); if (ret) return ret; return sprintf(buf, "%d\n", diag_counter); } static ssize_t clear_diag_counters(struct device *device, struct device_attribute *attr, const char *buf, size_t length) { size_t ret; struct mlx4_ib_dev *dev = container_of(device, struct mlx4_ib_dev, ib_dev.dev); ret = mlx4_query_diag_counters(dev->dev, 0, MLX4_DIAG_RPRT_CLEAR_DIAGS, NULL, NULL); if (ret) return ret; return length; } DEVICE_DIAG_RPRT_ATTR(rq_num_lle , 0x00, 2); DEVICE_DIAG_RPRT_ATTR(sq_num_lle , 0x04, 2); DEVICE_DIAG_RPRT_ATTR(rq_num_lqpoe , 0x08, 2); DEVICE_DIAG_RPRT_ATTR(sq_num_lqpoe , 0x0C, 2); DEVICE_DIAG_RPRT_ATTR(rq_num_lpe , 0x18, 2); DEVICE_DIAG_RPRT_ATTR(sq_num_lpe , 0x1C, 2); DEVICE_DIAG_RPRT_ATTR(rq_num_wrfe , 0x20, 2); DEVICE_DIAG_RPRT_ATTR(sq_num_wrfe , 0x24, 2); DEVICE_DIAG_RPRT_ATTR(sq_num_mwbe , 0x2C, 2); DEVICE_DIAG_RPRT_ATTR(sq_num_bre , 0x34, 2); DEVICE_DIAG_RPRT_ATTR(rq_num_lae , 0x38, 2); DEVICE_DIAG_RPRT_ATTR(sq_num_rire , 0x44, 2); DEVICE_DIAG_RPRT_ATTR(rq_num_rire , 0x48, 2); DEVICE_DIAG_RPRT_ATTR(sq_num_rae , 0x4C, 2); DEVICE_DIAG_RPRT_ATTR(rq_num_rae , 0x50, 2); DEVICE_DIAG_RPRT_ATTR(sq_num_roe , 0x54, 2); DEVICE_DIAG_RPRT_ATTR(sq_num_tree , 0x5C, 2); DEVICE_DIAG_RPRT_ATTR(sq_num_rree , 0x64, 2); DEVICE_DIAG_RPRT_ATTR(rq_num_rnr , 0x68, 2); DEVICE_DIAG_RPRT_ATTR(sq_num_rnr , 0x6C, 2); DEVICE_DIAG_RPRT_ATTR(rq_num_oos , 0x100, 2); DEVICE_DIAG_RPRT_ATTR(sq_num_oos , 0x104, 2); DEVICE_DIAG_RPRT_ATTR(rq_num_mce , 0x108, 2); DEVICE_DIAG_RPRT_ATTR(rq_num_udsdprd , 0x118, 2); DEVICE_DIAG_RPRT_ATTR(rq_num_ucsdprd , 0x120, 2); DEVICE_DIAG_RPRT_ATTR(num_cqovf , 0x1A0, 2); DEVICE_DIAG_RPRT_ATTR(num_eqovf , 0x1A4, 2); DEVICE_DIAG_RPRT_ATTR(num_baddb , 0x1A8, 2); static DEVICE_ATTR(clear_diag, S_IWUSR, NULL, clear_diag_counters); static struct attribute *diag_rprt_attrs[] = { &dev_attr_rq_num_lle.attr, &dev_attr_sq_num_lle.attr, &dev_attr_rq_num_lqpoe.attr, &dev_attr_sq_num_lqpoe.attr, &dev_attr_rq_num_lpe.attr, &dev_attr_sq_num_lpe.attr, &dev_attr_rq_num_wrfe.attr, &dev_attr_sq_num_wrfe.attr, &dev_attr_sq_num_mwbe.attr, &dev_attr_sq_num_bre.attr, &dev_attr_rq_num_lae.attr, &dev_attr_sq_num_rire.attr, &dev_attr_rq_num_rire.attr, &dev_attr_sq_num_rae.attr, &dev_attr_rq_num_rae.attr, &dev_attr_sq_num_roe.attr, &dev_attr_sq_num_tree.attr, &dev_attr_sq_num_rree.attr, &dev_attr_rq_num_rnr.attr, &dev_attr_sq_num_rnr.attr, &dev_attr_rq_num_oos.attr, &dev_attr_sq_num_oos.attr, &dev_attr_rq_num_mce.attr, &dev_attr_rq_num_udsdprd.attr, &dev_attr_rq_num_ucsdprd.attr, &dev_attr_num_cqovf.attr, &dev_attr_num_eqovf.attr, &dev_attr_num_baddb.attr, &dev_attr_clear_diag.attr, NULL }; static struct attribute_group diag_counters_group = { .name = "diag_counters", .attrs = diag_rprt_attrs }; #ifdef __linux__ static int mlx4_ib_proc_init(void) { /* Creating procfs directories /proc/drivers/mlx4_ib/ && /proc/drivers/mlx4_ib/mrs for further use by the driver. */ int err; mlx4_ib_driver_dir_entry = proc_mkdir(MLX4_IB_DRIVER_PROC_DIR_NAME, NULL); if (!mlx4_ib_driver_dir_entry) { pr_err("mlx4_ib_proc_init has failed for %s\n", MLX4_IB_DRIVER_PROC_DIR_NAME); err = -ENODEV; goto error; } mlx4_mrs_dir_entry = proc_mkdir(MLX4_IB_MRS_PROC_DIR_NAME, mlx4_ib_driver_dir_entry); if (!mlx4_mrs_dir_entry) { pr_err("mlx4_ib_proc_init has failed for %s\n", MLX4_IB_MRS_PROC_DIR_NAME); err = -ENODEV; goto remove_entry; } return 0; remove_entry: remove_proc_entry(MLX4_IB_DRIVER_PROC_DIR_NAME, NULL); error: return err; } #endif static void init_dev_assign(void) { int bus, slot, fn, ib_idx; char *p = dev_assign_str, *t; char curr_val[32] = {0}; int ret; int j, i = 0; memset(dr, 0, sizeof dr); if (dev_assign_str[0] == 0) return; while (strlen(p)) { ret = sscanf(p, "%02x:%02x.%x-%x", &bus, &slot, &fn, &ib_idx); if (ret != 4 || ib_idx < 0) goto err; for (j = 0; j < i; j++) if (dr[j].nr == ib_idx) goto err; dr[i].bus = bus; dr[i].dev = slot; dr[i].func = fn; dr[i].nr = ib_idx; t = strchr(p, ','); sprintf(curr_val, "%02x:%02x.%x-%x", bus, slot, fn, ib_idx); if ((!t) && strlen(p) == strlen(curr_val)) return; if (!t || (t + 1) >= dev_assign_str + sizeof dev_assign_str) goto err; ++i; if (i >= MAX_DR) goto err; p = t + 1; } return; err: memset(dr, 0, sizeof dr); printk(KERN_WARNING "mlx4_ib: The value of 'dev_assign_str' parameter " "is incorrect. The parameter value is discarded!"); } static void *mlx4_ib_add(struct mlx4_dev *dev) { struct mlx4_ib_dev *ibdev; int num_ports = 0; int i, j; int err; struct mlx4_ib_iboe *iboe; printk(KERN_INFO "%s", mlx4_ib_version); mlx4_foreach_ib_transport_port(i, dev) num_ports++; /* No point in registering a device with no ports... */ if (num_ports == 0) return NULL; ibdev = (struct mlx4_ib_dev *) ib_alloc_device(sizeof *ibdev); if (!ibdev) { dev_err(&dev->pdev->dev, "Device struct alloc failed\n"); return NULL; } iboe = &ibdev->iboe; if (mlx4_pd_alloc(dev, &ibdev->priv_pdn)) goto err_dealloc; if (mlx4_uar_alloc(dev, &ibdev->priv_uar)) goto err_pd; ibdev->priv_uar.map = ioremap(ibdev->priv_uar.pfn << PAGE_SHIFT, PAGE_SIZE); if (!ibdev->priv_uar.map) goto err_uar; MLX4_INIT_DOORBELL_LOCK(&ibdev->uar_lock); ibdev->dev = dev; strlcpy(ibdev->ib_dev.name, "mlx4_%d", IB_DEVICE_NAME_MAX); ibdev->ib_dev.owner = THIS_MODULE; ibdev->ib_dev.node_type = RDMA_NODE_IB_CA; ibdev->ib_dev.local_dma_lkey = dev->caps.reserved_lkey; ibdev->num_ports = num_ports; ibdev->ib_dev.phys_port_cnt = ibdev->num_ports; ibdev->ib_dev.num_comp_vectors = dev->caps.num_comp_vectors; ibdev->ib_dev.dma_device = &dev->pdev->dev; if (dev->caps.userspace_caps) ibdev->ib_dev.uverbs_abi_ver = MLX4_IB_UVERBS_ABI_VERSION; else ibdev->ib_dev.uverbs_abi_ver = MLX4_IB_UVERBS_NO_DEV_CAPS_ABI_VERSION; ibdev->ib_dev.uverbs_cmd_mask = (1ull << IB_USER_VERBS_CMD_GET_CONTEXT) | (1ull << IB_USER_VERBS_CMD_QUERY_DEVICE) | (1ull << IB_USER_VERBS_CMD_QUERY_PORT) | (1ull << IB_USER_VERBS_CMD_ALLOC_PD) | (1ull << IB_USER_VERBS_CMD_DEALLOC_PD) | (1ull << IB_USER_VERBS_CMD_REG_MR) | (1ull << IB_USER_VERBS_CMD_DEREG_MR) | (1ull << IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL) | (1ull << IB_USER_VERBS_CMD_CREATE_CQ) | (1ull << IB_USER_VERBS_CMD_RESIZE_CQ) | (1ull << IB_USER_VERBS_CMD_DESTROY_CQ) | (1ull << IB_USER_VERBS_CMD_CREATE_QP) | (1ull << IB_USER_VERBS_CMD_MODIFY_QP) | (1ull << IB_USER_VERBS_CMD_QUERY_QP) | (1ull << IB_USER_VERBS_CMD_DESTROY_QP) | (1ull << IB_USER_VERBS_CMD_ATTACH_MCAST) | (1ull << IB_USER_VERBS_CMD_DETACH_MCAST) | (1ull << IB_USER_VERBS_CMD_CREATE_SRQ) | (1ull << IB_USER_VERBS_CMD_MODIFY_SRQ) | (1ull << IB_USER_VERBS_CMD_QUERY_SRQ) | (1ull << IB_USER_VERBS_CMD_DESTROY_SRQ) | (1ull << IB_USER_VERBS_CMD_CREATE_XSRQ) | (1ull << IB_USER_VERBS_CMD_OPEN_QP) | (1ull << IB_USER_VERBS_CMD_ATTACH_FLOW) | (1ull << IB_USER_VERBS_CMD_DETACH_FLOW) | (1ull << IB_USER_VERBS_CMD_DESTROY_SRQ); ibdev->ib_dev.query_device = mlx4_ib_query_device; ibdev->ib_dev.query_port = mlx4_ib_query_port; ibdev->ib_dev.get_link_layer = mlx4_ib_port_link_layer; ibdev->ib_dev.query_gid = mlx4_ib_query_gid; ibdev->ib_dev.query_pkey = mlx4_ib_query_pkey; ibdev->ib_dev.modify_device = mlx4_ib_modify_device; ibdev->ib_dev.modify_port = mlx4_ib_modify_port; ibdev->ib_dev.alloc_ucontext = mlx4_ib_alloc_ucontext; ibdev->ib_dev.dealloc_ucontext = mlx4_ib_dealloc_ucontext; ibdev->ib_dev.mmap = mlx4_ib_mmap; #ifdef __linux__ ibdev->ib_dev.get_unmapped_area = mlx4_ib_get_unmapped_area; #endif ibdev->ib_dev.alloc_pd = mlx4_ib_alloc_pd; ibdev->ib_dev.dealloc_pd = mlx4_ib_dealloc_pd; ibdev->ib_dev.create_ah = mlx4_ib_create_ah; ibdev->ib_dev.query_ah = mlx4_ib_query_ah; ibdev->ib_dev.destroy_ah = mlx4_ib_destroy_ah; ibdev->ib_dev.create_srq = mlx4_ib_create_srq; ibdev->ib_dev.modify_srq = mlx4_ib_modify_srq; ibdev->ib_dev.query_srq = mlx4_ib_query_srq; ibdev->ib_dev.destroy_srq = mlx4_ib_destroy_srq; ibdev->ib_dev.post_srq_recv = mlx4_ib_post_srq_recv; ibdev->ib_dev.create_qp = mlx4_ib_create_qp; ibdev->ib_dev.modify_qp = mlx4_ib_modify_qp; ibdev->ib_dev.query_qp = mlx4_ib_query_qp; ibdev->ib_dev.destroy_qp = mlx4_ib_destroy_qp; ibdev->ib_dev.post_send = mlx4_ib_post_send; ibdev->ib_dev.post_recv = mlx4_ib_post_recv; ibdev->ib_dev.create_cq = mlx4_ib_create_cq; ibdev->ib_dev.modify_cq = mlx4_ib_modify_cq; ibdev->ib_dev.resize_cq = mlx4_ib_resize_cq; ibdev->ib_dev.destroy_cq = mlx4_ib_destroy_cq; ibdev->ib_dev.poll_cq = mlx4_ib_poll_cq; ibdev->ib_dev.req_notify_cq = mlx4_ib_arm_cq; ibdev->ib_dev.get_dma_mr = mlx4_ib_get_dma_mr; ibdev->ib_dev.reg_user_mr = mlx4_ib_reg_user_mr; ibdev->ib_dev.dereg_mr = mlx4_ib_dereg_mr; ibdev->ib_dev.alloc_fast_reg_mr = mlx4_ib_alloc_fast_reg_mr; ibdev->ib_dev.alloc_fast_reg_page_list = mlx4_ib_alloc_fast_reg_page_list; ibdev->ib_dev.free_fast_reg_page_list = mlx4_ib_free_fast_reg_page_list; ibdev->ib_dev.attach_mcast = mlx4_ib_mcg_attach; ibdev->ib_dev.detach_mcast = mlx4_ib_mcg_detach; ibdev->ib_dev.attach_flow = mlx4_ib_flow_attach; ibdev->ib_dev.detach_flow = mlx4_ib_flow_detach; ibdev->ib_dev.process_mad = mlx4_ib_process_mad; if (!mlx4_is_slave(ibdev->dev)) { ibdev->ib_dev.alloc_fmr = mlx4_ib_fmr_alloc; ibdev->ib_dev.map_phys_fmr = mlx4_ib_map_phys_fmr; ibdev->ib_dev.unmap_fmr = mlx4_ib_unmap_fmr; ibdev->ib_dev.dealloc_fmr = mlx4_ib_fmr_dealloc; } if (dev->caps.flags & MLX4_DEV_CAP_FLAG_XRC) { ibdev->ib_dev.alloc_xrcd = mlx4_ib_alloc_xrcd; ibdev->ib_dev.dealloc_xrcd = mlx4_ib_dealloc_xrcd; ibdev->ib_dev.uverbs_cmd_mask |= (1ull << IB_USER_VERBS_CMD_OPEN_XRCD) | (1ull << IB_USER_VERBS_CMD_CLOSE_XRCD); } mlx4_ib_alloc_eqs(dev, ibdev); spin_lock_init(&iboe->lock); if (init_node_data(ibdev)) goto err_map; for (i = 0; i < ibdev->num_ports; ++i) { if (mlx4_ib_port_link_layer(&ibdev->ib_dev, i + 1) == IB_LINK_LAYER_ETHERNET) { err = mlx4_counter_alloc(ibdev->dev, &ibdev->counters[i]); if (err) ibdev->counters[i] = -1; } else ibdev->counters[i] = -1; } spin_lock_init(&ibdev->sm_lock); mutex_init(&ibdev->cap_mask_mutex); if (dev->caps.steering_mode == MLX4_STEERING_MODE_DEVICE_MANAGED && !mlx4_is_slave(dev)) { ibdev->steer_qpn_count = MLX4_IB_UC_MAX_NUM_QPS; err = mlx4_qp_reserve_range(dev, ibdev->steer_qpn_count, MLX4_IB_UC_STEER_QPN_ALIGN, &ibdev->steer_qpn_base, 0); if (err) goto err_counter; ibdev->ib_uc_qpns_bitmap = kmalloc(BITS_TO_LONGS(ibdev->steer_qpn_count) * sizeof(long), GFP_KERNEL); if (!ibdev->ib_uc_qpns_bitmap) { dev_err(&dev->pdev->dev, "bit map alloc failed\n"); goto err_steer_qp_release; } bitmap_zero(ibdev->ib_uc_qpns_bitmap, ibdev->steer_qpn_count); err = mlx4_FLOW_STEERING_IB_UC_QP_RANGE(dev, ibdev->steer_qpn_base, ibdev->steer_qpn_base + ibdev->steer_qpn_count - 1); if (err) goto err_steer_free_bitmap; } if (ib_register_device(&ibdev->ib_dev, NULL)) goto err_steer_free_bitmap; if (mlx4_ib_mad_init(ibdev)) goto err_reg; if (mlx4_ib_init_sriov(ibdev)) goto err_mad; if (dev->caps.flags & MLX4_DEV_CAP_FLAG_IBOE && !iboe->nb.notifier_call) { iboe->nb.notifier_call = mlx4_ib_netdev_event; err = register_netdevice_notifier(&iboe->nb); if (err) goto err_sriov; } for (j = 0; j < ARRAY_SIZE(mlx4_class_attributes); ++j) { if (device_create_file(&ibdev->ib_dev.dev, mlx4_class_attributes[j])) goto err_notif; } if (sysfs_create_group(&ibdev->ib_dev.dev.kobj, &diag_counters_group)) goto err_notif; ibdev->ib_active = true; if (mlx4_is_mfunc(ibdev->dev)) init_pkeys(ibdev); /* create paravirt contexts for any VFs which are active */ if (mlx4_is_master(ibdev->dev)) { for (j = 0; j < MLX4_MFUNC_MAX; j++) { if (j == mlx4_master_func_num(ibdev->dev)) continue; if (mlx4_is_slave_active(ibdev->dev, j)) do_slave_init(ibdev, j, 1); } } return ibdev; err_notif: if (unregister_netdevice_notifier(&ibdev->iboe.nb)) pr_warn("failure unregistering notifier\n"); flush_workqueue(wq); err_sriov: mlx4_ib_close_sriov(ibdev); err_mad: mlx4_ib_mad_cleanup(ibdev); err_reg: ib_unregister_device(&ibdev->ib_dev); err_steer_free_bitmap: kfree(ibdev->ib_uc_qpns_bitmap); err_steer_qp_release: if (dev->caps.steering_mode == MLX4_STEERING_MODE_DEVICE_MANAGED) mlx4_qp_release_range(dev, ibdev->steer_qpn_base, ibdev->steer_qpn_count); err_counter: for (; i; --i) if (ibdev->counters[i - 1] != -1) mlx4_counter_free(ibdev->dev, ibdev->counters[i - 1]); err_map: iounmap(ibdev->priv_uar.map); mlx4_ib_free_eqs(dev, ibdev); err_uar: mlx4_uar_free(dev, &ibdev->priv_uar); err_pd: mlx4_pd_free(dev, ibdev->priv_pdn); err_dealloc: ib_dealloc_device(&ibdev->ib_dev); return NULL; } int mlx4_ib_steer_qp_alloc(struct mlx4_ib_dev *dev, int count, int *qpn) { int offset; WARN_ON(!dev->ib_uc_qpns_bitmap); offset = bitmap_find_free_region(dev->ib_uc_qpns_bitmap, dev->steer_qpn_count, get_count_order(count)); if (offset < 0) return offset; *qpn = dev->steer_qpn_base + offset; return 0; } void mlx4_ib_steer_qp_free(struct mlx4_ib_dev *dev, u32 qpn, int count) { if (!qpn || dev->dev->caps.steering_mode != MLX4_STEERING_MODE_DEVICE_MANAGED) return; BUG_ON(qpn < dev->steer_qpn_base); bitmap_release_region(dev->ib_uc_qpns_bitmap, qpn - dev->steer_qpn_base, get_count_order(count)); } int mlx4_ib_steer_qp_reg(struct mlx4_ib_dev *mdev, struct mlx4_ib_qp *mqp, int is_attach) { struct ib_flow_spec spec = { .type = IB_FLOW_IB_UC, .l2_id.ib_uc.qpn = mqp->ibqp.qp_num, }; return is_attach ? __mlx4_ib_flow_attach(mdev, mqp, &spec, MLX4_DOMAIN_NIC, 0) : __mlx4_ib_flow_detach(mdev, mqp, &spec, MLX4_DOMAIN_NIC, 0); } static void mlx4_ib_remove(struct mlx4_dev *dev, void *ibdev_ptr) { struct mlx4_ib_dev *ibdev = ibdev_ptr; int p,j; mlx4_ib_close_sriov(ibdev); sysfs_remove_group(&ibdev->ib_dev.dev.kobj, &diag_counters_group); mlx4_ib_mad_cleanup(ibdev); for (j = 0; j < ARRAY_SIZE(mlx4_class_attributes); ++j) { device_remove_file(&ibdev->ib_dev.dev, mlx4_class_attributes[j]); } ib_unregister_device(&ibdev->ib_dev); if (dev->caps.steering_mode == MLX4_STEERING_MODE_DEVICE_MANAGED) { mlx4_qp_release_range(dev, ibdev->steer_qpn_base, ibdev->steer_qpn_count); kfree(ibdev->ib_uc_qpns_bitmap); } if (ibdev->iboe.nb.notifier_call) { if (unregister_netdevice_notifier(&ibdev->iboe.nb)) pr_warn("failure unregistering notifier\n"); ibdev->iboe.nb.notifier_call = NULL; } iounmap(ibdev->priv_uar.map); for (p = 0; p < ibdev->num_ports; ++p) if (ibdev->counters[p] != -1) mlx4_counter_free(ibdev->dev, ibdev->counters[p]); mlx4_foreach_port(p, dev, MLX4_PORT_TYPE_IB) mlx4_CLOSE_PORT(dev, p); mlx4_ib_free_eqs(dev, ibdev); mlx4_uar_free(dev, &ibdev->priv_uar); mlx4_pd_free(dev, ibdev->priv_pdn); ib_dealloc_device(&ibdev->ib_dev); } static void do_slave_init(struct mlx4_ib_dev *ibdev, int slave, int do_init) { struct mlx4_ib_demux_work **dm = NULL; struct mlx4_dev *dev = ibdev->dev; int i; unsigned long flags; if (!mlx4_is_master(dev)) return; dm = kcalloc(dev->caps.num_ports, sizeof *dm, GFP_ATOMIC); if (!dm) { pr_err("failed to allocate memory for tunneling qp update\n"); goto out; } for (i = 0; i < dev->caps.num_ports; i++) { dm[i] = kmalloc(sizeof (struct mlx4_ib_demux_work), GFP_ATOMIC); if (!dm[i]) { pr_err("failed to allocate memory for tunneling qp update work struct\n"); for (i = 0; i < dev->caps.num_ports; i++) { if (dm[i]) kfree(dm[i]); } goto out; } } /* initialize or tear down tunnel QPs for the slave */ for (i = 0; i < dev->caps.num_ports; i++) { INIT_WORK(&dm[i]->work, mlx4_ib_tunnels_update_work); dm[i]->port = i + 1; dm[i]->slave = slave; dm[i]->do_init = do_init; dm[i]->dev = ibdev; spin_lock_irqsave(&ibdev->sriov.going_down_lock, flags); if (!ibdev->sriov.is_going_down) queue_work(ibdev->sriov.demux[i].ud_wq, &dm[i]->work); spin_unlock_irqrestore(&ibdev->sriov.going_down_lock, flags); } out: if (dm) kfree(dm); return; } static void mlx4_ib_event(struct mlx4_dev *dev, void *ibdev_ptr, enum mlx4_dev_event event, unsigned long param) { struct ib_event ibev; struct mlx4_ib_dev *ibdev = to_mdev((struct ib_device *) ibdev_ptr); struct mlx4_eqe *eqe = NULL; struct ib_event_work *ew; int p = 0; if (event == MLX4_DEV_EVENT_PORT_MGMT_CHANGE) eqe = (struct mlx4_eqe *)param; else p = (int) param; switch (event) { case MLX4_DEV_EVENT_PORT_UP: if (p > ibdev->num_ports) return; if (mlx4_is_master(dev) && rdma_port_get_link_layer(&ibdev->ib_dev, p) == IB_LINK_LAYER_INFINIBAND) { mlx4_ib_invalidate_all_guid_record(ibdev, p); } mlx4_ib_info((struct ib_device *) ibdev_ptr, "Port %d logical link is up\n", p); ibev.event = IB_EVENT_PORT_ACTIVE; break; case MLX4_DEV_EVENT_PORT_DOWN: if (p > ibdev->num_ports) return; mlx4_ib_info((struct ib_device *) ibdev_ptr, "Port %d logical link is down\n", p); ibev.event = IB_EVENT_PORT_ERR; break; case MLX4_DEV_EVENT_CATASTROPHIC_ERROR: ibdev->ib_active = false; ibev.event = IB_EVENT_DEVICE_FATAL; break; case MLX4_DEV_EVENT_PORT_MGMT_CHANGE: ew = kmalloc(sizeof *ew, GFP_ATOMIC); if (!ew) { pr_err("failed to allocate memory for events work\n"); break; } INIT_WORK(&ew->work, handle_port_mgmt_change_event); memcpy(&ew->ib_eqe, eqe, sizeof *eqe); ew->ib_dev = ibdev; /* need to queue only for port owner, which uses GEN_EQE */ if (mlx4_is_master(dev)) queue_work(wq, &ew->work); else handle_port_mgmt_change_event(&ew->work); return; case MLX4_DEV_EVENT_SLAVE_INIT: /* here, p is the slave id */ do_slave_init(ibdev, p, 1); return; case MLX4_DEV_EVENT_SLAVE_SHUTDOWN: /* here, p is the slave id */ do_slave_init(ibdev, p, 0); return; default: return; } ibev.device = ibdev_ptr; ibev.element.port_num = (u8) p; ib_dispatch_event(&ibev); } static struct mlx4_interface mlx4_ib_interface = { .add = mlx4_ib_add, .remove = mlx4_ib_remove, .event = mlx4_ib_event, .protocol = MLX4_PROT_IB_IPV6 }; static int __init mlx4_ib_init(void) { int err; wq = create_singlethread_workqueue("mlx4_ib"); if (!wq) return -ENOMEM; #ifdef __linux__ err = mlx4_ib_proc_init(); if (err) goto clean_wq; #endif err = mlx4_ib_mcg_init(); if (err) goto clean_proc; init_dev_assign(); err = mlx4_register_interface(&mlx4_ib_interface); if (err) goto clean_mcg; return 0; clean_mcg: mlx4_ib_mcg_destroy(); clean_proc: #ifdef __linux__ remove_proc_entry(MLX4_IB_MRS_PROC_DIR_NAME, mlx4_ib_driver_dir_entry); remove_proc_entry(MLX4_IB_DRIVER_PROC_DIR_NAME, NULL); clean_wq: #endif destroy_workqueue(wq); return err; } static void __exit mlx4_ib_cleanup(void) { mlx4_unregister_interface(&mlx4_ib_interface); mlx4_ib_mcg_destroy(); destroy_workqueue(wq); /* Remove proc entries */ #ifdef __linux__ remove_proc_entry(MLX4_IB_MRS_PROC_DIR_NAME, mlx4_ib_driver_dir_entry); remove_proc_entry(MLX4_IB_DRIVER_PROC_DIR_NAME, NULL); #endif } module_init_order(mlx4_ib_init, SI_ORDER_MIDDLE); module_exit(mlx4_ib_cleanup); #undef MODULE_VERSION #include static int mlx4ib_evhand(module_t mod, int event, void *arg) { return (0); } static moduledata_t mlx4ib_mod = { .name = "mlx4ib", .evhand = mlx4ib_evhand, }; DECLARE_MODULE(mlx4ib, mlx4ib_mod, SI_SUB_OFED_PREINIT, SI_ORDER_ANY); MODULE_DEPEND(mlx4ib, mlx4, 1, 1, 1); MODULE_DEPEND(mlx4ib, ibcore, 1, 1, 1); Index: stable/10/sys/ofed/drivers/infiniband/hw/mlx4/mlx4_ib.h =================================================================== --- stable/10/sys/ofed/drivers/infiniband/hw/mlx4/mlx4_ib.h (revision 271126) +++ stable/10/sys/ofed/drivers/infiniband/hw/mlx4/mlx4_ib.h (revision 271127) @@ -1,802 +1,803 @@ /* * Copyright (c) 2006, 2007 Cisco Systems. All rights reserved. * Copyright (c) 2007, 2008 Mellanox Technologies. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU * General Public License (GPL) Version 2, available from the file * COPYING in the main directory of this source tree, or the * OpenIB.org BSD license below: * * Redistribution and use in source and binary forms, with or * without modification, are permitted provided that the following * conditions are met: * * - Redistributions of source code must retain the above * copyright notice, this list of conditions and the following * disclaimer. * * - Redistributions in binary form must reproduce the above * copyright notice, this list of conditions and the following * disclaimer in the documentation and/or other materials * provided with the distribution. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ #ifndef MLX4_IB_H #define MLX4_IB_H #include #include #include #include +#include #include #include #include #include #include #include #include #define MLX4_IB_DRV_NAME "mlx4_ib" #ifdef pr_fmt #undef pr_fmt #endif #define pr_fmt(fmt) "<" MLX4_IB_DRV_NAME "> %s: " fmt, __func__ #define mlx4_ib_warn(ibdev, format, arg...) \ dev_warn((ibdev)->dma_device, MLX4_IB_DRV_NAME ": " format, ## arg) #define mlx4_ib_info(ibdev, format, arg...) \ dev_info((ibdev)->dma_device, MLX4_IB_DRV_NAME ": " format, ## arg) enum { MLX4_IB_SQ_MIN_WQE_SHIFT = 6, MLX4_IB_MAX_HEADROOM = 2048 }; #define MLX4_IB_SQ_HEADROOM(shift) ((MLX4_IB_MAX_HEADROOM >> (shift)) + 1) #define MLX4_IB_SQ_MAX_SPARE (MLX4_IB_SQ_HEADROOM(MLX4_IB_SQ_MIN_WQE_SHIFT)) /*module param to indicate if SM assigns the alias_GUID*/ extern int mlx4_ib_sm_guid_assign; #ifdef __linux__ extern struct proc_dir_entry *mlx4_mrs_dir_entry; #endif #define MLX4_IB_UC_STEER_QPN_ALIGN 1 #define MLX4_IB_UC_MAX_NUM_QPS (256 * 1024) #define MLX4_IB_MMAP_CMD_MASK 0xFF #define MLX4_IB_MMAP_CMD_BITS 8 struct mlx4_ib_ucontext { struct ib_ucontext ibucontext; struct mlx4_uar uar; struct list_head db_page_list; struct mutex db_page_mutex; }; struct mlx4_ib_pd { struct ib_pd ibpd; u32 pdn; }; struct mlx4_ib_xrcd { struct ib_xrcd ibxrcd; u32 xrcdn; struct ib_pd *pd; struct ib_cq *cq; }; struct mlx4_ib_cq_buf { struct mlx4_buf buf; struct mlx4_mtt mtt; int entry_size; }; struct mlx4_ib_cq_resize { struct mlx4_ib_cq_buf buf; int cqe; }; struct mlx4_shared_mr_info { int mr_id; struct ib_umem *umem; }; struct mlx4_ib_cq { struct ib_cq ibcq; struct mlx4_cq mcq; struct mlx4_ib_cq_buf buf; struct mlx4_ib_cq_resize *resize_buf; struct mlx4_db db; spinlock_t lock; struct mutex resize_mutex; struct ib_umem *umem; struct ib_umem *resize_umem; }; struct mlx4_ib_mr { struct ib_mr ibmr; struct mlx4_mr mmr; struct ib_umem *umem; struct mlx4_shared_mr_info *smr_info; }; struct mlx4_ib_fast_reg_page_list { struct ib_fast_reg_page_list ibfrpl; __be64 *mapped_page_list; dma_addr_t map; }; struct mlx4_ib_fmr { struct ib_fmr ibfmr; struct mlx4_fmr mfmr; }; struct mlx4_ib_wq { u64 *wrid; spinlock_t lock; int wqe_cnt; int max_post; int max_gs; int offset; int wqe_shift; unsigned head; unsigned tail; }; enum mlx4_ib_qp_flags { MLX4_IB_QP_LSO = IB_QP_CREATE_IPOIB_UD_LSO, MLX4_IB_QP_BLOCK_MULTICAST_LOOPBACK = IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK, MLX4_IB_QP_NETIF = IB_QP_CREATE_NETIF_QP, MLX4_IB_SRIOV_TUNNEL_QP = 1 << 30, MLX4_IB_SRIOV_SQP = 1 << 31, }; struct mlx4_ib_gid_entry { struct list_head list; union ib_gid gid; int added; u8 port; }; enum mlx4_ib_mmap_cmd { MLX4_IB_MMAP_UAR_PAGE = 0, MLX4_IB_MMAP_BLUE_FLAME_PAGE = 1, MLX4_IB_MMAP_GET_CONTIGUOUS_PAGES = 2, }; enum mlx4_ib_qp_type { /* * IB_QPT_SMI and IB_QPT_GSI have to be the first two entries * here (and in that order) since the MAD layer uses them as * indices into a 2-entry table. */ MLX4_IB_QPT_SMI = IB_QPT_SMI, MLX4_IB_QPT_GSI = IB_QPT_GSI, MLX4_IB_QPT_RC = IB_QPT_RC, MLX4_IB_QPT_UC = IB_QPT_UC, MLX4_IB_QPT_UD = IB_QPT_UD, MLX4_IB_QPT_RAW_IPV6 = IB_QPT_RAW_IPV6, MLX4_IB_QPT_RAW_ETHERTYPE = IB_QPT_RAW_ETHERTYPE, MLX4_IB_QPT_RAW_PACKET = IB_QPT_RAW_PACKET, MLX4_IB_QPT_XRC_INI = IB_QPT_XRC_INI, MLX4_IB_QPT_XRC_TGT = IB_QPT_XRC_TGT, MLX4_IB_QPT_PROXY_SMI_OWNER = 1 << 16, MLX4_IB_QPT_PROXY_SMI = 1 << 17, MLX4_IB_QPT_PROXY_GSI = 1 << 18, MLX4_IB_QPT_TUN_SMI_OWNER = 1 << 19, MLX4_IB_QPT_TUN_SMI = 1 << 20, MLX4_IB_QPT_TUN_GSI = 1 << 21, }; #define MLX4_IB_QPT_ANY_SRIOV (MLX4_IB_QPT_PROXY_SMI_OWNER | \ MLX4_IB_QPT_PROXY_SMI | MLX4_IB_QPT_PROXY_GSI | MLX4_IB_QPT_TUN_SMI_OWNER | \ MLX4_IB_QPT_TUN_SMI | MLX4_IB_QPT_TUN_GSI) enum mlx4_ib_mad_ifc_flags { MLX4_MAD_IFC_IGNORE_MKEY = 1, MLX4_MAD_IFC_IGNORE_BKEY = 2, MLX4_MAD_IFC_IGNORE_KEYS = (MLX4_MAD_IFC_IGNORE_MKEY | MLX4_MAD_IFC_IGNORE_BKEY), MLX4_MAD_IFC_NET_VIEW = 4, }; enum { MLX4_NUM_TUNNEL_BUFS = 256, }; struct mlx4_ib_tunnel_header { struct mlx4_av av; __be32 remote_qpn; __be32 qkey; __be16 vlan; u8 mac[6]; __be16 pkey_index; u8 reserved[6]; }; struct mlx4_ib_buf { void *addr; dma_addr_t map; }; struct mlx4_rcv_tunnel_hdr { __be32 flags_src_qp; /* flags[6:5] is defined for VLANs: * 0x0 - no vlan was in the packet * 0x01 - C-VLAN was in the packet */ u8 g_ml_path; /* gid bit stands for ipv6/4 header in RoCE */ u8 reserved; __be16 pkey_index; __be16 sl_vid; __be16 slid_mac_47_32; __be32 mac_31_0; }; struct mlx4_ib_proxy_sqp_hdr { struct ib_grh grh; struct mlx4_rcv_tunnel_hdr tun; } __packed; struct mlx4_roce_smac_vlan_info { u64 smac; int smac_index; int smac_port; u64 candidate_smac; int candidate_smac_index; int candidate_smac_port; u16 vid; int vlan_index; int vlan_port; u16 candidate_vid; int candidate_vlan_index; int candidate_vlan_port; int update_vid; }; struct mlx4_ib_qpg_data { unsigned long *tss_bitmap; unsigned long *rss_bitmap; struct mlx4_ib_qp *qpg_parent; int tss_qpn_base; int rss_qpn_base; u32 tss_child_count; u32 rss_child_count; u32 qpg_tss_mask_sz; }; struct mlx4_ib_qp { struct ib_qp ibqp; struct mlx4_qp mqp; struct mlx4_buf buf; struct mlx4_db db; struct mlx4_ib_wq rq; u32 doorbell_qpn; __be32 sq_signal_bits; unsigned sq_next_wqe; int sq_max_wqes_per_wr; int sq_spare_wqes; struct mlx4_ib_wq sq; enum mlx4_ib_qp_type mlx4_ib_qp_type; struct ib_umem *umem; struct mlx4_mtt mtt; int buf_size; struct mutex mutex; u16 xrcdn; u32 flags; u8 port; u8 alt_port; u8 atomic_rd_en; u8 resp_depth; u8 sq_no_prefetch; u8 state; int mlx_type; enum ib_qpg_type qpg_type; struct mlx4_ib_qpg_data *qpg_data; struct list_head gid_list; struct list_head steering_rules; struct mlx4_ib_buf *sqp_proxy_rcv; struct mlx4_roce_smac_vlan_info pri; struct mlx4_roce_smac_vlan_info alt; struct list_head rules_list; int max_inline_data; struct mlx4_bf bf; }; struct mlx4_ib_srq { struct ib_srq ibsrq; struct mlx4_srq msrq; struct mlx4_buf buf; struct mlx4_db db; u64 *wrid; spinlock_t lock; int head; int tail; u16 wqe_ctr; struct ib_umem *umem; struct mlx4_mtt mtt; struct mutex mutex; }; struct mlx4_ib_ah { struct ib_ah ibah; union mlx4_ext_av av; }; /****************************************/ /* alias guid support */ /****************************************/ #define NUM_PORT_ALIAS_GUID 2 #define NUM_ALIAS_GUID_IN_REC 8 #define NUM_ALIAS_GUID_REC_IN_PORT 16 #define GUID_REC_SIZE 8 #define NUM_ALIAS_GUID_PER_PORT 128 #define MLX4_NOT_SET_GUID (0x00LL) #define MLX4_GUID_FOR_DELETE_VAL (~(0x00LL)) enum mlx4_guid_alias_rec_status { MLX4_GUID_INFO_STATUS_IDLE, MLX4_GUID_INFO_STATUS_SET, MLX4_GUID_INFO_STATUS_PENDING, }; enum mlx4_guid_alias_rec_ownership { MLX4_GUID_DRIVER_ASSIGN, MLX4_GUID_SYSADMIN_ASSIGN, MLX4_GUID_NONE_ASSIGN, /*init state of each record*/ }; enum mlx4_guid_alias_rec_method { MLX4_GUID_INFO_RECORD_SET = IB_MGMT_METHOD_SET, MLX4_GUID_INFO_RECORD_DELETE = IB_SA_METHOD_DELETE, }; struct mlx4_sriov_alias_guid_info_rec_det { u8 all_recs[GUID_REC_SIZE * NUM_ALIAS_GUID_IN_REC]; ib_sa_comp_mask guid_indexes; /*indicates what from the 8 records are valid*/ enum mlx4_guid_alias_rec_status status; /*indicates the administraively status of the record.*/ u8 method; /*set or delete*/ enum mlx4_guid_alias_rec_ownership ownership; /*indicates who assign that alias_guid record*/ }; struct mlx4_sriov_alias_guid_port_rec_det { struct mlx4_sriov_alias_guid_info_rec_det all_rec_per_port[NUM_ALIAS_GUID_REC_IN_PORT]; struct workqueue_struct *wq; struct delayed_work alias_guid_work; u8 port; struct mlx4_sriov_alias_guid *parent; struct list_head cb_list; }; struct mlx4_sriov_alias_guid { struct mlx4_sriov_alias_guid_port_rec_det ports_guid[MLX4_MAX_PORTS]; spinlock_t ag_work_lock; struct ib_sa_client *sa_client; }; struct mlx4_ib_demux_work { struct work_struct work; struct mlx4_ib_dev *dev; int slave; int do_init; u8 port; }; struct mlx4_ib_tun_tx_buf { struct mlx4_ib_buf buf; struct ib_ah *ah; }; struct mlx4_ib_demux_pv_qp { struct ib_qp *qp; enum ib_qp_type proxy_qpt; struct mlx4_ib_buf *ring; struct mlx4_ib_tun_tx_buf *tx_ring; spinlock_t tx_lock; unsigned tx_ix_head; unsigned tx_ix_tail; }; enum mlx4_ib_demux_pv_state { DEMUX_PV_STATE_DOWN, DEMUX_PV_STATE_STARTING, DEMUX_PV_STATE_ACTIVE, DEMUX_PV_STATE_DOWNING, }; struct mlx4_ib_demux_pv_ctx { int port; int slave; enum mlx4_ib_demux_pv_state state; int has_smi; struct ib_device *ib_dev; struct ib_cq *cq; struct ib_pd *pd; struct ib_mr *mr; struct work_struct work; struct workqueue_struct *wq; struct mlx4_ib_demux_pv_qp qp[2]; }; struct mlx4_ib_demux_ctx { struct ib_device *ib_dev; int port; struct workqueue_struct *wq; struct workqueue_struct *ud_wq; spinlock_t ud_lock; __be64 subnet_prefix; __be64 guid_cache[128]; struct mlx4_ib_dev *dev; /* the following lock protects both mcg_table and mcg_mgid0_list */ struct mutex mcg_table_lock; struct rb_root mcg_table; struct list_head mcg_mgid0_list; struct workqueue_struct *mcg_wq; struct mlx4_ib_demux_pv_ctx **tun; atomic_t tid; int flushing; /* flushing the work queue */ }; struct mlx4_ib_sriov { struct mlx4_ib_demux_ctx demux[MLX4_MAX_PORTS]; struct mlx4_ib_demux_pv_ctx *sqps[MLX4_MAX_PORTS]; /* when using this spinlock you should use "irq" because * it may be called from interrupt context.*/ spinlock_t going_down_lock; int is_going_down; struct mlx4_sriov_alias_guid alias_guid; /* CM paravirtualization fields */ struct list_head cm_list; spinlock_t id_map_lock; struct rb_root sl_id_map; struct idr pv_id_table; }; struct mlx4_ib_iboe { spinlock_t lock; struct net_device *netdevs[MLX4_MAX_PORTS]; struct notifier_block nb; union ib_gid gid_table[MLX4_MAX_PORTS][128]; }; struct pkey_mgt { u8 virt2phys_pkey[MLX4_MFUNC_MAX][MLX4_MAX_PORTS][MLX4_MAX_PORT_PKEYS]; u16 phys_pkey_cache[MLX4_MAX_PORTS][MLX4_MAX_PORT_PKEYS]; struct list_head pkey_port_list[MLX4_MFUNC_MAX]; struct kobject *device_parent[MLX4_MFUNC_MAX]; }; struct mlx4_ib_iov_sysfs_attr { void *ctx; struct kobject *kobj; unsigned long data; u32 entry_num; char name[15]; struct device_attribute dentry; struct device *dev; }; struct mlx4_ib_iov_sysfs_attr_ar { struct mlx4_ib_iov_sysfs_attr dentries[3 * NUM_ALIAS_GUID_PER_PORT + 1]; }; struct mlx4_ib_iov_port { char name[100]; u8 num; struct mlx4_ib_dev *dev; struct list_head list; struct mlx4_ib_iov_sysfs_attr_ar *dentr_ar; struct ib_port_attr attr; struct kobject *cur_port; struct kobject *admin_alias_parent; struct kobject *gids_parent; struct kobject *pkeys_parent; struct kobject *mcgs_parent; struct mlx4_ib_iov_sysfs_attr mcg_dentry; }; struct mlx4_ib_dev { struct ib_device ib_dev; struct mlx4_dev *dev; int num_ports; struct mlx4_uar priv_uar; u32 priv_pdn; MLX4_DECLARE_DOORBELL_LOCK(uar_lock); struct ib_mad_agent *send_agent[MLX4_MAX_PORTS][2]; struct ib_ah *sm_ah[MLX4_MAX_PORTS]; spinlock_t sm_lock; struct mlx4_ib_sriov sriov; struct mutex cap_mask_mutex; bool ib_active; struct mlx4_ib_iboe iboe; int counters[MLX4_MAX_PORTS]; int *eq_table; int eq_added; struct kobject *iov_parent; struct kobject *ports_parent; struct kobject *dev_ports_parent[MLX4_MFUNC_MAX]; struct mlx4_ib_iov_port iov_ports[MLX4_MAX_PORTS]; struct pkey_mgt pkeys; unsigned long *ib_uc_qpns_bitmap; int steer_qpn_count; int steer_qpn_base; }; struct ib_event_work { struct work_struct work; struct mlx4_ib_dev *ib_dev; struct mlx4_eqe ib_eqe; }; struct mlx4_ib_qp_tunnel_init_attr { struct ib_qp_init_attr init_attr; int slave; enum ib_qp_type proxy_qp_type; u8 port; }; static inline struct mlx4_ib_dev *to_mdev(struct ib_device *ibdev) { return container_of(ibdev, struct mlx4_ib_dev, ib_dev); } static inline struct mlx4_ib_ucontext *to_mucontext(struct ib_ucontext *ibucontext) { return container_of(ibucontext, struct mlx4_ib_ucontext, ibucontext); } static inline struct mlx4_ib_pd *to_mpd(struct ib_pd *ibpd) { return container_of(ibpd, struct mlx4_ib_pd, ibpd); } static inline struct mlx4_ib_xrcd *to_mxrcd(struct ib_xrcd *ibxrcd) { return container_of(ibxrcd, struct mlx4_ib_xrcd, ibxrcd); } static inline struct mlx4_ib_cq *to_mcq(struct ib_cq *ibcq) { return container_of(ibcq, struct mlx4_ib_cq, ibcq); } static inline struct mlx4_ib_cq *to_mibcq(struct mlx4_cq *mcq) { return container_of(mcq, struct mlx4_ib_cq, mcq); } static inline struct mlx4_ib_mr *to_mmr(struct ib_mr *ibmr) { return container_of(ibmr, struct mlx4_ib_mr, ibmr); } static inline struct mlx4_ib_fast_reg_page_list *to_mfrpl(struct ib_fast_reg_page_list *ibfrpl) { return container_of(ibfrpl, struct mlx4_ib_fast_reg_page_list, ibfrpl); } static inline struct mlx4_ib_fmr *to_mfmr(struct ib_fmr *ibfmr) { return container_of(ibfmr, struct mlx4_ib_fmr, ibfmr); } static inline struct mlx4_ib_qp *to_mqp(struct ib_qp *ibqp) { return container_of(ibqp, struct mlx4_ib_qp, ibqp); } static inline struct mlx4_ib_qp *to_mibqp(struct mlx4_qp *mqp) { return container_of(mqp, struct mlx4_ib_qp, mqp); } static inline struct mlx4_ib_srq *to_msrq(struct ib_srq *ibsrq) { return container_of(ibsrq, struct mlx4_ib_srq, ibsrq); } static inline struct mlx4_ib_srq *to_mibsrq(struct mlx4_srq *msrq) { return container_of(msrq, struct mlx4_ib_srq, msrq); } static inline struct mlx4_ib_ah *to_mah(struct ib_ah *ibah) { return container_of(ibah, struct mlx4_ib_ah, ibah); } int mlx4_ib_init_sriov(struct mlx4_ib_dev *dev); void mlx4_ib_close_sriov(struct mlx4_ib_dev *dev); int mlx4_ib_db_map_user(struct mlx4_ib_ucontext *context, unsigned long virt, struct mlx4_db *db); void mlx4_ib_db_unmap_user(struct mlx4_ib_ucontext *context, struct mlx4_db *db); struct ib_mr *mlx4_ib_get_dma_mr(struct ib_pd *pd, int acc); int mlx4_ib_umem_write_mtt(struct mlx4_ib_dev *dev, struct mlx4_mtt *mtt, struct ib_umem *umem); int mlx4_ib_umem_calc_optimal_mtt_size(struct ib_umem *umem, u64 start_va, int *num_of_mtts); struct ib_mr *mlx4_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, u64 virt_addr, int access_flags, struct ib_udata *udata, int mr_id); int mlx4_ib_dereg_mr(struct ib_mr *mr); struct ib_mr *mlx4_ib_alloc_fast_reg_mr(struct ib_pd *pd, int max_page_list_len); struct ib_fast_reg_page_list *mlx4_ib_alloc_fast_reg_page_list(struct ib_device *ibdev, int page_list_len); void mlx4_ib_free_fast_reg_page_list(struct ib_fast_reg_page_list *page_list); int mlx4_ib_modify_cq(struct ib_cq *cq, u16 cq_count, u16 cq_period); int mlx4_ib_resize_cq(struct ib_cq *ibcq, int entries, struct ib_udata *udata); int mlx4_ib_ignore_overrun_cq(struct ib_cq *ibcq); struct ib_cq *mlx4_ib_create_cq(struct ib_device *ibdev, int entries, int vector, struct ib_ucontext *context, struct ib_udata *udata); int mlx4_ib_destroy_cq(struct ib_cq *cq); int mlx4_ib_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc); int mlx4_ib_arm_cq(struct ib_cq *cq, enum ib_cq_notify_flags flags); void __mlx4_ib_cq_clean(struct mlx4_ib_cq *cq, u32 qpn, struct mlx4_ib_srq *srq); void mlx4_ib_cq_clean(struct mlx4_ib_cq *cq, u32 qpn, struct mlx4_ib_srq *srq); struct ib_ah *mlx4_ib_create_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr); int mlx4_ib_query_ah(struct ib_ah *ibah, struct ib_ah_attr *ah_attr); int mlx4_ib_destroy_ah(struct ib_ah *ah); struct ib_srq *mlx4_ib_create_srq(struct ib_pd *pd, struct ib_srq_init_attr *init_attr, struct ib_udata *udata); int mlx4_ib_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr, enum ib_srq_attr_mask attr_mask, struct ib_udata *udata); int mlx4_ib_query_srq(struct ib_srq *srq, struct ib_srq_attr *srq_attr); int mlx4_ib_destroy_srq(struct ib_srq *srq); void mlx4_ib_free_srq_wqe(struct mlx4_ib_srq *srq, int wqe_index); int mlx4_ib_post_srq_recv(struct ib_srq *ibsrq, struct ib_recv_wr *wr, struct ib_recv_wr **bad_wr); struct ib_qp *mlx4_ib_create_qp(struct ib_pd *pd, struct ib_qp_init_attr *init_attr, struct ib_udata *udata); int mlx4_ib_destroy_qp(struct ib_qp *qp); int mlx4_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask, struct ib_udata *udata); int mlx4_ib_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr, int qp_attr_mask, struct ib_qp_init_attr *qp_init_attr); int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, struct ib_send_wr **bad_wr); int mlx4_ib_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr, struct ib_recv_wr **bad_wr); int mlx4_MAD_IFC(struct mlx4_ib_dev *dev, int mad_ifc_flags, int port, struct ib_wc *in_wc, struct ib_grh *in_grh, void *in_mad, void *response_mad); int mlx4_ib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num, struct ib_wc *in_wc, struct ib_grh *in_grh, struct ib_mad *in_mad, struct ib_mad *out_mad); int mlx4_ib_mad_init(struct mlx4_ib_dev *dev); void mlx4_ib_mad_cleanup(struct mlx4_ib_dev *dev); struct ib_fmr *mlx4_ib_fmr_alloc(struct ib_pd *pd, int mr_access_flags, struct ib_fmr_attr *fmr_attr); int mlx4_ib_map_phys_fmr(struct ib_fmr *ibfmr, u64 *page_list, int npages, u64 iova); int mlx4_ib_unmap_fmr(struct list_head *fmr_list); int mlx4_ib_fmr_dealloc(struct ib_fmr *fmr); int __mlx4_ib_query_port(struct ib_device *ibdev, u8 port, struct ib_port_attr *props, int netw_view); int __mlx4_ib_query_pkey(struct ib_device *ibdev, u8 port, u16 index, u16 *pkey, int netw_view); int __mlx4_ib_query_gid(struct ib_device *ibdev, u8 port, int index, union ib_gid *gid, int netw_view); int mlx4_ib_resolve_grh(struct mlx4_ib_dev *dev, const struct ib_ah_attr *ah_attr, u8 *mac, int *is_mcast, u8 port); int mlx4_ib_query_if_stat(struct mlx4_ib_dev *dev, u32 counter_index, union mlx4_counter *counter, u8 clear); static inline int mlx4_ib_ah_grh_present(struct mlx4_ib_ah *ah) { u8 port = be32_to_cpu(ah->av.ib.port_pd) >> 24 & 3; if (rdma_port_get_link_layer(ah->ibah.device, port) == IB_LINK_LAYER_ETHERNET) return 1; return !!(ah->av.ib.g_slid & 0x80); } int mlx4_ib_mcg_port_init(struct mlx4_ib_demux_ctx *ctx); void mlx4_ib_mcg_port_cleanup(struct mlx4_ib_demux_ctx *ctx, int destroy_wq); void clean_vf_mcast(struct mlx4_ib_demux_ctx *ctx, int slave); int mlx4_ib_mcg_init(void); void mlx4_ib_mcg_destroy(void); int mlx4_ib_find_real_gid(struct ib_device *ibdev, u8 port, __be64 guid); int mlx4_ib_mcg_multiplex_handler(struct ib_device *ibdev, int port, int slave, struct ib_sa_mad *sa_mad); int mlx4_ib_mcg_demux_handler(struct ib_device *ibdev, int port, int slave, struct ib_sa_mad *mad); int mlx4_ib_add_mc(struct mlx4_ib_dev *mdev, struct mlx4_ib_qp *mqp, union ib_gid *gid); void mlx4_ib_dispatch_event(struct mlx4_ib_dev *dev, u8 port_num, enum ib_event_type type); void mlx4_ib_tunnels_update_work(struct work_struct *work); int mlx4_ib_send_to_slave(struct mlx4_ib_dev *dev, int slave, u8 port, enum ib_qp_type qpt, struct ib_wc *wc, struct ib_grh *grh, struct ib_mad *mad); int mlx4_ib_send_to_wire(struct mlx4_ib_dev *dev, int slave, u8 port, enum ib_qp_type dest_qpt, u16 pkey_index, u32 remote_qpn, u32 qkey, struct ib_ah_attr *attr, struct ib_mad *mad); __be64 mlx4_ib_get_new_demux_tid(struct mlx4_ib_demux_ctx *ctx); int mlx4_ib_demux_cm_handler(struct ib_device *ibdev, int port, int *slave, struct ib_mad *mad, int is_eth); int mlx4_ib_multiplex_cm_handler(struct ib_device *ibdev, int port, int slave_id, struct ib_mad *mad); void mlx4_ib_cm_paravirt_init(struct mlx4_ib_dev *dev); void mlx4_ib_cm_paravirt_clean(struct mlx4_ib_dev *dev, int slave_id); /* alias guid support */ void mlx4_ib_init_alias_guid_work(struct mlx4_ib_dev *dev, int port); int mlx4_ib_init_alias_guid_service(struct mlx4_ib_dev *dev); void mlx4_ib_destroy_alias_guid_service(struct mlx4_ib_dev *dev); void mlx4_ib_invalidate_all_guid_record(struct mlx4_ib_dev *dev, int port); void mlx4_ib_notify_slaves_on_guid_change(struct mlx4_ib_dev *dev, int block_num, u8 port_num, u8 *p_data); void mlx4_ib_update_cache_on_guid_change(struct mlx4_ib_dev *dev, int block_num, u8 port_num, u8 *p_data); int add_sysfs_port_mcg_attr(struct mlx4_ib_dev *device, int port_num, struct attribute *attr); void del_sysfs_port_mcg_attr(struct mlx4_ib_dev *device, int port_num, struct attribute *attr); ib_sa_comp_mask mlx4_ib_get_aguid_comp_mask_from_ix(int index); int mlx4_ib_device_register_sysfs(struct mlx4_ib_dev *device) ; void mlx4_ib_device_unregister_sysfs(struct mlx4_ib_dev *device); __be64 mlx4_ib_gen_node_guid(void); int mlx4_ib_steer_qp_alloc(struct mlx4_ib_dev *dev, int count, int *qpn); void mlx4_ib_steer_qp_free(struct mlx4_ib_dev *dev, u32 qpn, int count); int mlx4_ib_steer_qp_reg(struct mlx4_ib_dev *mdev, struct mlx4_ib_qp *mqp, int is_attach); #endif /* MLX4_IB_H */ Index: stable/10/sys/ofed/drivers/infiniband/hw/mlx4/mr.c =================================================================== --- stable/10/sys/ofed/drivers/infiniband/hw/mlx4/mr.c (revision 271126) +++ stable/10/sys/ofed/drivers/infiniband/hw/mlx4/mr.c (revision 271127) @@ -1,753 +1,753 @@ /* * Copyright (c) 2007 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2007, 2008 Mellanox Technologies. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU * General Public License (GPL) Version 2, available from the file * COPYING in the main directory of this source tree, or the * OpenIB.org BSD license below: * * Redistribution and use in source and binary forms, with or * without modification, are permitted provided that the following * conditions are met: * * - Redistributions of source code must retain the above * copyright notice, this list of conditions and the following * disclaimer. * * - Redistributions in binary form must reproduce the above * copyright notice, this list of conditions and the following * disclaimer in the documentation and/or other materials * provided with the distribution. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ #include #include #include #ifdef __linux__ #include #include #endif #include "mlx4_ib.h" static u32 convert_access(int acc) { return (acc & IB_ACCESS_REMOTE_ATOMIC ? MLX4_PERM_ATOMIC : 0) | (acc & IB_ACCESS_REMOTE_WRITE ? MLX4_PERM_REMOTE_WRITE : 0) | (acc & IB_ACCESS_REMOTE_READ ? MLX4_PERM_REMOTE_READ : 0) | (acc & IB_ACCESS_LOCAL_WRITE ? MLX4_PERM_LOCAL_WRITE : 0) | MLX4_PERM_LOCAL_READ; } #ifdef __linux__ static ssize_t shared_mr_proc_read(struct file *file, char __user *buffer, size_t len, loff_t *offset) { return -ENOSYS; } static ssize_t shared_mr_proc_write(struct file *file, const char __user *buffer, size_t len, loff_t *offset) { return -ENOSYS; } static int shared_mr_mmap(struct file *filep, struct vm_area_struct *vma) { struct proc_dir_entry *pde = PDE(filep->f_path.dentry->d_inode); struct mlx4_shared_mr_info *smr_info = (struct mlx4_shared_mr_info *)pde->data; /* Prevent any mapping not on start of area */ if (vma->vm_pgoff != 0) return -EINVAL; return ib_umem_map_to_vma(smr_info->umem, vma); } static const struct file_operations shared_mr_proc_ops = { .owner = THIS_MODULE, .read = shared_mr_proc_read, .write = shared_mr_proc_write, .mmap = shared_mr_mmap }; static mode_t convert_shared_access(int acc) { return (acc & IB_ACCESS_SHARED_MR_USER_READ ? S_IRUSR : 0) | (acc & IB_ACCESS_SHARED_MR_USER_WRITE ? S_IWUSR : 0) | (acc & IB_ACCESS_SHARED_MR_GROUP_READ ? S_IRGRP : 0) | (acc & IB_ACCESS_SHARED_MR_GROUP_WRITE ? S_IWGRP : 0) | (acc & IB_ACCESS_SHARED_MR_OTHER_READ ? S_IROTH : 0) | (acc & IB_ACCESS_SHARED_MR_OTHER_WRITE ? S_IWOTH : 0); } #endif struct ib_mr *mlx4_ib_get_dma_mr(struct ib_pd *pd, int acc) { struct mlx4_ib_mr *mr; int err; mr = kzalloc(sizeof *mr, GFP_KERNEL); if (!mr) return ERR_PTR(-ENOMEM); err = mlx4_mr_alloc(to_mdev(pd->device)->dev, to_mpd(pd)->pdn, 0, ~0ull, convert_access(acc), 0, 0, &mr->mmr); if (err) goto err_free; err = mlx4_mr_enable(to_mdev(pd->device)->dev, &mr->mmr); if (err) goto err_mr; mr->ibmr.rkey = mr->ibmr.lkey = mr->mmr.key; mr->umem = NULL; return &mr->ibmr; err_mr: mlx4_mr_free(to_mdev(pd->device)->dev, &mr->mmr); err_free: kfree(mr); return ERR_PTR(err); } static int mlx4_ib_umem_write_mtt_block(struct mlx4_ib_dev *dev, struct mlx4_mtt *mtt, u64 mtt_size, u64 mtt_shift, u64 len, u64 cur_start_addr, u64 *pages, int *start_index, int *npages) { int k; int err = 0; u64 mtt_entries; u64 cur_end_addr = cur_start_addr + len; u64 cur_end_addr_aligned = 0; len += (cur_start_addr & (mtt_size-1ULL)); cur_end_addr_aligned = round_up(cur_end_addr, mtt_size); len += (cur_end_addr_aligned - cur_end_addr); if (len & (mtt_size-1ULL)) { WARN(1 , "write_block: len %llx is not aligned to mtt_size %llx\n", - len, mtt_size); + (long long)len, (long long)mtt_size); return -EINVAL; } mtt_entries = (len >> mtt_shift); /* Align the MTT start address to the mtt_size. Required to handle cases when the MR starts in the middle of an MTT record. Was not required in old code since the physical addresses provided by the dma subsystem were page aligned, which was also the MTT size. */ cur_start_addr = round_down(cur_start_addr, mtt_size); /* A new block is started ...*/ for (k = 0; k < mtt_entries; ++k) { pages[*npages] = cur_start_addr + (mtt_size * k); (*npages)++; /* * Be friendly to mlx4_write_mtt() and * pass it chunks of appropriate size. */ if (*npages == PAGE_SIZE / sizeof(u64)) { err = mlx4_write_mtt(dev->dev, mtt, *start_index, *npages, pages); if (err) return err; (*start_index) += *npages; *npages = 0; } } return 0; } int mlx4_ib_umem_write_mtt(struct mlx4_ib_dev *dev, struct mlx4_mtt *mtt, struct ib_umem *umem) { u64 *pages; struct ib_umem_chunk *chunk; int j; u64 len = 0; int err = 0; u64 mtt_size; u64 cur_start_addr = 0; u64 mtt_shift; int start_index = 0; int npages = 0; pages = (u64 *) __get_free_page(GFP_KERNEL); if (!pages) return -ENOMEM; mtt_shift = mtt->page_shift; mtt_size = 1ULL << mtt_shift; list_for_each_entry(chunk, &umem->chunk_list, list) for (j = 0; j < chunk->nmap; ++j) { if (cur_start_addr + len == sg_dma_address(&chunk->page_list[j])) { /* still the same block */ len += sg_dma_len(&chunk->page_list[j]); continue; } /* A new block is started ...*/ /* If len is malaligned, write an extra mtt entry to cover the misaligned area (round up the division) */ err = mlx4_ib_umem_write_mtt_block(dev, mtt, mtt_size, mtt_shift, len, cur_start_addr, pages, &start_index, &npages); if (err) goto out; cur_start_addr = sg_dma_address(&chunk->page_list[j]); len = sg_dma_len(&chunk->page_list[j]); } /* Handle the last block */ if (len > 0) { /* If len is malaligned, write an extra mtt entry to cover the misaligned area (round up the division) */ err = mlx4_ib_umem_write_mtt_block(dev, mtt, mtt_size, mtt_shift, len, cur_start_addr, pages, &start_index, &npages); if (err) goto out; } if (npages) err = mlx4_write_mtt(dev->dev, mtt, start_index, npages, pages); out: free_page((unsigned long) pages); return err; } static inline u64 alignment_of(u64 ptr) { return ilog2(ptr & (~(ptr-1))); } static int mlx4_ib_umem_calc_block_mtt(u64 next_block_start, u64 current_block_end, u64 block_shift) { /* Check whether the alignment of the new block is aligned as well as the previous block. Block address must start with zeros till size of entity_size. */ if ((next_block_start & ((1ULL << block_shift) - 1ULL)) != 0) /* It is not as well aligned as the previous block-reduce the mtt size accordingly. Here we take the last right bit which is 1. */ block_shift = alignment_of(next_block_start); /* Check whether the alignment of the end of previous block - is it aligned as well as the start of the block */ if (((current_block_end) & ((1ULL << block_shift) - 1ULL)) != 0) /* It is not as well aligned as the start of the block - reduce the mtt size accordingly. */ block_shift = alignment_of(current_block_end); return block_shift; } /* Calculate optimal mtt size based on contiguous pages. * Function will return also the number of pages that are not aligned to the calculated mtt_size to be added to total number of pages. For that we should check the first chunk length & last chunk length and if not aligned to mtt_size we should increment the non_aligned_pages number. All chunks in the middle already handled as part of mtt shift calculation for both their start & end addresses. */ int mlx4_ib_umem_calc_optimal_mtt_size(struct ib_umem *umem, u64 start_va, int *num_of_mtts) { struct ib_umem_chunk *chunk; int j; u64 block_shift = MLX4_MAX_MTT_SHIFT; u64 current_block_len = 0; u64 current_block_start = 0; u64 misalignment_bits; u64 first_block_start = 0; u64 last_block_end = 0; u64 total_len = 0; u64 last_block_aligned_end = 0; u64 min_shift = ilog2(umem->page_size); list_for_each_entry(chunk, &umem->chunk_list, list) { /* Initialization - save the first chunk start as the current_block_start - block means contiguous pages. */ if (current_block_len == 0 && current_block_start == 0) { first_block_start = current_block_start = sg_dma_address(&chunk->page_list[0]); /* Find the bits that are different between the physical address and the virtual address for the start of the MR. */ /* umem_get aligned the start_va to a page boundry. Therefore, we need to align the start va to the same boundry */ /* misalignment_bits is needed to handle the case of a single memory region. In this case, the rest of the logic will not reduce the block size. If we use a block size which is bigger than the alignment of the misalignment bits, we might use the virtual page number instead of the physical page number, resulting in access to the wrong data. */ misalignment_bits = (start_va & (~(((u64)(umem->page_size))-1ULL))) ^ current_block_start; block_shift = min(alignment_of(misalignment_bits) , block_shift); } /* Go over the scatter entries in the current chunk, check if they continue the previous scatter entry. */ for (j = 0; j < chunk->nmap; ++j) { u64 next_block_start = sg_dma_address(&chunk->page_list[j]); u64 current_block_end = current_block_start + current_block_len; /* If we have a split (non-contig.) between two block*/ if (current_block_end != next_block_start) { block_shift = mlx4_ib_umem_calc_block_mtt( next_block_start, current_block_end, block_shift); /* If we reached the minimum shift for 4k page we stop the loop. */ if (block_shift <= min_shift) goto end; /* If not saved yet we are in first block - we save the length of first block to calculate the non_aligned_pages number at * the end. */ total_len += current_block_len; /* Start a new block */ current_block_start = next_block_start; current_block_len = sg_dma_len(&chunk->page_list[j]); continue; } /* The scatter entry is another part of the current block, increase the block size * An entry in the scatter can be larger than 4k (page) as of dma mapping which merge some blocks together. */ current_block_len += sg_dma_len(&chunk->page_list[j]); } } /* Account for the last block in the total len */ total_len += current_block_len; /* Add to the first block the misalignment that it suffers from.*/ total_len += (first_block_start & ((1ULL<> block_shift; end: if (block_shift < min_shift) { /* If shift is less than the min we set a WARN and return the min shift. */ WARN(1, "mlx4_ib_umem_calc_optimal_mtt_size - unexpected shift %lld\n", - block_shift); + (long long)block_shift); block_shift = min_shift; } return block_shift; } #ifdef __linux__ static int prepare_shared_mr(struct mlx4_ib_mr *mr, int access_flags, int mr_id) { struct proc_dir_entry *mr_proc_entry; mode_t mode = S_IFREG; char name_buff[16]; mode |= convert_shared_access(access_flags); sprintf(name_buff, "%X", mr_id); mr->smr_info = kmalloc(sizeof(struct mlx4_shared_mr_info), GFP_KERNEL); mr->smr_info->mr_id = mr_id; mr->smr_info->umem = mr->umem; mr_proc_entry = proc_create_data(name_buff, mode, mlx4_mrs_dir_entry, &shared_mr_proc_ops, mr->smr_info); if (!mr_proc_entry) { pr_err("prepare_shared_mr failed via proc\n"); kfree(mr->smr_info); return -ENODEV; } current_uid_gid(&(mr_proc_entry->uid), &(mr_proc_entry->gid)); mr_proc_entry->size = mr->umem->length; return 0; } static int is_shared_mr(int access_flags) { /* We should check whether IB_ACCESS_SHARED_MR_USER_READ or other shared bits were turned on. */ return !!(access_flags & (IB_ACCESS_SHARED_MR_USER_READ | IB_ACCESS_SHARED_MR_USER_WRITE | IB_ACCESS_SHARED_MR_GROUP_READ | IB_ACCESS_SHARED_MR_GROUP_WRITE | IB_ACCESS_SHARED_MR_OTHER_READ | IB_ACCESS_SHARED_MR_OTHER_WRITE)); } #endif struct ib_mr *mlx4_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, u64 virt_addr, int access_flags, struct ib_udata *udata, int mr_id) { struct mlx4_ib_dev *dev = to_mdev(pd->device); struct mlx4_ib_mr *mr; int shift; int err; int n; mr = kzalloc(sizeof *mr, GFP_KERNEL); if (!mr) return ERR_PTR(-ENOMEM); mr->umem = ib_umem_get(pd->uobject->context, start, length, access_flags, 0); if (IS_ERR(mr->umem)) { err = PTR_ERR(mr->umem); goto err_free; } n = ib_umem_page_count(mr->umem); shift = mlx4_ib_umem_calc_optimal_mtt_size(mr->umem, start, &n); err = mlx4_mr_alloc(dev->dev, to_mpd(pd)->pdn, virt_addr, length, convert_access(access_flags), n, shift, &mr->mmr); if (err) goto err_umem; err = mlx4_ib_umem_write_mtt(dev, &mr->mmr.mtt, mr->umem); if (err) goto err_mr; err = mlx4_mr_enable(dev->dev, &mr->mmr); if (err) goto err_mr; mr->ibmr.rkey = mr->ibmr.lkey = mr->mmr.key; #ifdef __linux__ /* Check whether MR should be shared */ if (is_shared_mr(access_flags)) { /* start address and length must be aligned to page size in order to map a full page and preventing leakage of data */ if (mr->umem->offset || (length & ~PAGE_MASK)) { err = -EINVAL; goto err_mr; } err = prepare_shared_mr(mr, access_flags, mr_id); if (err) goto err_mr; } #endif return &mr->ibmr; err_mr: mlx4_mr_free(to_mdev(pd->device)->dev, &mr->mmr); err_umem: ib_umem_release(mr->umem); err_free: kfree(mr); return ERR_PTR(err); } int mlx4_ib_dereg_mr(struct ib_mr *ibmr) { struct mlx4_ib_mr *mr = to_mmr(ibmr); mlx4_mr_free(to_mdev(ibmr->device)->dev, &mr->mmr); if (mr->smr_info) { /* When master/parent shared mr is dereged there is no ability to share this mr any more - its mr_id will be returned to the kernel as part of ib_uverbs_dereg_mr and may be allocated again as part of other reg_mr. */ char name_buff[16]; sprintf(name_buff, "%X", mr->smr_info->mr_id); /* Remove proc entry is checking internally that no operation was strated on that proc fs file and if in the middle current process will wait till end of operation. That's why no sync mechanism is needed when we release below the shared umem. */ #ifdef __linux__ remove_proc_entry(name_buff, mlx4_mrs_dir_entry); kfree(mr->smr_info); #endif } if (mr->umem) ib_umem_release(mr->umem); kfree(mr); return 0; } struct ib_mr *mlx4_ib_alloc_fast_reg_mr(struct ib_pd *pd, int max_page_list_len) { struct mlx4_ib_dev *dev = to_mdev(pd->device); struct mlx4_ib_mr *mr; int err; mr = kzalloc(sizeof *mr, GFP_KERNEL); if (!mr) return ERR_PTR(-ENOMEM); err = mlx4_mr_alloc(dev->dev, to_mpd(pd)->pdn, 0, 0, 0, max_page_list_len, 0, &mr->mmr); if (err) goto err_free; err = mlx4_mr_enable(dev->dev, &mr->mmr); if (err) goto err_mr; mr->ibmr.rkey = mr->ibmr.lkey = mr->mmr.key; mr->umem = NULL; return &mr->ibmr; err_mr: mlx4_mr_free(dev->dev, &mr->mmr); err_free: kfree(mr); return ERR_PTR(err); } struct ib_fast_reg_page_list *mlx4_ib_alloc_fast_reg_page_list(struct ib_device *ibdev, int page_list_len) { struct mlx4_ib_dev *dev = to_mdev(ibdev); struct mlx4_ib_fast_reg_page_list *mfrpl; int size = page_list_len * sizeof (u64); if (page_list_len > MLX4_MAX_FAST_REG_PAGES) return ERR_PTR(-EINVAL); mfrpl = kmalloc(sizeof *mfrpl, GFP_KERNEL); if (!mfrpl) return ERR_PTR(-ENOMEM); mfrpl->ibfrpl.page_list = kmalloc(size, GFP_KERNEL); if (!mfrpl->ibfrpl.page_list) goto err_free; mfrpl->mapped_page_list = dma_alloc_coherent(&dev->dev->pdev->dev, size, &mfrpl->map, GFP_KERNEL); if (!mfrpl->mapped_page_list) goto err_free; WARN_ON(mfrpl->map & 0x3f); return &mfrpl->ibfrpl; err_free: kfree(mfrpl->ibfrpl.page_list); kfree(mfrpl); return ERR_PTR(-ENOMEM); } void mlx4_ib_free_fast_reg_page_list(struct ib_fast_reg_page_list *page_list) { struct mlx4_ib_dev *dev = to_mdev(page_list->device); struct mlx4_ib_fast_reg_page_list *mfrpl = to_mfrpl(page_list); int size = page_list->max_page_list_len * sizeof (u64); dma_free_coherent(&dev->dev->pdev->dev, size, mfrpl->mapped_page_list, mfrpl->map); kfree(mfrpl->ibfrpl.page_list); kfree(mfrpl); } struct ib_fmr *mlx4_ib_fmr_alloc(struct ib_pd *pd, int acc, struct ib_fmr_attr *fmr_attr) { struct mlx4_ib_dev *dev = to_mdev(pd->device); struct mlx4_ib_fmr *fmr; int err = -ENOMEM; fmr = kmalloc(sizeof *fmr, GFP_KERNEL); if (!fmr) return ERR_PTR(-ENOMEM); err = mlx4_fmr_alloc(dev->dev, to_mpd(pd)->pdn, convert_access(acc), fmr_attr->max_pages, fmr_attr->max_maps, fmr_attr->page_shift, &fmr->mfmr); if (err) goto err_free; err = mlx4_fmr_enable(to_mdev(pd->device)->dev, &fmr->mfmr); if (err) goto err_mr; fmr->ibfmr.rkey = fmr->ibfmr.lkey = fmr->mfmr.mr.key; return &fmr->ibfmr; err_mr: mlx4_mr_free(to_mdev(pd->device)->dev, &fmr->mfmr.mr); err_free: kfree(fmr); return ERR_PTR(err); } int mlx4_ib_map_phys_fmr(struct ib_fmr *ibfmr, u64 *page_list, int npages, u64 iova) { struct mlx4_ib_fmr *ifmr = to_mfmr(ibfmr); struct mlx4_ib_dev *dev = to_mdev(ifmr->ibfmr.device); return mlx4_map_phys_fmr(dev->dev, &ifmr->mfmr, page_list, npages, iova, &ifmr->ibfmr.lkey, &ifmr->ibfmr.rkey); } int mlx4_ib_unmap_fmr(struct list_head *fmr_list) { struct ib_fmr *ibfmr; int err; struct mlx4_dev *mdev = NULL; list_for_each_entry(ibfmr, fmr_list, list) { if (mdev && to_mdev(ibfmr->device)->dev != mdev) return -EINVAL; mdev = to_mdev(ibfmr->device)->dev; } if (!mdev) return 0; list_for_each_entry(ibfmr, fmr_list, list) { struct mlx4_ib_fmr *ifmr = to_mfmr(ibfmr); mlx4_fmr_unmap(mdev, &ifmr->mfmr, &ifmr->ibfmr.lkey, &ifmr->ibfmr.rkey); } /* * Make sure all MPT status updates are visible before issuing * SYNC_TPT firmware command. */ wmb(); err = mlx4_SYNC_TPT(mdev); if (err) pr_warn("SYNC_TPT error %d when " "unmapping FMRs\n", err); return 0; } int mlx4_ib_fmr_dealloc(struct ib_fmr *ibfmr) { struct mlx4_ib_fmr *ifmr = to_mfmr(ibfmr); struct mlx4_ib_dev *dev = to_mdev(ibfmr->device); int err; err = mlx4_fmr_free(dev->dev, &ifmr->mfmr); if (!err) kfree(ifmr); return err; } Index: stable/10/sys/ofed/drivers/infiniband/hw/mlx4/qp.c =================================================================== --- stable/10/sys/ofed/drivers/infiniband/hw/mlx4/qp.c (revision 271126) +++ stable/10/sys/ofed/drivers/infiniband/hw/mlx4/qp.c (revision 271127) @@ -1,3588 +1,3587 @@ /* * Copyright (c) 2007 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2007, 2008 Mellanox Technologies. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU * General Public License (GPL) Version 2, available from the file * COPYING in the main directory of this source tree, or the * OpenIB.org BSD license below: * * Redistribution and use in source and binary forms, with or * without modification, are permitted provided that the following * conditions are met: * * - Redistributions of source code must retain the above * copyright notice, this list of conditions and the following * disclaimer. * * - Redistributions in binary form must reproduce the above * copyright notice, this list of conditions and the following * disclaimer in the documentation and/or other materials * provided with the distribution. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ #include #include #include -#include #include #include #include #include #include #include #include #include #ifndef __linux__ #define asm __asm #endif #include "mlx4_ib.h" #include "user.h" enum { MLX4_IB_ACK_REQ_FREQ = 8, }; enum { MLX4_IB_DEFAULT_SCHED_QUEUE = 0x83, MLX4_IB_DEFAULT_QP0_SCHED_QUEUE = 0x3f, MLX4_IB_LINK_TYPE_IB = 0, MLX4_IB_LINK_TYPE_ETH = 1 }; enum { /* * Largest possible UD header: send with GRH and immediate * data plus 18 bytes for an Ethernet header with VLAN/802.1Q * tag. (LRH would only use 8 bytes, so Ethernet is the * biggest case) */ MLX4_IB_UD_HEADER_SIZE = 82, MLX4_IB_LSO_HEADER_SPARE = 128, }; enum { MLX4_IB_IBOE_ETHERTYPE = 0x8915 }; struct mlx4_ib_sqp { struct mlx4_ib_qp qp; int pkey_index; u32 qkey; u32 send_psn; struct ib_ud_header ud_header; u8 header_buf[MLX4_IB_UD_HEADER_SIZE]; }; enum { MLX4_IB_MIN_SQ_STRIDE = 6, MLX4_IB_CACHE_LINE_SIZE = 64, }; enum { MLX4_RAW_QP_MTU = 7, MLX4_RAW_QP_MSGMAX = 31, }; static const __be32 mlx4_ib_opcode[] = { [IB_WR_SEND] = cpu_to_be32(MLX4_OPCODE_SEND), [IB_WR_LSO] = cpu_to_be32(MLX4_OPCODE_LSO), [IB_WR_SEND_WITH_IMM] = cpu_to_be32(MLX4_OPCODE_SEND_IMM), [IB_WR_RDMA_WRITE] = cpu_to_be32(MLX4_OPCODE_RDMA_WRITE), [IB_WR_RDMA_WRITE_WITH_IMM] = cpu_to_be32(MLX4_OPCODE_RDMA_WRITE_IMM), [IB_WR_RDMA_READ] = cpu_to_be32(MLX4_OPCODE_RDMA_READ), [IB_WR_ATOMIC_CMP_AND_SWP] = cpu_to_be32(MLX4_OPCODE_ATOMIC_CS), [IB_WR_ATOMIC_FETCH_AND_ADD] = cpu_to_be32(MLX4_OPCODE_ATOMIC_FA), [IB_WR_SEND_WITH_INV] = cpu_to_be32(MLX4_OPCODE_SEND_INVAL), [IB_WR_LOCAL_INV] = cpu_to_be32(MLX4_OPCODE_LOCAL_INVAL), [IB_WR_FAST_REG_MR] = cpu_to_be32(MLX4_OPCODE_FMR), [IB_WR_MASKED_ATOMIC_CMP_AND_SWP] = cpu_to_be32(MLX4_OPCODE_MASKED_ATOMIC_CS), [IB_WR_MASKED_ATOMIC_FETCH_AND_ADD] = cpu_to_be32(MLX4_OPCODE_MASKED_ATOMIC_FA), }; #ifndef wc_wmb #if defined(__i386__) #define wc_wmb() asm volatile("lock; addl $0,0(%%esp) " ::: "memory") #elif defined(__x86_64__) #define wc_wmb() asm volatile("sfence" ::: "memory") #elif defined(__ia64__) #define wc_wmb() asm volatile("fwb" ::: "memory") #else #define wc_wmb() wmb() #endif #endif static struct mlx4_ib_sqp *to_msqp(struct mlx4_ib_qp *mqp) { return container_of(mqp, struct mlx4_ib_sqp, qp); } static int is_tunnel_qp(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp) { if (!mlx4_is_master(dev->dev)) return 0; return qp->mqp.qpn >= dev->dev->phys_caps.base_tunnel_sqpn && qp->mqp.qpn < dev->dev->phys_caps.base_tunnel_sqpn + 8 * MLX4_MFUNC_MAX; } static int is_sqp(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp) { int proxy_sqp = 0; int real_sqp = 0; int i; /* PPF or Native -- real SQP */ real_sqp = ((mlx4_is_master(dev->dev) || !mlx4_is_mfunc(dev->dev)) && qp->mqp.qpn >= dev->dev->phys_caps.base_sqpn && qp->mqp.qpn <= dev->dev->phys_caps.base_sqpn + 3); if (real_sqp) return 1; /* VF or PF -- proxy SQP */ if (mlx4_is_mfunc(dev->dev)) { for (i = 0; i < dev->dev->caps.num_ports; i++) { if (qp->mqp.qpn == dev->dev->caps.qp0_proxy[i] || qp->mqp.qpn == dev->dev->caps.qp1_proxy[i]) { proxy_sqp = 1; break; } } } return proxy_sqp; } /* used for INIT/CLOSE port logic */ static int is_qp0(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp) { int proxy_qp0 = 0; int real_qp0 = 0; int i; /* PPF or Native -- real QP0 */ real_qp0 = ((mlx4_is_master(dev->dev) || !mlx4_is_mfunc(dev->dev)) && qp->mqp.qpn >= dev->dev->phys_caps.base_sqpn && qp->mqp.qpn <= dev->dev->phys_caps.base_sqpn + 1); if (real_qp0) return 1; /* VF or PF -- proxy QP0 */ if (mlx4_is_mfunc(dev->dev)) { for (i = 0; i < dev->dev->caps.num_ports; i++) { if (qp->mqp.qpn == dev->dev->caps.qp0_proxy[i]) { proxy_qp0 = 1; break; } } } return proxy_qp0; } static void *get_wqe(struct mlx4_ib_qp *qp, int offset) { return mlx4_buf_offset(&qp->buf, offset); } static void *get_recv_wqe(struct mlx4_ib_qp *qp, int n) { return get_wqe(qp, qp->rq.offset + (n << qp->rq.wqe_shift)); } static void *get_send_wqe(struct mlx4_ib_qp *qp, int n) { return get_wqe(qp, qp->sq.offset + (n << qp->sq.wqe_shift)); } /* * Stamp a SQ WQE so that it is invalid if prefetched by marking the * first four bytes of every 64 byte chunk with * 0x7FFFFFF | (invalid_ownership_value << 31). * * When the max work request size is less than or equal to the WQE * basic block size, as an optimization, we can stamp all WQEs with * 0xffffffff, and skip the very first chunk of each WQE. */ static void stamp_send_wqe(struct mlx4_ib_qp *qp, int n, int size) { __be32 *wqe; int i; int s; int ind; void *buf; __be32 stamp; struct mlx4_wqe_ctrl_seg *ctrl; if (qp->sq_max_wqes_per_wr > 1) { s = roundup(size, 1U << qp->sq.wqe_shift); for (i = 0; i < s; i += 64) { ind = (i >> qp->sq.wqe_shift) + n; stamp = ind & qp->sq.wqe_cnt ? cpu_to_be32(0x7fffffff) : cpu_to_be32(0xffffffff); buf = get_send_wqe(qp, ind & (qp->sq.wqe_cnt - 1)); wqe = buf + (i & ((1 << qp->sq.wqe_shift) - 1)); *wqe = stamp; } } else { ctrl = buf = get_send_wqe(qp, n & (qp->sq.wqe_cnt - 1)); s = (ctrl->fence_size & 0x3f) << 4; for (i = 64; i < s; i += 64) { wqe = buf + i; *wqe = cpu_to_be32(0xffffffff); } } } static void post_nop_wqe(struct mlx4_ib_qp *qp, int n, int size) { struct mlx4_wqe_ctrl_seg *ctrl; struct mlx4_wqe_inline_seg *inl; void *wqe; int s; ctrl = wqe = get_send_wqe(qp, n & (qp->sq.wqe_cnt - 1)); s = sizeof(struct mlx4_wqe_ctrl_seg); if (qp->ibqp.qp_type == IB_QPT_UD) { struct mlx4_wqe_datagram_seg *dgram = wqe + sizeof *ctrl; struct mlx4_av *av = (struct mlx4_av *)dgram->av; memset(dgram, 0, sizeof *dgram); av->port_pd = cpu_to_be32((qp->port << 24) | to_mpd(qp->ibqp.pd)->pdn); s += sizeof(struct mlx4_wqe_datagram_seg); } /* Pad the remainder of the WQE with an inline data segment. */ if (size > s) { inl = wqe + s; inl->byte_count = cpu_to_be32(1U << 31 | (size - s - sizeof *inl)); } ctrl->srcrb_flags = 0; ctrl->fence_size = size / 16; /* * Make sure descriptor is fully written before setting ownership bit * (because HW can start executing as soon as we do). */ wmb(); ctrl->owner_opcode = cpu_to_be32(MLX4_OPCODE_NOP | MLX4_WQE_CTRL_NEC) | (n & qp->sq.wqe_cnt ? cpu_to_be32(1U << 31) : 0); stamp_send_wqe(qp, n + qp->sq_spare_wqes, size); } /* Post NOP WQE to prevent wrap-around in the middle of WR */ static inline unsigned pad_wraparound(struct mlx4_ib_qp *qp, int ind) { unsigned s = qp->sq.wqe_cnt - (ind & (qp->sq.wqe_cnt - 1)); if (unlikely(s < qp->sq_max_wqes_per_wr)) { post_nop_wqe(qp, ind, s << qp->sq.wqe_shift); ind += s; } return ind; } static void mlx4_ib_qp_event(struct mlx4_qp *qp, enum mlx4_event type) { struct ib_event event; struct ib_qp *ibqp = &to_mibqp(qp)->ibqp; if (type == MLX4_EVENT_TYPE_PATH_MIG) to_mibqp(qp)->port = to_mibqp(qp)->alt_port; if (ibqp->event_handler) { event.device = ibqp->device; event.element.qp = ibqp; switch (type) { case MLX4_EVENT_TYPE_PATH_MIG: event.event = IB_EVENT_PATH_MIG; break; case MLX4_EVENT_TYPE_COMM_EST: event.event = IB_EVENT_COMM_EST; break; case MLX4_EVENT_TYPE_SQ_DRAINED: event.event = IB_EVENT_SQ_DRAINED; break; case MLX4_EVENT_TYPE_SRQ_QP_LAST_WQE: event.event = IB_EVENT_QP_LAST_WQE_REACHED; break; case MLX4_EVENT_TYPE_WQ_CATAS_ERROR: event.event = IB_EVENT_QP_FATAL; break; case MLX4_EVENT_TYPE_PATH_MIG_FAILED: event.event = IB_EVENT_PATH_MIG_ERR; break; case MLX4_EVENT_TYPE_WQ_INVAL_REQ_ERROR: event.event = IB_EVENT_QP_REQ_ERR; break; case MLX4_EVENT_TYPE_WQ_ACCESS_ERROR: event.event = IB_EVENT_QP_ACCESS_ERR; break; default: pr_warn("Unexpected event type %d " "on QP %06x\n", type, qp->qpn); return; } ibqp->event_handler(&event, ibqp->qp_context); } } static int send_wqe_overhead(enum mlx4_ib_qp_type type, u32 flags) { /* * UD WQEs must have a datagram segment. * RC and UC WQEs might have a remote address segment. * MLX WQEs need two extra inline data segments (for the UD * header and space for the ICRC). */ switch (type) { case MLX4_IB_QPT_UD: return sizeof (struct mlx4_wqe_ctrl_seg) + sizeof (struct mlx4_wqe_datagram_seg) + ((flags & MLX4_IB_QP_LSO) ? MLX4_IB_LSO_HEADER_SPARE : 0); case MLX4_IB_QPT_PROXY_SMI_OWNER: case MLX4_IB_QPT_PROXY_SMI: case MLX4_IB_QPT_PROXY_GSI: return sizeof (struct mlx4_wqe_ctrl_seg) + sizeof (struct mlx4_wqe_datagram_seg) + 64; case MLX4_IB_QPT_TUN_SMI_OWNER: case MLX4_IB_QPT_TUN_GSI: return sizeof (struct mlx4_wqe_ctrl_seg) + sizeof (struct mlx4_wqe_datagram_seg); case MLX4_IB_QPT_UC: return sizeof (struct mlx4_wqe_ctrl_seg) + sizeof (struct mlx4_wqe_raddr_seg); case MLX4_IB_QPT_RC: return sizeof (struct mlx4_wqe_ctrl_seg) + sizeof (struct mlx4_wqe_masked_atomic_seg) + sizeof (struct mlx4_wqe_raddr_seg); case MLX4_IB_QPT_SMI: case MLX4_IB_QPT_GSI: return sizeof (struct mlx4_wqe_ctrl_seg) + ALIGN(MLX4_IB_UD_HEADER_SIZE + DIV_ROUND_UP(MLX4_IB_UD_HEADER_SIZE, MLX4_INLINE_ALIGN) * sizeof (struct mlx4_wqe_inline_seg), sizeof (struct mlx4_wqe_data_seg)) + ALIGN(4 + sizeof (struct mlx4_wqe_inline_seg), sizeof (struct mlx4_wqe_data_seg)); default: return sizeof (struct mlx4_wqe_ctrl_seg); } } static int set_rq_size(struct mlx4_ib_dev *dev, struct ib_qp_cap *cap, int is_user, int has_rq, struct mlx4_ib_qp *qp) { /* Sanity check RQ size before proceeding */ if (cap->max_recv_wr > dev->dev->caps.max_wqes - MLX4_IB_SQ_MAX_SPARE || cap->max_recv_sge > min(dev->dev->caps.max_sq_sg, dev->dev->caps.max_rq_sg)) return -EINVAL; if (!has_rq) { if (cap->max_recv_wr) return -EINVAL; qp->rq.wqe_cnt = qp->rq.max_gs = 0; } else { /* HW requires >= 1 RQ entry with >= 1 gather entry */ if (is_user && (!cap->max_recv_wr || !cap->max_recv_sge)) return -EINVAL; qp->rq.wqe_cnt = roundup_pow_of_two(max(1U, cap->max_recv_wr)); qp->rq.max_gs = roundup_pow_of_two(max(1U, cap->max_recv_sge)); qp->rq.wqe_shift = ilog2(qp->rq.max_gs * sizeof (struct mlx4_wqe_data_seg)); } /* leave userspace return values as they were, so as not to break ABI */ if (is_user) { cap->max_recv_wr = qp->rq.max_post = qp->rq.wqe_cnt; cap->max_recv_sge = qp->rq.max_gs; } else { cap->max_recv_wr = qp->rq.max_post = min(dev->dev->caps.max_wqes - MLX4_IB_SQ_MAX_SPARE, qp->rq.wqe_cnt); cap->max_recv_sge = min(qp->rq.max_gs, min(dev->dev->caps.max_sq_sg, dev->dev->caps.max_rq_sg)); } return 0; } static int set_kernel_sq_size(struct mlx4_ib_dev *dev, struct ib_qp_cap *cap, enum mlx4_ib_qp_type type, struct mlx4_ib_qp *qp) { int s; /* Sanity check SQ size before proceeding */ if (cap->max_send_wr > (dev->dev->caps.max_wqes - MLX4_IB_SQ_MAX_SPARE) || cap->max_send_sge > min(dev->dev->caps.max_sq_sg, dev->dev->caps.max_rq_sg) || cap->max_inline_data + send_wqe_overhead(type, qp->flags) + sizeof (struct mlx4_wqe_inline_seg) > dev->dev->caps.max_sq_desc_sz) return -EINVAL; /* * For MLX transport we need 2 extra S/G entries: * one for the header and one for the checksum at the end */ if ((type == MLX4_IB_QPT_SMI || type == MLX4_IB_QPT_GSI || type & (MLX4_IB_QPT_PROXY_SMI_OWNER | MLX4_IB_QPT_TUN_SMI_OWNER)) && cap->max_send_sge + 2 > dev->dev->caps.max_sq_sg) return -EINVAL; s = max(cap->max_send_sge * sizeof (struct mlx4_wqe_data_seg), cap->max_inline_data + sizeof (struct mlx4_wqe_inline_seg)) + send_wqe_overhead(type, qp->flags); if (s > dev->dev->caps.max_sq_desc_sz) return -EINVAL; /* * Hermon supports shrinking WQEs, such that a single work * request can include multiple units of 1 << wqe_shift. This * way, work requests can differ in size, and do not have to * be a power of 2 in size, saving memory and speeding up send * WR posting. Unfortunately, if we do this then the * wqe_index field in CQEs can't be used to look up the WR ID * anymore, so we do this only if selective signaling is off. * * Further, on 32-bit platforms, we can't use vmap() to make * the QP buffer virtually contiguous. Thus we have to use * constant-sized WRs to make sure a WR is always fully within * a single page-sized chunk. * * Finally, we use NOP work requests to pad the end of the * work queue, to avoid wrap-around in the middle of WR. We * set NEC bit to avoid getting completions with error for * these NOP WRs, but since NEC is only supported starting * with firmware 2.2.232, we use constant-sized WRs for older * firmware. * * And, since MLX QPs only support SEND, we use constant-sized * WRs in this case. * * We look for the smallest value of wqe_shift such that the * resulting number of wqes does not exceed device * capabilities. * * We set WQE size to at least 64 bytes, this way stamping * invalidates each WQE. */ if (dev->dev->caps.fw_ver >= MLX4_FW_VER_WQE_CTRL_NEC && qp->sq_signal_bits && BITS_PER_LONG == 64 && type != MLX4_IB_QPT_SMI && type != MLX4_IB_QPT_GSI && !(type & (MLX4_IB_QPT_PROXY_SMI_OWNER | MLX4_IB_QPT_PROXY_SMI | MLX4_IB_QPT_PROXY_GSI | MLX4_IB_QPT_TUN_SMI_OWNER))) qp->sq.wqe_shift = ilog2(64); else qp->sq.wqe_shift = ilog2(roundup_pow_of_two(s)); for (;;) { qp->sq_max_wqes_per_wr = DIV_ROUND_UP(s, 1U << qp->sq.wqe_shift); /* * We need to leave 2 KB + 1 WR of headroom in the SQ to * allow HW to prefetch. */ qp->sq_spare_wqes = (2048 >> qp->sq.wqe_shift) + qp->sq_max_wqes_per_wr; qp->sq.wqe_cnt = roundup_pow_of_two(cap->max_send_wr * qp->sq_max_wqes_per_wr + qp->sq_spare_wqes); if (qp->sq.wqe_cnt <= dev->dev->caps.max_wqes) break; if (qp->sq_max_wqes_per_wr <= 1) return -EINVAL; ++qp->sq.wqe_shift; } qp->sq.max_gs = (min(dev->dev->caps.max_sq_desc_sz, (qp->sq_max_wqes_per_wr << qp->sq.wqe_shift)) - send_wqe_overhead(type, qp->flags)) / sizeof (struct mlx4_wqe_data_seg); qp->buf_size = (qp->rq.wqe_cnt << qp->rq.wqe_shift) + (qp->sq.wqe_cnt << qp->sq.wqe_shift); if (qp->rq.wqe_shift > qp->sq.wqe_shift) { qp->rq.offset = 0; qp->sq.offset = qp->rq.wqe_cnt << qp->rq.wqe_shift; } else { qp->rq.offset = qp->sq.wqe_cnt << qp->sq.wqe_shift; qp->sq.offset = 0; } cap->max_send_wr = qp->sq.max_post = (qp->sq.wqe_cnt - qp->sq_spare_wqes) / qp->sq_max_wqes_per_wr; cap->max_send_sge = min(qp->sq.max_gs, min(dev->dev->caps.max_sq_sg, dev->dev->caps.max_rq_sg)); qp->max_inline_data = cap->max_inline_data; return 0; } static int set_user_sq_size(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp, struct mlx4_ib_create_qp *ucmd) { /* Sanity check SQ size before proceeding */ if ((1 << ucmd->log_sq_bb_count) > dev->dev->caps.max_wqes || ucmd->log_sq_stride > ilog2(roundup_pow_of_two(dev->dev->caps.max_sq_desc_sz)) || ucmd->log_sq_stride < MLX4_IB_MIN_SQ_STRIDE) return -EINVAL; qp->sq.wqe_cnt = 1 << ucmd->log_sq_bb_count; qp->sq.wqe_shift = ucmd->log_sq_stride; qp->buf_size = (qp->rq.wqe_cnt << qp->rq.wqe_shift) + (qp->sq.wqe_cnt << qp->sq.wqe_shift); return 0; } static int alloc_proxy_bufs(struct ib_device *dev, struct mlx4_ib_qp *qp) { int i; qp->sqp_proxy_rcv = kmalloc(sizeof (struct mlx4_ib_buf) * qp->rq.wqe_cnt, GFP_KERNEL); if (!qp->sqp_proxy_rcv) return -ENOMEM; for (i = 0; i < qp->rq.wqe_cnt; i++) { qp->sqp_proxy_rcv[i].addr = kmalloc(sizeof (struct mlx4_ib_proxy_sqp_hdr), GFP_KERNEL); if (!qp->sqp_proxy_rcv[i].addr) goto err; qp->sqp_proxy_rcv[i].map = ib_dma_map_single(dev, qp->sqp_proxy_rcv[i].addr, sizeof (struct mlx4_ib_proxy_sqp_hdr), DMA_FROM_DEVICE); } return 0; err: while (i > 0) { --i; ib_dma_unmap_single(dev, qp->sqp_proxy_rcv[i].map, sizeof (struct mlx4_ib_proxy_sqp_hdr), DMA_FROM_DEVICE); kfree(qp->sqp_proxy_rcv[i].addr); } kfree(qp->sqp_proxy_rcv); qp->sqp_proxy_rcv = NULL; return -ENOMEM; } static void free_proxy_bufs(struct ib_device *dev, struct mlx4_ib_qp *qp) { int i; for (i = 0; i < qp->rq.wqe_cnt; i++) { ib_dma_unmap_single(dev, qp->sqp_proxy_rcv[i].map, sizeof (struct mlx4_ib_proxy_sqp_hdr), DMA_FROM_DEVICE); kfree(qp->sqp_proxy_rcv[i].addr); } kfree(qp->sqp_proxy_rcv); } static int qp_has_rq(struct ib_qp_init_attr *attr) { if (attr->qp_type == IB_QPT_XRC_INI || attr->qp_type == IB_QPT_XRC_TGT) return 0; return !attr->srq; } #ifdef __linux__ static int init_qpg_parent(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *pqp, struct ib_qp_init_attr *attr, int *qpn) { struct mlx4_ib_qpg_data *qpg_data; int tss_num, rss_num; int tss_align_num, rss_align_num; int tss_base, rss_base = 0; int err; /* Parent is part of the TSS range (in SW TSS ARP is sent via parent) */ tss_num = 1 + attr->parent_attrib.tss_child_count; tss_align_num = roundup_pow_of_two(tss_num); rss_num = attr->parent_attrib.rss_child_count; rss_align_num = roundup_pow_of_two(rss_num); if (rss_num > 1) { /* RSS is requested */ if (!(dev->dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_RSS)) return -ENOSYS; if (rss_align_num > dev->dev->caps.max_rss_tbl_sz) return -EINVAL; /* We must work with power of two */ attr->parent_attrib.rss_child_count = rss_align_num; } qpg_data = kzalloc(sizeof *qpg_data, GFP_KERNEL); if (!qpg_data) return -ENOMEM; if(pqp->flags & MLX4_IB_QP_NETIF) err = mlx4_ib_steer_qp_alloc(dev, tss_align_num, &tss_base); else err = mlx4_qp_reserve_range(dev->dev, tss_align_num, tss_align_num, &tss_base, 1); if (err) goto err1; if (tss_num > 1) { u32 alloc = BITS_TO_LONGS(tss_align_num) * sizeof(long); qpg_data->tss_bitmap = kzalloc(alloc, GFP_KERNEL); if (qpg_data->tss_bitmap == NULL) { err = -ENOMEM; goto err2; } bitmap_fill(qpg_data->tss_bitmap, tss_num); /* Note parent takes first index */ clear_bit(0, qpg_data->tss_bitmap); } if (rss_num > 1) { u32 alloc = BITS_TO_LONGS(rss_align_num) * sizeof(long); err = mlx4_qp_reserve_range(dev->dev, rss_align_num, 1, &rss_base, 0); if (err) goto err3; qpg_data->rss_bitmap = kzalloc(alloc, GFP_KERNEL); if (qpg_data->rss_bitmap == NULL) { err = -ENOMEM; goto err4; } bitmap_fill(qpg_data->rss_bitmap, rss_align_num); } qpg_data->tss_child_count = attr->parent_attrib.tss_child_count; qpg_data->rss_child_count = attr->parent_attrib.rss_child_count; qpg_data->qpg_parent = pqp; qpg_data->qpg_tss_mask_sz = ilog2(tss_align_num); qpg_data->tss_qpn_base = tss_base; qpg_data->rss_qpn_base = rss_base; pqp->qpg_data = qpg_data; *qpn = tss_base; return 0; err4: mlx4_qp_release_range(dev->dev, rss_base, rss_align_num); err3: if (tss_num > 1) kfree(qpg_data->tss_bitmap); err2: if(pqp->flags & MLX4_IB_QP_NETIF) mlx4_ib_steer_qp_free(dev, tss_base, tss_align_num); else mlx4_qp_release_range(dev->dev, tss_base, tss_align_num); err1: kfree(qpg_data); return err; } static void free_qpg_parent(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *pqp) { struct mlx4_ib_qpg_data *qpg_data = pqp->qpg_data; int align_num; if (qpg_data->tss_child_count > 1) kfree(qpg_data->tss_bitmap); align_num = roundup_pow_of_two(1 + qpg_data->tss_child_count); if(pqp->flags & MLX4_IB_QP_NETIF) mlx4_ib_steer_qp_free(dev, qpg_data->tss_qpn_base, align_num); else mlx4_qp_release_range(dev->dev, qpg_data->tss_qpn_base, align_num); if (qpg_data->rss_child_count > 1) { kfree(qpg_data->rss_bitmap); align_num = roundup_pow_of_two(qpg_data->rss_child_count); mlx4_qp_release_range(dev->dev, qpg_data->rss_qpn_base, align_num); } kfree(qpg_data); } static int alloc_qpg_qpn(struct ib_qp_init_attr *init_attr, struct mlx4_ib_qp *pqp, int *qpn) { struct mlx4_ib_qp *mqp = to_mqp(init_attr->qpg_parent); struct mlx4_ib_qpg_data *qpg_data = mqp->qpg_data; u32 idx, old; switch (init_attr->qpg_type) { case IB_QPG_CHILD_TX: if (qpg_data->tss_child_count == 0) return -EINVAL; do { /* Parent took index 0 */ idx = find_first_bit(qpg_data->tss_bitmap, qpg_data->tss_child_count + 1); if (idx >= qpg_data->tss_child_count + 1) return -ENOMEM; old = test_and_clear_bit(idx, qpg_data->tss_bitmap); } while (old == 0); idx += qpg_data->tss_qpn_base; break; case IB_QPG_CHILD_RX: if (qpg_data->rss_child_count == 0) return -EINVAL; do { idx = find_first_bit(qpg_data->rss_bitmap, qpg_data->rss_child_count); if (idx >= qpg_data->rss_child_count) return -ENOMEM; old = test_and_clear_bit(idx, qpg_data->rss_bitmap); } while (old == 0); idx += qpg_data->rss_qpn_base; break; default: return -EINVAL; } pqp->qpg_data = qpg_data; *qpn = idx; return 0; } static void free_qpg_qpn(struct mlx4_ib_qp *mqp, int qpn) { struct mlx4_ib_qpg_data *qpg_data = mqp->qpg_data; switch (mqp->qpg_type) { case IB_QPG_CHILD_TX: /* Do range check */ qpn -= qpg_data->tss_qpn_base; set_bit(qpn, qpg_data->tss_bitmap); break; case IB_QPG_CHILD_RX: qpn -= qpg_data->rss_qpn_base; set_bit(qpn, qpg_data->rss_bitmap); break; default: /* error */ pr_warn("wrong qpg type (%d)\n", mqp->qpg_type); break; } } #endif static int alloc_qpn_common(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp, struct ib_qp_init_attr *attr, int *qpn) { int err = 0; switch (attr->qpg_type) { case IB_QPG_NONE: /* Raw packet QPNs must be aligned to 8 bits. If not, the WQE * BlueFlame setup flow wrongly causes VLAN insertion. */ if (attr->qp_type == IB_QPT_RAW_PACKET) { err = mlx4_qp_reserve_range(dev->dev, 1, 1, qpn, 1); } else { if(qp->flags & MLX4_IB_QP_NETIF) err = mlx4_ib_steer_qp_alloc(dev, 1, qpn); else err = mlx4_qp_reserve_range(dev->dev, 1, 1, qpn, 0); } break; case IB_QPG_PARENT: #ifdef __linux__ err = init_qpg_parent(dev, qp, attr, qpn); #endif break; case IB_QPG_CHILD_TX: case IB_QPG_CHILD_RX: #ifdef __linux__ err = alloc_qpg_qpn(attr, qp, qpn); #endif break; default: qp->qpg_type = IB_QPG_NONE; err = -EINVAL; break; } if (err) return err; qp->qpg_type = attr->qpg_type; return 0; } static void free_qpn_common(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp, enum ib_qpg_type qpg_type, int qpn) { switch (qpg_type) { case IB_QPG_NONE: if (qp->flags & MLX4_IB_QP_NETIF) mlx4_ib_steer_qp_free(dev, qpn, 1); else mlx4_qp_release_range(dev->dev, qpn, 1); break; case IB_QPG_PARENT: #ifdef __linux__ free_qpg_parent(dev, qp); #endif break; case IB_QPG_CHILD_TX: case IB_QPG_CHILD_RX: #ifdef __linux__ free_qpg_qpn(qp, qpn); #endif break; default: break; } } /* Revert allocation on create_qp_common */ static void unalloc_qpn_common(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp, struct ib_qp_init_attr *attr, int qpn) { free_qpn_common(dev, qp, attr->qpg_type, qpn); } static void release_qpn_common(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp) { free_qpn_common(dev, qp, qp->qpg_type, qp->mqp.qpn); } static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd, struct ib_qp_init_attr *init_attr, struct ib_udata *udata, int sqpn, struct mlx4_ib_qp **caller_qp) { int qpn; int err; struct mlx4_ib_sqp *sqp; struct mlx4_ib_qp *qp; enum mlx4_ib_qp_type qp_type = (enum mlx4_ib_qp_type) init_attr->qp_type; #ifndef __linux__ init_attr->qpg_type = IB_QPG_NONE; #endif /* When tunneling special qps, we use a plain UD qp */ if (sqpn) { if (mlx4_is_mfunc(dev->dev) && (!mlx4_is_master(dev->dev) || !(init_attr->create_flags & MLX4_IB_SRIOV_SQP))) { if (init_attr->qp_type == IB_QPT_GSI) qp_type = MLX4_IB_QPT_PROXY_GSI; else if (mlx4_is_master(dev->dev)) qp_type = MLX4_IB_QPT_PROXY_SMI_OWNER; else qp_type = MLX4_IB_QPT_PROXY_SMI; } qpn = sqpn; /* add extra sg entry for tunneling */ init_attr->cap.max_recv_sge++; } else if (init_attr->create_flags & MLX4_IB_SRIOV_TUNNEL_QP) { struct mlx4_ib_qp_tunnel_init_attr *tnl_init = container_of(init_attr, struct mlx4_ib_qp_tunnel_init_attr, init_attr); if ((tnl_init->proxy_qp_type != IB_QPT_SMI && tnl_init->proxy_qp_type != IB_QPT_GSI) || !mlx4_is_master(dev->dev)) return -EINVAL; if (tnl_init->proxy_qp_type == IB_QPT_GSI) qp_type = MLX4_IB_QPT_TUN_GSI; else if (tnl_init->slave == mlx4_master_func_num(dev->dev)) qp_type = MLX4_IB_QPT_TUN_SMI_OWNER; else qp_type = MLX4_IB_QPT_TUN_SMI; /* we are definitely in the PPF here, since we are creating * tunnel QPs. base_tunnel_sqpn is therefore valid. */ qpn = dev->dev->phys_caps.base_tunnel_sqpn + 8 * tnl_init->slave + tnl_init->proxy_qp_type * 2 + tnl_init->port - 1; sqpn = qpn; } if (!*caller_qp) { if (qp_type == MLX4_IB_QPT_SMI || qp_type == MLX4_IB_QPT_GSI || (qp_type & (MLX4_IB_QPT_PROXY_SMI | MLX4_IB_QPT_PROXY_SMI_OWNER | MLX4_IB_QPT_PROXY_GSI | MLX4_IB_QPT_TUN_SMI_OWNER))) { sqp = kzalloc(sizeof (struct mlx4_ib_sqp), GFP_KERNEL); if (!sqp) return -ENOMEM; qp = &sqp->qp; qp->pri.vid = qp->alt.vid = 0xFFFF; } else { qp = kzalloc(sizeof (struct mlx4_ib_qp), GFP_KERNEL); if (!qp) return -ENOMEM; qp->pri.vid = qp->alt.vid = 0xFFFF; } } else qp = *caller_qp; qp->mlx4_ib_qp_type = qp_type; mutex_init(&qp->mutex); spin_lock_init(&qp->sq.lock); spin_lock_init(&qp->rq.lock); INIT_LIST_HEAD(&qp->gid_list); INIT_LIST_HEAD(&qp->steering_rules); INIT_LIST_HEAD(&qp->rules_list); qp->state = IB_QPS_RESET; if (init_attr->sq_sig_type == IB_SIGNAL_ALL_WR) qp->sq_signal_bits = cpu_to_be32(MLX4_WQE_CTRL_CQ_UPDATE); err = set_rq_size(dev, &init_attr->cap, !!pd->uobject, qp_has_rq(init_attr), qp); if (err) goto err; if (pd->uobject) { struct mlx4_ib_create_qp ucmd; int shift; int n; if (ib_copy_from_udata(&ucmd, udata, sizeof ucmd)) { err = -EFAULT; goto err; } qp->sq_no_prefetch = ucmd.sq_no_prefetch; err = set_user_sq_size(dev, qp, &ucmd); if (err) goto err; qp->umem = ib_umem_get(pd->uobject->context, ucmd.buf_addr, qp->buf_size, 0, 0); if (IS_ERR(qp->umem)) { err = PTR_ERR(qp->umem); goto err; } n = ib_umem_page_count(qp->umem); shift = mlx4_ib_umem_calc_optimal_mtt_size(qp->umem, 0, &n); err = mlx4_mtt_init(dev->dev, n, shift, &qp->mtt); if (err) goto err_buf; err = mlx4_ib_umem_write_mtt(dev, &qp->mtt, qp->umem); if (err) goto err_mtt; if (qp_has_rq(init_attr)) { err = mlx4_ib_db_map_user(to_mucontext(pd->uobject->context), ucmd.db_addr, &qp->db); if (err) goto err_mtt; } } else { qp->sq_no_prefetch = 0; if (init_attr->create_flags & IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK) qp->flags |= MLX4_IB_QP_BLOCK_MULTICAST_LOOPBACK; if (init_attr->create_flags & IB_QP_CREATE_IPOIB_UD_LSO) qp->flags |= MLX4_IB_QP_LSO; if (init_attr->create_flags & IB_QP_CREATE_NETIF_QP && dev->dev->caps.steering_mode == MLX4_STEERING_MODE_DEVICE_MANAGED && !mlx4_is_mfunc(dev->dev)) qp->flags |= MLX4_IB_QP_NETIF; err = set_kernel_sq_size(dev, &init_attr->cap, qp_type, qp); if (err) goto err; if (qp_has_rq(init_attr)) { err = mlx4_db_alloc(dev->dev, &qp->db, 0); if (err) goto err; *qp->db.db = 0; } if (qp->max_inline_data) { err = mlx4_bf_alloc(dev->dev, &qp->bf, 0); if (err) { pr_debug("failed to allocate blue flame" " register (%d)", err); qp->bf.uar = &dev->priv_uar; } } else qp->bf.uar = &dev->priv_uar; if (mlx4_buf_alloc(dev->dev, qp->buf_size, PAGE_SIZE * 2, &qp->buf)) { err = -ENOMEM; goto err_db; } err = mlx4_mtt_init(dev->dev, qp->buf.npages, qp->buf.page_shift, &qp->mtt); if (err) goto err_buf; err = mlx4_buf_write_mtt(dev->dev, &qp->mtt, &qp->buf); if (err) goto err_mtt; qp->sq.wrid = kmalloc(qp->sq.wqe_cnt * sizeof (u64), GFP_KERNEL); qp->rq.wrid = kmalloc(qp->rq.wqe_cnt * sizeof (u64), GFP_KERNEL); if (!qp->sq.wrid || !qp->rq.wrid) { err = -ENOMEM; goto err_wrid; } } if (sqpn) { if (qp->mlx4_ib_qp_type & (MLX4_IB_QPT_PROXY_SMI_OWNER | MLX4_IB_QPT_PROXY_SMI | MLX4_IB_QPT_PROXY_GSI)) { if (alloc_proxy_bufs(pd->device, qp)) { err = -ENOMEM; goto err_wrid; } } } else { err = alloc_qpn_common(dev, qp, init_attr, &qpn); if (err) goto err_proxy; } err = mlx4_qp_alloc(dev->dev, qpn, &qp->mqp); if (err) goto err_qpn; if (init_attr->qp_type == IB_QPT_XRC_TGT) qp->mqp.qpn |= (1 << 23); /* * Hardware wants QPN written in big-endian order (after * shifting) for send doorbell. Precompute this value to save * a little bit when posting sends. */ qp->doorbell_qpn = swab32(qp->mqp.qpn << 8); qp->mqp.event = mlx4_ib_qp_event; if (!*caller_qp) *caller_qp = qp; return 0; err_qpn: unalloc_qpn_common(dev, qp, init_attr, qpn); err_proxy: if (qp->mlx4_ib_qp_type == MLX4_IB_QPT_PROXY_GSI) free_proxy_bufs(pd->device, qp); err_wrid: if (pd->uobject) { if (qp_has_rq(init_attr)) mlx4_ib_db_unmap_user(to_mucontext(pd->uobject->context), &qp->db); } else { kfree(qp->sq.wrid); kfree(qp->rq.wrid); } err_mtt: mlx4_mtt_cleanup(dev->dev, &qp->mtt); err_buf: if (pd->uobject) ib_umem_release(qp->umem); else mlx4_buf_free(dev->dev, qp->buf_size, &qp->buf); err_db: if (!pd->uobject && qp_has_rq(init_attr)) mlx4_db_free(dev->dev, &qp->db); if (qp->max_inline_data) mlx4_bf_free(dev->dev, &qp->bf); err: if (!*caller_qp) kfree(qp); return err; } static enum mlx4_qp_state to_mlx4_state(enum ib_qp_state state) { switch (state) { case IB_QPS_RESET: return MLX4_QP_STATE_RST; case IB_QPS_INIT: return MLX4_QP_STATE_INIT; case IB_QPS_RTR: return MLX4_QP_STATE_RTR; case IB_QPS_RTS: return MLX4_QP_STATE_RTS; case IB_QPS_SQD: return MLX4_QP_STATE_SQD; case IB_QPS_SQE: return MLX4_QP_STATE_SQER; case IB_QPS_ERR: return MLX4_QP_STATE_ERR; default: return -1; } } static void mlx4_ib_lock_cqs(struct mlx4_ib_cq *send_cq, struct mlx4_ib_cq *recv_cq) __acquires(&send_cq->lock) __acquires(&recv_cq->lock) { if (send_cq == recv_cq) { spin_lock_irq(&send_cq->lock); (void) __acquire(&recv_cq->lock); } else if (send_cq->mcq.cqn < recv_cq->mcq.cqn) { spin_lock_irq(&send_cq->lock); spin_lock_nested(&recv_cq->lock, SINGLE_DEPTH_NESTING); } else { spin_lock_irq(&recv_cq->lock); spin_lock_nested(&send_cq->lock, SINGLE_DEPTH_NESTING); } } static void mlx4_ib_unlock_cqs(struct mlx4_ib_cq *send_cq, struct mlx4_ib_cq *recv_cq) __releases(&send_cq->lock) __releases(&recv_cq->lock) { if (send_cq == recv_cq) { (void) __release(&recv_cq->lock); spin_unlock_irq(&send_cq->lock); } else if (send_cq->mcq.cqn < recv_cq->mcq.cqn) { spin_unlock(&recv_cq->lock); spin_unlock_irq(&send_cq->lock); } else { spin_unlock(&send_cq->lock); spin_unlock_irq(&recv_cq->lock); } } static void del_gid_entries(struct mlx4_ib_qp *qp) { struct mlx4_ib_gid_entry *ge, *tmp; list_for_each_entry_safe(ge, tmp, &qp->gid_list, list) { list_del(&ge->list); kfree(ge); } } static struct mlx4_ib_pd *get_pd(struct mlx4_ib_qp *qp) { if (qp->ibqp.qp_type == IB_QPT_XRC_TGT) return to_mpd(to_mxrcd(qp->ibqp.xrcd)->pd); else return to_mpd(qp->ibqp.pd); } static void get_cqs(struct mlx4_ib_qp *qp, struct mlx4_ib_cq **send_cq, struct mlx4_ib_cq **recv_cq) { switch (qp->ibqp.qp_type) { case IB_QPT_XRC_TGT: *send_cq = to_mcq(to_mxrcd(qp->ibqp.xrcd)->cq); *recv_cq = *send_cq; break; case IB_QPT_XRC_INI: *send_cq = to_mcq(qp->ibqp.send_cq); *recv_cq = *send_cq; break; default: *send_cq = to_mcq(qp->ibqp.send_cq); *recv_cq = to_mcq(qp->ibqp.recv_cq); break; } } static void destroy_qp_common(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp, int is_user) { struct mlx4_ib_cq *send_cq, *recv_cq; if (qp->state != IB_QPS_RESET) { if (mlx4_qp_modify(dev->dev, NULL, to_mlx4_state(qp->state), MLX4_QP_STATE_RST, NULL, 0, 0, &qp->mqp)) pr_warn("modify QP %06x to RESET failed.\n", qp->mqp.qpn); if (qp->pri.smac) { mlx4_unregister_mac(dev->dev, qp->pri.smac_port, qp->pri.smac); qp->pri.smac = 0; } if (qp->alt.smac) { mlx4_unregister_mac(dev->dev, qp->alt.smac_port, qp->alt.smac); qp->alt.smac = 0; } if (qp->pri.vid < 0x1000) { mlx4_unregister_vlan(dev->dev, qp->pri.vlan_port, qp->pri.vid); qp->pri.vid = 0xFFFF; qp->pri.candidate_vid = 0xFFFF; qp->pri.update_vid = 0; } if (qp->alt.vid < 0x1000) { mlx4_unregister_vlan(dev->dev, qp->alt.vlan_port, qp->alt.vid); qp->alt.vid = 0xFFFF; qp->alt.candidate_vid = 0xFFFF; qp->alt.update_vid = 0; } } get_cqs(qp, &send_cq, &recv_cq); mlx4_ib_lock_cqs(send_cq, recv_cq); if (!is_user) { __mlx4_ib_cq_clean(recv_cq, qp->mqp.qpn, qp->ibqp.srq ? to_msrq(qp->ibqp.srq): NULL); if (send_cq != recv_cq) __mlx4_ib_cq_clean(send_cq, qp->mqp.qpn, NULL); } mlx4_qp_remove(dev->dev, &qp->mqp); mlx4_ib_unlock_cqs(send_cq, recv_cq); mlx4_qp_free(dev->dev, &qp->mqp); if (!is_sqp(dev, qp) && !is_tunnel_qp(dev, qp)) release_qpn_common(dev, qp); mlx4_mtt_cleanup(dev->dev, &qp->mtt); if (is_user) { if (qp->rq.wqe_cnt) mlx4_ib_db_unmap_user(to_mucontext(qp->ibqp.uobject->context), &qp->db); ib_umem_release(qp->umem); } else { kfree(qp->sq.wrid); kfree(qp->rq.wrid); if (qp->mlx4_ib_qp_type & (MLX4_IB_QPT_PROXY_SMI_OWNER | MLX4_IB_QPT_PROXY_SMI | MLX4_IB_QPT_PROXY_GSI)) free_proxy_bufs(&dev->ib_dev, qp); mlx4_buf_free(dev->dev, qp->buf_size, &qp->buf); if (qp->max_inline_data) mlx4_bf_free(dev->dev, &qp->bf); if (qp->rq.wqe_cnt) mlx4_db_free(dev->dev, &qp->db); } del_gid_entries(qp); } static u32 get_sqp_num(struct mlx4_ib_dev *dev, struct ib_qp_init_attr *attr) { /* Native or PPF */ if (!mlx4_is_mfunc(dev->dev) || (mlx4_is_master(dev->dev) && attr->create_flags & MLX4_IB_SRIOV_SQP)) { return dev->dev->phys_caps.base_sqpn + (attr->qp_type == IB_QPT_SMI ? 0 : 2) + attr->port_num - 1; } /* PF or VF -- creating proxies */ if (attr->qp_type == IB_QPT_SMI) return dev->dev->caps.qp0_proxy[attr->port_num - 1]; else return dev->dev->caps.qp1_proxy[attr->port_num - 1]; } #ifdef __linux__ static int check_qpg_attr(struct mlx4_ib_dev *dev, struct ib_qp_init_attr *attr) { if (attr->qpg_type == IB_QPG_NONE) return 0; if (attr->qp_type != IB_QPT_UD) return -EINVAL; if (attr->qpg_type == IB_QPG_PARENT) { if (attr->parent_attrib.tss_child_count == 1) return -EINVAL; /* Doesn't make sense */ if (attr->parent_attrib.rss_child_count == 1) return -EINVAL; /* Doesn't make sense */ if ((attr->parent_attrib.tss_child_count == 0) && (attr->parent_attrib.rss_child_count == 0)) /* Should be called with IP_QPG_NONE */ return -EINVAL; if (attr->parent_attrib.rss_child_count > 1) { int rss_align_num; if (!(dev->dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_RSS)) return -ENOSYS; rss_align_num = roundup_pow_of_two( attr->parent_attrib.rss_child_count); if (rss_align_num > dev->dev->caps.max_rss_tbl_sz) return -EINVAL; } } else { struct mlx4_ib_qpg_data *qpg_data; if (attr->qpg_parent == NULL) return -EINVAL; if (IS_ERR(attr->qpg_parent)) return -EINVAL; qpg_data = to_mqp(attr->qpg_parent)->qpg_data; if (qpg_data == NULL) return -EINVAL; if (attr->qpg_type == IB_QPG_CHILD_TX && !qpg_data->tss_child_count) return -EINVAL; if (attr->qpg_type == IB_QPG_CHILD_RX && !qpg_data->rss_child_count) return -EINVAL; } return 0; } #endif #define RESERVED_FLAGS_MASK ((((unsigned int)IB_QP_CREATE_RESERVED_END - 1) | IB_QP_CREATE_RESERVED_END) \ & ~(IB_QP_CREATE_RESERVED_START - 1)) static enum mlx4_ib_qp_flags to_mlx4_ib_qp_flags(enum ib_qp_create_flags ib_qp_flags) { enum mlx4_ib_qp_flags mlx4_ib_qp_flags = 0; if (ib_qp_flags & IB_QP_CREATE_IPOIB_UD_LSO) mlx4_ib_qp_flags |= MLX4_IB_QP_LSO; if (ib_qp_flags & IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK) mlx4_ib_qp_flags |= MLX4_IB_QP_BLOCK_MULTICAST_LOOPBACK; if (ib_qp_flags & IB_QP_CREATE_NETIF_QP) mlx4_ib_qp_flags |= MLX4_IB_QP_NETIF; /* reserved flags */ mlx4_ib_qp_flags |= (ib_qp_flags & RESERVED_FLAGS_MASK); return mlx4_ib_qp_flags; } struct ib_qp *mlx4_ib_create_qp(struct ib_pd *pd, struct ib_qp_init_attr *init_attr, struct ib_udata *udata) { struct mlx4_ib_qp *qp = NULL; int err; u16 xrcdn = 0; enum mlx4_ib_qp_flags mlx4_qp_flags = to_mlx4_ib_qp_flags(init_attr->create_flags); struct ib_device *device; /* see ib_core::ib_create_qp same handling */ device = pd ? pd->device : init_attr->xrcd->device; /* * We only support LSO, vendor flag1, and multicast loopback blocking, * and only for kernel UD QPs. */ if (mlx4_qp_flags & ~(MLX4_IB_QP_LSO | MLX4_IB_QP_BLOCK_MULTICAST_LOOPBACK | MLX4_IB_SRIOV_TUNNEL_QP | MLX4_IB_SRIOV_SQP | MLX4_IB_QP_NETIF)) return ERR_PTR(-EINVAL); if (init_attr->create_flags & IB_QP_CREATE_NETIF_QP) { if (init_attr->qp_type != IB_QPT_UD) return ERR_PTR(-EINVAL); } if (init_attr->create_flags && (udata || ((mlx4_qp_flags & ~MLX4_IB_SRIOV_SQP) && init_attr->qp_type != IB_QPT_UD) || ((mlx4_qp_flags & MLX4_IB_SRIOV_SQP) && init_attr->qp_type > IB_QPT_GSI))) return ERR_PTR(-EINVAL); #ifdef __linux__ err = check_qpg_attr(to_mdev(device), init_attr); if (err) return ERR_PTR(err); #endif switch (init_attr->qp_type) { case IB_QPT_XRC_TGT: pd = to_mxrcd(init_attr->xrcd)->pd; xrcdn = to_mxrcd(init_attr->xrcd)->xrcdn; init_attr->send_cq = to_mxrcd(init_attr->xrcd)->cq; /* fall through */ case IB_QPT_XRC_INI: if (!(to_mdev(device)->dev->caps.flags & MLX4_DEV_CAP_FLAG_XRC)) return ERR_PTR(-ENOSYS); init_attr->recv_cq = init_attr->send_cq; /* fall through */ case IB_QPT_RC: case IB_QPT_UC: case IB_QPT_RAW_PACKET: qp = kzalloc(sizeof *qp, GFP_KERNEL); if (!qp) return ERR_PTR(-ENOMEM); qp->pri.vid = qp->alt.vid = 0xFFFF; /* fall through */ case IB_QPT_UD: { err = create_qp_common(to_mdev(device), pd, init_attr, udata, 0, &qp); if (err) { kfree(qp); return ERR_PTR(err); } qp->ibqp.qp_num = qp->mqp.qpn; qp->xrcdn = xrcdn; break; } case IB_QPT_SMI: case IB_QPT_GSI: { /* Userspace is not allowed to create special QPs: */ if (udata) return ERR_PTR(-EINVAL); err = create_qp_common(to_mdev(device), pd, init_attr, udata, get_sqp_num(to_mdev(device), init_attr), &qp); if (err) return ERR_PTR(err); qp->port = init_attr->port_num; qp->ibqp.qp_num = init_attr->qp_type == IB_QPT_SMI ? 0 : 1; break; } default: /* Don't support raw QPs */ return ERR_PTR(-EINVAL); } return &qp->ibqp; } int mlx4_ib_destroy_qp(struct ib_qp *qp) { struct mlx4_ib_dev *dev = to_mdev(qp->device); struct mlx4_ib_qp *mqp = to_mqp(qp); struct mlx4_ib_pd *pd; if (is_qp0(dev, mqp)) mlx4_CLOSE_PORT(dev->dev, mqp->port); pd = get_pd(mqp); destroy_qp_common(dev, mqp, !!pd->ibpd.uobject); if (is_sqp(dev, mqp)) kfree(to_msqp(mqp)); else kfree(mqp); return 0; } static int to_mlx4_st(struct mlx4_ib_dev *dev, enum mlx4_ib_qp_type type) { switch (type) { case MLX4_IB_QPT_RC: return MLX4_QP_ST_RC; case MLX4_IB_QPT_UC: return MLX4_QP_ST_UC; case MLX4_IB_QPT_UD: return MLX4_QP_ST_UD; case MLX4_IB_QPT_XRC_INI: case MLX4_IB_QPT_XRC_TGT: return MLX4_QP_ST_XRC; case MLX4_IB_QPT_SMI: case MLX4_IB_QPT_GSI: case MLX4_IB_QPT_RAW_PACKET: return MLX4_QP_ST_MLX; case MLX4_IB_QPT_PROXY_SMI_OWNER: case MLX4_IB_QPT_TUN_SMI_OWNER: return (mlx4_is_mfunc(dev->dev) ? MLX4_QP_ST_MLX : -1); case MLX4_IB_QPT_PROXY_SMI: case MLX4_IB_QPT_TUN_SMI: case MLX4_IB_QPT_PROXY_GSI: case MLX4_IB_QPT_TUN_GSI: return (mlx4_is_mfunc(dev->dev) ? MLX4_QP_ST_UD : -1); default: return -1; } } static __be32 to_mlx4_access_flags(struct mlx4_ib_qp *qp, const struct ib_qp_attr *attr, int attr_mask) { u8 dest_rd_atomic; u32 access_flags; u32 hw_access_flags = 0; if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC) dest_rd_atomic = attr->max_dest_rd_atomic; else dest_rd_atomic = qp->resp_depth; if (attr_mask & IB_QP_ACCESS_FLAGS) access_flags = attr->qp_access_flags; else access_flags = qp->atomic_rd_en; if (!dest_rd_atomic) access_flags &= IB_ACCESS_REMOTE_WRITE; if (access_flags & IB_ACCESS_REMOTE_READ) hw_access_flags |= MLX4_QP_BIT_RRE; if (access_flags & IB_ACCESS_REMOTE_ATOMIC) hw_access_flags |= MLX4_QP_BIT_RAE; if (access_flags & IB_ACCESS_REMOTE_WRITE) hw_access_flags |= MLX4_QP_BIT_RWE; return cpu_to_be32(hw_access_flags); } static void store_sqp_attrs(struct mlx4_ib_sqp *sqp, const struct ib_qp_attr *attr, int attr_mask) { if (attr_mask & IB_QP_PKEY_INDEX) sqp->pkey_index = attr->pkey_index; if (attr_mask & IB_QP_QKEY) sqp->qkey = attr->qkey; if (attr_mask & IB_QP_SQ_PSN) sqp->send_psn = attr->sq_psn; } static void mlx4_set_sched(struct mlx4_qp_path *path, u8 port) { path->sched_queue = (path->sched_queue & 0xbf) | ((port - 1) << 6); } static int mlx4_set_path(struct mlx4_ib_dev *dev, const struct ib_ah_attr *ah, struct mlx4_ib_qp *qp, struct mlx4_qp_path *path, u8 port, int is_primary) { struct net_device *ndev; int err; int is_eth = rdma_port_get_link_layer(&dev->ib_dev, port) == IB_LINK_LAYER_ETHERNET; u8 mac[6]; int is_mcast; u16 vlan_tag; int vidx; int smac_index; u64 u64_mac; u8 *smac; struct mlx4_roce_smac_vlan_info *smac_info; path->grh_mylmc = ah->src_path_bits & 0x7f; path->rlid = cpu_to_be16(ah->dlid); if (ah->static_rate) { path->static_rate = ah->static_rate + MLX4_STAT_RATE_OFFSET; while (path->static_rate > IB_RATE_2_5_GBPS + MLX4_STAT_RATE_OFFSET && !(1 << path->static_rate & dev->dev->caps.stat_rate_support)) --path->static_rate; } else path->static_rate = 0; if (ah->ah_flags & IB_AH_GRH) { if (ah->grh.sgid_index >= dev->dev->caps.gid_table_len[port]) { pr_err("sgid_index (%u) too large. max is %d\n", ah->grh.sgid_index, dev->dev->caps.gid_table_len[port] - 1); return -1; } path->grh_mylmc |= 1 << 7; path->mgid_index = ah->grh.sgid_index; path->hop_limit = ah->grh.hop_limit; path->tclass_flowlabel = cpu_to_be32((ah->grh.traffic_class << 20) | (ah->grh.flow_label)); memcpy(path->rgid, ah->grh.dgid.raw, 16); } if (is_eth) { if (!(ah->ah_flags & IB_AH_GRH)) return -1; path->sched_queue = MLX4_IB_DEFAULT_SCHED_QUEUE | ((port - 1) << 6) | ((ah->sl & 7) << 3); if (is_primary) smac_info = &qp->pri; else smac_info = &qp->alt; vlan_tag = rdma_get_vlan_id(&dev->iboe.gid_table[port - 1][ah->grh.sgid_index]); if (vlan_tag < 0x1000) { if (smac_info->vid < 0x1000) { /* both valid vlan ids */ if (smac_info->vid != vlan_tag) { /* different VIDs. unreg old and reg new */ err = mlx4_register_vlan(dev->dev, port, vlan_tag, &vidx); if (err) return err; smac_info->candidate_vid = vlan_tag; smac_info->candidate_vlan_index = vidx; smac_info->candidate_vlan_port = port; smac_info->update_vid = 1; path->vlan_index = vidx; path->fl = 1 << 6; } else { path->vlan_index = smac_info->vlan_index; path->fl = 1 << 6; } } else { /* no current vlan tag in qp */ err = mlx4_register_vlan(dev->dev, port, vlan_tag, &vidx); if (err) return err; smac_info->candidate_vid = vlan_tag; smac_info->candidate_vlan_index = vidx; smac_info->candidate_vlan_port = port; smac_info->update_vid = 1; path->vlan_index = vidx; path->fl = 1 << 6; } } else { /* have current vlan tag. unregister it at modify-qp success */ if (smac_info->vid < 0x1000) { smac_info->candidate_vid = 0xFFFF; smac_info->update_vid = 1; } } err = mlx4_ib_resolve_grh(dev, ah, mac, &is_mcast, port); if (err) return err; /* get smac_index for RoCE use. * If no smac was yet assigned, register one. * If one was already assigned, but the new mac differs, * unregister the old one and register the new one. */ spin_lock(&dev->iboe.lock); ndev = dev->iboe.netdevs[port - 1]; if (ndev) { #ifdef __linux__ smac = ndev->dev_addr; /* fixme: cache this value */ #else smac = IF_LLADDR(ndev); /* fixme: cache this value */ #endif u64_mac = mlx4_mac_to_u64(smac); } else u64_mac = dev->dev->caps.def_mac[port]; spin_unlock(&dev->iboe.lock); if (!smac_info->smac || smac_info->smac != u64_mac) { /* register candidate now, unreg if needed, after success */ smac_index = mlx4_register_mac(dev->dev, port, u64_mac); if (smac_index >= 0) { smac_info->candidate_smac_index = smac_index; smac_info->candidate_smac = u64_mac; smac_info->candidate_smac_port = port; } else return -EINVAL; } else smac_index = smac_info->smac_index; memcpy(path->dmac, mac, 6); path->ackto = MLX4_IB_LINK_TYPE_ETH; /* put MAC table smac index for IBoE */ path->grh_mylmc = (u8) (smac_index) | 0x80 ; } else path->sched_queue = MLX4_IB_DEFAULT_SCHED_QUEUE | ((port - 1) << 6) | ((ah->sl & 0xf) << 2); return 0; } static void update_mcg_macs(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp) { struct mlx4_ib_gid_entry *ge, *tmp; list_for_each_entry_safe(ge, tmp, &qp->gid_list, list) { if (!ge->added && mlx4_ib_add_mc(dev, qp, &ge->gid)) { ge->added = 1; ge->port = qp->port; } } } static int handle_eth_ud_smac_index(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp, struct mlx4_qp_context *context) { struct net_device *ndev; u64 u64_mac; u8 *smac; int smac_index; ndev = dev->iboe.netdevs[qp->port - 1]; if (ndev) { #ifdef __linux__ smac = ndev->dev_addr; /* fixme: cache this value */ #else smac = IF_LLADDR(ndev); /* fixme: cache this value */ #endif u64_mac = mlx4_mac_to_u64(smac); } else u64_mac = dev->dev->caps.def_mac[qp->port]; context->pri_path.sched_queue = MLX4_IB_DEFAULT_SCHED_QUEUE | ((qp->port - 1) << 6); if (!qp->pri.smac) { smac_index = mlx4_register_mac(dev->dev, qp->port, u64_mac); if (smac_index >= 0) { qp->pri.candidate_smac_index = smac_index; qp->pri.candidate_smac = u64_mac; qp->pri.candidate_smac_port = qp->port; context->pri_path.grh_mylmc = 0x80 | (u8) smac_index; } else return -ENOENT; } return 0; } static int __mlx4_ib_modify_qp(struct ib_qp *ibqp, const struct ib_qp_attr *attr, int attr_mask, enum ib_qp_state cur_state, enum ib_qp_state new_state) { struct mlx4_ib_dev *dev = to_mdev(ibqp->device); struct mlx4_ib_qp *qp = to_mqp(ibqp); struct mlx4_ib_pd *pd; struct mlx4_ib_cq *send_cq, *recv_cq; struct mlx4_qp_context *context; enum mlx4_qp_optpar optpar = 0; int sqd_event; int steer_qp = 0; int err = -EINVAL; int is_eth = -1; context = kzalloc(sizeof *context, GFP_KERNEL); if (!context) return -ENOMEM; context->flags = cpu_to_be32((to_mlx4_state(new_state) << 28) | (to_mlx4_st(dev, qp->mlx4_ib_qp_type) << 16)); if (!(attr_mask & IB_QP_PATH_MIG_STATE)) context->flags |= cpu_to_be32(MLX4_QP_PM_MIGRATED << 11); else { optpar |= MLX4_QP_OPTPAR_PM_STATE; switch (attr->path_mig_state) { case IB_MIG_MIGRATED: context->flags |= cpu_to_be32(MLX4_QP_PM_MIGRATED << 11); break; case IB_MIG_REARM: context->flags |= cpu_to_be32(MLX4_QP_PM_REARM << 11); break; case IB_MIG_ARMED: context->flags |= cpu_to_be32(MLX4_QP_PM_ARMED << 11); break; } } if (ibqp->qp_type == IB_QPT_GSI || ibqp->qp_type == IB_QPT_SMI) context->mtu_msgmax = (IB_MTU_4096 << 5) | 11; else if (ibqp->qp_type == IB_QPT_RAW_PACKET) context->mtu_msgmax = (MLX4_RAW_QP_MTU << 5) | MLX4_RAW_QP_MSGMAX; else if (ibqp->qp_type == IB_QPT_UD) { if (qp->flags & MLX4_IB_QP_LSO) context->mtu_msgmax = (IB_MTU_4096 << 5) | ilog2(dev->dev->caps.max_gso_sz); else context->mtu_msgmax = (IB_MTU_4096 << 5) | 12; } else if (attr_mask & IB_QP_PATH_MTU) { if (attr->path_mtu < IB_MTU_256 || attr->path_mtu > IB_MTU_4096) { pr_err("path MTU (%u) is invalid\n", attr->path_mtu); goto out; } context->mtu_msgmax = (attr->path_mtu << 5) | ilog2(dev->dev->caps.max_msg_sz); } if (qp->rq.wqe_cnt) context->rq_size_stride = ilog2(qp->rq.wqe_cnt) << 3; context->rq_size_stride |= qp->rq.wqe_shift - 4; if (qp->sq.wqe_cnt) context->sq_size_stride = ilog2(qp->sq.wqe_cnt) << 3; context->sq_size_stride |= qp->sq.wqe_shift - 4; if (cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT) { context->sq_size_stride |= !!qp->sq_no_prefetch << 7; context->xrcd = cpu_to_be32((u32) qp->xrcdn); context->param3 |= cpu_to_be32(1 << 30); } if (qp->ibqp.uobject) context->usr_page = cpu_to_be32(to_mucontext(ibqp->uobject->context)->uar.index); else context->usr_page = cpu_to_be32(qp->bf.uar->index); if (attr_mask & IB_QP_DEST_QPN) context->remote_qpn = cpu_to_be32(attr->dest_qp_num); if (attr_mask & IB_QP_PORT) { if (cur_state == IB_QPS_SQD && new_state == IB_QPS_SQD && !(attr_mask & IB_QP_AV)) { mlx4_set_sched(&context->pri_path, attr->port_num); optpar |= MLX4_QP_OPTPAR_SCHED_QUEUE; } } if (cur_state == IB_QPS_INIT && new_state == IB_QPS_RTR) { if (dev->counters[qp->port - 1] != -1) { context->pri_path.counter_index = dev->counters[qp->port - 1]; optpar |= MLX4_QP_OPTPAR_COUNTER_INDEX; } else context->pri_path.counter_index = 0xff; if (qp->flags & MLX4_IB_QP_NETIF && (qp->qpg_type == IB_QPG_NONE || qp->qpg_type == IB_QPG_PARENT)) { mlx4_ib_steer_qp_reg(dev, qp, 1); steer_qp = 1; } } if (attr_mask & IB_QP_PKEY_INDEX) { if (qp->mlx4_ib_qp_type & MLX4_IB_QPT_ANY_SRIOV) context->pri_path.disable_pkey_check = 0x40; context->pri_path.pkey_index = attr->pkey_index; optpar |= MLX4_QP_OPTPAR_PKEY_INDEX; } if (attr_mask & IB_QP_AV) { if (mlx4_set_path(dev, &attr->ah_attr, qp, &context->pri_path, attr_mask & IB_QP_PORT ? attr->port_num : qp->port, 1)) goto out; optpar |= (MLX4_QP_OPTPAR_PRIMARY_ADDR_PATH | MLX4_QP_OPTPAR_SCHED_QUEUE); } if (attr_mask & IB_QP_TIMEOUT) { context->pri_path.ackto |= attr->timeout << 3; optpar |= MLX4_QP_OPTPAR_ACK_TIMEOUT; } if (attr_mask & IB_QP_ALT_PATH) { if (attr->alt_port_num == 0 || attr->alt_port_num > dev->dev->caps.num_ports) goto out; if (attr->alt_pkey_index >= dev->dev->caps.pkey_table_len[attr->alt_port_num]) goto out; if (mlx4_set_path(dev, &attr->alt_ah_attr, qp, &context->alt_path, attr->alt_port_num, 0)) goto out; context->alt_path.pkey_index = attr->alt_pkey_index; context->alt_path.ackto = attr->alt_timeout << 3; optpar |= MLX4_QP_OPTPAR_ALT_ADDR_PATH; } pd = get_pd(qp); get_cqs(qp, &send_cq, &recv_cq); context->pd = cpu_to_be32(pd->pdn); context->cqn_send = cpu_to_be32(send_cq->mcq.cqn); context->cqn_recv = cpu_to_be32(recv_cq->mcq.cqn); context->params1 = cpu_to_be32(MLX4_IB_ACK_REQ_FREQ << 28); /* Set "fast registration enabled" for all kernel QPs */ if (!qp->ibqp.uobject) context->params1 |= cpu_to_be32(1 << 11); if (attr_mask & IB_QP_RNR_RETRY) { context->params1 |= cpu_to_be32(attr->rnr_retry << 13); optpar |= MLX4_QP_OPTPAR_RNR_RETRY; } if (attr_mask & IB_QP_RETRY_CNT) { context->params1 |= cpu_to_be32(attr->retry_cnt << 16); optpar |= MLX4_QP_OPTPAR_RETRY_COUNT; } if (attr_mask & IB_QP_MAX_QP_RD_ATOMIC) { if (attr->max_rd_atomic) context->params1 |= cpu_to_be32(fls(attr->max_rd_atomic - 1) << 21); optpar |= MLX4_QP_OPTPAR_SRA_MAX; } if (attr_mask & IB_QP_SQ_PSN) context->next_send_psn = cpu_to_be32(attr->sq_psn); if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC) { if (attr->max_dest_rd_atomic) context->params2 |= cpu_to_be32(fls(attr->max_dest_rd_atomic - 1) << 21); optpar |= MLX4_QP_OPTPAR_RRA_MAX; } if (attr_mask & (IB_QP_ACCESS_FLAGS | IB_QP_MAX_DEST_RD_ATOMIC)) { context->params2 |= to_mlx4_access_flags(qp, attr, attr_mask); optpar |= MLX4_QP_OPTPAR_RWE | MLX4_QP_OPTPAR_RRE | MLX4_QP_OPTPAR_RAE; } if (attr_mask & IB_M_EXT_CLASS_1) context->params2 |= cpu_to_be32(MLX4_QP_BIT_COLL_MASTER); /* for now we enable also sqe on send */ if (attr_mask & IB_M_EXT_CLASS_2) { context->params2 |= cpu_to_be32(MLX4_QP_BIT_COLL_SYNC_SQ); context->params2 |= cpu_to_be32(MLX4_QP_BIT_COLL_MASTER); } if (attr_mask & IB_M_EXT_CLASS_3) context->params2 |= cpu_to_be32(MLX4_QP_BIT_COLL_SYNC_RQ); if (ibqp->srq) context->params2 |= cpu_to_be32(MLX4_QP_BIT_RIC); if (attr_mask & IB_QP_MIN_RNR_TIMER) { context->rnr_nextrecvpsn |= cpu_to_be32(attr->min_rnr_timer << 24); optpar |= MLX4_QP_OPTPAR_RNR_TIMEOUT; } if (attr_mask & IB_QP_RQ_PSN) context->rnr_nextrecvpsn |= cpu_to_be32(attr->rq_psn); /* proxy and tunnel qp qkeys will be changed in modify-qp wrappers */ if (attr_mask & IB_QP_QKEY) { if (qp->mlx4_ib_qp_type & (MLX4_IB_QPT_PROXY_SMI_OWNER | MLX4_IB_QPT_TUN_SMI_OWNER)) context->qkey = cpu_to_be32(IB_QP_SET_QKEY); else { if (mlx4_is_mfunc(dev->dev) && !(qp->mlx4_ib_qp_type & MLX4_IB_QPT_ANY_SRIOV) && (attr->qkey & MLX4_RESERVED_QKEY_MASK) == MLX4_RESERVED_QKEY_BASE) { pr_err("Cannot use reserved QKEY" " 0x%x (range 0xffff0000..0xffffffff" " is reserved)\n", attr->qkey); err = -EINVAL; goto out; } context->qkey = cpu_to_be32(attr->qkey); } optpar |= MLX4_QP_OPTPAR_Q_KEY; } if (ibqp->srq) context->srqn = cpu_to_be32(1 << 24 | to_msrq(ibqp->srq)->msrq.srqn); if (qp->rq.wqe_cnt && cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT) context->db_rec_addr = cpu_to_be64(qp->db.dma); if (cur_state == IB_QPS_INIT && new_state == IB_QPS_RTR && (ibqp->qp_type == IB_QPT_GSI || ibqp->qp_type == IB_QPT_SMI || ibqp->qp_type == IB_QPT_UD || ibqp->qp_type == IB_QPT_RAW_PACKET)) { context->pri_path.sched_queue = (qp->port - 1) << 6; if (qp->mlx4_ib_qp_type == MLX4_IB_QPT_SMI || qp->mlx4_ib_qp_type & (MLX4_IB_QPT_PROXY_SMI_OWNER | MLX4_IB_QPT_TUN_SMI_OWNER)) { context->pri_path.sched_queue |= MLX4_IB_DEFAULT_QP0_SCHED_QUEUE; if (qp->mlx4_ib_qp_type != MLX4_IB_QPT_SMI) context->pri_path.fl = 0x80; } else { if (qp->mlx4_ib_qp_type & MLX4_IB_QPT_ANY_SRIOV) context->pri_path.fl = 0x80; context->pri_path.sched_queue |= MLX4_IB_DEFAULT_SCHED_QUEUE; } is_eth = rdma_port_get_link_layer(&dev->ib_dev, qp->port) == IB_LINK_LAYER_ETHERNET; if (is_eth) { if (qp->mlx4_ib_qp_type == MLX4_IB_QPT_TUN_GSI || qp->mlx4_ib_qp_type == MLX4_IB_QPT_GSI) context->pri_path.feup = 1 << 7; /* don't fsm */ /* handle smac_index */ if (qp->mlx4_ib_qp_type == MLX4_IB_QPT_UD || qp->mlx4_ib_qp_type == MLX4_IB_QPT_PROXY_GSI || qp->mlx4_ib_qp_type == MLX4_IB_QPT_TUN_GSI) { err = handle_eth_ud_smac_index(dev, qp, context); if (err) return -EINVAL; } } } if (cur_state == IB_QPS_RTS && new_state == IB_QPS_SQD && attr_mask & IB_QP_EN_SQD_ASYNC_NOTIFY && attr->en_sqd_async_notify) sqd_event = 1; else sqd_event = 0; if (!ibqp->uobject && cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT) context->rlkey |= (1 << 4); if ((attr_mask & IB_QP_GROUP_RSS) && (qp->qpg_data->rss_child_count > 1)) { struct mlx4_ib_qpg_data *qpg_data = qp->qpg_data; void *rss_context_base = &context->pri_path; struct mlx4_rss_context *rss_context = (struct mlx4_rss_context *) (rss_context_base + MLX4_RSS_OFFSET_IN_QPC_PRI_PATH); context->flags |= cpu_to_be32(1 << MLX4_RSS_QPC_FLAG_OFFSET); /* This should be tbl_sz_base_qpn */ rss_context->base_qpn = cpu_to_be32(qpg_data->rss_qpn_base | (ilog2(qpg_data->rss_child_count) << 24)); rss_context->default_qpn = cpu_to_be32(qpg_data->rss_qpn_base); /* This should be flags_hash_fn */ rss_context->flags = MLX4_RSS_TCP_IPV6 | MLX4_RSS_TCP_IPV4; if (dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_UDP_RSS) { rss_context->base_qpn_udp = rss_context->default_qpn; rss_context->flags |= MLX4_RSS_IPV6 | MLX4_RSS_IPV4 | MLX4_RSS_UDP_IPV6 | MLX4_RSS_UDP_IPV4; } if (dev->dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_RSS_TOP) { static const u32 rsskey[10] = { 0xD181C62C, 0xF7F4DB5B, 0x1983A2FC, 0x943E1ADB, 0xD9389E6B, 0xD1039C2C, 0xA74499AD, 0x593D56D9, 0xF3253C06, 0x2ADC1FFC}; rss_context->hash_fn = MLX4_RSS_HASH_TOP; memcpy(rss_context->rss_key, rsskey, sizeof(rss_context->rss_key)); } else { rss_context->hash_fn = MLX4_RSS_HASH_XOR; memset(rss_context->rss_key, 0, sizeof(rss_context->rss_key)); } } /* * Before passing a kernel QP to the HW, make sure that the * ownership bits of the send queue are set and the SQ * headroom is stamped so that the hardware doesn't start * processing stale work requests. */ if (!ibqp->uobject && cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT) { struct mlx4_wqe_ctrl_seg *ctrl; int i; for (i = 0; i < qp->sq.wqe_cnt; ++i) { ctrl = get_send_wqe(qp, i); ctrl->owner_opcode = cpu_to_be32(1U << 31); if (qp->sq_max_wqes_per_wr == 1) ctrl->fence_size = 1 << (qp->sq.wqe_shift - 4); stamp_send_wqe(qp, i, 1 << qp->sq.wqe_shift); } } err = mlx4_qp_modify(dev->dev, &qp->mtt, to_mlx4_state(cur_state), to_mlx4_state(new_state), context, optpar, sqd_event, &qp->mqp); if (err) goto out; qp->state = new_state; if (attr_mask & IB_QP_ACCESS_FLAGS) qp->atomic_rd_en = attr->qp_access_flags; if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC) qp->resp_depth = attr->max_dest_rd_atomic; if (attr_mask & IB_QP_PORT) { qp->port = attr->port_num; update_mcg_macs(dev, qp); } if (attr_mask & IB_QP_ALT_PATH) qp->alt_port = attr->alt_port_num; if (is_sqp(dev, qp)) store_sqp_attrs(to_msqp(qp), attr, attr_mask); /* Set 'ignore_cq_overrun' bits for collectives offload */ if (cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT) { if (attr_mask & (IB_M_EXT_CLASS_2 | IB_M_EXT_CLASS_3)) { err = mlx4_ib_ignore_overrun_cq(ibqp->send_cq); if (err) { pr_err("Failed to set ignore CQ " "overrun for QP 0x%x's send CQ\n", ibqp->qp_num); goto out; } if (ibqp->recv_cq != ibqp->send_cq) { err = mlx4_ib_ignore_overrun_cq(ibqp->recv_cq); if (err) { pr_err("Failed to set ignore " "CQ overrun for QP 0x%x's recv " "CQ\n", ibqp->qp_num); goto out; } } } } /* * If we moved QP0 to RTR, bring the IB link up; if we moved * QP0 to RESET or ERROR, bring the link back down. */ if (is_qp0(dev, qp)) { if (cur_state != IB_QPS_RTR && new_state == IB_QPS_RTR) if (mlx4_INIT_PORT(dev->dev, qp->port)) pr_warn("INIT_PORT failed for port %d\n", qp->port); if (cur_state != IB_QPS_RESET && cur_state != IB_QPS_ERR && (new_state == IB_QPS_RESET || new_state == IB_QPS_ERR)) mlx4_CLOSE_PORT(dev->dev, qp->port); } /* * If we moved a kernel QP to RESET, clean up all old CQ * entries and reinitialize the QP. */ if (new_state == IB_QPS_RESET) { if (!ibqp->uobject) { mlx4_ib_cq_clean(recv_cq, qp->mqp.qpn, ibqp->srq ? to_msrq(ibqp->srq) : NULL); if (send_cq != recv_cq) mlx4_ib_cq_clean(send_cq, qp->mqp.qpn, NULL); qp->rq.head = 0; qp->rq.tail = 0; qp->sq.head = 0; qp->sq.tail = 0; qp->sq_next_wqe = 0; if (qp->rq.wqe_cnt) *qp->db.db = 0; if (qp->flags & MLX4_IB_QP_NETIF && (qp->qpg_type == IB_QPG_NONE || qp->qpg_type == IB_QPG_PARENT)) mlx4_ib_steer_qp_reg(dev, qp, 0); } if (qp->pri.smac) { mlx4_unregister_mac(dev->dev, qp->pri.smac_port, qp->pri.smac); qp->pri.smac = 0; } if (qp->alt.smac) { mlx4_unregister_mac(dev->dev, qp->alt.smac_port, qp->alt.smac); qp->alt.smac = 0; } if (qp->pri.vid < 0x1000) { mlx4_unregister_vlan(dev->dev, qp->pri.vlan_port, qp->pri.vid); qp->pri.vid = 0xFFFF; qp->pri.candidate_vid = 0xFFFF; qp->pri.update_vid = 0; } if (qp->alt.vid < 0x1000) { mlx4_unregister_vlan(dev->dev, qp->alt.vlan_port, qp->alt.vid); qp->alt.vid = 0xFFFF; qp->alt.candidate_vid = 0xFFFF; qp->alt.update_vid = 0; } } out: if (err && steer_qp) mlx4_ib_steer_qp_reg(dev, qp, 0); kfree(context); if (qp->pri.candidate_smac) { if (err) mlx4_unregister_mac(dev->dev, qp->pri.candidate_smac_port, qp->pri.candidate_smac); else { if (qp->pri.smac) { mlx4_unregister_mac(dev->dev, qp->pri.smac_port, qp->pri.smac); } qp->pri.smac = qp->pri.candidate_smac; qp->pri.smac_index = qp->pri.candidate_smac_index; qp->pri.smac_port = qp->pri.candidate_smac_port; } qp->pri.candidate_smac = 0; qp->pri.candidate_smac_index = 0; qp->pri.candidate_smac_port = 0; } if (qp->alt.candidate_smac) { if (err) mlx4_unregister_mac(dev->dev, qp->alt.candidate_smac_port, qp->pri.candidate_smac); else { if (qp->pri.smac) { mlx4_unregister_mac(dev->dev, qp->alt.smac_port, qp->alt.smac); } qp->alt.smac = qp->alt.candidate_smac; qp->alt.smac_index = qp->alt.candidate_smac_index; qp->alt.smac_port = qp->alt.candidate_smac_port; } qp->pri.candidate_smac = 0; qp->pri.candidate_smac_index = 0; qp->pri.candidate_smac_port = 0; } if (qp->pri.update_vid) { if (err) { if (qp->pri.candidate_vid < 0x1000) mlx4_unregister_vlan(dev->dev, qp->pri.candidate_vlan_port, qp->pri.candidate_vid); } else { if (qp->pri.vid < 0x1000) mlx4_unregister_vlan(dev->dev, qp->pri.vlan_port, qp->pri.vid); qp->pri.vid = qp->pri.candidate_vid; qp->pri.vlan_port = qp->pri.candidate_vlan_port; qp->pri.vlan_index = qp->pri.candidate_vlan_index; } qp->pri.candidate_vid = 0xFFFF; qp->pri.update_vid = 0; } if (qp->alt.update_vid) { if (err) { if (qp->alt.candidate_vid < 0x1000) mlx4_unregister_vlan(dev->dev, qp->alt.candidate_vlan_port, qp->alt.candidate_vid); } else { if (qp->alt.vid < 0x1000) mlx4_unregister_vlan(dev->dev, qp->alt.vlan_port, qp->alt.vid); qp->alt.vid = qp->alt.candidate_vid; qp->alt.vlan_port = qp->alt.candidate_vlan_port; qp->alt.vlan_index = qp->alt.candidate_vlan_index; } qp->alt.candidate_vid = 0xFFFF; qp->alt.update_vid = 0; } return err; } int mlx4_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask, struct ib_udata *udata) { struct mlx4_ib_dev *dev = to_mdev(ibqp->device); struct mlx4_ib_qp *qp = to_mqp(ibqp); enum ib_qp_state cur_state, new_state; int err = -EINVAL; mutex_lock(&qp->mutex); cur_state = attr_mask & IB_QP_CUR_STATE ? attr->cur_qp_state : qp->state; new_state = attr_mask & IB_QP_STATE ? attr->qp_state : cur_state; if (!ib_modify_qp_is_ok(cur_state, new_state, ibqp->qp_type, attr_mask & ~IB_M_QP_MOD_VEND_MASK)) { pr_debug("qpn 0x%x: invalid attribute mask specified " "for transition %d to %d. qp_type %d," " attr_mask 0x%x\n", ibqp->qp_num, cur_state, new_state, ibqp->qp_type, attr_mask); goto out; } if ((attr_mask & IB_M_QP_MOD_VEND_MASK) && !dev->dev->caps.sync_qp) { pr_err("extended verbs are not supported by %s\n", dev->ib_dev.name); goto out; } if ((attr_mask & IB_QP_PORT) && (attr->port_num == 0 || attr->port_num > dev->num_ports)) { pr_debug("qpn 0x%x: invalid port number (%d) specified " "for transition %d to %d. qp_type %d\n", ibqp->qp_num, attr->port_num, cur_state, new_state, ibqp->qp_type); goto out; } if ((attr_mask & IB_QP_PORT) && (ibqp->qp_type == IB_QPT_RAW_PACKET) && (rdma_port_get_link_layer(&dev->ib_dev, attr->port_num) != IB_LINK_LAYER_ETHERNET)) goto out; if (attr_mask & IB_QP_PKEY_INDEX) { int p = attr_mask & IB_QP_PORT ? attr->port_num : qp->port; if (attr->pkey_index >= dev->dev->caps.pkey_table_len[p]) { pr_debug("qpn 0x%x: invalid pkey index (%d) specified " "for transition %d to %d. qp_type %d\n", ibqp->qp_num, attr->pkey_index, cur_state, new_state, ibqp->qp_type); goto out; } } if (attr_mask & IB_QP_MAX_QP_RD_ATOMIC && attr->max_rd_atomic > dev->dev->caps.max_qp_init_rdma) { pr_debug("qpn 0x%x: max_rd_atomic (%d) too large. " "Transition %d to %d. qp_type %d\n", ibqp->qp_num, attr->max_rd_atomic, cur_state, new_state, ibqp->qp_type); goto out; } if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC && attr->max_dest_rd_atomic > dev->dev->caps.max_qp_dest_rdma) { pr_debug("qpn 0x%x: max_dest_rd_atomic (%d) too large. " "Transition %d to %d. qp_type %d\n", ibqp->qp_num, attr->max_dest_rd_atomic, cur_state, new_state, ibqp->qp_type); goto out; } if (cur_state == new_state && cur_state == IB_QPS_RESET) { err = 0; goto out; } err = __mlx4_ib_modify_qp(ibqp, attr, attr_mask, cur_state, new_state); out: mutex_unlock(&qp->mutex); return err; } static int build_sriov_qp0_header(struct mlx4_ib_sqp *sqp, struct ib_send_wr *wr, void *wqe, unsigned *mlx_seg_len) { struct mlx4_ib_dev *mdev = to_mdev(sqp->qp.ibqp.device); struct ib_device *ib_dev = &mdev->ib_dev; struct mlx4_wqe_mlx_seg *mlx = wqe; struct mlx4_wqe_inline_seg *inl = wqe + sizeof *mlx; struct mlx4_ib_ah *ah = to_mah(wr->wr.ud.ah); u16 pkey; u32 qkey; int send_size; int header_size; int spc; int i; if (wr->opcode != IB_WR_SEND) return -EINVAL; send_size = 0; for (i = 0; i < wr->num_sge; ++i) send_size += wr->sg_list[i].length; /* for proxy-qp0 sends, need to add in size of tunnel header */ /* for tunnel-qp0 sends, tunnel header is already in s/g list */ if (sqp->qp.mlx4_ib_qp_type == MLX4_IB_QPT_PROXY_SMI_OWNER) send_size += sizeof (struct mlx4_ib_tunnel_header); ib_ud_header_init(send_size, 1, 0, 0, 0, 0, &sqp->ud_header); if (sqp->qp.mlx4_ib_qp_type == MLX4_IB_QPT_PROXY_SMI_OWNER) { sqp->ud_header.lrh.service_level = be32_to_cpu(ah->av.ib.sl_tclass_flowlabel) >> 28; sqp->ud_header.lrh.destination_lid = cpu_to_be16(ah->av.ib.g_slid & 0x7f); sqp->ud_header.lrh.source_lid = cpu_to_be16(ah->av.ib.g_slid & 0x7f); } mlx->flags &= cpu_to_be32(MLX4_WQE_CTRL_CQ_UPDATE); /* force loopback */ mlx->flags |= cpu_to_be32(MLX4_WQE_MLX_VL15 | 0x1 | MLX4_WQE_MLX_SLR); mlx->rlid = sqp->ud_header.lrh.destination_lid; sqp->ud_header.lrh.virtual_lane = 0; sqp->ud_header.bth.solicited_event = !!(wr->send_flags & IB_SEND_SOLICITED); ib_get_cached_pkey(ib_dev, sqp->qp.port, 0, &pkey); sqp->ud_header.bth.pkey = cpu_to_be16(pkey); if (sqp->qp.mlx4_ib_qp_type == MLX4_IB_QPT_TUN_SMI_OWNER) sqp->ud_header.bth.destination_qpn = cpu_to_be32(wr->wr.ud.remote_qpn); else sqp->ud_header.bth.destination_qpn = cpu_to_be32(mdev->dev->caps.qp0_tunnel[sqp->qp.port - 1]); sqp->ud_header.bth.psn = cpu_to_be32((sqp->send_psn++) & ((1 << 24) - 1)); if (mlx4_get_parav_qkey(mdev->dev, sqp->qp.mqp.qpn, &qkey)) return -EINVAL; sqp->ud_header.deth.qkey = cpu_to_be32(qkey); sqp->ud_header.deth.source_qpn = cpu_to_be32(sqp->qp.mqp.qpn); sqp->ud_header.bth.opcode = IB_OPCODE_UD_SEND_ONLY; sqp->ud_header.immediate_present = 0; header_size = ib_ud_header_pack(&sqp->ud_header, sqp->header_buf); /* * Inline data segments may not cross a 64 byte boundary. If * our UD header is bigger than the space available up to the * next 64 byte boundary in the WQE, use two inline data * segments to hold the UD header. */ spc = MLX4_INLINE_ALIGN - ((unsigned long) (inl + 1) & (MLX4_INLINE_ALIGN - 1)); if (header_size <= spc) { inl->byte_count = cpu_to_be32(1U << 31 | header_size); memcpy(inl + 1, sqp->header_buf, header_size); i = 1; } else { inl->byte_count = cpu_to_be32(1U << 31 | spc); memcpy(inl + 1, sqp->header_buf, spc); inl = (void *) (inl + 1) + spc; memcpy(inl + 1, sqp->header_buf + spc, header_size - spc); /* * Need a barrier here to make sure all the data is * visible before the byte_count field is set. * Otherwise the HCA prefetcher could grab the 64-byte * chunk with this inline segment and get a valid (!= * 0xffffffff) byte count but stale data, and end up * generating a packet with bad headers. * * The first inline segment's byte_count field doesn't * need a barrier, because it comes after a * control/MLX segment and therefore is at an offset * of 16 mod 64. */ wmb(); inl->byte_count = cpu_to_be32(1U << 31 | (header_size - spc)); i = 2; } *mlx_seg_len = ALIGN(i * sizeof (struct mlx4_wqe_inline_seg) + header_size, 16); return 0; } static int build_mlx_header(struct mlx4_ib_sqp *sqp, struct ib_send_wr *wr, void *wqe, unsigned *mlx_seg_len) { struct ib_device *ib_dev = sqp->qp.ibqp.device; struct mlx4_wqe_mlx_seg *mlx = wqe; struct mlx4_wqe_ctrl_seg *ctrl = wqe; struct mlx4_wqe_inline_seg *inl = wqe + sizeof *mlx; struct mlx4_ib_ah *ah = to_mah(wr->wr.ud.ah); union ib_gid sgid; u16 pkey; int send_size; int header_size; int spc; int i; int is_eth; int is_vlan = 0; int is_grh; u16 vlan = 0; int err = 0; send_size = 0; for (i = 0; i < wr->num_sge; ++i) send_size += wr->sg_list[i].length; is_eth = rdma_port_get_link_layer(sqp->qp.ibqp.device, sqp->qp.port) == IB_LINK_LAYER_ETHERNET; is_grh = mlx4_ib_ah_grh_present(ah); if (is_eth) { if (mlx4_is_mfunc(to_mdev(ib_dev)->dev)) { /* When multi-function is enabled, the ib_core gid * indexes don't necessarily match the hw ones, so * we must use our own cache */ err = mlx4_get_roce_gid_from_slave(to_mdev(ib_dev)->dev, be32_to_cpu(ah->av.ib.port_pd) >> 24, ah->av.ib.gid_index, &sgid.raw[0]); if (err) return err; } else { err = ib_get_cached_gid(ib_dev, be32_to_cpu(ah->av.ib.port_pd) >> 24, ah->av.ib.gid_index, &sgid); if (err) return err; } vlan = rdma_get_vlan_id(&sgid); is_vlan = vlan < 0x1000; } ib_ud_header_init(send_size, !is_eth, is_eth, is_vlan, is_grh, 0, &sqp->ud_header); if (!is_eth) { sqp->ud_header.lrh.service_level = be32_to_cpu(ah->av.ib.sl_tclass_flowlabel) >> 28; sqp->ud_header.lrh.destination_lid = ah->av.ib.dlid; sqp->ud_header.lrh.source_lid = cpu_to_be16(ah->av.ib.g_slid & 0x7f); } if (is_grh) { sqp->ud_header.grh.traffic_class = (be32_to_cpu(ah->av.ib.sl_tclass_flowlabel) >> 20) & 0xff; sqp->ud_header.grh.flow_label = ah->av.ib.sl_tclass_flowlabel & cpu_to_be32(0xfffff); sqp->ud_header.grh.hop_limit = ah->av.ib.hop_limit; if (is_eth) memcpy(sqp->ud_header.grh.source_gid.raw, sgid.raw, 16); else { if (mlx4_is_mfunc(to_mdev(ib_dev)->dev)) { /* When multi-function is enabled, the ib_core gid * indexes don't necessarily match the hw ones, so * we must use our own cache */ sqp->ud_header.grh.source_gid.global.subnet_prefix = to_mdev(ib_dev)->sriov.demux[sqp->qp.port - 1]. subnet_prefix; sqp->ud_header.grh.source_gid.global.interface_id = to_mdev(ib_dev)->sriov.demux[sqp->qp.port - 1]. guid_cache[ah->av.ib.gid_index]; } else ib_get_cached_gid(ib_dev, be32_to_cpu(ah->av.ib.port_pd) >> 24, ah->av.ib.gid_index, &sqp->ud_header.grh.source_gid); } memcpy(sqp->ud_header.grh.destination_gid.raw, ah->av.ib.dgid, 16); } mlx->flags &= cpu_to_be32(MLX4_WQE_CTRL_CQ_UPDATE); if (!is_eth) { mlx->flags |= cpu_to_be32((!sqp->qp.ibqp.qp_num ? MLX4_WQE_MLX_VL15 : 0) | (sqp->ud_header.lrh.destination_lid == IB_LID_PERMISSIVE ? MLX4_WQE_MLX_SLR : 0) | (sqp->ud_header.lrh.service_level << 8)); if (ah->av.ib.port_pd & cpu_to_be32(0x80000000)) mlx->flags |= cpu_to_be32(0x1); /* force loopback */ mlx->rlid = sqp->ud_header.lrh.destination_lid; } switch (wr->opcode) { case IB_WR_SEND: sqp->ud_header.bth.opcode = IB_OPCODE_UD_SEND_ONLY; sqp->ud_header.immediate_present = 0; break; case IB_WR_SEND_WITH_IMM: sqp->ud_header.bth.opcode = IB_OPCODE_UD_SEND_ONLY_WITH_IMMEDIATE; sqp->ud_header.immediate_present = 1; sqp->ud_header.immediate_data = wr->ex.imm_data; break; default: return -EINVAL; } if (is_eth) { u8 smac[6]; struct in6_addr in6; u16 pcp = (be32_to_cpu(ah->av.ib.sl_tclass_flowlabel) >> 29) << 13; mlx->sched_prio = cpu_to_be16(pcp); memcpy(sqp->ud_header.eth.dmac_h, ah->av.eth.mac, 6); /* FIXME: cache smac value? */ memcpy(&ctrl->srcrb_flags16[0], ah->av.eth.mac, 2); memcpy(&ctrl->imm, ah->av.eth.mac + 2, 4); memcpy(&in6, sgid.raw, sizeof(in6)); rdma_get_ll_mac(&in6, smac); memcpy(sqp->ud_header.eth.smac_h, smac, 6); if (!memcmp(sqp->ud_header.eth.smac_h, sqp->ud_header.eth.dmac_h, 6)) mlx->flags |= cpu_to_be32(MLX4_WQE_CTRL_FORCE_LOOPBACK); if (!is_vlan) { sqp->ud_header.eth.type = cpu_to_be16(MLX4_IB_IBOE_ETHERTYPE); } else { sqp->ud_header.vlan.type = cpu_to_be16(MLX4_IB_IBOE_ETHERTYPE); sqp->ud_header.vlan.tag = cpu_to_be16(vlan | pcp); } } else { sqp->ud_header.lrh.virtual_lane = !sqp->qp.ibqp.qp_num ? 15 : 0; if (sqp->ud_header.lrh.destination_lid == IB_LID_PERMISSIVE) sqp->ud_header.lrh.source_lid = IB_LID_PERMISSIVE; } sqp->ud_header.bth.solicited_event = !!(wr->send_flags & IB_SEND_SOLICITED); if (!sqp->qp.ibqp.qp_num) ib_get_cached_pkey(ib_dev, sqp->qp.port, sqp->pkey_index, &pkey); else ib_get_cached_pkey(ib_dev, sqp->qp.port, wr->wr.ud.pkey_index, &pkey); sqp->ud_header.bth.pkey = cpu_to_be16(pkey); sqp->ud_header.bth.destination_qpn = cpu_to_be32(wr->wr.ud.remote_qpn); sqp->ud_header.bth.psn = cpu_to_be32((sqp->send_psn++) & ((1 << 24) - 1)); sqp->ud_header.deth.qkey = cpu_to_be32(wr->wr.ud.remote_qkey & 0x80000000 ? sqp->qkey : wr->wr.ud.remote_qkey); sqp->ud_header.deth.source_qpn = cpu_to_be32(sqp->qp.ibqp.qp_num); header_size = ib_ud_header_pack(&sqp->ud_header, sqp->header_buf); if (0) { pr_err("built UD header of size %d:\n", header_size); for (i = 0; i < header_size / 4; ++i) { if (i % 8 == 0) pr_err(" [%02x] ", i * 4); pr_cont(" %08x", be32_to_cpu(((__be32 *) sqp->header_buf)[i])); if ((i + 1) % 8 == 0) pr_cont("\n"); } pr_err("\n"); } /* * Inline data segments may not cross a 64 byte boundary. If * our UD header is bigger than the space available up to the * next 64 byte boundary in the WQE, use two inline data * segments to hold the UD header. */ spc = MLX4_INLINE_ALIGN - ((unsigned long) (inl + 1) & (MLX4_INLINE_ALIGN - 1)); if (header_size <= spc) { inl->byte_count = cpu_to_be32(1U << 31 | header_size); memcpy(inl + 1, sqp->header_buf, header_size); i = 1; } else { inl->byte_count = cpu_to_be32(1U << 31 | spc); memcpy(inl + 1, sqp->header_buf, spc); inl = (void *) (inl + 1) + spc; memcpy(inl + 1, sqp->header_buf + spc, header_size - spc); /* * Need a barrier here to make sure all the data is * visible before the byte_count field is set. * Otherwise the HCA prefetcher could grab the 64-byte * chunk with this inline segment and get a valid (!= * 0xffffffff) byte count but stale data, and end up * generating a packet with bad headers. * * The first inline segment's byte_count field doesn't * need a barrier, because it comes after a * control/MLX segment and therefore is at an offset * of 16 mod 64. */ wmb(); inl->byte_count = cpu_to_be32(1U << 31 | (header_size - spc)); i = 2; } *mlx_seg_len = ALIGN(i * sizeof (struct mlx4_wqe_inline_seg) + header_size, 16); return 0; } static int mlx4_wq_overflow(struct mlx4_ib_wq *wq, int nreq, struct ib_cq *ib_cq) { unsigned cur; struct mlx4_ib_cq *cq; cur = wq->head - wq->tail; if (likely(cur + nreq < wq->max_post)) return 0; cq = to_mcq(ib_cq); spin_lock(&cq->lock); cur = wq->head - wq->tail; spin_unlock(&cq->lock); return cur + nreq >= wq->max_post; } static __be32 convert_access(int acc) { return (acc & IB_ACCESS_REMOTE_ATOMIC ? cpu_to_be32(MLX4_WQE_FMR_PERM_ATOMIC) : 0) | (acc & IB_ACCESS_REMOTE_WRITE ? cpu_to_be32(MLX4_WQE_FMR_PERM_REMOTE_WRITE) : 0) | (acc & IB_ACCESS_REMOTE_READ ? cpu_to_be32(MLX4_WQE_FMR_PERM_REMOTE_READ) : 0) | (acc & IB_ACCESS_LOCAL_WRITE ? cpu_to_be32(MLX4_WQE_FMR_PERM_LOCAL_WRITE) : 0) | cpu_to_be32(MLX4_WQE_FMR_PERM_LOCAL_READ); } static void set_fmr_seg(struct mlx4_wqe_fmr_seg *fseg, struct ib_send_wr *wr) { struct mlx4_ib_fast_reg_page_list *mfrpl = to_mfrpl(wr->wr.fast_reg.page_list); int i; for (i = 0; i < wr->wr.fast_reg.page_list_len; ++i) mfrpl->mapped_page_list[i] = cpu_to_be64(wr->wr.fast_reg.page_list->page_list[i] | MLX4_MTT_FLAG_PRESENT); fseg->flags = convert_access(wr->wr.fast_reg.access_flags); fseg->mem_key = cpu_to_be32(wr->wr.fast_reg.rkey); fseg->buf_list = cpu_to_be64(mfrpl->map); fseg->start_addr = cpu_to_be64(wr->wr.fast_reg.iova_start); fseg->reg_len = cpu_to_be64(wr->wr.fast_reg.length); fseg->offset = 0; /* XXX -- is this just for ZBVA? */ fseg->page_size = cpu_to_be32(wr->wr.fast_reg.page_shift); fseg->reserved[0] = 0; fseg->reserved[1] = 0; } static void set_local_inv_seg(struct mlx4_wqe_local_inval_seg *iseg, u32 rkey) { iseg->flags = 0; iseg->mem_key = cpu_to_be32(rkey); iseg->guest_id = 0; iseg->pa = 0; } static __always_inline void set_raddr_seg(struct mlx4_wqe_raddr_seg *rseg, u64 remote_addr, u32 rkey) { rseg->raddr = cpu_to_be64(remote_addr); rseg->rkey = cpu_to_be32(rkey); rseg->reserved = 0; } static void set_atomic_seg(struct mlx4_wqe_atomic_seg *aseg, struct ib_send_wr *wr) { if (wr->opcode == IB_WR_ATOMIC_CMP_AND_SWP) { aseg->swap_add = cpu_to_be64(wr->wr.atomic.swap); aseg->compare = cpu_to_be64(wr->wr.atomic.compare_add); } else if (wr->opcode == IB_WR_MASKED_ATOMIC_FETCH_AND_ADD) { aseg->swap_add = cpu_to_be64(wr->wr.atomic.compare_add); aseg->compare = cpu_to_be64(wr->wr.atomic.compare_add_mask); } else { aseg->swap_add = cpu_to_be64(wr->wr.atomic.compare_add); aseg->compare = 0; } } static void set_masked_atomic_seg(struct mlx4_wqe_masked_atomic_seg *aseg, struct ib_send_wr *wr) { aseg->swap_add = cpu_to_be64(wr->wr.atomic.swap); aseg->swap_add_mask = cpu_to_be64(wr->wr.atomic.swap_mask); aseg->compare = cpu_to_be64(wr->wr.atomic.compare_add); aseg->compare_mask = cpu_to_be64(wr->wr.atomic.compare_add_mask); } static void set_datagram_seg(struct mlx4_wqe_datagram_seg *dseg, struct ib_send_wr *wr) { memcpy(dseg->av, &to_mah(wr->wr.ud.ah)->av, sizeof (struct mlx4_av)); dseg->dqpn = cpu_to_be32(wr->wr.ud.remote_qpn); dseg->qkey = cpu_to_be32(wr->wr.ud.remote_qkey); dseg->vlan = to_mah(wr->wr.ud.ah)->av.eth.vlan; memcpy(dseg->mac, to_mah(wr->wr.ud.ah)->av.eth.mac, 6); } static void set_tunnel_datagram_seg(struct mlx4_ib_dev *dev, struct mlx4_wqe_datagram_seg *dseg, struct ib_send_wr *wr, enum ib_qp_type qpt) { union mlx4_ext_av *av = &to_mah(wr->wr.ud.ah)->av; struct mlx4_av sqp_av = {0}; int port = *((u8 *) &av->ib.port_pd) & 0x3; /* force loopback */ sqp_av.port_pd = av->ib.port_pd | cpu_to_be32(0x80000000); sqp_av.g_slid = av->ib.g_slid & 0x7f; /* no GRH */ sqp_av.sl_tclass_flowlabel = av->ib.sl_tclass_flowlabel & cpu_to_be32(0xf0000000); memcpy(dseg->av, &sqp_av, sizeof (struct mlx4_av)); /* This function used only for sending on QP1 proxies */ dseg->dqpn = cpu_to_be32(dev->dev->caps.qp1_tunnel[port - 1]); /* Use QKEY from the QP context, which is set by master */ dseg->qkey = cpu_to_be32(IB_QP_SET_QKEY); } static void build_tunnel_header(struct ib_send_wr *wr, void *wqe, unsigned *mlx_seg_len) { struct mlx4_wqe_inline_seg *inl = wqe; struct mlx4_ib_tunnel_header hdr; struct mlx4_ib_ah *ah = to_mah(wr->wr.ud.ah); int spc; int i; memcpy(&hdr.av, &ah->av, sizeof hdr.av); hdr.remote_qpn = cpu_to_be32(wr->wr.ud.remote_qpn); hdr.pkey_index = cpu_to_be16(wr->wr.ud.pkey_index); hdr.qkey = cpu_to_be32(wr->wr.ud.remote_qkey); spc = MLX4_INLINE_ALIGN - ((unsigned long) (inl + 1) & (MLX4_INLINE_ALIGN - 1)); if (sizeof (hdr) <= spc) { memcpy(inl + 1, &hdr, sizeof (hdr)); wmb(); inl->byte_count = cpu_to_be32(1U << 31 | sizeof (hdr)); i = 1; } else { memcpy(inl + 1, &hdr, spc); wmb(); inl->byte_count = cpu_to_be32(1U << 31 | spc); inl = (void *) (inl + 1) + spc; memcpy(inl + 1, (void *) &hdr + spc, sizeof (hdr) - spc); wmb(); inl->byte_count = cpu_to_be32(1U << 31 | (sizeof (hdr) - spc)); i = 2; } *mlx_seg_len = ALIGN(i * sizeof (struct mlx4_wqe_inline_seg) + sizeof (hdr), 16); } static void set_mlx_icrc_seg(void *dseg) { u32 *t = dseg; struct mlx4_wqe_inline_seg *iseg = dseg; t[1] = 0; /* * Need a barrier here before writing the byte_count field to * make sure that all the data is visible before the * byte_count field is set. Otherwise, if the segment begins * a new cacheline, the HCA prefetcher could grab the 64-byte * chunk and get a valid (!= * 0xffffffff) byte count but * stale data, and end up sending the wrong data. */ wmb(); iseg->byte_count = cpu_to_be32((1U << 31) | 4); } static void set_data_seg(struct mlx4_wqe_data_seg *dseg, struct ib_sge *sg) { dseg->lkey = cpu_to_be32(sg->lkey); dseg->addr = cpu_to_be64(sg->addr); /* * Need a barrier here before writing the byte_count field to * make sure that all the data is visible before the * byte_count field is set. Otherwise, if the segment begins * a new cacheline, the HCA prefetcher could grab the 64-byte * chunk and get a valid (!= * 0xffffffff) byte count but * stale data, and end up sending the wrong data. */ wmb(); dseg->byte_count = cpu_to_be32(sg->length); } static void __set_data_seg(struct mlx4_wqe_data_seg *dseg, struct ib_sge *sg) { dseg->byte_count = cpu_to_be32(sg->length); dseg->lkey = cpu_to_be32(sg->lkey); dseg->addr = cpu_to_be64(sg->addr); } static int build_lso_seg(struct mlx4_wqe_lso_seg *wqe, struct ib_send_wr *wr, struct mlx4_ib_qp *qp, unsigned *lso_seg_len, __be32 *lso_hdr_sz, __be32 *blh) { unsigned halign = ALIGN(sizeof *wqe + wr->wr.ud.hlen, 16); if (unlikely(halign > MLX4_IB_CACHE_LINE_SIZE)) *blh = cpu_to_be32(1 << 6); if (unlikely(!(qp->flags & MLX4_IB_QP_LSO) && wr->num_sge > qp->sq.max_gs - (halign >> 4))) return -EINVAL; memcpy(wqe->header, wr->wr.ud.header, wr->wr.ud.hlen); *lso_hdr_sz = cpu_to_be32((wr->wr.ud.mss - wr->wr.ud.hlen) << 16 | wr->wr.ud.hlen); *lso_seg_len = halign; return 0; } static __be32 send_ieth(struct ib_send_wr *wr) { switch (wr->opcode) { case IB_WR_SEND_WITH_IMM: case IB_WR_RDMA_WRITE_WITH_IMM: return wr->ex.imm_data; case IB_WR_SEND_WITH_INV: return cpu_to_be32(wr->ex.invalidate_rkey); default: return 0; } } static void add_zero_len_inline(void *wqe) { struct mlx4_wqe_inline_seg *inl = wqe; memset(wqe, 0, 16); inl->byte_count = cpu_to_be32(1U << 31); } static int lay_inline_data(struct mlx4_ib_qp *qp, struct ib_send_wr *wr, void *wqe, int *sz) { struct mlx4_wqe_inline_seg *seg; void *addr; int len, seg_len; int num_seg; int off, to_copy; int i; int inl = 0; seg = wqe; wqe += sizeof *seg; off = ((unsigned long)wqe) & (unsigned long)(MLX4_INLINE_ALIGN - 1); num_seg = 0; seg_len = 0; for (i = 0; i < wr->num_sge; ++i) { addr = (void *) (unsigned long)(wr->sg_list[i].addr); len = wr->sg_list[i].length; inl += len; if (inl > qp->max_inline_data) { inl = 0; return -1; } while (len >= MLX4_INLINE_ALIGN - off) { to_copy = MLX4_INLINE_ALIGN - off; memcpy(wqe, addr, to_copy); len -= to_copy; wqe += to_copy; addr += to_copy; seg_len += to_copy; wmb(); /* see comment below */ seg->byte_count = htonl(MLX4_INLINE_SEG | seg_len); seg_len = 0; seg = wqe; wqe += sizeof *seg; off = sizeof *seg; ++num_seg; } memcpy(wqe, addr, len); wqe += len; seg_len += len; off += len; } if (seg_len) { ++num_seg; /* * Need a barrier here to make sure * all the data is visible before the * byte_count field is set. Otherwise * the HCA prefetcher could grab the * 64-byte chunk with this inline * segment and get a valid (!= * 0xffffffff) byte count but stale * data, and end up sending the wrong * data. */ wmb(); seg->byte_count = htonl(MLX4_INLINE_SEG | seg_len); } *sz = (inl + num_seg * sizeof *seg + 15) / 16; return 0; } /* * Avoid using memcpy() to copy to BlueFlame page, since memcpy() * implementations may use move-string-buffer assembler instructions, * which do not guarantee order of copying. */ static void mlx4_bf_copy(unsigned long *dst, unsigned long *src, unsigned bytecnt) { __iowrite64_copy(dst, src, bytecnt / 8); } int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, struct ib_send_wr **bad_wr) { struct mlx4_ib_qp *qp = to_mqp(ibqp); void *wqe; struct mlx4_wqe_ctrl_seg *uninitialized_var(ctrl); struct mlx4_wqe_data_seg *dseg; unsigned long flags; int nreq; int err = 0; unsigned ind; int uninitialized_var(stamp); int uninitialized_var(size); unsigned uninitialized_var(seglen); __be32 dummy; __be32 *lso_wqe; __be32 uninitialized_var(lso_hdr_sz); __be32 blh; int i; int inl = 0; spin_lock_irqsave(&qp->sq.lock, flags); ind = qp->sq_next_wqe; for (nreq = 0; wr; ++nreq, wr = wr->next) { lso_wqe = &dummy; blh = 0; if (mlx4_wq_overflow(&qp->sq, nreq, qp->ibqp.send_cq)) { err = -ENOMEM; *bad_wr = wr; goto out; } if (unlikely(wr->num_sge > qp->sq.max_gs)) { err = -EINVAL; *bad_wr = wr; goto out; } ctrl = wqe = get_send_wqe(qp, ind & (qp->sq.wqe_cnt - 1)); *((u32 *) (&ctrl->vlan_tag)) = 0; qp->sq.wrid[(qp->sq.head + nreq) & (qp->sq.wqe_cnt - 1)] = wr->wr_id; ctrl->srcrb_flags = (wr->send_flags & IB_SEND_SIGNALED ? cpu_to_be32(MLX4_WQE_CTRL_CQ_UPDATE) : 0) | (wr->send_flags & IB_SEND_SOLICITED ? cpu_to_be32(MLX4_WQE_CTRL_SOLICITED) : 0) | ((wr->send_flags & IB_SEND_IP_CSUM) ? cpu_to_be32(MLX4_WQE_CTRL_IP_CSUM | MLX4_WQE_CTRL_TCP_UDP_CSUM) : 0) | qp->sq_signal_bits; ctrl->imm = send_ieth(wr); wqe += sizeof *ctrl; size = sizeof *ctrl / 16; switch (qp->mlx4_ib_qp_type) { case MLX4_IB_QPT_RC: case MLX4_IB_QPT_UC: switch (wr->opcode) { case IB_WR_ATOMIC_CMP_AND_SWP: case IB_WR_ATOMIC_FETCH_AND_ADD: case IB_WR_MASKED_ATOMIC_FETCH_AND_ADD: set_raddr_seg(wqe, wr->wr.atomic.remote_addr, wr->wr.atomic.rkey); wqe += sizeof (struct mlx4_wqe_raddr_seg); set_atomic_seg(wqe, wr); wqe += sizeof (struct mlx4_wqe_atomic_seg); size += (sizeof (struct mlx4_wqe_raddr_seg) + sizeof (struct mlx4_wqe_atomic_seg)) / 16; break; case IB_WR_MASKED_ATOMIC_CMP_AND_SWP: set_raddr_seg(wqe, wr->wr.atomic.remote_addr, wr->wr.atomic.rkey); wqe += sizeof (struct mlx4_wqe_raddr_seg); set_masked_atomic_seg(wqe, wr); wqe += sizeof (struct mlx4_wqe_masked_atomic_seg); size += (sizeof (struct mlx4_wqe_raddr_seg) + sizeof (struct mlx4_wqe_masked_atomic_seg)) / 16; break; case IB_WR_RDMA_READ: case IB_WR_RDMA_WRITE: case IB_WR_RDMA_WRITE_WITH_IMM: set_raddr_seg(wqe, wr->wr.rdma.remote_addr, wr->wr.rdma.rkey); wqe += sizeof (struct mlx4_wqe_raddr_seg); size += sizeof (struct mlx4_wqe_raddr_seg) / 16; break; case IB_WR_LOCAL_INV: ctrl->srcrb_flags |= cpu_to_be32(MLX4_WQE_CTRL_STRONG_ORDER); set_local_inv_seg(wqe, wr->ex.invalidate_rkey); wqe += sizeof (struct mlx4_wqe_local_inval_seg); size += sizeof (struct mlx4_wqe_local_inval_seg) / 16; break; case IB_WR_FAST_REG_MR: ctrl->srcrb_flags |= cpu_to_be32(MLX4_WQE_CTRL_STRONG_ORDER); set_fmr_seg(wqe, wr); wqe += sizeof (struct mlx4_wqe_fmr_seg); size += sizeof (struct mlx4_wqe_fmr_seg) / 16; break; default: /* No extra segments required for sends */ break; } break; case MLX4_IB_QPT_TUN_SMI_OWNER: err = build_sriov_qp0_header(to_msqp(qp), wr, ctrl, &seglen); if (unlikely(err)) { *bad_wr = wr; goto out; } wqe += seglen; size += seglen / 16; break; case MLX4_IB_QPT_TUN_SMI: case MLX4_IB_QPT_TUN_GSI: /* this is a UD qp used in MAD responses to slaves. */ set_datagram_seg(wqe, wr); /* set the forced-loopback bit in the data seg av */ *(__be32 *) wqe |= cpu_to_be32(0x80000000); wqe += sizeof (struct mlx4_wqe_datagram_seg); size += sizeof (struct mlx4_wqe_datagram_seg) / 16; break; case MLX4_IB_QPT_UD: set_datagram_seg(wqe, wr); wqe += sizeof (struct mlx4_wqe_datagram_seg); size += sizeof (struct mlx4_wqe_datagram_seg) / 16; if (wr->opcode == IB_WR_LSO) { err = build_lso_seg(wqe, wr, qp, &seglen, &lso_hdr_sz, &blh); if (unlikely(err)) { *bad_wr = wr; goto out; } lso_wqe = (__be32 *) wqe; wqe += seglen; size += seglen / 16; } break; case MLX4_IB_QPT_PROXY_SMI_OWNER: if (unlikely(!mlx4_is_master(to_mdev(ibqp->device)->dev))) { err = -ENOSYS; *bad_wr = wr; goto out; } err = build_sriov_qp0_header(to_msqp(qp), wr, ctrl, &seglen); if (unlikely(err)) { *bad_wr = wr; goto out; } wqe += seglen; size += seglen / 16; /* to start tunnel header on a cache-line boundary */ add_zero_len_inline(wqe); wqe += 16; size++; build_tunnel_header(wr, wqe, &seglen); wqe += seglen; size += seglen / 16; break; case MLX4_IB_QPT_PROXY_SMI: /* don't allow QP0 sends on guests */ err = -ENOSYS; *bad_wr = wr; goto out; case MLX4_IB_QPT_PROXY_GSI: /* If we are tunneling special qps, this is a UD qp. * In this case we first add a UD segment targeting * the tunnel qp, and then add a header with address * information */ set_tunnel_datagram_seg(to_mdev(ibqp->device), wqe, wr, ibqp->qp_type); wqe += sizeof (struct mlx4_wqe_datagram_seg); size += sizeof (struct mlx4_wqe_datagram_seg) / 16; build_tunnel_header(wr, wqe, &seglen); wqe += seglen; size += seglen / 16; break; case MLX4_IB_QPT_SMI: case MLX4_IB_QPT_GSI: err = build_mlx_header(to_msqp(qp), wr, ctrl, &seglen); if (unlikely(err)) { *bad_wr = wr; goto out; } wqe += seglen; size += seglen / 16; break; default: break; } /* * Write data segments in reverse order, so as to * overwrite cacheline stamp last within each * cacheline. This avoids issues with WQE * prefetching. */ dseg = wqe; dseg += wr->num_sge - 1; /* Add one more inline data segment for ICRC for MLX sends */ if (unlikely(qp->mlx4_ib_qp_type == MLX4_IB_QPT_SMI || qp->mlx4_ib_qp_type == MLX4_IB_QPT_GSI || qp->mlx4_ib_qp_type & (MLX4_IB_QPT_PROXY_SMI_OWNER | MLX4_IB_QPT_TUN_SMI_OWNER))) { set_mlx_icrc_seg(dseg + 1); size += sizeof (struct mlx4_wqe_data_seg) / 16; } if (wr->send_flags & IB_SEND_INLINE && wr->num_sge) { int sz; err = lay_inline_data(qp, wr, wqe, &sz); if (!err) { inl = 1; size += sz; } } else { size += wr->num_sge * (sizeof(struct mlx4_wqe_data_seg) / 16); for (i = wr->num_sge - 1; i >= 0; --i, --dseg) set_data_seg(dseg, wr->sg_list + i); } /* * Possibly overwrite stamping in cacheline with LSO * segment only after making sure all data segments * are written. */ wmb(); *lso_wqe = lso_hdr_sz; ctrl->fence_size = (wr->send_flags & IB_SEND_FENCE ? MLX4_WQE_CTRL_FENCE : 0) | size; /* * Make sure descriptor is fully written before * setting ownership bit (because HW can start * executing as soon as we do). */ wmb(); if (wr->opcode >= ARRAY_SIZE(mlx4_ib_opcode)) { *bad_wr = wr; err = -EINVAL; goto out; } ctrl->owner_opcode = mlx4_ib_opcode[wr->opcode] | (ind & qp->sq.wqe_cnt ? cpu_to_be32(1U << 31) : 0) | blh; stamp = ind + qp->sq_spare_wqes; ind += DIV_ROUND_UP(size * 16, 1U << qp->sq.wqe_shift); /* * We can improve latency by not stamping the last * send queue WQE until after ringing the doorbell, so * only stamp here if there are still more WQEs to post. * * Same optimization applies to padding with NOP wqe * in case of WQE shrinking (used to prevent wrap-around * in the middle of WR). */ if (wr->next) { stamp_send_wqe(qp, stamp, size * 16); ind = pad_wraparound(qp, ind); } } out: if (nreq == 1 && inl && size > 1 && size < qp->bf.buf_size / 16) { ctrl->owner_opcode |= htonl((qp->sq_next_wqe & 0xffff) << 8); /* We set above doorbell_qpn bits to 0 as part of vlan * tag initialization, so |= should be correct. */ *(u32 *) (&ctrl->vlan_tag) |= qp->doorbell_qpn; /* * Make sure that descriptor is written to memory * before writing to BlueFlame page. */ wmb(); ++qp->sq.head; mlx4_bf_copy(qp->bf.reg + qp->bf.offset, (unsigned long *) ctrl, ALIGN(size * 16, 64)); wc_wmb(); qp->bf.offset ^= qp->bf.buf_size; } else if (nreq) { qp->sq.head += nreq; /* * Make sure that descriptors are written before * doorbell record. */ wmb(); writel(qp->doorbell_qpn, qp->bf.uar->map + MLX4_SEND_DOORBELL); /* * Make sure doorbells don't leak out of SQ spinlock * and reach the HCA out of order. */ mmiowb(); } if (likely(nreq)) { stamp_send_wqe(qp, stamp, size * 16); ind = pad_wraparound(qp, ind); qp->sq_next_wqe = ind; } spin_unlock_irqrestore(&qp->sq.lock, flags); return err; } int mlx4_ib_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr, struct ib_recv_wr **bad_wr) { struct mlx4_ib_qp *qp = to_mqp(ibqp); struct mlx4_wqe_data_seg *scat; unsigned long flags; int err = 0; int nreq; int ind; int max_gs; int i; max_gs = qp->rq.max_gs; spin_lock_irqsave(&qp->rq.lock, flags); ind = qp->rq.head & (qp->rq.wqe_cnt - 1); for (nreq = 0; wr; ++nreq, wr = wr->next) { if (mlx4_wq_overflow(&qp->rq, nreq, qp->ibqp.recv_cq)) { err = -ENOMEM; *bad_wr = wr; goto out; } if (unlikely(wr->num_sge > qp->rq.max_gs)) { err = -EINVAL; *bad_wr = wr; goto out; } scat = get_recv_wqe(qp, ind); if (qp->mlx4_ib_qp_type & (MLX4_IB_QPT_PROXY_SMI_OWNER | MLX4_IB_QPT_PROXY_SMI | MLX4_IB_QPT_PROXY_GSI)) { ib_dma_sync_single_for_device(ibqp->device, qp->sqp_proxy_rcv[ind].map, sizeof (struct mlx4_ib_proxy_sqp_hdr), DMA_FROM_DEVICE); scat->byte_count = cpu_to_be32(sizeof (struct mlx4_ib_proxy_sqp_hdr)); /* use dma lkey from upper layer entry */ scat->lkey = cpu_to_be32(wr->sg_list->lkey); scat->addr = cpu_to_be64(qp->sqp_proxy_rcv[ind].map); scat++; max_gs--; } for (i = 0; i < wr->num_sge; ++i) __set_data_seg(scat + i, wr->sg_list + i); if (i < max_gs) { scat[i].byte_count = 0; scat[i].lkey = cpu_to_be32(MLX4_INVALID_LKEY); scat[i].addr = 0; } qp->rq.wrid[ind] = wr->wr_id; ind = (ind + 1) & (qp->rq.wqe_cnt - 1); } out: if (likely(nreq)) { qp->rq.head += nreq; /* * Make sure that descriptors are written before * doorbell record. */ wmb(); *qp->db.db = cpu_to_be32(qp->rq.head & 0xffff); } spin_unlock_irqrestore(&qp->rq.lock, flags); return err; } static inline enum ib_qp_state to_ib_qp_state(enum mlx4_qp_state mlx4_state) { switch (mlx4_state) { case MLX4_QP_STATE_RST: return IB_QPS_RESET; case MLX4_QP_STATE_INIT: return IB_QPS_INIT; case MLX4_QP_STATE_RTR: return IB_QPS_RTR; case MLX4_QP_STATE_RTS: return IB_QPS_RTS; case MLX4_QP_STATE_SQ_DRAINING: case MLX4_QP_STATE_SQD: return IB_QPS_SQD; case MLX4_QP_STATE_SQER: return IB_QPS_SQE; case MLX4_QP_STATE_ERR: return IB_QPS_ERR; default: return -1; } } static inline enum ib_mig_state to_ib_mig_state(int mlx4_mig_state) { switch (mlx4_mig_state) { case MLX4_QP_PM_ARMED: return IB_MIG_ARMED; case MLX4_QP_PM_REARM: return IB_MIG_REARM; case MLX4_QP_PM_MIGRATED: return IB_MIG_MIGRATED; default: return -1; } } static int to_ib_qp_access_flags(int mlx4_flags) { int ib_flags = 0; if (mlx4_flags & MLX4_QP_BIT_RRE) ib_flags |= IB_ACCESS_REMOTE_READ; if (mlx4_flags & MLX4_QP_BIT_RWE) ib_flags |= IB_ACCESS_REMOTE_WRITE; if (mlx4_flags & MLX4_QP_BIT_RAE) ib_flags |= IB_ACCESS_REMOTE_ATOMIC; return ib_flags; } static void to_ib_ah_attr(struct mlx4_ib_dev *ibdev, struct ib_ah_attr *ib_ah_attr, struct mlx4_qp_path *path) { struct mlx4_dev *dev = ibdev->dev; int is_eth; memset(ib_ah_attr, 0, sizeof *ib_ah_attr); ib_ah_attr->port_num = path->sched_queue & 0x40 ? 2 : 1; if (ib_ah_attr->port_num == 0 || ib_ah_attr->port_num > dev->caps.num_ports) return; is_eth = rdma_port_get_link_layer(&ibdev->ib_dev, ib_ah_attr->port_num) == IB_LINK_LAYER_ETHERNET; if (is_eth) ib_ah_attr->sl = ((path->sched_queue >> 3) & 0x7) | ((path->sched_queue & 4) << 1); else ib_ah_attr->sl = (path->sched_queue >> 2) & 0xf; ib_ah_attr->dlid = be16_to_cpu(path->rlid); ib_ah_attr->src_path_bits = path->grh_mylmc & 0x7f; ib_ah_attr->static_rate = path->static_rate ? path->static_rate - 5 : 0; ib_ah_attr->ah_flags = (path->grh_mylmc & (1 << 7)) ? IB_AH_GRH : 0; if (ib_ah_attr->ah_flags) { ib_ah_attr->grh.sgid_index = path->mgid_index; ib_ah_attr->grh.hop_limit = path->hop_limit; ib_ah_attr->grh.traffic_class = (be32_to_cpu(path->tclass_flowlabel) >> 20) & 0xff; ib_ah_attr->grh.flow_label = be32_to_cpu(path->tclass_flowlabel) & 0xfffff; memcpy(ib_ah_attr->grh.dgid.raw, path->rgid, sizeof ib_ah_attr->grh.dgid.raw); } } int mlx4_ib_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr, int qp_attr_mask, struct ib_qp_init_attr *qp_init_attr) { struct mlx4_ib_dev *dev = to_mdev(ibqp->device); struct mlx4_ib_qp *qp = to_mqp(ibqp); struct mlx4_qp_context context; int mlx4_state; int err = 0; mutex_lock(&qp->mutex); if (qp->state == IB_QPS_RESET) { qp_attr->qp_state = IB_QPS_RESET; goto done; } err = mlx4_qp_query(dev->dev, &qp->mqp, &context); if (err) { err = -EINVAL; goto out; } mlx4_state = be32_to_cpu(context.flags) >> 28; qp->state = to_ib_qp_state(mlx4_state); qp_attr->qp_state = qp->state; qp_attr->path_mtu = context.mtu_msgmax >> 5; qp_attr->path_mig_state = to_ib_mig_state((be32_to_cpu(context.flags) >> 11) & 0x3); qp_attr->qkey = be32_to_cpu(context.qkey); qp_attr->rq_psn = be32_to_cpu(context.rnr_nextrecvpsn) & 0xffffff; qp_attr->sq_psn = be32_to_cpu(context.next_send_psn) & 0xffffff; qp_attr->dest_qp_num = be32_to_cpu(context.remote_qpn) & 0xffffff; qp_attr->qp_access_flags = to_ib_qp_access_flags(be32_to_cpu(context.params2)); if (qp->ibqp.qp_type == IB_QPT_RC || qp->ibqp.qp_type == IB_QPT_UC) { to_ib_ah_attr(dev, &qp_attr->ah_attr, &context.pri_path); to_ib_ah_attr(dev, &qp_attr->alt_ah_attr, &context.alt_path); qp_attr->alt_pkey_index = context.alt_path.pkey_index & 0x7f; qp_attr->alt_port_num = qp_attr->alt_ah_attr.port_num; } qp_attr->pkey_index = context.pri_path.pkey_index & 0x7f; if (qp_attr->qp_state == IB_QPS_INIT) qp_attr->port_num = qp->port; else qp_attr->port_num = context.pri_path.sched_queue & 0x40 ? 2 : 1; /* qp_attr->en_sqd_async_notify is only applicable in modify qp */ qp_attr->sq_draining = mlx4_state == MLX4_QP_STATE_SQ_DRAINING; qp_attr->max_rd_atomic = 1 << ((be32_to_cpu(context.params1) >> 21) & 0x7); qp_attr->max_dest_rd_atomic = 1 << ((be32_to_cpu(context.params2) >> 21) & 0x7); qp_attr->min_rnr_timer = (be32_to_cpu(context.rnr_nextrecvpsn) >> 24) & 0x1f; qp_attr->timeout = context.pri_path.ackto >> 3; qp_attr->retry_cnt = (be32_to_cpu(context.params1) >> 16) & 0x7; qp_attr->rnr_retry = (be32_to_cpu(context.params1) >> 13) & 0x7; qp_attr->alt_timeout = context.alt_path.ackto >> 3; done: qp_attr->cur_qp_state = qp_attr->qp_state; qp_attr->cap.max_recv_wr = qp->rq.wqe_cnt; qp_attr->cap.max_recv_sge = qp->rq.max_gs; if (!ibqp->uobject) { qp_attr->cap.max_send_wr = qp->sq.wqe_cnt; qp_attr->cap.max_send_sge = qp->sq.max_gs; } else { qp_attr->cap.max_send_wr = 0; qp_attr->cap.max_send_sge = 0; } /* * We don't support inline sends for kernel QPs (yet), and we * don't know what userspace's value should be. */ qp_attr->cap.max_inline_data = 0; qp_init_attr->cap = qp_attr->cap; qp_init_attr->create_flags = 0; if (qp->flags & MLX4_IB_QP_BLOCK_MULTICAST_LOOPBACK) qp_init_attr->create_flags |= IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK; if (qp->flags & MLX4_IB_QP_LSO) qp_init_attr->create_flags |= IB_QP_CREATE_IPOIB_UD_LSO; if (qp->flags & MLX4_IB_QP_NETIF) qp_init_attr->create_flags |= IB_QP_CREATE_NETIF_QP; qp_init_attr->sq_sig_type = qp->sq_signal_bits == cpu_to_be32(MLX4_WQE_CTRL_CQ_UPDATE) ? IB_SIGNAL_ALL_WR : IB_SIGNAL_REQ_WR; qp_init_attr->qpg_type = ibqp->qpg_type; if (ibqp->qpg_type == IB_QPG_PARENT) qp_init_attr->cap.qpg_tss_mask_sz = qp->qpg_data->qpg_tss_mask_sz; else qp_init_attr->cap.qpg_tss_mask_sz = 0; out: mutex_unlock(&qp->mutex); return err; } Index: stable/10/sys/ofed/drivers/infiniband/hw/mlx4/sysfs.c =================================================================== --- stable/10/sys/ofed/drivers/infiniband/hw/mlx4/sysfs.c (revision 271126) +++ stable/10/sys/ofed/drivers/infiniband/hw/mlx4/sysfs.c (revision 271127) @@ -1,800 +1,801 @@ /* * Copyright (c) 2012 Mellanox Technologies. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU * General Public License (GPL) Version 2, available from the file * COPYING in the main directory of this source tree, or the * OpenIB.org BSD license below: * * Redistribution and use in source and binary forms, with or * without modification, are permitted provided that the following * conditions are met: * * - Redistributions of source code must retain the above * copyright notice, this list of conditions and the following * disclaimer. * * - Redistributions in binary form must reproduce the above * copyright notice, this list of conditions and the following * disclaimer in the documentation and/or other materials * provided with the distribution. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ /*#include "core_priv.h"*/ #include "mlx4_ib.h" #include #include +#include #include /*show_admin_alias_guid returns the administratively assigned value of that GUID. * Values returned in buf parameter string: * 0 - requests opensm to assign a value. * ffffffffffffffff - delete this entry. * other - value assigned by administrator. */ static ssize_t show_admin_alias_guid(struct device *dev, struct device_attribute *attr, char *buf) { int record_num;/*0-15*/ int guid_index_in_rec; /*0 - 7*/ struct mlx4_ib_iov_sysfs_attr *mlx4_ib_iov_dentry = container_of(attr, struct mlx4_ib_iov_sysfs_attr, dentry); struct mlx4_ib_iov_port *port = mlx4_ib_iov_dentry->ctx; struct mlx4_ib_dev *mdev = port->dev; record_num = mlx4_ib_iov_dentry->entry_num / 8 ; guid_index_in_rec = mlx4_ib_iov_dentry->entry_num % 8 ; return sprintf(buf, "%llx\n", (long long) be64_to_cpu(*(__be64 *)&mdev->sriov.alias_guid. ports_guid[port->num - 1]. all_rec_per_port[record_num]. all_recs[8 * guid_index_in_rec])); } /* store_admin_alias_guid stores the (new) administratively assigned value of that GUID. * Values in buf parameter string: * 0 - requests opensm to assign a value. * 0xffffffffffffffff - delete this entry. * other - guid value assigned by the administrator. */ static ssize_t store_admin_alias_guid(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { int record_num;/*0-15*/ int guid_index_in_rec; /*0 - 7*/ struct mlx4_ib_iov_sysfs_attr *mlx4_ib_iov_dentry = container_of(attr, struct mlx4_ib_iov_sysfs_attr, dentry); struct mlx4_ib_iov_port *port = mlx4_ib_iov_dentry->ctx; struct mlx4_ib_dev *mdev = port->dev; u64 sysadmin_ag_val; record_num = mlx4_ib_iov_dentry->entry_num / 8; guid_index_in_rec = mlx4_ib_iov_dentry->entry_num % 8; if (0 == record_num && 0 == guid_index_in_rec) { pr_err("GUID 0 block 0 is RO\n"); return count; } sscanf(buf, "%llx", &sysadmin_ag_val); *(__be64 *)&mdev->sriov.alias_guid.ports_guid[port->num - 1]. all_rec_per_port[record_num]. all_recs[GUID_REC_SIZE * guid_index_in_rec] = cpu_to_be64(sysadmin_ag_val); /* Change the state to be pending for update */ mdev->sriov.alias_guid.ports_guid[port->num - 1].all_rec_per_port[record_num].status = MLX4_GUID_INFO_STATUS_IDLE ; mdev->sriov.alias_guid.ports_guid[port->num - 1].all_rec_per_port[record_num].method = MLX4_GUID_INFO_RECORD_SET; switch (sysadmin_ag_val) { case MLX4_GUID_FOR_DELETE_VAL: mdev->sriov.alias_guid.ports_guid[port->num - 1].all_rec_per_port[record_num].method = MLX4_GUID_INFO_RECORD_DELETE; mdev->sriov.alias_guid.ports_guid[port->num - 1].all_rec_per_port[record_num].ownership = MLX4_GUID_SYSADMIN_ASSIGN; break; /* The sysadmin requests the SM to re-assign */ case MLX4_NOT_SET_GUID: mdev->sriov.alias_guid.ports_guid[port->num - 1].all_rec_per_port[record_num].ownership = MLX4_GUID_DRIVER_ASSIGN; break; /* The sysadmin requests a specific value.*/ default: mdev->sriov.alias_guid.ports_guid[port->num - 1].all_rec_per_port[record_num].ownership = MLX4_GUID_SYSADMIN_ASSIGN; break; } /* set the record index */ mdev->sriov.alias_guid.ports_guid[port->num - 1].all_rec_per_port[record_num].guid_indexes = mlx4_ib_get_aguid_comp_mask_from_ix(guid_index_in_rec); mlx4_ib_init_alias_guid_work(mdev, port->num - 1); return count; } static ssize_t show_port_gid(struct device *dev, struct device_attribute *attr, char *buf) { struct mlx4_ib_iov_sysfs_attr *mlx4_ib_iov_dentry = container_of(attr, struct mlx4_ib_iov_sysfs_attr, dentry); struct mlx4_ib_iov_port *port = mlx4_ib_iov_dentry->ctx; struct mlx4_ib_dev *mdev = port->dev; union ib_gid gid; ssize_t ret; ret = __mlx4_ib_query_gid(&mdev->ib_dev, port->num, mlx4_ib_iov_dentry->entry_num, &gid, 1); if (ret) return ret; ret = sprintf(buf, "%04x:%04x:%04x:%04x:%04x:%04x:%04x:%04x\n", be16_to_cpu(((__be16 *) gid.raw)[0]), be16_to_cpu(((__be16 *) gid.raw)[1]), be16_to_cpu(((__be16 *) gid.raw)[2]), be16_to_cpu(((__be16 *) gid.raw)[3]), be16_to_cpu(((__be16 *) gid.raw)[4]), be16_to_cpu(((__be16 *) gid.raw)[5]), be16_to_cpu(((__be16 *) gid.raw)[6]), be16_to_cpu(((__be16 *) gid.raw)[7])); return ret; } static ssize_t show_phys_port_pkey(struct device *dev, struct device_attribute *attr, char *buf) { struct mlx4_ib_iov_sysfs_attr *mlx4_ib_iov_dentry = container_of(attr, struct mlx4_ib_iov_sysfs_attr, dentry); struct mlx4_ib_iov_port *port = mlx4_ib_iov_dentry->ctx; struct mlx4_ib_dev *mdev = port->dev; u16 pkey; ssize_t ret; ret = __mlx4_ib_query_pkey(&mdev->ib_dev, port->num, mlx4_ib_iov_dentry->entry_num, &pkey, 1); if (ret) return ret; return sprintf(buf, "0x%04x\n", pkey); } #define DENTRY_REMOVE(_dentry) \ do { \ sysfs_remove_file((_dentry)->kobj, &(_dentry)->dentry.attr); \ } while (0); static int create_sysfs_entry(void *_ctx, struct mlx4_ib_iov_sysfs_attr *_dentry, char *_name, struct kobject *_kobj, ssize_t (*show)(struct device *dev, struct device_attribute *attr, char *buf), ssize_t (*store)(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) ) { int ret = 0; struct mlx4_ib_iov_sysfs_attr *vdentry = _dentry; vdentry->ctx = _ctx; vdentry->dentry.show = show; vdentry->dentry.store = store; sysfs_attr_init(&vdentry->dentry.attr); vdentry->dentry.attr.name = vdentry->name; vdentry->dentry.attr.mode = 0; vdentry->kobj = _kobj; snprintf(vdentry->name, 15, "%s", _name); if (vdentry->dentry.store) vdentry->dentry.attr.mode |= S_IWUSR; if (vdentry->dentry.show) vdentry->dentry.attr.mode |= S_IRUGO; ret = sysfs_create_file(vdentry->kobj, &vdentry->dentry.attr); if (ret) { pr_err("failed to create %s\n", vdentry->dentry.attr.name); vdentry->ctx = NULL; return ret; } return ret; } int add_sysfs_port_mcg_attr(struct mlx4_ib_dev *device, int port_num, struct attribute *attr) { struct mlx4_ib_iov_port *port = &device->iov_ports[port_num - 1]; int ret; ret = sysfs_create_file(port->mcgs_parent, attr); if (ret) pr_err("failed to create %s\n", attr->name); return ret; } void del_sysfs_port_mcg_attr(struct mlx4_ib_dev *device, int port_num, struct attribute *attr) { struct mlx4_ib_iov_port *port = &device->iov_ports[port_num - 1]; sysfs_remove_file(port->mcgs_parent, attr); } static int add_port_entries(struct mlx4_ib_dev *device, int port_num) { int i; char buff[10]; struct mlx4_ib_iov_port *port = NULL; int ret = 0 ; struct ib_port_attr attr; /* get the physical gid and pkey table sizes.*/ ret = __mlx4_ib_query_port(&device->ib_dev, port_num, &attr, 1); if (ret) goto err; port = &device->iov_ports[port_num - 1]; port->dev = device; port->num = port_num; /* Directory structure: * iov - * port num - * admin_guids * gids (operational) * mcg_table */ port->dentr_ar = kzalloc(sizeof (struct mlx4_ib_iov_sysfs_attr_ar), GFP_KERNEL); if (!port->dentr_ar) { ret = -ENOMEM; goto err; } sprintf(buff, "%d", port_num); port->cur_port = kobject_create_and_add(buff, kobject_get(device->ports_parent)); if (!port->cur_port) { ret = -ENOMEM; goto kobj_create_err; } /* admin GUIDs */ port->admin_alias_parent = kobject_create_and_add("admin_guids", kobject_get(port->cur_port)); if (!port->admin_alias_parent) { ret = -ENOMEM; goto err_admin_guids; } for (i = 0 ; i < attr.gid_tbl_len; i++) { sprintf(buff, "%d", i); port->dentr_ar->dentries[i].entry_num = i; ret = create_sysfs_entry(port, &port->dentr_ar->dentries[i], buff, port->admin_alias_parent, show_admin_alias_guid, store_admin_alias_guid); if (ret) goto err_admin_alias_parent; } /* gids subdirectory (operational gids) */ port->gids_parent = kobject_create_and_add("gids", kobject_get(port->cur_port)); if (!port->gids_parent) { ret = -ENOMEM; goto err_gids; } for (i = 0 ; i < attr.gid_tbl_len; i++) { sprintf(buff, "%d", i); port->dentr_ar->dentries[attr.gid_tbl_len + i].entry_num = i; ret = create_sysfs_entry(port, &port->dentr_ar->dentries[attr.gid_tbl_len + i], buff, port->gids_parent, show_port_gid, NULL); if (ret) goto err_gids_parent; } /* physical port pkey table */ port->pkeys_parent = kobject_create_and_add("pkeys", kobject_get(port->cur_port)); if (!port->pkeys_parent) { ret = -ENOMEM; goto err_pkeys; } for (i = 0 ; i < attr.pkey_tbl_len; i++) { sprintf(buff, "%d", i); port->dentr_ar->dentries[2 * attr.gid_tbl_len + i].entry_num = i; ret = create_sysfs_entry(port, &port->dentr_ar->dentries[2 * attr.gid_tbl_len + i], buff, port->pkeys_parent, show_phys_port_pkey, NULL); if (ret) goto err_pkeys_parent; } /* MCGs table */ port->mcgs_parent = kobject_create_and_add("mcgs", kobject_get(port->cur_port)); if (!port->mcgs_parent) { ret = -ENOMEM; goto err_mcgs; } return 0; err_mcgs: kobject_put(port->cur_port); err_pkeys_parent: kobject_put(port->pkeys_parent); err_pkeys: kobject_put(port->cur_port); err_gids_parent: kobject_put(port->gids_parent); err_gids: kobject_put(port->cur_port); err_admin_alias_parent: kobject_put(port->admin_alias_parent); err_admin_guids: kobject_put(port->cur_port); kobject_put(port->cur_port); /* once more for create_and_add buff */ kobj_create_err: kobject_put(device->ports_parent); kfree(port->dentr_ar); err: pr_err("add_port_entries FAILED: for port:%d, error: %d\n", port_num, ret); return ret; } static void get_name(struct mlx4_ib_dev *dev, char *name, int i, int max) { char base_name[9]; /* pci_name format is: bus:dev:func -> xxxx:yy:zz.n */ strlcpy(name, pci_name(dev->dev->pdev), max); strncpy(base_name, name, 8); /*till xxxx:yy:*/ base_name[8] = '\0'; /* with no ARI only 3 last bits are used so when the fn is higher than 8 * need to add it to the dev num, so count in the last number will be * modulo 8 */ sprintf(name, "%s%.2d.%d", base_name, (i/8), (i%8)); } struct mlx4_port { struct kobject kobj; struct mlx4_ib_dev *dev; struct attribute_group pkey_group; struct attribute_group gid_group; u8 port_num; int slave; }; static void mlx4_port_release(struct kobject *kobj) { struct mlx4_port *p = container_of(kobj, struct mlx4_port, kobj); struct attribute *a; int i; for (i = 0; (a = p->pkey_group.attrs[i]); ++i) kfree(a); kfree(p->pkey_group.attrs); for (i = 0; (a = p->gid_group.attrs[i]); ++i) kfree(a); kfree(p->gid_group.attrs); kfree(p); } struct port_attribute { struct attribute attr; ssize_t (*show)(struct mlx4_port *, struct port_attribute *, char *buf); ssize_t (*store)(struct mlx4_port *, struct port_attribute *, const char *buf, size_t count); }; static ssize_t port_attr_show(struct kobject *kobj, struct attribute *attr, char *buf) { struct port_attribute *port_attr = container_of(attr, struct port_attribute, attr); struct mlx4_port *p = container_of(kobj, struct mlx4_port, kobj); if (!port_attr->show) return -EIO; return port_attr->show(p, port_attr, buf); } static ssize_t port_attr_store(struct kobject *kobj, struct attribute *attr, const char *buf, size_t size) { struct port_attribute *port_attr = container_of(attr, struct port_attribute, attr); struct mlx4_port *p = container_of(kobj, struct mlx4_port, kobj); if (!port_attr->store) return -EIO; return port_attr->store(p, port_attr, buf, size); } static const struct sysfs_ops port_sysfs_ops = { .show = port_attr_show, .store = port_attr_store, }; static struct kobj_type port_type = { .release = mlx4_port_release, .sysfs_ops = &port_sysfs_ops, }; struct port_table_attribute { struct port_attribute attr; char name[8]; int index; }; static ssize_t show_port_pkey(struct mlx4_port *p, struct port_attribute *attr, char *buf) { struct port_table_attribute *tab_attr = container_of(attr, struct port_table_attribute, attr); ssize_t ret = -ENODEV; if (p->dev->pkeys.virt2phys_pkey[p->slave][p->port_num - 1][tab_attr->index] >= (p->dev->dev->caps.pkey_table_len[p->port_num])) ret = sprintf(buf, "none\n"); else ret = sprintf(buf, "%d\n", p->dev->pkeys.virt2phys_pkey[p->slave] [p->port_num - 1][tab_attr->index]); return ret; } static ssize_t store_port_pkey(struct mlx4_port *p, struct port_attribute *attr, const char *buf, size_t count) { struct port_table_attribute *tab_attr = container_of(attr, struct port_table_attribute, attr); int idx; int err; /* do not allow remapping Dom0 virtual pkey table */ if (p->slave == mlx4_master_func_num(p->dev->dev)) return -EINVAL; if (!strncasecmp(buf, "no", 2)) idx = p->dev->dev->phys_caps.pkey_phys_table_len[p->port_num] - 1; else if (sscanf(buf, "%i", &idx) != 1 || idx >= p->dev->dev->caps.pkey_table_len[p->port_num] || idx < 0) return -EINVAL; p->dev->pkeys.virt2phys_pkey[p->slave][p->port_num - 1] [tab_attr->index] = idx; mlx4_sync_pkey_table(p->dev->dev, p->slave, p->port_num, tab_attr->index, idx); err = mlx4_gen_pkey_eqe(p->dev->dev, p->slave, p->port_num); if (err) { pr_err("mlx4_gen_pkey_eqe failed for slave %d," " port %d, index %d\n", p->slave, p->port_num, idx); return err; } return count; } static ssize_t show_port_gid_idx(struct mlx4_port *p, struct port_attribute *attr, char *buf) { return sprintf(buf, "%d\n", p->slave); } static struct attribute ** alloc_group_attrs(ssize_t (*show)(struct mlx4_port *, struct port_attribute *, char *buf), ssize_t (*store)(struct mlx4_port *, struct port_attribute *, const char *buf, size_t count), int len) { struct attribute **tab_attr; struct port_table_attribute *element; int i; tab_attr = kcalloc(1 + len, sizeof (struct attribute *), GFP_KERNEL); if (!tab_attr) return NULL; for (i = 0; i < len; i++) { element = kzalloc(sizeof (struct port_table_attribute), GFP_KERNEL); if (!element) goto err; if (snprintf(element->name, sizeof (element->name), "%d", i) >= sizeof (element->name)) { kfree(element); goto err; } sysfs_attr_init(&element->attr.attr); element->attr.attr.name = element->name; if (store) { element->attr.attr.mode = S_IWUSR | S_IRUGO; element->attr.store = store; } else element->attr.attr.mode = S_IRUGO; element->attr.show = show; element->index = i; tab_attr[i] = &element->attr.attr; } return tab_attr; err: while (--i >= 0) kfree(tab_attr[i]); kfree(tab_attr); return NULL; } static int add_port(struct mlx4_ib_dev *dev, int port_num, int slave) { struct mlx4_port *p; int i; int ret; int is_eth = rdma_port_get_link_layer(&dev->ib_dev, port_num) == IB_LINK_LAYER_ETHERNET; p = kzalloc(sizeof *p, GFP_KERNEL); if (!p) return -ENOMEM; p->dev = dev; p->port_num = port_num; p->slave = slave; ret = kobject_init_and_add(&p->kobj, &port_type, kobject_get(dev->dev_ports_parent[slave]), "%d", port_num); if (ret) goto err_alloc; p->pkey_group.name = "pkey_idx"; if (is_eth) p->pkey_group.attrs = alloc_group_attrs(show_port_pkey, NULL, dev->dev->caps.pkey_table_len[port_num]); else p->pkey_group.attrs = alloc_group_attrs(show_port_pkey, store_port_pkey, dev->dev->caps.pkey_table_len[port_num]); if (!p->pkey_group.attrs) goto err_alloc; ret = sysfs_create_group(&p->kobj, &p->pkey_group); if (ret) goto err_free_pkey; p->gid_group.name = "gid_idx"; p->gid_group.attrs = alloc_group_attrs(show_port_gid_idx, NULL, 1); if (!p->gid_group.attrs) goto err_free_pkey; ret = sysfs_create_group(&p->kobj, &p->gid_group); if (ret) goto err_free_gid; list_add_tail(&p->kobj.entry, &dev->pkeys.pkey_port_list[slave]); return 0; err_free_gid: kfree(p->gid_group.attrs[0]); kfree(p->gid_group.attrs); err_free_pkey: for (i = 0; i < dev->dev->caps.pkey_table_len[port_num]; ++i) kfree(p->pkey_group.attrs[i]); kfree(p->pkey_group.attrs); err_alloc: kobject_put(dev->dev_ports_parent[slave]); kfree(p); return ret; } static int register_one_pkey_tree(struct mlx4_ib_dev *dev, int slave) { char name[32]; int err; int port; struct kobject *p, *t; struct mlx4_port *mport; get_name(dev, name, slave, sizeof name); dev->pkeys.device_parent[slave] = kobject_create_and_add(name, kobject_get(dev->iov_parent)); if (!dev->pkeys.device_parent[slave]) { err = -ENOMEM; goto fail_dev; } INIT_LIST_HEAD(&dev->pkeys.pkey_port_list[slave]); dev->dev_ports_parent[slave] = kobject_create_and_add("ports", kobject_get(dev->pkeys.device_parent[slave])); if (!dev->dev_ports_parent[slave]) { err = -ENOMEM; goto err_ports; } for (port = 1; port <= dev->dev->caps.num_ports; ++port) { err = add_port(dev, port, slave); if (err) goto err_add; } return 0; err_add: list_for_each_entry_safe(p, t, &dev->pkeys.pkey_port_list[slave], entry) { list_del(&p->entry); mport = container_of(p, struct mlx4_port, kobj); sysfs_remove_group(p, &mport->pkey_group); sysfs_remove_group(p, &mport->gid_group); kobject_put(p); } kobject_put(dev->dev_ports_parent[slave]); err_ports: kobject_put(dev->pkeys.device_parent[slave]); /* extra put for the device_parent create_and_add */ kobject_put(dev->pkeys.device_parent[slave]); fail_dev: kobject_put(dev->iov_parent); return err; } static int register_pkey_tree(struct mlx4_ib_dev *device) { int i; if (!mlx4_is_master(device->dev)) return 0; for (i = 0; i <= device->dev->num_vfs; ++i) register_one_pkey_tree(device, i); return 0; } static void unregister_pkey_tree(struct mlx4_ib_dev *device) { int slave; struct kobject *p, *t; struct mlx4_port *port; if (!mlx4_is_master(device->dev)) return; for (slave = device->dev->num_vfs; slave >= 0; --slave) { list_for_each_entry_safe(p, t, &device->pkeys.pkey_port_list[slave], entry) { list_del(&p->entry); port = container_of(p, struct mlx4_port, kobj); sysfs_remove_group(p, &port->pkey_group); sysfs_remove_group(p, &port->gid_group); kobject_put(p); kobject_put(device->dev_ports_parent[slave]); } kobject_put(device->dev_ports_parent[slave]); kobject_put(device->pkeys.device_parent[slave]); kobject_put(device->pkeys.device_parent[slave]); kobject_put(device->iov_parent); } } int mlx4_ib_device_register_sysfs(struct mlx4_ib_dev *dev) { int i; int ret = 0; if (!mlx4_is_master(dev->dev)) return 0; dev->iov_parent = kobject_create_and_add("iov", kobject_get(dev->ib_dev.ports_parent->parent)); if (!dev->iov_parent) { ret = -ENOMEM; goto err; } dev->ports_parent = kobject_create_and_add("ports", kobject_get(dev->iov_parent)); if (!dev->iov_parent) { ret = -ENOMEM; goto err_ports; } for (i = 1; i <= dev->ib_dev.phys_port_cnt; ++i) { ret = add_port_entries(dev, i); if (ret) goto err_add_entries; } ret = register_pkey_tree(dev); if (ret) goto err_add_entries; return 0; err_add_entries: kobject_put(dev->ports_parent); err_ports: kobject_put(dev->iov_parent); err: kobject_put(dev->ib_dev.ports_parent->parent); pr_err("mlx4_ib_device_register_sysfs error (%d)\n", ret); return ret; } static void unregister_alias_guid_tree(struct mlx4_ib_dev *device) { struct mlx4_ib_iov_port *p; int i; if (!mlx4_is_master(device->dev)) return; for (i = 0; i < device->dev->caps.num_ports; i++) { p = &device->iov_ports[i]; kobject_put(p->admin_alias_parent); kobject_put(p->gids_parent); kobject_put(p->pkeys_parent); kobject_put(p->mcgs_parent); kobject_put(p->cur_port); kobject_put(p->cur_port); kobject_put(p->cur_port); kobject_put(p->cur_port); kobject_put(p->cur_port); kobject_put(p->dev->ports_parent); kfree(p->dentr_ar); } } void mlx4_ib_device_unregister_sysfs(struct mlx4_ib_dev *device) { unregister_alias_guid_tree(device); unregister_pkey_tree(device); kobject_put(device->ports_parent); kobject_put(device->iov_parent); kobject_put(device->iov_parent); kobject_put(device->ib_dev.ports_parent->parent); } Index: stable/10/sys/ofed/drivers/infiniband/hw/mthca/mthca_allocator.c =================================================================== --- stable/10/sys/ofed/drivers/infiniband/hw/mthca/mthca_allocator.c (revision 271126) +++ stable/10/sys/ofed/drivers/infiniband/hw/mthca/mthca_allocator.c (revision 271127) @@ -1,301 +1,300 @@ /* * Copyright (c) 2004 Topspin Communications. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU * General Public License (GPL) Version 2, available from the file * COPYING in the main directory of this source tree, or the * OpenIB.org BSD license below: * * Redistribution and use in source and binary forms, with or * without modification, are permitted provided that the following * conditions are met: * * - Redistributions of source code must retain the above * copyright notice, this list of conditions and the following * disclaimer. * * - Redistributions in binary form must reproduce the above * copyright notice, this list of conditions and the following * disclaimer in the documentation and/or other materials * provided with the distribution. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ #include #include -#include #include "mthca_dev.h" /* Trivial bitmap-based allocator */ u32 mthca_alloc(struct mthca_alloc *alloc) { unsigned long flags; u32 obj; spin_lock_irqsave(&alloc->lock, flags); obj = find_next_zero_bit(alloc->table, alloc->max, alloc->last); if (obj >= alloc->max) { alloc->top = (alloc->top + alloc->max) & alloc->mask; obj = find_first_zero_bit(alloc->table, alloc->max); } if (obj < alloc->max) { set_bit(obj, alloc->table); obj |= alloc->top; } else obj = -1; spin_unlock_irqrestore(&alloc->lock, flags); return obj; } void mthca_free(struct mthca_alloc *alloc, u32 obj) { unsigned long flags; obj &= alloc->max - 1; spin_lock_irqsave(&alloc->lock, flags); clear_bit(obj, alloc->table); alloc->last = min(alloc->last, obj); alloc->top = (alloc->top + alloc->max) & alloc->mask; spin_unlock_irqrestore(&alloc->lock, flags); } int mthca_alloc_init(struct mthca_alloc *alloc, u32 num, u32 mask, u32 reserved) { int i; /* num must be a power of 2 */ if (num != 1 << (ffs(num) - 1)) return -EINVAL; alloc->last = 0; alloc->top = 0; alloc->max = num; alloc->mask = mask; spin_lock_init(&alloc->lock); alloc->table = kmalloc(BITS_TO_LONGS(num) * sizeof (long), GFP_KERNEL); if (!alloc->table) return -ENOMEM; bitmap_zero(alloc->table, num); for (i = 0; i < reserved; ++i) set_bit(i, alloc->table); return 0; } void mthca_alloc_cleanup(struct mthca_alloc *alloc) { kfree(alloc->table); } /* * Array of pointers with lazy allocation of leaf pages. Callers of * _get, _set and _clear methods must use a lock or otherwise * serialize access to the array. */ #define MTHCA_ARRAY_MASK (PAGE_SIZE / sizeof (void *) - 1) void *mthca_array_get(struct mthca_array *array, int index) { int p = (index * sizeof (void *)) >> PAGE_SHIFT; if (array->page_list[p].page) return array->page_list[p].page[index & MTHCA_ARRAY_MASK]; else return NULL; } int mthca_array_set(struct mthca_array *array, int index, void *value) { int p = (index * sizeof (void *)) >> PAGE_SHIFT; /* Allocate with GFP_ATOMIC because we'll be called with locks held. */ if (!array->page_list[p].page) array->page_list[p].page = (void **) get_zeroed_page(GFP_ATOMIC); if (!array->page_list[p].page) return -ENOMEM; array->page_list[p].page[index & MTHCA_ARRAY_MASK] = value; ++array->page_list[p].used; return 0; } void mthca_array_clear(struct mthca_array *array, int index) { int p = (index * sizeof (void *)) >> PAGE_SHIFT; if (--array->page_list[p].used == 0) { free_page((unsigned long) array->page_list[p].page); array->page_list[p].page = NULL; } else array->page_list[p].page[index & MTHCA_ARRAY_MASK] = NULL; if (array->page_list[p].used < 0) pr_debug("Array %p index %d page %d with ref count %d < 0\n", array, index, p, array->page_list[p].used); } int mthca_array_init(struct mthca_array *array, int nent) { int npage = (nent * sizeof (void *) + PAGE_SIZE - 1) / PAGE_SIZE; int i; array->page_list = kmalloc(npage * sizeof *array->page_list, GFP_KERNEL); if (!array->page_list) return -ENOMEM; for (i = 0; i < npage; ++i) { array->page_list[i].page = NULL; array->page_list[i].used = 0; } return 0; } void mthca_array_cleanup(struct mthca_array *array, int nent) { int i; for (i = 0; i < (nent * sizeof (void *) + PAGE_SIZE - 1) / PAGE_SIZE; ++i) free_page((unsigned long) array->page_list[i].page); kfree(array->page_list); } /* * Handling for queue buffers -- we allocate a bunch of memory and * register it in a memory region at HCA virtual address 0. If the * requested size is > max_direct, we split the allocation into * multiple pages, so we don't require too much contiguous memory. */ int mthca_buf_alloc(struct mthca_dev *dev, int size, int max_direct, union mthca_buf *buf, int *is_direct, struct mthca_pd *pd, int hca_write, struct mthca_mr *mr) { int err = -ENOMEM; int npages, shift; u64 *dma_list = NULL; dma_addr_t t; int i; if (size <= max_direct) { *is_direct = 1; npages = 1; shift = get_order(size) + PAGE_SHIFT; buf->direct.buf = dma_alloc_coherent(&dev->pdev->dev, size, &t, GFP_KERNEL); if (!buf->direct.buf) return -ENOMEM; pci_unmap_addr_set(&buf->direct, mapping, t); memset(buf->direct.buf, 0, size); while (t & ((1 << shift) - 1)) { --shift; npages *= 2; } dma_list = kmalloc(npages * sizeof *dma_list, GFP_KERNEL); if (!dma_list) goto err_free; for (i = 0; i < npages; ++i) dma_list[i] = t + i * (1 << shift); } else { *is_direct = 0; npages = (size + PAGE_SIZE - 1) / PAGE_SIZE; shift = PAGE_SHIFT; dma_list = kmalloc(npages * sizeof *dma_list, GFP_KERNEL); if (!dma_list) return -ENOMEM; buf->page_list = kmalloc(npages * sizeof *buf->page_list, GFP_KERNEL); if (!buf->page_list) goto err_out; for (i = 0; i < npages; ++i) buf->page_list[i].buf = NULL; for (i = 0; i < npages; ++i) { buf->page_list[i].buf = dma_alloc_coherent(&dev->pdev->dev, PAGE_SIZE, &t, GFP_KERNEL); if (!buf->page_list[i].buf) goto err_free; dma_list[i] = t; pci_unmap_addr_set(&buf->page_list[i], mapping, t); clear_page(buf->page_list[i].buf); } } err = mthca_mr_alloc_phys(dev, pd->pd_num, dma_list, shift, npages, 0, size, MTHCA_MPT_FLAG_LOCAL_READ | (hca_write ? MTHCA_MPT_FLAG_LOCAL_WRITE : 0), mr); if (err) goto err_free; kfree(dma_list); return 0; err_free: mthca_buf_free(dev, size, buf, *is_direct, NULL); err_out: kfree(dma_list); return err; } void mthca_buf_free(struct mthca_dev *dev, int size, union mthca_buf *buf, int is_direct, struct mthca_mr *mr) { int i; if (mr) mthca_free_mr(dev, mr); if (is_direct) dma_free_coherent(&dev->pdev->dev, size, buf->direct.buf, pci_unmap_addr(&buf->direct, mapping)); else { for (i = 0; i < (size + PAGE_SIZE - 1) / PAGE_SIZE; ++i) dma_free_coherent(&dev->pdev->dev, PAGE_SIZE, buf->page_list[i].buf, pci_unmap_addr(&buf->page_list[i], mapping)); kfree(buf->page_list); } } Index: stable/10/sys/ofed/drivers/infiniband/hw/mthca/mthca_main.c =================================================================== --- stable/10/sys/ofed/drivers/infiniband/hw/mthca/mthca_main.c (revision 271126) +++ stable/10/sys/ofed/drivers/infiniband/hw/mthca/mthca_main.c (revision 271127) @@ -1,1360 +1,1359 @@ /* * Copyright (c) 2004, 2005 Topspin Communications. All rights reserved. * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved. * Copyright (c) 2005 Mellanox Technologies. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU * General Public License (GPL) Version 2, available from the file * COPYING in the main directory of this source tree, or the * OpenIB.org BSD license below: * * Redistribution and use in source and binary forms, with or * without modification, are permitted provided that the following * conditions are met: * * - Redistributions of source code must retain the above * copyright notice, this list of conditions and the following * disclaimer. * * - Redistributions in binary form must reproduce the above * copyright notice, this list of conditions and the following * disclaimer in the documentation and/or other materials * provided with the distribution. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ #include -#include #include #include #include #include "mthca_dev.h" #include "mthca_config_reg.h" #include "mthca_cmd.h" #include "mthca_profile.h" #include "mthca_memfree.h" #include "mthca_wqe.h" MODULE_AUTHOR("Roland Dreier"); MODULE_DESCRIPTION("Mellanox InfiniBand HCA low-level driver"); MODULE_LICENSE("Dual BSD/GPL"); MODULE_VERSION(DRV_VERSION); #ifdef CONFIG_INFINIBAND_MTHCA_DEBUG int mthca_debug_level = 0; module_param_named(debug_level, mthca_debug_level, int, 0644); MODULE_PARM_DESC(debug_level, "Enable debug tracing if > 0"); #endif /* CONFIG_INFINIBAND_MTHCA_DEBUG */ #ifdef CONFIG_PCI_MSI static int msi_x = 1; module_param(msi_x, int, 0444); MODULE_PARM_DESC(msi_x, "attempt to use MSI-X if nonzero"); #else /* CONFIG_PCI_MSI */ #define msi_x (0) #endif /* CONFIG_PCI_MSI */ static int tune_pci = 0; module_param(tune_pci, int, 0444); MODULE_PARM_DESC(tune_pci, "increase PCI burst from the default set by BIOS if nonzero"); DEFINE_MUTEX(mthca_device_mutex); #define MTHCA_DEFAULT_NUM_QP (1 << 16) #define MTHCA_DEFAULT_RDB_PER_QP (1 << 2) #define MTHCA_DEFAULT_NUM_CQ (1 << 16) #define MTHCA_DEFAULT_NUM_MCG (1 << 13) #define MTHCA_DEFAULT_NUM_MPT (1 << 17) #define MTHCA_DEFAULT_NUM_MTT (1 << 20) #define MTHCA_DEFAULT_NUM_UDAV (1 << 15) #define MTHCA_DEFAULT_NUM_RESERVED_MTTS (1 << 18) #define MTHCA_DEFAULT_NUM_UARC_SIZE (1 << 18) static struct mthca_profile hca_profile = { .num_qp = MTHCA_DEFAULT_NUM_QP, .rdb_per_qp = MTHCA_DEFAULT_RDB_PER_QP, .num_cq = MTHCA_DEFAULT_NUM_CQ, .num_mcg = MTHCA_DEFAULT_NUM_MCG, .num_mpt = MTHCA_DEFAULT_NUM_MPT, .num_mtt = MTHCA_DEFAULT_NUM_MTT, .num_udav = MTHCA_DEFAULT_NUM_UDAV, /* Tavor only */ .fmr_reserved_mtts = MTHCA_DEFAULT_NUM_RESERVED_MTTS, /* Tavor only */ .uarc_size = MTHCA_DEFAULT_NUM_UARC_SIZE, /* Arbel only */ }; module_param_named(num_qp, hca_profile.num_qp, int, 0444); MODULE_PARM_DESC(num_qp, "maximum number of QPs per HCA"); module_param_named(rdb_per_qp, hca_profile.rdb_per_qp, int, 0444); MODULE_PARM_DESC(rdb_per_qp, "number of RDB buffers per QP"); module_param_named(num_cq, hca_profile.num_cq, int, 0444); MODULE_PARM_DESC(num_cq, "maximum number of CQs per HCA"); module_param_named(num_mcg, hca_profile.num_mcg, int, 0444); MODULE_PARM_DESC(num_mcg, "maximum number of multicast groups per HCA"); module_param_named(num_mpt, hca_profile.num_mpt, int, 0444); MODULE_PARM_DESC(num_mpt, "maximum number of memory protection table entries per HCA"); module_param_named(num_mtt, hca_profile.num_mtt, int, 0444); MODULE_PARM_DESC(num_mtt, "maximum number of memory translation table segments per HCA"); module_param_named(num_udav, hca_profile.num_udav, int, 0444); MODULE_PARM_DESC(num_udav, "maximum number of UD address vectors per HCA"); module_param_named(fmr_reserved_mtts, hca_profile.fmr_reserved_mtts, int, 0444); MODULE_PARM_DESC(fmr_reserved_mtts, "number of memory translation table segments reserved for FMR"); static int log_mtts_per_seg; module_param_named(log_mtts_per_seg, log_mtts_per_seg, int, 0444); MODULE_PARM_DESC(log_mtts_per_seg, "Log2 number of MTT entries per segment (1-5)"); static char mthca_version[] __devinitdata = DRV_NAME ": Mellanox InfiniBand HCA driver v" DRV_VERSION " (" DRV_RELDATE ")\n"; static int mthca_tune_pci(struct mthca_dev *mdev) { if (!tune_pci) return 0; /* First try to max out Read Byte Count */ if (pci_find_capability(mdev->pdev, PCI_CAP_ID_PCIX)) { if (pcix_set_mmrbc(mdev->pdev, pcix_get_max_mmrbc(mdev->pdev))) { mthca_err(mdev, "Couldn't set PCI-X max read count, " "aborting.\n"); return -ENODEV; } } else if (!(mdev->mthca_flags & MTHCA_FLAG_PCIE)) mthca_info(mdev, "No PCI-X capability, not setting RBC.\n"); if (pci_find_capability(mdev->pdev, PCI_CAP_ID_EXP)) { if (pcie_set_readrq(mdev->pdev, 4096)) { mthca_err(mdev, "Couldn't write PCI Express read request, " "aborting.\n"); return -ENODEV; } } else if (mdev->mthca_flags & MTHCA_FLAG_PCIE) mthca_info(mdev, "No PCI Express capability, " "not setting Max Read Request Size.\n"); return 0; } static int mthca_dev_lim(struct mthca_dev *mdev, struct mthca_dev_lim *dev_lim) { int err; u8 status; mdev->limits.mtt_seg_size = (1 << log_mtts_per_seg) * 8; err = mthca_QUERY_DEV_LIM(mdev, dev_lim, &status); if (err) { mthca_err(mdev, "QUERY_DEV_LIM command failed, aborting.\n"); return err; } if (status) { mthca_err(mdev, "QUERY_DEV_LIM returned status 0x%02x, " "aborting.\n", status); return -EINVAL; } if (dev_lim->min_page_sz > PAGE_SIZE) { mthca_err(mdev, "HCA minimum page size of %d bigger than " "kernel PAGE_SIZE of %d, aborting.\n", dev_lim->min_page_sz, PAGE_SIZE); return -ENODEV; } if (dev_lim->num_ports > MTHCA_MAX_PORTS) { mthca_err(mdev, "HCA has %d ports, but we only support %d, " "aborting.\n", dev_lim->num_ports, MTHCA_MAX_PORTS); return -ENODEV; } if (dev_lim->uar_size > pci_resource_len(mdev->pdev, 2)) { mthca_err(mdev, "HCA reported UAR size of 0x%x bigger than " "PCI resource 2 size of 0x%llx, aborting.\n", dev_lim->uar_size, (unsigned long long)pci_resource_len(mdev->pdev, 2)); return -ENODEV; } mdev->limits.num_ports = dev_lim->num_ports; mdev->limits.vl_cap = dev_lim->max_vl; mdev->limits.mtu_cap = dev_lim->max_mtu; mdev->limits.gid_table_len = dev_lim->max_gids; mdev->limits.pkey_table_len = dev_lim->max_pkeys; mdev->limits.local_ca_ack_delay = dev_lim->local_ca_ack_delay; /* * Need to allow for worst case send WQE overhead and check * whether max_desc_sz imposes a lower limit than max_sg; UD * send has the biggest overhead. */ mdev->limits.max_sg = min_t(int, dev_lim->max_sg, (dev_lim->max_desc_sz - sizeof (struct mthca_next_seg) - (mthca_is_memfree(mdev) ? sizeof (struct mthca_arbel_ud_seg) : sizeof (struct mthca_tavor_ud_seg))) / sizeof (struct mthca_data_seg)); mdev->limits.max_wqes = dev_lim->max_qp_sz; mdev->limits.max_qp_init_rdma = dev_lim->max_requester_per_qp; mdev->limits.reserved_qps = dev_lim->reserved_qps; mdev->limits.max_srq_wqes = dev_lim->max_srq_sz; mdev->limits.reserved_srqs = dev_lim->reserved_srqs; mdev->limits.reserved_eecs = dev_lim->reserved_eecs; mdev->limits.max_desc_sz = dev_lim->max_desc_sz; mdev->limits.max_srq_sge = mthca_max_srq_sge(mdev); /* * Subtract 1 from the limit because we need to allocate a * spare CQE so the HCA HW can tell the difference between an * empty CQ and a full CQ. */ mdev->limits.max_cqes = dev_lim->max_cq_sz - 1; mdev->limits.reserved_cqs = dev_lim->reserved_cqs; mdev->limits.reserved_eqs = dev_lim->reserved_eqs; mdev->limits.reserved_mtts = dev_lim->reserved_mtts; mdev->limits.reserved_mrws = dev_lim->reserved_mrws; mdev->limits.reserved_uars = dev_lim->reserved_uars; mdev->limits.reserved_pds = dev_lim->reserved_pds; mdev->limits.port_width_cap = dev_lim->max_port_width; mdev->limits.page_size_cap = ~(u32) (dev_lim->min_page_sz - 1); mdev->limits.flags = dev_lim->flags; /* * For old FW that doesn't return static rate support, use a * value of 0x3 (only static rate values of 0 or 1 are handled), * except on Sinai, where even old FW can handle static rate * values of 2 and 3. */ if (dev_lim->stat_rate_support) mdev->limits.stat_rate_support = dev_lim->stat_rate_support; else if (mdev->mthca_flags & MTHCA_FLAG_SINAI_OPT) mdev->limits.stat_rate_support = 0xf; else mdev->limits.stat_rate_support = 0x3; /* IB_DEVICE_RESIZE_MAX_WR not supported by driver. May be doable since hardware supports it for SRQ. IB_DEVICE_N_NOTIFY_CQ is supported by hardware but not by driver. IB_DEVICE_SRQ_RESIZE is supported by hardware but SRQ is not supported by driver. */ mdev->device_cap_flags = IB_DEVICE_CHANGE_PHY_PORT | IB_DEVICE_PORT_ACTIVE_EVENT | IB_DEVICE_SYS_IMAGE_GUID | IB_DEVICE_RC_RNR_NAK_GEN; if (dev_lim->flags & DEV_LIM_FLAG_BAD_PKEY_CNTR) mdev->device_cap_flags |= IB_DEVICE_BAD_PKEY_CNTR; if (dev_lim->flags & DEV_LIM_FLAG_BAD_QKEY_CNTR) mdev->device_cap_flags |= IB_DEVICE_BAD_QKEY_CNTR; if (dev_lim->flags & DEV_LIM_FLAG_RAW_MULTI) mdev->device_cap_flags |= IB_DEVICE_RAW_MULTI; if (dev_lim->flags & DEV_LIM_FLAG_AUTO_PATH_MIG) mdev->device_cap_flags |= IB_DEVICE_AUTO_PATH_MIG; if (dev_lim->flags & DEV_LIM_FLAG_UD_AV_PORT_ENFORCE) mdev->device_cap_flags |= IB_DEVICE_UD_AV_PORT_ENFORCE; if (dev_lim->flags & DEV_LIM_FLAG_SRQ) mdev->mthca_flags |= MTHCA_FLAG_SRQ; if (mthca_is_memfree(mdev)) if (dev_lim->flags & DEV_LIM_FLAG_IPOIB_CSUM) mdev->device_cap_flags |= IB_DEVICE_UD_IP_CSUM; return 0; } static int mthca_init_tavor(struct mthca_dev *mdev) { s64 size; u8 status; int err; struct mthca_dev_lim dev_lim; struct mthca_profile profile; struct mthca_init_hca_param init_hca; err = mthca_SYS_EN(mdev, &status); if (err) { mthca_err(mdev, "SYS_EN command failed, aborting.\n"); return err; } if (status) { mthca_err(mdev, "SYS_EN returned status 0x%02x, " "aborting.\n", status); return -EINVAL; } err = mthca_QUERY_FW(mdev, &status); if (err) { mthca_err(mdev, "QUERY_FW command failed, aborting.\n"); goto err_disable; } if (status) { mthca_err(mdev, "QUERY_FW returned status 0x%02x, " "aborting.\n", status); err = -EINVAL; goto err_disable; } err = mthca_QUERY_DDR(mdev, &status); if (err) { mthca_err(mdev, "QUERY_DDR command failed, aborting.\n"); goto err_disable; } if (status) { mthca_err(mdev, "QUERY_DDR returned status 0x%02x, " "aborting.\n", status); err = -EINVAL; goto err_disable; } err = mthca_dev_lim(mdev, &dev_lim); if (err) { mthca_err(mdev, "QUERY_DEV_LIM command failed, aborting.\n"); goto err_disable; } profile = hca_profile; profile.num_uar = dev_lim.uar_size / PAGE_SIZE; profile.uarc_size = 0; if (mdev->mthca_flags & MTHCA_FLAG_SRQ) profile.num_srq = dev_lim.max_srqs; size = mthca_make_profile(mdev, &profile, &dev_lim, &init_hca); if (size < 0) { err = size; goto err_disable; } err = mthca_INIT_HCA(mdev, &init_hca, &status); if (err) { mthca_err(mdev, "INIT_HCA command failed, aborting.\n"); goto err_disable; } if (status) { mthca_err(mdev, "INIT_HCA returned status 0x%02x, " "aborting.\n", status); err = -EINVAL; goto err_disable; } return 0; err_disable: mthca_SYS_DIS(mdev, &status); return err; } static int mthca_load_fw(struct mthca_dev *mdev) { u8 status; int err; /* FIXME: use HCA-attached memory for FW if present */ mdev->fw.arbel.fw_icm = mthca_alloc_icm(mdev, mdev->fw.arbel.fw_pages, GFP_HIGHUSER | __GFP_NOWARN, 0); if (!mdev->fw.arbel.fw_icm) { mthca_err(mdev, "Couldn't allocate FW area, aborting.\n"); return -ENOMEM; } err = mthca_MAP_FA(mdev, mdev->fw.arbel.fw_icm, &status); if (err) { mthca_err(mdev, "MAP_FA command failed, aborting.\n"); goto err_free; } if (status) { mthca_err(mdev, "MAP_FA returned status 0x%02x, aborting.\n", status); err = -EINVAL; goto err_free; } err = mthca_RUN_FW(mdev, &status); if (err) { mthca_err(mdev, "RUN_FW command failed, aborting.\n"); goto err_unmap_fa; } if (status) { mthca_err(mdev, "RUN_FW returned status 0x%02x, aborting.\n", status); err = -EINVAL; goto err_unmap_fa; } return 0; err_unmap_fa: mthca_UNMAP_FA(mdev, &status); err_free: mthca_free_icm(mdev, mdev->fw.arbel.fw_icm, 0); return err; } static int mthca_init_icm(struct mthca_dev *mdev, struct mthca_dev_lim *dev_lim, struct mthca_init_hca_param *init_hca, u64 icm_size) { u64 aux_pages; u8 status; int err; err = mthca_SET_ICM_SIZE(mdev, icm_size, &aux_pages, &status); if (err) { mthca_err(mdev, "SET_ICM_SIZE command failed, aborting.\n"); return err; } if (status) { mthca_err(mdev, "SET_ICM_SIZE returned status 0x%02x, " "aborting.\n", status); return -EINVAL; } mthca_dbg(mdev, "%lld KB of HCA context requires %lld KB aux memory.\n", (unsigned long long) icm_size >> 10, (unsigned long long) aux_pages << 2); mdev->fw.arbel.aux_icm = mthca_alloc_icm(mdev, aux_pages, GFP_HIGHUSER | __GFP_NOWARN, 0); if (!mdev->fw.arbel.aux_icm) { mthca_err(mdev, "Couldn't allocate aux memory, aborting.\n"); return -ENOMEM; } err = mthca_MAP_ICM_AUX(mdev, mdev->fw.arbel.aux_icm, &status); if (err) { mthca_err(mdev, "MAP_ICM_AUX command failed, aborting.\n"); goto err_free_aux; } if (status) { mthca_err(mdev, "MAP_ICM_AUX returned status 0x%02x, aborting.\n", status); err = -EINVAL; goto err_free_aux; } err = mthca_map_eq_icm(mdev, init_hca->eqc_base); if (err) { mthca_err(mdev, "Failed to map EQ context memory, aborting.\n"); goto err_unmap_aux; } /* CPU writes to non-reserved MTTs, while HCA might DMA to reserved mtts */ mdev->limits.reserved_mtts = ALIGN(mdev->limits.reserved_mtts * mdev->limits.mtt_seg_size, dma_get_cache_alignment()) / mdev->limits.mtt_seg_size; mdev->mr_table.mtt_table = mthca_alloc_icm_table(mdev, init_hca->mtt_base, mdev->limits.mtt_seg_size, mdev->limits.num_mtt_segs, mdev->limits.reserved_mtts, 1, 0); if (!mdev->mr_table.mtt_table) { mthca_err(mdev, "Failed to map MTT context memory, aborting.\n"); err = -ENOMEM; goto err_unmap_eq; } mdev->mr_table.mpt_table = mthca_alloc_icm_table(mdev, init_hca->mpt_base, dev_lim->mpt_entry_sz, mdev->limits.num_mpts, mdev->limits.reserved_mrws, 1, 1); if (!mdev->mr_table.mpt_table) { mthca_err(mdev, "Failed to map MPT context memory, aborting.\n"); err = -ENOMEM; goto err_unmap_mtt; } mdev->qp_table.qp_table = mthca_alloc_icm_table(mdev, init_hca->qpc_base, dev_lim->qpc_entry_sz, mdev->limits.num_qps, mdev->limits.reserved_qps, 0, 0); if (!mdev->qp_table.qp_table) { mthca_err(mdev, "Failed to map QP context memory, aborting.\n"); err = -ENOMEM; goto err_unmap_mpt; } mdev->qp_table.eqp_table = mthca_alloc_icm_table(mdev, init_hca->eqpc_base, dev_lim->eqpc_entry_sz, mdev->limits.num_qps, mdev->limits.reserved_qps, 0, 0); if (!mdev->qp_table.eqp_table) { mthca_err(mdev, "Failed to map EQP context memory, aborting.\n"); err = -ENOMEM; goto err_unmap_qp; } mdev->qp_table.rdb_table = mthca_alloc_icm_table(mdev, init_hca->rdb_base, MTHCA_RDB_ENTRY_SIZE, mdev->limits.num_qps << mdev->qp_table.rdb_shift, 0, 0, 0); if (!mdev->qp_table.rdb_table) { mthca_err(mdev, "Failed to map RDB context memory, aborting\n"); err = -ENOMEM; goto err_unmap_eqp; } mdev->cq_table.table = mthca_alloc_icm_table(mdev, init_hca->cqc_base, dev_lim->cqc_entry_sz, mdev->limits.num_cqs, mdev->limits.reserved_cqs, 0, 0); if (!mdev->cq_table.table) { mthca_err(mdev, "Failed to map CQ context memory, aborting.\n"); err = -ENOMEM; goto err_unmap_rdb; } if (mdev->mthca_flags & MTHCA_FLAG_SRQ) { mdev->srq_table.table = mthca_alloc_icm_table(mdev, init_hca->srqc_base, dev_lim->srq_entry_sz, mdev->limits.num_srqs, mdev->limits.reserved_srqs, 0, 0); if (!mdev->srq_table.table) { mthca_err(mdev, "Failed to map SRQ context memory, " "aborting.\n"); err = -ENOMEM; goto err_unmap_cq; } } /* * It's not strictly required, but for simplicity just map the * whole multicast group table now. The table isn't very big * and it's a lot easier than trying to track ref counts. */ mdev->mcg_table.table = mthca_alloc_icm_table(mdev, init_hca->mc_base, MTHCA_MGM_ENTRY_SIZE, mdev->limits.num_mgms + mdev->limits.num_amgms, mdev->limits.num_mgms + mdev->limits.num_amgms, 0, 0); if (!mdev->mcg_table.table) { mthca_err(mdev, "Failed to map MCG context memory, aborting.\n"); err = -ENOMEM; goto err_unmap_srq; } return 0; err_unmap_srq: if (mdev->mthca_flags & MTHCA_FLAG_SRQ) mthca_free_icm_table(mdev, mdev->srq_table.table); err_unmap_cq: mthca_free_icm_table(mdev, mdev->cq_table.table); err_unmap_rdb: mthca_free_icm_table(mdev, mdev->qp_table.rdb_table); err_unmap_eqp: mthca_free_icm_table(mdev, mdev->qp_table.eqp_table); err_unmap_qp: mthca_free_icm_table(mdev, mdev->qp_table.qp_table); err_unmap_mpt: mthca_free_icm_table(mdev, mdev->mr_table.mpt_table); err_unmap_mtt: mthca_free_icm_table(mdev, mdev->mr_table.mtt_table); err_unmap_eq: mthca_unmap_eq_icm(mdev); err_unmap_aux: mthca_UNMAP_ICM_AUX(mdev, &status); err_free_aux: mthca_free_icm(mdev, mdev->fw.arbel.aux_icm, 0); return err; } static void mthca_free_icms(struct mthca_dev *mdev) { u8 status; mthca_free_icm_table(mdev, mdev->mcg_table.table); if (mdev->mthca_flags & MTHCA_FLAG_SRQ) mthca_free_icm_table(mdev, mdev->srq_table.table); mthca_free_icm_table(mdev, mdev->cq_table.table); mthca_free_icm_table(mdev, mdev->qp_table.rdb_table); mthca_free_icm_table(mdev, mdev->qp_table.eqp_table); mthca_free_icm_table(mdev, mdev->qp_table.qp_table); mthca_free_icm_table(mdev, mdev->mr_table.mpt_table); mthca_free_icm_table(mdev, mdev->mr_table.mtt_table); mthca_unmap_eq_icm(mdev); mthca_UNMAP_ICM_AUX(mdev, &status); mthca_free_icm(mdev, mdev->fw.arbel.aux_icm, 0); } static int mthca_init_arbel(struct mthca_dev *mdev) { struct mthca_dev_lim dev_lim; struct mthca_profile profile; struct mthca_init_hca_param init_hca; s64 icm_size; u8 status; int err; err = mthca_QUERY_FW(mdev, &status); if (err) { mthca_err(mdev, "QUERY_FW command failed, aborting.\n"); return err; } if (status) { mthca_err(mdev, "QUERY_FW returned status 0x%02x, " "aborting.\n", status); return -EINVAL; } err = mthca_ENABLE_LAM(mdev, &status); if (err) { mthca_err(mdev, "ENABLE_LAM command failed, aborting.\n"); return err; } if (status == MTHCA_CMD_STAT_LAM_NOT_PRE) { mthca_dbg(mdev, "No HCA-attached memory (running in MemFree mode)\n"); mdev->mthca_flags |= MTHCA_FLAG_NO_LAM; } else if (status) { mthca_err(mdev, "ENABLE_LAM returned status 0x%02x, " "aborting.\n", status); return -EINVAL; } err = mthca_load_fw(mdev); if (err) { mthca_err(mdev, "Failed to start FW, aborting.\n"); goto err_disable; } err = mthca_dev_lim(mdev, &dev_lim); if (err) { mthca_err(mdev, "QUERY_DEV_LIM command failed, aborting.\n"); goto err_stop_fw; } profile = hca_profile; profile.num_uar = dev_lim.uar_size / PAGE_SIZE; profile.num_udav = 0; if (mdev->mthca_flags & MTHCA_FLAG_SRQ) profile.num_srq = dev_lim.max_srqs; icm_size = mthca_make_profile(mdev, &profile, &dev_lim, &init_hca); if (icm_size < 0) { err = icm_size; goto err_stop_fw; } err = mthca_init_icm(mdev, &dev_lim, &init_hca, icm_size); if (err) goto err_stop_fw; err = mthca_INIT_HCA(mdev, &init_hca, &status); if (err) { mthca_err(mdev, "INIT_HCA command failed, aborting.\n"); goto err_free_icm; } if (status) { mthca_err(mdev, "INIT_HCA returned status 0x%02x, " "aborting.\n", status); err = -EINVAL; goto err_free_icm; } return 0; err_free_icm: mthca_free_icms(mdev); err_stop_fw: mthca_UNMAP_FA(mdev, &status); mthca_free_icm(mdev, mdev->fw.arbel.fw_icm, 0); err_disable: if (!(mdev->mthca_flags & MTHCA_FLAG_NO_LAM)) mthca_DISABLE_LAM(mdev, &status); return err; } static void mthca_close_hca(struct mthca_dev *mdev) { u8 status; mthca_CLOSE_HCA(mdev, 0, &status); if (mthca_is_memfree(mdev)) { mthca_free_icms(mdev); mthca_UNMAP_FA(mdev, &status); mthca_free_icm(mdev, mdev->fw.arbel.fw_icm, 0); if (!(mdev->mthca_flags & MTHCA_FLAG_NO_LAM)) mthca_DISABLE_LAM(mdev, &status); } else mthca_SYS_DIS(mdev, &status); } static int mthca_init_hca(struct mthca_dev *mdev) { u8 status; int err; struct mthca_adapter adapter; if (mthca_is_memfree(mdev)) err = mthca_init_arbel(mdev); else err = mthca_init_tavor(mdev); if (err) return err; err = mthca_QUERY_ADAPTER(mdev, &adapter, &status); if (err) { mthca_err(mdev, "QUERY_ADAPTER command failed, aborting.\n"); goto err_close; } if (status) { mthca_err(mdev, "QUERY_ADAPTER returned status 0x%02x, " "aborting.\n", status); err = -EINVAL; goto err_close; } mdev->eq_table.inta_pin = adapter.inta_pin; if (!mthca_is_memfree(mdev)) mdev->rev_id = adapter.revision_id; memcpy(mdev->board_id, adapter.board_id, sizeof mdev->board_id); return 0; err_close: mthca_close_hca(mdev); return err; } static int mthca_setup_hca(struct mthca_dev *dev) { int err; u8 status; MTHCA_INIT_DOORBELL_LOCK(&dev->doorbell_lock); err = mthca_init_uar_table(dev); if (err) { mthca_err(dev, "Failed to initialize " "user access region table, aborting.\n"); return err; } err = mthca_uar_alloc(dev, &dev->driver_uar); if (err) { mthca_err(dev, "Failed to allocate driver access region, " "aborting.\n"); goto err_uar_table_free; } dev->kar = ioremap(dev->driver_uar.pfn << PAGE_SHIFT, PAGE_SIZE); if (!dev->kar) { mthca_err(dev, "Couldn't map kernel access region, " "aborting.\n"); err = -ENOMEM; goto err_uar_free; } err = mthca_init_pd_table(dev); if (err) { mthca_err(dev, "Failed to initialize " "protection domain table, aborting.\n"); goto err_kar_unmap; } err = mthca_init_mr_table(dev); if (err) { mthca_err(dev, "Failed to initialize " "memory region table, aborting.\n"); goto err_pd_table_free; } err = mthca_pd_alloc(dev, 1, &dev->driver_pd); if (err) { mthca_err(dev, "Failed to create driver PD, " "aborting.\n"); goto err_mr_table_free; } err = mthca_init_eq_table(dev); if (err) { mthca_err(dev, "Failed to initialize " "event queue table, aborting.\n"); goto err_pd_free; } err = mthca_cmd_use_events(dev); if (err) { mthca_err(dev, "Failed to switch to event-driven " "firmware commands, aborting.\n"); goto err_eq_table_free; } err = mthca_NOP(dev, &status); if (err || status) { if (dev->mthca_flags & MTHCA_FLAG_MSI_X) { mthca_warn(dev, "NOP command failed to generate interrupt " "(IRQ %d).\n", dev->eq_table.eq[MTHCA_EQ_CMD].msi_x_vector); mthca_warn(dev, "Trying again with MSI-X disabled.\n"); } else { mthca_err(dev, "NOP command failed to generate interrupt " "(IRQ %d), aborting.\n", dev->pdev->irq); mthca_err(dev, "BIOS or ACPI interrupt routing problem?\n"); } goto err_cmd_poll; } mthca_dbg(dev, "NOP command IRQ test passed\n"); err = mthca_init_cq_table(dev); if (err) { mthca_err(dev, "Failed to initialize " "completion queue table, aborting.\n"); goto err_cmd_poll; } err = mthca_init_srq_table(dev); if (err) { mthca_err(dev, "Failed to initialize " "shared receive queue table, aborting.\n"); goto err_cq_table_free; } err = mthca_init_qp_table(dev); if (err) { mthca_err(dev, "Failed to initialize " "queue pair table, aborting.\n"); goto err_srq_table_free; } err = mthca_init_av_table(dev); if (err) { mthca_err(dev, "Failed to initialize " "address vector table, aborting.\n"); goto err_qp_table_free; } err = mthca_init_mcg_table(dev); if (err) { mthca_err(dev, "Failed to initialize " "multicast group table, aborting.\n"); goto err_av_table_free; } return 0; err_av_table_free: mthca_cleanup_av_table(dev); err_qp_table_free: mthca_cleanup_qp_table(dev); err_srq_table_free: mthca_cleanup_srq_table(dev); err_cq_table_free: mthca_cleanup_cq_table(dev); err_cmd_poll: mthca_cmd_use_polling(dev); err_eq_table_free: mthca_cleanup_eq_table(dev); err_pd_free: mthca_pd_free(dev, &dev->driver_pd); err_mr_table_free: mthca_cleanup_mr_table(dev); err_pd_table_free: mthca_cleanup_pd_table(dev); err_kar_unmap: iounmap(dev->kar); err_uar_free: mthca_uar_free(dev, &dev->driver_uar); err_uar_table_free: mthca_cleanup_uar_table(dev); return err; } static int mthca_enable_msi_x(struct mthca_dev *mdev) { struct msix_entry entries[3]; int err; entries[0].entry = 0; entries[1].entry = 1; entries[2].entry = 2; err = pci_enable_msix(mdev->pdev, entries, ARRAY_SIZE(entries)); if (err) { if (err > 0) mthca_info(mdev, "Only %d MSI-X vectors available, " "not using MSI-X\n", err); return err; } mdev->eq_table.eq[MTHCA_EQ_COMP ].msi_x_vector = entries[0].vector; mdev->eq_table.eq[MTHCA_EQ_ASYNC].msi_x_vector = entries[1].vector; mdev->eq_table.eq[MTHCA_EQ_CMD ].msi_x_vector = entries[2].vector; return 0; } /* Types of supported HCA */ enum { TAVOR, /* MT23108 */ ARBEL_COMPAT, /* MT25208 in Tavor compat mode */ ARBEL_NATIVE, /* MT25208 with extended features */ SINAI /* MT25204 */ }; #define MTHCA_FW_VER(major, minor, subminor) \ (((u64) (major) << 32) | ((u64) (minor) << 16) | (u64) (subminor)) static struct { u64 latest_fw; u32 flags; } mthca_hca_table[] = { [TAVOR] = { .latest_fw = MTHCA_FW_VER(3, 5, 0), .flags = 0 }, [ARBEL_COMPAT] = { .latest_fw = MTHCA_FW_VER(4, 8, 200), .flags = MTHCA_FLAG_PCIE }, [ARBEL_NATIVE] = { .latest_fw = MTHCA_FW_VER(5, 3, 0), .flags = MTHCA_FLAG_MEMFREE | MTHCA_FLAG_PCIE }, [SINAI] = { .latest_fw = MTHCA_FW_VER(1, 2, 0), .flags = MTHCA_FLAG_MEMFREE | MTHCA_FLAG_PCIE | MTHCA_FLAG_SINAI_OPT } }; static int __mthca_init_one(struct pci_dev *pdev, int hca_type) { int ddr_hidden = 0; int err; struct mthca_dev *mdev; printk(KERN_INFO PFX "Initializing %s\n", pci_name(pdev)); err = pci_enable_device(pdev); if (err) { dev_err(&pdev->dev, "Cannot enable PCI device, " "aborting.\n"); return err; } /* * Check for BARs. We expect 0: 1MB, 2: 8MB, 4: DDR (may not * be present) */ if (!(pci_resource_flags(pdev, 0) & IORESOURCE_MEM) || pci_resource_len(pdev, 0) != 1 << 20) { dev_err(&pdev->dev, "Missing DCS, aborting.\n"); err = -ENODEV; goto err_disable_pdev; } if (!(pci_resource_flags(pdev, 2) & IORESOURCE_MEM)) { dev_err(&pdev->dev, "Missing UAR, aborting.\n"); err = -ENODEV; goto err_disable_pdev; } if (!(pci_resource_flags(pdev, 4) & IORESOURCE_MEM)) ddr_hidden = 1; err = pci_request_regions(pdev, DRV_NAME); if (err) { dev_err(&pdev->dev, "Cannot obtain PCI resources, " "aborting.\n"); goto err_disable_pdev; } pci_set_master(pdev); err = pci_set_dma_mask(pdev, DMA_BIT_MASK(64)); if (err) { dev_warn(&pdev->dev, "Warning: couldn't set 64-bit PCI DMA mask.\n"); err = pci_set_dma_mask(pdev, DMA_BIT_MASK(32)); if (err) { dev_err(&pdev->dev, "Can't set PCI DMA mask, aborting.\n"); goto err_free_res; } } err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64)); if (err) { dev_warn(&pdev->dev, "Warning: couldn't set 64-bit " "consistent PCI DMA mask.\n"); err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32)); if (err) { dev_err(&pdev->dev, "Can't set consistent PCI DMA mask, " "aborting.\n"); goto err_free_res; } } mdev = (struct mthca_dev *) ib_alloc_device(sizeof *mdev); if (!mdev) { dev_err(&pdev->dev, "Device struct alloc failed, " "aborting.\n"); err = -ENOMEM; goto err_free_res; } mdev->pdev = pdev; mdev->mthca_flags = mthca_hca_table[hca_type].flags; if (ddr_hidden) mdev->mthca_flags |= MTHCA_FLAG_DDR_HIDDEN; /* * Now reset the HCA before we touch the PCI capabilities or * attempt a firmware command, since a boot ROM may have left * the HCA in an undefined state. */ err = mthca_reset(mdev); if (err) { mthca_err(mdev, "Failed to reset HCA, aborting.\n"); goto err_free_dev; } if (mthca_cmd_init(mdev)) { mthca_err(mdev, "Failed to init command interface, aborting.\n"); goto err_free_dev; } err = mthca_tune_pci(mdev); if (err) goto err_cmd; err = mthca_init_hca(mdev); if (err) goto err_cmd; if (mdev->fw_ver < mthca_hca_table[hca_type].latest_fw) { mthca_warn(mdev, "HCA FW version %d.%d.%03d is old (%d.%d.%03d is current).\n", (int) (mdev->fw_ver >> 32), (int) (mdev->fw_ver >> 16) & 0xffff, (int) (mdev->fw_ver & 0xffff), (int) (mthca_hca_table[hca_type].latest_fw >> 32), (int) (mthca_hca_table[hca_type].latest_fw >> 16) & 0xffff, (int) (mthca_hca_table[hca_type].latest_fw & 0xffff)); mthca_warn(mdev, "If you have problems, try updating your HCA FW.\n"); } if (msi_x && !mthca_enable_msi_x(mdev)) mdev->mthca_flags |= MTHCA_FLAG_MSI_X; err = mthca_setup_hca(mdev); if (err == -EBUSY && (mdev->mthca_flags & MTHCA_FLAG_MSI_X)) { if (mdev->mthca_flags & MTHCA_FLAG_MSI_X) pci_disable_msix(pdev); mdev->mthca_flags &= ~MTHCA_FLAG_MSI_X; err = mthca_setup_hca(mdev); } if (err) goto err_close; err = mthca_register_device(mdev); if (err) goto err_cleanup; err = mthca_create_agents(mdev); if (err) goto err_unregister; pci_set_drvdata(pdev, mdev); mdev->hca_type = hca_type; mdev->active = 1; return 0; err_unregister: mthca_unregister_device(mdev); err_cleanup: mthca_cleanup_mcg_table(mdev); mthca_cleanup_av_table(mdev); mthca_cleanup_qp_table(mdev); mthca_cleanup_srq_table(mdev); mthca_cleanup_cq_table(mdev); mthca_cmd_use_polling(mdev); mthca_cleanup_eq_table(mdev); mthca_pd_free(mdev, &mdev->driver_pd); mthca_cleanup_mr_table(mdev); mthca_cleanup_pd_table(mdev); mthca_cleanup_uar_table(mdev); err_close: if (mdev->mthca_flags & MTHCA_FLAG_MSI_X) pci_disable_msix(pdev); mthca_close_hca(mdev); err_cmd: mthca_cmd_cleanup(mdev); err_free_dev: ib_dealloc_device(&mdev->ib_dev); err_free_res: pci_release_regions(pdev); err_disable_pdev: pci_disable_device(pdev); pci_set_drvdata(pdev, NULL); return err; } static void __mthca_remove_one(struct pci_dev *pdev) { struct mthca_dev *mdev = pci_get_drvdata(pdev); u8 status; int p; if (mdev) { mthca_free_agents(mdev); mthca_unregister_device(mdev); for (p = 1; p <= mdev->limits.num_ports; ++p) mthca_CLOSE_IB(mdev, p, &status); mthca_cleanup_mcg_table(mdev); mthca_cleanup_av_table(mdev); mthca_cleanup_qp_table(mdev); mthca_cleanup_srq_table(mdev); mthca_cleanup_cq_table(mdev); mthca_cmd_use_polling(mdev); mthca_cleanup_eq_table(mdev); mthca_pd_free(mdev, &mdev->driver_pd); mthca_cleanup_mr_table(mdev); mthca_cleanup_pd_table(mdev); iounmap(mdev->kar); mthca_uar_free(mdev, &mdev->driver_uar); mthca_cleanup_uar_table(mdev); mthca_close_hca(mdev); mthca_cmd_cleanup(mdev); if (mdev->mthca_flags & MTHCA_FLAG_MSI_X) pci_disable_msix(pdev); ib_dealloc_device(&mdev->ib_dev); pci_release_regions(pdev); pci_disable_device(pdev); pci_set_drvdata(pdev, NULL); } } int __mthca_restart_one(struct pci_dev *pdev) { struct mthca_dev *mdev; int hca_type; mdev = pci_get_drvdata(pdev); if (!mdev) return -ENODEV; hca_type = mdev->hca_type; __mthca_remove_one(pdev); return __mthca_init_one(pdev, hca_type); } static int __devinit mthca_init_one(struct pci_dev *pdev, const struct pci_device_id *id) { static int mthca_version_printed = 0; int ret; mutex_lock(&mthca_device_mutex); if (!mthca_version_printed) { printk(KERN_INFO "%s", mthca_version); ++mthca_version_printed; } if (id->driver_data >= ARRAY_SIZE(mthca_hca_table)) { printk(KERN_ERR PFX "%s has invalid driver data %jx\n", pci_name(pdev), (uintmax_t)id->driver_data); mutex_unlock(&mthca_device_mutex); return -ENODEV; } ret = __mthca_init_one(pdev, id->driver_data); mutex_unlock(&mthca_device_mutex); return ret; } static void __devexit mthca_remove_one(struct pci_dev *pdev) { mutex_lock(&mthca_device_mutex); __mthca_remove_one(pdev); mutex_unlock(&mthca_device_mutex); } static struct pci_device_id mthca_pci_table[] = { { PCI_DEVICE(PCI_VENDOR_ID_MELLANOX, PCI_DEVICE_ID_MELLANOX_TAVOR), .driver_data = TAVOR }, { PCI_DEVICE(PCI_VENDOR_ID_TOPSPIN, PCI_DEVICE_ID_MELLANOX_TAVOR), .driver_data = TAVOR }, { PCI_DEVICE(PCI_VENDOR_ID_MELLANOX, PCI_DEVICE_ID_MELLANOX_ARBEL_COMPAT), .driver_data = ARBEL_COMPAT }, { PCI_DEVICE(PCI_VENDOR_ID_TOPSPIN, PCI_DEVICE_ID_MELLANOX_ARBEL_COMPAT), .driver_data = ARBEL_COMPAT }, { PCI_DEVICE(PCI_VENDOR_ID_MELLANOX, PCI_DEVICE_ID_MELLANOX_ARBEL), .driver_data = ARBEL_NATIVE }, { PCI_DEVICE(PCI_VENDOR_ID_TOPSPIN, PCI_DEVICE_ID_MELLANOX_ARBEL), .driver_data = ARBEL_NATIVE }, { PCI_DEVICE(PCI_VENDOR_ID_MELLANOX, PCI_DEVICE_ID_MELLANOX_SINAI), .driver_data = SINAI }, { PCI_DEVICE(PCI_VENDOR_ID_TOPSPIN, PCI_DEVICE_ID_MELLANOX_SINAI), .driver_data = SINAI }, { PCI_DEVICE(PCI_VENDOR_ID_MELLANOX, PCI_DEVICE_ID_MELLANOX_SINAI_OLD), .driver_data = SINAI }, { PCI_DEVICE(PCI_VENDOR_ID_TOPSPIN, PCI_DEVICE_ID_MELLANOX_SINAI_OLD), .driver_data = SINAI }, { 0, } }; MODULE_DEVICE_TABLE(pci, mthca_pci_table); static struct pci_driver mthca_driver = { .name = DRV_NAME, .id_table = mthca_pci_table, .probe = mthca_init_one, .remove = __devexit_p(mthca_remove_one) }; static void __init __mthca_check_profile_val(const char *name, int *pval, int pval_default) { /* value must be positive and power of 2 */ int old_pval = *pval; if (old_pval <= 0) *pval = pval_default; else *pval = roundup_pow_of_two(old_pval); if (old_pval != *pval) { printk(KERN_WARNING PFX "Invalid value %d for %s in module parameter.\n", old_pval, name); printk(KERN_WARNING PFX "Corrected %s to %d.\n", name, *pval); } } #define mthca_check_profile_val(name, default) \ __mthca_check_profile_val(#name, &hca_profile.name, default) static void __init mthca_validate_profile(void) { mthca_check_profile_val(num_qp, MTHCA_DEFAULT_NUM_QP); mthca_check_profile_val(rdb_per_qp, MTHCA_DEFAULT_RDB_PER_QP); mthca_check_profile_val(num_cq, MTHCA_DEFAULT_NUM_CQ); mthca_check_profile_val(num_mcg, MTHCA_DEFAULT_NUM_MCG); mthca_check_profile_val(num_mpt, MTHCA_DEFAULT_NUM_MPT); mthca_check_profile_val(num_mtt, MTHCA_DEFAULT_NUM_MTT); mthca_check_profile_val(num_udav, MTHCA_DEFAULT_NUM_UDAV); mthca_check_profile_val(fmr_reserved_mtts, MTHCA_DEFAULT_NUM_RESERVED_MTTS); if (hca_profile.fmr_reserved_mtts >= hca_profile.num_mtt) { printk(KERN_WARNING PFX "Invalid fmr_reserved_mtts module parameter %d.\n", hca_profile.fmr_reserved_mtts); printk(KERN_WARNING PFX "(Must be smaller than num_mtt %d)\n", hca_profile.num_mtt); hca_profile.fmr_reserved_mtts = hca_profile.num_mtt / 2; printk(KERN_WARNING PFX "Corrected fmr_reserved_mtts to %d.\n", hca_profile.fmr_reserved_mtts); } if (log_mtts_per_seg == 0) log_mtts_per_seg = ilog2(MTHCA_MTT_SEG_SIZE / 8); if ((log_mtts_per_seg < 1) || (log_mtts_per_seg > 5)) { printk(KERN_WARNING PFX "bad log_mtts_per_seg (%d). Using default - %d\n", log_mtts_per_seg, ilog2(MTHCA_MTT_SEG_SIZE / 8)); log_mtts_per_seg = ilog2(MTHCA_MTT_SEG_SIZE / 8); } } static int __init mthca_init(void) { int ret; mthca_validate_profile(); ret = mthca_catas_init(); if (ret) return ret; ret = pci_register_driver(&mthca_driver); if (ret < 0) { mthca_catas_cleanup(); return ret; } return 0; } static void __exit mthca_cleanup(void) { pci_unregister_driver(&mthca_driver); mthca_catas_cleanup(); } module_init_order(mthca_init, SI_ORDER_MIDDLE); module_exit(mthca_cleanup); Index: stable/10/sys/ofed/drivers/infiniband/hw/mthca/mthca_provider.c =================================================================== --- stable/10/sys/ofed/drivers/infiniband/hw/mthca/mthca_provider.c (revision 271126) +++ stable/10/sys/ofed/drivers/infiniband/hw/mthca/mthca_provider.c (revision 271127) @@ -1,1427 +1,1428 @@ /* * Copyright (c) 2004, 2005 Topspin Communications. All rights reserved. * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved. * Copyright (c) 2005, 2006 Cisco Systems. All rights reserved. * Copyright (c) 2005 Mellanox Technologies. All rights reserved. * Copyright (c) 2004 Voltaire, Inc. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU * General Public License (GPL) Version 2, available from the file * COPYING in the main directory of this source tree, or the * OpenIB.org BSD license below: * * Redistribution and use in source and binary forms, with or * without modification, are permitted provided that the following * conditions are met: * * - Redistributions of source code must retain the above * copyright notice, this list of conditions and the following * disclaimer. * * - Redistributions in binary form must reproduce the above * copyright notice, this list of conditions and the following * disclaimer in the documentation and/or other materials * provided with the distribution. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ #include #include #include #include #include +#include #include "mthca_dev.h" #include "mthca_cmd.h" #include "mthca_user.h" #include "mthca_memfree.h" static void init_query_mad(struct ib_smp *mad) { mad->base_version = 1; mad->mgmt_class = IB_MGMT_CLASS_SUBN_LID_ROUTED; mad->class_version = 1; mad->method = IB_MGMT_METHOD_GET; } static int mthca_query_device(struct ib_device *ibdev, struct ib_device_attr *props) { struct ib_smp *in_mad = NULL; struct ib_smp *out_mad = NULL; int err = -ENOMEM; struct mthca_dev *mdev = to_mdev(ibdev); u8 status; in_mad = kzalloc(sizeof *in_mad, GFP_KERNEL); out_mad = kmalloc(sizeof *out_mad, GFP_KERNEL); if (!in_mad || !out_mad) goto out; memset(props, 0, sizeof *props); props->fw_ver = mdev->fw_ver; init_query_mad(in_mad); in_mad->attr_id = IB_SMP_ATTR_NODE_INFO; err = mthca_MAD_IFC(mdev, 1, 1, 1, NULL, NULL, in_mad, out_mad, &status); if (err) goto out; if (status) { err = -EINVAL; goto out; } props->device_cap_flags = mdev->device_cap_flags; props->vendor_id = be32_to_cpup((__be32 *) (out_mad->data + 36)) & 0xffffff; props->vendor_part_id = be16_to_cpup((__be16 *) (out_mad->data + 30)); props->hw_ver = be32_to_cpup((__be32 *) (out_mad->data + 32)); memcpy(&props->sys_image_guid, out_mad->data + 4, 8); props->max_mr_size = ~0ull; props->page_size_cap = mdev->limits.page_size_cap; props->max_qp = mdev->limits.num_qps - mdev->limits.reserved_qps; props->max_qp_wr = mdev->limits.max_wqes; props->max_sge = mdev->limits.max_sg; props->max_cq = mdev->limits.num_cqs - mdev->limits.reserved_cqs; props->max_cqe = mdev->limits.max_cqes; props->max_mr = mdev->limits.num_mpts - mdev->limits.reserved_mrws; props->max_pd = mdev->limits.num_pds - mdev->limits.reserved_pds; props->max_qp_rd_atom = 1 << mdev->qp_table.rdb_shift; props->max_qp_init_rd_atom = mdev->limits.max_qp_init_rdma; props->max_res_rd_atom = props->max_qp_rd_atom * props->max_qp; props->max_srq = mdev->limits.num_srqs - mdev->limits.reserved_srqs; props->max_srq_wr = mdev->limits.max_srq_wqes; props->max_srq_sge = mdev->limits.max_srq_sge; props->local_ca_ack_delay = mdev->limits.local_ca_ack_delay; props->atomic_cap = mdev->limits.flags & DEV_LIM_FLAG_ATOMIC ? IB_ATOMIC_HCA : IB_ATOMIC_NONE; props->max_pkeys = mdev->limits.pkey_table_len; props->max_mcast_grp = mdev->limits.num_mgms + mdev->limits.num_amgms; props->max_mcast_qp_attach = MTHCA_QP_PER_MGM; props->max_total_mcast_qp_attach = props->max_mcast_qp_attach * props->max_mcast_grp; /* * If Sinai memory key optimization is being used, then only * the 8-bit key portion will change. For other HCAs, the * unused index bits will also be used for FMR remapping. */ if (mdev->mthca_flags & MTHCA_FLAG_SINAI_OPT) props->max_map_per_fmr = 255; else props->max_map_per_fmr = (1 << (32 - ilog2(mdev->limits.num_mpts))) - 1; err = 0; out: kfree(in_mad); kfree(out_mad); return err; } static int mthca_query_port(struct ib_device *ibdev, u8 port, struct ib_port_attr *props) { struct ib_smp *in_mad = NULL; struct ib_smp *out_mad = NULL; int err = -ENOMEM; u8 status; in_mad = kzalloc(sizeof *in_mad, GFP_KERNEL); out_mad = kmalloc(sizeof *out_mad, GFP_KERNEL); if (!in_mad || !out_mad) goto out; memset(props, 0, sizeof *props); init_query_mad(in_mad); in_mad->attr_id = IB_SMP_ATTR_PORT_INFO; in_mad->attr_mod = cpu_to_be32(port); err = mthca_MAD_IFC(to_mdev(ibdev), 1, 1, port, NULL, NULL, in_mad, out_mad, &status); if (err) goto out; if (status) { err = -EINVAL; goto out; } props->lid = be16_to_cpup((__be16 *) (out_mad->data + 16)); props->lmc = out_mad->data[34] & 0x7; props->sm_lid = be16_to_cpup((__be16 *) (out_mad->data + 18)); props->sm_sl = out_mad->data[36] & 0xf; props->state = out_mad->data[32] & 0xf; props->phys_state = out_mad->data[33] >> 4; props->port_cap_flags = be32_to_cpup((__be32 *) (out_mad->data + 20)); props->gid_tbl_len = to_mdev(ibdev)->limits.gid_table_len; props->max_msg_sz = 0x80000000; props->pkey_tbl_len = to_mdev(ibdev)->limits.pkey_table_len; props->bad_pkey_cntr = be16_to_cpup((__be16 *) (out_mad->data + 46)); props->qkey_viol_cntr = be16_to_cpup((__be16 *) (out_mad->data + 48)); props->active_width = out_mad->data[31] & 0xf; props->active_speed = out_mad->data[35] >> 4; props->max_mtu = out_mad->data[41] & 0xf; props->active_mtu = out_mad->data[36] >> 4; props->subnet_timeout = out_mad->data[51] & 0x1f; props->max_vl_num = out_mad->data[37] >> 4; props->init_type_reply = out_mad->data[41] >> 4; out: kfree(in_mad); kfree(out_mad); return err; } static int mthca_modify_device(struct ib_device *ibdev, int mask, struct ib_device_modify *props) { if (mask & ~IB_DEVICE_MODIFY_NODE_DESC) return -EOPNOTSUPP; if (mask & IB_DEVICE_MODIFY_NODE_DESC) { if (mutex_lock_interruptible(&to_mdev(ibdev)->cap_mask_mutex)) return -ERESTARTSYS; memcpy(ibdev->node_desc, props->node_desc, 64); mutex_unlock(&to_mdev(ibdev)->cap_mask_mutex); } return 0; } static int mthca_modify_port(struct ib_device *ibdev, u8 port, int port_modify_mask, struct ib_port_modify *props) { struct mthca_set_ib_param set_ib; struct ib_port_attr attr; int err; u8 status; if (mutex_lock_interruptible(&to_mdev(ibdev)->cap_mask_mutex)) return -ERESTARTSYS; err = mthca_query_port(ibdev, port, &attr); if (err) goto out; set_ib.set_si_guid = 0; set_ib.reset_qkey_viol = !!(port_modify_mask & IB_PORT_RESET_QKEY_CNTR); set_ib.cap_mask = (attr.port_cap_flags | props->set_port_cap_mask) & ~props->clr_port_cap_mask; err = mthca_SET_IB(to_mdev(ibdev), &set_ib, port, &status); if (err) goto out; if (status) { err = -EINVAL; goto out; } out: mutex_unlock(&to_mdev(ibdev)->cap_mask_mutex); return err; } static int mthca_query_pkey(struct ib_device *ibdev, u8 port, u16 index, u16 *pkey) { struct ib_smp *in_mad = NULL; struct ib_smp *out_mad = NULL; int err = -ENOMEM; u8 status; in_mad = kzalloc(sizeof *in_mad, GFP_KERNEL); out_mad = kmalloc(sizeof *out_mad, GFP_KERNEL); if (!in_mad || !out_mad) goto out; init_query_mad(in_mad); in_mad->attr_id = IB_SMP_ATTR_PKEY_TABLE; in_mad->attr_mod = cpu_to_be32(index / 32); err = mthca_MAD_IFC(to_mdev(ibdev), 1, 1, port, NULL, NULL, in_mad, out_mad, &status); if (err) goto out; if (status) { err = -EINVAL; goto out; } *pkey = be16_to_cpu(((__be16 *) out_mad->data)[index % 32]); out: kfree(in_mad); kfree(out_mad); return err; } static int mthca_query_gid(struct ib_device *ibdev, u8 port, int index, union ib_gid *gid) { struct ib_smp *in_mad = NULL; struct ib_smp *out_mad = NULL; int err = -ENOMEM; u8 status; in_mad = kzalloc(sizeof *in_mad, GFP_KERNEL); out_mad = kmalloc(sizeof *out_mad, GFP_KERNEL); if (!in_mad || !out_mad) goto out; init_query_mad(in_mad); in_mad->attr_id = IB_SMP_ATTR_PORT_INFO; in_mad->attr_mod = cpu_to_be32(port); err = mthca_MAD_IFC(to_mdev(ibdev), 1, 1, port, NULL, NULL, in_mad, out_mad, &status); if (err) goto out; if (status) { err = -EINVAL; goto out; } memcpy(gid->raw, out_mad->data + 8, 8); init_query_mad(in_mad); in_mad->attr_id = IB_SMP_ATTR_GUID_INFO; in_mad->attr_mod = cpu_to_be32(index / 8); err = mthca_MAD_IFC(to_mdev(ibdev), 1, 1, port, NULL, NULL, in_mad, out_mad, &status); if (err) goto out; if (status) { err = -EINVAL; goto out; } memcpy(gid->raw + 8, out_mad->data + (index % 8) * 8, 8); out: kfree(in_mad); kfree(out_mad); return err; } static struct ib_ucontext *mthca_alloc_ucontext(struct ib_device *ibdev, struct ib_udata *udata) { struct mthca_alloc_ucontext_resp uresp; struct mthca_ucontext *context; int err; if (!(to_mdev(ibdev)->active)) return ERR_PTR(-EAGAIN); memset(&uresp, 0, sizeof uresp); uresp.qp_tab_size = to_mdev(ibdev)->limits.num_qps; if (mthca_is_memfree(to_mdev(ibdev))) uresp.uarc_size = to_mdev(ibdev)->uar_table.uarc_size; else uresp.uarc_size = 0; context = kmalloc(sizeof *context, GFP_KERNEL); if (!context) return ERR_PTR(-ENOMEM); err = mthca_uar_alloc(to_mdev(ibdev), &context->uar); if (err) { kfree(context); return ERR_PTR(err); } context->db_tab = mthca_init_user_db_tab(to_mdev(ibdev)); if (IS_ERR(context->db_tab)) { err = PTR_ERR(context->db_tab); mthca_uar_free(to_mdev(ibdev), &context->uar); kfree(context); return ERR_PTR(err); } if (ib_copy_to_udata(udata, &uresp, sizeof uresp)) { mthca_cleanup_user_db_tab(to_mdev(ibdev), &context->uar, context->db_tab); mthca_uar_free(to_mdev(ibdev), &context->uar); kfree(context); return ERR_PTR(-EFAULT); } context->reg_mr_warned = 0; return &context->ibucontext; } static int mthca_dealloc_ucontext(struct ib_ucontext *context) { mthca_cleanup_user_db_tab(to_mdev(context->device), &to_mucontext(context)->uar, to_mucontext(context)->db_tab); mthca_uar_free(to_mdev(context->device), &to_mucontext(context)->uar); kfree(to_mucontext(context)); return 0; } static int mthca_mmap_uar(struct ib_ucontext *context, struct vm_area_struct *vma) { if (vma->vm_end - vma->vm_start != PAGE_SIZE) return -EINVAL; vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); if (io_remap_pfn_range(vma, vma->vm_start, to_mucontext(context)->uar.pfn, PAGE_SIZE, vma->vm_page_prot)) return -EAGAIN; return 0; } static struct ib_pd *mthca_alloc_pd(struct ib_device *ibdev, struct ib_ucontext *context, struct ib_udata *udata) { struct mthca_pd *pd; int err; pd = kmalloc(sizeof *pd, GFP_KERNEL); if (!pd) return ERR_PTR(-ENOMEM); err = mthca_pd_alloc(to_mdev(ibdev), !context, pd); if (err) { kfree(pd); return ERR_PTR(err); } if (context) { if (ib_copy_to_udata(udata, &pd->pd_num, sizeof (__u32))) { mthca_pd_free(to_mdev(ibdev), pd); kfree(pd); return ERR_PTR(-EFAULT); } } return &pd->ibpd; } static int mthca_dealloc_pd(struct ib_pd *pd) { mthca_pd_free(to_mdev(pd->device), to_mpd(pd)); kfree(pd); return 0; } static struct ib_ah *mthca_ah_create(struct ib_pd *pd, struct ib_ah_attr *ah_attr) { int err; struct mthca_ah *ah; ah = kmalloc(sizeof *ah, GFP_ATOMIC); if (!ah) return ERR_PTR(-ENOMEM); err = mthca_create_ah(to_mdev(pd->device), to_mpd(pd), ah_attr, ah); if (err) { kfree(ah); return ERR_PTR(err); } return &ah->ibah; } static int mthca_ah_destroy(struct ib_ah *ah) { mthca_destroy_ah(to_mdev(ah->device), to_mah(ah)); kfree(ah); return 0; } static struct ib_srq *mthca_create_srq(struct ib_pd *pd, struct ib_srq_init_attr *init_attr, struct ib_udata *udata) { struct mthca_create_srq ucmd; struct mthca_ucontext *context = NULL; struct mthca_srq *srq; int err; srq = kmalloc(sizeof *srq, GFP_KERNEL); if (!srq) return ERR_PTR(-ENOMEM); if (pd->uobject) { context = to_mucontext(pd->uobject->context); if (ib_copy_from_udata(&ucmd, udata, sizeof ucmd)) { err = -EFAULT; goto err_free; } err = mthca_map_user_db(to_mdev(pd->device), &context->uar, context->db_tab, ucmd.db_index, ucmd.db_page); if (err) goto err_free; srq->mr.ibmr.lkey = ucmd.lkey; srq->db_index = ucmd.db_index; } err = mthca_alloc_srq(to_mdev(pd->device), to_mpd(pd), &init_attr->attr, srq); if (err && pd->uobject) mthca_unmap_user_db(to_mdev(pd->device), &context->uar, context->db_tab, ucmd.db_index); if (err) goto err_free; if (context && ib_copy_to_udata(udata, &srq->srqn, sizeof (__u32))) { mthca_free_srq(to_mdev(pd->device), srq); err = -EFAULT; goto err_free; } return &srq->ibsrq; err_free: kfree(srq); return ERR_PTR(err); } static int mthca_destroy_srq(struct ib_srq *srq) { struct mthca_ucontext *context; if (srq->uobject) { context = to_mucontext(srq->uobject->context); mthca_unmap_user_db(to_mdev(srq->device), &context->uar, context->db_tab, to_msrq(srq)->db_index); } mthca_free_srq(to_mdev(srq->device), to_msrq(srq)); kfree(srq); return 0; } static struct ib_qp *mthca_create_qp(struct ib_pd *pd, struct ib_qp_init_attr *init_attr, struct ib_udata *udata) { struct mthca_create_qp ucmd; struct mthca_qp *qp; int err; if (init_attr->create_flags) return ERR_PTR(-EINVAL); switch (init_attr->qp_type) { case IB_QPT_RC: case IB_QPT_UC: case IB_QPT_UD: { struct mthca_ucontext *context; qp = kmalloc(sizeof *qp, GFP_KERNEL); if (!qp) return ERR_PTR(-ENOMEM); if (pd->uobject) { context = to_mucontext(pd->uobject->context); if (ib_copy_from_udata(&ucmd, udata, sizeof ucmd)) { kfree(qp); return ERR_PTR(-EFAULT); } err = mthca_map_user_db(to_mdev(pd->device), &context->uar, context->db_tab, ucmd.sq_db_index, ucmd.sq_db_page); if (err) { kfree(qp); return ERR_PTR(err); } err = mthca_map_user_db(to_mdev(pd->device), &context->uar, context->db_tab, ucmd.rq_db_index, ucmd.rq_db_page); if (err) { mthca_unmap_user_db(to_mdev(pd->device), &context->uar, context->db_tab, ucmd.sq_db_index); kfree(qp); return ERR_PTR(err); } qp->mr.ibmr.lkey = ucmd.lkey; qp->sq.db_index = ucmd.sq_db_index; qp->rq.db_index = ucmd.rq_db_index; } err = mthca_alloc_qp(to_mdev(pd->device), to_mpd(pd), to_mcq(init_attr->send_cq), to_mcq(init_attr->recv_cq), init_attr->qp_type, init_attr->sq_sig_type, &init_attr->cap, qp); if (err && pd->uobject) { context = to_mucontext(pd->uobject->context); mthca_unmap_user_db(to_mdev(pd->device), &context->uar, context->db_tab, ucmd.sq_db_index); mthca_unmap_user_db(to_mdev(pd->device), &context->uar, context->db_tab, ucmd.rq_db_index); } qp->ibqp.qp_num = qp->qpn; break; } case IB_QPT_SMI: case IB_QPT_GSI: { /* Don't allow userspace to create special QPs */ if (pd->uobject) return ERR_PTR(-EINVAL); qp = kmalloc(sizeof (struct mthca_sqp), GFP_KERNEL); if (!qp) return ERR_PTR(-ENOMEM); qp->ibqp.qp_num = init_attr->qp_type == IB_QPT_SMI ? 0 : 1; err = mthca_alloc_sqp(to_mdev(pd->device), to_mpd(pd), to_mcq(init_attr->send_cq), to_mcq(init_attr->recv_cq), init_attr->sq_sig_type, &init_attr->cap, qp->ibqp.qp_num, init_attr->port_num, to_msqp(qp)); break; } default: /* Don't support raw QPs */ return ERR_PTR(-ENOSYS); } if (err) { kfree(qp); return ERR_PTR(err); } init_attr->cap.max_send_wr = qp->sq.max; init_attr->cap.max_recv_wr = qp->rq.max; init_attr->cap.max_send_sge = qp->sq.max_gs; init_attr->cap.max_recv_sge = qp->rq.max_gs; init_attr->cap.max_inline_data = qp->max_inline_data; return &qp->ibqp; } static int mthca_destroy_qp(struct ib_qp *qp) { if (qp->uobject) { mthca_unmap_user_db(to_mdev(qp->device), &to_mucontext(qp->uobject->context)->uar, to_mucontext(qp->uobject->context)->db_tab, to_mqp(qp)->sq.db_index); mthca_unmap_user_db(to_mdev(qp->device), &to_mucontext(qp->uobject->context)->uar, to_mucontext(qp->uobject->context)->db_tab, to_mqp(qp)->rq.db_index); } mthca_free_qp(to_mdev(qp->device), to_mqp(qp)); kfree(qp); return 0; } static struct ib_cq *mthca_create_cq(struct ib_device *ibdev, int entries, int comp_vector, struct ib_ucontext *context, struct ib_udata *udata) { struct mthca_create_cq ucmd; struct mthca_cq *cq; int nent; int err; if (entries < 1 || entries > to_mdev(ibdev)->limits.max_cqes) return ERR_PTR(-EINVAL); if (context) { if (ib_copy_from_udata(&ucmd, udata, sizeof ucmd)) return ERR_PTR(-EFAULT); err = mthca_map_user_db(to_mdev(ibdev), &to_mucontext(context)->uar, to_mucontext(context)->db_tab, ucmd.set_db_index, ucmd.set_db_page); if (err) return ERR_PTR(err); err = mthca_map_user_db(to_mdev(ibdev), &to_mucontext(context)->uar, to_mucontext(context)->db_tab, ucmd.arm_db_index, ucmd.arm_db_page); if (err) goto err_unmap_set; } cq = kmalloc(sizeof *cq, GFP_KERNEL); if (!cq) { err = -ENOMEM; goto err_unmap_arm; } if (context) { cq->buf.mr.ibmr.lkey = ucmd.lkey; cq->set_ci_db_index = ucmd.set_db_index; cq->arm_db_index = ucmd.arm_db_index; } for (nent = 1; nent <= entries; nent <<= 1) ; /* nothing */ err = mthca_init_cq(to_mdev(ibdev), nent, context ? to_mucontext(context) : NULL, context ? ucmd.pdn : to_mdev(ibdev)->driver_pd.pd_num, cq); if (err) goto err_free; if (context && ib_copy_to_udata(udata, &cq->cqn, sizeof (__u32))) { mthca_free_cq(to_mdev(ibdev), cq); goto err_free; } cq->resize_buf = NULL; return &cq->ibcq; err_free: kfree(cq); err_unmap_arm: if (context) mthca_unmap_user_db(to_mdev(ibdev), &to_mucontext(context)->uar, to_mucontext(context)->db_tab, ucmd.arm_db_index); err_unmap_set: if (context) mthca_unmap_user_db(to_mdev(ibdev), &to_mucontext(context)->uar, to_mucontext(context)->db_tab, ucmd.set_db_index); return ERR_PTR(err); } static int mthca_alloc_resize_buf(struct mthca_dev *dev, struct mthca_cq *cq, int entries) { int ret; spin_lock_irq(&cq->lock); if (cq->resize_buf) { ret = -EBUSY; goto unlock; } cq->resize_buf = kmalloc(sizeof *cq->resize_buf, GFP_ATOMIC); if (!cq->resize_buf) { ret = -ENOMEM; goto unlock; } cq->resize_buf->state = CQ_RESIZE_ALLOC; ret = 0; unlock: spin_unlock_irq(&cq->lock); if (ret) return ret; ret = mthca_alloc_cq_buf(dev, &cq->resize_buf->buf, entries); if (ret) { spin_lock_irq(&cq->lock); kfree(cq->resize_buf); cq->resize_buf = NULL; spin_unlock_irq(&cq->lock); return ret; } cq->resize_buf->cqe = entries - 1; spin_lock_irq(&cq->lock); cq->resize_buf->state = CQ_RESIZE_READY; spin_unlock_irq(&cq->lock); return 0; } static int mthca_resize_cq(struct ib_cq *ibcq, int entries, struct ib_udata *udata) { struct mthca_dev *dev = to_mdev(ibcq->device); struct mthca_cq *cq = to_mcq(ibcq); struct mthca_resize_cq ucmd; u32 lkey; u8 status; int ret; if (entries < 1 || entries > dev->limits.max_cqes) return -EINVAL; mutex_lock(&cq->mutex); entries = roundup_pow_of_two(entries + 1); if (entries == ibcq->cqe + 1) { ret = 0; goto out; } if (cq->is_kernel) { ret = mthca_alloc_resize_buf(dev, cq, entries); if (ret) goto out; lkey = cq->resize_buf->buf.mr.ibmr.lkey; } else { if (ib_copy_from_udata(&ucmd, udata, sizeof ucmd)) { ret = -EFAULT; goto out; } lkey = ucmd.lkey; } ret = mthca_RESIZE_CQ(dev, cq->cqn, lkey, ilog2(entries), &status); if (status) ret = -EINVAL; if (ret) { if (cq->resize_buf) { mthca_free_cq_buf(dev, &cq->resize_buf->buf, cq->resize_buf->cqe); kfree(cq->resize_buf); spin_lock_irq(&cq->lock); cq->resize_buf = NULL; spin_unlock_irq(&cq->lock); } goto out; } if (cq->is_kernel) { struct mthca_cq_buf tbuf; int tcqe; spin_lock_irq(&cq->lock); if (cq->resize_buf->state == CQ_RESIZE_READY) { mthca_cq_resize_copy_cqes(cq); tbuf = cq->buf; tcqe = cq->ibcq.cqe; cq->buf = cq->resize_buf->buf; cq->ibcq.cqe = cq->resize_buf->cqe; } else { tbuf = cq->resize_buf->buf; tcqe = cq->resize_buf->cqe; } kfree(cq->resize_buf); cq->resize_buf = NULL; spin_unlock_irq(&cq->lock); mthca_free_cq_buf(dev, &tbuf, tcqe); } else ibcq->cqe = entries - 1; out: mutex_unlock(&cq->mutex); return ret; } static int mthca_destroy_cq(struct ib_cq *cq) { if (cq->uobject) { mthca_unmap_user_db(to_mdev(cq->device), &to_mucontext(cq->uobject->context)->uar, to_mucontext(cq->uobject->context)->db_tab, to_mcq(cq)->arm_db_index); mthca_unmap_user_db(to_mdev(cq->device), &to_mucontext(cq->uobject->context)->uar, to_mucontext(cq->uobject->context)->db_tab, to_mcq(cq)->set_ci_db_index); } mthca_free_cq(to_mdev(cq->device), to_mcq(cq)); kfree(cq); return 0; } static inline u32 convert_access(int acc) { return (acc & IB_ACCESS_REMOTE_ATOMIC ? MTHCA_MPT_FLAG_ATOMIC : 0) | (acc & IB_ACCESS_REMOTE_WRITE ? MTHCA_MPT_FLAG_REMOTE_WRITE : 0) | (acc & IB_ACCESS_REMOTE_READ ? MTHCA_MPT_FLAG_REMOTE_READ : 0) | (acc & IB_ACCESS_LOCAL_WRITE ? MTHCA_MPT_FLAG_LOCAL_WRITE : 0) | MTHCA_MPT_FLAG_LOCAL_READ; } static struct ib_mr *mthca_get_dma_mr(struct ib_pd *pd, int acc) { struct mthca_mr *mr; int err; mr = kmalloc(sizeof *mr, GFP_KERNEL); if (!mr) return ERR_PTR(-ENOMEM); err = mthca_mr_alloc_notrans(to_mdev(pd->device), to_mpd(pd)->pd_num, convert_access(acc), mr); if (err) { kfree(mr); return ERR_PTR(err); } mr->umem = NULL; return &mr->ibmr; } static struct ib_mr *mthca_reg_phys_mr(struct ib_pd *pd, struct ib_phys_buf *buffer_list, int num_phys_buf, int acc, u64 *iova_start) { struct mthca_mr *mr; u64 *page_list; u64 total_size; unsigned long mask; int shift; int npages; int err; int i, j, n; mask = buffer_list[0].addr ^ *iova_start; total_size = 0; for (i = 0; i < num_phys_buf; ++i) { if (i != 0) mask |= buffer_list[i].addr; if (i != num_phys_buf - 1) mask |= buffer_list[i].addr + buffer_list[i].size; total_size += buffer_list[i].size; } if (mask & ~PAGE_MASK) return ERR_PTR(-EINVAL); shift = __ffs(mask | 1 << 31); buffer_list[0].size += buffer_list[0].addr & ((1ULL << shift) - 1); buffer_list[0].addr &= ~0ull << shift; mr = kmalloc(sizeof *mr, GFP_KERNEL); if (!mr) return ERR_PTR(-ENOMEM); npages = 0; for (i = 0; i < num_phys_buf; ++i) npages += (buffer_list[i].size + (1ULL << shift) - 1) >> shift; if (!npages) return &mr->ibmr; page_list = kmalloc(npages * sizeof *page_list, GFP_KERNEL); if (!page_list) { kfree(mr); return ERR_PTR(-ENOMEM); } n = 0; for (i = 0; i < num_phys_buf; ++i) for (j = 0; j < (buffer_list[i].size + (1ULL << shift) - 1) >> shift; ++j) page_list[n++] = buffer_list[i].addr + ((u64) j << shift); mthca_dbg(to_mdev(pd->device), "Registering memory at %llx (iova %llx) " "in PD %x; shift %d, npages %d.\n", (unsigned long long) buffer_list[0].addr, (unsigned long long) *iova_start, to_mpd(pd)->pd_num, shift, npages); err = mthca_mr_alloc_phys(to_mdev(pd->device), to_mpd(pd)->pd_num, page_list, shift, npages, *iova_start, total_size, convert_access(acc), mr); if (err) { kfree(page_list); kfree(mr); return ERR_PTR(err); } kfree(page_list); mr->umem = NULL; return &mr->ibmr; } static struct ib_mr *mthca_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, u64 virt, int acc, struct ib_udata *udata, int mr_id) { struct mthca_dev *dev = to_mdev(pd->device); struct ib_umem_chunk *chunk; struct mthca_mr *mr; struct mthca_reg_mr ucmd; u64 *pages; int shift, n, len; int i, j, k; int err = 0; int write_mtt_size; if (udata->inlen - sizeof (struct ib_uverbs_cmd_hdr) < sizeof ucmd) { if (!to_mucontext(pd->uobject->context)->reg_mr_warned) { mthca_warn(dev, "Process '%s' did not pass in MR attrs.\n", curproc->p_comm); mthca_warn(dev, " Update libmthca to fix this.\n"); } ++to_mucontext(pd->uobject->context)->reg_mr_warned; ucmd.mr_attrs = 0; } else if (ib_copy_from_udata(&ucmd, udata, sizeof ucmd)) return ERR_PTR(-EFAULT); mr = kmalloc(sizeof *mr, GFP_KERNEL); if (!mr) return ERR_PTR(-ENOMEM); mr->umem = ib_umem_get(pd->uobject->context, start, length, acc, ucmd.mr_attrs & MTHCA_MR_DMASYNC); if (IS_ERR(mr->umem)) { err = PTR_ERR(mr->umem); goto err; } shift = ffs(mr->umem->page_size) - 1; n = 0; list_for_each_entry(chunk, &mr->umem->chunk_list, list) n += chunk->nents; mr->mtt = mthca_alloc_mtt(dev, n); if (IS_ERR(mr->mtt)) { err = PTR_ERR(mr->mtt); goto err_umem; } pages = (u64 *) __get_free_page(GFP_KERNEL); if (!pages) { err = -ENOMEM; goto err_mtt; } i = n = 0; write_mtt_size = min(mthca_write_mtt_size(dev), (int) (PAGE_SIZE / sizeof *pages)); list_for_each_entry(chunk, &mr->umem->chunk_list, list) for (j = 0; j < chunk->nmap; ++j) { len = sg_dma_len(&chunk->page_list[j]) >> shift; for (k = 0; k < len; ++k) { pages[i++] = sg_dma_address(&chunk->page_list[j]) + mr->umem->page_size * k; /* * Be friendly to write_mtt and pass it chunks * of appropriate size. */ if (i == write_mtt_size) { err = mthca_write_mtt(dev, mr->mtt, n, pages, i); if (err) goto mtt_done; n += i; i = 0; } } } if (i) err = mthca_write_mtt(dev, mr->mtt, n, pages, i); mtt_done: free_page((unsigned long) pages); if (err) goto err_mtt; err = mthca_mr_alloc(dev, to_mpd(pd)->pd_num, shift, virt, length, convert_access(acc), mr); if (err) goto err_mtt; return &mr->ibmr; err_mtt: mthca_free_mtt(dev, mr->mtt); err_umem: ib_umem_release(mr->umem); err: kfree(mr); return ERR_PTR(err); } static int mthca_dereg_mr(struct ib_mr *mr) { struct mthca_mr *mmr = to_mmr(mr); mthca_free_mr(to_mdev(mr->device), mmr); if (mmr->umem) ib_umem_release(mmr->umem); kfree(mmr); return 0; } static struct ib_fmr *mthca_alloc_fmr(struct ib_pd *pd, int mr_access_flags, struct ib_fmr_attr *fmr_attr) { struct mthca_fmr *fmr; int err; fmr = kmalloc(sizeof *fmr, GFP_KERNEL); if (!fmr) return ERR_PTR(-ENOMEM); memcpy(&fmr->attr, fmr_attr, sizeof *fmr_attr); err = mthca_fmr_alloc(to_mdev(pd->device), to_mpd(pd)->pd_num, convert_access(mr_access_flags), fmr); if (err) { kfree(fmr); return ERR_PTR(err); } return &fmr->ibmr; } static int mthca_dealloc_fmr(struct ib_fmr *fmr) { struct mthca_fmr *mfmr = to_mfmr(fmr); int err; err = mthca_free_fmr(to_mdev(fmr->device), mfmr); if (err) return err; kfree(mfmr); return 0; } static int mthca_unmap_fmr(struct list_head *fmr_list) { struct ib_fmr *fmr; int err; u8 status; struct mthca_dev *mdev = NULL; list_for_each_entry(fmr, fmr_list, list) { if (mdev && to_mdev(fmr->device) != mdev) return -EINVAL; mdev = to_mdev(fmr->device); } if (!mdev) return 0; if (mthca_is_memfree(mdev)) { list_for_each_entry(fmr, fmr_list, list) mthca_arbel_fmr_unmap(mdev, to_mfmr(fmr)); wmb(); } else list_for_each_entry(fmr, fmr_list, list) mthca_tavor_fmr_unmap(mdev, to_mfmr(fmr)); err = mthca_SYNC_TPT(mdev, &status); if (err) return err; if (status) return -EINVAL; return 0; } static ssize_t show_rev(struct device *device, struct device_attribute *attr, char *buf) { struct mthca_dev *dev = container_of(device, struct mthca_dev, ib_dev.dev); return sprintf(buf, "%x\n", dev->rev_id); } static ssize_t show_fw_ver(struct device *device, struct device_attribute *attr, char *buf) { struct mthca_dev *dev = container_of(device, struct mthca_dev, ib_dev.dev); return sprintf(buf, "%d.%d.%d\n", (int) (dev->fw_ver >> 32), (int) (dev->fw_ver >> 16) & 0xffff, (int) dev->fw_ver & 0xffff); } static ssize_t show_hca(struct device *device, struct device_attribute *attr, char *buf) { struct mthca_dev *dev = container_of(device, struct mthca_dev, ib_dev.dev); switch (dev->pdev->device) { case PCI_DEVICE_ID_MELLANOX_TAVOR: return sprintf(buf, "MT23108\n"); case PCI_DEVICE_ID_MELLANOX_ARBEL_COMPAT: return sprintf(buf, "MT25208 (MT23108 compat mode)\n"); case PCI_DEVICE_ID_MELLANOX_ARBEL: return sprintf(buf, "MT25208\n"); case PCI_DEVICE_ID_MELLANOX_SINAI: case PCI_DEVICE_ID_MELLANOX_SINAI_OLD: return sprintf(buf, "MT25204\n"); default: return sprintf(buf, "unknown\n"); } } static ssize_t show_board(struct device *device, struct device_attribute *attr, char *buf) { struct mthca_dev *dev = container_of(device, struct mthca_dev, ib_dev.dev); return sprintf(buf, "%.*s\n", MTHCA_BOARD_ID_LEN, dev->board_id); } static DEVICE_ATTR(hw_rev, S_IRUGO, show_rev, NULL); static DEVICE_ATTR(fw_ver, S_IRUGO, show_fw_ver, NULL); static DEVICE_ATTR(hca_type, S_IRUGO, show_hca, NULL); static DEVICE_ATTR(board_id, S_IRUGO, show_board, NULL); static struct device_attribute *mthca_dev_attributes[] = { &dev_attr_hw_rev, &dev_attr_fw_ver, &dev_attr_hca_type, &dev_attr_board_id }; static int mthca_init_node_data(struct mthca_dev *dev) { struct ib_smp *in_mad = NULL; struct ib_smp *out_mad = NULL; int err = -ENOMEM; u8 status; in_mad = kzalloc(sizeof *in_mad, GFP_KERNEL); out_mad = kmalloc(sizeof *out_mad, GFP_KERNEL); if (!in_mad || !out_mad) goto out; init_query_mad(in_mad); in_mad->attr_id = IB_SMP_ATTR_NODE_DESC; err = mthca_MAD_IFC(dev, 1, 1, 1, NULL, NULL, in_mad, out_mad, &status); if (err) goto out; if (status) { err = -EINVAL; goto out; } memcpy(dev->ib_dev.node_desc, out_mad->data, 64); in_mad->attr_id = IB_SMP_ATTR_NODE_INFO; err = mthca_MAD_IFC(dev, 1, 1, 1, NULL, NULL, in_mad, out_mad, &status); if (err) goto out; if (status) { err = -EINVAL; goto out; } if (mthca_is_memfree(dev)) dev->rev_id = be32_to_cpup((__be32 *) (out_mad->data + 32)); memcpy(&dev->ib_dev.node_guid, out_mad->data + 12, 8); out: kfree(in_mad); kfree(out_mad); return err; } int mthca_register_device(struct mthca_dev *dev) { int ret; int i; ret = mthca_init_node_data(dev); if (ret) return ret; strlcpy(dev->ib_dev.name, "mthca%d", IB_DEVICE_NAME_MAX); dev->ib_dev.owner = THIS_MODULE; dev->ib_dev.uverbs_abi_ver = MTHCA_UVERBS_ABI_VERSION; dev->ib_dev.uverbs_cmd_mask = (1ull << IB_USER_VERBS_CMD_GET_CONTEXT) | (1ull << IB_USER_VERBS_CMD_QUERY_DEVICE) | (1ull << IB_USER_VERBS_CMD_QUERY_PORT) | (1ull << IB_USER_VERBS_CMD_ALLOC_PD) | (1ull << IB_USER_VERBS_CMD_DEALLOC_PD) | (1ull << IB_USER_VERBS_CMD_REG_MR) | (1ull << IB_USER_VERBS_CMD_DEREG_MR) | (1ull << IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL) | (1ull << IB_USER_VERBS_CMD_CREATE_CQ) | (1ull << IB_USER_VERBS_CMD_RESIZE_CQ) | (1ull << IB_USER_VERBS_CMD_DESTROY_CQ) | (1ull << IB_USER_VERBS_CMD_CREATE_QP) | (1ull << IB_USER_VERBS_CMD_QUERY_QP) | (1ull << IB_USER_VERBS_CMD_MODIFY_QP) | (1ull << IB_USER_VERBS_CMD_DESTROY_QP) | (1ull << IB_USER_VERBS_CMD_ATTACH_MCAST) | (1ull << IB_USER_VERBS_CMD_DETACH_MCAST); dev->ib_dev.node_type = RDMA_NODE_IB_CA; dev->ib_dev.phys_port_cnt = dev->limits.num_ports; dev->ib_dev.num_comp_vectors = 1; dev->ib_dev.dma_device = &dev->pdev->dev; dev->ib_dev.query_device = mthca_query_device; dev->ib_dev.query_port = mthca_query_port; dev->ib_dev.modify_device = mthca_modify_device; dev->ib_dev.modify_port = mthca_modify_port; dev->ib_dev.query_pkey = mthca_query_pkey; dev->ib_dev.query_gid = mthca_query_gid; dev->ib_dev.alloc_ucontext = mthca_alloc_ucontext; dev->ib_dev.dealloc_ucontext = mthca_dealloc_ucontext; dev->ib_dev.mmap = mthca_mmap_uar; dev->ib_dev.alloc_pd = mthca_alloc_pd; dev->ib_dev.dealloc_pd = mthca_dealloc_pd; dev->ib_dev.create_ah = mthca_ah_create; dev->ib_dev.query_ah = mthca_ah_query; dev->ib_dev.destroy_ah = mthca_ah_destroy; if (dev->mthca_flags & MTHCA_FLAG_SRQ) { dev->ib_dev.create_srq = mthca_create_srq; dev->ib_dev.modify_srq = mthca_modify_srq; dev->ib_dev.query_srq = mthca_query_srq; dev->ib_dev.destroy_srq = mthca_destroy_srq; dev->ib_dev.uverbs_cmd_mask |= (1ull << IB_USER_VERBS_CMD_CREATE_SRQ) | (1ull << IB_USER_VERBS_CMD_MODIFY_SRQ) | (1ull << IB_USER_VERBS_CMD_QUERY_SRQ) | (1ull << IB_USER_VERBS_CMD_DESTROY_SRQ); if (mthca_is_memfree(dev)) dev->ib_dev.post_srq_recv = mthca_arbel_post_srq_recv; else dev->ib_dev.post_srq_recv = mthca_tavor_post_srq_recv; } dev->ib_dev.create_qp = mthca_create_qp; dev->ib_dev.modify_qp = mthca_modify_qp; dev->ib_dev.query_qp = mthca_query_qp; dev->ib_dev.destroy_qp = mthca_destroy_qp; dev->ib_dev.create_cq = mthca_create_cq; dev->ib_dev.resize_cq = mthca_resize_cq; dev->ib_dev.destroy_cq = mthca_destroy_cq; dev->ib_dev.poll_cq = mthca_poll_cq; dev->ib_dev.get_dma_mr = mthca_get_dma_mr; dev->ib_dev.reg_phys_mr = mthca_reg_phys_mr; dev->ib_dev.reg_user_mr = mthca_reg_user_mr; dev->ib_dev.dereg_mr = mthca_dereg_mr; if (dev->mthca_flags & MTHCA_FLAG_FMR) { dev->ib_dev.alloc_fmr = mthca_alloc_fmr; dev->ib_dev.unmap_fmr = mthca_unmap_fmr; dev->ib_dev.dealloc_fmr = mthca_dealloc_fmr; if (mthca_is_memfree(dev)) dev->ib_dev.map_phys_fmr = mthca_arbel_map_phys_fmr; else dev->ib_dev.map_phys_fmr = mthca_tavor_map_phys_fmr; } dev->ib_dev.attach_mcast = mthca_multicast_attach; dev->ib_dev.detach_mcast = mthca_multicast_detach; dev->ib_dev.process_mad = mthca_process_mad; if (mthca_is_memfree(dev)) { dev->ib_dev.req_notify_cq = mthca_arbel_arm_cq; dev->ib_dev.post_send = mthca_arbel_post_send; dev->ib_dev.post_recv = mthca_arbel_post_receive; } else { dev->ib_dev.req_notify_cq = mthca_tavor_arm_cq; dev->ib_dev.post_send = mthca_tavor_post_send; dev->ib_dev.post_recv = mthca_tavor_post_receive; } mutex_init(&dev->cap_mask_mutex); ret = ib_register_device(&dev->ib_dev, NULL); if (ret) return ret; for (i = 0; i < ARRAY_SIZE(mthca_dev_attributes); ++i) { ret = device_create_file(&dev->ib_dev.dev, mthca_dev_attributes[i]); if (ret) { ib_unregister_device(&dev->ib_dev); return ret; } } mthca_start_catas_poll(dev); return 0; } void mthca_unregister_device(struct mthca_dev *dev) { mthca_stop_catas_poll(dev); ib_unregister_device(&dev->ib_dev); } Index: stable/10/sys/ofed/drivers/infiniband/hw/mthca/mthca_reset.c =================================================================== --- stable/10/sys/ofed/drivers/infiniband/hw/mthca/mthca_reset.c (revision 271126) +++ stable/10/sys/ofed/drivers/infiniband/hw/mthca/mthca_reset.c (revision 271127) @@ -1,298 +1,297 @@ /* * Copyright (c) 2004 Topspin Communications. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU * General Public License (GPL) Version 2, available from the file * COPYING in the main directory of this source tree, or the * OpenIB.org BSD license below: * * Redistribution and use in source and binary forms, with or * without modification, are permitted provided that the following * conditions are met: * * - Redistributions of source code must retain the above * copyright notice, this list of conditions and the following * disclaimer. * * - Redistributions in binary form must reproduce the above * copyright notice, this list of conditions and the following * disclaimer in the documentation and/or other materials * provided with the distribution. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include #include #include #include #include #include "mthca_dev.h" #include "mthca_cmd.h" int mthca_reset(struct mthca_dev *mdev) { int i; int err = 0; u32 *hca_header = NULL; u32 *bridge_header = NULL; struct pci_dev *bridge = NULL; int bridge_pcix_cap = 0; int hca_pcie_cap = 0; int hca_pcix_cap = 0; u16 devctl; u16 linkctl; #define MTHCA_RESET_OFFSET 0xf0010 #define MTHCA_RESET_VALUE swab32(1) /* * Reset the chip. This is somewhat ugly because we have to * save off the PCI header before reset and then restore it * after the chip reboots. We skip config space offsets 22 * and 23 since those have a special meaning. * * To make matters worse, for Tavor (PCI-X HCA) we have to * find the associated bridge device and save off its PCI * header as well. */ if (!(mdev->mthca_flags & MTHCA_FLAG_PCIE)) { /* Look for the bridge -- its device ID will be 2 more than HCA's device ID. */ #ifdef __linux__ while ((bridge = pci_get_device(mdev->pdev->vendor, mdev->pdev->device + 2, bridge)) != NULL) { if (bridge->hdr_type == PCI_HEADER_TYPE_BRIDGE && bridge->subordinate == mdev->pdev->bus) { mthca_dbg(mdev, "Found bridge: %s\n", pci_name(bridge)); break; } } if (!bridge) { /* * Didn't find a bridge for a Tavor device -- * assume we're in no-bridge mode and hope for * the best. */ mthca_warn(mdev, "No bridge found for %s\n", pci_name(mdev->pdev)); } #else mthca_warn(mdev, "Reset on PCI-X is not supported.\n"); goto out; #endif } /* For Arbel do we need to save off the full 4K PCI Express header?? */ hca_header = kmalloc(256, GFP_KERNEL); if (!hca_header) { err = -ENOMEM; mthca_err(mdev, "Couldn't allocate memory to save HCA " "PCI header, aborting.\n"); goto out; } for (i = 0; i < 64; ++i) { if (i == 22 || i == 23) continue; if (pci_read_config_dword(mdev->pdev, i * 4, hca_header + i)) { err = -ENODEV; mthca_err(mdev, "Couldn't save HCA " "PCI header, aborting.\n"); goto out; } } hca_pcix_cap = pci_find_capability(mdev->pdev, PCI_CAP_ID_PCIX); hca_pcie_cap = pci_find_capability(mdev->pdev, PCI_CAP_ID_EXP); #ifdef __linux__ if (bridge) { bridge_header = kmalloc(256, GFP_KERNEL); if (!bridge_header) { err = -ENOMEM; mthca_err(mdev, "Couldn't allocate memory to save HCA " "bridge PCI header, aborting.\n"); goto out; } for (i = 0; i < 64; ++i) { if (i == 22 || i == 23) continue; if (pci_read_config_dword(bridge, i * 4, bridge_header + i)) { err = -ENODEV; mthca_err(mdev, "Couldn't save HCA bridge " "PCI header, aborting.\n"); goto out; } } bridge_pcix_cap = pci_find_capability(bridge, PCI_CAP_ID_PCIX); if (!bridge_pcix_cap) { err = -ENODEV; mthca_err(mdev, "Couldn't locate HCA bridge " "PCI-X capability, aborting.\n"); goto out; } } #endif /* actually hit reset */ { void __iomem *reset = ioremap(pci_resource_start(mdev->pdev, 0) + MTHCA_RESET_OFFSET, 4); if (!reset) { err = -ENOMEM; mthca_err(mdev, "Couldn't map HCA reset register, " "aborting.\n"); goto out; } writel(MTHCA_RESET_VALUE, reset); iounmap(reset); } /* Docs say to wait one second before accessing device */ msleep(1000); /* Now wait for PCI device to start responding again */ { u32 v; int c = 0; for (c = 0; c < 100; ++c) { if (pci_read_config_dword(bridge ? bridge : mdev->pdev, 0, &v)) { err = -ENODEV; mthca_err(mdev, "Couldn't access HCA after reset, " "aborting.\n"); goto out; } if (v != 0xffffffff) goto good; msleep(100); } err = -ENODEV; mthca_err(mdev, "PCI device did not come back after reset, " "aborting.\n"); goto out; } good: /* Now restore the PCI headers */ if (bridge) { if (pci_write_config_dword(bridge, bridge_pcix_cap + 0x8, bridge_header[(bridge_pcix_cap + 0x8) / 4])) { err = -ENODEV; mthca_err(mdev, "Couldn't restore HCA bridge Upstream " "split transaction control, aborting.\n"); goto out; } if (pci_write_config_dword(bridge, bridge_pcix_cap + 0xc, bridge_header[(bridge_pcix_cap + 0xc) / 4])) { err = -ENODEV; mthca_err(mdev, "Couldn't restore HCA bridge Downstream " "split transaction control, aborting.\n"); goto out; } /* * Bridge control register is at 0x3e, so we'll * naturally restore it last in this loop. */ for (i = 0; i < 16; ++i) { if (i * 4 == PCI_COMMAND) continue; if (pci_write_config_dword(bridge, i * 4, bridge_header[i])) { err = -ENODEV; mthca_err(mdev, "Couldn't restore HCA bridge reg %x, " "aborting.\n", i); goto out; } } if (pci_write_config_dword(bridge, PCI_COMMAND, bridge_header[PCI_COMMAND / 4])) { err = -ENODEV; mthca_err(mdev, "Couldn't restore HCA bridge COMMAND, " "aborting.\n"); goto out; } } if (hca_pcix_cap) { if (pci_write_config_dword(mdev->pdev, hca_pcix_cap, hca_header[hca_pcix_cap / 4])) { err = -ENODEV; mthca_err(mdev, "Couldn't restore HCA PCI-X " "command register, aborting.\n"); goto out; } } if (hca_pcie_cap) { devctl = hca_header[(hca_pcie_cap + PCI_EXP_DEVCTL) / 4]; if (pci_write_config_word(mdev->pdev, hca_pcie_cap + PCI_EXP_DEVCTL, devctl)) { err = -ENODEV; mthca_err(mdev, "Couldn't restore HCA PCI Express " "Device Control register, aborting.\n"); goto out; } linkctl = hca_header[(hca_pcie_cap + PCI_EXP_LNKCTL) / 4]; if (pci_write_config_word(mdev->pdev, hca_pcie_cap + PCI_EXP_LNKCTL, linkctl)) { err = -ENODEV; mthca_err(mdev, "Couldn't restore HCA PCI Express " "Link control register, aborting.\n"); goto out; } } for (i = 0; i < 16; ++i) { if (i * 4 == PCI_COMMAND) continue; if (pci_write_config_dword(mdev->pdev, i * 4, hca_header[i])) { err = -ENODEV; mthca_err(mdev, "Couldn't restore HCA reg %x, " "aborting.\n", i); goto out; } } if (pci_write_config_dword(mdev->pdev, PCI_COMMAND, hca_header[PCI_COMMAND / 4])) { err = -ENODEV; mthca_err(mdev, "Couldn't restore HCA COMMAND, " "aborting.\n"); goto out; } out: #ifdef __linux__ if (bridge) pci_dev_put(bridge); #endif kfree(bridge_header); kfree(hca_header); return err; } Index: stable/10/sys/ofed/drivers/infiniband/ulp/ipoib/ipoib_main.c =================================================================== --- stable/10/sys/ofed/drivers/infiniband/ulp/ipoib/ipoib_main.c (revision 271126) +++ stable/10/sys/ofed/drivers/infiniband/ulp/ipoib/ipoib_main.c (revision 271127) @@ -1,1558 +1,1557 @@ /* * Copyright (c) 2004 Topspin Communications. All rights reserved. * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved. * Copyright (c) 2004 Voltaire, Inc. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU * General Public License (GPL) Version 2, available from the file * COPYING in the main directory of this source tree, or the * OpenIB.org BSD license below: * * Redistribution and use in source and binary forms, with or * without modification, are permitted provided that the following * conditions are met: * * - Redistributions of source code must retain the above * copyright notice, this list of conditions and the following * disclaimer. * * - Redistributions in binary form must reproduce the above * copyright notice, this list of conditions and the following * disclaimer in the documentation and/or other materials * provided with the distribution. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ #include "ipoib.h" static int ipoib_resolvemulti(struct ifnet *, struct sockaddr **, struct sockaddr *); #include -#include #include #include #include #include /* For ARPHRD_xxx */ #include #include #include MODULE_AUTHOR("Roland Dreier"); MODULE_DESCRIPTION("IP-over-InfiniBand net driver"); MODULE_LICENSE("Dual BSD/GPL"); int ipoib_sendq_size = IPOIB_TX_RING_SIZE; int ipoib_recvq_size = IPOIB_RX_RING_SIZE; module_param_named(send_queue_size, ipoib_sendq_size, int, 0444); MODULE_PARM_DESC(send_queue_size, "Number of descriptors in send queue"); module_param_named(recv_queue_size, ipoib_recvq_size, int, 0444); MODULE_PARM_DESC(recv_queue_size, "Number of descriptors in receive queue"); #ifdef CONFIG_INFINIBAND_IPOIB_DEBUG int ipoib_debug_level = 1; module_param_named(debug_level, ipoib_debug_level, int, 0644); MODULE_PARM_DESC(debug_level, "Enable debug tracing if > 0"); #endif struct ipoib_path_iter { struct ipoib_dev_priv *priv; struct ipoib_path path; }; static const u8 ipv4_bcast_addr[] = { 0x00, 0xff, 0xff, 0xff, 0xff, 0x12, 0x40, 0x1b, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff }; struct workqueue_struct *ipoib_workqueue; struct ib_sa_client ipoib_sa_client; static void ipoib_add_one(struct ib_device *device); static void ipoib_remove_one(struct ib_device *device); static void ipoib_start(struct ifnet *dev); static int ipoib_output(struct ifnet *ifp, struct mbuf *m, const struct sockaddr *dst, struct route *ro); static int ipoib_ioctl(struct ifnet *ifp, u_long command, caddr_t data); static void ipoib_input(struct ifnet *ifp, struct mbuf *m); #define IPOIB_MTAP(_ifp, _m) \ do { \ if (bpf_peers_present((_ifp)->if_bpf)) { \ M_ASSERTVALID(_m); \ ipoib_mtap_mb((_ifp), (_m)); \ } \ } while (0) /* * This is for clients that have an ipoib_header in the mbuf. */ static void ipoib_mtap_mb(struct ifnet *ifp, struct mbuf *mb) { struct ipoib_header *ih; struct ether_header eh; ih = mtod(mb, struct ipoib_header *); eh.ether_type = ih->proto; bcopy(ih->hwaddr, &eh.ether_dhost, ETHER_ADDR_LEN); bzero(&eh.ether_shost, ETHER_ADDR_LEN); mb->m_data += sizeof(struct ipoib_header); mb->m_len -= sizeof(struct ipoib_header); bpf_mtap2(ifp->if_bpf, &eh, sizeof(eh), mb); mb->m_data -= sizeof(struct ipoib_header); mb->m_len += sizeof(struct ipoib_header); } void ipoib_mtap_proto(struct ifnet *ifp, struct mbuf *mb, uint16_t proto) { struct ether_header eh; eh.ether_type = proto; bzero(&eh.ether_shost, ETHER_ADDR_LEN); bzero(&eh.ether_dhost, ETHER_ADDR_LEN); bpf_mtap2(ifp->if_bpf, &eh, sizeof(eh), mb); } static struct ib_client ipoib_client = { .name = "ipoib", .add = ipoib_add_one, .remove = ipoib_remove_one }; int ipoib_open(struct ipoib_dev_priv *priv) { struct ifnet *dev = priv->dev; ipoib_dbg(priv, "bringing up interface\n"); set_bit(IPOIB_FLAG_ADMIN_UP, &priv->flags); if (ipoib_pkey_dev_delay_open(priv)) return 0; if (ipoib_ib_dev_open(priv)) goto err_disable; if (ipoib_ib_dev_up(priv)) goto err_stop; if (!test_bit(IPOIB_FLAG_SUBINTERFACE, &priv->flags)) { struct ipoib_dev_priv *cpriv; /* Bring up any child interfaces too */ mutex_lock(&priv->vlan_mutex); list_for_each_entry(cpriv, &priv->child_intfs, list) if ((cpriv->dev->if_drv_flags & IFF_DRV_RUNNING) == 0) ipoib_open(cpriv); mutex_unlock(&priv->vlan_mutex); } dev->if_drv_flags |= IFF_DRV_RUNNING; dev->if_drv_flags &= ~IFF_DRV_OACTIVE; return 0; err_stop: ipoib_ib_dev_stop(priv, 1); err_disable: clear_bit(IPOIB_FLAG_ADMIN_UP, &priv->flags); return -EINVAL; } static void ipoib_init(void *arg) { struct ifnet *dev; struct ipoib_dev_priv *priv; priv = arg; dev = priv->dev; if ((dev->if_drv_flags & IFF_DRV_RUNNING) == 0) ipoib_open(priv); queue_work(ipoib_workqueue, &priv->flush_light); } static int ipoib_stop(struct ipoib_dev_priv *priv) { struct ifnet *dev = priv->dev; ipoib_dbg(priv, "stopping interface\n"); clear_bit(IPOIB_FLAG_ADMIN_UP, &priv->flags); dev->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE); ipoib_ib_dev_down(priv, 0); ipoib_ib_dev_stop(priv, 0); if (!test_bit(IPOIB_FLAG_SUBINTERFACE, &priv->flags)) { struct ipoib_dev_priv *cpriv; /* Bring down any child interfaces too */ mutex_lock(&priv->vlan_mutex); list_for_each_entry(cpriv, &priv->child_intfs, list) if ((cpriv->dev->if_drv_flags & IFF_DRV_RUNNING) != 0) ipoib_stop(cpriv); mutex_unlock(&priv->vlan_mutex); } return 0; } int ipoib_change_mtu(struct ipoib_dev_priv *priv, int new_mtu) { struct ifnet *dev = priv->dev; /* dev->if_mtu > 2K ==> connected mode */ if (ipoib_cm_admin_enabled(priv)) { if (new_mtu > IPOIB_CM_MTU(ipoib_cm_max_mtu(priv))) return -EINVAL; if (new_mtu > priv->mcast_mtu) ipoib_warn(priv, "mtu > %d will cause multicast packet drops.\n", priv->mcast_mtu); dev->if_mtu = new_mtu; return 0; } if (new_mtu > IPOIB_UD_MTU(priv->max_ib_mtu)) return -EINVAL; priv->admin_mtu = new_mtu; dev->if_mtu = min(priv->mcast_mtu, priv->admin_mtu); queue_work(ipoib_workqueue, &priv->flush_light); return 0; } static int ipoib_ioctl(struct ifnet *ifp, u_long command, caddr_t data) { struct ipoib_dev_priv *priv = ifp->if_softc; struct ifaddr *ifa = (struct ifaddr *) data; struct ifreq *ifr = (struct ifreq *) data; int error = 0; switch (command) { case SIOCSIFFLAGS: if (ifp->if_flags & IFF_UP) { if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) error = -ipoib_open(priv); } else if (ifp->if_drv_flags & IFF_DRV_RUNNING) ipoib_stop(priv); break; case SIOCADDMULTI: case SIOCDELMULTI: if (ifp->if_drv_flags & IFF_DRV_RUNNING) queue_work(ipoib_workqueue, &priv->restart_task); break; case SIOCSIFADDR: ifp->if_flags |= IFF_UP; switch (ifa->ifa_addr->sa_family) { #ifdef INET case AF_INET: ifp->if_init(ifp->if_softc); /* before arpwhohas */ arp_ifinit(ifp, ifa); break; #endif default: ifp->if_init(ifp->if_softc); break; } break; case SIOCGIFADDR: { struct sockaddr *sa; sa = (struct sockaddr *) & ifr->ifr_data; bcopy(IF_LLADDR(ifp), (caddr_t) sa->sa_data, INFINIBAND_ALEN); } break; case SIOCSIFMTU: /* * Set the interface MTU. */ error = -ipoib_change_mtu(priv, ifr->ifr_mtu); break; default: error = EINVAL; break; } return (error); } static struct ipoib_path * __path_find(struct ipoib_dev_priv *priv, void *gid) { struct rb_node *n = priv->path_tree.rb_node; struct ipoib_path *path; int ret; while (n) { path = rb_entry(n, struct ipoib_path, rb_node); ret = memcmp(gid, path->pathrec.dgid.raw, sizeof (union ib_gid)); if (ret < 0) n = n->rb_left; else if (ret > 0) n = n->rb_right; else return path; } return NULL; } static int __path_add(struct ipoib_dev_priv *priv, struct ipoib_path *path) { struct rb_node **n = &priv->path_tree.rb_node; struct rb_node *pn = NULL; struct ipoib_path *tpath; int ret; while (*n) { pn = *n; tpath = rb_entry(pn, struct ipoib_path, rb_node); ret = memcmp(path->pathrec.dgid.raw, tpath->pathrec.dgid.raw, sizeof (union ib_gid)); if (ret < 0) n = &pn->rb_left; else if (ret > 0) n = &pn->rb_right; else return -EEXIST; } rb_link_node(&path->rb_node, pn, n); rb_insert_color(&path->rb_node, &priv->path_tree); list_add_tail(&path->list, &priv->path_list); return 0; } void ipoib_path_free(struct ipoib_dev_priv *priv, struct ipoib_path *path) { _IF_DRAIN(&path->queue); if (path->ah) ipoib_put_ah(path->ah); if (ipoib_cm_get(path)) ipoib_cm_destroy_tx(ipoib_cm_get(path)); kfree(path); } #ifdef CONFIG_INFINIBAND_IPOIB_DEBUG struct ipoib_path_iter * ipoib_path_iter_init(struct ipoib_dev_priv *priv) { struct ipoib_path_iter *iter; iter = kmalloc(sizeof *iter, GFP_KERNEL); if (!iter) return NULL; iter->priv = priv; memset(iter->path.pathrec.dgid.raw, 0, 16); if (ipoib_path_iter_next(iter)) { kfree(iter); return NULL; } return iter; } int ipoib_path_iter_next(struct ipoib_path_iter *iter) { struct ipoib_dev_priv *priv = iter->priv; struct rb_node *n; struct ipoib_path *path; int ret = 1; spin_lock_irq(&priv->lock); n = rb_first(&priv->path_tree); while (n) { path = rb_entry(n, struct ipoib_path, rb_node); if (memcmp(iter->path.pathrec.dgid.raw, path->pathrec.dgid.raw, sizeof (union ib_gid)) < 0) { iter->path = *path; ret = 0; break; } n = rb_next(n); } spin_unlock_irq(&priv->lock); return ret; } void ipoib_path_iter_read(struct ipoib_path_iter *iter, struct ipoib_path *path) { *path = iter->path; } #endif /* CONFIG_INFINIBAND_IPOIB_DEBUG */ void ipoib_mark_paths_invalid(struct ipoib_dev_priv *priv) { struct ipoib_path *path, *tp; spin_lock_irq(&priv->lock); list_for_each_entry_safe(path, tp, &priv->path_list, list) { ipoib_dbg(priv, "mark path LID 0x%04x GID %16D invalid\n", be16_to_cpu(path->pathrec.dlid), path->pathrec.dgid.raw, ":"); path->valid = 0; } spin_unlock_irq(&priv->lock); } void ipoib_flush_paths(struct ipoib_dev_priv *priv) { struct ipoib_path *path, *tp; LIST_HEAD(remove_list); unsigned long flags; spin_lock_irqsave(&priv->lock, flags); list_splice_init(&priv->path_list, &remove_list); list_for_each_entry(path, &remove_list, list) rb_erase(&path->rb_node, &priv->path_tree); list_for_each_entry_safe(path, tp, &remove_list, list) { if (path->query) ib_sa_cancel_query(path->query_id, path->query); spin_unlock_irqrestore(&priv->lock, flags); wait_for_completion(&path->done); ipoib_path_free(priv, path); spin_lock_irqsave(&priv->lock, flags); } spin_unlock_irqrestore(&priv->lock, flags); } static void path_rec_completion(int status, struct ib_sa_path_rec *pathrec, void *path_ptr) { struct ipoib_path *path = path_ptr; struct ipoib_dev_priv *priv = path->priv; struct ifnet *dev = priv->dev; struct ipoib_ah *ah = NULL; struct ipoib_ah *old_ah = NULL; struct ifqueue mbqueue; struct mbuf *mb; unsigned long flags; if (!status) ipoib_dbg(priv, "PathRec LID 0x%04x for GID %16D\n", be16_to_cpu(pathrec->dlid), pathrec->dgid.raw, ":"); else ipoib_dbg(priv, "PathRec status %d for GID %16D\n", status, path->pathrec.dgid.raw, ":"); bzero(&mbqueue, sizeof(mbqueue)); if (!status) { struct ib_ah_attr av; if (!ib_init_ah_from_path(priv->ca, priv->port, pathrec, &av)) ah = ipoib_create_ah(priv, priv->pd, &av); } spin_lock_irqsave(&priv->lock, flags); if (ah) { path->pathrec = *pathrec; old_ah = path->ah; path->ah = ah; ipoib_dbg(priv, "created address handle %p for LID 0x%04x, SL %d\n", ah, be16_to_cpu(pathrec->dlid), pathrec->sl); for (;;) { _IF_DEQUEUE(&path->queue, mb); if (mb == NULL) break; _IF_ENQUEUE(&mbqueue, mb); } #ifdef CONFIG_INFINIBAND_IPOIB_CM if (ipoib_cm_enabled(priv, path->hwaddr) && !ipoib_cm_get(path)) ipoib_cm_set(path, ipoib_cm_create_tx(priv, path)); #endif path->valid = 1; } path->query = NULL; complete(&path->done); spin_unlock_irqrestore(&priv->lock, flags); if (old_ah) ipoib_put_ah(old_ah); for (;;) { _IF_DEQUEUE(&mbqueue, mb); if (mb == NULL) break; mb->m_pkthdr.rcvif = dev; if (dev->if_transmit(dev, mb)) ipoib_warn(priv, "dev_queue_xmit failed " "to requeue packet\n"); } } static struct ipoib_path * path_rec_create(struct ipoib_dev_priv *priv, uint8_t *hwaddr) { struct ipoib_path *path; if (!priv->broadcast) return NULL; path = kzalloc(sizeof *path, GFP_ATOMIC); if (!path) return NULL; path->priv = priv; bzero(&path->queue, sizeof(path->queue)); #ifdef CONFIG_INFINIBAND_IPOIB_CM memcpy(&path->hwaddr, hwaddr, INFINIBAND_ALEN); #endif memcpy(path->pathrec.dgid.raw, &hwaddr[4], sizeof (union ib_gid)); path->pathrec.sgid = priv->local_gid; path->pathrec.pkey = cpu_to_be16(priv->pkey); path->pathrec.numb_path = 1; path->pathrec.traffic_class = priv->broadcast->mcmember.traffic_class; return path; } static int path_rec_start(struct ipoib_dev_priv *priv, struct ipoib_path *path) { struct ifnet *dev = priv->dev; ib_sa_comp_mask comp_mask = IB_SA_PATH_REC_MTU_SELECTOR | IB_SA_PATH_REC_MTU; struct ib_sa_path_rec p_rec; p_rec = path->pathrec; p_rec.mtu_selector = IB_SA_GT; switch (roundup_pow_of_two(dev->if_mtu + IPOIB_ENCAP_LEN)) { case 512: p_rec.mtu = IB_MTU_256; break; case 1024: p_rec.mtu = IB_MTU_512; break; case 2048: p_rec.mtu = IB_MTU_1024; break; case 4096: p_rec.mtu = IB_MTU_2048; break; default: /* Wildcard everything */ comp_mask = 0; p_rec.mtu = 0; p_rec.mtu_selector = 0; } ipoib_dbg(priv, "Start path record lookup for %16D MTU > %d\n", p_rec.dgid.raw, ":", comp_mask ? ib_mtu_enum_to_int(p_rec.mtu) : 0); init_completion(&path->done); path->query_id = ib_sa_path_rec_get(&ipoib_sa_client, priv->ca, priv->port, &p_rec, comp_mask | IB_SA_PATH_REC_DGID | IB_SA_PATH_REC_SGID | IB_SA_PATH_REC_NUMB_PATH | IB_SA_PATH_REC_TRAFFIC_CLASS | IB_SA_PATH_REC_PKEY, 1000, GFP_ATOMIC, path_rec_completion, path, &path->query); if (path->query_id < 0) { ipoib_warn(priv, "ib_sa_path_rec_get failed: %d\n", path->query_id); path->query = NULL; complete(&path->done); return path->query_id; } return 0; } static void ipoib_unicast_send(struct mbuf *mb, struct ipoib_dev_priv *priv, struct ipoib_header *eh) { struct ipoib_path *path; path = __path_find(priv, eh->hwaddr + 4); if (!path || !path->valid) { int new_path = 0; if (!path) { path = path_rec_create(priv, eh->hwaddr); new_path = 1; } if (path) { _IF_ENQUEUE(&path->queue, mb); if (!path->query && path_rec_start(priv, path)) { spin_unlock_irqrestore(&priv->lock, flags); if (new_path) ipoib_path_free(priv, path); return; } else __path_add(priv, path); } else { ++priv->dev->if_oerrors; m_freem(mb); } return; } if (ipoib_cm_get(path) && ipoib_cm_up(path)) { ipoib_cm_send(priv, mb, ipoib_cm_get(path)); } else if (path->ah) { ipoib_send(priv, mb, path->ah, IPOIB_QPN(eh->hwaddr)); } else if ((path->query || !path_rec_start(priv, path)) && path->queue.ifq_len < IPOIB_MAX_PATH_REC_QUEUE) { _IF_ENQUEUE(&path->queue, mb); } else { ++priv->dev->if_oerrors; m_freem(mb); } } static int ipoib_send_one(struct ipoib_dev_priv *priv, struct mbuf *mb) { struct ipoib_header *eh; eh = mtod(mb, struct ipoib_header *); if (IPOIB_IS_MULTICAST(eh->hwaddr)) { /* Add in the P_Key for multicast*/ eh->hwaddr[8] = (priv->pkey >> 8) & 0xff; eh->hwaddr[9] = priv->pkey & 0xff; ipoib_mcast_send(priv, eh->hwaddr + 4, mb); } else ipoib_unicast_send(mb, priv, eh); return 0; } static void _ipoib_start(struct ifnet *dev, struct ipoib_dev_priv *priv) { struct mbuf *mb; if ((dev->if_drv_flags & (IFF_DRV_RUNNING|IFF_DRV_OACTIVE)) != IFF_DRV_RUNNING) return; spin_lock(&priv->lock); while (!IFQ_DRV_IS_EMPTY(&dev->if_snd) && (dev->if_drv_flags & IFF_DRV_OACTIVE) == 0) { IFQ_DRV_DEQUEUE(&dev->if_snd, mb); if (mb == NULL) break; IPOIB_MTAP(dev, mb); ipoib_send_one(priv, mb); } spin_unlock(&priv->lock); } static void ipoib_start(struct ifnet *dev) { _ipoib_start(dev, dev->if_softc); } static void ipoib_vlan_start(struct ifnet *dev) { struct ipoib_dev_priv *priv; struct mbuf *mb; priv = VLAN_COOKIE(dev); if (priv != NULL) return _ipoib_start(dev, priv); while (!IFQ_DRV_IS_EMPTY(&dev->if_snd)) { IFQ_DRV_DEQUEUE(&dev->if_snd, mb); if (mb == NULL) break; m_freem(mb); dev->if_oerrors++; } } int ipoib_dev_init(struct ipoib_dev_priv *priv, struct ib_device *ca, int port) { /* Allocate RX/TX "rings" to hold queued mbs */ priv->rx_ring = kzalloc(ipoib_recvq_size * sizeof *priv->rx_ring, GFP_KERNEL); if (!priv->rx_ring) { printk(KERN_WARNING "%s: failed to allocate RX ring (%d entries)\n", ca->name, ipoib_recvq_size); goto out; } priv->tx_ring = kzalloc(ipoib_sendq_size * sizeof *priv->tx_ring, GFP_KERNEL); if (!priv->tx_ring) { printk(KERN_WARNING "%s: failed to allocate TX ring (%d entries)\n", ca->name, ipoib_sendq_size); goto out_rx_ring_cleanup; } memset(priv->tx_ring, 0, ipoib_sendq_size * sizeof *priv->tx_ring); /* priv->tx_head, tx_tail & tx_outstanding are already 0 */ if (ipoib_ib_dev_init(priv, ca, port)) goto out_tx_ring_cleanup; return 0; out_tx_ring_cleanup: kfree(priv->tx_ring); out_rx_ring_cleanup: kfree(priv->rx_ring); out: return -ENOMEM; } static void ipoib_detach(struct ipoib_dev_priv *priv) { struct ifnet *dev; dev = priv->dev; if (!test_bit(IPOIB_FLAG_SUBINTERFACE, &priv->flags)) { bpfdetach(dev); if_detach(dev); if_free(dev); } else VLAN_SETCOOKIE(priv->dev, NULL); free(priv, M_TEMP); } void ipoib_dev_cleanup(struct ipoib_dev_priv *priv) { struct ipoib_dev_priv *cpriv, *tcpriv; /* Delete any child interfaces first */ list_for_each_entry_safe(cpriv, tcpriv, &priv->child_intfs, list) { ipoib_dev_cleanup(cpriv); ipoib_detach(cpriv); } ipoib_ib_dev_cleanup(priv); kfree(priv->rx_ring); kfree(priv->tx_ring); priv->rx_ring = NULL; priv->tx_ring = NULL; } static volatile int ipoib_unit; static struct ipoib_dev_priv * ipoib_priv_alloc(void) { struct ipoib_dev_priv *priv; priv = malloc(sizeof(struct ipoib_dev_priv), M_TEMP, M_ZERO|M_WAITOK); spin_lock_init(&priv->lock); mutex_init(&priv->vlan_mutex); INIT_LIST_HEAD(&priv->path_list); INIT_LIST_HEAD(&priv->child_intfs); INIT_LIST_HEAD(&priv->dead_ahs); INIT_LIST_HEAD(&priv->multicast_list); INIT_DELAYED_WORK(&priv->pkey_poll_task, ipoib_pkey_poll); INIT_DELAYED_WORK(&priv->mcast_task, ipoib_mcast_join_task); INIT_WORK(&priv->carrier_on_task, ipoib_mcast_carrier_on_task); INIT_WORK(&priv->flush_light, ipoib_ib_dev_flush_light); INIT_WORK(&priv->flush_normal, ipoib_ib_dev_flush_normal); INIT_WORK(&priv->flush_heavy, ipoib_ib_dev_flush_heavy); INIT_WORK(&priv->restart_task, ipoib_mcast_restart_task); INIT_DELAYED_WORK(&priv->ah_reap_task, ipoib_reap_ah); memcpy(priv->broadcastaddr, ipv4_bcast_addr, INFINIBAND_ALEN); return (priv); } struct ipoib_dev_priv * ipoib_intf_alloc(const char *name) { struct ipoib_dev_priv *priv; struct sockaddr_dl *sdl; struct ifnet *dev; priv = ipoib_priv_alloc(); dev = priv->dev = if_alloc(IFT_INFINIBAND); if (!dev) { free(priv, M_TEMP); return NULL; } dev->if_softc = priv; if_initname(dev, name, atomic_fetchadd_int(&ipoib_unit, 1)); dev->if_flags = IFF_BROADCAST | IFF_MULTICAST; dev->if_addrlen = INFINIBAND_ALEN; dev->if_hdrlen = IPOIB_HEADER_LEN; if_attach(dev); dev->if_init = ipoib_init; dev->if_ioctl = ipoib_ioctl; dev->if_start = ipoib_start; dev->if_output = ipoib_output; dev->if_input = ipoib_input; dev->if_resolvemulti = ipoib_resolvemulti; if_initbaudrate(dev, IF_Gbps(10)); dev->if_broadcastaddr = priv->broadcastaddr; dev->if_snd.ifq_maxlen = ipoib_sendq_size * 2; sdl = (struct sockaddr_dl *)dev->if_addr->ifa_addr; sdl->sdl_type = IFT_INFINIBAND; sdl->sdl_alen = dev->if_addrlen; priv->dev = dev; if_link_state_change(dev, LINK_STATE_DOWN); bpfattach(dev, DLT_EN10MB, ETHER_HDR_LEN); return dev->if_softc; } int ipoib_set_dev_features(struct ipoib_dev_priv *priv, struct ib_device *hca) { struct ib_device_attr *device_attr; int result = -ENOMEM; device_attr = kmalloc(sizeof *device_attr, GFP_KERNEL); if (!device_attr) { printk(KERN_WARNING "%s: allocation of %zu bytes failed\n", hca->name, sizeof *device_attr); return result; } result = ib_query_device(hca, device_attr); if (result) { printk(KERN_WARNING "%s: ib_query_device failed (ret = %d)\n", hca->name, result); kfree(device_attr); return result; } priv->hca_caps = device_attr->device_cap_flags; kfree(device_attr); priv->dev->if_hwassist = 0; priv->dev->if_capabilities = 0; #ifndef CONFIG_INFINIBAND_IPOIB_CM if (priv->hca_caps & IB_DEVICE_UD_IP_CSUM) { set_bit(IPOIB_FLAG_CSUM, &priv->flags); priv->dev->if_hwassist = CSUM_IP | CSUM_TCP | CSUM_UDP; priv->dev->if_capabilities = IFCAP_HWCSUM | IFCAP_VLAN_HWCSUM; } #if 0 if (priv->dev->features & NETIF_F_SG && priv->hca_caps & IB_DEVICE_UD_TSO) { priv->dev->if_capabilities |= IFCAP_TSO4; priv->dev->if_hwassist |= CSUM_TSO; } #endif #endif priv->dev->if_capabilities |= IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU | IFCAP_LINKSTATE; priv->dev->if_capenable = priv->dev->if_capabilities; return 0; } static struct ifnet * ipoib_add_port(const char *format, struct ib_device *hca, u8 port) { struct ipoib_dev_priv *priv; struct ib_port_attr attr; int result = -ENOMEM; priv = ipoib_intf_alloc(format); if (!priv) goto alloc_mem_failed; if (!ib_query_port(hca, port, &attr)) priv->max_ib_mtu = ib_mtu_enum_to_int(attr.max_mtu); else { printk(KERN_WARNING "%s: ib_query_port %d failed\n", hca->name, port); goto device_init_failed; } /* MTU will be reset when mcast join happens */ priv->dev->if_mtu = IPOIB_UD_MTU(priv->max_ib_mtu); priv->mcast_mtu = priv->admin_mtu = priv->dev->if_mtu; result = ib_query_pkey(hca, port, 0, &priv->pkey); if (result) { printk(KERN_WARNING "%s: ib_query_pkey port %d failed (ret = %d)\n", hca->name, port, result); goto device_init_failed; } if (ipoib_set_dev_features(priv, hca)) goto device_init_failed; /* * Set the full membership bit, so that we join the right * broadcast group, etc. */ priv->pkey |= 0x8000; priv->broadcastaddr[8] = priv->pkey >> 8; priv->broadcastaddr[9] = priv->pkey & 0xff; result = ib_query_gid(hca, port, 0, &priv->local_gid); if (result) { printk(KERN_WARNING "%s: ib_query_gid port %d failed (ret = %d)\n", hca->name, port, result); goto device_init_failed; } memcpy(IF_LLADDR(priv->dev) + 4, priv->local_gid.raw, sizeof (union ib_gid)); result = ipoib_dev_init(priv, hca, port); if (result < 0) { printk(KERN_WARNING "%s: failed to initialize port %d (ret = %d)\n", hca->name, port, result); goto device_init_failed; } if (ipoib_cm_admin_enabled(priv)) priv->dev->if_mtu = IPOIB_CM_MTU(ipoib_cm_max_mtu(priv)); INIT_IB_EVENT_HANDLER(&priv->event_handler, priv->ca, ipoib_event); result = ib_register_event_handler(&priv->event_handler); if (result < 0) { printk(KERN_WARNING "%s: ib_register_event_handler failed for " "port %d (ret = %d)\n", hca->name, port, result); goto event_failed; } if_printf(priv->dev, "Attached to %s port %d\n", hca->name, port); return priv->dev; event_failed: ipoib_dev_cleanup(priv); device_init_failed: ipoib_detach(priv); alloc_mem_failed: return ERR_PTR(result); } static void ipoib_add_one(struct ib_device *device) { struct list_head *dev_list; struct ifnet *dev; struct ipoib_dev_priv *priv; int s, e, p; if (rdma_node_get_transport(device->node_type) != RDMA_TRANSPORT_IB) return; dev_list = kmalloc(sizeof *dev_list, GFP_KERNEL); if (!dev_list) return; INIT_LIST_HEAD(dev_list); if (device->node_type == RDMA_NODE_IB_SWITCH) { s = 0; e = 0; } else { s = 1; e = device->phys_port_cnt; } for (p = s; p <= e; ++p) { if (rdma_port_get_link_layer(device, p) != IB_LINK_LAYER_INFINIBAND) continue; dev = ipoib_add_port("ib", device, p); if (!IS_ERR(dev)) { priv = dev->if_softc; list_add_tail(&priv->list, dev_list); } } ib_set_client_data(device, &ipoib_client, dev_list); } static void ipoib_remove_one(struct ib_device *device) { struct ipoib_dev_priv *priv, *tmp; struct list_head *dev_list; if (rdma_node_get_transport(device->node_type) != RDMA_TRANSPORT_IB) return; dev_list = ib_get_client_data(device, &ipoib_client); list_for_each_entry_safe(priv, tmp, dev_list, list) { if (rdma_port_get_link_layer(device, priv->port) != IB_LINK_LAYER_INFINIBAND) continue; ipoib_stop(priv); ib_unregister_event_handler(&priv->event_handler); /* dev_change_flags(priv->dev, priv->dev->flags & ~IFF_UP); */ flush_workqueue(ipoib_workqueue); ipoib_dev_cleanup(priv); ipoib_detach(priv); } kfree(dev_list); } static void ipoib_config_vlan(void *arg, struct ifnet *ifp, u_int16_t vtag) { struct ipoib_dev_priv *parent; struct ipoib_dev_priv *priv; struct ifnet *dev; uint16_t pkey; int error; if (ifp->if_type != IFT_INFINIBAND) return; dev = VLAN_DEVAT(ifp, vtag); if (dev == NULL) return; priv = NULL; error = 0; parent = ifp->if_softc; /* We only support 15 bits of pkey. */ if (vtag & 0x8000) return; pkey = vtag | 0x8000; /* Set full membership bit. */ if (pkey == parent->pkey) return; /* Check for dups */ mutex_lock(&parent->vlan_mutex); list_for_each_entry(priv, &parent->child_intfs, list) { if (priv->pkey == pkey) { priv = NULL; error = EBUSY; goto out; } } priv = ipoib_priv_alloc(); priv->dev = dev; priv->max_ib_mtu = parent->max_ib_mtu; priv->mcast_mtu = priv->admin_mtu = parent->dev->if_mtu; set_bit(IPOIB_FLAG_SUBINTERFACE, &priv->flags); error = ipoib_set_dev_features(priv, parent->ca); if (error) goto out; priv->pkey = pkey; priv->broadcastaddr[8] = pkey >> 8; priv->broadcastaddr[9] = pkey & 0xff; dev->if_broadcastaddr = priv->broadcastaddr; error = ipoib_dev_init(priv, parent->ca, parent->port); if (error) goto out; priv->parent = parent->dev; list_add_tail(&priv->list, &parent->child_intfs); VLAN_SETCOOKIE(dev, priv); dev->if_start = ipoib_vlan_start; dev->if_drv_flags &= ~IFF_DRV_RUNNING; dev->if_hdrlen = IPOIB_HEADER_LEN; if (ifp->if_drv_flags & IFF_DRV_RUNNING) ipoib_open(priv); mutex_unlock(&parent->vlan_mutex); return; out: mutex_unlock(&parent->vlan_mutex); if (priv) free(priv, M_TEMP); if (error) ipoib_warn(parent, "failed to initialize subinterface: device %s, port %d vtag 0x%X", parent->ca->name, parent->port, vtag); return; } static void ipoib_unconfig_vlan(void *arg, struct ifnet *ifp, u_int16_t vtag) { struct ipoib_dev_priv *parent; struct ipoib_dev_priv *priv; struct ifnet *dev; uint16_t pkey; if (ifp->if_type != IFT_INFINIBAND) return; dev = VLAN_DEVAT(ifp, vtag); if (dev) VLAN_SETCOOKIE(dev, NULL); pkey = vtag | 0x8000; parent = ifp->if_softc; mutex_lock(&parent->vlan_mutex); list_for_each_entry(priv, &parent->child_intfs, list) { if (priv->pkey == pkey) { ipoib_dev_cleanup(priv); list_del(&priv->list); break; } } mutex_unlock(&parent->vlan_mutex); } eventhandler_tag ipoib_vlan_attach; eventhandler_tag ipoib_vlan_detach; static int __init ipoib_init_module(void) { int ret; ipoib_recvq_size = roundup_pow_of_two(ipoib_recvq_size); ipoib_recvq_size = min(ipoib_recvq_size, IPOIB_MAX_QUEUE_SIZE); ipoib_recvq_size = max(ipoib_recvq_size, IPOIB_MIN_QUEUE_SIZE); ipoib_sendq_size = roundup_pow_of_two(ipoib_sendq_size); ipoib_sendq_size = min(ipoib_sendq_size, IPOIB_MAX_QUEUE_SIZE); ipoib_sendq_size = max(ipoib_sendq_size, max(2 * MAX_SEND_CQE, IPOIB_MIN_QUEUE_SIZE)); #ifdef CONFIG_INFINIBAND_IPOIB_CM ipoib_max_conn_qp = min(ipoib_max_conn_qp, IPOIB_CM_MAX_CONN_QP); #endif ipoib_vlan_attach = EVENTHANDLER_REGISTER(vlan_config, ipoib_config_vlan, NULL, EVENTHANDLER_PRI_FIRST); ipoib_vlan_detach = EVENTHANDLER_REGISTER(vlan_unconfig, ipoib_unconfig_vlan, NULL, EVENTHANDLER_PRI_FIRST); /* * We create our own workqueue mainly because we want to be * able to flush it when devices are being removed. We can't * use schedule_work()/flush_scheduled_work() because both * unregister_netdev() and linkwatch_event take the rtnl lock, * so flush_scheduled_work() can deadlock during device * removal. */ ipoib_workqueue = create_singlethread_workqueue("ipoib"); if (!ipoib_workqueue) { ret = -ENOMEM; goto err_fs; } ib_sa_register_client(&ipoib_sa_client); ret = ib_register_client(&ipoib_client); if (ret) goto err_sa; return 0; err_sa: ib_sa_unregister_client(&ipoib_sa_client); destroy_workqueue(ipoib_workqueue); err_fs: return ret; } static void __exit ipoib_cleanup_module(void) { EVENTHANDLER_DEREGISTER(vlan_config, ipoib_vlan_attach); EVENTHANDLER_DEREGISTER(vlan_unconfig, ipoib_vlan_detach); ib_unregister_client(&ipoib_client); ib_sa_unregister_client(&ipoib_sa_client); destroy_workqueue(ipoib_workqueue); } /* * Infiniband output routine. */ static int ipoib_output(struct ifnet *ifp, struct mbuf *m, const struct sockaddr *dst, struct route *ro) { u_char edst[INFINIBAND_ALEN]; struct llentry *lle = NULL; struct rtentry *rt0 = NULL; struct ipoib_header *eh; int error = 0; short type; if (ro != NULL) { if (!(m->m_flags & (M_BCAST | M_MCAST))) lle = ro->ro_lle; rt0 = ro->ro_rt; } #ifdef MAC error = mac_ifnet_check_transmit(ifp, m); if (error) goto bad; #endif M_PROFILE(m); if (ifp->if_flags & IFF_MONITOR) { error = ENETDOWN; goto bad; } if (!((ifp->if_flags & IFF_UP) && (ifp->if_drv_flags & IFF_DRV_RUNNING))) { error = ENETDOWN; goto bad; } switch (dst->sa_family) { #ifdef INET case AF_INET: if (lle != NULL && (lle->la_flags & LLE_VALID)) memcpy(edst, &lle->ll_addr.mac8, sizeof(edst)); else if (m->m_flags & M_MCAST) ip_ib_mc_map(((struct sockaddr_in *)dst)->sin_addr.s_addr, ifp->if_broadcastaddr, edst); else error = arpresolve(ifp, rt0, m, dst, edst, &lle); if (error) return (error == EWOULDBLOCK ? 0 : error); type = htons(ETHERTYPE_IP); break; case AF_ARP: { struct arphdr *ah; ah = mtod(m, struct arphdr *); ah->ar_hrd = htons(ARPHRD_INFINIBAND); switch(ntohs(ah->ar_op)) { case ARPOP_REVREQUEST: case ARPOP_REVREPLY: type = htons(ETHERTYPE_REVARP); break; case ARPOP_REQUEST: case ARPOP_REPLY: default: type = htons(ETHERTYPE_ARP); break; } if (m->m_flags & M_BCAST) bcopy(ifp->if_broadcastaddr, edst, INFINIBAND_ALEN); else bcopy(ar_tha(ah), edst, INFINIBAND_ALEN); } break; #endif #ifdef INET6 case AF_INET6: if (lle != NULL && (lle->la_flags & LLE_VALID)) memcpy(edst, &lle->ll_addr.mac8, sizeof(edst)); else if (m->m_flags & M_MCAST) ipv6_ib_mc_map(&((struct sockaddr_in6 *)dst)->sin6_addr, ifp->if_broadcastaddr, edst); else error = nd6_storelladdr(ifp, m, dst, (u_char *)edst, &lle); if (error) return error; type = htons(ETHERTYPE_IPV6); break; #endif default: if_printf(ifp, "can't handle af%d\n", dst->sa_family); error = EAFNOSUPPORT; goto bad; } /* * Add local net header. If no space in first mbuf, * allocate another. */ M_PREPEND(m, IPOIB_HEADER_LEN, M_NOWAIT); if (m == NULL) { error = ENOBUFS; goto bad; } eh = mtod(m, struct ipoib_header *); (void)memcpy(&eh->proto, &type, sizeof(eh->proto)); (void)memcpy(&eh->hwaddr, edst, sizeof (edst)); /* * Queue message on interface, update output statistics if * successful, and start output if interface not yet active. */ return ((ifp->if_transmit)(ifp, m)); bad: if (m != NULL) m_freem(m); return (error); } /* * Upper layer processing for a received Infiniband packet. */ void ipoib_demux(struct ifnet *ifp, struct mbuf *m, u_short proto) { int isr; #ifdef MAC /* * Tag the mbuf with an appropriate MAC label before any other * consumers can get to it. */ mac_ifnet_create_mbuf(ifp, m); #endif /* Allow monitor mode to claim this frame, after stats are updated. */ if (ifp->if_flags & IFF_MONITOR) { if_printf(ifp, "discard frame at IFF_MONITOR\n"); m_freem(m); return; } /* * Dispatch frame to upper layer. */ switch (proto) { #ifdef INET case ETHERTYPE_IP: isr = NETISR_IP; break; case ETHERTYPE_ARP: if (ifp->if_flags & IFF_NOARP) { /* Discard packet if ARP is disabled on interface */ m_freem(m); return; } isr = NETISR_ARP; break; #endif #ifdef INET6 case ETHERTYPE_IPV6: isr = NETISR_IPV6; break; #endif default: goto discard; } netisr_dispatch(isr, m); return; discard: m_freem(m); } /* * Process a received Infiniband packet. */ static void ipoib_input(struct ifnet *ifp, struct mbuf *m) { struct ipoib_header *eh; if ((ifp->if_flags & IFF_UP) == 0) { m_freem(m); return; } CURVNET_SET_QUIET(ifp->if_vnet); /* Let BPF have it before we strip the header. */ IPOIB_MTAP(ifp, m); eh = mtod(m, struct ipoib_header *); /* * Reset layer specific mbuf flags to avoid confusing upper layers. * Strip off Infiniband header. */ m->m_flags &= ~M_VLANTAG; m_clrprotoflags(m); m_adj(m, IPOIB_HEADER_LEN); if (IPOIB_IS_MULTICAST(eh->hwaddr)) { if (memcmp(eh->hwaddr, ifp->if_broadcastaddr, ifp->if_addrlen) == 0) m->m_flags |= M_BCAST; else m->m_flags |= M_MCAST; ifp->if_imcasts++; } ipoib_demux(ifp, m, ntohs(eh->proto)); CURVNET_RESTORE(); } static int ipoib_resolvemulti(struct ifnet *ifp, struct sockaddr **llsa, struct sockaddr *sa) { struct sockaddr_dl *sdl; #ifdef INET struct sockaddr_in *sin; #endif #ifdef INET6 struct sockaddr_in6 *sin6; #endif u_char *e_addr; switch(sa->sa_family) { case AF_LINK: /* * No mapping needed. Just check that it's a valid MC address. */ sdl = (struct sockaddr_dl *)sa; e_addr = LLADDR(sdl); if (!IPOIB_IS_MULTICAST(e_addr)) return EADDRNOTAVAIL; *llsa = 0; return 0; #ifdef INET case AF_INET: sin = (struct sockaddr_in *)sa; if (!IN_MULTICAST(ntohl(sin->sin_addr.s_addr))) return EADDRNOTAVAIL; sdl = malloc(sizeof *sdl, M_IFMADDR, M_NOWAIT|M_ZERO); if (sdl == NULL) return ENOMEM; sdl->sdl_len = sizeof *sdl; sdl->sdl_family = AF_LINK; sdl->sdl_index = ifp->if_index; sdl->sdl_type = IFT_INFINIBAND; sdl->sdl_alen = INFINIBAND_ALEN; e_addr = LLADDR(sdl); ip_ib_mc_map(sin->sin_addr.s_addr, ifp->if_broadcastaddr, e_addr); *llsa = (struct sockaddr *)sdl; return 0; #endif #ifdef INET6 case AF_INET6: sin6 = (struct sockaddr_in6 *)sa; /* * An IP6 address of 0 means listen to all * of the multicast address used for IP6. * This has no meaning in ipoib. */ if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) return EADDRNOTAVAIL; if (!IN6_IS_ADDR_MULTICAST(&sin6->sin6_addr)) return EADDRNOTAVAIL; sdl = malloc(sizeof *sdl, M_IFMADDR, M_NOWAIT|M_ZERO); if (sdl == NULL) return (ENOMEM); sdl->sdl_len = sizeof *sdl; sdl->sdl_family = AF_LINK; sdl->sdl_index = ifp->if_index; sdl->sdl_type = IFT_INFINIBAND; sdl->sdl_alen = INFINIBAND_ALEN; e_addr = LLADDR(sdl); ipv6_ib_mc_map(&sin6->sin6_addr, ifp->if_broadcastaddr, e_addr); *llsa = (struct sockaddr *)sdl; return 0; #endif default: return EAFNOSUPPORT; } } module_init(ipoib_init_module); module_exit(ipoib_cleanup_module); #undef MODULE_VERSION #include static int ipoib_evhand(module_t mod, int event, void *arg) { return (0); } static moduledata_t ipoib_mod = { .name = "ipoib", .evhand = ipoib_evhand, }; DECLARE_MODULE(ipoib, ipoib_mod, SI_SUB_SMP, SI_ORDER_ANY); MODULE_DEPEND(ipoib, ibcore, 1, 1, 1); Index: stable/10/sys/ofed/drivers/infiniband/ulp/sdp/sdp.h =================================================================== --- stable/10/sys/ofed/drivers/infiniband/ulp/sdp/sdp.h (revision 271126) +++ stable/10/sys/ofed/drivers/infiniband/ulp/sdp/sdp.h (revision 271127) @@ -1,721 +1,722 @@ #ifndef _SDP_H_ #define _SDP_H_ #include "opt_ddb.h" #include "opt_inet.h" #include "opt_ofed.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef DDB #include #endif #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef SDP_DEBUG #define CONFIG_INFINIBAND_SDP_DEBUG #endif #include "sdp_dbg.h" #undef LIST_HEAD /* From sys/queue.h */ #define LIST_HEAD(name, type) \ struct name { \ struct type *lh_first; /* first element */ \ } /* Interval between sucessive polls in the Tx routine when polling is used instead of interrupts (in per-core Tx rings) - should be power of 2 */ #define SDP_TX_POLL_MODER 16 #define SDP_TX_POLL_TIMEOUT (HZ / 20) #define SDP_NAGLE_TIMEOUT (HZ / 10) #define SDP_SRCAVAIL_CANCEL_TIMEOUT (HZ * 5) #define SDP_SRCAVAIL_ADV_TIMEOUT (1 * HZ) #define SDP_SRCAVAIL_PAYLOAD_LEN 1 #define SDP_RESOLVE_TIMEOUT 1000 #define SDP_ROUTE_TIMEOUT 1000 #define SDP_RETRY_COUNT 5 #define SDP_KEEPALIVE_TIME (120 * 60 * HZ) #define SDP_FIN_WAIT_TIMEOUT (60 * HZ) /* like TCP_FIN_TIMEOUT */ #define SDP_TX_SIZE 0x40 #define SDP_RX_SIZE 0x40 #define SDP_FMR_SIZE (MIN(0x1000, PAGE_SIZE) / sizeof(u64)) #define SDP_FMR_POOL_SIZE 1024 #define SDP_FMR_DIRTY_SIZE ( SDP_FMR_POOL_SIZE / 4 ) #define SDP_MAX_RDMA_READ_LEN (PAGE_SIZE * (SDP_FMR_SIZE - 2)) /* mb inlined data len - rest will be rx'ed into frags */ #define SDP_HEAD_SIZE (sizeof(struct sdp_bsdh)) /* limit tx payload len, if the sink supports bigger buffers than the source * can handle. * or rx fragment size (limited by sge->length size) */ #define SDP_MAX_PACKET (1 << 16) #define SDP_MAX_PAYLOAD (SDP_MAX_PACKET - SDP_HEAD_SIZE) #define SDP_MAX_RECV_SGES (SDP_MAX_PACKET / MCLBYTES) #define SDP_MAX_SEND_SGES (SDP_MAX_PACKET / MCLBYTES) + 2 #define SDP_NUM_WC 4 #define SDP_DEF_ZCOPY_THRESH 64*1024 #define SDP_MIN_ZCOPY_THRESH PAGE_SIZE #define SDP_MAX_ZCOPY_THRESH 1048576 #define SDP_OP_RECV 0x800000000LL #define SDP_OP_SEND 0x400000000LL #define SDP_OP_RDMA 0x200000000LL #define SDP_OP_NOP 0x100000000LL /* how long (in jiffies) to block sender till tx completion*/ #define SDP_BZCOPY_POLL_TIMEOUT (HZ / 10) #define SDP_AUTO_CONF 0xffff #define AUTO_MOD_DELAY (HZ / 4) struct sdp_mb_cb { __u32 seq; /* Starting sequence number */ struct bzcopy_state *bz; struct rx_srcavail_state *rx_sa; struct tx_srcavail_state *tx_sa; }; #define M_PUSH M_PROTO1 /* Do a 'push'. */ #define M_URG M_PROTO2 /* Mark as urgent (oob). */ #define SDP_SKB_CB(__mb) ((struct sdp_mb_cb *)&((__mb)->cb[0])) #define BZCOPY_STATE(mb) (SDP_SKB_CB(mb)->bz) #define RX_SRCAVAIL_STATE(mb) (SDP_SKB_CB(mb)->rx_sa) #define TX_SRCAVAIL_STATE(mb) (SDP_SKB_CB(mb)->tx_sa) #ifndef MIN #define MIN(a, b) (a < b ? a : b) #endif #define ring_head(ring) (atomic_read(&(ring).head)) #define ring_tail(ring) (atomic_read(&(ring).tail)) #define ring_posted(ring) (ring_head(ring) - ring_tail(ring)) #define rx_ring_posted(ssk) ring_posted(ssk->rx_ring) #ifdef SDP_ZCOPY #define tx_ring_posted(ssk) (ring_posted(ssk->tx_ring) + \ (ssk->tx_ring.rdma_inflight ? ssk->tx_ring.rdma_inflight->busy : 0)) #else #define tx_ring_posted(ssk) ring_posted(ssk->tx_ring) #endif extern int sdp_zcopy_thresh; extern int rcvbuf_initial_size; extern struct workqueue_struct *rx_comp_wq; extern struct ib_client sdp_client; enum sdp_mid { SDP_MID_HELLO = 0x0, SDP_MID_HELLO_ACK = 0x1, SDP_MID_DISCONN = 0x2, SDP_MID_ABORT = 0x3, SDP_MID_SENDSM = 0x4, SDP_MID_RDMARDCOMPL = 0x6, SDP_MID_SRCAVAIL_CANCEL = 0x8, SDP_MID_CHRCVBUF = 0xB, SDP_MID_CHRCVBUF_ACK = 0xC, SDP_MID_SINKAVAIL = 0xFD, SDP_MID_SRCAVAIL = 0xFE, SDP_MID_DATA = 0xFF, }; enum sdp_flags { SDP_OOB_PRES = 1 << 0, SDP_OOB_PEND = 1 << 1, }; enum { SDP_MIN_TX_CREDITS = 2 }; enum { SDP_ERR_ERROR = -4, SDP_ERR_FAULT = -3, SDP_NEW_SEG = -2, SDP_DO_WAIT_MEM = -1 }; struct sdp_bsdh { u8 mid; u8 flags; __u16 bufs; __u32 len; __u32 mseq; __u32 mseq_ack; } __attribute__((__packed__)); union cma_ip_addr { struct in6_addr ip6; struct { __u32 pad[3]; __u32 addr; } ip4; } __attribute__((__packed__)); /* TODO: too much? Can I avoid having the src/dst and port here? */ struct sdp_hh { struct sdp_bsdh bsdh; u8 majv_minv; u8 ipv_cap; u8 rsvd1; u8 max_adverts; __u32 desremrcvsz; __u32 localrcvsz; __u16 port; __u16 rsvd2; union cma_ip_addr src_addr; union cma_ip_addr dst_addr; u8 rsvd3[IB_CM_REQ_PRIVATE_DATA_SIZE - sizeof(struct sdp_bsdh) - 48]; } __attribute__((__packed__)); struct sdp_hah { struct sdp_bsdh bsdh; u8 majv_minv; u8 ipv_cap; u8 rsvd1; u8 ext_max_adverts; __u32 actrcvsz; u8 rsvd2[IB_CM_REP_PRIVATE_DATA_SIZE - sizeof(struct sdp_bsdh) - 8]; } __attribute__((__packed__)); struct sdp_rrch { __u32 len; } __attribute__((__packed__)); struct sdp_srcah { __u32 len; __u32 rkey; __u64 vaddr; } __attribute__((__packed__)); struct sdp_buf { struct mbuf *mb; u64 mapping[SDP_MAX_SEND_SGES]; } __attribute__((__packed__)); struct sdp_chrecvbuf { u32 size; } __attribute__((__packed__)); /* Context used for synchronous zero copy bcopy (BZCOPY) */ struct bzcopy_state { unsigned char __user *u_base; int u_len; int left; int page_cnt; int cur_page; int cur_offset; int busy; struct sdp_sock *ssk; struct page **pages; }; enum rx_sa_flag { RX_SA_ABORTED = 2, }; enum tx_sa_flag { TX_SA_SENDSM = 0x01, TX_SA_CROSS_SEND = 0x02, TX_SA_INTRRUPTED = 0x04, TX_SA_TIMEDOUT = 0x08, TX_SA_ERROR = 0x10, }; struct rx_srcavail_state { /* Advertised buffer stuff */ u32 mseq; u32 used; u32 reported; u32 len; u32 rkey; u64 vaddr; /* Dest buff info */ struct ib_umem *umem; struct ib_pool_fmr *fmr; /* Utility */ u8 busy; enum rx_sa_flag flags; }; struct tx_srcavail_state { /* Data below 'busy' will be reset */ u8 busy; struct ib_umem *umem; struct ib_pool_fmr *fmr; u32 bytes_sent; u32 bytes_acked; enum tx_sa_flag abort_flags; u8 posted; u32 mseq; }; struct sdp_tx_ring { #ifdef SDP_ZCOPY struct rx_srcavail_state *rdma_inflight; #endif struct sdp_buf *buffer; atomic_t head; atomic_t tail; struct ib_cq *cq; atomic_t credits; #define tx_credits(ssk) (atomic_read(&ssk->tx_ring.credits)) struct callout timer; u16 poll_cnt; }; struct sdp_rx_ring { struct sdp_buf *buffer; atomic_t head; atomic_t tail; struct ib_cq *cq; int destroyed; struct rwlock destroyed_lock; }; struct sdp_device { struct ib_pd *pd; struct ib_mr *mr; struct ib_fmr_pool *fmr_pool; }; struct sdp_moderation { unsigned long last_moder_packets; unsigned long last_moder_tx_packets; unsigned long last_moder_bytes; unsigned long last_moder_jiffies; int last_moder_time; u16 rx_usecs; u16 rx_frames; u16 tx_usecs; u32 pkt_rate_low; u16 rx_usecs_low; u32 pkt_rate_high; u16 rx_usecs_high; u16 sample_interval; u16 adaptive_rx_coal; u32 msg_enable; int moder_cnt; int moder_time; }; /* These are flags fields. */ #define SDP_TIMEWAIT 0x0001 /* In ssk timewait state. */ #define SDP_DROPPED 0x0002 /* Socket has been dropped. */ #define SDP_SOCKREF 0x0004 /* Holding a sockref for close. */ #define SDP_NODELAY 0x0008 /* Disble nagle. */ #define SDP_NEEDFIN 0x0010 /* Send a fin on the next tx. */ #define SDP_DREQWAIT 0x0020 /* Waiting on DREQ. */ #define SDP_DESTROY 0x0040 /* Being destroyed. */ #define SDP_DISCON 0x0080 /* rdma_disconnect is owed. */ /* These are oobflags */ #define SDP_HADOOB 0x0001 /* Had OOB data. */ #define SDP_HAVEOOB 0x0002 /* Have OOB data. */ struct sdp_sock { LIST_ENTRY(sdp_sock) list; struct socket *socket; struct rdma_cm_id *id; struct ib_device *ib_device; struct sdp_device *sdp_dev; struct ib_qp *qp; struct ucred *cred; struct callout keep2msl; /* 2msl and keepalive timer. */ struct callout nagle_timer; /* timeout waiting for ack */ struct ib_ucontext context; in_port_t lport; in_addr_t laddr; in_port_t fport; in_addr_t faddr; int flags; int oobflags; /* protected by rx lock. */ int state; int softerror; int recv_bytes; /* Bytes per recv. buf including header */ int xmit_size_goal; char iobc; struct sdp_rx_ring rx_ring; struct sdp_tx_ring tx_ring; struct rwlock lock; struct mbuf *rx_ctl_q; struct mbuf *rx_ctl_tail; int qp_active; /* XXX Flag. */ int max_sge; struct work_struct rx_comp_work; #define rcv_nxt(ssk) atomic_read(&(ssk->rcv_nxt)) atomic_t rcv_nxt; /* SDP specific */ atomic_t mseq_ack; #define mseq_ack(ssk) (atomic_read(&ssk->mseq_ack)) unsigned max_bufs; /* Initial buffers offered by other side */ unsigned min_bufs; /* Low water mark to wake senders */ unsigned long nagle_last_unacked; /* mseq of lastest unacked packet */ atomic_t remote_credits; #define remote_credits(ssk) (atomic_read(&ssk->remote_credits)) int poll_cq; /* SDP slow start */ int recv_request_head; /* mark the rx_head when the resize request was recieved */ int recv_request; /* XXX flag if request to resize was recieved */ unsigned long tx_packets; unsigned long rx_packets; unsigned long tx_bytes; unsigned long rx_bytes; struct sdp_moderation auto_mod; struct task shutdown_task; #ifdef SDP_ZCOPY struct tx_srcavail_state *tx_sa; struct rx_srcavail_state *rx_sa; spinlock_t tx_sa_lock; struct delayed_work srcavail_cancel_work; int srcavail_cancel_mseq; /* ZCOPY data: -1:use global; 0:disable zcopy; >0: zcopy threshold */ int zcopy_thresh; #endif }; #define sdp_sk(so) ((struct sdp_sock *)(so->so_pcb)) #define SDP_RLOCK(ssk) rw_rlock(&(ssk)->lock) #define SDP_WLOCK(ssk) rw_wlock(&(ssk)->lock) #define SDP_RUNLOCK(ssk) rw_runlock(&(ssk)->lock) #define SDP_WUNLOCK(ssk) rw_wunlock(&(ssk)->lock) #define SDP_WLOCK_ASSERT(ssk) rw_assert(&(ssk)->lock, RA_WLOCKED) #define SDP_RLOCK_ASSERT(ssk) rw_assert(&(ssk)->lock, RA_RLOCKED) #define SDP_LOCK_ASSERT(ssk) rw_assert(&(ssk)->lock, RA_LOCKED) static inline void tx_sa_reset(struct tx_srcavail_state *tx_sa) { memset((void *)&tx_sa->busy, 0, sizeof(*tx_sa) - offsetof(typeof(*tx_sa), busy)); } static inline void rx_ring_unlock(struct sdp_rx_ring *rx_ring) { rw_runlock(&rx_ring->destroyed_lock); } static inline int rx_ring_trylock(struct sdp_rx_ring *rx_ring) { rw_rlock(&rx_ring->destroyed_lock); if (rx_ring->destroyed) { rx_ring_unlock(rx_ring); return 0; } return 1; } static inline void rx_ring_destroy_lock(struct sdp_rx_ring *rx_ring) { rw_wlock(&rx_ring->destroyed_lock); rx_ring->destroyed = 1; rw_wunlock(&rx_ring->destroyed_lock); } static inline void sdp_arm_rx_cq(struct sdp_sock *ssk) { sdp_prf(ssk->socket, NULL, "Arming RX cq"); sdp_dbg_data(ssk->socket, "Arming RX cq\n"); ib_req_notify_cq(ssk->rx_ring.cq, IB_CQ_NEXT_COMP); } static inline void sdp_arm_tx_cq(struct sdp_sock *ssk) { sdp_prf(ssk->socket, NULL, "Arming TX cq"); sdp_dbg_data(ssk->socket, "Arming TX cq. credits: %d, posted: %d\n", tx_credits(ssk), tx_ring_posted(ssk)); ib_req_notify_cq(ssk->tx_ring.cq, IB_CQ_NEXT_COMP); } /* return the min of: * - tx credits * - free slots in tx_ring (not including SDP_MIN_TX_CREDITS */ static inline int tx_slots_free(struct sdp_sock *ssk) { int min_free; min_free = MIN(tx_credits(ssk), SDP_TX_SIZE - tx_ring_posted(ssk)); if (min_free < SDP_MIN_TX_CREDITS) return 0; return min_free - SDP_MIN_TX_CREDITS; }; /* utilities */ static inline char *mid2str(int mid) { #define ENUM2STR(e) [e] = #e static char *mid2str[] = { ENUM2STR(SDP_MID_HELLO), ENUM2STR(SDP_MID_HELLO_ACK), ENUM2STR(SDP_MID_ABORT), ENUM2STR(SDP_MID_DISCONN), ENUM2STR(SDP_MID_SENDSM), ENUM2STR(SDP_MID_RDMARDCOMPL), ENUM2STR(SDP_MID_SRCAVAIL_CANCEL), ENUM2STR(SDP_MID_CHRCVBUF), ENUM2STR(SDP_MID_CHRCVBUF_ACK), ENUM2STR(SDP_MID_DATA), ENUM2STR(SDP_MID_SRCAVAIL), ENUM2STR(SDP_MID_SINKAVAIL), }; if (mid >= ARRAY_SIZE(mid2str)) return NULL; return mid2str[mid]; } static inline struct mbuf * sdp_alloc_mb(struct socket *sk, u8 mid, int size, int wait) { struct sdp_bsdh *h; struct mbuf *mb; MGETHDR(mb, wait, MT_DATA); if (mb == NULL) return (NULL); mb->m_pkthdr.len = mb->m_len = sizeof(struct sdp_bsdh); h = mtod(mb, struct sdp_bsdh *); h->mid = mid; return mb; } static inline struct mbuf * sdp_alloc_mb_data(struct socket *sk, int wait) { return sdp_alloc_mb(sk, SDP_MID_DATA, 0, wait); } static inline struct mbuf * sdp_alloc_mb_disconnect(struct socket *sk, int wait) { return sdp_alloc_mb(sk, SDP_MID_DISCONN, 0, wait); } static inline void * mb_put(struct mbuf *mb, int len) { uint8_t *data; data = mb->m_data; data += mb->m_len; mb->m_len += len; return (void *)data; } static inline struct mbuf * sdp_alloc_mb_chrcvbuf_ack(struct socket *sk, int size, int wait) { struct mbuf *mb; struct sdp_chrecvbuf *resp_size; mb = sdp_alloc_mb(sk, SDP_MID_CHRCVBUF_ACK, sizeof(*resp_size), wait); if (mb == NULL) return (NULL); resp_size = (struct sdp_chrecvbuf *)mb_put(mb, sizeof *resp_size); resp_size->size = htonl(size); return mb; } static inline struct mbuf * sdp_alloc_mb_srcavail(struct socket *sk, u32 len, u32 rkey, u64 vaddr, int wait) { struct mbuf *mb; struct sdp_srcah *srcah; mb = sdp_alloc_mb(sk, SDP_MID_SRCAVAIL, sizeof(*srcah), wait); if (mb == NULL) return (NULL); srcah = (struct sdp_srcah *)mb_put(mb, sizeof(*srcah)); srcah->len = htonl(len); srcah->rkey = htonl(rkey); srcah->vaddr = cpu_to_be64(vaddr); return mb; } static inline struct mbuf * sdp_alloc_mb_srcavail_cancel(struct socket *sk, int wait) { return sdp_alloc_mb(sk, SDP_MID_SRCAVAIL_CANCEL, 0, wait); } static inline struct mbuf * sdp_alloc_mb_rdmardcompl(struct socket *sk, u32 len, int wait) { struct mbuf *mb; struct sdp_rrch *rrch; mb = sdp_alloc_mb(sk, SDP_MID_RDMARDCOMPL, sizeof(*rrch), wait); if (mb == NULL) return (NULL); rrch = (struct sdp_rrch *)mb_put(mb, sizeof(*rrch)); rrch->len = htonl(len); return mb; } static inline struct mbuf * sdp_alloc_mb_sendsm(struct socket *sk, int wait) { return sdp_alloc_mb(sk, SDP_MID_SENDSM, 0, wait); } static inline int sdp_tx_ring_slots_left(struct sdp_sock *ssk) { return SDP_TX_SIZE - tx_ring_posted(ssk); } static inline int credit_update_needed(struct sdp_sock *ssk) { int c; c = remote_credits(ssk); if (likely(c > SDP_MIN_TX_CREDITS)) c += c/2; return unlikely(c < rx_ring_posted(ssk)) && likely(tx_credits(ssk) > 0) && likely(sdp_tx_ring_slots_left(ssk)); } #define SDPSTATS_COUNTER_INC(stat) #define SDPSTATS_COUNTER_ADD(stat, val) #define SDPSTATS_COUNTER_MID_INC(stat, mid) #define SDPSTATS_HIST_LINEAR(stat, size) #define SDPSTATS_HIST(stat, size) static inline void sdp_cleanup_sdp_buf(struct sdp_sock *ssk, struct sdp_buf *sbuf, enum dma_data_direction dir) { struct ib_device *dev; struct mbuf *mb; int i; dev = ssk->ib_device; for (i = 0, mb = sbuf->mb; mb != NULL; mb = mb->m_next, i++) ib_dma_unmap_single(dev, sbuf->mapping[i], mb->m_len, dir); } /* sdp_main.c */ void sdp_set_default_moderation(struct sdp_sock *ssk); void sdp_start_keepalive_timer(struct socket *sk); void sdp_urg(struct sdp_sock *ssk, struct mbuf *mb); void sdp_cancel_dreq_wait_timeout(struct sdp_sock *ssk); void sdp_abort(struct socket *sk); struct sdp_sock *sdp_notify(struct sdp_sock *ssk, int error); /* sdp_cma.c */ int sdp_cma_handler(struct rdma_cm_id *, struct rdma_cm_event *); /* sdp_tx.c */ int sdp_tx_ring_create(struct sdp_sock *ssk, struct ib_device *device); void sdp_tx_ring_destroy(struct sdp_sock *ssk); int sdp_xmit_poll(struct sdp_sock *ssk, int force); void sdp_post_send(struct sdp_sock *ssk, struct mbuf *mb); void sdp_post_sends(struct sdp_sock *ssk, int wait); void sdp_post_keepalive(struct sdp_sock *ssk); /* sdp_rx.c */ void sdp_rx_ring_init(struct sdp_sock *ssk); int sdp_rx_ring_create(struct sdp_sock *ssk, struct ib_device *device); void sdp_rx_ring_destroy(struct sdp_sock *ssk); int sdp_resize_buffers(struct sdp_sock *ssk, u32 new_size); int sdp_init_buffers(struct sdp_sock *ssk, u32 new_size); void sdp_do_posts(struct sdp_sock *ssk); void sdp_rx_comp_full(struct sdp_sock *ssk); /* sdp_zcopy.c */ +struct kiocb; int sdp_sendmsg_zcopy(struct kiocb *iocb, struct socket *sk, struct iovec *iov); int sdp_handle_srcavail(struct sdp_sock *ssk, struct sdp_srcah *srcah); void sdp_handle_sendsm(struct sdp_sock *ssk, u32 mseq_ack); void sdp_handle_rdma_read_compl(struct sdp_sock *ssk, u32 mseq_ack, u32 bytes_completed); int sdp_handle_rdma_read_cqe(struct sdp_sock *ssk); int sdp_rdma_to_iovec(struct socket *sk, struct iovec *iov, struct mbuf *mb, unsigned long *used); int sdp_post_rdma_rd_compl(struct sdp_sock *ssk, struct rx_srcavail_state *rx_sa); int sdp_post_sendsm(struct socket *sk); void srcavail_cancel_timeout(struct work_struct *work); void sdp_abort_srcavail(struct socket *sk); void sdp_abort_rdma_read(struct socket *sk); int sdp_process_rx(struct sdp_sock *ssk); #endif Index: stable/10/sys/ofed/drivers/net/mlx4/alloc.c =================================================================== --- stable/10/sys/ofed/drivers/net/mlx4/alloc.c (revision 271126) +++ stable/10/sys/ofed/drivers/net/mlx4/alloc.c (revision 271127) @@ -1,450 +1,449 @@ /* * Copyright (c) 2006, 2007 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2007, 2008 Mellanox Technologies. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU * General Public License (GPL) Version 2, available from the file * COPYING in the main directory of this source tree, or the * OpenIB.org BSD license below: * * Redistribution and use in source and binary forms, with or * without modification, are permitted provided that the following * conditions are met: * * - Redistributions of source code must retain the above * copyright notice, this list of conditions and the following * disclaimer. * * - Redistributions in binary form must reproduce the above * copyright notice, this list of conditions and the following * disclaimer in the documentation and/or other materials * provided with the distribution. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ #include #include #include -//#include /* XXX SK probabaly not needed in freeBSD XXX */ -#include +#include #include #include #include "mlx4.h" u32 mlx4_bitmap_alloc(struct mlx4_bitmap *bitmap) { u32 obj; spin_lock(&bitmap->lock); obj = find_next_zero_bit(bitmap->table, bitmap->max, bitmap->last); if (obj >= bitmap->max) { bitmap->top = (bitmap->top + bitmap->max + bitmap->reserved_top) & bitmap->mask; obj = find_first_zero_bit(bitmap->table, bitmap->max); } if (obj < bitmap->max) { set_bit(obj, bitmap->table); bitmap->last = (obj + 1); if (bitmap->last == bitmap->max) bitmap->last = 0; obj |= bitmap->top; } else obj = -1; if (obj != -1) --bitmap->avail; spin_unlock(&bitmap->lock); return obj; } void mlx4_bitmap_free(struct mlx4_bitmap *bitmap, u32 obj) { mlx4_bitmap_free_range(bitmap, obj, 1); } static unsigned long find_aligned_range(unsigned long *bitmap, u32 start, u32 nbits, int len, int align, u32 skip_mask) { unsigned long end, i; again: start = ALIGN(start, align); while ((start < nbits) && (test_bit(start, bitmap) || (start & skip_mask))) start += align; if (start >= nbits) return -1; end = start+len; if (end > nbits) return -1; for (i = start + 1; i < end; i++) { if (test_bit(i, bitmap) || ((u32)i & skip_mask)) { start = i + 1; goto again; } } return start; } u32 mlx4_bitmap_alloc_range(struct mlx4_bitmap *bitmap, int cnt, int align, u32 skip_mask) { u32 obj; if (likely(cnt == 1 && align == 1 && !skip_mask)) return mlx4_bitmap_alloc(bitmap); spin_lock(&bitmap->lock); obj = find_aligned_range(bitmap->table, bitmap->last, bitmap->max, cnt, align, skip_mask); if (obj >= bitmap->max) { bitmap->top = (bitmap->top + bitmap->max + bitmap->reserved_top) & bitmap->mask; obj = find_aligned_range(bitmap->table, 0, bitmap->max, cnt, align, skip_mask); } if (obj < bitmap->max) { bitmap_set(bitmap->table, obj, cnt); if (obj == bitmap->last) { bitmap->last = (obj + cnt); if (bitmap->last >= bitmap->max) bitmap->last = 0; } obj |= bitmap->top; } else obj = -1; if (obj != -1) bitmap->avail -= cnt; spin_unlock(&bitmap->lock); return obj; } u32 mlx4_bitmap_avail(struct mlx4_bitmap *bitmap) { return bitmap->avail; } void mlx4_bitmap_free_range(struct mlx4_bitmap *bitmap, u32 obj, int cnt) { obj &= bitmap->max + bitmap->reserved_top - 1; spin_lock(&bitmap->lock); bitmap_clear(bitmap->table, obj, cnt); bitmap->avail += cnt; spin_unlock(&bitmap->lock); } int mlx4_bitmap_init(struct mlx4_bitmap *bitmap, u32 num, u32 mask, u32 reserved_bot, u32 reserved_top) { /* sanity check */ if (num <= (u64)reserved_top + reserved_bot) return -EINVAL; /* num must be a power of 2 */ if (num != roundup_pow_of_two(num)) return -EINVAL; if (reserved_bot + reserved_top >= num) return -EINVAL; bitmap->last = 0; bitmap->top = 0; bitmap->max = num - reserved_top; bitmap->mask = mask; bitmap->reserved_top = reserved_top; bitmap->avail = num - reserved_top - reserved_bot; spin_lock_init(&bitmap->lock); bitmap->table = kzalloc(BITS_TO_LONGS(bitmap->max) * sizeof (long), GFP_KERNEL); if (!bitmap->table) return -ENOMEM; bitmap_set(bitmap->table, 0, reserved_bot); return 0; } void mlx4_bitmap_cleanup(struct mlx4_bitmap *bitmap) { kfree(bitmap->table); } /* * Handling for queue buffers -- we allocate a bunch of memory and * register it in a memory region at HCA virtual address 0. If the * requested size is > max_direct, we split the allocation into * multiple pages, so we don't require too much contiguous memory. */ int mlx4_buf_alloc(struct mlx4_dev *dev, int size, int max_direct, struct mlx4_buf *buf) { dma_addr_t t; if (size <= max_direct) { buf->nbufs = 1; buf->npages = 1; buf->page_shift = get_order(size) + PAGE_SHIFT; buf->direct.buf = dma_alloc_coherent(&dev->pdev->dev, size, &t, GFP_KERNEL); if (!buf->direct.buf) return -ENOMEM; buf->direct.map = t; while (t & ((1 << buf->page_shift) - 1)) { --buf->page_shift; buf->npages *= 2; } memset(buf->direct.buf, 0, size); } else { int i; buf->direct.buf = NULL; buf->nbufs = (size + PAGE_SIZE - 1) / PAGE_SIZE; buf->npages = buf->nbufs; buf->page_shift = PAGE_SHIFT; buf->page_list = kcalloc(buf->nbufs, sizeof(*buf->page_list), GFP_KERNEL); if (!buf->page_list) return -ENOMEM; for (i = 0; i < buf->nbufs; ++i) { buf->page_list[i].buf = dma_alloc_coherent(&dev->pdev->dev, PAGE_SIZE, &t, GFP_KERNEL); if (!buf->page_list[i].buf) goto err_free; buf->page_list[i].map = t; memset(buf->page_list[i].buf, 0, PAGE_SIZE); } if (BITS_PER_LONG == 64) { struct page **pages; pages = kmalloc(sizeof *pages * buf->nbufs, GFP_KERNEL); if (!pages) goto err_free; for (i = 0; i < buf->nbufs; ++i) pages[i] = virt_to_page(buf->page_list[i].buf); buf->direct.buf = vmap(pages, buf->nbufs, VM_MAP, PAGE_KERNEL); kfree(pages); if (!buf->direct.buf) goto err_free; } } return 0; err_free: mlx4_buf_free(dev, size, buf); return -ENOMEM; } EXPORT_SYMBOL_GPL(mlx4_buf_alloc); void mlx4_buf_free(struct mlx4_dev *dev, int size, struct mlx4_buf *buf) { int i; if (buf->nbufs == 1) dma_free_coherent(&dev->pdev->dev, size, buf->direct.buf, buf->direct.map); else { if (BITS_PER_LONG == 64 && buf->direct.buf) vunmap(buf->direct.buf); for (i = 0; i < buf->nbufs; ++i) if (buf->page_list[i].buf) dma_free_coherent(&dev->pdev->dev, PAGE_SIZE, buf->page_list[i].buf, buf->page_list[i].map); kfree(buf->page_list); } } EXPORT_SYMBOL_GPL(mlx4_buf_free); static struct mlx4_db_pgdir *mlx4_alloc_db_pgdir(struct device *dma_device) { struct mlx4_db_pgdir *pgdir; pgdir = kzalloc(sizeof *pgdir, GFP_KERNEL); if (!pgdir) return NULL; bitmap_fill(pgdir->order1, MLX4_DB_PER_PAGE / 2); pgdir->bits[0] = pgdir->order0; pgdir->bits[1] = pgdir->order1; pgdir->db_page = dma_alloc_coherent(dma_device, PAGE_SIZE, &pgdir->db_dma, GFP_KERNEL); if (!pgdir->db_page) { kfree(pgdir); return NULL; } return pgdir; } static int mlx4_alloc_db_from_pgdir(struct mlx4_db_pgdir *pgdir, struct mlx4_db *db, int order) { int o; int i; for (o = order; o <= 1; ++o) { i = find_first_bit(pgdir->bits[o], MLX4_DB_PER_PAGE >> o); if (i < MLX4_DB_PER_PAGE >> o) goto found; } return -ENOMEM; found: clear_bit(i, pgdir->bits[o]); i <<= o; if (o > order) set_bit(i ^ 1, pgdir->bits[order]); db->u.pgdir = pgdir; db->index = i; db->db = pgdir->db_page + db->index; db->dma = pgdir->db_dma + db->index * 4; db->order = order; return 0; } int mlx4_db_alloc(struct mlx4_dev *dev, struct mlx4_db *db, int order) { struct mlx4_priv *priv = mlx4_priv(dev); struct mlx4_db_pgdir *pgdir; int ret = 0; mutex_lock(&priv->pgdir_mutex); list_for_each_entry(pgdir, &priv->pgdir_list, list) if (!mlx4_alloc_db_from_pgdir(pgdir, db, order)) goto out; pgdir = mlx4_alloc_db_pgdir(&(dev->pdev->dev)); if (!pgdir) { ret = -ENOMEM; goto out; } list_add(&pgdir->list, &priv->pgdir_list); /* This should never fail -- we just allocated an empty page: */ WARN_ON(mlx4_alloc_db_from_pgdir(pgdir, db, order)); out: mutex_unlock(&priv->pgdir_mutex); return ret; } EXPORT_SYMBOL_GPL(mlx4_db_alloc); void mlx4_db_free(struct mlx4_dev *dev, struct mlx4_db *db) { struct mlx4_priv *priv = mlx4_priv(dev); int o; int i; mutex_lock(&priv->pgdir_mutex); o = db->order; i = db->index; if (db->order == 0 && test_bit(i ^ 1, db->u.pgdir->order0)) { clear_bit(i ^ 1, db->u.pgdir->order0); ++o; } i >>= o; set_bit(i, db->u.pgdir->bits[o]); if (bitmap_full(db->u.pgdir->order1, MLX4_DB_PER_PAGE / 2)) { dma_free_coherent(&(dev->pdev->dev), PAGE_SIZE, db->u.pgdir->db_page, db->u.pgdir->db_dma); list_del(&db->u.pgdir->list); kfree(db->u.pgdir); } mutex_unlock(&priv->pgdir_mutex); } EXPORT_SYMBOL_GPL(mlx4_db_free); int mlx4_alloc_hwq_res(struct mlx4_dev *dev, struct mlx4_hwq_resources *wqres, int size, int max_direct) { int err; err = mlx4_db_alloc(dev, &wqres->db, 1); if (err) return err; *wqres->db.db = 0; err = mlx4_buf_alloc(dev, size, max_direct, &wqres->buf); if (err) goto err_db; err = mlx4_mtt_init(dev, wqres->buf.npages, wqres->buf.page_shift, &wqres->mtt); if (err) goto err_buf; err = mlx4_buf_write_mtt(dev, &wqres->mtt, &wqres->buf); if (err) goto err_mtt; return 0; err_mtt: mlx4_mtt_cleanup(dev, &wqres->mtt); err_buf: mlx4_buf_free(dev, size, &wqres->buf); err_db: mlx4_db_free(dev, &wqres->db); return err; } EXPORT_SYMBOL_GPL(mlx4_alloc_hwq_res); void mlx4_free_hwq_res(struct mlx4_dev *dev, struct mlx4_hwq_resources *wqres, int size) { mlx4_mtt_cleanup(dev, &wqres->mtt); mlx4_buf_free(dev, size, &wqres->buf); mlx4_db_free(dev, &wqres->db); } EXPORT_SYMBOL_GPL(mlx4_free_hwq_res); Index: stable/10/sys/ofed/drivers/net/mlx4/cmd.c =================================================================== --- stable/10/sys/ofed/drivers/net/mlx4/cmd.c (revision 271126) +++ stable/10/sys/ofed/drivers/net/mlx4/cmd.c (revision 271127) @@ -1,2164 +1,2164 @@ /* * Copyright (c) 2004, 2005 Topspin Communications. All rights reserved. * Copyright (c) 2005, 2006, 2007, 2008 Mellanox Technologies. All rights reserved. * Copyright (c) 2005, 2006, 2007 Cisco Systems, Inc. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU * General Public License (GPL) Version 2, available from the file * COPYING in the main directory of this source tree, or the * OpenIB.org BSD license below: * * Redistribution and use in source and binary forms, with or * without modification, are permitted provided that the following * conditions are met: * * - Redistributions of source code must retain the above * copyright notice, this list of conditions and the following * disclaimer. * * - Redistributions in binary form must reproduce the above * copyright notice, this list of conditions and the following * disclaimer in the documentation and/or other materials * provided with the distribution. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ #include #include #include #include #include #include #include #include #include "mlx4.h" #include "fw.h" #define CMD_POLL_TOKEN 0xffff #define INBOX_MASK 0xffffffffffffff00ULL #define CMD_CHAN_VER 1 #define CMD_CHAN_IF_REV 1 enum { /* command completed successfully: */ CMD_STAT_OK = 0x00, /* Internal error (such as a bus error) occurred while processing command: */ CMD_STAT_INTERNAL_ERR = 0x01, /* Operation/command not supported or opcode modifier not supported: */ CMD_STAT_BAD_OP = 0x02, /* Parameter not supported or parameter out of range: */ CMD_STAT_BAD_PARAM = 0x03, /* System not enabled or bad system state: */ CMD_STAT_BAD_SYS_STATE = 0x04, /* Attempt to access reserved or unallocaterd resource: */ CMD_STAT_BAD_RESOURCE = 0x05, /* Requested resource is currently executing a command, or is otherwise busy: */ CMD_STAT_RESOURCE_BUSY = 0x06, /* Required capability exceeds device limits: */ CMD_STAT_EXCEED_LIM = 0x08, /* Resource is not in the appropriate state or ownership: */ CMD_STAT_BAD_RES_STATE = 0x09, /* Index out of range: */ CMD_STAT_BAD_INDEX = 0x0a, /* FW image corrupted: */ CMD_STAT_BAD_NVMEM = 0x0b, /* Error in ICM mapping (e.g. not enough auxiliary ICM pages to execute command): */ CMD_STAT_ICM_ERROR = 0x0c, /* Attempt to modify a QP/EE which is not in the presumed state: */ CMD_STAT_BAD_QP_STATE = 0x10, /* Bad segment parameters (Address/Size): */ CMD_STAT_BAD_SEG_PARAM = 0x20, /* Memory Region has Memory Windows bound to: */ CMD_STAT_REG_BOUND = 0x21, /* HCA local attached memory not present: */ CMD_STAT_LAM_NOT_PRE = 0x22, /* Bad management packet (silently discarded): */ CMD_STAT_BAD_PKT = 0x30, /* More outstanding CQEs in CQ than new CQ size: */ CMD_STAT_BAD_SIZE = 0x40, /* Multi Function device support required: */ CMD_STAT_MULTI_FUNC_REQ = 0x50, }; enum { HCR_IN_PARAM_OFFSET = 0x00, HCR_IN_MODIFIER_OFFSET = 0x08, HCR_OUT_PARAM_OFFSET = 0x0c, HCR_TOKEN_OFFSET = 0x14, HCR_STATUS_OFFSET = 0x18, HCR_OPMOD_SHIFT = 12, HCR_T_BIT = 21, HCR_E_BIT = 22, HCR_GO_BIT = 23 }; enum { GO_BIT_TIMEOUT_MSECS = 10000 }; struct mlx4_cmd_context { struct completion done; int result; int next; u64 out_param; u16 token; u8 fw_status; }; static int mlx4_master_process_vhcr(struct mlx4_dev *dev, int slave, struct mlx4_vhcr_cmd *in_vhcr); static int mlx4_status_to_errno(u8 status) { static const int trans_table[] = { [CMD_STAT_INTERNAL_ERR] = -EIO, [CMD_STAT_BAD_OP] = -EPERM, [CMD_STAT_BAD_PARAM] = -EINVAL, [CMD_STAT_BAD_SYS_STATE] = -ENXIO, [CMD_STAT_BAD_RESOURCE] = -EBADF, [CMD_STAT_RESOURCE_BUSY] = -EBUSY, [CMD_STAT_EXCEED_LIM] = -ENOMEM, [CMD_STAT_BAD_RES_STATE] = -EBADF, [CMD_STAT_BAD_INDEX] = -EBADF, [CMD_STAT_BAD_NVMEM] = -EFAULT, [CMD_STAT_ICM_ERROR] = -ENFILE, [CMD_STAT_BAD_QP_STATE] = -EINVAL, [CMD_STAT_BAD_SEG_PARAM] = -EFAULT, [CMD_STAT_REG_BOUND] = -EBUSY, [CMD_STAT_LAM_NOT_PRE] = -EAGAIN, [CMD_STAT_BAD_PKT] = -EINVAL, [CMD_STAT_BAD_SIZE] = -ENOMEM, [CMD_STAT_MULTI_FUNC_REQ] = -EACCES, }; if (status >= ARRAY_SIZE(trans_table) || (status != CMD_STAT_OK && trans_table[status] == 0)) return -EIO; return trans_table[status]; } static u8 mlx4_errno_to_status(int errno) { switch (errno) { case -EPERM: return CMD_STAT_BAD_OP; case -EINVAL: return CMD_STAT_BAD_PARAM; case -ENXIO: return CMD_STAT_BAD_SYS_STATE; case -EBUSY: return CMD_STAT_RESOURCE_BUSY; case -ENOMEM: return CMD_STAT_EXCEED_LIM; case -ENFILE: return CMD_STAT_ICM_ERROR; default: return CMD_STAT_INTERNAL_ERR; } } static int comm_pending(struct mlx4_dev *dev) { struct mlx4_priv *priv = mlx4_priv(dev); u32 status = readl(&priv->mfunc.comm->slave_read); return (swab32(status) >> 31) != priv->cmd.comm_toggle; } static void mlx4_comm_cmd_post(struct mlx4_dev *dev, u8 cmd, u16 param) { struct mlx4_priv *priv = mlx4_priv(dev); u32 val; priv->cmd.comm_toggle ^= 1; val = param | (cmd << 16) | (priv->cmd.comm_toggle << 31); __raw_writel((__force u32) cpu_to_be32(val), &priv->mfunc.comm->slave_write); mmiowb(); } static int mlx4_comm_cmd_poll(struct mlx4_dev *dev, u8 cmd, u16 param, unsigned long timeout) { struct mlx4_priv *priv = mlx4_priv(dev); unsigned long end; int err = 0; int ret_from_pending = 0; /* First, verify that the master reports correct status */ if (comm_pending(dev)) { mlx4_warn(dev, "Communication channel is not idle." "my toggle is %d (cmd:0x%x)\n", priv->cmd.comm_toggle, cmd); return -EAGAIN; } /* Write command */ down(&priv->cmd.poll_sem); mlx4_comm_cmd_post(dev, cmd, param); end = msecs_to_jiffies(timeout) + jiffies; while (comm_pending(dev) && time_before(jiffies, end)) cond_resched(); ret_from_pending = comm_pending(dev); if (ret_from_pending) { /* check if the slave is trying to boot in the middle of * FLR process. The only non-zero result in the RESET command * is MLX4_DELAY_RESET_SLAVE*/ if ((MLX4_COMM_CMD_RESET == cmd)) { mlx4_warn(dev, "Got slave FLRed from Communication" " channel (ret:0x%x)\n", ret_from_pending); err = MLX4_DELAY_RESET_SLAVE; } else { mlx4_warn(dev, "Communication channel timed out\n"); err = -ETIMEDOUT; } } up(&priv->cmd.poll_sem); return err; } static int mlx4_comm_cmd_wait(struct mlx4_dev *dev, u8 op, u16 param, unsigned long timeout) { struct mlx4_cmd *cmd = &mlx4_priv(dev)->cmd; struct mlx4_cmd_context *context; unsigned long end; int err = 0; down(&cmd->event_sem); spin_lock(&cmd->context_lock); BUG_ON(cmd->free_head < 0); context = &cmd->context[cmd->free_head]; context->token += cmd->token_mask + 1; cmd->free_head = context->next; spin_unlock(&cmd->context_lock); init_completion(&context->done); mlx4_comm_cmd_post(dev, op, param); if (!wait_for_completion_timeout(&context->done, msecs_to_jiffies(timeout))) { mlx4_warn(dev, "communication channel command 0x%x timed out\n", op); err = -EBUSY; goto out; } err = context->result; if (err && context->fw_status != CMD_STAT_MULTI_FUNC_REQ) { mlx4_err(dev, "command 0x%x failed: fw status = 0x%x\n", op, context->fw_status); goto out; } out: /* wait for comm channel ready * this is necessary for prevention the race * when switching between event to polling mode */ end = msecs_to_jiffies(timeout) + jiffies; while (comm_pending(dev) && time_before(jiffies, end)) cond_resched(); spin_lock(&cmd->context_lock); context->next = cmd->free_head; cmd->free_head = context - cmd->context; spin_unlock(&cmd->context_lock); up(&cmd->event_sem); return err; } int mlx4_comm_cmd(struct mlx4_dev *dev, u8 cmd, u16 param, unsigned long timeout) { if (mlx4_priv(dev)->cmd.use_events) return mlx4_comm_cmd_wait(dev, cmd, param, timeout); return mlx4_comm_cmd_poll(dev, cmd, param, timeout); } static int cmd_pending(struct mlx4_dev *dev) { u32 status; if (pci_channel_offline(dev->pdev)) return -EIO; status = readl(mlx4_priv(dev)->cmd.hcr + HCR_STATUS_OFFSET); return (status & swab32(1 << HCR_GO_BIT)) || (mlx4_priv(dev)->cmd.toggle == !!(status & swab32(1 << HCR_T_BIT))); } static int mlx4_cmd_post(struct mlx4_dev *dev, u64 in_param, u64 out_param, u32 in_modifier, u8 op_modifier, u16 op, u16 token, int event) { struct mlx4_cmd *cmd = &mlx4_priv(dev)->cmd; u32 __iomem *hcr = cmd->hcr; int ret = -EAGAIN; unsigned long end; mutex_lock(&cmd->hcr_mutex); if (pci_channel_offline(dev->pdev)) { /* * Device is going through error recovery * and cannot accept commands. */ ret = -EIO; goto out; } end = jiffies; if (event) end += msecs_to_jiffies(GO_BIT_TIMEOUT_MSECS); while (cmd_pending(dev)) { if (pci_channel_offline(dev->pdev)) { /* * Device is going through error recovery * and cannot accept commands. */ ret = -EIO; goto out; } if (time_after_eq(jiffies, end)) { mlx4_err(dev, "%s:cmd_pending failed\n", __func__); goto out; } cond_resched(); } /* * We use writel (instead of something like memcpy_toio) * because writes of less than 32 bits to the HCR don't work * (and some architectures such as ia64 implement memcpy_toio * in terms of writeb). */ __raw_writel((__force u32) cpu_to_be32(in_param >> 32), hcr + 0); __raw_writel((__force u32) cpu_to_be32(in_param & 0xfffffffful), hcr + 1); __raw_writel((__force u32) cpu_to_be32(in_modifier), hcr + 2); __raw_writel((__force u32) cpu_to_be32(out_param >> 32), hcr + 3); __raw_writel((__force u32) cpu_to_be32(out_param & 0xfffffffful), hcr + 4); __raw_writel((__force u32) cpu_to_be32(token << 16), hcr + 5); /* __raw_writel may not order writes. */ wmb(); __raw_writel((__force u32) cpu_to_be32((1 << HCR_GO_BIT) | (cmd->toggle << HCR_T_BIT) | (event ? (1 << HCR_E_BIT) : 0) | (op_modifier << HCR_OPMOD_SHIFT) | op), hcr + 6); /* * Make sure that our HCR writes don't get mixed in with * writes from another CPU starting a FW command. */ mmiowb(); cmd->toggle = cmd->toggle ^ 1; ret = 0; out: mutex_unlock(&cmd->hcr_mutex); return ret; } static int mlx4_slave_cmd(struct mlx4_dev *dev, u64 in_param, u64 *out_param, int out_is_imm, u32 in_modifier, u8 op_modifier, u16 op, unsigned long timeout) { struct mlx4_priv *priv = mlx4_priv(dev); struct mlx4_vhcr_cmd *vhcr = priv->mfunc.vhcr; int ret; mutex_lock(&priv->cmd.slave_cmd_mutex); vhcr->in_param = cpu_to_be64(in_param); vhcr->out_param = out_param ? cpu_to_be64(*out_param) : 0; vhcr->in_modifier = cpu_to_be32(in_modifier); vhcr->opcode = cpu_to_be16((((u16) op_modifier) << 12) | (op & 0xfff)); vhcr->token = cpu_to_be16(CMD_POLL_TOKEN); vhcr->status = 0; vhcr->flags = !!(priv->cmd.use_events) << 6; if (mlx4_is_master(dev)) { ret = mlx4_master_process_vhcr(dev, dev->caps.function, vhcr); if (!ret) { if (out_is_imm) { if (out_param) *out_param = be64_to_cpu(vhcr->out_param); else { mlx4_err(dev, "response expected while" "output mailbox is NULL for " "command 0x%x\n", op); vhcr->status = CMD_STAT_BAD_PARAM; } } ret = mlx4_status_to_errno(vhcr->status); } } else { ret = mlx4_comm_cmd(dev, MLX4_COMM_CMD_VHCR_POST, 0, MLX4_COMM_TIME + timeout); if (!ret) { if (out_is_imm) { if (out_param) *out_param = be64_to_cpu(vhcr->out_param); else { mlx4_err(dev, "response expected while" "output mailbox is NULL for " "command 0x%x\n", op); vhcr->status = CMD_STAT_BAD_PARAM; } } ret = mlx4_status_to_errno(vhcr->status); } else mlx4_err(dev, "failed execution of VHCR_POST command" "opcode 0x%x\n", op); } mutex_unlock(&priv->cmd.slave_cmd_mutex); return ret; } static int mlx4_cmd_poll(struct mlx4_dev *dev, u64 in_param, u64 *out_param, int out_is_imm, u32 in_modifier, u8 op_modifier, u16 op, unsigned long timeout) { struct mlx4_priv *priv = mlx4_priv(dev); void __iomem *hcr = priv->cmd.hcr; int err = 0; unsigned long end; u32 stat; down(&priv->cmd.poll_sem); if (pci_channel_offline(dev->pdev)) { /* * Device is going through error recovery * and cannot accept commands. */ err = -EIO; goto out; } err = mlx4_cmd_post(dev, in_param, out_param ? *out_param : 0, in_modifier, op_modifier, op, CMD_POLL_TOKEN, 0); if (err) goto out; end = msecs_to_jiffies(timeout) + jiffies; while (cmd_pending(dev) && time_before(jiffies, end)) { if (pci_channel_offline(dev->pdev)) { /* * Device is going through error recovery * and cannot accept commands. */ err = -EIO; goto out; } cond_resched(); } if (cmd_pending(dev)) { mlx4_warn(dev, "command 0x%x timed out (go bit not cleared)\n", op); err = -ETIMEDOUT; goto out; } if (out_is_imm) *out_param = (u64) be32_to_cpu((__force __be32) __raw_readl(hcr + HCR_OUT_PARAM_OFFSET)) << 32 | (u64) be32_to_cpu((__force __be32) __raw_readl(hcr + HCR_OUT_PARAM_OFFSET + 4)); stat = be32_to_cpu((__force __be32) __raw_readl(hcr + HCR_STATUS_OFFSET)) >> 24; err = mlx4_status_to_errno(stat); if (err) mlx4_err(dev, "command 0x%x failed: fw status = 0x%x\n", op, stat); out: up(&priv->cmd.poll_sem); return err; } void mlx4_cmd_event(struct mlx4_dev *dev, u16 token, u8 status, u64 out_param) { struct mlx4_priv *priv = mlx4_priv(dev); struct mlx4_cmd_context *context = &priv->cmd.context[token & priv->cmd.token_mask]; /* previously timed out command completing at long last */ if (token != context->token) return; context->fw_status = status; context->result = mlx4_status_to_errno(status); context->out_param = out_param; complete(&context->done); } static int get_status(struct mlx4_dev *dev, u32 *status, int *go_bit, int *t_bit) { if (pci_channel_offline(dev->pdev)) return -EIO; *status = readl(mlx4_priv(dev)->cmd.hcr + HCR_STATUS_OFFSET); *t_bit = !!(*status & swab32(1 << HCR_T_BIT)); *go_bit = !!(*status & swab32(1 << HCR_GO_BIT)); return 0; } static int mlx4_cmd_wait(struct mlx4_dev *dev, u64 in_param, u64 *out_param, int out_is_imm, u32 in_modifier, u8 op_modifier, u16 op, unsigned long timeout) { struct mlx4_cmd *cmd = &mlx4_priv(dev)->cmd; struct mlx4_cmd_context *context; int err = 0; int go_bit = 0, t_bit = 0, stat_err; u32 status = 0; down(&cmd->event_sem); spin_lock(&cmd->context_lock); BUG_ON(cmd->free_head < 0); context = &cmd->context[cmd->free_head]; context->token += cmd->token_mask + 1; cmd->free_head = context->next; spin_unlock(&cmd->context_lock); init_completion(&context->done); err = mlx4_cmd_post(dev, in_param, out_param ? *out_param : 0, in_modifier, op_modifier, op, context->token, 1); if (err) { mlx4_warn(dev, "command 0x%x could not be posted (%d)\n", op, err); goto out; } if (!wait_for_completion_timeout(&context->done, msecs_to_jiffies(timeout))) { stat_err = get_status(dev, &status, &go_bit, &t_bit); mlx4_warn(dev, "command 0x%x timed out: " "get_status err=%d, status=0x%x, go_bit=%d, " "t_bit=%d, toggle=0x%x\n", op, stat_err, status, go_bit, t_bit, mlx4_priv(dev)->cmd.toggle); err = -EBUSY; goto out; } err = context->result; if (err) { mlx4_err(dev, "command 0x%x failed: fw status = 0x%x\n", op, context->fw_status); goto out; } if (out_is_imm) *out_param = context->out_param; out: spin_lock(&cmd->context_lock); context->next = cmd->free_head; cmd->free_head = context - cmd->context; spin_unlock(&cmd->context_lock); up(&cmd->event_sem); return err; } int __mlx4_cmd(struct mlx4_dev *dev, u64 in_param, u64 *out_param, int out_is_imm, u32 in_modifier, u8 op_modifier, u16 op, unsigned long timeout, int native) { if (pci_channel_offline(dev->pdev)) return -EIO; if (!mlx4_is_mfunc(dev) || (native && mlx4_is_master(dev))) { if (mlx4_priv(dev)->cmd.use_events) return mlx4_cmd_wait(dev, in_param, out_param, out_is_imm, in_modifier, op_modifier, op, timeout); else return mlx4_cmd_poll(dev, in_param, out_param, out_is_imm, in_modifier, op_modifier, op, timeout); } return mlx4_slave_cmd(dev, in_param, out_param, out_is_imm, in_modifier, op_modifier, op, timeout); } EXPORT_SYMBOL_GPL(__mlx4_cmd); static int mlx4_ARM_COMM_CHANNEL(struct mlx4_dev *dev) { return mlx4_cmd(dev, 0, 0, 0, MLX4_CMD_ARM_COMM_CHANNEL, MLX4_CMD_TIME_CLASS_B, MLX4_CMD_NATIVE); } static int mlx4_ACCESS_MEM(struct mlx4_dev *dev, u64 master_addr, int slave, u64 slave_addr, int size, int is_read) { u64 in_param; u64 out_param; if ((slave_addr & 0xfff) | (master_addr & 0xfff) | (slave & ~0x7f) | (size & 0xff)) { mlx4_err(dev, "Bad access mem params - slave_addr:0x%llx " "master_addr:0x%llx slave_id:%d size:%d\n", - slave_addr, master_addr, slave, size); + (long long)slave_addr, (long long)master_addr, slave, size); return -EINVAL; } if (is_read) { in_param = (u64) slave | slave_addr; out_param = (u64) dev->caps.function | master_addr; } else { in_param = (u64) dev->caps.function | master_addr; out_param = (u64) slave | slave_addr; } return mlx4_cmd_imm(dev, in_param, &out_param, size, 0, MLX4_CMD_ACCESS_MEM, MLX4_CMD_TIME_CLASS_A, MLX4_CMD_NATIVE); } static int query_pkey_block(struct mlx4_dev *dev, u8 port, u16 index, u16 *pkey, struct mlx4_cmd_mailbox *inbox, struct mlx4_cmd_mailbox *outbox) { struct ib_smp *in_mad = (struct ib_smp *)(inbox->buf); struct ib_smp *out_mad = (struct ib_smp *)(outbox->buf); int err; int i; if (index & 0x1f) return -EINVAL; in_mad->attr_mod = cpu_to_be32(index / 32); err = mlx4_cmd_box(dev, inbox->dma, outbox->dma, port, 3, MLX4_CMD_MAD_IFC, MLX4_CMD_TIME_CLASS_C, MLX4_CMD_NATIVE); if (err) return err; for (i = 0; i < 32; ++i) pkey[i] = be16_to_cpu(((__be16 *) out_mad->data)[i]); return err; } static int get_full_pkey_table(struct mlx4_dev *dev, u8 port, u16 *table, struct mlx4_cmd_mailbox *inbox, struct mlx4_cmd_mailbox *outbox) { int i; int err; for (i = 0; i < dev->caps.pkey_table_len[port]; i += 32) { err = query_pkey_block(dev, port, i, table + i, inbox, outbox); if (err) return err; } return 0; } #define PORT_CAPABILITY_LOCATION_IN_SMP 20 #define PORT_STATE_OFFSET 32 static enum ib_port_state vf_port_state(struct mlx4_dev *dev, int port, int vf) { if (mlx4_get_slave_port_state(dev, vf, port) == SLAVE_PORT_UP) return IB_PORT_ACTIVE; else return IB_PORT_DOWN; } static int mlx4_MAD_IFC_wrapper(struct mlx4_dev *dev, int slave, struct mlx4_vhcr *vhcr, struct mlx4_cmd_mailbox *inbox, struct mlx4_cmd_mailbox *outbox, struct mlx4_cmd_info *cmd) { struct ib_smp *smp = inbox->buf; u32 index; u8 port; u16 *table; int err; int vidx, pidx; struct mlx4_priv *priv = mlx4_priv(dev); struct ib_smp *outsmp = outbox->buf; __be16 *outtab = (__be16 *)(outsmp->data); __be32 slave_cap_mask; __be64 slave_node_guid; port = vhcr->in_modifier; if (smp->base_version == 1 && smp->mgmt_class == IB_MGMT_CLASS_SUBN_LID_ROUTED && smp->class_version == 1) { if (smp->method == IB_MGMT_METHOD_GET) { if (smp->attr_id == IB_SMP_ATTR_PKEY_TABLE) { index = be32_to_cpu(smp->attr_mod); if (port < 1 || port > dev->caps.num_ports) return -EINVAL; table = kcalloc(dev->caps.pkey_table_len[port], sizeof *table, GFP_KERNEL); if (!table) return -ENOMEM; /* need to get the full pkey table because the paravirtualized * pkeys may be scattered among several pkey blocks. */ err = get_full_pkey_table(dev, port, table, inbox, outbox); if (!err) { for (vidx = index * 32; vidx < (index + 1) * 32; ++vidx) { pidx = priv->virt2phys_pkey[slave][port - 1][vidx]; outtab[vidx % 32] = cpu_to_be16(table[pidx]); } } kfree(table); return err; } if (smp->attr_id == IB_SMP_ATTR_PORT_INFO) { /*get the slave specific caps:*/ /*do the command */ err = mlx4_cmd_box(dev, inbox->dma, outbox->dma, vhcr->in_modifier, vhcr->op_modifier, vhcr->op, MLX4_CMD_TIME_CLASS_C, MLX4_CMD_NATIVE); /* modify the response for slaves */ if (!err && slave != mlx4_master_func_num(dev)) { u8 *state = outsmp->data + PORT_STATE_OFFSET; *state = (*state & 0xf0) | vf_port_state(dev, port, slave); slave_cap_mask = priv->mfunc.master.slave_state[slave].ib_cap_mask[port]; memcpy(outsmp->data + PORT_CAPABILITY_LOCATION_IN_SMP, &slave_cap_mask, 4); } return err; } if (smp->attr_id == IB_SMP_ATTR_GUID_INFO) { /* compute slave's gid block */ smp->attr_mod = cpu_to_be32(slave / 8); /* execute cmd */ err = mlx4_cmd_box(dev, inbox->dma, outbox->dma, vhcr->in_modifier, vhcr->op_modifier, vhcr->op, MLX4_CMD_TIME_CLASS_C, MLX4_CMD_NATIVE); if (!err) { /* if needed, move slave gid to index 0 */ if (slave % 8) memcpy(outsmp->data, outsmp->data + (slave % 8) * 8, 8); /* delete all other gids */ memset(outsmp->data + 8, 0, 56); } return err; } if (smp->attr_id == IB_SMP_ATTR_NODE_INFO) { err = mlx4_cmd_box(dev, inbox->dma, outbox->dma, vhcr->in_modifier, vhcr->op_modifier, vhcr->op, MLX4_CMD_TIME_CLASS_C, MLX4_CMD_NATIVE); if (!err) { slave_node_guid = mlx4_get_slave_node_guid(dev, slave); memcpy(outsmp->data + 12, &slave_node_guid, 8); } return err; } } } if (slave != mlx4_master_func_num(dev) && ((smp->mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) || (smp->mgmt_class == IB_MGMT_CLASS_SUBN_LID_ROUTED && smp->method == IB_MGMT_METHOD_SET))) { mlx4_err(dev, "slave %d is trying to execute a Subnet MGMT MAD, " "class 0x%x, method 0x%x for attr 0x%x. Rejecting\n", slave, smp->method, smp->mgmt_class, be16_to_cpu(smp->attr_id)); return -EPERM; } /*default:*/ return mlx4_cmd_box(dev, inbox->dma, outbox->dma, vhcr->in_modifier, vhcr->op_modifier, vhcr->op, MLX4_CMD_TIME_CLASS_C, MLX4_CMD_NATIVE); } int mlx4_DMA_wrapper(struct mlx4_dev *dev, int slave, struct mlx4_vhcr *vhcr, struct mlx4_cmd_mailbox *inbox, struct mlx4_cmd_mailbox *outbox, struct mlx4_cmd_info *cmd) { u64 in_param; u64 out_param; int err; in_param = cmd->has_inbox ? (u64) inbox->dma : vhcr->in_param; out_param = cmd->has_outbox ? (u64) outbox->dma : vhcr->out_param; if (cmd->encode_slave_id) { in_param &= 0xffffffffffffff00ll; in_param |= slave; } err = __mlx4_cmd(dev, in_param, &out_param, cmd->out_is_imm, vhcr->in_modifier, vhcr->op_modifier, vhcr->op, MLX4_CMD_TIME_CLASS_A, MLX4_CMD_NATIVE); if (cmd->out_is_imm) vhcr->out_param = out_param; return err; } static struct mlx4_cmd_info cmd_info[] = { { .opcode = MLX4_CMD_QUERY_FW, .has_inbox = false, .has_outbox = true, .out_is_imm = false, .encode_slave_id = false, .verify = NULL, .wrapper = mlx4_QUERY_FW_wrapper }, { .opcode = MLX4_CMD_QUERY_HCA, .has_inbox = false, .has_outbox = true, .out_is_imm = false, .encode_slave_id = false, .verify = NULL, .wrapper = NULL }, { .opcode = MLX4_CMD_QUERY_DEV_CAP, .has_inbox = false, .has_outbox = true, .out_is_imm = false, .encode_slave_id = false, .verify = NULL, .wrapper = mlx4_QUERY_DEV_CAP_wrapper }, { .opcode = MLX4_CMD_QUERY_FUNC_CAP, .has_inbox = false, .has_outbox = true, .out_is_imm = false, .encode_slave_id = false, .verify = NULL, .wrapper = mlx4_QUERY_FUNC_CAP_wrapper }, { .opcode = MLX4_CMD_QUERY_ADAPTER, .has_inbox = false, .has_outbox = true, .out_is_imm = false, .encode_slave_id = false, .verify = NULL, .wrapper = NULL }, { .opcode = MLX4_CMD_INIT_PORT, .has_inbox = false, .has_outbox = false, .out_is_imm = false, .encode_slave_id = false, .verify = NULL, .wrapper = mlx4_INIT_PORT_wrapper }, { .opcode = MLX4_CMD_CLOSE_PORT, .has_inbox = false, .has_outbox = false, .out_is_imm = false, .encode_slave_id = false, .verify = NULL, .wrapper = mlx4_CLOSE_PORT_wrapper }, { .opcode = MLX4_CMD_QUERY_PORT, .has_inbox = false, .has_outbox = true, .out_is_imm = false, .encode_slave_id = false, .verify = NULL, .wrapper = mlx4_QUERY_PORT_wrapper }, { .opcode = MLX4_CMD_SET_PORT, .has_inbox = true, .has_outbox = false, .out_is_imm = false, .encode_slave_id = false, .verify = NULL, .wrapper = mlx4_SET_PORT_wrapper }, { .opcode = MLX4_CMD_MAP_EQ, .has_inbox = false, .has_outbox = false, .out_is_imm = false, .encode_slave_id = false, .verify = NULL, .wrapper = mlx4_MAP_EQ_wrapper }, { .opcode = MLX4_CMD_SW2HW_EQ, .has_inbox = true, .has_outbox = false, .out_is_imm = false, .encode_slave_id = true, .verify = NULL, .wrapper = mlx4_SW2HW_EQ_wrapper }, { .opcode = MLX4_CMD_HW_HEALTH_CHECK, .has_inbox = false, .has_outbox = false, .out_is_imm = false, .encode_slave_id = false, .verify = NULL, .wrapper = NULL }, { .opcode = MLX4_CMD_NOP, .has_inbox = false, .has_outbox = false, .out_is_imm = false, .encode_slave_id = false, .verify = NULL, .wrapper = NULL }, { .opcode = MLX4_CMD_ALLOC_RES, .has_inbox = false, .has_outbox = false, .out_is_imm = true, .encode_slave_id = false, .verify = NULL, .wrapper = mlx4_ALLOC_RES_wrapper }, { .opcode = MLX4_CMD_FREE_RES, .has_inbox = false, .has_outbox = false, .out_is_imm = false, .encode_slave_id = false, .verify = NULL, .wrapper = mlx4_FREE_RES_wrapper }, { .opcode = MLX4_CMD_SW2HW_MPT, .has_inbox = true, .has_outbox = false, .out_is_imm = false, .encode_slave_id = true, .verify = NULL, .wrapper = mlx4_SW2HW_MPT_wrapper }, { .opcode = MLX4_CMD_QUERY_MPT, .has_inbox = false, .has_outbox = true, .out_is_imm = false, .encode_slave_id = false, .verify = NULL, .wrapper = mlx4_QUERY_MPT_wrapper }, { .opcode = MLX4_CMD_HW2SW_MPT, .has_inbox = false, .has_outbox = false, .out_is_imm = false, .encode_slave_id = false, .verify = NULL, .wrapper = mlx4_HW2SW_MPT_wrapper }, { .opcode = MLX4_CMD_READ_MTT, .has_inbox = false, .has_outbox = true, .out_is_imm = false, .encode_slave_id = false, .verify = NULL, .wrapper = NULL }, { .opcode = MLX4_CMD_WRITE_MTT, .has_inbox = true, .has_outbox = false, .out_is_imm = false, .encode_slave_id = false, .verify = NULL, .wrapper = mlx4_WRITE_MTT_wrapper }, { .opcode = MLX4_CMD_SYNC_TPT, .has_inbox = true, .has_outbox = false, .out_is_imm = false, .encode_slave_id = false, .verify = NULL, .wrapper = NULL }, { .opcode = MLX4_CMD_HW2SW_EQ, .has_inbox = false, .has_outbox = true, .out_is_imm = false, .encode_slave_id = true, .verify = NULL, .wrapper = mlx4_HW2SW_EQ_wrapper }, { .opcode = MLX4_CMD_QUERY_EQ, .has_inbox = false, .has_outbox = true, .out_is_imm = false, .encode_slave_id = true, .verify = NULL, .wrapper = mlx4_QUERY_EQ_wrapper }, { .opcode = MLX4_CMD_SW2HW_CQ, .has_inbox = true, .has_outbox = false, .out_is_imm = false, .encode_slave_id = true, .verify = NULL, .wrapper = mlx4_SW2HW_CQ_wrapper }, { .opcode = MLX4_CMD_HW2SW_CQ, .has_inbox = false, .has_outbox = false, .out_is_imm = false, .encode_slave_id = false, .verify = NULL, .wrapper = mlx4_HW2SW_CQ_wrapper }, { .opcode = MLX4_CMD_QUERY_CQ, .has_inbox = false, .has_outbox = true, .out_is_imm = false, .encode_slave_id = false, .verify = NULL, .wrapper = mlx4_QUERY_CQ_wrapper }, { .opcode = MLX4_CMD_MODIFY_CQ, .has_inbox = true, .has_outbox = false, .out_is_imm = true, .encode_slave_id = false, .verify = NULL, .wrapper = mlx4_MODIFY_CQ_wrapper }, { .opcode = MLX4_CMD_SW2HW_SRQ, .has_inbox = true, .has_outbox = false, .out_is_imm = false, .encode_slave_id = true, .verify = NULL, .wrapper = mlx4_SW2HW_SRQ_wrapper }, { .opcode = MLX4_CMD_HW2SW_SRQ, .has_inbox = false, .has_outbox = false, .out_is_imm = false, .encode_slave_id = false, .verify = NULL, .wrapper = mlx4_HW2SW_SRQ_wrapper }, { .opcode = MLX4_CMD_QUERY_SRQ, .has_inbox = false, .has_outbox = true, .out_is_imm = false, .encode_slave_id = false, .verify = NULL, .wrapper = mlx4_QUERY_SRQ_wrapper }, { .opcode = MLX4_CMD_ARM_SRQ, .has_inbox = false, .has_outbox = false, .out_is_imm = false, .encode_slave_id = false, .verify = NULL, .wrapper = mlx4_ARM_SRQ_wrapper }, { .opcode = MLX4_CMD_RST2INIT_QP, .has_inbox = true, .has_outbox = false, .out_is_imm = false, .encode_slave_id = true, .verify = NULL, .wrapper = mlx4_RST2INIT_QP_wrapper }, { .opcode = MLX4_CMD_INIT2INIT_QP, .has_inbox = true, .has_outbox = false, .out_is_imm = false, .encode_slave_id = false, .verify = NULL, .wrapper = mlx4_INIT2INIT_QP_wrapper }, { .opcode = MLX4_CMD_INIT2RTR_QP, .has_inbox = true, .has_outbox = false, .out_is_imm = false, .encode_slave_id = false, .verify = NULL, .wrapper = mlx4_INIT2RTR_QP_wrapper }, { .opcode = MLX4_CMD_RTR2RTS_QP, .has_inbox = true, .has_outbox = false, .out_is_imm = false, .encode_slave_id = false, .verify = NULL, .wrapper = mlx4_RTR2RTS_QP_wrapper }, { .opcode = MLX4_CMD_RTS2RTS_QP, .has_inbox = true, .has_outbox = false, .out_is_imm = false, .encode_slave_id = false, .verify = NULL, .wrapper = mlx4_RTS2RTS_QP_wrapper }, { .opcode = MLX4_CMD_SQERR2RTS_QP, .has_inbox = true, .has_outbox = false, .out_is_imm = false, .encode_slave_id = false, .verify = NULL, .wrapper = mlx4_SQERR2RTS_QP_wrapper }, { .opcode = MLX4_CMD_2ERR_QP, .has_inbox = false, .has_outbox = false, .out_is_imm = false, .encode_slave_id = false, .verify = NULL, .wrapper = mlx4_GEN_QP_wrapper }, { .opcode = MLX4_CMD_RTS2SQD_QP, .has_inbox = false, .has_outbox = false, .out_is_imm = false, .encode_slave_id = false, .verify = NULL, .wrapper = mlx4_GEN_QP_wrapper }, { .opcode = MLX4_CMD_SQD2SQD_QP, .has_inbox = true, .has_outbox = false, .out_is_imm = false, .encode_slave_id = false, .verify = NULL, .wrapper = mlx4_SQD2SQD_QP_wrapper }, { .opcode = MLX4_CMD_SQD2RTS_QP, .has_inbox = true, .has_outbox = false, .out_is_imm = false, .encode_slave_id = false, .verify = NULL, .wrapper = mlx4_SQD2RTS_QP_wrapper }, { .opcode = MLX4_CMD_2RST_QP, .has_inbox = false, .has_outbox = false, .out_is_imm = false, .encode_slave_id = false, .verify = NULL, .wrapper = mlx4_2RST_QP_wrapper }, { .opcode = MLX4_CMD_QUERY_QP, .has_inbox = false, .has_outbox = true, .out_is_imm = false, .encode_slave_id = false, .verify = NULL, .wrapper = mlx4_GEN_QP_wrapper }, { .opcode = MLX4_CMD_SUSPEND_QP, .has_inbox = false, .has_outbox = false, .out_is_imm = false, .encode_slave_id = false, .verify = NULL, .wrapper = mlx4_GEN_QP_wrapper }, { .opcode = MLX4_CMD_UNSUSPEND_QP, .has_inbox = false, .has_outbox = false, .out_is_imm = false, .encode_slave_id = false, .verify = NULL, .wrapper = mlx4_GEN_QP_wrapper }, { .opcode = MLX4_CMD_CONF_SPECIAL_QP, .has_inbox = false, .has_outbox = false, .out_is_imm = false, .encode_slave_id = false, .verify = NULL, /* XXX verify: only demux can do this */ .wrapper = NULL }, { .opcode = MLX4_CMD_MAD_IFC, .has_inbox = true, .has_outbox = true, .out_is_imm = false, .encode_slave_id = false, .verify = NULL, .wrapper = mlx4_MAD_IFC_wrapper }, { .opcode = MLX4_CMD_QUERY_IF_STAT, .has_inbox = false, .has_outbox = true, .out_is_imm = false, .encode_slave_id = false, .verify = NULL, .wrapper = mlx4_QUERY_IF_STAT_wrapper }, /* Native multicast commands are not available for guests */ { .opcode = MLX4_CMD_QP_ATTACH, .has_inbox = true, .has_outbox = false, .out_is_imm = false, .encode_slave_id = false, .verify = NULL, .wrapper = mlx4_QP_ATTACH_wrapper }, { .opcode = MLX4_CMD_PROMISC, .has_inbox = false, .has_outbox = false, .out_is_imm = false, .encode_slave_id = false, .verify = NULL, .wrapper = mlx4_PROMISC_wrapper }, /* Ethernet specific commands */ { .opcode = MLX4_CMD_SET_VLAN_FLTR, .has_inbox = true, .has_outbox = false, .out_is_imm = false, .encode_slave_id = false, .verify = NULL, .wrapper = mlx4_SET_VLAN_FLTR_wrapper }, { .opcode = MLX4_CMD_SET_MCAST_FLTR, .has_inbox = false, .has_outbox = false, .out_is_imm = false, .encode_slave_id = false, .verify = NULL, .wrapper = mlx4_SET_MCAST_FLTR_wrapper }, { .opcode = MLX4_CMD_DUMP_ETH_STATS, .has_inbox = false, .has_outbox = true, .out_is_imm = false, .encode_slave_id = false, .verify = NULL, .wrapper = mlx4_DUMP_ETH_STATS_wrapper }, { .opcode = MLX4_CMD_INFORM_FLR_DONE, .has_inbox = false, .has_outbox = false, .out_is_imm = false, .encode_slave_id = false, .verify = NULL, .wrapper = NULL }, /* flow steering commands */ { .opcode = MLX4_QP_FLOW_STEERING_ATTACH, .has_inbox = true, .has_outbox = false, .out_is_imm = true, .encode_slave_id = false, .verify = NULL, .wrapper = mlx4_QP_FLOW_STEERING_ATTACH_wrapper }, { .opcode = MLX4_QP_FLOW_STEERING_DETACH, .has_inbox = false, .has_outbox = false, .out_is_imm = false, .encode_slave_id = false, .verify = NULL, .wrapper = mlx4_QP_FLOW_STEERING_DETACH_wrapper }, }; static int mlx4_master_process_vhcr(struct mlx4_dev *dev, int slave, struct mlx4_vhcr_cmd *in_vhcr) { struct mlx4_priv *priv = mlx4_priv(dev); struct mlx4_cmd_info *cmd = NULL; struct mlx4_vhcr_cmd *vhcr_cmd = in_vhcr ? in_vhcr : priv->mfunc.vhcr; struct mlx4_vhcr *vhcr; struct mlx4_cmd_mailbox *inbox = NULL; struct mlx4_cmd_mailbox *outbox = NULL; u64 in_param; u64 out_param; int ret = 0; int i; int err = 0; /* Create sw representation of Virtual HCR */ vhcr = kzalloc(sizeof(struct mlx4_vhcr), GFP_KERNEL); if (!vhcr) return -ENOMEM; /* DMA in the vHCR */ if (!in_vhcr) { ret = mlx4_ACCESS_MEM(dev, priv->mfunc.vhcr_dma, slave, priv->mfunc.master.slave_state[slave].vhcr_dma, ALIGN(sizeof(struct mlx4_vhcr_cmd), MLX4_ACCESS_MEM_ALIGN), 1); if (ret) { mlx4_err(dev, "%s:Failed reading vhcr" "ret: 0x%x\n", __func__, ret); kfree(vhcr); return ret; } } /* Fill SW VHCR fields */ vhcr->in_param = be64_to_cpu(vhcr_cmd->in_param); vhcr->out_param = be64_to_cpu(vhcr_cmd->out_param); vhcr->in_modifier = be32_to_cpu(vhcr_cmd->in_modifier); vhcr->token = be16_to_cpu(vhcr_cmd->token); vhcr->op = be16_to_cpu(vhcr_cmd->opcode) & 0xfff; vhcr->op_modifier = (u8) (be16_to_cpu(vhcr_cmd->opcode) >> 12); vhcr->e_bit = vhcr_cmd->flags & (1 << 6); /* Lookup command */ for (i = 0; i < ARRAY_SIZE(cmd_info); ++i) { if (vhcr->op == cmd_info[i].opcode) { cmd = &cmd_info[i]; break; } } if (!cmd) { mlx4_err(dev, "Unknown command:0x%x accepted from slave:%d\n", vhcr->op, slave); vhcr_cmd->status = CMD_STAT_BAD_PARAM; goto out_status; } /* Read inbox */ if (cmd->has_inbox) { vhcr->in_param &= INBOX_MASK; inbox = mlx4_alloc_cmd_mailbox(dev); if (IS_ERR(inbox)) { vhcr_cmd->status = CMD_STAT_BAD_SIZE; inbox = NULL; goto out_status; } if (mlx4_ACCESS_MEM(dev, inbox->dma, slave, vhcr->in_param, MLX4_MAILBOX_SIZE, 1)) { mlx4_err(dev, "%s: Failed reading inbox (cmd:0x%x)\n", __func__, cmd->opcode); vhcr_cmd->status = CMD_STAT_INTERNAL_ERR; goto out_status; } } /* Apply permission and bound checks if applicable */ if (cmd->verify && cmd->verify(dev, slave, vhcr, inbox)) { mlx4_warn(dev, "Command:0x%x from slave: %d failed protection " "checks for resource_id:%d\n", vhcr->op, slave, vhcr->in_modifier); vhcr_cmd->status = CMD_STAT_BAD_OP; goto out_status; } /* Allocate outbox */ if (cmd->has_outbox) { outbox = mlx4_alloc_cmd_mailbox(dev); if (IS_ERR(outbox)) { vhcr_cmd->status = CMD_STAT_BAD_SIZE; outbox = NULL; goto out_status; } } /* Execute the command! */ if (cmd->wrapper) { err = cmd->wrapper(dev, slave, vhcr, inbox, outbox, cmd); if (cmd->out_is_imm) vhcr_cmd->out_param = cpu_to_be64(vhcr->out_param); } else { in_param = cmd->has_inbox ? (u64) inbox->dma : vhcr->in_param; out_param = cmd->has_outbox ? (u64) outbox->dma : vhcr->out_param; err = __mlx4_cmd(dev, in_param, &out_param, cmd->out_is_imm, vhcr->in_modifier, vhcr->op_modifier, vhcr->op, MLX4_CMD_TIME_CLASS_A, MLX4_CMD_NATIVE); if (cmd->out_is_imm) { vhcr->out_param = out_param; vhcr_cmd->out_param = cpu_to_be64(vhcr->out_param); } } if (err) { mlx4_warn(dev, "vhcr command:0x%x slave:%d failed with" " error:%d, status %d\n", vhcr->op, slave, vhcr->errno, err); vhcr_cmd->status = mlx4_errno_to_status(err); goto out_status; } /* Write outbox if command completed successfully */ if (cmd->has_outbox && !vhcr_cmd->status) { ret = mlx4_ACCESS_MEM(dev, outbox->dma, slave, vhcr->out_param, MLX4_MAILBOX_SIZE, MLX4_CMD_WRAPPED); if (ret) { /* If we failed to write back the outbox after the *command was successfully executed, we must fail this * slave, as it is now in undefined state */ mlx4_err(dev, "%s:Failed writing outbox\n", __func__); goto out; } } out_status: /* DMA back vhcr result */ if (!in_vhcr) { ret = mlx4_ACCESS_MEM(dev, priv->mfunc.vhcr_dma, slave, priv->mfunc.master.slave_state[slave].vhcr_dma, ALIGN(sizeof(struct mlx4_vhcr), MLX4_ACCESS_MEM_ALIGN), MLX4_CMD_WRAPPED); if (ret) mlx4_err(dev, "%s:Failed writing vhcr result\n", __func__); else if (vhcr->e_bit && mlx4_GEN_EQE(dev, slave, &priv->mfunc.master.cmd_eqe)) mlx4_warn(dev, "Failed to generate command completion " "eqe for slave %d\n", slave); } out: kfree(vhcr); mlx4_free_cmd_mailbox(dev, inbox); mlx4_free_cmd_mailbox(dev, outbox); return ret; } static int mlx4_master_activate_admin_state(struct mlx4_priv *priv, int slave) { int port, err; struct mlx4_vport_state *vp_admin; struct mlx4_vport_oper_state *vp_oper; for (port = 1; port <= MLX4_MAX_PORTS; port++) { vp_oper = &priv->mfunc.master.vf_oper[slave].vport[port]; vp_admin = &priv->mfunc.master.vf_admin[slave].vport[port]; vp_oper->state = *vp_admin; if (MLX4_VGT != vp_admin->default_vlan) { err = mlx4_register_vlan(&priv->dev, port, vp_admin->default_vlan, &(vp_oper->vlan_idx)); if (err) { vp_oper->vlan_idx = NO_INDX; mlx4_warn((&priv->dev), "No vlan resorces slave %d, port %d\n", slave, port); return err; } mlx4_dbg((&(priv->dev)), "alloc vlan %d idx %d slave %d port %d\n", (int)(vp_oper->state.default_vlan), vp_oper->vlan_idx, slave, port); } if (vp_admin->spoofchk) { vp_oper->mac_idx = __mlx4_register_mac(&priv->dev, port, vp_admin->mac); if (0 > vp_oper->mac_idx) { err = vp_oper->mac_idx; vp_oper->mac_idx = NO_INDX; mlx4_warn((&priv->dev), "No mac resorces slave %d, port %d\n", slave, port); return err; } mlx4_dbg((&(priv->dev)), "alloc mac %llx idx %d slave %d port %d\n", - vp_oper->state.mac, vp_oper->mac_idx, slave, port); + (long long)vp_oper->state.mac, vp_oper->mac_idx, slave, port); } } return 0; } static void mlx4_master_deactivate_admin_state(struct mlx4_priv *priv, int slave) { int port; struct mlx4_vport_oper_state *vp_oper; for (port = 1; port <= MLX4_MAX_PORTS; port++) { vp_oper = &priv->mfunc.master.vf_oper[slave].vport[port]; if (NO_INDX != vp_oper->vlan_idx) { __mlx4_unregister_vlan(&priv->dev, port, vp_oper->state.default_vlan); vp_oper->vlan_idx = NO_INDX; } if (NO_INDX != vp_oper->mac_idx) { __mlx4_unregister_mac(&priv->dev, port, vp_oper->state.mac); vp_oper->mac_idx = NO_INDX; } } return; } static void mlx4_master_do_cmd(struct mlx4_dev *dev, int slave, u8 cmd, u16 param, u8 toggle) { struct mlx4_priv *priv = mlx4_priv(dev); struct mlx4_slave_state *slave_state = priv->mfunc.master.slave_state; u32 reply; u8 is_going_down = 0; int i; unsigned long flags; slave_state[slave].comm_toggle ^= 1; reply = (u32) slave_state[slave].comm_toggle << 31; if (toggle != slave_state[slave].comm_toggle) { mlx4_warn(dev, "Incorrect toggle %d from slave %d. *** MASTER" "STATE COMPROMISIED ***\n", toggle, slave); goto reset_slave; } if (cmd == MLX4_COMM_CMD_RESET) { mlx4_warn(dev, "Received reset from slave:%d\n", slave); slave_state[slave].active = false; mlx4_master_deactivate_admin_state(priv, slave); for (i = 0; i < MLX4_EVENT_TYPES_NUM; ++i) { slave_state[slave].event_eq[i].eqn = -1; slave_state[slave].event_eq[i].token = 0; } /*check if we are in the middle of FLR process, if so return "retry" status to the slave*/ if (MLX4_COMM_CMD_FLR == slave_state[slave].last_cmd) goto inform_slave_state; mlx4_dispatch_event(dev, MLX4_DEV_EVENT_SLAVE_SHUTDOWN, slave); /* write the version in the event field */ reply |= mlx4_comm_get_version(); goto reset_slave; } /*command from slave in the middle of FLR*/ if (cmd != MLX4_COMM_CMD_RESET && MLX4_COMM_CMD_FLR == slave_state[slave].last_cmd) { mlx4_warn(dev, "slave:%d is Trying to run cmd(0x%x) " "in the middle of FLR\n", slave, cmd); return; } switch (cmd) { case MLX4_COMM_CMD_VHCR0: if (slave_state[slave].last_cmd != MLX4_COMM_CMD_RESET) goto reset_slave; slave_state[slave].vhcr_dma = ((u64) param) << 48; priv->mfunc.master.slave_state[slave].cookie = 0; mutex_init(&priv->mfunc.master.gen_eqe_mutex[slave]); break; case MLX4_COMM_CMD_VHCR1: if (slave_state[slave].last_cmd != MLX4_COMM_CMD_VHCR0) goto reset_slave; slave_state[slave].vhcr_dma |= ((u64) param) << 32; break; case MLX4_COMM_CMD_VHCR2: if (slave_state[slave].last_cmd != MLX4_COMM_CMD_VHCR1) goto reset_slave; slave_state[slave].vhcr_dma |= ((u64) param) << 16; break; case MLX4_COMM_CMD_VHCR_EN: if (slave_state[slave].last_cmd != MLX4_COMM_CMD_VHCR2) goto reset_slave; slave_state[slave].vhcr_dma |= param; if (mlx4_master_activate_admin_state(priv, slave)) goto reset_slave; slave_state[slave].active = true; mlx4_dispatch_event(dev, MLX4_DEV_EVENT_SLAVE_INIT, slave); break; case MLX4_COMM_CMD_VHCR_POST: if ((slave_state[slave].last_cmd != MLX4_COMM_CMD_VHCR_EN) && (slave_state[slave].last_cmd != MLX4_COMM_CMD_VHCR_POST)) goto reset_slave; mutex_lock(&priv->cmd.slave_cmd_mutex); if (mlx4_master_process_vhcr(dev, slave, NULL)) { mlx4_err(dev, "Failed processing vhcr for slave:%d," " resetting slave.\n", slave); mutex_unlock(&priv->cmd.slave_cmd_mutex); goto reset_slave; } mutex_unlock(&priv->cmd.slave_cmd_mutex); break; default: mlx4_warn(dev, "Bad comm cmd:%d from slave:%d\n", cmd, slave); goto reset_slave; } spin_lock_irqsave(&priv->mfunc.master.slave_state_lock, flags); if (!slave_state[slave].is_slave_going_down) slave_state[slave].last_cmd = cmd; else is_going_down = 1; spin_unlock_irqrestore(&priv->mfunc.master.slave_state_lock, flags); if (is_going_down) { mlx4_warn(dev, "Slave is going down aborting command(%d)" " executing from slave:%d\n", cmd, slave); return; } __raw_writel((__force u32) cpu_to_be32(reply), &priv->mfunc.comm[slave].slave_read); mmiowb(); return; reset_slave: /* cleanup any slave resources */ mlx4_delete_all_resources_for_slave(dev, slave); spin_lock_irqsave(&priv->mfunc.master.slave_state_lock, flags); if (!slave_state[slave].is_slave_going_down) slave_state[slave].last_cmd = MLX4_COMM_CMD_RESET; spin_unlock_irqrestore(&priv->mfunc.master.slave_state_lock, flags); /*with slave in the middle of flr, no need to clean resources again.*/ inform_slave_state: memset(&slave_state[slave].event_eq, 0, sizeof(struct mlx4_slave_event_eq_info)); __raw_writel((__force u32) cpu_to_be32(reply), &priv->mfunc.comm[slave].slave_read); wmb(); } /* master command processing */ void mlx4_master_comm_channel(struct work_struct *work) { struct mlx4_mfunc_master_ctx *master = container_of(work, struct mlx4_mfunc_master_ctx, comm_work); struct mlx4_mfunc *mfunc = container_of(master, struct mlx4_mfunc, master); struct mlx4_priv *priv = container_of(mfunc, struct mlx4_priv, mfunc); struct mlx4_dev *dev = &priv->dev; __be32 *bit_vec; u32 comm_cmd; u32 vec; int i, j, slave; int toggle; int served = 0; int reported = 0; u32 slt; bit_vec = master->comm_arm_bit_vector; for (i = 0; i < COMM_CHANNEL_BIT_ARRAY_SIZE; i++) { vec = be32_to_cpu(bit_vec[i]); for (j = 0; j < 32; j++) { if (!(vec & (1 << j))) continue; ++reported; slave = (i * 32) + j; comm_cmd = swab32(readl( &mfunc->comm[slave].slave_write)); slt = swab32(readl(&mfunc->comm[slave].slave_read)) >> 31; toggle = comm_cmd >> 31; if (toggle != slt) { if (master->slave_state[slave].comm_toggle != slt) { mlx4_info(dev, "slave %d out of sync." " read toggle %d, state toggle %d. " "Resynching.\n", slave, slt, master->slave_state[slave].comm_toggle); master->slave_state[slave].comm_toggle = slt; } mlx4_master_do_cmd(dev, slave, comm_cmd >> 16 & 0xff, comm_cmd & 0xffff, toggle); ++served; } } } if (reported && reported != served) mlx4_warn(dev, "Got command event with bitmask from %d slaves" " but %d were served\n", reported, served); if (mlx4_ARM_COMM_CHANNEL(dev)) mlx4_warn(dev, "Failed to arm comm channel events\n"); } static int sync_toggles(struct mlx4_dev *dev) { struct mlx4_priv *priv = mlx4_priv(dev); int wr_toggle; int rd_toggle; unsigned long end; wr_toggle = swab32(readl(&priv->mfunc.comm->slave_write)) >> 31; end = jiffies + msecs_to_jiffies(5000); while (time_before(jiffies, end)) { rd_toggle = swab32(readl(&priv->mfunc.comm->slave_read)) >> 31; if (rd_toggle == wr_toggle) { priv->cmd.comm_toggle = rd_toggle; return 0; } cond_resched(); } /* * we could reach here if for example the previous VM using this * function misbehaved and left the channel with unsynced state. We * should fix this here and give this VM a chance to use a properly * synced channel */ mlx4_warn(dev, "recovering from previously mis-behaved VM\n"); __raw_writel((__force u32) 0, &priv->mfunc.comm->slave_read); __raw_writel((__force u32) 0, &priv->mfunc.comm->slave_write); priv->cmd.comm_toggle = 0; return 0; } int mlx4_multi_func_init(struct mlx4_dev *dev) { struct mlx4_priv *priv = mlx4_priv(dev); struct mlx4_slave_state *s_state; int i, j, err, port; if (mlx4_is_master(dev)) priv->mfunc.comm = ioremap(pci_resource_start(dev->pdev, priv->fw.comm_bar) + priv->fw.comm_base, MLX4_COMM_PAGESIZE); else priv->mfunc.comm = ioremap(pci_resource_start(dev->pdev, 2) + MLX4_SLAVE_COMM_BASE, MLX4_COMM_PAGESIZE); if (!priv->mfunc.comm) { mlx4_err(dev, "Couldn't map communication vector.\n"); goto err_vhcr; } if (mlx4_is_master(dev)) { priv->mfunc.master.slave_state = kzalloc(dev->num_slaves * sizeof(struct mlx4_slave_state), GFP_KERNEL); if (!priv->mfunc.master.slave_state) goto err_comm; priv->mfunc.master.vf_admin = kzalloc(dev->num_slaves * sizeof(struct mlx4_vf_admin_state), GFP_KERNEL); if (!priv->mfunc.master.vf_admin) goto err_comm_admin; priv->mfunc.master.vf_oper = kzalloc(dev->num_slaves * sizeof(struct mlx4_vf_oper_state), GFP_KERNEL); if (!priv->mfunc.master.vf_oper) goto err_comm_oper; for (i = 0; i < dev->num_slaves; ++i) { s_state = &priv->mfunc.master.slave_state[i]; s_state->last_cmd = MLX4_COMM_CMD_RESET; for (j = 0; j < MLX4_EVENT_TYPES_NUM; ++j) s_state->event_eq[j].eqn = -1; __raw_writel((__force u32) 0, &priv->mfunc.comm[i].slave_write); __raw_writel((__force u32) 0, &priv->mfunc.comm[i].slave_read); mmiowb(); for (port = 1; port <= MLX4_MAX_PORTS; port++) { s_state->vlan_filter[port] = kzalloc(sizeof(struct mlx4_vlan_fltr), GFP_KERNEL); if (!s_state->vlan_filter[port]) { if (--port) kfree(s_state->vlan_filter[port]); goto err_slaves; } INIT_LIST_HEAD(&s_state->mcast_filters[port]); priv->mfunc.master.vf_admin[i].vport[port].default_vlan = MLX4_VGT; priv->mfunc.master.vf_oper[i].vport[port].state.default_vlan = MLX4_VGT; priv->mfunc.master.vf_oper[i].vport[port].vlan_idx = NO_INDX; priv->mfunc.master.vf_oper[i].vport[port].mac_idx = NO_INDX; } spin_lock_init(&s_state->lock); } memset(&priv->mfunc.master.cmd_eqe, 0, dev->caps.eqe_size); priv->mfunc.master.cmd_eqe.type = MLX4_EVENT_TYPE_CMD; INIT_WORK(&priv->mfunc.master.comm_work, mlx4_master_comm_channel); INIT_WORK(&priv->mfunc.master.slave_event_work, mlx4_gen_slave_eqe); INIT_WORK(&priv->mfunc.master.slave_flr_event_work, mlx4_master_handle_slave_flr); spin_lock_init(&priv->mfunc.master.slave_state_lock); spin_lock_init(&priv->mfunc.master.slave_eq.event_lock); priv->mfunc.master.comm_wq = create_singlethread_workqueue("mlx4_comm"); if (!priv->mfunc.master.comm_wq) goto err_slaves; if (mlx4_init_resource_tracker(dev)) goto err_thread; err = mlx4_ARM_COMM_CHANNEL(dev); if (err) { mlx4_err(dev, " Failed to arm comm channel eq: %x\n", err); goto err_resource; } } else { err = sync_toggles(dev); if (err) { mlx4_err(dev, "Couldn't sync toggles\n"); goto err_comm; } } return 0; err_resource: mlx4_free_resource_tracker(dev, RES_TR_FREE_ALL); err_thread: flush_workqueue(priv->mfunc.master.comm_wq); destroy_workqueue(priv->mfunc.master.comm_wq); err_slaves: while (--i) { for (port = 1; port <= MLX4_MAX_PORTS; port++) kfree(priv->mfunc.master.slave_state[i].vlan_filter[port]); } kfree(priv->mfunc.master.vf_oper); err_comm_oper: kfree(priv->mfunc.master.vf_admin); err_comm_admin: kfree(priv->mfunc.master.slave_state); err_comm: iounmap(priv->mfunc.comm); err_vhcr: dma_free_coherent(&(dev->pdev->dev), PAGE_SIZE, priv->mfunc.vhcr, priv->mfunc.vhcr_dma); priv->mfunc.vhcr = NULL; return -ENOMEM; } int mlx4_cmd_init(struct mlx4_dev *dev) { struct mlx4_priv *priv = mlx4_priv(dev); mutex_init(&priv->cmd.hcr_mutex); mutex_init(&priv->cmd.slave_cmd_mutex); sema_init(&priv->cmd.poll_sem, 1); priv->cmd.use_events = 0; priv->cmd.toggle = 1; priv->cmd.hcr = NULL; priv->mfunc.vhcr = NULL; if (!mlx4_is_slave(dev)) { priv->cmd.hcr = ioremap(pci_resource_start(dev->pdev, 0) + MLX4_HCR_BASE, MLX4_HCR_SIZE); if (!priv->cmd.hcr) { mlx4_err(dev, "Couldn't map command register.\n"); return -ENOMEM; } } if (mlx4_is_mfunc(dev)) { priv->mfunc.vhcr = dma_alloc_coherent(&(dev->pdev->dev), PAGE_SIZE, &priv->mfunc.vhcr_dma, GFP_KERNEL); if (!priv->mfunc.vhcr) { mlx4_err(dev, "Couldn't allocate VHCR.\n"); goto err_hcr; } } priv->cmd.pool = pci_pool_create("mlx4_cmd", dev->pdev, MLX4_MAILBOX_SIZE, MLX4_MAILBOX_SIZE, 0); if (!priv->cmd.pool) goto err_vhcr; return 0; err_vhcr: if (mlx4_is_mfunc(dev)) dma_free_coherent(&(dev->pdev->dev), PAGE_SIZE, priv->mfunc.vhcr, priv->mfunc.vhcr_dma); priv->mfunc.vhcr = NULL; err_hcr: if (!mlx4_is_slave(dev)) iounmap(priv->cmd.hcr); return -ENOMEM; } void mlx4_multi_func_cleanup(struct mlx4_dev *dev) { struct mlx4_priv *priv = mlx4_priv(dev); int i, port; if (mlx4_is_master(dev)) { flush_workqueue(priv->mfunc.master.comm_wq); destroy_workqueue(priv->mfunc.master.comm_wq); for (i = 0; i < dev->num_slaves; i++) { for (port = 1; port <= MLX4_MAX_PORTS; port++) kfree(priv->mfunc.master.slave_state[i].vlan_filter[port]); } kfree(priv->mfunc.master.slave_state); kfree(priv->mfunc.master.vf_admin); kfree(priv->mfunc.master.vf_oper); } iounmap(priv->mfunc.comm); } void mlx4_cmd_cleanup(struct mlx4_dev *dev) { struct mlx4_priv *priv = mlx4_priv(dev); pci_pool_destroy(priv->cmd.pool); if (!mlx4_is_slave(dev)) iounmap(priv->cmd.hcr); if (mlx4_is_mfunc(dev)) dma_free_coherent(&(dev->pdev->dev), PAGE_SIZE, priv->mfunc.vhcr, priv->mfunc.vhcr_dma); priv->mfunc.vhcr = NULL; } /* * Switch to using events to issue FW commands (can only be called * after event queue for command events has been initialized). */ int mlx4_cmd_use_events(struct mlx4_dev *dev) { struct mlx4_priv *priv = mlx4_priv(dev); int i; int err = 0; priv->cmd.context = kmalloc(priv->cmd.max_cmds * sizeof (struct mlx4_cmd_context), GFP_KERNEL); if (!priv->cmd.context) return -ENOMEM; for (i = 0; i < priv->cmd.max_cmds; ++i) { priv->cmd.context[i].token = i; priv->cmd.context[i].next = i + 1; } priv->cmd.context[priv->cmd.max_cmds - 1].next = -1; priv->cmd.free_head = 0; sema_init(&priv->cmd.event_sem, priv->cmd.max_cmds); spin_lock_init(&priv->cmd.context_lock); for (priv->cmd.token_mask = 1; priv->cmd.token_mask < priv->cmd.max_cmds; priv->cmd.token_mask <<= 1) ; /* nothing */ --priv->cmd.token_mask; down(&priv->cmd.poll_sem); priv->cmd.use_events = 1; return err; } /* * Switch back to polling (used when shutting down the device) */ void mlx4_cmd_use_polling(struct mlx4_dev *dev) { struct mlx4_priv *priv = mlx4_priv(dev); int i; priv->cmd.use_events = 0; for (i = 0; i < priv->cmd.max_cmds; ++i) down(&priv->cmd.event_sem); kfree(priv->cmd.context); up(&priv->cmd.poll_sem); } struct mlx4_cmd_mailbox *mlx4_alloc_cmd_mailbox(struct mlx4_dev *dev) { struct mlx4_cmd_mailbox *mailbox; mailbox = kmalloc(sizeof *mailbox, GFP_KERNEL); if (!mailbox) return ERR_PTR(-ENOMEM); mailbox->buf = pci_pool_alloc(mlx4_priv(dev)->cmd.pool, GFP_KERNEL, &mailbox->dma); if (!mailbox->buf) { kfree(mailbox); return ERR_PTR(-ENOMEM); } return mailbox; } EXPORT_SYMBOL_GPL(mlx4_alloc_cmd_mailbox); void mlx4_free_cmd_mailbox(struct mlx4_dev *dev, struct mlx4_cmd_mailbox *mailbox) { if (!mailbox) return; pci_pool_free(mlx4_priv(dev)->cmd.pool, mailbox->buf, mailbox->dma); kfree(mailbox); } EXPORT_SYMBOL_GPL(mlx4_free_cmd_mailbox); u32 mlx4_comm_get_version(void) { return ((u32) CMD_CHAN_IF_REV << 8) | (u32) CMD_CHAN_VER; } int mlx4_set_vf_mac(struct mlx4_dev *dev, int port, int vf, u8 *mac) { struct mlx4_priv *priv = mlx4_priv(dev); struct mlx4_vport_state *s_info; if (!mlx4_is_master(dev)) return -EPROTONOSUPPORT; if ((vf <= 0) || (vf > dev->num_vfs)) { mlx4_err(dev, "Bad vf number:%d (max vf activated: %d)\n", vf, dev->num_vfs); return -EINVAL; } s_info = &priv->mfunc.master.vf_admin[vf].vport[port]; s_info->mac = mlx4_mac_to_u64(mac); mlx4_info(dev, "default mac on vf %d port %d to %llX will take afect only after vf restart\n", - vf, port, s_info->mac); + vf, port, (long long)s_info->mac); return 0; } EXPORT_SYMBOL_GPL(mlx4_set_vf_mac); int mlx4_set_vf_vlan(struct mlx4_dev *dev, int port, int vf, u16 vlan, u8 qos) { struct mlx4_priv *priv = mlx4_priv(dev); struct mlx4_vport_state *s_info; if ((!mlx4_is_master(dev)) || !(dev->caps.flags & MLX4_DEV_CAP_FLAG_ESWITCH_SUPPORT)) return -EPROTONOSUPPORT; if ((vf <= 0) || (vf > dev->num_vfs) || (vlan > 4095) || (qos > 7)) return -EINVAL; s_info = &priv->mfunc.master.vf_admin[vf].vport[port]; if ((0 == vlan) && (0 == qos)) s_info->default_vlan = MLX4_VGT; else s_info->default_vlan = vlan; s_info->default_qos = qos; return 0; } EXPORT_SYMBOL_GPL(mlx4_set_vf_vlan); int mlx4_set_vf_spoofchk(struct mlx4_dev *dev, int port, int vf, bool setting) { struct mlx4_priv *priv = mlx4_priv(dev); struct mlx4_vport_state *s_info; if ((!mlx4_is_master(dev)) || !(dev->caps.flags & MLX4_DEV_CAP_FLAG_ESWITCH_SUPPORT)) return -EPROTONOSUPPORT; if ((vf <= 0) || (vf > dev->num_vfs)) return -EINVAL; s_info = &priv->mfunc.master.vf_admin[vf].vport[port]; s_info->spoofchk = setting; return 0; } EXPORT_SYMBOL_GPL(mlx4_set_vf_spoofchk); Index: stable/10/sys/ofed/drivers/net/mlx4/cq.c =================================================================== --- stable/10/sys/ofed/drivers/net/mlx4/cq.c (revision 271126) +++ stable/10/sys/ofed/drivers/net/mlx4/cq.c (revision 271127) @@ -1,422 +1,421 @@ /* * Copyright (c) 2004, 2005 Topspin Communications. All rights reserved. * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved. * Copyright (c) 2005, 2006, 2007 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2005, 2006, 2007, 2008 Mellanox Technologies. All rights reserved. * Copyright (c) 2004 Voltaire, Inc. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU * General Public License (GPL) Version 2, available from the file * COPYING in the main directory of this source tree, or the * OpenIB.org BSD license below: * * Redistribution and use in source and binary forms, with or * without modification, are permitted provided that the following * conditions are met: * * - Redistributions of source code must retain the above * copyright notice, this list of conditions and the following * disclaimer. * * - Redistributions in binary form must reproduce the above * copyright notice, this list of conditions and the following * disclaimer in the documentation and/or other materials * provided with the distribution. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include #include #include #include #include "mlx4.h" #include "icm.h" #define MLX4_CQ_STATUS_OK ( 0 << 28) #define MLX4_CQ_STATUS_OVERFLOW ( 9 << 28) #define MLX4_CQ_STATUS_WRITE_FAIL (10 << 28) #define MLX4_CQ_FLAG_CC ( 1 << 18) #define MLX4_CQ_FLAG_OI ( 1 << 17) #define MLX4_CQ_STATE_ARMED ( 9 << 8) #define MLX4_CQ_STATE_ARMED_SOL ( 6 << 8) #define MLX4_EQ_STATE_FIRED (10 << 8) void mlx4_cq_completion(struct mlx4_dev *dev, u32 cqn) { struct mlx4_cq_table *cq_table = &mlx4_priv(dev)->cq_table; struct mlx4_cq *cq; spin_lock(&cq_table->lock); cq = radix_tree_lookup(&mlx4_priv(dev)->cq_table.tree, cqn & (dev->caps.num_cqs - 1)); if (cq) atomic_inc(&cq->refcount); spin_unlock(&cq_table->lock); if (!cq) { mlx4_dbg(dev, "Completion event for bogus CQ %08x\n", cqn); return; } ++cq->arm_sn; cq->comp(cq); if (atomic_dec_and_test(&cq->refcount)) complete(&cq->free); } void mlx4_cq_event(struct mlx4_dev *dev, u32 cqn, int event_type) { struct mlx4_cq_table *cq_table = &mlx4_priv(dev)->cq_table; struct mlx4_cq *cq; spin_lock(&cq_table->lock); cq = radix_tree_lookup(&cq_table->tree, cqn & (dev->caps.num_cqs - 1)); if (cq) atomic_inc(&cq->refcount); spin_unlock(&cq_table->lock); if (!cq) { mlx4_warn(dev, "Async event for bogus CQ %08x\n", cqn); return; } cq->event(cq, event_type); if (atomic_dec_and_test(&cq->refcount)) complete(&cq->free); } static int mlx4_SW2HW_CQ(struct mlx4_dev *dev, struct mlx4_cmd_mailbox *mailbox, int cq_num) { return mlx4_cmd(dev, mailbox->dma, cq_num, 0, MLX4_CMD_SW2HW_CQ, MLX4_CMD_TIME_CLASS_A, MLX4_CMD_WRAPPED); } static int mlx4_MODIFY_CQ(struct mlx4_dev *dev, struct mlx4_cmd_mailbox *mailbox, int cq_num, u32 opmod) { return mlx4_cmd(dev, mailbox->dma, cq_num, opmod, MLX4_CMD_MODIFY_CQ, MLX4_CMD_TIME_CLASS_A, MLX4_CMD_WRAPPED); } static int mlx4_HW2SW_CQ(struct mlx4_dev *dev, struct mlx4_cmd_mailbox *mailbox, int cq_num) { return mlx4_cmd_box(dev, 0, mailbox ? mailbox->dma : 0, cq_num, mailbox ? 0 : 1, MLX4_CMD_HW2SW_CQ, MLX4_CMD_TIME_CLASS_A, MLX4_CMD_WRAPPED); } int mlx4_cq_modify(struct mlx4_dev *dev, struct mlx4_cq *cq, u16 count, u16 period) { struct mlx4_cmd_mailbox *mailbox; struct mlx4_cq_context *cq_context; int err; mailbox = mlx4_alloc_cmd_mailbox(dev); if (IS_ERR(mailbox)) return PTR_ERR(mailbox); cq_context = mailbox->buf; memset(cq_context, 0, sizeof *cq_context); cq_context->cq_max_count = cpu_to_be16(count); cq_context->cq_period = cpu_to_be16(period); err = mlx4_MODIFY_CQ(dev, mailbox, cq->cqn, 1); mlx4_free_cmd_mailbox(dev, mailbox); return err; } EXPORT_SYMBOL_GPL(mlx4_cq_modify); int mlx4_cq_resize(struct mlx4_dev *dev, struct mlx4_cq *cq, int entries, struct mlx4_mtt *mtt) { struct mlx4_cmd_mailbox *mailbox; struct mlx4_cq_context *cq_context; u64 mtt_addr; int err; mailbox = mlx4_alloc_cmd_mailbox(dev); if (IS_ERR(mailbox)) return PTR_ERR(mailbox); cq_context = mailbox->buf; memset(cq_context, 0, sizeof *cq_context); cq_context->logsize_usrpage = cpu_to_be32(ilog2(entries) << 24); cq_context->log_page_size = mtt->page_shift - 12; mtt_addr = mlx4_mtt_addr(dev, mtt); cq_context->mtt_base_addr_h = mtt_addr >> 32; cq_context->mtt_base_addr_l = cpu_to_be32(mtt_addr & 0xffffffff); err = mlx4_MODIFY_CQ(dev, mailbox, cq->cqn, 0); mlx4_free_cmd_mailbox(dev, mailbox); return err; } EXPORT_SYMBOL_GPL(mlx4_cq_resize); int mlx4_cq_ignore_overrun(struct mlx4_dev *dev, struct mlx4_cq *cq) { struct mlx4_cmd_mailbox *mailbox; struct mlx4_cq_context *cq_context; int err; mailbox = mlx4_alloc_cmd_mailbox(dev); if (IS_ERR(mailbox)) return PTR_ERR(mailbox); cq_context = mailbox->buf; memset(cq_context, 0, sizeof *cq_context); cq_context->flags |= cpu_to_be32(MLX4_CQ_FLAG_OI); err = mlx4_MODIFY_CQ(dev, mailbox, cq->cqn, 3); mlx4_free_cmd_mailbox(dev, mailbox); return err; } EXPORT_SYMBOL_GPL(mlx4_cq_ignore_overrun); int __mlx4_cq_alloc_icm(struct mlx4_dev *dev, int *cqn) { struct mlx4_priv *priv = mlx4_priv(dev); struct mlx4_cq_table *cq_table = &priv->cq_table; int err; *cqn = mlx4_bitmap_alloc(&cq_table->bitmap); if (*cqn == -1) return -ENOMEM; err = mlx4_table_get(dev, &cq_table->table, *cqn); if (err) goto err_out; err = mlx4_table_get(dev, &cq_table->cmpt_table, *cqn); if (err) goto err_put; return 0; err_put: mlx4_table_put(dev, &cq_table->table, *cqn); err_out: mlx4_bitmap_free(&cq_table->bitmap, *cqn); return err; } static int mlx4_cq_alloc_icm(struct mlx4_dev *dev, int *cqn) { u64 out_param; int err; if (mlx4_is_mfunc(dev)) { err = mlx4_cmd_imm(dev, 0, &out_param, RES_CQ, RES_OP_RESERVE_AND_MAP, MLX4_CMD_ALLOC_RES, MLX4_CMD_TIME_CLASS_A, MLX4_CMD_WRAPPED); if (err) return err; else { *cqn = get_param_l(&out_param); return 0; } } return __mlx4_cq_alloc_icm(dev, cqn); } void __mlx4_cq_free_icm(struct mlx4_dev *dev, int cqn) { struct mlx4_priv *priv = mlx4_priv(dev); struct mlx4_cq_table *cq_table = &priv->cq_table; mlx4_table_put(dev, &cq_table->cmpt_table, cqn); mlx4_table_put(dev, &cq_table->table, cqn); mlx4_bitmap_free(&cq_table->bitmap, cqn); } static void mlx4_cq_free_icm(struct mlx4_dev *dev, int cqn) { u64 in_param = 0; int err; if (mlx4_is_mfunc(dev)) { set_param_l(&in_param, cqn); err = mlx4_cmd(dev, in_param, RES_CQ, RES_OP_RESERVE_AND_MAP, MLX4_CMD_FREE_RES, MLX4_CMD_TIME_CLASS_A, MLX4_CMD_WRAPPED); if (err) mlx4_warn(dev, "Failed freeing cq:%d\n", cqn); } else __mlx4_cq_free_icm(dev, cqn); } static int mlx4_find_least_loaded_vector(struct mlx4_priv *priv) { int i; int index = 0; int min = priv->eq_table.eq[0].load; for (i = 1; i < priv->dev.caps.num_comp_vectors; i++) { if (priv->eq_table.eq[i].load < min) { index = i; min = priv->eq_table.eq[i].load; } } return index; } int mlx4_cq_alloc(struct mlx4_dev *dev, int nent, struct mlx4_mtt *mtt, struct mlx4_uar *uar, u64 db_rec, struct mlx4_cq *cq, unsigned vector, int collapsed, int timestamp_en) { struct mlx4_priv *priv = mlx4_priv(dev); struct mlx4_cq_table *cq_table = &priv->cq_table; struct mlx4_cmd_mailbox *mailbox; struct mlx4_cq_context *cq_context; u64 mtt_addr; int err; cq->vector = (vector == MLX4_LEAST_ATTACHED_VECTOR) ? mlx4_find_least_loaded_vector(priv) : vector; if (cq->vector > dev->caps.num_comp_vectors + dev->caps.comp_pool) { return -EINVAL; } err = mlx4_cq_alloc_icm(dev, &cq->cqn); if (err) { return err; } spin_lock_irq(&cq_table->lock); err = radix_tree_insert(&cq_table->tree, cq->cqn, cq); spin_unlock_irq(&cq_table->lock); if (err){ goto err_icm; } mailbox = mlx4_alloc_cmd_mailbox(dev); if (IS_ERR(mailbox)) { err = PTR_ERR(mailbox); goto err_radix; } cq_context = mailbox->buf; memset(cq_context, 0, sizeof *cq_context); cq_context->flags = cpu_to_be32(!!collapsed << 18); if (timestamp_en) cq_context->flags |= cpu_to_be32(1 << 19); cq_context->logsize_usrpage = cpu_to_be32((ilog2(nent) << 24) | uar->index); cq_context->comp_eqn = priv->eq_table.eq[cq->vector].eqn; cq_context->log_page_size = mtt->page_shift - MLX4_ICM_PAGE_SHIFT; mtt_addr = mlx4_mtt_addr(dev, mtt); cq_context->mtt_base_addr_h = mtt_addr >> 32; cq_context->mtt_base_addr_l = cpu_to_be32(mtt_addr & 0xffffffff); cq_context->db_rec_addr = cpu_to_be64(db_rec); err = mlx4_SW2HW_CQ(dev, mailbox, cq->cqn); mlx4_free_cmd_mailbox(dev, mailbox); if (err) goto err_radix; priv->eq_table.eq[cq->vector].load++; cq->cons_index = 0; cq->arm_sn = 1; cq->uar = uar; atomic_set(&cq->refcount, 1); init_completion(&cq->free); cq->eqn = priv->eq_table.eq[cq->vector].eqn; cq->irq = priv->eq_table.eq[cq->vector].irq; return 0; err_radix: spin_lock_irq(&cq_table->lock); radix_tree_delete(&cq_table->tree, cq->cqn); spin_unlock_irq(&cq_table->lock); err_icm: mlx4_cq_free_icm(dev, cq->cqn); return err; } EXPORT_SYMBOL_GPL(mlx4_cq_alloc); void mlx4_cq_free(struct mlx4_dev *dev, struct mlx4_cq *cq) { struct mlx4_priv *priv = mlx4_priv(dev); struct mlx4_cq_table *cq_table = &priv->cq_table; int err; err = mlx4_HW2SW_CQ(dev, NULL, cq->cqn); if (err) mlx4_warn(dev, "HW2SW_CQ failed (%d) for CQN %06x\n", err, cq->cqn); priv->eq_table.eq[cq->vector].load--; synchronize_irq(priv->eq_table.eq[cq->vector].irq); spin_lock_irq(&cq_table->lock); radix_tree_delete(&cq_table->tree, cq->cqn); spin_unlock_irq(&cq_table->lock); if (atomic_dec_and_test(&cq->refcount)) complete(&cq->free); wait_for_completion(&cq->free); mlx4_cq_free_icm(dev, cq->cqn); } EXPORT_SYMBOL_GPL(mlx4_cq_free); int mlx4_init_cq_table(struct mlx4_dev *dev) { struct mlx4_cq_table *cq_table = &mlx4_priv(dev)->cq_table; int err; spin_lock_init(&cq_table->lock); INIT_RADIX_TREE(&cq_table->tree, GFP_ATOMIC); if (mlx4_is_slave(dev)) return 0; err = mlx4_bitmap_init(&cq_table->bitmap, dev->caps.num_cqs, dev->caps.num_cqs - 1, dev->caps.reserved_cqs, 0); if (err) return err; return 0; } void mlx4_cleanup_cq_table(struct mlx4_dev *dev) { if (mlx4_is_slave(dev)) return; /* Nothing to do to clean up radix_tree */ mlx4_bitmap_cleanup(&mlx4_priv(dev)->cq_table.bitmap); } Index: stable/10/sys/ofed/drivers/net/mlx4/en_netdev.c =================================================================== --- stable/10/sys/ofed/drivers/net/mlx4/en_netdev.c (revision 271126) +++ stable/10/sys/ofed/drivers/net/mlx4/en_netdev.c (revision 271127) @@ -1,1666 +1,1666 @@ /* * Copyright (c) 2007 Mellanox Technologies. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU * General Public License (GPL) Version 2, available from the file * COPYING in the main directory of this source tree, or the * OpenIB.org BSD license below: * * Redistribution and use in source and binary forms, with or * without modification, are permitted provided that the following * conditions are met: * * - Redistributions of source code must retain the above * copyright notice, this list of conditions and the following * disclaimer. * * - Redistributions in binary form must reproduce the above * copyright notice, this list of conditions and the following * disclaimer in the documentation and/or other materials * provided with the distribution. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * */ #include "mlx4_en.h" #include #include #include #include #include #include #include #include static void mlx4_en_init_locked(struct mlx4_en_priv *priv); static void mlx4_en_sysctl_stat(struct mlx4_en_priv *priv); static void mlx4_en_vlan_rx_add_vid(void *arg, struct net_device *dev, u16 vid) { struct mlx4_en_priv *priv = netdev_priv(dev); int idx; u8 field; if (arg != priv) return; if ((vid == 0) || (vid > 4095)) /* Invalid */ return; en_dbg(HW, priv, "adding VLAN:%d\n", vid); idx = vid >> 5; field = 1 << (vid & 0x1f); spin_lock(&priv->vlan_lock); priv->vlgrp_modified = true; if (priv->vlan_unregister[idx] & field) priv->vlan_unregister[idx] &= ~field; else priv->vlan_register[idx] |= field; priv->vlans[idx] |= field; spin_unlock(&priv->vlan_lock); } static void mlx4_en_vlan_rx_kill_vid(void *arg, struct net_device *dev, u16 vid) { struct mlx4_en_priv *priv = netdev_priv(dev); int idx; u8 field; if (arg != priv) return; if ((vid == 0) || (vid > 4095)) /* Invalid */ return; en_dbg(HW, priv, "Killing VID:%d\n", vid); idx = vid >> 5; field = 1 << (vid & 0x1f); spin_lock(&priv->vlan_lock); priv->vlgrp_modified = true; if (priv->vlan_register[idx] & field) priv->vlan_register[idx] &= ~field; else priv->vlan_unregister[idx] |= field; priv->vlans[idx] &= ~field; spin_unlock(&priv->vlan_lock); } u64 mlx4_en_mac_to_u64(u8 *addr) { u64 mac = 0; int i; for (i = 0; i < ETHER_ADDR_LEN; i++) { mac <<= 8; mac |= addr[i]; } return mac; } static int mlx4_en_cache_mclist(struct net_device *dev, u64 **mcaddrp) { struct ifmultiaddr *ifma; u64 *mcaddr; int cnt; int i; *mcaddrp = NULL; restart: cnt = 0; if_maddr_rlock(dev); TAILQ_FOREACH(ifma, &dev->if_multiaddrs, ifma_link) { if (ifma->ifma_addr->sa_family != AF_LINK) continue; if (((struct sockaddr_dl *)ifma->ifma_addr)->sdl_alen != ETHER_ADDR_LEN) continue; cnt++; } if_maddr_runlock(dev); if (cnt == 0) return (0); mcaddr = kmalloc(sizeof(u64) * cnt, GFP_KERNEL); if (mcaddr == NULL) return (0); i = 0; if_maddr_rlock(dev); TAILQ_FOREACH(ifma, &dev->if_multiaddrs, ifma_link) { if (ifma->ifma_addr->sa_family != AF_LINK) continue; if (((struct sockaddr_dl *)ifma->ifma_addr)->sdl_alen != ETHER_ADDR_LEN) continue; /* Make sure the list didn't grow. */ if (i == cnt) { if_maddr_runlock(dev); kfree(mcaddr); goto restart; } mcaddr[i++] = mlx4_en_mac_to_u64( LLADDR((struct sockaddr_dl *)ifma->ifma_addr)); } if_maddr_runlock(dev); *mcaddrp = mcaddr; return (i); } static void mlx4_en_stop_port(struct net_device *dev) { struct mlx4_en_priv *priv = netdev_priv(dev); queue_work(priv->mdev->workqueue, &priv->stop_port_task); } static void mlx4_en_start_port(struct net_device *dev) { struct mlx4_en_priv *priv = netdev_priv(dev); queue_work(priv->mdev->workqueue, &priv->start_port_task); } static void mlx4_en_set_multicast(struct net_device *dev) { struct mlx4_en_priv *priv = netdev_priv(dev); if (!priv->port_up) return; queue_work(priv->mdev->workqueue, &priv->mcast_task); } static void mlx4_en_do_set_multicast(struct work_struct *work) { struct mlx4_en_priv *priv = container_of(work, struct mlx4_en_priv, mcast_task); struct net_device *dev = priv->dev; struct mlx4_en_dev *mdev = priv->mdev; int err; mutex_lock(&mdev->state_lock); if (!mdev->device_up) { en_dbg(HW, priv, "Card is not up, " "ignoring multicast change.\n"); goto out; } if (!priv->port_up) { en_dbg(HW, priv, "Port is down, " "ignoring multicast change.\n"); goto out; } /* * Promsicuous mode: disable all filters */ if (dev->if_flags & IFF_PROMISC) { if (!(priv->flags & MLX4_EN_FLAG_PROMISC)) { priv->flags |= MLX4_EN_FLAG_PROMISC; /* Enable promiscouos mode */ err = mlx4_SET_PORT_qpn_calc(mdev->dev, priv->port, priv->base_qpn, 1); if (err) en_err(priv, "Failed enabling " "promiscous mode\n"); /* Disable port multicast filter (unconditionally) */ err = mlx4_SET_MCAST_FLTR(mdev->dev, priv->port, 0, 0, MLX4_MCAST_DISABLE); if (err) en_err(priv, "Failed disabling " "multicast filter\n"); /* Disable port VLAN filter */ err = mlx4_SET_VLAN_FLTR(mdev->dev, priv->port, NULL); if (err) en_err(priv, "Failed disabling VLAN filter\n"); } goto out; } /* * Not in promiscous mode */ if (priv->flags & MLX4_EN_FLAG_PROMISC) { priv->flags &= ~MLX4_EN_FLAG_PROMISC; /* Disable promiscouos mode */ err = mlx4_SET_PORT_qpn_calc(mdev->dev, priv->port, priv->base_qpn, 0); if (err) en_err(priv, "Failed disabling promiscous mode\n"); /* Enable port VLAN filter */ err = mlx4_SET_VLAN_FLTR(mdev->dev, priv->port, priv->vlans); if (err) en_err(priv, "Failed enabling VLAN filter\n"); } /* Enable/disable the multicast filter according to IFF_ALLMULTI */ if (dev->if_flags & IFF_ALLMULTI) { err = mlx4_SET_MCAST_FLTR(mdev->dev, priv->port, 0, 0, MLX4_MCAST_DISABLE); if (err) en_err(priv, "Failed disabling multicast filter\n"); } else { u64 *mcaddr; int mccount; int i; err = mlx4_SET_MCAST_FLTR(mdev->dev, priv->port, 0, 0, MLX4_MCAST_DISABLE); if (err) en_err(priv, "Failed disabling multicast filter\n"); /* Flush mcast filter and init it with broadcast address */ mlx4_SET_MCAST_FLTR(mdev->dev, priv->port, ETH_BCAST, 1, MLX4_MCAST_CONFIG); /* Update multicast list - we cache all addresses so they won't * change while HW is updated holding the command semaphor */ mccount = mlx4_en_cache_mclist(dev, &mcaddr); for (i = 0; i < mccount; i++) mlx4_SET_MCAST_FLTR(mdev->dev, priv->port, mcaddr[i], 0, MLX4_MCAST_CONFIG); err = mlx4_SET_MCAST_FLTR(mdev->dev, priv->port, 0, 0, MLX4_MCAST_ENABLE); if (err) en_err(priv, "Failed enabling multicast filter\n"); kfree(mcaddr); } out: mutex_unlock(&mdev->state_lock); } #ifdef CONFIG_NET_POLL_CONTROLLER static void mlx4_en_netpoll(struct net_device *dev) { struct mlx4_en_priv *priv = netdev_priv(dev); struct mlx4_en_cq *cq; unsigned long flags; int i; for (i = 0; i < priv->rx_ring_num; i++) { cq = &priv->rx_cq[i]; spin_lock_irqsave(&cq->lock, flags); napi_synchronize(&cq->napi); mlx4_en_process_rx_cq(dev, cq, 0); spin_unlock_irqrestore(&cq->lock, flags); } } #endif static void mlx4_en_watchdog_timeout(void *arg) { struct mlx4_en_priv *priv = arg; struct mlx4_en_dev *mdev = priv->mdev; en_dbg(DRV, priv, "Scheduling watchdog\n"); queue_work(mdev->workqueue, &priv->watchdog_task); if (priv->port_up) callout_reset(&priv->watchdog_timer, MLX4_EN_WATCHDOG_TIMEOUT, mlx4_en_watchdog_timeout, priv); } /* XXX This clears user settings in too many cases. */ static void mlx4_en_set_default_moderation(struct mlx4_en_priv *priv) { struct mlx4_en_cq *cq; int i; /* If we haven't received a specific coalescing setting * (module param), we set the moderation paramters as follows: * - moder_cnt is set to the number of mtu sized packets to * satisfy our coelsing target. * - moder_time is set to a fixed value. */ priv->rx_frames = MLX4_EN_RX_COAL_TARGET / priv->dev->if_mtu + 1; priv->rx_usecs = MLX4_EN_RX_COAL_TIME; en_dbg(INTR, priv, "Default coalesing params for mtu:%ld - " "rx_frames:%d rx_usecs:%d\n", priv->dev->if_mtu, priv->rx_frames, priv->rx_usecs); /* Setup cq moderation params */ for (i = 0; i < priv->rx_ring_num; i++) { cq = &priv->rx_cq[i]; cq->moder_cnt = priv->rx_frames; cq->moder_time = priv->rx_usecs; priv->last_moder_time[i] = MLX4_EN_AUTO_CONF; priv->last_moder_packets[i] = 0; priv->last_moder_bytes[i] = 0; } for (i = 0; i < priv->tx_ring_num; i++) { cq = &priv->tx_cq[i]; cq->moder_cnt = MLX4_EN_TX_COAL_PKTS; cq->moder_time = MLX4_EN_TX_COAL_TIME; } /* Reset auto-moderation params */ priv->pkt_rate_low = MLX4_EN_RX_RATE_LOW; priv->rx_usecs_low = MLX4_EN_RX_COAL_TIME_LOW; priv->pkt_rate_high = MLX4_EN_RX_RATE_HIGH; priv->rx_usecs_high = MLX4_EN_RX_COAL_TIME_HIGH; priv->sample_interval = MLX4_EN_SAMPLE_INTERVAL; priv->adaptive_rx_coal = 1; priv->last_moder_jiffies = 0; priv->last_moder_tx_packets = 0; } static void mlx4_en_auto_moderation(struct mlx4_en_priv *priv) { unsigned long period = (unsigned long) (jiffies - priv->last_moder_jiffies); struct mlx4_en_cq *cq; unsigned long packets; unsigned long rate; unsigned long avg_pkt_size; unsigned long rx_packets; unsigned long rx_bytes; unsigned long rx_pkt_diff; int moder_time; int ring, err; if (!priv->adaptive_rx_coal || period < priv->sample_interval * HZ) return; for (ring = 0; ring < priv->rx_ring_num; ring++) { spin_lock(&priv->stats_lock); rx_packets = priv->rx_ring[ring].packets; rx_bytes = priv->rx_ring[ring].bytes; spin_unlock(&priv->stats_lock); rx_pkt_diff = ((unsigned long) (rx_packets - priv->last_moder_packets[ring])); packets = rx_pkt_diff; rate = packets * HZ / period; avg_pkt_size = packets ? ((unsigned long) (rx_bytes - priv->last_moder_bytes[ring])) / packets : 0; /* Apply auto-moderation only when packet rate * exceeds a rate that it matters */ if (rate > (MLX4_EN_RX_RATE_THRESH / priv->rx_ring_num) && avg_pkt_size > MLX4_EN_AVG_PKT_SMALL) { if (rate < priv->pkt_rate_low || avg_pkt_size < MLX4_EN_AVG_PKT_SMALL) moder_time = priv->rx_usecs_low; else if (rate > priv->pkt_rate_high) moder_time = priv->rx_usecs_high; else moder_time = (rate - priv->pkt_rate_low) * (priv->rx_usecs_high - priv->rx_usecs_low) / (priv->pkt_rate_high - priv->pkt_rate_low) + priv->rx_usecs_low; } else { moder_time = priv->rx_usecs_low; } if (moder_time != priv->last_moder_time[ring]) { priv->last_moder_time[ring] = moder_time; cq = &priv->rx_cq[ring]; cq->moder_time = moder_time; err = mlx4_en_set_cq_moder(priv, cq); if (err) en_err(priv, "Failed modifying moderation " "for cq:%d\n", ring); } priv->last_moder_packets[ring] = rx_packets; priv->last_moder_bytes[ring] = rx_bytes; } priv->last_moder_jiffies = jiffies; } static void mlx4_en_handle_vlans(struct mlx4_en_priv *priv) { u8 vlan_register[VLAN_FLTR_SIZE]; u8 vlan_unregister[VLAN_FLTR_SIZE]; int i, j, idx; u16 vid; /* cache the vlan data for processing * done under lock to avoid changes during work */ spin_lock(&priv->vlan_lock); for (i = 0; i < VLAN_FLTR_SIZE; i++) { vlan_register[i] = priv->vlan_register[i]; priv->vlan_register[i] = 0; vlan_unregister[i] = priv->vlan_unregister[i]; priv->vlan_unregister[i] = 0; } priv->vlgrp_modified = false; spin_unlock(&priv->vlan_lock); /* Configure the vlan filter * The vlgrp is updated with all the vids that need to be allowed */ if (mlx4_SET_VLAN_FLTR(priv->mdev->dev, priv->port, priv->vlans)) en_err(priv, "Failed configuring VLAN filter\n"); /* Configure the VLAN table */ for (i = 0; i < VLAN_FLTR_SIZE; i++) { for (j = 0; j < 32; j++) { vid = (i << 5) + j; if (vlan_register[i] & (1 << j)) if (mlx4_register_vlan(priv->mdev->dev, priv->port, vid, &idx)) en_dbg(HW, priv, "failed registering vlan %d\n", vid); if (vlan_unregister[i] & (1 << j)) { if (!mlx4_find_cached_vlan(priv->mdev->dev, priv->port, vid, &idx)) mlx4_unregister_vlan(priv->mdev->dev, priv->port, idx); else en_dbg(HW, priv, "could not find vid %d in cache\n", vid); } } } } static void mlx4_en_do_get_stats(struct work_struct *work) { struct delayed_work *delay = to_delayed_work(work); struct mlx4_en_priv *priv = container_of(delay, struct mlx4_en_priv, stats_task); struct mlx4_en_dev *mdev = priv->mdev; int err; err = mlx4_en_DUMP_ETH_STATS(mdev, priv->port, 0); if (err) en_dbg(HW, priv, "Could not update stats \n"); mutex_lock(&mdev->state_lock); if (mdev->device_up) { if (priv->port_up) { if (priv->vlgrp_modified) mlx4_en_handle_vlans(priv); mlx4_en_auto_moderation(priv); } queue_delayed_work(mdev->workqueue, &priv->stats_task, STATS_DELAY); } mlx4_en_QUERY_PORT(priv->mdev, priv->port); mutex_unlock(&mdev->state_lock); } static void mlx4_en_linkstate(struct work_struct *work) { struct mlx4_en_priv *priv = container_of(work, struct mlx4_en_priv, linkstate_task); struct mlx4_en_dev *mdev = priv->mdev; int linkstate = priv->link_state; mutex_lock(&mdev->state_lock); /* If observable port state changed set carrier state and * report to system log */ if (priv->last_link_state != linkstate) { if (linkstate == MLX4_DEV_EVENT_PORT_DOWN) { if_link_state_change(priv->dev, LINK_STATE_DOWN); } else { en_info(priv, "Link Up\n"); if_link_state_change(priv->dev, LINK_STATE_UP); } } priv->last_link_state = linkstate; mutex_unlock(&mdev->state_lock); } static void mlx4_en_lock_and_stop_port(struct work_struct *work) { struct mlx4_en_priv *priv = container_of(work, struct mlx4_en_priv, stop_port_task); struct net_device *dev = priv->dev; struct mlx4_en_dev *mdev = priv->mdev; mutex_lock(&mdev->state_lock); mlx4_en_do_stop_port(dev); mutex_unlock(&mdev->state_lock); } static void mlx4_en_lock_and_start_port(struct work_struct *work) { struct mlx4_en_priv *priv = container_of(work, struct mlx4_en_priv, start_port_task); struct net_device *dev = priv->dev; struct mlx4_en_dev *mdev = priv->mdev; mutex_lock(&mdev->state_lock); mlx4_en_do_start_port(dev); mutex_unlock(&mdev->state_lock); } int mlx4_en_do_start_port(struct net_device *dev) { struct mlx4_en_priv *priv = netdev_priv(dev); struct mlx4_en_dev *mdev = priv->mdev; struct mlx4_en_cq *cq; struct mlx4_en_tx_ring *tx_ring; u64 config; int rx_index = 0; int tx_index = 0; int err = 0; int i; int j; if (priv->port_up) { en_dbg(DRV, priv, "start port called while port already up\n"); return 0; } /* Calculate Rx buf size */ dev->if_mtu = min(dev->if_mtu, priv->max_mtu); mlx4_en_calc_rx_buf(dev); en_dbg(DRV, priv, "Rx buf size:%d\n", priv->rx_mb_size); /* Configure rx cq's and rings */ err = mlx4_en_activate_rx_rings(priv); if (err) { en_err(priv, "Failed to activate RX rings\n"); return err; } for (i = 0; i < priv->rx_ring_num; i++) { cq = &priv->rx_cq[i]; err = mlx4_en_activate_cq(priv, cq); if (err) { en_err(priv, "Failed activating Rx CQ\n"); goto cq_err; } for (j = 0; j < cq->size; j++) cq->buf[j].owner_sr_opcode = MLX4_CQE_OWNER_MASK; err = mlx4_en_set_cq_moder(priv, cq); if (err) { en_err(priv, "Failed setting cq moderation parameters"); mlx4_en_deactivate_cq(priv, cq); goto cq_err; } mlx4_en_arm_cq(priv, cq); priv->rx_ring[i].cqn = cq->mcq.cqn; ++rx_index; } err = mlx4_en_config_rss_steer(priv); if (err) { en_err(priv, "Failed configuring rss steering\n"); goto cq_err; } /* Configure tx cq's and rings */ for (i = 0; i < priv->tx_ring_num; i++) { /* Configure cq */ cq = &priv->tx_cq[i]; err = mlx4_en_activate_cq(priv, cq); if (err) { en_err(priv, "Failed allocating Tx CQ\n"); goto tx_err; } err = mlx4_en_set_cq_moder(priv, cq); if (err) { en_err(priv, "Failed setting cq moderation parameters"); mlx4_en_deactivate_cq(priv, cq); goto tx_err; } en_dbg(DRV, priv, "Resetting index of collapsed CQ:%d to -1\n", i); cq->buf->wqe_index = cpu_to_be16(0xffff); /* Configure ring */ tx_ring = &priv->tx_ring[i]; err = mlx4_en_activate_tx_ring(priv, tx_ring, cq->mcq.cqn); if (err) { en_err(priv, "Failed allocating Tx ring\n"); mlx4_en_deactivate_cq(priv, cq); goto tx_err; } /* Set initial ownership of all Tx TXBBs to SW (1) */ for (j = 0; j < tx_ring->buf_size; j += STAMP_STRIDE) *((u32 *) (tx_ring->buf + j)) = 0xffffffff; ++tx_index; } /* Configure port */ err = mlx4_SET_PORT_general(mdev->dev, priv->port, priv->rx_mb_size + ETHER_CRC_LEN, priv->prof->tx_pause, priv->prof->tx_ppp, priv->prof->rx_pause, priv->prof->rx_ppp); if (err) { en_err(priv, "Failed setting port general configurations " "for port %d, with error %d\n", priv->port, err); goto tx_err; } /* Set default qp number */ err = mlx4_SET_PORT_qpn_calc(mdev->dev, priv->port, priv->base_qpn, 0); if (err) { en_err(priv, "Failed setting default qp numbers\n"); goto tx_err; } /* Set port mac number */ en_dbg(DRV, priv, "Setting mac for port %d\n", priv->port); err = mlx4_register_mac(mdev->dev, priv->port, mlx4_en_mac_to_u64(IF_LLADDR(dev))); if (err < 0) { en_err(priv, "Failed setting port mac err=%d\n", err); goto tx_err; } mdev->mac_removed[priv->port] = 0; /* Init port */ en_dbg(HW, priv, "Initializing port\n"); err = mlx4_INIT_PORT(mdev->dev, priv->port); if (err) { en_err(priv, "Failed Initializing port\n"); goto mac_err; } /* Set the various hardware offload abilities */ dev->if_hwassist = 0; if (dev->if_capenable & IFCAP_TSO4) dev->if_hwassist |= CSUM_TSO; if (dev->if_capenable & IFCAP_TXCSUM) dev->if_hwassist |= (CSUM_TCP | CSUM_UDP | CSUM_IP); if (dev->if_capenable & IFCAP_RXCSUM) priv->rx_csum = 1; else priv->rx_csum = 0; err = mlx4_wol_read(priv->mdev->dev, &config, priv->port); if (err) { en_err(priv, "Failed to get WoL info, unable to modify\n"); goto wol_err; } if (dev->if_capenable & IFCAP_WOL_MAGIC) { config |= MLX4_EN_WOL_DO_MODIFY | MLX4_EN_WOL_ENABLED | MLX4_EN_WOL_MAGIC; } else { config &= ~(MLX4_EN_WOL_ENABLED | MLX4_EN_WOL_MAGIC); config |= MLX4_EN_WOL_DO_MODIFY; } err = mlx4_wol_write(priv->mdev->dev, config, priv->port); if (err) { en_err(priv, "Failed to set WoL information\n"); goto wol_err; } priv->port_up = true; /* Populate multicast list */ mlx4_en_set_multicast(dev); /* Enable the queues. */ dev->if_drv_flags &= ~IFF_DRV_OACTIVE; dev->if_drv_flags |= IFF_DRV_RUNNING; callout_reset(&priv->watchdog_timer, MLX4_EN_WATCHDOG_TIMEOUT, mlx4_en_watchdog_timeout, priv); return 0; wol_err: /* close port*/ mlx4_CLOSE_PORT(mdev->dev, priv->port); mac_err: mlx4_unregister_mac(mdev->dev, priv->port, priv->mac); tx_err: while (tx_index--) { mlx4_en_deactivate_tx_ring(priv, &priv->tx_ring[tx_index]); mlx4_en_deactivate_cq(priv, &priv->tx_cq[tx_index]); } mlx4_en_release_rss_steer(priv); cq_err: while (rx_index--) mlx4_en_deactivate_cq(priv, &priv->rx_cq[rx_index]); for (i = 0; i < priv->rx_ring_num; i++) mlx4_en_deactivate_rx_ring(priv, &priv->rx_ring[i]); return err; /* need to close devices */ } void mlx4_en_do_stop_port(struct net_device *dev) { struct mlx4_en_priv *priv = netdev_priv(dev); struct mlx4_en_dev *mdev = priv->mdev; int i; if (!priv->port_up) { en_dbg(DRV, priv, "stop port called while port already down\n"); return; } /* Set port as not active */ priv->port_up = false; /* Unregister Mac address for the port */ mlx4_unregister_mac(mdev->dev, priv->port, priv->mac); mdev->mac_removed[priv->port] = 1; /* Free TX Rings */ for (i = 0; i < priv->tx_ring_num; i++) { mlx4_en_deactivate_tx_ring(priv, &priv->tx_ring[i]); mlx4_en_deactivate_cq(priv, &priv->tx_cq[i]); } msleep(10); for (i = 0; i < priv->tx_ring_num; i++) mlx4_en_free_tx_buf(dev, &priv->tx_ring[i]); /* Free RSS qps */ mlx4_en_release_rss_steer(priv); /* Free RX Rings */ for (i = 0; i < priv->rx_ring_num; i++) { mlx4_en_deactivate_rx_ring(priv, &priv->rx_ring[i]); mlx4_en_deactivate_cq(priv, &priv->rx_cq[i]); } /* close port*/ mlx4_CLOSE_PORT(mdev->dev, priv->port); callout_stop(&priv->watchdog_timer); dev->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE); } static void mlx4_en_restart(struct work_struct *work) { struct mlx4_en_priv *priv = container_of(work, struct mlx4_en_priv, watchdog_task); struct mlx4_en_dev *mdev = priv->mdev; struct net_device *dev = priv->dev; struct mlx4_en_tx_ring *ring; int i; if (priv->blocked == 0 || priv->port_up == 0) return; for (i = 0; i < priv->tx_ring_num; i++) { ring = &priv->tx_ring[i]; if (ring->blocked && ring->watchdog_time + MLX4_EN_WATCHDOG_TIMEOUT < ticks) goto reset; } return; reset: priv->port_stats.tx_timeout++; en_dbg(DRV, priv, "Watchdog task called for port %d\n", priv->port); mutex_lock(&mdev->state_lock); if (priv->port_up) { mlx4_en_do_stop_port(dev); if (mlx4_en_do_start_port(dev)) en_err(priv, "Failed restarting port %d\n", priv->port); } mutex_unlock(&mdev->state_lock); } static void mlx4_en_init(void *arg) { struct mlx4_en_priv *priv; struct mlx4_en_dev *mdev; priv = arg; mdev = priv->mdev; mutex_lock(&mdev->state_lock); mlx4_en_init_locked(priv); mutex_unlock(&mdev->state_lock); } static void mlx4_en_init_locked(struct mlx4_en_priv *priv) { struct mlx4_en_dev *mdev; struct ifnet *dev; int i; dev = priv->dev; mdev = priv->mdev; if (dev->if_drv_flags & IFF_DRV_RUNNING) mlx4_en_do_stop_port(dev); if (!mdev->device_up) { en_err(priv, "Cannot open - device down/disabled\n"); return; } /* Reset HW statistics and performance counters */ if (mlx4_en_DUMP_ETH_STATS(mdev, priv->port, 1)) en_dbg(HW, priv, "Failed dumping statistics\n"); memset(&priv->pstats, 0, sizeof(priv->pstats)); for (i = 0; i < priv->tx_ring_num; i++) { priv->tx_ring[i].bytes = 0; priv->tx_ring[i].packets = 0; } for (i = 0; i < priv->rx_ring_num; i++) { priv->rx_ring[i].bytes = 0; priv->rx_ring[i].packets = 0; } mlx4_en_set_default_moderation(priv); if (mlx4_en_do_start_port(dev)) en_err(priv, "Failed starting port:%d\n", priv->port); } void mlx4_en_free_resources(struct mlx4_en_priv *priv) { int i; for (i = 0; i < priv->tx_ring_num; i++) { if (priv->tx_ring[i].tx_info) mlx4_en_destroy_tx_ring(priv, &priv->tx_ring[i]); if (priv->tx_cq[i].buf) mlx4_en_destroy_cq(priv, &priv->tx_cq[i]); } for (i = 0; i < priv->rx_ring_num; i++) { if (priv->rx_ring[i].rx_info) mlx4_en_destroy_rx_ring(priv, &priv->rx_ring[i]); if (priv->rx_cq[i].buf) mlx4_en_destroy_cq(priv, &priv->rx_cq[i]); } /* Free the stats tree when we resize the rings. */ if (priv->sysctl) sysctl_ctx_free(&priv->stat_ctx); } int mlx4_en_alloc_resources(struct mlx4_en_priv *priv) { struct mlx4_en_port_profile *prof = priv->prof; int i; /* Create tx Rings */ for (i = 0; i < priv->tx_ring_num; i++) { if (mlx4_en_create_cq(priv, &priv->tx_cq[i], prof->tx_ring_size, i, TX)) goto err; if (mlx4_en_create_tx_ring(priv, &priv->tx_ring[i], prof->tx_ring_size, TXBB_SIZE)) goto err; } /* Create rx Rings */ for (i = 0; i < priv->rx_ring_num; i++) { if (mlx4_en_create_cq(priv, &priv->rx_cq[i], prof->rx_ring_size, i, RX)) goto err; if (mlx4_en_create_rx_ring(priv, &priv->rx_ring[i], prof->rx_ring_size)) goto err; } /* Re-create stat sysctls in case the number of rings changed. */ mlx4_en_sysctl_stat(priv); /* Populate Tx priority mappings */ mlx4_en_set_prio_map(priv, priv->tx_prio_map, priv->tx_ring_num - MLX4_EN_NUM_HASH_RINGS); return 0; err: en_err(priv, "Failed to allocate NIC resources\n"); return -ENOMEM; } void mlx4_en_destroy_netdev(struct net_device *dev) { struct mlx4_en_priv *priv = netdev_priv(dev); struct mlx4_en_dev *mdev = priv->mdev; en_dbg(DRV, priv, "Destroying netdev on port:%d\n", priv->port); if (priv->vlan_attach != NULL) EVENTHANDLER_DEREGISTER(vlan_config, priv->vlan_attach); if (priv->vlan_detach != NULL) EVENTHANDLER_DEREGISTER(vlan_unconfig, priv->vlan_detach); /* Unregister device - this will close the port if it was up */ if (priv->registered) ether_ifdetach(dev); if (priv->allocated) mlx4_free_hwq_res(mdev->dev, &priv->res, MLX4_EN_PAGE_SIZE); mutex_lock(&mdev->state_lock); mlx4_en_do_stop_port(dev); mutex_unlock(&mdev->state_lock); cancel_delayed_work(&priv->stats_task); /* flush any pending task for this netdev */ flush_workqueue(mdev->workqueue); callout_drain(&priv->watchdog_timer); /* Detach the netdev so tasks would not attempt to access it */ mutex_lock(&mdev->state_lock); mdev->pndev[priv->port] = NULL; mutex_unlock(&mdev->state_lock); mlx4_en_free_resources(priv); if (priv->sysctl) sysctl_ctx_free(&priv->conf_ctx); mtx_destroy(&priv->stats_lock.m); mtx_destroy(&priv->vlan_lock.m); kfree(priv); if_free(dev); } static int mlx4_en_change_mtu(struct net_device *dev, int new_mtu) { struct mlx4_en_priv *priv = netdev_priv(dev); struct mlx4_en_dev *mdev = priv->mdev; int err = 0; en_dbg(DRV, priv, "Change MTU called - current:%ld new:%d\n", dev->if_mtu, new_mtu); if ((new_mtu < MLX4_EN_MIN_MTU) || (new_mtu > priv->max_mtu)) { en_err(priv, "Bad MTU size:%d.\n", new_mtu); return -EPERM; } mutex_lock(&mdev->state_lock); dev->if_mtu = new_mtu; if (dev->if_drv_flags & IFF_DRV_RUNNING) { if (!mdev->device_up) { /* NIC is probably restarting - let watchdog task reset * the port */ en_dbg(DRV, priv, "Change MTU called with card down!?\n"); } else { mlx4_en_do_stop_port(dev); mlx4_en_set_default_moderation(priv); err = mlx4_en_do_start_port(dev); if (err) { en_err(priv, "Failed restarting port:%d\n", priv->port); queue_work(mdev->workqueue, &priv->watchdog_task); } } } mutex_unlock(&mdev->state_lock); return 0; } static int mlx4_en_calc_media(struct mlx4_en_priv *priv) { int trans_type; int active; active = IFM_ETHER; if (priv->last_link_state == MLX4_DEV_EVENT_PORT_DOWN) return (active); /* * [ShaharK] mlx4_en_QUERY_PORT sleeps and cannot be called under a * non-sleepable lock. * I moved it to the periodic mlx4_en_do_get_stats. if (mlx4_en_QUERY_PORT(priv->mdev, priv->port)) return (active); */ active |= IFM_FDX; trans_type = priv->port_state.transciver; /* XXX I don't know all of the transceiver values. */ switch (priv->port_state.link_speed) { case 1000: active |= IFM_1000_T; break; case 10000: if (trans_type > 0 && trans_type <= 0xC) active |= IFM_10G_SR; else if (trans_type == 0x80 || trans_type == 0) active |= IFM_10G_CX4; break; case 40000: active |= IFM_40G_CR4; break; } if (priv->prof->tx_pause) active |= IFM_ETH_TXPAUSE; if (priv->prof->rx_pause) active |= IFM_ETH_RXPAUSE; return (active); } static void mlx4_en_media_status(struct ifnet *dev, struct ifmediareq *ifmr) { struct mlx4_en_priv *priv; priv = dev->if_softc; ifmr->ifm_status = IFM_AVALID; if (priv->last_link_state != MLX4_DEV_EVENT_PORT_DOWN) ifmr->ifm_status |= IFM_ACTIVE; ifmr->ifm_active = mlx4_en_calc_media(priv); return; } static int mlx4_en_media_change(struct ifnet *dev) { struct mlx4_en_priv *priv; struct ifmedia *ifm; int rxpause; int txpause; int error; priv = dev->if_softc; ifm = &priv->media; rxpause = txpause = 0; error = 0; if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER) return (EINVAL); switch (IFM_SUBTYPE(ifm->ifm_media)) { case IFM_AUTO: break; case IFM_10G_SR: case IFM_10G_CX4: case IFM_1000_T: if (IFM_SUBTYPE(ifm->ifm_media) == IFM_SUBTYPE(mlx4_en_calc_media(priv)) && (ifm->ifm_media & IFM_FDX)) break; /* Fallthrough */ default: printf("%s: Only auto media type\n", if_name(dev)); return (EINVAL); } /* Allow user to set/clear pause */ if (IFM_OPTIONS(ifm->ifm_media) & IFM_ETH_RXPAUSE) rxpause = 1; if (IFM_OPTIONS(ifm->ifm_media) & IFM_ETH_TXPAUSE) txpause = 1; if (priv->prof->tx_pause != txpause || priv->prof->rx_pause != rxpause) { priv->prof->tx_pause = txpause; priv->prof->rx_pause = rxpause; error = -mlx4_SET_PORT_general(priv->mdev->dev, priv->port, priv->rx_mb_size + ETHER_CRC_LEN, priv->prof->tx_pause, priv->prof->tx_ppp, priv->prof->rx_pause, priv->prof->rx_ppp); } return (error); } static int mlx4_en_ioctl(struct ifnet *dev, u_long command, caddr_t data) { struct mlx4_en_priv *priv; struct mlx4_en_dev *mdev; struct ifreq *ifr; int error; int mask; error = 0; mask = 0; priv = dev->if_softc; mdev = priv->mdev; ifr = (struct ifreq *) data; switch (command) { case SIOCSIFMTU: error = -mlx4_en_change_mtu(dev, ifr->ifr_mtu); break; case SIOCSIFFLAGS: if (dev->if_flags & IFF_UP) { if ((dev->if_drv_flags & IFF_DRV_RUNNING) == 0) mlx4_en_start_port(dev); else mlx4_en_set_multicast(dev); } else { if (dev->if_drv_flags & IFF_DRV_RUNNING) { mlx4_en_stop_port(dev); if_link_state_change(dev, LINK_STATE_DOWN); /* * Since mlx4_en_stop_port is defered we * have to wait till it's finished. */ for (int count=0; count<10; count++) { if (dev->if_drv_flags & IFF_DRV_RUNNING) { DELAY(20000); } else { break; } } } } break; case SIOCADDMULTI: case SIOCDELMULTI: mlx4_en_set_multicast(dev); break; case SIOCSIFMEDIA: case SIOCGIFMEDIA: error = ifmedia_ioctl(dev, ifr, &priv->media, command); break; case SIOCSIFCAP: mutex_lock(&mdev->state_lock); mask = ifr->ifr_reqcap ^ dev->if_capenable; if (mask & IFCAP_HWCSUM) dev->if_capenable ^= IFCAP_HWCSUM; if (mask & IFCAP_TSO4) dev->if_capenable ^= IFCAP_TSO4; if (mask & IFCAP_LRO) dev->if_capenable ^= IFCAP_LRO; if (mask & IFCAP_VLAN_HWTAGGING) dev->if_capenable ^= IFCAP_VLAN_HWTAGGING; if (mask & IFCAP_VLAN_HWFILTER) dev->if_capenable ^= IFCAP_VLAN_HWFILTER; if (mask & IFCAP_WOL_MAGIC) dev->if_capenable ^= IFCAP_WOL_MAGIC; if (dev->if_drv_flags & IFF_DRV_RUNNING) mlx4_en_init_locked(priv); mutex_unlock(&mdev->state_lock); VLAN_CAPABILITIES(dev); break; default: error = ether_ioctl(dev, command, data); break; } return (error); } static int mlx4_en_set_ring_size(struct net_device *dev, int rx_size, int tx_size) { struct mlx4_en_priv *priv = netdev_priv(dev); struct mlx4_en_dev *mdev = priv->mdev; int port_up = 0; int err = 0; rx_size = roundup_pow_of_two(rx_size); rx_size = max_t(u32, rx_size, MLX4_EN_MIN_RX_SIZE); rx_size = min_t(u32, rx_size, MLX4_EN_MAX_RX_SIZE); tx_size = roundup_pow_of_two(tx_size); tx_size = max_t(u32, tx_size, MLX4_EN_MIN_TX_SIZE); tx_size = min_t(u32, tx_size, MLX4_EN_MAX_TX_SIZE); if (rx_size == (priv->port_up ? priv->rx_ring[0].actual_size : priv->rx_ring[0].size) && tx_size == priv->tx_ring[0].size) return 0; mutex_lock(&mdev->state_lock); if (priv->port_up) { port_up = 1; mlx4_en_do_stop_port(dev); } mlx4_en_free_resources(priv); priv->prof->tx_ring_size = tx_size; priv->prof->rx_ring_size = rx_size; err = mlx4_en_alloc_resources(priv); if (err) { en_err(priv, "Failed reallocating port resources\n"); goto out; } if (port_up) { err = mlx4_en_do_start_port(dev); if (err) en_err(priv, "Failed starting port\n"); } out: mutex_unlock(&mdev->state_lock); return err; } static int mlx4_en_set_rx_ring_size(SYSCTL_HANDLER_ARGS) { struct mlx4_en_priv *priv; int size; int error; priv = arg1; size = priv->prof->rx_ring_size; error = sysctl_handle_int(oidp, &size, 0, req); if (error || !req->newptr) return (error); error = -mlx4_en_set_ring_size(priv->dev, size, priv->prof->tx_ring_size); return (error); } static int mlx4_en_set_tx_ring_size(SYSCTL_HANDLER_ARGS) { struct mlx4_en_priv *priv; int size; int error; priv = arg1; size = priv->prof->tx_ring_size; error = sysctl_handle_int(oidp, &size, 0, req); if (error || !req->newptr) return (error); error = -mlx4_en_set_ring_size(priv->dev, priv->prof->rx_ring_size, size); return (error); } static int mlx4_en_set_tx_ppp(SYSCTL_HANDLER_ARGS) { struct mlx4_en_priv *priv; int ppp; int error; priv = arg1; ppp = priv->prof->tx_ppp; error = sysctl_handle_int(oidp, &ppp, 0, req); if (error || !req->newptr) return (error); if (ppp > 0xff || ppp < 0) return (-EINVAL); priv->prof->tx_ppp = ppp; error = -mlx4_SET_PORT_general(priv->mdev->dev, priv->port, priv->rx_mb_size + ETHER_CRC_LEN, priv->prof->tx_pause, priv->prof->tx_ppp, priv->prof->rx_pause, priv->prof->rx_ppp); return (error); } static int mlx4_en_set_rx_ppp(SYSCTL_HANDLER_ARGS) { struct mlx4_en_priv *priv; struct mlx4_en_dev *mdev; int tx_ring_num; int ppp; int error; int port_up; port_up = 0; priv = arg1; mdev = priv->mdev; ppp = priv->prof->rx_ppp; error = sysctl_handle_int(oidp, &ppp, 0, req); if (error || !req->newptr) return (error); if (ppp > 0xff || ppp < 0) return (-EINVAL); /* See if we have to change the number of tx queues. */ if (!ppp != !priv->prof->rx_ppp) { tx_ring_num = MLX4_EN_NUM_HASH_RINGS + 1 + (!!ppp) * MLX4_EN_NUM_PPP_RINGS; mutex_lock(&mdev->state_lock); if (priv->port_up) { port_up = 1; mlx4_en_do_stop_port(priv->dev); } mlx4_en_free_resources(priv); priv->tx_ring_num = tx_ring_num; priv->prof->rx_ppp = ppp; error = -mlx4_en_alloc_resources(priv); if (error) en_err(priv, "Failed reallocating port resources\n"); if (error == 0 && port_up) { error = -mlx4_en_do_start_port(priv->dev); if (error) en_err(priv, "Failed starting port\n"); } mutex_unlock(&mdev->state_lock); return (error); } priv->prof->rx_ppp = ppp; error = -mlx4_SET_PORT_general(priv->mdev->dev, priv->port, priv->rx_mb_size + ETHER_CRC_LEN, priv->prof->tx_pause, priv->prof->tx_ppp, priv->prof->rx_pause, priv->prof->rx_ppp); return (error); } static void mlx4_en_sysctl_conf(struct mlx4_en_priv *priv) { struct net_device *dev; struct sysctl_ctx_list *ctx; struct sysctl_oid *node; struct sysctl_oid_list *node_list; struct sysctl_oid *coal; struct sysctl_oid_list *coal_list; dev = priv->dev; ctx = &priv->conf_ctx; sysctl_ctx_init(ctx); priv->sysctl = SYSCTL_ADD_NODE(ctx, SYSCTL_STATIC_CHILDREN(_hw), OID_AUTO, dev->if_xname, CTLFLAG_RD, 0, "mlx4 10gig ethernet"); node = SYSCTL_ADD_NODE(ctx, SYSCTL_CHILDREN(priv->sysctl), OID_AUTO, "conf", CTLFLAG_RD, NULL, "Configuration"); node_list = SYSCTL_CHILDREN(node); SYSCTL_ADD_UINT(ctx, node_list, OID_AUTO, "msg_enable", CTLFLAG_RW, &priv->msg_enable, 0, "Driver message enable bitfield"); SYSCTL_ADD_UINT(ctx, node_list, OID_AUTO, "rx_rings", CTLTYPE_INT | CTLFLAG_RD, &priv->rx_ring_num, 0, "Number of receive rings"); SYSCTL_ADD_UINT(ctx, node_list, OID_AUTO, "tx_rings", CTLTYPE_INT | CTLFLAG_RD, &priv->tx_ring_num, 0, "Number of transmit rings"); SYSCTL_ADD_PROC(ctx, node_list, OID_AUTO, "rx_size", CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, priv, 0, mlx4_en_set_rx_ring_size, "I", "Receive ring size"); SYSCTL_ADD_PROC(ctx, node_list, OID_AUTO, "tx_size", CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, priv, 0, mlx4_en_set_tx_ring_size, "I", "Transmit ring size"); SYSCTL_ADD_UINT(ctx, node_list, OID_AUTO, "ip_reasm", CTLFLAG_RW, &priv->ip_reasm, 0, "Allow reassembly of IP fragments."); SYSCTL_ADD_PROC(ctx, node_list, OID_AUTO, "tx_ppp", CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, priv, 0, mlx4_en_set_tx_ppp, "I", "TX Per-priority pause"); SYSCTL_ADD_PROC(ctx, node_list, OID_AUTO, "rx_ppp", CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, priv, 0, mlx4_en_set_rx_ppp, "I", "RX Per-priority pause"); /* Add coalescer configuration. */ coal = SYSCTL_ADD_NODE(ctx, node_list, OID_AUTO, "coalesce", CTLFLAG_RD, NULL, "Interrupt coalesce configuration"); coal_list = SYSCTL_CHILDREN(node); SYSCTL_ADD_UINT(ctx, coal_list, OID_AUTO, "pkt_rate_low", CTLFLAG_RW, &priv->pkt_rate_low, 0, "Packets per-second for minimum delay"); SYSCTL_ADD_UINT(ctx, coal_list, OID_AUTO, "rx_usecs_low", CTLFLAG_RW, &priv->rx_usecs_low, 0, "Minimum RX delay in micro-seconds"); SYSCTL_ADD_UINT(ctx, coal_list, OID_AUTO, "pkt_rate_high", CTLFLAG_RW, &priv->pkt_rate_high, 0, "Packets per-second for maximum delay"); SYSCTL_ADD_UINT(ctx, coal_list, OID_AUTO, "rx_usecs_high", CTLFLAG_RW, &priv->rx_usecs_high, 0, "Maximum RX delay in micro-seconds"); SYSCTL_ADD_UINT(ctx, coal_list, OID_AUTO, "sample_interval", CTLFLAG_RW, &priv->sample_interval, 0, "adaptive frequency in units of HZ ticks"); SYSCTL_ADD_UINT(ctx, coal_list, OID_AUTO, "adaptive_rx_coal", CTLFLAG_RW, &priv->adaptive_rx_coal, 0, "Enable adaptive rx coalescing"); } static void mlx4_en_sysctl_stat(struct mlx4_en_priv *priv) { struct net_device *dev; struct sysctl_ctx_list *ctx; struct sysctl_oid *node; struct sysctl_oid_list *node_list; struct sysctl_oid *ring_node; struct sysctl_oid_list *ring_list; struct mlx4_en_tx_ring *tx_ring; struct mlx4_en_rx_ring *rx_ring; char namebuf[128]; int i; dev = priv->dev; ctx = &priv->stat_ctx; sysctl_ctx_init(ctx); node = SYSCTL_ADD_NODE(ctx, SYSCTL_CHILDREN(priv->sysctl), OID_AUTO, "stat", CTLFLAG_RD, NULL, "Statistics"); node_list = SYSCTL_CHILDREN(node); #ifdef MLX4_EN_PERF_STAT SYSCTL_ADD_UINT(ctx, node_list, OID_AUTO, "tx_poll", CTLFLAG_RD, &priv->pstats.tx_poll, "TX Poll calls"); SYSCTL_ADD_QUAD(ctx, node_list, OID_AUTO, "tx_pktsz_avg", CTLFLAG_RD, &priv->pstats.tx_pktsz_avg, "TX average packet size"); SYSCTL_ADD_UINT(ctx, node_list, OID_AUTO, "inflight_avg", CTLFLAG_RD, &priv->pstats.inflight_avg, "TX average packets in-flight"); SYSCTL_ADD_UINT(ctx, node_list, OID_AUTO, "tx_coal_avg", CTLFLAG_RD, &priv->pstats.tx_coal_avg, "TX average coalesced completions"); SYSCTL_ADD_UINT(ctx, node_list, OID_AUTO, "rx_coal_avg", CTLFLAG_RD, &priv->pstats.rx_coal_avg, "RX average coalesced completions"); #endif SYSCTL_ADD_ULONG(ctx, node_list, OID_AUTO, "tso_packets", CTLFLAG_RD, &priv->port_stats.tso_packets, "TSO packets sent"); SYSCTL_ADD_ULONG(ctx, node_list, OID_AUTO, "queue_stopped", CTLFLAG_RD, &priv->port_stats.queue_stopped, "Queue full"); SYSCTL_ADD_ULONG(ctx, node_list, OID_AUTO, "wake_queue", CTLFLAG_RD, &priv->port_stats.wake_queue, "Queue resumed after full"); SYSCTL_ADD_ULONG(ctx, node_list, OID_AUTO, "tx_timeout", CTLFLAG_RD, &priv->port_stats.tx_timeout, "Transmit timeouts"); SYSCTL_ADD_ULONG(ctx, node_list, OID_AUTO, "rx_alloc_failed", CTLFLAG_RD, &priv->port_stats.rx_alloc_failed, "RX failed to allocate mbuf"); SYSCTL_ADD_ULONG(ctx, node_list, OID_AUTO, "rx_chksum_good", CTLFLAG_RD, &priv->port_stats.rx_chksum_good, "RX checksum offload success"); SYSCTL_ADD_ULONG(ctx, node_list, OID_AUTO, "rx_chksum_none", CTLFLAG_RD, &priv->port_stats.rx_chksum_none, "RX without checksum offload"); SYSCTL_ADD_ULONG(ctx, node_list, OID_AUTO, "tx_chksum_offload", CTLFLAG_RD, &priv->port_stats.tx_chksum_offload, "TX checksum offloads"); /* Could strdup the names and add in a loop. This is simpler. */ SYSCTL_ADD_ULONG(ctx, node_list, OID_AUTO, "broadcast", CTLFLAG_RD, &priv->pkstats.broadcast, "Broadcast packets"); SYSCTL_ADD_ULONG(ctx, node_list, OID_AUTO, "tx_prio0", CTLFLAG_RD, &priv->pkstats.tx_prio[0], "TX Priority 0 packets"); SYSCTL_ADD_ULONG(ctx, node_list, OID_AUTO, "tx_prio1", CTLFLAG_RD, &priv->pkstats.tx_prio[1], "TX Priority 1 packets"); SYSCTL_ADD_ULONG(ctx, node_list, OID_AUTO, "tx_prio2", CTLFLAG_RD, &priv->pkstats.tx_prio[2], "TX Priority 2 packets"); SYSCTL_ADD_ULONG(ctx, node_list, OID_AUTO, "tx_prio3", CTLFLAG_RD, &priv->pkstats.tx_prio[3], "TX Priority 3 packets"); SYSCTL_ADD_ULONG(ctx, node_list, OID_AUTO, "tx_prio4", CTLFLAG_RD, &priv->pkstats.tx_prio[4], "TX Priority 4 packets"); SYSCTL_ADD_ULONG(ctx, node_list, OID_AUTO, "tx_prio5", CTLFLAG_RD, &priv->pkstats.tx_prio[5], "TX Priority 5 packets"); SYSCTL_ADD_ULONG(ctx, node_list, OID_AUTO, "tx_prio6", CTLFLAG_RD, &priv->pkstats.tx_prio[6], "TX Priority 6 packets"); SYSCTL_ADD_ULONG(ctx, node_list, OID_AUTO, "tx_prio7", CTLFLAG_RD, &priv->pkstats.tx_prio[7], "TX Priority 7 packets"); SYSCTL_ADD_ULONG(ctx, node_list, OID_AUTO, "rx_prio0", CTLFLAG_RD, &priv->pkstats.rx_prio[0], "RX Priority 0 packets"); SYSCTL_ADD_ULONG(ctx, node_list, OID_AUTO, "rx_prio1", CTLFLAG_RD, &priv->pkstats.rx_prio[1], "RX Priority 1 packets"); SYSCTL_ADD_ULONG(ctx, node_list, OID_AUTO, "rx_prio2", CTLFLAG_RD, &priv->pkstats.rx_prio[2], "RX Priority 2 packets"); SYSCTL_ADD_ULONG(ctx, node_list, OID_AUTO, "rx_prio3", CTLFLAG_RD, &priv->pkstats.rx_prio[3], "RX Priority 3 packets"); SYSCTL_ADD_ULONG(ctx, node_list, OID_AUTO, "rx_prio4", CTLFLAG_RD, &priv->pkstats.rx_prio[4], "RX Priority 4 packets"); SYSCTL_ADD_ULONG(ctx, node_list, OID_AUTO, "rx_prio5", CTLFLAG_RD, &priv->pkstats.rx_prio[5], "RX Priority 5 packets"); SYSCTL_ADD_ULONG(ctx, node_list, OID_AUTO, "rx_prio6", CTLFLAG_RD, &priv->pkstats.rx_prio[6], "RX Priority 6 packets"); SYSCTL_ADD_ULONG(ctx, node_list, OID_AUTO, "rx_prio7", CTLFLAG_RD, &priv->pkstats.rx_prio[7], "RX Priority 7 packets"); for (i = 0; i < priv->tx_ring_num; i++) { tx_ring = &priv->tx_ring[i]; snprintf(namebuf, sizeof(namebuf), "tx_ring%d", i); ring_node = SYSCTL_ADD_NODE(ctx, node_list, OID_AUTO, namebuf, CTLFLAG_RD, NULL, "TX Ring"); ring_list = SYSCTL_CHILDREN(ring_node); SYSCTL_ADD_ULONG(ctx, ring_list, OID_AUTO, "packets", CTLFLAG_RD, &tx_ring->packets, "TX packets"); SYSCTL_ADD_ULONG(ctx, ring_list, OID_AUTO, "bytes", CTLFLAG_RD, &tx_ring->bytes, "TX bytes"); SYSCTL_ADD_ULONG(ctx, ring_list, OID_AUTO, "error", CTLFLAG_RD, &tx_ring->errors, "TX soft errors"); } for (i = 0; i < priv->rx_ring_num; i++) { rx_ring = &priv->rx_ring[i]; snprintf(namebuf, sizeof(namebuf), "rx_ring%d", i); ring_node = SYSCTL_ADD_NODE(ctx, node_list, OID_AUTO, namebuf, CTLFLAG_RD, NULL, "RX Ring"); ring_list = SYSCTL_CHILDREN(ring_node); SYSCTL_ADD_ULONG(ctx, ring_list, OID_AUTO, "packets", CTLFLAG_RD, &rx_ring->packets, "RX packets"); SYSCTL_ADD_ULONG(ctx, ring_list, OID_AUTO, "bytes", CTLFLAG_RD, &rx_ring->bytes, "RX bytes"); SYSCTL_ADD_ULONG(ctx, ring_list, OID_AUTO, "error", CTLFLAG_RD, &rx_ring->errors, "RX soft errors"); SYSCTL_ADD_UINT(ctx, ring_list, OID_AUTO, "lro_queued", CTLFLAG_RD, &rx_ring->lro.lro_queued, 0, "LRO Queued"); SYSCTL_ADD_UINT(ctx, ring_list, OID_AUTO, "lro_flushed", CTLFLAG_RD, &rx_ring->lro.lro_flushed, 0, "LRO Flushed"); } } int mlx4_en_init_netdev(struct mlx4_en_dev *mdev, int port, struct mlx4_en_port_profile *prof) { static volatile int mlx4_en_unit; struct net_device *dev; struct mlx4_en_priv *priv; uint8_t dev_addr[ETHER_ADDR_LEN]; int err; int i; priv = kzalloc(sizeof(*priv), GFP_KERNEL); dev = priv->dev = if_alloc(IFT_ETHER); if (dev == NULL) { mlx4_err(mdev, "Net device allocation failed\n"); kfree(priv); return -ENOMEM; } dev->if_softc = priv; if_initname(dev, "mlxen", atomic_fetchadd_int(&mlx4_en_unit, 1)); dev->if_mtu = ETHERMTU; dev->if_baudrate = 1000000000; dev->if_init = mlx4_en_init; dev->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST; dev->if_ioctl = mlx4_en_ioctl; dev->if_transmit = mlx4_en_transmit; dev->if_qflush = mlx4_en_qflush; dev->if_snd.ifq_maxlen = prof->tx_ring_size; /* * Initialize driver private data */ priv->dev = dev; priv->mdev = mdev; priv->prof = prof; priv->port = port; priv->port_up = false; priv->rx_csum = 1; priv->flags = prof->flags; priv->tx_ring_num = prof->tx_ring_num; priv->rx_ring_num = prof->rx_ring_num; priv->mac_index = -1; priv->msg_enable = MLX4_EN_MSG_LEVEL; priv->ip_reasm = priv->mdev->profile.ip_reasm; mtx_init(&priv->stats_lock.m, "mlx4 stats", NULL, MTX_DEF); mtx_init(&priv->vlan_lock.m, "mlx4 vlan", NULL, MTX_DEF); INIT_WORK(&priv->start_port_task, mlx4_en_lock_and_start_port); INIT_WORK(&priv->stop_port_task, mlx4_en_lock_and_stop_port); INIT_WORK(&priv->mcast_task, mlx4_en_do_set_multicast); INIT_WORK(&priv->watchdog_task, mlx4_en_restart); INIT_WORK(&priv->linkstate_task, mlx4_en_linkstate); INIT_DELAYED_WORK(&priv->stats_task, mlx4_en_do_get_stats); callout_init(&priv->watchdog_timer, 1); /* Query for default mac and max mtu */ priv->max_mtu = mdev->dev->caps.eth_mtu_cap[priv->port]; priv->mac = mdev->dev->caps.def_mac[priv->port]; if (ILLEGAL_MAC(priv->mac)) { en_err(priv, "Port: %d, invalid mac burned: 0x%llx, quiting\n", - priv->port, priv->mac); + priv->port, (long long)priv->mac); err = -EINVAL; goto out; } mlx4_en_sysctl_conf(priv); err = mlx4_en_alloc_resources(priv); if (err) goto out; /* Allocate page for receive rings */ err = mlx4_alloc_hwq_res(mdev->dev, &priv->res, MLX4_EN_PAGE_SIZE, MLX4_EN_PAGE_SIZE); if (err) { en_err(priv, "Failed to allocate page for rx qps\n"); goto out; } priv->allocated = 1; /* * Set driver features */ dev->if_capabilities |= IFCAP_RXCSUM | IFCAP_TXCSUM; dev->if_capabilities |= IFCAP_VLAN_MTU | IFCAP_VLAN_HWTAGGING; dev->if_capabilities |= IFCAP_VLAN_HWCSUM | IFCAP_VLAN_HWFILTER; dev->if_capabilities |= IFCAP_LINKSTATE | IFCAP_JUMBO_MTU; if (mdev->LSO_support) dev->if_capabilities |= IFCAP_TSO4 | IFCAP_VLAN_HWTSO; if (mdev->profile.num_lro) dev->if_capabilities |= IFCAP_LRO; dev->if_capenable = dev->if_capabilities; /* * Setup wake-on-lan. */ #if 0 if (priv->mdev->dev->caps.wol) { u64 config; if (mlx4_wol_read(priv->mdev->dev, &config, priv->port) == 0) { if (config & MLX4_EN_WOL_MAGIC) dev->if_capabilities |= IFCAP_WOL_MAGIC; if (config & MLX4_EN_WOL_ENABLED) dev->if_capenable |= IFCAP_WOL_MAGIC; } } #endif /* Register for VLAN events */ priv->vlan_attach = EVENTHANDLER_REGISTER(vlan_config, mlx4_en_vlan_rx_add_vid, priv, EVENTHANDLER_PRI_FIRST); priv->vlan_detach = EVENTHANDLER_REGISTER(vlan_unconfig, mlx4_en_vlan_rx_kill_vid, priv, EVENTHANDLER_PRI_FIRST); mdev->pndev[priv->port] = dev; priv->last_link_state = MLX4_DEV_EVENT_PORT_DOWN; if_link_state_change(dev, LINK_STATE_DOWN); /* Set default MAC */ for (i = 0; i < ETHER_ADDR_LEN; i++) dev_addr[ETHER_ADDR_LEN - 1 - i] = (u8) (priv->mac >> (8 * i)); ether_ifattach(dev, dev_addr); ifmedia_init(&priv->media, IFM_IMASK | IFM_ETH_FMASK, mlx4_en_media_change, mlx4_en_media_status); ifmedia_add(&priv->media, IFM_ETHER | IFM_FDX | IFM_1000_T, 0, NULL); ifmedia_add(&priv->media, IFM_ETHER | IFM_FDX | IFM_10G_SR, 0, NULL); ifmedia_add(&priv->media, IFM_ETHER | IFM_FDX | IFM_10G_CX4, 0, NULL); ifmedia_add(&priv->media, IFM_ETHER | IFM_AUTO, 0, NULL); ifmedia_set(&priv->media, IFM_ETHER | IFM_AUTO); en_warn(priv, "Using %d TX rings\n", prof->tx_ring_num); en_warn(priv, "Using %d RX rings\n", prof->rx_ring_num); priv->registered = 1; queue_delayed_work(mdev->workqueue, &priv->stats_task, STATS_DELAY); return 0; out: mlx4_en_destroy_netdev(dev); return err; } Index: stable/10/sys/ofed/drivers/net/mlx4/en_rx.c =================================================================== --- stable/10/sys/ofed/drivers/net/mlx4/en_rx.c (revision 271126) +++ stable/10/sys/ofed/drivers/net/mlx4/en_rx.c (revision 271127) @@ -1,804 +1,804 @@ /* * Copyright (c) 2007 Mellanox Technologies. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU * General Public License (GPL) Version 2, available from the file * COPYING in the main directory of this source tree, or the * OpenIB.org BSD license below: * * Redistribution and use in source and binary forms, with or * without modification, are permitted provided that the following * conditions are met: * * - Redistributions of source code must retain the above * copyright notice, this list of conditions and the following * disclaimer. * * - Redistributions in binary form must reproduce the above * copyright notice, this list of conditions and the following * disclaimer in the documentation and/or other materials * provided with the distribution. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * */ #include "opt_inet.h" #include "mlx4_en.h" #include #include #include #include #include enum { MIN_RX_ARM = 1024, }; static int mlx4_en_alloc_buf(struct mlx4_en_priv *priv, struct mlx4_en_rx_desc *rx_desc, struct mbuf **mb_list, int i) { struct mlx4_en_dev *mdev = priv->mdev; struct mlx4_en_frag_info *frag_info = &priv->frag_info[i]; struct mbuf *mb; dma_addr_t dma; if (i == 0) mb = m_getjcl(M_NOWAIT, MT_DATA, M_PKTHDR, frag_info->frag_size); else mb = m_getjcl(M_NOWAIT, MT_DATA, 0, frag_info->frag_size); if (mb == NULL) { priv->port_stats.rx_alloc_failed++; return -ENOMEM; } dma = pci_map_single(mdev->pdev, mb->m_data, frag_info->frag_size, PCI_DMA_FROMDEVICE); rx_desc->data[i].addr = cpu_to_be64(dma); mb_list[i] = mb; return 0; } static void mlx4_en_init_rx_desc(struct mlx4_en_priv *priv, struct mlx4_en_rx_ring *ring, int index) { struct mlx4_en_rx_desc *rx_desc = ring->buf + ring->stride * index; int possible_frags; int i; /* Set size and memtype fields */ for (i = 0; i < priv->num_frags; i++) { rx_desc->data[i].byte_count = cpu_to_be32(priv->frag_info[i].frag_size); rx_desc->data[i].lkey = cpu_to_be32(priv->mdev->mr.key); } /* If the number of used fragments does not fill up the ring stride, * remaining (unused) fragments must be padded with null address/size * and a special memory key */ possible_frags = (ring->stride - sizeof(struct mlx4_en_rx_desc)) / DS_SIZE; for (i = priv->num_frags; i < possible_frags; i++) { rx_desc->data[i].byte_count = 0; rx_desc->data[i].lkey = cpu_to_be32(MLX4_EN_MEMTYPE_PAD); rx_desc->data[i].addr = 0; } } static int mlx4_en_prepare_rx_desc(struct mlx4_en_priv *priv, struct mlx4_en_rx_ring *ring, int index) { struct mlx4_en_rx_desc *rx_desc = ring->buf + (index * ring->stride); struct mbuf **mb_list = ring->rx_info + (index << priv->log_rx_info); int i; for (i = 0; i < priv->num_frags; i++) if (mlx4_en_alloc_buf(priv, rx_desc, mb_list, i)) goto err; return 0; err: while (i--) m_free(mb_list[i]); return -ENOMEM; } static inline void mlx4_en_update_rx_prod_db(struct mlx4_en_rx_ring *ring) { *ring->wqres.db.db = cpu_to_be32(ring->prod & 0xffff); } static void mlx4_en_free_rx_desc(struct mlx4_en_priv *priv, struct mlx4_en_rx_ring *ring, int index) { struct mlx4_en_frag_info *frag_info; struct mlx4_en_dev *mdev = priv->mdev; struct mbuf **mb_list; struct mlx4_en_rx_desc *rx_desc = ring->buf + (index << ring->log_stride); dma_addr_t dma; int nr; mb_list = ring->rx_info + (index << priv->log_rx_info); for (nr = 0; nr < priv->num_frags; nr++) { en_dbg(DRV, priv, "Freeing fragment:%d\n", nr); frag_info = &priv->frag_info[nr]; dma = be64_to_cpu(rx_desc->data[nr].addr); - en_dbg(DRV, priv, "Unmaping buffer at dma:0x%llx\n", (u64) dma); + en_dbg(DRV, priv, "Unmaping buffer at dma:0x%llx\n", (long long) dma); pci_unmap_single(mdev->pdev, dma, frag_info->frag_size, PCI_DMA_FROMDEVICE); m_free(mb_list[nr]); } } static int mlx4_en_fill_rx_buffers(struct mlx4_en_priv *priv) { struct mlx4_en_rx_ring *ring; int ring_ind; int buf_ind; int new_size; int err; for (buf_ind = 0; buf_ind < priv->prof->rx_ring_size; buf_ind++) { for (ring_ind = 0; ring_ind < priv->rx_ring_num; ring_ind++) { ring = &priv->rx_ring[ring_ind]; err = mlx4_en_prepare_rx_desc(priv, ring, ring->actual_size); if (err) { if (ring->actual_size == 0) { en_err(priv, "Failed to allocate " "enough rx buffers\n"); return -ENOMEM; } else { new_size = rounddown_pow_of_two(ring->actual_size); en_warn(priv, "Only %d buffers allocated " "reducing ring size to %d\n", ring->actual_size, new_size); goto reduce_rings; } } ring->actual_size++; ring->prod++; } } return 0; reduce_rings: for (ring_ind = 0; ring_ind < priv->rx_ring_num; ring_ind++) { ring = &priv->rx_ring[ring_ind]; while (ring->actual_size > new_size) { ring->actual_size--; ring->prod--; mlx4_en_free_rx_desc(priv, ring, ring->actual_size); } } return 0; } static void mlx4_en_free_rx_buf(struct mlx4_en_priv *priv, struct mlx4_en_rx_ring *ring) { int index; en_dbg(DRV, priv, "Freeing Rx buf - cons:%d prod:%d\n", ring->cons, ring->prod); /* Unmap and free Rx buffers */ BUG_ON((u32) (ring->prod - ring->cons) > ring->actual_size); while (ring->cons != ring->prod) { index = ring->cons & ring->size_mask; en_dbg(DRV, priv, "Processing descriptor:%d\n", index); mlx4_en_free_rx_desc(priv, ring, index); ++ring->cons; } } int mlx4_en_create_rx_ring(struct mlx4_en_priv *priv, struct mlx4_en_rx_ring *ring, u32 size) { struct mlx4_en_dev *mdev = priv->mdev; int err; int tmp; ring->prod = 0; ring->cons = 0; ring->size = size; ring->size_mask = size - 1; ring->stride = roundup_pow_of_two(sizeof(struct mlx4_en_rx_desc) + DS_SIZE * MLX4_EN_MAX_RX_FRAGS); ring->log_stride = ffs(ring->stride) - 1; ring->buf_size = ring->size * ring->stride + TXBB_SIZE; tmp = size * roundup_pow_of_two(MLX4_EN_MAX_RX_FRAGS * sizeof(struct mbuf *)); ring->rx_info = kmalloc(tmp, GFP_KERNEL); if (!ring->rx_info) { en_err(priv, "Failed allocating rx_info ring\n"); return -ENOMEM; } en_dbg(DRV, priv, "Allocated rx_info ring at addr:%p size:%d stride:%d (%d)\n", ring->rx_info, tmp, ring->stride, ring->log_stride); err = mlx4_alloc_hwq_res(mdev->dev, &ring->wqres, ring->buf_size, 2 * PAGE_SIZE); if (err) goto err_ring; err = mlx4_en_map_buffer(&ring->wqres.buf); if (err) { en_err(priv, "Failed to map RX buffer\n"); goto err_hwq; } ring->buf = ring->wqres.buf.direct.buf; return 0; mlx4_en_unmap_buffer(&ring->wqres.buf); err_hwq: mlx4_free_hwq_res(mdev->dev, &ring->wqres, ring->buf_size); err_ring: kfree(ring->rx_info); ring->rx_info = NULL; return err; } int mlx4_en_activate_rx_rings(struct mlx4_en_priv *priv) { struct mlx4_en_rx_ring *ring; int i; int ring_ind; int err; int stride = roundup_pow_of_two(sizeof(struct mlx4_en_rx_desc) + DS_SIZE * priv->num_frags); for (ring_ind = 0; ring_ind < priv->rx_ring_num; ring_ind++) { ring = &priv->rx_ring[ring_ind]; ring->prod = 0; ring->cons = 0; ring->actual_size = 0; ring->cqn = priv->rx_cq[ring_ind].mcq.cqn; ring->stride = stride; if (ring->stride <= TXBB_SIZE) ring->buf += TXBB_SIZE; ring->log_stride = ffs(ring->stride) - 1; ring->buf_size = ring->size * ring->stride; memset(ring->buf, 0, ring->buf_size); mlx4_en_update_rx_prod_db(ring); /* Initailize all descriptors */ for (i = 0; i < ring->size; i++) mlx4_en_init_rx_desc(priv, ring, i); #ifdef INET /* Configure lro mngr */ if (priv->dev->if_capenable & IFCAP_LRO) { if (tcp_lro_init(&ring->lro)) priv->dev->if_capenable &= ~IFCAP_LRO; else ring->lro.ifp = priv->dev; } #endif } err = mlx4_en_fill_rx_buffers(priv); if (err) goto err_buffers; for (ring_ind = 0; ring_ind < priv->rx_ring_num; ring_ind++) { ring = &priv->rx_ring[ring_ind]; ring->size_mask = ring->actual_size - 1; mlx4_en_update_rx_prod_db(ring); } return 0; err_buffers: for (ring_ind = 0; ring_ind < priv->rx_ring_num; ring_ind++) mlx4_en_free_rx_buf(priv, &priv->rx_ring[ring_ind]); return err; } void mlx4_en_destroy_rx_ring(struct mlx4_en_priv *priv, struct mlx4_en_rx_ring *ring) { struct mlx4_en_dev *mdev = priv->mdev; mlx4_en_unmap_buffer(&ring->wqres.buf); mlx4_free_hwq_res(mdev->dev, &ring->wqres, ring->buf_size + TXBB_SIZE); kfree(ring->rx_info); ring->rx_info = NULL; } void mlx4_en_deactivate_rx_ring(struct mlx4_en_priv *priv, struct mlx4_en_rx_ring *ring) { #ifdef INET tcp_lro_free(&ring->lro); #endif mlx4_en_free_rx_buf(priv, ring); if (ring->stride <= TXBB_SIZE) ring->buf -= TXBB_SIZE; } /* Unmap a completed descriptor and free unused pages */ static int mlx4_en_complete_rx_desc(struct mlx4_en_priv *priv, struct mlx4_en_rx_desc *rx_desc, struct mbuf **mb_list, int length) { struct mlx4_en_dev *mdev = priv->mdev; struct mlx4_en_frag_info *frag_info; dma_addr_t dma; struct mbuf *mb; int nr; mb = mb_list[0]; mb->m_pkthdr.len = length; /* Collect used fragments while replacing them in the HW descirptors */ for (nr = 0; nr < priv->num_frags; nr++) { frag_info = &priv->frag_info[nr]; if (length <= frag_info->frag_prefix_size) break; if (nr) mb->m_next = mb_list[nr]; mb = mb_list[nr]; mb->m_len = frag_info[nr].frag_size; dma = be64_to_cpu(rx_desc->data[nr].addr); /* Allocate a replacement page */ if (mlx4_en_alloc_buf(priv, rx_desc, mb_list, nr)) goto fail; /* Unmap buffer */ pci_unmap_single(mdev->pdev, dma, frag_info[nr].frag_size, PCI_DMA_FROMDEVICE); } /* Adjust size of last fragment to match actual length */ mb->m_len = length - priv->frag_info[nr - 1].frag_prefix_size; mb->m_next = NULL; return 0; fail: /* Drop all accumulated fragments (which have already been replaced in * the descriptor) of this packet; remaining fragments are reused... */ while (nr > 0) { nr--; m_free(mb_list[nr]); } return -ENOMEM; } static inline int invalid_cqe(struct mlx4_en_priv *priv, struct mlx4_cqe *cqe) { /* Drop packet on bad receive or bad checksum */ if (unlikely((cqe->owner_sr_opcode & MLX4_CQE_OPCODE_MASK) == MLX4_CQE_OPCODE_ERROR)) { en_err(priv, "CQE completed in error - vendor " "syndrom:%d syndrom:%d\n", ((struct mlx4_err_cqe *) cqe)->vendor_err_syndrome, ((struct mlx4_err_cqe *) cqe)->syndrome); return 1; } if (unlikely(cqe->badfcs_enc & MLX4_CQE_BAD_FCS)) { en_dbg(RX_ERR, priv, "Accepted frame with bad FCS\n"); return 1; } return 0; } static void validate_loopback(struct mlx4_en_priv *priv, struct mbuf *mb) { int i; int offset = ETHER_HDR_LEN; for (i = 0; i < MLX4_LOOPBACK_TEST_PAYLOAD; i++, offset++) { if (*(mb->m_data + offset) != (unsigned char) (i & 0xff)) goto out_loopback; } /* Loopback found */ priv->loopback_ok = 1; out_loopback: m_freem(mb); } static struct mbuf *mlx4_en_rx_mb(struct mlx4_en_priv *priv, struct mlx4_en_rx_desc *rx_desc, struct mbuf **mb_list, unsigned int length) { struct mbuf *mb; mb = mb_list[0]; /* Move relevant fragments to mb */ if (unlikely(mlx4_en_complete_rx_desc(priv, rx_desc, mb_list, length))) return NULL; return mb; } int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int budget) { struct mlx4_en_priv *priv = netdev_priv(dev); struct mlx4_cqe *cqe; struct mlx4_en_rx_ring *ring = &priv->rx_ring[cq->ring]; struct mbuf **mb_list; struct mlx4_en_rx_desc *rx_desc; struct mbuf *mb; #ifdef INET struct lro_entry *queued; #endif int index; unsigned int length; int polled = 0; if (!priv->port_up) return 0; /* We assume a 1:1 mapping between CQEs and Rx descriptors, so Rx * descriptor offset can be deduced from the CQE index instead of * reading 'cqe->index' */ index = cq->mcq.cons_index & ring->size_mask; cqe = &cq->buf[index]; /* Process all completed CQEs */ while (XNOR(cqe->owner_sr_opcode & MLX4_CQE_OWNER_MASK, cq->mcq.cons_index & cq->size)) { mb_list = ring->rx_info + (index << priv->log_rx_info); rx_desc = ring->buf + (index << ring->log_stride); /* * make sure we read the CQE after we read the ownership bit */ rmb(); if (invalid_cqe(priv, cqe)) goto next; /* * Packet is OK - process it. */ length = be32_to_cpu(cqe->byte_cnt); mb = mlx4_en_rx_mb(priv, rx_desc, mb_list, length); if (!mb) { ring->errors++; goto next; } ring->bytes += length; ring->packets++; if (unlikely(priv->validate_loopback)) { validate_loopback(priv, mb); goto next; } mb->m_pkthdr.flowid = cq->ring; mb->m_flags |= M_FLOWID; mb->m_pkthdr.rcvif = dev; if (be32_to_cpu(cqe->vlan_my_qpn) & MLX4_CQE_VLAN_PRESENT_MASK) { mb->m_pkthdr.ether_vtag = be16_to_cpu(cqe->sl_vid); mb->m_flags |= M_VLANTAG; } if (likely(priv->rx_csum) && (cqe->status & cpu_to_be16(MLX4_CQE_STATUS_IPOK)) && (cqe->checksum == cpu_to_be16(0xffff))) { priv->port_stats.rx_chksum_good++; mb->m_pkthdr.csum_flags = CSUM_IP_CHECKED | CSUM_IP_VALID | CSUM_DATA_VALID | CSUM_PSEUDO_HDR; mb->m_pkthdr.csum_data = htons(0xffff); /* This packet is eligible for LRO if it is: * - DIX Ethernet (type interpretation) * - TCP/IP (v4) * - without IP options * - not an IP fragment */ #ifdef INET if (mlx4_en_can_lro(cqe->status) && (dev->if_capenable & IFCAP_LRO)) { if (ring->lro.lro_cnt != 0 && tcp_lro_rx(&ring->lro, mb, 0) == 0) goto next; } #endif /* LRO not possible, complete processing here */ INC_PERF_COUNTER(priv->pstats.lro_misses); } else { mb->m_pkthdr.csum_flags = 0; priv->port_stats.rx_chksum_none++; #ifdef INET if (priv->ip_reasm && cqe->status & cpu_to_be16(MLX4_CQE_STATUS_IPV4) && !mlx4_en_rx_frags(priv, ring, mb, cqe)) goto next; #endif } /* Push it up the stack */ dev->if_input(dev, mb); next: ++cq->mcq.cons_index; index = (cq->mcq.cons_index) & ring->size_mask; cqe = &cq->buf[index]; if (++polled == budget) goto out; } /* Flush all pending IP reassembly sessions */ out: #ifdef INET mlx4_en_flush_frags(priv, ring); while ((queued = SLIST_FIRST(&ring->lro.lro_active)) != NULL) { SLIST_REMOVE_HEAD(&ring->lro.lro_active, next); tcp_lro_flush(&ring->lro, queued); } #endif AVG_PERF_COUNTER(priv->pstats.rx_coal_avg, polled); mlx4_cq_set_ci(&cq->mcq); wmb(); /* ensure HW sees CQ consumer before we post new buffers */ ring->cons = cq->mcq.cons_index; ring->prod += polled; /* Polled descriptors were realocated in place */ mlx4_en_update_rx_prod_db(ring); return polled; } /* Rx CQ polling - called by NAPI */ static int mlx4_en_poll_rx_cq(struct mlx4_en_cq *cq, int budget) { struct net_device *dev = cq->dev; int done; done = mlx4_en_process_rx_cq(dev, cq, budget); cq->tot_rx += done; return done; } void mlx4_en_rx_que(void *context, int pending) { struct mlx4_en_cq *cq; cq = context; while (mlx4_en_poll_rx_cq(cq, MLX4_EN_MAX_RX_POLL) == MLX4_EN_MAX_RX_POLL); mlx4_en_arm_cq(cq->dev->if_softc, cq); } void mlx4_en_rx_irq(struct mlx4_cq *mcq) { struct mlx4_en_cq *cq = container_of(mcq, struct mlx4_en_cq, mcq); struct mlx4_en_priv *priv = netdev_priv(cq->dev); int done; done = mlx4_en_poll_rx_cq(cq, MLX4_EN_MAX_RX_POLL); if (done == MLX4_EN_MAX_RX_POLL) taskqueue_enqueue(cq->tq, &cq->cq_task); else mlx4_en_arm_cq(priv, cq); } #if MLX4_EN_MAX_RX_FRAGS == 3 static int frag_sizes[] = { FRAG_SZ0, FRAG_SZ1, FRAG_SZ2, }; #elif MLX4_EN_MAX_RX_FRAGS == 2 static int frag_sizes[] = { FRAG_SZ0, FRAG_SZ1, }; #else #error "Unknown MAX_RX_FRAGS" #endif void mlx4_en_calc_rx_buf(struct net_device *dev) { struct mlx4_en_priv *priv = netdev_priv(dev); int eff_mtu = dev->if_mtu + ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN + ETH_LLC_SNAP_SIZE; int buf_size = 0; int i, frag; for (i = 0, frag = 0; buf_size < eff_mtu; frag++, i++) { /* * Allocate small to large but only as much as is needed for * the tail. */ while (i > 0 && eff_mtu - buf_size <= frag_sizes[i - 1]) i--; priv->frag_info[frag].frag_size = frag_sizes[i]; priv->frag_info[frag].frag_prefix_size = buf_size; buf_size += priv->frag_info[frag].frag_size; } priv->num_frags = frag; priv->rx_mb_size = eff_mtu; priv->log_rx_info = ROUNDUP_LOG2(priv->num_frags * sizeof(struct mbuf *)); en_dbg(DRV, priv, "Rx buffer scatter-list (effective-mtu:%d " "num_frags:%d):\n", eff_mtu, priv->num_frags); for (i = 0; i < priv->num_frags; i++) { en_dbg(DRV, priv, " frag:%d - size:%d prefix:%d\n", i, priv->frag_info[i].frag_size, priv->frag_info[i].frag_prefix_size) } } /* RSS related functions */ static int mlx4_en_config_rss_qp(struct mlx4_en_priv *priv, int qpn, struct mlx4_en_rx_ring *ring, enum mlx4_qp_state *state, struct mlx4_qp *qp) { struct mlx4_en_dev *mdev = priv->mdev; struct mlx4_qp_context *context; int err = 0; context = kmalloc(sizeof *context , GFP_KERNEL); if (!context) { en_err(priv, "Failed to allocate qp context\n"); return -ENOMEM; } err = mlx4_qp_alloc(mdev->dev, qpn, qp); if (err) { en_err(priv, "Failed to allocate qp #%x\n", qpn); goto out; } qp->event = mlx4_en_sqp_event; memset(context, 0, sizeof *context); mlx4_en_fill_qp_context(priv, ring->actual_size, ring->stride, 0, 0, qpn, ring->cqn, context); context->db_rec_addr = cpu_to_be64(ring->wqres.db.dma); err = mlx4_qp_to_ready(mdev->dev, &ring->wqres.mtt, context, qp, state); if (err) { mlx4_qp_remove(mdev->dev, qp); mlx4_qp_free(mdev->dev, qp); } mlx4_en_update_rx_prod_db(ring); out: kfree(context); return err; } /* Allocate rx qp's and configure them according to rss map */ int mlx4_en_config_rss_steer(struct mlx4_en_priv *priv) { struct mlx4_en_dev *mdev = priv->mdev; struct mlx4_en_rss_map *rss_map = &priv->rss_map; struct mlx4_qp_context context; struct mlx4_en_rss_context *rss_context; void *ptr; u8 rss_mask; int i, qpn; int err = 0; int good_qps = 0; if (mdev->profile.udp_rss) rss_mask = 0x3f; else rss_mask = 0x14; en_dbg(DRV, priv, "Configuring rss steering\n"); err = mlx4_qp_reserve_range(mdev->dev, priv->rx_ring_num, roundup_pow_of_two(priv->rx_ring_num), &rss_map->base_qpn, 0); if (err) { en_err(priv, "Failed reserving %d qps\n", priv->rx_ring_num); return err; } for (i = 0; i < priv->rx_ring_num; i++) { qpn = rss_map->base_qpn + i; err = mlx4_en_config_rss_qp(priv, qpn, &priv->rx_ring[i], &rss_map->state[i], &rss_map->qps[i]); if (err) goto rss_err; ++good_qps; } /* Configure RSS indirection qp */ err = mlx4_qp_reserve_range(mdev->dev, 1, 1, &priv->base_qpn, 0); if (err) { en_err(priv, "Failed to reserve range for RSS " "indirection qp\n"); goto rss_err; } err = mlx4_qp_alloc(mdev->dev, priv->base_qpn, &rss_map->indir_qp); if (err) { en_err(priv, "Failed to allocate RSS indirection QP\n"); goto reserve_err; } rss_map->indir_qp.event = mlx4_en_sqp_event; mlx4_en_fill_qp_context(priv, 0, 0, 0, 1, priv->base_qpn, priv->rx_ring[0].cqn, &context); ptr = ((void *) &context) + 0x3c; rss_context = (struct mlx4_en_rss_context *) ptr; rss_context->base_qpn = cpu_to_be32(ilog2(priv->rx_ring_num) << 24 | (rss_map->base_qpn)); rss_context->default_qpn = cpu_to_be32(rss_map->base_qpn); rss_context->flags = rss_mask; rss_context->base_qpn_udp = rss_context->default_qpn; err = mlx4_qp_to_ready(mdev->dev, &priv->res.mtt, &context, &rss_map->indir_qp, &rss_map->indir_state); if (err) goto indir_err; return 0; indir_err: mlx4_qp_modify(mdev->dev, NULL, rss_map->indir_state, MLX4_QP_STATE_RST, NULL, 0, 0, &rss_map->indir_qp); mlx4_qp_remove(mdev->dev, &rss_map->indir_qp); mlx4_qp_free(mdev->dev, &rss_map->indir_qp); reserve_err: mlx4_qp_release_range(mdev->dev, priv->base_qpn, 1); rss_err: for (i = 0; i < good_qps; i++) { mlx4_qp_modify(mdev->dev, NULL, rss_map->state[i], MLX4_QP_STATE_RST, NULL, 0, 0, &rss_map->qps[i]); mlx4_qp_remove(mdev->dev, &rss_map->qps[i]); mlx4_qp_free(mdev->dev, &rss_map->qps[i]); } mlx4_qp_release_range(mdev->dev, rss_map->base_qpn, priv->rx_ring_num); return err; } void mlx4_en_release_rss_steer(struct mlx4_en_priv *priv) { struct mlx4_en_dev *mdev = priv->mdev; struct mlx4_en_rss_map *rss_map = &priv->rss_map; int i; mlx4_qp_modify(mdev->dev, NULL, rss_map->indir_state, MLX4_QP_STATE_RST, NULL, 0, 0, &rss_map->indir_qp); mlx4_qp_remove(mdev->dev, &rss_map->indir_qp); mlx4_qp_free(mdev->dev, &rss_map->indir_qp); mlx4_qp_release_range(mdev->dev, priv->base_qpn, 1); for (i = 0; i < priv->rx_ring_num; i++) { mlx4_qp_modify(mdev->dev, NULL, rss_map->state[i], MLX4_QP_STATE_RST, NULL, 0, 0, &rss_map->qps[i]); mlx4_qp_remove(mdev->dev, &rss_map->qps[i]); mlx4_qp_free(mdev->dev, &rss_map->qps[i]); } mlx4_qp_release_range(mdev->dev, rss_map->base_qpn, priv->rx_ring_num); } Index: stable/10/sys/ofed/drivers/net/mlx4/eq.c =================================================================== --- stable/10/sys/ofed/drivers/net/mlx4/eq.c (revision 271126) +++ stable/10/sys/ofed/drivers/net/mlx4/eq.c (revision 271127) @@ -1,1370 +1,1369 @@ /* * Copyright (c) 2005, 2006, 2007, 2008 Mellanox Technologies. All rights reserved. * Copyright (c) 2005, 2006, 2007 Cisco Systems, Inc. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU * General Public License (GPL) Version 2, available from the file * COPYING in the main directory of this source tree, or the * OpenIB.org BSD license below: * * Redistribution and use in source and binary forms, with or * without modification, are permitted provided that the following * conditions are met: * * - Redistributions of source code must retain the above * copyright notice, this list of conditions and the following * disclaimer. * * - Redistributions in binary form must reproduce the above * copyright notice, this list of conditions and the following * disclaimer in the documentation and/or other materials * provided with the distribution. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include #include #include #include #include #include #include "mlx4.h" #include "fw.h" enum { MLX4_IRQNAME_SIZE = 32 }; enum { MLX4_NUM_ASYNC_EQE = 0x100, MLX4_NUM_SPARE_EQE = 0x80, MLX4_EQ_ENTRY_SIZE = 0x20 }; #define MLX4_EQ_STATUS_OK ( 0 << 28) #define MLX4_EQ_STATUS_WRITE_FAIL (10 << 28) #define MLX4_EQ_OWNER_SW ( 0 << 24) #define MLX4_EQ_OWNER_HW ( 1 << 24) #define MLX4_EQ_FLAG_EC ( 1 << 18) #define MLX4_EQ_FLAG_OI ( 1 << 17) #define MLX4_EQ_STATE_ARMED ( 9 << 8) #define MLX4_EQ_STATE_FIRED (10 << 8) #define MLX4_EQ_STATE_ALWAYS_ARMED (11 << 8) #define MLX4_ASYNC_EVENT_MASK ((1ull << MLX4_EVENT_TYPE_PATH_MIG) | \ (1ull << MLX4_EVENT_TYPE_COMM_EST) | \ (1ull << MLX4_EVENT_TYPE_SQ_DRAINED) | \ (1ull << MLX4_EVENT_TYPE_CQ_ERROR) | \ (1ull << MLX4_EVENT_TYPE_WQ_CATAS_ERROR) | \ (1ull << MLX4_EVENT_TYPE_EEC_CATAS_ERROR) | \ (1ull << MLX4_EVENT_TYPE_PATH_MIG_FAILED) | \ (1ull << MLX4_EVENT_TYPE_WQ_INVAL_REQ_ERROR) | \ (1ull << MLX4_EVENT_TYPE_WQ_ACCESS_ERROR) | \ (1ull << MLX4_EVENT_TYPE_PORT_CHANGE) | \ (1ull << MLX4_EVENT_TYPE_ECC_DETECT) | \ (1ull << MLX4_EVENT_TYPE_SRQ_CATAS_ERROR) | \ (1ull << MLX4_EVENT_TYPE_SRQ_QP_LAST_WQE) | \ (1ull << MLX4_EVENT_TYPE_SRQ_LIMIT) | \ (1ull << MLX4_EVENT_TYPE_CMD) | \ (1ull << MLX4_EVENT_TYPE_OP_REQUIRED) | \ (1ull << MLX4_EVENT_TYPE_COMM_CHANNEL) | \ (1ull << MLX4_EVENT_TYPE_FLR_EVENT) | \ (1ull << MLX4_EVENT_TYPE_FATAL_WARNING)) static u64 get_async_ev_mask(struct mlx4_dev *dev) { u64 async_ev_mask = MLX4_ASYNC_EVENT_MASK; if (dev->caps.flags & MLX4_DEV_CAP_FLAG_PORT_MNG_CHG_EV) async_ev_mask |= (1ull << MLX4_EVENT_TYPE_PORT_MNG_CHG_EVENT); return async_ev_mask; } static void eq_set_ci(struct mlx4_eq *eq, int req_not) { __raw_writel((__force u32) cpu_to_be32((eq->cons_index & 0xffffff) | req_not << 31), eq->doorbell); /* We still want ordering, just not swabbing, so add a barrier */ mb(); } static struct mlx4_eqe *get_eqe(struct mlx4_eq *eq, u32 entry, u8 eqe_factor) { /* (entry & (eq->nent - 1)) gives us a cyclic array */ unsigned long offset = (entry & (eq->nent - 1)) * (MLX4_EQ_ENTRY_SIZE << eqe_factor); /* CX3 is capable of extending the EQE from 32 to 64 bytes. * When this feature is enabled, the first (in the lower addresses) * 32 bytes in the 64 byte EQE are reserved and the next 32 bytes * contain the legacy EQE information. */ return eq->page_list[offset / PAGE_SIZE].buf + (offset + (eqe_factor ? MLX4_EQ_ENTRY_SIZE : 0)) % PAGE_SIZE; } static struct mlx4_eqe *next_eqe_sw(struct mlx4_eq *eq, u8 eqe_factor) { struct mlx4_eqe *eqe = get_eqe(eq, eq->cons_index, eqe_factor); return !!(eqe->owner & 0x80) ^ !!(eq->cons_index & eq->nent) ? NULL : eqe; } static struct mlx4_eqe *next_slave_event_eqe(struct mlx4_slave_event_eq *slave_eq) { struct mlx4_eqe *eqe = &slave_eq->event_eqe[slave_eq->cons & (SLAVE_EVENT_EQ_SIZE - 1)]; return (!!(eqe->owner & 0x80) ^ !!(slave_eq->cons & SLAVE_EVENT_EQ_SIZE)) ? eqe : NULL; } void mlx4_gen_slave_eqe(struct work_struct *work) { struct mlx4_mfunc_master_ctx *master = container_of(work, struct mlx4_mfunc_master_ctx, slave_event_work); struct mlx4_mfunc *mfunc = container_of(master, struct mlx4_mfunc, master); struct mlx4_priv *priv = container_of(mfunc, struct mlx4_priv, mfunc); struct mlx4_dev *dev = &priv->dev; struct mlx4_slave_event_eq *slave_eq = &mfunc->master.slave_eq; struct mlx4_eqe *eqe; u8 slave; int i; for (eqe = next_slave_event_eqe(slave_eq); eqe; eqe = next_slave_event_eqe(slave_eq)) { slave = eqe->slave_id; /* All active slaves need to receive the event */ if (slave == ALL_SLAVES) { for (i = 0; i < dev->num_slaves; i++) { if (i != dev->caps.function && master->slave_state[i].active) if (mlx4_GEN_EQE(dev, i, eqe)) mlx4_warn(dev, "Failed to " " generate event " "for slave %d\n", i); } } else { if (mlx4_GEN_EQE(dev, slave, eqe)) mlx4_warn(dev, "Failed to generate event " "for slave %d\n", slave); } ++slave_eq->cons; } } static void slave_event(struct mlx4_dev *dev, u8 slave, struct mlx4_eqe *eqe) { struct mlx4_priv *priv = mlx4_priv(dev); struct mlx4_slave_event_eq *slave_eq = &priv->mfunc.master.slave_eq; struct mlx4_eqe *s_eqe; unsigned long flags; spin_lock_irqsave(&slave_eq->event_lock, flags); s_eqe = &slave_eq->event_eqe[slave_eq->prod & (SLAVE_EVENT_EQ_SIZE - 1)]; if ((!!(s_eqe->owner & 0x80)) ^ (!!(slave_eq->prod & SLAVE_EVENT_EQ_SIZE))) { mlx4_warn(dev, "Master failed to generate an EQE for slave: %d. " "No free EQE on slave events queue\n", slave); spin_unlock_irqrestore(&slave_eq->event_lock, flags); return; } memcpy(s_eqe, eqe, dev->caps.eqe_size - 1); s_eqe->slave_id = slave; /* ensure all information is written before setting the ownersip bit */ wmb(); s_eqe->owner = !!(slave_eq->prod & SLAVE_EVENT_EQ_SIZE) ? 0x0 : 0x80; ++slave_eq->prod; queue_work(priv->mfunc.master.comm_wq, &priv->mfunc.master.slave_event_work); spin_unlock_irqrestore(&slave_eq->event_lock, flags); } static void mlx4_slave_event(struct mlx4_dev *dev, int slave, struct mlx4_eqe *eqe) { struct mlx4_priv *priv = mlx4_priv(dev); struct mlx4_slave_state *s_slave = &priv->mfunc.master.slave_state[slave]; if (!s_slave->active) { /*mlx4_warn(dev, "Trying to pass event to inactive slave\n");*/ return; } slave_event(dev, slave, eqe); } int mlx4_gen_pkey_eqe(struct mlx4_dev *dev, int slave, u8 port) { struct mlx4_eqe eqe; struct mlx4_priv *priv = mlx4_priv(dev); struct mlx4_slave_state *s_slave = &priv->mfunc.master.slave_state[slave]; if (!s_slave->active) return 0; memset(&eqe, 0, sizeof eqe); eqe.type = MLX4_EVENT_TYPE_PORT_MNG_CHG_EVENT; eqe.subtype = MLX4_DEV_PMC_SUBTYPE_PKEY_TABLE; eqe.event.port_mgmt_change.port = port; return mlx4_GEN_EQE(dev, slave, &eqe); } EXPORT_SYMBOL(mlx4_gen_pkey_eqe); int mlx4_gen_guid_change_eqe(struct mlx4_dev *dev, int slave, u8 port) { struct mlx4_eqe eqe; /*don't send if we don't have the that slave */ if (dev->num_vfs < slave) return 0; memset(&eqe, 0, sizeof eqe); eqe.type = MLX4_EVENT_TYPE_PORT_MNG_CHG_EVENT; eqe.subtype = MLX4_DEV_PMC_SUBTYPE_GUID_INFO; eqe.event.port_mgmt_change.port = port; return mlx4_GEN_EQE(dev, slave, &eqe); } EXPORT_SYMBOL(mlx4_gen_guid_change_eqe); int mlx4_gen_port_state_change_eqe(struct mlx4_dev *dev, int slave, u8 port, u8 port_subtype_change) { struct mlx4_eqe eqe; /*don't send if we don't have the that slave */ if (dev->num_vfs < slave) return 0; memset(&eqe, 0, sizeof eqe); eqe.type = MLX4_EVENT_TYPE_PORT_CHANGE; eqe.subtype = port_subtype_change; eqe.event.port_change.port = cpu_to_be32(port << 28); mlx4_dbg(dev, "%s: sending: %d to slave: %d on port: %d\n", __func__, port_subtype_change, slave, port); return mlx4_GEN_EQE(dev, slave, &eqe); } EXPORT_SYMBOL(mlx4_gen_port_state_change_eqe); enum slave_port_state mlx4_get_slave_port_state(struct mlx4_dev *dev, int slave, u8 port) { struct mlx4_priv *priv = mlx4_priv(dev); struct mlx4_slave_state *s_state = priv->mfunc.master.slave_state; if (slave >= dev->num_slaves || port > MLX4_MAX_PORTS) { pr_err("%s: Error: asking for slave:%d, port:%d\n", __func__, slave, port); return SLAVE_PORT_DOWN; } return s_state[slave].port_state[port]; } EXPORT_SYMBOL(mlx4_get_slave_port_state); static int mlx4_set_slave_port_state(struct mlx4_dev *dev, int slave, u8 port, enum slave_port_state state) { struct mlx4_priv *priv = mlx4_priv(dev); struct mlx4_slave_state *s_state = priv->mfunc.master.slave_state; if (slave >= dev->num_slaves || port > MLX4_MAX_PORTS || port == 0) { pr_err("%s: Error: asking for slave:%d, port:%d\n", __func__, slave, port); return -1; } s_state[slave].port_state[port] = state; return 0; } static void set_all_slave_state(struct mlx4_dev *dev, u8 port, int event) { int i; enum slave_port_gen_event gen_event; for (i = 0; i < dev->num_slaves; i++) set_and_calc_slave_port_state(dev, i, port, event, &gen_event); } /************************************************************************** The function get as input the new event to that port, and according to the prev state change the slave's port state. The events are: MLX4_PORT_STATE_DEV_EVENT_PORT_DOWN, MLX4_PORT_STATE_DEV_EVENT_PORT_UP MLX4_PORT_STATE_IB_EVENT_GID_VALID MLX4_PORT_STATE_IB_EVENT_GID_INVALID ***************************************************************************/ int set_and_calc_slave_port_state(struct mlx4_dev *dev, int slave, u8 port, int event, enum slave_port_gen_event *gen_event) { struct mlx4_priv *priv = mlx4_priv(dev); struct mlx4_slave_state *ctx = NULL; unsigned long flags; int ret = -1; enum slave_port_state cur_state = mlx4_get_slave_port_state(dev, slave, port); *gen_event = SLAVE_PORT_GEN_EVENT_NONE; if (slave >= dev->num_slaves || port > MLX4_MAX_PORTS || port == 0) { pr_err("%s: Error: asking for slave:%d, port:%d\n", __func__, slave, port); return ret; } ctx = &priv->mfunc.master.slave_state[slave]; spin_lock_irqsave(&ctx->lock, flags); switch (cur_state) { case SLAVE_PORT_DOWN: if (MLX4_PORT_STATE_DEV_EVENT_PORT_UP == event) mlx4_set_slave_port_state(dev, slave, port, SLAVE_PENDING_UP); break; case SLAVE_PENDING_UP: if (MLX4_PORT_STATE_DEV_EVENT_PORT_DOWN == event) mlx4_set_slave_port_state(dev, slave, port, SLAVE_PORT_DOWN); else if (MLX4_PORT_STATE_IB_PORT_STATE_EVENT_GID_VALID == event) { mlx4_set_slave_port_state(dev, slave, port, SLAVE_PORT_UP); *gen_event = SLAVE_PORT_GEN_EVENT_UP; } break; case SLAVE_PORT_UP: if (MLX4_PORT_STATE_DEV_EVENT_PORT_DOWN == event) { mlx4_set_slave_port_state(dev, slave, port, SLAVE_PORT_DOWN); *gen_event = SLAVE_PORT_GEN_EVENT_DOWN; } else if (MLX4_PORT_STATE_IB_EVENT_GID_INVALID == event) { mlx4_set_slave_port_state(dev, slave, port, SLAVE_PENDING_UP); *gen_event = SLAVE_PORT_GEN_EVENT_DOWN; } break; default: pr_err("%s: BUG!!! UNKNOWN state: " "slave:%d, port:%d\n", __func__, slave, port); goto out; } ret = mlx4_get_slave_port_state(dev, slave, port); out: spin_unlock_irqrestore(&ctx->lock, flags); return ret; } EXPORT_SYMBOL(set_and_calc_slave_port_state); int mlx4_gen_slaves_port_mgt_ev(struct mlx4_dev *dev, u8 port, int attr) { struct mlx4_eqe eqe; memset(&eqe, 0, sizeof eqe); eqe.type = MLX4_EVENT_TYPE_PORT_MNG_CHG_EVENT; eqe.subtype = MLX4_DEV_PMC_SUBTYPE_PORT_INFO; eqe.event.port_mgmt_change.port = port; eqe.event.port_mgmt_change.params.port_info.changed_attr = cpu_to_be32((u32) attr); slave_event(dev, ALL_SLAVES, &eqe); return 0; } EXPORT_SYMBOL(mlx4_gen_slaves_port_mgt_ev); void mlx4_master_handle_slave_flr(struct work_struct *work) { struct mlx4_mfunc_master_ctx *master = container_of(work, struct mlx4_mfunc_master_ctx, slave_flr_event_work); struct mlx4_mfunc *mfunc = container_of(master, struct mlx4_mfunc, master); struct mlx4_priv *priv = container_of(mfunc, struct mlx4_priv, mfunc); struct mlx4_dev *dev = &priv->dev; struct mlx4_slave_state *slave_state = priv->mfunc.master.slave_state; int i; int err; unsigned long flags; mlx4_dbg(dev, "mlx4_handle_slave_flr\n"); for (i = 0 ; i < dev->num_slaves; i++) { if (MLX4_COMM_CMD_FLR == slave_state[i].last_cmd) { mlx4_dbg(dev, "mlx4_handle_slave_flr: " "clean slave: %d\n", i); mlx4_delete_all_resources_for_slave(dev, i); /*return the slave to running mode*/ spin_lock_irqsave(&priv->mfunc.master.slave_state_lock, flags); slave_state[i].last_cmd = MLX4_COMM_CMD_RESET; slave_state[i].is_slave_going_down = 0; spin_unlock_irqrestore(&priv->mfunc.master.slave_state_lock, flags); /*notify the FW:*/ err = mlx4_cmd(dev, 0, i, 0, MLX4_CMD_INFORM_FLR_DONE, MLX4_CMD_TIME_CLASS_A, MLX4_CMD_WRAPPED); if (err) mlx4_warn(dev, "Failed to notify FW on " "FLR done (slave:%d)\n", i); } } } static int mlx4_eq_int(struct mlx4_dev *dev, struct mlx4_eq *eq) { struct mlx4_priv *priv = mlx4_priv(dev); struct mlx4_eqe *eqe; int cqn; int eqes_found = 0; int set_ci = 0; int port; int slave = 0; int ret; u32 flr_slave; u8 update_slave_state; int i; enum slave_port_gen_event gen_event; unsigned long flags; while ((eqe = next_eqe_sw(eq, dev->caps.eqe_factor))) { /* * Make sure we read EQ entry contents after we've * checked the ownership bit. */ rmb(); switch (eqe->type) { case MLX4_EVENT_TYPE_COMP: cqn = be32_to_cpu(eqe->event.comp.cqn) & 0xffffff; mlx4_cq_completion(dev, cqn); break; case MLX4_EVENT_TYPE_PATH_MIG: case MLX4_EVENT_TYPE_COMM_EST: case MLX4_EVENT_TYPE_SQ_DRAINED: case MLX4_EVENT_TYPE_SRQ_QP_LAST_WQE: case MLX4_EVENT_TYPE_WQ_CATAS_ERROR: case MLX4_EVENT_TYPE_PATH_MIG_FAILED: case MLX4_EVENT_TYPE_WQ_INVAL_REQ_ERROR: case MLX4_EVENT_TYPE_WQ_ACCESS_ERROR: mlx4_dbg(dev, "event %d arrived\n", eqe->type); if (mlx4_is_master(dev)) { /* forward only to slave owning the QP */ ret = mlx4_get_slave_from_resource_id(dev, RES_QP, be32_to_cpu(eqe->event.qp.qpn) & 0xffffff, &slave); if (ret && ret != -ENOENT) { mlx4_dbg(dev, "QP event %02x(%02x) on " "EQ %d at index %u: could " "not get slave id (%d)\n", eqe->type, eqe->subtype, eq->eqn, eq->cons_index, ret); break; } if (!ret && slave != dev->caps.function) { mlx4_slave_event(dev, slave, eqe); break; } } mlx4_qp_event(dev, be32_to_cpu(eqe->event.qp.qpn) & 0xffffff, eqe->type); break; case MLX4_EVENT_TYPE_SRQ_LIMIT: mlx4_warn(dev, "%s: MLX4_EVENT_TYPE_SRQ_LIMIT\n", __func__); case MLX4_EVENT_TYPE_SRQ_CATAS_ERROR: if (mlx4_is_master(dev)) { /* forward only to slave owning the SRQ */ ret = mlx4_get_slave_from_resource_id(dev, RES_SRQ, be32_to_cpu(eqe->event.srq.srqn) & 0xffffff, &slave); if (ret && ret != -ENOENT) { mlx4_warn(dev, "SRQ event %02x(%02x) " "on EQ %d at index %u: could" " not get slave id (%d)\n", eqe->type, eqe->subtype, eq->eqn, eq->cons_index, ret); break; } mlx4_warn(dev, "%s: slave:%d, srq_no:0x%x," " event: %02x(%02x)\n", __func__, slave, be32_to_cpu(eqe->event.srq.srqn), eqe->type, eqe->subtype); if (!ret && slave != dev->caps.function) { mlx4_warn(dev, "%s: sending event " "%02x(%02x) to slave:%d\n", __func__, eqe->type, eqe->subtype, slave); mlx4_slave_event(dev, slave, eqe); break; } } mlx4_srq_event(dev, be32_to_cpu(eqe->event.srq.srqn) & 0xffffff, eqe->type); break; case MLX4_EVENT_TYPE_CMD: mlx4_cmd_event(dev, be16_to_cpu(eqe->event.cmd.token), eqe->event.cmd.status, be64_to_cpu(eqe->event.cmd.out_param)); break; case MLX4_EVENT_TYPE_PORT_CHANGE: port = be32_to_cpu(eqe->event.port_change.port) >> 28; if (eqe->subtype == MLX4_PORT_CHANGE_SUBTYPE_DOWN) { mlx4_dispatch_event(dev, MLX4_DEV_EVENT_PORT_DOWN, port); mlx4_priv(dev)->sense.do_sense_port[port] = 1; if (!mlx4_is_master(dev)) break; for (i = 0; i < dev->num_slaves; i++) { if (dev->caps.port_type[port] == MLX4_PORT_TYPE_ETH) { if (i == mlx4_master_func_num(dev)) continue; mlx4_dbg(dev, "%s: Sending MLX4_PORT_CHANGE_SUBTYPE_DOWN" " to slave: %d, port:%d\n", __func__, i, port); mlx4_slave_event(dev, i, eqe); } else { /* IB port */ set_and_calc_slave_port_state(dev, i, port, MLX4_PORT_STATE_DEV_EVENT_PORT_DOWN, &gen_event); /*we can be in pending state, then do not send port_down event*/ if (SLAVE_PORT_GEN_EVENT_DOWN == gen_event) { if (i == mlx4_master_func_num(dev)) continue; mlx4_slave_event(dev, i, eqe); } } } } else { mlx4_dispatch_event(dev, MLX4_DEV_EVENT_PORT_UP, port); mlx4_priv(dev)->sense.do_sense_port[port] = 0; if (!mlx4_is_master(dev)) break; if (dev->caps.port_type[port] == MLX4_PORT_TYPE_ETH) for (i = 0; i < dev->num_slaves; i++) { if (i == mlx4_master_func_num(dev)) continue; mlx4_slave_event(dev, i, eqe); } else /* IB port */ /* port-up event will be sent to a slave when the * slave's alias-guid is set. This is done in alias_GUID.c */ set_all_slave_state(dev, port, MLX4_DEV_EVENT_PORT_UP); } break; case MLX4_EVENT_TYPE_CQ_ERROR: mlx4_warn(dev, "CQ %s on CQN %06x\n", eqe->event.cq_err.syndrome == 1 ? "overrun" : "access violation", be32_to_cpu(eqe->event.cq_err.cqn) & 0xffffff); if (mlx4_is_master(dev)) { ret = mlx4_get_slave_from_resource_id(dev, RES_CQ, be32_to_cpu(eqe->event.cq_err.cqn) & 0xffffff, &slave); if (ret && ret != -ENOENT) { mlx4_dbg(dev, "CQ event %02x(%02x) on " "EQ %d at index %u: could " "not get slave id (%d)\n", eqe->type, eqe->subtype, eq->eqn, eq->cons_index, ret); break; } if (!ret && slave != dev->caps.function) { mlx4_slave_event(dev, slave, eqe); break; } } mlx4_cq_event(dev, be32_to_cpu(eqe->event.cq_err.cqn) & 0xffffff, eqe->type); break; case MLX4_EVENT_TYPE_EQ_OVERFLOW: mlx4_warn(dev, "EQ overrun on EQN %d\n", eq->eqn); break; case MLX4_EVENT_TYPE_OP_REQUIRED: atomic_inc(&priv->opreq_count); /* FW commands can't be executed from interrupt context working in deferred task */ queue_work(mlx4_wq, &priv->opreq_task); break; case MLX4_EVENT_TYPE_COMM_CHANNEL: if (!mlx4_is_master(dev)) { mlx4_warn(dev, "Received comm channel event " "for non master device\n"); break; } memcpy(&priv->mfunc.master.comm_arm_bit_vector, eqe->event.comm_channel_arm.bit_vec, sizeof eqe->event.comm_channel_arm.bit_vec); queue_work(priv->mfunc.master.comm_wq, &priv->mfunc.master.comm_work); break; case MLX4_EVENT_TYPE_FLR_EVENT: flr_slave = be32_to_cpu(eqe->event.flr_event.slave_id); if (!mlx4_is_master(dev)) { mlx4_warn(dev, "Non-master function received" "FLR event\n"); break; } mlx4_dbg(dev, "FLR event for slave: %d\n", flr_slave); if (flr_slave >= dev->num_slaves) { mlx4_warn(dev, "Got FLR for unknown function: %d\n", flr_slave); update_slave_state = 0; } else update_slave_state = 1; spin_lock_irqsave(&priv->mfunc.master.slave_state_lock, flags); if (update_slave_state) { priv->mfunc.master.slave_state[flr_slave].active = false; priv->mfunc.master.slave_state[flr_slave].last_cmd = MLX4_COMM_CMD_FLR; priv->mfunc.master.slave_state[flr_slave].is_slave_going_down = 1; } spin_unlock_irqrestore(&priv->mfunc.master.slave_state_lock, flags); queue_work(priv->mfunc.master.comm_wq, &priv->mfunc.master.slave_flr_event_work); break; case MLX4_EVENT_TYPE_FATAL_WARNING: if (eqe->subtype == MLX4_FATAL_WARNING_SUBTYPE_WARMING) { if (mlx4_is_master(dev)) for (i = 0; i < dev->num_slaves; i++) { mlx4_dbg(dev, "%s: Sending " "MLX4_FATAL_WARNING_SUBTYPE_WARMING" " to slave: %d\n", __func__, i); if (i == dev->caps.function) continue; mlx4_slave_event(dev, i, eqe); } mlx4_err(dev, "Temperature Threshold was reached! " "Threshold: %d celsius degrees; " "Current Temperature: %d\n", be16_to_cpu(eqe->event.warming.warning_threshold), be16_to_cpu(eqe->event.warming.current_temperature)); } else mlx4_warn(dev, "Unhandled event FATAL WARNING (%02x), " "subtype %02x on EQ %d at index %u. owner=%x, " "nent=0x%x, slave=%x, ownership=%s\n", eqe->type, eqe->subtype, eq->eqn, eq->cons_index, eqe->owner, eq->nent, eqe->slave_id, !!(eqe->owner & 0x80) ^ !!(eq->cons_index & eq->nent) ? "HW" : "SW"); break; case MLX4_EVENT_TYPE_PORT_MNG_CHG_EVENT: mlx4_dispatch_event(dev, MLX4_DEV_EVENT_PORT_MGMT_CHANGE, (unsigned long) eqe); break; case MLX4_EVENT_TYPE_EEC_CATAS_ERROR: case MLX4_EVENT_TYPE_ECC_DETECT: default: mlx4_warn(dev, "Unhandled event %02x(%02x) on EQ %d at " "index %u. owner=%x, nent=0x%x, slave=%x, " "ownership=%s\n", eqe->type, eqe->subtype, eq->eqn, eq->cons_index, eqe->owner, eq->nent, eqe->slave_id, !!(eqe->owner & 0x80) ^ !!(eq->cons_index & eq->nent) ? "HW" : "SW"); break; }; ++eq->cons_index; eqes_found = 1; ++set_ci; /* * The HCA will think the queue has overflowed if we * don't tell it we've been processing events. We * create our EQs with MLX4_NUM_SPARE_EQE extra * entries, so we must update our consumer index at * least that often. */ if (unlikely(set_ci >= MLX4_NUM_SPARE_EQE)) { eq_set_ci(eq, 0); set_ci = 0; } } eq_set_ci(eq, 1); return eqes_found; } static irqreturn_t mlx4_interrupt(int irq, void *dev_ptr) { struct mlx4_dev *dev = dev_ptr; struct mlx4_priv *priv = mlx4_priv(dev); int work = 0; int i; writel(priv->eq_table.clr_mask, priv->eq_table.clr_int); for (i = 0; i < dev->caps.num_comp_vectors + 1; ++i) work |= mlx4_eq_int(dev, &priv->eq_table.eq[i]); return IRQ_RETVAL(work); } static irqreturn_t mlx4_msi_x_interrupt(int irq, void *eq_ptr) { struct mlx4_eq *eq = eq_ptr; struct mlx4_dev *dev = eq->dev; mlx4_eq_int(dev, eq); /* MSI-X vectors always belong to us */ return IRQ_HANDLED; } int mlx4_MAP_EQ_wrapper(struct mlx4_dev *dev, int slave, struct mlx4_vhcr *vhcr, struct mlx4_cmd_mailbox *inbox, struct mlx4_cmd_mailbox *outbox, struct mlx4_cmd_info *cmd) { struct mlx4_priv *priv = mlx4_priv(dev); struct mlx4_slave_event_eq_info *event_eq = priv->mfunc.master.slave_state[slave].event_eq; u32 in_modifier = vhcr->in_modifier; u32 eqn = in_modifier & 0x1FF; u64 in_param = vhcr->in_param; int err = 0; int i; if (slave == dev->caps.function) err = mlx4_cmd(dev, in_param, (in_modifier & 0x80000000) | eqn, 0, MLX4_CMD_MAP_EQ, MLX4_CMD_TIME_CLASS_B, MLX4_CMD_NATIVE); if (!err) for (i = 0; i < MLX4_EVENT_TYPES_NUM; ++i) if (in_param & (1LL << i)) event_eq[i].eqn = in_modifier >> 31 ? -1 : eqn; return err; } static int mlx4_MAP_EQ(struct mlx4_dev *dev, u64 event_mask, int unmap, int eq_num) { return mlx4_cmd(dev, event_mask, (unmap << 31) | eq_num, 0, MLX4_CMD_MAP_EQ, MLX4_CMD_TIME_CLASS_B, MLX4_CMD_WRAPPED); } static int mlx4_SW2HW_EQ(struct mlx4_dev *dev, struct mlx4_cmd_mailbox *mailbox, int eq_num) { return mlx4_cmd(dev, mailbox->dma, eq_num, 0, MLX4_CMD_SW2HW_EQ, MLX4_CMD_TIME_CLASS_A, MLX4_CMD_WRAPPED); } static int mlx4_HW2SW_EQ(struct mlx4_dev *dev, struct mlx4_cmd_mailbox *mailbox, int eq_num) { return mlx4_cmd_box(dev, 0, mailbox->dma, eq_num, 0, MLX4_CMD_HW2SW_EQ, MLX4_CMD_TIME_CLASS_A, MLX4_CMD_WRAPPED); } static int mlx4_num_eq_uar(struct mlx4_dev *dev) { /* * Each UAR holds 4 EQ doorbells. To figure out how many UARs * we need to map, take the difference of highest index and * the lowest index we'll use and add 1. */ return (dev->caps.num_comp_vectors + 1 + dev->caps.reserved_eqs + dev->caps.comp_pool)/4 - dev->caps.reserved_eqs/4 + 1; } static void __iomem *mlx4_get_eq_uar(struct mlx4_dev *dev, struct mlx4_eq *eq) { struct mlx4_priv *priv = mlx4_priv(dev); int index; index = eq->eqn / 4 - dev->caps.reserved_eqs / 4; if (!priv->eq_table.uar_map[index]) { priv->eq_table.uar_map[index] = ioremap(pci_resource_start(dev->pdev, 2) + ((eq->eqn / 4) << PAGE_SHIFT), PAGE_SIZE); if (!priv->eq_table.uar_map[index]) { mlx4_err(dev, "Couldn't map EQ doorbell for EQN 0x%06x\n", eq->eqn); return NULL; } } return priv->eq_table.uar_map[index] + 0x800 + 8 * (eq->eqn % 4); } static void mlx4_unmap_uar(struct mlx4_dev *dev) { struct mlx4_priv *priv = mlx4_priv(dev); int i; for (i = 0; i < mlx4_num_eq_uar(dev); ++i) if (priv->eq_table.uar_map[i]) { iounmap(priv->eq_table.uar_map[i]); priv->eq_table.uar_map[i] = NULL; } } static int mlx4_create_eq(struct mlx4_dev *dev, int nent, u8 intr, struct mlx4_eq *eq) { struct mlx4_priv *priv = mlx4_priv(dev); struct mlx4_cmd_mailbox *mailbox; struct mlx4_eq_context *eq_context; int npages; u64 *dma_list = NULL; dma_addr_t t; u64 mtt_addr; int err = -ENOMEM; int i; eq->dev = dev; eq->nent = roundup_pow_of_two(max(nent, 2)); /* CX3 is capable of extending the CQE\EQE from 32 to 64 bytes */ npages = PAGE_ALIGN(eq->nent * (MLX4_EQ_ENTRY_SIZE << dev->caps.eqe_factor)) / PAGE_SIZE; eq->page_list = kmalloc(npages * sizeof *eq->page_list, GFP_KERNEL); if (!eq->page_list) goto err_out; for (i = 0; i < npages; ++i) eq->page_list[i].buf = NULL; dma_list = kmalloc(npages * sizeof *dma_list, GFP_KERNEL); if (!dma_list) goto err_out_free; mailbox = mlx4_alloc_cmd_mailbox(dev); if (IS_ERR(mailbox)) goto err_out_free; eq_context = mailbox->buf; for (i = 0; i < npages; ++i) { eq->page_list[i].buf = dma_alloc_coherent(&dev->pdev->dev, PAGE_SIZE, &t, GFP_KERNEL); if (!eq->page_list[i].buf) goto err_out_free_pages; dma_list[i] = t; eq->page_list[i].map = t; memset(eq->page_list[i].buf, 0, PAGE_SIZE); } eq->eqn = mlx4_bitmap_alloc(&priv->eq_table.bitmap); if (eq->eqn == -1) goto err_out_free_pages; eq->doorbell = mlx4_get_eq_uar(dev, eq); if (!eq->doorbell) { err = -ENOMEM; goto err_out_free_eq; } err = mlx4_mtt_init(dev, npages, PAGE_SHIFT, &eq->mtt); if (err) goto err_out_free_eq; err = mlx4_write_mtt(dev, &eq->mtt, 0, npages, dma_list); if (err) goto err_out_free_mtt; memset(eq_context, 0, sizeof *eq_context); eq_context->flags = cpu_to_be32(MLX4_EQ_STATUS_OK | MLX4_EQ_STATE_ARMED); eq_context->log_eq_size = ilog2(eq->nent); eq_context->intr = intr; eq_context->log_page_size = PAGE_SHIFT - MLX4_ICM_PAGE_SHIFT; mtt_addr = mlx4_mtt_addr(dev, &eq->mtt); eq_context->mtt_base_addr_h = mtt_addr >> 32; eq_context->mtt_base_addr_l = cpu_to_be32(mtt_addr & 0xffffffff); err = mlx4_SW2HW_EQ(dev, mailbox, eq->eqn); if (err) { mlx4_warn(dev, "SW2HW_EQ failed (%d)\n", err); goto err_out_free_mtt; } kfree(dma_list); mlx4_free_cmd_mailbox(dev, mailbox); eq->cons_index = 0; return err; err_out_free_mtt: mlx4_mtt_cleanup(dev, &eq->mtt); err_out_free_eq: mlx4_bitmap_free(&priv->eq_table.bitmap, eq->eqn); err_out_free_pages: for (i = 0; i < npages; ++i) if (eq->page_list[i].buf) dma_free_coherent(&dev->pdev->dev, PAGE_SIZE, eq->page_list[i].buf, eq->page_list[i].map); mlx4_free_cmd_mailbox(dev, mailbox); err_out_free: kfree(eq->page_list); kfree(dma_list); err_out: return err; } static void mlx4_free_eq(struct mlx4_dev *dev, struct mlx4_eq *eq) { struct mlx4_priv *priv = mlx4_priv(dev); struct mlx4_cmd_mailbox *mailbox; int err; int i; /* CX3 is capable of extending the CQE\EQE from 32 to 64 bytes */ int npages = PAGE_ALIGN((MLX4_EQ_ENTRY_SIZE << dev->caps.eqe_factor) * eq->nent) / PAGE_SIZE; mailbox = mlx4_alloc_cmd_mailbox(dev); if (IS_ERR(mailbox)) return; err = mlx4_HW2SW_EQ(dev, mailbox, eq->eqn); if (err) mlx4_warn(dev, "HW2SW_EQ failed (%d)\n", err); if (0) { mlx4_dbg(dev, "Dumping EQ context %02x:\n", eq->eqn); for (i = 0; i < sizeof (struct mlx4_eq_context) / 4; ++i) { if (i % 4 == 0) pr_cont("[%02x] ", i * 4); pr_cont(" %08x", be32_to_cpup(mailbox->buf + i * 4)); if ((i + 1) % 4 == 0) pr_cont("\n"); } } mlx4_mtt_cleanup(dev, &eq->mtt); for (i = 0; i < npages; ++i) dma_free_coherent(&dev->pdev->dev, PAGE_SIZE, eq->page_list[i].buf, eq->page_list[i].map); kfree(eq->page_list); mlx4_bitmap_free(&priv->eq_table.bitmap, eq->eqn); mlx4_free_cmd_mailbox(dev, mailbox); } static void mlx4_free_irqs(struct mlx4_dev *dev) { struct mlx4_eq_table *eq_table = &mlx4_priv(dev)->eq_table; struct mlx4_priv *priv = mlx4_priv(dev); int i, vec; if (eq_table->have_irq) free_irq(dev->pdev->irq, dev); for (i = 0; i < dev->caps.num_comp_vectors + 1; ++i) if (eq_table->eq[i].have_irq) { free_irq(eq_table->eq[i].irq, eq_table->eq + i); eq_table->eq[i].have_irq = 0; } for (i = 0; i < dev->caps.comp_pool; i++) { /* * Freeing the assigned irq's * all bits should be 0, but we need to validate */ if (priv->msix_ctl.pool_bm & 1ULL << i) { /* NO need protecting*/ vec = dev->caps.num_comp_vectors + 1 + i; free_irq(priv->eq_table.eq[vec].irq, &priv->eq_table.eq[vec]); } } kfree(eq_table->irq_names); } static int mlx4_map_clr_int(struct mlx4_dev *dev) { struct mlx4_priv *priv = mlx4_priv(dev); priv->clr_base = ioremap(pci_resource_start(dev->pdev, priv->fw.clr_int_bar) + priv->fw.clr_int_base, MLX4_CLR_INT_SIZE); if (!priv->clr_base) { mlx4_err(dev, "Couldn't map interrupt clear register, aborting.\n"); return -ENOMEM; } return 0; } static void mlx4_unmap_clr_int(struct mlx4_dev *dev) { struct mlx4_priv *priv = mlx4_priv(dev); iounmap(priv->clr_base); } int mlx4_alloc_eq_table(struct mlx4_dev *dev) { struct mlx4_priv *priv = mlx4_priv(dev); priv->eq_table.eq = kcalloc(dev->caps.num_eqs - dev->caps.reserved_eqs, sizeof *priv->eq_table.eq, GFP_KERNEL); if (!priv->eq_table.eq) return -ENOMEM; return 0; } void mlx4_free_eq_table(struct mlx4_dev *dev) { kfree(mlx4_priv(dev)->eq_table.eq); } int mlx4_init_eq_table(struct mlx4_dev *dev) { struct mlx4_priv *priv = mlx4_priv(dev); int err; int i; priv->eq_table.uar_map = kcalloc(mlx4_num_eq_uar(dev), sizeof *priv->eq_table.uar_map, GFP_KERNEL); if (!priv->eq_table.uar_map) { err = -ENOMEM; goto err_out_free; } err = mlx4_bitmap_init(&priv->eq_table.bitmap, dev->caps.num_eqs, dev->caps.num_eqs - 1, dev->caps.reserved_eqs, 0); if (err) goto err_out_free; for (i = 0; i < mlx4_num_eq_uar(dev); ++i) priv->eq_table.uar_map[i] = NULL; if (!mlx4_is_slave(dev)) { err = mlx4_map_clr_int(dev); if (err) goto err_out_bitmap; priv->eq_table.clr_mask = swab32(1 << (priv->eq_table.inta_pin & 31)); priv->eq_table.clr_int = priv->clr_base + (priv->eq_table.inta_pin < 32 ? 4 : 0); } priv->eq_table.irq_names = kmalloc(MLX4_IRQNAME_SIZE * (dev->caps.num_comp_vectors + 1 + dev->caps.comp_pool), GFP_KERNEL); if (!priv->eq_table.irq_names) { err = -ENOMEM; goto err_out_clr_int; } for (i = 0; i < dev->caps.num_comp_vectors; ++i) { err = mlx4_create_eq(dev, dev->caps.num_cqs - dev->caps.reserved_cqs + MLX4_NUM_SPARE_EQE, (dev->flags & MLX4_FLAG_MSI_X) ? i : 0, &priv->eq_table.eq[i]); if (err) { --i; goto err_out_unmap; } } err = mlx4_create_eq(dev, MLX4_NUM_ASYNC_EQE + MLX4_NUM_SPARE_EQE, (dev->flags & MLX4_FLAG_MSI_X) ? dev->caps.num_comp_vectors : 0, &priv->eq_table.eq[dev->caps.num_comp_vectors]); if (err) goto err_out_comp; /*if additional completion vectors poolsize is 0 this loop will not run*/ for (i = dev->caps.num_comp_vectors + 1; i < dev->caps.num_comp_vectors + dev->caps.comp_pool + 1; ++i) { err = mlx4_create_eq(dev, dev->caps.num_cqs - dev->caps.reserved_cqs + MLX4_NUM_SPARE_EQE, (dev->flags & MLX4_FLAG_MSI_X) ? i : 0, &priv->eq_table.eq[i]); if (err) { --i; goto err_out_unmap; } } if (dev->flags & MLX4_FLAG_MSI_X) { const char *eq_name; for (i = 0; i < dev->caps.num_comp_vectors + 1; ++i) { if (i < dev->caps.num_comp_vectors) { snprintf(priv->eq_table.irq_names + i * MLX4_IRQNAME_SIZE, MLX4_IRQNAME_SIZE, "mlx4-comp-%d@pci:%s", i, pci_name(dev->pdev)); } else { snprintf(priv->eq_table.irq_names + i * MLX4_IRQNAME_SIZE, MLX4_IRQNAME_SIZE, "mlx4-async@pci:%s", pci_name(dev->pdev)); } eq_name = priv->eq_table.irq_names + i * MLX4_IRQNAME_SIZE; err = request_irq(priv->eq_table.eq[i].irq, mlx4_msi_x_interrupt, 0, eq_name, priv->eq_table.eq + i); if (err) goto err_out_async; priv->eq_table.eq[i].have_irq = 1; } } else { snprintf(priv->eq_table.irq_names, MLX4_IRQNAME_SIZE, DRV_NAME "@pci:%s", pci_name(dev->pdev)); err = request_irq(dev->pdev->irq, mlx4_interrupt, IRQF_SHARED, priv->eq_table.irq_names, dev); if (err) goto err_out_async; priv->eq_table.have_irq = 1; } err = mlx4_MAP_EQ(dev, get_async_ev_mask(dev), 0, priv->eq_table.eq[dev->caps.num_comp_vectors].eqn); if (err) mlx4_warn(dev, "MAP_EQ for async EQ %d failed (%d)\n", priv->eq_table.eq[dev->caps.num_comp_vectors].eqn, err); for (i = 0; i < dev->caps.num_comp_vectors + 1; ++i) eq_set_ci(&priv->eq_table.eq[i], 1); return 0; err_out_async: mlx4_free_eq(dev, &priv->eq_table.eq[dev->caps.num_comp_vectors]); err_out_comp: i = dev->caps.num_comp_vectors - 1; err_out_unmap: while (i >= 0) { mlx4_free_eq(dev, &priv->eq_table.eq[i]); --i; } mlx4_free_irqs(dev); err_out_clr_int: if (!mlx4_is_slave(dev)) mlx4_unmap_clr_int(dev); err_out_bitmap: mlx4_unmap_uar(dev); mlx4_bitmap_cleanup(&priv->eq_table.bitmap); err_out_free: kfree(priv->eq_table.uar_map); return err; } void mlx4_cleanup_eq_table(struct mlx4_dev *dev) { struct mlx4_priv *priv = mlx4_priv(dev); int i; mlx4_MAP_EQ(dev, get_async_ev_mask(dev), 1, priv->eq_table.eq[dev->caps.num_comp_vectors].eqn); mlx4_free_irqs(dev); for (i = 0; i < dev->caps.num_comp_vectors + dev->caps.comp_pool + 1; ++i) mlx4_free_eq(dev, &priv->eq_table.eq[i]); if (!mlx4_is_slave(dev)) mlx4_unmap_clr_int(dev); mlx4_unmap_uar(dev); mlx4_bitmap_cleanup(&priv->eq_table.bitmap); kfree(priv->eq_table.uar_map); } /* A test that verifies that we can accept interrupts on all * the irq vectors of the device. * Interrupts are checked using the NOP command. */ int mlx4_test_interrupts(struct mlx4_dev *dev) { struct mlx4_priv *priv = mlx4_priv(dev); int i; int err; err = mlx4_NOP(dev); /* When not in MSI_X, there is only one irq to check */ if (!(dev->flags & MLX4_FLAG_MSI_X) || mlx4_is_slave(dev)) return err; /* A loop over all completion vectors, for each vector we will check * whether it works by mapping command completions to that vector * and performing a NOP command */ for(i = 0; !err && (i < dev->caps.num_comp_vectors); ++i) { /* Temporary use polling for command completions */ mlx4_cmd_use_polling(dev); /* Map the new eq to handle all asyncronous events */ err = mlx4_MAP_EQ(dev, get_async_ev_mask(dev), 0, priv->eq_table.eq[i].eqn); if (err) { mlx4_warn(dev, "Failed mapping eq for interrupt test\n"); mlx4_cmd_use_events(dev); break; } /* Go back to using events */ mlx4_cmd_use_events(dev); err = mlx4_NOP(dev); } /* Return to default */ mlx4_MAP_EQ(dev, get_async_ev_mask(dev), 0, priv->eq_table.eq[dev->caps.num_comp_vectors].eqn); return err; } EXPORT_SYMBOL(mlx4_test_interrupts); int mlx4_assign_eq(struct mlx4_dev *dev, char *name, int *vector) { struct mlx4_priv *priv = mlx4_priv(dev); int vec = 0, err = 0, i; mutex_lock(&priv->msix_ctl.pool_lock); for (i = 0; !vec && i < dev->caps.comp_pool; i++) { if (~priv->msix_ctl.pool_bm & 1ULL << i) { priv->msix_ctl.pool_bm |= 1ULL << i; vec = dev->caps.num_comp_vectors + 1 + i; snprintf(priv->eq_table.irq_names + vec * MLX4_IRQNAME_SIZE, MLX4_IRQNAME_SIZE, "%s", name); err = request_irq(priv->eq_table.eq[vec].irq, mlx4_msi_x_interrupt, 0, &priv->eq_table.irq_names[vec<<5], priv->eq_table.eq + vec); if (err) { /*zero out bit by fliping it*/ priv->msix_ctl.pool_bm ^= 1 << i; vec = 0; continue; /*we dont want to break here*/ } eq_set_ci(&priv->eq_table.eq[vec], 1); } } mutex_unlock(&priv->msix_ctl.pool_lock); if (vec) { *vector = vec; } else { *vector = 0; err = (i == dev->caps.comp_pool) ? -ENOSPC : err; } return err; } EXPORT_SYMBOL(mlx4_assign_eq); void mlx4_release_eq(struct mlx4_dev *dev, int vec) { struct mlx4_priv *priv = mlx4_priv(dev); /*bm index*/ int i = vec - dev->caps.num_comp_vectors - 1; if (likely(i >= 0)) { /*sanity check , making sure were not trying to free irq's Belonging to a legacy EQ*/ mutex_lock(&priv->msix_ctl.pool_lock); if (priv->msix_ctl.pool_bm & 1ULL << i) { free_irq(priv->eq_table.eq[vec].irq, &priv->eq_table.eq[vec]); priv->msix_ctl.pool_bm &= ~(1ULL << i); } mutex_unlock(&priv->msix_ctl.pool_lock); } } EXPORT_SYMBOL(mlx4_release_eq); Index: stable/10/sys/ofed/drivers/net/mlx4/fw.c =================================================================== --- stable/10/sys/ofed/drivers/net/mlx4/fw.c (revision 271126) +++ stable/10/sys/ofed/drivers/net/mlx4/fw.c (revision 271127) @@ -1,1796 +1,1796 @@ /* * Copyright (c) 2004, 2005 Topspin Communications. All rights reserved. * Copyright (c) 2005, 2006, 2007, 2008 Mellanox Technologies. All rights reserved. * Copyright (c) 2005, 2006, 2007 Cisco Systems, Inc. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU * General Public License (GPL) Version 2, available from the file * COPYING in the main directory of this source tree, or the * OpenIB.org BSD license below: * * Redistribution and use in source and binary forms, with or * without modification, are permitted provided that the following * conditions are met: * * - Redistributions of source code must retain the above * copyright notice, this list of conditions and the following * disclaimer. * * - Redistributions in binary form must reproduce the above * copyright notice, this list of conditions and the following * disclaimer in the documentation and/or other materials * provided with the distribution. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ #include #include #include "fw.h" #include "icm.h" enum { MLX4_COMMAND_INTERFACE_MIN_REV = 2, MLX4_COMMAND_INTERFACE_MAX_REV = 3, MLX4_COMMAND_INTERFACE_NEW_PORT_CMDS = 3, }; extern void __buggy_use_of_MLX4_GET(void); extern void __buggy_use_of_MLX4_PUT(void); static bool enable_qos; module_param(enable_qos, bool, 0444); MODULE_PARM_DESC(enable_qos, "Enable Quality of Service support in the HCA (default: off)"); #define MLX4_GET(dest, source, offset) \ do { \ void *__p = (char *) (source) + (offset); \ switch (sizeof (dest)) { \ case 1: (dest) = *(u8 *) __p; break; \ case 2: (dest) = be16_to_cpup(__p); break; \ case 4: (dest) = be32_to_cpup(__p); break; \ case 8: (dest) = be64_to_cpup(__p); break; \ default: __buggy_use_of_MLX4_GET(); \ } \ } while (0) #define MLX4_PUT(dest, source, offset) \ do { \ void *__d = ((char *) (dest) + (offset)); \ switch (sizeof(source)) { \ case 1: *(u8 *) __d = (source); break; \ case 2: *(__be16 *) __d = cpu_to_be16(source); break; \ case 4: *(__be32 *) __d = cpu_to_be32(source); break; \ case 8: *(__be64 *) __d = cpu_to_be64(source); break; \ default: __buggy_use_of_MLX4_PUT(); \ } \ } while (0) static void dump_dev_cap_flags(struct mlx4_dev *dev, u64 flags) { static const char *fname[] = { [ 0] = "RC transport", [ 1] = "UC transport", [ 2] = "UD transport", [ 3] = "XRC transport", [ 4] = "reliable multicast", [ 5] = "FCoIB support", [ 6] = "SRQ support", [ 7] = "IPoIB checksum offload", [ 8] = "P_Key violation counter", [ 9] = "Q_Key violation counter", [10] = "VMM", [12] = "DPDP", [15] = "Big LSO headers", [16] = "MW support", [17] = "APM support", [18] = "Atomic ops support", [19] = "Raw multicast support", [20] = "Address vector port checking support", [21] = "UD multicast support", [24] = "Demand paging support", [25] = "Router support", [30] = "IBoE support", [32] = "Unicast loopback support", [34] = "FCS header control", [38] = "Wake On LAN support", [40] = "UDP RSS support", [41] = "Unicast VEP steering support", [42] = "Multicast VEP steering support", [48] = "Counters support", [59] = "Port management change event support", [60] = "eSwitch support", [61] = "64 byte EQE support", [62] = "64 byte CQE support", }; int i; mlx4_dbg(dev, "DEV_CAP flags:\n"); for (i = 0; i < ARRAY_SIZE(fname); ++i) if (fname[i] && (flags & (1LL << i))) mlx4_dbg(dev, " %s\n", fname[i]); } static void dump_dev_cap_flags2(struct mlx4_dev *dev, u64 flags) { static const char * const fname[] = { [0] = "RSS support", [1] = "RSS Toeplitz Hash Function support", [2] = "RSS XOR Hash Function support", [3] = "Device manage flow steering support" }; int i; for (i = 0; i < ARRAY_SIZE(fname); ++i) if (fname[i] && (flags & (1LL << i))) mlx4_dbg(dev, " %s\n", fname[i]); } int mlx4_MOD_STAT_CFG(struct mlx4_dev *dev, struct mlx4_mod_stat_cfg *cfg) { struct mlx4_cmd_mailbox *mailbox; u32 *inbox; int err = 0; #define MOD_STAT_CFG_IN_SIZE 0x100 #define MOD_STAT_CFG_PG_SZ_M_OFFSET 0x002 #define MOD_STAT_CFG_PG_SZ_OFFSET 0x003 mailbox = mlx4_alloc_cmd_mailbox(dev); if (IS_ERR(mailbox)) return PTR_ERR(mailbox); inbox = mailbox->buf; memset(inbox, 0, MOD_STAT_CFG_IN_SIZE); MLX4_PUT(inbox, cfg->log_pg_sz, MOD_STAT_CFG_PG_SZ_OFFSET); MLX4_PUT(inbox, cfg->log_pg_sz_m, MOD_STAT_CFG_PG_SZ_M_OFFSET); err = mlx4_cmd(dev, mailbox->dma, 0, 0, MLX4_CMD_MOD_STAT_CFG, MLX4_CMD_TIME_CLASS_A, MLX4_CMD_NATIVE); mlx4_free_cmd_mailbox(dev, mailbox); return err; } int mlx4_QUERY_FUNC_CAP_wrapper(struct mlx4_dev *dev, int slave, struct mlx4_vhcr *vhcr, struct mlx4_cmd_mailbox *inbox, struct mlx4_cmd_mailbox *outbox, struct mlx4_cmd_info *cmd) { struct mlx4_priv *priv = mlx4_priv(dev); u8 field; u32 size; int err = 0; #define QUERY_FUNC_CAP_FLAGS_OFFSET 0x0 #define QUERY_FUNC_CAP_NUM_PORTS_OFFSET 0x1 #define QUERY_FUNC_CAP_PF_BHVR_OFFSET 0x4 #define QUERY_FUNC_CAP_FMR_OFFSET 0x8 #define QUERY_FUNC_CAP_QP_QUOTA_OFFSET 0x10 #define QUERY_FUNC_CAP_CQ_QUOTA_OFFSET 0x14 #define QUERY_FUNC_CAP_SRQ_QUOTA_OFFSET 0x18 #define QUERY_FUNC_CAP_MPT_QUOTA_OFFSET 0x20 #define QUERY_FUNC_CAP_MTT_QUOTA_OFFSET 0x24 #define QUERY_FUNC_CAP_MCG_QUOTA_OFFSET 0x28 #define QUERY_FUNC_CAP_MAX_EQ_OFFSET 0x2c #define QUERY_FUNC_CAP_RESERVED_EQ_OFFSET 0x30 #define QUERY_FUNC_CAP_FMR_FLAG 0x80 #define QUERY_FUNC_CAP_FLAG_RDMA 0x40 #define QUERY_FUNC_CAP_FLAG_ETH 0x80 /* when opcode modifier = 1 */ #define QUERY_FUNC_CAP_PHYS_PORT_OFFSET 0x3 #define QUERY_FUNC_CAP_RDMA_PROPS_OFFSET 0x8 #define QUERY_FUNC_CAP_ETH_PROPS_OFFSET 0xc #define QUERY_FUNC_CAP_QP0_TUNNEL 0x10 #define QUERY_FUNC_CAP_QP0_PROXY 0x14 #define QUERY_FUNC_CAP_QP1_TUNNEL 0x18 #define QUERY_FUNC_CAP_QP1_PROXY 0x1c #define QUERY_FUNC_CAP_ETH_PROPS_FORCE_MAC 0x40 #define QUERY_FUNC_CAP_ETH_PROPS_FORCE_VLAN 0x80 #define QUERY_FUNC_CAP_RDMA_PROPS_FORCE_PHY_WQE_GID 0x80 if (vhcr->op_modifier == 1) { field = 0; /* ensure force vlan and force mac bits are not set */ MLX4_PUT(outbox->buf, field, QUERY_FUNC_CAP_ETH_PROPS_OFFSET); /* ensure that phy_wqe_gid bit is not set */ MLX4_PUT(outbox->buf, field, QUERY_FUNC_CAP_RDMA_PROPS_OFFSET); field = vhcr->in_modifier; /* phys-port = logical-port */ MLX4_PUT(outbox->buf, field, QUERY_FUNC_CAP_PHYS_PORT_OFFSET); /* size is now the QP number */ size = dev->phys_caps.base_tunnel_sqpn + 8 * slave + field - 1; MLX4_PUT(outbox->buf, size, QUERY_FUNC_CAP_QP0_TUNNEL); size += 2; MLX4_PUT(outbox->buf, size, QUERY_FUNC_CAP_QP1_TUNNEL); size = dev->phys_caps.base_proxy_sqpn + 8 * slave + field - 1; MLX4_PUT(outbox->buf, size, QUERY_FUNC_CAP_QP0_PROXY); size += 2; MLX4_PUT(outbox->buf, size, QUERY_FUNC_CAP_QP1_PROXY); } else if (vhcr->op_modifier == 0) { /* enable rdma and ethernet interfaces */ field = (QUERY_FUNC_CAP_FLAG_ETH | QUERY_FUNC_CAP_FLAG_RDMA); MLX4_PUT(outbox->buf, field, QUERY_FUNC_CAP_FLAGS_OFFSET); field = dev->caps.num_ports; MLX4_PUT(outbox->buf, field, QUERY_FUNC_CAP_NUM_PORTS_OFFSET); size = dev->caps.function_caps; /* set PF behaviours */ MLX4_PUT(outbox->buf, size, QUERY_FUNC_CAP_PF_BHVR_OFFSET); field = 0; /* protected FMR support not available as yet */ MLX4_PUT(outbox->buf, field, QUERY_FUNC_CAP_FMR_OFFSET); size = priv->mfunc.master.res_tracker.res_alloc[RES_QP].quota[slave]; MLX4_PUT(outbox->buf, size, QUERY_FUNC_CAP_QP_QUOTA_OFFSET); size = priv->mfunc.master.res_tracker.res_alloc[RES_SRQ].quota[slave]; MLX4_PUT(outbox->buf, size, QUERY_FUNC_CAP_SRQ_QUOTA_OFFSET); size = priv->mfunc.master.res_tracker.res_alloc[RES_CQ].quota[slave]; MLX4_PUT(outbox->buf, size, QUERY_FUNC_CAP_CQ_QUOTA_OFFSET); size = dev->caps.num_eqs; MLX4_PUT(outbox->buf, size, QUERY_FUNC_CAP_MAX_EQ_OFFSET); size = dev->caps.reserved_eqs; MLX4_PUT(outbox->buf, size, QUERY_FUNC_CAP_RESERVED_EQ_OFFSET); size = priv->mfunc.master.res_tracker.res_alloc[RES_MPT].quota[slave]; MLX4_PUT(outbox->buf, size, QUERY_FUNC_CAP_MPT_QUOTA_OFFSET); size = priv->mfunc.master.res_tracker.res_alloc[RES_MTT].quota[slave]; MLX4_PUT(outbox->buf, size, QUERY_FUNC_CAP_MTT_QUOTA_OFFSET); size = dev->caps.num_mgms + dev->caps.num_amgms; MLX4_PUT(outbox->buf, size, QUERY_FUNC_CAP_MCG_QUOTA_OFFSET); } else err = -EINVAL; return err; } int mlx4_QUERY_FUNC_CAP(struct mlx4_dev *dev, u32 gen_or_port, struct mlx4_func_cap *func_cap) { struct mlx4_cmd_mailbox *mailbox; u32 *outbox; u8 field, op_modifier; u32 size; int err = 0; op_modifier = !!gen_or_port; /* 0 = general, 1 = logical port */ mailbox = mlx4_alloc_cmd_mailbox(dev); if (IS_ERR(mailbox)) return PTR_ERR(mailbox); err = mlx4_cmd_box(dev, 0, mailbox->dma, gen_or_port, op_modifier, MLX4_CMD_QUERY_FUNC_CAP, MLX4_CMD_TIME_CLASS_A, MLX4_CMD_WRAPPED); if (err) goto out; outbox = mailbox->buf; if (!op_modifier) { MLX4_GET(field, outbox, QUERY_FUNC_CAP_FLAGS_OFFSET); if (!(field & (QUERY_FUNC_CAP_FLAG_ETH | QUERY_FUNC_CAP_FLAG_RDMA))) { mlx4_err(dev, "The host supports neither eth nor rdma interfaces\n"); err = -EPROTONOSUPPORT; goto out; } func_cap->flags = field; MLX4_GET(field, outbox, QUERY_FUNC_CAP_NUM_PORTS_OFFSET); func_cap->num_ports = field; MLX4_GET(size, outbox, QUERY_FUNC_CAP_PF_BHVR_OFFSET); func_cap->pf_context_behaviour = size; MLX4_GET(size, outbox, QUERY_FUNC_CAP_QP_QUOTA_OFFSET); func_cap->qp_quota = size & 0xFFFFFF; MLX4_GET(size, outbox, QUERY_FUNC_CAP_SRQ_QUOTA_OFFSET); func_cap->srq_quota = size & 0xFFFFFF; MLX4_GET(size, outbox, QUERY_FUNC_CAP_CQ_QUOTA_OFFSET); func_cap->cq_quota = size & 0xFFFFFF; MLX4_GET(size, outbox, QUERY_FUNC_CAP_MAX_EQ_OFFSET); func_cap->max_eq = size & 0xFFFFFF; MLX4_GET(size, outbox, QUERY_FUNC_CAP_RESERVED_EQ_OFFSET); func_cap->reserved_eq = size & 0xFFFFFF; MLX4_GET(size, outbox, QUERY_FUNC_CAP_MPT_QUOTA_OFFSET); func_cap->mpt_quota = size & 0xFFFFFF; MLX4_GET(size, outbox, QUERY_FUNC_CAP_MTT_QUOTA_OFFSET); func_cap->mtt_quota = size & 0xFFFFFF; MLX4_GET(size, outbox, QUERY_FUNC_CAP_MCG_QUOTA_OFFSET); func_cap->mcg_quota = size & 0xFFFFFF; goto out; } /* logical port query */ if (gen_or_port > dev->caps.num_ports) { err = -EINVAL; goto out; } if (dev->caps.port_type[gen_or_port] == MLX4_PORT_TYPE_ETH) { MLX4_GET(field, outbox, QUERY_FUNC_CAP_ETH_PROPS_OFFSET); if (field & QUERY_FUNC_CAP_ETH_PROPS_FORCE_VLAN) { mlx4_err(dev, "VLAN is enforced on this port\n"); err = -EPROTONOSUPPORT; goto out; } if (field & QUERY_FUNC_CAP_ETH_PROPS_FORCE_MAC) { mlx4_err(dev, "Force mac is enabled on this port\n"); err = -EPROTONOSUPPORT; goto out; } } else if (dev->caps.port_type[gen_or_port] == MLX4_PORT_TYPE_IB) { MLX4_GET(field, outbox, QUERY_FUNC_CAP_RDMA_PROPS_OFFSET); if (field & QUERY_FUNC_CAP_RDMA_PROPS_FORCE_PHY_WQE_GID) { mlx4_err(dev, "phy_wqe_gid is " "enforced on this ib port\n"); err = -EPROTONOSUPPORT; goto out; } } MLX4_GET(field, outbox, QUERY_FUNC_CAP_PHYS_PORT_OFFSET); func_cap->physical_port = field; if (func_cap->physical_port != gen_or_port) { err = -ENOSYS; goto out; } MLX4_GET(size, outbox, QUERY_FUNC_CAP_QP0_TUNNEL); func_cap->qp0_tunnel_qpn = size & 0xFFFFFF; MLX4_GET(size, outbox, QUERY_FUNC_CAP_QP0_PROXY); func_cap->qp0_proxy_qpn = size & 0xFFFFFF; MLX4_GET(size, outbox, QUERY_FUNC_CAP_QP1_TUNNEL); func_cap->qp1_tunnel_qpn = size & 0xFFFFFF; MLX4_GET(size, outbox, QUERY_FUNC_CAP_QP1_PROXY); func_cap->qp1_proxy_qpn = size & 0xFFFFFF; /* All other resources are allocated by the master, but we still report * 'num' and 'reserved' capabilities as follows: * - num remains the maximum resource index * - 'num - reserved' is the total available objects of a resource, but * resource indices may be less than 'reserved' * TODO: set per-resource quotas */ out: mlx4_free_cmd_mailbox(dev, mailbox); return err; } int mlx4_QUERY_DEV_CAP(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap) { struct mlx4_cmd_mailbox *mailbox; u32 *outbox; u8 field; u32 field32, flags, ext_flags; u16 size; u16 stat_rate; int err; int i; #define QUERY_DEV_CAP_OUT_SIZE 0x100 #define QUERY_DEV_CAP_MAX_SRQ_SZ_OFFSET 0x10 #define QUERY_DEV_CAP_MAX_QP_SZ_OFFSET 0x11 #define QUERY_DEV_CAP_RSVD_QP_OFFSET 0x12 #define QUERY_DEV_CAP_MAX_QP_OFFSET 0x13 #define QUERY_DEV_CAP_RSVD_SRQ_OFFSET 0x14 #define QUERY_DEV_CAP_MAX_SRQ_OFFSET 0x15 #define QUERY_DEV_CAP_RSVD_EEC_OFFSET 0x16 #define QUERY_DEV_CAP_MAX_EEC_OFFSET 0x17 #define QUERY_DEV_CAP_MAX_CQ_SZ_OFFSET 0x19 #define QUERY_DEV_CAP_RSVD_CQ_OFFSET 0x1a #define QUERY_DEV_CAP_MAX_CQ_OFFSET 0x1b #define QUERY_DEV_CAP_MAX_MPT_OFFSET 0x1d #define QUERY_DEV_CAP_RSVD_EQ_OFFSET 0x1e #define QUERY_DEV_CAP_MAX_EQ_OFFSET 0x1f #define QUERY_DEV_CAP_RSVD_MTT_OFFSET 0x20 #define QUERY_DEV_CAP_MAX_MRW_SZ_OFFSET 0x21 #define QUERY_DEV_CAP_RSVD_MRW_OFFSET 0x22 #define QUERY_DEV_CAP_MAX_MTT_SEG_OFFSET 0x23 #define QUERY_DEV_CAP_MAX_AV_OFFSET 0x27 #define QUERY_DEV_CAP_MAX_REQ_QP_OFFSET 0x29 #define QUERY_DEV_CAP_MAX_RES_QP_OFFSET 0x2b #define QUERY_DEV_CAP_MAX_GSO_OFFSET 0x2d #define QUERY_DEV_CAP_RSS_OFFSET 0x2e #define QUERY_DEV_CAP_MAX_RDMA_OFFSET 0x2f #define QUERY_DEV_CAP_RSZ_SRQ_OFFSET 0x33 #define QUERY_DEV_CAP_ACK_DELAY_OFFSET 0x35 #define QUERY_DEV_CAP_MTU_WIDTH_OFFSET 0x36 #define QUERY_DEV_CAP_VL_PORT_OFFSET 0x37 #define QUERY_DEV_CAP_MAX_MSG_SZ_OFFSET 0x38 #define QUERY_DEV_CAP_MAX_GID_OFFSET 0x3b #define QUERY_DEV_CAP_RATE_SUPPORT_OFFSET 0x3c #define QUERY_DEV_CAP_CQ_TS_SUPPORT_OFFSET 0x3e #define QUERY_DEV_CAP_MAX_PKEY_OFFSET 0x3f #define QUERY_DEV_CAP_EXT_FLAGS_OFFSET 0x40 #define QUERY_DEV_CAP_SYNC_QP_OFFSET 0x42 #define QUERY_DEV_CAP_FLAGS_OFFSET 0x44 #define QUERY_DEV_CAP_RSVD_UAR_OFFSET 0x48 #define QUERY_DEV_CAP_UAR_SZ_OFFSET 0x49 #define QUERY_DEV_CAP_PAGE_SZ_OFFSET 0x4b #define QUERY_DEV_CAP_BF_OFFSET 0x4c #define QUERY_DEV_CAP_LOG_BF_REG_SZ_OFFSET 0x4d #define QUERY_DEV_CAP_LOG_MAX_BF_REGS_PER_PAGE_OFFSET 0x4e #define QUERY_DEV_CAP_LOG_MAX_BF_PAGES_OFFSET 0x4f #define QUERY_DEV_CAP_MAX_SG_SQ_OFFSET 0x51 #define QUERY_DEV_CAP_MAX_DESC_SZ_SQ_OFFSET 0x52 #define QUERY_DEV_CAP_MAX_SG_RQ_OFFSET 0x55 #define QUERY_DEV_CAP_MAX_DESC_SZ_RQ_OFFSET 0x56 #define QUERY_DEV_CAP_MAX_QP_MCG_OFFSET 0x61 #define QUERY_DEV_CAP_RSVD_MCG_OFFSET 0x62 #define QUERY_DEV_CAP_MAX_MCG_OFFSET 0x63 #define QUERY_DEV_CAP_RSVD_PD_OFFSET 0x64 #define QUERY_DEV_CAP_MAX_PD_OFFSET 0x65 #define QUERY_DEV_CAP_RSVD_XRC_OFFSET 0x66 #define QUERY_DEV_CAP_MAX_XRC_OFFSET 0x67 #define QUERY_DEV_CAP_MAX_BASIC_COUNTERS_OFFSET 0x68 #define QUERY_DEV_CAP_MAX_EXTENDED_COUNTERS_OFFSET 0x6c #define QUERY_DEV_CAP_FLOW_STEERING_RANGE_EN_OFFSET 0x76 #define QUERY_DEV_CAP_FLOW_STEERING_MAX_QP_OFFSET 0x77 #define QUERY_DEV_CAP_RDMARC_ENTRY_SZ_OFFSET 0x80 #define QUERY_DEV_CAP_QPC_ENTRY_SZ_OFFSET 0x82 #define QUERY_DEV_CAP_AUX_ENTRY_SZ_OFFSET 0x84 #define QUERY_DEV_CAP_ALTC_ENTRY_SZ_OFFSET 0x86 #define QUERY_DEV_CAP_EQC_ENTRY_SZ_OFFSET 0x88 #define QUERY_DEV_CAP_CQC_ENTRY_SZ_OFFSET 0x8a #define QUERY_DEV_CAP_SRQ_ENTRY_SZ_OFFSET 0x8c #define QUERY_DEV_CAP_C_MPT_ENTRY_SZ_OFFSET 0x8e #define QUERY_DEV_CAP_MTT_ENTRY_SZ_OFFSET 0x90 #define QUERY_DEV_CAP_D_MPT_ENTRY_SZ_OFFSET 0x92 #define QUERY_DEV_CAP_BMME_FLAGS_OFFSET 0x94 #define QUERY_DEV_CAP_RSVD_LKEY_OFFSET 0x98 #define QUERY_DEV_CAP_MAX_ICM_SZ_OFFSET 0xa0 dev_cap->flags2 = 0; mailbox = mlx4_alloc_cmd_mailbox(dev); if (IS_ERR(mailbox)) return PTR_ERR(mailbox); outbox = mailbox->buf; err = mlx4_cmd_box(dev, 0, mailbox->dma, 0, 0, MLX4_CMD_QUERY_DEV_CAP, MLX4_CMD_TIME_CLASS_A, MLX4_CMD_NATIVE); if (err) goto out; MLX4_GET(field, outbox, QUERY_DEV_CAP_RSVD_QP_OFFSET); dev_cap->reserved_qps = 1 << (field & 0xf); MLX4_GET(field, outbox, QUERY_DEV_CAP_MAX_QP_OFFSET); dev_cap->max_qps = 1 << (field & 0x1f); MLX4_GET(field, outbox, QUERY_DEV_CAP_RSVD_SRQ_OFFSET); dev_cap->reserved_srqs = 1 << (field >> 4); MLX4_GET(field, outbox, QUERY_DEV_CAP_MAX_SRQ_OFFSET); dev_cap->max_srqs = 1 << (field & 0x1f); MLX4_GET(field, outbox, QUERY_DEV_CAP_MAX_CQ_SZ_OFFSET); dev_cap->max_cq_sz = 1 << field; MLX4_GET(field, outbox, QUERY_DEV_CAP_RSVD_CQ_OFFSET); dev_cap->reserved_cqs = 1 << (field & 0xf); MLX4_GET(field, outbox, QUERY_DEV_CAP_MAX_CQ_OFFSET); dev_cap->max_cqs = 1 << (field & 0x1f); MLX4_GET(field, outbox, QUERY_DEV_CAP_MAX_MPT_OFFSET); dev_cap->max_mpts = 1 << (field & 0x3f); MLX4_GET(field, outbox, QUERY_DEV_CAP_RSVD_EQ_OFFSET); dev_cap->reserved_eqs = field & 0xf; MLX4_GET(field, outbox, QUERY_DEV_CAP_MAX_EQ_OFFSET); dev_cap->max_eqs = 1 << (field & 0xf); MLX4_GET(field, outbox, QUERY_DEV_CAP_RSVD_MTT_OFFSET); dev_cap->reserved_mtts = 1 << (field >> 4); MLX4_GET(field, outbox, QUERY_DEV_CAP_MAX_MRW_SZ_OFFSET); dev_cap->max_mrw_sz = 1 << field; MLX4_GET(field, outbox, QUERY_DEV_CAP_RSVD_MRW_OFFSET); dev_cap->reserved_mrws = 1 << (field & 0xf); MLX4_GET(field, outbox, QUERY_DEV_CAP_MAX_MTT_SEG_OFFSET); dev_cap->max_mtt_seg = 1 << (field & 0x3f); MLX4_GET(field, outbox, QUERY_DEV_CAP_MAX_REQ_QP_OFFSET); dev_cap->max_requester_per_qp = 1 << (field & 0x3f); MLX4_GET(field, outbox, QUERY_DEV_CAP_MAX_RES_QP_OFFSET); dev_cap->max_responder_per_qp = 1 << (field & 0x3f); MLX4_GET(field, outbox, QUERY_DEV_CAP_MAX_GSO_OFFSET); field &= 0x1f; if (!field) dev_cap->max_gso_sz = 0; else dev_cap->max_gso_sz = 1 << field; MLX4_GET(field, outbox, QUERY_DEV_CAP_RSS_OFFSET); if (field & 0x20) dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_RSS_XOR; if (field & 0x10) dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_RSS_TOP; field &= 0xf; if (field) { dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_RSS; dev_cap->max_rss_tbl_sz = 1 << field; } else dev_cap->max_rss_tbl_sz = 0; MLX4_GET(field, outbox, QUERY_DEV_CAP_MAX_RDMA_OFFSET); dev_cap->max_rdma_global = 1 << (field & 0x3f); MLX4_GET(field, outbox, QUERY_DEV_CAP_ACK_DELAY_OFFSET); dev_cap->local_ca_ack_delay = field & 0x1f; MLX4_GET(field, outbox, QUERY_DEV_CAP_VL_PORT_OFFSET); dev_cap->num_ports = field & 0xf; MLX4_GET(field, outbox, QUERY_DEV_CAP_MAX_MSG_SZ_OFFSET); dev_cap->max_msg_sz = 1 << (field & 0x1f); MLX4_GET(field, outbox, QUERY_DEV_CAP_FLOW_STEERING_RANGE_EN_OFFSET); if (field & 0x80) dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_FS_EN; dev_cap->fs_log_max_ucast_qp_range_size = field & 0x1f; MLX4_GET(field, outbox, QUERY_DEV_CAP_FLOW_STEERING_MAX_QP_OFFSET); dev_cap->fs_max_num_qp_per_entry = field; MLX4_GET(stat_rate, outbox, QUERY_DEV_CAP_RATE_SUPPORT_OFFSET); dev_cap->stat_rate_support = stat_rate; MLX4_GET(field, outbox, QUERY_DEV_CAP_CQ_TS_SUPPORT_OFFSET); dev_cap->timestamp_support = field & 0x80; MLX4_GET(ext_flags, outbox, QUERY_DEV_CAP_EXT_FLAGS_OFFSET); MLX4_GET(flags, outbox, QUERY_DEV_CAP_FLAGS_OFFSET); dev_cap->flags = flags | (u64)ext_flags << 32; MLX4_GET(field, outbox, QUERY_DEV_CAP_SYNC_QP_OFFSET); dev_cap->sync_qp = field & 0x10; MLX4_GET(field, outbox, QUERY_DEV_CAP_RSVD_UAR_OFFSET); dev_cap->reserved_uars = field >> 4; MLX4_GET(field, outbox, QUERY_DEV_CAP_UAR_SZ_OFFSET); dev_cap->uar_size = 1 << ((field & 0x3f) + 20); MLX4_GET(field, outbox, QUERY_DEV_CAP_PAGE_SZ_OFFSET); dev_cap->min_page_sz = 1 << field; MLX4_GET(field, outbox, QUERY_DEV_CAP_BF_OFFSET); if (field & 0x80) { MLX4_GET(field, outbox, QUERY_DEV_CAP_LOG_BF_REG_SZ_OFFSET); dev_cap->bf_reg_size = 1 << (field & 0x1f); MLX4_GET(field, outbox, QUERY_DEV_CAP_LOG_MAX_BF_REGS_PER_PAGE_OFFSET); if ((1 << (field & 0x3f)) > (PAGE_SIZE / dev_cap->bf_reg_size)) field = 3; dev_cap->bf_regs_per_page = 1 << (field & 0x3f); mlx4_dbg(dev, "BlueFlame available (reg size %d, regs/page %d)\n", dev_cap->bf_reg_size, dev_cap->bf_regs_per_page); } else { dev_cap->bf_reg_size = 0; mlx4_dbg(dev, "BlueFlame not available\n"); } MLX4_GET(field, outbox, QUERY_DEV_CAP_MAX_SG_SQ_OFFSET); dev_cap->max_sq_sg = field; MLX4_GET(size, outbox, QUERY_DEV_CAP_MAX_DESC_SZ_SQ_OFFSET); dev_cap->max_sq_desc_sz = size; MLX4_GET(field, outbox, QUERY_DEV_CAP_MAX_QP_MCG_OFFSET); dev_cap->max_qp_per_mcg = 1 << field; MLX4_GET(field, outbox, QUERY_DEV_CAP_RSVD_MCG_OFFSET); dev_cap->reserved_mgms = field & 0xf; MLX4_GET(field, outbox, QUERY_DEV_CAP_MAX_MCG_OFFSET); dev_cap->max_mcgs = 1 << field; MLX4_GET(field, outbox, QUERY_DEV_CAP_RSVD_PD_OFFSET); dev_cap->reserved_pds = field >> 4; MLX4_GET(field, outbox, QUERY_DEV_CAP_MAX_PD_OFFSET); dev_cap->max_pds = 1 << (field & 0x3f); MLX4_GET(field, outbox, QUERY_DEV_CAP_RSVD_XRC_OFFSET); dev_cap->reserved_xrcds = field >> 4; MLX4_GET(field, outbox, QUERY_DEV_CAP_MAX_XRC_OFFSET); dev_cap->max_xrcds = 1 << (field & 0x1f); MLX4_GET(size, outbox, QUERY_DEV_CAP_RDMARC_ENTRY_SZ_OFFSET); dev_cap->rdmarc_entry_sz = size; MLX4_GET(size, outbox, QUERY_DEV_CAP_QPC_ENTRY_SZ_OFFSET); dev_cap->qpc_entry_sz = size; MLX4_GET(size, outbox, QUERY_DEV_CAP_AUX_ENTRY_SZ_OFFSET); dev_cap->aux_entry_sz = size; MLX4_GET(size, outbox, QUERY_DEV_CAP_ALTC_ENTRY_SZ_OFFSET); dev_cap->altc_entry_sz = size; MLX4_GET(size, outbox, QUERY_DEV_CAP_EQC_ENTRY_SZ_OFFSET); dev_cap->eqc_entry_sz = size; MLX4_GET(size, outbox, QUERY_DEV_CAP_CQC_ENTRY_SZ_OFFSET); dev_cap->cqc_entry_sz = size; MLX4_GET(size, outbox, QUERY_DEV_CAP_SRQ_ENTRY_SZ_OFFSET); dev_cap->srq_entry_sz = size; MLX4_GET(size, outbox, QUERY_DEV_CAP_C_MPT_ENTRY_SZ_OFFSET); dev_cap->cmpt_entry_sz = size; MLX4_GET(size, outbox, QUERY_DEV_CAP_MTT_ENTRY_SZ_OFFSET); dev_cap->mtt_entry_sz = size; MLX4_GET(size, outbox, QUERY_DEV_CAP_D_MPT_ENTRY_SZ_OFFSET); dev_cap->dmpt_entry_sz = size; MLX4_GET(field, outbox, QUERY_DEV_CAP_MAX_SRQ_SZ_OFFSET); dev_cap->max_srq_sz = 1 << field; MLX4_GET(field, outbox, QUERY_DEV_CAP_MAX_QP_SZ_OFFSET); dev_cap->max_qp_sz = 1 << field; MLX4_GET(field, outbox, QUERY_DEV_CAP_RSZ_SRQ_OFFSET); dev_cap->resize_srq = field & 1; MLX4_GET(field, outbox, QUERY_DEV_CAP_MAX_SG_RQ_OFFSET); dev_cap->max_rq_sg = field; MLX4_GET(size, outbox, QUERY_DEV_CAP_MAX_DESC_SZ_RQ_OFFSET); dev_cap->max_rq_desc_sz = size; MLX4_GET(dev_cap->bmme_flags, outbox, QUERY_DEV_CAP_BMME_FLAGS_OFFSET); MLX4_GET(dev_cap->reserved_lkey, outbox, QUERY_DEV_CAP_RSVD_LKEY_OFFSET); MLX4_GET(dev_cap->max_icm_sz, outbox, QUERY_DEV_CAP_MAX_ICM_SZ_OFFSET); if (dev_cap->flags & MLX4_DEV_CAP_FLAG_COUNTERS) MLX4_GET(dev_cap->max_basic_counters, outbox, QUERY_DEV_CAP_MAX_BASIC_COUNTERS_OFFSET); /* FW reports 256 however real value is 255 */ dev_cap->max_basic_counters = min_t(u32, dev_cap->max_basic_counters, 255); if (dev_cap->flags & MLX4_DEV_CAP_FLAG_COUNTERS_EXT) MLX4_GET(dev_cap->max_extended_counters, outbox, QUERY_DEV_CAP_MAX_EXTENDED_COUNTERS_OFFSET); if (dev->flags & MLX4_FLAG_OLD_PORT_CMDS) { for (i = 1; i <= dev_cap->num_ports; ++i) { MLX4_GET(field, outbox, QUERY_DEV_CAP_VL_PORT_OFFSET); dev_cap->max_vl[i] = field >> 4; MLX4_GET(field, outbox, QUERY_DEV_CAP_MTU_WIDTH_OFFSET); dev_cap->ib_mtu[i] = field >> 4; dev_cap->max_port_width[i] = field & 0xf; MLX4_GET(field, outbox, QUERY_DEV_CAP_MAX_GID_OFFSET); dev_cap->max_gids[i] = 1 << (field & 0xf); MLX4_GET(field, outbox, QUERY_DEV_CAP_MAX_PKEY_OFFSET); dev_cap->max_pkeys[i] = 1 << (field & 0xf); } } else { #define QUERY_PORT_SUPPORTED_TYPE_OFFSET 0x00 #define QUERY_PORT_MTU_OFFSET 0x01 #define QUERY_PORT_ETH_MTU_OFFSET 0x02 #define QUERY_PORT_WIDTH_OFFSET 0x06 #define QUERY_PORT_MAX_GID_PKEY_OFFSET 0x07 #define QUERY_PORT_MAX_MACVLAN_OFFSET 0x0a #define QUERY_PORT_MAX_VL_OFFSET 0x0b #define QUERY_PORT_MAC_OFFSET 0x10 #define QUERY_PORT_TRANS_VENDOR_OFFSET 0x18 #define QUERY_PORT_WAVELENGTH_OFFSET 0x1c #define QUERY_PORT_TRANS_CODE_OFFSET 0x20 for (i = 1; i <= dev_cap->num_ports; ++i) { err = mlx4_cmd_box(dev, 0, mailbox->dma, i, 0, MLX4_CMD_QUERY_PORT, MLX4_CMD_TIME_CLASS_B, MLX4_CMD_NATIVE); if (err) goto out; MLX4_GET(field, outbox, QUERY_PORT_SUPPORTED_TYPE_OFFSET); dev_cap->supported_port_types[i] = field & 3; dev_cap->suggested_type[i] = (field >> 3) & 1; dev_cap->default_sense[i] = (field >> 4) & 1; MLX4_GET(field, outbox, QUERY_PORT_MTU_OFFSET); dev_cap->ib_mtu[i] = field & 0xf; MLX4_GET(field, outbox, QUERY_PORT_WIDTH_OFFSET); dev_cap->max_port_width[i] = field & 0xf; MLX4_GET(field, outbox, QUERY_PORT_MAX_GID_PKEY_OFFSET); dev_cap->max_gids[i] = 1 << (field >> 4); dev_cap->max_pkeys[i] = 1 << (field & 0xf); MLX4_GET(field, outbox, QUERY_PORT_MAX_VL_OFFSET); dev_cap->max_vl[i] = field & 0xf; MLX4_GET(field, outbox, QUERY_PORT_MAX_MACVLAN_OFFSET); dev_cap->log_max_macs[i] = field & 0xf; dev_cap->log_max_vlans[i] = field >> 4; MLX4_GET(dev_cap->eth_mtu[i], outbox, QUERY_PORT_ETH_MTU_OFFSET); MLX4_GET(dev_cap->def_mac[i], outbox, QUERY_PORT_MAC_OFFSET); MLX4_GET(field32, outbox, QUERY_PORT_TRANS_VENDOR_OFFSET); dev_cap->trans_type[i] = field32 >> 24; dev_cap->vendor_oui[i] = field32 & 0xffffff; MLX4_GET(dev_cap->wavelength[i], outbox, QUERY_PORT_WAVELENGTH_OFFSET); MLX4_GET(dev_cap->trans_code[i], outbox, QUERY_PORT_TRANS_CODE_OFFSET); } } mlx4_dbg(dev, "Base MM extensions: flags %08x, rsvd L_Key %08x\n", dev_cap->bmme_flags, dev_cap->reserved_lkey); /* * Each UAR has 4 EQ doorbells; so if a UAR is reserved, then * we can't use any EQs whose doorbell falls on that page, * even if the EQ itself isn't reserved. */ dev_cap->reserved_eqs = max(dev_cap->reserved_uars * 4, dev_cap->reserved_eqs); mlx4_dbg(dev, "Max ICM size %lld MB\n", (unsigned long long) dev_cap->max_icm_sz >> 20); mlx4_dbg(dev, "Max QPs: %d, reserved QPs: %d, entry size: %d\n", dev_cap->max_qps, dev_cap->reserved_qps, dev_cap->qpc_entry_sz); mlx4_dbg(dev, "Max SRQs: %d, reserved SRQs: %d, entry size: %d\n", dev_cap->max_srqs, dev_cap->reserved_srqs, dev_cap->srq_entry_sz); mlx4_dbg(dev, "Max CQs: %d, reserved CQs: %d, entry size: %d\n", dev_cap->max_cqs, dev_cap->reserved_cqs, dev_cap->cqc_entry_sz); mlx4_dbg(dev, "Max EQs: %d, reserved EQs: %d, entry size: %d\n", dev_cap->max_eqs, dev_cap->reserved_eqs, dev_cap->eqc_entry_sz); mlx4_dbg(dev, "reserved MPTs: %d, reserved MTTs: %d\n", dev_cap->reserved_mrws, dev_cap->reserved_mtts); mlx4_dbg(dev, "Max PDs: %d, reserved PDs: %d, reserved UARs: %d\n", dev_cap->max_pds, dev_cap->reserved_pds, dev_cap->reserved_uars); mlx4_dbg(dev, "Max QP/MCG: %d, reserved MGMs: %d\n", dev_cap->max_pds, dev_cap->reserved_mgms); mlx4_dbg(dev, "Max CQEs: %d, max WQEs: %d, max SRQ WQEs: %d\n", dev_cap->max_cq_sz, dev_cap->max_qp_sz, dev_cap->max_srq_sz); mlx4_dbg(dev, "Local CA ACK delay: %d, max MTU: %d, port width cap: %d\n", dev_cap->local_ca_ack_delay, 128 << dev_cap->ib_mtu[1], dev_cap->max_port_width[1]); mlx4_dbg(dev, "Max SQ desc size: %d, max SQ S/G: %d\n", dev_cap->max_sq_desc_sz, dev_cap->max_sq_sg); mlx4_dbg(dev, "Max RQ desc size: %d, max RQ S/G: %d\n", dev_cap->max_rq_desc_sz, dev_cap->max_rq_sg); mlx4_dbg(dev, "Max GSO size: %d\n", dev_cap->max_gso_sz); mlx4_dbg(dev, "Max basic counters: %d\n", dev_cap->max_basic_counters); mlx4_dbg(dev, "Max extended counters: %d\n", dev_cap->max_extended_counters); mlx4_dbg(dev, "Max RSS Table size: %d\n", dev_cap->max_rss_tbl_sz); dump_dev_cap_flags(dev, dev_cap->flags); dump_dev_cap_flags2(dev, dev_cap->flags2); out: mlx4_free_cmd_mailbox(dev, mailbox); return err; } int mlx4_QUERY_DEV_CAP_wrapper(struct mlx4_dev *dev, int slave, struct mlx4_vhcr *vhcr, struct mlx4_cmd_mailbox *inbox, struct mlx4_cmd_mailbox *outbox, struct mlx4_cmd_info *cmd) { u64 flags; int err = 0; u8 field; err = mlx4_cmd_box(dev, 0, outbox->dma, 0, 0, MLX4_CMD_QUERY_DEV_CAP, MLX4_CMD_TIME_CLASS_A, MLX4_CMD_NATIVE); if (err) return err; /* add port mng change event capability unconditionally to slaves */ MLX4_GET(flags, outbox->buf, QUERY_DEV_CAP_EXT_FLAGS_OFFSET); flags |= MLX4_DEV_CAP_FLAG_PORT_MNG_CHG_EV; MLX4_PUT(outbox->buf, flags, QUERY_DEV_CAP_EXT_FLAGS_OFFSET); /* For guests, report Blueflame disabled */ MLX4_GET(field, outbox->buf, QUERY_DEV_CAP_BF_OFFSET); field &= 0x7f; MLX4_PUT(outbox->buf, field, QUERY_DEV_CAP_BF_OFFSET); return 0; } int mlx4_QUERY_PORT_wrapper(struct mlx4_dev *dev, int slave, struct mlx4_vhcr *vhcr, struct mlx4_cmd_mailbox *inbox, struct mlx4_cmd_mailbox *outbox, struct mlx4_cmd_info *cmd) { struct mlx4_priv *priv = mlx4_priv(dev); u64 def_mac; u8 port_type; u16 short_field; int err; #define MLX4_VF_PORT_NO_LINK_SENSE_MASK 0xE0 #define QUERY_PORT_CUR_MAX_PKEY_OFFSET 0x0c #define QUERY_PORT_CUR_MAX_GID_OFFSET 0x0e err = mlx4_cmd_box(dev, 0, outbox->dma, vhcr->in_modifier, 0, MLX4_CMD_QUERY_PORT, MLX4_CMD_TIME_CLASS_B, MLX4_CMD_NATIVE); if (!err && dev->caps.function != slave) { /* set slave default_mac address */ MLX4_GET(def_mac, outbox->buf, QUERY_PORT_MAC_OFFSET); def_mac += slave << 8; /* if config MAC in DB use it */ if (priv->mfunc.master.vf_oper[slave].vport[vhcr->in_modifier].state.mac) def_mac = priv->mfunc.master.vf_oper[slave].vport[vhcr->in_modifier].state.mac; MLX4_PUT(outbox->buf, def_mac, QUERY_PORT_MAC_OFFSET); /* get port type - currently only eth is enabled */ MLX4_GET(port_type, outbox->buf, QUERY_PORT_SUPPORTED_TYPE_OFFSET); /* No link sensing allowed */ port_type &= MLX4_VF_PORT_NO_LINK_SENSE_MASK; /* set port type to currently operating port type */ port_type |= (dev->caps.port_type[vhcr->in_modifier] & 0x3); MLX4_PUT(outbox->buf, port_type, QUERY_PORT_SUPPORTED_TYPE_OFFSET); if (dev->caps.port_type[vhcr->in_modifier] == MLX4_PORT_TYPE_ETH) short_field = mlx4_get_slave_num_gids(dev, slave); else short_field = 1; /* slave max gids */ MLX4_PUT(outbox->buf, short_field, QUERY_PORT_CUR_MAX_GID_OFFSET); short_field = dev->caps.pkey_table_len[vhcr->in_modifier]; MLX4_PUT(outbox->buf, short_field, QUERY_PORT_CUR_MAX_PKEY_OFFSET); } return err; } int mlx4_get_slave_pkey_gid_tbl_len(struct mlx4_dev *dev, u8 port, int *gid_tbl_len, int *pkey_tbl_len) { struct mlx4_cmd_mailbox *mailbox; u32 *outbox; u16 field; int err; mailbox = mlx4_alloc_cmd_mailbox(dev); if (IS_ERR(mailbox)) return PTR_ERR(mailbox); err = mlx4_cmd_box(dev, 0, mailbox->dma, port, 0, MLX4_CMD_QUERY_PORT, MLX4_CMD_TIME_CLASS_B, MLX4_CMD_WRAPPED); if (err) goto out; outbox = mailbox->buf; MLX4_GET(field, outbox, QUERY_PORT_CUR_MAX_GID_OFFSET); *gid_tbl_len = field; MLX4_GET(field, outbox, QUERY_PORT_CUR_MAX_PKEY_OFFSET); *pkey_tbl_len = field; out: mlx4_free_cmd_mailbox(dev, mailbox); return err; } EXPORT_SYMBOL(mlx4_get_slave_pkey_gid_tbl_len); int mlx4_map_cmd(struct mlx4_dev *dev, u16 op, struct mlx4_icm *icm, u64 virt) { struct mlx4_cmd_mailbox *mailbox; struct mlx4_icm_iter iter; __be64 *pages; int lg; int nent = 0; int i; int err = 0; int ts = 0, tc = 0; mailbox = mlx4_alloc_cmd_mailbox(dev); if (IS_ERR(mailbox)) return PTR_ERR(mailbox); memset(mailbox->buf, 0, MLX4_MAILBOX_SIZE); pages = mailbox->buf; for (mlx4_icm_first(icm, &iter); !mlx4_icm_last(&iter); mlx4_icm_next(&iter)) { /* * We have to pass pages that are aligned to their * size, so find the least significant 1 in the * address or size and use that as our log2 size. */ lg = ffs(mlx4_icm_addr(&iter) | mlx4_icm_size(&iter)) - 1; if (lg < MLX4_ICM_PAGE_SHIFT) { mlx4_warn(dev, "Got FW area not aligned to %d (%llx/%lx).\n", MLX4_ICM_PAGE_SIZE, (unsigned long long) mlx4_icm_addr(&iter), mlx4_icm_size(&iter)); err = -EINVAL; goto out; } for (i = 0; i < mlx4_icm_size(&iter) >> lg; ++i) { if (virt != -1) { pages[nent * 2] = cpu_to_be64(virt); virt += 1 << lg; } pages[nent * 2 + 1] = cpu_to_be64((mlx4_icm_addr(&iter) + (i << lg)) | (lg - MLX4_ICM_PAGE_SHIFT)); ts += 1 << (lg - 10); ++tc; if (++nent == MLX4_MAILBOX_SIZE / 16) { err = mlx4_cmd(dev, mailbox->dma, nent, 0, op, MLX4_CMD_TIME_CLASS_B, MLX4_CMD_NATIVE); if (err) goto out; nent = 0; } } } if (nent) err = mlx4_cmd(dev, mailbox->dma, nent, 0, op, MLX4_CMD_TIME_CLASS_B, MLX4_CMD_NATIVE); if (err) goto out; switch (op) { case MLX4_CMD_MAP_FA: mlx4_dbg(dev, "Mapped %d chunks/%d KB for FW.\n", tc, ts); break; case MLX4_CMD_MAP_ICM_AUX: mlx4_dbg(dev, "Mapped %d chunks/%d KB for ICM aux.\n", tc, ts); break; case MLX4_CMD_MAP_ICM: mlx4_dbg(dev, "Mapped %d chunks/%d KB at %llx for ICM.\n", tc, ts, (unsigned long long) virt - (ts << 10)); break; } out: mlx4_free_cmd_mailbox(dev, mailbox); return err; } int mlx4_MAP_FA(struct mlx4_dev *dev, struct mlx4_icm *icm) { return mlx4_map_cmd(dev, MLX4_CMD_MAP_FA, icm, -1); } int mlx4_UNMAP_FA(struct mlx4_dev *dev) { return mlx4_cmd(dev, 0, 0, 0, MLX4_CMD_UNMAP_FA, MLX4_CMD_TIME_CLASS_B, MLX4_CMD_NATIVE); } int mlx4_RUN_FW(struct mlx4_dev *dev) { return mlx4_cmd(dev, 0, 0, 0, MLX4_CMD_RUN_FW, MLX4_CMD_TIME_CLASS_A, MLX4_CMD_NATIVE); } int mlx4_QUERY_FW(struct mlx4_dev *dev) { struct mlx4_fw *fw = &mlx4_priv(dev)->fw; struct mlx4_cmd *cmd = &mlx4_priv(dev)->cmd; struct mlx4_cmd_mailbox *mailbox; u32 *outbox; int err = 0; u64 fw_ver; u16 cmd_if_rev; u8 lg; #define QUERY_FW_OUT_SIZE 0x100 #define QUERY_FW_VER_OFFSET 0x00 #define QUERY_FW_PPF_ID 0x09 #define QUERY_FW_CMD_IF_REV_OFFSET 0x0a #define QUERY_FW_MAX_CMD_OFFSET 0x0f #define QUERY_FW_ERR_START_OFFSET 0x30 #define QUERY_FW_ERR_SIZE_OFFSET 0x38 #define QUERY_FW_ERR_BAR_OFFSET 0x3c #define QUERY_FW_SIZE_OFFSET 0x00 #define QUERY_FW_CLR_INT_BASE_OFFSET 0x20 #define QUERY_FW_CLR_INT_BAR_OFFSET 0x28 #define QUERY_FW_COMM_BASE_OFFSET 0x40 #define QUERY_FW_COMM_BAR_OFFSET 0x48 #define QUERY_FW_CLOCK_OFFSET 0x50 #define QUERY_FW_CLOCK_BAR 0x58 mailbox = mlx4_alloc_cmd_mailbox(dev); if (IS_ERR(mailbox)) return PTR_ERR(mailbox); outbox = mailbox->buf; err = mlx4_cmd_box(dev, 0, mailbox->dma, 0, 0, MLX4_CMD_QUERY_FW, MLX4_CMD_TIME_CLASS_A, MLX4_CMD_NATIVE); if (err) goto out; MLX4_GET(fw_ver, outbox, QUERY_FW_VER_OFFSET); /* * FW subminor version is at more significant bits than minor * version, so swap here. */ dev->caps.fw_ver = (fw_ver & 0xffff00000000ull) | ((fw_ver & 0xffff0000ull) >> 16) | ((fw_ver & 0x0000ffffull) << 16); MLX4_GET(lg, outbox, QUERY_FW_PPF_ID); dev->caps.function = lg; if (mlx4_is_slave(dev)) goto out; MLX4_GET(cmd_if_rev, outbox, QUERY_FW_CMD_IF_REV_OFFSET); if (cmd_if_rev < MLX4_COMMAND_INTERFACE_MIN_REV || cmd_if_rev > MLX4_COMMAND_INTERFACE_MAX_REV) { mlx4_err(dev, "Installed FW has unsupported " "command interface revision %d.\n", cmd_if_rev); mlx4_err(dev, "(Installed FW version is %d.%d.%03d)\n", (int) (dev->caps.fw_ver >> 32), (int) (dev->caps.fw_ver >> 16) & 0xffff, (int) dev->caps.fw_ver & 0xffff); mlx4_err(dev, "This driver version supports only revisions %d to %d.\n", MLX4_COMMAND_INTERFACE_MIN_REV, MLX4_COMMAND_INTERFACE_MAX_REV); err = -ENODEV; goto out; } if (cmd_if_rev < MLX4_COMMAND_INTERFACE_NEW_PORT_CMDS) dev->flags |= MLX4_FLAG_OLD_PORT_CMDS; MLX4_GET(lg, outbox, QUERY_FW_MAX_CMD_OFFSET); cmd->max_cmds = 1 << lg; mlx4_dbg(dev, "FW version %d.%d.%03d (cmd intf rev %d), max commands %d\n", (int) (dev->caps.fw_ver >> 32), (int) (dev->caps.fw_ver >> 16) & 0xffff, (int) dev->caps.fw_ver & 0xffff, cmd_if_rev, cmd->max_cmds); MLX4_GET(fw->catas_offset, outbox, QUERY_FW_ERR_START_OFFSET); MLX4_GET(fw->catas_size, outbox, QUERY_FW_ERR_SIZE_OFFSET); MLX4_GET(fw->catas_bar, outbox, QUERY_FW_ERR_BAR_OFFSET); fw->catas_bar = (fw->catas_bar >> 6) * 2; mlx4_dbg(dev, "Catastrophic error buffer at 0x%llx, size 0x%x, BAR %d\n", (unsigned long long) fw->catas_offset, fw->catas_size, fw->catas_bar); MLX4_GET(fw->fw_pages, outbox, QUERY_FW_SIZE_OFFSET); MLX4_GET(fw->clr_int_base, outbox, QUERY_FW_CLR_INT_BASE_OFFSET); MLX4_GET(fw->clr_int_bar, outbox, QUERY_FW_CLR_INT_BAR_OFFSET); fw->clr_int_bar = (fw->clr_int_bar >> 6) * 2; MLX4_GET(fw->comm_base, outbox, QUERY_FW_COMM_BASE_OFFSET); MLX4_GET(fw->comm_bar, outbox, QUERY_FW_COMM_BAR_OFFSET); fw->comm_bar = (fw->comm_bar >> 6) * 2; mlx4_dbg(dev, "Communication vector bar:%d offset:0x%llx\n", - fw->comm_bar, fw->comm_base); + fw->comm_bar, (long long)fw->comm_base); mlx4_dbg(dev, "FW size %d KB\n", fw->fw_pages >> 2); MLX4_GET(fw->clock_offset, outbox, QUERY_FW_CLOCK_OFFSET); MLX4_GET(fw->clock_bar, outbox, QUERY_FW_CLOCK_BAR); fw->clock_bar = (fw->clock_bar >> 6) * 2; mlx4_dbg(dev, "Internal clock bar:%d offset:0x%llx\n", - fw->comm_bar, fw->comm_base); + fw->comm_bar, (long long)fw->comm_base); /* * Round up number of system pages needed in case * MLX4_ICM_PAGE_SIZE < PAGE_SIZE. */ fw->fw_pages = ALIGN(fw->fw_pages, PAGE_SIZE / MLX4_ICM_PAGE_SIZE) >> (PAGE_SHIFT - MLX4_ICM_PAGE_SHIFT); mlx4_dbg(dev, "Clear int @ %llx, BAR %d\n", (unsigned long long) fw->clr_int_base, fw->clr_int_bar); out: mlx4_free_cmd_mailbox(dev, mailbox); return err; } int mlx4_QUERY_FW_wrapper(struct mlx4_dev *dev, int slave, struct mlx4_vhcr *vhcr, struct mlx4_cmd_mailbox *inbox, struct mlx4_cmd_mailbox *outbox, struct mlx4_cmd_info *cmd) { u8 *outbuf; int err; outbuf = outbox->buf; err = mlx4_cmd_box(dev, 0, outbox->dma, 0, 0, MLX4_CMD_QUERY_FW, MLX4_CMD_TIME_CLASS_A, MLX4_CMD_NATIVE); if (err) return err; /* for slaves, set pci PPF ID to invalid and zero out everything * else except FW version */ outbuf[0] = outbuf[1] = 0; memset(&outbuf[8], 0, QUERY_FW_OUT_SIZE - 8); outbuf[QUERY_FW_PPF_ID] = MLX4_INVALID_SLAVE_ID; return 0; } static void get_board_id(void *vsd, char *board_id) { int i; #define VSD_OFFSET_SIG1 0x00 #define VSD_OFFSET_SIG2 0xde #define VSD_OFFSET_MLX_BOARD_ID 0xd0 #define VSD_OFFSET_TS_BOARD_ID 0x20 #define VSD_SIGNATURE_TOPSPIN 0x5ad memset(board_id, 0, MLX4_BOARD_ID_LEN); if (be16_to_cpup(vsd + VSD_OFFSET_SIG1) == VSD_SIGNATURE_TOPSPIN && be16_to_cpup(vsd + VSD_OFFSET_SIG2) == VSD_SIGNATURE_TOPSPIN) { strlcpy(board_id, vsd + VSD_OFFSET_TS_BOARD_ID, MLX4_BOARD_ID_LEN); } else { /* * The board ID is a string but the firmware byte * swaps each 4-byte word before passing it back to * us. Therefore we need to swab it before printing. */ for (i = 0; i < 4; ++i) ((u32 *) board_id)[i] = swab32(*(u32 *) (vsd + VSD_OFFSET_MLX_BOARD_ID + i * 4)); } } int mlx4_QUERY_ADAPTER(struct mlx4_dev *dev, struct mlx4_adapter *adapter) { struct mlx4_cmd_mailbox *mailbox; u32 *outbox; int err; #define QUERY_ADAPTER_OUT_SIZE 0x100 #define QUERY_ADAPTER_INTA_PIN_OFFSET 0x10 #define QUERY_ADAPTER_VSD_OFFSET 0x20 mailbox = mlx4_alloc_cmd_mailbox(dev); if (IS_ERR(mailbox)) return PTR_ERR(mailbox); outbox = mailbox->buf; err = mlx4_cmd_box(dev, 0, mailbox->dma, 0, 0, MLX4_CMD_QUERY_ADAPTER, MLX4_CMD_TIME_CLASS_A, MLX4_CMD_NATIVE); if (err) goto out; MLX4_GET(adapter->inta_pin, outbox, QUERY_ADAPTER_INTA_PIN_OFFSET); get_board_id(outbox + QUERY_ADAPTER_VSD_OFFSET / 4, adapter->board_id); out: mlx4_free_cmd_mailbox(dev, mailbox); return err; } int mlx4_INIT_HCA(struct mlx4_dev *dev, struct mlx4_init_hca_param *param) { struct mlx4_cmd_mailbox *mailbox; __be32 *inbox; int err; #define INIT_HCA_IN_SIZE 0x200 #define INIT_HCA_VERSION_OFFSET 0x000 #define INIT_HCA_VERSION 2 #define INIT_HCA_CACHELINE_SZ_OFFSET 0x0e #define INIT_HCA_FLAGS_OFFSET 0x014 #define INIT_HCA_QPC_OFFSET 0x020 #define INIT_HCA_QPC_BASE_OFFSET (INIT_HCA_QPC_OFFSET + 0x10) #define INIT_HCA_LOG_QP_OFFSET (INIT_HCA_QPC_OFFSET + 0x17) #define INIT_HCA_SRQC_BASE_OFFSET (INIT_HCA_QPC_OFFSET + 0x28) #define INIT_HCA_LOG_SRQ_OFFSET (INIT_HCA_QPC_OFFSET + 0x2f) #define INIT_HCA_CQC_BASE_OFFSET (INIT_HCA_QPC_OFFSET + 0x30) #define INIT_HCA_LOG_CQ_OFFSET (INIT_HCA_QPC_OFFSET + 0x37) #define INIT_HCA_EQE_CQE_OFFSETS (INIT_HCA_QPC_OFFSET + 0x38) #define INIT_HCA_ALTC_BASE_OFFSET (INIT_HCA_QPC_OFFSET + 0x40) #define INIT_HCA_AUXC_BASE_OFFSET (INIT_HCA_QPC_OFFSET + 0x50) #define INIT_HCA_EQC_BASE_OFFSET (INIT_HCA_QPC_OFFSET + 0x60) #define INIT_HCA_LOG_EQ_OFFSET (INIT_HCA_QPC_OFFSET + 0x67) #define INIT_HCA_RDMARC_BASE_OFFSET (INIT_HCA_QPC_OFFSET + 0x70) #define INIT_HCA_LOG_RD_OFFSET (INIT_HCA_QPC_OFFSET + 0x77) #define INIT_HCA_MCAST_OFFSET 0x0c0 #define INIT_HCA_MC_BASE_OFFSET (INIT_HCA_MCAST_OFFSET + 0x00) #define INIT_HCA_LOG_MC_ENTRY_SZ_OFFSET (INIT_HCA_MCAST_OFFSET + 0x12) #define INIT_HCA_LOG_MC_HASH_SZ_OFFSET (INIT_HCA_MCAST_OFFSET + 0x16) #define INIT_HCA_UC_STEERING_OFFSET (INIT_HCA_MCAST_OFFSET + 0x18) #define INIT_HCA_LOG_MC_TABLE_SZ_OFFSET (INIT_HCA_MCAST_OFFSET + 0x1b) #define INIT_HCA_DEVICE_MANAGED_FLOW_STEERING_EN 0x6 #define INIT_HCA_FS_PARAM_OFFSET 0x1d0 #define INIT_HCA_FS_BASE_OFFSET (INIT_HCA_FS_PARAM_OFFSET + 0x00) #define INIT_HCA_FS_LOG_ENTRY_SZ_OFFSET (INIT_HCA_FS_PARAM_OFFSET + 0x12) #define INIT_HCA_FS_LOG_TABLE_SZ_OFFSET (INIT_HCA_FS_PARAM_OFFSET + 0x1b) #define INIT_HCA_FS_ETH_BITS_OFFSET (INIT_HCA_FS_PARAM_OFFSET + 0x21) #define INIT_HCA_FS_ETH_NUM_ADDRS_OFFSET (INIT_HCA_FS_PARAM_OFFSET + 0x22) #define INIT_HCA_FS_IB_BITS_OFFSET (INIT_HCA_FS_PARAM_OFFSET + 0x25) #define INIT_HCA_FS_IB_NUM_ADDRS_OFFSET (INIT_HCA_FS_PARAM_OFFSET + 0x26) #define INIT_HCA_TPT_OFFSET 0x0f0 #define INIT_HCA_DMPT_BASE_OFFSET (INIT_HCA_TPT_OFFSET + 0x00) #define INIT_HCA_LOG_MPT_SZ_OFFSET (INIT_HCA_TPT_OFFSET + 0x0b) #define INIT_HCA_MTT_BASE_OFFSET (INIT_HCA_TPT_OFFSET + 0x10) #define INIT_HCA_CMPT_BASE_OFFSET (INIT_HCA_TPT_OFFSET + 0x18) #define INIT_HCA_UAR_OFFSET 0x120 #define INIT_HCA_LOG_UAR_SZ_OFFSET (INIT_HCA_UAR_OFFSET + 0x0a) #define INIT_HCA_UAR_PAGE_SZ_OFFSET (INIT_HCA_UAR_OFFSET + 0x0b) mailbox = mlx4_alloc_cmd_mailbox(dev); if (IS_ERR(mailbox)) return PTR_ERR(mailbox); inbox = mailbox->buf; memset(inbox, 0, INIT_HCA_IN_SIZE); *((u8 *) mailbox->buf + INIT_HCA_VERSION_OFFSET) = INIT_HCA_VERSION; *((u8 *) mailbox->buf + INIT_HCA_CACHELINE_SZ_OFFSET) = ((ilog2(CACHE_LINE_SIZE) - 4) << 5) | (1 << 4); #if defined(__LITTLE_ENDIAN) *(inbox + INIT_HCA_FLAGS_OFFSET / 4) &= ~cpu_to_be32(1 << 1); #elif defined(__BIG_ENDIAN) *(inbox + INIT_HCA_FLAGS_OFFSET / 4) |= cpu_to_be32(1 << 1); #else #error Host endianness not defined #endif /* Check port for UD address vector: */ *(inbox + INIT_HCA_FLAGS_OFFSET / 4) |= cpu_to_be32(1); /* Enable IPoIB checksumming if we can: */ if (dev->caps.flags & MLX4_DEV_CAP_FLAG_IPOIB_CSUM) *(inbox + INIT_HCA_FLAGS_OFFSET / 4) |= cpu_to_be32(1 << 3); /* Enable QoS support if module parameter set */ if (enable_qos) *(inbox + INIT_HCA_FLAGS_OFFSET / 4) |= cpu_to_be32(1 << 2); /* Enable fast drop performance optimization */ if (dev->caps.fast_drop) *(inbox + INIT_HCA_FLAGS_OFFSET / 4) |= cpu_to_be32(1 << 7); /* enable counters */ if (dev->caps.flags & MLX4_DEV_CAP_FLAG_COUNTERS) *(inbox + INIT_HCA_FLAGS_OFFSET / 4) |= cpu_to_be32(1 << 4); /* CX3 is capable of extending CQEs\EQEs from 32 to 64 bytes */ if (dev->caps.flags & MLX4_DEV_CAP_FLAG_64B_EQE) { *(inbox + INIT_HCA_EQE_CQE_OFFSETS / 4) |= cpu_to_be32(1 << 29); dev->caps.eqe_size = 64; dev->caps.eqe_factor = 1; } else { dev->caps.eqe_size = 32; dev->caps.eqe_factor = 0; } if (dev->caps.flags & MLX4_DEV_CAP_FLAG_64B_CQE) { *(inbox + INIT_HCA_EQE_CQE_OFFSETS / 4) |= cpu_to_be32(1 << 30); dev->caps.cqe_size = 64; dev->caps.userspace_caps |= MLX4_USER_DEV_CAP_64B_CQE; } else { dev->caps.cqe_size = 32; } /* QPC/EEC/CQC/EQC/RDMARC attributes */ MLX4_PUT(inbox, param->qpc_base, INIT_HCA_QPC_BASE_OFFSET); MLX4_PUT(inbox, param->log_num_qps, INIT_HCA_LOG_QP_OFFSET); MLX4_PUT(inbox, param->srqc_base, INIT_HCA_SRQC_BASE_OFFSET); MLX4_PUT(inbox, param->log_num_srqs, INIT_HCA_LOG_SRQ_OFFSET); MLX4_PUT(inbox, param->cqc_base, INIT_HCA_CQC_BASE_OFFSET); MLX4_PUT(inbox, param->log_num_cqs, INIT_HCA_LOG_CQ_OFFSET); MLX4_PUT(inbox, param->altc_base, INIT_HCA_ALTC_BASE_OFFSET); MLX4_PUT(inbox, param->auxc_base, INIT_HCA_AUXC_BASE_OFFSET); MLX4_PUT(inbox, param->eqc_base, INIT_HCA_EQC_BASE_OFFSET); MLX4_PUT(inbox, param->log_num_eqs, INIT_HCA_LOG_EQ_OFFSET); MLX4_PUT(inbox, param->rdmarc_base, INIT_HCA_RDMARC_BASE_OFFSET); MLX4_PUT(inbox, param->log_rd_per_qp, INIT_HCA_LOG_RD_OFFSET); /* steering attributes */ if (dev->caps.steering_mode == MLX4_STEERING_MODE_DEVICE_MANAGED) { *(inbox + INIT_HCA_FLAGS_OFFSET / 4) |= cpu_to_be32(1 << INIT_HCA_DEVICE_MANAGED_FLOW_STEERING_EN); MLX4_PUT(inbox, param->mc_base, INIT_HCA_FS_BASE_OFFSET); MLX4_PUT(inbox, param->log_mc_entry_sz, INIT_HCA_FS_LOG_ENTRY_SZ_OFFSET); MLX4_PUT(inbox, param->log_mc_table_sz, INIT_HCA_FS_LOG_TABLE_SZ_OFFSET); /* Enable Ethernet flow steering * with udp unicast and tcp unicast */ MLX4_PUT(inbox, (u8) (MLX4_FS_UDP_UC_EN | MLX4_FS_TCP_UC_EN), INIT_HCA_FS_ETH_BITS_OFFSET); MLX4_PUT(inbox, (u16) MLX4_FS_NUM_OF_L2_ADDR, INIT_HCA_FS_ETH_NUM_ADDRS_OFFSET); /* Enable IPoIB flow steering * with udp unicast and tcp unicast */ MLX4_PUT(inbox, (u8) (MLX4_FS_UDP_UC_EN | MLX4_FS_TCP_UC_EN), INIT_HCA_FS_IB_BITS_OFFSET); MLX4_PUT(inbox, (u16) MLX4_FS_NUM_OF_L2_ADDR, INIT_HCA_FS_IB_NUM_ADDRS_OFFSET); } else { MLX4_PUT(inbox, param->mc_base, INIT_HCA_MC_BASE_OFFSET); MLX4_PUT(inbox, param->log_mc_entry_sz, INIT_HCA_LOG_MC_ENTRY_SZ_OFFSET); MLX4_PUT(inbox, param->log_mc_hash_sz, INIT_HCA_LOG_MC_HASH_SZ_OFFSET); MLX4_PUT(inbox, param->log_mc_table_sz, INIT_HCA_LOG_MC_TABLE_SZ_OFFSET); if (dev->caps.steering_mode == MLX4_STEERING_MODE_B0) { MLX4_PUT(inbox, (u8) (1 << 3), INIT_HCA_UC_STEERING_OFFSET); } } /* TPT attributes */ MLX4_PUT(inbox, param->dmpt_base, INIT_HCA_DMPT_BASE_OFFSET); MLX4_PUT(inbox, param->log_mpt_sz, INIT_HCA_LOG_MPT_SZ_OFFSET); MLX4_PUT(inbox, param->mtt_base, INIT_HCA_MTT_BASE_OFFSET); MLX4_PUT(inbox, param->cmpt_base, INIT_HCA_CMPT_BASE_OFFSET); /* UAR attributes */ MLX4_PUT(inbox, param->uar_page_sz, INIT_HCA_UAR_PAGE_SZ_OFFSET); MLX4_PUT(inbox, param->log_uar_sz, INIT_HCA_LOG_UAR_SZ_OFFSET); err = mlx4_cmd(dev, mailbox->dma, 0, 0, MLX4_CMD_INIT_HCA, 10000, MLX4_CMD_NATIVE); if (err) mlx4_err(dev, "INIT_HCA returns %d\n", err); mlx4_free_cmd_mailbox(dev, mailbox); return err; } int mlx4_QUERY_HCA(struct mlx4_dev *dev, struct mlx4_init_hca_param *param) { struct mlx4_cmd_mailbox *mailbox; __be32 *outbox; u32 dword_field; int err; u8 byte_field; #define QUERY_HCA_GLOBAL_CAPS_OFFSET 0x04 #define QUERY_HCA_CORE_CLOCK_OFFSET 0x0c mailbox = mlx4_alloc_cmd_mailbox(dev); if (IS_ERR(mailbox)) return PTR_ERR(mailbox); outbox = mailbox->buf; err = mlx4_cmd_box(dev, 0, mailbox->dma, 0, 0, MLX4_CMD_QUERY_HCA, MLX4_CMD_TIME_CLASS_B, !mlx4_is_slave(dev)); if (err) goto out; MLX4_GET(param->global_caps, outbox, QUERY_HCA_GLOBAL_CAPS_OFFSET); MLX4_GET(param->hca_core_clock, outbox, QUERY_HCA_CORE_CLOCK_OFFSET); /* QPC/EEC/CQC/EQC/RDMARC attributes */ MLX4_GET(param->qpc_base, outbox, INIT_HCA_QPC_BASE_OFFSET); MLX4_GET(param->log_num_qps, outbox, INIT_HCA_LOG_QP_OFFSET); MLX4_GET(param->srqc_base, outbox, INIT_HCA_SRQC_BASE_OFFSET); MLX4_GET(param->log_num_srqs, outbox, INIT_HCA_LOG_SRQ_OFFSET); MLX4_GET(param->cqc_base, outbox, INIT_HCA_CQC_BASE_OFFSET); MLX4_GET(param->log_num_cqs, outbox, INIT_HCA_LOG_CQ_OFFSET); MLX4_GET(param->altc_base, outbox, INIT_HCA_ALTC_BASE_OFFSET); MLX4_GET(param->auxc_base, outbox, INIT_HCA_AUXC_BASE_OFFSET); MLX4_GET(param->eqc_base, outbox, INIT_HCA_EQC_BASE_OFFSET); MLX4_GET(param->log_num_eqs, outbox, INIT_HCA_LOG_EQ_OFFSET); MLX4_GET(param->rdmarc_base, outbox, INIT_HCA_RDMARC_BASE_OFFSET); MLX4_GET(param->log_rd_per_qp, outbox, INIT_HCA_LOG_RD_OFFSET); MLX4_GET(dword_field, outbox, INIT_HCA_FLAGS_OFFSET); if (dword_field & (1 << INIT_HCA_DEVICE_MANAGED_FLOW_STEERING_EN)) { param->steering_mode = MLX4_STEERING_MODE_DEVICE_MANAGED; } else { MLX4_GET(byte_field, outbox, INIT_HCA_UC_STEERING_OFFSET); if (byte_field & 0x8) { param->steering_mode = MLX4_STEERING_MODE_B0; } else { param->steering_mode = MLX4_STEERING_MODE_A0; } } if (param->steering_mode == MLX4_STEERING_MODE_DEVICE_MANAGED) { MLX4_GET(param->mc_base, outbox, INIT_HCA_FS_BASE_OFFSET); MLX4_GET(param->log_mc_entry_sz, outbox, INIT_HCA_FS_LOG_ENTRY_SZ_OFFSET); MLX4_GET(param->log_mc_table_sz, outbox, INIT_HCA_FS_LOG_TABLE_SZ_OFFSET); } else { MLX4_GET(param->mc_base, outbox, INIT_HCA_MC_BASE_OFFSET); MLX4_GET(param->log_mc_entry_sz, outbox, INIT_HCA_LOG_MC_ENTRY_SZ_OFFSET); MLX4_GET(param->log_mc_hash_sz, outbox, INIT_HCA_LOG_MC_HASH_SZ_OFFSET); MLX4_GET(param->log_mc_table_sz, outbox, INIT_HCA_LOG_MC_TABLE_SZ_OFFSET); } /* CX3 is capable of extending CQEs\EQEs from 32 to 64 bytes */ MLX4_GET(byte_field, outbox, INIT_HCA_EQE_CQE_OFFSETS); if (byte_field & 0x20) /* 64-bytes eqe enabled */ param->dev_cap_enabled |= MLX4_DEV_CAP_64B_EQE_ENABLED; if (byte_field & 0x40) /* 64-bytes cqe enabled */ param->dev_cap_enabled |= MLX4_DEV_CAP_64B_CQE_ENABLED; /* TPT attributes */ MLX4_GET(param->dmpt_base, outbox, INIT_HCA_DMPT_BASE_OFFSET); MLX4_GET(param->log_mpt_sz, outbox, INIT_HCA_LOG_MPT_SZ_OFFSET); MLX4_GET(param->mtt_base, outbox, INIT_HCA_MTT_BASE_OFFSET); MLX4_GET(param->cmpt_base, outbox, INIT_HCA_CMPT_BASE_OFFSET); /* UAR attributes */ MLX4_GET(param->uar_page_sz, outbox, INIT_HCA_UAR_PAGE_SZ_OFFSET); MLX4_GET(param->log_uar_sz, outbox, INIT_HCA_LOG_UAR_SZ_OFFSET); out: mlx4_free_cmd_mailbox(dev, mailbox); return err; } /* for IB-type ports only in SRIOV mode. Checks that both proxy QP0 * and real QP0 are active, so that the paravirtualized QP0 is ready * to operate */ static int check_qp0_state(struct mlx4_dev *dev, int function, int port) { struct mlx4_priv *priv = mlx4_priv(dev); /* irrelevant if not infiniband */ if (priv->mfunc.master.qp0_state[port].proxy_qp0_active && priv->mfunc.master.qp0_state[port].qp0_active) return 1; return 0; } int mlx4_INIT_PORT_wrapper(struct mlx4_dev *dev, int slave, struct mlx4_vhcr *vhcr, struct mlx4_cmd_mailbox *inbox, struct mlx4_cmd_mailbox *outbox, struct mlx4_cmd_info *cmd) { struct mlx4_priv *priv = mlx4_priv(dev); int port = vhcr->in_modifier; int err; if (priv->mfunc.master.slave_state[slave].init_port_mask & (1 << port)) return 0; if (dev->caps.port_mask[port] != MLX4_PORT_TYPE_IB) { /* Enable port only if it was previously disabled */ if (!priv->mfunc.master.init_port_ref[port]) { err = mlx4_cmd(dev, 0, port, 0, MLX4_CMD_INIT_PORT, MLX4_CMD_TIME_CLASS_A, MLX4_CMD_NATIVE); if (err) return err; } priv->mfunc.master.slave_state[slave].init_port_mask |= (1 << port); } else { if (slave == mlx4_master_func_num(dev)) { if (check_qp0_state(dev, slave, port) && !priv->mfunc.master.qp0_state[port].port_active) { err = mlx4_cmd(dev, 0, port, 0, MLX4_CMD_INIT_PORT, MLX4_CMD_TIME_CLASS_A, MLX4_CMD_NATIVE); if (err) return err; priv->mfunc.master.qp0_state[port].port_active = 1; priv->mfunc.master.slave_state[slave].init_port_mask |= (1 << port); } } else priv->mfunc.master.slave_state[slave].init_port_mask |= (1 << port); } ++priv->mfunc.master.init_port_ref[port]; return 0; } int mlx4_INIT_PORT(struct mlx4_dev *dev, int port) { struct mlx4_cmd_mailbox *mailbox; u32 *inbox; int err; u32 flags; u16 field; if (dev->flags & MLX4_FLAG_OLD_PORT_CMDS) { #define INIT_PORT_IN_SIZE 256 #define INIT_PORT_FLAGS_OFFSET 0x00 #define INIT_PORT_FLAG_SIG (1 << 18) #define INIT_PORT_FLAG_NG (1 << 17) #define INIT_PORT_FLAG_G0 (1 << 16) #define INIT_PORT_VL_SHIFT 4 #define INIT_PORT_PORT_WIDTH_SHIFT 8 #define INIT_PORT_MTU_OFFSET 0x04 #define INIT_PORT_MAX_GID_OFFSET 0x06 #define INIT_PORT_MAX_PKEY_OFFSET 0x0a #define INIT_PORT_GUID0_OFFSET 0x10 #define INIT_PORT_NODE_GUID_OFFSET 0x18 #define INIT_PORT_SI_GUID_OFFSET 0x20 mailbox = mlx4_alloc_cmd_mailbox(dev); if (IS_ERR(mailbox)) return PTR_ERR(mailbox); inbox = mailbox->buf; memset(inbox, 0, INIT_PORT_IN_SIZE); flags = 0; flags |= (dev->caps.vl_cap[port] & 0xf) << INIT_PORT_VL_SHIFT; flags |= (dev->caps.port_width_cap[port] & 0xf) << INIT_PORT_PORT_WIDTH_SHIFT; MLX4_PUT(inbox, flags, INIT_PORT_FLAGS_OFFSET); field = 128 << dev->caps.ib_mtu_cap[port]; MLX4_PUT(inbox, field, INIT_PORT_MTU_OFFSET); field = dev->caps.gid_table_len[port]; MLX4_PUT(inbox, field, INIT_PORT_MAX_GID_OFFSET); field = dev->caps.pkey_table_len[port]; MLX4_PUT(inbox, field, INIT_PORT_MAX_PKEY_OFFSET); err = mlx4_cmd(dev, mailbox->dma, port, 0, MLX4_CMD_INIT_PORT, MLX4_CMD_TIME_CLASS_A, MLX4_CMD_NATIVE); mlx4_free_cmd_mailbox(dev, mailbox); } else err = mlx4_cmd(dev, 0, port, 0, MLX4_CMD_INIT_PORT, MLX4_CMD_TIME_CLASS_A, MLX4_CMD_WRAPPED); return err; } EXPORT_SYMBOL_GPL(mlx4_INIT_PORT); int mlx4_CLOSE_PORT_wrapper(struct mlx4_dev *dev, int slave, struct mlx4_vhcr *vhcr, struct mlx4_cmd_mailbox *inbox, struct mlx4_cmd_mailbox *outbox, struct mlx4_cmd_info *cmd) { struct mlx4_priv *priv = mlx4_priv(dev); int port = vhcr->in_modifier; int err; if (!(priv->mfunc.master.slave_state[slave].init_port_mask & (1 << port))) return 0; if (dev->caps.port_mask[port] != MLX4_PORT_TYPE_IB) { if (priv->mfunc.master.init_port_ref[port] == 1) { err = mlx4_cmd(dev, 0, port, 0, MLX4_CMD_CLOSE_PORT, 1000, MLX4_CMD_NATIVE); if (err) return err; } priv->mfunc.master.slave_state[slave].init_port_mask &= ~(1 << port); } else { /* infiniband port */ if (slave == mlx4_master_func_num(dev)) { if (!priv->mfunc.master.qp0_state[port].qp0_active && priv->mfunc.master.qp0_state[port].port_active) { err = mlx4_cmd(dev, 0, port, 0, MLX4_CMD_CLOSE_PORT, 1000, MLX4_CMD_NATIVE); if (err) return err; priv->mfunc.master.slave_state[slave].init_port_mask &= ~(1 << port); priv->mfunc.master.qp0_state[port].port_active = 0; } } else priv->mfunc.master.slave_state[slave].init_port_mask &= ~(1 << port); } --priv->mfunc.master.init_port_ref[port]; return 0; } int mlx4_CLOSE_PORT(struct mlx4_dev *dev, int port) { return mlx4_cmd(dev, 0, port, 0, MLX4_CMD_CLOSE_PORT, 1000, MLX4_CMD_WRAPPED); } EXPORT_SYMBOL_GPL(mlx4_CLOSE_PORT); int mlx4_CLOSE_HCA(struct mlx4_dev *dev, int panic) { return mlx4_cmd(dev, 0, 0, panic, MLX4_CMD_CLOSE_HCA, 1000, MLX4_CMD_NATIVE); } int mlx4_SET_ICM_SIZE(struct mlx4_dev *dev, u64 icm_size, u64 *aux_pages) { int ret = mlx4_cmd_imm(dev, icm_size, aux_pages, 0, 0, MLX4_CMD_SET_ICM_SIZE, MLX4_CMD_TIME_CLASS_A, MLX4_CMD_NATIVE); if (ret) return ret; /* * Round up number of system pages needed in case * MLX4_ICM_PAGE_SIZE < PAGE_SIZE. */ *aux_pages = ALIGN(*aux_pages, PAGE_SIZE / MLX4_ICM_PAGE_SIZE) >> (PAGE_SHIFT - MLX4_ICM_PAGE_SHIFT); return 0; } int mlx4_NOP(struct mlx4_dev *dev) { /* Input modifier of 0x1f means "finish as soon as possible." */ return mlx4_cmd(dev, 0, 0x1f, 0, MLX4_CMD_NOP, MLX4_CMD_TIME_CLASS_A, MLX4_CMD_NATIVE); } int mlx4_query_diag_counters(struct mlx4_dev *dev, int array_length, u8 op_modifier, u32 in_offset[], u32 counter_out[]) { struct mlx4_cmd_mailbox *mailbox; u32 *outbox; int ret; int i; mailbox = mlx4_alloc_cmd_mailbox(dev); if (IS_ERR(mailbox)) return PTR_ERR(mailbox); outbox = mailbox->buf; ret = mlx4_cmd_box(dev, 0, mailbox->dma, 0, op_modifier, MLX4_CMD_DIAG_RPRT, MLX4_CMD_TIME_CLASS_A, MLX4_CMD_NATIVE); if (ret) goto out; for (i = 0; i < array_length; i++) { if (in_offset[i] > MLX4_MAILBOX_SIZE) { ret = -EINVAL; goto out; } MLX4_GET(counter_out[i], outbox, in_offset[i]); } out: mlx4_free_cmd_mailbox(dev, mailbox); return ret; } EXPORT_SYMBOL_GPL(mlx4_query_diag_counters); #define MLX4_WOL_SETUP_MODE (5 << 28) int mlx4_wol_read(struct mlx4_dev *dev, u64 *config, int port) { u32 in_mod = MLX4_WOL_SETUP_MODE | port << 8; return mlx4_cmd_imm(dev, 0, config, in_mod, 0x3, MLX4_CMD_MOD_STAT_CFG, MLX4_CMD_TIME_CLASS_A, MLX4_CMD_NATIVE); } EXPORT_SYMBOL_GPL(mlx4_wol_read); int mlx4_wol_write(struct mlx4_dev *dev, u64 config, int port) { u32 in_mod = MLX4_WOL_SETUP_MODE | port << 8; return mlx4_cmd(dev, config, in_mod, 0x1, MLX4_CMD_MOD_STAT_CFG, MLX4_CMD_TIME_CLASS_A, MLX4_CMD_NATIVE); } EXPORT_SYMBOL_GPL(mlx4_wol_write); enum { ADD_TO_MCG = 0x26, }; void mlx4_opreq_action(struct work_struct *work) { struct mlx4_priv *priv = container_of(work, struct mlx4_priv, opreq_task); struct mlx4_dev *dev = &priv->dev; int num_tasks = atomic_read(&priv->opreq_count); struct mlx4_cmd_mailbox *mailbox; struct mlx4_mgm *mgm; u32 *outbox; u32 modifier; u16 token; u16 type_m; u16 type; int err; u32 num_qps; struct mlx4_qp qp; int i; u8 rem_mcg; u8 prot; #define GET_OP_REQ_MODIFIER_OFFSET 0x08 #define GET_OP_REQ_TOKEN_OFFSET 0x14 #define GET_OP_REQ_TYPE_OFFSET 0x1a #define GET_OP_REQ_DATA_OFFSET 0x20 mailbox = mlx4_alloc_cmd_mailbox(dev); if (IS_ERR(mailbox)) { mlx4_err(dev, "Failed to allocate mailbox for GET_OP_REQ\n"); return; } outbox = mailbox->buf; while (num_tasks) { err = mlx4_cmd_box(dev, 0, mailbox->dma, 0, 0, MLX4_CMD_GET_OP_REQ, MLX4_CMD_TIME_CLASS_A, MLX4_CMD_NATIVE); if (err) { mlx4_err(dev, "Failed to retreive required operation: %d\n", err); return; } MLX4_GET(modifier, outbox, GET_OP_REQ_MODIFIER_OFFSET); MLX4_GET(token, outbox, GET_OP_REQ_TOKEN_OFFSET); MLX4_GET(type, outbox, GET_OP_REQ_TYPE_OFFSET); type_m = type >> 12; type &= 0xfff; switch (type) { case ADD_TO_MCG: if (dev->caps.steering_mode == MLX4_STEERING_MODE_DEVICE_MANAGED) { mlx4_warn(dev, "ADD MCG operation is not supported in " "DEVICE_MANAGED steerign mode\n"); err = EPERM; break; } mgm = (struct mlx4_mgm *) ((u8 *) (outbox) + GET_OP_REQ_DATA_OFFSET); num_qps = be32_to_cpu(mgm->members_count) & MGM_QPN_MASK; rem_mcg = ((u8 *) (&mgm->members_count))[0] & 1; prot = ((u8 *) (&mgm->members_count))[0] >> 6; for (i = 0; i < num_qps; i++) { qp.qpn = be32_to_cpu(mgm->qp[i]); if (rem_mcg) err = mlx4_multicast_detach(dev, &qp, mgm->gid, prot, 0); else err = mlx4_multicast_attach(dev, &qp, mgm->gid, mgm->gid[5] ,0, prot, NULL); if (err) break; } break; default: mlx4_warn(dev, "Bad type for required operation\n"); err = EINVAL; break; } err = mlx4_cmd(dev, 0, ((u32) err | cpu_to_be32(token) << 16), 1, MLX4_CMD_GET_OP_REQ, MLX4_CMD_TIME_CLASS_A, MLX4_CMD_NATIVE); if (err) { mlx4_err(dev, "Failed to acknowledge required request: %d\n", err); goto out; } memset(outbox, 0, 0xffc); num_tasks = atomic_dec_return(&priv->opreq_count); } out: mlx4_free_cmd_mailbox(dev, mailbox); } Index: stable/10/sys/ofed/drivers/net/mlx4/main.c =================================================================== --- stable/10/sys/ofed/drivers/net/mlx4/main.c (revision 271126) +++ stable/10/sys/ofed/drivers/net/mlx4/main.c (revision 271127) @@ -1,2878 +1,2878 @@ /* * Copyright (c) 2004, 2005 Topspin Communications. All rights reserved. * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved. * Copyright (c) 2005, 2006, 2007, 2008 Mellanox Technologies. All rights reserved. * Copyright (c) 2006, 2007 Cisco Systems, Inc. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU * General Public License (GPL) Version 2, available from the file * COPYING in the main directory of this source tree, or the * OpenIB.org BSD license below: * * Redistribution and use in source and binary forms, with or * without modification, are permitted provided that the following * conditions are met: * * - Redistributions of source code must retain the above * copyright notice, this list of conditions and the following * disclaimer. * * - Redistributions in binary form must reproduce the above * copyright notice, this list of conditions and the following * disclaimer in the documentation and/or other materials * provided with the distribution. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ #include -#include #include #include #include #include #include #include #include +#include #include #include #include "mlx4.h" #include "fw.h" #include "icm.h" MODULE_AUTHOR("Roland Dreier"); MODULE_DESCRIPTION("Mellanox ConnectX HCA low-level driver"); MODULE_LICENSE("Dual BSD/GPL"); MODULE_VERSION(DRV_VERSION); struct workqueue_struct *mlx4_wq; #ifdef CONFIG_MLX4_DEBUG int mlx4_debug_level = 0; module_param_named(debug_level, mlx4_debug_level, int, 0644); MODULE_PARM_DESC(debug_level, "Enable debug tracing if > 0"); #endif /* CONFIG_MLX4_DEBUG */ #ifdef CONFIG_PCI_MSI static int msi_x = 1; module_param(msi_x, int, 0444); MODULE_PARM_DESC(msi_x, "attempt to use MSI-X if nonzero"); #else /* CONFIG_PCI_MSI */ #define msi_x (0) #endif /* CONFIG_PCI_MSI */ static int enable_sys_tune = 0; module_param(enable_sys_tune, int, 0444); MODULE_PARM_DESC(enable_sys_tune, "Tune the cpu's for better performance (default 0)"); int mlx4_blck_lb = 1; module_param_named(block_loopback, mlx4_blck_lb, int, 0644); MODULE_PARM_DESC(block_loopback, "Block multicast loopback packets if > 0 " "(default: 1)"); static int num_vfs; module_param(num_vfs, int, 0444); MODULE_PARM_DESC(num_vfs, "enable #num_vfs functions if num_vfs > 0"); static int probe_vf; module_param(probe_vf, int, 0644); MODULE_PARM_DESC(probe_vf, "number of vfs to probe by pf driver (num_vfs > 0)"); int mlx4_log_num_mgm_entry_size = MLX4_DEFAULT_MGM_LOG_ENTRY_SIZE; module_param_named(log_num_mgm_entry_size, mlx4_log_num_mgm_entry_size, int, 0444); MODULE_PARM_DESC(log_num_mgm_entry_size, "log mgm size, that defines the num" " of qp per mcg, for example:" " 10 gives 248.range: 7 <=" " log_num_mgm_entry_size <= 12." " To activate device managed" " flow steering when available, set to -1"); static int high_rate_steer; module_param(high_rate_steer, int, 0444); MODULE_PARM_DESC(high_rate_steer, "Enable steering mode for higher packet rate" " (default off)"); static int fast_drop; module_param_named(fast_drop, fast_drop, int, 0444); MODULE_PARM_DESC(fast_drop, "Enable fast packet drop when no recieve WQEs are posted"); int mlx4_enable_64b_cqe_eqe; module_param_named(enable_64b_cqe_eqe, mlx4_enable_64b_cqe_eqe, int, 0644); MODULE_PARM_DESC(enable_64b_cqe_eqe, "Enable 64 byte CQEs/EQEs when the the FW supports this, if nonzero"); #define HCA_GLOBAL_CAP_MASK 0 #define PF_CONTEXT_BEHAVIOUR_MASK MLX4_FUNC_CAP_64B_EQE_CQE static char mlx4_version[] __devinitdata = DRV_NAME ": Mellanox ConnectX core driver v" DRV_VERSION " (" DRV_RELDATE ")\n"; static int log_num_mac = 7; module_param_named(log_num_mac, log_num_mac, int, 0444); MODULE_PARM_DESC(log_num_mac, "Log2 max number of MACs per ETH port (1-7)"); static int log_num_vlan; module_param_named(log_num_vlan, log_num_vlan, int, 0444); MODULE_PARM_DESC(log_num_vlan, "(Obsolete) Log2 max number of VLANs per ETH port (0-7)"); /* Log2 max number of VLANs per ETH port (0-7) */ #define MLX4_LOG_NUM_VLANS 7 int log_mtts_per_seg = ilog2(1); module_param_named(log_mtts_per_seg, log_mtts_per_seg, int, 0444); MODULE_PARM_DESC(log_mtts_per_seg, "Log2 number of MTT entries per segment " "(0-7) (default: 0)"); static int port_type_array[2] = {MLX4_PORT_TYPE_NONE, MLX4_PORT_TYPE_NONE}; #if 0 static int arr_argc = 2; module_param_array(port_type_array, int, &arr_argc, 0444); MODULE_PARM_DESC(port_type_array, "Array of port types: HW_DEFAULT (0) is default " "1 for IB, 2 for Ethernet"); #endif struct mlx4_port_config { struct list_head list; enum mlx4_port_type port_type[MLX4_MAX_PORTS + 1]; struct pci_dev *pdev; }; #define MLX4_LOG_NUM_MTT 20 /* We limit to 30 as of a bit map issue which uses int and not uint. see mlx4_buddy_init -> bitmap_zero which gets int. */ #define MLX4_MAX_LOG_NUM_MTT 30 static struct mlx4_profile mod_param_profile = { .num_qp = 19, .num_srq = 16, .rdmarc_per_qp = 4, .num_cq = 16, .num_mcg = 13, .num_mpt = 19, .num_mtt = 0, /* max(20, 2*MTTs for host memory)) */ }; module_param_named(log_num_qp, mod_param_profile.num_qp, int, 0444); MODULE_PARM_DESC(log_num_qp, "log maximum number of QPs per HCA (default: 19)"); module_param_named(log_num_srq, mod_param_profile.num_srq, int, 0444); MODULE_PARM_DESC(log_num_srq, "log maximum number of SRQs per HCA " "(default: 16)"); module_param_named(log_rdmarc_per_qp, mod_param_profile.rdmarc_per_qp, int, 0444); MODULE_PARM_DESC(log_rdmarc_per_qp, "log number of RDMARC buffers per QP " "(default: 4)"); module_param_named(log_num_cq, mod_param_profile.num_cq, int, 0444); MODULE_PARM_DESC(log_num_cq, "log maximum number of CQs per HCA (default: 16)"); module_param_named(log_num_mcg, mod_param_profile.num_mcg, int, 0444); MODULE_PARM_DESC(log_num_mcg, "log maximum number of multicast groups per HCA " "(default: 13)"); module_param_named(log_num_mpt, mod_param_profile.num_mpt, int, 0444); MODULE_PARM_DESC(log_num_mpt, "log maximum number of memory protection table entries per " "HCA (default: 19)"); module_param_named(log_num_mtt, mod_param_profile.num_mtt, int, 0444); MODULE_PARM_DESC(log_num_mtt, "log maximum number of memory translation table segments per " "HCA (default: max(20, 2*MTTs for register all of the host memory limited to 30))"); enum { MLX4_IF_STATE_BASIC, MLX4_IF_STATE_EXTENDED }; static void process_mod_param_profile(struct mlx4_profile *profile) { vm_size_t hwphyssz; hwphyssz = 0; TUNABLE_ULONG_FETCH("hw.realmem", (u_long *) &hwphyssz); profile->num_qp = 1 << mod_param_profile.num_qp; profile->num_srq = 1 << mod_param_profile.num_srq; profile->rdmarc_per_qp = 1 << mod_param_profile.rdmarc_per_qp; profile->num_cq = 1 << mod_param_profile.num_cq; profile->num_mcg = 1 << mod_param_profile.num_mcg; profile->num_mpt = 1 << mod_param_profile.num_mpt; /* * We want to scale the number of MTTs with the size of the * system memory, since it makes sense to register a lot of * memory on a system with a lot of memory. As a heuristic, * make sure we have enough MTTs to register twice the system * memory (with PAGE_SIZE entries). * * This number has to be a power of two and fit into 32 bits * due to device limitations. We cap this at 2^30 as of bit map * limitation to work with int instead of uint (mlx4_buddy_init -> bitmap_zero) * That limits us to 4TB of memory registration per HCA with * 4KB pages, which is probably OK for the next few months. */ if (mod_param_profile.num_mtt) profile->num_mtt = 1 << mod_param_profile.num_mtt; else { profile->num_mtt = roundup_pow_of_two(max_t(unsigned, 1 << (MLX4_LOG_NUM_MTT - log_mtts_per_seg), min(1UL << (MLX4_MAX_LOG_NUM_MTT - log_mtts_per_seg), (hwphyssz << 1) >> log_mtts_per_seg))); /* set the actual value, so it will be reflected to the user using the sysfs */ mod_param_profile.num_mtt = ilog2(profile->num_mtt * (1 << log_mtts_per_seg)); } } int mlx4_check_port_params(struct mlx4_dev *dev, enum mlx4_port_type *port_type) { int i; for (i = 0; i < dev->caps.num_ports - 1; i++) { if (port_type[i] != port_type[i + 1]) { if (!(dev->caps.flags & MLX4_DEV_CAP_FLAG_DPDP)) { mlx4_err(dev, "Only same port types supported " "on this HCA, aborting.\n"); return -EINVAL; } } } for (i = 0; i < dev->caps.num_ports; i++) { if (!(port_type[i] & dev->caps.supported_type[i+1])) { mlx4_err(dev, "Requested port type for port %d is not " "supported on this HCA\n", i + 1); return -EINVAL; } } return 0; } static void mlx4_set_port_mask(struct mlx4_dev *dev) { int i; for (i = 1; i <= dev->caps.num_ports; ++i) dev->caps.port_mask[i] = dev->caps.port_type[i]; } static int mlx4_dev_cap(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap) { int err; int i; err = mlx4_QUERY_DEV_CAP(dev, dev_cap); if (err) { mlx4_err(dev, "QUERY_DEV_CAP command failed, aborting.\n"); return err; } if (dev_cap->min_page_sz > PAGE_SIZE) { mlx4_err(dev, "HCA minimum page size of %d bigger than " "kernel PAGE_SIZE of %d, aborting.\n", dev_cap->min_page_sz, PAGE_SIZE); return -ENODEV; } if (dev_cap->num_ports > MLX4_MAX_PORTS) { mlx4_err(dev, "HCA has %d ports, but we only support %d, " "aborting.\n", dev_cap->num_ports, MLX4_MAX_PORTS); return -ENODEV; } if (dev_cap->uar_size > pci_resource_len(dev->pdev, 2)) { mlx4_err(dev, "HCA reported UAR size of 0x%x bigger than " "PCI resource 2 size of 0x%llx, aborting.\n", dev_cap->uar_size, (unsigned long long) pci_resource_len(dev->pdev, 2)); return -ENODEV; } dev->caps.num_ports = dev_cap->num_ports; dev->phys_caps.num_phys_eqs = MLX4_MAX_EQ_NUM; for (i = 1; i <= dev->caps.num_ports; ++i) { dev->caps.vl_cap[i] = dev_cap->max_vl[i]; dev->caps.ib_mtu_cap[i] = dev_cap->ib_mtu[i]; dev->phys_caps.gid_phys_table_len[i] = dev_cap->max_gids[i]; dev->phys_caps.pkey_phys_table_len[i] = dev_cap->max_pkeys[i]; /* set gid and pkey table operating lengths by default * to non-sriov values */ dev->caps.gid_table_len[i] = dev_cap->max_gids[i]; dev->caps.pkey_table_len[i] = dev_cap->max_pkeys[i]; dev->caps.port_width_cap[i] = dev_cap->max_port_width[i]; dev->caps.eth_mtu_cap[i] = dev_cap->eth_mtu[i]; dev->caps.def_mac[i] = dev_cap->def_mac[i]; dev->caps.supported_type[i] = dev_cap->supported_port_types[i]; dev->caps.suggested_type[i] = dev_cap->suggested_type[i]; dev->caps.default_sense[i] = dev_cap->default_sense[i]; dev->caps.trans_type[i] = dev_cap->trans_type[i]; dev->caps.vendor_oui[i] = dev_cap->vendor_oui[i]; dev->caps.wavelength[i] = dev_cap->wavelength[i]; dev->caps.trans_code[i] = dev_cap->trans_code[i]; } dev->caps.uar_page_size = PAGE_SIZE; dev->caps.num_uars = dev_cap->uar_size / PAGE_SIZE; dev->caps.local_ca_ack_delay = dev_cap->local_ca_ack_delay; dev->caps.bf_reg_size = dev_cap->bf_reg_size; dev->caps.bf_regs_per_page = dev_cap->bf_regs_per_page; dev->caps.max_sq_sg = dev_cap->max_sq_sg; dev->caps.max_rq_sg = dev_cap->max_rq_sg; dev->caps.max_wqes = dev_cap->max_qp_sz; dev->caps.max_qp_init_rdma = dev_cap->max_requester_per_qp; dev->caps.max_srq_wqes = dev_cap->max_srq_sz; dev->caps.max_srq_sge = dev_cap->max_rq_sg - 1; dev->caps.reserved_srqs = dev_cap->reserved_srqs; dev->caps.max_sq_desc_sz = dev_cap->max_sq_desc_sz; dev->caps.max_rq_desc_sz = dev_cap->max_rq_desc_sz; /* * Subtract 1 from the limit because we need to allocate a * spare CQE to enable resizing the CQ */ dev->caps.max_cqes = dev_cap->max_cq_sz - 1; dev->caps.reserved_cqs = dev_cap->reserved_cqs; dev->caps.reserved_eqs = dev_cap->reserved_eqs; dev->caps.reserved_mtts = dev_cap->reserved_mtts; dev->caps.reserved_mrws = dev_cap->reserved_mrws; /* The first 128 UARs are used for EQ doorbells */ dev->caps.reserved_uars = max_t(int, 128, dev_cap->reserved_uars); dev->caps.reserved_pds = dev_cap->reserved_pds; dev->caps.reserved_xrcds = (dev->caps.flags & MLX4_DEV_CAP_FLAG_XRC) ? dev_cap->reserved_xrcds : 0; dev->caps.max_xrcds = (dev->caps.flags & MLX4_DEV_CAP_FLAG_XRC) ? dev_cap->max_xrcds : 0; dev->caps.mtt_entry_sz = dev_cap->mtt_entry_sz; dev->caps.max_msg_sz = dev_cap->max_msg_sz; dev->caps.page_size_cap = ~(u32) (dev_cap->min_page_sz - 1); dev->caps.flags = dev_cap->flags; dev->caps.flags2 = dev_cap->flags2; dev->caps.bmme_flags = dev_cap->bmme_flags; dev->caps.reserved_lkey = dev_cap->reserved_lkey; dev->caps.stat_rate_support = dev_cap->stat_rate_support; dev->caps.cq_timestamp = dev_cap->timestamp_support; dev->caps.max_gso_sz = dev_cap->max_gso_sz; dev->caps.max_rss_tbl_sz = dev_cap->max_rss_tbl_sz; /* Sense port always allowed on supported devices for ConnectX-1 and -2 */ if (mlx4_priv(dev)->pci_dev_data & MLX4_PCI_DEV_FORCE_SENSE_PORT) dev->caps.flags |= MLX4_DEV_CAP_FLAG_SENSE_SUPPORT; /* Don't do sense port on multifunction devices (for now at least) */ if (mlx4_is_mfunc(dev)) dev->caps.flags &= ~MLX4_DEV_CAP_FLAG_SENSE_SUPPORT; dev->caps.log_num_macs = log_num_mac; dev->caps.log_num_vlans = MLX4_LOG_NUM_VLANS; dev->caps.fast_drop = fast_drop ? !!(dev->caps.flags & MLX4_DEV_CAP_FLAG_FAST_DROP) : 0; for (i = 1; i <= dev->caps.num_ports; ++i) { dev->caps.port_type[i] = MLX4_PORT_TYPE_NONE; if (dev->caps.supported_type[i]) { /* if only ETH is supported - assign ETH */ if (dev->caps.supported_type[i] == MLX4_PORT_TYPE_ETH) dev->caps.port_type[i] = MLX4_PORT_TYPE_ETH; /* if only IB is supported, assign IB */ else if (dev->caps.supported_type[i] == MLX4_PORT_TYPE_IB) dev->caps.port_type[i] = MLX4_PORT_TYPE_IB; else { /* if IB and ETH are supported, we set the port * type according to user selection of port type; * if user selected none, take the FW hint */ if (port_type_array[i - 1] == MLX4_PORT_TYPE_NONE) dev->caps.port_type[i] = dev->caps.suggested_type[i] ? MLX4_PORT_TYPE_ETH : MLX4_PORT_TYPE_IB; else dev->caps.port_type[i] = port_type_array[i - 1]; } } /* * Link sensing is allowed on the port if 3 conditions are true: * 1. Both protocols are supported on the port. * 2. Different types are supported on the port * 3. FW declared that it supports link sensing */ mlx4_priv(dev)->sense.sense_allowed[i] = ((dev->caps.supported_type[i] == MLX4_PORT_TYPE_AUTO) && (dev->caps.flags & MLX4_DEV_CAP_FLAG_DPDP) && (dev->caps.flags & MLX4_DEV_CAP_FLAG_SENSE_SUPPORT)); /* * If "default_sense" bit is set, we move the port to "AUTO" mode * and perform sense_port FW command to try and set the correct * port type from beginning */ if (mlx4_priv(dev)->sense.sense_allowed[i] && dev->caps.default_sense[i]) { enum mlx4_port_type sensed_port = MLX4_PORT_TYPE_NONE; dev->caps.possible_type[i] = MLX4_PORT_TYPE_AUTO; mlx4_SENSE_PORT(dev, i, &sensed_port); if (sensed_port != MLX4_PORT_TYPE_NONE) dev->caps.port_type[i] = sensed_port; } else { dev->caps.possible_type[i] = dev->caps.port_type[i]; } if (dev->caps.log_num_macs > dev_cap->log_max_macs[i]) { dev->caps.log_num_macs = dev_cap->log_max_macs[i]; mlx4_warn(dev, "Requested number of MACs is too much " "for port %d, reducing to %d.\n", i, 1 << dev->caps.log_num_macs); } if (dev->caps.log_num_vlans > dev_cap->log_max_vlans[i]) { dev->caps.log_num_vlans = dev_cap->log_max_vlans[i]; mlx4_warn(dev, "Requested number of VLANs is too much " "for port %d, reducing to %d.\n", i, 1 << dev->caps.log_num_vlans); } } dev->caps.max_basic_counters = dev_cap->max_basic_counters; dev->caps.max_extended_counters = dev_cap->max_extended_counters; /* support extended counters if available */ if (dev->caps.flags & MLX4_DEV_CAP_FLAG_COUNTERS_EXT) dev->caps.max_counters = dev->caps.max_extended_counters; else dev->caps.max_counters = dev->caps.max_basic_counters; dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FW] = dev_cap->reserved_qps; dev->caps.reserved_qps_cnt[MLX4_QP_REGION_ETH_ADDR] = dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FC_ADDR] = (1 << dev->caps.log_num_macs) * (1 << dev->caps.log_num_vlans) * dev->caps.num_ports; dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FC_EXCH] = MLX4_NUM_FEXCH; dev->caps.reserved_qps = dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FW] + dev->caps.reserved_qps_cnt[MLX4_QP_REGION_ETH_ADDR] + dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FC_ADDR] + dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FC_EXCH]; dev->caps.sync_qp = dev_cap->sync_qp; dev->caps.sqp_demux = (mlx4_is_master(dev)) ? MLX4_MAX_NUM_SLAVES : 0; if (!mlx4_enable_64b_cqe_eqe) { if (dev_cap->flags & (MLX4_DEV_CAP_FLAG_64B_CQE | MLX4_DEV_CAP_FLAG_64B_EQE)) { mlx4_warn(dev, "64B EQEs/CQEs supported by the device but not enabled\n"); dev->caps.flags &= ~MLX4_DEV_CAP_FLAG_64B_CQE; dev->caps.flags &= ~MLX4_DEV_CAP_FLAG_64B_EQE; } } if ((dev->caps.flags & (MLX4_DEV_CAP_FLAG_64B_CQE | MLX4_DEV_CAP_FLAG_64B_EQE)) && mlx4_is_master(dev)) dev->caps.function_caps |= MLX4_FUNC_CAP_64B_EQE_CQE; return 0; } /*The function checks if there are live vf, return the num of them*/ static int mlx4_how_many_lives_vf(struct mlx4_dev *dev) { struct mlx4_priv *priv = mlx4_priv(dev); struct mlx4_slave_state *s_state; int i; int ret = 0; for (i = 1/*the ppf is 0*/; i < dev->num_slaves; ++i) { s_state = &priv->mfunc.master.slave_state[i]; if (s_state->active && s_state->last_cmd != MLX4_COMM_CMD_RESET) { mlx4_warn(dev, "%s: slave: %d is still active\n", __func__, i); ret++; } } return ret; } int mlx4_get_parav_qkey(struct mlx4_dev *dev, u32 qpn, u32 *qkey) { u32 qk = MLX4_RESERVED_QKEY_BASE; if (qpn >= dev->phys_caps.base_tunnel_sqpn + 8 * MLX4_MFUNC_MAX || qpn < dev->phys_caps.base_proxy_sqpn) return -EINVAL; if (qpn >= dev->phys_caps.base_tunnel_sqpn) /* tunnel qp */ qk += qpn - dev->phys_caps.base_tunnel_sqpn; else qk += qpn - dev->phys_caps.base_proxy_sqpn; *qkey = qk; return 0; } EXPORT_SYMBOL(mlx4_get_parav_qkey); void mlx4_sync_pkey_table(struct mlx4_dev *dev, int slave, int port, int i, int val) { struct mlx4_priv *priv = container_of(dev, struct mlx4_priv, dev); if (!mlx4_is_master(dev)) return; priv->virt2phys_pkey[slave][port - 1][i] = val; } EXPORT_SYMBOL(mlx4_sync_pkey_table); void mlx4_put_slave_node_guid(struct mlx4_dev *dev, int slave, __be64 guid) { struct mlx4_priv *priv = container_of(dev, struct mlx4_priv, dev); if (!mlx4_is_master(dev)) return; priv->slave_node_guids[slave] = guid; } EXPORT_SYMBOL(mlx4_put_slave_node_guid); __be64 mlx4_get_slave_node_guid(struct mlx4_dev *dev, int slave) { struct mlx4_priv *priv = container_of(dev, struct mlx4_priv, dev); if (!mlx4_is_master(dev)) return 0; return priv->slave_node_guids[slave]; } EXPORT_SYMBOL(mlx4_get_slave_node_guid); int mlx4_is_slave_active(struct mlx4_dev *dev, int slave) { struct mlx4_priv *priv = mlx4_priv(dev); struct mlx4_slave_state *s_slave; if (!mlx4_is_master(dev)) return 0; s_slave = &priv->mfunc.master.slave_state[slave]; return !!s_slave->active; } EXPORT_SYMBOL(mlx4_is_slave_active); static void slave_adjust_steering_mode(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap, struct mlx4_init_hca_param *hca_param) { dev->caps.steering_mode = hca_param->steering_mode; if (dev->caps.steering_mode == MLX4_STEERING_MODE_DEVICE_MANAGED) dev->caps.num_qp_per_mgm = dev_cap->fs_max_num_qp_per_entry; else dev->caps.num_qp_per_mgm = 4 * ((1 << hca_param->log_mc_entry_sz)/16 - 2); mlx4_dbg(dev, "Steering mode is: %s\n", mlx4_steering_mode_str(dev->caps.steering_mode)); } static int mlx4_slave_cap(struct mlx4_dev *dev) { int err; u32 page_size; struct mlx4_dev_cap dev_cap; struct mlx4_func_cap func_cap; struct mlx4_init_hca_param hca_param; int i; memset(&hca_param, 0, sizeof(hca_param)); err = mlx4_QUERY_HCA(dev, &hca_param); if (err) { mlx4_err(dev, "QUERY_HCA command failed, aborting.\n"); return err; } /*fail if the hca has an unknown capability */ if ((hca_param.global_caps | HCA_GLOBAL_CAP_MASK) != HCA_GLOBAL_CAP_MASK) { mlx4_err(dev, "Unknown hca global capabilities\n"); return -ENOSYS; } mlx4_log_num_mgm_entry_size = hca_param.log_mc_entry_sz; dev->caps.hca_core_clock = hca_param.hca_core_clock; memset(&dev_cap, 0, sizeof(dev_cap)); dev->caps.max_qp_dest_rdma = 1 << hca_param.log_rd_per_qp; err = mlx4_dev_cap(dev, &dev_cap); if (err) { mlx4_err(dev, "QUERY_DEV_CAP command failed, aborting.\n"); return err; } err = mlx4_QUERY_FW(dev); if (err) mlx4_err(dev, "QUERY_FW command failed: could not get FW version.\n"); page_size = ~dev->caps.page_size_cap + 1; mlx4_warn(dev, "HCA minimum page size:%d\n", page_size); if (page_size > PAGE_SIZE) { mlx4_err(dev, "HCA minimum page size of %d bigger than " "kernel PAGE_SIZE of %d, aborting.\n", page_size, PAGE_SIZE); return -ENODEV; } /* slave gets uar page size from QUERY_HCA fw command */ dev->caps.uar_page_size = 1 << (hca_param.uar_page_sz + 12); /* TODO: relax this assumption */ if (dev->caps.uar_page_size != PAGE_SIZE) { mlx4_err(dev, "UAR size:%d != kernel PAGE_SIZE of %d\n", dev->caps.uar_page_size, PAGE_SIZE); return -ENODEV; } memset(&func_cap, 0, sizeof(func_cap)); err = mlx4_QUERY_FUNC_CAP(dev, 0, &func_cap); if (err) { mlx4_err(dev, "QUERY_FUNC_CAP general command failed, aborting (%d).\n", err); return err; } if ((func_cap.pf_context_behaviour | PF_CONTEXT_BEHAVIOUR_MASK) != PF_CONTEXT_BEHAVIOUR_MASK) { mlx4_err(dev, "Unknown pf context behaviour\n"); return -ENOSYS; } dev->caps.num_ports = func_cap.num_ports; dev->quotas.qp = func_cap.qp_quota; dev->quotas.srq = func_cap.srq_quota; dev->quotas.cq = func_cap.cq_quota; dev->quotas.mpt = func_cap.mpt_quota; dev->quotas.mtt = func_cap.mtt_quota; dev->caps.num_qps = 1 << hca_param.log_num_qps; dev->caps.num_srqs = 1 << hca_param.log_num_srqs; dev->caps.num_cqs = 1 << hca_param.log_num_cqs; dev->caps.num_mpts = 1 << hca_param.log_mpt_sz; dev->caps.num_eqs = func_cap.max_eq; dev->caps.reserved_eqs = func_cap.reserved_eq; dev->caps.num_pds = MLX4_NUM_PDS; dev->caps.num_mgms = 0; dev->caps.num_amgms = 0; if (dev->caps.num_ports > MLX4_MAX_PORTS) { mlx4_err(dev, "HCA has %d ports, but we only support %d, " "aborting.\n", dev->caps.num_ports, MLX4_MAX_PORTS); return -ENODEV; } dev->caps.qp0_tunnel = kcalloc(dev->caps.num_ports, sizeof (u32), GFP_KERNEL); dev->caps.qp0_proxy = kcalloc(dev->caps.num_ports, sizeof (u32), GFP_KERNEL); dev->caps.qp1_tunnel = kcalloc(dev->caps.num_ports, sizeof (u32), GFP_KERNEL); dev->caps.qp1_proxy = kcalloc(dev->caps.num_ports, sizeof (u32), GFP_KERNEL); if (!dev->caps.qp0_tunnel || !dev->caps.qp0_proxy || !dev->caps.qp1_tunnel || !dev->caps.qp1_proxy) { err = -ENOMEM; goto err_mem; } for (i = 1; i <= dev->caps.num_ports; ++i) { err = mlx4_QUERY_FUNC_CAP(dev, (u32) i, &func_cap); if (err) { mlx4_err(dev, "QUERY_FUNC_CAP port command failed for" " port %d, aborting (%d).\n", i, err); goto err_mem; } dev->caps.qp0_tunnel[i - 1] = func_cap.qp0_tunnel_qpn; dev->caps.qp0_proxy[i - 1] = func_cap.qp0_proxy_qpn; dev->caps.qp1_tunnel[i - 1] = func_cap.qp1_tunnel_qpn; dev->caps.qp1_proxy[i - 1] = func_cap.qp1_proxy_qpn; dev->caps.port_mask[i] = dev->caps.port_type[i]; err = mlx4_get_slave_pkey_gid_tbl_len(dev, i, &dev->caps.gid_table_len[i], &dev->caps.pkey_table_len[i]); if (err) goto err_mem; } if (dev->caps.uar_page_size * (dev->caps.num_uars - dev->caps.reserved_uars) > pci_resource_len(dev->pdev, 2)) { mlx4_err(dev, "HCA reported UAR region size of 0x%x bigger than " "PCI resource 2 size of 0x%llx, aborting.\n", dev->caps.uar_page_size * dev->caps.num_uars, (unsigned long long) pci_resource_len(dev->pdev, 2)); err = -ENOMEM; goto err_mem; } if (hca_param.dev_cap_enabled & MLX4_DEV_CAP_64B_EQE_ENABLED) { dev->caps.eqe_size = 64; dev->caps.eqe_factor = 1; } else { dev->caps.eqe_size = 32; dev->caps.eqe_factor = 0; } if (hca_param.dev_cap_enabled & MLX4_DEV_CAP_64B_CQE_ENABLED) { dev->caps.cqe_size = 64; dev->caps.userspace_caps |= MLX4_USER_DEV_CAP_64B_CQE; } else { dev->caps.cqe_size = 32; } slave_adjust_steering_mode(dev, &dev_cap, &hca_param); return 0; err_mem: kfree(dev->caps.qp0_tunnel); kfree(dev->caps.qp0_proxy); kfree(dev->caps.qp1_tunnel); kfree(dev->caps.qp1_proxy); dev->caps.qp0_tunnel = dev->caps.qp0_proxy = dev->caps.qp1_tunnel = dev->caps.qp1_proxy = NULL; return err; } /* * Change the port configuration of the device. * Every user of this function must hold the port mutex. */ int mlx4_change_port_types(struct mlx4_dev *dev, enum mlx4_port_type *port_types) { int err = 0; int change = 0; int port; for (port = 0; port < dev->caps.num_ports; port++) { /* Change the port type only if the new type is different * from the current, and not set to Auto */ if (port_types[port] != dev->caps.port_type[port + 1]) change = 1; } if (change) { mlx4_unregister_device(dev); for (port = 1; port <= dev->caps.num_ports; port++) { mlx4_CLOSE_PORT(dev, port); dev->caps.port_type[port] = port_types[port - 1]; err = mlx4_SET_PORT(dev, port, -1); if (err) { mlx4_err(dev, "Failed to set port %d, " "aborting\n", port); goto out; } } mlx4_set_port_mask(dev); err = mlx4_register_device(dev); } out: return err; } static ssize_t show_port_type(struct device *dev, struct device_attribute *attr, char *buf) { struct mlx4_port_info *info = container_of(attr, struct mlx4_port_info, port_attr); struct mlx4_dev *mdev = info->dev; char type[8]; sprintf(type, "%s", (mdev->caps.port_type[info->port] == MLX4_PORT_TYPE_IB) ? "ib" : "eth"); if (mdev->caps.possible_type[info->port] == MLX4_PORT_TYPE_AUTO) sprintf(buf, "auto (%s)\n", type); else sprintf(buf, "%s\n", type); return strlen(buf); } static ssize_t set_port_type(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { struct mlx4_port_info *info = container_of(attr, struct mlx4_port_info, port_attr); struct mlx4_dev *mdev = info->dev; struct mlx4_priv *priv = mlx4_priv(mdev); enum mlx4_port_type types[MLX4_MAX_PORTS]; enum mlx4_port_type new_types[MLX4_MAX_PORTS]; int i; int err = 0; if (!strcmp(buf, "ib\n")) info->tmp_type = MLX4_PORT_TYPE_IB; else if (!strcmp(buf, "eth\n")) info->tmp_type = MLX4_PORT_TYPE_ETH; else if (!strcmp(buf, "auto\n")) info->tmp_type = MLX4_PORT_TYPE_AUTO; else { mlx4_err(mdev, "%s is not supported port type\n", buf); return -EINVAL; } mlx4_stop_sense(mdev); mutex_lock(&priv->port_mutex); /* Possible type is always the one that was delivered */ mdev->caps.possible_type[info->port] = info->tmp_type; for (i = 0; i < mdev->caps.num_ports; i++) { types[i] = priv->port[i+1].tmp_type ? priv->port[i+1].tmp_type : mdev->caps.possible_type[i+1]; if (types[i] == MLX4_PORT_TYPE_AUTO) types[i] = mdev->caps.port_type[i+1]; } if (!(mdev->caps.flags & MLX4_DEV_CAP_FLAG_DPDP) && !(mdev->caps.flags & MLX4_DEV_CAP_FLAG_SENSE_SUPPORT)) { for (i = 1; i <= mdev->caps.num_ports; i++) { if (mdev->caps.possible_type[i] == MLX4_PORT_TYPE_AUTO) { mdev->caps.possible_type[i] = mdev->caps.port_type[i]; err = -EINVAL; } } } if (err) { mlx4_err(mdev, "Auto sensing is not supported on this HCA. " "Set only 'eth' or 'ib' for both ports " "(should be the same)\n"); goto out; } mlx4_do_sense_ports(mdev, new_types, types); err = mlx4_check_port_params(mdev, new_types); if (err) goto out; /* We are about to apply the changes after the configuration * was verified, no need to remember the temporary types * any more */ for (i = 0; i < mdev->caps.num_ports; i++) priv->port[i + 1].tmp_type = 0; err = mlx4_change_port_types(mdev, new_types); out: mlx4_start_sense(mdev); mutex_unlock(&priv->port_mutex); return err ? err : count; } enum ibta_mtu { IB_MTU_256 = 1, IB_MTU_512 = 2, IB_MTU_1024 = 3, IB_MTU_2048 = 4, IB_MTU_4096 = 5 }; static inline int int_to_ibta_mtu(int mtu) { switch (mtu) { case 256: return IB_MTU_256; case 512: return IB_MTU_512; case 1024: return IB_MTU_1024; case 2048: return IB_MTU_2048; case 4096: return IB_MTU_4096; default: return -1; } } static inline int ibta_mtu_to_int(enum ibta_mtu mtu) { switch (mtu) { case IB_MTU_256: return 256; case IB_MTU_512: return 512; case IB_MTU_1024: return 1024; case IB_MTU_2048: return 2048; case IB_MTU_4096: return 4096; default: return -1; } } static ssize_t show_port_ib_mtu(struct device *dev, struct device_attribute *attr, char *buf) { struct mlx4_port_info *info = container_of(attr, struct mlx4_port_info, port_mtu_attr); struct mlx4_dev *mdev = info->dev; if (mdev->caps.port_type[info->port] == MLX4_PORT_TYPE_ETH) mlx4_warn(mdev, "port level mtu is only used for IB ports\n"); sprintf(buf, "%d\n", ibta_mtu_to_int(mdev->caps.port_ib_mtu[info->port])); return strlen(buf); } static ssize_t set_port_ib_mtu(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { struct mlx4_port_info *info = container_of(attr, struct mlx4_port_info, port_mtu_attr); struct mlx4_dev *mdev = info->dev; struct mlx4_priv *priv = mlx4_priv(mdev); int err, port, mtu, ibta_mtu = -1; if (mdev->caps.port_type[info->port] == MLX4_PORT_TYPE_ETH) { mlx4_warn(mdev, "port level mtu is only used for IB ports\n"); return -EINVAL; } mtu = (int) simple_strtol(buf, NULL, 0); ibta_mtu = int_to_ibta_mtu(mtu); if (ibta_mtu < 0) { mlx4_err(mdev, "%s is invalid IBTA mtu\n", buf); return -EINVAL; } mdev->caps.port_ib_mtu[info->port] = ibta_mtu; mlx4_stop_sense(mdev); mutex_lock(&priv->port_mutex); mlx4_unregister_device(mdev); for (port = 1; port <= mdev->caps.num_ports; port++) { mlx4_CLOSE_PORT(mdev, port); err = mlx4_SET_PORT(mdev, port, -1); if (err) { mlx4_err(mdev, "Failed to set port %d, " "aborting\n", port); goto err_set_port; } } err = mlx4_register_device(mdev); err_set_port: mutex_unlock(&priv->port_mutex); mlx4_start_sense(mdev); return err ? err : count; } static int mlx4_load_fw(struct mlx4_dev *dev) { struct mlx4_priv *priv = mlx4_priv(dev); int err; priv->fw.fw_icm = mlx4_alloc_icm(dev, priv->fw.fw_pages, GFP_HIGHUSER | __GFP_NOWARN, 0); if (!priv->fw.fw_icm) { mlx4_err(dev, "Couldn't allocate FW area, aborting.\n"); return -ENOMEM; } err = mlx4_MAP_FA(dev, priv->fw.fw_icm); if (err) { mlx4_err(dev, "MAP_FA command failed, aborting.\n"); goto err_free; } err = mlx4_RUN_FW(dev); if (err) { mlx4_err(dev, "RUN_FW command failed, aborting.\n"); goto err_unmap_fa; } return 0; err_unmap_fa: mlx4_UNMAP_FA(dev); err_free: mlx4_free_icm(dev, priv->fw.fw_icm, 0); return err; } static int mlx4_init_cmpt_table(struct mlx4_dev *dev, u64 cmpt_base, int cmpt_entry_sz) { struct mlx4_priv *priv = mlx4_priv(dev); int err; int num_eqs; err = mlx4_init_icm_table(dev, &priv->qp_table.cmpt_table, cmpt_base + ((u64) (MLX4_CMPT_TYPE_QP * cmpt_entry_sz) << MLX4_CMPT_SHIFT), cmpt_entry_sz, dev->caps.num_qps, dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FW], 0, 0); if (err) goto err; err = mlx4_init_icm_table(dev, &priv->srq_table.cmpt_table, cmpt_base + ((u64) (MLX4_CMPT_TYPE_SRQ * cmpt_entry_sz) << MLX4_CMPT_SHIFT), cmpt_entry_sz, dev->caps.num_srqs, dev->caps.reserved_srqs, 0, 0); if (err) goto err_qp; err = mlx4_init_icm_table(dev, &priv->cq_table.cmpt_table, cmpt_base + ((u64) (MLX4_CMPT_TYPE_CQ * cmpt_entry_sz) << MLX4_CMPT_SHIFT), cmpt_entry_sz, dev->caps.num_cqs, dev->caps.reserved_cqs, 0, 0); if (err) goto err_srq; num_eqs = (mlx4_is_master(dev)) ? dev->phys_caps.num_phys_eqs : dev->caps.num_eqs; err = mlx4_init_icm_table(dev, &priv->eq_table.cmpt_table, cmpt_base + ((u64) (MLX4_CMPT_TYPE_EQ * cmpt_entry_sz) << MLX4_CMPT_SHIFT), cmpt_entry_sz, num_eqs, num_eqs, 0, 0); if (err) goto err_cq; return 0; err_cq: mlx4_cleanup_icm_table(dev, &priv->cq_table.cmpt_table); err_srq: mlx4_cleanup_icm_table(dev, &priv->srq_table.cmpt_table); err_qp: mlx4_cleanup_icm_table(dev, &priv->qp_table.cmpt_table); err: return err; } static int mlx4_init_icm(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap, struct mlx4_init_hca_param *init_hca, u64 icm_size) { struct mlx4_priv *priv = mlx4_priv(dev); u64 aux_pages; int num_eqs; int err; err = mlx4_SET_ICM_SIZE(dev, icm_size, &aux_pages); if (err) { mlx4_err(dev, "SET_ICM_SIZE command failed, aborting.\n"); return err; } mlx4_dbg(dev, "%lld KB of HCA context requires %lld KB aux memory.\n", (unsigned long long) icm_size >> 10, (unsigned long long) aux_pages << 2); priv->fw.aux_icm = mlx4_alloc_icm(dev, aux_pages, GFP_HIGHUSER | __GFP_NOWARN, 0); if (!priv->fw.aux_icm) { mlx4_err(dev, "Couldn't allocate aux memory, aborting.\n"); return -ENOMEM; } err = mlx4_MAP_ICM_AUX(dev, priv->fw.aux_icm); if (err) { mlx4_err(dev, "MAP_ICM_AUX command failed, aborting.\n"); goto err_free_aux; } err = mlx4_init_cmpt_table(dev, init_hca->cmpt_base, dev_cap->cmpt_entry_sz); if (err) { mlx4_err(dev, "Failed to map cMPT context memory, aborting.\n"); goto err_unmap_aux; } num_eqs = (mlx4_is_master(dev)) ? dev->phys_caps.num_phys_eqs : dev->caps.num_eqs; err = mlx4_init_icm_table(dev, &priv->eq_table.table, init_hca->eqc_base, dev_cap->eqc_entry_sz, num_eqs, num_eqs, 0, 0); if (err) { mlx4_err(dev, "Failed to map EQ context memory, aborting.\n"); goto err_unmap_cmpt; } /* * Reserved MTT entries must be aligned up to a cacheline * boundary, since the FW will write to them, while the driver * writes to all other MTT entries. (The variable * dev->caps.mtt_entry_sz below is really the MTT segment * size, not the raw entry size) */ dev->caps.reserved_mtts = ALIGN(dev->caps.reserved_mtts * dev->caps.mtt_entry_sz, dma_get_cache_alignment()) / dev->caps.mtt_entry_sz; err = mlx4_init_icm_table(dev, &priv->mr_table.mtt_table, init_hca->mtt_base, dev->caps.mtt_entry_sz, dev->caps.num_mtts, dev->caps.reserved_mtts, 1, 0); if (err) { mlx4_err(dev, "Failed to map MTT context memory, aborting.\n"); goto err_unmap_eq; } err = mlx4_init_icm_table(dev, &priv->mr_table.dmpt_table, init_hca->dmpt_base, dev_cap->dmpt_entry_sz, dev->caps.num_mpts, dev->caps.reserved_mrws, 1, 1); if (err) { mlx4_err(dev, "Failed to map dMPT context memory, aborting.\n"); goto err_unmap_mtt; } err = mlx4_init_icm_table(dev, &priv->qp_table.qp_table, init_hca->qpc_base, dev_cap->qpc_entry_sz, dev->caps.num_qps, dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FW], 0, 0); if (err) { mlx4_err(dev, "Failed to map QP context memory, aborting.\n"); goto err_unmap_dmpt; } err = mlx4_init_icm_table(dev, &priv->qp_table.auxc_table, init_hca->auxc_base, dev_cap->aux_entry_sz, dev->caps.num_qps, dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FW], 0, 0); if (err) { mlx4_err(dev, "Failed to map AUXC context memory, aborting.\n"); goto err_unmap_qp; } err = mlx4_init_icm_table(dev, &priv->qp_table.altc_table, init_hca->altc_base, dev_cap->altc_entry_sz, dev->caps.num_qps, dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FW], 0, 0); if (err) { mlx4_err(dev, "Failed to map ALTC context memory, aborting.\n"); goto err_unmap_auxc; } err = mlx4_init_icm_table(dev, &priv->qp_table.rdmarc_table, init_hca->rdmarc_base, dev_cap->rdmarc_entry_sz << priv->qp_table.rdmarc_shift, dev->caps.num_qps, dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FW], 0, 0); if (err) { mlx4_err(dev, "Failed to map RDMARC context memory, aborting\n"); goto err_unmap_altc; } err = mlx4_init_icm_table(dev, &priv->cq_table.table, init_hca->cqc_base, dev_cap->cqc_entry_sz, dev->caps.num_cqs, dev->caps.reserved_cqs, 0, 0); if (err) { mlx4_err(dev, "Failed to map CQ context memory, aborting.\n"); goto err_unmap_rdmarc; } err = mlx4_init_icm_table(dev, &priv->srq_table.table, init_hca->srqc_base, dev_cap->srq_entry_sz, dev->caps.num_srqs, dev->caps.reserved_srqs, 0, 0); if (err) { mlx4_err(dev, "Failed to map SRQ context memory, aborting.\n"); goto err_unmap_cq; } /* * For flow steering device managed mode it is required to use * mlx4_init_icm_table. For B0 steering mode it's not strictly * required, but for simplicity just map the whole multicast * group table now. The table isn't very big and it's a lot * easier than trying to track ref counts. */ err = mlx4_init_icm_table(dev, &priv->mcg_table.table, init_hca->mc_base, mlx4_get_mgm_entry_size(dev), dev->caps.num_mgms + dev->caps.num_amgms, dev->caps.num_mgms + dev->caps.num_amgms, 0, 0); if (err) { mlx4_err(dev, "Failed to map MCG context memory, aborting.\n"); goto err_unmap_srq; } return 0; err_unmap_srq: mlx4_cleanup_icm_table(dev, &priv->srq_table.table); err_unmap_cq: mlx4_cleanup_icm_table(dev, &priv->cq_table.table); err_unmap_rdmarc: mlx4_cleanup_icm_table(dev, &priv->qp_table.rdmarc_table); err_unmap_altc: mlx4_cleanup_icm_table(dev, &priv->qp_table.altc_table); err_unmap_auxc: mlx4_cleanup_icm_table(dev, &priv->qp_table.auxc_table); err_unmap_qp: mlx4_cleanup_icm_table(dev, &priv->qp_table.qp_table); err_unmap_dmpt: mlx4_cleanup_icm_table(dev, &priv->mr_table.dmpt_table); err_unmap_mtt: mlx4_cleanup_icm_table(dev, &priv->mr_table.mtt_table); err_unmap_eq: mlx4_cleanup_icm_table(dev, &priv->eq_table.table); err_unmap_cmpt: mlx4_cleanup_icm_table(dev, &priv->eq_table.cmpt_table); mlx4_cleanup_icm_table(dev, &priv->cq_table.cmpt_table); mlx4_cleanup_icm_table(dev, &priv->srq_table.cmpt_table); mlx4_cleanup_icm_table(dev, &priv->qp_table.cmpt_table); err_unmap_aux: mlx4_UNMAP_ICM_AUX(dev); err_free_aux: mlx4_free_icm(dev, priv->fw.aux_icm, 0); return err; } static void mlx4_free_icms(struct mlx4_dev *dev) { struct mlx4_priv *priv = mlx4_priv(dev); mlx4_cleanup_icm_table(dev, &priv->mcg_table.table); mlx4_cleanup_icm_table(dev, &priv->srq_table.table); mlx4_cleanup_icm_table(dev, &priv->cq_table.table); mlx4_cleanup_icm_table(dev, &priv->qp_table.rdmarc_table); mlx4_cleanup_icm_table(dev, &priv->qp_table.altc_table); mlx4_cleanup_icm_table(dev, &priv->qp_table.auxc_table); mlx4_cleanup_icm_table(dev, &priv->qp_table.qp_table); mlx4_cleanup_icm_table(dev, &priv->mr_table.dmpt_table); mlx4_cleanup_icm_table(dev, &priv->mr_table.mtt_table); mlx4_cleanup_icm_table(dev, &priv->eq_table.table); mlx4_cleanup_icm_table(dev, &priv->eq_table.cmpt_table); mlx4_cleanup_icm_table(dev, &priv->cq_table.cmpt_table); mlx4_cleanup_icm_table(dev, &priv->srq_table.cmpt_table); mlx4_cleanup_icm_table(dev, &priv->qp_table.cmpt_table); mlx4_UNMAP_ICM_AUX(dev); mlx4_free_icm(dev, priv->fw.aux_icm, 0); } static void mlx4_slave_exit(struct mlx4_dev *dev) { struct mlx4_priv *priv = mlx4_priv(dev); mutex_lock(&priv->cmd.slave_cmd_mutex); if (mlx4_comm_cmd(dev, MLX4_COMM_CMD_RESET, 0, MLX4_COMM_TIME)) mlx4_warn(dev, "Failed to close slave function.\n"); mutex_unlock(&priv->cmd.slave_cmd_mutex); } static int map_bf_area(struct mlx4_dev *dev) { struct mlx4_priv *priv = mlx4_priv(dev); resource_size_t bf_start; resource_size_t bf_len; int err = 0; if (!dev->caps.bf_reg_size) return -ENXIO; bf_start = pci_resource_start(dev->pdev, 2) + (dev->caps.num_uars << PAGE_SHIFT); bf_len = pci_resource_len(dev->pdev, 2) - (dev->caps.num_uars << PAGE_SHIFT); priv->bf_mapping = io_mapping_create_wc(bf_start, bf_len); if (!priv->bf_mapping) err = -ENOMEM; return err; } static void unmap_bf_area(struct mlx4_dev *dev) { if (mlx4_priv(dev)->bf_mapping) io_mapping_free(mlx4_priv(dev)->bf_mapping); } cycle_t mlx4_read_clock(struct mlx4_dev *dev) { u32 clockhi, clocklo, clockhi1; cycle_t cycles; int i; struct mlx4_priv *priv = mlx4_priv(dev); for (i = 0; i < 10; i++) { clockhi = swab32(readl(priv->clock_mapping)); clocklo = swab32(readl(priv->clock_mapping + 4)); clockhi1 = swab32(readl(priv->clock_mapping)); if (clockhi == clockhi1) break; } cycles = (u64) clockhi << 32 | (u64) clocklo; return cycles; } EXPORT_SYMBOL_GPL(mlx4_read_clock); static int map_internal_clock(struct mlx4_dev *dev) { struct mlx4_priv *priv = mlx4_priv(dev); priv->clock_mapping = ioremap(pci_resource_start(dev->pdev, priv->fw.clock_bar) + priv->fw.clock_offset, MLX4_CLOCK_SIZE); if (!priv->clock_mapping) return -ENOMEM; return 0; } static void unmap_internal_clock(struct mlx4_dev *dev) { struct mlx4_priv *priv = mlx4_priv(dev); if (priv->clock_mapping) iounmap(priv->clock_mapping); } static void mlx4_close_hca(struct mlx4_dev *dev) { unmap_internal_clock(dev); unmap_bf_area(dev); if (mlx4_is_slave(dev)) mlx4_slave_exit(dev); else { mlx4_CLOSE_HCA(dev, 0); mlx4_free_icms(dev); mlx4_UNMAP_FA(dev); mlx4_free_icm(dev, mlx4_priv(dev)->fw.fw_icm, 0); } } static int mlx4_init_slave(struct mlx4_dev *dev) { struct mlx4_priv *priv = mlx4_priv(dev); u64 dma = (u64) priv->mfunc.vhcr_dma; int num_of_reset_retries = NUM_OF_RESET_RETRIES; int ret_from_reset = 0; u32 slave_read; u32 cmd_channel_ver; mutex_lock(&priv->cmd.slave_cmd_mutex); priv->cmd.max_cmds = 1; mlx4_warn(dev, "Sending reset\n"); ret_from_reset = mlx4_comm_cmd(dev, MLX4_COMM_CMD_RESET, 0, MLX4_COMM_TIME); /* if we are in the middle of flr the slave will try * NUM_OF_RESET_RETRIES times before leaving.*/ if (ret_from_reset) { if (MLX4_DELAY_RESET_SLAVE == ret_from_reset) { msleep(SLEEP_TIME_IN_RESET); while (ret_from_reset && num_of_reset_retries) { mlx4_warn(dev, "slave is currently in the" "middle of FLR. retrying..." "(try num:%d)\n", (NUM_OF_RESET_RETRIES - num_of_reset_retries + 1)); ret_from_reset = mlx4_comm_cmd(dev, MLX4_COMM_CMD_RESET, 0, MLX4_COMM_TIME); num_of_reset_retries = num_of_reset_retries - 1; } } else goto err; } /* check the driver version - the slave I/F revision * must match the master's */ slave_read = swab32(readl(&priv->mfunc.comm->slave_read)); cmd_channel_ver = mlx4_comm_get_version(); if (MLX4_COMM_GET_IF_REV(cmd_channel_ver) != MLX4_COMM_GET_IF_REV(slave_read)) { mlx4_err(dev, "slave driver version is not supported" " by the master\n"); goto err; } mlx4_warn(dev, "Sending vhcr0\n"); if (mlx4_comm_cmd(dev, MLX4_COMM_CMD_VHCR0, dma >> 48, MLX4_COMM_TIME)) goto err; if (mlx4_comm_cmd(dev, MLX4_COMM_CMD_VHCR1, dma >> 32, MLX4_COMM_TIME)) goto err; if (mlx4_comm_cmd(dev, MLX4_COMM_CMD_VHCR2, dma >> 16, MLX4_COMM_TIME)) goto err; if (mlx4_comm_cmd(dev, MLX4_COMM_CMD_VHCR_EN, dma, MLX4_COMM_TIME)) goto err; mutex_unlock(&priv->cmd.slave_cmd_mutex); return 0; err: mlx4_comm_cmd(dev, MLX4_COMM_CMD_RESET, 0, 0); mutex_unlock(&priv->cmd.slave_cmd_mutex); return -EIO; } static void mlx4_parav_master_pf_caps(struct mlx4_dev *dev) { int i; for (i = 1; i <= dev->caps.num_ports; i++) { if (dev->caps.port_type[i] == MLX4_PORT_TYPE_ETH) dev->caps.gid_table_len[i] = mlx4_get_slave_num_gids(dev, 0); else dev->caps.gid_table_len[i] = 1; dev->caps.pkey_table_len[i] = dev->phys_caps.pkey_phys_table_len[i] - 1; } } static int choose_log_fs_mgm_entry_size(int qp_per_entry) { int i = MLX4_MIN_MGM_LOG_ENTRY_SIZE; for (i = MLX4_MIN_MGM_LOG_ENTRY_SIZE; i <= MLX4_MAX_MGM_LOG_ENTRY_SIZE; i++) { if (qp_per_entry <= 4 * ((1 << i) / 16 - 2)) break; } return (i <= MLX4_MAX_MGM_LOG_ENTRY_SIZE) ? i : -1; } static void choose_steering_mode(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap) { // This is only valid to the integrated driver. // The new ported mlx4_core driver is in B0 steering mode by default // and the old mlx4_en driver is in A0 steering mode by default. // If high_rate_steer == TRUE it means that A0 steering mode is on. // The integration fix is to hard code high_rate_steer to TRUE. high_rate_steer = 1; if (high_rate_steer && !mlx4_is_mfunc(dev)) { dev->caps.flags &= ~(MLX4_DEV_CAP_FLAG_VEP_MC_STEER | MLX4_DEV_CAP_FLAG_VEP_UC_STEER); dev_cap->flags2 &= ~MLX4_DEV_CAP_FLAG2_FS_EN; } if (mlx4_log_num_mgm_entry_size == -1 && dev_cap->flags2 & MLX4_DEV_CAP_FLAG2_FS_EN && dev_cap->fs_log_max_ucast_qp_range_size == 0 && (!mlx4_is_mfunc(dev) || (dev_cap->fs_max_num_qp_per_entry >= (num_vfs + 1))) && choose_log_fs_mgm_entry_size(dev_cap->fs_max_num_qp_per_entry) >= MLX4_MIN_MGM_LOG_ENTRY_SIZE) { dev->oper_log_mgm_entry_size = choose_log_fs_mgm_entry_size(dev_cap->fs_max_num_qp_per_entry); dev->caps.steering_mode = MLX4_STEERING_MODE_DEVICE_MANAGED; dev->caps.num_qp_per_mgm = dev_cap->fs_max_num_qp_per_entry; } else { if (dev->caps.flags & MLX4_DEV_CAP_FLAG_VEP_UC_STEER && dev->caps.flags & MLX4_DEV_CAP_FLAG_VEP_MC_STEER) { dev->caps.steering_mode = MLX4_STEERING_MODE_B0; } else { dev->caps.steering_mode = MLX4_STEERING_MODE_A0; if (dev->caps.flags & MLX4_DEV_CAP_FLAG_VEP_UC_STEER || dev->caps.flags & MLX4_DEV_CAP_FLAG_VEP_MC_STEER) mlx4_warn(dev, "Must have both UC_STEER and MC_STEER flags " "set to use B0 steering. Falling back to A0 steering mode.\n"); } dev->oper_log_mgm_entry_size = mlx4_log_num_mgm_entry_size > 0 ? mlx4_log_num_mgm_entry_size : MLX4_DEFAULT_MGM_LOG_ENTRY_SIZE; dev->caps.num_qp_per_mgm = mlx4_get_qp_per_mgm(dev); } mlx4_dbg(dev, "Steering mode is: %s, oper_log_mgm_entry_size = %d, " "log_num_mgm_entry_size = %d\n", mlx4_steering_mode_str(dev->caps.steering_mode), dev->oper_log_mgm_entry_size, mlx4_log_num_mgm_entry_size); } static int mlx4_init_hca(struct mlx4_dev *dev) { struct mlx4_priv *priv = mlx4_priv(dev); struct mlx4_dev_cap *dev_cap = NULL; struct mlx4_adapter adapter; struct mlx4_mod_stat_cfg mlx4_cfg; struct mlx4_profile profile; struct mlx4_init_hca_param init_hca; u64 icm_size; int err; if (!mlx4_is_slave(dev)) { err = mlx4_QUERY_FW(dev); if (err) { if (err == -EACCES) mlx4_info(dev, "non-primary physical function, skipping.\n"); else mlx4_err(dev, "QUERY_FW command failed, aborting.\n"); return err; } err = mlx4_load_fw(dev); if (err) { mlx4_err(dev, "Failed to start FW, aborting.\n"); return err; } mlx4_cfg.log_pg_sz_m = 1; mlx4_cfg.log_pg_sz = 0; err = mlx4_MOD_STAT_CFG(dev, &mlx4_cfg); if (err) mlx4_warn(dev, "Failed to override log_pg_sz parameter\n"); dev_cap = kzalloc(sizeof *dev_cap, GFP_KERNEL); if (!dev_cap) { mlx4_err(dev, "Failed to allocate memory for dev_cap\n"); err = -ENOMEM; goto err_stop_fw; } err = mlx4_dev_cap(dev, dev_cap); if (err) { mlx4_err(dev, "QUERY_DEV_CAP command failed, aborting.\n"); goto err_stop_fw; } choose_steering_mode(dev, dev_cap); if (mlx4_is_master(dev)) mlx4_parav_master_pf_caps(dev); process_mod_param_profile(&profile); if (dev->caps.steering_mode == MLX4_STEERING_MODE_DEVICE_MANAGED) profile.num_mcg = MLX4_FS_NUM_MCG; icm_size = mlx4_make_profile(dev, &profile, dev_cap, &init_hca); if ((long long) icm_size < 0) { err = icm_size; goto err_stop_fw; } dev->caps.max_fmr_maps = (1 << (32 - ilog2(dev->caps.num_mpts))) - 1; init_hca.log_uar_sz = ilog2(dev->caps.num_uars); init_hca.uar_page_sz = PAGE_SHIFT - 12; err = mlx4_init_icm(dev, dev_cap, &init_hca, icm_size); if (err) goto err_stop_fw; err = mlx4_INIT_HCA(dev, &init_hca); if (err) { mlx4_err(dev, "INIT_HCA command failed, aborting.\n"); goto err_free_icm; } } else { err = mlx4_init_slave(dev); if (err) { mlx4_err(dev, "Failed to initialize slave\n"); return err; } err = mlx4_slave_cap(dev); if (err) { mlx4_err(dev, "Failed to obtain slave caps\n"); goto err_close; } } if (map_bf_area(dev)) mlx4_dbg(dev, "Failed to map blue flame area\n"); /* * Read HCA frequency by QUERY_HCA command */ if (dev->caps.cq_timestamp) { memset(&init_hca, 0, sizeof(init_hca)); err = mlx4_QUERY_HCA(dev, &init_hca); if (err) { mlx4_err(dev, "QUERY_HCA command failed, disable timestamp.\n"); dev->caps.cq_timestamp = 0; } else dev->caps.hca_core_clock = init_hca.hca_core_clock; /* * In case we got HCA frequency 0 - disable timestamping * to avoid dividing by zero */ if (!dev->caps.hca_core_clock) { dev->caps.cq_timestamp = 0; mlx4_err(dev, "HCA frequency is 0. " "Timestamping is not supported."); } /* * Map internal clock, in case of failure disable timestamping */ if (map_internal_clock(dev)) { dev->caps.cq_timestamp = 0; mlx4_err(dev, "Failed to map internal clock. " "Timestamping is not supported.\n"); } } /*Only the master set the ports, all the rest got it from it.*/ if (!mlx4_is_slave(dev)) mlx4_set_port_mask(dev); err = mlx4_QUERY_ADAPTER(dev, &adapter); if (err) { mlx4_err(dev, "QUERY_ADAPTER command failed, aborting.\n"); goto unmap_bf; } priv->eq_table.inta_pin = adapter.inta_pin; memcpy(dev->board_id, adapter.board_id, sizeof dev->board_id); if (!mlx4_is_slave(dev)) kfree(dev_cap); return 0; unmap_bf: unmap_internal_clock(dev); unmap_bf_area(dev); if (mlx4_is_slave(dev)) { kfree(dev->caps.qp0_tunnel); kfree(dev->caps.qp0_proxy); kfree(dev->caps.qp1_tunnel); kfree(dev->caps.qp1_proxy); } err_close: if (mlx4_is_slave(dev)) mlx4_slave_exit(dev); else mlx4_CLOSE_HCA(dev, 0); err_free_icm: if (!mlx4_is_slave(dev)) mlx4_free_icms(dev); err_stop_fw: if (!mlx4_is_slave(dev)) { mlx4_UNMAP_FA(dev); mlx4_free_icm(dev, priv->fw.fw_icm, 0); if (dev_cap) kfree(dev_cap); } return err; } static int mlx4_init_counters_table(struct mlx4_dev *dev) { struct mlx4_priv *priv = mlx4_priv(dev); int res; int nent_pow2; if (!(dev->caps.flags & MLX4_DEV_CAP_FLAG_COUNTERS)) return -ENOENT; nent_pow2 = roundup_pow_of_two(dev->caps.max_counters); res = mlx4_bitmap_init(&priv->counters_bitmap, nent_pow2, nent_pow2 - 1, 0, nent_pow2 - dev->caps.max_counters); if (res) return res; if (dev->caps.max_counters == dev->caps.max_basic_counters) return 0; res = mlx4_cmd(dev, MLX4_IF_STATE_EXTENDED, 0, 0, MLX4_CMD_SET_IF_STAT, MLX4_CMD_TIME_CLASS_A, MLX4_CMD_NATIVE); if (res) mlx4_err(dev, "Failed to set extended counters (err=%d)\n", res); return res; } static void mlx4_cleanup_counters_table(struct mlx4_dev *dev) { if (!mlx4_is_slave(dev) && (dev->caps.flags & MLX4_DEV_CAP_FLAG_COUNTERS)) mlx4_bitmap_cleanup(&mlx4_priv(dev)->counters_bitmap); } int __mlx4_counter_alloc(struct mlx4_dev *dev, u32 *idx) { struct mlx4_priv *priv = mlx4_priv(dev); if (!(dev->caps.flags & MLX4_DEV_CAP_FLAG_COUNTERS)) return -ENOENT; *idx = mlx4_bitmap_alloc(&priv->counters_bitmap); if (*idx == -1) return -ENOMEM; return 0; } int mlx4_counter_alloc(struct mlx4_dev *dev, u32 *idx) { u64 out_param; int err; if (mlx4_is_mfunc(dev)) { err = mlx4_cmd_imm(dev, 0, &out_param, RES_COUNTER, RES_OP_RESERVE, MLX4_CMD_ALLOC_RES, MLX4_CMD_TIME_CLASS_A, MLX4_CMD_WRAPPED); if (!err) *idx = get_param_l(&out_param); return err; } return __mlx4_counter_alloc(dev, idx); } EXPORT_SYMBOL_GPL(mlx4_counter_alloc); void __mlx4_counter_free(struct mlx4_dev *dev, u32 idx) { mlx4_bitmap_free(&mlx4_priv(dev)->counters_bitmap, idx); return; } void mlx4_counter_free(struct mlx4_dev *dev, u32 idx) { u64 in_param = 0; if (mlx4_is_mfunc(dev)) { set_param_l(&in_param, idx); mlx4_cmd(dev, in_param, RES_COUNTER, RES_OP_RESERVE, MLX4_CMD_FREE_RES, MLX4_CMD_TIME_CLASS_A, MLX4_CMD_WRAPPED); return; } __mlx4_counter_free(dev, idx); } EXPORT_SYMBOL_GPL(mlx4_counter_free); static int mlx4_setup_hca(struct mlx4_dev *dev) { struct mlx4_priv *priv = mlx4_priv(dev); int err; int port; __be32 ib_port_default_caps; err = mlx4_init_uar_table(dev); if (err) { mlx4_err(dev, "Failed to initialize " "user access region table (err=%d), aborting.\n", err); return err; } err = mlx4_uar_alloc(dev, &priv->driver_uar); if (err) { mlx4_err(dev, "Failed to allocate driver access region " "(err=%d), aborting.\n", err); goto err_uar_table_free; } priv->kar = ioremap((phys_addr_t) priv->driver_uar.pfn << PAGE_SHIFT, PAGE_SIZE); if (!priv->kar) { mlx4_err(dev, "Couldn't map kernel access region, " "aborting.\n"); err = -ENOMEM; goto err_uar_free; } err = mlx4_init_pd_table(dev); if (err) { mlx4_err(dev, "Failed to initialize " "protection domain table (err=%d), aborting.\n", err); goto err_kar_unmap; } err = mlx4_init_xrcd_table(dev); if (err) { mlx4_err(dev, "Failed to initialize " "reliable connection domain table (err=%d), " "aborting.\n", err); goto err_pd_table_free; } err = mlx4_init_mr_table(dev); if (err) { mlx4_err(dev, "Failed to initialize " "memory region table (err=%d), aborting.\n", err); goto err_xrcd_table_free; } err = mlx4_init_eq_table(dev); if (err) { mlx4_err(dev, "Failed to initialize " "event queue table (err=%d), aborting.\n", err); goto err_mr_table_free; } err = mlx4_cmd_use_events(dev); if (err) { mlx4_err(dev, "Failed to switch to event-driven " "firmware commands (err=%d), aborting.\n", err); goto err_eq_table_free; } err = mlx4_NOP(dev); if (err) { if (dev->flags & MLX4_FLAG_MSI_X) { mlx4_warn(dev, "NOP command failed to generate MSI-X " "interrupt IRQ %d).\n", priv->eq_table.eq[dev->caps.num_comp_vectors].irq); mlx4_warn(dev, "Trying again without MSI-X.\n"); } else { mlx4_err(dev, "NOP command failed to generate interrupt " "(IRQ %d), aborting.\n", priv->eq_table.eq[dev->caps.num_comp_vectors].irq); mlx4_err(dev, "BIOS or ACPI interrupt routing problem?\n"); } goto err_cmd_poll; } mlx4_dbg(dev, "NOP command IRQ test passed\n"); err = mlx4_init_cq_table(dev); if (err) { mlx4_err(dev, "Failed to initialize " "completion queue table (err=%d), aborting.\n", err); goto err_cmd_poll; } err = mlx4_init_srq_table(dev); if (err) { mlx4_err(dev, "Failed to initialize " "shared receive queue table (err=%d), aborting.\n", err); goto err_cq_table_free; } err = mlx4_init_qp_table(dev); if (err) { mlx4_err(dev, "Failed to initialize " "queue pair table (err=%d), aborting.\n", err); goto err_srq_table_free; } if (!mlx4_is_slave(dev)) { err = mlx4_init_mcg_table(dev); if (err) { mlx4_err(dev, "Failed to initialize " "multicast group table (err=%d), aborting.\n", err); goto err_qp_table_free; } err = mlx4_init_counters_table(dev); if (err && err != -ENOENT) { mlx4_err(dev, "Failed to initialize counters table (err=%d), " "aborting.\n", err); goto err_mcg_table_free; } for (port = 1; port <= dev->caps.num_ports; port++) { ib_port_default_caps = 0; err = mlx4_get_port_ib_caps(dev, port, &ib_port_default_caps); if (err) mlx4_warn(dev, "failed to get port %d default " "ib capabilities (%d). Continuing " "with caps = 0\n", port, err); dev->caps.ib_port_def_cap[port] = ib_port_default_caps; /* initialize per-slave default ib port capabilities */ if (mlx4_is_master(dev)) { int i; for (i = 0; i < dev->num_slaves; i++) { if (i == mlx4_master_func_num(dev)) continue; priv->mfunc.master.slave_state[i].ib_cap_mask[port] = ib_port_default_caps; } } if (mlx4_is_mfunc(dev)) dev->caps.port_ib_mtu[port] = IB_MTU_2048; else dev->caps.port_ib_mtu[port] = IB_MTU_4096; err = mlx4_SET_PORT(dev, port, mlx4_is_master(dev) ? dev->caps.pkey_table_len[port] : -1); if (err) { mlx4_err(dev, "Failed to set port %d (err=%d), " "aborting\n", port, err); goto err_counters_table_free; } } } return 0; err_counters_table_free: mlx4_cleanup_counters_table(dev); err_mcg_table_free: mlx4_cleanup_mcg_table(dev); err_qp_table_free: mlx4_cleanup_qp_table(dev); err_srq_table_free: mlx4_cleanup_srq_table(dev); err_cq_table_free: mlx4_cleanup_cq_table(dev); err_cmd_poll: mlx4_cmd_use_polling(dev); err_eq_table_free: mlx4_cleanup_eq_table(dev); err_mr_table_free: mlx4_cleanup_mr_table(dev); err_xrcd_table_free: mlx4_cleanup_xrcd_table(dev); err_pd_table_free: mlx4_cleanup_pd_table(dev); err_kar_unmap: iounmap(priv->kar); err_uar_free: mlx4_uar_free(dev, &priv->driver_uar); err_uar_table_free: mlx4_cleanup_uar_table(dev); return err; } static void mlx4_enable_msi_x(struct mlx4_dev *dev) { struct mlx4_priv *priv = mlx4_priv(dev); struct msix_entry *entries; int nreq = min_t(int, dev->caps.num_ports * min_t(int, num_possible_cpus() + 1, MAX_MSIX_P_PORT) + MSIX_LEGACY_SZ, MAX_MSIX); int err; int i; if (msi_x) { nreq = min_t(int, dev->caps.num_eqs - dev->caps.reserved_eqs, nreq); entries = kcalloc(nreq, sizeof *entries, GFP_KERNEL); if (!entries) goto no_msi; for (i = 0; i < nreq; ++i) entries[i].entry = i; retry: err = pci_enable_msix(dev->pdev, entries, nreq); if (err) { /* Try again if at least 2 vectors are available */ if (err > 1) { mlx4_info(dev, "Requested %d vectors, " "but only %d MSI-X vectors available, " "trying again\n", nreq, err); nreq = err; goto retry; } kfree(entries); goto no_msi; } if (nreq < MSIX_LEGACY_SZ + dev->caps.num_ports * MIN_MSIX_P_PORT) { /*Working in legacy mode , all EQ's shared*/ dev->caps.comp_pool = 0; dev->caps.num_comp_vectors = nreq - 1; } else { dev->caps.comp_pool = nreq - MSIX_LEGACY_SZ; dev->caps.num_comp_vectors = MSIX_LEGACY_SZ - 1; } for (i = 0; i < nreq; ++i) priv->eq_table.eq[i].irq = entries[i].vector; dev->flags |= MLX4_FLAG_MSI_X; kfree(entries); return; } no_msi: dev->caps.num_comp_vectors = 1; dev->caps.comp_pool = 0; for (i = 0; i < 2; ++i) priv->eq_table.eq[i].irq = dev->pdev->irq; } static int mlx4_init_port_info(struct mlx4_dev *dev, int port) { struct mlx4_port_info *info = &mlx4_priv(dev)->port[port]; int err = 0; info->dev = dev; info->port = port; if (!mlx4_is_slave(dev)) { mlx4_init_mac_table(dev, &info->mac_table); mlx4_init_vlan_table(dev, &info->vlan_table); info->base_qpn = mlx4_get_base_qpn(dev, port); } sprintf(info->dev_name, "mlx4_port%d", port); info->port_attr.attr.name = info->dev_name; if (mlx4_is_mfunc(dev)) info->port_attr.attr.mode = S_IRUGO; else { info->port_attr.attr.mode = S_IRUGO | S_IWUSR; info->port_attr.store = set_port_type; } info->port_attr.show = show_port_type; sysfs_attr_init(&info->port_attr.attr); err = device_create_file(&dev->pdev->dev, &info->port_attr); if (err) { mlx4_err(dev, "Failed to create file for port %d\n", port); info->port = -1; } sprintf(info->dev_mtu_name, "mlx4_port%d_mtu", port); info->port_mtu_attr.attr.name = info->dev_mtu_name; if (mlx4_is_mfunc(dev)) info->port_mtu_attr.attr.mode = S_IRUGO; else { info->port_mtu_attr.attr.mode = S_IRUGO | S_IWUSR; info->port_mtu_attr.store = set_port_ib_mtu; } info->port_mtu_attr.show = show_port_ib_mtu; sysfs_attr_init(&info->port_mtu_attr.attr); err = device_create_file(&dev->pdev->dev, &info->port_mtu_attr); if (err) { mlx4_err(dev, "Failed to create mtu file for port %d\n", port); device_remove_file(&info->dev->pdev->dev, &info->port_attr); info->port = -1; } return err; } static void mlx4_cleanup_port_info(struct mlx4_port_info *info) { if (info->port < 0) return; device_remove_file(&info->dev->pdev->dev, &info->port_attr); device_remove_file(&info->dev->pdev->dev, &info->port_mtu_attr); } static int mlx4_init_steering(struct mlx4_dev *dev) { struct mlx4_priv *priv = mlx4_priv(dev); int num_entries = dev->caps.num_ports; int i, j; priv->steer = kzalloc(sizeof(struct mlx4_steer) * num_entries, GFP_KERNEL); if (!priv->steer) return -ENOMEM; for (i = 0; i < num_entries; i++) for (j = 0; j < MLX4_NUM_STEERS; j++) { INIT_LIST_HEAD(&priv->steer[i].promisc_qps[j]); INIT_LIST_HEAD(&priv->steer[i].steer_entries[j]); } return 0; } static void mlx4_clear_steering(struct mlx4_dev *dev) { struct mlx4_priv *priv = mlx4_priv(dev); struct mlx4_steer_index *entry, *tmp_entry; struct mlx4_promisc_qp *pqp, *tmp_pqp; int num_entries = dev->caps.num_ports; int i, j; for (i = 0; i < num_entries; i++) { for (j = 0; j < MLX4_NUM_STEERS; j++) { list_for_each_entry_safe(pqp, tmp_pqp, &priv->steer[i].promisc_qps[j], list) { list_del(&pqp->list); kfree(pqp); } list_for_each_entry_safe(entry, tmp_entry, &priv->steer[i].steer_entries[j], list) { list_del(&entry->list); list_for_each_entry_safe(pqp, tmp_pqp, &entry->duplicates, list) { list_del(&pqp->list); kfree(pqp); } kfree(entry); } } } kfree(priv->steer); } static int extended_func_num(struct pci_dev *pdev) { return PCI_SLOT(pdev->devfn) * 8 + PCI_FUNC(pdev->devfn); } #define MLX4_OWNER_BASE 0x8069c #define MLX4_OWNER_SIZE 4 static int mlx4_get_ownership(struct mlx4_dev *dev) { void __iomem *owner; u32 ret; if (pci_channel_offline(dev->pdev)) return -EIO; owner = ioremap(pci_resource_start(dev->pdev, 0) + MLX4_OWNER_BASE, MLX4_OWNER_SIZE); if (!owner) { mlx4_err(dev, "Failed to obtain ownership bit\n"); return -ENOMEM; } ret = readl(owner); iounmap(owner); return (int) !!ret; } static void mlx4_free_ownership(struct mlx4_dev *dev) { void __iomem *owner; if (pci_channel_offline(dev->pdev)) return; owner = ioremap(pci_resource_start(dev->pdev, 0) + MLX4_OWNER_BASE, MLX4_OWNER_SIZE); if (!owner) { mlx4_err(dev, "Failed to obtain ownership bit\n"); return; } writel(0, owner); msleep(1000); iounmap(owner); } static int __mlx4_init_one(struct pci_dev *pdev, int pci_dev_data) { struct mlx4_priv *priv; struct mlx4_dev *dev; int err; int port; pr_info(DRV_NAME ": Initializing %s\n", pci_name(pdev)); err = pci_enable_device(pdev); if (err) { dev_err(&pdev->dev, "Cannot enable PCI device, " "aborting.\n"); return err; } if (num_vfs > MLX4_MAX_NUM_VF) { dev_err(&pdev->dev, "There are more VF's (%d) than allowed(%d)\n", num_vfs, MLX4_MAX_NUM_VF); return -EINVAL; } if (num_vfs < 0) { dev_err(&pdev->dev, "num_vfs module parameter cannot be negative\n"); return -EINVAL; } /* * Check for BARs. */ if (!(pci_dev_data & MLX4_PCI_DEV_IS_VF) && !(pci_resource_flags(pdev, 0) & IORESOURCE_MEM)) { dev_err(&pdev->dev, "Missing DCS, aborting." "(driver_data: 0x%x, pci_resource_flags(pdev, 0):0x%x)\n", pci_dev_data, pci_resource_flags(pdev, 0)); err = -ENODEV; goto err_disable_pdev; } if (!(pci_resource_flags(pdev, 2) & IORESOURCE_MEM)) { dev_err(&pdev->dev, "Missing UAR, aborting.\n"); err = -ENODEV; goto err_disable_pdev; } err = pci_request_regions(pdev, DRV_NAME); if (err) { dev_err(&pdev->dev, "Couldn't get PCI resources, aborting\n"); goto err_disable_pdev; } pci_set_master(pdev); err = pci_set_dma_mask(pdev, DMA_BIT_MASK(64)); if (err) { dev_warn(&pdev->dev, "Warning: couldn't set 64-bit PCI DMA mask.\n"); err = pci_set_dma_mask(pdev, DMA_BIT_MASK(32)); if (err) { dev_err(&pdev->dev, "Can't set PCI DMA mask, aborting.\n"); goto err_release_regions; } } err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64)); if (err) { dev_warn(&pdev->dev, "Warning: couldn't set 64-bit " "consistent PCI DMA mask.\n"); err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32)); if (err) { dev_err(&pdev->dev, "Can't set consistent PCI DMA mask, " "aborting.\n"); goto err_release_regions; } } /* Allow large DMA segments, up to the firmware limit of 1 GB */ dma_set_max_seg_size(&pdev->dev, 1024 * 1024 * 1024); priv = kzalloc(sizeof *priv, GFP_KERNEL); if (!priv) { dev_err(&pdev->dev, "Device struct alloc failed, " "aborting.\n"); err = -ENOMEM; goto err_release_regions; } dev = &priv->dev; dev->pdev = pdev; INIT_LIST_HEAD(&priv->ctx_list); spin_lock_init(&priv->ctx_lock); mutex_init(&priv->port_mutex); INIT_LIST_HEAD(&priv->pgdir_list); mutex_init(&priv->pgdir_mutex); INIT_LIST_HEAD(&priv->bf_list); mutex_init(&priv->bf_mutex); dev->rev_id = pdev->revision; dev->numa_node = dev_to_node(&pdev->dev); /* Detect if this device is a virtual function */ if (pci_dev_data & MLX4_PCI_DEV_IS_VF) { /* When acting as pf, we normally skip vfs unless explicitly * requested to probe them. */ if (num_vfs && extended_func_num(pdev) > probe_vf) { mlx4_warn(dev, "Skipping virtual function:%d\n", extended_func_num(pdev)); err = -ENODEV; goto err_free_dev; } mlx4_warn(dev, "Detected virtual function - running in slave mode\n"); dev->flags |= MLX4_FLAG_SLAVE; } else { /* We reset the device and enable SRIOV only for physical * devices. Try to claim ownership on the device; * if already taken, skip -- do not allow multiple PFs */ err = mlx4_get_ownership(dev); if (err) { if (err < 0) goto err_free_dev; else { mlx4_warn(dev, "Multiple PFs not yet supported." " Skipping PF.\n"); err = -EINVAL; goto err_free_dev; } } if (num_vfs) { mlx4_warn(dev, "Enabling SR-IOV with %d VFs\n", num_vfs); err = pci_enable_sriov(pdev, num_vfs); if (err) { mlx4_err(dev, "Failed to enable SR-IOV, continuing without SR-IOV (err = %d).\n", err); err = 0; } else { mlx4_warn(dev, "Running in master mode\n"); dev->flags |= MLX4_FLAG_SRIOV | MLX4_FLAG_MASTER; dev->num_vfs = num_vfs; } } atomic_set(&priv->opreq_count, 0); INIT_WORK(&priv->opreq_task, mlx4_opreq_action); /* * Now reset the HCA before we touch the PCI capabilities or * attempt a firmware command, since a boot ROM may have left * the HCA in an undefined state. */ err = mlx4_reset(dev); if (err) { mlx4_err(dev, "Failed to reset HCA, aborting.\n"); goto err_sriov; } } slave_start: err = mlx4_cmd_init(dev); if (err) { mlx4_err(dev, "Failed to init command interface, aborting.\n"); goto err_sriov; } /* In slave functions, the communication channel must be initialized * before posting commands. Also, init num_slaves before calling * mlx4_init_hca */ if (mlx4_is_mfunc(dev)) { if (mlx4_is_master(dev)) dev->num_slaves = MLX4_MAX_NUM_SLAVES; else { dev->num_slaves = 0; err = mlx4_multi_func_init(dev); if (err) { mlx4_err(dev, "Failed to init slave mfunc" " interface, aborting.\n"); goto err_cmd; } } } err = mlx4_init_hca(dev); if (err) { if (err == -EACCES) { /* Not primary Physical function * Running in slave mode */ mlx4_cmd_cleanup(dev); dev->flags |= MLX4_FLAG_SLAVE; dev->flags &= ~MLX4_FLAG_MASTER; goto slave_start; } else goto err_mfunc; } /* In master functions, the communication channel must be initialized * after obtaining its address from fw */ if (mlx4_is_master(dev)) { err = mlx4_multi_func_init(dev); if (err) { mlx4_err(dev, "Failed to init master mfunc" "interface, aborting.\n"); goto err_close; } } err = mlx4_alloc_eq_table(dev); if (err) goto err_master_mfunc; priv->msix_ctl.pool_bm = 0; mutex_init(&priv->msix_ctl.pool_lock); mlx4_enable_msi_x(dev); if ((mlx4_is_mfunc(dev)) && !(dev->flags & MLX4_FLAG_MSI_X)) { err = -ENOSYS; mlx4_err(dev, "INTx is not supported in multi-function mode." " aborting.\n"); goto err_free_eq; } if (!mlx4_is_slave(dev)) { err = mlx4_init_steering(dev); if (err) goto err_free_eq; } err = mlx4_setup_hca(dev); if (err == -EBUSY && (dev->flags & MLX4_FLAG_MSI_X) && !mlx4_is_mfunc(dev)) { dev->flags &= ~MLX4_FLAG_MSI_X; dev->caps.num_comp_vectors = 1; dev->caps.comp_pool = 0; pci_disable_msix(pdev); err = mlx4_setup_hca(dev); } if (err) goto err_steer; mlx4_init_quotas(dev); for (port = 1; port <= dev->caps.num_ports; port++) { err = mlx4_init_port_info(dev, port); if (err) goto err_port; } err = mlx4_register_device(dev); if (err) goto err_port; err = mlx4_sense_init(dev); if (err) goto err_port; mlx4_start_sense(dev); priv->pci_dev_data = pci_dev_data; pci_set_drvdata(pdev, dev); return 0; err_port: for (--port; port >= 1; --port) mlx4_cleanup_port_info(&priv->port[port]); mlx4_cleanup_counters_table(dev); mlx4_cleanup_mcg_table(dev); mlx4_cleanup_qp_table(dev); mlx4_cleanup_srq_table(dev); mlx4_cleanup_cq_table(dev); mlx4_cmd_use_polling(dev); mlx4_cleanup_eq_table(dev); mlx4_cleanup_mr_table(dev); mlx4_cleanup_xrcd_table(dev); mlx4_cleanup_pd_table(dev); mlx4_cleanup_uar_table(dev); err_steer: if (!mlx4_is_slave(dev)) mlx4_clear_steering(dev); err_free_eq: mlx4_free_eq_table(dev); err_master_mfunc: if (mlx4_is_master(dev)) mlx4_multi_func_cleanup(dev); if (mlx4_is_slave(dev)) { kfree(dev->caps.qp0_tunnel); kfree(dev->caps.qp0_proxy); kfree(dev->caps.qp1_tunnel); kfree(dev->caps.qp1_proxy); } err_close: if (dev->flags & MLX4_FLAG_MSI_X) pci_disable_msix(pdev); mlx4_close_hca(dev); err_mfunc: if (mlx4_is_slave(dev)) mlx4_multi_func_cleanup(dev); err_cmd: mlx4_cmd_cleanup(dev); err_sriov: if (dev->flags & MLX4_FLAG_SRIOV) pci_disable_sriov(pdev); if (!mlx4_is_slave(dev)) mlx4_free_ownership(dev); err_free_dev: kfree(priv); err_release_regions: pci_release_regions(pdev); err_disable_pdev: pci_disable_device(pdev); pci_set_drvdata(pdev, NULL); return err; } static int __devinit mlx4_init_one(struct pci_dev *pdev, const struct pci_device_id *id) { printk_once(KERN_INFO "%s", mlx4_version); return __mlx4_init_one(pdev, id->driver_data); } static void mlx4_remove_one(struct pci_dev *pdev) { struct mlx4_dev *dev = pci_get_drvdata(pdev); struct mlx4_priv *priv = mlx4_priv(dev); int p; if (dev) { /* in SRIOV it is not allowed to unload the pf's * driver while there are alive vf's */ if (mlx4_is_master(dev)) { if (mlx4_how_many_lives_vf(dev)) mlx4_err(dev, "Removing PF when there are assigned VF's !!!\n"); } mlx4_sense_cleanup(dev); mlx4_unregister_device(dev); for (p = 1; p <= dev->caps.num_ports; p++) { mlx4_cleanup_port_info(&priv->port[p]); mlx4_CLOSE_PORT(dev, p); } if (mlx4_is_master(dev)) mlx4_free_resource_tracker(dev, RES_TR_FREE_SLAVES_ONLY); mlx4_cleanup_counters_table(dev); mlx4_cleanup_mcg_table(dev); mlx4_cleanup_qp_table(dev); mlx4_cleanup_srq_table(dev); mlx4_cleanup_cq_table(dev); mlx4_cmd_use_polling(dev); mlx4_cleanup_eq_table(dev); mlx4_cleanup_mr_table(dev); mlx4_cleanup_xrcd_table(dev); mlx4_cleanup_pd_table(dev); if (mlx4_is_master(dev)) mlx4_free_resource_tracker(dev, RES_TR_FREE_STRUCTS_ONLY); iounmap(priv->kar); mlx4_uar_free(dev, &priv->driver_uar); mlx4_cleanup_uar_table(dev); if (!mlx4_is_slave(dev)) mlx4_clear_steering(dev); mlx4_free_eq_table(dev); if (mlx4_is_master(dev)) mlx4_multi_func_cleanup(dev); mlx4_close_hca(dev); if (mlx4_is_slave(dev)) mlx4_multi_func_cleanup(dev); mlx4_cmd_cleanup(dev); if (dev->flags & MLX4_FLAG_MSI_X) pci_disable_msix(pdev); if (dev->flags & MLX4_FLAG_SRIOV) { mlx4_warn(dev, "Disabling SR-IOV\n"); pci_disable_sriov(pdev); } if (!mlx4_is_slave(dev)) mlx4_free_ownership(dev); kfree(dev->caps.qp0_tunnel); kfree(dev->caps.qp0_proxy); kfree(dev->caps.qp1_tunnel); kfree(dev->caps.qp1_proxy); kfree(priv); pci_release_regions(pdev); pci_disable_device(pdev); pci_set_drvdata(pdev, NULL); } } int mlx4_restart_one(struct pci_dev *pdev) { struct mlx4_dev *dev = pci_get_drvdata(pdev); struct mlx4_priv *priv = mlx4_priv(dev); int pci_dev_data; pci_dev_data = priv->pci_dev_data; mlx4_remove_one(pdev); return __mlx4_init_one(pdev, pci_dev_data); } static DEFINE_PCI_DEVICE_TABLE(mlx4_pci_table) = { /* MT25408 "Hermon" SDR */ { PCI_VDEVICE(MELLANOX, 0x6340), MLX4_PCI_DEV_FORCE_SENSE_PORT }, /* MT25408 "Hermon" DDR */ { PCI_VDEVICE(MELLANOX, 0x634a), MLX4_PCI_DEV_FORCE_SENSE_PORT }, /* MT25408 "Hermon" QDR */ { PCI_VDEVICE(MELLANOX, 0x6354), MLX4_PCI_DEV_FORCE_SENSE_PORT }, /* MT25408 "Hermon" DDR PCIe gen2 */ { PCI_VDEVICE(MELLANOX, 0x6732), MLX4_PCI_DEV_FORCE_SENSE_PORT }, /* MT25408 "Hermon" QDR PCIe gen2 */ { PCI_VDEVICE(MELLANOX, 0x673c), MLX4_PCI_DEV_FORCE_SENSE_PORT }, /* MT25408 "Hermon" EN 10GigE */ { PCI_VDEVICE(MELLANOX, 0x6368), MLX4_PCI_DEV_FORCE_SENSE_PORT }, /* MT25408 "Hermon" EN 10GigE PCIe gen2 */ { PCI_VDEVICE(MELLANOX, 0x6750), MLX4_PCI_DEV_FORCE_SENSE_PORT }, /* MT25458 ConnectX EN 10GBASE-T 10GigE */ { PCI_VDEVICE(MELLANOX, 0x6372), MLX4_PCI_DEV_FORCE_SENSE_PORT }, /* MT25458 ConnectX EN 10GBASE-T+Gen2 10GigE */ { PCI_VDEVICE(MELLANOX, 0x675a), MLX4_PCI_DEV_FORCE_SENSE_PORT }, /* MT26468 ConnectX EN 10GigE PCIe gen2*/ { PCI_VDEVICE(MELLANOX, 0x6764), MLX4_PCI_DEV_FORCE_SENSE_PORT }, /* MT26438 ConnectX EN 40GigE PCIe gen2 5GT/s */ { PCI_VDEVICE(MELLANOX, 0x6746), MLX4_PCI_DEV_FORCE_SENSE_PORT }, /* MT26478 ConnectX2 40GigE PCIe gen2 */ { PCI_VDEVICE(MELLANOX, 0x676e), MLX4_PCI_DEV_FORCE_SENSE_PORT }, /* MT25400 Family [ConnectX-2 Virtual Function] */ { PCI_VDEVICE(MELLANOX, 0x1002), MLX4_PCI_DEV_IS_VF }, /* MT27500 Family [ConnectX-3] */ { PCI_VDEVICE(MELLANOX, 0x1003), 0 }, /* MT27500 Family [ConnectX-3 Virtual Function] */ { PCI_VDEVICE(MELLANOX, 0x1004), MLX4_PCI_DEV_IS_VF }, { PCI_VDEVICE(MELLANOX, 0x1005), 0 }, /* MT27510 Family */ { PCI_VDEVICE(MELLANOX, 0x1006), 0 }, /* MT27511 Family */ { PCI_VDEVICE(MELLANOX, 0x1007), 0 }, /* MT27520 Family */ { PCI_VDEVICE(MELLANOX, 0x1008), 0 }, /* MT27521 Family */ { PCI_VDEVICE(MELLANOX, 0x1009), 0 }, /* MT27530 Family */ { PCI_VDEVICE(MELLANOX, 0x100a), 0 }, /* MT27531 Family */ { PCI_VDEVICE(MELLANOX, 0x100b), 0 }, /* MT27540 Family */ { PCI_VDEVICE(MELLANOX, 0x100c), 0 }, /* MT27541 Family */ { PCI_VDEVICE(MELLANOX, 0x100d), 0 }, /* MT27550 Family */ { PCI_VDEVICE(MELLANOX, 0x100e), 0 }, /* MT27551 Family */ { PCI_VDEVICE(MELLANOX, 0x100f), 0 }, /* MT27560 Family */ { PCI_VDEVICE(MELLANOX, 0x1010), 0 }, /* MT27561 Family */ { 0, } }; MODULE_DEVICE_TABLE(pci, mlx4_pci_table); static pci_ers_result_t mlx4_pci_err_detected(struct pci_dev *pdev, pci_channel_state_t state) { mlx4_remove_one(pdev); return state == pci_channel_io_perm_failure ? PCI_ERS_RESULT_DISCONNECT : PCI_ERS_RESULT_NEED_RESET; } static pci_ers_result_t mlx4_pci_slot_reset(struct pci_dev *pdev) { int ret = __mlx4_init_one(pdev, 0); return ret ? PCI_ERS_RESULT_DISCONNECT : PCI_ERS_RESULT_RECOVERED; } static const struct pci_error_handlers mlx4_err_handler = { .error_detected = mlx4_pci_err_detected, .slot_reset = mlx4_pci_slot_reset, }; static int suspend(struct pci_dev *pdev, pm_message_t state) { mlx4_remove_one(pdev); if (mlx4_log_num_mgm_entry_size != -1 && (mlx4_log_num_mgm_entry_size < MLX4_MIN_MGM_LOG_ENTRY_SIZE || mlx4_log_num_mgm_entry_size > MLX4_MAX_MGM_LOG_ENTRY_SIZE)) { pr_warning("mlx4_core: mlx4_log_num_mgm_entry_size (%d) not " "in legal range (-1 or %d..%d)\n", mlx4_log_num_mgm_entry_size, MLX4_MIN_MGM_LOG_ENTRY_SIZE, MLX4_MAX_MGM_LOG_ENTRY_SIZE); return -1; } return 0; } static int resume(struct pci_dev *pdev) { return __mlx4_init_one(pdev, 0); } static struct pci_driver mlx4_driver = { .name = DRV_NAME, .id_table = (struct pci_device_id*)mlx4_pci_table, .probe = mlx4_init_one, .remove = __devexit_p(mlx4_remove_one), .suspend = suspend, .resume = resume, .err_handler = (struct pci_error_handlers*)&mlx4_err_handler, }; static int __init mlx4_verify_params(void) { if ((log_num_mac < 0) || (log_num_mac > 7)) { pr_warning("mlx4_core: bad num_mac: %d\n", log_num_mac); return -1; } if (log_num_vlan != 0) pr_warning("mlx4_core: log_num_vlan - obsolete module param, using %d\n", MLX4_LOG_NUM_VLANS); if (mlx4_set_4k_mtu != -1) pr_warning("mlx4_core: set_4k_mtu - obsolete module param\n"); if ((log_mtts_per_seg < 0) || (log_mtts_per_seg > 7)) { pr_warning("mlx4_core: bad log_mtts_per_seg: %d\n", log_mtts_per_seg); return -1; } /* Check if module param for ports type has legal combination */ if (port_type_array[0] == false && port_type_array[1] == true) { pr_warning("mlx4_core: module parameter configuration ETH/IB is not supported. Switching to default configuration IB/IB\n"); port_type_array[0] = true; } if (mlx4_log_num_mgm_entry_size != -1 && (mlx4_log_num_mgm_entry_size < MLX4_MIN_MGM_LOG_ENTRY_SIZE || mlx4_log_num_mgm_entry_size > MLX4_MAX_MGM_LOG_ENTRY_SIZE)) { pr_warning("mlx4_core: mlx4_log_num_mgm_entry_size (%d) not " "in legal range (-1 or %d..%d)\n", mlx4_log_num_mgm_entry_size, MLX4_MIN_MGM_LOG_ENTRY_SIZE, MLX4_MAX_MGM_LOG_ENTRY_SIZE); return -1; } if (mod_param_profile.num_qp < 18 || mod_param_profile.num_qp > 23) { pr_warning("mlx4_core: bad log_num_qp: %d\n", mod_param_profile.num_qp); return -1; } if (mod_param_profile.num_srq < 10) { pr_warning("mlx4_core: too low log_num_srq: %d\n", mod_param_profile.num_srq); return -1; } if (mod_param_profile.num_cq < 10) { pr_warning("mlx4_core: too low log_num_cq: %d\n", mod_param_profile.num_cq); return -1; } if (mod_param_profile.num_mpt < 10) { pr_warning("mlx4_core: too low log_num_mpt: %d\n", mod_param_profile.num_mpt); return -1; } if (mod_param_profile.num_mtt && mod_param_profile.num_mtt < 15) { pr_warning("mlx4_core: too low log_num_mtt: %d\n", mod_param_profile.num_mtt); return -1; } if (mod_param_profile.num_mtt > MLX4_MAX_LOG_NUM_MTT) { pr_warning("mlx4_core: too high log_num_mtt: %d\n", mod_param_profile.num_mtt); return -1; } return 0; } static int __init mlx4_init(void) { int ret; if (mlx4_verify_params()) return -EINVAL; mlx4_catas_init(); mlx4_wq = create_singlethread_workqueue("mlx4"); if (!mlx4_wq) return -ENOMEM; if (enable_sys_tune) sys_tune_init(); ret = pci_register_driver(&mlx4_driver); if (ret < 0 && enable_sys_tune) sys_tune_fini(); return ret < 0 ? ret : 0; } static void __exit mlx4_cleanup(void) { if (enable_sys_tune) sys_tune_fini(); pci_unregister_driver(&mlx4_driver); destroy_workqueue(mlx4_wq); } module_init_order(mlx4_init, SI_ORDER_MIDDLE); module_exit(mlx4_cleanup); #undef MODULE_VERSION #include static int mlx4_evhand(module_t mod, int event, void *arg) { return (0); } static moduledata_t mlx4_mod = { .name = "mlx4", .evhand = mlx4_evhand, }; MODULE_VERSION(mlx4, 1); DECLARE_MODULE(mlx4, mlx4_mod, SI_SUB_OFED_PREINIT, SI_ORDER_ANY); Index: stable/10/sys/ofed/drivers/net/mlx4/mcg.c =================================================================== --- stable/10/sys/ofed/drivers/net/mlx4/mcg.c (revision 271126) +++ stable/10/sys/ofed/drivers/net/mlx4/mcg.c (revision 271127) @@ -1,1426 +1,1426 @@ /* * Copyright (c) 2006, 2007 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2007, 2008 Mellanox Technologies. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU * General Public License (GPL) Version 2, available from the file * COPYING in the main directory of this source tree, or the * OpenIB.org BSD license below: * * Redistribution and use in source and binary forms, with or * without modification, are permitted provided that the following * conditions are met: * * - Redistributions of source code must retain the above * copyright notice, this list of conditions and the following * disclaimer. * * - Redistributions in binary form must reproduce the above * copyright notice, this list of conditions and the following * disclaimer in the documentation and/or other materials * provided with the distribution. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ #include #include #include "mlx4.h" static const u8 zero_gid[16]; /* automatically initialized to 0 */ int mlx4_get_mgm_entry_size(struct mlx4_dev *dev) { return 1 << dev->oper_log_mgm_entry_size; } int mlx4_get_qp_per_mgm(struct mlx4_dev *dev) { return 4 * (mlx4_get_mgm_entry_size(dev) / 16 - 2); } static int mlx4_QP_FLOW_STEERING_ATTACH(struct mlx4_dev *dev, struct mlx4_cmd_mailbox *mailbox, u32 size, u64 *reg_id) { u64 imm; int err = 0; err = mlx4_cmd_imm(dev, mailbox->dma, &imm, size, 0, MLX4_QP_FLOW_STEERING_ATTACH, MLX4_CMD_TIME_CLASS_A, MLX4_CMD_NATIVE); if (err) return err; *reg_id = imm; return err; } static int mlx4_QP_FLOW_STEERING_DETACH(struct mlx4_dev *dev, u64 regid) { int err = 0; err = mlx4_cmd(dev, regid, 0, 0, MLX4_QP_FLOW_STEERING_DETACH, MLX4_CMD_TIME_CLASS_A, MLX4_CMD_NATIVE); return err; } static int mlx4_READ_ENTRY(struct mlx4_dev *dev, int index, struct mlx4_cmd_mailbox *mailbox) { return mlx4_cmd_box(dev, 0, mailbox->dma, index, 0, MLX4_CMD_READ_MCG, MLX4_CMD_TIME_CLASS_A, MLX4_CMD_NATIVE); } static int mlx4_WRITE_ENTRY(struct mlx4_dev *dev, int index, struct mlx4_cmd_mailbox *mailbox) { return mlx4_cmd(dev, mailbox->dma, index, 0, MLX4_CMD_WRITE_MCG, MLX4_CMD_TIME_CLASS_A, MLX4_CMD_NATIVE); } static int mlx4_WRITE_PROMISC(struct mlx4_dev *dev, u8 port, u8 steer, struct mlx4_cmd_mailbox *mailbox) { u32 in_mod; in_mod = (u32) port << 16 | steer << 1; return mlx4_cmd(dev, mailbox->dma, in_mod, 0x1, MLX4_CMD_WRITE_MCG, MLX4_CMD_TIME_CLASS_A, MLX4_CMD_NATIVE); } static int mlx4_GID_HASH(struct mlx4_dev *dev, struct mlx4_cmd_mailbox *mailbox, u16 *hash, u8 op_mod) { u64 imm; int err; err = mlx4_cmd_imm(dev, mailbox->dma, &imm, 0, op_mod, MLX4_CMD_MGID_HASH, MLX4_CMD_TIME_CLASS_A, MLX4_CMD_NATIVE); if (!err) *hash = imm; return err; } static struct mlx4_promisc_qp *get_promisc_qp(struct mlx4_dev *dev, u8 port, enum mlx4_steer_type steer, u32 qpn) { struct mlx4_steer *s_steer = &mlx4_priv(dev)->steer[port - 1]; struct mlx4_promisc_qp *pqp; list_for_each_entry(pqp, &s_steer->promisc_qps[steer], list) { if (pqp->qpn == qpn) return pqp; } /* not found */ return NULL; } /* * Add new entry to steering data structure. * All promisc QPs should be added as well */ static int new_steering_entry(struct mlx4_dev *dev, u8 port, enum mlx4_steer_type steer, unsigned int index, u32 qpn) { struct mlx4_steer *s_steer; struct mlx4_cmd_mailbox *mailbox; struct mlx4_mgm *mgm; u32 members_count; struct mlx4_steer_index *new_entry; struct mlx4_promisc_qp *pqp; struct mlx4_promisc_qp *dqp = NULL; u32 prot; int err; s_steer = &mlx4_priv(dev)->steer[port - 1]; new_entry = kzalloc(sizeof *new_entry, GFP_KERNEL); if (!new_entry) return -ENOMEM; INIT_LIST_HEAD(&new_entry->duplicates); new_entry->index = index; list_add_tail(&new_entry->list, &s_steer->steer_entries[steer]); /* If the given qpn is also a promisc qp, * it should be inserted to duplicates list */ pqp = get_promisc_qp(dev, port, steer, qpn); if (pqp) { dqp = kmalloc(sizeof *dqp, GFP_KERNEL); if (!dqp) { err = -ENOMEM; goto out_alloc; } dqp->qpn = qpn; list_add_tail(&dqp->list, &new_entry->duplicates); } /* if no promisc qps for this vep, we are done */ if (list_empty(&s_steer->promisc_qps[steer])) return 0; /* now need to add all the promisc qps to the new * steering entry, as they should also receive the packets * destined to this address */ mailbox = mlx4_alloc_cmd_mailbox(dev); if (IS_ERR(mailbox)) { err = -ENOMEM; goto out_alloc; } mgm = mailbox->buf; err = mlx4_READ_ENTRY(dev, index, mailbox); if (err) goto out_mailbox; members_count = be32_to_cpu(mgm->members_count) & 0xffffff; prot = be32_to_cpu(mgm->members_count) >> 30; list_for_each_entry(pqp, &s_steer->promisc_qps[steer], list) { /* don't add already existing qpn */ if (pqp->qpn == qpn) continue; if (members_count == dev->caps.num_qp_per_mgm) { /* out of space */ err = -ENOMEM; goto out_mailbox; } /* add the qpn */ mgm->qp[members_count++] = cpu_to_be32(pqp->qpn & MGM_QPN_MASK); } /* update the qps count and update the entry with all the promisc qps*/ mgm->members_count = cpu_to_be32(members_count | (prot << 30)); err = mlx4_WRITE_ENTRY(dev, index, mailbox); out_mailbox: mlx4_free_cmd_mailbox(dev, mailbox); if (!err) return 0; out_alloc: if (dqp) { list_del(&dqp->list); kfree(dqp); } list_del(&new_entry->list); kfree(new_entry); return err; } /* update the data structures with existing steering entry */ static int existing_steering_entry(struct mlx4_dev *dev, u8 port, enum mlx4_steer_type steer, unsigned int index, u32 qpn) { struct mlx4_steer *s_steer; struct mlx4_steer_index *tmp_entry, *entry = NULL; struct mlx4_promisc_qp *pqp; struct mlx4_promisc_qp *dqp; s_steer = &mlx4_priv(dev)->steer[port - 1]; pqp = get_promisc_qp(dev, port, steer, qpn); if (!pqp) return 0; /* nothing to do */ list_for_each_entry(tmp_entry, &s_steer->steer_entries[steer], list) { if (tmp_entry->index == index) { entry = tmp_entry; break; } } if (unlikely(!entry)) { mlx4_warn(dev, "Steering entry at index %x is not registered\n", index); return -EINVAL; } /* the given qpn is listed as a promisc qpn * we need to add it as a duplicate to this entry * for future references */ list_for_each_entry(dqp, &entry->duplicates, list) { if (qpn == pqp->qpn) return 0; /* qp is already duplicated */ } /* add the qp as a duplicate on this index */ dqp = kmalloc(sizeof *dqp, GFP_KERNEL); if (!dqp) return -ENOMEM; dqp->qpn = qpn; list_add_tail(&dqp->list, &entry->duplicates); return 0; } /* Check whether a qpn is a duplicate on steering entry * If so, it should not be removed from mgm */ static bool check_duplicate_entry(struct mlx4_dev *dev, u8 port, enum mlx4_steer_type steer, unsigned int index, u32 qpn) { struct mlx4_steer *s_steer; struct mlx4_steer_index *tmp_entry, *entry = NULL; struct mlx4_promisc_qp *dqp, *tmp_dqp; s_steer = &mlx4_priv(dev)->steer[port - 1]; /* if qp is not promisc, it cannot be duplicated */ if (!get_promisc_qp(dev, port, steer, qpn)) return false; /* The qp is promisc qp so it is a duplicate on this index * Find the index entry, and remove the duplicate */ list_for_each_entry(tmp_entry, &s_steer->steer_entries[steer], list) { if (tmp_entry->index == index) { entry = tmp_entry; break; } } if (unlikely(!entry)) { mlx4_warn(dev, "Steering entry for index %x is not registered\n", index); return false; } list_for_each_entry_safe(dqp, tmp_dqp, &entry->duplicates, list) { if (dqp->qpn == qpn) { list_del(&dqp->list); kfree(dqp); } } return true; } /* I a steering entry contains only promisc QPs, it can be removed. */ static bool can_remove_steering_entry(struct mlx4_dev *dev, u8 port, enum mlx4_steer_type steer, unsigned int index, u32 tqpn) { struct mlx4_steer *s_steer; struct mlx4_cmd_mailbox *mailbox; struct mlx4_mgm *mgm; struct mlx4_steer_index *entry = NULL, *tmp_entry; u32 qpn; u32 members_count; bool ret = false; int i; s_steer = &mlx4_priv(dev)->steer[port - 1]; mailbox = mlx4_alloc_cmd_mailbox(dev); if (IS_ERR(mailbox)) return false; mgm = mailbox->buf; if (mlx4_READ_ENTRY(dev, index, mailbox)) goto out; members_count = be32_to_cpu(mgm->members_count) & 0xffffff; for (i = 0; i < members_count; i++) { qpn = be32_to_cpu(mgm->qp[i]) & MGM_QPN_MASK; if (!get_promisc_qp(dev, port, steer, qpn) && qpn != tqpn) { /* the qp is not promisc, the entry can't be removed */ goto out; } } /* All the qps currently registered for this entry are promiscuous, * Checking for duplicates */ ret = true; list_for_each_entry_safe(entry, tmp_entry, &s_steer->steer_entries[steer], list) { if (entry->index == index) { if (list_empty(&entry->duplicates) || members_count == 1) { struct mlx4_promisc_qp *pqp, *tmp_pqp; /* * If there is only 1 entry in duplicates than * this is the QP we want to delete, going over * the list and deleting the entry. */ list_del(&entry->list); list_for_each_entry_safe(pqp, tmp_pqp, &entry->duplicates, list) { list_del(&pqp->list); kfree(pqp); } kfree(entry); } else { /* This entry contains duplicates so it shouldn't be removed */ ret = false; goto out; } } } out: mlx4_free_cmd_mailbox(dev, mailbox); return ret; } static int add_promisc_qp(struct mlx4_dev *dev, u8 port, enum mlx4_steer_type steer, u32 qpn) { struct mlx4_steer *s_steer; struct mlx4_cmd_mailbox *mailbox; struct mlx4_mgm *mgm; struct mlx4_steer_index *entry; struct mlx4_promisc_qp *pqp; struct mlx4_promisc_qp *dqp; u32 members_count; u32 prot; int i; bool found; int err; struct mlx4_priv *priv = mlx4_priv(dev); s_steer = &mlx4_priv(dev)->steer[port - 1]; mutex_lock(&priv->mcg_table.mutex); if (get_promisc_qp(dev, port, steer, qpn)) { err = 0; /* Noting to do, already exists */ goto out_mutex; } pqp = kmalloc(sizeof *pqp, GFP_KERNEL); if (!pqp) { err = -ENOMEM; goto out_mutex; } pqp->qpn = qpn; mailbox = mlx4_alloc_cmd_mailbox(dev); if (IS_ERR(mailbox)) { err = -ENOMEM; goto out_alloc; } mgm = mailbox->buf; /* the promisc qp needs to be added for each one of the steering * entries, if it already exists, needs to be added as a duplicate * for this entry */ list_for_each_entry(entry, &s_steer->steer_entries[steer], list) { err = mlx4_READ_ENTRY(dev, entry->index, mailbox); if (err) goto out_mailbox; members_count = be32_to_cpu(mgm->members_count) & 0xffffff; prot = be32_to_cpu(mgm->members_count) >> 30; found = false; for (i = 0; i < members_count; i++) { if ((be32_to_cpu(mgm->qp[i]) & MGM_QPN_MASK) == qpn) { /* Entry already exists, add to duplicates */ dqp = kmalloc(sizeof *dqp, GFP_KERNEL); if (!dqp) { err = -ENOMEM; goto out_mailbox; } dqp->qpn = qpn; list_add_tail(&dqp->list, &entry->duplicates); found = true; } } if (!found) { /* Need to add the qpn to mgm */ if (members_count == dev->caps.num_qp_per_mgm) { /* entry is full */ err = -ENOMEM; goto out_mailbox; } mgm->qp[members_count++] = cpu_to_be32(qpn & MGM_QPN_MASK); mgm->members_count = cpu_to_be32(members_count | (prot << 30)); err = mlx4_WRITE_ENTRY(dev, entry->index, mailbox); if (err) goto out_mailbox; } } /* add the new qpn to list of promisc qps */ list_add_tail(&pqp->list, &s_steer->promisc_qps[steer]); /* now need to add all the promisc qps to default entry */ memset(mgm, 0, sizeof *mgm); members_count = 0; list_for_each_entry(dqp, &s_steer->promisc_qps[steer], list) { if (members_count == dev->caps.num_qp_per_mgm) { /* entry is full */ err = -ENOMEM; goto out_list; } mgm->qp[members_count++] = cpu_to_be32(dqp->qpn & MGM_QPN_MASK); } mgm->members_count = cpu_to_be32(members_count | MLX4_PROT_ETH << 30); err = mlx4_WRITE_PROMISC(dev, port, steer, mailbox); if (err) goto out_list; mlx4_free_cmd_mailbox(dev, mailbox); mutex_unlock(&priv->mcg_table.mutex); return 0; out_list: list_del(&pqp->list); out_mailbox: mlx4_free_cmd_mailbox(dev, mailbox); out_alloc: kfree(pqp); out_mutex: mutex_unlock(&priv->mcg_table.mutex); return err; } static int remove_promisc_qp(struct mlx4_dev *dev, u8 port, enum mlx4_steer_type steer, u32 qpn) { struct mlx4_priv *priv = mlx4_priv(dev); struct mlx4_steer *s_steer; struct mlx4_cmd_mailbox *mailbox; struct mlx4_mgm *mgm; struct mlx4_steer_index *entry; struct mlx4_promisc_qp *pqp; struct mlx4_promisc_qp *dqp; u32 members_count; bool found; bool back_to_list = false; int i, loc = -1; int err; s_steer = &mlx4_priv(dev)->steer[port - 1]; mutex_lock(&priv->mcg_table.mutex); pqp = get_promisc_qp(dev, port, steer, qpn); if (unlikely(!pqp)) { mlx4_warn(dev, "QP %x is not promiscuous QP\n", qpn); /* nothing to do */ err = 0; goto out_mutex; } /*remove from list of promisc qps */ list_del(&pqp->list); /* set the default entry not to include the removed one */ mailbox = mlx4_alloc_cmd_mailbox(dev); if (IS_ERR(mailbox)) { err = -ENOMEM; back_to_list = true; goto out_list; } mgm = mailbox->buf; memset(mgm, 0, sizeof *mgm); members_count = 0; list_for_each_entry(dqp, &s_steer->promisc_qps[steer], list) mgm->qp[members_count++] = cpu_to_be32(dqp->qpn & MGM_QPN_MASK); mgm->members_count = cpu_to_be32(members_count | MLX4_PROT_ETH << 30); err = mlx4_WRITE_PROMISC(dev, port, steer, mailbox); if (err) goto out_mailbox; /* remove the qp from all the steering entries*/ list_for_each_entry(entry, &s_steer->steer_entries[steer], list) { found = false; list_for_each_entry(dqp, &entry->duplicates, list) { if (dqp->qpn == qpn) { found = true; break; } } if (found) { /* a duplicate, no need to change the mgm, * only update the duplicates list */ list_del(&dqp->list); kfree(dqp); } else { err = mlx4_READ_ENTRY(dev, entry->index, mailbox); if (err) goto out_mailbox; members_count = be32_to_cpu(mgm->members_count) & 0xffffff; for (i = 0; i < members_count; ++i) if ((be32_to_cpu(mgm->qp[i]) & MGM_QPN_MASK) == qpn) { loc = i; break; } if (loc < 0) { mlx4_err(dev, "QP %06x wasn't found in entry %d\n", qpn, entry->index); err = -EINVAL; goto out_mailbox; } /* copy the last QP in this MGM over removed QP */ mgm->qp[loc] = mgm->qp[members_count - 1]; mgm->qp[members_count - 1] = 0; mgm->members_count = cpu_to_be32(--members_count | (MLX4_PROT_ETH << 30)); err = mlx4_WRITE_ENTRY(dev, entry->index, mailbox); if (err) goto out_mailbox; } } out_mailbox: mlx4_free_cmd_mailbox(dev, mailbox); out_list: if (back_to_list) list_add_tail(&pqp->list, &s_steer->promisc_qps[steer]); else kfree(pqp); out_mutex: mutex_unlock(&priv->mcg_table.mutex); return err; } /* * Caller must hold MCG table semaphore. gid and mgm parameters must * be properly aligned for command interface. * * Returns 0 unless a firmware command error occurs. * * If GID is found in MGM or MGM is empty, *index = *hash, *prev = -1 * and *mgm holds MGM entry. * * if GID is found in AMGM, *index = index in AMGM, *prev = index of * previous entry in hash chain and *mgm holds AMGM entry. * * If no AMGM exists for given gid, *index = -1, *prev = index of last * entry in hash chain and *mgm holds end of hash chain. */ static int find_entry(struct mlx4_dev *dev, u8 port, u8 *gid, enum mlx4_protocol prot, struct mlx4_cmd_mailbox *mgm_mailbox, int *prev, int *index) { struct mlx4_cmd_mailbox *mailbox; struct mlx4_mgm *mgm = mgm_mailbox->buf; u8 *mgid; int err; u16 hash; u8 op_mod = (prot == MLX4_PROT_ETH) ? !!(dev->caps.flags & MLX4_DEV_CAP_FLAG_VEP_MC_STEER) : 0; mailbox = mlx4_alloc_cmd_mailbox(dev); if (IS_ERR(mailbox)) return -ENOMEM; mgid = mailbox->buf; memcpy(mgid, gid, 16); err = mlx4_GID_HASH(dev, mailbox, &hash, op_mod); mlx4_free_cmd_mailbox(dev, mailbox); if (err) return err; if (0) mlx4_dbg(dev, "Hash for %pI6 is %04x\n", gid, hash); *index = hash; *prev = -1; do { err = mlx4_READ_ENTRY(dev, *index, mgm_mailbox); if (err) return err; if (!(be32_to_cpu(mgm->members_count) & 0xffffff)) { if (*index != hash) { mlx4_err(dev, "Found zero MGID in AMGM.\n"); err = -EINVAL; } return err; } if (!memcmp(mgm->gid, gid, 16) && be32_to_cpu(mgm->members_count) >> 30 == prot) return err; *prev = *index; *index = be32_to_cpu(mgm->next_gid_index) >> 6; } while (*index); *index = -1; return err; } static void trans_rule_ctrl_to_hw(struct mlx4_net_trans_rule *ctrl, struct mlx4_net_trans_rule_hw_ctrl *hw) { static const u8 __promisc_mode[] = { [MLX4_FS_REGULAR] = 0x0, [MLX4_FS_ALL_DEFAULT] = 0x1, [MLX4_FS_MC_DEFAULT] = 0x3, [MLX4_FS_UC_SNIFFER] = 0x4, [MLX4_FS_MC_SNIFFER] = 0x5, }; u32 dw = 0; dw = ctrl->queue_mode == MLX4_NET_TRANS_Q_LIFO ? 1 : 0; dw |= ctrl->exclusive ? (1 << 2) : 0; dw |= ctrl->allow_loopback ? (1 << 3) : 0; dw |= __promisc_mode[ctrl->promisc_mode] << 8; dw |= ctrl->priority << 16; hw->ctrl = cpu_to_be32(dw); hw->port = ctrl->port; hw->qpn = cpu_to_be32(ctrl->qpn); } const u16 __sw_id_hw[] = { [MLX4_NET_TRANS_RULE_ID_ETH] = 0xE001, [MLX4_NET_TRANS_RULE_ID_IB] = 0xE005, [MLX4_NET_TRANS_RULE_ID_IPV6] = 0xE003, [MLX4_NET_TRANS_RULE_ID_IPV4] = 0xE002, [MLX4_NET_TRANS_RULE_ID_TCP] = 0xE004, [MLX4_NET_TRANS_RULE_ID_UDP] = 0xE006 }; static int parse_trans_rule(struct mlx4_dev *dev, struct mlx4_spec_list *spec, struct _rule_hw *rule_hw) { static const size_t __rule_hw_sz[] = { [MLX4_NET_TRANS_RULE_ID_ETH] = sizeof(struct mlx4_net_trans_rule_hw_eth), [MLX4_NET_TRANS_RULE_ID_IB] = sizeof(struct mlx4_net_trans_rule_hw_ib), [MLX4_NET_TRANS_RULE_ID_IPV6] = 0, [MLX4_NET_TRANS_RULE_ID_IPV4] = sizeof(struct mlx4_net_trans_rule_hw_ipv4), [MLX4_NET_TRANS_RULE_ID_TCP] = sizeof(struct mlx4_net_trans_rule_hw_tcp_udp), [MLX4_NET_TRANS_RULE_ID_UDP] = sizeof(struct mlx4_net_trans_rule_hw_tcp_udp) }; if (spec->id >= MLX4_NET_TRANS_RULE_NUM) { mlx4_err(dev, "Invalid network rule id. id = %d\n", spec->id); return -EINVAL; } memset(rule_hw, 0, __rule_hw_sz[spec->id]); rule_hw->id = cpu_to_be16(__sw_id_hw[spec->id]); rule_hw->size = __rule_hw_sz[spec->id] >> 2; switch (spec->id) { case MLX4_NET_TRANS_RULE_ID_ETH: memcpy(rule_hw->eth.dst_mac, spec->eth.dst_mac, ETH_ALEN); memcpy(rule_hw->eth.dst_mac_msk, spec->eth.dst_mac_msk, ETH_ALEN); memcpy(rule_hw->eth.src_mac, spec->eth.src_mac, ETH_ALEN); memcpy(rule_hw->eth.src_mac_msk, spec->eth.src_mac_msk, ETH_ALEN); if (spec->eth.ether_type_enable) { rule_hw->eth.ether_type_enable = 1; rule_hw->eth.ether_type = spec->eth.ether_type; } rule_hw->eth.vlan_id = spec->eth.vlan_id; rule_hw->eth.vlan_id_msk = spec->eth.vlan_id_msk; break; case MLX4_NET_TRANS_RULE_ID_IB: rule_hw->ib.r_u_qpn = spec->ib.r_u_qpn; rule_hw->ib.qpn_mask = spec->ib.qpn_msk; memcpy(&rule_hw->ib.dst_gid, &spec->ib.dst_gid, 16); memcpy(&rule_hw->ib.dst_gid_msk, &spec->ib.dst_gid_msk, 16); break; case MLX4_NET_TRANS_RULE_ID_IPV6: return -EOPNOTSUPP; case MLX4_NET_TRANS_RULE_ID_IPV4: rule_hw->ipv4.src_ip = spec->ipv4.src_ip; rule_hw->ipv4.src_ip_msk = spec->ipv4.src_ip_msk; rule_hw->ipv4.dst_ip = spec->ipv4.dst_ip; rule_hw->ipv4.dst_ip_msk = spec->ipv4.dst_ip_msk; break; case MLX4_NET_TRANS_RULE_ID_TCP: case MLX4_NET_TRANS_RULE_ID_UDP: rule_hw->tcp_udp.dst_port = spec->tcp_udp.dst_port; rule_hw->tcp_udp.dst_port_msk = spec->tcp_udp.dst_port_msk; rule_hw->tcp_udp.src_port = spec->tcp_udp.src_port; rule_hw->tcp_udp.src_port_msk = spec->tcp_udp.src_port_msk; break; default: return -EINVAL; } return __rule_hw_sz[spec->id]; } static void mlx4_err_rule(struct mlx4_dev *dev, char *str, struct mlx4_net_trans_rule *rule) { #define BUF_SIZE 256 struct mlx4_spec_list *cur; char buf[BUF_SIZE]; int len = 0; mlx4_err(dev, "%s", str); len += snprintf(buf + len, BUF_SIZE - len, "port = %d prio = 0x%x qp = 0x%x ", rule->port, rule->priority, rule->qpn); list_for_each_entry(cur, &rule->list, list) { switch (cur->id) { case MLX4_NET_TRANS_RULE_ID_ETH: len += snprintf(buf + len, BUF_SIZE - len, "dmac = %pM ", &cur->eth.dst_mac); if (cur->eth.ether_type) len += snprintf(buf + len, BUF_SIZE - len, "ethertype = 0x%x ", be16_to_cpu(cur->eth.ether_type)); if (cur->eth.vlan_id) len += snprintf(buf + len, BUF_SIZE - len, "vlan-id = %d ", be16_to_cpu(cur->eth.vlan_id)); break; case MLX4_NET_TRANS_RULE_ID_IPV4: if (cur->ipv4.src_ip) len += snprintf(buf + len, BUF_SIZE - len, "src-ip = %pI4 ", &cur->ipv4.src_ip); if (cur->ipv4.dst_ip) len += snprintf(buf + len, BUF_SIZE - len, "dst-ip = %pI4 ", &cur->ipv4.dst_ip); break; case MLX4_NET_TRANS_RULE_ID_TCP: case MLX4_NET_TRANS_RULE_ID_UDP: if (cur->tcp_udp.src_port) len += snprintf(buf + len, BUF_SIZE - len, "src-port = %d ", be16_to_cpu(cur->tcp_udp.src_port)); if (cur->tcp_udp.dst_port) len += snprintf(buf + len, BUF_SIZE - len, "dst-port = %d ", be16_to_cpu(cur->tcp_udp.dst_port)); break; case MLX4_NET_TRANS_RULE_ID_IB: len += snprintf(buf + len, BUF_SIZE - len, "dst-gid = %pI6\n", cur->ib.dst_gid); len += snprintf(buf + len, BUF_SIZE - len, "dst-gid-mask = %pI6\n", cur->ib.dst_gid_msk); break; case MLX4_NET_TRANS_RULE_ID_IPV6: break; default: break; } } len += snprintf(buf + len, BUF_SIZE - len, "\n"); mlx4_err(dev, "%s", buf); if (len >= BUF_SIZE) mlx4_err(dev, "Network rule error message was truncated, print buffer is too small.\n"); } int mlx4_flow_attach(struct mlx4_dev *dev, struct mlx4_net_trans_rule *rule, u64 *reg_id) { struct mlx4_cmd_mailbox *mailbox; struct mlx4_spec_list *cur; u32 size = 0; int ret; mailbox = mlx4_alloc_cmd_mailbox(dev); if (IS_ERR(mailbox)) return PTR_ERR(mailbox); memset(mailbox->buf, 0, sizeof(struct mlx4_net_trans_rule_hw_ctrl)); trans_rule_ctrl_to_hw(rule, mailbox->buf); size += sizeof(struct mlx4_net_trans_rule_hw_ctrl); list_for_each_entry(cur, &rule->list, list) { ret = parse_trans_rule(dev, cur, mailbox->buf + size); if (ret < 0) { mlx4_free_cmd_mailbox(dev, mailbox); return -EINVAL; } size += ret; } ret = mlx4_QP_FLOW_STEERING_ATTACH(dev, mailbox, size >> 2, reg_id); if (ret == -ENOMEM) mlx4_err_rule(dev, "mcg table is full. Fail to register network rule.\n", rule); else if (ret) mlx4_err_rule(dev, "Fail to register network rule.\n", rule); mlx4_free_cmd_mailbox(dev, mailbox); return ret; } EXPORT_SYMBOL_GPL(mlx4_flow_attach); int mlx4_flow_detach(struct mlx4_dev *dev, u64 reg_id) { int err; err = mlx4_QP_FLOW_STEERING_DETACH(dev, reg_id); if (err) mlx4_err(dev, "Fail to detach network rule. registration id = 0x%llx\n", - reg_id); + (long long)reg_id); return err; } EXPORT_SYMBOL_GPL(mlx4_flow_detach); int mlx4_FLOW_STEERING_IB_UC_QP_RANGE(struct mlx4_dev *dev, u32 min_range_qpn, u32 max_range_qpn) { int err; u64 in_param; in_param = ((u64) min_range_qpn) << 32; in_param |= ((u64) max_range_qpn) & 0xFFFFFFFF; err = mlx4_cmd(dev, in_param, 0, 0, MLX4_FLOW_STEERING_IB_UC_QP_RANGE, MLX4_CMD_TIME_CLASS_A, MLX4_CMD_NATIVE); return err; } EXPORT_SYMBOL_GPL(mlx4_FLOW_STEERING_IB_UC_QP_RANGE); int mlx4_qp_attach_common(struct mlx4_dev *dev, struct mlx4_qp *qp, u8 gid[16], int block_mcast_loopback, enum mlx4_protocol prot, enum mlx4_steer_type steer) { struct mlx4_priv *priv = mlx4_priv(dev); struct mlx4_cmd_mailbox *mailbox; struct mlx4_mgm *mgm; u32 members_count; int index, prev; int link = 0; int i; int err; u8 port = gid[5]; u8 new_entry = 0; mailbox = mlx4_alloc_cmd_mailbox(dev); if (IS_ERR(mailbox)) return PTR_ERR(mailbox); mgm = mailbox->buf; mutex_lock(&priv->mcg_table.mutex); err = find_entry(dev, port, gid, prot, mailbox, &prev, &index); if (err) goto out; if (index != -1) { if (!(be32_to_cpu(mgm->members_count) & 0xffffff)) { new_entry = 1; memcpy(mgm->gid, gid, 16); } } else { link = 1; index = mlx4_bitmap_alloc(&priv->mcg_table.bitmap); if (index == -1) { mlx4_err(dev, "No AMGM entries left\n"); err = -ENOMEM; goto out; } index += dev->caps.num_mgms; new_entry = 1; memset(mgm, 0, sizeof *mgm); memcpy(mgm->gid, gid, 16); } members_count = be32_to_cpu(mgm->members_count) & 0xffffff; if (members_count == dev->caps.num_qp_per_mgm) { mlx4_err(dev, "MGM at index %x is full.\n", index); err = -ENOMEM; goto out; } for (i = 0; i < members_count; ++i) if ((be32_to_cpu(mgm->qp[i]) & MGM_QPN_MASK) == qp->qpn) { mlx4_dbg(dev, "QP %06x already a member of MGM\n", qp->qpn); err = 0; goto out; } mgm->qp[members_count++] = cpu_to_be32((qp->qpn & MGM_QPN_MASK) | (!!mlx4_blck_lb << MGM_BLCK_LB_BIT)); mgm->members_count = cpu_to_be32(members_count | (u32) prot << 30); err = mlx4_WRITE_ENTRY(dev, index, mailbox); if (err) goto out; if (!link) goto out; err = mlx4_READ_ENTRY(dev, prev, mailbox); if (err) goto out; mgm->next_gid_index = cpu_to_be32(index << 6); err = mlx4_WRITE_ENTRY(dev, prev, mailbox); if (err) goto out; if (prot == MLX4_PROT_ETH) { /* manage the steering entry for promisc mode */ if (new_entry) new_steering_entry(dev, port, steer, index, qp->qpn); else existing_steering_entry(dev, port, steer, index, qp->qpn); } out: if (err && link && index != -1) { if (index < dev->caps.num_mgms) mlx4_warn(dev, "Got AMGM index %d < %d", index, dev->caps.num_mgms); else mlx4_bitmap_free(&priv->mcg_table.bitmap, index - dev->caps.num_mgms); } mutex_unlock(&priv->mcg_table.mutex); mlx4_free_cmd_mailbox(dev, mailbox); return err; } int mlx4_qp_detach_common(struct mlx4_dev *dev, struct mlx4_qp *qp, u8 gid[16], enum mlx4_protocol prot, enum mlx4_steer_type steer) { struct mlx4_priv *priv = mlx4_priv(dev); struct mlx4_cmd_mailbox *mailbox; struct mlx4_mgm *mgm; u32 members_count; int prev, index; int i, loc = -1; int err; u8 port = gid[5]; bool removed_entry = false; mailbox = mlx4_alloc_cmd_mailbox(dev); if (IS_ERR(mailbox)) return PTR_ERR(mailbox); mgm = mailbox->buf; mutex_lock(&priv->mcg_table.mutex); err = find_entry(dev, port, gid, prot, mailbox, &prev, &index); if (err) goto out; if (index == -1) { mlx4_err(dev, "MGID %pI6 not found\n", gid); err = -EINVAL; goto out; } /* if this pq is also a promisc qp, it shouldn't be removed */ if (prot == MLX4_PROT_ETH && check_duplicate_entry(dev, port, steer, index, qp->qpn)) goto out; members_count = be32_to_cpu(mgm->members_count) & 0xffffff; for (i = 0; i < members_count; ++i) if ((be32_to_cpu(mgm->qp[i]) & MGM_QPN_MASK) == qp->qpn) { loc = i; break; } if (loc == -1) { mlx4_err(dev, "QP %06x not found in MGM\n", qp->qpn); err = -EINVAL; goto out; } /* copy the last QP in this MGM over removed QP */ mgm->qp[loc] = mgm->qp[members_count - 1]; mgm->qp[members_count - 1] = 0; mgm->members_count = cpu_to_be32(--members_count | (u32) prot << 30); if (prot == MLX4_PROT_ETH) removed_entry = can_remove_steering_entry(dev, port, steer, index, qp->qpn); if (members_count && (prot != MLX4_PROT_ETH || !removed_entry)) { err = mlx4_WRITE_ENTRY(dev, index, mailbox); goto out; } /* We are going to delete the entry, members count should be 0 */ mgm->members_count = cpu_to_be32((u32) prot << 30); if (prev == -1) { /* Remove entry from MGM */ int amgm_index = be32_to_cpu(mgm->next_gid_index) >> 6; if (amgm_index) { err = mlx4_READ_ENTRY(dev, amgm_index, mailbox); if (err) goto out; } else memset(mgm->gid, 0, 16); err = mlx4_WRITE_ENTRY(dev, index, mailbox); if (err) goto out; if (amgm_index) { if (amgm_index < dev->caps.num_mgms) mlx4_warn(dev, "MGM entry %d had AMGM index %d < %d", index, amgm_index, dev->caps.num_mgms); else mlx4_bitmap_free(&priv->mcg_table.bitmap, amgm_index - dev->caps.num_mgms); } } else { /* Remove entry from AMGM */ int cur_next_index = be32_to_cpu(mgm->next_gid_index) >> 6; err = mlx4_READ_ENTRY(dev, prev, mailbox); if (err) goto out; mgm->next_gid_index = cpu_to_be32(cur_next_index << 6); err = mlx4_WRITE_ENTRY(dev, prev, mailbox); if (err) goto out; if (index < dev->caps.num_mgms) mlx4_warn(dev, "entry %d had next AMGM index %d < %d", prev, index, dev->caps.num_mgms); else mlx4_bitmap_free(&priv->mcg_table.bitmap, index - dev->caps.num_mgms); } out: mutex_unlock(&priv->mcg_table.mutex); mlx4_free_cmd_mailbox(dev, mailbox); return err; } static int mlx4_QP_ATTACH(struct mlx4_dev *dev, struct mlx4_qp *qp, u8 gid[16], u8 attach, u8 block_loopback, enum mlx4_protocol prot) { struct mlx4_cmd_mailbox *mailbox; int err = 0; int qpn; if (!mlx4_is_mfunc(dev)) return -EBADF; mailbox = mlx4_alloc_cmd_mailbox(dev); if (IS_ERR(mailbox)) return PTR_ERR(mailbox); memcpy(mailbox->buf, gid, 16); qpn = qp->qpn; qpn |= (prot << 28); if (attach && block_loopback) qpn |= (1U << 31); err = mlx4_cmd(dev, mailbox->dma, qpn, attach, MLX4_CMD_QP_ATTACH, MLX4_CMD_TIME_CLASS_A, MLX4_CMD_WRAPPED); mlx4_free_cmd_mailbox(dev, mailbox); return err; } int mlx4_multicast_attach(struct mlx4_dev *dev, struct mlx4_qp *qp, u8 gid[16], u8 port, int block_mcast_loopback, enum mlx4_protocol prot, u64 *reg_id) { switch (dev->caps.steering_mode) { case MLX4_STEERING_MODE_A0: if (prot == MLX4_PROT_ETH) return 0; case MLX4_STEERING_MODE_B0: if (prot == MLX4_PROT_ETH) gid[7] |= (MLX4_MC_STEER << 1); if (mlx4_is_mfunc(dev)) return mlx4_QP_ATTACH(dev, qp, gid, 1, block_mcast_loopback, prot); return mlx4_qp_attach_common(dev, qp, gid, block_mcast_loopback, prot, MLX4_MC_STEER); case MLX4_STEERING_MODE_DEVICE_MANAGED: { struct mlx4_spec_list spec = { {NULL} }; __be64 mac_mask = cpu_to_be64(MLX4_MAC_MASK << 16); struct mlx4_net_trans_rule rule = { .queue_mode = MLX4_NET_TRANS_Q_FIFO, .exclusive = 0, .promisc_mode = MLX4_FS_REGULAR, .priority = MLX4_DOMAIN_NIC, }; rule.allow_loopback = !block_mcast_loopback; rule.port = port; rule.qpn = qp->qpn; INIT_LIST_HEAD(&rule.list); switch (prot) { case MLX4_PROT_ETH: spec.id = MLX4_NET_TRANS_RULE_ID_ETH; memcpy(spec.eth.dst_mac, &gid[10], ETH_ALEN); memcpy(spec.eth.dst_mac_msk, &mac_mask, ETH_ALEN); break; case MLX4_PROT_IB_IPV6: spec.id = MLX4_NET_TRANS_RULE_ID_IB; memcpy(spec.ib.dst_gid, gid, 16); memset(&spec.ib.dst_gid_msk, 0xff, 16); break; default: return -EINVAL; } list_add_tail(&spec.list, &rule.list); return mlx4_flow_attach(dev, &rule, reg_id); } default: return -EINVAL; } } EXPORT_SYMBOL_GPL(mlx4_multicast_attach); int mlx4_multicast_detach(struct mlx4_dev *dev, struct mlx4_qp *qp, u8 gid[16], enum mlx4_protocol prot, u64 reg_id) { switch (dev->caps.steering_mode) { case MLX4_STEERING_MODE_A0: if (prot == MLX4_PROT_ETH) return 0; case MLX4_STEERING_MODE_B0: if (prot == MLX4_PROT_ETH) gid[7] |= (MLX4_MC_STEER << 1); if (mlx4_is_mfunc(dev)) return mlx4_QP_ATTACH(dev, qp, gid, 0, 0, prot); return mlx4_qp_detach_common(dev, qp, gid, prot, MLX4_MC_STEER); case MLX4_STEERING_MODE_DEVICE_MANAGED: return mlx4_flow_detach(dev, reg_id); default: return -EINVAL; } } EXPORT_SYMBOL_GPL(mlx4_multicast_detach); int mlx4_flow_steer_promisc_add(struct mlx4_dev *dev, u8 port, u32 qpn, enum mlx4_net_trans_promisc_mode mode) { struct mlx4_net_trans_rule rule; u64 *regid_p; switch (mode) { case MLX4_FS_ALL_DEFAULT: regid_p = &dev->regid_promisc_array[port]; break; case MLX4_FS_MC_DEFAULT: regid_p = &dev->regid_allmulti_array[port]; break; default: return -1; } if (*regid_p != 0) return -1; rule.promisc_mode = mode; rule.port = port; rule.qpn = qpn; INIT_LIST_HEAD(&rule.list); mlx4_err(dev, "going promisc on %x\n", port); return mlx4_flow_attach(dev, &rule, regid_p); } EXPORT_SYMBOL_GPL(mlx4_flow_steer_promisc_add); int mlx4_flow_steer_promisc_remove(struct mlx4_dev *dev, u8 port, enum mlx4_net_trans_promisc_mode mode) { int ret; u64 *regid_p; switch (mode) { case MLX4_FS_ALL_DEFAULT: regid_p = &dev->regid_promisc_array[port]; break; case MLX4_FS_MC_DEFAULT: regid_p = &dev->regid_allmulti_array[port]; break; default: return -1; } if (*regid_p == 0) return -1; ret = mlx4_flow_detach(dev, *regid_p); if (ret == 0) *regid_p = 0; return ret; } EXPORT_SYMBOL_GPL(mlx4_flow_steer_promisc_remove); int mlx4_unicast_attach(struct mlx4_dev *dev, struct mlx4_qp *qp, u8 gid[16], int block_mcast_loopback, enum mlx4_protocol prot) { if (prot == MLX4_PROT_ETH) gid[7] |= (MLX4_UC_STEER << 1); if (mlx4_is_mfunc(dev)) return mlx4_QP_ATTACH(dev, qp, gid, 1, block_mcast_loopback, prot); return mlx4_qp_attach_common(dev, qp, gid, block_mcast_loopback, prot, MLX4_UC_STEER); } EXPORT_SYMBOL_GPL(mlx4_unicast_attach); int mlx4_unicast_detach(struct mlx4_dev *dev, struct mlx4_qp *qp, u8 gid[16], enum mlx4_protocol prot) { if (prot == MLX4_PROT_ETH) gid[7] |= (MLX4_UC_STEER << 1); if (mlx4_is_mfunc(dev)) return mlx4_QP_ATTACH(dev, qp, gid, 0, 0, prot); return mlx4_qp_detach_common(dev, qp, gid, prot, MLX4_UC_STEER); } EXPORT_SYMBOL_GPL(mlx4_unicast_detach); int mlx4_PROMISC_wrapper(struct mlx4_dev *dev, int slave, struct mlx4_vhcr *vhcr, struct mlx4_cmd_mailbox *inbox, struct mlx4_cmd_mailbox *outbox, struct mlx4_cmd_info *cmd) { u32 qpn = (u32) vhcr->in_param & 0xffffffff; u8 port = vhcr->in_param >> 62; enum mlx4_steer_type steer = vhcr->in_modifier; /* Promiscuous unicast is not allowed in mfunc */ if (mlx4_is_mfunc(dev) && steer == MLX4_UC_STEER) return 0; if (vhcr->op_modifier) return add_promisc_qp(dev, port, steer, qpn); else return remove_promisc_qp(dev, port, steer, qpn); } static int mlx4_PROMISC(struct mlx4_dev *dev, u32 qpn, enum mlx4_steer_type steer, u8 add, u8 port) { return mlx4_cmd(dev, (u64) qpn | (u64) port << 62, (u32) steer, add, MLX4_CMD_PROMISC, MLX4_CMD_TIME_CLASS_A, MLX4_CMD_WRAPPED); } int mlx4_multicast_promisc_add(struct mlx4_dev *dev, u32 qpn, u8 port) { if (mlx4_is_mfunc(dev)) return mlx4_PROMISC(dev, qpn, MLX4_MC_STEER, 1, port); return add_promisc_qp(dev, port, MLX4_MC_STEER, qpn); } EXPORT_SYMBOL_GPL(mlx4_multicast_promisc_add); int mlx4_multicast_promisc_remove(struct mlx4_dev *dev, u32 qpn, u8 port) { if (mlx4_is_mfunc(dev)) return mlx4_PROMISC(dev, qpn, MLX4_MC_STEER, 0, port); return remove_promisc_qp(dev, port, MLX4_MC_STEER, qpn); } EXPORT_SYMBOL_GPL(mlx4_multicast_promisc_remove); int mlx4_unicast_promisc_add(struct mlx4_dev *dev, u32 qpn, u8 port) { if (mlx4_is_mfunc(dev)) return mlx4_PROMISC(dev, qpn, MLX4_UC_STEER, 1, port); return add_promisc_qp(dev, port, MLX4_UC_STEER, qpn); } EXPORT_SYMBOL_GPL(mlx4_unicast_promisc_add); int mlx4_unicast_promisc_remove(struct mlx4_dev *dev, u32 qpn, u8 port) { if (mlx4_is_mfunc(dev)) return mlx4_PROMISC(dev, qpn, MLX4_UC_STEER, 0, port); return remove_promisc_qp(dev, port, MLX4_UC_STEER, qpn); } EXPORT_SYMBOL_GPL(mlx4_unicast_promisc_remove); int mlx4_init_mcg_table(struct mlx4_dev *dev) { struct mlx4_priv *priv = mlx4_priv(dev); int err; /* No need for mcg_table when fw managed the mcg table*/ if (dev->caps.steering_mode == MLX4_STEERING_MODE_DEVICE_MANAGED) return 0; err = mlx4_bitmap_init(&priv->mcg_table.bitmap, dev->caps.num_amgms, dev->caps.num_amgms - 1, 0, 0); if (err) return err; mutex_init(&priv->mcg_table.mutex); return 0; } void mlx4_cleanup_mcg_table(struct mlx4_dev *dev) { if (dev->caps.steering_mode != MLX4_STEERING_MODE_DEVICE_MANAGED) mlx4_bitmap_cleanup(&mlx4_priv(dev)->mcg_table.bitmap); } Index: stable/10/sys/ofed/drivers/net/mlx4/mr.c =================================================================== --- stable/10/sys/ofed/drivers/net/mlx4/mr.c (revision 271126) +++ stable/10/sys/ofed/drivers/net/mlx4/mr.c (revision 271127) @@ -1,899 +1,898 @@ /* * Copyright (c) 2004 Topspin Communications. All rights reserved. * Copyright (c) 2005, 2006, 2007, 2008 Mellanox Technologies. All rights reserved. * Copyright (c) 2006, 2007 Cisco Systems, Inc. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU * General Public License (GPL) Version 2, available from the file * COPYING in the main directory of this source tree, or the * OpenIB.org BSD license below: * * Redistribution and use in source and binary forms, with or * without modification, are permitted provided that the following * conditions are met: * * - Redistributions of source code must retain the above * copyright notice, this list of conditions and the following * disclaimer. * * - Redistributions in binary form must reproduce the above * copyright notice, this list of conditions and the following * disclaimer in the documentation and/or other materials * provided with the distribution. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include #include #include #include #include #include #include "mlx4.h" #include "icm.h" #define MLX4_MPT_FLAG_SW_OWNS (0xfUL << 28) #define MLX4_MPT_FLAG_FREE (0x3UL << 28) #define MLX4_MPT_FLAG_MIO (1 << 17) #define MLX4_MPT_FLAG_BIND_ENABLE (1 << 15) #define MLX4_MPT_FLAG_PHYSICAL (1 << 9) #define MLX4_MPT_FLAG_REGION (1 << 8) #define MLX4_MPT_PD_FLAG_FAST_REG (1 << 27) #define MLX4_MPT_PD_FLAG_RAE (1 << 28) #define MLX4_MPT_PD_FLAG_EN_INV (3 << 24) #define MLX4_MPT_STATUS_SW 0xF0 #define MLX4_MPT_STATUS_HW 0x00 static u32 mlx4_buddy_alloc(struct mlx4_buddy *buddy, int order) { int o; int m; u32 seg; spin_lock(&buddy->lock); for (o = order; o <= buddy->max_order; ++o) if (buddy->num_free[o]) { m = 1 << (buddy->max_order - o); seg = find_first_bit(buddy->bits[o], m); if (seg < m) goto found; } spin_unlock(&buddy->lock); return -1; found: clear_bit(seg, buddy->bits[o]); --buddy->num_free[o]; while (o > order) { --o; seg <<= 1; set_bit(seg ^ 1, buddy->bits[o]); ++buddy->num_free[o]; } spin_unlock(&buddy->lock); seg <<= order; return seg; } static void mlx4_buddy_free(struct mlx4_buddy *buddy, u32 seg, int order) { seg >>= order; spin_lock(&buddy->lock); while (test_bit(seg ^ 1, buddy->bits[order])) { clear_bit(seg ^ 1, buddy->bits[order]); --buddy->num_free[order]; seg >>= 1; ++order; } set_bit(seg, buddy->bits[order]); ++buddy->num_free[order]; spin_unlock(&buddy->lock); } static int mlx4_buddy_init(struct mlx4_buddy *buddy, int max_order) { int i, s; buddy->max_order = max_order; spin_lock_init(&buddy->lock); buddy->bits = kcalloc(buddy->max_order + 1, sizeof (long *), GFP_KERNEL); buddy->num_free = kcalloc((buddy->max_order + 1), sizeof *buddy->num_free, GFP_KERNEL); if (!buddy->bits || !buddy->num_free) goto err_out; for (i = 0; i <= buddy->max_order; ++i) { s = BITS_TO_LONGS(1 << (buddy->max_order - i)); buddy->bits[i] = kcalloc(s, sizeof (long), GFP_KERNEL | __GFP_NOWARN); if (!buddy->bits[i]) { goto err_out_free; } } set_bit(0, buddy->bits[buddy->max_order]); buddy->num_free[buddy->max_order] = 1; return 0; err_out_free: for (i = 0; i <= buddy->max_order; ++i) if ( buddy->bits[i] ) kfree(buddy->bits[i]); err_out: kfree(buddy->bits); kfree(buddy->num_free); return -ENOMEM; } static void mlx4_buddy_cleanup(struct mlx4_buddy *buddy) { int i; for (i = 0; i <= buddy->max_order; ++i) kfree(buddy->bits[i]); kfree(buddy->bits); kfree(buddy->num_free); } u32 __mlx4_alloc_mtt_range(struct mlx4_dev *dev, int order) { struct mlx4_mr_table *mr_table = &mlx4_priv(dev)->mr_table; u32 seg; int seg_order; u32 offset; seg_order = max_t(int, order - log_mtts_per_seg, 0); seg = mlx4_buddy_alloc(&mr_table->mtt_buddy, seg_order); if (seg == -1) return -1; offset = seg * (1 << log_mtts_per_seg); if (mlx4_table_get_range(dev, &mr_table->mtt_table, offset, offset + (1 << order) - 1)) { mlx4_buddy_free(&mr_table->mtt_buddy, seg, seg_order); return -1; } return offset; } static u32 mlx4_alloc_mtt_range(struct mlx4_dev *dev, int order) { u64 in_param = 0; u64 out_param; int err; if (mlx4_is_mfunc(dev)) { set_param_l(&in_param, order); err = mlx4_cmd_imm(dev, in_param, &out_param, RES_MTT, RES_OP_RESERVE_AND_MAP, MLX4_CMD_ALLOC_RES, MLX4_CMD_TIME_CLASS_A, MLX4_CMD_WRAPPED); if (err) return -1; return get_param_l(&out_param); } return __mlx4_alloc_mtt_range(dev, order); } int mlx4_mtt_init(struct mlx4_dev *dev, int npages, int page_shift, struct mlx4_mtt *mtt) { int i; if (!npages) { mtt->order = -1; mtt->page_shift = MLX4_ICM_PAGE_SHIFT; return 0; } else mtt->page_shift = page_shift; for (mtt->order = 0, i = 1; i < npages; i <<= 1) ++mtt->order; mtt->offset = mlx4_alloc_mtt_range(dev, mtt->order); if (mtt->offset == -1) { mlx4_err(dev, "Failed to allocate mtts for %d pages(order %d)\n", npages, mtt->order); return -ENOMEM; } return 0; } EXPORT_SYMBOL_GPL(mlx4_mtt_init); void __mlx4_free_mtt_range(struct mlx4_dev *dev, u32 offset, int order) { u32 first_seg; int seg_order; struct mlx4_mr_table *mr_table = &mlx4_priv(dev)->mr_table; seg_order = max_t(int, order - log_mtts_per_seg, 0); first_seg = offset / (1 << log_mtts_per_seg); mlx4_buddy_free(&mr_table->mtt_buddy, first_seg, seg_order); mlx4_table_put_range(dev, &mr_table->mtt_table, offset, offset + (1 << order) - 1); } static void mlx4_free_mtt_range(struct mlx4_dev *dev, u32 offset, int order) { u64 in_param = 0; int err; if (mlx4_is_mfunc(dev)) { set_param_l(&in_param, offset); set_param_h(&in_param, order); err = mlx4_cmd(dev, in_param, RES_MTT, RES_OP_RESERVE_AND_MAP, MLX4_CMD_FREE_RES, MLX4_CMD_TIME_CLASS_A, MLX4_CMD_WRAPPED); if (err) mlx4_warn(dev, "Failed to free mtt range at:" "%d order:%d\n", offset, order); return; } __mlx4_free_mtt_range(dev, offset, order); } void mlx4_mtt_cleanup(struct mlx4_dev *dev, struct mlx4_mtt *mtt) { if (mtt->order < 0) return; mlx4_free_mtt_range(dev, mtt->offset, mtt->order); } EXPORT_SYMBOL_GPL(mlx4_mtt_cleanup); u64 mlx4_mtt_addr(struct mlx4_dev *dev, struct mlx4_mtt *mtt) { return (u64) mtt->offset * dev->caps.mtt_entry_sz; } EXPORT_SYMBOL_GPL(mlx4_mtt_addr); static u32 hw_index_to_key(u32 ind) { return (ind >> 24) | (ind << 8); } static u32 key_to_hw_index(u32 key) { return (key << 24) | (key >> 8); } static int mlx4_SW2HW_MPT(struct mlx4_dev *dev, struct mlx4_cmd_mailbox *mailbox, int mpt_index) { return mlx4_cmd(dev, mailbox->dma, mpt_index, 0, MLX4_CMD_SW2HW_MPT, MLX4_CMD_TIME_CLASS_B, MLX4_CMD_WRAPPED); } static int mlx4_HW2SW_MPT(struct mlx4_dev *dev, struct mlx4_cmd_mailbox *mailbox, int mpt_index) { return mlx4_cmd_box(dev, 0, mailbox ? mailbox->dma : 0, mpt_index, !mailbox, MLX4_CMD_HW2SW_MPT, MLX4_CMD_TIME_CLASS_B, MLX4_CMD_WRAPPED); } static int mlx4_mr_alloc_reserved(struct mlx4_dev *dev, u32 mridx, u32 pd, u64 iova, u64 size, u32 access, int npages, int page_shift, struct mlx4_mr *mr) { mr->iova = iova; mr->size = size; mr->pd = pd; mr->access = access; mr->enabled = MLX4_MR_DISABLED; mr->key = hw_index_to_key(mridx); return mlx4_mtt_init(dev, npages, page_shift, &mr->mtt); } static int mlx4_WRITE_MTT(struct mlx4_dev *dev, struct mlx4_cmd_mailbox *mailbox, int num_entries) { return mlx4_cmd(dev, mailbox->dma, num_entries, 0, MLX4_CMD_WRITE_MTT, MLX4_CMD_TIME_CLASS_A, MLX4_CMD_WRAPPED); } int __mlx4_mr_reserve(struct mlx4_dev *dev) { struct mlx4_priv *priv = mlx4_priv(dev); return mlx4_bitmap_alloc(&priv->mr_table.mpt_bitmap); } static int mlx4_mr_reserve(struct mlx4_dev *dev) { u64 out_param; if (mlx4_is_mfunc(dev)) { if (mlx4_cmd_imm(dev, 0, &out_param, RES_MPT, RES_OP_RESERVE, MLX4_CMD_ALLOC_RES, MLX4_CMD_TIME_CLASS_A, MLX4_CMD_WRAPPED)) return -1; return get_param_l(&out_param); } return __mlx4_mr_reserve(dev); } void __mlx4_mr_release(struct mlx4_dev *dev, u32 index) { struct mlx4_priv *priv = mlx4_priv(dev); mlx4_bitmap_free(&priv->mr_table.mpt_bitmap, index); } static void mlx4_mr_release(struct mlx4_dev *dev, u32 index) { u64 in_param = 0; if (mlx4_is_mfunc(dev)) { set_param_l(&in_param, index); if (mlx4_cmd(dev, in_param, RES_MPT, RES_OP_RESERVE, MLX4_CMD_FREE_RES, MLX4_CMD_TIME_CLASS_A, MLX4_CMD_WRAPPED)) mlx4_warn(dev, "Failed to release mr index:%d\n", index); return; } __mlx4_mr_release(dev, index); } int __mlx4_mr_alloc_icm(struct mlx4_dev *dev, u32 index) { struct mlx4_mr_table *mr_table = &mlx4_priv(dev)->mr_table; return mlx4_table_get(dev, &mr_table->dmpt_table, index); } static int mlx4_mr_alloc_icm(struct mlx4_dev *dev, u32 index) { u64 param = 0; if (mlx4_is_mfunc(dev)) { set_param_l(¶m, index); return mlx4_cmd_imm(dev, param, ¶m, RES_MPT, RES_OP_MAP_ICM, MLX4_CMD_ALLOC_RES, MLX4_CMD_TIME_CLASS_A, MLX4_CMD_WRAPPED); } return __mlx4_mr_alloc_icm(dev, index); } void __mlx4_mr_free_icm(struct mlx4_dev *dev, u32 index) { struct mlx4_mr_table *mr_table = &mlx4_priv(dev)->mr_table; mlx4_table_put(dev, &mr_table->dmpt_table, index); } static void mlx4_mr_free_icm(struct mlx4_dev *dev, u32 index) { u64 in_param = 0; if (mlx4_is_mfunc(dev)) { set_param_l(&in_param, index); if (mlx4_cmd(dev, in_param, RES_MPT, RES_OP_MAP_ICM, MLX4_CMD_FREE_RES, MLX4_CMD_TIME_CLASS_A, MLX4_CMD_WRAPPED)) mlx4_warn(dev, "Failed to free icm of mr index:%d\n", index); return; } return __mlx4_mr_free_icm(dev, index); } int mlx4_mr_alloc(struct mlx4_dev *dev, u32 pd, u64 iova, u64 size, u32 access, int npages, int page_shift, struct mlx4_mr *mr) { u32 index; int err; index = mlx4_mr_reserve(dev); if (index == -1) return -ENOMEM; err = mlx4_mr_alloc_reserved(dev, index, pd, iova, size, access, npages, page_shift, mr); if (err) mlx4_mr_release(dev, index); return err; } EXPORT_SYMBOL_GPL(mlx4_mr_alloc); static void mlx4_mr_free_reserved(struct mlx4_dev *dev, struct mlx4_mr *mr) { int err; if (mr->enabled == MLX4_MR_EN_HW) { err = mlx4_HW2SW_MPT(dev, NULL, key_to_hw_index(mr->key) & (dev->caps.num_mpts - 1)); if (err) mlx4_warn(dev, "xxx HW2SW_MPT failed (%d)\n", err); mr->enabled = MLX4_MR_EN_SW; } mlx4_mtt_cleanup(dev, &mr->mtt); } void mlx4_mr_free(struct mlx4_dev *dev, struct mlx4_mr *mr) { mlx4_mr_free_reserved(dev, mr); if (mr->enabled) mlx4_mr_free_icm(dev, key_to_hw_index(mr->key)); mlx4_mr_release(dev, key_to_hw_index(mr->key)); } EXPORT_SYMBOL_GPL(mlx4_mr_free); int mlx4_mr_enable(struct mlx4_dev *dev, struct mlx4_mr *mr) { struct mlx4_cmd_mailbox *mailbox; struct mlx4_mpt_entry *mpt_entry; int err; err = mlx4_mr_alloc_icm(dev, key_to_hw_index(mr->key)); if (err) return err; mailbox = mlx4_alloc_cmd_mailbox(dev); if (IS_ERR(mailbox)) { err = PTR_ERR(mailbox); goto err_table; } mpt_entry = mailbox->buf; memset(mpt_entry, 0, sizeof *mpt_entry); mpt_entry->flags = cpu_to_be32(MLX4_MPT_FLAG_MIO | MLX4_MPT_FLAG_REGION | mr->access); mpt_entry->key = cpu_to_be32(key_to_hw_index(mr->key)); mpt_entry->pd_flags = cpu_to_be32(mr->pd | MLX4_MPT_PD_FLAG_EN_INV); mpt_entry->start = cpu_to_be64(mr->iova); mpt_entry->length = cpu_to_be64(mr->size); mpt_entry->entity_size = cpu_to_be32(mr->mtt.page_shift); if (mr->mtt.order < 0) { mpt_entry->flags |= cpu_to_be32(MLX4_MPT_FLAG_PHYSICAL); mpt_entry->mtt_addr = 0; } else { mpt_entry->mtt_addr = cpu_to_be64(mlx4_mtt_addr(dev, &mr->mtt)); } if (mr->mtt.order >= 0 && mr->mtt.page_shift == 0) { /* fast register MR in free state */ mpt_entry->flags |= cpu_to_be32(MLX4_MPT_FLAG_FREE); mpt_entry->pd_flags |= cpu_to_be32(MLX4_MPT_PD_FLAG_FAST_REG | MLX4_MPT_PD_FLAG_RAE); mpt_entry->mtt_sz = cpu_to_be32(1 << mr->mtt.order); } else { mpt_entry->flags |= cpu_to_be32(MLX4_MPT_FLAG_SW_OWNS); } err = mlx4_SW2HW_MPT(dev, mailbox, key_to_hw_index(mr->key) & (dev->caps.num_mpts - 1)); if (err) { mlx4_warn(dev, "SW2HW_MPT failed (%d)\n", err); goto err_cmd; } mr->enabled = MLX4_MR_EN_HW; mlx4_free_cmd_mailbox(dev, mailbox); return 0; err_cmd: mlx4_free_cmd_mailbox(dev, mailbox); err_table: mlx4_mr_free_icm(dev, key_to_hw_index(mr->key)); return err; } EXPORT_SYMBOL_GPL(mlx4_mr_enable); static int mlx4_write_mtt_chunk(struct mlx4_dev *dev, struct mlx4_mtt *mtt, int start_index, int npages, u64 *page_list) { struct mlx4_priv *priv = mlx4_priv(dev); __be64 *mtts; dma_addr_t dma_handle; int i; mtts = mlx4_table_find(&priv->mr_table.mtt_table, mtt->offset + start_index, &dma_handle); if (!mtts) return -ENOMEM; dma_sync_single_for_cpu(&dev->pdev->dev, dma_handle, npages * sizeof (u64), DMA_TO_DEVICE); for (i = 0; i < npages; ++i) mtts[i] = cpu_to_be64(page_list[i] | MLX4_MTT_FLAG_PRESENT); dma_sync_single_for_device(&dev->pdev->dev, dma_handle, npages * sizeof (u64), DMA_TO_DEVICE); return 0; } int __mlx4_write_mtt(struct mlx4_dev *dev, struct mlx4_mtt *mtt, int start_index, int npages, u64 *page_list) { int err = 0; int chunk; int mtts_per_page; int max_mtts_first_page; /* compute how may mtts fit in the first page */ mtts_per_page = PAGE_SIZE / sizeof(u64); max_mtts_first_page = mtts_per_page - (mtt->offset + start_index) % mtts_per_page; chunk = min_t(int, max_mtts_first_page, npages); while (npages > 0) { err = mlx4_write_mtt_chunk(dev, mtt, start_index, chunk, page_list); if (err) return err; npages -= chunk; start_index += chunk; page_list += chunk; chunk = min_t(int, mtts_per_page, npages); } return err; } int mlx4_write_mtt(struct mlx4_dev *dev, struct mlx4_mtt *mtt, int start_index, int npages, u64 *page_list) { struct mlx4_cmd_mailbox *mailbox = NULL; __be64 *inbox = NULL; int chunk; int err = 0; int i; if (mtt->order < 0) return -EINVAL; if (mlx4_is_mfunc(dev)) { mailbox = mlx4_alloc_cmd_mailbox(dev); if (IS_ERR(mailbox)) return PTR_ERR(mailbox); inbox = mailbox->buf; while (npages > 0) { chunk = min_t(int, MLX4_MAILBOX_SIZE / sizeof(u64) - 2, npages); inbox[0] = cpu_to_be64(mtt->offset + start_index); inbox[1] = 0; for (i = 0; i < chunk; ++i) inbox[i + 2] = cpu_to_be64(page_list[i] | MLX4_MTT_FLAG_PRESENT); err = mlx4_WRITE_MTT(dev, mailbox, chunk); if (err) { mlx4_free_cmd_mailbox(dev, mailbox); return err; } npages -= chunk; start_index += chunk; page_list += chunk; } mlx4_free_cmd_mailbox(dev, mailbox); return err; } return __mlx4_write_mtt(dev, mtt, start_index, npages, page_list); } EXPORT_SYMBOL_GPL(mlx4_write_mtt); int mlx4_buf_write_mtt(struct mlx4_dev *dev, struct mlx4_mtt *mtt, struct mlx4_buf *buf) { u64 *page_list; int err; int i; page_list = kmalloc(buf->npages * sizeof *page_list, GFP_KERNEL); if (!page_list) return -ENOMEM; for (i = 0; i < buf->npages; ++i) if (buf->nbufs == 1) page_list[i] = buf->direct.map + (i << buf->page_shift); else page_list[i] = buf->page_list[i].map; err = mlx4_write_mtt(dev, mtt, 0, buf->npages, page_list); kfree(page_list); return err; } EXPORT_SYMBOL_GPL(mlx4_buf_write_mtt); int mlx4_init_mr_table(struct mlx4_dev *dev) { struct mlx4_priv *priv = mlx4_priv(dev); struct mlx4_mr_table *mr_table = &priv->mr_table; int err; /* Nothing to do for slaves - all MR handling is forwarded * to the master */ if (mlx4_is_slave(dev)) return 0; if (!is_power_of_2(dev->caps.num_mpts)) return -EINVAL; err = mlx4_bitmap_init(&mr_table->mpt_bitmap, dev->caps.num_mpts, ~0, dev->caps.reserved_mrws, 0); if (err) return err; err = mlx4_buddy_init(&mr_table->mtt_buddy, ilog2((u32)dev->caps.num_mtts / (1 << log_mtts_per_seg))); if (err) goto err_buddy; if (dev->caps.reserved_mtts) { priv->reserved_mtts = mlx4_alloc_mtt_range(dev, fls(dev->caps.reserved_mtts - 1)); if (priv->reserved_mtts < 0) { mlx4_warn(dev, "MTT table of order %u is too small.\n", mr_table->mtt_buddy.max_order); err = -ENOMEM; goto err_reserve_mtts; } } return 0; err_reserve_mtts: mlx4_buddy_cleanup(&mr_table->mtt_buddy); err_buddy: mlx4_bitmap_cleanup(&mr_table->mpt_bitmap); return err; } void mlx4_cleanup_mr_table(struct mlx4_dev *dev) { struct mlx4_priv *priv = mlx4_priv(dev); struct mlx4_mr_table *mr_table = &priv->mr_table; if (mlx4_is_slave(dev)) return; if (priv->reserved_mtts >= 0) mlx4_free_mtt_range(dev, priv->reserved_mtts, fls(dev->caps.reserved_mtts - 1)); mlx4_buddy_cleanup(&mr_table->mtt_buddy); mlx4_bitmap_cleanup(&mr_table->mpt_bitmap); } static inline int mlx4_check_fmr(struct mlx4_fmr *fmr, u64 *page_list, int npages, u64 iova) { int i, page_mask; if (npages > fmr->max_pages) return -EINVAL; page_mask = (1 << fmr->page_shift) - 1; /* We are getting page lists, so va must be page aligned. */ if (iova & page_mask) return -EINVAL; /* Trust the user not to pass misaligned data in page_list */ if (0) for (i = 0; i < npages; ++i) { if (page_list[i] & ~page_mask) return -EINVAL; } if (fmr->maps >= fmr->max_maps) return -EINVAL; return 0; } int mlx4_map_phys_fmr(struct mlx4_dev *dev, struct mlx4_fmr *fmr, u64 *page_list, int npages, u64 iova, u32 *lkey, u32 *rkey) { u32 key; int i, err; err = mlx4_check_fmr(fmr, page_list, npages, iova); if (err) return err; ++fmr->maps; key = key_to_hw_index(fmr->mr.key); key += dev->caps.num_mpts; *lkey = *rkey = fmr->mr.key = hw_index_to_key(key); *(u8 *) fmr->mpt = MLX4_MPT_STATUS_SW; /* Make sure MPT status is visible before writing MTT entries */ wmb(); dma_sync_single_for_cpu(&dev->pdev->dev, fmr->dma_handle, npages * sizeof(u64), DMA_TO_DEVICE); for (i = 0; i < npages; ++i) fmr->mtts[i] = cpu_to_be64(page_list[i] | MLX4_MTT_FLAG_PRESENT); dma_sync_single_for_device(&dev->pdev->dev, fmr->dma_handle, npages * sizeof(u64), DMA_TO_DEVICE); fmr->mpt->key = cpu_to_be32(key); fmr->mpt->lkey = cpu_to_be32(key); fmr->mpt->length = cpu_to_be64(npages * (1ull << fmr->page_shift)); fmr->mpt->start = cpu_to_be64(iova); /* Make MTT entries are visible before setting MPT status */ wmb(); *(u8 *) fmr->mpt = MLX4_MPT_STATUS_HW; /* Make sure MPT status is visible before consumer can use FMR */ wmb(); return 0; } EXPORT_SYMBOL_GPL(mlx4_map_phys_fmr); int mlx4_fmr_alloc(struct mlx4_dev *dev, u32 pd, u32 access, int max_pages, int max_maps, u8 page_shift, struct mlx4_fmr *fmr) { struct mlx4_priv *priv = mlx4_priv(dev); int err = -ENOMEM; if (max_maps > dev->caps.max_fmr_maps) return -EINVAL; if (page_shift < (ffs(dev->caps.page_size_cap) - 1) || page_shift >= 32) return -EINVAL; /* All MTTs must fit in the same page */ if (max_pages * sizeof *fmr->mtts > PAGE_SIZE) return -EINVAL; fmr->page_shift = page_shift; fmr->max_pages = max_pages; fmr->max_maps = max_maps; fmr->maps = 0; err = mlx4_mr_alloc(dev, pd, 0, 0, access, max_pages, page_shift, &fmr->mr); if (err) return err; fmr->mtts = mlx4_table_find(&priv->mr_table.mtt_table, fmr->mr.mtt.offset, &fmr->dma_handle); if (!fmr->mtts) { err = -ENOMEM; goto err_free; } return 0; err_free: mlx4_mr_free(dev, &fmr->mr); return err; } EXPORT_SYMBOL_GPL(mlx4_fmr_alloc); int mlx4_fmr_enable(struct mlx4_dev *dev, struct mlx4_fmr *fmr) { struct mlx4_priv *priv = mlx4_priv(dev); int err; err = mlx4_mr_enable(dev, &fmr->mr); if (err) return err; fmr->mpt = mlx4_table_find(&priv->mr_table.dmpt_table, key_to_hw_index(fmr->mr.key), NULL); if (!fmr->mpt) return -ENOMEM; return 0; } EXPORT_SYMBOL_GPL(mlx4_fmr_enable); void mlx4_fmr_unmap(struct mlx4_dev *dev, struct mlx4_fmr *fmr, u32 *lkey, u32 *rkey) { struct mlx4_cmd_mailbox *mailbox; int err; if (!fmr->maps) return; fmr->maps = 0; mailbox = mlx4_alloc_cmd_mailbox(dev); if (IS_ERR(mailbox)) { err = PTR_ERR(mailbox); mlx4_warn(dev, "mlx4_alloc_cmd_mailbox failed (%d)\n", err); return; } err = mlx4_HW2SW_MPT(dev, NULL, key_to_hw_index(fmr->mr.key) & (dev->caps.num_mpts - 1)); mlx4_free_cmd_mailbox(dev, mailbox); if (err) { mlx4_warn(dev, "mlx4_HW2SW_MPT failed (%d)\n", err); return; } fmr->mr.enabled = MLX4_MR_EN_SW; } EXPORT_SYMBOL_GPL(mlx4_fmr_unmap); int mlx4_fmr_free(struct mlx4_dev *dev, struct mlx4_fmr *fmr) { if (fmr->maps) return -EBUSY; mlx4_mr_free(dev, &fmr->mr); fmr->mr.enabled = MLX4_MR_DISABLED; return 0; } EXPORT_SYMBOL_GPL(mlx4_fmr_free); int mlx4_SYNC_TPT(struct mlx4_dev *dev) { return mlx4_cmd(dev, 0, 0, 0, MLX4_CMD_SYNC_TPT, 1000, MLX4_CMD_NATIVE); } EXPORT_SYMBOL_GPL(mlx4_SYNC_TPT); Index: stable/10/sys/ofed/drivers/net/mlx4/pd.c =================================================================== --- stable/10/sys/ofed/drivers/net/mlx4/pd.c (revision 271126) +++ stable/10/sys/ofed/drivers/net/mlx4/pd.c (revision 271127) @@ -1,302 +1,301 @@ /* * Copyright (c) 2006, 2007 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2005 Mellanox Technologies. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU * General Public License (GPL) Version 2, available from the file * COPYING in the main directory of this source tree, or the * OpenIB.org BSD license below: * * Redistribution and use in source and binary forms, with or * without modification, are permitted provided that the following * conditions are met: * * - Redistributions of source code must retain the above * copyright notice, this list of conditions and the following * disclaimer. * * - Redistributions in binary form must reproduce the above * copyright notice, this list of conditions and the following * disclaimer in the documentation and/or other materials * provided with the distribution. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include #include #include #include #include "mlx4.h" #include "icm.h" enum { MLX4_NUM_RESERVED_UARS = 8 }; int mlx4_pd_alloc(struct mlx4_dev *dev, u32 *pdn) { struct mlx4_priv *priv = mlx4_priv(dev); *pdn = mlx4_bitmap_alloc(&priv->pd_bitmap); if (*pdn == -1) return -ENOMEM; return 0; } EXPORT_SYMBOL_GPL(mlx4_pd_alloc); void mlx4_pd_free(struct mlx4_dev *dev, u32 pdn) { mlx4_bitmap_free(&mlx4_priv(dev)->pd_bitmap, pdn); } EXPORT_SYMBOL_GPL(mlx4_pd_free); int __mlx4_xrcd_alloc(struct mlx4_dev *dev, u32 *xrcdn) { struct mlx4_priv *priv = mlx4_priv(dev); *xrcdn = mlx4_bitmap_alloc(&priv->xrcd_bitmap); if (*xrcdn == -1) return -ENOMEM; return 0; } int mlx4_xrcd_alloc(struct mlx4_dev *dev, u32 *xrcdn) { u64 out_param; int err; if (mlx4_is_mfunc(dev)) { err = mlx4_cmd_imm(dev, 0, &out_param, RES_XRCD, RES_OP_RESERVE, MLX4_CMD_ALLOC_RES, MLX4_CMD_TIME_CLASS_A, MLX4_CMD_WRAPPED); if (err) return err; *xrcdn = get_param_l(&out_param); return 0; } return __mlx4_xrcd_alloc(dev, xrcdn); } EXPORT_SYMBOL_GPL(mlx4_xrcd_alloc); void __mlx4_xrcd_free(struct mlx4_dev *dev, u32 xrcdn) { mlx4_bitmap_free(&mlx4_priv(dev)->xrcd_bitmap, xrcdn); } void mlx4_xrcd_free(struct mlx4_dev *dev, u32 xrcdn) { u64 in_param = 0; int err; if (mlx4_is_mfunc(dev)) { set_param_l(&in_param, xrcdn); err = mlx4_cmd(dev, in_param, RES_XRCD, RES_OP_RESERVE, MLX4_CMD_FREE_RES, MLX4_CMD_TIME_CLASS_A, MLX4_CMD_WRAPPED); if (err) mlx4_warn(dev, "Failed to release xrcdn %d\n", xrcdn); } else __mlx4_xrcd_free(dev, xrcdn); } EXPORT_SYMBOL_GPL(mlx4_xrcd_free); int mlx4_init_pd_table(struct mlx4_dev *dev) { struct mlx4_priv *priv = mlx4_priv(dev); return mlx4_bitmap_init(&priv->pd_bitmap, dev->caps.num_pds, (1 << NOT_MASKED_PD_BITS) - 1, dev->caps.reserved_pds, 0); } void mlx4_cleanup_pd_table(struct mlx4_dev *dev) { mlx4_bitmap_cleanup(&mlx4_priv(dev)->pd_bitmap); } int mlx4_init_xrcd_table(struct mlx4_dev *dev) { struct mlx4_priv *priv = mlx4_priv(dev); return mlx4_bitmap_init(&priv->xrcd_bitmap, (1 << 16), (1 << 16) - 1, dev->caps.reserved_xrcds + 1, 0); } void mlx4_cleanup_xrcd_table(struct mlx4_dev *dev) { mlx4_bitmap_cleanup(&mlx4_priv(dev)->xrcd_bitmap); } int mlx4_uar_alloc(struct mlx4_dev *dev, struct mlx4_uar *uar) { int offset; uar->index = mlx4_bitmap_alloc(&mlx4_priv(dev)->uar_table.bitmap); if (uar->index == -1) return -ENOMEM; if (mlx4_is_slave(dev)) offset = uar->index % ((int) pci_resource_len(dev->pdev, 2) / dev->caps.uar_page_size); else offset = uar->index; uar->pfn = (pci_resource_start(dev->pdev, 2) >> PAGE_SHIFT) + offset; uar->map = NULL; return 0; } EXPORT_SYMBOL_GPL(mlx4_uar_alloc); void mlx4_uar_free(struct mlx4_dev *dev, struct mlx4_uar *uar) { mlx4_bitmap_free(&mlx4_priv(dev)->uar_table.bitmap, uar->index); } EXPORT_SYMBOL_GPL(mlx4_uar_free); #ifndef CONFIG_PPC int mlx4_bf_alloc(struct mlx4_dev *dev, struct mlx4_bf *bf, int node) { struct mlx4_priv *priv = mlx4_priv(dev); struct mlx4_uar *uar; int err = 0; int idx; if (!priv->bf_mapping) return -ENOMEM; mutex_lock(&priv->bf_mutex); if (!list_empty(&priv->bf_list)) uar = list_entry(priv->bf_list.next, struct mlx4_uar, bf_list); else { if (mlx4_bitmap_avail(&priv->uar_table.bitmap) < MLX4_NUM_RESERVED_UARS) { err = -ENOMEM; goto out; } uar = kmalloc_node(sizeof *uar, GFP_KERNEL, node); if (!uar) { uar = kmalloc(sizeof *uar, GFP_KERNEL); if (!uar) { err = -ENOMEM; goto out; } } err = mlx4_uar_alloc(dev, uar); if (err) goto free_kmalloc; uar->map = ioremap(uar->pfn << PAGE_SHIFT, PAGE_SIZE); if (!uar->map) { err = -ENOMEM; goto free_uar; } uar->bf_map = io_mapping_map_wc(priv->bf_mapping, uar->index << PAGE_SHIFT); if (!uar->bf_map) { err = -ENOMEM; goto unamp_uar; } uar->free_bf_bmap = 0; list_add(&uar->bf_list, &priv->bf_list); } bf->uar = uar; idx = ffz(uar->free_bf_bmap); uar->free_bf_bmap |= 1 << idx; bf->uar = uar; bf->offset = 0; bf->buf_size = dev->caps.bf_reg_size / 2; bf->reg = uar->bf_map + idx * dev->caps.bf_reg_size; if (uar->free_bf_bmap == (1 << dev->caps.bf_regs_per_page) - 1) list_del_init(&uar->bf_list); goto out; unamp_uar: bf->uar = NULL; iounmap(uar->map); free_uar: mlx4_uar_free(dev, uar); free_kmalloc: kfree(uar); out: mutex_unlock(&priv->bf_mutex); return err; } EXPORT_SYMBOL_GPL(mlx4_bf_alloc); void mlx4_bf_free(struct mlx4_dev *dev, struct mlx4_bf *bf) { struct mlx4_priv *priv = mlx4_priv(dev); int idx; if (!bf->uar || !bf->uar->bf_map) return; mutex_lock(&priv->bf_mutex); idx = (bf->reg - bf->uar->bf_map) / dev->caps.bf_reg_size; bf->uar->free_bf_bmap &= ~(1 << idx); if (!bf->uar->free_bf_bmap) { if (!list_empty(&bf->uar->bf_list)) list_del(&bf->uar->bf_list); io_mapping_unmap(bf->uar->bf_map); iounmap(bf->uar->map); mlx4_uar_free(dev, bf->uar); kfree(bf->uar); } else if (list_empty(&bf->uar->bf_list)) list_add(&bf->uar->bf_list, &priv->bf_list); mutex_unlock(&priv->bf_mutex); } EXPORT_SYMBOL_GPL(mlx4_bf_free); #else int mlx4_bf_alloc(struct mlx4_dev *dev, struct mlx4_bf *bf, int node) { memset(bf, 0, sizeof *bf); return -ENOSYS; } EXPORT_SYMBOL_GPL(mlx4_bf_alloc); void mlx4_bf_free(struct mlx4_dev *dev, struct mlx4_bf *bf) { return; } EXPORT_SYMBOL_GPL(mlx4_bf_free); #endif int mlx4_init_uar_table(struct mlx4_dev *dev) { if (dev->caps.num_uars <= 128) { mlx4_err(dev, "Only %d UAR pages (need more than 128)\n", dev->caps.num_uars); mlx4_err(dev, "Increase firmware log2_uar_bar_megabytes?\n"); return -ENODEV; } return mlx4_bitmap_init(&mlx4_priv(dev)->uar_table.bitmap, dev->caps.num_uars, dev->caps.num_uars - 1, dev->caps.reserved_uars, 0); } void mlx4_cleanup_uar_table(struct mlx4_dev *dev) { mlx4_bitmap_cleanup(&mlx4_priv(dev)->uar_table.bitmap); } Index: stable/10/sys/ofed/drivers/net/mlx4/qp.c =================================================================== --- stable/10/sys/ofed/drivers/net/mlx4/qp.c (revision 271126) +++ stable/10/sys/ofed/drivers/net/mlx4/qp.c (revision 271127) @@ -1,608 +1,606 @@ /* * Copyright (c) 2004 Topspin Communications. All rights reserved. * Copyright (c) 2005, 2006, 2007 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2005, 2006, 2007, 2008 Mellanox Technologies. All rights reserved. * Copyright (c) 2004 Voltaire, Inc. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU * General Public License (GPL) Version 2, available from the file * COPYING in the main directory of this source tree, or the * OpenIB.org BSD license below: * * Redistribution and use in source and binary forms, with or * without modification, are permitted provided that the following * conditions are met: * * - Redistributions of source code must retain the above * copyright notice, this list of conditions and the following * disclaimer. * * - Redistributions in binary form must reproduce the above * copyright notice, this list of conditions and the following * disclaimer in the documentation and/or other materials * provided with the distribution. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include - #include #include #include "mlx4.h" #include "icm.h" /* * QP to support BF should have bits 6,7 cleared */ #define MLX4_BF_QP_SKIP_MASK 0xc0 #define MLX4_MAX_BF_QP_RANGE 0x40 void mlx4_qp_event(struct mlx4_dev *dev, u32 qpn, int event_type) { struct mlx4_qp_table *qp_table = &mlx4_priv(dev)->qp_table; struct mlx4_qp *qp; spin_lock(&qp_table->lock); qp = __mlx4_qp_lookup(dev, qpn); if (qp) atomic_inc(&qp->refcount); spin_unlock(&qp_table->lock); if (!qp) { mlx4_dbg(dev, "Async event for none existent QP %08x\n", qpn); return; } qp->event(qp, event_type); if (atomic_dec_and_test(&qp->refcount)) complete(&qp->free); } /* used for INIT/CLOSE port logic */ static int is_master_qp0(struct mlx4_dev *dev, struct mlx4_qp *qp, int *real_qp0, int *proxy_qp0) { /* this procedure is called after we already know we are on the master */ /* qp0 is either the proxy qp0, or the real qp0 */ u32 pf_proxy_offset = dev->phys_caps.base_proxy_sqpn + 8 * mlx4_master_func_num(dev); *proxy_qp0 = qp->qpn >= pf_proxy_offset && qp->qpn <= pf_proxy_offset + 1; *real_qp0 = qp->qpn >= dev->phys_caps.base_sqpn && qp->qpn <= dev->phys_caps.base_sqpn + 1; return *real_qp0 || *proxy_qp0; } static int __mlx4_qp_modify(struct mlx4_dev *dev, struct mlx4_mtt *mtt, enum mlx4_qp_state cur_state, enum mlx4_qp_state new_state, struct mlx4_qp_context *context, enum mlx4_qp_optpar optpar, int sqd_event, struct mlx4_qp *qp, int native) { static const u16 op[MLX4_QP_NUM_STATE][MLX4_QP_NUM_STATE] = { [MLX4_QP_STATE_RST] = { [MLX4_QP_STATE_RST] = MLX4_CMD_2RST_QP, [MLX4_QP_STATE_ERR] = MLX4_CMD_2ERR_QP, [MLX4_QP_STATE_INIT] = MLX4_CMD_RST2INIT_QP, }, [MLX4_QP_STATE_INIT] = { [MLX4_QP_STATE_RST] = MLX4_CMD_2RST_QP, [MLX4_QP_STATE_ERR] = MLX4_CMD_2ERR_QP, [MLX4_QP_STATE_INIT] = MLX4_CMD_INIT2INIT_QP, [MLX4_QP_STATE_RTR] = MLX4_CMD_INIT2RTR_QP, }, [MLX4_QP_STATE_RTR] = { [MLX4_QP_STATE_RST] = MLX4_CMD_2RST_QP, [MLX4_QP_STATE_ERR] = MLX4_CMD_2ERR_QP, [MLX4_QP_STATE_RTS] = MLX4_CMD_RTR2RTS_QP, }, [MLX4_QP_STATE_RTS] = { [MLX4_QP_STATE_RST] = MLX4_CMD_2RST_QP, [MLX4_QP_STATE_ERR] = MLX4_CMD_2ERR_QP, [MLX4_QP_STATE_RTS] = MLX4_CMD_RTS2RTS_QP, [MLX4_QP_STATE_SQD] = MLX4_CMD_RTS2SQD_QP, }, [MLX4_QP_STATE_SQD] = { [MLX4_QP_STATE_RST] = MLX4_CMD_2RST_QP, [MLX4_QP_STATE_ERR] = MLX4_CMD_2ERR_QP, [MLX4_QP_STATE_RTS] = MLX4_CMD_SQD2RTS_QP, [MLX4_QP_STATE_SQD] = MLX4_CMD_SQD2SQD_QP, }, [MLX4_QP_STATE_SQER] = { [MLX4_QP_STATE_RST] = MLX4_CMD_2RST_QP, [MLX4_QP_STATE_ERR] = MLX4_CMD_2ERR_QP, [MLX4_QP_STATE_RTS] = MLX4_CMD_SQERR2RTS_QP, }, [MLX4_QP_STATE_ERR] = { [MLX4_QP_STATE_RST] = MLX4_CMD_2RST_QP, [MLX4_QP_STATE_ERR] = MLX4_CMD_2ERR_QP, } }; struct mlx4_priv *priv = mlx4_priv(dev); struct mlx4_cmd_mailbox *mailbox; int ret = 0; int real_qp0 = 0; int proxy_qp0 = 0; u8 port; if (cur_state >= MLX4_QP_NUM_STATE || new_state >= MLX4_QP_NUM_STATE || !op[cur_state][new_state]) return -EINVAL; if (op[cur_state][new_state] == MLX4_CMD_2RST_QP) { ret = mlx4_cmd(dev, 0, qp->qpn, 2, MLX4_CMD_2RST_QP, MLX4_CMD_TIME_CLASS_A, native); if (mlx4_is_master(dev) && cur_state != MLX4_QP_STATE_ERR && cur_state != MLX4_QP_STATE_RST && is_master_qp0(dev, qp, &real_qp0, &proxy_qp0)) { port = (qp->qpn & 1) + 1; if (proxy_qp0) priv->mfunc.master.qp0_state[port].proxy_qp0_active = 0; else priv->mfunc.master.qp0_state[port].qp0_active = 0; } return ret; } mailbox = mlx4_alloc_cmd_mailbox(dev); if (IS_ERR(mailbox)) return PTR_ERR(mailbox); if (cur_state == MLX4_QP_STATE_RST && new_state == MLX4_QP_STATE_INIT) { u64 mtt_addr = mlx4_mtt_addr(dev, mtt); context->mtt_base_addr_h = mtt_addr >> 32; context->mtt_base_addr_l = cpu_to_be32(mtt_addr & 0xffffffff); context->log_page_size = mtt->page_shift - MLX4_ICM_PAGE_SHIFT; } *(__be32 *) mailbox->buf = cpu_to_be32(optpar); memcpy(mailbox->buf + 8, context, sizeof *context); ((struct mlx4_qp_context *) (mailbox->buf + 8))->local_qpn = cpu_to_be32(qp->qpn); ret = mlx4_cmd(dev, mailbox->dma, qp->qpn | (!!sqd_event << 31), new_state == MLX4_QP_STATE_RST ? 2 : 0, op[cur_state][new_state], MLX4_CMD_TIME_CLASS_C, native); if (mlx4_is_master(dev) && is_master_qp0(dev, qp, &real_qp0, &proxy_qp0)) { port = (qp->qpn & 1) + 1; if (cur_state != MLX4_QP_STATE_ERR && cur_state != MLX4_QP_STATE_RST && new_state == MLX4_QP_STATE_ERR) { if (proxy_qp0) priv->mfunc.master.qp0_state[port].proxy_qp0_active = 0; else priv->mfunc.master.qp0_state[port].qp0_active = 0; } else if (new_state == MLX4_QP_STATE_RTR) { if (proxy_qp0) priv->mfunc.master.qp0_state[port].proxy_qp0_active = 1; else priv->mfunc.master.qp0_state[port].qp0_active = 1; } } mlx4_free_cmd_mailbox(dev, mailbox); return ret; } int mlx4_qp_modify(struct mlx4_dev *dev, struct mlx4_mtt *mtt, enum mlx4_qp_state cur_state, enum mlx4_qp_state new_state, struct mlx4_qp_context *context, enum mlx4_qp_optpar optpar, int sqd_event, struct mlx4_qp *qp) { return __mlx4_qp_modify(dev, mtt, cur_state, new_state, context, optpar, sqd_event, qp, 0); } EXPORT_SYMBOL_GPL(mlx4_qp_modify); int __mlx4_qp_reserve_range(struct mlx4_dev *dev, int cnt, int align, int *base, u8 bf_qp) { struct mlx4_priv *priv = mlx4_priv(dev); struct mlx4_qp_table *qp_table = &priv->qp_table; if (cnt > MLX4_MAX_BF_QP_RANGE && bf_qp) return -ENOMEM; *base = mlx4_bitmap_alloc_range(&qp_table->bitmap, cnt, align, bf_qp ? MLX4_BF_QP_SKIP_MASK : 0); if (*base == -1) return -ENOMEM; return 0; } int mlx4_qp_reserve_range(struct mlx4_dev *dev, int cnt, int align, int *base, u8 bf_qp) { u64 in_param = 0; u64 out_param; int err; if (mlx4_is_mfunc(dev)) { set_param_l(&in_param, (((!!bf_qp) << 31) | (u32)cnt)); set_param_h(&in_param, align); err = mlx4_cmd_imm(dev, in_param, &out_param, RES_QP, RES_OP_RESERVE, MLX4_CMD_ALLOC_RES, MLX4_CMD_TIME_CLASS_A, MLX4_CMD_WRAPPED); if (err) return err; *base = get_param_l(&out_param); return 0; } return __mlx4_qp_reserve_range(dev, cnt, align, base, bf_qp); } EXPORT_SYMBOL_GPL(mlx4_qp_reserve_range); void __mlx4_qp_release_range(struct mlx4_dev *dev, int base_qpn, int cnt) { struct mlx4_priv *priv = mlx4_priv(dev); struct mlx4_qp_table *qp_table = &priv->qp_table; if (mlx4_is_qp_reserved(dev, (u32) base_qpn)) return; mlx4_bitmap_free_range(&qp_table->bitmap, base_qpn, cnt); } void mlx4_qp_release_range(struct mlx4_dev *dev, int base_qpn, int cnt) { u64 in_param = 0; int err; if (mlx4_is_mfunc(dev)) { set_param_l(&in_param, base_qpn); set_param_h(&in_param, cnt); err = mlx4_cmd(dev, in_param, RES_QP, RES_OP_RESERVE, MLX4_CMD_FREE_RES, MLX4_CMD_TIME_CLASS_A, MLX4_CMD_WRAPPED); if (err) { mlx4_warn(dev, "Failed to release qp range" " base:%d cnt:%d\n", base_qpn, cnt); } } else __mlx4_qp_release_range(dev, base_qpn, cnt); } EXPORT_SYMBOL_GPL(mlx4_qp_release_range); int __mlx4_qp_alloc_icm(struct mlx4_dev *dev, int qpn) { struct mlx4_priv *priv = mlx4_priv(dev); struct mlx4_qp_table *qp_table = &priv->qp_table; int err; err = mlx4_table_get(dev, &qp_table->qp_table, qpn); if (err) goto err_out; err = mlx4_table_get(dev, &qp_table->auxc_table, qpn); if (err) goto err_put_qp; err = mlx4_table_get(dev, &qp_table->altc_table, qpn); if (err) goto err_put_auxc; err = mlx4_table_get(dev, &qp_table->rdmarc_table, qpn); if (err) goto err_put_altc; err = mlx4_table_get(dev, &qp_table->cmpt_table, qpn); if (err) goto err_put_rdmarc; return 0; err_put_rdmarc: mlx4_table_put(dev, &qp_table->rdmarc_table, qpn); err_put_altc: mlx4_table_put(dev, &qp_table->altc_table, qpn); err_put_auxc: mlx4_table_put(dev, &qp_table->auxc_table, qpn); err_put_qp: mlx4_table_put(dev, &qp_table->qp_table, qpn); err_out: return err; } static int mlx4_qp_alloc_icm(struct mlx4_dev *dev, int qpn) { u64 param = 0; if (mlx4_is_mfunc(dev)) { set_param_l(¶m, qpn); return mlx4_cmd_imm(dev, param, ¶m, RES_QP, RES_OP_MAP_ICM, MLX4_CMD_ALLOC_RES, MLX4_CMD_TIME_CLASS_A, MLX4_CMD_WRAPPED); } return __mlx4_qp_alloc_icm(dev, qpn); } void __mlx4_qp_free_icm(struct mlx4_dev *dev, int qpn) { struct mlx4_priv *priv = mlx4_priv(dev); struct mlx4_qp_table *qp_table = &priv->qp_table; mlx4_table_put(dev, &qp_table->cmpt_table, qpn); mlx4_table_put(dev, &qp_table->rdmarc_table, qpn); mlx4_table_put(dev, &qp_table->altc_table, qpn); mlx4_table_put(dev, &qp_table->auxc_table, qpn); mlx4_table_put(dev, &qp_table->qp_table, qpn); } static void mlx4_qp_free_icm(struct mlx4_dev *dev, int qpn) { u64 in_param = 0; if (mlx4_is_mfunc(dev)) { set_param_l(&in_param, qpn); if (mlx4_cmd(dev, in_param, RES_QP, RES_OP_MAP_ICM, MLX4_CMD_FREE_RES, MLX4_CMD_TIME_CLASS_A, MLX4_CMD_WRAPPED)) mlx4_warn(dev, "Failed to free icm of qp:%d\n", qpn); } else __mlx4_qp_free_icm(dev, qpn); } int mlx4_qp_alloc(struct mlx4_dev *dev, int qpn, struct mlx4_qp *qp) { struct mlx4_priv *priv = mlx4_priv(dev); struct mlx4_qp_table *qp_table = &priv->qp_table; int err; if (!qpn) return -EINVAL; qp->qpn = qpn; err = mlx4_qp_alloc_icm(dev, qpn); if (err) return err; spin_lock_irq(&qp_table->lock); err = radix_tree_insert(&dev->qp_table_tree, qp->qpn & (dev->caps.num_qps - 1), qp); spin_unlock_irq(&qp_table->lock); if (err) goto err_icm; atomic_set(&qp->refcount, 1); init_completion(&qp->free); return 0; err_icm: mlx4_qp_free_icm(dev, qpn); return err; } EXPORT_SYMBOL_GPL(mlx4_qp_alloc); void mlx4_qp_remove(struct mlx4_dev *dev, struct mlx4_qp *qp) { struct mlx4_qp_table *qp_table = &mlx4_priv(dev)->qp_table; unsigned long flags; spin_lock_irqsave(&qp_table->lock, flags); radix_tree_delete(&dev->qp_table_tree, qp->qpn & (dev->caps.num_qps - 1)); spin_unlock_irqrestore(&qp_table->lock, flags); } EXPORT_SYMBOL_GPL(mlx4_qp_remove); void mlx4_qp_free(struct mlx4_dev *dev, struct mlx4_qp *qp) { if (atomic_dec_and_test(&qp->refcount)) complete(&qp->free); wait_for_completion(&qp->free); mlx4_qp_free_icm(dev, qp->qpn); } EXPORT_SYMBOL_GPL(mlx4_qp_free); static int mlx4_CONF_SPECIAL_QP(struct mlx4_dev *dev, u32 base_qpn) { return mlx4_cmd(dev, 0, base_qpn, 0, MLX4_CMD_CONF_SPECIAL_QP, MLX4_CMD_TIME_CLASS_B, MLX4_CMD_NATIVE); } int mlx4_init_qp_table(struct mlx4_dev *dev) { struct mlx4_qp_table *qp_table = &mlx4_priv(dev)->qp_table; int err; int reserved_from_top = 0; int reserved_from_bot; int k; spin_lock_init(&qp_table->lock); INIT_RADIX_TREE(&dev->qp_table_tree, GFP_ATOMIC); if (mlx4_is_slave(dev)) return 0; /* * We reserve 2 extra QPs per port for the special QPs. The * block of special QPs must be aligned to a multiple of 8, so * round up. * * We also reserve the MSB of the 24-bit QP number to indicate * that a QP is an XRC QP. */ dev->phys_caps.base_sqpn = ALIGN(dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FW], 8); { int sort[MLX4_NUM_QP_REGION]; int i, j, tmp; int last_base = dev->caps.num_qps; for (i = 1; i < MLX4_NUM_QP_REGION; ++i) sort[i] = i; for (i = MLX4_NUM_QP_REGION; i > 0; --i) { for (j = 2; j < i; ++j) { if (dev->caps.reserved_qps_cnt[sort[j]] > dev->caps.reserved_qps_cnt[sort[j - 1]]) { tmp = sort[j]; sort[j] = sort[j - 1]; sort[j - 1] = tmp; } } } for (i = 1; i < MLX4_NUM_QP_REGION; ++i) { last_base -= dev->caps.reserved_qps_cnt[sort[i]]; dev->caps.reserved_qps_base[sort[i]] = last_base; reserved_from_top += dev->caps.reserved_qps_cnt[sort[i]]; } } /* Reserve 8 real SQPs in both native and SRIOV modes. * In addition, in SRIOV mode, reserve 8 proxy SQPs per function * (for all PFs and VFs), and 8 corresponding tunnel QPs. * Each proxy SQP works opposite its own tunnel QP. * * The QPs are arranged as follows: * a. 8 real SQPs * b. All the proxy SQPs (8 per function) * c. All the tunnel QPs (8 per function) */ reserved_from_bot = mlx4_num_reserved_sqps(dev); if (reserved_from_bot + reserved_from_top > dev->caps.num_qps) { mlx4_err(dev, "Number of reserved QPs is higher than number " "of QPs, increase the value of log_num_qp\n"); return -EINVAL; } err = mlx4_bitmap_init(&qp_table->bitmap, dev->caps.num_qps, (1 << 23) - 1, reserved_from_bot, reserved_from_top); if (err) return err; if (mlx4_is_mfunc(dev)) { /* for PPF use */ dev->phys_caps.base_proxy_sqpn = dev->phys_caps.base_sqpn + 8; dev->phys_caps.base_tunnel_sqpn = dev->phys_caps.base_sqpn + 8 + 8 * MLX4_MFUNC_MAX; /* In mfunc, calculate proxy and tunnel qp offsets for the PF here, * since the PF does not call mlx4_slave_caps */ dev->caps.qp0_tunnel = kcalloc(dev->caps.num_ports, sizeof (u32), GFP_KERNEL); dev->caps.qp0_proxy = kcalloc(dev->caps.num_ports, sizeof (u32), GFP_KERNEL); dev->caps.qp1_tunnel = kcalloc(dev->caps.num_ports, sizeof (u32), GFP_KERNEL); dev->caps.qp1_proxy = kcalloc(dev->caps.num_ports, sizeof (u32), GFP_KERNEL); if (!dev->caps.qp0_tunnel || !dev->caps.qp0_proxy || !dev->caps.qp1_tunnel || !dev->caps.qp1_proxy) { err = -ENOMEM; goto err_mem; } for (k = 0; k < dev->caps.num_ports; k++) { dev->caps.qp0_proxy[k] = dev->phys_caps.base_proxy_sqpn + 8 * mlx4_master_func_num(dev) + k; dev->caps.qp0_tunnel[k] = dev->caps.qp0_proxy[k] + 8 * MLX4_MFUNC_MAX; dev->caps.qp1_proxy[k] = dev->phys_caps.base_proxy_sqpn + 8 * mlx4_master_func_num(dev) + MLX4_MAX_PORTS + k; dev->caps.qp1_tunnel[k] = dev->caps.qp1_proxy[k] + 8 * MLX4_MFUNC_MAX; } } err = mlx4_CONF_SPECIAL_QP(dev, dev->phys_caps.base_sqpn); if (err) goto err_mem; return 0; err_mem: kfree(dev->caps.qp0_tunnel); kfree(dev->caps.qp0_proxy); kfree(dev->caps.qp1_tunnel); kfree(dev->caps.qp1_proxy); dev->caps.qp0_tunnel = dev->caps.qp0_proxy = dev->caps.qp1_tunnel = dev->caps.qp1_proxy = NULL; return err; } void mlx4_cleanup_qp_table(struct mlx4_dev *dev) { if (mlx4_is_slave(dev)) return; mlx4_CONF_SPECIAL_QP(dev, 0); mlx4_bitmap_cleanup(&mlx4_priv(dev)->qp_table.bitmap); } int mlx4_qp_query(struct mlx4_dev *dev, struct mlx4_qp *qp, struct mlx4_qp_context *context) { struct mlx4_cmd_mailbox *mailbox; int err; mailbox = mlx4_alloc_cmd_mailbox(dev); if (IS_ERR(mailbox)) return PTR_ERR(mailbox); err = mlx4_cmd_box(dev, 0, mailbox->dma, qp->qpn, 0, MLX4_CMD_QUERY_QP, MLX4_CMD_TIME_CLASS_A, MLX4_CMD_WRAPPED); if (!err) memcpy(context, mailbox->buf + 8, sizeof *context); mlx4_free_cmd_mailbox(dev, mailbox); return err; } EXPORT_SYMBOL_GPL(mlx4_qp_query); int mlx4_qp_to_ready(struct mlx4_dev *dev, struct mlx4_mtt *mtt, struct mlx4_qp_context *context, struct mlx4_qp *qp, enum mlx4_qp_state *qp_state) { int err; int i; enum mlx4_qp_state states[] = { MLX4_QP_STATE_RST, MLX4_QP_STATE_INIT, MLX4_QP_STATE_RTR, MLX4_QP_STATE_RTS }; for (i = 0; i < ARRAY_SIZE(states) - 1; i++) { context->flags &= cpu_to_be32(~(0xf << 28)); context->flags |= cpu_to_be32(states[i + 1] << 28); err = mlx4_qp_modify(dev, mtt, states[i], states[i + 1], context, 0, 0, qp); if (err) { mlx4_err(dev, "Failed to bring QP to state: " "%d with error: %d\n", states[i + 1], err); return err; } *qp_state = states[i + 1]; } return 0; } EXPORT_SYMBOL_GPL(mlx4_qp_to_ready); Index: stable/10/sys/ofed/drivers/net/mlx4/reset.c =================================================================== --- stable/10/sys/ofed/drivers/net/mlx4/reset.c (revision 271126) +++ stable/10/sys/ofed/drivers/net/mlx4/reset.c (revision 271127) @@ -1,187 +1,186 @@ /* * Copyright (c) 2006, 2007 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2007, 2008 Mellanox Technologies. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU * General Public License (GPL) Version 2, available from the file * COPYING in the main directory of this source tree, or the * OpenIB.org BSD license below: * * Redistribution and use in source and binary forms, with or * without modification, are permitted provided that the following * conditions are met: * * - Redistributions of source code must retain the above * copyright notice, this list of conditions and the following * disclaimer. * * - Redistributions in binary form must reproduce the above * copyright notice, this list of conditions and the following * disclaimer in the documentation and/or other materials * provided with the distribution. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include #include #include #include #include #include #include "mlx4.h" int mlx4_reset(struct mlx4_dev *dev) { void __iomem *reset; u32 *hca_header = NULL; int pcie_cap; u16 devctl; u16 linkctl; u16 vendor; unsigned long end; u32 sem; int i; int err = 0; #define MLX4_RESET_BASE 0xf0000 #define MLX4_RESET_SIZE 0x400 #define MLX4_SEM_OFFSET 0x3fc #define MLX4_RESET_OFFSET 0x10 #define MLX4_RESET_VALUE swab32(1) #define MLX4_SEM_TIMEOUT_JIFFIES (10 * HZ) #define MLX4_RESET_TIMEOUT_JIFFIES (2 * HZ) /* * Reset the chip. This is somewhat ugly because we have to * save off the PCI header before reset and then restore it * after the chip reboots. We skip config space offsets 22 * and 23 since those have a special meaning. */ /* Do we need to save off the full 4K PCI Express header?? */ hca_header = kmalloc(256, GFP_KERNEL); if (!hca_header) { err = -ENOMEM; mlx4_err(dev, "Couldn't allocate memory to save HCA " "PCI header, aborting.\n"); goto out; } pcie_cap = pci_find_capability(dev->pdev, PCI_CAP_ID_EXP); for (i = 0; i < 64; ++i) { if (i == 22 || i == 23) continue; if (pci_read_config_dword(dev->pdev, i * 4, hca_header + i)) { err = -ENODEV; mlx4_err(dev, "Couldn't save HCA " "PCI header, aborting.\n"); goto out; } } reset = ioremap(pci_resource_start(dev->pdev, 0) + MLX4_RESET_BASE, MLX4_RESET_SIZE); if (!reset) { err = -ENOMEM; mlx4_err(dev, "Couldn't map HCA reset register, aborting.\n"); goto out; } /* grab HW semaphore to lock out flash updates */ end = jiffies + MLX4_SEM_TIMEOUT_JIFFIES; do { sem = readl(reset + MLX4_SEM_OFFSET); if (!sem) break; msleep(1); } while (time_before(jiffies, end)); if (sem) { mlx4_err(dev, "Failed to obtain HW semaphore, aborting\n"); err = -EAGAIN; iounmap(reset); goto out; } /* actually hit reset */ writel(MLX4_RESET_VALUE, reset + MLX4_RESET_OFFSET); iounmap(reset); /* Docs say to wait one second before accessing device */ msleep(2000); end = jiffies + MLX4_RESET_TIMEOUT_JIFFIES; do { if (!pci_read_config_word(dev->pdev, PCI_VENDOR_ID, &vendor) && vendor != 0xffff) break; msleep(1); } while (time_before(jiffies, end)); if (vendor == 0xffff) { err = -ENODEV; mlx4_err(dev, "PCI device did not come back after reset, " "aborting.\n"); goto out; } /* Now restore the PCI headers */ if (pcie_cap) { devctl = hca_header[(pcie_cap + PCI_EXP_DEVCTL) / 4]; if (pci_write_config_word(dev->pdev, pcie_cap + PCI_EXP_DEVCTL, devctl)) { err = -ENODEV; mlx4_err(dev, "Couldn't restore HCA PCI Express " "Device Control register, aborting.\n"); goto out; } linkctl = hca_header[(pcie_cap + PCI_EXP_LNKCTL) / 4]; if (pci_write_config_word(dev->pdev, pcie_cap + PCI_EXP_LNKCTL, linkctl)) { err = -ENODEV; mlx4_err(dev, "Couldn't restore HCA PCI Express " "Link control register, aborting.\n"); goto out; } } for (i = 0; i < 16; ++i) { if (i * 4 == PCI_COMMAND) continue; if (pci_write_config_dword(dev->pdev, i * 4, hca_header[i])) { err = -ENODEV; mlx4_err(dev, "Couldn't restore HCA reg %x, " "aborting.\n", i); goto out; } } if (pci_write_config_dword(dev->pdev, PCI_COMMAND, hca_header[PCI_COMMAND / 4])) { err = -ENODEV; mlx4_err(dev, "Couldn't restore HCA COMMAND, " "aborting.\n"); goto out; } out: kfree(hca_header); return err; } Index: stable/10/sys/ofed/drivers/net/mlx4/resource_tracker.c =================================================================== --- stable/10/sys/ofed/drivers/net/mlx4/resource_tracker.c (revision 271126) +++ stable/10/sys/ofed/drivers/net/mlx4/resource_tracker.c (revision 271127) @@ -1,4315 +1,4315 @@ /* * Copyright (c) 2004, 2005 Topspin Communications. All rights reserved. * Copyright (c) 2005, 2006, 2007, 2008 Mellanox Technologies. * All rights reserved. * Copyright (c) 2005, 2006, 2007 Cisco Systems, Inc. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU * General Public License (GPL) Version 2, available from the file * COPYING in the main directory of this source tree, or the * OpenIB.org BSD license below: * * Redistribution and use in source and binary forms, with or * without modification, are permitted provided that the following * conditions are met: * * - Redistributions of source code must retain the above * copyright notice, this list of conditions and the following * disclaimer. * * - Redistributions in binary form must reproduce the above * copyright notice, this list of conditions and the following * disclaimer in the documentation and/or other materials * provided with the distribution. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ #include #include #include #include #include #include #include #include #include #include #include "mlx4.h" #include "fw.h" #define MLX4_MAC_VALID (1ull << 63) struct mac_res { struct list_head list; u64 mac; int ref_count; u8 smac_index; u8 port; }; struct vlan_res { struct list_head list; u16 vlan; int ref_count; int vlan_index; u8 port; }; struct res_common { struct list_head list; struct rb_node node; u64 res_id; int owner; int state; int from_state; int to_state; int removing; }; enum { RES_ANY_BUSY = 1 }; struct res_gid { struct list_head list; u8 gid[16]; enum mlx4_protocol prot; enum mlx4_steer_type steer; }; enum res_qp_states { RES_QP_BUSY = RES_ANY_BUSY, /* QP number was allocated */ RES_QP_RESERVED, /* ICM memory for QP context was mapped */ RES_QP_MAPPED, /* QP is in hw ownership */ RES_QP_HW }; struct res_qp { struct res_common com; struct res_mtt *mtt; struct res_cq *rcq; struct res_cq *scq; struct res_srq *srq; struct list_head mcg_list; spinlock_t mcg_spl; int local_qpn; }; enum res_mtt_states { RES_MTT_BUSY = RES_ANY_BUSY, RES_MTT_ALLOCATED, }; static inline const char *mtt_states_str(enum res_mtt_states state) { switch (state) { case RES_MTT_BUSY: return "RES_MTT_BUSY"; case RES_MTT_ALLOCATED: return "RES_MTT_ALLOCATED"; default: return "Unknown"; } } struct res_mtt { struct res_common com; int order; atomic_t ref_count; }; enum res_mpt_states { RES_MPT_BUSY = RES_ANY_BUSY, RES_MPT_RESERVED, RES_MPT_MAPPED, RES_MPT_HW, }; struct res_mpt { struct res_common com; struct res_mtt *mtt; int key; }; enum res_eq_states { RES_EQ_BUSY = RES_ANY_BUSY, RES_EQ_RESERVED, RES_EQ_HW, }; struct res_eq { struct res_common com; struct res_mtt *mtt; }; enum res_cq_states { RES_CQ_BUSY = RES_ANY_BUSY, RES_CQ_ALLOCATED, RES_CQ_HW, }; struct res_cq { struct res_common com; struct res_mtt *mtt; atomic_t ref_count; }; enum res_srq_states { RES_SRQ_BUSY = RES_ANY_BUSY, RES_SRQ_ALLOCATED, RES_SRQ_HW, }; struct res_srq { struct res_common com; struct res_mtt *mtt; struct res_cq *cq; atomic_t ref_count; }; enum res_counter_states { RES_COUNTER_BUSY = RES_ANY_BUSY, RES_COUNTER_ALLOCATED, }; struct res_counter { struct res_common com; int port; }; enum res_xrcdn_states { RES_XRCD_BUSY = RES_ANY_BUSY, RES_XRCD_ALLOCATED, }; struct res_xrcdn { struct res_common com; int port; }; enum res_fs_rule_states { RES_FS_RULE_BUSY = RES_ANY_BUSY, RES_FS_RULE_ALLOCATED, }; struct res_fs_rule { struct res_common com; }; static int mlx4_is_eth(struct mlx4_dev *dev, int port) { return dev->caps.port_mask[port] == MLX4_PORT_TYPE_IB ? 0 : 1; } static void *res_tracker_lookup(struct rb_root *root, u64 res_id) { struct rb_node *node = root->rb_node; while (node) { struct res_common *res = container_of(node, struct res_common, node); if (res_id < res->res_id) node = node->rb_left; else if (res_id > res->res_id) node = node->rb_right; else return res; } return NULL; } static int res_tracker_insert(struct rb_root *root, struct res_common *res) { struct rb_node **new = &(root->rb_node), *parent = NULL; /* Figure out where to put new node */ while (*new) { struct res_common *this = container_of(*new, struct res_common, node); parent = *new; if (res->res_id < this->res_id) new = &((*new)->rb_left); else if (res->res_id > this->res_id) new = &((*new)->rb_right); else return -EEXIST; } /* Add new node and rebalance tree. */ rb_link_node(&res->node, parent, new); rb_insert_color(&res->node, root); return 0; } enum qp_transition { QP_TRANS_INIT2RTR, QP_TRANS_RTR2RTS, QP_TRANS_RTS2RTS, QP_TRANS_SQERR2RTS, QP_TRANS_SQD2SQD, QP_TRANS_SQD2RTS }; /* For Debug uses */ static const char *ResourceType(enum mlx4_resource rt) { switch (rt) { case RES_QP: return "RES_QP"; case RES_CQ: return "RES_CQ"; case RES_SRQ: return "RES_SRQ"; case RES_MPT: return "RES_MPT"; case RES_MTT: return "RES_MTT"; case RES_MAC: return "RES_MAC"; case RES_VLAN: return "RES_VLAN"; case RES_EQ: return "RES_EQ"; case RES_COUNTER: return "RES_COUNTER"; case RES_FS_RULE: return "RES_FS_RULE"; case RES_XRCD: return "RES_XRCD"; default: return "Unknown resource type !!!"; }; } static void rem_slave_vlans(struct mlx4_dev *dev, int slave); static inline int mlx4_grant_resource(struct mlx4_dev *dev, int slave, enum mlx4_resource res_type, int count, int port) { struct mlx4_priv *priv = mlx4_priv(dev); struct resource_allocator *res_alloc = &priv->mfunc.master.res_tracker.res_alloc[res_type]; int err = -EINVAL; int allocated, free, reserved, guaranteed, from_free; spin_lock(&res_alloc->alloc_lock); allocated = (port > 0) ? res_alloc->allocated[(port - 1) * (dev->num_vfs + 1) + slave] : res_alloc->allocated[slave]; free = (port > 0) ? res_alloc->res_port_free[port - 1] : res_alloc->res_free; reserved = (port > 0) ? res_alloc->res_port_rsvd[port - 1] : res_alloc->res_reserved; guaranteed = res_alloc->guaranteed[slave]; if (allocated + count > res_alloc->quota[slave]) goto out; if (allocated + count <= guaranteed) { err = 0; } else { /* portion may need to be obtained from free area */ if (guaranteed - allocated > 0) from_free = count - (guaranteed - allocated); else from_free = count; if (free - from_free > reserved) err = 0; } if (!err) { /* grant the request */ if (port > 0) { res_alloc->allocated[(port - 1) * (dev->num_vfs + 1) + slave] += count; res_alloc->res_port_free[port - 1] -= count; } else { res_alloc->allocated[slave] += count; res_alloc->res_free -= count; } } out: spin_unlock(&res_alloc->alloc_lock); return err; } static inline void mlx4_release_resource(struct mlx4_dev *dev, int slave, enum mlx4_resource res_type, int count, int port) { struct mlx4_priv *priv = mlx4_priv(dev); struct resource_allocator *res_alloc = &priv->mfunc.master.res_tracker.res_alloc[res_type]; spin_lock(&res_alloc->alloc_lock); if (port > 0) { res_alloc->allocated[(port - 1) * (dev->num_vfs + 1) + slave] -= count; res_alloc->res_port_free[port - 1] += count; } else { res_alloc->allocated[slave] -= count; res_alloc->res_free += count; } spin_unlock(&res_alloc->alloc_lock); return; } static inline void initialize_res_quotas(struct mlx4_dev *dev, struct resource_allocator *res_alloc, enum mlx4_resource res_type, int vf, int num_instances) { res_alloc->guaranteed[vf] = num_instances / (2 * (dev->num_vfs + 1)); res_alloc->quota[vf] = (num_instances / 2) + res_alloc->guaranteed[vf]; if (vf == mlx4_master_func_num(dev)) { res_alloc->res_free = num_instances; if (res_type == RES_MTT) { /* reserved mtts will be taken out of the PF allocation */ res_alloc->res_free += dev->caps.reserved_mtts; res_alloc->guaranteed[vf] += dev->caps.reserved_mtts; res_alloc->quota[vf] += dev->caps.reserved_mtts; } } } void mlx4_init_quotas(struct mlx4_dev *dev) { struct mlx4_priv *priv = mlx4_priv(dev); int pf; /* quotas for VFs are initialized in mlx4_slave_cap */ if (mlx4_is_slave(dev)) return; if (!mlx4_is_mfunc(dev)) { dev->quotas.qp = dev->caps.num_qps - dev->caps.reserved_qps - mlx4_num_reserved_sqps(dev); dev->quotas.cq = dev->caps.num_cqs - dev->caps.reserved_cqs; dev->quotas.srq = dev->caps.num_srqs - dev->caps.reserved_srqs; dev->quotas.mtt = dev->caps.num_mtts - dev->caps.reserved_mtts; dev->quotas.mpt = dev->caps.num_mpts - dev->caps.reserved_mrws; return; } pf = mlx4_master_func_num(dev); dev->quotas.qp = priv->mfunc.master.res_tracker.res_alloc[RES_QP].quota[pf]; dev->quotas.cq = priv->mfunc.master.res_tracker.res_alloc[RES_CQ].quota[pf]; dev->quotas.srq = priv->mfunc.master.res_tracker.res_alloc[RES_SRQ].quota[pf]; dev->quotas.mtt = priv->mfunc.master.res_tracker.res_alloc[RES_MTT].quota[pf]; dev->quotas.mpt = priv->mfunc.master.res_tracker.res_alloc[RES_MPT].quota[pf]; } int mlx4_init_resource_tracker(struct mlx4_dev *dev) { struct mlx4_priv *priv = mlx4_priv(dev); int i, j; int t; priv->mfunc.master.res_tracker.slave_list = kzalloc(dev->num_slaves * sizeof(struct slave_list), GFP_KERNEL); if (!priv->mfunc.master.res_tracker.slave_list) return -ENOMEM; for (i = 0 ; i < dev->num_slaves; i++) { for (t = 0; t < MLX4_NUM_OF_RESOURCE_TYPE; ++t) INIT_LIST_HEAD(&priv->mfunc.master.res_tracker. slave_list[i].res_list[t]); mutex_init(&priv->mfunc.master.res_tracker.slave_list[i].mutex); } mlx4_dbg(dev, "Started init_resource_tracker: %ld slaves\n", dev->num_slaves); for (i = 0 ; i < MLX4_NUM_OF_RESOURCE_TYPE; i++) priv->mfunc.master.res_tracker.res_tree[i] = RB_ROOT; for (i = 0; i < MLX4_NUM_OF_RESOURCE_TYPE; i++) { struct resource_allocator *res_alloc = &priv->mfunc.master.res_tracker.res_alloc[i]; res_alloc->quota = kmalloc((dev->num_vfs + 1) * sizeof(int), GFP_KERNEL); res_alloc->guaranteed = kmalloc((dev->num_vfs + 1) * sizeof(int), GFP_KERNEL); if (i == RES_MAC || i == RES_VLAN) res_alloc->allocated = kzalloc(MLX4_MAX_PORTS * (dev->num_vfs + 1) * sizeof(int), GFP_KERNEL); else res_alloc->allocated = kzalloc((dev->num_vfs + 1) * sizeof(int), GFP_KERNEL); if (!res_alloc->quota || !res_alloc->guaranteed || !res_alloc->allocated) goto no_mem_err; spin_lock_init(&res_alloc->alloc_lock); for (t = 0; t < dev->num_vfs + 1; t++) { switch (i) { case RES_QP: initialize_res_quotas(dev, res_alloc, RES_QP, t, dev->caps.num_qps - dev->caps.reserved_qps - mlx4_num_reserved_sqps(dev)); break; case RES_CQ: initialize_res_quotas(dev, res_alloc, RES_CQ, t, dev->caps.num_cqs - dev->caps.reserved_cqs); break; case RES_SRQ: initialize_res_quotas(dev, res_alloc, RES_SRQ, t, dev->caps.num_srqs - dev->caps.reserved_srqs); break; case RES_MPT: initialize_res_quotas(dev, res_alloc, RES_MPT, t, dev->caps.num_mpts - dev->caps.reserved_mrws); break; case RES_MTT: initialize_res_quotas(dev, res_alloc, RES_MTT, t, dev->caps.num_mtts - dev->caps.reserved_mtts); break; case RES_MAC: if (t == mlx4_master_func_num(dev)) { res_alloc->quota[t] = MLX4_MAX_MAC_NUM - 2 * dev->num_vfs; res_alloc->guaranteed[t] = res_alloc->quota[t]; for (j = 0; j < MLX4_MAX_PORTS; j++) res_alloc->res_port_free[j] = MLX4_MAX_MAC_NUM; } else { res_alloc->quota[t] = 2; res_alloc->guaranteed[t] = 2; } break; case RES_VLAN: if (t == mlx4_master_func_num(dev)) { res_alloc->quota[t] = MLX4_MAX_VLAN_NUM; res_alloc->guaranteed[t] = MLX4_MAX_VLAN_NUM / 2; for (j = 0; j < MLX4_MAX_PORTS; j++) res_alloc->res_port_free[j] = res_alloc->quota[t]; } else { res_alloc->quota[t] = MLX4_MAX_VLAN_NUM / 2; res_alloc->guaranteed[t] = 0; } break; case RES_COUNTER: res_alloc->quota[t] = dev->caps.max_counters; res_alloc->guaranteed[t] = 0; if (t == mlx4_master_func_num(dev)) res_alloc->res_free = res_alloc->quota[t]; break; default: break; } if (i == RES_MAC || i == RES_VLAN) { for (j = 0; j < MLX4_MAX_PORTS; j++) res_alloc->res_port_rsvd[j] += res_alloc->guaranteed[t]; } else { res_alloc->res_reserved += res_alloc->guaranteed[t]; } } } spin_lock_init(&priv->mfunc.master.res_tracker.lock); return 0; no_mem_err: for (i = 0; i < MLX4_NUM_OF_RESOURCE_TYPE; i++) { kfree(priv->mfunc.master.res_tracker.res_alloc[i].allocated); priv->mfunc.master.res_tracker.res_alloc[i].allocated = NULL; kfree(priv->mfunc.master.res_tracker.res_alloc[i].guaranteed); priv->mfunc.master.res_tracker.res_alloc[i].guaranteed = NULL; kfree(priv->mfunc.master.res_tracker.res_alloc[i].quota); priv->mfunc.master.res_tracker.res_alloc[i].quota = NULL; } return -ENOMEM; } void mlx4_free_resource_tracker(struct mlx4_dev *dev, enum mlx4_res_tracker_free_type type) { struct mlx4_priv *priv = mlx4_priv(dev); int i; if (priv->mfunc.master.res_tracker.slave_list) { if (type != RES_TR_FREE_STRUCTS_ONLY) { for (i = 0; i < dev->num_slaves; i++) { if (type == RES_TR_FREE_ALL || dev->caps.function != i) mlx4_delete_all_resources_for_slave(dev, i); } /* free master's vlans */ i = dev->caps.function; mutex_lock(&priv->mfunc.master.res_tracker.slave_list[i].mutex); rem_slave_vlans(dev, i); mutex_unlock(&priv->mfunc.master.res_tracker.slave_list[i].mutex); } if (type != RES_TR_FREE_SLAVES_ONLY) { for (i = 0; i < MLX4_NUM_OF_RESOURCE_TYPE; i++) { kfree(priv->mfunc.master.res_tracker.res_alloc[i].allocated); priv->mfunc.master.res_tracker.res_alloc[i].allocated = NULL; kfree(priv->mfunc.master.res_tracker.res_alloc[i].guaranteed); priv->mfunc.master.res_tracker.res_alloc[i].guaranteed = NULL; kfree(priv->mfunc.master.res_tracker.res_alloc[i].quota); priv->mfunc.master.res_tracker.res_alloc[i].quota = NULL; } kfree(priv->mfunc.master.res_tracker.slave_list); priv->mfunc.master.res_tracker.slave_list = NULL; } } } static void update_pkey_index(struct mlx4_dev *dev, int slave, struct mlx4_cmd_mailbox *inbox) { u8 sched = *(u8 *)(inbox->buf + 64); u8 orig_index = *(u8 *)(inbox->buf + 35); u8 new_index; struct mlx4_priv *priv = mlx4_priv(dev); int port; port = (sched >> 6 & 1) + 1; new_index = priv->virt2phys_pkey[slave][port - 1][orig_index]; *(u8 *)(inbox->buf + 35) = new_index; } static void update_gid(struct mlx4_dev *dev, struct mlx4_cmd_mailbox *inbox, u8 slave) { struct mlx4_qp_context *qp_ctx = inbox->buf + 8; enum mlx4_qp_optpar optpar = be32_to_cpu(*(__be32 *) inbox->buf); u32 ts = (be32_to_cpu(qp_ctx->flags) >> 16) & 0xff; int port; if (MLX4_QP_ST_UD == ts) { port = (qp_ctx->pri_path.sched_queue >> 6 & 1) + 1; if (mlx4_is_eth(dev, port)) qp_ctx->pri_path.mgid_index = mlx4_get_base_gid_ix(dev, slave) | 0x80; else qp_ctx->pri_path.mgid_index = 0x80 | slave; } else if (MLX4_QP_ST_RC == ts || MLX4_QP_ST_UC == ts) { if (optpar & MLX4_QP_OPTPAR_PRIMARY_ADDR_PATH) { port = (qp_ctx->pri_path.sched_queue >> 6 & 1) + 1; if (mlx4_is_eth(dev, port)) { qp_ctx->pri_path.mgid_index += mlx4_get_base_gid_ix(dev, slave); qp_ctx->pri_path.mgid_index &= 0x7f; } else { qp_ctx->pri_path.mgid_index = slave & 0x7F; } } if (optpar & MLX4_QP_OPTPAR_ALT_ADDR_PATH) { port = (qp_ctx->alt_path.sched_queue >> 6 & 1) + 1; if (mlx4_is_eth(dev, port)) { qp_ctx->alt_path.mgid_index += mlx4_get_base_gid_ix(dev, slave); qp_ctx->alt_path.mgid_index &= 0x7f; } else { qp_ctx->alt_path.mgid_index = slave & 0x7F; } } } } static int update_vport_qp_param(struct mlx4_dev *dev, struct mlx4_cmd_mailbox *inbox, u8 slave) { struct mlx4_qp_context *qpc = inbox->buf + 8; struct mlx4_vport_oper_state *vp_oper; struct mlx4_priv *priv; u32 qp_type; int port; port = (qpc->pri_path.sched_queue & 0x40) ? 2 : 1; priv = mlx4_priv(dev); vp_oper = &priv->mfunc.master.vf_oper[slave].vport[port]; if (MLX4_VGT != vp_oper->state.default_vlan) { qp_type = (be32_to_cpu(qpc->flags) >> 16) & 0xff; if (MLX4_QP_ST_RC == qp_type) return -EINVAL; qpc->srqn |= cpu_to_be32(1 << 25); /*set cqe vlan mask */ qpc->pri_path.vlan_index = vp_oper->vlan_idx; qpc->pri_path.fl = 1 << 6; /* set cv bit*/ qpc->pri_path.feup |= 1 << 3; /* set fvl bit */ qpc->pri_path.sched_queue &= 0xC7; qpc->pri_path.sched_queue |= (vp_oper->state.default_qos) << 3; mlx4_dbg(dev, "qp %d port %d Q 0x%x set vlan to %d vidx %d feup %x fl %x\n", be32_to_cpu(qpc->local_qpn) & 0xffffff, port, (int)(qpc->pri_path.sched_queue), vp_oper->state.default_vlan, vp_oper->vlan_idx, (int)(qpc->pri_path.feup), (int)(qpc->pri_path.fl)); } if (vp_oper->state.spoofchk) { qpc->pri_path.feup |= 1 << 5; /* set fsm bit */; qpc->pri_path.grh_mylmc = (0x80 & qpc->pri_path.grh_mylmc) + vp_oper->mac_idx; mlx4_dbg(dev, "spoof qp %d port %d feup 0x%x, myLmc 0x%x mindx %d\n", be32_to_cpu(qpc->local_qpn) & 0xffffff, port, (int)qpc->pri_path.feup, (int)qpc->pri_path.grh_mylmc, vp_oper->mac_idx); } return 0; } static int mpt_mask(struct mlx4_dev *dev) { return dev->caps.num_mpts - 1; } static void *find_res(struct mlx4_dev *dev, int res_id, enum mlx4_resource type) { struct mlx4_priv *priv = mlx4_priv(dev); return res_tracker_lookup(&priv->mfunc.master.res_tracker.res_tree[type], res_id); } static int get_res(struct mlx4_dev *dev, int slave, u64 res_id, enum mlx4_resource type, void *res) { struct res_common *r; int err = 0; spin_lock_irq(mlx4_tlock(dev)); r = find_res(dev, res_id, type); if (!r) { err = -ENOENT; goto exit; } if (r->state == RES_ANY_BUSY) { err = -EBUSY; goto exit; } if (r->owner != slave) { err = -EPERM; goto exit; } r->from_state = r->state; r->state = RES_ANY_BUSY; if (res) *((struct res_common **)res) = r; exit: spin_unlock_irq(mlx4_tlock(dev)); return err; } int mlx4_get_slave_from_resource_id(struct mlx4_dev *dev, enum mlx4_resource type, u64 res_id, int *slave) { struct res_common *r; int err = -ENOENT; int id = res_id; if (type == RES_QP) id &= 0x7fffff; spin_lock(mlx4_tlock(dev)); r = find_res(dev, id, type); if (r) { *slave = r->owner; err = 0; } spin_unlock(mlx4_tlock(dev)); return err; } static void put_res(struct mlx4_dev *dev, int slave, u64 res_id, enum mlx4_resource type) { struct res_common *r; spin_lock_irq(mlx4_tlock(dev)); r = find_res(dev, res_id, type); if (r) r->state = r->from_state; spin_unlock_irq(mlx4_tlock(dev)); } static struct res_common *alloc_qp_tr(int id) { struct res_qp *ret; ret = kzalloc(sizeof *ret, GFP_KERNEL); if (!ret) return NULL; ret->com.res_id = id; ret->com.state = RES_QP_RESERVED; ret->local_qpn = id; INIT_LIST_HEAD(&ret->mcg_list); spin_lock_init(&ret->mcg_spl); return &ret->com; } static struct res_common *alloc_mtt_tr(int id, int order) { struct res_mtt *ret; ret = kzalloc(sizeof *ret, GFP_KERNEL); if (!ret) return NULL; ret->com.res_id = id; ret->order = order; ret->com.state = RES_MTT_ALLOCATED; atomic_set(&ret->ref_count, 0); return &ret->com; } static struct res_common *alloc_mpt_tr(int id, int key) { struct res_mpt *ret; ret = kzalloc(sizeof *ret, GFP_KERNEL); if (!ret) return NULL; ret->com.res_id = id; ret->com.state = RES_MPT_RESERVED; ret->key = key; return &ret->com; } static struct res_common *alloc_eq_tr(int id) { struct res_eq *ret; ret = kzalloc(sizeof *ret, GFP_KERNEL); if (!ret) return NULL; ret->com.res_id = id; ret->com.state = RES_EQ_RESERVED; return &ret->com; } static struct res_common *alloc_cq_tr(int id) { struct res_cq *ret; ret = kzalloc(sizeof *ret, GFP_KERNEL); if (!ret) return NULL; ret->com.res_id = id; ret->com.state = RES_CQ_ALLOCATED; atomic_set(&ret->ref_count, 0); return &ret->com; } static struct res_common *alloc_srq_tr(int id) { struct res_srq *ret; ret = kzalloc(sizeof *ret, GFP_KERNEL); if (!ret) return NULL; ret->com.res_id = id; ret->com.state = RES_SRQ_ALLOCATED; atomic_set(&ret->ref_count, 0); return &ret->com; } static struct res_common *alloc_counter_tr(int id) { struct res_counter *ret; ret = kzalloc(sizeof *ret, GFP_KERNEL); if (!ret) return NULL; ret->com.res_id = id; ret->com.state = RES_COUNTER_ALLOCATED; return &ret->com; } static struct res_common *alloc_xrcdn_tr(int id) { struct res_xrcdn *ret; ret = kzalloc(sizeof *ret, GFP_KERNEL); if (!ret) return NULL; ret->com.res_id = id; ret->com.state = RES_XRCD_ALLOCATED; return &ret->com; } static struct res_common *alloc_fs_rule_tr(u64 id) { struct res_fs_rule *ret; ret = kzalloc(sizeof *ret, GFP_KERNEL); if (!ret) return NULL; ret->com.res_id = id; ret->com.state = RES_FS_RULE_ALLOCATED; return &ret->com; } static struct res_common *alloc_tr(u64 id, enum mlx4_resource type, int slave, int extra) { struct res_common *ret; switch (type) { case RES_QP: ret = alloc_qp_tr(id); break; case RES_MPT: ret = alloc_mpt_tr(id, extra); break; case RES_MTT: ret = alloc_mtt_tr(id, extra); break; case RES_EQ: ret = alloc_eq_tr(id); break; case RES_CQ: ret = alloc_cq_tr(id); break; case RES_SRQ: ret = alloc_srq_tr(id); break; case RES_MAC: printk(KERN_ERR "implementation missing\n"); return NULL; case RES_COUNTER: ret = alloc_counter_tr(id); break; case RES_XRCD: ret = alloc_xrcdn_tr(id); break; case RES_FS_RULE: ret = alloc_fs_rule_tr(id); break; default: return NULL; } if (ret) ret->owner = slave; return ret; } static int add_res_range(struct mlx4_dev *dev, int slave, u64 base, int count, enum mlx4_resource type, int extra) { int i; int err; struct mlx4_priv *priv = mlx4_priv(dev); struct res_common **res_arr; struct mlx4_resource_tracker *tracker = &priv->mfunc.master.res_tracker; struct rb_root *root = &tracker->res_tree[type]; res_arr = kzalloc(count * sizeof *res_arr, GFP_KERNEL); if (!res_arr) return -ENOMEM; for (i = 0; i < count; ++i) { res_arr[i] = alloc_tr(base + i, type, slave, extra); if (!res_arr[i]) { for (--i; i >= 0; --i) kfree(res_arr[i]); kfree(res_arr); return -ENOMEM; } } spin_lock_irq(mlx4_tlock(dev)); for (i = 0; i < count; ++i) { if (find_res(dev, base + i, type)) { err = -EEXIST; goto undo; } err = res_tracker_insert(root, res_arr[i]); if (err) goto undo; list_add_tail(&res_arr[i]->list, &tracker->slave_list[slave].res_list[type]); } spin_unlock_irq(mlx4_tlock(dev)); kfree(res_arr); return 0; undo: for (--i; i >= base; --i) rb_erase(&res_arr[i]->node, root); spin_unlock_irq(mlx4_tlock(dev)); for (i = 0; i < count; ++i) kfree(res_arr[i]); kfree(res_arr); return err; } static int remove_qp_ok(struct res_qp *res) { if (res->com.state == RES_QP_BUSY) return -EBUSY; else if (res->com.state != RES_QP_RESERVED) return -EPERM; return 0; } static int remove_mtt_ok(struct res_mtt *res, int order) { if (res->com.state == RES_MTT_BUSY || atomic_read(&res->ref_count)) { printk(KERN_DEBUG "%s-%d: state %s, ref_count %d\n", __func__, __LINE__, mtt_states_str(res->com.state), atomic_read(&res->ref_count)); return -EBUSY; } else if (res->com.state != RES_MTT_ALLOCATED) return -EPERM; else if (res->order != order) return -EINVAL; return 0; } static int remove_mpt_ok(struct res_mpt *res) { if (res->com.state == RES_MPT_BUSY) return -EBUSY; else if (res->com.state != RES_MPT_RESERVED) return -EPERM; return 0; } static int remove_eq_ok(struct res_eq *res) { if (res->com.state == RES_MPT_BUSY) return -EBUSY; else if (res->com.state != RES_MPT_RESERVED) return -EPERM; return 0; } static int remove_counter_ok(struct res_counter *res) { if (res->com.state == RES_COUNTER_BUSY) return -EBUSY; else if (res->com.state != RES_COUNTER_ALLOCATED) return -EPERM; return 0; } static int remove_xrcdn_ok(struct res_xrcdn *res) { if (res->com.state == RES_XRCD_BUSY) return -EBUSY; else if (res->com.state != RES_XRCD_ALLOCATED) return -EPERM; return 0; } static int remove_fs_rule_ok(struct res_fs_rule *res) { if (res->com.state == RES_FS_RULE_BUSY) return -EBUSY; else if (res->com.state != RES_FS_RULE_ALLOCATED) return -EPERM; return 0; } static int remove_cq_ok(struct res_cq *res) { if (res->com.state == RES_CQ_BUSY) return -EBUSY; else if (res->com.state != RES_CQ_ALLOCATED) return -EPERM; return 0; } static int remove_srq_ok(struct res_srq *res) { if (res->com.state == RES_SRQ_BUSY) return -EBUSY; else if (res->com.state != RES_SRQ_ALLOCATED) return -EPERM; return 0; } static int remove_ok(struct res_common *res, enum mlx4_resource type, int extra) { switch (type) { case RES_QP: return remove_qp_ok((struct res_qp *)res); case RES_CQ: return remove_cq_ok((struct res_cq *)res); case RES_SRQ: return remove_srq_ok((struct res_srq *)res); case RES_MPT: return remove_mpt_ok((struct res_mpt *)res); case RES_MTT: return remove_mtt_ok((struct res_mtt *)res, extra); case RES_MAC: return -ENOSYS; case RES_EQ: return remove_eq_ok((struct res_eq *)res); case RES_COUNTER: return remove_counter_ok((struct res_counter *)res); case RES_XRCD: return remove_xrcdn_ok((struct res_xrcdn *)res); case RES_FS_RULE: return remove_fs_rule_ok((struct res_fs_rule *)res); default: return -EINVAL; } } static int rem_res_range(struct mlx4_dev *dev, int slave, u64 base, int count, enum mlx4_resource type, int extra) { u64 i; int err; struct mlx4_priv *priv = mlx4_priv(dev); struct mlx4_resource_tracker *tracker = &priv->mfunc.master.res_tracker; struct res_common *r; spin_lock_irq(mlx4_tlock(dev)); for (i = base; i < base + count; ++i) { r = res_tracker_lookup(&tracker->res_tree[type], i); if (!r) { err = -ENOENT; goto out; } if (r->owner != slave) { err = -EPERM; goto out; } err = remove_ok(r, type, extra); if (err) goto out; } for (i = base; i < base + count; ++i) { r = res_tracker_lookup(&tracker->res_tree[type], i); rb_erase(&r->node, &tracker->res_tree[type]); list_del(&r->list); kfree(r); } err = 0; out: spin_unlock_irq(mlx4_tlock(dev)); return err; } static int qp_res_start_move_to(struct mlx4_dev *dev, int slave, int qpn, enum res_qp_states state, struct res_qp **qp, int alloc) { struct mlx4_priv *priv = mlx4_priv(dev); struct mlx4_resource_tracker *tracker = &priv->mfunc.master.res_tracker; struct res_qp *r; int err = 0; spin_lock_irq(mlx4_tlock(dev)); r = res_tracker_lookup(&tracker->res_tree[RES_QP], qpn); if (!r) err = -ENOENT; else if (r->com.owner != slave) err = -EPERM; else { switch (state) { case RES_QP_BUSY: mlx4_dbg(dev, "%s: failed RES_QP, 0x%llx\n", - __func__, r->com.res_id); + __func__, (long long)r->com.res_id); err = -EBUSY; break; case RES_QP_RESERVED: if (r->com.state == RES_QP_MAPPED && !alloc) break; - mlx4_dbg(dev, "failed RES_QP, 0x%llx\n", r->com.res_id); + mlx4_dbg(dev, "failed RES_QP, 0x%llx\n", (long long)r->com.res_id); err = -EINVAL; break; case RES_QP_MAPPED: if ((r->com.state == RES_QP_RESERVED && alloc) || r->com.state == RES_QP_HW) break; else { mlx4_dbg(dev, "failed RES_QP, 0x%llx\n", - r->com.res_id); + (long long)r->com.res_id); err = -EINVAL; } break; case RES_QP_HW: if (r->com.state != RES_QP_MAPPED) err = -EINVAL; break; default: err = -EINVAL; } if (!err) { r->com.from_state = r->com.state; r->com.to_state = state; r->com.state = RES_QP_BUSY; if (qp) *qp = r; } } spin_unlock_irq(mlx4_tlock(dev)); return err; } static int mr_res_start_move_to(struct mlx4_dev *dev, int slave, int index, enum res_mpt_states state, struct res_mpt **mpt) { struct mlx4_priv *priv = mlx4_priv(dev); struct mlx4_resource_tracker *tracker = &priv->mfunc.master.res_tracker; struct res_mpt *r; int err = 0; spin_lock_irq(mlx4_tlock(dev)); r = res_tracker_lookup(&tracker->res_tree[RES_MPT], index); if (!r) err = -ENOENT; else if (r->com.owner != slave) err = -EPERM; else { switch (state) { case RES_MPT_BUSY: err = -EINVAL; break; case RES_MPT_RESERVED: if (r->com.state != RES_MPT_MAPPED) err = -EINVAL; break; case RES_MPT_MAPPED: if (r->com.state != RES_MPT_RESERVED && r->com.state != RES_MPT_HW) err = -EINVAL; break; case RES_MPT_HW: if (r->com.state != RES_MPT_MAPPED) err = -EINVAL; break; default: err = -EINVAL; } if (!err) { r->com.from_state = r->com.state; r->com.to_state = state; r->com.state = RES_MPT_BUSY; if (mpt) *mpt = r; } } spin_unlock_irq(mlx4_tlock(dev)); return err; } static int eq_res_start_move_to(struct mlx4_dev *dev, int slave, int index, enum res_eq_states state, struct res_eq **eq) { struct mlx4_priv *priv = mlx4_priv(dev); struct mlx4_resource_tracker *tracker = &priv->mfunc.master.res_tracker; struct res_eq *r; int err = 0; spin_lock_irq(mlx4_tlock(dev)); r = res_tracker_lookup(&tracker->res_tree[RES_EQ], index); if (!r) err = -ENOENT; else if (r->com.owner != slave) err = -EPERM; else { switch (state) { case RES_EQ_BUSY: err = -EINVAL; break; case RES_EQ_RESERVED: if (r->com.state != RES_EQ_HW) err = -EINVAL; break; case RES_EQ_HW: if (r->com.state != RES_EQ_RESERVED) err = -EINVAL; break; default: err = -EINVAL; } if (!err) { r->com.from_state = r->com.state; r->com.to_state = state; r->com.state = RES_EQ_BUSY; if (eq) *eq = r; } } spin_unlock_irq(mlx4_tlock(dev)); return err; } static int cq_res_start_move_to(struct mlx4_dev *dev, int slave, int cqn, enum res_cq_states state, struct res_cq **cq) { struct mlx4_priv *priv = mlx4_priv(dev); struct mlx4_resource_tracker *tracker = &priv->mfunc.master.res_tracker; struct res_cq *r; int err; spin_lock_irq(mlx4_tlock(dev)); r = res_tracker_lookup(&tracker->res_tree[RES_CQ], cqn); if (!r) err = -ENOENT; else if (r->com.owner != slave) err = -EPERM; else { switch (state) { case RES_CQ_BUSY: err = -EBUSY; break; case RES_CQ_ALLOCATED: if (r->com.state != RES_CQ_HW) err = -EINVAL; else if (atomic_read(&r->ref_count)) err = -EBUSY; else err = 0; break; case RES_CQ_HW: if (r->com.state != RES_CQ_ALLOCATED) err = -EINVAL; else err = 0; break; default: err = -EINVAL; } if (!err) { r->com.from_state = r->com.state; r->com.to_state = state; r->com.state = RES_CQ_BUSY; if (cq) *cq = r; } } spin_unlock_irq(mlx4_tlock(dev)); return err; } static int srq_res_start_move_to(struct mlx4_dev *dev, int slave, int index, enum res_srq_states state, struct res_srq **srq) { struct mlx4_priv *priv = mlx4_priv(dev); struct mlx4_resource_tracker *tracker = &priv->mfunc.master.res_tracker; struct res_srq *r; int err = 0; spin_lock_irq(mlx4_tlock(dev)); r = res_tracker_lookup(&tracker->res_tree[RES_SRQ], index); if (!r) err = -ENOENT; else if (r->com.owner != slave) err = -EPERM; else { switch (state) { case RES_SRQ_BUSY: err = -EINVAL; break; case RES_SRQ_ALLOCATED: if (r->com.state != RES_SRQ_HW) err = -EINVAL; else if (atomic_read(&r->ref_count)) err = -EBUSY; break; case RES_SRQ_HW: if (r->com.state != RES_SRQ_ALLOCATED) err = -EINVAL; break; default: err = -EINVAL; } if (!err) { r->com.from_state = r->com.state; r->com.to_state = state; r->com.state = RES_SRQ_BUSY; if (srq) *srq = r; } } spin_unlock_irq(mlx4_tlock(dev)); return err; } static void res_abort_move(struct mlx4_dev *dev, int slave, enum mlx4_resource type, int id) { struct mlx4_priv *priv = mlx4_priv(dev); struct mlx4_resource_tracker *tracker = &priv->mfunc.master.res_tracker; struct res_common *r; spin_lock_irq(mlx4_tlock(dev)); r = res_tracker_lookup(&tracker->res_tree[type], id); if (r && (r->owner == slave)) r->state = r->from_state; spin_unlock_irq(mlx4_tlock(dev)); } static void res_end_move(struct mlx4_dev *dev, int slave, enum mlx4_resource type, int id) { struct mlx4_priv *priv = mlx4_priv(dev); struct mlx4_resource_tracker *tracker = &priv->mfunc.master.res_tracker; struct res_common *r; spin_lock_irq(mlx4_tlock(dev)); r = res_tracker_lookup(&tracker->res_tree[type], id); if (r && (r->owner == slave)) r->state = r->to_state; spin_unlock_irq(mlx4_tlock(dev)); } static int valid_reserved(struct mlx4_dev *dev, int slave, int qpn) { return mlx4_is_qp_reserved(dev, qpn) && (mlx4_is_master(dev) || mlx4_is_guest_proxy(dev, slave, qpn)); } static int fw_reserved(struct mlx4_dev *dev, int qpn) { return qpn < dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FW]; } static int qp_alloc_res(struct mlx4_dev *dev, int slave, int op, int cmd, u64 in_param, u64 *out_param) { int err; int count; int align; int base; int qpn; u8 bf_qp; switch (op) { case RES_OP_RESERVE: count = get_param_l(&in_param) & 0xffffff; bf_qp = get_param_l(&in_param) >> 31; align = get_param_h(&in_param); err = mlx4_grant_resource(dev, slave, RES_QP, count, 0); if (err) return err; err = __mlx4_qp_reserve_range(dev, count, align, &base, bf_qp); if (err) { mlx4_release_resource(dev, slave, RES_QP, count, 0); return err; } err = add_res_range(dev, slave, base, count, RES_QP, 0); if (err) { mlx4_release_resource(dev, slave, RES_QP, count, 0); __mlx4_qp_release_range(dev, base, count); return err; } set_param_l(out_param, base); break; case RES_OP_MAP_ICM: qpn = get_param_l(&in_param) & 0x7fffff; if (valid_reserved(dev, slave, qpn)) { err = add_res_range(dev, slave, qpn, 1, RES_QP, 0); if (err) return err; } err = qp_res_start_move_to(dev, slave, qpn, RES_QP_MAPPED, NULL, 1); if (err) return err; if (!fw_reserved(dev, qpn)) { err = __mlx4_qp_alloc_icm(dev, qpn); if (err) { res_abort_move(dev, slave, RES_QP, qpn); return err; } } res_end_move(dev, slave, RES_QP, qpn); break; default: err = -EINVAL; break; } return err; } static int mtt_alloc_res(struct mlx4_dev *dev, int slave, int op, int cmd, u64 in_param, u64 *out_param) { int err = -EINVAL; int base; int order; if (op != RES_OP_RESERVE_AND_MAP) return err; order = get_param_l(&in_param); err = mlx4_grant_resource(dev, slave, RES_MTT, 1 << order, 0); if (err) return err; base = __mlx4_alloc_mtt_range(dev, order); if (base == -1) { mlx4_release_resource(dev, slave, RES_MTT, 1 << order, 0); return -ENOMEM; } err = add_res_range(dev, slave, base, 1, RES_MTT, order); if (err) { mlx4_release_resource(dev, slave, RES_MTT, 1 << order, 0); __mlx4_free_mtt_range(dev, base, order); } else set_param_l(out_param, base); return err; } static int mpt_alloc_res(struct mlx4_dev *dev, int slave, int op, int cmd, u64 in_param, u64 *out_param) { int err = -EINVAL; int index; int id; struct res_mpt *mpt; switch (op) { case RES_OP_RESERVE: err = mlx4_grant_resource(dev, slave, RES_MPT, 1, 0); if (err) break; index = __mlx4_mr_reserve(dev); if (index == -1) { mlx4_release_resource(dev, slave, RES_MPT, 1, 0); break; } id = index & mpt_mask(dev); err = add_res_range(dev, slave, id, 1, RES_MPT, index); if (err) { mlx4_release_resource(dev, slave, RES_MPT, 1, 0); __mlx4_mr_release(dev, index); break; } set_param_l(out_param, index); break; case RES_OP_MAP_ICM: index = get_param_l(&in_param); id = index & mpt_mask(dev); err = mr_res_start_move_to(dev, slave, id, RES_MPT_MAPPED, &mpt); if (err) return err; err = __mlx4_mr_alloc_icm(dev, mpt->key); if (err) { res_abort_move(dev, slave, RES_MPT, id); return err; } res_end_move(dev, slave, RES_MPT, id); break; } return err; } static int cq_alloc_res(struct mlx4_dev *dev, int slave, int op, int cmd, u64 in_param, u64 *out_param) { int cqn; int err; switch (op) { case RES_OP_RESERVE_AND_MAP: err = mlx4_grant_resource(dev, slave, RES_CQ, 1, 0); if (err) break; err = __mlx4_cq_alloc_icm(dev, &cqn); if (err) { mlx4_release_resource(dev, slave, RES_CQ, 1, 0); break; } err = add_res_range(dev, slave, cqn, 1, RES_CQ, 0); if (err) { mlx4_release_resource(dev, slave, RES_CQ, 1, 0); __mlx4_cq_free_icm(dev, cqn); break; } set_param_l(out_param, cqn); break; default: err = -EINVAL; } return err; } static int srq_alloc_res(struct mlx4_dev *dev, int slave, int op, int cmd, u64 in_param, u64 *out_param) { int srqn; int err; switch (op) { case RES_OP_RESERVE_AND_MAP: err = mlx4_grant_resource(dev, slave, RES_SRQ, 1, 0); if (err) break; err = __mlx4_srq_alloc_icm(dev, &srqn); if (err) { mlx4_release_resource(dev, slave, RES_SRQ, 1, 0); break; } err = add_res_range(dev, slave, srqn, 1, RES_SRQ, 0); if (err) { mlx4_release_resource(dev, slave, RES_SRQ, 1, 0); __mlx4_srq_free_icm(dev, srqn); break; } set_param_l(out_param, srqn); break; default: err = -EINVAL; } return err; } static int mac_find_smac_ix_in_slave(struct mlx4_dev *dev, int slave, int port, u8 smac_index, u64 *mac) { struct mlx4_priv *priv = mlx4_priv(dev); struct mlx4_resource_tracker *tracker = &priv->mfunc.master.res_tracker; struct list_head *mac_list = &tracker->slave_list[slave].res_list[RES_MAC]; struct mac_res *res, *tmp; list_for_each_entry_safe(res, tmp, mac_list, list) { if (res->smac_index == smac_index && res->port == (u8) port) { *mac = res->mac; return 0; } } return -ENOENT; } static int mac_add_to_slave(struct mlx4_dev *dev, int slave, u64 mac, int port, u8 smac_index) { struct mlx4_priv *priv = mlx4_priv(dev); struct mlx4_resource_tracker *tracker = &priv->mfunc.master.res_tracker; struct list_head *mac_list = &tracker->slave_list[slave].res_list[RES_MAC]; struct mac_res *res, *tmp; list_for_each_entry_safe(res, tmp, mac_list, list) { if (res->mac == mac && res->port == (u8) port) { /* mac found. update ref count */ ++res->ref_count; return 0; } } if (mlx4_grant_resource(dev, slave, RES_MAC, 1, port)) return -EINVAL; res = kzalloc(sizeof *res, GFP_KERNEL); if (!res) { mlx4_release_resource(dev, slave, RES_MAC, 1, port); return -ENOMEM; } res->mac = mac; res->port = (u8) port; res->smac_index = smac_index; res->ref_count = 1; list_add_tail(&res->list, &tracker->slave_list[slave].res_list[RES_MAC]); return 0; } static void mac_del_from_slave(struct mlx4_dev *dev, int slave, u64 mac, int port) { struct mlx4_priv *priv = mlx4_priv(dev); struct mlx4_resource_tracker *tracker = &priv->mfunc.master.res_tracker; struct list_head *mac_list = &tracker->slave_list[slave].res_list[RES_MAC]; struct mac_res *res, *tmp; list_for_each_entry_safe(res, tmp, mac_list, list) { if (res->mac == mac && res->port == (u8) port) { if (!--res->ref_count) { list_del(&res->list); mlx4_release_resource(dev, slave, RES_MAC, 1, port); kfree(res); } break; } } } static void rem_slave_macs(struct mlx4_dev *dev, int slave) { struct mlx4_priv *priv = mlx4_priv(dev); struct mlx4_resource_tracker *tracker = &priv->mfunc.master.res_tracker; struct list_head *mac_list = &tracker->slave_list[slave].res_list[RES_MAC]; struct mac_res *res, *tmp; int i; list_for_each_entry_safe(res, tmp, mac_list, list) { list_del(&res->list); /* dereference the mac the num times the slave referenced it */ for (i = 0; i < res->ref_count; i++) __mlx4_unregister_mac(dev, res->port, res->mac); mlx4_release_resource(dev, slave, RES_MAC, 1, res->port); kfree(res); } } static int mac_alloc_res(struct mlx4_dev *dev, int slave, int op, int cmd, u64 in_param, u64 *out_param, int in_port) { int err = -EINVAL; int port; u64 mac; u8 smac_index = 0; if (op != RES_OP_RESERVE_AND_MAP) return err; port = !in_port ? get_param_l(out_param) : in_port; mac = in_param; err = __mlx4_register_mac(dev, port, mac); if (err >= 0) { smac_index = err; set_param_l(out_param, err); err = 0; } if (!err) { err = mac_add_to_slave(dev, slave, mac, port, smac_index); if (err) __mlx4_unregister_mac(dev, port, mac); } return err; } static int vlan_add_to_slave(struct mlx4_dev *dev, int slave, u16 vlan, int port, int vlan_index) { struct mlx4_priv *priv = mlx4_priv(dev); struct mlx4_resource_tracker *tracker = &priv->mfunc.master.res_tracker; struct list_head *vlan_list = &tracker->slave_list[slave].res_list[RES_VLAN]; struct vlan_res *res, *tmp; list_for_each_entry_safe(res, tmp, vlan_list, list) { if (res->vlan == vlan && res->port == (u8) port) { /* vlan found. update ref count */ ++res->ref_count; return 0; } } if (mlx4_grant_resource(dev, slave, RES_VLAN, 1, port)) return -EINVAL; res = kzalloc(sizeof(*res), GFP_KERNEL); if (!res) { mlx4_release_resource(dev, slave, RES_VLAN, 1, port); return -ENOMEM; } res->vlan = vlan; res->port = (u8) port; res->vlan_index = vlan_index; res->ref_count = 1; list_add_tail(&res->list, &tracker->slave_list[slave].res_list[RES_VLAN]); return 0; } static void vlan_del_from_slave(struct mlx4_dev *dev, int slave, u16 vlan, int port) { struct mlx4_priv *priv = mlx4_priv(dev); struct mlx4_resource_tracker *tracker = &priv->mfunc.master.res_tracker; struct list_head *vlan_list = &tracker->slave_list[slave].res_list[RES_VLAN]; struct vlan_res *res, *tmp; list_for_each_entry_safe(res, tmp, vlan_list, list) { if (res->vlan == vlan && res->port == (u8) port) { if (!--res->ref_count) { list_del(&res->list); mlx4_release_resource(dev, slave, RES_VLAN, 1, port); kfree(res); } break; } } } static void rem_slave_vlans(struct mlx4_dev *dev, int slave) { struct mlx4_priv *priv = mlx4_priv(dev); struct mlx4_resource_tracker *tracker = &priv->mfunc.master.res_tracker; struct list_head *vlan_list = &tracker->slave_list[slave].res_list[RES_VLAN]; struct vlan_res *res, *tmp; int i; list_for_each_entry_safe(res, tmp, vlan_list, list) { list_del(&res->list); /* dereference the vlan the num times the slave referenced it */ for (i = 0; i < res->ref_count; i++) __mlx4_unregister_vlan(dev, res->port, res->vlan); mlx4_release_resource(dev, slave, RES_VLAN, 1, res->port); kfree(res); } } static int vlan_alloc_res(struct mlx4_dev *dev, int slave, int op, int cmd, u64 in_param, u64 *out_param, int port) { int err = -EINVAL; u16 vlan; int vlan_index; if (!port) return err; if (op != RES_OP_RESERVE_AND_MAP) return err; vlan = (u16) in_param; err = __mlx4_register_vlan(dev, port, vlan, &vlan_index); if (!err) { set_param_l(out_param, (u32) vlan_index); err = vlan_add_to_slave(dev, slave, vlan, port, vlan_index); if (err) __mlx4_unregister_vlan(dev, port, vlan); } return err; } static int counter_alloc_res(struct mlx4_dev *dev, int slave, int op, int cmd, u64 in_param, u64 *out_param) { u32 index; int err; if (op != RES_OP_RESERVE) return -EINVAL; err = mlx4_grant_resource(dev, slave, RES_COUNTER, 1, 0); if (err) return err; err = __mlx4_counter_alloc(dev, &index); if (err) { mlx4_release_resource(dev, slave, RES_COUNTER, 1, 0); return err; } err = add_res_range(dev, slave, index, 1, RES_COUNTER, 0); if (err) { __mlx4_counter_free(dev, index); mlx4_release_resource(dev, slave, RES_COUNTER, 1, 0); } else { set_param_l(out_param, index); } return err; } static int xrcdn_alloc_res(struct mlx4_dev *dev, int slave, int op, int cmd, u64 in_param, u64 *out_param) { u32 xrcdn; int err; if (op != RES_OP_RESERVE) return -EINVAL; err = __mlx4_xrcd_alloc(dev, &xrcdn); if (err) return err; err = add_res_range(dev, slave, xrcdn, 1, RES_XRCD, 0); if (err) __mlx4_xrcd_free(dev, xrcdn); else set_param_l(out_param, xrcdn); return err; } int mlx4_ALLOC_RES_wrapper(struct mlx4_dev *dev, int slave, struct mlx4_vhcr *vhcr, struct mlx4_cmd_mailbox *inbox, struct mlx4_cmd_mailbox *outbox, struct mlx4_cmd_info *cmd) { int err; int alop = vhcr->op_modifier; switch (vhcr->in_modifier & 0xFF) { case RES_QP: err = qp_alloc_res(dev, slave, vhcr->op_modifier, alop, vhcr->in_param, &vhcr->out_param); break; case RES_MTT: err = mtt_alloc_res(dev, slave, vhcr->op_modifier, alop, vhcr->in_param, &vhcr->out_param); break; case RES_MPT: err = mpt_alloc_res(dev, slave, vhcr->op_modifier, alop, vhcr->in_param, &vhcr->out_param); break; case RES_CQ: err = cq_alloc_res(dev, slave, vhcr->op_modifier, alop, vhcr->in_param, &vhcr->out_param); break; case RES_SRQ: err = srq_alloc_res(dev, slave, vhcr->op_modifier, alop, vhcr->in_param, &vhcr->out_param); break; case RES_MAC: err = mac_alloc_res(dev, slave, vhcr->op_modifier, alop, vhcr->in_param, &vhcr->out_param, (vhcr->in_modifier >> 8) & 0xFF); break; case RES_VLAN: err = vlan_alloc_res(dev, slave, vhcr->op_modifier, alop, vhcr->in_param, &vhcr->out_param, (vhcr->in_modifier >> 8) & 0xFF); break; case RES_COUNTER: err = counter_alloc_res(dev, slave, vhcr->op_modifier, alop, vhcr->in_param, &vhcr->out_param); break; case RES_XRCD: err = xrcdn_alloc_res(dev, slave, vhcr->op_modifier, alop, vhcr->in_param, &vhcr->out_param); break; default: err = -EINVAL; break; } return err; } static int qp_free_res(struct mlx4_dev *dev, int slave, int op, int cmd, u64 in_param) { int err; int count; int base; int qpn; switch (op) { case RES_OP_RESERVE: base = get_param_l(&in_param) & 0x7fffff; count = get_param_h(&in_param); err = rem_res_range(dev, slave, base, count, RES_QP, 0); if (err) break; mlx4_release_resource(dev, slave, RES_QP, count, 0); __mlx4_qp_release_range(dev, base, count); break; case RES_OP_MAP_ICM: qpn = get_param_l(&in_param) & 0x7fffff; err = qp_res_start_move_to(dev, slave, qpn, RES_QP_RESERVED, NULL, 0); if (err) return err; if (!fw_reserved(dev, qpn)) __mlx4_qp_free_icm(dev, qpn); res_end_move(dev, slave, RES_QP, qpn); if (valid_reserved(dev, slave, qpn)) err = rem_res_range(dev, slave, qpn, 1, RES_QP, 0); break; default: err = -EINVAL; break; } return err; } static int mtt_free_res(struct mlx4_dev *dev, int slave, int op, int cmd, u64 in_param, u64 *out_param) { int err = -EINVAL; int base; int order; if (op != RES_OP_RESERVE_AND_MAP) return err; base = get_param_l(&in_param); order = get_param_h(&in_param); err = rem_res_range(dev, slave, base, 1, RES_MTT, order); if (!err) { mlx4_release_resource(dev, slave, RES_MTT, 1 << order, 0); __mlx4_free_mtt_range(dev, base, order); } return err; } static int mpt_free_res(struct mlx4_dev *dev, int slave, int op, int cmd, u64 in_param) { int err = -EINVAL; int index; int id; struct res_mpt *mpt; switch (op) { case RES_OP_RESERVE: index = get_param_l(&in_param); id = index & mpt_mask(dev); err = get_res(dev, slave, id, RES_MPT, &mpt); if (err) break; index = mpt->key; put_res(dev, slave, id, RES_MPT); err = rem_res_range(dev, slave, id, 1, RES_MPT, 0); if (err) break; mlx4_release_resource(dev, slave, RES_MPT, 1, 0); __mlx4_mr_release(dev, index); break; case RES_OP_MAP_ICM: index = get_param_l(&in_param); id = index & mpt_mask(dev); err = mr_res_start_move_to(dev, slave, id, RES_MPT_RESERVED, &mpt); if (err) return err; __mlx4_mr_free_icm(dev, mpt->key); res_end_move(dev, slave, RES_MPT, id); return err; break; default: err = -EINVAL; break; } return err; } static int cq_free_res(struct mlx4_dev *dev, int slave, int op, int cmd, u64 in_param, u64 *out_param) { int cqn; int err; switch (op) { case RES_OP_RESERVE_AND_MAP: cqn = get_param_l(&in_param); err = rem_res_range(dev, slave, cqn, 1, RES_CQ, 0); if (err) break; mlx4_release_resource(dev, slave, RES_CQ, 1, 0); __mlx4_cq_free_icm(dev, cqn); break; default: err = -EINVAL; break; } return err; } static int srq_free_res(struct mlx4_dev *dev, int slave, int op, int cmd, u64 in_param, u64 *out_param) { int srqn; int err; switch (op) { case RES_OP_RESERVE_AND_MAP: srqn = get_param_l(&in_param); err = rem_res_range(dev, slave, srqn, 1, RES_SRQ, 0); if (err) break; mlx4_release_resource(dev, slave, RES_SRQ, 1, 0); __mlx4_srq_free_icm(dev, srqn); break; default: err = -EINVAL; break; } return err; } static int mac_free_res(struct mlx4_dev *dev, int slave, int op, int cmd, u64 in_param, u64 *out_param, int in_port) { int port; int err = 0; switch (op) { case RES_OP_RESERVE_AND_MAP: port = !in_port ? get_param_l(out_param) : in_port; mac_del_from_slave(dev, slave, in_param, port); __mlx4_unregister_mac(dev, port, in_param); break; default: err = -EINVAL; break; } return err; } static int vlan_free_res(struct mlx4_dev *dev, int slave, int op, int cmd, u64 in_param, u64 *out_param, int port) { int err = 0; switch (op) { case RES_OP_RESERVE_AND_MAP: if (!port) return -EINVAL; vlan_del_from_slave(dev, slave, in_param, port); __mlx4_unregister_vlan(dev, port, in_param); break; default: err = -EINVAL; break; } return err; } static int counter_free_res(struct mlx4_dev *dev, int slave, int op, int cmd, u64 in_param, u64 *out_param) { int index; int err; if (op != RES_OP_RESERVE) return -EINVAL; index = get_param_l(&in_param); err = rem_res_range(dev, slave, index, 1, RES_COUNTER, 0); if (err) return err; __mlx4_counter_free(dev, index); mlx4_release_resource(dev, slave, RES_COUNTER, 1, 0); return err; } static int xrcdn_free_res(struct mlx4_dev *dev, int slave, int op, int cmd, u64 in_param, u64 *out_param) { int xrcdn; int err; if (op != RES_OP_RESERVE) return -EINVAL; xrcdn = get_param_l(&in_param); err = rem_res_range(dev, slave, xrcdn, 1, RES_XRCD, 0); if (err) return err; __mlx4_xrcd_free(dev, xrcdn); return err; } int mlx4_FREE_RES_wrapper(struct mlx4_dev *dev, int slave, struct mlx4_vhcr *vhcr, struct mlx4_cmd_mailbox *inbox, struct mlx4_cmd_mailbox *outbox, struct mlx4_cmd_info *cmd) { int err = -EINVAL; int alop = vhcr->op_modifier; switch (vhcr->in_modifier & 0xFF) { case RES_QP: err = qp_free_res(dev, slave, vhcr->op_modifier, alop, vhcr->in_param); break; case RES_MTT: err = mtt_free_res(dev, slave, vhcr->op_modifier, alop, vhcr->in_param, &vhcr->out_param); break; case RES_MPT: err = mpt_free_res(dev, slave, vhcr->op_modifier, alop, vhcr->in_param); break; case RES_CQ: err = cq_free_res(dev, slave, vhcr->op_modifier, alop, vhcr->in_param, &vhcr->out_param); break; case RES_SRQ: err = srq_free_res(dev, slave, vhcr->op_modifier, alop, vhcr->in_param, &vhcr->out_param); break; case RES_MAC: err = mac_free_res(dev, slave, vhcr->op_modifier, alop, vhcr->in_param, &vhcr->out_param, (vhcr->in_modifier >> 8) & 0xFF); break; case RES_VLAN: err = vlan_free_res(dev, slave, vhcr->op_modifier, alop, vhcr->in_param, &vhcr->out_param, (vhcr->in_modifier >> 8) & 0xFF); break; case RES_COUNTER: err = counter_free_res(dev, slave, vhcr->op_modifier, alop, vhcr->in_param, &vhcr->out_param); break; case RES_XRCD: err = xrcdn_free_res(dev, slave, vhcr->op_modifier, alop, vhcr->in_param, &vhcr->out_param); default: break; } return err; } /* ugly but other choices are uglier */ static int mr_phys_mpt(struct mlx4_mpt_entry *mpt) { return (be32_to_cpu(mpt->flags) >> 9) & 1; } static int mr_get_mtt_addr(struct mlx4_mpt_entry *mpt) { return (int)be64_to_cpu(mpt->mtt_addr) & 0xfffffff8; } static int mr_get_mtt_size(struct mlx4_mpt_entry *mpt) { return be32_to_cpu(mpt->mtt_sz); } static int qp_get_mtt_addr(struct mlx4_qp_context *qpc) { return be32_to_cpu(qpc->mtt_base_addr_l) & 0xfffffff8; } static int srq_get_mtt_addr(struct mlx4_srq_context *srqc) { return be32_to_cpu(srqc->mtt_base_addr_l) & 0xfffffff8; } static int qp_get_mtt_size(struct mlx4_qp_context *qpc) { int page_shift = (qpc->log_page_size & 0x3f) + 12; int log_sq_size = (qpc->sq_size_stride >> 3) & 0xf; int log_sq_sride = qpc->sq_size_stride & 7; int log_rq_size = (qpc->rq_size_stride >> 3) & 0xf; int log_rq_stride = qpc->rq_size_stride & 7; int srq = (be32_to_cpu(qpc->srqn) >> 24) & 1; int rss = (be32_to_cpu(qpc->flags) >> 13) & 1; int xrc = (be32_to_cpu(qpc->local_qpn) >> 23) & 1; int sq_size; int rq_size; int total_pages; int total_mem; int page_offset = (be32_to_cpu(qpc->params2) >> 6) & 0x3f; sq_size = 1 << (log_sq_size + log_sq_sride + 4); rq_size = (srq|rss|xrc) ? 0 : (1 << (log_rq_size + log_rq_stride + 4)); total_mem = sq_size + rq_size; total_pages = roundup_pow_of_two((total_mem + (page_offset << 6)) >> page_shift); return total_pages; } static int check_mtt_range(struct mlx4_dev *dev, int slave, int start, int size, struct res_mtt *mtt) { int res_start = mtt->com.res_id; int res_size = (1 << mtt->order); if (start < res_start || start + size > res_start + res_size) return -EPERM; return 0; } int mlx4_SW2HW_MPT_wrapper(struct mlx4_dev *dev, int slave, struct mlx4_vhcr *vhcr, struct mlx4_cmd_mailbox *inbox, struct mlx4_cmd_mailbox *outbox, struct mlx4_cmd_info *cmd) { int err; int index = vhcr->in_modifier; struct res_mtt *mtt; struct res_mpt *mpt; int mtt_base = mr_get_mtt_addr(inbox->buf) / dev->caps.mtt_entry_sz; int phys; int id; id = index & mpt_mask(dev); err = mr_res_start_move_to(dev, slave, id, RES_MPT_HW, &mpt); if (err) return err; phys = mr_phys_mpt(inbox->buf); if (!phys) { err = get_res(dev, slave, mtt_base, RES_MTT, &mtt); if (err) goto ex_abort; err = check_mtt_range(dev, slave, mtt_base, mr_get_mtt_size(inbox->buf), mtt); if (err) goto ex_put; mpt->mtt = mtt; } err = mlx4_DMA_wrapper(dev, slave, vhcr, inbox, outbox, cmd); if (err) goto ex_put; if (!phys) { atomic_inc(&mtt->ref_count); put_res(dev, slave, mtt->com.res_id, RES_MTT); } res_end_move(dev, slave, RES_MPT, id); return 0; ex_put: if (!phys) put_res(dev, slave, mtt->com.res_id, RES_MTT); ex_abort: res_abort_move(dev, slave, RES_MPT, id); return err; } int mlx4_HW2SW_MPT_wrapper(struct mlx4_dev *dev, int slave, struct mlx4_vhcr *vhcr, struct mlx4_cmd_mailbox *inbox, struct mlx4_cmd_mailbox *outbox, struct mlx4_cmd_info *cmd) { int err; int index = vhcr->in_modifier; struct res_mpt *mpt; int id; id = index & mpt_mask(dev); err = mr_res_start_move_to(dev, slave, id, RES_MPT_MAPPED, &mpt); if (err) return err; err = mlx4_DMA_wrapper(dev, slave, vhcr, inbox, outbox, cmd); if (err) goto ex_abort; if (mpt->mtt) atomic_dec(&mpt->mtt->ref_count); res_end_move(dev, slave, RES_MPT, id); return 0; ex_abort: res_abort_move(dev, slave, RES_MPT, id); return err; } int mlx4_QUERY_MPT_wrapper(struct mlx4_dev *dev, int slave, struct mlx4_vhcr *vhcr, struct mlx4_cmd_mailbox *inbox, struct mlx4_cmd_mailbox *outbox, struct mlx4_cmd_info *cmd) { int err; int index = vhcr->in_modifier; struct res_mpt *mpt; int id; id = index & mpt_mask(dev); err = get_res(dev, slave, id, RES_MPT, &mpt); if (err) return err; if (mpt->com.from_state != RES_MPT_HW) { err = -EBUSY; goto out; } err = mlx4_DMA_wrapper(dev, slave, vhcr, inbox, outbox, cmd); out: put_res(dev, slave, id, RES_MPT); return err; } static int qp_get_rcqn(struct mlx4_qp_context *qpc) { return be32_to_cpu(qpc->cqn_recv) & 0xffffff; } static int qp_get_scqn(struct mlx4_qp_context *qpc) { return be32_to_cpu(qpc->cqn_send) & 0xffffff; } static u32 qp_get_srqn(struct mlx4_qp_context *qpc) { return be32_to_cpu(qpc->srqn) & 0x1ffffff; } static void adjust_proxy_tun_qkey(struct mlx4_dev *dev, struct mlx4_vhcr *vhcr, struct mlx4_qp_context *context) { u32 qpn = vhcr->in_modifier & 0xffffff; u32 qkey = 0; if (mlx4_get_parav_qkey(dev, qpn, &qkey)) return; /* adjust qkey in qp context */ context->qkey = cpu_to_be32(qkey); } int mlx4_RST2INIT_QP_wrapper(struct mlx4_dev *dev, int slave, struct mlx4_vhcr *vhcr, struct mlx4_cmd_mailbox *inbox, struct mlx4_cmd_mailbox *outbox, struct mlx4_cmd_info *cmd) { int err; int qpn = vhcr->in_modifier & 0x7fffff; struct res_mtt *mtt; struct res_qp *qp; struct mlx4_qp_context *qpc = inbox->buf + 8; int mtt_base = qp_get_mtt_addr(qpc) / dev->caps.mtt_entry_sz; int mtt_size = qp_get_mtt_size(qpc); struct res_cq *rcq; struct res_cq *scq; int rcqn = qp_get_rcqn(qpc); int scqn = qp_get_scqn(qpc); u32 srqn = qp_get_srqn(qpc) & 0xffffff; int use_srq = (qp_get_srqn(qpc) >> 24) & 1; struct res_srq *srq; int local_qpn = be32_to_cpu(qpc->local_qpn) & 0xffffff; err = qp_res_start_move_to(dev, slave, qpn, RES_QP_HW, &qp, 0); if (err) return err; qp->local_qpn = local_qpn; err = get_res(dev, slave, mtt_base, RES_MTT, &mtt); if (err) goto ex_abort; err = check_mtt_range(dev, slave, mtt_base, mtt_size, mtt); if (err) goto ex_put_mtt; err = get_res(dev, slave, rcqn, RES_CQ, &rcq); if (err) goto ex_put_mtt; if (scqn != rcqn) { err = get_res(dev, slave, scqn, RES_CQ, &scq); if (err) goto ex_put_rcq; } else scq = rcq; if (use_srq) { err = get_res(dev, slave, srqn, RES_SRQ, &srq); if (err) goto ex_put_scq; } adjust_proxy_tun_qkey(dev, vhcr, qpc); update_pkey_index(dev, slave, inbox); err = mlx4_DMA_wrapper(dev, slave, vhcr, inbox, outbox, cmd); if (err) goto ex_put_srq; atomic_inc(&mtt->ref_count); qp->mtt = mtt; atomic_inc(&rcq->ref_count); qp->rcq = rcq; atomic_inc(&scq->ref_count); qp->scq = scq; if (scqn != rcqn) put_res(dev, slave, scqn, RES_CQ); if (use_srq) { atomic_inc(&srq->ref_count); put_res(dev, slave, srqn, RES_SRQ); qp->srq = srq; } put_res(dev, slave, rcqn, RES_CQ); put_res(dev, slave, mtt_base, RES_MTT); res_end_move(dev, slave, RES_QP, qpn); return 0; ex_put_srq: if (use_srq) put_res(dev, slave, srqn, RES_SRQ); ex_put_scq: if (scqn != rcqn) put_res(dev, slave, scqn, RES_CQ); ex_put_rcq: put_res(dev, slave, rcqn, RES_CQ); ex_put_mtt: put_res(dev, slave, mtt_base, RES_MTT); ex_abort: res_abort_move(dev, slave, RES_QP, qpn); return err; } static int eq_get_mtt_addr(struct mlx4_eq_context *eqc) { return be32_to_cpu(eqc->mtt_base_addr_l) & 0xfffffff8; } static int eq_get_mtt_size(struct mlx4_eq_context *eqc) { int log_eq_size = eqc->log_eq_size & 0x1f; int page_shift = (eqc->log_page_size & 0x3f) + 12; if (log_eq_size + 5 < page_shift) return 1; return 1 << (log_eq_size + 5 - page_shift); } static int cq_get_mtt_addr(struct mlx4_cq_context *cqc) { return be32_to_cpu(cqc->mtt_base_addr_l) & 0xfffffff8; } static int cq_get_mtt_size(struct mlx4_cq_context *cqc) { int log_cq_size = (be32_to_cpu(cqc->logsize_usrpage) >> 24) & 0x1f; int page_shift = (cqc->log_page_size & 0x3f) + 12; if (log_cq_size + 5 < page_shift) return 1; return 1 << (log_cq_size + 5 - page_shift); } int mlx4_SW2HW_EQ_wrapper(struct mlx4_dev *dev, int slave, struct mlx4_vhcr *vhcr, struct mlx4_cmd_mailbox *inbox, struct mlx4_cmd_mailbox *outbox, struct mlx4_cmd_info *cmd) { int err; int eqn = vhcr->in_modifier; int res_id = (slave << 8) | eqn; struct mlx4_eq_context *eqc = inbox->buf; int mtt_base = eq_get_mtt_addr(eqc) / dev->caps.mtt_entry_sz; int mtt_size = eq_get_mtt_size(eqc); struct res_eq *eq; struct res_mtt *mtt; err = add_res_range(dev, slave, res_id, 1, RES_EQ, 0); if (err) return err; err = eq_res_start_move_to(dev, slave, res_id, RES_EQ_HW, &eq); if (err) goto out_add; err = get_res(dev, slave, mtt_base, RES_MTT, &mtt); if (err) goto out_move; err = check_mtt_range(dev, slave, mtt_base, mtt_size, mtt); if (err) goto out_put; err = mlx4_DMA_wrapper(dev, slave, vhcr, inbox, outbox, cmd); if (err) goto out_put; atomic_inc(&mtt->ref_count); eq->mtt = mtt; put_res(dev, slave, mtt->com.res_id, RES_MTT); res_end_move(dev, slave, RES_EQ, res_id); return 0; out_put: put_res(dev, slave, mtt->com.res_id, RES_MTT); out_move: res_abort_move(dev, slave, RES_EQ, res_id); out_add: rem_res_range(dev, slave, res_id, 1, RES_EQ, 0); return err; } static int get_containing_mtt(struct mlx4_dev *dev, int slave, int start, int len, struct res_mtt **res) { struct mlx4_priv *priv = mlx4_priv(dev); struct mlx4_resource_tracker *tracker = &priv->mfunc.master.res_tracker; struct res_mtt *mtt; int err = -EINVAL; spin_lock_irq(mlx4_tlock(dev)); list_for_each_entry(mtt, &tracker->slave_list[slave].res_list[RES_MTT], com.list) { if (!check_mtt_range(dev, slave, start, len, mtt)) { *res = mtt; mtt->com.from_state = mtt->com.state; mtt->com.state = RES_MTT_BUSY; err = 0; break; } } spin_unlock_irq(mlx4_tlock(dev)); return err; } static int verify_qp_parameters(struct mlx4_dev *dev, struct mlx4_cmd_mailbox *inbox, enum qp_transition transition, u8 slave) { u32 qp_type; struct mlx4_qp_context *qp_ctx; enum mlx4_qp_optpar optpar; int port; int num_gids; qp_ctx = inbox->buf + 8; qp_type = (be32_to_cpu(qp_ctx->flags) >> 16) & 0xff; optpar = be32_to_cpu(*(__be32 *) inbox->buf); switch (qp_type) { case MLX4_QP_ST_RC: case MLX4_QP_ST_UC: switch (transition) { case QP_TRANS_INIT2RTR: case QP_TRANS_RTR2RTS: case QP_TRANS_RTS2RTS: case QP_TRANS_SQD2SQD: case QP_TRANS_SQD2RTS: if (slave != mlx4_master_func_num(dev)) if (optpar & MLX4_QP_OPTPAR_PRIMARY_ADDR_PATH) { port = (qp_ctx->pri_path.sched_queue >> 6 & 1) + 1; if (dev->caps.port_mask[port] != MLX4_PORT_TYPE_IB) num_gids = mlx4_get_slave_num_gids(dev, slave); else num_gids = 1; if (qp_ctx->pri_path.mgid_index >= num_gids) return -EINVAL; } if (optpar & MLX4_QP_OPTPAR_ALT_ADDR_PATH) { port = (qp_ctx->alt_path.sched_queue >> 6 & 1) + 1; if (dev->caps.port_mask[port] != MLX4_PORT_TYPE_IB) num_gids = mlx4_get_slave_num_gids(dev, slave); else num_gids = 1; if (qp_ctx->alt_path.mgid_index >= num_gids) return -EINVAL; } break; default: break; } break; default: break; } return 0; } int mlx4_WRITE_MTT_wrapper(struct mlx4_dev *dev, int slave, struct mlx4_vhcr *vhcr, struct mlx4_cmd_mailbox *inbox, struct mlx4_cmd_mailbox *outbox, struct mlx4_cmd_info *cmd) { struct mlx4_mtt mtt; __be64 *page_list = inbox->buf; u64 *pg_list = (u64 *)page_list; int i; struct res_mtt *rmtt = NULL; int start = be64_to_cpu(page_list[0]); int npages = vhcr->in_modifier; int err; err = get_containing_mtt(dev, slave, start, npages, &rmtt); if (err) return err; /* Call the SW implementation of write_mtt: * - Prepare a dummy mtt struct * - Translate inbox contents to simple addresses in host endianess */ mtt.offset = 0; /* TBD this is broken but I don't handle it since we don't really use it */ mtt.order = 0; mtt.page_shift = 0; for (i = 0; i < npages; ++i) pg_list[i + 2] = (be64_to_cpu(page_list[i + 2]) & ~1ULL); err = __mlx4_write_mtt(dev, &mtt, be64_to_cpu(page_list[0]), npages, ((u64 *)page_list + 2)); if (rmtt) put_res(dev, slave, rmtt->com.res_id, RES_MTT); return err; } int mlx4_HW2SW_EQ_wrapper(struct mlx4_dev *dev, int slave, struct mlx4_vhcr *vhcr, struct mlx4_cmd_mailbox *inbox, struct mlx4_cmd_mailbox *outbox, struct mlx4_cmd_info *cmd) { int eqn = vhcr->in_modifier; int res_id = eqn | (slave << 8); struct res_eq *eq; int err; err = eq_res_start_move_to(dev, slave, res_id, RES_EQ_RESERVED, &eq); if (err) return err; err = get_res(dev, slave, eq->mtt->com.res_id, RES_MTT, NULL); if (err) goto ex_abort; err = mlx4_DMA_wrapper(dev, slave, vhcr, inbox, outbox, cmd); if (err) goto ex_put; atomic_dec(&eq->mtt->ref_count); put_res(dev, slave, eq->mtt->com.res_id, RES_MTT); res_end_move(dev, slave, RES_EQ, res_id); rem_res_range(dev, slave, res_id, 1, RES_EQ, 0); return 0; ex_put: put_res(dev, slave, eq->mtt->com.res_id, RES_MTT); ex_abort: res_abort_move(dev, slave, RES_EQ, res_id); return err; } int mlx4_GEN_EQE(struct mlx4_dev *dev, int slave, struct mlx4_eqe *eqe) { struct mlx4_priv *priv = mlx4_priv(dev); struct mlx4_slave_event_eq_info *event_eq; struct mlx4_cmd_mailbox *mailbox; u32 in_modifier = 0; int err; int res_id; struct res_eq *req; if (!priv->mfunc.master.slave_state) return -EINVAL; event_eq = &priv->mfunc.master.slave_state[slave].event_eq[eqe->type]; /* Create the event only if the slave is registered */ if (event_eq->eqn < 0) return 0; mutex_lock(&priv->mfunc.master.gen_eqe_mutex[slave]); res_id = (slave << 8) | event_eq->eqn; err = get_res(dev, slave, res_id, RES_EQ, &req); if (err) goto unlock; if (req->com.from_state != RES_EQ_HW) { err = -EINVAL; goto put; } mailbox = mlx4_alloc_cmd_mailbox(dev); if (IS_ERR(mailbox)) { err = PTR_ERR(mailbox); goto put; } if (eqe->type == MLX4_EVENT_TYPE_CMD) { ++event_eq->token; eqe->event.cmd.token = cpu_to_be16(event_eq->token); } memcpy(mailbox->buf, (u8 *) eqe, 28); in_modifier = (slave & 0xff) | ((event_eq->eqn & 0xff) << 16); err = mlx4_cmd(dev, mailbox->dma, in_modifier, 0, MLX4_CMD_GEN_EQE, MLX4_CMD_TIME_CLASS_B, MLX4_CMD_NATIVE); put_res(dev, slave, res_id, RES_EQ); mutex_unlock(&priv->mfunc.master.gen_eqe_mutex[slave]); mlx4_free_cmd_mailbox(dev, mailbox); return err; put: put_res(dev, slave, res_id, RES_EQ); unlock: mutex_unlock(&priv->mfunc.master.gen_eqe_mutex[slave]); return err; } int mlx4_QUERY_EQ_wrapper(struct mlx4_dev *dev, int slave, struct mlx4_vhcr *vhcr, struct mlx4_cmd_mailbox *inbox, struct mlx4_cmd_mailbox *outbox, struct mlx4_cmd_info *cmd) { int eqn = vhcr->in_modifier; int res_id = eqn | (slave << 8); struct res_eq *eq; int err; err = get_res(dev, slave, res_id, RES_EQ, &eq); if (err) return err; if (eq->com.from_state != RES_EQ_HW) { err = -EINVAL; goto ex_put; } err = mlx4_DMA_wrapper(dev, slave, vhcr, inbox, outbox, cmd); ex_put: put_res(dev, slave, res_id, RES_EQ); return err; } int mlx4_SW2HW_CQ_wrapper(struct mlx4_dev *dev, int slave, struct mlx4_vhcr *vhcr, struct mlx4_cmd_mailbox *inbox, struct mlx4_cmd_mailbox *outbox, struct mlx4_cmd_info *cmd) { int err; int cqn = vhcr->in_modifier; struct mlx4_cq_context *cqc = inbox->buf; int mtt_base = cq_get_mtt_addr(cqc) / dev->caps.mtt_entry_sz; struct res_cq *cq; struct res_mtt *mtt; err = cq_res_start_move_to(dev, slave, cqn, RES_CQ_HW, &cq); if (err) return err; err = get_res(dev, slave, mtt_base, RES_MTT, &mtt); if (err) goto out_move; err = check_mtt_range(dev, slave, mtt_base, cq_get_mtt_size(cqc), mtt); if (err) goto out_put; err = mlx4_DMA_wrapper(dev, slave, vhcr, inbox, outbox, cmd); if (err) goto out_put; atomic_inc(&mtt->ref_count); cq->mtt = mtt; put_res(dev, slave, mtt->com.res_id, RES_MTT); res_end_move(dev, slave, RES_CQ, cqn); return 0; out_put: put_res(dev, slave, mtt->com.res_id, RES_MTT); out_move: res_abort_move(dev, slave, RES_CQ, cqn); return err; } int mlx4_HW2SW_CQ_wrapper(struct mlx4_dev *dev, int slave, struct mlx4_vhcr *vhcr, struct mlx4_cmd_mailbox *inbox, struct mlx4_cmd_mailbox *outbox, struct mlx4_cmd_info *cmd) { int err; int cqn = vhcr->in_modifier; struct res_cq *cq; err = cq_res_start_move_to(dev, slave, cqn, RES_CQ_ALLOCATED, &cq); if (err) return err; err = mlx4_DMA_wrapper(dev, slave, vhcr, inbox, outbox, cmd); if (err) goto out_move; atomic_dec(&cq->mtt->ref_count); res_end_move(dev, slave, RES_CQ, cqn); return 0; out_move: res_abort_move(dev, slave, RES_CQ, cqn); return err; } int mlx4_QUERY_CQ_wrapper(struct mlx4_dev *dev, int slave, struct mlx4_vhcr *vhcr, struct mlx4_cmd_mailbox *inbox, struct mlx4_cmd_mailbox *outbox, struct mlx4_cmd_info *cmd) { int cqn = vhcr->in_modifier; struct res_cq *cq; int err; err = get_res(dev, slave, cqn, RES_CQ, &cq); if (err) return err; if (cq->com.from_state != RES_CQ_HW) goto ex_put; err = mlx4_DMA_wrapper(dev, slave, vhcr, inbox, outbox, cmd); ex_put: put_res(dev, slave, cqn, RES_CQ); return err; } static int handle_resize(struct mlx4_dev *dev, int slave, struct mlx4_vhcr *vhcr, struct mlx4_cmd_mailbox *inbox, struct mlx4_cmd_mailbox *outbox, struct mlx4_cmd_info *cmd, struct res_cq *cq) { int err; struct res_mtt *orig_mtt; struct res_mtt *mtt; struct mlx4_cq_context *cqc = inbox->buf; int mtt_base = cq_get_mtt_addr(cqc) / dev->caps.mtt_entry_sz; err = get_res(dev, slave, cq->mtt->com.res_id, RES_MTT, &orig_mtt); if (err) return err; if (orig_mtt != cq->mtt) { err = -EINVAL; goto ex_put; } err = get_res(dev, slave, mtt_base, RES_MTT, &mtt); if (err) goto ex_put; err = check_mtt_range(dev, slave, mtt_base, cq_get_mtt_size(cqc), mtt); if (err) goto ex_put1; err = mlx4_DMA_wrapper(dev, slave, vhcr, inbox, outbox, cmd); if (err) goto ex_put1; atomic_dec(&orig_mtt->ref_count); put_res(dev, slave, orig_mtt->com.res_id, RES_MTT); atomic_inc(&mtt->ref_count); cq->mtt = mtt; put_res(dev, slave, mtt->com.res_id, RES_MTT); return 0; ex_put1: put_res(dev, slave, mtt->com.res_id, RES_MTT); ex_put: put_res(dev, slave, orig_mtt->com.res_id, RES_MTT); return err; } int mlx4_MODIFY_CQ_wrapper(struct mlx4_dev *dev, int slave, struct mlx4_vhcr *vhcr, struct mlx4_cmd_mailbox *inbox, struct mlx4_cmd_mailbox *outbox, struct mlx4_cmd_info *cmd) { int cqn = vhcr->in_modifier; struct res_cq *cq; int err; err = get_res(dev, slave, cqn, RES_CQ, &cq); if (err) return err; if (cq->com.from_state != RES_CQ_HW) goto ex_put; if (vhcr->op_modifier == 0) { err = handle_resize(dev, slave, vhcr, inbox, outbox, cmd, cq); goto ex_put; } err = mlx4_DMA_wrapper(dev, slave, vhcr, inbox, outbox, cmd); ex_put: put_res(dev, slave, cqn, RES_CQ); return err; } static int srq_get_mtt_size(struct mlx4_srq_context *srqc) { int log_srq_size = (be32_to_cpu(srqc->state_logsize_srqn) >> 24) & 0xf; int log_rq_stride = srqc->logstride & 7; int page_shift = (srqc->log_page_size & 0x3f) + 12; if (log_srq_size + log_rq_stride + 4 < page_shift) return 1; return 1 << (log_srq_size + log_rq_stride + 4 - page_shift); } int mlx4_SW2HW_SRQ_wrapper(struct mlx4_dev *dev, int slave, struct mlx4_vhcr *vhcr, struct mlx4_cmd_mailbox *inbox, struct mlx4_cmd_mailbox *outbox, struct mlx4_cmd_info *cmd) { int err; int srqn = vhcr->in_modifier; struct res_mtt *mtt; struct res_srq *srq; struct mlx4_srq_context *srqc = inbox->buf; int mtt_base = srq_get_mtt_addr(srqc) / dev->caps.mtt_entry_sz; if (srqn != (be32_to_cpu(srqc->state_logsize_srqn) & 0xffffff)) return -EINVAL; err = srq_res_start_move_to(dev, slave, srqn, RES_SRQ_HW, &srq); if (err) return err; err = get_res(dev, slave, mtt_base, RES_MTT, &mtt); if (err) goto ex_abort; err = check_mtt_range(dev, slave, mtt_base, srq_get_mtt_size(srqc), mtt); if (err) goto ex_put_mtt; err = mlx4_DMA_wrapper(dev, slave, vhcr, inbox, outbox, cmd); if (err) goto ex_put_mtt; atomic_inc(&mtt->ref_count); srq->mtt = mtt; put_res(dev, slave, mtt->com.res_id, RES_MTT); res_end_move(dev, slave, RES_SRQ, srqn); return 0; ex_put_mtt: put_res(dev, slave, mtt->com.res_id, RES_MTT); ex_abort: res_abort_move(dev, slave, RES_SRQ, srqn); return err; } int mlx4_HW2SW_SRQ_wrapper(struct mlx4_dev *dev, int slave, struct mlx4_vhcr *vhcr, struct mlx4_cmd_mailbox *inbox, struct mlx4_cmd_mailbox *outbox, struct mlx4_cmd_info *cmd) { int err; int srqn = vhcr->in_modifier; struct res_srq *srq; err = srq_res_start_move_to(dev, slave, srqn, RES_SRQ_ALLOCATED, &srq); if (err) return err; err = mlx4_DMA_wrapper(dev, slave, vhcr, inbox, outbox, cmd); if (err) goto ex_abort; atomic_dec(&srq->mtt->ref_count); if (srq->cq) atomic_dec(&srq->cq->ref_count); res_end_move(dev, slave, RES_SRQ, srqn); return 0; ex_abort: res_abort_move(dev, slave, RES_SRQ, srqn); return err; } int mlx4_QUERY_SRQ_wrapper(struct mlx4_dev *dev, int slave, struct mlx4_vhcr *vhcr, struct mlx4_cmd_mailbox *inbox, struct mlx4_cmd_mailbox *outbox, struct mlx4_cmd_info *cmd) { int err; int srqn = vhcr->in_modifier; struct res_srq *srq; err = get_res(dev, slave, srqn, RES_SRQ, &srq); if (err) return err; if (srq->com.from_state != RES_SRQ_HW) { err = -EBUSY; goto out; } err = mlx4_DMA_wrapper(dev, slave, vhcr, inbox, outbox, cmd); out: put_res(dev, slave, srqn, RES_SRQ); return err; } int mlx4_ARM_SRQ_wrapper(struct mlx4_dev *dev, int slave, struct mlx4_vhcr *vhcr, struct mlx4_cmd_mailbox *inbox, struct mlx4_cmd_mailbox *outbox, struct mlx4_cmd_info *cmd) { int err; int srqn = vhcr->in_modifier; struct res_srq *srq; err = get_res(dev, slave, srqn, RES_SRQ, &srq); if (err) return err; if (srq->com.from_state != RES_SRQ_HW) { err = -EBUSY; goto out; } err = mlx4_DMA_wrapper(dev, slave, vhcr, inbox, outbox, cmd); out: put_res(dev, slave, srqn, RES_SRQ); return err; } int mlx4_GEN_QP_wrapper(struct mlx4_dev *dev, int slave, struct mlx4_vhcr *vhcr, struct mlx4_cmd_mailbox *inbox, struct mlx4_cmd_mailbox *outbox, struct mlx4_cmd_info *cmd) { int err; int qpn = vhcr->in_modifier & 0x7fffff; struct res_qp *qp; err = get_res(dev, slave, qpn, RES_QP, &qp); if (err) return err; if (qp->com.from_state != RES_QP_HW) { err = -EBUSY; goto out; } err = mlx4_DMA_wrapper(dev, slave, vhcr, inbox, outbox, cmd); out: put_res(dev, slave, qpn, RES_QP); return err; } int mlx4_INIT2INIT_QP_wrapper(struct mlx4_dev *dev, int slave, struct mlx4_vhcr *vhcr, struct mlx4_cmd_mailbox *inbox, struct mlx4_cmd_mailbox *outbox, struct mlx4_cmd_info *cmd) { struct mlx4_qp_context *context = inbox->buf + 8; adjust_proxy_tun_qkey(dev, vhcr, context); update_pkey_index(dev, slave, inbox); return mlx4_GEN_QP_wrapper(dev, slave, vhcr, inbox, outbox, cmd); } static int roce_verify_mac(struct mlx4_dev *dev, int slave, struct mlx4_qp_context *qpc, struct mlx4_cmd_mailbox *inbox) { u64 mac; int port; u32 ts = (be32_to_cpu(qpc->flags) >> 16) & 0xff; u8 sched = *(u8 *)(inbox->buf + 64); u8 smac_ix; port = (sched >> 6 & 1) + 1; if (mlx4_is_eth(dev, port) && (ts != MLX4_QP_ST_MLX)) { smac_ix = qpc->pri_path.grh_mylmc & 0x7f; if (mac_find_smac_ix_in_slave(dev, slave, port, smac_ix, &mac)) return -ENOENT; } return 0; } int mlx4_INIT2RTR_QP_wrapper(struct mlx4_dev *dev, int slave, struct mlx4_vhcr *vhcr, struct mlx4_cmd_mailbox *inbox, struct mlx4_cmd_mailbox *outbox, struct mlx4_cmd_info *cmd) { int err; struct mlx4_qp_context *qpc = inbox->buf + 8; err = verify_qp_parameters(dev, inbox, QP_TRANS_INIT2RTR, slave); if (err) return err; if (roce_verify_mac(dev, slave, qpc, inbox)) return -EINVAL; update_pkey_index(dev, slave, inbox); update_gid(dev, inbox, (u8)slave); adjust_proxy_tun_qkey(dev, vhcr, qpc); err = update_vport_qp_param(dev, inbox, slave); if (err) return err; return mlx4_GEN_QP_wrapper(dev, slave, vhcr, inbox, outbox, cmd); } int mlx4_RTR2RTS_QP_wrapper(struct mlx4_dev *dev, int slave, struct mlx4_vhcr *vhcr, struct mlx4_cmd_mailbox *inbox, struct mlx4_cmd_mailbox *outbox, struct mlx4_cmd_info *cmd) { int err; struct mlx4_qp_context *context = inbox->buf + 8; err = verify_qp_parameters(dev, inbox, QP_TRANS_RTR2RTS, slave); if (err) return err; update_pkey_index(dev, slave, inbox); update_gid(dev, inbox, (u8)slave); adjust_proxy_tun_qkey(dev, vhcr, context); return mlx4_GEN_QP_wrapper(dev, slave, vhcr, inbox, outbox, cmd); } int mlx4_RTS2RTS_QP_wrapper(struct mlx4_dev *dev, int slave, struct mlx4_vhcr *vhcr, struct mlx4_cmd_mailbox *inbox, struct mlx4_cmd_mailbox *outbox, struct mlx4_cmd_info *cmd) { int err; struct mlx4_qp_context *context = inbox->buf + 8; err = verify_qp_parameters(dev, inbox, QP_TRANS_RTS2RTS, slave); if (err) return err; update_pkey_index(dev, slave, inbox); update_gid(dev, inbox, (u8)slave); adjust_proxy_tun_qkey(dev, vhcr, context); return mlx4_GEN_QP_wrapper(dev, slave, vhcr, inbox, outbox, cmd); } int mlx4_SQERR2RTS_QP_wrapper(struct mlx4_dev *dev, int slave, struct mlx4_vhcr *vhcr, struct mlx4_cmd_mailbox *inbox, struct mlx4_cmd_mailbox *outbox, struct mlx4_cmd_info *cmd) { struct mlx4_qp_context *context = inbox->buf + 8; adjust_proxy_tun_qkey(dev, vhcr, context); return mlx4_GEN_QP_wrapper(dev, slave, vhcr, inbox, outbox, cmd); } int mlx4_SQD2SQD_QP_wrapper(struct mlx4_dev *dev, int slave, struct mlx4_vhcr *vhcr, struct mlx4_cmd_mailbox *inbox, struct mlx4_cmd_mailbox *outbox, struct mlx4_cmd_info *cmd) { int err; struct mlx4_qp_context *context = inbox->buf + 8; err = verify_qp_parameters(dev, inbox, QP_TRANS_SQD2SQD, slave); if (err) return err; adjust_proxy_tun_qkey(dev, vhcr, context); update_gid(dev, inbox, (u8)slave); update_pkey_index(dev, slave, inbox); return mlx4_GEN_QP_wrapper(dev, slave, vhcr, inbox, outbox, cmd); } int mlx4_SQD2RTS_QP_wrapper(struct mlx4_dev *dev, int slave, struct mlx4_vhcr *vhcr, struct mlx4_cmd_mailbox *inbox, struct mlx4_cmd_mailbox *outbox, struct mlx4_cmd_info *cmd) { int err; struct mlx4_qp_context *context = inbox->buf + 8; err = verify_qp_parameters(dev, inbox, QP_TRANS_SQD2RTS, slave); if (err) return err; adjust_proxy_tun_qkey(dev, vhcr, context); update_gid(dev, inbox, (u8)slave); update_pkey_index(dev, slave, inbox); return mlx4_GEN_QP_wrapper(dev, slave, vhcr, inbox, outbox, cmd); } int mlx4_2RST_QP_wrapper(struct mlx4_dev *dev, int slave, struct mlx4_vhcr *vhcr, struct mlx4_cmd_mailbox *inbox, struct mlx4_cmd_mailbox *outbox, struct mlx4_cmd_info *cmd) { int err; int qpn = vhcr->in_modifier & 0x7fffff; struct res_qp *qp; err = qp_res_start_move_to(dev, slave, qpn, RES_QP_MAPPED, &qp, 0); if (err) return err; err = mlx4_DMA_wrapper(dev, slave, vhcr, inbox, outbox, cmd); if (err) goto ex_abort; atomic_dec(&qp->mtt->ref_count); atomic_dec(&qp->rcq->ref_count); atomic_dec(&qp->scq->ref_count); if (qp->srq) atomic_dec(&qp->srq->ref_count); res_end_move(dev, slave, RES_QP, qpn); return 0; ex_abort: res_abort_move(dev, slave, RES_QP, qpn); return err; } static struct res_gid *find_gid(struct mlx4_dev *dev, int slave, struct res_qp *rqp, u8 *gid) { struct res_gid *res; list_for_each_entry(res, &rqp->mcg_list, list) { if (!memcmp(res->gid, gid, 16)) return res; } return NULL; } static int add_mcg_res(struct mlx4_dev *dev, int slave, struct res_qp *rqp, u8 *gid, enum mlx4_protocol prot, enum mlx4_steer_type steer) { struct res_gid *res; int err; res = kzalloc(sizeof *res, GFP_KERNEL); if (!res) return -ENOMEM; spin_lock_irq(&rqp->mcg_spl); if (find_gid(dev, slave, rqp, gid)) { kfree(res); err = -EEXIST; } else { memcpy(res->gid, gid, 16); res->prot = prot; res->steer = steer; list_add_tail(&res->list, &rqp->mcg_list); err = 0; } spin_unlock_irq(&rqp->mcg_spl); return err; } static int rem_mcg_res(struct mlx4_dev *dev, int slave, struct res_qp *rqp, u8 *gid, enum mlx4_protocol prot, enum mlx4_steer_type steer) { struct res_gid *res; int err; spin_lock_irq(&rqp->mcg_spl); res = find_gid(dev, slave, rqp, gid); if (!res || res->prot != prot || res->steer != steer) err = -EINVAL; else { list_del(&res->list); kfree(res); err = 0; } spin_unlock_irq(&rqp->mcg_spl); return err; } int mlx4_QP_ATTACH_wrapper(struct mlx4_dev *dev, int slave, struct mlx4_vhcr *vhcr, struct mlx4_cmd_mailbox *inbox, struct mlx4_cmd_mailbox *outbox, struct mlx4_cmd_info *cmd) { struct mlx4_qp qp; /* dummy for calling attach/detach */ u8 *gid = inbox->buf; enum mlx4_protocol prot = (vhcr->in_modifier >> 28) & 0x7; int err; int qpn; struct res_qp *rqp; int attach = vhcr->op_modifier; int block_loopback = vhcr->in_modifier >> 31; u8 steer_type_mask = 2; enum mlx4_steer_type type = (gid[7] & steer_type_mask) >> 1; qpn = vhcr->in_modifier & 0xffffff; err = get_res(dev, slave, qpn, RES_QP, &rqp); if (err) return err; qp.qpn = qpn; if (attach) { err = add_mcg_res(dev, slave, rqp, gid, prot, type); if (err) goto ex_put; err = mlx4_qp_attach_common(dev, &qp, gid, block_loopback, prot, type); if (err) goto ex_rem; } else { err = rem_mcg_res(dev, slave, rqp, gid, prot, type); if (err) goto ex_put; err = mlx4_qp_detach_common(dev, &qp, gid, prot, type); } put_res(dev, slave, qpn, RES_QP); return 0; ex_rem: /* ignore error return below, already in error */ (void) rem_mcg_res(dev, slave, rqp, gid, prot, type); ex_put: put_res(dev, slave, qpn, RES_QP); return err; } /* * MAC validation for Flow Steering rules. * VF can attach rules only with a mac address which is assigned to it. */ static int validate_eth_header_mac(int slave, struct _rule_hw *eth_header, struct list_head *rlist) { struct mac_res *res, *tmp; __be64 be_mac; /* make sure it isn't multicast or broadcast mac*/ if (!is_multicast_ether_addr(eth_header->eth.dst_mac) && !is_broadcast_ether_addr(eth_header->eth.dst_mac)) { list_for_each_entry_safe(res, tmp, rlist, list) { be_mac = cpu_to_be64(res->mac << 16); if (!memcmp(&be_mac, eth_header->eth.dst_mac, ETH_ALEN)) return 0; } pr_err("MAC %pM doesn't belong to VF %d, Steering rule rejected\n", eth_header->eth.dst_mac, slave); return -EINVAL; } return 0; } /* * In case of missing eth header, append eth header with a MAC address * assigned to the VF. */ static int add_eth_header(struct mlx4_dev *dev, int slave, struct mlx4_cmd_mailbox *inbox, struct list_head *rlist, int header_id) { struct mac_res *res, *tmp; u8 port; struct mlx4_net_trans_rule_hw_ctrl *ctrl; struct mlx4_net_trans_rule_hw_eth *eth_header; struct mlx4_net_trans_rule_hw_ipv4 *ip_header; struct mlx4_net_trans_rule_hw_tcp_udp *l4_header; __be64 be_mac = 0; __be64 mac_msk = cpu_to_be64(MLX4_MAC_MASK << 16); ctrl = (struct mlx4_net_trans_rule_hw_ctrl *)inbox->buf; port = ctrl->port; eth_header = (struct mlx4_net_trans_rule_hw_eth *)(ctrl + 1); /* Clear a space in the inbox for eth header */ switch (header_id) { case MLX4_NET_TRANS_RULE_ID_IPV4: ip_header = (struct mlx4_net_trans_rule_hw_ipv4 *)(eth_header + 1); memmove(ip_header, eth_header, sizeof(*ip_header) + sizeof(*l4_header)); break; case MLX4_NET_TRANS_RULE_ID_TCP: case MLX4_NET_TRANS_RULE_ID_UDP: l4_header = (struct mlx4_net_trans_rule_hw_tcp_udp *) (eth_header + 1); memmove(l4_header, eth_header, sizeof(*l4_header)); break; default: return -EINVAL; } list_for_each_entry_safe(res, tmp, rlist, list) { if (port == res->port) { be_mac = cpu_to_be64(res->mac << 16); break; } } if (!be_mac) { pr_err("Failed adding eth header to FS rule, Can't find matching MAC for port %d .\n", port); return -EINVAL; } memset(eth_header, 0, sizeof(*eth_header)); eth_header->size = sizeof(*eth_header) >> 2; eth_header->id = cpu_to_be16(__sw_id_hw[MLX4_NET_TRANS_RULE_ID_ETH]); memcpy(eth_header->dst_mac, &be_mac, ETH_ALEN); memcpy(eth_header->dst_mac_msk, &mac_msk, ETH_ALEN); return 0; } int mlx4_QP_FLOW_STEERING_ATTACH_wrapper(struct mlx4_dev *dev, int slave, struct mlx4_vhcr *vhcr, struct mlx4_cmd_mailbox *inbox, struct mlx4_cmd_mailbox *outbox, struct mlx4_cmd_info *cmd) { struct mlx4_priv *priv = mlx4_priv(dev); struct mlx4_resource_tracker *tracker = &priv->mfunc.master.res_tracker; struct list_head *rlist = &tracker->slave_list[slave].res_list[RES_MAC]; int err; struct mlx4_net_trans_rule_hw_ctrl *ctrl; struct _rule_hw *rule_header; int header_id; if (dev->caps.steering_mode != MLX4_STEERING_MODE_DEVICE_MANAGED) return -EOPNOTSUPP; ctrl = (struct mlx4_net_trans_rule_hw_ctrl *)inbox->buf; rule_header = (struct _rule_hw *)(ctrl + 1); header_id = map_hw_to_sw_id(be16_to_cpu(rule_header->id)); switch (header_id) { case MLX4_NET_TRANS_RULE_ID_ETH: if (validate_eth_header_mac(slave, rule_header, rlist)) return -EINVAL; break; case MLX4_NET_TRANS_RULE_ID_IB: break; case MLX4_NET_TRANS_RULE_ID_IPV4: case MLX4_NET_TRANS_RULE_ID_TCP: case MLX4_NET_TRANS_RULE_ID_UDP: pr_warn("Can't attach FS rule without L2 headers, adding L2 header.\n"); if (add_eth_header(dev, slave, inbox, rlist, header_id)) return -EINVAL; vhcr->in_modifier += sizeof(struct mlx4_net_trans_rule_hw_eth) >> 2; break; default: pr_err("Corrupted mailbox.\n"); return -EINVAL; } err = mlx4_cmd_imm(dev, inbox->dma, &vhcr->out_param, vhcr->in_modifier, 0, MLX4_QP_FLOW_STEERING_ATTACH, MLX4_CMD_TIME_CLASS_A, MLX4_CMD_NATIVE); if (err) return err; err = add_res_range(dev, slave, vhcr->out_param, 1, RES_FS_RULE, 0); if (err) { mlx4_err(dev, "Fail to add flow steering resources.\n "); /* detach rule*/ mlx4_cmd(dev, vhcr->out_param, 0, 0, MLX4_QP_FLOW_STEERING_ATTACH, MLX4_CMD_TIME_CLASS_A, MLX4_CMD_NATIVE); } return err; } int mlx4_QP_FLOW_STEERING_DETACH_wrapper(struct mlx4_dev *dev, int slave, struct mlx4_vhcr *vhcr, struct mlx4_cmd_mailbox *inbox, struct mlx4_cmd_mailbox *outbox, struct mlx4_cmd_info *cmd) { int err; if (dev->caps.steering_mode != MLX4_STEERING_MODE_DEVICE_MANAGED) return -EOPNOTSUPP; err = rem_res_range(dev, slave, vhcr->in_param, 1, RES_FS_RULE, 0); if (err) { mlx4_err(dev, "Fail to remove flow steering resources.\n "); return err; } err = mlx4_cmd(dev, vhcr->in_param, 0, 0, MLX4_QP_FLOW_STEERING_DETACH, MLX4_CMD_TIME_CLASS_A, MLX4_CMD_NATIVE); return err; } enum { BUSY_MAX_RETRIES = 10 }; int mlx4_QUERY_IF_STAT_wrapper(struct mlx4_dev *dev, int slave, struct mlx4_vhcr *vhcr, struct mlx4_cmd_mailbox *inbox, struct mlx4_cmd_mailbox *outbox, struct mlx4_cmd_info *cmd) { int err; int index = vhcr->in_modifier & 0xffff; err = get_res(dev, slave, index, RES_COUNTER, NULL); if (err) return err; err = mlx4_DMA_wrapper(dev, slave, vhcr, inbox, outbox, cmd); put_res(dev, slave, index, RES_COUNTER); return err; } static void detach_qp(struct mlx4_dev *dev, int slave, struct res_qp *rqp) { struct res_gid *rgid; struct res_gid *tmp; struct mlx4_qp qp; /* dummy for calling attach/detach */ list_for_each_entry_safe(rgid, tmp, &rqp->mcg_list, list) { qp.qpn = rqp->local_qpn; (void) mlx4_qp_detach_common(dev, &qp, rgid->gid, rgid->prot, rgid->steer); list_del(&rgid->list); kfree(rgid); } } static int _move_all_busy(struct mlx4_dev *dev, int slave, enum mlx4_resource type, int print) { struct mlx4_priv *priv = mlx4_priv(dev); struct mlx4_resource_tracker *tracker = &priv->mfunc.master.res_tracker; struct list_head *rlist = &tracker->slave_list[slave].res_list[type]; struct res_common *r; struct res_common *tmp; int busy; busy = 0; spin_lock_irq(mlx4_tlock(dev)); list_for_each_entry_safe(r, tmp, rlist, list) { if (r->owner == slave) { if (!r->removing) { if (r->state == RES_ANY_BUSY) { if (print) mlx4_dbg(dev, "%s id 0x%llx is busy\n", ResourceType(type), - r->res_id); + (long long)r->res_id); ++busy; } else { r->from_state = r->state; r->state = RES_ANY_BUSY; r->removing = 1; } } } } spin_unlock_irq(mlx4_tlock(dev)); return busy; } static int move_all_busy(struct mlx4_dev *dev, int slave, enum mlx4_resource type) { unsigned long begin; int busy; begin = jiffies; do { busy = _move_all_busy(dev, slave, type, 0); if (time_after(jiffies, begin + 5 * HZ)) break; if (busy) cond_resched(); } while (busy); if (busy) busy = _move_all_busy(dev, slave, type, 1); return busy; } static void rem_slave_qps(struct mlx4_dev *dev, int slave) { struct mlx4_priv *priv = mlx4_priv(dev); struct mlx4_resource_tracker *tracker = &priv->mfunc.master.res_tracker; struct list_head *qp_list = &tracker->slave_list[slave].res_list[RES_QP]; struct res_qp *qp; struct res_qp *tmp; int state; u64 in_param; int qpn; int err; err = move_all_busy(dev, slave, RES_QP); if (err) mlx4_warn(dev, "rem_slave_qps: Could not move all qps to busy" "for slave %d\n", slave); spin_lock_irq(mlx4_tlock(dev)); list_for_each_entry_safe(qp, tmp, qp_list, com.list) { spin_unlock_irq(mlx4_tlock(dev)); if (qp->com.owner == slave) { qpn = qp->com.res_id; detach_qp(dev, slave, qp); state = qp->com.from_state; while (state != 0) { switch (state) { case RES_QP_RESERVED: spin_lock_irq(mlx4_tlock(dev)); rb_erase(&qp->com.node, &tracker->res_tree[RES_QP]); list_del(&qp->com.list); spin_unlock_irq(mlx4_tlock(dev)); kfree(qp); state = 0; break; case RES_QP_MAPPED: if (!valid_reserved(dev, slave, qpn)) __mlx4_qp_free_icm(dev, qpn); state = RES_QP_RESERVED; break; case RES_QP_HW: in_param = slave; err = mlx4_cmd(dev, in_param, qp->local_qpn, 2, MLX4_CMD_2RST_QP, MLX4_CMD_TIME_CLASS_A, MLX4_CMD_NATIVE); if (err) mlx4_dbg(dev, "rem_slave_qps: failed" " to move slave %d qpn %d to" " reset\n", slave, qp->local_qpn); atomic_dec(&qp->rcq->ref_count); atomic_dec(&qp->scq->ref_count); atomic_dec(&qp->mtt->ref_count); if (qp->srq) atomic_dec(&qp->srq->ref_count); state = RES_QP_MAPPED; break; default: state = 0; } } } spin_lock_irq(mlx4_tlock(dev)); } spin_unlock_irq(mlx4_tlock(dev)); } static void rem_slave_srqs(struct mlx4_dev *dev, int slave) { struct mlx4_priv *priv = mlx4_priv(dev); struct mlx4_resource_tracker *tracker = &priv->mfunc.master.res_tracker; struct list_head *srq_list = &tracker->slave_list[slave].res_list[RES_SRQ]; struct res_srq *srq; struct res_srq *tmp; int state; u64 in_param; LIST_HEAD(tlist); int srqn; int err; err = move_all_busy(dev, slave, RES_SRQ); if (err) mlx4_warn(dev, "rem_slave_srqs: Could not move all srqs to " "busy for slave %d\n", slave); spin_lock_irq(mlx4_tlock(dev)); list_for_each_entry_safe(srq, tmp, srq_list, com.list) { spin_unlock_irq(mlx4_tlock(dev)); if (srq->com.owner == slave) { srqn = srq->com.res_id; state = srq->com.from_state; while (state != 0) { switch (state) { case RES_SRQ_ALLOCATED: __mlx4_srq_free_icm(dev, srqn); spin_lock_irq(mlx4_tlock(dev)); rb_erase(&srq->com.node, &tracker->res_tree[RES_SRQ]); list_del(&srq->com.list); spin_unlock_irq(mlx4_tlock(dev)); kfree(srq); state = 0; break; case RES_SRQ_HW: in_param = slave; err = mlx4_cmd(dev, in_param, srqn, 1, MLX4_CMD_HW2SW_SRQ, MLX4_CMD_TIME_CLASS_A, MLX4_CMD_NATIVE); if (err) mlx4_dbg(dev, "rem_slave_srqs: failed" " to move slave %d srq %d to" " SW ownership\n", slave, srqn); atomic_dec(&srq->mtt->ref_count); if (srq->cq) atomic_dec(&srq->cq->ref_count); state = RES_SRQ_ALLOCATED; break; default: state = 0; } } } spin_lock_irq(mlx4_tlock(dev)); } spin_unlock_irq(mlx4_tlock(dev)); } static void rem_slave_cqs(struct mlx4_dev *dev, int slave) { struct mlx4_priv *priv = mlx4_priv(dev); struct mlx4_resource_tracker *tracker = &priv->mfunc.master.res_tracker; struct list_head *cq_list = &tracker->slave_list[slave].res_list[RES_CQ]; struct res_cq *cq; struct res_cq *tmp; int state; u64 in_param; LIST_HEAD(tlist); int cqn; int err; err = move_all_busy(dev, slave, RES_CQ); if (err) mlx4_warn(dev, "rem_slave_cqs: Could not move all cqs to " "busy for slave %d\n", slave); spin_lock_irq(mlx4_tlock(dev)); list_for_each_entry_safe(cq, tmp, cq_list, com.list) { spin_unlock_irq(mlx4_tlock(dev)); if (cq->com.owner == slave && !atomic_read(&cq->ref_count)) { cqn = cq->com.res_id; state = cq->com.from_state; while (state != 0) { switch (state) { case RES_CQ_ALLOCATED: __mlx4_cq_free_icm(dev, cqn); spin_lock_irq(mlx4_tlock(dev)); rb_erase(&cq->com.node, &tracker->res_tree[RES_CQ]); list_del(&cq->com.list); spin_unlock_irq(mlx4_tlock(dev)); kfree(cq); state = 0; break; case RES_CQ_HW: in_param = slave; err = mlx4_cmd(dev, in_param, cqn, 1, MLX4_CMD_HW2SW_CQ, MLX4_CMD_TIME_CLASS_A, MLX4_CMD_NATIVE); if (err) mlx4_dbg(dev, "rem_slave_cqs: failed" " to move slave %d cq %d to" " SW ownership\n", slave, cqn); atomic_dec(&cq->mtt->ref_count); state = RES_CQ_ALLOCATED; break; default: state = 0; } } } spin_lock_irq(mlx4_tlock(dev)); } spin_unlock_irq(mlx4_tlock(dev)); } static void rem_slave_mrs(struct mlx4_dev *dev, int slave) { struct mlx4_priv *priv = mlx4_priv(dev); struct mlx4_resource_tracker *tracker = &priv->mfunc.master.res_tracker; struct list_head *mpt_list = &tracker->slave_list[slave].res_list[RES_MPT]; struct res_mpt *mpt; struct res_mpt *tmp; int state; u64 in_param; LIST_HEAD(tlist); int mptn; int err; err = move_all_busy(dev, slave, RES_MPT); if (err) mlx4_warn(dev, "rem_slave_mrs: Could not move all mpts to " "busy for slave %d\n", slave); spin_lock_irq(mlx4_tlock(dev)); list_for_each_entry_safe(mpt, tmp, mpt_list, com.list) { spin_unlock_irq(mlx4_tlock(dev)); if (mpt->com.owner == slave) { mptn = mpt->com.res_id; state = mpt->com.from_state; while (state != 0) { switch (state) { case RES_MPT_RESERVED: __mlx4_mr_release(dev, mpt->key); spin_lock_irq(mlx4_tlock(dev)); rb_erase(&mpt->com.node, &tracker->res_tree[RES_MPT]); list_del(&mpt->com.list); spin_unlock_irq(mlx4_tlock(dev)); kfree(mpt); state = 0; break; case RES_MPT_MAPPED: __mlx4_mr_free_icm(dev, mpt->key); state = RES_MPT_RESERVED; break; case RES_MPT_HW: in_param = slave; err = mlx4_cmd(dev, in_param, mptn, 0, MLX4_CMD_HW2SW_MPT, MLX4_CMD_TIME_CLASS_A, MLX4_CMD_NATIVE); if (err) mlx4_dbg(dev, "rem_slave_mrs: failed" " to move slave %d mpt %d to" " SW ownership\n", slave, mptn); if (mpt->mtt) atomic_dec(&mpt->mtt->ref_count); state = RES_MPT_MAPPED; break; default: state = 0; } } } spin_lock_irq(mlx4_tlock(dev)); } spin_unlock_irq(mlx4_tlock(dev)); } static void rem_slave_mtts(struct mlx4_dev *dev, int slave) { struct mlx4_priv *priv = mlx4_priv(dev); struct mlx4_resource_tracker *tracker = &priv->mfunc.master.res_tracker; struct list_head *mtt_list = &tracker->slave_list[slave].res_list[RES_MTT]; struct res_mtt *mtt; struct res_mtt *tmp; int state; LIST_HEAD(tlist); int base; int err; err = move_all_busy(dev, slave, RES_MTT); if (err) mlx4_warn(dev, "rem_slave_mtts: Could not move all mtts to " "busy for slave %d\n", slave); spin_lock_irq(mlx4_tlock(dev)); list_for_each_entry_safe(mtt, tmp, mtt_list, com.list) { spin_unlock_irq(mlx4_tlock(dev)); if (mtt->com.owner == slave) { base = mtt->com.res_id; state = mtt->com.from_state; while (state != 0) { switch (state) { case RES_MTT_ALLOCATED: __mlx4_free_mtt_range(dev, base, mtt->order); spin_lock_irq(mlx4_tlock(dev)); rb_erase(&mtt->com.node, &tracker->res_tree[RES_MTT]); list_del(&mtt->com.list); spin_unlock_irq(mlx4_tlock(dev)); kfree(mtt); state = 0; break; default: state = 0; } } } spin_lock_irq(mlx4_tlock(dev)); } spin_unlock_irq(mlx4_tlock(dev)); } static void rem_slave_fs_rule(struct mlx4_dev *dev, int slave) { struct mlx4_priv *priv = mlx4_priv(dev); struct mlx4_resource_tracker *tracker = &priv->mfunc.master.res_tracker; struct list_head *fs_rule_list = &tracker->slave_list[slave].res_list[RES_FS_RULE]; struct res_fs_rule *fs_rule; struct res_fs_rule *tmp; int state; u64 base; int err; err = move_all_busy(dev, slave, RES_FS_RULE); if (err) mlx4_warn(dev, "rem_slave_fs_rule: Could not move all mtts to busy for slave %d\n", slave); spin_lock_irq(mlx4_tlock(dev)); list_for_each_entry_safe(fs_rule, tmp, fs_rule_list, com.list) { spin_unlock_irq(mlx4_tlock(dev)); if (fs_rule->com.owner == slave) { base = fs_rule->com.res_id; state = fs_rule->com.from_state; while (state != 0) { switch (state) { case RES_FS_RULE_ALLOCATED: /* detach rule */ err = mlx4_cmd(dev, base, 0, 0, MLX4_QP_FLOW_STEERING_DETACH, MLX4_CMD_TIME_CLASS_A, MLX4_CMD_NATIVE); spin_lock_irq(mlx4_tlock(dev)); rb_erase(&fs_rule->com.node, &tracker->res_tree[RES_FS_RULE]); list_del(&fs_rule->com.list); spin_unlock_irq(mlx4_tlock(dev)); kfree(fs_rule); state = 0; break; default: state = 0; } } } spin_lock_irq(mlx4_tlock(dev)); } spin_unlock_irq(mlx4_tlock(dev)); } static void rem_slave_eqs(struct mlx4_dev *dev, int slave) { struct mlx4_priv *priv = mlx4_priv(dev); struct mlx4_resource_tracker *tracker = &priv->mfunc.master.res_tracker; struct list_head *eq_list = &tracker->slave_list[slave].res_list[RES_EQ]; struct res_eq *eq; struct res_eq *tmp; int err; int state; LIST_HEAD(tlist); int eqn; struct mlx4_cmd_mailbox *mailbox; err = move_all_busy(dev, slave, RES_EQ); if (err) mlx4_warn(dev, "rem_slave_eqs: Could not move all eqs to " "busy for slave %d\n", slave); spin_lock_irq(mlx4_tlock(dev)); list_for_each_entry_safe(eq, tmp, eq_list, com.list) { spin_unlock_irq(mlx4_tlock(dev)); if (eq->com.owner == slave) { eqn = eq->com.res_id; state = eq->com.from_state; while (state != 0) { switch (state) { case RES_EQ_RESERVED: spin_lock_irq(mlx4_tlock(dev)); rb_erase(&eq->com.node, &tracker->res_tree[RES_EQ]); list_del(&eq->com.list); spin_unlock_irq(mlx4_tlock(dev)); kfree(eq); state = 0; break; case RES_EQ_HW: mailbox = mlx4_alloc_cmd_mailbox(dev); if (IS_ERR(mailbox)) { cond_resched(); continue; } err = mlx4_cmd_box(dev, slave, 0, eqn & 0xff, 0, MLX4_CMD_HW2SW_EQ, MLX4_CMD_TIME_CLASS_A, MLX4_CMD_NATIVE); if (err) mlx4_dbg(dev, "rem_slave_eqs: failed" " to move slave %d eqs %d to" " SW ownership\n", slave, eqn); mlx4_free_cmd_mailbox(dev, mailbox); atomic_dec(&eq->mtt->ref_count); state = RES_EQ_RESERVED; break; default: state = 0; } } } spin_lock_irq(mlx4_tlock(dev)); } spin_unlock_irq(mlx4_tlock(dev)); } static void rem_slave_counters(struct mlx4_dev *dev, int slave) { struct mlx4_priv *priv = mlx4_priv(dev); struct mlx4_resource_tracker *tracker = &priv->mfunc.master.res_tracker; struct list_head *counter_list = &tracker->slave_list[slave].res_list[RES_COUNTER]; struct res_counter *counter; struct res_counter *tmp; int err; int index; err = move_all_busy(dev, slave, RES_COUNTER); if (err) mlx4_warn(dev, "rem_slave_counters: Could not move all counters to " "busy for slave %d\n", slave); spin_lock_irq(mlx4_tlock(dev)); list_for_each_entry_safe(counter, tmp, counter_list, com.list) { if (counter->com.owner == slave) { index = counter->com.res_id; rb_erase(&counter->com.node, &tracker->res_tree[RES_COUNTER]); list_del(&counter->com.list); kfree(counter); __mlx4_counter_free(dev, index); } } spin_unlock_irq(mlx4_tlock(dev)); } static void rem_slave_xrcdns(struct mlx4_dev *dev, int slave) { struct mlx4_priv *priv = mlx4_priv(dev); struct mlx4_resource_tracker *tracker = &priv->mfunc.master.res_tracker; struct list_head *xrcdn_list = &tracker->slave_list[slave].res_list[RES_XRCD]; struct res_xrcdn *xrcd; struct res_xrcdn *tmp; int err; int xrcdn; err = move_all_busy(dev, slave, RES_XRCD); if (err) mlx4_warn(dev, "rem_slave_xrcdns: Could not move all xrcdns to " "busy for slave %d\n", slave); spin_lock_irq(mlx4_tlock(dev)); list_for_each_entry_safe(xrcd, tmp, xrcdn_list, com.list) { if (xrcd->com.owner == slave) { xrcdn = xrcd->com.res_id; rb_erase(&xrcd->com.node, &tracker->res_tree[RES_XRCD]); list_del(&xrcd->com.list); kfree(xrcd); __mlx4_xrcd_free(dev, xrcdn); } } spin_unlock_irq(mlx4_tlock(dev)); } void mlx4_delete_all_resources_for_slave(struct mlx4_dev *dev, int slave) { struct mlx4_priv *priv = mlx4_priv(dev); mutex_lock(&priv->mfunc.master.res_tracker.slave_list[slave].mutex); rem_slave_macs(dev, slave); rem_slave_vlans(dev, slave); rem_slave_qps(dev, slave); rem_slave_srqs(dev, slave); rem_slave_cqs(dev, slave); rem_slave_mrs(dev, slave); rem_slave_eqs(dev, slave); rem_slave_mtts(dev, slave); rem_slave_counters(dev, slave); rem_slave_xrcdns(dev, slave); rem_slave_fs_rule(dev, slave); mutex_unlock(&priv->mfunc.master.res_tracker.slave_list[slave].mutex); } Index: stable/10/sys/ofed/drivers/net/mlx4/sense.c =================================================================== --- stable/10/sys/ofed/drivers/net/mlx4/sense.c (revision 271126) +++ stable/10/sys/ofed/drivers/net/mlx4/sense.c (revision 271127) @@ -1,160 +1,160 @@ /* * Copyright (c) 2007 Mellanox Technologies. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU * General Public License (GPL) Version 2, available from the file * COPYING in the main directory of this source tree, or the * OpenIB.org BSD license below: * * Redistribution and use in source and binary forms, with or * without modification, are permitted provided that the following * conditions are met: * * - Redistributions of source code must retain the above * copyright notice, this list of conditions and the following * disclaimer. * * - Redistributions in binary form must reproduce the above * copyright notice, this list of conditions and the following * disclaimer in the documentation and/or other materials * provided with the distribution. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * */ #include #include #include #include "mlx4.h" int mlx4_SENSE_PORT(struct mlx4_dev *dev, int port, enum mlx4_port_type *type) { u64 out_param; int err = 0; err = mlx4_cmd_imm(dev, 0, &out_param, port, 0, MLX4_CMD_SENSE_PORT, MLX4_CMD_TIME_CLASS_B, MLX4_CMD_WRAPPED); if (err) { mlx4_err(dev, "Sense command failed for port: %d\n", port); return err; } if (out_param > 2) { - mlx4_err(dev, "Sense returned illegal value: 0x%llx\n", out_param); + mlx4_err(dev, "Sense returned illegal value: 0x%llx\n", (long long)out_param); return -EINVAL; } *type = out_param; return 0; } void mlx4_do_sense_ports(struct mlx4_dev *dev, enum mlx4_port_type *stype, enum mlx4_port_type *defaults) { struct mlx4_sense *sense = &mlx4_priv(dev)->sense; int err; int i; for (i = 1; i <= dev->caps.num_ports; i++) { stype[i - 1] = 0; if (sense->do_sense_port[i] && sense->sense_allowed[i] && dev->caps.possible_type[i] == MLX4_PORT_TYPE_AUTO) { err = mlx4_SENSE_PORT(dev, i, &stype[i - 1]); if (err) stype[i - 1] = defaults[i - 1]; } else stype[i - 1] = defaults[i - 1]; } /* * If sensed nothing, remain in current configuration. */ for (i = 0; i < dev->caps.num_ports; i++) stype[i] = stype[i] ? stype[i] : defaults[i]; } static void mlx4_sense_port(struct work_struct *work) { struct delayed_work *delay = to_delayed_work(work); struct mlx4_sense *sense = container_of(delay, struct mlx4_sense, sense_poll); struct mlx4_dev *dev = sense->dev; struct mlx4_priv *priv = mlx4_priv(dev); enum mlx4_port_type stype[MLX4_MAX_PORTS]; mutex_lock(&priv->port_mutex); mlx4_do_sense_ports(dev, stype, &dev->caps.port_type[1]); if (mlx4_check_port_params(dev, stype)) goto sense_again; if (mlx4_change_port_types(dev, stype)) mlx4_err(dev, "Failed to change port_types\n"); sense_again: mutex_unlock(&priv->port_mutex); if (sense->resched) queue_delayed_work(sense->sense_wq , &sense->sense_poll, round_jiffies(MLX4_SENSE_RANGE)); } void mlx4_start_sense(struct mlx4_dev *dev) { struct mlx4_priv *priv = mlx4_priv(dev); struct mlx4_sense *sense = &priv->sense; if (!(dev->caps.flags & MLX4_DEV_CAP_FLAG_DPDP)) return; sense->resched = 1; queue_delayed_work(sense->sense_wq , &sense->sense_poll, round_jiffies(MLX4_SENSE_RANGE)); } void mlx4_stop_sense(struct mlx4_dev *dev) { mlx4_priv(dev)->sense.resched = 0; } void mlx4_sense_cleanup(struct mlx4_dev *dev) { mlx4_stop_sense(dev); cancel_delayed_work(&mlx4_priv(dev)->sense.sense_poll); destroy_workqueue(mlx4_priv(dev)->sense.sense_wq); } int mlx4_sense_init(struct mlx4_dev *dev) { struct mlx4_priv *priv = mlx4_priv(dev); struct mlx4_sense *sense = &priv->sense; int port; sense->dev = dev; sense->sense_wq = create_singlethread_workqueue("mlx4_sense"); if (!sense->sense_wq) return -ENOMEM; for (port = 1; port <= dev->caps.num_ports; port++) sense->do_sense_port[port] = 1; INIT_DEFERRABLE_WORK(&sense->sense_poll, mlx4_sense_port); return 0; } Index: stable/10/sys/ofed/drivers/net/mlx4/srq.c =================================================================== --- stable/10/sys/ofed/drivers/net/mlx4/srq.c (revision 271126) +++ stable/10/sys/ofed/drivers/net/mlx4/srq.c (revision 271127) @@ -1,299 +1,297 @@ /* * Copyright (c) 2006, 2007 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2007, 2008 Mellanox Technologies. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU * General Public License (GPL) Version 2, available from the file * COPYING in the main directory of this source tree, or the * OpenIB.org BSD license below: * * Redistribution and use in source and binary forms, with or * without modification, are permitted provided that the following * conditions are met: * * - Redistributions of source code must retain the above * copyright notice, this list of conditions and the following * disclaimer. * * - Redistributions in binary form must reproduce the above * copyright notice, this list of conditions and the following * disclaimer in the documentation and/or other materials * provided with the distribution. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include - #include #include #include "mlx4.h" #include "icm.h" void mlx4_srq_event(struct mlx4_dev *dev, u32 srqn, int event_type) { struct mlx4_srq_table *srq_table = &mlx4_priv(dev)->srq_table; struct mlx4_srq *srq; spin_lock(&srq_table->lock); srq = radix_tree_lookup(&srq_table->tree, srqn & (dev->caps.num_srqs - 1)); if (srq) atomic_inc(&srq->refcount); spin_unlock(&srq_table->lock); if (!srq) { mlx4_warn(dev, "Async event for bogus SRQ %08x\n", srqn); return; } srq->event(srq, event_type); if (atomic_dec_and_test(&srq->refcount)) complete(&srq->free); } static int mlx4_SW2HW_SRQ(struct mlx4_dev *dev, struct mlx4_cmd_mailbox *mailbox, int srq_num) { return mlx4_cmd(dev, mailbox->dma, srq_num, 0, MLX4_CMD_SW2HW_SRQ, MLX4_CMD_TIME_CLASS_A, MLX4_CMD_WRAPPED); } static int mlx4_HW2SW_SRQ(struct mlx4_dev *dev, struct mlx4_cmd_mailbox *mailbox, int srq_num) { return mlx4_cmd_box(dev, 0, mailbox ? mailbox->dma : 0, srq_num, mailbox ? 0 : 1, MLX4_CMD_HW2SW_SRQ, MLX4_CMD_TIME_CLASS_A, MLX4_CMD_WRAPPED); } static int mlx4_ARM_SRQ(struct mlx4_dev *dev, int srq_num, int limit_watermark) { return mlx4_cmd(dev, limit_watermark, srq_num, 0, MLX4_CMD_ARM_SRQ, MLX4_CMD_TIME_CLASS_B, MLX4_CMD_WRAPPED); } static int mlx4_QUERY_SRQ(struct mlx4_dev *dev, struct mlx4_cmd_mailbox *mailbox, int srq_num) { return mlx4_cmd_box(dev, 0, mailbox->dma, srq_num, 0, MLX4_CMD_QUERY_SRQ, MLX4_CMD_TIME_CLASS_A, MLX4_CMD_WRAPPED); } int __mlx4_srq_alloc_icm(struct mlx4_dev *dev, int *srqn) { struct mlx4_srq_table *srq_table = &mlx4_priv(dev)->srq_table; int err; *srqn = mlx4_bitmap_alloc(&srq_table->bitmap); if (*srqn == -1) return -ENOMEM; err = mlx4_table_get(dev, &srq_table->table, *srqn); if (err) goto err_out; err = mlx4_table_get(dev, &srq_table->cmpt_table, *srqn); if (err) goto err_put; return 0; err_put: mlx4_table_put(dev, &srq_table->table, *srqn); err_out: mlx4_bitmap_free(&srq_table->bitmap, *srqn); return err; } static int mlx4_srq_alloc_icm(struct mlx4_dev *dev, int *srqn) { u64 out_param; int err; if (mlx4_is_mfunc(dev)) { err = mlx4_cmd_imm(dev, 0, &out_param, RES_SRQ, RES_OP_RESERVE_AND_MAP, MLX4_CMD_ALLOC_RES, MLX4_CMD_TIME_CLASS_A, MLX4_CMD_WRAPPED); if (!err) *srqn = get_param_l(&out_param); return err; } return __mlx4_srq_alloc_icm(dev, srqn); } void __mlx4_srq_free_icm(struct mlx4_dev *dev, int srqn) { struct mlx4_srq_table *srq_table = &mlx4_priv(dev)->srq_table; mlx4_table_put(dev, &srq_table->cmpt_table, srqn); mlx4_table_put(dev, &srq_table->table, srqn); mlx4_bitmap_free(&srq_table->bitmap, srqn); } static void mlx4_srq_free_icm(struct mlx4_dev *dev, int srqn) { u64 in_param = 0; if (mlx4_is_mfunc(dev)) { set_param_l(&in_param, srqn); if (mlx4_cmd(dev, in_param, RES_SRQ, RES_OP_RESERVE_AND_MAP, MLX4_CMD_FREE_RES, MLX4_CMD_TIME_CLASS_A, MLX4_CMD_WRAPPED)) mlx4_warn(dev, "Failed freeing cq:%d\n", srqn); return; } __mlx4_srq_free_icm(dev, srqn); } int mlx4_srq_alloc(struct mlx4_dev *dev, u32 pdn, u32 cqn, u16 xrcd, struct mlx4_mtt *mtt, u64 db_rec, struct mlx4_srq *srq) { struct mlx4_srq_table *srq_table = &mlx4_priv(dev)->srq_table; struct mlx4_cmd_mailbox *mailbox; struct mlx4_srq_context *srq_context; u64 mtt_addr; int err; err = mlx4_srq_alloc_icm(dev, &srq->srqn); if (err) return err; spin_lock_irq(&srq_table->lock); err = radix_tree_insert(&srq_table->tree, srq->srqn, srq); spin_unlock_irq(&srq_table->lock); if (err) goto err_icm; mailbox = mlx4_alloc_cmd_mailbox(dev); if (IS_ERR(mailbox)) { err = PTR_ERR(mailbox); goto err_radix; } srq_context = mailbox->buf; memset(srq_context, 0, sizeof *srq_context); srq_context->state_logsize_srqn = cpu_to_be32((ilog2(srq->max) << 24) | srq->srqn); srq_context->logstride = srq->wqe_shift - 4; srq_context->xrcd = cpu_to_be16(xrcd); srq_context->pg_offset_cqn = cpu_to_be32(cqn & 0xffffff); srq_context->log_page_size = mtt->page_shift - MLX4_ICM_PAGE_SHIFT; mtt_addr = mlx4_mtt_addr(dev, mtt); srq_context->mtt_base_addr_h = mtt_addr >> 32; srq_context->mtt_base_addr_l = cpu_to_be32(mtt_addr & 0xffffffff); srq_context->pd = cpu_to_be32(pdn); srq_context->db_rec_addr = cpu_to_be64(db_rec); err = mlx4_SW2HW_SRQ(dev, mailbox, srq->srqn); mlx4_free_cmd_mailbox(dev, mailbox); if (err) goto err_radix; atomic_set(&srq->refcount, 1); init_completion(&srq->free); return 0; err_radix: spin_lock_irq(&srq_table->lock); radix_tree_delete(&srq_table->tree, srq->srqn); spin_unlock_irq(&srq_table->lock); err_icm: mlx4_srq_free_icm(dev, srq->srqn); return err; } EXPORT_SYMBOL_GPL(mlx4_srq_alloc); void mlx4_srq_free(struct mlx4_dev *dev, struct mlx4_srq *srq) { struct mlx4_srq_table *srq_table = &mlx4_priv(dev)->srq_table; int err; err = mlx4_HW2SW_SRQ(dev, NULL, srq->srqn); if (err) mlx4_warn(dev, "HW2SW_SRQ failed (%d) for SRQN %06x\n", err, srq->srqn); spin_lock_irq(&srq_table->lock); radix_tree_delete(&srq_table->tree, srq->srqn); spin_unlock_irq(&srq_table->lock); if (atomic_dec_and_test(&srq->refcount)) complete(&srq->free); wait_for_completion(&srq->free); mlx4_srq_free_icm(dev, srq->srqn); } EXPORT_SYMBOL_GPL(mlx4_srq_free); int mlx4_srq_arm(struct mlx4_dev *dev, struct mlx4_srq *srq, int limit_watermark) { return mlx4_ARM_SRQ(dev, srq->srqn, limit_watermark); } EXPORT_SYMBOL_GPL(mlx4_srq_arm); int mlx4_srq_query(struct mlx4_dev *dev, struct mlx4_srq *srq, int *limit_watermark) { struct mlx4_cmd_mailbox *mailbox; struct mlx4_srq_context *srq_context; int err; mailbox = mlx4_alloc_cmd_mailbox(dev); if (IS_ERR(mailbox)) return PTR_ERR(mailbox); srq_context = mailbox->buf; err = mlx4_QUERY_SRQ(dev, mailbox, srq->srqn); if (err) goto err_out; *limit_watermark = be16_to_cpu(srq_context->limit_watermark); err_out: mlx4_free_cmd_mailbox(dev, mailbox); return err; } EXPORT_SYMBOL_GPL(mlx4_srq_query); int mlx4_init_srq_table(struct mlx4_dev *dev) { struct mlx4_srq_table *srq_table = &mlx4_priv(dev)->srq_table; int err; spin_lock_init(&srq_table->lock); INIT_RADIX_TREE(&srq_table->tree, GFP_ATOMIC); if (mlx4_is_slave(dev)) return 0; err = mlx4_bitmap_init(&srq_table->bitmap, dev->caps.num_srqs, dev->caps.num_srqs - 1, dev->caps.reserved_srqs, 0); if (err) return err; return 0; } void mlx4_cleanup_srq_table(struct mlx4_dev *dev) { if (mlx4_is_slave(dev)) return; mlx4_bitmap_cleanup(&mlx4_priv(dev)->srq_table.bitmap); } Index: stable/10/sys/ofed/drivers/net/mlx4/xrcd.c =================================================================== --- stable/10/sys/ofed/drivers/net/mlx4/xrcd.c (revision 271126) +++ stable/10/sys/ofed/drivers/net/mlx4/xrcd.c (revision 271127) @@ -1,70 +1,69 @@ /* * Copyright (c) 2006, 2007 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2007 Mellanox Technologies. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU * General Public License (GPL) Version 2, available from the file * COPYING in the main directory of this source tree, or the * OpenIB.org BSD license below: * * Redistribution and use in source and binary forms, with or * without modification, are permitted provided that the following * conditions are met: * * - Redistributions of source code must retain the above * copyright notice, this list of conditions and the following * disclaimer. * * - Redistributions in binary form must reproduce the above * copyright notice, this list of conditions and the following * disclaimer in the documentation and/or other materials * provided with the distribution. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include #include #include "mlx4.h" int mlx4_xrcd_alloc(struct mlx4_dev *dev, u32 *xrcdn) { struct mlx4_priv *priv = mlx4_priv(dev); *xrcdn = mlx4_bitmap_alloc(&priv->xrcd_bitmap); if (*xrcdn == -1) return -ENOMEM; return 0; } EXPORT_SYMBOL_GPL(mlx4_xrcd_alloc); void mlx4_xrcd_free(struct mlx4_dev *dev, u32 xrcdn) { mlx4_bitmap_free(&mlx4_priv(dev)->xrcd_bitmap, xrcdn); } EXPORT_SYMBOL_GPL(mlx4_xrcd_free); int __devinit mlx4_init_xrcd_table(struct mlx4_dev *dev) { struct mlx4_priv *priv = mlx4_priv(dev); return mlx4_bitmap_init(&priv->xrcd_bitmap, (1 << 16), (1 << 16) - 1, dev->caps.reserved_xrcds + 1, 0); } void mlx4_cleanup_xrcd_table(struct mlx4_dev *dev) { mlx4_bitmap_cleanup(&mlx4_priv(dev)->xrcd_bitmap); } Index: stable/10/sys/ofed/include/asm/current.h =================================================================== --- stable/10/sys/ofed/include/asm/current.h (revision 271126) +++ stable/10/sys/ofed/include/asm/current.h (nonexistent) @@ -1,32 +0,0 @@ -/*- - * Copyright (c) 2010 Isilon Systems, Inc. - * Copyright (c) 2010 iX Systems, Inc. - * Copyright (c) 2010 Panasas, Inc. - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice unmodified, this list of conditions, and the following - * disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR - * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES - * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. - * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, - * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT - * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF - * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -#ifndef _ASM_CURRENT_H_ -#define _ASM_CURRENT_H_ - -#endif /* _ASM_CURRENT_H_ */ Property changes on: stable/10/sys/ofed/include/asm/current.h ___________________________________________________________________ Deleted: fbsd:nokeywords ## -1 +0,0 ## -true \ No newline at end of property Index: stable/10/sys/ofed/include/asm/system.h =================================================================== --- stable/10/sys/ofed/include/asm/system.h (revision 271126) +++ stable/10/sys/ofed/include/asm/system.h (nonexistent) @@ -1,27 +0,0 @@ -/*- - * Copyright (c) 2010 Isilon Systems, Inc. - * Copyright (c) 2010 iX Systems, Inc. - * Copyright (c) 2010 Panasas, Inc. - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice unmodified, this list of conditions, and the following - * disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR - * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES - * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. - * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, - * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT - * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF - * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ Property changes on: stable/10/sys/ofed/include/asm/system.h ___________________________________________________________________ Deleted: fbsd:nokeywords ## -1 +0,0 ## -true \ No newline at end of property Index: stable/10/sys/ofed/include/asm/semaphore.h =================================================================== --- stable/10/sys/ofed/include/asm/semaphore.h (revision 271126) +++ stable/10/sys/ofed/include/asm/semaphore.h (nonexistent) @@ -1,34 +0,0 @@ -/*- - * Copyright (c) 2010 Isilon Systems, Inc. - * Copyright (c) 2010 iX Systems, Inc. - * Copyright (c) 2010 Panasas, Inc. - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice unmodified, this list of conditions, and the following - * disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR - * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES - * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. - * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, - * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT - * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF - * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -#ifndef _ASM_SEMAPHORE_H_ -#define _ASM_SEMAPHORE_H_ - -#include - -#endif /* _ASM_SEMAPHORE_H_ */ Property changes on: stable/10/sys/ofed/include/asm/semaphore.h ___________________________________________________________________ Deleted: fbsd:nokeywords ## -1 +0,0 ## -true \ No newline at end of property Index: stable/10/sys/ofed/include/asm/atomic-long.h =================================================================== --- stable/10/sys/ofed/include/asm/atomic-long.h (revision 271126) +++ stable/10/sys/ofed/include/asm/atomic-long.h (revision 271127) @@ -1,79 +1,81 @@ /*- * Copyright (c) 2010 Isilon Systems, Inc. * Copyright (c) 2010 iX Systems, Inc. * Copyright (c) 2010 Panasas, Inc. + * Copyright (c) 2013, 2014 Mellanox Technologies, Ltd. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice unmodified, this list of conditions, and the following * disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ + #ifndef _ATOMIC_LONG_H_ #define _ATOMIC_LONG_H_ #include #include #include typedef struct { volatile u_long counter; } atomic_long_t; #define atomic_long_add(i, v) atomic_long_add_return((i), (v)) #define atomic_long_inc_return(v) atomic_long_add_return(1, (v)) static inline long atomic_long_add_return(long i, atomic_long_t *v) { return i + atomic_fetchadd_long(&v->counter, i); } static inline void atomic_long_set(atomic_long_t *v, long i) { atomic_store_rel_long(&v->counter, i); } static inline long atomic_long_read(atomic_long_t *v) { return atomic_load_acq_long(&v->counter); } static inline long atomic_long_inc(atomic_long_t *v) { return atomic_fetchadd_long(&v->counter, 1) + 1; } static inline long atomic_long_dec(atomic_long_t *v) { return atomic_fetchadd_long(&v->counter, -1) - 1; } static inline long atomic_long_dec_and_test(atomic_long_t *v) { long i = atomic_long_add(-1, v); return i == 0 ; } #endif /* _ATOMIC_LONG_H_ */ Index: stable/10/sys/ofed/include/asm/atomic.h =================================================================== --- stable/10/sys/ofed/include/asm/atomic.h (revision 271126) +++ stable/10/sys/ofed/include/asm/atomic.h (revision 271127) @@ -1,107 +1,106 @@ /*- * Copyright (c) 2010 Isilon Systems, Inc. * Copyright (c) 2010 iX Systems, Inc. * Copyright (c) 2010 Panasas, Inc. + * Copyright (c) 2013, 2014 Mellanox Technologies, Ltd. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice unmodified, this list of conditions, and the following * disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #ifndef _ASM_ATOMIC_H_ #define _ASM_ATOMIC_H_ #include #include #include -#include typedef struct { volatile u_int counter; } atomic_t; #define atomic_add(i, v) atomic_add_return((i), (v)) #define atomic_sub(i, v) atomic_sub_return((i), (v)) #define atomic_inc_return(v) atomic_add_return(1, (v)) #define atomic_add_negative(i, v) (atomic_add_return((i), (v)) < 0) #define atomic_sub_and_test(i, v) (atomic_sub_return((i), (v)) == 0) #define atomic_dec_and_test(v) (atomic_sub_return(1, (v)) == 0) #define atomic_inc_and_test(v) (atomic_add_return(1, (v)) == 0) #define atomic_dec_return(v) atomic_sub_return(1, (v)) static inline int atomic_add_return(int i, atomic_t *v) { return i + atomic_fetchadd_int(&v->counter, i); } static inline int atomic_sub_return(int i, atomic_t *v) { return atomic_fetchadd_int(&v->counter, -i) - i; } static inline void atomic_set(atomic_t *v, int i) { atomic_store_rel_int(&v->counter, i); } static inline int atomic_read(atomic_t *v) { return atomic_load_acq_int(&v->counter); } static inline int atomic_inc(atomic_t *v) { return atomic_fetchadd_int(&v->counter, 1) + 1; } static inline int atomic_dec(atomic_t *v) { return atomic_fetchadd_int(&v->counter, -1) - 1; } static inline int atomic_add_unless(atomic_t *v, int a, int u) { int c, old; c = atomic_read(v); for (;;) { if (unlikely(c == (u))) break; - // old = atomic_cmpxchg((v), c, c + (a)); /*Linux*/ old = atomic_cmpset_int(&v->counter, c, c + (a)); if (likely(old == c)) break; c = old; } return c != (u); } #define atomic_inc_not_zero(v) atomic_add_unless((v), 1, 0) #endif /* _ASM_ATOMIC_H_ */ Index: stable/10/sys/ofed/include/asm/byteorder.h =================================================================== --- stable/10/sys/ofed/include/asm/byteorder.h (revision 271126) +++ stable/10/sys/ofed/include/asm/byteorder.h (revision 271127) @@ -1,91 +1,93 @@ /*- * Copyright (c) 2010 Isilon Systems, Inc. * Copyright (c) 2010 iX Systems, Inc. * Copyright (c) 2010 Panasas, Inc. + * Copyright (c) 2013, 2014 Mellanox Technologies, Ltd. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice unmodified, this list of conditions, and the following * disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ + #ifndef _ASM_BYTEORDER_H_ #define _ASM_BYTEORDER_H_ #include #include #include #if BYTE_ORDER == LITTLE_ENDIAN #define __LITTLE_ENDIAN #else #define __BIG_ENDIAN #endif #define cpu_to_le64 htole64 #define le64_to_cpu le64toh #define cpu_to_le32 htole32 #define le32_to_cpu le32toh #define cpu_to_le16 htole16 #define le16_to_cpu le16toh #define cpu_to_be64 htobe64 #define be64_to_cpu be64toh #define cpu_to_be32 htobe32 #define be32_to_cpu be32toh #define cpu_to_be16 htobe16 #define be16_to_cpu be16toh #define __be16_to_cpu be16toh #define cpu_to_le64p(x) htole64(*((uint64_t *)x)) #define le64_to_cpup(x) le64toh(*((uint64_t *)x)) #define cpu_to_le32p(x) htole32(*((uint32_t *)x)) #define le32_to_cpup(x) le32toh(*((uint32_t *)x)) #define cpu_to_le16p(x) htole16(*((uint16_t *)x)) #define le16_to_cpup(x) le16toh(*((uint16_t *)x)) #define cpu_to_be64p(x) htobe64(*((uint64_t *)x)) #define be64_to_cpup(x) be64toh(*((uint64_t *)x)) #define cpu_to_be32p(x) htobe32(*((uint32_t *)x)) #define be32_to_cpup(x) be32toh(*((uint32_t *)x)) #define cpu_to_be16p(x) htobe16(*((uint16_t *)x)) #define be16_to_cpup(x) be16toh(*((uint16_t *)x)) #define cpu_to_le64s(x) do { *((uint64_t *)x) = cpu_to_le64p((x)) } while (0) #define le64_to_cpus(x) do { *((uint64_t *)x) = le64_to_cpup((x)) } while (0) #define cpu_to_le32s(x) do { *((uint32_t *)x) = cpu_to_le32p((x)) } while (0) #define le32_to_cpus(x) do { *((uint32_t *)x) = le32_to_cpup((x)) } while (0) #define cpu_to_le16s(x) do { *((uint16_t *)x) = cpu_to_le16p((x)) } while (0) #define le16_to_cpus(x) do { *((uint16_t *)x) = le16_to_cpup((x)) } while (0) #define cpu_to_be64s(x) do { *((uint64_t *)x) = cpu_to_be64p((x)) } while (0) #define be64_to_cpus(x) do { *((uint64_t *)x) = be64_to_cpup((x)) } while (0) #define cpu_to_be32s(x) do { *((uint32_t *)x) = cpu_to_be32p((x)) } while (0) #define be32_to_cpus(x) do { *((uint32_t *)x) = be32_to_cpup((x)) } while (0) #define cpu_to_be16s(x) do { *((uint16_t *)x) = cpu_to_be16p((x)) } while (0) #define be16_to_cpus(x) do { *((uint16_t *)x) = be16_to_cpup((x)) } while (0) #define swab16 bswap16 #define swab32 bswap32 #define swab64 bswap64 static inline void be16_add_cpu(u16 *var, u16 val) { *var = cpu_to_be16(be16_to_cpu(*var) + val); } #endif /* _ASM_BYTEORDER_H_ */ Index: stable/10/sys/ofed/include/asm/fcntl.h =================================================================== --- stable/10/sys/ofed/include/asm/fcntl.h (revision 271126) +++ stable/10/sys/ofed/include/asm/fcntl.h (revision 271127) @@ -1,33 +1,34 @@ /*- * Copyright (c) 2010 Isilon Systems, Inc. * Copyright (c) 2010 iX Systems, Inc. * Copyright (c) 2010 Panasas, Inc. + * Copyright (c) 2013, 2014 Mellanox Technologies, Ltd. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice unmodified, this list of conditions, and the following * disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #ifndef _ASM_FCNTL_H_ #define _ASM_FCNTL_H_ #include #endif /* _ASM_FCNTL_H_ */ Index: stable/10/sys/ofed/include/asm/io.h =================================================================== --- stable/10/sys/ofed/include/asm/io.h (revision 271126) +++ stable/10/sys/ofed/include/asm/io.h (revision 271127) @@ -1,29 +1,35 @@ -/*- +/* * Copyright (c) 2010 Isilon Systems, Inc. * Copyright (c) 2010 iX Systems, Inc. * Copyright (c) 2010 Panasas, Inc. + * Copyright (c) 2013, 2014 Mellanox Technologies, Ltd. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice unmodified, this list of conditions, and the following * disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ +#ifndef _ASM_IO_H_ +#define _ASM_IO_H_ + #include + +#endif /* _ASM_IO_H_ */ Index: stable/10/sys/ofed/include/asm/page.h =================================================================== --- stable/10/sys/ofed/include/asm/page.h (revision 271126) +++ stable/10/sys/ofed/include/asm/page.h (revision 271127) @@ -1,29 +1,35 @@ /*- * Copyright (c) 2010 Isilon Systems, Inc. * Copyright (c) 2010 iX Systems, Inc. * Copyright (c) 2010 Panasas, Inc. + * Copyright (c) 2013, 2014 Mellanox Technologies, Ltd. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice unmodified, this list of conditions, and the following * disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ +#ifndef _ASM_PAGE_H_ +#define _ASM_PAGE_H_ + #include + +#endif /*_ASM_PAGE_H_*/ Index: stable/10/sys/ofed/include/asm/pgtable.h =================================================================== --- stable/10/sys/ofed/include/asm/pgtable.h (revision 271126) +++ stable/10/sys/ofed/include/asm/pgtable.h (revision 271127) @@ -1,33 +1,34 @@ /*- * Copyright (c) 2010 Isilon Systems, Inc. * Copyright (c) 2010 iX Systems, Inc. * Copyright (c) 2010 Panasas, Inc. + * Copyright (c) 2013, 2014 Mellanox Technologies, Ltd. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice unmodified, this list of conditions, and the following * disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #ifndef _ASM_PGTABLE_H_ #define _ASM_PGTABLE_H_ typedef int pgprot_t; #endif /* _ASM_PGTABLE_H_ */ Index: stable/10/sys/ofed/include/asm/types.h =================================================================== --- stable/10/sys/ofed/include/asm/types.h (revision 271126) +++ stable/10/sys/ofed/include/asm/types.h (revision 271127) @@ -1,67 +1,61 @@ /*- * Copyright (c) 2010 Isilon Systems, Inc. * Copyright (c) 2010 iX Systems, Inc. * Copyright (c) 2010 Panasas, Inc. + * Copyright (c) 2013, 2014 Mellanox Technologies, Ltd. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice unmodified, this list of conditions, and the following * disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ + #ifndef _ASM_TYPES_H_ #define _ASM_TYPES_H_ -typedef unsigned short umode_t; - -typedef signed char __s8; -typedef unsigned char __u8; - -typedef signed short __s16; -typedef unsigned short __u16; - -typedef signed int __s32; -typedef unsigned int __u32; - -#if defined(__GNUC__) // && !defined(__STRICT_ANSI__) -typedef signed long long __s64; -typedef unsigned long long __u64; -#endif - #ifdef _KERNEL -typedef signed char s8; -typedef unsigned char u8; +typedef uint8_t u8; +typedef uint8_t __u8; +typedef uint16_t u16; +typedef uint16_t __u16; +typedef uint32_t u32; +typedef uint32_t __u32; +typedef uint64_t u64; +typedef uint64_t __u64; -typedef signed short s16; -typedef unsigned short u16; +typedef int8_t s8; +typedef int8_t __s8; +typedef int16_t s16; +typedef int16_t __s16; +typedef int32_t s32; +typedef int32_t __s32; +typedef int64_t s64; +typedef int64_t __s64; -typedef signed int s32; -typedef unsigned int u32; - -typedef signed long long s64; -typedef unsigned long long u64; - /* DMA addresses come in generic and 64-bit flavours. */ typedef vm_paddr_t dma_addr_t; typedef vm_paddr_t dma64_addr_t; + +typedef unsigned short umode_t; #endif /* _KERNEL */ #endif /* _ASM_TYPES_H_ */ Index: stable/10/sys/ofed/include/asm/uaccess.h =================================================================== --- stable/10/sys/ofed/include/asm/uaccess.h (revision 271126) +++ stable/10/sys/ofed/include/asm/uaccess.h (revision 271127) @@ -1,49 +1,51 @@ /*- * Copyright (c) 2010 Isilon Systems, Inc. * Copyright (c) 2010 iX Systems, Inc. * Copyright (c) 2010 Panasas, Inc. + * Copyright (c) 2013, 2014 Mellanox Technologies, Ltd. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice unmodified, this list of conditions, and the following * disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ + #ifndef _ASM_UACCESS_H_ #define _ASM_UACCESS_H_ #include static inline long copy_to_user(void *to, const void *from, unsigned long n) { if (copyout(from, to, n) != 0) return n; return 0; } static inline long copy_from_user(void *to, const void *from, unsigned long n) { if (copyin(from, to, n) != 0) return n; return 0; } #endif /* _ASM_UACCESS_H_ */ Index: stable/10/sys/ofed/include/linux/bitmap.h =================================================================== --- stable/10/sys/ofed/include/linux/bitmap.h (revision 271126) +++ stable/10/sys/ofed/include/linux/bitmap.h (nonexistent) @@ -1,34 +0,0 @@ -/*- - * Copyright (c) 2010 Isilon Systems, Inc. - * Copyright (c) 2010 iX Systems, Inc. - * Copyright (c) 2010 Panasas, Inc. - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice unmodified, this list of conditions, and the following - * disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR - * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES - * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. - * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, - * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT - * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF - * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ -#ifndef _LINUX_BITMAP_H_ -#define _LINUX_BITMAP_H_ - -#include -#include - -#endif /* _LINUX_BITMAP_H_ */ Property changes on: stable/10/sys/ofed/include/linux/bitmap.h ___________________________________________________________________ Deleted: fbsd:nokeywords ## -1 +0,0 ## -true \ No newline at end of property Index: stable/10/sys/ofed/include/linux/atomic.h =================================================================== --- stable/10/sys/ofed/include/linux/atomic.h (revision 271126) +++ stable/10/sys/ofed/include/linux/atomic.h (nonexistent) @@ -1,53 +0,0 @@ -#ifndef _COMPAT_LINUX_ATOMIC_H -#define _COMPAT_LINUX_ATOMIC_H 1 - -/* -#include - -#if (LINUX_VERSION_CODE > KERNEL_VERSION(2,6,36)) -#include_next -#else -*/ - -#include - -/* Shahar Klein: atomic_inc_not_zero_hint do we need it? */ -#if 0 - -/** - * atomic_inc_not_zero_hint - increment if not null - * @v: pointer of type atomic_t - * @hint: probable value of the atomic before the increment - * - * This version of atomic_inc_not_zero() gives a hint of probable - * value of the atomic. This helps processor to not read the memory - * before doing the atomic read/modify/write cycle, lowering - * number of bus transactions on some arches. - * - * Returns: 0 if increment was not done, 1 otherwise. - */ - -#ifndef atomic_inc_not_zero_hint -static inline int atomic_inc_not_zero_hint(atomic_t *v, int hint) -{ - int val, c = hint; - - /* sanity test, should be removed by compiler if hint is a constant */ - if (!hint) - return atomic_inc_not_zero(v); - - do { - val = atomic_cmpxchg(v, c, c + 1); - if (val == c) - return 1; - c = val; - } while (c); - - return 0; -} -#endif -#endif - -//#endif /* (LINUX_VERSION_CODE > KERNEL_VERSION(2,6,36)) */ - -#endif /* _COMPAT_LINUX_ATOMIC_H */ Property changes on: stable/10/sys/ofed/include/linux/atomic.h ___________________________________________________________________ Deleted: fbsd:nokeywords ## -1 +0,0 ## -true \ No newline at end of property Index: stable/10/sys/ofed/include/linux/stddef.h =================================================================== --- stable/10/sys/ofed/include/linux/stddef.h (revision 271126) +++ stable/10/sys/ofed/include/linux/stddef.h (nonexistent) @@ -1,34 +0,0 @@ -/*- - * Copyright (c) 2010 Isilon Systems, Inc. - * Copyright (c) 2010 iX Systems, Inc. - * Copyright (c) 2010 Panasas, Inc. - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice unmodified, this list of conditions, and the following - * disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR - * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES - * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. - * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, - * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT - * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF - * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -#ifndef _LINUX_STDDEF_H_ -#define _LINUX_STDDEF_H_ - -#include - -#endif /* _LINUX_STDDEF_H_ */ Property changes on: stable/10/sys/ofed/include/linux/stddef.h ___________________________________________________________________ Deleted: fbsd:nokeywords ## -1 +0,0 ## -true \ No newline at end of property Index: stable/10/sys/ofed/include/linux/rtnetlink.h =================================================================== --- stable/10/sys/ofed/include/linux/rtnetlink.h (revision 271126) +++ stable/10/sys/ofed/include/linux/rtnetlink.h (nonexistent) @@ -1,27 +0,0 @@ -/*- - * Copyright (c) 2010 Isilon Systems, Inc. - * Copyright (c) 2010 iX Systems, Inc. - * Copyright (c) 2010 Panasas, Inc. - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice unmodified, this list of conditions, and the following - * disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR - * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES - * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. - * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, - * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT - * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF - * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ Property changes on: stable/10/sys/ofed/include/linux/rtnetlink.h ___________________________________________________________________ Deleted: fbsd:nokeywords ## -1 +0,0 ## -true \ No newline at end of property Index: stable/10/sys/ofed/include/linux/init.h =================================================================== --- stable/10/sys/ofed/include/linux/init.h (revision 271126) +++ stable/10/sys/ofed/include/linux/init.h (nonexistent) @@ -1,31 +0,0 @@ -/*- - * Copyright (c) 2010 Isilon Systems, Inc. - * Copyright (c) 2010 iX Systems, Inc. - * Copyright (c) 2010 Panasas, Inc. - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice unmodified, this list of conditions, and the following - * disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR - * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES - * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. - * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, - * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT - * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF - * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ -#ifndef _LINUX_INIT_H_ -#define _LINUX_INIT_H_ - -#endif /* _LINUX_INIT_H_ */ Property changes on: stable/10/sys/ofed/include/linux/init.h ___________________________________________________________________ Deleted: fbsd:nokeywords ## -1 +0,0 ## -true \ No newline at end of property Index: stable/10/sys/ofed/include/linux/ctype.h =================================================================== --- stable/10/sys/ofed/include/linux/ctype.h (revision 271126) +++ stable/10/sys/ofed/include/linux/ctype.h (nonexistent) @@ -1,34 +0,0 @@ -/*- - * Copyright (c) 2010 Isilon Systems, Inc. - * Copyright (c) 2010 iX Systems, Inc. - * Copyright (c) 2010 Panasas, Inc. - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice unmodified, this list of conditions, and the following - * disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR - * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES - * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. - * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, - * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT - * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF - * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -#ifndef _LINUX_CTYPE_H_ -#define _LINUX_CTYPE_H_ - -#include - -#endif /* _LINUX_CTYPE_H_ */ Property changes on: stable/10/sys/ofed/include/linux/ctype.h ___________________________________________________________________ Deleted: fbsd:nokeywords ## -1 +0,0 ## -true \ No newline at end of property Index: stable/10/sys/ofed/include/linux/bitops.h =================================================================== --- stable/10/sys/ofed/include/linux/bitops.h (revision 271126) +++ stable/10/sys/ofed/include/linux/bitops.h (revision 271127) @@ -1,483 +1,512 @@ /*- * Copyright (c) 2010 Isilon Systems, Inc. * Copyright (c) 2010 iX Systems, Inc. * Copyright (c) 2010 Panasas, Inc. + * Copyright (c) 2013, 2014 Mellanox Technologies, Ltd. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice unmodified, this list of conditions, and the following * disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #ifndef _LINUX_BITOPS_H_ #define _LINUX_BITOPS_H_ #ifdef __LP64__ #define BITS_PER_LONG 64 #else #define BITS_PER_LONG 32 #endif #define BIT_MASK(n) (~0UL >> (BITS_PER_LONG - (n))) #define BITS_TO_LONGS(n) howmany((n), BITS_PER_LONG) #define BIT_WORD(nr) ((nr) / BITS_PER_LONG) +#define BITS_PER_BYTE 8 + static inline int __ffs(int mask) { return (ffs(mask) - 1); } static inline int __fls(int mask) { return (fls(mask) - 1); } static inline int __ffsl(long mask) { return (ffsl(mask) - 1); } static inline int __flsl(long mask) { return (flsl(mask) - 1); } #define ffz(mask) __ffs(~(mask)) static inline int get_count_order(unsigned int count) { int order; order = fls(count) - 1; if (count & (count - 1)) order++; return order; } static inline unsigned long find_first_bit(unsigned long *addr, unsigned long size) { long mask; int bit; for (bit = 0; size >= BITS_PER_LONG; size -= BITS_PER_LONG, bit += BITS_PER_LONG, addr++) { if (*addr == 0) continue; return (bit + __ffsl(*addr)); } if (size) { mask = (*addr) & BIT_MASK(size); if (mask) bit += __ffsl(mask); else bit += size; } return (bit); } static inline unsigned long find_first_zero_bit(unsigned long *addr, unsigned long size) { long mask; int bit; for (bit = 0; size >= BITS_PER_LONG; size -= BITS_PER_LONG, bit += BITS_PER_LONG, addr++) { if (~(*addr) == 0) continue; return (bit + __ffsl(~(*addr))); } if (size) { mask = ~(*addr) & BIT_MASK(size); if (mask) bit += __ffsl(mask); else bit += size; } return (bit); } static inline unsigned long find_last_bit(unsigned long *addr, unsigned long size) { long mask; int offs; int bit; int pos; pos = size / BITS_PER_LONG; offs = size % BITS_PER_LONG; bit = BITS_PER_LONG * pos; addr += pos; if (offs) { mask = (*addr) & BIT_MASK(offs); if (mask) return (bit + __flsl(mask)); } while (--pos) { addr--; bit -= BITS_PER_LONG; if (*addr) return (bit + __flsl(mask)); } return (size); } static inline unsigned long find_next_bit(unsigned long *addr, unsigned long size, unsigned long offset) { long mask; int offs; int bit; int pos; if (offset >= size) return (size); pos = offset / BITS_PER_LONG; offs = offset % BITS_PER_LONG; bit = BITS_PER_LONG * pos; addr += pos; if (offs) { mask = (*addr) & ~BIT_MASK(offs); if (mask) return (bit + __ffsl(mask)); bit += BITS_PER_LONG; addr++; } for (size -= bit; size >= BITS_PER_LONG; size -= BITS_PER_LONG, bit += BITS_PER_LONG, addr++) { if (*addr == 0) continue; return (bit + __ffsl(*addr)); } if (size) { mask = (*addr) & BIT_MASK(size); if (mask) bit += __ffsl(mask); else bit += size; } return (bit); } static inline unsigned long find_next_zero_bit(unsigned long *addr, unsigned long size, unsigned long offset) { long mask; int offs; int bit; int pos; if (offset >= size) return (size); pos = offset / BITS_PER_LONG; offs = offset % BITS_PER_LONG; bit = BITS_PER_LONG * pos; addr += pos; if (offs) { mask = ~(*addr) & ~BIT_MASK(offs); if (mask) return (bit + __ffsl(mask)); bit += BITS_PER_LONG; addr++; } for (size -= bit; size >= BITS_PER_LONG; size -= BITS_PER_LONG, bit += BITS_PER_LONG, addr++) { if (~(*addr) == 0) continue; return (bit + __ffsl(~(*addr))); } if (size) { mask = ~(*addr) & BIT_MASK(size); if (mask) bit += __ffsl(mask); else bit += size; } return (bit); } static inline void bitmap_zero(unsigned long *addr, int size) { int len; len = BITS_TO_LONGS(size) * sizeof(long); memset(addr, 0, len); } static inline void bitmap_fill(unsigned long *addr, int size) { int tail; int len; len = (size / BITS_PER_LONG) * sizeof(long); memset(addr, 0xff, len); tail = size & (BITS_PER_LONG - 1); if (tail) addr[size / BITS_PER_LONG] = BIT_MASK(tail); } static inline int bitmap_full(unsigned long *addr, int size) { long mask; int tail; int len; int i; len = size / BITS_PER_LONG; for (i = 0; i < len; i++) if (addr[i] != ~0UL) return (0); tail = size & (BITS_PER_LONG - 1); if (tail) { mask = BIT_MASK(tail); if ((addr[i] & mask) != mask) return (0); } return (1); } static inline int bitmap_empty(unsigned long *addr, int size) { long mask; int tail; int len; int i; len = size / BITS_PER_LONG; for (i = 0; i < len; i++) if (addr[i] != 0) return (0); tail = size & (BITS_PER_LONG - 1); if (tail) { mask = BIT_MASK(tail); if ((addr[i] & mask) != 0) return (0); } return (1); } #define NBLONG (NBBY * sizeof(long)) #define set_bit(i, a) \ atomic_set_long(&((volatile long *)(a))[(i)/NBLONG], 1UL << ((i) % NBLONG)) #define clear_bit(i, a) \ atomic_clear_long(&((volatile long *)(a))[(i)/NBLONG], 1UL << ((i) % NBLONG)) #define test_bit(i, a) \ !!(atomic_load_acq_long(&((volatile long *)(a))[(i)/NBLONG]) & \ (1UL << ((i) % NBLONG))) static inline long test_and_clear_bit(long bit, long *var) { long val; var += bit / (sizeof(long) * NBBY); bit %= sizeof(long) * NBBY; bit = (1UL << bit); do { val = *(volatile long *)var; } while (atomic_cmpset_long(var, val, val & ~bit) == 0); return !!(val & bit); } static inline long test_and_set_bit(long bit, long *var) { long val; var += bit / (sizeof(long) * NBBY); bit %= sizeof(long) * NBBY; bit = (1UL << bit); do { val = *(volatile long *)var; } while (atomic_cmpset_long(var, val, val | bit) == 0); return !!(val & bit); } #define BITMAP_FIRST_WORD_MASK(start) (~0UL << ((start) % BITS_PER_LONG)) #define BITMAP_LAST_WORD_MASK(nbits) \ ( \ ((nbits) % BITS_PER_LONG) ? \ (1UL<<((nbits) % BITS_PER_LONG))-1 : ~0UL \ ) static inline void bitmap_set(unsigned long *map, int start, int nr) { unsigned long *p = map + BIT_WORD(start); const int size = start + nr; int bits_to_set = BITS_PER_LONG - (start % BITS_PER_LONG); unsigned long mask_to_set = BITMAP_FIRST_WORD_MASK(start); while (nr - bits_to_set >= 0) { *p |= mask_to_set; nr -= bits_to_set; bits_to_set = BITS_PER_LONG; mask_to_set = ~0UL; p++; } if (nr) { mask_to_set &= BITMAP_LAST_WORD_MASK(size); *p |= mask_to_set; } } static inline void bitmap_clear(unsigned long *map, int start, int nr) { unsigned long *p = map + BIT_WORD(start); const int size = start + nr; int bits_to_clear = BITS_PER_LONG - (start % BITS_PER_LONG); unsigned long mask_to_clear = BITMAP_FIRST_WORD_MASK(start); while (nr - bits_to_clear >= 0) { *p &= ~mask_to_clear; nr -= bits_to_clear; bits_to_clear = BITS_PER_LONG; mask_to_clear = ~0UL; p++; } if (nr) { mask_to_clear &= BITMAP_LAST_WORD_MASK(size); *p &= ~mask_to_clear; } } enum { REG_OP_ISFREE, /* true if region is all zero bits */ REG_OP_ALLOC, /* set all bits in region */ REG_OP_RELEASE, /* clear all bits in region */ }; static int __reg_op(unsigned long *bitmap, int pos, int order, int reg_op) { int nbits_reg; /* number of bits in region */ int index; /* index first long of region in bitmap */ int offset; /* bit offset region in bitmap[index] */ int nlongs_reg; /* num longs spanned by region in bitmap */ int nbitsinlong; /* num bits of region in each spanned long */ unsigned long mask; /* bitmask for one long of region */ int i; /* scans bitmap by longs */ int ret = 0; /* return value */ /* * Either nlongs_reg == 1 (for small orders that fit in one long) * or (offset == 0 && mask == ~0UL) (for larger multiword orders.) */ nbits_reg = 1 << order; index = pos / BITS_PER_LONG; offset = pos - (index * BITS_PER_LONG); nlongs_reg = BITS_TO_LONGS(nbits_reg); nbitsinlong = min(nbits_reg, BITS_PER_LONG); /* * Can't do "mask = (1UL << nbitsinlong) - 1", as that * overflows if nbitsinlong == BITS_PER_LONG. */ mask = (1UL << (nbitsinlong - 1)); mask += mask - 1; mask <<= offset; switch (reg_op) { case REG_OP_ISFREE: for (i = 0; i < nlongs_reg; i++) { if (bitmap[index + i] & mask) goto done; } ret = 1; /* all bits in region free (zero) */ break; case REG_OP_ALLOC: for (i = 0; i < nlongs_reg; i++) bitmap[index + i] |= mask; break; case REG_OP_RELEASE: for (i = 0; i < nlongs_reg; i++) bitmap[index + i] &= ~mask; break; } done: return ret; } /** * bitmap_find_free_region - find a contiguous aligned mem region * @bitmap: array of unsigned longs corresponding to the bitmap * @bits: number of bits in the bitmap * @order: region size (log base 2 of number of bits) to find * * Find a region of free (zero) bits in a @bitmap of @bits bits and * allocate them (set them to one). Only consider regions of length * a power (@order) of two, aligned to that power of two, which * makes the search algorithm much faster. * * Return the bit offset in bitmap of the allocated region, * or -errno on failure. */ static inline int bitmap_find_free_region(unsigned long *bitmap, int bits, int order) { int pos, end; /* scans bitmap by regions of size order */ for (pos = 0 ; (end = pos + (1 << order)) <= bits; pos = end) { if (!__reg_op(bitmap, pos, order, REG_OP_ISFREE)) continue; __reg_op(bitmap, pos, order, REG_OP_ALLOC); return pos; } return -ENOMEM; } /** + * bitmap_allocate_region - allocate bitmap region + * @bitmap: array of unsigned longs corresponding to the bitmap + * @pos: beginning of bit region to allocate + * @order: region size (log base 2 of number of bits) to allocate + * + * Allocate (set bits in) a specified region of a bitmap. + * + * Return 0 on success, or %-EBUSY if specified region wasn't + * free (not all bits were zero). + */ + +static inline int +bitmap_allocate_region(unsigned long *bitmap, int pos, int order) +{ + if (!__reg_op(bitmap, pos, order, REG_OP_ISFREE)) + return -EBUSY; + __reg_op(bitmap, pos, order, REG_OP_ALLOC); + return 0; +} + +/** * bitmap_release_region - release allocated bitmap region * @bitmap: array of unsigned longs corresponding to the bitmap * @pos: beginning of bit region to release * @order: region size (log base 2 of number of bits) to release * * This is the complement to __bitmap_find_free_region() and releases * the found region (by clearing it in the bitmap). * * No return value. */ static inline void bitmap_release_region(unsigned long *bitmap, int pos, int order) { __reg_op(bitmap, pos, order, REG_OP_RELEASE); } + +#define for_each_set_bit(bit, addr, size) \ + for ((bit) = find_first_bit((addr), (size)); \ + (bit) < (size); \ + (bit) = find_next_bit((addr), (size), (bit) + 1)) #endif /* _LINUX_BITOPS_H_ */ Index: stable/10/sys/ofed/include/linux/cache.h =================================================================== --- stable/10/sys/ofed/include/linux/cache.h (nonexistent) +++ stable/10/sys/ofed/include/linux/cache.h (revision 271127) @@ -0,0 +1,37 @@ +/*- + * Copyright (c) 2010 Isilon Systems, Inc. + * Copyright (c) 2010 iX Systems, Inc. + * Copyright (c) 2010 Panasas, Inc. + * Copyright (c) 2013, 2014 Mellanox Technologies, Ltd. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice unmodified, this list of conditions, and the following + * disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _LINUX_CACHE_H_ +#define _LINUX_CACHE_H_ + + +#define cache_line_size() CACHE_LINE_SIZE + + +#endif /* _LINUX_CACHE_H_ */ Property changes on: stable/10/sys/ofed/include/linux/cache.h ___________________________________________________________________ Added: fbsd:nokeywords ## -0,0 +1 ## +true \ No newline at end of property Index: stable/10/sys/ofed/include/linux/cdev.h =================================================================== --- stable/10/sys/ofed/include/linux/cdev.h (revision 271126) +++ stable/10/sys/ofed/include/linux/cdev.h (revision 271127) @@ -1,129 +1,130 @@ /*- * Copyright (c) 2010 Isilon Systems, Inc. * Copyright (c) 2010 iX Systems, Inc. * Copyright (c) 2010 Panasas, Inc. + * Copyright (c) 2013, 2014 Mellanox Technologies, Ltd. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice unmodified, this list of conditions, and the following * disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #ifndef _LINUX_CDEV_H_ #define _LINUX_CDEV_H_ #include #include #include struct file_operations; struct inode; struct module; extern struct cdevsw linuxcdevsw; struct linux_cdev { struct kobject kobj; struct module *owner; struct cdev *cdev; dev_t dev; const struct file_operations *ops; }; static inline void cdev_release(struct kobject *kobj) { struct linux_cdev *cdev; cdev = container_of(kobj, struct linux_cdev, kobj); if (cdev->cdev) destroy_dev(cdev->cdev); kfree(cdev); } static inline void cdev_static_release(struct kobject *kobj) { struct linux_cdev *cdev; cdev = container_of(kobj, struct linux_cdev, kobj); if (cdev->cdev) destroy_dev(cdev->cdev); } static struct kobj_type cdev_ktype = { .release = cdev_release, }; static struct kobj_type cdev_static_ktype = { .release = cdev_static_release, }; static inline void cdev_init(struct linux_cdev *cdev, const struct file_operations *ops) { kobject_init(&cdev->kobj, &cdev_static_ktype); cdev->ops = ops; } static inline struct linux_cdev * cdev_alloc(void) { struct linux_cdev *cdev; cdev = kzalloc(sizeof(struct linux_cdev), M_WAITOK); if (cdev) kobject_init(&cdev->kobj, &cdev_ktype); return (cdev); } static inline void cdev_put(struct linux_cdev *p) { kobject_put(&p->kobj); } static inline int cdev_add(struct linux_cdev *cdev, dev_t dev, unsigned count) { if (count != 1) panic("cdev_add: Unsupported count: %d", count); cdev->cdev = make_dev(&linuxcdevsw, MINOR(dev), 0, 0, 0700, "%s", kobject_name(&cdev->kobj)); cdev->dev = dev; cdev->cdev->si_drv1 = cdev; return (0); } static inline void cdev_del(struct linux_cdev *cdev) { if (cdev->cdev) { destroy_dev(cdev->cdev); cdev->cdev = NULL; } kobject_put(&cdev->kobj); } #define cdev linux_cdev #endif /* _LINUX_CDEV_H_ */ Index: stable/10/sys/ofed/include/linux/clocksource.h =================================================================== --- stable/10/sys/ofed/include/linux/clocksource.h (revision 271126) +++ stable/10/sys/ofed/include/linux/clocksource.h (revision 271127) @@ -1,17 +1,37 @@ -/* linux/include/linux/clocksource.h +/*- + * Copyright (c) 2010 Isilon Systems, Inc. + * Copyright (c) 2010 iX Systems, Inc. + * Copyright (c) 2010 Panasas, Inc. + * Copyright (c) 2013, 2014 Mellanox Technologies, Ltd. + * All rights reserved. * - * MLX4_CORE_PORT + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice unmodified, this list of conditions, and the following + * disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. * - * This file contains the structure definitions for clocksources. - * - * If you are not a clocksource, or timekeeping code, you should - * not be including this file! + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ + #ifndef _LINUX_CLOCKSOURCE_H #define _LINUX_CLOCKSOURCE_H /* clocksource cycle base type */ typedef u64 cycle_t; #endif /* _LINUX_CLOCKSOURCE_H */ Index: stable/10/sys/ofed/include/linux/compat.h =================================================================== --- stable/10/sys/ofed/include/linux/compat.h (revision 271126) +++ stable/10/sys/ofed/include/linux/compat.h (revision 271127) @@ -1,36 +1,37 @@ /*- * Copyright (c) 2010 Isilon Systems, Inc. * Copyright (c) 2010 iX Systems, Inc. * Copyright (c) 2010 Panasas, Inc. + * Copyright (c) 2013, 2014 Mellanox Technologies, Ltd. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice unmodified, this list of conditions, and the following * disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #ifndef _LINUX_COMPAT_H_ #define _LINUX_COMPAT_H_ #define is_multicast_ether_addr(x) 0 #define is_broadcast_ether_addr(x) 0 #endif /* _LINUX_COMPAT_H_ */ Index: stable/10/sys/ofed/include/linux/compiler.h =================================================================== --- stable/10/sys/ofed/include/linux/compiler.h (revision 271126) +++ stable/10/sys/ofed/include/linux/compiler.h (revision 271127) @@ -1,65 +1,66 @@ /*- * Copyright (c) 2010 Isilon Systems, Inc. * Copyright (c) 2010 iX Systems, Inc. * Copyright (c) 2010 Panasas, Inc. + * Copyright (c) 2013, 2014 Mellanox Technologies, Ltd. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice unmodified, this list of conditions, and the following * disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #ifndef _LINUX_COMPILER_H_ #define _LINUX_COMPILER_H_ #include #define __user #define __kernel #define __safe #define __force #define __nocast #define __iomem #define __chk_user_ptr(x) 0 #define __chk_io_ptr(x) 0 #define __builtin_warning(x, y...) (1) #define __acquires(x) #define __releases(x) #define __acquire(x) 0 #define __release(x) 0 #define __cond_lock(x,c) (c) #define __bitwise #define __devinitdata #define __init #define __devinit #define __devexit #define __exit #define __stringify(x) #x #define __attribute_const__ __attribute__((__const__)) #undef __always_inline #define __always_inline inline #define likely(x) __builtin_expect(!!(x), 1) #define unlikely(x) __builtin_expect(!!(x), 0) #define typeof(x) __typeof(x) #define uninitialized_var(x) x = x #endif /* _LINUX_COMPILER_H_ */ Index: stable/10/sys/ofed/include/linux/completion.h =================================================================== --- stable/10/sys/ofed/include/linux/completion.h (revision 271126) +++ stable/10/sys/ofed/include/linux/completion.h (revision 271127) @@ -1,155 +1,155 @@ /*- * Copyright (c) 2010 Isilon Systems, Inc. * Copyright (c) 2010 iX Systems, Inc. * Copyright (c) 2010 Panasas, Inc. + * Copyright (c) 2013, 2014 Mellanox Technologies, Ltd. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice unmodified, this list of conditions, and the following * disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ -#ifndef _LINUX_COMPLETION_H_ -#define _LINUX_COMPLETION_H_ +#ifndef _FBSD_COMPLETION_H_ +#define _FBSD_COMPLETION_H_ + #include -#include -#include #include #include #include #include #include struct completion { unsigned int done; }; #define INIT_COMPLETION(c) ((c).done = 0) #define init_completion(c) ((c)->done = 0) static inline void _complete_common(struct completion *c, int all) { int wakeup_swapper; sleepq_lock(c); c->done++; if (all) wakeup_swapper = sleepq_broadcast(c, SLEEPQ_SLEEP, 0, 0); else wakeup_swapper = sleepq_signal(c, SLEEPQ_SLEEP, 0, 0); sleepq_release(c); if (wakeup_swapper) kick_proc0(); } #define complete(c) _complete_common(c, 0) #define complete_all(c) _complete_common(c, 1) /* * Indefinite wait for done != 0 with or without signals. */ static inline long _wait_for_common(struct completion *c, int flags) { flags |= SLEEPQ_SLEEP; for (;;) { sleepq_lock(c); if (c->done) break; sleepq_add(c, NULL, "completion", flags, 0); if (flags & SLEEPQ_INTERRUPTIBLE) { if (sleepq_wait_sig(c, 0) != 0) return (-ERESTARTSYS); } else sleepq_wait(c, 0); } c->done--; sleepq_release(c); return (0); } #define wait_for_completion(c) _wait_for_common(c, 0) #define wait_for_completion_interuptible(c) \ _wait_for_common(c, SLEEPQ_INTERRUPTIBLE) static inline long _wait_for_timeout_common(struct completion *c, long timeout, int flags) { long end; end = ticks + timeout; flags |= SLEEPQ_SLEEP; for (;;) { sleepq_lock(c); if (c->done) break; sleepq_add(c, NULL, "completion", flags, 0); sleepq_set_timeout(c, end - ticks); if (flags & SLEEPQ_INTERRUPTIBLE) { if (sleepq_timedwait_sig(c, 0) != 0) return (-ERESTARTSYS); } else sleepq_timedwait(c, 0); } c->done--; sleepq_release(c); timeout = end - ticks; return (timeout > 0 ? timeout : 1); } #define wait_for_completion_timeout(c, timeout) \ _wait_for_timeout_common(c, timeout, 0) #define wait_for_completion_interruptible_timeout(c, timeout) \ _wait_for_timeout_common(c, timeout, SLEEPQ_INTERRUPTIBLE) static inline int try_wait_for_completion(struct completion *c) { int isdone; isdone = 1; sleepq_lock(c); if (c->done) c->done--; else isdone = 0; sleepq_release(c); return (isdone); } static inline int completion_done(struct completion *c) { int isdone; isdone = 1; sleepq_lock(c); if (c->done == 0) isdone = 0; sleepq_release(c); return (isdone); } #endif /* _LINUX_COMPLETION_H_ */ Index: stable/10/sys/ofed/include/linux/delay.h =================================================================== --- stable/10/sys/ofed/include/linux/delay.h (revision 271126) +++ stable/10/sys/ofed/include/linux/delay.h (revision 271127) @@ -1,43 +1,44 @@ /*- * Copyright (c) 2010 Isilon Systems, Inc. * Copyright (c) 2010 iX Systems, Inc. * Copyright (c) 2010 Panasas, Inc. + * Copyright (c) 2013, 2014 Mellanox Technologies, Ltd. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice unmodified, this list of conditions, and the following * disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #ifndef _LINUX_DELAY_H_ #define _LINUX_DELAY_H_ #include static inline void linux_msleep(int ms) { pause("lnxsleep", msecs_to_jiffies(ms)); } #undef msleep #define msleep linux_msleep #endif /* _LINUX_DELAY_H_ */ Index: stable/10/sys/ofed/include/linux/device.h =================================================================== --- stable/10/sys/ofed/include/linux/device.h (revision 271126) +++ stable/10/sys/ofed/include/linux/device.h (revision 271127) @@ -1,394 +1,447 @@ /*- * Copyright (c) 2010 Isilon Systems, Inc. * Copyright (c) 2010 iX Systems, Inc. * Copyright (c) 2010 Panasas, Inc. + * Copyright (c) 2013, 2014 Mellanox Technologies, Ltd. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice unmodified, this list of conditions, and the following * disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #ifndef _LINUX_DEVICE_H_ #define _LINUX_DEVICE_H_ #include #include #include #include #include #include #include #include #include #include #include enum irqreturn { IRQ_NONE = 0, IRQ_HANDLED, IRQ_WAKE_THREAD, }; typedef enum irqreturn irqreturn_t; struct class { const char *name; struct module *owner; struct kobject kobj; devclass_t bsdclass; void (*class_release)(struct class *class); void (*dev_release)(struct device *dev); + char * (*devnode)(struct device *dev, umode_t *mode); }; struct device { struct device *parent; struct list_head irqents; device_t bsddev; dev_t devt; struct class *class; void (*release)(struct device *dev); struct kobject kobj; uint64_t *dma_mask; void *driver_data; unsigned int irq; unsigned int msix; unsigned int msix_max; }; extern struct device linux_rootdev; extern struct kobject class_root; struct class_attribute { - struct attribute attr; - ssize_t (*show)(struct class *, char *); - ssize_t (*store)(struct class *, const char *, size_t); + struct attribute attr; + ssize_t (*show)(struct class *, struct class_attribute *, char *); + ssize_t (*store)(struct class *, struct class_attribute *, const char *, size_t); + const void *(*namespace)(struct class *, const struct class_attribute *); }; + #define CLASS_ATTR(_name, _mode, _show, _store) \ struct class_attribute class_attr_##_name = \ { { #_name, NULL, _mode }, _show, _store } struct device_attribute { struct attribute attr; ssize_t (*show)(struct device *, - struct device_attribute *, char *); + struct device_attribute *, char *); ssize_t (*store)(struct device *, - struct device_attribute *, const char *, - size_t); + struct device_attribute *, const char *, + size_t); }; #define DEVICE_ATTR(_name, _mode, _show, _store) \ struct device_attribute dev_attr_##_name = \ { { #_name, NULL, _mode }, _show, _store } +/* Simple class attribute that is just a static string */ +struct class_attribute_string { + struct class_attribute attr; + char *str; +}; + +static inline ssize_t +show_class_attr_string(struct class *class, + struct class_attribute *attr, char *buf) +{ + struct class_attribute_string *cs; + cs = container_of(attr, struct class_attribute_string, attr); + return snprintf(buf, PAGE_SIZE, "%s\n", cs->str); +} + +/* Currently read-only only */ +#define _CLASS_ATTR_STRING(_name, _mode, _str) \ + { __ATTR(_name, _mode, show_class_attr_string, NULL), _str } +#define CLASS_ATTR_STRING(_name, _mode, _str) \ + struct class_attribute_string class_attr_##_name = \ + _CLASS_ATTR_STRING(_name, _mode, _str) + #define dev_err(dev, fmt, ...) device_printf((dev)->bsddev, fmt, ##__VA_ARGS__) #define dev_warn(dev, fmt, ...) device_printf((dev)->bsddev, fmt, ##__VA_ARGS__) #define dev_info(dev, fmt, ...) device_printf((dev)->bsddev, fmt, ##__VA_ARGS__) #define dev_printk(lvl, dev, fmt, ...) \ device_printf((dev)->bsddev, fmt, ##__VA_ARGS__) static inline void * dev_get_drvdata(struct device *dev) { return dev->driver_data; } static inline void dev_set_drvdata(struct device *dev, void *data) { dev->driver_data = data; } static inline struct device * get_device(struct device *dev) { if (dev) kobject_get(&dev->kobj); return (dev); } static inline char * dev_name(const struct device *dev) { return kobject_name(&dev->kobj); } #define dev_set_name(_dev, _fmt, ...) \ kobject_set_name(&(_dev)->kobj, (_fmt), ##__VA_ARGS__) static inline void put_device(struct device *dev) { if (dev) kobject_put(&dev->kobj); } static inline ssize_t class_show(struct kobject *kobj, struct attribute *attr, char *buf) { struct class_attribute *dattr; ssize_t error; dattr = container_of(attr, struct class_attribute, attr); error = -EIO; if (dattr->show) error = dattr->show(container_of(kobj, struct class, kobj), - buf); + dattr, buf); return (error); } static inline ssize_t class_store(struct kobject *kobj, struct attribute *attr, const char *buf, size_t count) { struct class_attribute *dattr; ssize_t error; dattr = container_of(attr, struct class_attribute, attr); error = -EIO; if (dattr->store) error = dattr->store(container_of(kobj, struct class, kobj), - buf, count); + dattr, buf, count); return (error); } static inline void class_release(struct kobject *kobj) { struct class *class; class = container_of(kobj, struct class, kobj); if (class->class_release) class->class_release(class); } static struct sysfs_ops class_sysfs = { .show = class_show, .store = class_store, }; static struct kobj_type class_ktype = { .release = class_release, .sysfs_ops = &class_sysfs }; static inline int class_register(struct class *class) { class->bsdclass = devclass_create(class->name); kobject_init(&class->kobj, &class_ktype); kobject_set_name(&class->kobj, class->name); kobject_add(&class->kobj, &class_root, class->name); return (0); } static inline void class_unregister(struct class *class) { kobject_put(&class->kobj); } static inline void device_release(struct kobject *kobj) { struct device *dev; dev = container_of(kobj, struct device, kobj); /* This is the precedence defined by linux. */ if (dev->release) dev->release(dev); else if (dev->class && dev->class->dev_release) dev->class->dev_release(dev); } static inline ssize_t dev_show(struct kobject *kobj, struct attribute *attr, char *buf) { struct device_attribute *dattr; ssize_t error; dattr = container_of(attr, struct device_attribute, attr); error = -EIO; if (dattr->show) error = dattr->show(container_of(kobj, struct device, kobj), dattr, buf); return (error); } static inline ssize_t dev_store(struct kobject *kobj, struct attribute *attr, const char *buf, size_t count) { struct device_attribute *dattr; ssize_t error; dattr = container_of(attr, struct device_attribute, attr); error = -EIO; if (dattr->store) error = dattr->store(container_of(kobj, struct device, kobj), dattr, buf, count); return (error); } static struct sysfs_ops dev_sysfs = { .show = dev_show, .store = dev_store, }; static struct kobj_type dev_ktype = { .release = device_release, .sysfs_ops = &dev_sysfs }; /* * Devices are registered and created for exporting to sysfs. create * implies register and register assumes the device fields have been * setup appropriately before being called. */ static inline int device_register(struct device *dev) { device_t bsddev; int unit; bsddev = NULL; if (dev->devt) { unit = MINOR(dev->devt); bsddev = devclass_get_device(dev->class->bsdclass, unit); } else unit = -1; if (bsddev == NULL) bsddev = device_add_child(dev->parent->bsddev, dev->class->kobj.name, unit); if (bsddev) { if (dev->devt == 0) dev->devt = makedev(0, device_get_unit(bsddev)); device_set_softc(bsddev, dev); } dev->bsddev = bsddev; kobject_init(&dev->kobj, &dev_ktype); kobject_add(&dev->kobj, &dev->class->kobj, dev_name(dev)); return (0); } static inline void device_unregister(struct device *dev) { device_t bsddev; bsddev = dev->bsddev; mtx_lock(&Giant); if (bsddev) device_delete_child(device_get_parent(bsddev), bsddev); mtx_unlock(&Giant); put_device(dev); } struct device *device_create(struct class *class, struct device *parent, dev_t devt, void *drvdata, const char *fmt, ...); static inline void device_destroy(struct class *class, dev_t devt) { device_t bsddev; int unit; unit = MINOR(devt); bsddev = devclass_get_device(class->bsdclass, unit); if (bsddev) device_unregister(device_get_softc(bsddev)); } static inline void class_kfree(struct class *class) { kfree(class); } static inline struct class * class_create(struct module *owner, const char *name) { struct class *class; int error; class = kzalloc(sizeof(*class), M_WAITOK); class->owner = owner; class->name= name; class->class_release = class_kfree; error = class_register(class); if (error) { kfree(class); return (NULL); } return (class); } static inline void class_destroy(struct class *class) { if (class == NULL) return; class_unregister(class); } static inline int device_create_file(struct device *dev, const struct device_attribute *attr) { if (dev) return sysfs_create_file(&dev->kobj, &attr->attr); return -EINVAL; } static inline void device_remove_file(struct device *dev, const struct device_attribute *attr) { if (dev) sysfs_remove_file(&dev->kobj, &attr->attr); } static inline int class_create_file(struct class *class, const struct class_attribute *attr) { if (class) return sysfs_create_file(&class->kobj, &attr->attr); return -EINVAL; } static inline void class_remove_file(struct class *class, const struct class_attribute *attr) { if (class) sysfs_remove_file(&class->kobj, &attr->attr); } static inline int dev_to_node(struct device *dev) { return -1; +} + +static inline char *kvasprintf(gfp_t gfp, const char *fmt, va_list ap) +{ + unsigned int len; + char *p = NULL; + va_list aq; + + va_copy(aq, ap); + len = vsnprintf(NULL, 0, fmt, aq); + va_end(aq); + + vsnprintf(p, len+1, fmt, ap); + + return p; +} + +static inline char *kasprintf(gfp_t gfp, const char *fmt, ...) +{ + va_list ap; + char *p; + + va_start(ap, fmt); + p = kvasprintf(gfp, fmt, ap); + va_end(ap); + + return p; } #endif /* _LINUX_DEVICE_H_ */ Index: stable/10/sys/ofed/include/linux/dma-attrs.h =================================================================== --- stable/10/sys/ofed/include/linux/dma-attrs.h (revision 271126) +++ stable/10/sys/ofed/include/linux/dma-attrs.h (revision 271127) @@ -1,47 +1,48 @@ /*- * Copyright (c) 2010 Isilon Systems, Inc. * Copyright (c) 2010 iX Systems, Inc. * Copyright (c) 2010 Panasas, Inc. + * Copyright (c) 2013, 2014 Mellanox Technologies, Ltd. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice unmodified, this list of conditions, and the following * disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #ifndef _LINUX_DMA_ATTR_H_ #define _LINUX_DMA_ATTR_H_ enum dma_attr { DMA_ATTR_WRITE_BARRIER, DMA_ATTR_WEAK_ORDERING, DMA_ATTR_MAX, }; #define __DMA_ATTRS_LONGS BITS_TO_LONGS(DMA_ATTR_MAX) struct dma_attrs { unsigned long flags; }; #define DEFINE_DMA_ATTRS(x) struct dma_attrs x = { } static inline void init_dma_attrs(struct dma_attrs *attrs) { attrs->flags = 0; } #endif /* _LINUX_DMA_ATTR_H_ */ Index: stable/10/sys/ofed/include/linux/dma-mapping.h =================================================================== --- stable/10/sys/ofed/include/linux/dma-mapping.h (revision 271126) +++ stable/10/sys/ofed/include/linux/dma-mapping.h (revision 271127) @@ -1,270 +1,271 @@ /*- * Copyright (c) 2010 Isilon Systems, Inc. * Copyright (c) 2010 iX Systems, Inc. * Copyright (c) 2010 Panasas, Inc. + * Copyright (c) 2013, 2014 Mellanox Technologies, Ltd. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice unmodified, this list of conditions, and the following * disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #ifndef _LINUX_DMA_MAPPING_H_ #define _LINUX_DMA_MAPPING_H_ #include #include #include #include #include #include #include #include #include #include #include #include #include #include enum dma_data_direction { DMA_BIDIRECTIONAL = 0, DMA_TO_DEVICE = 1, DMA_FROM_DEVICE = 2, DMA_NONE = 3, }; struct dma_map_ops { void* (*alloc_coherent)(struct device *dev, size_t size, dma_addr_t *dma_handle, gfp_t gfp); void (*free_coherent)(struct device *dev, size_t size, void *vaddr, dma_addr_t dma_handle); dma_addr_t (*map_page)(struct device *dev, struct page *page, unsigned long offset, size_t size, enum dma_data_direction dir, struct dma_attrs *attrs); void (*unmap_page)(struct device *dev, dma_addr_t dma_handle, size_t size, enum dma_data_direction dir, struct dma_attrs *attrs); int (*map_sg)(struct device *dev, struct scatterlist *sg, int nents, enum dma_data_direction dir, struct dma_attrs *attrs); void (*unmap_sg)(struct device *dev, struct scatterlist *sg, int nents, enum dma_data_direction dir, struct dma_attrs *attrs); void (*sync_single_for_cpu)(struct device *dev, dma_addr_t dma_handle, size_t size, enum dma_data_direction dir); void (*sync_single_for_device)(struct device *dev, dma_addr_t dma_handle, size_t size, enum dma_data_direction dir); void (*sync_single_range_for_cpu)(struct device *dev, dma_addr_t dma_handle, unsigned long offset, size_t size, enum dma_data_direction dir); void (*sync_single_range_for_device)(struct device *dev, dma_addr_t dma_handle, unsigned long offset, size_t size, enum dma_data_direction dir); void (*sync_sg_for_cpu)(struct device *dev, struct scatterlist *sg, int nents, enum dma_data_direction dir); void (*sync_sg_for_device)(struct device *dev, struct scatterlist *sg, int nents, enum dma_data_direction dir); int (*mapping_error)(struct device *dev, dma_addr_t dma_addr); int (*dma_supported)(struct device *dev, u64 mask); int is_phys; }; #define DMA_BIT_MASK(n) (((n) == 64) ? ~0ULL : ((1ULL << (n)) - 1)) static inline int dma_supported(struct device *dev, u64 mask) { /* XXX busdma takes care of this elsewhere. */ return (1); } static inline int dma_set_mask(struct device *dev, u64 dma_mask) { if (!dev->dma_mask || !dma_supported(dev, dma_mask)) return -EIO; *dev->dma_mask = dma_mask; return (0); } static inline int dma_set_coherent_mask(struct device *dev, u64 mask) { if (!dma_supported(dev, mask)) return -EIO; /* XXX Currently we don't support a seperate coherent mask. */ return 0; } static inline void * dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle, gfp_t flag) { vm_paddr_t high; size_t align; void *mem; if (dev->dma_mask) high = *dev->dma_mask; else high = BUS_SPACE_MAXADDR_32BIT; align = PAGE_SIZE << get_order(size); mem = (void *)kmem_alloc_contig(kmem_arena, size, flag, 0, high, align, 0, VM_MEMATTR_DEFAULT); if (mem) *dma_handle = vtophys(mem); else *dma_handle = 0; return (mem); } static inline void dma_free_coherent(struct device *dev, size_t size, void *cpu_addr, dma_addr_t dma_handle) { kmem_free(kmem_arena, (vm_offset_t)cpu_addr, size); } /* XXX This only works with no iommu. */ static inline dma_addr_t dma_map_single_attrs(struct device *dev, void *ptr, size_t size, enum dma_data_direction dir, struct dma_attrs *attrs) { return vtophys(ptr); } static inline void dma_unmap_single_attrs(struct device *dev, dma_addr_t addr, size_t size, enum dma_data_direction dir, struct dma_attrs *attrs) { } static inline int dma_map_sg_attrs(struct device *dev, struct scatterlist *sgl, int nents, enum dma_data_direction dir, struct dma_attrs *attrs) { struct scatterlist *sg; int i; for_each_sg(sgl, sg, nents, i) sg_dma_address(sg) = sg_phys(sg); return (nents); } static inline void dma_unmap_sg_attrs(struct device *dev, struct scatterlist *sg, int nents, enum dma_data_direction dir, struct dma_attrs *attrs) { } static inline dma_addr_t dma_map_page(struct device *dev, struct page *page, unsigned long offset, size_t size, enum dma_data_direction direction) { return VM_PAGE_TO_PHYS(page) + offset; } static inline void dma_unmap_page(struct device *dev, dma_addr_t dma_address, size_t size, enum dma_data_direction direction) { } static inline void dma_sync_single_for_cpu(struct device *dev, dma_addr_t dma_handle, size_t size, enum dma_data_direction direction) { } static inline void dma_sync_single(struct device *dev, dma_addr_t addr, size_t size, enum dma_data_direction dir) { dma_sync_single_for_cpu(dev, addr, size, dir); } static inline void dma_sync_single_for_device(struct device *dev, dma_addr_t dma_handle, size_t size, enum dma_data_direction direction) { } static inline void dma_sync_sg_for_cpu(struct device *dev, struct scatterlist *sg, int nelems, enum dma_data_direction direction) { } static inline void dma_sync_sg_for_device(struct device *dev, struct scatterlist *sg, int nelems, enum dma_data_direction direction) { } static inline void dma_sync_single_range_for_cpu(struct device *dev, dma_addr_t dma_handle, unsigned long offset, size_t size, int direction) { } static inline void dma_sync_single_range_for_device(struct device *dev, dma_addr_t dma_handle, unsigned long offset, size_t size, int direction) { } static inline int dma_mapping_error(struct device *dev, dma_addr_t dma_addr) { return (0); } static inline unsigned int dma_set_max_seg_size(struct device *dev, unsigned int size) { return (0); } #define dma_map_single(d, a, s, r) dma_map_single_attrs(d, a, s, r, NULL) #define dma_unmap_single(d, a, s, r) dma_unmap_single_attrs(d, a, s, r, NULL) #define dma_map_sg(d, s, n, r) dma_map_sg_attrs(d, s, n, r, NULL) #define dma_unmap_sg(d, s, n, r) dma_unmap_sg_attrs(d, s, n, r, NULL) #define DEFINE_DMA_UNMAP_ADDR(name) dma_addr_t name #define DEFINE_DMA_UNMAP_LEN(name) __u32 name #define dma_unmap_addr(p, name) ((p)->name) #define dma_unmap_addr_set(p, name, v) (((p)->name) = (v)) #define dma_unmap_len(p, name) ((p)->name) #define dma_unmap_len_set(p, name, v) (((p)->name) = (v)) extern int uma_align_cache; #define dma_get_cache_alignment() uma_align_cache #endif /* _LINUX_DMA_MAPPING_H_ */ Index: stable/10/sys/ofed/include/linux/dmapool.h =================================================================== --- stable/10/sys/ofed/include/linux/dmapool.h (revision 271126) +++ stable/10/sys/ofed/include/linux/dmapool.h (revision 271127) @@ -1,85 +1,86 @@ /*- * Copyright (c) 2010 Isilon Systems, Inc. * Copyright (c) 2010 iX Systems, Inc. * Copyright (c) 2010 Panasas, Inc. + * Copyright (c) 2013, 2014 Mellanox Technologies, Ltd. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice unmodified, this list of conditions, and the following * disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #ifndef _LINUX_DMAPOOL_H_ #define _LINUX_DMAPOOL_H_ #include #include #include #include #include struct dma_pool { uma_zone_t pool_zone; }; static inline struct dma_pool * dma_pool_create(char *name, struct device *dev, size_t size, size_t align, size_t boundary) { struct dma_pool *pool; pool = kmalloc(sizeof(*pool), GFP_KERNEL); align--; /* * XXX Eventually this could use a seperate allocf to honor boundary * and physical address requirements of the device. */ pool->pool_zone = uma_zcreate(name, size, NULL, NULL, NULL, NULL, align, UMA_ZONE_OFFPAGE|UMA_ZONE_HASH); return (pool); } static inline void dma_pool_destroy(struct dma_pool *pool) { uma_zdestroy(pool->pool_zone); kfree(pool); } static inline void * dma_pool_alloc(struct dma_pool *pool, gfp_t mem_flags, dma_addr_t *handle) { void *vaddr; vaddr = uma_zalloc(pool->pool_zone, mem_flags); if (vaddr) *handle = vtophys(vaddr); return (vaddr); } static inline void dma_pool_free(struct dma_pool *pool, void *vaddr, dma_addr_t addr) { uma_zfree(pool->pool_zone, vaddr); } #endif /* _LINUX_DMAPOOL_H_ */ Index: stable/10/sys/ofed/include/linux/err.h =================================================================== --- stable/10/sys/ofed/include/linux/err.h (revision 271126) +++ stable/10/sys/ofed/include/linux/err.h (revision 271127) @@ -1,60 +1,72 @@ /*- * Copyright (c) 2010 Isilon Systems, Inc. * Copyright (c) 2010 iX Systems, Inc. * Copyright (c) 2010 Panasas, Inc. + * Copyright (c) 2013, 2014 Mellanox Technologies, Ltd. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice unmodified, this list of conditions, and the following * disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #ifndef _LINUX_ERR_H_ #define _LINUX_ERR_H_ #define MAX_ERRNO 4095 #define IS_ERR_VALUE(x) ((x) >= (unsigned long)-MAX_ERRNO) static inline void * ERR_PTR(long error) { return (void *)error; } static inline long PTR_ERR(const void *ptr) { return (long)ptr; } static inline long IS_ERR(const void *ptr) { return IS_ERR_VALUE((unsigned long)ptr); } static inline void * ERR_CAST(void *ptr) { return (void *)ptr; } + +static inline int +PTR_ERR_OR_ZERO(const void *ptr) +{ + if (IS_ERR(ptr)) + return PTR_ERR(ptr); + else + return 0; +} + +#define PTR_RET(p) PTR_ERR_OR_ZERO(p) #endif /* _LINUX_ERR_H_ */ Index: stable/10/sys/ofed/include/linux/errno.h =================================================================== --- stable/10/sys/ofed/include/linux/errno.h (revision 271126) +++ stable/10/sys/ofed/include/linux/errno.h (revision 271127) @@ -1,39 +1,42 @@ /*- * Copyright (c) 2010 Isilon Systems, Inc. * Copyright (c) 2010 iX Systems, Inc. * Copyright (c) 2010 Panasas, Inc. + * Copyright (c) 2013, 2014 Mellanox Technologies, Ltd. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice unmodified, this list of conditions, and the following * disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #ifndef _LINUX_ERRNO_H_ #define _LINUX_ERRNO_H_ #include -#define ECOMM ESTALE -#define ENODATA ECONNREFUSED -#define ENOIOCTLCMD ENOIOCTL /* XXX this is negative */ -#define ERESTARTSYS ERESTART /* XXX this is negative */ +#define ECOMM ESTALE +#define ENODATA ECONNREFUSED +#define ENOIOCTLCMD ENOIOCTL +#define ERESTARTSYS ERESTART +#define ENOTSUPP EOPNOTSUPP +#define ENONET EHOSTDOWN -#endif /* _LINUX_ERRNO_H_ */ +#endif /* _LINUX_ERRNO_H_ */ Index: stable/10/sys/ofed/include/linux/etherdevice.h =================================================================== --- stable/10/sys/ofed/include/linux/etherdevice.h (nonexistent) +++ stable/10/sys/ofed/include/linux/etherdevice.h (revision 271127) @@ -0,0 +1,94 @@ +/* + * Copyright (c) 2007 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2014 Mellanox Technologies, Ltd. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + + +#ifndef _LINUX_ETHERDEVICE +#define _LINUX_ETHERDEVICE + +#include + +/** + * is_zero_ether_addr - Determine if give Ethernet address is all zeros. + * @addr: Pointer to a six-byte array containing the Ethernet address + * + * Return true if the address is all zeroes. + */ +static inline bool is_zero_ether_addr(const u8 *addr) +{ + return !(addr[0] | addr[1] | addr[2] | addr[3] | addr[4] | addr[5]); +} + + + +/** + * is_multicast_ether_addr - Determine if the Ethernet address is a multicast. + * @addr: Pointer to a six-byte array containing the Ethernet address + * + * Return true if the address is a multicast address. + * By definition the broadcast address is also a multicast address. + */ +static inline bool is_multicast_ether_addr(const u8 *addr) +{ + return (0x01 & addr[0]); +} + +/** + * is_broadcast_ether_addr - Determine if the Ethernet address is broadcast + * @addr: Pointer to a six-byte array containing the Ethernet address + * + * Return true if the address is the broadcast address. + */ +static inline bool is_broadcast_ether_addr(const u8 *addr) +{ + return (addr[0] & addr[1] & addr[2] & addr[3] & addr[4] & addr[5]) == 0xff; +} + +/** + * is_valid_ether_addr - Determine if the given Ethernet address is valid + * @addr: Pointer to a six-byte array containing the Ethernet address + * + * Check that the Ethernet address (MAC) is not 00:00:00:00:00:00, is not + * a multicast address, and is not FF:FF:FF:FF:FF:FF. + * + * Return true if the address is valid. + **/ +static inline bool is_valid_ether_addr(const u8 *addr) +{ + /* FF:FF:FF:FF:FF:FF is a multicast address so we don't need to + ** explicitly check for it here. */ + return !is_multicast_ether_addr(addr) && !is_zero_ether_addr(addr); +} + + + +#endif /* _LINUX_ETHERDEVICE */ Property changes on: stable/10/sys/ofed/include/linux/etherdevice.h ___________________________________________________________________ Added: fbsd:nokeywords ## -0,0 +1 ## +true \ No newline at end of property Index: stable/10/sys/ofed/include/linux/ethtool.h =================================================================== --- stable/10/sys/ofed/include/linux/ethtool.h (revision 271126) +++ stable/10/sys/ofed/include/linux/ethtool.h (revision 271127) @@ -1,31 +1,32 @@ /*- * Copyright (c) 2010 Isilon Systems, Inc. * Copyright (c) 2010 iX Systems, Inc. * Copyright (c) 2010 Panasas, Inc. + * Copyright (c) 2013 Mellanox Technologies, Ltd. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice unmodified, this list of conditions, and the following * disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #ifndef _LINUX_ETHTOOL_H_ #define _LINUX_ETHTOOL_H_ #endif /* _LINUX_ETHTOOL_H_ */ Index: stable/10/sys/ofed/include/linux/file.h =================================================================== --- stable/10/sys/ofed/include/linux/file.h (revision 271126) +++ stable/10/sys/ofed/include/linux/file.h (revision 271127) @@ -1,127 +1,141 @@ /*- * Copyright (c) 2010 Isilon Systems, Inc. * Copyright (c) 2010 iX Systems, Inc. * Copyright (c) 2010 Panasas, Inc. + * Copyright (c) 2013 Mellanox Technologies, Ltd. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice unmodified, this list of conditions, and the following * disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #ifndef _LINUX_FILE_H_ #define _LINUX_FILE_H_ #include #include #include #include #include #include struct linux_file; #undef file extern struct fileops linuxfileops; static inline struct linux_file * linux_fget(unsigned int fd) { struct file *file; if (fget_unlocked(curthread->td_proc->p_fd, fd, NULL, 0, &file, NULL) != 0) { return (NULL); } return (struct linux_file *)file->f_data; } static inline void fput(struct linux_file *filp) { if (filp->_file == NULL) { kfree(filp); return; } if (refcount_release(&filp->_file->f_count)) { _fdrop(filp->_file, curthread); kfree(filp); } } static inline void put_unused_fd(unsigned int fd) { struct file *file; if (fget_unlocked(curthread->td_proc->p_fd, fd, NULL, 0, &file, NULL) != 0) { return; } fdclose(curthread->td_proc->p_fd, file, fd, curthread); } static inline void fd_install(unsigned int fd, struct linux_file *filp) { struct file *file; if (fget_unlocked(curthread->td_proc->p_fd, fd, NULL, 0, &file, NULL) != 0) { file = NULL; } filp->_file = file; finit(file, filp->f_mode, DTYPE_DEV, filp, &linuxfileops); } static inline int get_unused_fd(void) { struct file *file; int error; int fd; error = falloc(curthread, &file, &fd, 0); if (error) return -error; return fd; } static inline struct linux_file * -_alloc_file(int mode, const struct file_operations *fops) +alloc_file(int mode, const struct file_operations *fops) { struct linux_file *filp; filp = kzalloc(sizeof(*filp), GFP_KERNEL); - if (filp == NULL) + if (filp == NULL) return (NULL); filp->f_op = fops; filp->f_mode = mode; return filp; } -#define alloc_file(mnt, root, mode, fops) _alloc_file((mode), (fops)) +struct fd { + struct linux_file *linux_file; +}; + +static inline void fdput(struct fd fd) +{ + fput(fd.linux_file); +} + +static inline struct fd fdget(unsigned int fd) +{ + struct linux_file *f = linux_fget(fd); + return (struct fd){f}; +} #define file linux_file #define fget linux_fget #endif /* _LINUX_FILE_H_ */ Index: stable/10/sys/ofed/include/linux/fs.h =================================================================== --- stable/10/sys/ofed/include/linux/fs.h (revision 271126) +++ stable/10/sys/ofed/include/linux/fs.h (revision 271127) @@ -1,183 +1,211 @@ /*- * Copyright (c) 2010 Isilon Systems, Inc. * Copyright (c) 2010 iX Systems, Inc. * Copyright (c) 2010 Panasas, Inc. + * Copyright (c) 2013 Mellanox Technologies, Ltd. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice unmodified, this list of conditions, and the following * disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #ifndef _LINUX_FS_H_ #define _LINUX_FS_H_ #include #include #include #include #include #include #include #include struct module; struct kiocb; struct iovec; struct dentry; struct page; struct file_lock; struct pipe_inode_info; struct vm_area_struct; struct poll_table_struct; struct files_struct; #define inode vnode #define i_cdev v_rdev #define S_IRUGO (S_IRUSR | S_IRGRP | S_IROTH) #define S_IWUGO (S_IWUSR | S_IWGRP | S_IWOTH) typedef struct files_struct *fl_owner_t; struct dentry { struct inode *d_inode; }; struct file_operations; struct linux_file { struct file *_file; const struct file_operations *f_op; void *private_data; int f_flags; int f_mode; /* Just starting mode. */ struct dentry *f_dentry; struct dentry f_dentry_store; struct selinfo f_selinfo; struct sigio *f_sigio; struct vnode *f_vnode; }; #define file linux_file #define fasync_struct sigio * #define fasync_helper(fd, filp, on, queue) \ ({ \ if ((on)) \ *(queue) = &(filp)->f_sigio; \ else \ *(queue) = NULL; \ 0; \ }) #define kill_fasync(queue, sig, pollstat) \ do { \ if (*(queue) != NULL) \ pgsigio(*(queue), (sig), 0); \ } while (0) typedef int (*filldir_t)(void *, const char *, int, loff_t, u64, unsigned); struct file_operations { struct module *owner; ssize_t (*read)(struct file *, char __user *, size_t, loff_t *); ssize_t (*write)(struct file *, const char __user *, size_t, loff_t *); unsigned int (*poll) (struct file *, struct poll_table_struct *); long (*unlocked_ioctl)(struct file *, unsigned int, unsigned long); int (*mmap)(struct file *, struct vm_area_struct *); int (*open)(struct inode *, struct file *); int (*release)(struct inode *, struct file *); int (*fasync)(int, struct file *, int); + +/* Although not supported in FreeBSD, to align with Linux code + * we are adding llseek() only when it is mapped to no_llseek which returns + * an illegal seek error + */ + loff_t (*llseek)(struct file *, loff_t, int); #if 0 /* We do not support these methods. Don't permit them to compile. */ loff_t (*llseek)(struct file *, loff_t, int); ssize_t (*aio_read)(struct kiocb *, const struct iovec *, unsigned long, loff_t); ssize_t (*aio_write)(struct kiocb *, const struct iovec *, unsigned long, loff_t); int (*readdir)(struct file *, void *, filldir_t); int (*ioctl)(struct inode *, struct file *, unsigned int, unsigned long); long (*compat_ioctl)(struct file *, unsigned int, unsigned long); int (*flush)(struct file *, fl_owner_t id); int (*fsync)(struct file *, struct dentry *, int datasync); int (*aio_fsync)(struct kiocb *, int datasync); int (*lock)(struct file *, int, struct file_lock *); ssize_t (*sendpage)(struct file *, struct page *, int, size_t, loff_t *, int); unsigned long (*get_unmapped_area)(struct file *, unsigned long, unsigned long, unsigned long, unsigned long); int (*check_flags)(int); int (*flock)(struct file *, int, struct file_lock *); ssize_t (*splice_write)(struct pipe_inode_info *, struct file *, loff_t *, size_t, unsigned int); ssize_t (*splice_read)(struct file *, loff_t *, struct pipe_inode_info *, size_t, unsigned int); int (*setlease)(struct file *, long, struct file_lock **); #endif }; #define fops_get(fops) (fops) #define FMODE_READ FREAD #define FMODE_WRITE FWRITE #define FMODE_EXEC FEXEC static inline int register_chrdev_region(dev_t dev, unsigned range, const char *name) { return 0; } static inline void unregister_chrdev_region(dev_t dev, unsigned range) { return; } +static inline int +alloc_chrdev_region(dev_t *dev, unsigned baseminor, unsigned count, + const char *name) +{ + + return 0; +} + +/* No current support for seek op in FreeBSD */ +static inline int +nonseekable_open(struct inode *inode, struct file *filp) +{ + return 0; +} + static inline dev_t iminor(struct inode *inode) { return dev2unit(inode->v_rdev); } static inline struct inode * igrab(struct inode *inode) { int error; error = vget(inode, 0, curthread); if (error) return (NULL); return (inode); } static inline void iput(struct inode *inode) { vrele(inode); } -#endif /* _LINUX_FS_H_ */ +static inline loff_t +no_llseek(struct file *file, loff_t offset, int whence) +{ + return -ESPIPE; +} + +#endif /* _LINUX_FS_H_ */ Index: stable/10/sys/ofed/include/linux/gfp.h =================================================================== --- stable/10/sys/ofed/include/linux/gfp.h (revision 271126) +++ stable/10/sys/ofed/include/linux/gfp.h (revision 271127) @@ -1,128 +1,129 @@ /*- * Copyright (c) 2010 Isilon Systems, Inc. * Copyright (c) 2010 iX Systems, Inc. * Copyright (c) 2010 Panasas, Inc. + * Copyright (c) 2013 Mellanox Technologies, Ltd. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice unmodified, this list of conditions, and the following * disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #ifndef _LINUX_GFP_H_ #define _LINUX_GFP_H_ #include #include #include #include #include #include #include #define __GFP_NOWARN 0 #define __GFP_HIGHMEM 0 #define __GFP_ZERO M_ZERO #define GFP_NOWAIT M_NOWAIT #define GFP_ATOMIC (M_NOWAIT | M_USE_RESERVE) #define GFP_KERNEL M_WAITOK #define GFP_USER M_WAITOK #define GFP_HIGHUSER M_WAITOK #define GFP_HIGHUSER_MOVABLE M_WAITOK #define GFP_IOFS M_NOWAIT static inline void * page_address(struct page *page) { if (page->object != kmem_object && page->object != kernel_object) return (NULL); return ((void *)(uintptr_t)(VM_MIN_KERNEL_ADDRESS + IDX_TO_OFF(page->pindex))); } static inline unsigned long _get_page(gfp_t mask) { return kmem_malloc(kmem_arena, PAGE_SIZE, mask); } #define get_zeroed_page(mask) _get_page((mask) | M_ZERO) #define alloc_page(mask) virt_to_page(_get_page((mask))) #define __get_free_page(mask) _get_page((mask)) static inline void free_page(unsigned long page) { if (page == 0) return; kmem_free(kmem_arena, page, PAGE_SIZE); } static inline void __free_page(struct page *m) { if (m->object != kmem_object) panic("__free_page: Freed page %p not allocated via wrappers.", m); kmem_free(kmem_arena, (vm_offset_t)page_address(m), PAGE_SIZE); } static inline void __free_pages(struct page *m, unsigned int order) { size_t size; if (m == NULL) return; size = PAGE_SIZE << order; kmem_free(kmem_arena, (vm_offset_t)page_address(m), size); } /* * Alloc pages allocates directly from the buddy allocator on linux so * order specifies a power of two bucket of pages and the results * are expected to be aligned on the size as well. */ static inline struct page * alloc_pages(gfp_t gfp_mask, unsigned int order) { unsigned long page; size_t size; size = PAGE_SIZE << order; page = kmem_alloc_contig(kmem_arena, size, gfp_mask, 0, -1, size, 0, VM_MEMATTR_DEFAULT); if (page == 0) return (NULL); return (virt_to_page(page)); } #define alloc_pages_node(node, mask, order) alloc_pages(mask, order) #define kmalloc_node(chunk, mask, node) kmalloc(chunk, mask) #endif /* _LINUX_GFP_H_ */ Index: stable/10/sys/ofed/include/linux/hardirq.h =================================================================== --- stable/10/sys/ofed/include/linux/hardirq.h (revision 271126) +++ stable/10/sys/ofed/include/linux/hardirq.h (revision 271127) @@ -1,39 +1,40 @@ /*- * Copyright (c) 2010 Isilon Systems, Inc. * Copyright (c) 2010 iX Systems, Inc. * Copyright (c) 2010 Panasas, Inc. + * Copyright (c) 2013 Mellanox Technologies, Ltd. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice unmodified, this list of conditions, and the following * disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #ifndef _LINUX_HARDIRQ_H_ #define _LINUX_HARDIRQ_H_ #include #include #include #include #define synchronize_irq(irq) _intr_drain((irq)) #endif /* _LINUX_HARDIRQ_H_ */ Index: stable/10/sys/ofed/include/linux/idr.h =================================================================== --- stable/10/sys/ofed/include/linux/idr.h (revision 271126) +++ stable/10/sys/ofed/include/linux/idr.h (revision 271127) @@ -1,74 +1,75 @@ /*- * Copyright (c) 2010 Isilon Systems, Inc. * Copyright (c) 2010 iX Systems, Inc. * Copyright (c) 2010 Panasas, Inc. + * Copyright (c) 2013 Mellanox Technologies, Ltd. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice unmodified, this list of conditions, and the following * disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #ifndef _LINUX_IDR_H_ #define _LINUX_IDR_H_ #include #define IDR_BITS 5 #define IDR_SIZE (1 << IDR_BITS) #define IDR_MASK (IDR_SIZE - 1) #define MAX_ID_SHIFT ((sizeof(int) * NBBY) - 1) #define MAX_ID_BIT (1U << MAX_ID_SHIFT) #define MAX_ID_MASK (MAX_ID_BIT - 1) #define MAX_LEVEL (MAX_ID_SHIFT + IDR_BITS - 1) / IDR_BITS #define MAX_IDR_SHIFT (sizeof(int)*8 - 1) #define MAX_IDR_BIT (1U << MAX_IDR_SHIFT) #define MAX_IDR_MASK (MAX_IDR_BIT - 1) struct idr_layer { unsigned long bitmap; struct idr_layer *ary[IDR_SIZE]; }; struct idr { struct mtx lock; struct idr_layer *top; struct idr_layer *free; int layers; }; #define DEFINE_IDR(name) \ struct idr name; \ SYSINIT(name##_idr_sysinit, SI_SUB_DRIVERS, SI_ORDER_FIRST, \ idr_init, &(name)); void *idr_find(struct idr *idp, int id); int idr_pre_get(struct idr *idp, gfp_t gfp_mask); int idr_get_new(struct idr *idp, void *ptr, int *id); int idr_get_new_above(struct idr *idp, void *ptr, int starting_id, int *id); void *idr_replace(struct idr *idp, void *ptr, int id); void idr_remove(struct idr *idp, int id); void idr_remove_all(struct idr *idp); void idr_destroy(struct idr *idp); void idr_init(struct idr *idp); #endif /* _LINUX_IDR_H_ */ Index: stable/10/sys/ofed/include/linux/if_arp.h =================================================================== --- stable/10/sys/ofed/include/linux/if_arp.h (revision 271126) +++ stable/10/sys/ofed/include/linux/if_arp.h (revision 271127) @@ -1,32 +1,33 @@ /*- * Copyright (c) 2010 Isilon Systems, Inc. * Copyright (c) 2010 iX Systems, Inc. * Copyright (c) 2010 Panasas, Inc. + * Copyright (c) 2013 Mellanox Technologies, Ltd. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice unmodified, this list of conditions, and the following * disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #ifndef _LINUX_IF_ARP_H_ #define _LINUX_IF_ARP_H_ #include #include #endif /* _LINUX_IF_ARP_H_ */ Index: stable/10/sys/ofed/include/linux/if_ether.h =================================================================== --- stable/10/sys/ofed/include/linux/if_ether.h (revision 271126) +++ stable/10/sys/ofed/include/linux/if_ether.h (revision 271127) @@ -1,42 +1,50 @@ /*- * Copyright (c) 2010 Isilon Systems, Inc. * Copyright (c) 2010 iX Systems, Inc. * Copyright (c) 2010 Panasas, Inc. + * Copyright (c) 2013 Mellanox Technologies, Ltd. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice unmodified, this list of conditions, and the following * disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #ifndef _LINUX_IF_ETHER_H_ #define _LINUX_IF_ETHER_H_ #include #include #define ETH_P_8021Q ETHERTYPE_VLAN +#define ETH_HLEN ETHER_HDR_LEN /* Total octets in header. */ +#ifndef ETH_ALEN +#define ETH_ALEN ETHER_ADDR_LEN +#endif +#define ETH_FCS_LEN 4 /* Octets in the FCS */ +#define VLAN_HLEN 4 /* The additional bytes (on top of the Ethernet header) + * that VLAN requires. */ /* * defined Ethernet Protocol ID's. */ -#define ETH_P_IP 0x0800 /* Internet Protocol packet */ +#define ETH_P_IP 0x0800 /* Internet Protocol packet */ #endif /* _LINUX_IF_ETHER_H_ */ Index: stable/10/sys/ofed/include/linux/if_vlan.h =================================================================== --- stable/10/sys/ofed/include/linux/if_vlan.h (revision 271126) +++ stable/10/sys/ofed/include/linux/if_vlan.h (revision 271127) @@ -1,35 +1,40 @@ /*- * Copyright (c) 2010 Isilon Systems, Inc. * Copyright (c) 2010 iX Systems, Inc. * Copyright (c) 2010 Panasas, Inc. + * Copyright (c) 2013 Mellanox Technologies, Ltd. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice unmodified, this list of conditions, and the following * disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #ifndef _LINUX_IF_VLAN_H_ #define _LINUX_IF_VLAN_H_ +#include +#include #include #include + +#define VLAN_N_VID 4096 #endif /* _LINUX_IF_VLAN_H_ */ Index: stable/10/sys/ofed/include/linux/in.h =================================================================== --- stable/10/sys/ofed/include/linux/in.h (revision 271126) +++ stable/10/sys/ofed/include/linux/in.h (revision 271127) @@ -1,39 +1,40 @@ /*- * Copyright (c) 2010 Isilon Systems, Inc. * Copyright (c) 2010 iX Systems, Inc. * Copyright (c) 2010 Panasas, Inc. + * Copyright (c) 2013, 2014 Mellanox Technologies, Ltd. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice unmodified, this list of conditions, and the following * disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #ifndef _LINUX_IN_H_ #define _LINUX_IN_H_ #include "opt_inet.h" #include #include #define ipv4_is_zeronet IN_ZERONET #define ipv4_is_loopback IN_LOOPBACK #endif /* _LINUX_IN_H_ */ Index: stable/10/sys/ofed/include/linux/in6.h =================================================================== --- stable/10/sys/ofed/include/linux/in6.h (revision 271126) +++ stable/10/sys/ofed/include/linux/in6.h (revision 271127) @@ -1,36 +1,37 @@ /*- * Copyright (c) 2010 Isilon Systems, Inc. * Copyright (c) 2010 iX Systems, Inc. * Copyright (c) 2010 Panasas, Inc. + * Copyright (c) 2013 Mellanox Technologies, Ltd. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice unmodified, this list of conditions, and the following * disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #ifndef _LINUX_IN6_H_ #define _LINUX_IN6_H_ #ifndef KLD_MODULE #include "opt_inet6.h" #endif #endif /* _LINUX_IN6_H_ */ Index: stable/10/sys/ofed/include/linux/inet.h =================================================================== --- stable/10/sys/ofed/include/linux/inet.h (revision 271126) +++ stable/10/sys/ofed/include/linux/inet.h (revision 271127) @@ -1,31 +1,32 @@ /*- * Copyright (c) 2010 Isilon Systems, Inc. * Copyright (c) 2010 iX Systems, Inc. * Copyright (c) 2010 Panasas, Inc. + * Copyright (c) 2013, 2014 Mellanox Technologies, Ltd. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice unmodified, this list of conditions, and the following * disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #ifndef _LINUX_INET_H_ #define _LINUX_INET_H_ #endif /* _LINUX_INET_H_ */ Index: stable/10/sys/ofed/include/linux/inetdevice.h =================================================================== --- stable/10/sys/ofed/include/linux/inetdevice.h (revision 271126) +++ stable/10/sys/ofed/include/linux/inetdevice.h (revision 271127) @@ -1,56 +1,57 @@ /*- * Copyright (c) 2010 Isilon Systems, Inc. * Copyright (c) 2010 iX Systems, Inc. * Copyright (c) 2010 Panasas, Inc. + * Copyright (c) 2013 Mellanox Technologies, Ltd. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice unmodified, this list of conditions, and the following * disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #ifndef _LINUX_INETDEVICE_H_ #define _LINUX_INETDEVICE_H_ #include static inline struct net_device * ip_dev_find(struct net *net, uint32_t addr) { struct sockaddr_in sin; struct ifaddr *ifa; struct ifnet *ifp; ifp = NULL; memset(&sin, 0, sizeof(sin)); sin.sin_addr.s_addr = addr; sin.sin_port = 0; sin.sin_len = sizeof(sin); sin.sin_family = AF_INET; ifa = ifa_ifwithaddr((struct sockaddr *)&sin); if (ifa) { ifp = ifa->ifa_ifp; if_ref(ifp); ifa_free(ifa); } return (ifp); } #endif /* _LINUX_INETDEVICE_H_ */ Index: stable/10/sys/ofed/include/linux/interrupt.h =================================================================== --- stable/10/sys/ofed/include/linux/interrupt.h (revision 271126) +++ stable/10/sys/ofed/include/linux/interrupt.h (revision 271127) @@ -1,139 +1,140 @@ /*- * Copyright (c) 2010 Isilon Systems, Inc. * Copyright (c) 2010 iX Systems, Inc. * Copyright (c) 2010 Panasas, Inc. + * Copyright (c) 2013, 2014 Mellanox Technologies, Ltd. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice unmodified, this list of conditions, and the following * disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #ifndef _LINUX_INTERRUPT_H_ #define _LINUX_INTERRUPT_H_ #include #include #include #include typedef irqreturn_t (*irq_handler_t)(int, void *); #define IRQ_RETVAL(x) ((x) != IRQ_NONE) #define IRQF_SHARED RF_SHAREABLE struct irq_ent { struct list_head links; struct device *dev; struct resource *res; void *arg; irqreturn_t (*handler)(int, void *); void *tag; int irq; }; static inline int _irq_rid(struct device *dev, int irq) { if (irq == dev->irq) return (0); return irq - dev->msix + 1; } static void _irq_handler(void *ent) { struct irq_ent *irqe; irqe = ent; irqe->handler(irqe->irq, irqe->arg); } static inline struct irq_ent * _irq_ent(struct device *dev, int irq) { struct irq_ent *irqe; list_for_each_entry(irqe, &dev->irqents, links) if (irqe->irq == irq) return (irqe); return (NULL); } static inline int request_irq(unsigned int irq, irq_handler_t handler, unsigned long flags, const char *name, void *arg) { struct resource *res; struct irq_ent *irqe; struct device *dev; int error; int rid; dev = _pci_find_irq_dev(irq); if (dev == NULL) return -ENXIO; rid = _irq_rid(dev, irq); res = bus_alloc_resource_any(dev->bsddev, SYS_RES_IRQ, &rid, flags | RF_ACTIVE); if (res == NULL) return (-ENXIO); irqe = kmalloc(sizeof(*irqe), GFP_KERNEL); irqe->dev = dev; irqe->res = res; irqe->arg = arg; irqe->handler = handler; irqe->irq = irq; error = bus_setup_intr(dev->bsddev, res, INTR_TYPE_NET | INTR_MPSAFE, NULL, _irq_handler, irqe, &irqe->tag); if (error) { bus_release_resource(dev->bsddev, SYS_RES_IRQ, rid, irqe->res); kfree(irqe); return (-error); } list_add(&irqe->links, &dev->irqents); return 0; } static inline void free_irq(unsigned int irq, void *device) { struct irq_ent *irqe; struct device *dev; int rid; dev = _pci_find_irq_dev(irq); if (dev == NULL) return; rid = _irq_rid(dev, irq); irqe = _irq_ent(dev, irq); if (irqe == NULL) return; bus_teardown_intr(dev->bsddev, irqe->res, irqe->tag); bus_release_resource(dev->bsddev, SYS_RES_IRQ, rid, irqe->res); list_del(&irqe->links); kfree(irqe); } #endif /* _LINUX_INTERRUPT_H_ */ Index: stable/10/sys/ofed/include/linux/io-mapping.h =================================================================== --- stable/10/sys/ofed/include/linux/io-mapping.h (revision 271126) +++ stable/10/sys/ofed/include/linux/io-mapping.h (revision 271127) @@ -1,77 +1,78 @@ /*- * Copyright (c) 2010 Isilon Systems, Inc. * Copyright (c) 2010 iX Systems, Inc. * Copyright (c) 2010 Panasas, Inc. + * Copyright (c) 2013, 2014 Mellanox Technologies, Ltd. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice unmodified, this list of conditions, and the following * disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #ifndef _LINUX_IO_MAPPING_H_ #define _LINUX_IO_MAPPING_H_ #include #include struct io_mapping; static inline struct io_mapping * io_mapping_create_wc(resource_size_t base, unsigned long size) { return ioremap_wc(base, size); } static inline void io_mapping_free(struct io_mapping *mapping) { iounmap(mapping); } static inline void * io_mapping_map_atomic_wc(struct io_mapping *mapping, unsigned long offset) { return (((char *)mapping) + offset); } static inline void io_mapping_unmap_atomic(void *vaddr) { } static inline void * io_mapping_map_wc(struct io_mapping *mapping, unsigned long offset) { return (((char *) mapping) + offset); } static inline void io_mapping_unmap(void *vaddr) { } #endif /* _LINUX_IO_MAPPING_H_ */ Index: stable/10/sys/ofed/include/linux/io.h =================================================================== --- stable/10/sys/ofed/include/linux/io.h (revision 271126) +++ stable/10/sys/ofed/include/linux/io.h (revision 271127) @@ -1,125 +1,126 @@ /*- * Copyright (c) 2010 Isilon Systems, Inc. * Copyright (c) 2010 iX Systems, Inc. * Copyright (c) 2010 Panasas, Inc. + * Copyright (c) 2013, 2014 Mellanox Technologies, Ltd. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice unmodified, this list of conditions, and the following * disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #ifndef _LINUX_IO_H_ #define _LINUX_IO_H_ #include static inline uint32_t __raw_readl(const volatile void *addr) { return *(const volatile uint32_t *)addr; } static inline void __raw_writel(uint32_t b, volatile void *addr) { *(volatile uint32_t *)addr = b; } static inline uint64_t __raw_readq(const volatile void *addr) { return *(const volatile uint64_t *)addr; } static inline void __raw_writeq(uint64_t b, volatile void *addr) { *(volatile uint64_t *)addr = b; } /* * XXX This is all x86 specific. It should be bus space access. */ #define mmiowb() #undef writel static inline void writel(uint32_t b, void *addr) { *(volatile uint32_t *)addr = b; } #undef writeq static inline void writeq(uint64_t b, void *addr) { *(volatile uint64_t *)addr = b; } #undef writeb static inline void writeb(uint8_t b, void *addr) { *(volatile uint8_t *)addr = b; } #undef writew static inline void writew(uint16_t b, void *addr) { *(volatile uint16_t *)addr = b; } void *_ioremap_attr(vm_paddr_t phys_addr, unsigned long size, int attr); #define ioremap_nocache(addr, size) \ _ioremap_attr((addr), (size), VM_MEMATTR_UNCACHEABLE) #define ioremap_wc(addr, size) \ _ioremap_attr((addr), (size), VM_MEMATTR_WRITE_COMBINING) #define ioremap ioremap_nocache void iounmap(void *addr); #define memset_io(a, b, c) memset((a), (b), (c)) #define memcpy_fromio(a, b, c) memcpy((a), (b), (c)) #define memcpy_toio(a, b, c) memcpy((a), (b), (c)) static inline void __iowrite64_copy(void *to, void *from, size_t count) { #ifdef __LP64__ uint64_t *src; uint64_t *dst; int i; for (i = 0, src = from, dst = to; i < count; i++, src++, dst++) __raw_writeq(*src, dst); #else uint32_t *src; uint32_t *dst; int i; count *= 2; for (i = 0, src = from, dst = to; i < count; i++, src++, dst++) __raw_writel(*src, dst); #endif } #endif /* _LINUX_IO_H_ */ Index: stable/10/sys/ofed/include/linux/ioctl.h =================================================================== --- stable/10/sys/ofed/include/linux/ioctl.h (revision 271126) +++ stable/10/sys/ofed/include/linux/ioctl.h (revision 271127) @@ -1,34 +1,35 @@ /*- * Copyright (c) 2010 Isilon Systems, Inc. * Copyright (c) 2010 iX Systems, Inc. * Copyright (c) 2010 Panasas, Inc. + * Copyright (c) 2013, 2014 Mellanox Technologies, Ltd. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice unmodified, this list of conditions, and the following * disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #ifndef _LINUX_IOCTL_H_ #define _LINUX_IOCTL_H_ #include #endif /* _LINUX_IOCTL_H_ */ Index: stable/10/sys/ofed/include/linux/jiffies.h =================================================================== --- stable/10/sys/ofed/include/linux/jiffies.h (revision 271126) +++ stable/10/sys/ofed/include/linux/jiffies.h (revision 271127) @@ -1,56 +1,60 @@ /*- * Copyright (c) 2010 Isilon Systems, Inc. * Copyright (c) 2010 iX Systems, Inc. * Copyright (c) 2010 Panasas, Inc. + * Copyright (c) 2013, 2014 Mellanox Technologies, Ltd. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice unmodified, this list of conditions, and the following * disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #ifndef _LINUX_JIFFIES_H_ #define _LINUX_JIFFIES_H_ #include #include #include #include static inline int msecs_to_jiffies(int msec) { struct timeval tv; tv.tv_sec = msec / 1000; tv.tv_usec = (msec % 1000) * 1000; return (tvtohz(&tv)); } -#define jiffies ticks + +#define jiffies ticks +#define jiffies_to_msecs(x) (((int64_t)(x)) * 1000 / hz) + #define time_after(a, b) ((long)(b) - (long)(a) < 0) #define time_before(a, b) time_after(b,a) #define time_after_eq(a, b) ((long)(a) - (long)(b) >= 0) #define time_before_eq(a, b) time_after_eq(b, a) #define HZ hz #endif /* _LINUX_JIFFIES_H_ */ Index: stable/10/sys/ofed/include/linux/kdev_t.h =================================================================== --- stable/10/sys/ofed/include/linux/kdev_t.h (revision 271126) +++ stable/10/sys/ofed/include/linux/kdev_t.h (revision 271127) @@ -1,36 +1,37 @@ /*- * Copyright (c) 2010 Isilon Systems, Inc. * Copyright (c) 2010 iX Systems, Inc. * Copyright (c) 2010 Panasas, Inc. + * Copyright (c) 2013, 2014 Mellanox Technologies, Ltd. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice unmodified, this list of conditions, and the following * disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #ifndef _LINUX_KDEV_T_H_ #define _LINUX_KDEV_T_H_ #define MAJOR(dev) major((dev)) #define MINOR(dev) minor((dev)) #define MKDEV(ma, mi) makedev((ma), (mi)) #endif /* _LINUX_KDEV_T_H_ */ Index: stable/10/sys/ofed/include/linux/kernel.h =================================================================== --- stable/10/sys/ofed/include/linux/kernel.h (revision 271126) +++ stable/10/sys/ofed/include/linux/kernel.h (revision 271127) @@ -1,158 +1,160 @@ /*- * Copyright (c) 2010 Isilon Systems, Inc. * Copyright (c) 2010 iX Systems, Inc. * Copyright (c) 2010 Panasas, Inc. + * Copyright (c) 2013, 2014 Mellanox Technologies, Ltd. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice unmodified, this list of conditions, and the following * disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #ifndef _LINUX_KERNEL_H_ #define _LINUX_KERNEL_H_ #include #include #include #include #include +#include #include #include #include -#include #include #include #include #include -#include -#include -#include +#include #include #define KERN_CONT "" #define KERN_EMERG "<0>" #define KERN_ALERT "<1>" #define KERN_CRIT "<2>" #define KERN_ERR "<3>" #define KERN_WARNING "<4>" #define KERN_NOTICE "<5>" #define KERN_INFO "<6>" #define KERN_DEBUG "<7>" #define BUG() panic("BUG") #define BUG_ON(condition) do { if (condition) BUG(); } while(0) #define WARN_ON BUG_ON #undef ALIGN #define ALIGN(x, y) roundup2((x), (y)) #define DIV_ROUND_UP howmany #define printk(X...) printf(X) #define pr_debug(fmt, ...) printk(KERN_DEBUG # fmt, ##__VA_ARGS__) #define udelay(t) DELAY(t) #ifndef pr_fmt #define pr_fmt(fmt) fmt #endif /* * Print a one-time message (analogous to WARN_ONCE() et al): */ #define printk_once(x...) ({ \ static bool __print_once; \ \ if (!__print_once) { \ __print_once = true; \ printk(x); \ } \ }) #define pr_emerg(fmt, ...) \ printk(KERN_EMERG pr_fmt(fmt), ##__VA_ARGS__) #define pr_alert(fmt, ...) \ printk(KERN_ALERT pr_fmt(fmt), ##__VA_ARGS__) #define pr_crit(fmt, ...) \ printk(KERN_CRIT pr_fmt(fmt), ##__VA_ARGS__) #define pr_err(fmt, ...) \ printk(KERN_ERR pr_fmt(fmt), ##__VA_ARGS__) #define pr_warning(fmt, ...) \ printk(KERN_WARNING pr_fmt(fmt), ##__VA_ARGS__) #define pr_warn pr_warning #define pr_notice(fmt, ...) \ printk(KERN_NOTICE pr_fmt(fmt), ##__VA_ARGS__) #define pr_info(fmt, ...) \ printk(KERN_INFO pr_fmt(fmt), ##__VA_ARGS__) +#define pr_info_once(fmt, ...) \ + printk_once(KERN_INFO pr_fmt(fmt), ##__VA_ARGS__) #define pr_cont(fmt, ...) \ printk(KERN_CONT fmt, ##__VA_ARGS__) /* pr_devel() should produce zero code unless DEBUG is defined */ #ifdef DEBUG #define pr_devel(fmt, ...) \ printk(KERN_DEBUG pr_fmt(fmt), ##__VA_ARGS__) #else #define pr_devel(fmt, ...) \ ({ if (0) printk(KERN_DEBUG pr_fmt(fmt), ##__VA_ARGS__); 0; }) #endif #ifndef WARN #define WARN(condition, format...) ({ \ int __ret_warn_on = !!(condition); \ if (unlikely(__ret_warn_on)) \ pr_warning(format); \ unlikely(__ret_warn_on); \ }) #endif #define container_of(ptr, type, member) \ ({ \ __typeof(((type *)0)->member) *_p = (ptr); \ (type *)((char *)_p - offsetof(type, member)); \ }) #define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0])) #define simple_strtoul strtoul #define simple_strtol strtol +#define kstrtol(a,b,c) ({*(c) = strtol(a,0,b);}) #define min(x, y) (x < y ? x : y) #define max(x, y) (x > y ? x : y) #define min_t(type, _x, _y) (type)(_x) < (type)(_y) ? (type)(_x) : (_y) #define max_t(type, _x, _y) (type)(_x) > (type)(_y) ? (type)(_x) : (_y) /* * This looks more complex than it should be. But we need to * get the type for the ~ right in round_down (it needs to be * as wide as the result!), and we want to evaluate the macro * arguments just once each. */ #define __round_mask(x, y) ((__typeof__(x))((y)-1)) #define round_up(x, y) ((((x)-1) | __round_mask(x, y))+1) #define round_down(x, y) ((x) & ~__round_mask(x, y)) #define num_possible_cpus() mp_ncpus typedef struct pm_message { int event; } pm_message_t; #endif /* _LINUX_KERNEL_H_ */ Index: stable/10/sys/ofed/include/linux/kmod.h =================================================================== --- stable/10/sys/ofed/include/linux/kmod.h (nonexistent) +++ stable/10/sys/ofed/include/linux/kmod.h (revision 271127) @@ -0,0 +1,51 @@ +/*- + * Copyright (c) 2010 Isilon Systems, Inc. + * Copyright (c) 2010 iX Systems, Inc. + * Copyright (c) 2010 Panasas, Inc. + * Copyright (c) 2013, 2014 Mellanox Technologies, Ltd. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice unmodified, this list of conditions, and the following + * disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _LINUX_KMOD_H_ +#define _LINUX_KMOD_H_ + +#include +#include +#include +#include +#include +#include + +#define request_module(...) \ +({\ + char modname[128]; \ + int fileid; \ + snprintf(modname, sizeof(modname), __VA_ARGS__); \ + kern_kldload(curthread, modname, &fileid); \ +}) + +#define request_module_nowait request_module + + +#endif /* _LINUX_KMOD_H_ */ Property changes on: stable/10/sys/ofed/include/linux/kmod.h ___________________________________________________________________ Added: fbsd:nokeywords ## -0,0 +1 ## +true \ No newline at end of property Index: stable/10/sys/ofed/include/linux/kobject.h =================================================================== --- stable/10/sys/ofed/include/linux/kobject.h (revision 271126) +++ stable/10/sys/ofed/include/linux/kobject.h (revision 271127) @@ -1,153 +1,169 @@ /*- * Copyright (c) 2010 Isilon Systems, Inc. * Copyright (c) 2010 iX Systems, Inc. * Copyright (c) 2010 Panasas, Inc. + * Copyright (c) 2013, 2014 Mellanox Technologies, Ltd. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice unmodified, this list of conditions, and the following * disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #ifndef _LINUX_KOBJECT_H_ #define _LINUX_KOBJECT_H_ #include #include #include #include struct kobject; struct sysctl_oid; struct kobj_type { void (*release)(struct kobject *kobj); const struct sysfs_ops *sysfs_ops; struct attribute **default_attrs; }; extern struct kobj_type kfree_type; struct kobject { struct kobject *parent; char *name; struct kref kref; struct kobj_type *ktype; struct list_head entry; struct sysctl_oid *oidp; }; +extern struct kobject *mm_kobj; + static inline void kobject_init(struct kobject *kobj, struct kobj_type *ktype) { kref_init(&kobj->kref); INIT_LIST_HEAD(&kobj->entry); kobj->ktype = ktype; kobj->oidp = NULL; } static inline void kobject_put(struct kobject *kobj); void kobject_release(struct kref *kref); static inline void kobject_put(struct kobject *kobj) { if (kobj) kref_put(&kobj->kref, kobject_release); } static inline struct kobject * kobject_get(struct kobject *kobj) { if (kobj) kref_get(&kobj->kref); return kobj; } static inline int kobject_set_name_vargs(struct kobject *kobj, const char *fmt, va_list args) { char *old; char *name; old = kobj->name; if (old && !fmt) return 0; name = kzalloc(MAXPATHLEN, GFP_KERNEL); if (!name) return -ENOMEM; vsnprintf(name, MAXPATHLEN, fmt, args); kobj->name = name; kfree(old); for (; *name != '\0'; name++) if (*name == '/') *name = '!'; return (0); } int kobject_add(struct kobject *kobj, struct kobject *parent, const char *fmt, ...); static inline struct kobject * kobject_create(void) { struct kobject *kobj; kobj = kzalloc(sizeof(*kobj), GFP_KERNEL); if (kobj == NULL) return (NULL); kobject_init(kobj, &kfree_type); return (kobj); } static inline struct kobject * kobject_create_and_add(const char *name, struct kobject *parent) { struct kobject *kobj; kobj = kobject_create(); if (kobj == NULL) return (NULL); if (kobject_add(kobj, parent, "%s", name) == 0) return (kobj); kobject_put(kobj); return (NULL); } static inline char * kobject_name(const struct kobject *kobj) { return kobj->name; } int kobject_set_name(struct kobject *kobj, const char *fmt, ...); int kobject_init_and_add(struct kobject *kobj, struct kobj_type *ktype, struct kobject *parent, const char *fmt, ...); + +/* sysfs.h calles for 'kobject' which is defined here, + * so we need to add the include only after the 'kobject' def. + */ +#include + +struct kobj_attribute { + struct attribute attr; + ssize_t (*show)(struct kobject *kobj, struct kobj_attribute *attr, + char *buf); + ssize_t (*store)(struct kobject *kobj, struct kobj_attribute *attr, + const char *buf, size_t count); +}; #endif /* _LINUX_KOBJECT_H_ */ Index: stable/10/sys/ofed/include/linux/kref.h =================================================================== --- stable/10/sys/ofed/include/linux/kref.h (revision 271126) +++ stable/10/sys/ofed/include/linux/kref.h (revision 271127) @@ -1,62 +1,63 @@ /*- * Copyright (c) 2010 Isilon Systems, Inc. * Copyright (c) 2010 iX Systems, Inc. * Copyright (c) 2010 Panasas, Inc. + * Copyright (c) 2013, 2014 Mellanox Technologies, Ltd. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice unmodified, this list of conditions, and the following * disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #ifndef _LINUX_KREF_H_ #define _LINUX_KREF_H_ #include struct kref { volatile u_int count; }; static inline void kref_init(struct kref *kref) { refcount_init(&kref->count, 1); } static inline void kref_get(struct kref *kref) { refcount_acquire(&kref->count); } static inline int kref_put(struct kref *kref, void (*rel)(struct kref *kref)) { if (refcount_release(&kref->count)) { rel(kref); return 1; } return 0; } -#endif /* _KREF_H_ */ +#endif /* _LINUX_KREF_H_ */ Index: stable/10/sys/ofed/include/linux/kthread.h =================================================================== --- stable/10/sys/ofed/include/linux/kthread.h (revision 271126) +++ stable/10/sys/ofed/include/linux/kthread.h (revision 271127) @@ -1,104 +1,105 @@ /*- * Copyright (c) 2010 Isilon Systems, Inc. * Copyright (c) 2010 iX Systems, Inc. * Copyright (c) 2010 Panasas, Inc. + * Copyright (c) 2013, 2014 Mellanox Technologies, Ltd. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice unmodified, this list of conditions, and the following * disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #ifndef _LINUX_KTHREAD_H_ #define _LINUX_KTHREAD_H_ #include #include #include #include #include #include #include #include static inline void _kthread_fn(void *arg) { struct task_struct *task; task = arg; task_struct_set(curthread, task); if (task->should_stop == 0) task->task_ret = task->task_fn(task->task_data); PROC_LOCK(task->task_thread->td_proc); task->should_stop = TASK_STOPPED; wakeup(task); PROC_UNLOCK(task->task_thread->td_proc); kthread_exit(); } static inline struct task_struct * _kthread_create(int (*threadfn)(void *data), void *data) { struct task_struct *task; task = kzalloc(sizeof(*task), GFP_KERNEL); task->task_fn = threadfn; task->task_data = data; return (task); } struct task_struct *kthread_create(int (*threadfn)(void *data), void *data, const char namefmt[], ...) __attribute__((format(printf, 3, 4))); #define kthread_run(fn, data, fmt, ...) \ ({ \ struct task_struct *_task; \ \ _task = _kthread_create((fn), (data)); \ if (kthread_add(_kthread_fn, _task, NULL, &_task->task_thread, \ 0, 0, fmt, ## __VA_ARGS__)) { \ kfree(_task); \ _task = NULL; \ } else \ task_struct_set(_task->task_thread, _task); \ _task; \ }) #define kthread_should_stop() current->should_stop static inline int kthread_stop(struct task_struct *task) { PROC_LOCK(task->task_thread->td_proc); task->should_stop = TASK_SHOULD_STOP; wake_up_process(task); while (task->should_stop != TASK_STOPPED) msleep(task, &task->task_thread->td_proc->p_mtx, PWAIT, "kstop", hz); PROC_UNLOCK(task->task_thread->td_proc); return task->task_ret; } #endif /* _LINUX_KTHREAD_H_ */ Index: stable/10/sys/ofed/include/linux/ktime.h =================================================================== --- stable/10/sys/ofed/include/linux/ktime.h (nonexistent) +++ stable/10/sys/ofed/include/linux/ktime.h (revision 271127) @@ -0,0 +1,291 @@ +/*- + * Copyright (c) 2014 Mellanox Technologies, Ltd. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice unmodified, this list of conditions, and the following + * disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _LINUX_KTIME_H +#define _LINUX_KTIME_H + +#include +#include +#include + + +/* Get the monotonic time in timespec format: */ +#define ktime_get_ts getnanouptime + +#define NSEC_PER_USEC 1000L +#define NSEC_PER_SEC 1000000000L + +/* + * ktime_t: + * + * On 64-bit CPUs a single 64-bit variable is used to store the hrtimers + * internal representation of time values in scalar nanoseconds. The + * design plays out best on 64-bit CPUs, where most conversions are + * NOPs and most arithmetic ktime_t operations are plain arithmetic + * operations. + * + * On 32-bit CPUs an optimized representation of the timespec structure + * is used to avoid expensive conversions from and to timespecs. The + * endian-aware order of the tv struct members is chosen to allow + * mathematical operations on the tv64 member of the union too, which + * for certain operations produces better code. + * + * For architectures with efficient support for 64/32-bit conversions the + * plain scalar nanosecond based representation can be selected by the + * config switch CONFIG_KTIME_SCALAR. + */ +union ktime { + s64 tv64; +#if BITS_PER_LONG != 64 && !defined(CONFIG_KTIME_SCALAR) + struct { +# ifdef __BIG_ENDIAN + s32 sec, nsec; +# else + s32 nsec, sec; +# endif + } tv; +#endif +}; + +typedef union ktime ktime_t; /* Kill this */ + +#define KTIME_MAX ((s64)~((u64)1 << 63)) +#define KTIME_SEC_MAX (KTIME_MAX / NSEC_PER_SEC) + +/* + * ktime_t definitions when using the 64-bit scalar representation: + */ + +#if (BITS_PER_LONG == 64) || defined(CONFIG_KTIME_SCALAR) + +/** + * ktime_set - Set a ktime_t variable from a seconds/nanoseconds value + * @secs: seconds to set + * @nsecs: nanoseconds to set + * + * Return the ktime_t representation of the value + */ +static inline ktime_t ktime_set(const long secs, const unsigned long nsecs) +{ +#if (BITS_PER_LONG == 64) + if (unlikely(secs >= KTIME_SEC_MAX)) + return (ktime_t){ .tv64 = KTIME_MAX }; +#endif + return (ktime_t) { .tv64 = (s64)secs * NSEC_PER_SEC + (s64)nsecs }; +} + +/* Subtract two ktime_t variables. rem = lhs -rhs: */ +#define ktime_sub(lhs, rhs) \ + ({ (ktime_t){ .tv64 = (lhs).tv64 - (rhs).tv64 }; }) + +/* Add two ktime_t variables. res = lhs + rhs: */ +#define ktime_add(lhs, rhs) \ + ({ (ktime_t){ .tv64 = (lhs).tv64 + (rhs).tv64 }; }) + +/* + * Add a ktime_t variable and a scalar nanosecond value. + * res = kt + nsval: + */ +#define ktime_add_ns(kt, nsval) \ + ({ (ktime_t){ .tv64 = (kt).tv64 + (nsval) }; }) + +/* + * Subtract a scalar nanosecod from a ktime_t variable + * res = kt - nsval: + */ +#define ktime_sub_ns(kt, nsval) \ + ({ (ktime_t){ .tv64 = (kt).tv64 - (nsval) }; }) + +/* convert a timespec to ktime_t format: */ +static inline ktime_t timespec_to_ktime(struct timespec ts) +{ + return ktime_set(ts.tv_sec, ts.tv_nsec); +} + +/* convert a timeval to ktime_t format: */ +static inline ktime_t timeval_to_ktime(struct timeval tv) +{ + return ktime_set(tv.tv_sec, tv.tv_usec * NSEC_PER_USEC); +} + +/* Map the ktime_t to timespec conversion to ns_to_timespec function */ +#define ktime_to_timespec(kt) ns_to_timespec((kt).tv64) + +/* Map the ktime_t to timeval conversion to ns_to_timeval function */ +#define ktime_to_timeval(kt) ns_to_timeval((kt).tv64) + +/* Convert ktime_t to nanoseconds - NOP in the scalar storage format: */ +#define ktime_to_ns(kt) ((kt).tv64) + +#else /* !((BITS_PER_LONG == 64) || defined(CONFIG_KTIME_SCALAR)) */ + +/* + * Helper macros/inlines to get the ktime_t math right in the timespec + * representation. The macros are sometimes ugly - their actual use is + * pretty okay-ish, given the circumstances. We do all this for + * performance reasons. The pure scalar nsec_t based code was nice and + * simple, but created too many 64-bit / 32-bit conversions and divisions. + * + * Be especially aware that negative values are represented in a way + * that the tv.sec field is negative and the tv.nsec field is greater + * or equal to zero but less than nanoseconds per second. This is the + * same representation which is used by timespecs. + * + * tv.sec < 0 and 0 >= tv.nsec < NSEC_PER_SEC + */ + +/* Set a ktime_t variable to a value in sec/nsec representation: */ +static inline ktime_t ktime_set(const long secs, const unsigned long nsecs) +{ + return (ktime_t) { .tv = { .sec = secs, .nsec = nsecs } }; +} + +/** + * ktime_sub - subtract two ktime_t variables + * @lhs: minuend + * @rhs: subtrahend + * + * Returns the remainder of the subtraction + */ +static inline ktime_t ktime_sub(const ktime_t lhs, const ktime_t rhs) +{ + ktime_t res; + + res.tv64 = lhs.tv64 - rhs.tv64; + if (res.tv.nsec < 0) + res.tv.nsec += NSEC_PER_SEC; + + return res; +} + +/** + * ktime_add - add two ktime_t variables + * @add1: addend1 + * @add2: addend2 + * + * Returns the sum of @add1 and @add2. + */ +static inline ktime_t ktime_add(const ktime_t add1, const ktime_t add2) +{ + ktime_t res; + + res.tv64 = add1.tv64 + add2.tv64; + /* + * performance trick: the (u32) -NSEC gives 0x00000000Fxxxxxxx + * so we subtract NSEC_PER_SEC and add 1 to the upper 32 bit. + * + * it's equivalent to: + * tv.nsec -= NSEC_PER_SEC + * tv.sec ++; + */ + if (res.tv.nsec >= NSEC_PER_SEC) + res.tv64 += (u32)-NSEC_PER_SEC; + + return res; +} + +/** + * ktime_add_ns - Add a scalar nanoseconds value to a ktime_t variable + * @kt: addend + * @nsec: the scalar nsec value to add + * + * Returns the sum of @kt and @nsec in ktime_t format + */ +extern ktime_t ktime_add_ns(const ktime_t kt, u64 nsec); + +/** + * ktime_sub_ns - Subtract a scalar nanoseconds value from a ktime_t variable + * @kt: minuend + * @nsec: the scalar nsec value to subtract + * + * Returns the subtraction of @nsec from @kt in ktime_t format + */ +extern ktime_t ktime_sub_ns(const ktime_t kt, u64 nsec); + +/** + * timespec_to_ktime - convert a timespec to ktime_t format + * @ts: the timespec variable to convert + * + * Returns a ktime_t variable with the converted timespec value + */ +static inline ktime_t timespec_to_ktime(const struct timespec ts) +{ + return (ktime_t) { .tv = { .sec = (s32)ts.tv_sec, + .nsec = (s32)ts.tv_nsec } }; +} + +/** + * timeval_to_ktime - convert a timeval to ktime_t format + * @tv: the timeval variable to convert + * + * Returns a ktime_t variable with the converted timeval value + */ +static inline ktime_t timeval_to_ktime(const struct timeval tv) +{ + return (ktime_t) { .tv = { .sec = (s32)tv.tv_sec, + .nsec = (s32)(tv.tv_usec * + NSEC_PER_USEC) } }; +} + +/** + * ktime_to_timespec - convert a ktime_t variable to timespec format + * @kt: the ktime_t variable to convert + * + * Returns the timespec representation of the ktime value + */ +static inline struct timespec ktime_to_timespec(const ktime_t kt) +{ + return (struct timespec) { .tv_sec = (time_t) kt.tv.sec, + .tv_nsec = (long) kt.tv.nsec }; +} + +/** + * ktime_to_timeval - convert a ktime_t variable to timeval format + * @kt: the ktime_t variable to convert + * + * Returns the timeval representation of the ktime value + */ +static inline struct timeval ktime_to_timeval(const ktime_t kt) +{ + return (struct timeval) { + .tv_sec = (time_t) kt.tv.sec, + .tv_usec = (suseconds_t) (kt.tv.nsec / NSEC_PER_USEC) }; +} + +/** + * ktime_to_ns - convert a ktime_t variable to scalar nanoseconds + * @kt: the ktime_t variable to convert + * + * Returns the scalar nanoseconds representation of @kt + */ +static inline s64 ktime_to_ns(const ktime_t kt) +{ + return (s64) kt.tv.sec * NSEC_PER_SEC + kt.tv.nsec; +} + +#endif /* !((BITS_PER_LONG == 64) || defined(CONFIG_KTIME_SCALAR)) */ + +#endif /* _LINUX_KTIME_H */ Property changes on: stable/10/sys/ofed/include/linux/ktime.h ___________________________________________________________________ Added: fbsd:nokeywords ## -0,0 +1 ## +true \ No newline at end of property Index: stable/10/sys/ofed/include/linux/linux_compat.c =================================================================== --- stable/10/sys/ofed/include/linux/linux_compat.c (revision 271126) +++ stable/10/sys/ofed/include/linux/linux_compat.c (revision 271127) @@ -1,703 +1,720 @@ /*- * Copyright (c) 2010 Isilon Systems, Inc. * Copyright (c) 2010 iX Systems, Inc. * Copyright (c) 2010 Panasas, Inc. + * Copyright (c) 2013, 2014 Mellanox Technologies, Ltd. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice unmodified, this list of conditions, and the following * disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include MALLOC_DEFINE(M_KMALLOC, "linux", "Linux kmalloc compat"); #include /* Undo Linux compat changes. */ #undef RB_ROOT #undef file #undef cdev #define RB_ROOT(head) (head)->rbh_root #undef LIST_HEAD /* From sys/queue.h */ #define LIST_HEAD(name, type) \ struct name { \ struct type *lh_first; /* first element */ \ } struct kobject class_root; struct device linux_rootdev; struct class miscclass; struct list_head pci_drivers; struct list_head pci_devices; spinlock_t pci_lock; int panic_cmp(struct rb_node *one, struct rb_node *two) { panic("no cmp"); } RB_GENERATE(linux_root, rb_node, __entry, panic_cmp); int kobject_set_name(struct kobject *kobj, const char *fmt, ...) { va_list args; int error; va_start(args, fmt); error = kobject_set_name_vargs(kobj, fmt, args); va_end(args); return (error); } static inline int kobject_add_complete(struct kobject *kobj, struct kobject *parent) { struct kobj_type *t; int error; kobj->parent = kobject_get(parent); error = sysfs_create_dir(kobj); if (error == 0 && kobj->ktype && kobj->ktype->default_attrs) { struct attribute **attr; t = kobj->ktype; for (attr = t->default_attrs; *attr != NULL; attr++) { error = sysfs_create_file(kobj, *attr); if (error) break; } if (error) sysfs_remove_dir(kobj); } return (error); } int kobject_add(struct kobject *kobj, struct kobject *parent, const char *fmt, ...) { va_list args; int error; va_start(args, fmt); error = kobject_set_name_vargs(kobj, fmt, args); va_end(args); if (error) return (error); return kobject_add_complete(kobj, parent); } void kobject_release(struct kref *kref) { struct kobject *kobj; char *name; kobj = container_of(kref, struct kobject, kref); sysfs_remove_dir(kobj); if (kobj->parent) kobject_put(kobj->parent); kobj->parent = NULL; name = kobj->name; if (kobj->ktype && kobj->ktype->release) kobj->ktype->release(kobj); kfree(name); } static void kobject_kfree(struct kobject *kobj) { - kfree(kobj); } +static void +kobject_kfree_name(struct kobject *kobj) +{ + if (kobj) { + kfree(kobj->name); + } +} + struct kobj_type kfree_type = { .release = kobject_kfree }; struct device * device_create(struct class *class, struct device *parent, dev_t devt, void *drvdata, const char *fmt, ...) { struct device *dev; va_list args; dev = kzalloc(sizeof(*dev), M_WAITOK); dev->parent = parent; dev->class = class; dev->devt = devt; dev->driver_data = drvdata; va_start(args, fmt); kobject_set_name_vargs(&dev->kobj, fmt, args); va_end(args); device_register(dev); return (dev); } int kobject_init_and_add(struct kobject *kobj, struct kobj_type *ktype, struct kobject *parent, const char *fmt, ...) { va_list args; int error; kobject_init(kobj, ktype); kobj->ktype = ktype; kobj->parent = parent; kobj->name = NULL; va_start(args, fmt); error = kobject_set_name_vargs(kobj, fmt, args); va_end(args); if (error) return (error); return kobject_add_complete(kobj, parent); } static void linux_file_dtor(void *cdp) { struct linux_file *filp; filp = cdp; filp->f_op->release(filp->f_vnode, filp); vdrop(filp->f_vnode); kfree(filp); } static int linux_dev_open(struct cdev *dev, int oflags, int devtype, struct thread *td) { struct linux_cdev *ldev; struct linux_file *filp; struct file *file; int error; file = curthread->td_fpop; ldev = dev->si_drv1; if (ldev == NULL) return (ENODEV); filp = kzalloc(sizeof(*filp), GFP_KERNEL); filp->f_dentry = &filp->f_dentry_store; filp->f_op = ldev->ops; filp->f_flags = file->f_flag; vhold(file->f_vnode); filp->f_vnode = file->f_vnode; if (filp->f_op->open) { error = -filp->f_op->open(file->f_vnode, filp); if (error) { kfree(filp); return (error); } } error = devfs_set_cdevpriv(filp, linux_file_dtor); if (error) { filp->f_op->release(file->f_vnode, filp); kfree(filp); return (error); } return 0; } static int linux_dev_close(struct cdev *dev, int fflag, int devtype, struct thread *td) { struct linux_cdev *ldev; struct linux_file *filp; struct file *file; int error; file = curthread->td_fpop; ldev = dev->si_drv1; if (ldev == NULL) return (0); if ((error = devfs_get_cdevpriv((void **)&filp)) != 0) return (error); filp->f_flags = file->f_flag; devfs_clear_cdevpriv(); return (0); } static int linux_dev_ioctl(struct cdev *dev, u_long cmd, caddr_t data, int fflag, struct thread *td) { struct linux_cdev *ldev; struct linux_file *filp; struct file *file; int error; file = curthread->td_fpop; ldev = dev->si_drv1; if (ldev == NULL) return (0); if ((error = devfs_get_cdevpriv((void **)&filp)) != 0) return (error); filp->f_flags = file->f_flag; /* * Linux does not have a generic ioctl copyin/copyout layer. All * linux ioctls must be converted to void ioctls which pass a * pointer to the address of the data. We want the actual user * address so we dereference here. */ data = *(void **)data; if (filp->f_op->unlocked_ioctl) error = -filp->f_op->unlocked_ioctl(filp, cmd, (u_long)data); else error = ENOTTY; return (error); } static int linux_dev_read(struct cdev *dev, struct uio *uio, int ioflag) { struct linux_cdev *ldev; struct linux_file *filp; struct file *file; ssize_t bytes; int error; file = curthread->td_fpop; ldev = dev->si_drv1; if (ldev == NULL) return (0); if ((error = devfs_get_cdevpriv((void **)&filp)) != 0) return (error); filp->f_flags = file->f_flag; if (uio->uio_iovcnt != 1) panic("linux_dev_read: uio %p iovcnt %d", uio, uio->uio_iovcnt); if (filp->f_op->read) { bytes = filp->f_op->read(filp, uio->uio_iov->iov_base, uio->uio_iov->iov_len, &uio->uio_offset); if (bytes >= 0) { uio->uio_iov->iov_base += bytes; uio->uio_iov->iov_len -= bytes; uio->uio_resid -= bytes; } else error = -bytes; } else error = ENXIO; return (error); } static int linux_dev_write(struct cdev *dev, struct uio *uio, int ioflag) { struct linux_cdev *ldev; struct linux_file *filp; struct file *file; ssize_t bytes; int error; file = curthread->td_fpop; ldev = dev->si_drv1; if (ldev == NULL) return (0); if ((error = devfs_get_cdevpriv((void **)&filp)) != 0) return (error); filp->f_flags = file->f_flag; if (uio->uio_iovcnt != 1) panic("linux_dev_write: uio %p iovcnt %d", uio, uio->uio_iovcnt); if (filp->f_op->write) { bytes = filp->f_op->write(filp, uio->uio_iov->iov_base, uio->uio_iov->iov_len, &uio->uio_offset); if (bytes >= 0) { uio->uio_iov->iov_base += bytes; uio->uio_iov->iov_len -= bytes; uio->uio_resid -= bytes; } else error = -bytes; } else error = ENXIO; return (error); } static int linux_dev_poll(struct cdev *dev, int events, struct thread *td) { struct linux_cdev *ldev; struct linux_file *filp; struct file *file; int revents; int error; file = curthread->td_fpop; ldev = dev->si_drv1; if (ldev == NULL) return (0); if ((error = devfs_get_cdevpriv((void **)&filp)) != 0) return (error); filp->f_flags = file->f_flag; if (filp->f_op->poll) revents = filp->f_op->poll(filp, NULL) & events; else revents = 0; return (revents); } static int linux_dev_mmap(struct cdev *dev, vm_ooffset_t offset, vm_paddr_t *paddr, int nprot, vm_memattr_t *memattr) { /* XXX memattr not honored. */ *paddr = offset; return (0); } static int linux_dev_mmap_single(struct cdev *dev, vm_ooffset_t *offset, vm_size_t size, struct vm_object **object, int nprot) { struct linux_cdev *ldev; struct linux_file *filp; struct file *file; struct vm_area_struct vma; vm_paddr_t paddr; vm_page_t m; int error; file = curthread->td_fpop; ldev = dev->si_drv1; if (ldev == NULL) return (ENODEV); if (size != PAGE_SIZE) return (EINVAL); if ((error = devfs_get_cdevpriv((void **)&filp)) != 0) return (error); filp->f_flags = file->f_flag; vma.vm_start = 0; vma.vm_end = PAGE_SIZE; vma.vm_pgoff = *offset / PAGE_SIZE; vma.vm_pfn = 0; vma.vm_page_prot = 0; if (filp->f_op->mmap) { error = -filp->f_op->mmap(filp, &vma); if (error == 0) { paddr = (vm_paddr_t)vma.vm_pfn << PAGE_SHIFT; *offset = paddr; m = PHYS_TO_VM_PAGE(paddr); *object = vm_pager_allocate(OBJT_DEVICE, dev, PAGE_SIZE, nprot, *offset, curthread->td_ucred); if (*object == NULL) return (EINVAL); if (vma.vm_page_prot != VM_MEMATTR_DEFAULT) pmap_page_set_memattr(m, vma.vm_page_prot); } } else error = ENODEV; return (error); } struct cdevsw linuxcdevsw = { .d_version = D_VERSION, .d_flags = D_TRACKCLOSE, .d_open = linux_dev_open, .d_close = linux_dev_close, .d_read = linux_dev_read, .d_write = linux_dev_write, .d_ioctl = linux_dev_ioctl, .d_mmap_single = linux_dev_mmap_single, .d_mmap = linux_dev_mmap, .d_poll = linux_dev_poll, }; static int linux_file_read(struct file *file, struct uio *uio, struct ucred *active_cred, int flags, struct thread *td) { struct linux_file *filp; ssize_t bytes; int error; error = 0; filp = (struct linux_file *)file->f_data; filp->f_flags = file->f_flag; if (uio->uio_iovcnt != 1) panic("linux_file_read: uio %p iovcnt %d", uio, uio->uio_iovcnt); if (filp->f_op->read) { bytes = filp->f_op->read(filp, uio->uio_iov->iov_base, uio->uio_iov->iov_len, &uio->uio_offset); if (bytes >= 0) { uio->uio_iov->iov_base += bytes; uio->uio_iov->iov_len -= bytes; uio->uio_resid -= bytes; } else error = -bytes; } else error = ENXIO; return (error); } static int linux_file_poll(struct file *file, int events, struct ucred *active_cred, struct thread *td) { struct linux_file *filp; int revents; filp = (struct linux_file *)file->f_data; filp->f_flags = file->f_flag; if (filp->f_op->poll) revents = filp->f_op->poll(filp, NULL) & events; else revents = 0; return (0); } static int linux_file_close(struct file *file, struct thread *td) { struct linux_file *filp; int error; filp = (struct linux_file *)file->f_data; filp->f_flags = file->f_flag; error = -filp->f_op->release(NULL, filp); funsetown(&filp->f_sigio); kfree(filp); return (error); } static int linux_file_ioctl(struct file *fp, u_long cmd, void *data, struct ucred *cred, struct thread *td) { struct linux_file *filp; int error; filp = (struct linux_file *)fp->f_data; filp->f_flags = fp->f_flag; error = 0; switch (cmd) { case FIONBIO: break; case FIOASYNC: if (filp->f_op->fasync == NULL) break; error = filp->f_op->fasync(0, filp, fp->f_flag & FASYNC); break; case FIOSETOWN: error = fsetown(*(int *)data, &filp->f_sigio); if (error == 0) error = filp->f_op->fasync(0, filp, fp->f_flag & FASYNC); break; case FIOGETOWN: *(int *)data = fgetown(&filp->f_sigio); break; default: error = ENOTTY; break; } return (error); } struct fileops linuxfileops = { .fo_read = linux_file_read, .fo_poll = linux_file_poll, .fo_close = linux_file_close, .fo_ioctl = linux_file_ioctl, .fo_chmod = invfo_chmod, .fo_chown = invfo_chown, .fo_sendfile = invfo_sendfile, }; /* * Hash of vmmap addresses. This is infrequently accessed and does not * need to be particularly large. This is done because we must store the * caller's idea of the map size to properly unmap. */ struct vmmap { LIST_ENTRY(vmmap) vm_next; void *vm_addr; unsigned long vm_size; }; LIST_HEAD(vmmaphd, vmmap); #define VMMAP_HASH_SIZE 64 #define VMMAP_HASH_MASK (VMMAP_HASH_SIZE - 1) #define VM_HASH(addr) ((uintptr_t)(addr) >> PAGE_SHIFT) & VMMAP_HASH_MASK static struct vmmaphd vmmaphead[VMMAP_HASH_SIZE]; static struct mtx vmmaplock; static void vmmap_add(void *addr, unsigned long size) { struct vmmap *vmmap; vmmap = kmalloc(sizeof(*vmmap), GFP_KERNEL); mtx_lock(&vmmaplock); vmmap->vm_size = size; vmmap->vm_addr = addr; LIST_INSERT_HEAD(&vmmaphead[VM_HASH(addr)], vmmap, vm_next); mtx_unlock(&vmmaplock); } static struct vmmap * vmmap_remove(void *addr) { struct vmmap *vmmap; mtx_lock(&vmmaplock); LIST_FOREACH(vmmap, &vmmaphead[VM_HASH(addr)], vm_next) if (vmmap->vm_addr == addr) break; if (vmmap) LIST_REMOVE(vmmap, vm_next); mtx_unlock(&vmmaplock); return (vmmap); } void * _ioremap_attr(vm_paddr_t phys_addr, unsigned long size, int attr) { void *addr; addr = pmap_mapdev_attr(phys_addr, size, attr); if (addr == NULL) return (NULL); vmmap_add(addr, size); return (addr); } void iounmap(void *addr) { struct vmmap *vmmap; vmmap = vmmap_remove(addr); if (vmmap == NULL) return; pmap_unmapdev((vm_offset_t)addr, vmmap->vm_size); kfree(vmmap); } void * vmap(struct page **pages, unsigned int count, unsigned long flags, int prot) { vm_offset_t off; size_t size; size = count * PAGE_SIZE; off = kva_alloc(size); if (off == 0) return (NULL); vmmap_add((void *)off, size); pmap_qenter(off, pages, count); return ((void *)off); } void vunmap(void *addr) { struct vmmap *vmmap; vmmap = vmmap_remove(addr); if (vmmap == NULL) return; pmap_qremove((vm_offset_t)addr, vmmap->vm_size / PAGE_SIZE); kva_free((vm_offset_t)addr, vmmap->vm_size); kfree(vmmap); } static void linux_compat_init(void) { struct sysctl_oid *rootoid; int i; rootoid = SYSCTL_ADD_NODE(NULL, SYSCTL_STATIC_CHILDREN(), OID_AUTO, "sys", CTLFLAG_RD|CTLFLAG_MPSAFE, NULL, "sys"); kobject_init(&class_root, &class_ktype); kobject_set_name(&class_root, "class"); class_root.oidp = SYSCTL_ADD_NODE(NULL, SYSCTL_CHILDREN(rootoid), OID_AUTO, "class", CTLFLAG_RD|CTLFLAG_MPSAFE, NULL, "class"); kobject_init(&linux_rootdev.kobj, &dev_ktype); kobject_set_name(&linux_rootdev.kobj, "device"); linux_rootdev.kobj.oidp = SYSCTL_ADD_NODE(NULL, SYSCTL_CHILDREN(rootoid), OID_AUTO, "device", CTLFLAG_RD, NULL, "device"); linux_rootdev.bsddev = root_bus; miscclass.name = "misc"; class_register(&miscclass); INIT_LIST_HEAD(&pci_drivers); INIT_LIST_HEAD(&pci_devices); spin_lock_init(&pci_lock); mtx_init(&vmmaplock, "IO Map lock", NULL, MTX_DEF); for (i = 0; i < VMMAP_HASH_SIZE; i++) LIST_INIT(&vmmaphead[i]); } SYSINIT(linux_compat, SI_SUB_DRIVERS, SI_ORDER_SECOND, linux_compat_init, NULL); + +static void +linux_compat_uninit(void) +{ + kobject_kfree_name(&class_root); + kobject_kfree_name(&linux_rootdev.kobj); + kobject_kfree_name(&miscclass.kobj); +} +SYSUNINIT(linux_compat, SI_SUB_DRIVERS, SI_ORDER_SECOND, linux_compat_uninit, NULL); Index: stable/10/sys/ofed/include/linux/linux_idr.c =================================================================== --- stable/10/sys/ofed/include/linux/linux_idr.c (revision 271126) +++ stable/10/sys/ofed/include/linux/linux_idr.c (revision 271127) @@ -1,447 +1,448 @@ /*- * Copyright (c) 2010 Isilon Systems, Inc. * Copyright (c) 2010 iX Systems, Inc. * Copyright (c) 2010 Panasas, Inc. + * Copyright (c) 2013, 2014 Mellanox Technologies, Ltd. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice unmodified, this list of conditions, and the following * disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include #include #include #include #include #include #include #include #include #include #include #include #include /* * IDR Implementation. * * This is quick and dirty and not as re-entrant as the linux version * however it should be fairly fast. It is basically a radix tree with * a builtin bitmap for allocation. */ static MALLOC_DEFINE(M_IDR, "idr", "Linux IDR compat"); static inline int idr_max(struct idr *idr) { return (1 << (idr->layers * IDR_BITS)) - 1; } static inline int idr_pos(int id, int layer) { return (id >> (IDR_BITS * layer)) & IDR_MASK; } void idr_init(struct idr *idr) { bzero(idr, sizeof(*idr)); mtx_init(&idr->lock, "idr", NULL, MTX_DEF); } /* Only frees cached pages. */ void idr_destroy(struct idr *idr) { struct idr_layer *il, *iln; mtx_lock(&idr->lock); for (il = idr->free; il != NULL; il = iln) { iln = il->ary[0]; free(il, M_IDR); } mtx_unlock(&idr->lock); } static void idr_remove_layer(struct idr_layer *il, int layer) { int i; if (il == NULL) return; if (layer == 0) { free(il, M_IDR); return; } for (i = 0; i < IDR_SIZE; i++) if (il->ary[i]) idr_remove_layer(il->ary[i], layer - 1); } void idr_remove_all(struct idr *idr) { mtx_lock(&idr->lock); idr_remove_layer(idr->top, idr->layers - 1); idr->top = NULL; idr->layers = 0; mtx_unlock(&idr->lock); } void idr_remove(struct idr *idr, int id) { struct idr_layer *il; int layer; int idx; id &= MAX_ID_MASK; mtx_lock(&idr->lock); il = idr->top; layer = idr->layers - 1; if (il == NULL || id > idr_max(idr)) { mtx_unlock(&idr->lock); return; } /* * Walk down the tree to this item setting bitmaps along the way * as we know at least one item will be free along this path. */ while (layer && il) { idx = idr_pos(id, layer); il->bitmap |= 1 << idx; il = il->ary[idx]; layer--; } idx = id & IDR_MASK; /* * At this point we've set free space bitmaps up the whole tree. * We could make this non-fatal and unwind but linux dumps a stack * and a warning so I don't think it's necessary. */ if (il == NULL || (il->bitmap & (1 << idx)) != 0) panic("idr_remove: Item %d not allocated (%p, %p)\n", id, idr, il); il->ary[idx] = NULL; il->bitmap |= 1 << idx; mtx_unlock(&idr->lock); return; } void * idr_replace(struct idr *idr, void *ptr, int id) { struct idr_layer *il; void *res; int layer; int idx; res = ERR_PTR(-EINVAL); id &= MAX_ID_MASK; mtx_lock(&idr->lock); il = idr->top; layer = idr->layers - 1; if (il == NULL || id > idr_max(idr)) goto out; while (layer && il) { il = il->ary[idr_pos(id, layer)]; layer--; } idx = id & IDR_MASK; /* * Replace still returns an error if the item was not allocated. */ if (il != NULL && (il->bitmap & (1 << idx)) != 0) { res = il->ary[idx]; il->ary[idx] = ptr; } out: mtx_unlock(&idr->lock); return (res); } void * idr_find(struct idr *idr, int id) { struct idr_layer *il; void *res; int layer; res = NULL; id &= MAX_ID_MASK; mtx_lock(&idr->lock); il = idr->top; layer = idr->layers - 1; if (il == NULL || id > idr_max(idr)) goto out; while (layer && il) { il = il->ary[idr_pos(id, layer)]; layer--; } if (il != NULL) res = il->ary[id & IDR_MASK]; out: mtx_unlock(&idr->lock); return (res); } int idr_pre_get(struct idr *idr, gfp_t gfp_mask) { struct idr_layer *il, *iln; struct idr_layer *head; int need; mtx_lock(&idr->lock); for (;;) { need = idr->layers + 1; for (il = idr->free; il != NULL; il = il->ary[0]) need--; mtx_unlock(&idr->lock); if (need == 0) break; for (head = NULL; need; need--) { iln = malloc(sizeof(*il), M_IDR, M_ZERO | gfp_mask); if (iln == NULL) break; bitmap_fill(&iln->bitmap, IDR_SIZE); if (head != NULL) { il->ary[0] = iln; il = iln; } else head = il = iln; } if (head == NULL) return (0); mtx_lock(&idr->lock); il->ary[0] = idr->free; idr->free = head; } return (1); } static inline struct idr_layer * idr_get(struct idr *idr) { struct idr_layer *il; il = idr->free; if (il) { idr->free = il->ary[0]; il->ary[0] = NULL; return (il); } il = malloc(sizeof(*il), M_IDR, M_ZERO | M_NOWAIT); bitmap_fill(&il->bitmap, IDR_SIZE); return (il); } /* * Could be implemented as get_new_above(idr, ptr, 0, idp) but written * first for simplicity sake. */ int idr_get_new(struct idr *idr, void *ptr, int *idp) { struct idr_layer *stack[MAX_LEVEL]; struct idr_layer *il; int error; int layer; int idx; int id; error = -EAGAIN; mtx_lock(&idr->lock); /* * Expand the tree until there is free space. */ if (idr->top == NULL || idr->top->bitmap == 0) { if (idr->layers == MAX_LEVEL + 1) { error = -ENOSPC; goto out; } il = idr_get(idr); if (il == NULL) goto out; il->ary[0] = idr->top; if (idr->top) il->bitmap &= ~1; idr->top = il; idr->layers++; } il = idr->top; id = 0; /* * Walk the tree following free bitmaps, record our path. */ for (layer = idr->layers - 1;; layer--) { stack[layer] = il; idx = ffsl(il->bitmap); if (idx == 0) panic("idr_get_new: Invalid leaf state (%p, %p)\n", idr, il); idx--; id |= idx << (layer * IDR_BITS); if (layer == 0) break; if (il->ary[idx] == NULL) { il->ary[idx] = idr_get(idr); if (il->ary[idx] == NULL) goto out; } il = il->ary[idx]; } /* * Allocate the leaf to the consumer. */ il->bitmap &= ~(1 << idx); il->ary[idx] = ptr; *idp = id; /* * Clear bitmaps potentially up to the root. */ while (il->bitmap == 0 && ++layer < idr->layers) { il = stack[layer]; il->bitmap &= ~(1 << idr_pos(id, layer)); } error = 0; out: mtx_unlock(&idr->lock); #ifdef INVARIANTS if (error == 0 && idr_find(idr, id) != ptr) { panic("idr_get_new: Failed for idr %p, id %d, ptr %p\n", idr, id, ptr); } #endif return (error); } int idr_get_new_above(struct idr *idr, void *ptr, int starting_id, int *idp) { struct idr_layer *stack[MAX_LEVEL]; struct idr_layer *il; int error; int layer; int idx, sidx; int id; error = -EAGAIN; mtx_lock(&idr->lock); /* * Compute the layers required to support starting_id and the mask * at the top layer. */ restart: idx = starting_id; layer = 0; while (idx & ~IDR_MASK) { layer++; idx >>= IDR_BITS; } if (layer == MAX_LEVEL + 1) { error = -ENOSPC; goto out; } /* * Expand the tree until there is free space at or beyond starting_id. */ while (idr->layers <= layer || idr->top->bitmap < (1 << idr_pos(starting_id, idr->layers - 1))) { if (idr->layers == MAX_LEVEL + 1) { error = -ENOSPC; goto out; } il = idr_get(idr); if (il == NULL) goto out; il->ary[0] = idr->top; if (idr->top && idr->top->bitmap == 0) il->bitmap &= ~1; idr->top = il; idr->layers++; } il = idr->top; id = 0; /* * Walk the tree following free bitmaps, record our path. */ for (layer = idr->layers - 1;; layer--) { stack[layer] = il; sidx = idr_pos(starting_id, layer); /* Returns index numbered from 0 or size if none exists. */ idx = find_next_bit(&il->bitmap, IDR_SIZE, sidx); if (idx == IDR_SIZE && sidx == 0) panic("idr_get_new: Invalid leaf state (%p, %p)\n", idr, il); /* * We may have walked a path where there was a free bit but * it was lower than what we wanted. Restart the search with * a larger starting id. id contains the progress we made so * far. Search the leaf one above this level. This may * restart as many as MAX_LEVEL times but that is expected * to be rare. */ if (idx == IDR_SIZE) { starting_id = id + (1 << (layer+1 * IDR_BITS)); goto restart; } if (idx > sidx) starting_id = 0; /* Search the whole subtree. */ id |= idx << (layer * IDR_BITS); if (layer == 0) break; if (il->ary[idx] == NULL) { il->ary[idx] = idr_get(idr); if (il->ary[idx] == NULL) goto out; } il = il->ary[idx]; } /* * Allocate the leaf to the consumer. */ il->bitmap &= ~(1 << idx); il->ary[idx] = ptr; *idp = id; /* * Clear bitmaps potentially up to the root. */ while (il->bitmap == 0 && ++layer < idr->layers) { il = stack[layer]; il->bitmap &= ~(1 << idr_pos(id, layer)); } error = 0; out: mtx_unlock(&idr->lock); #ifdef INVARIANTS if (error == 0 && idr_find(idr, id) != ptr) { panic("idr_get_new_above: Failed for idr %p, id %d, ptr %p\n", idr, id, ptr); } #endif return (error); } Index: stable/10/sys/ofed/include/linux/linux_radix.c =================================================================== --- stable/10/sys/ofed/include/linux/linux_radix.c (revision 271126) +++ stable/10/sys/ofed/include/linux/linux_radix.c (revision 271127) @@ -1,214 +1,215 @@ /*- * Copyright (c) 2010 Isilon Systems, Inc. * Copyright (c) 2010 iX Systems, Inc. * Copyright (c) 2010 Panasas, Inc. + * Copyright (c) 2013, 2014 Mellanox Technologies, Ltd. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice unmodified, this list of conditions, and the following * disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include #include #include #include #include #include #include #include #include static MALLOC_DEFINE(M_RADIX, "radix", "Linux radix compat"); static inline int radix_max(struct radix_tree_root *root) { return (1 << (root->height * RADIX_TREE_MAP_SHIFT)) - 1; } static inline int radix_pos(long id, int height) { return (id >> (RADIX_TREE_MAP_SHIFT * height)) & RADIX_TREE_MAP_MASK; } void * radix_tree_lookup(struct radix_tree_root *root, unsigned long index) { struct radix_tree_node *node; void *item; int height; item = NULL; node = root->rnode; height = root->height - 1; if (index > radix_max(root)) goto out; while (height && node) node = node->slots[radix_pos(index, height--)]; if (node) item = node->slots[radix_pos(index, 0)]; out: return (item); } void * radix_tree_delete(struct radix_tree_root *root, unsigned long index) { struct radix_tree_node *stack[RADIX_TREE_MAX_HEIGHT]; struct radix_tree_node *node; void *item; int height; int idx; item = NULL; node = root->rnode; height = root->height - 1; if (index > radix_max(root)) goto out; /* * Find the node and record the path in stack. */ while (height && node) { stack[height] = node; node = node->slots[radix_pos(index, height--)]; } idx = radix_pos(index, 0); if (node) item = node->slots[idx]; /* * If we removed something reduce the height of the tree. */ if (item) for (;;) { node->slots[idx] = NULL; node->count--; if (node->count > 0) break; free(node, M_RADIX); if (node == root->rnode) { root->rnode = NULL; root->height = 0; break; } height++; node = stack[height]; idx = radix_pos(index, height); } out: return (item); } int radix_tree_insert(struct radix_tree_root *root, unsigned long index, void *item) { struct radix_tree_node *node; struct radix_tree_node *temp[RADIX_TREE_MAX_HEIGHT - 1]; int height; int idx; /* bail out upon insertion of a NULL item */ if (item == NULL) return (-EINVAL); /* get root node, if any */ node = root->rnode; /* allocate root node, if any */ if (node == NULL) { node = malloc(sizeof(*node), M_RADIX, root->gfp_mask | M_ZERO); if (node == NULL) return (-ENOMEM); root->rnode = node; root->height++; } /* expand radix tree as needed */ while (radix_max(root) < index) { /* check if the radix tree is getting too big */ if (root->height == RADIX_TREE_MAX_HEIGHT) return (-E2BIG); /* * If the root radix level is not empty, we need to * allocate a new radix level: */ if (node->count != 0) { node = malloc(sizeof(*node), M_RADIX, root->gfp_mask | M_ZERO); if (node == NULL) return (-ENOMEM); node->slots[0] = root->rnode; node->count++; root->rnode = node; } root->height++; } /* get radix tree height index */ height = root->height - 1; /* walk down the tree until the first missing node, if any */ for ( ; height != 0; height--) { idx = radix_pos(index, height); if (node->slots[idx] == NULL) break; node = node->slots[idx]; } /* allocate the missing radix levels, if any */ for (idx = 0; idx != height; idx++) { temp[idx] = malloc(sizeof(*node), M_RADIX, root->gfp_mask | M_ZERO); if (temp[idx] == NULL) { while(idx--) free(temp[idx], M_RADIX); /* check if we should free the root node aswell */ if (root->rnode->count == 0) { free(root->rnode, M_RADIX); root->rnode = NULL; root->height = 0; } return (-ENOMEM); } } /* setup new radix levels, if any */ for ( ; height != 0; height--) { idx = radix_pos(index, height); node->slots[idx] = temp[height - 1]; node->count++; node = node->slots[idx]; } /* * Insert and adjust count if the item does not already exist. */ idx = radix_pos(index, 0); if (node->slots[idx]) return (-EEXIST); node->slots[idx] = item; node->count++; return (0); } Index: stable/10/sys/ofed/include/linux/list.h =================================================================== --- stable/10/sys/ofed/include/linux/list.h (revision 271126) +++ stable/10/sys/ofed/include/linux/list.h (revision 271127) @@ -1,335 +1,397 @@ /*- * Copyright (c) 2010 Isilon Systems, Inc. * Copyright (c) 2010 iX Systems, Inc. * Copyright (c) 2010 Panasas, Inc. + * Copyright (c) 2013, 2014 Mellanox Technologies, Ltd. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice unmodified, this list of conditions, and the following * disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #ifndef _LINUX_LIST_H_ #define _LINUX_LIST_H_ /* * Since LIST_HEAD conflicts with the linux definition we must include any * FreeBSD header which requires it here so it is resolved with the correct * definition prior to the undef. */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #define prefetch(x) struct list_head { struct list_head *next; struct list_head *prev; }; static inline void INIT_LIST_HEAD(struct list_head *list) { list->next = list->prev = list; } static inline int list_empty(const struct list_head *head) { return (head->next == head); } static inline void list_del(struct list_head *entry) { entry->next->prev = entry->prev; entry->prev->next = entry->next; } static inline void _list_add(struct list_head *new, struct list_head *prev, struct list_head *next) { next->prev = new; new->next = next; new->prev = prev; prev->next = new; } static inline void list_del_init(struct list_head *entry) { list_del(entry); INIT_LIST_HEAD(entry); } #define list_entry(ptr, type, field) container_of(ptr, type, field) #define list_first_entry(ptr, type, member) \ list_entry((ptr)->next, type, member) #define list_for_each(p, head) \ for (p = (head)->next; p != (head); p = p->next) #define list_for_each_safe(p, n, head) \ for (p = (head)->next, n = p->next; p != (head); p = n, n = p->next) #define list_for_each_entry(p, h, field) \ for (p = list_entry((h)->next, typeof(*p), field); &p->field != (h); \ p = list_entry(p->field.next, typeof(*p), field)) #define list_for_each_entry_safe(p, n, h, field) \ for (p = list_entry((h)->next, typeof(*p), field), \ n = list_entry(p->field.next, typeof(*p), field); &p->field != (h);\ p = n, n = list_entry(n->field.next, typeof(*n), field)) #define list_for_each_entry_reverse(p, h, field) \ for (p = list_entry((h)->prev, typeof(*p), field); &p->field != (h); \ p = list_entry(p->field.prev, typeof(*p), field)) #define list_for_each_prev(p, h) for (p = (h)->prev; p != (h); p = p->prev) static inline void list_add(struct list_head *new, struct list_head *head) { _list_add(new, head, head->next); } static inline void list_add_tail(struct list_head *new, struct list_head *head) { _list_add(new, head->prev, head); } static inline void list_move(struct list_head *list, struct list_head *head) { list_del(list); list_add(list, head); } static inline void list_move_tail(struct list_head *entry, struct list_head *head) { list_del(entry); list_add_tail(entry, head); } static inline void _list_splice(const struct list_head *list, struct list_head *prev, struct list_head *next) { struct list_head *first; struct list_head *last; if (list_empty(list)) return; first = list->next; last = list->prev; first->prev = prev; prev->next = first; last->next = next; next->prev = last; } static inline void list_splice(const struct list_head *list, struct list_head *head) { _list_splice(list, head, head->next); } static inline void list_splice_tail(struct list_head *list, struct list_head *head) { _list_splice(list, head->prev, head); } static inline void list_splice_init(struct list_head *list, struct list_head *head) { _list_splice(list, head, head->next); INIT_LIST_HEAD(list); } static inline void list_splice_tail_init(struct list_head *list, struct list_head *head) { _list_splice(list, head->prev, head); INIT_LIST_HEAD(list); } #undef LIST_HEAD #define LIST_HEAD(name) struct list_head name = { &(name), &(name) } struct hlist_head { struct hlist_node *first; }; struct hlist_node { struct hlist_node *next, **pprev; }; #define HLIST_HEAD_INIT { } #define HLIST_HEAD(name) struct hlist_head name = HLIST_HEAD_INIT #define INIT_HLIST_HEAD(head) (head)->first = NULL #define INIT_HLIST_NODE(node) \ do { \ (node)->next = NULL; \ (node)->pprev = NULL; \ } while (0) static inline int hlist_unhashed(const struct hlist_node *h) { return !h->pprev; } static inline int hlist_empty(const struct hlist_head *h) { return !h->first; } static inline void hlist_del(struct hlist_node *n) { if (n->next) n->next->pprev = n->pprev; *n->pprev = n->next; } static inline void hlist_del_init(struct hlist_node *n) { if (hlist_unhashed(n)) return; hlist_del(n); INIT_HLIST_NODE(n); } static inline void hlist_add_head(struct hlist_node *n, struct hlist_head *h) { n->next = h->first; if (h->first) h->first->pprev = &n->next; h->first = n; n->pprev = &h->first; } static inline void hlist_add_before(struct hlist_node *n, struct hlist_node *next) { n->pprev = next->pprev; n->next = next; next->pprev = &n->next; *(n->pprev) = n; } static inline void hlist_add_after(struct hlist_node *n, struct hlist_node *next) { next->next = n->next; n->next = next; next->pprev = &n->next; if (next->next) next->next->pprev = &next->next; } static inline void hlist_move_list(struct hlist_head *old, struct hlist_head *new) { new->first = old->first; if (new->first) new->first->pprev = &new->first; old->first = NULL; } + +/** + * list_is_singular - tests whether a list has just one entry. + * @head: the list to test. + */ +static inline int list_is_singular(const struct list_head *head) +{ + return !list_empty(head) && (head->next == head->prev); +} + +static inline void __list_cut_position(struct list_head *list, + struct list_head *head, struct list_head *entry) +{ + struct list_head *new_first = entry->next; + list->next = head->next; + list->next->prev = list; + list->prev = entry; + entry->next = list; + head->next = new_first; + new_first->prev = head; +} + +/** + * list_cut_position - cut a list into two + * @list: a new list to add all removed entries + * @head: a list with entries + * @entry: an entry within head, could be the head itself + * and if so we won't cut the list + * + * This helper moves the initial part of @head, up to and + * including @entry, from @head to @list. You should + * pass on @entry an element you know is on @head. @list + * should be an empty list or a list you do not care about + * losing its data. + * + */ +static inline void list_cut_position(struct list_head *list, + struct list_head *head, struct list_head *entry) +{ + if (list_empty(head)) + return; + if (list_is_singular(head) && + (head->next != entry && head != entry)) + return; + if (entry == head) + INIT_LIST_HEAD(list); + else + __list_cut_position(list, head, entry); +} + +/** + * list_is_last - tests whether @list is the last entry in list @head + * @list: the entry to test + * @head: the head of the list + */ +static inline int list_is_last(const struct list_head *list, + const struct list_head *head) +{ + return list->next == head; +} #define hlist_entry(ptr, type, field) container_of(ptr, type, field) #define hlist_for_each(p, head) \ for (p = (head)->first; p; p = p->next) #define hlist_for_each_safe(p, n, head) \ for (p = (head)->first; p && ({ n = p->next; 1; }); p = n) #define hlist_for_each_entry(tp, p, head, field) \ for (p = (head)->first; \ p ? (tp = hlist_entry(p, typeof(*tp), field)): NULL; p = p->next) #define hlist_for_each_entry_continue(tp, p, field) \ for (p = (p)->next; \ p ? (tp = hlist_entry(p, typeof(*tp), field)): NULL; p = p->next) #define hlist_for_each_entry_from(tp, p, field) \ for (; p ? (tp = hlist_entry(p, typeof(*tp), field)): NULL; p = p->next) -#define hlist_for_each_entry_safe(tp, p, n, head, field) \ - for (p = (head)->first; p ? \ - (n = p->next) | (tp = hlist_entry(p, typeof(*tp), field)) : \ - NULL; p = n) +#define hlist_for_each_entry_safe(tpos, pos, n, head, member) \ + for (pos = (head)->first; \ + (pos) != 0 && ({ n = (pos)->next; \ + tpos = hlist_entry((pos), typeof(*(tpos)), member); 1;}); \ + pos = (n)) #endif /* _LINUX_LIST_H_ */ Index: stable/10/sys/ofed/include/linux/lockdep.h =================================================================== --- stable/10/sys/ofed/include/linux/lockdep.h (revision 271126) +++ stable/10/sys/ofed/include/linux/lockdep.h (revision 271127) @@ -1,37 +1,40 @@ /*- * Copyright (c) 2010 Isilon Systems, Inc. * Copyright (c) 2010 iX Systems, Inc. * Copyright (c) 2010 Panasas, Inc. + * Copyright (c) 2013, 2014 Mellanox Technologies, Ltd. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice unmodified, this list of conditions, and the following * disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #ifndef _LINUX_LOCKDEP_H_ #define _LINUX_LOCKDEP_H_ struct lock_class_key { }; #define lockdep_set_class(lock, key) -#endif /* _LINUX_LOCKDEP_H_ */ +#define lockdep_set_class_and_name(lock, key, name) + +#endif /* _LINUX_LOCKDEP_H_ */ Index: stable/10/sys/ofed/include/linux/log2.h =================================================================== --- stable/10/sys/ofed/include/linux/log2.h (revision 271126) +++ stable/10/sys/ofed/include/linux/log2.h (revision 271127) @@ -1,169 +1,170 @@ /*- * Copyright (c) 2010 Isilon Systems, Inc. * Copyright (c) 2010 iX Systems, Inc. * Copyright (c) 2010 Panasas, Inc. + * Copyright (c) 2013, 2014 Mellanox Technologies, Ltd. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice unmodified, this list of conditions, and the following * disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #ifndef _LINUX_LOG2_H_ #define _LINUX_LOG2_H_ #include #include static inline unsigned long roundup_pow_of_two(unsigned long x) { return (1UL << flsl(x - 1)); } static inline int is_power_of_2(unsigned long n) { return (n == roundup_pow_of_two(n)); } static inline unsigned long rounddown_pow_of_two(unsigned long x) { return (1UL << (flsl(x) - 1)); } /* * deal with unrepresentable constant logarithms */ extern __attribute__((const, noreturn)) int ____ilog2_NaN(void); /* * non-constant log of base 2 calculators * - the arch may override these in asm/bitops.h if they can be implemented * more efficiently than using fls() and fls64() * - the arch is not required to handle n==0 if implementing the fallback */ #ifndef CONFIG_ARCH_HAS_ILOG2_U32 static inline __attribute__((const)) int __ilog2_u32(u32 n) { return flsl(n) - 1; } #endif #ifndef CONFIG_ARCH_HAS_ILOG2_U64 static inline __attribute__((const)) int __ilog2_u64(u64 n) { return flsl(n) - 1; } #endif /** * ilog2 - log of base 2 of 32-bit or a 64-bit unsigned value * @n - parameter * * constant-capable log of base 2 calculation * - this can be used to initialise global variables from constant data, hence * the massive ternary operator construction * * selects the appropriately-sized optimised version depending on sizeof(n) */ #define ilog2(n) \ ( \ __builtin_constant_p(n) ? ( \ (n) < 1 ? ____ilog2_NaN() : \ (n) & (1ULL << 63) ? 63 : \ (n) & (1ULL << 62) ? 62 : \ (n) & (1ULL << 61) ? 61 : \ (n) & (1ULL << 60) ? 60 : \ (n) & (1ULL << 59) ? 59 : \ (n) & (1ULL << 58) ? 58 : \ (n) & (1ULL << 57) ? 57 : \ (n) & (1ULL << 56) ? 56 : \ (n) & (1ULL << 55) ? 55 : \ (n) & (1ULL << 54) ? 54 : \ (n) & (1ULL << 53) ? 53 : \ (n) & (1ULL << 52) ? 52 : \ (n) & (1ULL << 51) ? 51 : \ (n) & (1ULL << 50) ? 50 : \ (n) & (1ULL << 49) ? 49 : \ (n) & (1ULL << 48) ? 48 : \ (n) & (1ULL << 47) ? 47 : \ (n) & (1ULL << 46) ? 46 : \ (n) & (1ULL << 45) ? 45 : \ (n) & (1ULL << 44) ? 44 : \ (n) & (1ULL << 43) ? 43 : \ (n) & (1ULL << 42) ? 42 : \ (n) & (1ULL << 41) ? 41 : \ (n) & (1ULL << 40) ? 40 : \ (n) & (1ULL << 39) ? 39 : \ (n) & (1ULL << 38) ? 38 : \ (n) & (1ULL << 37) ? 37 : \ (n) & (1ULL << 36) ? 36 : \ (n) & (1ULL << 35) ? 35 : \ (n) & (1ULL << 34) ? 34 : \ (n) & (1ULL << 33) ? 33 : \ (n) & (1ULL << 32) ? 32 : \ (n) & (1ULL << 31) ? 31 : \ (n) & (1ULL << 30) ? 30 : \ (n) & (1ULL << 29) ? 29 : \ (n) & (1ULL << 28) ? 28 : \ (n) & (1ULL << 27) ? 27 : \ (n) & (1ULL << 26) ? 26 : \ (n) & (1ULL << 25) ? 25 : \ (n) & (1ULL << 24) ? 24 : \ (n) & (1ULL << 23) ? 23 : \ (n) & (1ULL << 22) ? 22 : \ (n) & (1ULL << 21) ? 21 : \ (n) & (1ULL << 20) ? 20 : \ (n) & (1ULL << 19) ? 19 : \ (n) & (1ULL << 18) ? 18 : \ (n) & (1ULL << 17) ? 17 : \ (n) & (1ULL << 16) ? 16 : \ (n) & (1ULL << 15) ? 15 : \ (n) & (1ULL << 14) ? 14 : \ (n) & (1ULL << 13) ? 13 : \ (n) & (1ULL << 12) ? 12 : \ (n) & (1ULL << 11) ? 11 : \ (n) & (1ULL << 10) ? 10 : \ (n) & (1ULL << 9) ? 9 : \ (n) & (1ULL << 8) ? 8 : \ (n) & (1ULL << 7) ? 7 : \ (n) & (1ULL << 6) ? 6 : \ (n) & (1ULL << 5) ? 5 : \ (n) & (1ULL << 4) ? 4 : \ (n) & (1ULL << 3) ? 3 : \ (n) & (1ULL << 2) ? 2 : \ (n) & (1ULL << 1) ? 1 : \ (n) & (1ULL << 0) ? 0 : \ ____ilog2_NaN() \ ) : \ (sizeof(n) <= 4) ? \ __ilog2_u32(n) : \ __ilog2_u64(n) \ ) #endif /* _LINUX_LOG2_H_ */ Index: stable/10/sys/ofed/include/linux/math64.h =================================================================== --- stable/10/sys/ofed/include/linux/math64.h (nonexistent) +++ stable/10/sys/ofed/include/linux/math64.h (revision 271127) @@ -0,0 +1,133 @@ +/*- + * Copyright (c) 2007 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2014 Mellanox Technologies, Ltd. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice unmodified, this list of conditions, and the following + * disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _LINUX_MATH64_H +#define _LINUX_MATH64_H + +#include +#include + +#if BITS_PER_LONG == 64 + +# define do_div(n, base) ({ \ + uint32_t __base = (base); \ + uint32_t __rem; \ + __rem = ((uint64_t)(n)) % __base; \ + (n) = ((uint64_t)(n)) / __base; \ + __rem; \ +}) + +/** +* div_u64_rem - unsigned 64bit divide with 32bit divisor with remainder +* +* This is commonly provided by 32bit archs to provide an optimized 64bit +* divide. +*/ +static inline u64 div_u64_rem(u64 dividend, u32 divisor, u32 *remainder) +{ + *remainder = dividend % divisor; + return dividend / divisor; +} + + +#elif BITS_PER_LONG == 32 + +static uint32_t __div64_32(uint64_t *n, uint32_t base) +{ + uint64_t rem = *n; + uint64_t b = base; + uint64_t res, d = 1; + uint32_t high = rem >> 32; + + /* Reduce the thing a bit first */ + res = 0; + if (high >= base) { + high /= base; + res = (uint64_t) high << 32; + rem -= (uint64_t) (high*base) << 32; + } + + while ((int64_t)b > 0 && b < rem) { + b = b+b; + d = d+d; + } + + do { + if (rem >= b) { + rem -= b; + res += d; + } + b >>= 1; + d >>= 1; + } while (d); + + *n = res; + return rem; +} + +# define do_div(n, base) ({ \ + uint32_t __base = (base); \ + uint32_t __rem; \ + (void)(((typeof((n)) *)0) == ((uint64_t *)0)); \ + if (likely(((n) >> 32) == 0)) { \ + __rem = (uint32_t)(n) % __base; \ + (n) = (uint32_t)(n) / __base; \ + } else \ + __rem = __div64_32(&(n), __base); \ + __rem; \ +}) + +#ifndef div_u64_rem +static inline u64 div_u64_rem(u64 dividend, u32 divisor, u32 *remainder) +{ + *remainder = do_div(dividend, divisor); + return dividend; +} +#endif + + +#endif /* BITS_PER_LONG */ + + + +/** + ** div_u64 - unsigned 64bit divide with 32bit divisor + ** + ** This is the most common 64bit divide and should be used if possible, + ** as many 32bit archs can optimize this variant better than a full 64bit + ** divide. + * */ +#ifndef div_u64 + +static inline u64 div_u64(u64 dividend, u32 divisor) +{ + u32 remainder; + return div_u64_rem(dividend, divisor, &remainder); +} +#endif + +#endif /* _LINUX_MATH64_H */ Property changes on: stable/10/sys/ofed/include/linux/math64.h ___________________________________________________________________ Added: fbsd:nokeywords ## -0,0 +1 ## +true \ No newline at end of property Index: stable/10/sys/ofed/include/linux/miscdevice.h =================================================================== --- stable/10/sys/ofed/include/linux/miscdevice.h (revision 271126) +++ stable/10/sys/ofed/include/linux/miscdevice.h (revision 271127) @@ -1,72 +1,75 @@ /*- * Copyright (c) 2010 Isilon Systems, Inc. * Copyright (c) 2010 iX Systems, Inc. * Copyright (c) 2010 Panasas, Inc. + * Copyright (c) 2013, 2014 Mellanox Technologies, Ltd. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice unmodified, this list of conditions, and the following * disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #ifndef _LINUX_MISCDEVICE_H_ #define _LINUX_MISCDEVICE_H_ #define MISC_DYNAMIC_MINOR -1 #include #include struct miscdevice { const char *name; struct device *this_device; const struct file_operations *fops; struct cdev *cdev; int minor; + const char *nodename; + umode_t mode; }; extern struct class miscclass; static inline int misc_register(struct miscdevice *misc) { misc->this_device = device_create(&miscclass, &linux_rootdev, 0, misc, misc->name); misc->cdev = cdev_alloc(); if (misc->cdev == NULL) return -ENOMEM; misc->cdev->owner = THIS_MODULE; misc->cdev->ops = misc->fops; kobject_set_name(&misc->cdev->kobj, misc->name); if (cdev_add(misc->cdev, misc->this_device->devt, 1)) return -EINVAL; return (0); } static inline int misc_deregister(struct miscdevice *misc) { device_destroy(&miscclass, misc->this_device->devt); cdev_del(misc->cdev); return (0); } #endif /* _LINUX_MISCDEVICE_H_ */ Index: stable/10/sys/ofed/include/linux/mm.h =================================================================== --- stable/10/sys/ofed/include/linux/mm.h (revision 271126) +++ stable/10/sys/ofed/include/linux/mm.h (revision 271127) @@ -1,84 +1,85 @@ /*- * Copyright (c) 2010 Isilon Systems, Inc. * Copyright (c) 2010 iX Systems, Inc. * Copyright (c) 2010 Panasas, Inc. + * Copyright (c) 2013, 2014 Mellanox Technologies, Ltd. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice unmodified, this list of conditions, and the following * disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #ifndef _LINUX_MM_H_ #define _LINUX_MM_H_ #include #include #include #define PAGE_ALIGN(x) ALIGN(x, PAGE_SIZE) struct vm_area_struct { vm_offset_t vm_start; vm_offset_t vm_end; vm_offset_t vm_pgoff; vm_paddr_t vm_pfn; /* PFN For mmap. */ vm_memattr_t vm_page_prot; }; /* * Compute log2 of the power of two rounded up count of pages * needed for size bytes. */ static inline int get_order(unsigned long size) { int order; size = (size - 1) >> PAGE_SHIFT; order = 0; while (size) { order++; size >>= 1; } return (order); } static inline void * lowmem_page_address(struct page *page) { return page_address(page); } /* * This only works via mmap ops. */ static inline int io_remap_pfn_range(struct vm_area_struct *vma, unsigned long addr, unsigned long pfn, unsigned long size, vm_memattr_t prot) { vma->vm_page_prot = prot; vma->vm_pfn = pfn; return (0); } #endif /* _LINUX_MM_H_ */ Index: stable/10/sys/ofed/include/linux/module.h =================================================================== --- stable/10/sys/ofed/include/linux/module.h (revision 271126) +++ stable/10/sys/ofed/include/linux/module.h (revision 271127) @@ -1,92 +1,99 @@ /*- * Copyright (c) 2010 Isilon Systems, Inc. * Copyright (c) 2010 iX Systems, Inc. * Copyright (c) 2010 Panasas, Inc. + * Copyright (c) 2013, 2014 Mellanox Technologies, Ltd. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice unmodified, this list of conditions, and the following * disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #ifndef _LINUX_MODULE_H_ #define _LINUX_MODULE_H_ #include #include #include #include #include #define MODULE_AUTHOR(name) #define MODULE_DESCRIPTION(name) #define MODULE_LICENSE(name) -#define MODULE_VERSION(name) +#ifndef MODULE_VERSION +#define MODULE_VERSION(name) +#endif + #define THIS_MODULE ((struct module *)0) #define EXPORT_SYMBOL(name) #define EXPORT_SYMBOL_GPL(name) /* OFED pre-module initialization */ #define SI_SUB_OFED_PREINIT (SI_SUB_KTHREAD_INIT - 2) /* OFED default module initialization */ #define SI_SUB_OFED_MODINIT (SI_SUB_KTHREAD_INIT - 1) #include static inline void _module_run(void *arg) { void (*fn)(void); #ifdef OFED_DEBUG_INIT char name[1024]; caddr_t pc; long offset; pc = (caddr_t)arg; if (linker_search_symbol_name(pc, name, sizeof(name), &offset) != 0) printf("Running ??? (%p)\n", pc); else printf("Running %s (%p)\n", name, pc); #endif fn = arg; DROP_GIANT(); fn(); PICKUP_GIANT(); } #define module_init(fn) \ SYSINIT(fn, SI_SUB_OFED_MODINIT, SI_ORDER_FIRST, _module_run, (fn)) +#define module_exit(fn) \ + SYSUNINIT(fn, SI_SUB_OFED_MODINIT, SI_ORDER_SECOND, _module_run, (fn)) + /* - * XXX This is a freebsdism designed to work around not having a module - * load order resolver built in. + * The following two macros are a workaround for not having a module + * load and unload order resolver: */ #define module_init_order(fn, order) \ SYSINIT(fn, SI_SUB_OFED_MODINIT, (order), _module_run, (fn)) -#define module_exit(fn) \ - SYSUNINIT(fn, SI_SUB_OFED_MODINIT, SI_ORDER_FIRST, _module_run, (fn)) +#define module_exit_order(fn, order) \ + SYSUNINIT(fn, SI_SUB_OFED_MODINIT, (order), _module_run, (fn)) #define module_get(module) #define module_put(module) #define try_module_get(module) 1 #endif /* _LINUX_MODULE_H_ */ Index: stable/10/sys/ofed/include/linux/moduleparam.h =================================================================== --- stable/10/sys/ofed/include/linux/moduleparam.h (revision 271126) +++ stable/10/sys/ofed/include/linux/moduleparam.h (revision 271127) @@ -1,229 +1,233 @@ /*- * Copyright (c) 2010 Isilon Systems, Inc. * Copyright (c) 2010 iX Systems, Inc. * Copyright (c) 2010 Panasas, Inc. + * Copyright (c) 2013, 2014 Mellanox Technologies, Ltd. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice unmodified, this list of conditions, and the following * disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ + #ifndef _LINUX_MODULEPARAM_H_ #define _LINUX_MODULEPARAM_H_ #include /* * These are presently not hooked up to anything. In linux the parameters * can be set when modules are loaded. On FreeBSD these could be mapped * to kenv in the future. */ struct kernel_param; typedef int (*param_set_fn)(const char *val, struct kernel_param *kp); typedef int (*param_get_fn)(char *buffer, struct kernel_param *kp); struct kernel_param { const char *name; u16 perm; u16 flags; param_set_fn set; param_get_fn get; union { void *arg; struct kparam_string *str; struct kparam_array *arr; } un; }; #define KPARAM_ISBOOL 2 struct kparam_string { unsigned int maxlen; char *string; }; struct kparam_array { unsigned int max; unsigned int *num; param_set_fn set; param_get_fn get; unsigned int elemsize; void *elem; }; static inline void param_sysinit(struct kernel_param *param) { } #define module_param_call(name, set, get, arg, perm) \ static struct kernel_param __param_##name = \ { #name, perm, 0, set, get, { arg } }; \ SYSINIT(name##_param_sysinit, SI_SUB_DRIVERS, SI_ORDER_FIRST, \ param_sysinit, &__param_##name); +#define module_param_string(name, string, len, perm) + #define module_param_named(name, var, type, mode) \ module_param_call(name, param_set_##type, param_get_##type, &var, mode) #define module_param(var, type, mode) \ module_param_named(var, var, type, mode) #define module_param_array(var, type, addr_argc, mode) \ module_param_named(var, var, type, mode) #define MODULE_PARM_DESC(name, desc) static inline int param_set_byte(const char *val, struct kernel_param *kp) { return 0; } static inline int param_get_byte(char *buffer, struct kernel_param *kp) { return 0; } static inline int param_set_short(const char *val, struct kernel_param *kp) { return 0; } static inline int param_get_short(char *buffer, struct kernel_param *kp) { return 0; } static inline int param_set_ushort(const char *val, struct kernel_param *kp) { return 0; } static inline int param_get_ushort(char *buffer, struct kernel_param *kp) { return 0; } static inline int param_set_int(const char *val, struct kernel_param *kp) { return 0; } static inline int param_get_int(char *buffer, struct kernel_param *kp) { return 0; } static inline int param_set_uint(const char *val, struct kernel_param *kp) { return 0; } static inline int param_get_uint(char *buffer, struct kernel_param *kp) { return 0; } static inline int param_set_long(const char *val, struct kernel_param *kp) { return 0; } static inline int param_get_long(char *buffer, struct kernel_param *kp) { return 0; } static inline int param_set_ulong(const char *val, struct kernel_param *kp) { return 0; } static inline int param_get_ulong(char *buffer, struct kernel_param *kp) { return 0; } static inline int param_set_charp(const char *val, struct kernel_param *kp) { return 0; } static inline int param_get_charp(char *buffer, struct kernel_param *kp) { return 0; } static inline int param_set_bool(const char *val, struct kernel_param *kp) { return 0; } static inline int param_get_bool(char *buffer, struct kernel_param *kp) { return 0; } #endif /* _LINUX_MODULEPARAM_H_ */ Index: stable/10/sys/ofed/include/linux/mount.h =================================================================== --- stable/10/sys/ofed/include/linux/mount.h (revision 271126) +++ stable/10/sys/ofed/include/linux/mount.h (revision 271127) @@ -1,33 +1,34 @@ /*- * Copyright (c) 2010 Isilon Systems, Inc. * Copyright (c) 2010 iX Systems, Inc. * Copyright (c) 2010 Panasas, Inc. + * Copyright (c) 2013, 2014 Mellanox Technologies, Ltd. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice unmodified, this list of conditions, and the following * disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #ifndef _LINUX_MOUNT_H_ #define _LINUX_MOUNT_H_ #endif /* _LINUX_MOUNT_H_ */ Index: stable/10/sys/ofed/include/linux/mutex.h =================================================================== --- stable/10/sys/ofed/include/linux/mutex.h (revision 271126) +++ stable/10/sys/ofed/include/linux/mutex.h (revision 271127) @@ -1,61 +1,62 @@ /*- * Copyright (c) 2010 Isilon Systems, Inc. * Copyright (c) 2010 iX Systems, Inc. * Copyright (c) 2010 Panasas, Inc. + * Copyright (c) 2013, 2014 Mellanox Technologies, Ltd. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice unmodified, this list of conditions, and the following * disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #ifndef _LINUX_MUTEX_H_ #define _LINUX_MUTEX_H_ #include #include #include #include typedef struct mutex { struct sx sx; } mutex_t; #define mutex_lock(_m) sx_xlock(&(_m)->sx) #define mutex_lock_nested(_m, _s) mutex_lock(_m) #define mutex_lock_interruptible(_m) ({ mutex_lock((_m)); 0; }) #define mutex_unlock(_m) sx_xunlock(&(_m)->sx) #define mutex_trylock(_m) !!sx_try_xlock(&(_m)->sx) #define DEFINE_MUTEX(lock) \ mutex_t lock; \ SX_SYSINIT_FLAGS(lock, &(lock).sx, "lnxmtx", SX_NOWITNESS) static inline void linux_mutex_init(mutex_t *m) { memset(&m->sx, 0, sizeof(m->sx)); sx_init_flags(&m->sx, "lnxmtx", SX_NOWITNESS); } #define mutex_init linux_mutex_init #endif /* _LINUX_MUTEX_H_ */ Index: stable/10/sys/ofed/include/linux/net.h =================================================================== --- stable/10/sys/ofed/include/linux/net.h (revision 271126) +++ stable/10/sys/ofed/include/linux/net.h (revision 271127) @@ -1,73 +1,74 @@ /*- * Copyright (c) 2010 Isilon Systems, Inc. * Copyright (c) 2010 iX Systems, Inc. * Copyright (c) 2010 Panasas, Inc. + * Copyright (c) 2013, 2014 Mellanox Technologies, Ltd. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice unmodified, this list of conditions, and the following * disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #ifndef _LINUX_NET_H_ #define _LINUX_NET_H_ #include #include #include static inline int sock_create_kern(int family, int type, int proto, struct socket **res) { return -socreate(family, res, type, proto, curthread->td_ucred, curthread); } static inline int sock_getname(struct socket *so, struct sockaddr *addr, int *sockaddr_len, int peer) { struct sockaddr *nam; int error; nam = NULL; if (peer) { if ((so->so_state & (SS_ISCONNECTED|SS_ISCONFIRMING)) == 0) return (-ENOTCONN); error = (*so->so_proto->pr_usrreqs->pru_peeraddr)(so, &nam); } else error = (*so->so_proto->pr_usrreqs->pru_sockaddr)(so, &nam); if (error) return (-error); *addr = *nam; *sockaddr_len = addr->sa_len; free(nam, M_SONAME); return (0); } static inline void sock_release(struct socket *so) { soclose(so); } #endif /* _LINUX_NET_H_ */ Index: stable/10/sys/ofed/include/linux/netdevice.h =================================================================== --- stable/10/sys/ofed/include/linux/netdevice.h (revision 271126) +++ stable/10/sys/ofed/include/linux/netdevice.h (revision 271127) @@ -1,159 +1,204 @@ /*- * Copyright (c) 2010 Isilon Systems, Inc. * Copyright (c) 2010 iX Systems, Inc. * Copyright (c) 2010 Panasas, Inc. + * Copyright (c) 2013, 2014 Mellanox Technologies, Ltd. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice unmodified, this list of conditions, and the following * disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #ifndef _LINUX_NETDEVICE_H_ #define _LINUX_NETDEVICE_H_ #include #include #include #include #include #include #include #include #include #include #include #include struct net { }; extern struct net init_net; #define MAX_ADDR_LEN 20 #define net_device ifnet #define dev_get_by_index(n, idx) ifnet_byindex_ref((idx)) #define dev_hold(d) if_ref((d)) #define dev_put(d) if_rele((d)) #define netif_running(dev) !!((dev)->if_drv_flags & IFF_DRV_RUNNING) #define netif_oper_up(dev) !!((dev)->if_flags & IFF_UP) #define netif_carrier_ok(dev) netif_running(dev) static inline void * netdev_priv(const struct net_device *dev) { return (dev->if_softc); } static inline void _handle_ifnet_link_event(void *arg, struct ifnet *ifp, int linkstate) { struct notifier_block *nb; nb = arg; if (linkstate == LINK_STATE_UP) nb->notifier_call(nb, NETDEV_UP, ifp); else nb->notifier_call(nb, NETDEV_DOWN, ifp); } static inline void _handle_ifnet_arrival_event(void *arg, struct ifnet *ifp) { struct notifier_block *nb; nb = arg; nb->notifier_call(nb, NETDEV_REGISTER, ifp); } static inline void _handle_ifnet_departure_event(void *arg, struct ifnet *ifp) { struct notifier_block *nb; nb = arg; nb->notifier_call(nb, NETDEV_UNREGISTER, ifp); } +static inline void +_handle_iflladdr_event(void *arg, struct ifnet *ifp) +{ + struct notifier_block *nb; + + nb = arg; + nb->notifier_call(nb, NETDEV_CHANGEADDR, ifp); +} + +static inline void +_handle_ifaddr_event(void *arg, struct ifnet *ifp) +{ + struct notifier_block *nb; + + nb = arg; + nb->notifier_call(nb, NETDEV_CHANGEIFADDR, ifp); +} + static inline int register_netdevice_notifier(struct notifier_block *nb) { nb->tags[NETDEV_UP] = EVENTHANDLER_REGISTER( ifnet_link_event, _handle_ifnet_link_event, nb, 0); nb->tags[NETDEV_REGISTER] = EVENTHANDLER_REGISTER( ifnet_arrival_event, _handle_ifnet_arrival_event, nb, 0); nb->tags[NETDEV_UNREGISTER] = EVENTHANDLER_REGISTER( ifnet_departure_event, _handle_ifnet_departure_event, nb, 0); + nb->tags[NETDEV_CHANGEADDR] = EVENTHANDLER_REGISTER( + iflladdr_event, _handle_iflladdr_event, nb, 0); + return (0); } static inline int +register_inetaddr_notifier(struct notifier_block *nb) +{ + + nb->tags[NETDEV_CHANGEIFADDR] = EVENTHANDLER_REGISTER( + ifaddr_event, _handle_ifaddr_event, nb, 0); + return (0); +} + +static inline int unregister_netdevice_notifier(struct notifier_block *nb) { EVENTHANDLER_DEREGISTER(ifnet_link_event, nb->tags[NETDEV_UP]); EVENTHANDLER_DEREGISTER(ifnet_arrival_event, nb->tags[NETDEV_REGISTER]); EVENTHANDLER_DEREGISTER(ifnet_departure_event, nb->tags[NETDEV_UNREGISTER]); + EVENTHANDLER_DEREGISTER(iflladdr_event, + nb->tags[NETDEV_CHANGEADDR]); + return (0); } + +static inline int +unregister_inetaddr_notifier(struct notifier_block *nb) +{ + + EVENTHANDLER_DEREGISTER(ifaddr_event, + nb->tags[NETDEV_CHANGEIFADDR]); + + return (0); +} + #define rtnl_lock() #define rtnl_unlock() static inline int dev_mc_delete(struct net_device *dev, void *addr, int alen, int all) { struct sockaddr_dl sdl; if (alen > sizeof(sdl.sdl_data)) return (-EINVAL); memset(&sdl, 0, sizeof(sdl)); sdl.sdl_len = sizeof(sdl); sdl.sdl_family = AF_LINK; sdl.sdl_alen = alen; memcpy(&sdl.sdl_data, addr, alen); return -if_delmulti(dev, (struct sockaddr *)&sdl); } static inline int dev_mc_add(struct net_device *dev, void *addr, int alen, int newonly) { struct sockaddr_dl sdl; if (alen > sizeof(sdl.sdl_data)) return (-EINVAL); memset(&sdl, 0, sizeof(sdl)); sdl.sdl_len = sizeof(sdl); sdl.sdl_family = AF_LINK; sdl.sdl_alen = alen; memcpy(&sdl.sdl_data, addr, alen); return -if_addmulti(dev, (struct sockaddr *)&sdl, NULL); } #endif /* _LINUX_NETDEVICE_H_ */ Index: stable/10/sys/ofed/include/linux/notifier.h =================================================================== --- stable/10/sys/ofed/include/linux/notifier.h (revision 271126) +++ stable/10/sys/ofed/include/linux/notifier.h (revision 271127) @@ -1,54 +1,57 @@ /*- * Copyright (c) 2010 Isilon Systems, Inc. * Copyright (c) 2010 iX Systems, Inc. * Copyright (c) 2010 Panasas, Inc. + * Copyright (c) 2013, 2014 Mellanox Technologies, Ltd. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice unmodified, this list of conditions, and the following * disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #ifndef _LINUX_NOTIFIER_H_ #define _LINUX_NOTIFIER_H_ #include /* * Max number of FreeBSD events to map to Linux events per notify type. */ #define NOTIFY_DONE 0 -#define _NOTIFY_COUNT 5 +#define _NOTIFY_COUNT 7 struct notifier_block { int (*notifier_call)(struct notifier_block *, unsigned long, void *); struct notifier_block *next; int priority; eventhandler_tag tags[_NOTIFY_COUNT]; }; /* Values must be less than NOTIFY_COUNT */ #define NETDEV_UP 0x0001 #define NETDEV_DOWN 0x0002 #define NETDEV_REGISTER 0x0003 #define NETDEV_UNREGISTER 0x0004 +#define NETDEV_CHANGEADDR 0x0005 +#define NETDEV_CHANGEIFADDR 0x0006 #endif /* _LINUX_NOTIFIER_H_ */ Index: stable/10/sys/ofed/include/linux/page.h =================================================================== --- stable/10/sys/ofed/include/linux/page.h (revision 271126) +++ stable/10/sys/ofed/include/linux/page.h (revision 271127) @@ -1,50 +1,51 @@ /*- * Copyright (c) 2010 Isilon Systems, Inc. * Copyright (c) 2010 iX Systems, Inc. * Copyright (c) 2010 Panasas, Inc. + * Copyright (c) 2013, 2014 Mellanox Technologies, Ltd. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice unmodified, this list of conditions, and the following * disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #ifndef _LINUX_PAGE_H_ #define _LINUX_PAGE_H_ #include #include #include #include #include #define page vm_page #define virt_to_page(x) PHYS_TO_VM_PAGE(vtophys((x))) #define clear_page(page) memset((page), 0, PAGE_SIZE) #define pgprot_noncached(prot) VM_MEMATTR_UNCACHEABLE #define pgprot_writecombine(prot) VM_MEMATTR_WRITE_COMBINING #undef PAGE_MASK #define PAGE_MASK (~(PAGE_SIZE-1)) #endif /* _LINUX_PAGE_H_ */ Index: stable/10/sys/ofed/include/linux/pci.h =================================================================== --- stable/10/sys/ofed/include/linux/pci.h (revision 271126) +++ stable/10/sys/ofed/include/linux/pci.h (revision 271127) @@ -1,689 +1,830 @@ /*- * Copyright (c) 2010 Isilon Systems, Inc. * Copyright (c) 2010 iX Systems, Inc. * Copyright (c) 2010 Panasas, Inc. + * Copyright (c) 2013, 2014 Mellanox Technologies, Ltd. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice unmodified, this list of conditions, and the following * disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #ifndef _LINUX_PCI_H_ #define _LINUX_PCI_H_ #define CONFIG_PCI_MSI #include #include #include #include #include #include #include #include #include -#include #include #include #include #include #include #include #include struct pci_device_id { uint32_t vendor; uint32_t device; uint32_t subvendor; uint32_t subdevice; uint32_t class_mask; uintptr_t driver_data; }; #define MODULE_DEVICE_TABLE(bus, table) #define PCI_ANY_ID (-1) #define PCI_VENDOR_ID_MELLANOX 0x15b3 #define PCI_VENDOR_ID_TOPSPIN 0x1867 #define PCI_DEVICE_ID_MELLANOX_TAVOR 0x5a44 #define PCI_DEVICE_ID_MELLANOX_TAVOR_BRIDGE 0x5a46 #define PCI_DEVICE_ID_MELLANOX_ARBEL_COMPAT 0x6278 #define PCI_DEVICE_ID_MELLANOX_ARBEL 0x6282 #define PCI_DEVICE_ID_MELLANOX_SINAI_OLD 0x5e8c #define PCI_DEVICE_ID_MELLANOX_SINAI 0x6274 #define PCI_DEVFN(slot, func) ((((slot) & 0x1f) << 3) | ((func) & 0x07)) #define PCI_SLOT(devfn) (((devfn) >> 3) & 0x1f) #define PCI_FUNC(devfn) ((devfn) & 0x07) #define PCI_VDEVICE(_vendor, _device) \ .vendor = PCI_VENDOR_ID_##_vendor, .device = (_device), \ .subvendor = PCI_ANY_ID, .subdevice = PCI_ANY_ID #define PCI_DEVICE(_vendor, _device) \ .vendor = (_vendor), .device = (_device), \ .subvendor = PCI_ANY_ID, .subdevice = PCI_ANY_ID #define to_pci_dev(n) container_of(n, struct pci_dev, dev) -#define PCI_VENDOR_ID PCIR_DEVVENDOR -#define PCI_COMMAND PCIR_COMMAND -#define PCI_EXP_DEVCTL PCIER_DEVICE_CTL -#define PCI_EXP_LNKCTL PCIER_LINK_CTL +#define PCI_VENDOR_ID PCIR_DEVVENDOR +#define PCI_COMMAND PCIR_COMMAND +#define PCI_EXP_DEVCTL PCIER_DEVICE_CTL /* Device Control */ +#define PCI_EXP_LNKCTL PCIER_LINK_CTL /* Link Control */ +#define PCI_EXP_FLAGS_TYPE PCIEM_FLAGS_TYPE /* Device/Port type */ +#define PCI_EXP_DEVCAP PCIER_DEVICE_CAP /* Device capabilities */ +#define PCI_EXP_DEVSTA PCIER_DEVICE_STA /* Device Status */ +#define PCI_EXP_LNKCAP PCIER_LINK_CAP /* Link Capabilities */ +#define PCI_EXP_LNKSTA PCIER_LINK_STA /* Link Status */ +#define PCI_EXP_SLTCAP PCIER_SLOT_CAP /* Slot Capabilities */ +#define PCI_EXP_SLTCTL PCIER_SLOT_CTL /* Slot Control */ +#define PCI_EXP_SLTSTA PCIER_SLOT_STA /* Slot Status */ +#define PCI_EXP_RTCTL PCIER_ROOT_CTL /* Root Control */ +#define PCI_EXP_RTCAP PCIER_ROOT_CAP /* Root Capabilities */ +#define PCI_EXP_RTSTA PCIER_ROOT_STA /* Root Status */ +#define PCI_EXP_DEVCAP2 PCIER_DEVICE_CAP2 /* Device Capabilities 2 */ +#define PCI_EXP_DEVCTL2 PCIER_DEVICE_CTL2 /* Device Control 2 */ +#define PCI_EXP_LNKCAP2 PCIER_LINK_CAP2 /* Link Capabilities 2 */ +#define PCI_EXP_LNKCTL2 PCIER_LINK_CTL2 /* Link Control 2 */ +#define PCI_EXP_LNKSTA2 PCIER_LINK_STA2 /* Link Status 2 */ +#define PCI_EXP_FLAGS PCIER_FLAGS /* Capabilities register */ +#define PCI_EXP_FLAGS_VERS PCIEM_FLAGS_VERSION /* Capability version */ +#define PCI_EXP_TYPE_ROOT_PORT PCIEM_TYPE_ROOT_PORT /* Root Port */ +#define PCI_EXP_TYPE_ENDPOINT PCIEM_TYPE_ENDPOINT /* Express Endpoint */ +#define PCI_EXP_TYPE_LEG_END PCIEM_TYPE_LEGACY_ENDPOINT /* Legacy Endpoint */ +#define PCI_EXP_TYPE_DOWNSTREAM PCIEM_TYPE_DOWNSTREAM_PORT /* Downstream Port */ +#define PCI_EXP_FLAGS_SLOT PCIEM_FLAGS_SLOT /* Slot implemented */ +#define PCI_EXP_TYPE_RC_EC PCIEM_TYPE_ROOT_EC /* Root Complex Event Collector */ + #define IORESOURCE_MEM SYS_RES_MEMORY #define IORESOURCE_IO SYS_RES_IOPORT #define IORESOURCE_IRQ SYS_RES_IRQ struct pci_dev; struct pci_driver { struct list_head links; char *name; - struct pci_device_id *id_table; + const struct pci_device_id *id_table; int (*probe)(struct pci_dev *dev, const struct pci_device_id *id); void (*remove)(struct pci_dev *dev); int (*suspend) (struct pci_dev *dev, pm_message_t state); /* Device suspended */ int (*resume) (struct pci_dev *dev); /* Device woken up */ driver_t driver; devclass_t bsdclass; - struct pci_error_handlers *err_handler; + const struct pci_error_handlers *err_handler; }; extern struct list_head pci_drivers; extern struct list_head pci_devices; extern spinlock_t pci_lock; #define __devexit_p(x) x struct pci_dev { struct device dev; struct list_head links; struct pci_driver *pdrv; uint64_t dma_mask; uint16_t device; uint16_t vendor; unsigned int irq; unsigned int devfn; u8 revision; struct pci_devinfo *bus; /* bus this device is on, equivalent to linux struct pci_bus */ }; static inline struct resource_list_entry * _pci_get_rle(struct pci_dev *pdev, int type, int rid) { struct pci_devinfo *dinfo; struct resource_list *rl; dinfo = device_get_ivars(pdev->dev.bsddev); rl = &dinfo->resources; return resource_list_find(rl, type, rid); } static inline struct resource_list_entry * _pci_get_bar(struct pci_dev *pdev, int bar) { struct resource_list_entry *rle; bar = PCIR_BAR(bar); if ((rle = _pci_get_rle(pdev, SYS_RES_MEMORY, bar)) == NULL) rle = _pci_get_rle(pdev, SYS_RES_IOPORT, bar); return (rle); } static inline struct device * _pci_find_irq_dev(unsigned int irq) { struct pci_dev *pdev; spin_lock(&pci_lock); list_for_each_entry(pdev, &pci_devices, links) { if (irq == pdev->dev.irq) break; if (irq >= pdev->dev.msix && irq < pdev->dev.msix_max) break; } spin_unlock(&pci_lock); if (pdev) return &pdev->dev; return (NULL); } static inline unsigned long pci_resource_start(struct pci_dev *pdev, int bar) { struct resource_list_entry *rle; if ((rle = _pci_get_bar(pdev, bar)) == NULL) return (0); return rle->start; } static inline unsigned long pci_resource_len(struct pci_dev *pdev, int bar) { struct resource_list_entry *rle; if ((rle = _pci_get_bar(pdev, bar)) == NULL) return (0); return rle->count; } /* * All drivers just seem to want to inspect the type not flags. */ static inline int pci_resource_flags(struct pci_dev *pdev, int bar) { struct resource_list_entry *rle; if ((rle = _pci_get_bar(pdev, bar)) == NULL) return (0); return rle->type; } static inline const char * pci_name(struct pci_dev *d) { return device_get_desc(d->dev.bsddev); } static inline void * pci_get_drvdata(struct pci_dev *pdev) { return dev_get_drvdata(&pdev->dev); } static inline void pci_set_drvdata(struct pci_dev *pdev, void *data) { dev_set_drvdata(&pdev->dev, data); } static inline int pci_enable_device(struct pci_dev *pdev) { pci_enable_io(pdev->dev.bsddev, SYS_RES_IOPORT); pci_enable_io(pdev->dev.bsddev, SYS_RES_MEMORY); return (0); } static inline void pci_disable_device(struct pci_dev *pdev) { } static inline int pci_set_master(struct pci_dev *pdev) { pci_enable_busmaster(pdev->dev.bsddev); return (0); } static inline int pci_request_region(struct pci_dev *pdev, int bar, const char *res_name) { int rid; int type; type = pci_resource_flags(pdev, bar); if (type == 0) return (-ENODEV); rid = PCIR_BAR(bar); if (bus_alloc_resource_any(pdev->dev.bsddev, type, &rid, RF_ACTIVE) == NULL) return (-EINVAL); return (0); } static inline void pci_release_region(struct pci_dev *pdev, int bar) { struct resource_list_entry *rle; if ((rle = _pci_get_bar(pdev, bar)) == NULL) return; bus_release_resource(pdev->dev.bsddev, rle->type, rle->rid, rle->res); } static inline void pci_release_regions(struct pci_dev *pdev) { int i; for (i = 0; i <= PCIR_MAX_BAR_0; i++) pci_release_region(pdev, i); } static inline int pci_request_regions(struct pci_dev *pdev, const char *res_name) { int error; int i; for (i = 0; i <= PCIR_MAX_BAR_0; i++) { error = pci_request_region(pdev, i, res_name); if (error && error != -ENODEV) { pci_release_regions(pdev); return (error); } } return (0); } static inline void pci_disable_msix(struct pci_dev *pdev) { pci_release_msi(pdev->dev.bsddev); } #define PCI_CAP_ID_EXP PCIY_EXPRESS #define PCI_CAP_ID_PCIX PCIY_PCIX static inline int pci_find_capability(struct pci_dev *pdev, int capid) { int reg; if (pci_find_cap(pdev->dev.bsddev, capid, ®)) return (0); return (reg); } /** * pci_pcie_cap - get the saved PCIe capability offset * @dev: PCI device * * PCIe capability offset is calculated at PCI device initialization * time and saved in the data structure. This function returns saved * PCIe capability offset. Using this instead of pci_find_capability() * reduces unnecessary search in the PCI configuration space. If you * need to calculate PCIe capability offset from raw device for some * reasons, please use pci_find_capability() instead. */ static inline int pci_pcie_cap(struct pci_dev *dev) { return pci_find_capability(dev, PCI_CAP_ID_EXP); } static inline int pci_read_config_byte(struct pci_dev *pdev, int where, u8 *val) { *val = (u8)pci_read_config(pdev->dev.bsddev, where, 1); return (0); } static inline int pci_read_config_word(struct pci_dev *pdev, int where, u16 *val) { *val = (u16)pci_read_config(pdev->dev.bsddev, where, 2); return (0); } static inline int pci_read_config_dword(struct pci_dev *pdev, int where, u32 *val) { *val = (u32)pci_read_config(pdev->dev.bsddev, where, 4); return (0); } static inline int pci_write_config_byte(struct pci_dev *pdev, int where, u8 val) { pci_write_config(pdev->dev.bsddev, where, val, 1); return (0); } static inline int pci_write_config_word(struct pci_dev *pdev, int where, u16 val) { pci_write_config(pdev->dev.bsddev, where, val, 2); return (0); } static inline int pci_write_config_dword(struct pci_dev *pdev, int where, u32 val) { pci_write_config(pdev->dev.bsddev, where, val, 4); return (0); } static struct pci_driver * -linux_pci_find(device_t dev, struct pci_device_id **idp) +linux_pci_find(device_t dev, const struct pci_device_id **idp) { - struct pci_device_id *id; + const struct pci_device_id *id; struct pci_driver *pdrv; uint16_t vendor; uint16_t device; vendor = pci_get_vendor(dev); device = pci_get_device(dev); spin_lock(&pci_lock); list_for_each_entry(pdrv, &pci_drivers, links) { for (id = pdrv->id_table; id->vendor != 0; id++) { if (vendor == id->vendor && device == id->device) { *idp = id; spin_unlock(&pci_lock); return (pdrv); } } } spin_unlock(&pci_lock); return (NULL); } static inline int linux_pci_probe(device_t dev) { - struct pci_device_id *id; + const struct pci_device_id *id; struct pci_driver *pdrv; if ((pdrv = linux_pci_find(dev, &id)) == NULL) return (ENXIO); if (device_get_driver(dev) != &pdrv->driver) return (ENXIO); device_set_desc(dev, pdrv->name); return (0); } static inline int linux_pci_attach(device_t dev) { struct resource_list_entry *rle; struct pci_dev *pdev; struct pci_driver *pdrv; - struct pci_device_id *id; + const struct pci_device_id *id; int error; pdrv = linux_pci_find(dev, &id); pdev = device_get_softc(dev); pdev->dev.parent = &linux_rootdev; pdev->dev.bsddev = dev; INIT_LIST_HEAD(&pdev->dev.irqents); pdev->device = id->device; pdev->vendor = id->vendor; pdev->dev.dma_mask = &pdev->dma_mask; pdev->pdrv = pdrv; kobject_init(&pdev->dev.kobj, &dev_ktype); kobject_set_name(&pdev->dev.kobj, device_get_nameunit(dev)); kobject_add(&pdev->dev.kobj, &linux_rootdev.kobj, kobject_name(&pdev->dev.kobj)); rle = _pci_get_rle(pdev, SYS_RES_IRQ, 0); if (rle) pdev->dev.irq = rle->start; else pdev->dev.irq = 0; pdev->irq = pdev->dev.irq; mtx_unlock(&Giant); spin_lock(&pci_lock); list_add(&pdev->links, &pci_devices); spin_unlock(&pci_lock); error = pdrv->probe(pdev, id); mtx_lock(&Giant); if (error) { spin_lock(&pci_lock); list_del(&pdev->links); spin_unlock(&pci_lock); put_device(&pdev->dev); return (-error); } return (0); } static inline int linux_pci_detach(device_t dev) { struct pci_dev *pdev; pdev = device_get_softc(dev); mtx_unlock(&Giant); pdev->pdrv->remove(pdev); mtx_lock(&Giant); spin_lock(&pci_lock); list_del(&pdev->links); spin_unlock(&pci_lock); put_device(&pdev->dev); return (0); } static device_method_t pci_methods[] = { DEVMETHOD(device_probe, linux_pci_probe), DEVMETHOD(device_attach, linux_pci_attach), DEVMETHOD(device_detach, linux_pci_detach), {0, 0} }; static inline int pci_register_driver(struct pci_driver *pdrv) { devclass_t bus; int error; spin_lock(&pci_lock); list_add(&pdrv->links, &pci_drivers); spin_unlock(&pci_lock); bus = devclass_find("pci"); pdrv->driver.name = pdrv->name; pdrv->driver.methods = pci_methods; pdrv->driver.size = sizeof(struct pci_dev); mtx_lock(&Giant); error = devclass_add_driver(bus, &pdrv->driver, BUS_PASS_DEFAULT, &pdrv->bsdclass); mtx_unlock(&Giant); if (error) return (-error); return (0); } static inline void pci_unregister_driver(struct pci_driver *pdrv) { devclass_t bus; list_del(&pdrv->links); bus = devclass_find("pci"); mtx_lock(&Giant); devclass_delete_driver(bus, &pdrv->driver); mtx_unlock(&Giant); } struct msix_entry { int entry; int vector; }; /* * Enable msix, positive errors indicate actual number of available * vectors. Negative errors are failures. */ static inline int pci_enable_msix(struct pci_dev *pdev, struct msix_entry *entries, int nreq) { struct resource_list_entry *rle; int error; int avail; int i; avail = pci_msix_count(pdev->dev.bsddev); if (avail < nreq) { if (avail == 0) return -EINVAL; return avail; } avail = nreq; if ((error = -pci_alloc_msix(pdev->dev.bsddev, &avail)) != 0) return error; rle = _pci_get_rle(pdev, SYS_RES_IRQ, 1); pdev->dev.msix = rle->start; pdev->dev.msix_max = rle->start + avail; for (i = 0; i < nreq; i++) entries[i].vector = pdev->dev.msix + i; return (0); } static inline int pci_channel_offline(struct pci_dev *pdev) { return false; } static inline int pci_enable_sriov(struct pci_dev *dev, int nr_virtfn) { return -ENODEV; } static inline void pci_disable_sriov(struct pci_dev *dev) { } /** * DEFINE_PCI_DEVICE_TABLE - macro used to describe a pci device table * @_table: device table name * * This macro is used to create a struct pci_device_id array (a device table) * in a generic manner. */ #define DEFINE_PCI_DEVICE_TABLE(_table) \ const struct pci_device_id _table[] __devinitdata /* XXX This should not be necessary. */ #define pcix_set_mmrbc(d, v) 0 #define pcix_get_max_mmrbc(d) 0 #define pcie_set_readrq(d, v) 0 #define PCI_DMA_BIDIRECTIONAL 0 #define PCI_DMA_TODEVICE 1 #define PCI_DMA_FROMDEVICE 2 #define PCI_DMA_NONE 3 #define pci_pool dma_pool #define pci_pool_destroy dma_pool_destroy #define pci_pool_alloc dma_pool_alloc #define pci_pool_free dma_pool_free #define pci_pool_create(_name, _pdev, _size, _align, _alloc) \ dma_pool_create(_name, &(_pdev)->dev, _size, _align, _alloc) #define pci_free_consistent(_hwdev, _size, _vaddr, _dma_handle) \ dma_free_coherent((_hwdev) == NULL ? NULL : &(_hwdev)->dev, \ _size, _vaddr, _dma_handle) #define pci_map_sg(_hwdev, _sg, _nents, _dir) \ dma_map_sg((_hwdev) == NULL ? NULL : &(_hwdev->dev), \ _sg, _nents, (enum dma_data_direction)_dir) #define pci_map_single(_hwdev, _ptr, _size, _dir) \ dma_map_single((_hwdev) == NULL ? NULL : &(_hwdev->dev), \ (_ptr), (_size), (enum dma_data_direction)_dir) #define pci_unmap_single(_hwdev, _addr, _size, _dir) \ dma_unmap_single((_hwdev) == NULL ? NULL : &(_hwdev)->dev, \ _addr, _size, (enum dma_data_direction)_dir) #define pci_unmap_sg(_hwdev, _sg, _nents, _dir) \ dma_unmap_sg((_hwdev) == NULL ? NULL : &(_hwdev)->dev, \ _sg, _nents, (enum dma_data_direction)_dir) #define pci_map_page(_hwdev, _page, _offset, _size, _dir) \ dma_map_page((_hwdev) == NULL ? NULL : &(_hwdev)->dev, _page,\ _offset, _size, (enum dma_data_direction)_dir) #define pci_unmap_page(_hwdev, _dma_address, _size, _dir) \ dma_unmap_page((_hwdev) == NULL ? NULL : &(_hwdev)->dev, \ _dma_address, _size, (enum dma_data_direction)_dir) #define pci_set_dma_mask(_pdev, mask) dma_set_mask(&(_pdev)->dev, (mask)) #define pci_dma_mapping_error(_pdev, _dma_addr) \ dma_mapping_error(&(_pdev)->dev, _dma_addr) #define pci_set_consistent_dma_mask(_pdev, _mask) \ dma_set_coherent_mask(&(_pdev)->dev, (_mask)) #define DECLARE_PCI_UNMAP_ADDR(x) DEFINE_DMA_UNMAP_ADDR(x); #define DECLARE_PCI_UNMAP_LEN(x) DEFINE_DMA_UNMAP_LEN(x); #define pci_unmap_addr dma_unmap_addr #define pci_unmap_addr_set dma_unmap_addr_set #define pci_unmap_len dma_unmap_len #define pci_unmap_len_set dma_unmap_len_set typedef unsigned int __bitwise pci_channel_state_t; typedef unsigned int __bitwise pci_ers_result_t; enum pci_channel_state { /* I/O channel is in normal state */ pci_channel_io_normal = (__force pci_channel_state_t) 1, /* I/O to channel is blocked */ pci_channel_io_frozen = (__force pci_channel_state_t) 2, /* PCI card is dead */ pci_channel_io_perm_failure = (__force pci_channel_state_t) 3, }; enum pci_ers_result { /* no result/none/not supported in device driver */ PCI_ERS_RESULT_NONE = (__force pci_ers_result_t) 1, /* Device driver can recover without slot reset */ PCI_ERS_RESULT_CAN_RECOVER = (__force pci_ers_result_t) 2, /* Device driver wants slot to be reset. */ PCI_ERS_RESULT_NEED_RESET = (__force pci_ers_result_t) 3, /* Device has completely failed, is unrecoverable */ PCI_ERS_RESULT_DISCONNECT = (__force pci_ers_result_t) 4, /* Device driver is fully recovered and operational */ PCI_ERS_RESULT_RECOVERED = (__force pci_ers_result_t) 5, }; /* PCI bus error event callbacks */ struct pci_error_handlers { /* PCI bus error detected on this device */ pci_ers_result_t (*error_detected)(struct pci_dev *dev, enum pci_channel_state error); /* MMIO has been re-enabled, but not DMA */ pci_ers_result_t (*mmio_enabled)(struct pci_dev *dev); /* PCI Express link has been reset */ pci_ers_result_t (*link_reset)(struct pci_dev *dev); /* PCI slot has been reset */ pci_ers_result_t (*slot_reset)(struct pci_dev *dev); /* Device driver may resume normal operations */ void (*resume)(struct pci_dev *dev); }; +/* freeBSD does not support SRIOV - yet */ +static inline struct pci_dev *pci_physfn(struct pci_dev *dev) +{ + return dev; +} + +static inline bool pci_is_pcie(struct pci_dev *dev) +{ + return !!pci_pcie_cap(dev); +} + +static inline u16 pcie_flags_reg(struct pci_dev *dev) +{ + int pos; + u16 reg16; + + pos = pci_find_capability(dev, PCI_CAP_ID_EXP); + if (!pos) + return 0; + + pci_read_config_word(dev, pos + PCI_EXP_FLAGS, ®16); + + return reg16; +} + + +static inline int pci_pcie_type(struct pci_dev *dev) +{ + return (pcie_flags_reg(dev) & PCI_EXP_FLAGS_TYPE) >> 4; +} + +static inline int pcie_cap_version(struct pci_dev *dev) +{ + return pcie_flags_reg(dev) & PCI_EXP_FLAGS_VERS; +} + +static inline bool pcie_cap_has_lnkctl(struct pci_dev *dev) +{ + int type = pci_pcie_type(dev); + + return pcie_cap_version(dev) > 1 || + type == PCI_EXP_TYPE_ROOT_PORT || + type == PCI_EXP_TYPE_ENDPOINT || + type == PCI_EXP_TYPE_LEG_END; +} + +static inline bool pcie_cap_has_devctl(const struct pci_dev *dev) +{ + return true; +} + +static inline bool pcie_cap_has_sltctl(struct pci_dev *dev) +{ + int type = pci_pcie_type(dev); + + return pcie_cap_version(dev) > 1 || + type == PCI_EXP_TYPE_ROOT_PORT || + (type == PCI_EXP_TYPE_DOWNSTREAM && + pcie_flags_reg(dev) & PCI_EXP_FLAGS_SLOT); +} + +static inline bool pcie_cap_has_rtctl(struct pci_dev *dev) +{ + int type = pci_pcie_type(dev); + + return pcie_cap_version(dev) > 1 || + type == PCI_EXP_TYPE_ROOT_PORT || + type == PCI_EXP_TYPE_RC_EC; +} + +static bool pcie_capability_reg_implemented(struct pci_dev *dev, int pos) +{ + if (!pci_is_pcie(dev)) + return false; + + switch (pos) { + case PCI_EXP_FLAGS_TYPE: + return true; + case PCI_EXP_DEVCAP: + case PCI_EXP_DEVCTL: + case PCI_EXP_DEVSTA: + return pcie_cap_has_devctl(dev); + case PCI_EXP_LNKCAP: + case PCI_EXP_LNKCTL: + case PCI_EXP_LNKSTA: + return pcie_cap_has_lnkctl(dev); + case PCI_EXP_SLTCAP: + case PCI_EXP_SLTCTL: + case PCI_EXP_SLTSTA: + return pcie_cap_has_sltctl(dev); + case PCI_EXP_RTCTL: + case PCI_EXP_RTCAP: + case PCI_EXP_RTSTA: + return pcie_cap_has_rtctl(dev); + case PCI_EXP_DEVCAP2: + case PCI_EXP_DEVCTL2: + case PCI_EXP_LNKCAP2: + case PCI_EXP_LNKCTL2: + case PCI_EXP_LNKSTA2: + return pcie_cap_version(dev) > 1; + default: + return false; + } +} + + +static inline int pcie_capability_write_word(struct pci_dev *dev, int pos, u16 val) +{ + if (pos & 1) + return -EINVAL; + + if (!pcie_capability_reg_implemented(dev, pos)) + return 0; + + return pci_write_config_word(dev, pci_pcie_cap(dev) + pos, val); +} #endif /* _LINUX_PCI_H_ */ Index: stable/10/sys/ofed/include/linux/poll.h =================================================================== --- stable/10/sys/ofed/include/linux/poll.h (revision 271126) +++ stable/10/sys/ofed/include/linux/poll.h (revision 271127) @@ -1,44 +1,45 @@ /*- * Copyright (c) 2010 Isilon Systems, Inc. * Copyright (c) 2010 iX Systems, Inc. * Copyright (c) 2010 Panasas, Inc. + * Copyright (c) 2013, 2014 Mellanox Technologies, Ltd. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice unmodified, this list of conditions, and the following * disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #ifndef _LINUX_POLL_H_ #define _LINUX_POLL_H_ #include #include typedef struct poll_table_struct { } poll_table; static inline void poll_wait(struct file *filp, wait_queue_head_t *wait_address, poll_table *p) { selrecord(curthread, &filp->f_selinfo); } #endif /* _LINUX_POLL_H_ */ Index: stable/10/sys/ofed/include/linux/radix-tree.h =================================================================== --- stable/10/sys/ofed/include/linux/radix-tree.h (revision 271126) +++ stable/10/sys/ofed/include/linux/radix-tree.h (revision 271127) @@ -1,60 +1,61 @@ /*- * Copyright (c) 2010 Isilon Systems, Inc. * Copyright (c) 2010 iX Systems, Inc. * Copyright (c) 2010 Panasas, Inc. + * Copyright (c) 2013, 2014 Mellanox Technologies, Ltd. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice unmodified, this list of conditions, and the following * disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #ifndef _LINUX_RADIX_TREE_H_ #define _LINUX_RADIX_TREE_H_ #define RADIX_TREE_MAP_SHIFT 6 #define RADIX_TREE_MAP_SIZE (1 << RADIX_TREE_MAP_SHIFT) #define RADIX_TREE_MAP_MASK (RADIX_TREE_MAP_SIZE - 1) #define RADIX_TREE_MAX_HEIGHT \ DIV_ROUND_UP((sizeof(long) * NBBY), RADIX_TREE_MAP_SHIFT) struct radix_tree_node { void *slots[RADIX_TREE_MAP_SIZE]; int count; }; struct radix_tree_root { struct radix_tree_node *rnode; gfp_t gfp_mask; int height; }; #define RADIX_TREE_INIT(mask) \ { .rnode = NULL, .gfp_mask = mask, .height = 0 }; #define INIT_RADIX_TREE(root, mask) \ { (root)->rnode = NULL; (root)->gfp_mask = mask; (root)->height = 0; } #define RADIX_TREE(name, mask) \ struct radix_tree_root name = RADIX_TREE_INIT(mask) void *radix_tree_lookup(struct radix_tree_root *, unsigned long); void *radix_tree_delete(struct radix_tree_root *, unsigned long); int radix_tree_insert(struct radix_tree_root *, unsigned long, void *); #endif /* _LINUX_RADIX_TREE_H_ */ Index: stable/10/sys/ofed/include/linux/random.h =================================================================== --- stable/10/sys/ofed/include/linux/random.h (revision 271126) +++ stable/10/sys/ofed/include/linux/random.h (revision 271127) @@ -1,40 +1,41 @@ /*- * Copyright (c) 2010 Isilon Systems, Inc. * Copyright (c) 2010 iX Systems, Inc. * Copyright (c) 2010 Panasas, Inc. + * Copyright (c) 2013, 2014 Mellanox Technologies, Ltd. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice unmodified, this list of conditions, and the following * disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #ifndef _LINUX_RANDOM_H_ #define _LINUX_RANDOM_H_ #include static inline void get_random_bytes(void *buf, int nbytes) { read_random(buf, nbytes); } #endif /* _LINUX_RANDOM_H_ */ Index: stable/10/sys/ofed/include/linux/rbtree.h =================================================================== --- stable/10/sys/ofed/include/linux/rbtree.h (revision 271126) +++ stable/10/sys/ofed/include/linux/rbtree.h (revision 271127) @@ -1,111 +1,112 @@ /*- * Copyright (c) 2010 Isilon Systems, Inc. * Copyright (c) 2010 iX Systems, Inc. * Copyright (c) 2010 Panasas, Inc. + * Copyright (c) 2013, 2014 Mellanox Technologies, Ltd. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice unmodified, this list of conditions, and the following * disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #ifndef _LINUX_RBTREE_H_ #define _LINUX_RBTREE_H_ #include #include struct rb_node { RB_ENTRY(rb_node) __entry; }; #define rb_left __entry.rbe_left #define rb_right __entry.rbe_right /* * We provide a false structure that has the same bit pattern as tree.h * presents so it matches the member names expected by linux. */ struct rb_root { struct rb_node *rb_node; }; /* * In linux all of the comparisons are done by the caller. */ int panic_cmp(struct rb_node *one, struct rb_node *two); RB_HEAD(linux_root, rb_node); RB_PROTOTYPE(linux_root, rb_node, __entry, panic_cmp); #define rb_parent(r) RB_PARENT(r, __entry) #define rb_color(r) RB_COLOR(r, __entry) #define rb_is_red(r) (rb_color(r) == RB_RED) #define rb_is_black(r) (rb_color(r) == RB_BLACK) #define rb_set_parent(r, p) rb_parent((r)) = (p) #define rb_set_color(r, c) rb_color((r)) = (c) #define rb_entry(ptr, type, member) container_of(ptr, type, member) #define RB_EMPTY_ROOT(root) RB_EMPTY((struct linux_root *)root) #define RB_EMPTY_NODE(node) (rb_parent(node) == node) #define RB_CLEAR_NODE(node) (rb_set_parent(node, node)) #define rb_insert_color(node, root) \ linux_root_RB_INSERT_COLOR((struct linux_root *)(root), (node)) #define rb_erase(node, root) \ linux_root_RB_REMOVE((struct linux_root *)(root), (node)) #define rb_next(node) RB_NEXT(linux_root, NULL, (node)) #define rb_prev(node) RB_PREV(linux_root, NULL, (node)) #define rb_first(root) RB_MIN(linux_root, (struct linux_root *)(root)) #define rb_last(root) RB_MAX(linux_root, (struct linux_root *)(root)) static inline void rb_link_node(struct rb_node *node, struct rb_node *parent, struct rb_node **rb_link) { rb_set_parent(node, parent); rb_set_color(node, RB_RED); node->__entry.rbe_left = node->__entry.rbe_right = NULL; *rb_link = node; } static inline void rb_replace_node(struct rb_node *victim, struct rb_node *new, struct rb_root *root) { struct rb_node *p; p = rb_parent(victim); if (p) { if (p->rb_left == victim) p->rb_left = new; else p->rb_right = new; } else root->rb_node = new; if (victim->rb_left) rb_set_parent(victim->rb_left, new); if (victim->rb_right) rb_set_parent(victim->rb_right, new); *new = *victim; } #undef RB_ROOT #define RB_ROOT (struct rb_root) { NULL } #endif /* _LINUX_RBTREE_H_ */ Index: stable/10/sys/ofed/include/linux/rwlock.h =================================================================== --- stable/10/sys/ofed/include/linux/rwlock.h (revision 271126) +++ stable/10/sys/ofed/include/linux/rwlock.h (revision 271127) @@ -1,63 +1,64 @@ /*- * Copyright (c) 2010 Isilon Systems, Inc. * Copyright (c) 2010 iX Systems, Inc. * Copyright (c) 2010 Panasas, Inc. + * Copyright (c) 2013, 2014 Mellanox Technologies, Ltd. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice unmodified, this list of conditions, and the following * disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #ifndef _LINUX_RWLOCK_H_ #define _LINUX_RWLOCK_H_ #include #include typedef struct { struct rwlock rw; } rwlock_t; #define read_lock(_l) rw_rlock(&(_l)->rw) #define write_lock(_l) rw_wlock(&(_l)->rw) #define read_unlock(_l) rw_runlock(&(_l)->rw) #define write_unlock(_l) rw_wunlock(&(_l)->rw) #define read_lock_irq(lock) read_lock((lock)) #define read_unlock_irq(lock) read_unlock((lock)) #define write_lock_irq(lock) write_lock((lock)) #define write_unlock_irq(lock) write_unlock((lock)) #define read_lock_irqsave(lock, flags) \ do {(flags) = 0; read_lock(lock); } while (0) #define write_lock_irqsave(lock, flags) \ do {(flags) = 0; write_lock(lock); } while (0) #define read_unlock_irqrestore(lock, flags) \ do { read_unlock(lock); } while (0) #define write_unlock_irqrestore(lock, flags) \ do { write_unlock(lock); } while (0) static inline void rwlock_init(rwlock_t *lock) { memset(&lock->rw, 0, sizeof(lock->rw)); rw_init_flags(&lock->rw, "lnxrw", RW_NOWITNESS); } #endif /* _LINUX_RWLOCK_H_ */ Index: stable/10/sys/ofed/include/linux/rwsem.h =================================================================== --- stable/10/sys/ofed/include/linux/rwsem.h (revision 271126) +++ stable/10/sys/ofed/include/linux/rwsem.h (revision 271127) @@ -1,56 +1,57 @@ /*- * Copyright (c) 2010 Isilon Systems, Inc. * Copyright (c) 2010 iX Systems, Inc. * Copyright (c) 2010 Panasas, Inc. + * Copyright (c) 2013, 2014 Mellanox Technologies, Ltd. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice unmodified, this list of conditions, and the following * disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #ifndef _LINUX_RWSEM_H_ #define _LINUX_RWSEM_H_ #include #include #include struct rw_semaphore { struct sx sx; }; #define down_write(_rw) sx_xlock(&(_rw)->sx) #define up_write(_rw) sx_xunlock(&(_rw)->sx) #define down_read(_rw) sx_slock(&(_rw)->sx) #define up_read(_rw) sx_sunlock(&(_rw)->sx) #define down_read_trylock(_rw) !!sx_try_slock(&(_rw)->sx) #define down_write_trylock(_rw) !!sx_try_xlock(&(_rw)->sx) #define downgrade_write(_rw) sx_downgrade(&(_rw)->sx) #define down_read_nested(_rw, _sc) down_read(_rw) static inline void init_rwsem(struct rw_semaphore *rw) { memset(&rw->sx, 0, sizeof(rw->sx)); sx_init_flags(&rw->sx, "lnxrwsem", SX_NOWITNESS); } #endif /* _LINUX_RWSEM_H_ */ Index: stable/10/sys/ofed/include/linux/scatterlist.h =================================================================== --- stable/10/sys/ofed/include/linux/scatterlist.h (revision 271126) +++ stable/10/sys/ofed/include/linux/scatterlist.h (revision 271127) @@ -1,98 +1,105 @@ /*- * Copyright (c) 2010 Isilon Systems, Inc. * Copyright (c) 2010 iX Systems, Inc. * Copyright (c) 2010 Panasas, Inc. + * Copyright (c) 2013, 2014 Mellanox Technologies, Ltd. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice unmodified, this list of conditions, and the following * disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ + #ifndef _LINUX_SCATTERLIST_H_ #define _LINUX_SCATTERLIST_H_ -#include #include struct scatterlist { union { struct page *page; struct scatterlist *sg; } sl_un; dma_addr_t address; unsigned long offset; uint32_t length; uint32_t flags; +}; + +struct sg_table { + struct scatterlist *sgl; /* the list */ + unsigned int nents; /* number of mapped entries */ + unsigned int orig_nents; /* original size of list */ }; #define sg_dma_address(sg) (sg)->address #define sg_dma_len(sg) (sg)->length #define sg_page(sg) (sg)->sl_un.page #define sg_scatternext(sg) (sg)->sl_un.sg #define SG_END 0x01 #define SG_CHAIN 0x02 static inline void sg_set_page(struct scatterlist *sg, struct page *page, unsigned int len, unsigned int offset) { sg_page(sg) = page; sg_dma_len(sg) = len; sg->offset = offset; if (offset > PAGE_SIZE) panic("sg_set_page: Invalid offset %d\n", offset); } static inline void sg_set_buf(struct scatterlist *sg, const void *buf, unsigned int buflen) { sg_set_page(sg, virt_to_page(buf), buflen, ((uintptr_t)buf) & ~PAGE_MASK); } static inline void sg_init_table(struct scatterlist *sg, unsigned int nents) { bzero(sg, sizeof(*sg) * nents); sg[nents - 1].flags = SG_END; } static inline struct scatterlist * sg_next(struct scatterlist *sg) { if (sg->flags & SG_END) return (NULL); sg++; if (sg->flags & SG_CHAIN) sg = sg_scatternext(sg); return (sg); } static inline vm_paddr_t sg_phys(struct scatterlist *sg) { return sg_page(sg)->phys_addr + sg->offset; } #define for_each_sg(sglist, sg, sgmax, _itr) \ for (_itr = 0, sg = (sglist); _itr < (sgmax); _itr++, sg = sg_next(sg)) #endif /* _LINUX_SCATTERLIST_H_ */ Index: stable/10/sys/ofed/include/linux/sched.h =================================================================== --- stable/10/sys/ofed/include/linux/sched.h (revision 271126) +++ stable/10/sys/ofed/include/linux/sched.h (revision 271127) @@ -1,109 +1,110 @@ /*- * Copyright (c) 2010 Isilon Systems, Inc. * Copyright (c) 2010 iX Systems, Inc. * Copyright (c) 2010 Panasas, Inc. + * Copyright (c) 2013, 2014 Mellanox Technologies, Ltd. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice unmodified, this list of conditions, and the following * disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #ifndef _LINUX_SCHED_H_ #define _LINUX_SCHED_H_ #include #include #include #include #include #define MAX_SCHEDULE_TIMEOUT LONG_MAX #define TASK_RUNNING 0 #define TASK_INTERRUPTIBLE 1 #define TASK_UNINTERRUPTIBLE 2 #define TASK_DEAD 64 #define TASK_WAKEKILL 128 #define TASK_WAKING 256 #define TASK_SHOULD_STOP 1 #define TASK_STOPPED 2 /* * A task_struct is only provided for those tasks created with kthread. * Using these routines with threads not started via kthread will cause * panics because no task_struct is allocated and td_retval[1] is * overwritten by syscalls which kernel threads will not make use of. */ struct task_struct { struct thread *task_thread; int (*task_fn)(void *data); void *task_data; int task_ret; int state; int should_stop; }; #define current ((struct task_struct *)curthread->td_retval[1]) #define task_struct_get(x) (struct task_struct *)(x)->td_retval[1] #define task_struct_set(x, y) (x)->td_retval[1] = (register_t)(y) #define set_current_state(x) \ atomic_store_rel_int((volatile int *)¤t->state, (x)) #define __set_current_state(x) current->state = (x) #define schedule() \ do { \ void *c; \ \ if (cold) \ break; \ c = curthread; \ sleepq_lock(c); \ if (current->state == TASK_INTERRUPTIBLE || \ current->state == TASK_UNINTERRUPTIBLE) { \ sleepq_add(c, NULL, "task", SLEEPQ_SLEEP, 0); \ sleepq_wait(c, 0); \ } else { \ sleepq_release(c); \ sched_relinquish(curthread); \ } \ } while (0) #define wake_up_process(x) \ do { \ int wakeup_swapper; \ void *c; \ \ c = (x)->task_thread; \ sleepq_lock(c); \ (x)->state = TASK_RUNNING; \ wakeup_swapper = sleepq_signal(c, SLEEPQ_SLEEP, 0, 0); \ sleepq_release(c); \ if (wakeup_swapper) \ kick_proc0(); \ } while (0) #define cond_resched() if (!cold) sched_relinquish(curthread) #define sched_yield() sched_relinquish(curthread) #endif /* _LINUX_SCHED_H_ */ Index: stable/10/sys/ofed/include/linux/semaphore.h =================================================================== --- stable/10/sys/ofed/include/linux/semaphore.h (revision 271126) +++ stable/10/sys/ofed/include/linux/semaphore.h (revision 271127) @@ -1,66 +1,67 @@ /*- * Copyright (c) 2010 Isilon Systems, Inc. * Copyright (c) 2010 iX Systems, Inc. * Copyright (c) 2010 Panasas, Inc. + * Copyright (c) 2013, 2014 Mellanox Technologies, Ltd. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice unmodified, this list of conditions, and the following * disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #ifndef _LINUX_SEMAPHORE_H_ #define _LINUX_SEMAPHORE_H_ #include #include #include /* * XXX BSD semaphores are disused and slow. They also do not provide a * sema_wait_sig method. This must be resolved eventually. */ struct semaphore { struct sema sema; }; #define down(_sem) sema_wait(&(_sem)->sema) #define down_interruptible(_sem) sema_wait(&(_sem)->sema), 0 #define down_trylock(_sem) !sema_trywait(&(_sem)->sema) #define up(_sem) sema_post(&(_sem)->sema) static inline void linux_sema_init(struct semaphore *sem, int val) { memset(&sem->sema, 0, sizeof(sem->sema)); sema_init(&sem->sema, val, "lnxsema"); } static inline void init_MUTEX(struct semaphore *sem) { memset(&sem->sema, 0, sizeof(sem->sema)); sema_init(&sem->sema, 1, "lnxsema"); } #define sema_init linux_sema_init #endif /* _LINUX_SEMAPHORE_H_ */ Index: stable/10/sys/ofed/include/linux/slab.h =================================================================== --- stable/10/sys/ofed/include/linux/slab.h (revision 271126) +++ stable/10/sys/ofed/include/linux/slab.h (revision 271127) @@ -1,102 +1,108 @@ /*- * Copyright (c) 2010 Isilon Systems, Inc. * Copyright (c) 2010 iX Systems, Inc. * Copyright (c) 2010 Panasas, Inc. + * Copyright (c) 2013, 2014 Mellanox Technologies, Ltd. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice unmodified, this list of conditions, and the following * disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #ifndef _LINUX_SLAB_H_ #define _LINUX_SLAB_H_ #include #include #include #include #include #include MALLOC_DECLARE(M_KMALLOC); -#define kmalloc(size, flags) malloc((size), M_KMALLOC, (flags)) -#define kzalloc(size, flags) kmalloc((size), (flags) | M_ZERO) -#define kfree(ptr) free(__DECONST(void *, (ptr)), M_KMALLOC) -#define krealloc(ptr, size, flags) realloc((ptr), (size), M_KMALLOC, (flags)) -#define kcalloc(n, size, flags) kmalloc((n) * (size), flags | M_ZERO) +#define kmalloc(size, flags) malloc((size), M_KMALLOC, (flags)) +#define kzalloc(size, flags) kmalloc((size), (flags) | M_ZERO) +#define kzalloc_node(size, flags, node) kzalloc(size, flags) +#define kfree(ptr) free(__DECONST(void *, (ptr)), M_KMALLOC) +#define krealloc(ptr, size, flags) realloc((ptr), (size), M_KMALLOC, (flags)) +#define kcalloc(n, size, flags) kmalloc((n) * (size), flags | M_ZERO) +#define vzalloc(size) kzalloc(size, GFP_KERNEL | __GFP_NOWARN) +#define vfree(arg) kfree(arg) +#define vmalloc(size) kmalloc(size, GFP_KERNEL) +#define vmalloc_node(size, node) kmalloc(size, GFP_KERNEL) struct kmem_cache { uma_zone_t cache_zone; void (*cache_ctor)(void *); }; #define SLAB_HWCACHE_ALIGN 0x0001 static inline int kmem_ctor(void *mem, int size, void *arg, int flags) { void (*ctor)(void *); ctor = arg; ctor(mem); return (0); } static inline struct kmem_cache * kmem_cache_create(char *name, size_t size, size_t align, u_long flags, void (*ctor)(void *)) { struct kmem_cache *c; c = malloc(sizeof(*c), M_KMALLOC, M_WAITOK); if (align) align--; if (flags & SLAB_HWCACHE_ALIGN) align = UMA_ALIGN_CACHE; c->cache_zone = uma_zcreate(name, size, ctor ? kmem_ctor : NULL, NULL, NULL, NULL, align, 0); c->cache_ctor = ctor; return c; } static inline void * kmem_cache_alloc(struct kmem_cache *c, int flags) { return uma_zalloc_arg(c->cache_zone, c->cache_ctor, flags); } static inline void kmem_cache_free(struct kmem_cache *c, void *m) { uma_zfree(c->cache_zone, m); } static inline void kmem_cache_destroy(struct kmem_cache *c) { uma_zdestroy(c->cache_zone); free(c, M_KMALLOC); } #endif /* _LINUX_SLAB_H_ */ Index: stable/10/sys/ofed/include/linux/socket.h =================================================================== --- stable/10/sys/ofed/include/linux/socket.h (revision 271126) +++ stable/10/sys/ofed/include/linux/socket.h (revision 271127) @@ -1,66 +1,67 @@ /*- * Copyright (c) 2010 Isilon Systems, Inc. * Copyright (c) 2010 iX Systems, Inc. * Copyright (c) 2010 Panasas, Inc. + * Copyright (c) 2013, 2014 Mellanox Technologies, Ltd. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice unmodified, this list of conditions, and the following * disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #ifndef _LINUX_SOCKET_H_ #define _LINUX_SOCKET_H_ #include #ifdef notyet static inline int memcpy_toiovec(struct iovec *v, unsigned char *kdata, int len) { struct uio uio; int error; uio.uio_iov = v; uio.uio_iovcnt = -1; uio.uio_offset = 0; uio.uio_resid = len; uio.uio_segflag = UIO_USERSPACE; uio.uio_rw = UIO_READ; error = -uiomove(kdata, len, &uio); return (error); } static inline int memcpy_fromiovec(unsigned char *kdata, struct iovec *iov, int len) { struct uio uio; int error; uio.uio_iov = v; uio.uio_iovcnt = -1; uio.uio_offset = 0; uio.uio_resid = len; uio.uio_segflag = UIO_USERSPACE; uio.uio_rw = UIO_WRITE; error = -uiomove(kdata, len, &uio); } #endif #endif /* _LINUX_SOCKET_H_ */ Index: stable/10/sys/ofed/include/linux/spinlock.h =================================================================== --- stable/10/sys/ofed/include/linux/spinlock.h (revision 271126) +++ stable/10/sys/ofed/include/linux/spinlock.h (revision 271127) @@ -1,68 +1,68 @@ /*- * Copyright (c) 2010 Isilon Systems, Inc. * Copyright (c) 2010 iX Systems, Inc. * Copyright (c) 2010 Panasas, Inc. + * Copyright (c) 2013, 2014 Mellanox Technologies, Ltd. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice unmodified, this list of conditions, and the following * disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #ifndef _LINUX_SPINLOCK_H_ #define _LINUX_SPINLOCK_H_ #include #include #include #include #include #include -#include #include typedef struct { struct mtx m; } spinlock_t; #define spin_lock(_l) mtx_lock(&(_l)->m) #define spin_unlock(_l) mtx_unlock(&(_l)->m) #define spin_trylock(_l) mtx_trylock(&(_l)->m) #define spin_lock_nested(_l, _n) mtx_lock_flags(&(_l)->m, MTX_DUPOK) #define spin_lock_irq(lock) spin_lock(lock) #define spin_unlock_irq(lock) spin_unlock(lock) #define spin_lock_irqsave(lock, flags) \ do {(flags) = 0; spin_lock(lock); } while (0) #define spin_unlock_irqrestore(lock, flags) \ do { spin_unlock(lock); } while (0) static inline void spin_lock_init(spinlock_t *lock) { memset(&lock->m, 0, sizeof(lock->m)); mtx_init(&lock->m, "lnxspin", NULL, MTX_DEF | MTX_NOWITNESS); } #define DEFINE_SPINLOCK(lock) \ spinlock_t lock; \ MTX_SYSINIT(lock, &(lock).m, "lnxspin", MTX_DEF) #endif /* _LINUX_SPINLOCK_H_ */ Index: stable/10/sys/ofed/include/linux/string.h =================================================================== --- stable/10/sys/ofed/include/linux/string.h (revision 271126) +++ stable/10/sys/ofed/include/linux/string.h (revision 271127) @@ -1,49 +1,53 @@ /*- * Copyright (c) 2010 Isilon Systems, Inc. * Copyright (c) 2010 iX Systems, Inc. * Copyright (c) 2010 Panasas, Inc. + * Copyright (c) 2013, 2014 Mellanox Technologies, Ltd. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice unmodified, this list of conditions, and the following * disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #ifndef _LINUX_STRING_H_ #define _LINUX_STRING_H_ #include #include #include #include + +#define strnicmp strncasecmp + static inline void * kmemdup(const void *src, size_t len, gfp_t gfp) { void *dst; dst = kmalloc(len, gfp); if (dst) memcpy(dst, src, len); return (dst); } #endif /* _LINUX_STRING_H_ */ Index: stable/10/sys/ofed/include/linux/sysfs.h =================================================================== --- stable/10/sys/ofed/include/linux/sysfs.h (revision 271126) +++ stable/10/sys/ofed/include/linux/sysfs.h (revision 271127) @@ -1,190 +1,191 @@ /*- * Copyright (c) 2010 Isilon Systems, Inc. * Copyright (c) 2010 iX Systems, Inc. * Copyright (c) 2010 Panasas, Inc. + * Copyright (c) 2013, 2014 Mellanox Technologies, Ltd. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice unmodified, this list of conditions, and the following * disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #ifndef _LINUX_SYSFS_H_ #define _LINUX_SYSFS_H_ #include struct attribute { const char *name; struct module *owner; mode_t mode; }; struct sysfs_ops { ssize_t (*show)(struct kobject *, struct attribute *, char *); ssize_t (*store)(struct kobject *, struct attribute *, const char *, size_t); }; struct attribute_group { const char *name; mode_t (*is_visible)(struct kobject *, struct attribute *, int); struct attribute **attrs; }; #define __ATTR(_name, _mode, _show, _store) { \ .attr = { .name = __stringify(_name), .mode = _mode }, \ .show = _show, .store = _store, \ } #define __ATTR_RO(_name) { \ .attr = { .name = __stringify(_name), .mode = 0444 }, \ .show = _name##_show, \ } #define __ATTR_NULL { .attr = { .name = NULL } } /* * Handle our generic '\0' terminated 'C' string. * Two cases: * a variable string: point arg1 at it, arg2 is max length. * a constant string: point arg1 at it, arg2 is zero. */ static inline int sysctl_handle_attr(SYSCTL_HANDLER_ARGS) { struct kobject *kobj; struct attribute *attr; const struct sysfs_ops *ops; char *buf; int error; ssize_t len; kobj = arg1; attr = (struct attribute *)arg2; if (kobj->ktype == NULL || kobj->ktype->sysfs_ops == NULL) return (ENODEV); buf = (char *)get_zeroed_page(GFP_KERNEL); if (buf == NULL) return (ENOMEM); ops = kobj->ktype->sysfs_ops; if (ops->show) { len = ops->show(kobj, attr, buf); /* * It's valid to not have a 'show' so just return an * empty string. */ if (len < 0) { error = -len; if (error != EIO) goto out; buf[0] = '\0'; } else if (len) { len--; if (len >= PAGE_SIZE) len = PAGE_SIZE - 1; /* Trim trailing newline. */ buf[len] = '\0'; } } /* Leave one trailing byte to append a newline. */ error = sysctl_handle_string(oidp, buf, PAGE_SIZE - 1, req); if (error != 0 || req->newptr == NULL || ops->store == NULL) goto out; len = strlcat(buf, "\n", PAGE_SIZE); KASSERT(len < PAGE_SIZE, ("new attribute truncated")); len = ops->store(kobj, attr, buf, len); if (len < 0) error = -len; out: free_page((unsigned long)buf); return (error); } static inline int sysfs_create_file(struct kobject *kobj, const struct attribute *attr) { sysctl_add_oid(NULL, SYSCTL_CHILDREN(kobj->oidp), OID_AUTO, attr->name, CTLTYPE_STRING|CTLFLAG_RW|CTLFLAG_MPSAFE, kobj, (uintptr_t)attr, sysctl_handle_attr, "A", ""); return (0); } static inline void sysfs_remove_file(struct kobject *kobj, const struct attribute *attr) { if (kobj->oidp) sysctl_remove_name(kobj->oidp, attr->name, 1, 1); } static inline void sysfs_remove_group(struct kobject *kobj, const struct attribute_group *grp) { if (kobj->oidp) sysctl_remove_name(kobj->oidp, grp->name, 1, 1); } static inline int sysfs_create_group(struct kobject *kobj, const struct attribute_group *grp) { struct attribute **attr; struct sysctl_oid *oidp; oidp = SYSCTL_ADD_NODE(NULL, SYSCTL_CHILDREN(kobj->oidp), OID_AUTO, grp->name, CTLFLAG_RD|CTLFLAG_MPSAFE, NULL, grp->name); for (attr = grp->attrs; *attr != NULL; attr++) { sysctl_add_oid(NULL, SYSCTL_CHILDREN(oidp), OID_AUTO, (*attr)->name, CTLTYPE_STRING|CTLFLAG_RW|CTLFLAG_MPSAFE, kobj, (uintptr_t)*attr, sysctl_handle_attr, "A", ""); } return (0); } static inline int sysfs_create_dir(struct kobject *kobj) { kobj->oidp = SYSCTL_ADD_NODE(NULL, SYSCTL_CHILDREN(kobj->parent->oidp), OID_AUTO, kobj->name, CTLFLAG_RD|CTLFLAG_MPSAFE, NULL, kobj->name); return (0); } static inline void sysfs_remove_dir(struct kobject *kobj) { if (kobj->oidp == NULL) return; sysctl_remove_oid(kobj->oidp, 1, 1); } #define sysfs_attr_init(attr) do {} while(0) #endif /* _LINUX_SYSFS_H_ */ Index: stable/10/sys/ofed/include/linux/timer.h =================================================================== --- stable/10/sys/ofed/include/linux/timer.h (revision 271126) +++ stable/10/sys/ofed/include/linux/timer.h (revision 271127) @@ -1,89 +1,92 @@ /*- * Copyright (c) 2010 Isilon Systems, Inc. * Copyright (c) 2010 iX Systems, Inc. * Copyright (c) 2010 Panasas, Inc. + * Copyright (c) 2013, 2014 Mellanox Technologies, Ltd. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice unmodified, this list of conditions, and the following * disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #ifndef _LINUX_TIMER_H_ #define _LINUX_TIMER_H_ #include #include #include #include struct timer_list { struct callout timer_callout; void (*function)(unsigned long); unsigned long data; unsigned long expires; }; static inline void _timer_fn(void *context) { struct timer_list *timer; timer = context; timer->function(timer->data); } #define setup_timer(timer, func, dat) \ do { \ (timer)->function = (func); \ (timer)->data = (dat); \ callout_init(&(timer)->timer_callout, CALLOUT_MPSAFE); \ } while (0) #define init_timer(timer) \ do { \ (timer)->function = NULL; \ (timer)->data = 0; \ callout_init(&(timer)->timer_callout, CALLOUT_MPSAFE); \ } while (0) #define mod_timer(timer, exp) \ do { \ (timer)->expires = (exp); \ callout_reset(&(timer)->timer_callout, (exp) - jiffies, \ _timer_fn, (timer)); \ } while (0) #define add_timer(timer) \ callout_reset(&(timer)->timer_callout, \ (timer)->expires - jiffies, _timer_fn, (timer)) #define del_timer(timer) callout_stop(&(timer)->timer_callout) #define del_timer_sync(timer) callout_drain(&(timer)->timer_callout) #define timer_pending(timer) callout_pending(&(timer)->timer_callout) static inline unsigned long round_jiffies(unsigned long j) { return roundup(j, hz); } + +#define round_jiffies_relative(j) round_jiffies(j) #endif /* _LINUX_TIMER_H_ */ Index: stable/10/sys/ofed/include/linux/types.h =================================================================== --- stable/10/sys/ofed/include/linux/types.h (revision 271126) +++ stable/10/sys/ofed/include/linux/types.h (revision 271127) @@ -1,59 +1,66 @@ /*- * Copyright (c) 2010 Isilon Systems, Inc. * Copyright (c) 2010 iX Systems, Inc. * Copyright (c) 2010 Panasas, Inc. + * Copyright (c) 2013, 2014 Mellanox Technologies, Ltd. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice unmodified, this list of conditions, and the following * disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #ifndef _LINUX_TYPES_H_ #define _LINUX_TYPES_H_ #include #include +#include +#include #include #include -typedef __u16 __le16; -typedef __u16 __be16; -typedef __u32 __le32; -typedef __u32 __be32; -typedef __u64 __le64; -typedef __u64 __be64; -#ifndef __bool_true_false_are_defined -typedef _Bool bool; -#define true TRUE -#define false FALSE +#define __read_mostly __attribute__((__section__(".data.read_mostly"))) + +#ifndef __bitwise__ +#ifdef __CHECKER__ +#define __bitwise__ __attribute__((bitwise)) +#else +#define __bitwise__ #endif +#endif -typedef u64 phys_addr_t; +typedef uint16_t __le16; +typedef uint16_t __be16; +typedef uint32_t __le32; +typedef uint32_t __be32; +typedef uint64_t __le64; +typedef uint64_t __be64; -typedef unsigned long kernel_ulong_t; typedef unsigned int uint; typedef unsigned gfp_t; typedef uint64_t loff_t; typedef vm_paddr_t resource_size_t; + +typedef u64 phys_addr_t; #define DECLARE_BITMAP(n, bits) \ unsigned long n[howmany(bits, sizeof(long) * 8)] #endif /* _LINUX_TYPES_H_ */ Index: stable/10/sys/ofed/include/linux/uaccess.h =================================================================== --- stable/10/sys/ofed/include/linux/uaccess.h (revision 271126) +++ stable/10/sys/ofed/include/linux/uaccess.h (revision 271127) @@ -1,34 +1,35 @@ /*- * Copyright (c) 2010 Isilon Systems, Inc. * Copyright (c) 2010 iX Systems, Inc. * Copyright (c) 2010 Panasas, Inc. + * Copyright (c) 2013, 2014 Mellanox Technologies, Ltd. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice unmodified, this list of conditions, and the following * disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #ifndef _LINUX_UACCESS_H_ #define _LINUX_UACCESS_H_ #define get_user(_x, _p) -copyin((_p), &(_x), sizeof(*(_p))) #define put_user(_x, _p) -copyout(&(_x), (_p), sizeof(*(_p))) #endif /* _LINUX_UACCESS_H_ */ Index: stable/10/sys/ofed/include/linux/vmalloc.h =================================================================== --- stable/10/sys/ofed/include/linux/vmalloc.h (revision 271126) +++ stable/10/sys/ofed/include/linux/vmalloc.h (revision 271127) @@ -1,41 +1,42 @@ /*- * Copyright (c) 2010 Isilon Systems, Inc. * Copyright (c) 2010 iX Systems, Inc. * Copyright (c) 2010 Panasas, Inc. + * Copyright (c) 2013, 2014 Mellanox Technologies, Ltd. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice unmodified, this list of conditions, and the following * disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #ifndef _LINUX_VMALLOC_H_ #define _LINUX_VMALLOC_H_ #include #define VM_MAP 0x0000 #define PAGE_KERNEL 0x0000 void *vmap(struct page **pages, unsigned int count, unsigned long flags, int prot); void vunmap(void *addr); #endif /* _LINUX_VMALLOC_H_ */ Index: stable/10/sys/ofed/include/linux/wait.h =================================================================== --- stable/10/sys/ofed/include/linux/wait.h (revision 271126) +++ stable/10/sys/ofed/include/linux/wait.h (revision 271127) @@ -1,112 +1,113 @@ /*- * Copyright (c) 2010 Isilon Systems, Inc. * Copyright (c) 2010 iX Systems, Inc. * Copyright (c) 2010 Panasas, Inc. + * Copyright (c) 2013, 2014 Mellanox Technologies, Ltd. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice unmodified, this list of conditions, and the following * disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #ifndef _LINUX_WAIT_H_ #define _LINUX_WAIT_H_ #include #include #include #include #include #include #include #include struct __wait_queue_head { unsigned int wchan; }; typedef struct __wait_queue_head wait_queue_head_t; #define init_waitqueue_head(x) static inline void __wake_up(struct __wait_queue_head *q, int all) { int wakeup_swapper; void *c; c = &q->wchan; sleepq_lock(c); if (all) wakeup_swapper = sleepq_broadcast(c, SLEEPQ_SLEEP, 0, 0); else wakeup_swapper = sleepq_signal(c, SLEEPQ_SLEEP, 0, 0); sleepq_release(c); if (wakeup_swapper) kick_proc0(); } #define wake_up(q) __wake_up(q, 0) #define wake_up_nr(q, nr) __wake_up(q, 1) #define wake_up_all(q) __wake_up(q, 1) #define wake_up_interruptible(q) __wake_up(q, 0) #define wake_up_interruptible_nr(q, nr) __wake_up(q, 1) #define wake_up_interruptible_all(q, nr) __wake_up(q, 1) #define wait_event(q, cond) \ do { \ void *c = &(q).wchan; \ if (!(cond)) { \ for (;;) { \ sleepq_lock(c); \ if (cond) { \ sleepq_release(c); \ break; \ } \ sleepq_add(c, NULL, "completion", SLEEPQ_SLEEP, 0); \ sleepq_wait(c, 0); \ } \ } \ } while (0) #define wait_event_interruptible(q, cond) \ ({ \ void *c = &(q).wchan; \ int _error; \ \ _error = 0; \ if (!(cond)) { \ for (; _error == 0;) { \ sleepq_lock(c); \ if (cond) { \ sleepq_release(c); \ break; \ } \ sleepq_add(c, NULL, "completion", \ SLEEPQ_SLEEP | SLEEPQ_INTERRUPTIBLE, 0); \ if (sleepq_wait_sig(c, 0)) \ _error = -ERESTARTSYS; \ } \ } \ -_error; \ }) #define DEFINE_WAIT(x) #endif /* _LINUX_WAIT_H_ */ Index: stable/10/sys/ofed/include/linux/workqueue.h =================================================================== --- stable/10/sys/ofed/include/linux/workqueue.h (revision 271126) +++ stable/10/sys/ofed/include/linux/workqueue.h (revision 271127) @@ -1,212 +1,223 @@ /*- * Copyright (c) 2010 Isilon Systems, Inc. * Copyright (c) 2010 iX Systems, Inc. * Copyright (c) 2010 Panasas, Inc. + * Copyright (c) 2013, 2014 Mellanox Technologies, Ltd. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice unmodified, this list of conditions, and the following * disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #ifndef _LINUX_WORKQUEUE_H_ #define _LINUX_WORKQUEUE_H_ #include #include #include #include #include struct workqueue_struct { struct taskqueue *taskqueue; }; struct work_struct { struct task work_task; struct taskqueue *taskqueue; void (*fn)(struct work_struct *); }; struct delayed_work { struct work_struct work; struct callout timer; }; static inline struct delayed_work * to_delayed_work(struct work_struct *work) { return container_of(work, struct delayed_work, work); } static inline void _work_fn(void *context, int pending) { struct work_struct *work; work = context; work->fn(work); } #define INIT_WORK(work, func) \ do { \ (work)->fn = (func); \ (work)->taskqueue = NULL; \ TASK_INIT(&(work)->work_task, 0, _work_fn, (work)); \ } while (0) #define INIT_DELAYED_WORK(_work, func) \ do { \ INIT_WORK(&(_work)->work, func); \ callout_init(&(_work)->timer, CALLOUT_MPSAFE); \ } while (0) #define INIT_DEFERRABLE_WORK INIT_DELAYED_WORK #define schedule_work(work) \ do { \ (work)->taskqueue = taskqueue_thread; \ taskqueue_enqueue(taskqueue_thread, &(work)->work_task); \ } while (0) #define flush_scheduled_work() flush_taskqueue(taskqueue_thread) -#define queue_work(q, work) \ -do { \ - (work)->taskqueue = (q)->taskqueue; \ - taskqueue_enqueue((q)->taskqueue, &(work)->work_task); \ -} while (0) +static inline int queue_work (struct workqueue_struct *q, struct work_struct *work) +{ + (work)->taskqueue = (q)->taskqueue; + /* Return opposite val to align with Linux logic */ + return !taskqueue_enqueue((q)->taskqueue, &(work)->work_task); +} static inline void _delayed_work_fn(void *arg) { struct delayed_work *work; work = arg; taskqueue_enqueue(work->work.taskqueue, &work->work.work_task); } static inline int queue_delayed_work(struct workqueue_struct *wq, struct delayed_work *work, unsigned long delay) { int pending; pending = work->work.work_task.ta_pending; work->work.taskqueue = wq->taskqueue; if (delay != 0) callout_reset(&work->timer, delay, _delayed_work_fn, work); else _delayed_work_fn((void *)work); return (!pending); } static inline bool schedule_delayed_work(struct delayed_work *dwork, unsigned long delay) { struct workqueue_struct wq; wq.taskqueue = taskqueue_thread; return queue_delayed_work(&wq, dwork, delay); } static inline struct workqueue_struct * _create_workqueue_common(char *name, int cpus) { struct workqueue_struct *wq; wq = kmalloc(sizeof(*wq), M_WAITOK); wq->taskqueue = taskqueue_create((name), M_WAITOK, taskqueue_thread_enqueue, &wq->taskqueue); taskqueue_start_threads(&wq->taskqueue, cpus, PWAIT, "%s", name); return (wq); } #define create_singlethread_workqueue(name) \ _create_workqueue_common(name, 1) #define create_workqueue(name) \ _create_workqueue_common(name, MAXCPU) static inline void destroy_workqueue(struct workqueue_struct *wq) { taskqueue_free(wq->taskqueue); kfree(wq); } #define flush_workqueue(wq) flush_taskqueue((wq)->taskqueue) static inline void _flush_fn(void *context, int pending) { } static inline void flush_taskqueue(struct taskqueue *tq) { struct task flushtask; PHOLD(curproc); TASK_INIT(&flushtask, 0, _flush_fn, NULL); taskqueue_enqueue(tq, &flushtask); taskqueue_drain(tq, &flushtask); PRELE(curproc); } static inline int cancel_work_sync(struct work_struct *work) { if (work->taskqueue && taskqueue_cancel(work->taskqueue, &work->work_task, NULL)) taskqueue_drain(work->taskqueue, &work->work_task); return 0; } /* * This may leave work running on another CPU as it does on Linux. */ static inline int cancel_delayed_work(struct delayed_work *work) { callout_stop(&work->timer); if (work->work.taskqueue) return (taskqueue_cancel(work->work.taskqueue, &work->work.work_task, NULL) == 0); return 0; } static inline int cancel_delayed_work_sync(struct delayed_work *work) { callout_drain(&work->timer); if (work->work.taskqueue && taskqueue_cancel(work->work.taskqueue, &work->work.work_task, NULL)) taskqueue_drain(work->work.taskqueue, &work->work.work_task); return 0; +} + +static inline bool +mod_delayed_work(struct workqueue_struct *wq, struct delayed_work *dwork, + unsigned long delay) +{ + cancel_delayed_work(dwork); + queue_delayed_work(wq, dwork, delay); + return false; } #endif /* _LINUX_WORKQUEUE_H_ */ Index: stable/10/sys/ofed/include/net/arp.h =================================================================== --- stable/10/sys/ofed/include/net/arp.h (revision 271126) +++ stable/10/sys/ofed/include/net/arp.h (nonexistent) @@ -1,27 +0,0 @@ -/*- - * Copyright (c) 2010 Isilon Systems, Inc. - * Copyright (c) 2010 iX Systems, Inc. - * Copyright (c) 2010 Panasas, Inc. - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice unmodified, this list of conditions, and the following - * disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR - * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES - * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. - * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, - * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT - * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF - * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ Property changes on: stable/10/sys/ofed/include/net/arp.h ___________________________________________________________________ Deleted: fbsd:nokeywords ## -1 +0,0 ## -true \ No newline at end of property Index: stable/10/sys/ofed/include/net/addrconf.h =================================================================== --- stable/10/sys/ofed/include/net/addrconf.h (revision 271126) +++ stable/10/sys/ofed/include/net/addrconf.h (nonexistent) @@ -1,27 +0,0 @@ -/*- - * Copyright (c) 2010 Isilon Systems, Inc. - * Copyright (c) 2010 iX Systems, Inc. - * Copyright (c) 2010 Panasas, Inc. - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice unmodified, this list of conditions, and the following - * disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR - * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES - * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. - * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, - * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT - * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF - * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ Property changes on: stable/10/sys/ofed/include/net/addrconf.h ___________________________________________________________________ Deleted: fbsd:nokeywords ## -1 +0,0 ## -true \ No newline at end of property Index: stable/10/sys/ofed/include/net/neighbour.h =================================================================== --- stable/10/sys/ofed/include/net/neighbour.h (revision 271126) +++ stable/10/sys/ofed/include/net/neighbour.h (nonexistent) @@ -1,27 +0,0 @@ -/*- - * Copyright (c) 2010 Isilon Systems, Inc. - * Copyright (c) 2010 iX Systems, Inc. - * Copyright (c) 2010 Panasas, Inc. - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice unmodified, this list of conditions, and the following - * disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR - * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES - * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. - * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, - * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT - * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF - * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ Property changes on: stable/10/sys/ofed/include/net/neighbour.h ___________________________________________________________________ Deleted: fbsd:nokeywords ## -1 +0,0 ## -true \ No newline at end of property Index: stable/10/sys/ofed/include/net/ip6_route.h =================================================================== --- stable/10/sys/ofed/include/net/ip6_route.h (revision 271126) +++ stable/10/sys/ofed/include/net/ip6_route.h (nonexistent) @@ -1,27 +0,0 @@ -/*- - * Copyright (c) 2010 Isilon Systems, Inc. - * Copyright (c) 2010 iX Systems, Inc. - * Copyright (c) 2010 Panasas, Inc. - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice unmodified, this list of conditions, and the following - * disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR - * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES - * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. - * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, - * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT - * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF - * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ Property changes on: stable/10/sys/ofed/include/net/ip6_route.h ___________________________________________________________________ Deleted: fbsd:nokeywords ## -1 +0,0 ## -true \ No newline at end of property Index: stable/10/sys/ofed/include/net/if_inet6.h =================================================================== --- stable/10/sys/ofed/include/net/if_inet6.h (nonexistent) +++ stable/10/sys/ofed/include/net/if_inet6.h (revision 271127) @@ -0,0 +1,47 @@ +/*- + * Copyright (c) 2010 Isilon Systems, Inc. + * Copyright (c) 2010 iX Systems, Inc. + * Copyright (c) 2010 Panasas, Inc. + * Copyright (c) 2013, 2014 Mellanox Technologies, Ltd. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice unmodified, this list of conditions, and the following + * disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _NET_IF_INET6_H_ +#define _NET_IF_INET6_H_ + +static inline void ipv6_eth_mc_map(const struct in6_addr *addr, char *buf) +{ +/* + * +-------+-------+-------+-------+-------+-------+ + * | 33 | 33 | DST13 | DST14 | DST15 | DST16 | + * +-------+-------+-------+-------+-------+-------+ + */ + + buf[0]= 0x33; + buf[1]= 0x33; + + memcpy(buf + 2, &addr->s6_addr32[3], sizeof(__u32)); +} + +#endif /* _NET_IF_INET6_H_ */ Property changes on: stable/10/sys/ofed/include/net/if_inet6.h ___________________________________________________________________ Added: fbsd:nokeywords ## -0,0 +1 ## +true \ No newline at end of property Index: stable/10/sys/ofed/include/net/ip.h =================================================================== --- stable/10/sys/ofed/include/net/ip.h (revision 271126) +++ stable/10/sys/ofed/include/net/ip.h (revision 271127) @@ -1,81 +1,82 @@ /*- * Copyright (c) 2010 Isilon Systems, Inc. * Copyright (c) 2010 iX Systems, Inc. * Copyright (c) 2010 Panasas, Inc. + * Copyright (c) 2013, 2014 Mellanox Technologies, Ltd. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice unmodified, this list of conditions, and the following * disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #ifndef _LINUX_NET_IP_H_ #define _LINUX_NET_IP_H_ #include "opt_inet.h" #include #include #include #include #include #include #include #ifdef INET static inline void inet_get_local_port_range(int *low, int *high) { *low = V_ipport_firstauto; *high = V_ipport_lastauto; } static inline void ip_ib_mc_map(uint32_t addr, const unsigned char *bcast, char *buf) { unsigned char scope; addr = ntohl(addr); scope = bcast[5] & 0xF; buf[0] = 0; buf[1] = 0xff; buf[2] = 0xff; buf[3] = 0xff; buf[4] = 0xff; buf[5] = 0x10 | scope; buf[6] = 0x40; buf[7] = 0x1b; buf[8] = bcast[8]; buf[9] = bcast[9]; buf[10] = 0; buf[11] = 0; buf[12] = 0; buf[13] = 0; buf[14] = 0; buf[15] = 0; buf[16] = (addr >> 24) & 0x0f; buf[17] = (addr >> 16) & 0xff; buf[18] = (addr >> 8) & 0xff; buf[19] = addr & 0xff; } #endif #endif /* _LINUX_NET_IP_H_ */ Index: stable/10/sys/ofed/include/net/ipv6.h =================================================================== --- stable/10/sys/ofed/include/net/ipv6.h (revision 271126) +++ stable/10/sys/ofed/include/net/ipv6.h (revision 271127) @@ -1,60 +1,110 @@ /*- * Copyright (c) 2010 Isilon Systems, Inc. * Copyright (c) 2010 iX Systems, Inc. * Copyright (c) 2010 Panasas, Inc. + * Copyright (c) 2013, 2014 Mellanox Technologies, Ltd. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice unmodified, this list of conditions, and the following * disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #ifndef _LINUX_NET_IPV6_H_ #define _LINUX_NET_IPV6_H_ #include "opt_inet6.h" #define ipv6_addr_loopback IN6_IS_ADDR_LOOPBACK #define ipv6_addr_copy(dst, src) \ memcpy((dst), (src), sizeof(struct in6_addr)) #ifdef INET6 static inline void ipv6_ib_mc_map(const struct in6_addr *addr, const unsigned char *broadcast, char *buf) { unsigned char scope; scope = broadcast[5] & 0xF; buf[0] = 0; buf[1] = 0xff; buf[2] = 0xff; buf[3] = 0xff; buf[4] = 0xff; buf[5] = 0x10 | scope; buf[6] = 0x60; buf[7] = 0x1b; buf[8] = broadcast[8]; buf[9] = broadcast[9]; memcpy(&buf[10], &addr->s6_addr[6], 10); } #endif + +static inline void __ipv6_addr_set_half(__be32 *addr, + __be32 wh, __be32 wl) +{ +#if BITS_PER_LONG == 64 +#if defined(__BIG_ENDIAN) + if (__builtin_constant_p(wh) && __builtin_constant_p(wl)) { + *(__force u64 *)addr = ((__force u64)(wh) << 32 | (__force u64)(wl)); + return; + } +#elif defined(__LITTLE_ENDIAN) + if (__builtin_constant_p(wl) && __builtin_constant_p(wh)) { + *(__force u64 *)addr = ((__force u64)(wl) << 32 | (__force u64)(wh)); + return; + } +#endif +#endif + addr[0] = wh; + addr[1] = wl; +} + +static inline void ipv6_addr_set(struct in6_addr *addr, + __be32 w1, __be32 w2, + __be32 w3, __be32 w4) +{ + __ipv6_addr_set_half(&addr->s6_addr32[0], w1, w2); + __ipv6_addr_set_half(&addr->s6_addr32[2], w3, w4); +} + +static inline void ipv6_addr_set_v4mapped(const __be32 addr, + struct in6_addr *v4mapped) +{ + ipv6_addr_set(v4mapped, + 0, 0, + htonl(0x0000FFFF), + addr); +} + +static inline int ipv6_addr_v4mapped(const struct in6_addr *a) +{ + return ((a->s6_addr32[0] | a->s6_addr32[1] | + (a->s6_addr32[2] ^ htonl(0x0000ffff))) == 0); +} + +static inline int ipv6_addr_cmp(const struct in6_addr *a1, const struct in6_addr *a2) +{ + return memcmp(a1, a2, sizeof(struct in6_addr)); +} + #endif /* _LINUX_NET_IPV6_H_ */ Index: stable/10/sys/ofed/include/net/netevent.h =================================================================== --- stable/10/sys/ofed/include/net/netevent.h (revision 271126) +++ stable/10/sys/ofed/include/net/netevent.h (revision 271127) @@ -1,70 +1,71 @@ /*- * Copyright (c) 2010 Isilon Systems, Inc. * Copyright (c) 2010 iX Systems, Inc. * Copyright (c) 2010 Panasas, Inc. + * Copyright (c) 2013, 2014 Mellanox Technologies, Ltd. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice unmodified, this list of conditions, and the following * disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #ifndef _LINUX_NET_NETEVENT_H_ #define _LINUX_NET_NETEVENT_H_ #include enum netevent_notif_type { NETEVENT_NEIGH_UPDATE = 0, #if 0 /* Unsupported events. */ NETEVENT_PMTU_UPDATE, NETEVENT_REDIRECT, #endif }; struct llentry; static inline void _handle_arp_update_event(void *arg, struct llentry *lle, int evt __unused) { struct notifier_block *nb; nb = arg; nb->notifier_call(nb, NETEVENT_NEIGH_UPDATE, lle); } static inline int register_netevent_notifier(struct notifier_block *nb) { nb->tags[NETEVENT_NEIGH_UPDATE] = EVENTHANDLER_REGISTER( lle_event, _handle_arp_update_event, nb, 0); return (0); } static inline int unregister_netevent_notifier(struct notifier_block *nb) { EVENTHANDLER_DEREGISTER(lle_event, nb->tags[NETEVENT_NEIGH_UPDATE]); return (0); } #endif /* _LINUX_NET_NETEVENT_H_ */ Index: stable/10/sys/ofed/include/net/tcp.h =================================================================== --- stable/10/sys/ofed/include/net/tcp.h (revision 271126) +++ stable/10/sys/ofed/include/net/tcp.h (revision 271127) @@ -1,38 +1,39 @@ /*- * Copyright (c) 2010 Isilon Systems, Inc. * Copyright (c) 2010 iX Systems, Inc. * Copyright (c) 2010 Panasas, Inc. + * Copyright (c) 2013, 2014 Mellanox Technologies, Ltd. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice unmodified, this list of conditions, and the following * disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #ifndef _LINUX_NET_TCP_H_ #define _LINUX_NET_TCP_H_ #include #include #include #include #endif /* _LINUX_NET_TCP_H_ */ Index: stable/10/sys/ofed/include/rdma/ib_umem.h =================================================================== --- stable/10/sys/ofed/include/rdma/ib_umem.h (revision 271126) +++ stable/10/sys/ofed/include/rdma/ib_umem.h (revision 271127) @@ -1,101 +1,102 @@ /* * Copyright (c) 2007 Cisco Systems. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU * General Public License (GPL) Version 2, available from the file * COPYING in the main directory of this source tree, or the * OpenIB.org BSD license below: * * Redistribution and use in source and binary forms, with or * without modification, are permitted provided that the following * conditions are met: * * - Redistributions of source code must retain the above * copyright notice, this list of conditions and the following * disclaimer. * * - Redistributions in binary form must reproduce the above * copyright notice, this list of conditions and the following * disclaimer in the documentation and/or other materials * provided with the distribution. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ #ifndef IB_UMEM_H #define IB_UMEM_H #include #include #include #include struct ib_ucontext; +struct vm_area_struct; struct ib_umem { struct ib_ucontext *context; size_t length; int offset; int page_size; int writable; int hugetlb; struct list_head chunk_list; #ifdef __linux__ struct work_struct work; struct mm_struct *mm; #else unsigned long start; #endif unsigned long diff; }; struct ib_cmem { struct ib_ucontext *context; size_t length; /* Link list of contiguous blocks being part of that cmem */ struct list_head ib_cmem_block; /* Order of cmem block, 2^ block_order will equal number of physical pages per block */ unsigned long block_order; /* Refernce counter for that memory area - When value became 0 pages will be returned to the kernel. */ struct kref refcount; }; struct ib_umem_chunk { struct list_head list; int nents; int nmap; struct dma_attrs attrs; struct scatterlist page_list[0]; }; struct ib_umem *ib_umem_get(struct ib_ucontext *context, unsigned long addr, size_t size, int access, int dmasync); void ib_umem_release(struct ib_umem *umem); int ib_umem_page_count(struct ib_umem *umem); int ib_cmem_map_contiguous_pages_to_vma(struct ib_cmem *ib_cmem, struct vm_area_struct *vma); struct ib_cmem *ib_cmem_alloc_contiguous_pages(struct ib_ucontext *context, unsigned long total_size, unsigned long page_size_order); void ib_cmem_release_contiguous_pages(struct ib_cmem *cmem); int ib_umem_map_to_vma(struct ib_umem *umem, struct vm_area_struct *vma); #endif /* IB_UMEM_H */ Index: stable/10/sys/ofed/include/rdma/ib_verbs.h =================================================================== --- stable/10/sys/ofed/include/rdma/ib_verbs.h (revision 271126) +++ stable/10/sys/ofed/include/rdma/ib_verbs.h (revision 271127) @@ -1,2341 +1,2340 @@ /* * Copyright (c) 2004 Mellanox Technologies Ltd. All rights reserved. * Copyright (c) 2004 Infinicon Corporation. All rights reserved. * Copyright (c) 2004 Intel Corporation. All rights reserved. * Copyright (c) 2004 Topspin Corporation. All rights reserved. * Copyright (c) 2004 Voltaire Corporation. All rights reserved. * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved. * Copyright (c) 2005, 2006, 2007 Cisco Systems. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU * General Public License (GPL) Version 2, available from the file * COPYING in the main directory of this source tree, or the * OpenIB.org BSD license below: * * Redistribution and use in source and binary forms, with or * without modification, are permitted provided that the following * conditions are met: * * - Redistributions of source code must retain the above * copyright notice, this list of conditions and the following * disclaimer. * * - Redistributions in binary form must reproduce the above * copyright notice, this list of conditions and the following * disclaimer in the documentation and/or other materials * provided with the distribution. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ #if !defined(IB_VERBS_H) #define IB_VERBS_H #include #include #include #include #include #include #include #include #include -#include #include #include #include extern struct workqueue_struct *ib_wq; union ib_gid { u8 raw[16]; struct { __be64 subnet_prefix; __be64 interface_id; } global; }; enum rdma_node_type { /* IB values map to NodeInfo:NodeType. */ RDMA_NODE_IB_CA = 1, RDMA_NODE_IB_SWITCH, RDMA_NODE_IB_ROUTER, RDMA_NODE_RNIC }; enum rdma_transport_type { RDMA_TRANSPORT_IB, RDMA_TRANSPORT_IWARP }; enum rdma_transport_type rdma_node_get_transport(enum rdma_node_type node_type) __attribute_const__; enum rdma_link_layer { IB_LINK_LAYER_UNSPECIFIED, IB_LINK_LAYER_INFINIBAND, IB_LINK_LAYER_ETHERNET, }; enum ib_device_cap_flags { IB_DEVICE_RESIZE_MAX_WR = 1, IB_DEVICE_BAD_PKEY_CNTR = (1<<1), IB_DEVICE_BAD_QKEY_CNTR = (1<<2), IB_DEVICE_RAW_MULTI = (1<<3), IB_DEVICE_AUTO_PATH_MIG = (1<<4), IB_DEVICE_CHANGE_PHY_PORT = (1<<5), IB_DEVICE_UD_AV_PORT_ENFORCE = (1<<6), IB_DEVICE_CURR_QP_STATE_MOD = (1<<7), IB_DEVICE_SHUTDOWN_PORT = (1<<8), IB_DEVICE_INIT_TYPE = (1<<9), IB_DEVICE_PORT_ACTIVE_EVENT = (1<<10), IB_DEVICE_SYS_IMAGE_GUID = (1<<11), IB_DEVICE_RC_RNR_NAK_GEN = (1<<12), IB_DEVICE_SRQ_RESIZE = (1<<13), IB_DEVICE_N_NOTIFY_CQ = (1<<14), IB_DEVICE_LOCAL_DMA_LKEY = (1<<15), IB_DEVICE_RESERVED = (1<<16), /* old SEND_W_INV */ IB_DEVICE_MEM_WINDOW = (1<<17), /* * Devices should set IB_DEVICE_UD_IP_SUM if they support * insertion of UDP and TCP checksum on outgoing UD IPoIB * messages and can verify the validity of checksum for * incoming messages. Setting this flag implies that the * IPoIB driver may set NETIF_F_IP_CSUM for datagram mode. */ IB_DEVICE_UD_IP_CSUM = (1<<18), IB_DEVICE_UD_TSO = (1<<19), IB_DEVICE_XRC = (1<<20), IB_DEVICE_MEM_MGT_EXTENSIONS = (1<<21), IB_DEVICE_BLOCK_MULTICAST_LOOPBACK = (1<<22), IB_DEVICE_MR_ALLOCATE = (1<<23), IB_DEVICE_SHARED_MR = (1<<24), IB_DEVICE_QPG = (1<<25), IB_DEVICE_UD_RSS = (1<<26), IB_DEVICE_UD_TSS = (1<<27) }; enum ib_atomic_cap { IB_ATOMIC_NONE, IB_ATOMIC_HCA, IB_ATOMIC_GLOB }; struct ib_device_attr { u64 fw_ver; __be64 sys_image_guid; u64 max_mr_size; u64 page_size_cap; u32 vendor_id; u32 vendor_part_id; u32 hw_ver; int max_qp; int max_qp_wr; int device_cap_flags; int max_sge; int max_sge_rd; int max_cq; int max_cqe; int max_mr; int max_pd; int max_qp_rd_atom; int max_ee_rd_atom; int max_res_rd_atom; int max_qp_init_rd_atom; int max_ee_init_rd_atom; enum ib_atomic_cap atomic_cap; enum ib_atomic_cap masked_atomic_cap; int max_ee; int max_rdd; int max_mw; int max_raw_ipv6_qp; int max_raw_ethy_qp; int max_mcast_grp; int max_mcast_qp_attach; int max_total_mcast_qp_attach; int max_ah; int max_fmr; int max_map_per_fmr; int max_srq; int max_srq_wr; int max_srq_sge; unsigned int max_fast_reg_page_list_len; int max_rss_tbl_sz; u16 max_pkeys; u8 local_ca_ack_delay; }; enum ib_mtu { IB_MTU_256 = 1, IB_MTU_512 = 2, IB_MTU_1024 = 3, IB_MTU_2048 = 4, IB_MTU_4096 = 5 }; static inline int ib_mtu_enum_to_int(enum ib_mtu mtu) { switch (mtu) { case IB_MTU_256: return 256; case IB_MTU_512: return 512; case IB_MTU_1024: return 1024; case IB_MTU_2048: return 2048; case IB_MTU_4096: return 4096; default: return -1; } } enum ib_port_state { IB_PORT_NOP = 0, IB_PORT_DOWN = 1, IB_PORT_INIT = 2, IB_PORT_ARMED = 3, IB_PORT_ACTIVE = 4, IB_PORT_ACTIVE_DEFER = 5 }; enum ib_port_cap_flags { IB_PORT_SM = 1 << 1, IB_PORT_NOTICE_SUP = 1 << 2, IB_PORT_TRAP_SUP = 1 << 3, IB_PORT_OPT_IPD_SUP = 1 << 4, IB_PORT_AUTO_MIGR_SUP = 1 << 5, IB_PORT_SL_MAP_SUP = 1 << 6, IB_PORT_MKEY_NVRAM = 1 << 7, IB_PORT_PKEY_NVRAM = 1 << 8, IB_PORT_LED_INFO_SUP = 1 << 9, IB_PORT_SM_DISABLED = 1 << 10, IB_PORT_SYS_IMAGE_GUID_SUP = 1 << 11, IB_PORT_PKEY_SW_EXT_PORT_TRAP_SUP = 1 << 12, IB_PORT_EXTENDED_SPEEDS_SUP = 1 << 14, IB_PORT_CM_SUP = 1 << 16, IB_PORT_SNMP_TUNNEL_SUP = 1 << 17, IB_PORT_REINIT_SUP = 1 << 18, IB_PORT_DEVICE_MGMT_SUP = 1 << 19, IB_PORT_VENDOR_CLASS_SUP = 1 << 20, IB_PORT_DR_NOTICE_SUP = 1 << 21, IB_PORT_CAP_MASK_NOTICE_SUP = 1 << 22, IB_PORT_BOOT_MGMT_SUP = 1 << 23, IB_PORT_LINK_LATENCY_SUP = 1 << 24, IB_PORT_CLIENT_REG_SUP = 1 << 25 }; enum ib_port_width { IB_WIDTH_1X = 1, IB_WIDTH_4X = 2, IB_WIDTH_8X = 4, IB_WIDTH_12X = 8 }; static inline int ib_width_enum_to_int(enum ib_port_width width) { switch (width) { case IB_WIDTH_1X: return 1; case IB_WIDTH_4X: return 4; case IB_WIDTH_8X: return 8; case IB_WIDTH_12X: return 12; default: return -1; } } enum ib_port_speed { IB_SPEED_SDR = 1, IB_SPEED_DDR = 2, IB_SPEED_QDR = 4, IB_SPEED_FDR10 = 8, IB_SPEED_FDR = 16, IB_SPEED_EDR = 32 }; struct ib_protocol_stats { /* TBD... */ }; struct iw_protocol_stats { u64 ipInReceives; u64 ipInHdrErrors; u64 ipInTooBigErrors; u64 ipInNoRoutes; u64 ipInAddrErrors; u64 ipInUnknownProtos; u64 ipInTruncatedPkts; u64 ipInDiscards; u64 ipInDelivers; u64 ipOutForwDatagrams; u64 ipOutRequests; u64 ipOutDiscards; u64 ipOutNoRoutes; u64 ipReasmTimeout; u64 ipReasmReqds; u64 ipReasmOKs; u64 ipReasmFails; u64 ipFragOKs; u64 ipFragFails; u64 ipFragCreates; u64 ipInMcastPkts; u64 ipOutMcastPkts; u64 ipInBcastPkts; u64 ipOutBcastPkts; u64 tcpRtoAlgorithm; u64 tcpRtoMin; u64 tcpRtoMax; u64 tcpMaxConn; u64 tcpActiveOpens; u64 tcpPassiveOpens; u64 tcpAttemptFails; u64 tcpEstabResets; u64 tcpCurrEstab; u64 tcpInSegs; u64 tcpOutSegs; u64 tcpRetransSegs; u64 tcpInErrs; u64 tcpOutRsts; }; union rdma_protocol_stats { struct ib_protocol_stats ib; struct iw_protocol_stats iw; }; struct ib_port_attr { enum ib_port_state state; enum ib_mtu max_mtu; enum ib_mtu active_mtu; int gid_tbl_len; u32 port_cap_flags; u32 max_msg_sz; u32 bad_pkey_cntr; u32 qkey_viol_cntr; u16 pkey_tbl_len; u16 lid; u16 sm_lid; u8 lmc; u8 max_vl_num; u8 sm_sl; u8 subnet_timeout; u8 init_type_reply; u8 active_width; u8 active_speed; u8 phys_state; enum rdma_link_layer link_layer; }; enum ib_device_modify_flags { IB_DEVICE_MODIFY_SYS_IMAGE_GUID = 1 << 0, IB_DEVICE_MODIFY_NODE_DESC = 1 << 1 }; struct ib_device_modify { u64 sys_image_guid; char node_desc[64]; }; enum ib_port_modify_flags { IB_PORT_SHUTDOWN = 1, IB_PORT_INIT_TYPE = (1<<2), IB_PORT_RESET_QKEY_CNTR = (1<<3) }; struct ib_port_modify { u32 set_port_cap_mask; u32 clr_port_cap_mask; u8 init_type; }; enum ib_event_type { IB_EVENT_CQ_ERR, IB_EVENT_QP_FATAL, IB_EVENT_QP_REQ_ERR, IB_EVENT_QP_ACCESS_ERR, IB_EVENT_COMM_EST, IB_EVENT_SQ_DRAINED, IB_EVENT_PATH_MIG, IB_EVENT_PATH_MIG_ERR, IB_EVENT_DEVICE_FATAL, IB_EVENT_PORT_ACTIVE, IB_EVENT_PORT_ERR, IB_EVENT_LID_CHANGE, IB_EVENT_PKEY_CHANGE, IB_EVENT_SM_CHANGE, IB_EVENT_SRQ_ERR, IB_EVENT_SRQ_LIMIT_REACHED, IB_EVENT_QP_LAST_WQE_REACHED, IB_EVENT_CLIENT_REREGISTER, IB_EVENT_GID_CHANGE, }; enum ib_event_flags { IB_XRC_QP_EVENT_FLAG = 0x80000000, }; struct ib_event { struct ib_device *device; union { struct ib_cq *cq; struct ib_qp *qp; struct ib_srq *srq; u8 port_num; u32 xrc_qp_num; } element; enum ib_event_type event; }; struct ib_event_handler { struct ib_device *device; void (*handler)(struct ib_event_handler *, struct ib_event *); struct list_head list; }; #define INIT_IB_EVENT_HANDLER(_ptr, _device, _handler) \ do { \ (_ptr)->device = _device; \ (_ptr)->handler = _handler; \ INIT_LIST_HEAD(&(_ptr)->list); \ } while (0) struct ib_global_route { union ib_gid dgid; u32 flow_label; u8 sgid_index; u8 hop_limit; u8 traffic_class; }; struct ib_grh { __be32 version_tclass_flow; __be16 paylen; u8 next_hdr; u8 hop_limit; union ib_gid sgid; union ib_gid dgid; }; enum { IB_MULTICAST_QPN = 0xffffff }; #define IB_LID_PERMISSIVE cpu_to_be16(0xFFFF) enum ib_ah_flags { IB_AH_GRH = 1 }; enum ib_rate { IB_RATE_PORT_CURRENT = 0, IB_RATE_2_5_GBPS = 2, IB_RATE_5_GBPS = 5, IB_RATE_10_GBPS = 3, IB_RATE_20_GBPS = 6, IB_RATE_30_GBPS = 4, IB_RATE_40_GBPS = 7, IB_RATE_60_GBPS = 8, IB_RATE_80_GBPS = 9, IB_RATE_120_GBPS = 10, IB_RATE_14_GBPS = 11, IB_RATE_56_GBPS = 12, IB_RATE_112_GBPS = 13, IB_RATE_168_GBPS = 14, IB_RATE_25_GBPS = 15, IB_RATE_100_GBPS = 16, IB_RATE_200_GBPS = 17, IB_RATE_300_GBPS = 18 }; /** * ib_rate_to_mult - Convert the IB rate enum to a multiple of the * base rate of 2.5 Gbit/sec. For example, IB_RATE_5_GBPS will be * converted to 2, since 5 Gbit/sec is 2 * 2.5 Gbit/sec. * @rate: rate to convert. */ int ib_rate_to_mult(enum ib_rate rate) __attribute_const__; /** * ib_rate_to_mbps - Convert the IB rate enum to Mbps. * For example, IB_RATE_2_5_GBPS will be converted to 2500. * @rate: rate to convert. */ int ib_rate_to_mbps(enum ib_rate rate) __attribute_const__; /** * mult_to_ib_rate - Convert a multiple of 2.5 Gbit/sec to an IB rate * enum. * @mult: multiple to convert. */ enum ib_rate mult_to_ib_rate(int mult) __attribute_const__; struct ib_ah_attr { struct ib_global_route grh; u16 dlid; u8 sl; u8 src_path_bits; u8 static_rate; u8 ah_flags; u8 port_num; }; enum ib_wc_status { IB_WC_SUCCESS, IB_WC_LOC_LEN_ERR, IB_WC_LOC_QP_OP_ERR, IB_WC_LOC_EEC_OP_ERR, IB_WC_LOC_PROT_ERR, IB_WC_WR_FLUSH_ERR, IB_WC_MW_BIND_ERR, IB_WC_BAD_RESP_ERR, IB_WC_LOC_ACCESS_ERR, IB_WC_REM_INV_REQ_ERR, IB_WC_REM_ACCESS_ERR, IB_WC_REM_OP_ERR, IB_WC_RETRY_EXC_ERR, IB_WC_RNR_RETRY_EXC_ERR, IB_WC_LOC_RDD_VIOL_ERR, IB_WC_REM_INV_RD_REQ_ERR, IB_WC_REM_ABORT_ERR, IB_WC_INV_EECN_ERR, IB_WC_INV_EEC_STATE_ERR, IB_WC_FATAL_ERR, IB_WC_RESP_TIMEOUT_ERR, IB_WC_GENERAL_ERR }; enum ib_wc_opcode { IB_WC_SEND, IB_WC_RDMA_WRITE, IB_WC_RDMA_READ, IB_WC_COMP_SWAP, IB_WC_FETCH_ADD, IB_WC_BIND_MW, IB_WC_LSO, IB_WC_LOCAL_INV, IB_WC_FAST_REG_MR, IB_WC_MASKED_COMP_SWAP, IB_WC_MASKED_FETCH_ADD, /* * Set value of IB_WC_RECV so consumers can test if a completion is a * receive by testing (opcode & IB_WC_RECV). */ IB_WC_RECV = 1 << 7, IB_WC_RECV_RDMA_WITH_IMM }; enum ib_wc_flags { IB_WC_GRH = 1, IB_WC_WITH_IMM = (1<<1), IB_WC_WITH_INVALIDATE = (1<<2), IB_WC_IP_CSUM_OK = (1<<3), }; struct ib_wc { u64 wr_id; enum ib_wc_status status; enum ib_wc_opcode opcode; u32 vendor_err; u32 byte_len; struct ib_qp *qp; union { __be32 imm_data; u32 invalidate_rkey; } ex; u32 src_qp; int wc_flags; u16 pkey_index; u16 slid; u8 sl; u8 dlid_path_bits; u8 port_num; /* valid only for DR SMPs on switches */ int csum_ok; }; enum ib_cq_notify_flags { IB_CQ_SOLICITED = 1 << 0, IB_CQ_NEXT_COMP = 1 << 1, IB_CQ_SOLICITED_MASK = IB_CQ_SOLICITED | IB_CQ_NEXT_COMP, IB_CQ_REPORT_MISSED_EVENTS = 1 << 2, }; enum ib_srq_type { IB_SRQT_BASIC, IB_SRQT_XRC }; enum ib_srq_attr_mask { IB_SRQ_MAX_WR = 1 << 0, IB_SRQ_LIMIT = 1 << 1, }; struct ib_srq_attr { u32 max_wr; u32 max_sge; u32 srq_limit; }; struct ib_srq_init_attr { void (*event_handler)(struct ib_event *, void *); void *srq_context; struct ib_srq_attr attr; enum ib_srq_type srq_type; union { struct { struct ib_xrcd *xrcd; struct ib_cq *cq; } xrc; } ext; }; struct ib_qp_cap { u32 max_send_wr; u32 max_recv_wr; u32 max_send_sge; u32 max_recv_sge; u32 max_inline_data; u32 qpg_tss_mask_sz; }; enum ib_sig_type { IB_SIGNAL_ALL_WR, IB_SIGNAL_REQ_WR }; enum ib_qp_type { /* * IB_QPT_SMI and IB_QPT_GSI have to be the first two entries * here (and in that order) since the MAD layer uses them as * indices into a 2-entry table. */ IB_QPT_SMI, IB_QPT_GSI, IB_QPT_RC, IB_QPT_UC, IB_QPT_UD, IB_QPT_XRC, IB_QPT_RAW_IPV6, IB_QPT_RAW_ETHERTYPE, IB_QPT_RAW_PACKET = 8, IB_QPT_XRC_INI = 9, IB_QPT_XRC_TGT, IB_QPT_MAX, }; enum ib_qp_create_flags { IB_QP_CREATE_IPOIB_UD_LSO = 1 << 0, IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK = 1 << 1, IB_QP_CREATE_NETIF_QP = 1 << 2, /* reserve bits 26-31 for low level drivers' internal use */ IB_QP_CREATE_RESERVED_START = 1 << 26, IB_QP_CREATE_RESERVED_END = 1 << 31, }; enum ib_qpg_type { IB_QPG_NONE = 0, IB_QPG_PARENT = (1<<0), IB_QPG_CHILD_RX = (1<<1), IB_QPG_CHILD_TX = (1<<2) }; struct ib_qpg_init_attrib { u32 tss_child_count; u32 rss_child_count; }; struct ib_qp_init_attr { void (*event_handler)(struct ib_event *, void *); void *qp_context; struct ib_cq *send_cq; struct ib_cq *recv_cq; struct ib_srq *srq; struct ib_xrcd *xrcd; /* XRC TGT QPs only */ struct ib_qp_cap cap; union { struct ib_qp *qpg_parent; /* see qpg_type */ struct ib_qpg_init_attrib parent_attrib; } pp; enum ib_sig_type sq_sig_type; enum ib_qp_type qp_type; enum ib_qp_create_flags create_flags; enum ib_qpg_type qpg_type; u8 port_num; /* special QP types only */ }; struct ib_qp_open_attr { void (*event_handler)(struct ib_event *, void *); void *qp_context; u32 qp_num; enum ib_qp_type qp_type; }; enum ib_rnr_timeout { IB_RNR_TIMER_655_36 = 0, IB_RNR_TIMER_000_01 = 1, IB_RNR_TIMER_000_02 = 2, IB_RNR_TIMER_000_03 = 3, IB_RNR_TIMER_000_04 = 4, IB_RNR_TIMER_000_06 = 5, IB_RNR_TIMER_000_08 = 6, IB_RNR_TIMER_000_12 = 7, IB_RNR_TIMER_000_16 = 8, IB_RNR_TIMER_000_24 = 9, IB_RNR_TIMER_000_32 = 10, IB_RNR_TIMER_000_48 = 11, IB_RNR_TIMER_000_64 = 12, IB_RNR_TIMER_000_96 = 13, IB_RNR_TIMER_001_28 = 14, IB_RNR_TIMER_001_92 = 15, IB_RNR_TIMER_002_56 = 16, IB_RNR_TIMER_003_84 = 17, IB_RNR_TIMER_005_12 = 18, IB_RNR_TIMER_007_68 = 19, IB_RNR_TIMER_010_24 = 20, IB_RNR_TIMER_015_36 = 21, IB_RNR_TIMER_020_48 = 22, IB_RNR_TIMER_030_72 = 23, IB_RNR_TIMER_040_96 = 24, IB_RNR_TIMER_061_44 = 25, IB_RNR_TIMER_081_92 = 26, IB_RNR_TIMER_122_88 = 27, IB_RNR_TIMER_163_84 = 28, IB_RNR_TIMER_245_76 = 29, IB_RNR_TIMER_327_68 = 30, IB_RNR_TIMER_491_52 = 31 }; enum ib_qp_attr_mask { IB_QP_STATE = 1, IB_QP_CUR_STATE = (1<<1), IB_QP_EN_SQD_ASYNC_NOTIFY = (1<<2), IB_QP_ACCESS_FLAGS = (1<<3), IB_QP_PKEY_INDEX = (1<<4), IB_QP_PORT = (1<<5), IB_QP_QKEY = (1<<6), IB_QP_AV = (1<<7), IB_QP_PATH_MTU = (1<<8), IB_QP_TIMEOUT = (1<<9), IB_QP_RETRY_CNT = (1<<10), IB_QP_RNR_RETRY = (1<<11), IB_QP_RQ_PSN = (1<<12), IB_QP_MAX_QP_RD_ATOMIC = (1<<13), IB_QP_ALT_PATH = (1<<14), IB_QP_MIN_RNR_TIMER = (1<<15), IB_QP_SQ_PSN = (1<<16), IB_QP_MAX_DEST_RD_ATOMIC = (1<<17), IB_QP_PATH_MIG_STATE = (1<<18), IB_QP_CAP = (1<<19), IB_QP_DEST_QPN = (1<<20), IB_QP_GROUP_RSS = (1<<21) }; enum ib_qp_state { IB_QPS_RESET, IB_QPS_INIT, IB_QPS_RTR, IB_QPS_RTS, IB_QPS_SQD, IB_QPS_SQE, IB_QPS_ERR }; enum ib_mig_state { IB_MIG_MIGRATED, IB_MIG_REARM, IB_MIG_ARMED }; struct ib_qp_attr { enum ib_qp_state qp_state; enum ib_qp_state cur_qp_state; enum ib_mtu path_mtu; enum ib_mig_state path_mig_state; u32 qkey; u32 rq_psn; u32 sq_psn; u32 dest_qp_num; int qp_access_flags; struct ib_qp_cap cap; struct ib_ah_attr ah_attr; struct ib_ah_attr alt_ah_attr; u16 pkey_index; u16 alt_pkey_index; u8 en_sqd_async_notify; u8 sq_draining; u8 max_rd_atomic; u8 max_dest_rd_atomic; u8 min_rnr_timer; u8 port_num; u8 timeout; u8 retry_cnt; u8 rnr_retry; u8 alt_port_num; u8 alt_timeout; }; enum ib_wr_opcode { IB_WR_RDMA_WRITE, IB_WR_RDMA_WRITE_WITH_IMM, IB_WR_SEND, IB_WR_SEND_WITH_IMM, IB_WR_RDMA_READ, IB_WR_ATOMIC_CMP_AND_SWP, IB_WR_ATOMIC_FETCH_AND_ADD, IB_WR_LSO, IB_WR_BIG_LSO, IB_WR_SEND_WITH_INV, IB_WR_RDMA_READ_WITH_INV, IB_WR_LOCAL_INV, IB_WR_FAST_REG_MR, IB_WR_MASKED_ATOMIC_CMP_AND_SWP, IB_WR_MASKED_ATOMIC_FETCH_AND_ADD, }; enum ib_send_flags { IB_SEND_FENCE = 1, IB_SEND_SIGNALED = (1<<1), IB_SEND_SOLICITED = (1<<2), IB_SEND_INLINE = (1<<3), IB_SEND_IP_CSUM = (1<<4) }; enum ib_flow_types { IB_FLOW_ETH = 0, IB_FLOW_IB_UC = 1, IB_FLOW_IB_MC_IPV4 = 2, IB_FLOW_IB_MC_IPV6 = 3 }; enum { IB_FLOW_L4_NONE = 0, IB_FLOW_L4_OTHER = 3, IB_FLOW_L4_UDP = 5, IB_FLOW_L4_TCP = 6 }; struct ib_sge { u64 addr; u32 length; u32 lkey; }; struct ib_fast_reg_page_list { struct ib_device *device; u64 *page_list; unsigned int max_page_list_len; }; struct ib_send_wr { struct ib_send_wr *next; u64 wr_id; struct ib_sge *sg_list; int num_sge; enum ib_wr_opcode opcode; int send_flags; union { __be32 imm_data; u32 invalidate_rkey; } ex; union { struct { u64 remote_addr; u32 rkey; } rdma; struct { u64 remote_addr; u64 compare_add; u64 swap; u64 compare_add_mask; u64 swap_mask; u32 rkey; } atomic; struct { struct ib_ah *ah; void *header; int hlen; int mss; u32 remote_qpn; u32 remote_qkey; u16 pkey_index; /* valid for GSI only */ u8 port_num; /* valid for DR SMPs on switch only */ } ud; struct { u64 iova_start; struct ib_fast_reg_page_list *page_list; unsigned int page_shift; unsigned int page_list_len; u32 length; int access_flags; u32 rkey; } fast_reg; struct { struct ib_unpacked_lrh *lrh; u32 eth_type; u8 static_rate; } raw_ety; } wr; u32 xrc_remote_srq_num; /* XRC TGT QPs only */ }; struct ib_recv_wr { struct ib_recv_wr *next; u64 wr_id; struct ib_sge *sg_list; int num_sge; }; enum ib_access_flags { IB_ACCESS_LOCAL_WRITE = 1, IB_ACCESS_REMOTE_WRITE = (1<<1), IB_ACCESS_REMOTE_READ = (1<<2), IB_ACCESS_REMOTE_ATOMIC = (1<<3), IB_ACCESS_MW_BIND = (1<<4), IB_ACCESS_ALLOCATE_MR = (1<<5), IB_ACCESS_SHARED_MR_USER_READ = (1<<6), IB_ACCESS_SHARED_MR_USER_WRITE = (1<<7), IB_ACCESS_SHARED_MR_GROUP_READ = (1<<8), IB_ACCESS_SHARED_MR_GROUP_WRITE = (1<<9), IB_ACCESS_SHARED_MR_OTHER_READ = (1<<10), IB_ACCESS_SHARED_MR_OTHER_WRITE = (1<<11) }; struct ib_phys_buf { u64 addr; u64 size; }; struct ib_mr_attr { struct ib_pd *pd; u64 device_virt_addr; u64 size; int mr_access_flags; u32 lkey; u32 rkey; }; enum ib_mr_rereg_flags { IB_MR_REREG_TRANS = 1, IB_MR_REREG_PD = (1<<1), IB_MR_REREG_ACCESS = (1<<2) }; struct ib_mw_bind { struct ib_mr *mr; u64 wr_id; u64 addr; u32 length; int send_flags; int mw_access_flags; }; struct ib_fmr_attr { int max_pages; int max_maps; u8 page_shift; }; struct ib_ucontext { struct ib_device *device; struct list_head pd_list; struct list_head mr_list; struct list_head mw_list; struct list_head cq_list; struct list_head qp_list; struct list_head srq_list; struct list_head ah_list; struct list_head xrcd_list; int closing; }; struct ib_uobject { u64 user_handle; /* handle given to us by userspace */ struct ib_ucontext *context; /* associated user context */ void *object; /* containing object */ struct list_head list; /* link to context's list */ int id; /* index into kernel idr */ struct kref ref; struct rw_semaphore mutex; /* protects .live */ int live; }; struct ib_udata { void __user *inbuf; void __user *outbuf; size_t inlen; size_t outlen; }; struct ib_uxrc_rcv_object { struct list_head list; /* link to context's list */ u32 qp_num; u32 domain_handle; }; struct ib_pd { struct ib_device *device; struct ib_uobject *uobject; atomic_t usecnt; /* count all resources */ }; struct ib_xrcd { struct ib_device *device; struct ib_uobject *uobject; atomic_t usecnt; /* count all exposed resources */ struct inode *inode; struct rb_node node; struct mutex tgt_qp_mutex; struct list_head tgt_qp_list; }; struct ib_ah { struct ib_device *device; struct ib_pd *pd; struct ib_uobject *uobject; }; typedef void (*ib_comp_handler)(struct ib_cq *cq, void *cq_context); struct ib_cq { struct ib_device *device; struct ib_uobject *uobject; ib_comp_handler comp_handler; void (*event_handler)(struct ib_event *, void *); void *cq_context; int cqe; atomic_t usecnt; /* count number of work queues */ }; struct ib_srq { struct ib_device *device; struct ib_pd *pd; struct ib_uobject *uobject; void (*event_handler)(struct ib_event *, void *); void *srq_context; enum ib_srq_type srq_type; atomic_t usecnt; union { struct { struct ib_xrcd *xrcd; struct ib_cq *cq; u32 srq_num; } xrc; } ext; }; struct ib_qp { struct ib_device *device; struct ib_pd *pd; struct ib_cq *send_cq; struct ib_cq *recv_cq; struct ib_srq *srq; struct ib_xrcd *xrcd; /* XRC TGT QPs only */ struct list_head xrcd_list; atomic_t usecnt; /* count times opened, mcast attaches */ struct list_head open_list; struct ib_qp *real_qp; struct ib_uobject *uobject; void (*event_handler)(struct ib_event *, void *); void *qp_context; u32 qp_num; enum ib_qp_type qp_type; enum ib_qpg_type qpg_type; }; struct ib_mr { struct ib_device *device; struct ib_pd *pd; struct ib_uobject *uobject; u32 lkey; u32 rkey; atomic_t usecnt; /* count number of MWs */ }; struct ib_mw { struct ib_device *device; struct ib_pd *pd; struct ib_uobject *uobject; u32 rkey; }; struct ib_fmr { struct ib_device *device; struct ib_pd *pd; struct list_head list; u32 lkey; u32 rkey; }; struct ib_flow_spec { enum ib_flow_types type; union { struct { __be16 ethertype; __be16 vlan; u8 vlan_present; u8 mac[6]; u8 port; } eth; struct { __be32 qpn; } ib_uc; struct { u8 mgid[16]; } ib_mc; } l2_id; __be32 src_ip; __be32 dst_ip; __be16 src_port; __be16 dst_port; u8 l4_protocol; u8 block_mc_loopback; u8 rule_type; }; struct ib_mad; struct ib_grh; enum ib_process_mad_flags { IB_MAD_IGNORE_MKEY = 1, IB_MAD_IGNORE_BKEY = 2, IB_MAD_IGNORE_ALL = IB_MAD_IGNORE_MKEY | IB_MAD_IGNORE_BKEY }; enum ib_mad_result { IB_MAD_RESULT_FAILURE = 0, /* (!SUCCESS is the important flag) */ IB_MAD_RESULT_SUCCESS = 1 << 0, /* MAD was successfully processed */ IB_MAD_RESULT_REPLY = 1 << 1, /* Reply packet needs to be sent */ IB_MAD_RESULT_CONSUMED = 1 << 2 /* Packet consumed: stop processing */ }; #define IB_DEVICE_NAME_MAX 64 struct ib_cache { rwlock_t lock; struct ib_event_handler event_handler; struct ib_pkey_cache **pkey_cache; struct ib_gid_cache **gid_cache; u8 *lmc_cache; }; struct ib_dma_mapping_ops { int (*mapping_error)(struct ib_device *dev, u64 dma_addr); u64 (*map_single)(struct ib_device *dev, void *ptr, size_t size, enum dma_data_direction direction); void (*unmap_single)(struct ib_device *dev, u64 addr, size_t size, enum dma_data_direction direction); u64 (*map_page)(struct ib_device *dev, struct page *page, unsigned long offset, size_t size, enum dma_data_direction direction); void (*unmap_page)(struct ib_device *dev, u64 addr, size_t size, enum dma_data_direction direction); int (*map_sg)(struct ib_device *dev, struct scatterlist *sg, int nents, enum dma_data_direction direction); void (*unmap_sg)(struct ib_device *dev, struct scatterlist *sg, int nents, enum dma_data_direction direction); u64 (*dma_address)(struct ib_device *dev, struct scatterlist *sg); unsigned int (*dma_len)(struct ib_device *dev, struct scatterlist *sg); void (*sync_single_for_cpu)(struct ib_device *dev, u64 dma_handle, size_t size, enum dma_data_direction dir); void (*sync_single_for_device)(struct ib_device *dev, u64 dma_handle, size_t size, enum dma_data_direction dir); void *(*alloc_coherent)(struct ib_device *dev, size_t size, u64 *dma_handle, gfp_t flag); void (*free_coherent)(struct ib_device *dev, size_t size, void *cpu_addr, u64 dma_handle); }; struct iw_cm_verbs; struct ib_device { struct device *dma_device; char name[IB_DEVICE_NAME_MAX]; struct list_head event_handler_list; spinlock_t event_handler_lock; spinlock_t client_data_lock; struct list_head core_list; struct list_head client_data_list; struct ib_cache cache; int *pkey_tbl_len; int *gid_tbl_len; int num_comp_vectors; struct iw_cm_verbs *iwcm; int (*get_protocol_stats)(struct ib_device *device, union rdma_protocol_stats *stats); int (*query_device)(struct ib_device *device, struct ib_device_attr *device_attr); int (*query_port)(struct ib_device *device, u8 port_num, struct ib_port_attr *port_attr); enum rdma_link_layer (*get_link_layer)(struct ib_device *device, u8 port_num); int (*query_gid)(struct ib_device *device, u8 port_num, int index, union ib_gid *gid); int (*query_pkey)(struct ib_device *device, u8 port_num, u16 index, u16 *pkey); int (*modify_device)(struct ib_device *device, int device_modify_mask, struct ib_device_modify *device_modify); int (*modify_port)(struct ib_device *device, u8 port_num, int port_modify_mask, struct ib_port_modify *port_modify); struct ib_ucontext * (*alloc_ucontext)(struct ib_device *device, struct ib_udata *udata); int (*dealloc_ucontext)(struct ib_ucontext *context); int (*mmap)(struct ib_ucontext *context, struct vm_area_struct *vma); struct ib_pd * (*alloc_pd)(struct ib_device *device, struct ib_ucontext *context, struct ib_udata *udata); int (*dealloc_pd)(struct ib_pd *pd); struct ib_ah * (*create_ah)(struct ib_pd *pd, struct ib_ah_attr *ah_attr); int (*modify_ah)(struct ib_ah *ah, struct ib_ah_attr *ah_attr); int (*query_ah)(struct ib_ah *ah, struct ib_ah_attr *ah_attr); int (*destroy_ah)(struct ib_ah *ah); struct ib_srq * (*create_srq)(struct ib_pd *pd, struct ib_srq_init_attr *srq_init_attr, struct ib_udata *udata); int (*modify_srq)(struct ib_srq *srq, struct ib_srq_attr *srq_attr, enum ib_srq_attr_mask srq_attr_mask, struct ib_udata *udata); int (*query_srq)(struct ib_srq *srq, struct ib_srq_attr *srq_attr); int (*destroy_srq)(struct ib_srq *srq); int (*post_srq_recv)(struct ib_srq *srq, struct ib_recv_wr *recv_wr, struct ib_recv_wr **bad_recv_wr); struct ib_qp * (*create_qp)(struct ib_pd *pd, struct ib_qp_init_attr *qp_init_attr, struct ib_udata *udata); int (*modify_qp)(struct ib_qp *qp, struct ib_qp_attr *qp_attr, int qp_attr_mask, struct ib_udata *udata); int (*query_qp)(struct ib_qp *qp, struct ib_qp_attr *qp_attr, int qp_attr_mask, struct ib_qp_init_attr *qp_init_attr); int (*destroy_qp)(struct ib_qp *qp); int (*post_send)(struct ib_qp *qp, struct ib_send_wr *send_wr, struct ib_send_wr **bad_send_wr); int (*post_recv)(struct ib_qp *qp, struct ib_recv_wr *recv_wr, struct ib_recv_wr **bad_recv_wr); struct ib_cq * (*create_cq)(struct ib_device *device, int cqe, int comp_vector, struct ib_ucontext *context, struct ib_udata *udata); int (*modify_cq)(struct ib_cq *cq, u16 cq_count, u16 cq_period); int (*destroy_cq)(struct ib_cq *cq); int (*resize_cq)(struct ib_cq *cq, int cqe, struct ib_udata *udata); int (*poll_cq)(struct ib_cq *cq, int num_entries, struct ib_wc *wc); int (*peek_cq)(struct ib_cq *cq, int wc_cnt); int (*req_notify_cq)(struct ib_cq *cq, enum ib_cq_notify_flags flags); int (*req_ncomp_notif)(struct ib_cq *cq, int wc_cnt); struct ib_mr * (*get_dma_mr)(struct ib_pd *pd, int mr_access_flags); struct ib_mr * (*reg_phys_mr)(struct ib_pd *pd, struct ib_phys_buf *phys_buf_array, int num_phys_buf, int mr_access_flags, u64 *iova_start); struct ib_mr * (*reg_user_mr)(struct ib_pd *pd, u64 start, u64 length, u64 virt_addr, int mr_access_flags, struct ib_udata *udata, int mr_id); int (*query_mr)(struct ib_mr *mr, struct ib_mr_attr *mr_attr); int (*dereg_mr)(struct ib_mr *mr); struct ib_mr * (*alloc_fast_reg_mr)(struct ib_pd *pd, int max_page_list_len); struct ib_fast_reg_page_list * (*alloc_fast_reg_page_list)(struct ib_device *device, int page_list_len); void (*free_fast_reg_page_list)(struct ib_fast_reg_page_list *page_list); int (*rereg_phys_mr)(struct ib_mr *mr, int mr_rereg_mask, struct ib_pd *pd, struct ib_phys_buf *phys_buf_array, int num_phys_buf, int mr_access_flags, u64 *iova_start); struct ib_mw * (*alloc_mw)(struct ib_pd *pd); int (*bind_mw)(struct ib_qp *qp, struct ib_mw *mw, struct ib_mw_bind *mw_bind); int (*dealloc_mw)(struct ib_mw *mw); struct ib_fmr * (*alloc_fmr)(struct ib_pd *pd, int mr_access_flags, struct ib_fmr_attr *fmr_attr); int (*map_phys_fmr)(struct ib_fmr *fmr, u64 *page_list, int list_len, u64 iova); int (*unmap_fmr)(struct list_head *fmr_list); int (*dealloc_fmr)(struct ib_fmr *fmr); int (*attach_mcast)(struct ib_qp *qp, union ib_gid *gid, u16 lid); int (*detach_mcast)(struct ib_qp *qp, union ib_gid *gid, u16 lid); int (*process_mad)(struct ib_device *device, int process_mad_flags, u8 port_num, struct ib_wc *in_wc, struct ib_grh *in_grh, struct ib_mad *in_mad, struct ib_mad *out_mad); struct ib_srq * (*create_xrc_srq)(struct ib_pd *pd, struct ib_cq *xrc_cq, struct ib_xrcd *xrcd, struct ib_srq_init_attr *srq_init_attr, struct ib_udata *udata); struct ib_xrcd * (*alloc_xrcd)(struct ib_device *device, struct ib_ucontext *ucontext, struct ib_udata *udata); int (*dealloc_xrcd)(struct ib_xrcd *xrcd); int (*create_xrc_rcv_qp)(struct ib_qp_init_attr *init_attr, u32 *qp_num); int (*modify_xrc_rcv_qp)(struct ib_xrcd *xrcd, u32 qp_num, struct ib_qp_attr *attr, int attr_mask); int (*query_xrc_rcv_qp)(struct ib_xrcd *xrcd, u32 qp_num, struct ib_qp_attr *attr, int attr_mask, struct ib_qp_init_attr *init_attr); int (*reg_xrc_rcv_qp)(struct ib_xrcd *xrcd, void *context, u32 qp_num); int (*unreg_xrc_rcv_qp)(struct ib_xrcd *xrcd, void *context, u32 qp_num); int (*attach_flow)(struct ib_qp *qp, struct ib_flow_spec *spec, int priority); int (*detach_flow)(struct ib_qp *qp, struct ib_flow_spec *spec, int priority); unsigned long (*get_unmapped_area)(struct file *file, unsigned long addr, unsigned long len, unsigned long pgoff, unsigned long flags); struct ib_dma_mapping_ops *dma_ops; struct module *owner; struct device dev; struct kobject *ports_parent; struct list_head port_list; enum { IB_DEV_UNINITIALIZED, IB_DEV_REGISTERED, IB_DEV_UNREGISTERED } reg_state; int uverbs_abi_ver; u64 uverbs_cmd_mask; char node_desc[64]; __be64 node_guid; u32 local_dma_lkey; u8 node_type; u8 phys_port_cnt; struct rb_root ib_uverbs_xrcd_table; struct mutex xrcd_table_mutex; }; struct ib_client { char *name; void (*add) (struct ib_device *); void (*remove)(struct ib_device *); struct list_head list; }; struct ib_device *ib_alloc_device(size_t size); void ib_dealloc_device(struct ib_device *device); int ib_register_device(struct ib_device *device, int (*port_callback)(struct ib_device *, u8, struct kobject *)); void ib_unregister_device(struct ib_device *device); int ib_register_client (struct ib_client *client); void ib_unregister_client(struct ib_client *client); void *ib_get_client_data(struct ib_device *device, struct ib_client *client); void ib_set_client_data(struct ib_device *device, struct ib_client *client, void *data); static inline int ib_copy_from_udata(void *dest, struct ib_udata *udata, size_t len) { return copy_from_user(dest, udata->inbuf, len) ? -EFAULT : 0; } static inline int ib_copy_to_udata(struct ib_udata *udata, void *src, size_t len) { return copy_to_user(udata->outbuf, src, len) ? -EFAULT : 0; } /** * ib_modify_qp_is_ok - Check that the supplied attribute mask * contains all required attributes and no attributes not allowed for * the given QP state transition. * @cur_state: Current QP state * @next_state: Next QP state * @type: QP type * @mask: Mask of supplied QP attributes * * This function is a helper function that a low-level driver's * modify_qp method can use to validate the consumer's input. It * checks that cur_state and next_state are valid QP states, that a * transition from cur_state to next_state is allowed by the IB spec, * and that the attribute mask supplied is allowed for the transition. */ int ib_modify_qp_is_ok(enum ib_qp_state cur_state, enum ib_qp_state next_state, enum ib_qp_type type, enum ib_qp_attr_mask mask); int ib_register_event_handler (struct ib_event_handler *event_handler); int ib_unregister_event_handler(struct ib_event_handler *event_handler); void ib_dispatch_event(struct ib_event *event); int ib_query_device(struct ib_device *device, struct ib_device_attr *device_attr); int ib_query_port(struct ib_device *device, u8 port_num, struct ib_port_attr *port_attr); enum rdma_link_layer rdma_port_get_link_layer(struct ib_device *device, u8 port_num); int ib_query_gid(struct ib_device *device, u8 port_num, int index, union ib_gid *gid); int ib_query_pkey(struct ib_device *device, u8 port_num, u16 index, u16 *pkey); int ib_modify_device(struct ib_device *device, int device_modify_mask, struct ib_device_modify *device_modify); int ib_modify_port(struct ib_device *device, u8 port_num, int port_modify_mask, struct ib_port_modify *port_modify); int ib_find_gid(struct ib_device *device, union ib_gid *gid, u8 *port_num, u16 *index); int ib_find_pkey(struct ib_device *device, u8 port_num, u16 pkey, u16 *index); /** * ib_alloc_pd - Allocates an unused protection domain. * @device: The device on which to allocate the protection domain. * * A protection domain object provides an association between QPs, shared * receive queues, address handles, memory regions, and memory windows. */ struct ib_pd *ib_alloc_pd(struct ib_device *device); /** * ib_dealloc_pd - Deallocates a protection domain. * @pd: The protection domain to deallocate. */ int ib_dealloc_pd(struct ib_pd *pd); /** * ib_create_ah - Creates an address handle for the given address vector. * @pd: The protection domain associated with the address handle. * @ah_attr: The attributes of the address vector. * * The address handle is used to reference a local or global destination * in all UD QP post sends. */ struct ib_ah *ib_create_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr); /** * ib_init_ah_from_wc - Initializes address handle attributes from a * work completion. * @device: Device on which the received message arrived. * @port_num: Port on which the received message arrived. * @wc: Work completion associated with the received message. * @grh: References the received global route header. This parameter is * ignored unless the work completion indicates that the GRH is valid. * @ah_attr: Returned attributes that can be used when creating an address * handle for replying to the message. */ int ib_init_ah_from_wc(struct ib_device *device, u8 port_num, struct ib_wc *wc, struct ib_grh *grh, struct ib_ah_attr *ah_attr); /** * ib_create_ah_from_wc - Creates an address handle associated with the * sender of the specified work completion. * @pd: The protection domain associated with the address handle. * @wc: Work completion information associated with a received message. * @grh: References the received global route header. This parameter is * ignored unless the work completion indicates that the GRH is valid. * @port_num: The outbound port number to associate with the address. * * The address handle is used to reference a local or global destination * in all UD QP post sends. */ struct ib_ah *ib_create_ah_from_wc(struct ib_pd *pd, struct ib_wc *wc, struct ib_grh *grh, u8 port_num); /** * ib_modify_ah - Modifies the address vector associated with an address * handle. * @ah: The address handle to modify. * @ah_attr: The new address vector attributes to associate with the * address handle. */ int ib_modify_ah(struct ib_ah *ah, struct ib_ah_attr *ah_attr); /** * ib_query_ah - Queries the address vector associated with an address * handle. * @ah: The address handle to query. * @ah_attr: The address vector attributes associated with the address * handle. */ int ib_query_ah(struct ib_ah *ah, struct ib_ah_attr *ah_attr); /** * ib_destroy_ah - Destroys an address handle. * @ah: The address handle to destroy. */ int ib_destroy_ah(struct ib_ah *ah); /** * ib_create_xrc_srq - Creates an XRC SRQ associated with the specified * protection domain, cq, and xrc domain. * @pd: The protection domain associated with the SRQ. * @xrc_cq: The cq to be associated with the XRC SRQ. * @xrcd: The XRC domain to be associated with the XRC SRQ. * @srq_init_attr: A list of initial attributes required to create the * XRC SRQ. If XRC SRQ creation succeeds, then the attributes are updated * to the actual capabilities of the created XRC SRQ. * * srq_attr->max_wr and srq_attr->max_sge are read the determine the * requested size of the XRC SRQ, and set to the actual values allocated * on return. If ib_create_xrc_srq() succeeds, then max_wr and max_sge * will always be at least as large as the requested values. */ struct ib_srq *ib_create_xrc_srq(struct ib_pd *pd, struct ib_cq *xrc_cq, struct ib_xrcd *xrcd, struct ib_srq_init_attr *srq_init_attr); /** * ib_create_srq - Creates a SRQ associated with the specified protection * domain. * @pd: The protection domain associated with the SRQ. * @srq_init_attr: A list of initial attributes required to create the * SRQ. If SRQ creation succeeds, then the attributes are updated to * the actual capabilities of the created SRQ. * * srq_attr->max_wr and srq_attr->max_sge are read the determine the * requested size of the SRQ, and set to the actual values allocated * on return. If ib_create_srq() succeeds, then max_wr and max_sge * will always be at least as large as the requested values. */ struct ib_srq *ib_create_srq(struct ib_pd *pd, struct ib_srq_init_attr *srq_init_attr); /** * ib_modify_srq - Modifies the attributes for the specified SRQ. * @srq: The SRQ to modify. * @srq_attr: On input, specifies the SRQ attributes to modify. On output, * the current values of selected SRQ attributes are returned. * @srq_attr_mask: A bit-mask used to specify which attributes of the SRQ * are being modified. * * The mask may contain IB_SRQ_MAX_WR to resize the SRQ and/or * IB_SRQ_LIMIT to set the SRQ's limit and request notification when * the number of receives queued drops below the limit. */ int ib_modify_srq(struct ib_srq *srq, struct ib_srq_attr *srq_attr, enum ib_srq_attr_mask srq_attr_mask); /** * ib_query_srq - Returns the attribute list and current values for the * specified SRQ. * @srq: The SRQ to query. * @srq_attr: The attributes of the specified SRQ. */ int ib_query_srq(struct ib_srq *srq, struct ib_srq_attr *srq_attr); /** * ib_destroy_srq - Destroys the specified SRQ. * @srq: The SRQ to destroy. */ int ib_destroy_srq(struct ib_srq *srq); /** * ib_post_srq_recv - Posts a list of work requests to the specified SRQ. * @srq: The SRQ to post the work request on. * @recv_wr: A list of work requests to post on the receive queue. * @bad_recv_wr: On an immediate failure, this parameter will reference * the work request that failed to be posted on the QP. */ static inline int ib_post_srq_recv(struct ib_srq *srq, struct ib_recv_wr *recv_wr, struct ib_recv_wr **bad_recv_wr) { return srq->device->post_srq_recv(srq, recv_wr, bad_recv_wr); } /** * ib_create_qp - Creates a QP associated with the specified protection * domain. * @pd: The protection domain associated with the QP. * @qp_init_attr: A list of initial attributes required to create the * QP. If QP creation succeeds, then the attributes are updated to * the actual capabilities of the created QP. */ struct ib_qp *ib_create_qp(struct ib_pd *pd, struct ib_qp_init_attr *qp_init_attr); /** * ib_modify_qp - Modifies the attributes for the specified QP and then * transitions the QP to the given state. * @qp: The QP to modify. * @qp_attr: On input, specifies the QP attributes to modify. On output, * the current values of selected QP attributes are returned. * @qp_attr_mask: A bit-mask used to specify which attributes of the QP * are being modified. */ int ib_modify_qp(struct ib_qp *qp, struct ib_qp_attr *qp_attr, int qp_attr_mask); /** * ib_query_qp - Returns the attribute list and current values for the * specified QP. * @qp: The QP to query. * @qp_attr: The attributes of the specified QP. * @qp_attr_mask: A bit-mask used to select specific attributes to query. * @qp_init_attr: Additional attributes of the selected QP. * * The qp_attr_mask may be used to limit the query to gathering only the * selected attributes. */ int ib_query_qp(struct ib_qp *qp, struct ib_qp_attr *qp_attr, int qp_attr_mask, struct ib_qp_init_attr *qp_init_attr); /** * ib_destroy_qp - Destroys the specified QP. * @qp: The QP to destroy. */ int ib_destroy_qp(struct ib_qp *qp); /** * ib_open_qp - Obtain a reference to an existing sharable QP. * @xrcd - XRC domain * @qp_open_attr: Attributes identifying the QP to open. * * Returns a reference to a sharable QP. */ struct ib_qp *ib_open_qp(struct ib_xrcd *xrcd, struct ib_qp_open_attr *qp_open_attr); /** * ib_close_qp - Release an external reference to a QP. * @qp: The QP handle to release * * The opened QP handle is released by the caller. The underlying * shared QP is not destroyed until all internal references are released. */ int ib_close_qp(struct ib_qp *qp); /** * ib_post_send - Posts a list of work requests to the send queue of * the specified QP. * @qp: The QP to post the work request on. * @send_wr: A list of work requests to post on the send queue. * @bad_send_wr: On an immediate failure, this parameter will reference * the work request that failed to be posted on the QP. * * While IBA Vol. 1 section 11.4.1.1 specifies that if an immediate * error is returned, the QP state shall not be affected, * ib_post_send() will return an immediate error after queueing any * earlier work requests in the list. */ static inline int ib_post_send(struct ib_qp *qp, struct ib_send_wr *send_wr, struct ib_send_wr **bad_send_wr) { return qp->device->post_send(qp, send_wr, bad_send_wr); } /** * ib_post_recv - Posts a list of work requests to the receive queue of * the specified QP. * @qp: The QP to post the work request on. * @recv_wr: A list of work requests to post on the receive queue. * @bad_recv_wr: On an immediate failure, this parameter will reference * the work request that failed to be posted on the QP. */ static inline int ib_post_recv(struct ib_qp *qp, struct ib_recv_wr *recv_wr, struct ib_recv_wr **bad_recv_wr) { return qp->device->post_recv(qp, recv_wr, bad_recv_wr); } /* * IB_CQ_VECTOR_LEAST_ATTACHED: The constant specifies that * the CQ will be attached to the completion vector that has * the least number of CQs already attached to it. */ #define IB_CQ_VECTOR_LEAST_ATTACHED 0xffffffff /** * ib_create_cq - Creates a CQ on the specified device. * @device: The device on which to create the CQ. * @comp_handler: A user-specified callback that is invoked when a * completion event occurs on the CQ. * @event_handler: A user-specified callback that is invoked when an * asynchronous event not associated with a completion occurs on the CQ. * @cq_context: Context associated with the CQ returned to the user via * the associated completion and event handlers. * @cqe: The minimum size of the CQ. * @comp_vector - Completion vector used to signal completion events. * Must be >= 0 and < context->num_comp_vectors. * * Users can examine the cq structure to determine the actual CQ size. */ struct ib_cq *ib_create_cq(struct ib_device *device, ib_comp_handler comp_handler, void (*event_handler)(struct ib_event *, void *), void *cq_context, int cqe, int comp_vector); /** * ib_resize_cq - Modifies the capacity of the CQ. * @cq: The CQ to resize. * @cqe: The minimum size of the CQ. * * Users can examine the cq structure to determine the actual CQ size. */ int ib_resize_cq(struct ib_cq *cq, int cqe); /** * ib_modify_cq - Modifies moderation params of the CQ * @cq: The CQ to modify. * @cq_count: number of CQEs that will trigger an event * @cq_period: max period of time in usec before triggering an event * */ int ib_modify_cq(struct ib_cq *cq, u16 cq_count, u16 cq_period); /** * ib_destroy_cq - Destroys the specified CQ. * @cq: The CQ to destroy. */ int ib_destroy_cq(struct ib_cq *cq); /** * ib_poll_cq - poll a CQ for completion(s) * @cq:the CQ being polled * @num_entries:maximum number of completions to return * @wc:array of at least @num_entries &struct ib_wc where completions * will be returned * * Poll a CQ for (possibly multiple) completions. If the return value * is < 0, an error occurred. If the return value is >= 0, it is the * number of completions returned. If the return value is * non-negative and < num_entries, then the CQ was emptied. */ static inline int ib_poll_cq(struct ib_cq *cq, int num_entries, struct ib_wc *wc) { return cq->device->poll_cq(cq, num_entries, wc); } /** * ib_peek_cq - Returns the number of unreaped completions currently * on the specified CQ. * @cq: The CQ to peek. * @wc_cnt: A minimum number of unreaped completions to check for. * * If the number of unreaped completions is greater than or equal to wc_cnt, * this function returns wc_cnt, otherwise, it returns the actual number of * unreaped completions. */ int ib_peek_cq(struct ib_cq *cq, int wc_cnt); /** * ib_req_notify_cq - Request completion notification on a CQ. * @cq: The CQ to generate an event for. * @flags: * Must contain exactly one of %IB_CQ_SOLICITED or %IB_CQ_NEXT_COMP * to request an event on the next solicited event or next work * completion at any type, respectively. %IB_CQ_REPORT_MISSED_EVENTS * may also be |ed in to request a hint about missed events, as * described below. * * Return Value: * < 0 means an error occurred while requesting notification * == 0 means notification was requested successfully, and if * IB_CQ_REPORT_MISSED_EVENTS was passed in, then no events * were missed and it is safe to wait for another event. In * this case is it guaranteed that any work completions added * to the CQ since the last CQ poll will trigger a completion * notification event. * > 0 is only returned if IB_CQ_REPORT_MISSED_EVENTS was passed * in. It means that the consumer must poll the CQ again to * make sure it is empty to avoid missing an event because of a * race between requesting notification and an entry being * added to the CQ. This return value means it is possible * (but not guaranteed) that a work completion has been added * to the CQ since the last poll without triggering a * completion notification event. */ static inline int ib_req_notify_cq(struct ib_cq *cq, enum ib_cq_notify_flags flags) { return cq->device->req_notify_cq(cq, flags); } /** * ib_req_ncomp_notif - Request completion notification when there are * at least the specified number of unreaped completions on the CQ. * @cq: The CQ to generate an event for. * @wc_cnt: The number of unreaped completions that should be on the * CQ before an event is generated. */ static inline int ib_req_ncomp_notif(struct ib_cq *cq, int wc_cnt) { return cq->device->req_ncomp_notif ? cq->device->req_ncomp_notif(cq, wc_cnt) : -ENOSYS; } /** * ib_get_dma_mr - Returns a memory region for system memory that is * usable for DMA. * @pd: The protection domain associated with the memory region. * @mr_access_flags: Specifies the memory access rights. * * Note that the ib_dma_*() functions defined below must be used * to create/destroy addresses used with the Lkey or Rkey returned * by ib_get_dma_mr(). */ struct ib_mr *ib_get_dma_mr(struct ib_pd *pd, int mr_access_flags); /** * ib_dma_mapping_error - check a DMA addr for error * @dev: The device for which the dma_addr was created * @dma_addr: The DMA address to check */ static inline int ib_dma_mapping_error(struct ib_device *dev, u64 dma_addr) { if (dev->dma_ops) return dev->dma_ops->mapping_error(dev, dma_addr); return dma_mapping_error(dev->dma_device, dma_addr); } /** * ib_dma_map_single - Map a kernel virtual address to DMA address * @dev: The device for which the dma_addr is to be created * @cpu_addr: The kernel virtual address * @size: The size of the region in bytes * @direction: The direction of the DMA */ static inline u64 ib_dma_map_single(struct ib_device *dev, void *cpu_addr, size_t size, enum dma_data_direction direction) { if (dev->dma_ops) return dev->dma_ops->map_single(dev, cpu_addr, size, direction); return dma_map_single(dev->dma_device, cpu_addr, size, direction); } /** * ib_dma_unmap_single - Destroy a mapping created by ib_dma_map_single() * @dev: The device for which the DMA address was created * @addr: The DMA address * @size: The size of the region in bytes * @direction: The direction of the DMA */ static inline void ib_dma_unmap_single(struct ib_device *dev, u64 addr, size_t size, enum dma_data_direction direction) { if (dev->dma_ops) dev->dma_ops->unmap_single(dev, addr, size, direction); else dma_unmap_single(dev->dma_device, addr, size, direction); } static inline u64 ib_dma_map_single_attrs(struct ib_device *dev, void *cpu_addr, size_t size, enum dma_data_direction direction, struct dma_attrs *attrs) { return dma_map_single_attrs(dev->dma_device, cpu_addr, size, direction, attrs); } static inline void ib_dma_unmap_single_attrs(struct ib_device *dev, u64 addr, size_t size, enum dma_data_direction direction, struct dma_attrs *attrs) { return dma_unmap_single_attrs(dev->dma_device, addr, size, direction, attrs); } /** * ib_dma_map_page - Map a physical page to DMA address * @dev: The device for which the dma_addr is to be created * @page: The page to be mapped * @offset: The offset within the page * @size: The size of the region in bytes * @direction: The direction of the DMA */ static inline u64 ib_dma_map_page(struct ib_device *dev, struct page *page, unsigned long offset, size_t size, enum dma_data_direction direction) { if (dev->dma_ops) return dev->dma_ops->map_page(dev, page, offset, size, direction); return dma_map_page(dev->dma_device, page, offset, size, direction); } /** * ib_dma_unmap_page - Destroy a mapping created by ib_dma_map_page() * @dev: The device for which the DMA address was created * @addr: The DMA address * @size: The size of the region in bytes * @direction: The direction of the DMA */ static inline void ib_dma_unmap_page(struct ib_device *dev, u64 addr, size_t size, enum dma_data_direction direction) { if (dev->dma_ops) dev->dma_ops->unmap_page(dev, addr, size, direction); else dma_unmap_page(dev->dma_device, addr, size, direction); } /** * ib_dma_map_sg - Map a scatter/gather list to DMA addresses * @dev: The device for which the DMA addresses are to be created * @sg: The array of scatter/gather entries * @nents: The number of scatter/gather entries * @direction: The direction of the DMA */ static inline int ib_dma_map_sg(struct ib_device *dev, struct scatterlist *sg, int nents, enum dma_data_direction direction) { if (dev->dma_ops) return dev->dma_ops->map_sg(dev, sg, nents, direction); return dma_map_sg(dev->dma_device, sg, nents, direction); } /** * ib_dma_unmap_sg - Unmap a scatter/gather list of DMA addresses * @dev: The device for which the DMA addresses were created * @sg: The array of scatter/gather entries * @nents: The number of scatter/gather entries * @direction: The direction of the DMA */ static inline void ib_dma_unmap_sg(struct ib_device *dev, struct scatterlist *sg, int nents, enum dma_data_direction direction) { if (dev->dma_ops) dev->dma_ops->unmap_sg(dev, sg, nents, direction); else dma_unmap_sg(dev->dma_device, sg, nents, direction); } static inline int ib_dma_map_sg_attrs(struct ib_device *dev, struct scatterlist *sg, int nents, enum dma_data_direction direction, struct dma_attrs *attrs) { return dma_map_sg_attrs(dev->dma_device, sg, nents, direction, attrs); } static inline void ib_dma_unmap_sg_attrs(struct ib_device *dev, struct scatterlist *sg, int nents, enum dma_data_direction direction, struct dma_attrs *attrs) { dma_unmap_sg_attrs(dev->dma_device, sg, nents, direction, attrs); } /** * ib_sg_dma_address - Return the DMA address from a scatter/gather entry * @dev: The device for which the DMA addresses were created * @sg: The scatter/gather entry */ static inline u64 ib_sg_dma_address(struct ib_device *dev, struct scatterlist *sg) { if (dev->dma_ops) return dev->dma_ops->dma_address(dev, sg); return sg_dma_address(sg); } /** * ib_sg_dma_len - Return the DMA length from a scatter/gather entry * @dev: The device for which the DMA addresses were created * @sg: The scatter/gather entry */ static inline unsigned int ib_sg_dma_len(struct ib_device *dev, struct scatterlist *sg) { if (dev->dma_ops) return dev->dma_ops->dma_len(dev, sg); return sg_dma_len(sg); } /** * ib_dma_sync_single_for_cpu - Prepare DMA region to be accessed by CPU * @dev: The device for which the DMA address was created * @addr: The DMA address * @size: The size of the region in bytes * @dir: The direction of the DMA */ static inline void ib_dma_sync_single_for_cpu(struct ib_device *dev, u64 addr, size_t size, enum dma_data_direction dir) { if (dev->dma_ops) dev->dma_ops->sync_single_for_cpu(dev, addr, size, dir); else dma_sync_single_for_cpu(dev->dma_device, addr, size, dir); } /** * ib_dma_sync_single_for_device - Prepare DMA region to be accessed by device * @dev: The device for which the DMA address was created * @addr: The DMA address * @size: The size of the region in bytes * @dir: The direction of the DMA */ static inline void ib_dma_sync_single_for_device(struct ib_device *dev, u64 addr, size_t size, enum dma_data_direction dir) { if (dev->dma_ops) dev->dma_ops->sync_single_for_device(dev, addr, size, dir); else dma_sync_single_for_device(dev->dma_device, addr, size, dir); } /** * ib_dma_alloc_coherent - Allocate memory and map it for DMA * @dev: The device for which the DMA address is requested * @size: The size of the region to allocate in bytes * @dma_handle: A pointer for returning the DMA address of the region * @flag: memory allocator flags */ static inline void *ib_dma_alloc_coherent(struct ib_device *dev, size_t size, u64 *dma_handle, gfp_t flag) { if (dev->dma_ops) return dev->dma_ops->alloc_coherent(dev, size, dma_handle, flag); else { dma_addr_t handle; void *ret; ret = dma_alloc_coherent(dev->dma_device, size, &handle, flag); *dma_handle = handle; return ret; } } /** * ib_dma_free_coherent - Free memory allocated by ib_dma_alloc_coherent() * @dev: The device for which the DMA addresses were allocated * @size: The size of the region * @cpu_addr: the address returned by ib_dma_alloc_coherent() * @dma_handle: the DMA address returned by ib_dma_alloc_coherent() */ static inline void ib_dma_free_coherent(struct ib_device *dev, size_t size, void *cpu_addr, u64 dma_handle) { if (dev->dma_ops) dev->dma_ops->free_coherent(dev, size, cpu_addr, dma_handle); else dma_free_coherent(dev->dma_device, size, cpu_addr, dma_handle); } /** * ib_reg_phys_mr - Prepares a virtually addressed memory region for use * by an HCA. * @pd: The protection domain associated assigned to the registered region. * @phys_buf_array: Specifies a list of physical buffers to use in the * memory region. * @num_phys_buf: Specifies the size of the phys_buf_array. * @mr_access_flags: Specifies the memory access rights. * @iova_start: The offset of the region's starting I/O virtual address. */ struct ib_mr *ib_reg_phys_mr(struct ib_pd *pd, struct ib_phys_buf *phys_buf_array, int num_phys_buf, int mr_access_flags, u64 *iova_start); /** * ib_rereg_phys_mr - Modifies the attributes of an existing memory region. * Conceptually, this call performs the functions deregister memory region * followed by register physical memory region. Where possible, * resources are reused instead of deallocated and reallocated. * @mr: The memory region to modify. * @mr_rereg_mask: A bit-mask used to indicate which of the following * properties of the memory region are being modified. * @pd: If %IB_MR_REREG_PD is set in mr_rereg_mask, this field specifies * the new protection domain to associated with the memory region, * otherwise, this parameter is ignored. * @phys_buf_array: If %IB_MR_REREG_TRANS is set in mr_rereg_mask, this * field specifies a list of physical buffers to use in the new * translation, otherwise, this parameter is ignored. * @num_phys_buf: If %IB_MR_REREG_TRANS is set in mr_rereg_mask, this * field specifies the size of the phys_buf_array, otherwise, this * parameter is ignored. * @mr_access_flags: If %IB_MR_REREG_ACCESS is set in mr_rereg_mask, this * field specifies the new memory access rights, otherwise, this * parameter is ignored. * @iova_start: The offset of the region's starting I/O virtual address. */ int ib_rereg_phys_mr(struct ib_mr *mr, int mr_rereg_mask, struct ib_pd *pd, struct ib_phys_buf *phys_buf_array, int num_phys_buf, int mr_access_flags, u64 *iova_start); /** * ib_query_mr - Retrieves information about a specific memory region. * @mr: The memory region to retrieve information about. * @mr_attr: The attributes of the specified memory region. */ int ib_query_mr(struct ib_mr *mr, struct ib_mr_attr *mr_attr); /** * ib_dereg_mr - Deregisters a memory region and removes it from the * HCA translation table. * @mr: The memory region to deregister. */ int ib_dereg_mr(struct ib_mr *mr); /** * ib_alloc_fast_reg_mr - Allocates memory region usable with the * IB_WR_FAST_REG_MR send work request. * @pd: The protection domain associated with the region. * @max_page_list_len: requested max physical buffer list length to be * used with fast register work requests for this MR. */ struct ib_mr *ib_alloc_fast_reg_mr(struct ib_pd *pd, int max_page_list_len); /** * ib_alloc_fast_reg_page_list - Allocates a page list array * @device - ib device pointer. * @page_list_len - size of the page list array to be allocated. * * This allocates and returns a struct ib_fast_reg_page_list * and a * page_list array that is at least page_list_len in size. The actual * size is returned in max_page_list_len. The caller is responsible * for initializing the contents of the page_list array before posting * a send work request with the IB_WC_FAST_REG_MR opcode. * * The page_list array entries must be translated using one of the * ib_dma_*() functions just like the addresses passed to * ib_map_phys_fmr(). Once the ib_post_send() is issued, the struct * ib_fast_reg_page_list must not be modified by the caller until the * IB_WC_FAST_REG_MR work request completes. */ struct ib_fast_reg_page_list *ib_alloc_fast_reg_page_list( struct ib_device *device, int page_list_len); /** * ib_free_fast_reg_page_list - Deallocates a previously allocated * page list array. * @page_list - struct ib_fast_reg_page_list pointer to be deallocated. */ void ib_free_fast_reg_page_list(struct ib_fast_reg_page_list *page_list); /** * ib_update_fast_reg_key - updates the key portion of the fast_reg MR * R_Key and L_Key. * @mr - struct ib_mr pointer to be updated. * @newkey - new key to be used. */ static inline void ib_update_fast_reg_key(struct ib_mr *mr, u8 newkey) { mr->lkey = (mr->lkey & 0xffffff00) | newkey; mr->rkey = (mr->rkey & 0xffffff00) | newkey; } /** * ib_alloc_mw - Allocates a memory window. * @pd: The protection domain associated with the memory window. */ struct ib_mw *ib_alloc_mw(struct ib_pd *pd); /** * ib_bind_mw - Posts a work request to the send queue of the specified * QP, which binds the memory window to the given address range and * remote access attributes. * @qp: QP to post the bind work request on. * @mw: The memory window to bind. * @mw_bind: Specifies information about the memory window, including * its address range, remote access rights, and associated memory region. */ static inline int ib_bind_mw(struct ib_qp *qp, struct ib_mw *mw, struct ib_mw_bind *mw_bind) { /* XXX reference counting in corresponding MR? */ return mw->device->bind_mw ? mw->device->bind_mw(qp, mw, mw_bind) : -ENOSYS; } /** * ib_dealloc_mw - Deallocates a memory window. * @mw: The memory window to deallocate. */ int ib_dealloc_mw(struct ib_mw *mw); /** * ib_alloc_fmr - Allocates a unmapped fast memory region. * @pd: The protection domain associated with the unmapped region. * @mr_access_flags: Specifies the memory access rights. * @fmr_attr: Attributes of the unmapped region. * * A fast memory region must be mapped before it can be used as part of * a work request. */ struct ib_fmr *ib_alloc_fmr(struct ib_pd *pd, int mr_access_flags, struct ib_fmr_attr *fmr_attr); /** * ib_map_phys_fmr - Maps a list of physical pages to a fast memory region. * @fmr: The fast memory region to associate with the pages. * @page_list: An array of physical pages to map to the fast memory region. * @list_len: The number of pages in page_list. * @iova: The I/O virtual address to use with the mapped region. */ static inline int ib_map_phys_fmr(struct ib_fmr *fmr, u64 *page_list, int list_len, u64 iova) { return fmr->device->map_phys_fmr(fmr, page_list, list_len, iova); } /** * ib_unmap_fmr - Removes the mapping from a list of fast memory regions. * @fmr_list: A linked list of fast memory regions to unmap. */ int ib_unmap_fmr(struct list_head *fmr_list); /** * ib_dealloc_fmr - Deallocates a fast memory region. * @fmr: The fast memory region to deallocate. */ int ib_dealloc_fmr(struct ib_fmr *fmr); /** * ib_attach_mcast - Attaches the specified QP to a multicast group. * @qp: QP to attach to the multicast group. The QP must be type * IB_QPT_UD. * @gid: Multicast group GID. * @lid: Multicast group LID in host byte order. * * In order to send and receive multicast packets, subnet * administration must have created the multicast group and configured * the fabric appropriately. The port associated with the specified * QP must also be a member of the multicast group. */ int ib_attach_mcast(struct ib_qp *qp, union ib_gid *gid, u16 lid); /** * ib_detach_mcast - Detaches the specified QP from a multicast group. * @qp: QP to detach from the multicast group. * @gid: Multicast group GID. * @lid: Multicast group LID in host byte order. */ int ib_detach_mcast(struct ib_qp *qp, union ib_gid *gid, u16 lid); /** * ib_alloc_xrcd - Allocates an XRC domain. * @device: The device on which to allocate the XRC domain. */ struct ib_xrcd *ib_alloc_xrcd(struct ib_device *device); /** * ib_dealloc_xrcd - Deallocates an XRC domain. * @xrcd: The XRC domain to deallocate. */ int ib_dealloc_xrcd(struct ib_xrcd *xrcd); int ib_attach_flow(struct ib_qp *qp, struct ib_flow_spec *spec, int priority); int ib_detach_flow(struct ib_qp *qp, struct ib_flow_spec *spec, int priority); #endif /* IB_VERBS_H */ Index: stable/10 =================================================================== --- stable/10 (revision 271126) +++ stable/10 (revision 271127) Property changes on: stable/10 ___________________________________________________________________ Modified: svn:mergeinfo ## -0,0 +0,1 ## Merged /head:r270710,270821