Index: stable/10/sys/contrib/rdma/krping/krping.c
===================================================================
--- stable/10/sys/contrib/rdma/krping/krping.c	(revision 273245)
+++ stable/10/sys/contrib/rdma/krping/krping.c	(revision 273246)
@@ -1,2328 +1,2327 @@
/*
 * Copyright (c) 2005 Ammasso, Inc. All rights reserved.
 * Copyright (c) 2006-2009 Open Grid Computing, Inc. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses. You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 * Redistribution and use in source and binary forms, with or
 * without modification, are permitted provided that the following
 * conditions are met:
 *
 * - Redistributions of source code must retain the above
 *   copyright notice, this list of conditions and the following
 *   disclaimer.
 *
 * - Redistributions in binary form must reproduce the above
 *   copyright notice, this list of conditions and the following
 *   disclaimer in the documentation and/or other materials
 *   provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include
__FBSDID("$FreeBSD$");

#include
#include
#include
#include
#include
-#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include "krping.h"
#include "getopt.h"

extern int krping_debug;
#define DEBUG_LOG(cb, x...) if (krping_debug) krping_printf((cb)->cookie, x)
#define PRINTF(cb, x...) krping_printf((cb)->cookie, x)

MODULE_AUTHOR("Steve Wise");
MODULE_DESCRIPTION("RDMA ping client/server");
MODULE_LICENSE("Dual BSD/GPL");

static __inline uint64_t
get_cycles(void)
{
	uint32_t low, high;
	__asm __volatile("rdtsc" : "=a" (low), "=d" (high));
	return (low | ((u_int64_t)high << 32));
}

typedef uint64_t cycles_t;

enum mem_type {
	DMA = 1,
	FASTREG = 2,
	MW = 3,
	MR = 4
};

static const struct krping_option krping_opts[] = {
	{"count", OPT_INT, 'C'},
	{"size", OPT_INT, 'S'},
	{"addr", OPT_STRING, 'a'},
	{"port", OPT_INT, 'p'},
	{"verbose", OPT_NOPARAM, 'v'},
	{"validate", OPT_NOPARAM, 'V'},
	{"server", OPT_NOPARAM, 's'},
	{"client", OPT_NOPARAM, 'c'},
	{"mem_mode", OPT_STRING, 'm'},
	{"server_inv", OPT_NOPARAM, 'I'},
	{"wlat", OPT_NOPARAM, 'l'},
	{"rlat", OPT_NOPARAM, 'L'},
	{"bw", OPT_NOPARAM, 'B'},
	{"duplex", OPT_NOPARAM, 'd'},
	{"txdepth", OPT_INT, 'T'},
	{"poll", OPT_NOPARAM, 'P'},
	{"local_dma_lkey", OPT_NOPARAM, 'Z'},
	{"read_inv", OPT_NOPARAM, 'R'},
	{"fr", OPT_NOPARAM, 'f'},
	{NULL, 0, 0}
};

#define htonll(x) cpu_to_be64((x))
#define ntohll(x) cpu_to_be64((x))

static struct mutex krping_mutex;

/*
 * List of running krping threads.
*/ static LIST_HEAD(krping_cbs); /* * krping "ping/pong" loop: * client sends source rkey/addr/len * server receives source rkey/add/len * server rdma reads "ping" data from source * server sends "go ahead" on rdma read completion * client sends sink rkey/addr/len * server receives sink rkey/addr/len * server rdma writes "pong" data to sink * server sends "go ahead" on rdma write completion * */ /* * These states are used to signal events between the completion handler * and the main client or server thread. * * Once CONNECTED, they cycle through RDMA_READ_ADV, RDMA_WRITE_ADV, * and RDMA_WRITE_COMPLETE for each ping. */ enum test_state { IDLE = 1, CONNECT_REQUEST, ADDR_RESOLVED, ROUTE_RESOLVED, CONNECTED, RDMA_READ_ADV, RDMA_READ_COMPLETE, RDMA_WRITE_ADV, RDMA_WRITE_COMPLETE, ERROR }; struct krping_rdma_info { uint64_t buf; uint32_t rkey; uint32_t size; }; /* * Default max buffer size for IO... */ #define RPING_BUFSIZE 128*1024 #define RPING_SQ_DEPTH 64 /* * Control block struct. */ struct krping_cb { void *cookie; int server; /* 0 iff client */ struct ib_cq *cq; struct ib_pd *pd; struct ib_qp *qp; enum mem_type mem; struct ib_mr *dma_mr; struct ib_fast_reg_page_list *page_list; int page_list_len; struct ib_send_wr fastreg_wr; struct ib_send_wr invalidate_wr; struct ib_mr *fastreg_mr; int server_invalidate; int read_inv; u8 key; struct ib_mw *mw; struct ib_mw_bind bind_attr; struct ib_recv_wr rq_wr; /* recv work request record */ struct ib_sge recv_sgl; /* recv single SGE */ struct krping_rdma_info recv_buf;/* malloc'd buffer */ u64 recv_dma_addr; DECLARE_PCI_UNMAP_ADDR(recv_mapping) struct ib_mr *recv_mr; struct ib_send_wr sq_wr; /* send work requrest record */ struct ib_sge send_sgl; struct krping_rdma_info send_buf;/* single send buf */ u64 send_dma_addr; DECLARE_PCI_UNMAP_ADDR(send_mapping) struct ib_mr *send_mr; struct ib_send_wr rdma_sq_wr; /* rdma work request record */ struct ib_sge rdma_sgl; /* rdma single SGE */ char *rdma_buf; /* used as rdma sink */ u64 rdma_dma_addr; DECLARE_PCI_UNMAP_ADDR(rdma_mapping) struct ib_mr *rdma_mr; uint32_t remote_rkey; /* remote guys RKEY */ uint64_t remote_addr; /* remote guys TO */ uint32_t remote_len; /* remote guys LEN */ char *start_buf; /* rdma read src */ u64 start_dma_addr; DECLARE_PCI_UNMAP_ADDR(start_mapping) struct ib_mr *start_mr; enum test_state state; /* used for cond/signalling */ wait_queue_head_t sem; struct krping_stats stats; uint16_t port; /* dst port in NBO */ struct in_addr addr; /* dst addr in NBO */ char *addr_str; /* dst addr string */ int verbose; /* verbose logging */ int count; /* ping count */ int size; /* ping data size */ int validate; /* validate ping data */ int wlat; /* run wlat test */ int rlat; /* run rlat test */ int bw; /* run bw test */ int duplex; /* run bw full duplex test */ int poll; /* poll or block for rlat test */ int txdepth; /* SQ depth */ int local_dma_lkey; /* use 0 for lkey */ int frtest; /* fastreg test */ /* CM stuff */ struct rdma_cm_id *cm_id; /* connection on client side,*/ /* listener on server side. */ struct rdma_cm_id *child_cm_id; /* connection on server side */ struct list_head list; }; static int krping_cma_event_handler(struct rdma_cm_id *cma_id, struct rdma_cm_event *event) { int ret; struct krping_cb *cb = cma_id->context; DEBUG_LOG(cb, "cma_event type %d cma_id %p (%s)\n", event->event, cma_id, (cma_id == cb->cm_id) ? 
"parent" : "child"); switch (event->event) { case RDMA_CM_EVENT_ADDR_RESOLVED: cb->state = ADDR_RESOLVED; ret = rdma_resolve_route(cma_id, 2000); if (ret) { PRINTF(cb, "rdma_resolve_route error %d\n", ret); wake_up_interruptible(&cb->sem); } break; case RDMA_CM_EVENT_ROUTE_RESOLVED: cb->state = ROUTE_RESOLVED; wake_up_interruptible(&cb->sem); break; case RDMA_CM_EVENT_CONNECT_REQUEST: cb->state = CONNECT_REQUEST; cb->child_cm_id = cma_id; DEBUG_LOG(cb, "child cma %p\n", cb->child_cm_id); wake_up_interruptible(&cb->sem); break; case RDMA_CM_EVENT_ESTABLISHED: DEBUG_LOG(cb, "ESTABLISHED\n"); if (!cb->server) { cb->state = CONNECTED; } wake_up_interruptible(&cb->sem); break; case RDMA_CM_EVENT_ADDR_ERROR: case RDMA_CM_EVENT_ROUTE_ERROR: case RDMA_CM_EVENT_CONNECT_ERROR: case RDMA_CM_EVENT_UNREACHABLE: case RDMA_CM_EVENT_REJECTED: PRINTF(cb, "cma event %d, error %d\n", event->event, event->status); cb->state = ERROR; wake_up_interruptible(&cb->sem); break; case RDMA_CM_EVENT_DISCONNECTED: PRINTF(cb, "DISCONNECT EVENT...\n"); cb->state = ERROR; wake_up_interruptible(&cb->sem); break; case RDMA_CM_EVENT_DEVICE_REMOVAL: PRINTF(cb, "cma detected device removal!!!!\n"); break; default: PRINTF(cb, "oof bad type!\n"); wake_up_interruptible(&cb->sem); break; } return 0; } static int server_recv(struct krping_cb *cb, struct ib_wc *wc) { if (wc->byte_len != sizeof(cb->recv_buf)) { PRINTF(cb, "Received bogus data, size %d\n", wc->byte_len); return -1; } cb->remote_rkey = ntohl(cb->recv_buf.rkey); cb->remote_addr = ntohll(cb->recv_buf.buf); cb->remote_len = ntohl(cb->recv_buf.size); DEBUG_LOG(cb, "Received rkey %x addr %llx len %d from peer\n", cb->remote_rkey, (unsigned long long)cb->remote_addr, cb->remote_len); if (cb->state <= CONNECTED || cb->state == RDMA_WRITE_COMPLETE) cb->state = RDMA_READ_ADV; else cb->state = RDMA_WRITE_ADV; return 0; } static int client_recv(struct krping_cb *cb, struct ib_wc *wc) { if (wc->byte_len != sizeof(cb->recv_buf)) { PRINTF(cb, "Received bogus data, size %d\n", wc->byte_len); return -1; } if (cb->state == RDMA_READ_ADV) cb->state = RDMA_WRITE_ADV; else cb->state = RDMA_WRITE_COMPLETE; return 0; } static void krping_cq_event_handler(struct ib_cq *cq, void *ctx) { struct krping_cb *cb = ctx; struct ib_wc wc; struct ib_recv_wr *bad_wr; int ret; BUG_ON(cb->cq != cq); if (cb->state == ERROR) { PRINTF(cb, "cq completion in ERROR state\n"); return; } if (cb->frtest) { PRINTF(cb, "cq completion event in frtest!\n"); return; } if (!cb->wlat && !cb->rlat && !cb->bw) ib_req_notify_cq(cb->cq, IB_CQ_NEXT_COMP); while ((ret = ib_poll_cq(cb->cq, 1, &wc)) == 1) { if (wc.status) { if (wc.status == IB_WC_WR_FLUSH_ERR) { DEBUG_LOG(cb, "cq flushed\n"); continue; } else { PRINTF(cb, "cq completion failed with " "wr_id %Lx status %d opcode %d vender_err %x\n", wc.wr_id, wc.status, wc.opcode, wc.vendor_err); goto error; } } switch (wc.opcode) { case IB_WC_SEND: DEBUG_LOG(cb, "send completion\n"); cb->stats.send_bytes += cb->send_sgl.length; cb->stats.send_msgs++; break; case IB_WC_RDMA_WRITE: DEBUG_LOG(cb, "rdma write completion\n"); cb->stats.write_bytes += cb->rdma_sq_wr.sg_list->length; cb->stats.write_msgs++; cb->state = RDMA_WRITE_COMPLETE; wake_up_interruptible(&cb->sem); break; case IB_WC_RDMA_READ: DEBUG_LOG(cb, "rdma read completion\n"); cb->stats.read_bytes += cb->rdma_sq_wr.sg_list->length; cb->stats.read_msgs++; cb->state = RDMA_READ_COMPLETE; wake_up_interruptible(&cb->sem); break; case IB_WC_RECV: DEBUG_LOG(cb, "recv completion\n"); cb->stats.recv_bytes += 
sizeof(cb->recv_buf); cb->stats.recv_msgs++; if (cb->wlat || cb->rlat || cb->bw) ret = server_recv(cb, &wc); else ret = cb->server ? server_recv(cb, &wc) : client_recv(cb, &wc); if (ret) { PRINTF(cb, "recv wc error: %d\n", ret); goto error; } ret = ib_post_recv(cb->qp, &cb->rq_wr, &bad_wr); if (ret) { PRINTF(cb, "post recv error: %d\n", ret); goto error; } wake_up_interruptible(&cb->sem); break; default: PRINTF(cb, "%s:%d Unexpected opcode %d, Shutting down\n", __func__, __LINE__, wc.opcode); goto error; } } if (ret) { PRINTF(cb, "poll error %d\n", ret); goto error; } return; error: cb->state = ERROR; wake_up_interruptible(&cb->sem); } static int krping_accept(struct krping_cb *cb) { struct rdma_conn_param conn_param; int ret; DEBUG_LOG(cb, "accepting client connection request\n"); memset(&conn_param, 0, sizeof conn_param); conn_param.responder_resources = 1; conn_param.initiator_depth = 1; ret = rdma_accept(cb->child_cm_id, &conn_param); if (ret) { PRINTF(cb, "rdma_accept error: %d\n", ret); return ret; } if (!cb->wlat && !cb->rlat && !cb->bw) { wait_event_interruptible(cb->sem, cb->state >= CONNECTED); if (cb->state == ERROR) { PRINTF(cb, "wait for CONNECTED state %d\n", cb->state); return -1; } } return 0; } static void krping_setup_wr(struct krping_cb *cb) { cb->recv_sgl.addr = cb->recv_dma_addr; cb->recv_sgl.length = sizeof cb->recv_buf; if (cb->local_dma_lkey) cb->recv_sgl.lkey = cb->qp->device->local_dma_lkey; else if (cb->mem == DMA) cb->recv_sgl.lkey = cb->dma_mr->lkey; else cb->recv_sgl.lkey = cb->recv_mr->lkey; cb->rq_wr.sg_list = &cb->recv_sgl; cb->rq_wr.num_sge = 1; cb->send_sgl.addr = cb->send_dma_addr; cb->send_sgl.length = sizeof cb->send_buf; if (cb->local_dma_lkey) cb->send_sgl.lkey = cb->qp->device->local_dma_lkey; else if (cb->mem == DMA) cb->send_sgl.lkey = cb->dma_mr->lkey; else cb->send_sgl.lkey = cb->send_mr->lkey; cb->sq_wr.opcode = IB_WR_SEND; cb->sq_wr.send_flags = IB_SEND_SIGNALED; cb->sq_wr.sg_list = &cb->send_sgl; cb->sq_wr.num_sge = 1; if (cb->server || cb->wlat || cb->rlat || cb->bw) { cb->rdma_sgl.addr = cb->rdma_dma_addr; if (cb->mem == MR) cb->rdma_sgl.lkey = cb->rdma_mr->lkey; cb->rdma_sq_wr.send_flags = IB_SEND_SIGNALED; cb->rdma_sq_wr.sg_list = &cb->rdma_sgl; cb->rdma_sq_wr.num_sge = 1; } switch(cb->mem) { case FASTREG: /* * A chain of 2 WRs, INVALDATE_MR + FAST_REG_MR. * both unsignaled. The client uses them to reregister * the rdma buffers with a new key each iteration. 
*/ cb->fastreg_wr.opcode = IB_WR_FAST_REG_MR; cb->fastreg_wr.wr.fast_reg.page_shift = PAGE_SHIFT; cb->fastreg_wr.wr.fast_reg.length = cb->size; cb->fastreg_wr.wr.fast_reg.page_list = cb->page_list; cb->fastreg_wr.wr.fast_reg.page_list_len = cb->page_list_len; cb->invalidate_wr.next = &cb->fastreg_wr; cb->invalidate_wr.opcode = IB_WR_LOCAL_INV; break; case MW: cb->bind_attr.wr_id = 0xabbaabba; cb->bind_attr.send_flags = 0; /* unsignaled */ cb->bind_attr.length = cb->size; break; default: break; } } static int krping_setup_buffers(struct krping_cb *cb) { int ret; struct ib_phys_buf buf; u64 iovbase; DEBUG_LOG(cb, "krping_setup_buffers called on cb %p\n", cb); cb->recv_dma_addr = dma_map_single(cb->pd->device->dma_device, &cb->recv_buf, sizeof(cb->recv_buf), DMA_BIDIRECTIONAL); pci_unmap_addr_set(cb, recv_mapping, cb->recv_dma_addr); cb->send_dma_addr = dma_map_single(cb->pd->device->dma_device, &cb->send_buf, sizeof(cb->send_buf), DMA_BIDIRECTIONAL); pci_unmap_addr_set(cb, send_mapping, cb->send_dma_addr); if (cb->mem == DMA) { cb->dma_mr = ib_get_dma_mr(cb->pd, IB_ACCESS_LOCAL_WRITE| IB_ACCESS_REMOTE_READ| IB_ACCESS_REMOTE_WRITE); if (IS_ERR(cb->dma_mr)) { DEBUG_LOG(cb, "reg_dmamr failed\n"); ret = PTR_ERR(cb->dma_mr); goto bail; } } else { if (!cb->local_dma_lkey) { buf.addr = cb->recv_dma_addr; buf.size = sizeof cb->recv_buf; DEBUG_LOG(cb, "recv buf dma_addr %llx size %d\n", buf.addr, (int)buf.size); iovbase = cb->recv_dma_addr; cb->recv_mr = ib_reg_phys_mr(cb->pd, &buf, 1, IB_ACCESS_LOCAL_WRITE, &iovbase); if (IS_ERR(cb->recv_mr)) { DEBUG_LOG(cb, "recv_buf reg_mr failed\n"); ret = PTR_ERR(cb->recv_mr); goto bail; } buf.addr = cb->send_dma_addr; buf.size = sizeof cb->send_buf; DEBUG_LOG(cb, "send buf dma_addr %llx size %d\n", buf.addr, (int)buf.size); iovbase = cb->send_dma_addr; cb->send_mr = ib_reg_phys_mr(cb->pd, &buf, 1, 0, &iovbase); if (IS_ERR(cb->send_mr)) { DEBUG_LOG(cb, "send_buf reg_mr failed\n"); ret = PTR_ERR(cb->send_mr); goto bail; } } } cb->rdma_buf = kmalloc(cb->size, GFP_KERNEL); if (!cb->rdma_buf) { DEBUG_LOG(cb, "rdma_buf malloc failed\n"); ret = -ENOMEM; goto bail; } cb->rdma_dma_addr = dma_map_single(cb->pd->device->dma_device, cb->rdma_buf, cb->size, DMA_BIDIRECTIONAL); pci_unmap_addr_set(cb, rdma_mapping, cb->rdma_dma_addr); if (cb->mem != DMA) { switch (cb->mem) { case FASTREG: cb->page_list_len = (((cb->size - 1) & PAGE_MASK) + PAGE_SIZE) >> PAGE_SHIFT; cb->page_list = ib_alloc_fast_reg_page_list( cb->pd->device, cb->page_list_len); if (IS_ERR(cb->page_list)) { DEBUG_LOG(cb, "recv_buf reg_mr failed\n"); ret = PTR_ERR(cb->page_list); goto bail; } cb->fastreg_mr = ib_alloc_fast_reg_mr(cb->pd, cb->page_list->max_page_list_len); if (IS_ERR(cb->fastreg_mr)) { DEBUG_LOG(cb, "recv_buf reg_mr failed\n"); ret = PTR_ERR(cb->fastreg_mr); goto bail; } DEBUG_LOG(cb, "fastreg rkey 0x%x page_list %p" " page_list_len %u\n", cb->fastreg_mr->rkey, cb->page_list, cb->page_list_len); break; case MW: cb->mw = ib_alloc_mw(cb->pd); if (IS_ERR(cb->mw)) { DEBUG_LOG(cb, "recv_buf alloc_mw failed\n"); ret = PTR_ERR(cb->mw); goto bail; } DEBUG_LOG(cb, "mw rkey 0x%x\n", cb->mw->rkey); /*FALLTHROUGH*/ case MR: buf.addr = cb->rdma_dma_addr; buf.size = cb->size; iovbase = cb->rdma_dma_addr; cb->rdma_mr = ib_reg_phys_mr(cb->pd, &buf, 1, IB_ACCESS_REMOTE_READ| IB_ACCESS_REMOTE_WRITE, &iovbase); if (IS_ERR(cb->rdma_mr)) { DEBUG_LOG(cb, "rdma_buf reg_mr failed\n"); ret = PTR_ERR(cb->rdma_mr); goto bail; } DEBUG_LOG(cb, "rdma buf dma_addr %llx size %d mr rkey 0x%x\n", buf.addr, (int)buf.size, 
cb->rdma_mr->rkey); break; default: ret = -EINVAL; goto bail; break; } } if (!cb->server || cb->wlat || cb->rlat || cb->bw) { cb->start_buf = kmalloc(cb->size, GFP_KERNEL); if (!cb->start_buf) { DEBUG_LOG(cb, "start_buf malloc failed\n"); ret = -ENOMEM; goto bail; } cb->start_dma_addr = dma_map_single(cb->pd->device->dma_device, cb->start_buf, cb->size, DMA_BIDIRECTIONAL); pci_unmap_addr_set(cb, start_mapping, cb->start_dma_addr); if (cb->mem == MR || cb->mem == MW) { unsigned flags = IB_ACCESS_REMOTE_READ; if (cb->wlat || cb->rlat || cb->bw) flags |= IB_ACCESS_REMOTE_WRITE; buf.addr = cb->start_dma_addr; buf.size = cb->size; DEBUG_LOG(cb, "start buf dma_addr %llx size %d\n", buf.addr, (int)buf.size); iovbase = cb->start_dma_addr; cb->start_mr = ib_reg_phys_mr(cb->pd, &buf, 1, flags, &iovbase); if (IS_ERR(cb->start_mr)) { DEBUG_LOG(cb, "start_buf reg_mr failed\n"); ret = PTR_ERR(cb->start_mr); goto bail; } } } krping_setup_wr(cb); DEBUG_LOG(cb, "allocated & registered buffers...\n"); return 0; bail: if (cb->fastreg_mr && !IS_ERR(cb->fastreg_mr)) ib_dereg_mr(cb->fastreg_mr); if (cb->mw && !IS_ERR(cb->mw)) ib_dealloc_mw(cb->mw); if (cb->rdma_mr && !IS_ERR(cb->rdma_mr)) ib_dereg_mr(cb->rdma_mr); if (cb->page_list && !IS_ERR(cb->page_list)) ib_free_fast_reg_page_list(cb->page_list); if (cb->dma_mr && !IS_ERR(cb->dma_mr)) ib_dereg_mr(cb->dma_mr); if (cb->recv_mr && !IS_ERR(cb->recv_mr)) ib_dereg_mr(cb->recv_mr); if (cb->send_mr && !IS_ERR(cb->send_mr)) ib_dereg_mr(cb->send_mr); if (cb->rdma_buf) kfree(cb->rdma_buf); if (cb->start_buf) kfree(cb->start_buf); return ret; } static void krping_free_buffers(struct krping_cb *cb) { DEBUG_LOG(cb, "krping_free_buffers called on cb %p\n", cb); if (cb->dma_mr) ib_dereg_mr(cb->dma_mr); if (cb->send_mr) ib_dereg_mr(cb->send_mr); if (cb->recv_mr) ib_dereg_mr(cb->recv_mr); if (cb->rdma_mr) ib_dereg_mr(cb->rdma_mr); if (cb->start_mr) ib_dereg_mr(cb->start_mr); if (cb->fastreg_mr) ib_dereg_mr(cb->fastreg_mr); if (cb->mw) ib_dealloc_mw(cb->mw); dma_unmap_single(cb->pd->device->dma_device, pci_unmap_addr(cb, recv_mapping), sizeof(cb->recv_buf), DMA_BIDIRECTIONAL); dma_unmap_single(cb->pd->device->dma_device, pci_unmap_addr(cb, send_mapping), sizeof(cb->send_buf), DMA_BIDIRECTIONAL); dma_unmap_single(cb->pd->device->dma_device, pci_unmap_addr(cb, rdma_mapping), cb->size, DMA_BIDIRECTIONAL); kfree(cb->rdma_buf); if (cb->start_buf) { dma_unmap_single(cb->pd->device->dma_device, pci_unmap_addr(cb, start_mapping), cb->size, DMA_BIDIRECTIONAL); kfree(cb->start_buf); } } static int krping_create_qp(struct krping_cb *cb) { struct ib_qp_init_attr init_attr; int ret; memset(&init_attr, 0, sizeof(init_attr)); init_attr.cap.max_send_wr = cb->txdepth; init_attr.cap.max_recv_wr = 2; init_attr.cap.max_recv_sge = 1; init_attr.cap.max_send_sge = 1; init_attr.qp_type = IB_QPT_RC; init_attr.send_cq = cb->cq; init_attr.recv_cq = cb->cq; init_attr.sq_sig_type = IB_SIGNAL_REQ_WR; if (cb->server) { ret = rdma_create_qp(cb->child_cm_id, cb->pd, &init_attr); if (!ret) cb->qp = cb->child_cm_id->qp; } else { ret = rdma_create_qp(cb->cm_id, cb->pd, &init_attr); if (!ret) cb->qp = cb->cm_id->qp; } return ret; } static void krping_free_qp(struct krping_cb *cb) { ib_destroy_qp(cb->qp); ib_destroy_cq(cb->cq); ib_dealloc_pd(cb->pd); } static int krping_setup_qp(struct krping_cb *cb, struct rdma_cm_id *cm_id) { int ret; cb->pd = ib_alloc_pd(cm_id->device); if (IS_ERR(cb->pd)) { PRINTF(cb, "ib_alloc_pd failed\n"); return PTR_ERR(cb->pd); } DEBUG_LOG(cb, "created pd %p\n", cb->pd); 
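	/*
	 * Record the provider name for the stats report, then create one CQ
	 * sized for both SQ and RQ completions (cb->txdepth * 2).  The CQ is
	 * only armed for completion notification when none of the polling
	 * tests (wlat/rlat/bw/frtest) was requested.
	 */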
strlcpy(cb->stats.name, cb->pd->device->name, sizeof(cb->stats.name)); cb->cq = ib_create_cq(cm_id->device, krping_cq_event_handler, NULL, cb, cb->txdepth * 2, 0); if (IS_ERR(cb->cq)) { PRINTF(cb, "ib_create_cq failed\n"); ret = PTR_ERR(cb->cq); goto err1; } DEBUG_LOG(cb, "created cq %p\n", cb->cq); if (!cb->wlat && !cb->rlat && !cb->bw && !cb->frtest) { ret = ib_req_notify_cq(cb->cq, IB_CQ_NEXT_COMP); if (ret) { PRINTF(cb, "ib_create_cq failed\n"); goto err2; } } ret = krping_create_qp(cb); if (ret) { PRINTF(cb, "krping_create_qp failed: %d\n", ret); goto err2; } DEBUG_LOG(cb, "created qp %p\n", cb->qp); return 0; err2: ib_destroy_cq(cb->cq); err1: ib_dealloc_pd(cb->pd); return ret; } /* * return the (possibly rebound) rkey for the rdma buffer. * FASTREG mode: invalidate and rebind via fastreg wr. * MW mode: rebind the MW. * other modes: just return the mr rkey. */ static u32 krping_rdma_rkey(struct krping_cb *cb, u64 buf, int post_inv) { u32 rkey = 0xffffffff; u64 p; struct ib_send_wr *bad_wr; int i; int ret; switch (cb->mem) { case FASTREG: cb->invalidate_wr.ex.invalidate_rkey = cb->fastreg_mr->rkey; /* * Update the fastreg key. */ ib_update_fast_reg_key(cb->fastreg_mr, ++cb->key); cb->fastreg_wr.wr.fast_reg.rkey = cb->fastreg_mr->rkey; /* * Update the fastreg WR with new buf info. */ if (buf == (u64)cb->start_dma_addr) cb->fastreg_wr.wr.fast_reg.access_flags = IB_ACCESS_REMOTE_READ; else cb->fastreg_wr.wr.fast_reg.access_flags = IB_ACCESS_REMOTE_WRITE | IB_ACCESS_LOCAL_WRITE; cb->fastreg_wr.wr.fast_reg.iova_start = buf; p = (u64)(buf & PAGE_MASK); for (i=0; i < cb->fastreg_wr.wr.fast_reg.page_list_len; i++, p += PAGE_SIZE) { cb->page_list->page_list[i] = p; DEBUG_LOG(cb, "page_list[%d] 0x%llx\n", i, p); } DEBUG_LOG(cb, "post_inv = %d, fastreg new rkey 0x%x shift %u len %u" " iova_start %llx page_list_len %u\n", post_inv, cb->fastreg_wr.wr.fast_reg.rkey, cb->fastreg_wr.wr.fast_reg.page_shift, cb->fastreg_wr.wr.fast_reg.length, cb->fastreg_wr.wr.fast_reg.iova_start, cb->fastreg_wr.wr.fast_reg.page_list_len); if (post_inv) ret = ib_post_send(cb->qp, &cb->invalidate_wr, &bad_wr); else ret = ib_post_send(cb->qp, &cb->fastreg_wr, &bad_wr); if (ret) { PRINTF(cb, "post send error %d\n", ret); cb->state = ERROR; } rkey = cb->fastreg_mr->rkey; break; case MW: /* * Update the MW with new buf info. */ if (buf == (u64)cb->start_dma_addr) { cb->bind_attr.mw_access_flags = IB_ACCESS_REMOTE_READ; cb->bind_attr.mr = cb->start_mr; } else { cb->bind_attr.mw_access_flags = IB_ACCESS_REMOTE_WRITE; cb->bind_attr.mr = cb->rdma_mr; } cb->bind_attr.addr = buf; DEBUG_LOG(cb, "binding mw rkey 0x%x to buf %llx mr rkey 0x%x\n", cb->mw->rkey, buf, cb->bind_attr.mr->rkey); ret = ib_bind_mw(cb->qp, cb->mw, &cb->bind_attr); if (ret) { PRINTF(cb, "bind mw error %d\n", ret); cb->state = ERROR; } else rkey = cb->mw->rkey; break; case MR: if (buf == (u64)cb->start_dma_addr) rkey = cb->start_mr->rkey; else rkey = cb->rdma_mr->rkey; break; case DMA: rkey = cb->dma_mr->rkey; break; default: PRINTF(cb, "%s:%d case ERROR\n", __func__, __LINE__); cb->state = ERROR; break; } return rkey; } static void krping_format_send(struct krping_cb *cb, u64 buf) { struct krping_rdma_info *info = &cb->send_buf; u32 rkey; /* * Client side will do fastreg or mw bind before * advertising the rdma buffer. Server side * sends have no data. 
*/ if (!cb->server || cb->wlat || cb->rlat || cb->bw) { rkey = krping_rdma_rkey(cb, buf, !cb->server_invalidate); info->buf = htonll(buf); info->rkey = htonl(rkey); info->size = htonl(cb->size); DEBUG_LOG(cb, "RDMA addr %llx rkey %x len %d\n", (unsigned long long)buf, rkey, cb->size); } } static void krping_test_server(struct krping_cb *cb) { struct ib_send_wr *bad_wr, inv; int ret; while (1) { /* Wait for client's Start STAG/TO/Len */ wait_event_interruptible(cb->sem, cb->state >= RDMA_READ_ADV); if (cb->state != RDMA_READ_ADV) { PRINTF(cb, "wait for RDMA_READ_ADV state %d\n", cb->state); break; } DEBUG_LOG(cb, "server received sink adv\n"); cb->rdma_sq_wr.wr.rdma.rkey = cb->remote_rkey; cb->rdma_sq_wr.wr.rdma.remote_addr = cb->remote_addr; cb->rdma_sq_wr.sg_list->length = cb->remote_len; cb->rdma_sgl.lkey = krping_rdma_rkey(cb, cb->rdma_dma_addr, 1); cb->rdma_sq_wr.next = NULL; /* Issue RDMA Read. */ if (cb->read_inv) cb->rdma_sq_wr.opcode = IB_WR_RDMA_READ_WITH_INV; else { cb->rdma_sq_wr.opcode = IB_WR_RDMA_READ; if (cb->mem == FASTREG) { /* * Immediately follow the read with a * fenced LOCAL_INV. */ cb->rdma_sq_wr.next = &inv; memset(&inv, 0, sizeof inv); inv.opcode = IB_WR_LOCAL_INV; inv.ex.invalidate_rkey = cb->fastreg_mr->rkey; inv.send_flags = IB_SEND_FENCE; } } ret = ib_post_send(cb->qp, &cb->rdma_sq_wr, &bad_wr); if (ret) { PRINTF(cb, "post send error %d\n", ret); break; } cb->rdma_sq_wr.next = NULL; DEBUG_LOG(cb, "server posted rdma read req \n"); /* Wait for read completion */ wait_event_interruptible(cb->sem, cb->state >= RDMA_READ_COMPLETE); if (cb->state != RDMA_READ_COMPLETE) { PRINTF(cb, "wait for RDMA_READ_COMPLETE state %d\n", cb->state); break; } DEBUG_LOG(cb, "server received read complete\n"); /* Display data in recv buf */ if (cb->verbose) PRINTF(cb, "server ping data: %s\n", cb->rdma_buf); /* Tell client to continue */ if (cb->server && cb->server_invalidate) { cb->sq_wr.ex.invalidate_rkey = cb->remote_rkey; cb->sq_wr.opcode = IB_WR_SEND_WITH_INV; DEBUG_LOG(cb, "send-w-inv rkey 0x%x\n", cb->remote_rkey); } ret = ib_post_send(cb->qp, &cb->sq_wr, &bad_wr); if (ret) { PRINTF(cb, "post send error %d\n", ret); break; } DEBUG_LOG(cb, "server posted go ahead\n"); /* Wait for client's RDMA STAG/TO/Len */ wait_event_interruptible(cb->sem, cb->state >= RDMA_WRITE_ADV); if (cb->state != RDMA_WRITE_ADV) { PRINTF(cb, "wait for RDMA_WRITE_ADV state %d\n", cb->state); break; } DEBUG_LOG(cb, "server received sink adv\n"); /* RDMA Write echo data */ cb->rdma_sq_wr.opcode = IB_WR_RDMA_WRITE; cb->rdma_sq_wr.wr.rdma.rkey = cb->remote_rkey; cb->rdma_sq_wr.wr.rdma.remote_addr = cb->remote_addr; cb->rdma_sq_wr.sg_list->length = strlen(cb->rdma_buf) + 1; if (cb->local_dma_lkey) cb->rdma_sgl.lkey = cb->qp->device->local_dma_lkey; else cb->rdma_sgl.lkey = krping_rdma_rkey(cb, cb->rdma_dma_addr, 0); DEBUG_LOG(cb, "rdma write from lkey %x laddr %llx len %d\n", cb->rdma_sq_wr.sg_list->lkey, (unsigned long long)cb->rdma_sq_wr.sg_list->addr, cb->rdma_sq_wr.sg_list->length); ret = ib_post_send(cb->qp, &cb->rdma_sq_wr, &bad_wr); if (ret) { PRINTF(cb, "post send error %d\n", ret); break; } /* Wait for completion */ ret = wait_event_interruptible(cb->sem, cb->state >= RDMA_WRITE_COMPLETE); if (cb->state != RDMA_WRITE_COMPLETE) { PRINTF(cb, "wait for RDMA_WRITE_COMPLETE state %d\n", cb->state); break; } DEBUG_LOG(cb, "server rdma write complete \n"); cb->state = CONNECTED; /* Tell client to begin again */ if (cb->server && cb->server_invalidate) { cb->sq_wr.ex.invalidate_rkey = cb->remote_rkey; 
cb->sq_wr.opcode = IB_WR_SEND_WITH_INV; DEBUG_LOG(cb, "send-w-inv rkey 0x%x\n", cb->remote_rkey); } ret = ib_post_send(cb->qp, &cb->sq_wr, &bad_wr); if (ret) { PRINTF(cb, "post send error %d\n", ret); break; } DEBUG_LOG(cb, "server posted go ahead\n"); } } static void rlat_test(struct krping_cb *cb) { int scnt; int iters = cb->count; struct timeval start_tv, stop_tv; int ret; struct ib_wc wc; struct ib_send_wr *bad_wr; int ne; scnt = 0; cb->rdma_sq_wr.opcode = IB_WR_RDMA_READ; cb->rdma_sq_wr.wr.rdma.rkey = cb->remote_rkey; cb->rdma_sq_wr.wr.rdma.remote_addr = cb->remote_addr; cb->rdma_sq_wr.sg_list->length = cb->size; microtime(&start_tv); if (!cb->poll) { cb->state = RDMA_READ_ADV; ib_req_notify_cq(cb->cq, IB_CQ_NEXT_COMP); } while (scnt < iters) { cb->state = RDMA_READ_ADV; ret = ib_post_send(cb->qp, &cb->rdma_sq_wr, &bad_wr); if (ret) { PRINTF(cb, "Couldn't post send: ret=%d scnt %d\n", ret, scnt); return; } do { if (!cb->poll) { wait_event_interruptible(cb->sem, cb->state != RDMA_READ_ADV); if (cb->state == RDMA_READ_COMPLETE) { ne = 1; ib_req_notify_cq(cb->cq, IB_CQ_NEXT_COMP); } else { ne = -1; } } else ne = ib_poll_cq(cb->cq, 1, &wc); if (cb->state == ERROR) { PRINTF(cb, "state == ERROR...bailing scnt %d\n", scnt); return; } } while (ne == 0); if (ne < 0) { PRINTF(cb, "poll CQ failed %d\n", ne); return; } if (cb->poll && wc.status != IB_WC_SUCCESS) { PRINTF(cb, "Completion wth error at %s:\n", cb->server ? "server" : "client"); PRINTF(cb, "Failed status %d: wr_id %d\n", wc.status, (int) wc.wr_id); return; } ++scnt; } microtime(&stop_tv); if (stop_tv.tv_usec < start_tv.tv_usec) { stop_tv.tv_usec += 1000000; stop_tv.tv_sec -= 1; } PRINTF(cb, "delta sec %lu delta usec %lu iter %d size %d\n", stop_tv.tv_sec - start_tv.tv_sec, stop_tv.tv_usec - start_tv.tv_usec, scnt, cb->size); } static void wlat_test(struct krping_cb *cb) { int ccnt, scnt, rcnt; int iters=cb->count; volatile char *poll_buf = (char *) cb->start_buf; char *buf = (char *)cb->rdma_buf; struct timeval start_tv, stop_tv; cycles_t *post_cycles_start, *post_cycles_stop; cycles_t *poll_cycles_start, *poll_cycles_stop; cycles_t *last_poll_cycles_start; cycles_t sum_poll = 0, sum_post = 0, sum_last_poll = 0; int i; int cycle_iters = 1000; ccnt = 0; scnt = 0; rcnt = 0; post_cycles_start = kmalloc(cycle_iters * sizeof(cycles_t), GFP_KERNEL); if (!post_cycles_start) { PRINTF(cb, "%s kmalloc failed\n", __FUNCTION__); return; } post_cycles_stop = kmalloc(cycle_iters * sizeof(cycles_t), GFP_KERNEL); if (!post_cycles_stop) { PRINTF(cb, "%s kmalloc failed\n", __FUNCTION__); return; } poll_cycles_start = kmalloc(cycle_iters * sizeof(cycles_t), GFP_KERNEL); if (!poll_cycles_start) { PRINTF(cb, "%s kmalloc failed\n", __FUNCTION__); return; } poll_cycles_stop = kmalloc(cycle_iters * sizeof(cycles_t), GFP_KERNEL); if (!poll_cycles_stop) { PRINTF(cb, "%s kmalloc failed\n", __FUNCTION__); return; } last_poll_cycles_start = kmalloc(cycle_iters * sizeof(cycles_t), GFP_KERNEL); if (!last_poll_cycles_start) { PRINTF(cb, "%s kmalloc failed\n", __FUNCTION__); return; } cb->rdma_sq_wr.opcode = IB_WR_RDMA_WRITE; cb->rdma_sq_wr.wr.rdma.rkey = cb->remote_rkey; cb->rdma_sq_wr.wr.rdma.remote_addr = cb->remote_addr; cb->rdma_sq_wr.sg_list->length = cb->size; if (cycle_iters > iters) cycle_iters = iters; microtime(&start_tv); while (scnt < iters || ccnt < iters || rcnt < iters) { /* Wait till buffer changes. 
*/ if (rcnt < iters && !(scnt < 1 && !cb->server)) { ++rcnt; while (*poll_buf != (char)rcnt) { if (cb->state == ERROR) { PRINTF(cb, "state = ERROR, bailing\n"); return; } } } if (scnt < iters) { struct ib_send_wr *bad_wr; *buf = (char)scnt+1; if (scnt < cycle_iters) post_cycles_start[scnt] = get_cycles(); if (ib_post_send(cb->qp, &cb->rdma_sq_wr, &bad_wr)) { PRINTF(cb, "Couldn't post send: scnt=%d\n", scnt); return; } if (scnt < cycle_iters) post_cycles_stop[scnt] = get_cycles(); scnt++; } if (ccnt < iters) { struct ib_wc wc; int ne; if (ccnt < cycle_iters) poll_cycles_start[ccnt] = get_cycles(); do { if (ccnt < cycle_iters) last_poll_cycles_start[ccnt] = get_cycles(); ne = ib_poll_cq(cb->cq, 1, &wc); } while (ne == 0); if (ccnt < cycle_iters) poll_cycles_stop[ccnt] = get_cycles(); ++ccnt; if (ne < 0) { PRINTF(cb, "poll CQ failed %d\n", ne); return; } if (wc.status != IB_WC_SUCCESS) { PRINTF(cb, "Completion wth error at %s:\n", cb->server ? "server" : "client"); PRINTF(cb, "Failed status %d: wr_id %d\n", wc.status, (int) wc.wr_id); PRINTF(cb, "scnt=%d, rcnt=%d, ccnt=%d\n", scnt, rcnt, ccnt); return; } } } microtime(&stop_tv); if (stop_tv.tv_usec < start_tv.tv_usec) { stop_tv.tv_usec += 1000000; stop_tv.tv_sec -= 1; } for (i=0; i < cycle_iters; i++) { sum_post += post_cycles_stop[i] - post_cycles_start[i]; sum_poll += poll_cycles_stop[i] - poll_cycles_start[i]; sum_last_poll += poll_cycles_stop[i]-last_poll_cycles_start[i]; } PRINTF(cb, "delta sec %lu delta usec %lu iter %d size %d cycle_iters %d" " sum_post %llu sum_poll %llu sum_last_poll %llu\n", stop_tv.tv_sec - start_tv.tv_sec, stop_tv.tv_usec - start_tv.tv_usec, scnt, cb->size, cycle_iters, (unsigned long long)sum_post, (unsigned long long)sum_poll, (unsigned long long)sum_last_poll); kfree(post_cycles_start); kfree(post_cycles_stop); kfree(poll_cycles_start); kfree(poll_cycles_stop); kfree(last_poll_cycles_start); } static void bw_test(struct krping_cb *cb) { int ccnt, scnt, rcnt; int iters=cb->count; struct timeval start_tv, stop_tv; cycles_t *post_cycles_start, *post_cycles_stop; cycles_t *poll_cycles_start, *poll_cycles_stop; cycles_t *last_poll_cycles_start; cycles_t sum_poll = 0, sum_post = 0, sum_last_poll = 0; int i; int cycle_iters = 1000; ccnt = 0; scnt = 0; rcnt = 0; post_cycles_start = kmalloc(cycle_iters * sizeof(cycles_t), GFP_KERNEL); if (!post_cycles_start) { PRINTF(cb, "%s kmalloc failed\n", __FUNCTION__); return; } post_cycles_stop = kmalloc(cycle_iters * sizeof(cycles_t), GFP_KERNEL); if (!post_cycles_stop) { PRINTF(cb, "%s kmalloc failed\n", __FUNCTION__); return; } poll_cycles_start = kmalloc(cycle_iters * sizeof(cycles_t), GFP_KERNEL); if (!poll_cycles_start) { PRINTF(cb, "%s kmalloc failed\n", __FUNCTION__); return; } poll_cycles_stop = kmalloc(cycle_iters * sizeof(cycles_t), GFP_KERNEL); if (!poll_cycles_stop) { PRINTF(cb, "%s kmalloc failed\n", __FUNCTION__); return; } last_poll_cycles_start = kmalloc(cycle_iters * sizeof(cycles_t), GFP_KERNEL); if (!last_poll_cycles_start) { PRINTF(cb, "%s kmalloc failed\n", __FUNCTION__); return; } cb->rdma_sq_wr.opcode = IB_WR_RDMA_WRITE; cb->rdma_sq_wr.wr.rdma.rkey = cb->remote_rkey; cb->rdma_sq_wr.wr.rdma.remote_addr = cb->remote_addr; cb->rdma_sq_wr.sg_list->length = cb->size; if (cycle_iters > iters) cycle_iters = iters; microtime(&start_tv); while (scnt < iters || ccnt < iters) { while (scnt < iters && scnt - ccnt < cb->txdepth) { struct ib_send_wr *bad_wr; if (scnt < cycle_iters) post_cycles_start[scnt] = get_cycles(); if (ib_post_send(cb->qp, &cb->rdma_sq_wr, 
&bad_wr)) { PRINTF(cb, "Couldn't post send: scnt=%d\n", scnt); return; } if (scnt < cycle_iters) post_cycles_stop[scnt] = get_cycles(); ++scnt; } if (ccnt < iters) { int ne; struct ib_wc wc; if (ccnt < cycle_iters) poll_cycles_start[ccnt] = get_cycles(); do { if (ccnt < cycle_iters) last_poll_cycles_start[ccnt] = get_cycles(); ne = ib_poll_cq(cb->cq, 1, &wc); } while (ne == 0); if (ccnt < cycle_iters) poll_cycles_stop[ccnt] = get_cycles(); ccnt += 1; if (ne < 0) { PRINTF(cb, "poll CQ failed %d\n", ne); return; } if (wc.status != IB_WC_SUCCESS) { PRINTF(cb, "Completion wth error at %s:\n", cb->server ? "server" : "client"); PRINTF(cb, "Failed status %d: wr_id %d\n", wc.status, (int) wc.wr_id); return; } } } microtime(&stop_tv); if (stop_tv.tv_usec < start_tv.tv_usec) { stop_tv.tv_usec += 1000000; stop_tv.tv_sec -= 1; } for (i=0; i < cycle_iters; i++) { sum_post += post_cycles_stop[i] - post_cycles_start[i]; sum_poll += poll_cycles_stop[i] - poll_cycles_start[i]; sum_last_poll += poll_cycles_stop[i]-last_poll_cycles_start[i]; } PRINTF(cb, "delta sec %lu delta usec %lu iter %d size %d cycle_iters %d" " sum_post %llu sum_poll %llu sum_last_poll %llu\n", stop_tv.tv_sec - start_tv.tv_sec, stop_tv.tv_usec - start_tv.tv_usec, scnt, cb->size, cycle_iters, (unsigned long long)sum_post, (unsigned long long)sum_poll, (unsigned long long)sum_last_poll); kfree(post_cycles_start); kfree(post_cycles_stop); kfree(poll_cycles_start); kfree(poll_cycles_stop); kfree(last_poll_cycles_start); } static void krping_rlat_test_server(struct krping_cb *cb) { struct ib_send_wr *bad_wr; struct ib_wc wc; int ret; /* Spin waiting for client's Start STAG/TO/Len */ while (cb->state < RDMA_READ_ADV) { krping_cq_event_handler(cb->cq, cb); } /* Send STAG/TO/Len to client */ krping_format_send(cb, cb->start_dma_addr); ret = ib_post_send(cb->qp, &cb->sq_wr, &bad_wr); if (ret) { PRINTF(cb, "post send error %d\n", ret); return; } /* Spin waiting for send completion */ while ((ret = ib_poll_cq(cb->cq, 1, &wc) == 0)); if (ret < 0) { PRINTF(cb, "poll error %d\n", ret); return; } if (wc.status) { PRINTF(cb, "send completiong error %d\n", wc.status); return; } wait_event_interruptible(cb->sem, cb->state == ERROR); } static void krping_wlat_test_server(struct krping_cb *cb) { struct ib_send_wr *bad_wr; struct ib_wc wc; int ret; /* Spin waiting for client's Start STAG/TO/Len */ while (cb->state < RDMA_READ_ADV) { krping_cq_event_handler(cb->cq, cb); } /* Send STAG/TO/Len to client */ krping_format_send(cb, cb->start_dma_addr); ret = ib_post_send(cb->qp, &cb->sq_wr, &bad_wr); if (ret) { PRINTF(cb, "post send error %d\n", ret); return; } /* Spin waiting for send completion */ while ((ret = ib_poll_cq(cb->cq, 1, &wc) == 0)); if (ret < 0) { PRINTF(cb, "poll error %d\n", ret); return; } if (wc.status) { PRINTF(cb, "send completiong error %d\n", wc.status); return; } wlat_test(cb); wait_event_interruptible(cb->sem, cb->state == ERROR); } static void krping_bw_test_server(struct krping_cb *cb) { struct ib_send_wr *bad_wr; struct ib_wc wc; int ret; /* Spin waiting for client's Start STAG/TO/Len */ while (cb->state < RDMA_READ_ADV) { krping_cq_event_handler(cb->cq, cb); } /* Send STAG/TO/Len to client */ krping_format_send(cb, cb->start_dma_addr); ret = ib_post_send(cb->qp, &cb->sq_wr, &bad_wr); if (ret) { PRINTF(cb, "post send error %d\n", ret); return; } /* Spin waiting for send completion */ while ((ret = ib_poll_cq(cb->cq, 1, &wc) == 0)); if (ret < 0) { PRINTF(cb, "poll error %d\n", ret); return; } if (wc.status) { PRINTF(cb, "send 
completiong error %d\n", wc.status); return; } if (cb->duplex) bw_test(cb); wait_event_interruptible(cb->sem, cb->state == ERROR); } static int fastreg_supported(struct krping_cb *cb) { struct ib_device *dev = cb->child_cm_id->device; struct ib_device_attr attr; int ret; ret = ib_query_device(dev, &attr); if (ret) { PRINTF(cb, "ib_query_device failed ret %d\n", ret); return 0; } if (!(attr.device_cap_flags & IB_DEVICE_MEM_MGT_EXTENSIONS)) { PRINTF(cb, "Fastreg not supported - device_cap_flags 0x%x\n", attr.device_cap_flags); return 0; } DEBUG_LOG(cb, "Fastreg supported - device_cap_flags 0x%x\n", attr.device_cap_flags); return 1; } static int krping_bind_server(struct krping_cb *cb) { struct sockaddr_in sin; int ret; memset(&sin, 0, sizeof(sin)); sin.sin_len = sizeof sin; sin.sin_family = AF_INET; sin.sin_addr.s_addr = cb->addr.s_addr; sin.sin_port = cb->port; ret = rdma_bind_addr(cb->cm_id, (struct sockaddr *) &sin); if (ret) { PRINTF(cb, "rdma_bind_addr error %d\n", ret); return ret; } DEBUG_LOG(cb, "rdma_bind_addr successful\n"); DEBUG_LOG(cb, "rdma_listen\n"); ret = rdma_listen(cb->cm_id, 3); if (ret) { PRINTF(cb, "rdma_listen failed: %d\n", ret); return ret; } wait_event_interruptible(cb->sem, cb->state >= CONNECT_REQUEST); if (cb->state != CONNECT_REQUEST) { PRINTF(cb, "wait for CONNECT_REQUEST state %d\n", cb->state); return -1; } if (cb->mem == FASTREG && !fastreg_supported(cb)) return -EINVAL; return 0; } static void krping_run_server(struct krping_cb *cb) { struct ib_recv_wr *bad_wr; int ret; ret = krping_bind_server(cb); if (ret) return; ret = krping_setup_qp(cb, cb->child_cm_id); if (ret) { PRINTF(cb, "setup_qp failed: %d\n", ret); goto err0; } ret = krping_setup_buffers(cb); if (ret) { PRINTF(cb, "krping_setup_buffers failed: %d\n", ret); goto err1; } ret = ib_post_recv(cb->qp, &cb->rq_wr, &bad_wr); if (ret) { PRINTF(cb, "ib_post_recv failed: %d\n", ret); goto err2; } ret = krping_accept(cb); if (ret) { PRINTF(cb, "connect error %d\n", ret); goto err2; } if (cb->wlat) krping_wlat_test_server(cb); else if (cb->rlat) krping_rlat_test_server(cb); else if (cb->bw) krping_bw_test_server(cb); else krping_test_server(cb); rdma_disconnect(cb->child_cm_id); err2: krping_free_buffers(cb); err1: krping_free_qp(cb); err0: rdma_destroy_id(cb->child_cm_id); } static void krping_test_client(struct krping_cb *cb) { int ping, start, cc, i, ret; struct ib_send_wr *bad_wr; unsigned char c; start = 65; for (ping = 0; !cb->count || ping < cb->count; ping++) { cb->state = RDMA_READ_ADV; /* Put some ascii text in the buffer. */ cc = sprintf(cb->start_buf, "rdma-ping-%d: ", ping); for (i = cc, c = start; i < cb->size; i++) { cb->start_buf[i] = c; c++; if (c > 122) c = 65; } start++; if (start > 122) start = 65; cb->start_buf[cb->size - 1] = 0; krping_format_send(cb, cb->start_dma_addr); if (cb->state == ERROR) { PRINTF(cb, "krping_format_send failed\n"); break; } ret = ib_post_send(cb->qp, &cb->sq_wr, &bad_wr); if (ret) { PRINTF(cb, "post send error %d\n", ret); break; } /* Wait for server to ACK */ wait_event_interruptible(cb->sem, cb->state >= RDMA_WRITE_ADV); if (cb->state != RDMA_WRITE_ADV) { PRINTF(cb, "wait for RDMA_WRITE_ADV state %d\n", cb->state); break; } krping_format_send(cb, cb->rdma_dma_addr); ret = ib_post_send(cb->qp, &cb->sq_wr, &bad_wr); if (ret) { PRINTF(cb, "post send error %d\n", ret); break; } /* Wait for the server to say the RDMA Write is complete. 
*/ wait_event_interruptible(cb->sem, cb->state >= RDMA_WRITE_COMPLETE); if (cb->state != RDMA_WRITE_COMPLETE) { PRINTF(cb, "wait for RDMA_WRITE_COMPLETE state %d\n", cb->state); break; } if (cb->validate) if (memcmp(cb->start_buf, cb->rdma_buf, cb->size)) { PRINTF(cb, "data mismatch!\n"); break; } if (cb->verbose) PRINTF(cb, "ping data: %s\n", cb->rdma_buf); #ifdef SLOW_KRPING wait_event_interruptible_timeout(cb->sem, cb->state == ERROR, HZ); #endif } } static void krping_rlat_test_client(struct krping_cb *cb) { struct ib_send_wr *bad_wr; struct ib_wc wc; int ret; cb->state = RDMA_READ_ADV; /* Send STAG/TO/Len to client */ krping_format_send(cb, cb->start_dma_addr); if (cb->state == ERROR) { PRINTF(cb, "krping_format_send failed\n"); return; } ret = ib_post_send(cb->qp, &cb->sq_wr, &bad_wr); if (ret) { PRINTF(cb, "post send error %d\n", ret); return; } /* Spin waiting for send completion */ while ((ret = ib_poll_cq(cb->cq, 1, &wc) == 0)); if (ret < 0) { PRINTF(cb, "poll error %d\n", ret); return; } if (wc.status) { PRINTF(cb, "send completion error %d\n", wc.status); return; } /* Spin waiting for server's Start STAG/TO/Len */ while (cb->state < RDMA_WRITE_ADV) { krping_cq_event_handler(cb->cq, cb); } #if 0 { int i; struct timeval start, stop; time_t sec; suseconds_t usec; unsigned long long elapsed; struct ib_wc wc; struct ib_send_wr *bad_wr; int ne; cb->rdma_sq_wr.opcode = IB_WR_RDMA_WRITE; cb->rdma_sq_wr.wr.rdma.rkey = cb->remote_rkey; cb->rdma_sq_wr.wr.rdma.remote_addr = cb->remote_addr; cb->rdma_sq_wr.sg_list->length = 0; cb->rdma_sq_wr.num_sge = 0; microtime(&start); for (i=0; i < 100000; i++) { if (ib_post_send(cb->qp, &cb->rdma_sq_wr, &bad_wr)) { PRINTF(cb, "Couldn't post send\n"); return; } do { ne = ib_poll_cq(cb->cq, 1, &wc); } while (ne == 0); if (ne < 0) { PRINTF(cb, "poll CQ failed %d\n", ne); return; } if (wc.status != IB_WC_SUCCESS) { PRINTF(cb, "Completion wth error at %s:\n", cb->server ? 
"server" : "client"); PRINTF(cb, "Failed status %d: wr_id %d\n", wc.status, (int) wc.wr_id); return; } } microtime(&stop); if (stop.tv_usec < start.tv_usec) { stop.tv_usec += 1000000; stop.tv_sec -= 1; } sec = stop.tv_sec - start.tv_sec; usec = stop.tv_usec - start.tv_usec; elapsed = sec * 1000000 + usec; PRINTF(cb, "0B-write-lat iters 100000 usec %llu\n", elapsed); } #endif rlat_test(cb); } static void krping_wlat_test_client(struct krping_cb *cb) { struct ib_send_wr *bad_wr; struct ib_wc wc; int ret; cb->state = RDMA_READ_ADV; /* Send STAG/TO/Len to client */ krping_format_send(cb, cb->start_dma_addr); if (cb->state == ERROR) { PRINTF(cb, "krping_format_send failed\n"); return; } ret = ib_post_send(cb->qp, &cb->sq_wr, &bad_wr); if (ret) { PRINTF(cb, "post send error %d\n", ret); return; } /* Spin waiting for send completion */ while ((ret = ib_poll_cq(cb->cq, 1, &wc) == 0)); if (ret < 0) { PRINTF(cb, "poll error %d\n", ret); return; } if (wc.status) { PRINTF(cb, "send completion error %d\n", wc.status); return; } /* Spin waiting for server's Start STAG/TO/Len */ while (cb->state < RDMA_WRITE_ADV) { krping_cq_event_handler(cb->cq, cb); } wlat_test(cb); } static void krping_bw_test_client(struct krping_cb *cb) { struct ib_send_wr *bad_wr; struct ib_wc wc; int ret; cb->state = RDMA_READ_ADV; /* Send STAG/TO/Len to client */ krping_format_send(cb, cb->start_dma_addr); if (cb->state == ERROR) { PRINTF(cb, "krping_format_send failed\n"); return; } ret = ib_post_send(cb->qp, &cb->sq_wr, &bad_wr); if (ret) { PRINTF(cb, "post send error %d\n", ret); return; } /* Spin waiting for send completion */ while ((ret = ib_poll_cq(cb->cq, 1, &wc) == 0)); if (ret < 0) { PRINTF(cb, "poll error %d\n", ret); return; } if (wc.status) { PRINTF(cb, "send completion error %d\n", wc.status); return; } /* Spin waiting for server's Start STAG/TO/Len */ while (cb->state < RDMA_WRITE_ADV) { krping_cq_event_handler(cb->cq, cb); } bw_test(cb); } static void krping_fr_test(struct krping_cb *cb) { struct ib_fast_reg_page_list *pl; struct ib_send_wr fr, inv, *bad; struct ib_wc wc; u8 key = 0; struct ib_mr *mr; int i; int ret; int size = cb->size; int plen = (((size - 1) & PAGE_MASK) + PAGE_SIZE) >> PAGE_SHIFT; time_t start; int count = 0; int scnt = 0; pl = ib_alloc_fast_reg_page_list(cb->qp->device, plen); if (IS_ERR(pl)) { PRINTF(cb, "ib_alloc_fast_reg_page_list failed %ld\n", PTR_ERR(pl)); return; } mr = ib_alloc_fast_reg_mr(cb->pd, plen); if (IS_ERR(mr)) { PRINTF(cb, "ib_alloc_fast_reg_mr failed %ld\n", PTR_ERR(pl)); goto err1; } for (i=0; ipage_list[i] = 0xcafebabe | i; memset(&fr, 0, sizeof fr); fr.opcode = IB_WR_FAST_REG_MR; fr.wr.fast_reg.page_shift = PAGE_SHIFT; fr.wr.fast_reg.length = size; fr.wr.fast_reg.page_list = pl; fr.wr.fast_reg.page_list_len = plen; fr.wr.fast_reg.iova_start = 0; fr.wr.fast_reg.access_flags = IB_ACCESS_REMOTE_WRITE | IB_ACCESS_LOCAL_WRITE; fr.next = &inv; memset(&inv, 0, sizeof inv); inv.opcode = IB_WR_LOCAL_INV; inv.send_flags = IB_SEND_SIGNALED; DEBUG_LOG(cb, "fr_test: stag index 0x%x plen %u size %u depth %u\n", mr->rkey >> 8, plen, cb->size, cb->txdepth); start = time_uptime; while (1) { if ((time_uptime - start) >= 9) { DEBUG_LOG(cb, "fr_test: pausing 1 second! 
count %u latest size %u plen %u\n", count, size, plen); wait_event_interruptible(cb->sem, cb->state == ERROR); if (cb->state == ERROR) break; start = time_uptime; } while (scnt < (cb->txdepth>>1)) { ib_update_fast_reg_key(mr, ++key); fr.wr.fast_reg.rkey = mr->rkey; inv.ex.invalidate_rkey = mr->rkey; size = arc4random() % cb->size; if (size == 0) size = cb->size; plen = (((size - 1) & PAGE_MASK) + PAGE_SIZE) >> PAGE_SHIFT; fr.wr.fast_reg.length = size; fr.wr.fast_reg.page_list_len = plen; ret = ib_post_send(cb->qp, &fr, &bad); if (ret) { PRINTF(cb, "ib_post_send failed %d\n", ret); goto err2; } scnt++; } do { ret = ib_poll_cq(cb->cq, 1, &wc); if (ret < 0) { PRINTF(cb, "ib_poll_cq failed %d\n", ret); goto err2; } if (ret == 1) { if (wc.status) { PRINTF(cb, "completion error %u\n", wc.status); goto err2; } count++; scnt--; } else if (krping_sigpending()) { PRINTF(cb, "signal!\n"); goto err2; } } while (ret == 1); } err2: #if 0 DEBUG_LOG(cb, "sleeping 1 second\n"); wait_event_interruptible_timeout(cb->sem, cb->state == ERROR, HZ); #endif DEBUG_LOG(cb, "draining the cq...\n"); do { ret = ib_poll_cq(cb->cq, 1, &wc); if (ret < 0) { PRINTF(cb, "ib_poll_cq failed %d\n", ret); break; } if (ret == 1) { if (wc.status) { PRINTF(cb, "completion error %u opcode %u\n", wc.status, wc.opcode); } } } while (ret == 1); DEBUG_LOG(cb, "fr_test: done!\n"); ib_dereg_mr(mr); err1: ib_free_fast_reg_page_list(pl); } static int krping_connect_client(struct krping_cb *cb) { struct rdma_conn_param conn_param; int ret; memset(&conn_param, 0, sizeof conn_param); conn_param.responder_resources = 1; conn_param.initiator_depth = 1; conn_param.retry_count = 10; ret = rdma_connect(cb->cm_id, &conn_param); if (ret) { PRINTF(cb, "rdma_connect error %d\n", ret); return ret; } wait_event_interruptible(cb->sem, cb->state >= CONNECTED); if (cb->state == ERROR) { PRINTF(cb, "wait for CONNECTED state %d\n", cb->state); return -1; } DEBUG_LOG(cb, "rdma_connect successful\n"); return 0; } static int krping_bind_client(struct krping_cb *cb) { struct sockaddr_in sin; int ret; memset(&sin, 0, sizeof(sin)); sin.sin_len = sizeof sin; sin.sin_family = AF_INET; sin.sin_addr.s_addr = cb->addr.s_addr; sin.sin_port = cb->port; ret = rdma_resolve_addr(cb->cm_id, NULL, (struct sockaddr *) &sin, 2000); if (ret) { PRINTF(cb, "rdma_resolve_addr error %d\n", ret); return ret; } wait_event_interruptible(cb->sem, cb->state >= ROUTE_RESOLVED); if (cb->state != ROUTE_RESOLVED) { PRINTF(cb, "addr/route resolution did not resolve: state %d\n", cb->state); return -EINTR; } if (cb->mem == FASTREG && !fastreg_supported(cb)) return -EINVAL; DEBUG_LOG(cb, "rdma_resolve_addr - rdma_resolve_route successful\n"); return 0; } static void krping_run_client(struct krping_cb *cb) { struct ib_recv_wr *bad_wr; int ret; ret = krping_bind_client(cb); if (ret) return; ret = krping_setup_qp(cb, cb->cm_id); if (ret) { PRINTF(cb, "setup_qp failed: %d\n", ret); return; } ret = krping_setup_buffers(cb); if (ret) { PRINTF(cb, "krping_setup_buffers failed: %d\n", ret); goto err1; } ret = ib_post_recv(cb->qp, &cb->rq_wr, &bad_wr); if (ret) { PRINTF(cb, "ib_post_recv failed: %d\n", ret); goto err2; } ret = krping_connect_client(cb); if (ret) { PRINTF(cb, "connect error %d\n", ret); goto err2; } if (cb->wlat) krping_wlat_test_client(cb); else if (cb->rlat) krping_rlat_test_client(cb); else if (cb->bw) krping_bw_test_client(cb); else if (cb->frtest) krping_fr_test(cb); else krping_test_client(cb); rdma_disconnect(cb->cm_id); err2: krping_free_buffers(cb); err1: krping_free_qp(cb); } 
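A minimal sketch of how the krping_doit() entry point below is typically driven, assuming the comma-separated "name=value" syntax handled by krping_getopt() (not shown in this diff); the wrapper name, address, port, count, and size are hypothetical examples, not part of the commit:

	/*
	 * Hypothetical wrapper: the option string values are examples only.
	 * krping_doit() hands the string to krping_getopt(), so a writable
	 * buffer is used here in case the parser modifies it in place.
	 */
	static int
	krping_client_sketch(void *cookie)
	{
		char cmd[] = "client,addr=192.168.0.2,port=9999,"
		    "count=10,size=65536,validate";

		/* Runs the default DMA mem_mode ping/pong test 10 times. */
		return (krping_doit(cmd, cookie));
	}

The server side would pass the same kind of string with "server" in place of "client", matching the krping_opts table near the top of the file.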
int krping_doit(char *cmd, void *cookie) { struct krping_cb *cb; int op; int ret = 0; char *optarg; unsigned long optint; cb = kzalloc(sizeof(*cb), GFP_KERNEL); if (!cb) return -ENOMEM; mutex_lock(&krping_mutex); list_add_tail(&cb->list, &krping_cbs); mutex_unlock(&krping_mutex); cb->cookie = cookie; cb->server = -1; cb->state = IDLE; cb->size = 64; cb->txdepth = RPING_SQ_DEPTH; cb->mem = DMA; init_waitqueue_head(&cb->sem); while ((op = krping_getopt("krping", &cmd, krping_opts, NULL, &optarg, &optint)) != 0) { switch (op) { case 'a': cb->addr_str = optarg; DEBUG_LOG(cb, "ipaddr (%s)\n", optarg); if (!inet_aton(optarg, &cb->addr)) { PRINTF(cb, "bad addr string %s\n", optarg); ret = EINVAL; } break; case 'p': cb->port = htons(optint); DEBUG_LOG(cb, "port %d\n", (int)optint); break; case 'P': cb->poll = 1; DEBUG_LOG(cb, "server\n"); break; case 's': cb->server = 1; DEBUG_LOG(cb, "server\n"); break; case 'c': cb->server = 0; DEBUG_LOG(cb, "client\n"); break; case 'S': cb->size = optint; if ((cb->size < 1) || (cb->size > RPING_BUFSIZE)) { PRINTF(cb, "Invalid size %d " "(valid range is 1 to %d)\n", cb->size, RPING_BUFSIZE); ret = EINVAL; } else DEBUG_LOG(cb, "size %d\n", (int)optint); break; case 'C': cb->count = optint; if (cb->count < 0) { PRINTF(cb, "Invalid count %d\n", cb->count); ret = EINVAL; } else DEBUG_LOG(cb, "count %d\n", (int) cb->count); break; case 'v': cb->verbose++; DEBUG_LOG(cb, "verbose\n"); break; case 'V': cb->validate++; DEBUG_LOG(cb, "validate data\n"); break; case 'l': cb->wlat++; break; case 'L': cb->rlat++; break; case 'B': cb->bw++; break; case 'd': cb->duplex++; break; case 'm': if (!strncmp(optarg, "dma", 3)) cb->mem = DMA; else if (!strncmp(optarg, "fastreg", 7)) cb->mem = FASTREG; else if (!strncmp(optarg, "mw", 2)) cb->mem = MW; else if (!strncmp(optarg, "mr", 2)) cb->mem = MR; else { PRINTF(cb, "unknown mem mode %s. 
" "Must be dma, fastreg, mw, or mr\n", optarg); ret = -EINVAL; break; } break; case 'I': cb->server_invalidate = 1; break; case 'T': cb->txdepth = optint; DEBUG_LOG(cb, "txdepth %d\n", (int) cb->txdepth); break; case 'Z': cb->local_dma_lkey = 1; DEBUG_LOG(cb, "using local dma lkey\n"); break; case 'R': cb->read_inv = 1; DEBUG_LOG(cb, "using read-with-inv\n"); break; case 'f': cb->frtest = 1; DEBUG_LOG(cb, "fast-reg test!\n"); break; default: PRINTF(cb, "unknown opt %s\n", optarg); ret = -EINVAL; break; } } if (ret) goto out; if (cb->server == -1) { PRINTF(cb, "must be either client or server\n"); ret = -EINVAL; goto out; } if (cb->server && cb->frtest) { PRINTF(cb, "must be client to run frtest\n"); ret = -EINVAL; goto out; } if ((cb->frtest + cb->bw + cb->rlat + cb->wlat) > 1) { PRINTF(cb, "Pick only one test: fr, bw, rlat, wlat\n"); ret = -EINVAL; goto out; } if (cb->server_invalidate && cb->mem != FASTREG) { PRINTF(cb, "server_invalidate only valid with fastreg mem_mode\n"); ret = -EINVAL; goto out; } if (cb->read_inv && cb->mem != FASTREG) { PRINTF(cb, "read_inv only valid with fastreg mem_mode\n"); ret = -EINVAL; goto out; } if (cb->mem != MR && (cb->wlat || cb->rlat || cb->bw)) { PRINTF(cb, "wlat, rlat, and bw tests only support mem_mode MR\n"); ret = -EINVAL; goto out; } cb->cm_id = rdma_create_id(krping_cma_event_handler, cb, RDMA_PS_TCP); if (IS_ERR(cb->cm_id)) { ret = PTR_ERR(cb->cm_id); PRINTF(cb, "rdma_create_id error %d\n", ret); goto out; } DEBUG_LOG(cb, "created cm_id %p\n", cb->cm_id); if (cb->server) krping_run_server(cb); else krping_run_client(cb); DEBUG_LOG(cb, "destroy cm_id %p\n", cb->cm_id); rdma_destroy_id(cb->cm_id); out: mutex_lock(&krping_mutex); list_del(&cb->list); mutex_unlock(&krping_mutex); kfree(cb); return ret; } void krping_walk_cb_list(void (*f)(struct krping_stats *, void *), void *arg) { struct krping_cb *cb; mutex_lock(&krping_mutex); list_for_each_entry(cb, &krping_cbs, list) (*f)(cb->pd ? &cb->stats : NULL, arg); mutex_unlock(&krping_mutex); } void krping_init(void) { mutex_init(&krping_mutex); } Index: stable/10/sys/dev/cxgbe/iw_cxgbe/iw_cxgbe.h =================================================================== --- stable/10/sys/dev/cxgbe/iw_cxgbe/iw_cxgbe.h (revision 273245) +++ stable/10/sys/dev/cxgbe/iw_cxgbe/iw_cxgbe.h (revision 273246) @@ -1,1046 +1,1045 @@ /* * Copyright (c) 2009-2013 Chelsio, Inc. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU * General Public License (GPL) Version 2, available from the file * COPYING in the main directory of this source tree, or the * OpenIB.org BSD license below: * * Redistribution and use in source and binary forms, with or * without modification, are permitted provided that the following * conditions are met: * * - Redistributions of source code must retain the above * copyright notice, this list of conditions and the following * disclaimer. * - Redistributions in binary form must reproduce the above * copyright notice, this list of conditions and the following * disclaimer in the documentation and/or other materials * provided with the distribution. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * * $FreeBSD$ */ #ifndef __IW_CXGB4_H__ #define __IW_CXGB4_H__ #include #include #include #include #include #include #include #include -#include #include #include #include #include #include #include #include #include #include #undef prefetch #include "common/common.h" #include "common/t4_msg.h" #include "common/t4_regs.h" #include "common/t4_tcb.h" #include "t4_l2t.h" #define DRV_NAME "iw_cxgbe" #define MOD DRV_NAME ":" #define KTR_IW_CXGBE KTR_SPARE3 extern int c4iw_debug; #define PDBG(fmt, args...) \ do { \ if (c4iw_debug) \ printf(MOD fmt, ## args); \ } while (0) #include "t4.h" static inline void *cplhdr(struct mbuf *m) { return mtod(m, void*); } #define PBL_OFF(rdev_p, a) ((a) - (rdev_p)->adap->vres.pbl.start) #define RQT_OFF(rdev_p, a) ((a) - (rdev_p)->adap->vres.rq.start) #define C4IW_ID_TABLE_F_RANDOM 1 /* Pseudo-randomize the id's returned */ #define C4IW_ID_TABLE_F_EMPTY 2 /* Table is initially empty */ struct c4iw_id_table { u32 flags; u32 start; /* logical minimal id */ u32 last; /* hint for find */ u32 max; spinlock_t lock; unsigned long *table; }; struct c4iw_resource { struct c4iw_id_table tpt_table; struct c4iw_id_table qid_table; struct c4iw_id_table pdid_table; }; struct c4iw_qid_list { struct list_head entry; u32 qid; }; struct c4iw_dev_ucontext { struct list_head qpids; struct list_head cqids; struct mutex lock; }; enum c4iw_rdev_flags { T4_FATAL_ERROR = (1<<0), }; struct c4iw_stat { u64 total; u64 cur; u64 max; u64 fail; }; struct c4iw_stats { struct mutex lock; struct c4iw_stat qid; struct c4iw_stat pd; struct c4iw_stat stag; struct c4iw_stat pbl; struct c4iw_stat rqt; u64 db_full; u64 db_empty; u64 db_drop; u64 db_state_transitions; }; struct c4iw_rdev { struct adapter *adap; struct c4iw_resource resource; unsigned long qpshift; u32 qpmask; unsigned long cqshift; u32 cqmask; struct c4iw_dev_ucontext uctx; struct gen_pool *pbl_pool; struct gen_pool *rqt_pool; u32 flags; struct c4iw_stats stats; }; static inline int c4iw_fatal_error(struct c4iw_rdev *rdev) { return rdev->flags & T4_FATAL_ERROR; } static inline int c4iw_num_stags(struct c4iw_rdev *rdev) { return min((int)T4_MAX_NUM_STAG, (int)(rdev->adap->vres.stag.size >> 5)); } #define C4IW_WR_TO (10*HZ) struct c4iw_wr_wait { int ret; atomic_t completion; }; static inline void c4iw_init_wr_wait(struct c4iw_wr_wait *wr_waitp) { wr_waitp->ret = 0; atomic_set(&wr_waitp->completion, 0); } static inline void c4iw_wake_up(struct c4iw_wr_wait *wr_waitp, int ret) { wr_waitp->ret = ret; atomic_set(&wr_waitp->completion, 1); wakeup(wr_waitp); } static inline int c4iw_wait_for_reply(struct c4iw_rdev *rdev, struct c4iw_wr_wait *wr_waitp, u32 hwtid, u32 qpid, const char *func) { struct adapter *sc = rdev->adap; unsigned to = C4IW_WR_TO; while (!atomic_read(&wr_waitp->completion)) { tsleep(wr_waitp, 0, "c4iw_wait", to); if (SIGPENDING(curthread)) { printf("%s - Device %s not responding - " "tid %u qpid %u\n", func, device_get_nameunit(sc->dev), hwtid, qpid); if (c4iw_fatal_error(rdev)) { wr_waitp->ret = -EIO; break; } to = to << 2; } } if (wr_waitp->ret) CTR4(KTR_IW_CXGBE, "%s: FW reply %d tid %u qpid %u", device_get_nameunit(sc->dev), wr_waitp->ret, hwtid, qpid); return (wr_waitp->ret); } enum db_state { NORMAL = 0, FLOW_CONTROL = 1, RECOVERY = 2 }; struct c4iw_dev { 
struct ib_device ibdev; struct c4iw_rdev rdev; u32 device_cap_flags; struct idr cqidr; struct idr qpidr; struct idr mmidr; spinlock_t lock; struct dentry *debugfs_root; enum db_state db_state; int qpcnt; }; static inline struct c4iw_dev *to_c4iw_dev(struct ib_device *ibdev) { return container_of(ibdev, struct c4iw_dev, ibdev); } static inline struct c4iw_dev *rdev_to_c4iw_dev(struct c4iw_rdev *rdev) { return container_of(rdev, struct c4iw_dev, rdev); } static inline struct c4iw_cq *get_chp(struct c4iw_dev *rhp, u32 cqid) { return idr_find(&rhp->cqidr, cqid); } static inline struct c4iw_qp *get_qhp(struct c4iw_dev *rhp, u32 qpid) { return idr_find(&rhp->qpidr, qpid); } static inline struct c4iw_mr *get_mhp(struct c4iw_dev *rhp, u32 mmid) { return idr_find(&rhp->mmidr, mmid); } static inline int _insert_handle(struct c4iw_dev *rhp, struct idr *idr, void *handle, u32 id, int lock) { int ret; int newid; do { if (!idr_pre_get(idr, lock ? GFP_KERNEL : GFP_ATOMIC)) return -ENOMEM; if (lock) spin_lock_irq(&rhp->lock); ret = idr_get_new_above(idr, handle, id, &newid); BUG_ON(!ret && newid != id); if (lock) spin_unlock_irq(&rhp->lock); } while (ret == -EAGAIN); return ret; } static inline int insert_handle(struct c4iw_dev *rhp, struct idr *idr, void *handle, u32 id) { return _insert_handle(rhp, idr, handle, id, 1); } static inline int insert_handle_nolock(struct c4iw_dev *rhp, struct idr *idr, void *handle, u32 id) { return _insert_handle(rhp, idr, handle, id, 0); } static inline void _remove_handle(struct c4iw_dev *rhp, struct idr *idr, u32 id, int lock) { if (lock) spin_lock_irq(&rhp->lock); idr_remove(idr, id); if (lock) spin_unlock_irq(&rhp->lock); } static inline void remove_handle(struct c4iw_dev *rhp, struct idr *idr, u32 id) { _remove_handle(rhp, idr, id, 1); } static inline void remove_handle_nolock(struct c4iw_dev *rhp, struct idr *idr, u32 id) { _remove_handle(rhp, idr, id, 0); } struct c4iw_pd { struct ib_pd ibpd; u32 pdid; struct c4iw_dev *rhp; }; static inline struct c4iw_pd *to_c4iw_pd(struct ib_pd *ibpd) { return container_of(ibpd, struct c4iw_pd, ibpd); } struct tpt_attributes { u64 len; u64 va_fbo; enum fw_ri_mem_perms perms; u32 stag; u32 pdid; u32 qpid; u32 pbl_addr; u32 pbl_size; u32 state:1; u32 type:2; u32 rsvd:1; u32 remote_invaliate_disable:1; u32 zbva:1; u32 mw_bind_enable:1; u32 page_size:5; }; struct c4iw_mr { struct ib_mr ibmr; struct ib_umem *umem; struct c4iw_dev *rhp; u64 kva; struct tpt_attributes attr; }; static inline struct c4iw_mr *to_c4iw_mr(struct ib_mr *ibmr) { return container_of(ibmr, struct c4iw_mr, ibmr); } struct c4iw_mw { struct ib_mw ibmw; struct c4iw_dev *rhp; u64 kva; struct tpt_attributes attr; }; static inline struct c4iw_mw *to_c4iw_mw(struct ib_mw *ibmw) { return container_of(ibmw, struct c4iw_mw, ibmw); } struct c4iw_fr_page_list { struct ib_fast_reg_page_list ibpl; DECLARE_PCI_UNMAP_ADDR(mapping); dma_addr_t dma_addr; struct c4iw_dev *dev; int size; }; static inline struct c4iw_fr_page_list *to_c4iw_fr_page_list( struct ib_fast_reg_page_list *ibpl) { return container_of(ibpl, struct c4iw_fr_page_list, ibpl); } struct c4iw_cq { struct ib_cq ibcq; struct c4iw_dev *rhp; struct t4_cq cq; spinlock_t lock; spinlock_t comp_handler_lock; atomic_t refcnt; wait_queue_head_t wait; }; static inline struct c4iw_cq *to_c4iw_cq(struct ib_cq *ibcq) { return container_of(ibcq, struct c4iw_cq, ibcq); } struct c4iw_mpa_attributes { u8 initiator; u8 recv_marker_enabled; u8 xmit_marker_enabled; u8 crc_enabled; u8 enhanced_rdma_conn; u8 version; u8 p2p_type; }; 
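/* Sketch of pairing the idr helpers above: publish a CQ under its hardware
 * id so get_chp() can find it from the completion path, and unpublish it
 * before teardown.  The wrapper function is illustrative; the driver's
 * create/destroy verbs follow the same shape. */
static int
example_publish_cq(struct c4iw_dev *rhp, struct c4iw_cq *chp, u32 cqid)
{
	int ret;

	ret = insert_handle(rhp, &rhp->cqidr, chp, cqid);
	if (ret)
		return (ret);	/* -ENOMEM from idr_pre_get() */

	/* ... chp is now visible to get_chp(rhp, cqid) ... */

	remove_handle(rhp, &rhp->cqidr, cqid);
	return (0);
}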
struct c4iw_qp_attributes { u32 scq; u32 rcq; u32 sq_num_entries; u32 rq_num_entries; u32 sq_max_sges; u32 sq_max_sges_rdma_write; u32 rq_max_sges; u32 state; u8 enable_rdma_read; u8 enable_rdma_write; u8 enable_bind; u8 enable_mmid0_fastreg; u32 max_ord; u32 max_ird; u32 pd; u32 next_state; char terminate_buffer[52]; u32 terminate_msg_len; u8 is_terminate_local; struct c4iw_mpa_attributes mpa_attr; struct c4iw_ep *llp_stream_handle; u8 layer_etype; u8 ecode; u16 sq_db_inc; u16 rq_db_inc; }; struct c4iw_qp { struct ib_qp ibqp; struct c4iw_dev *rhp; struct c4iw_ep *ep; struct c4iw_qp_attributes attr; struct t4_wq wq; spinlock_t lock; struct mutex mutex; atomic_t refcnt; wait_queue_head_t wait; struct timer_list timer; }; static inline struct c4iw_qp *to_c4iw_qp(struct ib_qp *ibqp) { return container_of(ibqp, struct c4iw_qp, ibqp); } struct c4iw_ucontext { struct ib_ucontext ibucontext; struct c4iw_dev_ucontext uctx; u32 key; spinlock_t mmap_lock; struct list_head mmaps; }; static inline struct c4iw_ucontext *to_c4iw_ucontext(struct ib_ucontext *c) { return container_of(c, struct c4iw_ucontext, ibucontext); } struct c4iw_mm_entry { struct list_head entry; u64 addr; u32 key; unsigned len; }; static inline struct c4iw_mm_entry *remove_mmap(struct c4iw_ucontext *ucontext, u32 key, unsigned len) { struct list_head *pos, *nxt; struct c4iw_mm_entry *mm; spin_lock(&ucontext->mmap_lock); list_for_each_safe(pos, nxt, &ucontext->mmaps) { mm = list_entry(pos, struct c4iw_mm_entry, entry); if (mm->key == key && mm->len == len) { list_del_init(&mm->entry); spin_unlock(&ucontext->mmap_lock); CTR4(KTR_IW_CXGBE, "%s key 0x%x addr 0x%llx len %d", __func__, key, (unsigned long long) mm->addr, mm->len); return mm; } } spin_unlock(&ucontext->mmap_lock); return NULL; } static inline void insert_mmap(struct c4iw_ucontext *ucontext, struct c4iw_mm_entry *mm) { spin_lock(&ucontext->mmap_lock); CTR4(KTR_IW_CXGBE, "%s key 0x%x addr 0x%llx len %d", __func__, mm->key, (unsigned long long) mm->addr, mm->len); list_add_tail(&mm->entry, &ucontext->mmaps); spin_unlock(&ucontext->mmap_lock); } enum c4iw_qp_attr_mask { C4IW_QP_ATTR_NEXT_STATE = 1 << 0, C4IW_QP_ATTR_SQ_DB = 1<<1, C4IW_QP_ATTR_RQ_DB = 1<<2, C4IW_QP_ATTR_ENABLE_RDMA_READ = 1 << 7, C4IW_QP_ATTR_ENABLE_RDMA_WRITE = 1 << 8, C4IW_QP_ATTR_ENABLE_RDMA_BIND = 1 << 9, C4IW_QP_ATTR_MAX_ORD = 1 << 11, C4IW_QP_ATTR_MAX_IRD = 1 << 12, C4IW_QP_ATTR_LLP_STREAM_HANDLE = 1 << 22, C4IW_QP_ATTR_STREAM_MSG_BUFFER = 1 << 23, C4IW_QP_ATTR_MPA_ATTR = 1 << 24, C4IW_QP_ATTR_QP_CONTEXT_ACTIVATE = 1 << 25, C4IW_QP_ATTR_VALID_MODIFY = (C4IW_QP_ATTR_ENABLE_RDMA_READ | C4IW_QP_ATTR_ENABLE_RDMA_WRITE | C4IW_QP_ATTR_MAX_ORD | C4IW_QP_ATTR_MAX_IRD | C4IW_QP_ATTR_LLP_STREAM_HANDLE | C4IW_QP_ATTR_STREAM_MSG_BUFFER | C4IW_QP_ATTR_MPA_ATTR | C4IW_QP_ATTR_QP_CONTEXT_ACTIVATE) }; int c4iw_modify_qp(struct c4iw_dev *rhp, struct c4iw_qp *qhp, enum c4iw_qp_attr_mask mask, struct c4iw_qp_attributes *attrs, int internal); enum c4iw_qp_state { C4IW_QP_STATE_IDLE, C4IW_QP_STATE_RTS, C4IW_QP_STATE_ERROR, C4IW_QP_STATE_TERMINATE, C4IW_QP_STATE_CLOSING, C4IW_QP_STATE_TOT }; static inline int c4iw_convert_state(enum ib_qp_state ib_state) { switch (ib_state) { case IB_QPS_RESET: case IB_QPS_INIT: return C4IW_QP_STATE_IDLE; case IB_QPS_RTS: return C4IW_QP_STATE_RTS; case IB_QPS_SQD: return C4IW_QP_STATE_CLOSING; case IB_QPS_SQE: return C4IW_QP_STATE_TERMINATE; case IB_QPS_ERR: return C4IW_QP_STATE_ERROR; default: return -1; } } static inline int to_ib_qp_state(int c4iw_qp_state) { switch (c4iw_qp_state) { case 
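/* Sketch of the mmap-key handshake built on insert_mmap()/remove_mmap()
 * above: record {key, bus address, length} for a queue the user library must
 * map, and return the key so the process can pass it back as an mmap()
 * offset, which the driver's mmap handler matches via remove_mmap().  The
 * helper and its key-advance step are illustrative. */
static int
example_export_queue(struct c4iw_ucontext *uctx, u64 addr, unsigned len,
    u32 *keyp)
{
	struct c4iw_mm_entry *mm;

	mm = kmalloc(sizeof(*mm), GFP_KERNEL);
	if (mm == NULL)
		return (-ENOMEM);
	spin_lock(&uctx->mmap_lock);
	mm->key = uctx->key;
	uctx->key += PAGE_SIZE;		/* one key per mapping */
	spin_unlock(&uctx->mmap_lock);
	mm->addr = addr;
	mm->len = len;
	*keyp = mm->key;
	insert_mmap(uctx, mm);
	return (0);
}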
C4IW_QP_STATE_IDLE: return IB_QPS_INIT; case C4IW_QP_STATE_RTS: return IB_QPS_RTS; case C4IW_QP_STATE_CLOSING: return IB_QPS_SQD; case C4IW_QP_STATE_TERMINATE: return IB_QPS_SQE; case C4IW_QP_STATE_ERROR: return IB_QPS_ERR; } return IB_QPS_ERR; } static inline u32 c4iw_ib_to_tpt_access(int a) { return (a & IB_ACCESS_REMOTE_WRITE ? FW_RI_MEM_ACCESS_REM_WRITE : 0) | (a & IB_ACCESS_REMOTE_READ ? FW_RI_MEM_ACCESS_REM_READ : 0) | (a & IB_ACCESS_LOCAL_WRITE ? FW_RI_MEM_ACCESS_LOCAL_WRITE : 0) | FW_RI_MEM_ACCESS_LOCAL_READ; } static inline u32 c4iw_ib_to_tpt_bind_access(int acc) { return (acc & IB_ACCESS_REMOTE_WRITE ? FW_RI_MEM_ACCESS_REM_WRITE : 0) | (acc & IB_ACCESS_REMOTE_READ ? FW_RI_MEM_ACCESS_REM_READ : 0); } enum c4iw_mmid_state { C4IW_STAG_STATE_VALID, C4IW_STAG_STATE_INVALID }; #define C4IW_NODE_DESC "iw_cxgbe Chelsio Communications" #define MPA_KEY_REQ "MPA ID Req Frame" #define MPA_KEY_REP "MPA ID Rep Frame" #define MPA_MAX_PRIVATE_DATA 256 #define MPA_ENHANCED_RDMA_CONN 0x10 #define MPA_REJECT 0x20 #define MPA_CRC 0x40 #define MPA_MARKERS 0x80 #define MPA_FLAGS_MASK 0xE0 #define MPA_V2_PEER2PEER_MODEL 0x8000 #define MPA_V2_ZERO_LEN_FPDU_RTR 0x4000 #define MPA_V2_RDMA_WRITE_RTR 0x8000 #define MPA_V2_RDMA_READ_RTR 0x4000 #define MPA_V2_IRD_ORD_MASK 0x3FFF /* Fixme: Use atomic_read for kref.count as same as Linux */ #define c4iw_put_ep(ep) { \ CTR4(KTR_IW_CXGBE, "put_ep (%s:%u) ep %p, refcnt %d", \ __func__, __LINE__, ep, (ep)->kref.count); \ WARN_ON((ep)->kref.count < 1); \ kref_put(&((ep)->kref), _c4iw_free_ep); \ } /* Fixme: Use atomic_read for kref.count as same as Linux */ #define c4iw_get_ep(ep) { \ CTR4(KTR_IW_CXGBE, "get_ep (%s:%u) ep %p, refcnt %d", \ __func__, __LINE__, ep, (ep)->kref.count); \ kref_get(&((ep)->kref)); \ } void _c4iw_free_ep(struct kref *kref); struct mpa_message { u8 key[16]; u8 flags; u8 revision; __be16 private_data_size; u8 private_data[0]; }; struct mpa_v2_conn_params { __be16 ird; __be16 ord; }; struct terminate_message { u8 layer_etype; u8 ecode; __be16 hdrct_rsvd; u8 len_hdrs[0]; }; #define TERM_MAX_LENGTH (sizeof(struct terminate_message) + 2 + 18 + 28) enum c4iw_layers_types { LAYER_RDMAP = 0x00, LAYER_DDP = 0x10, LAYER_MPA = 0x20, RDMAP_LOCAL_CATA = 0x00, RDMAP_REMOTE_PROT = 0x01, RDMAP_REMOTE_OP = 0x02, DDP_LOCAL_CATA = 0x00, DDP_TAGGED_ERR = 0x01, DDP_UNTAGGED_ERR = 0x02, DDP_LLP = 0x03 }; enum c4iw_rdma_ecodes { RDMAP_INV_STAG = 0x00, RDMAP_BASE_BOUNDS = 0x01, RDMAP_ACC_VIOL = 0x02, RDMAP_STAG_NOT_ASSOC = 0x03, RDMAP_TO_WRAP = 0x04, RDMAP_INV_VERS = 0x05, RDMAP_INV_OPCODE = 0x06, RDMAP_STREAM_CATA = 0x07, RDMAP_GLOBAL_CATA = 0x08, RDMAP_CANT_INV_STAG = 0x09, RDMAP_UNSPECIFIED = 0xff }; enum c4iw_ddp_ecodes { DDPT_INV_STAG = 0x00, DDPT_BASE_BOUNDS = 0x01, DDPT_STAG_NOT_ASSOC = 0x02, DDPT_TO_WRAP = 0x03, DDPT_INV_VERS = 0x04, DDPU_INV_QN = 0x01, DDPU_INV_MSN_NOBUF = 0x02, DDPU_INV_MSN_RANGE = 0x03, DDPU_INV_MO = 0x04, DDPU_MSG_TOOBIG = 0x05, DDPU_INV_VERS = 0x06 }; enum c4iw_mpa_ecodes { MPA_CRC_ERR = 0x02, MPA_MARKER_ERR = 0x03, MPA_LOCAL_CATA = 0x05, MPA_INSUFF_IRD = 0x06, MPA_NOMATCH_RTR = 0x07, }; enum c4iw_ep_state { IDLE = 0, LISTEN, CONNECTING, MPA_REQ_WAIT, MPA_REQ_SENT, MPA_REQ_RCVD, MPA_REP_SENT, FPDU_MODE, ABORTING, CLOSING, MORIBUND, DEAD, }; enum c4iw_ep_flags { PEER_ABORT_IN_PROGRESS = 0, ABORT_REQ_IN_PROGRESS = 1, RELEASE_RESOURCES = 2, CLOSE_SENT = 3, TIMEOUT = 4 }; enum c4iw_ep_history { ACT_OPEN_REQ = 0, ACT_OFLD_CONN = 1, ACT_OPEN_RPL = 2, ACT_ESTAB = 3, PASS_ACCEPT_REQ = 4, PASS_ESTAB = 5, ABORT_UPCALL = 6, ESTAB_UPCALL = 7, 
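/* Worked example of c4iw_ib_to_tpt_access() above: an MR registered with
 * IB_ACCESS_LOCAL_WRITE | IB_ACCESS_REMOTE_READ translates to the firmware
 * permissions below; FW_RI_MEM_ACCESS_LOCAL_READ is always OR'd in, and
 * remote write stays clear because it was not requested. */
static inline u32
example_mr_perms(void)
{
	/* == FW_RI_MEM_ACCESS_LOCAL_WRITE | FW_RI_MEM_ACCESS_REM_READ |
	 *    FW_RI_MEM_ACCESS_LOCAL_READ */
	return c4iw_ib_to_tpt_access(IB_ACCESS_LOCAL_WRITE |
	    IB_ACCESS_REMOTE_READ);
}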
CLOSE_UPCALL = 8, ULP_ACCEPT = 9, ULP_REJECT = 10, TIMEDOUT = 11, PEER_ABORT = 12, PEER_CLOSE = 13, CONNREQ_UPCALL = 14, ABORT_CONN = 15, DISCONN_UPCALL = 16, EP_DISC_CLOSE = 17, EP_DISC_ABORT = 18, CONN_RPL_UPCALL = 19, ACT_RETRY_NOMEM = 20, ACT_RETRY_INUSE = 21 }; struct c4iw_ep_common { TAILQ_ENTRY(c4iw_ep_common) entry; /* Work queue attachment */ struct iw_cm_id *cm_id; struct c4iw_qp *qp; struct c4iw_dev *dev; enum c4iw_ep_state state; struct kref kref; struct mutex mutex; struct sockaddr_in local_addr; struct sockaddr_in remote_addr; struct c4iw_wr_wait wr_wait; unsigned long flags; unsigned long history; int rpl_err; int rpl_done; struct thread *thread; struct socket *so; }; struct c4iw_listen_ep { struct c4iw_ep_common com; unsigned int stid; int backlog; }; struct c4iw_ep { struct c4iw_ep_common com; struct c4iw_ep *parent_ep; struct timer_list timer; struct list_head entry; unsigned int atid; u32 hwtid; u32 snd_seq; u32 rcv_seq; struct l2t_entry *l2t; struct dst_entry *dst; struct c4iw_mpa_attributes mpa_attr; u8 mpa_pkt[sizeof(struct mpa_message) + MPA_MAX_PRIVATE_DATA]; unsigned int mpa_pkt_len; u32 ird; u32 ord; u32 smac_idx; u32 tx_chan; u32 mtu; u16 mss; u16 emss; u16 plen; u16 rss_qid; u16 txq_idx; u16 ctrlq_idx; u8 tos; u8 retry_with_mpa_v1; u8 tried_with_mpa_v1; }; static inline struct c4iw_ep *to_ep(struct iw_cm_id *cm_id) { return cm_id->provider_data; } static inline struct c4iw_listen_ep *to_listen_ep(struct iw_cm_id *cm_id) { return cm_id->provider_data; } static inline int compute_wscale(int win) { int wscale = 0; while (wscale < 14 && (65535< struct gen_pool { blist_t gen_list; daddr_t gen_base; int gen_chunk_shift; struct mutex gen_lock; }; static __inline struct gen_pool * gen_pool_create(daddr_t base, u_int chunk_shift, u_int len) { struct gen_pool *gp; gp = malloc(sizeof(struct gen_pool), M_DEVBUF, M_NOWAIT); if (gp == NULL) return (NULL); memset(gp, 0, sizeof(struct gen_pool)); gp->gen_list = blist_create(len >> chunk_shift, M_NOWAIT); if (gp->gen_list == NULL) { free(gp, M_DEVBUF); return (NULL); } blist_free(gp->gen_list, 0, len >> chunk_shift); gp->gen_base = base; gp->gen_chunk_shift = chunk_shift; //mutex_init(&gp->gen_lock, "genpool", NULL, MTX_DUPOK|MTX_DEF); mutex_init(&gp->gen_lock); return (gp); } static __inline unsigned long gen_pool_alloc(struct gen_pool *gp, int size) { int chunks; daddr_t blkno; chunks = (size + (1<gen_chunk_shift) - 1) >> gp->gen_chunk_shift; mutex_lock(&gp->gen_lock); blkno = blist_alloc(gp->gen_list, chunks); mutex_unlock(&gp->gen_lock); if (blkno == SWAPBLK_NONE) return (0); return (gp->gen_base + ((1 << gp->gen_chunk_shift) * blkno)); } static __inline void gen_pool_free(struct gen_pool *gp, daddr_t address, int size) { int chunks; daddr_t blkno; chunks = (size + (1<gen_chunk_shift) - 1) >> gp->gen_chunk_shift; blkno = (address - gp->gen_base) / (1 << gp->gen_chunk_shift); mutex_lock(&gp->gen_lock); blist_free(gp->gen_list, blkno, chunks); mutex_unlock(&gp->gen_lock); } static __inline void gen_pool_destroy(struct gen_pool *gp) { blist_destroy(gp->gen_list); free(gp, M_DEVBUF); } #if defined(__i386__) || defined(__amd64__) #define L1_CACHE_BYTES 128 #else #define L1_CACHE_BYTES 32 #endif static inline int idr_for_each(struct idr *idp, int (*fn)(int id, void *p, void *data), void *data) { int n, id, max, error = 0; struct idr_layer *p; struct idr_layer *pa[MAX_LEVEL]; struct idr_layer **paa = &pa[0]; n = idp->layers * IDR_BITS; p = idp->top; max = 1 << n; id = 0; while (id < max) { while (n > 0 && p) { n -= IDR_BITS; *paa++ 
= p; p = p->ary[(id >> n) & IDR_MASK]; } if (p) { error = fn(id, (void *)p, data); if (error) break; } id += 1 << n; while (n < fls(id)) { n += IDR_BITS; p = *--paa; } } return error; } void c4iw_cm_init_cpl(struct adapter *); void c4iw_cm_term_cpl(struct adapter *); void your_reg_device(struct c4iw_dev *dev); #define SGE_CTRLQ_NUM 0 extern int spg_creds;/* Status Page size in credit units(1 unit = 64) */ #endif Index: stable/10/sys/ofed/drivers/infiniband/core/addr.c =================================================================== --- stable/10/sys/ofed/drivers/infiniband/core/addr.c (revision 273245) +++ stable/10/sys/ofed/drivers/infiniband/core/addr.c (revision 273246) @@ -1,641 +1,644 @@ /* * Copyright (c) 2005 Voltaire Inc. All rights reserved. * Copyright (c) 2002-2005, Network Appliance, Inc. All rights reserved. * Copyright (c) 1999-2005, Mellanox Technologies, Inc. All rights reserved. * Copyright (c) 2005 Intel Corporation. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU * General Public License (GPL) Version 2, available from the file * COPYING in the main directory of this source tree, or the * OpenIB.org BSD license below: * * Redistribution and use in source and binary forms, with or * without modification, are permitted provided that the following * conditions are met: * * - Redistributions of source code must retain the above * copyright notice, this list of conditions and the following * disclaimer. * * - Redistributions in binary form must reproduce the above * copyright notice, this list of conditions and the following * disclaimer in the documentation and/or other materials * provided with the distribution. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
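/* Sketch of the blist-backed gen_pool shim defined above, as the PBL/RQT
 * allocators use it: carve a region into 2^chunk_shift-byte chunks, hand out
 * chunk-aligned addresses, and get 0 back on exhaustion.  The base, shift,
 * and length values here are illustrative. */
static int
example_pbl_pool(void)
{
	struct gen_pool *pool;
	unsigned long addr;

	pool = gen_pool_create(0x10000 /* base */, 6 /* 64B chunks */,
	    0x8000 /* length */);
	if (pool == NULL)
		return (ENOMEM);
	addr = gen_pool_alloc(pool, 512);	/* (512 + 63) >> 6 = 8 chunks */
	if (addr != 0)
		gen_pool_free(pool, addr, 512);
	gen_pool_destroy(pool);
	return (0);
}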
*/ #include #include +#include #include +#include +#include #include #include #include +#include + MODULE_AUTHOR("Sean Hefty"); MODULE_DESCRIPTION("IB Address Translation"); MODULE_LICENSE("Dual BSD/GPL"); struct addr_req { struct list_head list; struct sockaddr_storage src_addr; struct sockaddr_storage dst_addr; struct rdma_dev_addr *addr; struct rdma_addr_client *client; void *context; void (*callback)(int status, struct sockaddr *src_addr, struct rdma_dev_addr *addr, void *context); unsigned long timeout; int status; }; static void process_req(struct work_struct *work); static DEFINE_MUTEX(lock); static LIST_HEAD(req_list); static struct delayed_work work; static struct workqueue_struct *addr_wq; void rdma_addr_register_client(struct rdma_addr_client *client) { atomic_set(&client->refcount, 1); init_completion(&client->comp); } EXPORT_SYMBOL(rdma_addr_register_client); static inline void put_client(struct rdma_addr_client *client) { if (atomic_dec_and_test(&client->refcount)) complete(&client->comp); } void rdma_addr_unregister_client(struct rdma_addr_client *client) { put_client(client); wait_for_completion(&client->comp); } EXPORT_SYMBOL(rdma_addr_unregister_client); #ifdef __linux__ int rdma_copy_addr(struct rdma_dev_addr *dev_addr, struct net_device *dev, const unsigned char *dst_dev_addr) { dev_addr->dev_type = dev->type; memcpy(dev_addr->src_dev_addr, dev->dev_addr, MAX_ADDR_LEN); memcpy(dev_addr->broadcast, dev->broadcast, MAX_ADDR_LEN); if (dst_dev_addr) memcpy(dev_addr->dst_dev_addr, dst_dev_addr, MAX_ADDR_LEN); dev_addr->bound_dev_if = dev->ifindex; return 0; } #else int rdma_copy_addr(struct rdma_dev_addr *dev_addr, struct ifnet *dev, const unsigned char *dst_dev_addr) { if (dev->if_type == IFT_INFINIBAND) dev_addr->dev_type = ARPHRD_INFINIBAND; else if (dev->if_type == IFT_ETHER) dev_addr->dev_type = ARPHRD_ETHER; else dev_addr->dev_type = 0; memcpy(dev_addr->src_dev_addr, IF_LLADDR(dev), dev->if_addrlen); memcpy(dev_addr->broadcast, __DECONST(char *, dev->if_broadcastaddr), dev->if_addrlen); if (dst_dev_addr) memcpy(dev_addr->dst_dev_addr, dst_dev_addr, dev->if_addrlen); dev_addr->bound_dev_if = dev->if_index; return 0; } #endif EXPORT_SYMBOL(rdma_copy_addr); int rdma_translate_ip(struct sockaddr *addr, struct rdma_dev_addr *dev_addr) { struct net_device *dev; int ret = -EADDRNOTAVAIL; if (dev_addr->bound_dev_if) { dev = dev_get_by_index(&init_net, dev_addr->bound_dev_if); if (!dev) return -ENODEV; ret = rdma_copy_addr(dev_addr, dev, NULL); dev_put(dev); return ret; } switch (addr->sa_family) { #ifdef INET case AF_INET: dev = ip_dev_find(NULL, ((struct sockaddr_in *) addr)->sin_addr.s_addr); if (!dev) return ret; ret = rdma_copy_addr(dev_addr, dev, NULL); dev_put(dev); break; #endif #if defined(INET6) case AF_INET6: #ifdef __linux__ read_lock(&dev_base_lock); for_each_netdev(&init_net, dev) { if (ipv6_chk_addr(&init_net, &((struct sockaddr_in6 *) addr)->sin6_addr, dev, 1)) { ret = rdma_copy_addr(dev_addr, dev, NULL); break; } } read_unlock(&dev_base_lock); #else { struct sockaddr_in6 *sin6; struct ifaddr *ifa; in_port_t port; sin6 = (struct sockaddr_in6 *)addr; port = sin6->sin6_port; sin6->sin6_port = 0; ifa = ifa_ifwithaddr(addr); sin6->sin6_port = port; if (ifa == NULL) { ret = -ENODEV; break; } ret = rdma_copy_addr(dev_addr, ifa->ifa_ifp, NULL); ifa_free(ifa); break; } #endif break; #endif } return ret; } EXPORT_SYMBOL(rdma_translate_ip); static void set_timeout(unsigned long time) { unsigned long delay; - cancel_delayed_work(&work); - delay = time - jiffies; if 
((long)delay <= 0) delay = 1; - queue_delayed_work(addr_wq, &work, delay); + mod_delayed_work(addr_wq, &work, delay); } static void queue_req(struct addr_req *req) { struct addr_req *temp_req; mutex_lock(&lock); list_for_each_entry_reverse(temp_req, &req_list, list) { if (time_after_eq(req->timeout, temp_req->timeout)) break; } list_add(&req->list, &temp_req->list); if (req_list.next == &req->list) set_timeout(req->timeout); mutex_unlock(&lock); } #ifdef __linux__ static int addr4_resolve(struct sockaddr_in *src_in, struct sockaddr_in *dst_in, struct rdma_dev_addr *addr) { __be32 src_ip = src_in->sin_addr.s_addr; __be32 dst_ip = dst_in->sin_addr.s_addr; struct flowi fl; struct rtable *rt; struct neighbour *neigh; int ret; memset(&fl, 0, sizeof fl); fl.nl_u.ip4_u.daddr = dst_ip; fl.nl_u.ip4_u.saddr = src_ip; fl.oif = addr->bound_dev_if; ret = ip_route_output_key(&init_net, &rt, &fl); if (ret) goto out; src_in->sin_family = AF_INET; src_in->sin_addr.s_addr = rt->rt_src; if (rt->idev->dev->flags & IFF_LOOPBACK) { ret = rdma_translate_ip((struct sockaddr *) dst_in, addr); if (!ret) memcpy(addr->dst_dev_addr, addr->src_dev_addr, MAX_ADDR_LEN); goto put; } /* If the device does ARP internally, return 'done' */ if (rt->idev->dev->flags & IFF_NOARP) { rdma_copy_addr(addr, rt->idev->dev, NULL); goto put; } neigh = neigh_lookup(&arp_tbl, &rt->rt_gateway, rt->idev->dev); if (!neigh || !(neigh->nud_state & NUD_VALID)) { neigh_event_send(rt->u.dst.neighbour, NULL); ret = -ENODATA; if (neigh) goto release; goto put; } ret = rdma_copy_addr(addr, neigh->dev, neigh->ha); release: neigh_release(neigh); put: ip_rt_put(rt); out: return ret; } #if defined(INET6) static int addr6_resolve(struct sockaddr_in6 *src_in, struct sockaddr_in6 *dst_in, struct rdma_dev_addr *addr) { struct flowi fl; struct neighbour *neigh; struct dst_entry *dst; int ret; memset(&fl, 0, sizeof fl); ipv6_addr_copy(&fl.fl6_dst, &dst_in->sin6_addr); ipv6_addr_copy(&fl.fl6_src, &src_in->sin6_addr); fl.oif = addr->bound_dev_if; dst = ip6_route_output(&init_net, NULL, &fl); if ((ret = dst->error)) goto put; if (ipv6_addr_any(&fl.fl6_src)) { ret = ipv6_dev_get_saddr(&init_net, ip6_dst_idev(dst)->dev, &fl.fl6_dst, 0, &fl.fl6_src); if (ret) goto put; src_in->sin6_family = AF_INET6; ipv6_addr_copy(&src_in->sin6_addr, &fl.fl6_src); } if (dst->dev->flags & IFF_LOOPBACK) { ret = rdma_translate_ip((struct sockaddr *) dst_in, addr); if (!ret) memcpy(addr->dst_dev_addr, addr->src_dev_addr, MAX_ADDR_LEN); goto put; } /* If the device does ARP internally, return 'done' */ if (dst->dev->flags & IFF_NOARP) { ret = rdma_copy_addr(addr, dst->dev, NULL); goto put; } neigh = dst->neighbour; if (!neigh || !(neigh->nud_state & NUD_VALID)) { neigh_event_send(dst->neighbour, NULL); ret = -ENODATA; goto put; } ret = rdma_copy_addr(addr, dst->dev, neigh->ha); put: dst_release(dst); return ret; } #else static int addr6_resolve(struct sockaddr_in6 *src_in, struct sockaddr_in6 *dst_in, struct rdma_dev_addr *addr) { return -EADDRNOTAVAIL; } #endif #else #include static int addr_resolve(struct sockaddr *src_in, struct sockaddr *dst_in, struct rdma_dev_addr *addr) { struct sockaddr_in *sin; struct sockaddr_in6 *sin6; struct ifaddr *ifa; struct ifnet *ifp; #if defined(INET) || defined(INET6) struct llentry *lle; #endif struct rtentry *rte; in_port_t port; u_char edst[MAX_ADDR_LEN]; int multi; int bcast; int error = 0; /* * Determine whether the address is unicast, multicast, or broadcast * and whether the source interface is valid. 
*/ multi = 0; bcast = 0; sin = NULL; sin6 = NULL; ifp = NULL; rte = NULL; switch (dst_in->sa_family) { #ifdef INET case AF_INET: sin = (struct sockaddr_in *)dst_in; if (sin->sin_addr.s_addr == INADDR_BROADCAST) bcast = 1; if (IN_MULTICAST(ntohl(sin->sin_addr.s_addr))) multi = 1; sin = (struct sockaddr_in *)src_in; if (sin->sin_addr.s_addr != INADDR_ANY) { /* * Address comparison fails if the port is set * cache it here to be restored later. */ port = sin->sin_port; sin->sin_port = 0; memset(&sin->sin_zero, 0, sizeof(sin->sin_zero)); } else src_in = NULL; break; #endif #ifdef INET6 case AF_INET6: sin6 = (struct sockaddr_in6 *)dst_in; if (IN6_IS_ADDR_MULTICAST(&sin6->sin6_addr)) multi = 1; sin6 = (struct sockaddr_in6 *)src_in; if (!IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) { port = sin6->sin6_port; sin6->sin6_port = 0; } else src_in = NULL; break; #endif default: return -EINVAL; } /* * If we have a source address to use look it up first and verify * that it is a local interface. */ if (src_in) { ifa = ifa_ifwithaddr(src_in); if (sin) sin->sin_port = port; if (sin6) sin6->sin6_port = port; if (ifa == NULL) return -ENETUNREACH; ifp = ifa->ifa_ifp; ifa_free(ifa); if (bcast || multi) goto mcast; } /* * Make sure the route exists and has a valid link. */ rte = rtalloc1(dst_in, 1, 0); if (rte == NULL || rte->rt_ifp == NULL || !RT_LINK_IS_UP(rte->rt_ifp)) { if (rte) RTFREE_LOCKED(rte); return -EHOSTUNREACH; } /* * If it's not multicast or broadcast and the route doesn't match the * requested interface return unreachable. Otherwise fetch the * correct interface pointer and unlock the route. */ if (multi || bcast) { if (ifp == NULL) ifp = rte->rt_ifp; RTFREE_LOCKED(rte); } else if (ifp && ifp != rte->rt_ifp) { RTFREE_LOCKED(rte); return -ENETUNREACH; } else { if (ifp == NULL) ifp = rte->rt_ifp; RT_UNLOCK(rte); } mcast: if (bcast) return rdma_copy_addr(addr, ifp, ifp->if_broadcastaddr); if (multi) { struct sockaddr *llsa; error = ifp->if_resolvemulti(ifp, &llsa, dst_in); if (error) return -error; error = rdma_copy_addr(addr, ifp, LLADDR((struct sockaddr_dl *)llsa)); free(llsa, M_IFMADDR); return error; } /* * Resolve the link local address. */ switch (dst_in->sa_family) { #ifdef INET case AF_INET: error = arpresolve(ifp, rte, NULL, dst_in, edst, &lle); break; #endif #ifdef INET6 case AF_INET6: error = nd6_storelladdr(ifp, NULL, dst_in, (u_char *)edst, &lle); break; #endif default: /* XXX: Shouldn't happen. 
*/ error = -EINVAL; } RTFREE(rte); if (error == 0) return rdma_copy_addr(addr, ifp, edst); if (error == EWOULDBLOCK) return -ENODATA; return -error; } #endif static void process_req(struct work_struct *work) { struct addr_req *req, *temp_req; struct sockaddr *src_in, *dst_in; struct list_head done_list; INIT_LIST_HEAD(&done_list); mutex_lock(&lock); list_for_each_entry_safe(req, temp_req, &req_list, list) { if (req->status == -ENODATA) { src_in = (struct sockaddr *) &req->src_addr; dst_in = (struct sockaddr *) &req->dst_addr; req->status = addr_resolve(src_in, dst_in, req->addr); if (req->status && time_after_eq(jiffies, req->timeout)) req->status = -ETIMEDOUT; else if (req->status == -ENODATA) continue; } list_move_tail(&req->list, &done_list); } if (!list_empty(&req_list)) { req = list_entry(req_list.next, struct addr_req, list); set_timeout(req->timeout); } mutex_unlock(&lock); list_for_each_entry_safe(req, temp_req, &done_list, list) { list_del(&req->list); req->callback(req->status, (struct sockaddr *) &req->src_addr, req->addr, req->context); put_client(req->client); kfree(req); } } int rdma_resolve_ip(struct rdma_addr_client *client, struct sockaddr *src_addr, struct sockaddr *dst_addr, struct rdma_dev_addr *addr, int timeout_ms, void (*callback)(int status, struct sockaddr *src_addr, struct rdma_dev_addr *addr, void *context), void *context) { struct sockaddr *src_in, *dst_in; struct addr_req *req; int ret = 0; req = kzalloc(sizeof *req, GFP_KERNEL); if (!req) return -ENOMEM; src_in = (struct sockaddr *) &req->src_addr; dst_in = (struct sockaddr *) &req->dst_addr; if (src_addr) { if (src_addr->sa_family != dst_addr->sa_family) { ret = -EINVAL; goto err; } memcpy(src_in, src_addr, ip_addr_size(src_addr)); } else { src_in->sa_family = dst_addr->sa_family; } memcpy(dst_in, dst_addr, ip_addr_size(dst_addr)); req->addr = addr; req->callback = callback; req->context = context; req->client = client; atomic_inc(&client->refcount); req->status = addr_resolve(src_in, dst_in, addr); switch (req->status) { case 0: req->timeout = jiffies; queue_req(req); break; case -ENODATA: req->timeout = msecs_to_jiffies(timeout_ms) + jiffies; queue_req(req); break; default: ret = req->status; atomic_dec(&client->refcount); goto err; } return ret; err: kfree(req); return ret; } EXPORT_SYMBOL(rdma_resolve_ip); void rdma_addr_cancel(struct rdma_dev_addr *addr) { struct addr_req *req, *temp_req; mutex_lock(&lock); list_for_each_entry_safe(req, temp_req, &req_list, list) { if (req->addr == addr) { req->status = -ECANCELED; req->timeout = jiffies; list_move(&req->list, &req_list); set_timeout(req->timeout); break; } } mutex_unlock(&lock); } EXPORT_SYMBOL(rdma_addr_cancel); static int netevent_callback(struct notifier_block *self, unsigned long event, void *ctx) { if (event == NETEVENT_NEIGH_UPDATE) { #ifdef __linux__ struct neighbour *neigh = ctx; if (neigh->nud_state & NUD_VALID) { set_timeout(jiffies); } #else set_timeout(jiffies); #endif } return 0; } static struct notifier_block nb = { .notifier_call = netevent_callback }; -static int addr_init(void) +static int __init addr_init(void) { INIT_DELAYED_WORK(&work, process_req); addr_wq = create_singlethread_workqueue("ib_addr"); if (!addr_wq) return -ENOMEM; register_netevent_notifier(&nb); return 0; } -static void addr_cleanup(void) +static void __exit addr_cleanup(void) { unregister_netevent_notifier(&nb); destroy_workqueue(addr_wq); } module_init(addr_init); module_exit(addr_cleanup); Index: stable/10/sys/ofed/drivers/infiniband/core/agent.c 
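/* Sketch of the asynchronous contract of rdma_resolve_ip() above: the caller
 * registers a client once with rdma_addr_register_client(), submits a
 * resolve with a timeout, and the callback fires exactly once, either on the
 * fast path (status 0) or from process_req() once ARP/ND supplies the link
 * address or the timeout lapses (-ETIMEDOUT).  Names are illustrative. */
static struct rdma_addr_client example_client;

static void
example_resolved(int status, struct sockaddr *src, struct rdma_dev_addr *addr,
    void *context)
{
	/* On status == 0, addr->dst_dev_addr holds the resolved L2 address. */
}

static int
example_resolve(struct sockaddr *src, struct sockaddr *dst,
    struct rdma_dev_addr *addr, void *ctx)
{
	return (rdma_resolve_ip(&example_client, src, dst, addr,
	    2000 /* ms */, example_resolved, ctx));
}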
=================================================================== --- stable/10/sys/ofed/drivers/infiniband/core/agent.c (revision 273245) +++ stable/10/sys/ofed/drivers/infiniband/core/agent.c (revision 273246) @@ -1,216 +1,217 @@ /* * Copyright (c) 2004, 2005 Mellanox Technologies Ltd. All rights reserved. * Copyright (c) 2004, 2005 Infinicon Corporation. All rights reserved. * Copyright (c) 2004, 2005 Intel Corporation. All rights reserved. * Copyright (c) 2004, 2005 Topspin Corporation. All rights reserved. * Copyright (c) 2004-2007 Voltaire Corporation. All rights reserved. * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU * General Public License (GPL) Version 2, available from the file * COPYING in the main directory of this source tree, or the * OpenIB.org BSD license below: * * Redistribution and use in source and binary forms, with or * without modification, are permitted provided that the following * conditions are met: * * - Redistributions of source code must retain the above * copyright notice, this list of conditions and the following * disclaimer. * * - Redistributions in binary form must reproduce the above * copyright notice, this list of conditions and the following * disclaimer in the documentation and/or other materials * provided with the distribution. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
* */ #include #include #include "agent.h" #include "smi.h" #include "mad_priv.h" #define SPFX "ib_agent: " struct ib_agent_port_private { struct list_head port_list; struct ib_mad_agent *agent[2]; }; static DEFINE_SPINLOCK(ib_agent_port_list_lock); static LIST_HEAD(ib_agent_port_list); static struct ib_agent_port_private * __ib_get_agent_port(struct ib_device *device, int port_num) { struct ib_agent_port_private *entry; list_for_each_entry(entry, &ib_agent_port_list, port_list) { if (entry->agent[1]->device == device && entry->agent[1]->port_num == port_num) return entry; } return NULL; } static struct ib_agent_port_private * ib_get_agent_port(struct ib_device *device, int port_num) { struct ib_agent_port_private *entry; unsigned long flags; spin_lock_irqsave(&ib_agent_port_list_lock, flags); entry = __ib_get_agent_port(device, port_num); spin_unlock_irqrestore(&ib_agent_port_list_lock, flags); return entry; } void agent_send_response(struct ib_mad *mad, struct ib_grh *grh, struct ib_wc *wc, struct ib_device *device, int port_num, int qpn) { struct ib_agent_port_private *port_priv; struct ib_mad_agent *agent; struct ib_mad_send_buf *send_buf; struct ib_ah *ah; struct ib_mad_send_wr_private *mad_send_wr; if (device->node_type == RDMA_NODE_IB_SWITCH) port_priv = ib_get_agent_port(device, 0); else port_priv = ib_get_agent_port(device, port_num); if (!port_priv) { printk(KERN_ERR SPFX "Unable to find port agent\n"); return; } agent = port_priv->agent[qpn]; ah = ib_create_ah_from_wc(agent->qp->pd, wc, grh, port_num); if (IS_ERR(ah)) { - printk(KERN_ERR SPFX "ib_create_ah_from_wc error\n"); + printk(KERN_ERR SPFX "ib_create_ah_from_wc error %ld\n", + PTR_ERR(ah)); return; } send_buf = ib_create_send_mad(agent, wc->src_qp, wc->pkey_index, 0, IB_MGMT_MAD_HDR, IB_MGMT_MAD_DATA, GFP_KERNEL); if (IS_ERR(send_buf)) { printk(KERN_ERR SPFX "ib_create_send_mad error\n"); goto err1; } memcpy(send_buf->mad, mad, sizeof *mad); send_buf->ah = ah; if (device->node_type == RDMA_NODE_IB_SWITCH) { mad_send_wr = container_of(send_buf, struct ib_mad_send_wr_private, send_buf); mad_send_wr->send_wr.wr.ud.port_num = port_num; } if (ib_post_send_mad(send_buf, NULL)) { printk(KERN_ERR SPFX "ib_post_send_mad error\n"); goto err2; } return; err2: ib_free_send_mad(send_buf); err1: ib_destroy_ah(ah); } static void agent_send_handler(struct ib_mad_agent *mad_agent, struct ib_mad_send_wc *mad_send_wc) { ib_destroy_ah(mad_send_wc->send_buf->ah); ib_free_send_mad(mad_send_wc->send_buf); } int ib_agent_port_open(struct ib_device *device, int port_num) { struct ib_agent_port_private *port_priv; unsigned long flags; int ret; /* Create new device info */ port_priv = kzalloc(sizeof *port_priv, GFP_KERNEL); if (!port_priv) { printk(KERN_ERR SPFX "No memory for ib_agent_port_private\n"); ret = -ENOMEM; goto error1; } if (rdma_port_get_link_layer(device, port_num) == IB_LINK_LAYER_INFINIBAND) { /* Obtain send only MAD agent for SMI QP */ port_priv->agent[0] = ib_register_mad_agent(device, port_num, IB_QPT_SMI, NULL, 0, &agent_send_handler, NULL, NULL); if (IS_ERR(port_priv->agent[0])) { ret = PTR_ERR(port_priv->agent[0]); goto error2; } } /* Obtain send only MAD agent for GSI QP */ port_priv->agent[1] = ib_register_mad_agent(device, port_num, IB_QPT_GSI, NULL, 0, &agent_send_handler, NULL, NULL); if (IS_ERR(port_priv->agent[1])) { ret = PTR_ERR(port_priv->agent[1]); goto error3; } spin_lock_irqsave(&ib_agent_port_list_lock, flags); list_add_tail(&port_priv->port_list, &ib_agent_port_list); 
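/* The hunk above now logs why ib_create_ah_from_wc() failed.  A minimal
 * sketch of the ERR_PTR convention it relies on: such functions return
 * either a valid pointer or an encoded negative errno, never NULL, so
 * IS_ERR()/PTR_ERR() are the check-and-report pair. */
static void
example_log_ah_error(struct ib_pd *pd, struct ib_wc *wc, struct ib_grh *grh,
    int port_num)
{
	struct ib_ah *ah;

	ah = ib_create_ah_from_wc(pd, wc, grh, port_num);
	if (IS_ERR(ah)) {
		/* PTR_ERR() recovers the negative errno encoded in the pointer */
		printk(KERN_ERR "ib_create_ah_from_wc failed: %ld\n",
		    PTR_ERR(ah));
		return;
	}
	ib_destroy_ah(ah);
}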
spin_unlock_irqrestore(&ib_agent_port_list_lock, flags); return 0; error3: if (port_priv->agent[0]) ib_unregister_mad_agent(port_priv->agent[0]); error2: kfree(port_priv); error1: return ret; } int ib_agent_port_close(struct ib_device *device, int port_num) { struct ib_agent_port_private *port_priv; unsigned long flags; spin_lock_irqsave(&ib_agent_port_list_lock, flags); port_priv = __ib_get_agent_port(device, port_num); if (port_priv == NULL) { spin_unlock_irqrestore(&ib_agent_port_list_lock, flags); printk(KERN_ERR SPFX "Port %d not found\n", port_num); return -ENODEV; } list_del(&port_priv->port_list); spin_unlock_irqrestore(&ib_agent_port_list_lock, flags); ib_unregister_mad_agent(port_priv->agent[1]); if (port_priv->agent[0]) ib_unregister_mad_agent(port_priv->agent[0]); kfree(port_priv); return 0; } Index: stable/10/sys/ofed/drivers/infiniband/core/ucm.c =================================================================== --- stable/10/sys/ofed/drivers/infiniband/core/ucm.c (revision 273245) +++ stable/10/sys/ofed/drivers/infiniband/core/ucm.c (revision 273246) @@ -1,1345 +1,1344 @@ /* * Copyright (c) 2005 Topspin Communications. All rights reserved. * Copyright (c) 2005 Intel Corporation. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU * General Public License (GPL) Version 2, available from the file * COPYING in the main directory of this source tree, or the * OpenIB.org BSD license below: * * Redistribution and use in source and binary forms, with or * without modification, are permitted provided that the following * conditions are met: * * - Redistributions of source code must retain the above * copyright notice, this list of conditions and the following * disclaimer. * * - Redistributions in binary form must reproduce the above * copyright notice, this list of conditions and the following * disclaimer in the documentation and/or other materials * provided with the distribution. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ #include #include #include #include #include #include #include -#include #include #include #include #include #include #include #include #include MODULE_AUTHOR("Libor Michalek"); MODULE_DESCRIPTION("InfiniBand userspace Connection Manager access"); MODULE_LICENSE("Dual BSD/GPL"); struct ib_ucm_device { int devnum; struct cdev cdev; struct device dev; struct ib_device *ib_dev; }; struct ib_ucm_file { struct mutex file_mutex; struct file *filp; struct ib_ucm_device *device; struct list_head ctxs; struct list_head events; wait_queue_head_t poll_wait; }; struct ib_ucm_context { int id; struct completion comp; atomic_t ref; int events_reported; struct ib_ucm_file *file; struct ib_cm_id *cm_id; __u64 uid; struct list_head events; /* list of pending events. 
*/ struct list_head file_list; /* member in file ctx list */ }; struct ib_ucm_event { struct ib_ucm_context *ctx; struct list_head file_list; /* member in file event list */ struct list_head ctx_list; /* member in ctx event list */ struct ib_cm_id *cm_id; struct ib_ucm_event_resp resp; void *data; void *info; int data_len; int info_len; }; enum { IB_UCM_MAJOR = 231, IB_UCM_BASE_MINOR = 224, IB_UCM_MAX_DEVICES = 32 }; #define IB_UCM_BASE_DEV MKDEV(IB_UCM_MAJOR, IB_UCM_BASE_MINOR) static void ib_ucm_add_one(struct ib_device *device); static void ib_ucm_remove_one(struct ib_device *device); static struct ib_client ucm_client = { .name = "ucm", .add = ib_ucm_add_one, .remove = ib_ucm_remove_one }; static DEFINE_MUTEX(ctx_id_mutex); static DEFINE_IDR(ctx_id_table); static DECLARE_BITMAP(dev_map, IB_UCM_MAX_DEVICES); static struct ib_ucm_context *ib_ucm_ctx_get(struct ib_ucm_file *file, int id) { struct ib_ucm_context *ctx; mutex_lock(&ctx_id_mutex); ctx = idr_find(&ctx_id_table, id); if (!ctx) ctx = ERR_PTR(-ENOENT); else if (ctx->file != file) ctx = ERR_PTR(-EINVAL); else atomic_inc(&ctx->ref); mutex_unlock(&ctx_id_mutex); return ctx; } static void ib_ucm_ctx_put(struct ib_ucm_context *ctx) { if (atomic_dec_and_test(&ctx->ref)) complete(&ctx->comp); } static inline int ib_ucm_new_cm_id(int event) { return event == IB_CM_REQ_RECEIVED || event == IB_CM_SIDR_REQ_RECEIVED; } static void ib_ucm_cleanup_events(struct ib_ucm_context *ctx) { struct ib_ucm_event *uevent; mutex_lock(&ctx->file->file_mutex); list_del(&ctx->file_list); while (!list_empty(&ctx->events)) { uevent = list_entry(ctx->events.next, struct ib_ucm_event, ctx_list); list_del(&uevent->file_list); list_del(&uevent->ctx_list); mutex_unlock(&ctx->file->file_mutex); /* clear incoming connections. 
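/* Sketch of the reference rule enforced by ib_ucm_ctx_get()/_put() above:
 * each command handler pins the context for the duration of one CM call,
 * while ib_ucm_destroy_id() drops the base reference and then waits on
 * ctx->comp until every such pin has been released.  The wrapper and the
 * particular CM call are illustrative. */
static ssize_t
example_with_ctx(struct ib_ucm_file *file, int id)
{
	struct ib_ucm_context *ctx;
	int result;

	ctx = ib_ucm_ctx_get(file, id);
	if (IS_ERR(ctx))
		return PTR_ERR(ctx);
	result = ib_cm_notify(ctx->cm_id, IB_EVENT_COMM_EST);	/* any CM call */
	ib_ucm_ctx_put(ctx);
	return result;
}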
*/ if (ib_ucm_new_cm_id(uevent->resp.event)) ib_destroy_cm_id(uevent->cm_id); kfree(uevent); mutex_lock(&ctx->file->file_mutex); } mutex_unlock(&ctx->file->file_mutex); } static struct ib_ucm_context *ib_ucm_ctx_alloc(struct ib_ucm_file *file) { struct ib_ucm_context *ctx; int result; ctx = kzalloc(sizeof *ctx, GFP_KERNEL); if (!ctx) return NULL; atomic_set(&ctx->ref, 1); init_completion(&ctx->comp); ctx->file = file; INIT_LIST_HEAD(&ctx->events); do { result = idr_pre_get(&ctx_id_table, GFP_KERNEL); if (!result) goto error; mutex_lock(&ctx_id_mutex); result = idr_get_new(&ctx_id_table, ctx, &ctx->id); mutex_unlock(&ctx_id_mutex); } while (result == -EAGAIN); if (result) goto error; list_add_tail(&ctx->file_list, &file->ctxs); return ctx; error: kfree(ctx); return NULL; } static void ib_ucm_event_req_get(struct ib_ucm_req_event_resp *ureq, struct ib_cm_req_event_param *kreq) { ureq->remote_ca_guid = kreq->remote_ca_guid; ureq->remote_qkey = kreq->remote_qkey; ureq->remote_qpn = kreq->remote_qpn; ureq->qp_type = kreq->qp_type; ureq->starting_psn = kreq->starting_psn; ureq->responder_resources = kreq->responder_resources; ureq->initiator_depth = kreq->initiator_depth; ureq->local_cm_response_timeout = kreq->local_cm_response_timeout; ureq->flow_control = kreq->flow_control; ureq->remote_cm_response_timeout = kreq->remote_cm_response_timeout; ureq->retry_count = kreq->retry_count; ureq->rnr_retry_count = kreq->rnr_retry_count; ureq->srq = kreq->srq; ureq->port = kreq->port; ib_copy_path_rec_to_user(&ureq->primary_path, kreq->primary_path); if (kreq->alternate_path) ib_copy_path_rec_to_user(&ureq->alternate_path, kreq->alternate_path); } static void ib_ucm_event_rep_get(struct ib_ucm_rep_event_resp *urep, struct ib_cm_rep_event_param *krep) { urep->remote_ca_guid = krep->remote_ca_guid; urep->remote_qkey = krep->remote_qkey; urep->remote_qpn = krep->remote_qpn; urep->starting_psn = krep->starting_psn; urep->responder_resources = krep->responder_resources; urep->initiator_depth = krep->initiator_depth; urep->target_ack_delay = krep->target_ack_delay; urep->failover_accepted = krep->failover_accepted; urep->flow_control = krep->flow_control; urep->rnr_retry_count = krep->rnr_retry_count; urep->srq = krep->srq; } static void ib_ucm_event_sidr_rep_get(struct ib_ucm_sidr_rep_event_resp *urep, struct ib_cm_sidr_rep_event_param *krep) { urep->status = krep->status; urep->qkey = krep->qkey; urep->qpn = krep->qpn; }; static int ib_ucm_event_process(struct ib_cm_event *evt, struct ib_ucm_event *uvt) { void *info = NULL; switch (evt->event) { case IB_CM_REQ_RECEIVED: ib_ucm_event_req_get(&uvt->resp.u.req_resp, &evt->param.req_rcvd); uvt->data_len = IB_CM_REQ_PRIVATE_DATA_SIZE; uvt->resp.present = IB_UCM_PRES_PRIMARY; uvt->resp.present |= (evt->param.req_rcvd.alternate_path ? 
IB_UCM_PRES_ALTERNATE : 0); break; case IB_CM_REP_RECEIVED: ib_ucm_event_rep_get(&uvt->resp.u.rep_resp, &evt->param.rep_rcvd); uvt->data_len = IB_CM_REP_PRIVATE_DATA_SIZE; break; case IB_CM_RTU_RECEIVED: uvt->data_len = IB_CM_RTU_PRIVATE_DATA_SIZE; uvt->resp.u.send_status = evt->param.send_status; break; case IB_CM_DREQ_RECEIVED: uvt->data_len = IB_CM_DREQ_PRIVATE_DATA_SIZE; uvt->resp.u.send_status = evt->param.send_status; break; case IB_CM_DREP_RECEIVED: uvt->data_len = IB_CM_DREP_PRIVATE_DATA_SIZE; uvt->resp.u.send_status = evt->param.send_status; break; case IB_CM_MRA_RECEIVED: uvt->resp.u.mra_resp.timeout = evt->param.mra_rcvd.service_timeout; uvt->data_len = IB_CM_MRA_PRIVATE_DATA_SIZE; break; case IB_CM_REJ_RECEIVED: uvt->resp.u.rej_resp.reason = evt->param.rej_rcvd.reason; uvt->data_len = IB_CM_REJ_PRIVATE_DATA_SIZE; uvt->info_len = evt->param.rej_rcvd.ari_length; info = evt->param.rej_rcvd.ari; break; case IB_CM_LAP_RECEIVED: ib_copy_path_rec_to_user(&uvt->resp.u.lap_resp.path, evt->param.lap_rcvd.alternate_path); uvt->data_len = IB_CM_LAP_PRIVATE_DATA_SIZE; uvt->resp.present = IB_UCM_PRES_ALTERNATE; break; case IB_CM_APR_RECEIVED: uvt->resp.u.apr_resp.status = evt->param.apr_rcvd.ap_status; uvt->data_len = IB_CM_APR_PRIVATE_DATA_SIZE; uvt->info_len = evt->param.apr_rcvd.info_len; info = evt->param.apr_rcvd.apr_info; break; case IB_CM_SIDR_REQ_RECEIVED: uvt->resp.u.sidr_req_resp.pkey = evt->param.sidr_req_rcvd.pkey; uvt->resp.u.sidr_req_resp.port = evt->param.sidr_req_rcvd.port; uvt->data_len = IB_CM_SIDR_REQ_PRIVATE_DATA_SIZE; break; case IB_CM_SIDR_REP_RECEIVED: ib_ucm_event_sidr_rep_get(&uvt->resp.u.sidr_rep_resp, &evt->param.sidr_rep_rcvd); uvt->data_len = IB_CM_SIDR_REP_PRIVATE_DATA_SIZE; uvt->info_len = evt->param.sidr_rep_rcvd.info_len; info = evt->param.sidr_rep_rcvd.info; break; default: uvt->resp.u.send_status = evt->param.send_status; break; } if (uvt->data_len) { uvt->data = kmemdup(evt->private_data, uvt->data_len, GFP_KERNEL); if (!uvt->data) goto err1; uvt->resp.present |= IB_UCM_PRES_DATA; } if (uvt->info_len) { uvt->info = kmemdup(info, uvt->info_len, GFP_KERNEL); if (!uvt->info) goto err2; uvt->resp.present |= IB_UCM_PRES_INFO; } return 0; err2: kfree(uvt->data); err1: return -ENOMEM; } static int ib_ucm_event_handler(struct ib_cm_id *cm_id, struct ib_cm_event *event) { struct ib_ucm_event *uevent; struct ib_ucm_context *ctx; int result = 0; ctx = cm_id->context; uevent = kzalloc(sizeof *uevent, GFP_KERNEL); if (!uevent) goto err1; uevent->ctx = ctx; uevent->cm_id = cm_id; uevent->resp.uid = ctx->uid; uevent->resp.id = ctx->id; uevent->resp.event = event->event; result = ib_ucm_event_process(event, uevent); if (result) goto err2; mutex_lock(&ctx->file->file_mutex); list_add_tail(&uevent->file_list, &ctx->file->events); list_add_tail(&uevent->ctx_list, &ctx->events); wake_up_interruptible(&ctx->file->poll_wait); if (ctx->file->filp) selwakeup(&ctx->file->filp->f_selinfo); mutex_unlock(&ctx->file->file_mutex); return 0; err2: kfree(uevent); err1: /* Destroy new cm_id's */ return ib_ucm_new_cm_id(event->event); } static ssize_t ib_ucm_event(struct ib_ucm_file *file, const char __user *inbuf, int in_len, int out_len) { struct ib_ucm_context *ctx; struct ib_ucm_event_get cmd; struct ib_ucm_event *uevent; int result = 0; DEFINE_WAIT(wait); if (out_len < sizeof(struct ib_ucm_event_resp)) return -ENOSPC; if (copy_from_user(&cmd, inbuf, sizeof(cmd))) return -EFAULT; mutex_lock(&file->file_mutex); while (list_empty(&file->events)) { mutex_unlock(&file->file_mutex); if 
(file->filp->f_flags & O_NONBLOCK) return -EAGAIN; if (wait_event_interruptible(file->poll_wait, !list_empty(&file->events))) return -ERESTARTSYS; mutex_lock(&file->file_mutex); } uevent = list_entry(file->events.next, struct ib_ucm_event, file_list); if (ib_ucm_new_cm_id(uevent->resp.event)) { ctx = ib_ucm_ctx_alloc(file); if (!ctx) { result = -ENOMEM; goto done; } ctx->cm_id = uevent->cm_id; ctx->cm_id->context = ctx; uevent->resp.id = ctx->id; } if (copy_to_user((void __user *)(unsigned long)cmd.response, &uevent->resp, sizeof(uevent->resp))) { result = -EFAULT; goto done; } if (uevent->data) { if (cmd.data_len < uevent->data_len) { result = -ENOMEM; goto done; } if (copy_to_user((void __user *)(unsigned long)cmd.data, uevent->data, uevent->data_len)) { result = -EFAULT; goto done; } } if (uevent->info) { if (cmd.info_len < uevent->info_len) { result = -ENOMEM; goto done; } if (copy_to_user((void __user *)(unsigned long)cmd.info, uevent->info, uevent->info_len)) { result = -EFAULT; goto done; } } list_del(&uevent->file_list); list_del(&uevent->ctx_list); uevent->ctx->events_reported++; kfree(uevent->data); kfree(uevent->info); kfree(uevent); done: mutex_unlock(&file->file_mutex); return result; } static ssize_t ib_ucm_create_id(struct ib_ucm_file *file, const char __user *inbuf, int in_len, int out_len) { struct ib_ucm_create_id cmd; struct ib_ucm_create_id_resp resp; struct ib_ucm_context *ctx; int result; if (out_len < sizeof(resp)) return -ENOSPC; if (copy_from_user(&cmd, inbuf, sizeof(cmd))) return -EFAULT; mutex_lock(&file->file_mutex); ctx = ib_ucm_ctx_alloc(file); mutex_unlock(&file->file_mutex); if (!ctx) return -ENOMEM; ctx->uid = cmd.uid; ctx->cm_id = ib_create_cm_id(file->device->ib_dev, ib_ucm_event_handler, ctx); if (IS_ERR(ctx->cm_id)) { result = PTR_ERR(ctx->cm_id); goto err1; } resp.id = ctx->id; if (copy_to_user((void __user *)(unsigned long)cmd.response, &resp, sizeof(resp))) { result = -EFAULT; goto err2; } return 0; err2: ib_destroy_cm_id(ctx->cm_id); err1: mutex_lock(&ctx_id_mutex); idr_remove(&ctx_id_table, ctx->id); mutex_unlock(&ctx_id_mutex); kfree(ctx); return result; } static ssize_t ib_ucm_destroy_id(struct ib_ucm_file *file, const char __user *inbuf, int in_len, int out_len) { struct ib_ucm_destroy_id cmd; struct ib_ucm_destroy_id_resp resp; struct ib_ucm_context *ctx; int result = 0; if (out_len < sizeof(resp)) return -ENOSPC; if (copy_from_user(&cmd, inbuf, sizeof(cmd))) return -EFAULT; mutex_lock(&ctx_id_mutex); ctx = idr_find(&ctx_id_table, cmd.id); if (!ctx) ctx = ERR_PTR(-ENOENT); else if (ctx->file != file) ctx = ERR_PTR(-EINVAL); else idr_remove(&ctx_id_table, ctx->id); mutex_unlock(&ctx_id_mutex); if (IS_ERR(ctx)) return PTR_ERR(ctx); ib_ucm_ctx_put(ctx); wait_for_completion(&ctx->comp); /* No new events will be generated after destroying the cm_id. */ ib_destroy_cm_id(ctx->cm_id); /* Cleanup events not yet reported to the user. 
*/ ib_ucm_cleanup_events(ctx); resp.events_reported = ctx->events_reported; if (copy_to_user((void __user *)(unsigned long)cmd.response, &resp, sizeof(resp))) result = -EFAULT; kfree(ctx); return result; } static ssize_t ib_ucm_attr_id(struct ib_ucm_file *file, const char __user *inbuf, int in_len, int out_len) { struct ib_ucm_attr_id_resp resp; struct ib_ucm_attr_id cmd; struct ib_ucm_context *ctx; int result = 0; if (out_len < sizeof(resp)) return -ENOSPC; if (copy_from_user(&cmd, inbuf, sizeof(cmd))) return -EFAULT; ctx = ib_ucm_ctx_get(file, cmd.id); if (IS_ERR(ctx)) return PTR_ERR(ctx); resp.service_id = ctx->cm_id->service_id; resp.service_mask = ctx->cm_id->service_mask; resp.local_id = ctx->cm_id->local_id; resp.remote_id = ctx->cm_id->remote_id; if (copy_to_user((void __user *)(unsigned long)cmd.response, &resp, sizeof(resp))) result = -EFAULT; ib_ucm_ctx_put(ctx); return result; } static ssize_t ib_ucm_init_qp_attr(struct ib_ucm_file *file, const char __user *inbuf, int in_len, int out_len) { struct ib_uverbs_qp_attr resp; struct ib_ucm_init_qp_attr cmd; struct ib_ucm_context *ctx; struct ib_qp_attr qp_attr; int result = 0; if (out_len < sizeof(resp)) return -ENOSPC; if (copy_from_user(&cmd, inbuf, sizeof(cmd))) return -EFAULT; ctx = ib_ucm_ctx_get(file, cmd.id); if (IS_ERR(ctx)) return PTR_ERR(ctx); resp.qp_attr_mask = 0; memset(&qp_attr, 0, sizeof qp_attr); qp_attr.qp_state = cmd.qp_state; result = ib_cm_init_qp_attr(ctx->cm_id, &qp_attr, &resp.qp_attr_mask); if (result) goto out; ib_copy_qp_attr_to_user(&resp, &qp_attr); if (copy_to_user((void __user *)(unsigned long)cmd.response, &resp, sizeof(resp))) result = -EFAULT; out: ib_ucm_ctx_put(ctx); return result; } static int ucm_validate_listen(__be64 service_id, __be64 service_mask) { service_id &= service_mask; if (((service_id & IB_CMA_SERVICE_ID_MASK) == IB_CMA_SERVICE_ID) || ((service_id & IB_SDP_SERVICE_ID_MASK) == IB_SDP_SERVICE_ID)) return -EINVAL; return 0; } static ssize_t ib_ucm_listen(struct ib_ucm_file *file, const char __user *inbuf, int in_len, int out_len) { struct ib_ucm_listen cmd; struct ib_ucm_context *ctx; int result; if (copy_from_user(&cmd, inbuf, sizeof(cmd))) return -EFAULT; ctx = ib_ucm_ctx_get(file, cmd.id); if (IS_ERR(ctx)) return PTR_ERR(ctx); result = ucm_validate_listen(cmd.service_id, cmd.service_mask); if (result) goto out; result = ib_cm_listen(ctx->cm_id, cmd.service_id, cmd.service_mask, NULL); out: ib_ucm_ctx_put(ctx); return result; } static ssize_t ib_ucm_notify(struct ib_ucm_file *file, const char __user *inbuf, int in_len, int out_len) { struct ib_ucm_notify cmd; struct ib_ucm_context *ctx; int result; if (copy_from_user(&cmd, inbuf, sizeof(cmd))) return -EFAULT; ctx = ib_ucm_ctx_get(file, cmd.id); if (IS_ERR(ctx)) return PTR_ERR(ctx); result = ib_cm_notify(ctx->cm_id, (enum ib_event_type) cmd.event); ib_ucm_ctx_put(ctx); return result; } static int ib_ucm_alloc_data(const void **dest, u64 src, u32 len) { void *data; *dest = NULL; if (!len) return 0; data = kmalloc(len, GFP_KERNEL); if (!data) return -ENOMEM; if (copy_from_user(data, (void __user *)(unsigned long)src, len)) { kfree(data); return -EFAULT; } *dest = data; return 0; } static int ib_ucm_path_get(struct ib_sa_path_rec **path, u64 src) { struct ib_user_path_rec upath; struct ib_sa_path_rec *sa_path; *path = NULL; if (!src) return 0; sa_path = kmalloc(sizeof(*sa_path), GFP_KERNEL); if (!sa_path) return -ENOMEM; if (copy_from_user(&upath, (void __user *)(unsigned long)src, sizeof(upath))) { kfree(sa_path); return -EFAULT; } 
ib_copy_path_rec_from_user(sa_path, &upath); *path = sa_path; return 0; } static ssize_t ib_ucm_send_req(struct ib_ucm_file *file, const char __user *inbuf, int in_len, int out_len) { struct ib_cm_req_param param; struct ib_ucm_context *ctx; struct ib_ucm_req cmd; int result; param.private_data = NULL; param.primary_path = NULL; param.alternate_path = NULL; if (copy_from_user(&cmd, inbuf, sizeof(cmd))) return -EFAULT; result = ib_ucm_alloc_data(¶m.private_data, cmd.data, cmd.len); if (result) goto done; result = ib_ucm_path_get(¶m.primary_path, cmd.primary_path); if (result) goto done; result = ib_ucm_path_get(¶m.alternate_path, cmd.alternate_path); if (result) goto done; param.private_data_len = cmd.len; param.service_id = cmd.sid; param.qp_num = cmd.qpn; param.qp_type = cmd.qp_type; param.starting_psn = cmd.psn; param.peer_to_peer = cmd.peer_to_peer; param.responder_resources = cmd.responder_resources; param.initiator_depth = cmd.initiator_depth; param.remote_cm_response_timeout = cmd.remote_cm_response_timeout; param.flow_control = cmd.flow_control; param.local_cm_response_timeout = cmd.local_cm_response_timeout; param.retry_count = cmd.retry_count; param.rnr_retry_count = cmd.rnr_retry_count; param.max_cm_retries = cmd.max_cm_retries; param.srq = cmd.srq; ctx = ib_ucm_ctx_get(file, cmd.id); if (!IS_ERR(ctx)) { result = ib_send_cm_req(ctx->cm_id, ¶m); ib_ucm_ctx_put(ctx); } else result = PTR_ERR(ctx); done: kfree(param.private_data); kfree(param.primary_path); kfree(param.alternate_path); return result; } static ssize_t ib_ucm_send_rep(struct ib_ucm_file *file, const char __user *inbuf, int in_len, int out_len) { struct ib_cm_rep_param param; struct ib_ucm_context *ctx; struct ib_ucm_rep cmd; int result; param.private_data = NULL; if (copy_from_user(&cmd, inbuf, sizeof(cmd))) return -EFAULT; result = ib_ucm_alloc_data(¶m.private_data, cmd.data, cmd.len); if (result) return result; param.qp_num = cmd.qpn; param.starting_psn = cmd.psn; param.private_data_len = cmd.len; param.responder_resources = cmd.responder_resources; param.initiator_depth = cmd.initiator_depth; param.failover_accepted = cmd.failover_accepted; param.flow_control = cmd.flow_control; param.rnr_retry_count = cmd.rnr_retry_count; param.srq = cmd.srq; ctx = ib_ucm_ctx_get(file, cmd.id); if (!IS_ERR(ctx)) { ctx->uid = cmd.uid; result = ib_send_cm_rep(ctx->cm_id, ¶m); ib_ucm_ctx_put(ctx); } else result = PTR_ERR(ctx); kfree(param.private_data); return result; } static ssize_t ib_ucm_send_private_data(struct ib_ucm_file *file, const char __user *inbuf, int in_len, int (*func)(struct ib_cm_id *cm_id, const void *private_data, u8 private_data_len)) { struct ib_ucm_private_data cmd; struct ib_ucm_context *ctx; const void *private_data = NULL; int result; if (copy_from_user(&cmd, inbuf, sizeof(cmd))) return -EFAULT; result = ib_ucm_alloc_data(&private_data, cmd.data, cmd.len); if (result) return result; ctx = ib_ucm_ctx_get(file, cmd.id); if (!IS_ERR(ctx)) { result = func(ctx->cm_id, private_data, cmd.len); ib_ucm_ctx_put(ctx); } else result = PTR_ERR(ctx); kfree(private_data); return result; } static ssize_t ib_ucm_send_rtu(struct ib_ucm_file *file, const char __user *inbuf, int in_len, int out_len) { return ib_ucm_send_private_data(file, inbuf, in_len, ib_send_cm_rtu); } static ssize_t ib_ucm_send_dreq(struct ib_ucm_file *file, const char __user *inbuf, int in_len, int out_len) { return ib_ucm_send_private_data(file, inbuf, in_len, ib_send_cm_dreq); } static ssize_t ib_ucm_send_drep(struct ib_ucm_file *file, const char __user 
*inbuf, int in_len, int out_len) { return ib_ucm_send_private_data(file, inbuf, in_len, ib_send_cm_drep); } static ssize_t ib_ucm_send_info(struct ib_ucm_file *file, const char __user *inbuf, int in_len, int (*func)(struct ib_cm_id *cm_id, int status, const void *info, u8 info_len, const void *data, u8 data_len)) { struct ib_ucm_context *ctx; struct ib_ucm_info cmd; const void *data = NULL; const void *info = NULL; int result; if (copy_from_user(&cmd, inbuf, sizeof(cmd))) return -EFAULT; result = ib_ucm_alloc_data(&data, cmd.data, cmd.data_len); if (result) goto done; result = ib_ucm_alloc_data(&info, cmd.info, cmd.info_len); if (result) goto done; ctx = ib_ucm_ctx_get(file, cmd.id); if (!IS_ERR(ctx)) { result = func(ctx->cm_id, cmd.status, info, cmd.info_len, data, cmd.data_len); ib_ucm_ctx_put(ctx); } else result = PTR_ERR(ctx); done: kfree(data); kfree(info); return result; } static ssize_t ib_ucm_send_rej(struct ib_ucm_file *file, const char __user *inbuf, int in_len, int out_len) { return ib_ucm_send_info(file, inbuf, in_len, (void *)ib_send_cm_rej); } static ssize_t ib_ucm_send_apr(struct ib_ucm_file *file, const char __user *inbuf, int in_len, int out_len) { return ib_ucm_send_info(file, inbuf, in_len, (void *)ib_send_cm_apr); } static ssize_t ib_ucm_send_mra(struct ib_ucm_file *file, const char __user *inbuf, int in_len, int out_len) { struct ib_ucm_context *ctx; struct ib_ucm_mra cmd; const void *data = NULL; int result; if (copy_from_user(&cmd, inbuf, sizeof(cmd))) return -EFAULT; result = ib_ucm_alloc_data(&data, cmd.data, cmd.len); if (result) return result; ctx = ib_ucm_ctx_get(file, cmd.id); if (!IS_ERR(ctx)) { result = ib_send_cm_mra(ctx->cm_id, cmd.timeout, data, cmd.len); ib_ucm_ctx_put(ctx); } else result = PTR_ERR(ctx); kfree(data); return result; } static ssize_t ib_ucm_send_lap(struct ib_ucm_file *file, const char __user *inbuf, int in_len, int out_len) { struct ib_ucm_context *ctx; struct ib_sa_path_rec *path = NULL; struct ib_ucm_lap cmd; const void *data = NULL; int result; if (copy_from_user(&cmd, inbuf, sizeof(cmd))) return -EFAULT; result = ib_ucm_alloc_data(&data, cmd.data, cmd.len); if (result) goto done; result = ib_ucm_path_get(&path, cmd.path); if (result) goto done; ctx = ib_ucm_ctx_get(file, cmd.id); if (!IS_ERR(ctx)) { result = ib_send_cm_lap(ctx->cm_id, path, data, cmd.len); ib_ucm_ctx_put(ctx); } else result = PTR_ERR(ctx); done: kfree(data); kfree(path); return result; } static ssize_t ib_ucm_send_sidr_req(struct ib_ucm_file *file, const char __user *inbuf, int in_len, int out_len) { struct ib_cm_sidr_req_param param; struct ib_ucm_context *ctx; struct ib_ucm_sidr_req cmd; int result; param.private_data = NULL; param.path = NULL; if (copy_from_user(&cmd, inbuf, sizeof(cmd))) return -EFAULT; result = ib_ucm_alloc_data(¶m.private_data, cmd.data, cmd.len); if (result) goto done; result = ib_ucm_path_get(¶m.path, cmd.path); if (result) goto done; param.private_data_len = cmd.len; param.service_id = cmd.sid; param.timeout_ms = cmd.timeout; param.max_cm_retries = cmd.max_cm_retries; ctx = ib_ucm_ctx_get(file, cmd.id); if (!IS_ERR(ctx)) { result = ib_send_cm_sidr_req(ctx->cm_id, ¶m); ib_ucm_ctx_put(ctx); } else result = PTR_ERR(ctx); done: kfree(param.private_data); kfree(param.path); return result; } static ssize_t ib_ucm_send_sidr_rep(struct ib_ucm_file *file, const char __user *inbuf, int in_len, int out_len) { struct ib_cm_sidr_rep_param param; struct ib_ucm_sidr_rep cmd; struct ib_ucm_context *ctx; int result; param.info = NULL; if (copy_from_user(&cmd, 
inbuf, sizeof(cmd))) return -EFAULT; result = ib_ucm_alloc_data(&param.private_data, cmd.data, cmd.data_len); if (result) goto done; result = ib_ucm_alloc_data(&param.info, cmd.info, cmd.info_len); if (result) goto done; param.qp_num = cmd.qpn; param.qkey = cmd.qkey; param.status = cmd.status; param.info_length = cmd.info_len; param.private_data_len = cmd.data_len; ctx = ib_ucm_ctx_get(file, cmd.id); if (!IS_ERR(ctx)) { result = ib_send_cm_sidr_rep(ctx->cm_id, &param); ib_ucm_ctx_put(ctx); } else result = PTR_ERR(ctx); done: kfree(param.private_data); kfree(param.info); return result; } static ssize_t (*ucm_cmd_table[])(struct ib_ucm_file *file, const char __user *inbuf, int in_len, int out_len) = { [IB_USER_CM_CMD_CREATE_ID] = ib_ucm_create_id, [IB_USER_CM_CMD_DESTROY_ID] = ib_ucm_destroy_id, [IB_USER_CM_CMD_ATTR_ID] = ib_ucm_attr_id, [IB_USER_CM_CMD_LISTEN] = ib_ucm_listen, [IB_USER_CM_CMD_NOTIFY] = ib_ucm_notify, [IB_USER_CM_CMD_SEND_REQ] = ib_ucm_send_req, [IB_USER_CM_CMD_SEND_REP] = ib_ucm_send_rep, [IB_USER_CM_CMD_SEND_RTU] = ib_ucm_send_rtu, [IB_USER_CM_CMD_SEND_DREQ] = ib_ucm_send_dreq, [IB_USER_CM_CMD_SEND_DREP] = ib_ucm_send_drep, [IB_USER_CM_CMD_SEND_REJ] = ib_ucm_send_rej, [IB_USER_CM_CMD_SEND_MRA] = ib_ucm_send_mra, [IB_USER_CM_CMD_SEND_LAP] = ib_ucm_send_lap, [IB_USER_CM_CMD_SEND_APR] = ib_ucm_send_apr, [IB_USER_CM_CMD_SEND_SIDR_REQ] = ib_ucm_send_sidr_req, [IB_USER_CM_CMD_SEND_SIDR_REP] = ib_ucm_send_sidr_rep, [IB_USER_CM_CMD_EVENT] = ib_ucm_event, [IB_USER_CM_CMD_INIT_QP_ATTR] = ib_ucm_init_qp_attr, }; static ssize_t ib_ucm_write(struct file *filp, const char __user *buf, size_t len, loff_t *pos) { struct ib_ucm_file *file = filp->private_data; struct ib_ucm_cmd_hdr hdr; ssize_t result; if (len < sizeof(hdr)) return -EINVAL; if (copy_from_user(&hdr, buf, sizeof(hdr))) return -EFAULT; if (hdr.cmd < 0 || hdr.cmd >= ARRAY_SIZE(ucm_cmd_table)) return -EINVAL; if (hdr.in + sizeof(hdr) > len) return -EINVAL; result = ucm_cmd_table[hdr.cmd](file, buf + sizeof(hdr), hdr.in, hdr.out); if (!result) result = len; return result; } static unsigned int ib_ucm_poll(struct file *filp, struct poll_table_struct *wait) { struct ib_ucm_file *file = filp->private_data; unsigned int mask = 0; poll_wait(filp, &file->poll_wait, wait); if (!list_empty(&file->events)) mask = POLLIN | POLLRDNORM; return mask; } /* * ib_ucm_open() does not need the BKL: * * - no global state is referred to; * - there is no ioctl method to race against; * - no further module initialization is required for open to work * after the device is registered. 
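 *
 * (A minimal usage sketch of the write() interface above, assuming the
 *  ib_user_cm.h ABI structs: each write() carries a struct
 *  ib_ucm_cmd_hdr followed by hdr.in bytes of command payload, and
 *  ucm_cmd_table[hdr.cmd] dispatches it.
 *
 *	struct ib_ucm_cmd_hdr hdr;
 *	char buf[sizeof(hdr) + sizeof(struct ib_ucm_create_id)];
 *
 *	hdr.cmd = IB_USER_CM_CMD_CREATE_ID;
 *	hdr.in  = sizeof(struct ib_ucm_create_id);
 *	hdr.out = sizeof(struct ib_ucm_create_id_resp);
 *	memcpy(buf, &hdr, sizeof(hdr));
 *	... fill in the ib_ucm_create_id payload after the header ...
 *	write(fd, buf, sizeof(buf));
 *
 *  ib_ucm_write() rejects the request unless len >= sizeof(hdr) + hdr.in.)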
*/ static int ib_ucm_open(struct inode *inode, struct file *filp) { struct ib_ucm_file *file; file = kzalloc(sizeof(*file), GFP_KERNEL); if (!file) return -ENOMEM; INIT_LIST_HEAD(&file->events); INIT_LIST_HEAD(&file->ctxs); init_waitqueue_head(&file->poll_wait); mutex_init(&file->file_mutex); filp->private_data = file; file->filp = filp; file->device = container_of(inode->i_cdev->si_drv1, struct ib_ucm_device, cdev); return 0; } static int ib_ucm_close(struct inode *inode, struct file *filp) { struct ib_ucm_file *file = filp->private_data; struct ib_ucm_context *ctx; mutex_lock(&file->file_mutex); while (!list_empty(&file->ctxs)) { ctx = list_entry(file->ctxs.next, struct ib_ucm_context, file_list); mutex_unlock(&file->file_mutex); mutex_lock(&ctx_id_mutex); idr_remove(&ctx_id_table, ctx->id); mutex_unlock(&ctx_id_mutex); ib_destroy_cm_id(ctx->cm_id); ib_ucm_cleanup_events(ctx); kfree(ctx); mutex_lock(&file->file_mutex); } mutex_unlock(&file->file_mutex); kfree(file); return 0; } static void ib_ucm_release_dev(struct device *dev) { struct ib_ucm_device *ucm_dev; ucm_dev = container_of(dev, struct ib_ucm_device, dev); cdev_del(&ucm_dev->cdev); clear_bit(ucm_dev->devnum, dev_map); kfree(ucm_dev); } static const struct file_operations ucm_fops = { .owner = THIS_MODULE, .open = ib_ucm_open, .release = ib_ucm_close, .write = ib_ucm_write, .poll = ib_ucm_poll, }; static ssize_t show_ibdev(struct device *dev, struct device_attribute *attr, char *buf) { struct ib_ucm_device *ucm_dev; ucm_dev = container_of(dev, struct ib_ucm_device, dev); return sprintf(buf, "%s\n", ucm_dev->ib_dev->name); } static DEVICE_ATTR(ibdev, S_IRUGO, show_ibdev, NULL); static void ib_ucm_add_one(struct ib_device *device) { struct ib_ucm_device *ucm_dev; if (!device->alloc_ucontext || rdma_node_get_transport(device->node_type) != RDMA_TRANSPORT_IB) return; ucm_dev = kzalloc(sizeof *ucm_dev, GFP_KERNEL); if (!ucm_dev) return; ucm_dev->ib_dev = device; ucm_dev->devnum = find_first_zero_bit(dev_map, IB_UCM_MAX_DEVICES); if (ucm_dev->devnum >= IB_UCM_MAX_DEVICES) goto err; set_bit(ucm_dev->devnum, dev_map); cdev_init(&ucm_dev->cdev, &ucm_fops); ucm_dev->cdev.owner = THIS_MODULE; kobject_set_name(&ucm_dev->cdev.kobj, "ucm%d", ucm_dev->devnum); if (cdev_add(&ucm_dev->cdev, IB_UCM_BASE_DEV + ucm_dev->devnum, 1)) goto err; ucm_dev->dev.class = &cm_class; ucm_dev->dev.parent = device->dma_device; ucm_dev->dev.devt = ucm_dev->cdev.dev; ucm_dev->dev.release = ib_ucm_release_dev; dev_set_name(&ucm_dev->dev, "ucm%d", ucm_dev->devnum); if (device_register(&ucm_dev->dev)) goto err_cdev; if (device_create_file(&ucm_dev->dev, &dev_attr_ibdev)) goto err_dev; ib_set_client_data(device, &ucm_client, ucm_dev); return; err_dev: device_unregister(&ucm_dev->dev); err_cdev: cdev_del(&ucm_dev->cdev); clear_bit(ucm_dev->devnum, dev_map); err: kfree(ucm_dev); return; } static void ib_ucm_remove_one(struct ib_device *device) { struct ib_ucm_device *ucm_dev = ib_get_client_data(device, &ucm_client); if (!ucm_dev) return; device_unregister(&ucm_dev->dev); } static ssize_t show_abi_version(struct class *class, struct class_attribute *attr, char *buf) { return sprintf(buf, "%d\n", IB_USER_CM_ABI_VERSION); } static CLASS_ATTR(abi_version, S_IRUGO, show_abi_version, NULL); static int __init ib_ucm_init(void) { int ret; ret = register_chrdev_region(IB_UCM_BASE_DEV, IB_UCM_MAX_DEVICES, "infiniband_cm"); if (ret) { printk(KERN_ERR "ucm: couldn't register device number\n"); goto error1; } ret = class_create_file(&cm_class, &class_attr_abi_version); if (ret) { 
printk(KERN_ERR "ucm: couldn't create abi_version attribute\n"); goto error2; } ret = ib_register_client(&ucm_client); if (ret) { printk(KERN_ERR "ucm: couldn't register client\n"); goto error3; } return 0; error3: class_remove_file(&cm_class, &class_attr_abi_version); error2: unregister_chrdev_region(IB_UCM_BASE_DEV, IB_UCM_MAX_DEVICES); error1: return ret; } static void __exit ib_ucm_cleanup(void) { ib_unregister_client(&ucm_client); class_remove_file(&cm_class, &class_attr_abi_version); unregister_chrdev_region(IB_UCM_BASE_DEV, IB_UCM_MAX_DEVICES); idr_destroy(&ctx_id_table); } module_init_order(ib_ucm_init, SI_ORDER_THIRD); module_exit(ib_ucm_cleanup); Index: stable/10/sys/ofed/drivers/infiniband/core/uverbs_main.c =================================================================== --- stable/10/sys/ofed/drivers/infiniband/core/uverbs_main.c (revision 273245) +++ stable/10/sys/ofed/drivers/infiniband/core/uverbs_main.c (revision 273246) @@ -1,1014 +1,1013 @@ /* * Copyright (c) 2005 Topspin Communications. All rights reserved. * Copyright (c) 2005, 2006 Cisco Systems. All rights reserved. * Copyright (c) 2005 Mellanox Technologies. All rights reserved. * Copyright (c) 2005 Voltaire, Inc. All rights reserved. * Copyright (c) 2005 PathScale, Inc. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU * General Public License (GPL) Version 2, available from the file * COPYING in the main directory of this source tree, or the * OpenIB.org BSD license below: * * Redistribution and use in source and binary forms, with or * without modification, are permitted provided that the following * conditions are met: * * - Redistributions of source code must retain the above * copyright notice, this list of conditions and the following * disclaimer. * * - Redistributions in binary form must reproduce the above * copyright notice, this list of conditions and the following * disclaimer in the documentation and/or other materials * provided with the distribution. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
*/ #include #include #include #include #include #include -#include #include #include #include "uverbs.h" MODULE_AUTHOR("Roland Dreier"); MODULE_DESCRIPTION("InfiniBand userspace verbs access"); MODULE_LICENSE("Dual BSD/GPL"); #define INFINIBANDEVENTFS_MAGIC 0x49426576 /* "IBev" */ enum { IB_UVERBS_MAJOR = 231, IB_UVERBS_BASE_MINOR = 192, IB_UVERBS_MAX_DEVICES = 32 }; #define IB_UVERBS_BASE_DEV MKDEV(IB_UVERBS_MAJOR, IB_UVERBS_BASE_MINOR) static struct class *uverbs_class; DEFINE_SPINLOCK(ib_uverbs_idr_lock); DEFINE_IDR(ib_uverbs_pd_idr); DEFINE_IDR(ib_uverbs_mr_idr); DEFINE_IDR(ib_uverbs_mw_idr); DEFINE_IDR(ib_uverbs_ah_idr); DEFINE_IDR(ib_uverbs_cq_idr); DEFINE_IDR(ib_uverbs_qp_idr); DEFINE_IDR(ib_uverbs_srq_idr); DEFINE_IDR(ib_uverbs_xrc_domain_idr); static spinlock_t map_lock; static struct ib_uverbs_device *dev_table[IB_UVERBS_MAX_DEVICES]; static DECLARE_BITMAP(dev_map, IB_UVERBS_MAX_DEVICES); static ssize_t (*uverbs_cmd_table[])(struct ib_uverbs_file *file, const char __user *buf, int in_len, int out_len) = { [IB_USER_VERBS_CMD_GET_CONTEXT] = ib_uverbs_get_context, [IB_USER_VERBS_CMD_QUERY_DEVICE] = ib_uverbs_query_device, [IB_USER_VERBS_CMD_QUERY_PORT] = ib_uverbs_query_port, [IB_USER_VERBS_CMD_ALLOC_PD] = ib_uverbs_alloc_pd, [IB_USER_VERBS_CMD_DEALLOC_PD] = ib_uverbs_dealloc_pd, [IB_USER_VERBS_CMD_REG_MR] = ib_uverbs_reg_mr, [IB_USER_VERBS_CMD_DEREG_MR] = ib_uverbs_dereg_mr, [IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL] = ib_uverbs_create_comp_channel, [IB_USER_VERBS_CMD_CREATE_CQ] = ib_uverbs_create_cq, [IB_USER_VERBS_CMD_RESIZE_CQ] = ib_uverbs_resize_cq, [IB_USER_VERBS_CMD_POLL_CQ] = ib_uverbs_poll_cq, [IB_USER_VERBS_CMD_REQ_NOTIFY_CQ] = ib_uverbs_req_notify_cq, [IB_USER_VERBS_CMD_DESTROY_CQ] = ib_uverbs_destroy_cq, [IB_USER_VERBS_CMD_CREATE_QP] = ib_uverbs_create_qp, [IB_USER_VERBS_CMD_QUERY_QP] = ib_uverbs_query_qp, [IB_USER_VERBS_CMD_MODIFY_QP] = ib_uverbs_modify_qp, [IB_USER_VERBS_CMD_DESTROY_QP] = ib_uverbs_destroy_qp, [IB_USER_VERBS_CMD_POST_SEND] = ib_uverbs_post_send, [IB_USER_VERBS_CMD_POST_RECV] = ib_uverbs_post_recv, [IB_USER_VERBS_CMD_POST_SRQ_RECV] = ib_uverbs_post_srq_recv, [IB_USER_VERBS_CMD_CREATE_AH] = ib_uverbs_create_ah, [IB_USER_VERBS_CMD_DESTROY_AH] = ib_uverbs_destroy_ah, [IB_USER_VERBS_CMD_ATTACH_MCAST] = ib_uverbs_attach_mcast, [IB_USER_VERBS_CMD_DETACH_MCAST] = ib_uverbs_detach_mcast, [IB_USER_VERBS_CMD_CREATE_SRQ] = ib_uverbs_create_srq, [IB_USER_VERBS_CMD_MODIFY_SRQ] = ib_uverbs_modify_srq, [IB_USER_VERBS_CMD_QUERY_SRQ] = ib_uverbs_query_srq, [IB_USER_VERBS_CMD_DESTROY_SRQ] = ib_uverbs_destroy_srq, [IB_USER_VERBS_CMD_CREATE_XRC_SRQ] = ib_uverbs_create_xrc_srq, [IB_USER_VERBS_CMD_OPEN_XRCD] = ib_uverbs_open_xrc_domain, [IB_USER_VERBS_CMD_CLOSE_XRCD] = ib_uverbs_close_xrc_domain, [IB_USER_VERBS_CMD_CREATE_XRC_RCV_QP] = ib_uverbs_create_xrc_rcv_qp, [IB_USER_VERBS_CMD_MODIFY_XRC_RCV_QP] = ib_uverbs_modify_xrc_rcv_qp, [IB_USER_VERBS_CMD_QUERY_XRC_RCV_QP] = ib_uverbs_query_xrc_rcv_qp, [IB_USER_VERBS_CMD_REG_XRC_RCV_QP] = ib_uverbs_reg_xrc_rcv_qp, [IB_USER_VERBS_CMD_UNREG_XRC_RCV_QP] = ib_uverbs_unreg_xrc_rcv_qp, }; #ifdef __linux__ /* BSD Does not require a fake mountpoint for all files. 
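 *
 * (Sketch of why the mountpoint exists at all: the Linux path in
 *  ib_uverbs_alloc_event_file() below creates event files with
 *
 *	filp = alloc_file(uverbs_event_mnt, dget(uverbs_event_mnt->mnt_root),
 *			  FMODE_READ, fops_get(&uverbs_event_fops));
 *
 *  which needs a vfsmount to anchor the anonymous file, so
 *  ib_uverbs_init() registers and kern_mount()s a pseudo filesystem
 *  purely for that purpose.  The FreeBSD alloc_file() variant takes
 *  only the mode and fops, so the mount, the file_system_type and this
 *  variable are all compiled out.)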
*/ static struct vfsmount *uverbs_event_mnt; #endif static void ib_uverbs_add_one(struct ib_device *device); static void ib_uverbs_remove_one(struct ib_device *device); static void ib_uverbs_release_dev(struct kref *ref) { struct ib_uverbs_device *dev = container_of(ref, struct ib_uverbs_device, ref); complete(&dev->comp); } static void ib_uverbs_release_event_file(struct kref *ref) { struct ib_uverbs_event_file *file = container_of(ref, struct ib_uverbs_event_file, ref); kfree(file); } void ib_uverbs_release_ucq(struct ib_uverbs_file *file, struct ib_uverbs_event_file *ev_file, struct ib_ucq_object *uobj) { struct ib_uverbs_event *evt, *tmp; if (ev_file) { spin_lock_irq(&ev_file->lock); list_for_each_entry_safe(evt, tmp, &uobj->comp_list, obj_list) { list_del(&evt->list); kfree(evt); } spin_unlock_irq(&ev_file->lock); kref_put(&ev_file->ref, ib_uverbs_release_event_file); } spin_lock_irq(&file->async_file->lock); list_for_each_entry_safe(evt, tmp, &uobj->async_list, obj_list) { list_del(&evt->list); kfree(evt); } spin_unlock_irq(&file->async_file->lock); } void ib_uverbs_release_uevent(struct ib_uverbs_file *file, struct ib_uevent_object *uobj) { struct ib_uverbs_event *evt, *tmp; spin_lock_irq(&file->async_file->lock); list_for_each_entry_safe(evt, tmp, &uobj->event_list, obj_list) { list_del(&evt->list); kfree(evt); } spin_unlock_irq(&file->async_file->lock); } static void ib_uverbs_detach_umcast(struct ib_qp *qp, struct ib_uqp_object *uobj) { struct ib_uverbs_mcast_entry *mcast, *tmp; list_for_each_entry_safe(mcast, tmp, &uobj->mcast_list, list) { ib_detach_mcast(qp, &mcast->gid, mcast->lid); list_del(&mcast->list); kfree(mcast); } } static int ib_uverbs_cleanup_ucontext(struct ib_uverbs_file *file, struct ib_ucontext *context) { struct ib_uobject *uobj, *tmp; if (!context) return 0; context->closing = 1; list_for_each_entry_safe(uobj, tmp, &context->ah_list, list) { struct ib_ah *ah = uobj->object; idr_remove_uobj(&ib_uverbs_ah_idr, uobj); ib_destroy_ah(ah); kfree(uobj); } list_for_each_entry_safe(uobj, tmp, &context->qp_list, list) { struct ib_qp *qp = uobj->object; struct ib_uqp_object *uqp = container_of(uobj, struct ib_uqp_object, uevent.uobject); idr_remove_uobj(&ib_uverbs_qp_idr, uobj); ib_uverbs_detach_umcast(qp, uqp); ib_destroy_qp(qp); ib_uverbs_release_uevent(file, &uqp->uevent); kfree(uqp); } list_for_each_entry_safe(uobj, tmp, &context->srq_list, list) { struct ib_srq *srq = uobj->object; struct ib_uevent_object *uevent = container_of(uobj, struct ib_uevent_object, uobject); idr_remove_uobj(&ib_uverbs_srq_idr, uobj); ib_destroy_srq(srq); ib_uverbs_release_uevent(file, uevent); kfree(uevent); } list_for_each_entry_safe(uobj, tmp, &context->cq_list, list) { struct ib_cq *cq = uobj->object; struct ib_uverbs_event_file *ev_file = cq->cq_context; struct ib_ucq_object *ucq = container_of(uobj, struct ib_ucq_object, uobject); idr_remove_uobj(&ib_uverbs_cq_idr, uobj); ib_destroy_cq(cq); ib_uverbs_release_ucq(file, ev_file, ucq); kfree(ucq); } /* XXX Free MWs */ list_for_each_entry_safe(uobj, tmp, &context->mr_list, list) { struct ib_mr *mr = uobj->object; idr_remove_uobj(&ib_uverbs_mr_idr, uobj); ib_dereg_mr(mr); kfree(uobj); } mutex_lock(&file->device->ib_dev->xrcd_table_mutex); list_for_each_entry_safe(uobj, tmp, &context->xrcd_list, list) { struct ib_xrcd *xrcd = uobj->object; struct ib_uxrc_rcv_object *xrc_qp_obj, *tmp1; struct ib_uxrcd_object *xrcd_uobj = container_of(uobj, struct ib_uxrcd_object, uobject); list_for_each_entry_safe(xrc_qp_obj, tmp1, 
&xrcd_uobj->xrc_reg_qp_list, list) { list_del(&xrc_qp_obj->list); ib_uverbs_cleanup_xrc_rcv_qp(file, xrcd, xrc_qp_obj->qp_num); kfree(xrc_qp_obj); } idr_remove_uobj(&ib_uverbs_xrc_domain_idr, uobj); ib_uverbs_dealloc_xrcd(file->device->ib_dev, xrcd); kfree(uobj); } mutex_unlock(&file->device->ib_dev->xrcd_table_mutex); list_for_each_entry_safe(uobj, tmp, &context->pd_list, list) { struct ib_pd *pd = uobj->object; idr_remove_uobj(&ib_uverbs_pd_idr, uobj); ib_dealloc_pd(pd); kfree(uobj); } return context->device->dealloc_ucontext(context); } static void ib_uverbs_release_file(struct kref *ref) { struct ib_uverbs_file *file = container_of(ref, struct ib_uverbs_file, ref); module_put(file->device->ib_dev->owner); kref_put(&file->device->ref, ib_uverbs_release_dev); kfree(file); } static ssize_t ib_uverbs_event_read(struct file *filp, char __user *buf, size_t count, loff_t *pos) { struct ib_uverbs_event_file *file = filp->private_data; struct ib_uverbs_event *event; int eventsz; int ret = 0; spin_lock_irq(&file->lock); while (list_empty(&file->event_list)) { spin_unlock_irq(&file->lock); if (filp->f_flags & O_NONBLOCK) return -EAGAIN; if (wait_event_interruptible(file->poll_wait, !list_empty(&file->event_list))) return -ERESTARTSYS; spin_lock_irq(&file->lock); } event = list_entry(file->event_list.next, struct ib_uverbs_event, list); if (file->is_async) eventsz = sizeof (struct ib_uverbs_async_event_desc); else eventsz = sizeof (struct ib_uverbs_comp_event_desc); if (eventsz > count) { ret = -EINVAL; event = NULL; } else { list_del(file->event_list.next); if (event->counter) { ++(*event->counter); list_del(&event->obj_list); } } spin_unlock_irq(&file->lock); if (event) { if (copy_to_user(buf, event, eventsz)) ret = -EFAULT; else ret = eventsz; } kfree(event); return ret; } static unsigned int ib_uverbs_event_poll(struct file *filp, struct poll_table_struct *wait) { unsigned int pollflags = 0; struct ib_uverbs_event_file *file = filp->private_data; file->filp = filp; poll_wait(filp, &file->poll_wait, wait); spin_lock_irq(&file->lock); if (!list_empty(&file->event_list)) pollflags = POLLIN | POLLRDNORM; spin_unlock_irq(&file->lock); return pollflags; } static int ib_uverbs_event_fasync(int fd, struct file *filp, int on) { struct ib_uverbs_event_file *file = filp->private_data; return fasync_helper(fd, filp, on, &file->async_queue); } static int ib_uverbs_event_close(struct inode *inode, struct file *filp) { struct ib_uverbs_event_file *file = filp->private_data; struct ib_uverbs_event *entry, *tmp; spin_lock_irq(&file->lock); file->is_closed = 1; list_for_each_entry_safe(entry, tmp, &file->event_list, list) { if (entry->counter) list_del(&entry->obj_list); kfree(entry); } spin_unlock_irq(&file->lock); if (file->is_async) { ib_unregister_event_handler(&file->uverbs_file->event_handler); kref_put(&file->uverbs_file->ref, ib_uverbs_release_file); } kref_put(&file->ref, ib_uverbs_release_event_file); return 0; } static const struct file_operations uverbs_event_fops = { .owner = THIS_MODULE, .read = ib_uverbs_event_read, .poll = ib_uverbs_event_poll, .release = ib_uverbs_event_close, .fasync = ib_uverbs_event_fasync }; void ib_uverbs_comp_handler(struct ib_cq *cq, void *cq_context) { struct ib_uverbs_event_file *file = cq_context; struct ib_ucq_object *uobj; struct ib_uverbs_event *entry; unsigned long flags; if (!file) return; spin_lock_irqsave(&file->lock, flags); if (file->is_closed) { spin_unlock_irqrestore(&file->lock, flags); return; } entry = kmalloc(sizeof *entry, GFP_ATOMIC); if (!entry) { 
spin_unlock_irqrestore(&file->lock, flags); return; } uobj = container_of(cq->uobject, struct ib_ucq_object, uobject); entry->desc.comp.cq_handle = cq->uobject->user_handle; entry->counter = &uobj->comp_events_reported; list_add_tail(&entry->list, &file->event_list); list_add_tail(&entry->obj_list, &uobj->comp_list); spin_unlock_irqrestore(&file->lock, flags); wake_up_interruptible(&file->poll_wait); if (file->filp) selwakeup(&file->filp->f_selinfo); kill_fasync(&file->async_queue, SIGIO, POLL_IN); } static void ib_uverbs_async_handler(struct ib_uverbs_file *file, __u64 element, __u64 event, struct list_head *obj_list, u32 *counter) { struct ib_uverbs_event *entry; unsigned long flags; spin_lock_irqsave(&file->async_file->lock, flags); if (file->async_file->is_closed) { spin_unlock_irqrestore(&file->async_file->lock, flags); return; } entry = kmalloc(sizeof *entry, GFP_ATOMIC); if (!entry) { spin_unlock_irqrestore(&file->async_file->lock, flags); return; } entry->desc.async.element = element; entry->desc.async.event_type = event; entry->counter = counter; list_add_tail(&entry->list, &file->async_file->event_list); if (obj_list) list_add_tail(&entry->obj_list, obj_list); spin_unlock_irqrestore(&file->async_file->lock, flags); wake_up_interruptible(&file->async_file->poll_wait); if (file->async_file->filp) selwakeup(&file->async_file->filp->f_selinfo); kill_fasync(&file->async_file->async_queue, SIGIO, POLL_IN); } void ib_uverbs_cq_event_handler(struct ib_event *event, void *context_ptr) { struct ib_ucq_object *uobj = container_of(event->element.cq->uobject, struct ib_ucq_object, uobject); ib_uverbs_async_handler(uobj->uverbs_file, uobj->uobject.user_handle, event->event, &uobj->async_list, &uobj->async_events_reported); } void ib_uverbs_qp_event_handler(struct ib_event *event, void *context_ptr) { struct ib_uevent_object *uobj; uobj = container_of(event->element.qp->uobject, struct ib_uevent_object, uobject); ib_uverbs_async_handler(context_ptr, uobj->uobject.user_handle, event->event, &uobj->event_list, &uobj->events_reported); } void ib_uverbs_srq_event_handler(struct ib_event *event, void *context_ptr) { struct ib_uevent_object *uobj; uobj = container_of(event->element.srq->uobject, struct ib_uevent_object, uobject); ib_uverbs_async_handler(context_ptr, uobj->uobject.user_handle, event->event, &uobj->event_list, &uobj->events_reported); } void ib_uverbs_event_handler(struct ib_event_handler *handler, struct ib_event *event) { struct ib_uverbs_file *file = container_of(handler, struct ib_uverbs_file, event_handler); ib_uverbs_async_handler(file, event->element.port_num, event->event, NULL, NULL); } void ib_uverbs_xrc_rcv_qp_event_handler(struct ib_event *event, void *context_ptr) { ib_uverbs_async_handler(context_ptr, event->element.xrc_qp_num, event->event, NULL, NULL); } struct file *ib_uverbs_alloc_event_file(struct ib_uverbs_file *uverbs_file, int is_async, int *fd) { struct ib_uverbs_event_file *ev_file; struct file *filp; int ret; ev_file = kmalloc(sizeof *ev_file, GFP_KERNEL); if (!ev_file) return ERR_PTR(-ENOMEM); kref_init(&ev_file->ref); spin_lock_init(&ev_file->lock); INIT_LIST_HEAD(&ev_file->event_list); init_waitqueue_head(&ev_file->poll_wait); ev_file->uverbs_file = uverbs_file; ev_file->async_queue = NULL; ev_file->is_async = is_async; ev_file->is_closed = 0; ev_file->filp = NULL; *fd = get_unused_fd(); if (*fd < 0) { ret = *fd; goto err; } /* * fops_get() can't fail here, because we're coming from a * system call on a uverbs file, which will already have a * module 
reference. */ #ifdef __linux__ filp = alloc_file(uverbs_event_mnt, dget(uverbs_event_mnt->mnt_root), FMODE_READ, fops_get(&uverbs_event_fops)); #else filp = alloc_file(FMODE_READ, fops_get(&uverbs_event_fops)); #endif if (!filp) { ret = -ENFILE; goto err_fd; } filp->private_data = ev_file; return filp; err_fd: put_unused_fd(*fd); err: kfree(ev_file); return ERR_PTR(ret); } /* * Look up a completion event file by FD. If lookup is successful, * takes a ref to the event file struct that it returns; if * unsuccessful, returns NULL. */ struct ib_uverbs_event_file *ib_uverbs_lookup_comp_file(int fd) { struct ib_uverbs_event_file *ev_file = NULL; struct file *filp; filp = fget(fd); if (!filp) return NULL; if (filp->f_op != &uverbs_event_fops) goto out; ev_file = filp->private_data; if (ev_file->is_async) { ev_file = NULL; goto out; } kref_get(&ev_file->ref); out: fput(filp); return ev_file; } static ssize_t ib_uverbs_write(struct file *filp, const char __user *buf, size_t count, loff_t *pos) { struct ib_uverbs_file *file = filp->private_data; struct ib_uverbs_cmd_hdr hdr; if (count < sizeof hdr) return -EINVAL; if (copy_from_user(&hdr, buf, sizeof hdr)) return -EFAULT; if (hdr.in_words * 4 != count) return -EINVAL; if (hdr.command >= ARRAY_SIZE(uverbs_cmd_table) || !uverbs_cmd_table[hdr.command] || !(file->device->ib_dev->uverbs_cmd_mask & (1ull << hdr.command))) return -EINVAL; if (!file->ucontext && hdr.command != IB_USER_VERBS_CMD_GET_CONTEXT) return -EINVAL; return uverbs_cmd_table[hdr.command](file, buf + sizeof hdr, hdr.in_words * 4, hdr.out_words * 4); } static int ib_uverbs_mmap(struct file *filp, struct vm_area_struct *vma) { struct ib_uverbs_file *file = filp->private_data; if (!file->ucontext) return -ENODEV; else return file->device->ib_dev->mmap(file->ucontext, vma); } /* * ib_uverbs_open() does not need the BKL: * * - dev_table[] accesses are protected by map_lock, the * ib_uverbs_device structures are properly reference counted, and * everything else is purely local to the file being created, so * races against other open calls are not a problem; * - there is no ioctl method to race against; * - the device is added to dev_table[] as the last part of module * initialization, the open method will either immediately run * -ENXIO, or all required initialization will be done. 
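 *
 * (A minimal usage sketch of the write() interface above, assuming the
 *  ib_user_verbs.h ABI structs: commands arrive as a struct
 *  ib_uverbs_cmd_hdr followed by the command body, with both lengths
 *  given in 32-bit words and in_words counting the header as well.
 *
 *	struct ib_uverbs_cmd_hdr hdr;
 *
 *	hdr.command   = IB_USER_VERBS_CMD_GET_CONTEXT;
 *	hdr.in_words  = (sizeof(hdr) +
 *			 sizeof(struct ib_uverbs_get_context)) / 4;
 *	hdr.out_words = sizeof(struct ib_uverbs_get_context_resp) / 4;
 *	... copy hdr plus the get_context payload into one buffer ...
 *	write(fd, buf, hdr.in_words * 4);
 *
 *  GET_CONTEXT is the only command accepted before a ucontext exists,
 *  as checked in ib_uverbs_write().)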
*/ static int ib_uverbs_open(struct inode *inode, struct file *filp) { struct ib_uverbs_device *dev; struct ib_uverbs_file *file; int ret; spin_lock(&map_lock); dev = dev_table[iminor(inode) - IB_UVERBS_BASE_MINOR]; if (dev) kref_get(&dev->ref); spin_unlock(&map_lock); if (!dev) return -ENXIO; if (!try_module_get(dev->ib_dev->owner)) { ret = -ENODEV; goto err; } file = kmalloc(sizeof *file, GFP_KERNEL); if (!file) { ret = -ENOMEM; goto err_module; } file->device = dev; file->ucontext = NULL; file->async_file = NULL; kref_init(&file->ref); mutex_init(&file->mutex); filp->private_data = file; return 0; err_module: module_put(dev->ib_dev->owner); err: kref_put(&dev->ref, ib_uverbs_release_dev); return ret; } static int ib_uverbs_close(struct inode *inode, struct file *filp) { struct ib_uverbs_file *file = filp->private_data; ib_uverbs_cleanup_ucontext(file, file->ucontext); if (file->async_file) kref_put(&file->async_file->ref, ib_uverbs_release_event_file); kref_put(&file->ref, ib_uverbs_release_file); return 0; } static const struct file_operations uverbs_fops = { .owner = THIS_MODULE, .write = ib_uverbs_write, .open = ib_uverbs_open, .release = ib_uverbs_close }; static const struct file_operations uverbs_mmap_fops = { .owner = THIS_MODULE, .write = ib_uverbs_write, .mmap = ib_uverbs_mmap, .open = ib_uverbs_open, .release = ib_uverbs_close }; static struct ib_client uverbs_client = { .name = "uverbs", .add = ib_uverbs_add_one, .remove = ib_uverbs_remove_one }; static ssize_t show_ibdev(struct device *device, struct device_attribute *attr, char *buf) { struct ib_uverbs_device *dev = dev_get_drvdata(device); if (!dev) return -ENODEV; return sprintf(buf, "%s\n", dev->ib_dev->name); } static DEVICE_ATTR(ibdev, S_IRUGO, show_ibdev, NULL); static ssize_t show_dev_abi_version(struct device *device, struct device_attribute *attr, char *buf) { struct ib_uverbs_device *dev = dev_get_drvdata(device); if (!dev) return -ENODEV; return sprintf(buf, "%d\n", dev->ib_dev->uverbs_abi_ver); } static DEVICE_ATTR(abi_version, S_IRUGO, show_dev_abi_version, NULL); static ssize_t show_abi_version(struct class *class, struct class_attribute *attr, char *buf) { return sprintf(buf, "%d\n", IB_USER_VERBS_ABI_VERSION); } static CLASS_ATTR(abi_version, S_IRUGO, show_abi_version, NULL); #include static ssize_t show_dev_device(struct device *device, struct device_attribute *attr, char *buf) { struct ib_uverbs_device *dev = dev_get_drvdata(device); if (!dev) return -ENODEV; return sprintf(buf, "0x%04x\n", ((struct pci_dev *)dev->ib_dev->dma_device)->device); } static DEVICE_ATTR(device, S_IRUGO, show_dev_device, NULL); static ssize_t show_dev_vendor(struct device *device, struct device_attribute *attr, char *buf) { struct ib_uverbs_device *dev = dev_get_drvdata(device); if (!dev) return -ENODEV; return sprintf(buf, "0x%04x\n", ((struct pci_dev *)dev->ib_dev->dma_device)->vendor); } static DEVICE_ATTR(vendor, S_IRUGO, show_dev_vendor, NULL); struct attribute *device_attrs[] = { &dev_attr_device.attr, &dev_attr_vendor.attr, NULL }; static struct attribute_group device_group = { .name = "device", .attrs = device_attrs }; static void ib_uverbs_add_one(struct ib_device *device) { struct ib_uverbs_device *uverbs_dev; if (!device->alloc_ucontext) return; uverbs_dev = kzalloc(sizeof *uverbs_dev, GFP_KERNEL); if (!uverbs_dev) return; kref_init(&uverbs_dev->ref); init_completion(&uverbs_dev->comp); spin_lock(&map_lock); uverbs_dev->devnum = find_first_zero_bit(dev_map, IB_UVERBS_MAX_DEVICES); if (uverbs_dev->devnum >= 
IB_UVERBS_MAX_DEVICES) { spin_unlock(&map_lock); goto err; } set_bit(uverbs_dev->devnum, dev_map); spin_unlock(&map_lock); uverbs_dev->ib_dev = device; uverbs_dev->num_comp_vectors = device->num_comp_vectors; uverbs_dev->cdev = cdev_alloc(); if (!uverbs_dev->cdev) goto err; uverbs_dev->cdev->owner = THIS_MODULE; uverbs_dev->cdev->ops = device->mmap ? &uverbs_mmap_fops : &uverbs_fops; kobject_set_name(&uverbs_dev->cdev->kobj, "uverbs%d", uverbs_dev->devnum); if (cdev_add(uverbs_dev->cdev, IB_UVERBS_BASE_DEV + uverbs_dev->devnum, 1)) goto err_cdev; uverbs_dev->dev = device_create(uverbs_class, device->dma_device, uverbs_dev->cdev->dev, uverbs_dev, "uverbs%d", uverbs_dev->devnum); if (IS_ERR(uverbs_dev->dev)) goto err_cdev; if (device_create_file(uverbs_dev->dev, &dev_attr_ibdev)) goto err_class; if (device_create_file(uverbs_dev->dev, &dev_attr_abi_version)) goto err_class; if (sysfs_create_group(&uverbs_dev->dev->kobj, &device_group)) goto err_class; spin_lock(&map_lock); dev_table[uverbs_dev->devnum] = uverbs_dev; spin_unlock(&map_lock); ib_set_client_data(device, &uverbs_client, uverbs_dev); return; err_class: device_destroy(uverbs_class, uverbs_dev->cdev->dev); err_cdev: cdev_del(uverbs_dev->cdev); clear_bit(uverbs_dev->devnum, dev_map); err: kref_put(&uverbs_dev->ref, ib_uverbs_release_dev); wait_for_completion(&uverbs_dev->comp); kfree(uverbs_dev); return; } static void ib_uverbs_remove_one(struct ib_device *device) { struct ib_uverbs_device *uverbs_dev = ib_get_client_data(device, &uverbs_client); if (!uverbs_dev) return; sysfs_remove_group(&uverbs_dev->dev->kobj, &device_group); dev_set_drvdata(uverbs_dev->dev, NULL); device_destroy(uverbs_class, uverbs_dev->cdev->dev); cdev_del(uverbs_dev->cdev); spin_lock(&map_lock); dev_table[uverbs_dev->devnum] = NULL; spin_unlock(&map_lock); clear_bit(uverbs_dev->devnum, dev_map); kref_put(&uverbs_dev->ref, ib_uverbs_release_dev); wait_for_completion(&uverbs_dev->comp); kfree(uverbs_dev); } #ifdef __linux__ static int uverbs_event_get_sb(struct file_system_type *fs_type, int flags, const char *dev_name, void *data, struct vfsmount *mnt) { return get_sb_pseudo(fs_type, "infinibandevent:", NULL, INFINIBANDEVENTFS_MAGIC, mnt); } static struct file_system_type uverbs_event_fs = { /* No owner field so module can be unloaded */ .name = "infinibandeventfs", .get_sb = uverbs_event_get_sb, .kill_sb = kill_litter_super }; #endif static int __init ib_uverbs_init(void) { int ret; spin_lock_init(&map_lock); ret = register_chrdev_region(IB_UVERBS_BASE_DEV, IB_UVERBS_MAX_DEVICES, "infiniband_verbs"); if (ret) { printk(KERN_ERR "user_verbs: couldn't register device number\n"); goto out; } uverbs_class = class_create(THIS_MODULE, "infiniband_verbs"); if (IS_ERR(uverbs_class)) { ret = PTR_ERR(uverbs_class); printk(KERN_ERR "user_verbs: couldn't create class infiniband_verbs\n"); goto out_chrdev; } ret = class_create_file(uverbs_class, &class_attr_abi_version); if (ret) { printk(KERN_ERR "user_verbs: couldn't create abi_version attribute\n"); goto out_class; } #ifdef __linux__ ret = register_filesystem(&uverbs_event_fs); if (ret) { printk(KERN_ERR "user_verbs: couldn't register infinibandeventfs\n"); goto out_class; } uverbs_event_mnt = kern_mount(&uverbs_event_fs); if (IS_ERR(uverbs_event_mnt)) { ret = PTR_ERR(uverbs_event_mnt); printk(KERN_ERR "user_verbs: couldn't mount infinibandeventfs\n"); goto out_fs; } #endif ret = ib_register_client(&uverbs_client); if (ret) { printk(KERN_ERR "user_verbs: couldn't register client\n"); goto out_mnt; } return 0; out_mnt: 
#ifdef __linux__ mntput(uverbs_event_mnt); out_fs: unregister_filesystem(&uverbs_event_fs); #endif out_class: class_destroy(uverbs_class); out_chrdev: unregister_chrdev_region(IB_UVERBS_BASE_DEV, IB_UVERBS_MAX_DEVICES); out: return ret; } static void __exit ib_uverbs_cleanup(void) { ib_unregister_client(&uverbs_client); #ifdef __linux__ mntput(uverbs_event_mnt); unregister_filesystem(&uverbs_event_fs); #endif class_destroy(uverbs_class); unregister_chrdev_region(IB_UVERBS_BASE_DEV, IB_UVERBS_MAX_DEVICES); idr_destroy(&ib_uverbs_pd_idr); idr_destroy(&ib_uverbs_mr_idr); idr_destroy(&ib_uverbs_mw_idr); idr_destroy(&ib_uverbs_ah_idr); idr_destroy(&ib_uverbs_cq_idr); idr_destroy(&ib_uverbs_qp_idr); idr_destroy(&ib_uverbs_srq_idr); } module_init(ib_uverbs_init); module_exit(ib_uverbs_cleanup); Index: stable/10/sys/ofed/drivers/infiniband/hw/mlx4/ah.c =================================================================== --- stable/10/sys/ofed/drivers/infiniband/hw/mlx4/ah.c (revision 273245) +++ stable/10/sys/ofed/drivers/infiniband/hw/mlx4/ah.c (revision 273246) @@ -1,203 +1,202 @@ /* * Copyright (c) 2007 Cisco Systems, Inc. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU * General Public License (GPL) Version 2, available from the file * COPYING in the main directory of this source tree, or the * OpenIB.org BSD license below: * * Redistribution and use in source and binary forms, with or * without modification, are permitted provided that the following * conditions are met: * * - Redistributions of source code must retain the above * copyright notice, this list of conditions and the following * disclaimer. * * - Redistributions in binary form must reproduce the above * copyright notice, this list of conditions and the following * disclaimer in the documentation and/or other materials * provided with the distribution. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
*/ #include #include #include #include #include #include -#include #include #include "mlx4_ib.h" int mlx4_ib_resolve_grh(struct mlx4_ib_dev *dev, const struct ib_ah_attr *ah_attr, u8 *mac, int *is_mcast, u8 port) { struct in6_addr in6; *is_mcast = 0; memcpy(&in6, ah_attr->grh.dgid.raw, sizeof in6); if (rdma_link_local_addr(&in6)) rdma_get_ll_mac(&in6, mac); else if (rdma_is_multicast_addr(&in6)) { rdma_get_mcast_mac(&in6, mac); *is_mcast = 1; } else return -EINVAL; return 0; } static struct ib_ah *create_ib_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr, struct mlx4_ib_ah *ah) { struct mlx4_dev *dev = to_mdev(pd->device)->dev; ah->av.ib.port_pd = cpu_to_be32(to_mpd(pd)->pdn | (ah_attr->port_num << 24)); ah->av.ib.g_slid = ah_attr->src_path_bits; if (ah_attr->ah_flags & IB_AH_GRH) { ah->av.ib.g_slid |= 0x80; ah->av.ib.gid_index = ah_attr->grh.sgid_index; ah->av.ib.hop_limit = ah_attr->grh.hop_limit; ah->av.ib.sl_tclass_flowlabel |= cpu_to_be32((ah_attr->grh.traffic_class << 20) | ah_attr->grh.flow_label); memcpy(ah->av.ib.dgid, ah_attr->grh.dgid.raw, 16); } ah->av.ib.dlid = cpu_to_be16(ah_attr->dlid); if (ah_attr->static_rate) { ah->av.ib.stat_rate = ah_attr->static_rate + MLX4_STAT_RATE_OFFSET; while (ah->av.ib.stat_rate > IB_RATE_2_5_GBPS + MLX4_STAT_RATE_OFFSET && !(1 << ah->av.ib.stat_rate & dev->caps.stat_rate_support)) --ah->av.ib.stat_rate; } ah->av.ib.sl_tclass_flowlabel = cpu_to_be32(ah_attr->sl << 28); return &ah->ibah; } static struct ib_ah *create_iboe_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr, struct mlx4_ib_ah *ah) { struct mlx4_ib_dev *ibdev = to_mdev(pd->device); struct mlx4_dev *dev = ibdev->dev; union ib_gid sgid; u8 mac[6]; int err; int is_mcast; u16 vlan_tag; err = mlx4_ib_resolve_grh(ibdev, ah_attr, mac, &is_mcast, ah_attr->port_num); if (err) return ERR_PTR(err); memcpy(ah->av.eth.mac, mac, 6); err = ib_get_cached_gid(pd->device, ah_attr->port_num, ah_attr->grh.sgid_index, &sgid); if (err) return ERR_PTR(err); vlan_tag = rdma_get_vlan_id(&sgid); if (vlan_tag < 0x1000) vlan_tag |= (ah_attr->sl & 7) << 13; ah->av.eth.port_pd = cpu_to_be32(to_mpd(pd)->pdn | (ah_attr->port_num << 24)); ah->av.eth.gid_index = ah_attr->grh.sgid_index; ah->av.eth.vlan = cpu_to_be16(vlan_tag); if (ah_attr->static_rate) { ah->av.eth.stat_rate = ah_attr->static_rate + MLX4_STAT_RATE_OFFSET; while (ah->av.eth.stat_rate > IB_RATE_2_5_GBPS + MLX4_STAT_RATE_OFFSET && !(1 << ah->av.eth.stat_rate & dev->caps.stat_rate_support)) --ah->av.eth.stat_rate; } /* * HW requires multicast LID so we just choose one. */ if (is_mcast) ah->av.ib.dlid = cpu_to_be16(0xc000); memcpy(ah->av.eth.dgid, ah_attr->grh.dgid.raw, 16); ah->av.eth.sl_tclass_flowlabel = cpu_to_be32(ah_attr->sl << 29); return &ah->ibah; } struct ib_ah *mlx4_ib_create_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr) { struct mlx4_ib_ah *ah; struct ib_ah *ret; ah = kzalloc(sizeof *ah, GFP_ATOMIC); if (!ah) return ERR_PTR(-ENOMEM); if (rdma_port_get_link_layer(pd->device, ah_attr->port_num) == IB_LINK_LAYER_ETHERNET) { if (!(ah_attr->ah_flags & IB_AH_GRH)) { ret = ERR_PTR(-EINVAL); } else { /* * TBD: need to handle the case when we get * called in an atomic context and there we * might sleep. We don't expect this * currently since we're working with link * local addresses which we can translate * without going to sleep. 
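 *
 * (A worked example of the link-local case, assuming the usual EUI-64
 *  encoding: rdma_get_ll_mac() recovers the MAC from the interface ID
 *  of a GID such as
 *
 *	fe80::0211:22ff:fe33:4455  ->  00:11:22:33:44:55
 *
 *  by dropping the inserted ff:fe bytes and flipping the
 *  universal/local bit back, so no sleeping address resolution is
 *  needed.)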
*/ ret = create_iboe_ah(pd, ah_attr, ah); } if (IS_ERR(ret)) kfree(ah); return ret; } else return create_ib_ah(pd, ah_attr, ah); /* never fails */ } int mlx4_ib_query_ah(struct ib_ah *ibah, struct ib_ah_attr *ah_attr) { struct mlx4_ib_ah *ah = to_mah(ibah); enum rdma_link_layer ll; memset(ah_attr, 0, sizeof *ah_attr); ah_attr->sl = be32_to_cpu(ah->av.ib.sl_tclass_flowlabel) >> 28; ah_attr->port_num = be32_to_cpu(ah->av.ib.port_pd) >> 24; ll = rdma_port_get_link_layer(ibah->device, ah_attr->port_num); ah_attr->dlid = ll == IB_LINK_LAYER_INFINIBAND ? be16_to_cpu(ah->av.ib.dlid) : 0; if (ah->av.ib.stat_rate) ah_attr->static_rate = ah->av.ib.stat_rate - MLX4_STAT_RATE_OFFSET; ah_attr->src_path_bits = ah->av.ib.g_slid & 0x7F; if (mlx4_ib_ah_grh_present(ah)) { ah_attr->ah_flags = IB_AH_GRH; ah_attr->grh.traffic_class = be32_to_cpu(ah->av.ib.sl_tclass_flowlabel) >> 20; ah_attr->grh.flow_label = be32_to_cpu(ah->av.ib.sl_tclass_flowlabel) & 0xfffff; ah_attr->grh.hop_limit = ah->av.ib.hop_limit; ah_attr->grh.sgid_index = ah->av.ib.gid_index; memcpy(ah_attr->grh.dgid.raw, ah->av.ib.dgid, 16); } return 0; } int mlx4_ib_destroy_ah(struct ib_ah *ah) { kfree(to_mah(ah)); return 0; } Index: stable/10/sys/ofed/drivers/infiniband/hw/mthca/mthca_config_reg.h =================================================================== --- stable/10/sys/ofed/drivers/infiniband/hw/mthca/mthca_config_reg.h (revision 273245) +++ stable/10/sys/ofed/drivers/infiniband/hw/mthca/mthca_config_reg.h (revision 273246) @@ -1,50 +1,50 @@ /* * Copyright (c) 2004 Topspin Communications. All rights reserved. * Copyright (c) 2005 Mellanox Technologies. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU * General Public License (GPL) Version 2, available from the file * COPYING in the main directory of this source tree, or the * OpenIB.org BSD license below: * * Redistribution and use in source and binary forms, with or * without modification, are permitted provided that the following * conditions are met: * * - Redistributions of source code must retain the above * copyright notice, this list of conditions and the following * disclaimer. * * - Redistributions in binary form must reproduce the above * copyright notice, this list of conditions and the following * disclaimer in the documentation and/or other materials * provided with the distribution. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
*/ #ifndef MTHCA_CONFIG_REG_H #define MTHCA_CONFIG_REG_H -#include +#include #define MTHCA_HCR_BASE 0x80680 #define MTHCA_HCR_SIZE 0x0001c #define MTHCA_ECR_BASE 0x80700 #define MTHCA_ECR_SIZE 0x00008 #define MTHCA_ECR_CLR_BASE 0x80708 #define MTHCA_ECR_CLR_SIZE 0x00008 #define MTHCA_MAP_ECR_SIZE (MTHCA_ECR_SIZE + MTHCA_ECR_CLR_SIZE) #define MTHCA_CLR_INT_BASE 0xf00d8 #define MTHCA_CLR_INT_SIZE 0x00008 #define MTHCA_EQ_SET_CI_SIZE (8 * 32) #endif /* MTHCA_CONFIG_REG_H */ Index: stable/10/sys/ofed/drivers/infiniband/hw/mthca/mthca_memfree.c =================================================================== --- stable/10/sys/ofed/drivers/infiniband/hw/mthca/mthca_memfree.c (revision 273245) +++ stable/10/sys/ofed/drivers/infiniband/hw/mthca/mthca_memfree.c (revision 273246) @@ -1,880 +1,880 @@ /* * Copyright (c) 2004, 2005 Topspin Communications. All rights reserved. * Copyright (c) 2005 Cisco Systems. All rights reserved. * Copyright (c) 2005 Mellanox Technologies. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU * General Public License (GPL) Version 2, available from the file * COPYING in the main directory of this source tree, or the * OpenIB.org BSD license below: * * Redistribution and use in source and binary forms, with or * without modification, are permitted provided that the following * conditions are met: * * - Redistributions of source code must retain the above * copyright notice, this list of conditions and the following * disclaimer. * * - Redistributions in binary form must reproduce the above * copyright notice, this list of conditions and the following * disclaimer in the documentation and/or other materials * provided with the distribution. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ #include #include #include -#include +#include #include "mthca_memfree.h" #include "mthca_dev.h" #include "mthca_cmd.h" /* * We allocate in as big chunks as we can, up to a maximum of 256 KB * per chunk. 
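 *
 * (A worked example under the constants below, assuming 4 KB pages:
 *  MTHCA_ICM_ALLOC_SIZE = 1 << 18 = 256 KB gives an initial cur_order
 *  of get_order(256 KB) = 6, i.e. 64-page allocations.
 *  mthca_alloc_icm() lowers cur_order whenever an allocation fails or
 *  fewer pages remain, so a 1 MB table is normally satisfied by four
 *  64-page allocations, falling back to smaller orders under memory
 *  pressure.)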
*/ enum { MTHCA_ICM_ALLOC_SIZE = 1 << 18, MTHCA_TABLE_CHUNK_SIZE = 1 << 18 }; struct mthca_user_db_table { struct mutex mutex; struct { u64 uvirt; struct scatterlist mem; int refcount; } page[0]; }; static void mthca_free_icm_pages(struct mthca_dev *dev, struct mthca_icm_chunk *chunk) { int i; if (chunk->nsg > 0) pci_unmap_sg(dev->pdev, chunk->mem, chunk->npages, PCI_DMA_BIDIRECTIONAL); for (i = 0; i < chunk->npages; ++i) __free_pages(sg_page(&chunk->mem[i]), get_order(chunk->mem[i].length)); } static void mthca_free_icm_coherent(struct mthca_dev *dev, struct mthca_icm_chunk *chunk) { int i; for (i = 0; i < chunk->npages; ++i) { dma_free_coherent(&dev->pdev->dev, chunk->mem[i].length, lowmem_page_address(sg_page(&chunk->mem[i])), sg_dma_address(&chunk->mem[i])); } } void mthca_free_icm(struct mthca_dev *dev, struct mthca_icm *icm, int coherent) { struct mthca_icm_chunk *chunk, *tmp; if (!icm) return; list_for_each_entry_safe(chunk, tmp, &icm->chunk_list, list) { if (coherent) mthca_free_icm_coherent(dev, chunk); else mthca_free_icm_pages(dev, chunk); kfree(chunk); } kfree(icm); } static int mthca_alloc_icm_pages(struct scatterlist *mem, int order, gfp_t gfp_mask) { struct page *page; /* * Use __GFP_ZERO because buggy firmware assumes ICM pages are * cleared, and subtle failures are seen if they aren't. */ page = alloc_pages(gfp_mask | __GFP_ZERO, order); if (!page) return -ENOMEM; sg_set_page(mem, page, PAGE_SIZE << order, 0); return 0; } static int mthca_alloc_icm_coherent(struct device *dev, struct scatterlist *mem, int order, gfp_t gfp_mask) { void *buf = dma_alloc_coherent(dev, PAGE_SIZE << order, &sg_dma_address(mem), gfp_mask); if (!buf) return -ENOMEM; sg_set_buf(mem, buf, PAGE_SIZE << order); BUG_ON(mem->offset); sg_dma_len(mem) = PAGE_SIZE << order; return 0; } struct mthca_icm *mthca_alloc_icm(struct mthca_dev *dev, int npages, gfp_t gfp_mask, int coherent) { struct mthca_icm *icm; struct mthca_icm_chunk *chunk = NULL; int cur_order; int ret; /* We use sg_set_buf for coherent allocs, which assumes low memory */ BUG_ON(coherent && (gfp_mask & __GFP_HIGHMEM)); icm = kmalloc(sizeof *icm, gfp_mask & ~(__GFP_HIGHMEM | __GFP_NOWARN)); if (!icm) return icm; icm->refcount = 0; INIT_LIST_HEAD(&icm->chunk_list); cur_order = get_order(MTHCA_ICM_ALLOC_SIZE); while (npages > 0) { if (!chunk) { chunk = kmalloc(sizeof *chunk, gfp_mask & ~(__GFP_HIGHMEM | __GFP_NOWARN)); if (!chunk) goto fail; sg_init_table(chunk->mem, MTHCA_ICM_CHUNK_LEN); chunk->npages = 0; chunk->nsg = 0; list_add_tail(&chunk->list, &icm->chunk_list); } while (1 << cur_order > npages) --cur_order; if (coherent) ret = mthca_alloc_icm_coherent(&dev->pdev->dev, &chunk->mem[chunk->npages], cur_order, gfp_mask); else ret = mthca_alloc_icm_pages(&chunk->mem[chunk->npages], cur_order, gfp_mask); if (!ret) { ++chunk->npages; if (coherent) ++chunk->nsg; else if (chunk->npages == MTHCA_ICM_CHUNK_LEN) { chunk->nsg = pci_map_sg(dev->pdev, chunk->mem, chunk->npages, PCI_DMA_BIDIRECTIONAL); if (chunk->nsg <= 0) goto fail; } if (chunk->npages == MTHCA_ICM_CHUNK_LEN) chunk = NULL; npages -= 1 << cur_order; } else { --cur_order; if (cur_order < 0) goto fail; } } if (!coherent && chunk) { chunk->nsg = pci_map_sg(dev->pdev, chunk->mem, chunk->npages, PCI_DMA_BIDIRECTIONAL); if (chunk->nsg <= 0) goto fail; } return icm; fail: mthca_free_icm(dev, icm, coherent); return NULL; } int mthca_table_get(struct mthca_dev *dev, struct mthca_icm_table *table, int obj) { int i = (obj & (table->num_obj - 1)) * table->obj_size / MTHCA_TABLE_CHUNK_SIZE; int 
ret = 0; u8 status; mutex_lock(&table->mutex); if (table->icm[i]) { ++table->icm[i]->refcount; goto out; } table->icm[i] = mthca_alloc_icm(dev, MTHCA_TABLE_CHUNK_SIZE >> PAGE_SHIFT, (table->lowmem ? GFP_KERNEL : GFP_HIGHUSER) | __GFP_NOWARN, table->coherent); if (!table->icm[i]) { ret = -ENOMEM; goto out; } if (mthca_MAP_ICM(dev, table->icm[i], table->virt + i * MTHCA_TABLE_CHUNK_SIZE, &status) || status) { mthca_free_icm(dev, table->icm[i], table->coherent); table->icm[i] = NULL; ret = -ENOMEM; goto out; } ++table->icm[i]->refcount; out: mutex_unlock(&table->mutex); return ret; } void mthca_table_put(struct mthca_dev *dev, struct mthca_icm_table *table, int obj) { int i; u8 status; if (!mthca_is_memfree(dev)) return; i = (obj & (table->num_obj - 1)) * table->obj_size / MTHCA_TABLE_CHUNK_SIZE; mutex_lock(&table->mutex); if (--table->icm[i]->refcount == 0) { mthca_UNMAP_ICM(dev, table->virt + i * MTHCA_TABLE_CHUNK_SIZE, MTHCA_TABLE_CHUNK_SIZE / MTHCA_ICM_PAGE_SIZE, &status); mthca_free_icm(dev, table->icm[i], table->coherent); table->icm[i] = NULL; } mutex_unlock(&table->mutex); } void *mthca_table_find(struct mthca_icm_table *table, int obj, dma_addr_t *dma_handle) { int idx, offset, dma_offset, i; struct mthca_icm_chunk *chunk; struct mthca_icm *icm; struct page *page = NULL; if (!table->lowmem) return NULL; mutex_lock(&table->mutex); idx = (obj & (table->num_obj - 1)) * table->obj_size; icm = table->icm[idx / MTHCA_TABLE_CHUNK_SIZE]; dma_offset = offset = idx % MTHCA_TABLE_CHUNK_SIZE; if (!icm) goto out; list_for_each_entry(chunk, &icm->chunk_list, list) { for (i = 0; i < chunk->npages; ++i) { if (dma_handle && dma_offset >= 0) { if (sg_dma_len(&chunk->mem[i]) > dma_offset) *dma_handle = sg_dma_address(&chunk->mem[i]) + dma_offset; dma_offset -= sg_dma_len(&chunk->mem[i]); } /* DMA mapping can merge pages but not split them, * so if we found the page, dma_handle has already * been assigned to. */ if (chunk->mem[i].length > offset) { page = sg_page(&chunk->mem[i]); goto out; } offset -= chunk->mem[i].length; } } out: mutex_unlock(&table->mutex); return page ? 
lowmem_page_address(page) + offset : NULL; } int mthca_table_get_range(struct mthca_dev *dev, struct mthca_icm_table *table, int start, int end) { int inc = MTHCA_TABLE_CHUNK_SIZE / table->obj_size; int i, err; for (i = start; i <= end; i += inc) { err = mthca_table_get(dev, table, i); if (err) goto fail; } return 0; fail: while (i > start) { i -= inc; mthca_table_put(dev, table, i); } return err; } void mthca_table_put_range(struct mthca_dev *dev, struct mthca_icm_table *table, int start, int end) { int i; if (!mthca_is_memfree(dev)) return; for (i = start; i <= end; i += MTHCA_TABLE_CHUNK_SIZE / table->obj_size) mthca_table_put(dev, table, i); } struct mthca_icm_table *mthca_alloc_icm_table(struct mthca_dev *dev, u64 virt, int obj_size, int nobj, int reserved, int use_lowmem, int use_coherent) { struct mthca_icm_table *table; int obj_per_chunk; int num_icm; unsigned chunk_size; int i; u8 status; obj_per_chunk = MTHCA_TABLE_CHUNK_SIZE / obj_size; num_icm = DIV_ROUND_UP(nobj, obj_per_chunk); table = kmalloc(sizeof *table + num_icm * sizeof *table->icm, GFP_KERNEL); if (!table) return NULL; table->virt = virt; table->num_icm = num_icm; table->num_obj = nobj; table->obj_size = obj_size; table->lowmem = use_lowmem; table->coherent = use_coherent; mutex_init(&table->mutex); for (i = 0; i < num_icm; ++i) table->icm[i] = NULL; for (i = 0; i * MTHCA_TABLE_CHUNK_SIZE < reserved * obj_size; ++i) { chunk_size = MTHCA_TABLE_CHUNK_SIZE; if ((i + 1) * MTHCA_TABLE_CHUNK_SIZE > nobj * obj_size) chunk_size = nobj * obj_size - i * MTHCA_TABLE_CHUNK_SIZE; table->icm[i] = mthca_alloc_icm(dev, chunk_size >> PAGE_SHIFT, (use_lowmem ? GFP_KERNEL : GFP_HIGHUSER) | __GFP_NOWARN, use_coherent); if (!table->icm[i]) goto err; if (mthca_MAP_ICM(dev, table->icm[i], virt + i * MTHCA_TABLE_CHUNK_SIZE, &status) || status) { mthca_free_icm(dev, table->icm[i], table->coherent); table->icm[i] = NULL; goto err; } /* * Add a reference to this ICM chunk so that it never * gets freed (since it contains reserved firmware objects). 
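 *
 * (Sketch of the resulting lifetime for a chunk backing reserved
 *  objects:
 *
 *	mthca_alloc_icm_table():          refcount = 1
 *	mthca_table_get() on a live obj:  refcount = 2
 *	mthca_table_put() on that obj:    refcount = 1, chunk stays mapped
 *
 *  so only mthca_free_icm_table() ever unmaps and frees reserved
 *  chunks.)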
*/ ++table->icm[i]->refcount; } return table; err: for (i = 0; i < num_icm; ++i) if (table->icm[i]) { mthca_UNMAP_ICM(dev, virt + i * MTHCA_TABLE_CHUNK_SIZE, MTHCA_TABLE_CHUNK_SIZE / MTHCA_ICM_PAGE_SIZE, &status); mthca_free_icm(dev, table->icm[i], table->coherent); } kfree(table); return NULL; } void mthca_free_icm_table(struct mthca_dev *dev, struct mthca_icm_table *table) { int i; u8 status; for (i = 0; i < table->num_icm; ++i) if (table->icm[i]) { mthca_UNMAP_ICM(dev, table->virt + i * MTHCA_TABLE_CHUNK_SIZE, MTHCA_TABLE_CHUNK_SIZE / MTHCA_ICM_PAGE_SIZE, &status); mthca_free_icm(dev, table->icm[i], table->coherent); } kfree(table); } static u64 mthca_uarc_virt(struct mthca_dev *dev, struct mthca_uar *uar, int page) { return dev->uar_table.uarc_base + uar->index * dev->uar_table.uarc_size + page * MTHCA_ICM_PAGE_SIZE; } #include #include #include #include #include int mthca_map_user_db(struct mthca_dev *dev, struct mthca_uar *uar, struct mthca_user_db_table *db_tab, int index, u64 uaddr) { #ifdef __linux__ struct page *pages[1]; int ret = 0; u8 status; int i; if (!mthca_is_memfree(dev)) return 0; if (index < 0 || index > dev->uar_table.uarc_size / 8) return -EINVAL; mutex_lock(&db_tab->mutex); i = index / MTHCA_DB_REC_PER_PAGE; if ((db_tab->page[i].refcount >= MTHCA_DB_REC_PER_PAGE) || (db_tab->page[i].uvirt && db_tab->page[i].uvirt != uaddr) || (uaddr & 4095)) { ret = -EINVAL; goto out; } if (db_tab->page[i].refcount) { ++db_tab->page[i].refcount; goto out; } ret = get_user_pages(current, current->mm, uaddr & PAGE_MASK, 1, 1, 0, pages, NULL); if (ret < 0) goto out; sg_set_page(&db_tab->page[i].mem, pages[0], MTHCA_ICM_PAGE_SIZE, uaddr & ~PAGE_MASK); ret = pci_map_sg(dev->pdev, &db_tab->page[i].mem, 1, PCI_DMA_TODEVICE); if (ret < 0) { put_page(pages[0]); goto out; } ret = mthca_MAP_ICM_page(dev, sg_dma_address(&db_tab->page[i].mem), mthca_uarc_virt(dev, uar, i), &status); if (!ret && status) ret = -EINVAL; if (ret) { pci_unmap_sg(dev->pdev, &db_tab->page[i].mem, 1, PCI_DMA_TODEVICE); put_page(sg_page(&db_tab->page[i].mem)); goto out; } db_tab->page[i].uvirt = uaddr; db_tab->page[i].refcount = 1; out: mutex_unlock(&db_tab->mutex); return ret; #else struct proc *proc; vm_offset_t start; vm_paddr_t paddr; pmap_t pmap; vm_page_t m; int ret = 0; u8 status; int i; if (!mthca_is_memfree(dev)) return 0; if (index < 0 || index > dev->uar_table.uarc_size / 8) return -EINVAL; mutex_lock(&db_tab->mutex); i = index / MTHCA_DB_REC_PER_PAGE; start = 0; if ((db_tab->page[i].refcount >= MTHCA_DB_REC_PER_PAGE) || (db_tab->page[i].uvirt && db_tab->page[i].uvirt != uaddr) || (uaddr & 4095)) { ret = -EINVAL; goto out; } if (db_tab->page[i].refcount) { ++db_tab->page[i].refcount; goto out; } proc = curproc; pmap = vm_map_pmap(&proc->p_vmspace->vm_map); PROC_LOCK(proc); if (ptoa(pmap_wired_count(pmap) + 1) > lim_cur(proc, RLIMIT_MEMLOCK)) { PROC_UNLOCK(proc); ret = -ENOMEM; goto out; } PROC_UNLOCK(proc); if (cnt.v_wire_count + 1 > vm_page_max_wired) { ret = -EAGAIN; goto out; } start = uaddr & PAGE_MASK; ret = vm_map_wire(&proc->p_vmspace->vm_map, start, start + PAGE_SIZE, VM_MAP_WIRE_USER | VM_MAP_WIRE_NOHOLES | VM_MAP_WIRE_WRITE); if (ret != KERN_SUCCESS) { start = 0; ret = -ENOMEM; goto out; } paddr = pmap_extract(pmap, uaddr); if (paddr == 0) { ret = -EFAULT; goto out; } m = PHYS_TO_VM_PAGE(paddr); sg_set_page(&db_tab->page[i].mem, m, MTHCA_ICM_PAGE_SIZE, uaddr & ~PAGE_MASK); ret = pci_map_sg(dev->pdev, &db_tab->page[i].mem, 1, PCI_DMA_TODEVICE); if (ret < 0) goto out; ret = mthca_MAP_ICM_page(dev, 
sg_dma_address(&db_tab->page[i].mem), mthca_uarc_virt(dev, uar, i), &status); if (!ret && status) ret = -EINVAL; if (ret) { pci_unmap_sg(dev->pdev, &db_tab->page[i].mem, 1, PCI_DMA_TODEVICE); goto out; } db_tab->page[i].uvirt = uaddr; db_tab->page[i].refcount = 1; out: if (ret < 0 && start) vm_map_unwire(&curthread->td_proc->p_vmspace->vm_map, start, start + PAGE_SIZE, VM_MAP_WIRE_USER | VM_MAP_WIRE_NOHOLES); mutex_unlock(&db_tab->mutex); return ret; #endif } void mthca_unmap_user_db(struct mthca_dev *dev, struct mthca_uar *uar, struct mthca_user_db_table *db_tab, int index) { if (!mthca_is_memfree(dev)) return; /* * To make our bookkeeping simpler, we don't unmap DB * pages until we clean up the whole db table. */ mutex_lock(&db_tab->mutex); --db_tab->page[index / MTHCA_DB_REC_PER_PAGE].refcount; mutex_unlock(&db_tab->mutex); } struct mthca_user_db_table *mthca_init_user_db_tab(struct mthca_dev *dev) { struct mthca_user_db_table *db_tab; int npages; int i; if (!mthca_is_memfree(dev)) return NULL; npages = dev->uar_table.uarc_size / MTHCA_ICM_PAGE_SIZE; db_tab = kmalloc(sizeof *db_tab + npages * sizeof *db_tab->page, GFP_KERNEL); if (!db_tab) return ERR_PTR(-ENOMEM); mutex_init(&db_tab->mutex); for (i = 0; i < npages; ++i) { db_tab->page[i].refcount = 0; db_tab->page[i].uvirt = 0; sg_init_table(&db_tab->page[i].mem, 1); } return db_tab; } void mthca_cleanup_user_db_tab(struct mthca_dev *dev, struct mthca_uar *uar, struct mthca_user_db_table *db_tab) { int i; u8 status; if (!mthca_is_memfree(dev)) return; for (i = 0; i < dev->uar_table.uarc_size / MTHCA_ICM_PAGE_SIZE; ++i) { if (db_tab->page[i].uvirt) { mthca_UNMAP_ICM(dev, mthca_uarc_virt(dev, uar, i), 1, &status); pci_unmap_sg(dev->pdev, &db_tab->page[i].mem, 1, PCI_DMA_TODEVICE); #ifdef __linux__ put_page(sg_page(&db_tab->page[i].mem)); #else vm_offset_t start; start = db_tab->page[i].uvirt & PAGE_MASK; vm_map_unwire(&curthread->td_proc->p_vmspace->vm_map, start, start + PAGE_SIZE, VM_MAP_WIRE_USER | VM_MAP_WIRE_NOHOLES); #endif } } kfree(db_tab); } int mthca_alloc_db(struct mthca_dev *dev, enum mthca_db_type type, u32 qn, __be32 **db) { int group; int start, end, dir; int i, j; struct mthca_db_page *page; int ret = 0; u8 status; mutex_lock(&dev->db_tab->mutex); switch (type) { case MTHCA_DB_TYPE_CQ_ARM: case MTHCA_DB_TYPE_SQ: group = 0; start = 0; end = dev->db_tab->max_group1; dir = 1; break; case MTHCA_DB_TYPE_CQ_SET_CI: case MTHCA_DB_TYPE_RQ: case MTHCA_DB_TYPE_SRQ: group = 1; start = dev->db_tab->npages - 1; end = dev->db_tab->min_group2; dir = -1; break; default: ret = -EINVAL; goto out; } for (i = start; i != end; i += dir) if (dev->db_tab->page[i].db_rec && !bitmap_full(dev->db_tab->page[i].used, MTHCA_DB_REC_PER_PAGE)) { page = dev->db_tab->page + i; goto found; } for (i = start; i != end; i += dir) if (!dev->db_tab->page[i].db_rec) { page = dev->db_tab->page + i; goto alloc; } if (dev->db_tab->max_group1 >= dev->db_tab->min_group2 - 1) { ret = -ENOMEM; goto out; } if (group == 0) ++dev->db_tab->max_group1; else --dev->db_tab->min_group2; page = dev->db_tab->page + end; alloc: page->db_rec = dma_alloc_coherent(&dev->pdev->dev, MTHCA_ICM_PAGE_SIZE, &page->mapping, GFP_KERNEL); if (!page->db_rec) { ret = -ENOMEM; goto out; } memset(page->db_rec, 0, MTHCA_ICM_PAGE_SIZE); ret = mthca_MAP_ICM_page(dev, page->mapping, mthca_uarc_virt(dev, &dev->driver_uar, i), &status); if (!ret && status) ret = -EINVAL; if (ret) { dma_free_coherent(&dev->pdev->dev, MTHCA_ICM_PAGE_SIZE, page->db_rec, page->mapping); goto out; } bitmap_zero(page->used, 
MTHCA_DB_REC_PER_PAGE); found: j = find_first_zero_bit(page->used, MTHCA_DB_REC_PER_PAGE); set_bit(j, page->used); if (group == 1) j = MTHCA_DB_REC_PER_PAGE - 1 - j; ret = i * MTHCA_DB_REC_PER_PAGE + j; page->db_rec[j] = cpu_to_be64((qn << 8) | (type << 5)); *db = (__be32 *) &page->db_rec[j]; out: mutex_unlock(&dev->db_tab->mutex); return ret; } void mthca_free_db(struct mthca_dev *dev, int type, int db_index) { int i, j; struct mthca_db_page *page; u8 status; i = db_index / MTHCA_DB_REC_PER_PAGE; j = db_index % MTHCA_DB_REC_PER_PAGE; page = dev->db_tab->page + i; mutex_lock(&dev->db_tab->mutex); page->db_rec[j] = 0; if (i >= dev->db_tab->min_group2) j = MTHCA_DB_REC_PER_PAGE - 1 - j; clear_bit(j, page->used); if (bitmap_empty(page->used, MTHCA_DB_REC_PER_PAGE) && i >= dev->db_tab->max_group1 - 1) { mthca_UNMAP_ICM(dev, mthca_uarc_virt(dev, &dev->driver_uar, i), 1, &status); dma_free_coherent(&dev->pdev->dev, MTHCA_ICM_PAGE_SIZE, page->db_rec, page->mapping); page->db_rec = NULL; if (i == dev->db_tab->max_group1) { --dev->db_tab->max_group1; /* XXX may be able to unmap more pages now */ } if (i == dev->db_tab->min_group2) ++dev->db_tab->min_group2; } mutex_unlock(&dev->db_tab->mutex); } int mthca_init_db_tab(struct mthca_dev *dev) { int i; if (!mthca_is_memfree(dev)) return 0; dev->db_tab = kmalloc(sizeof *dev->db_tab, GFP_KERNEL); if (!dev->db_tab) return -ENOMEM; mutex_init(&dev->db_tab->mutex); dev->db_tab->npages = dev->uar_table.uarc_size / MTHCA_ICM_PAGE_SIZE; dev->db_tab->max_group1 = 0; dev->db_tab->min_group2 = dev->db_tab->npages - 1; dev->db_tab->page = kmalloc(dev->db_tab->npages * sizeof *dev->db_tab->page, GFP_KERNEL); if (!dev->db_tab->page) { kfree(dev->db_tab); return -ENOMEM; } for (i = 0; i < dev->db_tab->npages; ++i) dev->db_tab->page[i].db_rec = NULL; return 0; } void mthca_cleanup_db_tab(struct mthca_dev *dev) { int i; u8 status; if (!mthca_is_memfree(dev)) return; /* * Because we don't always free our UARC pages when they * become empty to make mthca_free_db() simpler we need to * make a sweep through the doorbell pages and free any * leftover pages now. */ for (i = 0; i < dev->db_tab->npages; ++i) { if (!dev->db_tab->page[i].db_rec) continue; if (!bitmap_empty(dev->db_tab->page[i].used, MTHCA_DB_REC_PER_PAGE)) mthca_warn(dev, "Kernel UARC page %d not empty\n", i); mthca_UNMAP_ICM(dev, mthca_uarc_virt(dev, &dev->driver_uar, i), 1, &status); dma_free_coherent(&dev->pdev->dev, MTHCA_ICM_PAGE_SIZE, dev->db_tab->page[i].db_rec, dev->db_tab->page[i].mapping); } kfree(dev->db_tab->page); kfree(dev->db_tab); } Index: stable/10/sys/ofed/drivers/infiniband/hw/mthca/mthca_uar.c =================================================================== --- stable/10/sys/ofed/drivers/infiniband/hw/mthca/mthca_uar.c (revision 273245) +++ stable/10/sys/ofed/drivers/infiniband/hw/mthca/mthca_uar.c (revision 273246) @@ -1,78 +1,78 @@ /* * Copyright (c) 2005 Topspin Communications. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU * General Public License (GPL) Version 2, available from the file * COPYING in the main directory of this source tree, or the * OpenIB.org BSD license below: * * Redistribution and use in source and binary forms, with or * without modification, are permitted provided that the following * conditions are met: * * - Redistributions of source code must retain the above * copyright notice, this list of conditions and the following * disclaimer. 
* * - Redistributions in binary form must reproduce the above * copyright notice, this list of conditions and the following * disclaimer in the documentation and/or other materials * provided with the distribution. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include /* PAGE_SHIFT */ +#include #include "mthca_dev.h" #include "mthca_memfree.h" int mthca_uar_alloc(struct mthca_dev *dev, struct mthca_uar *uar) { uar->index = mthca_alloc(&dev->uar_table.alloc); if (uar->index == -1) return -ENOMEM; uar->pfn = (pci_resource_start(dev->pdev, 2) >> PAGE_SHIFT) + uar->index; return 0; } void mthca_uar_free(struct mthca_dev *dev, struct mthca_uar *uar) { mthca_free(&dev->uar_table.alloc, uar->index); } int mthca_init_uar_table(struct mthca_dev *dev) { int ret; ret = mthca_alloc_init(&dev->uar_table.alloc, dev->limits.num_uars, dev->limits.num_uars - 1, dev->limits.reserved_uars + 1); if (ret) return ret; ret = mthca_init_db_tab(dev); if (ret) mthca_alloc_cleanup(&dev->uar_table.alloc); return ret; } void mthca_cleanup_uar_table(struct mthca_dev *dev) { mthca_cleanup_db_tab(dev); /* XXX check if any UARs are still allocated? */ mthca_alloc_cleanup(&dev->uar_table.alloc); } Index: stable/10/sys/ofed/drivers/infiniband/ulp/ipoib/ipoib_ethtool.c =================================================================== --- stable/10/sys/ofed/drivers/infiniband/ulp/ipoib/ipoib_ethtool.c (revision 273245) +++ stable/10/sys/ofed/drivers/infiniband/ulp/ipoib/ipoib_ethtool.c (revision 273246) @@ -1,159 +1,158 @@ /* * Copyright (c) 2007 Mellanox Technologies. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU * General Public License (GPL) Version 2, available from the file * COPYING in the main directory of this source tree, or the * OpenIB.org BSD license below: * * Redistribution and use in source and binary forms, with or * without modification, are permitted provided that the following * conditions are met: * * - Redistributions of source code must retain the above * copyright notice, this list of conditions and the following * disclaimer. * * - Redistributions in binary form must reproduce the above * copyright notice, this list of conditions and the following * disclaimer in the documentation and/or other materials * provided with the distribution. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
*/ #include -#include #include #include "ipoib.h" static void ipoib_get_drvinfo(struct ifnet *netdev, struct ethtool_drvinfo *drvinfo) { strncpy(drvinfo->driver, "ipoib", sizeof(drvinfo->driver) - 1); } static u32 ipoib_get_rx_csum(struct ifnet *dev) { struct ipoib_dev_priv *priv = dev->if_softc; return test_bit(IPOIB_FLAG_CSUM, &priv->flags) && !test_bit(IPOIB_FLAG_ADMIN_CM, &priv->flags); } static int ipoib_get_coalesce(struct ifnet *dev, struct ethtool_coalesce *coal) { struct ipoib_dev_priv *priv = dev->if_softc; coal->rx_coalesce_usecs = priv->ethtool.coalesce_usecs; coal->tx_coalesce_usecs = priv->ethtool.coalesce_usecs; coal->rx_max_coalesced_frames = priv->ethtool.max_coalesced_frames; coal->tx_max_coalesced_frames = priv->ethtool.max_coalesced_frames; return 0; } static int ipoib_set_coalesce(struct ifnet *dev, struct ethtool_coalesce *coal) { struct ipoib_dev_priv *priv = dev->if_softc; int ret; /* * Since IPoIB uses a single CQ for both rx and tx, we assume * that rx params dictate the configuration. These values are * saved in the private data and returned when ipoib_get_coalesce() * is called. */ if (coal->rx_coalesce_usecs > 0xffff || coal->rx_max_coalesced_frames > 0xffff) return -EINVAL; if (coal->rx_max_coalesced_frames | coal->rx_coalesce_usecs) { if (!coal->rx_max_coalesced_frames) coal->rx_max_coalesced_frames = 0xffff; else if (!coal->rx_coalesce_usecs) coal->rx_coalesce_usecs = 0xffff; } ret = ib_modify_cq(priv->recv_cq, coal->rx_max_coalesced_frames, coal->rx_coalesce_usecs); if (ret && ret != -ENOSYS) { ipoib_warn(priv, "failed modifying CQ (%d)\n", ret); return ret; } coal->tx_coalesce_usecs = coal->rx_coalesce_usecs; coal->tx_max_coalesced_frames = coal->rx_max_coalesced_frames; priv->ethtool.coalesce_usecs = coal->rx_coalesce_usecs; priv->ethtool.max_coalesced_frames = coal->rx_max_coalesced_frames; return 0; } static const char ipoib_stats_keys[][ETH_GSTRING_LEN] = { "LRO aggregated", "LRO flushed", "LRO avg aggr", "LRO no desc" }; static void ipoib_get_strings(struct ifnet *netdev, u32 stringset, u8 *data) { switch (stringset) { case ETH_SS_STATS: memcpy(data, *ipoib_stats_keys, sizeof(ipoib_stats_keys)); break; } } static int ipoib_get_sset_count(struct ifnet *dev, int sset) { switch (sset) { case ETH_SS_STATS: return ARRAY_SIZE(ipoib_stats_keys); default: return -EOPNOTSUPP; } } static void ipoib_get_ethtool_stats(struct ifnet *dev, struct ethtool_stats *stats, uint64_t *data) { struct ipoib_dev_priv *priv = dev->if_softc; int index = 0; /* Get LRO statistics */ data[index++] = priv->lro.lro_mgr.stats.aggregated; data[index++] = priv->lro.lro_mgr.stats.flushed; if (priv->lro.lro_mgr.stats.flushed) data[index++] = priv->lro.lro_mgr.stats.aggregated / priv->lro.lro_mgr.stats.flushed; else data[index++] = 0; data[index++] = priv->lro.lro_mgr.stats.no_desc; } static const struct ethtool_ops ipoib_ethtool_ops = { .get_drvinfo = ipoib_get_drvinfo, .get_rx_csum = ipoib_get_rx_csum, .get_coalesce = ipoib_get_coalesce, .set_coalesce = ipoib_set_coalesce, .get_flags = ethtool_op_get_flags, .set_flags = ethtool_op_set_flags, .get_strings = ipoib_get_strings, .get_sset_count = ipoib_get_sset_count, .get_ethtool_stats = ipoib_get_ethtool_stats, }; void ipoib_set_ethtool_ops(struct ifnet *dev) { SET_ETHTOOL_OPS(dev, &ipoib_ethtool_ops); } Index: stable/10/sys/ofed/drivers/infiniband/ulp/ipoib/ipoib_verbs.c =================================================================== --- stable/10/sys/ofed/drivers/infiniband/ulp/ipoib/ipoib_verbs.c (revision 273245) +++ 
stable/10/sys/ofed/drivers/infiniband/ulp/ipoib/ipoib_verbs.c (revision 273246) @@ -1,294 +1,293 @@ /* * Copyright (c) 2004, 2005 Topspin Communications. All rights reserved. * Copyright (c) 2005 Mellanox Technologies. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU * General Public License (GPL) Version 2, available from the file * COPYING in the main directory of this source tree, or the * OpenIB.org BSD license below: * * Redistribution and use in source and binary forms, with or * without modification, are permitted provided that the following * conditions are met: * * - Redistributions of source code must retain the above * copyright notice, this list of conditions and the following * disclaimer. * * - Redistributions in binary form must reproduce the above * copyright notice, this list of conditions and the following * disclaimer in the documentation and/or other materials * provided with the distribution. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ #include "ipoib.h" -#include int ipoib_mcast_attach(struct ipoib_dev_priv *priv, u16 mlid, union ib_gid *mgid, int set_qkey) { struct ib_qp_attr *qp_attr = NULL; int ret; u16 pkey_index; if (ib_find_pkey(priv->ca, priv->port, priv->pkey, &pkey_index)) { clear_bit(IPOIB_PKEY_ASSIGNED, &priv->flags); ret = -ENXIO; goto out; } set_bit(IPOIB_PKEY_ASSIGNED, &priv->flags); if (set_qkey) { ret = -ENOMEM; qp_attr = kmalloc(sizeof *qp_attr, GFP_KERNEL); if (!qp_attr) goto out; /* set correct QKey for QP */ qp_attr->qkey = priv->qkey; ret = ib_modify_qp(priv->qp, qp_attr, IB_QP_QKEY); if (ret) { ipoib_warn(priv, "failed to modify QP, ret = %d\n", ret); goto out; } } /* attach QP to multicast group */ ret = ib_attach_mcast(priv->qp, mgid, mlid); if (ret) ipoib_warn(priv, "failed to attach to multicast group, ret = %d\n", ret); out: kfree(qp_attr); return ret; } int ipoib_init_qp(struct ipoib_dev_priv *priv) { int ret; struct ib_qp_attr qp_attr; int attr_mask; if (!test_bit(IPOIB_PKEY_ASSIGNED, &priv->flags)) return -1; qp_attr.qp_state = IB_QPS_INIT; qp_attr.qkey = 0; qp_attr.port_num = priv->port; qp_attr.pkey_index = priv->pkey_index; attr_mask = IB_QP_QKEY | IB_QP_PORT | IB_QP_PKEY_INDEX | IB_QP_STATE; ret = ib_modify_qp(priv->qp, &qp_attr, attr_mask); if (ret) { ipoib_warn(priv, "failed to modify QP to init, ret = %d\n", ret); goto out_fail; } qp_attr.qp_state = IB_QPS_RTR; /* Can't set this in a INIT->RTR transition */ attr_mask &= ~IB_QP_PORT; ret = ib_modify_qp(priv->qp, &qp_attr, attr_mask); if (ret) { ipoib_warn(priv, "failed to modify QP to RTR, ret = %d\n", ret); goto out_fail; } qp_attr.qp_state = IB_QPS_RTS; qp_attr.sq_psn = 0; attr_mask |= IB_QP_SQ_PSN; attr_mask &= ~IB_QP_PKEY_INDEX; ret = ib_modify_qp(priv->qp, &qp_attr, attr_mask); if (ret) { ipoib_warn(priv, "failed to modify QP to RTS, ret = %d\n", ret); goto out_fail; } return 0; out_fail: qp_attr.qp_state = IB_QPS_RESET; if (ib_modify_qp(priv->qp, &qp_attr, IB_QP_STATE)) ipoib_warn(priv, "Failed to modify QP to RESET state\n"); return ret; } 
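/*
 * A minimal, self-contained sketch of the UD QP bring-up that ipoib_init_qp()
 * above performs, written only to make the attribute-mask bookkeeping
 * explicit.  The helper name ud_qp_bring_up() and its argument list are
 * illustrative, not part of the driver; it assumes the caller has already
 * created a UD QP and resolved its port, P_Key index and Q_Key.
 */
#include <linux/string.h>
#include <rdma/ib_verbs.h>

static int
ud_qp_bring_up(struct ib_qp *qp, u8 port, u16 pkey_index, u32 qkey)
{
	struct ib_qp_attr attr;
	int ret;

	/* RESET -> INIT: port, P_Key index and Q_Key must all be supplied. */
	memset(&attr, 0, sizeof(attr));
	attr.qp_state = IB_QPS_INIT;
	attr.port_num = port;
	attr.pkey_index = pkey_index;
	attr.qkey = qkey;
	ret = ib_modify_qp(qp, &attr,
	    IB_QP_STATE | IB_QP_PORT | IB_QP_PKEY_INDEX | IB_QP_QKEY);
	if (ret)
		return ret;

	/*
	 * INIT -> RTR: IB_QP_PORT is only valid on the RESET->INIT
	 * transition, so it is dropped from the mask here; for a UD QP
	 * nothing beyond the state change is mandatory on this step.
	 */
	attr.qp_state = IB_QPS_RTR;
	ret = ib_modify_qp(qp, &attr, IB_QP_STATE);
	if (ret)
		return ret;

	/* RTR -> RTS: the initial send PSN is supplied on this transition. */
	attr.qp_state = IB_QPS_RTS;
	attr.sq_psn = 0;
	return ib_modify_qp(qp, &attr, IB_QP_STATE | IB_QP_SQ_PSN);
}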
int ipoib_transport_dev_init(struct ipoib_dev_priv *priv, struct ib_device *ca) { struct ib_qp_init_attr init_attr = { .cap = { .max_send_wr = ipoib_sendq_size, .max_recv_wr = ipoib_recvq_size, .max_send_sge = 1, .max_recv_sge = IPOIB_UD_RX_SG }, .sq_sig_type = IB_SIGNAL_ALL_WR, .qp_type = IB_QPT_UD }; int ret, size; int i; /* XXX struct ethtool_coalesce *coal; */ priv->pd = ib_alloc_pd(priv->ca); if (IS_ERR(priv->pd)) { printk(KERN_WARNING "%s: failed to allocate PD\n", ca->name); return -ENODEV; } priv->mr = ib_get_dma_mr(priv->pd, IB_ACCESS_LOCAL_WRITE); if (IS_ERR(priv->mr)) { printk(KERN_WARNING "%s: ib_get_dma_mr failed\n", ca->name); goto out_free_pd; } size = ipoib_recvq_size + 1; ret = ipoib_cm_dev_init(priv); if (!ret) { size += ipoib_sendq_size; if (ipoib_cm_has_srq(priv)) size += ipoib_recvq_size + 1; /* 1 extra for rx_drain_qp */ else size += ipoib_recvq_size * ipoib_max_conn_qp; } priv->recv_cq = ib_create_cq(priv->ca, ipoib_ib_completion, NULL, priv, size, 0); if (IS_ERR(priv->recv_cq)) { printk(KERN_WARNING "%s: failed to create receive CQ\n", ca->name); goto out_free_mr; } priv->send_cq = ib_create_cq(priv->ca, ipoib_send_comp_handler, NULL, priv, ipoib_sendq_size, 0); if (IS_ERR(priv->send_cq)) { printk(KERN_WARNING "%s: failed to create send CQ\n", ca->name); goto out_free_recv_cq; } if (ib_req_notify_cq(priv->recv_cq, IB_CQ_NEXT_COMP)) goto out_free_send_cq; #if 0 /* XXX */ coal = kzalloc(sizeof *coal, GFP_KERNEL); if (coal) { coal->rx_coalesce_usecs = 10; coal->tx_coalesce_usecs = 10; coal->rx_max_coalesced_frames = 16; coal->tx_max_coalesced_frames = 16; dev->ethtool_ops->set_coalesce(dev, coal); kfree(coal); } #endif init_attr.send_cq = priv->send_cq; init_attr.recv_cq = priv->recv_cq; if (priv->hca_caps & IB_DEVICE_UD_TSO) init_attr.create_flags |= IB_QP_CREATE_IPOIB_UD_LSO; if (priv->hca_caps & IB_DEVICE_BLOCK_MULTICAST_LOOPBACK) init_attr.create_flags |= IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK; init_attr.cap.max_send_sge = IPOIB_UD_TX_SG; priv->qp = ib_create_qp(priv->pd, &init_attr); if (IS_ERR(priv->qp)) { printk(KERN_WARNING "%s: failed to create QP\n", ca->name); goto out_free_send_cq; } IF_LLADDR(priv->dev)[1] = (priv->qp->qp_num >> 16) & 0xff; IF_LLADDR(priv->dev)[2] = (priv->qp->qp_num >> 8) & 0xff; IF_LLADDR(priv->dev)[3] = (priv->qp->qp_num ) & 0xff; for (i = 0; i < IPOIB_MAX_TX_SG; ++i) priv->tx_sge[i].lkey = priv->mr->lkey; priv->tx_wr.opcode = IB_WR_SEND; priv->tx_wr.sg_list = priv->tx_sge; priv->tx_wr.send_flags = IB_SEND_SIGNALED; for (i = 0; i < IPOIB_UD_RX_SG; ++i) priv->rx_sge[i].lkey = priv->mr->lkey; priv->rx_wr.next = NULL; priv->rx_wr.sg_list = priv->rx_sge; return 0; out_free_send_cq: ib_destroy_cq(priv->send_cq); out_free_recv_cq: ib_destroy_cq(priv->recv_cq); out_free_mr: ib_dereg_mr(priv->mr); ipoib_cm_dev_cleanup(priv); out_free_pd: ib_dealloc_pd(priv->pd); return -ENODEV; } void ipoib_transport_dev_cleanup(struct ipoib_dev_priv *priv) { if (priv->qp) { if (ib_destroy_qp(priv->qp)) ipoib_warn(priv, "ib_qp_destroy failed\n"); priv->qp = NULL; clear_bit(IPOIB_PKEY_ASSIGNED, &priv->flags); } if (ib_destroy_cq(priv->send_cq)) ipoib_warn(priv, "ib_cq_destroy (send) failed\n"); if (ib_destroy_cq(priv->recv_cq)) ipoib_warn(priv, "ib_cq_destroy (recv) failed\n"); ipoib_cm_dev_cleanup(priv); if (ib_dereg_mr(priv->mr)) ipoib_warn(priv, "ib_dereg_mr failed\n"); if (ib_dealloc_pd(priv->pd)) ipoib_warn(priv, "ib_dealloc_pd failed\n"); } void ipoib_event(struct ib_event_handler *handler, struct ib_event *record) { struct ipoib_dev_priv *priv = 
container_of(handler, struct ipoib_dev_priv, event_handler); if (record->element.port_num != priv->port) return; ipoib_dbg(priv, "Event %d on device %s port %d\n", record->event, record->device->name, record->element.port_num); if (record->event == IB_EVENT_SM_CHANGE || record->event == IB_EVENT_CLIENT_REREGISTER) { queue_work(ipoib_workqueue, &priv->flush_light); } else if (record->event == IB_EVENT_PORT_ERR || record->event == IB_EVENT_PORT_ACTIVE || record->event == IB_EVENT_LID_CHANGE) { queue_work(ipoib_workqueue, &priv->flush_normal); } else if (record->event == IB_EVENT_PKEY_CHANGE) { queue_work(ipoib_workqueue, &priv->flush_heavy); } } Index: stable/10/sys/ofed/drivers/net/mlx4/en_params.c =================================================================== Property changes on: stable/10/sys/ofed/drivers/net/mlx4/en_params.c ___________________________________________________________________ Deleted: fbsd:nokeywords ## -1 +0,0 ## -true \ No newline at end of property Index: stable/10/sys/ofed/drivers/net/mlx4/xrcd.c =================================================================== --- stable/10/sys/ofed/drivers/net/mlx4/xrcd.c (revision 273245) +++ stable/10/sys/ofed/drivers/net/mlx4/xrcd.c (nonexistent) @@ -1,69 +0,0 @@ -/* - * Copyright (c) 2006, 2007 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2007 Mellanox Technologies. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ - -#include - -#include "mlx4.h" - -int mlx4_xrcd_alloc(struct mlx4_dev *dev, u32 *xrcdn) -{ - struct mlx4_priv *priv = mlx4_priv(dev); - - *xrcdn = mlx4_bitmap_alloc(&priv->xrcd_bitmap); - if (*xrcdn == -1) - return -ENOMEM; - - return 0; -} -EXPORT_SYMBOL_GPL(mlx4_xrcd_alloc); - -void mlx4_xrcd_free(struct mlx4_dev *dev, u32 xrcdn) -{ - mlx4_bitmap_free(&mlx4_priv(dev)->xrcd_bitmap, xrcdn); -} -EXPORT_SYMBOL_GPL(mlx4_xrcd_free); - -int __devinit mlx4_init_xrcd_table(struct mlx4_dev *dev) -{ - struct mlx4_priv *priv = mlx4_priv(dev); - - return mlx4_bitmap_init(&priv->xrcd_bitmap, (1 << 16), - (1 << 16) - 1, dev->caps.reserved_xrcds + 1, 0); -} - -void mlx4_cleanup_xrcd_table(struct mlx4_dev *dev) -{ - mlx4_bitmap_cleanup(&mlx4_priv(dev)->xrcd_bitmap); -} - - Property changes on: stable/10/sys/ofed/drivers/net/mlx4/xrcd.c ___________________________________________________________________ Deleted: fbsd:nokeywords ## -1 +0,0 ## -true \ No newline at end of property Index: stable/10/sys/ofed/drivers/net/mlx4/en_frag.c =================================================================== --- stable/10/sys/ofed/drivers/net/mlx4/en_frag.c (revision 273245) +++ stable/10/sys/ofed/drivers/net/mlx4/en_frag.c (nonexistent) @@ -1,192 +0,0 @@ -/* - * Copyright (c) 2007 Mellanox Technologies. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- * - */ - -#include "opt_inet.h" -#include "mlx4_en.h" - -#ifdef INET - -#include -#include -#include - -static struct mlx4_en_ipfrag *find_session(struct mlx4_en_rx_ring *ring, - struct ip *iph) -{ - struct mlx4_en_ipfrag *session; - int i; - - for (i = 0; i < MLX4_EN_NUM_IPFRAG_SESSIONS; i++) { - session = &ring->ipfrag[i]; - if (session->fragments == NULL) - continue; - if (session->daddr == iph->ip_dst.s_addr && - session->saddr == iph->ip_src.s_addr && - session->id == iph->ip_id && - session->protocol == iph->ip_p) { - return session; - } - } - return NULL; -} - -static struct mlx4_en_ipfrag *start_session(struct mlx4_en_rx_ring *ring, - struct ip *iph) -{ - struct mlx4_en_ipfrag *session; - int index = -1; - int i; - - for (i = 0; i < MLX4_EN_NUM_IPFRAG_SESSIONS; i++) { - if (ring->ipfrag[i].fragments == NULL) { - index = i; - break; - } - } - if (index < 0) - return NULL; - - session = &ring->ipfrag[index]; - - return session; -} - - -static void flush_session(struct mlx4_en_priv *priv, - struct mlx4_en_ipfrag *session, - u16 more) -{ - struct mbuf *mb = session->fragments; - struct ip *iph = mb->m_pkthdr.PH_loc.ptr; - struct net_device *dev = mb->m_pkthdr.rcvif; - - /* Update IP length and checksum */ - iph->ip_len = htons(session->total_len); - iph->ip_off = htons(more | (session->offset >> 3)); - iph->ip_sum = 0; - iph->ip_sum = in_cksum_skip(mb, iph->ip_hl * 4, - (char *)iph - mb->m_data); - - dev->if_input(dev, mb); - session->fragments = NULL; - session->last = NULL; -} - - -static inline void frag_append(struct mlx4_en_priv *priv, - struct mlx4_en_ipfrag *session, - struct mbuf *mb, - unsigned int data_len) -{ - struct mbuf *parent = session->fragments; - - /* Update mb bookkeeping */ - parent->m_pkthdr.len += data_len; - session->total_len += data_len; - - m_adj(mb, mb->m_pkthdr.len - data_len); - - session->last->m_next = mb; - for (; mb->m_next != NULL; mb = mb->m_next); - session->last = mb; -} - -int mlx4_en_rx_frags(struct mlx4_en_priv *priv, struct mlx4_en_rx_ring *ring, - struct mbuf *mb, struct mlx4_cqe *cqe) -{ - struct mlx4_en_ipfrag *session; - struct ip *iph; - u16 ip_len; - u16 ip_hlen; - int data_len; - u16 offset; - - iph = (struct ip *)(mtod(mb, char *) + ETHER_HDR_LEN); - mb->m_pkthdr.PH_loc.ptr = iph; - ip_len = ntohs(iph->ip_len); - ip_hlen = iph->ip_hl * 4; - data_len = ip_len - ip_hlen; - offset = ntohs(iph->ip_off); - offset &= IP_OFFMASK; - offset <<= 3; - - session = find_session(ring, iph); - if (unlikely(in_cksum_skip(mb, ip_hlen, (char *)iph - mb->m_data))) { - if (session) - flush_session(priv, session, IP_MF); - return -EINVAL; - } - if (session) { - if (unlikely(session->offset + session->total_len != - offset + ip_hlen || - session->total_len + mb->m_pkthdr.len > 65536)) { - flush_session(priv, session, IP_MF); - goto new_session; - } - frag_append(priv, session, mb, data_len); - } else { -new_session: - session = start_session(ring, iph); - if (unlikely(!session)) - return -ENOSPC; - - session->fragments = mb; - session->daddr = iph->ip_dst.s_addr; - session->saddr = iph->ip_src.s_addr; - session->id = iph->ip_id; - session->protocol = iph->ip_p; - session->total_len = ip_len; - session->offset = offset; - for (; mb->m_next != NULL; mb = mb->m_next); - session->last = mb; - } - if (!(ntohs(iph->ip_off) & IP_MF)) - flush_session(priv, session, 0); - - return 0; -} - - -void mlx4_en_flush_frags(struct mlx4_en_priv *priv, - struct mlx4_en_rx_ring *ring) -{ - struct mlx4_en_ipfrag *session; - int i; - - for (i = 0; i < 
MLX4_EN_NUM_IPFRAG_SESSIONS; i++) { - session = &ring->ipfrag[i]; - if (session->fragments) - flush_session(priv, session, IP_MF); - } -} -#endif Property changes on: stable/10/sys/ofed/drivers/net/mlx4/en_frag.c ___________________________________________________________________ Deleted: fbsd:nokeywords ## -1 +0,0 ## -true \ No newline at end of property Index: stable/10/sys/ofed/drivers/net/mlx4/Makefile =================================================================== --- stable/10/sys/ofed/drivers/net/mlx4/Makefile (revision 273245) +++ stable/10/sys/ofed/drivers/net/mlx4/Makefile (revision 273246) @@ -1,34 +1,33 @@ # $FreeBSD$ #.PATH: ${.CURDIR}/../../ofed/drivers/net/mlx4:${.CURDIR}/../../ofed/include/linux .PATH: ${.CURDIR}/../../../../../include/linux .include KMOD = mlx4 SRCS = device_if.h bus_if.h pci_if.h vnode_if.h SRCS+= alloc.c catas.c cmd.c cq.c eq.c fw.c icm.c intf.c main.c mcg.c mr.c linux_compat.c linux_radix.c SRCS+= pd.c port.c profile.c qp.c reset.c sense.c srq.c resource_tracker.c sys_tune.c SRCS+= opt_inet.h opt_inet6.h #CFLAGS+= -I${.CURDIR}/../../ofed/drivers/net/mlx4 #CFLAGS+= -I${.CURDIR}/../../ofed/include/ CFLAGS+= -I${.CURDIR}/../../../../../include .if !defined(KERNBUILDDIR) .if ${MK_INET_SUPPORT} != "no" opt_inet.h: @echo "#define INET 1" > ${.TARGET} .endif .if ${MK_INET6_SUPPORT} != "no" opt_inet6.h: @echo "#define INET6 1" > ${.TARGET} .endif .endif .include CFLAGS+= -Wno-cast-qual -Wno-pointer-arith ${GCC_MS_EXTENSIONS} - Index: stable/10/sys/ofed/drivers/net/mlx4/en_ethtool.c =================================================================== --- stable/10/sys/ofed/drivers/net/mlx4/en_ethtool.c (revision 273245) +++ stable/10/sys/ofed/drivers/net/mlx4/en_ethtool.c (revision 273246) @@ -1,1617 +1,1616 @@ /* * Copyright (c) 2007, 2014 Mellanox Technologies. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU * General Public License (GPL) Version 2, available from the file * COPYING in the main directory of this source tree, or the * OpenIB.org BSD license below: * * Redistribution and use in source and binary forms, with or * without modification, are permitted provided that the following * conditions are met: * * - Redistributions of source code must retain the above * copyright notice, this list of conditions and the following * disclaimer. * * - Redistributions in binary form must reproduce the above * copyright notice, this list of conditions and the following * disclaimer in the documentation and/or other materials * provided with the distribution. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
* */ #include -#include #include #include #include #include #include #include "mlx4_en.h" #include "en_port.h" #define EN_ETHTOOL_QP_ATTACH (1ull << 63) union mlx4_ethtool_flow_union { struct ethtool_tcpip4_spec tcp_ip4_spec; struct ethtool_tcpip4_spec udp_ip4_spec; struct ethtool_tcpip4_spec sctp_ip4_spec; struct ethtool_ah_espip4_spec ah_ip4_spec; struct ethtool_ah_espip4_spec esp_ip4_spec; struct ethtool_usrip4_spec usr_ip4_spec; struct ethhdr ether_spec; __u8 hdata[52]; }; struct mlx4_ethtool_flow_ext { __u8 padding[2]; unsigned char h_dest[ETH_ALEN]; __be16 vlan_etype; __be16 vlan_tci; __be32 data[2]; }; struct mlx4_ethtool_rx_flow_spec { __u32 flow_type; union mlx4_ethtool_flow_union h_u; struct mlx4_ethtool_flow_ext h_ext; union mlx4_ethtool_flow_union m_u; struct mlx4_ethtool_flow_ext m_ext; __u64 ring_cookie; __u32 location; }; struct mlx4_ethtool_rxnfc { __u32 cmd; __u32 flow_type; __u64 data; struct mlx4_ethtool_rx_flow_spec fs; __u32 rule_cnt; __u32 rule_locs[0]; }; #ifndef FLOW_MAC_EXT #define FLOW_MAC_EXT 0x40000000 #endif static void mlx4_en_get_drvinfo(struct net_device *dev, struct ethtool_drvinfo *drvinfo) { struct mlx4_en_priv *priv = netdev_priv(dev); struct mlx4_en_dev *mdev = priv->mdev; strlcpy(drvinfo->driver, DRV_NAME, sizeof(drvinfo->driver)); strlcpy(drvinfo->version, DRV_VERSION " (" DRV_RELDATE ")", sizeof(drvinfo->version)); snprintf(drvinfo->fw_version, sizeof(drvinfo->fw_version), "%d.%d.%d", (u16) (mdev->dev->caps.fw_ver >> 32), (u16) ((mdev->dev->caps.fw_ver >> 16) & 0xffff), (u16) (mdev->dev->caps.fw_ver & 0xffff)); strlcpy(drvinfo->bus_info, pci_name(mdev->dev->pdev), sizeof(drvinfo->bus_info)); drvinfo->n_stats = 0; drvinfo->regdump_len = 0; drvinfo->eedump_len = 0; } static const char main_strings[][ETH_GSTRING_LEN] = { /* packet statistics */ "rx_packets", "rx_bytes", "rx_multicast_packets", "rx_broadcast_packets", "rx_errors", "rx_dropped", "rx_length_errors", "rx_over_errors", "rx_crc_errors", "rx_jabbers", "rx_in_range_length_error", "rx_out_range_length_error", "rx_lt_64_bytes_packets", "rx_127_bytes_packets", "rx_255_bytes_packets", "rx_511_bytes_packets", "rx_1023_bytes_packets", "rx_1518_bytes_packets", "rx_1522_bytes_packets", "rx_1548_bytes_packets", "rx_gt_1548_bytes_packets", "tx_packets", "tx_bytes", "tx_multicast_packets", "tx_broadcast_packets", "tx_errors", "tx_dropped", "tx_lt_64_bytes_packets", "tx_127_bytes_packets", "tx_255_bytes_packets", "tx_511_bytes_packets", "tx_1023_bytes_packets", "tx_1518_bytes_packets", "tx_1522_bytes_packets", "tx_1548_bytes_packets", "tx_gt_1548_bytes_packets", "rx_prio_0_packets", "rx_prio_0_bytes", "rx_prio_1_packets", "rx_prio_1_bytes", "rx_prio_2_packets", "rx_prio_2_bytes", "rx_prio_3_packets", "rx_prio_3_bytes", "rx_prio_4_packets", "rx_prio_4_bytes", "rx_prio_5_packets", "rx_prio_5_bytes", "rx_prio_6_packets", "rx_prio_6_bytes", "rx_prio_7_packets", "rx_prio_7_bytes", "rx_novlan_packets", "rx_novlan_bytes", "tx_prio_0_packets", "tx_prio_0_bytes", "tx_prio_1_packets", "tx_prio_1_bytes", "tx_prio_2_packets", "tx_prio_2_bytes", "tx_prio_3_packets", "tx_prio_3_bytes", "tx_prio_4_packets", "tx_prio_4_bytes", "tx_prio_5_packets", "tx_prio_5_bytes", "tx_prio_6_packets", "tx_prio_6_bytes", "tx_prio_7_packets", "tx_prio_7_bytes", "tx_novlan_packets", "tx_novlan_bytes", /* flow control statistics */ "rx_pause_prio_0", "rx_pause_duration_prio_0", "rx_pause_transition_prio_0", "tx_pause_prio_0", "tx_pause_duration_prio_0", "tx_pause_transition_prio_0", "rx_pause_prio_1", "rx_pause_duration_prio_1", 
"rx_pause_transition_prio_1", "tx_pause_prio_1", "tx_pause_duration_prio_1", "tx_pause_transition_prio_1", "rx_pause_prio_2", "rx_pause_duration_prio_2", "rx_pause_transition_prio_2", "tx_pause_prio_2", "tx_pause_duration_prio_2", "tx_pause_transition_prio_2", "rx_pause_prio_3", "rx_pause_duration_prio_3", "rx_pause_transition_prio_3", "tx_pause_prio_3", "tx_pause_duration_prio_3", "tx_pause_transition_prio_3", "rx_pause_prio_4", "rx_pause_duration_prio_4", "rx_pause_transition_prio_4", "tx_pause_prio_4", "tx_pause_duration_prio_4", "tx_pause_transition_prio_4", "rx_pause_prio_5", "rx_pause_duration_prio_5", "rx_pause_transition_prio_5", "tx_pause_prio_5", "tx_pause_duration_prio_5", "tx_pause_transition_prio_5", "rx_pause_prio_6", "rx_pause_duration_prio_6", "rx_pause_transition_prio_6", "tx_pause_prio_6", "tx_pause_duration_prio_6", "tx_pause_transition_prio_6", "rx_pause_prio_7", "rx_pause_duration_prio_7", "rx_pause_transition_prio_7", "tx_pause_prio_7", "tx_pause_duration_prio_7", "tx_pause_transition_prio_7", /* VF statistics */ "rx_packets", "rx_bytes", "rx_multicast_packets", "rx_broadcast_packets", "rx_errors", "rx_dropped", "tx_packets", "tx_bytes", "tx_multicast_packets", "tx_broadcast_packets", "tx_errors", /* VPort statistics */ "vport_rx_unicast_packets", "vport_rx_unicast_bytes", "vport_rx_multicast_packets", "vport_rx_multicast_bytes", "vport_rx_broadcast_packets", "vport_rx_broadcast_bytes", "vport_rx_dropped", "vport_rx_errors", "vport_tx_unicast_packets", "vport_tx_unicast_bytes", "vport_tx_multicast_packets", "vport_tx_multicast_bytes", "vport_tx_broadcast_packets", "vport_tx_broadcast_bytes", "vport_tx_errors", /* port statistics */ "tx_tso_packets", "tx_queue_stopped", "tx_wake_queue", "tx_timeout", "rx_alloc_failed", "rx_csum_good", "rx_csum_none", "tx_chksum_offload", }; static const char mlx4_en_test_names[][ETH_GSTRING_LEN]= { "Interrupt Test", "Link Test", "Speed Test", "Register Test", "Loopback Test", }; static u32 mlx4_en_get_msglevel(struct net_device *dev) { return ((struct mlx4_en_priv *) netdev_priv(dev))->msg_enable; } static void mlx4_en_set_msglevel(struct net_device *dev, u32 val) { ((struct mlx4_en_priv *) netdev_priv(dev))->msg_enable = val; } static void mlx4_en_get_wol(struct net_device *netdev, struct ethtool_wolinfo *wol) { struct mlx4_en_priv *priv = netdev_priv(netdev); int err = 0; u64 config = 0; u64 mask; if ((priv->port < 1) || (priv->port > 2)) { en_err(priv, "Failed to get WoL information\n"); return; } mask = (priv->port == 1) ? MLX4_DEV_CAP_FLAG_WOL_PORT1 : MLX4_DEV_CAP_FLAG_WOL_PORT2; if (!(priv->mdev->dev->caps.flags & mask)) { wol->supported = 0; wol->wolopts = 0; return; } err = mlx4_wol_read(priv->mdev->dev, &config, priv->port); if (err) { en_err(priv, "Failed to get WoL information\n"); return; } if (config & MLX4_EN_WOL_MAGIC) wol->supported = WAKE_MAGIC; else wol->supported = 0; if (config & MLX4_EN_WOL_ENABLED) wol->wolopts = WAKE_MAGIC; else wol->wolopts = 0; } static int mlx4_en_set_wol(struct net_device *netdev, struct ethtool_wolinfo *wol) { struct mlx4_en_priv *priv = netdev_priv(netdev); u64 config = 0; int err = 0; u64 mask; if ((priv->port < 1) || (priv->port > 2)) return -EOPNOTSUPP; mask = (priv->port == 1) ? 
MLX4_DEV_CAP_FLAG_WOL_PORT1 : MLX4_DEV_CAP_FLAG_WOL_PORT2; if (!(priv->mdev->dev->caps.flags & mask)) return -EOPNOTSUPP; if (wol->supported & ~WAKE_MAGIC) return -EINVAL; err = mlx4_wol_read(priv->mdev->dev, &config, priv->port); if (err) { en_err(priv, "Failed to get WoL info, unable to modify\n"); return err; } if (wol->wolopts & WAKE_MAGIC) { config |= MLX4_EN_WOL_DO_MODIFY | MLX4_EN_WOL_ENABLED | MLX4_EN_WOL_MAGIC; } else { config &= ~(MLX4_EN_WOL_ENABLED | MLX4_EN_WOL_MAGIC); config |= MLX4_EN_WOL_DO_MODIFY; } err = mlx4_wol_write(priv->mdev->dev, config, priv->port); if (err) en_err(priv, "Failed to set WoL information\n"); return err; } struct bitmap_sim_iterator { bool advance_array; unsigned long *stats_bitmap; unsigned int count; unsigned int j; }; static inline void bitmap_sim_iterator_init(struct bitmap_sim_iterator *h, unsigned long *stats_bitmap, int count) { h->j = 0; h->advance_array = !bitmap_empty(stats_bitmap, count); h->count = h->advance_array ? bitmap_weight(stats_bitmap, count) : count; h->stats_bitmap = stats_bitmap; } static inline int bitmap_sim_iterator_test(struct bitmap_sim_iterator *h) { return !h->advance_array ? 1 : test_bit(h->j, h->stats_bitmap); } static inline int bitmap_sim_iterator_inc(struct bitmap_sim_iterator *h) { return h->j++; } static inline unsigned int bitmap_sim_iterator_count( struct bitmap_sim_iterator *h) { return h->count; } int mlx4_en_get_sset_count(struct net_device *dev, int sset) { struct mlx4_en_priv *priv = netdev_priv(dev); struct bitmap_sim_iterator it; int num_of_stats = NUM_ALL_STATS - ((priv->mdev->dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_FLOWSTATS_EN) ? 0 : NUM_FLOW_STATS); bitmap_sim_iterator_init(&it, priv->stats_bitmap, num_of_stats); switch (sset) { case ETH_SS_STATS: return bitmap_sim_iterator_count(&it) + (priv->tx_ring_num * 2) + #ifdef LL_EXTENDED_STATS (priv->rx_ring_num * 5); #else (priv->rx_ring_num * 2); #endif case ETH_SS_TEST: return MLX4_EN_NUM_SELF_TEST - !(priv->mdev->dev->caps.flags & MLX4_DEV_CAP_FLAG_UC_LOOPBACK) * 2; default: return -EOPNOTSUPP; } } void mlx4_en_get_ethtool_stats(struct net_device *dev, struct ethtool_stats *stats, u64 *data) { struct mlx4_en_priv *priv = netdev_priv(dev); int index = 0; int i; struct bitmap_sim_iterator it; int num_of_stats = NUM_ALL_STATS - ((priv->mdev->dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_FLOWSTATS_EN) ? 
0 : NUM_FLOW_STATS); bitmap_sim_iterator_init(&it, priv->stats_bitmap, num_of_stats); if (!data || !priv->port_up) return; spin_lock_bh(&priv->stats_lock); for (i = 0; i < NUM_PKT_STATS; i++, bitmap_sim_iterator_inc(&it)) if (bitmap_sim_iterator_test(&it)) data[index++] = ((unsigned long *)&priv->pkstats)[i]; for (i = 0; i < NUM_FLOW_STATS; i++, bitmap_sim_iterator_inc(&it)) if (priv->mdev->dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_FLOWSTATS_EN) if (bitmap_sim_iterator_test(&it)) data[index++] = ((u64 *)&priv->flowstats)[i]; for (i = 0; i < NUM_VF_STATS; i++, bitmap_sim_iterator_inc(&it)) if (bitmap_sim_iterator_test(&it)) data[index++] = ((unsigned long *)&priv->vf_stats)[i]; for (i = 0; i < NUM_VPORT_STATS; i++, bitmap_sim_iterator_inc(&it)) if (bitmap_sim_iterator_test(&it)) data[index++] = ((unsigned long *)&priv->vport_stats)[i]; for (i = 0; i < NUM_PORT_STATS; i++, bitmap_sim_iterator_inc(&it)) if (bitmap_sim_iterator_test(&it)) data[index++] = ((unsigned long *)&priv->port_stats)[i]; for (i = 0; i < priv->tx_ring_num; i++) { data[index++] = priv->tx_ring[i]->packets; data[index++] = priv->tx_ring[i]->bytes; } for (i = 0; i < priv->rx_ring_num; i++) { data[index++] = priv->rx_ring[i]->packets; data[index++] = priv->rx_ring[i]->bytes; #ifdef LL_EXTENDED_STATS data[index++] = priv->rx_ring[i]->yields; data[index++] = priv->rx_ring[i]->misses; data[index++] = priv->rx_ring[i]->cleaned; #endif } spin_unlock_bh(&priv->stats_lock); } void mlx4_en_restore_ethtool_stats(struct mlx4_en_priv *priv, u64 *data) { int index = 0; int i; struct bitmap_sim_iterator it; int num_of_stats = NUM_ALL_STATS - ((priv->mdev->dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_FLOWSTATS_EN) ? 0 : NUM_FLOW_STATS); bitmap_sim_iterator_init(&it, priv->stats_bitmap, num_of_stats); if (!data || !priv->port_up) return; spin_lock_bh(&priv->stats_lock); for (i = 0; i < NUM_PKT_STATS; i++, bitmap_sim_iterator_inc(&it)) if (bitmap_sim_iterator_test(&it)) ((unsigned long *)&priv->pkstats)[i] = data[index++]; for (i = 0; i < NUM_FLOW_STATS; i++, bitmap_sim_iterator_inc(&it)) if (priv->mdev->dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_FLOWSTATS_EN) if (bitmap_sim_iterator_test(&it)) ((u64 *)&priv->flowstats)[i] = data[index++]; for (i = 0; i < NUM_VF_STATS; i++, bitmap_sim_iterator_inc(&it)) if (bitmap_sim_iterator_test(&it)) ((unsigned long *)&priv->vf_stats)[i] = data[index++]; for (i = 0; i < NUM_VPORT_STATS; i++, bitmap_sim_iterator_inc(&it)) if (bitmap_sim_iterator_test(&it)) ((unsigned long *)&priv->vport_stats)[i] = data[index++]; for (i = 0; i < NUM_PORT_STATS; i++, bitmap_sim_iterator_inc(&it)) if (bitmap_sim_iterator_test(&it)) ((unsigned long *)&priv->port_stats)[i] = data[index++]; for (i = 0; i < priv->tx_ring_num; i++) { priv->tx_ring[i]->packets = data[index++]; priv->tx_ring[i]->bytes = data[index++]; } for (i = 0; i < priv->rx_ring_num; i++) { priv->rx_ring[i]->packets = data[index++]; priv->rx_ring[i]->bytes = data[index++]; } spin_unlock_bh(&priv->stats_lock); } static void mlx4_en_self_test(struct net_device *dev, struct ethtool_test *etest, u64 *buf) { mlx4_en_ex_selftest(dev, &etest->flags, buf); } static void mlx4_en_get_strings(struct net_device *dev, uint32_t stringset, uint8_t *data) { struct mlx4_en_priv *priv = netdev_priv(dev); int index = 0; int i, k; struct bitmap_sim_iterator it; int num_of_stats = NUM_ALL_STATS - ((priv->mdev->dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_FLOWSTATS_EN) ? 
0 : NUM_FLOW_STATS); bitmap_sim_iterator_init(&it, priv->stats_bitmap, num_of_stats); switch (stringset) { case ETH_SS_TEST: for (i = 0; i < MLX4_EN_NUM_SELF_TEST - 2; i++) strcpy(data + i * ETH_GSTRING_LEN, mlx4_en_test_names[i]); if (priv->mdev->dev->caps.flags & MLX4_DEV_CAP_FLAG_UC_LOOPBACK) for (; i < MLX4_EN_NUM_SELF_TEST; i++) strcpy(data + i * ETH_GSTRING_LEN, mlx4_en_test_names[i]); break; case ETH_SS_STATS: /* Add main counters */ for (i = 0; i < NUM_PKT_STATS; i++, bitmap_sim_iterator_inc(&it)) if (bitmap_sim_iterator_test(&it)) strcpy(data + (index++) * ETH_GSTRING_LEN, main_strings[i]); for (k = 0; k < NUM_FLOW_STATS; k++, bitmap_sim_iterator_inc(&it)) if (priv->mdev->dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_FLOWSTATS_EN) if (bitmap_sim_iterator_test(&it)) strcpy(data + (index++) * ETH_GSTRING_LEN, main_strings[i + k]); for (; (i + k) < num_of_stats; i++, bitmap_sim_iterator_inc(&it)) if (bitmap_sim_iterator_test(&it)) strcpy(data + (index++) * ETH_GSTRING_LEN, main_strings[i + k]); for (i = 0; i < priv->tx_ring_num; i++) { sprintf(data + (index++) * ETH_GSTRING_LEN, "tx%d_packets", i); sprintf(data + (index++) * ETH_GSTRING_LEN, "tx%d_bytes", i); } for (i = 0; i < priv->rx_ring_num; i++) { sprintf(data + (index++) * ETH_GSTRING_LEN, "rx%d_packets", i); sprintf(data + (index++) * ETH_GSTRING_LEN, "rx%d_bytes", i); #ifdef LL_EXTENDED_STATS sprintf(data + (index++) * ETH_GSTRING_LEN, "rx%d_napi_yield", i); sprintf(data + (index++) * ETH_GSTRING_LEN, "rx%d_misses", i); sprintf(data + (index++) * ETH_GSTRING_LEN, "rx%d_cleaned", i); #endif } break; } } static u32 mlx4_en_autoneg_get(struct net_device *dev) { struct mlx4_en_priv *priv = netdev_priv(dev); struct mlx4_en_dev *mdev = priv->mdev; u32 autoneg = AUTONEG_DISABLE; if ((mdev->dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_ETH_BACKPL_AN_REP) && priv->port_state.autoneg) { autoneg = AUTONEG_ENABLE; } return autoneg; } static int mlx4_en_get_settings(struct net_device *dev, struct ethtool_cmd *cmd) { struct mlx4_en_priv *priv = netdev_priv(dev); int trans_type; /* SUPPORTED_1000baseT_Half isn't supported */ cmd->supported = SUPPORTED_1000baseT_Full |SUPPORTED_10000baseT_Full; cmd->advertising = ADVERTISED_1000baseT_Full |ADVERTISED_10000baseT_Full; cmd->supported |= SUPPORTED_1000baseKX_Full |SUPPORTED_10000baseKX4_Full |SUPPORTED_10000baseKR_Full |SUPPORTED_10000baseR_FEC |SUPPORTED_40000baseKR4_Full |SUPPORTED_40000baseCR4_Full |SUPPORTED_40000baseSR4_Full |SUPPORTED_40000baseLR4_Full; /* ADVERTISED_1000baseT_Half isn't advertised */ cmd->advertising |= ADVERTISED_1000baseKX_Full |ADVERTISED_10000baseKX4_Full |ADVERTISED_10000baseKR_Full |ADVERTISED_10000baseR_FEC |ADVERTISED_40000baseKR4_Full |ADVERTISED_40000baseCR4_Full |ADVERTISED_40000baseSR4_Full |ADVERTISED_40000baseLR4_Full; if (mlx4_en_QUERY_PORT(priv->mdev, priv->port)) return -ENOMEM; cmd->autoneg = mlx4_en_autoneg_get(dev); if (cmd->autoneg == AUTONEG_ENABLE) { cmd->supported |= SUPPORTED_Autoneg; cmd->advertising |= ADVERTISED_Autoneg; } trans_type = priv->port_state.transciver; if (netif_carrier_ok(dev)) { ethtool_cmd_speed_set(cmd, priv->port_state.link_speed); cmd->duplex = DUPLEX_FULL; } else { ethtool_cmd_speed_set(cmd, -1); cmd->duplex = -1; } if (trans_type > 0 && trans_type <= 0xC) { cmd->port = PORT_FIBRE; cmd->transceiver = XCVR_EXTERNAL; cmd->supported |= SUPPORTED_FIBRE; cmd->advertising |= ADVERTISED_FIBRE; } else if (trans_type == 0x80 || trans_type == 0) { cmd->port = PORT_TP; cmd->transceiver = XCVR_INTERNAL; cmd->supported |= SUPPORTED_TP; cmd->advertising 
|= ADVERTISED_TP; } else { cmd->port = -1; cmd->transceiver = -1; } return 0; } static const char *mlx4_en_duplex_to_string(int duplex) { switch (duplex) { case DUPLEX_FULL: return "FULL"; case DUPLEX_HALF: return "HALF"; default: break; } return "UNKNOWN"; } static int mlx4_en_set_settings(struct net_device *dev, struct ethtool_cmd *cmd) { struct mlx4_en_priv *priv = netdev_priv(dev); struct mlx4_en_port_state *port_state = &priv->port_state; if ((cmd->autoneg != port_state->autoneg) || (ethtool_cmd_speed(cmd) != port_state->link_speed) || (cmd->duplex != DUPLEX_FULL)) { en_info(priv, "Changing port state properties (auto-negotiation" " , speed/duplex) is not supported. Current:" " auto-negotiation=%d speed/duplex=%d/%s\n", port_state->autoneg, port_state->link_speed, mlx4_en_duplex_to_string(DUPLEX_FULL)); return -EOPNOTSUPP; } /* User provided same port state properties that are currently set. * Nothing to change */ return 0; } static int mlx4_en_get_coalesce(struct net_device *dev, struct ethtool_coalesce *coal) { struct mlx4_en_priv *priv = netdev_priv(dev); coal->tx_coalesce_usecs = priv->tx_usecs; coal->tx_max_coalesced_frames = priv->tx_frames; coal->rx_coalesce_usecs = priv->rx_usecs; coal->rx_max_coalesced_frames = priv->rx_frames; coal->pkt_rate_low = priv->pkt_rate_low; coal->rx_coalesce_usecs_low = priv->rx_usecs_low; coal->pkt_rate_high = priv->pkt_rate_high; coal->rx_coalesce_usecs_high = priv->rx_usecs_high; coal->rate_sample_interval = priv->sample_interval; coal->use_adaptive_rx_coalesce = priv->adaptive_rx_coal; return 0; } static int mlx4_en_set_coalesce(struct net_device *dev, struct ethtool_coalesce *coal) { struct mlx4_en_priv *priv = netdev_priv(dev); int err, i; priv->rx_frames = (coal->rx_max_coalesced_frames == MLX4_EN_AUTO_CONF) ? MLX4_EN_RX_COAL_TARGET / priv->dev->mtu + 1 : coal->rx_max_coalesced_frames; priv->rx_usecs = (coal->rx_coalesce_usecs == MLX4_EN_AUTO_CONF) ? 
MLX4_EN_RX_COAL_TIME : coal->rx_coalesce_usecs; /* Setting TX coalescing parameters */ if (coal->tx_coalesce_usecs != priv->tx_usecs || coal->tx_max_coalesced_frames != priv->tx_frames) { priv->tx_usecs = coal->tx_coalesce_usecs; priv->tx_frames = coal->tx_max_coalesced_frames; if (priv->port_up) { for (i = 0; i < priv->tx_ring_num; i++) { priv->tx_cq[i]->moder_cnt = priv->tx_frames; priv->tx_cq[i]->moder_time = priv->tx_usecs; if (mlx4_en_set_cq_moder(priv, priv->tx_cq[i])) en_warn(priv, "Failed changing moderation for TX cq %d\n", i); } } } /* Set adaptive coalescing params */ priv->pkt_rate_low = coal->pkt_rate_low; priv->rx_usecs_low = coal->rx_coalesce_usecs_low; priv->pkt_rate_high = coal->pkt_rate_high; priv->rx_usecs_high = coal->rx_coalesce_usecs_high; priv->sample_interval = coal->rate_sample_interval; priv->adaptive_rx_coal = coal->use_adaptive_rx_coalesce; if (priv->adaptive_rx_coal) return 0; if (priv->port_up) { for (i = 0; i < priv->rx_ring_num; i++) { priv->rx_cq[i]->moder_cnt = priv->rx_frames; priv->rx_cq[i]->moder_time = priv->rx_usecs; priv->last_moder_time[i] = MLX4_EN_AUTO_CONF; err = mlx4_en_set_cq_moder(priv, priv->rx_cq[i]); if (err) return err; } } return 0; } static int mlx4_en_set_pauseparam(struct net_device *dev, struct ethtool_pauseparam *pause) { struct mlx4_en_priv *priv = netdev_priv(dev); struct mlx4_en_dev *mdev = priv->mdev; int err; if (pause->autoneg) return -EOPNOTSUPP; priv->prof->tx_pause = pause->tx_pause != 0; priv->prof->rx_pause = pause->rx_pause != 0; err = mlx4_SET_PORT_general(mdev->dev, priv->port, priv->rx_skb_size + ETH_FCS_LEN, priv->prof->tx_pause, priv->prof->tx_ppp, priv->prof->rx_pause, priv->prof->rx_ppp); if (err) en_err(priv, "Failed setting pause params\n"); return err; } static void mlx4_en_get_pauseparam(struct net_device *dev, struct ethtool_pauseparam *pause) { struct mlx4_en_priv *priv = netdev_priv(dev); pause->tx_pause = priv->prof->tx_pause; pause->rx_pause = priv->prof->rx_pause; pause->autoneg = mlx4_en_autoneg_get(dev); } /* rtnl lock must be taken before calling */ int mlx4_en_pre_config(struct mlx4_en_priv *priv) { #ifdef CONFIG_RFS_ACCEL struct cpu_rmap *rmap; if (!priv->dev->rx_cpu_rmap) return 0; /* Disable RFS events * Must have all RFS jobs flushed before freeing resources */ rmap = priv->dev->rx_cpu_rmap; priv->dev->rx_cpu_rmap = NULL; rtnl_unlock(); free_irq_cpu_rmap(rmap); rtnl_lock(); if (priv->dev->rx_cpu_rmap) return -EBUSY; /* another configuration completed while lock * was free */ /* Make sure all currently running filter_work are being processed * Other work will return immediatly because of disable_rfs */ flush_workqueue(priv->mdev->workqueue); #endif return 0; } static int mlx4_en_set_ringparam(struct net_device *dev, struct ethtool_ringparam *param) { struct mlx4_en_priv *priv = netdev_priv(dev); struct mlx4_en_dev *mdev = priv->mdev; u32 rx_size, tx_size; int port_up = 0; int err = 0; int i, n_stats; u64 *data = NULL; if (!priv->port_up) return -ENOMEM; if (param->rx_jumbo_pending || param->rx_mini_pending) return -EINVAL; rx_size = roundup_pow_of_two(param->rx_pending); rx_size = max_t(u32, rx_size, MLX4_EN_MIN_RX_SIZE); rx_size = min_t(u32, rx_size, MLX4_EN_MAX_RX_SIZE); tx_size = roundup_pow_of_two(param->tx_pending); tx_size = max_t(u32, tx_size, MLX4_EN_MIN_TX_SIZE); tx_size = min_t(u32, tx_size, MLX4_EN_MAX_TX_SIZE); if (rx_size == (priv->port_up ? 
priv->rx_ring[0]->actual_size : priv->rx_ring[0]->size) && tx_size == priv->tx_ring[0]->size) return 0; err = mlx4_en_pre_config(priv); if (err) return err; mutex_lock(&mdev->state_lock); if (priv->port_up) { port_up = 1; mlx4_en_stop_port(dev); } /* Cache port statistics */ n_stats = mlx4_en_get_sset_count(dev, ETH_SS_STATS); if (n_stats > 0) { data = kmalloc(n_stats * sizeof(u64), GFP_KERNEL); if (data) mlx4_en_get_ethtool_stats(dev, NULL, data); } mlx4_en_free_resources(priv); priv->prof->tx_ring_size = tx_size; priv->prof->rx_ring_size = rx_size; err = mlx4_en_alloc_resources(priv); if (err) { en_err(priv, "Failed reallocating port resources\n"); goto out; } /* Restore port statistics */ if (n_stats > 0 && data) mlx4_en_restore_ethtool_stats(priv, data); if (port_up) { err = mlx4_en_start_port(dev); if (err) { en_err(priv, "Failed starting port\n"); goto out; } for (i = 0; i < priv->rx_ring_num; i++) { priv->rx_cq[i]->moder_cnt = priv->rx_frames; priv->rx_cq[i]->moder_time = priv->rx_usecs; priv->last_moder_time[i] = MLX4_EN_AUTO_CONF; err = mlx4_en_set_cq_moder(priv, priv->rx_cq[i]); if (err) goto out; } } out: kfree(data); mutex_unlock(&mdev->state_lock); return err; } static void mlx4_en_get_ringparam(struct net_device *dev, struct ethtool_ringparam *param) { struct mlx4_en_priv *priv = netdev_priv(dev); if (!priv->port_up) return; memset(param, 0, sizeof(*param)); param->rx_max_pending = MLX4_EN_MAX_RX_SIZE; param->tx_max_pending = MLX4_EN_MAX_TX_SIZE; param->rx_pending = priv->port_up ? priv->rx_ring[0]->actual_size : priv->rx_ring[0]->size; param->tx_pending = priv->tx_ring[0]->size; } static u32 mlx4_en_get_rxfh_indir_size(struct net_device *dev) { struct mlx4_en_priv *priv = netdev_priv(dev); return priv->rx_ring_num; } static int mlx4_en_get_rxfh_indir(struct net_device *dev, u32 *ring_index) { struct mlx4_en_priv *priv = netdev_priv(dev); struct mlx4_en_rss_map *rss_map = &priv->rss_map; int rss_rings; size_t n = priv->rx_ring_num; int err = 0; rss_rings = priv->prof->rss_rings ?: priv->rx_ring_num; rss_rings = 1 << ilog2(rss_rings); while (n--) { ring_index[n] = rss_map->qps[n % rss_rings].qpn - rss_map->base_qpn; } return err; } static int mlx4_en_set_rxfh_indir(struct net_device *dev, const u32 *ring_index) { struct mlx4_en_priv *priv = netdev_priv(dev); struct mlx4_en_dev *mdev = priv->mdev; int port_up = 0; int err = 0; int i; int rss_rings = 0; /* Calculate RSS table size and make sure flows are spread evenly * between rings */ for (i = 0; i < priv->rx_ring_num; i++) { if (i > 0 && !ring_index[i] && !rss_rings) rss_rings = i; if (ring_index[i] != (i % (rss_rings ?: priv->rx_ring_num))) return -EINVAL; } if (!rss_rings) rss_rings = priv->rx_ring_num; /* RSS table size must be an order of 2 */ if (!is_power_of_2(rss_rings)) return -EINVAL; mutex_lock(&mdev->state_lock); if (priv->port_up) { port_up = 1; mlx4_en_stop_port(dev); } priv->prof->rss_rings = rss_rings; if (port_up) { err = mlx4_en_start_port(dev); if (err) en_err(priv, "Failed starting port\n"); } mutex_unlock(&mdev->state_lock); return err; } #define all_zeros_or_all_ones(field) \ ((field) == 0 || (field) == (__force typeof(field))-1) static int mlx4_en_validate_flow(struct net_device *dev, struct mlx4_ethtool_rxnfc *cmd) { struct ethtool_usrip4_spec *l3_mask; struct ethtool_tcpip4_spec *l4_mask; struct ethhdr *eth_mask; if (cmd->fs.location >= MAX_NUM_OF_FS_RULES) return -EINVAL; if (cmd->fs.flow_type & FLOW_MAC_EXT) { /* dest mac mask must be ff:ff:ff:ff:ff:ff */ if 
(!is_broadcast_ether_addr(cmd->fs.m_ext.h_dest)) return -EINVAL; } switch (cmd->fs.flow_type & ~(FLOW_EXT | FLOW_MAC_EXT)) { case TCP_V4_FLOW: case UDP_V4_FLOW: if (cmd->fs.m_u.tcp_ip4_spec.tos) return -EINVAL; l4_mask = &cmd->fs.m_u.tcp_ip4_spec; /* don't allow mask which isn't all 0 or 1 */ if (!all_zeros_or_all_ones(l4_mask->ip4src) || !all_zeros_or_all_ones(l4_mask->ip4dst) || !all_zeros_or_all_ones(l4_mask->psrc) || !all_zeros_or_all_ones(l4_mask->pdst)) return -EINVAL; break; case IP_USER_FLOW: l3_mask = &cmd->fs.m_u.usr_ip4_spec; if (l3_mask->l4_4_bytes || l3_mask->tos || l3_mask->proto || cmd->fs.h_u.usr_ip4_spec.ip_ver != ETH_RX_NFC_IP4 || (!l3_mask->ip4src && !l3_mask->ip4dst) || !all_zeros_or_all_ones(l3_mask->ip4src) || !all_zeros_or_all_ones(l3_mask->ip4dst)) return -EINVAL; break; case ETHER_FLOW: eth_mask = &cmd->fs.m_u.ether_spec; /* source mac mask must not be set */ if (!is_zero_ether_addr(eth_mask->h_source)) return -EINVAL; /* dest mac mask must be ff:ff:ff:ff:ff:ff */ if (!is_broadcast_ether_addr(eth_mask->h_dest)) return -EINVAL; if (!all_zeros_or_all_ones(eth_mask->h_proto)) return -EINVAL; break; default: return -EINVAL; } if ((cmd->fs.flow_type & FLOW_EXT)) { if (cmd->fs.m_ext.vlan_etype || !(cmd->fs.m_ext.vlan_tci == 0 || cmd->fs.m_ext.vlan_tci == cpu_to_be16(0xfff))) return -EINVAL; if (cmd->fs.m_ext.vlan_tci) { if (be16_to_cpu(cmd->fs.h_ext.vlan_tci) < VLAN_MIN_VALUE || be16_to_cpu(cmd->fs.h_ext.vlan_tci) > VLAN_MAX_VALUE) return -EINVAL; } } return 0; } static int mlx4_en_ethtool_add_mac_rule(struct mlx4_ethtool_rxnfc *cmd, struct list_head *rule_list_h, struct mlx4_spec_list *spec_l2, unsigned char *mac) { int err = 0; __be64 mac_msk = cpu_to_be64(MLX4_MAC_MASK << 16); spec_l2->id = MLX4_NET_TRANS_RULE_ID_ETH; memcpy(spec_l2->eth.dst_mac_msk, &mac_msk, ETH_ALEN); memcpy(spec_l2->eth.dst_mac, mac, ETH_ALEN); if ((cmd->fs.flow_type & FLOW_EXT) && cmd->fs.m_ext.vlan_tci) { spec_l2->eth.vlan_id = cmd->fs.h_ext.vlan_tci; spec_l2->eth.vlan_id_msk = cpu_to_be16(0xfff); } list_add_tail(&spec_l2->list, rule_list_h); return err; } static int mlx4_en_ethtool_add_mac_rule_by_ipv4(struct mlx4_en_priv *priv, struct mlx4_ethtool_rxnfc *cmd, struct list_head *rule_list_h, struct mlx4_spec_list *spec_l2, __be32 ipv4_dst) { unsigned char mac[ETH_ALEN]; if (!ipv4_is_multicast(ipv4_dst)) { if (cmd->fs.flow_type & FLOW_MAC_EXT) memcpy(&mac, cmd->fs.h_ext.h_dest, ETH_ALEN); else memcpy(&mac, priv->dev->dev_addr, ETH_ALEN); } else { ip_eth_mc_map(ipv4_dst, mac); } return mlx4_en_ethtool_add_mac_rule(cmd, rule_list_h, spec_l2, &mac[0]); } static int add_ip_rule(struct mlx4_en_priv *priv, struct mlx4_ethtool_rxnfc *cmd, struct list_head *list_h) { struct mlx4_spec_list *spec_l2 = NULL; struct mlx4_spec_list *spec_l3 = NULL; struct ethtool_usrip4_spec *l3_mask = &cmd->fs.m_u.usr_ip4_spec; spec_l3 = kzalloc(sizeof(*spec_l3), GFP_KERNEL); spec_l2 = kzalloc(sizeof(*spec_l2), GFP_KERNEL); if (!spec_l2 || !spec_l3) { en_err(priv, "Fail to alloc ethtool rule.\n"); kfree(spec_l2); kfree(spec_l3); return -ENOMEM; } mlx4_en_ethtool_add_mac_rule_by_ipv4(priv, cmd, list_h, spec_l2, cmd->fs.h_u. 
usr_ip4_spec.ip4dst); spec_l3->id = MLX4_NET_TRANS_RULE_ID_IPV4; spec_l3->ipv4.src_ip = cmd->fs.h_u.usr_ip4_spec.ip4src; if (l3_mask->ip4src) spec_l3->ipv4.src_ip_msk = MLX4_BE_WORD_MASK; spec_l3->ipv4.dst_ip = cmd->fs.h_u.usr_ip4_spec.ip4dst; if (l3_mask->ip4dst) spec_l3->ipv4.dst_ip_msk = MLX4_BE_WORD_MASK; list_add_tail(&spec_l3->list, list_h); return 0; } static int add_tcp_udp_rule(struct mlx4_en_priv *priv, struct mlx4_ethtool_rxnfc *cmd, struct list_head *list_h, int proto) { struct mlx4_spec_list *spec_l2 = NULL; struct mlx4_spec_list *spec_l3 = NULL; struct mlx4_spec_list *spec_l4 = NULL; struct ethtool_tcpip4_spec *l4_mask = &cmd->fs.m_u.tcp_ip4_spec; spec_l2 = kzalloc(sizeof(*spec_l2), GFP_KERNEL); spec_l3 = kzalloc(sizeof(*spec_l3), GFP_KERNEL); spec_l4 = kzalloc(sizeof(*spec_l4), GFP_KERNEL); if (!spec_l2 || !spec_l3 || !spec_l4) { en_err(priv, "Fail to alloc ethtool rule.\n"); kfree(spec_l2); kfree(spec_l3); kfree(spec_l4); return -ENOMEM; } spec_l3->id = MLX4_NET_TRANS_RULE_ID_IPV4; if (proto == TCP_V4_FLOW) { mlx4_en_ethtool_add_mac_rule_by_ipv4(priv, cmd, list_h, spec_l2, cmd->fs.h_u. tcp_ip4_spec.ip4dst); spec_l4->id = MLX4_NET_TRANS_RULE_ID_TCP; spec_l3->ipv4.src_ip = cmd->fs.h_u.tcp_ip4_spec.ip4src; spec_l3->ipv4.dst_ip = cmd->fs.h_u.tcp_ip4_spec.ip4dst; spec_l4->tcp_udp.src_port = cmd->fs.h_u.tcp_ip4_spec.psrc; spec_l4->tcp_udp.dst_port = cmd->fs.h_u.tcp_ip4_spec.pdst; } else { mlx4_en_ethtool_add_mac_rule_by_ipv4(priv, cmd, list_h, spec_l2, cmd->fs.h_u. udp_ip4_spec.ip4dst); spec_l4->id = MLX4_NET_TRANS_RULE_ID_UDP; spec_l3->ipv4.src_ip = cmd->fs.h_u.udp_ip4_spec.ip4src; spec_l3->ipv4.dst_ip = cmd->fs.h_u.udp_ip4_spec.ip4dst; spec_l4->tcp_udp.src_port = cmd->fs.h_u.udp_ip4_spec.psrc; spec_l4->tcp_udp.dst_port = cmd->fs.h_u.udp_ip4_spec.pdst; } if (l4_mask->ip4src) spec_l3->ipv4.src_ip_msk = MLX4_BE_WORD_MASK; if (l4_mask->ip4dst) spec_l3->ipv4.dst_ip_msk = MLX4_BE_WORD_MASK; if (l4_mask->psrc) spec_l4->tcp_udp.src_port_msk = MLX4_BE_SHORT_MASK; if (l4_mask->pdst) spec_l4->tcp_udp.dst_port_msk = MLX4_BE_SHORT_MASK; list_add_tail(&spec_l3->list, list_h); list_add_tail(&spec_l4->list, list_h); return 0; } static int mlx4_en_ethtool_to_net_trans_rule(struct net_device *dev, struct mlx4_ethtool_rxnfc *cmd, struct list_head *rule_list_h) { int err; struct ethhdr *eth_spec; struct mlx4_spec_list *spec_l2; struct mlx4_en_priv *priv = netdev_priv(dev); err = mlx4_en_validate_flow(dev, cmd); if (err) return err; switch (cmd->fs.flow_type & ~(FLOW_EXT | FLOW_MAC_EXT)) { case ETHER_FLOW: spec_l2 = kzalloc(sizeof(*spec_l2), GFP_KERNEL); if (!spec_l2) return -ENOMEM; eth_spec = &cmd->fs.h_u.ether_spec; mlx4_en_ethtool_add_mac_rule(cmd, rule_list_h, spec_l2, &eth_spec->h_dest[0]); spec_l2->eth.ether_type = eth_spec->h_proto; if (eth_spec->h_proto) spec_l2->eth.ether_type_enable = 1; break; case IP_USER_FLOW: err = add_ip_rule(priv, cmd, rule_list_h); break; case TCP_V4_FLOW: err = add_tcp_udp_rule(priv, cmd, rule_list_h, TCP_V4_FLOW); break; case UDP_V4_FLOW: err = add_tcp_udp_rule(priv, cmd, rule_list_h, UDP_V4_FLOW); break; } return err; } static int mlx4_en_flow_replace(struct net_device *dev, struct mlx4_ethtool_rxnfc *cmd) { int err; struct mlx4_en_priv *priv = netdev_priv(dev); struct mlx4_en_dev *mdev = priv->mdev; struct ethtool_flow_id *loc_rule; struct mlx4_spec_list *spec, *tmp_spec; u32 qpn; u64 reg_id; struct mlx4_net_trans_rule rule = { .queue_mode = MLX4_NET_TRANS_Q_FIFO, .exclusive = 0, .allow_loopback = 1, .promisc_mode = MLX4_FS_REGULAR, }; rule.port = priv->port;
rule.priority = MLX4_DOMAIN_ETHTOOL | cmd->fs.location; INIT_LIST_HEAD(&rule.list); /* Allow direct QP attaches if the EN_ETHTOOL_QP_ATTACH flag is set */ if (cmd->fs.ring_cookie == RX_CLS_FLOW_DISC) qpn = priv->drop_qp.qpn; else if (cmd->fs.ring_cookie & EN_ETHTOOL_QP_ATTACH) { qpn = cmd->fs.ring_cookie & (EN_ETHTOOL_QP_ATTACH - 1); } else { if (cmd->fs.ring_cookie >= priv->rx_ring_num) { en_warn(priv, "rxnfc: RX ring (%llu) doesn't exist.\n", cmd->fs.ring_cookie); return -EINVAL; } qpn = priv->rss_map.qps[cmd->fs.ring_cookie].qpn; if (!qpn) { en_warn(priv, "rxnfc: RX ring (%llu) is inactive.\n", cmd->fs.ring_cookie); return -EINVAL; } } rule.qpn = qpn; err = mlx4_en_ethtool_to_net_trans_rule(dev, cmd, &rule.list); if (err) goto out_free_list; mutex_lock(&mdev->state_lock); loc_rule = &priv->ethtool_rules[cmd->fs.location]; if (loc_rule->id) { err = mlx4_flow_detach(priv->mdev->dev, loc_rule->id); if (err) { en_err(priv, "Fail to detach network rule at location %d. registration id = %llx\n", cmd->fs.location, loc_rule->id); goto unlock; } loc_rule->id = 0; memset(&loc_rule->flow_spec, 0, sizeof(struct ethtool_rx_flow_spec)); list_del(&loc_rule->list); } err = mlx4_flow_attach(priv->mdev->dev, &rule, &reg_id); if (err) { en_err(priv, "Fail to attach network rule at location %d.\n", cmd->fs.location); goto unlock; } loc_rule->id = reg_id; memcpy(&loc_rule->flow_spec, &cmd->fs, sizeof(struct ethtool_rx_flow_spec)); list_add_tail(&loc_rule->list, &priv->ethtool_list); unlock: mutex_unlock(&mdev->state_lock); out_free_list: list_for_each_entry_safe(spec, tmp_spec, &rule.list, list) { list_del(&spec->list); kfree(spec); } return err; } static int mlx4_en_flow_detach(struct net_device *dev, struct mlx4_ethtool_rxnfc *cmd) { int err = 0; struct ethtool_flow_id *rule; struct mlx4_en_priv *priv = netdev_priv(dev); struct mlx4_en_dev *mdev = priv->mdev; if (cmd->fs.location >= MAX_NUM_OF_FS_RULES) return -EINVAL; mutex_lock(&mdev->state_lock); rule = &priv->ethtool_rules[cmd->fs.location]; if (!rule->id) { err = -ENOENT; goto out; } err = mlx4_flow_detach(priv->mdev->dev, rule->id); if (err) { en_err(priv, "Fail to detach network rule at location %d. 
registration id = 0x%llx\n", cmd->fs.location, rule->id); goto out; } rule->id = 0; memset(&rule->flow_spec, 0, sizeof(struct ethtool_rx_flow_spec)); list_del(&rule->list); out: mutex_unlock(&mdev->state_lock); return err; } static int mlx4_en_get_flow(struct net_device *dev, struct mlx4_ethtool_rxnfc *cmd, int loc) { int err = 0; struct ethtool_flow_id *rule; struct mlx4_en_priv *priv = netdev_priv(dev); if (loc < 0 || loc >= MAX_NUM_OF_FS_RULES) return -EINVAL; rule = &priv->ethtool_rules[loc]; if (rule->id) memcpy(&cmd->fs, &rule->flow_spec, sizeof(struct ethtool_rx_flow_spec)); else err = -ENOENT; return err; } static int mlx4_en_get_num_flows(struct mlx4_en_priv *priv) { int i, res = 0; for (i = 0; i < MAX_NUM_OF_FS_RULES; i++) { if (priv->ethtool_rules[i].id) res++; } return res; } static int mlx4_en_get_rxnfc(struct net_device *dev, struct ethtool_rxnfc *c, u32 *rule_locs) { struct mlx4_en_priv *priv = netdev_priv(dev); struct mlx4_en_dev *mdev = priv->mdev; int err = 0; int i = 0, priority = 0; struct mlx4_ethtool_rxnfc *cmd = (struct mlx4_ethtool_rxnfc *)c; if ((cmd->cmd == ETHTOOL_GRXCLSRLCNT || cmd->cmd == ETHTOOL_GRXCLSRULE || cmd->cmd == ETHTOOL_GRXCLSRLALL) && (mdev->dev->caps.steering_mode != MLX4_STEERING_MODE_DEVICE_MANAGED || !priv->port_up)) return -EINVAL; switch (cmd->cmd) { case ETHTOOL_GRXRINGS: cmd->data = priv->rx_ring_num; break; case ETHTOOL_GRXCLSRLCNT: cmd->rule_cnt = mlx4_en_get_num_flows(priv); break; case ETHTOOL_GRXCLSRULE: err = mlx4_en_get_flow(dev, cmd, cmd->fs.location); break; case ETHTOOL_GRXCLSRLALL: while ((!err || err == -ENOENT) && priority < cmd->rule_cnt) { err = mlx4_en_get_flow(dev, cmd, i); if (!err) rule_locs[priority++] = i; i++; } err = 0; break; default: err = -EOPNOTSUPP; break; } return err; } static int mlx4_en_set_rxnfc(struct net_device *dev, struct ethtool_rxnfc *c) { int err = 0; struct mlx4_en_priv *priv = netdev_priv(dev); struct mlx4_en_dev *mdev = priv->mdev; struct mlx4_ethtool_rxnfc *cmd = (struct mlx4_ethtool_rxnfc *)c; if (mdev->dev->caps.steering_mode != MLX4_STEERING_MODE_DEVICE_MANAGED || !priv->port_up) return -EINVAL; switch (cmd->cmd) { case ETHTOOL_SRXCLSRLINS: err = mlx4_en_flow_replace(dev, cmd); break; case ETHTOOL_SRXCLSRLDEL: err = mlx4_en_flow_detach(dev, cmd); break; default: en_warn(priv, "Unsupported ethtool command. 
(%d)\n", cmd->cmd); return -EINVAL; } return err; } static void mlx4_en_get_channels(struct net_device *dev, struct ethtool_channels *channel) { struct mlx4_en_priv *priv = netdev_priv(dev); memset(channel, 0, sizeof(*channel)); channel->max_rx = MAX_RX_RINGS; channel->max_tx = MLX4_EN_MAX_TX_RING_P_UP; channel->rx_count = priv->rx_ring_num; channel->tx_count = priv->tx_ring_num / MLX4_EN_NUM_UP; } static int mlx4_en_set_channels(struct net_device *dev, struct ethtool_channels *channel) { struct mlx4_en_priv *priv = netdev_priv(dev); struct mlx4_en_dev *mdev = priv->mdev; int port_up = 0; int i; int err = 0; if (channel->other_count || channel->combined_count || channel->tx_count > MLX4_EN_MAX_TX_RING_P_UP || channel->rx_count > MAX_RX_RINGS || !channel->tx_count || !channel->rx_count) return -EINVAL; err = mlx4_en_pre_config(priv); if (err) return err; mutex_lock(&mdev->state_lock); if (priv->port_up) { port_up = 1; mlx4_en_stop_port(dev); } mlx4_en_free_resources(priv); priv->num_tx_rings_p_up = channel->tx_count; priv->tx_ring_num = channel->tx_count * MLX4_EN_NUM_UP; priv->rx_ring_num = channel->rx_count; err = mlx4_en_alloc_resources(priv); if (err) { en_err(priv, "Failed reallocating port resources\n"); goto out; } netif_set_real_num_tx_queues(dev, priv->tx_ring_num); netif_set_real_num_rx_queues(dev, priv->rx_ring_num); mlx4_en_setup_tc(dev, MLX4_EN_NUM_UP); en_warn(priv, "Using %d TX rings\n", priv->tx_ring_num); en_warn(priv, "Using %d RX rings\n", priv->rx_ring_num); if (port_up) { err = mlx4_en_start_port(dev); if (err) en_err(priv, "Failed starting port\n"); for (i = 0; i < priv->rx_ring_num; i++) { priv->rx_cq[i]->moder_cnt = priv->rx_frames; priv->rx_cq[i]->moder_time = priv->rx_usecs; priv->last_moder_time[i] = MLX4_EN_AUTO_CONF; err = mlx4_en_set_cq_moder(priv, priv->rx_cq[i]); if (err) goto out; } } out: mutex_unlock(&mdev->state_lock); return err; } static int mlx4_en_get_ts_info(struct net_device *dev, struct ethtool_ts_info *info) { struct mlx4_en_priv *priv = netdev_priv(dev); struct mlx4_en_dev *mdev = priv->mdev; int ret; ret = ethtool_op_get_ts_info(dev, info); if (ret) return ret; if (mdev->dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_TS) { info->so_timestamping |= SOF_TIMESTAMPING_TX_HARDWARE | SOF_TIMESTAMPING_RX_HARDWARE | SOF_TIMESTAMPING_RAW_HARDWARE; info->tx_types = (1 << HWTSTAMP_TX_OFF) | (1 << HWTSTAMP_TX_ON); info->rx_filters = (1 << HWTSTAMP_FILTER_NONE) | (1 << HWTSTAMP_FILTER_ALL); } return ret; } const struct ethtool_ops mlx4_en_ethtool_ops = { .get_drvinfo = mlx4_en_get_drvinfo, .get_settings = mlx4_en_get_settings, .set_settings = mlx4_en_set_settings, .get_link = ethtool_op_get_link, .get_strings = mlx4_en_get_strings, .get_sset_count = mlx4_en_get_sset_count, .get_ethtool_stats = mlx4_en_get_ethtool_stats, .self_test = mlx4_en_self_test, .get_wol = mlx4_en_get_wol, .set_wol = mlx4_en_set_wol, .get_msglevel = mlx4_en_get_msglevel, .set_msglevel = mlx4_en_set_msglevel, .get_coalesce = mlx4_en_get_coalesce, .set_coalesce = mlx4_en_set_coalesce, .get_pauseparam = mlx4_en_get_pauseparam, .set_pauseparam = mlx4_en_set_pauseparam, .get_ringparam = mlx4_en_get_ringparam, .set_ringparam = mlx4_en_set_ringparam, .get_rxnfc = mlx4_en_get_rxnfc, .set_rxnfc = mlx4_en_set_rxnfc, .get_rxfh_indir_size = mlx4_en_get_rxfh_indir_size, .get_rxfh_indir = mlx4_en_get_rxfh_indir, .set_rxfh_indir = mlx4_en_set_rxfh_indir, .get_channels = mlx4_en_get_channels, .set_channels = mlx4_en_set_channels, .get_ts_info = mlx4_en_get_ts_info, }; Index: 
stable/10/sys/ofed/drivers/net/mlx4/en_netdev.c =================================================================== --- stable/10/sys/ofed/drivers/net/mlx4/en_netdev.c (revision 273245) +++ stable/10/sys/ofed/drivers/net/mlx4/en_netdev.c (revision 273246) @@ -1,2577 +1,2585 @@ /* * Copyright (c) 2007, 2014 Mellanox Technologies. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU * General Public License (GPL) Version 2, available from the file * COPYING in the main directory of this source tree, or the * OpenIB.org BSD license below: * * Redistribution and use in source and binary forms, with or * without modification, are permitted provided that the following * conditions are met: * * - Redistributions of source code must retain the above * copyright notice, this list of conditions and the following * disclaimer. * * - Redistributions in binary form must reproduce the above * copyright notice, this list of conditions and the following * disclaimer in the documentation and/or other materials * provided with the distribution. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * */ #include #include #include #ifdef CONFIG_NET_RX_BUSY_POLL #include #endif #include #include #include #include #include #include #include #include #include "mlx4_en.h" #include "en_port.h" static void mlx4_en_sysctl_stat(struct mlx4_en_priv *priv); static void mlx4_en_sysctl_conf(struct mlx4_en_priv *priv); static int mlx4_en_unit; #ifdef CONFIG_NET_RX_BUSY_POLL /* must be called with local_bh_disable()d */ static int mlx4_en_low_latency_recv(struct napi_struct *napi) { struct mlx4_en_cq *cq = container_of(napi, struct mlx4_en_cq, napi); struct net_device *dev = cq->dev; struct mlx4_en_priv *priv = netdev_priv(dev); struct mlx4_en_rx_ring *rx_ring = priv->rx_ring[cq->ring]; int done; if (!priv->port_up) return LL_FLUSH_FAILED; if (!mlx4_en_cq_lock_poll(cq)) return LL_FLUSH_BUSY; done = mlx4_en_process_rx_cq(dev, cq, 4); #ifdef LL_EXTENDED_STATS if (done) rx_ring->cleaned += done; else rx_ring->misses++; #endif mlx4_en_cq_unlock_poll(cq); return done; } #endif /* CONFIG_NET_RX_BUSY_POLL */ #ifdef CONFIG_RFS_ACCEL struct mlx4_en_filter { struct list_head next; struct work_struct work; u8 ip_proto; __be32 src_ip; __be32 dst_ip; __be16 src_port; __be16 dst_port; int rxq_index; struct mlx4_en_priv *priv; u32 flow_id; /* RFS infrastructure id */ int id; /* mlx4_en driver id */ u64 reg_id; /* Flow steering API id */ u8 activated; /* Used to prevent expiry before filter * is attached */ struct hlist_node filter_chain; }; static void mlx4_en_filter_rfs_expire(struct mlx4_en_priv *priv); static enum mlx4_net_trans_rule_id mlx4_ip_proto_to_trans_rule_id(u8 ip_proto) { switch (ip_proto) { case IPPROTO_UDP: return MLX4_NET_TRANS_RULE_ID_UDP; case IPPROTO_TCP: return MLX4_NET_TRANS_RULE_ID_TCP; default: return -EPROTONOSUPPORT; } }; static void mlx4_en_filter_work(struct work_struct *work) { struct mlx4_en_filter *filter = container_of(work, struct mlx4_en_filter, work); struct mlx4_en_priv *priv = 
filter->priv; struct mlx4_spec_list spec_tcp_udp = { .id = mlx4_ip_proto_to_trans_rule_id(filter->ip_proto), { .tcp_udp = { .dst_port = filter->dst_port, .dst_port_msk = (__force __be16)-1, .src_port = filter->src_port, .src_port_msk = (__force __be16)-1, }, }, }; struct mlx4_spec_list spec_ip = { .id = MLX4_NET_TRANS_RULE_ID_IPV4, { .ipv4 = { .dst_ip = filter->dst_ip, .dst_ip_msk = (__force __be32)-1, .src_ip = filter->src_ip, .src_ip_msk = (__force __be32)-1, }, }, }; struct mlx4_spec_list spec_eth = { .id = MLX4_NET_TRANS_RULE_ID_ETH, }; struct mlx4_net_trans_rule rule = { .list = LIST_HEAD_INIT(rule.list), .queue_mode = MLX4_NET_TRANS_Q_LIFO, .exclusive = 1, .allow_loopback = 1, .promisc_mode = MLX4_FS_REGULAR, .port = priv->port, .priority = MLX4_DOMAIN_RFS, }; int rc; __be64 mac_mask = cpu_to_be64(MLX4_MAC_MASK << 16); if (spec_tcp_udp.id < 0) { en_warn(priv, "RFS: ignoring unsupported ip protocol (%d)\n", filter->ip_proto); goto ignore; } list_add_tail(&spec_eth.list, &rule.list); list_add_tail(&spec_ip.list, &rule.list); list_add_tail(&spec_tcp_udp.list, &rule.list); rule.qpn = priv->rss_map.qps[filter->rxq_index].qpn; memcpy(spec_eth.eth.dst_mac, priv->dev->dev_addr, ETH_ALEN); memcpy(spec_eth.eth.dst_mac_msk, &mac_mask, ETH_ALEN); filter->activated = 0; if (filter->reg_id) { rc = mlx4_flow_detach(priv->mdev->dev, filter->reg_id); if (rc && rc != -ENOENT) en_err(priv, "Error detaching flow. rc = %d\n", rc); } rc = mlx4_flow_attach(priv->mdev->dev, &rule, &filter->reg_id); if (rc) en_err(priv, "Error attaching flow. err = %d\n", rc); ignore: mlx4_en_filter_rfs_expire(priv); filter->activated = 1; } static inline struct hlist_head * filter_hash_bucket(struct mlx4_en_priv *priv, __be32 src_ip, __be32 dst_ip, __be16 src_port, __be16 dst_port) { unsigned long l; int bucket_idx; l = (__force unsigned long)src_port | ((__force unsigned long)dst_port << 2); l ^= (__force unsigned long)(src_ip ^ dst_ip); bucket_idx = hash_long(l, MLX4_EN_FILTER_HASH_SHIFT); return &priv->filter_hash[bucket_idx]; } static struct mlx4_en_filter * mlx4_en_filter_alloc(struct mlx4_en_priv *priv, int rxq_index, __be32 src_ip, __be32 dst_ip, u8 ip_proto, __be16 src_port, __be16 dst_port, u32 flow_id) { struct mlx4_en_filter *filter = NULL; filter = kzalloc(sizeof(struct mlx4_en_filter), GFP_ATOMIC); if (!filter) return NULL; filter->priv = priv; filter->rxq_index = rxq_index; INIT_WORK(&filter->work, mlx4_en_filter_work); filter->src_ip = src_ip; filter->dst_ip = dst_ip; filter->ip_proto = ip_proto; filter->src_port = src_port; filter->dst_port = dst_port; filter->flow_id = flow_id; filter->id = priv->last_filter_id++ % RPS_NO_FILTER; list_add_tail(&filter->next, &priv->filters); hlist_add_head(&filter->filter_chain, filter_hash_bucket(priv, src_ip, dst_ip, src_port, dst_port)); return filter; } static void mlx4_en_filter_free(struct mlx4_en_filter *filter) { struct mlx4_en_priv *priv = filter->priv; int rc; list_del(&filter->next); rc = mlx4_flow_detach(priv->mdev->dev, filter->reg_id); if (rc && rc != -ENOENT) en_err(priv, "Error detaching flow. 
rc = %d\n", rc); kfree(filter); } static inline struct mlx4_en_filter * mlx4_en_filter_find(struct mlx4_en_priv *priv, __be32 src_ip, __be32 dst_ip, u8 ip_proto, __be16 src_port, __be16 dst_port) { struct hlist_node *elem; struct mlx4_en_filter *filter; struct mlx4_en_filter *ret = NULL; hlist_for_each_entry(filter, elem, filter_hash_bucket(priv, src_ip, dst_ip, src_port, dst_port), filter_chain) { if (filter->src_ip == src_ip && filter->dst_ip == dst_ip && filter->ip_proto == ip_proto && filter->src_port == src_port && filter->dst_port == dst_port) { ret = filter; break; } } return ret; } static int mlx4_en_filter_rfs(struct net_device *net_dev, const struct sk_buff *skb, u16 rxq_index, u32 flow_id) { struct mlx4_en_priv *priv = netdev_priv(net_dev); struct mlx4_en_filter *filter; const struct iphdr *ip; const __be16 *ports; u8 ip_proto; __be32 src_ip; __be32 dst_ip; __be16 src_port; __be16 dst_port; int nhoff = skb_network_offset(skb); int ret = 0; if (skb->protocol != htons(ETH_P_IP)) return -EPROTONOSUPPORT; ip = (const struct iphdr *)(skb->data + nhoff); if (ip_is_fragment(ip)) return -EPROTONOSUPPORT; if ((ip->protocol != IPPROTO_TCP) && (ip->protocol != IPPROTO_UDP)) return -EPROTONOSUPPORT; ports = (const __be16 *)(skb->data + nhoff + 4 * ip->ihl); ip_proto = ip->protocol; src_ip = ip->saddr; dst_ip = ip->daddr; src_port = ports[0]; dst_port = ports[1]; spin_lock_bh(&priv->filters_lock); filter = mlx4_en_filter_find(priv, src_ip, dst_ip, ip_proto, src_port, dst_port); if (filter) { if (filter->rxq_index == rxq_index) goto out; filter->rxq_index = rxq_index; } else { filter = mlx4_en_filter_alloc(priv, rxq_index, src_ip, dst_ip, ip_proto, src_port, dst_port, flow_id); if (!filter) { ret = -ENOMEM; goto err; } } queue_work(priv->mdev->workqueue, &filter->work); out: ret = filter->id; err: spin_unlock_bh(&priv->filters_lock); return ret; } void mlx4_en_cleanup_filters(struct mlx4_en_priv *priv, struct mlx4_en_rx_ring *rx_ring) { struct mlx4_en_filter *filter, *tmp; LIST_HEAD(del_list); spin_lock_bh(&priv->filters_lock); list_for_each_entry_safe(filter, tmp, &priv->filters, next) { list_move(&filter->next, &del_list); hlist_del(&filter->filter_chain); } spin_unlock_bh(&priv->filters_lock); list_for_each_entry_safe(filter, tmp, &del_list, next) { cancel_work_sync(&filter->work); mlx4_en_filter_free(filter); } } static void mlx4_en_filter_rfs_expire(struct mlx4_en_priv *priv) { struct mlx4_en_filter *filter = NULL, *tmp, *last_filter = NULL; LIST_HEAD(del_list); int i = 0; spin_lock_bh(&priv->filters_lock); list_for_each_entry_safe(filter, tmp, &priv->filters, next) { if (i > MLX4_EN_FILTER_EXPIRY_QUOTA) break; if (filter->activated && !work_pending(&filter->work) && rps_may_expire_flow(priv->dev, filter->rxq_index, filter->flow_id, filter->id)) { list_move(&filter->next, &del_list); hlist_del(&filter->filter_chain); } else last_filter = filter; i++; } if (last_filter && (&last_filter->next != priv->filters.next)) list_move(&priv->filters, &last_filter->next); spin_unlock_bh(&priv->filters_lock); list_for_each_entry_safe(filter, tmp, &del_list, next) mlx4_en_filter_free(filter); } #endif static void mlx4_en_vlan_rx_add_vid(void *arg, struct net_device *dev, u16 vid) { struct mlx4_en_priv *priv = netdev_priv(dev); struct mlx4_en_dev *mdev = priv->mdev; int err; int idx; if (arg != priv) return; en_dbg(HW, priv, "adding VLAN:%d\n", vid); set_bit(vid, priv->active_vlans); /* Add VID to port VLAN filter */ mutex_lock(&mdev->state_lock); if (mdev->device_up && priv->port_up) { err = 
mlx4_SET_VLAN_FLTR(mdev->dev, priv); if (err) en_err(priv, "Failed configuring VLAN filter\n"); } if (mlx4_register_vlan(mdev->dev, priv->port, vid, &idx)) en_dbg(HW, priv, "failed adding vlan %d\n", vid); mutex_unlock(&mdev->state_lock); } static void mlx4_en_vlan_rx_kill_vid(void *arg, struct net_device *dev, u16 vid) { struct mlx4_en_priv *priv = netdev_priv(dev); struct mlx4_en_dev *mdev = priv->mdev; int err; if (arg != priv) return; en_dbg(HW, priv, "Killing VID:%d\n", vid); clear_bit(vid, priv->active_vlans); /* Remove VID from port VLAN filter */ mutex_lock(&mdev->state_lock); mlx4_unregister_vlan(mdev->dev, priv->port, vid); if (mdev->device_up && priv->port_up) { err = mlx4_SET_VLAN_FLTR(mdev->dev, priv); if (err) en_err(priv, "Failed configuring VLAN filter\n"); } mutex_unlock(&mdev->state_lock); } static int mlx4_en_uc_steer_add(struct mlx4_en_priv *priv, unsigned char *mac, int *qpn, u64 *reg_id) { struct mlx4_en_dev *mdev = priv->mdev; struct mlx4_dev *dev = mdev->dev; int err; switch (dev->caps.steering_mode) { case MLX4_STEERING_MODE_B0: { struct mlx4_qp qp; u8 gid[16] = {0}; qp.qpn = *qpn; memcpy(&gid[10], mac, ETH_ALEN); gid[5] = priv->port; err = mlx4_unicast_attach(dev, &qp, gid, 0, MLX4_PROT_ETH); break; } case MLX4_STEERING_MODE_DEVICE_MANAGED: { struct mlx4_spec_list spec_eth = { {NULL} }; __be64 mac_mask = cpu_to_be64(MLX4_MAC_MASK << 16); struct mlx4_net_trans_rule rule = { .queue_mode = MLX4_NET_TRANS_Q_FIFO, .exclusive = 0, .allow_loopback = 1, .promisc_mode = MLX4_FS_REGULAR, .priority = MLX4_DOMAIN_NIC, }; rule.port = priv->port; rule.qpn = *qpn; INIT_LIST_HEAD(&rule.list); spec_eth.id = MLX4_NET_TRANS_RULE_ID_ETH; memcpy(spec_eth.eth.dst_mac, mac, ETH_ALEN); memcpy(spec_eth.eth.dst_mac_msk, &mac_mask, ETH_ALEN); list_add_tail(&spec_eth.list, &rule.list); err = mlx4_flow_attach(dev, &rule, reg_id); break; } default: return -EINVAL; } if (err) en_warn(priv, "Failed Attaching Unicast\n"); return err; } static void mlx4_en_uc_steer_release(struct mlx4_en_priv *priv, unsigned char *mac, int qpn, u64 reg_id) { struct mlx4_en_dev *mdev = priv->mdev; struct mlx4_dev *dev = mdev->dev; switch (dev->caps.steering_mode) { case MLX4_STEERING_MODE_B0: { struct mlx4_qp qp; u8 gid[16] = {0}; qp.qpn = qpn; memcpy(&gid[10], mac, ETH_ALEN); gid[5] = priv->port; mlx4_unicast_detach(dev, &qp, gid, MLX4_PROT_ETH); break; } case MLX4_STEERING_MODE_DEVICE_MANAGED: { mlx4_flow_detach(dev, reg_id); break; } default: en_err(priv, "Invalid steering mode.\n"); } } static int mlx4_en_get_qp(struct mlx4_en_priv *priv) { struct mlx4_en_dev *mdev = priv->mdev; struct mlx4_dev *dev = mdev->dev; struct mlx4_mac_entry *entry; int index = 0; int err = 0; u64 reg_id; int *qpn = &priv->base_qpn; u64 mac = mlx4_mac_to_u64(IF_LLADDR(priv->dev)); en_dbg(DRV, priv, "Registering MAC: %pM for adding\n", IF_LLADDR(priv->dev)); index = mlx4_register_mac(dev, priv->port, mac); if (index < 0) { err = index; en_err(priv, "Failed adding MAC: %pM\n", IF_LLADDR(priv->dev)); return err; } if (dev->caps.steering_mode == MLX4_STEERING_MODE_A0) { int base_qpn = mlx4_get_base_qpn(dev, priv->port); *qpn = base_qpn + index; return 0; } err = mlx4_qp_reserve_range(dev, 1, 1, qpn, 0); en_dbg(DRV, priv, "Reserved qp %d\n", *qpn); if (err) { en_err(priv, "Failed to reserve qp for mac registration\n"); goto qp_err; } err = mlx4_en_uc_steer_add(priv, IF_LLADDR(priv->dev), qpn, &reg_id); if (err) goto steer_err; entry = kmalloc(sizeof(*entry), GFP_KERNEL); if (!entry) { err = -ENOMEM; goto alloc_err; } memcpy(entry->mac, 
IF_LLADDR(priv->dev), sizeof(entry->mac)); entry->reg_id = reg_id; hlist_add_head(&entry->hlist, &priv->mac_hash[entry->mac[MLX4_EN_MAC_HASH_IDX]]); return 0; alloc_err: mlx4_en_uc_steer_release(priv, IF_LLADDR(priv->dev), *qpn, reg_id); steer_err: mlx4_qp_release_range(dev, *qpn, 1); qp_err: mlx4_unregister_mac(dev, priv->port, mac); return err; } static void mlx4_en_put_qp(struct mlx4_en_priv *priv) { struct mlx4_en_dev *mdev = priv->mdev; struct mlx4_dev *dev = mdev->dev; int qpn = priv->base_qpn; u64 mac; if (dev->caps.steering_mode == MLX4_STEERING_MODE_A0) { mac = mlx4_mac_to_u64(IF_LLADDR(priv->dev)); en_dbg(DRV, priv, "Registering MAC: %pM for deleting\n", IF_LLADDR(priv->dev)); mlx4_unregister_mac(dev, priv->port, mac); } else { struct mlx4_mac_entry *entry; struct hlist_node *n, *tmp; struct hlist_head *bucket; unsigned int i; for (i = 0; i < MLX4_EN_MAC_HASH_SIZE; ++i) { bucket = &priv->mac_hash[i]; hlist_for_each_entry_safe(entry, n, tmp, bucket, hlist) { mac = mlx4_mac_to_u64(entry->mac); en_dbg(DRV, priv, "Registering MAC: %pM for deleting\n", entry->mac); mlx4_en_uc_steer_release(priv, entry->mac, qpn, entry->reg_id); mlx4_unregister_mac(dev, priv->port, mac); hlist_del(&entry->hlist); kfree(entry); } } en_dbg(DRV, priv, "Releasing qp: port %d, qpn %d\n", priv->port, qpn); mlx4_qp_release_range(dev, qpn, 1); priv->flags &= ~MLX4_EN_FLAG_FORCE_PROMISC; } } static void mlx4_en_clear_list(struct net_device *dev) { struct mlx4_en_priv *priv = netdev_priv(dev); struct mlx4_en_mc_list *tmp, *mc_to_del; list_for_each_entry_safe(mc_to_del, tmp, &priv->mc_list, list) { list_del(&mc_to_del->list); kfree(mc_to_del); } } static void mlx4_en_cache_mclist(struct net_device *dev) { struct ifmultiaddr *ifma; struct mlx4_en_mc_list *tmp; struct mlx4_en_priv *priv = netdev_priv(dev); TAILQ_FOREACH(ifma, &dev->if_multiaddrs, ifma_link) { if (ifma->ifma_addr->sa_family != AF_LINK) continue; if (((struct sockaddr_dl *)ifma->ifma_addr)->sdl_alen != ETHER_ADDR_LEN) continue; /* Make sure the list didn't grow. 
*/ tmp = kzalloc(sizeof(struct mlx4_en_mc_list), GFP_ATOMIC); memcpy(tmp->addr, LLADDR((struct sockaddr_dl *)ifma->ifma_addr), ETH_ALEN); list_add_tail(&tmp->list, &priv->mc_list); } } static void update_mclist_flags(struct mlx4_en_priv *priv, struct list_head *dst, struct list_head *src) { struct mlx4_en_mc_list *dst_tmp, *src_tmp, *new_mc; bool found; /* Find all the entries that should be removed from dst, * These are the entries that are not found in src */ list_for_each_entry(dst_tmp, dst, list) { found = false; list_for_each_entry(src_tmp, src, list) { if (!memcmp(dst_tmp->addr, src_tmp->addr, ETH_ALEN)) { found = true; break; } } if (!found) dst_tmp->action = MCLIST_REM; } /* Add entries that exist in src but not in dst * mark them as need to add */ list_for_each_entry(src_tmp, src, list) { found = false; list_for_each_entry(dst_tmp, dst, list) { if (!memcmp(dst_tmp->addr, src_tmp->addr, ETH_ALEN)) { dst_tmp->action = MCLIST_NONE; found = true; break; } } if (!found) { new_mc = kmalloc(sizeof(struct mlx4_en_mc_list), GFP_KERNEL); if (!new_mc) { en_err(priv, "Failed to allocate current multicast list\n"); return; } memcpy(new_mc, src_tmp, sizeof(struct mlx4_en_mc_list)); new_mc->action = MCLIST_ADD; list_add_tail(&new_mc->list, dst); } } } static void mlx4_en_set_rx_mode(struct net_device *dev) { struct mlx4_en_priv *priv = netdev_priv(dev); if (!priv->port_up) return; queue_work(priv->mdev->workqueue, &priv->rx_mode_task); } static void mlx4_en_set_promisc_mode(struct mlx4_en_priv *priv, struct mlx4_en_dev *mdev) { int err = 0; if (!(priv->flags & MLX4_EN_FLAG_PROMISC)) { priv->flags |= MLX4_EN_FLAG_PROMISC; /* Enable promiscouos mode */ switch (mdev->dev->caps.steering_mode) { case MLX4_STEERING_MODE_DEVICE_MANAGED: err = mlx4_flow_steer_promisc_add(mdev->dev, priv->port, priv->base_qpn, MLX4_FS_ALL_DEFAULT); if (err) en_err(priv, "Failed enabling promiscuous mode\n"); priv->flags |= MLX4_EN_FLAG_MC_PROMISC; break; case MLX4_STEERING_MODE_B0: err = mlx4_unicast_promisc_add(mdev->dev, priv->base_qpn, priv->port); if (err) en_err(priv, "Failed enabling unicast promiscuous mode\n"); /* Add the default qp number as multicast * promisc */ if (!(priv->flags & MLX4_EN_FLAG_MC_PROMISC)) { err = mlx4_multicast_promisc_add(mdev->dev, priv->base_qpn, priv->port); if (err) en_err(priv, "Failed enabling multicast promiscuous mode\n"); priv->flags |= MLX4_EN_FLAG_MC_PROMISC; } break; case MLX4_STEERING_MODE_A0: err = mlx4_SET_PORT_qpn_calc(mdev->dev, priv->port, priv->base_qpn, 1); if (err) en_err(priv, "Failed enabling promiscuous mode\n"); break; } /* Disable port multicast filter (unconditionally) */ err = mlx4_SET_MCAST_FLTR(mdev->dev, priv->port, 0, 0, MLX4_MCAST_DISABLE); if (err) en_err(priv, "Failed disabling multicast filter\n"); } } static void mlx4_en_clear_promisc_mode(struct mlx4_en_priv *priv, struct mlx4_en_dev *mdev) { int err = 0; priv->flags &= ~MLX4_EN_FLAG_PROMISC; /* Disable promiscouos mode */ switch (mdev->dev->caps.steering_mode) { case MLX4_STEERING_MODE_DEVICE_MANAGED: err = mlx4_flow_steer_promisc_remove(mdev->dev, priv->port, MLX4_FS_ALL_DEFAULT); if (err) en_err(priv, "Failed disabling promiscuous mode\n"); priv->flags &= ~MLX4_EN_FLAG_MC_PROMISC; break; case MLX4_STEERING_MODE_B0: err = mlx4_unicast_promisc_remove(mdev->dev, priv->base_qpn, priv->port); if (err) en_err(priv, "Failed disabling unicast promiscuous mode\n"); /* Disable Multicast promisc */ if (priv->flags & MLX4_EN_FLAG_MC_PROMISC) { err = mlx4_multicast_promisc_remove(mdev->dev, priv->base_qpn, 
priv->port); if (err) en_err(priv, "Failed disabling multicast promiscuous mode\n"); priv->flags &= ~MLX4_EN_FLAG_MC_PROMISC; } break; case MLX4_STEERING_MODE_A0: err = mlx4_SET_PORT_qpn_calc(mdev->dev, priv->port, priv->base_qpn, 0); if (err) en_err(priv, "Failed disabling promiscuous mode\n"); break; } } static void mlx4_en_do_multicast(struct mlx4_en_priv *priv, struct net_device *dev, struct mlx4_en_dev *mdev) { struct mlx4_en_mc_list *mclist, *tmp; u8 mc_list[16] = {0}; int err = 0; u64 mcast_addr = 0; /* Enable/disable the multicast filter according to IFF_ALLMULTI */ if (dev->if_flags & IFF_ALLMULTI) { err = mlx4_SET_MCAST_FLTR(mdev->dev, priv->port, 0, 0, MLX4_MCAST_DISABLE); if (err) en_err(priv, "Failed disabling multicast filter\n"); /* Add the default qp number as multicast promisc */ if (!(priv->flags & MLX4_EN_FLAG_MC_PROMISC)) { switch (mdev->dev->caps.steering_mode) { case MLX4_STEERING_MODE_DEVICE_MANAGED: err = mlx4_flow_steer_promisc_add(mdev->dev, priv->port, priv->base_qpn, MLX4_FS_MC_DEFAULT); break; case MLX4_STEERING_MODE_B0: err = mlx4_multicast_promisc_add(mdev->dev, priv->base_qpn, priv->port); break; case MLX4_STEERING_MODE_A0: break; } if (err) en_err(priv, "Failed entering multicast promisc mode\n"); priv->flags |= MLX4_EN_FLAG_MC_PROMISC; } } else { /* Disable Multicast promisc */ if (priv->flags & MLX4_EN_FLAG_MC_PROMISC) { switch (mdev->dev->caps.steering_mode) { case MLX4_STEERING_MODE_DEVICE_MANAGED: err = mlx4_flow_steer_promisc_remove(mdev->dev, priv->port, MLX4_FS_MC_DEFAULT); break; case MLX4_STEERING_MODE_B0: err = mlx4_multicast_promisc_remove(mdev->dev, priv->base_qpn, priv->port); break; case MLX4_STEERING_MODE_A0: break; } if (err) en_err(priv, "Failed disabling multicast promiscuous mode\n"); priv->flags &= ~MLX4_EN_FLAG_MC_PROMISC; } err = mlx4_SET_MCAST_FLTR(mdev->dev, priv->port, 0, 0, MLX4_MCAST_DISABLE); if (err) en_err(priv, "Failed disabling multicast filter\n"); /* Flush mcast filter and init it with broadcast address */ mlx4_SET_MCAST_FLTR(mdev->dev, priv->port, ETH_BCAST, 1, MLX4_MCAST_CONFIG); /* Update multicast list - we cache all addresses so they won't * change while HW is updated holding the command semaphor */ mlx4_en_cache_mclist(dev); list_for_each_entry(mclist, &priv->mc_list, list) { mcast_addr = mlx4_mac_to_u64(mclist->addr); mlx4_SET_MCAST_FLTR(mdev->dev, priv->port, mcast_addr, 0, MLX4_MCAST_CONFIG); } err = mlx4_SET_MCAST_FLTR(mdev->dev, priv->port, 0, 0, MLX4_MCAST_ENABLE); if (err) en_err(priv, "Failed enabling multicast filter\n"); update_mclist_flags(priv, &priv->curr_list, &priv->mc_list); list_for_each_entry_safe(mclist, tmp, &priv->curr_list, list) { if (mclist->action == MCLIST_REM) { /* detach this address and delete from list */ memcpy(&mc_list[10], mclist->addr, ETH_ALEN); mc_list[5] = priv->port; err = mlx4_multicast_detach(mdev->dev, &priv->rss_map.indir_qp, mc_list, MLX4_PROT_ETH, mclist->reg_id); if (err) en_err(priv, "Fail to detach multicast address\n"); /* remove from list */ list_del(&mclist->list); kfree(mclist); } else if (mclist->action == MCLIST_ADD) { /* attach the address */ memcpy(&mc_list[10], mclist->addr, ETH_ALEN); /* needed for B0 steering support */ mc_list[5] = priv->port; err = mlx4_multicast_attach(mdev->dev, &priv->rss_map.indir_qp, mc_list, priv->port, 0, MLX4_PROT_ETH, &mclist->reg_id); if (err) en_err(priv, "Fail to attach multicast address\n"); } } } } static void mlx4_en_do_set_rx_mode(struct work_struct *work) { struct mlx4_en_priv *priv = container_of(work, struct mlx4_en_priv, 
rx_mode_task); struct mlx4_en_dev *mdev = priv->mdev; struct net_device *dev = priv->dev; mutex_lock(&mdev->state_lock); if (!mdev->device_up) { en_dbg(HW, priv, "Card is not up, ignoring rx mode change.\n"); goto out; } if (!priv->port_up) { en_dbg(HW, priv, "Port is down, ignoring rx mode change.\n"); goto out; } if (!mlx4_en_QUERY_PORT(mdev, priv->port)) { if (priv->port_state.link_state) { priv->last_link_state = MLX4_DEV_EVENT_PORT_UP; /* Important note: the following call for if_link_state_change * is needed for interface up scenario (start port, link state * change) */ if_link_state_change(priv->dev, LINK_STATE_UP); en_dbg(HW, priv, "Link Up\n"); } } /* Promiscuous mode: disable all filters */ if ((dev->if_flags & IFF_PROMISC) || (priv->flags & MLX4_EN_FLAG_FORCE_PROMISC)) { mlx4_en_set_promisc_mode(priv, mdev); goto out; } /* Not in promiscuous mode */ if (priv->flags & MLX4_EN_FLAG_PROMISC) mlx4_en_clear_promisc_mode(priv, mdev); mlx4_en_do_multicast(priv, dev, mdev); out: mutex_unlock(&mdev->state_lock); } #ifdef CONFIG_NET_POLL_CONTROLLER static void mlx4_en_netpoll(struct net_device *dev) { struct mlx4_en_priv *priv = netdev_priv(dev); struct mlx4_en_cq *cq; unsigned long flags; int i; for (i = 0; i < priv->rx_ring_num; i++) { cq = priv->rx_cq[i]; spin_lock_irqsave(&cq->lock, flags); napi_synchronize(&cq->napi); mlx4_en_process_rx_cq(dev, cq, 0); spin_unlock_irqrestore(&cq->lock, flags); } } #endif static void mlx4_en_watchdog_timeout(void *arg) { struct mlx4_en_priv *priv = arg; struct mlx4_en_dev *mdev = priv->mdev; en_dbg(DRV, priv, "Scheduling watchdog\n"); queue_work(mdev->workqueue, &priv->watchdog_task); if (priv->port_up) callout_reset(&priv->watchdog_timer, MLX4_EN_WATCHDOG_TIMEOUT, mlx4_en_watchdog_timeout, priv); } static void mlx4_en_set_default_moderation(struct mlx4_en_priv *priv) { struct mlx4_en_cq *cq; int i; /* If we haven't received a specific coalescing setting * (module param), we set the moderation parameters as follows: * - moder_cnt is set to the number of mtu sized packets to * satisfy our coalescing target. * - moder_time is set to a fixed value. 
*/ priv->rx_frames = MLX4_EN_RX_COAL_TARGET / priv->dev->if_mtu + 1; priv->rx_usecs = MLX4_EN_RX_COAL_TIME; priv->tx_frames = MLX4_EN_TX_COAL_PKTS; priv->tx_usecs = MLX4_EN_TX_COAL_TIME; en_dbg(INTR, priv, "Default coalescing params for mtu: %u - " "rx_frames:%d rx_usecs:%d\n", (unsigned)priv->dev->if_mtu, priv->rx_frames, priv->rx_usecs); /* Setup cq moderation params */ for (i = 0; i < priv->rx_ring_num; i++) { cq = priv->rx_cq[i]; cq->moder_cnt = priv->rx_frames; cq->moder_time = priv->rx_usecs; priv->last_moder_time[i] = MLX4_EN_AUTO_CONF; priv->last_moder_packets[i] = 0; priv->last_moder_bytes[i] = 0; } for (i = 0; i < priv->tx_ring_num; i++) { cq = priv->tx_cq[i]; cq->moder_cnt = priv->tx_frames; cq->moder_time = priv->tx_usecs; } /* Reset auto-moderation params */ priv->pkt_rate_low = MLX4_EN_RX_RATE_LOW; priv->rx_usecs_low = MLX4_EN_RX_COAL_TIME_LOW; priv->pkt_rate_high = MLX4_EN_RX_RATE_HIGH; priv->rx_usecs_high = MLX4_EN_RX_COAL_TIME_HIGH; priv->sample_interval = MLX4_EN_SAMPLE_INTERVAL; priv->adaptive_rx_coal = 1; priv->last_moder_jiffies = 0; priv->last_moder_tx_packets = 0; } static void mlx4_en_auto_moderation(struct mlx4_en_priv *priv) { unsigned long period = (unsigned long) (jiffies - priv->last_moder_jiffies); struct mlx4_en_cq *cq; unsigned long packets; unsigned long rate; unsigned long avg_pkt_size; unsigned long rx_packets; unsigned long rx_bytes; unsigned long rx_pkt_diff; int moder_time; int ring, err; if (!priv->adaptive_rx_coal || period < priv->sample_interval * HZ) return; for (ring = 0; ring < priv->rx_ring_num; ring++) { spin_lock(&priv->stats_lock); rx_packets = priv->rx_ring[ring]->packets; rx_bytes = priv->rx_ring[ring]->bytes; spin_unlock(&priv->stats_lock); rx_pkt_diff = ((unsigned long) (rx_packets - priv->last_moder_packets[ring])); packets = rx_pkt_diff; rate = packets * HZ / period; avg_pkt_size = packets ? 
((unsigned long) (rx_bytes - priv->last_moder_bytes[ring])) / packets : 0; /* Apply auto-moderation only when packet rate * exceeds a rate that it matters */ if (rate > (MLX4_EN_RX_RATE_THRESH / priv->rx_ring_num) && avg_pkt_size > MLX4_EN_AVG_PKT_SMALL) { if (rate < priv->pkt_rate_low) moder_time = priv->rx_usecs_low; else if (rate > priv->pkt_rate_high) moder_time = priv->rx_usecs_high; else moder_time = (rate - priv->pkt_rate_low) * (priv->rx_usecs_high - priv->rx_usecs_low) / (priv->pkt_rate_high - priv->pkt_rate_low) + priv->rx_usecs_low; } else { moder_time = priv->rx_usecs_low; } if (moder_time != priv->last_moder_time[ring]) { priv->last_moder_time[ring] = moder_time; cq = priv->rx_cq[ring]; cq->moder_time = moder_time; err = mlx4_en_set_cq_moder(priv, cq); if (err) en_err(priv, "Failed modifying moderation for cq:%d\n", ring); } priv->last_moder_packets[ring] = rx_packets; priv->last_moder_bytes[ring] = rx_bytes; } priv->last_moder_jiffies = jiffies; } static void mlx4_en_do_get_stats(struct work_struct *work) { struct delayed_work *delay = to_delayed_work(work); struct mlx4_en_priv *priv = container_of(delay, struct mlx4_en_priv, stats_task); struct mlx4_en_dev *mdev = priv->mdev; int err; mutex_lock(&mdev->state_lock); if (mdev->device_up) { if (priv->port_up) { err = mlx4_en_DUMP_ETH_STATS(mdev, priv->port, 0); if (err) en_dbg(HW, priv, "Could not update stats\n"); mlx4_en_auto_moderation(priv); } queue_delayed_work(mdev->workqueue, &priv->stats_task, STATS_DELAY); } mutex_unlock(&mdev->state_lock); } /* mlx4_en_service_task - Run service task for tasks that needed to be done * periodically */ static void mlx4_en_service_task(struct work_struct *work) { struct delayed_work *delay = to_delayed_work(work); struct mlx4_en_priv *priv = container_of(delay, struct mlx4_en_priv, service_task); struct mlx4_en_dev *mdev = priv->mdev; mutex_lock(&mdev->state_lock); if (mdev->device_up) { queue_delayed_work(mdev->workqueue, &priv->service_task, SERVICE_TASK_DELAY); } mutex_unlock(&mdev->state_lock); } static void mlx4_en_linkstate(struct work_struct *work) { struct mlx4_en_priv *priv = container_of(work, struct mlx4_en_priv, linkstate_task); struct mlx4_en_dev *mdev = priv->mdev; int linkstate = priv->link_state; mutex_lock(&mdev->state_lock); /* If observable port state changed set carrier state and * report to system log */ if (priv->last_link_state != linkstate) { if (linkstate == MLX4_DEV_EVENT_PORT_DOWN) { en_info(priv, "Link Down\n"); if_link_state_change(priv->dev, LINK_STATE_DOWN); /* make sure the port is up before notifying the OS. * This is tricky since we get here on INIT_PORT and * in such case we can't tell the OS the port is up. * To solve this there is a call to if_link_state_change * in set_rx_mode. 
* */ } else if (priv->port_up && (linkstate == MLX4_DEV_EVENT_PORT_UP)){ en_info(priv, "Link Up\n"); if_link_state_change(priv->dev, LINK_STATE_UP); } } priv->last_link_state = linkstate; mutex_unlock(&mdev->state_lock); } int mlx4_en_start_port(struct net_device *dev) { struct mlx4_en_priv *priv = netdev_priv(dev); struct mlx4_en_dev *mdev = priv->mdev; struct mlx4_en_cq *cq; struct mlx4_en_tx_ring *tx_ring; int rx_index = 0; int tx_index = 0; int err = 0; int i; int j; u8 mc_list[16] = {0}; if (priv->port_up) { en_dbg(DRV, priv, "start port called while port already up\n"); return 0; } INIT_LIST_HEAD(&priv->mc_list); INIT_LIST_HEAD(&priv->curr_list); INIT_LIST_HEAD(&priv->ethtool_list); /* Calculate Rx buf size */ dev->if_mtu = min(dev->if_mtu, priv->max_mtu); mlx4_en_calc_rx_buf(dev); priv->rx_alloc_size = max_t(int, 2 * roundup_pow_of_two(priv->rx_mb_size), PAGE_SIZE); priv->rx_alloc_order = get_order(priv->rx_alloc_size); priv->rx_buf_size = roundup_pow_of_two(priv->rx_mb_size); priv->log_rx_info = ROUNDUP_LOG2(sizeof(struct mlx4_en_rx_buf)); en_dbg(DRV, priv, "Rx buf size:%d\n", priv->rx_mb_size); /* Configure rx cq's and rings */ err = mlx4_en_activate_rx_rings(priv); if (err) { en_err(priv, "Failed to activate RX rings\n"); return err; } for (i = 0; i < priv->rx_ring_num; i++) { cq = priv->rx_cq[i]; mlx4_en_cq_init_lock(cq); err = mlx4_en_activate_cq(priv, cq, i); if (err) { en_err(priv, "Failed activating Rx CQ\n"); goto cq_err; } for (j = 0; j < cq->size; j++) cq->buf[j].owner_sr_opcode = MLX4_CQE_OWNER_MASK; err = mlx4_en_set_cq_moder(priv, cq); if (err) { en_err(priv, "Failed setting cq moderation parameters"); mlx4_en_deactivate_cq(priv, cq); goto cq_err; } mlx4_en_arm_cq(priv, cq); priv->rx_ring[i]->cqn = cq->mcq.cqn; ++rx_index; } /* Set qp number */ en_dbg(DRV, priv, "Getting qp number for port %d\n", priv->port); err = mlx4_en_get_qp(priv); if (err) { en_err(priv, "Failed getting eth qp\n"); goto cq_err; } mdev->mac_removed[priv->port] = 0; /* gets default allocated counter index from func cap */ /* or sink counter index if no resources */ priv->counter_index = mdev->dev->caps.def_counter_index[priv->port - 1]; en_dbg(DRV, priv, "%s: default counter index %d for port %d\n", __func__, priv->counter_index, priv->port); err = mlx4_en_config_rss_steer(priv); if (err) { en_err(priv, "Failed configuring rss steering\n"); goto mac_err; } err = mlx4_en_create_drop_qp(priv); if (err) goto rss_err; /* Configure tx cq's and rings */ for (i = 0; i < priv->tx_ring_num; i++) { /* Configure cq */ cq = priv->tx_cq[i]; err = mlx4_en_activate_cq(priv, cq, i); if (err) { en_err(priv, "Failed allocating Tx CQ\n"); goto tx_err; } err = mlx4_en_set_cq_moder(priv, cq); if (err) { en_err(priv, "Failed setting cq moderation parameters"); mlx4_en_deactivate_cq(priv, cq); goto tx_err; } en_dbg(DRV, priv, "Resetting index of collapsed CQ:%d to -1\n", i); cq->buf->wqe_index = cpu_to_be16(0xffff); /* Configure ring */ tx_ring = priv->tx_ring[i]; err = mlx4_en_activate_tx_ring(priv, tx_ring, cq->mcq.cqn, i / priv->num_tx_rings_p_up); if (err) { en_err(priv, "Failed allocating Tx ring\n"); mlx4_en_deactivate_cq(priv, cq); goto tx_err; } /* Arm CQ for TX completions */ mlx4_en_arm_cq(priv, cq); /* Set initial ownership of all Tx TXBBs to SW (1) */ for (j = 0; j < tx_ring->buf_size; j += STAMP_STRIDE) *((u32 *) (tx_ring->buf + j)) = 0xffffffff; ++tx_index; } /* Configure port */ err = mlx4_SET_PORT_general(mdev->dev, priv->port, priv->rx_mb_size, priv->prof->tx_pause, priv->prof->tx_ppp, 
priv->prof->rx_pause, priv->prof->rx_ppp); if (err) { en_err(priv, "Failed setting port general configurations for port %d, with error %d\n", priv->port, err); goto tx_err; } /* Set default qp number */ err = mlx4_SET_PORT_qpn_calc(mdev->dev, priv->port, priv->base_qpn, 0); if (err) { en_err(priv, "Failed setting default qp numbers\n"); goto tx_err; } /* Init port */ en_dbg(HW, priv, "Initializing port\n"); err = mlx4_INIT_PORT(mdev->dev, priv->port); if (err) { en_err(priv, "Failed Initializing port\n"); goto tx_err; } /* Attach rx QP to bradcast address */ memset(&mc_list[10], 0xff, ETH_ALEN); mc_list[5] = priv->port; /* needed for B0 steering support */ if (mlx4_multicast_attach(mdev->dev, &priv->rss_map.indir_qp, mc_list, priv->port, 0, MLX4_PROT_ETH, &priv->broadcast_id)) mlx4_warn(mdev, "Failed Attaching Broadcast\n"); /* Must redo promiscuous mode setup. */ priv->flags &= ~(MLX4_EN_FLAG_PROMISC | MLX4_EN_FLAG_MC_PROMISC); /* Schedule multicast task to populate multicast list */ queue_work(mdev->workqueue, &priv->rx_mode_task); mlx4_set_stats_bitmap(mdev->dev, priv->stats_bitmap); priv->port_up = true; /* Enable the queues. */ dev->if_drv_flags &= ~IFF_DRV_OACTIVE; dev->if_drv_flags |= IFF_DRV_RUNNING; #ifdef CONFIG_DEBUG_FS mlx4_en_create_debug_files(priv); #endif callout_reset(&priv->watchdog_timer, MLX4_EN_WATCHDOG_TIMEOUT, mlx4_en_watchdog_timeout, priv); return 0; tx_err: while (tx_index--) { mlx4_en_deactivate_tx_ring(priv, priv->tx_ring[tx_index]); mlx4_en_deactivate_cq(priv, priv->tx_cq[tx_index]); } mlx4_en_destroy_drop_qp(priv); rss_err: mlx4_en_release_rss_steer(priv); mac_err: mlx4_en_put_qp(priv); cq_err: while (rx_index--) mlx4_en_deactivate_cq(priv, priv->rx_cq[rx_index]); for (i = 0; i < priv->rx_ring_num; i++) mlx4_en_deactivate_rx_ring(priv, priv->rx_ring[i]); return err; /* need to close devices */ } void mlx4_en_stop_port(struct net_device *dev) { struct mlx4_en_priv *priv = netdev_priv(dev); struct mlx4_en_dev *mdev = priv->mdev; struct mlx4_en_mc_list *mclist, *tmp; int i; u8 mc_list[16] = {0}; if (!priv->port_up) { en_dbg(DRV, priv, "stop port called while port already down\n"); return; } #ifdef CONFIG_DEBUG_FS mlx4_en_delete_debug_files(priv); #endif /* close port*/ mlx4_CLOSE_PORT(mdev->dev, priv->port); /* Set port as not active */ priv->port_up = false; if (priv->counter_index != 0xff) { mlx4_counter_free(mdev->dev, priv->port, priv->counter_index); priv->counter_index = 0xff; } /* Promsicuous mode */ if (mdev->dev->caps.steering_mode == MLX4_STEERING_MODE_DEVICE_MANAGED) { priv->flags &= ~(MLX4_EN_FLAG_PROMISC | MLX4_EN_FLAG_MC_PROMISC); mlx4_flow_steer_promisc_remove(mdev->dev, priv->port, MLX4_FS_ALL_DEFAULT); mlx4_flow_steer_promisc_remove(mdev->dev, priv->port, MLX4_FS_MC_DEFAULT); } else if (priv->flags & MLX4_EN_FLAG_PROMISC) { priv->flags &= ~MLX4_EN_FLAG_PROMISC; /* Disable promiscouos mode */ mlx4_unicast_promisc_remove(mdev->dev, priv->base_qpn, priv->port); /* Disable Multicast promisc */ if (priv->flags & MLX4_EN_FLAG_MC_PROMISC) { mlx4_multicast_promisc_remove(mdev->dev, priv->base_qpn, priv->port); priv->flags &= ~MLX4_EN_FLAG_MC_PROMISC; } } /* Detach All multicasts */ memset(&mc_list[10], 0xff, ETH_ALEN); mc_list[5] = priv->port; /* needed for B0 steering support */ mlx4_multicast_detach(mdev->dev, &priv->rss_map.indir_qp, mc_list, MLX4_PROT_ETH, priv->broadcast_id); list_for_each_entry(mclist, &priv->curr_list, list) { memcpy(&mc_list[10], mclist->addr, ETH_ALEN); mc_list[5] = priv->port; mlx4_multicast_detach(mdev->dev, 
&priv->rss_map.indir_qp, mc_list, MLX4_PROT_ETH, mclist->reg_id); } mlx4_en_clear_list(dev); list_for_each_entry_safe(mclist, tmp, &priv->curr_list, list) { list_del(&mclist->list); kfree(mclist); } /* Flush multicast filter */ mlx4_SET_MCAST_FLTR(mdev->dev, priv->port, 0, 1, MLX4_MCAST_CONFIG); mlx4_en_destroy_drop_qp(priv); /* Free TX Rings */ for (i = 0; i < priv->tx_ring_num; i++) { mlx4_en_deactivate_tx_ring(priv, priv->tx_ring[i]); mlx4_en_deactivate_cq(priv, priv->tx_cq[i]); } msleep(10); for (i = 0; i < priv->tx_ring_num; i++) mlx4_en_free_tx_buf(dev, priv->tx_ring[i]); /* Free RSS qps */ mlx4_en_release_rss_steer(priv); /* Unregister Mac address for the port */ mlx4_en_put_qp(priv); mdev->mac_removed[priv->port] = 1; /* Free RX Rings */ for (i = 0; i < priv->rx_ring_num; i++) { struct mlx4_en_cq *cq = priv->rx_cq[i]; mlx4_en_deactivate_rx_ring(priv, priv->rx_ring[i]); mlx4_en_deactivate_cq(priv, cq); } callout_stop(&priv->watchdog_timer); dev->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE); } static void mlx4_en_restart(struct work_struct *work) { struct mlx4_en_priv *priv = container_of(work, struct mlx4_en_priv, watchdog_task); struct mlx4_en_dev *mdev = priv->mdev; struct net_device *dev = priv->dev; struct mlx4_en_tx_ring *ring; int i; if (priv->blocked == 0 || priv->port_up == 0) return; for (i = 0; i < priv->tx_ring_num; i++) { ring = priv->tx_ring[i]; if (ring->blocked && ring->watchdog_time + MLX4_EN_WATCHDOG_TIMEOUT < ticks) goto reset; } return; reset: priv->port_stats.tx_timeout++; en_dbg(DRV, priv, "Watchdog task called for port %d\n", priv->port); mutex_lock(&mdev->state_lock); if (priv->port_up) { mlx4_en_stop_port(dev); //for (i = 0; i < priv->tx_ring_num; i++) // netdev_tx_reset_queue(priv->tx_ring[i]->tx_queue); if (mlx4_en_start_port(dev)) en_err(priv, "Failed restarting port %d\n", priv->port); } mutex_unlock(&mdev->state_lock); } static void mlx4_en_clear_stats(struct net_device *dev) { struct mlx4_en_priv *priv = netdev_priv(dev); struct mlx4_en_dev *mdev = priv->mdev; int i; if (!mlx4_is_slave(mdev->dev)) if (mlx4_en_DUMP_ETH_STATS(mdev, priv->port, 1)) en_dbg(HW, priv, "Failed dumping statistics\n"); memset(&priv->pstats, 0, sizeof(priv->pstats)); memset(&priv->pkstats, 0, sizeof(priv->pkstats)); memset(&priv->port_stats, 0, sizeof(priv->port_stats)); memset(&priv->vport_stats, 0, sizeof(priv->vport_stats)); for (i = 0; i < priv->tx_ring_num; i++) { priv->tx_ring[i]->bytes = 0; priv->tx_ring[i]->packets = 0; priv->tx_ring[i]->tx_csum = 0; } for (i = 0; i < priv->rx_ring_num; i++) { priv->rx_ring[i]->bytes = 0; priv->rx_ring[i]->packets = 0; priv->rx_ring[i]->csum_ok = 0; priv->rx_ring[i]->csum_none = 0; } } static void mlx4_en_open(void* arg) { struct mlx4_en_priv *priv; struct mlx4_en_dev *mdev; struct net_device *dev; int err = 0; priv = arg; mdev = priv->mdev; dev = priv->dev; mutex_lock(&mdev->state_lock); if (!mdev->device_up) { en_err(priv, "Cannot open - device down/disabled\n"); goto out; } /* Reset HW statistics and SW counters */ mlx4_en_clear_stats(dev); err = mlx4_en_start_port(dev); if (err) en_err(priv, "Failed starting port:%d\n", priv->port); out: mutex_unlock(&mdev->state_lock); return; } void mlx4_en_free_resources(struct mlx4_en_priv *priv) { int i; #ifdef CONFIG_RFS_ACCEL if (priv->dev->rx_cpu_rmap) { free_irq_cpu_rmap(priv->dev->rx_cpu_rmap); priv->dev->rx_cpu_rmap = NULL; } #endif for (i = 0; i < priv->tx_ring_num; i++) { if (priv->tx_ring && priv->tx_ring[i]) mlx4_en_destroy_tx_ring(priv, &priv->tx_ring[i]); if (priv->tx_cq && 
priv->tx_cq[i]) mlx4_en_destroy_cq(priv, &priv->tx_cq[i]); } for (i = 0; i < priv->rx_ring_num; i++) { if (priv->rx_ring[i]) mlx4_en_destroy_rx_ring(priv, &priv->rx_ring[i], priv->prof->rx_ring_size, priv->stride); if (priv->rx_cq[i]) mlx4_en_destroy_cq(priv, &priv->rx_cq[i]); } if (priv->sysctl) sysctl_ctx_free(&priv->stat_ctx); } int mlx4_en_alloc_resources(struct mlx4_en_priv *priv) { struct mlx4_en_port_profile *prof = priv->prof; int i; int node = 0; /* Create rx Rings */ for (i = 0; i < priv->rx_ring_num; i++) { if (mlx4_en_create_cq(priv, &priv->rx_cq[i], prof->rx_ring_size, i, RX, node)) goto err; if (mlx4_en_create_rx_ring(priv, &priv->rx_ring[i], prof->rx_ring_size, node)) goto err; } /* Create tx Rings */ for (i = 0; i < priv->tx_ring_num; i++) { if (mlx4_en_create_cq(priv, &priv->tx_cq[i], prof->tx_ring_size, i, TX, node)) goto err; if (mlx4_en_create_tx_ring(priv, &priv->tx_ring[i], prof->tx_ring_size, TXBB_SIZE, node, i)) goto err; } #ifdef CONFIG_RFS_ACCEL priv->dev->rx_cpu_rmap = alloc_irq_cpu_rmap(priv->rx_ring_num); if (!priv->dev->rx_cpu_rmap) goto err; #endif /* Re-create stat sysctls in case the number of rings changed. */ mlx4_en_sysctl_stat(priv); return 0; err: en_err(priv, "Failed to allocate NIC resources\n"); for (i = 0; i < priv->rx_ring_num; i++) { if (priv->rx_ring[i]) mlx4_en_destroy_rx_ring(priv, &priv->rx_ring[i], prof->rx_ring_size, priv->stride); if (priv->rx_cq[i]) mlx4_en_destroy_cq(priv, &priv->rx_cq[i]); } for (i = 0; i < priv->tx_ring_num; i++) { if (priv->tx_ring[i]) mlx4_en_destroy_tx_ring(priv, &priv->tx_ring[i]); if (priv->tx_cq[i]) mlx4_en_destroy_cq(priv, &priv->tx_cq[i]); } priv->port_up = false; return -ENOMEM; } struct en_port_attribute { struct attribute attr; ssize_t (*show)(struct en_port *, struct en_port_attribute *, char *buf); ssize_t (*store)(struct en_port *, struct en_port_attribute *, char *buf, size_t count); }; #define PORT_ATTR_RO(_name) \ struct en_port_attribute en_port_attr_##_name = __ATTR_RO(_name) #define EN_PORT_ATTR(_name, _mode, _show, _store) \ struct en_port_attribute en_port_attr_##_name = __ATTR(_name, _mode, _show, _store) void mlx4_en_destroy_netdev(struct net_device *dev) { struct mlx4_en_priv *priv = netdev_priv(dev); struct mlx4_en_dev *mdev = priv->mdev; en_dbg(DRV, priv, "Destroying netdev on port:%d\n", priv->port); if (priv->vlan_attach != NULL) EVENTHANDLER_DEREGISTER(vlan_config, priv->vlan_attach); if (priv->vlan_detach != NULL) EVENTHANDLER_DEREGISTER(vlan_unconfig, priv->vlan_detach); /* Unregister device - this will close the port if it was up */ if (priv->registered) ether_ifdetach(dev); if (priv->allocated) mlx4_free_hwq_res(mdev->dev, &priv->res, MLX4_EN_PAGE_SIZE); mutex_lock(&mdev->state_lock); mlx4_en_stop_port(dev); mutex_unlock(&mdev->state_lock); cancel_delayed_work(&priv->stats_task); cancel_delayed_work(&priv->service_task); /* flush any pending task for this netdev */ flush_workqueue(mdev->workqueue); callout_drain(&priv->watchdog_timer); /* Detach the netdev so tasks would not attempt to access it */ mutex_lock(&mdev->state_lock); mdev->pndev[priv->port] = NULL; mutex_unlock(&mdev->state_lock); mlx4_en_free_resources(priv); /* freeing the sysctl conf cannot be called from within mlx4_en_free_resources */ if (priv->sysctl) sysctl_ctx_free(&priv->conf_ctx); kfree(priv->tx_ring); kfree(priv->tx_cq); kfree(priv); if_free(dev); } static int mlx4_en_change_mtu(struct net_device *dev, int new_mtu) { struct mlx4_en_priv *priv = netdev_priv(dev); struct mlx4_en_dev *mdev = priv->mdev; int err = 
0; en_dbg(DRV, priv, "Change MTU called - current:%u new:%u\n", (unsigned)dev->if_mtu, (unsigned)new_mtu); if ((new_mtu < MLX4_EN_MIN_MTU) || (new_mtu > priv->max_mtu)) { en_err(priv, "Bad MTU size:%d.\n", new_mtu); return -EPERM; } mutex_lock(&mdev->state_lock); dev->if_mtu = new_mtu; if (dev->if_drv_flags & IFF_DRV_RUNNING) { if (!mdev->device_up) { /* NIC is probably restarting - let watchdog task reset * * the port */ en_dbg(DRV, priv, "Change MTU called with card down!?\n"); } else { mlx4_en_stop_port(dev); err = mlx4_en_start_port(dev); if (err) { en_err(priv, "Failed restarting port:%d\n", priv->port); queue_work(mdev->workqueue, &priv->watchdog_task); } } } mutex_unlock(&mdev->state_lock); return 0; } static int mlx4_en_calc_media(struct mlx4_en_priv *priv) { int trans_type; int active; active = IFM_ETHER; if (priv->last_link_state == MLX4_DEV_EVENT_PORT_DOWN) return (active); /* * [ShaharK] mlx4_en_QUERY_PORT sleeps and cannot be called under a * non-sleepable lock. * I moved it to the periodic mlx4_en_do_get_stats. if (mlx4_en_QUERY_PORT(priv->mdev, priv->port)) return (active); */ active |= IFM_FDX; trans_type = priv->port_state.transciver; /* XXX I don't know all of the transceiver values. */ switch (priv->port_state.link_speed) { case 1000: active |= IFM_1000_T; break; case 10000: if (trans_type > 0 && trans_type <= 0xC) active |= IFM_10G_SR; else if (trans_type == 0x80 || trans_type == 0) active |= IFM_10G_CX4; break; case 40000: active |= IFM_40G_CR4; break; } if (priv->prof->tx_pause) active |= IFM_ETH_TXPAUSE; if (priv->prof->rx_pause) active |= IFM_ETH_RXPAUSE; return (active); } static void mlx4_en_media_status(struct ifnet *dev, struct ifmediareq *ifmr) { struct mlx4_en_priv *priv; priv = dev->if_softc; ifmr->ifm_status = IFM_AVALID; if (priv->last_link_state != MLX4_DEV_EVENT_PORT_DOWN) ifmr->ifm_status |= IFM_ACTIVE; ifmr->ifm_active = mlx4_en_calc_media(priv); return; } static int mlx4_en_media_change(struct ifnet *dev) { struct mlx4_en_priv *priv; struct ifmedia *ifm; int rxpause; int txpause; int error; priv = dev->if_softc; ifm = &priv->media; rxpause = txpause = 0; error = 0; if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER) return (EINVAL); switch (IFM_SUBTYPE(ifm->ifm_media)) { case IFM_AUTO: break; case IFM_10G_SR: case IFM_10G_CX4: case IFM_1000_T: case IFM_40G_CR4: if ((IFM_SUBTYPE(ifm->ifm_media) == IFM_SUBTYPE(mlx4_en_calc_media(priv))) && (ifm->ifm_media & IFM_FDX)) break; /* Fallthrough */ default: printf("%s: Only auto media type\n", if_name(dev)); return (EINVAL); } /* Allow user to set/clear pause */ if (IFM_OPTIONS(ifm->ifm_media) & IFM_ETH_RXPAUSE) rxpause = 1; if (IFM_OPTIONS(ifm->ifm_media) & IFM_ETH_TXPAUSE) txpause = 1; if (priv->prof->tx_pause != txpause || priv->prof->rx_pause != rxpause) { priv->prof->tx_pause = txpause; priv->prof->rx_pause = rxpause; error = -mlx4_SET_PORT_general(priv->mdev->dev, priv->port, priv->rx_mb_size + ETHER_CRC_LEN, priv->prof->tx_pause, priv->prof->tx_ppp, priv->prof->rx_pause, priv->prof->rx_ppp); } return (error); } static int mlx4_en_ioctl(struct ifnet *dev, u_long command, caddr_t data) { struct mlx4_en_priv *priv; struct mlx4_en_dev *mdev; struct ifreq *ifr; int error; int mask; error = 0; mask = 0; priv = dev->if_softc; mdev = priv->mdev; ifr = (struct ifreq *) data; switch (command) { case SIOCSIFMTU: error = -mlx4_en_change_mtu(dev, ifr->ifr_mtu); break; case SIOCSIFFLAGS: mutex_lock(&mdev->state_lock); if (dev->if_flags & IFF_UP) { if ((dev->if_drv_flags & IFF_DRV_RUNNING) == 0) mlx4_en_start_port(dev); else 
mlx4_en_set_rx_mode(dev); } else { if (dev->if_drv_flags & IFF_DRV_RUNNING) { mlx4_en_stop_port(dev); if_link_state_change(dev, LINK_STATE_DOWN); } } mutex_unlock(&mdev->state_lock); break; case SIOCADDMULTI: case SIOCDELMULTI: mlx4_en_set_rx_mode(dev); break; case SIOCSIFMEDIA: case SIOCGIFMEDIA: error = ifmedia_ioctl(dev, ifr, &priv->media, command); break; case SIOCSIFCAP: mutex_lock(&mdev->state_lock); mask = ifr->ifr_reqcap ^ dev->if_capenable; if (mask & IFCAP_HWCSUM) dev->if_capenable ^= IFCAP_HWCSUM; if (mask & IFCAP_TSO4) dev->if_capenable ^= IFCAP_TSO4; if (mask & IFCAP_TSO6) dev->if_capenable ^= IFCAP_TSO6; if (mask & IFCAP_LRO) dev->if_capenable ^= IFCAP_LRO; if (mask & IFCAP_VLAN_HWTAGGING) dev->if_capenable ^= IFCAP_VLAN_HWTAGGING; if (mask & IFCAP_VLAN_HWFILTER) dev->if_capenable ^= IFCAP_VLAN_HWFILTER; if (mask & IFCAP_WOL_MAGIC) dev->if_capenable ^= IFCAP_WOL_MAGIC; if (dev->if_drv_flags & IFF_DRV_RUNNING) mlx4_en_start_port(dev); mutex_unlock(&mdev->state_lock); VLAN_CAPABILITIES(dev); break; default: error = ether_ioctl(dev, command, data); break; } return (error); } int mlx4_en_init_netdev(struct mlx4_en_dev *mdev, int port, struct mlx4_en_port_profile *prof) { struct net_device *dev; struct mlx4_en_priv *priv; uint8_t dev_addr[ETHER_ADDR_LEN]; int err; int i; priv = kzalloc(sizeof(*priv), GFP_KERNEL); dev = priv->dev = if_alloc(IFT_ETHER); if (dev == NULL) { en_err(priv, "Net device allocation failed\n"); kfree(priv); return -ENOMEM; } dev->if_softc = priv; if_initname(dev, "mlxen", atomic_fetchadd_int(&mlx4_en_unit, 1)); dev->if_mtu = ETHERMTU; dev->if_baudrate = 1000000000; dev->if_init = mlx4_en_open; dev->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST; dev->if_ioctl = mlx4_en_ioctl; dev->if_transmit = mlx4_en_transmit; dev->if_qflush = mlx4_en_qflush; dev->if_snd.ifq_maxlen = prof->tx_ring_size; /* * Initialize driver private data */ priv->counter_index = 0xff; spin_lock_init(&priv->stats_lock); INIT_WORK(&priv->rx_mode_task, mlx4_en_do_set_rx_mode); INIT_WORK(&priv->watchdog_task, mlx4_en_restart); INIT_WORK(&priv->linkstate_task, mlx4_en_linkstate); INIT_DELAYED_WORK(&priv->stats_task, mlx4_en_do_get_stats); INIT_DELAYED_WORK(&priv->service_task, mlx4_en_service_task); callout_init(&priv->watchdog_timer, 1); #ifdef CONFIG_RFS_ACCEL INIT_LIST_HEAD(&priv->filters); spin_lock_init(&priv->filters_lock); #endif priv->msg_enable = MLX4_EN_MSG_LEVEL; priv->dev = dev; priv->mdev = mdev; priv->ddev = &mdev->pdev->dev; priv->prof = prof; priv->port = port; priv->port_up = false; priv->flags = prof->flags; priv->ctrl_flags = cpu_to_be32(MLX4_WQE_CTRL_CQ_UPDATE | MLX4_WQE_CTRL_SOLICITED); priv->num_tx_rings_p_up = mdev->profile.num_tx_rings_p_up; priv->tx_ring_num = prof->tx_ring_num; priv->tx_ring = kcalloc(MAX_TX_RINGS, sizeof(struct mlx4_en_tx_ring *), GFP_KERNEL); if (!priv->tx_ring) { err = -ENOMEM; goto out; } priv->tx_cq = kcalloc(sizeof(struct mlx4_en_cq *), MAX_TX_RINGS, GFP_KERNEL); if (!priv->tx_cq) { err = -ENOMEM; goto out; } priv->rx_ring_num = prof->rx_ring_num; priv->cqe_factor = (mdev->dev->caps.cqe_size == 64) ? 
1 : 0; priv->mac_index = -1; priv->last_ifq_jiffies = 0; priv->if_counters_rx_errors = 0; priv->if_counters_rx_no_buffer = 0; #ifdef CONFIG_MLX4_EN_DCB if (!mlx4_is_slave(priv->mdev->dev)) { priv->dcbx_cap = DCB_CAP_DCBX_HOST; priv->flags |= MLX4_EN_FLAG_DCB_ENABLED; if (mdev->dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_ETS_CFG) { dev->dcbnl_ops = &mlx4_en_dcbnl_ops; } else { en_info(priv, "QoS disabled - no HW support\n"); dev->dcbnl_ops = &mlx4_en_dcbnl_pfc_ops; } } #endif for (i = 0; i < MLX4_EN_MAC_HASH_SIZE; ++i) INIT_HLIST_HEAD(&priv->mac_hash[i]); /* Query for default mac and max mtu */ priv->max_mtu = mdev->dev->caps.eth_mtu_cap[priv->port]; priv->mac = mdev->dev->caps.def_mac[priv->port]; if (ILLEGAL_MAC(priv->mac)) { #if BITS_PER_LONG == 64 en_err(priv, "Port: %d, invalid mac burned: 0x%lx, quiting\n", priv->port, priv->mac); #elif BITS_PER_LONG == 32 en_err(priv, "Port: %d, invalid mac burned: 0x%llx, quiting\n", priv->port, priv->mac); #endif err = -EINVAL; goto out; } priv->stride = roundup_pow_of_two(sizeof(struct mlx4_en_rx_desc) + DS_SIZE); mlx4_en_sysctl_conf(priv); err = mlx4_en_alloc_resources(priv); if (err) goto out; /* Allocate page for receive rings */ err = mlx4_alloc_hwq_res(mdev->dev, &priv->res, MLX4_EN_PAGE_SIZE, MLX4_EN_PAGE_SIZE); if (err) { en_err(priv, "Failed to allocate page for rx qps\n"); goto out; } priv->allocated = 1; /* * Set driver features */ dev->if_capabilities |= IFCAP_RXCSUM | IFCAP_TXCSUM; dev->if_capabilities |= IFCAP_VLAN_MTU | IFCAP_VLAN_HWTAGGING; dev->if_capabilities |= IFCAP_VLAN_HWCSUM | IFCAP_VLAN_HWFILTER; dev->if_capabilities |= IFCAP_LINKSTATE | IFCAP_JUMBO_MTU; dev->if_capabilities |= IFCAP_LRO; if (mdev->LSO_support) dev->if_capabilities |= IFCAP_TSO4 | IFCAP_TSO6 | IFCAP_VLAN_HWTSO; #if 0 /* set TSO limits so that we don't have to drop TX packets */ dev->if_hw_tsomax = 65536 - (ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN); dev->if_hw_tsomaxsegcount = 16; dev->if_hw_tsomaxsegsize = 65536; /* XXX can do up to 4GByte */ #endif dev->if_capenable = dev->if_capabilities; dev->if_hwassist = 0; if (dev->if_capenable & (IFCAP_TSO4 | IFCAP_TSO6)) dev->if_hwassist |= CSUM_TSO; if (dev->if_capenable & IFCAP_TXCSUM) dev->if_hwassist |= (CSUM_TCP | CSUM_UDP | CSUM_IP); /* Register for VLAN events */ priv->vlan_attach = EVENTHANDLER_REGISTER(vlan_config, mlx4_en_vlan_rx_add_vid, priv, EVENTHANDLER_PRI_FIRST); priv->vlan_detach = EVENTHANDLER_REGISTER(vlan_unconfig, mlx4_en_vlan_rx_kill_vid, priv, EVENTHANDLER_PRI_FIRST); mdev->pndev[priv->port] = dev; priv->last_link_state = MLX4_DEV_EVENT_PORT_DOWN; mlx4_en_set_default_moderation(priv); /* Set default MAC */ for (i = 0; i < ETHER_ADDR_LEN; i++) dev_addr[ETHER_ADDR_LEN - 1 - i] = (u8) (priv->mac >> (8 * i)); ether_ifattach(dev, dev_addr); if_link_state_change(dev, LINK_STATE_DOWN); ifmedia_init(&priv->media, IFM_IMASK | IFM_ETH_FMASK, mlx4_en_media_change, mlx4_en_media_status); ifmedia_add(&priv->media, IFM_ETHER | IFM_FDX | IFM_1000_T, 0, NULL); ifmedia_add(&priv->media, IFM_ETHER | IFM_FDX | IFM_10G_SR, 0, NULL); ifmedia_add(&priv->media, IFM_ETHER | IFM_FDX | IFM_10G_CX4, 0, NULL); ifmedia_add(&priv->media, IFM_ETHER | IFM_FDX | IFM_40G_CR4, 0, NULL); ifmedia_add(&priv->media, IFM_ETHER | IFM_AUTO, 0, NULL); ifmedia_set(&priv->media, IFM_ETHER | IFM_AUTO); en_warn(priv, "Using %d TX rings\n", prof->tx_ring_num); en_warn(priv, "Using %d RX rings\n", prof->rx_ring_num); priv->registered = 1; en_warn(priv, "Using %d TX rings\n", prof->tx_ring_num); en_warn(priv, "Using %d RX rings\n", prof->rx_ring_num); 
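	/*
	 * Illustrative note (added for clarity, not part of the original
	 * commit): the receive buffer size computed just below adds the
	 * Ethernet header, VLAN tag and FCS overhead to the interface MTU.
	 * Assuming the usual values ETH_HLEN = 14, VLAN_HLEN = 4 and
	 * ETH_FCS_LEN = 4, the default 1500-byte MTU gives
	 * rx_mb_size = 1500 + 14 + 4 + 4 = 1522 bytes.
	 */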
priv->rx_mb_size = dev->if_mtu + ETH_HLEN + VLAN_HLEN + ETH_FCS_LEN; err = mlx4_SET_PORT_general(mdev->dev, priv->port, priv->rx_mb_size, prof->tx_pause, prof->tx_ppp, prof->rx_pause, prof->rx_ppp); if (err) { en_err(priv, "Failed setting port general configurations " "for port %d, with error %d\n", priv->port, err); goto out; } /* Init port */ en_warn(priv, "Initializing port\n"); err = mlx4_INIT_PORT(mdev->dev, priv->port); if (err) { en_err(priv, "Failed Initializing port\n"); goto out; } queue_delayed_work(mdev->workqueue, &priv->stats_task, STATS_DELAY); if (mdev->dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_TS) queue_delayed_work(mdev->workqueue, &priv->service_task, SERVICE_TASK_DELAY); return 0; out: mlx4_en_destroy_netdev(dev); return err; } static int mlx4_en_set_ring_size(struct net_device *dev, int rx_size, int tx_size) { struct mlx4_en_priv *priv = netdev_priv(dev); struct mlx4_en_dev *mdev = priv->mdev; int port_up = 0; int err = 0; rx_size = roundup_pow_of_two(rx_size); rx_size = max_t(u32, rx_size, MLX4_EN_MIN_RX_SIZE); rx_size = min_t(u32, rx_size, MLX4_EN_MAX_RX_SIZE); tx_size = roundup_pow_of_two(tx_size); tx_size = max_t(u32, tx_size, MLX4_EN_MIN_TX_SIZE); tx_size = min_t(u32, tx_size, MLX4_EN_MAX_TX_SIZE); if (rx_size == (priv->port_up ? priv->rx_ring[0]->actual_size : priv->rx_ring[0]->size) && tx_size == priv->tx_ring[0]->size) return 0; mutex_lock(&mdev->state_lock); if (priv->port_up) { port_up = 1; mlx4_en_stop_port(dev); } mlx4_en_free_resources(priv); priv->prof->tx_ring_size = tx_size; priv->prof->rx_ring_size = rx_size; err = mlx4_en_alloc_resources(priv); if (err) { en_err(priv, "Failed reallocating port resources\n"); goto out; } if (port_up) { err = mlx4_en_start_port(dev); if (err) en_err(priv, "Failed starting port\n"); } out: mutex_unlock(&mdev->state_lock); return err; } static int mlx4_en_set_rx_ring_size(SYSCTL_HANDLER_ARGS) { struct mlx4_en_priv *priv; int size; int error; priv = arg1; size = priv->prof->rx_ring_size; error = sysctl_handle_int(oidp, &size, 0, req); if (error || !req->newptr) return (error); error = -mlx4_en_set_ring_size(priv->dev, size, priv->prof->tx_ring_size); return (error); } static int mlx4_en_set_tx_ring_size(SYSCTL_HANDLER_ARGS) { struct mlx4_en_priv *priv; int size; int error; priv = arg1; size = priv->prof->tx_ring_size; error = sysctl_handle_int(oidp, &size, 0, req); if (error || !req->newptr) return (error); error = -mlx4_en_set_ring_size(priv->dev, priv->prof->rx_ring_size, size); return (error); } static int mlx4_en_set_tx_ppp(SYSCTL_HANDLER_ARGS) { struct mlx4_en_priv *priv; int ppp; int error; priv = arg1; ppp = priv->prof->tx_ppp; error = sysctl_handle_int(oidp, &ppp, 0, req); if (error || !req->newptr) return (error); if (ppp > 0xff || ppp < 0) return (-EINVAL); priv->prof->tx_ppp = ppp; error = -mlx4_SET_PORT_general(priv->mdev->dev, priv->port, priv->rx_mb_size + ETHER_CRC_LEN, priv->prof->tx_pause, priv->prof->tx_ppp, priv->prof->rx_pause, priv->prof->rx_ppp); return (error); } static int mlx4_en_set_rx_ppp(SYSCTL_HANDLER_ARGS) { struct mlx4_en_priv *priv; struct mlx4_en_dev *mdev; int ppp; int error; int port_up; port_up = 0; priv = arg1; mdev = priv->mdev; ppp = priv->prof->rx_ppp; error = sysctl_handle_int(oidp, &ppp, 0, req); if (error || !req->newptr) return (error); if (ppp > 0xff || ppp < 0) return (-EINVAL); /* See if we have to change the number of tx queues. 
*/ if (!ppp != !priv->prof->rx_ppp) { mutex_lock(&mdev->state_lock); if (priv->port_up) { port_up = 1; mlx4_en_stop_port(priv->dev); } mlx4_en_free_resources(priv); priv->prof->rx_ppp = ppp; error = -mlx4_en_alloc_resources(priv); if (error) en_err(priv, "Failed reallocating port resources\n"); if (error == 0 && port_up) { error = -mlx4_en_start_port(priv->dev); if (error) en_err(priv, "Failed starting port\n"); } mutex_unlock(&mdev->state_lock); return (error); } priv->prof->rx_ppp = ppp; error = -mlx4_SET_PORT_general(priv->mdev->dev, priv->port, priv->rx_mb_size + ETHER_CRC_LEN, priv->prof->tx_pause, priv->prof->tx_ppp, priv->prof->rx_pause, priv->prof->rx_ppp); return (error); } static void mlx4_en_sysctl_conf(struct mlx4_en_priv *priv) { struct net_device *dev; struct sysctl_ctx_list *ctx; struct sysctl_oid *node; struct sysctl_oid_list *node_list; struct sysctl_oid *coal; struct sysctl_oid_list *coal_list; + const char *pnameunit; dev = priv->dev; ctx = &priv->conf_ctx; + pnameunit = device_get_nameunit(priv->mdev->pdev->dev.bsddev); sysctl_ctx_init(ctx); priv->sysctl = SYSCTL_ADD_NODE(ctx, SYSCTL_STATIC_CHILDREN(_hw), OID_AUTO, dev->if_xname, CTLFLAG_RD, 0, "mlx4 10gig ethernet"); node = SYSCTL_ADD_NODE(ctx, SYSCTL_CHILDREN(priv->sysctl), OID_AUTO, "conf", CTLFLAG_RD, NULL, "Configuration"); node_list = SYSCTL_CHILDREN(node); SYSCTL_ADD_UINT(ctx, node_list, OID_AUTO, "msg_enable", CTLFLAG_RW, &priv->msg_enable, 0, "Driver message enable bitfield"); SYSCTL_ADD_UINT(ctx, node_list, OID_AUTO, "rx_rings", - CTLTYPE_INT | CTLFLAG_RD, &priv->rx_ring_num, 0, + CTLFLAG_RD, &priv->rx_ring_num, 0, "Number of receive rings"); SYSCTL_ADD_UINT(ctx, node_list, OID_AUTO, "tx_rings", - CTLTYPE_INT | CTLFLAG_RD, &priv->tx_ring_num, 0, + CTLFLAG_RD, &priv->tx_ring_num, 0, "Number of transmit rings"); SYSCTL_ADD_PROC(ctx, node_list, OID_AUTO, "rx_size", CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, priv, 0, mlx4_en_set_rx_ring_size, "I", "Receive ring size"); SYSCTL_ADD_PROC(ctx, node_list, OID_AUTO, "tx_size", CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, priv, 0, mlx4_en_set_tx_ring_size, "I", "Transmit ring size"); SYSCTL_ADD_PROC(ctx, node_list, OID_AUTO, "tx_ppp", CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, priv, 0, mlx4_en_set_tx_ppp, "I", "TX Per-priority pause"); SYSCTL_ADD_PROC(ctx, node_list, OID_AUTO, "rx_ppp", CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, priv, 0, mlx4_en_set_rx_ppp, "I", "RX Per-priority pause"); + SYSCTL_ADD_UINT(ctx, node_list, OID_AUTO, "port_num", + CTLFLAG_RD, &priv->port, 0, + "Port Number"); + SYSCTL_ADD_STRING(ctx, node_list, OID_AUTO, "device_name", + CTLFLAG_RD, __DECONST(void *, pnameunit), 0, + "PCI device name"); /* Add coalescer configuration. 
*/ coal = SYSCTL_ADD_NODE(ctx, node_list, OID_AUTO, "coalesce", CTLFLAG_RD, NULL, "Interrupt coalesce configuration"); coal_list = SYSCTL_CHILDREN(node); SYSCTL_ADD_UINT(ctx, coal_list, OID_AUTO, "pkt_rate_low", CTLFLAG_RW, &priv->pkt_rate_low, 0, "Packets per-second for minimum delay"); SYSCTL_ADD_UINT(ctx, coal_list, OID_AUTO, "rx_usecs_low", CTLFLAG_RW, &priv->rx_usecs_low, 0, "Minimum RX delay in micro-seconds"); SYSCTL_ADD_UINT(ctx, coal_list, OID_AUTO, "pkt_rate_high", CTLFLAG_RW, &priv->pkt_rate_high, 0, "Packets per-second for maximum delay"); SYSCTL_ADD_UINT(ctx, coal_list, OID_AUTO, "rx_usecs_high", CTLFLAG_RW, &priv->rx_usecs_high, 0, "Maximum RX delay in micro-seconds"); SYSCTL_ADD_UINT(ctx, coal_list, OID_AUTO, "sample_interval", CTLFLAG_RW, &priv->sample_interval, 0, "adaptive frequency in units of HZ ticks"); SYSCTL_ADD_UINT(ctx, coal_list, OID_AUTO, "adaptive_rx_coal", CTLFLAG_RW, &priv->adaptive_rx_coal, 0, "Enable adaptive rx coalescing"); } static void mlx4_en_sysctl_stat(struct mlx4_en_priv *priv) { struct net_device *dev; struct sysctl_ctx_list *ctx; struct sysctl_oid *node; struct sysctl_oid_list *node_list; struct sysctl_oid *ring_node; struct sysctl_oid_list *ring_list; struct mlx4_en_tx_ring *tx_ring; struct mlx4_en_rx_ring *rx_ring; char namebuf[128]; int i; dev = priv->dev; ctx = &priv->stat_ctx; sysctl_ctx_init(ctx); node = SYSCTL_ADD_NODE(ctx, SYSCTL_CHILDREN(priv->sysctl), OID_AUTO, "stat", CTLFLAG_RD, NULL, "Statistics"); node_list = SYSCTL_CHILDREN(node); #ifdef MLX4_EN_PERF_STAT SYSCTL_ADD_UINT(ctx, node_list, OID_AUTO, "tx_poll", CTLFLAG_RD, &priv->pstats.tx_poll, "TX Poll calls"); SYSCTL_ADD_QUAD(ctx, node_list, OID_AUTO, "tx_pktsz_avg", CTLFLAG_RD, &priv->pstats.tx_pktsz_avg, "TX average packet size"); SYSCTL_ADD_UINT(ctx, node_list, OID_AUTO, "inflight_avg", CTLFLAG_RD, &priv->pstats.inflight_avg, "TX average packets in-flight"); SYSCTL_ADD_UINT(ctx, node_list, OID_AUTO, "tx_coal_avg", CTLFLAG_RD, &priv->pstats.tx_coal_avg, "TX average coalesced completions"); SYSCTL_ADD_UINT(ctx, node_list, OID_AUTO, "rx_coal_avg", CTLFLAG_RD, &priv->pstats.rx_coal_avg, "RX average coalesced completions"); #endif SYSCTL_ADD_ULONG(ctx, node_list, OID_AUTO, "tso_packets", CTLFLAG_RD, &priv->port_stats.tso_packets, "TSO packets sent"); SYSCTL_ADD_ULONG(ctx, node_list, OID_AUTO, "queue_stopped", CTLFLAG_RD, &priv->port_stats.queue_stopped, "Queue full"); SYSCTL_ADD_ULONG(ctx, node_list, OID_AUTO, "wake_queue", CTLFLAG_RD, &priv->port_stats.wake_queue, "Queue resumed after full"); SYSCTL_ADD_ULONG(ctx, node_list, OID_AUTO, "tx_timeout", CTLFLAG_RD, &priv->port_stats.tx_timeout, "Transmit timeouts"); SYSCTL_ADD_ULONG(ctx, node_list, OID_AUTO, "rx_alloc_failed", CTLFLAG_RD, &priv->port_stats.rx_alloc_failed, "RX failed to allocate mbuf"); SYSCTL_ADD_ULONG(ctx, node_list, OID_AUTO, "rx_chksum_good", CTLFLAG_RD, &priv->port_stats.rx_chksum_good, "RX checksum offload success"); SYSCTL_ADD_ULONG(ctx, node_list, OID_AUTO, "rx_chksum_none", CTLFLAG_RD, &priv->port_stats.rx_chksum_none, "RX without checksum offload"); SYSCTL_ADD_ULONG(ctx, node_list, OID_AUTO, "tx_chksum_offload", CTLFLAG_RD, &priv->port_stats.tx_chksum_offload, "TX checksum offloads"); /* Could strdup the names and add in a loop. This is simpler. 
*/ SYSCTL_ADD_ULONG(ctx, node_list, OID_AUTO, "rx_bytes", CTLFLAG_RD, &priv->pkstats.rx_bytes, "RX Bytes"); SYSCTL_ADD_ULONG(ctx, node_list, OID_AUTO, "rx_packets", CTLFLAG_RD, &priv->pkstats.rx_packets, "RX packets"); SYSCTL_ADD_ULONG(ctx, node_list, OID_AUTO, "rx_multicast_packets", CTLFLAG_RD, &priv->pkstats.rx_multicast_packets, "RX Multicast Packets"); SYSCTL_ADD_ULONG(ctx, node_list, OID_AUTO, "rx_broadcast_packets", CTLFLAG_RD, &priv->pkstats.rx_broadcast_packets, "RX Broadcast Packets"); SYSCTL_ADD_ULONG(ctx, node_list, OID_AUTO, "rx_errors", CTLFLAG_RD, &priv->pkstats.rx_errors, "RX Errors"); SYSCTL_ADD_ULONG(ctx, node_list, OID_AUTO, "rx_dropped", CTLFLAG_RD, &priv->pkstats.rx_dropped, "RX Dropped"); SYSCTL_ADD_ULONG(ctx, node_list, OID_AUTO, "rx_length_errors", CTLFLAG_RD, &priv->pkstats.rx_length_errors, "RX Length Errors"); SYSCTL_ADD_ULONG(ctx, node_list, OID_AUTO, "rx_over_errors", CTLFLAG_RD, &priv->pkstats.rx_over_errors, "RX Over Errors"); SYSCTL_ADD_ULONG(ctx, node_list, OID_AUTO, "rx_crc_errors", CTLFLAG_RD, &priv->pkstats.rx_crc_errors, "RX CRC Errors"); SYSCTL_ADD_ULONG(ctx, node_list, OID_AUTO, "rx_jabbers", CTLFLAG_RD, &priv->pkstats.rx_jabbers, "RX Jabbers"); SYSCTL_ADD_ULONG(ctx, node_list, OID_AUTO, "rx_in_range_length_error", CTLFLAG_RD, &priv->pkstats.rx_in_range_length_error, "RX IN_Range Length Error"); SYSCTL_ADD_ULONG(ctx, node_list, OID_AUTO, "rx_out_range_length_error", CTLFLAG_RD, &priv->pkstats.rx_out_range_length_error, "RX Out Range Length Error"); SYSCTL_ADD_ULONG(ctx, node_list, OID_AUTO, "rx_lt_64_bytes_packets", CTLFLAG_RD, &priv->pkstats.rx_lt_64_bytes_packets, "RX Lt 64 Bytes Packets"); SYSCTL_ADD_ULONG(ctx, node_list, OID_AUTO, "rx_127_bytes_packets", CTLFLAG_RD, &priv->pkstats.rx_127_bytes_packets, "RX 127 bytes Packets"); SYSCTL_ADD_ULONG(ctx, node_list, OID_AUTO, "rx_255_bytes_packets", CTLFLAG_RD, &priv->pkstats.rx_255_bytes_packets, "RX 255 bytes Packets"); SYSCTL_ADD_ULONG(ctx, node_list, OID_AUTO, "rx_511_bytes_packets", CTLFLAG_RD, &priv->pkstats.rx_511_bytes_packets, "RX 511 bytes Packets"); SYSCTL_ADD_ULONG(ctx, node_list, OID_AUTO, "rx_1023_bytes_packets", CTLFLAG_RD, &priv->pkstats.rx_1023_bytes_packets, "RX 1023 bytes Packets"); SYSCTL_ADD_ULONG(ctx, node_list, OID_AUTO, "rx_1518_bytes_packets", CTLFLAG_RD, &priv->pkstats.rx_1518_bytes_packets, "RX 1518 bytes Packets"); SYSCTL_ADD_ULONG(ctx, node_list, OID_AUTO, "rx_1522_bytes_packets", CTLFLAG_RD, &priv->pkstats.rx_1522_bytes_packets, "RX 1522 bytes Packets"); SYSCTL_ADD_ULONG(ctx, node_list, OID_AUTO, "rx_1548_bytes_packets", CTLFLAG_RD, &priv->pkstats.rx_1548_bytes_packets, "RX 1548 bytes Packets"); SYSCTL_ADD_ULONG(ctx, node_list, OID_AUTO, "rx_gt_1548_bytes_packets", CTLFLAG_RD, &priv->pkstats.rx_gt_1548_bytes_packets, "RX Greater Then 1548 bytes Packets"); struct mlx4_en_pkt_stats { unsigned long tx_packets; unsigned long tx_bytes; unsigned long tx_multicast_packets; unsigned long tx_broadcast_packets; unsigned long tx_errors; unsigned long tx_dropped; unsigned long tx_lt_64_bytes_packets; unsigned long tx_127_bytes_packets; unsigned long tx_255_bytes_packets; unsigned long tx_511_bytes_packets; unsigned long tx_1023_bytes_packets; unsigned long tx_1518_bytes_packets; unsigned long tx_1522_bytes_packets; unsigned long tx_1548_bytes_packets; unsigned long tx_gt_1548_bytes_packets; unsigned long rx_prio[NUM_PRIORITIES][NUM_PRIORITY_STATS]; unsigned long tx_prio[NUM_PRIORITIES][NUM_PRIORITY_STATS]; #define NUM_PKT_STATS 72 }; SYSCTL_ADD_ULONG(ctx, node_list, OID_AUTO, 
"tx_packets", CTLFLAG_RD, &priv->pkstats.tx_packets, "TX packets"); SYSCTL_ADD_ULONG(ctx, node_list, OID_AUTO, "tx_bytes", CTLFLAG_RD, &priv->pkstats.tx_packets, "TX Bytes"); SYSCTL_ADD_ULONG(ctx, node_list, OID_AUTO, "tx_multicast_packets", CTLFLAG_RD, &priv->pkstats.tx_multicast_packets, "TX Multicast Packets"); SYSCTL_ADD_ULONG(ctx, node_list, OID_AUTO, "tx_broadcast_packets", CTLFLAG_RD, &priv->pkstats.tx_broadcast_packets, "TX Broadcast Packets"); SYSCTL_ADD_ULONG(ctx, node_list, OID_AUTO, "tx_errors", CTLFLAG_RD, &priv->pkstats.tx_errors, "TX Errors"); SYSCTL_ADD_ULONG(ctx, node_list, OID_AUTO, "tx_dropped", CTLFLAG_RD, &priv->pkstats.tx_dropped, "TX Dropped"); SYSCTL_ADD_ULONG(ctx, node_list, OID_AUTO, "tx_lt_64_bytes_packets", CTLFLAG_RD, &priv->pkstats.tx_lt_64_bytes_packets, "TX Less Then 64 Bytes Packets"); SYSCTL_ADD_ULONG(ctx, node_list, OID_AUTO, "tx_127_bytes_packets", CTLFLAG_RD, &priv->pkstats.tx_127_bytes_packets, "TX 127 Bytes Packets"); SYSCTL_ADD_ULONG(ctx, node_list, OID_AUTO, "tx_255_bytes_packets", CTLFLAG_RD, &priv->pkstats.tx_255_bytes_packets, "TX 255 Bytes Packets"); SYSCTL_ADD_ULONG(ctx, node_list, OID_AUTO, "tx_511_bytes_packets", CTLFLAG_RD, &priv->pkstats.tx_511_bytes_packets, "TX 511 Bytes Packets"); SYSCTL_ADD_ULONG(ctx, node_list, OID_AUTO, "tx_1023_bytes_packets", CTLFLAG_RD, &priv->pkstats.tx_1023_bytes_packets, "TX 1023 Bytes Packets"); SYSCTL_ADD_ULONG(ctx, node_list, OID_AUTO, "tx_1518_bytes_packets", CTLFLAG_RD, &priv->pkstats.tx_1518_bytes_packets, "TX 1518 Bytes Packets"); SYSCTL_ADD_ULONG(ctx, node_list, OID_AUTO, "tx_1522_bytes_packets", CTLFLAG_RD, &priv->pkstats.tx_1522_bytes_packets, "TX 1522 Bytes Packets"); SYSCTL_ADD_ULONG(ctx, node_list, OID_AUTO, "tx_1548_bytes_packets", CTLFLAG_RD, &priv->pkstats.tx_1548_bytes_packets, "TX 1548 Bytes Packets"); SYSCTL_ADD_ULONG(ctx, node_list, OID_AUTO, "tx_gt_1548_bytes_packets", CTLFLAG_RD, &priv->pkstats.tx_gt_1548_bytes_packets, "TX Greater Then 1548 Bytes Packets"); for (i = 0; i < priv->tx_ring_num; i++) { tx_ring = priv->tx_ring[i]; snprintf(namebuf, sizeof(namebuf), "tx_ring%d", i); ring_node = SYSCTL_ADD_NODE(ctx, node_list, OID_AUTO, namebuf, CTLFLAG_RD, NULL, "TX Ring"); ring_list = SYSCTL_CHILDREN(ring_node); SYSCTL_ADD_ULONG(ctx, ring_list, OID_AUTO, "packets", CTLFLAG_RD, &tx_ring->packets, "TX packets"); SYSCTL_ADD_ULONG(ctx, ring_list, OID_AUTO, "bytes", CTLFLAG_RD, &tx_ring->bytes, "TX bytes"); } for (i = 0; i < priv->rx_ring_num; i++) { rx_ring = priv->rx_ring[i]; snprintf(namebuf, sizeof(namebuf), "rx_ring%d", i); ring_node = SYSCTL_ADD_NODE(ctx, node_list, OID_AUTO, namebuf, CTLFLAG_RD, NULL, "RX Ring"); ring_list = SYSCTL_CHILDREN(ring_node); SYSCTL_ADD_ULONG(ctx, ring_list, OID_AUTO, "packets", CTLFLAG_RD, &rx_ring->packets, "RX packets"); SYSCTL_ADD_ULONG(ctx, ring_list, OID_AUTO, "bytes", CTLFLAG_RD, &rx_ring->bytes, "RX bytes"); SYSCTL_ADD_ULONG(ctx, ring_list, OID_AUTO, "error", CTLFLAG_RD, &rx_ring->errors, "RX soft errors"); } } Index: stable/10/sys/ofed/drivers/net/mlx4/en_selftest.c =================================================================== --- stable/10/sys/ofed/drivers/net/mlx4/en_selftest.c (revision 273245) +++ stable/10/sys/ofed/drivers/net/mlx4/en_selftest.c (revision 273246) @@ -1,179 +1,178 @@ /* * Copyright (c) 2007, 2014 Mellanox Technologies. All rights reserved. * * This software is available to you under a choice of one of two * licenses. 
You may choose to be licensed under the terms of the GNU * General Public License (GPL) Version 2, available from the file * COPYING in the main directory of this source tree, or the * OpenIB.org BSD license below: * * Redistribution and use in source and binary forms, with or * without modification, are permitted provided that the following * conditions are met: * * - Redistributions of source code must retain the above * copyright notice, this list of conditions and the following * disclaimer. * * - Redistributions in binary form must reproduce the above * copyright notice, this list of conditions and the following * disclaimer in the documentation and/or other materials * provided with the distribution. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * */ #include -#include #include #include #include #include "mlx4_en.h" static int mlx4_en_test_registers(struct mlx4_en_priv *priv) { return mlx4_cmd(priv->mdev->dev, 0, 0, 0, MLX4_CMD_HW_HEALTH_CHECK, MLX4_CMD_TIME_CLASS_A, MLX4_CMD_WRAPPED); } static int mlx4_en_test_loopback_xmit(struct mlx4_en_priv *priv) { struct sk_buff *skb; struct ethhdr *ethh; unsigned char *packet; unsigned int packet_size = MLX4_LOOPBACK_TEST_PAYLOAD; unsigned int i; int err; /* build the pkt before xmit */ skb = netdev_alloc_skb(priv->dev, MLX4_LOOPBACK_TEST_PAYLOAD + ETH_HLEN + NET_IP_ALIGN); if (!skb) { en_err(priv, "-LOOPBACK_TEST_XMIT- failed to create skb for xmit\n"); return -ENOMEM; } skb_reserve(skb, NET_IP_ALIGN); ethh = (struct ethhdr *)skb_put(skb, sizeof(struct ethhdr)); packet = (unsigned char *)skb_put(skb, packet_size); memcpy(ethh->h_dest, priv->dev->dev_addr, ETH_ALEN); memset(ethh->h_source, 0, ETH_ALEN); ethh->h_proto = htons(ETH_P_ARP); skb_set_mac_header(skb, 0); for (i = 0; i < packet_size; ++i) /* fill our packet */ packet[i] = (unsigned char)(i & 0xff); /* xmit the pkt */ err = mlx4_en_xmit(skb, priv->dev); return err; } static int mlx4_en_test_loopback(struct mlx4_en_priv *priv) { u32 loopback_ok = 0; int i; priv->loopback_ok = 0; priv->validate_loopback = 1; mlx4_en_update_loopback_state(priv->dev, priv->dev->features); /* xmit */ if (mlx4_en_test_loopback_xmit(priv)) { en_err(priv, "Transmitting loopback packet failed\n"); goto mlx4_en_test_loopback_exit; } /* polling for result */ for (i = 0; i < MLX4_EN_LOOPBACK_RETRIES; ++i) { msleep(MLX4_EN_LOOPBACK_TIMEOUT); if (priv->loopback_ok) { loopback_ok = 1; break; } } if (!loopback_ok) en_err(priv, "Loopback packet didn't arrive\n"); mlx4_en_test_loopback_exit: priv->validate_loopback = 0; mlx4_en_update_loopback_state(priv->dev, priv->dev->features); return !loopback_ok; } static int mlx4_en_test_link(struct mlx4_en_priv *priv) { if (mlx4_en_QUERY_PORT(priv->mdev, priv->port)) return -ENOMEM; if (priv->port_state.link_state == 1) return 0; else return 1; } static int mlx4_en_test_speed(struct mlx4_en_priv *priv) { if (mlx4_en_QUERY_PORT(priv->mdev, priv->port)) return -ENOMEM; /* The device supports 1G, 10G and 40G speed */ if (priv->port_state.link_speed != MLX4_EN_LINK_SPEED_1G && priv->port_state.link_speed != MLX4_EN_LINK_SPEED_10G && 
priv->port_state.link_speed != MLX4_EN_LINK_SPEED_40G) return priv->port_state.link_speed; return 0; } void mlx4_en_ex_selftest(struct net_device *dev, u32 *flags, u64 *buf) { struct mlx4_en_priv *priv = netdev_priv(dev); struct mlx4_en_dev *mdev = priv->mdev; int i, carrier_ok; memset(buf, 0, sizeof(u64) * MLX4_EN_NUM_SELF_TEST); if (*flags & ETH_TEST_FL_OFFLINE) { /* disable the interface */ carrier_ok = netif_carrier_ok(dev); netif_carrier_off(dev); /* Wait until all tx queues are empty. * there should not be any additional incoming traffic * since we turned the carrier off */ msleep(200); if (priv->mdev->dev->caps.flags & MLX4_DEV_CAP_FLAG_UC_LOOPBACK) { buf[3] = mlx4_en_test_registers(priv); if (priv->port_up) buf[4] = mlx4_en_test_loopback(priv); } if (carrier_ok) netif_carrier_on(dev); } buf[0] = mlx4_test_interrupts(mdev->dev); buf[1] = mlx4_en_test_link(priv); buf[2] = mlx4_en_test_speed(priv); for (i = 0; i < MLX4_EN_NUM_SELF_TEST; i++) { if (buf[i]) *flags |= ETH_TEST_FL_FAILED; } } Index: stable/10/sys/ofed/drivers/net/mlx4/en_tx.c =================================================================== --- stable/10/sys/ofed/drivers/net/mlx4/en_tx.c (revision 273245) +++ stable/10/sys/ofed/drivers/net/mlx4/en_tx.c (revision 273246) @@ -1,1115 +1,1115 @@ /* * Copyright (c) 2007, 2014 Mellanox Technologies. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU * General Public License (GPL) Version 2, available from the file * COPYING in the main directory of this source tree, or the * OpenIB.org BSD license below: * * Redistribution and use in source and binary forms, with or * without modification, are permitted provided that the following * conditions are met: * * - Redistributions of source code must retain the above * copyright notice, this list of conditions and the following * disclaimer. * * - Redistributions in binary form must reproduce the above * copyright notice, this list of conditions and the following * disclaimer in the documentation and/or other materials * provided with the distribution. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
* */ -#include +#include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "mlx4_en.h" #include "utils.h" enum { MAX_INLINE = 104, /* 128 - 16 - 4 - 4 */ MAX_BF = 256, MIN_PKT_LEN = 17, }; static int inline_thold __read_mostly = MAX_INLINE; module_param_named(inline_thold, inline_thold, uint, 0444); MODULE_PARM_DESC(inline_thold, "threshold for using inline data"); int mlx4_en_create_tx_ring(struct mlx4_en_priv *priv, struct mlx4_en_tx_ring **pring, u32 size, u16 stride, int node, int queue_idx) { struct mlx4_en_dev *mdev = priv->mdev; struct mlx4_en_tx_ring *ring; int tmp; int err; ring = kzalloc_node(sizeof(struct mlx4_en_tx_ring), GFP_KERNEL, node); if (!ring) { ring = kzalloc(sizeof(struct mlx4_en_tx_ring), GFP_KERNEL); if (!ring) { en_err(priv, "Failed allocating TX ring\n"); return -ENOMEM; } } ring->size = size; ring->size_mask = size - 1; ring->stride = stride; ring->full_size = ring->size - HEADROOM - MAX_DESC_TXBBS; ring->inline_thold = min(inline_thold, MAX_INLINE); mtx_init(&ring->tx_lock.m, "mlx4 tx", NULL, MTX_DEF); mtx_init(&ring->comp_lock.m, "mlx4 comp", NULL, MTX_DEF); /* Allocate the buf ring */ ring->br = buf_ring_alloc(MLX4_EN_DEF_TX_QUEUE_SIZE, M_DEVBUF, M_WAITOK, &ring->tx_lock.m); if (ring->br == NULL) { en_err(priv, "Failed allocating tx_info ring\n"); return -ENOMEM; } tmp = size * sizeof(struct mlx4_en_tx_info); ring->tx_info = vmalloc_node(tmp, node); if (!ring->tx_info) { ring->tx_info = vmalloc(tmp); if (!ring->tx_info) { err = -ENOMEM; goto err_ring; } } en_dbg(DRV, priv, "Allocated tx_info ring at addr:%p size:%d\n", ring->tx_info, tmp); ring->bounce_buf = kmalloc_node(MAX_DESC_SIZE, GFP_KERNEL, node); if (!ring->bounce_buf) { ring->bounce_buf = kmalloc(MAX_DESC_SIZE, GFP_KERNEL); if (!ring->bounce_buf) { err = -ENOMEM; goto err_info; } } ring->buf_size = ALIGN(size * ring->stride, MLX4_EN_PAGE_SIZE); /* Allocate HW buffers on provided NUMA node */ err = mlx4_alloc_hwq_res(mdev->dev, &ring->wqres, ring->buf_size, 2 * PAGE_SIZE); if (err) { en_err(priv, "Failed allocating hwq resources\n"); goto err_bounce; } err = mlx4_en_map_buffer(&ring->wqres.buf); if (err) { en_err(priv, "Failed to map TX buffer\n"); goto err_hwq_res; } ring->buf = ring->wqres.buf.direct.buf; en_dbg(DRV, priv, "Allocated TX ring (addr:%p) - buf:%p size:%d " "buf_size:%d dma:%llx\n", ring, ring->buf, ring->size, ring->buf_size, (unsigned long long) ring->wqres.buf.direct.map); err = mlx4_qp_reserve_range(mdev->dev, 1, 1, &ring->qpn, MLX4_RESERVE_BF_QP); if (err) { en_err(priv, "failed reserving qp for TX ring\n"); goto err_map; } err = mlx4_qp_alloc(mdev->dev, ring->qpn, &ring->qp); if (err) { en_err(priv, "Failed allocating qp %d\n", ring->qpn); goto err_reserve; } ring->qp.event = mlx4_en_sqp_event; err = mlx4_bf_alloc(mdev->dev, &ring->bf, node); if (err) { en_dbg(DRV, priv, "working without blueflame (%d)", err); ring->bf.uar = &mdev->priv_uar; ring->bf.uar->map = mdev->uar_map; ring->bf_enabled = false; } else ring->bf_enabled = true; ring->queue_index = queue_idx; if (queue_idx < priv->num_tx_rings_p_up ) CPU_SET(queue_idx, &ring->affinity_mask); *pring = ring; return 0; err_reserve: mlx4_qp_release_range(mdev->dev, ring->qpn, 1); err_map: mlx4_en_unmap_buffer(&ring->wqres.buf); err_hwq_res: mlx4_free_hwq_res(mdev->dev, &ring->wqres, ring->buf_size); err_bounce: kfree(ring->bounce_buf); err_info: vfree(ring->tx_info); err_ring: buf_ring_free(ring->br, M_DEVBUF); kfree(ring); return err; } void 
mlx4_en_destroy_tx_ring(struct mlx4_en_priv *priv, struct mlx4_en_tx_ring **pring) { struct mlx4_en_dev *mdev = priv->mdev; struct mlx4_en_tx_ring *ring = *pring; en_dbg(DRV, priv, "Destroying tx ring, qpn: %d\n", ring->qpn); buf_ring_free(ring->br, M_DEVBUF); if (ring->bf_enabled) mlx4_bf_free(mdev->dev, &ring->bf); mlx4_qp_remove(mdev->dev, &ring->qp); mlx4_qp_free(mdev->dev, &ring->qp); mlx4_qp_release_range(priv->mdev->dev, ring->qpn, 1); mlx4_en_unmap_buffer(&ring->wqres.buf); mlx4_free_hwq_res(mdev->dev, &ring->wqres, ring->buf_size); kfree(ring->bounce_buf); vfree(ring->tx_info); mtx_destroy(&ring->tx_lock.m); mtx_destroy(&ring->comp_lock.m); kfree(ring); *pring = NULL; } int mlx4_en_activate_tx_ring(struct mlx4_en_priv *priv, struct mlx4_en_tx_ring *ring, int cq, int user_prio) { struct mlx4_en_dev *mdev = priv->mdev; int err; ring->cqn = cq; ring->prod = 0; ring->cons = 0xffffffff; ring->last_nr_txbb = 1; ring->poll_cnt = 0; ring->blocked = 0; memset(ring->tx_info, 0, ring->size * sizeof(struct mlx4_en_tx_info)); memset(ring->buf, 0, ring->buf_size); ring->qp_state = MLX4_QP_STATE_RST; ring->doorbell_qpn = ring->qp.qpn << 8; mlx4_en_fill_qp_context(priv, ring->size, ring->stride, 1, 0, ring->qpn, ring->cqn, user_prio, &ring->context); if (ring->bf_enabled) ring->context.usr_page = cpu_to_be32(ring->bf.uar->index); err = mlx4_qp_to_ready(mdev->dev, &ring->wqres.mtt, &ring->context, &ring->qp, &ring->qp_state); return err; } void mlx4_en_deactivate_tx_ring(struct mlx4_en_priv *priv, struct mlx4_en_tx_ring *ring) { struct mlx4_en_dev *mdev = priv->mdev; mlx4_qp_modify(mdev->dev, NULL, ring->qp_state, MLX4_QP_STATE_RST, NULL, 0, 0, &ring->qp); } static void mlx4_en_stamp_wqe(struct mlx4_en_priv *priv, struct mlx4_en_tx_ring *ring, int index, u8 owner) { struct mlx4_en_tx_info *tx_info = &ring->tx_info[index]; struct mlx4_en_tx_desc *tx_desc = ring->buf + index * TXBB_SIZE; void *end = ring->buf + ring->buf_size; __be32 *ptr = (__be32 *)tx_desc; __be32 stamp = cpu_to_be32(STAMP_VAL | (!!owner << STAMP_SHIFT)); int i; /* Optimize the common case when there are no wraparounds */ if (likely((void *)tx_desc + tx_info->nr_txbb * TXBB_SIZE <= end)) /* Stamp the freed descriptor */ for (i = 0; i < tx_info->nr_txbb * TXBB_SIZE; i += STAMP_STRIDE) { *ptr = stamp; ptr += STAMP_DWORDS; } else /* Stamp the freed descriptor */ for (i = 0; i < tx_info->nr_txbb * TXBB_SIZE; i += STAMP_STRIDE) { *ptr = stamp; ptr += STAMP_DWORDS; if ((void *)ptr >= end) { ptr = ring->buf; stamp ^= cpu_to_be32(0x80000000); } } } static u32 mlx4_en_free_tx_desc(struct mlx4_en_priv *priv, struct mlx4_en_tx_ring *ring, int index, u8 owner, u64 timestamp) { struct mlx4_en_dev *mdev = priv->mdev; struct mlx4_en_tx_info *tx_info = &ring->tx_info[index]; struct mlx4_en_tx_desc *tx_desc = ring->buf + index * TXBB_SIZE; struct mlx4_wqe_data_seg *data = (void *) tx_desc + tx_info->data_offset; struct mbuf *mb = tx_info->mb; void *end = ring->buf + ring->buf_size; int frags = tx_info->nr_segs;; int i; /* Optimize the common case when there are no wraparounds */ if (likely((void *) tx_desc + tx_info->nr_txbb * TXBB_SIZE <= end)) { if (!tx_info->inl) { if (tx_info->linear) { dma_unmap_single(priv->ddev, (dma_addr_t) be64_to_cpu(data->addr), be32_to_cpu(data->byte_count), PCI_DMA_TODEVICE); ++data; } for (i = 0; i < frags; i++) { pci_unmap_single(mdev->pdev, (dma_addr_t) be64_to_cpu(data[i].addr), data[i].byte_count, PCI_DMA_TODEVICE); } } } else { if (!tx_info->inl) { if ((void *) data >= end) { data = ring->buf + ((void *)data - 
end); } if (tx_info->linear) { dma_unmap_single(priv->ddev, (dma_addr_t) be64_to_cpu(data->addr), be32_to_cpu(data->byte_count), PCI_DMA_TODEVICE); ++data; } for (i = 0; i < frags; i++) { /* Check for wraparound before unmapping */ if ((void *) data >= end) data = ring->buf; pci_unmap_single(mdev->pdev, (dma_addr_t) be64_to_cpu(data->addr), data->byte_count, PCI_DMA_TODEVICE); ++data; } } } /* Send a copy of the frame to the BPF listener */ if (priv->dev && priv->dev->if_bpf) ETHER_BPF_MTAP(priv->dev, mb); m_freem(mb); return tx_info->nr_txbb; } int mlx4_en_free_tx_buf(struct net_device *dev, struct mlx4_en_tx_ring *ring) { struct mlx4_en_priv *priv = netdev_priv(dev); int cnt = 0; /* Skip last polled descriptor */ ring->cons += ring->last_nr_txbb; en_dbg(DRV, priv, "Freeing Tx buf - cons:0x%x prod:0x%x\n", ring->cons, ring->prod); if ((u32) (ring->prod - ring->cons) > ring->size) { en_warn(priv, "Tx consumer passed producer!\n"); return 0; } while (ring->cons != ring->prod) { ring->last_nr_txbb = mlx4_en_free_tx_desc(priv, ring, ring->cons & ring->size_mask, !!(ring->cons & ring->size), 0); ring->cons += ring->last_nr_txbb; cnt++; } if (cnt) en_dbg(DRV, priv, "Freed %d uncompleted tx descriptors\n", cnt); return cnt; } static int mlx4_en_process_tx_cq(struct net_device *dev, struct mlx4_en_cq *cq) { struct mlx4_en_priv *priv = netdev_priv(dev); struct mlx4_cq *mcq = &cq->mcq; struct mlx4_en_tx_ring *ring = priv->tx_ring[cq->ring]; struct mlx4_cqe *cqe; u16 index; u16 new_index, ring_index, stamp_index; u32 txbbs_skipped = 0; u32 txbbs_stamp = 0; u32 cons_index = mcq->cons_index; int size = cq->size; u32 size_mask = ring->size_mask; struct mlx4_cqe *buf = cq->buf; u32 packets = 0; u32 bytes = 0; int factor = priv->cqe_factor; u64 timestamp = 0; int done = 0; if (!priv->port_up) return 0; index = cons_index & size_mask; cqe = &buf[(index << factor) + factor]; ring_index = ring->cons & size_mask; stamp_index = ring_index; /* Process all completed CQEs */ while (XNOR(cqe->owner_sr_opcode & MLX4_CQE_OWNER_MASK, cons_index & size)) { /* * make sure we read the CQE after we read the * ownership bit */ rmb(); if (unlikely((cqe->owner_sr_opcode & MLX4_CQE_OPCODE_MASK) == MLX4_CQE_OPCODE_ERROR)) { en_err(priv, "CQE completed in error - vendor syndrom: 0x%x syndrom: 0x%x\n", ((struct mlx4_err_cqe *)cqe)-> vendor_err_syndrome, ((struct mlx4_err_cqe *)cqe)->syndrome); } /* Skip over last polled CQE */ new_index = be16_to_cpu(cqe->wqe_index) & size_mask; do { txbbs_skipped += ring->last_nr_txbb; ring_index = (ring_index + ring->last_nr_txbb) & size_mask; /* free next descriptor */ ring->last_nr_txbb = mlx4_en_free_tx_desc( priv, ring, ring_index, !!((ring->cons + txbbs_skipped) & ring->size), timestamp); mlx4_en_stamp_wqe(priv, ring, stamp_index, !!((ring->cons + txbbs_stamp) & ring->size)); stamp_index = ring_index; txbbs_stamp = txbbs_skipped; packets++; bytes += ring->tx_info[ring_index].nr_bytes; } while (ring_index != new_index); ++cons_index; index = cons_index & size_mask; cqe = &buf[(index << factor) + factor]; } /* * To prevent CQ overflow we first update CQ consumer and only then * the ring consumer. 
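 *
 * Editorial note: mlx4_cq_set_ci() publishes the new consumer index to
 * the hardware, freeing the just-processed CQEs for reuse, and only
 * after that (and the wmb()) is ring->cons advanced, so the transmit
 * path may post further descriptors whose completions need CQ space.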
*/ mcq->cons_index = cons_index; mlx4_cq_set_ci(mcq); wmb(); ring->cons += txbbs_skipped; /* Wakeup Tx queue if it was stopped and ring is not full */ if (unlikely(ring->blocked) && (ring->prod - ring->cons) <= ring->full_size) { ring->blocked = 0; if (atomic_fetchadd_int(&priv->blocked, -1) == 1) atomic_clear_int(&dev->if_drv_flags ,IFF_DRV_OACTIVE); ring->wake_queue++; priv->port_stats.wake_queue++; } return done; } void mlx4_en_tx_irq(struct mlx4_cq *mcq) { struct mlx4_en_cq *cq = container_of(mcq, struct mlx4_en_cq, mcq); struct mlx4_en_priv *priv = netdev_priv(cq->dev); struct mlx4_en_tx_ring *ring = priv->tx_ring[cq->ring]; if (!spin_trylock(&ring->comp_lock)) return; mlx4_en_process_tx_cq(cq->dev, cq); mod_timer(&cq->timer, jiffies + 1); spin_unlock(&ring->comp_lock); } void mlx4_en_poll_tx_cq(unsigned long data) { struct mlx4_en_cq *cq = (struct mlx4_en_cq *) data; struct mlx4_en_priv *priv = netdev_priv(cq->dev); struct mlx4_en_tx_ring *ring = priv->tx_ring[cq->ring]; u32 inflight; INC_PERF_COUNTER(priv->pstats.tx_poll); if (!spin_trylock(&ring->comp_lock)) { mod_timer(&cq->timer, jiffies + MLX4_EN_TX_POLL_TIMEOUT); return; } mlx4_en_process_tx_cq(cq->dev, cq); inflight = (u32) (ring->prod - ring->cons - ring->last_nr_txbb); /* If there are still packets in flight and the timer has not already * been scheduled by the Tx routine then schedule it here to guarantee * completion processing of these packets */ if (inflight && priv->port_up) mod_timer(&cq->timer, jiffies + MLX4_EN_TX_POLL_TIMEOUT); spin_unlock(&ring->comp_lock); } static struct mlx4_en_tx_desc *mlx4_en_bounce_to_desc(struct mlx4_en_priv *priv, struct mlx4_en_tx_ring *ring, u32 index, unsigned int desc_size) { u32 copy = (ring->size - index) * TXBB_SIZE; int i; for (i = desc_size - copy - 4; i >= 0; i -= 4) { if ((i & (TXBB_SIZE - 1)) == 0) wmb(); *((u32 *) (ring->buf + i)) = *((u32 *) (ring->bounce_buf + copy + i)); } for (i = copy - 4; i >= 4 ; i -= 4) { if ((i & (TXBB_SIZE - 1)) == 0) wmb(); *((u32 *) (ring->buf + index * TXBB_SIZE + i)) = *((u32 *) (ring->bounce_buf + i)); } /* Return real descriptor location */ return ring->buf + index * TXBB_SIZE; } static inline void mlx4_en_xmit_poll(struct mlx4_en_priv *priv, int tx_ind) { struct mlx4_en_cq *cq = priv->tx_cq[tx_ind]; struct mlx4_en_tx_ring *ring = priv->tx_ring[tx_ind]; /* If we don't have a pending timer, set one up to catch our recent post in case the interface becomes idle */ if (!timer_pending(&cq->timer)) mod_timer(&cq->timer, jiffies + MLX4_EN_TX_POLL_TIMEOUT); /* Poll the CQ every mlx4_en_TX_MODER_POLL packets */ if ((++ring->poll_cnt & (MLX4_EN_TX_POLL_MODER - 1)) == 0) if (spin_trylock(&ring->comp_lock)) { mlx4_en_process_tx_cq(priv->dev, cq); spin_unlock(&ring->comp_lock); } } static int is_inline(struct mbuf *mb, int thold) { if (thold && mb->m_pkthdr.len <= thold && (mb->m_pkthdr.csum_flags & CSUM_TSO) == 0) return 1; return 0; } static int inline_size(struct mbuf *mb) { int len; len = mb->m_pkthdr.len; if (len + CTRL_SIZE + sizeof(struct mlx4_wqe_inline_seg) <= MLX4_INLINE_ALIGN) return ALIGN(len + CTRL_SIZE + sizeof(struct mlx4_wqe_inline_seg), 16); else return ALIGN(len + CTRL_SIZE + 2 * sizeof(struct mlx4_wqe_inline_seg), 16); } static int get_head_size(struct mbuf *mb) { struct ether_vlan_header *eh; struct tcphdr *th; struct ip *ip; int ip_hlen, tcp_hlen; struct ip6_hdr *ip6; uint16_t eth_type; int eth_hdr_len; eh = mtod(mb, struct ether_vlan_header *); if (mb->m_len < ETHER_HDR_LEN) return (0); if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) 
{ eth_type = ntohs(eh->evl_proto); eth_hdr_len = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN; } else { eth_type = ntohs(eh->evl_encap_proto); eth_hdr_len = ETHER_HDR_LEN; } if (mb->m_len < eth_hdr_len) return (0); switch (eth_type) { case ETHERTYPE_IP: ip = (struct ip *)(mb->m_data + eth_hdr_len); if (mb->m_len < eth_hdr_len + sizeof(*ip)) return (0); if (ip->ip_p != IPPROTO_TCP) return (0); ip_hlen = ip->ip_hl << 2; eth_hdr_len += ip_hlen; break; case ETHERTYPE_IPV6: ip6 = (struct ip6_hdr *)(mb->m_data + eth_hdr_len); if (mb->m_len < eth_hdr_len + sizeof(*ip6)) return (0); if (ip6->ip6_nxt != IPPROTO_TCP) return (0); eth_hdr_len += sizeof(*ip6); break; default: return (0); } if (mb->m_len < eth_hdr_len + sizeof(*th)) return (0); th = (struct tcphdr *)(mb->m_data + eth_hdr_len); tcp_hlen = th->th_off << 2; eth_hdr_len += tcp_hlen; if (mb->m_len < eth_hdr_len) return (0); return (eth_hdr_len); } static int get_real_size(struct mbuf *mb, struct net_device *dev, int *p_n_segs, int *lso_header_size, int inl) { struct mbuf *m; int nr_segs = 0; for (m = mb; m != NULL; m = m->m_next) if (m->m_len) nr_segs++; if (mb->m_pkthdr.csum_flags & CSUM_TSO) { *lso_header_size = get_head_size(mb); if (*lso_header_size) { if (mb->m_len == *lso_header_size) nr_segs--; *p_n_segs = nr_segs; return CTRL_SIZE + nr_segs * DS_SIZE + ALIGN(*lso_header_size + 4, DS_SIZE); } } else *lso_header_size = 0; *p_n_segs = nr_segs; if (inl) return inline_size(mb); return (CTRL_SIZE + nr_segs * DS_SIZE); } static struct mbuf *mb_copy(struct mbuf *mb, int *offp, char *data, int len) { int bytes; int off; off = *offp; while (len) { bytes = min(mb->m_len - off, len); if (bytes) memcpy(data, mb->m_data + off, bytes); len -= bytes; data += bytes; off += bytes; if (off == mb->m_len) { off = 0; mb = mb->m_next; } } *offp = off; return (mb); } static void build_inline_wqe(struct mlx4_en_tx_desc *tx_desc, struct mbuf *mb, int real_size, u16 *vlan_tag, int tx_ind) { struct mlx4_wqe_inline_seg *inl = &tx_desc->inl; int spc = MLX4_INLINE_ALIGN - CTRL_SIZE - sizeof *inl; int len; int off; off = 0; len = mb->m_pkthdr.len; if (len <= spc) { inl->byte_count = cpu_to_be32(1 << 31 | (max_t(typeof(len), len, MIN_PKT_LEN))); mb_copy(mb, &off, (void *)(inl + 1), len); if (len < MIN_PKT_LEN) memset(((void *)(inl + 1)) + len, 0, MIN_PKT_LEN - len); } else { inl->byte_count = cpu_to_be32(1 << 31 | spc); mb = mb_copy(mb, &off, (void *)(inl + 1), spc); inl = (void *) (inl + 1) + spc; mb_copy(mb, &off, (void *)(inl + 1), len - spc); wmb(); inl->byte_count = cpu_to_be32(1 << 31 | (len - spc)); } tx_desc->ctrl.vlan_tag = cpu_to_be16(*vlan_tag); tx_desc->ctrl.ins_vlan = MLX4_WQE_CTRL_INS_VLAN * !!(*vlan_tag); tx_desc->ctrl.fence_size = (real_size / 16) & 0x3f; } static unsigned long hashrandom; static void hashrandom_init(void *arg) { hashrandom = random(); } SYSINIT(hashrandom_init, SI_SUB_KLD, SI_ORDER_SECOND, &hashrandom_init, NULL); u16 mlx4_en_select_queue(struct net_device *dev, struct mbuf *mb) { struct mlx4_en_priv *priv = netdev_priv(dev); u32 rings_p_up = priv->num_tx_rings_p_up; u32 vlan_tag = 0; u32 up = 0; u32 queue_index; /* Obtain VLAN information if present */ if (mb->m_flags & M_VLANTAG) { vlan_tag = mb->m_pkthdr.ether_vtag; up = (vlan_tag >> 13); } /* hash mbuf */ queue_index = mlx4_en_hashmbuf(MLX4_F_HASHL3 | MLX4_F_HASHL4, mb, hashrandom); return ((queue_index % rings_p_up) + (up * rings_p_up)); } static void mlx4_bf_copy(void __iomem *dst, unsigned long *src, unsigned bytecnt) { __iowrite64_copy(dst, src, bytecnt / 8); } static u64 
mlx4_en_mac_to_u64(u8 *addr) { u64 mac = 0; int i; for (i = 0; i < ETHER_ADDR_LEN; i++) { mac <<= 8; mac |= addr[i]; } return mac; } static int mlx4_en_xmit(struct net_device *dev, int tx_ind, struct mbuf **mbp) { struct mlx4_en_priv *priv = netdev_priv(dev); struct mlx4_en_dev *mdev = priv->mdev; struct mlx4_en_tx_ring *ring; struct mlx4_en_cq *cq; struct mlx4_en_tx_desc *tx_desc; struct mlx4_wqe_data_seg *data; struct mlx4_en_tx_info *tx_info; struct mbuf *m; int nr_txbb; int nr_segs; int desc_size; int real_size; dma_addr_t dma; u32 index, bf_index, ring_size; __be32 op_own; u16 vlan_tag = 0; int i; int lso_header_size; bool bounce = false; bool inl = false; struct mbuf *mb; mb = *mbp; int defrag = 1; if (!priv->port_up) goto tx_drop; ring = priv->tx_ring[tx_ind]; ring_size = ring->size; inl = is_inline(mb, ring->inline_thold); retry: real_size = get_real_size(mb, dev, &nr_segs, &lso_header_size, inl); if (unlikely(!real_size)) goto tx_drop; /* Align descriptor to TXBB size */ desc_size = ALIGN(real_size, TXBB_SIZE); nr_txbb = desc_size / TXBB_SIZE; if (unlikely(nr_txbb > MAX_DESC_TXBBS)) { if (defrag) { mb = m_defrag(*mbp, M_NOWAIT); if (mb == NULL) { mb = *mbp; goto tx_drop; } *mbp = mb; defrag = 0; goto retry; } en_warn(priv, "Oversized header or SG list\n"); goto tx_drop; } /* Obtain VLAN information if present */ if (mb->m_flags & M_VLANTAG) { vlan_tag = mb->m_pkthdr.ether_vtag; } /* Check available TXBBs and 2K spare for prefetch * Even if netif_tx_stop_queue() will be called * driver will send current packet to ensure * that at least one completion will be issued after * stopping the queue */ if (unlikely((int)(ring->prod - ring->cons) > ring->full_size)) { /* every full Tx ring stops queue */ if (ring->blocked == 0) atomic_add_int(&priv->blocked, 1); /* Set HW-queue-is-full flag */ atomic_set_int(&dev->if_drv_flags, IFF_DRV_OACTIVE); ring->blocked = 1; priv->port_stats.queue_stopped++; ring->queue_stopped++; /* Use interrupts to find out when queue opened */ cq = priv->tx_cq[tx_ind]; mlx4_en_arm_cq(priv, cq); return EBUSY; } /* Track current inflight packets for performance analysis */ AVG_PERF_COUNTER(priv->pstats.inflight_avg, (u32) (ring->prod - ring->cons - 1)); /* Packet is good - grab an index and transmit it */ index = ring->prod & ring->size_mask; bf_index = ring->prod; /* See if we have enough space for whole descriptor TXBB for setting * SW ownership on next descriptor; if not, use a bounce buffer. */ if (likely(index + nr_txbb <= ring_size)) tx_desc = ring->buf + index * TXBB_SIZE; else { tx_desc = (struct mlx4_en_tx_desc *) ring->bounce_buf; bounce = true; } /* Save mb in tx_info ring */ tx_info = &ring->tx_info[index]; tx_info->mb = mb; tx_info->nr_txbb = nr_txbb; tx_info->nr_segs = nr_segs; if (lso_header_size) { memcpy(tx_desc->lso.header, mb->m_data, lso_header_size); data = ((void *)&tx_desc->lso + ALIGN(lso_header_size + 4, DS_SIZE)); /* lso header is part of m_data. 
* need to omit when mapping DMA */ mb->m_data += lso_header_size; mb->m_len -= lso_header_size; } else data = &tx_desc->data; /* valid only for none inline segments */ tx_info->data_offset = (void *)data - (void *)tx_desc; if (inl) { tx_info->inl = 1; } else { for (i = 0, m = mb; i < nr_segs; i++, m = m->m_next) { if (m->m_len == 0) { i--; continue; } dma = pci_map_single(mdev->dev->pdev, m->m_data, m->m_len, PCI_DMA_TODEVICE); data->addr = cpu_to_be64(dma); data->lkey = cpu_to_be32(mdev->mr.key); wmb(); data->byte_count = cpu_to_be32(m->m_len); data++; } if (lso_header_size) { mb->m_data -= lso_header_size; mb->m_len += lso_header_size; } tx_info->inl = 0; } /* Prepare ctrl segement apart opcode+ownership, which depends on * whether LSO is used */ tx_desc->ctrl.vlan_tag = cpu_to_be16(vlan_tag); tx_desc->ctrl.ins_vlan = MLX4_WQE_CTRL_INS_VLAN * !!vlan_tag; tx_desc->ctrl.fence_size = (real_size / 16) & 0x3f; tx_desc->ctrl.srcrb_flags = priv->ctrl_flags; if (mb->m_pkthdr.csum_flags & (CSUM_IP | CSUM_TSO | CSUM_TCP | CSUM_UDP | CSUM_TCP_IPV6 | CSUM_UDP_IPV6)) { if (mb->m_pkthdr.csum_flags & (CSUM_IP | CSUM_TSO)) tx_desc->ctrl.srcrb_flags |= cpu_to_be32(MLX4_WQE_CTRL_IP_CSUM); if (mb->m_pkthdr.csum_flags & (CSUM_TCP | CSUM_UDP | CSUM_UDP_IPV6 | CSUM_TCP_IPV6 | CSUM_TSO)) tx_desc->ctrl.srcrb_flags |= cpu_to_be32(MLX4_WQE_CTRL_TCP_UDP_CSUM); priv->port_stats.tx_chksum_offload++; ring->tx_csum++; } if (unlikely(priv->validate_loopback)) { /* Copy dst mac address to wqe */ struct ether_header *ethh; u64 mac; u32 mac_l, mac_h; ethh = mtod(mb, struct ether_header *); mac = mlx4_en_mac_to_u64(ethh->ether_dhost); if (mac) { mac_h = (u32) ((mac & 0xffff00000000ULL) >> 16); mac_l = (u32) (mac & 0xffffffff); tx_desc->ctrl.srcrb_flags |= cpu_to_be32(mac_h); tx_desc->ctrl.imm = cpu_to_be32(mac_l); } } /* Handle LSO (TSO) packets */ if (lso_header_size) { int segsz; /* Mark opcode as LSO */ op_own = cpu_to_be32(MLX4_OPCODE_LSO | (1 << 6)) | ((ring->prod & ring_size) ? cpu_to_be32(MLX4_EN_BIT_DESC_OWN) : 0); /* Fill in the LSO prefix */ tx_desc->lso.mss_hdr_size = cpu_to_be32( mb->m_pkthdr.tso_segsz << 16 | lso_header_size); priv->port_stats.tso_packets++; segsz = mb->m_pkthdr.tso_segsz; i = ((mb->m_pkthdr.len - lso_header_size + segsz - 1) / segsz); tx_info->nr_bytes= mb->m_pkthdr.len + (i - 1) * lso_header_size; ring->packets += i; } else { /* Normal (Non LSO) packet */ op_own = cpu_to_be32(MLX4_OPCODE_SEND) | ((ring->prod & ring_size) ? 
cpu_to_be32(MLX4_EN_BIT_DESC_OWN) : 0); tx_info->nr_bytes = max(mb->m_pkthdr.len, (unsigned int)ETHER_MIN_LEN - ETHER_CRC_LEN); ring->packets++; } ring->bytes += tx_info->nr_bytes; AVG_PERF_COUNTER(priv->pstats.tx_pktsz_avg, mb->m_pkthdr.len); if (tx_info->inl) { build_inline_wqe(tx_desc, mb, real_size, &vlan_tag, tx_ind); tx_info->inl = 1; } ring->prod += nr_txbb; /* If we used a bounce buffer then copy descriptor back into place */ if (unlikely(bounce)) tx_desc = mlx4_en_bounce_to_desc(priv, ring, index, desc_size); if (ring->bf_enabled && desc_size <= MAX_BF && !bounce && !vlan_tag) { *(__be32 *) (&tx_desc->ctrl.vlan_tag) |= cpu_to_be32(ring->doorbell_qpn); op_own |= htonl((bf_index & 0xffff) << 8); /* Ensure new descirptor hits memory * before setting ownership of this descriptor to HW */ wmb(); tx_desc->ctrl.owner_opcode = op_own; wmb(); mlx4_bf_copy(ring->bf.reg + ring->bf.offset, (unsigned long *) &tx_desc->ctrl, desc_size); wmb(); ring->bf.offset ^= ring->bf.buf_size; } else { /* Ensure new descirptor hits memory * before setting ownership of this descriptor to HW */ wmb(); tx_desc->ctrl.owner_opcode = op_own; wmb(); writel(cpu_to_be32(ring->doorbell_qpn), ring->bf.uar->map + MLX4_SEND_DOORBELL); } return 0; tx_drop: *mbp = NULL; m_freem(mb); return EINVAL; } static int mlx4_en_transmit_locked(struct ifnet *dev, int tx_ind, struct mbuf *m) { struct mlx4_en_priv *priv = netdev_priv(dev); struct mlx4_en_tx_ring *ring; struct mbuf *next; int enqueued, err = 0; ring = priv->tx_ring[tx_ind]; if ((dev->if_drv_flags & (IFF_DRV_RUNNING | IFF_DRV_OACTIVE)) != IFF_DRV_RUNNING || priv->port_up == 0) { if (m != NULL) err = drbr_enqueue(dev, ring->br, m); return (err); } enqueued = 0; if (m != NULL) { if ((err = drbr_enqueue(dev, ring->br, m)) != 0) return (err); } /* Process the queue */ while ((next = drbr_peek(dev, ring->br)) != NULL) { if ((err = mlx4_en_xmit(dev, tx_ind, &next)) != 0) { if (next == NULL) { drbr_advance(dev, ring->br); } else { drbr_putback(dev, ring->br, next); } break; } drbr_advance(dev, ring->br); enqueued++; dev->if_obytes += next->m_pkthdr.len; if (next->m_flags & M_MCAST) dev->if_omcasts++; if ((dev->if_drv_flags & IFF_DRV_RUNNING) == 0) break; } if (enqueued > 0) ring->watchdog_time = ticks; return (err); } void mlx4_en_tx_que(void *context, int pending) { struct mlx4_en_tx_ring *ring; struct mlx4_en_priv *priv; struct net_device *dev; struct mlx4_en_cq *cq; int tx_ind; cq = context; dev = cq->dev; priv = dev->if_softc; tx_ind = cq->ring; ring = priv->tx_ring[tx_ind]; if (dev->if_drv_flags & IFF_DRV_RUNNING) { mlx4_en_xmit_poll(priv, tx_ind); spin_lock(&ring->tx_lock); if (!drbr_empty(dev, ring->br)) mlx4_en_transmit_locked(dev, tx_ind, NULL); spin_unlock(&ring->tx_lock); } } int mlx4_en_transmit(struct ifnet *dev, struct mbuf *m) { struct mlx4_en_priv *priv = netdev_priv(dev); struct mlx4_en_tx_ring *ring; struct mlx4_en_cq *cq; int i = 0, err = 0; /* Which queue to use */ if ((m->m_flags & (M_FLOWID | M_VLANTAG)) == M_FLOWID) { i = m->m_pkthdr.flowid % (priv->tx_ring_num - 1); } else { i = mlx4_en_select_queue(dev, m); } ring = priv->tx_ring[i]; if (spin_trylock(&ring->tx_lock)) { err = mlx4_en_transmit_locked(dev, i, m); spin_unlock(&ring->tx_lock); /* Poll CQ here */ mlx4_en_xmit_poll(priv, i); } else { err = drbr_enqueue(dev, ring->br, m); cq = priv->tx_cq[i]; taskqueue_enqueue(cq->tq, &cq->cq_task); } return (err); } /* * Flush ring buffers. 
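 *
 * Editorial note: this is the interface's if_qflush method.  It drains
 * any mbufs still queued in each TX ring's buf_ring under that ring's
 * lock and then calls if_qflush() to clear the legacy send queue.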
*/ void mlx4_en_qflush(struct ifnet *dev) { struct mlx4_en_priv *priv = netdev_priv(dev); struct mlx4_en_tx_ring *ring; struct mbuf *m; for (int i = 0; i < priv->tx_ring_num; i++) { ring = priv->tx_ring[i]; spin_lock(&ring->tx_lock); while ((m = buf_ring_dequeue_sc(ring->br)) != NULL) m_freem(m); spin_unlock(&ring->tx_lock); } if_qflush(dev); } Index: stable/10/sys/ofed/drivers/net/mlx4/pd.c =================================================================== --- stable/10/sys/ofed/drivers/net/mlx4/pd.c (revision 273245) +++ stable/10/sys/ofed/drivers/net/mlx4/pd.c (revision 273246) @@ -1,302 +1,302 @@ /* * Copyright (c) 2006, 2007 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2005, 2014 Mellanox Technologies. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU * General Public License (GPL) Version 2, available from the file * COPYING in the main directory of this source tree, or the * OpenIB.org BSD license below: * * Redistribution and use in source and binary forms, with or * without modification, are permitted provided that the following * conditions are met: * * - Redistributions of source code must retain the above * copyright notice, this list of conditions and the following * disclaimer. * * - Redistributions in binary form must reproduce the above * copyright notice, this list of conditions and the following * disclaimer in the documentation and/or other materials * provided with the distribution. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
*/ #include #include #include -#include +#include #include "mlx4.h" #include "icm.h" enum { MLX4_NUM_RESERVED_UARS = 8 }; int mlx4_pd_alloc(struct mlx4_dev *dev, u32 *pdn) { struct mlx4_priv *priv = mlx4_priv(dev); *pdn = mlx4_bitmap_alloc(&priv->pd_bitmap); if (*pdn == -1) return -ENOMEM; return 0; } EXPORT_SYMBOL_GPL(mlx4_pd_alloc); void mlx4_pd_free(struct mlx4_dev *dev, u32 pdn) { mlx4_bitmap_free(&mlx4_priv(dev)->pd_bitmap, pdn, MLX4_USE_RR); } EXPORT_SYMBOL_GPL(mlx4_pd_free); int __mlx4_xrcd_alloc(struct mlx4_dev *dev, u32 *xrcdn) { struct mlx4_priv *priv = mlx4_priv(dev); *xrcdn = mlx4_bitmap_alloc(&priv->xrcd_bitmap); if (*xrcdn == -1) return -ENOMEM; return 0; } int mlx4_xrcd_alloc(struct mlx4_dev *dev, u32 *xrcdn) { u64 out_param; int err; if (mlx4_is_mfunc(dev)) { err = mlx4_cmd_imm(dev, 0, &out_param, RES_XRCD, RES_OP_RESERVE, MLX4_CMD_ALLOC_RES, MLX4_CMD_TIME_CLASS_A, MLX4_CMD_WRAPPED); if (err) return err; *xrcdn = get_param_l(&out_param); return 0; } return __mlx4_xrcd_alloc(dev, xrcdn); } EXPORT_SYMBOL_GPL(mlx4_xrcd_alloc); void __mlx4_xrcd_free(struct mlx4_dev *dev, u32 xrcdn) { mlx4_bitmap_free(&mlx4_priv(dev)->xrcd_bitmap, xrcdn, MLX4_USE_RR); } void mlx4_xrcd_free(struct mlx4_dev *dev, u32 xrcdn) { u64 in_param = 0; int err; if (mlx4_is_mfunc(dev)) { set_param_l(&in_param, xrcdn); err = mlx4_cmd(dev, in_param, RES_XRCD, RES_OP_RESERVE, MLX4_CMD_FREE_RES, MLX4_CMD_TIME_CLASS_A, MLX4_CMD_WRAPPED); if (err) mlx4_warn(dev, "Failed to release xrcdn %d\n", xrcdn); } else __mlx4_xrcd_free(dev, xrcdn); } EXPORT_SYMBOL_GPL(mlx4_xrcd_free); int mlx4_init_pd_table(struct mlx4_dev *dev) { struct mlx4_priv *priv = mlx4_priv(dev); return mlx4_bitmap_init(&priv->pd_bitmap, dev->caps.num_pds, (1 << NOT_MASKED_PD_BITS) - 1, dev->caps.reserved_pds, 0); } void mlx4_cleanup_pd_table(struct mlx4_dev *dev) { mlx4_bitmap_cleanup(&mlx4_priv(dev)->pd_bitmap); } int mlx4_init_xrcd_table(struct mlx4_dev *dev) { struct mlx4_priv *priv = mlx4_priv(dev); return mlx4_bitmap_init(&priv->xrcd_bitmap, (1 << 16), (1 << 16) - 1, dev->caps.reserved_xrcds + 1, 0); } void mlx4_cleanup_xrcd_table(struct mlx4_dev *dev) { mlx4_bitmap_cleanup(&mlx4_priv(dev)->xrcd_bitmap); } int mlx4_uar_alloc(struct mlx4_dev *dev, struct mlx4_uar *uar) { int offset; uar->index = mlx4_bitmap_alloc(&mlx4_priv(dev)->uar_table.bitmap); if (uar->index == -1) return -ENOMEM; if (mlx4_is_slave(dev)) offset = uar->index % ((int) pci_resource_len(dev->pdev, 2) / dev->caps.uar_page_size); else offset = uar->index; uar->pfn = (pci_resource_start(dev->pdev, 2) >> PAGE_SHIFT) + offset; uar->map = NULL; return 0; } EXPORT_SYMBOL_GPL(mlx4_uar_alloc); void mlx4_uar_free(struct mlx4_dev *dev, struct mlx4_uar *uar) { mlx4_bitmap_free(&mlx4_priv(dev)->uar_table.bitmap, uar->index, MLX4_USE_RR); } EXPORT_SYMBOL_GPL(mlx4_uar_free); #ifndef CONFIG_PPC int mlx4_bf_alloc(struct mlx4_dev *dev, struct mlx4_bf *bf, int node) { struct mlx4_priv *priv = mlx4_priv(dev); struct mlx4_uar *uar; int err = 0; int idx; if (!priv->bf_mapping) return -ENOMEM; mutex_lock(&priv->bf_mutex); if (!list_empty(&priv->bf_list)) uar = list_entry(priv->bf_list.next, struct mlx4_uar, bf_list); else { if (mlx4_bitmap_avail(&priv->uar_table.bitmap) < MLX4_NUM_RESERVED_UARS) { err = -ENOMEM; goto out; } uar = kmalloc_node(sizeof *uar, GFP_KERNEL, node); if (!uar) { uar = kmalloc(sizeof *uar, GFP_KERNEL); if (!uar) { err = -ENOMEM; goto out; } } err = mlx4_uar_alloc(dev, uar); if (err) goto free_kmalloc; uar->map = ioremap(uar->pfn << PAGE_SHIFT, PAGE_SIZE); if (!uar->map) { err 
= -ENOMEM; goto free_uar; } uar->bf_map = io_mapping_map_wc(priv->bf_mapping, uar->index << PAGE_SHIFT); if (!uar->bf_map) { err = -ENOMEM; goto unamp_uar; } uar->free_bf_bmap = 0; list_add(&uar->bf_list, &priv->bf_list); } bf->uar = uar; idx = ffz(uar->free_bf_bmap); uar->free_bf_bmap |= 1 << idx; bf->uar = uar; bf->offset = 0; bf->buf_size = dev->caps.bf_reg_size / 2; bf->reg = uar->bf_map + idx * dev->caps.bf_reg_size; if (uar->free_bf_bmap == (1 << dev->caps.bf_regs_per_page) - 1) list_del_init(&uar->bf_list); goto out; unamp_uar: bf->uar = NULL; iounmap(uar->map); free_uar: mlx4_uar_free(dev, uar); free_kmalloc: kfree(uar); out: mutex_unlock(&priv->bf_mutex); return err; } EXPORT_SYMBOL_GPL(mlx4_bf_alloc); void mlx4_bf_free(struct mlx4_dev *dev, struct mlx4_bf *bf) { struct mlx4_priv *priv = mlx4_priv(dev); int idx; if (!bf->uar || !bf->uar->bf_map) return; mutex_lock(&priv->bf_mutex); idx = (bf->reg - bf->uar->bf_map) / dev->caps.bf_reg_size; bf->uar->free_bf_bmap &= ~(1 << idx); if (!bf->uar->free_bf_bmap) { if (!list_empty(&bf->uar->bf_list)) list_del(&bf->uar->bf_list); io_mapping_unmap(bf->uar->bf_map); iounmap(bf->uar->map); mlx4_uar_free(dev, bf->uar); kfree(bf->uar); } else if (list_empty(&bf->uar->bf_list)) list_add(&bf->uar->bf_list, &priv->bf_list); mutex_unlock(&priv->bf_mutex); } EXPORT_SYMBOL_GPL(mlx4_bf_free); #else int mlx4_bf_alloc(struct mlx4_dev *dev, struct mlx4_bf *bf, int node) { memset(bf, 0, sizeof *bf); return -ENOSYS; } EXPORT_SYMBOL_GPL(mlx4_bf_alloc); void mlx4_bf_free(struct mlx4_dev *dev, struct mlx4_bf *bf) { return; } EXPORT_SYMBOL_GPL(mlx4_bf_free); #endif int mlx4_init_uar_table(struct mlx4_dev *dev) { if (dev->caps.num_uars <= 128) { mlx4_err(dev, "Only %d UAR pages (need more than 128)\n", dev->caps.num_uars); mlx4_err(dev, "Increase firmware log2_uar_bar_megabytes?\n"); return -ENODEV; } return mlx4_bitmap_init(&mlx4_priv(dev)->uar_table.bitmap, dev->caps.num_uars, dev->caps.num_uars - 1, dev->caps.reserved_uars, 0); } void mlx4_cleanup_uar_table(struct mlx4_dev *dev) { mlx4_bitmap_cleanup(&mlx4_priv(dev)->uar_table.bitmap); } Index: stable/10/sys/ofed/include/asm/page.h =================================================================== --- stable/10/sys/ofed/include/asm/page.h (revision 273245) +++ stable/10/sys/ofed/include/asm/page.h (nonexistent) @@ -1,35 +0,0 @@ -/*- - * Copyright (c) 2010 Isilon Systems, Inc. - * Copyright (c) 2010 iX Systems, Inc. - * Copyright (c) 2010 Panasas, Inc. - * Copyright (c) 2013, 2014 Mellanox Technologies, Ltd. - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice unmodified, this list of conditions, and the following - * disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR - * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES - * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
- * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, - * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT - * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF - * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -#ifndef _ASM_PAGE_H_ -#define _ASM_PAGE_H_ - -#include - -#endif /*_ASM_PAGE_H_*/ Property changes on: stable/10/sys/ofed/include/asm/page.h ___________________________________________________________________ Deleted: fbsd:nokeywords ## -1 +0,0 ## -true \ No newline at end of property Index: stable/10/sys/ofed/include/linux/mount.h =================================================================== --- stable/10/sys/ofed/include/linux/mount.h (revision 273245) +++ stable/10/sys/ofed/include/linux/mount.h (nonexistent) @@ -1,34 +0,0 @@ -/*- - * Copyright (c) 2010 Isilon Systems, Inc. - * Copyright (c) 2010 iX Systems, Inc. - * Copyright (c) 2010 Panasas, Inc. - * Copyright (c) 2013, 2014 Mellanox Technologies, Ltd. - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice unmodified, this list of conditions, and the following - * disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR - * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES - * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. - * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, - * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT - * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF - * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -#ifndef _LINUX_MOUNT_H_ -#define _LINUX_MOUNT_H_ - - -#endif /* _LINUX_MOUNT_H_ */ Property changes on: stable/10/sys/ofed/include/linux/mount.h ___________________________________________________________________ Deleted: fbsd:nokeywords ## -1 +0,0 ## -true \ No newline at end of property Index: stable/10/sys/ofed/include/linux/inet.h =================================================================== --- stable/10/sys/ofed/include/linux/inet.h (revision 273245) +++ stable/10/sys/ofed/include/linux/inet.h (nonexistent) @@ -1,32 +0,0 @@ -/*- - * Copyright (c) 2010 Isilon Systems, Inc. - * Copyright (c) 2010 iX Systems, Inc. - * Copyright (c) 2010 Panasas, Inc. - * Copyright (c) 2013, 2014 Mellanox Technologies, Ltd. - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. 
Redistributions of source code must retain the above copyright - * notice unmodified, this list of conditions, and the following - * disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR - * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES - * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. - * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, - * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT - * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF - * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -#ifndef _LINUX_INET_H_ -#define _LINUX_INET_H_ -#endif /* _LINUX_INET_H_ */ Property changes on: stable/10/sys/ofed/include/linux/inet.h ___________________________________________________________________ Deleted: fbsd:nokeywords ## -1 +0,0 ## -true \ No newline at end of property Index: stable/10/sys/ofed/include/linux/ethtool.h =================================================================== --- stable/10/sys/ofed/include/linux/ethtool.h (revision 273245) +++ stable/10/sys/ofed/include/linux/ethtool.h (nonexistent) @@ -1,32 +0,0 @@ -/*- - * Copyright (c) 2010 Isilon Systems, Inc. - * Copyright (c) 2010 iX Systems, Inc. - * Copyright (c) 2010 Panasas, Inc. - * Copyright (c) 2013 Mellanox Technologies, Ltd. - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice unmodified, this list of conditions, and the following - * disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR - * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES - * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. - * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, - * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT - * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF - * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
- */ -#ifndef _LINUX_ETHTOOL_H_ -#define _LINUX_ETHTOOL_H_ - -#endif /* _LINUX_ETHTOOL_H_ */ Property changes on: stable/10/sys/ofed/include/linux/ethtool.h ___________________________________________________________________ Deleted: fbsd:nokeywords ## -1 +0,0 ## -true \ No newline at end of property Index: stable/10/sys/ofed/include/linux/completion.h =================================================================== --- stable/10/sys/ofed/include/linux/completion.h (revision 273245) +++ stable/10/sys/ofed/include/linux/completion.h (revision 273246) @@ -1,155 +1,155 @@ /*- * Copyright (c) 2010 Isilon Systems, Inc. * Copyright (c) 2010 iX Systems, Inc. * Copyright (c) 2010 Panasas, Inc. * Copyright (c) 2013, 2014 Mellanox Technologies, Ltd. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice unmodified, this list of conditions, and the following * disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ -#ifndef _FBSD_COMPLETION_H_ -#define _FBSD_COMPLETION_H_ +#ifndef _LINUX_COMPLETION_H_ +#define _LINUX_COMPLETION_H_ #include #include #include #include #include #include struct completion { unsigned int done; }; #define INIT_COMPLETION(c) ((c).done = 0) #define init_completion(c) ((c)->done = 0) static inline void _complete_common(struct completion *c, int all) { int wakeup_swapper; sleepq_lock(c); c->done++; if (all) wakeup_swapper = sleepq_broadcast(c, SLEEPQ_SLEEP, 0, 0); else wakeup_swapper = sleepq_signal(c, SLEEPQ_SLEEP, 0, 0); sleepq_release(c); if (wakeup_swapper) kick_proc0(); } #define complete(c) _complete_common(c, 0) #define complete_all(c) _complete_common(c, 1) /* * Indefinite wait for done != 0 with or without signals. 
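 *
 * A minimal usage sketch (hypothetical caller, not part of this header),
 * assuming one thread blocks until another thread signals completion:
 *
 *	static struct completion work_done;
 *
 *	init_completion(&work_done);
 *	(start a worker that eventually calls complete(&work_done))
 *	wait_for_completion(&work_done);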
*/ static inline long _wait_for_common(struct completion *c, int flags) { flags |= SLEEPQ_SLEEP; for (;;) { sleepq_lock(c); if (c->done) break; sleepq_add(c, NULL, "completion", flags, 0); if (flags & SLEEPQ_INTERRUPTIBLE) { if (sleepq_wait_sig(c, 0) != 0) return (-ERESTARTSYS); } else sleepq_wait(c, 0); } c->done--; sleepq_release(c); return (0); } #define wait_for_completion(c) _wait_for_common(c, 0) #define wait_for_completion_interuptible(c) \ _wait_for_common(c, SLEEPQ_INTERRUPTIBLE) static inline long _wait_for_timeout_common(struct completion *c, long timeout, int flags) { long end; end = ticks + timeout; flags |= SLEEPQ_SLEEP; for (;;) { sleepq_lock(c); if (c->done) break; sleepq_add(c, NULL, "completion", flags, 0); sleepq_set_timeout(c, end - ticks); if (flags & SLEEPQ_INTERRUPTIBLE) { if (sleepq_timedwait_sig(c, 0) != 0) return (-ERESTARTSYS); } else sleepq_timedwait(c, 0); } c->done--; sleepq_release(c); timeout = end - ticks; return (timeout > 0 ? timeout : 1); } #define wait_for_completion_timeout(c, timeout) \ _wait_for_timeout_common(c, timeout, 0) #define wait_for_completion_interruptible_timeout(c, timeout) \ _wait_for_timeout_common(c, timeout, SLEEPQ_INTERRUPTIBLE) static inline int try_wait_for_completion(struct completion *c) { int isdone; isdone = 1; sleepq_lock(c); if (c->done) c->done--; else isdone = 0; sleepq_release(c); return (isdone); } static inline int completion_done(struct completion *c) { int isdone; isdone = 1; sleepq_lock(c); if (c->done == 0) isdone = 0; sleepq_release(c); return (isdone); } #endif /* _LINUX_COMPLETION_H_ */ Index: stable/10/sys/ofed/include/linux/etherdevice.h =================================================================== --- stable/10/sys/ofed/include/linux/etherdevice.h (revision 273245) +++ stable/10/sys/ofed/include/linux/etherdevice.h (revision 273246) @@ -1,94 +1,94 @@ -/* +/*- * Copyright (c) 2007 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2014 Mellanox Technologies, Ltd. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU * General Public License (GPL) Version 2, available from the file * COPYING in the main directory of this source tree, or the * OpenIB.org BSD license below: * * Redistribution and use in source and binary forms, with or * without modification, are permitted provided that the following * conditions are met: * * - Redistributions of source code must retain the above * copyright notice, this list of conditions and the following * disclaimer. * * - Redistributions in binary form must reproduce the above * copyright notice, this list of conditions and the following * disclaimer in the documentation and/or other materials * provided with the distribution. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ #ifndef _LINUX_ETHERDEVICE #define _LINUX_ETHERDEVICE #include /** * is_zero_ether_addr - Determine if give Ethernet address is all zeros. * @addr: Pointer to a six-byte array containing the Ethernet address * * Return true if the address is all zeroes. 
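 *
 * For example (hypothetical caller): a six-byte buffer that has just been
 * zeroed makes this return true, while any non-zero byte makes it false.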
*/ static inline bool is_zero_ether_addr(const u8 *addr) { return !(addr[0] | addr[1] | addr[2] | addr[3] | addr[4] | addr[5]); } /** * is_multicast_ether_addr - Determine if the Ethernet address is a multicast. * @addr: Pointer to a six-byte array containing the Ethernet address * * Return true if the address is a multicast address. * By definition the broadcast address is also a multicast address. */ static inline bool is_multicast_ether_addr(const u8 *addr) { return (0x01 & addr[0]); } /** * is_broadcast_ether_addr - Determine if the Ethernet address is broadcast * @addr: Pointer to a six-byte array containing the Ethernet address * * Return true if the address is the broadcast address. */ static inline bool is_broadcast_ether_addr(const u8 *addr) { return (addr[0] & addr[1] & addr[2] & addr[3] & addr[4] & addr[5]) == 0xff; } /** * is_valid_ether_addr - Determine if the given Ethernet address is valid * @addr: Pointer to a six-byte array containing the Ethernet address * * Check that the Ethernet address (MAC) is not 00:00:00:00:00:00, is not * a multicast address, and is not FF:FF:FF:FF:FF:FF. * * Return true if the address is valid. **/ static inline bool is_valid_ether_addr(const u8 *addr) { /* FF:FF:FF:FF:FF:FF is a multicast address so we don't need to ** explicitly check for it here. */ return !is_multicast_ether_addr(addr) && !is_zero_ether_addr(addr); } #endif /* _LINUX_ETHERDEVICE */ Index: stable/10/sys/ofed/include/linux/mlx4/device.h =================================================================== --- stable/10/sys/ofed/include/linux/mlx4/device.h (revision 273245) +++ stable/10/sys/ofed/include/linux/mlx4/device.h (revision 273246) @@ -1,1285 +1,1287 @@ /* * Copyright (c) 2006, 2007 Cisco Systems, Inc. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU * General Public License (GPL) Version 2, available from the file * COPYING in the main directory of this source tree, or the * OpenIB.org BSD license below: * * Redistribution and use in source and binary forms, with or * without modification, are permitted provided that the following * conditions are met: * * - Redistributions of source code must retain the above * copyright notice, this list of conditions and the following * disclaimer. * * - Redistributions in binary form must reproduce the above * copyright notice, this list of conditions and the following * disclaimer in the documentation and/or other materials * provided with the distribution. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ #ifndef MLX4_DEVICE_H #define MLX4_DEVICE_H #include #include #include #include #include #include #include #include #define MAX_MSIX_P_PORT 17 #define MAX_MSIX 64 #define MSIX_LEGACY_SZ 4 #define MIN_MSIX_P_PORT 5 #define MLX4_ROCE_MAX_GIDS 128 #define MLX4_ROCE_PF_GIDS 16 #define MLX4_NUM_UP 8 #define MLX4_NUM_TC 8 #define MLX4_MAX_100M_UNITS_VAL 255 /* * work around: can't set values * greater then this value when * using 100 Mbps units. 
*/ #define MLX4_RATELIMIT_100M_UNITS 3 /* 100 Mbps */ #define MLX4_RATELIMIT_1G_UNITS 4 /* 1 Gbps */ #define MLX4_RATELIMIT_DEFAULT 0x00ff #define CORE_CLOCK_MASK 0xffffffffffffULL enum { MLX4_FLAG_MSI_X = 1 << 0, MLX4_FLAG_OLD_PORT_CMDS = 1 << 1, MLX4_FLAG_MASTER = 1 << 2, MLX4_FLAG_SLAVE = 1 << 3, MLX4_FLAG_SRIOV = 1 << 4, MLX4_FLAG_DEV_NUM_STR = 1 << 5, MLX4_FLAG_OLD_REG_MAC = 1 << 6, }; enum { MLX4_PORT_CAP_IS_SM = 1 << 1, MLX4_PORT_CAP_DEV_MGMT_SUP = 1 << 19, }; enum { MLX4_MAX_PORTS = 2, MLX4_MAX_PORT_PKEYS = 128 }; /* base qkey for use in sriov tunnel-qp/proxy-qp communication. * These qkeys must not be allowed for general use. This is a 64k range, * and to test for violation, we use the mask (protect against future chg). */ #define MLX4_RESERVED_QKEY_BASE (0xFFFF0000) #define MLX4_RESERVED_QKEY_MASK (0xFFFF0000) enum { MLX4_BOARD_ID_LEN = 64, MLX4_VSD_LEN = 208 }; enum { MLX4_MAX_NUM_PF = 16, MLX4_MAX_NUM_VF = 64, MLX4_MFUNC_MAX = 80, MLX4_MAX_EQ_NUM = 1024, MLX4_MFUNC_EQ_NUM = 4, MLX4_MFUNC_MAX_EQES = 8, MLX4_MFUNC_EQE_MASK = (MLX4_MFUNC_MAX_EQES - 1) }; /* Driver supports 3 diffrent device methods to manage traffic steering: * -device managed - High level API for ib and eth flow steering. FW is * managing flow steering tables. * - B0 steering mode - Common low level API for ib and (if supported) eth. * - A0 steering mode - Limited low level API for eth. In case of IB, * B0 mode is in use. */ enum { MLX4_STEERING_MODE_A0, MLX4_STEERING_MODE_B0, MLX4_STEERING_MODE_DEVICE_MANAGED }; static inline const char *mlx4_steering_mode_str(int steering_mode) { switch (steering_mode) { case MLX4_STEERING_MODE_A0: return "A0 steering"; case MLX4_STEERING_MODE_B0: return "B0 steering"; case MLX4_STEERING_MODE_DEVICE_MANAGED: return "Device managed flow steering"; default: return "Unrecognize steering mode"; } } enum { MLX4_DEV_CAP_FLAG_RC = 1LL << 0, MLX4_DEV_CAP_FLAG_UC = 1LL << 1, MLX4_DEV_CAP_FLAG_UD = 1LL << 2, MLX4_DEV_CAP_FLAG_XRC = 1LL << 3, MLX4_DEV_CAP_FLAG_SRQ = 1LL << 6, MLX4_DEV_CAP_FLAG_IPOIB_CSUM = 1LL << 7, MLX4_DEV_CAP_FLAG_BAD_PKEY_CNTR = 1LL << 8, MLX4_DEV_CAP_FLAG_BAD_QKEY_CNTR = 1LL << 9, MLX4_DEV_CAP_FLAG_DPDP = 1LL << 12, MLX4_DEV_CAP_FLAG_BLH = 1LL << 15, MLX4_DEV_CAP_FLAG_MEM_WINDOW = 1LL << 16, MLX4_DEV_CAP_FLAG_APM = 1LL << 17, MLX4_DEV_CAP_FLAG_ATOMIC = 1LL << 18, MLX4_DEV_CAP_FLAG_RAW_MCAST = 1LL << 19, MLX4_DEV_CAP_FLAG_UD_AV_PORT = 1LL << 20, MLX4_DEV_CAP_FLAG_UD_MCAST = 1LL << 21, MLX4_DEV_CAP_FLAG_IBOE = 1LL << 30, MLX4_DEV_CAP_FLAG_UC_LOOPBACK = 1LL << 32, MLX4_DEV_CAP_FLAG_FCS_KEEP = 1LL << 34, MLX4_DEV_CAP_FLAG_WOL_PORT1 = 1LL << 37, MLX4_DEV_CAP_FLAG_WOL_PORT2 = 1LL << 38, MLX4_DEV_CAP_FLAG_UDP_RSS = 1LL << 40, MLX4_DEV_CAP_FLAG_VEP_UC_STEER = 1LL << 41, MLX4_DEV_CAP_FLAG_VEP_MC_STEER = 1LL << 42, MLX4_DEV_CAP_FLAG_CROSS_CHANNEL = 1LL << 44, MLX4_DEV_CAP_FLAG_COUNTERS = 1LL << 48, MLX4_DEV_CAP_FLAG_COUNTERS_EXT = 1LL << 49, MLX4_DEV_CAP_FLAG_SET_PORT_ETH_SCHED = 1LL << 53, MLX4_DEV_CAP_FLAG_SENSE_SUPPORT = 1LL << 55, MLX4_DEV_CAP_FLAG_FAST_DROP = 1LL << 57, MLX4_DEV_CAP_FLAG_PORT_MNG_CHG_EV = 1LL << 59, MLX4_DEV_CAP_FLAG_64B_EQE = 1LL << 61, MLX4_DEV_CAP_FLAG_64B_CQE = 1LL << 62 }; enum { MLX4_DEV_CAP_FLAG2_RSS = 1LL << 0, MLX4_DEV_CAP_FLAG2_RSS_TOP = 1LL << 1, MLX4_DEV_CAP_FLAG2_RSS_XOR = 1LL << 2, MLX4_DEV_CAP_FLAG2_FS_EN = 1LL << 3, MLX4_DEV_CAP_FLAG2_FSM = 1LL << 4, MLX4_DEV_CAP_FLAG2_VLAN_CONTROL = 1LL << 5, MLX4_DEV_CAP_FLAG2_UPDATE_QP = 1LL << 6, MLX4_DEV_CAP_FLAG2_LB_SRC_CHK = 1LL << 7, MLX4_DEV_CAP_FLAG2_DMFS_IPOIB = 1LL << 8, 
MLX4_DEV_CAP_FLAG2_ETS_CFG = 1LL << 9, MLX4_DEV_CAP_FLAG2_ETH_BACKPL_AN_REP = 1LL << 10, MLX4_DEV_CAP_FLAG2_FLOWSTATS_EN = 1LL << 11, MLX4_DEV_CAP_FLAG2_RECOVERABLE_ERROR_EVENT = 1LL << 12, MLX4_DEV_CAP_FLAG2_TS = 1LL << 13, MLX4_DEV_CAP_FLAG2_DRIVER_VERSION_TO_FW = 1LL << 14 }; /* bit enums for an 8-bit flags field indicating special use * QPs which require special handling in qp_reserve_range. * Currently, this only includes QPs used by the ETH interface, * where we expect to use blueflame. These QPs must not have * bits 6 and 7 set in their qp number. * * This enum may use only bits 0..7. */ enum { MLX4_RESERVE_BF_QP = 1 << 7, }; enum { MLX4_DEV_CAP_CQ_FLAG_IO = 1 << 0 }; enum { MLX4_DEV_CAP_64B_EQE_ENABLED = 1LL << 0, MLX4_DEV_CAP_64B_CQE_ENABLED = 1LL << 1 }; enum { MLX4_USER_DEV_CAP_64B_CQE = 1L << 0 }; enum { MLX4_FUNC_CAP_64B_EQE_CQE = 1L << 0 }; #define MLX4_ATTR_EXTENDED_PORT_INFO cpu_to_be16(0xff90) enum { MLX4_BMME_FLAG_WIN_TYPE_2B = 1 << 1, MLX4_BMME_FLAG_LOCAL_INV = 1 << 6, MLX4_BMME_FLAG_REMOTE_INV = 1 << 7, MLX4_BMME_FLAG_TYPE_2_WIN = 1 << 9, MLX4_BMME_FLAG_RESERVED_LKEY = 1 << 10, MLX4_BMME_FLAG_FAST_REG_WR = 1 << 11, }; enum mlx4_event { MLX4_EVENT_TYPE_COMP = 0x00, MLX4_EVENT_TYPE_PATH_MIG = 0x01, MLX4_EVENT_TYPE_COMM_EST = 0x02, MLX4_EVENT_TYPE_SQ_DRAINED = 0x03, MLX4_EVENT_TYPE_SRQ_QP_LAST_WQE = 0x13, MLX4_EVENT_TYPE_SRQ_LIMIT = 0x14, MLX4_EVENT_TYPE_CQ_ERROR = 0x04, MLX4_EVENT_TYPE_WQ_CATAS_ERROR = 0x05, MLX4_EVENT_TYPE_EEC_CATAS_ERROR = 0x06, MLX4_EVENT_TYPE_PATH_MIG_FAILED = 0x07, MLX4_EVENT_TYPE_WQ_INVAL_REQ_ERROR = 0x10, MLX4_EVENT_TYPE_WQ_ACCESS_ERROR = 0x11, MLX4_EVENT_TYPE_SRQ_CATAS_ERROR = 0x12, MLX4_EVENT_TYPE_LOCAL_CATAS_ERROR = 0x08, MLX4_EVENT_TYPE_PORT_CHANGE = 0x09, MLX4_EVENT_TYPE_EQ_OVERFLOW = 0x0f, MLX4_EVENT_TYPE_ECC_DETECT = 0x0e, MLX4_EVENT_TYPE_CMD = 0x0a, MLX4_EVENT_TYPE_VEP_UPDATE = 0x19, MLX4_EVENT_TYPE_COMM_CHANNEL = 0x18, MLX4_EVENT_TYPE_OP_REQUIRED = 0x1a, MLX4_EVENT_TYPE_FATAL_WARNING = 0x1b, MLX4_EVENT_TYPE_FLR_EVENT = 0x1c, MLX4_EVENT_TYPE_PORT_MNG_CHG_EVENT = 0x1d, MLX4_EVENT_TYPE_RECOVERABLE_ERROR_EVENT = 0x3e, MLX4_EVENT_TYPE_NONE = 0xff, }; enum { MLX4_PORT_CHANGE_SUBTYPE_DOWN = 1, MLX4_PORT_CHANGE_SUBTYPE_ACTIVE = 4 }; enum { MLX4_RECOVERABLE_ERROR_EVENT_SUBTYPE_BAD_CABLE = 1, MLX4_RECOVERABLE_ERROR_EVENT_SUBTYPE_UNSUPPORTED_CABLE = 2, }; enum { MLX4_FATAL_WARNING_SUBTYPE_WARMING = 0, }; enum slave_port_state { SLAVE_PORT_DOWN = 0, SLAVE_PENDING_UP, SLAVE_PORT_UP, }; enum slave_port_gen_event { SLAVE_PORT_GEN_EVENT_DOWN = 0, SLAVE_PORT_GEN_EVENT_UP, SLAVE_PORT_GEN_EVENT_NONE, }; enum slave_port_state_event { MLX4_PORT_STATE_DEV_EVENT_PORT_DOWN, MLX4_PORT_STATE_DEV_EVENT_PORT_UP, MLX4_PORT_STATE_IB_PORT_STATE_EVENT_GID_VALID, MLX4_PORT_STATE_IB_EVENT_GID_INVALID, }; enum { MLX4_PERM_LOCAL_READ = 1 << 10, MLX4_PERM_LOCAL_WRITE = 1 << 11, MLX4_PERM_REMOTE_READ = 1 << 12, MLX4_PERM_REMOTE_WRITE = 1 << 13, MLX4_PERM_ATOMIC = 1 << 14, MLX4_PERM_BIND_MW = 1 << 15, }; enum { MLX4_OPCODE_NOP = 0x00, MLX4_OPCODE_SEND_INVAL = 0x01, MLX4_OPCODE_RDMA_WRITE = 0x08, MLX4_OPCODE_RDMA_WRITE_IMM = 0x09, MLX4_OPCODE_SEND = 0x0a, MLX4_OPCODE_SEND_IMM = 0x0b, MLX4_OPCODE_LSO = 0x0e, MLX4_OPCODE_RDMA_READ = 0x10, MLX4_OPCODE_ATOMIC_CS = 0x11, MLX4_OPCODE_ATOMIC_FA = 0x12, MLX4_OPCODE_MASKED_ATOMIC_CS = 0x14, MLX4_OPCODE_MASKED_ATOMIC_FA = 0x15, MLX4_OPCODE_BIND_MW = 0x18, MLX4_OPCODE_FMR = 0x19, MLX4_OPCODE_LOCAL_INVAL = 0x1b, MLX4_OPCODE_CONFIG_CMD = 0x1f, MLX4_RECV_OPCODE_RDMA_WRITE_IMM = 0x00, MLX4_RECV_OPCODE_SEND = 0x01, MLX4_RECV_OPCODE_SEND_IMM = 
0x02, MLX4_RECV_OPCODE_SEND_INVAL = 0x03, MLX4_CQE_OPCODE_ERROR = 0x1e, MLX4_CQE_OPCODE_RESIZE = 0x16, }; enum { MLX4_STAT_RATE_OFFSET = 5 }; enum mlx4_protocol { MLX4_PROT_IB_IPV6 = 0, MLX4_PROT_ETH, MLX4_PROT_IB_IPV4, MLX4_PROT_FCOE }; enum { MLX4_MTT_FLAG_PRESENT = 1 }; enum { MLX4_MAX_MTT_SHIFT = 31 }; enum mlx4_qp_region { MLX4_QP_REGION_FW = 0, MLX4_QP_REGION_ETH_ADDR, MLX4_QP_REGION_FC_ADDR, MLX4_QP_REGION_FC_EXCH, MLX4_NUM_QP_REGION }; enum mlx4_port_type { MLX4_PORT_TYPE_NONE = 0, MLX4_PORT_TYPE_IB = 1, MLX4_PORT_TYPE_ETH = 2, MLX4_PORT_TYPE_AUTO = 3, MLX4_PORT_TYPE_NA = 4 }; enum mlx4_special_vlan_idx { MLX4_NO_VLAN_IDX = 0, MLX4_VLAN_MISS_IDX, MLX4_VLAN_REGULAR }; enum mlx4_steer_type { MLX4_MC_STEER = 0, MLX4_UC_STEER, MLX4_NUM_STEERS }; enum { MLX4_NUM_FEXCH = 64 * 1024, }; enum { MLX4_MAX_FAST_REG_PAGES = 511, }; enum { MLX4_DEV_PMC_SUBTYPE_GUID_INFO = 0x14, MLX4_DEV_PMC_SUBTYPE_PORT_INFO = 0x15, MLX4_DEV_PMC_SUBTYPE_PKEY_TABLE = 0x16, }; /* Port mgmt change event handling */ enum { MLX4_EQ_PORT_INFO_MSTR_SM_LID_CHANGE_MASK = 1 << 0, MLX4_EQ_PORT_INFO_GID_PFX_CHANGE_MASK = 1 << 1, MLX4_EQ_PORT_INFO_LID_CHANGE_MASK = 1 << 2, MLX4_EQ_PORT_INFO_CLIENT_REREG_MASK = 1 << 3, MLX4_EQ_PORT_INFO_MSTR_SM_SL_CHANGE_MASK = 1 << 4, }; #define MSTR_SM_CHANGE_MASK (MLX4_EQ_PORT_INFO_MSTR_SM_SL_CHANGE_MASK | \ MLX4_EQ_PORT_INFO_MSTR_SM_LID_CHANGE_MASK) static inline u64 mlx4_fw_ver(u64 major, u64 minor, u64 subminor) { return (major << 32) | (minor << 16) | subminor; } struct mlx4_phys_caps { u32 gid_phys_table_len[MLX4_MAX_PORTS + 1]; u32 pkey_phys_table_len[MLX4_MAX_PORTS + 1]; u32 num_phys_eqs; u32 base_sqpn; u32 base_proxy_sqpn; u32 base_tunnel_sqpn; }; struct mlx4_caps { u64 fw_ver; u32 function; int num_ports; int vl_cap[MLX4_MAX_PORTS + 1]; int ib_mtu_cap[MLX4_MAX_PORTS + 1]; __be32 ib_port_def_cap[MLX4_MAX_PORTS + 1]; u64 def_mac[MLX4_MAX_PORTS + 1]; int eth_mtu_cap[MLX4_MAX_PORTS + 1]; int gid_table_len[MLX4_MAX_PORTS + 1]; int pkey_table_len[MLX4_MAX_PORTS + 1]; int trans_type[MLX4_MAX_PORTS + 1]; int vendor_oui[MLX4_MAX_PORTS + 1]; int wavelength[MLX4_MAX_PORTS + 1]; u64 trans_code[MLX4_MAX_PORTS + 1]; int local_ca_ack_delay; int num_uars; u32 uar_page_size; int bf_reg_size; int bf_regs_per_page; int max_sq_sg; int max_rq_sg; int num_qps; int max_wqes; int max_sq_desc_sz; int max_rq_desc_sz; int max_qp_init_rdma; int max_qp_dest_rdma; u32 *qp0_proxy; u32 *qp1_proxy; u32 *qp0_tunnel; u32 *qp1_tunnel; int num_srqs; int max_srq_wqes; int max_srq_sge; int reserved_srqs; int num_cqs; int max_cqes; int reserved_cqs; int num_eqs; int reserved_eqs; int num_comp_vectors; int comp_pool; int num_mpts; int max_fmr_maps; u64 num_mtts; int fmr_reserved_mtts; int reserved_mtts; int reserved_mrws; int reserved_uars; int num_mgms; int num_amgms; int reserved_mcgs; int num_qp_per_mgm; int steering_mode; int num_pds; int reserved_pds; int max_xrcds; int reserved_xrcds; int mtt_entry_sz; u32 max_msg_sz; u32 page_size_cap; u64 flags; u64 flags2; u32 bmme_flags; u32 reserved_lkey; u16 stat_rate_support; u8 cq_timestamp; u8 port_width_cap[MLX4_MAX_PORTS + 1]; int max_gso_sz; int max_rss_tbl_sz; int reserved_qps_cnt[MLX4_NUM_QP_REGION]; int reserved_qps; int reserved_qps_base[MLX4_NUM_QP_REGION]; int log_num_macs; int log_num_vlans; enum mlx4_port_type port_type[MLX4_MAX_PORTS + 1]; u8 supported_type[MLX4_MAX_PORTS + 1]; u8 suggested_type[MLX4_MAX_PORTS + 1]; u8 default_sense[MLX4_MAX_PORTS + 1]; u32 port_mask[MLX4_MAX_PORTS + 1]; enum mlx4_port_type possible_type[MLX4_MAX_PORTS + 1]; u32 max_counters; 
u8 port_ib_mtu[MLX4_MAX_PORTS + 1]; u16 sqp_demux; u32 sync_qp; u32 cq_flags; u32 eqe_size; u32 cqe_size; u8 eqe_factor; u32 userspace_caps; /* userspace must be aware to */ u32 function_caps; /* functions must be aware to */ u8 fast_drop; u16 hca_core_clock; u32 max_basic_counters; u32 max_extended_counters; u8 def_counter_index[MLX4_MAX_PORTS + 1]; }; struct mlx4_buf_list { void *buf; dma_addr_t map; }; struct mlx4_buf { struct mlx4_buf_list direct; struct mlx4_buf_list *page_list; int nbufs; int npages; int page_shift; }; struct mlx4_mtt { u32 offset; int order; int page_shift; }; enum { MLX4_DB_PER_PAGE = PAGE_SIZE / 4 }; struct mlx4_db_pgdir { struct list_head list; DECLARE_BITMAP(order0, MLX4_DB_PER_PAGE); DECLARE_BITMAP(order1, MLX4_DB_PER_PAGE / 2); unsigned long *bits[2]; __be32 *db_page; dma_addr_t db_dma; }; struct mlx4_ib_user_db_page; struct mlx4_db { __be32 *db; union { struct mlx4_db_pgdir *pgdir; struct mlx4_ib_user_db_page *user_page; } u; dma_addr_t dma; int index; int order; }; struct mlx4_hwq_resources { struct mlx4_db db; struct mlx4_mtt mtt; struct mlx4_buf buf; }; struct mlx4_mr { struct mlx4_mtt mtt; u64 iova; u64 size; u32 key; u32 pd; u32 access; int enabled; }; enum mlx4_mw_type { MLX4_MW_TYPE_1 = 1, MLX4_MW_TYPE_2 = 2, }; struct mlx4_mw { u32 key; u32 pd; enum mlx4_mw_type type; int enabled; }; struct mlx4_fmr { struct mlx4_mr mr; struct mlx4_mpt_entry *mpt; __be64 *mtts; dma_addr_t dma_handle; int max_pages; int max_maps; int maps; u8 page_shift; }; struct mlx4_uar { unsigned long pfn; int index; struct list_head bf_list; unsigned free_bf_bmap; void __iomem *map; void __iomem *bf_map; }; struct mlx4_bf { unsigned long offset; int buf_size; struct mlx4_uar *uar; void __iomem *reg; }; struct mlx4_cq { void (*comp) (struct mlx4_cq *); void (*event) (struct mlx4_cq *, enum mlx4_event); struct mlx4_uar *uar; u32 cons_index; __be32 *set_ci_db; __be32 *arm_db; int arm_sn; int cqn; unsigned vector; atomic_t refcount; struct completion free; int eqn; u16 irq; }; struct mlx4_qp { void (*event) (struct mlx4_qp *, enum mlx4_event); int qpn; atomic_t refcount; struct completion free; }; struct mlx4_srq { void (*event) (struct mlx4_srq *, enum mlx4_event); int srqn; int max; int max_gs; int wqe_shift; atomic_t refcount; struct completion free; }; struct mlx4_av { __be32 port_pd; u8 reserved1; u8 g_slid; __be16 dlid; u8 reserved2; u8 gid_index; u8 stat_rate; u8 hop_limit; __be32 sl_tclass_flowlabel; u8 dgid[16]; }; struct mlx4_eth_av { __be32 port_pd; u8 reserved1; u8 smac_idx; u16 reserved2; u8 reserved3; u8 gid_index; u8 stat_rate; u8 hop_limit; __be32 sl_tclass_flowlabel; u8 dgid[16]; u8 s_mac[6]; u8 reserved4[2]; __be16 vlan; u8 mac[6]; }; union mlx4_ext_av { struct mlx4_av ib; struct mlx4_eth_av eth; }; struct mlx4_if_stat_control { u8 reserved1[3]; /* Extended counters enabled */ u8 cnt_mode; /* Number of interfaces */ __be32 num_of_if; __be32 reserved[2]; }; struct mlx4_if_stat_basic { struct mlx4_if_stat_control control; struct { __be64 IfRxFrames; __be64 IfRxOctets; __be64 IfTxFrames; __be64 IfTxOctets; } counters[]; }; #define MLX4_IF_STAT_BSC_SZ(ports)(sizeof(struct mlx4_if_stat_extended) +\ sizeof(((struct mlx4_if_stat_extended *)0)->\ counters[0]) * ports) struct mlx4_if_stat_extended { struct mlx4_if_stat_control control; struct { __be64 IfRxUnicastFrames; __be64 IfRxUnicastOctets; __be64 IfRxMulticastFrames; __be64 IfRxMulticastOctets; __be64 IfRxBroadcastFrames; __be64 IfRxBroadcastOctets; __be64 IfRxNoBufferFrames; __be64 IfRxNoBufferOctets; __be64 
IfRxErrorFrames; __be64 IfRxErrorOctets; __be32 reserved[39]; __be64 IfTxUnicastFrames; __be64 IfTxUnicastOctets; __be64 IfTxMulticastFrames; __be64 IfTxMulticastOctets; __be64 IfTxBroadcastFrames; __be64 IfTxBroadcastOctets; __be64 IfTxDroppedFrames; __be64 IfTxDroppedOctets; __be64 IfTxRequestedFramesSent; __be64 IfTxGeneratedFramesSent; __be64 IfTxTsoOctets; } __packed counters[]; }; #define MLX4_IF_STAT_EXT_SZ(ports) (sizeof(struct mlx4_if_stat_extended) +\ sizeof(((struct mlx4_if_stat_extended *)\ 0)->counters[0]) * ports) union mlx4_counter { struct mlx4_if_stat_control control; struct mlx4_if_stat_basic basic; struct mlx4_if_stat_extended ext; }; #define MLX4_IF_STAT_SZ(ports) MLX4_IF_STAT_EXT_SZ(ports) struct mlx4_quotas { int qp; int cq; int srq; int mpt; int mtt; int counter; int xrcd; }; struct mlx4_dev { struct pci_dev *pdev; unsigned long flags; unsigned long num_slaves; struct mlx4_caps caps; struct mlx4_phys_caps phys_caps; struct mlx4_quotas quotas; struct radix_tree_root qp_table_tree; u8 rev_id; char board_id[MLX4_BOARD_ID_LEN]; u16 vsd_vendor_id; char vsd[MLX4_VSD_LEN]; int num_vfs; int numa_node; int oper_log_mgm_entry_size; u64 regid_promisc_array[MLX4_MAX_PORTS + 1]; u64 regid_allmulti_array[MLX4_MAX_PORTS + 1]; }; struct mlx4_clock_params { u64 offset; u8 bar; u8 size; }; struct mlx4_eqe { u8 reserved1; u8 type; u8 reserved2; u8 subtype; union { u32 raw[6]; struct { __be32 cqn; } __packed comp; struct { u16 reserved1; __be16 token; u32 reserved2; u8 reserved3[3]; u8 status; __be64 out_param; } __packed cmd; struct { __be32 qpn; } __packed qp; struct { __be32 srqn; } __packed srq; struct { __be32 cqn; u32 reserved1; u8 reserved2[3]; u8 syndrome; } __packed cq_err; struct { u32 reserved1[2]; __be32 port; } __packed port_change; struct { #define COMM_CHANNEL_BIT_ARRAY_SIZE 4 u32 reserved; u32 bit_vec[COMM_CHANNEL_BIT_ARRAY_SIZE]; } __packed comm_channel_arm; struct { u8 port; u8 reserved[3]; __be64 mac; } __packed mac_update; struct { __be32 slave_id; } __packed flr_event; struct { __be16 current_temperature; __be16 warning_threshold; } __packed warming; struct { u8 reserved[3]; u8 port; union { struct { __be16 mstr_sm_lid; __be16 port_lid; __be32 changed_attr; u8 reserved[3]; u8 mstr_sm_sl; __be64 gid_prefix; } __packed port_info; struct { __be32 block_ptr; __be32 tbl_entries_mask; } __packed tbl_change_info; } params; } __packed port_mgmt_change; struct { u8 reserved[3]; u8 port; u32 reserved1[5]; } __packed bad_cable; } event; u8 slave_id; u8 reserved3[2]; u8 owner; } __packed; struct mlx4_init_port_param { int set_guid0; int set_node_guid; int set_si_guid; u16 mtu; int port_width_cap; u16 vl_cap; u16 max_gid; u16 max_pkey; u64 guid0; u64 node_guid; u64 si_guid; }; #define mlx4_foreach_port(port, dev, type) \ for ((port) = 1; (port) <= (dev)->caps.num_ports; (port)++) \ if ((type) == (dev)->caps.port_mask[(port)]) #define mlx4_foreach_non_ib_transport_port(port, dev) \ for ((port) = 1; (port) <= (dev)->caps.num_ports; (port)++) \ if (((dev)->caps.port_mask[port] != MLX4_PORT_TYPE_IB)) #define mlx4_foreach_ib_transport_port(port, dev) \ for ((port) = 1; (port) <= (dev)->caps.num_ports; (port)++) \ if (((dev)->caps.port_mask[port] == MLX4_PORT_TYPE_IB) || \ ((dev)->caps.flags & MLX4_DEV_CAP_FLAG_IBOE)) #define MLX4_INVALID_SLAVE_ID 0xFF #define MLX4_SINK_COUNTER_INDEX 0xff void handle_port_mgmt_change_event(struct work_struct *work); static inline int mlx4_master_func_num(struct mlx4_dev *dev) { return dev->caps.function; } static inline int mlx4_is_master(struct 
mlx4_dev *dev) { return dev->flags & MLX4_FLAG_MASTER; } static inline int mlx4_num_reserved_sqps(struct mlx4_dev *dev) { return dev->phys_caps.base_sqpn + 8 + 16 * MLX4_MFUNC_MAX * !!mlx4_is_master(dev); } static inline int mlx4_is_qp_reserved(struct mlx4_dev *dev, u32 qpn) { return (qpn < dev->phys_caps.base_sqpn + 8 + 16 * MLX4_MFUNC_MAX * !!mlx4_is_master(dev)); } static inline int mlx4_is_guest_proxy(struct mlx4_dev *dev, int slave, u32 qpn) { int guest_proxy_base = dev->phys_caps.base_proxy_sqpn + slave * 8; if (qpn >= guest_proxy_base && qpn < guest_proxy_base + 8) return 1; return 0; } static inline int mlx4_is_mfunc(struct mlx4_dev *dev) { return dev->flags & (MLX4_FLAG_SLAVE | MLX4_FLAG_MASTER); } static inline int mlx4_is_slave(struct mlx4_dev *dev) { return dev->flags & MLX4_FLAG_SLAVE; } int mlx4_buf_alloc(struct mlx4_dev *dev, int size, int max_direct, struct mlx4_buf *buf); void mlx4_buf_free(struct mlx4_dev *dev, int size, struct mlx4_buf *buf); static inline void *mlx4_buf_offset(struct mlx4_buf *buf, int offset) { if (BITS_PER_LONG == 64 || buf->nbufs == 1) return buf->direct.buf + offset; else return buf->page_list[offset >> PAGE_SHIFT].buf + (offset & (PAGE_SIZE - 1)); } int mlx4_pd_alloc(struct mlx4_dev *dev, u32 *pdn); void mlx4_pd_free(struct mlx4_dev *dev, u32 pdn); int mlx4_xrcd_alloc(struct mlx4_dev *dev, u32 *xrcdn); void mlx4_xrcd_free(struct mlx4_dev *dev, u32 xrcdn); int mlx4_uar_alloc(struct mlx4_dev *dev, struct mlx4_uar *uar); void mlx4_uar_free(struct mlx4_dev *dev, struct mlx4_uar *uar); int mlx4_bf_alloc(struct mlx4_dev *dev, struct mlx4_bf *bf, int node); void mlx4_bf_free(struct mlx4_dev *dev, struct mlx4_bf *bf); int mlx4_mtt_init(struct mlx4_dev *dev, int npages, int page_shift, struct mlx4_mtt *mtt); void mlx4_mtt_cleanup(struct mlx4_dev *dev, struct mlx4_mtt *mtt); u64 mlx4_mtt_addr(struct mlx4_dev *dev, struct mlx4_mtt *mtt); int mlx4_mr_alloc(struct mlx4_dev *dev, u32 pd, u64 iova, u64 size, u32 access, int npages, int page_shift, struct mlx4_mr *mr); int mlx4_mr_free(struct mlx4_dev *dev, struct mlx4_mr *mr); int mlx4_mr_enable(struct mlx4_dev *dev, struct mlx4_mr *mr); int mlx4_mw_alloc(struct mlx4_dev *dev, u32 pd, enum mlx4_mw_type type, struct mlx4_mw *mw); void mlx4_mw_free(struct mlx4_dev *dev, struct mlx4_mw *mw); int mlx4_mw_enable(struct mlx4_dev *dev, struct mlx4_mw *mw); int mlx4_write_mtt(struct mlx4_dev *dev, struct mlx4_mtt *mtt, int start_index, int npages, u64 *page_list); int mlx4_buf_write_mtt(struct mlx4_dev *dev, struct mlx4_mtt *mtt, struct mlx4_buf *buf); int mlx4_db_alloc(struct mlx4_dev *dev, struct mlx4_db *db, int order); void mlx4_db_free(struct mlx4_dev *dev, struct mlx4_db *db); int mlx4_alloc_hwq_res(struct mlx4_dev *dev, struct mlx4_hwq_resources *wqres, int size, int max_direct); void mlx4_free_hwq_res(struct mlx4_dev *mdev, struct mlx4_hwq_resources *wqres, int size); int mlx4_cq_alloc(struct mlx4_dev *dev, int nent, struct mlx4_mtt *mtt, struct mlx4_uar *uar, u64 db_rec, struct mlx4_cq *cq, unsigned vector, int collapsed, int timestamp_en); void mlx4_cq_free(struct mlx4_dev *dev, struct mlx4_cq *cq); int mlx4_qp_reserve_range(struct mlx4_dev *dev, int cnt, int align, int *base, u8 flags); void mlx4_qp_release_range(struct mlx4_dev *dev, int base_qpn, int cnt); int mlx4_qp_alloc(struct mlx4_dev *dev, int qpn, struct mlx4_qp *qp); void mlx4_qp_free(struct mlx4_dev *dev, struct mlx4_qp *qp); int mlx4_srq_alloc(struct mlx4_dev *dev, u32 pdn, u32 cqn, u16 xrcdn, struct mlx4_mtt *mtt, u64 db_rec, struct mlx4_srq 
*srq); void mlx4_srq_free(struct mlx4_dev *dev, struct mlx4_srq *srq); int mlx4_srq_arm(struct mlx4_dev *dev, struct mlx4_srq *srq, int limit_watermark); int mlx4_srq_query(struct mlx4_dev *dev, struct mlx4_srq *srq, int *limit_watermark); int mlx4_INIT_PORT(struct mlx4_dev *dev, int port); int mlx4_CLOSE_PORT(struct mlx4_dev *dev, int port); int mlx4_unicast_attach(struct mlx4_dev *dev, struct mlx4_qp *qp, u8 gid[16], int block_mcast_loopback, enum mlx4_protocol prot); int mlx4_unicast_detach(struct mlx4_dev *dev, struct mlx4_qp *qp, u8 gid[16], enum mlx4_protocol prot); int mlx4_multicast_attach(struct mlx4_dev *dev, struct mlx4_qp *qp, u8 gid[16], u8 port, int block_mcast_loopback, enum mlx4_protocol protocol, u64 *reg_id); int mlx4_multicast_detach(struct mlx4_dev *dev, struct mlx4_qp *qp, u8 gid[16], enum mlx4_protocol protocol, u64 reg_id); enum { MLX4_DOMAIN_UVERBS = 0x1000, MLX4_DOMAIN_ETHTOOL = 0x2000, MLX4_DOMAIN_RFS = 0x3000, MLX4_DOMAIN_NIC = 0x5000, }; enum mlx4_net_trans_rule_id { MLX4_NET_TRANS_RULE_ID_ETH = 0, MLX4_NET_TRANS_RULE_ID_IB, MLX4_NET_TRANS_RULE_ID_IPV6, MLX4_NET_TRANS_RULE_ID_IPV4, MLX4_NET_TRANS_RULE_ID_TCP, MLX4_NET_TRANS_RULE_ID_UDP, MLX4_NET_TRANS_RULE_NUM, /* should be last */ + MLX4_NET_TRANS_RULE_DUMMY = -1, /* force enum to be signed */ }; extern const u16 __sw_id_hw[]; static inline int map_hw_to_sw_id(u16 header_id) { int i; for (i = 0; i < MLX4_NET_TRANS_RULE_NUM; i++) { if (header_id == __sw_id_hw[i]) return i; } return -EINVAL; } enum mlx4_net_trans_promisc_mode { MLX4_FS_REGULAR = 1, MLX4_FS_ALL_DEFAULT, MLX4_FS_MC_DEFAULT, MLX4_FS_UC_SNIFFER, MLX4_FS_MC_SNIFFER, MLX4_FS_MODE_NUM, /* should be last */ + MLX4_FS_MODE_DUMMY = -1, /* force enum to be signed */ }; struct mlx4_spec_eth { u8 dst_mac[6]; u8 dst_mac_msk[6]; u8 src_mac[6]; u8 src_mac_msk[6]; u8 ether_type_enable; __be16 ether_type; __be16 vlan_id_msk; __be16 vlan_id; }; struct mlx4_spec_tcp_udp { __be16 dst_port; __be16 dst_port_msk; __be16 src_port; __be16 src_port_msk; }; struct mlx4_spec_ipv4 { __be32 dst_ip; __be32 dst_ip_msk; __be32 src_ip; __be32 src_ip_msk; }; struct mlx4_spec_ib { __be32 l3_qpn; __be32 qpn_msk; u8 dst_gid[16]; u8 dst_gid_msk[16]; }; struct mlx4_spec_list { struct list_head list; enum mlx4_net_trans_rule_id id; union { struct mlx4_spec_eth eth; struct mlx4_spec_ib ib; struct mlx4_spec_ipv4 ipv4; struct mlx4_spec_tcp_udp tcp_udp; }; }; enum mlx4_net_trans_hw_rule_queue { MLX4_NET_TRANS_Q_FIFO, MLX4_NET_TRANS_Q_LIFO, }; struct mlx4_net_trans_rule { struct list_head list; enum mlx4_net_trans_hw_rule_queue queue_mode; bool exclusive; bool allow_loopback; enum mlx4_net_trans_promisc_mode promisc_mode; u8 port; u16 priority; u32 qpn; }; struct mlx4_net_trans_rule_hw_ctrl { __be16 prio; u8 type; u8 flags; u8 rsvd1; u8 funcid; u8 vep; u8 port; __be32 qpn; __be32 rsvd2; }; struct mlx4_net_trans_rule_hw_ib { u8 size; u8 rsvd1; __be16 id; u32 rsvd2; __be32 l3_qpn; __be32 qpn_mask; u8 dst_gid[16]; u8 dst_gid_msk[16]; } __packed; struct mlx4_net_trans_rule_hw_eth { u8 size; u8 rsvd; __be16 id; u8 rsvd1[6]; u8 dst_mac[6]; u16 rsvd2; u8 dst_mac_msk[6]; u16 rsvd3; u8 src_mac[6]; u16 rsvd4; u8 src_mac_msk[6]; u8 rsvd5; u8 ether_type_enable; __be16 ether_type; __be16 vlan_tag_msk; __be16 vlan_tag; } __packed; struct mlx4_net_trans_rule_hw_tcp_udp { u8 size; u8 rsvd; __be16 id; __be16 rsvd1[3]; __be16 dst_port; __be16 rsvd2; __be16 dst_port_msk; __be16 rsvd3; __be16 src_port; __be16 rsvd4; __be16 src_port_msk; } __packed; struct mlx4_net_trans_rule_hw_ipv4 { u8 size; u8 rsvd; __be16 
id; __be32 rsvd1; __be32 dst_ip; __be32 dst_ip_msk; __be32 src_ip; __be32 src_ip_msk; } __packed; struct _rule_hw { union { struct { u8 size; u8 rsvd; __be16 id; }; struct mlx4_net_trans_rule_hw_eth eth; struct mlx4_net_trans_rule_hw_ib ib; struct mlx4_net_trans_rule_hw_ipv4 ipv4; struct mlx4_net_trans_rule_hw_tcp_udp tcp_udp; }; }; int mlx4_flow_steer_promisc_add(struct mlx4_dev *dev, u8 port, u32 qpn, enum mlx4_net_trans_promisc_mode mode); int mlx4_flow_steer_promisc_remove(struct mlx4_dev *dev, u8 port, enum mlx4_net_trans_promisc_mode mode); int mlx4_multicast_promisc_add(struct mlx4_dev *dev, u32 qpn, u8 port); int mlx4_multicast_promisc_remove(struct mlx4_dev *dev, u32 qpn, u8 port); int mlx4_unicast_promisc_add(struct mlx4_dev *dev, u32 qpn, u8 port); int mlx4_unicast_promisc_remove(struct mlx4_dev *dev, u32 qpn, u8 port); int mlx4_SET_MCAST_FLTR(struct mlx4_dev *dev, u8 port, u64 mac, u64 clear, u8 mode); int mlx4_register_mac(struct mlx4_dev *dev, u8 port, u64 mac); void mlx4_unregister_mac(struct mlx4_dev *dev, u8 port, u64 mac); int mlx4_get_base_qpn(struct mlx4_dev *dev, u8 port); int __mlx4_replace_mac(struct mlx4_dev *dev, u8 port, int qpn, u64 new_mac); void mlx4_set_stats_bitmap(struct mlx4_dev *dev, unsigned long *stats_bitmap); int mlx4_SET_PORT_general(struct mlx4_dev *dev, u8 port, int mtu, u8 pptx, u8 pfctx, u8 pprx, u8 pfcrx); int mlx4_SET_PORT_qpn_calc(struct mlx4_dev *dev, u8 port, u32 base_qpn, u8 promisc); int mlx4_SET_PORT_PRIO2TC(struct mlx4_dev *dev, u8 port, u8 *prio2tc); int mlx4_SET_PORT_SCHEDULER(struct mlx4_dev *dev, u8 port, u8 *tc_tx_bw, u8 *pg, u16 *ratelimit); int mlx4_find_cached_vlan(struct mlx4_dev *dev, u8 port, u16 vid, int *idx); int mlx4_register_vlan(struct mlx4_dev *dev, u8 port, u16 vlan, int *index); void mlx4_unregister_vlan(struct mlx4_dev *dev, u8 port, u16 vlan); int mlx4_map_phys_fmr(struct mlx4_dev *dev, struct mlx4_fmr *fmr, u64 *page_list, int npages, u64 iova, u32 *lkey, u32 *rkey); int mlx4_fmr_alloc(struct mlx4_dev *dev, u32 pd, u32 access, int max_pages, int max_maps, u8 page_shift, struct mlx4_fmr *fmr); int mlx4_fmr_enable(struct mlx4_dev *dev, struct mlx4_fmr *fmr); void mlx4_fmr_unmap(struct mlx4_dev *dev, struct mlx4_fmr *fmr, u32 *lkey, u32 *rkey); int mlx4_fmr_free(struct mlx4_dev *dev, struct mlx4_fmr *fmr); int mlx4_SYNC_TPT(struct mlx4_dev *dev); int mlx4_query_diag_counters(struct mlx4_dev *mlx4_dev, int array_length, u8 op_modifier, u32 in_offset[], u32 counter_out[]); int mlx4_test_interrupts(struct mlx4_dev *dev); int mlx4_assign_eq(struct mlx4_dev *dev, char* name, int * vector); void mlx4_release_eq(struct mlx4_dev *dev, int vec); int mlx4_wol_read(struct mlx4_dev *dev, u64 *config, int port); int mlx4_wol_write(struct mlx4_dev *dev, u64 config, int port); int mlx4_counter_alloc(struct mlx4_dev *dev, u8 port, u32 *idx); void mlx4_counter_free(struct mlx4_dev *dev, u8 port, u32 idx); int mlx4_flow_attach(struct mlx4_dev *dev, struct mlx4_net_trans_rule *rule, u64 *reg_id); int mlx4_flow_detach(struct mlx4_dev *dev, u64 reg_id); int map_sw_to_hw_steering_mode(struct mlx4_dev *dev, enum mlx4_net_trans_promisc_mode flow_type); int map_sw_to_hw_steering_id(struct mlx4_dev *dev, enum mlx4_net_trans_rule_id id); int hw_rule_sz(struct mlx4_dev *dev, enum mlx4_net_trans_rule_id id); void mlx4_sync_pkey_table(struct mlx4_dev *dev, int slave, int port, int i, int val); int mlx4_get_parav_qkey(struct mlx4_dev *dev, u32 qpn, u32 *qkey); int mlx4_is_slave_active(struct mlx4_dev *dev, int slave); int mlx4_gen_pkey_eqe(struct 
mlx4_dev *dev, int slave, u8 port); int mlx4_gen_guid_change_eqe(struct mlx4_dev *dev, int slave, u8 port); int mlx4_gen_slaves_port_mgt_ev(struct mlx4_dev *dev, u8 port, int attr, u16 lid, u8 sl); int mlx4_gen_port_state_change_eqe(struct mlx4_dev *dev, int slave, u8 port, u8 port_subtype_change); enum slave_port_state mlx4_get_slave_port_state(struct mlx4_dev *dev, int slave, u8 port); int set_and_calc_slave_port_state(struct mlx4_dev *dev, int slave, u8 port, int event, enum slave_port_gen_event *gen_event); void mlx4_put_slave_node_guid(struct mlx4_dev *dev, int slave, __be64 guid); __be64 mlx4_get_slave_node_guid(struct mlx4_dev *dev, int slave); int mlx4_get_slave_from_roce_gid(struct mlx4_dev *dev, int port, u8 *gid, int *slave_id); int mlx4_get_roce_gid_from_slave(struct mlx4_dev *dev, int port, int slave_id, u8 *gid); int mlx4_FLOW_STEERING_IB_UC_QP_RANGE(struct mlx4_dev *dev, u32 min_range_qpn, u32 max_range_qpn); int mlx4_read_clock(struct mlx4_dev *dev); int mlx4_get_internal_clock_params(struct mlx4_dev *dev, struct mlx4_clock_params *params); #endif /* MLX4_DEVICE_H */ Index: stable/10/sys/ofed/include/linux/mlx4/driver.h =================================================================== --- stable/10/sys/ofed/include/linux/mlx4/driver.h (revision 273245) +++ stable/10/sys/ofed/include/linux/mlx4/driver.h (revision 273246) @@ -1,136 +1,136 @@ /* * Copyright (c) 2006 Cisco Systems, Inc. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU * General Public License (GPL) Version 2, available from the file * COPYING in the main directory of this source tree, or the * OpenIB.org BSD license below: * * Redistribution and use in source and binary forms, with or * without modification, are permitted provided that the following * conditions are met: * * - Redistributions of source code must retain the above * copyright notice, this list of conditions and the following * disclaimer. * * - Redistributions in binary form must reproduce the above * copyright notice, this list of conditions and the following * disclaimer in the documentation and/or other materials * provided with the distribution. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
*/ #ifndef MLX4_DRIVER_H #define MLX4_DRIVER_H #include struct mlx4_dev; #define MLX4_MAC_MASK 0xffffffffffffULL #define MLX4_BE_SHORT_MASK cpu_to_be16(0xffff) #define MLX4_BE_WORD_MASK cpu_to_be32(0xffffffff) enum mlx4_dev_event { MLX4_DEV_EVENT_CATASTROPHIC_ERROR, MLX4_DEV_EVENT_PORT_UP, MLX4_DEV_EVENT_PORT_DOWN, MLX4_DEV_EVENT_PORT_REINIT, MLX4_DEV_EVENT_PORT_MGMT_CHANGE, MLX4_DEV_EVENT_SLAVE_INIT, MLX4_DEV_EVENT_SLAVE_SHUTDOWN, }; struct mlx4_interface { void * (*add) (struct mlx4_dev *dev); void (*remove)(struct mlx4_dev *dev, void *context); void (*event) (struct mlx4_dev *dev, void *context, enum mlx4_dev_event event, unsigned long param); void * (*get_dev)(struct mlx4_dev *dev, void *context, u8 port); struct list_head list; enum mlx4_protocol protocol; }; enum { MLX4_MAX_DEVICES = 32, MLX4_DEVS_TBL_SIZE = MLX4_MAX_DEVICES + 1, MLX4_DBDF2VAL_STR_SIZE = 512, MLX4_STR_NAME_SIZE = 64, MLX4_MAX_BDF_VALS = 2, MLX4_ENDOF_TBL = -1LL }; struct mlx4_dbdf2val { u64 dbdf; int val[MLX4_MAX_BDF_VALS]; }; struct mlx4_range { int min; int max; }; /* * mlx4_dbdf2val_lst struct holds all the data needed to convert * dbdf-to-value-list string into dbdf-to-value table. * dbdf-to-value-list string is a comma separated list of dbdf-to-value strings. * the format of dbdf-to-value string is: "[mmmm:]bb:dd.f-v1[;v2]" * mmmm - Domain number (optional) * bb - Bus number * dd - device number * f - Function number * v1 - First value related to the domain-bus-device-function. * v2 - Second value related to the domain-bus-device-function (optional). * bb, dd - Two hexadecimal digits without preceding 0x. * mmmm - Four hexadecimal digits without preceding 0x. * f - One hexadecimal without preceding 0x. * v1,v2 - Number with normal convention (e.g 100, 0xd3). * dbdf-to-value-list string format: * "[mmmm:]bb:dd.f-v1[;v2],[mmmm:]bb:dd.f-v1[;v2],..." * */ struct mlx4_dbdf2val_lst { char name[MLX4_STR_NAME_SIZE]; /* String name */ char str[MLX4_DBDF2VAL_STR_SIZE]; /* dbdf2val list str */ struct mlx4_dbdf2val tbl[MLX4_DEVS_TBL_SIZE];/* dbdf to value table */ int num_vals; /* # of vals per dbdf */ int def_val[MLX4_MAX_BDF_VALS]; /* Default values */ struct mlx4_range range; /* Valid values range */ }; int mlx4_fill_dbdf2val_tbl(struct mlx4_dbdf2val_lst *dbdf2val_lst); int mlx4_get_val(struct mlx4_dbdf2val *tbl, struct pci_dev *pdev, int idx, int *val); int mlx4_register_interface(struct mlx4_interface *intf); void mlx4_unregister_interface(struct mlx4_interface *intf); void *mlx4_get_protocol_dev(struct mlx4_dev *dev, enum mlx4_protocol proto, int port); #ifndef ETH_ALEN #define ETH_ALEN 6 #endif -static inline u64 mlx4_mac_to_u64(u8 *addr) +static inline u64 mlx4_mac_to_u64(const u8 *addr) { u64 mac = 0; int i; for (i = 0; i < ETH_ALEN; i++) { mac <<= 8; mac |= addr[i]; } return mac; } #endif /* MLX4_DRIVER_H */ Index: stable/10/sys/ofed/include/linux/netdevice.h =================================================================== --- stable/10/sys/ofed/include/linux/netdevice.h (revision 273245) +++ stable/10/sys/ofed/include/linux/netdevice.h (revision 273246) @@ -1,204 +1,203 @@ /*- * Copyright (c) 2010 Isilon Systems, Inc. * Copyright (c) 2010 iX Systems, Inc. * Copyright (c) 2010 Panasas, Inc. * Copyright (c) 2013, 2014 Mellanox Technologies, Ltd. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. 
Redistributions of source code must retain the above copyright * notice unmodified, this list of conditions, and the following * disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #ifndef _LINUX_NETDEVICE_H_ #define _LINUX_NETDEVICE_H_ #include #include #include #include #include #include #include #include -#include #include #include #include struct net { }; extern struct net init_net; #define MAX_ADDR_LEN 20 #define net_device ifnet #define dev_get_by_index(n, idx) ifnet_byindex_ref((idx)) #define dev_hold(d) if_ref((d)) #define dev_put(d) if_rele((d)) #define netif_running(dev) !!((dev)->if_drv_flags & IFF_DRV_RUNNING) #define netif_oper_up(dev) !!((dev)->if_flags & IFF_UP) #define netif_carrier_ok(dev) netif_running(dev) static inline void * netdev_priv(const struct net_device *dev) { return (dev->if_softc); } static inline void _handle_ifnet_link_event(void *arg, struct ifnet *ifp, int linkstate) { struct notifier_block *nb; nb = arg; if (linkstate == LINK_STATE_UP) nb->notifier_call(nb, NETDEV_UP, ifp); else nb->notifier_call(nb, NETDEV_DOWN, ifp); } static inline void _handle_ifnet_arrival_event(void *arg, struct ifnet *ifp) { struct notifier_block *nb; nb = arg; nb->notifier_call(nb, NETDEV_REGISTER, ifp); } static inline void _handle_ifnet_departure_event(void *arg, struct ifnet *ifp) { struct notifier_block *nb; nb = arg; nb->notifier_call(nb, NETDEV_UNREGISTER, ifp); } static inline void _handle_iflladdr_event(void *arg, struct ifnet *ifp) { struct notifier_block *nb; nb = arg; nb->notifier_call(nb, NETDEV_CHANGEADDR, ifp); } static inline void _handle_ifaddr_event(void *arg, struct ifnet *ifp) { struct notifier_block *nb; nb = arg; nb->notifier_call(nb, NETDEV_CHANGEIFADDR, ifp); } static inline int register_netdevice_notifier(struct notifier_block *nb) { nb->tags[NETDEV_UP] = EVENTHANDLER_REGISTER( ifnet_link_event, _handle_ifnet_link_event, nb, 0); nb->tags[NETDEV_REGISTER] = EVENTHANDLER_REGISTER( ifnet_arrival_event, _handle_ifnet_arrival_event, nb, 0); nb->tags[NETDEV_UNREGISTER] = EVENTHANDLER_REGISTER( ifnet_departure_event, _handle_ifnet_departure_event, nb, 0); nb->tags[NETDEV_CHANGEADDR] = EVENTHANDLER_REGISTER( iflladdr_event, _handle_iflladdr_event, nb, 0); return (0); } static inline int register_inetaddr_notifier(struct notifier_block *nb) { nb->tags[NETDEV_CHANGEIFADDR] = EVENTHANDLER_REGISTER( ifaddr_event, _handle_ifaddr_event, nb, 0); return (0); } static inline int unregister_netdevice_notifier(struct notifier_block *nb) { EVENTHANDLER_DEREGISTER(ifnet_link_event, nb->tags[NETDEV_UP]); EVENTHANDLER_DEREGISTER(ifnet_arrival_event, nb->tags[NETDEV_REGISTER]); 
EVENTHANDLER_DEREGISTER(ifnet_departure_event, nb->tags[NETDEV_UNREGISTER]); EVENTHANDLER_DEREGISTER(iflladdr_event, nb->tags[NETDEV_CHANGEADDR]); return (0); } static inline int unregister_inetaddr_notifier(struct notifier_block *nb) { EVENTHANDLER_DEREGISTER(ifaddr_event, nb->tags[NETDEV_CHANGEIFADDR]); return (0); } #define rtnl_lock() #define rtnl_unlock() static inline int dev_mc_delete(struct net_device *dev, void *addr, int alen, int all) { struct sockaddr_dl sdl; if (alen > sizeof(sdl.sdl_data)) return (-EINVAL); memset(&sdl, 0, sizeof(sdl)); sdl.sdl_len = sizeof(sdl); sdl.sdl_family = AF_LINK; sdl.sdl_alen = alen; memcpy(&sdl.sdl_data, addr, alen); return -if_delmulti(dev, (struct sockaddr *)&sdl); } static inline int dev_mc_add(struct net_device *dev, void *addr, int alen, int newonly) { struct sockaddr_dl sdl; if (alen > sizeof(sdl.sdl_data)) return (-EINVAL); memset(&sdl, 0, sizeof(sdl)); sdl.sdl_len = sizeof(sdl); sdl.sdl_family = AF_LINK; sdl.sdl_alen = alen; memcpy(&sdl.sdl_data, addr, alen); return -if_addmulti(dev, (struct sockaddr *)&sdl, NULL); } #endif /* _LINUX_NETDEVICE_H_ */ Index: stable/10/sys/ofed/include/linux/scatterlist.h =================================================================== --- stable/10/sys/ofed/include/linux/scatterlist.h (revision 273245) +++ stable/10/sys/ofed/include/linux/scatterlist.h (revision 273246) @@ -1,105 +1,332 @@ /*- * Copyright (c) 2010 Isilon Systems, Inc. * Copyright (c) 2010 iX Systems, Inc. * Copyright (c) 2010 Panasas, Inc. * Copyright (c) 2013, 2014 Mellanox Technologies, Ltd. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice unmodified, this list of conditions, and the following * disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #ifndef _LINUX_SCATTERLIST_H_ #define _LINUX_SCATTERLIST_H_ #include +#include +/* + * SG table design. + * + * If flags bit 0 is set, then the sg field contains a pointer to the next sg + * table list. Otherwise the next entry is at sg + 1, can be determined using + * the sg_is_chain() function. + * + * If flags bit 1 is set, then this sg entry is the last element in a list, + * can be determined using the sg_is_last() function. + * + * See sg_next(). 
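/*
 * Illustrative sketch, not part of this diff: building and walking a short,
 * unchained scatterlist with the compat helpers defined just below.  Note
 * that this implementation uses SG_END (0x01) and SG_CHAIN (0x02) flag
 * values rather than the bit assignment described in the imported comment
 * above; the example only goes through the accessors, so the difference
 * does not matter here.  example_walk_sgl and its buffers are hypothetical.
 */
#include <linux/scatterlist.h>

static void
example_walk_sgl(void *buf_a, void *buf_b, unsigned int len)
{
	struct scatterlist sgl[2];
	struct scatterlist *sg;
	int i;

	sg_init_table(sgl, 2);	/* zeroes the entries, marks sgl[1] with SG_END */
	sg_set_buf(&sgl[0], buf_a, len);
	sg_set_buf(&sgl[1], buf_b, len);

	/* sg_next() returns NULL once it is invoked on the SG_END entry. */
	for_each_sg(sgl, sg, 2, i)
		printf("entry %d: len %u offset %lu\n", i,
		    sg_dma_len(sg), sg->offset);
}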
+ * + */ + struct scatterlist { union { struct page *page; struct scatterlist *sg; } sl_un; dma_addr_t address; unsigned long offset; uint32_t length; uint32_t flags; }; struct sg_table { struct scatterlist *sgl; /* the list */ unsigned int nents; /* number of mapped entries */ unsigned int orig_nents; /* original size of list */ }; +/* + * Maximum number of entries that will be allocated in one piece, if + * a list larger than this is required then chaining will be utilized. + */ +#define SG_MAX_SINGLE_ALLOC (PAGE_SIZE / sizeof(struct scatterlist)) + #define sg_dma_address(sg) (sg)->address #define sg_dma_len(sg) (sg)->length #define sg_page(sg) (sg)->sl_un.page #define sg_scatternext(sg) (sg)->sl_un.sg #define SG_END 0x01 #define SG_CHAIN 0x02 static inline void sg_set_page(struct scatterlist *sg, struct page *page, unsigned int len, unsigned int offset) { sg_page(sg) = page; sg_dma_len(sg) = len; sg->offset = offset; if (offset > PAGE_SIZE) panic("sg_set_page: Invalid offset %d\n", offset); } static inline void sg_set_buf(struct scatterlist *sg, const void *buf, unsigned int buflen) { sg_set_page(sg, virt_to_page(buf), buflen, ((uintptr_t)buf) & ~PAGE_MASK); } static inline void sg_init_table(struct scatterlist *sg, unsigned int nents) { bzero(sg, sizeof(*sg) * nents); sg[nents - 1].flags = SG_END; } static inline struct scatterlist * sg_next(struct scatterlist *sg) { if (sg->flags & SG_END) return (NULL); sg++; if (sg->flags & SG_CHAIN) sg = sg_scatternext(sg); return (sg); } static inline vm_paddr_t sg_phys(struct scatterlist *sg) { return sg_page(sg)->phys_addr + sg->offset; +} + +/** + * sg_chain - Chain two sglists together + * @prv: First scatterlist + * @prv_nents: Number of entries in prv + * @sgl: Second scatterlist + * + * Description: + * Links @prv@ and @sgl@ together, to form a longer scatterlist. + * + **/ +static inline void +sg_chain(struct scatterlist *prv, unsigned int prv_nents, + struct scatterlist *sgl) +{ +/* + * offset and length are unused for chain entry. Clear them. + */ + struct scatterlist *sg = &prv[prv_nents - 1]; + + sg->offset = 0; + sg->length = 0; + + /* + * Indicate a link pointer, and set the link to the second list. + */ + sg->flags = SG_CHAIN; + sg->sl_un.sg = sgl; +} + +/** + * sg_mark_end - Mark the end of the scatterlist + * @sg: SG entryScatterlist + * + * Description: + * Marks the passed in sg entry as the termination point for the sg + * table. A call to sg_next() on this entry will return NULL. + * + **/ +static inline void sg_mark_end(struct scatterlist *sg) +{ + sg->flags = SG_END; +} + +/** + * __sg_free_table - Free a previously mapped sg table + * @table: The sg table header to use + * @max_ents: The maximum number of entries per single scatterlist + * + * Description: + * Free an sg table previously allocated and setup with + * __sg_alloc_table(). The @max_ents value must be identical to + * that previously used with __sg_alloc_table(). + * + **/ +static inline void +__sg_free_table(struct sg_table *table, unsigned int max_ents) +{ + struct scatterlist *sgl, *next; + + if (unlikely(!table->sgl)) + return; + + sgl = table->sgl; + while (table->orig_nents) { + unsigned int alloc_size = table->orig_nents; + unsigned int sg_size; + + /* + * If we have more than max_ents segments left, + * then assign 'next' to the sg table after the current one. + * sg_size is then one less than alloc size, since the last + * element is the chain pointer. 
+ */ + if (alloc_size > max_ents) { + next = sgl[max_ents - 1].sl_un.sg; + alloc_size = max_ents; + sg_size = alloc_size - 1; + } else { + sg_size = alloc_size; + next = NULL; + } + + table->orig_nents -= sg_size; + kfree(sgl); + sgl = next; + } + + table->sgl = NULL; +} + +/** + * sg_free_table - Free a previously allocated sg table + * @table: The mapped sg table header + * + **/ +static inline void +sg_free_table(struct sg_table *table) +{ + __sg_free_table(table, SG_MAX_SINGLE_ALLOC); +} + +/** + * __sg_alloc_table - Allocate and initialize an sg table with given allocator + * @table: The sg table header to use + * @nents: Number of entries in sg list + * @max_ents: The maximum number of entries the allocator returns per call + * @gfp_mask: GFP allocation mask + * + * Description: + * This function returns a @table @nents long. The allocator is + * defined to return scatterlist chunks of maximum size @max_ents. + * Thus if @nents is bigger than @max_ents, the scatterlists will be + * chained in units of @max_ents. + * + * Notes: + * If this function returns non-0 (eg failure), the caller must call + * __sg_free_table() to cleanup any leftover allocations. + * + **/ +static inline int +__sg_alloc_table(struct sg_table *table, unsigned int nents, + unsigned int max_ents, gfp_t gfp_mask) +{ + struct scatterlist *sg, *prv; + unsigned int left; + + memset(table, 0, sizeof(*table)); + + if (nents == 0) + return -EINVAL; + left = nents; + prv = NULL; + do { + unsigned int sg_size, alloc_size = left; + + if (alloc_size > max_ents) { + alloc_size = max_ents; + sg_size = alloc_size - 1; + } else + sg_size = alloc_size; + + left -= sg_size; + + sg = kmalloc(alloc_size * sizeof(struct scatterlist), gfp_mask); + if (unlikely(!sg)) { + /* + * Adjust entry count to reflect that the last + * entry of the previous table won't be used for + * linkage. Without this, sg_kfree() may get + * confused. + */ + if (prv) + table->nents = ++table->orig_nents; + + return -ENOMEM; + } + + sg_init_table(sg, alloc_size); + table->nents = table->orig_nents += sg_size; + + /* + * If this is the first mapping, assign the sg table header. + * If this is not the first mapping, chain previous part. + */ + if (prv) + sg_chain(prv, max_ents, sg); + else + table->sgl = sg; + + /* + * If no more entries after this one, mark the end + */ + if (!left) + sg_mark_end(&sg[sg_size - 1]); + + prv = sg; + } while (left); + + return 0; +} + +/** + * sg_alloc_table - Allocate and initialize an sg table + * @table: The sg table header to use + * @nents: Number of entries in sg list + * @gfp_mask: GFP allocation mask + * + * Description: + * Allocate and initialize an sg table. If @nents@ is larger than + * SG_MAX_SINGLE_ALLOC a chained sg table will be setup. + * + **/ + +static inline int +sg_alloc_table(struct sg_table *table, unsigned int nents, gfp_t gfp_mask) +{ + int ret; + + ret = __sg_alloc_table(table, nents, SG_MAX_SINGLE_ALLOC, + gfp_mask); + if (unlikely(ret)) + __sg_free_table(table, SG_MAX_SINGLE_ALLOC); + + return ret; } #define for_each_sg(sglist, sg, sgmax, _itr) \ for (_itr = 0, sg = (sglist); _itr < (sgmax); _itr++, sg = sg_next(sg)) #endif /* _LINUX_SCATTERLIST_H_ */ Index: stable/10/sys/ofed/include/linux/vmalloc.h =================================================================== --- stable/10/sys/ofed/include/linux/vmalloc.h (revision 273245) +++ stable/10/sys/ofed/include/linux/vmalloc.h (revision 273246) @@ -1,42 +1,42 @@ /*- * Copyright (c) 2010 Isilon Systems, Inc. * Copyright (c) 2010 iX Systems, Inc. 
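/*
 * Illustrative sketch, not part of this diff: typical use of the chained
 * sg_table API added above.  If npages exceeds SG_MAX_SINGLE_ALLOC the
 * allocator chains several chunks together, and sg_next()/for_each_sg()
 * walk across the chain transparently.  example_fill_sg_table is a
 * hypothetical consumer; the ib_dma_map_sg() mention is only indicative.
 */
#include <linux/gfp.h>
#include <linux/scatterlist.h>

static int
example_fill_sg_table(struct page **pages, unsigned int npages)
{
	struct sg_table table;
	struct scatterlist *sg;
	unsigned int i;
	int err;

	err = sg_alloc_table(&table, npages, GFP_KERNEL);
	if (err)
		return (err);

	/* One full page per entry; a real caller may vary length/offset. */
	for_each_sg(table.sgl, sg, table.nents, i)
		sg_set_page(sg, pages[i], PAGE_SIZE, 0);

	/* ... pass table.sgl / table.nents to ib_dma_map_sg() or similar ... */

	sg_free_table(&table);
	return (0);
}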
* Copyright (c) 2010 Panasas, Inc. * Copyright (c) 2013, 2014 Mellanox Technologies, Ltd. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice unmodified, this list of conditions, and the following * disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #ifndef _LINUX_VMALLOC_H_ #define _LINUX_VMALLOC_H_ -#include +#include #define VM_MAP 0x0000 #define PAGE_KERNEL 0x0000 void *vmap(struct page **pages, unsigned int count, unsigned long flags, int prot); void vunmap(void *addr); #endif /* _LINUX_VMALLOC_H_ */ Index: stable/10/sys/ofed/include/rdma/ib_addr.h =================================================================== --- stable/10/sys/ofed/include/rdma/ib_addr.h (revision 273245) +++ stable/10/sys/ofed/include/rdma/ib_addr.h (revision 273246) @@ -1,318 +1,317 @@ /* * Copyright (c) 2005 Voltaire Inc. All rights reserved. * Copyright (c) 2005 Intel Corporation. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU * General Public License (GPL) Version 2, available from the file * COPYING in the main directory of this source tree, or the * OpenIB.org BSD license below: * * Redistribution and use in source and binary forms, with or * without modification, are permitted provided that the following * conditions are met: * * - Redistributions of source code must retain the above * copyright notice, this list of conditions and the following * disclaimer. * * - Redistributions in binary form must reproduce the above * copyright notice, this list of conditions and the following * disclaimer in the documentation and/or other materials * provided with the distribution. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ #if !defined(IB_ADDR_H) #define IB_ADDR_H #include #include #include #include #include #include #include -#include #include struct rdma_addr_client { atomic_t refcount; struct completion comp; }; /** * rdma_addr_register_client - Register an address client. 
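/*
 * Illustrative sketch, not part of this diff: the vmap()/vunmap() pair
 * declared in linux/vmalloc.h above maps a discontiguous page array into
 * one contiguous kernel virtual range.  The example_ wrappers are
 * hypothetical.
 */
#include <linux/vmalloc.h>

static void *
example_map_pages(struct page **pages, unsigned int count)
{
	/* VM_MAP and PAGE_KERNEL are both no-ops (0) in this compat layer. */
	return (vmap(pages, count, VM_MAP, PAGE_KERNEL));
}

static void
example_unmap_pages(void *addr)
{
	if (addr != NULL)
		vunmap(addr);
}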
*/ void rdma_addr_register_client(struct rdma_addr_client *client); /** * rdma_addr_unregister_client - Deregister an address client. * @client: Client object to deregister. */ void rdma_addr_unregister_client(struct rdma_addr_client *client); struct rdma_dev_addr { unsigned char src_dev_addr[MAX_ADDR_LEN]; unsigned char dst_dev_addr[MAX_ADDR_LEN]; unsigned char broadcast[MAX_ADDR_LEN]; unsigned short dev_type; int bound_dev_if; enum rdma_transport_type transport; }; /** * rdma_translate_ip - Translate a local IP address to an RDMA hardware * address. */ int rdma_translate_ip(struct sockaddr *addr, struct rdma_dev_addr *dev_addr); /** * rdma_resolve_ip - Resolve source and destination IP addresses to * RDMA hardware addresses. * @client: Address client associated with request. * @src_addr: An optional source address to use in the resolution. If a * source address is not provided, a usable address will be returned via * the callback. * @dst_addr: The destination address to resolve. * @addr: A reference to a data location that will receive the resolved * addresses. The data location must remain valid until the callback has * been invoked. * @timeout_ms: Amount of time to wait for the address resolution to complete. * @callback: Call invoked once address resolution has completed, timed out, * or been canceled. A status of 0 indicates success. * @context: User-specified context associated with the call. */ int rdma_resolve_ip(struct rdma_addr_client *client, struct sockaddr *src_addr, struct sockaddr *dst_addr, struct rdma_dev_addr *addr, int timeout_ms, void (*callback)(int status, struct sockaddr *src_addr, struct rdma_dev_addr *addr, void *context), void *context); void rdma_addr_cancel(struct rdma_dev_addr *addr); int rdma_copy_addr(struct rdma_dev_addr *dev_addr, struct net_device *dev, const unsigned char *dst_dev_addr); static inline int ip_addr_size(struct sockaddr *addr) { return addr->sa_family == AF_INET6 ? sizeof(struct sockaddr_in6) : sizeof(struct sockaddr_in); } static inline u16 ib_addr_get_pkey(struct rdma_dev_addr *dev_addr) { return ((u16)dev_addr->broadcast[8] << 8) | (u16)dev_addr->broadcast[9]; } static inline void ib_addr_set_pkey(struct rdma_dev_addr *dev_addr, u16 pkey) { dev_addr->broadcast[8] = pkey >> 8; dev_addr->broadcast[9] = (unsigned char) pkey; } static inline void ib_addr_get_mgid(struct rdma_dev_addr *dev_addr, union ib_gid *gid) { memcpy(gid, dev_addr->broadcast + 4, sizeof *gid); } static inline int rdma_addr_gid_offset(struct rdma_dev_addr *dev_addr) { return dev_addr->dev_type == ARPHRD_INFINIBAND ? 4 : 0; } static inline void iboe_mac_vlan_to_ll(union ib_gid *gid, u8 *mac, u16 vid) { memset(gid->raw, 0, 16); *((u32 *)gid->raw) = cpu_to_be32(0xfe800000); if (vid < 0x1000) { gid->raw[12] = vid & 0xff; gid->raw[11] = vid >> 8; } else { gid->raw[12] = 0xfe; gid->raw[11] = 0xff; } memcpy(gid->raw + 13, mac + 3, 3); memcpy(gid->raw + 8, mac, 3); gid->raw[8] ^= 2; } static inline u16 rdma_vlan_dev_vlan_id(const struct net_device *dev) { #ifdef __linux__ return dev->priv_flags & IFF_802_1Q_VLAN ? 
vlan_dev_vlan_id(dev) : 0xffff; #else uint16_t tag; if (VLAN_TAG(__DECONST(struct ifnet *, dev), &tag) != 0) return 0xffff; return tag; #endif } static inline void iboe_addr_get_sgid(struct rdma_dev_addr *dev_addr, union ib_gid *gid) { struct net_device *dev; u16 vid = 0xffff; dev = dev_get_by_index(&init_net, dev_addr->bound_dev_if); if (dev) { vid = rdma_vlan_dev_vlan_id(dev); dev_put(dev); } iboe_mac_vlan_to_ll(gid, dev_addr->src_dev_addr, vid); } static inline void rdma_addr_get_sgid(struct rdma_dev_addr *dev_addr, union ib_gid *gid) { if (dev_addr->transport == RDMA_TRANSPORT_IB && dev_addr->dev_type != ARPHRD_INFINIBAND) iboe_addr_get_sgid(dev_addr, gid); else memcpy(gid, dev_addr->src_dev_addr + rdma_addr_gid_offset(dev_addr), sizeof *gid); } static inline void rdma_addr_set_sgid(struct rdma_dev_addr *dev_addr, union ib_gid *gid) { memcpy(dev_addr->src_dev_addr + rdma_addr_gid_offset(dev_addr), gid, sizeof *gid); } static inline void rdma_addr_get_dgid(struct rdma_dev_addr *dev_addr, union ib_gid *gid) { memcpy(gid, dev_addr->dst_dev_addr + rdma_addr_gid_offset(dev_addr), sizeof *gid); } static inline void rdma_addr_set_dgid(struct rdma_dev_addr *dev_addr, union ib_gid *gid) { memcpy(dev_addr->dst_dev_addr + rdma_addr_gid_offset(dev_addr), gid, sizeof *gid); } static inline enum ib_mtu iboe_get_mtu(int mtu) { /* * reduce IB headers from effective IBoE MTU. 28 stands for * atomic header which is the biggest possible header after BTH */ mtu = mtu - IB_GRH_BYTES - IB_BTH_BYTES - 28; if (mtu >= ib_mtu_enum_to_int(IB_MTU_4096)) return IB_MTU_4096; else if (mtu >= ib_mtu_enum_to_int(IB_MTU_2048)) return IB_MTU_2048; else if (mtu >= ib_mtu_enum_to_int(IB_MTU_1024)) return IB_MTU_1024; else if (mtu >= ib_mtu_enum_to_int(IB_MTU_512)) return IB_MTU_512; else if (mtu >= ib_mtu_enum_to_int(IB_MTU_256)) return IB_MTU_256; else return 0; } #ifdef __linux__ static inline int iboe_get_rate(struct net_device *dev) { struct ethtool_cmd cmd; if (!dev->ethtool_ops || !dev->ethtool_ops->get_settings || dev->ethtool_ops->get_settings(dev, &cmd)) return IB_RATE_PORT_CURRENT; if (cmd.speed >= 40000) return IB_RATE_40_GBPS; else if (cmd.speed >= 30000) return IB_RATE_30_GBPS; else if (cmd.speed >= 20000) return IB_RATE_20_GBPS; else if (cmd.speed >= 10000) return IB_RATE_10_GBPS; else return IB_RATE_PORT_CURRENT; } #else static inline int iboe_get_rate(struct net_device *dev) { uintmax_t baudrate; int exp; baudrate = dev->if_baudrate; for (exp = dev->if_baudrate_pf; exp > 0; exp--) baudrate *= 10; if (baudrate >= IF_Gbps(40)) return IB_RATE_40_GBPS; else if (baudrate >= IF_Gbps(30)) return IB_RATE_30_GBPS; else if (baudrate >= IF_Gbps(20)) return IB_RATE_20_GBPS; else if (baudrate >= IF_Gbps(10)) return IB_RATE_10_GBPS; else return IB_RATE_PORT_CURRENT; } #endif static inline int rdma_link_local_addr(struct in6_addr *addr) { if (addr->s6_addr32[0] == cpu_to_be32(0xfe800000) && addr->s6_addr32[1] == 0) return 1; return 0; } static inline void rdma_get_ll_mac(struct in6_addr *addr, u8 *mac) { memcpy(mac, &addr->s6_addr[8], 3); memcpy(mac + 3, &addr->s6_addr[13], 3); mac[0] ^= 2; } static inline int rdma_is_multicast_addr(struct in6_addr *addr) { return addr->s6_addr[0] == 0xff; } static inline void rdma_get_mcast_mac(struct in6_addr *addr, u8 *mac) { int i; mac[0] = 0x33; mac[1] = 0x33; for (i = 2; i < 6; ++i) mac[i] = addr->s6_addr[i + 10]; } static inline u16 rdma_get_vlan_id(union ib_gid *dgid) { u16 vid; vid = dgid->raw[11] << 8 | dgid->raw[12]; return vid < 0x1000 ? 
vid : 0xffff; } static inline struct net_device *rdma_vlan_dev_real_dev(const struct net_device *dev) { #ifdef __linux__ return dev->priv_flags & IFF_802_1Q_VLAN ? vlan_dev_real_dev(dev) : 0; #else return VLAN_TRUNKDEV(__DECONST(struct ifnet *, dev)); #endif } #endif /* IB_ADDR_H */ Index: stable/10/sys/ofed/include/rdma/ib_smi.h =================================================================== --- stable/10/sys/ofed/include/rdma/ib_smi.h (revision 273245) +++ stable/10/sys/ofed/include/rdma/ib_smi.h (revision 273246) @@ -1,128 +1,129 @@ /* * Copyright (c) 2004 Mellanox Technologies Ltd. All rights reserved. * Copyright (c) 2004 Infinicon Corporation. All rights reserved. * Copyright (c) 2004 Intel Corporation. All rights reserved. * Copyright (c) 2004 Topspin Corporation. All rights reserved. * Copyright (c) 2004 Voltaire Corporation. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU * General Public License (GPL) Version 2, available from the file * COPYING in the main directory of this source tree, or the * OpenIB.org BSD license below: * * Redistribution and use in source and binary forms, with or * without modification, are permitted provided that the following * conditions are met: * * - Redistributions of source code must retain the above * copyright notice, this list of conditions and the following * disclaimer. * * - Redistributions in binary form must reproduce the above * copyright notice, this list of conditions and the following * disclaimer in the documentation and/or other materials * provided with the distribution. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
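/*
 * Illustrative sketch, not part of this diff: asynchronous address
 * resolution with the rdma_addr API declared in ib_addr.h above.  The
 * example_ctx structure, the 2000 ms timeout and the callback body are
 * assumptions for illustration only.
 */
#include <rdma/ib_addr.h>

struct example_ctx {
	struct rdma_addr_client	client;
	struct rdma_dev_addr	dev_addr;
};

static void
example_resolve_done(int status, struct sockaddr *src_addr,
    struct rdma_dev_addr *addr, void *context)
{
	union ib_gid sgid, dgid;

	if (status != 0)
		return;

	/* The hardware addresses are valid now; pull out the GIDs. */
	rdma_addr_get_sgid(addr, &sgid);
	rdma_addr_get_dgid(addr, &dgid);
}

static int
example_resolve(struct example_ctx *ctx, struct sockaddr *dst)
{
	rdma_addr_register_client(&ctx->client);

	/*
	 * No explicit source address: the core selects a usable one and
	 * reports it through the callback's src_addr argument.
	 */
	return (rdma_resolve_ip(&ctx->client, NULL, dst, &ctx->dev_addr,
	    2000, example_resolve_done, ctx));
}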
*/ #if !defined(IB_SMI_H) #define IB_SMI_H #include +#include #define IB_SMP_DATA_SIZE 64 #define IB_SMP_MAX_PATH_HOPS 64 struct ib_smp { u8 base_version; u8 mgmt_class; u8 class_version; u8 method; __be16 status; u8 hop_ptr; u8 hop_cnt; __be64 tid; __be16 attr_id; __be16 resv; __be32 attr_mod; __be64 mkey; __be16 dr_slid; __be16 dr_dlid; u8 reserved[28]; u8 data[IB_SMP_DATA_SIZE]; u8 initial_path[IB_SMP_MAX_PATH_HOPS]; u8 return_path[IB_SMP_MAX_PATH_HOPS]; } __attribute__ ((packed)); #define IB_SMP_DIRECTION cpu_to_be16(0x8000) /* Subnet management attributes */ #define IB_SMP_ATTR_NOTICE cpu_to_be16(0x0002) #define IB_SMP_ATTR_NODE_DESC cpu_to_be16(0x0010) #define IB_SMP_ATTR_NODE_INFO cpu_to_be16(0x0011) #define IB_SMP_ATTR_SWITCH_INFO cpu_to_be16(0x0012) #define IB_SMP_ATTR_GUID_INFO cpu_to_be16(0x0014) #define IB_SMP_ATTR_PORT_INFO cpu_to_be16(0x0015) #define IB_SMP_ATTR_PKEY_TABLE cpu_to_be16(0x0016) #define IB_SMP_ATTR_SL_TO_VL_TABLE cpu_to_be16(0x0017) #define IB_SMP_ATTR_VL_ARB_TABLE cpu_to_be16(0x0018) #define IB_SMP_ATTR_LINEAR_FORWARD_TABLE cpu_to_be16(0x0019) #define IB_SMP_ATTR_RANDOM_FORWARD_TABLE cpu_to_be16(0x001A) #define IB_SMP_ATTR_MCAST_FORWARD_TABLE cpu_to_be16(0x001B) #define IB_SMP_ATTR_SM_INFO cpu_to_be16(0x0020) #define IB_SMP_ATTR_VENDOR_DIAG cpu_to_be16(0x0030) #define IB_SMP_ATTR_LED_INFO cpu_to_be16(0x0031) #define IB_SMP_ATTR_VENDOR_MASK cpu_to_be16(0xFF00) struct ib_port_info { __be64 mkey; __be64 gid_prefix; __be16 lid; __be16 sm_lid; __be32 cap_mask; __be16 diag_code; __be16 mkey_lease_period; u8 local_port_num; u8 link_width_enabled; u8 link_width_supported; u8 link_width_active; u8 linkspeed_portstate; /* 4 bits, 4 bits */ u8 portphysstate_linkdown; /* 4 bits, 4 bits */ u8 mkeyprot_resv_lmc; /* 2 bits, 3, 3 */ u8 linkspeedactive_enabled; /* 4 bits, 4 bits */ u8 neighbormtu_mastersmsl; /* 4 bits, 4 bits */ u8 vlcap_inittype; /* 4 bits, 4 bits */ u8 vl_high_limit; u8 vl_arb_high_cap; u8 vl_arb_low_cap; u8 inittypereply_mtucap; /* 4 bits, 4 bits */ u8 vlstallcnt_hoqlife; /* 3 bits, 5 bits */ u8 operationalvl_pei_peo_fpi_fpo; /* 4 bits, 1, 1, 1, 1 */ __be16 mkey_violations; __be16 pkey_violations; __be16 qkey_violations; u8 guid_cap; u8 clientrereg_resv_subnetto; /* 1 bit, 2 bits, 5 */ u8 resv_resptimevalue; /* 3 bits, 5 bits */ u8 localphyerrors_overrunerrors; /* 4 bits, 4 bits */ __be16 max_credit_hint; u8 resv; u8 link_roundtrip_latency[3]; }; static inline u8 ib_get_smp_direction(struct ib_smp *smp) { return ((smp->status & IB_SMP_DIRECTION) == IB_SMP_DIRECTION); } #endif /* IB_SMI_H */ Index: stable/10/sys/ofed/include/rdma/ib_user_cm.h =================================================================== --- stable/10/sys/ofed/include/rdma/ib_user_cm.h (revision 273245) +++ stable/10/sys/ofed/include/rdma/ib_user_cm.h (revision 273246) @@ -1,324 +1,325 @@ /* * Copyright (c) 2005 Topspin Communications. All rights reserved. * Copyright (c) 2005 Intel Corporation. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU * General Public License (GPL) Version 2, available from the file * COPYING in the main directory of this source tree, or the * OpenIB.org BSD license below: * * Redistribution and use in source and binary forms, with or * without modification, are permitted provided that the following * conditions are met: * * - Redistributions of source code must retain the above * copyright notice, this list of conditions and the following * disclaimer. 
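/*
 * Illustrative sketch, not part of this diff: a small predicate built on
 * the ib_smp layout and attribute IDs defined in ib_smi.h above.  The
 * example_ name is hypothetical.
 */
#include <rdma/ib_smi.h>

static int
example_is_dr_portinfo(struct ib_smp *smp)
{
	/* Directed-route SMP carrying a PortInfo attribute? */
	return (ib_get_smp_direction(smp) &&
	    smp->attr_id == IB_SMP_ATTR_PORT_INFO);
}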
* * - Redistributions in binary form must reproduce the above * copyright notice, this list of conditions and the following * disclaimer in the documentation and/or other materials * provided with the distribution. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ #ifndef IB_USER_CM_H #define IB_USER_CM_H +#include #include #define IB_USER_CM_ABI_VERSION 5 enum { IB_USER_CM_CMD_CREATE_ID, IB_USER_CM_CMD_DESTROY_ID, IB_USER_CM_CMD_ATTR_ID, IB_USER_CM_CMD_LISTEN, IB_USER_CM_CMD_NOTIFY, IB_USER_CM_CMD_SEND_REQ, IB_USER_CM_CMD_SEND_REP, IB_USER_CM_CMD_SEND_RTU, IB_USER_CM_CMD_SEND_DREQ, IB_USER_CM_CMD_SEND_DREP, IB_USER_CM_CMD_SEND_REJ, IB_USER_CM_CMD_SEND_MRA, IB_USER_CM_CMD_SEND_LAP, IB_USER_CM_CMD_SEND_APR, IB_USER_CM_CMD_SEND_SIDR_REQ, IB_USER_CM_CMD_SEND_SIDR_REP, IB_USER_CM_CMD_EVENT, IB_USER_CM_CMD_INIT_QP_ATTR, }; /* * command ABI structures. */ struct ib_ucm_cmd_hdr { __u32 cmd; __u16 in; __u16 out; }; struct ib_ucm_create_id { __u64 uid; __u64 response; }; struct ib_ucm_create_id_resp { __u32 id; }; struct ib_ucm_destroy_id { __u64 response; __u32 id; __u32 reserved; }; struct ib_ucm_destroy_id_resp { __u32 events_reported; }; struct ib_ucm_attr_id { __u64 response; __u32 id; __u32 reserved; }; struct ib_ucm_attr_id_resp { __be64 service_id; __be64 service_mask; __be32 local_id; __be32 remote_id; }; struct ib_ucm_init_qp_attr { __u64 response; __u32 id; __u32 qp_state; }; struct ib_ucm_listen { __be64 service_id; __be64 service_mask; __u32 id; __u32 reserved; }; struct ib_ucm_notify { __u32 id; __u32 event; }; struct ib_ucm_private_data { __u64 data; __u32 id; __u8 len; __u8 reserved[3]; }; struct ib_ucm_req { __u32 id; __u32 qpn; __u32 qp_type; __u32 psn; __be64 sid; __u64 data; __u64 primary_path; __u64 alternate_path; __u8 len; __u8 peer_to_peer; __u8 responder_resources; __u8 initiator_depth; __u8 remote_cm_response_timeout; __u8 flow_control; __u8 local_cm_response_timeout; __u8 retry_count; __u8 rnr_retry_count; __u8 max_cm_retries; __u8 srq; __u8 reserved[5]; }; struct ib_ucm_rep { __u64 uid; __u64 data; __u32 id; __u32 qpn; __u32 psn; __u8 len; __u8 responder_resources; __u8 initiator_depth; __u8 target_ack_delay; __u8 failover_accepted; __u8 flow_control; __u8 rnr_retry_count; __u8 srq; __u8 reserved[4]; }; struct ib_ucm_info { __u32 id; __u32 status; __u64 info; __u64 data; __u8 info_len; __u8 data_len; __u8 reserved[6]; }; struct ib_ucm_mra { __u64 data; __u32 id; __u8 len; __u8 timeout; __u8 reserved[2]; }; struct ib_ucm_lap { __u64 path; __u64 data; __u32 id; __u8 len; __u8 reserved[3]; }; struct ib_ucm_sidr_req { __u32 id; __u32 timeout; __be64 sid; __u64 data; __u64 path; __u16 reserved_pkey; __u8 len; __u8 max_cm_retries; __u8 reserved[4]; }; struct ib_ucm_sidr_rep { __u32 id; __u32 qpn; __u32 qkey; __u32 status; __u64 info; __u64 data; __u8 info_len; __u8 data_len; __u8 reserved[6]; }; /* * event notification ABI structures. 
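/*
 * Illustrative userspace sketch, not part of this diff: issuing
 * IB_USER_CM_CMD_CREATE_ID through the ucm character device, assuming the
 * usual write()-based ABI in which an ib_ucm_cmd_hdr is immediately
 * followed by the command payload and 'response' points at a user buffer.
 * The fd handling and the example_ name are hypothetical.
 */
#include <rdma/ib_user_cm.h>
#include <stdint.h>
#include <string.h>
#include <unistd.h>

static int
example_create_id(int ucm_fd, __u64 uid)
{
	struct {
		struct ib_ucm_cmd_hdr	hdr;
		struct ib_ucm_create_id	cmd;
	} msg;
	struct ib_ucm_create_id_resp resp;

	memset(&msg, 0, sizeof(msg));
	memset(&resp, 0, sizeof(resp));
	msg.hdr.cmd = IB_USER_CM_CMD_CREATE_ID;
	msg.hdr.in = sizeof(msg.cmd);
	msg.hdr.out = sizeof(resp);
	msg.cmd.uid = uid;
	msg.cmd.response = (uintptr_t)&resp;	/* kernel writes the new id here */

	if (write(ucm_fd, &msg, sizeof(msg)) != (ssize_t)sizeof(msg))
		return (-1);

	return ((int)resp.id);
}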
*/ struct ib_ucm_event_get { __u64 response; __u64 data; __u64 info; __u8 data_len; __u8 info_len; __u8 reserved[6]; }; struct ib_ucm_req_event_resp { struct ib_user_path_rec primary_path; struct ib_user_path_rec alternate_path; __be64 remote_ca_guid; __u32 remote_qkey; __u32 remote_qpn; __u32 qp_type; __u32 starting_psn; __u8 responder_resources; __u8 initiator_depth; __u8 local_cm_response_timeout; __u8 flow_control; __u8 remote_cm_response_timeout; __u8 retry_count; __u8 rnr_retry_count; __u8 srq; __u8 port; __u8 reserved[7]; }; struct ib_ucm_rep_event_resp { __be64 remote_ca_guid; __u32 remote_qkey; __u32 remote_qpn; __u32 starting_psn; __u8 responder_resources; __u8 initiator_depth; __u8 target_ack_delay; __u8 failover_accepted; __u8 flow_control; __u8 rnr_retry_count; __u8 srq; __u8 reserved[5]; }; struct ib_ucm_rej_event_resp { __u32 reason; /* ari in ib_ucm_event_get info field. */ }; struct ib_ucm_mra_event_resp { __u8 timeout; __u8 reserved[3]; }; struct ib_ucm_lap_event_resp { struct ib_user_path_rec path; }; struct ib_ucm_apr_event_resp { __u32 status; /* apr info in ib_ucm_event_get info field. */ }; struct ib_ucm_sidr_req_event_resp { __u16 pkey; __u8 port; __u8 reserved; }; struct ib_ucm_sidr_rep_event_resp { __u32 status; __u32 qkey; __u32 qpn; /* info in ib_ucm_event_get info field. */ }; #define IB_UCM_PRES_DATA 0x01 #define IB_UCM_PRES_INFO 0x02 #define IB_UCM_PRES_PRIMARY 0x04 #define IB_UCM_PRES_ALTERNATE 0x08 struct ib_ucm_event_resp { __u64 uid; __u32 id; __u32 event; __u32 present; __u32 reserved; union { struct ib_ucm_req_event_resp req_resp; struct ib_ucm_rep_event_resp rep_resp; struct ib_ucm_rej_event_resp rej_resp; struct ib_ucm_mra_event_resp mra_resp; struct ib_ucm_lap_event_resp lap_resp; struct ib_ucm_apr_event_resp apr_resp; struct ib_ucm_sidr_req_event_resp sidr_req_resp; struct ib_ucm_sidr_rep_event_resp sidr_rep_resp; __u32 send_status; } u; }; #endif /* IB_USER_CM_H */ Index: stable/10 =================================================================== --- stable/10 (revision 273245) +++ stable/10 (revision 273246) Property changes on: stable/10 ___________________________________________________________________ Modified: svn:mergeinfo ## -0,0 +0,1 ## Merged /head:r273135
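/*
 * Illustrative userspace sketch, not part of this diff: decoding the
 * 'present' bits of an ib_ucm_event_resp returned for
 * IB_USER_CM_CMD_EVENT.  Each bit indicates which optional piece of the
 * event the kernel actually filled in.  The handler name is hypothetical.
 */
#include <rdma/ib_user_cm.h>

static void
example_handle_event(const struct ib_ucm_event_resp *resp)
{
	if (resp->present & IB_UCM_PRES_PRIMARY) {
		/* resp->u.req_resp.primary_path is valid (connection request). */
	}
	if (resp->present & IB_UCM_PRES_ALTERNATE) {
		/* resp->u.req_resp.alternate_path is valid as well. */
	}
	if (resp->present & IB_UCM_PRES_DATA) {
		/* Private data was copied to the buffer named in ib_ucm_event_get. */
	}
	if (resp->present & IB_UCM_PRES_INFO) {
		/* Extra info (e.g. a REJ ARI) was copied to the info buffer. */
	}
}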