diff --git a/sys/contrib/rdma/krping/krping.c b/sys/contrib/rdma/krping/krping.c index fe8ff9ed9243..ead1bf54dca2 100644 --- a/sys/contrib/rdma/krping/krping.c +++ b/sys/contrib/rdma/krping/krping.c @@ -1,2211 +1,2211 @@ /* * Copyright (c) 2005 Ammasso, Inc. All rights reserved. * Copyright (c) 2006-2009 Open Grid Computing, Inc. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU * General Public License (GPL) Version 2, available from the file * COPYING in the main directory of this source tree, or the * OpenIB.org BSD license below: * * Redistribution and use in source and binary forms, with or * without modification, are permitted provided that the following * conditions are met: * * - Redistributions of source code must retain the above * copyright notice, this list of conditions and the following * disclaimer. * * - Redistributions in binary form must reproduce the above * copyright notice, this list of conditions and the following * disclaimer in the documentation and/or other materials * provided with the distribution. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "krping.h" #include "getopt.h" #define PFX "krping: " extern int krping_debug; #define DEBUG_LOG(...) do { if (krping_debug) log(LOG_INFO, __VA_ARGS__); } while (0) #define BIND_INFO 1 MODULE_AUTHOR("Steve Wise"); MODULE_DESCRIPTION("RDMA ping server"); MODULE_LICENSE("Dual BSD/GPL"); MODULE_VERSION(krping, 1); MODULE_DEPEND(krping, linuxkpi, 1, 1, 1); static __inline uint64_t get_cycles(void) { return (get_cyclecount()); } typedef uint64_t cycles_t; enum mem_type { DMA = 1, REG = 2, }; static const struct krping_option krping_opts[] = { {"count", OPT_INT, 'C'}, {"size", OPT_INT, 'S'}, {"addr", OPT_STRING, 'a'}, {"addr6", OPT_STRING, 'A'}, {"port", OPT_INT, 'p'}, {"verbose", OPT_NOPARAM, 'v'}, {"validate", OPT_NOPARAM, 'V'}, {"server", OPT_NOPARAM, 's'}, {"client", OPT_NOPARAM, 'c'}, {"server_inv", OPT_NOPARAM, 'I'}, {"wlat", OPT_NOPARAM, 'l'}, {"rlat", OPT_NOPARAM, 'L'}, {"bw", OPT_NOPARAM, 'B'}, {"duplex", OPT_NOPARAM, 'd'}, {"tos", OPT_INT, 't'}, {"txdepth", OPT_INT, 'T'}, {"poll", OPT_NOPARAM, 'P'}, {"local_dma_lkey", OPT_NOPARAM, 'Z'}, {"read_inv", OPT_NOPARAM, 'R'}, {"fr", OPT_NOPARAM, 'f'}, {NULL, 0, 0} }; #define htonll(x) cpu_to_be64((x)) #define ntohll(x) cpu_to_be64((x)) static DEFINE_MUTEX(krping_mutex); /* * List of running krping threads. 
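 *
 * Entries are added and removed in krping_doit() under krping_mutex,
 * and the list is walked by krping_walk_cb_list() and
 * krping_cancel_all().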
*/ static LIST_HEAD(krping_cbs); /* * Invoke like this, one on each side, using the server's address on * the RDMA device (iw%d): * * /bin/echo server,port=9999,addr=192.168.69.142,validate > /proc/krping * /bin/echo client,port=9999,addr=192.168.69.142,validate > /proc/krping * /bin/echo client,port=9999,addr6=2001:db8:0:f101::1,validate > /proc/krping * * krping "ping/pong" loop: * client sends source rkey/addr/len * server receives source rkey/add/len * server rdma reads "ping" data from source * server sends "go ahead" on rdma read completion * client sends sink rkey/addr/len * server receives sink rkey/addr/len * server rdma writes "pong" data to sink * server sends "go ahead" on rdma write completion * */ /* * These states are used to signal events between the completion handler * and the main client or server thread. * * Once CONNECTED, they cycle through RDMA_READ_ADV, RDMA_WRITE_ADV, * and RDMA_WRITE_COMPLETE for each ping. */ enum test_state { IDLE = 1, CONNECT_REQUEST, ADDR_RESOLVED, ROUTE_RESOLVED, CONNECTED, RDMA_READ_ADV, RDMA_READ_COMPLETE, RDMA_WRITE_ADV, RDMA_WRITE_COMPLETE, ERROR }; struct krping_rdma_info { uint64_t buf; uint32_t rkey; uint32_t size; }; /* * Default max buffer size for IO... */ #define RPING_BUFSIZE 128*1024 #define RPING_SQ_DEPTH 64 /* * Control block struct. */ struct krping_cb { int server; /* 0 iff client */ struct ib_cq *cq; struct ib_pd *pd; struct ib_qp *qp; struct ib_mr *dma_mr; struct ib_fast_reg_page_list *page_list; int page_list_len; struct ib_reg_wr reg_mr_wr; struct ib_send_wr invalidate_wr; struct ib_mr *reg_mr; int server_invalidate; int read_inv; u8 key; struct ib_recv_wr rq_wr; /* recv work request record */ struct ib_sge recv_sgl; /* recv single SGE */ struct krping_rdma_info recv_buf __aligned(16); /* malloc'd buffer */ u64 recv_dma_addr; DECLARE_PCI_UNMAP_ADDR(recv_mapping) struct ib_send_wr sq_wr; /* send work requrest record */ struct ib_sge send_sgl; struct krping_rdma_info send_buf __aligned(16); /* single send buf */ u64 send_dma_addr; DECLARE_PCI_UNMAP_ADDR(send_mapping) struct ib_rdma_wr rdma_sq_wr; /* rdma work request record */ struct ib_sge rdma_sgl; /* rdma single SGE */ char *rdma_buf; /* used as rdma sink */ u64 rdma_dma_addr; DECLARE_PCI_UNMAP_ADDR(rdma_mapping) struct ib_mr *rdma_mr; uint32_t remote_rkey; /* remote guys RKEY */ uint64_t remote_addr; /* remote guys TO */ uint32_t remote_len; /* remote guys LEN */ char *start_buf; /* rdma read src */ u64 start_dma_addr; DECLARE_PCI_UNMAP_ADDR(start_mapping) struct ib_mr *start_mr; enum test_state state; /* used for cond/signalling */ wait_queue_head_t sem; struct krping_stats stats; uint16_t port; /* dst port in NBO */ u8 addr[16] __aligned(8); /* dst addr in NBO */ char *addr_str; /* dst addr string */ uint8_t addr_type; /* ADDR_FAMILY - IPv4/V6 */ int verbose; /* verbose logging */ int count; /* ping count */ int size; /* ping data size */ int validate; /* validate ping data */ int wlat; /* run wlat test */ int rlat; /* run rlat test */ int bw; /* run bw test */ int duplex; /* run bw full duplex test */ int poll; /* poll or block for rlat test */ int txdepth; /* SQ depth */ int local_dma_lkey; /* use 0 for lkey */ int frtest; /* reg test */ int tos; /* type of service */ /* CM stuff */ struct rdma_cm_id *cm_id; /* connection on client side,*/ /* listener on server side. 
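					   On the server, the accepted
					   connection is delivered as
					   child_cm_id by the
					   CONNECT_REQUEST CM event.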
*/ struct rdma_cm_id *child_cm_id; /* connection on server side */ struct list_head list; }; static int krping_cma_event_handler(struct rdma_cm_id *cma_id, struct rdma_cm_event *event) { int ret; struct krping_cb *cb = cma_id->context; DEBUG_LOG("cma_event type %d cma_id %p (%s)\n", event->event, cma_id, (cma_id == cb->cm_id) ? "parent" : "child"); switch (event->event) { case RDMA_CM_EVENT_ADDR_RESOLVED: cb->state = ADDR_RESOLVED; ret = rdma_resolve_route(cma_id, 2000); if (ret) { printk(KERN_ERR PFX "rdma_resolve_route error %d\n", ret); wake_up_interruptible(&cb->sem); } break; case RDMA_CM_EVENT_ROUTE_RESOLVED: cb->state = ROUTE_RESOLVED; wake_up_interruptible(&cb->sem); break; case RDMA_CM_EVENT_CONNECT_REQUEST: cb->state = CONNECT_REQUEST; cb->child_cm_id = cma_id; DEBUG_LOG("child cma %p\n", cb->child_cm_id); wake_up_interruptible(&cb->sem); break; case RDMA_CM_EVENT_ESTABLISHED: DEBUG_LOG("ESTABLISHED\n"); if (!cb->server) { cb->state = CONNECTED; } wake_up_interruptible(&cb->sem); break; case RDMA_CM_EVENT_ADDR_ERROR: case RDMA_CM_EVENT_ROUTE_ERROR: case RDMA_CM_EVENT_CONNECT_ERROR: case RDMA_CM_EVENT_UNREACHABLE: case RDMA_CM_EVENT_REJECTED: printk(KERN_ERR PFX "cma event %d, error %d\n", event->event, event->status); cb->state = ERROR; wake_up_interruptible(&cb->sem); break; case RDMA_CM_EVENT_DISCONNECTED: printk(KERN_ERR PFX "DISCONNECT EVENT...\n"); cb->state = ERROR; wake_up_interruptible(&cb->sem); break; case RDMA_CM_EVENT_DEVICE_REMOVAL: printk(KERN_ERR PFX "cma detected device removal!!!!\n"); cb->state = ERROR; wake_up_interruptible(&cb->sem); break; default: printk(KERN_ERR PFX "oof bad type!\n"); wake_up_interruptible(&cb->sem); break; } return 0; } static int server_recv(struct krping_cb *cb, struct ib_wc *wc) { if (wc->byte_len != sizeof(cb->recv_buf)) { printk(KERN_ERR PFX "Received bogus data, size %d\n", wc->byte_len); return -1; } cb->remote_rkey = ntohl(cb->recv_buf.rkey); cb->remote_addr = ntohll(cb->recv_buf.buf); cb->remote_len = ntohl(cb->recv_buf.size); DEBUG_LOG("Received rkey %x addr %llx len %d from peer\n", cb->remote_rkey, (unsigned long long)cb->remote_addr, cb->remote_len); if (cb->state <= CONNECTED || cb->state == RDMA_WRITE_COMPLETE) cb->state = RDMA_READ_ADV; else cb->state = RDMA_WRITE_ADV; return 0; } static int client_recv(struct krping_cb *cb, struct ib_wc *wc) { if (wc->byte_len != sizeof(cb->recv_buf)) { printk(KERN_ERR PFX "Received bogus data, size %d\n", wc->byte_len); return -1; } if (cb->state == RDMA_READ_ADV) cb->state = RDMA_WRITE_ADV; else cb->state = RDMA_WRITE_COMPLETE; return 0; } static void krping_cq_event_handler(struct ib_cq *cq, void *ctx) { struct krping_cb *cb = ctx; struct ib_wc wc; const struct ib_recv_wr *bad_wr; int ret; BUG_ON(cb->cq != cq); if (cb->frtest) { printk(KERN_ERR PFX "cq completion event in frtest!\n"); return; } if (!cb->wlat && !cb->rlat && !cb->bw) ib_req_notify_cq(cb->cq, IB_CQ_NEXT_COMP); while ((ret = ib_poll_cq(cb->cq, 1, &wc)) == 1) { if (wc.status) { if (wc.status == IB_WC_WR_FLUSH_ERR) { DEBUG_LOG("cq flushed\n"); continue; } else { printk(KERN_ERR PFX "cq completion failed with " "wr_id %jx status %d opcode %d vender_err %x\n", (uintmax_t)wc.wr_id, wc.status, wc.opcode, wc.vendor_err); goto error; } } if (cb->state == ERROR) { printk(KERN_ERR PFX "cq completion in ERROR state\n"); return; } switch (wc.opcode) { case IB_WC_SEND: DEBUG_LOG("send completion\n"); cb->stats.send_bytes += cb->send_sgl.length; cb->stats.send_msgs++; break; case IB_WC_RDMA_WRITE: DEBUG_LOG("rdma write completion\n"); 
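			/*
			 * Account the bytes of the completed RDMA WRITE and
			 * wake the thread waiting for RDMA_WRITE_COMPLETE.
			 */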
cb->stats.write_bytes += cb->rdma_sq_wr.wr.sg_list->length; cb->stats.write_msgs++; cb->state = RDMA_WRITE_COMPLETE; wake_up_interruptible(&cb->sem); break; case IB_WC_RDMA_READ: DEBUG_LOG("rdma read completion\n"); cb->stats.read_bytes += cb->rdma_sq_wr.wr.sg_list->length; cb->stats.read_msgs++; cb->state = RDMA_READ_COMPLETE; wake_up_interruptible(&cb->sem); break; case IB_WC_RECV: DEBUG_LOG("recv completion\n"); cb->stats.recv_bytes += sizeof(cb->recv_buf); cb->stats.recv_msgs++; if (cb->wlat || cb->rlat || cb->bw) ret = server_recv(cb, &wc); else ret = cb->server ? server_recv(cb, &wc) : client_recv(cb, &wc); if (ret) { printk(KERN_ERR PFX "recv wc error: %d\n", ret); goto error; } ret = ib_post_recv(cb->qp, &cb->rq_wr, &bad_wr); if (ret) { printk(KERN_ERR PFX "post recv error: %d\n", ret); goto error; } wake_up_interruptible(&cb->sem); break; default: printk(KERN_ERR PFX "%s:%d Unexpected opcode %d, Shutting down\n", __func__, __LINE__, wc.opcode); goto error; } } if (ret) { printk(KERN_ERR PFX "poll error %d\n", ret); goto error; } return; error: cb->state = ERROR; wake_up_interruptible(&cb->sem); } static int krping_accept(struct krping_cb *cb) { struct rdma_conn_param conn_param; int ret; DEBUG_LOG("accepting client connection request\n"); memset(&conn_param, 0, sizeof conn_param); conn_param.responder_resources = 1; conn_param.initiator_depth = 1; ret = rdma_accept(cb->child_cm_id, &conn_param); if (ret) { printk(KERN_ERR PFX "rdma_accept error: %d\n", ret); return ret; } if (!cb->wlat && !cb->rlat && !cb->bw) { wait_event_interruptible(cb->sem, cb->state >= CONNECTED); if (cb->state == ERROR) { printk(KERN_ERR PFX "wait for CONNECTED state %d\n", cb->state); return -1; } } return 0; } static void krping_setup_wr(struct krping_cb *cb) { cb->recv_sgl.addr = cb->recv_dma_addr; cb->recv_sgl.length = sizeof cb->recv_buf; cb->recv_sgl.lkey = cb->pd->local_dma_lkey; cb->rq_wr.sg_list = &cb->recv_sgl; cb->rq_wr.num_sge = 1; cb->send_sgl.addr = cb->send_dma_addr; cb->send_sgl.length = sizeof cb->send_buf; cb->send_sgl.lkey = cb->pd->local_dma_lkey; cb->sq_wr.opcode = IB_WR_SEND; cb->sq_wr.send_flags = IB_SEND_SIGNALED; cb->sq_wr.sg_list = &cb->send_sgl; cb->sq_wr.num_sge = 1; if (cb->server || cb->wlat || cb->rlat || cb->bw) { cb->rdma_sgl.addr = cb->rdma_dma_addr; cb->rdma_sq_wr.wr.send_flags = IB_SEND_SIGNALED; cb->rdma_sq_wr.wr.sg_list = &cb->rdma_sgl; cb->rdma_sq_wr.wr.num_sge = 1; } /* * A chain of 2 WRs, INVALDATE_MR + REG_MR. * both unsignaled. The client uses them to reregister * the rdma buffers with a new key each iteration. 
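	 *
	 * krping_rdma_rkey() bumps the 8-bit key with
	 * ib_update_fast_reg_key(), maps the buffer with ib_map_mr_sg(),
	 * and then posts either the LOCAL_INV + REG_MR chain or the
	 * REG_MR WR alone.  Because neither WR is signaled, only the
	 * SEND/RDMA work requests that follow generate completions.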
*/ cb->reg_mr_wr.wr.opcode = IB_WR_REG_MR; cb->reg_mr_wr.mr = cb->reg_mr; cb->invalidate_wr.next = &cb->reg_mr_wr.wr; cb->invalidate_wr.opcode = IB_WR_LOCAL_INV; } static int krping_setup_buffers(struct krping_cb *cb) { int ret; DEBUG_LOG(PFX "krping_setup_buffers called on cb %p\n", cb); cb->recv_dma_addr = ib_dma_map_single(cb->pd->device, &cb->recv_buf, sizeof(cb->recv_buf), DMA_BIDIRECTIONAL); pci_unmap_addr_set(cb, recv_mapping, cb->recv_dma_addr); cb->send_dma_addr = ib_dma_map_single(cb->pd->device, &cb->send_buf, sizeof(cb->send_buf), DMA_BIDIRECTIONAL); pci_unmap_addr_set(cb, send_mapping, cb->send_dma_addr); cb->rdma_buf = ib_dma_alloc_coherent(cb->pd->device, cb->size, &cb->rdma_dma_addr, GFP_KERNEL); if (!cb->rdma_buf) { DEBUG_LOG(PFX "rdma_buf allocation failed\n"); ret = -ENOMEM; goto bail; } pci_unmap_addr_set(cb, rdma_mapping, cb->rdma_dma_addr); cb->page_list_len = (((cb->size - 1) & PAGE_MASK) + PAGE_SIZE) >> PAGE_SHIFT; cb->reg_mr = ib_alloc_mr(cb->pd, IB_MR_TYPE_MEM_REG, cb->page_list_len); if (IS_ERR(cb->reg_mr)) { ret = PTR_ERR(cb->reg_mr); DEBUG_LOG(PFX "recv_buf reg_mr failed %d\n", ret); goto bail; } DEBUG_LOG(PFX "reg rkey 0x%x page_list_len %u\n", cb->reg_mr->rkey, cb->page_list_len); if (!cb->server || cb->wlat || cb->rlat || cb->bw) { cb->start_buf = ib_dma_alloc_coherent(cb->pd->device, cb->size, &cb->start_dma_addr, GFP_KERNEL); if (!cb->start_buf) { DEBUG_LOG(PFX "start_buf malloc failed\n"); ret = -ENOMEM; goto bail; } pci_unmap_addr_set(cb, start_mapping, cb->start_dma_addr); } krping_setup_wr(cb); DEBUG_LOG(PFX "allocated & registered buffers...\n"); return 0; bail: if (cb->reg_mr && !IS_ERR(cb->reg_mr)) ib_dereg_mr(cb->reg_mr); if (cb->rdma_mr && !IS_ERR(cb->rdma_mr)) ib_dereg_mr(cb->rdma_mr); if (cb->dma_mr && !IS_ERR(cb->dma_mr)) ib_dereg_mr(cb->dma_mr); if (cb->rdma_buf) { ib_dma_free_coherent(cb->pd->device, cb->size, cb->rdma_buf, cb->rdma_dma_addr); } if (cb->start_buf) { ib_dma_free_coherent(cb->pd->device, cb->size, cb->start_buf, cb->start_dma_addr); } return ret; } static void krping_free_buffers(struct krping_cb *cb) { DEBUG_LOG("krping_free_buffers called on cb %p\n", cb); if (cb->dma_mr) ib_dereg_mr(cb->dma_mr); if (cb->rdma_mr) ib_dereg_mr(cb->rdma_mr); if (cb->start_mr) ib_dereg_mr(cb->start_mr); if (cb->reg_mr) ib_dereg_mr(cb->reg_mr); dma_unmap_single(cb->pd->device->dma_device, pci_unmap_addr(cb, recv_mapping), sizeof(cb->recv_buf), DMA_BIDIRECTIONAL); dma_unmap_single(cb->pd->device->dma_device, pci_unmap_addr(cb, send_mapping), sizeof(cb->send_buf), DMA_BIDIRECTIONAL); ib_dma_free_coherent(cb->pd->device, cb->size, cb->rdma_buf, cb->rdma_dma_addr); if (cb->start_buf) { ib_dma_free_coherent(cb->pd->device, cb->size, cb->start_buf, cb->start_dma_addr); } } static int krping_create_qp(struct krping_cb *cb) { struct ib_qp_init_attr init_attr; int ret; memset(&init_attr, 0, sizeof(init_attr)); init_attr.cap.max_send_wr = cb->txdepth; init_attr.cap.max_recv_wr = 2; /* For flush_qp() */ init_attr.cap.max_send_wr++; init_attr.cap.max_recv_wr++; init_attr.cap.max_recv_sge = 1; init_attr.cap.max_send_sge = 1; init_attr.qp_type = IB_QPT_RC; init_attr.send_cq = cb->cq; init_attr.recv_cq = cb->cq; init_attr.sq_sig_type = IB_SIGNAL_REQ_WR; if (cb->server) { ret = rdma_create_qp(cb->child_cm_id, cb->pd, &init_attr); if (!ret) cb->qp = cb->child_cm_id->qp; } else { ret = rdma_create_qp(cb->cm_id, cb->pd, &init_attr); if (!ret) cb->qp = cb->cm_id->qp; } return ret; } static void krping_free_qp(struct krping_cb *cb) { ib_destroy_qp(cb->qp); 
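	/* Tear down in reverse order of creation: QP, then CQ, then PD. */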
ib_destroy_cq(cb->cq); ib_dealloc_pd(cb->pd); } static int krping_setup_qp(struct krping_cb *cb, struct rdma_cm_id *cm_id) { int ret; struct ib_cq_init_attr attr = {0}; cb->pd = ib_alloc_pd(cm_id->device, 0); if (IS_ERR(cb->pd)) { printk(KERN_ERR PFX "ib_alloc_pd failed\n"); return PTR_ERR(cb->pd); } DEBUG_LOG("created pd %p\n", cb->pd); strlcpy(cb->stats.name, cb->pd->device->name, sizeof(cb->stats.name)); attr.cqe = cb->txdepth * 2; attr.comp_vector = 0; cb->cq = ib_create_cq(cm_id->device, krping_cq_event_handler, NULL, cb, &attr); if (IS_ERR(cb->cq)) { printk(KERN_ERR PFX "ib_create_cq failed\n"); ret = PTR_ERR(cb->cq); goto err1; } DEBUG_LOG("created cq %p\n", cb->cq); if (!cb->wlat && !cb->rlat && !cb->bw && !cb->frtest) { ret = ib_req_notify_cq(cb->cq, IB_CQ_NEXT_COMP); if (ret) { printk(KERN_ERR PFX "ib_create_cq failed\n"); goto err2; } } ret = krping_create_qp(cb); if (ret) { printk(KERN_ERR PFX "krping_create_qp failed: %d\n", ret); goto err2; } DEBUG_LOG("created qp %p\n", cb->qp); return 0; err2: ib_destroy_cq(cb->cq); err1: ib_dealloc_pd(cb->pd); return ret; } /* * return the (possibly rebound) rkey for the rdma buffer. * REG mode: invalidate and rebind via reg wr. * other modes: just return the mr rkey. */ static u32 krping_rdma_rkey(struct krping_cb *cb, u64 buf, int post_inv) { u32 rkey; const struct ib_send_wr *bad_wr; int ret; struct scatterlist sg = {0}; cb->invalidate_wr.ex.invalidate_rkey = cb->reg_mr->rkey; /* * Update the reg key. */ ib_update_fast_reg_key(cb->reg_mr, ++cb->key); cb->reg_mr_wr.key = cb->reg_mr->rkey; /* * Update the reg WR with new buf info. */ if (buf == (u64)cb->start_dma_addr) cb->reg_mr_wr.access = IB_ACCESS_REMOTE_READ; else cb->reg_mr_wr.access = IB_ACCESS_REMOTE_WRITE | IB_ACCESS_LOCAL_WRITE; sg_dma_address(&sg) = buf; sg_dma_len(&sg) = cb->size; ret = ib_map_mr_sg(cb->reg_mr, &sg, 1, NULL, PAGE_SIZE); BUG_ON(ret <= 0 || ret > cb->page_list_len); DEBUG_LOG(PFX "post_inv = %d, reg_mr new rkey 0x%x pgsz %u len %u" " iova_start %llx\n", post_inv, cb->reg_mr_wr.key, cb->reg_mr->page_size, (unsigned)cb->reg_mr->length, (unsigned long long)cb->reg_mr->iova); if (post_inv) ret = ib_post_send(cb->qp, &cb->invalidate_wr, &bad_wr); else ret = ib_post_send(cb->qp, &cb->reg_mr_wr.wr, &bad_wr); if (ret) { printk(KERN_ERR PFX "post send error %d\n", ret); cb->state = ERROR; } rkey = cb->reg_mr->rkey; return rkey; } static void krping_format_send(struct krping_cb *cb, u64 buf) { struct krping_rdma_info *info = &cb->send_buf; u32 rkey; /* * Client side will do reg or mw bind before * advertising the rdma buffer. Server side * sends have no data. 
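	 * The advertisement is the {addr, rkey, size} triple carried in
	 * struct krping_rdma_info, converted to network byte order.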
*/ if (!cb->server || cb->wlat || cb->rlat || cb->bw) { rkey = krping_rdma_rkey(cb, buf, !cb->server_invalidate); info->buf = htonll(buf); info->rkey = htonl(rkey); info->size = htonl(cb->size); DEBUG_LOG("RDMA addr %llx rkey %x len %d\n", (unsigned long long)buf, rkey, cb->size); } } static void krping_test_server(struct krping_cb *cb) { const struct ib_send_wr *bad_wr; struct ib_send_wr inv; int ret; while (1) { /* Wait for client's Start STAG/TO/Len */ wait_event_interruptible(cb->sem, cb->state >= RDMA_READ_ADV); if (cb->state != RDMA_READ_ADV) { printk(KERN_ERR PFX "wait for RDMA_READ_ADV state %d\n", cb->state); break; } DEBUG_LOG("server received sink adv\n"); cb->rdma_sq_wr.rkey = cb->remote_rkey; cb->rdma_sq_wr.remote_addr = cb->remote_addr; cb->rdma_sq_wr.wr.sg_list->length = cb->remote_len; cb->rdma_sgl.lkey = krping_rdma_rkey(cb, cb->rdma_dma_addr, !cb->read_inv); cb->rdma_sq_wr.wr.next = NULL; /* Issue RDMA Read. */ if (cb->read_inv) cb->rdma_sq_wr.wr.opcode = IB_WR_RDMA_READ_WITH_INV; else { cb->rdma_sq_wr.wr.opcode = IB_WR_RDMA_READ; /* * Immediately follow the read with a * fenced LOCAL_INV. */ cb->rdma_sq_wr.wr.next = &inv; memset(&inv, 0, sizeof inv); inv.opcode = IB_WR_LOCAL_INV; inv.ex.invalidate_rkey = cb->reg_mr->rkey; inv.send_flags = IB_SEND_FENCE; } ret = ib_post_send(cb->qp, &cb->rdma_sq_wr.wr, &bad_wr); if (ret) { printk(KERN_ERR PFX "post send error %d\n", ret); break; } cb->rdma_sq_wr.wr.next = NULL; DEBUG_LOG("server posted rdma read req \n"); /* Wait for read completion */ wait_event_interruptible(cb->sem, cb->state >= RDMA_READ_COMPLETE); if (cb->state != RDMA_READ_COMPLETE) { printk(KERN_ERR PFX "wait for RDMA_READ_COMPLETE state %d\n", cb->state); break; } DEBUG_LOG("server received read complete\n"); /* Display data in recv buf */ if (cb->verbose) printk(KERN_INFO PFX "server ping data: %s\n", cb->rdma_buf); /* Tell client to continue */ if (cb->server && cb->server_invalidate) { cb->sq_wr.ex.invalidate_rkey = cb->remote_rkey; cb->sq_wr.opcode = IB_WR_SEND_WITH_INV; DEBUG_LOG("send-w-inv rkey 0x%x\n", cb->remote_rkey); } ret = ib_post_send(cb->qp, &cb->sq_wr, &bad_wr); if (ret) { printk(KERN_ERR PFX "post send error %d\n", ret); break; } DEBUG_LOG("server posted go ahead\n"); /* Wait for client's RDMA STAG/TO/Len */ wait_event_interruptible(cb->sem, cb->state >= RDMA_WRITE_ADV); if (cb->state != RDMA_WRITE_ADV) { printk(KERN_ERR PFX "wait for RDMA_WRITE_ADV state %d\n", cb->state); break; } DEBUG_LOG("server received sink adv\n"); /* RDMA Write echo data */ cb->rdma_sq_wr.wr.opcode = IB_WR_RDMA_WRITE; cb->rdma_sq_wr.rkey = cb->remote_rkey; cb->rdma_sq_wr.remote_addr = cb->remote_addr; cb->rdma_sq_wr.wr.sg_list->length = strlen(cb->rdma_buf) + 1; if (cb->local_dma_lkey) cb->rdma_sgl.lkey = cb->pd->local_dma_lkey; else cb->rdma_sgl.lkey = krping_rdma_rkey(cb, cb->rdma_dma_addr, 0); DEBUG_LOG("rdma write from lkey %x laddr %llx len %d\n", cb->rdma_sq_wr.wr.sg_list->lkey, (unsigned long long)cb->rdma_sq_wr.wr.sg_list->addr, cb->rdma_sq_wr.wr.sg_list->length); ret = ib_post_send(cb->qp, &cb->rdma_sq_wr.wr, &bad_wr); if (ret) { printk(KERN_ERR PFX "post send error %d\n", ret); break; } /* Wait for completion */ ret = wait_event_interruptible(cb->sem, cb->state >= RDMA_WRITE_COMPLETE); if (cb->state != RDMA_WRITE_COMPLETE) { printk(KERN_ERR PFX "wait for RDMA_WRITE_COMPLETE state %d\n", cb->state); break; } DEBUG_LOG("server rdma write complete \n"); cb->state = CONNECTED; /* Tell client to begin again */ if (cb->server && cb->server_invalidate) { 
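			/*
			 * SEND_WITH_INV carries the client's rkey so it is
			 * invalidated at the peer when the message arrives.
			 */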
cb->sq_wr.ex.invalidate_rkey = cb->remote_rkey; cb->sq_wr.opcode = IB_WR_SEND_WITH_INV; DEBUG_LOG("send-w-inv rkey 0x%x\n", cb->remote_rkey); } ret = ib_post_send(cb->qp, &cb->sq_wr, &bad_wr); if (ret) { printk(KERN_ERR PFX "post send error %d\n", ret); break; } DEBUG_LOG("server posted go ahead\n"); } } static void rlat_test(struct krping_cb *cb) { int scnt; int iters = cb->count; struct timeval start_tv, stop_tv; int ret; struct ib_wc wc; const struct ib_send_wr *bad_wr; int ne; scnt = 0; cb->rdma_sq_wr.wr.opcode = IB_WR_RDMA_READ; cb->rdma_sq_wr.rkey = cb->remote_rkey; cb->rdma_sq_wr.remote_addr = cb->remote_addr; cb->rdma_sq_wr.wr.sg_list->length = cb->size; microtime(&start_tv); if (!cb->poll) { cb->state = RDMA_READ_ADV; ib_req_notify_cq(cb->cq, IB_CQ_NEXT_COMP); } while (scnt < iters) { cb->state = RDMA_READ_ADV; ret = ib_post_send(cb->qp, &cb->rdma_sq_wr.wr, &bad_wr); if (ret) { printk(KERN_ERR PFX "Couldn't post send: ret=%d scnt %d\n", ret, scnt); return; } do { if (!cb->poll) { wait_event_interruptible(cb->sem, cb->state != RDMA_READ_ADV); if (cb->state == RDMA_READ_COMPLETE) { ne = 1; ib_req_notify_cq(cb->cq, IB_CQ_NEXT_COMP); } else { ne = -1; } } else ne = ib_poll_cq(cb->cq, 1, &wc); if (cb->state == ERROR) { printk(KERN_ERR PFX "state == ERROR...bailing scnt %d\n", scnt); return; } } while (ne == 0); if (ne < 0) { printk(KERN_ERR PFX "poll CQ failed %d\n", ne); return; } if (cb->poll && wc.status != IB_WC_SUCCESS) { printk(KERN_ERR PFX "Completion wth error at %s:\n", cb->server ? "server" : "client"); printk(KERN_ERR PFX "Failed status %d: wr_id %d\n", wc.status, (int) wc.wr_id); return; } ++scnt; } microtime(&stop_tv); if (stop_tv.tv_usec < start_tv.tv_usec) { stop_tv.tv_usec += 1000000; stop_tv.tv_sec -= 1; } printk(KERN_ERR PFX "delta sec %lu delta usec %lu iter %d size %d\n", (unsigned long)(stop_tv.tv_sec - start_tv.tv_sec), (unsigned long)(stop_tv.tv_usec - start_tv.tv_usec), scnt, cb->size); } static void wlat_test(struct krping_cb *cb) { int ccnt, scnt, rcnt; int iters=cb->count; volatile char *poll_buf = (char *) cb->start_buf; char *buf = (char *)cb->rdma_buf; struct timeval start_tv, stop_tv; cycles_t *post_cycles_start = NULL; cycles_t *post_cycles_stop = NULL; cycles_t *poll_cycles_start = NULL; cycles_t *poll_cycles_stop = NULL; cycles_t *last_poll_cycles_start = NULL; cycles_t sum_poll = 0, sum_post = 0, sum_last_poll = 0; int i; int cycle_iters = 1000; ccnt = 0; scnt = 0; rcnt = 0; post_cycles_start = kmalloc(cycle_iters * sizeof(cycles_t), GFP_KERNEL); if (!post_cycles_start) { printk(KERN_ERR PFX "%s kmalloc failed\n", __FUNCTION__); goto done; } post_cycles_stop = kmalloc(cycle_iters * sizeof(cycles_t), GFP_KERNEL); if (!post_cycles_stop) { printk(KERN_ERR PFX "%s kmalloc failed\n", __FUNCTION__); goto done; } poll_cycles_start = kmalloc(cycle_iters * sizeof(cycles_t), GFP_KERNEL); if (!poll_cycles_start) { printk(KERN_ERR PFX "%s kmalloc failed\n", __FUNCTION__); goto done; } poll_cycles_stop = kmalloc(cycle_iters * sizeof(cycles_t), GFP_KERNEL); if (!poll_cycles_stop) { printk(KERN_ERR PFX "%s kmalloc failed\n", __FUNCTION__); goto done; } last_poll_cycles_start = kmalloc(cycle_iters * sizeof(cycles_t), GFP_KERNEL); if (!last_poll_cycles_start) { printk(KERN_ERR PFX "%s kmalloc failed\n", __FUNCTION__); goto done; } cb->rdma_sq_wr.wr.opcode = IB_WR_RDMA_WRITE; cb->rdma_sq_wr.rkey = cb->remote_rkey; cb->rdma_sq_wr.remote_addr = cb->remote_addr; cb->rdma_sq_wr.wr.sg_list->length = cb->size; if (cycle_iters > iters) cycle_iters = iters; 
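	/*
	 * Ping-pong via RDMA WRITE: spin on poll_buf until the peer's
	 * write of the next sequence byte lands, post our own write of
	 * scnt + 1, and reap the send-side completion from the CQ.
	 */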
microtime(&start_tv); while (scnt < iters || ccnt < iters || rcnt < iters) { /* Wait till buffer changes. */ if (rcnt < iters && !(scnt < 1 && !cb->server)) { ++rcnt; while (*poll_buf != (char)rcnt) { if (cb->state == ERROR) { printk(KERN_ERR PFX "state = ERROR, bailing\n"); goto done; } } } if (scnt < iters) { const struct ib_send_wr *bad_wr; *buf = (char)scnt+1; if (scnt < cycle_iters) post_cycles_start[scnt] = get_cycles(); if (ib_post_send(cb->qp, &cb->rdma_sq_wr.wr, &bad_wr)) { printk(KERN_ERR PFX "Couldn't post send: scnt=%d\n", scnt); goto done; } if (scnt < cycle_iters) post_cycles_stop[scnt] = get_cycles(); scnt++; } if (ccnt < iters) { struct ib_wc wc; int ne; if (ccnt < cycle_iters) poll_cycles_start[ccnt] = get_cycles(); do { if (ccnt < cycle_iters) last_poll_cycles_start[ccnt] = get_cycles(); ne = ib_poll_cq(cb->cq, 1, &wc); } while (ne == 0); if (ccnt < cycle_iters) poll_cycles_stop[ccnt] = get_cycles(); ++ccnt; if (ne < 0) { printk(KERN_ERR PFX "poll CQ failed %d\n", ne); goto done; } if (wc.status != IB_WC_SUCCESS) { printk(KERN_ERR PFX "Completion wth error at %s:\n", cb->server ? "server" : "client"); printk(KERN_ERR PFX "Failed status %d: wr_id %d\n", wc.status, (int) wc.wr_id); printk(KERN_ERR PFX "scnt=%d, rcnt=%d, ccnt=%d\n", scnt, rcnt, ccnt); goto done; } } } microtime(&stop_tv); if (stop_tv.tv_usec < start_tv.tv_usec) { stop_tv.tv_usec += 1000000; stop_tv.tv_sec -= 1; } for (i=0; i < cycle_iters; i++) { sum_post += post_cycles_stop[i] - post_cycles_start[i]; sum_poll += poll_cycles_stop[i] - poll_cycles_start[i]; sum_last_poll += poll_cycles_stop[i]-last_poll_cycles_start[i]; } printk(KERN_ERR PFX "delta sec %lu delta usec %lu iter %d size %d cycle_iters %d" " sum_post %llu sum_poll %llu sum_last_poll %llu\n", (unsigned long)(stop_tv.tv_sec - start_tv.tv_sec), (unsigned long)(stop_tv.tv_usec - start_tv.tv_usec), scnt, cb->size, cycle_iters, (unsigned long long)sum_post, (unsigned long long)sum_poll, (unsigned long long)sum_last_poll); done: kfree(post_cycles_start); kfree(post_cycles_stop); kfree(poll_cycles_start); kfree(poll_cycles_stop); kfree(last_poll_cycles_start); } static void bw_test(struct krping_cb *cb) { int ccnt, scnt; int iters=cb->count; struct timeval start_tv, stop_tv; cycles_t *post_cycles_start = NULL; cycles_t *post_cycles_stop = NULL; cycles_t *poll_cycles_start = NULL; cycles_t *poll_cycles_stop = NULL; cycles_t *last_poll_cycles_start = NULL; cycles_t sum_poll = 0, sum_post = 0, sum_last_poll = 0; int i; int cycle_iters = 1000; ccnt = 0; scnt = 0; post_cycles_start = kmalloc(cycle_iters * sizeof(cycles_t), GFP_KERNEL); if (!post_cycles_start) { printk(KERN_ERR PFX "%s kmalloc failed\n", __FUNCTION__); goto done; } post_cycles_stop = kmalloc(cycle_iters * sizeof(cycles_t), GFP_KERNEL); if (!post_cycles_stop) { printk(KERN_ERR PFX "%s kmalloc failed\n", __FUNCTION__); goto done; } poll_cycles_start = kmalloc(cycle_iters * sizeof(cycles_t), GFP_KERNEL); if (!poll_cycles_start) { printk(KERN_ERR PFX "%s kmalloc failed\n", __FUNCTION__); goto done; } poll_cycles_stop = kmalloc(cycle_iters * sizeof(cycles_t), GFP_KERNEL); if (!poll_cycles_stop) { printk(KERN_ERR PFX "%s kmalloc failed\n", __FUNCTION__); goto done; } last_poll_cycles_start = kmalloc(cycle_iters * sizeof(cycles_t), GFP_KERNEL); if (!last_poll_cycles_start) { printk(KERN_ERR PFX "%s kmalloc failed\n", __FUNCTION__); goto done; } cb->rdma_sq_wr.wr.opcode = IB_WR_RDMA_WRITE; cb->rdma_sq_wr.rkey = cb->remote_rkey; cb->rdma_sq_wr.remote_addr = cb->remote_addr; 
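	/*
	 * bw_test streams RDMA WRITEs, keeping at most txdepth of them
	 * outstanding (scnt - ccnt) while completions are reaped one at
	 * a time.
	 */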
cb->rdma_sq_wr.wr.sg_list->length = cb->size; if (cycle_iters > iters) cycle_iters = iters; microtime(&start_tv); while (scnt < iters || ccnt < iters) { while (scnt < iters && scnt - ccnt < cb->txdepth) { const struct ib_send_wr *bad_wr; if (scnt < cycle_iters) post_cycles_start[scnt] = get_cycles(); if (ib_post_send(cb->qp, &cb->rdma_sq_wr.wr, &bad_wr)) { printk(KERN_ERR PFX "Couldn't post send: scnt=%d\n", scnt); goto done; } if (scnt < cycle_iters) post_cycles_stop[scnt] = get_cycles(); ++scnt; } if (ccnt < iters) { int ne; struct ib_wc wc; if (ccnt < cycle_iters) poll_cycles_start[ccnt] = get_cycles(); do { if (ccnt < cycle_iters) last_poll_cycles_start[ccnt] = get_cycles(); ne = ib_poll_cq(cb->cq, 1, &wc); } while (ne == 0); if (ccnt < cycle_iters) poll_cycles_stop[ccnt] = get_cycles(); ccnt += 1; if (ne < 0) { printk(KERN_ERR PFX "poll CQ failed %d\n", ne); goto done; } if (wc.status != IB_WC_SUCCESS) { printk(KERN_ERR PFX "Completion wth error at %s:\n", cb->server ? "server" : "client"); printk(KERN_ERR PFX "Failed status %d: wr_id %d\n", wc.status, (int) wc.wr_id); goto done; } } } microtime(&stop_tv); if (stop_tv.tv_usec < start_tv.tv_usec) { stop_tv.tv_usec += 1000000; stop_tv.tv_sec -= 1; } for (i=0; i < cycle_iters; i++) { sum_post += post_cycles_stop[i] - post_cycles_start[i]; sum_poll += poll_cycles_stop[i] - poll_cycles_start[i]; sum_last_poll += poll_cycles_stop[i]-last_poll_cycles_start[i]; } printk(KERN_ERR PFX "delta sec %lu delta usec %lu iter %d size %d cycle_iters %d" " sum_post %llu sum_poll %llu sum_last_poll %llu\n", (unsigned long)(stop_tv.tv_sec - start_tv.tv_sec), (unsigned long)(stop_tv.tv_usec - start_tv.tv_usec), scnt, cb->size, cycle_iters, (unsigned long long)sum_post, (unsigned long long)sum_poll, (unsigned long long)sum_last_poll); done: kfree(post_cycles_start); kfree(post_cycles_stop); kfree(poll_cycles_start); kfree(poll_cycles_stop); kfree(last_poll_cycles_start); } static void krping_rlat_test_server(struct krping_cb *cb) { const struct ib_send_wr *bad_wr; struct ib_wc wc; int ret; /* Spin waiting for client's Start STAG/TO/Len */ while (cb->state < RDMA_READ_ADV) { krping_cq_event_handler(cb->cq, cb); } /* Send STAG/TO/Len to client */ krping_format_send(cb, cb->start_dma_addr); ret = ib_post_send(cb->qp, &cb->sq_wr, &bad_wr); if (ret) { printk(KERN_ERR PFX "post send error %d\n", ret); return; } /* Spin waiting for send completion */ while ((ret = ib_poll_cq(cb->cq, 1, &wc) == 0)); if (ret < 0) { printk(KERN_ERR PFX "poll error %d\n", ret); return; } if (wc.status) { printk(KERN_ERR PFX "send completiong error %d\n", wc.status); return; } wait_event_interruptible(cb->sem, cb->state == ERROR); } static void krping_wlat_test_server(struct krping_cb *cb) { const struct ib_send_wr *bad_wr; struct ib_wc wc; int ret; /* Spin waiting for client's Start STAG/TO/Len */ while (cb->state < RDMA_READ_ADV) { krping_cq_event_handler(cb->cq, cb); } /* Send STAG/TO/Len to client */ krping_format_send(cb, cb->start_dma_addr); ret = ib_post_send(cb->qp, &cb->sq_wr, &bad_wr); if (ret) { printk(KERN_ERR PFX "post send error %d\n", ret); return; } /* Spin waiting for send completion */ while ((ret = ib_poll_cq(cb->cq, 1, &wc) == 0)); if (ret < 0) { printk(KERN_ERR PFX "poll error %d\n", ret); return; } if (wc.status) { printk(KERN_ERR PFX "send completiong error %d\n", wc.status); return; } wlat_test(cb); wait_event_interruptible(cb->sem, cb->state == ERROR); } static void krping_bw_test_server(struct krping_cb *cb) { const struct ib_send_wr *bad_wr; struct ib_wc wc; 
int ret; /* Spin waiting for client's Start STAG/TO/Len */ while (cb->state < RDMA_READ_ADV) { krping_cq_event_handler(cb->cq, cb); } /* Send STAG/TO/Len to client */ krping_format_send(cb, cb->start_dma_addr); ret = ib_post_send(cb->qp, &cb->sq_wr, &bad_wr); if (ret) { printk(KERN_ERR PFX "post send error %d\n", ret); return; } /* Spin waiting for send completion */ while ((ret = ib_poll_cq(cb->cq, 1, &wc) == 0)); if (ret < 0) { printk(KERN_ERR PFX "poll error %d\n", ret); return; } if (wc.status) { printk(KERN_ERR PFX "send completiong error %d\n", wc.status); return; } if (cb->duplex) bw_test(cb); wait_event_interruptible(cb->sem, cb->state == ERROR); } static int reg_supported(struct ib_device *dev) { u64 needed_flags = IB_DEVICE_MEM_MGT_EXTENSIONS; if ((dev->attrs.device_cap_flags & needed_flags) != needed_flags) { printk(KERN_ERR PFX "Fastreg not supported - device_cap_flags 0x%llx\n", (unsigned long long)dev->attrs.device_cap_flags); return 0; } DEBUG_LOG("Fastreg supported - device_cap_flags 0x%llx\n", (unsigned long long)dev->attrs.device_cap_flags); return 1; } static void fill_sockaddr(struct sockaddr_storage *sin, struct krping_cb *cb) { memset(sin, 0, sizeof(*sin)); if (cb->addr_type == AF_INET) { struct sockaddr_in *sin4 = (struct sockaddr_in *)sin; sin4->sin_len = sizeof(*sin4); sin4->sin_family = AF_INET; memcpy((void *)&sin4->sin_addr.s_addr, cb->addr, 4); sin4->sin_port = cb->port; } else if (cb->addr_type == AF_INET6) { struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)sin; sin6->sin6_len = sizeof(*sin6); sin6->sin6_family = AF_INET6; memcpy((void *)&sin6->sin6_addr, cb->addr, 16); sin6->sin6_port = cb->port; } } static int krping_bind_server(struct krping_cb *cb) { struct sockaddr_storage sin; int ret; fill_sockaddr(&sin, cb); ret = rdma_bind_addr(cb->cm_id, (struct sockaddr *)&sin); if (ret) { printk(KERN_ERR PFX "rdma_bind_addr error %d\n", ret); return ret; } DEBUG_LOG("rdma_bind_addr successful\n"); DEBUG_LOG("rdma_listen\n"); ret = rdma_listen(cb->cm_id, 3); if (ret) { printk(KERN_ERR PFX "rdma_listen failed: %d\n", ret); return ret; } wait_event_interruptible(cb->sem, cb->state >= CONNECT_REQUEST); if (cb->state != CONNECT_REQUEST) { printk(KERN_ERR PFX "wait for CONNECT_REQUEST state %d\n", cb->state); return -1; } if (!reg_supported(cb->child_cm_id->device)) return -EINVAL; return 0; } static void krping_run_server(struct krping_cb *cb) { const struct ib_recv_wr *bad_wr; int ret; ret = krping_bind_server(cb); if (ret) return; ret = krping_setup_qp(cb, cb->child_cm_id); if (ret) { printk(KERN_ERR PFX "setup_qp failed: %d\n", ret); goto err0; } ret = krping_setup_buffers(cb); if (ret) { printk(KERN_ERR PFX "krping_setup_buffers failed: %d\n", ret); goto err1; } ret = ib_post_recv(cb->qp, &cb->rq_wr, &bad_wr); if (ret) { printk(KERN_ERR PFX "ib_post_recv failed: %d\n", ret); goto err2; } ret = krping_accept(cb); if (ret) { printk(KERN_ERR PFX "connect error %d\n", ret); goto err2; } if (cb->wlat) krping_wlat_test_server(cb); else if (cb->rlat) krping_rlat_test_server(cb); else if (cb->bw) krping_bw_test_server(cb); else krping_test_server(cb); rdma_disconnect(cb->child_cm_id); err2: krping_free_buffers(cb); err1: krping_free_qp(cb); err0: rdma_destroy_id(cb->child_cm_id); } static void krping_test_client(struct krping_cb *cb) { int ping, start, cc, i, ret; const struct ib_send_wr *bad_wr; unsigned char c; start = 65; for (ping = 0; !cb->count || ping < cb->count; ping++) { cb->state = RDMA_READ_ADV; /* Put some ascii text in the buffer. 
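		 * Each ping is "rdma-ping-%d: " followed by a cycling run
		 * of the ASCII characters 65..122, NUL terminated, so the
		 * echoed data can be compared with memcmp() when the
		 * validate option is set.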
*/ cc = sprintf(cb->start_buf, "rdma-ping-%d: ", ping); for (i = cc, c = start; i < cb->size; i++) { cb->start_buf[i] = c; c++; if (c > 122) c = 65; } start++; if (start > 122) start = 65; cb->start_buf[cb->size - 1] = 0; krping_format_send(cb, cb->start_dma_addr); if (cb->state == ERROR) { printk(KERN_ERR PFX "krping_format_send failed\n"); break; } ret = ib_post_send(cb->qp, &cb->sq_wr, &bad_wr); if (ret) { printk(KERN_ERR PFX "post send error %d\n", ret); break; } /* Wait for server to ACK */ wait_event_interruptible(cb->sem, cb->state >= RDMA_WRITE_ADV); if (cb->state != RDMA_WRITE_ADV) { printk(KERN_ERR PFX "wait for RDMA_WRITE_ADV state %d\n", cb->state); break; } krping_format_send(cb, cb->rdma_dma_addr); ret = ib_post_send(cb->qp, &cb->sq_wr, &bad_wr); if (ret) { printk(KERN_ERR PFX "post send error %d\n", ret); break; } /* Wait for the server to say the RDMA Write is complete. */ wait_event_interruptible(cb->sem, cb->state >= RDMA_WRITE_COMPLETE); if (cb->state != RDMA_WRITE_COMPLETE) { printk(KERN_ERR PFX "wait for RDMA_WRITE_COMPLETE state %d\n", cb->state); break; } if (cb->validate) if (memcmp(cb->start_buf, cb->rdma_buf, cb->size)) { printk(KERN_ERR PFX "data mismatch!\n"); break; } if (cb->verbose) printk(KERN_INFO PFX "ping data: %s\n", cb->rdma_buf); #ifdef SLOW_KRPING wait_event_interruptible_timeout(cb->sem, cb->state == ERROR, HZ); #endif } } static void krping_rlat_test_client(struct krping_cb *cb) { const struct ib_send_wr *bad_wr; struct ib_wc wc; int ret; cb->state = RDMA_READ_ADV; /* Send STAG/TO/Len to client */ krping_format_send(cb, cb->start_dma_addr); if (cb->state == ERROR) { printk(KERN_ERR PFX "krping_format_send failed\n"); return; } ret = ib_post_send(cb->qp, &cb->sq_wr, &bad_wr); if (ret) { printk(KERN_ERR PFX "post send error %d\n", ret); return; } /* Spin waiting for send completion */ while ((ret = ib_poll_cq(cb->cq, 1, &wc) == 0)); if (ret < 0) { printk(KERN_ERR PFX "poll error %d\n", ret); return; } if (wc.status) { printk(KERN_ERR PFX "send completion error %d\n", wc.status); return; } /* Spin waiting for server's Start STAG/TO/Len */ while (cb->state < RDMA_WRITE_ADV) { krping_cq_event_handler(cb->cq, cb); } #if 0 { int i; struct timeval start, stop; time_t sec; suseconds_t usec; unsigned long long elapsed; struct ib_wc wc; const struct ib_send_wr *bad_wr; int ne; cb->rdma_sq_wr.wr.opcode = IB_WR_RDMA_WRITE; cb->rdma_sq_wr.rkey = cb->remote_rkey; cb->rdma_sq_wr.remote_addr = cb->remote_addr; cb->rdma_sq_wr.wr.sg_list->length = 0; cb->rdma_sq_wr.wr.num_sge = 0; microtime(&start); for (i=0; i < 100000; i++) { if (ib_post_send(cb->qp, &cb->rdma_sq_wr.wr, &bad_wr)) { printk(KERN_ERR PFX "Couldn't post send\n"); return; } do { ne = ib_poll_cq(cb->cq, 1, &wc); } while (ne == 0); if (ne < 0) { printk(KERN_ERR PFX "poll CQ failed %d\n", ne); return; } if (wc.status != IB_WC_SUCCESS) { printk(KERN_ERR PFX "Completion wth error at %s:\n", cb->server ? 
"server" : "client"); printk(KERN_ERR PFX "Failed status %d: wr_id %d\n", wc.status, (int) wc.wr_id); return; } } microtime(&stop); if (stop.tv_usec < start.tv_usec) { stop.tv_usec += 1000000; stop.tv_sec -= 1; } sec = stop.tv_sec - start.tv_sec; usec = stop.tv_usec - start.tv_usec; elapsed = sec * 1000000 + usec; printk(KERN_ERR PFX "0B-write-lat iters 100000 usec %llu\n", elapsed); } #endif rlat_test(cb); } static void krping_wlat_test_client(struct krping_cb *cb) { const struct ib_send_wr *bad_wr; struct ib_wc wc; int ret; cb->state = RDMA_READ_ADV; /* Send STAG/TO/Len to client */ krping_format_send(cb, cb->start_dma_addr); if (cb->state == ERROR) { printk(KERN_ERR PFX "krping_format_send failed\n"); return; } ret = ib_post_send(cb->qp, &cb->sq_wr, &bad_wr); if (ret) { printk(KERN_ERR PFX "post send error %d\n", ret); return; } /* Spin waiting for send completion */ while ((ret = ib_poll_cq(cb->cq, 1, &wc) == 0)); if (ret < 0) { printk(KERN_ERR PFX "poll error %d\n", ret); return; } if (wc.status) { printk(KERN_ERR PFX "send completion error %d\n", wc.status); return; } /* Spin waiting for server's Start STAG/TO/Len */ while (cb->state < RDMA_WRITE_ADV) { krping_cq_event_handler(cb->cq, cb); } wlat_test(cb); } static void krping_bw_test_client(struct krping_cb *cb) { const struct ib_send_wr *bad_wr; struct ib_wc wc; int ret; cb->state = RDMA_READ_ADV; /* Send STAG/TO/Len to client */ krping_format_send(cb, cb->start_dma_addr); if (cb->state == ERROR) { printk(KERN_ERR PFX "krping_format_send failed\n"); return; } ret = ib_post_send(cb->qp, &cb->sq_wr, &bad_wr); if (ret) { printk(KERN_ERR PFX "post send error %d\n", ret); return; } /* Spin waiting for send completion */ while ((ret = ib_poll_cq(cb->cq, 1, &wc) == 0)); if (ret < 0) { printk(KERN_ERR PFX "poll error %d\n", ret); return; } if (wc.status) { printk(KERN_ERR PFX "send completion error %d\n", wc.status); return; } /* Spin waiting for server's Start STAG/TO/Len */ while (cb->state < RDMA_WRITE_ADV) { krping_cq_event_handler(cb->cq, cb); } bw_test(cb); } /* * Manual qp flush test */ static void flush_qp(struct krping_cb *cb) { struct ib_send_wr wr = { 0 }; const struct ib_send_wr *bad; struct ib_recv_wr recv_wr = { 0 }; const struct ib_recv_wr *recv_bad; struct ib_wc wc; int ret; int flushed = 0; int ccnt = 0; rdma_disconnect(cb->cm_id); DEBUG_LOG("disconnected!\n"); wr.opcode = IB_WR_SEND; wr.wr_id = 0xdeadbeefcafebabe; ret = ib_post_send(cb->qp, &wr, &bad); if (ret) { printk(KERN_ERR PFX "%s post_send failed ret %d\n", __func__, ret); return; } recv_wr.wr_id = 0xcafebabedeadbeef; ret = ib_post_recv(cb->qp, &recv_wr, &recv_bad); if (ret) { printk(KERN_ERR PFX "%s post_recv failed ret %d\n", __func__, ret); return; } /* poll until the flush WRs complete */ do { ret = ib_poll_cq(cb->cq, 1, &wc); if (ret < 0) { printk(KERN_ERR PFX "ib_poll_cq failed %d\n", ret); return; } if (ret == 0) continue; ccnt++; if (wc.wr_id == 0xdeadbeefcafebabe || wc.wr_id == 0xcafebabedeadbeef) flushed++; } while (flushed != 2); DEBUG_LOG("qp_flushed! 
ccnt %u\n", ccnt); } static void krping_fr_test(struct krping_cb *cb) { struct ib_send_wr inv; const struct ib_send_wr *bad; struct ib_reg_wr fr; struct ib_wc wc; u8 key = 0; struct ib_mr *mr; int ret; int size = cb->size; int plen = (((size - 1) & PAGE_MASK) + PAGE_SIZE) >> PAGE_SHIFT; unsigned long start; int count = 0; int scnt = 0; struct scatterlist sg = {0}; mr = ib_alloc_mr(cb->pd, IB_MR_TYPE_MEM_REG, plen); if (IS_ERR(mr)) { printk(KERN_ERR PFX "ib_alloc_mr failed %ld\n", PTR_ERR(mr)); return; } sg_dma_address(&sg) = (dma_addr_t)0xcafebabe0000ULL; sg_dma_len(&sg) = size; ret = ib_map_mr_sg(mr, &sg, 1, NULL, PAGE_SIZE); if (ret <= 0) { printk(KERN_ERR PFX "ib_map_mr_sge err %d\n", ret); goto err2; } memset(&fr, 0, sizeof fr); fr.wr.opcode = IB_WR_REG_MR; fr.access = IB_ACCESS_REMOTE_WRITE | IB_ACCESS_LOCAL_WRITE; fr.mr = mr; fr.wr.next = &inv; memset(&inv, 0, sizeof inv); inv.opcode = IB_WR_LOCAL_INV; inv.send_flags = IB_SEND_SIGNALED; DEBUG_LOG("fr_test: stag index 0x%x plen %u size %u depth %u\n", mr->rkey >> 8, plen, cb->size, cb->txdepth); start = time_uptime; while (!cb->count || count <= cb->count) { if (SIGPENDING(curthread)) { printk(KERN_ERR PFX "signal!\n"); break; } if ((time_uptime - start) >= 9) { DEBUG_LOG("fr_test: pausing 1 second! count %u latest size %u plen %u\n", count, size, plen); wait_event_interruptible_timeout(cb->sem, cb->state == ERROR, HZ); if (cb->state == ERROR) break; start = time_uptime; } while (scnt < (cb->txdepth>>1)) { ib_update_fast_reg_key(mr, ++key); fr.key = mr->rkey; inv.ex.invalidate_rkey = mr->rkey; size = arc4random() % cb->size; if (size == 0) size = cb->size; sg_dma_len(&sg) = size; ret = ib_map_mr_sg(mr, &sg, 1, NULL, PAGE_SIZE); if (ret <= 0) { printk(KERN_ERR PFX "ib_map_mr_sge err %d\n", ret); goto err2; } ret = ib_post_send(cb->qp, &fr.wr, &bad); if (ret) { printk(KERN_ERR PFX "ib_post_send failed %d\n", ret); goto err2; } scnt++; } ret = ib_poll_cq(cb->cq, 1, &wc); if (ret < 0) { printk(KERN_ERR PFX "ib_poll_cq failed %d\n", ret); goto err2; } if (ret == 1) { if (wc.status) { printk(KERN_ERR PFX "completion error %u\n", wc.status); goto err2; } count++; scnt--; } } err2: flush_qp(cb); DEBUG_LOG("fr_test: done!\n"); ib_dereg_mr(mr); } static int krping_connect_client(struct krping_cb *cb) { struct rdma_conn_param conn_param; int ret; memset(&conn_param, 0, sizeof conn_param); conn_param.responder_resources = 1; conn_param.initiator_depth = 1; conn_param.retry_count = 10; ret = rdma_connect(cb->cm_id, &conn_param); if (ret) { printk(KERN_ERR PFX "rdma_connect error %d\n", ret); return ret; } wait_event_interruptible(cb->sem, cb->state >= CONNECTED); if (cb->state == ERROR) { printk(KERN_ERR PFX "wait for CONNECTED state %d\n", cb->state); return -1; } DEBUG_LOG("rdma_connect successful\n"); return 0; } static int krping_bind_client(struct krping_cb *cb) { struct sockaddr_storage sin; int ret; fill_sockaddr(&sin, cb); ret = rdma_resolve_addr(cb->cm_id, NULL, (struct sockaddr *)&sin, 2000); if (ret) { printk(KERN_ERR PFX "rdma_resolve_addr error %d\n", ret); return ret; } wait_event_interruptible(cb->sem, cb->state >= ROUTE_RESOLVED); if (cb->state != ROUTE_RESOLVED) { printk(KERN_ERR PFX "addr/route resolution did not resolve: state %d\n", cb->state); return -EINTR; } if (!reg_supported(cb->cm_id->device)) return -EINVAL; DEBUG_LOG("rdma_resolve_addr - rdma_resolve_route successful\n"); return 0; } static void krping_run_client(struct krping_cb *cb) { const struct ib_recv_wr *bad_wr; int ret; /* set type of service, if any */ if 
(cb->tos != 0) rdma_set_service_type(cb->cm_id, cb->tos); ret = krping_bind_client(cb); if (ret) return; ret = krping_setup_qp(cb, cb->cm_id); if (ret) { printk(KERN_ERR PFX "setup_qp failed: %d\n", ret); return; } ret = krping_setup_buffers(cb); if (ret) { printk(KERN_ERR PFX "krping_setup_buffers failed: %d\n", ret); goto err1; } ret = ib_post_recv(cb->qp, &cb->rq_wr, &bad_wr); if (ret) { printk(KERN_ERR PFX "ib_post_recv failed: %d\n", ret); goto err2; } ret = krping_connect_client(cb); if (ret) { printk(KERN_ERR PFX "connect error %d\n", ret); goto err2; } if (cb->wlat) krping_wlat_test_client(cb); else if (cb->rlat) krping_rlat_test_client(cb); else if (cb->bw) krping_bw_test_client(cb); else if (cb->frtest) krping_fr_test(cb); else krping_test_client(cb); rdma_disconnect(cb->cm_id); err2: krping_free_buffers(cb); err1: krping_free_qp(cb); } static uint16_t krping_get_ipv6_scope_id(char *name) { struct ifnet *ifp; uint16_t retval; if (name == NULL) return (0); CURVNET_SET_QUIET(TD_TO_VNET(curthread)); ifp = ifunit_ref(name); CURVNET_RESTORE(); if (ifp == NULL) return (0); - retval = ifp->if_index; + retval = if_getindex(ifp); if_rele(ifp); return (retval); } int krping_doit(char *cmd) { struct krping_cb *cb; int op; int ret = 0; char *optarg; char *scope; unsigned long optint; cb = kzalloc(sizeof(*cb), GFP_KERNEL); if (!cb) return -ENOMEM; mutex_lock(&krping_mutex); list_add_tail(&cb->list, &krping_cbs); mutex_unlock(&krping_mutex); cb->server = -1; cb->state = IDLE; cb->size = 64; cb->txdepth = RPING_SQ_DEPTH; init_waitqueue_head(&cb->sem); while ((op = krping_getopt("krping", &cmd, krping_opts, NULL, &optarg, &optint)) != 0) { switch (op) { case 'a': cb->addr_str = optarg; cb->addr_type = AF_INET; DEBUG_LOG("ipaddr (%s)\n", optarg); if (inet_pton(AF_INET, optarg, cb->addr) != 1) { printk(KERN_ERR PFX "bad addr string %s\n", optarg); ret = EINVAL; } break; case 'A': cb->addr_str = optarg; cb->addr_type = AF_INET6; DEBUG_LOG("ipv6addr (%s)\n", optarg); scope = strstr(optarg, "%"); /* extract scope ID, if any */ if (scope != NULL) *scope++ = 0; /* extract IPv6 network address */ if (inet_pton(AF_INET6, optarg, cb->addr) != 1) { printk(KERN_ERR PFX "bad addr string %s\n", optarg); ret = EINVAL; } else if (IN6_IS_SCOPE_LINKLOCAL((struct in6_addr *)cb->addr) || IN6_IS_ADDR_MC_INTFACELOCAL((struct in6_addr *)cb->addr)) { uint16_t scope_id = krping_get_ipv6_scope_id(scope); DEBUG_LOG("ipv6 scope ID = %d\n", scope_id); cb->addr[2] = scope_id >> 8; cb->addr[3] = scope_id & 0xFF; } break; case 'p': cb->port = htons(optint); DEBUG_LOG("port %d\n", (int)optint); break; case 'P': cb->poll = 1; DEBUG_LOG("server\n"); break; case 's': cb->server = 1; DEBUG_LOG("server\n"); break; case 'c': cb->server = 0; DEBUG_LOG("client\n"); break; case 'S': cb->size = optint; if ((cb->size < 1) || (cb->size > RPING_BUFSIZE)) { printk(KERN_ERR PFX "Invalid size %d " "(valid range is 1 to %d)\n", cb->size, RPING_BUFSIZE); ret = EINVAL; } else DEBUG_LOG("size %d\n", (int)optint); break; case 'C': cb->count = optint; if (cb->count < 0) { printk(KERN_ERR PFX "Invalid count %d\n", cb->count); ret = EINVAL; } else DEBUG_LOG("count %d\n", (int) cb->count); break; case 'v': cb->verbose++; DEBUG_LOG("verbose\n"); break; case 'V': cb->validate++; DEBUG_LOG("validate data\n"); break; case 'l': cb->wlat++; break; case 'L': cb->rlat++; break; case 'B': cb->bw++; break; case 'd': cb->duplex++; break; case 'I': cb->server_invalidate = 1; break; case 't': cb->tos = optint; DEBUG_LOG("type of service, tos=%d\n", (int) cb->tos); 
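			/*
			 * Applied to the cm_id via rdma_set_service_type()
			 * before the client resolves and connects.
			 */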
break; case 'T': cb->txdepth = optint; DEBUG_LOG("txdepth %d\n", (int) cb->txdepth); break; case 'Z': cb->local_dma_lkey = 1; DEBUG_LOG("using local dma lkey\n"); break; case 'R': cb->read_inv = 1; DEBUG_LOG("using read-with-inv\n"); break; case 'f': cb->frtest = 1; DEBUG_LOG("fast-reg test!\n"); break; default: printk(KERN_ERR PFX "unknown opt %s\n", optarg); ret = -EINVAL; break; } } if (ret) goto out; if (cb->server == -1) { printk(KERN_ERR PFX "must be either client or server\n"); ret = -EINVAL; goto out; } if (cb->server && cb->frtest) { printk(KERN_ERR PFX "must be client to run frtest\n"); ret = -EINVAL; goto out; } if ((cb->frtest + cb->bw + cb->rlat + cb->wlat) > 1) { printk(KERN_ERR PFX "Pick only one test: fr, bw, rlat, wlat\n"); ret = -EINVAL; goto out; } if (cb->wlat || cb->rlat || cb->bw) { printk(KERN_ERR PFX "wlat, rlat, and bw tests only support mem_mode MR - which is no longer supported\n"); ret = -EINVAL; goto out; } cb->cm_id = rdma_create_id(TD_TO_VNET(curthread), krping_cma_event_handler, cb, RDMA_PS_TCP, IB_QPT_RC); if (IS_ERR(cb->cm_id)) { ret = PTR_ERR(cb->cm_id); printk(KERN_ERR PFX "rdma_create_id error %d\n", ret); goto out; } DEBUG_LOG("created cm_id %p\n", cb->cm_id); if (cb->server) krping_run_server(cb); else krping_run_client(cb); DEBUG_LOG("destroy cm_id %p\n", cb->cm_id); rdma_destroy_id(cb->cm_id); out: mutex_lock(&krping_mutex); list_del(&cb->list); mutex_unlock(&krping_mutex); kfree(cb); return ret; } void krping_walk_cb_list(void (*f)(struct krping_stats *, void *), void *arg) { struct krping_cb *cb; mutex_lock(&krping_mutex); list_for_each_entry(cb, &krping_cbs, list) (*f)(cb->pd ? &cb->stats : NULL, arg); mutex_unlock(&krping_mutex); } void krping_cancel_all(void) { struct krping_cb *cb; mutex_lock(&krping_mutex); list_for_each_entry(cb, &krping_cbs, list) { cb->state = ERROR; wake_up_interruptible(&cb->sem); } mutex_unlock(&krping_mutex); } diff --git a/sys/dev/irdma/fbsd_kcompat.c b/sys/dev/irdma/fbsd_kcompat.c index 7a17b7e5f0f0..001b73342bc9 100644 --- a/sys/dev/irdma/fbsd_kcompat.c +++ b/sys/dev/irdma/fbsd_kcompat.c @@ -1,791 +1,798 @@ /*- * SPDX-License-Identifier: GPL-2.0 or Linux-OpenIB * * Copyright (c) 2021 - 2022 Intel Corporation * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU * General Public License (GPL) Version 2, available from the file * COPYING in the main directory of this source tree, or the * OpenFabrics.org BSD license below: * * Redistribution and use in source and binary forms, with or * without modification, are permitted provided that the following * conditions are met: * * - Redistributions of source code must retain the above * copyright notice, this list of conditions and the following * disclaimer. * * - Redistributions in binary form must reproduce the above * copyright notice, this list of conditions and the following * disclaimer in the documentation and/or other materials * provided with the distribution. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
*/ /*$FreeBSD$*/ #include "osdep.h" #include "ice_rdma.h" #include "irdma_di_if.h" #include "irdma_main.h" #include #include #include #include #include /* additional QP debuging option. Keep false unless needed */ bool irdma_upload_context = false; inline u32 irdma_rd32(struct irdma_dev_ctx *dev_ctx, u32 reg){ KASSERT(reg < dev_ctx->mem_bus_space_size, ("irdma: register offset %#jx too large (max is %#jx)", (uintmax_t)reg, (uintmax_t)dev_ctx->mem_bus_space_size)); return (bus_space_read_4(dev_ctx->mem_bus_space_tag, dev_ctx->mem_bus_space_handle, reg)); } inline void irdma_wr32(struct irdma_dev_ctx *dev_ctx, u32 reg, u32 value) { KASSERT(reg < dev_ctx->mem_bus_space_size, ("irdma: register offset %#jx too large (max is %#jx)", (uintmax_t)reg, (uintmax_t)dev_ctx->mem_bus_space_size)); bus_space_write_4(dev_ctx->mem_bus_space_tag, dev_ctx->mem_bus_space_handle, reg, value); } inline u64 irdma_rd64(struct irdma_dev_ctx *dev_ctx, u32 reg){ KASSERT(reg < dev_ctx->mem_bus_space_size, ("irdma: register offset %#jx too large (max is %#jx)", (uintmax_t)reg, (uintmax_t)dev_ctx->mem_bus_space_size)); return (bus_space_read_8(dev_ctx->mem_bus_space_tag, dev_ctx->mem_bus_space_handle, reg)); } inline void irdma_wr64(struct irdma_dev_ctx *dev_ctx, u32 reg, u64 value) { KASSERT(reg < dev_ctx->mem_bus_space_size, ("irdma: register offset %#jx too large (max is %#jx)", (uintmax_t)reg, (uintmax_t)dev_ctx->mem_bus_space_size)); bus_space_write_8(dev_ctx->mem_bus_space_tag, dev_ctx->mem_bus_space_handle, reg, value); } void irdma_request_reset(struct irdma_pci_f *rf) { struct ice_rdma_peer *peer = rf->peer_info; struct ice_rdma_request req = {0}; req.type = ICE_RDMA_EVENT_RESET; printf("%s:%d requesting pf-reset\n", __func__, __LINE__); IRDMA_DI_REQ_HANDLER(peer, &req); } int irdma_register_qset(struct irdma_sc_vsi *vsi, struct irdma_ws_node *tc_node) { struct irdma_device *iwdev = vsi->back_vsi; struct ice_rdma_peer *peer = iwdev->rf->peer_info; struct ice_rdma_request req = {0}; struct ice_rdma_qset_update *res = &req.res; req.type = ICE_RDMA_EVENT_QSET_REGISTER; res->cnt_req = 1; res->res_type = ICE_RDMA_QSET_ALLOC; res->qsets.qs_handle = tc_node->qs_handle; res->qsets.tc = tc_node->traffic_class; res->qsets.vsi_id = vsi->vsi_idx; IRDMA_DI_REQ_HANDLER(peer, &req); tc_node->l2_sched_node_id = res->qsets.teid; vsi->qos[tc_node->user_pri].l2_sched_node_id = res->qsets.teid; return 0; } void irdma_unregister_qset(struct irdma_sc_vsi *vsi, struct irdma_ws_node *tc_node) { struct irdma_device *iwdev = vsi->back_vsi; struct ice_rdma_peer *peer = iwdev->rf->peer_info; struct ice_rdma_request req = {0}; struct ice_rdma_qset_update *res = &req.res; req.type = ICE_RDMA_EVENT_QSET_REGISTER; res->res_allocated = 1; res->res_type = ICE_RDMA_QSET_FREE; res->qsets.vsi_id = vsi->vsi_idx; res->qsets.teid = tc_node->l2_sched_node_id; res->qsets.qs_handle = tc_node->qs_handle; IRDMA_DI_REQ_HANDLER(peer, &req); } void * hw_to_dev(struct irdma_hw *hw) { struct irdma_pci_f *rf; rf = container_of(hw, struct irdma_pci_f, hw); return rf->pcidev; } void irdma_free_hash_desc(void *desc) { return; } int irdma_init_hash_desc(void **desc) { return 0; } int irdma_ieq_check_mpacrc(void *desc, void *addr, u32 len, u32 val) { u32 crc = calculate_crc32c(0xffffffff, addr, len) ^ 0xffffffff; int ret_code = 0; if (crc != val) { irdma_pr_err("mpa crc check fail %x %x\n", crc, val); ret_code = -EINVAL; } printf("%s: result crc=%x value=%x\n", __func__, crc, val); return ret_code; } +static u_int +irdma_add_ipv6_cb(void *arg, struct ifaddr *addr, 
u_int count __unused) +{ + struct irdma_device *iwdev = arg; + struct sockaddr_in6 *sin6; + u32 local_ipaddr6[4] = {}; + char ip6buf[INET6_ADDRSTRLEN]; + u8 *mac_addr; + + sin6 = (struct sockaddr_in6 *)addr->ifa_addr; + + irdma_copy_ip_ntohl(local_ipaddr6, (u32 *)&sin6->sin6_addr); + + mac_addr = if_getlladdr(addr->ifa_ifp); + + printf("%s:%d IP=%s, MAC=%02x:%02x:%02x:%02x:%02x:%02x\n", + __func__, __LINE__, + ip6_sprintf(ip6buf, &sin6->sin6_addr), + mac_addr[0], mac_addr[1], mac_addr[2], + mac_addr[3], mac_addr[4], mac_addr[5]); + + irdma_manage_arp_cache(iwdev->rf, mac_addr, local_ipaddr6, + IRDMA_ARP_ADD); + return (0); +} + /** * irdma_add_ipv6_addr - add ipv6 address to the hw arp table * @iwdev: irdma device * @ifp: interface network device pointer */ static void irdma_add_ipv6_addr(struct irdma_device *iwdev, struct ifnet *ifp) { - struct ifaddr *ifa, *tmp; - struct sockaddr_in6 *sin6; - u32 local_ipaddr6[4]; - u8 *mac_addr; - char ip6buf[INET6_ADDRSTRLEN]; - if_addr_rlock(ifp); - IRDMA_TAILQ_FOREACH_SAFE(ifa, &ifp->if_addrhead, ifa_link, tmp) { - sin6 = (struct sockaddr_in6 *)ifa->ifa_addr; - if (sin6->sin6_family != AF_INET6) - continue; + if_foreach_addr_type(ifp, AF_INET6, irdma_add_ipv6_cb, iwdev); + if_addr_runlock(ifp); +} - irdma_copy_ip_ntohl(local_ipaddr6, (u32 *)&sin6->sin6_addr); - mac_addr = IF_LLADDR(ifp); +static u_int +irdma_add_ipv4_cb(void *arg, struct ifaddr *addr, u_int count __unused) +{ + struct irdma_device *iwdev = arg; + struct sockaddr_in *sin; + u32 ip_addr[4] = {}; + uint8_t *mac_addr; - printf("%s:%d IP=%s, MAC=%02x:%02x:%02x:%02x:%02x:%02x\n", - __func__, __LINE__, - ip6_sprintf(ip6buf, &sin6->sin6_addr), - mac_addr[0], mac_addr[1], mac_addr[2], - mac_addr[3], mac_addr[4], mac_addr[5]); + sin = (struct sockaddr_in *)addr->ifa_addr; - irdma_manage_arp_cache(iwdev->rf, mac_addr, local_ipaddr6, - IRDMA_ARP_ADD); + ip_addr[0] = ntohl(sin->sin_addr.s_addr); - } - if_addr_runlock(ifp); + mac_addr = if_getlladdr(addr->ifa_ifp); + + printf("%s:%d IP=%d.%d.%d.%d, MAC=%02x:%02x:%02x:%02x:%02x:%02x\n", + __func__, __LINE__, + ip_addr[0] >> 24, + (ip_addr[0] >> 16) & 0xFF, + (ip_addr[0] >> 8) & 0xFF, + ip_addr[0] & 0xFF, + mac_addr[0], mac_addr[1], mac_addr[2], + mac_addr[3], mac_addr[4], mac_addr[5]); + + irdma_manage_arp_cache(iwdev->rf, mac_addr, ip_addr, + IRDMA_ARP_ADD); + return (0); } /** * irdma_add_ipv4_addr - add ipv4 address to the hw arp table * @iwdev: irdma device * @ifp: interface network device pointer */ static void irdma_add_ipv4_addr(struct irdma_device *iwdev, struct ifnet *ifp) { - struct ifaddr *ifa; - struct sockaddr_in *sin; - u32 ip_addr[4] = {}; - u8 *mac_addr; - if_addr_rlock(ifp); - IRDMA_TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) { - sin = (struct sockaddr_in *)ifa->ifa_addr; - if (sin->sin_family != AF_INET) - continue; - - ip_addr[0] = ntohl(sin->sin_addr.s_addr); - mac_addr = IF_LLADDR(ifp); - - printf("%s:%d IP=%d.%d.%d.%d, MAC=%02x:%02x:%02x:%02x:%02x:%02x\n", - __func__, __LINE__, - ip_addr[0] >> 24, - (ip_addr[0] >> 16) & 0xFF, - (ip_addr[0] >> 8) & 0xFF, - ip_addr[0] & 0xFF, - mac_addr[0], mac_addr[1], mac_addr[2], - mac_addr[3], mac_addr[4], mac_addr[5]); - - irdma_manage_arp_cache(iwdev->rf, mac_addr, ip_addr, - IRDMA_ARP_ADD); - } + if_foreach_addr_type(ifp, AF_INET, irdma_add_ipv4_cb, iwdev); if_addr_runlock(ifp); } /** * irdma_add_ip - add ip addresses * @iwdev: irdma device * * Add ipv4/ipv6 addresses to the arp cache */ void irdma_add_ip(struct irdma_device *iwdev) { struct ifnet *ifp = iwdev->netdev; struct ifnet 
*ifv; int i; irdma_add_ipv4_addr(iwdev, ifp); irdma_add_ipv6_addr(iwdev, ifp); - for (i = 0; ifp->if_vlantrunk != NULL && i < VLAN_N_VID; ++i) { + for (i = 0; if_getvlantrunk(ifp) != NULL && i < VLAN_N_VID; ++i) { ifv = VLAN_DEVAT(ifp, i); if (!ifv) continue; irdma_add_ipv4_addr(iwdev, ifv); irdma_add_ipv6_addr(iwdev, ifv); } } static void irdma_ifaddrevent_handler(void *arg, struct ifnet *ifp, struct ifaddr *ifa, int event) { struct irdma_pci_f *rf = arg; struct ifnet *ifv = NULL; struct sockaddr_in *sin; struct epoch_tracker et; int arp_index = 0, i = 0; u32 ip[4] = {}; if (!ifa || !ifa->ifa_addr || !ifp) return; if (rf->iwdev->netdev != ifp) { - for (i = 0; rf->iwdev->netdev->if_vlantrunk != NULL && i < VLAN_N_VID; ++i) { + for (i = 0; if_getvlantrunk(rf->iwdev->netdev) != NULL && i < VLAN_N_VID; ++i) { NET_EPOCH_ENTER(et); ifv = VLAN_DEVAT(rf->iwdev->netdev, i); NET_EPOCH_EXIT(et); if (ifv == ifp) break; } if (ifv != ifp) return; } sin = (struct sockaddr_in *)ifa->ifa_addr; switch (event) { case IFADDR_EVENT_ADD: if (sin->sin_family == AF_INET) irdma_add_ipv4_addr(rf->iwdev, ifp); else if (sin->sin_family == AF_INET6) irdma_add_ipv6_addr(rf->iwdev, ifp); break; case IFADDR_EVENT_DEL: if (sin->sin_family == AF_INET) { ip[0] = ntohl(sin->sin_addr.s_addr); } else if (sin->sin_family == AF_INET6) { irdma_copy_ip_ntohl(ip, (u32 *)&((struct sockaddr_in6 *)sin)->sin6_addr); } else { break; } for_each_set_bit(arp_index, rf->allocated_arps, rf->arp_table_size) { if (!memcmp(rf->arp_table[arp_index].ip_addr, ip, sizeof(ip))) { irdma_manage_arp_cache(rf, rf->arp_table[arp_index].mac_addr, rf->arp_table[arp_index].ip_addr, IRDMA_ARP_DELETE); } } break; default: break; } } void irdma_reg_ipaddr_event_cb(struct irdma_pci_f *rf) { rf->irdma_ifaddr_event = EVENTHANDLER_REGISTER(ifaddr_event_ext, irdma_ifaddrevent_handler, rf, EVENTHANDLER_PRI_ANY); } void irdma_dereg_ipaddr_event_cb(struct irdma_pci_f *rf) { EVENTHANDLER_DEREGISTER(ifaddr_event_ext, rf->irdma_ifaddr_event); } static int irdma_get_route_ifp(struct sockaddr *dst_sin, struct ifnet *netdev, struct ifnet **ifp, struct sockaddr **nexthop, bool *gateway) { struct nhop_object *nh; if (dst_sin->sa_family == AF_INET6) nh = fib6_lookup(RT_DEFAULT_FIB, &((struct sockaddr_in6 *)dst_sin)->sin6_addr, 0, NHR_NONE, 0); else nh = fib4_lookup(RT_DEFAULT_FIB, ((struct sockaddr_in *)dst_sin)->sin_addr, 0, NHR_NONE, 0); if (!nh || (nh->nh_ifp != netdev && rdma_vlan_dev_real_dev(nh->nh_ifp) != netdev)) goto rt_not_found; *gateway = (nh->nh_flags & NHF_GATEWAY) ? true : false; *nexthop = (*gateway) ? 
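/*
 * irdma_reg_ipaddr_event_cb()/irdma_dereg_ipaddr_event_cb() above subscribe
 * to the ifaddr_event_ext eventhandler so that address additions and
 * deletions on the tracked interface (or one of its VLANs) are mirrored
 * into, or purged from, the hardware ARP table.  The registration pattern,
 * with made-up names (my_softc/my_ifaddr_cb), looks like:
 *
 *	static void
 *	my_ifaddr_cb(void *arg, struct ifnet *ifp, struct ifaddr *ifa,
 *	    int event)
 *	{
 *		if (event == IFADDR_EVENT_ADD)
 *			printf("address added on %s\n", if_name(ifp));
 *		else if (event == IFADDR_EVENT_DEL)
 *			printf("address removed on %s\n", if_name(ifp));
 *	}
 *
 *	sc->ifaddr_tag = EVENTHANDLER_REGISTER(ifaddr_event_ext,
 *	    my_ifaddr_cb, sc, EVENTHANDLER_PRI_ANY);
 *
 * and on teardown:
 *
 *	EVENTHANDLER_DEREGISTER(ifaddr_event_ext, sc->ifaddr_tag);
 */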
&nh->gw_sa : dst_sin; *ifp = nh->nh_ifp; return 0; rt_not_found: pr_err("irdma: route not found\n"); return -ENETUNREACH; } /** * irdma_get_dst_mac - get destination mac address * @cm_node: connection's node * @dst_sin: destination address information * @dst_mac: mac address array to return */ int irdma_get_dst_mac(struct irdma_cm_node *cm_node, struct sockaddr *dst_sin, u8 *dst_mac) { struct ifnet *netdev = cm_node->iwdev->netdev; #ifdef VIMAGE struct rdma_cm_id *rdma_id = (struct rdma_cm_id *)cm_node->cm_id->context; struct vnet *vnet = rdma_id->route.addr.dev_addr.net; #endif struct ifnet *ifp; struct llentry *lle; struct sockaddr *nexthop; struct epoch_tracker et; int err; bool gateway; NET_EPOCH_ENTER(et); CURVNET_SET_QUIET(vnet); err = irdma_get_route_ifp(dst_sin, netdev, &ifp, &nexthop, &gateway); if (err) goto get_route_fail; if (dst_sin->sa_family == AF_INET) { err = arpresolve(ifp, gateway, NULL, nexthop, dst_mac, NULL, &lle); } else if (dst_sin->sa_family == AF_INET6) { err = nd6_resolve(ifp, LLE_SF(AF_INET6, gateway), NULL, nexthop, dst_mac, NULL, &lle); } else { err = -EPROTONOSUPPORT; } get_route_fail: CURVNET_RESTORE(); NET_EPOCH_EXIT(et); if (err) { pr_err("failed to resolve neighbor address (err=%d)\n", err); return -ENETUNREACH; } return 0; } /** * irdma_addr_resolve_neigh - resolve neighbor address * @cm_node: connection's node * @dst_ip: remote ip address * @arpindex: if there is an arp entry */ int irdma_addr_resolve_neigh(struct irdma_cm_node *cm_node, u32 dst_ip, int arpindex) { struct irdma_device *iwdev = cm_node->iwdev; struct sockaddr_in dst_sin = {}; int err; u32 ip[4] = {}; u8 dst_mac[MAX_ADDR_LEN]; dst_sin.sin_len = sizeof(dst_sin); dst_sin.sin_family = AF_INET; dst_sin.sin_port = 0; dst_sin.sin_addr.s_addr = htonl(dst_ip); err = irdma_get_dst_mac(cm_node, (struct sockaddr *)&dst_sin, dst_mac); if (err) return arpindex; ip[0] = dst_ip; return irdma_add_arp(iwdev->rf, ip, dst_mac); } /** * irdma_addr_resolve_neigh_ipv6 - resolve neighbor ipv6 address * @cm_node: connection's node * @dest: remote ip address * @arpindex: if there is an arp entry */ int irdma_addr_resolve_neigh_ipv6(struct irdma_cm_node *cm_node, u32 *dest, int arpindex) { struct irdma_device *iwdev = cm_node->iwdev; struct sockaddr_in6 dst_addr = {}; int err; u8 dst_mac[MAX_ADDR_LEN]; dst_addr.sin6_family = AF_INET6; dst_addr.sin6_len = sizeof(dst_addr); - dst_addr.sin6_scope_id = iwdev->netdev->if_index; + dst_addr.sin6_scope_id = if_getindex(iwdev->netdev); irdma_copy_ip_htonl(dst_addr.sin6_addr.__u6_addr.__u6_addr32, dest); err = irdma_get_dst_mac(cm_node, (struct sockaddr *)&dst_addr, dst_mac); if (err) return arpindex; return irdma_add_arp(iwdev->rf, dest, dst_mac); } int irdma_resolve_neigh_lpb_chk(struct irdma_device *iwdev, struct irdma_cm_node *cm_node, struct irdma_cm_info *cm_info) { #ifdef VIMAGE struct rdma_cm_id *rdma_id = (struct rdma_cm_id *)cm_node->cm_id->context; struct vnet *vnet = rdma_id->route.addr.dev_addr.net; #endif int arpindex; int oldarpindex; bool is_lpb = false; CURVNET_SET_QUIET(vnet); is_lpb = cm_node->ipv4 ? 
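/*
 * irdma_get_dst_mac() above resolves a destination in two steps inside a
 * network epoch: a nexthop lookup (fib4_lookup()/fib6_lookup()), then L2
 * resolution with arpresolve() for IPv4 or nd6_resolve() for IPv6, using the
 * nexthop's gateway address when NHF_GATEWAY is set.  A trimmed, IPv4-only
 * sketch of that sequence (resolve_v4_mac is a made-up name; the driver
 * additionally pins the VNET and accepts VLAN parents of the nexthop ifp):
 *
 *	static int
 *	resolve_v4_mac(struct ifnet *ifp, struct sockaddr_in *dst, u_char *mac)
 *	{
 *		struct epoch_tracker et;
 *		struct nhop_object *nh;
 *		const struct sockaddr *next;
 *		struct llentry *lle;
 *		int is_gw, error;
 *
 *		NET_EPOCH_ENTER(et);
 *		nh = fib4_lookup(RT_DEFAULT_FIB, dst->sin_addr, 0, NHR_NONE, 0);
 *		if (nh == NULL || nh->nh_ifp != ifp) {
 *			NET_EPOCH_EXIT(et);
 *			return (ENETUNREACH);
 *		}
 *		is_gw = (nh->nh_flags & NHF_GATEWAY) != 0;
 *		next = is_gw ? &nh->gw_sa : (const struct sockaddr *)dst;
 *		error = arpresolve(ifp, is_gw, NULL, next, mac, NULL, &lle);
 *		NET_EPOCH_EXIT(et);
 *		return (error);
 *	}
 */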
irdma_ipv4_is_lpb(cm_node->loc_addr[0], cm_node->rem_addr[0]) : irdma_ipv6_is_lpb(cm_node->loc_addr, cm_node->rem_addr); CURVNET_RESTORE(); if (is_lpb) { cm_node->do_lpb = true; arpindex = irdma_arp_table(iwdev->rf, cm_node->rem_addr, NULL, IRDMA_ARP_RESOLVE); } else { oldarpindex = irdma_arp_table(iwdev->rf, cm_node->rem_addr, NULL, IRDMA_ARP_RESOLVE); if (cm_node->ipv4) arpindex = irdma_addr_resolve_neigh(cm_node, cm_info->rem_addr[0], oldarpindex); else arpindex = irdma_addr_resolve_neigh_ipv6(cm_node, cm_info->rem_addr, oldarpindex); } return arpindex; } /** * irdma_add_handler - add a handler to the list * @hdl: handler to be added to the handler list */ void irdma_add_handler(struct irdma_handler *hdl) { unsigned long flags; spin_lock_irqsave(&irdma_handler_lock, flags); list_add(&hdl->list, &irdma_handlers); spin_unlock_irqrestore(&irdma_handler_lock, flags); } /** * irdma_del_handler - delete a handler from the list * @hdl: handler to be deleted from the handler list */ void irdma_del_handler(struct irdma_handler *hdl) { unsigned long flags; spin_lock_irqsave(&irdma_handler_lock, flags); list_del(&hdl->list); spin_unlock_irqrestore(&irdma_handler_lock, flags); } /** * irdma_set_rf_user_cfg_params - apply user configurable settings * @rf: RDMA PCI function */ void irdma_set_rf_user_cfg_params(struct irdma_pci_f *rf) { int en_rem_endpoint_trk = 0; int limits_sel = 4; rf->en_rem_endpoint_trk = en_rem_endpoint_trk; rf->limits_sel = limits_sel; rf->rst_to = IRDMA_RST_TIMEOUT_HZ; /* Enable DCQCN algorithm by default */ rf->dcqcn_ena = true; } /** * irdma_sysctl_dcqcn_update - handle dcqcn_ena sysctl update * @arg1: pointer to rf * @arg2: unused * @oidp: sysctl oid structure * @req: sysctl request pointer */ static int irdma_sysctl_dcqcn_update(SYSCTL_HANDLER_ARGS) { struct irdma_pci_f *rf = (struct irdma_pci_f *)arg1; int ret; u8 dcqcn_ena = rf->dcqcn_ena; ret = sysctl_handle_8(oidp, &dcqcn_ena, 0, req); if ((ret) || (req->newptr == NULL)) return ret; if (dcqcn_ena == 0) rf->dcqcn_ena = false; else rf->dcqcn_ena = true; return 0; } /** * irdma_dcqcn_tunables_init - create tunables for dcqcn settings * @rf: RDMA PCI function * * Create DCQCN related sysctls for the driver. * dcqcn_ena is writeable settings and applicable to next QP creation or * context setting. * all other settings are of RDTUN type (read on driver load) and are * applicable only to CQP creation. 
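 *
 * For illustration (not part of this change): a write to the dcqcn_enable
 * knob is serviced by irdma_sysctl_dcqcn_update() above, which follows the
 * usual proc-sysctl shape of copying the value out/in with sysctl_handle_8()
 * and only applying it when req->newptr is set (i.e. on a write):
 *
 *	dcqcn_ena = rf->dcqcn_ena;
 *	ret = sysctl_handle_8(oidp, &dcqcn_ena, 0, req);
 *	if (ret != 0 || req->newptr == NULL)
 *		return ret;		(read path, or the copy failed)
 *	rf->dcqcn_ena = (dcqcn_ena != 0);
 *
 * With the tree created by irdma_init_tunable() the knob appears as
 * something like dev.irdma0.dcqcn_enable and can be toggled with sysctl(8).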
*/ void irdma_dcqcn_tunables_init(struct irdma_pci_f *rf) { struct sysctl_oid_list *irdma_sysctl_oid_list; irdma_sysctl_oid_list = SYSCTL_CHILDREN(rf->tun_info.irdma_sysctl_tree); SYSCTL_ADD_PROC(&rf->tun_info.irdma_sysctl_ctx, irdma_sysctl_oid_list, OID_AUTO, "dcqcn_enable", CTLFLAG_RW | CTLTYPE_U8, rf, 0, irdma_sysctl_dcqcn_update, "A", "enables DCQCN algorithm for RoCEv2 on all ports, default=true"); SYSCTL_ADD_U8(&rf->tun_info.irdma_sysctl_ctx, irdma_sysctl_oid_list, OID_AUTO, "dcqcn_cc_cfg_valid", CTLFLAG_RDTUN, &rf->dcqcn_params.cc_cfg_valid, 0, "set DCQCN parameters to be valid, default=false"); rf->dcqcn_params.min_dec_factor = 1; SYSCTL_ADD_U8(&rf->tun_info.irdma_sysctl_ctx, irdma_sysctl_oid_list, OID_AUTO, "dcqcn_min_dec_factor", CTLFLAG_RDTUN, &rf->dcqcn_params.min_dec_factor, 0, "set minimum percentage factor by which tx rate can be changed for CNP, Range: 1-100, default=1"); SYSCTL_ADD_U8(&rf->tun_info.irdma_sysctl_ctx, irdma_sysctl_oid_list, OID_AUTO, "dcqcn_min_rate_MBps", CTLFLAG_RDTUN, &rf->dcqcn_params.min_rate, 0, "set minimum rate limit value, in MBits per second, default=0"); rf->dcqcn_params.dcqcn_f = 5; SYSCTL_ADD_U8(&rf->tun_info.irdma_sysctl_ctx, irdma_sysctl_oid_list, OID_AUTO, "dcqcn_F", CTLFLAG_RDTUN, &rf->dcqcn_params.dcqcn_f, 0, "set number of times to stay in each stage of bandwidth recovery, default=5"); rf->dcqcn_params.dcqcn_t = 0x37; SYSCTL_ADD_U16(&rf->tun_info.irdma_sysctl_ctx, irdma_sysctl_oid_list, OID_AUTO, "dcqcn_T", CTLFLAG_RDTUN, &rf->dcqcn_params.dcqcn_t, 0, "set number of usecs that should elapse before increasing the CWND in DCQCN mode, default=0x37"); rf->dcqcn_params.dcqcn_b = 0x249f0; SYSCTL_ADD_U32(&rf->tun_info.irdma_sysctl_ctx, irdma_sysctl_oid_list, OID_AUTO, "dcqcn_B", CTLFLAG_RDTUN, &rf->dcqcn_params.dcqcn_b, 0, "set number of MSS to add to the congestion window in additive increase mode, default=0x249f0"); rf->dcqcn_params.rai_factor = 1; SYSCTL_ADD_U16(&rf->tun_info.irdma_sysctl_ctx, irdma_sysctl_oid_list, OID_AUTO, "dcqcn_rai_factor", CTLFLAG_RDTUN, &rf->dcqcn_params.rai_factor, 0, "set number of MSS to add to the congestion window in additive increase mode, default=1"); rf->dcqcn_params.hai_factor = 5; SYSCTL_ADD_U16(&rf->tun_info.irdma_sysctl_ctx, irdma_sysctl_oid_list, OID_AUTO, "dcqcn_hai_factor", CTLFLAG_RDTUN, &rf->dcqcn_params.hai_factor, 0, "set number of MSS to add to the congestion window in hyperactive increase mode, default=5"); rf->dcqcn_params.rreduce_mperiod = 50; SYSCTL_ADD_U32(&rf->tun_info.irdma_sysctl_ctx, irdma_sysctl_oid_list, OID_AUTO, "dcqcn_rreduce_mperiod", CTLFLAG_RDTUN, &rf->dcqcn_params.rreduce_mperiod, 0, "set minimum time between 2 consecutive rate reductions for a single flow, default=50"); } /** * irdma_dmamap_cb - callback for bus_dmamap_load */ static void irdma_dmamap_cb(void *arg, bus_dma_segment_t * segs, int nseg, int error) { if (error) return; *(bus_addr_t *) arg = segs->ds_addr; return; } /** * irdma_allocate_dma_mem - allocate dma memory * @hw: pointer to hw structure * @mem: structure holding memory information * @size: requested size * @alignment: requested alignment */ void * irdma_allocate_dma_mem(struct irdma_hw *hw, struct irdma_dma_mem *mem, u64 size, u32 alignment) { struct irdma_dev_ctx *dev_ctx = (struct irdma_dev_ctx *)hw->dev_context; device_t dev = dev_ctx->dev; void *va; int ret; ret = bus_dma_tag_create(bus_get_dma_tag(dev), /* parent */ alignment, 0, /* alignment, bounds */ BUS_SPACE_MAXADDR, /* lowaddr */ BUS_SPACE_MAXADDR, /* highaddr */ NULL, NULL, /* filter, filterarg */ 
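		/*
		 * The rest of irdma_allocate_dma_mem() follows the usual
		 * busdma sequence for a single-segment buffer (nsegments is 1
		 * and maxsize == maxsegsize == size): bus_dmamem_alloc() with
		 * BUS_DMA_ZERO returns zeroed kernel memory for this tag,
		 * bus_dmamap_load() establishes the mapping and reports the
		 * bus address through irdma_dmamap_cb(), which simply stores
		 * segs->ds_addr into mem->pa, and a PREREAD | PREWRITE
		 * bus_dmamap_sync() publishes the buffer before the hardware
		 * uses it.  Each error path unwinds whatever succeeded so far
		 * (bus_dmamem_free(), bus_dma_tag_destroy()).
		 */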
size, /* maxsize */ 1, /* nsegments */ size, /* maxsegsize */ BUS_DMA_ALLOCNOW, /* flags */ NULL, /* lockfunc */ NULL, /* lockfuncarg */ &mem->tag); if (ret != 0) { device_printf(dev, "%s: bus_dma_tag_create failed, error %u\n", __func__, ret); goto fail_0; } ret = bus_dmamem_alloc(mem->tag, (void **)&va, BUS_DMA_NOWAIT | BUS_DMA_ZERO, &mem->map); if (ret != 0) { device_printf(dev, "%s: bus_dmamem_alloc failed, error %u\n", __func__, ret); goto fail_1; } ret = bus_dmamap_load(mem->tag, mem->map, va, size, irdma_dmamap_cb, &mem->pa, BUS_DMA_NOWAIT); if (ret != 0) { device_printf(dev, "%s: bus_dmamap_load failed, error %u\n", __func__, ret); goto fail_2; } mem->nseg = 1; mem->size = size; bus_dmamap_sync(mem->tag, mem->map, BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); return va; fail_2: bus_dmamem_free(mem->tag, va, mem->map); fail_1: bus_dma_tag_destroy(mem->tag); fail_0: mem->map = NULL; mem->tag = NULL; return NULL; } /** * irdma_free_dma_mem - Memory free helper fn * @hw: pointer to hw structure * @mem: ptr to mem struct to free */ int irdma_free_dma_mem(struct irdma_hw *hw, struct irdma_dma_mem *mem) { if (!mem) return -EINVAL; bus_dmamap_sync(mem->tag, mem->map, BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE); bus_dmamap_unload(mem->tag, mem->map); if (!mem->va) return -ENOMEM; bus_dmamem_free(mem->tag, mem->va, mem->map); bus_dma_tag_destroy(mem->tag); mem->va = NULL; return 0; } inline void irdma_prm_rem_bitmapmem(struct irdma_hw *hw, struct irdma_chunk *chunk) { kfree(chunk->bitmapmem.va); } void irdma_cleanup_dead_qps(struct irdma_sc_vsi *vsi) { struct irdma_sc_qp *qp = NULL; struct irdma_qp *iwqp; struct irdma_pci_f *rf; u8 i; for (i = 0; i < IRDMA_MAX_USER_PRIORITY; i++) { qp = irdma_get_qp_from_list(&vsi->qos[i].qplist, qp); while (qp) { if (qp->qp_uk.qp_type == IRDMA_QP_TYPE_UDA) { qp = irdma_get_qp_from_list(&vsi->qos[i].qplist, qp); continue; } iwqp = qp->qp_uk.back_qp; rf = iwqp->iwdev->rf; irdma_free_dma_mem(rf->sc_dev.hw, &iwqp->q2_ctx_mem); irdma_free_dma_mem(rf->sc_dev.hw, &iwqp->kqp.dma_mem); kfree(iwqp->kqp.sq_wrid_mem); kfree(iwqp->kqp.rq_wrid_mem); qp = irdma_get_qp_from_list(&vsi->qos[i].qplist, qp); kfree(iwqp); } } } diff --git a/sys/dev/irdma/icrdma.c b/sys/dev/irdma/icrdma.c index a3dee284adce..fa87086e2f4d 100644 --- a/sys/dev/irdma/icrdma.c +++ b/sys/dev/irdma/icrdma.c @@ -1,721 +1,720 @@ /*- * SPDX-License-Identifier: GPL-2.0 or Linux-OpenIB * * Copyright (c) 2021 - 2022 Intel Corporation * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU * General Public License (GPL) Version 2, available from the file * COPYING in the main directory of this source tree, or the * OpenFabrics.org BSD license below: * * Redistribution and use in source and binary forms, with or * without modification, are permitted provided that the following * conditions are met: * * - Redistributions of source code must retain the above * copyright notice, this list of conditions and the following * disclaimer. * * - Redistributions in binary form must reproduce the above * copyright notice, this list of conditions and the following * disclaimer in the documentation and/or other materials * provided with the distribution. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ /*$FreeBSD$*/ #include #include #include #include #include #include #include #include #include #include "ice_rdma.h" #include "irdma_main.h" #include "icrdma_hw.h" #include "irdma_if.h" #include "irdma_di_if.h" /** * Driver version */ char irdma_driver_version[] = "1.1.11-k"; -#define pf_if_d(peer) peer->ifp->if_dunit - /** * irdma_init_tunable - prepare tunables * @rf: RDMA PCI function * @pf_id: id of the pf */ static void irdma_init_tunable(struct irdma_pci_f *rf, uint8_t pf_id) { struct sysctl_oid_list *irdma_sysctl_oid_list; char pf_name[16]; snprintf(pf_name, 15, "irdma%d", pf_id); sysctl_ctx_init(&rf->tun_info.irdma_sysctl_ctx); rf->tun_info.irdma_sysctl_tree = SYSCTL_ADD_NODE(&rf->tun_info.irdma_sysctl_ctx, SYSCTL_STATIC_CHILDREN(_dev), OID_AUTO, pf_name, CTLFLAG_RD, NULL, ""); irdma_sysctl_oid_list = SYSCTL_CHILDREN(rf->tun_info.irdma_sysctl_tree); /* * debug mask setting */ SYSCTL_ADD_S32(&rf->tun_info.irdma_sysctl_ctx, irdma_sysctl_oid_list, OID_AUTO, "debug", CTLFLAG_RWTUN, &rf->sc_dev.debug_mask, 0, "irdma debug"); /* * RoCEv2/iWARP setting RoCEv2 the default mode */ rf->tun_info.roce_ena = 1; SYSCTL_ADD_U8(&rf->tun_info.irdma_sysctl_ctx, irdma_sysctl_oid_list, OID_AUTO, "roce_enable", CTLFLAG_RDTUN, &rf->tun_info.roce_ena, 0, "RoCEv2 mode enable"); rf->protocol_used = IRDMA_IWARP_PROTOCOL_ONLY; if (rf->tun_info.roce_ena == 1) rf->protocol_used = IRDMA_ROCE_PROTOCOL_ONLY; else if (rf->tun_info.roce_ena != 0) printf("%s:%d wrong roce_enable value (%d), using iWARP\n", __func__, __LINE__, rf->tun_info.roce_ena); printf("%s:%d protocol: %s, roce_enable value: %d\n", __func__, __LINE__, (rf->protocol_used == IRDMA_IWARP_PROTOCOL_ONLY) ? 
"iWARP" : "RoCEv2", rf->tun_info.roce_ena); snprintf(rf->tun_info.drv_ver, IRDMA_VER_LEN, "%s", irdma_driver_version); SYSCTL_ADD_STRING(&rf->tun_info.irdma_sysctl_ctx, irdma_sysctl_oid_list, OID_AUTO, "drv_ver", CTLFLAG_RDTUN, rf->tun_info.drv_ver, IRDMA_VER_LEN, "driver version"); irdma_dcqcn_tunables_init(rf); } /** * irdma_find_handler - obtain hdl object to identify pf * @p_dev: the peer interface structure */ static struct irdma_handler * irdma_find_handler(struct ice_rdma_peer *p_dev) { struct irdma_handler *hdl; unsigned long flags; spin_lock_irqsave(&irdma_handler_lock, flags); list_for_each_entry(hdl, &irdma_handlers, list) { if (!hdl) continue; if (!hdl->iwdev->rf->peer_info) continue; if (hdl->iwdev->rf->peer_info->dev == p_dev->dev) { spin_unlock_irqrestore(&irdma_handler_lock, flags); return hdl; } } spin_unlock_irqrestore(&irdma_handler_lock, flags); return NULL; } /** * peer_to_iwdev - return iwdev based on peer * @peer: the peer interface structure */ static struct irdma_device * peer_to_iwdev(struct ice_rdma_peer *peer) { struct irdma_handler *hdl; hdl = irdma_find_handler(peer); if (!hdl) { printf("%s:%d rdma handler not found\n", __func__, __LINE__); return NULL; } return hdl->iwdev; } /** * irdma_get_qos_info - save qos info from parameters to internal struct * @l2params: destination, qos, tc, mtu info structure * @qos_info: source, DCB settings structure */ static void irdma_get_qos_info(struct irdma_l2params *l2params, struct ice_qos_params *qos_info) { int i; l2params->num_tc = qos_info->num_tc; l2params->num_apps = qos_info->num_apps; l2params->vsi_prio_type = qos_info->vsi_priority_type; l2params->vsi_rel_bw = qos_info->vsi_relative_bw; for (i = 0; i < l2params->num_tc; i++) { l2params->tc_info[i].egress_virt_up = qos_info->tc_info[i].egress_virt_up; l2params->tc_info[i].ingress_virt_up = qos_info->tc_info[i].ingress_virt_up; l2params->tc_info[i].prio_type = qos_info->tc_info[i].prio_type; l2params->tc_info[i].rel_bw = qos_info->tc_info[i].rel_bw; l2params->tc_info[i].tc_ctx = qos_info->tc_info[i].tc_ctx; } for (i = 0; i < IRDMA_MAX_USER_PRIORITY; i++) l2params->up2tc[i] = qos_info->up2tc[i]; if (qos_info->pfc_mode == IRDMA_QOS_MODE_DSCP) { l2params->dscp_mode = true; memcpy(l2params->dscp_map, qos_info->dscp_map, sizeof(l2params->dscp_map)); } printf("%s:%d: l2params settings:\n num_tc %d,\n num_apps %d,\n", __func__, __LINE__, l2params->num_tc, l2params->num_apps); printf(" vsi_prio_type %d,\n vsi_rel_bw %d,\n egress_virt_up:", l2params->vsi_prio_type, l2params->vsi_rel_bw); for (i = 0; i < l2params->num_tc; i++) printf(" %d", l2params->tc_info[i].egress_virt_up); printf("\n ingress_virt_up:"); for (i = 0; i < l2params->num_tc; i++) printf(" %d", l2params->tc_info[i].ingress_virt_up); printf("\n prio_type:"); for (i = 0; i < l2params->num_tc; i++) printf(" %d", l2params->tc_info[i].prio_type); printf("\n rel_bw:"); for (i = 0; i < l2params->num_tc; i++) printf(" %d", l2params->tc_info[i].rel_bw); printf("\n tc_ctx:"); for (i = 0; i < l2params->num_tc; i++) printf(" %lu", l2params->tc_info[i].tc_ctx); printf("\n up2tc:"); for (i = 0; i < IRDMA_MAX_USER_PRIORITY; i++) printf(" %d", l2params->up2tc[i]); printf(" dscp_mode: %d,\n", l2params->dscp_mode); for (i = 0; i < IRDMA_DSCP_NUM_VAL; i++) printf(" %d", l2params->dscp_map[i]); printf("\n"); dump_struct(l2params, sizeof(*l2params), "l2params"); } /** * irdma_log_invalid_mtu - check mtu setting validity * @mtu: mtu value * @dev: hardware control device structure */ static void irdma_log_invalid_mtu(u16 mtu, struct 
irdma_sc_dev *dev) { if (mtu < IRDMA_MIN_MTU_IPV4) irdma_dev_warn(to_ibdev(dev), "MTU setting [%d] too low for RDMA traffic. Minimum MTU is 576 for IPv4\n", mtu); else if (mtu < IRDMA_MIN_MTU_IPV6) irdma_dev_warn(to_ibdev(dev), "MTU setting [%d] too low for RDMA traffic. Minimum MTU is 1280 for IPv6\\n", mtu); } /** * irdma_event_handler - handling events from lan driver * @peer: the peer interface structure * @event: event info structure */ static void irdma_event_handler(struct ice_rdma_peer *peer, struct ice_rdma_event *event) { struct irdma_device *iwdev; struct irdma_l2params l2params = {}; printf("%s:%d event_handler %s (%x) on pf %d (%d)\n", __func__, __LINE__, (event->type == 1) ? "LINK CHANGE" : (event->type == 2) ? "MTU CHANGE" : (event->type == 3) ? "TC CHANGE" : "UNKNOWN", - event->type, peer->pf_id, pf_if_d(peer)); + event->type, peer->pf_id, if_getdunit(peer->ifp)); iwdev = peer_to_iwdev(peer); if (!iwdev) { printf("%s:%d rdma device not found\n", __func__, __LINE__); return; } switch (event->type) { case ICE_RDMA_EVENT_LINK_CHANGE: printf("%s:%d PF: %x (%x), state: %d, speed: %lu\n", __func__, __LINE__, - peer->pf_id, pf_if_d(peer), event->linkstate, event->baudrate); + peer->pf_id, if_getdunit(peer->ifp), event->linkstate, + event->baudrate); break; case ICE_RDMA_EVENT_MTU_CHANGE: if (iwdev->vsi.mtu != event->mtu) { l2params.mtu = event->mtu; l2params.mtu_changed = true; irdma_log_invalid_mtu(l2params.mtu, &iwdev->rf->sc_dev); irdma_change_l2params(&iwdev->vsi, &l2params); } break; case ICE_RDMA_EVENT_TC_CHANGE: /* * 1. check if it is pre or post 2. check if it is currently being done */ if (event->prep == iwdev->vsi.tc_change_pending) { printf("%s:%d can't process %s TC change if TC change is %spending\n", __func__, __LINE__, event->prep ? "pre" : "post", event->prep ? " " : "not "); goto done; } if (event->prep) { iwdev->vsi.tc_change_pending = true; irdma_sc_suspend_resume_qps(&iwdev->vsi, IRDMA_OP_SUSPEND); wait_event_timeout(iwdev->suspend_wq, !atomic_read(&iwdev->vsi.qp_suspend_reqs), IRDMA_EVENT_TIMEOUT_MS * 10); irdma_ws_reset(&iwdev->vsi); printf("%s:%d TC change preparation done\n", __func__, __LINE__); } else { l2params.tc_changed = true; irdma_get_qos_info(&l2params, &event->port_qos); if (iwdev->rf->protocol_used != IRDMA_IWARP_PROTOCOL_ONLY) iwdev->dcb_vlan_mode = l2params.num_tc > 1 && !l2params.dscp_mode; irdma_check_fc_for_tc_update(&iwdev->vsi, &l2params); irdma_change_l2params(&iwdev->vsi, &l2params); printf("%s:%d TC change done\n", __func__, __LINE__); } break; case ICE_RDMA_EVENT_CRIT_ERR: printf("%s:%d event type received: %d\n", __func__, __LINE__, event->type); break; default: printf("%s:%d event type unsupported: %d\n", __func__, __LINE__, event->type); } done: return; } /** * irdma_link_change - Callback for link state change * @peer: the peer interface structure * @linkstate: state of the link * @baudrate: speed of the link */ static void irdma_link_change(struct ice_rdma_peer *peer, int linkstate, uint64_t baudrate) { printf("%s:%d PF: %x (%x), state: %d, speed: %lu\n", __func__, __LINE__, - peer->pf_id, pf_if_d(peer), linkstate, baudrate); + peer->pf_id, if_getdunit(peer->ifp), linkstate, baudrate); } /** * irdma_finalize_task - Finish open or close phase in a separate thread * @context: instance holding peer and iwdev information * * Triggered from irdma_open or irdma_close to perform rt_init_hw or * rt_deinit_hw respectively. Does registration and unregistration of * the device. 
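 *
 * Note that the progress messages below obtain the unit number with
 * if_getdunit(peer->ifp) rather than reaching into peer->ifp->if_dunit; the
 * same accessor style (if_getlladdr(), if_getvlantrunk(), if_getindex()) is
 * used throughout this change so struct ifnet stays opaque to the driver,
 * for example:
 *
 *	old: snprintf(name, sizeof(name), "irdma%d", peer->ifp->if_dunit);
 *	new: snprintf(name, sizeof(name), "irdma%d", if_getdunit(peer->ifp));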
*/ static void irdma_finalize_task(void *context, int pending) { struct irdma_task_arg *task_arg = (struct irdma_task_arg *)context; struct irdma_device *iwdev = task_arg->iwdev; struct irdma_pci_f *rf = iwdev->rf; struct ice_rdma_peer *peer = task_arg->peer; struct irdma_l2params l2params = {{{0}}}; struct ice_rdma_request req = {0}; int status = 0; if (iwdev->iw_status) { irdma_debug(&rf->sc_dev, IRDMA_DEBUG_INIT, "Starting deferred closing %d (%d)\n", - rf->peer_info->pf_id, pf_if_d(peer)); + rf->peer_info->pf_id, if_getdunit(peer->ifp)); irdma_dereg_ipaddr_event_cb(rf); irdma_ib_unregister_device(iwdev); req.type = ICE_RDMA_EVENT_VSI_FILTER_UPDATE; req.enable_filter = false; IRDMA_DI_REQ_HANDLER(peer, &req); irdma_cleanup_dead_qps(&iwdev->vsi); irdma_rt_deinit_hw(iwdev); } else { irdma_debug(&rf->sc_dev, IRDMA_DEBUG_INIT, "Starting deferred opening %d (%d)\n", - rf->peer_info->pf_id, pf_if_d(peer)); + rf->peer_info->pf_id, if_getdunit(peer->ifp)); irdma_get_qos_info(&l2params, &peer->initial_qos_info); if (iwdev->rf->protocol_used != IRDMA_IWARP_PROTOCOL_ONLY) iwdev->dcb_vlan_mode = l2params.num_tc > 1 && !l2params.dscp_mode; l2params.mtu = peer->mtu; status = irdma_rt_init_hw(iwdev, &l2params); if (status) { irdma_pr_err("RT init failed %d\n", status); ib_dealloc_device(&iwdev->ibdev); return; } status = irdma_ib_register_device(iwdev); if (status) { irdma_pr_err("Registration failed %d\n", status); irdma_rt_deinit_hw(iwdev); ib_dealloc_device(&iwdev->ibdev); } req.type = ICE_RDMA_EVENT_VSI_FILTER_UPDATE; req.enable_filter = true; IRDMA_DI_REQ_HANDLER(peer, &req); irdma_reg_ipaddr_event_cb(rf); irdma_debug(&rf->sc_dev, IRDMA_DEBUG_INIT, "Deferred opening finished %d (%d)\n", - rf->peer_info->pf_id, pf_if_d(peer)); + rf->peer_info->pf_id, if_getdunit(peer->ifp)); } } /** * irdma_open - Callback for operation open for RDMA device * @peer: the new peer interface structure * * Callback implementing the RDMA_OPEN function. Called by the ice driver to * notify the RDMA client driver that a new device has been initialized. */ static int irdma_open(struct ice_rdma_peer *peer) { struct ice_rdma_event event = {0}; event.type = ICE_RDMA_EVENT_MTU_CHANGE; event.mtu = peer->mtu; irdma_event_handler(peer, &event); return 0; } /** * irdma_close - Callback to notify that a peer device is down * @peer: the RDMA peer device being stopped * * Callback implementing the RDMA_CLOSE function. Called by the ice driver to * notify the RDMA client driver that a peer device is being stopped. */ static int irdma_close(struct ice_rdma_peer *peer) { /* * This is called when ifconfig down. Keeping it for compatibility with ice. This event might be usefull for * future. 
*/ return 0; } /** * irdma_alloc_pcidev - allocate memory for pcidev and populate data * @peer: the new peer interface structure * @rf: RDMA PCI function */ static int irdma_alloc_pcidev(struct ice_rdma_peer *peer, struct irdma_pci_f *rf) { rf->pcidev = kzalloc(sizeof(struct pci_dev), GFP_KERNEL); if (!rf->pcidev) { return -ENOMEM; } if (linux_pci_attach_device(rf->dev_ctx.dev, NULL, NULL, rf->pcidev)) return -ENOMEM; return 0; } /** * irdma_dealloc_pcidev - deallocate memory for pcidev * @rf: RDMA PCI function */ static void irdma_dealloc_pcidev(struct irdma_pci_f *rf) { linux_pci_detach_device(rf->pcidev); kfree(rf->pcidev); } /** * irdma_fill_device_info - assign initial values to rf variables * @iwdev: irdma device * @peer: the peer interface structure */ static void irdma_fill_device_info(struct irdma_device *iwdev, struct ice_rdma_peer *peer) { struct irdma_pci_f *rf = iwdev->rf; rf->peer_info = peer; rf->gen_ops.register_qset = irdma_register_qset; rf->gen_ops.unregister_qset = irdma_unregister_qset; rf->rdma_ver = IRDMA_GEN_2; rf->sc_dev.hw_attrs.uk_attrs.hw_rev = IRDMA_GEN_2; rf->rsrc_profile = IRDMA_HMC_PROFILE_DEFAULT; rf->rst_to = IRDMA_RST_TIMEOUT_HZ; rf->check_fc = irdma_check_fc_for_qp; rf->gen_ops.request_reset = irdma_request_reset; irdma_set_rf_user_cfg_params(rf); rf->default_vsi.vsi_idx = peer->pf_vsi_num; rf->dev_ctx.dev = peer->dev; rf->dev_ctx.mem_bus_space_tag = rman_get_bustag(peer->pci_mem); rf->dev_ctx.mem_bus_space_handle = rman_get_bushandle(peer->pci_mem); rf->dev_ctx.mem_bus_space_size = rman_get_size(peer->pci_mem); rf->hw.dev_context = &rf->dev_ctx; rf->hw.hw_addr = (u8 *)rman_get_virtual(peer->pci_mem); rf->msix_count = peer->msix.count; rf->msix_info.entry = peer->msix.base; rf->msix_info.vector = peer->msix.count; printf("%s:%d msix_info: %d %d %d\n", __func__, __LINE__, rf->msix_count, rf->msix_info.entry, rf->msix_info.vector); rf->iwdev = iwdev; iwdev->netdev = peer->ifp; iwdev->init_state = INITIAL_STATE; iwdev->vsi_num = peer->pf_vsi_num; iwdev->rcv_wnd = IRDMA_CM_DEFAULT_RCV_WND_SCALED; iwdev->rcv_wscale = IRDMA_CM_DEFAULT_RCV_WND_SCALE; iwdev->roce_cwnd = IRDMA_ROCE_CWND_DEFAULT; iwdev->roce_ackcreds = IRDMA_ROCE_ACKCREDS_DEFAULT; iwdev->roce_rtomin = 5; if (rf->protocol_used == IRDMA_ROCE_PROTOCOL_ONLY) { iwdev->roce_mode = true; } } /** * irdma_probe - Callback to probe a new RDMA peer device * @peer: the new peer interface structure * * Callback implementing the RDMA_PROBE function. 
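 *
 * The register window handed over by ice(4) is unpacked in
 * irdma_fill_device_info() above: rman_get_bustag(), rman_get_bushandle()
 * and rman_get_size() on peer->pci_mem fill rf->dev_ctx, and
 * rman_get_virtual() provides hw.hw_addr.  Those values back the
 * bounds-checked MMIO accessors (irdma_rd32()/irdma_wr32()), e.g.:
 *
 *	KASSERT(reg < dev_ctx->mem_bus_space_size, ("register offset too big"));
 *	val = bus_space_read_4(dev_ctx->mem_bus_space_tag,
 *	    dev_ctx->mem_bus_space_handle, reg);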
Called by the ice driver to * notify the RDMA client driver that a new device has been created */ static int irdma_probe(struct ice_rdma_peer *peer) { struct irdma_device *iwdev; struct irdma_pci_f *rf; struct irdma_handler *hdl; int err = 0; irdma_pr_info("probe: irdma-%s peer=%p, peer->pf_id=%d, peer->ifp=%p, peer->ifp->if_dunit=%d, peer->pci_mem->r_bustag=%p\n", irdma_driver_version, peer, peer->pf_id, peer->ifp, - pf_if_d(peer), (void *)(uintptr_t)peer->pci_mem->r_bustag); + if_getdunit(peer->ifp), (void *)(uintptr_t)peer->pci_mem->r_bustag); hdl = irdma_find_handler(peer); if (hdl) return -EBUSY; hdl = kzalloc(sizeof(*hdl), GFP_KERNEL); if (!hdl) return -ENOMEM; iwdev = (struct irdma_device *)ib_alloc_device(sizeof(*iwdev)); if (!iwdev) { kfree(hdl); return -ENOMEM; } iwdev->rf = kzalloc(sizeof(*rf), GFP_KERNEL); if (!iwdev->rf) { ib_dealloc_device(&iwdev->ibdev); kfree(hdl); return -ENOMEM; } hdl->iwdev = iwdev; iwdev->hdl = hdl; - irdma_init_tunable(iwdev->rf, pf_if_d(peer)); + irdma_init_tunable(iwdev->rf, if_getdunit(peer->ifp)); irdma_fill_device_info(iwdev, peer); rf = iwdev->rf; if (irdma_alloc_pcidev(peer, rf)) goto err_pcidev; irdma_add_handler(hdl); if (irdma_ctrl_init_hw(rf)) { err = -EIO; goto err_ctrl_init; } rf->dev_ctx.task_arg.peer = peer; rf->dev_ctx.task_arg.iwdev = iwdev; rf->dev_ctx.task_arg.peer = peer; TASK_INIT(&hdl->deferred_task, 0, irdma_finalize_task, &rf->dev_ctx.task_arg); hdl->deferred_tq = taskqueue_create_fast("irdma_defer", M_NOWAIT, taskqueue_thread_enqueue, &hdl->deferred_tq); taskqueue_start_threads(&hdl->deferred_tq, 1, PI_NET, "irdma_defer_t"); taskqueue_enqueue(hdl->deferred_tq, &hdl->deferred_task); return 0; err_ctrl_init: irdma_del_handler(hdl); irdma_dealloc_pcidev(rf); err_pcidev: kfree(iwdev->rf); ib_dealloc_device(&iwdev->ibdev); kfree(hdl); return err; } /** * irdma_remove - Callback to remove an RDMA peer device * @peer: the new peer interface structure * * Callback implementing the RDMA_REMOVE function. 
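 *
 * irdma_probe() above defers the heavy lifting to a per-handler taskqueue:
 * TASK_INIT() binds irdma_finalize_task(), taskqueue_create_fast() plus
 * taskqueue_start_threads() spin up the "irdma_defer" thread, and
 * taskqueue_enqueue() kicks off the deferred open.  The remove path below
 * reuses the same task (irdma_finalize_task() performs a deferred close once
 * iw_status is set) and must drain and free the queue before tearing the
 * rest down:
 *
 *	taskqueue_enqueue(hdl->deferred_tq, &hdl->deferred_task);
 *	taskqueue_drain(hdl->deferred_tq, &hdl->deferred_task);
 *	taskqueue_free(hdl->deferred_tq);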
Called by the ice driver to * notify the RDMA client driver that the device wille be delated */ static int irdma_remove(struct ice_rdma_peer *peer) { struct irdma_handler *hdl; struct irdma_device *iwdev; irdma_debug((struct irdma_sc_dev *)NULL, IRDMA_DEBUG_INIT, - "removing %s irdma%d\n", __func__, pf_if_d(peer)); + "removing %s irdma%d\n", __func__, if_getdunit(peer->ifp)); hdl = irdma_find_handler(peer); if (!hdl) return 0; iwdev = hdl->iwdev; if (iwdev->vsi.tc_change_pending) { iwdev->vsi.tc_change_pending = false; irdma_sc_suspend_resume_qps(&iwdev->vsi, IRDMA_OP_RESUME); } taskqueue_enqueue(hdl->deferred_tq, &hdl->deferred_task); taskqueue_drain(hdl->deferred_tq, &hdl->deferred_task); taskqueue_free(hdl->deferred_tq); hdl->iwdev->rf->dev_ctx.task_arg.iwdev = NULL; hdl->iwdev->rf->dev_ctx.task_arg.peer = NULL; sysctl_ctx_free(&iwdev->rf->tun_info.irdma_sysctl_ctx); hdl->iwdev->rf->tun_info.irdma_sysctl_tree = NULL; irdma_ctrl_deinit_hw(iwdev->rf); irdma_dealloc_pcidev(iwdev->rf); irdma_del_handler(iwdev->hdl); kfree(iwdev->hdl); kfree(iwdev->rf); ib_dealloc_device(&iwdev->ibdev); irdma_pr_info("IRDMA hardware deinitialization complete irdma%d\n", - pf_if_d(peer)); + if_getdunit(peer->ifp)); return 0; } /** * irdma_prep_for_unregister - ensure the driver is ready to unregister */ static void irdma_prep_for_unregister(void) { struct irdma_handler *hdl; unsigned long flags; bool hdl_valid; do { hdl_valid = false; spin_lock_irqsave(&irdma_handler_lock, flags); list_for_each_entry(hdl, &irdma_handlers, list) { if (!hdl) continue; if (!hdl->iwdev->rf->peer_info) continue; hdl_valid = true; break; } spin_unlock_irqrestore(&irdma_handler_lock, flags); if (!hdl || !hdl_valid) break; IRDMA_CLOSE(hdl->iwdev->rf->peer_info); IRDMA_REMOVE(hdl->iwdev->rf->peer_info); } while (1); } static kobj_method_t irdma_methods[] = { KOBJMETHOD(irdma_probe, irdma_probe), KOBJMETHOD(irdma_open, irdma_open), KOBJMETHOD(irdma_close, irdma_close), KOBJMETHOD(irdma_remove, irdma_remove), KOBJMETHOD(irdma_link_change, irdma_link_change), KOBJMETHOD(irdma_event_handler, irdma_event_handler), KOBJMETHOD_END }; /* declare irdma_class which extends the ice_rdma_di class */ DEFINE_CLASS_1(irdma, irdma_class, irdma_methods, sizeof(struct ice_rdma_peer), ice_rdma_di_class); static struct ice_rdma_info irdma_info = { .major_version = ICE_RDMA_MAJOR_VERSION, .minor_version = ICE_RDMA_MINOR_VERSION, .patch_version = ICE_RDMA_PATCH_VERSION, .rdma_class = &irdma_class, }; /** * irdma_module_event_handler - Module event handler callback * @mod: unused mod argument * @what: the module event to handle * @arg: unused module event argument * * Callback used by the FreeBSD module stack to notify the driver of module * events. Used to implement custom handling for certain module events such as * load and unload. 
*/ static int irdma_module_event_handler(module_t __unused mod, int what, void __unused * arg) { switch (what) { case MOD_LOAD: printf("Loading irdma module\n"); return ice_rdma_register(&irdma_info); case MOD_UNLOAD: printf("Unloading irdma module\n"); irdma_prep_for_unregister(); ice_rdma_unregister(); return (0); default: return (EOPNOTSUPP); } return (0); } static moduledata_t irdma_moduledata = { "irdma", irdma_module_event_handler, NULL }; DECLARE_MODULE(irdma, irdma_moduledata, SI_SUB_LAST, SI_ORDER_ANY); MODULE_VERSION(irdma, 1); MODULE_DEPEND(irdma, ice, 1, 1, 1); MODULE_DEPEND(irdma, ibcore, 1, 1, 1); diff --git a/sys/dev/irdma/irdma_cm.c b/sys/dev/irdma/irdma_cm.c index daf116065596..9f88f6e3cdfe 100644 --- a/sys/dev/irdma/irdma_cm.c +++ b/sys/dev/irdma/irdma_cm.c @@ -1,4314 +1,4250 @@ /*- * SPDX-License-Identifier: GPL-2.0 or Linux-OpenIB * * Copyright (c) 2015 - 2022 Intel Corporation * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU * General Public License (GPL) Version 2, available from the file * COPYING in the main directory of this source tree, or the * OpenFabrics.org BSD license below: * * Redistribution and use in source and binary forms, with or * without modification, are permitted provided that the following * conditions are met: * * - Redistributions of source code must retain the above * copyright notice, this list of conditions and the following * disclaimer. * * - Redistributions in binary form must reproduce the above * copyright notice, this list of conditions and the following * disclaimer in the documentation and/or other materials * provided with the distribution. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
*/ /*$FreeBSD$*/ #include "irdma_main.h" static void irdma_cm_post_event(struct irdma_cm_event *event); static void irdma_disconnect_worker(struct work_struct *work); /** * irdma_free_sqbuf - put back puda buffer if refcount is 0 * @vsi: The VSI structure of the device * @bufp: puda buffer to free */ void irdma_free_sqbuf(struct irdma_sc_vsi *vsi, void *bufp) { struct irdma_puda_buf *buf = bufp; struct irdma_puda_rsrc *ilq = vsi->ilq; if (atomic_dec_and_test(&buf->refcount)) irdma_puda_ret_bufpool(ilq, buf); } /** * irdma_record_ird_ord - Record IRD/ORD passed in * @cm_node: connection's node * @conn_ird: connection IRD * @conn_ord: connection ORD */ static void irdma_record_ird_ord(struct irdma_cm_node *cm_node, u32 conn_ird, u32 conn_ord) { if (conn_ird > cm_node->dev->hw_attrs.max_hw_ird) conn_ird = cm_node->dev->hw_attrs.max_hw_ird; if (conn_ord > cm_node->dev->hw_attrs.max_hw_ord) conn_ord = cm_node->dev->hw_attrs.max_hw_ord; else if (!conn_ord && cm_node->send_rdma0_op == SEND_RDMA_READ_ZERO) conn_ord = 1; cm_node->ird_size = conn_ird; cm_node->ord_size = conn_ord; } /** * irdma_copy_ip_ntohl - copy IP address from network to host * @dst: IP address in host order * @src: IP address in network order (big endian) */ void irdma_copy_ip_ntohl(u32 *dst, __be32 *src) { *dst++ = ntohl(*src++); *dst++ = ntohl(*src++); *dst++ = ntohl(*src++); *dst = ntohl(*src); } /** * irdma_copy_ip_htonl - copy IP address from host to network order * @dst: IP address in network order (big endian) * @src: IP address in host order */ void irdma_copy_ip_htonl(__be32 *dst, u32 *src) { *dst++ = htonl(*src++); *dst++ = htonl(*src++); *dst++ = htonl(*src++); *dst = htonl(*src); } /** * irdma_get_addr_info * @cm_node: contains ip/tcp info * @cm_info: to get a copy of the cm_node ip/tcp info */ static void irdma_get_addr_info(struct irdma_cm_node *cm_node, struct irdma_cm_info *cm_info) { memset(cm_info, 0, sizeof(*cm_info)); cm_info->ipv4 = cm_node->ipv4; cm_info->vlan_id = cm_node->vlan_id; memcpy(cm_info->loc_addr, cm_node->loc_addr, sizeof(cm_info->loc_addr)); memcpy(cm_info->rem_addr, cm_node->rem_addr, sizeof(cm_info->rem_addr)); cm_info->loc_port = cm_node->loc_port; cm_info->rem_port = cm_node->rem_port; } /** * irdma_fill_sockaddr4 - fill in addr info for IPv4 connection * @cm_node: connection's node * @event: upper layer's cm event */ static inline void irdma_fill_sockaddr4(struct irdma_cm_node *cm_node, struct iw_cm_event *event) { struct sockaddr_in *laddr = (struct sockaddr_in *)&event->local_addr; struct sockaddr_in *raddr = (struct sockaddr_in *)&event->remote_addr; laddr->sin_family = AF_INET; raddr->sin_family = AF_INET; laddr->sin_port = htons(cm_node->loc_port); raddr->sin_port = htons(cm_node->rem_port); laddr->sin_addr.s_addr = htonl(cm_node->loc_addr[0]); raddr->sin_addr.s_addr = htonl(cm_node->rem_addr[0]); } /** * irdma_fill_sockaddr6 - fill in addr info for IPv6 connection * @cm_node: connection's node * @event: upper layer's cm event */ static inline void irdma_fill_sockaddr6(struct irdma_cm_node *cm_node, struct iw_cm_event *event) { struct sockaddr_in6 *laddr6 = (struct sockaddr_in6 *)&event->local_addr; struct sockaddr_in6 *raddr6 = (struct sockaddr_in6 *)&event->remote_addr; laddr6->sin6_family = AF_INET6; raddr6->sin6_family = AF_INET6; laddr6->sin6_port = htons(cm_node->loc_port); raddr6->sin6_port = htons(cm_node->rem_port); irdma_copy_ip_htonl(laddr6->sin6_addr.__u6_addr.__u6_addr32, cm_node->loc_addr); irdma_copy_ip_htonl(raddr6->sin6_addr.__u6_addr.__u6_addr32, cm_node->rem_addr); 
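/*
 * irdma_copy_ip_ntohl()/irdma_copy_ip_htonl() above move an IPv6 address as
 * four 32-bit words while converting byte order; the driver keeps addresses
 * as host-order u32[4] internally, with IPv4 using only word 0.  Typical
 * usage, mirroring irdma_add_ipv6_cb() earlier in this change:
 *
 *	struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)ifa->ifa_addr;
 *	u32 ip[4];
 *
 *	irdma_copy_ip_ntohl(ip, (u32 *)&sin6->sin6_addr);
 */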
} /** * irdma_get_cmevent_info - for cm event upcall * @cm_node: connection's node * @cm_id: upper layers cm struct for the event * @event: upper layer's cm event */ static inline void irdma_get_cmevent_info(struct irdma_cm_node *cm_node, struct iw_cm_id *cm_id, struct iw_cm_event *event) { memcpy(&event->local_addr, &cm_id->m_local_addr, sizeof(event->local_addr)); memcpy(&event->remote_addr, &cm_id->m_remote_addr, sizeof(event->remote_addr)); if (cm_node) { event->private_data = cm_node->pdata_buf; event->private_data_len = (u8)cm_node->pdata.size; event->ird = cm_node->ird_size; event->ord = cm_node->ord_size; } } /** * irdma_send_cm_event - upcall cm's event handler * @cm_node: connection's node * @cm_id: upper layer's cm info struct * @type: Event type to indicate * @status: status for the event type */ static int irdma_send_cm_event(struct irdma_cm_node *cm_node, struct iw_cm_id *cm_id, enum iw_cm_event_type type, int status) { struct iw_cm_event event = {0}; event.event = type; event.status = status; irdma_debug(&cm_node->iwdev->rf->sc_dev, IRDMA_DEBUG_CM, "cm_node %p cm_id=%p state=%d accel=%d event_type=%d status=%d\n", cm_node, cm_id, cm_node->accelerated, cm_node->state, type, status); switch (type) { case IW_CM_EVENT_CONNECT_REQUEST: if (cm_node->ipv4) irdma_fill_sockaddr4(cm_node, &event); else irdma_fill_sockaddr6(cm_node, &event); event.provider_data = cm_node; event.private_data = cm_node->pdata_buf; event.private_data_len = (u8)cm_node->pdata.size; event.ird = cm_node->ird_size; break; case IW_CM_EVENT_CONNECT_REPLY: irdma_get_cmevent_info(cm_node, cm_id, &event); break; case IW_CM_EVENT_ESTABLISHED: event.ird = cm_node->ird_size; event.ord = cm_node->ord_size; break; case IW_CM_EVENT_DISCONNECT: case IW_CM_EVENT_CLOSE: /* Wait if we are in RTS but havent issued the iwcm event upcall */ if (!cm_node->accelerated) wait_for_completion(&cm_node->establish_comp); break; default: return -EINVAL; } return cm_id->event_handler(cm_id, &event); } /** * irdma_timer_list_prep - add connection nodes to a list to perform timer tasks * @cm_core: cm's core * @timer_list: a timer list to which cm_node will be selected */ static void irdma_timer_list_prep(struct irdma_cm_core *cm_core, struct list_head *timer_list) { struct irdma_cm_node *cm_node; int bkt; HASH_FOR_EACH_RCU(cm_core->cm_hash_tbl, bkt, cm_node, list) { if ((cm_node->close_entry || cm_node->send_entry) && atomic_inc_not_zero(&cm_node->refcnt)) list_add(&cm_node->timer_entry, timer_list); } } /** * irdma_create_event - create cm event * @cm_node: connection's node * @type: Event type to generate */ static struct irdma_cm_event * irdma_create_event(struct irdma_cm_node *cm_node, enum irdma_cm_event_type type) { struct irdma_cm_event *event; if (!cm_node->cm_id) return NULL; event = kzalloc(sizeof(*event), GFP_ATOMIC); if (!event) return NULL; event->type = type; event->cm_node = cm_node; memcpy(event->cm_info.rem_addr, cm_node->rem_addr, sizeof(event->cm_info.rem_addr)); memcpy(event->cm_info.loc_addr, cm_node->loc_addr, sizeof(event->cm_info.loc_addr)); event->cm_info.rem_port = cm_node->rem_port; event->cm_info.loc_port = cm_node->loc_port; event->cm_info.cm_id = cm_node->cm_id; irdma_debug(&cm_node->iwdev->rf->sc_dev, IRDMA_DEBUG_CM, "node=%p event=%p type=%u dst=%pI4 src=%pI4\n", cm_node, event, type, event->cm_info.loc_addr, event->cm_info.rem_addr); irdma_cm_post_event(event); return event; } /** * irdma_free_retrans_entry - free send entry * @cm_node: connection's node */ static void irdma_free_retrans_entry(struct 
irdma_cm_node *cm_node) { struct irdma_device *iwdev = cm_node->iwdev; struct irdma_timer_entry *send_entry; send_entry = cm_node->send_entry; if (!send_entry) return; cm_node->send_entry = NULL; irdma_free_sqbuf(&iwdev->vsi, send_entry->sqbuf); kfree(send_entry); atomic_dec(&cm_node->refcnt); } /** * irdma_cleanup_retrans_entry - free send entry with lock * @cm_node: connection's node */ static void irdma_cleanup_retrans_entry(struct irdma_cm_node *cm_node) { unsigned long flags; spin_lock_irqsave(&cm_node->retrans_list_lock, flags); irdma_free_retrans_entry(cm_node); spin_unlock_irqrestore(&cm_node->retrans_list_lock, flags); } /** * irdma_form_ah_cm_frame - get a free packet and build frame with address handle * @cm_node: connection's node ionfo to use in frame * @options: pointer to options info * @hdr: pointer mpa header * @pdata: pointer to private data * @flags: indicates FIN or ACK */ static struct irdma_puda_buf * irdma_form_ah_cm_frame(struct irdma_cm_node *cm_node, struct irdma_kmem_info *options, struct irdma_kmem_info *hdr, struct irdma_mpa_priv_info *pdata, u8 flags) { struct irdma_puda_buf *sqbuf; struct irdma_sc_vsi *vsi = &cm_node->iwdev->vsi; u8 *buf; struct tcphdr *tcph; u16 pktsize; u32 opts_len = 0; u32 pd_len = 0; u32 hdr_len = 0; if (!cm_node->ah || !cm_node->ah->ah_info.ah_valid) { irdma_debug(&cm_node->iwdev->rf->sc_dev, IRDMA_DEBUG_CM, "AH invalid\n"); return NULL; } sqbuf = irdma_puda_get_bufpool(vsi->ilq); if (!sqbuf) { irdma_debug(&cm_node->iwdev->rf->sc_dev, IRDMA_DEBUG_CM, "SQ buf NULL\n"); return NULL; } sqbuf->ah_id = cm_node->ah->ah_info.ah_idx; buf = sqbuf->mem.va; if (options) opts_len = (u32)options->size; if (hdr) hdr_len = hdr->size; if (pdata) pd_len = pdata->size; pktsize = sizeof(*tcph) + opts_len + hdr_len + pd_len; memset(buf, 0, pktsize); sqbuf->totallen = pktsize; sqbuf->tcphlen = sizeof(*tcph) + opts_len; sqbuf->scratch = cm_node; tcph = (struct tcphdr *)buf; buf += sizeof(*tcph); tcph->th_sport = htons(cm_node->loc_port); tcph->th_dport = htons(cm_node->rem_port); tcph->th_seq = htonl(cm_node->tcp_cntxt.loc_seq_num); if (flags & SET_ACK) { cm_node->tcp_cntxt.loc_ack_num = cm_node->tcp_cntxt.rcv_nxt; tcph->th_ack = htonl(cm_node->tcp_cntxt.loc_ack_num); tcph->th_flags |= TH_ACK; } else { tcph->th_ack = 0; } if (flags & SET_SYN) { cm_node->tcp_cntxt.loc_seq_num++; tcph->th_flags |= TH_SYN; } else { cm_node->tcp_cntxt.loc_seq_num += hdr_len + pd_len; } if (flags & SET_FIN) { cm_node->tcp_cntxt.loc_seq_num++; tcph->th_flags |= TH_FIN; } if (flags & SET_RST) tcph->th_flags |= TH_RST; tcph->th_off = (u16)((sizeof(*tcph) + opts_len + 3) >> 2); sqbuf->tcphlen = tcph->th_off << 2; tcph->th_win = htons(cm_node->tcp_cntxt.rcv_wnd); tcph->th_urp = 0; if (opts_len) { memcpy(buf, options->addr, opts_len); buf += opts_len; } if (hdr_len) { memcpy(buf, hdr->addr, hdr_len); buf += hdr_len; } if (pdata && pdata->addr) memcpy(buf, pdata->addr, pdata->size); atomic_set(&sqbuf->refcount, 1); irdma_debug_buf(vsi->dev, IRDMA_DEBUG_ILQ, "TRANSMIT ILQ BUFFER", sqbuf->mem.va, sqbuf->totallen); return sqbuf; } /** * irdma_form_uda_cm_frame - get a free packet and build frame full tcpip packet * @cm_node: connection's node ionfo to use in frame * @options: pointer to options info * @hdr: pointer mpa header * @pdata: pointer to private data * @flags: indicates FIN or ACK */ static struct irdma_puda_buf * irdma_form_uda_cm_frame(struct irdma_cm_node *cm_node, struct irdma_kmem_info *options, struct irdma_kmem_info *hdr, struct irdma_mpa_priv_info *pdata, u8 flags) { 
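	/*
	 * Both frame builders size the TCP header the same way: th_off is the
	 * header length in 32-bit words, so the option bytes are rounded up
	 * before the shift and the byte count is recovered with th_off << 2.
	 * With the 20-byte base header:
	 *
	 *	tcph->th_off = (u16)((sizeof(*tcph) + opts_len + 3) >> 2);
	 *	sqbuf->tcphlen = tcph->th_off << 2;
	 *
	 *	e.g. opts_len = 4:  (20 + 4 + 3) >> 2 = 6 words = 24 bytes
	 */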
struct irdma_puda_buf *sqbuf; struct irdma_sc_vsi *vsi = &cm_node->iwdev->vsi; u8 *buf; struct tcphdr *tcph; struct ip *iph; struct ip6_hdr *ip6h; struct ether_header *ethh; u16 pktsize; u16 eth_hlen = ETH_HLEN; u32 opts_len = 0; u32 pd_len = 0; u32 hdr_len = 0; u16 vtag; sqbuf = irdma_puda_get_bufpool(vsi->ilq); if (!sqbuf) return NULL; buf = sqbuf->mem.va; if (options) opts_len = (u32)options->size; if (hdr) hdr_len = hdr->size; if (pdata) pd_len = pdata->size; if (cm_node->vlan_id < VLAN_N_VID) eth_hlen += 4; if (cm_node->ipv4) pktsize = sizeof(*iph) + sizeof(*tcph); else pktsize = sizeof(*ip6h) + sizeof(*tcph); pktsize += opts_len + hdr_len + pd_len; memset(buf, 0, eth_hlen + pktsize); sqbuf->totallen = pktsize + eth_hlen; sqbuf->maclen = eth_hlen; sqbuf->tcphlen = sizeof(*tcph) + opts_len; sqbuf->scratch = cm_node; ethh = (struct ether_header *)buf; buf += eth_hlen; if (cm_node->do_lpb) sqbuf->do_lpb = true; if (cm_node->ipv4) { sqbuf->ipv4 = true; iph = (struct ip *)buf; buf += sizeof(*iph); tcph = (struct tcphdr *)buf; buf += sizeof(*tcph); ether_addr_copy(ethh->ether_dhost, cm_node->rem_mac); ether_addr_copy(ethh->ether_shost, cm_node->loc_mac); if (cm_node->vlan_id < VLAN_N_VID) { ((struct ether_vlan_header *)ethh)->evl_proto = htons(ETH_P_8021Q); vtag = (cm_node->user_pri << VLAN_PRIO_SHIFT) | cm_node->vlan_id; ((struct ether_vlan_header *)ethh)->evl_tag = htons(vtag); ((struct ether_vlan_header *)ethh)->evl_encap_proto = htons(ETH_P_IP); } else { ethh->ether_type = htons(ETH_P_IP); } iph->ip_v = IPVERSION; iph->ip_hl = 5; /* 5 * 4Byte words, IP headr len */ iph->ip_tos = cm_node->tos; iph->ip_len = htons(pktsize); iph->ip_id = htons(++cm_node->tcp_cntxt.loc_id); iph->ip_off = htons(0x4000); iph->ip_ttl = 0x40; iph->ip_p = IPPROTO_TCP; iph->ip_src.s_addr = htonl(cm_node->loc_addr[0]); iph->ip_dst.s_addr = htonl(cm_node->rem_addr[0]); } else { sqbuf->ipv4 = false; ip6h = (struct ip6_hdr *)buf; buf += sizeof(*ip6h); tcph = (struct tcphdr *)buf; buf += sizeof(*tcph); ether_addr_copy(ethh->ether_dhost, cm_node->rem_mac); ether_addr_copy(ethh->ether_shost, cm_node->loc_mac); if (cm_node->vlan_id < VLAN_N_VID) { ((struct ether_vlan_header *)ethh)->evl_proto = htons(ETH_P_8021Q); vtag = (cm_node->user_pri << VLAN_PRIO_SHIFT) | cm_node->vlan_id; ((struct ether_vlan_header *)ethh)->evl_tag = htons(vtag); ((struct ether_vlan_header *)ethh)->evl_encap_proto = htons(ETH_P_IPV6); } else { ethh->ether_type = htons(ETH_P_IPV6); } ip6h->ip6_vfc = 6 << 4; ip6h->ip6_vfc |= cm_node->tos >> 4; ip6h->ip6_flow = cm_node->tos << 20; ip6h->ip6_plen = htons(pktsize - sizeof(*ip6h)); ip6h->ip6_nxt = 6; ip6h->ip6_hops = 128; irdma_copy_ip_htonl(ip6h->ip6_src.__u6_addr.__u6_addr32, cm_node->loc_addr); irdma_copy_ip_htonl(ip6h->ip6_dst.__u6_addr.__u6_addr32, cm_node->rem_addr); } tcph->th_sport = htons(cm_node->loc_port); tcph->th_dport = htons(cm_node->rem_port); tcph->th_seq = htonl(cm_node->tcp_cntxt.loc_seq_num); if (flags & SET_ACK) { cm_node->tcp_cntxt.loc_ack_num = cm_node->tcp_cntxt.rcv_nxt; tcph->th_ack = htonl(cm_node->tcp_cntxt.loc_ack_num); tcph->th_flags |= TH_ACK; } else { tcph->th_ack = 0; } if (flags & SET_SYN) { cm_node->tcp_cntxt.loc_seq_num++; tcph->th_flags |= TH_SYN; } else { cm_node->tcp_cntxt.loc_seq_num += hdr_len + pd_len; } if (flags & SET_FIN) { cm_node->tcp_cntxt.loc_seq_num++; tcph->th_flags |= TH_FIN; } if (flags & SET_RST) tcph->th_flags |= TH_RST; tcph->th_off = (u16)((sizeof(*tcph) + opts_len + 3) >> 2); sqbuf->tcphlen = tcph->th_off << 2; tcph->th_win = 
htons(cm_node->tcp_cntxt.rcv_wnd); tcph->th_urp = 0; if (opts_len) { memcpy(buf, options->addr, opts_len); buf += opts_len; } if (hdr_len) { memcpy(buf, hdr->addr, hdr_len); buf += hdr_len; } if (pdata && pdata->addr) memcpy(buf, pdata->addr, pdata->size); atomic_set(&sqbuf->refcount, 1); irdma_debug_buf(vsi->dev, IRDMA_DEBUG_ILQ, "TRANSMIT ILQ BUFFER", sqbuf->mem.va, sqbuf->totallen); return sqbuf; } /** * irdma_send_reset - Send RST packet * @cm_node: connection's node */ int irdma_send_reset(struct irdma_cm_node *cm_node) { struct irdma_puda_buf *sqbuf; int flags = SET_RST | SET_ACK; sqbuf = cm_node->cm_core->form_cm_frame(cm_node, NULL, NULL, NULL, flags); if (!sqbuf) return -ENOMEM; irdma_debug(&cm_node->iwdev->rf->sc_dev, IRDMA_DEBUG_CM, "caller: %pS cm_node %p cm_id=%p accel=%d state=%d rem_port=0x%04x, loc_port=0x%04x rem_addr=%pI4 loc_addr=%pI4\n", __builtin_return_address(0), cm_node, cm_node->cm_id, cm_node->accelerated, cm_node->state, cm_node->rem_port, cm_node->loc_port, cm_node->rem_addr, cm_node->loc_addr); return irdma_schedule_cm_timer(cm_node, sqbuf, IRDMA_TIMER_TYPE_SEND, 0, 1); } /** * irdma_active_open_err - send event for active side cm error * @cm_node: connection's node * @reset: Flag to send reset or not */ static void irdma_active_open_err(struct irdma_cm_node *cm_node, bool reset) { irdma_cleanup_retrans_entry(cm_node); cm_node->cm_core->stats_connect_errs++; if (reset) { irdma_debug(&cm_node->iwdev->rf->sc_dev, IRDMA_DEBUG_CM, "cm_node=%p state=%d\n", cm_node, cm_node->state); atomic_inc(&cm_node->refcnt); irdma_send_reset(cm_node); } cm_node->state = IRDMA_CM_STATE_CLOSED; irdma_create_event(cm_node, IRDMA_CM_EVENT_ABORTED); } /** * irdma_passive_open_err - handle passive side cm error * @cm_node: connection's node * @reset: send reset or just free cm_node */ static void irdma_passive_open_err(struct irdma_cm_node *cm_node, bool reset) { irdma_cleanup_retrans_entry(cm_node); cm_node->cm_core->stats_passive_errs++; cm_node->state = IRDMA_CM_STATE_CLOSED; irdma_debug(&cm_node->iwdev->rf->sc_dev, IRDMA_DEBUG_CM, "cm_node=%p state =%d\n", cm_node, cm_node->state); if (reset) irdma_send_reset(cm_node); else irdma_rem_ref_cm_node(cm_node); } /** * irdma_event_connect_error - to create connect error event * @event: cm information for connect event */ static void irdma_event_connect_error(struct irdma_cm_event *event) { struct irdma_qp *iwqp; struct iw_cm_id *cm_id; cm_id = event->cm_node->cm_id; if (!cm_id) return; iwqp = cm_id->provider_data; if (!iwqp || !iwqp->iwdev) return; iwqp->cm_id = NULL; cm_id->provider_data = NULL; irdma_send_cm_event(event->cm_node, cm_id, IW_CM_EVENT_CONNECT_REPLY, -ECONNRESET); irdma_rem_ref_cm_node(event->cm_node); } /** * irdma_process_options - process options from TCP header * @cm_node: connection's node * @optionsloc: point to start of options * @optionsize: size of all options * @syn_pkt: flag if syn packet */ static int irdma_process_options(struct irdma_cm_node *cm_node, u8 *optionsloc, u32 optionsize, u32 syn_pkt) { u32 tmp; u32 offset = 0; union all_known_options *all_options; char got_mss_option = 0; while (offset < optionsize) { all_options = (union all_known_options *)(optionsloc + offset); switch (all_options->base.optionnum) { case OPTION_NUM_EOL: offset = optionsize; break; case OPTION_NUM_NONE: offset += 1; continue; case OPTION_NUM_MSS: irdma_debug(&cm_node->iwdev->rf->sc_dev, IRDMA_DEBUG_CM, "MSS Length: %d Offset: %d Size: %d\n", all_options->mss.len, offset, optionsize); got_mss_option = 1; if (all_options->mss.len 
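		/*
		 * Sanity checks on the advertised MSS (just below): the option
		 * must be the expected 4 bytes long, and adding the header
		 * overhead back must still yield at least the protocol's
		 * minimum MTU (IRDMA_MIN_MTU_IPV4 and IRDMA_MIN_MTU_IPV6,
		 * i.e. 576 and 1280 bytes, the same limits reported by
		 * irdma_log_invalid_mtu()).  Assuming IRDMA_MTU_TO_MSS_IPV4
		 * covers the 20-byte IPv4 plus 20-byte TCP headers, an MSS of
		 * 536 maps back to exactly the 576-byte minimum:
		 *
		 *	mtu = mss + 40 = 536 + 40 = 576, accepted;
		 *	anything smaller is rejected with EINVAL.
		 */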
!= 4) return -EINVAL; tmp = ntohs(all_options->mss.mss); if ((cm_node->ipv4 && (tmp + IRDMA_MTU_TO_MSS_IPV4) < IRDMA_MIN_MTU_IPV4) || (!cm_node->ipv4 && (tmp + IRDMA_MTU_TO_MSS_IPV6) < IRDMA_MIN_MTU_IPV6)) return -EINVAL; if (tmp < cm_node->tcp_cntxt.mss) cm_node->tcp_cntxt.mss = tmp; break; case OPTION_NUM_WINDOW_SCALE: cm_node->tcp_cntxt.snd_wscale = all_options->windowscale.shiftcount; break; default: irdma_debug(&cm_node->iwdev->rf->sc_dev, IRDMA_DEBUG_CM, "Unsupported TCP Option: %x\n", all_options->base.optionnum); break; } offset += all_options->base.len; } if (!got_mss_option && syn_pkt) cm_node->tcp_cntxt.mss = IRDMA_CM_DEFAULT_MSS; return 0; } /** * irdma_handle_tcp_options - setup TCP context info after parsing TCP options * @cm_node: connection's node * @tcph: pointer tcp header * @optionsize: size of options rcvd * @passive: active or passive flag */ static int irdma_handle_tcp_options(struct irdma_cm_node *cm_node, struct tcphdr *tcph, int optionsize, int passive) { u8 *optionsloc = (u8 *)&tcph[1]; int ret; if (optionsize) { ret = irdma_process_options(cm_node, optionsloc, optionsize, (u32)tcph->th_flags & TH_SYN); if (ret) { irdma_debug(&cm_node->iwdev->rf->sc_dev, IRDMA_DEBUG_CM, "Node %p, Sending Reset\n", cm_node); if (passive) irdma_passive_open_err(cm_node, true); else irdma_active_open_err(cm_node, true); return ret; } } cm_node->tcp_cntxt.snd_wnd = ntohs(tcph->th_win) << cm_node->tcp_cntxt.snd_wscale; if (cm_node->tcp_cntxt.snd_wnd > cm_node->tcp_cntxt.max_snd_wnd) cm_node->tcp_cntxt.max_snd_wnd = cm_node->tcp_cntxt.snd_wnd; return 0; } /** * irdma_build_mpa_v1 - build a MPA V1 frame * @cm_node: connection's node * @start_addr: address where to build frame * @mpa_key: to do read0 or write0 */ static void irdma_build_mpa_v1(struct irdma_cm_node *cm_node, void *start_addr, u8 mpa_key) { struct ietf_mpa_v1 *mpa_frame = start_addr; switch (mpa_key) { case MPA_KEY_REQUEST: memcpy(mpa_frame->key, IEFT_MPA_KEY_REQ, IETF_MPA_KEY_SIZE); break; case MPA_KEY_REPLY: memcpy(mpa_frame->key, IEFT_MPA_KEY_REP, IETF_MPA_KEY_SIZE); break; default: break; } mpa_frame->flags = IETF_MPA_FLAGS_CRC; mpa_frame->rev = cm_node->mpa_frame_rev; mpa_frame->priv_data_len = htons(cm_node->pdata.size); } /** * irdma_build_mpa_v2 - build a MPA V2 frame * @cm_node: connection's node * @start_addr: buffer start address * @mpa_key: to do read0 or write0 */ static void irdma_build_mpa_v2(struct irdma_cm_node *cm_node, void *start_addr, u8 mpa_key) { struct ietf_mpa_v2 *mpa_frame = start_addr; struct ietf_rtr_msg *rtr_msg = &mpa_frame->rtr_msg; u16 ctrl_ird, ctrl_ord; /* initialize the upper 5 bytes of the frame */ irdma_build_mpa_v1(cm_node, start_addr, mpa_key); mpa_frame->flags |= IETF_MPA_V2_FLAG; if (cm_node->iwdev->iw_ooo) { mpa_frame->flags |= IETF_MPA_FLAGS_MARKERS; cm_node->rcv_mark_en = true; } mpa_frame->priv_data_len = cpu_to_be16(be16_to_cpu(mpa_frame->priv_data_len) + IETF_RTR_MSG_SIZE); /* initialize RTR msg */ if (cm_node->mpav2_ird_ord == IETF_NO_IRD_ORD) { ctrl_ird = IETF_NO_IRD_ORD; ctrl_ord = IETF_NO_IRD_ORD; } else { ctrl_ird = (cm_node->ird_size > IETF_NO_IRD_ORD) ? IETF_NO_IRD_ORD : cm_node->ird_size; ctrl_ord = (cm_node->ord_size > IETF_NO_IRD_ORD) ? 
IETF_NO_IRD_ORD : cm_node->ord_size; } ctrl_ird |= IETF_PEER_TO_PEER; switch (mpa_key) { case MPA_KEY_REQUEST: ctrl_ord |= IETF_RDMA0_WRITE; ctrl_ord |= IETF_RDMA0_READ; break; case MPA_KEY_REPLY: switch (cm_node->send_rdma0_op) { case SEND_RDMA_WRITE_ZERO: ctrl_ord |= IETF_RDMA0_WRITE; break; case SEND_RDMA_READ_ZERO: ctrl_ord |= IETF_RDMA0_READ; break; } break; default: break; } rtr_msg->ctrl_ird = htons(ctrl_ird); rtr_msg->ctrl_ord = htons(ctrl_ord); } /** * irdma_cm_build_mpa_frame - build mpa frame for mpa version 1 or version 2 * @cm_node: connection's node * @mpa: mpa: data buffer * @mpa_key: to do read0 or write0 */ static int irdma_cm_build_mpa_frame(struct irdma_cm_node *cm_node, struct irdma_kmem_info *mpa, u8 mpa_key) { int hdr_len = 0; switch (cm_node->mpa_frame_rev) { case IETF_MPA_V1: hdr_len = sizeof(struct ietf_mpa_v1); irdma_build_mpa_v1(cm_node, mpa->addr, mpa_key); break; case IETF_MPA_V2: hdr_len = sizeof(struct ietf_mpa_v2); irdma_build_mpa_v2(cm_node, mpa->addr, mpa_key); break; default: break; } return hdr_len; } /** * irdma_send_mpa_request - active node send mpa request to passive node * @cm_node: connection's node */ static int irdma_send_mpa_request(struct irdma_cm_node *cm_node) { struct irdma_puda_buf *sqbuf; cm_node->mpa_hdr.addr = &cm_node->mpa_v2_frame; cm_node->mpa_hdr.size = irdma_cm_build_mpa_frame(cm_node, &cm_node->mpa_hdr, MPA_KEY_REQUEST); if (!cm_node->mpa_hdr.size) { irdma_debug(&cm_node->iwdev->rf->sc_dev, IRDMA_DEBUG_CM, "mpa size = %d\n", cm_node->mpa_hdr.size); return -EINVAL; } sqbuf = cm_node->cm_core->form_cm_frame(cm_node, NULL, &cm_node->mpa_hdr, &cm_node->pdata, SET_ACK); if (!sqbuf) return -ENOMEM; return irdma_schedule_cm_timer(cm_node, sqbuf, IRDMA_TIMER_TYPE_SEND, 1, 0); } /** * irdma_send_mpa_reject - * @cm_node: connection's node * @pdata: reject data for connection * @plen: length of reject data */ static int irdma_send_mpa_reject(struct irdma_cm_node *cm_node, const void *pdata, u8 plen) { struct irdma_puda_buf *sqbuf; struct irdma_mpa_priv_info priv_info; cm_node->mpa_hdr.addr = &cm_node->mpa_v2_frame; cm_node->mpa_hdr.size = irdma_cm_build_mpa_frame(cm_node, &cm_node->mpa_hdr, MPA_KEY_REPLY); cm_node->mpa_v2_frame.flags |= IETF_MPA_FLAGS_REJECT; priv_info.addr = pdata; priv_info.size = plen; sqbuf = cm_node->cm_core->form_cm_frame(cm_node, NULL, &cm_node->mpa_hdr, &priv_info, SET_ACK | SET_FIN); if (!sqbuf) return -ENOMEM; cm_node->state = IRDMA_CM_STATE_FIN_WAIT1; return irdma_schedule_cm_timer(cm_node, sqbuf, IRDMA_TIMER_TYPE_SEND, 1, 0); } /** * irdma_negotiate_mpa_v2_ird_ord - negotiate MPAv2 IRD/ORD * @cm_node: connection's node * @buf: Data pointer */ static int irdma_negotiate_mpa_v2_ird_ord(struct irdma_cm_node *cm_node, u8 *buf) { struct ietf_mpa_v2 *mpa_v2_frame; struct ietf_rtr_msg *rtr_msg; u16 ird_size; u16 ord_size; u16 ctrl_ord; u16 ctrl_ird; mpa_v2_frame = (struct ietf_mpa_v2 *)buf; rtr_msg = &mpa_v2_frame->rtr_msg; /* parse rtr message */ ctrl_ord = ntohs(rtr_msg->ctrl_ord); ctrl_ird = ntohs(rtr_msg->ctrl_ird); ird_size = ctrl_ird & IETF_NO_IRD_ORD; ord_size = ctrl_ord & IETF_NO_IRD_ORD; if (!(ctrl_ird & IETF_PEER_TO_PEER)) return -EOPNOTSUPP; if (ird_size == IETF_NO_IRD_ORD || ord_size == IETF_NO_IRD_ORD) { cm_node->mpav2_ird_ord = IETF_NO_IRD_ORD; goto negotiate_done; } if (cm_node->state != IRDMA_CM_STATE_MPAREQ_SENT) { /* responder */ if (!ord_size && (ctrl_ord & IETF_RDMA0_READ)) cm_node->ird_size = 1; if (cm_node->ord_size > ird_size) cm_node->ord_size = ird_size; } else { /* initiator */ if (!ird_size && 
(ctrl_ord & IETF_RDMA0_READ)) /* Remote peer doesn't support RDMA0_READ */ return -EOPNOTSUPP; if (cm_node->ord_size > ird_size) cm_node->ord_size = ird_size; if (cm_node->ird_size < ord_size) /* no resources available */ return -EINVAL; } negotiate_done: if (ctrl_ord & IETF_RDMA0_READ) cm_node->send_rdma0_op = SEND_RDMA_READ_ZERO; else if (ctrl_ord & IETF_RDMA0_WRITE) cm_node->send_rdma0_op = SEND_RDMA_WRITE_ZERO; else /* Not supported RDMA0 operation */ return -EOPNOTSUPP; irdma_debug(&cm_node->iwdev->rf->sc_dev, IRDMA_DEBUG_CM, "MPAV2 Negotiated ORD: %d, IRD: %d\n", cm_node->ord_size, cm_node->ird_size); return 0; } /** * irdma_parse_mpa - process an IETF MPA frame * @cm_node: connection's node * @buf: Data pointer * @type: to return accept or reject * @len: Len of mpa buffer */ static int irdma_parse_mpa(struct irdma_cm_node *cm_node, u8 *buf, u32 *type, u32 len) { struct ietf_mpa_v1 *mpa_frame; int mpa_hdr_len, priv_data_len, ret; *type = IRDMA_MPA_REQUEST_ACCEPT; if (len < sizeof(struct ietf_mpa_v1)) { irdma_debug(&cm_node->iwdev->rf->sc_dev, IRDMA_DEBUG_CM, "ietf buffer small (%x)\n", len); return -EINVAL; } mpa_frame = (struct ietf_mpa_v1 *)buf; mpa_hdr_len = sizeof(struct ietf_mpa_v1); priv_data_len = ntohs(mpa_frame->priv_data_len); if (priv_data_len > IETF_MAX_PRIV_DATA_LEN) { irdma_debug(&cm_node->iwdev->rf->sc_dev, IRDMA_DEBUG_CM, "private_data too big %d\n", priv_data_len); return -EOVERFLOW; } if (mpa_frame->rev != IETF_MPA_V1 && mpa_frame->rev != IETF_MPA_V2) { irdma_debug(&cm_node->iwdev->rf->sc_dev, IRDMA_DEBUG_CM, "unsupported mpa rev = %d\n", mpa_frame->rev); return -EINVAL; } if (mpa_frame->rev > cm_node->mpa_frame_rev) { irdma_debug(&cm_node->iwdev->rf->sc_dev, IRDMA_DEBUG_CM, "rev %d\n", mpa_frame->rev); return -EINVAL; } cm_node->mpa_frame_rev = mpa_frame->rev; if (cm_node->state != IRDMA_CM_STATE_MPAREQ_SENT) { if (memcmp(mpa_frame->key, IEFT_MPA_KEY_REQ, IETF_MPA_KEY_SIZE)) { irdma_debug(&cm_node->iwdev->rf->sc_dev, IRDMA_DEBUG_CM, "Unexpected MPA Key received\n"); return -EINVAL; } } else { if (memcmp(mpa_frame->key, IEFT_MPA_KEY_REP, IETF_MPA_KEY_SIZE)) { irdma_debug(&cm_node->iwdev->rf->sc_dev, IRDMA_DEBUG_CM, "Unexpected MPA Key received\n"); return -EINVAL; } } if (priv_data_len + mpa_hdr_len > len) { irdma_debug(&cm_node->iwdev->rf->sc_dev, IRDMA_DEBUG_CM, "ietf buffer len(%x + %x != %x)\n", priv_data_len, mpa_hdr_len, len); return -EOVERFLOW; } if (len > IRDMA_MAX_CM_BUF) { irdma_debug(&cm_node->iwdev->rf->sc_dev, IRDMA_DEBUG_CM, "ietf buffer large len = %d\n", len); return -EOVERFLOW; } switch (mpa_frame->rev) { case IETF_MPA_V2: mpa_hdr_len += IETF_RTR_MSG_SIZE; ret = irdma_negotiate_mpa_v2_ird_ord(cm_node, buf); if (ret) return ret; break; case IETF_MPA_V1: default: break; } memcpy(cm_node->pdata_buf, buf + mpa_hdr_len, priv_data_len); cm_node->pdata.size = priv_data_len; if (mpa_frame->flags & IETF_MPA_FLAGS_REJECT) *type = IRDMA_MPA_REQUEST_REJECT; if (mpa_frame->flags & IETF_MPA_FLAGS_MARKERS) cm_node->snd_mark_en = true; return 0; } /** * irdma_schedule_cm_timer * @cm_node: connection's node * @sqbuf: buffer to send * @type: if it is send or close * @send_retrans: if rexmits to be done * @close_when_complete: is cm_node to be removed * * note - cm_node needs to be protected before calling this. Encase in: * irdma_rem_ref_cm_node(cm_core, cm_node); * irdma_schedule_cm_timer(...) 
* atomic_inc(&cm_node->refcnt); */ int irdma_schedule_cm_timer(struct irdma_cm_node *cm_node, struct irdma_puda_buf *sqbuf, enum irdma_timer_type type, int send_retrans, int close_when_complete) { struct irdma_sc_vsi *vsi = &cm_node->iwdev->vsi; struct irdma_cm_core *cm_core = cm_node->cm_core; struct irdma_timer_entry *new_send; u32 was_timer_set; unsigned long flags; new_send = kzalloc(sizeof(*new_send), GFP_ATOMIC); if (!new_send) { if (type != IRDMA_TIMER_TYPE_CLOSE) irdma_free_sqbuf(vsi, sqbuf); return -ENOMEM; } new_send->retrycount = IRDMA_DEFAULT_RETRYS; new_send->retranscount = IRDMA_DEFAULT_RETRANS; new_send->sqbuf = sqbuf; new_send->timetosend = jiffies; new_send->type = type; new_send->send_retrans = send_retrans; new_send->close_when_complete = close_when_complete; if (type == IRDMA_TIMER_TYPE_CLOSE) { new_send->timetosend += (HZ / 10); if (cm_node->close_entry) { kfree(new_send); irdma_debug(&cm_node->iwdev->rf->sc_dev, IRDMA_DEBUG_CM, "already close entry\n"); return -EINVAL; } cm_node->close_entry = new_send; } else { /* type == IRDMA_TIMER_TYPE_SEND */ spin_lock_irqsave(&cm_node->retrans_list_lock, flags); cm_node->send_entry = new_send; atomic_inc(&cm_node->refcnt); spin_unlock_irqrestore(&cm_node->retrans_list_lock, flags); new_send->timetosend = jiffies + IRDMA_RETRY_TIMEOUT; atomic_inc(&sqbuf->refcount); irdma_puda_send_buf(vsi->ilq, sqbuf); if (!send_retrans) { irdma_cleanup_retrans_entry(cm_node); if (close_when_complete) irdma_rem_ref_cm_node(cm_node); return 0; } } spin_lock_irqsave(&cm_core->ht_lock, flags); was_timer_set = timer_pending(&cm_core->tcp_timer); if (!was_timer_set) { cm_core->tcp_timer.expires = new_send->timetosend; add_timer(&cm_core->tcp_timer); } spin_unlock_irqrestore(&cm_core->ht_lock, flags); return 0; } /** * irdma_retrans_expired - Could not rexmit the packet * @cm_node: connection's node */ static void irdma_retrans_expired(struct irdma_cm_node *cm_node) { enum irdma_cm_node_state state = cm_node->state; cm_node->state = IRDMA_CM_STATE_CLOSED; switch (state) { case IRDMA_CM_STATE_SYN_RCVD: case IRDMA_CM_STATE_CLOSING: irdma_rem_ref_cm_node(cm_node); break; case IRDMA_CM_STATE_FIN_WAIT1: case IRDMA_CM_STATE_LAST_ACK: irdma_send_reset(cm_node); break; default: atomic_inc(&cm_node->refcnt); irdma_send_reset(cm_node); irdma_create_event(cm_node, IRDMA_CM_EVENT_ABORTED); break; } } /** * irdma_handle_close_entry - for handling retry/timeouts * @cm_node: connection's node * @rem_node: flag for remove cm_node */ static void irdma_handle_close_entry(struct irdma_cm_node *cm_node, u32 rem_node) { struct irdma_timer_entry *close_entry = cm_node->close_entry; struct irdma_qp *iwqp; unsigned long flags; if (!close_entry) return; iwqp = (struct irdma_qp *)close_entry->sqbuf; if (iwqp) { spin_lock_irqsave(&iwqp->lock, flags); if (iwqp->cm_id) { iwqp->hw_tcp_state = IRDMA_TCP_STATE_CLOSED; iwqp->hw_iwarp_state = IRDMA_QP_STATE_ERROR; iwqp->last_aeq = IRDMA_AE_RESET_SENT; iwqp->ibqp_state = IB_QPS_ERR; spin_unlock_irqrestore(&iwqp->lock, flags); irdma_cm_disconn(iwqp); } else { spin_unlock_irqrestore(&iwqp->lock, flags); } } else if (rem_node) { /* TIME_WAIT state */ irdma_rem_ref_cm_node(cm_node); } kfree(close_entry); cm_node->close_entry = NULL; } /** * irdma_cm_timer_tick - system's timer expired callback * @t: Pointer to timer_list */ static void irdma_cm_timer_tick(struct timer_list *t) { unsigned long nexttimeout = jiffies + IRDMA_LONG_TIME; struct irdma_cm_node *cm_node; struct irdma_timer_entry *send_entry, *close_entry; struct list_head 
*list_core_temp; struct list_head *list_node; struct irdma_cm_core *cm_core = from_timer(cm_core, t, tcp_timer); struct irdma_sc_vsi *vsi; u32 settimer = 0; unsigned long timetosend; unsigned long flags; struct list_head timer_list; INIT_LIST_HEAD(&timer_list); rcu_read_lock(); irdma_timer_list_prep(cm_core, &timer_list); rcu_read_unlock(); list_for_each_safe(list_node, list_core_temp, &timer_list) { cm_node = container_of(list_node, struct irdma_cm_node, timer_entry); close_entry = cm_node->close_entry; if (close_entry) { if (time_after(close_entry->timetosend, jiffies)) { if (nexttimeout > close_entry->timetosend || !settimer) { nexttimeout = close_entry->timetosend; settimer = 1; } } else { irdma_handle_close_entry(cm_node, 1); } } spin_lock_irqsave(&cm_node->retrans_list_lock, flags); send_entry = cm_node->send_entry; if (!send_entry) goto done; if (time_after(send_entry->timetosend, jiffies)) { if (cm_node->state != IRDMA_CM_STATE_OFFLOADED) { if (nexttimeout > send_entry->timetosend || !settimer) { nexttimeout = send_entry->timetosend; settimer = 1; } } else { irdma_free_retrans_entry(cm_node); } goto done; } if (cm_node->state == IRDMA_CM_STATE_OFFLOADED || cm_node->state == IRDMA_CM_STATE_CLOSED) { irdma_free_retrans_entry(cm_node); goto done; } if (!send_entry->retranscount || !send_entry->retrycount) { irdma_free_retrans_entry(cm_node); spin_unlock_irqrestore(&cm_node->retrans_list_lock, flags); irdma_retrans_expired(cm_node); cm_node->state = IRDMA_CM_STATE_CLOSED; spin_lock_irqsave(&cm_node->retrans_list_lock, flags); goto done; } spin_unlock_irqrestore(&cm_node->retrans_list_lock, flags); vsi = &cm_node->iwdev->vsi; if (!cm_node->ack_rcvd) { atomic_inc(&send_entry->sqbuf->refcount); irdma_puda_send_buf(vsi->ilq, send_entry->sqbuf); cm_node->cm_core->stats_pkt_retrans++; } spin_lock_irqsave(&cm_node->retrans_list_lock, flags); if (send_entry->send_retrans) { send_entry->retranscount--; timetosend = (IRDMA_RETRY_TIMEOUT << (IRDMA_DEFAULT_RETRANS - send_entry->retranscount)); send_entry->timetosend = jiffies + min(timetosend, IRDMA_MAX_TIMEOUT); if (nexttimeout > send_entry->timetosend || !settimer) { nexttimeout = send_entry->timetosend; settimer = 1; } } else { int close_when_complete; close_when_complete = send_entry->close_when_complete; irdma_free_retrans_entry(cm_node); if (close_when_complete) irdma_rem_ref_cm_node(cm_node); } done: spin_unlock_irqrestore(&cm_node->retrans_list_lock, flags); irdma_rem_ref_cm_node(cm_node); } if (settimer) { spin_lock_irqsave(&cm_core->ht_lock, flags); if (!timer_pending(&cm_core->tcp_timer)) { cm_core->tcp_timer.expires = nexttimeout; add_timer(&cm_core->tcp_timer); } spin_unlock_irqrestore(&cm_core->ht_lock, flags); } } /** * irdma_send_syn - send SYN packet * @cm_node: connection's node * @sendack: flag to set ACK bit or not */ int irdma_send_syn(struct irdma_cm_node *cm_node, u32 sendack) { struct irdma_puda_buf *sqbuf; int flags = SET_SYN; char optionsbuf[sizeof(struct option_mss) + sizeof(struct option_windowscale) + sizeof(struct option_base) + TCP_OPTIONS_PADDING]; struct irdma_kmem_info opts; int optionssize = 0; /* Sending MSS option */ union all_known_options *options; opts.addr = optionsbuf; if (!cm_node) return -EINVAL; options = (union all_known_options *)&optionsbuf[optionssize]; options->mss.optionnum = OPTION_NUM_MSS; options->mss.len = sizeof(struct option_mss); options->mss.mss = htons(cm_node->tcp_cntxt.mss); optionssize += sizeof(struct option_mss); options = (union all_known_options *)&optionsbuf[optionssize]; 
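/*
 * Illustrative, self-contained sketch (not part of this patch): how a SYN's
 * TCP options are laid out as kind/length/value records, mirroring the
 * optionsbuf packing that irdma_send_syn() does here with the
 * all_known_options union.  The tcp_opt_pack_syn() helper and OPT_KIND_*
 * names below are hypothetical, not driver symbols.
 */
#include <stdint.h>
#include <string.h>
#include <arpa/inet.h>	/* htons() */

#define OPT_KIND_EOL	0	/* end of option list */
#define OPT_KIND_MSS	2	/* kind 2, length 4: 16-bit MSS */
#define OPT_KIND_WSCALE	3	/* kind 3, length 3: shift count */

/* Pack MSS + window-scale + EOL into buf; returns bytes used. */
static int
tcp_opt_pack_syn(uint8_t *buf, uint16_t mss, uint8_t rcv_wscale)
{
	int off = 0;

	buf[off++] = OPT_KIND_MSS;
	buf[off++] = 4;				/* total length of this option */
	mss = htons(mss);			/* MSS travels in network order */
	memcpy(&buf[off], &mss, sizeof(mss));
	off += sizeof(mss);

	buf[off++] = OPT_KIND_WSCALE;
	buf[off++] = 3;
	buf[off++] = rcv_wscale;		/* receive window shift count */

	buf[off++] = OPT_KIND_EOL;		/* terminate the option list */
	return off;				/* caller pads to a 4-byte multiple */
}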
options->windowscale.optionnum = OPTION_NUM_WINDOW_SCALE; options->windowscale.len = sizeof(struct option_windowscale); options->windowscale.shiftcount = cm_node->tcp_cntxt.rcv_wscale; optionssize += sizeof(struct option_windowscale); options = (union all_known_options *)&optionsbuf[optionssize]; options->eol = OPTION_NUM_EOL; optionssize += 1; if (sendack) flags |= SET_ACK; opts.size = optionssize; sqbuf = cm_node->cm_core->form_cm_frame(cm_node, &opts, NULL, NULL, flags); if (!sqbuf) return -ENOMEM; return irdma_schedule_cm_timer(cm_node, sqbuf, IRDMA_TIMER_TYPE_SEND, 1, 0); } /** * irdma_send_ack - Send ACK packet * @cm_node: connection's node */ void irdma_send_ack(struct irdma_cm_node *cm_node) { struct irdma_puda_buf *sqbuf; struct irdma_sc_vsi *vsi = &cm_node->iwdev->vsi; sqbuf = cm_node->cm_core->form_cm_frame(cm_node, NULL, NULL, NULL, SET_ACK); if (sqbuf) irdma_puda_send_buf(vsi->ilq, sqbuf); } /** * irdma_send_fin - Send FIN pkt * @cm_node: connection's node */ static int irdma_send_fin(struct irdma_cm_node *cm_node) { struct irdma_puda_buf *sqbuf; sqbuf = cm_node->cm_core->form_cm_frame(cm_node, NULL, NULL, NULL, SET_ACK | SET_FIN); if (!sqbuf) return -ENOMEM; return irdma_schedule_cm_timer(cm_node, sqbuf, IRDMA_TIMER_TYPE_SEND, 1, 0); } /** * irdma_find_listener - find a cm node listening on this addr-port pair * @cm_core: cm's core * @dst_addr: listener ip addr * @ipv4: flag indicating IPv4 when true * @dst_port: listener tcp port num * @vlan_id: virtual LAN ID * @listener_state: state to match with listen node's */ static struct irdma_cm_listener * irdma_find_listener(struct irdma_cm_core *cm_core, u32 *dst_addr, bool ipv4, u16 dst_port, u16 vlan_id, enum irdma_cm_listener_state listener_state) { struct irdma_cm_listener *listen_node; static const u32 ip_zero[4] = {0, 0, 0, 0}; u32 listen_addr[4]; u16 listen_port; unsigned long flags; /* walk list and find cm_node associated with this session ID */ spin_lock_irqsave(&cm_core->listen_list_lock, flags); list_for_each_entry(listen_node, &cm_core->listen_list, list) { memcpy(listen_addr, listen_node->loc_addr, sizeof(listen_addr)); listen_port = listen_node->loc_port; if (listen_node->ipv4 != ipv4 || listen_port != dst_port || !(listener_state & listen_node->listener_state)) continue; /* compare node pair, return node handle if a match */ if (!memcmp(listen_addr, ip_zero, sizeof(listen_addr)) || (!memcmp(listen_addr, dst_addr, sizeof(listen_addr)) && vlan_id == listen_node->vlan_id)) { atomic_inc(&listen_node->refcnt); spin_unlock_irqrestore(&cm_core->listen_list_lock, flags); return listen_node; } } spin_unlock_irqrestore(&cm_core->listen_list_lock, flags); return NULL; } /** * irdma_del_multiple_qhash - Remove qhash and child listens * @iwdev: iWarp device * @cm_info: CM info for parent listen node * @cm_parent_listen_node: The parent listen node */ static int irdma_del_multiple_qhash(struct irdma_device *iwdev, struct irdma_cm_info *cm_info, struct irdma_cm_listener *cm_parent_listen_node) { struct irdma_cm_listener *child_listen_node; struct list_head *pos, *tpos; unsigned long flags; int ret = -EINVAL; spin_lock_irqsave(&iwdev->cm_core.listen_list_lock, flags); list_for_each_safe(pos, tpos, &cm_parent_listen_node->child_listen_list) { child_listen_node = list_entry(pos, struct irdma_cm_listener, child_listen_list); if (child_listen_node->ipv4) irdma_debug(&iwdev->rf->sc_dev, IRDMA_DEBUG_CM, "removing child listen for IP=%pI4, port=%d, vlan=%d\n", child_listen_node->loc_addr, child_listen_node->loc_port, 
child_listen_node->vlan_id); else irdma_debug(&iwdev->rf->sc_dev, IRDMA_DEBUG_CM, "removing child listen for IP=%pI6, port=%d, vlan=%d\n", child_listen_node->loc_addr, child_listen_node->loc_port, child_listen_node->vlan_id); list_del(pos); memcpy(cm_info->loc_addr, child_listen_node->loc_addr, sizeof(cm_info->loc_addr)); cm_info->vlan_id = child_listen_node->vlan_id; if (child_listen_node->qhash_set) { ret = irdma_manage_qhash(iwdev, cm_info, IRDMA_QHASH_TYPE_TCP_SYN, IRDMA_QHASH_MANAGE_TYPE_DELETE, NULL, false); child_listen_node->qhash_set = false; } else { ret = 0; } irdma_debug(&iwdev->rf->sc_dev, IRDMA_DEBUG_CM, "Child listen node freed = %p\n", child_listen_node); kfree(child_listen_node); cm_parent_listen_node->cm_core->stats_listen_nodes_destroyed++; } spin_unlock_irqrestore(&iwdev->cm_core.listen_list_lock, flags); return ret; } static u8 irdma_get_egress_vlan_prio(u32 *loc_addr, u8 prio, bool ipv4){ return prio; } /** * irdma_netdev_vlan_ipv6 - Gets the netdev and mac * @addr: local IPv6 address * @vlan_id: vlan id for the given IPv6 address * @mac: mac address for the given IPv6 address * * Returns the net_device of the IPv6 address and also sets the * vlan id and mac for that address. */ -struct ifnet * +if_t irdma_netdev_vlan_ipv6(u32 *addr, u16 *vlan_id, u8 *mac) { - struct ifnet *ip_dev = NULL; + if_t ip_dev = NULL; struct in6_addr laddr6; + struct ifaddr *ifa; u16 scope_id = 0; irdma_copy_ip_htonl(laddr6.__u6_addr.__u6_addr32, addr); if (vlan_id) *vlan_id = 0xFFFF; /* Match rdma_vlan_dev_vlan_id() */ if (mac) eth_zero_addr(mac); if (IN6_IS_SCOPE_LINKLOCAL(&laddr6) || IN6_IS_ADDR_MC_INTFACELOCAL(&laddr6)) scope_id = ntohs(laddr6.__u6_addr.__u6_addr16[1]); ip_dev = ip6_ifp_find(&init_net, laddr6, scope_id); if (ip_dev) { if (vlan_id) *vlan_id = rdma_vlan_dev_vlan_id(ip_dev); - if (ip_dev->if_addr && ip_dev->if_addr->ifa_addr && mac) - ether_addr_copy(mac, IF_LLADDR(ip_dev)); + ifa = if_getifaddr(ip_dev); + if (ifa && ifa->ifa_addr && mac) + ether_addr_copy(mac, if_getlladdr(ip_dev)); } return ip_dev; } /** * irdma_get_vlan_ipv4 - Returns the vlan_id for IPv4 address * @addr: local IPv4 address */ u16 irdma_get_vlan_ipv4(u32 *addr) { - struct ifnet *netdev; + if_t netdev; u16 vlan_id = 0xFFFF; netdev = ip_ifp_find(&init_net, htonl(addr[0])); if (netdev) { vlan_id = rdma_vlan_dev_vlan_id(netdev); dev_put(netdev); } return vlan_id; } -/** - * irdma_add_mqh_6 - Adds multiple qhashes for IPv6 - * @iwdev: iWarp device - * @cm_info: CM info for parent listen node - * @cm_parent_listen_node: The parent listen node - * - * Adds a qhash and a child listen node for every IPv6 address - * on the adapter and adds the associated qhash filter - */ -static int -irdma_add_mqh_6(struct irdma_device *iwdev, - struct irdma_cm_info *cm_info, - struct irdma_cm_listener *cm_parent_listen_node) -{ - struct ifnet *ip_dev; - struct ifaddr *ifp; - struct irdma_cm_listener *child_listen_node; - unsigned long flags; - int ret = 0; - - IFNET_RLOCK(); - IRDMA_TAILQ_FOREACH((ip_dev), &V_ifnet, if_link) { - if (!(ip_dev->if_flags & IFF_UP)) - continue; - - if (((rdma_vlan_dev_vlan_id(ip_dev) >= VLAN_N_VID) || - (rdma_vlan_dev_real_dev(ip_dev) != iwdev->netdev)) && - ip_dev != iwdev->netdev) - continue; - - if_addr_rlock(ip_dev); - IRDMA_TAILQ_FOREACH(ifp, &ip_dev->if_addrhead, ifa_link) { - irdma_debug(&iwdev->rf->sc_dev, IRDMA_DEBUG_CM, - "IP=%pI6, vlan_id=%d, MAC=%pM\n", - &((struct sockaddr_in6 *)ifp->ifa_addr)->sin6_addr, rdma_vlan_dev_vlan_id(ip_dev), - IF_LLADDR(ip_dev)); - if (((struct sockaddr_in6 
*)ifp->ifa_addr)->sin6_family != AF_INET6) - continue; - child_listen_node = kzalloc(sizeof(*child_listen_node), GFP_KERNEL); - irdma_debug(&iwdev->rf->sc_dev, IRDMA_DEBUG_CM, - "Allocating child listener %p\n", - child_listen_node); - if (!child_listen_node) { - irdma_debug(&iwdev->rf->sc_dev, IRDMA_DEBUG_CM, "listener memory allocation\n"); - ret = -ENOMEM; - if_addr_runlock(ip_dev); - goto exit; - } - - memcpy(child_listen_node, cm_parent_listen_node, - sizeof(*child_listen_node)); - cm_info->vlan_id = rdma_vlan_dev_vlan_id(ip_dev); - child_listen_node->vlan_id = cm_info->vlan_id; - irdma_copy_ip_ntohl(child_listen_node->loc_addr, - ((struct sockaddr_in6 *)ifp->ifa_addr)->sin6_addr.__u6_addr.__u6_addr32); - memcpy(cm_info->loc_addr, child_listen_node->loc_addr, - sizeof(cm_info->loc_addr)); - if (!iwdev->vsi.dscp_mode) - cm_info->user_pri = - irdma_get_egress_vlan_prio(child_listen_node->loc_addr, - cm_info->user_pri, - false); - ret = irdma_manage_qhash(iwdev, cm_info, - IRDMA_QHASH_TYPE_TCP_SYN, - IRDMA_QHASH_MANAGE_TYPE_ADD, - NULL, true); - if (ret) { - kfree(child_listen_node); - continue; - } - - child_listen_node->qhash_set = true; - spin_lock_irqsave(&iwdev->cm_core.listen_list_lock, flags); - list_add(&child_listen_node->child_listen_list, - &cm_parent_listen_node->child_listen_list); - spin_unlock_irqrestore(&iwdev->cm_core.listen_list_lock, flags); - cm_parent_listen_node->cm_core->stats_listen_nodes_created++; - } - if_addr_runlock(ip_dev); - } -exit: - IFNET_RUNLOCK(); - - return ret; -} +struct irdma_add_mqh_cbs { + struct irdma_device *iwdev; + struct irdma_cm_info *cm_info; + struct irdma_cm_listener *cm_listen_node; +}; /** - * irdma_add_mqh_4 - Adds multiple qhashes for IPv4 - * @iwdev: iWarp device - * @cm_info: CM info for parent listen node - * @cm_parent_listen_node: The parent listen node + * irdma_add_mqh_ifa_cb - Adds multiple qhashes for IPV4/IPv6 + * @arg: Calback argument structure from irdma_add_mqh + * @ifa: Current address to compute against + * @count: Current cumulative output of all callbacks in this iteration * - * Adds a qhash and a child listen node for every IPv4 address + * Adds a qhash and a child listen node for a single IPv4/IPv6 address * on the adapter and adds the associated qhash filter */ -static int -irdma_add_mqh_4(struct irdma_device *iwdev, - struct irdma_cm_info *cm_info, - struct irdma_cm_listener *cm_parent_listen_node) +static u_int +irdma_add_mqh_ifa_cb(void *arg, struct ifaddr *ifa, u_int count) { - struct ifnet *ip_dev; + struct irdma_add_mqh_cbs *cbs = arg; struct irdma_cm_listener *child_listen_node; + struct irdma_cm_info *cm_info = cbs->cm_info; + struct irdma_device *iwdev = cbs->iwdev; + struct irdma_cm_listener *cm_parent_listen_node = cbs->cm_listen_node; + if_t ip_dev = ifa->ifa_ifp; unsigned long flags; - struct ifaddr *ifa; - int ret = 0; - - IFNET_RLOCK(); - IRDMA_TAILQ_FOREACH((ip_dev), &V_ifnet, if_link) { - if (!(ip_dev->if_flags & IFF_UP)) - continue; - - if (((rdma_vlan_dev_vlan_id(ip_dev) >= VLAN_N_VID) || - (rdma_vlan_dev_real_dev(ip_dev) != iwdev->netdev)) && - ip_dev != iwdev->netdev) - continue; + int ret; - if_addr_rlock(ip_dev); - IRDMA_TAILQ_FOREACH(ifa, &ip_dev->if_addrhead, ifa_link) { - irdma_debug(&iwdev->rf->sc_dev, IRDMA_DEBUG_CM, - "Allocating child CM Listener forIP=%pI4, vlan_id=%d, MAC=%pM\n", - &ifa->ifa_addr, rdma_vlan_dev_vlan_id(ip_dev), - IF_LLADDR(ip_dev)); - if (((struct sockaddr_in *)ifa->ifa_addr)->sin_family != AF_INET) - continue; - child_listen_node = 
kzalloc(sizeof(*child_listen_node), GFP_KERNEL); - cm_parent_listen_node->cm_core->stats_listen_nodes_created++; - irdma_debug(&iwdev->rf->sc_dev, IRDMA_DEBUG_CM, - "Allocating child listener %p\n", - child_listen_node); - if (!child_listen_node) { - irdma_debug(&iwdev->rf->sc_dev, IRDMA_DEBUG_CM, "listener memory allocation\n"); - if_addr_runlock(ip_dev); - ret = -ENOMEM; - goto exit; - } + if (count) + return 0; - memcpy(child_listen_node, cm_parent_listen_node, - sizeof(*child_listen_node)); - child_listen_node->vlan_id = rdma_vlan_dev_vlan_id(ip_dev); - cm_info->vlan_id = child_listen_node->vlan_id; - child_listen_node->loc_addr[0] = - ntohl(((struct sockaddr_in *)ifa->ifa_addr)->sin_addr.s_addr); - memcpy(cm_info->loc_addr, child_listen_node->loc_addr, - sizeof(cm_info->loc_addr)); - if (!iwdev->vsi.dscp_mode) - cm_info->user_pri = - irdma_get_egress_vlan_prio(child_listen_node->loc_addr, - cm_info->user_pri, - true); - ret = irdma_manage_qhash(iwdev, cm_info, - IRDMA_QHASH_TYPE_TCP_SYN, - IRDMA_QHASH_MANAGE_TYPE_ADD, - NULL, true); - if (ret) { - kfree(child_listen_node); - cm_parent_listen_node->cm_core - ->stats_listen_nodes_created--; - continue; - } + if (cm_info->ipv4) + irdma_debug(&iwdev->rf->sc_dev, IRDMA_DEBUG_CM, + "Allocating child CM Listener forIP=%pI4, vlan_id=%d, MAC=%pM\n", + &ifa->ifa_addr, + rdma_vlan_dev_vlan_id(ip_dev), if_getlladdr(ip_dev)); + else + irdma_debug(&iwdev->rf->sc_dev, IRDMA_DEBUG_CM, + "IP=%pI6, vlan_id=%d, MAC=%pM\n", + &((struct sockaddr_in6 *)ifa->ifa_addr)->sin6_addr, + rdma_vlan_dev_vlan_id(ip_dev), + if_getlladdr(ip_dev)); + child_listen_node = kzalloc(sizeof(*child_listen_node), GFP_KERNEL); + irdma_debug(&iwdev->rf->sc_dev, IRDMA_DEBUG_CM, + "Allocating child listener %p\n", + child_listen_node); + if (!child_listen_node) { + irdma_debug(&iwdev->rf->sc_dev, + IRDMA_DEBUG_CM, + "listener memory allocation\n"); + return -ENOMEM; + } - child_listen_node->qhash_set = true; - spin_lock_irqsave(&iwdev->cm_core.listen_list_lock, - flags); - list_add(&child_listen_node->child_listen_list, - &cm_parent_listen_node->child_listen_list); - spin_unlock_irqrestore(&iwdev->cm_core.listen_list_lock, flags); - } - if_addr_runlock(ip_dev); + memcpy(child_listen_node, cm_parent_listen_node, + sizeof(*child_listen_node)); + cm_info->vlan_id = rdma_vlan_dev_vlan_id(ip_dev); + child_listen_node->vlan_id = cm_info->vlan_id; + if (cm_info->ipv4) + child_listen_node->loc_addr[0] = + ntohl(((struct sockaddr_in *)ifa->ifa_addr)->sin_addr.s_addr); + else + irdma_copy_ip_ntohl(child_listen_node->loc_addr, + ((struct sockaddr_in6 *)ifa->ifa_addr)->sin6_addr.__u6_addr.__u6_addr32); + memcpy(cm_info->loc_addr, child_listen_node->loc_addr, + sizeof(cm_info->loc_addr)); + if (!iwdev->vsi.dscp_mode) + cm_info->user_pri = + irdma_get_egress_vlan_prio(child_listen_node->loc_addr, + cm_info->user_pri, + false); + ret = irdma_manage_qhash(iwdev, cm_info, + IRDMA_QHASH_TYPE_TCP_SYN, + IRDMA_QHASH_MANAGE_TYPE_ADD, + NULL, true); + if (ret) { + kfree(child_listen_node); + return ret; } -exit: - IFNET_RUNLOCK(); - return ret; + child_listen_node->qhash_set = true; + spin_lock_irqsave(&iwdev->cm_core.listen_list_lock, flags); + list_add(&child_listen_node->child_listen_list, + &cm_parent_listen_node->child_listen_list); + spin_unlock_irqrestore(&iwdev->cm_core.listen_list_lock, flags); + cm_parent_listen_node->cm_core->stats_listen_nodes_created++; + + return 0; } /** * irdma_add_mqh - Adds multiple qhashes * @iwdev: iWarp device * @cm_info: CM info for parent listen node * 
@cm_listen_node: The parent listen node */ static int irdma_add_mqh(struct irdma_device *iwdev, struct irdma_cm_info *cm_info, struct irdma_cm_listener *cm_listen_node) { + struct epoch_tracker et; + struct irdma_add_mqh_cbs cbs; + struct if_iter iter; + if_t ifp; int err; + + cbs.iwdev = iwdev; + cbs.cm_info = cm_info; + cbs.cm_listen_node = cm_listen_node; + VNET_ITERATOR_DECL(vnet_iter); VNET_LIST_RLOCK(); + NET_EPOCH_ENTER(et); VNET_FOREACH(vnet_iter) { - IFNET_RLOCK(); CURVNET_SET_QUIET(vnet_iter); + for (ifp = if_iter_start(&iter); ifp != NULL; ifp = if_iter_next(&iter)) { + if (!(if_getflags(ifp) & IFF_UP)) + continue; - if (cm_info->ipv4) - err = irdma_add_mqh_4(iwdev, cm_info, cm_listen_node); - else - err = irdma_add_mqh_6(iwdev, cm_info, cm_listen_node); + if (((rdma_vlan_dev_vlan_id(ifp) >= VLAN_N_VID) || + (rdma_vlan_dev_real_dev(ifp) != iwdev->netdev)) && + ifp != iwdev->netdev) + continue; + + if_addr_rlock(ifp); + if (cm_info->ipv4) + err = if_foreach_addr_type(ifp, AF_INET, irdma_add_mqh_ifa_cb, &cbs); + else + err = if_foreach_addr_type(ifp, AF_INET6, irdma_add_mqh_ifa_cb, &cbs); + if_addr_runlock(ifp); + } + if_iter_finish(&iter); CURVNET_RESTORE(); - IFNET_RUNLOCK(); } + NET_EPOCH_EXIT(et); VNET_LIST_RUNLOCK(); return err; } /** * irdma_reset_list_prep - add connection nodes slated for reset to list * @cm_core: cm's core * @listener: pointer to listener node * @reset_list: a list to which cm_node will be selected */ static void irdma_reset_list_prep(struct irdma_cm_core *cm_core, struct irdma_cm_listener *listener, struct list_head *reset_list) { struct irdma_cm_node *cm_node; int bkt; HASH_FOR_EACH_RCU(cm_core->cm_hash_tbl, bkt, cm_node, list) { if (cm_node->listener == listener && !cm_node->accelerated && atomic_inc_not_zero(&cm_node->refcnt)) list_add(&cm_node->reset_entry, reset_list); } } /** * irdma_dec_refcnt_listen - delete listener and associated cm nodes * @cm_core: cm's core * @listener: pointer to listener node * @free_hanging_nodes: to free associated cm_nodes * @apbvt_del: flag to delete the apbvt */ static int irdma_dec_refcnt_listen(struct irdma_cm_core *cm_core, struct irdma_cm_listener *listener, int free_hanging_nodes, bool apbvt_del) { struct list_head *list_pos; struct list_head *list_temp; struct irdma_cm_node *cm_node; struct list_head reset_list; struct irdma_cm_info nfo; enum irdma_cm_node_state old_state; unsigned long flags; int err; /* free non-accelerated child nodes for this listener */ INIT_LIST_HEAD(&reset_list); if (free_hanging_nodes) { rcu_read_lock(); irdma_reset_list_prep(cm_core, listener, &reset_list); rcu_read_unlock(); } list_for_each_safe(list_pos, list_temp, &reset_list) { cm_node = container_of(list_pos, struct irdma_cm_node, reset_entry); if (cm_node->state >= IRDMA_CM_STATE_FIN_WAIT1) { irdma_rem_ref_cm_node(cm_node); continue; } irdma_cleanup_retrans_entry(cm_node); err = irdma_send_reset(cm_node); if (err) { cm_node->state = IRDMA_CM_STATE_CLOSED; irdma_debug(&cm_node->iwdev->rf->sc_dev, IRDMA_DEBUG_CM, "send reset failed\n"); } else { old_state = cm_node->state; cm_node->state = IRDMA_CM_STATE_LISTENER_DESTROYED; if (old_state != IRDMA_CM_STATE_MPAREQ_RCVD) irdma_rem_ref_cm_node(cm_node); } } if (atomic_dec_and_test(&listener->refcnt)) { spin_lock_irqsave(&cm_core->listen_list_lock, flags); list_del(&listener->list); spin_unlock_irqrestore(&cm_core->listen_list_lock, flags); if (apbvt_del) irdma_del_apbvt(listener->iwdev, listener->apbvt_entry); memcpy(nfo.loc_addr, listener->loc_addr, sizeof(nfo.loc_addr)); nfo.loc_port 
= listener->loc_port; nfo.ipv4 = listener->ipv4; nfo.vlan_id = listener->vlan_id; nfo.user_pri = listener->user_pri; nfo.qh_qpid = listener->iwdev->vsi.ilq->qp_id; if (!list_empty(&listener->child_listen_list)) { irdma_del_multiple_qhash(listener->iwdev, &nfo, listener); } else { if (listener->qhash_set) irdma_manage_qhash(listener->iwdev, &nfo, IRDMA_QHASH_TYPE_TCP_SYN, IRDMA_QHASH_MANAGE_TYPE_DELETE, NULL, false); } cm_core->stats_listen_destroyed++; cm_core->stats_listen_nodes_destroyed++; irdma_debug(&listener->iwdev->rf->sc_dev, IRDMA_DEBUG_CM, "loc_port=0x%04x loc_addr=%pI4 cm_listen_node=%p cm_id=%p qhash_set=%d vlan_id=%d apbvt_del=%d\n", listener->loc_port, listener->loc_addr, listener, listener->cm_id, listener->qhash_set, listener->vlan_id, apbvt_del); kfree(listener); listener = NULL; return 0; } return -EINVAL; } /** * irdma_cm_del_listen - delete a listener * @cm_core: cm's core * @listener: passive connection's listener * @apbvt_del: flag to delete apbvt */ static int irdma_cm_del_listen(struct irdma_cm_core *cm_core, struct irdma_cm_listener *listener, bool apbvt_del) { listener->listener_state = IRDMA_CM_LISTENER_PASSIVE_STATE; listener->cm_id = NULL; return irdma_dec_refcnt_listen(cm_core, listener, 1, apbvt_del); } /** * irdma_find_node - find a cm node that matches the reference cm node * @cm_core: cm's core * @rem_port: remote tcp port num * @rem_addr: remote ip addr * @loc_port: local tcp port num * @loc_addr: local ip addr * @vlan_id: local VLAN ID */ struct irdma_cm_node * irdma_find_node(struct irdma_cm_core *cm_core, u16 rem_port, u32 *rem_addr, u16 loc_port, u32 *loc_addr, u16 vlan_id) { struct irdma_cm_node *cm_node; u32 key = (rem_port << 16) | loc_port; rcu_read_lock(); HASH_FOR_EACH_POSSIBLE_RCU(cm_core->cm_hash_tbl, cm_node, list, key) { if (cm_node->vlan_id == vlan_id && cm_node->loc_port == loc_port && cm_node->rem_port == rem_port && !memcmp(cm_node->loc_addr, loc_addr, sizeof(cm_node->loc_addr)) && !memcmp(cm_node->rem_addr, rem_addr, sizeof(cm_node->rem_addr))) { if (!atomic_inc_not_zero(&cm_node->refcnt)) goto exit; rcu_read_unlock(); return cm_node; } } exit: rcu_read_unlock(); /* no owner node */ return NULL; } /** * irdma_add_hte_node - add a cm node to the hash table * @cm_core: cm's core * @cm_node: connection's node */ static void irdma_add_hte_node(struct irdma_cm_core *cm_core, struct irdma_cm_node *cm_node) { unsigned long flags; u32 key = (cm_node->rem_port << 16) | cm_node->loc_port; spin_lock_irqsave(&cm_core->ht_lock, flags); HASH_ADD_RCU(cm_core->cm_hash_tbl, &cm_node->list, key); spin_unlock_irqrestore(&cm_core->ht_lock, flags); } /** * irdma_ipv4_is_lpb - check if loopback * @loc_addr: local addr to compare * @rem_addr: remote address */ bool irdma_ipv4_is_lpb(u32 loc_addr, u32 rem_addr) { return ipv4_is_loopback(htonl(rem_addr)) || (loc_addr == rem_addr); } /** * irdma_ipv6_is_lpb - check if loopback * @loc_addr: local addr to compare * @rem_addr: remote address */ bool irdma_ipv6_is_lpb(u32 *loc_addr, u32 *rem_addr) { struct in6_addr raddr6; irdma_copy_ip_htonl(raddr6.__u6_addr.__u6_addr32, rem_addr); return !memcmp(loc_addr, rem_addr, 16) || ipv6_addr_loopback(&raddr6); } /** * irdma_cm_create_ah - create a cm address handle * @cm_node: The connection manager node to create AH for * @wait: Provides option to wait for ah creation or not */ static int irdma_cm_create_ah(struct irdma_cm_node *cm_node, bool wait) { struct irdma_ah_info ah_info = {0}; struct irdma_device *iwdev = cm_node->iwdev; #ifdef VIMAGE struct rdma_cm_id *rdma_id = 
(struct rdma_cm_id *)cm_node->cm_id->context; struct vnet *vnet = rdma_id->route.addr.dev_addr.net; #endif - ether_addr_copy(ah_info.mac_addr, IF_LLADDR(iwdev->netdev)); + ether_addr_copy(ah_info.mac_addr, if_getlladdr(iwdev->netdev)); ah_info.hop_ttl = 0x40; ah_info.tc_tos = cm_node->tos; ah_info.vsi = &iwdev->vsi; if (cm_node->ipv4) { ah_info.ipv4_valid = true; ah_info.dest_ip_addr[0] = cm_node->rem_addr[0]; ah_info.src_ip_addr[0] = cm_node->loc_addr[0]; CURVNET_SET_QUIET(vnet); ah_info.do_lpbk = irdma_ipv4_is_lpb(ah_info.src_ip_addr[0], ah_info.dest_ip_addr[0]); CURVNET_RESTORE(); } else { memcpy(ah_info.dest_ip_addr, cm_node->rem_addr, sizeof(ah_info.dest_ip_addr)); memcpy(ah_info.src_ip_addr, cm_node->loc_addr, sizeof(ah_info.src_ip_addr)); ah_info.do_lpbk = irdma_ipv6_is_lpb(ah_info.src_ip_addr, ah_info.dest_ip_addr); } ah_info.vlan_tag = cm_node->vlan_id; if (cm_node->vlan_id < VLAN_N_VID) { ah_info.insert_vlan_tag = 1; ah_info.vlan_tag |= cm_node->user_pri << VLAN_PRIO_SHIFT; } ah_info.dst_arpindex = irdma_arp_table(iwdev->rf, ah_info.dest_ip_addr, NULL, IRDMA_ARP_RESOLVE); if (irdma_puda_create_ah(&iwdev->rf->sc_dev, &ah_info, wait, IRDMA_PUDA_RSRC_TYPE_ILQ, cm_node, &cm_node->ah)) return -ENOMEM; return 0; } /** * irdma_cm_free_ah - free a cm address handle * @cm_node: The connection manager node to create AH for */ static void irdma_cm_free_ah(struct irdma_cm_node *cm_node) { struct irdma_device *iwdev = cm_node->iwdev; irdma_puda_free_ah(&iwdev->rf->sc_dev, cm_node->ah); cm_node->ah = NULL; } /** * irdma_make_cm_node - create a new instance of a cm node * @cm_core: cm's core * @iwdev: iwarp device structure * @cm_info: quad info for connection * @listener: passive connection's listener */ static struct irdma_cm_node * irdma_make_cm_node(struct irdma_cm_core *cm_core, struct irdma_device *iwdev, struct irdma_cm_info *cm_info, struct irdma_cm_listener *listener) { struct irdma_cm_node *cm_node; int arpindex; - struct ifnet *netdev = iwdev->netdev; + if_t netdev = iwdev->netdev; /* create an hte and cm_node for this instance */ cm_node = kzalloc(sizeof(*cm_node), GFP_ATOMIC); if (!cm_node) return NULL; /* set our node specific transport info */ cm_node->ipv4 = cm_info->ipv4; cm_node->vlan_id = cm_info->vlan_id; if (cm_node->vlan_id >= VLAN_N_VID && iwdev->dcb_vlan_mode) cm_node->vlan_id = 0; cm_node->tos = cm_info->tos; cm_node->user_pri = cm_info->user_pri; if (listener) { if (listener->tos != cm_info->tos) irdma_dev_warn(&iwdev->ibdev, "application TOS[%d] and remote client TOS[%d] mismatch\n", listener->tos, cm_info->tos); if (iwdev->vsi.dscp_mode) { cm_node->user_pri = listener->user_pri; } else { cm_node->tos = max(listener->tos, cm_info->tos); cm_node->user_pri = rt_tos2priority(cm_node->tos); cm_node->user_pri = irdma_get_egress_vlan_prio(cm_info->loc_addr, cm_node->user_pri, cm_info->ipv4); } irdma_debug(&iwdev->rf->sc_dev, IRDMA_DEBUG_DCB, "listener: TOS:[%d] UP:[%d]\n", cm_node->tos, cm_node->user_pri); } memcpy(cm_node->loc_addr, cm_info->loc_addr, sizeof(cm_node->loc_addr)); memcpy(cm_node->rem_addr, cm_info->rem_addr, sizeof(cm_node->rem_addr)); cm_node->loc_port = cm_info->loc_port; cm_node->rem_port = cm_info->rem_port; cm_node->mpa_frame_rev = IRDMA_CM_DEFAULT_MPA_VER; cm_node->send_rdma0_op = SEND_RDMA_READ_ZERO; cm_node->iwdev = iwdev; cm_node->dev = &iwdev->rf->sc_dev; cm_node->ird_size = cm_node->dev->hw_attrs.max_hw_ird; cm_node->ord_size = cm_node->dev->hw_attrs.max_hw_ord; cm_node->listener = listener; cm_node->cm_id = cm_info->cm_id; - 
ether_addr_copy(cm_node->loc_mac, IF_LLADDR(netdev)); + ether_addr_copy(cm_node->loc_mac, if_getlladdr(netdev)); spin_lock_init(&cm_node->retrans_list_lock); cm_node->ack_rcvd = false; init_completion(&cm_node->establish_comp); atomic_set(&cm_node->refcnt, 1); /* associate our parent CM core */ cm_node->cm_core = cm_core; cm_node->tcp_cntxt.loc_id = IRDMA_CM_DEFAULT_LOCAL_ID; cm_node->tcp_cntxt.rcv_wscale = iwdev->rcv_wscale; cm_node->tcp_cntxt.rcv_wnd = iwdev->rcv_wnd >> cm_node->tcp_cntxt.rcv_wscale; kc_set_loc_seq_num_mss(cm_node); arpindex = irdma_resolve_neigh_lpb_chk(iwdev, cm_node, cm_info); if (arpindex < 0) goto err; ether_addr_copy(cm_node->rem_mac, iwdev->rf->arp_table[arpindex].mac_addr); irdma_add_hte_node(cm_core, cm_node); cm_core->stats_nodes_created++; return cm_node; err: kfree(cm_node); return NULL; } static void irdma_destroy_connection(struct irdma_cm_node *cm_node) { struct irdma_cm_core *cm_core = cm_node->cm_core; struct irdma_qp *iwqp; struct irdma_cm_info nfo; /* if the node is destroyed before connection was accelerated */ if (!cm_node->accelerated && cm_node->accept_pend) { irdma_debug(&cm_node->iwdev->rf->sc_dev, IRDMA_DEBUG_CM, "node destroyed before established\n"); atomic_dec(&cm_node->listener->pend_accepts_cnt); } if (cm_node->close_entry) irdma_handle_close_entry(cm_node, 0); if (cm_node->listener) { irdma_dec_refcnt_listen(cm_core, cm_node->listener, 0, true); } else { if (cm_node->apbvt_set) { irdma_del_apbvt(cm_node->iwdev, cm_node->apbvt_entry); cm_node->apbvt_set = 0; } irdma_get_addr_info(cm_node, &nfo); if (cm_node->qhash_set) { nfo.qh_qpid = cm_node->iwdev->vsi.ilq->qp_id; irdma_manage_qhash(cm_node->iwdev, &nfo, IRDMA_QHASH_TYPE_TCP_ESTABLISHED, IRDMA_QHASH_MANAGE_TYPE_DELETE, NULL, false); cm_node->qhash_set = 0; } } iwqp = cm_node->iwqp; if (iwqp) { cm_node->cm_id->rem_ref(cm_node->cm_id); cm_node->cm_id = NULL; iwqp->cm_id = NULL; irdma_qp_rem_ref(&iwqp->ibqp); cm_node->iwqp = NULL; } else if (cm_node->qhash_set) { irdma_get_addr_info(cm_node, &nfo); nfo.qh_qpid = cm_node->iwdev->vsi.ilq->qp_id; irdma_manage_qhash(cm_node->iwdev, &nfo, IRDMA_QHASH_TYPE_TCP_ESTABLISHED, IRDMA_QHASH_MANAGE_TYPE_DELETE, NULL, false); cm_node->qhash_set = 0; } cm_core->cm_free_ah(cm_node); } /** * irdma_rem_ref_cm_node - destroy an instance of a cm node * @cm_node: connection's node */ void irdma_rem_ref_cm_node(struct irdma_cm_node *cm_node) { struct irdma_cm_core *cm_core = cm_node->cm_core; unsigned long flags; spin_lock_irqsave(&cm_core->ht_lock, flags); if (!atomic_dec_and_test(&cm_node->refcnt)) { spin_unlock_irqrestore(&cm_core->ht_lock, flags); return; } if (cm_node->iwqp) { cm_node->iwqp->cm_node = NULL; cm_node->iwqp->cm_id = NULL; } HASH_DEL_RCU(cm_core->cm_hash_tbl, &cm_node->list); cm_node->cm_core->stats_nodes_destroyed++; spin_unlock_irqrestore(&cm_core->ht_lock, flags); irdma_destroy_connection(cm_node); kfree_rcu(cm_node, rcu_head); } /** * irdma_handle_fin_pkt - FIN packet received * @cm_node: connection's node */ static void irdma_handle_fin_pkt(struct irdma_cm_node *cm_node) { switch (cm_node->state) { case IRDMA_CM_STATE_SYN_RCVD: case IRDMA_CM_STATE_SYN_SENT: case IRDMA_CM_STATE_ESTABLISHED: case IRDMA_CM_STATE_MPAREJ_RCVD: cm_node->tcp_cntxt.rcv_nxt++; irdma_cleanup_retrans_entry(cm_node); cm_node->state = IRDMA_CM_STATE_LAST_ACK; irdma_send_fin(cm_node); break; case IRDMA_CM_STATE_MPAREQ_SENT: irdma_create_event(cm_node, IRDMA_CM_EVENT_ABORTED); cm_node->tcp_cntxt.rcv_nxt++; irdma_cleanup_retrans_entry(cm_node); cm_node->state = 
IRDMA_CM_STATE_CLOSED; atomic_inc(&cm_node->refcnt); irdma_send_reset(cm_node); break; case IRDMA_CM_STATE_FIN_WAIT1: cm_node->tcp_cntxt.rcv_nxt++; irdma_cleanup_retrans_entry(cm_node); cm_node->state = IRDMA_CM_STATE_CLOSING; irdma_send_ack(cm_node); /* * Wait for ACK as this is simultaneous close. After we receive ACK, do not send anything. Just rm the * node. */ break; case IRDMA_CM_STATE_FIN_WAIT2: cm_node->tcp_cntxt.rcv_nxt++; irdma_cleanup_retrans_entry(cm_node); cm_node->state = IRDMA_CM_STATE_TIME_WAIT; irdma_send_ack(cm_node); irdma_schedule_cm_timer(cm_node, NULL, IRDMA_TIMER_TYPE_CLOSE, 1, 0); break; case IRDMA_CM_STATE_TIME_WAIT: cm_node->tcp_cntxt.rcv_nxt++; irdma_cleanup_retrans_entry(cm_node); cm_node->state = IRDMA_CM_STATE_CLOSED; irdma_rem_ref_cm_node(cm_node); break; case IRDMA_CM_STATE_OFFLOADED: default: irdma_debug(&cm_node->iwdev->rf->sc_dev, IRDMA_DEBUG_CM, "bad state node state = %d\n", cm_node->state); break; } } /** * irdma_handle_rst_pkt - process received RST packet * @cm_node: connection's node * @rbuf: receive buffer */ static void irdma_handle_rst_pkt(struct irdma_cm_node *cm_node, struct irdma_puda_buf *rbuf) { irdma_debug(&cm_node->iwdev->rf->sc_dev, IRDMA_DEBUG_CM, "caller: %pS cm_node=%p state=%d rem_port=0x%04x loc_port=0x%04x rem_addr=%pI4 loc_addr=%pI4\n", __builtin_return_address(0), cm_node, cm_node->state, cm_node->rem_port, cm_node->loc_port, cm_node->rem_addr, cm_node->loc_addr); irdma_cleanup_retrans_entry(cm_node); switch (cm_node->state) { case IRDMA_CM_STATE_SYN_SENT: case IRDMA_CM_STATE_MPAREQ_SENT: switch (cm_node->mpa_frame_rev) { case IETF_MPA_V2: /* Drop down to MPA_V1 */ cm_node->mpa_frame_rev = IETF_MPA_V1; /* send a syn and goto syn sent state */ cm_node->state = IRDMA_CM_STATE_SYN_SENT; if (irdma_send_syn(cm_node, 0)) irdma_active_open_err(cm_node, false); break; case IETF_MPA_V1: default: irdma_active_open_err(cm_node, false); break; } break; case IRDMA_CM_STATE_MPAREQ_RCVD: atomic_inc(&cm_node->passive_state); break; case IRDMA_CM_STATE_ESTABLISHED: case IRDMA_CM_STATE_SYN_RCVD: case IRDMA_CM_STATE_LISTENING: irdma_passive_open_err(cm_node, false); break; case IRDMA_CM_STATE_OFFLOADED: irdma_active_open_err(cm_node, false); break; case IRDMA_CM_STATE_CLOSED: break; case IRDMA_CM_STATE_FIN_WAIT2: case IRDMA_CM_STATE_FIN_WAIT1: case IRDMA_CM_STATE_LAST_ACK: case IRDMA_CM_STATE_TIME_WAIT: cm_node->state = IRDMA_CM_STATE_CLOSED; irdma_rem_ref_cm_node(cm_node); break; default: break; } } /** * irdma_handle_rcv_mpa - Process a recv'd mpa buffer * @cm_node: connection's node * @rbuf: receive buffer */ static void irdma_handle_rcv_mpa(struct irdma_cm_node *cm_node, struct irdma_puda_buf *rbuf) { int err; int datasize = rbuf->datalen; u8 *dataloc = rbuf->data; enum irdma_cm_event_type type = IRDMA_CM_EVENT_UNKNOWN; u32 res_type; err = irdma_parse_mpa(cm_node, dataloc, &res_type, datasize); if (err) { if (cm_node->state == IRDMA_CM_STATE_MPAREQ_SENT) irdma_active_open_err(cm_node, true); else irdma_passive_open_err(cm_node, true); return; } switch (cm_node->state) { case IRDMA_CM_STATE_ESTABLISHED: if (res_type == IRDMA_MPA_REQUEST_REJECT) irdma_debug(&cm_node->iwdev->rf->sc_dev, IRDMA_DEBUG_CM, "state for reject\n"); cm_node->state = IRDMA_CM_STATE_MPAREQ_RCVD; type = IRDMA_CM_EVENT_MPA_REQ; irdma_send_ack(cm_node); /* ACK received MPA request */ atomic_set(&cm_node->passive_state, IRDMA_PASSIVE_STATE_INDICATED); break; case IRDMA_CM_STATE_MPAREQ_SENT: irdma_cleanup_retrans_entry(cm_node); if (res_type == IRDMA_MPA_REQUEST_REJECT) { type = 
IRDMA_CM_EVENT_MPA_REJECT; cm_node->state = IRDMA_CM_STATE_MPAREJ_RCVD; } else { type = IRDMA_CM_EVENT_CONNECTED; cm_node->state = IRDMA_CM_STATE_OFFLOADED; } irdma_send_ack(cm_node); break; default: irdma_debug(&cm_node->iwdev->rf->sc_dev, IRDMA_DEBUG_CM, "wrong cm_node state =%d\n", cm_node->state); break; } irdma_create_event(cm_node, type); } /** * irdma_check_syn - Check for error on received syn ack * @cm_node: connection's node * @tcph: pointer tcp header */ static int irdma_check_syn(struct irdma_cm_node *cm_node, struct tcphdr *tcph) { if (ntohl(tcph->th_ack) != cm_node->tcp_cntxt.loc_seq_num) { irdma_active_open_err(cm_node, true); return 1; } return 0; } /** * irdma_check_seq - check seq numbers if OK * @cm_node: connection's node * @tcph: pointer tcp header */ static int irdma_check_seq(struct irdma_cm_node *cm_node, struct tcphdr *tcph) { u32 seq; u32 ack_seq; u32 loc_seq_num = cm_node->tcp_cntxt.loc_seq_num; u32 rcv_nxt = cm_node->tcp_cntxt.rcv_nxt; u32 rcv_wnd; int err = 0; seq = ntohl(tcph->th_seq); ack_seq = ntohl(tcph->th_ack); rcv_wnd = cm_node->tcp_cntxt.rcv_wnd; if (ack_seq != loc_seq_num || !between(seq, rcv_nxt, (rcv_nxt + rcv_wnd))) err = -1; if (err) irdma_debug(&cm_node->iwdev->rf->sc_dev, IRDMA_DEBUG_CM, "seq number err\n"); return err; } void irdma_add_conn_est_qh(struct irdma_cm_node *cm_node) { struct irdma_cm_info nfo; irdma_get_addr_info(cm_node, &nfo); nfo.qh_qpid = cm_node->iwdev->vsi.ilq->qp_id; irdma_manage_qhash(cm_node->iwdev, &nfo, IRDMA_QHASH_TYPE_TCP_ESTABLISHED, IRDMA_QHASH_MANAGE_TYPE_ADD, cm_node, false); cm_node->qhash_set = true; } /** * irdma_handle_syn_pkt - is for Passive node * @cm_node: connection's node * @rbuf: receive buffer */ static void irdma_handle_syn_pkt(struct irdma_cm_node *cm_node, struct irdma_puda_buf *rbuf) { struct tcphdr *tcph = (struct tcphdr *)rbuf->tcph; int err; u32 inc_sequence; int optionsize; optionsize = (tcph->th_off << 2) - sizeof(struct tcphdr); inc_sequence = ntohl(tcph->th_seq); switch (cm_node->state) { case IRDMA_CM_STATE_SYN_SENT: case IRDMA_CM_STATE_MPAREQ_SENT: /* Rcvd syn on active open connection */ irdma_active_open_err(cm_node, 1); break; case IRDMA_CM_STATE_LISTENING: /* Passive OPEN */ if (atomic_read(&cm_node->listener->pend_accepts_cnt) > cm_node->listener->backlog) { cm_node->cm_core->stats_backlog_drops++; irdma_passive_open_err(cm_node, false); break; } err = irdma_handle_tcp_options(cm_node, tcph, optionsize, 1); if (err) { irdma_passive_open_err(cm_node, false); /* drop pkt */ break; } err = cm_node->cm_core->cm_create_ah(cm_node, false); if (err) { irdma_passive_open_err(cm_node, false); /* drop pkt */ break; } cm_node->tcp_cntxt.rcv_nxt = inc_sequence + 1; cm_node->accept_pend = 1; atomic_inc(&cm_node->listener->pend_accepts_cnt); cm_node->state = IRDMA_CM_STATE_SYN_RCVD; break; case IRDMA_CM_STATE_CLOSED: irdma_cleanup_retrans_entry(cm_node); atomic_inc(&cm_node->refcnt); irdma_send_reset(cm_node); break; case IRDMA_CM_STATE_OFFLOADED: case IRDMA_CM_STATE_ESTABLISHED: case IRDMA_CM_STATE_FIN_WAIT1: case IRDMA_CM_STATE_FIN_WAIT2: case IRDMA_CM_STATE_MPAREQ_RCVD: case IRDMA_CM_STATE_LAST_ACK: case IRDMA_CM_STATE_CLOSING: case IRDMA_CM_STATE_UNKNOWN: default: break; } } /** * irdma_handle_synack_pkt - Process SYN+ACK packet (active side) * @cm_node: connection's node * @rbuf: receive buffer */ static void irdma_handle_synack_pkt(struct irdma_cm_node *cm_node, struct irdma_puda_buf *rbuf) { struct tcphdr *tcph = (struct tcphdr *)rbuf->tcph; int err; u32 inc_sequence; int optionsize; optionsize 
= (tcph->th_off << 2) - sizeof(struct tcphdr); inc_sequence = ntohl(tcph->th_seq); switch (cm_node->state) { case IRDMA_CM_STATE_SYN_SENT: irdma_cleanup_retrans_entry(cm_node); /* active open */ if (irdma_check_syn(cm_node, tcph)) { irdma_debug(&cm_node->iwdev->rf->sc_dev, IRDMA_DEBUG_CM, "check syn fail\n"); return; } cm_node->tcp_cntxt.rem_ack_num = ntohl(tcph->th_ack); /* setup options */ err = irdma_handle_tcp_options(cm_node, tcph, optionsize, 0); if (err) { irdma_debug(&cm_node->iwdev->rf->sc_dev, IRDMA_DEBUG_CM, "cm_node=%p tcp_options failed\n", cm_node); break; } irdma_cleanup_retrans_entry(cm_node); cm_node->tcp_cntxt.rcv_nxt = inc_sequence + 1; irdma_send_ack(cm_node); /* ACK for the syn_ack */ err = irdma_send_mpa_request(cm_node); if (err) { irdma_debug(&cm_node->iwdev->rf->sc_dev, IRDMA_DEBUG_CM, "cm_node=%p irdma_send_mpa_request failed\n", cm_node); break; } cm_node->state = IRDMA_CM_STATE_MPAREQ_SENT; break; case IRDMA_CM_STATE_MPAREQ_RCVD: irdma_passive_open_err(cm_node, true); break; case IRDMA_CM_STATE_LISTENING: cm_node->tcp_cntxt.loc_seq_num = ntohl(tcph->th_ack); irdma_cleanup_retrans_entry(cm_node); cm_node->state = IRDMA_CM_STATE_CLOSED; irdma_send_reset(cm_node); break; case IRDMA_CM_STATE_CLOSED: cm_node->tcp_cntxt.loc_seq_num = ntohl(tcph->th_ack); irdma_cleanup_retrans_entry(cm_node); atomic_inc(&cm_node->refcnt); irdma_send_reset(cm_node); break; case IRDMA_CM_STATE_ESTABLISHED: case IRDMA_CM_STATE_FIN_WAIT1: case IRDMA_CM_STATE_FIN_WAIT2: case IRDMA_CM_STATE_LAST_ACK: case IRDMA_CM_STATE_OFFLOADED: case IRDMA_CM_STATE_CLOSING: case IRDMA_CM_STATE_UNKNOWN: case IRDMA_CM_STATE_MPAREQ_SENT: default: break; } } /** * irdma_handle_ack_pkt - process packet with ACK * @cm_node: connection's node * @rbuf: receive buffer */ static int irdma_handle_ack_pkt(struct irdma_cm_node *cm_node, struct irdma_puda_buf *rbuf) { struct tcphdr *tcph = (struct tcphdr *)rbuf->tcph; u32 inc_sequence; int ret; int optionsize; u32 datasize = rbuf->datalen; optionsize = (tcph->th_off << 2) - sizeof(struct tcphdr); if (irdma_check_seq(cm_node, tcph)) return -EINVAL; inc_sequence = ntohl(tcph->th_seq); switch (cm_node->state) { case IRDMA_CM_STATE_SYN_RCVD: irdma_cleanup_retrans_entry(cm_node); ret = irdma_handle_tcp_options(cm_node, tcph, optionsize, 1); if (ret) return ret; cm_node->tcp_cntxt.rem_ack_num = ntohl(tcph->th_ack); cm_node->state = IRDMA_CM_STATE_ESTABLISHED; if (datasize) { cm_node->tcp_cntxt.rcv_nxt = inc_sequence + datasize; irdma_handle_rcv_mpa(cm_node, rbuf); } break; case IRDMA_CM_STATE_ESTABLISHED: irdma_cleanup_retrans_entry(cm_node); if (datasize) { cm_node->tcp_cntxt.rcv_nxt = inc_sequence + datasize; irdma_handle_rcv_mpa(cm_node, rbuf); } break; case IRDMA_CM_STATE_MPAREQ_SENT: cm_node->tcp_cntxt.rem_ack_num = ntohl(tcph->th_ack); if (datasize) { cm_node->tcp_cntxt.rcv_nxt = inc_sequence + datasize; cm_node->ack_rcvd = false; irdma_handle_rcv_mpa(cm_node, rbuf); } else { cm_node->ack_rcvd = true; } break; case IRDMA_CM_STATE_LISTENING: irdma_cleanup_retrans_entry(cm_node); cm_node->state = IRDMA_CM_STATE_CLOSED; irdma_send_reset(cm_node); break; case IRDMA_CM_STATE_CLOSED: irdma_cleanup_retrans_entry(cm_node); atomic_inc(&cm_node->refcnt); irdma_send_reset(cm_node); break; case IRDMA_CM_STATE_LAST_ACK: case IRDMA_CM_STATE_CLOSING: irdma_cleanup_retrans_entry(cm_node); cm_node->state = IRDMA_CM_STATE_CLOSED; irdma_rem_ref_cm_node(cm_node); break; case IRDMA_CM_STATE_FIN_WAIT1: irdma_cleanup_retrans_entry(cm_node); cm_node->state = IRDMA_CM_STATE_FIN_WAIT2; break; 
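/*
 * Illustrative, self-contained sketch (not part of this patch): the
 * wraparound-safe window test behind the between() check used by
 * irdma_check_seq() above.  A segment is accepted only if its ACK matches
 * our local send sequence and its sequence number falls inside
 * [rcv_nxt, rcv_nxt + rcv_wnd] modulo 2^32.  seq_between() and seq_check()
 * are hypothetical names; the driver relies on the LinuxKPI between()
 * helper for the same comparison.
 */
#include <stdint.h>
#include <stdbool.h>

/* True iff seq lies within [low, high] on the 32-bit sequence circle. */
static bool
seq_between(uint32_t seq, uint32_t low, uint32_t high)
{
	/* Unsigned subtraction makes the comparison wraparound-safe. */
	return (high - low) >= (seq - low);
}

/* Mirror of the acceptance test: ACK must match the local send sequence
 * and the peer's sequence must land inside the advertised receive window. */
static bool
seq_check(uint32_t seq, uint32_t ack_seq, uint32_t loc_seq_num,
	  uint32_t rcv_nxt, uint32_t rcv_wnd)
{
	return ack_seq == loc_seq_num &&
	    seq_between(seq, rcv_nxt, rcv_nxt + rcv_wnd);
}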
case IRDMA_CM_STATE_SYN_SENT: case IRDMA_CM_STATE_FIN_WAIT2: case IRDMA_CM_STATE_OFFLOADED: case IRDMA_CM_STATE_MPAREQ_RCVD: case IRDMA_CM_STATE_UNKNOWN: default: irdma_cleanup_retrans_entry(cm_node); break; } return 0; } /** * irdma_process_pkt - process cm packet * @cm_node: connection's node * @rbuf: receive buffer */ static void irdma_process_pkt(struct irdma_cm_node *cm_node, struct irdma_puda_buf *rbuf) { enum irdma_tcpip_pkt_type pkt_type = IRDMA_PKT_TYPE_UNKNOWN; struct tcphdr *tcph = (struct tcphdr *)rbuf->tcph; u32 fin_set = 0; int err; if (tcph->th_flags & TH_RST) { pkt_type = IRDMA_PKT_TYPE_RST; } else if (tcph->th_flags & TH_SYN) { pkt_type = IRDMA_PKT_TYPE_SYN; if (tcph->th_flags & TH_ACK) pkt_type = IRDMA_PKT_TYPE_SYNACK; } else if (tcph->th_flags & TH_ACK) { pkt_type = IRDMA_PKT_TYPE_ACK; } if (tcph->th_flags & TH_FIN) fin_set = 1; switch (pkt_type) { case IRDMA_PKT_TYPE_SYN: irdma_handle_syn_pkt(cm_node, rbuf); break; case IRDMA_PKT_TYPE_SYNACK: irdma_handle_synack_pkt(cm_node, rbuf); break; case IRDMA_PKT_TYPE_ACK: err = irdma_handle_ack_pkt(cm_node, rbuf); if (fin_set && !err) irdma_handle_fin_pkt(cm_node); break; case IRDMA_PKT_TYPE_RST: irdma_handle_rst_pkt(cm_node, rbuf); break; default: if (fin_set && (!irdma_check_seq(cm_node, (struct tcphdr *)rbuf->tcph))) irdma_handle_fin_pkt(cm_node); break; } } /** * irdma_make_listen_node - create a listen node with params * @cm_core: cm's core * @iwdev: iwarp device structure * @cm_info: quad info for connection */ static struct irdma_cm_listener * irdma_make_listen_node(struct irdma_cm_core *cm_core, struct irdma_device *iwdev, struct irdma_cm_info *cm_info) { struct irdma_cm_listener *listener; unsigned long flags; /* cannot have multiple matching listeners */ listener = irdma_find_listener(cm_core, cm_info->loc_addr, cm_info->ipv4, cm_info->loc_port, cm_info->vlan_id, IRDMA_CM_LISTENER_EITHER_STATE); if (listener && listener->listener_state == IRDMA_CM_LISTENER_ACTIVE_STATE) { atomic_dec(&listener->refcnt); return NULL; } if (!listener) { /* * create a CM listen node 1/2 node to compare incoming traffic to */ listener = kzalloc(sizeof(*listener), GFP_KERNEL); if (!listener) return NULL; cm_core->stats_listen_nodes_created++; memcpy(listener->loc_addr, cm_info->loc_addr, sizeof(listener->loc_addr)); listener->loc_port = cm_info->loc_port; INIT_LIST_HEAD(&listener->child_listen_list); atomic_set(&listener->refcnt, 1); } else { listener->reused_node = 1; } listener->cm_id = cm_info->cm_id; listener->ipv4 = cm_info->ipv4; listener->vlan_id = cm_info->vlan_id; atomic_set(&listener->pend_accepts_cnt, 0); listener->cm_core = cm_core; listener->iwdev = iwdev; listener->backlog = cm_info->backlog; listener->listener_state = IRDMA_CM_LISTENER_ACTIVE_STATE; if (!listener->reused_node) { spin_lock_irqsave(&cm_core->listen_list_lock, flags); list_add(&listener->list, &cm_core->listen_list); spin_unlock_irqrestore(&cm_core->listen_list_lock, flags); } return listener; } /** * irdma_create_cm_node - make a connection node with params * @cm_core: cm's core * @iwdev: iwarp device structure * @conn_param: connection parameters * @cm_info: quad info for connection * @caller_cm_node: pointer to cm_node structure to return */ static int irdma_create_cm_node(struct irdma_cm_core *cm_core, struct irdma_device *iwdev, struct iw_cm_conn_param *conn_param, struct irdma_cm_info *cm_info, struct irdma_cm_node **caller_cm_node) { struct irdma_cm_node *cm_node; u16 private_data_len = conn_param->private_data_len; const void *private_data = 
conn_param->private_data; /* create a CM connection node */ cm_node = irdma_make_cm_node(cm_core, iwdev, cm_info, NULL); if (!cm_node) return -ENOMEM; /* set our node side to client (active) side */ cm_node->tcp_cntxt.client = 1; cm_node->tcp_cntxt.rcv_wscale = IRDMA_CM_DEFAULT_RCV_WND_SCALE; irdma_record_ird_ord(cm_node, conn_param->ird, conn_param->ord); cm_node->pdata.size = private_data_len; cm_node->pdata.addr = cm_node->pdata_buf; memcpy(cm_node->pdata_buf, private_data, private_data_len); *caller_cm_node = cm_node; return 0; } /** * irdma_cm_reject - reject and teardown a connection * @cm_node: connection's node * @pdata: ptr to private data for reject * @plen: size of private data */ static int irdma_cm_reject(struct irdma_cm_node *cm_node, const void *pdata, u8 plen) { int ret; int passive_state; if (cm_node->tcp_cntxt.client) return 0; irdma_cleanup_retrans_entry(cm_node); passive_state = atomic_add_return(1, &cm_node->passive_state); if (passive_state == IRDMA_SEND_RESET_EVENT) { cm_node->state = IRDMA_CM_STATE_CLOSED; irdma_rem_ref_cm_node(cm_node); return 0; } if (cm_node->state == IRDMA_CM_STATE_LISTENER_DESTROYED) { irdma_rem_ref_cm_node(cm_node); return 0; } ret = irdma_send_mpa_reject(cm_node, pdata, plen); if (!ret) return 0; cm_node->state = IRDMA_CM_STATE_CLOSED; if (irdma_send_reset(cm_node)) irdma_debug(&cm_node->iwdev->rf->sc_dev, IRDMA_DEBUG_CM, "send reset failed\n"); return ret; } /** * irdma_cm_close - close of cm connection * @cm_node: connection's node */ static int irdma_cm_close(struct irdma_cm_node *cm_node) { switch (cm_node->state) { case IRDMA_CM_STATE_SYN_RCVD: case IRDMA_CM_STATE_SYN_SENT: case IRDMA_CM_STATE_ONE_SIDE_ESTABLISHED: case IRDMA_CM_STATE_ESTABLISHED: case IRDMA_CM_STATE_ACCEPTING: case IRDMA_CM_STATE_MPAREQ_SENT: case IRDMA_CM_STATE_MPAREQ_RCVD: irdma_cleanup_retrans_entry(cm_node); irdma_send_reset(cm_node); break; case IRDMA_CM_STATE_CLOSE_WAIT: cm_node->state = IRDMA_CM_STATE_LAST_ACK; irdma_send_fin(cm_node); break; case IRDMA_CM_STATE_FIN_WAIT1: case IRDMA_CM_STATE_FIN_WAIT2: case IRDMA_CM_STATE_LAST_ACK: case IRDMA_CM_STATE_TIME_WAIT: case IRDMA_CM_STATE_CLOSING: return -EINVAL; case IRDMA_CM_STATE_LISTENING: irdma_cleanup_retrans_entry(cm_node); irdma_send_reset(cm_node); break; case IRDMA_CM_STATE_MPAREJ_RCVD: case IRDMA_CM_STATE_UNKNOWN: case IRDMA_CM_STATE_INITED: case IRDMA_CM_STATE_CLOSED: case IRDMA_CM_STATE_LISTENER_DESTROYED: irdma_rem_ref_cm_node(cm_node); break; case IRDMA_CM_STATE_OFFLOADED: if (cm_node->send_entry) irdma_debug(&cm_node->iwdev->rf->sc_dev, IRDMA_DEBUG_CM, "CM send_entry in OFFLOADED state\n"); irdma_rem_ref_cm_node(cm_node); break; } return 0; } /** * irdma_receive_ilq - recv an ETHERNET packet, and process it * through CM * @vsi: VSI structure of dev * @rbuf: receive buffer */ void irdma_receive_ilq(struct irdma_sc_vsi *vsi, struct irdma_puda_buf *rbuf) { struct irdma_cm_node *cm_node; struct irdma_cm_listener *listener; struct ip *iph; struct ip6_hdr *ip6h; struct tcphdr *tcph; struct irdma_cm_info cm_info = {0}; struct irdma_device *iwdev = vsi->back_vsi; struct irdma_cm_core *cm_core = &iwdev->cm_core; struct ether_vlan_header *ethh; u16 vtag; /* if vlan, then maclen = 18 else 14 */ iph = (struct ip *)rbuf->iph; irdma_debug_buf(vsi->dev, IRDMA_DEBUG_ILQ, "RECEIVE ILQ BUFFER", rbuf->mem.va, rbuf->totallen); if (iwdev->rf->sc_dev.hw_attrs.uk_attrs.hw_rev >= IRDMA_GEN_2) { if (rbuf->vlan_valid) { vtag = rbuf->vlan_id; cm_info.user_pri = (vtag & EVL_PRI_MASK) >> VLAN_PRIO_SHIFT; cm_info.vlan_id = vtag & 
EVL_VLID_MASK; } else { cm_info.vlan_id = 0xFFFF; } } else { ethh = rbuf->mem.va; if (ethh->evl_proto == htons(ETH_P_8021Q)) { vtag = ntohs(ethh->evl_tag); cm_info.user_pri = (vtag & EVL_PRI_MASK) >> VLAN_PRIO_SHIFT; cm_info.vlan_id = vtag & EVL_VLID_MASK; irdma_debug(&cm_core->iwdev->rf->sc_dev, IRDMA_DEBUG_CM, "vlan_id=%d\n", cm_info.vlan_id); } else { cm_info.vlan_id = 0xFFFF; } } tcph = (struct tcphdr *)rbuf->tcph; if (rbuf->ipv4) { cm_info.loc_addr[0] = ntohl(iph->ip_dst.s_addr); cm_info.rem_addr[0] = ntohl(iph->ip_src.s_addr); cm_info.ipv4 = true; cm_info.tos = iph->ip_tos; } else { ip6h = (struct ip6_hdr *)rbuf->iph; irdma_copy_ip_ntohl(cm_info.loc_addr, ip6h->ip6_dst.__u6_addr.__u6_addr32); irdma_copy_ip_ntohl(cm_info.rem_addr, ip6h->ip6_src.__u6_addr.__u6_addr32); cm_info.ipv4 = false; cm_info.tos = (ip6h->ip6_vfc << 4) | ip6h->ip6_flow; } cm_info.loc_port = ntohs(tcph->th_dport); cm_info.rem_port = ntohs(tcph->th_sport); cm_node = irdma_find_node(cm_core, cm_info.rem_port, cm_info.rem_addr, cm_info.loc_port, cm_info.loc_addr, cm_info.vlan_id); if (!cm_node) { /* * Only type of packet accepted are for the PASSIVE open (syn only) */ if (!(tcph->th_flags & TH_SYN) || tcph->th_flags & TH_ACK) return; listener = irdma_find_listener(cm_core, cm_info.loc_addr, cm_info.ipv4, cm_info.loc_port, cm_info.vlan_id, IRDMA_CM_LISTENER_ACTIVE_STATE); if (!listener) { cm_info.cm_id = NULL; irdma_debug(&cm_core->iwdev->rf->sc_dev, IRDMA_DEBUG_CM, "no listener found\n"); return; } cm_info.cm_id = listener->cm_id; cm_node = irdma_make_cm_node(cm_core, iwdev, &cm_info, listener); if (!cm_node) { irdma_debug(&cm_core->iwdev->rf->sc_dev, IRDMA_DEBUG_CM, "allocate node failed\n"); atomic_dec(&listener->refcnt); return; } if (!(tcph->th_flags & (TH_RST | TH_FIN))) { cm_node->state = IRDMA_CM_STATE_LISTENING; } else { irdma_rem_ref_cm_node(cm_node); return; } atomic_inc(&cm_node->refcnt); } else if (cm_node->state == IRDMA_CM_STATE_OFFLOADED) { irdma_rem_ref_cm_node(cm_node); return; } irdma_process_pkt(cm_node, rbuf); irdma_rem_ref_cm_node(cm_node); } static int irdma_add_qh(struct irdma_cm_node *cm_node, bool active) { if (!active) irdma_add_conn_est_qh(cm_node); return 0; } static void irdma_cm_free_ah_nop(struct irdma_cm_node *cm_node) { } /** * irdma_setup_cm_core - setup top level instance of a cm core * @iwdev: iwarp device structure * @rdma_ver: HW version */ int irdma_setup_cm_core(struct irdma_device *iwdev, u8 rdma_ver) { struct irdma_cm_core *cm_core = &iwdev->cm_core; cm_core->iwdev = iwdev; cm_core->dev = &iwdev->rf->sc_dev; /* Handles CM event work items send to Iwarp core */ cm_core->event_wq = alloc_ordered_workqueue("iwarp-event-wq", 0); if (!cm_core->event_wq) return -ENOMEM; INIT_LIST_HEAD(&cm_core->listen_list); timer_setup(&cm_core->tcp_timer, irdma_cm_timer_tick, 0); spin_lock_init(&cm_core->ht_lock); spin_lock_init(&cm_core->listen_list_lock); spin_lock_init(&cm_core->apbvt_lock); switch (rdma_ver) { case IRDMA_GEN_1: cm_core->form_cm_frame = irdma_form_uda_cm_frame; cm_core->cm_create_ah = irdma_add_qh; cm_core->cm_free_ah = irdma_cm_free_ah_nop; break; case IRDMA_GEN_2: default: cm_core->form_cm_frame = irdma_form_ah_cm_frame; cm_core->cm_create_ah = irdma_cm_create_ah; cm_core->cm_free_ah = irdma_cm_free_ah; } return 0; } /** * irdma_cleanup_cm_core - deallocate a top level instance of a * cm core * @cm_core: cm's core */ void irdma_cleanup_cm_core(struct irdma_cm_core *cm_core) { if (!cm_core) return; del_timer_sync(&cm_core->tcp_timer); destroy_workqueue(cm_core->event_wq); 
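/* Finally drop the per-VSI work scheduler state (ws_reset); by this point the CM timer and the CM event workqueue have already been torn down. */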
cm_core->dev->ws_reset(&cm_core->iwdev->vsi); } /** * irdma_init_tcp_ctx - setup qp context * @cm_node: connection's node * @tcp_info: offload info for tcp * @iwqp: associate qp for the connection */ static void irdma_init_tcp_ctx(struct irdma_cm_node *cm_node, struct irdma_tcp_offload_info *tcp_info, struct irdma_qp *iwqp) { tcp_info->ipv4 = cm_node->ipv4; tcp_info->drop_ooo_seg = !iwqp->iwdev->iw_ooo; tcp_info->wscale = true; tcp_info->ignore_tcp_opt = true; tcp_info->ignore_tcp_uns_opt = true; tcp_info->no_nagle = false; tcp_info->ttl = IRDMA_DEFAULT_TTL; tcp_info->rtt_var = IRDMA_DEFAULT_RTT_VAR; tcp_info->ss_thresh = IRDMA_DEFAULT_SS_THRESH; tcp_info->rexmit_thresh = IRDMA_DEFAULT_REXMIT_THRESH; tcp_info->tcp_state = IRDMA_TCP_STATE_ESTABLISHED; tcp_info->snd_wscale = cm_node->tcp_cntxt.snd_wscale; tcp_info->rcv_wscale = cm_node->tcp_cntxt.rcv_wscale; tcp_info->snd_nxt = cm_node->tcp_cntxt.loc_seq_num; tcp_info->snd_wnd = cm_node->tcp_cntxt.snd_wnd; tcp_info->rcv_nxt = cm_node->tcp_cntxt.rcv_nxt; tcp_info->snd_max = cm_node->tcp_cntxt.loc_seq_num; tcp_info->snd_una = cm_node->tcp_cntxt.loc_seq_num; tcp_info->cwnd = 2 * cm_node->tcp_cntxt.mss; tcp_info->snd_wl1 = cm_node->tcp_cntxt.rcv_nxt; tcp_info->snd_wl2 = cm_node->tcp_cntxt.loc_seq_num; tcp_info->max_snd_window = cm_node->tcp_cntxt.max_snd_wnd; tcp_info->rcv_wnd = cm_node->tcp_cntxt.rcv_wnd << cm_node->tcp_cntxt.rcv_wscale; tcp_info->flow_label = 0; tcp_info->snd_mss = (u32)cm_node->tcp_cntxt.mss; tcp_info->tos = cm_node->tos; if (cm_node->vlan_id < VLAN_N_VID) { tcp_info->insert_vlan_tag = true; tcp_info->vlan_tag = cm_node->vlan_id; tcp_info->vlan_tag |= cm_node->user_pri << VLAN_PRIO_SHIFT; } tcp_info->src_port = cm_node->loc_port; tcp_info->dst_port = cm_node->rem_port; tcp_info->arp_idx = (u16)irdma_arp_table(iwqp->iwdev->rf, cm_node->rem_addr, NULL, IRDMA_ARP_RESOLVE); if (cm_node->ipv4) { tcp_info->dest_ip_addr[3] = cm_node->rem_addr[0]; tcp_info->local_ipaddr[3] = cm_node->loc_addr[0]; } else { memcpy(tcp_info->dest_ip_addr, cm_node->rem_addr, sizeof(tcp_info->dest_ip_addr)); memcpy(tcp_info->local_ipaddr, cm_node->loc_addr, sizeof(tcp_info->local_ipaddr)); } } /** * irdma_cm_init_tsa_conn - setup qp for RTS * @iwqp: associate qp for the connection * @cm_node: connection's node */ static void irdma_cm_init_tsa_conn(struct irdma_qp *iwqp, struct irdma_cm_node *cm_node) { struct irdma_iwarp_offload_info *iwarp_info; struct irdma_qp_host_ctx_info *ctx_info; iwarp_info = &iwqp->iwarp_info; ctx_info = &iwqp->ctx_info; ctx_info->tcp_info = &iwqp->tcp_info; ctx_info->send_cq_num = iwqp->iwscq->sc_cq.cq_uk.cq_id; ctx_info->rcv_cq_num = iwqp->iwrcq->sc_cq.cq_uk.cq_id; iwarp_info->ord_size = cm_node->ord_size; iwarp_info->ird_size = cm_node->ird_size; iwarp_info->rd_en = true; iwarp_info->rdmap_ver = 1; iwarp_info->ddp_ver = 1; iwarp_info->pd_id = iwqp->iwpd->sc_pd.pd_id; ctx_info->tcp_info_valid = true; ctx_info->iwarp_info_valid = true; ctx_info->user_pri = cm_node->user_pri; irdma_init_tcp_ctx(cm_node, &iwqp->tcp_info, iwqp); if (cm_node->snd_mark_en) { iwarp_info->snd_mark_en = true; iwarp_info->snd_mark_offset = (iwqp->tcp_info.snd_nxt & SNDMARKER_SEQNMASK) + cm_node->lsmm_size; } cm_node->state = IRDMA_CM_STATE_OFFLOADED; iwqp->tcp_info.tcp_state = IRDMA_TCP_STATE_ESTABLISHED; iwqp->tcp_info.src_mac_addr_idx = iwqp->iwdev->mac_ip_table_idx; if (cm_node->rcv_mark_en) { iwarp_info->rcv_mark_en = true; iwarp_info->align_hdrs = true; } irdma_sc_qp_setctx(&iwqp->sc_qp, iwqp->host_ctx.va, ctx_info); /* once tcp_info is set, no need 
to do it again */ ctx_info->tcp_info_valid = false; ctx_info->iwarp_info_valid = false; } /** * irdma_cm_disconn - when a connection is being closed * @iwqp: associated qp for the connection */ void irdma_cm_disconn(struct irdma_qp *iwqp) { struct irdma_device *iwdev = iwqp->iwdev; struct disconn_work *work; unsigned long flags; work = kzalloc(sizeof(*work), GFP_ATOMIC); if (!work) return; spin_lock_irqsave(&iwdev->rf->qptable_lock, flags); if (!iwdev->rf->qp_table[iwqp->ibqp.qp_num]) { spin_unlock_irqrestore(&iwdev->rf->qptable_lock, flags); irdma_debug(&iwdev->rf->sc_dev, IRDMA_DEBUG_CM, "qp_id %d is already freed\n", iwqp->ibqp.qp_num); kfree(work); return; } irdma_qp_add_ref(&iwqp->ibqp); spin_unlock_irqrestore(&iwdev->rf->qptable_lock, flags); work->iwqp = iwqp; INIT_WORK(&work->work, irdma_disconnect_worker); queue_work(iwdev->cleanup_wq, &work->work); } /** * irdma_qp_disconnect - free qp and close cm * @iwqp: associate qp for the connection */ static void irdma_qp_disconnect(struct irdma_qp *iwqp) { struct irdma_device *iwdev = iwqp->iwdev; iwqp->active_conn = 0; /* close the CM node down if it is still active */ irdma_debug(&iwdev->rf->sc_dev, IRDMA_DEBUG_CM, "Call close API\n"); irdma_cm_close(iwqp->cm_node); } /** * irdma_cm_disconn_true - called by worker thread to disconnect qp * @iwqp: associate qp for the connection */ static void irdma_cm_disconn_true(struct irdma_qp *iwqp) { struct iw_cm_id *cm_id; struct irdma_device *iwdev; struct irdma_sc_qp *qp = &iwqp->sc_qp; u16 last_ae; u8 original_hw_tcp_state; u8 original_ibqp_state; int disconn_status = 0; int issue_disconn = 0; int issue_close = 0; int issue_flush = 0; unsigned long flags; int err; iwdev = iwqp->iwdev; spin_lock_irqsave(&iwqp->lock, flags); if (rdma_protocol_roce(&iwdev->ibdev, 1)) { struct ib_qp_attr attr; if (iwqp->flush_issued || iwqp->sc_qp.qp_uk.destroy_pending) { spin_unlock_irqrestore(&iwqp->lock, flags); return; } spin_unlock_irqrestore(&iwqp->lock, flags); attr.qp_state = IB_QPS_ERR; irdma_modify_qp_roce(&iwqp->ibqp, &attr, IB_QP_STATE, NULL); irdma_ib_qp_event(iwqp, qp->event_type); return; } cm_id = iwqp->cm_id; original_hw_tcp_state = iwqp->hw_tcp_state; original_ibqp_state = iwqp->ibqp_state; last_ae = iwqp->last_aeq; if (qp->term_flags) { issue_disconn = 1; issue_close = 1; iwqp->cm_id = NULL; irdma_terminate_del_timer(qp); if (!iwqp->flush_issued) { iwqp->flush_issued = 1; issue_flush = 1; } } else if ((original_hw_tcp_state == IRDMA_TCP_STATE_CLOSE_WAIT) || ((original_ibqp_state == IB_QPS_RTS) && (last_ae == IRDMA_AE_LLP_CONNECTION_RESET))) { issue_disconn = 1; if (last_ae == IRDMA_AE_LLP_CONNECTION_RESET) disconn_status = -ECONNRESET; } if (original_hw_tcp_state == IRDMA_TCP_STATE_CLOSED || original_hw_tcp_state == IRDMA_TCP_STATE_TIME_WAIT || last_ae == IRDMA_AE_RDMAP_ROE_BAD_LLP_CLOSE || last_ae == IRDMA_AE_BAD_CLOSE || last_ae == IRDMA_AE_LLP_CONNECTION_RESET || iwdev->rf->reset || !cm_id) { issue_close = 1; iwqp->cm_id = NULL; qp->term_flags = 0; if (!iwqp->flush_issued) { iwqp->flush_issued = 1; issue_flush = 1; } } spin_unlock_irqrestore(&iwqp->lock, flags); if (issue_flush && !iwqp->sc_qp.qp_uk.destroy_pending) { irdma_flush_wqes(iwqp, IRDMA_FLUSH_SQ | IRDMA_FLUSH_RQ | IRDMA_FLUSH_WAIT); if (qp->term_flags) irdma_ib_qp_event(iwqp, qp->event_type); } if (!cm_id || !cm_id->event_handler) return; spin_lock_irqsave(&iwdev->cm_core.ht_lock, flags); if (!iwqp->cm_node) { spin_unlock_irqrestore(&iwdev->cm_core.ht_lock, flags); return; } atomic_inc(&iwqp->cm_node->refcnt); 
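/* Hold an extra reference on the cm_node while the disconnect/close upcalls below run; it is dropped at the end of this function. */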
spin_unlock_irqrestore(&iwdev->cm_core.ht_lock, flags); if (issue_disconn) { err = irdma_send_cm_event(iwqp->cm_node, cm_id, IW_CM_EVENT_DISCONNECT, disconn_status); if (err) irdma_debug(&iwdev->rf->sc_dev, IRDMA_DEBUG_CM, "disconnect event failed: - cm_id = %p\n", cm_id); } if (issue_close) { cm_id->provider_data = iwqp; err = irdma_send_cm_event(iwqp->cm_node, cm_id, IW_CM_EVENT_CLOSE, 0); if (err) irdma_debug(&iwdev->rf->sc_dev, IRDMA_DEBUG_CM, "close event failed: - cm_id = %p\n", cm_id); irdma_qp_disconnect(iwqp); } irdma_rem_ref_cm_node(iwqp->cm_node); } /** * irdma_disconnect_worker - worker for connection close * @work: pointer to disconn structure */ static void irdma_disconnect_worker(struct work_struct *work) { struct disconn_work *dwork = container_of(work, struct disconn_work, work); struct irdma_qp *iwqp = dwork->iwqp; kfree(dwork); irdma_cm_disconn_true(iwqp); irdma_qp_rem_ref(&iwqp->ibqp); } /** * irdma_free_lsmm_rsrc - free lsmm memory and deregister * @iwqp: associate qp for the connection */ void irdma_free_lsmm_rsrc(struct irdma_qp *iwqp) { struct irdma_device *iwdev; iwdev = iwqp->iwdev; if (iwqp->ietf_mem.va) { if (iwqp->lsmm_mr) kc_free_lsmm_dereg_mr(iwdev, iwqp); irdma_free_dma_mem(iwdev->rf->sc_dev.hw, &iwqp->ietf_mem); iwqp->ietf_mem.va = NULL; } } /** * irdma_accept - registered call for connection to be accepted * @cm_id: cm information for passive connection * @conn_param: accept parameters */ int irdma_accept(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) { struct ib_qp *ibqp; struct irdma_qp *iwqp; struct irdma_device *iwdev; struct irdma_sc_dev *dev; struct irdma_cm_node *cm_node; struct ib_qp_attr attr = {0}; int passive_state; struct ib_mr *ibmr; struct irdma_pd *iwpd; u16 buf_len = 0; struct irdma_kmem_info accept; u64 tagged_offset; int wait_ret; int ret = 0; ibqp = irdma_get_qp(cm_id->device, conn_param->qpn); if (!ibqp) return -EINVAL; iwqp = to_iwqp(ibqp); iwdev = iwqp->iwdev; dev = &iwdev->rf->sc_dev; cm_node = cm_id->provider_data; if (((struct sockaddr_in *)&cm_id->local_addr)->sin_family == AF_INET) { cm_node->ipv4 = true; cm_node->vlan_id = irdma_get_vlan_ipv4(cm_node->loc_addr); } else { cm_node->ipv4 = false; irdma_netdev_vlan_ipv6(cm_node->loc_addr, &cm_node->vlan_id, NULL); } irdma_debug(&iwdev->rf->sc_dev, IRDMA_DEBUG_CM, "Accept vlan_id=%d\n", cm_node->vlan_id); if (cm_node->state == IRDMA_CM_STATE_LISTENER_DESTROYED) { ret = -EINVAL; goto error; } passive_state = atomic_add_return(1, &cm_node->passive_state); if (passive_state == IRDMA_SEND_RESET_EVENT) { ret = -ECONNRESET; goto error; } buf_len = conn_param->private_data_len + IRDMA_MAX_IETF_SIZE; iwqp->ietf_mem.size = buf_len; iwqp->ietf_mem.va = irdma_allocate_dma_mem(dev->hw, &iwqp->ietf_mem, iwqp->ietf_mem.size, 1); if (!iwqp->ietf_mem.va) { ret = -ENOMEM; goto error; } cm_node->pdata.size = conn_param->private_data_len; accept.addr = iwqp->ietf_mem.va; accept.size = irdma_cm_build_mpa_frame(cm_node, &accept, MPA_KEY_REPLY); memcpy((u8 *)accept.addr + accept.size, conn_param->private_data, conn_param->private_data_len); if (cm_node->dev->ws_add(iwqp->sc_qp.vsi, cm_node->user_pri)) { ret = -ENOMEM; goto error; } iwqp->sc_qp.user_pri = cm_node->user_pri; irdma_qp_add_qos(&iwqp->sc_qp); if (cm_node->dev->hw_attrs.uk_attrs.hw_rev == IRDMA_GEN_2) iwdev->rf->check_fc(&iwdev->vsi, &iwqp->sc_qp); /* setup our first outgoing iWarp send WQE (the IETF frame response) */ iwpd = iwqp->iwpd; tagged_offset = (uintptr_t)iwqp->ietf_mem.va; ibmr = irdma_reg_phys_mr(&iwpd->ibpd,
iwqp->ietf_mem.pa, buf_len, IB_ACCESS_LOCAL_WRITE, &tagged_offset); if (IS_ERR(ibmr)) { ret = -ENOMEM; goto error; } ibmr->pd = &iwpd->ibpd; ibmr->device = iwpd->ibpd.device; iwqp->lsmm_mr = ibmr; if (iwqp->page) iwqp->sc_qp.qp_uk.sq_base = kmap_local_page(iwqp->page); cm_node->lsmm_size = accept.size + conn_param->private_data_len; irdma_sc_send_lsmm(&iwqp->sc_qp, iwqp->ietf_mem.va, cm_node->lsmm_size, ibmr->lkey); if (iwqp->page) kunmap_local(iwqp->sc_qp.qp_uk.sq_base); iwqp->cm_id = cm_id; cm_node->cm_id = cm_id; cm_id->provider_data = iwqp; iwqp->active_conn = 0; iwqp->cm_node = cm_node; cm_node->iwqp = iwqp; irdma_cm_init_tsa_conn(iwqp, cm_node); irdma_qp_add_ref(&iwqp->ibqp); cm_id->add_ref(cm_id); attr.qp_state = IB_QPS_RTS; cm_node->qhash_set = false; cm_node->cm_core->cm_free_ah(cm_node); irdma_modify_qp(&iwqp->ibqp, &attr, IB_QP_STATE, NULL); if (dev->hw_attrs.uk_attrs.feature_flags & IRDMA_FEATURE_RTS_AE) { wait_ret = wait_event_interruptible_timeout(iwqp->waitq, iwqp->rts_ae_rcvd, IRDMA_MAX_TIMEOUT); if (!wait_ret) { irdma_debug(&iwdev->rf->sc_dev, IRDMA_DEBUG_CM, "Slow Connection: cm_node=%p, loc_port=%d, rem_port=%d, cm_id=%p\n", cm_node, cm_node->loc_port, cm_node->rem_port, cm_node->cm_id); ret = -ECONNRESET; goto error; } } irdma_send_cm_event(cm_node, cm_id, IW_CM_EVENT_ESTABLISHED, 0); cm_node->accelerated = true; complete(&cm_node->establish_comp); if (cm_node->accept_pend) { atomic_dec(&cm_node->listener->pend_accepts_cnt); cm_node->accept_pend = 0; } irdma_debug(&iwdev->rf->sc_dev, IRDMA_DEBUG_CM, "rem_port=0x%04x, loc_port=0x%04x rem_addr=%pI4 loc_addr=%pI4 cm_node=%p cm_id=%p qp_id = %d\n\n", cm_node->rem_port, cm_node->loc_port, cm_node->rem_addr, cm_node->loc_addr, cm_node, cm_id, ibqp->qp_num); cm_node->cm_core->stats_accepts++; return 0; error: irdma_free_lsmm_rsrc(iwqp); irdma_rem_ref_cm_node(cm_node); return ret; } /** * irdma_reject - registered call for connection to be rejected * @cm_id: cm information for passive connection * @pdata: private data to be sent * @pdata_len: private data length */ int irdma_reject(struct iw_cm_id *cm_id, const void *pdata, u8 pdata_len) { struct irdma_device *iwdev; struct irdma_cm_node *cm_node; cm_node = cm_id->provider_data; cm_node->pdata.size = pdata_len; iwdev = to_iwdev(cm_id->device); if (!iwdev) return -EINVAL; cm_node->cm_core->stats_rejects++; if (pdata_len + sizeof(struct ietf_mpa_v2) > IRDMA_MAX_CM_BUF) return -EINVAL; return irdma_cm_reject(cm_node, pdata, pdata_len); } /** * irdma_connect - registered call for connection to be established * @cm_id: cm information for passive connection * @conn_param: Information about the connection */ int irdma_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) { struct ib_qp *ibqp; struct irdma_qp *iwqp; struct irdma_device *iwdev; struct irdma_cm_node *cm_node; struct irdma_cm_info cm_info; struct sockaddr_in *laddr; struct sockaddr_in *raddr; struct sockaddr_in6 *laddr6; struct sockaddr_in6 *raddr6; int ret = 0; ibqp = irdma_get_qp(cm_id->device, conn_param->qpn); if (!ibqp) return -EINVAL; iwqp = to_iwqp(ibqp); if (!iwqp) return -EINVAL; iwdev = iwqp->iwdev; if (!iwdev) return -EINVAL; laddr = (struct sockaddr_in *)&cm_id->m_local_addr; raddr = (struct sockaddr_in *)&cm_id->m_remote_addr; laddr6 = (struct sockaddr_in6 *)&cm_id->m_local_addr; raddr6 = (struct sockaddr_in6 *)&cm_id->m_remote_addr; if (!(laddr->sin_port) || !(raddr->sin_port)) return -EINVAL; iwqp->active_conn = 1; iwqp->cm_id = NULL; cm_id->provider_data = iwqp; /* set up the connection 
params for the node */ if (cm_id->remote_addr.ss_family == AF_INET) { if (iwdev->vsi.mtu < IRDMA_MIN_MTU_IPV4) return -EINVAL; cm_info.ipv4 = true; memset(cm_info.loc_addr, 0, sizeof(cm_info.loc_addr)); memset(cm_info.rem_addr, 0, sizeof(cm_info.rem_addr)); cm_info.loc_addr[0] = ntohl(laddr->sin_addr.s_addr); cm_info.rem_addr[0] = ntohl(raddr->sin_addr.s_addr); cm_info.loc_port = ntohs(laddr->sin_port); cm_info.rem_port = ntohs(raddr->sin_port); cm_info.vlan_id = irdma_get_vlan_ipv4(cm_info.loc_addr); } else { if (iwdev->vsi.mtu < IRDMA_MIN_MTU_IPV6) return -EINVAL; cm_info.ipv4 = false; irdma_copy_ip_ntohl(cm_info.loc_addr, laddr6->sin6_addr.__u6_addr.__u6_addr32); irdma_copy_ip_ntohl(cm_info.rem_addr, raddr6->sin6_addr.__u6_addr.__u6_addr32); cm_info.loc_port = ntohs(laddr6->sin6_port); cm_info.rem_port = ntohs(raddr6->sin6_port); irdma_netdev_vlan_ipv6(cm_info.loc_addr, &cm_info.vlan_id, NULL); } cm_info.cm_id = cm_id; cm_info.qh_qpid = iwdev->vsi.ilq->qp_id; cm_info.tos = cm_id->tos; if (iwdev->vsi.dscp_mode) { cm_info.user_pri = iwqp->sc_qp.vsi->dscp_map[irdma_tos2dscp(cm_info.tos)]; } else { cm_info.user_pri = rt_tos2priority(cm_id->tos); cm_info.user_pri = irdma_get_egress_vlan_prio(cm_info.loc_addr, cm_info.user_pri, cm_info.ipv4); } if (iwqp->sc_qp.dev->ws_add(iwqp->sc_qp.vsi, cm_info.user_pri)) return -ENOMEM; iwqp->sc_qp.user_pri = cm_info.user_pri; irdma_qp_add_qos(&iwqp->sc_qp); if (iwdev->rf->sc_dev.hw_attrs.uk_attrs.hw_rev == IRDMA_GEN_2) iwdev->rf->check_fc(&iwdev->vsi, &iwqp->sc_qp); irdma_debug(&iwdev->rf->sc_dev, IRDMA_DEBUG_DCB, "TOS:[%d] UP:[%d]\n", cm_id->tos, cm_info.user_pri); ret = irdma_create_cm_node(&iwdev->cm_core, iwdev, conn_param, &cm_info, &cm_node); if (ret) return ret; ret = cm_node->cm_core->cm_create_ah(cm_node, true); if (ret) goto err; if (irdma_manage_qhash(iwdev, &cm_info, IRDMA_QHASH_TYPE_TCP_ESTABLISHED, IRDMA_QHASH_MANAGE_TYPE_ADD, NULL, true)) { ret = -EINVAL; goto err; } cm_node->qhash_set = true; cm_node->apbvt_entry = irdma_add_apbvt(iwdev, cm_info.loc_port); if (!cm_node->apbvt_entry) { ret = -EINVAL; goto err; } cm_node->apbvt_set = true; iwqp->cm_node = cm_node; cm_node->iwqp = iwqp; iwqp->cm_id = cm_id; irdma_qp_add_ref(&iwqp->ibqp); cm_id->add_ref(cm_id); if (cm_node->state != IRDMA_CM_STATE_OFFLOADED) { cm_node->state = IRDMA_CM_STATE_SYN_SENT; ret = irdma_send_syn(cm_node, 0); if (ret) goto err; } irdma_debug(&iwdev->rf->sc_dev, IRDMA_DEBUG_CM, "rem_port=0x%04x, loc_port=0x%04x rem_addr=%pI4 loc_addr=%pI4 cm_node=%p cm_id=%p qp_id = %d\n\n", cm_node->rem_port, cm_node->loc_port, cm_node->rem_addr, cm_node->loc_addr, cm_node, cm_id, ibqp->qp_num); return 0; err: if (cm_info.ipv4) irdma_debug(&iwdev->rf->sc_dev, IRDMA_DEBUG_CM, "connect() FAILED: dest addr=%pI4", cm_info.rem_addr); else irdma_debug(&iwdev->rf->sc_dev, IRDMA_DEBUG_CM, "connect() FAILED: dest addr=%pI6", cm_info.rem_addr); irdma_rem_ref_cm_node(cm_node); iwdev->cm_core.stats_connect_errs++; return ret; } /** * irdma_create_listen - registered call creating listener * @cm_id: cm information for passive connection * @backlog: to max accept pending count */ int irdma_create_listen(struct iw_cm_id *cm_id, int backlog) { struct irdma_device *iwdev; struct irdma_cm_listener *cm_listen_node; struct irdma_cm_info cm_info = {0}; struct sockaddr_in *laddr; struct sockaddr_in6 *laddr6; bool wildcard = false; int err; iwdev = to_iwdev(cm_id->device); if (!iwdev) return -EINVAL; laddr = (struct sockaddr_in *)&cm_id->m_local_addr; laddr6 = (struct sockaddr_in6 *)&cm_id->m_local_addr; 
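/* Both the IPv4 and IPv6 views of the bound local address are taken here; the sin_family check below selects which one is actually used to build cm_info. */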
cm_info.qh_qpid = iwdev->vsi.ilq->qp_id; if (laddr->sin_family == AF_INET) { if (iwdev->vsi.mtu < IRDMA_MIN_MTU_IPV4) return -EINVAL; cm_info.ipv4 = true; cm_info.loc_addr[0] = ntohl(laddr->sin_addr.s_addr); cm_info.loc_port = ntohs(laddr->sin_port); if (laddr->sin_addr.s_addr != htonl(INADDR_ANY)) { cm_info.vlan_id = irdma_get_vlan_ipv4(cm_info.loc_addr); } else { cm_info.vlan_id = 0xFFFF; wildcard = true; } } else { if (iwdev->vsi.mtu < IRDMA_MIN_MTU_IPV6) return -EINVAL; cm_info.ipv4 = false; irdma_copy_ip_ntohl(cm_info.loc_addr, laddr6->sin6_addr.__u6_addr.__u6_addr32); cm_info.loc_port = ntohs(laddr6->sin6_port); if (!IN6_IS_ADDR_UNSPECIFIED(&laddr6->sin6_addr)) { irdma_netdev_vlan_ipv6(cm_info.loc_addr, &cm_info.vlan_id, NULL); } else { cm_info.vlan_id = 0xFFFF; wildcard = true; } } if (cm_info.vlan_id >= VLAN_N_VID && iwdev->dcb_vlan_mode) cm_info.vlan_id = 0; cm_info.backlog = backlog; cm_info.cm_id = cm_id; cm_listen_node = irdma_make_listen_node(&iwdev->cm_core, iwdev, &cm_info); if (!cm_listen_node) { irdma_debug(&iwdev->rf->sc_dev, IRDMA_DEBUG_CM, "cm_listen_node == NULL\n"); return -ENOMEM; } cm_id->provider_data = cm_listen_node; cm_listen_node->tos = cm_id->tos; if (iwdev->vsi.dscp_mode) cm_listen_node->user_pri = iwdev->vsi.dscp_map[irdma_tos2dscp(cm_id->tos)]; else cm_listen_node->user_pri = rt_tos2priority(cm_id->tos); cm_info.user_pri = cm_listen_node->user_pri; if (!cm_listen_node->reused_node) { if (wildcard) { err = irdma_add_mqh(iwdev, &cm_info, cm_listen_node); if (err) goto error; } else { if (!iwdev->vsi.dscp_mode) cm_info.user_pri = cm_listen_node->user_pri = irdma_get_egress_vlan_prio(cm_info.loc_addr, cm_info.user_pri, cm_info.ipv4); err = irdma_manage_qhash(iwdev, &cm_info, IRDMA_QHASH_TYPE_TCP_SYN, IRDMA_QHASH_MANAGE_TYPE_ADD, NULL, true); if (err) goto error; cm_listen_node->qhash_set = true; } cm_listen_node->apbvt_entry = irdma_add_apbvt(iwdev, cm_info.loc_port); if (!cm_listen_node->apbvt_entry) goto error; } cm_id->add_ref(cm_id); cm_listen_node->cm_core->stats_listen_created++; irdma_debug(&iwdev->rf->sc_dev, IRDMA_DEBUG_CM, "loc_port=0x%04x loc_addr=%pI4 cm_listen_node=%p cm_id=%p qhash_set=%d vlan_id=%d\n", cm_listen_node->loc_port, cm_listen_node->loc_addr, cm_listen_node, cm_listen_node->cm_id, cm_listen_node->qhash_set, cm_listen_node->vlan_id); return 0; error: irdma_cm_del_listen(&iwdev->cm_core, cm_listen_node, false); return -EINVAL; } /** * irdma_destroy_listen - registered call to destroy listener * @cm_id: cm information for passive connection */ int irdma_destroy_listen(struct iw_cm_id *cm_id) { struct irdma_device *iwdev; iwdev = to_iwdev(cm_id->device); if (cm_id->provider_data) irdma_cm_del_listen(&iwdev->cm_core, cm_id->provider_data, true); else irdma_debug(&iwdev->rf->sc_dev, IRDMA_DEBUG_CM, "cm_id->provider_data was NULL\n"); cm_id->rem_ref(cm_id); return 0; } /** * irdma_iw_teardown_list_prep - add conn nodes slated for tear * down to list * @cm_core: cm's core * @teardown_list: a list to which cm_node will be selected * @ipaddr: pointer to ip address * @nfo: pointer to cm_info structure instance * @disconnect_all: flag indicating disconnect all QPs */ static void irdma_iw_teardown_list_prep(struct irdma_cm_core *cm_core, struct list_head *teardown_list, u32 *ipaddr, struct irdma_cm_info *nfo, bool disconnect_all) { struct irdma_cm_node *cm_node; int bkt; HASH_FOR_EACH_RCU(cm_core->cm_hash_tbl, bkt, cm_node, list) { if ((disconnect_all || (nfo->vlan_id == cm_node->vlan_id && !memcmp(cm_node->loc_addr, ipaddr, nfo->ipv4 ? 
4 : 16))) && atomic_inc_not_zero(&cm_node->refcnt)) list_add(&cm_node->teardown_entry, teardown_list); } } static inline bool irdma_ip_vlan_match(u32 *ip1, u16 vlan_id1, bool check_vlan, u32 *ip2, u16 vlan_id2, bool ipv4) { return (!check_vlan || vlan_id1 == vlan_id2) && !memcmp(ip1, ip2, ipv4 ? 4 : 16); } /** * irdma_roce_teardown_list_prep - add conn nodes slated for * tear down to list * @iwdev: RDMA device * @teardown_list: a list to which cm_node will be selected * @ipaddr: pointer to ip address * @nfo: pointer to cm_info structure instance * @disconnect_all: flag indicating disconnect all QPs */ static void irdma_roce_teardown_list_prep(struct irdma_device *iwdev, struct list_head *teardown_list, u32 *ipaddr, struct irdma_cm_info *nfo, bool disconnect_all) { struct irdma_sc_vsi *vsi = &iwdev->vsi; struct irdma_sc_qp *sc_qp; struct list_head *list_node; struct irdma_qp *qp; unsigned long flags; int i; for (i = 0; i < IRDMA_MAX_USER_PRIORITY; i++) { mutex_lock(&vsi->qos[i].qos_mutex); list_for_each(list_node, &vsi->qos[i].qplist) { u32 qp_ip[4]; sc_qp = container_of(list_node, struct irdma_sc_qp, list); if (sc_qp->qp_uk.qp_type != IRDMA_QP_TYPE_ROCE_RC) continue; qp = sc_qp->qp_uk.back_qp; if (!disconnect_all) { if (nfo->ipv4) qp_ip[0] = qp->udp_info.local_ipaddr[3]; else memcpy(qp_ip, &qp->udp_info.local_ipaddr[0], sizeof(qp_ip)); } if (disconnect_all || irdma_ip_vlan_match(qp_ip, qp->udp_info.vlan_tag & EVL_VLID_MASK, qp->udp_info.insert_vlan_tag, ipaddr, nfo->vlan_id, nfo->ipv4)) { spin_lock_irqsave(&iwdev->rf->qptable_lock, flags); if (iwdev->rf->qp_table[sc_qp->qp_uk.qp_id]) { irdma_qp_add_ref(&qp->ibqp); list_add(&qp->teardown_entry, teardown_list); } spin_unlock_irqrestore(&iwdev->rf->qptable_lock, flags); } } mutex_unlock(&vsi->qos[i].qos_mutex); } } /** * irdma_cm_event_connected - handle connected active node * @event: the info for cm_node of connection */ static void irdma_cm_event_connected(struct irdma_cm_event *event) { struct irdma_qp *iwqp; struct irdma_device *iwdev; struct irdma_cm_node *cm_node; struct irdma_sc_dev *dev; struct ib_qp_attr attr = {0}; struct iw_cm_id *cm_id; int status; bool read0; int wait_ret = 0; cm_node = event->cm_node; cm_id = cm_node->cm_id; iwqp = cm_id->provider_data; iwdev = iwqp->iwdev; dev = &iwdev->rf->sc_dev; if (iwqp->sc_qp.qp_uk.destroy_pending) { status = -ETIMEDOUT; goto error; } irdma_cm_init_tsa_conn(iwqp, cm_node); read0 = (cm_node->send_rdma0_op == SEND_RDMA_READ_ZERO); if (iwqp->page) iwqp->sc_qp.qp_uk.sq_base = kmap_local_page(iwqp->page); irdma_sc_send_rtt(&iwqp->sc_qp, read0); if (iwqp->page) kunmap_local(iwqp->sc_qp.qp_uk.sq_base); attr.qp_state = IB_QPS_RTS; cm_node->qhash_set = false; irdma_modify_qp(&iwqp->ibqp, &attr, IB_QP_STATE, NULL); if (dev->hw_attrs.uk_attrs.feature_flags & IRDMA_FEATURE_RTS_AE) { wait_ret = wait_event_interruptible_timeout(iwqp->waitq, iwqp->rts_ae_rcvd, IRDMA_MAX_TIMEOUT); if (!wait_ret) irdma_debug(&iwdev->rf->sc_dev, IRDMA_DEBUG_CM, "Slow Connection: cm_node=%p, loc_port=%d, rem_port=%d, cm_id=%p\n", cm_node, cm_node->loc_port, cm_node->rem_port, cm_node->cm_id); } irdma_send_cm_event(cm_node, cm_id, IW_CM_EVENT_CONNECT_REPLY, 0); cm_node->accelerated = true; complete(&cm_node->establish_comp); cm_node->cm_core->cm_free_ah(cm_node); return; error: iwqp->cm_id = NULL; cm_id->provider_data = NULL; irdma_send_cm_event(event->cm_node, cm_id, IW_CM_EVENT_CONNECT_REPLY, status); irdma_rem_ref_cm_node(event->cm_node); } /** * irdma_cm_event_reset - handle reset * @event: the info for cm_node of 
connection */ static void irdma_cm_event_reset(struct irdma_cm_event *event) { struct irdma_cm_node *cm_node = event->cm_node; struct iw_cm_id *cm_id = cm_node->cm_id; struct irdma_qp *iwqp; if (!cm_id) return; iwqp = cm_id->provider_data; if (!iwqp) return; irdma_debug(&cm_node->iwdev->rf->sc_dev, IRDMA_DEBUG_CM, "reset event %p - cm_id = %p\n", event->cm_node, cm_id); iwqp->cm_id = NULL; irdma_send_cm_event(cm_node, cm_node->cm_id, IW_CM_EVENT_DISCONNECT, -ECONNRESET); irdma_send_cm_event(cm_node, cm_node->cm_id, IW_CM_EVENT_CLOSE, 0); } /** * irdma_cm_event_handler - send event to cm upper layer * @work: pointer of cm event info. */ static void irdma_cm_event_handler(struct work_struct *work) { struct irdma_cm_event *event = container_of(work, struct irdma_cm_event, event_work); struct irdma_cm_node *cm_node; if (!event || !event->cm_node || !event->cm_node->cm_core) return; cm_node = event->cm_node; switch (event->type) { case IRDMA_CM_EVENT_MPA_REQ: irdma_send_cm_event(cm_node, cm_node->cm_id, IW_CM_EVENT_CONNECT_REQUEST, 0); break; case IRDMA_CM_EVENT_RESET: irdma_cm_event_reset(event); break; case IRDMA_CM_EVENT_CONNECTED: if (!event->cm_node->cm_id || event->cm_node->state != IRDMA_CM_STATE_OFFLOADED) break; irdma_cm_event_connected(event); break; case IRDMA_CM_EVENT_MPA_REJECT: if (!event->cm_node->cm_id || cm_node->state == IRDMA_CM_STATE_OFFLOADED) break; irdma_send_cm_event(cm_node, cm_node->cm_id, IW_CM_EVENT_CONNECT_REPLY, -ECONNREFUSED); break; case IRDMA_CM_EVENT_ABORTED: if (!event->cm_node->cm_id || event->cm_node->state == IRDMA_CM_STATE_OFFLOADED) break; irdma_event_connect_error(event); break; default: irdma_debug(&cm_node->iwdev->rf->sc_dev, IRDMA_DEBUG_CM, "bad event type = %d\n", event->type); break; } irdma_rem_ref_cm_node(event->cm_node); kfree(event); } /** * irdma_cm_post_event - queue event request for worker thread * @event: cm node's info for up event call */ static void irdma_cm_post_event(struct irdma_cm_event *event) { atomic_inc(&event->cm_node->refcnt); INIT_WORK(&event->event_work, irdma_cm_event_handler); queue_work(event->cm_node->cm_core->event_wq, &event->event_work); } /** * irdma_cm_teardown_connections - teardown QPs * @iwdev: device pointer * @ipaddr: Pointer to IPv4 or IPv6 address * @nfo: Connection info * @disconnect_all: flag indicating disconnect all QPs * * teardown QPs where source or destination addr matches ip addr */ static void irdma_cm_teardown_connections(struct irdma_device *iwdev, u32 *ipaddr, struct irdma_cm_info *nfo, bool disconnect_all) { struct irdma_cm_core *cm_core = &iwdev->cm_core; struct list_head *list_core_temp; struct list_head *list_node; struct irdma_cm_node *cm_node; struct list_head teardown_list; struct ib_qp_attr attr; struct irdma_qp *qp; INIT_LIST_HEAD(&teardown_list); rcu_read_lock(); irdma_iw_teardown_list_prep(cm_core, &teardown_list, ipaddr, nfo, disconnect_all); rcu_read_unlock(); attr.qp_state = IB_QPS_ERR; list_for_each_safe(list_node, list_core_temp, &teardown_list) { cm_node = container_of(list_node, struct irdma_cm_node, teardown_entry); irdma_modify_qp(&cm_node->iwqp->ibqp, &attr, IB_QP_STATE, NULL); if (iwdev->rf->reset) irdma_cm_disconn(cm_node->iwqp); irdma_rem_ref_cm_node(cm_node); } if (!rdma_protocol_roce(&iwdev->ibdev, 1)) return; INIT_LIST_HEAD(&teardown_list); irdma_roce_teardown_list_prep(iwdev, &teardown_list, ipaddr, nfo, disconnect_all); list_for_each_safe(list_node, list_core_temp, &teardown_list) { qp = container_of(list_node, struct irdma_qp, teardown_entry); 
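/* For each matching RoCE QP: move it to the error state, raise a catastrophic QP event, and drop the reference taken in irdma_roce_teardown_list_prep(). */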
irdma_modify_qp_roce(&qp->ibqp, &attr, IB_QP_STATE, NULL); irdma_ib_qp_event(qp, IRDMA_QP_EVENT_CATASTROPHIC); irdma_qp_rem_ref(&qp->ibqp); } } diff --git a/sys/dev/irdma/irdma_hw.c b/sys/dev/irdma/irdma_hw.c index d41b3be061f8..ba4b5233d9d5 100644 --- a/sys/dev/irdma/irdma_hw.c +++ b/sys/dev/irdma/irdma_hw.c @@ -1,2801 +1,2801 @@ /*- * SPDX-License-Identifier: GPL-2.0 or Linux-OpenIB * * Copyright (c) 2015 - 2022 Intel Corporation * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU * General Public License (GPL) Version 2, available from the file * COPYING in the main directory of this source tree, or the * OpenFabrics.org BSD license below: * * Redistribution and use in source and binary forms, with or * without modification, are permitted provided that the following * conditions are met: * * - Redistributions of source code must retain the above * copyright notice, this list of conditions and the following * disclaimer. * * - Redistributions in binary form must reproduce the above * copyright notice, this list of conditions and the following * disclaimer in the documentation and/or other materials * provided with the distribution. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ /*$FreeBSD$*/ #include "irdma_main.h" static struct irdma_rsrc_limits rsrc_limits_table[] = { [0] = { .qplimit = SZ_128, }, [1] = { .qplimit = SZ_1K, }, [2] = { .qplimit = SZ_2K, }, [3] = { .qplimit = SZ_4K, }, [4] = { .qplimit = SZ_16K, }, [5] = { .qplimit = SZ_64K, }, [6] = { .qplimit = SZ_128K, }, [7] = { .qplimit = SZ_256K, }, }; /* types of hmc objects */ static enum irdma_hmc_rsrc_type iw_hmc_obj_types[] = { IRDMA_HMC_IW_QP, IRDMA_HMC_IW_CQ, IRDMA_HMC_IW_HTE, IRDMA_HMC_IW_ARP, IRDMA_HMC_IW_APBVT_ENTRY, IRDMA_HMC_IW_MR, IRDMA_HMC_IW_XF, IRDMA_HMC_IW_XFFL, IRDMA_HMC_IW_Q1, IRDMA_HMC_IW_Q1FL, IRDMA_HMC_IW_PBLE, IRDMA_HMC_IW_TIMER, IRDMA_HMC_IW_FSIMC, IRDMA_HMC_IW_FSIAV, IRDMA_HMC_IW_RRF, IRDMA_HMC_IW_RRFFL, IRDMA_HMC_IW_HDR, IRDMA_HMC_IW_MD, IRDMA_HMC_IW_OOISC, IRDMA_HMC_IW_OOISCFFL, }; /** * irdma_iwarp_ce_handler - handle iwarp completions * @iwcq: iwarp cq receiving event */ static void irdma_iwarp_ce_handler(struct irdma_sc_cq *iwcq) { struct irdma_cq *cq = iwcq->back_cq; if (!cq->user_mode) atomic_set(&cq->armed, 0); if (cq->ibcq.comp_handler) cq->ibcq.comp_handler(&cq->ibcq, cq->ibcq.cq_context); } /** * irdma_puda_ce_handler - handle puda completion events * @rf: RDMA PCI function * @cq: puda completion q for event */ static void irdma_puda_ce_handler(struct irdma_pci_f *rf, struct irdma_sc_cq *cq) { struct irdma_sc_dev *dev = &rf->sc_dev; u32 compl_error; int status; do { status = irdma_puda_poll_cmpl(dev, cq, &compl_error); if (status == -ENOENT) break; if (status) { irdma_debug(dev, IRDMA_DEBUG_ERR, "puda status = %d\n", status); break; } if (compl_error) { irdma_debug(dev, IRDMA_DEBUG_ERR, "puda compl_err =0x%x\n", compl_error); break; } } while (1); irdma_sc_ccq_arm(cq); } /** * irdma_process_ceq - handle ceq for completions * @rf: RDMA PCI function * @ceq: ceq having cq for completion */ 
static void irdma_process_ceq(struct irdma_pci_f *rf, struct irdma_ceq *ceq) { struct irdma_sc_dev *dev = &rf->sc_dev; struct irdma_sc_ceq *sc_ceq; struct irdma_sc_cq *cq; unsigned long flags; sc_ceq = &ceq->sc_ceq; do { spin_lock_irqsave(&ceq->ce_lock, flags); cq = irdma_sc_process_ceq(dev, sc_ceq); if (!cq) { spin_unlock_irqrestore(&ceq->ce_lock, flags); break; } if (cq->cq_type == IRDMA_CQ_TYPE_IWARP) irdma_iwarp_ce_handler(cq); spin_unlock_irqrestore(&ceq->ce_lock, flags); if (cq->cq_type == IRDMA_CQ_TYPE_CQP) queue_work(rf->cqp_cmpl_wq, &rf->cqp_cmpl_work); else if (cq->cq_type == IRDMA_CQ_TYPE_ILQ || cq->cq_type == IRDMA_CQ_TYPE_IEQ) irdma_puda_ce_handler(rf, cq); } while (1); } static void irdma_set_flush_fields(struct irdma_sc_qp *qp, struct irdma_aeqe_info *info) { struct qp_err_code qp_err; qp->sq_flush_code = info->sq; qp->rq_flush_code = info->rq; qp_err = irdma_ae_to_qp_err_code(info->ae_id); qp->flush_code = qp_err.flush_code; qp->event_type = qp_err.event_type; } /** * irdma_complete_cqp_request - perform post-completion cleanup * @cqp: device CQP * @cqp_request: CQP request * * Mark CQP request as done, wake up waiting thread or invoke * callback function and release/free CQP request. */ static void irdma_complete_cqp_request(struct irdma_cqp *cqp, struct irdma_cqp_request *cqp_request) { if (cqp_request->waiting) { cqp_request->request_done = true; wake_up(&cqp_request->waitq); } else if (cqp_request->callback_fcn) { cqp_request->callback_fcn(cqp_request); } irdma_put_cqp_request(cqp, cqp_request); } /** * irdma_process_aeq - handle aeq events * @rf: RDMA PCI function */ static void irdma_process_aeq(struct irdma_pci_f *rf) { struct irdma_sc_dev *dev = &rf->sc_dev; struct irdma_aeq *aeq = &rf->aeq; struct irdma_sc_aeq *sc_aeq = &aeq->sc_aeq; struct irdma_aeqe_info aeinfo; struct irdma_aeqe_info *info = &aeinfo; int ret; struct irdma_qp *iwqp = NULL; struct irdma_cq *iwcq = NULL; struct irdma_sc_qp *qp = NULL; struct irdma_device *iwdev = rf->iwdev; struct irdma_qp_host_ctx_info *ctx_info = NULL; unsigned long flags; u32 aeqcnt = 0; if (!sc_aeq->size) return; do { memset(info, 0, sizeof(*info)); ret = irdma_sc_get_next_aeqe(sc_aeq, info); if (ret) break; aeqcnt++; irdma_debug(&iwdev->rf->sc_dev, IRDMA_DEBUG_AEQ, "ae_id = 0x%x bool qp=%d qp_id = %d tcp_state=%d iwarp_state=%d ae_src=%d\n", info->ae_id, info->qp, info->qp_cq_id, info->tcp_state, info->iwarp_state, info->ae_src); if (info->qp) { spin_lock_irqsave(&rf->qptable_lock, flags); iwqp = rf->qp_table[info->qp_cq_id]; if (!iwqp) { spin_unlock_irqrestore(&rf->qptable_lock, flags); if (info->ae_id == IRDMA_AE_QP_SUSPEND_COMPLETE) { struct irdma_device *iwdev = rf->iwdev; if (!iwdev->vsi.tc_change_pending) continue; atomic_dec(&iwdev->vsi.qp_suspend_reqs); wake_up(&iwdev->suspend_wq); continue; } irdma_debug(&iwdev->rf->sc_dev, IRDMA_DEBUG_AEQ, "qp_id %d is already freed\n", info->qp_cq_id); continue; } irdma_qp_add_ref(&iwqp->ibqp); spin_unlock_irqrestore(&rf->qptable_lock, flags); qp = &iwqp->sc_qp; spin_lock_irqsave(&iwqp->lock, flags); iwqp->hw_tcp_state = info->tcp_state; iwqp->hw_iwarp_state = info->iwarp_state; if (info->ae_id != IRDMA_AE_QP_SUSPEND_COMPLETE) iwqp->last_aeq = info->ae_id; spin_unlock_irqrestore(&iwqp->lock, flags); ctx_info = &iwqp->ctx_info; } else { if (info->ae_id != IRDMA_AE_CQ_OPERATION_ERROR) continue; } switch (info->ae_id) { struct irdma_cm_node *cm_node; case IRDMA_AE_LLP_CONNECTION_ESTABLISHED: cm_node = iwqp->cm_node; if (cm_node->accept_pend) { 
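/* A pending passive accept has completed; release the listener's pending-accept slot before signaling the RTS waiter below. */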
atomic_dec(&cm_node->listener->pend_accepts_cnt); cm_node->accept_pend = 0; } iwqp->rts_ae_rcvd = 1; wake_up_interruptible(&iwqp->waitq); break; case IRDMA_AE_LLP_FIN_RECEIVED: if (qp->term_flags) break; if (atomic_inc_return(&iwqp->close_timer_started) == 1) { iwqp->hw_tcp_state = IRDMA_TCP_STATE_CLOSE_WAIT; if (iwqp->ibqp_state == IB_QPS_RTS) { irdma_next_iw_state(iwqp, IRDMA_QP_STATE_CLOSING, 0, 0, 0); irdma_cm_disconn(iwqp); } irdma_schedule_cm_timer(iwqp->cm_node, (struct irdma_puda_buf *)iwqp, IRDMA_TIMER_TYPE_CLOSE, 1, 0); } break; case IRDMA_AE_LLP_CLOSE_COMPLETE: if (qp->term_flags) irdma_terminate_done(qp, 0); else irdma_cm_disconn(iwqp); break; case IRDMA_AE_BAD_CLOSE: case IRDMA_AE_RESET_SENT: irdma_next_iw_state(iwqp, IRDMA_QP_STATE_ERROR, 1, 0, 0); irdma_cm_disconn(iwqp); break; case IRDMA_AE_LLP_CONNECTION_RESET: if (atomic_read(&iwqp->close_timer_started)) break; irdma_cm_disconn(iwqp); break; case IRDMA_AE_QP_SUSPEND_COMPLETE: if (iwqp->iwdev->vsi.tc_change_pending) { atomic_dec(&iwqp->sc_qp.vsi->qp_suspend_reqs); wake_up(&iwqp->iwdev->suspend_wq); } break; case IRDMA_AE_TERMINATE_SENT: irdma_terminate_send_fin(qp); break; case IRDMA_AE_LLP_TERMINATE_RECEIVED: irdma_terminate_received(qp, info); break; case IRDMA_AE_LCE_CQ_CATASTROPHIC: case IRDMA_AE_CQ_OPERATION_ERROR: irdma_dev_err(&iwdev->ibdev, "Processing CQ[0x%x] op error, AE 0x%04X\n", info->qp_cq_id, info->ae_id); spin_lock_irqsave(&rf->cqtable_lock, flags); iwcq = rf->cq_table[info->qp_cq_id]; if (!iwcq) { spin_unlock_irqrestore(&rf->cqtable_lock, flags); irdma_debug(&iwdev->rf->sc_dev, IRDMA_DEBUG_AEQ, "cq_id %d is already freed\n", info->qp_cq_id); continue; } irdma_cq_add_ref(&iwcq->ibcq); spin_unlock_irqrestore(&rf->cqtable_lock, flags); if (iwcq->ibcq.event_handler) { struct ib_event ibevent; ibevent.device = iwcq->ibcq.device; ibevent.event = IB_EVENT_CQ_ERR; ibevent.element.cq = &iwcq->ibcq; iwcq->ibcq.event_handler(&ibevent, iwcq->ibcq.cq_context); } irdma_cq_rem_ref(&iwcq->ibcq); break; case IRDMA_AE_RESET_NOT_SENT: case IRDMA_AE_LLP_DOUBT_REACHABILITY: break; case IRDMA_AE_RESOURCE_EXHAUSTION: irdma_dev_err(&iwdev->ibdev, "Resource exhaustion reason: q1 = %d xmit or rreq = %d\n", info->ae_src == IRDMA_AE_SOURCE_RSRC_EXHT_Q1, info->ae_src == IRDMA_AE_SOURCE_RSRC_EXHT_XT_RR); break; case IRDMA_AE_PRIV_OPERATION_DENIED: case IRDMA_AE_RDMAP_ROE_BAD_LLP_CLOSE: case IRDMA_AE_STAG_ZERO_INVALID: case IRDMA_AE_IB_RREQ_AND_Q1_FULL: case IRDMA_AE_DDP_UBE_INVALID_DDP_VERSION: case IRDMA_AE_DDP_UBE_INVALID_MO: case IRDMA_AE_DDP_UBE_INVALID_QN: case IRDMA_AE_DDP_NO_L_BIT: case IRDMA_AE_RDMAP_ROE_INVALID_RDMAP_VERSION: case IRDMA_AE_RDMAP_ROE_UNEXPECTED_OPCODE: case IRDMA_AE_ROE_INVALID_RDMA_READ_REQUEST: case IRDMA_AE_ROE_INVALID_RDMA_WRITE_OR_READ_RESP: case IRDMA_AE_INVALID_ARP_ENTRY: case IRDMA_AE_INVALID_TCP_OPTION_RCVD: case IRDMA_AE_STALE_ARP_ENTRY: case IRDMA_AE_LLP_RECEIVED_MPA_CRC_ERROR: case IRDMA_AE_LLP_SEGMENT_TOO_SMALL: case IRDMA_AE_LLP_SYN_RECEIVED: case IRDMA_AE_LLP_TOO_MANY_RETRIES: case IRDMA_AE_LCE_QP_CATASTROPHIC: case IRDMA_AE_LCE_FUNCTION_CATASTROPHIC: case IRDMA_AE_UDA_XMIT_DGRAM_TOO_LONG: default: irdma_dev_err(&iwdev->ibdev, "abnormal ae_id = 0x%x bool qp=%d qp_id = %d ae_source=%d\n", info->ae_id, info->qp, info->qp_cq_id, info->ae_src); if (rdma_protocol_roce(&iwqp->iwdev->ibdev, 1)) { ctx_info->roce_info->err_rq_idx_valid = info->err_rq_idx_valid; if (info->rq) { ctx_info->roce_info->err_rq_idx = info->wqe_idx; irdma_sc_qp_setctx_roce(&iwqp->sc_qp, iwqp->host_ctx.va, ctx_info); } 
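/* RoCE QPs record the AE-derived flush codes and schedule a disconnect here; iWARP QPs instead fall through to the iWARP context update and state transition below. */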
irdma_set_flush_fields(qp, info); irdma_cm_disconn(iwqp); break; } ctx_info->iwarp_info->err_rq_idx_valid = info->err_rq_idx_valid; if (info->rq) { ctx_info->iwarp_info->err_rq_idx = info->wqe_idx; ctx_info->tcp_info_valid = false; ctx_info->iwarp_info_valid = true; irdma_sc_qp_setctx(&iwqp->sc_qp, iwqp->host_ctx.va, ctx_info); } if (iwqp->hw_iwarp_state != IRDMA_QP_STATE_RTS && iwqp->hw_iwarp_state != IRDMA_QP_STATE_TERMINATE) { irdma_next_iw_state(iwqp, IRDMA_QP_STATE_ERROR, 1, 0, 0); irdma_cm_disconn(iwqp); } else { irdma_terminate_connection(qp, info); } break; } if (info->qp) irdma_qp_rem_ref(&iwqp->ibqp); } while (1); if (aeqcnt) irdma_sc_repost_aeq_entries(dev, aeqcnt); } /** * irdma_ena_intr - set up device interrupts * @dev: hardware control device structure * @msix_id: id of the interrupt to be enabled */ static void irdma_ena_intr(struct irdma_sc_dev *dev, u32 msix_id) { dev->irq_ops->irdma_en_irq(dev, msix_id); } /** * irdma_dpc - tasklet for aeq and ceq 0 * @t: tasklet_struct ptr */ static void irdma_dpc(unsigned long t) { struct irdma_pci_f *rf = from_tasklet(rf, (struct tasklet_struct *)t, dpc_tasklet); if (rf->msix_shared) irdma_process_ceq(rf, rf->ceqlist); irdma_process_aeq(rf); irdma_ena_intr(&rf->sc_dev, rf->iw_msixtbl[0].idx); } /** * irdma_ceq_dpc - dpc handler for CEQ * @t: tasklet_struct ptr */ static void irdma_ceq_dpc(unsigned long t) { struct irdma_ceq *iwceq = from_tasklet(iwceq, (struct tasklet_struct *)t, dpc_tasklet); struct irdma_pci_f *rf = iwceq->rf; irdma_process_ceq(rf, iwceq); irdma_ena_intr(&rf->sc_dev, iwceq->msix_idx); } /** * irdma_save_msix_info - copy msix vector information to iwarp device * @rf: RDMA PCI function * * Allocate iwdev msix table and copy the msix info to the table * Return 0 if successful, otherwise return error */ static int irdma_save_msix_info(struct irdma_pci_f *rf) { struct irdma_qvlist_info *iw_qvlist; struct irdma_qv_info *iw_qvinfo; u32 ceq_idx; u32 i; u32 size; if (!rf->msix_count) { irdma_dev_err(to_ibdev(&rf->sc_dev), "No MSI-X vectors reserved for RDMA.\n"); return -EINVAL; } size = sizeof(struct irdma_msix_vector) * rf->msix_count; size += sizeof(struct irdma_qvlist_info); size += sizeof(struct irdma_qv_info) * rf->msix_count - 1; rf->iw_msixtbl = kzalloc(size, GFP_KERNEL); if (!rf->iw_msixtbl) return -ENOMEM; rf->iw_qvlist = (struct irdma_qvlist_info *) (&rf->iw_msixtbl[rf->msix_count]); iw_qvlist = rf->iw_qvlist; iw_qvinfo = iw_qvlist->qv_info; iw_qvlist->num_vectors = rf->msix_count; if (rf->msix_count <= num_online_cpus()) rf->msix_shared = true; else if (rf->msix_count > num_online_cpus() + 1) rf->msix_count = num_online_cpus() + 1; for (i = 0, ceq_idx = 0; i < rf->msix_count; i++, iw_qvinfo++) { rf->iw_msixtbl[i].idx = rf->msix_info.entry + i; rf->iw_msixtbl[i].cpu_affinity = ceq_idx; if (!i) { iw_qvinfo->aeq_idx = 0; if (rf->msix_shared) iw_qvinfo->ceq_idx = ceq_idx++; else iw_qvinfo->ceq_idx = IRDMA_Q_INVALID_IDX; } else { iw_qvinfo->aeq_idx = IRDMA_Q_INVALID_IDX; iw_qvinfo->ceq_idx = ceq_idx++; } iw_qvinfo->itr_idx = IRDMA_IDX_NOITR; iw_qvinfo->v_idx = rf->iw_msixtbl[i].idx; } return 0; } /** * irdma_irq_handler - interrupt handler for aeq and ceq0 * @data: RDMA PCI function */ static void irdma_irq_handler(void *data) { struct irdma_pci_f *rf = data; tasklet_schedule(&rf->dpc_tasklet); } /** * irdma_ceq_handler - interrupt handler for ceq * @data: ceq pointer */ static void irdma_ceq_handler(void *data) { struct irdma_ceq *iwceq = data; tasklet_schedule(&iwceq->dpc_tasklet); } /** * irdma_free_irq - free 
device interrupts in FreeBSD manner * @rf: RDMA PCI function * @msix_vec: msix vector to disable irq * * The function is called when destroying irq. It tears down * the interrupt and releases resources. */ static void irdma_free_irq(struct irdma_pci_f *rf, struct irdma_msix_vector *msix_vec) { if (msix_vec->tag) { bus_teardown_intr(rf->dev_ctx.dev, msix_vec->res, msix_vec->tag); msix_vec->tag = NULL; } if (msix_vec->res) { bus_release_resource(rf->dev_ctx.dev, SYS_RES_IRQ, msix_vec->idx + 1, msix_vec->res); msix_vec->res = NULL; } } /** * irdma_destroy_irq - destroy device interrupts * @rf: RDMA PCI function * @msix_vec: msix vector to disable irq * @dev_id: parameter to pass to free_irq (used during irq setup) * * The function is called when destroying aeq/ceq */ static void irdma_destroy_irq(struct irdma_pci_f *rf, struct irdma_msix_vector *msix_vec, void *dev_id) { struct irdma_sc_dev *dev = &rf->sc_dev; dev->irq_ops->irdma_dis_irq(dev, msix_vec->idx); irdma_free_irq(rf, msix_vec); } /** * irdma_destroy_cqp - destroy control qp * @rf: RDMA PCI function * @free_hwcqp: 1 if hw cqp should be freed * * Issue destroy cqp request and * free the resources associated with the cqp */ static void irdma_destroy_cqp(struct irdma_pci_f *rf, bool free_hwcqp) { struct irdma_sc_dev *dev = &rf->sc_dev; struct irdma_cqp *cqp = &rf->cqp; int status = 0; if (rf->cqp_cmpl_wq) destroy_workqueue(rf->cqp_cmpl_wq); status = irdma_sc_cqp_destroy(dev->cqp, free_hwcqp); if (status) irdma_debug(dev, IRDMA_DEBUG_ERR, "Destroy CQP failed %d\n", status); irdma_cleanup_pending_cqp_op(rf); irdma_free_dma_mem(dev->hw, &cqp->sq); kfree(cqp->scratch_array); cqp->scratch_array = NULL; kfree(cqp->cqp_requests); cqp->cqp_requests = NULL; } static void irdma_destroy_virt_aeq(struct irdma_pci_f *rf) { struct irdma_aeq *aeq = &rf->aeq; u32 pg_cnt = DIV_ROUND_UP(aeq->mem.size, PAGE_SIZE); dma_addr_t *pg_arr = (dma_addr_t *) aeq->palloc.level1.addr; irdma_unmap_vm_page_list(&rf->hw, pg_arr, pg_cnt); irdma_free_pble(rf->pble_rsrc, &aeq->palloc); vfree(aeq->mem.va); } /** * irdma_destroy_aeq - destroy aeq * @rf: RDMA PCI function * * Issue a destroy aeq request and * free the resources associated with the aeq * The function is called during driver unload */ static void irdma_destroy_aeq(struct irdma_pci_f *rf) { struct irdma_sc_dev *dev = &rf->sc_dev; struct irdma_aeq *aeq = &rf->aeq; int status = -EBUSY; if (!rf->msix_shared) { rf->sc_dev.irq_ops->irdma_cfg_aeq(&rf->sc_dev, rf->iw_msixtbl->idx, false); irdma_destroy_irq(rf, rf->iw_msixtbl, rf); } if (rf->reset) goto exit; aeq->sc_aeq.size = 0; status = irdma_cqp_aeq_cmd(dev, &aeq->sc_aeq, IRDMA_OP_AEQ_DESTROY); if (status) irdma_debug(dev, IRDMA_DEBUG_ERR, "Destroy AEQ failed %d\n", status); exit: if (aeq->virtual_map) irdma_destroy_virt_aeq(rf); else irdma_free_dma_mem(dev->hw, &aeq->mem); } /** * irdma_destroy_ceq - destroy ceq * @rf: RDMA PCI function * @iwceq: ceq to be destroyed * * Issue a destroy ceq request and * free the resources associated with the ceq */ static void irdma_destroy_ceq(struct irdma_pci_f *rf, struct irdma_ceq *iwceq) { struct irdma_sc_dev *dev = &rf->sc_dev; int status; if (rf->reset) goto exit; status = irdma_sc_ceq_destroy(&iwceq->sc_ceq, 0, 1); if (status) { irdma_debug(dev, IRDMA_DEBUG_ERR, "CEQ destroy command failed %d\n", status); goto exit; } status = irdma_sc_cceq_destroy_done(&iwceq->sc_ceq); if (status) irdma_debug(dev, IRDMA_DEBUG_ERR, "CEQ destroy completion failed %d\n", status); exit: spin_lock_destroy(&iwceq->ce_lock);
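/* Common cleanup: the remaining CEQ lock, the registered-CQ array and the CEQ DMA memory are released even when the destroy command failed or was skipped on reset. */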
spin_lock_destroy(&iwceq->sc_ceq.req_cq_lock); kfree(iwceq->sc_ceq.reg_cq); irdma_free_dma_mem(dev->hw, &iwceq->mem); } /** * irdma_del_ceq_0 - destroy ceq 0 * @rf: RDMA PCI function * * Disable the ceq 0 interrupt and destroy the ceq 0 */ static void irdma_del_ceq_0(struct irdma_pci_f *rf) { struct irdma_ceq *iwceq = rf->ceqlist; struct irdma_msix_vector *msix_vec; if (rf->msix_shared) { msix_vec = &rf->iw_msixtbl[0]; rf->sc_dev.irq_ops->irdma_cfg_ceq(&rf->sc_dev, msix_vec->ceq_id, msix_vec->idx, false); irdma_destroy_irq(rf, msix_vec, rf); } else { msix_vec = &rf->iw_msixtbl[1]; irdma_destroy_irq(rf, msix_vec, iwceq); } irdma_destroy_ceq(rf, iwceq); rf->sc_dev.ceq_valid = false; rf->ceqs_count = 0; } /** * irdma_del_ceqs - destroy all ceq's except CEQ 0 * @rf: RDMA PCI function * * Go through all of the device ceq's, except 0, and for each * ceq disable the ceq interrupt and destroy the ceq */ static void irdma_del_ceqs(struct irdma_pci_f *rf) { struct irdma_ceq *iwceq = &rf->ceqlist[1]; struct irdma_msix_vector *msix_vec; u32 i = 0; if (rf->msix_shared) msix_vec = &rf->iw_msixtbl[1]; else msix_vec = &rf->iw_msixtbl[2]; for (i = 1; i < rf->ceqs_count; i++, msix_vec++, iwceq++) { rf->sc_dev.irq_ops->irdma_cfg_ceq(&rf->sc_dev, msix_vec->ceq_id, msix_vec->idx, false); irdma_destroy_irq(rf, msix_vec, iwceq); irdma_cqp_ceq_cmd(&rf->sc_dev, &iwceq->sc_ceq, IRDMA_OP_CEQ_DESTROY); spin_lock_destroy(&iwceq->ce_lock); spin_lock_destroy(&iwceq->sc_ceq.req_cq_lock); kfree(iwceq->sc_ceq.reg_cq); irdma_free_dma_mem(rf->sc_dev.hw, &iwceq->mem); } rf->ceqs_count = 1; } /** * irdma_destroy_ccq - destroy control cq * @rf: RDMA PCI function * * Issue destroy ccq request and * free the resources associated with the ccq */ static void irdma_destroy_ccq(struct irdma_pci_f *rf) { struct irdma_sc_dev *dev = &rf->sc_dev; struct irdma_ccq *ccq = &rf->ccq; int status = 0; if (!rf->reset) status = irdma_sc_ccq_destroy(dev->ccq, 0, true); if (status) irdma_debug(dev, IRDMA_DEBUG_ERR, "CCQ destroy failed %d\n", status); irdma_free_dma_mem(dev->hw, &ccq->mem_cq); } /** * irdma_close_hmc_objects_type - delete hmc objects of a given type * @dev: iwarp device * @obj_type: the hmc object type to be deleted * @hmc_info: host memory info struct * @privileged: permission to close HMC objects * @reset: true if called before reset */ static void irdma_close_hmc_objects_type(struct irdma_sc_dev *dev, enum irdma_hmc_rsrc_type obj_type, struct irdma_hmc_info *hmc_info, bool privileged, bool reset) { struct irdma_hmc_del_obj_info info = {0}; info.hmc_info = hmc_info; info.rsrc_type = obj_type; info.count = hmc_info->hmc_obj[obj_type].cnt; info.privileged = privileged; if (irdma_sc_del_hmc_obj(dev, &info, reset)) irdma_debug(dev, IRDMA_DEBUG_ERR, "del HMC obj of type %d failed\n", obj_type); } /** * irdma_del_hmc_objects - remove all device hmc objects * @dev: iwarp device * @hmc_info: hmc_info to free * @privileged: permission to delete HMC objects * @reset: true if called before reset * @vers: hardware version */ void irdma_del_hmc_objects(struct irdma_sc_dev *dev, struct irdma_hmc_info *hmc_info, bool privileged, bool reset, enum irdma_vers vers) { unsigned int i; for (i = 0; i < IW_HMC_OBJ_TYPE_NUM; i++) { if (dev->hmc_info->hmc_obj[iw_hmc_obj_types[i]].cnt) irdma_close_hmc_objects_type(dev, iw_hmc_obj_types[i], hmc_info, privileged, reset); if (vers == IRDMA_GEN_1 && i == IRDMA_HMC_IW_TIMER) break; } } /** * irdma_create_hmc_obj_type - create hmc object of a given type * @dev: hardware control device structure * @info: 
information for the hmc object to create */ static int irdma_create_hmc_obj_type(struct irdma_sc_dev *dev, struct irdma_hmc_create_obj_info *info) { return irdma_sc_create_hmc_obj(dev, info); } /** * irdma_create_hmc_objs - create all hmc objects for the device * @rf: RDMA PCI function * @privileged: permission to create HMC objects * @vers: HW version * * Create the device hmc objects and allocate hmc pages * Return 0 if successful, otherwise clean up and return error */ static int irdma_create_hmc_objs(struct irdma_pci_f *rf, bool privileged, enum irdma_vers vers) { struct irdma_sc_dev *dev = &rf->sc_dev; struct irdma_hmc_create_obj_info info = {0}; int i, status = 0; info.hmc_info = dev->hmc_info; info.privileged = privileged; info.entry_type = rf->sd_type; for (i = 0; i < IW_HMC_OBJ_TYPE_NUM; i++) { if (iw_hmc_obj_types[i] == IRDMA_HMC_IW_PBLE) continue; if (dev->hmc_info->hmc_obj[iw_hmc_obj_types[i]].cnt) { info.rsrc_type = iw_hmc_obj_types[i]; info.count = dev->hmc_info->hmc_obj[info.rsrc_type].cnt; info.add_sd_cnt = 0; status = irdma_create_hmc_obj_type(dev, &info); if (status) { irdma_debug(dev, IRDMA_DEBUG_ERR, "create obj type %d status = %d\n", iw_hmc_obj_types[i], status); break; } } if (vers == IRDMA_GEN_1 && i == IRDMA_HMC_IW_TIMER) break; } if (!status) return irdma_sc_static_hmc_pages_allocated(dev->cqp, 0, dev->hmc_fn_id, true, true); while (i) { i--; /* destroy the hmc objects of a given type */ if (dev->hmc_info->hmc_obj[iw_hmc_obj_types[i]].cnt) irdma_close_hmc_objects_type(dev, iw_hmc_obj_types[i], dev->hmc_info, privileged, false); } return status; } /** * irdma_obj_aligned_mem - get aligned memory from device allocated memory * @rf: RDMA PCI function * @memptr: points to the memory addresses * @size: size of memory needed * @mask: mask for the aligned memory * * Get aligned memory of the requested size and * update the memptr to point to the new aligned memory * Return 0 if successful, otherwise return no memory error */ static int irdma_obj_aligned_mem(struct irdma_pci_f *rf, struct irdma_dma_mem *memptr, u32 size, u32 mask) { unsigned long va, newva; unsigned long extra; va = (unsigned long)rf->obj_next.va; newva = va; if (mask) newva = ALIGN(va, (unsigned long)mask + 1ULL); extra = newva - va; memptr->va = (u8 *)va + extra; memptr->pa = rf->obj_next.pa + extra; memptr->size = size; if (((u8 *)memptr->va + size) > ((u8 *)rf->obj_mem.va + rf->obj_mem.size)) return -ENOMEM; rf->obj_next.va = (u8 *)memptr->va + size; rf->obj_next.pa = memptr->pa + size; return 0; } /** * irdma_create_cqp - create control qp * @rf: RDMA PCI function * * Return 0, if the cqp and all the resources associated with it * are successfully created, otherwise return error */ static int irdma_create_cqp(struct irdma_pci_f *rf) { u32 sqsize = IRDMA_CQP_SW_SQSIZE_2048; struct irdma_dma_mem mem; struct irdma_sc_dev *dev = &rf->sc_dev; struct irdma_cqp_init_info cqp_init_info = {0}; struct irdma_cqp *cqp = &rf->cqp; u16 maj_err, min_err; int i, status; cqp->cqp_requests = kcalloc(sqsize, sizeof(*cqp->cqp_requests), GFP_KERNEL); memset(cqp->cqp_requests, 0, sqsize * sizeof(*cqp->cqp_requests)); if (!cqp->cqp_requests) return -ENOMEM; cqp->scratch_array = kcalloc(sqsize, sizeof(*cqp->scratch_array), GFP_KERNEL); memset(cqp->scratch_array, 0, sqsize * sizeof(*cqp->scratch_array)); if (!cqp->scratch_array) { status = -ENOMEM; goto err_scratch; } dev->cqp = &cqp->sc_cqp; dev->cqp->dev = dev; cqp->sq.size = sizeof(struct irdma_cqp_sq_wqe) * sqsize; cqp->sq.va = irdma_allocate_dma_mem(dev->hw, &cqp->sq, 
cqp->sq.size, IRDMA_CQP_ALIGNMENT); if (!cqp->sq.va) { status = -ENOMEM; goto err_sq; } status = irdma_obj_aligned_mem(rf, &mem, sizeof(struct irdma_cqp_ctx), IRDMA_HOST_CTX_ALIGNMENT_M); if (status) goto err_ctx; dev->cqp->host_ctx_pa = mem.pa; dev->cqp->host_ctx = mem.va; /* populate the cqp init info */ cqp_init_info.dev = dev; cqp_init_info.sq_size = sqsize; cqp_init_info.sq = cqp->sq.va; cqp_init_info.sq_pa = cqp->sq.pa; cqp_init_info.host_ctx_pa = mem.pa; cqp_init_info.host_ctx = mem.va; cqp_init_info.hmc_profile = rf->rsrc_profile; cqp_init_info.scratch_array = cqp->scratch_array; cqp_init_info.protocol_used = rf->protocol_used; cqp_init_info.en_rem_endpoint_trk = rf->en_rem_endpoint_trk; memcpy(&cqp_init_info.dcqcn_params, &rf->dcqcn_params, sizeof(cqp_init_info.dcqcn_params)); switch (rf->rdma_ver) { case IRDMA_GEN_1: cqp_init_info.hw_maj_ver = IRDMA_CQPHC_HW_MAJVER_GEN_1; break; case IRDMA_GEN_2: cqp_init_info.hw_maj_ver = IRDMA_CQPHC_HW_MAJVER_GEN_2; break; } status = irdma_sc_cqp_init(dev->cqp, &cqp_init_info); if (status) { irdma_debug(dev, IRDMA_DEBUG_ERR, "cqp init status %d\n", status); goto err_ctx; } spin_lock_init(&cqp->req_lock); spin_lock_init(&cqp->compl_lock); status = irdma_sc_cqp_create(dev->cqp, &maj_err, &min_err); if (status) { irdma_debug(dev, IRDMA_DEBUG_ERR, "cqp create failed - status %d maj_err %d min_err %d\n", status, maj_err, min_err); goto err_create; } INIT_LIST_HEAD(&cqp->cqp_avail_reqs); INIT_LIST_HEAD(&cqp->cqp_pending_reqs); /* init the waitqueue of the cqp_requests and add them to the list */ for (i = 0; i < sqsize; i++) { init_waitqueue_head(&cqp->cqp_requests[i].waitq); list_add_tail(&cqp->cqp_requests[i].list, &cqp->cqp_avail_reqs); } init_waitqueue_head(&cqp->remove_wq); return 0; err_create: err_ctx: irdma_free_dma_mem(dev->hw, &cqp->sq); err_sq: kfree(cqp->scratch_array); cqp->scratch_array = NULL; err_scratch: kfree(cqp->cqp_requests); cqp->cqp_requests = NULL; return status; } /** * irdma_create_ccq - create control cq * @rf: RDMA PCI function * * Return 0, if the ccq and the resources associated with it * are successfully created, otherwise return error */ static int irdma_create_ccq(struct irdma_pci_f *rf) { struct irdma_sc_dev *dev = &rf->sc_dev; struct irdma_ccq_init_info info = {0}; struct irdma_ccq *ccq = &rf->ccq; int status; dev->ccq = &ccq->sc_cq; dev->ccq->dev = dev; info.dev = dev; ccq->shadow_area.size = sizeof(struct irdma_cq_shadow_area); ccq->mem_cq.size = sizeof(struct irdma_cqe) * IW_CCQ_SIZE; ccq->mem_cq.va = irdma_allocate_dma_mem(dev->hw, &ccq->mem_cq, ccq->mem_cq.size, IRDMA_CQ0_ALIGNMENT); if (!ccq->mem_cq.va) return -ENOMEM; status = irdma_obj_aligned_mem(rf, &ccq->shadow_area, ccq->shadow_area.size, IRDMA_SHADOWAREA_M); if (status) goto exit; ccq->sc_cq.back_cq = ccq; /* populate the ccq init info */ info.cq_base = ccq->mem_cq.va; info.cq_pa = ccq->mem_cq.pa; info.num_elem = IW_CCQ_SIZE; info.shadow_area = ccq->shadow_area.va; info.shadow_area_pa = ccq->shadow_area.pa; info.ceqe_mask = false; info.ceq_id_valid = true; info.shadow_read_threshold = 16; info.vsi = &rf->default_vsi; status = irdma_sc_ccq_init(dev->ccq, &info); if (!status) status = irdma_sc_ccq_create(dev->ccq, 0, true, true); exit: if (status) irdma_free_dma_mem(dev->hw, &ccq->mem_cq); return status; } /** * irdma_alloc_set_mac - set up a mac address table entry * @iwdev: irdma device * * Allocate a mac ip entry and add it to the hw table Return 0 * if successful, otherwise return error */ static int irdma_alloc_set_mac(struct irdma_device *iwdev) { 
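/* Reserve a local MAC table entry, then program the netdev's current MAC address into it; the entry is released again if programming fails. */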
int status; status = irdma_alloc_local_mac_entry(iwdev->rf, &iwdev->mac_ip_table_idx); if (!status) { status = irdma_add_local_mac_entry(iwdev->rf, - (const u8 *)IF_LLADDR(iwdev->netdev), + (const u8 *)if_getlladdr(iwdev->netdev), (u8)iwdev->mac_ip_table_idx); if (status) irdma_del_local_mac_entry(iwdev->rf, (u8)iwdev->mac_ip_table_idx); } return status; } /** * irdma_irq_request - set up the msix interrupt vector * @rf: RDMA PCI function * @msix_vec: interrupt vector information * @handler: function pointer to associate with interrupt * @argument: argument passed to the handler * * Allocate interrupt resources and setup interrupt * Return 0 if successful, otherwise return error * Note that after this function bus_describe_intr shall * be called. */ static int irdma_irq_request(struct irdma_pci_f *rf, struct irdma_msix_vector *msix_vec, driver_intr_t handler, void *argument) { device_t dev = rf->dev_ctx.dev; int rid = msix_vec->idx + 1; int err, status; msix_vec->res = bus_alloc_resource_any(dev, SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE); if (!msix_vec->res) { irdma_debug(&rf->sc_dev, IRDMA_DEBUG_ERR, "Unable to allocate bus resource int[%d]\n", rid); return -EINVAL; } err = bus_setup_intr(dev, msix_vec->res, INTR_TYPE_NET | INTR_MPSAFE, NULL, handler, argument, &msix_vec->tag); if (err) { irdma_debug(&rf->sc_dev, IRDMA_DEBUG_ERR, "Unable to register handler with %x status\n", err); status = -EINVAL; goto fail_intr; } return 0; fail_intr: bus_release_resource(dev, SYS_RES_IRQ, rid, msix_vec->res); msix_vec->res = NULL; return status; } /** * irdma_cfg_ceq_vector - set up the msix interrupt vector for * ceq * @rf: RDMA PCI function * @iwceq: ceq associated with the vector * @ceq_id: the id number of the iwceq * @msix_vec: interrupt vector information * * Allocate interrupt resources and enable irq handling * Return 0 if successful, otherwise return error */ static int irdma_cfg_ceq_vector(struct irdma_pci_f *rf, struct irdma_ceq *iwceq, u32 ceq_id, struct irdma_msix_vector *msix_vec) { int status; if (rf->msix_shared && !ceq_id) { snprintf(msix_vec->name, sizeof(msix_vec->name) - 1, "irdma-%s-AEQCEQ-0", dev_name(&rf->pcidev->dev)); tasklet_setup(&rf->dpc_tasklet, irdma_dpc); status = irdma_irq_request(rf, msix_vec, irdma_irq_handler, rf); if (status) return status; bus_describe_intr(rf->dev_ctx.dev, msix_vec->res, msix_vec->tag, "%s", msix_vec->name); } else { snprintf(msix_vec->name, sizeof(msix_vec->name) - 1, "irdma-%s-CEQ-%d", dev_name(&rf->pcidev->dev), ceq_id); tasklet_setup(&iwceq->dpc_tasklet, irdma_ceq_dpc); status = irdma_irq_request(rf, msix_vec, irdma_ceq_handler, iwceq); if (status) return status; bus_describe_intr(rf->dev_ctx.dev, msix_vec->res, msix_vec->tag, "%s", msix_vec->name); } msix_vec->ceq_id = ceq_id; rf->sc_dev.irq_ops->irdma_cfg_ceq(&rf->sc_dev, ceq_id, msix_vec->idx, true); return 0; } /** * irdma_cfg_aeq_vector - set up the msix vector for aeq * @rf: RDMA PCI function * * Allocate interrupt resources and enable irq handling * Return 0 if successful, otherwise return error */ static int irdma_cfg_aeq_vector(struct irdma_pci_f *rf) { struct irdma_msix_vector *msix_vec = rf->iw_msixtbl; u32 ret = 0; if (!rf->msix_shared) { snprintf(msix_vec->name, sizeof(msix_vec->name) - 1, "irdma-%s-AEQ", dev_name(&rf->pcidev->dev)); tasklet_setup(&rf->dpc_tasklet, irdma_dpc); ret = irdma_irq_request(rf, msix_vec, irdma_irq_handler, rf); if (ret) return ret; bus_describe_intr(rf->dev_ctx.dev, msix_vec->res, msix_vec->tag, "%s", msix_vec->name); } if (ret) { 
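/*
 * irdma_irq_request() above pairs the standard newbus sequence of
 * bus_alloc_resource_any(SYS_RES_IRQ) and bus_setup_intr(); as its
 * comment notes, the caller then labels the vector with
 * bus_describe_intr(), which both the shared AEQ/CEQ-0 path and the
 * dedicated CEQ path do.  Teardown reverses the order (sketch,
 * assuming the same device_t and msix_vec fields as the request side,
 * where the rid was msix_vec->idx + 1):
 *
 *	bus_teardown_intr(dev, msix_vec->res, msix_vec->tag);
 *	bus_release_resource(dev, SYS_RES_IRQ, msix_vec->idx + 1,
 *			     msix_vec->res);
 */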
irdma_debug(&rf->sc_dev, IRDMA_DEBUG_ERR, "aeq irq config fail\n"); return -EINVAL; } rf->sc_dev.irq_ops->irdma_cfg_aeq(&rf->sc_dev, msix_vec->idx, true); return 0; } /** * irdma_create_ceq - create completion event queue * @rf: RDMA PCI function * @iwceq: pointer to the ceq resources to be created * @ceq_id: the id number of the iwceq * @vsi: SC vsi struct * * Return 0, if the ceq and the resources associated with it * are successfully created, otherwise return error */ static int irdma_create_ceq(struct irdma_pci_f *rf, struct irdma_ceq *iwceq, u32 ceq_id, struct irdma_sc_vsi *vsi) { int status; struct irdma_ceq_init_info info = {0}; struct irdma_sc_dev *dev = &rf->sc_dev; u64 scratch; u32 ceq_size; info.ceq_id = ceq_id; iwceq->rf = rf; ceq_size = min(rf->sc_dev.hmc_info->hmc_obj[IRDMA_HMC_IW_CQ].cnt, dev->hw_attrs.max_hw_ceq_size); iwceq->mem.size = sizeof(struct irdma_ceqe) * ceq_size; iwceq->mem.va = irdma_allocate_dma_mem(dev->hw, &iwceq->mem, iwceq->mem.size, IRDMA_CEQ_ALIGNMENT); if (!iwceq->mem.va) return -ENOMEM; info.ceq_id = ceq_id; info.ceqe_base = iwceq->mem.va; info.ceqe_pa = iwceq->mem.pa; info.elem_cnt = ceq_size; info.reg_cq = kzalloc(sizeof(struct irdma_sc_cq *) * info.elem_cnt, GFP_KERNEL); iwceq->sc_ceq.ceq_id = ceq_id; info.dev = dev; info.vsi = vsi; scratch = (uintptr_t)&rf->cqp.sc_cqp; status = irdma_sc_ceq_init(&iwceq->sc_ceq, &info); if (!status) { if (dev->ceq_valid) status = irdma_cqp_ceq_cmd(&rf->sc_dev, &iwceq->sc_ceq, IRDMA_OP_CEQ_CREATE); else status = irdma_sc_cceq_create(&iwceq->sc_ceq, scratch); } if (status) { kfree(info.reg_cq); irdma_free_dma_mem(dev->hw, &iwceq->mem); } return status; } /** * irdma_setup_ceq_0 - create CEQ 0 and it's interrupt resource * @rf: RDMA PCI function * * Allocate a list for all device completion event queues * Create the ceq 0 and configure it's msix interrupt vector * Return 0, if successfully set up, otherwise return error */ static int irdma_setup_ceq_0(struct irdma_pci_f *rf) { struct irdma_ceq *iwceq; struct irdma_msix_vector *msix_vec; u32 i; int status = 0; u32 num_ceqs; num_ceqs = min(rf->msix_count, rf->sc_dev.hmc_fpm_misc.max_ceqs); rf->ceqlist = kcalloc(num_ceqs, sizeof(*rf->ceqlist), GFP_KERNEL); memset(rf->ceqlist, 0, num_ceqs * sizeof(*rf->ceqlist)); if (!rf->ceqlist) { status = -ENOMEM; goto exit; } iwceq = &rf->ceqlist[0]; status = irdma_create_ceq(rf, iwceq, 0, &rf->default_vsi); if (status) { irdma_debug(&rf->sc_dev, IRDMA_DEBUG_ERR, "create ceq status = %d\n", status); goto exit; } spin_lock_init(&iwceq->ce_lock); i = rf->msix_shared ? 
0 : 1; msix_vec = &rf->iw_msixtbl[i]; iwceq->irq = msix_vec->irq; iwceq->msix_idx = msix_vec->idx; status = irdma_cfg_ceq_vector(rf, iwceq, 0, msix_vec); if (status) { irdma_destroy_ceq(rf, iwceq); goto exit; } irdma_ena_intr(&rf->sc_dev, msix_vec->idx); rf->ceqs_count++; exit: if (status && !rf->ceqs_count) { kfree(rf->ceqlist); rf->ceqlist = NULL; return status; } rf->sc_dev.ceq_valid = true; return 0; } /** * irdma_setup_ceqs - manage the device ceq's and their interrupt resources * @rf: RDMA PCI function * @vsi: VSI structure for this CEQ * * Allocate a list for all device completion event queues * Create the ceq's and configure their msix interrupt vectors * Return 0, if ceqs are successfully set up, otherwise return error */ static int irdma_setup_ceqs(struct irdma_pci_f *rf, struct irdma_sc_vsi *vsi) { u32 i; u32 ceq_id; struct irdma_ceq *iwceq; struct irdma_msix_vector *msix_vec; int status; u32 num_ceqs; num_ceqs = min(rf->msix_count, rf->sc_dev.hmc_fpm_misc.max_ceqs); i = (rf->msix_shared) ? 1 : 2; for (ceq_id = 1; i < num_ceqs; i++, ceq_id++) { iwceq = &rf->ceqlist[ceq_id]; status = irdma_create_ceq(rf, iwceq, ceq_id, vsi); if (status) { irdma_debug(&rf->sc_dev, IRDMA_DEBUG_ERR, "create ceq status = %d\n", status); goto del_ceqs; } spin_lock_init(&iwceq->ce_lock); msix_vec = &rf->iw_msixtbl[i]; iwceq->irq = msix_vec->irq; iwceq->msix_idx = msix_vec->idx; status = irdma_cfg_ceq_vector(rf, iwceq, ceq_id, msix_vec); if (status) { irdma_destroy_ceq(rf, iwceq); goto del_ceqs; } irdma_ena_intr(&rf->sc_dev, msix_vec->idx); rf->ceqs_count++; } return 0; del_ceqs: irdma_del_ceqs(rf); return status; } static int irdma_create_virt_aeq(struct irdma_pci_f *rf, u32 size) { struct irdma_aeq *aeq = &rf->aeq; dma_addr_t *pg_arr; u32 pg_cnt; int status; if (rf->rdma_ver < IRDMA_GEN_2) return -EOPNOTSUPP; aeq->mem.size = sizeof(struct irdma_sc_aeqe) * size; aeq->mem.va = vzalloc(aeq->mem.size); if (!aeq->mem.va) return -ENOMEM; pg_cnt = DIV_ROUND_UP(aeq->mem.size, PAGE_SIZE); status = irdma_get_pble(rf->pble_rsrc, &aeq->palloc, pg_cnt, true); if (status) { vfree(aeq->mem.va); return status; } pg_arr = (dma_addr_t *) aeq->palloc.level1.addr; status = irdma_map_vm_page_list(&rf->hw, aeq->mem.va, pg_arr, pg_cnt); if (status) { irdma_free_pble(rf->pble_rsrc, &aeq->palloc); vfree(aeq->mem.va); return status; } return 0; } /** * irdma_create_aeq - create async event queue * @rf: RDMA PCI function * * Return 0, if the aeq and the resources associated with it * are successfully created, otherwise return error */ static int irdma_create_aeq(struct irdma_pci_f *rf) { struct irdma_aeq_init_info info = {0}; struct irdma_sc_dev *dev = &rf->sc_dev; struct irdma_aeq *aeq = &rf->aeq; struct irdma_hmc_info *hmc_info = rf->sc_dev.hmc_info; u32 aeq_size; u8 multiplier = (rf->protocol_used == IRDMA_IWARP_PROTOCOL_ONLY) ? 2 : 1; int status; aeq_size = multiplier * hmc_info->hmc_obj[IRDMA_HMC_IW_QP].cnt + hmc_info->hmc_obj[IRDMA_HMC_IW_CQ].cnt; aeq_size = min(aeq_size, dev->hw_attrs.max_hw_aeq_size); aeq->mem.size = sizeof(struct irdma_sc_aeqe) * aeq_size; aeq->mem.va = irdma_allocate_dma_mem(dev->hw, &aeq->mem, aeq->mem.size, IRDMA_AEQ_ALIGNMENT); if (aeq->mem.va) goto skip_virt_aeq; /* physically mapped aeq failed. 
setup virtual aeq */ status = irdma_create_virt_aeq(rf, aeq_size); if (status) return status; info.virtual_map = true; aeq->virtual_map = info.virtual_map; info.pbl_chunk_size = 1; info.first_pm_pbl_idx = aeq->palloc.level1.idx; skip_virt_aeq: info.aeqe_base = aeq->mem.va; info.aeq_elem_pa = aeq->mem.pa; info.elem_cnt = aeq_size; info.dev = dev; info.msix_idx = rf->iw_msixtbl->idx; status = irdma_sc_aeq_init(&aeq->sc_aeq, &info); if (status) goto err; status = irdma_cqp_aeq_cmd(dev, &aeq->sc_aeq, IRDMA_OP_AEQ_CREATE); if (status) goto err; return 0; err: if (aeq->virtual_map) irdma_destroy_virt_aeq(rf); else irdma_free_dma_mem(dev->hw, &aeq->mem); return status; } /** * irdma_setup_aeq - set up the device aeq * @rf: RDMA PCI function * * Create the aeq and configure its msix interrupt vector * Return 0 if successful, otherwise return error */ static int irdma_setup_aeq(struct irdma_pci_f *rf) { struct irdma_sc_dev *dev = &rf->sc_dev; int status; status = irdma_create_aeq(rf); if (status) return status; status = irdma_cfg_aeq_vector(rf); if (status) { irdma_destroy_aeq(rf); return status; } if (!rf->msix_shared) irdma_ena_intr(dev, rf->iw_msixtbl[0].idx); return 0; } /** * irdma_initialize_ilq - create iwarp local queue for cm * @iwdev: irdma device * * Return 0 if successful, otherwise return error */ static int irdma_initialize_ilq(struct irdma_device *iwdev) { struct irdma_puda_rsrc_info info = {0}; int status; info.type = IRDMA_PUDA_RSRC_TYPE_ILQ; info.cq_id = 1; info.qp_id = 1; info.count = 1; info.pd_id = 1; info.abi_ver = IRDMA_ABI_VER; info.sq_size = min(iwdev->rf->max_qp / 2, (u32)32768); info.rq_size = info.sq_size; info.buf_size = 1024; info.tx_buf_cnt = 2 * info.sq_size; info.receive = irdma_receive_ilq; info.xmit_complete = irdma_free_sqbuf; status = irdma_puda_create_rsrc(&iwdev->vsi, &info); if (status) irdma_debug(&iwdev->rf->sc_dev, IRDMA_DEBUG_ERR, "ilq create fail\n"); return status; } /** * irdma_initialize_ieq - create iwarp exception queue * @iwdev: irdma device * * Return 0 if successful, otherwise return error */ static int irdma_initialize_ieq(struct irdma_device *iwdev) { struct irdma_puda_rsrc_info info = {0}; int status; info.type = IRDMA_PUDA_RSRC_TYPE_IEQ; info.cq_id = 2; info.qp_id = iwdev->vsi.exception_lan_q; info.count = 1; info.pd_id = 2; info.abi_ver = IRDMA_ABI_VER; info.sq_size = min(iwdev->rf->max_qp / 2, (u32)32768); info.rq_size = info.sq_size; info.buf_size = iwdev->vsi.mtu + IRDMA_IPV4_PAD; info.tx_buf_cnt = 4096; status = irdma_puda_create_rsrc(&iwdev->vsi, &info); if (status) irdma_debug(&iwdev->rf->sc_dev, IRDMA_DEBUG_ERR, "ieq create fail\n"); return status; } /** * irdma_reinitialize_ieq - destroy and re-create ieq * @vsi: VSI structure */ void irdma_reinitialize_ieq(struct irdma_sc_vsi *vsi) { struct irdma_device *iwdev = vsi->back_vsi; struct irdma_pci_f *rf = iwdev->rf; irdma_puda_dele_rsrc(vsi, IRDMA_PUDA_RSRC_TYPE_IEQ, false); if (irdma_initialize_ieq(iwdev)) { iwdev->rf->reset = true; rf->gen_ops.request_reset(rf); } } /** * irdma_hmc_setup - create hmc objects for the device * @rf: RDMA PCI function * * Set up the device private memory space for the number and size of * the hmc objects and create the objects * Return 0 if successful, otherwise return error */ static int irdma_hmc_setup(struct irdma_pci_f *rf) { int status; struct irdma_sc_dev *dev = &rf->sc_dev; u32 qpcnt; qpcnt = rsrc_limits_table[rf->limits_sel].qplimit; rf->sd_type = IRDMA_SD_TYPE_DIRECT; status = irdma_cfg_fpm_val(dev, qpcnt); if (status) return status; status = 
irdma_create_hmc_objs(rf, true, rf->rdma_ver); return status; } /** * irdma_del_init_mem - deallocate memory resources * @rf: RDMA PCI function */ static void irdma_del_init_mem(struct irdma_pci_f *rf) { struct irdma_sc_dev *dev = &rf->sc_dev; kfree(dev->hmc_info->sd_table.sd_entry); dev->hmc_info->sd_table.sd_entry = NULL; vfree(rf->mem_rsrc); rf->mem_rsrc = NULL; irdma_free_dma_mem(&rf->hw, &rf->obj_mem); if (rf->rdma_ver != IRDMA_GEN_1) { kfree(rf->allocated_ws_nodes); rf->allocated_ws_nodes = NULL; } mutex_destroy(&dev->ws_mutex); kfree(rf->ceqlist); rf->ceqlist = NULL; kfree(rf->iw_msixtbl); rf->iw_msixtbl = NULL; kfree(rf->hmc_info_mem); rf->hmc_info_mem = NULL; } /** * irdma_initialize_dev - initialize device * @rf: RDMA PCI function * * Allocate memory for the hmc objects and initialize iwdev * Return 0 if successful, otherwise clean up the resources * and return error */ static int irdma_initialize_dev(struct irdma_pci_f *rf) { int status; struct irdma_sc_dev *dev = &rf->sc_dev; struct irdma_device_init_info info = {0}; struct irdma_dma_mem mem; u32 size; size = sizeof(struct irdma_hmc_pble_rsrc) + sizeof(struct irdma_hmc_info) + (sizeof(struct irdma_hmc_obj_info) * IRDMA_HMC_IW_MAX); rf->hmc_info_mem = kzalloc(size, GFP_KERNEL); if (!rf->hmc_info_mem) return -ENOMEM; rf->pble_rsrc = (struct irdma_hmc_pble_rsrc *)rf->hmc_info_mem; dev->hmc_info = &rf->hw.hmc; dev->hmc_info->hmc_obj = (struct irdma_hmc_obj_info *) (rf->pble_rsrc + 1); status = irdma_obj_aligned_mem(rf, &mem, IRDMA_QUERY_FPM_BUF_SIZE, IRDMA_FPM_QUERY_BUF_ALIGNMENT_M); if (status) goto error; info.fpm_query_buf_pa = mem.pa; info.fpm_query_buf = mem.va; status = irdma_obj_aligned_mem(rf, &mem, IRDMA_COMMIT_FPM_BUF_SIZE, IRDMA_FPM_COMMIT_BUF_ALIGNMENT_M); if (status) goto error; info.fpm_commit_buf_pa = mem.pa; info.fpm_commit_buf = mem.va; info.bar0 = rf->hw.hw_addr; info.hmc_fn_id = rf->peer_info->pf_id; /* * the debug_mask is already assigned at this point through sysctl and so the value shouldn't be overwritten */ info.debug_mask = rf->sc_dev.debug_mask; info.hw = &rf->hw; status = irdma_sc_dev_init(&rf->sc_dev, &info); if (status) goto error; return status; error: kfree(rf->hmc_info_mem); rf->hmc_info_mem = NULL; return status; } /** * irdma_rt_deinit_hw - clean up the irdma device resources * @iwdev: irdma device * * remove the mac ip entry and ipv4/ipv6 addresses, destroy the * device queues and free the pble and the hmc objects */ void irdma_rt_deinit_hw(struct irdma_device *iwdev) { struct irdma_sc_qp qp = {{0}}; irdma_debug(&iwdev->rf->sc_dev, IRDMA_DEBUG_INIT, "state = %d\n", iwdev->init_state); switch (iwdev->init_state) { case IP_ADDR_REGISTERED: if (iwdev->rf->sc_dev.hw_attrs.uk_attrs.hw_rev == IRDMA_GEN_1) irdma_del_local_mac_entry(iwdev->rf, (u8)iwdev->mac_ip_table_idx); /* fallthrough */ case AEQ_CREATED: case PBLE_CHUNK_MEM: case CEQS_CREATED: case REM_ENDPOINT_TRK_CREATED: if (iwdev->rf->en_rem_endpoint_trk) { qp.dev = &iwdev->rf->sc_dev; qp.qp_uk.qp_id = IRDMA_REM_ENDPOINT_TRK_QPID; qp.qp_uk.qp_type = IRDMA_QP_TYPE_IWARP; irdma_cqp_qp_destroy_cmd(qp.dev, &qp); } /* fallthrough */ case IEQ_CREATED: if (!iwdev->roce_mode) irdma_puda_dele_rsrc(&iwdev->vsi, IRDMA_PUDA_RSRC_TYPE_IEQ, iwdev->rf->reset); /* fallthrough */ case ILQ_CREATED: if (!iwdev->roce_mode) irdma_puda_dele_rsrc(&iwdev->vsi, IRDMA_PUDA_RSRC_TYPE_ILQ, iwdev->rf->reset); break; default: irdma_dev_warn(&iwdev->ibdev, "bad init_state = %d\n", iwdev->init_state); break; } irdma_cleanup_cm_core(&iwdev->cm_core); if (iwdev->vsi.pestat) { 
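/*
 * vsi.pestat was kzalloc'ed in irdma_rt_init_hw() and handed to
 * irdma_vsi_stats_init(), so teardown mirrors that in two steps:
 * irdma_vsi_stats_free() releases the per-VSI stats state, then
 * kfree() returns the buffer itself.
 */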
irdma_vsi_stats_free(&iwdev->vsi); kfree(iwdev->vsi.pestat); } if (iwdev->cleanup_wq) destroy_workqueue(iwdev->cleanup_wq); } static int irdma_setup_init_state(struct irdma_pci_f *rf) { int status; status = irdma_save_msix_info(rf); if (status) return status; rf->obj_mem.size = 8192; rf->obj_mem.va = irdma_allocate_dma_mem(&rf->hw, &rf->obj_mem, rf->obj_mem.size, IRDMA_HW_PAGE_SIZE); if (!rf->obj_mem.va) { status = -ENOMEM; goto clean_msixtbl; } rf->obj_next = rf->obj_mem; status = irdma_initialize_dev(rf); if (status) goto clean_obj_mem; return 0; clean_obj_mem: irdma_free_dma_mem(&rf->hw, &rf->obj_mem); clean_msixtbl: kfree(rf->iw_msixtbl); rf->iw_msixtbl = NULL; return status; } /** * irdma_get_used_rsrc - determine resources used internally * @iwdev: irdma device * * Called at the end of open to get all internal allocations */ static void irdma_get_used_rsrc(struct irdma_device *iwdev) { iwdev->rf->used_pds = find_first_zero_bit(iwdev->rf->allocated_pds, iwdev->rf->max_pd); iwdev->rf->used_qps = find_first_zero_bit(iwdev->rf->allocated_qps, iwdev->rf->max_qp); iwdev->rf->used_cqs = find_first_zero_bit(iwdev->rf->allocated_cqs, iwdev->rf->max_cq); iwdev->rf->used_mrs = find_first_zero_bit(iwdev->rf->allocated_mrs, iwdev->rf->max_mr); } void irdma_ctrl_deinit_hw(struct irdma_pci_f *rf) { enum init_completion_state state = rf->init_state; rf->init_state = INVALID_STATE; if (rf->rsrc_created) { irdma_destroy_aeq(rf); irdma_destroy_pble_prm(rf->pble_rsrc); irdma_del_ceqs(rf); rf->rsrc_created = false; } switch (state) { case CEQ0_CREATED: irdma_del_ceq_0(rf); /* fallthrough */ case CCQ_CREATED: irdma_destroy_ccq(rf); /* fallthrough */ case HW_RSRC_INITIALIZED: case HMC_OBJS_CREATED: irdma_del_hmc_objects(&rf->sc_dev, rf->sc_dev.hmc_info, true, rf->reset, rf->rdma_ver); /* fallthrough */ case CQP_CREATED: irdma_destroy_cqp(rf, !rf->reset); /* fallthrough */ case INITIAL_STATE: irdma_del_init_mem(rf); break; case INVALID_STATE: default: irdma_dev_warn(&rf->iwdev->ibdev, "bad init_state = %d\n", rf->init_state); break; } } /** * irdma_rt_init_hw - Initializes runtime portion of HW * @iwdev: irdma device * @l2params: qos, tc, mtu info from netdev driver * * Create device queues ILQ, IEQ, CEQs and PBLEs. Setup irdma * device resource objects. 
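 *
 * The function advances iwdev->init_state as each stage completes
 * (some stages are conditional on iWARP mode and remote endpoint
 * tracking): ILQ_CREATED, IEQ_CREATED, REM_ENDPOINT_TRK_CREATED,
 * CEQS_CREATED, PBLE_CHUNK_MEM, AEQ_CREATED and finally
 * IP_ADDR_REGISTERED.  On failure irdma_rt_deinit_hw() switches on the
 * recorded state and unwinds only the stages that were reached.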
*/ int irdma_rt_init_hw(struct irdma_device *iwdev, struct irdma_l2params *l2params) { struct irdma_pci_f *rf = iwdev->rf; struct irdma_sc_dev *dev = &rf->sc_dev; struct irdma_sc_qp qp = {{0}}; struct irdma_vsi_init_info vsi_info = {0}; struct irdma_vsi_stats_info stats_info = {0}; int status; vsi_info.dev = dev; vsi_info.back_vsi = iwdev; vsi_info.params = l2params; vsi_info.pf_data_vsi_num = iwdev->vsi_num; vsi_info.register_qset = rf->gen_ops.register_qset; vsi_info.unregister_qset = rf->gen_ops.unregister_qset; vsi_info.exception_lan_q = 2; irdma_sc_vsi_init(&iwdev->vsi, &vsi_info); status = irdma_setup_cm_core(iwdev, rf->rdma_ver); if (status) return status; stats_info.pestat = kzalloc(sizeof(*stats_info.pestat), GFP_KERNEL); if (!stats_info.pestat) { irdma_cleanup_cm_core(&iwdev->cm_core); return -ENOMEM; } stats_info.fcn_id = dev->hmc_fn_id; status = irdma_vsi_stats_init(&iwdev->vsi, &stats_info); if (status) { irdma_cleanup_cm_core(&iwdev->cm_core); kfree(stats_info.pestat); return status; } do { if (!iwdev->roce_mode) { status = irdma_initialize_ilq(iwdev); if (status) break; iwdev->init_state = ILQ_CREATED; status = irdma_initialize_ieq(iwdev); if (status) break; iwdev->init_state = IEQ_CREATED; } if (iwdev->rf->en_rem_endpoint_trk) { qp.dev = dev; qp.qp_uk.qp_id = IRDMA_REM_ENDPOINT_TRK_QPID; qp.qp_uk.qp_type = IRDMA_QP_TYPE_IWARP; status = irdma_cqp_qp_create_cmd(dev, &qp); if (status) break; iwdev->init_state = REM_ENDPOINT_TRK_CREATED; } if (!rf->rsrc_created) { status = irdma_setup_ceqs(rf, &iwdev->vsi); if (status) break; iwdev->init_state = CEQS_CREATED; status = irdma_hmc_init_pble(&rf->sc_dev, rf->pble_rsrc); if (status) { irdma_del_ceqs(rf); break; } iwdev->init_state = PBLE_CHUNK_MEM; status = irdma_setup_aeq(rf); if (status) { irdma_destroy_pble_prm(rf->pble_rsrc); irdma_del_ceqs(rf); break; } iwdev->init_state = AEQ_CREATED; rf->rsrc_created = true; } if (iwdev->rf->sc_dev.hw_attrs.uk_attrs.hw_rev == IRDMA_GEN_1) irdma_alloc_set_mac(iwdev); irdma_add_ip(iwdev); iwdev->init_state = IP_ADDR_REGISTERED; /* * handles asynch cleanup tasks - disconnect CM , free qp, free cq bufs */ iwdev->cleanup_wq = alloc_workqueue("irdma-cleanup-wq", WQ_UNBOUND, WQ_UNBOUND_MAX_ACTIVE); if (!iwdev->cleanup_wq) return -ENOMEM; irdma_get_used_rsrc(iwdev); init_waitqueue_head(&iwdev->suspend_wq); return 0; } while (0); dev_err(&rf->pcidev->dev, "HW runtime init FAIL status = %d last cmpl = %d\n", status, iwdev->init_state); irdma_rt_deinit_hw(iwdev); return status; } /** * irdma_ctrl_init_hw - Initializes control portion of HW * @rf: RDMA PCI function * * Create admin queues, HMC obejcts and RF resource objects */ int irdma_ctrl_init_hw(struct irdma_pci_f *rf) { struct irdma_sc_dev *dev = &rf->sc_dev; int status; do { status = irdma_setup_init_state(rf); if (status) break; rf->init_state = INITIAL_STATE; status = irdma_create_cqp(rf); if (status) break; rf->init_state = CQP_CREATED; dev->feature_info[IRDMA_FEATURE_FW_INFO] = IRDMA_FW_VER_DEFAULT; if (rf->rdma_ver != IRDMA_GEN_1) { status = irdma_get_rdma_features(dev); if (status) break; } status = irdma_hmc_setup(rf); if (status) break; rf->init_state = HMC_OBJS_CREATED; status = irdma_initialize_hw_rsrc(rf); if (status) break; rf->init_state = HW_RSRC_INITIALIZED; status = irdma_create_ccq(rf); if (status) break; rf->init_state = CCQ_CREATED; status = irdma_setup_ceq_0(rf); if (status) break; rf->init_state = CEQ0_CREATED; /* Handles processing of CQP completions */ rf->cqp_cmpl_wq = alloc_ordered_workqueue("cqp_cmpl_wq", WQ_HIGHPRI | 
WQ_UNBOUND); if (!rf->cqp_cmpl_wq) { status = -ENOMEM; break; } INIT_WORK(&rf->cqp_cmpl_work, cqp_compl_worker); irdma_sc_ccq_arm(dev->ccq); return 0; } while (0); pr_err("IRDMA hardware initialization FAILED init_state=%d status=%d\n", rf->init_state, status); irdma_ctrl_deinit_hw(rf); return status; } /** * irdma_set_hw_rsrc - set hw memory resources. * @rf: RDMA PCI function */ static void irdma_set_hw_rsrc(struct irdma_pci_f *rf) { rf->allocated_qps = (void *)(rf->mem_rsrc + (sizeof(struct irdma_arp_entry) * rf->arp_table_size)); rf->allocated_cqs = &rf->allocated_qps[BITS_TO_LONGS(rf->max_qp)]; rf->allocated_mrs = &rf->allocated_cqs[BITS_TO_LONGS(rf->max_cq)]; rf->allocated_pds = &rf->allocated_mrs[BITS_TO_LONGS(rf->max_mr)]; rf->allocated_ahs = &rf->allocated_pds[BITS_TO_LONGS(rf->max_pd)]; rf->allocated_mcgs = &rf->allocated_ahs[BITS_TO_LONGS(rf->max_ah)]; rf->allocated_arps = &rf->allocated_mcgs[BITS_TO_LONGS(rf->max_mcg)]; rf->qp_table = (struct irdma_qp **) (&rf->allocated_arps[BITS_TO_LONGS(rf->arp_table_size)]); rf->cq_table = (struct irdma_cq **)(&rf->qp_table[rf->max_qp]); spin_lock_init(&rf->rsrc_lock); spin_lock_init(&rf->arp_lock); spin_lock_init(&rf->qptable_lock); spin_lock_init(&rf->cqtable_lock); spin_lock_init(&rf->qh_list_lock); } /** * irdma_calc_mem_rsrc_size - calculate memory resources size. * @rf: RDMA PCI function */ static u32 irdma_calc_mem_rsrc_size(struct irdma_pci_f *rf){ u32 rsrc_size; rsrc_size = sizeof(struct irdma_arp_entry) * rf->arp_table_size; rsrc_size += sizeof(unsigned long) * BITS_TO_LONGS(rf->max_qp); rsrc_size += sizeof(unsigned long) * BITS_TO_LONGS(rf->max_mr); rsrc_size += sizeof(unsigned long) * BITS_TO_LONGS(rf->max_cq); rsrc_size += sizeof(unsigned long) * BITS_TO_LONGS(rf->max_pd); rsrc_size += sizeof(unsigned long) * BITS_TO_LONGS(rf->arp_table_size); rsrc_size += sizeof(unsigned long) * BITS_TO_LONGS(rf->max_ah); rsrc_size += sizeof(unsigned long) * BITS_TO_LONGS(rf->max_mcg); rsrc_size += sizeof(struct irdma_qp **) * rf->max_qp; rsrc_size += sizeof(struct irdma_cq **) * rf->max_cq; return rsrc_size; } /** * irdma_initialize_hw_rsrc - initialize hw resource tracking array * @rf: RDMA PCI function */ u32 irdma_initialize_hw_rsrc(struct irdma_pci_f *rf) { u32 rsrc_size; u32 mrdrvbits; u32 ret; if (rf->rdma_ver != IRDMA_GEN_1) { rf->allocated_ws_nodes = kcalloc(BITS_TO_LONGS(IRDMA_MAX_WS_NODES), sizeof(unsigned long), GFP_KERNEL); if (!rf->allocated_ws_nodes) return -ENOMEM; set_bit(0, rf->allocated_ws_nodes); rf->max_ws_node_id = IRDMA_MAX_WS_NODES; } rf->max_cqe = rf->sc_dev.hw_attrs.uk_attrs.max_hw_cq_size; rf->max_qp = rf->sc_dev.hmc_info->hmc_obj[IRDMA_HMC_IW_QP].cnt; rf->max_mr = rf->sc_dev.hmc_info->hmc_obj[IRDMA_HMC_IW_MR].cnt; rf->max_cq = rf->sc_dev.hmc_info->hmc_obj[IRDMA_HMC_IW_CQ].cnt; rf->max_pd = rf->sc_dev.hw_attrs.max_hw_pds; rf->arp_table_size = rf->sc_dev.hmc_info->hmc_obj[IRDMA_HMC_IW_ARP].cnt; rf->max_ah = rf->sc_dev.hmc_info->hmc_obj[IRDMA_HMC_IW_FSIAV].cnt; rf->max_mcg = rf->max_qp; rsrc_size = irdma_calc_mem_rsrc_size(rf); rf->mem_rsrc = vzalloc(rsrc_size); if (!rf->mem_rsrc) { ret = -ENOMEM; goto mem_rsrc_vmalloc_fail; } rf->arp_table = (struct irdma_arp_entry *)rf->mem_rsrc; irdma_set_hw_rsrc(rf); set_bit(0, rf->allocated_mrs); set_bit(0, rf->allocated_qps); set_bit(0, rf->allocated_cqs); set_bit(0, rf->allocated_pds); set_bit(0, rf->allocated_arps); set_bit(0, rf->allocated_ahs); set_bit(0, rf->allocated_mcgs); set_bit(2, rf->allocated_qps); /* qp 2 IEQ */ set_bit(1, rf->allocated_qps); /* qp 1 ILQ */ 
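/*
 * The set_bit() calls around this point pre-reserve index 0 of every
 * bitmap plus the IDs hard-coded by irdma_initialize_ilq() and
 * irdma_initialize_ieq() (ILQ: qp 1/cq 1/pd 1, IEQ: qp 2/cq 2/pd 2)
 * and the remote endpoint tracking QP, so the resource allocator never
 * hands them out again.
 *
 * For the stag mask computed a few lines below: with, say,
 * max_mr = 65536 (2^16), get_count_order() gives 16, so
 * mrdrvbits = 24 - 16 = 8 and
 * mr_stagmask = ~(0xff << 24) = 0x00ffffff, i.e. the top eight bits of
 * a 32-bit stag are masked off.
 */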
set_bit(IRDMA_REM_ENDPOINT_TRK_QPID, rf->allocated_qps); /* qp 3 Remote Endpt trk */ set_bit(1, rf->allocated_cqs); set_bit(1, rf->allocated_pds); set_bit(2, rf->allocated_cqs); set_bit(2, rf->allocated_pds); INIT_LIST_HEAD(&rf->mc_qht_list.list); /* stag index mask has a minimum of 14 bits */ mrdrvbits = 24 - max(get_count_order(rf->max_mr), 14); rf->mr_stagmask = ~(((1 << mrdrvbits) - 1) << (32 - mrdrvbits)); return 0; mem_rsrc_vmalloc_fail: kfree(rf->allocated_ws_nodes); rf->allocated_ws_nodes = NULL; return ret; } /** * irdma_cqp_ce_handler - handle cqp completions * @rf: RDMA PCI function * @cq: cq for cqp completions */ void irdma_cqp_ce_handler(struct irdma_pci_f *rf, struct irdma_sc_cq *cq) { struct irdma_cqp_request *cqp_request; struct irdma_sc_dev *dev = &rf->sc_dev; u32 cqe_count = 0; struct irdma_ccq_cqe_info info; unsigned long flags; int ret; do { memset(&info, 0, sizeof(info)); spin_lock_irqsave(&rf->cqp.compl_lock, flags); ret = irdma_sc_ccq_get_cqe_info(cq, &info); spin_unlock_irqrestore(&rf->cqp.compl_lock, flags); if (ret) break; cqp_request = (struct irdma_cqp_request *) (uintptr_t)info.scratch; if (info.error && irdma_cqp_crit_err(dev, cqp_request->info.cqp_cmd, info.maj_err_code, info.min_err_code)) irdma_dev_err(&rf->iwdev->ibdev, "cqp opcode = 0x%x maj_err_code = 0x%x min_err_code = 0x%x\n", info.op_code, info.maj_err_code, info.min_err_code); if (cqp_request) { cqp_request->compl_info.maj_err_code = info.maj_err_code; cqp_request->compl_info.min_err_code = info.min_err_code; cqp_request->compl_info.op_ret_val = info.op_ret_val; cqp_request->compl_info.error = info.error; irdma_complete_cqp_request(&rf->cqp, cqp_request); } cqe_count++; } while (1); if (cqe_count) { irdma_process_bh(dev); irdma_sc_ccq_arm(dev->ccq); } } /** * cqp_compl_worker - Handle cqp completions * @work: Pointer to work structure */ void cqp_compl_worker(struct work_struct *work) { struct irdma_pci_f *rf = container_of(work, struct irdma_pci_f, cqp_cmpl_work); struct irdma_sc_cq *cq = &rf->ccq.sc_cq; irdma_cqp_ce_handler(rf, cq); } /** * irdma_lookup_apbvt_entry - lookup hash table for an existing apbvt entry corresponding to port * @cm_core: cm's core * @port: port to identify apbvt entry */ static struct irdma_apbvt_entry * irdma_lookup_apbvt_entry(struct irdma_cm_core *cm_core, u16 port) { struct irdma_apbvt_entry *entry; HASH_FOR_EACH_POSSIBLE(cm_core->apbvt_hash_tbl, entry, hlist, port) { if (entry->port == port) { entry->use_cnt++; return entry; } } return NULL; } /** * irdma_next_iw_state - modify qp state * @iwqp: iwarp qp to modify * @state: next state for qp * @del_hash: del hash * @term: term message * @termlen: length of term message */ void irdma_next_iw_state(struct irdma_qp *iwqp, u8 state, u8 del_hash, u8 term, u8 termlen) { struct irdma_modify_qp_info info = {0}; info.next_iwarp_state = state; info.remove_hash_idx = del_hash; info.cq_num_valid = true; info.arp_cache_idx_valid = true; info.dont_send_term = true; info.dont_send_fin = true; info.termlen = termlen; if (term & IRDMAQP_TERM_SEND_TERM_ONLY) info.dont_send_term = false; if (term & IRDMAQP_TERM_SEND_FIN_ONLY) info.dont_send_fin = false; if (iwqp->sc_qp.term_flags && state == IRDMA_QP_STATE_ERROR) info.reset_tcp_conn = true; iwqp->hw_iwarp_state = state; irdma_hw_modify_qp(iwqp->iwdev, iwqp, &info, 0); iwqp->iwarp_state = info.next_iwarp_state; } /** * irdma_del_local_mac_entry - remove a mac entry from the hw * table * @rf: RDMA PCI function * @idx: the index of the mac ip address to delete */ void 
irdma_del_local_mac_entry(struct irdma_pci_f *rf, u16 idx) { struct irdma_cqp *iwcqp = &rf->cqp; struct irdma_cqp_request *cqp_request; struct cqp_cmds_info *cqp_info; cqp_request = irdma_alloc_and_get_cqp_request(iwcqp, true); if (!cqp_request) return; cqp_info = &cqp_request->info; cqp_info->cqp_cmd = IRDMA_OP_DELETE_LOCAL_MAC_ENTRY; cqp_info->post_sq = 1; cqp_info->in.u.del_local_mac_entry.cqp = &iwcqp->sc_cqp; cqp_info->in.u.del_local_mac_entry.scratch = (uintptr_t)cqp_request; cqp_info->in.u.del_local_mac_entry.entry_idx = idx; cqp_info->in.u.del_local_mac_entry.ignore_ref_count = 0; irdma_handle_cqp_op(rf, cqp_request); irdma_put_cqp_request(iwcqp, cqp_request); } /** * irdma_add_local_mac_entry - add a mac ip address entry to the * hw table * @rf: RDMA PCI function * @mac_addr: pointer to mac address * @idx: the index of the mac ip address to add */ int irdma_add_local_mac_entry(struct irdma_pci_f *rf, const u8 *mac_addr, u16 idx) { struct irdma_local_mac_entry_info *info; struct irdma_cqp *iwcqp = &rf->cqp; struct irdma_cqp_request *cqp_request; struct cqp_cmds_info *cqp_info; int status; cqp_request = irdma_alloc_and_get_cqp_request(iwcqp, true); if (!cqp_request) return -ENOMEM; cqp_info = &cqp_request->info; cqp_info->post_sq = 1; info = &cqp_info->in.u.add_local_mac_entry.info; ether_addr_copy(info->mac_addr, mac_addr); info->entry_idx = idx; cqp_info->in.u.add_local_mac_entry.scratch = (uintptr_t)cqp_request; cqp_info->cqp_cmd = IRDMA_OP_ADD_LOCAL_MAC_ENTRY; cqp_info->in.u.add_local_mac_entry.cqp = &iwcqp->sc_cqp; cqp_info->in.u.add_local_mac_entry.scratch = (uintptr_t)cqp_request; status = irdma_handle_cqp_op(rf, cqp_request); irdma_put_cqp_request(iwcqp, cqp_request); return status; } /** * irdma_alloc_local_mac_entry - allocate a mac entry * @rf: RDMA PCI function * @mac_tbl_idx: the index of the new mac address * * Allocate a mac address entry and update the mac_tbl_idx * to hold the index of the newly created mac address * Return 0 if successful, otherwise return error */ int irdma_alloc_local_mac_entry(struct irdma_pci_f *rf, u16 *mac_tbl_idx) { struct irdma_cqp *iwcqp = &rf->cqp; struct irdma_cqp_request *cqp_request; struct cqp_cmds_info *cqp_info; int status = 0; cqp_request = irdma_alloc_and_get_cqp_request(iwcqp, true); if (!cqp_request) return -ENOMEM; cqp_info = &cqp_request->info; cqp_info->cqp_cmd = IRDMA_OP_ALLOC_LOCAL_MAC_ENTRY; cqp_info->post_sq = 1; cqp_info->in.u.alloc_local_mac_entry.cqp = &iwcqp->sc_cqp; cqp_info->in.u.alloc_local_mac_entry.scratch = (uintptr_t)cqp_request; status = irdma_handle_cqp_op(rf, cqp_request); if (!status) *mac_tbl_idx = (u16)cqp_request->compl_info.op_ret_val; irdma_put_cqp_request(iwcqp, cqp_request); return status; } /** * irdma_cqp_manage_apbvt_cmd - send cqp command manage apbvt * @iwdev: irdma device * @accel_local_port: port for apbvt * @add_port: add ordelete port */ static int irdma_cqp_manage_apbvt_cmd(struct irdma_device *iwdev, u16 accel_local_port, bool add_port) { struct irdma_apbvt_info *info; struct irdma_cqp_request *cqp_request; struct cqp_cmds_info *cqp_info; int status; cqp_request = irdma_alloc_and_get_cqp_request(&iwdev->rf->cqp, add_port); if (!cqp_request) return -ENOMEM; cqp_info = &cqp_request->info; info = &cqp_info->in.u.manage_apbvt_entry.info; memset(info, 0, sizeof(*info)); info->add = add_port; info->port = accel_local_port; cqp_info->cqp_cmd = IRDMA_OP_MANAGE_APBVT_ENTRY; cqp_info->post_sq = 1; cqp_info->in.u.manage_apbvt_entry.cqp = &iwdev->rf->cqp.sc_cqp; 
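/*
 * Like the MAC-entry helpers above, this follows the common CQP
 * command shape: irdma_alloc_and_get_cqp_request() (waiting or
 * polled), fill in cqp_info->cqp_cmd and the per-command union member
 * including a scratch cookie that points back at the request,
 * irdma_handle_cqp_op() to post and optionally wait for completion,
 * then irdma_put_cqp_request() to drop the reference.
 */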
cqp_info->in.u.manage_apbvt_entry.scratch = (uintptr_t)cqp_request; irdma_debug(&iwdev->rf->sc_dev, IRDMA_DEBUG_DEV, "%s: port=0x%04x\n", (!add_port) ? "DELETE" : "ADD", accel_local_port); status = irdma_handle_cqp_op(iwdev->rf, cqp_request); irdma_put_cqp_request(&iwdev->rf->cqp, cqp_request); return status; } /** * irdma_add_apbvt - add tcp port to HW apbvt table * @iwdev: irdma device * @port: port for apbvt */ struct irdma_apbvt_entry * irdma_add_apbvt(struct irdma_device *iwdev, u16 port) { struct irdma_cm_core *cm_core = &iwdev->cm_core; struct irdma_apbvt_entry *entry; unsigned long flags; spin_lock_irqsave(&cm_core->apbvt_lock, flags); entry = irdma_lookup_apbvt_entry(cm_core, port); if (entry) { spin_unlock_irqrestore(&cm_core->apbvt_lock, flags); return entry; } entry = kzalloc(sizeof(*entry), GFP_ATOMIC); if (!entry) { spin_unlock_irqrestore(&cm_core->apbvt_lock, flags); return NULL; } entry->port = port; entry->use_cnt = 1; HASH_ADD(cm_core->apbvt_hash_tbl, &entry->hlist, entry->port); spin_unlock_irqrestore(&cm_core->apbvt_lock, flags); if (irdma_cqp_manage_apbvt_cmd(iwdev, port, true)) { kfree(entry); return NULL; } return entry; } /** * irdma_del_apbvt - delete tcp port from HW apbvt table * @iwdev: irdma device * @entry: apbvt entry object */ void irdma_del_apbvt(struct irdma_device *iwdev, struct irdma_apbvt_entry *entry) { struct irdma_cm_core *cm_core = &iwdev->cm_core; unsigned long flags; spin_lock_irqsave(&cm_core->apbvt_lock, flags); if (--entry->use_cnt) { spin_unlock_irqrestore(&cm_core->apbvt_lock, flags); return; } HASH_DEL(cm_core->apbvt_hash_tbl, &entry->hlist); /* * apbvt_lock is held across CQP delete APBVT OP (non-waiting) to protect against race where add APBVT CQP can * race ahead of the delete APBVT for same port. 
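 * The delete is issued through a non-waiting CQP request
 * (irdma_alloc_and_get_cqp_request() is called with add_port == false),
 * which is what makes it safe to post while apbvt_lock, a spinlock, is
 * still held; the add path in irdma_add_apbvt() drops the lock before
 * issuing its waiting CQP command.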
*/ irdma_cqp_manage_apbvt_cmd(iwdev, entry->port, false); kfree(entry); spin_unlock_irqrestore(&cm_core->apbvt_lock, flags); } /** * irdma_manage_arp_cache - manage hw arp cache * @rf: RDMA PCI function * @mac_addr: mac address ptr * @ip_addr: ip addr for arp cache * @action: add, delete or modify */ void irdma_manage_arp_cache(struct irdma_pci_f *rf, const unsigned char *mac_addr, u32 *ip_addr, u32 action) { struct irdma_add_arp_cache_entry_info *info; struct irdma_cqp_request *cqp_request; struct cqp_cmds_info *cqp_info; int arp_index; arp_index = irdma_arp_table(rf, ip_addr, mac_addr, action); if (arp_index == -1) return; cqp_request = irdma_alloc_and_get_cqp_request(&rf->cqp, false); if (!cqp_request) return; cqp_info = &cqp_request->info; if (action == IRDMA_ARP_ADD) { cqp_info->cqp_cmd = IRDMA_OP_ADD_ARP_CACHE_ENTRY; info = &cqp_info->in.u.add_arp_cache_entry.info; memset(info, 0, sizeof(*info)); info->arp_index = (u16)arp_index; info->permanent = true; ether_addr_copy(info->mac_addr, mac_addr); cqp_info->in.u.add_arp_cache_entry.scratch = (uintptr_t)cqp_request; cqp_info->in.u.add_arp_cache_entry.cqp = &rf->cqp.sc_cqp; } else { cqp_info->cqp_cmd = IRDMA_OP_DELETE_ARP_CACHE_ENTRY; cqp_info->in.u.del_arp_cache_entry.scratch = (uintptr_t)cqp_request; cqp_info->in.u.del_arp_cache_entry.cqp = &rf->cqp.sc_cqp; cqp_info->in.u.del_arp_cache_entry.arp_index = arp_index; } cqp_info->post_sq = 1; irdma_handle_cqp_op(rf, cqp_request); irdma_put_cqp_request(&rf->cqp, cqp_request); } /** * irdma_send_syn_cqp_callback - do syn/ack after qhash * @cqp_request: qhash cqp completion */ static void irdma_send_syn_cqp_callback(struct irdma_cqp_request *cqp_request) { struct irdma_cm_node *cm_node = cqp_request->param; irdma_send_syn(cm_node, 1); irdma_rem_ref_cm_node(cm_node); } /** * irdma_manage_qhash - add or modify qhash * @iwdev: irdma device * @cminfo: cm info for qhash * @etype: type (syn or quad) * @mtype: type of qhash * @cmnode: cmnode associated with connection * @wait: wait for completion */ int irdma_manage_qhash(struct irdma_device *iwdev, struct irdma_cm_info *cminfo, enum irdma_quad_entry_type etype, enum irdma_quad_hash_manage_type mtype, void *cmnode, bool wait) { struct irdma_qhash_table_info *info; struct irdma_cqp *iwcqp = &iwdev->rf->cqp; struct irdma_cqp_request *cqp_request; struct cqp_cmds_info *cqp_info; struct irdma_cm_node *cm_node = cmnode; int status; cqp_request = irdma_alloc_and_get_cqp_request(iwcqp, wait); if (!cqp_request) return -ENOMEM; cqp_info = &cqp_request->info; info = &cqp_info->in.u.manage_qhash_table_entry.info; memset(info, 0, sizeof(*info)); info->vsi = &iwdev->vsi; info->manage = mtype; info->entry_type = etype; if (cminfo->vlan_id < VLAN_N_VID) { info->vlan_valid = true; info->vlan_id = cminfo->vlan_id; } else { info->vlan_valid = false; } info->ipv4_valid = cminfo->ipv4; info->user_pri = cminfo->user_pri; - ether_addr_copy(info->mac_addr, IF_LLADDR(iwdev->netdev)); + ether_addr_copy(info->mac_addr, if_getlladdr(iwdev->netdev)); info->qp_num = cminfo->qh_qpid; info->dest_port = cminfo->loc_port; info->dest_ip[0] = cminfo->loc_addr[0]; info->dest_ip[1] = cminfo->loc_addr[1]; info->dest_ip[2] = cminfo->loc_addr[2]; info->dest_ip[3] = cminfo->loc_addr[3]; if (etype == IRDMA_QHASH_TYPE_TCP_ESTABLISHED || etype == IRDMA_QHASH_TYPE_UDP_UNICAST || etype == IRDMA_QHASH_TYPE_UDP_MCAST || etype == IRDMA_QHASH_TYPE_ROCE_MCAST || etype == IRDMA_QHASH_TYPE_ROCEV2_HW) { info->src_port = cminfo->rem_port; info->src_ip[0] = cminfo->rem_addr[0]; info->src_ip[1] = 
cminfo->rem_addr[1]; info->src_ip[2] = cminfo->rem_addr[2]; info->src_ip[3] = cminfo->rem_addr[3]; } if (cmnode) { cqp_request->callback_fcn = irdma_send_syn_cqp_callback; cqp_request->param = cmnode; if (!wait) atomic_inc(&cm_node->refcnt); } if (info->ipv4_valid) irdma_debug(&iwdev->rf->sc_dev, IRDMA_DEBUG_CM, "%s caller: %pS loc_port=0x%04x rem_port=0x%04x loc_addr=%pI4 rem_addr=%pI4 mac=%pM, vlan_id=%d cm_node=%p\n", (!mtype) ? "DELETE" : "ADD", __builtin_return_address(0), info->dest_port, info->src_port, info->dest_ip, info->src_ip, info->mac_addr, cminfo->vlan_id, cmnode ? cmnode : NULL); else irdma_debug(&iwdev->rf->sc_dev, IRDMA_DEBUG_CM, "%s caller: %pS loc_port=0x%04x rem_port=0x%04x loc_addr=%pI6 rem_addr=%pI6 mac=%pM, vlan_id=%d cm_node=%p\n", (!mtype) ? "DELETE" : "ADD", __builtin_return_address(0), info->dest_port, info->src_port, info->dest_ip, info->src_ip, info->mac_addr, cminfo->vlan_id, cmnode ? cmnode : NULL); cqp_info->in.u.manage_qhash_table_entry.cqp = &iwdev->rf->cqp.sc_cqp; cqp_info->in.u.manage_qhash_table_entry.scratch = (uintptr_t)cqp_request; cqp_info->cqp_cmd = IRDMA_OP_MANAGE_QHASH_TABLE_ENTRY; cqp_info->post_sq = 1; status = irdma_handle_cqp_op(iwdev->rf, cqp_request); if (status && cm_node && !wait) irdma_rem_ref_cm_node(cm_node); irdma_put_cqp_request(iwcqp, cqp_request); return status; } /** * irdma_hw_flush_wqes - flush qp's wqe * @rf: RDMA PCI function * @qp: hardware control qp * @info: info for flush * @wait: flag wait for completion */ int irdma_hw_flush_wqes(struct irdma_pci_f *rf, struct irdma_sc_qp *qp, struct irdma_qp_flush_info *info, bool wait) { int status; struct irdma_qp_flush_info *hw_info; struct irdma_cqp_request *cqp_request; struct cqp_cmds_info *cqp_info; struct irdma_qp *iwqp = qp->qp_uk.back_qp; cqp_request = irdma_alloc_and_get_cqp_request(&rf->cqp, wait); if (!cqp_request) return -ENOMEM; cqp_info = &cqp_request->info; hw_info = &cqp_request->info.in.u.qp_flush_wqes.info; memcpy(hw_info, info, sizeof(*hw_info)); cqp_info->cqp_cmd = IRDMA_OP_QP_FLUSH_WQES; cqp_info->post_sq = 1; cqp_info->in.u.qp_flush_wqes.qp = qp; cqp_info->in.u.qp_flush_wqes.scratch = (uintptr_t)cqp_request; status = irdma_handle_cqp_op(rf, cqp_request); if (status) { qp->qp_uk.sq_flush_complete = true; qp->qp_uk.rq_flush_complete = true; irdma_put_cqp_request(&rf->cqp, cqp_request); return status; } if (!wait || cqp_request->compl_info.maj_err_code) goto put_cqp; if (info->rq) { if (cqp_request->compl_info.min_err_code == IRDMA_CQP_COMPL_SQ_WQE_FLUSHED || cqp_request->compl_info.min_err_code == 0) { /* RQ WQE flush was requested but did not happen */ qp->qp_uk.rq_flush_complete = true; } } if (info->sq) { if (cqp_request->compl_info.min_err_code == IRDMA_CQP_COMPL_RQ_WQE_FLUSHED || cqp_request->compl_info.min_err_code == 0) { /* SQ WQE flush was requested but did not happen */ qp->qp_uk.sq_flush_complete = true; } } irdma_debug(&rf->sc_dev, IRDMA_DEBUG_VERBS, "qp_id=%d qp_type=%d qpstate=%d ibqpstate=%d last_aeq=%d hw_iw_state=%d maj_err_code=%d min_err_code=%d\n", iwqp->ibqp.qp_num, rf->protocol_used, iwqp->iwarp_state, iwqp->ibqp_state, iwqp->last_aeq, iwqp->hw_iwarp_state, cqp_request->compl_info.maj_err_code, cqp_request->compl_info.min_err_code); put_cqp: irdma_put_cqp_request(&rf->cqp, cqp_request); return status; } /** * irdma_gen_ae - generate AE * @rf: RDMA PCI function * @qp: qp associated with AE * @info: info for ae * @wait: wait for completion */ void irdma_gen_ae(struct irdma_pci_f *rf, struct irdma_sc_qp *qp, struct irdma_gen_ae_info *info, bool 
wait) { struct irdma_gen_ae_info *ae_info; struct irdma_cqp_request *cqp_request; struct cqp_cmds_info *cqp_info; cqp_request = irdma_alloc_and_get_cqp_request(&rf->cqp, wait); if (!cqp_request) return; cqp_info = &cqp_request->info; ae_info = &cqp_request->info.in.u.gen_ae.info; memcpy(ae_info, info, sizeof(*ae_info)); cqp_info->cqp_cmd = IRDMA_OP_GEN_AE; cqp_info->post_sq = 1; cqp_info->in.u.gen_ae.qp = qp; cqp_info->in.u.gen_ae.scratch = (uintptr_t)cqp_request; irdma_handle_cqp_op(rf, cqp_request); irdma_put_cqp_request(&rf->cqp, cqp_request); } void irdma_flush_wqes(struct irdma_qp *iwqp, u32 flush_mask) { struct irdma_qp_flush_info info = {0}; struct irdma_pci_f *rf = iwqp->iwdev->rf; u8 flush_code = iwqp->sc_qp.flush_code; if (!(flush_mask & IRDMA_FLUSH_SQ) && !(flush_mask & IRDMA_FLUSH_RQ)) return; /* Set flush info fields */ info.sq = flush_mask & IRDMA_FLUSH_SQ; info.rq = flush_mask & IRDMA_FLUSH_RQ; /* Generate userflush errors in CQE */ info.sq_major_code = IRDMA_FLUSH_MAJOR_ERR; info.sq_minor_code = FLUSH_GENERAL_ERR; info.rq_major_code = IRDMA_FLUSH_MAJOR_ERR; info.rq_minor_code = FLUSH_GENERAL_ERR; info.userflushcode = true; if (flush_mask & IRDMA_REFLUSH) { if (info.sq) iwqp->sc_qp.flush_sq = false; if (info.rq) iwqp->sc_qp.flush_rq = false; } else { if (flush_code) { if (info.sq && iwqp->sc_qp.sq_flush_code) info.sq_minor_code = flush_code; if (info.rq && iwqp->sc_qp.rq_flush_code) info.rq_minor_code = flush_code; } if (irdma_upload_context && irdma_upload_qp_context(iwqp, 0, 1)) irdma_dev_warn(&iwqp->iwdev->ibdev, "failed to upload QP context\n"); if (!iwqp->user_mode) irdma_sched_qp_flush_work(iwqp); } /* Issue flush */ (void)irdma_hw_flush_wqes(rf, &iwqp->sc_qp, &info, flush_mask & IRDMA_FLUSH_WAIT); iwqp->flush_issued = true; } diff --git a/sys/dev/irdma/irdma_kcompat.c b/sys/dev/irdma/irdma_kcompat.c index 730c7e73bf5a..b2897b30817c 100644 --- a/sys/dev/irdma/irdma_kcompat.c +++ b/sys/dev/irdma/irdma_kcompat.c @@ -1,2381 +1,2383 @@ /*- * SPDX-License-Identifier: GPL-2.0 or Linux-OpenIB * * Copyright (c) 2018 - 2022 Intel Corporation * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU * General Public License (GPL) Version 2, available from the file * COPYING in the main directory of this source tree, or the * OpenFabrics.org BSD license below: * * Redistribution and use in source and binary forms, with or * without modification, are permitted provided that the following * conditions are met: * * - Redistributions of source code must retain the above * copyright notice, this list of conditions and the following * disclaimer. * * - Redistributions in binary form must reproduce the above * copyright notice, this list of conditions and the following * disclaimer in the documentation and/or other materials * provided with the distribution. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
*/ /*$FreeBSD$*/ #include "irdma_main.h" #define IRDMA_ROCE_UDP_ENCAP_VALID_PORT_MIN (0xC000) static u16 kc_rdma_flow_label_to_udp_sport(u32 fl) { u32 fl_low = fl & 0x03FFF; u32 fl_high = fl & 0xFC000; fl_low ^= fl_high >> 14; return (u16)(fl_low | IRDMA_ROCE_UDP_ENCAP_VALID_PORT_MIN); } #define IRDMA_GRH_FLOWLABEL_MASK (0x000FFFFF) static u32 kc_rdma_calc_flow_label(u32 lqpn, u32 rqpn) { u64 fl = (u64)lqpn * rqpn; fl ^= fl >> 20; fl ^= fl >> 40; return (u32)(fl & IRDMA_GRH_FLOWLABEL_MASK); } u16 kc_rdma_get_udp_sport(u32 fl, u32 lqpn, u32 rqpn) { if (!fl) fl = kc_rdma_calc_flow_label(lqpn, rqpn); return kc_rdma_flow_label_to_udp_sport(fl); } void irdma_get_dev_fw_str(struct ib_device *dev, char *str, size_t str_len) { struct irdma_device *iwdev = to_iwdev(dev); snprintf(str, str_len, "%u.%u", irdma_fw_major_ver(&iwdev->rf->sc_dev), irdma_fw_minor_ver(&iwdev->rf->sc_dev)); } int irdma_add_gid(struct ib_device *device, u8 port_num, unsigned int index, const union ib_gid *gid, const struct ib_gid_attr *attr, void **context) { return 0; } int irdma_del_gid(struct ib_device *device, u8 port_num, unsigned int index, void **context) { return 0; } #if __FreeBSD_version >= 1400026 /** * irdma_alloc_mr - register stag for fast memory registration * @pd: ibpd pointer * @mr_type: memory for stag registrion * @max_num_sg: man number of pages * @udata: user data */ struct ib_mr * irdma_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type, u32 max_num_sg, struct ib_udata *udata) { #else /** * irdma_alloc_mr - register stag for fast memory registration * @pd: ibpd pointer * @mr_type: memory for stag registrion * @max_num_sg: man number of pages */ struct ib_mr * irdma_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type, u32 max_num_sg) { #endif struct irdma_device *iwdev = to_iwdev(pd->device); struct irdma_pble_alloc *palloc; struct irdma_pbl *iwpbl; struct irdma_mr *iwmr; int status; u32 stag; int err_code = -ENOMEM; iwmr = kzalloc(sizeof(*iwmr), GFP_KERNEL); if (!iwmr) return ERR_PTR(-ENOMEM); stag = irdma_create_stag(iwdev); if (!stag) { err_code = -ENOMEM; goto err; } iwmr->stag = stag; iwmr->ibmr.rkey = stag; iwmr->ibmr.lkey = stag; iwmr->ibmr.pd = pd; iwmr->ibmr.device = pd->device; iwpbl = &iwmr->iwpbl; iwpbl->iwmr = iwmr; iwmr->type = IRDMA_MEMREG_TYPE_MEM; palloc = &iwpbl->pble_alloc; iwmr->page_cnt = max_num_sg; /* Assume system PAGE_SIZE as the sg page sizes are unknown. */ iwmr->len = max_num_sg * PAGE_SIZE; status = irdma_get_pble(iwdev->rf->pble_rsrc, palloc, iwmr->page_cnt, false); if (status) goto err_get_pble; err_code = irdma_hw_alloc_stag(iwdev, iwmr); if (err_code) goto err_alloc_stag; iwpbl->pbl_allocated = true; return &iwmr->ibmr; err_alloc_stag: irdma_free_pble(iwdev->rf->pble_rsrc, palloc); err_get_pble: irdma_free_stag(iwdev, stag); err: kfree(iwmr); return ERR_PTR(err_code); } #define IRDMA_ALLOC_UCTX_MIN_REQ_LEN offsetofend(struct irdma_alloc_ucontext_req, rsvd8) #define IRDMA_ALLOC_UCTX_MIN_RESP_LEN offsetofend(struct irdma_alloc_ucontext_resp, rsvd) #if __FreeBSD_version >= 1400026 /** * irdma_alloc_ucontext - Allocate the user context data structure * @uctx: context * @udata: user data * * This keeps track of all objects associated with a particular * user-mode client. 
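 *
 * The ABI is negotiated here: a response buffer of exactly
 * IRDMA_ALLOC_UCTX_MIN_RESP_LEN marks a legacy GEN_1 (libi40iw) client
 * and gets only the minimal fields, while newer clients receive the
 * full uk_attrs-derived limits plus a doorbell mmap key created with
 * irdma_user_mmap_entry_insert().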
*/ int irdma_alloc_ucontext(struct ib_ucontext *uctx, struct ib_udata *udata) { struct ib_device *ibdev = uctx->device; struct irdma_device *iwdev = to_iwdev(ibdev); struct irdma_alloc_ucontext_req req = {0}; struct irdma_alloc_ucontext_resp uresp = {0}; struct irdma_ucontext *ucontext = to_ucontext(uctx); struct irdma_uk_attrs *uk_attrs = &iwdev->rf->sc_dev.hw_attrs.uk_attrs; if (udata->inlen < IRDMA_ALLOC_UCTX_MIN_REQ_LEN || udata->outlen < IRDMA_ALLOC_UCTX_MIN_RESP_LEN) return -EINVAL; if (ib_copy_from_udata(&req, udata, min(sizeof(req), udata->inlen))) return -EINVAL; if (req.userspace_ver < 4 || req.userspace_ver > IRDMA_ABI_VER) goto ver_error; ucontext->iwdev = iwdev; ucontext->abi_ver = req.userspace_ver; if (req.comp_mask & IRDMA_ALLOC_UCTX_USE_RAW_ATTR) ucontext->use_raw_attrs = true; /* GEN_1 support for libi40iw */ if (udata->outlen == IRDMA_ALLOC_UCTX_MIN_RESP_LEN) { if (uk_attrs->hw_rev != IRDMA_GEN_1) return -EOPNOTSUPP; ucontext->legacy_mode = true; uresp.max_qps = iwdev->rf->max_qp; uresp.max_pds = iwdev->rf->sc_dev.hw_attrs.max_hw_pds; uresp.wq_size = iwdev->rf->sc_dev.hw_attrs.max_qp_wr * 2; uresp.kernel_ver = req.userspace_ver; if (ib_copy_to_udata(udata, &uresp, min(sizeof(uresp), udata->outlen))) return -EFAULT; } else { u64 bar_off; uresp.kernel_ver = IRDMA_ABI_VER; uresp.feature_flags = uk_attrs->feature_flags; uresp.max_hw_wq_frags = uk_attrs->max_hw_wq_frags; uresp.max_hw_read_sges = uk_attrs->max_hw_read_sges; uresp.max_hw_inline = uk_attrs->max_hw_inline; uresp.max_hw_rq_quanta = uk_attrs->max_hw_rq_quanta; uresp.max_hw_wq_quanta = uk_attrs->max_hw_wq_quanta; uresp.max_hw_sq_chunk = uk_attrs->max_hw_sq_chunk; uresp.max_hw_cq_size = uk_attrs->max_hw_cq_size; uresp.min_hw_cq_size = uk_attrs->min_hw_cq_size; uresp.hw_rev = uk_attrs->hw_rev; uresp.comp_mask |= IRDMA_ALLOC_UCTX_USE_RAW_ATTR; bar_off = (uintptr_t)iwdev->rf->sc_dev.hw_regs[IRDMA_DB_ADDR_OFFSET]; ucontext->db_mmap_entry = irdma_user_mmap_entry_insert(ucontext, bar_off, IRDMA_MMAP_IO_NC, &uresp.db_mmap_key); if (!ucontext->db_mmap_entry) { return -ENOMEM; } if (ib_copy_to_udata(udata, &uresp, min(sizeof(uresp), udata->outlen))) { rdma_user_mmap_entry_remove(ucontext->db_mmap_entry); return -EFAULT; } } INIT_LIST_HEAD(&ucontext->cq_reg_mem_list); spin_lock_init(&ucontext->cq_reg_mem_list_lock); INIT_LIST_HEAD(&ucontext->qp_reg_mem_list); spin_lock_init(&ucontext->qp_reg_mem_list_lock); INIT_LIST_HEAD(&ucontext->vma_list); mutex_init(&ucontext->vma_list_mutex); return 0; ver_error: irdma_dev_err(&iwdev->ibdev, "Invalid userspace driver version detected. Detected version %d, should be %d\n", req.userspace_ver, IRDMA_ABI_VER); return -EINVAL; } #endif #if __FreeBSD_version < 1400026 /** * irdma_alloc_ucontext - Allocate the user context data structure * @ibdev: ib device pointer * @udata: user data * * This keeps track of all objects associated with a particular * user-mode client. 
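 *
 * Unlike the variant above, this pre-1400026 path allocates and frees
 * the irdma_ucontext itself and tracks the doorbell mapping in a
 * driver-private hash table (irdma_user_mmap_entry_add_hash() and
 * irdma_user_mmap_entry_del_hash() under mmap_tbl_lock) instead of the
 * rdma_user_mmap_entry API.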
*/ struct ib_ucontext * irdma_alloc_ucontext(struct ib_device *ibdev, struct ib_udata *udata) { struct irdma_device *iwdev = to_iwdev(ibdev); struct irdma_alloc_ucontext_req req = {0}; struct irdma_alloc_ucontext_resp uresp = {0}; struct irdma_ucontext *ucontext; struct irdma_uk_attrs *uk_attrs = &iwdev->rf->sc_dev.hw_attrs.uk_attrs; if (udata->inlen < IRDMA_ALLOC_UCTX_MIN_REQ_LEN || udata->outlen < IRDMA_ALLOC_UCTX_MIN_RESP_LEN) return ERR_PTR(-EINVAL); if (ib_copy_from_udata(&req, udata, min(sizeof(req), udata->inlen))) return ERR_PTR(-EINVAL); if (req.userspace_ver < 4 || req.userspace_ver > IRDMA_ABI_VER) goto ver_error; ucontext = kzalloc(sizeof(*ucontext), GFP_KERNEL); if (!ucontext) return ERR_PTR(-ENOMEM); ucontext->iwdev = iwdev; ucontext->abi_ver = req.userspace_ver; if (req.comp_mask & IRDMA_ALLOC_UCTX_USE_RAW_ATTR) ucontext->use_raw_attrs = true; /* GEN_1 legacy support with libi40iw */ if (udata->outlen == IRDMA_ALLOC_UCTX_MIN_RESP_LEN) { if (uk_attrs->hw_rev != IRDMA_GEN_1) { kfree(ucontext); return ERR_PTR(-EOPNOTSUPP); } ucontext->legacy_mode = true; uresp.max_qps = iwdev->rf->max_qp; uresp.max_pds = iwdev->rf->sc_dev.hw_attrs.max_hw_pds; uresp.wq_size = iwdev->rf->sc_dev.hw_attrs.max_qp_wr * 2; uresp.kernel_ver = req.userspace_ver; if (ib_copy_to_udata(udata, &uresp, min(sizeof(uresp), udata->outlen))) { kfree(ucontext); return ERR_PTR(-EFAULT); } } else { u64 bar_off; uresp.kernel_ver = IRDMA_ABI_VER; uresp.feature_flags = uk_attrs->feature_flags; uresp.max_hw_wq_frags = uk_attrs->max_hw_wq_frags; uresp.max_hw_read_sges = uk_attrs->max_hw_read_sges; uresp.max_hw_inline = uk_attrs->max_hw_inline; uresp.max_hw_rq_quanta = uk_attrs->max_hw_rq_quanta; uresp.max_hw_wq_quanta = uk_attrs->max_hw_wq_quanta; uresp.max_hw_sq_chunk = uk_attrs->max_hw_sq_chunk; uresp.max_hw_cq_size = uk_attrs->max_hw_cq_size; uresp.min_hw_cq_size = uk_attrs->min_hw_cq_size; uresp.hw_rev = uk_attrs->hw_rev; uresp.comp_mask |= IRDMA_ALLOC_UCTX_USE_RAW_ATTR; bar_off = (uintptr_t)iwdev->rf->sc_dev.hw_regs[IRDMA_DB_ADDR_OFFSET]; spin_lock_init(&ucontext->mmap_tbl_lock); ucontext->db_mmap_entry = irdma_user_mmap_entry_add_hash(ucontext, bar_off, IRDMA_MMAP_IO_NC, &uresp.db_mmap_key); if (!ucontext->db_mmap_entry) { spin_lock_destroy(&ucontext->mmap_tbl_lock); kfree(ucontext); return ERR_PTR(-ENOMEM); } if (ib_copy_to_udata(udata, &uresp, min(sizeof(uresp), udata->outlen))) { irdma_user_mmap_entry_del_hash(ucontext->db_mmap_entry); spin_lock_destroy(&ucontext->mmap_tbl_lock); kfree(ucontext); return ERR_PTR(-EFAULT); } } INIT_LIST_HEAD(&ucontext->cq_reg_mem_list); spin_lock_init(&ucontext->cq_reg_mem_list_lock); INIT_LIST_HEAD(&ucontext->qp_reg_mem_list); spin_lock_init(&ucontext->qp_reg_mem_list_lock); INIT_LIST_HEAD(&ucontext->vma_list); mutex_init(&ucontext->vma_list_mutex); return &ucontext->ibucontext; ver_error: irdma_dev_err(&iwdev->ibdev, "Invalid userspace driver version detected. 
Detected version %d, should be %d\n", req.userspace_ver, IRDMA_ABI_VER); return ERR_PTR(-EINVAL); } #endif #if __FreeBSD_version >= 1400026 /** * irdma_dealloc_ucontext - deallocate the user context data structure * @context: user context created during alloc */ void irdma_dealloc_ucontext(struct ib_ucontext *context) { struct irdma_ucontext *ucontext = to_ucontext(context); rdma_user_mmap_entry_remove(ucontext->db_mmap_entry); return; } #endif #if __FreeBSD_version < 1400026 /** * irdma_dealloc_ucontext - deallocate the user context data structure * @context: user context created during alloc */ int irdma_dealloc_ucontext(struct ib_ucontext *context) { struct irdma_ucontext *ucontext = to_ucontext(context); irdma_user_mmap_entry_del_hash(ucontext->db_mmap_entry); spin_lock_destroy(&ucontext->mmap_tbl_lock); kfree(ucontext); return 0; } #endif #define IRDMA_ALLOC_PD_MIN_RESP_LEN offsetofend(struct irdma_alloc_pd_resp, rsvd) #if __FreeBSD_version >= 1400026 /** * irdma_alloc_pd - allocate protection domain * @pd: protection domain * @udata: user data */ int irdma_alloc_pd(struct ib_pd *pd, struct ib_udata *udata) { struct irdma_pd *iwpd = to_iwpd(pd); struct irdma_device *iwdev = to_iwdev(pd->device); struct irdma_sc_dev *dev = &iwdev->rf->sc_dev; struct irdma_pci_f *rf = iwdev->rf; struct irdma_alloc_pd_resp uresp = {0}; struct irdma_sc_pd *sc_pd; u32 pd_id = 0; int err; if (udata && udata->outlen < IRDMA_ALLOC_PD_MIN_RESP_LEN) return -EINVAL; err = irdma_alloc_rsrc(rf, rf->allocated_pds, rf->max_pd, &pd_id, &rf->next_pd); if (err) return err; sc_pd = &iwpd->sc_pd; if (udata) { struct irdma_ucontext *ucontext = rdma_udata_to_drv_context(udata, struct irdma_ucontext, ibucontext); irdma_sc_pd_init(dev, sc_pd, pd_id, ucontext->abi_ver); uresp.pd_id = pd_id; if (ib_copy_to_udata(udata, &uresp, min(sizeof(uresp), udata->outlen))) { err = -EFAULT; goto error; } } else { irdma_sc_pd_init(dev, sc_pd, pd_id, IRDMA_ABI_VER); } spin_lock_init(&iwpd->udqp_list_lock); INIT_LIST_HEAD(&iwpd->udqp_list); return 0; error: irdma_free_rsrc(rf, rf->allocated_pds, pd_id); return err; } #endif #if __FreeBSD_version < 1400026 /** * irdma_alloc_pd - allocate protection domain * @ibdev: IB device * @context: user context * @udata: user data */ struct ib_pd * irdma_alloc_pd(struct ib_device *ibdev, struct ib_ucontext *context, struct ib_udata *udata) { struct irdma_pd *iwpd; struct irdma_device *iwdev = to_iwdev(ibdev); struct irdma_sc_dev *dev = &iwdev->rf->sc_dev; struct irdma_pci_f *rf = iwdev->rf; struct irdma_alloc_pd_resp uresp = {0}; struct irdma_sc_pd *sc_pd; u32 pd_id = 0; int err; err = irdma_alloc_rsrc(rf, rf->allocated_pds, rf->max_pd, &pd_id, &rf->next_pd); if (err) return ERR_PTR(err); iwpd = kzalloc(sizeof(*iwpd), GFP_KERNEL); if (!iwpd) { err = -ENOMEM; goto free_res; } sc_pd = &iwpd->sc_pd; if (udata) { struct irdma_ucontext *ucontext = to_ucontext(context); irdma_sc_pd_init(dev, sc_pd, pd_id, ucontext->abi_ver); uresp.pd_id = pd_id; if (ib_copy_to_udata(udata, &uresp, min(sizeof(uresp), udata->outlen))) { err = -EFAULT; goto error; } } else { irdma_sc_pd_init(dev, sc_pd, pd_id, IRDMA_ABI_VER); } spin_lock_init(&iwpd->udqp_list_lock); INIT_LIST_HEAD(&iwpd->udqp_list); return &iwpd->ibpd; error: kfree(iwpd); free_res: irdma_free_rsrc(rf, rf->allocated_pds, pd_id); return ERR_PTR(err); } #endif #if __FreeBSD_version >= 1400026 void irdma_dealloc_pd(struct ib_pd *ibpd, struct ib_udata *udata) { struct irdma_pd *iwpd = to_iwpd(ibpd); struct irdma_device *iwdev = to_iwdev(ibpd->device); 
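/*
 * Only the PD id has to be returned to the allocated_pds bitmap here:
 * on __FreeBSD_version >= 1400026 the PD object is allocated by the ib
 * core (irdma_alloc_pd() above is handed a pre-allocated ib_pd),
 * whereas the older variant below also has to kfree() the iwpd it
 * kzalloc'ed in its irdma_alloc_pd().
 */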
irdma_free_rsrc(iwdev->rf, iwdev->rf->allocated_pds, iwpd->sc_pd.pd_id); } #endif #if __FreeBSD_version < 1400026 int irdma_dealloc_pd(struct ib_pd *ibpd) { struct irdma_pd *iwpd = to_iwpd(ibpd); struct irdma_device *iwdev = to_iwdev(ibpd->device); irdma_free_rsrc(iwdev->rf, iwdev->rf->allocated_pds, iwpd->sc_pd.pd_id); kfree(iwpd); return 0; } #endif /** * irdma_find_qp_update_qs - update QS handle for UD QPs * @rf: RDMA PCI function * @pd: protection domain object * @user_pri: selected user priority */ static void irdma_find_qp_update_qs(struct irdma_pci_f *rf, struct irdma_pd *pd, u8 user_pri) { struct irdma_qp *iwqp; struct list_head *tmp_node, *list_node; struct irdma_udqs_work *work; unsigned long flags; bool qs_change; spin_lock_irqsave(&pd->udqp_list_lock, flags); list_for_each_safe(list_node, tmp_node, &pd->udqp_list) { qs_change = true; iwqp = list_entry(list_node, struct irdma_qp, ud_list_elem); irdma_qp_add_ref(&iwqp->ibqp); /* check if qs_handle needs to be changed */ if (iwqp->sc_qp.qs_handle == iwqp->sc_qp.vsi->qos[user_pri].qs_handle) { if (iwqp->ctx_info.user_pri == user_pri) { /* qs_handle and user_pri don't change */ irdma_qp_rem_ref(&iwqp->ibqp); continue; } qs_change = false; } /* perform qp qos change */ work = kzalloc(sizeof(*work), GFP_ATOMIC); if (!work) { irdma_qp_rem_ref(&iwqp->ibqp); spin_unlock_irqrestore(&pd->udqp_list_lock, flags); return; } work->iwqp = iwqp; work->user_prio = user_pri; work->qs_change = qs_change; INIT_WORK(&work->work, irdma_udqp_qs_worker); if (qs_change) irdma_cqp_qp_suspend_resume(&iwqp->sc_qp, IRDMA_OP_SUSPEND); queue_work(rf->iwdev->cleanup_wq, &work->work); } spin_unlock_irqrestore(&pd->udqp_list_lock, flags); } static void irdma_fill_ah_info(struct vnet *vnet, struct irdma_ah_info *ah_info, const struct ib_gid_attr *sgid_attr, struct sockaddr *sgid_addr, struct sockaddr *dgid_addr, u8 *dmac, u8 net_type) { if (net_type == RDMA_NETWORK_IPV4) { ah_info->ipv4_valid = true; ah_info->dest_ip_addr[0] = ntohl(((struct sockaddr_in *)dgid_addr)->sin_addr.s_addr); ah_info->src_ip_addr[0] = ntohl(((struct sockaddr_in *)sgid_addr)->sin_addr.s_addr); CURVNET_SET_QUIET(vnet); ah_info->do_lpbk = irdma_ipv4_is_lpb(ah_info->src_ip_addr[0], ah_info->dest_ip_addr[0]); CURVNET_RESTORE(); if (ipv4_is_multicast(((struct sockaddr_in *)dgid_addr)->sin_addr.s_addr)) { irdma_mcast_mac_v4(ah_info->dest_ip_addr, dmac); } } else { irdma_copy_ip_ntohl(ah_info->dest_ip_addr, ((struct sockaddr_in6 *)dgid_addr)->sin6_addr.__u6_addr.__u6_addr32); irdma_copy_ip_ntohl(ah_info->src_ip_addr, ((struct sockaddr_in6 *)sgid_addr)->sin6_addr.__u6_addr.__u6_addr32); ah_info->do_lpbk = irdma_ipv6_is_lpb(ah_info->src_ip_addr, ah_info->dest_ip_addr); if (rdma_is_multicast_addr(&((struct sockaddr_in6 *)dgid_addr)->sin6_addr)) { irdma_mcast_mac_v6(ah_info->dest_ip_addr, dmac); } } } -static inline u8 irdma_get_vlan_ndev_prio(struct ifnet *ndev, u8 prio){ +static inline u8 irdma_get_vlan_ndev_prio(if_t ndev, u8 prio) +{ return prio; } static int irdma_create_ah_vlan_tag(struct irdma_device *iwdev, struct irdma_pd *pd, struct irdma_ah_info *ah_info, const struct ib_gid_attr *sgid_attr, u8 *dmac) { u16 vlan_prio; if (sgid_attr->ndev && is_vlan_dev(sgid_attr->ndev)) ah_info->vlan_tag = vlan_dev_vlan_id(sgid_attr->ndev); else ah_info->vlan_tag = VLAN_N_VID; ah_info->dst_arpindex = irdma_add_arp(iwdev->rf, ah_info->dest_ip_addr, dmac); if (ah_info->dst_arpindex == -1) return -EINVAL; if (ah_info->vlan_tag >= VLAN_N_VID && iwdev->dcb_vlan_mode) ah_info->vlan_tag = 0; if 
(ah_info->vlan_tag < VLAN_N_VID) { - struct ifnet *ndev = sgid_attr->ndev; + if_t ndev = sgid_attr->ndev; ah_info->insert_vlan_tag = true; vlan_prio = (u16)irdma_get_vlan_ndev_prio(ndev, rt_tos2priority(ah_info->tc_tos)); ah_info->vlan_tag |= vlan_prio << VLAN_PRIO_SHIFT; irdma_find_qp_update_qs(iwdev->rf, pd, vlan_prio); } if (iwdev->roce_dcqcn_en) { ah_info->tc_tos &= ~ECN_CODE_PT_MASK; ah_info->tc_tos |= ECN_CODE_PT_VAL; } return 0; } static int irdma_create_ah_wait(struct irdma_pci_f *rf, struct irdma_sc_ah *sc_ah, bool sleep) { if (!sleep) { int cnt = rf->sc_dev.hw_attrs.max_cqp_compl_wait_time_ms * CQP_TIMEOUT_THRESHOLD; do { irdma_cqp_ce_handler(rf, &rf->ccq.sc_cq); mdelay(1); } while (!sc_ah->ah_info.ah_valid && --cnt); if (!cnt) return -ETIMEDOUT; } return 0; } #define IRDMA_CREATE_AH_MIN_RESP_LEN offsetofend(struct irdma_create_ah_resp, rsvd) #if __FreeBSD_version >= 1400026 /** * irdma_create_ah - create address handle * @ib_ah: ptr to AH * @attr: address handle attributes * @flags: AH flags to wait * @udata: user data * * returns 0 on success, error otherwise */ int irdma_create_ah(struct ib_ah *ib_ah, struct ib_ah_attr *attr, u32 flags, struct ib_udata *udata) { struct irdma_pd *pd = to_iwpd(ib_ah->pd); struct irdma_ah *ah = container_of(ib_ah, struct irdma_ah, ibah); struct irdma_device *iwdev = to_iwdev(ib_ah->pd->device); union ib_gid sgid; struct ib_gid_attr sgid_attr; struct irdma_pci_f *rf = iwdev->rf; struct irdma_sc_ah *sc_ah; u32 ah_id = 0; struct irdma_ah_info *ah_info; struct irdma_create_ah_resp uresp; union { struct sockaddr saddr; struct sockaddr_in saddr_in; struct sockaddr_in6 saddr_in6; } sgid_addr, dgid_addr; int err; u8 dmac[ETH_ALEN]; bool sleep = (flags & RDMA_CREATE_AH_SLEEPABLE) != 0; if (udata && udata->outlen < IRDMA_CREATE_AH_MIN_RESP_LEN) return -EINVAL; err = irdma_alloc_rsrc(rf, rf->allocated_ahs, rf->max_ah, &ah_id, &rf->next_ah); if (err) return err; ah->pd = pd; sc_ah = &ah->sc_ah; sc_ah->ah_info.ah_idx = ah_id; sc_ah->ah_info.vsi = &iwdev->vsi; irdma_sc_init_ah(&rf->sc_dev, sc_ah); ah->sgid_index = attr->grh.sgid_index; memcpy(&ah->dgid, &attr->grh.dgid, sizeof(ah->dgid)); rcu_read_lock(); err = ib_get_cached_gid(&iwdev->ibdev, attr->port_num, attr->grh.sgid_index, &sgid, &sgid_attr); rcu_read_unlock(); if (err) { irdma_debug(&iwdev->rf->sc_dev, IRDMA_DEBUG_VERBS, "GID lookup at idx=%d with port=%d failed\n", attr->grh.sgid_index, attr->port_num); err = -EINVAL; goto err_gid_l2; } rdma_gid2ip((struct sockaddr *)&sgid_addr, &sgid); rdma_gid2ip((struct sockaddr *)&dgid_addr, &attr->grh.dgid); ah->av.attrs = *attr; ah->av.net_type = kc_rdma_gid_attr_network_type(sgid_attr, sgid_attr.gid_type, &sgid); if (sgid_attr.ndev) dev_put(sgid_attr.ndev); ah->av.sgid_addr.saddr = sgid_addr.saddr; ah->av.dgid_addr.saddr = dgid_addr.saddr; ah_info = &sc_ah->ah_info; ah_info->ah_idx = ah_id; ah_info->pd_idx = pd->sc_pd.pd_id; - ether_addr_copy(ah_info->mac_addr, IF_LLADDR(iwdev->netdev)); + ether_addr_copy(ah_info->mac_addr, if_getlladdr(iwdev->netdev)); if (attr->ah_flags & IB_AH_GRH) { ah_info->flow_label = attr->grh.flow_label; ah_info->hop_ttl = attr->grh.hop_limit; ah_info->tc_tos = attr->grh.traffic_class; } ether_addr_copy(dmac, attr->dmac); - irdma_fill_ah_info(iwdev->netdev->if_vnet, ah_info, &sgid_attr, &sgid_addr.saddr, &dgid_addr.saddr, + irdma_fill_ah_info(if_getvnet(iwdev->netdev), ah_info, &sgid_attr, &sgid_addr.saddr, &dgid_addr.saddr, dmac, ah->av.net_type); err = irdma_create_ah_vlan_tag(iwdev, pd, ah_info, &sgid_attr, dmac); if (err) goto 
err_gid_l2; err = irdma_ah_cqp_op(iwdev->rf, sc_ah, IRDMA_OP_AH_CREATE, sleep, irdma_gsi_ud_qp_ah_cb, sc_ah); if (err) { irdma_debug(&iwdev->rf->sc_dev, IRDMA_DEBUG_DEV, "CQP-OP Create AH fail"); goto err_gid_l2; } err = irdma_create_ah_wait(rf, sc_ah, sleep); if (err) { irdma_debug(&iwdev->rf->sc_dev, IRDMA_DEBUG_DEV, "CQP create AH timed out"); goto err_gid_l2; } if (udata) { uresp.ah_id = ah->sc_ah.ah_info.ah_idx; err = ib_copy_to_udata(udata, &uresp, sizeof(uresp)); if (err) { irdma_ah_cqp_op(iwdev->rf, &ah->sc_ah, IRDMA_OP_AH_DESTROY, false, NULL, ah); goto err_gid_l2; } } return 0; err_gid_l2: irdma_free_rsrc(iwdev->rf, iwdev->rf->allocated_ahs, ah_id); return err; } #endif void irdma_ether_copy(u8 *dmac, struct ib_ah_attr *attr) { ether_addr_copy(dmac, attr->dmac); } #if __FreeBSD_version < 1400026 struct ib_ah * irdma_create_ah_stub(struct ib_pd *ibpd, struct ib_ah_attr *attr, struct ib_udata *udata) #else int irdma_create_ah_stub(struct ib_ah *ib_ah, struct ib_ah_attr *attr, u32 flags, struct ib_udata *udata) #endif { #if __FreeBSD_version >= 1400026 return -ENOSYS; #else return ERR_PTR(-ENOSYS); #endif } #if __FreeBSD_version >= 1400026 void irdma_destroy_ah_stub(struct ib_ah *ibah, u32 flags) { return; } #else int irdma_destroy_ah_stub(struct ib_ah *ibah) { return -ENOSYS; } #endif #if __FreeBSD_version < 1400026 /** * irdma_create_ah - create address handle * @ibpd: ptr to pd * @attr: address handle attributes * @udata: user data * * returns a pointer to an address handle */ struct ib_ah * irdma_create_ah(struct ib_pd *ibpd, struct ib_ah_attr *attr, struct ib_udata *udata) { struct irdma_pd *pd = to_iwpd(ibpd); struct irdma_device *iwdev = to_iwdev(ibpd->device); struct irdma_ah *ah; union ib_gid sgid; struct ib_gid_attr sgid_attr; struct irdma_pci_f *rf = iwdev->rf; struct irdma_sc_ah *sc_ah; u32 ah_id = 0; struct irdma_ah_info *ah_info; struct irdma_create_ah_resp uresp; union { struct sockaddr saddr; struct sockaddr_in saddr_in; struct sockaddr_in6 saddr_in6; } sgid_addr, dgid_addr; int err; u8 dmac[ETH_ALEN]; bool sleep = udata ? 
true : false; if (udata && udata->outlen < IRDMA_CREATE_AH_MIN_RESP_LEN) return ERR_PTR(-EINVAL); err = irdma_alloc_rsrc(rf, rf->allocated_ahs, rf->max_ah, &ah_id, &rf->next_ah); if (err) return ERR_PTR(err); ah = kzalloc(sizeof(*ah), GFP_ATOMIC); if (!ah) { irdma_free_rsrc(rf, rf->allocated_ahs, ah_id); return ERR_PTR(-ENOMEM); } ah->pd = pd; sc_ah = &ah->sc_ah; sc_ah->ah_info.ah_idx = ah_id; sc_ah->ah_info.vsi = &iwdev->vsi; irdma_sc_init_ah(&rf->sc_dev, sc_ah); ah->sgid_index = attr->grh.sgid_index; memcpy(&ah->dgid, &attr->grh.dgid, sizeof(ah->dgid)); rcu_read_lock(); err = ib_get_cached_gid(&iwdev->ibdev, attr->port_num, attr->grh.sgid_index, &sgid, &sgid_attr); rcu_read_unlock(); if (err) { irdma_debug(&iwdev->rf->sc_dev, IRDMA_DEBUG_VERBS, "GID lookup at idx=%d with port=%d failed\n", attr->grh.sgid_index, attr->port_num); err = -EINVAL; goto err_gid_l2; } rdma_gid2ip((struct sockaddr *)&sgid_addr, &sgid); rdma_gid2ip((struct sockaddr *)&dgid_addr, &attr->grh.dgid); ah->av.attrs = *attr; ah->av.net_type = kc_rdma_gid_attr_network_type(sgid_attr, sgid_attr.gid_type, &sgid); if (sgid_attr.ndev) dev_put(sgid_attr.ndev); ah->av.sgid_addr.saddr = sgid_addr.saddr; ah->av.dgid_addr.saddr = dgid_addr.saddr; ah_info = &sc_ah->ah_info; ah_info->ah_idx = ah_id; ah_info->pd_idx = pd->sc_pd.pd_id; - ether_addr_copy(ah_info->mac_addr, IF_LLADDR(iwdev->netdev)); + ether_addr_copy(ah_info->mac_addr, if_getlladdr(iwdev->netdev)); if (attr->ah_flags & IB_AH_GRH) { ah_info->flow_label = attr->grh.flow_label; ah_info->hop_ttl = attr->grh.hop_limit; ah_info->tc_tos = attr->grh.traffic_class; } if (udata) ib_resolve_eth_dmac(ibpd->device, attr); irdma_ether_copy(dmac, attr); - irdma_fill_ah_info(iwdev->netdev->if_vnet, ah_info, &sgid_attr, &sgid_addr.saddr, &dgid_addr.saddr, + irdma_fill_ah_info(if_getvnet(iwdev->netdev), ah_info, &sgid_attr, &sgid_addr.saddr, &dgid_addr.saddr, dmac, ah->av.net_type); err = irdma_create_ah_vlan_tag(iwdev, pd, ah_info, &sgid_attr, dmac); if (err) goto err_gid_l2; err = irdma_ah_cqp_op(iwdev->rf, sc_ah, IRDMA_OP_AH_CREATE, sleep, irdma_gsi_ud_qp_ah_cb, sc_ah); if (err) { irdma_debug(&iwdev->rf->sc_dev, IRDMA_DEBUG_VERBS, "CQP-OP Create AH fail"); goto err_gid_l2; } err = irdma_create_ah_wait(rf, sc_ah, sleep); if (err) { irdma_debug(&iwdev->rf->sc_dev, IRDMA_DEBUG_DEV, "CQP create AH timed out"); goto err_gid_l2; } if (udata) { uresp.ah_id = ah->sc_ah.ah_info.ah_idx; err = ib_copy_to_udata(udata, &uresp, sizeof(uresp)); if (err) { irdma_ah_cqp_op(iwdev->rf, &ah->sc_ah, IRDMA_OP_AH_DESTROY, false, NULL, ah); goto err_gid_l2; } } return &ah->ibah; err_gid_l2: kfree(ah); irdma_free_rsrc(iwdev->rf, iwdev->rf->allocated_ahs, ah_id); return ERR_PTR(err); } #endif /** * irdma_free_qp_rsrc - free up memory resources for qp * @iwqp: qp ptr (user or kernel) */ void irdma_free_qp_rsrc(struct irdma_qp *iwqp) { struct irdma_device *iwdev = iwqp->iwdev; struct irdma_pci_f *rf = iwdev->rf; u32 qp_num = iwqp->ibqp.qp_num; irdma_ieq_cleanup_qp(iwdev->vsi.ieq, &iwqp->sc_qp); irdma_dealloc_push_page(rf, &iwqp->sc_qp); if (iwqp->sc_qp.vsi) { irdma_qp_rem_qos(&iwqp->sc_qp); iwqp->sc_qp.dev->ws_remove(iwqp->sc_qp.vsi, iwqp->sc_qp.user_pri); } if (qp_num > 2) irdma_free_rsrc(rf, rf->allocated_qps, qp_num); irdma_free_dma_mem(rf->sc_dev.hw, &iwqp->q2_ctx_mem); irdma_free_dma_mem(rf->sc_dev.hw, &iwqp->kqp.dma_mem); kfree(iwqp->kqp.sig_trk_mem); iwqp->kqp.sig_trk_mem = NULL; kfree(iwqp->kqp.sq_wrid_mem); kfree(iwqp->kqp.rq_wrid_mem); kfree(iwqp->sg_list); kfree(iwqp); } /** * irdma_create_qp - 
create qp * @ibpd: ptr of pd * @init_attr: attributes for qp * @udata: user data for create qp */ struct ib_qp * irdma_create_qp(struct ib_pd *ibpd, struct ib_qp_init_attr *init_attr, struct ib_udata *udata) { #define IRDMA_CREATE_QP_MIN_REQ_LEN offsetofend(struct irdma_create_qp_req, user_compl_ctx) #define IRDMA_CREATE_QP_MIN_RESP_LEN offsetofend(struct irdma_create_qp_resp, rsvd) struct irdma_pd *iwpd = to_iwpd(ibpd); struct irdma_device *iwdev = to_iwdev(ibpd->device); struct irdma_pci_f *rf = iwdev->rf; struct irdma_qp *iwqp; struct irdma_create_qp_resp uresp = {0}; u32 qp_num = 0; int ret; int err_code; struct irdma_sc_qp *qp; struct irdma_sc_dev *dev = &rf->sc_dev; struct irdma_uk_attrs *uk_attrs = &dev->hw_attrs.uk_attrs; struct irdma_qp_init_info init_info = {{0}}; struct irdma_qp_host_ctx_info *ctx_info; unsigned long flags; err_code = irdma_validate_qp_attrs(init_attr, iwdev); if (err_code) return ERR_PTR(err_code); if (udata && (udata->inlen < IRDMA_CREATE_QP_MIN_REQ_LEN || udata->outlen < IRDMA_CREATE_QP_MIN_RESP_LEN)) return ERR_PTR(-EINVAL); init_info.vsi = &iwdev->vsi; init_info.qp_uk_init_info.uk_attrs = uk_attrs; init_info.qp_uk_init_info.sq_size = init_attr->cap.max_send_wr; init_info.qp_uk_init_info.rq_size = init_attr->cap.max_recv_wr; init_info.qp_uk_init_info.max_sq_frag_cnt = init_attr->cap.max_send_sge; init_info.qp_uk_init_info.max_rq_frag_cnt = init_attr->cap.max_recv_sge; init_info.qp_uk_init_info.max_inline_data = init_attr->cap.max_inline_data; iwqp = kzalloc(sizeof(*iwqp), GFP_KERNEL); if (!iwqp) return ERR_PTR(-ENOMEM); iwqp->sg_list = kcalloc(uk_attrs->max_hw_wq_frags, sizeof(*iwqp->sg_list), GFP_KERNEL); if (!iwqp->sg_list) { kfree(iwqp); return ERR_PTR(-ENOMEM); } qp = &iwqp->sc_qp; qp->qp_uk.back_qp = iwqp; qp->qp_uk.lock = &iwqp->lock; qp->push_idx = IRDMA_INVALID_PUSH_PAGE_INDEX; iwqp->iwdev = iwdev; iwqp->q2_ctx_mem.size = IRDMA_Q2_BUF_SIZE + IRDMA_QP_CTX_SIZE; iwqp->q2_ctx_mem.va = irdma_allocate_dma_mem(dev->hw, &iwqp->q2_ctx_mem, iwqp->q2_ctx_mem.size, 256); if (!iwqp->q2_ctx_mem.va) { kfree(iwqp->sg_list); kfree(iwqp); return ERR_PTR(-ENOMEM); } init_info.q2 = iwqp->q2_ctx_mem.va; init_info.q2_pa = iwqp->q2_ctx_mem.pa; init_info.host_ctx = (__le64 *) (init_info.q2 + IRDMA_Q2_BUF_SIZE); init_info.host_ctx_pa = init_info.q2_pa + IRDMA_Q2_BUF_SIZE; if (init_attr->qp_type == IB_QPT_GSI) qp_num = 1; else err_code = irdma_alloc_rsrc(rf, rf->allocated_qps, rf->max_qp, &qp_num, &rf->next_qp); if (err_code) goto error; iwqp->iwpd = iwpd; iwqp->ibqp.qp_num = qp_num; qp = &iwqp->sc_qp; iwqp->iwscq = to_iwcq(init_attr->send_cq); iwqp->iwrcq = to_iwcq(init_attr->recv_cq); iwqp->host_ctx.va = init_info.host_ctx; iwqp->host_ctx.pa = init_info.host_ctx_pa; iwqp->host_ctx.size = IRDMA_QP_CTX_SIZE; init_info.pd = &iwpd->sc_pd; init_info.qp_uk_init_info.qp_id = iwqp->ibqp.qp_num; if (!rdma_protocol_roce(&iwdev->ibdev, 1)) init_info.qp_uk_init_info.first_sq_wq = 1; iwqp->ctx_info.qp_compl_ctx = (uintptr_t)qp; init_waitqueue_head(&iwqp->waitq); init_waitqueue_head(&iwqp->mod_qp_waitq); if (udata) { init_info.qp_uk_init_info.abi_ver = iwpd->sc_pd.abi_ver; err_code = irdma_setup_umode_qp(udata, iwdev, iwqp, &init_info, init_attr); } else { INIT_DELAYED_WORK(&iwqp->dwork_flush, irdma_flush_worker); init_info.qp_uk_init_info.abi_ver = IRDMA_ABI_VER; err_code = irdma_setup_kmode_qp(iwdev, iwqp, &init_info, init_attr); } if (err_code) { irdma_debug(&iwdev->rf->sc_dev, IRDMA_DEBUG_VERBS, "setup qp failed\n"); goto error; } if (rdma_protocol_roce(&iwdev->ibdev, 1)) { if 
(init_attr->qp_type == IB_QPT_RC) { init_info.qp_uk_init_info.type = IRDMA_QP_TYPE_ROCE_RC; init_info.qp_uk_init_info.qp_caps = IRDMA_SEND_WITH_IMM | IRDMA_WRITE_WITH_IMM | IRDMA_ROCE; } else { init_info.qp_uk_init_info.type = IRDMA_QP_TYPE_ROCE_UD; init_info.qp_uk_init_info.qp_caps = IRDMA_SEND_WITH_IMM | IRDMA_ROCE; } } else { init_info.qp_uk_init_info.type = IRDMA_QP_TYPE_IWARP; init_info.qp_uk_init_info.qp_caps = IRDMA_WRITE_WITH_IMM; } ret = irdma_sc_qp_init(qp, &init_info); if (ret) { err_code = -EPROTO; irdma_debug(&iwdev->rf->sc_dev, IRDMA_DEBUG_VERBS, "qp_init fail\n"); goto error; } ctx_info = &iwqp->ctx_info; ctx_info->send_cq_num = iwqp->iwscq->sc_cq.cq_uk.cq_id; ctx_info->rcv_cq_num = iwqp->iwrcq->sc_cq.cq_uk.cq_id; if (rdma_protocol_roce(&iwdev->ibdev, 1)) irdma_roce_fill_and_set_qpctx_info(iwqp, ctx_info); else irdma_iw_fill_and_set_qpctx_info(iwqp, ctx_info); err_code = irdma_cqp_create_qp_cmd(iwqp); if (err_code) goto error; atomic_set(&iwqp->refcnt, 1); spin_lock_init(&iwqp->lock); spin_lock_init(&iwqp->sc_qp.pfpdu.lock); iwqp->sig_all = (init_attr->sq_sig_type == IB_SIGNAL_ALL_WR) ? 1 : 0; rf->qp_table[qp_num] = iwqp; if (rdma_protocol_roce(&iwdev->ibdev, 1)) { if (dev->ws_add(&iwdev->vsi, 0)) { irdma_cqp_qp_destroy_cmd(&rf->sc_dev, &iwqp->sc_qp); err_code = -EINVAL; goto error; } irdma_qp_add_qos(&iwqp->sc_qp); spin_lock_irqsave(&iwpd->udqp_list_lock, flags); if (iwqp->sc_qp.qp_uk.qp_type == IRDMA_QP_TYPE_ROCE_UD) list_add_tail(&iwqp->ud_list_elem, &iwpd->udqp_list); spin_unlock_irqrestore(&iwpd->udqp_list_lock, flags); } if (udata) { /* GEN_1 legacy support with libi40iw does not have expanded uresp struct */ if (udata->outlen < sizeof(uresp)) { uresp.lsmm = 1; uresp.push_idx = IRDMA_INVALID_PUSH_PAGE_INDEX_GEN_1; } else { if (rdma_protocol_iwarp(&iwdev->ibdev, 1)) uresp.lsmm = 1; } uresp.actual_sq_size = init_info.qp_uk_init_info.sq_size; uresp.actual_rq_size = init_info.qp_uk_init_info.rq_size; uresp.qp_id = qp_num; uresp.qp_caps = qp->qp_uk.qp_caps; err_code = ib_copy_to_udata(udata, &uresp, min(sizeof(uresp), udata->outlen)); if (err_code) { irdma_debug(&iwdev->rf->sc_dev, IRDMA_DEBUG_VERBS, "copy_to_udata failed\n"); kc_irdma_destroy_qp(&iwqp->ibqp, udata); return ERR_PTR(err_code); } } init_completion(&iwqp->free_qp); return &iwqp->ibqp; error: irdma_free_qp_rsrc(iwqp); return ERR_PTR(err_code); } /** * irdma_destroy_qp - destroy qp * @ibqp: qp's ib pointer also to get to device's qp address * @udata: user data */ #if __FreeBSD_version >= 1400026 int irdma_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata) #else int irdma_destroy_qp(struct ib_qp *ibqp) #endif { struct irdma_qp *iwqp = to_iwqp(ibqp); struct irdma_device *iwdev = iwqp->iwdev; unsigned long flags; if (iwqp->sc_qp.qp_uk.destroy_pending) goto free_rsrc; iwqp->sc_qp.qp_uk.destroy_pending = true; spin_lock_irqsave(&iwqp->iwpd->udqp_list_lock, flags); if (iwqp->sc_qp.qp_uk.qp_type == IRDMA_QP_TYPE_ROCE_UD) list_del(&iwqp->ud_list_elem); spin_unlock_irqrestore(&iwqp->iwpd->udqp_list_lock, flags); if (iwqp->iwarp_state == IRDMA_QP_STATE_RTS) irdma_modify_qp_to_err(&iwqp->sc_qp); irdma_qp_rem_ref(&iwqp->ibqp); wait_for_completion(&iwqp->free_qp); irdma_free_lsmm_rsrc(iwqp); if (!iwdev->rf->reset && irdma_cqp_qp_destroy_cmd(&iwdev->rf->sc_dev, &iwqp->sc_qp)) return (iwdev->rf->rdma_ver <= IRDMA_GEN_2 && !iwqp->user_mode) ? 
0 : -ENOTRECOVERABLE; free_rsrc: if (!iwqp->user_mode) { if (iwqp->iwscq) { irdma_clean_cqes(iwqp, iwqp->iwscq); if (iwqp->iwrcq != iwqp->iwscq) irdma_clean_cqes(iwqp, iwqp->iwrcq); } } irdma_remove_push_mmap_entries(iwqp); irdma_free_qp_rsrc(iwqp); return 0; } /** * irdma_create_cq - create cq * @ibcq: CQ allocated * @attr: attributes for cq * @udata: user data */ #if __FreeBSD_version >= 1400026 int irdma_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr, struct ib_udata *udata) #else struct ib_cq * irdma_create_cq(struct ib_device *ibdev, const struct ib_cq_init_attr *attr, struct ib_ucontext *context, struct ib_udata *udata) #endif { #define IRDMA_CREATE_CQ_MIN_REQ_LEN offsetofend(struct irdma_create_cq_req, user_cq_buf) #define IRDMA_CREATE_CQ_MIN_RESP_LEN offsetofend(struct irdma_create_cq_resp, cq_size) #if __FreeBSD_version >= 1400026 struct ib_device *ibdev = ibcq->device; #endif struct irdma_device *iwdev = to_iwdev(ibdev); struct irdma_pci_f *rf = iwdev->rf; #if __FreeBSD_version >= 1400026 struct irdma_cq *iwcq = to_iwcq(ibcq); #else struct irdma_cq *iwcq; #endif u32 cq_num = 0; struct irdma_sc_cq *cq; struct irdma_sc_dev *dev = &rf->sc_dev; struct irdma_cq_init_info info = {0}; int status; struct irdma_cqp_request *cqp_request; struct cqp_cmds_info *cqp_info; struct irdma_cq_uk_init_info *ukinfo = &info.cq_uk_init_info; unsigned long flags; int err_code; int entries = attr->cqe; bool cqe_64byte_ena; #if __FreeBSD_version >= 1400026 err_code = cq_validate_flags(attr->flags, dev->hw_attrs.uk_attrs.hw_rev); if (err_code) return err_code; if (udata && (udata->inlen < IRDMA_CREATE_CQ_MIN_REQ_LEN || udata->outlen < IRDMA_CREATE_CQ_MIN_RESP_LEN)) return -EINVAL; #else err_code = cq_validate_flags(attr->flags, dev->hw_attrs.uk_attrs.hw_rev); if (err_code) return ERR_PTR(err_code); if (udata && (udata->inlen < IRDMA_CREATE_CQ_MIN_REQ_LEN || udata->outlen < IRDMA_CREATE_CQ_MIN_RESP_LEN)) return ERR_PTR(-EINVAL); iwcq = kzalloc(sizeof(*iwcq), GFP_KERNEL); if (!iwcq) return ERR_PTR(-ENOMEM); #endif err_code = irdma_alloc_rsrc(rf, rf->allocated_cqs, rf->max_cq, &cq_num, &rf->next_cq); if (err_code) #if __FreeBSD_version >= 1400026 return err_code; #else goto error; #endif cq = &iwcq->sc_cq; cq->back_cq = iwcq; atomic_set(&iwcq->refcnt, 1); spin_lock_init(&iwcq->lock); INIT_LIST_HEAD(&iwcq->resize_list); INIT_LIST_HEAD(&iwcq->cmpl_generated); info.dev = dev; ukinfo->cq_size = max(entries, 4); ukinfo->cq_id = cq_num; cqe_64byte_ena = (dev->hw_attrs.uk_attrs.feature_flags & IRDMA_FEATURE_64_BYTE_CQE) ? 
true : false; ukinfo->avoid_mem_cflct = cqe_64byte_ena; iwcq->ibcq.cqe = info.cq_uk_init_info.cq_size; atomic_set(&iwcq->armed, 0); if (attr->comp_vector < rf->ceqs_count) info.ceq_id = attr->comp_vector; info.ceq_id_valid = true; info.ceqe_mask = 1; info.type = IRDMA_CQ_TYPE_IWARP; info.vsi = &iwdev->vsi; if (udata) { struct irdma_ucontext *ucontext; struct irdma_create_cq_req req = {0}; struct irdma_cq_mr *cqmr; struct irdma_pbl *iwpbl; struct irdma_pbl *iwpbl_shadow; struct irdma_cq_mr *cqmr_shadow; iwcq->user_mode = true; #if __FreeBSD_version >= 1400026 ucontext = rdma_udata_to_drv_context(udata, struct irdma_ucontext, ibucontext); #else ucontext = to_ucontext(context); #endif if (ib_copy_from_udata(&req, udata, min(sizeof(req), udata->inlen))) { err_code = -EFAULT; goto cq_free_rsrc; } spin_lock_irqsave(&ucontext->cq_reg_mem_list_lock, flags); iwpbl = irdma_get_pbl((unsigned long)req.user_cq_buf, &ucontext->cq_reg_mem_list); spin_unlock_irqrestore(&ucontext->cq_reg_mem_list_lock, flags); if (!iwpbl) { err_code = -EPROTO; goto cq_free_rsrc; } iwcq->iwpbl = iwpbl; iwcq->cq_mem_size = 0; cqmr = &iwpbl->cq_mr; if (rf->sc_dev.hw_attrs.uk_attrs.feature_flags & IRDMA_FEATURE_CQ_RESIZE && !ucontext->legacy_mode) { spin_lock_irqsave(&ucontext->cq_reg_mem_list_lock, flags); iwpbl_shadow = irdma_get_pbl((unsigned long)req.user_shadow_area, &ucontext->cq_reg_mem_list); spin_unlock_irqrestore(&ucontext->cq_reg_mem_list_lock, flags); if (!iwpbl_shadow) { err_code = -EPROTO; goto cq_free_rsrc; } iwcq->iwpbl_shadow = iwpbl_shadow; cqmr_shadow = &iwpbl_shadow->cq_mr; info.shadow_area_pa = cqmr_shadow->cq_pbl.addr; cqmr->split = true; } else { info.shadow_area_pa = cqmr->shadow; } if (iwpbl->pbl_allocated) { info.virtual_map = true; info.pbl_chunk_size = 1; info.first_pm_pbl_idx = cqmr->cq_pbl.idx; } else { info.cq_base_pa = cqmr->cq_pbl.addr; } } else { /* Kmode allocations */ int rsize; if (entries < 1 || entries > rf->max_cqe) { err_code = -EINVAL; goto cq_free_rsrc; } entries++; if (!cqe_64byte_ena && dev->hw_attrs.uk_attrs.hw_rev >= IRDMA_GEN_2) entries *= 2; ukinfo->cq_size = entries; if (cqe_64byte_ena) rsize = info.cq_uk_init_info.cq_size * sizeof(struct irdma_extended_cqe); else rsize = info.cq_uk_init_info.cq_size * sizeof(struct irdma_cqe); iwcq->kmem.size = round_up(rsize, IRDMA_HW_PAGE_SIZE); iwcq->kmem.va = irdma_allocate_dma_mem(dev->hw, &iwcq->kmem, iwcq->kmem.size, IRDMA_HW_PAGE_SIZE); if (!iwcq->kmem.va) { err_code = -ENOMEM; goto cq_free_rsrc; } iwcq->kmem_shadow.size = IRDMA_SHADOW_AREA_SIZE << 3; iwcq->kmem_shadow.va = irdma_allocate_dma_mem(dev->hw, &iwcq->kmem_shadow, iwcq->kmem_shadow.size, 64); if (!iwcq->kmem_shadow.va) { err_code = -ENOMEM; goto cq_free_rsrc; } info.shadow_area_pa = iwcq->kmem_shadow.pa; ukinfo->shadow_area = iwcq->kmem_shadow.va; ukinfo->cq_base = iwcq->kmem.va; info.cq_base_pa = iwcq->kmem.pa; } if (dev->hw_attrs.uk_attrs.hw_rev >= IRDMA_GEN_2) info.shadow_read_threshold = min(info.cq_uk_init_info.cq_size / 2, (u32)IRDMA_MAX_CQ_READ_THRESH); if (irdma_sc_cq_init(cq, &info)) { irdma_debug(&iwdev->rf->sc_dev, IRDMA_DEBUG_VERBS, "init cq fail\n"); err_code = -EPROTO; goto cq_free_rsrc; } cqp_request = irdma_alloc_and_get_cqp_request(&rf->cqp, true); if (!cqp_request) { err_code = -ENOMEM; goto cq_free_rsrc; } cqp_info = &cqp_request->info; cqp_info->cqp_cmd = IRDMA_OP_CQ_CREATE; cqp_info->post_sq = 1; cqp_info->in.u.cq_create.cq = cq; cqp_info->in.u.cq_create.check_overflow = true; cqp_info->in.u.cq_create.scratch = (uintptr_t)cqp_request; status = 
irdma_handle_cqp_op(rf, cqp_request); irdma_put_cqp_request(&rf->cqp, cqp_request); if (status) { err_code = -ENOMEM; goto cq_free_rsrc; } if (udata) { struct irdma_create_cq_resp resp = {0}; resp.cq_id = info.cq_uk_init_info.cq_id; resp.cq_size = info.cq_uk_init_info.cq_size; if (ib_copy_to_udata(udata, &resp, min(sizeof(resp), udata->outlen))) { irdma_debug(&iwdev->rf->sc_dev, IRDMA_DEBUG_VERBS, "copy to user data\n"); err_code = -EPROTO; goto cq_destroy; } } rf->cq_table[cq_num] = iwcq; init_completion(&iwcq->free_cq); #if __FreeBSD_version >= 1400026 return 0; #else return &iwcq->ibcq; #endif cq_destroy: irdma_cq_wq_destroy(rf, cq); cq_free_rsrc: irdma_cq_free_rsrc(rf, iwcq); #if __FreeBSD_version >= 1400026 return err_code; #else error: kfree(iwcq); return ERR_PTR(err_code); #endif } /** * irdma_copy_user_pgaddrs - copy user page address to pble's os locally * @iwmr: iwmr for IB's user page addresses * @pbl: ple pointer to save 1 level or 0 level pble * @level: indicated level 0, 1 or 2 */ void irdma_copy_user_pgaddrs(struct irdma_mr *iwmr, u64 *pbl, enum irdma_pble_level level) { struct ib_umem *region = iwmr->region; struct irdma_pbl *iwpbl = &iwmr->iwpbl; int chunk_pages, entry, i; struct scatterlist *sg; u64 pg_addr = 0; struct irdma_pble_alloc *palloc = &iwpbl->pble_alloc; struct irdma_pble_info *pinfo; u32 idx = 0; u32 pbl_cnt = 0; pinfo = (level == PBLE_LEVEL_1) ? NULL : palloc->level2.leaf; for_each_sg(region->sg_head.sgl, sg, region->nmap, entry) { chunk_pages = DIV_ROUND_UP(sg_dma_len(sg), iwmr->page_size); if (iwmr->type == IRDMA_MEMREG_TYPE_QP && !iwpbl->qp_mr.sq_page) iwpbl->qp_mr.sq_page = sg_page(sg); for (i = 0; i < chunk_pages; i++) { pg_addr = sg_dma_address(sg) + (i * iwmr->page_size); if ((entry + i) == 0) *pbl = pg_addr & iwmr->page_msk; else if (!(pg_addr & ~iwmr->page_msk)) *pbl = pg_addr; else continue; if (++pbl_cnt == palloc->total_cnt) break; pbl = irdma_next_pbl_addr(pbl, &pinfo, &idx); } } } /** * irdma_destroy_ah - Destroy address handle * @ibah: pointer to address handle * @ah_flags: destroy flags */ #if __FreeBSD_version >= 1400026 void irdma_destroy_ah(struct ib_ah *ibah, u32 ah_flags) { struct irdma_device *iwdev = to_iwdev(ibah->device); struct irdma_ah *ah = to_iwah(ibah); irdma_ah_cqp_op(iwdev->rf, &ah->sc_ah, IRDMA_OP_AH_DESTROY, false, NULL, ah); irdma_free_rsrc(iwdev->rf, iwdev->rf->allocated_ahs, ah->sc_ah.ah_info.ah_idx); } #endif #if __FreeBSD_version < 1400026 int irdma_destroy_ah(struct ib_ah *ibah) { struct irdma_device *iwdev = to_iwdev(ibah->device); struct irdma_ah *ah = to_iwah(ibah); irdma_ah_cqp_op(iwdev->rf, &ah->sc_ah, IRDMA_OP_AH_DESTROY, false, NULL, ah); irdma_free_rsrc(iwdev->rf, iwdev->rf->allocated_ahs, ah->sc_ah.ah_info.ah_idx); kfree(ah); return 0; } #endif #if __FreeBSD_version >= 1400026 int irdma_dereg_mr(struct ib_mr *ib_mr, struct ib_udata *udata) #else int irdma_dereg_mr(struct ib_mr *ib_mr) #endif { struct irdma_mr *iwmr = to_iwmr(ib_mr); struct irdma_device *iwdev = to_iwdev(ib_mr->device); struct irdma_pbl *iwpbl = &iwmr->iwpbl; int ret; if (iwmr->type != IRDMA_MEMREG_TYPE_MEM) { if (iwmr->region) { struct irdma_ucontext *ucontext; #if __FreeBSD_version >= 1400026 ucontext = rdma_udata_to_drv_context(udata, struct irdma_ucontext, ibucontext); #else struct ib_pd *ibpd = ib_mr->pd; ucontext = to_ucontext(ibpd->uobject->context); #endif irdma_del_memlist(iwmr, ucontext); } goto done; } ret = irdma_hwdereg_mr(ib_mr); if (ret) return ret; irdma_free_stag(iwdev, iwmr->stag); done: if (iwpbl->pbl_allocated) 
irdma_free_pble(iwdev->rf->pble_rsrc, &iwpbl->pble_alloc); if (iwmr->region) ib_umem_release(iwmr->region); kfree(iwmr); return 0; } /* * irdma_rereg_user_mr - Re-Register a user memory region @ibmr: ib mem to access iwarp mr pointer @flags: bit mask to * indicate which of the attr's of MR modified @start: virtual start address @len: length of mr @virt: virtual address * @new access flags: bit mask of access flags @new_pd: ptr of pd @udata: user data */ int irdma_rereg_user_mr(struct ib_mr *ib_mr, int flags, u64 start, u64 len, u64 virt, int new_access, struct ib_pd *new_pd, struct ib_udata *udata) { struct irdma_device *iwdev = to_iwdev(ib_mr->device); struct irdma_mr *iwmr = to_iwmr(ib_mr); struct irdma_pbl *iwpbl = &iwmr->iwpbl; int ret; if (len > iwdev->rf->sc_dev.hw_attrs.max_mr_size) return -EINVAL; if (flags & ~(IB_MR_REREG_TRANS | IB_MR_REREG_PD | IB_MR_REREG_ACCESS)) return -EOPNOTSUPP; ret = irdma_hwdereg_mr(ib_mr); if (ret) return ret; if (flags & IB_MR_REREG_ACCESS) iwmr->access = new_access; if (flags & IB_MR_REREG_PD) { iwmr->ibmr.pd = new_pd; iwmr->ibmr.device = new_pd->device; } if (flags & IB_MR_REREG_TRANS) { if (iwpbl->pbl_allocated) { irdma_free_pble(iwdev->rf->pble_rsrc, &iwpbl->pble_alloc); iwpbl->pbl_allocated = false; } if (iwmr->region) { ib_umem_release(iwmr->region); iwmr->region = NULL; } ib_mr = irdma_rereg_mr_trans(iwmr, start, len, virt, udata); if (IS_ERR(ib_mr)) return PTR_ERR(ib_mr); } else { ret = irdma_hwreg_mr(iwdev, iwmr, iwmr->access); if (ret) return ret; } return 0; } int kc_irdma_set_roce_cm_info(struct irdma_qp *iwqp, struct ib_qp_attr *attr, u16 *vlan_id) { int ret; union ib_gid sgid; struct ib_gid_attr sgid_attr; struct irdma_av *av = &iwqp->roce_ah.av; ret = ib_get_cached_gid(iwqp->ibqp.device, attr->ah_attr.port_num, attr->ah_attr.grh.sgid_index, &sgid, &sgid_attr); if (ret) return ret; if (sgid_attr.ndev) { *vlan_id = rdma_vlan_dev_vlan_id(sgid_attr.ndev); - ether_addr_copy(iwqp->ctx_info.roce_info->mac_addr, IF_LLADDR(sgid_attr.ndev)); + ether_addr_copy(iwqp->ctx_info.roce_info->mac_addr, if_getlladdr(sgid_attr.ndev)); } av->net_type = kc_rdma_gid_attr_network_type(sgid_attr, sgid_attr.gid_type, &sgid); rdma_gid2ip((struct sockaddr *)&av->sgid_addr, &sgid); dev_put(sgid_attr.ndev); iwqp->sc_qp.user_pri = iwqp->ctx_info.user_pri; return 0; } #if __FreeBSD_version >= 1400026 /** * irdma_destroy_cq - destroy cq * @ib_cq: cq pointer * @udata: user data */ void irdma_destroy_cq(struct ib_cq *ib_cq, struct ib_udata *udata) { struct irdma_device *iwdev = to_iwdev(ib_cq->device); struct irdma_cq *iwcq = to_iwcq(ib_cq); struct irdma_sc_cq *cq = &iwcq->sc_cq; struct irdma_sc_dev *dev = cq->dev; struct irdma_sc_ceq *ceq = dev->ceq[cq->ceq_id]; struct irdma_ceq *iwceq = container_of(ceq, struct irdma_ceq, sc_ceq); unsigned long flags; spin_lock_irqsave(&iwcq->lock, flags); if (!list_empty(&iwcq->cmpl_generated)) irdma_remove_cmpls_list(iwcq); if (!list_empty(&iwcq->resize_list)) irdma_process_resize_list(iwcq, iwdev, NULL); spin_unlock_irqrestore(&iwcq->lock, flags); irdma_cq_rem_ref(ib_cq); wait_for_completion(&iwcq->free_cq); irdma_cq_wq_destroy(iwdev->rf, cq); spin_lock_irqsave(&iwceq->ce_lock, flags); irdma_sc_cleanup_ceqes(cq, ceq); spin_unlock_irqrestore(&iwceq->ce_lock, flags); irdma_cq_free_rsrc(iwdev->rf, iwcq); } #endif #if __FreeBSD_version < 1400026 /** * irdma_destroy_cq - destroy cq * @ib_cq: cq pointer */ int irdma_destroy_cq(struct ib_cq *ib_cq) { struct irdma_device *iwdev = to_iwdev(ib_cq->device); struct irdma_cq *iwcq = 
to_iwcq(ib_cq); struct irdma_sc_cq *cq = &iwcq->sc_cq; struct irdma_sc_dev *dev = cq->dev; struct irdma_sc_ceq *ceq = dev->ceq[cq->ceq_id]; struct irdma_ceq *iwceq = container_of(ceq, struct irdma_ceq, sc_ceq); unsigned long flags; spin_lock_irqsave(&iwcq->lock, flags); if (!list_empty(&iwcq->cmpl_generated)) irdma_remove_cmpls_list(iwcq); if (!list_empty(&iwcq->resize_list)) irdma_process_resize_list(iwcq, iwdev, NULL); spin_unlock_irqrestore(&iwcq->lock, flags); irdma_cq_rem_ref(ib_cq); wait_for_completion(&iwcq->free_cq); irdma_cq_wq_destroy(iwdev->rf, cq); spin_lock_irqsave(&iwceq->ce_lock, flags); irdma_sc_cleanup_ceqes(cq, ceq); spin_unlock_irqrestore(&iwceq->ce_lock, flags); irdma_cq_free_rsrc(iwdev->rf, iwcq); kfree(iwcq); return 0; } #endif /** * irdma_alloc_mw - Allocate memory window * @pd: Protection domain * @type: Window type * @udata: user data pointer */ struct ib_mw * irdma_alloc_mw(struct ib_pd *pd, enum ib_mw_type type, struct ib_udata *udata) { struct irdma_device *iwdev = to_iwdev(pd->device); struct irdma_mr *iwmr; int err_code; u32 stag; iwmr = kzalloc(sizeof(*iwmr), GFP_KERNEL); if (!iwmr) return ERR_PTR(-ENOMEM); stag = irdma_create_stag(iwdev); if (!stag) { kfree(iwmr); return ERR_PTR(-ENOMEM); } iwmr->stag = stag; iwmr->ibmw.rkey = stag; iwmr->ibmw.pd = pd; iwmr->ibmw.type = type; iwmr->ibmw.device = pd->device; err_code = irdma_hw_alloc_mw(iwdev, iwmr); if (err_code) { irdma_free_stag(iwdev, stag); kfree(iwmr); return ERR_PTR(err_code); } return &iwmr->ibmw; } /** * kc_set_loc_seq_num_mss - Set local seq number and mss * @cm_node: cm node info */ void kc_set_loc_seq_num_mss(struct irdma_cm_node *cm_node) { struct timespec ts; getnanotime(&ts); cm_node->tcp_cntxt.loc_seq_num = ts.tv_nsec; if (cm_node->iwdev->vsi.mtu > 1500 && 2 * cm_node->iwdev->vsi.mtu > cm_node->iwdev->rcv_wnd) cm_node->tcp_cntxt.mss = (cm_node->ipv4) ? (1500 - IRDMA_MTU_TO_MSS_IPV4) : (1500 - IRDMA_MTU_TO_MSS_IPV6); else cm_node->tcp_cntxt.mss = (cm_node->ipv4) ? 
(cm_node->iwdev->vsi.mtu - IRDMA_MTU_TO_MSS_IPV4) : (cm_node->iwdev->vsi.mtu - IRDMA_MTU_TO_MSS_IPV6); } #if __FreeBSD_version < 1400026 struct irdma_vma_data { struct list_head list; struct vm_area_struct *vma; struct mutex *vma_list_mutex; /* protect the vma_list */ }; /** * irdma_vma_open - * @vma: User VMA */ static void irdma_vma_open(struct vm_area_struct *vma) { vma->vm_ops = NULL; } /** * irdma_vma_close - Remove vma data from vma list * @vma: User VMA */ static void irdma_vma_close(struct vm_area_struct *vma) { struct irdma_vma_data *vma_data; vma_data = vma->vm_private_data; vma->vm_private_data = NULL; vma_data->vma = NULL; mutex_lock(vma_data->vma_list_mutex); list_del(&vma_data->list); mutex_unlock(vma_data->vma_list_mutex); kfree(vma_data); } static const struct vm_operations_struct irdma_vm_ops = { .open = irdma_vma_open, .close = irdma_vma_close }; /** * irdma_set_vma_data - Save vma data in context list * @vma: User VMA * @context: ib user context */ static int irdma_set_vma_data(struct vm_area_struct *vma, struct irdma_ucontext *context) { struct list_head *vma_head = &context->vma_list; struct irdma_vma_data *vma_entry; vma_entry = kzalloc(sizeof(*vma_entry), GFP_KERNEL); if (!vma_entry) return -ENOMEM; vma->vm_private_data = vma_entry; vma->vm_ops = &irdma_vm_ops; vma_entry->vma = vma; vma_entry->vma_list_mutex = &context->vma_list_mutex; mutex_lock(&context->vma_list_mutex); list_add(&vma_entry->list, vma_head); mutex_unlock(&context->vma_list_mutex); return 0; } /** * irdma_disassociate_ucontext - Disassociate user context * @context: ib user context */ void irdma_disassociate_ucontext(struct ib_ucontext *context) { struct irdma_ucontext *ucontext = to_ucontext(context); struct irdma_vma_data *vma_data, *n; struct vm_area_struct *vma; mutex_lock(&ucontext->vma_list_mutex); list_for_each_entry_safe(vma_data, n, &ucontext->vma_list, list) { vma = vma_data->vma; zap_vma_ptes(vma, vma->vm_start, PAGE_SIZE); vma->vm_ops = NULL; list_del(&vma_data->list); kfree(vma_data); } mutex_unlock(&ucontext->vma_list_mutex); } int rdma_user_mmap_io(struct ib_ucontext *context, struct vm_area_struct *vma, unsigned long pfn, unsigned long size, pgprot_t prot) { if (io_remap_pfn_range(vma, vma->vm_start, pfn, size, prot)) return -EAGAIN; return irdma_set_vma_data(vma, to_ucontext(context)); } #else /** * irdma_disassociate_ucontext - Disassociate user context * @context: ib user context */ void irdma_disassociate_ucontext(struct ib_ucontext *context) { } #endif struct ib_device * -ib_device_get_by_netdev(struct ifnet *netdev, int driver_id) +ib_device_get_by_netdev(if_t netdev, int driver_id) { struct irdma_device *iwdev; struct irdma_handler *hdl; unsigned long flags; spin_lock_irqsave(&irdma_handler_lock, flags); list_for_each_entry(hdl, &irdma_handlers, list) { iwdev = hdl->iwdev; if (netdev == iwdev->netdev) { spin_unlock_irqrestore(&irdma_handler_lock, flags); return &iwdev->ibdev; } } spin_unlock_irqrestore(&irdma_handler_lock, flags); return NULL; } void ib_unregister_device_put(struct ib_device *device) { ib_unregister_device(device); } /** * irdma_query_gid_roce - Query port GID for Roce * @ibdev: device pointer from stack * @port: port number * @index: Entry index * @gid: Global ID */ int irdma_query_gid_roce(struct ib_device *ibdev, u8 port, int index, union ib_gid *gid) { int ret; ret = rdma_query_gid(ibdev, port, index, gid); if (ret == -EAGAIN) { memcpy(gid, &zgid, sizeof(*gid)); return 0; } return ret; } /** * irdma_modify_port - modify port attributes * @ibdev: device 
pointer from stack * @port: port number for query * @mask: Property mask * @props: returning device attributes */ int irdma_modify_port(struct ib_device *ibdev, u8 port, int mask, struct ib_port_modify *props) { if (port > 1) return -EINVAL; return 0; } /** * irdma_query_pkey - Query partition key * @ibdev: device pointer from stack * @port: port number * @index: index of pkey * @pkey: pointer to store the pkey */ int irdma_query_pkey(struct ib_device *ibdev, u8 port, u16 index, u16 *pkey) { if (index >= IRDMA_PKEY_TBL_SZ) return -EINVAL; *pkey = IRDMA_DEFAULT_PKEY; return 0; } int irdma_roce_port_immutable(struct ib_device *ibdev, u8 port_num, struct ib_port_immutable *immutable) { struct ib_port_attr attr; int err; immutable->core_cap_flags = RDMA_CORE_PORT_IBA_ROCE_UDP_ENCAP; err = ib_query_port(ibdev, port_num, &attr); if (err) return err; immutable->max_mad_size = IB_MGMT_MAD_SIZE; immutable->pkey_tbl_len = attr.pkey_tbl_len; immutable->gid_tbl_len = attr.gid_tbl_len; return 0; } int irdma_iw_port_immutable(struct ib_device *ibdev, u8 port_num, struct ib_port_immutable *immutable) { struct ib_port_attr attr; int err; immutable->core_cap_flags = RDMA_CORE_PORT_IWARP; err = ib_query_port(ibdev, port_num, &attr); if (err) return err; immutable->gid_tbl_len = 1; return 0; } /** * irdma_query_port - get port attributes * @ibdev: device pointer from stack * @port: port number for query * @props: returning device attributes */ int irdma_query_port(struct ib_device *ibdev, u8 port, struct ib_port_attr *props) { struct irdma_device *iwdev = to_iwdev(ibdev); - struct ifnet *netdev = iwdev->netdev; + if_t netdev = iwdev->netdev; /* no need to zero out pros here. done by caller */ props->max_mtu = IB_MTU_4096; - props->active_mtu = ib_mtu_int_to_enum(netdev->if_mtu); + props->active_mtu = ib_mtu_int_to_enum(if_getmtu(netdev)); props->lid = 1; props->lmc = 0; props->sm_lid = 0; props->sm_sl = 0; - if ((netdev->if_link_state == LINK_STATE_UP) && (netdev->if_drv_flags & IFF_DRV_RUNNING)) { + if ((if_getlinkstate(netdev) == LINK_STATE_UP) && + (if_getdrvflags(netdev) & IFF_DRV_RUNNING)) { props->state = IB_PORT_ACTIVE; props->phys_state = IB_PORT_PHYS_STATE_LINK_UP; } else { props->state = IB_PORT_DOWN; props->phys_state = IB_PORT_PHYS_STATE_DISABLED; } ib_get_eth_speed(ibdev, port, &props->active_speed, &props->active_width); if (rdma_protocol_roce(ibdev, 1)) { props->gid_tbl_len = 32; kc_set_props_ip_gid_caps(props); props->pkey_tbl_len = IRDMA_PKEY_TBL_SZ; } else { props->gid_tbl_len = 1; } props->qkey_viol_cntr = 0; props->port_cap_flags |= IB_PORT_CM_SUP | IB_PORT_REINIT_SUP; props->max_msg_sz = iwdev->rf->sc_dev.hw_attrs.max_hw_outbound_msg_size; return 0; } static const char *const irdma_hw_stat_names[] = { /* gen1 - 32-bit */ [IRDMA_HW_STAT_INDEX_IP4RXDISCARD] = "ip4InDiscards", [IRDMA_HW_STAT_INDEX_IP4RXTRUNC] = "ip4InTruncatedPkts", [IRDMA_HW_STAT_INDEX_IP4TXNOROUTE] = "ip4OutNoRoutes", [IRDMA_HW_STAT_INDEX_IP6RXDISCARD] = "ip6InDiscards", [IRDMA_HW_STAT_INDEX_IP6RXTRUNC] = "ip6InTruncatedPkts", [IRDMA_HW_STAT_INDEX_IP6TXNOROUTE] = "ip6OutNoRoutes", [IRDMA_HW_STAT_INDEX_RXVLANERR] = "rxVlanErrors", /* gen1 - 64-bit */ [IRDMA_HW_STAT_INDEX_IP4RXOCTS] = "ip4InOctets", [IRDMA_HW_STAT_INDEX_IP4RXPKTS] = "ip4InPkts", [IRDMA_HW_STAT_INDEX_IP4RXFRAGS] = "ip4InReasmRqd", [IRDMA_HW_STAT_INDEX_IP4RXMCPKTS] = "ip4InMcastPkts", [IRDMA_HW_STAT_INDEX_IP4TXOCTS] = "ip4OutOctets", [IRDMA_HW_STAT_INDEX_IP4TXPKTS] = "ip4OutPkts", [IRDMA_HW_STAT_INDEX_IP4TXFRAGS] = "ip4OutSegRqd", 
[IRDMA_HW_STAT_INDEX_IP4TXMCPKTS] = "ip4OutMcastPkts", [IRDMA_HW_STAT_INDEX_IP6RXOCTS] = "ip6InOctets", [IRDMA_HW_STAT_INDEX_IP6RXPKTS] = "ip6InPkts", [IRDMA_HW_STAT_INDEX_IP6RXFRAGS] = "ip6InReasmRqd", [IRDMA_HW_STAT_INDEX_IP6RXMCPKTS] = "ip6InMcastPkts", [IRDMA_HW_STAT_INDEX_IP6TXOCTS] = "ip6OutOctets", [IRDMA_HW_STAT_INDEX_IP6TXPKTS] = "ip6OutPkts", [IRDMA_HW_STAT_INDEX_IP6TXFRAGS] = "ip6OutSegRqd", [IRDMA_HW_STAT_INDEX_IP6TXMCPKTS] = "ip6OutMcastPkts", [IRDMA_HW_STAT_INDEX_RDMARXRDS] = "InRdmaReads", [IRDMA_HW_STAT_INDEX_RDMARXSNDS] = "InRdmaSends", [IRDMA_HW_STAT_INDEX_RDMARXWRS] = "InRdmaWrites", [IRDMA_HW_STAT_INDEX_RDMATXRDS] = "OutRdmaReads", [IRDMA_HW_STAT_INDEX_RDMATXSNDS] = "OutRdmaSends", [IRDMA_HW_STAT_INDEX_RDMATXWRS] = "OutRdmaWrites", [IRDMA_HW_STAT_INDEX_RDMAVBND] = "RdmaBnd", [IRDMA_HW_STAT_INDEX_RDMAVINV] = "RdmaInv", /* gen2 - 32-bit */ [IRDMA_HW_STAT_INDEX_RXRPCNPHANDLED] = "cnpHandled", [IRDMA_HW_STAT_INDEX_RXRPCNPIGNORED] = "cnpIgnored", [IRDMA_HW_STAT_INDEX_TXNPCNPSENT] = "cnpSent", /* gen2 - 64-bit */ [IRDMA_HW_STAT_INDEX_IP4RXMCOCTS] = "ip4InMcastOctets", [IRDMA_HW_STAT_INDEX_IP4TXMCOCTS] = "ip4OutMcastOctets", [IRDMA_HW_STAT_INDEX_IP6RXMCOCTS] = "ip6InMcastOctets", [IRDMA_HW_STAT_INDEX_IP6TXMCOCTS] = "ip6OutMcastOctets", [IRDMA_HW_STAT_INDEX_UDPRXPKTS] = "RxUDP", [IRDMA_HW_STAT_INDEX_UDPTXPKTS] = "TxUDP", [IRDMA_HW_STAT_INDEX_RXNPECNMARKEDPKTS] = "RxECNMrkd", [IRDMA_HW_STAT_INDEX_TCPRTXSEG] = "RetransSegs", [IRDMA_HW_STAT_INDEX_TCPRXOPTERR] = "InOptErrors", [IRDMA_HW_STAT_INDEX_TCPRXPROTOERR] = "InProtoErrors", [IRDMA_HW_STAT_INDEX_TCPRXSEGS] = "InSegs", [IRDMA_HW_STAT_INDEX_TCPTXSEG] = "OutSegs", }; /** * irdma_alloc_hw_stats - Allocate a hw stats structure * @ibdev: device pointer from stack * @port_num: port number */ struct rdma_hw_stats * irdma_alloc_hw_stats(struct ib_device *ibdev, u8 port_num) { struct irdma_device *iwdev = to_iwdev(ibdev); struct irdma_sc_dev *dev = &iwdev->rf->sc_dev; int num_counters = dev->hw_attrs.max_stat_idx; unsigned long lifespan = RDMA_HW_STATS_DEFAULT_LIFESPAN; return rdma_alloc_hw_stats_struct(irdma_hw_stat_names, num_counters, lifespan); } /** * irdma_get_hw_stats - Populates the rdma_hw_stats structure * @ibdev: device pointer from stack * @stats: stats pointer from stack * @port_num: port number * @index: which hw counter the stack is requesting we update */ int irdma_get_hw_stats(struct ib_device *ibdev, struct rdma_hw_stats *stats, u8 port_num, int index) { struct irdma_device *iwdev = to_iwdev(ibdev); struct irdma_dev_hw_stats *hw_stats = &iwdev->vsi.pestat->hw_stats; if (iwdev->rf->rdma_ver >= IRDMA_GEN_2) irdma_cqp_gather_stats_cmd(&iwdev->rf->sc_dev, iwdev->vsi.pestat, true); memcpy(&stats->value[0], hw_stats, sizeof(u64)* stats->num_counters); return stats->num_counters; } /** * irdma_query_gid - Query port GID * @ibdev: device pointer from stack * @port: port number * @index: Entry index * @gid: Global ID */ int irdma_query_gid(struct ib_device *ibdev, u8 port, int index, union ib_gid *gid) { struct irdma_device *iwdev = to_iwdev(ibdev); memset(gid->raw, 0, sizeof(gid->raw)); - ether_addr_copy(gid->raw, IF_LLADDR(iwdev->netdev)); + ether_addr_copy(gid->raw, if_getlladdr(iwdev->netdev)); return 0; } enum rdma_link_layer irdma_get_link_layer(struct ib_device *ibdev, u8 port_num) { return IB_LINK_LAYER_ETHERNET; } inline enum ib_mtu ib_mtu_int_to_enum(int mtu) { if (mtu >= 4096) return IB_MTU_4096; else if (mtu >= 2048) return IB_MTU_2048; else if (mtu >= 1024) return IB_MTU_1024; else if (mtu >= 512) return 
IB_MTU_512; else return IB_MTU_256; } inline void kc_set_roce_uverbs_cmd_mask(struct irdma_device *iwdev) { iwdev->ibdev.uverbs_cmd_mask |= BIT_ULL(IB_USER_VERBS_CMD_ATTACH_MCAST) | BIT_ULL(IB_USER_VERBS_CMD_CREATE_AH) | BIT_ULL(IB_USER_VERBS_CMD_DESTROY_AH) | BIT_ULL(IB_USER_VERBS_CMD_DETACH_MCAST); } inline void kc_set_rdma_uverbs_cmd_mask(struct irdma_device *iwdev) { iwdev->ibdev.uverbs_cmd_mask = BIT_ULL(IB_USER_VERBS_CMD_GET_CONTEXT) | BIT_ULL(IB_USER_VERBS_CMD_QUERY_DEVICE) | BIT_ULL(IB_USER_VERBS_CMD_QUERY_PORT) | BIT_ULL(IB_USER_VERBS_CMD_ALLOC_PD) | BIT_ULL(IB_USER_VERBS_CMD_DEALLOC_PD) | BIT_ULL(IB_USER_VERBS_CMD_REG_MR) | BIT_ULL(IB_USER_VERBS_CMD_REREG_MR) | BIT_ULL(IB_USER_VERBS_CMD_DEREG_MR) | BIT_ULL(IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL) | BIT_ULL(IB_USER_VERBS_CMD_CREATE_CQ) | BIT_ULL(IB_USER_VERBS_CMD_RESIZE_CQ) | BIT_ULL(IB_USER_VERBS_CMD_DESTROY_CQ) | BIT_ULL(IB_USER_VERBS_CMD_REQ_NOTIFY_CQ) | BIT_ULL(IB_USER_VERBS_CMD_CREATE_QP) | BIT_ULL(IB_USER_VERBS_CMD_MODIFY_QP) | BIT_ULL(IB_USER_VERBS_CMD_QUERY_QP) | BIT_ULL(IB_USER_VERBS_CMD_POLL_CQ) | BIT_ULL(IB_USER_VERBS_CMD_DESTROY_QP) | BIT_ULL(IB_USER_VERBS_CMD_ALLOC_MW) | BIT_ULL(IB_USER_VERBS_CMD_BIND_MW) | BIT_ULL(IB_USER_VERBS_CMD_DEALLOC_MW) | BIT_ULL(IB_USER_VERBS_CMD_POST_RECV) | BIT_ULL(IB_USER_VERBS_CMD_POST_SEND); iwdev->ibdev.uverbs_ex_cmd_mask = BIT_ULL(IB_USER_VERBS_EX_CMD_MODIFY_QP) | BIT_ULL(IB_USER_VERBS_EX_CMD_QUERY_DEVICE); if (iwdev->rf->rdma_ver >= IRDMA_GEN_2) iwdev->ibdev.uverbs_ex_cmd_mask |= BIT_ULL(IB_USER_VERBS_EX_CMD_CREATE_CQ); } int ib_get_eth_speed(struct ib_device *ibdev, u32 port_num, u8 *speed, u8 *width) { - struct ifnet *netdev = ibdev->get_netdev(ibdev, port_num); + if_t netdev = ibdev->get_netdev(ibdev, port_num); u32 netdev_speed; if (!netdev) return -ENODEV; - netdev_speed = netdev->if_baudrate; + netdev_speed = if_getbaudrate(netdev); dev_put(netdev); if (netdev_speed <= SPEED_1000) { *width = IB_WIDTH_1X; *speed = IB_SPEED_SDR; } else if (netdev_speed <= SPEED_10000) { *width = IB_WIDTH_1X; *speed = IB_SPEED_FDR10; } else if (netdev_speed <= SPEED_20000) { *width = IB_WIDTH_4X; *speed = IB_SPEED_DDR; } else if (netdev_speed <= SPEED_25000) { *width = IB_WIDTH_1X; *speed = IB_SPEED_EDR; } else if (netdev_speed <= SPEED_40000) { *width = IB_WIDTH_4X; *speed = IB_SPEED_FDR10; } else { *width = IB_WIDTH_4X; *speed = IB_SPEED_EDR; } return 0; } diff --git a/sys/dev/irdma/irdma_main.h b/sys/dev/irdma/irdma_main.h index 27b0f2c1264a..91265f2445bb 100644 --- a/sys/dev/irdma/irdma_main.h +++ b/sys/dev/irdma/irdma_main.h @@ -1,608 +1,608 @@ /*- * SPDX-License-Identifier: GPL-2.0 or Linux-OpenIB * * Copyright (c) 2015 - 2022 Intel Corporation * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU * General Public License (GPL) Version 2, available from the file * COPYING in the main directory of this source tree, or the * OpenFabrics.org BSD license below: * * Redistribution and use in source and binary forms, with or * without modification, are permitted provided that the following * conditions are met: * * - Redistributions of source code must retain the above * copyright notice, this list of conditions and the following * disclaimer. * * - Redistributions in binary form must reproduce the above * copyright notice, this list of conditions and the following * disclaimer in the documentation and/or other materials * provided with the distribution. 
* * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ /*$FreeBSD$*/ #ifndef IRDMA_MAIN_H #define IRDMA_MAIN_H #include #include #include #include #include #include #include #include #if __FreeBSD_version >= 1400000 #include #endif #include #include #include #include #include #include #include #include #include "osdep.h" #include "irdma_defs.h" #include "irdma_hmc.h" #include "irdma_type.h" #include "irdma_ws.h" #include "irdma_protos.h" #include "irdma_pble.h" #include "irdma_cm.h" #include "fbsd_kcompat.h" #include "irdma-abi.h" #include "irdma_verbs.h" #include "irdma_user.h" #include "irdma_puda.h" extern struct list_head irdma_handlers; extern spinlock_t irdma_handler_lock; extern bool irdma_upload_context; #define IRDMA_FW_VER_DEFAULT 2 #define IRDMA_HW_VER 2 #define IRDMA_ARP_ADD 1 #define IRDMA_ARP_DELETE 2 #define IRDMA_ARP_RESOLVE 3 #define IRDMA_MACIP_ADD 1 #define IRDMA_MACIP_DELETE 2 #define IW_CCQ_SIZE (IRDMA_CQP_SW_SQSIZE_2048 + 1) #define IW_CEQ_SIZE 2048 #define IW_AEQ_SIZE 2048 #define RX_BUF_SIZE (1536 + 8) #define IW_REG0_SIZE (4 * 1024) #define IW_TX_TIMEOUT (6 * HZ) #define IW_FIRST_QPN 1 #define IW_SW_CONTEXT_ALIGN 1024 #define MAX_DPC_ITERATIONS 128 #define IRDMA_EVENT_TIMEOUT_MS 5000 #define IRDMA_VCHNL_EVENT_TIMEOUT_MS 10000 #define IRDMA_RST_TIMEOUT_HZ 4 #define IRDMA_NO_QSET 0xffff #define IW_CFG_FPM_QP_COUNT 32768 #define IRDMA_MAX_PAGES_PER_FMR 262144 #define IRDMA_MIN_PAGES_PER_FMR 1 #define IRDMA_CQP_COMPL_RQ_WQE_FLUSHED 2 #define IRDMA_CQP_COMPL_SQ_WQE_FLUSHED 3 #define IRDMA_Q_TYPE_PE_AEQ 0x80 #define IRDMA_Q_INVALID_IDX 0xffff #define IRDMA_REM_ENDPOINT_TRK_QPID 3 #define IRDMA_DRV_OPT_ENA_MPA_VER_0 0x00000001 #define IRDMA_DRV_OPT_DISABLE_MPA_CRC 0x00000002 #define IRDMA_DRV_OPT_DISABLE_FIRST_WRITE 0x00000004 #define IRDMA_DRV_OPT_DISABLE_INTF 0x00000008 #define IRDMA_DRV_OPT_ENA_MSI 0x00000010 #define IRDMA_DRV_OPT_DUAL_LOGICAL_PORT 0x00000020 #define IRDMA_DRV_OPT_NO_INLINE_DATA 0x00000080 #define IRDMA_DRV_OPT_DISABLE_INT_MOD 0x00000100 #define IRDMA_DRV_OPT_DISABLE_VIRT_WQ 0x00000200 #define IRDMA_DRV_OPT_ENA_PAU 0x00000400 #define IRDMA_DRV_OPT_MCAST_LOGPORT_MAP 0x00000800 #define IW_HMC_OBJ_TYPE_NUM ARRAY_SIZE(iw_hmc_obj_types) #define IRDMA_ROCE_CWND_DEFAULT 0x400 #define IRDMA_ROCE_ACKCREDS_DEFAULT 0x1E #define IRDMA_FLUSH_SQ BIT(0) #define IRDMA_FLUSH_RQ BIT(1) #define IRDMA_REFLUSH BIT(2) #define IRDMA_FLUSH_WAIT BIT(3) #define IRDMA_IRQ_NAME_STR_LEN 64 enum init_completion_state { INVALID_STATE = 0, INITIAL_STATE, CQP_CREATED, HMC_OBJS_CREATED, HW_RSRC_INITIALIZED, CCQ_CREATED, CEQ0_CREATED, /* Last state of probe */ ILQ_CREATED, IEQ_CREATED, REM_ENDPOINT_TRK_CREATED, CEQS_CREATED, PBLE_CHUNK_MEM, AEQ_CREATED, IP_ADDR_REGISTERED, /* Last state of open */ }; struct irdma_rsrc_limits { u32 qplimit; u32 mrlimit; u32 cqlimit; }; struct irdma_cqp_err_info { u16 maj; u16 min; const char *desc; }; struct irdma_cqp_compl_info { u32 op_ret_val; u16 maj_err_code; u16 min_err_code; bool error; u8 op_code; }; struct irdma_cqp_request { struct cqp_cmds_info info; wait_queue_head_t waitq; struct list_head list; atomic_t refcnt; void 
(*callback_fcn)(struct irdma_cqp_request *cqp_request); void *param; struct irdma_cqp_compl_info compl_info; bool waiting:1; bool request_done:1; bool dynamic:1; }; struct irdma_cqp { struct irdma_sc_cqp sc_cqp; spinlock_t req_lock; /* protect CQP request list */ spinlock_t compl_lock; /* protect CQP completion processing */ wait_queue_head_t waitq; wait_queue_head_t remove_wq; struct irdma_dma_mem sq; struct irdma_dma_mem host_ctx; u64 *scratch_array; struct irdma_cqp_request *cqp_requests; struct list_head cqp_avail_reqs; struct list_head cqp_pending_reqs; }; struct irdma_ccq { struct irdma_sc_cq sc_cq; struct irdma_dma_mem mem_cq; struct irdma_dma_mem shadow_area; }; struct irdma_ceq { struct irdma_sc_ceq sc_ceq; struct irdma_dma_mem mem; u32 irq; u32 msix_idx; struct irdma_pci_f *rf; struct tasklet_struct dpc_tasklet; spinlock_t ce_lock; /* sync cq destroy with cq completion event notification */ }; struct irdma_aeq { struct irdma_sc_aeq sc_aeq; struct irdma_dma_mem mem; struct irdma_pble_alloc palloc; bool virtual_map; }; struct irdma_arp_entry { u32 ip_addr[4]; u8 mac_addr[ETH_ALEN]; }; struct irdma_msix_vector { u32 idx; u32 irq; u32 cpu_affinity; u32 ceq_id; char name[IRDMA_IRQ_NAME_STR_LEN]; struct resource *res; void *tag; }; struct irdma_mc_table_info { u32 mgn; u32 dest_ip[4]; bool lan_fwd:1; bool ipv4_valid:1; }; struct mc_table_list { struct list_head list; struct irdma_mc_table_info mc_info; struct irdma_mcast_grp_info mc_grp_ctx; }; struct irdma_qv_info { u32 v_idx; /* msix_vector */ u16 ceq_idx; u16 aeq_idx; u8 itr_idx; }; struct irdma_qvlist_info { u32 num_vectors; struct irdma_qv_info qv_info[1]; }; struct irdma_gen_ops { void (*request_reset)(struct irdma_pci_f *rf); int (*register_qset)(struct irdma_sc_vsi *vsi, struct irdma_ws_node *tc_node); void (*unregister_qset)(struct irdma_sc_vsi *vsi, struct irdma_ws_node *tc_node); }; struct irdma_pci_f { bool reset:1; bool rsrc_created:1; bool msix_shared:1; bool ftype:1; u8 rsrc_profile; u8 *hmc_info_mem; u8 *mem_rsrc; u8 rdma_ver; u8 rst_to; /* Not used in SRIOV VF mode */ u8 pf_id; enum irdma_protocol_used protocol_used; bool en_rem_endpoint_trk:1; bool dcqcn_ena:1; u32 sd_type; u32 msix_count; u32 max_mr; u32 max_qp; u32 max_cq; u32 max_ah; u32 next_ah; u32 max_mcg; u32 next_mcg; u32 max_pd; u32 next_qp; u32 next_cq; u32 next_pd; u32 max_mr_size; u32 max_cqe; u32 mr_stagmask; u32 used_pds; u32 used_cqs; u32 used_mrs; u32 used_qps; u32 arp_table_size; u32 next_arp_index; u32 ceqs_count; u32 next_ws_node_id; u32 max_ws_node_id; u32 limits_sel; unsigned long *allocated_ws_nodes; unsigned long *allocated_qps; unsigned long *allocated_cqs; unsigned long *allocated_mrs; unsigned long *allocated_pds; unsigned long *allocated_mcgs; unsigned long *allocated_ahs; unsigned long *allocated_arps; enum init_completion_state init_state; struct irdma_sc_dev sc_dev; struct irdma_dev_ctx dev_ctx; struct irdma_tunable_info tun_info; eventhandler_tag irdma_ifaddr_event; struct irdma_handler *hdl; struct pci_dev *pcidev; struct ice_rdma_peer *peer_info; struct irdma_hw hw; struct irdma_cqp cqp; struct irdma_ccq ccq; struct irdma_aeq aeq; struct irdma_ceq *ceqlist; struct irdma_hmc_pble_rsrc *pble_rsrc; struct irdma_arp_entry *arp_table; spinlock_t arp_lock; /*protect ARP table access*/ spinlock_t rsrc_lock; /* protect HW resource array access */ spinlock_t qptable_lock; /*protect QP table access*/ spinlock_t cqtable_lock; /*protect CQ table access*/ struct irdma_qp **qp_table; struct irdma_cq **cq_table; spinlock_t qh_list_lock; /* protect 
mc_qht_list */ struct mc_table_list mc_qht_list; struct irdma_msix_vector *iw_msixtbl; struct irdma_qvlist_info *iw_qvlist; struct tasklet_struct dpc_tasklet; struct msix_entry msix_info; struct irdma_dma_mem obj_mem; struct irdma_dma_mem obj_next; atomic_t vchnl_msgs; wait_queue_head_t vchnl_waitq; struct workqueue_struct *cqp_cmpl_wq; struct work_struct cqp_cmpl_work; struct irdma_sc_vsi default_vsi; void *back_fcn; struct irdma_gen_ops gen_ops; void (*check_fc)(struct irdma_sc_vsi *vsi, struct irdma_sc_qp *sc_qp); struct irdma_dcqcn_cc_params dcqcn_params; struct irdma_device *iwdev; }; struct irdma_device { struct ib_device ibdev; struct irdma_pci_f *rf; - struct ifnet *netdev; + if_t netdev; struct notifier_block nb_netdevice_event; struct irdma_handler *hdl; struct workqueue_struct *cleanup_wq; struct irdma_sc_vsi vsi; struct irdma_cm_core cm_core; u32 roce_cwnd; u32 roce_ackcreds; u32 vendor_id; u32 vendor_part_id; u32 push_mode; u32 rcv_wnd; u16 mac_ip_table_idx; u16 vsi_num; u8 rcv_wscale; u8 iw_status; u8 roce_rtomin; u8 rd_fence_rate; bool override_rcv_wnd:1; bool override_cwnd:1; bool override_ackcreds:1; bool override_ooo:1; bool override_rd_fence_rate:1; bool override_rtomin:1; bool roce_mode:1; bool roce_dcqcn_en:1; bool dcb_vlan_mode:1; bool iw_ooo:1; enum init_completion_state init_state; wait_queue_head_t suspend_wq; }; struct irdma_handler { struct list_head list; struct irdma_device *iwdev; struct task deferred_task; struct taskqueue *deferred_tq; bool shared_res_created; }; static inline struct irdma_device *to_iwdev(struct ib_device *ibdev) { return container_of(ibdev, struct irdma_device, ibdev); } static inline struct irdma_ucontext *to_ucontext(struct ib_ucontext *ibucontext) { return container_of(ibucontext, struct irdma_ucontext, ibucontext); } #if __FreeBSD_version >= 1400026 static inline struct irdma_user_mmap_entry * to_irdma_mmap_entry(struct rdma_user_mmap_entry *rdma_entry) { return container_of(rdma_entry, struct irdma_user_mmap_entry, rdma_entry); } #endif static inline struct irdma_pd *to_iwpd(struct ib_pd *ibpd) { return container_of(ibpd, struct irdma_pd, ibpd); } static inline struct irdma_ah *to_iwah(struct ib_ah *ibah) { return container_of(ibah, struct irdma_ah, ibah); } static inline struct irdma_mr *to_iwmr(struct ib_mr *ibmr) { return container_of(ibmr, struct irdma_mr, ibmr); } static inline struct irdma_mr *to_iwmw(struct ib_mw *ibmw) { return container_of(ibmw, struct irdma_mr, ibmw); } static inline struct irdma_cq *to_iwcq(struct ib_cq *ibcq) { return container_of(ibcq, struct irdma_cq, ibcq); } static inline struct irdma_qp *to_iwqp(struct ib_qp *ibqp) { return container_of(ibqp, struct irdma_qp, ibqp); } static inline struct irdma_pci_f *dev_to_rf(struct irdma_sc_dev *dev) { return container_of(dev, struct irdma_pci_f, sc_dev); } /** * irdma_alloc_resource - allocate a resource * @iwdev: device pointer * @resource_array: resource bit array: * @max_resources: maximum resource number * @req_resources_num: Allocated resource number * @next: next free id **/ static inline int irdma_alloc_rsrc(struct irdma_pci_f *rf, unsigned long *rsrc_array, u32 max_rsrc, u32 *req_rsrc_num, u32 *next) { u32 rsrc_num; unsigned long flags; spin_lock_irqsave(&rf->rsrc_lock, flags); rsrc_num = find_next_zero_bit(rsrc_array, max_rsrc, *next); if (rsrc_num >= max_rsrc) { rsrc_num = find_first_zero_bit(rsrc_array, max_rsrc); if (rsrc_num >= max_rsrc) { spin_unlock_irqrestore(&rf->rsrc_lock, flags); irdma_debug(&rf->sc_dev, IRDMA_DEBUG_ERR, "resource [%d] 
allocation failed\n", rsrc_num); return -EOVERFLOW; } } __set_bit(rsrc_num, rsrc_array); *next = rsrc_num + 1; if (*next == max_rsrc) *next = 0; *req_rsrc_num = rsrc_num; spin_unlock_irqrestore(&rf->rsrc_lock, flags); return 0; } /** * irdma_free_resource - free a resource * @iwdev: device pointer * @resource_array: resource array for the resource_num * @resource_num: resource number to free **/ static inline void irdma_free_rsrc(struct irdma_pci_f *rf, unsigned long *rsrc_array, u32 rsrc_num) { unsigned long flags; spin_lock_irqsave(&rf->rsrc_lock, flags); __clear_bit(rsrc_num, rsrc_array); spin_unlock_irqrestore(&rf->rsrc_lock, flags); } int irdma_ctrl_init_hw(struct irdma_pci_f *rf); void irdma_ctrl_deinit_hw(struct irdma_pci_f *rf); int irdma_rt_init_hw(struct irdma_device *iwdev, struct irdma_l2params *l2params); void irdma_rt_deinit_hw(struct irdma_device *iwdev); void irdma_qp_add_ref(struct ib_qp *ibqp); void irdma_qp_rem_ref(struct ib_qp *ibqp); void irdma_free_lsmm_rsrc(struct irdma_qp *iwqp); struct ib_qp *irdma_get_qp(struct ib_device *ibdev, int qpn); void irdma_flush_wqes(struct irdma_qp *iwqp, u32 flush_mask); void irdma_manage_arp_cache(struct irdma_pci_f *rf, const unsigned char *mac_addr, u32 *ip_addr, u32 action); struct irdma_apbvt_entry *irdma_add_apbvt(struct irdma_device *iwdev, u16 port); void irdma_del_apbvt(struct irdma_device *iwdev, struct irdma_apbvt_entry *entry); struct irdma_cqp_request *irdma_alloc_and_get_cqp_request(struct irdma_cqp *cqp, bool wait); void irdma_free_cqp_request(struct irdma_cqp *cqp, struct irdma_cqp_request *cqp_request); void irdma_put_cqp_request(struct irdma_cqp *cqp, struct irdma_cqp_request *cqp_request); int irdma_alloc_local_mac_entry(struct irdma_pci_f *rf, u16 *mac_tbl_idx); int irdma_add_local_mac_entry(struct irdma_pci_f *rf, const u8 *mac_addr, u16 idx); void irdma_del_local_mac_entry(struct irdma_pci_f *rf, u16 idx); u32 irdma_initialize_hw_rsrc(struct irdma_pci_f *rf); void irdma_port_ibevent(struct irdma_device *iwdev); void irdma_cm_disconn(struct irdma_qp *qp); bool irdma_cqp_crit_err(struct irdma_sc_dev *dev, u8 cqp_cmd, u16 maj_err_code, u16 min_err_code); int irdma_handle_cqp_op(struct irdma_pci_f *rf, struct irdma_cqp_request *cqp_request); int irdma_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask, struct ib_udata *udata); int irdma_modify_qp_roce(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask, struct ib_udata *udata); void irdma_cq_add_ref(struct ib_cq *ibcq); void irdma_cq_rem_ref(struct ib_cq *ibcq); void irdma_cq_wq_destroy(struct irdma_pci_f *rf, struct irdma_sc_cq *cq); void irdma_cleanup_pending_cqp_op(struct irdma_pci_f *rf); int irdma_hw_modify_qp(struct irdma_device *iwdev, struct irdma_qp *iwqp, struct irdma_modify_qp_info *info, bool wait); int irdma_qp_suspend_resume(struct irdma_sc_qp *qp, bool suspend); int irdma_manage_qhash(struct irdma_device *iwdev, struct irdma_cm_info *cminfo, enum irdma_quad_entry_type etype, enum irdma_quad_hash_manage_type mtype, void *cmnode, bool wait); void irdma_receive_ilq(struct irdma_sc_vsi *vsi, struct irdma_puda_buf *rbuf); void irdma_free_sqbuf(struct irdma_sc_vsi *vsi, void *bufp); void irdma_free_qp_rsrc(struct irdma_qp *iwqp); int irdma_setup_cm_core(struct irdma_device *iwdev, u8 ver); void irdma_cleanup_cm_core(struct irdma_cm_core *cm_core); void irdma_next_iw_state(struct irdma_qp *iwqp, u8 state, u8 del_hash, u8 term, u8 term_len); int irdma_send_syn(struct irdma_cm_node *cm_node, u32 sendack); int irdma_send_reset(struct 
irdma_cm_node *cm_node); struct irdma_cm_node *irdma_find_node(struct irdma_cm_core *cm_core, u16 rem_port, u32 *rem_addr, u16 loc_port, u32 *loc_addr, u16 vlan_id); int irdma_hw_flush_wqes(struct irdma_pci_f *rf, struct irdma_sc_qp *qp, struct irdma_qp_flush_info *info, bool wait); void irdma_gen_ae(struct irdma_pci_f *rf, struct irdma_sc_qp *qp, struct irdma_gen_ae_info *info, bool wait); void irdma_copy_ip_ntohl(u32 *dst, __be32 *src); void irdma_copy_ip_htonl(__be32 *dst, u32 *src); u16 irdma_get_vlan_ipv4(u32 *addr); -struct ifnet *irdma_netdev_vlan_ipv6(u32 *addr, u16 *vlan_id, u8 *mac); +if_t irdma_netdev_vlan_ipv6(u32 *addr, u16 *vlan_id, u8 *mac); struct ib_mr *irdma_reg_phys_mr(struct ib_pd *ib_pd, u64 addr, u64 size, int acc, u64 *iova_start); int irdma_upload_qp_context(struct irdma_qp *iwqp, bool freeze, bool raw); void irdma_del_hmc_objects(struct irdma_sc_dev *dev, struct irdma_hmc_info *hmc_info, bool privileged, bool reset, enum irdma_vers vers); void irdma_cqp_ce_handler(struct irdma_pci_f *rf, struct irdma_sc_cq *cq); int irdma_ah_cqp_op(struct irdma_pci_f *rf, struct irdma_sc_ah *sc_ah, u8 cmd, bool wait, void (*callback_fcn)(struct irdma_cqp_request *cqp_request), void *cb_param); void irdma_gsi_ud_qp_ah_cb(struct irdma_cqp_request *cqp_request); void irdma_udqp_qs_worker(struct work_struct *work); bool irdma_cq_empty(struct irdma_cq *iwcq); int irdma_netdevice_event(struct notifier_block *notifier, unsigned long event, void *ptr); void irdma_unregister_notifiers(struct irdma_device *iwdev); int irdma_register_notifiers(struct irdma_device *iwdev); void irdma_set_rf_user_cfg_params(struct irdma_pci_f *rf); void irdma_add_ip(struct irdma_device *iwdev); void irdma_add_handler(struct irdma_handler *hdl); void irdma_del_handler(struct irdma_handler *hdl); void cqp_compl_worker(struct work_struct *work); void irdma_cleanup_dead_qps(struct irdma_sc_vsi *vsi); #endif /* IRDMA_MAIN_H */ diff --git a/sys/dev/irdma/irdma_verbs.c b/sys/dev/irdma/irdma_verbs.c index c9aed90b2cb1..8aac819cf892 100644 --- a/sys/dev/irdma/irdma_verbs.c +++ b/sys/dev/irdma/irdma_verbs.c @@ -1,3761 +1,3761 @@ /*- * SPDX-License-Identifier: GPL-2.0 or Linux-OpenIB * * Copyright (c) 2015 - 2022 Intel Corporation * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU * General Public License (GPL) Version 2, available from the file * COPYING in the main directory of this source tree, or the * OpenFabrics.org BSD license below: * * Redistribution and use in source and binary forms, with or * without modification, are permitted provided that the following * conditions are met: * * - Redistributions of source code must retain the above * copyright notice, this list of conditions and the following * disclaimer. * * - Redistributions in binary form must reproduce the above * copyright notice, this list of conditions and the following * disclaimer in the documentation and/or other materials * provided with the distribution. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
*/ /*$FreeBSD$*/ #include "irdma_main.h" /** * irdma_query_device - get device attributes * @ibdev: device pointer from stack * @props: returning device attributes * @udata: user data */ static int irdma_query_device(struct ib_device *ibdev, struct ib_device_attr *props, struct ib_udata *udata) { struct irdma_device *iwdev = to_iwdev(ibdev); struct irdma_pci_f *rf = iwdev->rf; struct pci_dev *pcidev = iwdev->rf->pcidev; struct irdma_hw_attrs *hw_attrs = &rf->sc_dev.hw_attrs; if (udata->inlen || udata->outlen) return -EINVAL; memset(props, 0, sizeof(*props)); addrconf_addr_eui48((u8 *)&props->sys_image_guid, - IF_LLADDR(iwdev->netdev)); + if_getlladdr(iwdev->netdev)); props->fw_ver = (u64)irdma_fw_major_ver(&rf->sc_dev) << 32 | irdma_fw_minor_ver(&rf->sc_dev); props->device_cap_flags = IB_DEVICE_MEM_WINDOW | IB_DEVICE_MEM_MGT_EXTENSIONS; props->device_cap_flags |= IB_DEVICE_LOCAL_DMA_LKEY; props->vendor_id = pcidev->vendor; props->vendor_part_id = pcidev->device; props->hw_ver = pcidev->revision; props->page_size_cap = hw_attrs->page_size_cap; props->max_mr_size = hw_attrs->max_mr_size; props->max_qp = rf->max_qp - rf->used_qps; props->max_qp_wr = hw_attrs->max_qp_wr; set_max_sge(props, rf); props->max_cq = rf->max_cq - rf->used_cqs; props->max_cqe = rf->max_cqe - 1; props->max_mr = rf->max_mr - rf->used_mrs; props->max_mw = props->max_mr; props->max_pd = rf->max_pd - rf->used_pds; props->max_sge_rd = hw_attrs->uk_attrs.max_hw_read_sges; props->max_qp_rd_atom = hw_attrs->max_hw_ird; props->max_qp_init_rd_atom = hw_attrs->max_hw_ord; if (rdma_protocol_roce(ibdev, 1)) { props->device_cap_flags |= IB_DEVICE_RC_RNR_NAK_GEN; props->max_pkeys = IRDMA_PKEY_TBL_SZ; props->max_ah = rf->max_ah; if (hw_attrs->uk_attrs.hw_rev == IRDMA_GEN_2) { props->max_mcast_grp = rf->max_mcg; props->max_mcast_qp_attach = IRDMA_MAX_MGS_PER_CTX; props->max_total_mcast_qp_attach = rf->max_qp * IRDMA_MAX_MGS_PER_CTX; } } props->max_fast_reg_page_list_len = IRDMA_MAX_PAGES_PER_FMR; if (hw_attrs->uk_attrs.hw_rev >= IRDMA_GEN_2) props->device_cap_flags |= IB_DEVICE_MEM_WINDOW_TYPE_2B; return 0; } static int irdma_mmap_legacy(struct irdma_ucontext *ucontext, struct vm_area_struct *vma) { u64 pfn; if (vma->vm_pgoff || vma->vm_end - vma->vm_start != PAGE_SIZE) return -EINVAL; vma->vm_private_data = ucontext; pfn = ((uintptr_t)ucontext->iwdev->rf->sc_dev.hw_regs[IRDMA_DB_ADDR_OFFSET] + pci_resource_start(ucontext->iwdev->rf->pcidev, 0)) >> PAGE_SHIFT; #if __FreeBSD_version >= 1400026 return rdma_user_mmap_io(&ucontext->ibucontext, vma, pfn, PAGE_SIZE, pgprot_noncached(vma->vm_page_prot), NULL); #else return rdma_user_mmap_io(&ucontext->ibucontext, vma, pfn, PAGE_SIZE, pgprot_noncached(vma->vm_page_prot)); #endif } #if __FreeBSD_version >= 1400026 static void irdma_mmap_free(struct rdma_user_mmap_entry *rdma_entry) { struct irdma_user_mmap_entry *entry = to_irdma_mmap_entry(rdma_entry); kfree(entry); } struct rdma_user_mmap_entry * irdma_user_mmap_entry_insert(struct irdma_ucontext *ucontext, u64 bar_offset, enum irdma_mmap_flag mmap_flag, u64 *mmap_offset) { struct irdma_user_mmap_entry *entry = kzalloc(sizeof(*entry), GFP_KERNEL); int ret; if (!entry) return NULL; entry->bar_offset = bar_offset; entry->mmap_flag = mmap_flag; ret = rdma_user_mmap_entry_insert(&ucontext->ibucontext, &entry->rdma_entry, PAGE_SIZE); if (ret) { kfree(entry); return NULL; } *mmap_offset = rdma_user_mmap_get_offset(&entry->rdma_entry); return &entry->rdma_entry; } #else static inline bool find_key_in_mmap_tbl(struct irdma_ucontext *ucontext, u64 
key) { struct irdma_user_mmap_entry *entry; HASH_FOR_EACH_POSSIBLE(ucontext->mmap_hash_tbl, entry, hlist, key) { if (entry->pgoff_key == key) return true; } return false; } struct irdma_user_mmap_entry * irdma_user_mmap_entry_add_hash(struct irdma_ucontext *ucontext, u64 bar_offset, enum irdma_mmap_flag mmap_flag, u64 *mmap_offset) { struct irdma_user_mmap_entry *entry = kzalloc(sizeof(*entry), GFP_KERNEL); unsigned long flags; int retry_cnt = 0; if (!entry) return NULL; entry->bar_offset = bar_offset; entry->mmap_flag = mmap_flag; entry->ucontext = ucontext; do { get_random_bytes(&entry->pgoff_key, sizeof(entry->pgoff_key)); /* The key is a page offset */ entry->pgoff_key >>= PAGE_SHIFT; /* In the event of a collision in the hash table, retry a new key */ spin_lock_irqsave(&ucontext->mmap_tbl_lock, flags); if (!find_key_in_mmap_tbl(ucontext, entry->pgoff_key)) { HASH_ADD(ucontext->mmap_hash_tbl, &entry->hlist, entry->pgoff_key); spin_unlock_irqrestore(&ucontext->mmap_tbl_lock, flags); goto hash_add_done; } spin_unlock_irqrestore(&ucontext->mmap_tbl_lock, flags); } while (retry_cnt++ < 10); irdma_debug(&ucontext->iwdev->rf->sc_dev, IRDMA_DEBUG_VERBS, "mmap table add failed: Cannot find a unique key\n"); kfree(entry); return NULL; hash_add_done: /* libc mmap uses a byte offset */ *mmap_offset = entry->pgoff_key << PAGE_SHIFT; return entry; } static struct irdma_user_mmap_entry * irdma_find_user_mmap_entry(struct irdma_ucontext *ucontext, struct vm_area_struct *vma) { struct irdma_user_mmap_entry *entry; unsigned long flags; if (vma->vm_end - vma->vm_start != PAGE_SIZE) return NULL; spin_lock_irqsave(&ucontext->mmap_tbl_lock, flags); HASH_FOR_EACH_POSSIBLE(ucontext->mmap_hash_tbl, entry, hlist, vma->vm_pgoff) { if (entry->pgoff_key == vma->vm_pgoff) { spin_unlock_irqrestore(&ucontext->mmap_tbl_lock, flags); return entry; } } spin_unlock_irqrestore(&ucontext->mmap_tbl_lock, flags); return NULL; } void irdma_user_mmap_entry_del_hash(struct irdma_user_mmap_entry *entry) { struct irdma_ucontext *ucontext; unsigned long flags; if (!entry) return; ucontext = entry->ucontext; spin_lock_irqsave(&ucontext->mmap_tbl_lock, flags); HASH_DEL(ucontext->mmap_hash_tbl, &entry->hlist); spin_unlock_irqrestore(&ucontext->mmap_tbl_lock, flags); kfree(entry); } #endif /** * irdma_mmap - user memory map * @context: context created during alloc * @vma: kernel info for user memory map */ static int irdma_mmap(struct ib_ucontext *context, struct vm_area_struct *vma) { #if __FreeBSD_version >= 1400026 struct rdma_user_mmap_entry *rdma_entry; #endif struct irdma_user_mmap_entry *entry; struct irdma_ucontext *ucontext; u64 pfn; int ret; ucontext = to_ucontext(context); /* Legacy support for libi40iw with hard-coded mmap key */ if (ucontext->legacy_mode) return irdma_mmap_legacy(ucontext, vma); #if __FreeBSD_version >= 1400026 rdma_entry = rdma_user_mmap_entry_get(&ucontext->ibucontext, vma); if (!rdma_entry) { irdma_debug(&ucontext->iwdev->rf->sc_dev, IRDMA_DEBUG_VERBS, "pgoff[0x%lx] does not have valid entry\n", vma->vm_pgoff); return -EINVAL; } entry = to_irdma_mmap_entry(rdma_entry); #else entry = irdma_find_user_mmap_entry(ucontext, vma); if (!entry) { irdma_debug(&ucontext->iwdev->rf->sc_dev, IRDMA_DEBUG_VERBS, "pgoff[0x%lx] does not have valid entry\n", vma->vm_pgoff); return -EINVAL; } #endif irdma_debug(&ucontext->iwdev->rf->sc_dev, IRDMA_DEBUG_VERBS, "bar_offset [0x%lx] mmap_flag [%d]\n", entry->bar_offset, entry->mmap_flag); pfn = (entry->bar_offset + pci_resource_start(ucontext->iwdev->rf->pcidev, 0)) >> 
PAGE_SHIFT; switch (entry->mmap_flag) { case IRDMA_MMAP_IO_NC: #if __FreeBSD_version >= 1400026 ret = rdma_user_mmap_io(context, vma, pfn, PAGE_SIZE, pgprot_noncached(vma->vm_page_prot), rdma_entry); #else ret = rdma_user_mmap_io(context, vma, pfn, PAGE_SIZE, pgprot_noncached(vma->vm_page_prot)); #endif break; case IRDMA_MMAP_IO_WC: #if __FreeBSD_version >= 1400026 ret = rdma_user_mmap_io(context, vma, pfn, PAGE_SIZE, pgprot_writecombine(vma->vm_page_prot), rdma_entry); #else ret = rdma_user_mmap_io(context, vma, pfn, PAGE_SIZE, pgprot_writecombine(vma->vm_page_prot)); #endif break; default: ret = -EINVAL; } if (ret) irdma_debug(&ucontext->iwdev->rf->sc_dev, IRDMA_DEBUG_VERBS, "bar_offset [0x%lx] mmap_flag[%d] err[%d]\n", entry->bar_offset, entry->mmap_flag, ret); #if __FreeBSD_version >= 1400026 rdma_user_mmap_entry_put(rdma_entry); #endif return ret; } /** * irdma_alloc_push_page - allocate a push page for qp * @iwqp: qp pointer */ static void irdma_alloc_push_page(struct irdma_qp *iwqp) { struct irdma_cqp_request *cqp_request; struct cqp_cmds_info *cqp_info; struct irdma_device *iwdev = iwqp->iwdev; struct irdma_sc_qp *qp = &iwqp->sc_qp; int status; cqp_request = irdma_alloc_and_get_cqp_request(&iwdev->rf->cqp, true); if (!cqp_request) return; cqp_info = &cqp_request->info; cqp_info->cqp_cmd = IRDMA_OP_MANAGE_PUSH_PAGE; cqp_info->post_sq = 1; cqp_info->in.u.manage_push_page.info.push_idx = 0; cqp_info->in.u.manage_push_page.info.qs_handle = qp->vsi->qos[qp->user_pri].qs_handle; cqp_info->in.u.manage_push_page.info.free_page = 0; cqp_info->in.u.manage_push_page.info.push_page_type = 0; cqp_info->in.u.manage_push_page.cqp = &iwdev->rf->cqp.sc_cqp; cqp_info->in.u.manage_push_page.scratch = (uintptr_t)cqp_request; status = irdma_handle_cqp_op(iwdev->rf, cqp_request); if (!status && cqp_request->compl_info.op_ret_val < iwdev->rf->sc_dev.hw_attrs.max_hw_device_pages) { qp->push_idx = cqp_request->compl_info.op_ret_val; qp->push_offset = 0; } irdma_put_cqp_request(&iwdev->rf->cqp, cqp_request); } /** * irdma_get_pbl - Retrieve pbl from a list given a virtual * address * @va: user virtual address * @pbl_list: pbl list to search in (QP's or CQ's) */ struct irdma_pbl * irdma_get_pbl(unsigned long va, struct list_head *pbl_list) { struct irdma_pbl *iwpbl; list_for_each_entry(iwpbl, pbl_list, list) { if (iwpbl->user_base == va) { list_del(&iwpbl->list); iwpbl->on_list = false; return iwpbl; } } return NULL; } /** * irdma_clean_cqes - clean cq entries for qp * @iwqp: qp ptr (user or kernel) * @iwcq: cq ptr */ void irdma_clean_cqes(struct irdma_qp *iwqp, struct irdma_cq *iwcq) { struct irdma_cq_uk *ukcq = &iwcq->sc_cq.cq_uk; unsigned long flags; spin_lock_irqsave(&iwcq->lock, flags); irdma_uk_clean_cq(&iwqp->sc_qp.qp_uk, ukcq); spin_unlock_irqrestore(&iwcq->lock, flags); } static u64 irdma_compute_push_wqe_offset(struct irdma_device *iwdev, u32 page_idx){ u64 bar_off = (uintptr_t)iwdev->rf->sc_dev.hw_regs[IRDMA_DB_ADDR_OFFSET]; if (iwdev->rf->sc_dev.hw_attrs.uk_attrs.hw_rev == IRDMA_GEN_2) { /* skip over db page */ bar_off += IRDMA_HW_PAGE_SIZE; /* skip over reserved space */ bar_off += IRDMA_PF_BAR_RSVD; } /* push wqe page */ bar_off += (u64)page_idx * IRDMA_HW_PAGE_SIZE; return bar_off; } void irdma_remove_push_mmap_entries(struct irdma_qp *iwqp) { if (iwqp->push_db_mmap_entry) { #if __FreeBSD_version >= 1400026 rdma_user_mmap_entry_remove(iwqp->push_db_mmap_entry); #else irdma_user_mmap_entry_del_hash(iwqp->push_db_mmap_entry); #endif iwqp->push_db_mmap_entry = NULL; } if 
(iwqp->push_wqe_mmap_entry) { #if __FreeBSD_version >= 1400026 rdma_user_mmap_entry_remove(iwqp->push_wqe_mmap_entry); #else irdma_user_mmap_entry_del_hash(iwqp->push_wqe_mmap_entry); #endif iwqp->push_wqe_mmap_entry = NULL; } } static int irdma_setup_push_mmap_entries(struct irdma_ucontext *ucontext, struct irdma_qp *iwqp, u64 *push_wqe_mmap_key, u64 *push_db_mmap_key) { struct irdma_device *iwdev = ucontext->iwdev; u64 bar_off; WARN_ON_ONCE(iwdev->rf->sc_dev.hw_attrs.uk_attrs.hw_rev < IRDMA_GEN_2); bar_off = irdma_compute_push_wqe_offset(iwdev, iwqp->sc_qp.push_idx); #if __FreeBSD_version >= 1400026 iwqp->push_wqe_mmap_entry = irdma_user_mmap_entry_insert(ucontext, bar_off, IRDMA_MMAP_IO_WC, push_wqe_mmap_key); #else iwqp->push_wqe_mmap_entry = irdma_user_mmap_entry_add_hash(ucontext, bar_off, IRDMA_MMAP_IO_WC, push_wqe_mmap_key); #endif if (!iwqp->push_wqe_mmap_entry) return -ENOMEM; /* push doorbell page */ bar_off += IRDMA_HW_PAGE_SIZE; #if __FreeBSD_version >= 1400026 iwqp->push_db_mmap_entry = irdma_user_mmap_entry_insert(ucontext, bar_off, IRDMA_MMAP_IO_NC, push_db_mmap_key); #else iwqp->push_db_mmap_entry = irdma_user_mmap_entry_add_hash(ucontext, bar_off, IRDMA_MMAP_IO_NC, push_db_mmap_key); #endif if (!iwqp->push_db_mmap_entry) { #if __FreeBSD_version >= 1400026 rdma_user_mmap_entry_remove(iwqp->push_wqe_mmap_entry); #else irdma_user_mmap_entry_del_hash(iwqp->push_wqe_mmap_entry); #endif return -ENOMEM; } return 0; } /** * irdma_setup_virt_qp - setup for allocation of virtual qp * @iwdev: irdma device * @iwqp: qp ptr * @init_info: initialize info to return */ void irdma_setup_virt_qp(struct irdma_device *iwdev, struct irdma_qp *iwqp, struct irdma_qp_init_info *init_info) { struct irdma_pbl *iwpbl = iwqp->iwpbl; struct irdma_qp_mr *qpmr = &iwpbl->qp_mr; iwqp->page = qpmr->sq_page; init_info->shadow_area_pa = qpmr->shadow; if (iwpbl->pbl_allocated) { init_info->virtual_map = true; init_info->sq_pa = qpmr->sq_pbl.idx; init_info->rq_pa = qpmr->rq_pbl.idx; } else { init_info->sq_pa = qpmr->sq_pbl.addr; init_info->rq_pa = qpmr->rq_pbl.addr; } } /** * irdma_setup_umode_qp - setup sq and rq size in user mode qp * @udata: user data * @iwdev: iwarp device * @iwqp: qp ptr (user or kernel) * @info: initialize info to return * @init_attr: Initial QP create attributes */ int irdma_setup_umode_qp(struct ib_udata *udata, struct irdma_device *iwdev, struct irdma_qp *iwqp, struct irdma_qp_init_info *info, struct ib_qp_init_attr *init_attr) { #if __FreeBSD_version >= 1400026 struct irdma_ucontext *ucontext = rdma_udata_to_drv_context(udata, struct irdma_ucontext, ibucontext); #else struct irdma_ucontext *ucontext = to_ucontext(iwqp->iwpd->ibpd.uobject->context); #endif struct irdma_qp_uk_init_info *ukinfo = &info->qp_uk_init_info; struct irdma_create_qp_req req = {0}; unsigned long flags; int ret; ret = ib_copy_from_udata(&req, udata, min(sizeof(req), udata->inlen)); if (ret) { irdma_debug(&iwdev->rf->sc_dev, IRDMA_DEBUG_VERBS, "ib_copy_from_data fail\n"); return ret; } iwqp->ctx_info.qp_compl_ctx = req.user_compl_ctx; iwqp->user_mode = 1; if (req.user_wqe_bufs) { info->qp_uk_init_info.legacy_mode = ucontext->legacy_mode; spin_lock_irqsave(&ucontext->qp_reg_mem_list_lock, flags); iwqp->iwpbl = irdma_get_pbl((unsigned long)req.user_wqe_bufs, &ucontext->qp_reg_mem_list); spin_unlock_irqrestore(&ucontext->qp_reg_mem_list_lock, flags); if (!iwqp->iwpbl) { ret = -ENODATA; irdma_debug(&iwdev->rf->sc_dev, IRDMA_DEBUG_VERBS, "no pbl info\n"); return ret; } } if (!ucontext->use_raw_attrs) { /** * Maintain 
backward compat with older ABI which passes sq and * rq depth in quanta in cap.max_send_wr and cap.max_recv_wr. * There is no way to compute the correct value of * iwqp->max_send_wr/max_recv_wr in the kernel. */ iwqp->max_send_wr = init_attr->cap.max_send_wr; iwqp->max_recv_wr = init_attr->cap.max_recv_wr; ukinfo->sq_size = init_attr->cap.max_send_wr; ukinfo->rq_size = init_attr->cap.max_recv_wr; irdma_uk_calc_shift_wq(ukinfo, &ukinfo->sq_shift, &ukinfo->rq_shift); } else { ret = irdma_uk_calc_depth_shift_sq(ukinfo, &ukinfo->sq_depth, &ukinfo->sq_shift); if (ret) return ret; ret = irdma_uk_calc_depth_shift_rq(ukinfo, &ukinfo->rq_depth, &ukinfo->rq_shift); if (ret) return ret; iwqp->max_send_wr = (ukinfo->sq_depth - IRDMA_SQ_RSVD) >> ukinfo->sq_shift; iwqp->max_recv_wr = (ukinfo->rq_depth - IRDMA_RQ_RSVD) >> ukinfo->rq_shift; ukinfo->sq_size = ukinfo->sq_depth >> ukinfo->sq_shift; ukinfo->rq_size = ukinfo->rq_depth >> ukinfo->rq_shift; } irdma_setup_virt_qp(iwdev, iwqp, info); return 0; } /** * irdma_setup_kmode_qp - setup initialization for kernel mode qp * @iwdev: iwarp device * @iwqp: qp ptr (user or kernel) * @info: initialize info to return * @init_attr: Initial QP create attributes */ int irdma_setup_kmode_qp(struct irdma_device *iwdev, struct irdma_qp *iwqp, struct irdma_qp_init_info *info, struct ib_qp_init_attr *init_attr) { struct irdma_dma_mem *mem = &iwqp->kqp.dma_mem; u32 size; int status; struct irdma_qp_uk_init_info *ukinfo = &info->qp_uk_init_info; status = irdma_uk_calc_depth_shift_sq(ukinfo, &ukinfo->sq_depth, &ukinfo->sq_shift); if (status) return status; status = irdma_uk_calc_depth_shift_rq(ukinfo, &ukinfo->rq_depth, &ukinfo->rq_shift); if (status) return status; iwqp->kqp.sq_wrid_mem = kcalloc(ukinfo->sq_depth, sizeof(*iwqp->kqp.sq_wrid_mem), GFP_KERNEL); if (!iwqp->kqp.sq_wrid_mem) return -ENOMEM; iwqp->kqp.rq_wrid_mem = kcalloc(ukinfo->rq_depth, sizeof(*iwqp->kqp.rq_wrid_mem), GFP_KERNEL); if (!iwqp->kqp.rq_wrid_mem) { kfree(iwqp->kqp.sq_wrid_mem); iwqp->kqp.sq_wrid_mem = NULL; return -ENOMEM; } iwqp->kqp.sig_trk_mem = kcalloc(ukinfo->sq_depth, sizeof(u32), GFP_KERNEL); memset(iwqp->kqp.sig_trk_mem, 0, ukinfo->sq_depth * sizeof(u32)); if (!iwqp->kqp.sig_trk_mem) { kfree(iwqp->kqp.sq_wrid_mem); iwqp->kqp.sq_wrid_mem = NULL; kfree(iwqp->kqp.rq_wrid_mem); iwqp->kqp.rq_wrid_mem = NULL; return -ENOMEM; } ukinfo->sq_sigwrtrk_array = (void *)iwqp->kqp.sig_trk_mem; ukinfo->sq_wrtrk_array = iwqp->kqp.sq_wrid_mem; ukinfo->rq_wrid_array = iwqp->kqp.rq_wrid_mem; size = (ukinfo->sq_depth + ukinfo->rq_depth) * IRDMA_QP_WQE_MIN_SIZE; size += (IRDMA_SHADOW_AREA_SIZE << 3); mem->size = size; mem->va = irdma_allocate_dma_mem(&iwdev->rf->hw, mem, mem->size, 256); if (!mem->va) { kfree(iwqp->kqp.sq_wrid_mem); iwqp->kqp.sq_wrid_mem = NULL; kfree(iwqp->kqp.rq_wrid_mem); iwqp->kqp.rq_wrid_mem = NULL; return -ENOMEM; } ukinfo->sq = mem->va; info->sq_pa = mem->pa; ukinfo->rq = &ukinfo->sq[ukinfo->sq_depth]; info->rq_pa = info->sq_pa + (ukinfo->sq_depth * IRDMA_QP_WQE_MIN_SIZE); ukinfo->shadow_area = ukinfo->rq[ukinfo->rq_depth].elem; info->shadow_area_pa = info->rq_pa + (ukinfo->rq_depth * IRDMA_QP_WQE_MIN_SIZE); ukinfo->sq_size = ukinfo->sq_depth >> ukinfo->sq_shift; ukinfo->rq_size = ukinfo->rq_depth >> ukinfo->rq_shift; ukinfo->qp_id = iwqp->ibqp.qp_num; iwqp->max_send_wr = (ukinfo->sq_depth - IRDMA_SQ_RSVD) >> ukinfo->sq_shift; iwqp->max_recv_wr = (ukinfo->rq_depth - IRDMA_RQ_RSVD) >> ukinfo->rq_shift; init_attr->cap.max_send_wr = iwqp->max_send_wr; init_attr->cap.max_recv_wr = 
iwqp->max_recv_wr; return 0; } int irdma_cqp_create_qp_cmd(struct irdma_qp *iwqp) { struct irdma_pci_f *rf = iwqp->iwdev->rf; struct irdma_cqp_request *cqp_request; struct cqp_cmds_info *cqp_info; struct irdma_create_qp_info *qp_info; int status; cqp_request = irdma_alloc_and_get_cqp_request(&rf->cqp, true); if (!cqp_request) return -ENOMEM; cqp_info = &cqp_request->info; qp_info = &cqp_request->info.in.u.qp_create.info; memset(qp_info, 0, sizeof(*qp_info)); qp_info->mac_valid = true; qp_info->cq_num_valid = true; qp_info->next_iwarp_state = IRDMA_QP_STATE_IDLE; cqp_info->cqp_cmd = IRDMA_OP_QP_CREATE; cqp_info->post_sq = 1; cqp_info->in.u.qp_create.qp = &iwqp->sc_qp; cqp_info->in.u.qp_create.scratch = (uintptr_t)cqp_request; status = irdma_handle_cqp_op(rf, cqp_request); irdma_put_cqp_request(&rf->cqp, cqp_request); return status; } void irdma_roce_fill_and_set_qpctx_info(struct irdma_qp *iwqp, struct irdma_qp_host_ctx_info *ctx_info) { struct irdma_device *iwdev = iwqp->iwdev; struct irdma_sc_dev *dev = &iwdev->rf->sc_dev; struct irdma_roce_offload_info *roce_info; struct irdma_udp_offload_info *udp_info; udp_info = &iwqp->udp_info; udp_info->snd_mss = ib_mtu_enum_to_int(ib_mtu_int_to_enum(iwdev->vsi.mtu)); udp_info->cwnd = iwdev->roce_cwnd; udp_info->rexmit_thresh = 2; udp_info->rnr_nak_thresh = 2; udp_info->src_port = 0xc000; udp_info->dst_port = ROCE_V2_UDP_DPORT; roce_info = &iwqp->roce_info; - ether_addr_copy(roce_info->mac_addr, IF_LLADDR(iwdev->netdev)); + ether_addr_copy(roce_info->mac_addr, if_getlladdr(iwdev->netdev)); roce_info->rd_en = true; roce_info->wr_rdresp_en = true; roce_info->bind_en = true; roce_info->dcqcn_en = false; roce_info->rtomin = iwdev->roce_rtomin; roce_info->ack_credits = iwdev->roce_ackcreds; roce_info->ird_size = dev->hw_attrs.max_hw_ird; roce_info->ord_size = dev->hw_attrs.max_hw_ord; if (!iwqp->user_mode) { roce_info->priv_mode_en = true; roce_info->fast_reg_en = true; roce_info->udprivcq_en = true; } roce_info->roce_tver = 0; ctx_info->roce_info = &iwqp->roce_info; ctx_info->udp_info = &iwqp->udp_info; irdma_sc_qp_setctx_roce(&iwqp->sc_qp, iwqp->host_ctx.va, ctx_info); } void irdma_iw_fill_and_set_qpctx_info(struct irdma_qp *iwqp, struct irdma_qp_host_ctx_info *ctx_info) { struct irdma_device *iwdev = iwqp->iwdev; struct irdma_sc_dev *dev = &iwdev->rf->sc_dev; struct irdma_iwarp_offload_info *iwarp_info; iwarp_info = &iwqp->iwarp_info; - ether_addr_copy(iwarp_info->mac_addr, IF_LLADDR(iwdev->netdev)); + ether_addr_copy(iwarp_info->mac_addr, if_getlladdr(iwdev->netdev)); iwarp_info->rd_en = true; iwarp_info->wr_rdresp_en = true; iwarp_info->bind_en = true; iwarp_info->ecn_en = true; iwarp_info->rtomin = 5; if (dev->hw_attrs.uk_attrs.hw_rev >= IRDMA_GEN_2) iwarp_info->ib_rd_en = true; if (!iwqp->user_mode) { iwarp_info->priv_mode_en = true; iwarp_info->fast_reg_en = true; } iwarp_info->ddp_ver = 1; iwarp_info->rdmap_ver = 1; ctx_info->iwarp_info = &iwqp->iwarp_info; ctx_info->iwarp_info_valid = true; irdma_sc_qp_setctx(&iwqp->sc_qp, iwqp->host_ctx.va, ctx_info); ctx_info->iwarp_info_valid = false; } int irdma_validate_qp_attrs(struct ib_qp_init_attr *init_attr, struct irdma_device *iwdev) { struct irdma_sc_dev *dev = &iwdev->rf->sc_dev; struct irdma_uk_attrs *uk_attrs = &dev->hw_attrs.uk_attrs; if (init_attr->create_flags) return -EOPNOTSUPP; if (init_attr->cap.max_inline_data > uk_attrs->max_hw_inline || init_attr->cap.max_send_sge > uk_attrs->max_hw_wq_frags || init_attr->cap.max_recv_sge > uk_attrs->max_hw_wq_frags) return -EINVAL; if 
(rdma_protocol_roce(&iwdev->ibdev, 1)) { if (init_attr->qp_type != IB_QPT_RC && init_attr->qp_type != IB_QPT_UD && init_attr->qp_type != IB_QPT_GSI) return -EOPNOTSUPP; } else { if (init_attr->qp_type != IB_QPT_RC) return -EOPNOTSUPP; } return 0; } void irdma_sched_qp_flush_work(struct irdma_qp *iwqp) { if (iwqp->sc_qp.qp_uk.destroy_pending) return; irdma_qp_add_ref(&iwqp->ibqp); if (mod_delayed_work(iwqp->iwdev->cleanup_wq, &iwqp->dwork_flush, msecs_to_jiffies(IRDMA_FLUSH_DELAY_MS))) irdma_qp_rem_ref(&iwqp->ibqp); } void irdma_flush_worker(struct work_struct *work) { struct delayed_work *dwork = to_delayed_work(work); struct irdma_qp *iwqp = container_of(dwork, struct irdma_qp, dwork_flush); irdma_generate_flush_completions(iwqp); /* For the add in irdma_sched_qp_flush_work */ irdma_qp_rem_ref(&iwqp->ibqp); } static int irdma_get_ib_acc_flags(struct irdma_qp *iwqp) { int acc_flags = 0; if (rdma_protocol_roce(iwqp->ibqp.device, 1)) { if (iwqp->roce_info.wr_rdresp_en) { acc_flags |= IB_ACCESS_LOCAL_WRITE; acc_flags |= IB_ACCESS_REMOTE_WRITE; } if (iwqp->roce_info.rd_en) acc_flags |= IB_ACCESS_REMOTE_READ; if (iwqp->roce_info.bind_en) acc_flags |= IB_ACCESS_MW_BIND; } else { if (iwqp->iwarp_info.wr_rdresp_en) { acc_flags |= IB_ACCESS_LOCAL_WRITE; acc_flags |= IB_ACCESS_REMOTE_WRITE; } if (iwqp->iwarp_info.rd_en) acc_flags |= IB_ACCESS_REMOTE_READ; if (iwqp->iwarp_info.bind_en) acc_flags |= IB_ACCESS_MW_BIND; } return acc_flags; } /** * irdma_query_qp - query qp attributes * @ibqp: qp pointer * @attr: attributes pointer * @attr_mask: Not used * @init_attr: qp attributes to return */ static int irdma_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask, struct ib_qp_init_attr *init_attr) { struct irdma_qp *iwqp = to_iwqp(ibqp); struct irdma_sc_qp *qp = &iwqp->sc_qp; memset(attr, 0, sizeof(*attr)); memset(init_attr, 0, sizeof(*init_attr)); attr->qp_state = iwqp->ibqp_state; attr->cur_qp_state = iwqp->ibqp_state; attr->cap.max_send_wr = iwqp->max_send_wr; attr->cap.max_recv_wr = iwqp->max_recv_wr; attr->cap.max_inline_data = qp->qp_uk.max_inline_data; attr->cap.max_send_sge = qp->qp_uk.max_sq_frag_cnt; attr->cap.max_recv_sge = qp->qp_uk.max_rq_frag_cnt; attr->qp_access_flags = irdma_get_ib_acc_flags(iwqp); attr->port_num = 1; if (rdma_protocol_roce(ibqp->device, 1)) { attr->path_mtu = ib_mtu_int_to_enum(iwqp->udp_info.snd_mss); attr->qkey = iwqp->roce_info.qkey; attr->rq_psn = iwqp->udp_info.epsn; attr->sq_psn = iwqp->udp_info.psn_nxt; attr->dest_qp_num = iwqp->roce_info.dest_qp; attr->pkey_index = iwqp->roce_info.p_key; attr->retry_cnt = iwqp->udp_info.rexmit_thresh; attr->rnr_retry = iwqp->udp_info.rnr_nak_thresh; attr->max_rd_atomic = iwqp->roce_info.ord_size; attr->max_dest_rd_atomic = iwqp->roce_info.ird_size; } init_attr->event_handler = iwqp->ibqp.event_handler; init_attr->qp_context = iwqp->ibqp.qp_context; init_attr->send_cq = iwqp->ibqp.send_cq; init_attr->recv_cq = iwqp->ibqp.recv_cq; init_attr->cap = attr->cap; return 0; } /** * irdma_modify_qp_roce - modify qp request * @ibqp: qp's pointer for modify * @attr: access attributes * @attr_mask: state mask * @udata: user data */ int irdma_modify_qp_roce(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask, struct ib_udata *udata) { #define IRDMA_MODIFY_QP_MIN_REQ_LEN offsetofend(struct irdma_modify_qp_req, rq_flush) #define IRDMA_MODIFY_QP_MIN_RESP_LEN offsetofend(struct irdma_modify_qp_resp, push_valid) struct irdma_pd *iwpd = to_iwpd(ibqp->pd); struct irdma_qp *iwqp = to_iwqp(ibqp); struct irdma_device *iwdev = 
iwqp->iwdev; struct irdma_sc_dev *dev = &iwdev->rf->sc_dev; struct irdma_qp_host_ctx_info *ctx_info; struct irdma_roce_offload_info *roce_info; struct irdma_udp_offload_info *udp_info; struct irdma_modify_qp_info info = {0}; struct irdma_modify_qp_resp uresp = {}; struct irdma_modify_qp_req ureq; unsigned long flags; u8 issue_modify_qp = 0; int ret = 0; ctx_info = &iwqp->ctx_info; roce_info = &iwqp->roce_info; udp_info = &iwqp->udp_info; if (udata) { if ((udata->inlen && udata->inlen < IRDMA_MODIFY_QP_MIN_REQ_LEN) || (udata->outlen && udata->outlen < IRDMA_MODIFY_QP_MIN_RESP_LEN)) return -EINVAL; } if (attr_mask & ~IB_QP_ATTR_STANDARD_BITS) return -EOPNOTSUPP; if (attr_mask & IB_QP_DEST_QPN) roce_info->dest_qp = attr->dest_qp_num; if (attr_mask & IB_QP_PKEY_INDEX) { ret = irdma_query_pkey(ibqp->device, 0, attr->pkey_index, &roce_info->p_key); if (ret) return ret; } if (attr_mask & IB_QP_QKEY) roce_info->qkey = attr->qkey; if (attr_mask & IB_QP_PATH_MTU) udp_info->snd_mss = ib_mtu_enum_to_int(attr->path_mtu); if (attr_mask & IB_QP_SQ_PSN) { udp_info->psn_nxt = attr->sq_psn; udp_info->lsn = 0xffff; udp_info->psn_una = attr->sq_psn; udp_info->psn_max = attr->sq_psn; } if (attr_mask & IB_QP_RQ_PSN) udp_info->epsn = attr->rq_psn; if (attr_mask & IB_QP_RNR_RETRY) udp_info->rnr_nak_thresh = attr->rnr_retry; if (attr_mask & IB_QP_RETRY_CNT) udp_info->rexmit_thresh = attr->retry_cnt; ctx_info->roce_info->pd_id = iwpd->sc_pd.pd_id; if (attr_mask & IB_QP_AV) { struct irdma_av *av = &iwqp->roce_ah.av; u16 vlan_id = VLAN_N_VID; u32 local_ip[4] = {}; memset(&iwqp->roce_ah, 0, sizeof(iwqp->roce_ah)); if (attr->ah_attr.ah_flags & IB_AH_GRH) { udp_info->ttl = attr->ah_attr.grh.hop_limit; udp_info->flow_label = attr->ah_attr.grh.flow_label; udp_info->tos = attr->ah_attr.grh.traffic_class; udp_info->src_port = kc_rdma_get_udp_sport(udp_info->flow_label, ibqp->qp_num, roce_info->dest_qp); irdma_qp_rem_qos(&iwqp->sc_qp); dev->ws_remove(iwqp->sc_qp.vsi, ctx_info->user_pri); if (iwqp->sc_qp.vsi->dscp_mode) ctx_info->user_pri = iwqp->sc_qp.vsi->dscp_map[irdma_tos2dscp(udp_info->tos)]; else ctx_info->user_pri = rt_tos2priority(udp_info->tos); } ret = kc_irdma_set_roce_cm_info(iwqp, attr, &vlan_id); if (ret) return ret; if (dev->ws_add(iwqp->sc_qp.vsi, ctx_info->user_pri)) return -ENOMEM; iwqp->sc_qp.user_pri = ctx_info->user_pri; irdma_qp_add_qos(&iwqp->sc_qp); if (vlan_id >= VLAN_N_VID && iwdev->dcb_vlan_mode) vlan_id = 0; if (vlan_id < VLAN_N_VID) { udp_info->insert_vlan_tag = true; udp_info->vlan_tag = vlan_id | ctx_info->user_pri << VLAN_PRIO_SHIFT; } else { udp_info->insert_vlan_tag = false; } av->attrs = attr->ah_attr; rdma_gid2ip((struct sockaddr *)&av->dgid_addr, &attr->ah_attr.grh.dgid); if (av->net_type == RDMA_NETWORK_IPV6) { __be32 *daddr = av->dgid_addr.saddr_in6.sin6_addr.__u6_addr.__u6_addr32; __be32 *saddr = av->sgid_addr.saddr_in6.sin6_addr.__u6_addr.__u6_addr32; irdma_copy_ip_ntohl(&udp_info->dest_ip_addr[0], daddr); irdma_copy_ip_ntohl(&udp_info->local_ipaddr[0], saddr); udp_info->ipv4 = false; irdma_copy_ip_ntohl(local_ip, daddr); } else if (av->net_type == RDMA_NETWORK_IPV4) { __be32 saddr = av->sgid_addr.saddr_in.sin_addr.s_addr; __be32 daddr = av->dgid_addr.saddr_in.sin_addr.s_addr; local_ip[0] = ntohl(daddr); udp_info->ipv4 = true; udp_info->dest_ip_addr[0] = 0; udp_info->dest_ip_addr[1] = 0; udp_info->dest_ip_addr[2] = 0; udp_info->dest_ip_addr[3] = local_ip[0]; udp_info->local_ipaddr[0] = 0; udp_info->local_ipaddr[1] = 0; udp_info->local_ipaddr[2] = 0; udp_info->local_ipaddr[3] = 
ntohl(saddr); } else { return -EINVAL; } udp_info->arp_idx = irdma_add_arp(iwdev->rf, local_ip, ah_attr_to_dmac(attr->ah_attr)); } if (attr_mask & IB_QP_MAX_QP_RD_ATOMIC) { if (attr->max_rd_atomic > dev->hw_attrs.max_hw_ord) { irdma_dev_err(&iwdev->ibdev, "rd_atomic = %d, above max_hw_ord=%d\n", attr->max_rd_atomic, dev->hw_attrs.max_hw_ord); return -EINVAL; } if (attr->max_rd_atomic) roce_info->ord_size = attr->max_rd_atomic; info.ord_valid = true; } if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC) { if (attr->max_dest_rd_atomic > dev->hw_attrs.max_hw_ird) { irdma_dev_err(&iwdev->ibdev, "rd_atomic = %d, above max_hw_ird=%d\n", attr->max_rd_atomic, dev->hw_attrs.max_hw_ird); return -EINVAL; } if (attr->max_dest_rd_atomic) roce_info->ird_size = attr->max_dest_rd_atomic; } if (attr_mask & IB_QP_ACCESS_FLAGS) { if (attr->qp_access_flags & IB_ACCESS_LOCAL_WRITE) roce_info->wr_rdresp_en = true; if (attr->qp_access_flags & IB_ACCESS_REMOTE_WRITE) roce_info->wr_rdresp_en = true; if (attr->qp_access_flags & IB_ACCESS_REMOTE_READ) roce_info->rd_en = true; } wait_event(iwqp->mod_qp_waitq, !atomic_read(&iwqp->hw_mod_qp_pend)); irdma_debug(&iwdev->rf->sc_dev, IRDMA_DEBUG_VERBS, "caller: %pS qp_id=%d to_ibqpstate=%d ibqpstate=%d irdma_qpstate=%d attr_mask=0x%x\n", __builtin_return_address(0), ibqp->qp_num, attr->qp_state, iwqp->ibqp_state, iwqp->iwarp_state, attr_mask); spin_lock_irqsave(&iwqp->lock, flags); if (attr_mask & IB_QP_STATE) { if (!kc_ib_modify_qp_is_ok(iwqp->ibqp_state, attr->qp_state, iwqp->ibqp.qp_type, attr_mask, IB_LINK_LAYER_ETHERNET)) { irdma_dev_warn(&iwdev->ibdev, "modify_qp invalid for qp_id=%d, old_state=0x%x, new_state=0x%x\n", iwqp->ibqp.qp_num, iwqp->ibqp_state, attr->qp_state); ret = -EINVAL; goto exit; } info.curr_iwarp_state = iwqp->iwarp_state; switch (attr->qp_state) { case IB_QPS_INIT: if (iwqp->iwarp_state > IRDMA_QP_STATE_IDLE) { ret = -EINVAL; goto exit; } if (iwqp->iwarp_state == IRDMA_QP_STATE_INVALID) { info.next_iwarp_state = IRDMA_QP_STATE_IDLE; issue_modify_qp = 1; } break; case IB_QPS_RTR: if (iwqp->iwarp_state > IRDMA_QP_STATE_IDLE) { ret = -EINVAL; goto exit; } info.arp_cache_idx_valid = true; info.cq_num_valid = true; info.next_iwarp_state = IRDMA_QP_STATE_RTR; issue_modify_qp = 1; break; case IB_QPS_RTS: if (iwqp->ibqp_state < IB_QPS_RTR || iwqp->ibqp_state == IB_QPS_ERR) { ret = -EINVAL; goto exit; } info.arp_cache_idx_valid = true; info.cq_num_valid = true; info.ord_valid = true; info.next_iwarp_state = IRDMA_QP_STATE_RTS; issue_modify_qp = 1; if (dev->hw_attrs.uk_attrs.hw_rev == IRDMA_GEN_2) iwdev->rf->check_fc(&iwdev->vsi, &iwqp->sc_qp); udp_info->cwnd = iwdev->roce_cwnd; roce_info->ack_credits = iwdev->roce_ackcreds; if (iwdev->push_mode && udata && iwqp->sc_qp.push_idx == IRDMA_INVALID_PUSH_PAGE_INDEX && dev->hw_attrs.uk_attrs.hw_rev >= IRDMA_GEN_2) { spin_unlock_irqrestore(&iwqp->lock, flags); irdma_alloc_push_page(iwqp); spin_lock_irqsave(&iwqp->lock, flags); } break; case IB_QPS_SQD: if (iwqp->iwarp_state == IRDMA_QP_STATE_SQD) goto exit; if (iwqp->iwarp_state != IRDMA_QP_STATE_RTS) { ret = -EINVAL; goto exit; } info.next_iwarp_state = IRDMA_QP_STATE_SQD; issue_modify_qp = 1; break; case IB_QPS_SQE: case IB_QPS_ERR: case IB_QPS_RESET: if (iwqp->iwarp_state == IRDMA_QP_STATE_RTS) { if (dev->hw_attrs.uk_attrs.hw_rev <= IRDMA_GEN_2) irdma_cqp_qp_suspend_resume(&iwqp->sc_qp, IRDMA_OP_SUSPEND); spin_unlock_irqrestore(&iwqp->lock, flags); info.next_iwarp_state = IRDMA_QP_STATE_SQD; irdma_hw_modify_qp(iwdev, iwqp, &info, true); spin_lock_irqsave(&iwqp->lock, 
flags); } if (iwqp->iwarp_state == IRDMA_QP_STATE_ERROR) { spin_unlock_irqrestore(&iwqp->lock, flags); if (udata && udata->inlen) { if (ib_copy_from_udata(&ureq, udata, min(sizeof(ureq), udata->inlen))) return -EINVAL; irdma_flush_wqes(iwqp, (ureq.sq_flush ? IRDMA_FLUSH_SQ : 0) | (ureq.rq_flush ? IRDMA_FLUSH_RQ : 0) | IRDMA_REFLUSH); } return 0; } info.next_iwarp_state = IRDMA_QP_STATE_ERROR; issue_modify_qp = 1; break; default: ret = -EINVAL; goto exit; } iwqp->ibqp_state = attr->qp_state; } ctx_info->send_cq_num = iwqp->iwscq->sc_cq.cq_uk.cq_id; ctx_info->rcv_cq_num = iwqp->iwrcq->sc_cq.cq_uk.cq_id; irdma_sc_qp_setctx_roce(&iwqp->sc_qp, iwqp->host_ctx.va, ctx_info); spin_unlock_irqrestore(&iwqp->lock, flags); if (attr_mask & IB_QP_STATE) { if (issue_modify_qp) { ctx_info->rem_endpoint_idx = udp_info->arp_idx; if (irdma_hw_modify_qp(iwdev, iwqp, &info, true)) return -EINVAL; spin_lock_irqsave(&iwqp->lock, flags); if (iwqp->iwarp_state == info.curr_iwarp_state) { iwqp->iwarp_state = info.next_iwarp_state; iwqp->ibqp_state = attr->qp_state; } if (iwqp->ibqp_state > IB_QPS_RTS && !iwqp->flush_issued) { spin_unlock_irqrestore(&iwqp->lock, flags); irdma_flush_wqes(iwqp, IRDMA_FLUSH_SQ | IRDMA_FLUSH_RQ | IRDMA_FLUSH_WAIT); iwqp->flush_issued = 1; } else { spin_unlock_irqrestore(&iwqp->lock, flags); } } else { iwqp->ibqp_state = attr->qp_state; } if (udata && udata->outlen && dev->hw_attrs.uk_attrs.hw_rev >= IRDMA_GEN_2) { struct irdma_ucontext *ucontext; #if __FreeBSD_version >= 1400026 ucontext = rdma_udata_to_drv_context(udata, struct irdma_ucontext, ibucontext); #else ucontext = to_ucontext(ibqp->uobject->context); #endif if (iwqp->sc_qp.push_idx != IRDMA_INVALID_PUSH_PAGE_INDEX && !iwqp->push_wqe_mmap_entry && !irdma_setup_push_mmap_entries(ucontext, iwqp, &uresp.push_wqe_mmap_key, &uresp.push_db_mmap_key)) { uresp.push_valid = 1; uresp.push_offset = iwqp->sc_qp.push_offset; } uresp.rd_fence_rate = iwdev->rd_fence_rate; ret = ib_copy_to_udata(udata, &uresp, min(sizeof(uresp), udata->outlen)); if (ret) { irdma_remove_push_mmap_entries(iwqp); irdma_debug(&iwdev->rf->sc_dev, IRDMA_DEBUG_VERBS, "copy_to_udata failed\n"); return ret; } } } return 0; exit: spin_unlock_irqrestore(&iwqp->lock, flags); return ret; } /** * irdma_modify_qp - modify qp request * @ibqp: qp's pointer for modify * @attr: access attributes * @attr_mask: state mask * @udata: user data */ int irdma_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask, struct ib_udata *udata) { #define IRDMA_MODIFY_QP_MIN_REQ_LEN offsetofend(struct irdma_modify_qp_req, rq_flush) #define IRDMA_MODIFY_QP_MIN_RESP_LEN offsetofend(struct irdma_modify_qp_resp, push_valid) struct irdma_qp *iwqp = to_iwqp(ibqp); struct irdma_device *iwdev = iwqp->iwdev; struct irdma_sc_dev *dev = &iwdev->rf->sc_dev; struct irdma_qp_host_ctx_info *ctx_info; struct irdma_tcp_offload_info *tcp_info; struct irdma_iwarp_offload_info *offload_info; struct irdma_modify_qp_info info = {0}; struct irdma_modify_qp_resp uresp = {}; struct irdma_modify_qp_req ureq = {}; u8 issue_modify_qp = 0; u8 dont_wait = 0; int err; unsigned long flags; if (udata) { if ((udata->inlen && udata->inlen < IRDMA_MODIFY_QP_MIN_REQ_LEN) || (udata->outlen && udata->outlen < IRDMA_MODIFY_QP_MIN_RESP_LEN)) return -EINVAL; } if (attr_mask & ~IB_QP_ATTR_STANDARD_BITS) return -EOPNOTSUPP; ctx_info = &iwqp->ctx_info; offload_info = &iwqp->iwarp_info; tcp_info = &iwqp->tcp_info; wait_event(iwqp->mod_qp_waitq, !atomic_read(&iwqp->hw_mod_qp_pend)); irdma_debug(&iwdev->rf->sc_dev, 
IRDMA_DEBUG_VERBS, "caller: %pS qp_id=%d to_ibqpstate=%d ibqpstate=%d irdma_qpstate=%d last_aeq=%d hw_tcp_state=%d hw_iwarp_state=%d attr_mask=0x%x\n", __builtin_return_address(0), ibqp->qp_num, attr->qp_state, iwqp->ibqp_state, iwqp->iwarp_state, iwqp->last_aeq, iwqp->hw_tcp_state, iwqp->hw_iwarp_state, attr_mask); spin_lock_irqsave(&iwqp->lock, flags); if (attr_mask & IB_QP_STATE) { info.curr_iwarp_state = iwqp->iwarp_state; switch (attr->qp_state) { case IB_QPS_INIT: case IB_QPS_RTR: if (iwqp->iwarp_state > IRDMA_QP_STATE_IDLE) { err = -EINVAL; goto exit; } if (iwqp->iwarp_state == IRDMA_QP_STATE_INVALID) { info.next_iwarp_state = IRDMA_QP_STATE_IDLE; issue_modify_qp = 1; } if (iwdev->push_mode && udata && iwqp->sc_qp.push_idx == IRDMA_INVALID_PUSH_PAGE_INDEX && dev->hw_attrs.uk_attrs.hw_rev >= IRDMA_GEN_2) { spin_unlock_irqrestore(&iwqp->lock, flags); irdma_alloc_push_page(iwqp); spin_lock_irqsave(&iwqp->lock, flags); } break; case IB_QPS_RTS: if (iwqp->iwarp_state > IRDMA_QP_STATE_RTS || !iwqp->cm_id) { err = -EINVAL; goto exit; } issue_modify_qp = 1; iwqp->hw_tcp_state = IRDMA_TCP_STATE_ESTABLISHED; iwqp->hte_added = 1; info.next_iwarp_state = IRDMA_QP_STATE_RTS; info.tcp_ctx_valid = true; info.ord_valid = true; info.arp_cache_idx_valid = true; info.cq_num_valid = true; break; case IB_QPS_SQD: if (iwqp->hw_iwarp_state > IRDMA_QP_STATE_RTS) { err = 0; goto exit; } if (iwqp->iwarp_state == IRDMA_QP_STATE_CLOSING || iwqp->iwarp_state < IRDMA_QP_STATE_RTS) { err = 0; goto exit; } if (iwqp->iwarp_state > IRDMA_QP_STATE_CLOSING) { err = -EINVAL; goto exit; } info.next_iwarp_state = IRDMA_QP_STATE_CLOSING; issue_modify_qp = 1; break; case IB_QPS_SQE: if (iwqp->iwarp_state >= IRDMA_QP_STATE_TERMINATE) { err = -EINVAL; goto exit; } info.next_iwarp_state = IRDMA_QP_STATE_TERMINATE; issue_modify_qp = 1; break; case IB_QPS_ERR: case IB_QPS_RESET: if (iwqp->iwarp_state == IRDMA_QP_STATE_ERROR) { spin_unlock_irqrestore(&iwqp->lock, flags); if (udata && udata->inlen) { if (ib_copy_from_udata(&ureq, udata, min(sizeof(ureq), udata->inlen))) return -EINVAL; irdma_flush_wqes(iwqp, (ureq.sq_flush ? IRDMA_FLUSH_SQ : 0) | (ureq.rq_flush ? 
IRDMA_FLUSH_RQ : 0) | IRDMA_REFLUSH); } return 0; } if (iwqp->sc_qp.term_flags) { spin_unlock_irqrestore(&iwqp->lock, flags); irdma_terminate_del_timer(&iwqp->sc_qp); spin_lock_irqsave(&iwqp->lock, flags); } info.next_iwarp_state = IRDMA_QP_STATE_ERROR; if (iwqp->hw_tcp_state > IRDMA_TCP_STATE_CLOSED && iwdev->iw_status && iwqp->hw_tcp_state != IRDMA_TCP_STATE_TIME_WAIT) info.reset_tcp_conn = true; else dont_wait = 1; issue_modify_qp = 1; info.next_iwarp_state = IRDMA_QP_STATE_ERROR; break; default: err = -EINVAL; goto exit; } iwqp->ibqp_state = attr->qp_state; } if (attr_mask & IB_QP_ACCESS_FLAGS) { ctx_info->iwarp_info_valid = true; if (attr->qp_access_flags & IB_ACCESS_LOCAL_WRITE) offload_info->wr_rdresp_en = true; if (attr->qp_access_flags & IB_ACCESS_REMOTE_WRITE) offload_info->wr_rdresp_en = true; if (attr->qp_access_flags & IB_ACCESS_REMOTE_READ) offload_info->rd_en = true; } if (ctx_info->iwarp_info_valid) { ctx_info->send_cq_num = iwqp->iwscq->sc_cq.cq_uk.cq_id; ctx_info->rcv_cq_num = iwqp->iwrcq->sc_cq.cq_uk.cq_id; irdma_sc_qp_setctx(&iwqp->sc_qp, iwqp->host_ctx.va, ctx_info); } spin_unlock_irqrestore(&iwqp->lock, flags); if (attr_mask & IB_QP_STATE) { if (issue_modify_qp) { ctx_info->rem_endpoint_idx = tcp_info->arp_idx; if (irdma_hw_modify_qp(iwdev, iwqp, &info, true)) return -EINVAL; } spin_lock_irqsave(&iwqp->lock, flags); if (iwqp->iwarp_state == info.curr_iwarp_state) { iwqp->iwarp_state = info.next_iwarp_state; iwqp->ibqp_state = attr->qp_state; } spin_unlock_irqrestore(&iwqp->lock, flags); } if (issue_modify_qp && iwqp->ibqp_state > IB_QPS_RTS) { if (dont_wait) { if (iwqp->hw_tcp_state) { spin_lock_irqsave(&iwqp->lock, flags); iwqp->hw_tcp_state = IRDMA_TCP_STATE_CLOSED; iwqp->last_aeq = IRDMA_AE_RESET_SENT; spin_unlock_irqrestore(&iwqp->lock, flags); } irdma_cm_disconn(iwqp); } else { int close_timer_started; spin_lock_irqsave(&iwdev->cm_core.ht_lock, flags); if (iwqp->cm_node) { atomic_inc(&iwqp->cm_node->refcnt); spin_unlock_irqrestore(&iwdev->cm_core.ht_lock, flags); close_timer_started = atomic_inc_return(&iwqp->close_timer_started); if (iwqp->cm_id && close_timer_started == 1) irdma_schedule_cm_timer(iwqp->cm_node, (struct irdma_puda_buf *)iwqp, IRDMA_TIMER_TYPE_CLOSE, 1, 0); irdma_rem_ref_cm_node(iwqp->cm_node); } else { spin_unlock_irqrestore(&iwdev->cm_core.ht_lock, flags); } } } if (attr_mask & IB_QP_STATE && udata && udata->outlen && dev->hw_attrs.uk_attrs.hw_rev >= IRDMA_GEN_2) { struct irdma_ucontext *ucontext; #if __FreeBSD_version >= 1400026 ucontext = rdma_udata_to_drv_context(udata, struct irdma_ucontext, ibucontext); #else ucontext = to_ucontext(ibqp->uobject->context); #endif if (iwqp->sc_qp.push_idx != IRDMA_INVALID_PUSH_PAGE_INDEX && !iwqp->push_wqe_mmap_entry && !irdma_setup_push_mmap_entries(ucontext, iwqp, &uresp.push_wqe_mmap_key, &uresp.push_db_mmap_key)) { uresp.push_valid = 1; uresp.push_offset = iwqp->sc_qp.push_offset; } uresp.rd_fence_rate = iwdev->rd_fence_rate; err = ib_copy_to_udata(udata, &uresp, min(sizeof(uresp), udata->outlen)); if (err) { irdma_remove_push_mmap_entries(iwqp); irdma_debug(&iwdev->rf->sc_dev, IRDMA_DEBUG_VERBS, "copy_to_udata failed\n"); return err; } } return 0; exit: spin_unlock_irqrestore(&iwqp->lock, flags); return err; } /** * irdma_cq_free_rsrc - free up resources for cq * @rf: RDMA PCI function * @iwcq: cq ptr */ void irdma_cq_free_rsrc(struct irdma_pci_f *rf, struct irdma_cq *iwcq) { struct irdma_sc_cq *cq = &iwcq->sc_cq; if (!iwcq->user_mode) { irdma_free_dma_mem(rf->sc_dev.hw, &iwcq->kmem); 
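        /*
         * Note on the teardown order here: irdma_cq_free_rsrc() undoes setup
         * in reverse.  For a kernel-mode CQ the DMA-able queue memory and its
         * shadow area are freed first, and only then is the CQ number handed
         * back to the rf->allocated_cqs bitmap through irdma_free_rsrc().
         * The setup side reserves that number with irdma_alloc_rsrc() before
         * any memory is attached to it.  A minimal sketch of the pairing,
         * assuming a populated struct irdma_pci_f *rf (illustrative only,
         * not driver code; cq_num is a throwaway local):
         *
         *	u32 cq_num;
         *
         *	if (!irdma_alloc_rsrc(rf, rf->allocated_cqs, rf->max_cq,
         *			      &cq_num, &rf->next_cq)) {
         *		... allocate DMA memory and program the CQ ...
         *		irdma_free_rsrc(rf, rf->allocated_cqs, cq_num);
         *	}
         */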
irdma_free_dma_mem(rf->sc_dev.hw, &iwcq->kmem_shadow); } irdma_free_rsrc(rf, rf->allocated_cqs, cq->cq_uk.cq_id); } /** * irdma_free_cqbuf - worker to free a cq buffer * @work: provides access to the cq buffer to free */ static void irdma_free_cqbuf(struct work_struct *work) { struct irdma_cq_buf *cq_buf = container_of(work, struct irdma_cq_buf, work); irdma_free_dma_mem(cq_buf->hw, &cq_buf->kmem_buf); kfree(cq_buf); } /** * irdma_process_resize_list - remove resized cq buffers from the resize_list * @iwcq: cq which owns the resize_list * @iwdev: irdma device * @lcqe_buf: the buffer where the last cqe is received */ int irdma_process_resize_list(struct irdma_cq *iwcq, struct irdma_device *iwdev, struct irdma_cq_buf *lcqe_buf) { struct list_head *tmp_node, *list_node; struct irdma_cq_buf *cq_buf; int cnt = 0; list_for_each_safe(list_node, tmp_node, &iwcq->resize_list) { cq_buf = list_entry(list_node, struct irdma_cq_buf, list); if (cq_buf == lcqe_buf) return cnt; list_del(&cq_buf->list); queue_work(iwdev->cleanup_wq, &cq_buf->work); cnt++; } return cnt; } /** * irdma_resize_cq - resize cq * @ibcq: cq to be resized * @entries: desired cq size * @udata: user data */ static int irdma_resize_cq(struct ib_cq *ibcq, int entries, struct ib_udata *udata) { #define IRDMA_RESIZE_CQ_MIN_REQ_LEN offsetofend(struct irdma_resize_cq_req, user_cq_buffer) struct irdma_cq *iwcq = to_iwcq(ibcq); struct irdma_sc_dev *dev = iwcq->sc_cq.dev; struct irdma_cqp_request *cqp_request; struct cqp_cmds_info *cqp_info; struct irdma_modify_cq_info *m_info; struct irdma_modify_cq_info info = {0}; struct irdma_dma_mem kmem_buf; struct irdma_cq_mr *cqmr_buf; struct irdma_pbl *iwpbl_buf; struct irdma_device *iwdev; struct irdma_pci_f *rf; struct irdma_cq_buf *cq_buf = NULL; unsigned long flags; int ret; iwdev = to_iwdev(ibcq->device); rf = iwdev->rf; if (!(rf->sc_dev.hw_attrs.uk_attrs.feature_flags & IRDMA_FEATURE_CQ_RESIZE)) return -EOPNOTSUPP; if (udata && udata->inlen < IRDMA_RESIZE_CQ_MIN_REQ_LEN) return -EINVAL; if (entries > rf->max_cqe) return -EINVAL; if (!iwcq->user_mode) { entries++; if (rf->sc_dev.hw_attrs.uk_attrs.hw_rev >= IRDMA_GEN_2) entries *= 2; } info.cq_size = max(entries, 4); if (info.cq_size == iwcq->sc_cq.cq_uk.cq_size - 1) return 0; if (udata) { struct irdma_resize_cq_req req = {}; struct irdma_ucontext *ucontext = #if __FreeBSD_version >= 1400026 rdma_udata_to_drv_context(udata, struct irdma_ucontext, ibucontext); #else to_ucontext(ibcq->uobject->context); #endif /* CQ resize not supported with legacy GEN_1 libi40iw */ if (ucontext->legacy_mode) return -EOPNOTSUPP; if (ib_copy_from_udata(&req, udata, min(sizeof(req), udata->inlen))) return -EINVAL; spin_lock_irqsave(&ucontext->cq_reg_mem_list_lock, flags); iwpbl_buf = irdma_get_pbl((unsigned long)req.user_cq_buffer, &ucontext->cq_reg_mem_list); spin_unlock_irqrestore(&ucontext->cq_reg_mem_list_lock, flags); if (!iwpbl_buf) return -ENOMEM; cqmr_buf = &iwpbl_buf->cq_mr; if (iwpbl_buf->pbl_allocated) { info.virtual_map = true; info.pbl_chunk_size = 1; info.first_pm_pbl_idx = cqmr_buf->cq_pbl.idx; } else { info.cq_pa = cqmr_buf->cq_pbl.addr; } } else { /* Kmode CQ resize */ int rsize; rsize = info.cq_size * sizeof(struct irdma_cqe); kmem_buf.size = round_up(rsize, 256); kmem_buf.va = irdma_allocate_dma_mem(dev->hw, &kmem_buf, kmem_buf.size, 256); if (!kmem_buf.va) return -ENOMEM; info.cq_base = kmem_buf.va; info.cq_pa = kmem_buf.pa; cq_buf = kzalloc(sizeof(*cq_buf), GFP_KERNEL); if (!cq_buf) { ret = -ENOMEM; goto error; } } cqp_request = 
irdma_alloc_and_get_cqp_request(&rf->cqp, true); if (!cqp_request) { ret = -ENOMEM; goto error; } info.shadow_read_threshold = iwcq->sc_cq.shadow_read_threshold; info.cq_resize = true; cqp_info = &cqp_request->info; m_info = &cqp_info->in.u.cq_modify.info; memcpy(m_info, &info, sizeof(*m_info)); cqp_info->cqp_cmd = IRDMA_OP_CQ_MODIFY; cqp_info->in.u.cq_modify.cq = &iwcq->sc_cq; cqp_info->in.u.cq_modify.scratch = (uintptr_t)cqp_request; cqp_info->post_sq = 1; ret = irdma_handle_cqp_op(rf, cqp_request); irdma_put_cqp_request(&rf->cqp, cqp_request); if (ret) goto error; spin_lock_irqsave(&iwcq->lock, flags); if (cq_buf) { cq_buf->kmem_buf = iwcq->kmem; cq_buf->hw = dev->hw; memcpy(&cq_buf->cq_uk, &iwcq->sc_cq.cq_uk, sizeof(cq_buf->cq_uk)); INIT_WORK(&cq_buf->work, irdma_free_cqbuf); list_add_tail(&cq_buf->list, &iwcq->resize_list); iwcq->kmem = kmem_buf; } irdma_sc_cq_resize(&iwcq->sc_cq, &info); ibcq->cqe = info.cq_size - 1; spin_unlock_irqrestore(&iwcq->lock, flags); return 0; error: if (!udata) irdma_free_dma_mem(dev->hw, &kmem_buf); kfree(cq_buf); return ret; } /** * irdma_get_mr_access - get hw MR access permissions from IB access flags * @access: IB access flags */ static inline u16 irdma_get_mr_access(int access){ u16 hw_access = 0; hw_access |= (access & IB_ACCESS_LOCAL_WRITE) ? IRDMA_ACCESS_FLAGS_LOCALWRITE : 0; hw_access |= (access & IB_ACCESS_REMOTE_WRITE) ? IRDMA_ACCESS_FLAGS_REMOTEWRITE : 0; hw_access |= (access & IB_ACCESS_REMOTE_READ) ? IRDMA_ACCESS_FLAGS_REMOTEREAD : 0; hw_access |= (access & IB_ACCESS_MW_BIND) ? IRDMA_ACCESS_FLAGS_BIND_WINDOW : 0; hw_access |= (access & IB_ZERO_BASED) ? IRDMA_ACCESS_FLAGS_ZERO_BASED : 0; hw_access |= IRDMA_ACCESS_FLAGS_LOCALREAD; return hw_access; } /** * irdma_free_stag - free stag resource * @iwdev: irdma device * @stag: stag to free */ void irdma_free_stag(struct irdma_device *iwdev, u32 stag) { u32 stag_idx; stag_idx = (stag & iwdev->rf->mr_stagmask) >> IRDMA_CQPSQ_STAG_IDX_S; irdma_free_rsrc(iwdev->rf, iwdev->rf->allocated_mrs, stag_idx); } /** * irdma_create_stag - create random stag * @iwdev: irdma device */ u32 irdma_create_stag(struct irdma_device *iwdev) { u32 stag; u32 stag_index = 0; u32 next_stag_index; u32 driver_key; u32 random; u8 consumer_key; int ret; get_random_bytes(&random, sizeof(random)); consumer_key = (u8)random; driver_key = random & ~iwdev->rf->mr_stagmask; next_stag_index = (random & iwdev->rf->mr_stagmask) >> 8; next_stag_index %= iwdev->rf->max_mr; ret = irdma_alloc_rsrc(iwdev->rf, iwdev->rf->allocated_mrs, iwdev->rf->max_mr, &stag_index, &next_stag_index); if (ret) return 0; stag = stag_index << IRDMA_CQPSQ_STAG_IDX_S; stag |= driver_key; stag += (u32)consumer_key; return stag; } /** * irdma_check_mem_contiguous - check if pbls stored in arr are contiguous * @arr: lvl1 pbl array * @npages: page count * @pg_size: page size * */ static bool irdma_check_mem_contiguous(u64 *arr, u32 npages, u32 pg_size) { u32 pg_idx; for (pg_idx = 0; pg_idx < npages; pg_idx++) { if ((*arr + (pg_size * pg_idx)) != arr[pg_idx]) return false; } return true; } /** * irdma_check_mr_contiguous - check if MR is physically contiguous * @palloc: pbl allocation struct * @pg_size: page size */ static bool irdma_check_mr_contiguous(struct irdma_pble_alloc *palloc, u32 pg_size) { struct irdma_pble_level2 *lvl2 = &palloc->level2; struct irdma_pble_info *leaf = lvl2->leaf; u64 *arr = NULL; u64 *start_addr = NULL; int i; bool ret; if (palloc->level == PBLE_LEVEL_1) { arr = palloc->level1.addr; ret = irdma_check_mem_contiguous(arr, 
palloc->total_cnt, pg_size); return ret; } start_addr = leaf->addr; for (i = 0; i < lvl2->leaf_cnt; i++, leaf++) { arr = leaf->addr; if ((*start_addr + (i * pg_size * PBLE_PER_PAGE)) != *arr) return false; ret = irdma_check_mem_contiguous(arr, leaf->cnt, pg_size); if (!ret) return false; } return true; } /** * irdma_setup_pbles - copy user pg address to pble's * @rf: RDMA PCI function * @iwmr: mr pointer for this memory registration * @lvl: requested pble levels */ static int irdma_setup_pbles(struct irdma_pci_f *rf, struct irdma_mr *iwmr, u8 lvl) { struct irdma_pbl *iwpbl = &iwmr->iwpbl; struct irdma_pble_alloc *palloc = &iwpbl->pble_alloc; struct irdma_pble_info *pinfo; u64 *pbl; int status; enum irdma_pble_level level = PBLE_LEVEL_1; if (lvl) { status = irdma_get_pble(rf->pble_rsrc, palloc, iwmr->page_cnt, lvl); if (status) return status; iwpbl->pbl_allocated = true; level = palloc->level; pinfo = (level == PBLE_LEVEL_1) ? &palloc->level1 : palloc->level2.leaf; pbl = pinfo->addr; } else { pbl = iwmr->pgaddrmem; } irdma_copy_user_pgaddrs(iwmr, pbl, level); if (lvl) iwmr->pgaddrmem[0] = *pbl; return 0; } /** * irdma_handle_q_mem - handle memory for qp and cq * @iwdev: irdma device * @req: information for q memory management * @iwpbl: pble struct * @lvl: pble level mask */ static int irdma_handle_q_mem(struct irdma_device *iwdev, struct irdma_mem_reg_req *req, struct irdma_pbl *iwpbl, u8 lvl) { struct irdma_pble_alloc *palloc = &iwpbl->pble_alloc; struct irdma_mr *iwmr = iwpbl->iwmr; struct irdma_qp_mr *qpmr = &iwpbl->qp_mr; struct irdma_cq_mr *cqmr = &iwpbl->cq_mr; struct irdma_hmc_pble *hmc_p; u64 *arr = iwmr->pgaddrmem; u32 pg_size, total; int err = 0; bool ret = true; pg_size = iwmr->page_size; err = irdma_setup_pbles(iwdev->rf, iwmr, lvl); if (err) return err; if (lvl) arr = palloc->level1.addr; switch (iwmr->type) { case IRDMA_MEMREG_TYPE_QP: total = req->sq_pages + req->rq_pages; hmc_p = &qpmr->sq_pbl; qpmr->shadow = (dma_addr_t) arr[total]; if (lvl) { ret = irdma_check_mem_contiguous(arr, req->sq_pages, pg_size); if (ret) ret = irdma_check_mem_contiguous(&arr[req->sq_pages], req->rq_pages, pg_size); } if (!ret) { hmc_p->idx = palloc->level1.idx; hmc_p = &qpmr->rq_pbl; hmc_p->idx = palloc->level1.idx + req->sq_pages; } else { hmc_p->addr = arr[0]; hmc_p = &qpmr->rq_pbl; hmc_p->addr = arr[req->sq_pages]; } break; case IRDMA_MEMREG_TYPE_CQ: hmc_p = &cqmr->cq_pbl; if (!cqmr->split) cqmr->shadow = (dma_addr_t) arr[req->cq_pages]; if (lvl) ret = irdma_check_mem_contiguous(arr, req->cq_pages, pg_size); if (!ret) hmc_p->idx = palloc->level1.idx; else hmc_p->addr = arr[0]; break; default: irdma_debug(&iwdev->rf->sc_dev, IRDMA_DEBUG_VERBS, "MR type error\n"); err = -EINVAL; } if (lvl && ret) { irdma_free_pble(iwdev->rf->pble_rsrc, palloc); iwpbl->pbl_allocated = false; } return err; } /** * irdma_hw_alloc_mw - create the hw memory window * @iwdev: irdma device * @iwmr: pointer to memory window info */ int irdma_hw_alloc_mw(struct irdma_device *iwdev, struct irdma_mr *iwmr) { struct irdma_mw_alloc_info *info; struct irdma_pd *iwpd = to_iwpd(iwmr->ibmr.pd); struct irdma_cqp_request *cqp_request; struct cqp_cmds_info *cqp_info; int status; cqp_request = irdma_alloc_and_get_cqp_request(&iwdev->rf->cqp, true); if (!cqp_request) return -ENOMEM; cqp_info = &cqp_request->info; info = &cqp_info->in.u.mw_alloc.info; memset(info, 0, sizeof(*info)); if (iwmr->ibmw.type == IB_MW_TYPE_1) info->mw_wide = true; info->page_size = PAGE_SIZE; info->mw_stag_index = iwmr->stag >> IRDMA_CQPSQ_STAG_IDX_S; 
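        /*
         * The index shift above mirrors how irdma_create_stag() packs an
         * STag: the allocated resource index sits in the bits above
         * IRDMA_CQPSQ_STAG_IDX_S, the low byte is roughly the random
         * consumer key, and the randomized bits outside rf->mr_stagmask form
         * the driver key.  A rough decomposition, assuming a stag previously
         * built by irdma_create_stag() (sketch only, idx/key are throwaway
         * locals):
         *
         *	u32 idx = (stag & iwdev->rf->mr_stagmask) >> IRDMA_CQPSQ_STAG_IDX_S;
         *	u8 key = (u8)stag;
         *
         * where key is the consumer key that irdma_hwreg_mr() later programs
         * via stag_info->stag_key, and irdma_free_stag() recovers idx when
         * the registration or window is destroyed.
         */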
info->pd_id = iwpd->sc_pd.pd_id; info->remote_access = true; cqp_info->cqp_cmd = IRDMA_OP_MW_ALLOC; cqp_info->post_sq = 1; cqp_info->in.u.mw_alloc.dev = &iwdev->rf->sc_dev; cqp_info->in.u.mw_alloc.scratch = (uintptr_t)cqp_request; status = irdma_handle_cqp_op(iwdev->rf, cqp_request); irdma_put_cqp_request(&iwdev->rf->cqp, cqp_request); return status; } /** * irdma_dealloc_mw - Dealloc memory window * @ibmw: memory window structure. */ static int irdma_dealloc_mw(struct ib_mw *ibmw) { struct ib_pd *ibpd = ibmw->pd; struct irdma_pd *iwpd = to_iwpd(ibpd); struct irdma_mr *iwmr = to_iwmr((struct ib_mr *)ibmw); struct irdma_device *iwdev = to_iwdev(ibmw->device); struct irdma_cqp_request *cqp_request; struct cqp_cmds_info *cqp_info; struct irdma_dealloc_stag_info *info; cqp_request = irdma_alloc_and_get_cqp_request(&iwdev->rf->cqp, true); if (!cqp_request) return -ENOMEM; cqp_info = &cqp_request->info; info = &cqp_info->in.u.dealloc_stag.info; memset(info, 0, sizeof(*info)); info->pd_id = iwpd->sc_pd.pd_id; info->stag_idx = RS_64_1(ibmw->rkey, IRDMA_CQPSQ_STAG_IDX_S); info->mr = false; cqp_info->cqp_cmd = IRDMA_OP_DEALLOC_STAG; cqp_info->post_sq = 1; cqp_info->in.u.dealloc_stag.dev = &iwdev->rf->sc_dev; cqp_info->in.u.dealloc_stag.scratch = (uintptr_t)cqp_request; irdma_handle_cqp_op(iwdev->rf, cqp_request); irdma_put_cqp_request(&iwdev->rf->cqp, cqp_request); irdma_free_stag(iwdev, iwmr->stag); kfree(iwmr); return 0; } /** * irdma_hw_alloc_stag - cqp command to allocate stag * @iwdev: irdma device * @iwmr: irdma mr pointer */ int irdma_hw_alloc_stag(struct irdma_device *iwdev, struct irdma_mr *iwmr) { struct irdma_allocate_stag_info *info; struct ib_pd *pd = iwmr->ibmr.pd; struct irdma_pd *iwpd = to_iwpd(pd); struct irdma_cqp_request *cqp_request; struct cqp_cmds_info *cqp_info; int status; cqp_request = irdma_alloc_and_get_cqp_request(&iwdev->rf->cqp, true); if (!cqp_request) return -ENOMEM; cqp_info = &cqp_request->info; info = &cqp_info->in.u.alloc_stag.info; memset(info, 0, sizeof(*info)); info->page_size = PAGE_SIZE; info->stag_idx = iwmr->stag >> IRDMA_CQPSQ_STAG_IDX_S; info->pd_id = iwpd->sc_pd.pd_id; info->total_len = iwmr->len; info->all_memory = (pd->flags & IB_PD_UNSAFE_GLOBAL_RKEY) ? 
true : false; info->remote_access = true; cqp_info->cqp_cmd = IRDMA_OP_ALLOC_STAG; cqp_info->post_sq = 1; cqp_info->in.u.alloc_stag.dev = &iwdev->rf->sc_dev; cqp_info->in.u.alloc_stag.scratch = (uintptr_t)cqp_request; status = irdma_handle_cqp_op(iwdev->rf, cqp_request); irdma_put_cqp_request(&iwdev->rf->cqp, cqp_request); if (!status) iwmr->is_hwreg = 1; return status; } /** * irdma_set_page - populate pbl list for fmr * @ibmr: ib mem to access iwarp mr pointer * @addr: page dma address for pbl list */ static int irdma_set_page(struct ib_mr *ibmr, u64 addr) { struct irdma_mr *iwmr = to_iwmr(ibmr); struct irdma_pbl *iwpbl = &iwmr->iwpbl; struct irdma_pble_alloc *palloc = &iwpbl->pble_alloc; u64 *pbl; if (unlikely(iwmr->npages == iwmr->page_cnt)) return -ENOMEM; if (palloc->level == PBLE_LEVEL_2) { struct irdma_pble_info *palloc_info = palloc->level2.leaf + (iwmr->npages >> PBLE_512_SHIFT); palloc_info->addr[iwmr->npages & (PBLE_PER_PAGE - 1)] = addr; } else { pbl = palloc->level1.addr; pbl[iwmr->npages] = addr; } iwmr->npages++; return 0; } /** * irdma_map_mr_sg - map sg list for fmr * @ibmr: ib mem to access iwarp mr pointer * @sg: scatter gather list * @sg_nents: number of sg pages * @sg_offset: offset into the scatter gather list */ static int irdma_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, int sg_nents, unsigned int *sg_offset) { struct irdma_mr *iwmr = to_iwmr(ibmr); iwmr->npages = 0; return ib_sg_to_pages(ibmr, sg, sg_nents, sg_offset, irdma_set_page); } /** * irdma_hwreg_mr - send cqp command for memory registration * @iwdev: irdma device * @iwmr: irdma mr pointer * @access: access for MR */ int irdma_hwreg_mr(struct irdma_device *iwdev, struct irdma_mr *iwmr, u16 access) { struct irdma_pbl *iwpbl = &iwmr->iwpbl; struct irdma_reg_ns_stag_info *stag_info; struct ib_pd *pd = iwmr->ibmr.pd; struct irdma_pd *iwpd = to_iwpd(pd); struct irdma_pble_alloc *palloc = &iwpbl->pble_alloc; struct irdma_cqp_request *cqp_request; struct cqp_cmds_info *cqp_info; int ret; cqp_request = irdma_alloc_and_get_cqp_request(&iwdev->rf->cqp, true); if (!cqp_request) return -ENOMEM; cqp_info = &cqp_request->info; stag_info = &cqp_info->in.u.mr_reg_non_shared.info; memset(stag_info, 0, sizeof(*stag_info)); stag_info->va = iwpbl->user_base; stag_info->stag_idx = iwmr->stag >> IRDMA_CQPSQ_STAG_IDX_S; stag_info->stag_key = (u8)iwmr->stag; stag_info->total_len = iwmr->len; stag_info->all_memory = (pd->flags & IB_PD_UNSAFE_GLOBAL_RKEY) ?
true : false; stag_info->access_rights = irdma_get_mr_access(access); stag_info->pd_id = iwpd->sc_pd.pd_id; if (stag_info->access_rights & IRDMA_ACCESS_FLAGS_ZERO_BASED) stag_info->addr_type = IRDMA_ADDR_TYPE_ZERO_BASED; else stag_info->addr_type = IRDMA_ADDR_TYPE_VA_BASED; stag_info->page_size = iwmr->page_size; if (iwpbl->pbl_allocated) { if (palloc->level == PBLE_LEVEL_1) { stag_info->first_pm_pbl_index = palloc->level1.idx; stag_info->chunk_size = 1; } else { stag_info->first_pm_pbl_index = palloc->level2.root.idx; stag_info->chunk_size = 3; } } else { stag_info->reg_addr_pa = iwmr->pgaddrmem[0]; } cqp_info->cqp_cmd = IRDMA_OP_MR_REG_NON_SHARED; cqp_info->post_sq = 1; cqp_info->in.u.mr_reg_non_shared.dev = &iwdev->rf->sc_dev; cqp_info->in.u.mr_reg_non_shared.scratch = (uintptr_t)cqp_request; ret = irdma_handle_cqp_op(iwdev->rf, cqp_request); irdma_put_cqp_request(&iwdev->rf->cqp, cqp_request); if (!ret) iwmr->is_hwreg = 1; return ret; } /** * irdma_reg_user_mr - Register a user memory region * @pd: ptr of pd * @start: virtual start address * @len: length of mr * @virt: virtual address * @access: access of mr * @udata: user data */ static struct ib_mr * irdma_reg_user_mr(struct ib_pd *pd, u64 start, u64 len, u64 virt, int access, struct ib_udata *udata) { #define IRDMA_MEM_REG_MIN_REQ_LEN offsetofend(struct irdma_mem_reg_req, sq_pages) struct irdma_device *iwdev = to_iwdev(pd->device); struct irdma_ucontext *ucontext; struct irdma_pble_alloc *palloc; struct irdma_pbl *iwpbl; struct irdma_mr *iwmr; struct ib_umem *region; struct irdma_mem_reg_req req = {}; u32 total, stag = 0; u8 shadow_pgcnt = 1; unsigned long flags; int err = -EINVAL; u8 lvl; int ret; if (len > iwdev->rf->sc_dev.hw_attrs.max_mr_size) return ERR_PTR(-EINVAL); if (udata->inlen < IRDMA_MEM_REG_MIN_REQ_LEN) return ERR_PTR(-EINVAL); region = ib_umem_get(pd->uobject->context, start, len, access, 0); if (IS_ERR(region)) { irdma_debug(&iwdev->rf->sc_dev, IRDMA_DEBUG_VERBS, "Failed to create ib_umem region\n"); return (struct ib_mr *)region; } if (ib_copy_from_udata(&req, udata, min(sizeof(req), udata->inlen))) { ib_umem_release(region); return ERR_PTR(-EFAULT); } iwmr = kzalloc(sizeof(*iwmr), GFP_KERNEL); if (!iwmr) { ib_umem_release(region); return ERR_PTR(-ENOMEM); } iwpbl = &iwmr->iwpbl; iwpbl->iwmr = iwmr; iwmr->region = region; iwmr->ibmr.pd = pd; iwmr->ibmr.device = pd->device; iwmr->ibmr.iova = virt; iwmr->page_size = IRDMA_HW_PAGE_SIZE; iwmr->page_msk = ~(IRDMA_HW_PAGE_SIZE - 1); iwmr->len = region->length; iwpbl->user_base = virt; palloc = &iwpbl->pble_alloc; iwmr->type = req.reg_type; iwmr->page_cnt = irdma_ib_umem_num_dma_blocks(region, iwmr->page_size, virt); switch (req.reg_type) { case IRDMA_MEMREG_TYPE_QP: total = req.sq_pages + req.rq_pages + shadow_pgcnt; if (total > iwmr->page_cnt) { err = -EINVAL; goto error; } total = req.sq_pages + req.rq_pages; lvl = total > 2 ? 
PBLE_LEVEL_1 : PBLE_LEVEL_0; err = irdma_handle_q_mem(iwdev, &req, iwpbl, lvl); if (err) goto error; #if __FreeBSD_version >= 1400026 ucontext = rdma_udata_to_drv_context(udata, struct irdma_ucontext, ibucontext); #else ucontext = to_ucontext(pd->uobject->context); #endif spin_lock_irqsave(&ucontext->qp_reg_mem_list_lock, flags); list_add_tail(&iwpbl->list, &ucontext->qp_reg_mem_list); iwpbl->on_list = true; spin_unlock_irqrestore(&ucontext->qp_reg_mem_list_lock, flags); break; case IRDMA_MEMREG_TYPE_CQ: if (iwdev->rf->sc_dev.hw_attrs.uk_attrs.feature_flags & IRDMA_FEATURE_CQ_RESIZE) shadow_pgcnt = 0; total = req.cq_pages + shadow_pgcnt; if (total > iwmr->page_cnt) { err = -EINVAL; goto error; } lvl = req.cq_pages > 1 ? PBLE_LEVEL_1 : PBLE_LEVEL_0; err = irdma_handle_q_mem(iwdev, &req, iwpbl, lvl); if (err) goto error; #if __FreeBSD_version >= 1400026 ucontext = rdma_udata_to_drv_context(udata, struct irdma_ucontext, ibucontext); #else ucontext = to_ucontext(pd->uobject->context); #endif spin_lock_irqsave(&ucontext->cq_reg_mem_list_lock, flags); list_add_tail(&iwpbl->list, &ucontext->cq_reg_mem_list); iwpbl->on_list = true; spin_unlock_irqrestore(&ucontext->cq_reg_mem_list_lock, flags); break; case IRDMA_MEMREG_TYPE_MEM: lvl = iwmr->page_cnt != 1 ? PBLE_LEVEL_1 | PBLE_LEVEL_2 : PBLE_LEVEL_0; err = irdma_setup_pbles(iwdev->rf, iwmr, lvl); if (err) goto error; if (lvl) { ret = irdma_check_mr_contiguous(palloc, iwmr->page_size); if (ret) { irdma_free_pble(iwdev->rf->pble_rsrc, palloc); iwpbl->pbl_allocated = false; } } stag = irdma_create_stag(iwdev); if (!stag) { err = -ENOMEM; goto error; } iwmr->stag = stag; iwmr->ibmr.rkey = stag; iwmr->ibmr.lkey = stag; iwmr->access = access; err = irdma_hwreg_mr(iwdev, iwmr, access); if (err) { irdma_free_stag(iwdev, stag); goto error; } break; default: goto error; } iwmr->type = req.reg_type; return &iwmr->ibmr; error: if (palloc->level != PBLE_LEVEL_0 && iwpbl->pbl_allocated) irdma_free_pble(iwdev->rf->pble_rsrc, palloc); ib_umem_release(region); kfree(iwmr); return ERR_PTR(err); } int irdma_hwdereg_mr(struct ib_mr *ib_mr) { struct irdma_device *iwdev = to_iwdev(ib_mr->device); struct irdma_mr *iwmr = to_iwmr(ib_mr); struct irdma_pd *iwpd = to_iwpd(ib_mr->pd); struct irdma_dealloc_stag_info *info; struct irdma_pbl *iwpbl = &iwmr->iwpbl; struct irdma_cqp_request *cqp_request; struct cqp_cmds_info *cqp_info; int status; /* * Skip HW MR de-register when it is already de-registered during an MR re-reregister and the re-registration * fails */ if (!iwmr->is_hwreg) return 0; cqp_request = irdma_alloc_and_get_cqp_request(&iwdev->rf->cqp, true); if (!cqp_request) return -ENOMEM; cqp_info = &cqp_request->info; info = &cqp_info->in.u.dealloc_stag.info; memset(info, 0, sizeof(*info)); info->pd_id = iwpd->sc_pd.pd_id; info->stag_idx = RS_64_1(ib_mr->rkey, IRDMA_CQPSQ_STAG_IDX_S); info->mr = true; if (iwpbl->pbl_allocated) info->dealloc_pbl = true; cqp_info->cqp_cmd = IRDMA_OP_DEALLOC_STAG; cqp_info->post_sq = 1; cqp_info->in.u.dealloc_stag.dev = &iwdev->rf->sc_dev; cqp_info->in.u.dealloc_stag.scratch = (uintptr_t)cqp_request; status = irdma_handle_cqp_op(iwdev->rf, cqp_request); irdma_put_cqp_request(&iwdev->rf->cqp, cqp_request); if (!status) iwmr->is_hwreg = 0; return status; } /* * irdma_rereg_mr_trans - Re-register a user MR for a change translation. @iwmr: ptr of iwmr @start: virtual start * address @len: length of mr @virt: virtual address * * Re-register a user memory region when a change translation is requested. 
Re-register a new region while reusing the * stag from the original registration. */ struct ib_mr * irdma_rereg_mr_trans(struct irdma_mr *iwmr, u64 start, u64 len, u64 virt, struct ib_udata *udata) { struct irdma_device *iwdev = to_iwdev(iwmr->ibmr.device); struct irdma_pbl *iwpbl = &iwmr->iwpbl; struct irdma_pble_alloc *palloc = &iwpbl->pble_alloc; struct ib_pd *pd = iwmr->ibmr.pd; struct ib_umem *region; u8 lvl; int err; region = ib_umem_get(pd->uobject->context, start, len, iwmr->access, 0); if (IS_ERR(region)) { irdma_debug(&iwdev->rf->sc_dev, IRDMA_DEBUG_VERBS, "Failed to create ib_umem region\n"); return (struct ib_mr *)region; } iwmr->region = region; iwmr->ibmr.iova = virt; iwmr->ibmr.pd = pd; iwmr->page_size = PAGE_SIZE; iwmr->len = region->length; iwpbl->user_base = virt; iwmr->page_cnt = irdma_ib_umem_num_dma_blocks(region, iwmr->page_size, virt); lvl = iwmr->page_cnt != 1 ? PBLE_LEVEL_1 | PBLE_LEVEL_2 : PBLE_LEVEL_0; err = irdma_setup_pbles(iwdev->rf, iwmr, lvl); if (err) goto error; if (lvl) { err = irdma_check_mr_contiguous(palloc, iwmr->page_size); if (err) { irdma_free_pble(iwdev->rf->pble_rsrc, palloc); iwpbl->pbl_allocated = false; } } err = irdma_hwreg_mr(iwdev, iwmr, iwmr->access); if (err) goto error; return &iwmr->ibmr; error: if (palloc->level != PBLE_LEVEL_0 && iwpbl->pbl_allocated) { irdma_free_pble(iwdev->rf->pble_rsrc, palloc); iwpbl->pbl_allocated = false; } ib_umem_release(region); iwmr->region = NULL; return ERR_PTR(err); } /** * irdma_reg_phys_mr - register kernel physical memory * @pd: ibpd pointer * @addr: physical address of memory to register * @size: size of memory to register * @access: Access rights * @iova_start: start of virtual address for physical buffers */ struct ib_mr * irdma_reg_phys_mr(struct ib_pd *pd, u64 addr, u64 size, int access, u64 *iova_start) { struct irdma_device *iwdev = to_iwdev(pd->device); struct irdma_pbl *iwpbl; struct irdma_mr *iwmr; u32 stag; int ret; iwmr = kzalloc(sizeof(*iwmr), GFP_KERNEL); if (!iwmr) return ERR_PTR(-ENOMEM); iwmr->ibmr.pd = pd; iwmr->ibmr.device = pd->device; iwpbl = &iwmr->iwpbl; iwpbl->iwmr = iwmr; iwmr->type = IRDMA_MEMREG_TYPE_MEM; iwpbl->user_base = *iova_start; stag = irdma_create_stag(iwdev); if (!stag) { ret = -ENOMEM; goto err; } iwmr->stag = stag; iwmr->ibmr.iova = *iova_start; iwmr->ibmr.rkey = stag; iwmr->ibmr.lkey = stag; iwmr->page_cnt = 1; iwmr->pgaddrmem[0] = addr; iwmr->len = size; iwmr->page_size = SZ_4K; ret = irdma_hwreg_mr(iwdev, iwmr, access); if (ret) { irdma_free_stag(iwdev, stag); goto err; } return &iwmr->ibmr; err: kfree(iwmr); return ERR_PTR(ret); } /** * irdma_get_dma_mr - register physical mem * @pd: ptr of pd * @acc: access for memory */ static struct ib_mr * irdma_get_dma_mr(struct ib_pd *pd, int acc) { u64 kva = 0; return irdma_reg_phys_mr(pd, 0, 0, acc, &kva); } /** * irdma_del_memlist - Deleting pbl list entries for CQ/QP * @iwmr: iwmr for IB's user page addresses * @ucontext: ptr to user context */ void irdma_del_memlist(struct irdma_mr *iwmr, struct irdma_ucontext *ucontext) { struct irdma_pbl *iwpbl = &iwmr->iwpbl; unsigned long flags; switch (iwmr->type) { case IRDMA_MEMREG_TYPE_CQ: spin_lock_irqsave(&ucontext->cq_reg_mem_list_lock, flags); if (iwpbl->on_list) { iwpbl->on_list = false; list_del(&iwpbl->list); } spin_unlock_irqrestore(&ucontext->cq_reg_mem_list_lock, flags); break; case IRDMA_MEMREG_TYPE_QP: spin_lock_irqsave(&ucontext->qp_reg_mem_list_lock, flags); if (iwpbl->on_list) { iwpbl->on_list = false; list_del(&iwpbl->list); } 
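	/*
	 * iwpbl->on_list is set when the registration is added in
	 * irdma_reg_user_mr(); checking it before list_del() prevents
	 * unlinking an entry from the ucontext lists twice.
	 */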
spin_unlock_irqrestore(&ucontext->qp_reg_mem_list_lock, flags); break; default: break; } } /** * irdma_copy_sg_list - copy sg list for qp * @sg_list: copied into sg_list * @sgl: copy from sgl * @num_sges: count of sg entries */ static void irdma_copy_sg_list(struct irdma_sge *sg_list, struct ib_sge *sgl, int num_sges) { unsigned int i; for (i = 0; i < num_sges; i++) { sg_list[i].tag_off = sgl[i].addr; sg_list[i].len = sgl[i].length; sg_list[i].stag = sgl[i].lkey; } } /** * irdma_post_send - kernel application wr * @ibqp: qp ptr for wr * @ib_wr: work request ptr * @bad_wr: return of bad wr if err */ static int irdma_post_send(struct ib_qp *ibqp, const struct ib_send_wr *ib_wr, const struct ib_send_wr **bad_wr) { struct irdma_qp *iwqp; struct irdma_qp_uk *ukqp; struct irdma_sc_dev *dev; struct irdma_post_sq_info info; int err = 0; unsigned long flags; bool inv_stag; struct irdma_ah *ah; iwqp = to_iwqp(ibqp); ukqp = &iwqp->sc_qp.qp_uk; dev = &iwqp->iwdev->rf->sc_dev; spin_lock_irqsave(&iwqp->lock, flags); while (ib_wr) { memset(&info, 0, sizeof(info)); inv_stag = false; info.wr_id = (ib_wr->wr_id); if ((ib_wr->send_flags & IB_SEND_SIGNALED) || iwqp->sig_all) info.signaled = true; if (ib_wr->send_flags & IB_SEND_FENCE) info.read_fence = true; switch (ib_wr->opcode) { case IB_WR_SEND_WITH_IMM: if (ukqp->qp_caps & IRDMA_SEND_WITH_IMM) { info.imm_data_valid = true; info.imm_data = ntohl(ib_wr->ex.imm_data); } else { err = -EINVAL; break; } /* fallthrough */ case IB_WR_SEND: case IB_WR_SEND_WITH_INV: if (ib_wr->opcode == IB_WR_SEND || ib_wr->opcode == IB_WR_SEND_WITH_IMM) { if (ib_wr->send_flags & IB_SEND_SOLICITED) info.op_type = IRDMA_OP_TYPE_SEND_SOL; else info.op_type = IRDMA_OP_TYPE_SEND; } else { if (ib_wr->send_flags & IB_SEND_SOLICITED) info.op_type = IRDMA_OP_TYPE_SEND_SOL_INV; else info.op_type = IRDMA_OP_TYPE_SEND_INV; info.stag_to_inv = ib_wr->ex.invalidate_rkey; } info.op.send.num_sges = ib_wr->num_sge; info.op.send.sg_list = (struct irdma_sge *)ib_wr->sg_list; if (iwqp->ibqp.qp_type == IB_QPT_UD || iwqp->ibqp.qp_type == IB_QPT_GSI) { ah = to_iwah(ud_wr(ib_wr)->ah); info.op.send.ah_id = ah->sc_ah.ah_info.ah_idx; info.op.send.qkey = ud_wr(ib_wr)->remote_qkey; info.op.send.dest_qp = ud_wr(ib_wr)->remote_qpn; } if (ib_wr->send_flags & IB_SEND_INLINE) err = irdma_uk_inline_send(ukqp, &info, false); else err = irdma_uk_send(ukqp, &info, false); break; case IB_WR_RDMA_WRITE_WITH_IMM: if (ukqp->qp_caps & IRDMA_WRITE_WITH_IMM) { info.imm_data_valid = true; info.imm_data = ntohl(ib_wr->ex.imm_data); } else { err = -EINVAL; break; } /* fallthrough */ case IB_WR_RDMA_WRITE: if (ib_wr->send_flags & IB_SEND_SOLICITED) info.op_type = IRDMA_OP_TYPE_RDMA_WRITE_SOL; else info.op_type = IRDMA_OP_TYPE_RDMA_WRITE; info.op.rdma_write.num_lo_sges = ib_wr->num_sge; info.op.rdma_write.lo_sg_list = (void *)ib_wr->sg_list; info.op.rdma_write.rem_addr.tag_off = rdma_wr(ib_wr)->remote_addr; info.op.rdma_write.rem_addr.stag = rdma_wr(ib_wr)->rkey; if (ib_wr->send_flags & IB_SEND_INLINE) err = irdma_uk_inline_rdma_write(ukqp, &info, false); else err = irdma_uk_rdma_write(ukqp, &info, false); break; case IB_WR_RDMA_READ_WITH_INV: inv_stag = true; /* fallthrough */ case IB_WR_RDMA_READ: if (ib_wr->num_sge > dev->hw_attrs.uk_attrs.max_hw_read_sges) { err = -EINVAL; break; } info.op_type = IRDMA_OP_TYPE_RDMA_READ; info.op.rdma_read.rem_addr.tag_off = rdma_wr(ib_wr)->remote_addr; info.op.rdma_read.rem_addr.stag = rdma_wr(ib_wr)->rkey; info.op.rdma_read.lo_sg_list = (void *)ib_wr->sg_list; info.op.rdma_read.num_lo_sges 
= ib_wr->num_sge; err = irdma_uk_rdma_read(ukqp, &info, inv_stag, false); break; case IB_WR_LOCAL_INV: info.op_type = IRDMA_OP_TYPE_INV_STAG; info.local_fence = info.read_fence; info.op.inv_local_stag.target_stag = ib_wr->ex.invalidate_rkey; err = irdma_uk_stag_local_invalidate(ukqp, &info, true); break; case IB_WR_REG_MR:{ struct irdma_mr *iwmr = to_iwmr(reg_wr(ib_wr)->mr); struct irdma_pble_alloc *palloc = &iwmr->iwpbl.pble_alloc; struct irdma_fast_reg_stag_info stag_info = {0}; stag_info.signaled = info.signaled; stag_info.read_fence = info.read_fence; stag_info.access_rights = irdma_get_mr_access(reg_wr(ib_wr)->access); stag_info.stag_key = reg_wr(ib_wr)->key & 0xff; stag_info.stag_idx = reg_wr(ib_wr)->key >> 8; stag_info.page_size = reg_wr(ib_wr)->mr->page_size; stag_info.wr_id = ib_wr->wr_id; stag_info.addr_type = IRDMA_ADDR_TYPE_VA_BASED; stag_info.va = (void *)(uintptr_t)iwmr->ibmr.iova; stag_info.total_len = iwmr->ibmr.length; if (palloc->level == PBLE_LEVEL_2) { stag_info.chunk_size = 3; stag_info.first_pm_pbl_index = palloc->level2.root.idx; } else { stag_info.chunk_size = 1; stag_info.first_pm_pbl_index = palloc->level1.idx; } stag_info.local_fence = ib_wr->send_flags & IB_SEND_FENCE; err = irdma_sc_mr_fast_register(&iwqp->sc_qp, &stag_info, true); break; } default: err = -EINVAL; irdma_debug(&iwqp->iwdev->rf->sc_dev, IRDMA_DEBUG_VERBS, "upost_send bad opcode = 0x%x\n", ib_wr->opcode); break; } if (err) break; ib_wr = ib_wr->next; } if (!iwqp->flush_issued) { if (iwqp->hw_iwarp_state <= IRDMA_QP_STATE_RTS) irdma_uk_qp_post_wr(ukqp); spin_unlock_irqrestore(&iwqp->lock, flags); } else { spin_unlock_irqrestore(&iwqp->lock, flags); irdma_sched_qp_flush_work(iwqp); } if (err) *bad_wr = ib_wr; return err; } /** * irdma_post_recv - post receive wr for kernel application * @ibqp: ib qp pointer * @ib_wr: work request for receive * @bad_wr: bad wr caused an error */ static int irdma_post_recv(struct ib_qp *ibqp, const struct ib_recv_wr *ib_wr, const struct ib_recv_wr **bad_wr) { struct irdma_qp *iwqp = to_iwqp(ibqp); struct irdma_qp_uk *ukqp = &iwqp->sc_qp.qp_uk; struct irdma_post_rq_info post_recv = {0}; struct irdma_sge *sg_list = iwqp->sg_list; unsigned long flags; int err = 0; spin_lock_irqsave(&iwqp->lock, flags); while (ib_wr) { if (ib_wr->num_sge > ukqp->max_rq_frag_cnt) { err = -EINVAL; goto out; } post_recv.num_sges = ib_wr->num_sge; post_recv.wr_id = ib_wr->wr_id; irdma_copy_sg_list(sg_list, ib_wr->sg_list, ib_wr->num_sge); post_recv.sg_list = sg_list; err = irdma_uk_post_receive(ukqp, &post_recv); if (err) { irdma_debug(&iwqp->iwdev->rf->sc_dev, IRDMA_DEBUG_VERBS, "post_recv err %d\n", err); goto out; } ib_wr = ib_wr->next; } out: spin_unlock_irqrestore(&iwqp->lock, flags); if (iwqp->flush_issued) irdma_sched_qp_flush_work(iwqp); if (err) *bad_wr = ib_wr; return err; } /** * irdma_flush_err_to_ib_wc_status - return change flush error code to IB status * @opcode: iwarp flush code */ static enum ib_wc_status irdma_flush_err_to_ib_wc_status(enum irdma_flush_opcode opcode) { switch (opcode) { case FLUSH_PROT_ERR: return IB_WC_LOC_PROT_ERR; case FLUSH_REM_ACCESS_ERR: return IB_WC_REM_ACCESS_ERR; case FLUSH_LOC_QP_OP_ERR: return IB_WC_LOC_QP_OP_ERR; case FLUSH_REM_OP_ERR: return IB_WC_REM_OP_ERR; case FLUSH_LOC_LEN_ERR: return IB_WC_LOC_LEN_ERR; case FLUSH_GENERAL_ERR: return IB_WC_WR_FLUSH_ERR; case FLUSH_MW_BIND_ERR: return IB_WC_MW_BIND_ERR; case FLUSH_REM_INV_REQ_ERR: return IB_WC_REM_INV_REQ_ERR; case FLUSH_RETRY_EXC_ERR: return IB_WC_RETRY_EXC_ERR; case FLUSH_FATAL_ERR: 
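		/* FLUSH_FATAL_ERR and any unrecognized flush code fall
		 * through to IB_WC_FATAL_ERR. */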
default: return IB_WC_FATAL_ERR; } } static inline void set_ib_wc_op_sq(struct irdma_cq_poll_info *cq_poll_info, struct ib_wc *entry) { struct irdma_sc_qp *qp; switch (cq_poll_info->op_type) { case IRDMA_OP_TYPE_RDMA_WRITE: case IRDMA_OP_TYPE_RDMA_WRITE_SOL: entry->opcode = IB_WC_RDMA_WRITE; break; case IRDMA_OP_TYPE_RDMA_READ_INV_STAG: case IRDMA_OP_TYPE_RDMA_READ: entry->opcode = IB_WC_RDMA_READ; break; case IRDMA_OP_TYPE_SEND_SOL: case IRDMA_OP_TYPE_SEND_SOL_INV: case IRDMA_OP_TYPE_SEND_INV: case IRDMA_OP_TYPE_SEND: entry->opcode = IB_WC_SEND; break; case IRDMA_OP_TYPE_FAST_REG_NSMR: entry->opcode = IB_WC_REG_MR; break; case IRDMA_OP_TYPE_INV_STAG: entry->opcode = IB_WC_LOCAL_INV; break; default: qp = cq_poll_info->qp_handle; irdma_dev_err(to_ibdev(qp->dev), "Invalid opcode = %d in CQE\n", cq_poll_info->op_type); entry->status = IB_WC_GENERAL_ERR; } } static inline void set_ib_wc_op_rq(struct irdma_cq_poll_info *cq_poll_info, struct ib_wc *entry, bool send_imm_support) { /** * iWARP does not support sendImm, so the presence of Imm data * must be WriteImm. */ if (!send_imm_support) { entry->opcode = cq_poll_info->imm_valid ? IB_WC_RECV_RDMA_WITH_IMM : IB_WC_RECV; return; } switch (cq_poll_info->op_type) { case IB_OPCODE_RDMA_WRITE_ONLY_WITH_IMMEDIATE: case IB_OPCODE_RDMA_WRITE_LAST_WITH_IMMEDIATE: entry->opcode = IB_WC_RECV_RDMA_WITH_IMM; break; default: entry->opcode = IB_WC_RECV; } } /** * irdma_process_cqe - process cqe info * @entry: processed cqe * @cq_poll_info: cqe info */ static void irdma_process_cqe(struct ib_wc *entry, struct irdma_cq_poll_info *cq_poll_info) { struct irdma_sc_qp *qp; entry->wc_flags = 0; entry->pkey_index = 0; entry->wr_id = cq_poll_info->wr_id; qp = cq_poll_info->qp_handle; entry->qp = qp->qp_uk.back_qp; if (cq_poll_info->error) { entry->status = (cq_poll_info->comp_status == IRDMA_COMPL_STATUS_FLUSHED) ? irdma_flush_err_to_ib_wc_status(cq_poll_info->minor_err) : IB_WC_GENERAL_ERR; entry->vendor_err = cq_poll_info->major_err << 16 | cq_poll_info->minor_err; } else { entry->status = IB_WC_SUCCESS; if (cq_poll_info->imm_valid) { entry->ex.imm_data = htonl(cq_poll_info->imm_data); entry->wc_flags |= IB_WC_WITH_IMM; } if (cq_poll_info->ud_smac_valid) { ether_addr_copy(entry->smac, cq_poll_info->ud_smac); entry->wc_flags |= IB_WC_WITH_SMAC; } if (cq_poll_info->ud_vlan_valid) { u16 vlan = cq_poll_info->ud_vlan & EVL_VLID_MASK; entry->sl = cq_poll_info->ud_vlan >> VLAN_PRIO_SHIFT; if (vlan) { entry->vlan_id = vlan; entry->wc_flags |= IB_WC_WITH_VLAN; } } else { entry->sl = 0; } } if (cq_poll_info->q_type == IRDMA_CQE_QTYPE_SQ) { set_ib_wc_op_sq(cq_poll_info, entry); } else { set_ib_wc_op_rq(cq_poll_info, entry, qp->qp_uk.qp_caps & IRDMA_SEND_WITH_IMM ? true : false); if (qp->qp_uk.qp_type != IRDMA_QP_TYPE_ROCE_UD && cq_poll_info->stag_invalid_set) { entry->ex.invalidate_rkey = cq_poll_info->inv_stag; entry->wc_flags |= IB_WC_WITH_INVALIDATE; } } if (qp->qp_uk.qp_type == IRDMA_QP_TYPE_ROCE_UD) { entry->src_qp = cq_poll_info->ud_src_qpn; entry->slid = 0; entry->wc_flags |= (IB_WC_GRH | IB_WC_WITH_NETWORK_HDR_TYPE); entry->network_hdr_type = cq_poll_info->ipv4 ? 
RDMA_NETWORK_IPV4 : RDMA_NETWORK_IPV6; } else { entry->src_qp = cq_poll_info->qp_id; } entry->byte_len = cq_poll_info->bytes_xfered; } /** * irdma_poll_one - poll one entry of the CQ * @ukcq: ukcq to poll * @cur_cqe: current CQE info to be filled in * @entry: ibv_wc object to be filled for non-extended CQ or NULL for extended CQ * * Returns the internal irdma device error code or 0 on success */ static inline int irdma_poll_one(struct irdma_cq_uk *ukcq, struct irdma_cq_poll_info *cur_cqe, struct ib_wc *entry) { int ret = irdma_uk_cq_poll_cmpl(ukcq, cur_cqe); if (ret) return ret; irdma_process_cqe(entry, cur_cqe); return 0; } /** * __irdma_poll_cq - poll cq for completion (kernel apps) * @iwcq: cq to poll * @num_entries: number of entries to poll * @entry: wr of a completed entry */ static int __irdma_poll_cq(struct irdma_cq *iwcq, int num_entries, struct ib_wc *entry) { struct list_head *tmp_node, *list_node; struct irdma_cq_buf *last_buf = NULL; struct irdma_cq_poll_info *cur_cqe = &iwcq->cur_cqe; struct irdma_cq_buf *cq_buf; int ret; struct irdma_device *iwdev; struct irdma_cq_uk *ukcq; bool cq_new_cqe = false; int resized_bufs = 0; int npolled = 0; iwdev = to_iwdev(iwcq->ibcq.device); ukcq = &iwcq->sc_cq.cq_uk; /* go through the list of previously resized CQ buffers */ list_for_each_safe(list_node, tmp_node, &iwcq->resize_list) { cq_buf = container_of(list_node, struct irdma_cq_buf, list); while (npolled < num_entries) { ret = irdma_poll_one(&cq_buf->cq_uk, cur_cqe, entry + npolled); if (!ret) { ++npolled; cq_new_cqe = true; continue; } if (ret == -ENOENT) break; /* QP using the CQ is destroyed. Skip reporting this CQE */ if (ret == -EFAULT) { cq_new_cqe = true; continue; } goto error; } /* save the resized CQ buffer which received the last cqe */ if (cq_new_cqe) last_buf = cq_buf; cq_new_cqe = false; } /* check the current CQ for new cqes */ while (npolled < num_entries) { ret = irdma_poll_one(ukcq, cur_cqe, entry + npolled); if (ret == -ENOENT) { ret = irdma_generated_cmpls(iwcq, cur_cqe); if (!ret) irdma_process_cqe(entry + npolled, cur_cqe); } if (!ret) { ++npolled; cq_new_cqe = true; continue; } if (ret == -ENOENT) break; /* QP using the CQ is destroyed. 
Skip reporting this CQE */ if (ret == -EFAULT) { cq_new_cqe = true; continue; } goto error; } if (cq_new_cqe) /* all previous CQ resizes are complete */ resized_bufs = irdma_process_resize_list(iwcq, iwdev, NULL); else if (last_buf) /* only CQ resizes up to the last_buf are complete */ resized_bufs = irdma_process_resize_list(iwcq, iwdev, last_buf); if (resized_bufs) /* report to the HW the number of complete CQ resizes */ irdma_uk_cq_set_resized_cnt(ukcq, resized_bufs); return npolled; error: irdma_debug(&iwdev->rf->sc_dev, IRDMA_DEBUG_VERBS, "%s: Error polling CQ, irdma_err: %d\n", __func__, ret); return ret; } /** * irdma_poll_cq - poll cq for completion (kernel apps) * @ibcq: cq to poll * @num_entries: number of entries to poll * @entry: wr of a completed entry */ static int irdma_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *entry) { struct irdma_cq *iwcq; unsigned long flags; int ret; iwcq = to_iwcq(ibcq); spin_lock_irqsave(&iwcq->lock, flags); ret = __irdma_poll_cq(iwcq, num_entries, entry); spin_unlock_irqrestore(&iwcq->lock, flags); return ret; } /** * irdma_req_notify_cq - arm cq for kernel application * @ibcq: cq to arm * @notify_flags: notification flags */ static int irdma_req_notify_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags notify_flags) { struct irdma_cq *iwcq; struct irdma_cq_uk *ukcq; unsigned long flags; enum irdma_cmpl_notify cq_notify = IRDMA_CQ_COMPL_EVENT; bool promo_event = false; int ret = 0; iwcq = to_iwcq(ibcq); ukcq = &iwcq->sc_cq.cq_uk; spin_lock_irqsave(&iwcq->lock, flags); if (notify_flags == IB_CQ_SOLICITED) { cq_notify = IRDMA_CQ_COMPL_SOLICITED; } else { if (iwcq->last_notify == IRDMA_CQ_COMPL_SOLICITED) promo_event = true; } if (!atomic_cmpxchg(&iwcq->armed, 0, 1) || promo_event) { iwcq->last_notify = cq_notify; irdma_uk_cq_request_notification(ukcq, cq_notify); } if ((notify_flags & IB_CQ_REPORT_MISSED_EVENTS) && (!irdma_cq_empty(iwcq) || !list_empty(&iwcq->cmpl_generated))) ret = 1; spin_unlock_irqrestore(&iwcq->lock, flags); return ret; } /** * mcast_list_add - Add a new mcast item to list * @rf: RDMA PCI function * @new_elem: pointer to element to add */ static void mcast_list_add(struct irdma_pci_f *rf, struct mc_table_list *new_elem) { list_add(&new_elem->list, &rf->mc_qht_list.list); } /** * mcast_list_del - Remove an mcast item from list * @mc_qht_elem: pointer to mcast table list element */ static void mcast_list_del(struct mc_table_list *mc_qht_elem) { if (mc_qht_elem) list_del(&mc_qht_elem->list); } /** * mcast_list_lookup_ip - Search mcast list for address * @rf: RDMA PCI function * @ip_mcast: pointer to mcast IP address */ static struct mc_table_list * mcast_list_lookup_ip(struct irdma_pci_f *rf, u32 *ip_mcast) { struct mc_table_list *mc_qht_el; struct list_head *pos, *q; list_for_each_safe(pos, q, &rf->mc_qht_list.list) { mc_qht_el = list_entry(pos, struct mc_table_list, list); if (!memcmp(mc_qht_el->mc_info.dest_ip, ip_mcast, sizeof(mc_qht_el->mc_info.dest_ip))) return mc_qht_el; } return NULL; } /** * irdma_mcast_cqp_op - perform a mcast cqp operation * @iwdev: irdma device * @mc_grp_ctx: mcast group info * @op: operation * * returns error status */ static int irdma_mcast_cqp_op(struct irdma_device *iwdev, struct irdma_mcast_grp_info *mc_grp_ctx, u8 op) { struct cqp_cmds_info *cqp_info; struct irdma_cqp_request *cqp_request; int status; cqp_request = irdma_alloc_and_get_cqp_request(&iwdev->rf->cqp, true); if (!cqp_request) return -ENOMEM; cqp_request->info.in.u.mc_create.info = *mc_grp_ctx; cqp_info = &cqp_request->info;
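	/*
	 * The same mc_create request layout is used for all three
	 * multicast CQP commands; the opcode supplied by the caller
	 * (IRDMA_OP_MC_CREATE/_MODIFY/_DESTROY) selects the operation
	 * below.
	 */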
cqp_info->cqp_cmd = op; cqp_info->post_sq = 1; cqp_info->in.u.mc_create.scratch = (uintptr_t)cqp_request; cqp_info->in.u.mc_create.cqp = &iwdev->rf->cqp.sc_cqp; status = irdma_handle_cqp_op(iwdev->rf, cqp_request); irdma_put_cqp_request(&iwdev->rf->cqp, cqp_request); return status; } /** * irdma_attach_mcast - attach a qp to a multicast group * @ibqp: ptr to qp * @ibgid: pointer to global ID * @lid: local ID * * returns error status */ static int irdma_attach_mcast(struct ib_qp *ibqp, union ib_gid *ibgid, u16 lid) { struct irdma_qp *iwqp = to_iwqp(ibqp); struct irdma_device *iwdev = iwqp->iwdev; struct irdma_pci_f *rf = iwdev->rf; struct mc_table_list *mc_qht_elem; struct irdma_mcast_grp_ctx_entry_info mcg_info = {0}; unsigned long flags; u32 ip_addr[4] = {0}; u32 mgn; u32 no_mgs; int ret = 0; bool ipv4; u16 vlan_id; union { struct sockaddr saddr; struct sockaddr_in saddr_in; struct sockaddr_in6 saddr_in6; } sgid_addr; unsigned char dmac[ETH_ALEN]; rdma_gid2ip((struct sockaddr *)&sgid_addr, ibgid); if (!ipv6_addr_v4mapped((struct in6_addr *)ibgid)) { irdma_copy_ip_ntohl(ip_addr, sgid_addr.saddr_in6.sin6_addr.__u6_addr.__u6_addr32); irdma_netdev_vlan_ipv6(ip_addr, &vlan_id, NULL); ipv4 = false; irdma_debug(&iwdev->rf->sc_dev, IRDMA_DEBUG_VERBS, "qp_id=%d, IP6address=%pI6\n", ibqp->qp_num, ip_addr); irdma_mcast_mac_v6(ip_addr, dmac); } else { ip_addr[0] = ntohl(sgid_addr.saddr_in.sin_addr.s_addr); ipv4 = true; vlan_id = irdma_get_vlan_ipv4(ip_addr); irdma_mcast_mac_v4(ip_addr, dmac); irdma_debug(&iwdev->rf->sc_dev, IRDMA_DEBUG_VERBS, "qp_id=%d, IP4address=%pI4, MAC=%pM\n", ibqp->qp_num, ip_addr, dmac); } spin_lock_irqsave(&rf->qh_list_lock, flags); mc_qht_elem = mcast_list_lookup_ip(rf, ip_addr); if (!mc_qht_elem) { struct irdma_dma_mem *dma_mem_mc; spin_unlock_irqrestore(&rf->qh_list_lock, flags); mc_qht_elem = kzalloc(sizeof(*mc_qht_elem), GFP_KERNEL); if (!mc_qht_elem) return -ENOMEM; mc_qht_elem->mc_info.ipv4_valid = ipv4; memcpy(mc_qht_elem->mc_info.dest_ip, ip_addr, sizeof(mc_qht_elem->mc_info.dest_ip)); ret = irdma_alloc_rsrc(rf, rf->allocated_mcgs, rf->max_mcg, &mgn, &rf->next_mcg); if (ret) { kfree(mc_qht_elem); return -ENOMEM; } mc_qht_elem->mc_info.mgn = mgn; dma_mem_mc = &mc_qht_elem->mc_grp_ctx.dma_mem_mc; dma_mem_mc->size = sizeof(u64)* IRDMA_MAX_MGS_PER_CTX; dma_mem_mc->va = irdma_allocate_dma_mem(&rf->hw, dma_mem_mc, dma_mem_mc->size, IRDMA_HW_PAGE_SIZE); if (!dma_mem_mc->va) { irdma_free_rsrc(rf, rf->allocated_mcgs, mgn); kfree(mc_qht_elem); return -ENOMEM; } mc_qht_elem->mc_grp_ctx.mg_id = (u16)mgn; memcpy(mc_qht_elem->mc_grp_ctx.dest_ip_addr, ip_addr, sizeof(mc_qht_elem->mc_grp_ctx.dest_ip_addr)); mc_qht_elem->mc_grp_ctx.ipv4_valid = ipv4; mc_qht_elem->mc_grp_ctx.vlan_id = vlan_id; if (vlan_id < VLAN_N_VID) mc_qht_elem->mc_grp_ctx.vlan_valid = true; mc_qht_elem->mc_grp_ctx.hmc_fcn_id = iwdev->rf->sc_dev.hmc_fn_id; mc_qht_elem->mc_grp_ctx.qs_handle = iwqp->sc_qp.vsi->qos[iwqp->sc_qp.user_pri].qs_handle; ether_addr_copy(mc_qht_elem->mc_grp_ctx.dest_mac_addr, dmac); spin_lock_irqsave(&rf->qh_list_lock, flags); mcast_list_add(rf, mc_qht_elem); } else { if (mc_qht_elem->mc_grp_ctx.no_of_mgs == IRDMA_MAX_MGS_PER_CTX) { spin_unlock_irqrestore(&rf->qh_list_lock, flags); return -ENOMEM; } } mcg_info.qp_id = iwqp->ibqp.qp_num; no_mgs = mc_qht_elem->mc_grp_ctx.no_of_mgs; irdma_sc_add_mcast_grp(&mc_qht_elem->mc_grp_ctx, &mcg_info); spin_unlock_irqrestore(&rf->qh_list_lock, flags); /* Only if there is a change do we need to modify or create */ if (!no_mgs) { ret = 
irdma_mcast_cqp_op(iwdev, &mc_qht_elem->mc_grp_ctx, IRDMA_OP_MC_CREATE); } else if (no_mgs != mc_qht_elem->mc_grp_ctx.no_of_mgs) { ret = irdma_mcast_cqp_op(iwdev, &mc_qht_elem->mc_grp_ctx, IRDMA_OP_MC_MODIFY); } else { return 0; } if (ret) goto error; return 0; error: irdma_sc_del_mcast_grp(&mc_qht_elem->mc_grp_ctx, &mcg_info); if (!mc_qht_elem->mc_grp_ctx.no_of_mgs) { mcast_list_del(mc_qht_elem); irdma_free_dma_mem(&rf->hw, &mc_qht_elem->mc_grp_ctx.dma_mem_mc); irdma_free_rsrc(rf, rf->allocated_mcgs, mc_qht_elem->mc_grp_ctx.mg_id); kfree(mc_qht_elem); } return ret; } /** * irdma_detach_mcast - detach a qp from a multicast group * @ibqp: ptr to qp * @ibgid: pointer to global ID * @lid: local ID * * returns error status */ static int irdma_detach_mcast(struct ib_qp *ibqp, union ib_gid *ibgid, u16 lid) { struct irdma_qp *iwqp = to_iwqp(ibqp); struct irdma_device *iwdev = iwqp->iwdev; struct irdma_pci_f *rf = iwdev->rf; u32 ip_addr[4] = {0}; struct mc_table_list *mc_qht_elem; struct irdma_mcast_grp_ctx_entry_info mcg_info = {0}; int ret; unsigned long flags; union { struct sockaddr saddr; struct sockaddr_in saddr_in; struct sockaddr_in6 saddr_in6; } sgid_addr; rdma_gid2ip((struct sockaddr *)&sgid_addr, ibgid); if (!ipv6_addr_v4mapped((struct in6_addr *)ibgid)) irdma_copy_ip_ntohl(ip_addr, sgid_addr.saddr_in6.sin6_addr.__u6_addr.__u6_addr32); else ip_addr[0] = ntohl(sgid_addr.saddr_in.sin_addr.s_addr); spin_lock_irqsave(&rf->qh_list_lock, flags); mc_qht_elem = mcast_list_lookup_ip(rf, ip_addr); if (!mc_qht_elem) { spin_unlock_irqrestore(&rf->qh_list_lock, flags); irdma_debug(&iwdev->rf->sc_dev, IRDMA_DEBUG_VERBS, "address not found MCG\n"); return 0; } mcg_info.qp_id = iwqp->ibqp.qp_num; irdma_sc_del_mcast_grp(&mc_qht_elem->mc_grp_ctx, &mcg_info); if (!mc_qht_elem->mc_grp_ctx.no_of_mgs) { mcast_list_del(mc_qht_elem); spin_unlock_irqrestore(&rf->qh_list_lock, flags); ret = irdma_mcast_cqp_op(iwdev, &mc_qht_elem->mc_grp_ctx, IRDMA_OP_MC_DESTROY); if (ret) { irdma_debug(&iwdev->rf->sc_dev, IRDMA_DEBUG_VERBS, "failed MC_DESTROY MCG\n"); spin_lock_irqsave(&rf->qh_list_lock, flags); mcast_list_add(rf, mc_qht_elem); spin_unlock_irqrestore(&rf->qh_list_lock, flags); return -EAGAIN; } irdma_free_dma_mem(&rf->hw, &mc_qht_elem->mc_grp_ctx.dma_mem_mc); irdma_free_rsrc(rf, rf->allocated_mcgs, mc_qht_elem->mc_grp_ctx.mg_id); kfree(mc_qht_elem); } else { spin_unlock_irqrestore(&rf->qh_list_lock, flags); ret = irdma_mcast_cqp_op(iwdev, &mc_qht_elem->mc_grp_ctx, IRDMA_OP_MC_MODIFY); if (ret) { irdma_debug(&iwdev->rf->sc_dev, IRDMA_DEBUG_VERBS, "failed Modify MCG\n"); return ret; } } return 0; } /** * irdma_query_ah - Query address handle * @ibah: pointer to address handle * @ah_attr: address handle attributes */ static int irdma_query_ah(struct ib_ah *ibah, struct ib_ah_attr *ah_attr) { struct irdma_ah *ah = to_iwah(ibah); memset(ah_attr, 0, sizeof(*ah_attr)); if (ah->av.attrs.ah_flags & IB_AH_GRH) { ah_attr->ah_flags = IB_AH_GRH; ah_attr->grh.flow_label = ah->sc_ah.ah_info.flow_label; ah_attr->grh.traffic_class = ah->sc_ah.ah_info.tc_tos; ah_attr->grh.hop_limit = ah->sc_ah.ah_info.hop_ttl; ah_attr->grh.sgid_index = ah->sgid_index; ah_attr->grh.sgid_index = ah->sgid_index; memcpy(&ah_attr->grh.dgid, &ah->dgid, sizeof(ah_attr->grh.dgid)); } return 0; } -static struct ifnet * +static if_t irdma_get_netdev(struct ib_device *ibdev, u8 port_num) { struct irdma_device *iwdev = to_iwdev(ibdev); if (iwdev->netdev) { dev_hold(iwdev->netdev); return iwdev->netdev; } return NULL; } static void irdma_set_device_ops(struct 
ib_device *ibdev) { struct ib_device *dev_ops = ibdev; #if __FreeBSD_version >= 1400000 dev_ops->ops.driver_id = RDMA_DRIVER_I40IW; dev_ops->ops.size_ib_ah = IRDMA_SET_RDMA_OBJ_SIZE(ib_ah, irdma_ah, ibah); dev_ops->ops.size_ib_cq = IRDMA_SET_RDMA_OBJ_SIZE(ib_cq, irdma_cq, ibcq); dev_ops->ops.size_ib_pd = IRDMA_SET_RDMA_OBJ_SIZE(ib_pd, irdma_pd, ibpd); dev_ops->ops.size_ib_ucontext = IRDMA_SET_RDMA_OBJ_SIZE(ib_ucontext, irdma_ucontext, ibucontext); #endif /* __FreeBSD_version >= 1400000 */ dev_ops->alloc_hw_stats = irdma_alloc_hw_stats; dev_ops->alloc_mr = irdma_alloc_mr; dev_ops->alloc_mw = irdma_alloc_mw; dev_ops->alloc_pd = irdma_alloc_pd; dev_ops->alloc_ucontext = irdma_alloc_ucontext; dev_ops->create_cq = irdma_create_cq; dev_ops->create_qp = irdma_create_qp; dev_ops->dealloc_mw = irdma_dealloc_mw; dev_ops->dealloc_pd = irdma_dealloc_pd; dev_ops->dealloc_ucontext = irdma_dealloc_ucontext; dev_ops->dereg_mr = irdma_dereg_mr; dev_ops->destroy_cq = irdma_destroy_cq; dev_ops->destroy_qp = irdma_destroy_qp; dev_ops->disassociate_ucontext = irdma_disassociate_ucontext; dev_ops->get_dev_fw_str = irdma_get_dev_fw_str; dev_ops->get_dma_mr = irdma_get_dma_mr; dev_ops->get_hw_stats = irdma_get_hw_stats; dev_ops->get_netdev = irdma_get_netdev; dev_ops->map_mr_sg = irdma_map_mr_sg; dev_ops->mmap = irdma_mmap; #if __FreeBSD_version >= 1400026 dev_ops->mmap_free = irdma_mmap_free; #endif dev_ops->poll_cq = irdma_poll_cq; dev_ops->post_recv = irdma_post_recv; dev_ops->post_send = irdma_post_send; dev_ops->query_device = irdma_query_device; dev_ops->query_port = irdma_query_port; dev_ops->modify_port = irdma_modify_port; dev_ops->query_qp = irdma_query_qp; dev_ops->reg_user_mr = irdma_reg_user_mr; dev_ops->rereg_user_mr = irdma_rereg_user_mr; dev_ops->req_notify_cq = irdma_req_notify_cq; dev_ops->resize_cq = irdma_resize_cq; } static void irdma_set_device_mcast_ops(struct ib_device *ibdev) { struct ib_device *dev_ops = ibdev; dev_ops->attach_mcast = irdma_attach_mcast; dev_ops->detach_mcast = irdma_detach_mcast; } static void irdma_set_device_roce_ops(struct ib_device *ibdev) { struct ib_device *dev_ops = ibdev; dev_ops->create_ah = irdma_create_ah; dev_ops->destroy_ah = irdma_destroy_ah; dev_ops->get_link_layer = irdma_get_link_layer; dev_ops->get_port_immutable = irdma_roce_port_immutable; dev_ops->modify_qp = irdma_modify_qp_roce; dev_ops->query_ah = irdma_query_ah; dev_ops->query_gid = irdma_query_gid_roce; dev_ops->query_pkey = irdma_query_pkey; ibdev->add_gid = irdma_add_gid; ibdev->del_gid = irdma_del_gid; } static void irdma_set_device_iw_ops(struct ib_device *ibdev) { struct ib_device *dev_ops = ibdev; ibdev->uverbs_cmd_mask |= (1ull << IB_USER_VERBS_CMD_CREATE_AH) | (1ull << IB_USER_VERBS_CMD_DESTROY_AH); dev_ops->create_ah = irdma_create_ah_stub; dev_ops->destroy_ah = irdma_destroy_ah_stub; dev_ops->get_port_immutable = irdma_iw_port_immutable; dev_ops->modify_qp = irdma_modify_qp; dev_ops->query_gid = irdma_query_gid; dev_ops->query_pkey = irdma_iw_query_pkey; } static inline void irdma_set_device_gen1_ops(struct ib_device *ibdev) { } /** * irdma_init_roce_device - initialization of roce rdma device * @iwdev: irdma device */ static void irdma_init_roce_device(struct irdma_device *iwdev) { kc_set_roce_uverbs_cmd_mask(iwdev); iwdev->ibdev.node_type = RDMA_NODE_IB_CA; addrconf_addr_eui48((u8 *)&iwdev->ibdev.node_guid, - IF_LLADDR(iwdev->netdev)); + if_getlladdr(iwdev->netdev)); irdma_set_device_roce_ops(&iwdev->ibdev); if (iwdev->rf->rdma_ver == IRDMA_GEN_2) 
irdma_set_device_mcast_ops(&iwdev->ibdev); } /** * irdma_init_iw_device - initialization of iwarp rdma device * @iwdev: irdma device */ static int irdma_init_iw_device(struct irdma_device *iwdev) { - struct ifnet *netdev = iwdev->netdev; + if_t netdev = iwdev->netdev; iwdev->ibdev.node_type = RDMA_NODE_RNIC; addrconf_addr_eui48((u8 *)&iwdev->ibdev.node_guid, - IF_LLADDR(netdev)); + if_getlladdr(netdev)); iwdev->ibdev.iwcm = kzalloc(sizeof(*iwdev->ibdev.iwcm), GFP_KERNEL); if (!iwdev->ibdev.iwcm) return -ENOMEM; iwdev->ibdev.iwcm->add_ref = irdma_qp_add_ref; iwdev->ibdev.iwcm->rem_ref = irdma_qp_rem_ref; iwdev->ibdev.iwcm->get_qp = irdma_get_qp; iwdev->ibdev.iwcm->connect = irdma_connect; iwdev->ibdev.iwcm->accept = irdma_accept; iwdev->ibdev.iwcm->reject = irdma_reject; iwdev->ibdev.iwcm->create_listen = irdma_create_listen; iwdev->ibdev.iwcm->destroy_listen = irdma_destroy_listen; memcpy(iwdev->ibdev.iwcm->ifname, if_name(netdev), sizeof(iwdev->ibdev.iwcm->ifname)); irdma_set_device_iw_ops(&iwdev->ibdev); return 0; } /** * irdma_init_rdma_device - initialization of rdma device * @iwdev: irdma device */ static int irdma_init_rdma_device(struct irdma_device *iwdev) { int ret; iwdev->ibdev.owner = THIS_MODULE; iwdev->ibdev.uverbs_abi_ver = IRDMA_ABI_VER; kc_set_rdma_uverbs_cmd_mask(iwdev); if (iwdev->roce_mode) { irdma_init_roce_device(iwdev); } else { ret = irdma_init_iw_device(iwdev); if (ret) return ret; } iwdev->ibdev.phys_port_cnt = 1; iwdev->ibdev.num_comp_vectors = iwdev->rf->ceqs_count; iwdev->ibdev.dev.parent = iwdev->rf->dev_ctx.dev; set_ibdev_dma_device(iwdev->ibdev, &iwdev->rf->pcidev->dev); irdma_set_device_ops(&iwdev->ibdev); if (iwdev->rf->rdma_ver == IRDMA_GEN_1) irdma_set_device_gen1_ops(&iwdev->ibdev); return 0; } /** * irdma_port_ibevent - indicate port event * @iwdev: irdma device */ void irdma_port_ibevent(struct irdma_device *iwdev) { struct ib_event event; event.device = &iwdev->ibdev; event.element.port_num = 1; event.event = iwdev->iw_status ? IB_EVENT_PORT_ACTIVE : IB_EVENT_PORT_ERR; ib_dispatch_event(&event); } /** * irdma_ib_unregister_device - unregister rdma device from IB * core * @iwdev: irdma device */ void irdma_ib_unregister_device(struct irdma_device *iwdev) { iwdev->iw_status = 0; irdma_port_ibevent(iwdev); ib_unregister_device(&iwdev->ibdev); dev_put(iwdev->netdev); kfree(iwdev->ibdev.iwcm); iwdev->ibdev.iwcm = NULL; } /** * irdma_ib_register_device - register irdma device to IB core * @iwdev: irdma device */ int irdma_ib_register_device(struct irdma_device *iwdev) { int ret; ret = irdma_init_rdma_device(iwdev); if (ret) return ret; dev_hold(iwdev->netdev); sprintf(iwdev->ibdev.name, "irdma-%s", if_name(iwdev->netdev)); ret = ib_register_device(&iwdev->ibdev, NULL); if (ret) goto error; iwdev->iw_status = 1; irdma_port_ibevent(iwdev); return 0; error: kfree(iwdev->ibdev.iwcm); iwdev->ibdev.iwcm = NULL; irdma_debug(&iwdev->rf->sc_dev, IRDMA_DEBUG_VERBS, "Register RDMA device fail\n"); return ret; }
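For illustration only: a minimal sketch of how a kernel consumer might drive the fast-register path wired up above (the alloc_mr/map_mr_sg device ops and the IB_WR_REG_MR case in irdma_post_send()). The helper name and its arguments are hypothetical, the calls are the standard ibcore kernel verbs API, and error handling is abbreviated.

#include <rdma/ib_verbs.h>

/* Hypothetical helper, not part of the driver. */
static int
example_fast_reg(struct ib_pd *pd, struct ib_qp *qp,
		 struct scatterlist *sgl, int nents)
{
	struct ib_reg_wr reg_wr = {};
	const struct ib_send_wr *bad_wr;
	struct ib_mr *mr;
	int n;

	/* Ends up in irdma_alloc_mr() through the alloc_mr device op. */
	mr = ib_alloc_mr(pd, IB_MR_TYPE_MEM_REG, nents);
	if (IS_ERR(mr))
		return PTR_ERR(mr);

	/* Builds the MR page list via irdma_map_mr_sg()/irdma_set_page(). */
	n = ib_map_mr_sg(mr, sgl, nents, NULL, PAGE_SIZE);
	if (n != nents) {
		ib_dereg_mr(mr);
		return -EINVAL;
	}

	/* Bump the key byte so a stale rkey cannot be replayed. */
	ib_update_fast_reg_key(mr, ib_inc_rkey(mr->rkey));

	reg_wr.wr.opcode = IB_WR_REG_MR;
	reg_wr.wr.send_flags = IB_SEND_SIGNALED;
	reg_wr.mr = mr;
	reg_wr.key = mr->rkey;
	reg_wr.access = IB_ACCESS_LOCAL_WRITE | IB_ACCESS_REMOTE_READ;

	/* Dispatched by the IB_WR_REG_MR case in irdma_post_send(). */
	return ib_post_send(qp, &reg_wr.wr, &bad_wr);
}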