diff --git a/contrib/ofed/libmlx4/src/cq.c b/contrib/ofed/libmlx4/src/cq.c index eef1e0258253..ef01fcfdb151 100644 --- a/contrib/ofed/libmlx4/src/cq.c +++ b/contrib/ofed/libmlx4/src/cq.c @@ -1,500 +1,508 @@ /* * Copyright (c) 2005 Topspin Communications. All rights reserved. * Copyright (c) 2005 Mellanox Technologies Ltd. All rights reserved. * Copyright (c) 2006, 2007 Cisco Systems. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU * General Public License (GPL) Version 2, available from the file * COPYING in the main directory of this source tree, or the * OpenIB.org BSD license below: * * Redistribution and use in source and binary forms, with or * without modification, are permitted provided that the following * conditions are met: * * - Redistributions of source code must retain the above * copyright notice, this list of conditions and the following * disclaimer. * * - Redistributions in binary form must reproduce the above * copyright notice, this list of conditions and the following * disclaimer in the documentation and/or other materials * provided with the distribution. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ #if HAVE_CONFIG_H # include #endif /* HAVE_CONFIG_H */ #include #include #include #include #include #include #include "mlx4.h" #include "doorbell.h" enum { MLX4_CQ_DOORBELL = 0x20 }; enum { CQ_OK = 0, CQ_EMPTY = -1, CQ_POLL_ERR = -2 }; #define MLX4_CQ_DB_REQ_NOT_SOL (1 << 24) #define MLX4_CQ_DB_REQ_NOT (2 << 24) enum { MLX4_CQE_OWNER_MASK = 0x80, MLX4_CQE_IS_SEND_MASK = 0x40, MLX4_CQE_OPCODE_MASK = 0x1f }; enum { MLX4_CQE_SYNDROME_LOCAL_LENGTH_ERR = 0x01, MLX4_CQE_SYNDROME_LOCAL_QP_OP_ERR = 0x02, MLX4_CQE_SYNDROME_LOCAL_PROT_ERR = 0x04, MLX4_CQE_SYNDROME_WR_FLUSH_ERR = 0x05, MLX4_CQE_SYNDROME_MW_BIND_ERR = 0x06, MLX4_CQE_SYNDROME_BAD_RESP_ERR = 0x10, MLX4_CQE_SYNDROME_LOCAL_ACCESS_ERR = 0x11, MLX4_CQE_SYNDROME_REMOTE_INVAL_REQ_ERR = 0x12, MLX4_CQE_SYNDROME_REMOTE_ACCESS_ERR = 0x13, MLX4_CQE_SYNDROME_REMOTE_OP_ERR = 0x14, MLX4_CQE_SYNDROME_TRANSPORT_RETRY_EXC_ERR = 0x15, MLX4_CQE_SYNDROME_RNR_RETRY_EXC_ERR = 0x16, MLX4_CQE_SYNDROME_REMOTE_ABORTED_ERR = 0x22, }; struct mlx4_cqe { uint32_t my_qpn; uint32_t immed_rss_invalid; uint32_t g_mlpath_rqpn; uint8_t sl; uint8_t reserved1; uint16_t rlid; uint32_t reserved2; uint32_t byte_cnt; uint16_t wqe_index; uint16_t checksum; uint8_t reserved3[3]; uint8_t owner_sr_opcode; }; struct mlx4_err_cqe { uint32_t my_qpn; uint32_t reserved1[5]; uint16_t wqe_index; uint8_t vendor_err; uint8_t syndrome; uint8_t reserved2[3]; uint8_t owner_sr_opcode; }; static struct mlx4_cqe *get_cqe(struct mlx4_cq *cq, int entry) { - return cq->buf.buf + entry * MLX4_CQ_ENTRY_SIZE; + return cq->buf.buf + entry * cq->cqe_size; } static void *get_sw_cqe(struct mlx4_cq *cq, int n) { struct mlx4_cqe *cqe = get_cqe(cq, n & cq->ibv_cq.cqe); + struct mlx4_cqe *tcqe = cq->cqe_size == 64 ? cqe + 1 : cqe; - return (!!(cqe->owner_sr_opcode & MLX4_CQE_OWNER_MASK) ^ - !!(n & (cq->ibv_cq.cqe + 1))) ? 
NULL : cqe; + return (!!(tcqe->owner_sr_opcode & MLX4_CQE_OWNER_MASK) ^ + !!(n & (cq->ibv_cq.cqe + 1))) ? NULL : tcqe; } static struct mlx4_cqe *next_cqe_sw(struct mlx4_cq *cq) { return get_sw_cqe(cq, cq->cons_index); } static void update_cons_index(struct mlx4_cq *cq) { *cq->set_ci_db = htonl(cq->cons_index & 0xffffff); } static void mlx4_handle_error_cqe(struct mlx4_err_cqe *cqe, struct ibv_wc *wc) { if (cqe->syndrome == MLX4_CQE_SYNDROME_LOCAL_QP_OP_ERR) printf(PFX "local QP operation err " "(QPN %06x, WQE index %x, vendor syndrome %02x, " "opcode = %02x)\n", htonl(cqe->my_qpn), htonl(cqe->wqe_index), cqe->vendor_err, cqe->owner_sr_opcode & ~MLX4_CQE_OWNER_MASK); switch (cqe->syndrome) { case MLX4_CQE_SYNDROME_LOCAL_LENGTH_ERR: wc->status = IBV_WC_LOC_LEN_ERR; break; case MLX4_CQE_SYNDROME_LOCAL_QP_OP_ERR: wc->status = IBV_WC_LOC_QP_OP_ERR; break; case MLX4_CQE_SYNDROME_LOCAL_PROT_ERR: wc->status = IBV_WC_LOC_PROT_ERR; break; case MLX4_CQE_SYNDROME_WR_FLUSH_ERR: wc->status = IBV_WC_WR_FLUSH_ERR; break; case MLX4_CQE_SYNDROME_MW_BIND_ERR: wc->status = IBV_WC_MW_BIND_ERR; break; case MLX4_CQE_SYNDROME_BAD_RESP_ERR: wc->status = IBV_WC_BAD_RESP_ERR; break; case MLX4_CQE_SYNDROME_LOCAL_ACCESS_ERR: wc->status = IBV_WC_LOC_ACCESS_ERR; break; case MLX4_CQE_SYNDROME_REMOTE_INVAL_REQ_ERR: wc->status = IBV_WC_REM_INV_REQ_ERR; break; case MLX4_CQE_SYNDROME_REMOTE_ACCESS_ERR: wc->status = IBV_WC_REM_ACCESS_ERR; break; case MLX4_CQE_SYNDROME_REMOTE_OP_ERR: wc->status = IBV_WC_REM_OP_ERR; break; case MLX4_CQE_SYNDROME_TRANSPORT_RETRY_EXC_ERR: wc->status = IBV_WC_RETRY_EXC_ERR; break; case MLX4_CQE_SYNDROME_RNR_RETRY_EXC_ERR: wc->status = IBV_WC_RNR_RETRY_EXC_ERR; break; case MLX4_CQE_SYNDROME_REMOTE_ABORTED_ERR: wc->status = IBV_WC_REM_ABORT_ERR; break; default: wc->status = IBV_WC_GENERAL_ERR; break; } wc->vendor_err = cqe->vendor_err; } static int mlx4_poll_one(struct mlx4_cq *cq, struct mlx4_qp **cur_qp, struct ibv_wc *wc) { struct mlx4_wq *wq; struct mlx4_cqe *cqe; struct mlx4_srq *srq = NULL; uint32_t qpn; uint32_t srqn; uint32_t g_mlpath_rqpn; uint16_t wqe_index; int is_error; int is_send; cqe = next_cqe_sw(cq); if (!cqe) return CQ_EMPTY; ++cq->cons_index; VALGRIND_MAKE_MEM_DEFINED(cqe, sizeof *cqe); /* * Make sure we read CQ entry contents after we've checked the * ownership bit. */ rmb(); qpn = ntohl(cqe->my_qpn); is_send = cqe->owner_sr_opcode & MLX4_CQE_IS_SEND_MASK; is_error = (cqe->owner_sr_opcode & MLX4_CQE_OPCODE_MASK) == MLX4_CQE_OPCODE_ERROR; if (qpn & MLX4_XRC_QPN_BIT && !is_send) { srqn = ntohl(cqe->g_mlpath_rqpn) & 0xffffff; /* * We do not have to take the XRC SRQ table lock here, * because CQs will be locked while XRC SRQs are removed * from the table. */ srq = mlx4_find_xrc_srq(to_mctx(cq->ibv_cq.context), srqn); if (!srq) return CQ_POLL_ERR; } else if (!*cur_qp || (qpn & 0xffffff) != (*cur_qp)->ibv_qp.qp_num) { /* * We do not have to take the QP table lock here, * because CQs will be locked while QPs are removed * from the table. 
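For readers unfamiliar with the 64-byte ("stride") CQE format this hunk enables: the library keeps struct mlx4_cqe at 32 bytes, and when the kernel reports a 64-byte CQE size it treats only the second half of each 64-byte slot as the CQE proper, which is why get_sw_cqe() advances by one 32-byte struct (cqe + 1) before testing the ownership bit. The sketch below uses simplified stand-in types (not part of the patch) to show the same address arithmetic.

#include <stdint.h>
#include <stdio.h>

/* Simplified stand-in for the 32-byte struct mlx4_cqe (illustration only). */
struct cqe32 {
	uint8_t payload[31];
	uint8_t owner_sr_opcode;	/* ownership bit lives in the last byte */
};

/* Return the CQE software should inspect for entry n, given the per-CQ
 * entry size negotiated with the kernel (32 or 64 bytes). */
static struct cqe32 *sw_cqe(void *cq_buf, int cqe_size, int n, int num_cqe)
{
	char *slot = (char *)cq_buf + (size_t)(n & (num_cqe - 1)) * cqe_size;

	/* For 64-byte slots the hardware-written fields sit in the upper
	 * 32 bytes, so skip the first half. */
	if (cqe_size == 64)
		slot += 32;
	return (struct cqe32 *)slot;
}

int main(void)
{
	char buf[8 * 64] = { 0 };
	struct cqe32 *cqe = sw_cqe(buf, 64, 3, 8);

	printf("entry 3 seen by software at offset %td\n", (char *)cqe - buf);
	return 0;
}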
*/ *cur_qp = mlx4_find_qp(to_mctx(cq->ibv_cq.context), qpn & 0xffffff); if (!*cur_qp) return CQ_POLL_ERR; } wc->qp_num = qpn & 0xffffff; if (is_send) { wq = &(*cur_qp)->sq; wqe_index = ntohs(cqe->wqe_index); wq->tail += (uint16_t) (wqe_index - (uint16_t) wq->tail); wc->wr_id = wq->wrid[wq->tail & (wq->wqe_cnt - 1)]; ++wq->tail; } else if (srq) { wqe_index = htons(cqe->wqe_index); wc->wr_id = srq->wrid[wqe_index]; mlx4_free_srq_wqe(srq, wqe_index); } else if ((*cur_qp)->ibv_qp.srq) { srq = to_msrq((*cur_qp)->ibv_qp.srq); wqe_index = htons(cqe->wqe_index); wc->wr_id = srq->wrid[wqe_index]; mlx4_free_srq_wqe(srq, wqe_index); } else { wq = &(*cur_qp)->rq; wc->wr_id = wq->wrid[wq->tail & (wq->wqe_cnt - 1)]; ++wq->tail; } if (is_error) { mlx4_handle_error_cqe((struct mlx4_err_cqe *) cqe, wc); return CQ_OK; } wc->status = IBV_WC_SUCCESS; if (is_send) { wc->wc_flags = 0; switch (cqe->owner_sr_opcode & MLX4_CQE_OPCODE_MASK) { case MLX4_OPCODE_RDMA_WRITE_IMM: wc->wc_flags |= IBV_WC_WITH_IMM; case MLX4_OPCODE_RDMA_WRITE: wc->opcode = IBV_WC_RDMA_WRITE; break; case MLX4_OPCODE_SEND_IMM: wc->wc_flags |= IBV_WC_WITH_IMM; case MLX4_OPCODE_SEND: wc->opcode = IBV_WC_SEND; break; case MLX4_OPCODE_RDMA_READ: wc->opcode = IBV_WC_RDMA_READ; wc->byte_len = ntohl(cqe->byte_cnt); break; case MLX4_OPCODE_ATOMIC_CS: wc->opcode = IBV_WC_COMP_SWAP; wc->byte_len = 8; break; case MLX4_OPCODE_ATOMIC_FA: wc->opcode = IBV_WC_FETCH_ADD; wc->byte_len = 8; break; case MLX4_OPCODE_BIND_MW: wc->opcode = IBV_WC_BIND_MW; break; default: /* assume it's a send completion */ wc->opcode = IBV_WC_SEND; break; } } else { wc->byte_len = ntohl(cqe->byte_cnt); switch (cqe->owner_sr_opcode & MLX4_CQE_OPCODE_MASK) { case MLX4_RECV_OPCODE_RDMA_WRITE_IMM: wc->opcode = IBV_WC_RECV_RDMA_WITH_IMM; wc->wc_flags = IBV_WC_WITH_IMM; wc->imm_data = cqe->immed_rss_invalid; break; case MLX4_RECV_OPCODE_SEND: wc->opcode = IBV_WC_RECV; wc->wc_flags = 0; break; case MLX4_RECV_OPCODE_SEND_IMM: wc->opcode = IBV_WC_RECV; wc->wc_flags = IBV_WC_WITH_IMM; wc->imm_data = cqe->immed_rss_invalid; break; } wc->slid = ntohs(cqe->rlid); wc->sl = cqe->sl >> 4; g_mlpath_rqpn = ntohl(cqe->g_mlpath_rqpn); wc->src_qp = g_mlpath_rqpn & 0xffffff; wc->dlid_path_bits = (g_mlpath_rqpn >> 24) & 0x7f; wc->wc_flags |= g_mlpath_rqpn & 0x80000000 ? IBV_WC_GRH : 0; wc->pkey_index = ntohl(cqe->immed_rss_invalid) & 0x7f; } return CQ_OK; } int mlx4_poll_cq(struct ibv_cq *ibcq, int ne, struct ibv_wc *wc) { struct mlx4_cq *cq = to_mcq(ibcq); struct mlx4_qp *qp = NULL; int npolled; int err = CQ_OK; pthread_spin_lock(&cq->lock); for (npolled = 0; npolled < ne; ++npolled) { err = mlx4_poll_one(cq, &qp, wc + npolled); if (err != CQ_OK) break; } if (npolled) update_cons_index(cq); pthread_spin_unlock(&cq->lock); return err == CQ_POLL_ERR ? err : npolled; } int mlx4_arm_cq(struct ibv_cq *ibvcq, int solicited) { struct mlx4_cq *cq = to_mcq(ibvcq); uint32_t doorbell[2]; uint32_t sn; uint32_t ci; uint32_t cmd; sn = cq->arm_sn & 3; ci = cq->cons_index & 0xffffff; cmd = solicited ? MLX4_CQ_DB_REQ_NOT_SOL : MLX4_CQ_DB_REQ_NOT; *cq->arm_db = htonl(sn << 28 | cmd | ci); /* * Make sure that the doorbell record in host memory is * written before ringing the doorbell via PCI MMIO. 
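The CQE-size change is invisible above the verbs API: applications still drain completions through the usual ibv_poll_cq() loop, which ends up in mlx4_poll_cq()/mlx4_poll_one() above. A minimal consumer loop, with a hypothetical handle_completion() callback standing in for application logic, might look like this.

#include <stdio.h>
#include <infiniband/verbs.h>

/* Drain up to 'budget' completions from 'cq'; returns the number consumed,
 * or a negative value on a poll error (the CQ_POLL_ERR case above). */
static int drain_cq(struct ibv_cq *cq, int budget,
                    void (*handle_completion)(struct ibv_wc *wc))
{
	struct ibv_wc wc[16];
	int total = 0;

	while (total < budget) {
		int n = ibv_poll_cq(cq, 16, wc);
		if (n < 0)
			return n;	/* poll error */
		if (n == 0)
			break;		/* CQ empty */
		for (int i = 0; i < n; ++i) {
			if (wc[i].status != IBV_WC_SUCCESS)
				fprintf(stderr, "wr_id %llu failed: %s\n",
					(unsigned long long)wc[i].wr_id,
					ibv_wc_status_str(wc[i].status));
			handle_completion(&wc[i]);
		}
		total += n;
	}
	return total;
}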
*/ wmb(); doorbell[0] = htonl(sn << 28 | cmd | cq->cqn); doorbell[1] = htonl(ci); mlx4_write64(doorbell, to_mctx(ibvcq->context), MLX4_CQ_DOORBELL); return 0; } void mlx4_cq_event(struct ibv_cq *cq) { to_mcq(cq)->arm_sn++; } void __mlx4_cq_clean(struct mlx4_cq *cq, uint32_t qpn, struct mlx4_srq *srq) { struct mlx4_cqe *cqe, *dest; uint32_t prod_index; uint8_t owner_bit; int nfreed = 0; int is_xrc_srq = 0; + int cqe_inc = cq->cqe_size == 64 ? 1 : 0; if (srq && srq->ibv_srq.xrc_cq) is_xrc_srq = 1; /* * First we need to find the current producer index, so we * know where to start cleaning from. It doesn't matter if HW * adds new entries after this loop -- the QP we're worried * about is already in RESET, so the new entries won't come * from our QP and therefore don't need to be checked. */ for (prod_index = cq->cons_index; get_sw_cqe(cq, prod_index); ++prod_index) if (prod_index == cq->cons_index + cq->ibv_cq.cqe) break; /* * Now sweep backwards through the CQ, removing CQ entries * that match our QP by copying older entries on top of them. */ while ((int) --prod_index - (int) cq->cons_index >= 0) { cqe = get_cqe(cq, prod_index & cq->ibv_cq.cqe); + cqe += cqe_inc; if (is_xrc_srq && (ntohl(cqe->g_mlpath_rqpn & 0xffffff) == srq->srqn) && !(cqe->owner_sr_opcode & MLX4_CQE_IS_SEND_MASK)) { mlx4_free_srq_wqe(srq, ntohs(cqe->wqe_index)); ++nfreed; } else if ((ntohl(cqe->my_qpn) & 0xffffff) == qpn) { if (srq && !(cqe->owner_sr_opcode & MLX4_CQE_IS_SEND_MASK)) mlx4_free_srq_wqe(srq, ntohs(cqe->wqe_index)); ++nfreed; } else if (nfreed) { dest = get_cqe(cq, (prod_index + nfreed) & cq->ibv_cq.cqe); + dest += cqe_inc; owner_bit = dest->owner_sr_opcode & MLX4_CQE_OWNER_MASK; memcpy(dest, cqe, sizeof *cqe); dest->owner_sr_opcode = owner_bit | (dest->owner_sr_opcode & ~MLX4_CQE_OWNER_MASK); } } if (nfreed) { cq->cons_index += nfreed; /* * Make sure update of buffer contents is done before * updating consumer index. */ wmb(); update_cons_index(cq); } } void mlx4_cq_clean(struct mlx4_cq *cq, uint32_t qpn, struct mlx4_srq *srq) { pthread_spin_lock(&cq->lock); __mlx4_cq_clean(cq, qpn, srq); pthread_spin_unlock(&cq->lock); } int mlx4_get_outstanding_cqes(struct mlx4_cq *cq) { uint32_t i; for (i = cq->cons_index; get_sw_cqe(cq, (i & cq->ibv_cq.cqe)); ++i) ; return i - cq->cons_index; } void mlx4_cq_resize_copy_cqes(struct mlx4_cq *cq, void *buf, int old_cqe) { struct mlx4_cqe *cqe; int i; + int cqe_inc = cq->cqe_size == 64 ? 1 : 0; i = cq->cons_index; cqe = get_cqe(cq, (i & old_cqe)); + cqe += cqe_inc; while ((cqe->owner_sr_opcode & MLX4_CQE_OPCODE_MASK) != MLX4_CQE_OPCODE_RESIZE) { cqe->owner_sr_opcode = (cqe->owner_sr_opcode & ~MLX4_CQE_OWNER_MASK) | (((i + 1) & (cq->ibv_cq.cqe + 1)) ? 
MLX4_CQE_OWNER_MASK : 0); - memcpy(buf + ((i + 1) & cq->ibv_cq.cqe) * MLX4_CQ_ENTRY_SIZE, - cqe, MLX4_CQ_ENTRY_SIZE); + memcpy(buf + ((i + 1) & cq->ibv_cq.cqe) * cq->cqe_size, + cqe - cqe_inc, cq->cqe_size); ++i; cqe = get_cqe(cq, (i & old_cqe)); + cqe += cqe_inc; } ++cq->cons_index; } -int mlx4_alloc_cq_buf(struct mlx4_device *dev, struct mlx4_buf *buf, int nent) +int mlx4_alloc_cq_buf(struct mlx4_device *dev, struct mlx4_buf *buf, int nent, + int entry_size) { - if (mlx4_alloc_buf(buf, align(nent * MLX4_CQ_ENTRY_SIZE, dev->page_size), + if (mlx4_alloc_buf(buf, align(nent * entry_size, dev->page_size), dev->page_size)) return -1; - memset(buf->buf, 0, nent * MLX4_CQ_ENTRY_SIZE); + memset(buf->buf, 0, nent * entry_size); return 0; } diff --git a/contrib/ofed/libmlx4/src/mlx4-abi.h b/contrib/ofed/libmlx4/src/mlx4-abi.h index cc054e419579..86be5c7fb137 100644 --- a/contrib/ofed/libmlx4/src/mlx4-abi.h +++ b/contrib/ofed/libmlx4/src/mlx4-abi.h @@ -1,109 +1,111 @@ /* * Copyright (c) 2007 Cisco, Inc. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU * General Public License (GPL) Version 2, available from the file * COPYING in the main directory of this source tree, or the * OpenIB.org BSD license below: * * Redistribution and use in source and binary forms, with or * without modification, are permitted provided that the following * conditions are met: * * - Redistributions of source code must retain the above * copyright notice, this list of conditions and the following * disclaimer. * * - Redistributions in binary form must reproduce the above * copyright notice, this list of conditions and the following * disclaimer in the documentation and/or other materials * provided with the distribution. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
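The remaining cq.c hunks size the CQ buffer by the negotiated entry size instead of the old fixed MLX4_CQ_ENTRY_SIZE, and make __mlx4_cq_clean()/mlx4_cq_resize_copy_cqes() step into the upper half of each 64-byte slot via cqe_inc. A small sketch of those two rules, with names local to the sketch rather than the driver's:

#include <stdint.h>
#include <stdio.h>

/* Bytes to allocate for nent entries of entry_size, rounded up to a page,
 * mirroring align(nent * entry_size, page_size) in mlx4_alloc_cq_buf(). */
static size_t cq_buf_bytes(int nent, int entry_size, int page_size)
{
	size_t raw = (size_t)nent * entry_size;
	return (raw + page_size - 1) & ~((size_t)page_size - 1);
}

/* Byte offset of the software-visible view of entry 'index'; cqe_inc is 1
 * for 64-byte entries, exactly what "cqe += cqe_inc" does above. */
static size_t sw_view_offset(int index, int entry_size)
{
	int cqe_inc = (entry_size == 64) ? 1 : 0;
	return (size_t)index * entry_size + (size_t)cqe_inc * 32;
}

int main(void)
{
	printf("%zu bytes for 256 x 64B CQEs on 4K pages\n",
	       cq_buf_bytes(256, 64, 4096));
	printf("software view of entry 5 at offset %zu\n",
	       sw_view_offset(5, 64));
	return 0;
}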
*/ #ifndef MLX4_ABI_H #define MLX4_ABI_H #include #define MLX4_UVERBS_MIN_ABI_VERSION 2 #define MLX4_UVERBS_MAX_ABI_VERSION 4 struct mlx4_alloc_ucontext_resp { struct ibv_get_context_resp ibv_resp; + __u32 dev_caps; __u32 qp_tab_size; __u16 bf_reg_size; __u16 bf_regs_per_page; + __u32 cqe_size; }; struct mlx4_alloc_pd_resp { struct ibv_alloc_pd_resp ibv_resp; __u32 pdn; __u32 reserved; }; struct mlx4_create_cq { struct ibv_create_cq ibv_cmd; __u64 buf_addr; __u64 db_addr; }; struct mlx4_create_cq_resp { struct ibv_create_cq_resp ibv_resp; __u32 cqn; __u32 reserved; }; struct mlx4_resize_cq { struct ibv_resize_cq ibv_cmd; __u64 buf_addr; }; #ifdef HAVE_IBV_XRC_OPS struct mlx4_create_xrc_srq { struct ibv_create_xrc_srq ibv_cmd; __u64 buf_addr; __u64 db_addr; }; #endif struct mlx4_create_srq { struct ibv_create_srq ibv_cmd; __u64 buf_addr; __u64 db_addr; }; struct mlx4_create_srq_resp { struct ibv_create_srq_resp ibv_resp; __u32 srqn; __u32 reserved; }; struct mlx4_create_qp { struct ibv_create_qp ibv_cmd; __u64 buf_addr; __u64 db_addr; __u8 log_sq_bb_count; __u8 log_sq_stride; __u8 sq_no_prefetch; /* was reserved in ABI 2 */ __u8 reserved[5]; }; #ifdef HAVE_IBV_XRC_OPS struct mlx4_open_xrc_domain_resp { struct ibv_open_xrc_domain_resp ibv_resp; __u32 xrcdn; __u32 reserved; }; #endif #endif /* MLX4_ABI_H */ diff --git a/contrib/ofed/libmlx4/src/mlx4.c b/contrib/ofed/libmlx4/src/mlx4.c index 17adb300a9a8..caabced39a7e 100644 --- a/contrib/ofed/libmlx4/src/mlx4.c +++ b/contrib/ofed/libmlx4/src/mlx4.c @@ -1,321 +1,322 @@ /* * Copyright (c) 2007 Cisco, Inc. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU * General Public License (GPL) Version 2, available from the file * COPYING in the main directory of this source tree, or the * OpenIB.org BSD license below: * * Redistribution and use in source and binary forms, with or * without modification, are permitted provided that the following * conditions are met: * * - Redistributions of source code must retain the above * copyright notice, this list of conditions and the following * disclaimer. * * - Redistributions in binary form must reproduce the above * copyright notice, this list of conditions and the following * disclaimer in the documentation and/or other materials * provided with the distribution. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
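The widened mlx4_alloc_ucontext_resp is what lets the kernel advertise the CQE stride (along with a dev_caps word) to userspace. A defensive consumer might sanity-check the value before using it, roughly as sketched below; the fall-back-to-32 policy is an assumption for illustration, not something this patch adds.

#include <stdint.h>

/* Hypothetical validation helper: accept only the two strides mlx4
 * hardware uses and fall back to the classic 32-byte entry otherwise. */
static int sanitize_cqe_size(uint32_t reported)
{
	if (reported == 32 || reported == 64)
		return (int)reported;
	return 32;	/* conservative default (illustrative policy) */
}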
*/ #if HAVE_CONFIG_H # include #endif /* HAVE_CONFIG_H */ #include #include #include #include #include #include #include #ifndef HAVE_IBV_REGISTER_DRIVER #include #endif #include "mlx4.h" #include "mlx4-abi.h" #ifndef PCI_VENDOR_ID_MELLANOX #define PCI_VENDOR_ID_MELLANOX 0x15b3 #endif #define HCA(v, d) \ { .vendor = PCI_VENDOR_ID_##v, \ .device = d } struct { unsigned vendor; unsigned device; } hca_table[] = { HCA(MELLANOX, 0x6340), /* MT25408 "Hermon" SDR */ HCA(MELLANOX, 0x634a), /* MT25408 "Hermon" DDR */ HCA(MELLANOX, 0x6354), /* MT25408 "Hermon" QDR */ HCA(MELLANOX, 0x6732), /* MT25408 "Hermon" DDR PCIe gen2 */ HCA(MELLANOX, 0x673c), /* MT25408 "Hermon" QDR PCIe gen2 */ HCA(MELLANOX, 0x6368), /* MT25448 [ConnectX EN 10GigE, PCIe 2.0 2.5GT/s] */ HCA(MELLANOX, 0x6750), /* MT26448 [ConnectX EN 10GigE, PCIe 2.0 5GT/s] */ HCA(MELLANOX, 0x6372), /* MT25408 [ConnectX EN 10GigE 10GBaseT, PCIe 2.0 2.5GT/s] */ HCA(MELLANOX, 0x675a), /* MT25408 [ConnectX EN 10GigE 10GBaseT, PCIe Gen2 5GT/s] */ HCA(MELLANOX, 0x6764), /* MT26468 [ConnectX EN 10GigE, PCIe 2.0 5GT/s] */ HCA(MELLANOX, 0x6746), /* MT26438 ConnectX VPI PCIe 2.0 5GT/s - IB QDR / 10GigE Virt+ */ HCA(MELLANOX, 0x676e), /* MT26478 ConnectX EN 40GigE PCIe 2.0 5GT/s */ HCA(MELLANOX, 0x6778), /* MT26488 ConnectX VPI PCIe 2.0 5GT/s - IB DDR / 10GigE Virt+ */ HCA(MELLANOX, 0x1000), HCA(MELLANOX, 0x1001), HCA(MELLANOX, 0x1002), HCA(MELLANOX, 0x1003), HCA(MELLANOX, 0x1004), HCA(MELLANOX, 0x1005), HCA(MELLANOX, 0x1006), HCA(MELLANOX, 0x1007), HCA(MELLANOX, 0x1008), HCA(MELLANOX, 0x1009), HCA(MELLANOX, 0x100a), HCA(MELLANOX, 0x100b), HCA(MELLANOX, 0x100c), HCA(MELLANOX, 0x100d), HCA(MELLANOX, 0x100e), HCA(MELLANOX, 0x100f), }; #ifdef HAVE_IBV_MORE_OPS static struct ibv_more_ops mlx4_more_ops = { #ifdef HAVE_IBV_XRC_OPS .create_xrc_srq = mlx4_create_xrc_srq, .open_xrc_domain = mlx4_open_xrc_domain, .close_xrc_domain = mlx4_close_xrc_domain, .create_xrc_rcv_qp = mlx4_create_xrc_rcv_qp, .modify_xrc_rcv_qp = mlx4_modify_xrc_rcv_qp, .query_xrc_rcv_qp = mlx4_query_xrc_rcv_qp, .reg_xrc_rcv_qp = mlx4_reg_xrc_rcv_qp, .unreg_xrc_rcv_qp = mlx4_unreg_xrc_rcv_qp, #endif }; #endif static struct ibv_context_ops mlx4_ctx_ops = { .query_device = mlx4_query_device, .query_port = mlx4_query_port, .alloc_pd = mlx4_alloc_pd, .dealloc_pd = mlx4_free_pd, .reg_mr = mlx4_reg_mr, .dereg_mr = mlx4_dereg_mr, .create_cq = mlx4_create_cq, .poll_cq = mlx4_poll_cq, .req_notify_cq = mlx4_arm_cq, .cq_event = mlx4_cq_event, .resize_cq = mlx4_resize_cq, .destroy_cq = mlx4_destroy_cq, .create_srq = mlx4_create_srq, .modify_srq = mlx4_modify_srq, .query_srq = mlx4_query_srq, .destroy_srq = mlx4_destroy_srq, .post_srq_recv = mlx4_post_srq_recv, .create_qp = mlx4_create_qp, .query_qp = mlx4_query_qp, .modify_qp = mlx4_modify_qp, .destroy_qp = mlx4_destroy_qp, .post_send = mlx4_post_send, .post_recv = mlx4_post_recv, .create_ah = mlx4_create_ah, .destroy_ah = mlx4_destroy_ah, .attach_mcast = ibv_cmd_attach_mcast, .detach_mcast = ibv_cmd_detach_mcast }; static struct ibv_context *mlx4_alloc_context(struct ibv_device *ibdev, int cmd_fd) { struct mlx4_context *context; struct ibv_get_context cmd; struct mlx4_alloc_ucontext_resp resp; int i; struct ibv_device_attr dev_attrs; context = calloc(1, sizeof *context); if (!context) return NULL; context->ibv_ctx.cmd_fd = cmd_fd; if (ibv_cmd_get_context(&context->ibv_ctx, &cmd, sizeof cmd, &resp.ibv_resp, sizeof resp)) goto err_free; context->num_qps = resp.qp_tab_size; context->qp_table_shift = ffs(context->num_qps) - 1 - MLX4_QP_TABLE_BITS; 
context->qp_table_mask = (1 << context->qp_table_shift) - 1; pthread_mutex_init(&context->qp_table_mutex, NULL); for (i = 0; i < MLX4_QP_TABLE_SIZE; ++i) context->qp_table[i].refcnt = 0; context->num_xrc_srqs = resp.qp_tab_size; context->xrc_srq_table_shift = ffs(context->num_xrc_srqs) - 1 - MLX4_XRC_SRQ_TABLE_BITS; context->xrc_srq_table_mask = (1 << context->xrc_srq_table_shift) - 1; pthread_mutex_init(&context->xrc_srq_table_mutex, NULL); for (i = 0; i < MLX4_XRC_SRQ_TABLE_SIZE; ++i) context->xrc_srq_table[i].refcnt = 0; for (i = 0; i < MLX4_NUM_DB_TYPE; ++i) context->db_list[i] = NULL; pthread_mutex_init(&context->db_list_mutex, NULL); context->uar = mmap(NULL, to_mdev(ibdev)->page_size, PROT_WRITE, MAP_SHARED, cmd_fd, 0); if (context->uar == MAP_FAILED) goto err_free; if (resp.bf_reg_size) { context->bf_page = mmap(NULL, to_mdev(ibdev)->page_size, PROT_WRITE, MAP_SHARED, cmd_fd, to_mdev(ibdev)->page_size); if (context->bf_page == MAP_FAILED) { fprintf(stderr, PFX "Warning: BlueFlame available, " "but failed to mmap() BlueFlame page.\n"); context->bf_page = NULL; context->bf_buf_size = 0; } else { context->bf_buf_size = resp.bf_reg_size / 2; context->bf_offset = 0; pthread_spin_init(&context->bf_lock, PTHREAD_PROCESS_PRIVATE); } } else { context->bf_page = NULL; context->bf_buf_size = 0; } + context->cqe_size = resp.cqe_size; pthread_spin_init(&context->uar_lock, PTHREAD_PROCESS_PRIVATE); context->ibv_ctx.ops = mlx4_ctx_ops; #ifdef HAVE_IBV_XRC_OPS context->ibv_ctx.more_ops = &mlx4_more_ops; #endif if (mlx4_query_device(&context->ibv_ctx, &dev_attrs)) goto query_free; context->max_qp_wr = dev_attrs.max_qp_wr; context->max_sge = dev_attrs.max_sge; context->max_cqe = dev_attrs.max_cqe; if (!(dev_attrs.device_cap_flags & IBV_DEVICE_XRC)) { fprintf(stderr, PFX "There is a mismatch between " "the kernel and the userspace libraries: " "Kernel does not support XRC. 
Exiting.\n"); goto query_free; } return &context->ibv_ctx; query_free: munmap(context->uar, to_mdev(ibdev)->page_size); if (context->bf_page) munmap(context->bf_page, to_mdev(ibdev)->page_size); err_free: free(context); return NULL; } static void mlx4_free_context(struct ibv_context *ibctx) { struct mlx4_context *context = to_mctx(ibctx); munmap(context->uar, to_mdev(ibctx->device)->page_size); if (context->bf_page) munmap(context->bf_page, to_mdev(ibctx->device)->page_size); free(context); } static struct ibv_device_ops mlx4_dev_ops = { .alloc_context = mlx4_alloc_context, .free_context = mlx4_free_context }; static struct ibv_device *mlx4_driver_init(const char *uverbs_sys_path, int abi_version) { char value[8]; struct mlx4_device *dev; unsigned vendor, device; int i; if (ibv_read_sysfs_file(uverbs_sys_path, "device/vendor", value, sizeof value) < 0) return NULL; sscanf(value, "%i", &vendor); if (ibv_read_sysfs_file(uverbs_sys_path, "device/device", value, sizeof value) < 0) return NULL; sscanf(value, "%i", &device); for (i = 0; i < sizeof hca_table / sizeof hca_table[0]; ++i) if (vendor == hca_table[i].vendor && device == hca_table[i].device) goto found; return NULL; found: if (abi_version < MLX4_UVERBS_MIN_ABI_VERSION || abi_version > MLX4_UVERBS_MAX_ABI_VERSION) { fprintf(stderr, PFX "Fatal: ABI version %d of %s is not supported " "(min supported %d, max supported %d)\n", abi_version, uverbs_sys_path, MLX4_UVERBS_MIN_ABI_VERSION, MLX4_UVERBS_MAX_ABI_VERSION); return NULL; } dev = malloc(sizeof *dev); if (!dev) { fprintf(stderr, PFX "Fatal: couldn't allocate device for %s\n", uverbs_sys_path); return NULL; } dev->ibv_dev.ops = mlx4_dev_ops; dev->page_size = sysconf(_SC_PAGESIZE); return &dev->ibv_dev; } #ifdef HAVE_IBV_REGISTER_DRIVER static __attribute__((constructor)) void mlx4_register_driver(void) { ibv_register_driver("mlx4", mlx4_driver_init); } #else /* * Export the old libsysfs sysfs_class_device-based driver entry point * if libibverbs does not export an ibv_register_driver() function. */ struct ibv_device *openib_driver_init(struct sysfs_class_device *sysdev) { int abi_ver = 0; char value[8]; if (ibv_read_sysfs_file(sysdev->path, "abi_version", value, sizeof value) > 0) abi_ver = strtol(value, NULL, 10); return mlx4_driver_init(sysdev->path, abi_ver); } #endif /* HAVE_IBV_REGISTER_DRIVER */ diff --git a/contrib/ofed/libmlx4/src/mlx4.h b/contrib/ofed/libmlx4/src/mlx4.h index a349c5b805b4..cf5b963c16bc 100644 --- a/contrib/ofed/libmlx4/src/mlx4.h +++ b/contrib/ofed/libmlx4/src/mlx4.h @@ -1,457 +1,456 @@ /* * Copyright (c) 2004, 2005 Topspin Communications. All rights reserved. * Copyright (c) 2005, 2006, 2007 Cisco Systems. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU * General Public License (GPL) Version 2, available from the file * COPYING in the main directory of this source tree, or the * OpenIB.org BSD license below: * * Redistribution and use in source and binary forms, with or * without modification, are permitted provided that the following * conditions are met: * * - Redistributions of source code must retain the above * copyright notice, this list of conditions and the following * disclaimer. * * - Redistributions in binary form must reproduce the above * copyright notice, this list of conditions and the following * disclaimer in the documentation and/or other materials * provided with the distribution. 
* * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ #ifndef MLX4_H #define MLX4_H #include #include #include #ifdef HAVE_VALGRIND_MEMCHECK_H # include # if !defined(VALGRIND_MAKE_MEM_DEFINED) || !defined(VALGRIND_MAKE_MEM_UNDEFINED) # warning "Valgrind support requested, but VALGRIND_MAKE_MEM_(UN)DEFINED not available" # endif #endif /* HAVE_VALGRIND_MEMCHECK_H */ #ifndef VALGRIND_MAKE_MEM_DEFINED # define VALGRIND_MAKE_MEM_DEFINED(addr,len) #endif #ifndef VALGRIND_MAKE_MEM_UNDEFINED # define VALGRIND_MAKE_MEM_UNDEFINED(addr,len) #endif #ifndef rmb # define rmb() mb() #endif #ifndef wmb # define wmb() mb() #endif #ifndef wc_wmb #if defined(__i386__) #define wc_wmb() asm volatile("lock; addl $0,0(%%esp) " ::: "memory") #elif defined(__x86_64__) #define wc_wmb() asm volatile("sfence" ::: "memory") #elif defined(__ia64__) #define wc_wmb() asm volatile("fwb" ::: "memory") #else #define wc_wmb() wmb() #endif #endif #ifndef HAVE_IBV_MORE_OPS #undef HAVE_IBV_XRC_OPS #undef HAVE_IBV_CREATE_QP_EXP #endif #define HIDDEN __attribute__((visibility ("hidden"))) #define PFX "mlx4: " #ifndef max #define max(a,b) \ ({ typeof (a) _a = (a); \ typeof (b) _b = (b); \ _a > _b ? _a : _b; }) #endif #ifndef min #define min(a,b) \ ({ typeof (a) _a = (a); \ typeof (b) _b = (b); \ _a < _b ? _a : _b; }) #endif -enum { - MLX4_CQ_ENTRY_SIZE = 0x20 -}; - enum { MLX4_STAT_RATE_OFFSET = 5 }; enum { MLX4_QP_TABLE_BITS = 8, MLX4_QP_TABLE_SIZE = 1 << MLX4_QP_TABLE_BITS, MLX4_QP_TABLE_MASK = MLX4_QP_TABLE_SIZE - 1 }; enum { MLX4_XRC_SRQ_TABLE_BITS = 8, MLX4_XRC_SRQ_TABLE_SIZE = 1 << MLX4_XRC_SRQ_TABLE_BITS, MLX4_XRC_SRQ_TABLE_MASK = MLX4_XRC_SRQ_TABLE_SIZE - 1 }; enum { MLX4_XRC_QPN_BIT = (1 << 23) }; enum mlx4_db_type { MLX4_DB_TYPE_CQ, MLX4_DB_TYPE_RQ, MLX4_NUM_DB_TYPE }; enum { MLX4_OPCODE_NOP = 0x00, MLX4_OPCODE_SEND_INVAL = 0x01, MLX4_OPCODE_RDMA_WRITE = 0x08, MLX4_OPCODE_RDMA_WRITE_IMM = 0x09, MLX4_OPCODE_SEND = 0x0a, MLX4_OPCODE_SEND_IMM = 0x0b, MLX4_OPCODE_LSO = 0x0e, MLX4_OPCODE_RDMA_READ = 0x10, MLX4_OPCODE_ATOMIC_CS = 0x11, MLX4_OPCODE_ATOMIC_FA = 0x12, MLX4_OPCODE_ATOMIC_MASK_CS = 0x14, MLX4_OPCODE_ATOMIC_MASK_FA = 0x15, MLX4_OPCODE_BIND_MW = 0x18, MLX4_OPCODE_FMR = 0x19, MLX4_OPCODE_LOCAL_INVAL = 0x1b, MLX4_OPCODE_CONFIG_CMD = 0x1f, MLX4_RECV_OPCODE_RDMA_WRITE_IMM = 0x00, MLX4_RECV_OPCODE_SEND = 0x01, MLX4_RECV_OPCODE_SEND_IMM = 0x02, MLX4_RECV_OPCODE_SEND_INVAL = 0x03, MLX4_CQE_OPCODE_ERROR = 0x1e, MLX4_CQE_OPCODE_RESIZE = 0x16, }; enum { MLX4_MAX_WQE_SIZE = 1008 }; struct mlx4_device { struct ibv_device ibv_dev; int page_size; }; struct mlx4_db_page; struct mlx4_context { struct ibv_context ibv_ctx; void *uar; pthread_spinlock_t uar_lock; void *bf_page; int bf_buf_size; int bf_offset; pthread_spinlock_t bf_lock; struct { struct mlx4_qp **table; int refcnt; } qp_table[MLX4_QP_TABLE_SIZE]; pthread_mutex_t qp_table_mutex; int num_qps; int qp_table_shift; int qp_table_mask; int max_qp_wr; int max_sge; int max_cqe; + int cqe_size; struct { struct mlx4_srq **table; int refcnt; } xrc_srq_table[MLX4_XRC_SRQ_TABLE_SIZE]; pthread_mutex_t xrc_srq_table_mutex; int num_xrc_srqs; int 
xrc_srq_table_shift; int xrc_srq_table_mask; struct mlx4_db_page *db_list[MLX4_NUM_DB_TYPE]; pthread_mutex_t db_list_mutex; }; struct mlx4_buf { void *buf; size_t length; }; struct mlx4_pd { struct ibv_pd ibv_pd; uint32_t pdn; }; struct mlx4_cq { struct ibv_cq ibv_cq; struct mlx4_buf buf; struct mlx4_buf resize_buf; pthread_spinlock_t lock; uint32_t cqn; uint32_t cons_index; uint32_t *set_ci_db; uint32_t *arm_db; int arm_sn; + int cqe_size; }; struct mlx4_srq { struct ibv_srq ibv_srq; struct mlx4_buf buf; pthread_spinlock_t lock; uint64_t *wrid; uint32_t srqn; int max; int max_gs; int wqe_shift; int head; int tail; uint32_t *db; uint16_t counter; }; struct mlx4_wq { uint64_t *wrid; pthread_spinlock_t lock; int wqe_cnt; int max_post; unsigned head; unsigned tail; int max_gs; int wqe_shift; int offset; }; struct mlx4_qp { struct ibv_qp ibv_qp; struct mlx4_buf buf; int max_inline_data; int buf_size; uint32_t doorbell_qpn; uint32_t sq_signal_bits; int sq_spare_wqes; struct mlx4_wq sq; uint32_t *db; struct mlx4_wq rq; }; struct mlx4_av { uint32_t port_pd; uint8_t reserved1; uint8_t g_slid; uint16_t dlid; uint8_t reserved2; uint8_t gid_index; uint8_t stat_rate; uint8_t hop_limit; uint32_t sl_tclass_flowlabel; uint8_t dgid[16]; uint8_t mac[8]; }; struct mlx4_ah { struct ibv_ah ibv_ah; struct mlx4_av av; uint16_t vlan; uint8_t mac[6]; uint8_t tagged; }; struct mlx4_xrc_domain { struct ibv_xrc_domain ibv_xrcd; uint32_t xrcdn; }; static inline unsigned long align(unsigned long val, unsigned long align) { return (val + align - 1) & ~(align - 1); } #define to_mxxx(xxx, type) \ ((struct mlx4_##type *) \ ((void *) ib##xxx - offsetof(struct mlx4_##type, ibv_##xxx))) static inline struct mlx4_device *to_mdev(struct ibv_device *ibdev) { return to_mxxx(dev, device); } static inline struct mlx4_context *to_mctx(struct ibv_context *ibctx) { return to_mxxx(ctx, context); } static inline struct mlx4_pd *to_mpd(struct ibv_pd *ibpd) { return to_mxxx(pd, pd); } static inline struct mlx4_cq *to_mcq(struct ibv_cq *ibcq) { return to_mxxx(cq, cq); } static inline struct mlx4_srq *to_msrq(struct ibv_srq *ibsrq) { return to_mxxx(srq, srq); } static inline struct mlx4_qp *to_mqp(struct ibv_qp *ibqp) { return to_mxxx(qp, qp); } static inline struct mlx4_ah *to_mah(struct ibv_ah *ibah) { return to_mxxx(ah, ah); } #ifdef HAVE_IBV_XRC_OPS static inline struct mlx4_xrc_domain *to_mxrcd(struct ibv_xrc_domain *ibxrcd) { return to_mxxx(xrcd, xrc_domain); } #endif int mlx4_alloc_buf(struct mlx4_buf *buf, size_t size, int page_size); void mlx4_free_buf(struct mlx4_buf *buf); uint32_t *mlx4_alloc_db(struct mlx4_context *context, enum mlx4_db_type type); void mlx4_free_db(struct mlx4_context *context, enum mlx4_db_type type, uint32_t *db); int mlx4_query_device(struct ibv_context *context, struct ibv_device_attr *attr); int mlx4_query_port(struct ibv_context *context, uint8_t port, struct ibv_port_attr *attr); struct ibv_pd *mlx4_alloc_pd(struct ibv_context *context); int mlx4_free_pd(struct ibv_pd *pd); struct ibv_mr *mlx4_reg_mr(struct ibv_pd *pd, void *addr, size_t length, enum ibv_access_flags access); int mlx4_dereg_mr(struct ibv_mr *mr); struct ibv_cq *mlx4_create_cq(struct ibv_context *context, int cqe, struct ibv_comp_channel *channel, int comp_vector); -int mlx4_alloc_cq_buf(struct mlx4_device *dev, struct mlx4_buf *buf, int nent); +int mlx4_alloc_cq_buf(struct mlx4_device *dev, struct mlx4_buf *buf, int nent, + int entry_size); int mlx4_resize_cq(struct ibv_cq *cq, int cqe); int mlx4_destroy_cq(struct ibv_cq *cq); int 
mlx4_poll_cq(struct ibv_cq *cq, int ne, struct ibv_wc *wc); int mlx4_arm_cq(struct ibv_cq *cq, int solicited); void mlx4_cq_event(struct ibv_cq *cq); void __mlx4_cq_clean(struct mlx4_cq *cq, uint32_t qpn, struct mlx4_srq *srq); void mlx4_cq_clean(struct mlx4_cq *cq, uint32_t qpn, struct mlx4_srq *srq); int mlx4_get_outstanding_cqes(struct mlx4_cq *cq); void mlx4_cq_resize_copy_cqes(struct mlx4_cq *cq, void *buf, int new_cqe); struct ibv_srq *mlx4_create_srq(struct ibv_pd *pd, struct ibv_srq_init_attr *attr); int mlx4_modify_srq(struct ibv_srq *srq, struct ibv_srq_attr *attr, enum ibv_srq_attr_mask mask); int mlx4_query_srq(struct ibv_srq *srq, struct ibv_srq_attr *attr); int mlx4_destroy_srq(struct ibv_srq *srq); int mlx4_alloc_srq_buf(struct ibv_pd *pd, struct ibv_srq_attr *attr, struct mlx4_srq *srq); void mlx4_free_srq_wqe(struct mlx4_srq *srq, int ind); int mlx4_post_srq_recv(struct ibv_srq *ibsrq, struct ibv_recv_wr *wr, struct ibv_recv_wr **bad_wr); struct mlx4_srq *mlx4_find_xrc_srq(struct mlx4_context *ctx, uint32_t xrc_srqn); int mlx4_store_xrc_srq(struct mlx4_context *ctx, uint32_t xrc_srqn, struct mlx4_srq *srq); void mlx4_clear_xrc_srq(struct mlx4_context *ctx, uint32_t xrc_srqn); struct ibv_qp *mlx4_create_qp(struct ibv_pd *pd, struct ibv_qp_init_attr *attr); int mlx4_query_qp(struct ibv_qp *qp, struct ibv_qp_attr *attr, enum ibv_qp_attr_mask attr_mask, struct ibv_qp_init_attr *init_attr); int mlx4_modify_qp(struct ibv_qp *qp, struct ibv_qp_attr *attr, enum ibv_qp_attr_mask attr_mask); int mlx4_destroy_qp(struct ibv_qp *qp); void mlx4_init_qp_indices(struct mlx4_qp *qp); void mlx4_qp_init_sq_ownership(struct mlx4_qp *qp); int mlx4_post_send(struct ibv_qp *ibqp, struct ibv_send_wr *wr, struct ibv_send_wr **bad_wr); int mlx4_post_recv(struct ibv_qp *ibqp, struct ibv_recv_wr *wr, struct ibv_recv_wr **bad_wr); void mlx4_calc_sq_wqe_size(struct ibv_qp_cap *cap, enum ibv_qp_type type, struct mlx4_qp *qp); int num_inline_segs(int data, enum ibv_qp_type type); int mlx4_alloc_qp_buf(struct ibv_pd *pd, struct ibv_qp_cap *cap, enum ibv_qp_type type, struct mlx4_qp *qp); void mlx4_set_sq_sizes(struct mlx4_qp *qp, struct ibv_qp_cap *cap, enum ibv_qp_type type); struct mlx4_qp *mlx4_find_qp(struct mlx4_context *ctx, uint32_t qpn); int mlx4_store_qp(struct mlx4_context *ctx, uint32_t qpn, struct mlx4_qp *qp); void mlx4_clear_qp(struct mlx4_context *ctx, uint32_t qpn); struct ibv_ah *mlx4_create_ah(struct ibv_pd *pd, struct ibv_ah_attr *attr); int mlx4_destroy_ah(struct ibv_ah *ah); int mlx4_alloc_av(struct mlx4_pd *pd, struct ibv_ah_attr *attr, struct mlx4_ah *ah); void mlx4_free_av(struct mlx4_ah *ah); #ifdef HAVE_IBV_XRC_OPS struct ibv_srq *mlx4_create_xrc_srq(struct ibv_pd *pd, struct ibv_xrc_domain *xrc_domain, struct ibv_cq *xrc_cq, struct ibv_srq_init_attr *attr); struct ibv_xrc_domain *mlx4_open_xrc_domain(struct ibv_context *context, int fd, int oflag); int mlx4_close_xrc_domain(struct ibv_xrc_domain *d); int mlx4_create_xrc_rcv_qp(struct ibv_qp_init_attr *init_attr, uint32_t *xrc_qp_num); int mlx4_modify_xrc_rcv_qp(struct ibv_xrc_domain *xrc_domain, uint32_t xrc_qp_num, struct ibv_qp_attr *attr, int attr_mask); int mlx4_query_xrc_rcv_qp(struct ibv_xrc_domain *xrc_domain, uint32_t xrc_qp_num, struct ibv_qp_attr *attr, int attr_mask, struct ibv_qp_init_attr *init_attr); int mlx4_reg_xrc_rcv_qp(struct ibv_xrc_domain *xrc_domain, uint32_t xrc_qp_num); int mlx4_unreg_xrc_rcv_qp(struct ibv_xrc_domain *xrc_domain, uint32_t xrc_qp_num); #endif #endif /* MLX4_H */ diff --git 
a/contrib/ofed/libmlx4/src/verbs.c b/contrib/ofed/libmlx4/src/verbs.c index 45e86935a37f..1a806984ef09 100644 --- a/contrib/ofed/libmlx4/src/verbs.c +++ b/contrib/ofed/libmlx4/src/verbs.c @@ -1,897 +1,901 @@ /* * Copyright (c) 2007 Cisco, Inc. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU * General Public License (GPL) Version 2, available from the file * COPYING in the main directory of this source tree, or the * OpenIB.org BSD license below: * * Redistribution and use in source and binary forms, with or * without modification, are permitted provided that the following * conditions are met: * * - Redistributions of source code must retain the above * copyright notice, this list of conditions and the following * disclaimer. * * - Redistributions in binary form must reproduce the above * copyright notice, this list of conditions and the following * disclaimer in the documentation and/or other materials * provided with the distribution. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ #if HAVE_CONFIG_H # include #endif /* HAVE_CONFIG_H */ #include #include #include #include #include #include #include "mlx4.h" #include "mlx4-abi.h" #include "wqe.h" int mlx4_query_device(struct ibv_context *context, struct ibv_device_attr *attr) { struct ibv_query_device cmd; uint64_t raw_fw_ver; unsigned major, minor, sub_minor; int ret; ret = ibv_cmd_query_device(context, attr, &raw_fw_ver, &cmd, sizeof cmd); if (ret) return ret; major = (raw_fw_ver >> 32) & 0xffff; minor = (raw_fw_ver >> 16) & 0xffff; sub_minor = raw_fw_ver & 0xffff; snprintf(attr->fw_ver, sizeof attr->fw_ver, "%d.%d.%03d", major, minor, sub_minor); return 0; } int mlx4_query_port(struct ibv_context *context, uint8_t port, struct ibv_port_attr *attr) { struct ibv_query_port cmd; return ibv_cmd_query_port(context, port, attr, &cmd, sizeof cmd); } struct ibv_pd *mlx4_alloc_pd(struct ibv_context *context) { struct ibv_alloc_pd cmd; struct mlx4_alloc_pd_resp resp; struct mlx4_pd *pd; pd = malloc(sizeof *pd); if (!pd) return NULL; if (ibv_cmd_alloc_pd(context, &pd->ibv_pd, &cmd, sizeof cmd, &resp.ibv_resp, sizeof resp)) { free(pd); return NULL; } pd->pdn = resp.pdn; return &pd->ibv_pd; } int mlx4_free_pd(struct ibv_pd *pd) { int ret; ret = ibv_cmd_dealloc_pd(pd); if (ret) return ret; free(to_mpd(pd)); return 0; } struct ibv_mr *mlx4_reg_mr(struct ibv_pd *pd, void *addr, size_t length, enum ibv_access_flags access) { struct ibv_mr *mr; struct ibv_reg_mr cmd; int ret; mr = malloc(sizeof *mr); if (!mr) return NULL; #ifdef IBV_CMD_REG_MR_HAS_RESP_PARAMS { struct ibv_reg_mr_resp resp; ret = ibv_cmd_reg_mr(pd, addr, length, (uintptr_t) addr, access, mr, &cmd, sizeof cmd, &resp, sizeof resp); } #else ret = ibv_cmd_reg_mr(pd, addr, length, (uintptr_t) addr, access, mr, &cmd, sizeof cmd); #endif if (ret) { free(mr); return NULL; } return mr; } int mlx4_dereg_mr(struct ibv_mr *mr) { int ret; ret = ibv_cmd_dereg_mr(mr); if (ret) return ret; free(mr); return 0; } static int align_queue_size(int req) { int nent; for (nent = 
1; nent < req; nent <<= 1) ; /* nothing */ return nent; } struct ibv_cq *mlx4_create_cq(struct ibv_context *context, int cqe, struct ibv_comp_channel *channel, int comp_vector) { struct mlx4_create_cq cmd; struct mlx4_create_cq_resp resp; struct mlx4_cq *cq; int ret; + struct mlx4_context *mctx = to_mctx(context); /* Sanity check CQ size before proceeding */ if (cqe > 0x3fffff) return NULL; cq = malloc(sizeof *cq); if (!cq) return NULL; cq->cons_index = 0; if (pthread_spin_init(&cq->lock, PTHREAD_PROCESS_PRIVATE)) goto err; cqe = align_queue_size(cqe + 1); - if (mlx4_alloc_cq_buf(to_mdev(context->device), &cq->buf, cqe)) + if (mlx4_alloc_cq_buf(to_mdev(context->device), &cq->buf, cqe, mctx->cqe_size)) goto err; + cq->cqe_size = mctx->cqe_size; + cq->set_ci_db = mlx4_alloc_db(to_mctx(context), MLX4_DB_TYPE_CQ); if (!cq->set_ci_db) goto err_buf; cq->arm_db = cq->set_ci_db + 1; *cq->arm_db = 0; cq->arm_sn = 1; *cq->set_ci_db = 0; cmd.buf_addr = (uintptr_t) cq->buf.buf; cmd.db_addr = (uintptr_t) cq->set_ci_db; ret = ibv_cmd_create_cq(context, cqe - 1, channel, comp_vector, &cq->ibv_cq, &cmd.ibv_cmd, sizeof cmd, &resp.ibv_resp, sizeof resp); if (ret) goto err_db; cq->cqn = resp.cqn; return &cq->ibv_cq; err_db: mlx4_free_db(to_mctx(context), MLX4_DB_TYPE_CQ, cq->set_ci_db); err_buf: mlx4_free_buf(&cq->buf); err: free(cq); return NULL; } int mlx4_resize_cq(struct ibv_cq *ibcq, int cqe) { struct mlx4_cq *cq = to_mcq(ibcq); struct mlx4_resize_cq cmd; struct mlx4_buf buf; int old_cqe, outst_cqe, ret; /* Sanity check CQ size before proceeding */ if (cqe > 0x3fffff) return EINVAL; pthread_spin_lock(&cq->lock); cqe = align_queue_size(cqe + 1); if (cqe == ibcq->cqe + 1) { ret = 0; goto out; } /* Can't be smaller then the number of outstanding CQEs */ outst_cqe = mlx4_get_outstanding_cqes(cq); if (cqe < outst_cqe + 1) { ret = 0; goto out; } - ret = mlx4_alloc_cq_buf(to_mdev(ibcq->context->device), &buf, cqe); + ret = mlx4_alloc_cq_buf(to_mdev(ibcq->context->device), &buf, cqe, + cq->cqe_size); if (ret) goto out; old_cqe = ibcq->cqe; cmd.buf_addr = (uintptr_t) buf.buf; #ifdef IBV_CMD_RESIZE_CQ_HAS_RESP_PARAMS { struct ibv_resize_cq_resp resp; ret = ibv_cmd_resize_cq(ibcq, cqe - 1, &cmd.ibv_cmd, sizeof cmd, &resp, sizeof resp); } #else ret = ibv_cmd_resize_cq(ibcq, cqe - 1, &cmd.ibv_cmd, sizeof cmd); #endif if (ret) { mlx4_free_buf(&buf); goto out; } mlx4_cq_resize_copy_cqes(cq, buf.buf, old_cqe); mlx4_free_buf(&cq->buf); cq->buf = buf; out: pthread_spin_unlock(&cq->lock); return ret; } int mlx4_destroy_cq(struct ibv_cq *cq) { int ret; ret = ibv_cmd_destroy_cq(cq); if (ret) return ret; mlx4_free_db(to_mctx(cq->context), MLX4_DB_TYPE_CQ, to_mcq(cq)->set_ci_db); mlx4_free_buf(&to_mcq(cq)->buf); free(to_mcq(cq)); return 0; } struct ibv_srq *mlx4_create_srq(struct ibv_pd *pd, struct ibv_srq_init_attr *attr) { struct mlx4_create_srq cmd; struct mlx4_create_srq_resp resp; struct mlx4_srq *srq; int ret; /* Sanity check SRQ size before proceeding */ if (attr->attr.max_wr > 1 << 16 || attr->attr.max_sge > 64) return NULL; srq = malloc(sizeof *srq); if (!srq) return NULL; if (pthread_spin_init(&srq->lock, PTHREAD_PROCESS_PRIVATE)) goto err; srq->max = align_queue_size(attr->attr.max_wr + 1); srq->max_gs = attr->attr.max_sge; srq->counter = 0; if (mlx4_alloc_srq_buf(pd, &attr->attr, srq)) goto err; srq->db = mlx4_alloc_db(to_mctx(pd->context), MLX4_DB_TYPE_RQ); if (!srq->db) goto err_free; *srq->db = 0; cmd.buf_addr = (uintptr_t) srq->buf.buf; cmd.db_addr = (uintptr_t) srq->db; ret = ibv_cmd_create_srq(pd, 
&srq->ibv_srq, attr, &cmd.ibv_cmd, sizeof cmd, &resp.ibv_resp, sizeof resp); if (ret) goto err_db; srq->srqn = resp.srqn; return &srq->ibv_srq; err_db: mlx4_free_db(to_mctx(pd->context), MLX4_DB_TYPE_RQ, srq->db); err_free: free(srq->wrid); mlx4_free_buf(&srq->buf); err: free(srq); return NULL; } int mlx4_modify_srq(struct ibv_srq *srq, struct ibv_srq_attr *attr, enum ibv_srq_attr_mask attr_mask) { struct ibv_modify_srq cmd; return ibv_cmd_modify_srq(srq, attr, attr_mask, &cmd, sizeof cmd); } int mlx4_query_srq(struct ibv_srq *srq, struct ibv_srq_attr *attr) { struct ibv_query_srq cmd; return ibv_cmd_query_srq(srq, attr, &cmd, sizeof cmd); } int mlx4_destroy_srq(struct ibv_srq *ibsrq) { struct mlx4_srq *srq = to_msrq(ibsrq); struct mlx4_cq *mcq = NULL; int ret; if (ibsrq->xrc_cq) { /* is an xrc_srq */ mcq = to_mcq(ibsrq->xrc_cq); mlx4_cq_clean(mcq, 0, srq); pthread_spin_lock(&mcq->lock); mlx4_clear_xrc_srq(to_mctx(ibsrq->context), srq->srqn); pthread_spin_unlock(&mcq->lock); } ret = ibv_cmd_destroy_srq(ibsrq); if (ret) { if (ibsrq->xrc_cq) { pthread_spin_lock(&mcq->lock); mlx4_store_xrc_srq(to_mctx(ibsrq->context), srq->srqn, srq); pthread_spin_unlock(&mcq->lock); } return ret; } mlx4_free_db(to_mctx(ibsrq->context), MLX4_DB_TYPE_RQ, srq->db); mlx4_free_buf(&srq->buf); free(srq->wrid); free(srq); return 0; } static int verify_sizes(struct ibv_qp_init_attr *attr, struct mlx4_context *context) { int size; int nsegs; if (attr->cap.max_send_wr > context->max_qp_wr || attr->cap.max_recv_wr > context->max_qp_wr || attr->cap.max_send_sge > context->max_sge || attr->cap.max_recv_sge > context->max_sge) return -1; if (attr->cap.max_inline_data) { nsegs = num_inline_segs(attr->cap.max_inline_data, attr->qp_type); size = MLX4_MAX_WQE_SIZE - nsegs * sizeof (struct mlx4_wqe_inline_seg); switch (attr->qp_type) { case IBV_QPT_UD: size -= (sizeof (struct mlx4_wqe_ctrl_seg) + sizeof (struct mlx4_wqe_datagram_seg)); break; case IBV_QPT_RC: case IBV_QPT_UC: case IBV_QPT_XRC: size -= (sizeof (struct mlx4_wqe_ctrl_seg) + sizeof (struct mlx4_wqe_raddr_seg)); break; default: return 0; } if (attr->cap.max_inline_data > size) return -1; } return 0; } struct ibv_qp *mlx4_create_qp(struct ibv_pd *pd, struct ibv_qp_init_attr *attr) { struct mlx4_create_qp cmd; struct ibv_create_qp_resp resp; struct mlx4_qp *qp; int ret; struct mlx4_context *context = to_mctx(pd->context); /* Sanity check QP size before proceeding */ if (verify_sizes(attr, context)) return NULL; qp = malloc(sizeof *qp); if (!qp) return NULL; mlx4_calc_sq_wqe_size(&attr->cap, attr->qp_type, qp); /* * We need to leave 2 KB + 1 WQE of headroom in the SQ to * allow HW to prefetch. 
*/ qp->sq_spare_wqes = (2048 >> qp->sq.wqe_shift) + 1; qp->sq.wqe_cnt = align_queue_size(attr->cap.max_send_wr + qp->sq_spare_wqes); qp->rq.wqe_cnt = align_queue_size(attr->cap.max_recv_wr); if (attr->srq || attr->qp_type == IBV_QPT_XRC) attr->cap.max_recv_wr = qp->rq.wqe_cnt = 0; else { if (attr->cap.max_recv_sge < 1) attr->cap.max_recv_sge = 1; if (attr->cap.max_recv_wr < 1) attr->cap.max_recv_wr = 1; } if (mlx4_alloc_qp_buf(pd, &attr->cap, attr->qp_type, qp)) goto err; mlx4_init_qp_indices(qp); if (pthread_spin_init(&qp->sq.lock, PTHREAD_PROCESS_PRIVATE) || pthread_spin_init(&qp->rq.lock, PTHREAD_PROCESS_PRIVATE)) goto err_free; if (!attr->srq && attr->qp_type != IBV_QPT_XRC) { qp->db = mlx4_alloc_db(to_mctx(pd->context), MLX4_DB_TYPE_RQ); if (!qp->db) goto err_free; *qp->db = 0; } cmd.buf_addr = (uintptr_t) qp->buf.buf; if (attr->srq || attr->qp_type == IBV_QPT_XRC) cmd.db_addr = 0; else cmd.db_addr = (uintptr_t) qp->db; cmd.log_sq_stride = qp->sq.wqe_shift; for (cmd.log_sq_bb_count = 0; qp->sq.wqe_cnt > 1 << cmd.log_sq_bb_count; ++cmd.log_sq_bb_count) ; /* nothing */ cmd.sq_no_prefetch = 0; /* OK for ABI 2: just a reserved field */ memset(cmd.reserved, 0, sizeof cmd.reserved); pthread_mutex_lock(&to_mctx(pd->context)->qp_table_mutex); ret = ibv_cmd_create_qp(pd, &qp->ibv_qp, attr, &cmd.ibv_cmd, sizeof cmd, &resp, sizeof resp); if (ret) goto err_rq_db; ret = mlx4_store_qp(to_mctx(pd->context), qp->ibv_qp.qp_num, qp); if (ret) goto err_destroy; pthread_mutex_unlock(&to_mctx(pd->context)->qp_table_mutex); qp->rq.wqe_cnt = attr->cap.max_recv_wr; qp->rq.max_gs = attr->cap.max_recv_sge; /* adjust rq maxima to not exceed reported device maxima */ attr->cap.max_recv_wr = min(context->max_qp_wr, attr->cap.max_recv_wr); attr->cap.max_recv_sge = min(context->max_sge, attr->cap.max_recv_sge); qp->rq.max_post = attr->cap.max_recv_wr; mlx4_set_sq_sizes(qp, &attr->cap, attr->qp_type); qp->doorbell_qpn = htonl(qp->ibv_qp.qp_num << 8); if (attr->sq_sig_all) qp->sq_signal_bits = htonl(MLX4_WQE_CTRL_CQ_UPDATE); else qp->sq_signal_bits = 0; return &qp->ibv_qp; err_destroy: ibv_cmd_destroy_qp(&qp->ibv_qp); err_rq_db: pthread_mutex_unlock(&to_mctx(pd->context)->qp_table_mutex); if (!attr->srq && attr->qp_type != IBV_QPT_XRC) mlx4_free_db(to_mctx(pd->context), MLX4_DB_TYPE_RQ, qp->db); err_free: free(qp->sq.wrid); if (qp->rq.wqe_cnt) free(qp->rq.wrid); mlx4_free_buf(&qp->buf); err: free(qp); return NULL; } int mlx4_query_qp(struct ibv_qp *ibqp, struct ibv_qp_attr *attr, enum ibv_qp_attr_mask attr_mask, struct ibv_qp_init_attr *init_attr) { struct ibv_query_qp cmd; struct mlx4_qp *qp = to_mqp(ibqp); int ret; ret = ibv_cmd_query_qp(ibqp, attr, attr_mask, init_attr, &cmd, sizeof cmd); if (ret) return ret; init_attr->cap.max_send_wr = qp->sq.max_post; init_attr->cap.max_send_sge = qp->sq.max_gs; init_attr->cap.max_inline_data = qp->max_inline_data; attr->cap = init_attr->cap; return 0; } int mlx4_modify_qp(struct ibv_qp *qp, struct ibv_qp_attr *attr, enum ibv_qp_attr_mask attr_mask) { struct ibv_modify_qp cmd; int ret; if (qp->state == IBV_QPS_RESET && attr_mask & IBV_QP_STATE && attr->qp_state == IBV_QPS_INIT) { mlx4_qp_init_sq_ownership(to_mqp(qp)); } ret = ibv_cmd_modify_qp(qp, attr, attr_mask, &cmd, sizeof cmd); if (!ret && (attr_mask & IBV_QP_STATE) && attr->qp_state == IBV_QPS_RESET) { mlx4_cq_clean(to_mcq(qp->recv_cq), qp->qp_num, qp->srq ? 
to_msrq(qp->srq) : NULL); if (qp->send_cq != qp->recv_cq) mlx4_cq_clean(to_mcq(qp->send_cq), qp->qp_num, NULL); mlx4_init_qp_indices(to_mqp(qp)); if (!qp->srq && qp->qp_type != IBV_QPT_XRC) *to_mqp(qp)->db = 0; } return ret; } static void mlx4_lock_cqs(struct ibv_qp *qp) { struct mlx4_cq *send_cq = to_mcq(qp->send_cq); struct mlx4_cq *recv_cq = to_mcq(qp->recv_cq); if (send_cq == recv_cq) pthread_spin_lock(&send_cq->lock); else if (send_cq->cqn < recv_cq->cqn) { pthread_spin_lock(&send_cq->lock); pthread_spin_lock(&recv_cq->lock); } else { pthread_spin_lock(&recv_cq->lock); pthread_spin_lock(&send_cq->lock); } } static void mlx4_unlock_cqs(struct ibv_qp *qp) { struct mlx4_cq *send_cq = to_mcq(qp->send_cq); struct mlx4_cq *recv_cq = to_mcq(qp->recv_cq); if (send_cq == recv_cq) pthread_spin_unlock(&send_cq->lock); else if (send_cq->cqn < recv_cq->cqn) { pthread_spin_unlock(&recv_cq->lock); pthread_spin_unlock(&send_cq->lock); } else { pthread_spin_unlock(&send_cq->lock); pthread_spin_unlock(&recv_cq->lock); } } int mlx4_destroy_qp(struct ibv_qp *ibqp) { struct mlx4_qp *qp = to_mqp(ibqp); int ret; pthread_mutex_lock(&to_mctx(ibqp->context)->qp_table_mutex); ret = ibv_cmd_destroy_qp(ibqp); if (ret) { pthread_mutex_unlock(&to_mctx(ibqp->context)->qp_table_mutex); return ret; } mlx4_lock_cqs(ibqp); __mlx4_cq_clean(to_mcq(ibqp->recv_cq), ibqp->qp_num, ibqp->srq ? to_msrq(ibqp->srq) : NULL); if (ibqp->send_cq != ibqp->recv_cq) __mlx4_cq_clean(to_mcq(ibqp->send_cq), ibqp->qp_num, NULL); mlx4_clear_qp(to_mctx(ibqp->context), ibqp->qp_num); mlx4_unlock_cqs(ibqp); pthread_mutex_unlock(&to_mctx(ibqp->context)->qp_table_mutex); if (!ibqp->srq && ibqp->qp_type != IBV_QPT_XRC) mlx4_free_db(to_mctx(ibqp->context), MLX4_DB_TYPE_RQ, qp->db); free(qp->sq.wrid); if (qp->rq.wqe_cnt) free(qp->rq.wrid); mlx4_free_buf(&qp->buf); free(qp); return 0; } struct ibv_ah *mlx4_create_ah(struct ibv_pd *pd, struct ibv_ah_attr *attr) { struct mlx4_ah *ah; struct ibv_port_attr port_attr; uint8_t is_mcast; ah = malloc(sizeof *ah); if (!ah) return NULL; memset(ah, 0, sizeof *ah); ah->av.port_pd = htonl(to_mpd(pd)->pdn | (attr->port_num << 24)); ah->av.g_slid = attr->src_path_bits; ah->av.dlid = htons(attr->dlid); if (attr->static_rate) { ah->av.stat_rate = attr->static_rate + MLX4_STAT_RATE_OFFSET; /* XXX check rate cap? 
*/ } ah->av.sl_tclass_flowlabel = htonl(attr->sl << 28); if (attr->is_global) { ah->av.g_slid |= 0x80; ah->av.gid_index = attr->grh.sgid_index; ah->av.hop_limit = attr->grh.hop_limit; ah->av.sl_tclass_flowlabel |= htonl((attr->grh.traffic_class << 20) | attr->grh.flow_label); memcpy(ah->av.dgid, attr->grh.dgid.raw, 16); } if (ibv_query_port(pd->context, attr->port_num, &port_attr)) goto err; if (port_attr.link_layer == IBV_LINK_LAYER_ETHERNET) { if (ibv_resolve_eth_gid(pd, attr->port_num, (union ibv_gid *)ah->av.dgid, attr->grh.sgid_index, ah->mac, &ah->vlan, &ah->tagged, &is_mcast)) goto err; if (is_mcast) { ah->av.dlid = htons(0xc000); ah->av.port_pd |= htonl(1 << 31); } if (ah->tagged) { ah->av.port_pd |= htonl(1 << 29); ah->vlan |= (attr->sl & 7) << 13; } } return &ah->ibv_ah; err: free(ah); return NULL; } int mlx4_destroy_ah(struct ibv_ah *ah) { free(to_mah(ah)); return 0; } #ifdef HAVE_IBV_XRC_OPS struct ibv_srq *mlx4_create_xrc_srq(struct ibv_pd *pd, struct ibv_xrc_domain *xrc_domain, struct ibv_cq *xrc_cq, struct ibv_srq_init_attr *attr) { struct mlx4_create_xrc_srq cmd; struct mlx4_create_srq_resp resp; struct mlx4_srq *srq; int ret; /* Sanity check SRQ size before proceeding */ if (attr->attr.max_wr > 1 << 16 || attr->attr.max_sge > 64) return NULL; srq = malloc(sizeof *srq); if (!srq) return NULL; if (pthread_spin_init(&srq->lock, PTHREAD_PROCESS_PRIVATE)) goto err; srq->max = align_queue_size(attr->attr.max_wr + 1); srq->max_gs = attr->attr.max_sge; srq->counter = 0; if (mlx4_alloc_srq_buf(pd, &attr->attr, srq)) goto err; srq->db = mlx4_alloc_db(to_mctx(pd->context), MLX4_DB_TYPE_RQ); if (!srq->db) goto err_free; *srq->db = 0; cmd.buf_addr = (uintptr_t) srq->buf.buf; cmd.db_addr = (uintptr_t) srq->db; ret = ibv_cmd_create_xrc_srq(pd, &srq->ibv_srq, attr, xrc_domain->handle, xrc_cq->handle, &cmd.ibv_cmd, sizeof cmd, &resp.ibv_resp, sizeof resp); if (ret) goto err_db; srq->ibv_srq.xrc_srq_num = srq->srqn = resp.srqn; ret = mlx4_store_xrc_srq(to_mctx(pd->context), srq->ibv_srq.xrc_srq_num, srq); if (ret) goto err_destroy; return &srq->ibv_srq; err_destroy: ibv_cmd_destroy_srq(&srq->ibv_srq); err_db: mlx4_free_db(to_mctx(pd->context), MLX4_DB_TYPE_RQ, srq->db); err_free: free(srq->wrid); mlx4_free_buf(&srq->buf); err: free(srq); return NULL; } struct ibv_xrc_domain *mlx4_open_xrc_domain(struct ibv_context *context, int fd, int oflag) { int ret; struct mlx4_open_xrc_domain_resp resp; struct mlx4_xrc_domain *xrcd; xrcd = malloc(sizeof *xrcd); if (!xrcd) return NULL; ret = ibv_cmd_open_xrc_domain(context, fd, oflag, &xrcd->ibv_xrcd, &resp.ibv_resp, sizeof resp); if (ret) { free(xrcd); return NULL; } xrcd->xrcdn = resp.xrcdn; return &xrcd->ibv_xrcd; } int mlx4_close_xrc_domain(struct ibv_xrc_domain *d) { int ret; ret = ibv_cmd_close_xrc_domain(d); if (!ret) free(d); return ret; } int mlx4_create_xrc_rcv_qp(struct ibv_qp_init_attr *init_attr, uint32_t *xrc_qp_num) { return ibv_cmd_create_xrc_rcv_qp(init_attr, xrc_qp_num); } int mlx4_modify_xrc_rcv_qp(struct ibv_xrc_domain *xrc_domain, uint32_t xrc_qp_num, struct ibv_qp_attr *attr, int attr_mask) { return ibv_cmd_modify_xrc_rcv_qp(xrc_domain, xrc_qp_num, attr, attr_mask); } int mlx4_query_xrc_rcv_qp(struct ibv_xrc_domain *xrc_domain, uint32_t xrc_qp_num, struct ibv_qp_attr *attr, int attr_mask, struct ibv_qp_init_attr *init_attr) { int ret; ret = ibv_cmd_query_xrc_rcv_qp(xrc_domain, xrc_qp_num, attr, attr_mask, init_attr); if (ret) return ret; init_attr->cap.max_send_wr = init_attr->cap.max_send_sge = 1; init_attr->cap.max_recv_sge = 
init_attr->cap.max_recv_wr = 0; init_attr->cap.max_inline_data = 0; init_attr->recv_cq = init_attr->send_cq = NULL; init_attr->srq = NULL; init_attr->xrc_domain = xrc_domain; init_attr->qp_type = IBV_QPT_XRC; init_attr->qp_context = NULL; attr->cap = init_attr->cap; return 0; } int mlx4_reg_xrc_rcv_qp(struct ibv_xrc_domain *xrc_domain, uint32_t xrc_qp_num) { return ibv_cmd_reg_xrc_rcv_qp(xrc_domain, xrc_qp_num); } int mlx4_unreg_xrc_rcv_qp(struct ibv_xrc_domain *xrc_domain, uint32_t xrc_qp_num) { return ibv_cmd_unreg_xrc_rcv_qp(xrc_domain, xrc_qp_num); } #endif
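Taken together, the verbs.c hunks thread the context's cqe_size through mlx4_create_cq() and mlx4_resize_cq(), so both the initial buffer and any resize buffer use the kernel-reported stride, and mlx4_cq_resize_copy_cqes() copies whole entries of that size. From an application the path is exercised with the ordinary verbs calls; a minimal sketch with error handling trimmed and no libmlx4-specific knowledge assumed:

#include <stdio.h>
#include <infiniband/verbs.h>

/* Create a CQ and later grow it; both calls land in the mlx4_create_cq()
 * and mlx4_resize_cq() paths patched above.  Illustrative only. */
static struct ibv_cq *make_and_grow_cq(struct ibv_context *ctx)
{
	struct ibv_cq *cq = ibv_create_cq(ctx, 256, NULL, NULL, 0);

	if (!cq)
		return NULL;
	if (ibv_resize_cq(cq, 1024))	/* returns 0 on success */
		fprintf(stderr, "resize failed, keeping %d entries\n", cq->cqe);
	return cq;
}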