diff --git a/contrib/ofed/libirdma/irdma_defs.h b/contrib/ofed/libirdma/irdma_defs.h index 3d8b59c4b78e..9276490793a0 100644 --- a/contrib/ofed/libirdma/irdma_defs.h +++ b/contrib/ofed/libirdma/irdma_defs.h @@ -1,503 +1,516 @@ /*- * SPDX-License-Identifier: GPL-2.0 or Linux-OpenIB * - * Copyright (c) 2015 - 2022 Intel Corporation + * Copyright (c) 2015 - 2023 Intel Corporation * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU * General Public License (GPL) Version 2, available from the file * COPYING in the main directory of this source tree, or the * OpenFabrics.org BSD license below: * * Redistribution and use in source and binary forms, with or * without modification, are permitted provided that the following * conditions are met: * * - Redistributions of source code must retain the above * copyright notice, this list of conditions and the following * disclaimer. * * - Redistributions in binary form must reproduce the above * copyright notice, this list of conditions and the following * disclaimer in the documentation and/or other materials * provided with the distribution. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ /*$FreeBSD$*/ #ifndef IRDMA_DEFS_H #define IRDMA_DEFS_H #define IRDMA_BYTE_0 0 #define IRDMA_BYTE_8 8 #define IRDMA_BYTE_16 16 #define IRDMA_BYTE_24 24 #define IRDMA_BYTE_32 32 #define IRDMA_BYTE_40 40 #define IRDMA_BYTE_48 48 #define IRDMA_BYTE_56 56 #define IRDMA_BYTE_64 64 #define IRDMA_BYTE_72 72 #define IRDMA_BYTE_80 80 #define IRDMA_BYTE_88 88 #define IRDMA_BYTE_96 96 #define IRDMA_BYTE_104 104 #define IRDMA_BYTE_112 112 #define IRDMA_BYTE_120 120 #define IRDMA_BYTE_128 128 #define IRDMA_BYTE_136 136 #define IRDMA_BYTE_144 144 #define IRDMA_BYTE_152 152 #define IRDMA_BYTE_160 160 #define IRDMA_BYTE_168 168 #define IRDMA_BYTE_176 176 #define IRDMA_BYTE_184 184 #define IRDMA_BYTE_192 192 #define IRDMA_BYTE_200 200 #define IRDMA_BYTE_208 208 #define IRDMA_BYTE_216 216 #define IRDMA_QP_TYPE_IWARP 1 #define IRDMA_QP_TYPE_UDA 2 #define IRDMA_QP_TYPE_ROCE_RC 3 #define IRDMA_QP_TYPE_ROCE_UD 4 #define IRDMA_HW_PAGE_SIZE 4096 #define IRDMA_HW_PAGE_SHIFT 12 #define IRDMA_CQE_QTYPE_RQ 0 #define IRDMA_CQE_QTYPE_SQ 1 #define IRDMA_QP_SW_MIN_WQSIZE 8 /* in WRs*/ #define IRDMA_QP_WQE_MIN_SIZE 32 #define IRDMA_QP_WQE_MAX_SIZE 256 #define IRDMA_QP_WQE_MIN_QUANTA 1 #define IRDMA_MAX_RQ_WQE_SHIFT_GEN1 2 #define IRDMA_MAX_RQ_WQE_SHIFT_GEN2 3 #define IRDMA_SQ_RSVD 258 #define IRDMA_RQ_RSVD 1 #define IRDMA_FEATURE_RTS_AE BIT_ULL(0) #define IRDMA_FEATURE_CQ_RESIZE BIT_ULL(1) #define IRDMA_FEATURE_RELAX_RQ_ORDER BIT_ULL(2) #define IRDMA_FEATURE_64_BYTE_CQE BIT_ULL(5) #define IRDMAQP_OP_RDMA_WRITE 0x00 #define IRDMAQP_OP_RDMA_READ 0x01 #define IRDMAQP_OP_RDMA_SEND 0x03 #define IRDMAQP_OP_RDMA_SEND_INV 0x04 #define IRDMAQP_OP_RDMA_SEND_SOL_EVENT 0x05 #define IRDMAQP_OP_RDMA_SEND_SOL_EVENT_INV 0x06 #define IRDMAQP_OP_BIND_MW 0x08 #define IRDMAQP_OP_FAST_REGISTER 0x09 #define IRDMAQP_OP_LOCAL_INVALIDATE 0x0a #define IRDMAQP_OP_RDMA_READ_LOC_INV 0x0b #define IRDMAQP_OP_NOP 0x0c 
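/*
 * Illustrative sketch of how these opcode and field definitions are used
 * (assumes the FIELD_PREP()/FIELD_GET() helpers and set_64bit_val(), both
 * defined later in this header): each WQE field is a mask plus a matching
 * *_S shift, and a header quadword is assembled and written at byte
 * offset 24 of the WQE roughly as in irdma_nop_hdr()/irdma_nop_1() in
 * irdma_uk.c:
 *
 *	u64 hdr = FIELD_PREP(IRDMAQPSQ_OPCODE, IRDMAQP_OP_NOP) |
 *		  FIELD_PREP(IRDMAQPSQ_VALID, qp->swqe_polarity);
 *	set_64bit_val(wqe, IRDMA_BYTE_24, hdr);
 *
 * The opcode is recovered later with FIELD_GET(IRDMAQPSQ_OPCODE, hdr).
 */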
#ifndef LS_64_1 #define LS_64_1(val, bits) ((u64)(uintptr_t)(val) << (bits)) #define RS_64_1(val, bits) ((u64)(uintptr_t)(val) >> (bits)) #define LS_32_1(val, bits) ((u32)((val) << (bits))) #define RS_32_1(val, bits) ((u32)((val) >> (bits))) #endif #ifndef GENMASK_ULL #define GENMASK_ULL(high, low) ((0xFFFFFFFFFFFFFFFFULL >> (64ULL - ((high) - (low) + 1ULL))) << (low)) #endif /* GENMASK_ULL */ #ifndef GENMASK #define GENMASK(high, low) ((0xFFFFFFFFUL >> (32UL - ((high) - (low) + 1UL))) << (low)) #endif /* GENMASK */ #ifndef FIELD_PREP #define FIELD_PREP(mask, val) (((u64)(val) << mask##_S) & (mask)) #define FIELD_GET(mask, val) (((val) & mask) >> mask##_S) #endif /* FIELD_PREP */ #define IRDMA_CQPHC_QPCTX_S 0 #define IRDMA_CQPHC_QPCTX GENMASK_ULL(63, 0) #define IRDMA_QP_DBSA_HW_SQ_TAIL_S 0 #define IRDMA_QP_DBSA_HW_SQ_TAIL GENMASK_ULL(14, 0) #define IRDMA_CQ_DBSA_CQEIDX_S 0 #define IRDMA_CQ_DBSA_CQEIDX GENMASK_ULL(19, 0) #define IRDMA_CQ_DBSA_SW_CQ_SELECT_S 0 #define IRDMA_CQ_DBSA_SW_CQ_SELECT GENMASK_ULL(13, 0) #define IRDMA_CQ_DBSA_ARM_NEXT_S 14 #define IRDMA_CQ_DBSA_ARM_NEXT BIT_ULL(14) #define IRDMA_CQ_DBSA_ARM_NEXT_SE_S 15 #define IRDMA_CQ_DBSA_ARM_NEXT_SE BIT_ULL(15) #define IRDMA_CQ_DBSA_ARM_SEQ_NUM_S 16 #define IRDMA_CQ_DBSA_ARM_SEQ_NUM GENMASK_ULL(17, 16) /* CQP and iWARP Completion Queue */ #define IRDMA_CQ_QPCTX_S IRDMA_CQPHC_QPCTX_S #define IRDMA_CQ_QPCTX IRDMA_CQPHC_QPCTX #define IRDMA_CQ_MINERR_S 0 #define IRDMA_CQ_MINERR GENMASK_ULL(15, 0) #define IRDMA_CQ_MAJERR_S 16 #define IRDMA_CQ_MAJERR GENMASK_ULL(31, 16) #define IRDMA_CQ_WQEIDX_S 32 #define IRDMA_CQ_WQEIDX GENMASK_ULL(46, 32) #define IRDMA_CQ_EXTCQE_S 50 #define IRDMA_CQ_EXTCQE BIT_ULL(50) #define IRDMA_OOO_CMPL_S 54 #define IRDMA_OOO_CMPL BIT_ULL(54) #define IRDMA_CQ_ERROR_S 55 #define IRDMA_CQ_ERROR BIT_ULL(55) #define IRDMA_CQ_SQ_S 62 #define IRDMA_CQ_SQ BIT_ULL(62) #define IRDMA_CQ_VALID_S 63 #define IRDMA_CQ_VALID BIT_ULL(63) #define IRDMA_CQ_IMMVALID BIT_ULL(62) #define IRDMA_CQ_UDSMACVALID_S 61 #define IRDMA_CQ_UDSMACVALID BIT_ULL(61) #define IRDMA_CQ_UDVLANVALID_S 60 #define IRDMA_CQ_UDVLANVALID BIT_ULL(60) #define IRDMA_CQ_UDSMAC_S 0 #define IRDMA_CQ_UDSMAC GENMASK_ULL(47, 0) #define IRDMA_CQ_UDVLAN_S 48 #define IRDMA_CQ_UDVLAN GENMASK_ULL(63, 48) #define IRDMA_CQ_IMMDATA_S 0 #define IRDMA_CQ_IMMVALID_S 62 #define IRDMA_CQ_IMMDATA GENMASK_ULL(125, 62) #define IRDMA_CQ_IMMDATALOW32_S 0 #define IRDMA_CQ_IMMDATALOW32 GENMASK_ULL(31, 0) #define IRDMA_CQ_IMMDATAUP32_S 32 #define IRDMA_CQ_IMMDATAUP32 GENMASK_ULL(63, 32) #define IRDMACQ_PAYLDLEN_S 0 #define IRDMACQ_PAYLDLEN GENMASK_ULL(31, 0) #define IRDMACQ_TCPSQN_ROCEPSN_RTT_TS_S 32 #define IRDMACQ_TCPSQN_ROCEPSN_RTT_TS GENMASK_ULL(63, 32) #define IRDMACQ_INVSTAG_S 0 #define IRDMACQ_INVSTAG GENMASK_ULL(31, 0) #define IRDMACQ_QPID_S 32 #define IRDMACQ_QPID GENMASK_ULL(55, 32) #define IRDMACQ_UDSRCQPN_S 0 #define IRDMACQ_UDSRCQPN GENMASK_ULL(31, 0) #define IRDMACQ_PSHDROP_S 51 #define IRDMACQ_PSHDROP BIT_ULL(51) #define IRDMACQ_STAG_S 53 #define IRDMACQ_STAG BIT_ULL(53) #define IRDMACQ_IPV4_S 53 #define IRDMACQ_IPV4 BIT_ULL(53) #define IRDMACQ_SOEVENT_S 54 #define IRDMACQ_SOEVENT BIT_ULL(54) #define IRDMACQ_OP_S 56 #define IRDMACQ_OP GENMASK_ULL(61, 56) /* Manage Push Page - MPP */ #define IRDMA_INVALID_PUSH_PAGE_INDEX_GEN_1 0xffff #define IRDMA_INVALID_PUSH_PAGE_INDEX 0xffffffff #define IRDMAQPSQ_OPCODE_S 32 #define IRDMAQPSQ_OPCODE GENMASK_ULL(37, 32) #define IRDMAQPSQ_COPY_HOST_PBL_S 43 #define IRDMAQPSQ_COPY_HOST_PBL BIT_ULL(43) #define IRDMAQPSQ_ADDFRAGCNT_S 38 
#define IRDMAQPSQ_ADDFRAGCNT GENMASK_ULL(41, 38) #define IRDMAQPSQ_PUSHWQE_S 56 #define IRDMAQPSQ_PUSHWQE BIT_ULL(56) #define IRDMAQPSQ_STREAMMODE_S 58 #define IRDMAQPSQ_STREAMMODE BIT_ULL(58) #define IRDMAQPSQ_WAITFORRCVPDU_S 59 #define IRDMAQPSQ_WAITFORRCVPDU BIT_ULL(59) #define IRDMAQPSQ_READFENCE_S 60 #define IRDMAQPSQ_READFENCE BIT_ULL(60) #define IRDMAQPSQ_LOCALFENCE_S 61 #define IRDMAQPSQ_LOCALFENCE BIT_ULL(61) #define IRDMAQPSQ_UDPHEADER_S 61 #define IRDMAQPSQ_UDPHEADER BIT_ULL(61) #define IRDMAQPSQ_L4LEN_S 42 #define IRDMAQPSQ_L4LEN GENMASK_ULL(45, 42) #define IRDMAQPSQ_SIGCOMPL_S 62 #define IRDMAQPSQ_SIGCOMPL BIT_ULL(62) #define IRDMAQPSQ_VALID_S 63 #define IRDMAQPSQ_VALID BIT_ULL(63) #define IRDMAQPSQ_FRAG_TO_S IRDMA_CQPHC_QPCTX_S #define IRDMAQPSQ_FRAG_TO IRDMA_CQPHC_QPCTX #define IRDMAQPSQ_FRAG_VALID_S 63 #define IRDMAQPSQ_FRAG_VALID BIT_ULL(63) #define IRDMAQPSQ_FRAG_LEN_S 32 #define IRDMAQPSQ_FRAG_LEN GENMASK_ULL(62, 32) #define IRDMAQPSQ_FRAG_STAG_S 0 #define IRDMAQPSQ_FRAG_STAG GENMASK_ULL(31, 0) #define IRDMAQPSQ_GEN1_FRAG_LEN_S 0 #define IRDMAQPSQ_GEN1_FRAG_LEN GENMASK_ULL(31, 0) #define IRDMAQPSQ_GEN1_FRAG_STAG_S 32 #define IRDMAQPSQ_GEN1_FRAG_STAG GENMASK_ULL(63, 32) #define IRDMAQPSQ_REMSTAGINV_S 0 #define IRDMAQPSQ_REMSTAGINV GENMASK_ULL(31, 0) #define IRDMAQPSQ_DESTQKEY_S 0 #define IRDMAQPSQ_DESTQKEY GENMASK_ULL(31, 0) #define IRDMAQPSQ_DESTQPN_S 32 #define IRDMAQPSQ_DESTQPN GENMASK_ULL(55, 32) #define IRDMAQPSQ_AHID_S 0 #define IRDMAQPSQ_AHID GENMASK_ULL(16, 0) #define IRDMAQPSQ_INLINEDATAFLAG_S 57 #define IRDMAQPSQ_INLINEDATAFLAG BIT_ULL(57) #define IRDMA_INLINE_VALID_S 7 #define IRDMAQPSQ_INLINEDATALEN_S 48 #define IRDMAQPSQ_INLINEDATALEN GENMASK_ULL(55, 48) #define IRDMAQPSQ_IMMDATAFLAG_S 47 #define IRDMAQPSQ_IMMDATAFLAG BIT_ULL(47) #define IRDMAQPSQ_REPORTRTT_S 46 #define IRDMAQPSQ_REPORTRTT BIT_ULL(46) #define IRDMAQPSQ_IMMDATA_S 0 #define IRDMAQPSQ_IMMDATA GENMASK_ULL(63, 0) #define IRDMAQPSQ_REMSTAG_S 0 #define IRDMAQPSQ_REMSTAG GENMASK_ULL(31, 0) #define IRDMAQPSQ_REMTO_S IRDMA_CQPHC_QPCTX_S #define IRDMAQPSQ_REMTO IRDMA_CQPHC_QPCTX #define IRDMAQPSQ_STAGRIGHTS_S 48 #define IRDMAQPSQ_STAGRIGHTS GENMASK_ULL(52, 48) #define IRDMAQPSQ_VABASEDTO_S 53 #define IRDMAQPSQ_VABASEDTO BIT_ULL(53) #define IRDMAQPSQ_MEMWINDOWTYPE_S 54 #define IRDMAQPSQ_MEMWINDOWTYPE BIT_ULL(54) #define IRDMAQPSQ_MWLEN_S IRDMA_CQPHC_QPCTX_S #define IRDMAQPSQ_MWLEN IRDMA_CQPHC_QPCTX #define IRDMAQPSQ_PARENTMRSTAG_S 32 #define IRDMAQPSQ_PARENTMRSTAG GENMASK_ULL(63, 32) #define IRDMAQPSQ_MWSTAG_S 0 #define IRDMAQPSQ_MWSTAG GENMASK_ULL(31, 0) #define IRDMAQPSQ_BASEVA_TO_FBO_S IRDMA_CQPHC_QPCTX_S #define IRDMAQPSQ_BASEVA_TO_FBO IRDMA_CQPHC_QPCTX #define IRDMAQPSQ_LOCSTAG_S 0 #define IRDMAQPSQ_LOCSTAG GENMASK_ULL(31, 0) /* iwarp QP RQ WQE common fields */ #define IRDMAQPRQ_ADDFRAGCNT_S IRDMAQPSQ_ADDFRAGCNT_S #define IRDMAQPRQ_ADDFRAGCNT IRDMAQPSQ_ADDFRAGCNT #define IRDMAQPRQ_VALID_S IRDMAQPSQ_VALID_S #define IRDMAQPRQ_VALID IRDMAQPSQ_VALID #define IRDMAQPRQ_COMPLCTX_S IRDMA_CQPHC_QPCTX_S #define IRDMAQPRQ_COMPLCTX IRDMA_CQPHC_QPCTX #define IRDMAQPRQ_FRAG_LEN_S IRDMAQPSQ_FRAG_LEN_S #define IRDMAQPRQ_FRAG_LEN IRDMAQPSQ_FRAG_LEN #define IRDMAQPRQ_STAG_S IRDMAQPSQ_FRAG_STAG_S #define IRDMAQPRQ_STAG IRDMAQPSQ_FRAG_STAG #define IRDMAQPRQ_TO_S IRDMAQPSQ_FRAG_TO_S #define IRDMAQPRQ_TO IRDMAQPSQ_FRAG_TO #define IRDMAPFINT_OICR_HMC_ERR_M BIT(26) #define IRDMAPFINT_OICR_PE_PUSH_M BIT(27) #define IRDMAPFINT_OICR_PE_CRITERR_M BIT(28) #define IRDMA_GET_RING_OFFSET(_ring, _i) \ ( \ ((_ring).head + (_i)) % 
(_ring).size \ ) #define IRDMA_GET_CQ_ELEM_AT_OFFSET(_cq, _i, _cqe) \ { \ - register __u32 offset; \ + __u32 offset; \ offset = IRDMA_GET_RING_OFFSET((_cq)->cq_ring, _i); \ (_cqe) = (_cq)->cq_base[offset].buf; \ } #define IRDMA_GET_CURRENT_CQ_ELEM(_cq) \ ( \ (_cq)->cq_base[IRDMA_RING_CURRENT_HEAD((_cq)->cq_ring)].buf \ ) #define IRDMA_GET_CURRENT_EXTENDED_CQ_ELEM(_cq) \ ( \ ((struct irdma_extended_cqe *) \ ((_cq)->cq_base))[IRDMA_RING_CURRENT_HEAD((_cq)->cq_ring)].buf \ ) #define IRDMA_RING_INIT(_ring, _size) \ { \ (_ring).head = 0; \ (_ring).tail = 0; \ (_ring).size = (_size); \ } #define IRDMA_RING_SIZE(_ring) ((_ring).size) #define IRDMA_RING_CURRENT_HEAD(_ring) ((_ring).head) #define IRDMA_RING_CURRENT_TAIL(_ring) ((_ring).tail) #define IRDMA_RING_MOVE_HEAD(_ring, _retcode) \ { \ - register u32 size; \ + u32 size; \ size = (_ring).size; \ if (!IRDMA_RING_FULL_ERR(_ring)) { \ (_ring).head = ((_ring).head + 1) % size; \ (_retcode) = 0; \ } else { \ (_retcode) = ENOSPC; \ } \ } #define IRDMA_RING_MOVE_HEAD_BY_COUNT(_ring, _count, _retcode) \ { \ - register u32 size; \ + u32 size; \ size = (_ring).size; \ if ((IRDMA_RING_USED_QUANTA(_ring) + (_count)) < size) { \ (_ring).head = ((_ring).head + (_count)) % size; \ (_retcode) = 0; \ } else { \ (_retcode) = ENOSPC; \ } \ } #define IRDMA_SQ_RING_MOVE_HEAD(_ring, _retcode) \ { \ - register u32 size; \ + u32 size; \ size = (_ring).size; \ if (!IRDMA_SQ_RING_FULL_ERR(_ring)) { \ (_ring).head = ((_ring).head + 1) % size; \ (_retcode) = 0; \ } else { \ (_retcode) = ENOSPC; \ } \ } #define IRDMA_SQ_RING_MOVE_HEAD_BY_COUNT(_ring, _count, _retcode) \ { \ - register u32 size; \ + u32 size; \ size = (_ring).size; \ if ((IRDMA_RING_USED_QUANTA(_ring) + (_count)) < (size - 256)) { \ (_ring).head = ((_ring).head + (_count)) % size; \ (_retcode) = 0; \ } else { \ (_retcode) = ENOSPC; \ } \ } #define IRDMA_RING_MOVE_HEAD_BY_COUNT_NOCHECK(_ring, _count) \ (_ring).head = ((_ring).head + (_count)) % (_ring).size #define IRDMA_RING_MOVE_TAIL(_ring) \ (_ring).tail = ((_ring).tail + 1) % (_ring).size #define IRDMA_RING_MOVE_HEAD_NOCHECK(_ring) \ (_ring).head = ((_ring).head + 1) % (_ring).size #define IRDMA_RING_MOVE_TAIL_BY_COUNT(_ring, _count) \ (_ring).tail = ((_ring).tail + (_count)) % (_ring).size #define IRDMA_RING_SET_TAIL(_ring, _pos) \ (_ring).tail = (_pos) % (_ring).size #define IRDMA_RING_FULL_ERR(_ring) \ ( \ (IRDMA_RING_USED_QUANTA(_ring) == ((_ring).size - 1)) \ ) #define IRDMA_ERR_RING_FULL2(_ring) \ ( \ (IRDMA_RING_USED_QUANTA(_ring) == ((_ring).size - 2)) \ ) #define IRDMA_ERR_RING_FULL3(_ring) \ ( \ (IRDMA_RING_USED_QUANTA(_ring) == ((_ring).size - 3)) \ ) #define IRDMA_SQ_RING_FULL_ERR(_ring) \ ( \ (IRDMA_RING_USED_QUANTA(_ring) == ((_ring).size - 257)) \ ) #define IRDMA_ERR_SQ_RING_FULL2(_ring) \ ( \ (IRDMA_RING_USED_QUANTA(_ring) == ((_ring).size - 258)) \ ) #define IRDMA_ERR_SQ_RING_FULL3(_ring) \ ( \ (IRDMA_RING_USED_QUANTA(_ring) == ((_ring).size - 259)) \ ) #define IRDMA_RING_MORE_WORK(_ring) \ ( \ (IRDMA_RING_USED_QUANTA(_ring) != 0) \ ) #define IRDMA_RING_USED_QUANTA(_ring) \ ( \ (((_ring).head + (_ring).size - (_ring).tail) % (_ring).size) \ ) #define IRDMA_RING_FREE_QUANTA(_ring) \ ( \ ((_ring).size - IRDMA_RING_USED_QUANTA(_ring) - 1) \ ) #define IRDMA_SQ_RING_FREE_QUANTA(_ring) \ ( \ ((_ring).size - IRDMA_RING_USED_QUANTA(_ring) - 257) \ ) #define IRDMA_ATOMIC_RING_MOVE_HEAD(_ring, index, _retcode) \ { \ index = IRDMA_RING_CURRENT_HEAD(_ring); \ IRDMA_RING_MOVE_HEAD(_ring, _retcode); \ } enum irdma_qp_wqe_size { IRDMA_WQE_SIZE_32 = 
32, IRDMA_WQE_SIZE_64 = 64, IRDMA_WQE_SIZE_96 = 96, IRDMA_WQE_SIZE_128 = 128, IRDMA_WQE_SIZE_256 = 256, }; +enum irdma_ws_op_type { + IRDMA_WS_OP_TYPE_NODE = 0, + IRDMA_WS_OP_TYPE_LEAF_NODE_GROUP, +}; + +enum irdma_ws_rate_limit_flags { + IRDMA_WS_RATE_LIMIT_FLAGS_VALID = 0x1, + IRDMA_WS_NO_RDMA_RATE_LIMIT = 0x2, + IRDMA_WS_LEAF_NODE_IS_PART_GROUP = 0x4, + IRDMA_WS_TREE_RATE_LIMITING = 0x8, + IRDMA_WS_PACING_CONTROL = 0x10, +}; + /** * set_64bit_val - set 64 bit value to hw wqe * @wqe_words: wqe addr to write * @byte_index: index in wqe * @val: value to write **/ static inline void set_64bit_val(__le64 *wqe_words, u32 byte_index, u64 val) { wqe_words[byte_index >> 3] = htole64(val); } /** * set_32bit_val - set 32 bit value to hw wqe * @wqe_words: wqe addr to write * @byte_index: index in wqe * @val: value to write **/ static inline void set_32bit_val(__le32 *wqe_words, u32 byte_index, u32 val) { wqe_words[byte_index >> 2] = htole32(val); } /** * get_64bit_val - read 64 bit value from wqe * @wqe_words: wqe addr * @byte_index: index to read from * @val: read value **/ static inline void get_64bit_val(__le64 *wqe_words, u32 byte_index, u64 *val) { *val = le64toh(wqe_words[byte_index >> 3]); } /** * get_32bit_val - read 32 bit value from wqe * @wqe_words: wqe addr * @byte_index: index to read from * @val: return 32 bit value **/ static inline void get_32bit_val(__le32 *wqe_words, u32 byte_index, u32 *val) { *val = le32toh(wqe_words[byte_index >> 2]); } #endif /* IRDMA_DEFS_H */ diff --git a/contrib/ofed/libirdma/irdma_uk.c b/contrib/ofed/libirdma/irdma_uk.c index 97e3ac553c26..a1b796b318f4 100644 --- a/contrib/ofed/libirdma/irdma_uk.c +++ b/contrib/ofed/libirdma/irdma_uk.c @@ -1,1975 +1,1938 @@ /*- * SPDX-License-Identifier: GPL-2.0 or Linux-OpenIB * - * Copyright (c) 2015 - 2022 Intel Corporation + * Copyright (c) 2015 - 2023 Intel Corporation * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU * General Public License (GPL) Version 2, available from the file * COPYING in the main directory of this source tree, or the * OpenFabrics.org BSD license below: * * Redistribution and use in source and binary forms, with or * without modification, are permitted provided that the following * conditions are met: * * - Redistributions of source code must retain the above * copyright notice, this list of conditions and the following * disclaimer. * * - Redistributions in binary form must reproduce the above * copyright notice, this list of conditions and the following * disclaimer in the documentation and/or other materials * provided with the distribution. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE.
*/ /*$FreeBSD$*/ #include "osdep.h" #include "irdma_defs.h" #include "irdma_user.h" #include "irdma.h" /** * irdma_set_fragment - set fragment in wqe * @wqe: wqe for setting fragment * @offset: offset value * @sge: sge length and stag * @valid: The wqe valid */ static void irdma_set_fragment(__le64 * wqe, u32 offset, struct irdma_sge *sge, u8 valid) { if (sge) { set_64bit_val(wqe, offset, FIELD_PREP(IRDMAQPSQ_FRAG_TO, sge->tag_off)); set_64bit_val(wqe, offset + IRDMA_BYTE_8, FIELD_PREP(IRDMAQPSQ_VALID, valid) | FIELD_PREP(IRDMAQPSQ_FRAG_LEN, sge->len) | FIELD_PREP(IRDMAQPSQ_FRAG_STAG, sge->stag)); } else { set_64bit_val(wqe, offset, 0); set_64bit_val(wqe, offset + IRDMA_BYTE_8, FIELD_PREP(IRDMAQPSQ_VALID, valid)); } } /** * irdma_set_fragment_gen_1 - set fragment in wqe * @wqe: wqe for setting fragment * @offset: offset value * @sge: sge length and stag * @valid: wqe valid flag */ static void irdma_set_fragment_gen_1(__le64 * wqe, u32 offset, struct irdma_sge *sge, u8 valid) { if (sge) { set_64bit_val(wqe, offset, FIELD_PREP(IRDMAQPSQ_FRAG_TO, sge->tag_off)); set_64bit_val(wqe, offset + IRDMA_BYTE_8, FIELD_PREP(IRDMAQPSQ_GEN1_FRAG_LEN, sge->len) | FIELD_PREP(IRDMAQPSQ_GEN1_FRAG_STAG, sge->stag)); } else { set_64bit_val(wqe, offset, 0); set_64bit_val(wqe, offset + IRDMA_BYTE_8, 0); } } /** * irdma_nop_hdr - Format header section of noop WQE * @qp: hw qp ptr */ static inline u64 irdma_nop_hdr(struct irdma_qp_uk *qp){ return FIELD_PREP(IRDMAQPSQ_OPCODE, IRDMAQP_OP_NOP) | FIELD_PREP(IRDMAQPSQ_SIGCOMPL, false) | FIELD_PREP(IRDMAQPSQ_VALID, qp->swqe_polarity); } /** * irdma_nop_1 - insert a NOP wqe * @qp: hw qp ptr */ static int irdma_nop_1(struct irdma_qp_uk *qp) { __le64 *wqe; u32 wqe_idx; if (!qp->sq_ring.head) return EINVAL; wqe_idx = IRDMA_RING_CURRENT_HEAD(qp->sq_ring); wqe = qp->sq_base[wqe_idx].elem; qp->sq_wrtrk_array[wqe_idx].quanta = IRDMA_QP_WQE_MIN_QUANTA; set_64bit_val(wqe, IRDMA_BYTE_0, 0); set_64bit_val(wqe, IRDMA_BYTE_8, 0); set_64bit_val(wqe, IRDMA_BYTE_16, 0); /* make sure WQE is written before valid bit is set */ udma_to_device_barrier(); set_64bit_val(wqe, IRDMA_BYTE_24, irdma_nop_hdr(qp)); return 0; } /** * irdma_clr_wqes - clear next 128 sq entries * @qp: hw qp ptr * @qp_wqe_idx: wqe_idx */ void irdma_clr_wqes(struct irdma_qp_uk *qp, u32 qp_wqe_idx) { __le64 *wqe; u32 wqe_idx; if (!(qp_wqe_idx & 0x7F)) { wqe_idx = (qp_wqe_idx + 128) % qp->sq_ring.size; wqe = qp->sq_base[wqe_idx].elem; if (wqe_idx) memset(wqe, qp->swqe_polarity ? 0 : 0xFF, 0x1000); else memset(wqe, qp->swqe_polarity ? 
0xFF : 0, 0x1000); } } /** * irdma_uk_qp_post_wr - ring doorbell * @qp: hw qp ptr */ void irdma_uk_qp_post_wr(struct irdma_qp_uk *qp) { u64 temp; u32 hw_sq_tail; u32 sw_sq_head; /* valid bit is written and loads completed before reading shadow */ atomic_thread_fence(memory_order_seq_cst); /* read the doorbell shadow area */ get_64bit_val(qp->shadow_area, IRDMA_BYTE_0, &temp); hw_sq_tail = (u32)FIELD_GET(IRDMA_QP_DBSA_HW_SQ_TAIL, temp); sw_sq_head = IRDMA_RING_CURRENT_HEAD(qp->sq_ring); if (sw_sq_head != qp->initial_ring.head) { if (qp->push_dropped) { db_wr32(qp->qp_id, qp->wqe_alloc_db); qp->push_dropped = false; } else if (sw_sq_head != hw_sq_tail) { if (sw_sq_head > qp->initial_ring.head) { if (hw_sq_tail >= qp->initial_ring.head && hw_sq_tail < sw_sq_head) db_wr32(qp->qp_id, qp->wqe_alloc_db); } else { if (hw_sq_tail >= qp->initial_ring.head || hw_sq_tail < sw_sq_head) db_wr32(qp->qp_id, qp->wqe_alloc_db); } } } qp->initial_ring.head = qp->sq_ring.head; } /** * irdma_qp_ring_push_db - ring qp doorbell * @qp: hw qp ptr * @wqe_idx: wqe index */ static void irdma_qp_ring_push_db(struct irdma_qp_uk *qp, u32 wqe_idx) { set_32bit_val(qp->push_db, 0, FIELD_PREP(IRDMA_WQEALLOC_WQE_DESC_INDEX, wqe_idx >> 3) | qp->qp_id); qp->initial_ring.head = qp->sq_ring.head; qp->push_mode = true; qp->push_dropped = false; } void irdma_qp_push_wqe(struct irdma_qp_uk *qp, __le64 * wqe, u16 quanta, u32 wqe_idx, bool post_sq) { __le64 *push; if (IRDMA_RING_CURRENT_HEAD(qp->initial_ring) != IRDMA_RING_CURRENT_TAIL(qp->sq_ring) && !qp->push_mode) { if (post_sq) irdma_uk_qp_post_wr(qp); } else { push = (__le64 *) ((uintptr_t)qp->push_wqe + (wqe_idx & 0x7) * 0x20); irdma_memcpy(push, wqe, quanta * IRDMA_QP_WQE_MIN_SIZE); irdma_qp_ring_push_db(qp, wqe_idx); } } /** * irdma_qp_get_next_send_wqe - pad with NOP if needed, return where next WR should go * @qp: hw qp ptr * @wqe_idx: return wqe index * @quanta: (in/out) ptr to size of WR in quanta. Modified in case pad is needed * @total_size: size of WR in bytes * @info: info on WR */ __le64 * irdma_qp_get_next_send_wqe(struct irdma_qp_uk *qp, u32 *wqe_idx, u16 *quanta, u32 total_size, struct irdma_post_sq_info *info) { __le64 *wqe; __le64 *wqe_0 = NULL; u32 nop_wqe_idx; u16 avail_quanta, wqe_quanta = *quanta; u16 i; avail_quanta = qp->uk_attrs->max_hw_sq_chunk - (IRDMA_RING_CURRENT_HEAD(qp->sq_ring) % qp->uk_attrs->max_hw_sq_chunk); if (*quanta <= avail_quanta) { /* WR fits in current chunk */ if (*quanta > IRDMA_SQ_RING_FREE_QUANTA(qp->sq_ring)) return NULL; } else { /* Need to pad with NOP */ if (*quanta + avail_quanta > IRDMA_SQ_RING_FREE_QUANTA(qp->sq_ring)) return NULL; nop_wqe_idx = IRDMA_RING_CURRENT_HEAD(qp->sq_ring); for (i = 0; i < avail_quanta; i++) { irdma_nop_1(qp); IRDMA_RING_MOVE_HEAD_NOCHECK(qp->sq_ring); } if (qp->push_db && info->push_wqe) irdma_qp_push_wqe(qp, qp->sq_base[nop_wqe_idx].elem, avail_quanta, nop_wqe_idx, true); } *wqe_idx = IRDMA_RING_CURRENT_HEAD(qp->sq_ring); if (!*wqe_idx) qp->swqe_polarity = !qp->swqe_polarity; IRDMA_RING_MOVE_HEAD_BY_COUNT_NOCHECK(qp->sq_ring, *quanta); irdma_clr_wqes(qp, *wqe_idx); wqe = qp->sq_base[*wqe_idx].elem; if (qp->uk_attrs->hw_rev == IRDMA_GEN_1 && wqe_quanta == 1 && (IRDMA_RING_CURRENT_HEAD(qp->sq_ring) & 1)) { wqe_0 = qp->sq_base[IRDMA_RING_CURRENT_HEAD(qp->sq_ring)].elem; - wqe_0[3] = htole64(FIELD_PREP(IRDMAQPSQ_VALID, !qp->swqe_polarity)); + wqe_0[3] = htole64(FIELD_PREP(IRDMAQPSQ_VALID, + qp->swqe_polarity ? 
0 : 1)); } qp->sq_wrtrk_array[*wqe_idx].wrid = info->wr_id; qp->sq_wrtrk_array[*wqe_idx].wr_len = total_size; qp->sq_wrtrk_array[*wqe_idx].quanta = wqe_quanta; qp->sq_wrtrk_array[*wqe_idx].signaled = info->signaled; return wqe; } /** * irdma_qp_get_next_recv_wqe - get next qp's rcv wqe * @qp: hw qp ptr * @wqe_idx: return wqe index */ __le64 * irdma_qp_get_next_recv_wqe(struct irdma_qp_uk *qp, u32 *wqe_idx) { __le64 *wqe; int ret_code; if (IRDMA_RING_FULL_ERR(qp->rq_ring)) return NULL; IRDMA_ATOMIC_RING_MOVE_HEAD(qp->rq_ring, *wqe_idx, ret_code); if (ret_code) return NULL; if (!*wqe_idx) qp->rwqe_polarity = !qp->rwqe_polarity; /* rq_wqe_size_multiplier is no of 32 byte quanta in one rq wqe */ wqe = qp->rq_base[*wqe_idx * qp->rq_wqe_size_multiplier].elem; return wqe; } /** * irdma_uk_rdma_write - rdma write operation * @qp: hw qp ptr * @info: post sq information * @post_sq: flag to post sq */ int irdma_uk_rdma_write(struct irdma_qp_uk *qp, struct irdma_post_sq_info *info, bool post_sq) { u64 hdr; __le64 *wqe; struct irdma_rdma_write *op_info; u32 i, wqe_idx; u32 total_size = 0, byte_off; int ret_code; u32 frag_cnt, addl_frag_cnt; bool read_fence = false; u16 quanta; info->push_wqe = qp->push_db ? true : false; op_info = &info->op.rdma_write; if (op_info->num_lo_sges > qp->max_sq_frag_cnt) return EINVAL; for (i = 0; i < op_info->num_lo_sges; i++) total_size += op_info->lo_sg_list[i].len; read_fence |= info->read_fence; if (info->imm_data_valid) frag_cnt = op_info->num_lo_sges + 1; else frag_cnt = op_info->num_lo_sges; addl_frag_cnt = frag_cnt > 1 ? (frag_cnt - 1) : 0; ret_code = irdma_fragcnt_to_quanta_sq(frag_cnt, &quanta); if (ret_code) return ret_code; wqe = irdma_qp_get_next_send_wqe(qp, &wqe_idx, &quanta, total_size, info); if (!wqe) return ENOSPC; qp->sq_wrtrk_array[wqe_idx].signaled = info->signaled; set_64bit_val(wqe, IRDMA_BYTE_16, FIELD_PREP(IRDMAQPSQ_FRAG_TO, op_info->rem_addr.tag_off)); if (info->imm_data_valid) { set_64bit_val(wqe, IRDMA_BYTE_0, FIELD_PREP(IRDMAQPSQ_IMMDATA, info->imm_data)); i = 0; } else { qp->wqe_ops.iw_set_fragment(wqe, IRDMA_BYTE_0, op_info->lo_sg_list, qp->swqe_polarity); i = 1; } for (byte_off = IRDMA_BYTE_32; i < op_info->num_lo_sges; i++) { qp->wqe_ops.iw_set_fragment(wqe, byte_off, &op_info->lo_sg_list[i], qp->swqe_polarity); byte_off += 16; } /* if not an odd number set valid bit in next fragment */ if (qp->uk_attrs->hw_rev >= IRDMA_GEN_2 && !(frag_cnt & 0x01) && frag_cnt) { qp->wqe_ops.iw_set_fragment(wqe, byte_off, NULL, qp->swqe_polarity); if (qp->uk_attrs->hw_rev == IRDMA_GEN_2) ++addl_frag_cnt; } hdr = FIELD_PREP(IRDMAQPSQ_REMSTAG, op_info->rem_addr.stag) | FIELD_PREP(IRDMAQPSQ_OPCODE, info->op_type) | FIELD_PREP(IRDMAQPSQ_IMMDATAFLAG, info->imm_data_valid) | FIELD_PREP(IRDMAQPSQ_REPORTRTT, info->report_rtt) | FIELD_PREP(IRDMAQPSQ_ADDFRAGCNT, addl_frag_cnt) | FIELD_PREP(IRDMAQPSQ_PUSHWQE, info->push_wqe) | FIELD_PREP(IRDMAQPSQ_READFENCE, read_fence) | FIELD_PREP(IRDMAQPSQ_LOCALFENCE, info->local_fence) | FIELD_PREP(IRDMAQPSQ_SIGCOMPL, info->signaled) | FIELD_PREP(IRDMAQPSQ_VALID, qp->swqe_polarity); udma_to_device_barrier(); /* make sure WQE is populated before valid bit is set */ set_64bit_val(wqe, IRDMA_BYTE_24, hdr); if (info->push_wqe) irdma_qp_push_wqe(qp, wqe, quanta, wqe_idx, post_sq); else if (post_sq) irdma_uk_qp_post_wr(qp); return 0; } /** * irdma_uk_rdma_read - rdma read command * @qp: hw qp ptr * @info: post sq information * @inv_stag: flag for inv_stag * @post_sq: flag to post sq */ int irdma_uk_rdma_read(struct irdma_qp_uk *qp, 
struct irdma_post_sq_info *info, bool inv_stag, bool post_sq) { struct irdma_rdma_read *op_info; int ret_code; u32 i, byte_off, total_size = 0; bool local_fence = false; bool ord_fence = false; u32 addl_frag_cnt; __le64 *wqe; u32 wqe_idx; u16 quanta; u64 hdr; info->push_wqe = qp->push_db ? true : false; op_info = &info->op.rdma_read; if (qp->max_sq_frag_cnt < op_info->num_lo_sges) return EINVAL; for (i = 0; i < op_info->num_lo_sges; i++) total_size += op_info->lo_sg_list[i].len; ret_code = irdma_fragcnt_to_quanta_sq(op_info->num_lo_sges, &quanta); if (ret_code) return ret_code; wqe = irdma_qp_get_next_send_wqe(qp, &wqe_idx, &quanta, total_size, info); if (!wqe) return ENOSPC; if (qp->rd_fence_rate && (qp->ord_cnt++ == qp->rd_fence_rate)) { ord_fence = true; qp->ord_cnt = 0; } qp->sq_wrtrk_array[wqe_idx].signaled = info->signaled; addl_frag_cnt = op_info->num_lo_sges > 1 ? (op_info->num_lo_sges - 1) : 0; local_fence |= info->local_fence; qp->wqe_ops.iw_set_fragment(wqe, IRDMA_BYTE_0, op_info->lo_sg_list, qp->swqe_polarity); for (i = 1, byte_off = IRDMA_BYTE_32; i < op_info->num_lo_sges; ++i) { qp->wqe_ops.iw_set_fragment(wqe, byte_off, &op_info->lo_sg_list[i], qp->swqe_polarity); byte_off += IRDMA_BYTE_16; } /* if not an odd number set valid bit in next fragment */ if (qp->uk_attrs->hw_rev >= IRDMA_GEN_2 && !(op_info->num_lo_sges & 0x01) && op_info->num_lo_sges) { qp->wqe_ops.iw_set_fragment(wqe, byte_off, NULL, qp->swqe_polarity); if (qp->uk_attrs->hw_rev == IRDMA_GEN_2) ++addl_frag_cnt; } set_64bit_val(wqe, IRDMA_BYTE_16, FIELD_PREP(IRDMAQPSQ_FRAG_TO, op_info->rem_addr.tag_off)); hdr = FIELD_PREP(IRDMAQPSQ_REMSTAG, op_info->rem_addr.stag) | FIELD_PREP(IRDMAQPSQ_REPORTRTT, (info->report_rtt ? 1 : 0)) | FIELD_PREP(IRDMAQPSQ_ADDFRAGCNT, addl_frag_cnt) | FIELD_PREP(IRDMAQPSQ_OPCODE, (inv_stag ? IRDMAQP_OP_RDMA_READ_LOC_INV : IRDMAQP_OP_RDMA_READ)) | FIELD_PREP(IRDMAQPSQ_PUSHWQE, info->push_wqe) | FIELD_PREP(IRDMAQPSQ_READFENCE, info->read_fence || ord_fence ? 1 : 0) | FIELD_PREP(IRDMAQPSQ_LOCALFENCE, local_fence) | FIELD_PREP(IRDMAQPSQ_SIGCOMPL, info->signaled) | FIELD_PREP(IRDMAQPSQ_VALID, qp->swqe_polarity); udma_to_device_barrier(); /* make sure WQE is populated before valid bit is set */ set_64bit_val(wqe, IRDMA_BYTE_24, hdr); if (info->push_wqe) irdma_qp_push_wqe(qp, wqe, quanta, wqe_idx, post_sq); else if (post_sq) irdma_uk_qp_post_wr(qp); return 0; } /** * irdma_uk_send - rdma send command * @qp: hw qp ptr * @info: post sq information * @post_sq: flag to post sq */ int irdma_uk_send(struct irdma_qp_uk *qp, struct irdma_post_sq_info *info, bool post_sq) { __le64 *wqe; struct irdma_post_send *op_info; u64 hdr; u32 i, wqe_idx, total_size = 0, byte_off; int ret_code; u32 frag_cnt, addl_frag_cnt; bool read_fence = false; u16 quanta; info->push_wqe = qp->push_db ? true : false; op_info = &info->op.send; if (qp->max_sq_frag_cnt < op_info->num_sges) return EINVAL; for (i = 0; i < op_info->num_sges; i++) total_size += op_info->sg_list[i].len; if (info->imm_data_valid) frag_cnt = op_info->num_sges + 1; else frag_cnt = op_info->num_sges; ret_code = irdma_fragcnt_to_quanta_sq(frag_cnt, &quanta); if (ret_code) return ret_code; wqe = irdma_qp_get_next_send_wqe(qp, &wqe_idx, &quanta, total_size, info); if (!wqe) return ENOSPC; read_fence |= info->read_fence; addl_frag_cnt = frag_cnt > 1 ? (frag_cnt - 1) : 0; if (info->imm_data_valid) { set_64bit_val(wqe, IRDMA_BYTE_0, FIELD_PREP(IRDMAQPSQ_IMMDATA, info->imm_data)); i = 0; } else { qp->wqe_ops.iw_set_fragment(wqe, IRDMA_BYTE_0, frag_cnt ? 
op_info->sg_list : NULL, qp->swqe_polarity); i = 1; } for (byte_off = IRDMA_BYTE_32; i < op_info->num_sges; i++) { qp->wqe_ops.iw_set_fragment(wqe, byte_off, &op_info->sg_list[i], qp->swqe_polarity); byte_off += IRDMA_BYTE_16; } /* if not an odd number set valid bit in next fragment */ if (qp->uk_attrs->hw_rev >= IRDMA_GEN_2 && !(frag_cnt & 0x01) && frag_cnt) { qp->wqe_ops.iw_set_fragment(wqe, byte_off, NULL, qp->swqe_polarity); if (qp->uk_attrs->hw_rev == IRDMA_GEN_2) ++addl_frag_cnt; } set_64bit_val(wqe, IRDMA_BYTE_16, FIELD_PREP(IRDMAQPSQ_DESTQKEY, op_info->qkey) | FIELD_PREP(IRDMAQPSQ_DESTQPN, op_info->dest_qp)); hdr = FIELD_PREP(IRDMAQPSQ_REMSTAG, info->stag_to_inv) | FIELD_PREP(IRDMAQPSQ_AHID, op_info->ah_id) | FIELD_PREP(IRDMAQPSQ_IMMDATAFLAG, (info->imm_data_valid ? 1 : 0)) | FIELD_PREP(IRDMAQPSQ_REPORTRTT, (info->report_rtt ? 1 : 0)) | FIELD_PREP(IRDMAQPSQ_OPCODE, info->op_type) | FIELD_PREP(IRDMAQPSQ_ADDFRAGCNT, addl_frag_cnt) | FIELD_PREP(IRDMAQPSQ_PUSHWQE, info->push_wqe) | FIELD_PREP(IRDMAQPSQ_READFENCE, read_fence) | FIELD_PREP(IRDMAQPSQ_LOCALFENCE, info->local_fence) | FIELD_PREP(IRDMAQPSQ_SIGCOMPL, info->signaled) | FIELD_PREP(IRDMAQPSQ_UDPHEADER, info->udp_hdr) | FIELD_PREP(IRDMAQPSQ_L4LEN, info->l4len) | FIELD_PREP(IRDMAQPSQ_VALID, qp->swqe_polarity); udma_to_device_barrier(); /* make sure WQE is populated before valid bit is set */ set_64bit_val(wqe, IRDMA_BYTE_24, hdr); if (info->push_wqe) irdma_qp_push_wqe(qp, wqe, quanta, wqe_idx, post_sq); else if (post_sq) irdma_uk_qp_post_wr(qp); return 0; } /** * irdma_set_mw_bind_wqe_gen_1 - set mw bind wqe * @wqe: wqe for setting fragment * @op_info: info for setting bind wqe values */ static void irdma_set_mw_bind_wqe_gen_1(__le64 * wqe, struct irdma_bind_window *op_info) { set_64bit_val(wqe, IRDMA_BYTE_0, (uintptr_t)op_info->va); set_64bit_val(wqe, IRDMA_BYTE_8, FIELD_PREP(IRDMAQPSQ_PARENTMRSTAG, op_info->mw_stag) | FIELD_PREP(IRDMAQPSQ_MWSTAG, op_info->mr_stag)); set_64bit_val(wqe, IRDMA_BYTE_16, op_info->bind_len); } /** * irdma_copy_inline_data_gen_1 - Copy inline data to wqe * @wqe: pointer to wqe * @sge_list: table of pointers to inline data * @num_sges: Total inline data length * @polarity: compatibility parameter */ static void irdma_copy_inline_data_gen_1(u8 *wqe, struct irdma_sge *sge_list, u32 num_sges, u8 polarity) { u32 quanta_bytes_remaining = 16; u32 i; for (i = 0; i < num_sges; i++) { u8 *cur_sge = (u8 *)(uintptr_t)sge_list[i].tag_off; u32 sge_len = sge_list[i].len; while (sge_len) { u32 bytes_copied; bytes_copied = min(sge_len, quanta_bytes_remaining); irdma_memcpy(wqe, cur_sge, bytes_copied); wqe += bytes_copied; cur_sge += bytes_copied; quanta_bytes_remaining -= bytes_copied; sge_len -= bytes_copied; if (!quanta_bytes_remaining) { /* Remaining inline bytes reside after hdr */ wqe += 16; quanta_bytes_remaining = 32; } } } } /** * irdma_inline_data_size_to_quanta_gen_1 - based on inline data, quanta * @data_size: data size for inline * * Gets the quanta based on inline and immediate data. */ static inline u16 irdma_inline_data_size_to_quanta_gen_1(u32 data_size) { return data_size <= 16 ? 
IRDMA_QP_WQE_MIN_QUANTA : 2; } /** * irdma_set_mw_bind_wqe - set mw bind in wqe * @wqe: wqe for setting mw bind * @op_info: info for setting wqe values */ static void irdma_set_mw_bind_wqe(__le64 * wqe, struct irdma_bind_window *op_info) { set_64bit_val(wqe, IRDMA_BYTE_0, (uintptr_t)op_info->va); set_64bit_val(wqe, IRDMA_BYTE_8, FIELD_PREP(IRDMAQPSQ_PARENTMRSTAG, op_info->mr_stag) | FIELD_PREP(IRDMAQPSQ_MWSTAG, op_info->mw_stag)); set_64bit_val(wqe, IRDMA_BYTE_16, op_info->bind_len); } /** * irdma_copy_inline_data - Copy inline data to wqe * @wqe: pointer to wqe * @sge_list: table of pointers to inline data * @num_sges: number of SGE's * @polarity: polarity of wqe valid bit */ static void -irdma_copy_inline_data(u8 *wqe, struct irdma_sge *sge_list, u32 num_sges, - u8 polarity) +irdma_copy_inline_data(u8 *wqe, struct irdma_sge *sge_list, + u32 num_sges, u8 polarity) { u8 inline_valid = polarity << IRDMA_INLINE_VALID_S; u32 quanta_bytes_remaining = 8; u32 i; bool first_quanta = true; wqe += 8; for (i = 0; i < num_sges; i++) { u8 *cur_sge = (u8 *)(uintptr_t)sge_list[i].tag_off; u32 sge_len = sge_list[i].len; while (sge_len) { u32 bytes_copied; bytes_copied = min(sge_len, quanta_bytes_remaining); irdma_memcpy(wqe, cur_sge, bytes_copied); wqe += bytes_copied; cur_sge += bytes_copied; quanta_bytes_remaining -= bytes_copied; sge_len -= bytes_copied; if (!quanta_bytes_remaining) { quanta_bytes_remaining = 31; /* Remaining inline bytes reside after hdr */ if (first_quanta) { first_quanta = false; wqe += 16; } else { *wqe = inline_valid; wqe++; } } } } if (!first_quanta && quanta_bytes_remaining < 31) *(wqe + quanta_bytes_remaining) = inline_valid; } /** * irdma_inline_data_size_to_quanta - based on inline data, quanta * @data_size: data size for inline * * Gets the quanta based on inline and immediate data. */ static u16 irdma_inline_data_size_to_quanta(u32 data_size) { if (data_size <= 8) return IRDMA_QP_WQE_MIN_QUANTA; else if (data_size <= 39) return 2; else if (data_size <= 70) return 3; else if (data_size <= 101) return 4; else if (data_size <= 132) return 5; else if (data_size <= 163) return 6; else if (data_size <= 194) return 7; else return 8; } /** * irdma_uk_inline_rdma_write - inline rdma write operation * @qp: hw qp ptr * @info: post sq information * @post_sq: flag to post sq */ int irdma_uk_inline_rdma_write(struct irdma_qp_uk *qp, struct irdma_post_sq_info *info, bool post_sq) { __le64 *wqe; struct irdma_rdma_write *op_info; u64 hdr = 0; u32 wqe_idx; bool read_fence = false; u16 quanta; u32 i, total_size = 0; info->push_wqe = qp->push_db ? true : false; op_info = &info->op.rdma_write; if (unlikely(qp->max_sq_frag_cnt < op_info->num_lo_sges)) return EINVAL; for (i = 0; i < op_info->num_lo_sges; i++) total_size += op_info->lo_sg_list[i].len; if (unlikely(total_size > qp->max_inline_data)) return EINVAL; quanta = qp->wqe_ops.iw_inline_data_size_to_quanta(total_size); wqe = irdma_qp_get_next_send_wqe(qp, &wqe_idx, &quanta, total_size, info); if (!wqe) return ENOSPC; qp->sq_wrtrk_array[wqe_idx].signaled = info->signaled; read_fence |= info->read_fence; set_64bit_val(wqe, IRDMA_BYTE_16, FIELD_PREP(IRDMAQPSQ_FRAG_TO, op_info->rem_addr.tag_off)); hdr = FIELD_PREP(IRDMAQPSQ_REMSTAG, op_info->rem_addr.stag) | FIELD_PREP(IRDMAQPSQ_OPCODE, info->op_type) | FIELD_PREP(IRDMAQPSQ_INLINEDATALEN, total_size) | FIELD_PREP(IRDMAQPSQ_REPORTRTT, info->report_rtt ? 1 : 0) | FIELD_PREP(IRDMAQPSQ_INLINEDATAFLAG, 1) | FIELD_PREP(IRDMAQPSQ_IMMDATAFLAG, info->imm_data_valid ? 
1 : 0) | FIELD_PREP(IRDMAQPSQ_PUSHWQE, info->push_wqe ? 1 : 0) | FIELD_PREP(IRDMAQPSQ_READFENCE, read_fence) | FIELD_PREP(IRDMAQPSQ_LOCALFENCE, info->local_fence) | FIELD_PREP(IRDMAQPSQ_SIGCOMPL, info->signaled) | FIELD_PREP(IRDMAQPSQ_VALID, qp->swqe_polarity); if (info->imm_data_valid) set_64bit_val(wqe, IRDMA_BYTE_0, FIELD_PREP(IRDMAQPSQ_IMMDATA, info->imm_data)); qp->wqe_ops.iw_copy_inline_data((u8 *)wqe, op_info->lo_sg_list, op_info->num_lo_sges, qp->swqe_polarity); udma_to_device_barrier(); /* make sure WQE is populated before valid bit is set */ set_64bit_val(wqe, IRDMA_BYTE_24, hdr); if (info->push_wqe) irdma_qp_push_wqe(qp, wqe, quanta, wqe_idx, post_sq); else if (post_sq) irdma_uk_qp_post_wr(qp); return 0; } /** * irdma_uk_inline_send - inline send operation * @qp: hw qp ptr * @info: post sq information * @post_sq: flag to post sq */ int irdma_uk_inline_send(struct irdma_qp_uk *qp, struct irdma_post_sq_info *info, bool post_sq) { __le64 *wqe; struct irdma_post_send *op_info; u64 hdr; u32 wqe_idx; bool read_fence = false; u16 quanta; u32 i, total_size = 0; info->push_wqe = qp->push_db ? true : false; op_info = &info->op.send; if (unlikely(qp->max_sq_frag_cnt < op_info->num_sges)) return EINVAL; for (i = 0; i < op_info->num_sges; i++) total_size += op_info->sg_list[i].len; if (unlikely(total_size > qp->max_inline_data)) return EINVAL; quanta = qp->wqe_ops.iw_inline_data_size_to_quanta(total_size); wqe = irdma_qp_get_next_send_wqe(qp, &wqe_idx, &quanta, total_size, info); if (!wqe) return ENOSPC; set_64bit_val(wqe, IRDMA_BYTE_16, FIELD_PREP(IRDMAQPSQ_DESTQKEY, op_info->qkey) | FIELD_PREP(IRDMAQPSQ_DESTQPN, op_info->dest_qp)); read_fence |= info->read_fence; hdr = FIELD_PREP(IRDMAQPSQ_REMSTAG, info->stag_to_inv) | FIELD_PREP(IRDMAQPSQ_AHID, op_info->ah_id) | FIELD_PREP(IRDMAQPSQ_OPCODE, info->op_type) | FIELD_PREP(IRDMAQPSQ_INLINEDATALEN, total_size) | FIELD_PREP(IRDMAQPSQ_IMMDATAFLAG, (info->imm_data_valid ? 1 : 0)) | FIELD_PREP(IRDMAQPSQ_REPORTRTT, (info->report_rtt ? 1 : 0)) | FIELD_PREP(IRDMAQPSQ_INLINEDATAFLAG, 1) | FIELD_PREP(IRDMAQPSQ_PUSHWQE, info->push_wqe) | FIELD_PREP(IRDMAQPSQ_READFENCE, read_fence) | FIELD_PREP(IRDMAQPSQ_LOCALFENCE, info->local_fence) | FIELD_PREP(IRDMAQPSQ_SIGCOMPL, info->signaled) | FIELD_PREP(IRDMAQPSQ_UDPHEADER, info->udp_hdr) | FIELD_PREP(IRDMAQPSQ_L4LEN, info->l4len) | FIELD_PREP(IRDMAQPSQ_VALID, qp->swqe_polarity); if (info->imm_data_valid) set_64bit_val(wqe, IRDMA_BYTE_0, FIELD_PREP(IRDMAQPSQ_IMMDATA, info->imm_data)); qp->wqe_ops.iw_copy_inline_data((u8 *)wqe, op_info->sg_list, op_info->num_sges, qp->swqe_polarity); udma_to_device_barrier(); /* make sure WQE is populated before valid bit is set */ set_64bit_val(wqe, IRDMA_BYTE_24, hdr); if (info->push_wqe) irdma_qp_push_wqe(qp, wqe, quanta, wqe_idx, post_sq); else if (post_sq) irdma_uk_qp_post_wr(qp); return 0; } /** * irdma_uk_stag_local_invalidate - stag invalidate operation * @qp: hw qp ptr * @info: post sq information * @post_sq: flag to post sq */ int irdma_uk_stag_local_invalidate(struct irdma_qp_uk *qp, struct irdma_post_sq_info *info, bool post_sq) { __le64 *wqe; struct irdma_inv_local_stag *op_info; u64 hdr; u32 wqe_idx; bool local_fence = false; struct irdma_sge sge = {0}; u16 quanta = IRDMA_QP_WQE_MIN_QUANTA; info->push_wqe = qp->push_db ? 
true : false; op_info = &info->op.inv_local_stag; local_fence = info->local_fence; wqe = irdma_qp_get_next_send_wqe(qp, &wqe_idx, &quanta, 0, info); if (!wqe) return ENOSPC; sge.stag = op_info->target_stag; qp->wqe_ops.iw_set_fragment(wqe, IRDMA_BYTE_0, &sge, 0); set_64bit_val(wqe, IRDMA_BYTE_16, 0); hdr = FIELD_PREP(IRDMAQPSQ_OPCODE, IRDMA_OP_TYPE_INV_STAG) | FIELD_PREP(IRDMAQPSQ_PUSHWQE, info->push_wqe) | FIELD_PREP(IRDMAQPSQ_READFENCE, info->read_fence) | FIELD_PREP(IRDMAQPSQ_LOCALFENCE, local_fence) | FIELD_PREP(IRDMAQPSQ_SIGCOMPL, info->signaled) | FIELD_PREP(IRDMAQPSQ_VALID, qp->swqe_polarity); udma_to_device_barrier(); /* make sure WQE is populated before valid bit is set */ set_64bit_val(wqe, IRDMA_BYTE_24, hdr); if (info->push_wqe) irdma_qp_push_wqe(qp, wqe, quanta, wqe_idx, post_sq); else if (post_sq) irdma_uk_qp_post_wr(qp); return 0; } /** * irdma_uk_mw_bind - bind Memory Window * @qp: hw qp ptr * @info: post sq information * @post_sq: flag to post sq */ int irdma_uk_mw_bind(struct irdma_qp_uk *qp, struct irdma_post_sq_info *info, bool post_sq) { __le64 *wqe; struct irdma_bind_window *op_info; u64 hdr; u32 wqe_idx; bool local_fence; u16 quanta = IRDMA_QP_WQE_MIN_QUANTA; info->push_wqe = qp->push_db ? true : false; op_info = &info->op.bind_window; local_fence = info->local_fence; wqe = irdma_qp_get_next_send_wqe(qp, &wqe_idx, &quanta, 0, info); if (!wqe) return ENOSPC; qp->wqe_ops.iw_set_mw_bind_wqe(wqe, op_info); hdr = FIELD_PREP(IRDMAQPSQ_OPCODE, IRDMA_OP_TYPE_BIND_MW) | FIELD_PREP(IRDMAQPSQ_STAGRIGHTS, ((op_info->ena_reads << 2) | (op_info->ena_writes << 3))) | FIELD_PREP(IRDMAQPSQ_VABASEDTO, (op_info->addressing_type == IRDMA_ADDR_TYPE_VA_BASED ? 1 : 0)) | FIELD_PREP(IRDMAQPSQ_MEMWINDOWTYPE, (op_info->mem_window_type_1 ? 1 : 0)) | FIELD_PREP(IRDMAQPSQ_PUSHWQE, info->push_wqe) | FIELD_PREP(IRDMAQPSQ_READFENCE, info->read_fence) | FIELD_PREP(IRDMAQPSQ_LOCALFENCE, local_fence) | FIELD_PREP(IRDMAQPSQ_SIGCOMPL, info->signaled) | FIELD_PREP(IRDMAQPSQ_VALID, qp->swqe_polarity); udma_to_device_barrier(); /* make sure WQE is populated before valid bit is set */ set_64bit_val(wqe, IRDMA_BYTE_24, hdr); if (info->push_wqe) irdma_qp_push_wqe(qp, wqe, quanta, wqe_idx, post_sq); else if (post_sq) irdma_uk_qp_post_wr(qp); return 0; } /** * irdma_uk_post_receive - post receive wqe * @qp: hw qp ptr * @info: post rq information */ int irdma_uk_post_receive(struct irdma_qp_uk *qp, struct irdma_post_rq_info *info) { u32 wqe_idx, i, byte_off; u32 addl_frag_cnt; __le64 *wqe; u64 hdr; if (qp->max_rq_frag_cnt < info->num_sges) return EINVAL; wqe = irdma_qp_get_next_recv_wqe(qp, &wqe_idx); if (!wqe) return ENOSPC; qp->rq_wrid_array[wqe_idx] = info->wr_id; addl_frag_cnt = info->num_sges > 1 ? 
(info->num_sges - 1) : 0; qp->wqe_ops.iw_set_fragment(wqe, IRDMA_BYTE_0, info->sg_list, qp->rwqe_polarity); for (i = 1, byte_off = IRDMA_BYTE_32; i < info->num_sges; i++) { qp->wqe_ops.iw_set_fragment(wqe, byte_off, &info->sg_list[i], qp->rwqe_polarity); byte_off += 16; } /* if not an odd number set valid bit in next fragment */ if (qp->uk_attrs->hw_rev >= IRDMA_GEN_2 && !(info->num_sges & 0x01) && info->num_sges) { qp->wqe_ops.iw_set_fragment(wqe, byte_off, NULL, qp->rwqe_polarity); if (qp->uk_attrs->hw_rev == IRDMA_GEN_2) ++addl_frag_cnt; } set_64bit_val(wqe, IRDMA_BYTE_16, 0); hdr = FIELD_PREP(IRDMAQPSQ_ADDFRAGCNT, addl_frag_cnt) | FIELD_PREP(IRDMAQPSQ_VALID, qp->rwqe_polarity); udma_to_device_barrier(); /* make sure WQE is populated before valid bit is set */ set_64bit_val(wqe, IRDMA_BYTE_24, hdr); return 0; } /** * irdma_uk_cq_resize - reset the cq buffer info * @cq: cq to resize * @cq_base: new cq buffer addr * @cq_size: number of cqes */ void irdma_uk_cq_resize(struct irdma_cq_uk *cq, void *cq_base, int cq_size) { cq->cq_base = cq_base; cq->cq_size = cq_size; IRDMA_RING_INIT(cq->cq_ring, cq->cq_size); cq->polarity = 1; } /** * irdma_uk_cq_set_resized_cnt - record the count of the resized buffers * @cq: cq to resize * @cq_cnt: the count of the resized cq buffers */ void irdma_uk_cq_set_resized_cnt(struct irdma_cq_uk *cq, u16 cq_cnt) { u64 temp_val; u16 sw_cq_sel; u8 arm_next_se; u8 arm_next; u8 arm_seq_num; get_64bit_val(cq->shadow_area, 32, &temp_val); sw_cq_sel = (u16)FIELD_GET(IRDMA_CQ_DBSA_SW_CQ_SELECT, temp_val); sw_cq_sel += cq_cnt; arm_seq_num = (u8)FIELD_GET(IRDMA_CQ_DBSA_ARM_SEQ_NUM, temp_val); arm_next_se = (u8)FIELD_GET(IRDMA_CQ_DBSA_ARM_NEXT_SE, temp_val); arm_next = (u8)FIELD_GET(IRDMA_CQ_DBSA_ARM_NEXT, temp_val); temp_val = FIELD_PREP(IRDMA_CQ_DBSA_ARM_SEQ_NUM, arm_seq_num) | FIELD_PREP(IRDMA_CQ_DBSA_SW_CQ_SELECT, sw_cq_sel) | FIELD_PREP(IRDMA_CQ_DBSA_ARM_NEXT_SE, arm_next_se) | FIELD_PREP(IRDMA_CQ_DBSA_ARM_NEXT, arm_next); set_64bit_val(cq->shadow_area, 32, temp_val); } /** * irdma_uk_cq_request_notification - cq notification request (door bell) * @cq: hw cq * @cq_notify: notification type */ void irdma_uk_cq_request_notification(struct irdma_cq_uk *cq, enum irdma_cmpl_notify cq_notify) { u64 temp_val; u16 sw_cq_sel; u8 arm_next_se = 0; u8 arm_next = 0; u8 arm_seq_num; get_64bit_val(cq->shadow_area, IRDMA_BYTE_32, &temp_val); arm_seq_num = (u8)FIELD_GET(IRDMA_CQ_DBSA_ARM_SEQ_NUM, temp_val); arm_seq_num++; sw_cq_sel = (u16)FIELD_GET(IRDMA_CQ_DBSA_SW_CQ_SELECT, temp_val); arm_next_se = (u8)FIELD_GET(IRDMA_CQ_DBSA_ARM_NEXT_SE, temp_val); arm_next_se |= 1; if (cq_notify == IRDMA_CQ_COMPL_EVENT) arm_next = 1; temp_val = FIELD_PREP(IRDMA_CQ_DBSA_ARM_SEQ_NUM, arm_seq_num) | FIELD_PREP(IRDMA_CQ_DBSA_SW_CQ_SELECT, sw_cq_sel) | FIELD_PREP(IRDMA_CQ_DBSA_ARM_NEXT_SE, arm_next_se) | FIELD_PREP(IRDMA_CQ_DBSA_ARM_NEXT, arm_next); set_64bit_val(cq->shadow_area, IRDMA_BYTE_32, temp_val); udma_to_device_barrier(); /* make sure WQE is populated before valid bit is set */ db_wr32(cq->cq_id, cq->cqe_alloc_db); } static void irdma_copy_quanta(__le64 * dst, __le64 * src, u32 offset, bool flip, bool barrier) { __le64 val; get_64bit_val(src, offset, &val); set_64bit_val(dst, offset, val); get_64bit_val(src, offset + 8, &val); if (flip) val ^= IRDMAQPSQ_VALID; set_64bit_val(dst, offset + 8, val); get_64bit_val(src, offset + 24, &val); if (flip) val ^= IRDMAQPSQ_VALID; if (barrier) udma_to_device_barrier(); /* make sure WQE is populated before valid bit is set */ set_64bit_val(dst, offset + 24, 
val); } static void irdma_copy_wqe(__le64 * dst, __le64 * src, u8 wqe_quanta, bool flip_polarity) { u32 offset; offset = 32; while (--wqe_quanta) { irdma_copy_quanta(dst, src, offset, flip_polarity, false); offset += 32; } irdma_copy_quanta(dst, src, 0, flip_polarity, true); } static void irdma_repost_rq_wqes(struct irdma_qp_uk *qp, u32 start_idx, u32 end_idx) { __le64 *dst_wqe, *src_wqe; - u32 wqe_idx; + u32 wqe_idx = 0; u8 wqe_quanta = qp->rq_wqe_size_multiplier; bool flip_polarity; u64 val; libirdma_debug("reposting_wqes: from start_idx=%d to end_idx = %d\n", start_idx, end_idx); if (pthread_spin_lock(qp->lock)) return; while (start_idx != end_idx) { IRDMA_RING_SET_TAIL(qp->rq_ring, start_idx + 1); src_wqe = qp->rq_base[start_idx * qp->rq_wqe_size_multiplier].elem; dst_wqe = irdma_qp_get_next_recv_wqe(qp, &wqe_idx); /* Check to see if polarity has changed */ get_64bit_val(src_wqe, 24, &val); if (FIELD_GET(IRDMAQPSQ_VALID, val) != qp->rwqe_polarity) flip_polarity = true; else flip_polarity = false; qp->rq_wrid_array[wqe_idx] = qp->rq_wrid_array[start_idx]; irdma_copy_wqe(dst_wqe, src_wqe, wqe_quanta, flip_polarity); start_idx = (start_idx + 1) % qp->rq_size; } pthread_spin_unlock(qp->lock); } static int irdma_check_rq_cqe(struct irdma_qp_uk *qp, u32 *array_idx) { u32 exp_idx = (qp->last_rx_cmpl_idx + 1) % qp->rq_size; if (*array_idx != exp_idx) { if (qp->uk_attrs->feature_flags & IRDMA_FEATURE_RELAX_RQ_ORDER) { irdma_repost_rq_wqes(qp, exp_idx, *array_idx); qp->last_rx_cmpl_idx = *array_idx; return 0; } *array_idx = exp_idx; qp->last_rx_cmpl_idx = exp_idx; return -1; } qp->last_rx_cmpl_idx = *array_idx; return 0; } /** * irdma_skip_duplicate_flush_cmpl - check last cmpl and update wqe if needed * * @ring: sq/rq ring * @flush_seen: information if flush for specific ring was already seen * @comp_status: completion status * @wqe_idx: new value of WQE index returned if there is more work on ring */ static inline int irdma_skip_duplicate_flush_cmpl(struct irdma_ring ring, u8 flush_seen, enum irdma_cmpl_status comp_status, u32 *wqe_idx) { if (flush_seen) { if (IRDMA_RING_MORE_WORK(ring)) *wqe_idx = ring.tail; else return ENOENT; } return 0; } /** * irdma_uk_cq_poll_cmpl - get cq completion info * @cq: hw cq * @info: cq poll information returned */ int irdma_uk_cq_poll_cmpl(struct irdma_cq_uk *cq, struct irdma_cq_poll_info *info) { u64 comp_ctx, qword0, qword2, qword3; __le64 *cqe; struct irdma_qp_uk *qp; struct irdma_ring *pring = NULL; u32 wqe_idx; int ret_code; bool move_cq_head = true; u8 polarity; bool ext_valid; __le64 *ext_cqe; if (cq->avoid_mem_cflct) cqe = IRDMA_GET_CURRENT_EXTENDED_CQ_ELEM(cq); else cqe = IRDMA_GET_CURRENT_CQ_ELEM(cq); get_64bit_val(cqe, IRDMA_BYTE_24, &qword3); polarity = (u8)FIELD_GET(IRDMA_CQ_VALID, qword3); if (polarity != cq->polarity) return ENOENT; /* Ensure CQE contents are read after valid bit is checked */ udma_from_device_barrier(); ext_valid = (bool)FIELD_GET(IRDMA_CQ_EXTCQE, qword3); if (ext_valid) { u64 qword6, qword7; u32 peek_head; if (cq->avoid_mem_cflct) { ext_cqe = (__le64 *) ((u8 *)cqe + 32); get_64bit_val(ext_cqe, IRDMA_BYTE_24, &qword7); polarity = (u8)FIELD_GET(IRDMA_CQ_VALID, qword7); } else { peek_head = (cq->cq_ring.head + 1) % cq->cq_ring.size; ext_cqe = cq->cq_base[peek_head].buf; get_64bit_val(ext_cqe, IRDMA_BYTE_24, &qword7); polarity = (u8)FIELD_GET(IRDMA_CQ_VALID, qword7); if (!peek_head) polarity ^= 1; } if (polarity != cq->polarity) return ENOENT; /* Ensure ext CQE contents are read after ext valid bit is checked */ 
udma_from_device_barrier(); info->imm_valid = (bool)FIELD_GET(IRDMA_CQ_IMMVALID, qword7); if (info->imm_valid) { u64 qword4; get_64bit_val(ext_cqe, IRDMA_BYTE_0, &qword4); info->imm_data = (u32)FIELD_GET(IRDMA_CQ_IMMDATALOW32, qword4); } info->ud_smac_valid = (bool)FIELD_GET(IRDMA_CQ_UDSMACVALID, qword7); info->ud_vlan_valid = (bool)FIELD_GET(IRDMA_CQ_UDVLANVALID, qword7); if (info->ud_smac_valid || info->ud_vlan_valid) { get_64bit_val(ext_cqe, IRDMA_BYTE_16, &qword6); if (info->ud_vlan_valid) info->ud_vlan = (u16)FIELD_GET(IRDMA_CQ_UDVLAN, qword6); if (info->ud_smac_valid) { info->ud_smac[5] = qword6 & 0xFF; info->ud_smac[4] = (qword6 >> 8) & 0xFF; info->ud_smac[3] = (qword6 >> 16) & 0xFF; info->ud_smac[2] = (qword6 >> 24) & 0xFF; info->ud_smac[1] = (qword6 >> 32) & 0xFF; info->ud_smac[0] = (qword6 >> 40) & 0xFF; } } } else { info->imm_valid = false; info->ud_smac_valid = false; info->ud_vlan_valid = false; } info->q_type = (u8)FIELD_GET(IRDMA_CQ_SQ, qword3); info->error = (bool)FIELD_GET(IRDMA_CQ_ERROR, qword3); info->push_dropped = (bool)FIELD_GET(IRDMACQ_PSHDROP, qword3); info->ipv4 = (bool)FIELD_GET(IRDMACQ_IPV4, qword3); get_64bit_val(cqe, IRDMA_BYTE_8, &comp_ctx); qp = (struct irdma_qp_uk *)(irdma_uintptr) comp_ctx; if (info->error) { info->major_err = FIELD_GET(IRDMA_CQ_MAJERR, qword3); info->minor_err = FIELD_GET(IRDMA_CQ_MINERR, qword3); switch (info->major_err) { case IRDMA_FLUSH_MAJOR_ERR: /* Set the min error to standard flush error code for remaining cqes */ if (info->minor_err != FLUSH_GENERAL_ERR) { qword3 &= ~IRDMA_CQ_MINERR; qword3 |= FIELD_PREP(IRDMA_CQ_MINERR, FLUSH_GENERAL_ERR); set_64bit_val(cqe, IRDMA_BYTE_24, qword3); } info->comp_status = IRDMA_COMPL_STATUS_FLUSHED; break; default: info->comp_status = IRDMA_COMPL_STATUS_UNKNOWN; break; } } else { info->comp_status = IRDMA_COMPL_STATUS_SUCCESS; } get_64bit_val(cqe, IRDMA_BYTE_0, &qword0); get_64bit_val(cqe, IRDMA_BYTE_16, &qword2); info->stat.raw = (u32)FIELD_GET(IRDMACQ_TCPSQN_ROCEPSN_RTT_TS, qword0); info->qp_id = (u32)FIELD_GET(IRDMACQ_QPID, qword2); info->ud_src_qpn = (u32)FIELD_GET(IRDMACQ_UDSRCQPN, qword2); info->solicited_event = (bool)FIELD_GET(IRDMACQ_SOEVENT, qword3); if (!qp || qp->destroy_pending) { ret_code = EFAULT; goto exit; } wqe_idx = (u32)FIELD_GET(IRDMA_CQ_WQEIDX, qword3); info->qp_handle = (irdma_qp_handle) (irdma_uintptr) qp; info->op_type = (u8)FIELD_GET(IRDMACQ_OP, qword3); if (info->q_type == IRDMA_CQE_QTYPE_RQ) { u32 array_idx; ret_code = irdma_skip_duplicate_flush_cmpl(qp->rq_ring, qp->rq_flush_seen, info->comp_status, &wqe_idx); if (ret_code != 0) goto exit; array_idx = wqe_idx / qp->rq_wqe_size_multiplier; if (info->comp_status == IRDMA_COMPL_STATUS_FLUSHED || info->comp_status == IRDMA_COMPL_STATUS_UNKNOWN) { if (!IRDMA_RING_MORE_WORK(qp->rq_ring)) { ret_code = ENOENT; goto exit; } info->wr_id = qp->rq_wrid_array[qp->rq_ring.tail]; info->signaled = 1; array_idx = qp->rq_ring.tail; } else { info->wr_id = qp->rq_wrid_array[array_idx]; info->signaled = 1; if (irdma_check_rq_cqe(qp, &array_idx)) { info->wr_id = qp->rq_wrid_array[array_idx]; info->comp_status = IRDMA_COMPL_STATUS_UNKNOWN; IRDMA_RING_SET_TAIL(qp->rq_ring, array_idx + 1); return 0; } } info->bytes_xfered = (u32)FIELD_GET(IRDMACQ_PAYLDLEN, qword0); if (qword3 & IRDMACQ_STAG) { info->stag_invalid_set = true; info->inv_stag = (u32)FIELD_GET(IRDMACQ_INVSTAG, qword2); } else { info->stag_invalid_set = false; } IRDMA_RING_SET_TAIL(qp->rq_ring, array_idx + 1); if (info->comp_status == IRDMA_COMPL_STATUS_FLUSHED) { qp->rq_flush_seen = 
true; if (!IRDMA_RING_MORE_WORK(qp->rq_ring)) qp->rq_flush_complete = true; else move_cq_head = false; } pring = &qp->rq_ring; } else { /* q_type is IRDMA_CQE_QTYPE_SQ */ if (qp->first_sq_wq) { if (wqe_idx + 1 >= qp->conn_wqes) qp->first_sq_wq = false; if (wqe_idx < qp->conn_wqes && qp->sq_ring.head == qp->sq_ring.tail) { IRDMA_RING_MOVE_HEAD_NOCHECK(cq->cq_ring); IRDMA_RING_MOVE_TAIL(cq->cq_ring); set_64bit_val(cq->shadow_area, IRDMA_BYTE_0, IRDMA_RING_CURRENT_HEAD(cq->cq_ring)); memset(info, 0, sizeof(struct irdma_cq_poll_info)); return irdma_uk_cq_poll_cmpl(cq, info); } } /* cease posting push mode on push drop */ if (info->push_dropped) { qp->push_mode = false; qp->push_dropped = true; } ret_code = irdma_skip_duplicate_flush_cmpl(qp->sq_ring, qp->sq_flush_seen, info->comp_status, &wqe_idx); if (ret_code != 0) goto exit; if (info->comp_status != IRDMA_COMPL_STATUS_FLUSHED) { info->wr_id = qp->sq_wrtrk_array[wqe_idx].wrid; info->signaled = qp->sq_wrtrk_array[wqe_idx].signaled; if (!info->comp_status) info->bytes_xfered = qp->sq_wrtrk_array[wqe_idx].wr_len; info->op_type = (u8)FIELD_GET(IRDMACQ_OP, qword3); IRDMA_RING_SET_TAIL(qp->sq_ring, wqe_idx + qp->sq_wrtrk_array[wqe_idx].quanta); } else { if (pthread_spin_lock(qp->lock)) { ret_code = ENOENT; goto exit; } if (!IRDMA_RING_MORE_WORK(qp->sq_ring)) { pthread_spin_unlock(qp->lock); ret_code = ENOENT; goto exit; } do { __le64 *sw_wqe; u64 wqe_qword; u32 tail; tail = qp->sq_ring.tail; sw_wqe = qp->sq_base[tail].elem; get_64bit_val(sw_wqe, IRDMA_BYTE_24, &wqe_qword); - info->op_type = (u8)FIELD_GET(IRDMAQPSQ_OPCODE, wqe_qword); + info->op_type = (u8)FIELD_GET(IRDMAQPSQ_OPCODE, + wqe_qword); IRDMA_RING_SET_TAIL(qp->sq_ring, tail + qp->sq_wrtrk_array[tail].quanta); if (info->op_type != IRDMAQP_OP_NOP) { info->wr_id = qp->sq_wrtrk_array[tail].wrid; info->signaled = qp->sq_wrtrk_array[tail].signaled; info->bytes_xfered = qp->sq_wrtrk_array[tail].wr_len; break; } } while (1); if (info->op_type == IRDMA_OP_TYPE_BIND_MW && info->minor_err == FLUSH_PROT_ERR) info->minor_err = FLUSH_MW_BIND_ERR; qp->sq_flush_seen = true; if (!IRDMA_RING_MORE_WORK(qp->sq_ring)) qp->sq_flush_complete = true; pthread_spin_unlock(qp->lock); } pring = &qp->sq_ring; } ret_code = 0; exit: if (!ret_code && info->comp_status == IRDMA_COMPL_STATUS_FLUSHED) { if (pring && IRDMA_RING_MORE_WORK(*pring)) move_cq_head = false; } if (move_cq_head) { IRDMA_RING_MOVE_HEAD_NOCHECK(cq->cq_ring); if (!IRDMA_RING_CURRENT_HEAD(cq->cq_ring)) cq->polarity ^= 1; if (ext_valid && !cq->avoid_mem_cflct) { IRDMA_RING_MOVE_HEAD_NOCHECK(cq->cq_ring); if (!IRDMA_RING_CURRENT_HEAD(cq->cq_ring)) cq->polarity ^= 1; } IRDMA_RING_MOVE_TAIL(cq->cq_ring); if (!cq->avoid_mem_cflct && ext_valid) IRDMA_RING_MOVE_TAIL(cq->cq_ring); set_64bit_val(cq->shadow_area, IRDMA_BYTE_0, IRDMA_RING_CURRENT_HEAD(cq->cq_ring)); } else { qword3 &= ~IRDMA_CQ_WQEIDX; qword3 |= FIELD_PREP(IRDMA_CQ_WQEIDX, pring->tail); set_64bit_val(cqe, IRDMA_BYTE_24, qword3); } return ret_code; } /** * irdma_round_up_wq - return round up qp wq depth * @wqdepth: wq depth in quanta to round up */ static int irdma_round_up_wq(u32 wqdepth) { int scount = 1; for (wqdepth--; scount <= 16; scount *= 2) wqdepth |= wqdepth >> scount; return ++wqdepth; } /** * irdma_get_wqe_shift - get shift count for maximum wqe size * @uk_attrs: qp HW attributes * @sge: Maximum Scatter Gather Elements wqe * @inline_data: Maximum inline data size * @shift: Returns the shift needed based on sge * * Shift can be used to left shift the wqe size based on number of SGEs 
and inlind data size. * For 1 SGE or inline data <= 8, shift = 0 (wqe size of 32 * bytes). For 2 or 3 SGEs or inline data <= 39, shift = 1 (wqe * size of 64 bytes). * For 4-7 SGE's and inline <= 101 Shift of 2 otherwise (wqe * size of 256 bytes). */ void irdma_get_wqe_shift(struct irdma_uk_attrs *uk_attrs, u32 sge, u32 inline_data, u8 *shift) { *shift = 0; if (uk_attrs->hw_rev >= IRDMA_GEN_2) { if (sge > 1 || inline_data > 8) { if (sge < 4 && inline_data <= 39) *shift = 1; else if (sge < 8 && inline_data <= 101) *shift = 2; else *shift = 3; } } else if (sge > 1 || inline_data > 16) { *shift = (sge < 4 && inline_data <= 48) ? 1 : 2; } } /* * irdma_get_sqdepth - get SQ depth (quanta) @uk_attrs: qp HW attributes @sq_size: SQ size @shift: shift which * determines size of WQE @sqdepth: depth of SQ */ int irdma_get_sqdepth(struct irdma_uk_attrs *uk_attrs, u32 sq_size, u8 shift, u32 *sqdepth) { *sqdepth = irdma_round_up_wq((sq_size << shift) + IRDMA_SQ_RSVD); if (*sqdepth < ((u32)uk_attrs->min_hw_wq_size << shift)) *sqdepth = uk_attrs->min_hw_wq_size << shift; else if (*sqdepth > uk_attrs->max_hw_wq_quanta) return EINVAL; return 0; } /* * irdma_get_rqdepth - get RQ depth (quanta) @uk_attrs: qp HW attributes @rq_size: SRQ size @shift: shift which * determines size of WQE @rqdepth: depth of RQ/SRQ */ int irdma_get_rqdepth(struct irdma_uk_attrs *uk_attrs, u32 rq_size, u8 shift, u32 *rqdepth) { *rqdepth = irdma_round_up_wq((rq_size << shift) + IRDMA_RQ_RSVD); if (*rqdepth < ((u32)uk_attrs->min_hw_wq_size << shift)) *rqdepth = uk_attrs->min_hw_wq_size << shift; else if (*rqdepth > uk_attrs->max_hw_rq_quanta) return EINVAL; return 0; } static const struct irdma_wqe_uk_ops iw_wqe_uk_ops = { .iw_copy_inline_data = irdma_copy_inline_data, .iw_inline_data_size_to_quanta = irdma_inline_data_size_to_quanta, .iw_set_fragment = irdma_set_fragment, .iw_set_mw_bind_wqe = irdma_set_mw_bind_wqe, }; static const struct irdma_wqe_uk_ops iw_wqe_uk_ops_gen_1 = { .iw_copy_inline_data = irdma_copy_inline_data_gen_1, .iw_inline_data_size_to_quanta = irdma_inline_data_size_to_quanta_gen_1, .iw_set_fragment = irdma_set_fragment_gen_1, .iw_set_mw_bind_wqe = irdma_set_mw_bind_wqe_gen_1, }; /** * irdma_setup_connection_wqes - setup WQEs necessary to complete * connection. * @qp: hw qp (user and kernel) * @info: qp initialization info */ static void irdma_setup_connection_wqes(struct irdma_qp_uk *qp, struct irdma_qp_uk_init_info *info) { u16 move_cnt = 1; if (qp->uk_attrs->feature_flags & IRDMA_FEATURE_RTS_AE) move_cnt = 3; qp->conn_wqes = move_cnt; IRDMA_RING_MOVE_HEAD_BY_COUNT_NOCHECK(qp->sq_ring, move_cnt); IRDMA_RING_MOVE_TAIL_BY_COUNT(qp->sq_ring, move_cnt); IRDMA_RING_MOVE_HEAD_BY_COUNT_NOCHECK(qp->initial_ring, move_cnt); } /** * irdma_uk_calc_shift_wq - calculate WQE shift for both SQ and RQ * @ukinfo: qp initialization info * @sq_shift: Returns shift of SQ * @rq_shift: Returns shift of RQ */ void irdma_uk_calc_shift_wq(struct irdma_qp_uk_init_info *ukinfo, u8 *sq_shift, u8 *rq_shift) { bool imm_support = ukinfo->uk_attrs->hw_rev >= IRDMA_GEN_2 ? true : false; irdma_get_wqe_shift(ukinfo->uk_attrs, imm_support ? ukinfo->max_sq_frag_cnt + 1 : ukinfo->max_sq_frag_cnt, ukinfo->max_inline_data, sq_shift); irdma_get_wqe_shift(ukinfo->uk_attrs, ukinfo->max_rq_frag_cnt, 0, rq_shift); if (ukinfo->uk_attrs->hw_rev == IRDMA_GEN_1) { if (ukinfo->abi_ver > 4) *rq_shift = IRDMA_MAX_RQ_WQE_SHIFT_GEN1; } } /** * irdma_uk_calc_depth_shift_sq - calculate depth and shift for SQ size. 
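Taken together, irdma_get_wqe_shift() and irdma_get_sqdepth() turn an application's WR count into hardware quanta: the fragment count and inline size select a WQE size (a shift of 0-3 applied to the minimum WQE), and the depth is the shifted WR count plus the reserved quanta, rounded up to a power of two and clamped to the device limits. A hypothetical sizing helper built only on the two functions above:

/* Hypothetical: size an SQ for WRs that carry up to 3 SGEs and no inline data. */
static int size_sq_example(struct irdma_uk_attrs *uk_attrs, u32 wanted_wrs,
                           u32 *quanta)
{
        u8 shift;

        /* 2-3 SGEs without inline data select 64-byte WQEs (shift = 1) on GEN_2 */
        irdma_get_wqe_shift(uk_attrs, 3, 0, &shift);

        /* (wanted_wrs << shift) plus the reserved quanta, rounded up and
         * clamped; returns EINVAL if the result exceeds max_hw_wq_quanta.
         */
        return irdma_get_sqdepth(uk_attrs, wanted_wrs, shift, quanta);
}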
* @ukinfo: qp initialization info * @sq_depth: Returns depth of SQ * @sq_shift: Returns shift of SQ */ int irdma_uk_calc_depth_shift_sq(struct irdma_qp_uk_init_info *ukinfo, u32 *sq_depth, u8 *sq_shift) { bool imm_support = ukinfo->uk_attrs->hw_rev >= IRDMA_GEN_2 ? true : false; int status; irdma_get_wqe_shift(ukinfo->uk_attrs, imm_support ? ukinfo->max_sq_frag_cnt + 1 : ukinfo->max_sq_frag_cnt, ukinfo->max_inline_data, sq_shift); status = irdma_get_sqdepth(ukinfo->uk_attrs, ukinfo->sq_size, *sq_shift, sq_depth); return status; } /** * irdma_uk_calc_depth_shift_rq - calculate depth and shift for RQ size. * @ukinfo: qp initialization info * @rq_depth: Returns depth of RQ * @rq_shift: Returns shift of RQ */ int irdma_uk_calc_depth_shift_rq(struct irdma_qp_uk_init_info *ukinfo, u32 *rq_depth, u8 *rq_shift) { int status; irdma_get_wqe_shift(ukinfo->uk_attrs, ukinfo->max_rq_frag_cnt, 0, rq_shift); if (ukinfo->uk_attrs->hw_rev == IRDMA_GEN_1) { if (ukinfo->abi_ver > 4) *rq_shift = IRDMA_MAX_RQ_WQE_SHIFT_GEN1; } status = irdma_get_rqdepth(ukinfo->uk_attrs, ukinfo->rq_size, *rq_shift, rq_depth); return status; } /** * irdma_uk_qp_init - initialize shared qp * @qp: hw qp (user and kernel) * @info: qp initialization info * * initializes the vars used in both user and kernel mode. * size of the wqe depends on numbers of max. fragements * allowed. Then size of wqe * the number of wqes should be the * amount of memory allocated for sq and rq. */ int irdma_uk_qp_init(struct irdma_qp_uk *qp, struct irdma_qp_uk_init_info *info) { int ret_code = 0; u32 sq_ring_size; qp->uk_attrs = info->uk_attrs; if (info->max_sq_frag_cnt > qp->uk_attrs->max_hw_wq_frags || info->max_rq_frag_cnt > qp->uk_attrs->max_hw_wq_frags) return EINVAL; qp->qp_caps = info->qp_caps; qp->sq_base = info->sq; qp->rq_base = info->rq; qp->qp_type = info->type ? 
info->type : IRDMA_QP_TYPE_IWARP; qp->shadow_area = info->shadow_area; qp->sq_wrtrk_array = info->sq_wrtrk_array; qp->rq_wrid_array = info->rq_wrid_array; qp->wqe_alloc_db = info->wqe_alloc_db; qp->last_rx_cmpl_idx = 0xffffffff; qp->rd_fence_rate = info->rd_fence_rate; qp->qp_id = info->qp_id; qp->sq_size = info->sq_size; qp->push_mode = false; qp->max_sq_frag_cnt = info->max_sq_frag_cnt; sq_ring_size = qp->sq_size << info->sq_shift; IRDMA_RING_INIT(qp->sq_ring, sq_ring_size); IRDMA_RING_INIT(qp->initial_ring, sq_ring_size); if (info->first_sq_wq) { irdma_setup_connection_wqes(qp, info); qp->swqe_polarity = 1; qp->first_sq_wq = true; } else { qp->swqe_polarity = 0; } qp->swqe_polarity_deferred = 1; qp->rwqe_polarity = 0; qp->rq_size = info->rq_size; qp->max_rq_frag_cnt = info->max_rq_frag_cnt; qp->max_inline_data = info->max_inline_data; qp->rq_wqe_size = info->rq_shift; IRDMA_RING_INIT(qp->rq_ring, qp->rq_size); qp->rq_wqe_size_multiplier = 1 << info->rq_shift; if (qp->uk_attrs->hw_rev == IRDMA_GEN_1) qp->wqe_ops = iw_wqe_uk_ops_gen_1; else qp->wqe_ops = iw_wqe_uk_ops; return ret_code; } /** * irdma_uk_cq_init - initialize shared cq (user and kernel) * @cq: hw cq * @info: hw cq initialization info */ int irdma_uk_cq_init(struct irdma_cq_uk *cq, struct irdma_cq_uk_init_info *info) { cq->cq_base = info->cq_base; cq->cq_id = info->cq_id; cq->cq_size = info->cq_size; cq->cqe_alloc_db = info->cqe_alloc_db; cq->cq_ack_db = info->cq_ack_db; cq->shadow_area = info->shadow_area; cq->avoid_mem_cflct = info->avoid_mem_cflct; IRDMA_RING_INIT(cq->cq_ring, cq->cq_size); cq->polarity = 1; return 0; } /** * irdma_uk_clean_cq - clean cq entries * @q: completion context * @cq: cq to clean */ int irdma_uk_clean_cq(void *q, struct irdma_cq_uk *cq) { __le64 *cqe; u64 qword3, comp_ctx; u32 cq_head; u8 polarity, temp; cq_head = cq->cq_ring.head; temp = cq->polarity; do { if (cq->avoid_mem_cflct) cqe = ((struct irdma_extended_cqe *)(cq->cq_base))[cq_head].buf; else cqe = cq->cq_base[cq_head].buf; get_64bit_val(cqe, IRDMA_BYTE_24, &qword3); polarity = (u8)FIELD_GET(IRDMA_CQ_VALID, qword3); if (polarity != temp) break; + /* Ensure CQE contents are read after valid bit is checked */ + udma_from_device_barrier(); + get_64bit_val(cqe, IRDMA_BYTE_8, &comp_ctx); if ((void *)(irdma_uintptr) comp_ctx == q) set_64bit_val(cqe, IRDMA_BYTE_8, 0); cq_head = (cq_head + 1) % cq->cq_ring.size; if (!cq_head) temp ^= 1; } while (true); return 0; } -/** - * irdma_nop - post a nop - * @qp: hw qp ptr - * @wr_id: work request id - * @signaled: signaled for completion - * @post_sq: ring doorbell - */ -int -irdma_nop(struct irdma_qp_uk *qp, u64 wr_id, bool signaled, bool post_sq) -{ - __le64 *wqe; - u64 hdr; - u32 wqe_idx; - struct irdma_post_sq_info info = {0}; - u16 quanta = IRDMA_QP_WQE_MIN_QUANTA; - - info.push_wqe = qp->push_db ? 
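irdma_uk_clean_cq() above, like the poll path before it, relies on the valid-bit/polarity scheme: the consumer starts with polarity 1 (set in irdma_uk_cq_init()) and flips it every time the head wraps to index 0, so an entry belongs to the current lap only while its valid bit matches the consumer's polarity. A minimal illustrative helper (the name is hypothetical):

/* Illustrative: does this CQE belong to the consumer's current lap? */
static bool cqe_is_current_lap(struct irdma_cq_uk *cq, __le64 *cqe)
{
        u64 qword3;

        get_64bit_val(cqe, IRDMA_BYTE_24, &qword3);
        return (u8)FIELD_GET(IRDMA_CQ_VALID, qword3) == cq->polarity;
}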
true : false; - info.wr_id = wr_id; - wqe = irdma_qp_get_next_send_wqe(qp, &wqe_idx, &quanta, 0, &info); - if (!wqe) - return ENOSPC; - - set_64bit_val(wqe, IRDMA_BYTE_0, 0); - set_64bit_val(wqe, IRDMA_BYTE_8, 0); - set_64bit_val(wqe, IRDMA_BYTE_16, 0); - - hdr = FIELD_PREP(IRDMAQPSQ_OPCODE, IRDMAQP_OP_NOP) | - FIELD_PREP(IRDMAQPSQ_SIGCOMPL, signaled) | - FIELD_PREP(IRDMAQPSQ_VALID, qp->swqe_polarity); - - udma_to_device_barrier(); /* make sure WQE is populated before valid bit is set */ - - set_64bit_val(wqe, IRDMA_BYTE_24, hdr); - - if (info.push_wqe) - irdma_qp_push_wqe(qp, wqe, quanta, wqe_idx, post_sq); - else if (post_sq) - irdma_uk_qp_post_wr(qp); - - return 0; -} - /** * irdma_fragcnt_to_quanta_sq - calculate quanta based on fragment count for SQ * @frag_cnt: number of fragments * @quanta: quanta for frag_cnt */ int irdma_fragcnt_to_quanta_sq(u32 frag_cnt, u16 *quanta) { switch (frag_cnt) { case 0: case 1: *quanta = IRDMA_QP_WQE_MIN_QUANTA; break; case 2: case 3: *quanta = 2; break; case 4: case 5: *quanta = 3; break; case 6: case 7: *quanta = 4; break; case 8: case 9: *quanta = 5; break; case 10: case 11: *quanta = 6; break; case 12: case 13: *quanta = 7; break; case 14: case 15: /* when immediate data is present */ *quanta = 8; break; default: return EINVAL; } return 0; } /** * irdma_fragcnt_to_wqesize_rq - calculate wqe size based on fragment count for RQ * @frag_cnt: number of fragments * @wqe_size: size in bytes given frag_cnt */ int irdma_fragcnt_to_wqesize_rq(u32 frag_cnt, u16 *wqe_size) { switch (frag_cnt) { case 0: case 1: *wqe_size = 32; break; case 2: case 3: *wqe_size = 64; break; case 4: case 5: case 6: case 7: *wqe_size = 128; break; case 8: case 9: case 10: case 11: case 12: case 13: case 14: *wqe_size = 256; break; default: return EINVAL; } return 0; } diff --git a/contrib/ofed/libirdma/irdma_umain.c b/contrib/ofed/libirdma/irdma_umain.c index 6c823646b375..9e223cae429f 100644 --- a/contrib/ofed/libirdma/irdma_umain.c +++ b/contrib/ofed/libirdma/irdma_umain.c @@ -1,277 +1,277 @@ /*- * SPDX-License-Identifier: GPL-2.0 or Linux-OpenIB * - * Copyright (c) 2021 - 2022 Intel Corporation + * Copyright (c) 2021 - 2023 Intel Corporation * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU * General Public License (GPL) Version 2, available from the file * COPYING in the main directory of this source tree, or the * OpenFabrics.org BSD license below: * * Redistribution and use in source and binary forms, with or * without modification, are permitted provided that the following * conditions are met: * * - Redistributions of source code must retain the above * copyright notice, this list of conditions and the following * disclaimer. * * - Redistributions in binary form must reproduce the above * copyright notice, this list of conditions and the following * disclaimer in the documentation and/or other materials * provided with the distribution. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
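The two lookup tables above encode a simple relationship: on the SQ every additional pair of fragments costs one more WQE quantum, up to 8 quanta for 14-15 fragments (the case that leaves room for immediate data), while the RQ WQE size steps through 32, 64, 128 and 256 bytes. A small hypothetical usage:

/* Hypothetical look-ups against the tables above. */
static void fragcnt_tables_example(void)
{
        u16 quanta = 0, wqe_size = 0;

        irdma_fragcnt_to_quanta_sq(3, &quanta);    /* 2-3 fragments -> 2 quanta  */
        irdma_fragcnt_to_wqesize_rq(5, &wqe_size); /* 4-7 fragments -> 128 bytes */
        /* anything above 15 SQ fragments (14 for the RQ) is rejected with EINVAL */
}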
*/ /*$FreeBSD$*/ #include #include #include #include "irdma_umain.h" #include "irdma-abi.h" #include "irdma_uquery.h" #include "ice_devids.h" #include "i40e_devids.h" #include "abi.h" /** * Driver version */ -char libirdma_version[] = "1.1.11-k"; +char libirdma_version[] = "1.2.17-k"; unsigned int irdma_dbg; #define INTEL_HCA(d) \ { .vendor = PCI_VENDOR_ID_INTEL, \ .device = d } struct hca_info { unsigned vendor; unsigned device; }; static const struct hca_info hca_table[] = { INTEL_HCA(ICE_DEV_ID_E823L_BACKPLANE), INTEL_HCA(ICE_DEV_ID_E823L_SFP), INTEL_HCA(ICE_DEV_ID_E823L_10G_BASE_T), INTEL_HCA(ICE_DEV_ID_E823L_1GBE), INTEL_HCA(ICE_DEV_ID_E823L_QSFP), INTEL_HCA(ICE_DEV_ID_E810C_BACKPLANE), INTEL_HCA(ICE_DEV_ID_E810C_QSFP), INTEL_HCA(ICE_DEV_ID_E810C_SFP), INTEL_HCA(ICE_DEV_ID_E810_XXV_BACKPLANE), INTEL_HCA(ICE_DEV_ID_E810_XXV_QSFP), INTEL_HCA(ICE_DEV_ID_E810_XXV_SFP), INTEL_HCA(ICE_DEV_ID_E823C_BACKPLANE), INTEL_HCA(ICE_DEV_ID_E823C_QSFP), INTEL_HCA(ICE_DEV_ID_E823C_SFP), INTEL_HCA(ICE_DEV_ID_E823C_10G_BASE_T), INTEL_HCA(ICE_DEV_ID_E823C_SGMII), INTEL_HCA(ICE_DEV_ID_C822N_BACKPLANE), INTEL_HCA(ICE_DEV_ID_C822N_QSFP), INTEL_HCA(ICE_DEV_ID_C822N_SFP), INTEL_HCA(ICE_DEV_ID_E822C_10G_BASE_T), INTEL_HCA(ICE_DEV_ID_E822C_SGMII), INTEL_HCA(ICE_DEV_ID_E822L_BACKPLANE), INTEL_HCA(ICE_DEV_ID_E822L_SFP), INTEL_HCA(ICE_DEV_ID_E822L_10G_BASE_T), INTEL_HCA(ICE_DEV_ID_E822L_SGMII), }; static struct ibv_context_ops irdma_ctx_ops = { .query_device = irdma_uquery_device, .query_port = irdma_uquery_port, .alloc_pd = irdma_ualloc_pd, .dealloc_pd = irdma_ufree_pd, .reg_mr = irdma_ureg_mr, .rereg_mr = NULL, .dereg_mr = irdma_udereg_mr, .alloc_mw = irdma_ualloc_mw, .dealloc_mw = irdma_udealloc_mw, .bind_mw = irdma_ubind_mw, .create_cq = irdma_ucreate_cq, .poll_cq = irdma_upoll_cq, .req_notify_cq = irdma_uarm_cq, .cq_event = irdma_cq_event, .resize_cq = irdma_uresize_cq, .destroy_cq = irdma_udestroy_cq, .create_qp = irdma_ucreate_qp, .query_qp = irdma_uquery_qp, .modify_qp = irdma_umodify_qp, .destroy_qp = irdma_udestroy_qp, .post_send = irdma_upost_send, .post_recv = irdma_upost_recv, .create_ah = irdma_ucreate_ah, .destroy_ah = irdma_udestroy_ah, .attach_mcast = irdma_uattach_mcast, .detach_mcast = irdma_udetach_mcast, }; /** * libirdma_query_device - fill libirdma_device structure * @ctx_in - ibv_context identifying device * @out - libirdma_device structure to fill quered info * * ctx_in is not used at the moment */ int libirdma_query_device(struct ibv_context *ctx_in, struct libirdma_device *out) { if (!out) return EIO; if (sizeof(out->lib_ver) < sizeof(libirdma_version)) return ERANGE; out->query_ver = 1; snprintf(out->lib_ver, min(sizeof(libirdma_version), sizeof(out->lib_ver)), "%s", libirdma_version); return 0; } static int irdma_init_context(struct verbs_device *vdev, struct ibv_context *ctx, int cmd_fd) { struct irdma_uvcontext *iwvctx; struct irdma_get_context cmd = {}; struct irdma_get_context_resp resp = {}; struct ibv_pd *ibv_pd; u64 mmap_key; iwvctx = container_of(ctx, struct irdma_uvcontext, ibv_ctx); iwvctx->ibv_ctx.cmd_fd = cmd_fd; cmd.userspace_ver = IRDMA_ABI_VER; if (ibv_cmd_get_context(&iwvctx->ibv_ctx, &cmd.ibv_cmd, sizeof(cmd), &resp.ibv_resp, sizeof(resp))) { /* failed first attempt */ printf("%s %s get context failure\n", __FILE__, __func__); return -1; } iwvctx->uk_attrs.feature_flags = resp.feature_flags; iwvctx->uk_attrs.hw_rev = resp.hw_rev; iwvctx->uk_attrs.max_hw_wq_frags = resp.max_hw_wq_frags; iwvctx->uk_attrs.max_hw_read_sges = resp.max_hw_read_sges; 
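libirdma_query_device() above only copies the library version string into the caller's structure; the context argument is accepted for future use. A hypothetical caller (the field layout of struct libirdma_device is assumed from the usage above, and the helper name is illustrative):

/* Hypothetical consumer of the exported query helper. */
static void print_lib_version(struct ibv_context *ctx)
{
        struct libirdma_device dev_info = {};

        if (!libirdma_query_device(ctx, &dev_info))
                printf("libirdma version %s (query_ver %u)\n",
                       dev_info.lib_ver, (unsigned int)dev_info.query_ver);
}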
iwvctx->uk_attrs.max_hw_inline = resp.max_hw_inline; iwvctx->uk_attrs.max_hw_rq_quanta = resp.max_hw_rq_quanta; iwvctx->uk_attrs.max_hw_wq_quanta = resp.max_hw_wq_quanta; iwvctx->uk_attrs.max_hw_sq_chunk = resp.max_hw_sq_chunk; iwvctx->uk_attrs.max_hw_cq_size = resp.max_hw_cq_size; iwvctx->uk_attrs.min_hw_cq_size = resp.min_hw_cq_size; iwvctx->uk_attrs.min_hw_wq_size = IRDMA_QP_SW_MIN_WQSIZE; iwvctx->abi_ver = IRDMA_ABI_VER; mmap_key = resp.db_mmap_key; iwvctx->db = mmap(NULL, IRDMA_HW_PAGE_SIZE, PROT_WRITE | PROT_READ, MAP_SHARED, cmd_fd, mmap_key); if (iwvctx->db == MAP_FAILED) goto err_free; iwvctx->ibv_ctx.ops = irdma_ctx_ops; ibv_pd = irdma_ualloc_pd(&iwvctx->ibv_ctx); if (!ibv_pd) { munmap(iwvctx->db, IRDMA_HW_PAGE_SIZE); goto err_free; } ibv_pd->context = &iwvctx->ibv_ctx; iwvctx->iwupd = container_of(ibv_pd, struct irdma_upd, ibv_pd); return 0; err_free: printf("%s %s failure\n", __FILE__, __func__); return -1; } static void irdma_cleanup_context(struct verbs_device *device, struct ibv_context *ibctx) { struct irdma_uvcontext *iwvctx; iwvctx = container_of(ibctx, struct irdma_uvcontext, ibv_ctx); irdma_ufree_pd(&iwvctx->iwupd->ibv_pd); munmap(iwvctx->db, IRDMA_HW_PAGE_SIZE); } static struct verbs_device_ops irdma_dev_ops = { .init_context = irdma_init_context, .uninit_context = irdma_cleanup_context, }; static struct verbs_device * irdma_driver_init(const char *uverbs_sys_path, int abi_version) { struct irdma_udevice *dev; int i = 0; unsigned int device_found = 0; unsigned vendor_id, device_id; unsigned hca_size; char buf[8]; if (ibv_read_sysfs_file(uverbs_sys_path, "device/vendor", buf, sizeof(buf)) < 0) return NULL; sscanf(buf, "%i", &vendor_id); if (vendor_id != PCI_VENDOR_ID_INTEL) return NULL; if (ibv_read_sysfs_file(uverbs_sys_path, "device/device", buf, sizeof(buf)) < 0) return NULL; sscanf(buf, "%i", &device_id); hca_size = sizeof(hca_table) / sizeof(struct hca_info); while (i < hca_size && !device_found) { if (device_id != hca_table[i].device) device_found = 1; ++i; } if (!device_found) return NULL; if (abi_version < IRDMA_MIN_ABI_VERSION || abi_version > IRDMA_MAX_ABI_VERSION) { printf("Invalid ABI version: %d of %s\n", abi_version, uverbs_sys_path); return NULL; } dev = calloc(1, sizeof(struct irdma_udevice)); if (!dev) { printf("Device creation for %s failed\n", uverbs_sys_path); return NULL; } dev->ibv_dev.ops = &irdma_dev_ops; dev->ibv_dev.sz = sizeof(*dev); dev->ibv_dev.size_of_context = sizeof(struct irdma_uvcontext) - sizeof(struct ibv_context); return &dev->ibv_dev; } static __attribute__((constructor)) void irdma_register_driver(void) { verbs_register_driver("irdma", irdma_driver_init); } diff --git a/contrib/ofed/libirdma/irdma_user.h b/contrib/ofed/libirdma/irdma_user.h index 8eb9ae7990e7..8d71a37d7bf4 100644 --- a/contrib/ofed/libirdma/irdma_user.h +++ b/contrib/ofed/libirdma/irdma_user.h @@ -1,650 +1,649 @@ /*- * SPDX-License-Identifier: GPL-2.0 or Linux-OpenIB * - * Copyright (c) 2015 - 2022 Intel Corporation + * Copyright (c) 2015 - 2023 Intel Corporation * * This software is available to you under a choice of one of two * licenses. 
You may choose to be licensed under the terms of the GNU * General Public License (GPL) Version 2, available from the file * COPYING in the main directory of this source tree, or the * OpenFabrics.org BSD license below: * * Redistribution and use in source and binary forms, with or * without modification, are permitted provided that the following * conditions are met: * * - Redistributions of source code must retain the above * copyright notice, this list of conditions and the following * disclaimer. * * - Redistributions in binary form must reproduce the above * copyright notice, this list of conditions and the following * disclaimer in the documentation and/or other materials * provided with the distribution. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ /*$FreeBSD$*/ #ifndef IRDMA_USER_H #define IRDMA_USER_H #include "osdep.h" #define irdma_handle void * #define irdma_adapter_handle irdma_handle #define irdma_qp_handle irdma_handle #define irdma_cq_handle irdma_handle #define irdma_pd_id irdma_handle #define irdma_stag_handle irdma_handle #define irdma_stag_index u32 #define irdma_stag u32 #define irdma_stag_key u8 #define irdma_tagged_offset u64 #define irdma_access_privileges u32 #define irdma_physical_fragment u64 #define irdma_address_list u64 * #define irdma_sgl struct irdma_sge * #define IRDMA_MAX_MR_SIZE 0x200000000000ULL #define IRDMA_ACCESS_FLAGS_LOCALREAD 0x01 #define IRDMA_ACCESS_FLAGS_LOCALWRITE 0x02 #define IRDMA_ACCESS_FLAGS_REMOTEREAD_ONLY 0x04 #define IRDMA_ACCESS_FLAGS_REMOTEREAD 0x05 #define IRDMA_ACCESS_FLAGS_REMOTEWRITE_ONLY 0x08 #define IRDMA_ACCESS_FLAGS_REMOTEWRITE 0x0a #define IRDMA_ACCESS_FLAGS_BIND_WINDOW 0x10 #define IRDMA_ACCESS_FLAGS_ZERO_BASED 0x20 #define IRDMA_ACCESS_FLAGS_ALL 0x3f #define IRDMA_OP_TYPE_RDMA_WRITE 0x00 #define IRDMA_OP_TYPE_RDMA_READ 0x01 #define IRDMA_OP_TYPE_SEND 0x03 #define IRDMA_OP_TYPE_SEND_INV 0x04 #define IRDMA_OP_TYPE_SEND_SOL 0x05 #define IRDMA_OP_TYPE_SEND_SOL_INV 0x06 #define IRDMA_OP_TYPE_RDMA_WRITE_SOL 0x0d #define IRDMA_OP_TYPE_BIND_MW 0x08 #define IRDMA_OP_TYPE_FAST_REG_NSMR 0x09 #define IRDMA_OP_TYPE_INV_STAG 0x0a #define IRDMA_OP_TYPE_RDMA_READ_INV_STAG 0x0b #define IRDMA_OP_TYPE_NOP 0x0c #define IRDMA_OP_TYPE_REC 0x3e #define IRDMA_OP_TYPE_REC_IMM 0x3f #define IRDMA_FLUSH_MAJOR_ERR 1 #define IRDMA_SRQFLUSH_RSVD_MAJOR_ERR 0xfffe /* Async Events codes */ #define IRDMA_AE_AMP_UNALLOCATED_STAG 0x0102 #define IRDMA_AE_AMP_INVALID_STAG 0x0103 #define IRDMA_AE_AMP_BAD_QP 0x0104 #define IRDMA_AE_AMP_BAD_PD 0x0105 #define IRDMA_AE_AMP_BAD_STAG_KEY 0x0106 #define IRDMA_AE_AMP_BAD_STAG_INDEX 0x0107 #define IRDMA_AE_AMP_BOUNDS_VIOLATION 0x0108 #define IRDMA_AE_AMP_RIGHTS_VIOLATION 0x0109 #define IRDMA_AE_AMP_TO_WRAP 0x010a #define IRDMA_AE_AMP_FASTREG_VALID_STAG 0x010c #define IRDMA_AE_AMP_FASTREG_MW_STAG 0x010d #define IRDMA_AE_AMP_FASTREG_INVALID_RIGHTS 0x010e #define IRDMA_AE_AMP_FASTREG_INVALID_LENGTH 0x0110 #define IRDMA_AE_AMP_INVALIDATE_SHARED 0x0111 #define IRDMA_AE_AMP_INVALIDATE_NO_REMOTE_ACCESS_RIGHTS 0x0112 #define IRDMA_AE_AMP_INVALIDATE_MR_WITH_BOUND_WINDOWS 0x0113 #define 
IRDMA_AE_AMP_MWBIND_VALID_STAG 0x0114 #define IRDMA_AE_AMP_MWBIND_OF_MR_STAG 0x0115 #define IRDMA_AE_AMP_MWBIND_TO_ZERO_BASED_STAG 0x0116 #define IRDMA_AE_AMP_MWBIND_TO_MW_STAG 0x0117 #define IRDMA_AE_AMP_MWBIND_INVALID_RIGHTS 0x0118 #define IRDMA_AE_AMP_MWBIND_INVALID_BOUNDS 0x0119 #define IRDMA_AE_AMP_MWBIND_TO_INVALID_PARENT 0x011a #define IRDMA_AE_AMP_MWBIND_BIND_DISABLED 0x011b #define IRDMA_AE_PRIV_OPERATION_DENIED 0x011c #define IRDMA_AE_AMP_INVALIDATE_TYPE1_MW 0x011d #define IRDMA_AE_AMP_MWBIND_ZERO_BASED_TYPE1_MW 0x011e #define IRDMA_AE_AMP_FASTREG_INVALID_PBL_HPS_CFG 0x011f #define IRDMA_AE_AMP_MWBIND_WRONG_TYPE 0x0120 #define IRDMA_AE_AMP_FASTREG_PBLE_MISMATCH 0x0121 #define IRDMA_AE_UDA_XMIT_DGRAM_TOO_LONG 0x0132 #define IRDMA_AE_UDA_XMIT_BAD_PD 0x0133 #define IRDMA_AE_UDA_XMIT_DGRAM_TOO_SHORT 0x0134 #define IRDMA_AE_UDA_L4LEN_INVALID 0x0135 #define IRDMA_AE_BAD_CLOSE 0x0201 #define IRDMA_AE_RDMAP_ROE_BAD_LLP_CLOSE 0x0202 #define IRDMA_AE_CQ_OPERATION_ERROR 0x0203 #define IRDMA_AE_RDMA_READ_WHILE_ORD_ZERO 0x0205 #define IRDMA_AE_STAG_ZERO_INVALID 0x0206 #define IRDMA_AE_IB_RREQ_AND_Q1_FULL 0x0207 #define IRDMA_AE_IB_INVALID_REQUEST 0x0208 #define IRDMA_AE_WQE_UNEXPECTED_OPCODE 0x020a #define IRDMA_AE_WQE_INVALID_PARAMETER 0x020b #define IRDMA_AE_WQE_INVALID_FRAG_DATA 0x020c #define IRDMA_AE_IB_REMOTE_ACCESS_ERROR 0x020d #define IRDMA_AE_IB_REMOTE_OP_ERROR 0x020e #define IRDMA_AE_WQE_LSMM_TOO_LONG 0x0220 #define IRDMA_AE_DDP_INVALID_MSN_GAP_IN_MSN 0x0301 #define IRDMA_AE_DDP_UBE_DDP_MESSAGE_TOO_LONG_FOR_AVAILABLE_BUFFER 0x0303 #define IRDMA_AE_DDP_UBE_INVALID_DDP_VERSION 0x0304 #define IRDMA_AE_DDP_UBE_INVALID_MO 0x0305 #define IRDMA_AE_DDP_UBE_INVALID_MSN_NO_BUFFER_AVAILABLE 0x0306 #define IRDMA_AE_DDP_UBE_INVALID_QN 0x0307 #define IRDMA_AE_DDP_NO_L_BIT 0x0308 #define IRDMA_AE_RDMAP_ROE_INVALID_RDMAP_VERSION 0x0311 #define IRDMA_AE_RDMAP_ROE_UNEXPECTED_OPCODE 0x0312 #define IRDMA_AE_ROE_INVALID_RDMA_READ_REQUEST 0x0313 #define IRDMA_AE_ROE_INVALID_RDMA_WRITE_OR_READ_RESP 0x0314 #define IRDMA_AE_ROCE_RSP_LENGTH_ERROR 0x0316 #define IRDMA_AE_ROCE_EMPTY_MCG 0x0380 #define IRDMA_AE_ROCE_BAD_MC_IP_ADDR 0x0381 #define IRDMA_AE_ROCE_BAD_MC_QPID 0x0382 #define IRDMA_AE_MCG_QP_PROTOCOL_MISMATCH 0x0383 #define IRDMA_AE_INVALID_ARP_ENTRY 0x0401 #define IRDMA_AE_INVALID_TCP_OPTION_RCVD 0x0402 #define IRDMA_AE_STALE_ARP_ENTRY 0x0403 #define IRDMA_AE_INVALID_AH_ENTRY 0x0406 #define IRDMA_AE_LLP_CLOSE_COMPLETE 0x0501 #define IRDMA_AE_LLP_CONNECTION_RESET 0x0502 #define IRDMA_AE_LLP_FIN_RECEIVED 0x0503 #define IRDMA_AE_LLP_RECEIVED_MARKER_AND_LENGTH_FIELDS_DONT_MATCH 0x0504 #define IRDMA_AE_LLP_RECEIVED_MPA_CRC_ERROR 0x0505 #define IRDMA_AE_LLP_SEGMENT_TOO_SMALL 0x0507 #define IRDMA_AE_LLP_SYN_RECEIVED 0x0508 #define IRDMA_AE_LLP_TERMINATE_RECEIVED 0x0509 #define IRDMA_AE_LLP_TOO_MANY_RETRIES 0x050a #define IRDMA_AE_LLP_TOO_MANY_KEEPALIVE_RETRIES 0x050b #define IRDMA_AE_LLP_DOUBT_REACHABILITY 0x050c #define IRDMA_AE_LLP_CONNECTION_ESTABLISHED 0x050e #define IRDMA_AE_RESOURCE_EXHAUSTION 0x0520 #define IRDMA_AE_RESET_SENT 0x0601 #define IRDMA_AE_TERMINATE_SENT 0x0602 #define IRDMA_AE_RESET_NOT_SENT 0x0603 #define IRDMA_AE_LCE_QP_CATASTROPHIC 0x0700 #define IRDMA_AE_LCE_FUNCTION_CATASTROPHIC 0x0701 #define IRDMA_AE_LCE_CQ_CATASTROPHIC 0x0702 #define IRDMA_AE_QP_SUSPEND_COMPLETE 0x0900 enum irdma_device_caps_const { IRDMA_WQE_SIZE = 4, IRDMA_CQP_WQE_SIZE = 8, IRDMA_CQE_SIZE = 4, IRDMA_EXTENDED_CQE_SIZE = 8, IRDMA_AEQE_SIZE = 2, IRDMA_CEQE_SIZE = 1, IRDMA_CQP_CTX_SIZE = 8, IRDMA_SHADOW_AREA_SIZE = 
8, IRDMA_GATHER_STATS_BUF_SIZE = 1024, IRDMA_MIN_IW_QP_ID = 0, IRDMA_QUERY_FPM_BUF_SIZE = 176, IRDMA_COMMIT_FPM_BUF_SIZE = 176, IRDMA_MAX_IW_QP_ID = 262143, IRDMA_MIN_CEQID = 0, IRDMA_MAX_CEQID = 1023, IRDMA_CEQ_MAX_COUNT = IRDMA_MAX_CEQID + 1, IRDMA_MIN_CQID = 0, IRDMA_MAX_CQID = 524287, IRDMA_MIN_AEQ_ENTRIES = 1, IRDMA_MAX_AEQ_ENTRIES = 524287, IRDMA_MIN_CEQ_ENTRIES = 1, IRDMA_MAX_CEQ_ENTRIES = 262143, IRDMA_MIN_CQ_SIZE = 1, IRDMA_MAX_CQ_SIZE = 1048575, IRDMA_DB_ID_ZERO = 0, /* 64K + 1 */ IRDMA_MAX_OUTBOUND_MSG_SIZE = 65537, /* 64K +1 */ IRDMA_MAX_INBOUND_MSG_SIZE = 65537, IRDMA_MAX_PUSH_PAGE_COUNT = 1024, IRDMA_MAX_PE_ENA_VF_COUNT = 32, IRDMA_MAX_VF_FPM_ID = 47, IRDMA_MAX_SQ_PAYLOAD_SIZE = 2145386496, IRDMA_MAX_INLINE_DATA_SIZE = 101, IRDMA_MAX_WQ_ENTRIES = 32768, IRDMA_Q2_BUF_SIZE = 256, IRDMA_QP_CTX_SIZE = 256, IRDMA_MAX_PDS = 262144, }; enum irdma_addressing_type { IRDMA_ADDR_TYPE_ZERO_BASED = 0, IRDMA_ADDR_TYPE_VA_BASED = 1, }; enum irdma_flush_opcode { FLUSH_INVALID = 0, FLUSH_GENERAL_ERR, FLUSH_PROT_ERR, FLUSH_REM_ACCESS_ERR, FLUSH_LOC_QP_OP_ERR, FLUSH_REM_OP_ERR, FLUSH_LOC_LEN_ERR, FLUSH_FATAL_ERR, FLUSH_RETRY_EXC_ERR, FLUSH_MW_BIND_ERR, FLUSH_REM_INV_REQ_ERR, }; enum irdma_qp_event_type { IRDMA_QP_EVENT_CATASTROPHIC, IRDMA_QP_EVENT_ACCESS_ERR, IRDMA_QP_EVENT_REQ_ERR, }; enum irdma_cmpl_status { IRDMA_COMPL_STATUS_SUCCESS = 0, IRDMA_COMPL_STATUS_FLUSHED, IRDMA_COMPL_STATUS_INVALID_WQE, IRDMA_COMPL_STATUS_QP_CATASTROPHIC, IRDMA_COMPL_STATUS_REMOTE_TERMINATION, IRDMA_COMPL_STATUS_INVALID_STAG, IRDMA_COMPL_STATUS_BASE_BOUND_VIOLATION, IRDMA_COMPL_STATUS_ACCESS_VIOLATION, IRDMA_COMPL_STATUS_INVALID_PD_ID, IRDMA_COMPL_STATUS_WRAP_ERROR, IRDMA_COMPL_STATUS_STAG_INVALID_PDID, IRDMA_COMPL_STATUS_RDMA_READ_ZERO_ORD, IRDMA_COMPL_STATUS_QP_NOT_PRIVLEDGED, IRDMA_COMPL_STATUS_STAG_NOT_INVALID, IRDMA_COMPL_STATUS_INVALID_PHYS_BUF_SIZE, IRDMA_COMPL_STATUS_INVALID_PHYS_BUF_ENTRY, IRDMA_COMPL_STATUS_INVALID_FBO, IRDMA_COMPL_STATUS_INVALID_LEN, IRDMA_COMPL_STATUS_INVALID_ACCESS, IRDMA_COMPL_STATUS_PHYS_BUF_LIST_TOO_LONG, IRDMA_COMPL_STATUS_INVALID_VIRT_ADDRESS, IRDMA_COMPL_STATUS_INVALID_REGION, IRDMA_COMPL_STATUS_INVALID_WINDOW, IRDMA_COMPL_STATUS_INVALID_TOTAL_LEN, IRDMA_COMPL_STATUS_UNKNOWN, }; enum irdma_cmpl_notify { IRDMA_CQ_COMPL_EVENT = 0, IRDMA_CQ_COMPL_SOLICITED = 1, }; enum irdma_qp_caps { IRDMA_WRITE_WITH_IMM = 1, IRDMA_SEND_WITH_IMM = 2, IRDMA_ROCE = 4, IRDMA_PUSH_MODE = 8, }; struct irdma_qp_uk; struct irdma_cq_uk; struct irdma_qp_uk_init_info; struct irdma_cq_uk_init_info; struct irdma_sge { irdma_tagged_offset tag_off; u32 len; irdma_stag stag; }; struct irdma_ring { volatile u32 head; volatile u32 tail; /* effective tail */ u32 size; }; struct irdma_cqe { __le64 buf[IRDMA_CQE_SIZE]; }; struct irdma_extended_cqe { __le64 buf[IRDMA_EXTENDED_CQE_SIZE]; }; struct irdma_post_send { irdma_sgl sg_list; u32 num_sges; u32 qkey; u32 dest_qp; u32 ah_id; }; struct irdma_post_rq_info { u64 wr_id; irdma_sgl sg_list; u32 num_sges; }; struct irdma_rdma_write { irdma_sgl lo_sg_list; u32 num_lo_sges; struct irdma_sge rem_addr; }; struct irdma_rdma_read { irdma_sgl lo_sg_list; u32 num_lo_sges; struct irdma_sge rem_addr; }; struct irdma_bind_window { irdma_stag mr_stag; u64 bind_len; void *va; enum irdma_addressing_type addressing_type; bool ena_reads:1; bool ena_writes:1; irdma_stag mw_stag; bool mem_window_type_1:1; }; struct irdma_inv_local_stag { irdma_stag target_stag; }; struct irdma_post_sq_info { u64 wr_id; u8 op_type; u8 l4len; bool signaled:1; bool read_fence:1; bool local_fence:1; bool 
inline_data:1; bool imm_data_valid:1; bool push_wqe:1; bool report_rtt:1; bool udp_hdr:1; bool defer_flag:1; u32 imm_data; u32 stag_to_inv; union { struct irdma_post_send send; struct irdma_rdma_write rdma_write; struct irdma_rdma_read rdma_read; struct irdma_bind_window bind_window; struct irdma_inv_local_stag inv_local_stag; } op; }; struct irdma_cq_poll_info { u64 wr_id; irdma_qp_handle qp_handle; u32 bytes_xfered; u32 qp_id; u32 ud_src_qpn; u32 imm_data; irdma_stag inv_stag; /* or L_R_Key */ enum irdma_cmpl_status comp_status; u16 major_err; u16 minor_err; u16 ud_vlan; u8 ud_smac[6]; u8 op_type; u8 q_type; bool stag_invalid_set:1; /* or L_R_Key set */ bool push_dropped:1; bool error:1; bool solicited_event:1; bool ipv4:1; bool ud_vlan_valid:1; bool ud_smac_valid:1; bool imm_valid:1; bool signaled:1; union { u32 tcp_sqn; u32 roce_psn; u32 rtt; u32 raw; } stat; }; struct qp_err_code { enum irdma_flush_opcode flush_code; enum irdma_qp_event_type event_type; }; int irdma_uk_inline_rdma_write(struct irdma_qp_uk *qp, struct irdma_post_sq_info *info, bool post_sq); int irdma_uk_inline_send(struct irdma_qp_uk *qp, struct irdma_post_sq_info *info, bool post_sq); int irdma_uk_mw_bind(struct irdma_qp_uk *qp, struct irdma_post_sq_info *info, bool post_sq); int irdma_uk_post_nop(struct irdma_qp_uk *qp, u64 wr_id, bool signaled, bool post_sq); int irdma_uk_post_receive(struct irdma_qp_uk *qp, struct irdma_post_rq_info *info); void irdma_uk_qp_post_wr(struct irdma_qp_uk *qp); int irdma_uk_rdma_read(struct irdma_qp_uk *qp, struct irdma_post_sq_info *info, bool inv_stag, bool post_sq); int irdma_uk_rdma_write(struct irdma_qp_uk *qp, struct irdma_post_sq_info *info, bool post_sq); int irdma_uk_send(struct irdma_qp_uk *qp, struct irdma_post_sq_info *info, bool post_sq); int irdma_uk_stag_local_invalidate(struct irdma_qp_uk *qp, struct irdma_post_sq_info *info, bool post_sq); struct irdma_wqe_uk_ops { void (*iw_copy_inline_data)(u8 *dest, struct irdma_sge *sge_list, u32 num_sges, u8 polarity); u16 (*iw_inline_data_size_to_quanta)(u32 data_size); void (*iw_set_fragment)(__le64 *wqe, u32 offset, struct irdma_sge *sge, u8 valid); void (*iw_set_mw_bind_wqe)(__le64 *wqe, struct irdma_bind_window *op_info); }; int irdma_uk_cq_poll_cmpl(struct irdma_cq_uk *cq, struct irdma_cq_poll_info *info); void irdma_uk_cq_request_notification(struct irdma_cq_uk *cq, enum irdma_cmpl_notify cq_notify); void irdma_uk_cq_resize(struct irdma_cq_uk *cq, void *cq_base, int size); void irdma_uk_cq_set_resized_cnt(struct irdma_cq_uk *qp, u16 cnt); int irdma_uk_cq_init(struct irdma_cq_uk *cq, struct irdma_cq_uk_init_info *info); int irdma_uk_qp_init(struct irdma_qp_uk *qp, struct irdma_qp_uk_init_info *info); void irdma_uk_calc_shift_wq(struct irdma_qp_uk_init_info *ukinfo, u8 *sq_shift, u8 *rq_shift); int irdma_uk_calc_depth_shift_sq(struct irdma_qp_uk_init_info *ukinfo, u32 *sq_depth, u8 *sq_shift); int irdma_uk_calc_depth_shift_rq(struct irdma_qp_uk_init_info *ukinfo, u32 *rq_depth, u8 *rq_shift); struct irdma_sq_uk_wr_trk_info { u64 wrid; u32 wr_len; u16 quanta; u8 signaled; u8 reserved[1]; }; struct irdma_qp_quanta { __le64 elem[IRDMA_WQE_SIZE]; }; struct irdma_qp_uk { struct irdma_qp_quanta *sq_base; struct irdma_qp_quanta *rq_base; struct irdma_uk_attrs *uk_attrs; u32 IOMEM *wqe_alloc_db; struct irdma_sq_uk_wr_trk_info *sq_wrtrk_array; struct irdma_sig_wr_trk_info *sq_sigwrtrk_array; u64 *rq_wrid_array; __le64 *shadow_area; __le32 *push_db; __le64 *push_wqe; struct irdma_ring sq_ring; struct irdma_ring sq_sig_ring; struct 
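struct irdma_post_sq_info is the single descriptor behind all of the SQ helpers declared above; the op union carries the per-opcode payload. A hedged sketch of posting an RDMA write through irdma_uk_rdma_write() (the helper name and wr_id are placeholders; addresses, keys and SGE setup come from the caller):

/* Hypothetical: post one signaled RDMA write with a single local SGE. */
static int post_rdma_write_example(struct irdma_qp_uk *qp,
                                   struct irdma_sge *local_sge,
                                   u64 remote_va, irdma_stag rkey)
{
        struct irdma_post_sq_info info = {};

        info.wr_id = 0x1234;            /* placeholder work request id */
        info.op_type = IRDMA_OP_TYPE_RDMA_WRITE;
        info.signaled = true;
        info.op.rdma_write.lo_sg_list = local_sge;
        info.op.rdma_write.num_lo_sges = 1;
        info.op.rdma_write.rem_addr.tag_off = remote_va;
        info.op.rdma_write.rem_addr.stag = rkey;

        return irdma_uk_rdma_write(qp, &info, true /* post to the SQ doorbell */);
}

Receive WRs follow the same pattern through struct irdma_post_rq_info and irdma_uk_post_receive().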
irdma_ring rq_ring; struct irdma_ring initial_ring; u32 qp_id; u32 qp_caps; u32 sq_size; u32 rq_size; u32 max_sq_frag_cnt; u32 max_rq_frag_cnt; u32 max_inline_data; u32 last_rx_cmpl_idx; u32 last_tx_cmpl_idx; struct irdma_wqe_uk_ops wqe_ops; u16 conn_wqes; u8 qp_type; u8 swqe_polarity; u8 swqe_polarity_deferred; u8 rwqe_polarity; u8 rq_wqe_size; u8 rq_wqe_size_multiplier; bool deferred_flag:1; bool push_mode:1; /* whether the last post wqe was pushed */ bool push_dropped:1; bool first_sq_wq:1; bool sq_flush_complete:1; /* Indicates flush was seen and SQ was empty after the flush */ bool rq_flush_complete:1; /* Indicates flush was seen and RQ was empty after the flush */ bool destroy_pending:1; /* Indicates the QP is being destroyed */ void *back_qp; pthread_spinlock_t *lock; u8 dbg_rq_flushed; u16 ord_cnt; u8 sq_flush_seen; u8 rq_flush_seen; u8 rd_fence_rate; }; struct irdma_cq_uk { struct irdma_cqe *cq_base; u32 IOMEM *cqe_alloc_db; u32 IOMEM *cq_ack_db; __le64 *shadow_area; u32 cq_id; u32 cq_size; struct irdma_ring cq_ring; u8 polarity; bool avoid_mem_cflct:1; }; struct irdma_qp_uk_init_info { struct irdma_qp_quanta *sq; struct irdma_qp_quanta *rq; struct irdma_uk_attrs *uk_attrs; u32 IOMEM *wqe_alloc_db; __le64 *shadow_area; struct irdma_sq_uk_wr_trk_info *sq_wrtrk_array; struct irdma_sig_wr_trk_info *sq_sigwrtrk_array; u64 *rq_wrid_array; u32 qp_id; u32 qp_caps; u32 sq_size; u32 rq_size; u32 max_sq_frag_cnt; u32 max_rq_frag_cnt; u32 max_inline_data; u32 sq_depth; u32 rq_depth; u8 first_sq_wq; u8 type; u8 sq_shift; u8 rq_shift; u8 rd_fence_rate; int abi_ver; bool legacy_mode; }; struct irdma_cq_uk_init_info { u32 IOMEM *cqe_alloc_db; u32 IOMEM *cq_ack_db; struct irdma_cqe *cq_base; __le64 *shadow_area; u32 cq_size; u32 cq_id; bool avoid_mem_cflct; }; __le64 *irdma_qp_get_next_send_wqe(struct irdma_qp_uk *qp, u32 *wqe_idx, u16 *quanta, u32 total_size, struct irdma_post_sq_info *info); __le64 *irdma_qp_get_next_recv_wqe(struct irdma_qp_uk *qp, u32 *wqe_idx); int irdma_uk_clean_cq(void *q, struct irdma_cq_uk *cq); int irdma_nop(struct irdma_qp_uk *qp, u64 wr_id, bool signaled, bool post_sq); int irdma_fragcnt_to_quanta_sq(u32 frag_cnt, u16 *quanta); int irdma_fragcnt_to_wqesize_rq(u32 frag_cnt, u16 *wqe_size); void irdma_get_wqe_shift(struct irdma_uk_attrs *uk_attrs, u32 sge, u32 inline_data, u8 *shift); int irdma_get_sqdepth(struct irdma_uk_attrs *uk_attrs, u32 sq_size, u8 shift, u32 *sqdepth); int irdma_get_rqdepth(struct irdma_uk_attrs *uk_attrs, u32 rq_size, u8 shift, u32 *rqdepth); -int irdma_get_srqdepth(struct irdma_uk_attrs *uk_attrs, u32 srq_size, u8 shift, u32 *srqdepth); void irdma_qp_push_wqe(struct irdma_qp_uk *qp, __le64 *wqe, u16 quanta, u32 wqe_idx, bool post_sq); void irdma_clr_wqes(struct irdma_qp_uk *qp, u32 qp_wqe_idx); static inline struct qp_err_code irdma_ae_to_qp_err_code(u16 ae_id) { struct qp_err_code qp_err = { 0 }; switch (ae_id) { case IRDMA_AE_AMP_BOUNDS_VIOLATION: case IRDMA_AE_AMP_INVALID_STAG: case IRDMA_AE_AMP_RIGHTS_VIOLATION: case IRDMA_AE_AMP_UNALLOCATED_STAG: case IRDMA_AE_AMP_BAD_PD: case IRDMA_AE_AMP_BAD_QP: case IRDMA_AE_AMP_BAD_STAG_KEY: case IRDMA_AE_AMP_BAD_STAG_INDEX: case IRDMA_AE_AMP_TO_WRAP: case IRDMA_AE_PRIV_OPERATION_DENIED: qp_err.flush_code = FLUSH_PROT_ERR; qp_err.event_type = IRDMA_QP_EVENT_ACCESS_ERR; break; case IRDMA_AE_UDA_XMIT_BAD_PD: case IRDMA_AE_WQE_UNEXPECTED_OPCODE: qp_err.flush_code = FLUSH_LOC_QP_OP_ERR; qp_err.event_type = IRDMA_QP_EVENT_CATASTROPHIC; break; case IRDMA_AE_UDA_XMIT_DGRAM_TOO_SHORT: case 
IRDMA_AE_UDA_XMIT_DGRAM_TOO_LONG: case IRDMA_AE_UDA_L4LEN_INVALID: case IRDMA_AE_DDP_UBE_INVALID_MO: case IRDMA_AE_DDP_UBE_DDP_MESSAGE_TOO_LONG_FOR_AVAILABLE_BUFFER: qp_err.flush_code = FLUSH_LOC_LEN_ERR; qp_err.event_type = IRDMA_QP_EVENT_CATASTROPHIC; break; case IRDMA_AE_AMP_INVALIDATE_NO_REMOTE_ACCESS_RIGHTS: case IRDMA_AE_IB_REMOTE_ACCESS_ERROR: qp_err.flush_code = FLUSH_REM_ACCESS_ERR; qp_err.event_type = IRDMA_QP_EVENT_ACCESS_ERR; break; case IRDMA_AE_AMP_MWBIND_INVALID_RIGHTS: case IRDMA_AE_AMP_MWBIND_BIND_DISABLED: case IRDMA_AE_AMP_MWBIND_INVALID_BOUNDS: case IRDMA_AE_AMP_MWBIND_VALID_STAG: qp_err.flush_code = FLUSH_MW_BIND_ERR; qp_err.event_type = IRDMA_QP_EVENT_ACCESS_ERR; break; case IRDMA_AE_LLP_TOO_MANY_RETRIES: qp_err.flush_code = FLUSH_RETRY_EXC_ERR; qp_err.event_type = IRDMA_QP_EVENT_CATASTROPHIC; break; case IRDMA_AE_IB_INVALID_REQUEST: qp_err.flush_code = FLUSH_REM_INV_REQ_ERR; qp_err.event_type = IRDMA_QP_EVENT_REQ_ERR; break; case IRDMA_AE_LLP_SEGMENT_TOO_SMALL: case IRDMA_AE_LLP_RECEIVED_MPA_CRC_ERROR: case IRDMA_AE_ROCE_RSP_LENGTH_ERROR: case IRDMA_AE_IB_REMOTE_OP_ERROR: qp_err.flush_code = FLUSH_REM_OP_ERR; qp_err.event_type = IRDMA_QP_EVENT_CATASTROPHIC; break; case IRDMA_AE_LCE_QP_CATASTROPHIC: qp_err.flush_code = FLUSH_FATAL_ERR; qp_err.event_type = IRDMA_QP_EVENT_CATASTROPHIC; break; default: qp_err.flush_code = FLUSH_GENERAL_ERR; qp_err.event_type = IRDMA_QP_EVENT_CATASTROPHIC; break; } return qp_err; } #endif /* IRDMA_USER_H */ diff --git a/contrib/ofed/libirdma/irdma_uverbs.c b/contrib/ofed/libirdma/irdma_uverbs.c index 14efab96a107..c59d409e72cd 100644 --- a/contrib/ofed/libirdma/irdma_uverbs.c +++ b/contrib/ofed/libirdma/irdma_uverbs.c @@ -1,2254 +1,2261 @@ /*- * SPDX-License-Identifier: GPL-2.0 or Linux-OpenIB * - * Copyright (C) 2019 - 2022 Intel Corporation + * Copyright (C) 2019 - 2023 Intel Corporation * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU * General Public License (GPL) Version 2, available from the file * COPYING in the main directory of this source tree, or the * OpenFabrics.org BSD license below: * * Redistribution and use in source and binary forms, with or * without modification, are permitted provided that the following * conditions are met: * * - Redistributions of source code must retain the above * copyright notice, this list of conditions and the following * disclaimer. * * - Redistributions in binary form must reproduce the above * copyright notice, this list of conditions and the following * disclaimer in the documentation and/or other materials * provided with the distribution. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
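irdma_ae_to_qp_err_code() above collapses the asynchronous-event space into a flush opcode plus a QP event type, with FLUSH_GENERAL_ERR and a catastrophic event as the catch-all. Illustrative use (the helper name is hypothetical):

/* Illustrative use of the asynchronous-event translation above. */
static enum irdma_flush_opcode ae_flush_code_example(u16 ae_id)
{
        struct qp_err_code err = irdma_ae_to_qp_err_code(ae_id);

        /* e.g. IRDMA_AE_LLP_TOO_MANY_RETRIES maps to FLUSH_RETRY_EXC_ERR with
         * a catastrophic QP event; unknown codes fall back to FLUSH_GENERAL_ERR.
         */
        return err.flush_code;
}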
*/ /*$FreeBSD$*/ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "irdma_umain.h" #include "abi.h" static inline void print_fw_ver(uint64_t fw_ver, char *str, size_t len) { uint16_t major, minor; major = fw_ver >> 32 & 0xffff; minor = fw_ver & 0xffff; snprintf(str, len, "%d.%d", major, minor); } /** * irdma_uquery_device_ex - query device attributes including extended properties * @context: user context for the device * @input: extensible input struct for ibv_query_device_ex verb * @attr: extended device attribute struct * @attr_size: size of extended device attribute struct **/ int irdma_uquery_device_ex(struct ibv_context *context, const struct ibv_query_device_ex_input *input, struct ibv_device_attr_ex *attr, size_t attr_size) { struct irdma_query_device_ex cmd = {}; struct irdma_query_device_ex_resp resp = {}; uint64_t fw_ver; int ret; ret = ibv_cmd_query_device_ex(context, input, attr, attr_size, &fw_ver, &cmd.ibv_cmd, sizeof(cmd.ibv_cmd), sizeof(cmd), &resp.ibv_resp, sizeof(resp.ibv_resp), sizeof(resp)); if (ret) return ret; print_fw_ver(fw_ver, attr->orig_attr.fw_ver, sizeof(attr->orig_attr.fw_ver)); return 0; } /** * irdma_uquery_device - call driver to query device for max resources * @context: user context for the device * @attr: where to save all the mx resources from the driver **/ int irdma_uquery_device(struct ibv_context *context, struct ibv_device_attr *attr) { struct ibv_query_device cmd; uint64_t fw_ver; int ret; ret = ibv_cmd_query_device(context, attr, &fw_ver, &cmd, sizeof(cmd)); if (ret) return ret; print_fw_ver(fw_ver, attr->fw_ver, sizeof(attr->fw_ver)); return 0; } /** * irdma_uquery_port - get port attributes (msg size, lnk, mtu...) * @context: user context of the device * @port: port for the attributes * @attr: to return port attributes **/ int irdma_uquery_port(struct ibv_context *context, uint8_t port, struct ibv_port_attr *attr) { struct ibv_query_port cmd; return ibv_cmd_query_port(context, port, attr, &cmd, sizeof(cmd)); } /** * irdma_ualloc_pd - allocates protection domain and return pd ptr * @context: user context of the device **/ struct ibv_pd * irdma_ualloc_pd(struct ibv_context *context) { struct ibv_alloc_pd cmd; struct irdma_ualloc_pd_resp resp = {}; struct irdma_upd *iwupd; int err; iwupd = calloc(1, sizeof(*iwupd)); if (!iwupd) return NULL; err = ibv_cmd_alloc_pd(context, &iwupd->ibv_pd, &cmd, sizeof(cmd), &resp.ibv_resp, sizeof(resp)); if (err) goto err_free; iwupd->pd_id = resp.pd_id; return &iwupd->ibv_pd; err_free: free(iwupd); errno = err; return NULL; } /** * irdma_ufree_pd - free pd resources * @pd: pd to free resources */ int irdma_ufree_pd(struct ibv_pd *pd) { struct irdma_uvcontext *iwvctx = container_of(pd->context, struct irdma_uvcontext, ibv_ctx); struct irdma_upd *iwupd; int ret; iwupd = container_of(pd, struct irdma_upd, ibv_pd); ret = ibv_cmd_dealloc_pd(pd); if (ret) return ret; free(iwupd); return 0; } /** * irdma_ureg_mr - register user memory region * @pd: pd for the mr * @addr: user address of the memory region * @length: length of the memory * @hca_va: hca_va * @access: access allowed on this mr */ struct ibv_mr * irdma_ureg_mr(struct ibv_pd *pd, void *addr, size_t length, int access) { struct verbs_mr *vmr; struct irdma_ureg_mr cmd = {}; struct ibv_reg_mr_resp resp; int err; vmr = malloc(sizeof(*vmr)); if (!vmr) return NULL; cmd.reg_type = IRDMA_MEMREG_TYPE_MEM; err = ibv_cmd_reg_mr(pd, addr, length, (uintptr_t)addr, access, &vmr->ibv_mr, 
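print_fw_ver() above unpacks the 64-bit firmware version reported for the device: the major number is taken from bits 47:32 and the minor from bits 15:0. A worked example with a hypothetical value:

/* Worked example for the firmware-version packing used by print_fw_ver(). */
static void fw_ver_example(void)
{
        char str[32];
        uint64_t fw_ver = ((uint64_t)2 << 32) | 34; /* hypothetical value */

        print_fw_ver(fw_ver, str, sizeof(str)); /* yields "2.34" */
}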
&cmd.ibv_cmd, sizeof(cmd), &resp, sizeof(resp)); if (err) { free(vmr); errno = err; return NULL; } return &vmr->ibv_mr; } /* * irdma_urereg_mr - re-register memory region @vmr: mr that was allocated @flags: bit mask to indicate which of the * attr's of MR modified @pd: pd of the mr @addr: user address of the memory region @length: length of the memory * @access: access allowed on this mr */ int irdma_urereg_mr(struct verbs_mr *vmr, int flags, struct ibv_pd *pd, void *addr, size_t length, int access) { struct irdma_urereg_mr cmd = {}; struct ibv_rereg_mr_resp resp; cmd.reg_type = IRDMA_MEMREG_TYPE_MEM; return ibv_cmd_rereg_mr(&vmr->ibv_mr, flags, addr, length, (uintptr_t)addr, access, pd, &cmd.ibv_cmd, sizeof(cmd), &resp, sizeof(resp)); } /** * irdma_udereg_mr - re-register memory region * @mr: mr that was allocated */ int irdma_udereg_mr(struct ibv_mr *mr) { struct verbs_mr *vmr; int ret; vmr = container_of(mr, struct verbs_mr, ibv_mr); ret = ibv_cmd_dereg_mr(mr); if (ret) return ret; return 0; } /** * irdma_ualloc_mw - allocate memory window * @pd: protection domain * @type: memory window type */ struct ibv_mw * irdma_ualloc_mw(struct ibv_pd *pd, enum ibv_mw_type type) { struct ibv_mw *mw; struct ibv_alloc_mw cmd; struct ibv_alloc_mw_resp resp; int err; mw = calloc(1, sizeof(*mw)); if (!mw) return NULL; - if (ibv_cmd_alloc_mw(pd, type, mw, &cmd, sizeof(cmd), &resp, - sizeof(resp))) { + err = ibv_cmd_alloc_mw(pd, type, mw, &cmd, sizeof(cmd), &resp, + sizeof(resp)); + if (err) { printf("%s: Failed to alloc memory window\n", __func__); free(mw); + errno = err; return NULL; } return mw; } /** * irdma_ubind_mw - bind a memory window * @qp: qp to post WR * @mw: memory window to bind * @mw_bind: bind info */ int irdma_ubind_mw(struct ibv_qp *qp, struct ibv_mw *mw, struct ibv_mw_bind *mw_bind) { struct ibv_mw_bind_info *bind_info = &mw_bind->bind_info; struct verbs_mr *vmr; struct ibv_send_wr wr = {}; struct ibv_send_wr *bad_wr; int err; if (!bind_info->mr && (bind_info->addr || bind_info->length)) return EINVAL; if (bind_info->mr) { vmr = verbs_get_mr(bind_info->mr); if (vmr->mr_type != IBV_MR_TYPE_MR) return ENOTSUP; if (vmr->access & IBV_ACCESS_ZERO_BASED) return EINVAL; if (mw->pd != bind_info->mr->pd) return EPERM; } wr.opcode = IBV_WR_BIND_MW; wr.bind_mw.bind_info = mw_bind->bind_info; wr.bind_mw.mw = mw; wr.bind_mw.rkey = ibv_inc_rkey(mw->rkey); wr.wr_id = mw_bind->wr_id; wr.send_flags = mw_bind->send_flags; err = irdma_upost_send(qp, &wr, &bad_wr); if (!err) mw->rkey = wr.bind_mw.rkey; return err; } /** * irdma_udealloc_mw - deallocate memory window * @mw: memory window to dealloc */ int irdma_udealloc_mw(struct ibv_mw *mw) { int ret; struct ibv_dealloc_mw cmd; ret = ibv_cmd_dealloc_mw(mw, &cmd, sizeof(cmd)); if (ret) return ret; free(mw); return 0; } static void * irdma_alloc_hw_buf(size_t size) { void *buf; buf = memalign(IRDMA_HW_PAGE_SIZE, size); if (!buf) return NULL; if (ibv_dontfork_range(buf, size)) { free(buf); return NULL; } return buf; } static void irdma_free_hw_buf(void *buf, size_t size) { ibv_dofork_range(buf, size); free(buf); } /** * get_cq_size - returns actual cqe needed by HW * @ncqe: minimum cqes requested by application * @hw_rev: HW generation * @cqe_64byte_ena: enable 64byte cqe */ static inline int get_cq_size(int ncqe, u8 hw_rev, bool cqe_64byte_ena) { ncqe++; /* Completions with immediate require 1 extra entry */ if (!cqe_64byte_ena && hw_rev > IRDMA_GEN_1) ncqe *= 2; if (ncqe < IRDMA_U_MINCQ_SIZE) ncqe = IRDMA_U_MINCQ_SIZE; return ncqe; } static inline size_t 
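irdma_ubind_mw() above turns a verbs memory-window bind into an IBV_WR_BIND_MW work request after checking the MR type, zero-based access and PD ownership. From the application side the same path is reached through the standard ibv_bind_mw() verb; a hedged sketch, with resource setup assumed to have happened elsewhere:

/* Hypothetical type-1 memory-window bind as an application would issue it. */
static int bind_mw_example(struct ibv_qp *qp, struct ibv_mw *mw,
                           struct ibv_mr *mr, void *addr, size_t len)
{
        struct ibv_mw_bind bind = {
                .wr_id = 1,
                .send_flags = IBV_SEND_SIGNALED,
                .bind_info = {
                        .mr = mr,
                        .addr = (uintptr_t)addr,
                        .length = len,
                        .mw_access_flags = IBV_ACCESS_REMOTE_READ |
                                           IBV_ACCESS_REMOTE_WRITE,
                },
        };

        return ibv_bind_mw(qp, mw, &bind);
}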
get_cq_total_bytes(u32 cq_size, bool cqe_64byte_ena){ if (cqe_64byte_ena) return roundup(cq_size * sizeof(struct irdma_extended_cqe), IRDMA_HW_PAGE_SIZE); else return roundup(cq_size * sizeof(struct irdma_cqe), IRDMA_HW_PAGE_SIZE); } /** * ucreate_cq - irdma util function to create a CQ * @context: ibv context * @attr_ex: CQ init attributes * @ext_cq: flag to create an extendable or normal CQ */ static struct ibv_cq_ex * ucreate_cq(struct ibv_context *context, struct ibv_cq_init_attr_ex *attr_ex, bool ext_cq) { struct irdma_cq_uk_init_info info = {}; struct irdma_ureg_mr reg_mr_cmd = {}; struct irdma_ucreate_cq_ex cmd = {}; struct irdma_ucreate_cq_ex_resp resp = {}; struct ibv_reg_mr_resp reg_mr_resp = {}; struct irdma_ureg_mr reg_mr_shadow_cmd = {}; struct ibv_reg_mr_resp reg_mr_shadow_resp = {}; struct irdma_uk_attrs *uk_attrs; struct irdma_uvcontext *iwvctx; struct irdma_ucq *iwucq; size_t total_size; u32 cq_pages; int ret, ncqe; u8 hw_rev; bool cqe_64byte_ena; iwvctx = container_of(context, struct irdma_uvcontext, ibv_ctx); uk_attrs = &iwvctx->uk_attrs; hw_rev = uk_attrs->hw_rev; if (ext_cq) { u32 supported_flags = IRDMA_STANDARD_WC_FLAGS_EX; if (hw_rev == IRDMA_GEN_1 || attr_ex->wc_flags & ~supported_flags) { errno = EOPNOTSUPP; return NULL; } } if (attr_ex->cqe < uk_attrs->min_hw_cq_size || attr_ex->cqe > uk_attrs->max_hw_cq_size - 1) { errno = EINVAL; return NULL; } /* save the cqe requested by application */ ncqe = attr_ex->cqe; iwucq = calloc(1, sizeof(*iwucq)); if (!iwucq) return NULL; - if (pthread_spin_init(&iwucq->lock, PTHREAD_PROCESS_PRIVATE)) { + ret = pthread_spin_init(&iwucq->lock, PTHREAD_PROCESS_PRIVATE); + if (ret) { free(iwucq); + errno = ret; return NULL; } cqe_64byte_ena = uk_attrs->feature_flags & IRDMA_FEATURE_64_BYTE_CQE ? 
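The two sizing helpers above determine the CQ buffer: get_cq_size() adds the entry reserved for immediate-data completions, doubles the count on newer hardware when 64-byte CQEs are not in use, and enforces the minimum CQ size; get_cq_total_bytes() then rounds the buffer up to whole hardware pages. A hypothetical walk-through:

/* Hypothetical: how many bytes back a CQ sized for 255 application CQEs? */
static size_t cq_bytes_example(u8 hw_rev, bool cqe_64byte_ena)
{
        int cq_size = get_cq_size(255, hw_rev, cqe_64byte_ena);

        return get_cq_total_bytes((u32)cq_size, cqe_64byte_ena);
}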
true : false; info.cq_size = get_cq_size(attr_ex->cqe, hw_rev, cqe_64byte_ena); iwucq->comp_vector = attr_ex->comp_vector; LIST_INIT(&iwucq->resize_list); LIST_INIT(&iwucq->cmpl_generated); total_size = get_cq_total_bytes(info.cq_size, cqe_64byte_ena); cq_pages = total_size >> IRDMA_HW_PAGE_SHIFT; if (!(uk_attrs->feature_flags & IRDMA_FEATURE_CQ_RESIZE)) total_size = (cq_pages << IRDMA_HW_PAGE_SHIFT) + IRDMA_DB_SHADOW_AREA_SIZE; iwucq->buf_size = total_size; info.cq_base = irdma_alloc_hw_buf(total_size); - if (!info.cq_base) + if (!info.cq_base) { + ret = ENOMEM; goto err_cq_base; + } memset(info.cq_base, 0, total_size); reg_mr_cmd.reg_type = IRDMA_MEMREG_TYPE_CQ; reg_mr_cmd.cq_pages = cq_pages; ret = ibv_cmd_reg_mr(&iwvctx->iwupd->ibv_pd, info.cq_base, total_size, (uintptr_t)info.cq_base, IBV_ACCESS_LOCAL_WRITE, &iwucq->vmr.ibv_mr, ®_mr_cmd.ibv_cmd, sizeof(reg_mr_cmd), ®_mr_resp, sizeof(reg_mr_resp)); - if (ret) { - errno = ret; + if (ret) goto err_dereg_mr; - } iwucq->vmr.ibv_mr.pd = &iwvctx->iwupd->ibv_pd; if (uk_attrs->feature_flags & IRDMA_FEATURE_CQ_RESIZE) { info.shadow_area = irdma_alloc_hw_buf(IRDMA_DB_SHADOW_AREA_SIZE); - if (!info.shadow_area) + if (!info.shadow_area) { + ret = ENOMEM; goto err_alloc_shadow; + } memset(info.shadow_area, 0, IRDMA_DB_SHADOW_AREA_SIZE); reg_mr_shadow_cmd.reg_type = IRDMA_MEMREG_TYPE_CQ; reg_mr_shadow_cmd.cq_pages = 1; ret = ibv_cmd_reg_mr(&iwvctx->iwupd->ibv_pd, info.shadow_area, IRDMA_DB_SHADOW_AREA_SIZE, (uintptr_t)info.shadow_area, IBV_ACCESS_LOCAL_WRITE, &iwucq->vmr_shadow_area.ibv_mr, ®_mr_shadow_cmd.ibv_cmd, sizeof(reg_mr_shadow_cmd), ®_mr_shadow_resp, sizeof(reg_mr_shadow_resp)); if (ret) { irdma_free_hw_buf(info.shadow_area, IRDMA_DB_SHADOW_AREA_SIZE); - errno = ret; goto err_alloc_shadow; } iwucq->vmr_shadow_area.ibv_mr.pd = &iwvctx->iwupd->ibv_pd; } else { info.shadow_area = (__le64 *) ((u8 *)info.cq_base + (cq_pages << IRDMA_HW_PAGE_SHIFT)); } attr_ex->cqe = info.cq_size; cmd.user_cq_buf = (__u64) ((uintptr_t)info.cq_base); cmd.user_shadow_area = (__u64) ((uintptr_t)info.shadow_area); ret = ibv_cmd_create_cq_ex(context, attr_ex, &iwucq->verbs_cq.cq_ex, &cmd.ibv_cmd, sizeof(cmd.ibv_cmd), sizeof(cmd), &resp.ibv_resp, sizeof(resp.ibv_resp), sizeof(resp)); attr_ex->cqe = ncqe; - if (ret) { - errno = ret; + if (ret) goto err_create_cq; - } if (ext_cq) irdma_ibvcq_ex_fill_priv_funcs(iwucq, attr_ex); info.cq_id = resp.cq_id; /* Do not report the CQE's reserved for immediate and burned by HW */ iwucq->verbs_cq.cq.cqe = ncqe; if (cqe_64byte_ena) info.avoid_mem_cflct = true; info.cqe_alloc_db = (u32 *)((u8 *)iwvctx->db + IRDMA_DB_CQ_OFFSET); irdma_uk_cq_init(&iwucq->cq, &info); return &iwucq->verbs_cq.cq_ex; err_create_cq: if (iwucq->vmr_shadow_area.ibv_mr.handle) { ibv_cmd_dereg_mr(&iwucq->vmr_shadow_area.ibv_mr); irdma_free_hw_buf(info.shadow_area, IRDMA_DB_SHADOW_AREA_SIZE); } err_alloc_shadow: ibv_cmd_dereg_mr(&iwucq->vmr.ibv_mr); err_dereg_mr: irdma_free_hw_buf(info.cq_base, total_size); err_cq_base: printf("%s: failed to initialize CQ\n", __func__); pthread_spin_destroy(&iwucq->lock); free(iwucq); + errno = ret; return NULL; } struct ibv_cq * irdma_ucreate_cq(struct ibv_context *context, int cqe, struct ibv_comp_channel *channel, int comp_vector) { struct ibv_cq_init_attr_ex attr_ex = { .cqe = cqe, .channel = channel, .comp_vector = comp_vector, }; struct ibv_cq_ex *ibvcq_ex; ibvcq_ex = ucreate_cq(context, &attr_ex, false); return ibvcq_ex ? 
ibv_cq_ex_to_cq(ibvcq_ex) : NULL; } struct ibv_cq_ex * irdma_ucreate_cq_ex(struct ibv_context *context, struct ibv_cq_init_attr_ex *attr_ex) { return ucreate_cq(context, attr_ex, true); } /** * irdma_free_cq_buf - free memory for cq buffer * @cq_buf: cq buf to free */ static void irdma_free_cq_buf(struct irdma_cq_buf *cq_buf) { ibv_cmd_dereg_mr(&cq_buf->vmr.ibv_mr); irdma_free_hw_buf(cq_buf->cq.cq_base, cq_buf->buf_size); free(cq_buf); } /** * irdma_process_resize_list - process the cq list to remove buffers * @iwucq: cq which owns the list * @lcqe_buf: cq buf where the last cqe is found */ static int irdma_process_resize_list(struct irdma_ucq *iwucq, struct irdma_cq_buf *lcqe_buf) { struct irdma_cq_buf *cq_buf, *next; int cq_cnt = 0; LIST_FOREACH_SAFE(cq_buf, &iwucq->resize_list, list, next) { if (cq_buf == lcqe_buf) return cq_cnt; LIST_REMOVE(cq_buf, list); irdma_free_cq_buf(cq_buf); cq_cnt++; } return cq_cnt; } static void irdma_remove_cmpls_list(struct irdma_ucq *iwucq) { struct irdma_cmpl_gen *cmpl_node, *next; LIST_FOREACH_SAFE(cmpl_node, &iwucq->cmpl_generated, list, next) { LIST_REMOVE(cmpl_node, list); free(cmpl_node); } } static int irdma_generated_cmpls(struct irdma_ucq *iwucq, struct irdma_cq_poll_info *cq_poll_info) { struct irdma_cmpl_gen *cmpl; if (!iwucq || LIST_EMPTY(&iwucq->cmpl_generated)) return ENOENT; cmpl = LIST_FIRST(&iwucq->cmpl_generated); LIST_REMOVE(cmpl, list); memcpy(cq_poll_info, &cmpl->cpi, sizeof(*cq_poll_info)); free(cmpl); return 0; } /** * irdma_set_cpi_common_values - fill in values for polling info struct * @cpi: resulting structure of cq_poll_info type * @qp: QPair * @qp_num: id of the QP */ static void irdma_set_cpi_common_values(struct irdma_cq_poll_info *cpi, struct irdma_qp_uk *qp, __u32 qp_num) { cpi->comp_status = IRDMA_COMPL_STATUS_FLUSHED; cpi->error = 1; cpi->major_err = IRDMA_FLUSH_MAJOR_ERR; cpi->minor_err = FLUSH_GENERAL_ERR; cpi->qp_handle = (irdma_qp_handle) (uintptr_t)qp; cpi->qp_id = qp_num; } static bool irdma_cq_empty(struct irdma_ucq *iwucq) { struct irdma_cq_uk *ukcq; __u64 qword3; __le64 *cqe; __u8 polarity; ukcq = &iwucq->cq; cqe = IRDMA_GET_CURRENT_CQ_ELEM(ukcq); get_64bit_val(cqe, 24, &qword3); polarity = (__u8) FIELD_GET(IRDMA_CQ_VALID, qword3); return polarity != ukcq->polarity; } /** * irdma_generate_flush_completions - generate completion from WRs * @iwuqp: pointer to QP */ static void irdma_generate_flush_completions(struct irdma_uqp *iwuqp) { struct irdma_qp_uk *qp = &iwuqp->qp; struct irdma_ring *sq_ring = &qp->sq_ring; struct irdma_ring *rq_ring = &qp->rq_ring; struct irdma_cmpl_gen *cmpl; __le64 *sw_wqe; __u64 wqe_qword; __u32 wqe_idx; if (pthread_spin_lock(&iwuqp->send_cq->lock)) return; if (irdma_cq_empty(iwuqp->send_cq)) { while (IRDMA_RING_MORE_WORK(*sq_ring)) { cmpl = malloc(sizeof(*cmpl)); if (!cmpl) { pthread_spin_unlock(&iwuqp->send_cq->lock); return; } wqe_idx = sq_ring->tail; irdma_set_cpi_common_values(&cmpl->cpi, qp, qp->qp_id); cmpl->cpi.wr_id = qp->sq_wrtrk_array[wqe_idx].wrid; sw_wqe = qp->sq_base[wqe_idx].elem; get_64bit_val(sw_wqe, 24, &wqe_qword); cmpl->cpi.op_type = (__u8) FIELD_GET(IRDMAQPSQ_OPCODE, wqe_qword); /* remove the SQ WR by moving SQ tail */ IRDMA_RING_SET_TAIL(*sq_ring, sq_ring->tail + qp->sq_wrtrk_array[sq_ring->tail].quanta); LIST_INSERT_HEAD(&iwuqp->send_cq->cmpl_generated, cmpl, list); } } pthread_spin_unlock(&iwuqp->send_cq->lock); if (pthread_spin_lock(&iwuqp->recv_cq->lock)) return; if (irdma_cq_empty(iwuqp->recv_cq)) { while (IRDMA_RING_MORE_WORK(*rq_ring)) { cmpl = 
malloc(sizeof(*cmpl)); if (!cmpl) { pthread_spin_unlock(&iwuqp->recv_cq->lock); return; } wqe_idx = rq_ring->tail; irdma_set_cpi_common_values(&cmpl->cpi, qp, qp->qp_id); cmpl->cpi.wr_id = qp->rq_wrid_array[wqe_idx]; cmpl->cpi.op_type = IRDMA_OP_TYPE_REC; /* remove the RQ WR by moving RQ tail */ IRDMA_RING_SET_TAIL(*rq_ring, rq_ring->tail + 1); LIST_INSERT_HEAD(&iwuqp->recv_cq->cmpl_generated, cmpl, list); } } pthread_spin_unlock(&iwuqp->recv_cq->lock); } void * irdma_flush_thread(void *arg) { __u8 i = 5; struct irdma_uqp *iwuqp = arg; while (--i) { if (pthread_spin_lock(&iwuqp->lock)) break; irdma_generate_flush_completions(arg); pthread_spin_unlock(&iwuqp->lock); sleep(1); } pthread_exit(NULL); } /** * irdma_udestroy_cq - destroys cq * @cq: ptr to cq to be destroyed */ int irdma_udestroy_cq(struct ibv_cq *cq) { struct irdma_uk_attrs *uk_attrs; struct irdma_uvcontext *iwvctx; struct irdma_ucq *iwucq; int ret; iwucq = container_of(cq, struct irdma_ucq, verbs_cq.cq); iwvctx = container_of(cq->context, struct irdma_uvcontext, ibv_ctx); uk_attrs = &iwvctx->uk_attrs; ret = pthread_spin_destroy(&iwucq->lock); if (ret) goto err; if (!LIST_EMPTY(&iwucq->cmpl_generated)) irdma_remove_cmpls_list(iwucq); irdma_process_resize_list(iwucq, NULL); ret = ibv_cmd_destroy_cq(cq); if (ret) goto err; ibv_cmd_dereg_mr(&iwucq->vmr.ibv_mr); irdma_free_hw_buf(iwucq->cq.cq_base, iwucq->buf_size); if (uk_attrs->feature_flags & IRDMA_FEATURE_CQ_RESIZE) { ibv_cmd_dereg_mr(&iwucq->vmr_shadow_area.ibv_mr); irdma_free_hw_buf(iwucq->cq.shadow_area, IRDMA_DB_SHADOW_AREA_SIZE); } free(iwucq); return 0; err: return ret; } static enum ibv_wc_status irdma_flush_err_to_ib_wc_status(enum irdma_flush_opcode opcode) { switch (opcode) { case FLUSH_PROT_ERR: return IBV_WC_LOC_PROT_ERR; case FLUSH_REM_ACCESS_ERR: return IBV_WC_REM_ACCESS_ERR; case FLUSH_LOC_QP_OP_ERR: return IBV_WC_LOC_QP_OP_ERR; case FLUSH_REM_OP_ERR: return IBV_WC_REM_OP_ERR; case FLUSH_LOC_LEN_ERR: return IBV_WC_LOC_LEN_ERR; case FLUSH_GENERAL_ERR: return IBV_WC_WR_FLUSH_ERR; case FLUSH_MW_BIND_ERR: return IBV_WC_MW_BIND_ERR; case FLUSH_REM_INV_REQ_ERR: return IBV_WC_REM_INV_REQ_ERR; case FLUSH_RETRY_EXC_ERR: return IBV_WC_RETRY_EXC_ERR; case FLUSH_FATAL_ERR: default: return IBV_WC_FATAL_ERR; } } static inline void set_ib_wc_op_sq(struct irdma_cq_poll_info *cur_cqe, struct ibv_wc *entry) { switch (cur_cqe->op_type) { case IRDMA_OP_TYPE_RDMA_WRITE: case IRDMA_OP_TYPE_RDMA_WRITE_SOL: entry->opcode = IBV_WC_RDMA_WRITE; break; case IRDMA_OP_TYPE_RDMA_READ: entry->opcode = IBV_WC_RDMA_READ; break; case IRDMA_OP_TYPE_SEND_SOL: case IRDMA_OP_TYPE_SEND_SOL_INV: case IRDMA_OP_TYPE_SEND_INV: case IRDMA_OP_TYPE_SEND: entry->opcode = IBV_WC_SEND; break; case IRDMA_OP_TYPE_BIND_MW: entry->opcode = IBV_WC_BIND_MW; break; case IRDMA_OP_TYPE_INV_STAG: entry->opcode = IBV_WC_LOCAL_INV; break; default: entry->status = IBV_WC_GENERAL_ERR; printf("%s: Invalid opcode = %d in CQE\n", __func__, cur_cqe->op_type); } } static inline void set_ib_wc_op_rq(struct irdma_cq_poll_info *cur_cqe, struct ibv_wc *entry, bool send_imm_support) { if (!send_imm_support) { entry->opcode = cur_cqe->imm_valid ? 
IBV_WC_RECV_RDMA_WITH_IMM : IBV_WC_RECV; return; } switch (cur_cqe->op_type) { case IBV_OPCODE_RDMA_WRITE_ONLY_WITH_IMMEDIATE: case IBV_OPCODE_RDMA_WRITE_LAST_WITH_IMMEDIATE: entry->opcode = IBV_WC_RECV_RDMA_WITH_IMM; break; default: entry->opcode = IBV_WC_RECV; } } /** * irdma_process_cqe_ext - process current cqe for extended CQ * @cur_cqe - current cqe info */ static void irdma_process_cqe_ext(struct irdma_cq_poll_info *cur_cqe) { struct irdma_ucq *iwucq = container_of(cur_cqe, struct irdma_ucq, cur_cqe); struct ibv_cq_ex *ibvcq_ex = &iwucq->verbs_cq.cq_ex; ibvcq_ex->wr_id = cur_cqe->wr_id; if (cur_cqe->error) ibvcq_ex->status = (cur_cqe->comp_status == IRDMA_COMPL_STATUS_FLUSHED) ? irdma_flush_err_to_ib_wc_status(cur_cqe->minor_err) : IBV_WC_GENERAL_ERR; else ibvcq_ex->status = IBV_WC_SUCCESS; } /** * irdma_process_cqe - process current cqe info * @entry - ibv_wc object to fill in for non-extended CQ * @cur_cqe - current cqe info */ static void irdma_process_cqe(struct ibv_wc *entry, struct irdma_cq_poll_info *cur_cqe) { struct irdma_qp_uk *qp; struct ibv_qp *ib_qp; entry->wc_flags = 0; entry->wr_id = cur_cqe->wr_id; entry->qp_num = cur_cqe->qp_id; qp = cur_cqe->qp_handle; ib_qp = qp->back_qp; if (cur_cqe->error) { entry->status = (cur_cqe->comp_status == IRDMA_COMPL_STATUS_FLUSHED) ? irdma_flush_err_to_ib_wc_status(cur_cqe->minor_err) : IBV_WC_GENERAL_ERR; entry->vendor_err = cur_cqe->major_err << 16 | cur_cqe->minor_err; } else { entry->status = IBV_WC_SUCCESS; } if (cur_cqe->imm_valid) { entry->imm_data = htonl(cur_cqe->imm_data); entry->wc_flags |= IBV_WC_WITH_IMM; } if (cur_cqe->q_type == IRDMA_CQE_QTYPE_SQ) { set_ib_wc_op_sq(cur_cqe, entry); } else { set_ib_wc_op_rq(cur_cqe, entry, qp->qp_caps & IRDMA_SEND_WITH_IMM ? true : false); if (ib_qp->qp_type != IBV_QPT_UD && cur_cqe->stag_invalid_set) { entry->invalidated_rkey = cur_cqe->inv_stag; entry->wc_flags |= IBV_WC_WITH_INV; } } if (ib_qp->qp_type == IBV_QPT_UD) { entry->src_qp = cur_cqe->ud_src_qpn; entry->wc_flags |= IBV_WC_GRH; } else { entry->src_qp = cur_cqe->qp_id; } entry->byte_len = cur_cqe->bytes_xfered; } /** * irdma_poll_one - poll one entry of the CQ * @ukcq: ukcq to poll * @cur_cqe: current CQE info to be filled in * @entry: ibv_wc object to be filled for non-extended CQ or NULL for extended CQ * * Returns the internal irdma device error code or 0 on success */ static int irdma_poll_one(struct irdma_cq_uk *ukcq, struct irdma_cq_poll_info *cur_cqe, struct ibv_wc *entry) { int ret = irdma_uk_cq_poll_cmpl(ukcq, cur_cqe); if (ret) return ret; if (!entry) irdma_process_cqe_ext(cur_cqe); else irdma_process_cqe(entry, cur_cqe); return 0; } /** * __irdma_upoll_cq - irdma util function to poll device CQ * @iwucq: irdma cq to poll * @num_entries: max cq entries to poll * @entry: pointer to array of ibv_wc objects to be filled in for each completion or NULL if ext CQ * * Returns non-negative value equal to the number of completions * found. On failure, EINVAL */ static int __irdma_upoll_cq(struct irdma_ucq *iwucq, int num_entries, struct ibv_wc *entry) { struct irdma_cq_buf *cq_buf, *next; struct irdma_cq_buf *last_buf = NULL; struct irdma_cq_poll_info *cur_cqe = &iwucq->cur_cqe; bool cq_new_cqe = false; int resized_bufs = 0; int npolled = 0; int ret; /* go through the list of previously resized CQ buffers */ LIST_FOREACH_SAFE(cq_buf, &iwucq->resize_list, list, next) { while (npolled < num_entries) { ret = irdma_poll_one(&cq_buf->cq, cur_cqe, entry ? 
entry + npolled : NULL); if (!ret) { ++npolled; cq_new_cqe = true; continue; } if (ret == ENOENT) break; /* QP using the CQ is destroyed. Skip reporting this CQE */ if (ret == EFAULT) { cq_new_cqe = true; continue; } goto error; } /* save the resized CQ buffer which received the last cqe */ if (cq_new_cqe) last_buf = cq_buf; cq_new_cqe = false; } /* check the current CQ for new cqes */ while (npolled < num_entries) { ret = irdma_poll_one(&iwucq->cq, cur_cqe, entry ? entry + npolled : NULL); if (ret == ENOENT) { ret = irdma_generated_cmpls(iwucq, cur_cqe); if (!ret) { if (entry) irdma_process_cqe(entry + npolled, cur_cqe); else irdma_process_cqe_ext(cur_cqe); } } if (!ret) { ++npolled; cq_new_cqe = true; continue; } if (ret == ENOENT) break; /* QP using the CQ is destroyed. Skip reporting this CQE */ if (ret == EFAULT) { cq_new_cqe = true; continue; } goto error; } if (cq_new_cqe) /* all previous CQ resizes are complete */ resized_bufs = irdma_process_resize_list(iwucq, NULL); else if (last_buf) /* only CQ resizes up to the last_buf are complete */ resized_bufs = irdma_process_resize_list(iwucq, last_buf); if (resized_bufs) /* report to the HW the number of complete CQ resizes */ irdma_uk_cq_set_resized_cnt(&iwucq->cq, resized_bufs); return npolled; error: printf("%s: Error polling CQ, irdma_err: %d\n", __func__, ret); return EINVAL; } /** * irdma_upoll_cq - verb API callback to poll device CQ * @cq: ibv_cq to poll * @num_entries: max cq entries to poll * @entry: pointer to array of ibv_wc objects to be filled in for each completion * * Returns non-negative value equal to the number of completions * found and a negative error code on failure */ int irdma_upoll_cq(struct ibv_cq *cq, int num_entries, struct ibv_wc *entry) { struct irdma_ucq *iwucq; int ret; iwucq = container_of(cq, struct irdma_ucq, verbs_cq.cq); ret = pthread_spin_lock(&iwucq->lock); if (ret) return -ret; ret = __irdma_upoll_cq(iwucq, num_entries, entry); pthread_spin_unlock(&iwucq->lock); return ret; } /** * irdma_start_poll - verb_ex API callback to poll batch of WC's * @ibvcq_ex: ibv extended CQ * @attr: attributes (not used) * * Start polling batch of work completions. Return 0 on success, ENONENT when * no completions are available on CQ. And an error code on errors */ static int irdma_start_poll(struct ibv_cq_ex *ibvcq_ex, struct ibv_poll_cq_attr *attr) { struct irdma_ucq *iwucq; int ret; iwucq = container_of(ibvcq_ex, struct irdma_ucq, verbs_cq.cq_ex); ret = pthread_spin_lock(&iwucq->lock); if (ret) return ret; ret = __irdma_upoll_cq(iwucq, 1, NULL); if (ret == 1) return 0; /* No Completions on CQ */ if (!ret) ret = ENOENT; pthread_spin_unlock(&iwucq->lock); return ret; } /** * irdma_next_poll - verb_ex API callback to get next WC * @ibvcq_ex: ibv extended CQ * * Return 0 on success, ENONENT when no completions are available on CQ. 
* And an error code on errors */ static int irdma_next_poll(struct ibv_cq_ex *ibvcq_ex) { struct irdma_ucq *iwucq; int ret; iwucq = container_of(ibvcq_ex, struct irdma_ucq, verbs_cq.cq_ex); ret = __irdma_upoll_cq(iwucq, 1, NULL); if (ret == 1) return 0; /* No Completions on CQ */ if (!ret) ret = ENOENT; return ret; } /** * irdma_end_poll - verb_ex API callback to end polling of WC's * @ibvcq_ex: ibv extended CQ */ static void irdma_end_poll(struct ibv_cq_ex *ibvcq_ex) { struct irdma_ucq *iwucq = container_of(ibvcq_ex, struct irdma_ucq, verbs_cq.cq_ex); pthread_spin_unlock(&iwucq->lock); } static enum ibv_wc_opcode irdma_wc_read_opcode(struct ibv_cq_ex *ibvcq_ex) { struct irdma_ucq *iwucq = container_of(ibvcq_ex, struct irdma_ucq, verbs_cq.cq_ex); switch (iwucq->cur_cqe.op_type) { case IRDMA_OP_TYPE_RDMA_WRITE: case IRDMA_OP_TYPE_RDMA_WRITE_SOL: return IBV_WC_RDMA_WRITE; case IRDMA_OP_TYPE_RDMA_READ: return IBV_WC_RDMA_READ; case IRDMA_OP_TYPE_SEND_SOL: case IRDMA_OP_TYPE_SEND_SOL_INV: case IRDMA_OP_TYPE_SEND_INV: case IRDMA_OP_TYPE_SEND: return IBV_WC_SEND; case IRDMA_OP_TYPE_BIND_MW: return IBV_WC_BIND_MW; case IRDMA_OP_TYPE_REC: return IBV_WC_RECV; case IRDMA_OP_TYPE_REC_IMM: return IBV_WC_RECV_RDMA_WITH_IMM; case IRDMA_OP_TYPE_INV_STAG: return IBV_WC_LOCAL_INV; } printf("%s: Invalid opcode = %d in CQE\n", __func__, iwucq->cur_cqe.op_type); return 0; } static uint32_t irdma_wc_read_vendor_err(struct ibv_cq_ex *ibvcq_ex){ struct irdma_cq_poll_info *cur_cqe; struct irdma_ucq *iwucq; iwucq = container_of(ibvcq_ex, struct irdma_ucq, verbs_cq.cq_ex); cur_cqe = &iwucq->cur_cqe; return cur_cqe->error ? cur_cqe->major_err << 16 | cur_cqe->minor_err : 0; } static int irdma_wc_read_wc_flags(struct ibv_cq_ex *ibvcq_ex) { struct irdma_cq_poll_info *cur_cqe; struct irdma_ucq *iwucq; struct irdma_qp_uk *qp; struct ibv_qp *ib_qp; int wc_flags = 0; iwucq = container_of(ibvcq_ex, struct irdma_ucq, verbs_cq.cq_ex); cur_cqe = &iwucq->cur_cqe; qp = cur_cqe->qp_handle; ib_qp = qp->back_qp; if (cur_cqe->imm_valid) wc_flags |= IBV_WC_WITH_IMM; if (ib_qp->qp_type == IBV_QPT_UD) { wc_flags |= IBV_WC_GRH; } else { if (cur_cqe->stag_invalid_set) { switch (cur_cqe->op_type) { case IRDMA_OP_TYPE_REC: wc_flags |= IBV_WC_WITH_INV; break; case IRDMA_OP_TYPE_REC_IMM: wc_flags |= IBV_WC_WITH_INV; break; } } } return wc_flags; } static uint32_t irdma_wc_read_byte_len(struct ibv_cq_ex *ibvcq_ex){ struct irdma_ucq *iwucq = container_of(ibvcq_ex, struct irdma_ucq, verbs_cq.cq_ex); return iwucq->cur_cqe.bytes_xfered; } static __be32 irdma_wc_read_imm_data(struct ibv_cq_ex *ibvcq_ex){ struct irdma_cq_poll_info *cur_cqe; struct irdma_ucq *iwucq; iwucq = container_of(ibvcq_ex, struct irdma_ucq, verbs_cq.cq_ex); cur_cqe = &iwucq->cur_cqe; return cur_cqe->imm_valid ? htonl(cur_cqe->imm_data) : 0; } static uint32_t irdma_wc_read_qp_num(struct ibv_cq_ex *ibvcq_ex){ struct irdma_ucq *iwucq = container_of(ibvcq_ex, struct irdma_ucq, verbs_cq.cq_ex); return iwucq->cur_cqe.qp_id; } static uint32_t irdma_wc_read_src_qp(struct ibv_cq_ex *ibvcq_ex){ struct irdma_cq_poll_info *cur_cqe; struct irdma_ucq *iwucq; struct irdma_qp_uk *qp; struct ibv_qp *ib_qp; iwucq = container_of(ibvcq_ex, struct irdma_ucq, verbs_cq.cq_ex); cur_cqe = &iwucq->cur_cqe; qp = cur_cqe->qp_handle; ib_qp = qp->back_qp; return ib_qp->qp_type == IBV_QPT_UD ? 
cur_cqe->ud_src_qpn : cur_cqe->qp_id; } static uint8_t irdma_wc_read_sl(struct ibv_cq_ex *ibvcq_ex){ return 0; } void irdma_ibvcq_ex_fill_priv_funcs(struct irdma_ucq *iwucq, struct ibv_cq_init_attr_ex *attr_ex) { struct ibv_cq_ex *ibvcq_ex = &iwucq->verbs_cq.cq_ex; ibvcq_ex->start_poll = irdma_start_poll; ibvcq_ex->end_poll = irdma_end_poll; ibvcq_ex->next_poll = irdma_next_poll; ibvcq_ex->read_opcode = irdma_wc_read_opcode; ibvcq_ex->read_vendor_err = irdma_wc_read_vendor_err; ibvcq_ex->read_wc_flags = irdma_wc_read_wc_flags; if (attr_ex->wc_flags & IBV_WC_EX_WITH_BYTE_LEN) ibvcq_ex->read_byte_len = irdma_wc_read_byte_len; if (attr_ex->wc_flags & IBV_WC_EX_WITH_IMM) ibvcq_ex->read_imm_data = irdma_wc_read_imm_data; if (attr_ex->wc_flags & IBV_WC_EX_WITH_QP_NUM) ibvcq_ex->read_qp_num = irdma_wc_read_qp_num; if (attr_ex->wc_flags & IBV_WC_EX_WITH_SRC_QP) ibvcq_ex->read_src_qp = irdma_wc_read_src_qp; if (attr_ex->wc_flags & IBV_WC_EX_WITH_SL) ibvcq_ex->read_sl = irdma_wc_read_sl; } /** * irdma_arm_cq - arm of cq * @iwucq: cq to which arm * @cq_notify: notification params */ static void irdma_arm_cq(struct irdma_ucq *iwucq, enum irdma_cmpl_notify cq_notify) { iwucq->is_armed = true; iwucq->arm_sol = true; iwucq->skip_arm = false; iwucq->skip_sol = true; irdma_uk_cq_request_notification(&iwucq->cq, cq_notify); } /** * irdma_uarm_cq - callback for arm of cq * @cq: cq to arm * @solicited: to get notify params */ int irdma_uarm_cq(struct ibv_cq *cq, int solicited) { struct irdma_ucq *iwucq; enum irdma_cmpl_notify cq_notify = IRDMA_CQ_COMPL_EVENT; int ret; iwucq = container_of(cq, struct irdma_ucq, verbs_cq.cq); if (solicited) cq_notify = IRDMA_CQ_COMPL_SOLICITED; ret = pthread_spin_lock(&iwucq->lock); if (ret) return ret; if (iwucq->is_armed) { if (iwucq->arm_sol && !solicited) { irdma_arm_cq(iwucq, cq_notify); } else { iwucq->skip_arm = true; iwucq->skip_sol = solicited ? 
true : false; } } else { irdma_arm_cq(iwucq, cq_notify); } pthread_spin_unlock(&iwucq->lock); return 0; } /** * irdma_cq_event - cq to do completion event * @cq: cq to arm */ void irdma_cq_event(struct ibv_cq *cq) { struct irdma_ucq *iwucq; iwucq = container_of(cq, struct irdma_ucq, verbs_cq.cq); if (pthread_spin_lock(&iwucq->lock)) return; if (iwucq->skip_arm) irdma_arm_cq(iwucq, IRDMA_CQ_COMPL_EVENT); else iwucq->is_armed = false; pthread_spin_unlock(&iwucq->lock); } void * irdma_mmap(int fd, off_t offset) { void *map; map = mmap(NULL, IRDMA_HW_PAGE_SIZE, PROT_WRITE | PROT_READ, MAP_SHARED, fd, offset); if (map == MAP_FAILED) return map; if (ibv_dontfork_range(map, IRDMA_HW_PAGE_SIZE)) { munmap(map, IRDMA_HW_PAGE_SIZE); return MAP_FAILED; } return map; } void irdma_munmap(void *map) { ibv_dofork_range(map, IRDMA_HW_PAGE_SIZE); munmap(map, IRDMA_HW_PAGE_SIZE); } /** * irdma_destroy_vmapped_qp - destroy resources for qp * @iwuqp: qp struct for resources */ static int irdma_destroy_vmapped_qp(struct irdma_uqp *iwuqp) { int ret; ret = ibv_cmd_destroy_qp(&iwuqp->ibv_qp); if (ret) return ret; if (iwuqp->qp.push_db) irdma_munmap(iwuqp->qp.push_db); if (iwuqp->qp.push_wqe) irdma_munmap(iwuqp->qp.push_wqe); ibv_cmd_dereg_mr(&iwuqp->vmr.ibv_mr); return 0; } /** * irdma_vmapped_qp - create resources for qp * @iwuqp: qp struct for resources * @pd: pd for the qp * @attr: attributes of qp passed * @resp: response back from create qp * @info: uk info for initializing user level qp * @abi_ver: abi version of the create qp command */ static int irdma_vmapped_qp(struct irdma_uqp *iwuqp, struct ibv_pd *pd, struct ibv_qp_init_attr *attr, struct irdma_qp_uk_init_info *info, bool legacy_mode) { struct irdma_ucreate_qp cmd = {}; size_t sqsize, rqsize, totalqpsize; struct irdma_ucreate_qp_resp resp = {}; struct irdma_ureg_mr reg_mr_cmd = {}; struct ibv_reg_mr_resp reg_mr_resp = {}; int ret; sqsize = roundup(info->sq_depth * IRDMA_QP_WQE_MIN_SIZE, IRDMA_HW_PAGE_SIZE); rqsize = roundup(info->rq_depth * IRDMA_QP_WQE_MIN_SIZE, IRDMA_HW_PAGE_SIZE); totalqpsize = rqsize + sqsize + IRDMA_DB_SHADOW_AREA_SIZE; info->sq = irdma_alloc_hw_buf(totalqpsize); iwuqp->buf_size = totalqpsize; if (!info->sq) return ENOMEM; memset(info->sq, 0, totalqpsize); info->rq = &info->sq[sqsize / IRDMA_QP_WQE_MIN_SIZE]; info->shadow_area = info->rq[rqsize / IRDMA_QP_WQE_MIN_SIZE].elem; reg_mr_cmd.reg_type = IRDMA_MEMREG_TYPE_QP; reg_mr_cmd.sq_pages = sqsize >> IRDMA_HW_PAGE_SHIFT; reg_mr_cmd.rq_pages = rqsize >> IRDMA_HW_PAGE_SHIFT; ret = ibv_cmd_reg_mr(pd, info->sq, totalqpsize, (uintptr_t)info->sq, IBV_ACCESS_LOCAL_WRITE, &iwuqp->vmr.ibv_mr, ®_mr_cmd.ibv_cmd, sizeof(reg_mr_cmd), ®_mr_resp, sizeof(reg_mr_resp)); if (ret) goto err_dereg_mr; cmd.user_wqe_bufs = (__u64) ((uintptr_t)info->sq); cmd.user_compl_ctx = (__u64) (uintptr_t)&iwuqp->qp; ret = ibv_cmd_create_qp(pd, &iwuqp->ibv_qp, attr, &cmd.ibv_cmd, sizeof(cmd), &resp.ibv_resp, sizeof(struct irdma_ucreate_qp_resp)); if (ret) goto err_qp; info->sq_size = resp.actual_sq_size; info->rq_size = resp.actual_rq_size; info->first_sq_wq = legacy_mode ? 
1 : resp.lsmm; info->qp_caps = resp.qp_caps; info->qp_id = resp.qp_id; iwuqp->irdma_drv_opt = resp.irdma_drv_opt; iwuqp->ibv_qp.qp_num = resp.qp_id; iwuqp->send_cq = container_of(attr->send_cq, struct irdma_ucq, verbs_cq.cq); iwuqp->recv_cq = container_of(attr->recv_cq, struct irdma_ucq, verbs_cq.cq); iwuqp->send_cq->uqp = iwuqp; iwuqp->recv_cq->uqp = iwuqp; return 0; err_qp: ibv_cmd_dereg_mr(&iwuqp->vmr.ibv_mr); err_dereg_mr: printf("%s: failed to create QP, status %d\n", __func__, ret); irdma_free_hw_buf(info->sq, iwuqp->buf_size); return ret; } /** * irdma_ucreate_qp - create qp on user app * @pd: pd for the qp * @attr: attributes of the qp to be created (sizes, sge, cq) */ struct ibv_qp * irdma_ucreate_qp(struct ibv_pd *pd, struct ibv_qp_init_attr *attr) { struct irdma_qp_uk_init_info info = {}; struct irdma_uk_attrs *uk_attrs; struct irdma_uvcontext *iwvctx; struct irdma_uqp *iwuqp; int status; if (attr->qp_type != IBV_QPT_RC && attr->qp_type != IBV_QPT_UD) { printf("%s: failed to create QP, unsupported QP type: 0x%x\n", __func__, attr->qp_type); errno = EOPNOTSUPP; return NULL; } iwvctx = container_of(pd->context, struct irdma_uvcontext, ibv_ctx); uk_attrs = &iwvctx->uk_attrs; if (attr->cap.max_send_sge > uk_attrs->max_hw_wq_frags || attr->cap.max_recv_sge > uk_attrs->max_hw_wq_frags || attr->cap.max_inline_data > uk_attrs->max_hw_inline) { errno = EINVAL; return NULL; } info.uk_attrs = uk_attrs; info.sq_size = attr->cap.max_send_wr; info.rq_size = attr->cap.max_recv_wr; info.max_sq_frag_cnt = attr->cap.max_send_sge; info.max_rq_frag_cnt = attr->cap.max_recv_sge; info.max_inline_data = attr->cap.max_inline_data; info.abi_ver = iwvctx->abi_ver; status = irdma_uk_calc_depth_shift_sq(&info, &info.sq_depth, &info.sq_shift); if (status) { printf("%s: invalid SQ attributes, max_send_wr=%d max_send_sge=%d max_inline=%d\n", __func__, attr->cap.max_send_wr, attr->cap.max_send_sge, attr->cap.max_inline_data); errno = status; return NULL; } status = irdma_uk_calc_depth_shift_rq(&info, &info.rq_depth, &info.rq_shift); if (status) { printf("%s: invalid RQ attributes, recv_wr=%d recv_sge=%d\n", __func__, attr->cap.max_recv_wr, attr->cap.max_recv_sge); errno = status; return NULL; } iwuqp = memalign(1024, sizeof(*iwuqp)); if (!iwuqp) return NULL; memset(iwuqp, 0, sizeof(*iwuqp)); - if (pthread_spin_init(&iwuqp->lock, PTHREAD_PROCESS_PRIVATE)) + status = pthread_spin_init(&iwuqp->lock, PTHREAD_PROCESS_PRIVATE); + if (status) goto err_free_qp; info.sq_size = info.sq_depth >> info.sq_shift; info.rq_size = info.rq_depth >> info.rq_shift; /** * Maintain backward compatibility with older ABI which pass sq * and rq depth (in quanta) in cap.max_send_wr a cap.max_recv_wr */ if (!iwvctx->use_raw_attrs) { attr->cap.max_send_wr = info.sq_size; attr->cap.max_recv_wr = info.rq_size; } iwuqp->recv_sges = calloc(attr->cap.max_recv_sge, sizeof(*iwuqp->recv_sges)); - if (!iwuqp->recv_sges) + if (!iwuqp->recv_sges) { + status = errno; /* preserve errno */ goto err_destroy_lock; + } info.wqe_alloc_db = (u32 *)iwvctx->db; info.legacy_mode = iwvctx->legacy_mode; info.sq_wrtrk_array = calloc(info.sq_depth, sizeof(*info.sq_wrtrk_array)); - if (!info.sq_wrtrk_array) + if (!info.sq_wrtrk_array) { + status = errno; /* preserve errno */ goto err_free_rsges; + } info.rq_wrid_array = calloc(info.rq_depth, sizeof(*info.rq_wrid_array)); - if (!info.rq_wrid_array) + if (!info.rq_wrid_array) { + status = errno; /* preserve errno */ goto err_free_sq_wrtrk; + } iwuqp->sq_sig_all = attr->sq_sig_all; iwuqp->qp_type = attr->qp_type; 
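	/*
	 * irdma_vmapped_qp() below carves one contiguous, HW-page aligned
	 * allocation into [ SQ | RQ | shadow area ] and registers it as a
	 * single IRDMA_MEMREG_TYPE_QP memory region.  Illustrative
	 * (hypothetical) sizing, assuming the 32-byte minimum WQE quantum
	 * and 4 KiB hardware pages:
	 *
	 *   sq_depth = 256  ->  sqsize = roundup(256 * 32, 4096) = 8192
	 *   rq_depth = 128  ->  rqsize = roundup(128 * 32, 4096) = 4096
	 *
	 * giving sq_pages = 2 and rq_pages = 1 in the register-MR command,
	 * with the shadow area placed directly after the RQ.
	 */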
status = irdma_vmapped_qp(iwuqp, pd, attr, &info, iwvctx->legacy_mode); - if (status) { - errno = status; + if (status) goto err_free_rq_wrid; - } iwuqp->qp.back_qp = iwuqp; iwuqp->qp.lock = &iwuqp->lock; status = irdma_uk_qp_init(&iwuqp->qp, &info); - if (status) { - errno = status; + if (status) goto err_free_vmap_qp; - } attr->cap.max_send_wr = (info.sq_depth - IRDMA_SQ_RSVD) >> info.sq_shift; attr->cap.max_recv_wr = (info.rq_depth - IRDMA_RQ_RSVD) >> info.rq_shift; return &iwuqp->ibv_qp; err_free_vmap_qp: irdma_destroy_vmapped_qp(iwuqp); irdma_free_hw_buf(info.sq, iwuqp->buf_size); err_free_rq_wrid: free(info.rq_wrid_array); err_free_sq_wrtrk: free(info.sq_wrtrk_array); err_free_rsges: free(iwuqp->recv_sges); err_destroy_lock: pthread_spin_destroy(&iwuqp->lock); err_free_qp: printf("%s: failed to create QP\n", __func__); free(iwuqp); + errno = status; return NULL; } /** * irdma_uquery_qp - query qp for some attribute * @qp: qp for the attributes query * @attr: to return the attributes * @attr_mask: mask of what is query for * @init_attr: initial attributes during create_qp */ int irdma_uquery_qp(struct ibv_qp *qp, struct ibv_qp_attr *attr, int attr_mask, struct ibv_qp_init_attr *init_attr) { struct ibv_query_qp cmd; return ibv_cmd_query_qp(qp, attr, attr_mask, init_attr, &cmd, sizeof(cmd)); } /** * irdma_umodify_qp - send qp modify to driver * @qp: qp to modify * @attr: attribute to modify * @attr_mask: mask of the attribute */ int irdma_umodify_qp(struct ibv_qp *qp, struct ibv_qp_attr *attr, int attr_mask) { struct irdma_umodify_qp_resp resp = {}; struct ibv_modify_qp cmd = {}; struct irdma_modify_qp_cmd cmd_ex = {}; struct irdma_uvcontext *iwvctx; struct irdma_uqp *iwuqp; iwuqp = container_of(qp, struct irdma_uqp, ibv_qp); iwvctx = container_of(qp->context, struct irdma_uvcontext, ibv_ctx); if (iwuqp->qp.qp_caps & IRDMA_PUSH_MODE && attr_mask & IBV_QP_STATE && iwvctx->uk_attrs.hw_rev > IRDMA_GEN_1) { u64 offset; void *map; int ret; ret = ibv_cmd_modify_qp_ex(qp, attr, attr_mask, &cmd_ex.ibv_cmd, sizeof(cmd_ex.ibv_cmd), sizeof(cmd_ex), &resp.ibv_resp, sizeof(resp.ibv_resp), sizeof(resp)); if (!ret) iwuqp->qp.rd_fence_rate = resp.rd_fence_rate; if (ret || !resp.push_valid) return ret; if (iwuqp->qp.push_wqe) return ret; offset = resp.push_wqe_mmap_key; map = irdma_mmap(qp->context->cmd_fd, offset); if (map == MAP_FAILED) return ret; iwuqp->qp.push_wqe = map; offset = resp.push_db_mmap_key; map = irdma_mmap(qp->context->cmd_fd, offset); if (map == MAP_FAILED) { irdma_munmap(iwuqp->qp.push_wqe); iwuqp->qp.push_wqe = NULL; printf("failed to map push page, errno %d\n", errno); return ret; } iwuqp->qp.push_wqe += resp.push_offset; iwuqp->qp.push_db = map + resp.push_offset; return ret; } else { int ret; ret = ibv_cmd_modify_qp(qp, attr, attr_mask, &cmd, sizeof(cmd)); if (ret) return ret; if (attr_mask & IBV_QP_STATE && attr->qp_state == IBV_QPS_ERR) pthread_create(&iwuqp->flush_thread, NULL, irdma_flush_thread, iwuqp); return 0; } } static void irdma_issue_flush(struct ibv_qp *qp, bool sq_flush, bool rq_flush) { struct irdma_umodify_qp_resp resp = {}; struct irdma_modify_qp_cmd cmd_ex = {}; struct ibv_qp_attr attr = {}; attr.qp_state = IBV_QPS_ERR; cmd_ex.sq_flush = sq_flush; cmd_ex.rq_flush = rq_flush; ibv_cmd_modify_qp_ex(qp, &attr, IBV_QP_STATE, &cmd_ex.ibv_cmd, sizeof(cmd_ex.ibv_cmd), sizeof(cmd_ex), &resp.ibv_resp, sizeof(resp.ibv_resp), sizeof(resp)); } /** * irdma_clean_cqes - clean cq entries for qp * @qp: qp for which completions are cleaned * @iwcq: cq to be cleaned */ static void 
irdma_clean_cqes(struct irdma_qp_uk *qp, struct irdma_ucq *iwucq) { struct irdma_cq_uk *ukcq = &iwucq->cq; int ret; ret = pthread_spin_lock(&iwucq->lock); if (ret) return; irdma_uk_clean_cq(qp, ukcq); pthread_spin_unlock(&iwucq->lock); } /** * irdma_udestroy_qp - destroy qp * @qp: qp to destroy */ int irdma_udestroy_qp(struct ibv_qp *qp) { struct irdma_uqp *iwuqp; int ret; iwuqp = container_of(qp, struct irdma_uqp, ibv_qp); if (iwuqp->flush_thread) { pthread_cancel(iwuqp->flush_thread); pthread_join(iwuqp->flush_thread, NULL); } ret = pthread_spin_destroy(&iwuqp->lock); if (ret) goto err; ret = irdma_destroy_vmapped_qp(iwuqp); if (ret) goto err; /* Clean any pending completions from the cq(s) */ if (iwuqp->send_cq) irdma_clean_cqes(&iwuqp->qp, iwuqp->send_cq); if (iwuqp->recv_cq && iwuqp->recv_cq != iwuqp->send_cq) irdma_clean_cqes(&iwuqp->qp, iwuqp->recv_cq); if (iwuqp->qp.sq_wrtrk_array) free(iwuqp->qp.sq_wrtrk_array); if (iwuqp->qp.rq_wrid_array) free(iwuqp->qp.rq_wrid_array); irdma_free_hw_buf(iwuqp->qp.sq_base, iwuqp->buf_size); free(iwuqp->recv_sges); free(iwuqp); return 0; err: printf("%s: failed to destroy QP, status %d\n", __func__, ret); return ret; } /** * irdma_copy_sg_list - copy sg list for qp * @sg_list: copied into sg_list * @sgl: copy from sgl * @num_sges: count of sg entries * @max_sges: count of max supported sg entries */ static void irdma_copy_sg_list(struct irdma_sge *sg_list, struct ibv_sge *sgl, int num_sges) { int i; for (i = 0; i < num_sges; i++) { sg_list[i].tag_off = sgl[i].addr; sg_list[i].len = sgl[i].length; sg_list[i].stag = sgl[i].lkey; } } /** * calc_type2_mw_stag - calculate type 2 MW stag * @rkey: desired rkey of the MW * @mw_rkey: type2 memory window rkey * * compute type2 memory window stag by taking lower 8 bits * of the desired rkey and leaving 24 bits if mw->rkey unchanged */ static inline u32 calc_type2_mw_stag(u32 rkey, u32 mw_rkey) { const u32 mask = 0xff; return (rkey & mask) | (mw_rkey & ~mask); } /** * irdma_post_send - post send wr for user application * @ib_qp: qp to post wr * @ib_wr: work request ptr * @bad_wr: return of bad wr if err */ int irdma_upost_send(struct ibv_qp *ib_qp, struct ibv_send_wr *ib_wr, struct ibv_send_wr **bad_wr) { struct irdma_post_sq_info info; struct irdma_uvcontext *iwvctx; struct irdma_uk_attrs *uk_attrs; struct irdma_uqp *iwuqp; bool reflush = false; int err = 0; iwuqp = container_of(ib_qp, struct irdma_uqp, ibv_qp); iwvctx = container_of(ib_qp->context, struct irdma_uvcontext, ibv_ctx); uk_attrs = &iwvctx->uk_attrs; err = pthread_spin_lock(&iwuqp->lock); if (err) return err; if (!IRDMA_RING_MORE_WORK(iwuqp->qp.sq_ring) && ib_qp->state == IBV_QPS_ERR) reflush = true; while (ib_wr) { memset(&info, 0, sizeof(info)); info.wr_id = (u64)(ib_wr->wr_id); if ((ib_wr->send_flags & IBV_SEND_SIGNALED) || iwuqp->sq_sig_all) info.signaled = true; if (ib_wr->send_flags & IBV_SEND_FENCE) info.read_fence = true; switch (ib_wr->opcode) { case IBV_WR_SEND_WITH_IMM: if (iwuqp->qp.qp_caps & IRDMA_SEND_WITH_IMM) { info.imm_data_valid = true; info.imm_data = ntohl(ib_wr->imm_data); } else { err = EINVAL; break; } /* fallthrough */ case IBV_WR_SEND: case IBV_WR_SEND_WITH_INV: if (ib_wr->opcode == IBV_WR_SEND || ib_wr->opcode == IBV_WR_SEND_WITH_IMM) { if (ib_wr->send_flags & IBV_SEND_SOLICITED) info.op_type = IRDMA_OP_TYPE_SEND_SOL; else info.op_type = IRDMA_OP_TYPE_SEND; } else { if (ib_wr->send_flags & IBV_SEND_SOLICITED) info.op_type = IRDMA_OP_TYPE_SEND_SOL_INV; else info.op_type = IRDMA_OP_TYPE_SEND_INV; info.stag_to_inv = 
ib_wr->imm_data; } info.op.send.num_sges = ib_wr->num_sge; info.op.send.sg_list = (struct irdma_sge *)ib_wr->sg_list; if (ib_qp->qp_type == IBV_QPT_UD) { struct irdma_uah *ah = container_of(ib_wr->wr.ud.ah, struct irdma_uah, ibv_ah); info.op.send.ah_id = ah->ah_id; info.op.send.qkey = ib_wr->wr.ud.remote_qkey; info.op.send.dest_qp = ib_wr->wr.ud.remote_qpn; } if (ib_wr->send_flags & IBV_SEND_INLINE) err = irdma_uk_inline_send(&iwuqp->qp, &info, false); else err = irdma_uk_send(&iwuqp->qp, &info, false); break; case IBV_WR_RDMA_WRITE_WITH_IMM: if (iwuqp->qp.qp_caps & IRDMA_WRITE_WITH_IMM) { info.imm_data_valid = true; info.imm_data = ntohl(ib_wr->imm_data); } else { err = EINVAL; break; } /* fallthrough */ case IBV_WR_RDMA_WRITE: if (ib_wr->send_flags & IBV_SEND_SOLICITED) info.op_type = IRDMA_OP_TYPE_RDMA_WRITE_SOL; else info.op_type = IRDMA_OP_TYPE_RDMA_WRITE; info.op.rdma_write.num_lo_sges = ib_wr->num_sge; info.op.rdma_write.lo_sg_list = (void *)ib_wr->sg_list; info.op.rdma_write.rem_addr.tag_off = ib_wr->wr.rdma.remote_addr; info.op.rdma_write.rem_addr.stag = ib_wr->wr.rdma.rkey; if (ib_wr->send_flags & IBV_SEND_INLINE) err = irdma_uk_inline_rdma_write(&iwuqp->qp, &info, false); else err = irdma_uk_rdma_write(&iwuqp->qp, &info, false); break; case IBV_WR_RDMA_READ: if (ib_wr->num_sge > uk_attrs->max_hw_read_sges) { err = EINVAL; break; } info.op_type = IRDMA_OP_TYPE_RDMA_READ; info.op.rdma_read.rem_addr.tag_off = ib_wr->wr.rdma.remote_addr; info.op.rdma_read.rem_addr.stag = ib_wr->wr.rdma.rkey; info.op.rdma_read.lo_sg_list = (void *)ib_wr->sg_list; info.op.rdma_read.num_lo_sges = ib_wr->num_sge; err = irdma_uk_rdma_read(&iwuqp->qp, &info, false, false); break; case IBV_WR_BIND_MW: if (ib_qp->qp_type != IBV_QPT_RC) { err = EINVAL; break; } info.op_type = IRDMA_OP_TYPE_BIND_MW; info.op.bind_window.mr_stag = ib_wr->bind_mw.bind_info.mr->rkey; if (ib_wr->bind_mw.mw->type == IBV_MW_TYPE_1) { info.op.bind_window.mem_window_type_1 = true; info.op.bind_window.mw_stag = ib_wr->bind_mw.rkey; } else { struct verbs_mr *vmr = verbs_get_mr(ib_wr->bind_mw.bind_info.mr); if (vmr->access & IBV_ACCESS_ZERO_BASED) { err = EINVAL; break; } info.op.bind_window.mw_stag = calc_type2_mw_stag(ib_wr->bind_mw.rkey, ib_wr->bind_mw.mw->rkey); ib_wr->bind_mw.mw->rkey = info.op.bind_window.mw_stag; } if (ib_wr->bind_mw.bind_info.mw_access_flags & IBV_ACCESS_ZERO_BASED) { info.op.bind_window.addressing_type = IRDMA_ADDR_TYPE_ZERO_BASED; info.op.bind_window.va = NULL; } else { info.op.bind_window.addressing_type = IRDMA_ADDR_TYPE_VA_BASED; info.op.bind_window.va = (void *)(uintptr_t)ib_wr->bind_mw.bind_info.addr; } info.op.bind_window.bind_len = ib_wr->bind_mw.bind_info.length; info.op.bind_window.ena_reads = (ib_wr->bind_mw.bind_info.mw_access_flags & IBV_ACCESS_REMOTE_READ) ? 1 : 0; info.op.bind_window.ena_writes = (ib_wr->bind_mw.bind_info.mw_access_flags & IBV_ACCESS_REMOTE_WRITE) ? 
1 : 0; err = irdma_uk_mw_bind(&iwuqp->qp, &info, false); break; case IBV_WR_LOCAL_INV: info.op_type = IRDMA_OP_TYPE_INV_STAG; info.op.inv_local_stag.target_stag = ib_wr->imm_data; err = irdma_uk_stag_local_invalidate(&iwuqp->qp, &info, true); break; default: /* error */ err = EINVAL; printf("%s: post work request failed, invalid opcode: 0x%x\n", __func__, ib_wr->opcode); break; } if (err) break; ib_wr = ib_wr->next; } if (err) *bad_wr = ib_wr; irdma_uk_qp_post_wr(&iwuqp->qp); if (reflush) irdma_issue_flush(ib_qp, 1, 0); pthread_spin_unlock(&iwuqp->lock); return err; } /** * irdma_post_recv - post receive wr for user application * @ib_wr: work request for receive * @bad_wr: bad wr caused an error */ int irdma_upost_recv(struct ibv_qp *ib_qp, struct ibv_recv_wr *ib_wr, struct ibv_recv_wr **bad_wr) { struct irdma_post_rq_info post_recv = {}; struct irdma_sge *sg_list; struct irdma_uqp *iwuqp; bool reflush = false; int err = 0; iwuqp = container_of(ib_qp, struct irdma_uqp, ibv_qp); sg_list = iwuqp->recv_sges; err = pthread_spin_lock(&iwuqp->lock); if (err) return err; if (!IRDMA_RING_MORE_WORK(iwuqp->qp.rq_ring) && ib_qp->state == IBV_QPS_ERR) reflush = true; while (ib_wr) { if (ib_wr->num_sge > iwuqp->qp.max_rq_frag_cnt) { *bad_wr = ib_wr; err = EINVAL; goto error; } post_recv.num_sges = ib_wr->num_sge; post_recv.wr_id = ib_wr->wr_id; irdma_copy_sg_list(sg_list, ib_wr->sg_list, ib_wr->num_sge); post_recv.sg_list = sg_list; err = irdma_uk_post_receive(&iwuqp->qp, &post_recv); if (err) { *bad_wr = ib_wr; goto error; } if (reflush) irdma_issue_flush(ib_qp, 0, 1); ib_wr = ib_wr->next; } error: pthread_spin_unlock(&iwuqp->lock); return err; } /** * irdma_ucreate_ah - create address handle associated with a pd * @ibpd: pd for the address handle * @attr: attributes of address handle */ struct ibv_ah * irdma_ucreate_ah(struct ibv_pd *ibpd, struct ibv_ah_attr *attr) { struct irdma_uah *ah; union ibv_gid sgid; struct irdma_ucreate_ah_resp resp = {}; int err; - err = ibv_query_gid(ibpd->context, attr->port_num, attr->grh.sgid_index, - &sgid); - if (err) { + if (ibv_query_gid(ibpd->context, attr->port_num, attr->grh.sgid_index, + &sgid)) { fprintf(stderr, "irdma: Error from ibv_query_gid.\n"); - errno = err; + errno = ENOENT; return NULL; } ah = calloc(1, sizeof(*ah)); if (!ah) return NULL; err = ibv_cmd_create_ah(ibpd, &ah->ibv_ah, attr, &resp.ibv_resp, sizeof(resp)); if (err) { free(ah); errno = err; return NULL; } ah->ah_id = resp.ah_id; return &ah->ibv_ah; } /** * irdma_udestroy_ah - destroy the address handle * @ibah: address handle */ int irdma_udestroy_ah(struct ibv_ah *ibah) { struct irdma_uah *ah; int ret; ah = container_of(ibah, struct irdma_uah, ibv_ah); ret = ibv_cmd_destroy_ah(ibah); if (ret) return ret; free(ah); return 0; } /** * irdma_uattach_mcast - Attach qp to multicast group implemented * @qp: The queue pair * @gid:The Global ID for multicast group * @lid: The Local ID */ int irdma_uattach_mcast(struct ibv_qp *qp, const union ibv_gid *gid, uint16_t lid) { return ibv_cmd_attach_mcast(qp, gid, lid); } /** * irdma_udetach_mcast - Detach qp from multicast group * @qp: The queue pair * @gid:The Global ID for multicast group * @lid: The Local ID */ int irdma_udetach_mcast(struct ibv_qp *qp, const union ibv_gid *gid, uint16_t lid) { return ibv_cmd_detach_mcast(qp, gid, lid); } /** * irdma_uresize_cq - resizes a cq * @cq: cq to resize * @cqe: the number of cqes of the new cq */ int irdma_uresize_cq(struct ibv_cq *cq, int cqe) { struct irdma_uvcontext *iwvctx; struct irdma_uk_attrs *uk_attrs; 
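	/*
	 * Resize sketch, as implemented below: allocate and register a new
	 * CQ buffer, issue the resize command, then park the old buffer on
	 * iwucq->resize_list.  The old buffer is freed by
	 * irdma_process_resize_list() only after polling has consumed any
	 * CQEs it may still hold.
	 */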
struct irdma_uresize_cq cmd = {}; struct ibv_resize_cq_resp resp = {}; struct irdma_ureg_mr reg_mr_cmd = {}; struct ibv_reg_mr_resp reg_mr_resp = {}; struct irdma_cq_buf *cq_buf = NULL; struct irdma_cqe *cq_base = NULL; struct verbs_mr new_mr = {}; struct irdma_ucq *iwucq; size_t cq_size; u32 cq_pages; int cqe_needed; int ret = 0; bool cqe_64byte_ena; iwucq = container_of(cq, struct irdma_ucq, verbs_cq.cq); iwvctx = container_of(cq->context, struct irdma_uvcontext, ibv_ctx); uk_attrs = &iwvctx->uk_attrs; if (!(uk_attrs->feature_flags & IRDMA_FEATURE_CQ_RESIZE)) return EOPNOTSUPP; if (cqe < uk_attrs->min_hw_cq_size || cqe > uk_attrs->max_hw_cq_size - 1) return EINVAL; cqe_64byte_ena = uk_attrs->feature_flags & IRDMA_FEATURE_64_BYTE_CQE ? true : false; cqe_needed = get_cq_size(cqe, uk_attrs->hw_rev, cqe_64byte_ena); if (cqe_needed == iwucq->cq.cq_size) return 0; cq_size = get_cq_total_bytes(cqe_needed, cqe_64byte_ena); cq_pages = cq_size >> IRDMA_HW_PAGE_SHIFT; cq_base = irdma_alloc_hw_buf(cq_size); if (!cq_base) return ENOMEM; memset(cq_base, 0, cq_size); cq_buf = malloc(sizeof(*cq_buf)); if (!cq_buf) { ret = ENOMEM; goto err_buf; } new_mr.ibv_mr.pd = iwucq->vmr.ibv_mr.pd; reg_mr_cmd.reg_type = IRDMA_MEMREG_TYPE_CQ; reg_mr_cmd.cq_pages = cq_pages; ret = ibv_cmd_reg_mr(new_mr.ibv_mr.pd, cq_base, cq_size, (uintptr_t)cq_base, IBV_ACCESS_LOCAL_WRITE, &new_mr.ibv_mr, ®_mr_cmd.ibv_cmd, sizeof(reg_mr_cmd), ®_mr_resp, sizeof(reg_mr_resp)); if (ret) goto err_dereg_mr; ret = pthread_spin_lock(&iwucq->lock); if (ret) goto err_lock; cmd.user_cq_buffer = (__u64) ((uintptr_t)cq_base); ret = ibv_cmd_resize_cq(&iwucq->verbs_cq.cq, cqe_needed, &cmd.ibv_cmd, sizeof(cmd), &resp, sizeof(resp)); if (ret) goto err_resize; memcpy(&cq_buf->cq, &iwucq->cq, sizeof(cq_buf->cq)); cq_buf->buf_size = cq_size; cq_buf->vmr = iwucq->vmr; iwucq->vmr = new_mr; irdma_uk_cq_resize(&iwucq->cq, cq_base, cqe_needed); iwucq->verbs_cq.cq.cqe = cqe; LIST_INSERT_HEAD(&iwucq->resize_list, cq_buf, list); pthread_spin_unlock(&iwucq->lock); return ret; err_resize: pthread_spin_unlock(&iwucq->lock); err_lock: ibv_cmd_dereg_mr(&new_mr.ibv_mr); err_dereg_mr: free(cq_buf); err_buf: fprintf(stderr, "failed to resize CQ cq_id=%d ret=%d\n", iwucq->cq.cq_id, ret); irdma_free_hw_buf(cq_base, cq_size); return ret; } diff --git a/contrib/ofed/libirdma/osdep.h b/contrib/ofed/libirdma/osdep.h index 1bbf6573b800..f7b8a96b93d2 100644 --- a/contrib/ofed/libirdma/osdep.h +++ b/contrib/ofed/libirdma/osdep.h @@ -1,230 +1,200 @@ /*- * SPDX-License-Identifier: GPL-2.0 or Linux-OpenIB * - * Copyright (c) 2021 - 2022 Intel Corporation + * Copyright (c) 2021 - 2023 Intel Corporation * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU * General Public License (GPL) Version 2, available from the file * COPYING in the main directory of this source tree, or the * OpenFabrics.org BSD license below: * * Redistribution and use in source and binary forms, with or * without modification, are permitted provided that the following * conditions are met: * * - Redistributions of source code must retain the above * copyright notice, this list of conditions and the following * disclaimer. * * - Redistributions in binary form must reproduce the above * copyright notice, this list of conditions and the following * disclaimer in the documentation and/or other materials * provided with the distribution. 
* * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ /*$FreeBSD$*/ #ifndef _ICRDMA_OSDEP_H_ #define _ICRDMA_OSDEP_H_ #include #include #include #include #include #include #include #include #include -#define ATOMIC atomic_t #define IOMEM #define IRDMA_NTOHL(a) ntohl(a) #define IRDMA_NTOHS(a) ntohs(a) #define MAKEMASK(m, s) ((m) << (s)) #define OS_TIMER timer_list #define OS_LIST_HEAD list_head #define OS_LIST_ENTRY list_head #define DECLARE_HASHTABLE(n, b) struct hlist_head (n)[1 << (b)] #define HASH_MIN(v, b) (sizeof(v) <= 4 ? hash_32(v, b) : hash_long(v, b)) #define HASH_FOR_EACH_RCU(n, b, o, m) for ((b) = 0, o = NULL; o == NULL && (b) < ARRAY_SIZE(n);\ (b)++)\ hlist_for_each_entry_rcu(o, &n[(b)], m) #define HASH_FOR_EACH_POSSIBLE_RCU(n, o, m, k) \ hlist_for_each_entry_rcu(o, &n[jhash(&k, sizeof(k), 0) >> (32 - ilog2(ARRAY_SIZE(n)))],\ m) #define HASH_FOR_EACH_POSSIBLE(n, o, m, k) \ hlist_for_each_entry(o, &n[jhash(&k, sizeof(k), 0) >> (32 - ilog2(ARRAY_SIZE(n)))],\ m) #define HASH_ADD_RCU(h, n, k) \ hlist_add_head_rcu(n, &h[jhash(&k, sizeof(k), 0) >> (32 - ilog2(ARRAY_SIZE(h)))]) #define HASH_DEL_RCU(tbl, node) hlist_del_rcu(node) #define HASH_ADD(h, n, k) \ hlist_add_head(n, &h[jhash(&k, sizeof(k), 0) >> (32 - ilog2(ARRAY_SIZE(h)))]) #define HASH_DEL(tbl, node) hlist_del(node) #define WQ_UNBOUND_MAX_ACTIVE max_t(int, 512, num_possible_cpus() * 4) #define if_addr_rlock(x) #define if_addr_runlock(x) /* constants */ #define STATS_TIMER_DELAY 60000 /* a couple of linux size defines */ -#define SZ_128 128 -#define SZ_2K SZ_128 * 16 -#define SZ_1G (SZ_1K * SZ_1K * SZ_1K) -#define SPEED_1000 1000 -#define SPEED_10000 10000 -#define SPEED_20000 20000 -#define SPEED_25000 25000 -#define SPEED_40000 40000 -#define SPEED_100000 100000 - #define BIT_ULL(a) (1ULL << (a)) #define min(a, b) ((a) > (b) ? (b) : (a)) #ifndef likely -#define likely(x) __builtin_expect((x), 1) +#define likely(x) __builtin_expect((x), 1) #endif #ifndef unlikely -#define unlikely(x) __builtin_expect((x), 0) +#define unlikely(x) __builtin_expect((x), 0) #endif #define __aligned_u64 uint64_t __aligned(8) #define VLAN_PRIO_SHIFT 13 #if __FreeBSD_version < 1400000 #define IB_USER_VERBS_EX_CMD_MODIFY_QP IB_USER_VERBS_CMD_MODIFY_QP #endif /* * debug definition section */ #define irdma_print(S, ...) printf("%s:%d "S, __FUNCTION__, __LINE__, ##__VA_ARGS__) #define irdma_debug_buf(dev, mask, desc, buf, size) \ do { \ - u32 i; \ + u32 i; \ if (!((mask) & (dev)->debug_mask)) { \ break; \ } \ irdma_debug(dev, mask, "%s\n", desc); \ irdma_debug(dev, mask, "starting address virt=%p phy=%lxh\n", buf, irdma_get_virt_to_phy(buf)); \ for (i = 0; i < size ; i += 8) \ irdma_debug(dev, mask, "index %03d val: %016lx\n", i, ((unsigned long *)(buf))[i / 8]); \ } while(0) -#define irdma_debug(h, m, s, ...) \ -do { \ - if (!(h)) { \ - if ((m) == IRDMA_DEBUG_INIT) \ +#define irdma_debug(h, m, s, ...) 
\ +do { \ + if (!(h)) { \ + if ((m) == IRDMA_DEBUG_INIT) \ printf("irdma INIT " s, ##__VA_ARGS__); \ - } else if (((m) & (h)->debug_mask)) { \ - printf("irdma " s, ##__VA_ARGS__); \ - } \ + } else if (((m) & (h)->debug_mask)) { \ + printf("irdma " s, ##__VA_ARGS__); \ + } \ } while (0) extern unsigned int irdma_dbg; -#define libirdma_debug(fmt, args...) \ -do { \ - if (irdma_dbg) \ - printf("libirdma-%s: " fmt, __func__, ##args); \ +#define libirdma_debug(fmt, args...) \ +do { \ + if (irdma_dbg) \ + printf("libirdma-%s: " fmt, __func__, ##args); \ } while (0) #define irdma_dev_err(ibdev, fmt, ...) \ pr_err("%s:%s:%d ERR "fmt, (ibdev)->name, __func__, __LINE__, ##__VA_ARGS__) #define irdma_dev_warn(ibdev, fmt, ...) \ pr_warn("%s:%s:%d WARN "fmt, (ibdev)->name, __func__, __LINE__, ##__VA_ARGS__) #define irdma_dev_info(a, b, ...) printf(b, ##__VA_ARGS__) #define irdma_pr_warn printf -#define dump_struct(s, sz, name) \ -do { \ - unsigned char *a; \ - printf("%s %u", (name), (unsigned int)(sz)); \ - for (a = (unsigned char*)(s); a < (unsigned char *)(s) + (sz) ; a ++) { \ - if ((u64)a % 8 == 0) \ - printf("\n%p ", a); \ - printf("%2x ", *a); \ - } \ - printf("\n"); \ -}while(0) - /* * debug definition end */ typedef __be16 BE16; typedef __be32 BE32; typedef uintptr_t irdma_uintptr; struct irdma_hw; struct irdma_pci_f; struct irdma_sc_dev; struct irdma_sc_qp; struct irdma_sc_vsi; #define irdma_pr_info(fmt, args ...) printf("%s: WARN "fmt, __func__, ## args) #define irdma_pr_err(fmt, args ...) printf("%s: ERR "fmt, __func__, ## args) #define irdma_memcpy(a, b, c) memcpy((a), (b), (c)) #define irdma_memset(a, b, c) memset((a), (b), (c)) #define irdma_usec_delay(x) DELAY(x) #define mdelay(x) DELAY((x) * 1000) #define rt_tos2priority(tos) (tos >> 5) #define ah_attr_to_dmac(attr) ((attr).dmac) -#define kc_ib_modify_qp_is_ok(cur_state, next_state, type, mask, ll) \ - ib_modify_qp_is_ok(cur_state, next_state, type, mask) -#define kc_typeq_ib_wr const -#define kc_ifp_find ip_ifp_find -#define kc_ifp6_find ip6_ifp_find -#define kc_rdma_gid_attr_network_type(sgid_attr, gid_type, gid) \ - ib_gid_to_network_type(gid_type, gid) #define irdma_del_timer_compat(tt) del_timer((tt)) #define IRDMA_TAILQ_FOREACH CK_STAILQ_FOREACH #define IRDMA_TAILQ_FOREACH_SAFE CK_STAILQ_FOREACH_SAFE #define between(a, b, c) (bool)(c-a >= b-a) static inline void db_wr32(__u32 val, __u32 *wqe_word) { - *wqe_word = val; + *wqe_word = val; } void *hw_to_dev(struct irdma_hw *hw); struct irdma_dma_mem { - void *va; - u64 pa; + void *va; + u64 pa; bus_dma_tag_t tag; bus_dmamap_t map; bus_dma_segment_t seg; bus_size_t size; - int nseg; - int flags; + int nseg; + int flags; }; struct irdma_virt_mem { - void *va; - u32 size; + void *va; + u32 size; }; #ifndef verbs_mr enum ibv_mr_type { - IBV_MR_TYPE_MR, - IBV_MR_TYPE_NULL_MR, + IBV_MR_TYPE_MR, + IBV_MR_TYPE_NULL_MR, }; struct verbs_mr { - struct ibv_mr ibv_mr; - enum ibv_mr_type mr_type; + struct ibv_mr ibv_mr; + enum ibv_mr_type mr_type; int access; }; #define verbs_get_mr(mr) container_of((mr), struct verbs_mr, ibv_mr) #endif #endif /* _ICRDMA_OSDEP_H_ */ diff --git a/sys/dev/ice/ice_rdma.c b/sys/dev/ice/ice_rdma.c index decf9ea490d8..0bb4b79bb143 100644 --- a/sys/dev/ice/ice_rdma.c +++ b/sys/dev/ice/ice_rdma.c @@ -1,867 +1,916 @@ /* SPDX-License-Identifier: BSD-3-Clause */ /* Copyright (c) 2023, Intel Corporation * All rights reserved. 
* * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * 1. Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. * * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * 3. Neither the name of the Intel Corporation nor the names of its * contributors may be used to endorse or promote products derived from * this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. */ /** * @file ice_rdma.c * @brief RDMA client driver interface * * Functions to interface with the RDMA client driver, for enabling RMDA * functionality for the ice driver. * * The RDMA client interface is based on a simple kobject interface which is * defined by the rmda_if.m and irdma_di_if.m interfaces. * * The ice device driver provides the rmda_di_if.m interface methods, while * the client RDMA driver provides the irdma_if.m interface methods as an * extension ontop of the irdma_di_if kobject. * * The initial connection between drivers is done via the RDMA client driver * calling ice_rdma_register. */ #include "ice_iflib.h" #include "ice_rdma_internal.h" #include "irdma_if.h" #include "irdma_di_if.h" /** * @var ice_rdma * @brief global RDMA driver state * * Contains global state the driver uses to connect to a client RDMA interface * driver. */ static struct ice_rdma_state ice_rdma; /* * Helper function prototypes */ static int ice_rdma_pf_attach_locked(struct ice_softc *sc); static void ice_rdma_pf_detach_locked(struct ice_softc *sc); static int ice_rdma_check_version(struct ice_rdma_info *info); static void ice_rdma_cp_qos_info(struct ice_hw *hw, struct ice_dcbx_cfg *dcbx_cfg, struct ice_qos_params *qos_info); /* * RDMA Device Interface prototypes */ static int ice_rdma_pf_reset(struct ice_rdma_peer *peer); static int ice_rdma_pf_msix_init(struct ice_rdma_peer *peer, struct ice_rdma_msix_mapping *msix_info); static int ice_rdma_qset_register_request(struct ice_rdma_peer *peer, struct ice_rdma_qset_update *res); static int ice_rdma_update_vsi_filter(struct ice_rdma_peer *peer_dev, bool enable); static void ice_rdma_request_handler(struct ice_rdma_peer *peer, struct ice_rdma_request *req); /** * @var ice_rdma_di_methods * @brief RDMA driver interface methods * * Kobject methods implementing the driver-side interface for the RDMA peer * clients. This method table contains the operations which the client can * request from the driver. 
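 *
 * A client reaches these operations through the uppercase kobject
 * wrappers generated from irdma_di_if.m, e.g. IRDMA_DI_RESET(peer)
 * dispatches to ice_rdma_pf_reset() in this file.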
* * The client driver will then extend this kobject class with methods that the * driver can request from the client. */ static kobj_method_t ice_rdma_di_methods[] = { KOBJMETHOD(irdma_di_reset, ice_rdma_pf_reset), KOBJMETHOD(irdma_di_msix_init, ice_rdma_pf_msix_init), KOBJMETHOD(irdma_di_qset_register_request, ice_rdma_qset_register_request), KOBJMETHOD(irdma_di_vsi_filter_update, ice_rdma_update_vsi_filter), KOBJMETHOD(irdma_di_req_handler, ice_rdma_request_handler), KOBJMETHOD_END }; /* Define ice_rdma_di class which will be extended by the iRDMA driver */ DEFINE_CLASS_0(ice_rdma_di, ice_rdma_di_class, ice_rdma_di_methods, sizeof(struct ice_rdma_peer)); /** * ice_rdma_pf_reset - RDMA client interface requested a reset * @peer: the RDMA peer client structure * * Implements IRDMA_DI_RESET, called by the RDMA client driver to request * a reset of an ice driver device. */ static int ice_rdma_pf_reset(struct ice_rdma_peer *peer) { struct ice_softc *sc = ice_rdma_peer_to_sc(peer); - /* - * Request that the driver re-initialize by bringing the interface - * down and up. - */ - ice_request_stack_reinit(sc); + /* Tell the base driver that RDMA is requesting a PFR */ + ice_set_state(&sc->state, ICE_STATE_RESET_PFR_REQ); + + /* XXX: Base driver will notify RDMA when it's done */ return (0); } /** * ice_rdma_pf_msix_init - RDMA client interface request MSI-X initialization * @peer: the RDMA peer client structure * @msix_info: requested MSI-X mapping * * Implements IRDMA_DI_MSIX_INIT, called by the RDMA client driver to * initialize the MSI-X resources required for RDMA functionality. */ static int ice_rdma_pf_msix_init(struct ice_rdma_peer *peer, struct ice_rdma_msix_mapping __unused *msix_info) { struct ice_softc *sc = ice_rdma_peer_to_sc(peer); MPASS(msix_info != NULL); device_printf(sc->dev, "%s: iRDMA MSI-X initialization request is not yet implemented\n", __func__); /* TODO: implement MSI-X initialization for RDMA */ return (ENOSYS); } /** * ice_rdma_register_request - RDMA client interface request qset * registration or unregistration * @peer: the RDMA peer client structure * @res: resources to be registered or unregistered */ static int ice_rdma_qset_register_request(struct ice_rdma_peer *peer, struct ice_rdma_qset_update *res) { struct ice_softc *sc = ice_rdma_peer_to_sc(peer); struct ice_vsi *vsi = NULL; struct ice_dcbx_cfg *dcbx_cfg; struct ice_hw *hw = &sc->hw; enum ice_status status; int count, i, ret = 0; uint32_t *qset_teid; uint16_t *qs_handle; uint16_t max_rdmaqs[ICE_MAX_TRAFFIC_CLASS]; uint16_t vsi_id; uint8_t ena_tc = 0; if (!res) return -EINVAL; if (res->cnt_req > ICE_MAX_TXQ_PER_TXQG) return -EINVAL; switch(res->res_type) { case ICE_RDMA_QSET_ALLOC: count = res->cnt_req; vsi_id = peer->pf_vsi_num; break; case ICE_RDMA_QSET_FREE: count = res->res_allocated; vsi_id = res->qsets.vsi_id; break; default: return -EINVAL; } qset_teid = (uint32_t *)ice_calloc(hw, count, sizeof(*qset_teid)); if (!qset_teid) return -ENOMEM; qs_handle = (uint16_t *)ice_calloc(hw, count, sizeof(*qs_handle)); if (!qs_handle) { ice_free(hw, qset_teid); return -ENOMEM; } ice_for_each_traffic_class(i) max_rdmaqs[i] = 0; for (i = 0; i < sc->num_available_vsi; i++) { if (sc->all_vsi[i] && ice_get_hw_vsi_num(hw, sc->all_vsi[i]->idx) == vsi_id) { vsi = sc->all_vsi[i]; break; } } if (!vsi) { ice_debug(hw, ICE_DBG_RDMA, "RDMA QSet invalid VSI\n"); ret = -EINVAL; goto out; } if (sc != vsi->sc) { ice_debug(hw, ICE_DBG_RDMA, "VSI is tied to unexpected device\n"); ret = -EXDEV; goto out; } for (i = 0; i < count; i++) { 
struct ice_rdma_qset_params *qset; qset = &res->qsets; if (qset->vsi_id != peer->pf_vsi_num) { ice_debug(hw, ICE_DBG_RDMA, "RDMA QSet invalid VSI requested %d %d\n", qset->vsi_id, peer->pf_vsi_num); ret = -EINVAL; goto out; } max_rdmaqs[qset->tc]++; qs_handle[i] = qset->qs_handle; qset_teid[i] = qset->teid; } switch(res->res_type) { case ICE_RDMA_QSET_ALLOC: dcbx_cfg = &hw->port_info->qos_cfg.local_dcbx_cfg; ena_tc = ice_dcb_get_tc_map(dcbx_cfg); ice_debug(hw, ICE_DBG_RDMA, "%s:%d ena_tc=%x\n", __func__, __LINE__, ena_tc); status = ice_cfg_vsi_rdma(hw->port_info, vsi->idx, ena_tc, max_rdmaqs); if (status) { ice_debug(hw, ICE_DBG_RDMA, "Failed VSI RDMA qset config\n"); ret = -EINVAL; goto out; } for (i = 0; i < count; i++) { struct ice_rdma_qset_params *qset; qset = &res->qsets; status = ice_ena_vsi_rdma_qset(hw->port_info, vsi->idx, qset->tc, &qs_handle[i], 1, &qset_teid[i]); if (status) { ice_debug(hw, ICE_DBG_RDMA, "Failed VSI RDMA qset enable\n"); ret = -EINVAL; goto out; } qset->teid = qset_teid[i]; } break; case ICE_RDMA_QSET_FREE: status = ice_dis_vsi_rdma_qset(hw->port_info, count, qset_teid, qs_handle); if (status) ret = -EINVAL; break; default: ret = -EINVAL; break; } out: ice_free(hw, qs_handle); ice_free(hw, qset_teid); return ret; } /** * ice_rdma_update_vsi_filter - configure vsi information * when opening or closing rdma driver * @peer: the RDMA peer client structure * @enable: enable or disable the rdma filter */ static int ice_rdma_update_vsi_filter(struct ice_rdma_peer *peer, bool enable) { struct ice_softc *sc = ice_rdma_peer_to_sc(peer); struct ice_vsi *vsi; int ret; vsi = &sc->pf_vsi; if (!vsi) return -EINVAL; ret = ice_cfg_iwarp_fltr(&sc->hw, vsi->idx, enable); if (ret) { device_printf(sc->dev, "Failed to %sable iWARP filtering\n", enable ? 
"en" : "dis"); } else { if (enable) vsi->info.q_opt_flags |= ICE_AQ_VSI_Q_OPT_PE_FLTR_EN; else vsi->info.q_opt_flags &= ~ICE_AQ_VSI_Q_OPT_PE_FLTR_EN; } return ret; } /** * ice_rdma_request_handler - handle requests incoming from RDMA driver * @peer: the RDMA peer client structure * @req: structure containing request */ static void ice_rdma_request_handler(struct ice_rdma_peer *peer, struct ice_rdma_request *req) { if (!req || !peer) { log(LOG_WARNING, "%s: peer or req are not valid\n", __func__); return; } switch(req->type) { case ICE_RDMA_EVENT_RESET: + ice_rdma_pf_reset(peer); break; case ICE_RDMA_EVENT_QSET_REGISTER: ice_rdma_qset_register_request(peer, &req->res); break; case ICE_RDMA_EVENT_VSI_FILTER_UPDATE: ice_rdma_update_vsi_filter(peer, req->enable_filter); break; default: log(LOG_WARNING, "%s: Event %d not supported\n", __func__, req->type); break; } } /** * ice_rdma_cp_qos_info - gather current QOS/DCB settings in LAN to pass * to RDMA driver * @hw: ice hw structure * @dcbx_cfg: current DCB settings in ice driver * @qos_info: destination of the DCB settings */ static void ice_rdma_cp_qos_info(struct ice_hw *hw, struct ice_dcbx_cfg *dcbx_cfg, struct ice_qos_params *qos_info) { u32 up2tc; u8 j; u8 num_tc = 0; u8 val_tc = 0; /* number of TC for validation */ u8 cnt_tc = 0; /* setup qos_info fields with defaults */ qos_info->num_apps = 0; qos_info->num_tc = 1; for (j = 0; j < ICE_TC_MAX_USER_PRIORITY; j++) qos_info->up2tc[j] = 0; qos_info->tc_info[0].rel_bw = 100; for (j = 1; j < IEEE_8021QAZ_MAX_TCS; j++) qos_info->tc_info[j].rel_bw = 0; /* gather current values */ up2tc = rd32(hw, PRTDCB_TUP2TC); qos_info->num_apps = dcbx_cfg->numapps; for (j = 0; j < ICE_MAX_TRAFFIC_CLASS; j++) { num_tc |= BIT(dcbx_cfg->etscfg.prio_table[j]); } for (j = 0; j < ICE_MAX_TRAFFIC_CLASS; j++) { if (num_tc & BIT(j)) { cnt_tc++; val_tc |= BIT(j); } else { break; } } qos_info->num_tc = (val_tc == num_tc && num_tc != 0) ? cnt_tc : 1; for (j = 0; j < ICE_TC_MAX_USER_PRIORITY; j++) qos_info->up2tc[j] = (up2tc >> (j * 3)) & 0x7; for (j = 0; j < IEEE_8021QAZ_MAX_TCS; j++) qos_info->tc_info[j].rel_bw = dcbx_cfg->etscfg.tcbwtable[j]; for (j = 0; j < qos_info->num_apps; j++) { qos_info->apps[j].priority = dcbx_cfg->app[j].priority; qos_info->apps[j].prot_id = dcbx_cfg->app[j].prot_id; qos_info->apps[j].selector = dcbx_cfg->app[j].selector; } /* Gather DSCP-to-TC mapping and QoS/PFC mode */ memcpy(qos_info->dscp_map, dcbx_cfg->dscp_map, sizeof(qos_info->dscp_map)); qos_info->pfc_mode = dcbx_cfg->pfc_mode; } /** * ice_rdma_check_version - Check that the provided RDMA version is compatible * @info: the RDMA client information structure * * Verify that the client RDMA driver provided a version that is compatible * with the driver interface. */ static int ice_rdma_check_version(struct ice_rdma_info *info) { /* Make sure the MAJOR version matches */ if (info->major_version != ICE_RDMA_MAJOR_VERSION) { log(LOG_WARNING, "%s: the iRDMA driver requested version %d.%d.%d, but this driver only supports major version %d.x.x\n", __func__, info->major_version, info->minor_version, info->patch_version, ICE_RDMA_MAJOR_VERSION); return (ENOTSUP); } /* * Make sure that the MINOR version is compatible. * * This means that the RDMA client driver version MUST not be greater * than the version provided by the driver, as it would indicate that * the RDMA client expects features which are not supported by the * main driver. 
*/ if (info->minor_version > ICE_RDMA_MINOR_VERSION) { log(LOG_WARNING, "%s: the iRDMA driver requested version %d.%d.%d, but this driver only supports up to minor version %d.%d.x\n", __func__, info->major_version, info->minor_version, info->patch_version, ICE_RDMA_MAJOR_VERSION, ICE_RDMA_MINOR_VERSION); return (ENOTSUP); } /* * Make sure that the PATCH version is compatible. * * This means that the RDMA client version MUST not be greater than * the version provided by the driver, as it may indicate that the * RDMA client expects certain backwards compatible bug fixes which * are not implemented by this version of the main driver. */ if ((info->minor_version == ICE_RDMA_MINOR_VERSION) && (info->patch_version > ICE_RDMA_PATCH_VERSION)) { log(LOG_WARNING, "%s: the iRDMA driver requested version %d.%d.%d, but this driver only supports up to patch version %d.%d.%d\n", __func__, info->major_version, info->minor_version, info->patch_version, ICE_RDMA_MAJOR_VERSION, ICE_RDMA_MINOR_VERSION, ICE_RDMA_PATCH_VERSION); return (ENOTSUP); } /* Make sure that the kobject class is initialized */ if (info->rdma_class == NULL) { log(LOG_WARNING, "%s: the iRDMA driver did not specify a kobject interface\n", __func__); return (EINVAL); } return (0); } /** * ice_rdma_register - Register an RDMA client driver * @info: the RDMA client information structure * * Called by the RDMA client driver on load. Used to initialize the RDMA * client driver interface and enable interop between the ice driver and the * RDMA client driver. * * The RDMA client driver must provide the version number it expects, along * with a pointer to a kobject class that extends the irdma_di_if class, and * implements the irdma_if class interface. */ int ice_rdma_register(struct ice_rdma_info *info) { struct ice_rdma_entry *entry; struct ice_softc *sc; int err = 0; sx_xlock(&ice_rdma.mtx); if (!ice_enable_irdma) { log(LOG_INFO, "%s: The iRDMA driver interface has been disabled\n", __func__); err = (ECONNREFUSED); goto return_unlock; } if (ice_rdma.registered) { log(LOG_WARNING, "%s: iRDMA driver already registered\n", __func__); err = (EBUSY); goto return_unlock; } /* Make sure the iRDMA version is compatible */ err = ice_rdma_check_version(info); if (err) goto return_unlock; log(LOG_INFO, "%s: iRDMA driver registered using version %d.%d.%d\n", __func__, info->major_version, info->minor_version, info->patch_version); ice_rdma.peer_class = info->rdma_class; /* * Initialize the kobject interface and notify the RDMA client of each * existing PF interface. */ LIST_FOREACH(entry, &ice_rdma.peers, node) { kobj_init((kobj_t)&entry->peer, ice_rdma.peer_class); /* Gather DCB/QOS info into peer */ sc = __containerof(entry, struct ice_softc, rdma_entry); memset(&entry->peer.initial_qos_info, 0, sizeof(entry->peer.initial_qos_info)); ice_rdma_cp_qos_info(&sc->hw, &sc->hw.port_info->qos_cfg.local_dcbx_cfg, &entry->peer.initial_qos_info); IRDMA_PROBE(&entry->peer); if (entry->initiated) IRDMA_OPEN(&entry->peer); } ice_rdma.registered = true; return_unlock: sx_xunlock(&ice_rdma.mtx); return (err); } /** * ice_rdma_unregister - Unregister an RDMA client driver * * Called by the RDMA client driver on unload. Used to de-initialize the RDMA * client driver interface and shut down communication between the ice driver * and the RDMA client driver. 
*/ int ice_rdma_unregister(void) { struct ice_rdma_entry *entry; sx_xlock(&ice_rdma.mtx); if (!ice_rdma.registered) { log(LOG_WARNING, "%s: iRDMA driver was not previously registered\n", __func__); sx_xunlock(&ice_rdma.mtx); return (ENOENT); } log(LOG_INFO, "%s: iRDMA driver unregistered\n", __func__); ice_rdma.registered = false; ice_rdma.peer_class = NULL; /* * Release the kobject interface for each of the existing PF * interfaces. Note that we do not notify the client about removing * each PF, as it is assumed that the client will have already cleaned * up any associated resources when it is unregistered. */ LIST_FOREACH(entry, &ice_rdma.peers, node) kobj_delete((kobj_t)&entry->peer, NULL); sx_xunlock(&ice_rdma.mtx); return (0); } /** * ice_rdma_init - RDMA driver init routine * * Called during ice driver module initialization to setup the RDMA client * interface mutex and RDMA peer structure list. */ void ice_rdma_init(void) { LIST_INIT(&ice_rdma.peers); sx_init_flags(&ice_rdma.mtx, "ice rdma interface", SX_DUPOK); ice_rdma.registered = false; ice_rdma.peer_class = NULL; } /** * ice_rdma_exit - RDMA driver exit routine * * Called during ice driver module exit to shutdown the RDMA client interface * mutex. */ void ice_rdma_exit(void) { MPASS(LIST_EMPTY(&ice_rdma.peers)); sx_destroy(&ice_rdma.mtx); } /** * ice_rdma_pf_attach_locked - Prepare a PF for RDMA connections * @sc: the ice driver softc * * Initialize a peer entry for this PF and add it to the RDMA interface list. * Notify the client RDMA driver of a new PF device. * * @pre must be called while holding the ice_rdma mutex. */ static int ice_rdma_pf_attach_locked(struct ice_softc *sc) { struct ice_rdma_entry *entry; /* Do not attach the PF unless RDMA is supported */ if (!ice_is_bit_set(sc->feat_cap, ICE_FEATURE_RDMA)) return (0); entry = &sc->rdma_entry; if (entry->attached) { device_printf(sc->dev, "iRDMA peer entry already exists\n"); return (EEXIST); } entry->attached = true; entry->peer.dev = sc->dev; entry->peer.ifp = sc->ifp; entry->peer.pf_id = sc->hw.pf_id; entry->peer.pci_mem = sc->bar0.res; entry->peer.pf_vsi_num = ice_get_hw_vsi_num(&sc->hw, sc->pf_vsi.idx); if (sc->rdma_imap && sc->rdma_imap[0] != ICE_INVALID_RES_IDX && sc->irdma_vectors > 0) { entry->peer.msix.base = sc->rdma_imap[0]; entry->peer.msix.count = sc->irdma_vectors; } /* Gather DCB/QOS info into peer */ memset(&entry->peer.initial_qos_info, 0, sizeof(entry->peer.initial_qos_info)); ice_rdma_cp_qos_info(&sc->hw, &sc->hw.port_info->qos_cfg.local_dcbx_cfg, &entry->peer.initial_qos_info); /* * If the RDMA client driver has already registered, initialize the * kobject and notify the client of a new PF */ if (ice_rdma.registered) { kobj_init((kobj_t)&entry->peer, ice_rdma.peer_class); IRDMA_PROBE(&entry->peer); } LIST_INSERT_HEAD(&ice_rdma.peers, entry, node); ice_set_bit(ICE_FEATURE_RDMA, sc->feat_en); return (0); } /** * ice_rdma_pf_attach - Notify the RDMA client of a new PF * @sc: the ice driver softc * * Called during PF attach to notify the RDMA client of a new PF. */ int ice_rdma_pf_attach(struct ice_softc *sc) { int err; sx_xlock(&ice_rdma.mtx); err = ice_rdma_pf_attach_locked(sc); sx_xunlock(&ice_rdma.mtx); return (err); } /** * ice_rdma_pf_detach_locked - Notify the RDMA client on PF detach * @sc: the ice driver softc * * Notify the RDMA peer client driver of removal of a PF, and release any * RDMA-specific resources associated with that PF. Remove the PF from the * list of available RDMA entries. 
* * @pre must be called while holding the ice_rdma mutex. */ static void ice_rdma_pf_detach_locked(struct ice_softc *sc) { struct ice_rdma_entry *entry; /* No need to detach the PF if RDMA is not enabled */ if (!ice_is_bit_set(sc->feat_en, ICE_FEATURE_RDMA)) return; entry = &sc->rdma_entry; if (!entry->attached) { device_printf(sc->dev, "iRDMA peer entry was not attached\n"); return; } /* * If the RDMA client driver is registered, notify the client that * a PF has been removed, and release the kobject reference. */ if (ice_rdma.registered) { IRDMA_REMOVE(&entry->peer); kobj_delete((kobj_t)&entry->peer, NULL); } LIST_REMOVE(entry, node); entry->attached = false; ice_clear_bit(ICE_FEATURE_RDMA, sc->feat_en); } /** * ice_rdma_pf_detach - Notify the RDMA client of a PF detaching * @sc: the ice driver softc * * Take the ice_rdma mutex and then notify the RDMA client that a PF has been * removed. */ void ice_rdma_pf_detach(struct ice_softc *sc) { sx_xlock(&ice_rdma.mtx); ice_rdma_pf_detach_locked(sc); sx_xunlock(&ice_rdma.mtx); } /** * ice_rdma_pf_init - Notify the RDMA client that a PF has initialized * @sc: the ice driver softc * * Called by the ice driver when a PF has been initialized. Notifies the RDMA * client that a PF is up and ready to operate. */ int ice_rdma_pf_init(struct ice_softc *sc) { struct ice_rdma_peer *peer = &sc->rdma_entry.peer; sx_xlock(&ice_rdma.mtx); /* Update the MTU */ peer->mtu = if_getmtu(sc->ifp); sc->rdma_entry.initiated = true; if (sc->rdma_entry.attached && ice_rdma.registered) { sx_xunlock(&ice_rdma.mtx); return IRDMA_OPEN(peer); } sx_xunlock(&ice_rdma.mtx); return (0); } /** * ice_rdma_pf_stop - Notify the RDMA client of a stopped PF device * @sc: the ice driver softc * * Called by the ice driver when a PF is stopped. Notifies the RDMA client * driver that the PF has stopped and is not ready to operate. */ int ice_rdma_pf_stop(struct ice_softc *sc) { sx_xlock(&ice_rdma.mtx); sc->rdma_entry.initiated = false; if (sc->rdma_entry.attached && ice_rdma.registered) { sx_xunlock(&ice_rdma.mtx); return IRDMA_CLOSE(&sc->rdma_entry.peer); } sx_xunlock(&ice_rdma.mtx); return (0); } /** * ice_rdma_link_change - Notify RDMA client of a change in link status * @sc: the ice driver softc * @linkstate: the link status * @baudrate: the link rate in bits per second * * Notify the RDMA client of a link status change by sending it the new link * state and baudrate. * * The link state is represented the same way as in the ifnet structure. It * should be LINK_STATE_UNKNOWN, LINK_STATE_DOWN, or LINK_STATE_UP. */ void ice_rdma_link_change(struct ice_softc *sc, int linkstate, uint64_t baudrate) { struct ice_rdma_peer *peer = &sc->rdma_entry.peer; struct ice_rdma_event event; memset(&event, 0, sizeof(struct ice_rdma_event)); event.type = ICE_RDMA_EVENT_LINK_CHANGE; event.linkstate = linkstate; event.baudrate = baudrate; sx_xlock(&ice_rdma.mtx); if (sc->rdma_entry.attached && ice_rdma.registered) IRDMA_EVENT_HANDLER(peer, &event); sx_xunlock(&ice_rdma.mtx); } /** * ice_rdma_notify_dcb_qos_change - notify RDMA driver to pause traffic * @sc: the ice driver softc * * Notify the RDMA driver that QOS/DCB settings are about to change. * Once the function returns, all the QPs should be suspended.
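 *
 * A minimal sketch of the expected calling sequence (illustrative only; the
 * exact caller inside the DCB configuration path is not shown here):
 *
 *	ice_rdma_notify_dcb_qos_change(sc);
 *	(apply the new DCB/QoS configuration to the hardware)
 *	ice_rdma_dcb_qos_update(sc, sc->hw.port_info);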
*/ void ice_rdma_notify_dcb_qos_change(struct ice_softc *sc) { struct ice_rdma_peer *peer = &sc->rdma_entry.peer; struct ice_rdma_event event; memset(&event, 0, sizeof(struct ice_rdma_event)); event.type = ICE_RDMA_EVENT_TC_CHANGE; /* pre-event */ event.prep = true; sx_xlock(&ice_rdma.mtx); if (sc->rdma_entry.attached && ice_rdma.registered) IRDMA_EVENT_HANDLER(peer, &event); sx_xunlock(&ice_rdma.mtx); } /** * ice_rdma_dcb_qos_update - pass the changed dcb settings to RDMA driver * @sc: the ice driver softc * @pi: the port info structure * * Pass the changed DCB settings to the RDMA driver. This function should be * called only after ice_rdma_notify_dcb_qos_change has been called and has * returned. After the function returns, all the RDMA traffic * should be resumed. */ void ice_rdma_dcb_qos_update(struct ice_softc *sc, struct ice_port_info *pi) { struct ice_rdma_peer *peer = &sc->rdma_entry.peer; struct ice_rdma_event event; memset(&event, 0, sizeof(struct ice_rdma_event)); event.type = ICE_RDMA_EVENT_TC_CHANGE; /* post-event */ event.prep = false; /* gather current configuration */ ice_rdma_cp_qos_info(&sc->hw, &pi->qos_cfg.local_dcbx_cfg, &event.port_qos); sx_xlock(&ice_rdma.mtx); if (sc->rdma_entry.attached && ice_rdma.registered) IRDMA_EVENT_HANDLER(peer, &event); sx_xunlock(&ice_rdma.mtx); } + +/** + * ice_rdma_notify_pe_intr - notify irdma on incoming interrupts regarding PE + * @sc: the ice driver softc + * @oicr: interrupt cause + * + * Pass information about a received interrupt to the RDMA driver if it + * relates to the PE, specifically PE_CRITERR and HMC_ERR. + * The irdma driver shall decide what should be done upon these interrupts. + */ +void +ice_rdma_notify_pe_intr(struct ice_softc *sc, uint32_t oicr) +{ + struct ice_rdma_peer *peer = &sc->rdma_entry.peer; + struct ice_rdma_event event; + + memset(&event, 0, sizeof(struct ice_rdma_event)); + event.type = ICE_RDMA_EVENT_CRIT_ERR; + event.oicr_reg = oicr; + + sx_xlock(&ice_rdma.mtx); + if (sc->rdma_entry.attached && ice_rdma.registered) + IRDMA_EVENT_HANDLER(peer, &event); + sx_xunlock(&ice_rdma.mtx); +} + +/** + * ice_rdma_notify_reset - notify irdma on incoming pf-reset + * @sc: the ice driver softc + * + * Inform the irdma driver of an incoming PF reset. + * The irdma driver shall set its state to reset and avoid using the CQP + * anymore. The next step should be to call ice_rdma_pf_stop in order to + * remove resources. + */ +void +ice_rdma_notify_reset(struct ice_softc *sc) +{ + struct ice_rdma_peer *peer = &sc->rdma_entry.peer; + struct ice_rdma_event event; + + memset(&event, 0, sizeof(struct ice_rdma_event)); + event.type = ICE_RDMA_EVENT_RESET; + + sx_xlock(&ice_rdma.mtx); + if (sc->rdma_entry.attached && ice_rdma.registered) + IRDMA_EVENT_HANDLER(peer, &event); + sx_xunlock(&ice_rdma.mtx); +} diff --git a/sys/dev/ice/ice_rdma.h b/sys/dev/ice/ice_rdma.h index f83c30b33f6c..38e2ef491e8e 100644 --- a/sys/dev/ice/ice_rdma.h +++ b/sys/dev/ice/ice_rdma.h @@ -1,310 +1,311 @@ /* SPDX-License-Identifier: BSD-3-Clause */ /* Copyright (c) 2023, Intel Corporation * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * 1. Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. * * 2.
Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * 3. Neither the name of the Intel Corporation nor the names of its * contributors may be used to endorse or promote products derived from * this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. */ /** * @file ice_rdma.h * @brief header file for RDMA client interface functions * * Contains definitions and function calls shared by the ice driver and the * RDMA client interface driver. * * Since these definitions are shared between drivers, it is important that any * changes are considered carefully for backwards compatibility. */ #ifndef _ICE_RDMA_H_ #define _ICE_RDMA_H_ /* * The RDMA client interface version is used to help determine * incompatibilities between the interface definition shared between the main * driver and the client driver. * * It follows the semantic versioning guidelines, that is: * Given the version number MAJOR.MINOR.PATCH, increment the: * * MAJOR version when you make incompatible changes, * MINOR version when you add functionality in a backwards-compatible manner, and * PATCH version when you make backwards-compatible bug fixes. * * Any change to this file, or to one of the kobject interface files, must come * with an associated change in one of the MAJOR, MINOR, or PATCH versions, * and care must be taken that backwards incompatible changes MUST increment * the MAJOR version. * * Note: Until the MAJOR version is set to at least 1, the above semantic * version guarantees may not hold, and this interface should not be * considered stable. */ #define ICE_RDMA_MAJOR_VERSION 1 -#define ICE_RDMA_MINOR_VERSION 0 +#define ICE_RDMA_MINOR_VERSION 1 #define ICE_RDMA_PATCH_VERSION 0 /** * @def ICE_RDMA_MAX_MSIX * @brief Maximum number of MSI-X vectors that will be reserved * * Defines the maximum number of MSI-X vectors that an RDMA interface will * have reserved in advance. Does not guarantee that many vectors have * actually been enabled. */ #define ICE_RDMA_MAX_MSIX 64 /** * @struct ice_rdma_info * @brief RDMA information from the client driver * * The RDMA client driver will fill in this structure and pass its contents * back to the main driver using the ice_rdma_register function. * * It should fill in the version fields with the ICE_RDMA_* versions as defined in * the ice_rdma.h header. * * Additionally, it must provide a pointer to a kobject class which extends the * ice_rdma_di_class with the operations defined in the rdma_if.m interface. * * If the version specified is not compatible, then the registration * of the RDMA driver will fail.
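 *
 * A hypothetical client could register roughly as follows (the class name
 * my_irdma_class and the surrounding error handling are illustrative only).
 * With the interface at 1.1.0, a client reporting 1.0.x or 1.1.0 is
 * accepted, while one reporting 1.2.0 is refused:
 *
 *	static struct ice_rdma_info rdma_info = {
 *		.major_version = ICE_RDMA_MAJOR_VERSION,
 *		.minor_version = ICE_RDMA_MINOR_VERSION,
 *		.patch_version = ICE_RDMA_PATCH_VERSION,
 *		.rdma_class = &my_irdma_class,
 *	};
 *
 *	err = ice_rdma_register(&rdma_info);
 *	if (err != 0)
 *		return (err);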
*/ struct ice_rdma_info { uint16_t major_version; uint16_t minor_version; uint16_t patch_version; kobj_class_t rdma_class; }; #define ICE_RDMA_MAX_USER_PRIORITY 8 #define ICE_RDMA_MAX_MSIX 64 /* Declare the ice_rdma_di kobject class */ DECLARE_CLASS(ice_rdma_di_class); /** * @struct ice_rdma_msix_mapping * @brief MSI-X mapping requested by the peer RDMA driver * * Defines a mapping for MSI-X vectors being requested by the peer RDMA driver * for a given PF. */ struct ice_rdma_msix_mapping { uint8_t itr_indx; int aeq_vector; int ceq_cnt; int *ceq_vector; }; /** * @struct ice_rdma_msix * @brief RDMA MSI-X vectors reserved for the peer RDMA driver * * Defines the segment of the MSI-X vectors for use by the RDMA driver. These * are reserved by the PF when it initializes. */ struct ice_rdma_msix { int base; int count; }; /** * @struct ice_qos_info * @brief QoS information to be shared with RDMA driver */ struct ice_qos_info { uint64_t tc_ctx; uint8_t rel_bw; uint8_t prio_type; uint8_t egress_virt_up; uint8_t ingress_virt_up; }; /** * @struct ice_qos_app_priority_table * @brief Application priority data */ struct ice_qos_app_priority_table { uint16_t prot_id; uint8_t priority; uint8_t selector; }; #define IEEE_8021QAZ_MAX_TCS 8 #define ICE_TC_MAX_USER_PRIORITY 8 #define ICE_QOS_MAX_APPS 32 #define ICE_QOS_DSCP_NUM_VAL 64 /** * @struct ice_qos_params * @brief Holds all necessary data for RDMA to work with DCB * * Struct to hold QoS info */ struct ice_qos_params { struct ice_qos_info tc_info[IEEE_8021QAZ_MAX_TCS]; uint8_t up2tc[ICE_TC_MAX_USER_PRIORITY]; uint8_t vsi_relative_bw; uint8_t vsi_priority_type; uint32_t num_apps; uint8_t pfc_mode; uint8_t dscp_map[ICE_QOS_DSCP_NUM_VAL]; struct ice_qos_app_priority_table apps[ICE_QOS_MAX_APPS]; uint8_t num_tc; }; /** * @struct ice_rdma_peer * @brief RDMA driver information * * Shared structure used by the RDMA client driver when talking with the main * device driver. * * Because the definition of this structure is shared between the two drivers, * its ABI should be handled carefully. */ struct ice_rdma_peer { /** * The KOBJ_FIELDS macro must come first, in order for it to be used * as a kobject. */ KOBJ_FIELDS; struct ifnet *ifp; device_t dev; struct resource *pci_mem; struct ice_qos_params initial_qos_info; struct ice_rdma_msix msix; uint16_t mtu; uint16_t pf_vsi_num; uint8_t pf_id; }; /** * @enum ice_res_type * @brief enum for type of resource registration * * enum for type of resource registration. 
* created for plausible compatibility with IDC */ enum ice_res_type { ICE_INVAL_RES = 0x0, ICE_RDMA_QSET_ALLOC = 0x8, ICE_RDMA_QSET_FREE = 0x18, }; /** * @struct ice_rdma_qset_params * @brief struct to hold per RDMA Qset info */ struct ice_rdma_qset_params { uint32_t teid; /* qset TEID */ uint16_t qs_handle; /* RDMA driver provides this */ uint16_t vsi_id; /* VSI index */ uint8_t tc; /* TC branch the QSet should belong to */ uint8_t reserved[3]; }; #define ICE_MAX_TXQ_PER_TXQG 128 /** * @struct ice_rdma_qset_update * @brief struct used to register and unregister qsets for RDMA driver */ struct ice_rdma_qset_update { enum ice_res_type res_type; uint16_t cnt_req; uint16_t res_allocated; uint32_t res_handle; struct ice_rdma_qset_params qsets; }; /** * @enum ice_rdma_event_type * @brief enum for type of event from base driver */ enum ice_rdma_event_type { ICE_RDMA_EVENT_NONE = 0, ICE_RDMA_EVENT_LINK_CHANGE, ICE_RDMA_EVENT_MTU_CHANGE, ICE_RDMA_EVENT_TC_CHANGE, ICE_RDMA_EVENT_API_CHANGE, ICE_RDMA_EVENT_CRIT_ERR, ICE_RDMA_EVENT_RESET, ICE_RDMA_EVENT_QSET_REGISTER, ICE_RDMA_EVENT_VSI_FILTER_UPDATE, ICE_RDMA_EVENT_LAST }; /** * @struct ice_rdma_event * @brief struct for event information to pass to RDMA driver */ struct ice_rdma_event { enum ice_rdma_event_type type; union { /* link change event */ struct { int linkstate; uint64_t baudrate; }; /* MTU change event */ - struct { - int mtu; - }; + int mtu; /* * TC/QoS/DCB change event - * RESET event use prep variable only * prep: if true, this is a pre-event, post-event otherwise */ struct { struct ice_qos_params port_qos; bool prep; }; + /* + * CRIT_ERR event + */ + uint32_t oicr_reg; }; }; /** * @struct ice_rdma_request * @brief struct with data for a request from the RDMA driver */ struct ice_rdma_request { enum ice_rdma_event_type type; union { struct { struct ice_rdma_qset_update res; }; struct { bool enable_filter; }; }; }; int ice_rdma_register(struct ice_rdma_info *info); int ice_rdma_unregister(void); #endif diff --git a/sys/dev/ice/ice_rdma_internal.h b/sys/dev/ice/ice_rdma_internal.h index c80d4540194a..b36544609f45 100644 --- a/sys/dev/ice/ice_rdma_internal.h +++ b/sys/dev/ice/ice_rdma_internal.h @@ -1,101 +1,103 @@ /* SPDX-License-Identifier: BSD-3-Clause */ /* Copyright (c) 2023, Intel Corporation * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * 1. Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. * * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * 3. Neither the name of the Intel Corporation nor the names of its * contributors may be used to endorse or promote products derived from * this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. */ /** * @file ice_rdma_internal.h * @brief internal header for the RMDA driver interface setup * * Contains the definitions and functions used by the ice driver to setup the * RDMA driver interface. Functions and definitions in this file are not * shared with the RDMA client driver. */ #ifndef _ICE_RDMA_INTERNAL_H_ #define _ICE_RDMA_INTERNAL_H_ #include "ice_rdma.h" /* Forward declare the softc structure */ struct ice_softc; /* Global sysctl variable indicating if the RDMA client interface is enabled */ extern bool ice_enable_irdma; /** * @struct ice_rdma_entry * @brief RDMA peer list node * * Structure used to store peer entries for each PF in a linked list. */ struct ice_rdma_entry { LIST_ENTRY(ice_rdma_entry) node; struct ice_rdma_peer peer; bool attached; bool initiated; }; #define ice_rdma_peer_to_entry(p) __containerof(p, struct ice_rdma_entry, peer) #define ice_rdma_entry_to_sc(e) __containerof(e, struct ice_softc, rdma_entry) #define ice_rdma_peer_to_sc(p) ice_rdma_entry_to_sc(ice_rdma_peer_to_entry(p)) /** * @struct ice_rdma_peers * @brief Head list structure for the RDMA entry list * * Type defining the head of the linked list of RDMA entries. */ LIST_HEAD(ice_rdma_peers, ice_rdma_entry); /** * @struct ice_rdma_state * @brief global driver state for RDMA * * Contains global state shared across all PFs by the device driver, such as * the kobject class of the currently connected peer driver, and the linked * list of peer entries for each PF. */ struct ice_rdma_state { bool registered; kobj_class_t peer_class; struct sx mtx; struct ice_rdma_peers peers; }; void ice_rdma_init(void); void ice_rdma_exit(void); int ice_rdma_pf_attach(struct ice_softc *sc); void ice_rdma_pf_detach(struct ice_softc *sc); int ice_rdma_pf_init(struct ice_softc *sc); int ice_rdma_pf_stop(struct ice_softc *sc); void ice_rdma_link_change(struct ice_softc *sc, int linkstate, uint64_t baudrate); void ice_rdma_notify_dcb_qos_change(struct ice_softc *sc); void ice_rdma_dcb_qos_update(struct ice_softc *sc, struct ice_port_info *pi); +void ice_rdma_notify_pe_intr(struct ice_softc *sc, uint32_t oicr); +void ice_rdma_notify_reset(struct ice_softc *sc); #endif diff --git a/sys/dev/ice/if_ice_iflib.c b/sys/dev/ice/if_ice_iflib.c index cb4931c4d54e..2397fce11596 100644 --- a/sys/dev/ice/if_ice_iflib.c +++ b/sys/dev/ice/if_ice_iflib.c @@ -1,3079 +1,3078 @@ /* SPDX-License-Identifier: BSD-3-Clause */ /* Copyright (c) 2023, Intel Corporation * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * 1. Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. * * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * 3. 
Neither the name of the Intel Corporation nor the names of its * contributors may be used to endorse or promote products derived from * this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. */ /** * @file if_ice_iflib.c * @brief iflib driver implementation * * Contains the main entry point for the iflib driver implementation. It * implements the various ifdi driver methods, and sets up the module and * driver values to load an iflib driver. */ #include "ice_iflib.h" #include "ice_drv_info.h" #include "ice_switch.h" #include "ice_sched.h" #include #include #include #include #include /* * Device method prototypes */ static void *ice_register(device_t); static int ice_if_attach_pre(if_ctx_t); static int ice_attach_pre_recovery_mode(struct ice_softc *sc); static int ice_if_attach_post(if_ctx_t); static void ice_attach_post_recovery_mode(struct ice_softc *sc); static int ice_if_detach(if_ctx_t); static int ice_if_tx_queues_alloc(if_ctx_t ctx, caddr_t *vaddrs, uint64_t *paddrs, int ntxqs, int ntxqsets); static int ice_if_rx_queues_alloc(if_ctx_t ctx, caddr_t *vaddrs, uint64_t *paddrs, int nqs, int nqsets); static int ice_if_msix_intr_assign(if_ctx_t ctx, int msix); static void ice_if_queues_free(if_ctx_t ctx); static int ice_if_mtu_set(if_ctx_t ctx, uint32_t mtu); static void ice_if_intr_enable(if_ctx_t ctx); static void ice_if_intr_disable(if_ctx_t ctx); static int ice_if_rx_queue_intr_enable(if_ctx_t ctx, uint16_t rxqid); static int ice_if_tx_queue_intr_enable(if_ctx_t ctx, uint16_t txqid); static int ice_if_promisc_set(if_ctx_t ctx, int flags); static void ice_if_media_status(if_ctx_t ctx, struct ifmediareq *ifmr); static int ice_if_media_change(if_ctx_t ctx); static void ice_if_init(if_ctx_t ctx); static void ice_if_timer(if_ctx_t ctx, uint16_t qid); static void ice_if_update_admin_status(if_ctx_t ctx); static void ice_if_multi_set(if_ctx_t ctx); static void ice_if_vlan_register(if_ctx_t ctx, u16 vtag); static void ice_if_vlan_unregister(if_ctx_t ctx, u16 vtag); static void ice_if_stop(if_ctx_t ctx); static uint64_t ice_if_get_counter(if_ctx_t ctx, ift_counter counter); static int ice_if_priv_ioctl(if_ctx_t ctx, u_long command, caddr_t data); static int ice_if_i2c_req(if_ctx_t ctx, struct ifi2creq *req); static int ice_if_suspend(if_ctx_t ctx); static int ice_if_resume(if_ctx_t ctx); static int ice_msix_que(void *arg); static int ice_msix_admin(void *arg); /* * Helper function prototypes */ static int ice_pci_mapping(struct ice_softc *sc); static void ice_free_pci_mapping(struct ice_softc *sc); static void ice_update_link_status(struct ice_softc *sc, bool update_media); static void ice_init_device_features(struct ice_softc *sc); static void ice_init_tx_tracking(struct ice_vsi *vsi); static void ice_handle_reset_event(struct 
ice_softc *sc); static void ice_handle_pf_reset_request(struct ice_softc *sc); static void ice_prepare_for_reset(struct ice_softc *sc); static int ice_rebuild_pf_vsi_qmap(struct ice_softc *sc); static void ice_rebuild(struct ice_softc *sc); static void ice_rebuild_recovery_mode(struct ice_softc *sc); static void ice_free_irqvs(struct ice_softc *sc); static void ice_update_rx_mbuf_sz(struct ice_softc *sc); static void ice_poll_for_media_avail(struct ice_softc *sc); static void ice_setup_scctx(struct ice_softc *sc); static int ice_allocate_msix(struct ice_softc *sc); static void ice_admin_timer(void *arg); static void ice_transition_recovery_mode(struct ice_softc *sc); static void ice_transition_safe_mode(struct ice_softc *sc); /* * Device Interface Declaration */ /** * @var ice_methods * @brief ice driver method entry points * * List of device methods implementing the generic device interface used by * the device stack to interact with the ice driver. Since this is an iflib * driver, most of the methods point to the generic iflib implementation. */ static device_method_t ice_methods[] = { /* Device interface */ DEVMETHOD(device_register, ice_register), DEVMETHOD(device_probe, iflib_device_probe_vendor), DEVMETHOD(device_attach, iflib_device_attach), DEVMETHOD(device_detach, iflib_device_detach), DEVMETHOD(device_shutdown, iflib_device_shutdown), DEVMETHOD(device_suspend, iflib_device_suspend), DEVMETHOD(device_resume, iflib_device_resume), DEVMETHOD_END }; /** * @var ice_iflib_methods * @brief iflib method entry points * * List of device methods used by the iflib stack to interact with this * driver. These are the real main entry points used to interact with this * driver. */ static device_method_t ice_iflib_methods[] = { DEVMETHOD(ifdi_attach_pre, ice_if_attach_pre), DEVMETHOD(ifdi_attach_post, ice_if_attach_post), DEVMETHOD(ifdi_detach, ice_if_detach), DEVMETHOD(ifdi_tx_queues_alloc, ice_if_tx_queues_alloc), DEVMETHOD(ifdi_rx_queues_alloc, ice_if_rx_queues_alloc), DEVMETHOD(ifdi_msix_intr_assign, ice_if_msix_intr_assign), DEVMETHOD(ifdi_queues_free, ice_if_queues_free), DEVMETHOD(ifdi_mtu_set, ice_if_mtu_set), DEVMETHOD(ifdi_intr_enable, ice_if_intr_enable), DEVMETHOD(ifdi_intr_disable, ice_if_intr_disable), DEVMETHOD(ifdi_rx_queue_intr_enable, ice_if_rx_queue_intr_enable), DEVMETHOD(ifdi_tx_queue_intr_enable, ice_if_tx_queue_intr_enable), DEVMETHOD(ifdi_promisc_set, ice_if_promisc_set), DEVMETHOD(ifdi_media_status, ice_if_media_status), DEVMETHOD(ifdi_media_change, ice_if_media_change), DEVMETHOD(ifdi_init, ice_if_init), DEVMETHOD(ifdi_stop, ice_if_stop), DEVMETHOD(ifdi_timer, ice_if_timer), DEVMETHOD(ifdi_update_admin_status, ice_if_update_admin_status), DEVMETHOD(ifdi_multi_set, ice_if_multi_set), DEVMETHOD(ifdi_vlan_register, ice_if_vlan_register), DEVMETHOD(ifdi_vlan_unregister, ice_if_vlan_unregister), DEVMETHOD(ifdi_get_counter, ice_if_get_counter), DEVMETHOD(ifdi_priv_ioctl, ice_if_priv_ioctl), DEVMETHOD(ifdi_i2c_req, ice_if_i2c_req), DEVMETHOD(ifdi_suspend, ice_if_suspend), DEVMETHOD(ifdi_resume, ice_if_resume), DEVMETHOD_END }; /** * @var ice_driver * @brief driver structure for the generic device stack * * driver_t definition used to setup the generic device methods. */ static driver_t ice_driver = { .name = "ice", .methods = ice_methods, .size = sizeof(struct ice_softc), }; /** * @var ice_iflib_driver * @brief driver structure for the iflib stack * * driver_t definition used to setup the iflib device methods. 
*/ static driver_t ice_iflib_driver = { .name = "ice", .methods = ice_iflib_methods, .size = sizeof(struct ice_softc), }; extern struct if_txrx ice_txrx; extern struct if_txrx ice_recovery_txrx; /** * @var ice_sctx * @brief ice driver shared context * * Structure defining shared values (context) that is used by all instances of * the device. Primarily used to setup details about how the iflib stack * should treat this driver. Also defines the default, minimum, and maximum * number of descriptors in each ring. */ static struct if_shared_ctx ice_sctx = { .isc_magic = IFLIB_MAGIC, .isc_q_align = PAGE_SIZE, .isc_tx_maxsize = ICE_MAX_FRAME_SIZE, /* We could technically set this as high as ICE_MAX_DMA_SEG_SIZE, but * that doesn't make sense since that would be larger than the maximum * size of a single packet. */ .isc_tx_maxsegsize = ICE_MAX_FRAME_SIZE, /* XXX: This is only used by iflib to ensure that * scctx->isc_tx_tso_size_max + the VLAN header is a valid size. */ .isc_tso_maxsize = ICE_TSO_SIZE + sizeof(struct ether_vlan_header), /* XXX: This is used by iflib to set the number of segments in the TSO * DMA tag. However, scctx->isc_tx_tso_segsize_max is used to set the * related ifnet parameter. */ .isc_tso_maxsegsize = ICE_MAX_DMA_SEG_SIZE, .isc_rx_maxsize = ICE_MAX_FRAME_SIZE, .isc_rx_nsegments = ICE_MAX_RX_SEGS, .isc_rx_maxsegsize = ICE_MAX_FRAME_SIZE, .isc_nfl = 1, .isc_ntxqs = 1, .isc_nrxqs = 1, .isc_admin_intrcnt = 1, .isc_vendor_info = ice_vendor_info_array, .isc_driver_version = __DECONST(char *, ice_driver_version), .isc_driver = &ice_iflib_driver, /* * IFLIB_NEED_SCRATCH ensures that mbufs have scratch space available * for hardware checksum offload * * IFLIB_TSO_INIT_IP ensures that the TSO packets have zeroed out the * IP sum field, required by our hardware to calculate valid TSO * checksums. * * IFLIB_ADMIN_ALWAYS_RUN ensures that the administrative task runs * even when the interface is down. * * IFLIB_SKIP_MSIX allows the driver to handle allocating MSI-X * vectors manually instead of relying on iflib code to do this. */ .isc_flags = IFLIB_NEED_SCRATCH | IFLIB_TSO_INIT_IP | IFLIB_ADMIN_ALWAYS_RUN | IFLIB_SKIP_MSIX, .isc_nrxd_min = {ICE_MIN_DESC_COUNT}, .isc_ntxd_min = {ICE_MIN_DESC_COUNT}, .isc_nrxd_max = {ICE_IFLIB_MAX_DESC_COUNT}, .isc_ntxd_max = {ICE_IFLIB_MAX_DESC_COUNT}, .isc_nrxd_default = {ICE_DEFAULT_DESC_COUNT}, .isc_ntxd_default = {ICE_DEFAULT_DESC_COUNT}, }; DRIVER_MODULE(ice, pci, ice_driver, ice_module_event_handler, NULL); MODULE_VERSION(ice, 1); MODULE_DEPEND(ice, pci, 1, 1, 1); MODULE_DEPEND(ice, ether, 1, 1, 1); MODULE_DEPEND(ice, iflib, 1, 1, 1); IFLIB_PNP_INFO(pci, ice, ice_vendor_info_array); /* Static driver-wide sysctls */ #include "ice_iflib_sysctls.h" /** * ice_pci_mapping - Map PCI BAR memory * @sc: device private softc * * Map PCI BAR 0 for device operation. */ static int ice_pci_mapping(struct ice_softc *sc) { int rc; /* Map BAR0 */ rc = ice_map_bar(sc->dev, &sc->bar0, 0); if (rc) return rc; return 0; } /** * ice_free_pci_mapping - Release PCI BAR memory * @sc: device private softc * * Release PCI BARs which were previously mapped by ice_pci_mapping(). */ static void ice_free_pci_mapping(struct ice_softc *sc) { /* Free BAR0 */ ice_free_bar(sc->dev, &sc->bar0); } /* * Device methods */ /** * ice_register - register device method callback * @dev: the device being registered * * Returns a pointer to the shared context structure, which is used by iflib. 
*/ static void * ice_register(device_t dev __unused) { return &ice_sctx; } /* ice_register */ /** * ice_setup_scctx - Setup the iflib softc context structure * @sc: the device private structure * * Setup the parameters in if_softc_ctx_t structure used by the iflib stack * when loading. */ static void ice_setup_scctx(struct ice_softc *sc) { if_softc_ctx_t scctx = sc->scctx; struct ice_hw *hw = &sc->hw; bool safe_mode, recovery_mode; safe_mode = ice_is_bit_set(sc->feat_en, ICE_FEATURE_SAFE_MODE); recovery_mode = ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE); /* * If the driver loads in Safe mode or Recovery mode, limit iflib to * a single queue pair. */ if (safe_mode || recovery_mode) { scctx->isc_ntxqsets = scctx->isc_nrxqsets = 1; scctx->isc_ntxqsets_max = 1; scctx->isc_nrxqsets_max = 1; } else { /* * iflib initially sets the isc_ntxqsets and isc_nrxqsets to * the values of the override sysctls. Cache these initial * values so that the driver can be aware of what the iflib * sysctl value is when setting up MSI-X vectors. */ sc->ifc_sysctl_ntxqs = scctx->isc_ntxqsets; sc->ifc_sysctl_nrxqs = scctx->isc_nrxqsets; if (scctx->isc_ntxqsets == 0) scctx->isc_ntxqsets = hw->func_caps.common_cap.rss_table_size; if (scctx->isc_nrxqsets == 0) scctx->isc_nrxqsets = hw->func_caps.common_cap.rss_table_size; scctx->isc_ntxqsets_max = hw->func_caps.common_cap.num_txq; scctx->isc_nrxqsets_max = hw->func_caps.common_cap.num_rxq; /* * Sanity check that the iflib sysctl values are within the * maximum supported range. */ if (sc->ifc_sysctl_ntxqs > scctx->isc_ntxqsets_max) sc->ifc_sysctl_ntxqs = scctx->isc_ntxqsets_max; if (sc->ifc_sysctl_nrxqs > scctx->isc_nrxqsets_max) sc->ifc_sysctl_nrxqs = scctx->isc_nrxqsets_max; } scctx->isc_txqsizes[0] = roundup2(scctx->isc_ntxd[0] * sizeof(struct ice_tx_desc), DBA_ALIGN); scctx->isc_rxqsizes[0] = roundup2(scctx->isc_nrxd[0] * sizeof(union ice_32b_rx_flex_desc), DBA_ALIGN); scctx->isc_tx_nsegments = ICE_MAX_TX_SEGS; scctx->isc_tx_tso_segments_max = ICE_MAX_TSO_SEGS; scctx->isc_tx_tso_size_max = ICE_TSO_SIZE; scctx->isc_tx_tso_segsize_max = ICE_MAX_DMA_SEG_SIZE; scctx->isc_msix_bar = PCIR_BAR(ICE_MSIX_BAR); scctx->isc_rss_table_size = hw->func_caps.common_cap.rss_table_size; /* * If the driver loads in recovery mode, disable Tx/Rx functionality */ if (recovery_mode) scctx->isc_txrx = &ice_recovery_txrx; else scctx->isc_txrx = &ice_txrx; /* * If the driver loads in Safe mode or Recovery mode, disable * advanced features including hardware offloads. */ if (safe_mode || recovery_mode) { scctx->isc_capenable = ICE_SAFE_CAPS; scctx->isc_tx_csum_flags = 0; } else { scctx->isc_capenable = ICE_FULL_CAPS; scctx->isc_tx_csum_flags = ICE_CSUM_OFFLOAD; } scctx->isc_capabilities = scctx->isc_capenable; } /* ice_setup_scctx */ /** * ice_if_attach_pre - Early device attach logic * @ctx: the iflib context structure * * Called by iflib during the attach process. Earliest main driver entry * point which performs necessary hardware and driver initialization. Called * before the Tx and Rx queues are allocated. 
*/ static int ice_if_attach_pre(if_ctx_t ctx) { struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx); enum ice_fw_modes fw_mode; enum ice_status status; if_softc_ctx_t scctx; struct ice_hw *hw; device_t dev; int err; device_printf(iflib_get_dev(ctx), "Loading the iflib ice driver\n"); ice_set_state(&sc->state, ICE_STATE_ATTACHING); sc->ctx = ctx; sc->media = iflib_get_media(ctx); sc->sctx = iflib_get_sctx(ctx); sc->iflib_ctx_lock = iflib_ctx_lock_get(ctx); dev = sc->dev = iflib_get_dev(ctx); scctx = sc->scctx = iflib_get_softc_ctx(ctx); hw = &sc->hw; hw->back = sc; snprintf(sc->admin_mtx_name, sizeof(sc->admin_mtx_name), "%s:admin", device_get_nameunit(dev)); mtx_init(&sc->admin_mtx, sc->admin_mtx_name, NULL, MTX_DEF); callout_init_mtx(&sc->admin_timer, &sc->admin_mtx, 0); ASSERT_CTX_LOCKED(sc); if (ice_pci_mapping(sc)) { err = (ENXIO); goto destroy_admin_timer; } /* Save off the PCI information */ ice_save_pci_info(hw, dev); /* create tunables as early as possible */ ice_add_device_tunables(sc); /* Setup ControlQ lengths */ ice_set_ctrlq_len(hw); reinit_hw: fw_mode = ice_get_fw_mode(hw); if (fw_mode == ICE_FW_MODE_REC) { device_printf(dev, "Firmware recovery mode detected. Limiting functionality. Refer to Intel(R) Ethernet Adapters and Devices User Guide for details on firmware recovery mode.\n"); err = ice_attach_pre_recovery_mode(sc); if (err) goto free_pci_mapping; return (0); } /* Initialize the hw data structure */ status = ice_init_hw(hw); if (status) { if (status == ICE_ERR_FW_API_VER) { /* Enter recovery mode, so that the driver remains * loaded. This way, if the system administrator * cannot update the driver, they may still attempt to * downgrade the NVM. */ err = ice_attach_pre_recovery_mode(sc); if (err) goto free_pci_mapping; return (0); } else { err = EIO; device_printf(dev, "Unable to initialize hw, err %s aq_err %s\n", ice_status_str(status), ice_aq_str(hw->adminq.sq_last_status)); } goto free_pci_mapping; } ice_init_device_features(sc); /* Notify firmware of the device driver version */ err = ice_send_version(sc); if (err) goto deinit_hw; /* * Success indicates a change was made that requires a reinitialization * of the hardware */ err = ice_load_pkg_file(sc); if (err == ICE_SUCCESS) { ice_deinit_hw(hw); goto reinit_hw; } err = ice_init_link_events(sc); if (err) { device_printf(dev, "ice_init_link_events failed: %s\n", ice_err_str(err)); goto deinit_hw; } /* Initialize VLAN mode in FW; if dual VLAN mode is supported by the package * and firmware, this will force them to use single VLAN mode. 
*/ status = ice_set_vlan_mode(hw); if (status) { err = EIO; device_printf(dev, "Unable to initialize VLAN mode, err %s aq_err %s\n", ice_status_str(status), ice_aq_str(hw->adminq.sq_last_status)); goto deinit_hw; } ice_print_nvm_version(sc); /* Setup the MAC address */ iflib_set_mac(ctx, hw->port_info->mac.lan_addr); /* Setup the iflib softc context structure */ ice_setup_scctx(sc); /* Initialize the Tx queue manager */ err = ice_resmgr_init(&sc->tx_qmgr, hw->func_caps.common_cap.num_txq); if (err) { device_printf(dev, "Unable to initialize Tx queue manager: %s\n", ice_err_str(err)); goto deinit_hw; } /* Initialize the Rx queue manager */ err = ice_resmgr_init(&sc->rx_qmgr, hw->func_caps.common_cap.num_rxq); if (err) { device_printf(dev, "Unable to initialize Rx queue manager: %s\n", ice_err_str(err)); goto free_tx_qmgr; } /* Initialize the interrupt resource manager */ err = ice_alloc_intr_tracking(sc); if (err) /* Errors are already printed */ goto free_rx_qmgr; /* Determine maximum number of VSIs we'll prepare for */ sc->num_available_vsi = min(ICE_MAX_VSI_AVAILABLE, hw->func_caps.guar_num_vsi); if (!sc->num_available_vsi) { err = EIO; device_printf(dev, "No VSIs allocated to host\n"); goto free_intr_tracking; } /* Allocate storage for the VSI pointers */ sc->all_vsi = (struct ice_vsi **) malloc(sizeof(struct ice_vsi *) * sc->num_available_vsi, M_ICE, M_WAITOK | M_ZERO); if (!sc->all_vsi) { err = ENOMEM; device_printf(dev, "Unable to allocate VSI array\n"); goto free_intr_tracking; } /* * Prepare the statically allocated primary PF VSI in the softc * structure. Other VSIs will be dynamically allocated as needed. */ ice_setup_pf_vsi(sc); err = ice_alloc_vsi_qmap(&sc->pf_vsi, scctx->isc_ntxqsets_max, scctx->isc_nrxqsets_max); if (err) { device_printf(dev, "Unable to allocate VSI Queue maps\n"); goto free_main_vsi; } /* Allocate MSI-X vectors (due to isc_flags IFLIB_SKIP_MSIX) */ err = ice_allocate_msix(sc); if (err) goto free_main_vsi; return 0; free_main_vsi: /* ice_release_vsi will free the queue maps if they were allocated */ ice_release_vsi(&sc->pf_vsi); free(sc->all_vsi, M_ICE); sc->all_vsi = NULL; free_intr_tracking: ice_free_intr_tracking(sc); free_rx_qmgr: ice_resmgr_destroy(&sc->rx_qmgr); free_tx_qmgr: ice_resmgr_destroy(&sc->tx_qmgr); deinit_hw: ice_deinit_hw(hw); free_pci_mapping: ice_free_pci_mapping(sc); destroy_admin_timer: mtx_lock(&sc->admin_mtx); callout_stop(&sc->admin_timer); mtx_unlock(&sc->admin_mtx); mtx_destroy(&sc->admin_mtx); return err; } /* ice_if_attach_pre */ /** * ice_attach_pre_recovery_mode - Limited driver attach_pre for FW recovery * @sc: the device private softc * * Loads the device driver in limited Firmware Recovery mode, intended to * allow users to update the firmware to attempt to recover the device. * * @remark We may enter recovery mode in case either (a) the firmware is * detected to be in an invalid state and must be re-programmed, or (b) the * driver detects that the loaded firmware has a non-compatible API version * that the driver cannot operate with. 
*/ static int ice_attach_pre_recovery_mode(struct ice_softc *sc) { ice_set_state(&sc->state, ICE_STATE_RECOVERY_MODE); /* Setup the iflib softc context */ ice_setup_scctx(sc); /* Setup the PF VSI back pointer */ sc->pf_vsi.sc = sc; /* * We still need to allocate MSI-X vectors since we need one vector to * run the administrative admin interrupt */ return ice_allocate_msix(sc); } /** * ice_update_link_status - notify OS of link state change * @sc: device private softc structure * @update_media: true if we should update media even if link didn't change * * Called to notify iflib core of link status changes. Should be called once * during attach_post, and whenever link status changes during runtime. * * This call only updates the currently supported media types if the link * status changed, or if update_media is set to true. */ static void ice_update_link_status(struct ice_softc *sc, bool update_media) { struct ice_hw *hw = &sc->hw; enum ice_status status; /* Never report link up when in recovery mode */ if (ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE)) return; /* Report link status to iflib only once each time it changes */ if (!ice_testandset_state(&sc->state, ICE_STATE_LINK_STATUS_REPORTED)) { if (sc->link_up) { /* link is up */ uint64_t baudrate = ice_aq_speed_to_rate(sc->hw.port_info); ice_set_default_local_lldp_mib(sc); iflib_link_state_change(sc->ctx, LINK_STATE_UP, baudrate); ice_rdma_link_change(sc, LINK_STATE_UP, baudrate); ice_link_up_msg(sc); } else { /* link is down */ iflib_link_state_change(sc->ctx, LINK_STATE_DOWN, 0); ice_rdma_link_change(sc, LINK_STATE_DOWN, 0); } update_media = true; } /* Update the supported media types */ if (update_media) { status = ice_add_media_types(sc, sc->media); if (status) device_printf(sc->dev, "Error adding device media types: %s aq_err %s\n", ice_status_str(status), ice_aq_str(hw->adminq.sq_last_status)); } } /** * ice_if_attach_post - Late device attach logic * @ctx: the iflib context structure * * Called by iflib to finish up attaching the device. Performs any attach * logic which must wait until after the Tx and Rx queues have been * allocated. */ static int ice_if_attach_post(if_ctx_t ctx) { struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx); if_t ifp = iflib_get_ifp(ctx); int err; ASSERT_CTX_LOCKED(sc); /* We don't yet support loading if MSI-X is not supported */ if (sc->scctx->isc_intr != IFLIB_INTR_MSIX) { device_printf(sc->dev, "The ice driver does not support loading without MSI-X\n"); return (ENOTSUP); } /* The ifnet structure hasn't yet been initialized when the attach_pre * handler is called, so wait until attach_post to setup the * isc_max_frame_size. */ sc->ifp = ifp; sc->scctx->isc_max_frame_size = if_getmtu(ifp) + ETHER_HDR_LEN + ETHER_CRC_LEN + ETHER_VLAN_ENCAP_LEN; /* * If we are in recovery mode, only perform a limited subset of * initialization to support NVM recovery. 
*/ if (ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE)) { ice_attach_post_recovery_mode(sc); return (0); } sc->pf_vsi.max_frame_size = sc->scctx->isc_max_frame_size; err = ice_initialize_vsi(&sc->pf_vsi); if (err) { device_printf(sc->dev, "Unable to initialize Main VSI: %s\n", ice_err_str(err)); return err; } /* Enable FW health event reporting */ ice_init_health_events(sc); /* Configure the main PF VSI for RSS */ err = ice_config_rss(&sc->pf_vsi); if (err) { device_printf(sc->dev, "Unable to configure RSS for the main VSI, err %s\n", ice_err_str(err)); return err; } /* Configure switch to drop transmitted LLDP and PAUSE frames */ err = ice_cfg_pf_ethertype_filters(sc); if (err) return err; ice_get_and_print_bus_info(sc); ice_set_link_management_mode(sc); ice_init_saved_phy_cfg(sc); ice_cfg_pba_num(sc); ice_add_device_sysctls(sc); /* Get DCBX/LLDP state and start DCBX agent */ ice_init_dcb_setup(sc); /* Setup link configuration parameters */ ice_init_link_configuration(sc); ice_update_link_status(sc, true); /* Configure interrupt causes for the administrative interrupt */ ice_configure_misc_interrupts(sc); /* Enable ITR 0 right away, so that we can handle admin interrupts */ ice_enable_intr(&sc->hw, sc->irqvs[0].me); err = ice_rdma_pf_attach(sc); if (err) return (err); /* Start the admin timer */ mtx_lock(&sc->admin_mtx); callout_reset(&sc->admin_timer, hz/2, ice_admin_timer, sc); mtx_unlock(&sc->admin_mtx); ice_clear_state(&sc->state, ICE_STATE_ATTACHING); return 0; } /* ice_if_attach_post */ /** * ice_attach_post_recovery_mode - Limited driver attach_post for FW recovery * @sc: the device private softc * * Performs minimal work to prepare the driver to recover an NVM in case the * firmware is in recovery mode. */ static void ice_attach_post_recovery_mode(struct ice_softc *sc) { /* Configure interrupt causes for the administrative interrupt */ ice_configure_misc_interrupts(sc); /* Enable ITR 0 right away, so that we can handle admin interrupts */ ice_enable_intr(&sc->hw, sc->irqvs[0].me); /* Start the admin timer */ mtx_lock(&sc->admin_mtx); callout_reset(&sc->admin_timer, hz/2, ice_admin_timer, sc); mtx_unlock(&sc->admin_mtx); ice_clear_state(&sc->state, ICE_STATE_ATTACHING); } /** * ice_free_irqvs - Free IRQ vector memory * @sc: the device private softc structure * * Free IRQ vector memory allocated during ice_if_msix_intr_assign. */ static void ice_free_irqvs(struct ice_softc *sc) { struct ice_vsi *vsi = &sc->pf_vsi; if_ctx_t ctx = sc->ctx; int i; /* If the irqvs array is NULL, then there are no vectors to free */ if (sc->irqvs == NULL) return; /* Free the IRQ vectors */ for (i = 0; i < sc->num_irq_vectors; i++) iflib_irq_free(ctx, &sc->irqvs[i].irq); /* Clear the irqv pointers */ for (i = 0; i < vsi->num_rx_queues; i++) vsi->rx_queues[i].irqv = NULL; for (i = 0; i < vsi->num_tx_queues; i++) vsi->tx_queues[i].irqv = NULL; /* Release the vector array memory */ free(sc->irqvs, M_ICE); sc->irqvs = NULL; sc->num_irq_vectors = 0; } /** * ice_if_detach - Device driver detach logic * @ctx: iflib context structure * * Perform device shutdown logic to detach the device driver. * * Note that there is no guarantee of the ordering of ice_if_queues_free() and * ice_if_detach(). It is possible for the functions to be called in either * order, and they must not assume to have a strict ordering. 
*/ static int ice_if_detach(if_ctx_t ctx) { struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx); struct ice_vsi *vsi = &sc->pf_vsi; int i; ASSERT_CTX_LOCKED(sc); /* Indicate that we're detaching */ ice_set_state(&sc->state, ICE_STATE_DETACHING); /* Stop the admin timer */ mtx_lock(&sc->admin_mtx); callout_stop(&sc->admin_timer); mtx_unlock(&sc->admin_mtx); mtx_destroy(&sc->admin_mtx); ice_rdma_pf_detach(sc); /* Free allocated media types */ ifmedia_removeall(sc->media); /* Free the Tx and Rx sysctl contexts, and assign NULL to the node * pointers. Note, the calls here and those in ice_if_queues_free() * are *BOTH* necessary, as we cannot guarantee which path will be * run first */ ice_vsi_del_txqs_ctx(vsi); ice_vsi_del_rxqs_ctx(vsi); /* Release MSI-X resources */ ice_free_irqvs(sc); for (i = 0; i < sc->num_available_vsi; i++) { if (sc->all_vsi[i]) ice_release_vsi(sc->all_vsi[i]); } if (sc->all_vsi) { free(sc->all_vsi, M_ICE); sc->all_vsi = NULL; } /* Release MSI-X memory */ pci_release_msi(sc->dev); if (sc->msix_table != NULL) { bus_release_resource(sc->dev, SYS_RES_MEMORY, rman_get_rid(sc->msix_table), sc->msix_table); sc->msix_table = NULL; } ice_free_intr_tracking(sc); /* Destroy the queue managers */ ice_resmgr_destroy(&sc->tx_qmgr); ice_resmgr_destroy(&sc->rx_qmgr); if (!ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE)) ice_deinit_hw(&sc->hw); ice_free_pci_mapping(sc); return 0; } /* ice_if_detach */ /** * ice_if_tx_queues_alloc - Allocate Tx queue memory * @ctx: iflib context structure * @vaddrs: virtual addresses for the queue memory * @paddrs: physical addresses for the queue memory * @ntxqs: the number of Tx queues per set (should always be 1) * @ntxqsets: the number of Tx queue sets to allocate * * Called by iflib to allocate Tx queues for the device. Allocates driver * memory to track each queue, the status arrays used for descriptor * status reporting, and Tx queue sysctls. 
*/ static int ice_if_tx_queues_alloc(if_ctx_t ctx, caddr_t *vaddrs, uint64_t *paddrs, int __invariant_only ntxqs, int ntxqsets) { struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx); struct ice_vsi *vsi = &sc->pf_vsi; struct ice_tx_queue *txq; int err, i, j; MPASS(ntxqs == 1); MPASS(sc->scctx->isc_ntxd[0] <= ICE_MAX_DESC_COUNT); ASSERT_CTX_LOCKED(sc); /* Do not bother allocating queues if we're in recovery mode */ if (ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE)) return (0); /* Allocate queue structure memory */ if (!(vsi->tx_queues = (struct ice_tx_queue *) malloc(sizeof(struct ice_tx_queue) * ntxqsets, M_ICE, M_NOWAIT | M_ZERO))) { device_printf(sc->dev, "Unable to allocate Tx queue memory\n"); return (ENOMEM); } /* Allocate report status arrays */ for (i = 0, txq = vsi->tx_queues; i < ntxqsets; i++, txq++) { if (!(txq->tx_rsq = (uint16_t *) malloc(sizeof(uint16_t) * sc->scctx->isc_ntxd[0], M_ICE, M_NOWAIT))) { device_printf(sc->dev, "Unable to allocate tx_rsq memory\n"); err = ENOMEM; goto free_tx_queues; } /* Initialize report status array */ for (j = 0; j < sc->scctx->isc_ntxd[0]; j++) txq->tx_rsq[j] = QIDX_INVALID; } /* Assign queues from PF space to the main VSI */ err = ice_resmgr_assign_contiguous(&sc->tx_qmgr, vsi->tx_qmap, ntxqsets); if (err) { device_printf(sc->dev, "Unable to assign PF queues: %s\n", ice_err_str(err)); goto free_tx_queues; } vsi->qmap_type = ICE_RESMGR_ALLOC_CONTIGUOUS; /* Add Tx queue sysctls context */ ice_vsi_add_txqs_ctx(vsi); for (i = 0, txq = vsi->tx_queues; i < ntxqsets; i++, txq++) { /* q_handle == me when only one TC */ txq->me = txq->q_handle = i; txq->vsi = vsi; /* store the queue size for easier access */ txq->desc_count = sc->scctx->isc_ntxd[0]; /* get the virtual and physical address of the hardware queues */ txq->tail = QTX_COMM_DBELL(vsi->tx_qmap[i]); txq->tx_base = (struct ice_tx_desc *)vaddrs[i]; txq->tx_paddr = paddrs[i]; ice_add_txq_sysctls(txq); } vsi->num_tx_queues = ntxqsets; return (0); free_tx_queues: for (i = 0, txq = vsi->tx_queues; i < ntxqsets; i++, txq++) { if (txq->tx_rsq != NULL) { free(txq->tx_rsq, M_ICE); txq->tx_rsq = NULL; } } free(vsi->tx_queues, M_ICE); vsi->tx_queues = NULL; return err; } /** * ice_if_rx_queues_alloc - Allocate Rx queue memory * @ctx: iflib context structure * @vaddrs: virtual addresses for the queue memory * @paddrs: physical addresses for the queue memory * @nrxqs: number of Rx queues per set (should always be 1) * @nrxqsets: number of Rx queue sets to allocate * * Called by iflib to allocate Rx queues for the device. Allocates driver * memory to track each queue, as well as sets up the Rx queue sysctls. 
*/ static int ice_if_rx_queues_alloc(if_ctx_t ctx, caddr_t *vaddrs, uint64_t *paddrs, int __invariant_only nrxqs, int nrxqsets) { struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx); struct ice_vsi *vsi = &sc->pf_vsi; struct ice_rx_queue *rxq; int err, i; MPASS(nrxqs == 1); MPASS(sc->scctx->isc_nrxd[0] <= ICE_MAX_DESC_COUNT); ASSERT_CTX_LOCKED(sc); /* Do not bother allocating queues if we're in recovery mode */ if (ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE)) return (0); /* Allocate queue structure memory */ if (!(vsi->rx_queues = (struct ice_rx_queue *) malloc(sizeof(struct ice_rx_queue) * nrxqsets, M_ICE, M_NOWAIT | M_ZERO))) { device_printf(sc->dev, "Unable to allocate Rx queue memory\n"); return (ENOMEM); } /* Assign queues from PF space to the main VSI */ err = ice_resmgr_assign_contiguous(&sc->rx_qmgr, vsi->rx_qmap, nrxqsets); if (err) { device_printf(sc->dev, "Unable to assign PF queues: %s\n", ice_err_str(err)); goto free_rx_queues; } vsi->qmap_type = ICE_RESMGR_ALLOC_CONTIGUOUS; /* Add Rx queue sysctls context */ ice_vsi_add_rxqs_ctx(vsi); for (i = 0, rxq = vsi->rx_queues; i < nrxqsets; i++, rxq++) { rxq->me = i; rxq->vsi = vsi; /* store the queue size for easier access */ rxq->desc_count = sc->scctx->isc_nrxd[0]; /* get the virtual and physical address of the hardware queues */ rxq->tail = QRX_TAIL(vsi->rx_qmap[i]); rxq->rx_base = (union ice_32b_rx_flex_desc *)vaddrs[i]; rxq->rx_paddr = paddrs[i]; ice_add_rxq_sysctls(rxq); } vsi->num_rx_queues = nrxqsets; return (0); free_rx_queues: free(vsi->rx_queues, M_ICE); vsi->rx_queues = NULL; return err; } /** * ice_if_queues_free - Free queue memory * @ctx: the iflib context structure * * Free queue memory allocated by ice_if_tx_queues_alloc() and * ice_if_rx_queues_alloc(). * * There is no guarantee that ice_if_queues_free() and ice_if_detach() will be * called in the same order. It's possible for ice_if_queues_free() to be * called prior to ice_if_detach(), and vice versa. * * For this reason, the main VSI is a static member of the ice_softc, which is * not free'd until after iflib finishes calling both of these functions. * * Thus, care must be taken in how we manage the memory being freed by this * function, and in what tasks it can and must perform. */ static void ice_if_queues_free(if_ctx_t ctx) { struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx); struct ice_vsi *vsi = &sc->pf_vsi; struct ice_tx_queue *txq; int i; /* Free the Tx and Rx sysctl contexts, and assign NULL to the node * pointers. Note, the calls here and those in ice_if_detach() * are *BOTH* necessary, as we cannot guarantee which path will be * run first */ ice_vsi_del_txqs_ctx(vsi); ice_vsi_del_rxqs_ctx(vsi); /* Release MSI-X IRQ vectors, if not yet released in ice_if_detach */ ice_free_irqvs(sc); if (vsi->tx_queues != NULL) { /* free the tx_rsq arrays */ for (i = 0, txq = vsi->tx_queues; i < vsi->num_tx_queues; i++, txq++) { if (txq->tx_rsq != NULL) { free(txq->tx_rsq, M_ICE); txq->tx_rsq = NULL; } } free(vsi->tx_queues, M_ICE); vsi->tx_queues = NULL; vsi->num_tx_queues = 0; } if (vsi->rx_queues != NULL) { free(vsi->rx_queues, M_ICE); vsi->rx_queues = NULL; vsi->num_rx_queues = 0; } } /** * ice_msix_que - Fast interrupt handler for MSI-X receive queues * @arg: The Rx queue memory * * Interrupt filter function for iflib MSI-X interrupts. Called by iflib when * an MSI-X interrupt for a given queue is triggered. Currently this just asks * iflib to schedule the main Rx thread. 
*/ static int ice_msix_que(void *arg) { struct ice_rx_queue __unused *rxq = (struct ice_rx_queue *)arg; /* TODO: dynamic ITR algorithm?? */ return (FILTER_SCHEDULE_THREAD); } /** * ice_msix_admin - Fast interrupt handler for MSI-X admin interrupt * @arg: pointer to device softc memory * * Called by iflib when an administrative interrupt occurs. Should perform any * fast logic for handling the interrupt cause, and then indicate whether the * admin task needs to be queued. */ static int ice_msix_admin(void *arg) { struct ice_softc *sc = (struct ice_softc *)arg; struct ice_hw *hw = &sc->hw; device_t dev = sc->dev; u32 oicr; /* There is no safe way to modify the enabled miscellaneous causes of * the OICR vector at runtime, as doing so would be prone to race * conditions. Reading PFINT_OICR will unmask the associated interrupt * causes and allow future interrupts to occur. The admin interrupt * vector will not be re-enabled until after we exit this function, * but any delayed tasks must be resilient against possible "late * arrival" interrupts that occur while we're already handling the * task. This is done by using state bits and serializing these * delayed tasks via the admin status task function. */ oicr = rd32(hw, PFINT_OICR); /* Processing multiple controlq interrupts on a single vector does not * provide an indication of which controlq triggered the interrupt. * We might try reading the INTEVENT bit of the respective PFINT_*_CTL * registers. However, the INTEVENT bit is not guaranteed to be set as * it gets automatically cleared when the hardware acknowledges the * interrupt. * * This means we don't really have a good indication of whether or * which controlq triggered this interrupt. We'll just notify the * admin task that it should check all the controlqs. */ ice_set_state(&sc->state, ICE_STATE_CONTROLQ_EVENT_PENDING); if (oicr & PFINT_OICR_VFLR_M) { ice_set_state(&sc->state, ICE_STATE_VFLR_PENDING); } if (oicr & PFINT_OICR_MAL_DETECT_M) { ice_set_state(&sc->state, ICE_STATE_MDD_PENDING); } if (oicr & PFINT_OICR_GRST_M) { u32 reset; reset = (rd32(hw, GLGEN_RSTAT) & GLGEN_RSTAT_RESET_TYPE_M) >> GLGEN_RSTAT_RESET_TYPE_S; if (reset == ICE_RESET_CORER) sc->soft_stats.corer_count++; else if (reset == ICE_RESET_GLOBR) sc->soft_stats.globr_count++; else sc->soft_stats.empr_count++; /* There are a couple of bits at play for handling resets. * First, the ICE_STATE_RESET_OICR_RECV bit is used to * indicate that the driver has received an OICR with a reset * bit active, indicating that a CORER/GLOBR/EMPR is about to * happen. Second, we set hw->reset_ongoing to indicate that * the hardware is in reset. We will set this back to false as * soon as the driver has determined that the hardware is out * of reset. * * If the driver wishes to trigger a request, it can set one of * the ICE_STATE_RESET_*_REQ bits, which will trigger the * correct type of reset. 
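 *
 * For example, a handler that wants to request a PF reset (as the ECC and
 * PCI exception cases below do) simply sets the corresponding state bit:
 *
 *	ice_set_state(&sc->state, ICE_STATE_RESET_PFR_REQ);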
*/ if (!ice_testandset_state(&sc->state, ICE_STATE_RESET_OICR_RECV)) hw->reset_ongoing = true; } if (oicr & PFINT_OICR_ECC_ERR_M) { device_printf(dev, "ECC Error detected!\n"); ice_set_state(&sc->state, ICE_STATE_RESET_PFR_REQ); } - if (oicr & PFINT_OICR_PE_CRITERR_M) { - device_printf(dev, "Critical Protocol Engine Error detected!\n"); - ice_set_state(&sc->state, ICE_STATE_RESET_PFR_REQ); + if (oicr & (PFINT_OICR_PE_CRITERR_M | PFINT_OICR_HMC_ERR_M)) { + if (oicr & PFINT_OICR_HMC_ERR_M) + /* Log the HMC errors */ + ice_log_hmc_error(hw, dev); + ice_rdma_notify_pe_intr(sc, oicr); } if (oicr & PFINT_OICR_PCI_EXCEPTION_M) { device_printf(dev, "PCI Exception detected!\n"); ice_set_state(&sc->state, ICE_STATE_RESET_PFR_REQ); } - if (oicr & PFINT_OICR_HMC_ERR_M) { - /* Log the HMC errors, but don't disable the interrupt cause */ - ice_log_hmc_error(hw, dev); - } - return (FILTER_SCHEDULE_THREAD); } /** * ice_allocate_msix - Allocate MSI-X vectors for the interface * @sc: the device private softc * * Map the MSI-X bar, and then request MSI-X vectors in a two-stage process. * * First, determine a suitable total number of vectors based on the number * of CPUs, RSS buckets, the administrative vector, and other demands such as * RDMA. * * Request the desired amount of vectors, and see how many we obtain. If we * don't obtain as many as desired, reduce the demands by lowering the number * of requested queues or reducing the demand from other features such as * RDMA. * * @remark This function is required because the driver sets the * IFLIB_SKIP_MSIX flag indicating that the driver will manage MSI-X vectors * manually. * * @remark This driver will only use MSI-X vectors. If this is not possible, * neither MSI or legacy interrupts will be tried. * * @post on success this function must set the following scctx parameters: * isc_vectors, isc_nrxqsets, isc_ntxqsets, and isc_intr. * * @returns zero on success or an error code on failure. */ static int ice_allocate_msix(struct ice_softc *sc) { bool iflib_override_queue_count = false; if_softc_ctx_t scctx = sc->scctx; device_t dev = sc->dev; cpuset_t cpus; int bar, queues, vectors, requested; int err = 0; int rdma; /* Allocate the MSI-X bar */ bar = scctx->isc_msix_bar; sc->msix_table = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &bar, RF_ACTIVE); if (!sc->msix_table) { device_printf(dev, "Unable to map MSI-X table\n"); return (ENOMEM); } /* Check if the iflib queue count sysctls have been set */ if (sc->ifc_sysctl_ntxqs || sc->ifc_sysctl_nrxqs) iflib_override_queue_count = true; err = bus_get_cpus(dev, INTR_CPUS, sizeof(cpus), &cpus); if (err) { device_printf(dev, "%s: Unable to fetch the CPU list: %s\n", __func__, ice_err_str(err)); CPU_COPY(&all_cpus, &cpus); } /* Attempt to mimic behavior of iflib_msix_init */ if (iflib_override_queue_count) { /* * If the override sysctls have been set, limit the queues to * the number of logical CPUs. */ queues = mp_ncpus; } else { /* * Otherwise, limit the queue count to the CPUs associated * with the NUMA node the device is associated with. */ queues = CPU_COUNT(&cpus); } /* Clamp to the number of RSS buckets */ queues = imin(queues, rss_getnumbuckets()); /* * Clamp the number of queue pairs to the minimum of the requested Tx * and Rx queues. 
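* For example, assuming the sizing below leaves 8 queue pairs and 4 RDMA vectors, 8 + 4 + 1 = 13 vectors are requested; if the OS grants only 10, the 3-vector shortfall is taken from the RDMA reservation first, leaving 8 queue pairs, 1 RDMA vector and the admin vector.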
*/ queues = imin(queues, sc->ifc_sysctl_ntxqs ?: scctx->isc_ntxqsets); queues = imin(queues, sc->ifc_sysctl_nrxqs ?: scctx->isc_nrxqsets); if (ice_is_bit_set(sc->feat_cap, ICE_FEATURE_RDMA)) { /* * Choose a number of RDMA vectors based on the number of CPUs * up to a maximum */ rdma = min(CPU_COUNT(&cpus), ICE_RDMA_MAX_MSIX); /* Further limit by the user configurable tunable */ rdma = min(rdma, ice_rdma_max_msix); } else { rdma = 0; } /* * Determine the number of vectors to request. Note that we also need * to allocate one vector for administrative tasks. */ requested = rdma + queues + 1; vectors = requested; err = pci_alloc_msix(dev, &vectors); if (err) { device_printf(dev, "Failed to allocate %d MSI-X vectors, err %s\n", vectors, ice_err_str(err)); goto err_free_msix_table; } /* If we don't receive enough vectors, reduce demands */ if (vectors < requested) { int diff = requested - vectors; device_printf(dev, "Requested %d MSI-X vectors, but got only %d\n", requested, vectors); /* * The OS didn't grant us the requested number of vectors. * Check to see if we can reduce demands by limiting the * number of vectors allocated to certain features. */ if (rdma >= diff) { /* Reduce the number of RDMA vectors we reserve */ rdma -= diff; diff = 0; } else { /* Disable RDMA and reduce the difference */ ice_clear_bit(ICE_FEATURE_RDMA, sc->feat_cap); diff -= rdma; rdma = 0; } /* * If we still have a difference, we need to reduce the number * of queue pairs. * * However, we still need at least one vector for the admin * interrupt and one queue pair. */ if (queues <= diff) { device_printf(dev, "Unable to allocate sufficient MSI-X vectors\n"); err = (ERANGE); goto err_pci_release_msi; } queues -= diff; } device_printf(dev, "Using %d Tx and Rx queues\n", queues); if (rdma) device_printf(dev, "Reserving %d MSI-X interrupts for iRDMA\n", rdma); device_printf(dev, "Using MSI-X interrupts with %d vectors\n", vectors); scctx->isc_vectors = vectors; scctx->isc_nrxqsets = queues; scctx->isc_ntxqsets = queues; scctx->isc_intr = IFLIB_INTR_MSIX; sc->irdma_vectors = rdma; /* Interrupt allocation tracking isn't required in recovery mode, * since neither RDMA nor VFs are enabled. */ if (ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE)) return (0); /* Keep track of which interrupt indices are being used for what */ sc->lan_vectors = vectors - rdma; err = ice_resmgr_assign_contiguous(&sc->imgr, sc->pf_imap, sc->lan_vectors); if (err) { device_printf(dev, "Unable to assign PF interrupt mapping: %s\n", ice_err_str(err)); goto err_pci_release_msi; } err = ice_resmgr_assign_contiguous(&sc->imgr, sc->rdma_imap, rdma); if (err) { device_printf(dev, "Unable to assign PF RDMA interrupt mapping: %s\n", ice_err_str(err)); ice_resmgr_release_map(&sc->imgr, sc->pf_imap, sc->lan_vectors); goto err_pci_release_msi; } return (0); err_pci_release_msi: pci_release_msi(dev); err_free_msix_table: if (sc->msix_table != NULL) { bus_release_resource(sc->dev, SYS_RES_MEMORY, rman_get_rid(sc->msix_table), sc->msix_table); sc->msix_table = NULL; } return (err); } /** * ice_if_msix_intr_assign - Assign MSI-X interrupt vectors to queues * @ctx: the iflib context structure * @msix: the number of vectors we were assigned * * Called by iflib to assign MSI-X vectors to queues. Currently requires that * we get at least the same number of vectors as we have queues, and that we * always have the same number of Tx and Rx queues. * * Tx queues use a softirq instead of using their own hardware interrupt. 
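* Vector 0 is reserved for the admin interrupt; each Rx queue is assigned its own vector starting at 1, and the corresponding Tx queue is attached to the same vector as a softirq via iflib_softirq_alloc_generic().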
*/ static int ice_if_msix_intr_assign(if_ctx_t ctx, int msix) { struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx); struct ice_vsi *vsi = &sc->pf_vsi; int err, i, vector; ASSERT_CTX_LOCKED(sc); if (vsi->num_rx_queues != vsi->num_tx_queues) { device_printf(sc->dev, "iflib requested %d Tx queues, and %d Rx queues, but the driver isn't able to support a differing number of Tx and Rx queues\n", vsi->num_tx_queues, vsi->num_rx_queues); return (EOPNOTSUPP); } if (msix < (vsi->num_rx_queues + 1)) { device_printf(sc->dev, "Not enough MSI-X vectors to assign one vector to each queue pair\n"); return (EOPNOTSUPP); } /* Save the number of vectors for future use */ sc->num_irq_vectors = vsi->num_rx_queues + 1; /* Allocate space to store the IRQ vector data */ if (!(sc->irqvs = (struct ice_irq_vector *) malloc(sizeof(struct ice_irq_vector) * (sc->num_irq_vectors), M_ICE, M_NOWAIT))) { device_printf(sc->dev, "Unable to allocate irqv memory\n"); return (ENOMEM); } /* Administrative interrupt events will use vector 0 */ err = iflib_irq_alloc_generic(ctx, &sc->irqvs[0].irq, 1, IFLIB_INTR_ADMIN, ice_msix_admin, sc, 0, "admin"); if (err) { device_printf(sc->dev, "Failed to register Admin queue handler: %s\n", ice_err_str(err)); goto free_irqvs; } sc->irqvs[0].me = 0; /* Do not allocate queue interrupts when in recovery mode */ if (ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE)) return (0); for (i = 0, vector = 1; i < vsi->num_rx_queues; i++, vector++) { struct ice_rx_queue *rxq = &vsi->rx_queues[i]; struct ice_tx_queue *txq = &vsi->tx_queues[i]; int rid = vector + 1; char irq_name[16]; snprintf(irq_name, sizeof(irq_name), "rxq%d", i); err = iflib_irq_alloc_generic(ctx, &sc->irqvs[vector].irq, rid, IFLIB_INTR_RXTX, ice_msix_que, rxq, rxq->me, irq_name); if (err) { device_printf(sc->dev, "Failed to allocate q int %d err: %s\n", i, ice_err_str(err)); vector--; i--; goto fail; } sc->irqvs[vector].me = vector; rxq->irqv = &sc->irqvs[vector]; bzero(irq_name, sizeof(irq_name)); snprintf(irq_name, sizeof(irq_name), "txq%d", i); iflib_softirq_alloc_generic(ctx, &sc->irqvs[vector].irq, IFLIB_INTR_TX, txq, txq->me, irq_name); txq->irqv = &sc->irqvs[vector]; } return (0); fail: for (; i >= 0; i--, vector--) iflib_irq_free(ctx, &sc->irqvs[vector].irq); iflib_irq_free(ctx, &sc->irqvs[0].irq); free_irqvs: free(sc->irqvs, M_ICE); sc->irqvs = NULL; return err; } /** * ice_if_mtu_set - Set the device MTU * @ctx: iflib context structure * @mtu: the MTU requested * * Called by iflib to configure the device's Maximum Transmission Unit (MTU). * * @pre assumes the caller holds the iflib CTX lock */ static int ice_if_mtu_set(if_ctx_t ctx, uint32_t mtu) { struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx); ASSERT_CTX_LOCKED(sc); /* Do not support configuration when in recovery mode */ if (ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE)) return (ENOSYS); if (mtu < ICE_MIN_MTU || mtu > ICE_MAX_MTU) return (EINVAL); sc->scctx->isc_max_frame_size = mtu + ETHER_HDR_LEN + ETHER_CRC_LEN + ETHER_VLAN_ENCAP_LEN; sc->pf_vsi.max_frame_size = sc->scctx->isc_max_frame_size; return (0); } /** * ice_if_intr_enable - Enable device interrupts * @ctx: iflib context structure * * Called by iflib to request enabling device interrupts. 
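* The admin (ITR 0) vector is always enabled here; the per-queue vectors are only enabled when the driver is not in recovery mode.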
*/ static void ice_if_intr_enable(if_ctx_t ctx) { struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx); struct ice_vsi *vsi = &sc->pf_vsi; struct ice_hw *hw = &sc->hw; ASSERT_CTX_LOCKED(sc); /* Enable ITR 0 */ ice_enable_intr(hw, sc->irqvs[0].me); /* Do not enable queue interrupts in recovery mode */ if (ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE)) return; /* Enable all queue interrupts */ for (int i = 0; i < vsi->num_rx_queues; i++) ice_enable_intr(hw, vsi->rx_queues[i].irqv->me); } /** * ice_if_intr_disable - Disable device interrupts * @ctx: iflib context structure * * Called by iflib to request disabling device interrupts. */ static void ice_if_intr_disable(if_ctx_t ctx) { struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx); struct ice_hw *hw = &sc->hw; unsigned int i; ASSERT_CTX_LOCKED(sc); /* IFDI_INTR_DISABLE may be called prior to interrupts actually being * assigned to queues. Instead of assuming that the interrupt * assignment in the rx_queues structure is valid, just disable all * possible interrupts * * Note that we choose not to disable ITR 0 because this handles the * AdminQ interrupts, and we want to keep processing these even when * the interface is offline. */ for (i = 1; i < hw->func_caps.common_cap.num_msix_vectors; i++) ice_disable_intr(hw, i); } /** * ice_if_rx_queue_intr_enable - Enable a specific Rx queue interrupt * @ctx: iflib context structure * @rxqid: the Rx queue to enable * * Enable a specific Rx queue interrupt. * * This function is not protected by the iflib CTX lock. */ static int ice_if_rx_queue_intr_enable(if_ctx_t ctx, uint16_t rxqid) { struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx); struct ice_vsi *vsi = &sc->pf_vsi; struct ice_hw *hw = &sc->hw; /* Do not enable queue interrupts in recovery mode */ if (ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE)) return (ENOSYS); ice_enable_intr(hw, vsi->rx_queues[rxqid].irqv->me); return (0); } /** * ice_if_tx_queue_intr_enable - Enable a specific Tx queue interrupt * @ctx: iflib context structure * @txqid: the Tx queue to enable * * Enable a specific Tx queue interrupt. * * This function is not protected by the iflib CTX lock. */ static int ice_if_tx_queue_intr_enable(if_ctx_t ctx, uint16_t txqid) { struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx); struct ice_vsi *vsi = &sc->pf_vsi; struct ice_hw *hw = &sc->hw; /* Do not enable queue interrupts in recovery mode */ if (ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE)) return (ENOSYS); ice_enable_intr(hw, vsi->tx_queues[txqid].irqv->me); return (0); } /** * ice_if_promisc_set - Set device promiscuous mode * @ctx: iflib context structure * @flags: promiscuous flags to configure * * Called by iflib to configure device promiscuous mode. 
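* Note that IFF_ALLMULTI is currently rejected with EOPNOTSUPP, and no configuration is attempted while the driver is in recovery mode.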
* * @remark Calls to this function will always overwrite the previous setting */ static int ice_if_promisc_set(if_ctx_t ctx, int flags) { struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx); struct ice_hw *hw = &sc->hw; device_t dev = sc->dev; enum ice_status status; bool promisc_enable = flags & IFF_PROMISC; bool multi_enable = flags & IFF_ALLMULTI; /* Do not support configuration when in recovery mode */ if (ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE)) return (ENOSYS); if (multi_enable) return (EOPNOTSUPP); if (promisc_enable) { status = ice_set_vsi_promisc(hw, sc->pf_vsi.idx, ICE_VSI_PROMISC_MASK, 0); if (status && status != ICE_ERR_ALREADY_EXISTS) { device_printf(dev, "Failed to enable promiscuous mode for PF VSI, err %s aq_err %s\n", ice_status_str(status), ice_aq_str(hw->adminq.sq_last_status)); return (EIO); } } else { status = ice_clear_vsi_promisc(hw, sc->pf_vsi.idx, ICE_VSI_PROMISC_MASK, 0); if (status) { device_printf(dev, "Failed to disable promiscuous mode for PF VSI, err %s aq_err %s\n", ice_status_str(status), ice_aq_str(hw->adminq.sq_last_status)); return (EIO); } } return (0); } /** * ice_if_media_change - Change device media * @ctx: device ctx structure * * Called by iflib when a media change is requested. This operation is not * supported by the hardware, so we just return an error code. */ static int ice_if_media_change(if_ctx_t ctx) { struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx); device_printf(sc->dev, "Media change is not supported.\n"); return (ENODEV); } /** * ice_if_media_status - Report current device media * @ctx: iflib context structure * @ifmr: ifmedia request structure to update * * Updates the provided ifmr with current device media status, including link * status and media type. */ static void ice_if_media_status(if_ctx_t ctx, struct ifmediareq *ifmr) { struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx); struct ice_link_status *li = &sc->hw.port_info->phy.link_info; ifmr->ifm_status = IFM_AVALID; ifmr->ifm_active = IFM_ETHER; /* Never report link up or media types when in recovery mode */ if (ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE)) return; if (!sc->link_up) return; ifmr->ifm_status |= IFM_ACTIVE; ifmr->ifm_active |= IFM_FDX; if (li->phy_type_low) ifmr->ifm_active |= ice_get_phy_type_low(li->phy_type_low); else if (li->phy_type_high) ifmr->ifm_active |= ice_get_phy_type_high(li->phy_type_high); else ifmr->ifm_active |= IFM_UNKNOWN; /* Report flow control status as well */ if (li->an_info & ICE_AQ_LINK_PAUSE_TX) ifmr->ifm_active |= IFM_ETH_TXPAUSE; if (li->an_info & ICE_AQ_LINK_PAUSE_RX) ifmr->ifm_active |= IFM_ETH_RXPAUSE; } /** * ice_init_tx_tracking - Initialize Tx queue software tracking values * @vsi: the VSI to initialize * * Initialize Tx queue software tracking values, including the Report Status * queue, and related software tracking values. */ static void ice_init_tx_tracking(struct ice_vsi *vsi) { struct ice_tx_queue *txq; size_t j; int i; for (i = 0, txq = vsi->tx_queues; i < vsi->num_tx_queues; i++, txq++) { txq->tx_rs_cidx = txq->tx_rs_pidx = 0; /* Initialize the last processed descriptor to be the end of * the ring, rather than the start, so that we avoid an * off-by-one error in ice_ift_txd_credits_update for the * first packet. 
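* (In a circular ring, index desc_count - 1 is the slot immediately behind index 0, i.e. the position of the last descriptor processed before any work has been posted.)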
*/ txq->tx_cidx_processed = txq->desc_count - 1; for (j = 0; j < txq->desc_count; j++) txq->tx_rsq[j] = QIDX_INVALID; } } /** * ice_update_rx_mbuf_sz - Update the Rx buffer size for all queues * @sc: the device softc * * Called to update the Rx queue mbuf_sz parameter for configuring the receive * buffer sizes when programming hardware. */ static void ice_update_rx_mbuf_sz(struct ice_softc *sc) { uint32_t mbuf_sz = iflib_get_rx_mbuf_sz(sc->ctx); struct ice_vsi *vsi = &sc->pf_vsi; MPASS(mbuf_sz <= UINT16_MAX); vsi->mbuf_sz = mbuf_sz; } /** * ice_if_init - Initialize the device * @ctx: iflib ctx structure * * Called by iflib to bring the device up, i.e. ifconfig ice0 up. Initializes * device filters and prepares the Tx and Rx engines. * * @pre assumes the caller holds the iflib CTX lock */ static void ice_if_init(if_ctx_t ctx) { struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx); device_t dev = sc->dev; int err; ASSERT_CTX_LOCKED(sc); /* * We've seen an issue with 11.3/12.1 where sideband routines are * called after detach is called. This would call routines after * if_stop, causing issues with the teardown process. This has * seemingly been fixed in STABLE snapshots, but it seems like a * good idea to have this guard here regardless. */ if (ice_driver_is_detaching(sc)) return; if (ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE)) return; if (ice_test_state(&sc->state, ICE_STATE_RESET_FAILED)) { device_printf(sc->dev, "request to start interface cannot be completed as the device failed to reset\n"); return; } if (ice_test_state(&sc->state, ICE_STATE_PREPARED_FOR_RESET)) { device_printf(sc->dev, "request to start interface while device is prepared for impending reset\n"); return; } ice_update_rx_mbuf_sz(sc); /* Update the MAC address... User might use a LAA */ err = ice_update_laa_mac(sc); if (err) { device_printf(dev, "LAA address change failed, err %s\n", ice_err_str(err)); return; } /* Initialize software Tx tracking values */ ice_init_tx_tracking(&sc->pf_vsi); err = ice_cfg_vsi_for_tx(&sc->pf_vsi); if (err) { device_printf(dev, "Unable to configure the main VSI for Tx: %s\n", ice_err_str(err)); return; } err = ice_cfg_vsi_for_rx(&sc->pf_vsi); if (err) { device_printf(dev, "Unable to configure the main VSI for Rx: %s\n", ice_err_str(err)); goto err_cleanup_tx; } err = ice_control_all_rx_queues(&sc->pf_vsi, true); if (err) { device_printf(dev, "Unable to enable Rx rings for transmit: %s\n", ice_err_str(err)); goto err_cleanup_tx; } err = ice_cfg_pf_default_mac_filters(sc); if (err) { device_printf(dev, "Unable to configure default MAC filters: %s\n", ice_err_str(err)); goto err_stop_rx; } /* We use software interrupts for Tx, so we only program the hardware * interrupts for Rx. */ ice_configure_all_rxq_interrupts(&sc->pf_vsi); ice_configure_rx_itr(&sc->pf_vsi); /* Configure promiscuous mode */ ice_if_promisc_set(ctx, if_getflags(sc->ifp)); ice_rdma_pf_init(sc); ice_set_state(&sc->state, ICE_STATE_DRIVER_INITIALIZED); return; err_stop_rx: ice_control_all_rx_queues(&sc->pf_vsi, false); err_cleanup_tx: ice_vsi_disable_tx(&sc->pf_vsi); } /** * ice_poll_for_media_avail - Re-enable link if media is detected * @sc: device private structure * * Intended to be called from the driver's timer function, this function * sends the Get Link Status AQ command and re-enables HW link if the * command says that media is available. 
* * If the driver doesn't have the "NO_MEDIA" state set, then this does nothing, * since media removal events are supposed to be sent to the driver through * a link status event. */ static void ice_poll_for_media_avail(struct ice_softc *sc) { struct ice_hw *hw = &sc->hw; struct ice_port_info *pi = hw->port_info; if (ice_test_state(&sc->state, ICE_STATE_NO_MEDIA)) { pi->phy.get_link_info = true; ice_get_link_status(pi, &sc->link_up); if (pi->phy.link_info.link_info & ICE_AQ_MEDIA_AVAILABLE) { enum ice_status status; /* Re-enable link and re-apply user link settings */ ice_apply_saved_phy_cfg(sc, ICE_APPLY_LS_FEC_FC); /* Update the OS about changes in media capability */ status = ice_add_media_types(sc, sc->media); if (status) device_printf(sc->dev, "Error adding device media types: %s aq_err %s\n", ice_status_str(status), ice_aq_str(hw->adminq.sq_last_status)); ice_clear_state(&sc->state, ICE_STATE_NO_MEDIA); } } } /** * ice_if_timer - called by iflib periodically * @ctx: iflib ctx structure * @qid: the queue this timer was called for * * This callback is triggered by iflib periodically. We use it to update the * hw statistics. * * @remark this function is not protected by the iflib CTX lock. */ static void ice_if_timer(if_ctx_t ctx, uint16_t qid) { struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx); uint64_t prev_link_xoff_rx = sc->stats.cur.link_xoff_rx; if (qid != 0) return; /* Do not attempt to update stats when in recovery mode */ if (ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE)) return; /* Update device statistics */ ice_update_pf_stats(sc); /* * For proper watchdog management, the iflib stack needs to know if * we've been paused during the last interval. Check if the * link_xoff_rx stat changed, and set the isc_pause_frames, if so. */ if (sc->stats.cur.link_xoff_rx != prev_link_xoff_rx) sc->scctx->isc_pause_frames = 1; /* Update the primary VSI stats */ ice_update_vsi_hw_stats(&sc->pf_vsi); } /** * ice_admin_timer - called periodically to trigger the admin task * @arg: callout(9) argument pointing to the device private softc structure * * Timer function used as part of a callout(9) timer that will periodically * trigger the admin task, even when the interface is down. * * @remark this function is not called by iflib and is not protected by the * iflib CTX lock. * * @remark because this is a callout function, it cannot sleep and should not * attempt taking the iflib CTX lock. */ static void ice_admin_timer(void *arg) { struct ice_softc *sc = (struct ice_softc *)arg; /* * There is a point where callout routines are no longer * cancelable. So there exists a window of time where the * driver enters detach() and tries to cancel the callout, but the * callout routine has passed the cancellation point. The detach() * routine is unaware of this and tries to free resources that the * callout routine needs. So we check for the detach state flag to * at least shrink the window of opportunity. */ if (ice_driver_is_detaching(sc)) return; /* Fire off the admin task */ iflib_admin_intr_deferred(sc->ctx); /* Reschedule the admin timer */ callout_schedule(&sc->admin_timer, hz/2); } /** * ice_transition_recovery_mode - Transition to recovery mode * @sc: the device private softc * * Called when the driver detects that the firmware has entered recovery mode * at run time. */ static void ice_transition_recovery_mode(struct ice_softc *sc) { struct ice_vsi *vsi = &sc->pf_vsi; int i; device_printf(sc->dev, "Firmware recovery mode detected. Limiting functionality. 
Refer to Intel(R) Ethernet Adapters and Devices User Guide for details on firmware recovery mode.\n"); /* Tell the stack that the link has gone down */ iflib_link_state_change(sc->ctx, LINK_STATE_DOWN, 0); /* Request that the device be re-initialized */ ice_request_stack_reinit(sc); ice_rdma_pf_detach(sc); ice_clear_bit(ICE_FEATURE_RDMA, sc->feat_cap); ice_clear_bit(ICE_FEATURE_SRIOV, sc->feat_en); ice_clear_bit(ICE_FEATURE_SRIOV, sc->feat_cap); ice_vsi_del_txqs_ctx(vsi); ice_vsi_del_rxqs_ctx(vsi); for (i = 0; i < sc->num_available_vsi; i++) { if (sc->all_vsi[i]) ice_release_vsi(sc->all_vsi[i]); } sc->num_available_vsi = 0; if (sc->all_vsi) { free(sc->all_vsi, M_ICE); sc->all_vsi = NULL; } /* Destroy the interrupt manager */ ice_resmgr_destroy(&sc->imgr); /* Destroy the queue managers */ ice_resmgr_destroy(&sc->tx_qmgr); ice_resmgr_destroy(&sc->rx_qmgr); ice_deinit_hw(&sc->hw); } /** * ice_transition_safe_mode - Transition to safe mode * @sc: the device private softc * * Called when the driver attempts to reload the DDP package during a device * reset, and the new download fails. If so, we must transition to safe mode * at run time. * * @remark although safe mode normally allocates only a single queue, we can't * change the number of queues dynamically when using iflib. Due to this, we * do not attempt to reduce the number of queues. */ static void ice_transition_safe_mode(struct ice_softc *sc) { /* Indicate that we are in Safe mode */ ice_set_bit(ICE_FEATURE_SAFE_MODE, sc->feat_cap); ice_set_bit(ICE_FEATURE_SAFE_MODE, sc->feat_en); ice_rdma_pf_detach(sc); ice_clear_bit(ICE_FEATURE_RDMA, sc->feat_cap); ice_clear_bit(ICE_FEATURE_SRIOV, sc->feat_en); ice_clear_bit(ICE_FEATURE_SRIOV, sc->feat_cap); ice_clear_bit(ICE_FEATURE_RSS, sc->feat_cap); ice_clear_bit(ICE_FEATURE_RSS, sc->feat_en); } /** * ice_if_update_admin_status - update admin status * @ctx: iflib ctx structure * * Called by iflib to update the admin status. For our purposes, this means * check the adminq, and update the link status. It's ultimately triggered by * our admin interrupt, or by the ice_if_timer periodically. * * @pre assumes the caller holds the iflib CTX lock */ static void ice_if_update_admin_status(if_ctx_t ctx) { struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx); enum ice_fw_modes fw_mode; bool reschedule = false; u16 pending = 0; ASSERT_CTX_LOCKED(sc); /* Check if the firmware entered recovery mode at run time */ fw_mode = ice_get_fw_mode(&sc->hw); if (fw_mode == ICE_FW_MODE_REC) { if (!ice_testandset_state(&sc->state, ICE_STATE_RECOVERY_MODE)) { /* If we just entered recovery mode, log a warning to * the system administrator and deinit driver state * that is no longer functional. */ ice_transition_recovery_mode(sc); } } else if (fw_mode == ICE_FW_MODE_ROLLBACK) { if (!ice_testandset_state(&sc->state, ICE_STATE_ROLLBACK_MODE)) { /* Rollback mode isn't fatal, but we don't want to * repeatedly post a message about it. */ ice_print_rollback_msg(&sc->hw); } } /* Handle global reset events */ ice_handle_reset_event(sc); /* Handle PF reset requests */ ice_handle_pf_reset_request(sc); /* Handle MDD events */ ice_handle_mdd_event(sc); if (ice_test_state(&sc->state, ICE_STATE_RESET_FAILED) || ice_test_state(&sc->state, ICE_STATE_PREPARED_FOR_RESET) || ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE)) { /* * If we know the control queues are disabled, skip processing * the control queues entirely. 
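* The lone ';' below is the entire body of this branch; nothing is done when the control queues are known to be disabled.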
*/ ; } else if (ice_testandclear_state(&sc->state, ICE_STATE_CONTROLQ_EVENT_PENDING)) { ice_process_ctrlq(sc, ICE_CTL_Q_ADMIN, &pending); if (pending > 0) reschedule = true; ice_process_ctrlq(sc, ICE_CTL_Q_MAILBOX, &pending); if (pending > 0) reschedule = true; } /* Poll for link up */ ice_poll_for_media_avail(sc); /* Check and update link status */ ice_update_link_status(sc, false); /* * If there are still messages to process, we need to reschedule * ourselves. Otherwise, we can just re-enable the interrupt. We'll be * woken up at the next interrupt or timer event. */ if (reschedule) { ice_set_state(&sc->state, ICE_STATE_CONTROLQ_EVENT_PENDING); iflib_admin_intr_deferred(ctx); } else { ice_enable_intr(&sc->hw, sc->irqvs[0].me); } } /** * ice_prepare_for_reset - Prepare device for an impending reset * @sc: The device private softc * * Prepare the driver for an impending reset, shutting down VSIs, clearing the * scheduler setup, and shutting down controlqs. Uses the * ICE_STATE_PREPARED_FOR_RESET to indicate whether we've already prepared the * driver for reset or not. */ static void ice_prepare_for_reset(struct ice_softc *sc) { struct ice_hw *hw = &sc->hw; /* If we're already prepared, there's nothing to do */ if (ice_testandset_state(&sc->state, ICE_STATE_PREPARED_FOR_RESET)) return; log(LOG_INFO, "%s: preparing to reset device logic\n", if_name(sc->ifp)); /* In recovery mode, hardware is not initialized */ if (ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE)) return; + /* inform the RDMA client */ + ice_rdma_notify_reset(sc); /* stop the RDMA client */ ice_rdma_pf_stop(sc); /* Release the main PF VSI queue mappings */ ice_resmgr_release_map(&sc->tx_qmgr, sc->pf_vsi.tx_qmap, sc->pf_vsi.num_tx_queues); ice_resmgr_release_map(&sc->rx_qmgr, sc->pf_vsi.rx_qmap, sc->pf_vsi.num_rx_queues); ice_clear_hw_tbls(hw); if (hw->port_info) ice_sched_clear_port(hw->port_info); ice_shutdown_all_ctrlq(hw, false); } /** * ice_rebuild_pf_vsi_qmap - Rebuild the main PF VSI queue mapping * @sc: the device softc pointer * * Loops over the Tx and Rx queues for the main PF VSI and reassigns the queue * mapping after a reset occurred. 
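* The Tx and Rx tail register offsets (QTX_COMM_DBELL and QRX_TAIL) are also re-derived from the new mapping.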
*/ static int ice_rebuild_pf_vsi_qmap(struct ice_softc *sc) { struct ice_vsi *vsi = &sc->pf_vsi; struct ice_tx_queue *txq; struct ice_rx_queue *rxq; int err, i; /* Re-assign Tx queues from PF space to the main VSI */ err = ice_resmgr_assign_contiguous(&sc->tx_qmgr, vsi->tx_qmap, vsi->num_tx_queues); if (err) { device_printf(sc->dev, "Unable to re-assign PF Tx queues: %s\n", ice_err_str(err)); return (err); } /* Re-assign Rx queues from PF space to this VSI */ err = ice_resmgr_assign_contiguous(&sc->rx_qmgr, vsi->rx_qmap, vsi->num_rx_queues); if (err) { device_printf(sc->dev, "Unable to re-assign PF Rx queues: %s\n", ice_err_str(err)); goto err_release_tx_queues; } vsi->qmap_type = ICE_RESMGR_ALLOC_CONTIGUOUS; /* Re-assign Tx queue tail pointers */ for (i = 0, txq = vsi->tx_queues; i < vsi->num_tx_queues; i++, txq++) txq->tail = QTX_COMM_DBELL(vsi->tx_qmap[i]); /* Re-assign Rx queue tail pointers */ for (i = 0, rxq = vsi->rx_queues; i < vsi->num_rx_queues; i++, rxq++) rxq->tail = QRX_TAIL(vsi->rx_qmap[i]); return (0); err_release_tx_queues: ice_resmgr_release_map(&sc->tx_qmgr, sc->pf_vsi.tx_qmap, sc->pf_vsi.num_tx_queues); return (err); } /* determine if the iflib context is active */ #define CTX_ACTIVE(ctx) ((if_getdrvflags(iflib_get_ifp(ctx)) & IFF_DRV_RUNNING)) /** * ice_rebuild_recovery_mode - Rebuild driver state while in recovery mode * @sc: The device private softc * * Handle a driver rebuild while in recovery mode. This will only rebuild the * limited functionality supported while in recovery mode. */ static void ice_rebuild_recovery_mode(struct ice_softc *sc) { device_t dev = sc->dev; /* enable PCIe bus master */ pci_enable_busmaster(dev); /* Configure interrupt causes for the administrative interrupt */ ice_configure_misc_interrupts(sc); /* Enable ITR 0 right away, so that we can handle admin interrupts */ ice_enable_intr(&sc->hw, sc->irqvs[0].me); /* Now that the rebuild is finished, we're no longer prepared to reset */ ice_clear_state(&sc->state, ICE_STATE_PREPARED_FOR_RESET); log(LOG_INFO, "%s: device rebuild successful\n", if_name(sc->ifp)); /* In order to completely restore device functionality, the iflib core * needs to be reset. We need to request an iflib reset. Additionally, * because the state of IFC_DO_RESET is cached within task_fn_admin in * the iflib core, we also want re-run the admin task so that iflib * resets immediately instead of waiting for the next interrupt. */ ice_request_stack_reinit(sc); return; } /** * ice_rebuild - Rebuild driver state post reset * @sc: The device private softc * * Restore driver state after a reset occurred. Restart the controlqs, setup * the hardware port, and re-enable the VSIs. */ static void ice_rebuild(struct ice_softc *sc) { struct ice_hw *hw = &sc->hw; device_t dev = sc->dev; enum ice_ddp_state pkg_state; enum ice_status status; int err; sc->rebuild_ticks = ticks; /* If we're rebuilding, then a reset has succeeded. */ ice_clear_state(&sc->state, ICE_STATE_RESET_FAILED); /* * If the firmware is in recovery mode, only restore the limited * functionality supported by recovery mode. 
*/ if (ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE)) { ice_rebuild_recovery_mode(sc); return; } /* enable PCIe bus master */ pci_enable_busmaster(dev); status = ice_init_all_ctrlq(hw); if (status) { device_printf(dev, "failed to re-init controlqs, err %s\n", ice_status_str(status)); goto err_shutdown_ctrlq; } /* Query the allocated resources for Tx scheduler */ status = ice_sched_query_res_alloc(hw); if (status) { device_printf(dev, "Failed to query scheduler resources, err %s aq_err %s\n", ice_status_str(status), ice_aq_str(hw->adminq.sq_last_status)); goto err_shutdown_ctrlq; } /* Re-enable FW logging. Keep going even if this fails */ status = ice_fwlog_set(hw, &hw->fwlog_cfg); if (!status) { /* * We should have the most updated cached copy of the * configuration, regardless of whether we're rebuilding * or not. So we'll simply check to see if logging was * enabled pre-rebuild. */ if (hw->fwlog_cfg.options & ICE_FWLOG_OPTION_IS_REGISTERED) { status = ice_fwlog_register(hw); if (status) device_printf(dev, "failed to re-register fw logging, err %s aq_err %s\n", ice_status_str(status), ice_aq_str(hw->adminq.sq_last_status)); } } else device_printf(dev, "failed to rebuild fw logging configuration, err %s aq_err %s\n", ice_status_str(status), ice_aq_str(hw->adminq.sq_last_status)); err = ice_send_version(sc); if (err) goto err_shutdown_ctrlq; err = ice_init_link_events(sc); if (err) { device_printf(dev, "ice_init_link_events failed: %s\n", ice_err_str(err)); goto err_shutdown_ctrlq; } status = ice_clear_pf_cfg(hw); if (status) { device_printf(dev, "failed to clear PF configuration, err %s\n", ice_status_str(status)); goto err_shutdown_ctrlq; } ice_clear_pxe_mode(hw); status = ice_get_caps(hw); if (status) { device_printf(dev, "failed to get capabilities, err %s\n", ice_status_str(status)); goto err_shutdown_ctrlq; } status = ice_sched_init_port(hw->port_info); if (status) { device_printf(dev, "failed to initialize port, err %s\n", ice_status_str(status)); goto err_sched_cleanup; } /* If we previously loaded the package, it needs to be reloaded now */ if (!ice_is_bit_set(sc->feat_en, ICE_FEATURE_SAFE_MODE)) { pkg_state = ice_init_pkg(hw, hw->pkg_copy, hw->pkg_size); if (!ice_is_init_pkg_successful(pkg_state)) { ice_log_pkg_init(sc, pkg_state); ice_transition_safe_mode(sc); } } ice_reset_pf_stats(sc); err = ice_rebuild_pf_vsi_qmap(sc); if (err) { device_printf(sc->dev, "Unable to re-assign main VSI queues, err %s\n", ice_err_str(err)); goto err_sched_cleanup; } err = ice_initialize_vsi(&sc->pf_vsi); if (err) { device_printf(sc->dev, "Unable to re-initialize Main VSI, err %s\n", ice_err_str(err)); goto err_release_queue_allocations; } /* Replay all VSI configuration */ err = ice_replay_all_vsi_cfg(sc); if (err) goto err_deinit_pf_vsi; /* Re-enable FW health event reporting */ ice_init_health_events(sc); /* Reconfigure the main PF VSI for RSS */ err = ice_config_rss(&sc->pf_vsi); if (err) { device_printf(sc->dev, "Unable to reconfigure RSS for the main VSI, err %s\n", ice_err_str(err)); goto err_deinit_pf_vsi; } /* Refresh link status */ ice_clear_state(&sc->state, ICE_STATE_LINK_STATUS_REPORTED); sc->hw.port_info->phy.get_link_info = true; ice_get_link_status(sc->hw.port_info, &sc->link_up); ice_update_link_status(sc, true); /* RDMA interface will be restarted by the stack re-init */ /* Configure interrupt causes for the administrative interrupt */ ice_configure_misc_interrupts(sc); /* Enable ITR 0 right away, so that we can handle admin interrupts */ ice_enable_intr(&sc->hw, 
sc->irqvs[0].me); /* Now that the rebuild is finished, we're no longer prepared to reset */ ice_clear_state(&sc->state, ICE_STATE_PREPARED_FOR_RESET); log(LOG_INFO, "%s: device rebuild successful\n", if_name(sc->ifp)); /* In order to completely restore device functionality, the iflib core * needs to be reset. We need to request an iflib reset. Additionally, * because the state of IFC_DO_RESET is cached within task_fn_admin in * the iflib core, we also want re-run the admin task so that iflib * resets immediately instead of waiting for the next interrupt. */ ice_request_stack_reinit(sc); return; err_deinit_pf_vsi: ice_deinit_vsi(&sc->pf_vsi); err_release_queue_allocations: ice_resmgr_release_map(&sc->tx_qmgr, sc->pf_vsi.tx_qmap, sc->pf_vsi.num_tx_queues); ice_resmgr_release_map(&sc->rx_qmgr, sc->pf_vsi.rx_qmap, sc->pf_vsi.num_rx_queues); err_sched_cleanup: ice_sched_cleanup_all(hw); err_shutdown_ctrlq: ice_shutdown_all_ctrlq(hw, false); ice_clear_state(&sc->state, ICE_STATE_PREPARED_FOR_RESET); ice_set_state(&sc->state, ICE_STATE_RESET_FAILED); device_printf(dev, "Driver rebuild failed, please reload the device driver\n"); } /** * ice_handle_reset_event - Handle reset events triggered by OICR * @sc: The device private softc * * Handle reset events triggered by an OICR notification. This includes CORER, * GLOBR, and EMPR resets triggered by software on this or any other PF or by * firmware. * * @pre assumes the iflib context lock is held, and will unlock it while * waiting for the hardware to finish reset. */ static void ice_handle_reset_event(struct ice_softc *sc) { struct ice_hw *hw = &sc->hw; enum ice_status status; device_t dev = sc->dev; /* When a CORER, GLOBR, or EMPR is about to happen, the hardware will * trigger an OICR interrupt. Our OICR handler will determine when * this occurs and set the ICE_STATE_RESET_OICR_RECV bit as * appropriate. */ if (!ice_testandclear_state(&sc->state, ICE_STATE_RESET_OICR_RECV)) return; ice_prepare_for_reset(sc); /* * Release the iflib context lock and wait for the device to finish * resetting. */ IFLIB_CTX_UNLOCK(sc); status = ice_check_reset(hw); IFLIB_CTX_LOCK(sc); if (status) { device_printf(dev, "Device never came out of reset, err %s\n", ice_status_str(status)); ice_set_state(&sc->state, ICE_STATE_RESET_FAILED); return; } /* We're done with the reset, so we can rebuild driver state */ sc->hw.reset_ongoing = false; ice_rebuild(sc); /* In the unlikely event that a PF reset request occurs at the same * time as a global reset, clear the request now. This avoids * resetting a second time right after we reset due to a global event. */ if (ice_testandclear_state(&sc->state, ICE_STATE_RESET_PFR_REQ)) device_printf(dev, "Ignoring PFR request that occurred while a reset was ongoing\n"); } /** * ice_handle_pf_reset_request - Initiate PF reset requested by software * @sc: The device private softc * * Initiate a PF reset requested by software. We handle this in the admin task * so that only one thread actually handles driver preparation and cleanup, * rather than having multiple threads possibly attempt to run this code * simultaneously. * * @pre assumes the iflib context lock is held and will unlock it while * waiting for the PF reset to complete. 
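* * @remark on success the pfr_count soft statistic is incremented and ice_rebuild() restores driver state; if the reset fails, ICE_STATE_RESET_FAILED is set instead.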
*/ static void ice_handle_pf_reset_request(struct ice_softc *sc) { struct ice_hw *hw = &sc->hw; enum ice_status status; /* Check for PF reset requests */ if (!ice_testandclear_state(&sc->state, ICE_STATE_RESET_PFR_REQ)) return; /* Make sure we're prepared for reset */ ice_prepare_for_reset(sc); /* * Release the iflib context lock and wait for the device to finish * resetting. */ IFLIB_CTX_UNLOCK(sc); status = ice_reset(hw, ICE_RESET_PFR); IFLIB_CTX_LOCK(sc); if (status) { device_printf(sc->dev, "device PF reset failed, err %s\n", ice_status_str(status)); ice_set_state(&sc->state, ICE_STATE_RESET_FAILED); return; } sc->soft_stats.pfr_count++; ice_rebuild(sc); } /** * ice_init_device_features - Init device driver features * @sc: driver softc structure * * @pre assumes that the function capabilities bits have been set up by * ice_init_hw(). */ static void ice_init_device_features(struct ice_softc *sc) { /* Set capabilities that all devices support */ ice_set_bit(ICE_FEATURE_SRIOV, sc->feat_cap); ice_set_bit(ICE_FEATURE_RSS, sc->feat_cap); ice_set_bit(ICE_FEATURE_RDMA, sc->feat_cap); ice_set_bit(ICE_FEATURE_LENIENT_LINK_MODE, sc->feat_cap); ice_set_bit(ICE_FEATURE_LINK_MGMT_VER_1, sc->feat_cap); ice_set_bit(ICE_FEATURE_LINK_MGMT_VER_2, sc->feat_cap); ice_set_bit(ICE_FEATURE_HEALTH_STATUS, sc->feat_cap); ice_set_bit(ICE_FEATURE_FW_LOGGING, sc->feat_cap); ice_set_bit(ICE_FEATURE_HAS_PBA, sc->feat_cap); ice_set_bit(ICE_FEATURE_DCB, sc->feat_cap); ice_set_bit(ICE_FEATURE_TX_BALANCE, sc->feat_cap); /* Disable features due to hardware limitations... */ if (!sc->hw.func_caps.common_cap.rss_table_size) ice_clear_bit(ICE_FEATURE_RSS, sc->feat_cap); if (!sc->hw.func_caps.common_cap.iwarp || !ice_enable_irdma) ice_clear_bit(ICE_FEATURE_RDMA, sc->feat_cap); if (!sc->hw.func_caps.common_cap.dcb) ice_clear_bit(ICE_FEATURE_DCB, sc->feat_cap); /* Disable features due to firmware limitations... */ if (!ice_is_fw_health_report_supported(&sc->hw)) ice_clear_bit(ICE_FEATURE_HEALTH_STATUS, sc->feat_cap); if (!ice_fwlog_supported(&sc->hw)) ice_clear_bit(ICE_FEATURE_FW_LOGGING, sc->feat_cap); if (sc->hw.fwlog_cfg.options & ICE_FWLOG_OPTION_IS_REGISTERED) { if (ice_is_bit_set(sc->feat_cap, ICE_FEATURE_FW_LOGGING)) ice_set_bit(ICE_FEATURE_FW_LOGGING, sc->feat_en); else ice_fwlog_unregister(&sc->hw); } /* Disable capabilities not supported by the OS */ ice_disable_unsupported_features(sc->feat_cap); /* RSS is always enabled for iflib */ if (ice_is_bit_set(sc->feat_cap, ICE_FEATURE_RSS)) ice_set_bit(ICE_FEATURE_RSS, sc->feat_en); /* Disable features based on sysctl settings */ if (!ice_tx_balance_en) ice_clear_bit(ICE_FEATURE_TX_BALANCE, sc->feat_cap); } /** * ice_if_multi_set - Callback to update Multicast filters in HW * @ctx: iflib ctx structure * * Called by iflib in response to SIOCDELMULTI and SIOCADDMULTI. Must search * the if_multiaddrs list and determine which filters have been added or * removed from the list, and update HW programming to reflect the new list. 
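* The list walk and hardware reprogramming are delegated to ice_sync_multicast_filters().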
* * @pre assumes the caller holds the iflib CTX lock */ static void ice_if_multi_set(if_ctx_t ctx) { struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx); int err; ASSERT_CTX_LOCKED(sc); /* Do not handle multicast configuration in recovery mode */ if (ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE)) return; err = ice_sync_multicast_filters(sc); if (err) { device_printf(sc->dev, "Failed to synchronize multicast filter list: %s\n", ice_err_str(err)); return; } } /** * ice_if_vlan_register - Register a VLAN with the hardware * @ctx: iflib ctx pointer * @vtag: VLAN to add * * Programs the main PF VSI with a hardware filter for the given VLAN. * * @pre assumes the caller holds the iflib CTX lock */ static void ice_if_vlan_register(if_ctx_t ctx, u16 vtag) { struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx); enum ice_status status; ASSERT_CTX_LOCKED(sc); /* Do not handle VLAN configuration in recovery mode */ if (ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE)) return; status = ice_add_vlan_hw_filter(&sc->pf_vsi, vtag); if (status) { device_printf(sc->dev, "Failure adding VLAN %d to main VSI, err %s aq_err %s\n", vtag, ice_status_str(status), ice_aq_str(sc->hw.adminq.sq_last_status)); } } /** * ice_if_vlan_unregister - Remove a VLAN filter from the hardware * @ctx: iflib ctx pointer * @vtag: VLAN to remove * * Removes the previously programmed VLAN filter from the main PF VSI. * * @pre assumes the caller holds the iflib CTX lock */ static void ice_if_vlan_unregister(if_ctx_t ctx, u16 vtag) { struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx); enum ice_status status; ASSERT_CTX_LOCKED(sc); /* Do not handle VLAN configuration in recovery mode */ if (ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE)) return; status = ice_remove_vlan_hw_filter(&sc->pf_vsi, vtag); if (status) { device_printf(sc->dev, "Failure removing VLAN %d from main VSI, err %s aq_err %s\n", vtag, ice_status_str(status), ice_aq_str(sc->hw.adminq.sq_last_status)); } } /** * ice_if_stop - Stop the device * @ctx: iflib context structure * * Called by iflib to stop the device and bring it down. (i.e. ifconfig ice0 * down) * * @pre assumes the caller holds the iflib CTX lock */ static void ice_if_stop(if_ctx_t ctx) { struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx); ASSERT_CTX_LOCKED(sc); /* * The iflib core may call IFDI_STOP prior to the first call to * IFDI_INIT. This will cause us to attempt to remove MAC filters we * don't have, and disable Tx queues which aren't yet configured. * Although it is likely these extra operations are harmless, they do * cause spurious warning messages to be displayed, which may confuse * users. * * To avoid these messages, we use a state bit indicating if we've * been initialized. It will be set when ice_if_init is called, and * cleared here in ice_if_stop. */ if (!ice_testandclear_state(&sc->state, ICE_STATE_DRIVER_INITIALIZED)) return; if (ice_test_state(&sc->state, ICE_STATE_RESET_FAILED)) { device_printf(sc->dev, "request to stop interface cannot be completed as the device failed to reset\n"); return; } if (ice_test_state(&sc->state, ICE_STATE_PREPARED_FOR_RESET)) { device_printf(sc->dev, "request to stop interface while device is prepared for impending reset\n"); return; } ice_rdma_pf_stop(sc); /* Remove the MAC filters, stop Tx, and stop Rx. We don't check the * return of these functions because there's nothing we can really do * if they fail, and the functions already print error messages. * Just try to shut down as much as we can.
*/ ice_rm_pf_default_mac_filters(sc); /* Dissociate the Tx and Rx queues from the interrupts */ ice_flush_txq_interrupts(&sc->pf_vsi); ice_flush_rxq_interrupts(&sc->pf_vsi); /* Disable the Tx and Rx queues */ ice_vsi_disable_tx(&sc->pf_vsi); ice_control_all_rx_queues(&sc->pf_vsi, false); } /** * ice_if_get_counter - Get current value of an ifnet statistic * @ctx: iflib context pointer * @counter: ifnet counter to read * * Reads the current value of an ifnet counter for the device. * * This function is not protected by the iflib CTX lock. */ static uint64_t ice_if_get_counter(if_ctx_t ctx, ift_counter counter) { struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx); /* Return the counter for the main PF VSI */ return ice_get_ifnet_counter(&sc->pf_vsi, counter); } /** * ice_request_stack_reinit - Request that iflib re-initialize * @sc: the device private softc * * Request that the device be brought down and up, to re-initialize. For * example, this may be called when a device reset occurs, or when Tx and Rx * queues need to be re-initialized. * * This is required because the iflib state is outside the driver, and must be * re-initialized if we need to restart Tx and Rx queues. */ void ice_request_stack_reinit(struct ice_softc *sc) { if (CTX_ACTIVE(sc->ctx)) { iflib_request_reset(sc->ctx); iflib_admin_intr_deferred(sc->ctx); } } /** * ice_driver_is_detaching - Check if the driver is detaching/unloading * @sc: device private softc * * Returns true if the driver is detaching, false otherwise. * * @remark on newer kernels, take advantage of iflib_in_detach in order to * report detachment correctly as early as possible. * * @remark this function is used by various code paths that want to avoid * running if the driver is about to be removed. This includes sysctls and * other driver access points. Note that it does not fully resolve * detach-based race conditions as it is possible for a thread to race with * iflib_in_detach. */ bool ice_driver_is_detaching(struct ice_softc *sc) { return (ice_test_state(&sc->state, ICE_STATE_DETACHING) || iflib_in_detach(sc->ctx)); } /** * ice_if_priv_ioctl - Device private ioctl handler * @ctx: iflib context pointer * @command: The ioctl command issued * @data: ioctl specific data * * iflib callback for handling custom driver specific ioctls. * * @pre Assumes that the iflib context lock is held. */ static int ice_if_priv_ioctl(if_ctx_t ctx, u_long command, caddr_t data) { struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx); struct ifdrv *ifd; device_t dev = sc->dev; if (data == NULL) return (EINVAL); ASSERT_CTX_LOCKED(sc); /* Make sure the command type is valid */ switch (command) { case SIOCSDRVSPEC: case SIOCGDRVSPEC: /* Accepted commands */ break; case SIOCGPRIVATE_0: /* * Although we do not support this ioctl command, it's * expected that iflib will forward it to the IFDI_PRIV_IOCTL * handler. Do not print a message in this case. */ return (ENOTSUP); default: /* * If we get a different command for this function, it's * definitely unexpected, so log a message indicating what * command we got for debugging purposes.
*/ device_printf(dev, "%s: unexpected ioctl command %08lx\n", __func__, command); return (EINVAL); } ifd = (struct ifdrv *)data; switch (ifd->ifd_cmd) { case ICE_NVM_ACCESS: return ice_handle_nvm_access_ioctl(sc, ifd); case ICE_DEBUG_DUMP: return ice_handle_debug_dump_ioctl(sc, ifd); default: return EINVAL; } } /** * ice_if_i2c_req - I2C request handler for iflib * @ctx: iflib context pointer * @req: The I2C parameters to use * * Read from the port's I2C eeprom using the parameters from the ioctl. * * @remark The iflib-only part is pretty simple. */ static int ice_if_i2c_req(if_ctx_t ctx, struct ifi2creq *req) { struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx); return ice_handle_i2c_req(sc, req); } /** * ice_if_suspend - PCI device suspend handler for iflib * @ctx: iflib context pointer * * Deinitializes the driver and clears HW resources in preparation for * suspend or an FLR. * * @returns 0; this return value is ignored */ static int ice_if_suspend(if_ctx_t ctx) { struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx); /* At least a PFR is always going to happen after this; * either via FLR or during the D3->D0 transition. */ ice_clear_state(&sc->state, ICE_STATE_RESET_PFR_REQ); ice_prepare_for_reset(sc); return (0); } /** * ice_if_resume - PCI device resume handler for iflib * @ctx: iflib context pointer * * Reinitializes the driver and the HW after PCI resume or after * an FLR. An init is performed by iflib after this function is finished. * * @returns 0; this return value is ignored */ static int ice_if_resume(if_ctx_t ctx) { struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx); ice_rebuild(sc); return (0); } diff --git a/sys/dev/irdma/fbsd_kcompat.c b/sys/dev/irdma/fbsd_kcompat.c index 325429cdd993..013f9279952e 100644 --- a/sys/dev/irdma/fbsd_kcompat.c +++ b/sys/dev/irdma/fbsd_kcompat.c @@ -1,797 +1,1061 @@ /*- * SPDX-License-Identifier: GPL-2.0 or Linux-OpenIB * - * Copyright (c) 2021 - 2022 Intel Corporation + * Copyright (c) 2021 - 2023 Intel Corporation * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU * General Public License (GPL) Version 2, available from the file * COPYING in the main directory of this source tree, or the * OpenFabrics.org BSD license below: * * Redistribution and use in source and binary forms, with or * without modification, are permitted provided that the following * conditions are met: * * - Redistributions of source code must retain the above * copyright notice, this list of conditions and the following * disclaimer. * * - Redistributions in binary form must reproduce the above * copyright notice, this list of conditions and the following * disclaimer in the documentation and/or other materials * provided with the distribution. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ #include "osdep.h" #include "ice_rdma.h" #include "irdma_di_if.h" #include "irdma_main.h" #include #include #include #include #include /* additional QP debuging option. 
Keep false unless needed */ bool irdma_upload_context = false; inline u32 irdma_rd32(struct irdma_dev_ctx *dev_ctx, u32 reg){ KASSERT(reg < dev_ctx->mem_bus_space_size, ("irdma: register offset %#jx too large (max is %#jx)", (uintmax_t)reg, (uintmax_t)dev_ctx->mem_bus_space_size)); return (bus_space_read_4(dev_ctx->mem_bus_space_tag, dev_ctx->mem_bus_space_handle, reg)); } inline void irdma_wr32(struct irdma_dev_ctx *dev_ctx, u32 reg, u32 value) { KASSERT(reg < dev_ctx->mem_bus_space_size, ("irdma: register offset %#jx too large (max is %#jx)", (uintmax_t)reg, (uintmax_t)dev_ctx->mem_bus_space_size)); bus_space_write_4(dev_ctx->mem_bus_space_tag, dev_ctx->mem_bus_space_handle, reg, value); } inline u64 irdma_rd64(struct irdma_dev_ctx *dev_ctx, u32 reg){ KASSERT(reg < dev_ctx->mem_bus_space_size, ("irdma: register offset %#jx too large (max is %#jx)", (uintmax_t)reg, (uintmax_t)dev_ctx->mem_bus_space_size)); return (bus_space_read_8(dev_ctx->mem_bus_space_tag, dev_ctx->mem_bus_space_handle, reg)); } inline void irdma_wr64(struct irdma_dev_ctx *dev_ctx, u32 reg, u64 value) { KASSERT(reg < dev_ctx->mem_bus_space_size, ("irdma: register offset %#jx too large (max is %#jx)", (uintmax_t)reg, (uintmax_t)dev_ctx->mem_bus_space_size)); bus_space_write_8(dev_ctx->mem_bus_space_tag, dev_ctx->mem_bus_space_handle, reg, value); } void irdma_request_reset(struct irdma_pci_f *rf) { struct ice_rdma_peer *peer = rf->peer_info; struct ice_rdma_request req = {0}; req.type = ICE_RDMA_EVENT_RESET; printf("%s:%d requesting pf-reset\n", __func__, __LINE__); IRDMA_DI_REQ_HANDLER(peer, &req); } int irdma_register_qset(struct irdma_sc_vsi *vsi, struct irdma_ws_node *tc_node) { struct irdma_device *iwdev = vsi->back_vsi; struct ice_rdma_peer *peer = iwdev->rf->peer_info; struct ice_rdma_request req = {0}; struct ice_rdma_qset_update *res = &req.res; req.type = ICE_RDMA_EVENT_QSET_REGISTER; res->cnt_req = 1; res->res_type = ICE_RDMA_QSET_ALLOC; res->qsets.qs_handle = tc_node->qs_handle; res->qsets.tc = tc_node->traffic_class; res->qsets.vsi_id = vsi->vsi_idx; IRDMA_DI_REQ_HANDLER(peer, &req); tc_node->l2_sched_node_id = res->qsets.teid; vsi->qos[tc_node->user_pri].l2_sched_node_id = res->qsets.teid; return 0; } void irdma_unregister_qset(struct irdma_sc_vsi *vsi, struct irdma_ws_node *tc_node) { struct irdma_device *iwdev = vsi->back_vsi; struct ice_rdma_peer *peer = iwdev->rf->peer_info; struct ice_rdma_request req = {0}; struct ice_rdma_qset_update *res = &req.res; req.type = ICE_RDMA_EVENT_QSET_REGISTER; res->res_allocated = 1; res->res_type = ICE_RDMA_QSET_FREE; res->qsets.vsi_id = vsi->vsi_idx; res->qsets.teid = tc_node->l2_sched_node_id; res->qsets.qs_handle = tc_node->qs_handle; IRDMA_DI_REQ_HANDLER(peer, &req); } void * hw_to_dev(struct irdma_hw *hw) { struct irdma_pci_f *rf; rf = container_of(hw, struct irdma_pci_f, hw); return rf->pcidev; } void irdma_free_hash_desc(void *desc) { return; } int irdma_init_hash_desc(void **desc) { return 0; } int irdma_ieq_check_mpacrc(void *desc, void *addr, u32 len, u32 val) { u32 crc = calculate_crc32c(0xffffffff, addr, len) ^ 0xffffffff; int ret_code = 0; if (crc != val) { irdma_pr_err("mpa crc check fail %x %x\n", crc, val); ret_code = -EINVAL; } printf("%s: result crc=%x value=%x\n", __func__, crc, val); return ret_code; } static u_int irdma_add_ipv6_cb(void *arg, struct ifaddr *addr, u_int count __unused) { struct irdma_device *iwdev = arg; struct sockaddr_in6 *sin6; u32 local_ipaddr6[4] = {}; char ip6buf[INET6_ADDRSTRLEN]; u8 *mac_addr; sin6 = (struct sockaddr_in6 
*)addr->ifa_addr; irdma_copy_ip_ntohl(local_ipaddr6, (u32 *)&sin6->sin6_addr); mac_addr = if_getlladdr(addr->ifa_ifp); printf("%s:%d IP=%s, MAC=%02x:%02x:%02x:%02x:%02x:%02x\n", __func__, __LINE__, ip6_sprintf(ip6buf, &sin6->sin6_addr), mac_addr[0], mac_addr[1], mac_addr[2], mac_addr[3], mac_addr[4], mac_addr[5]); irdma_manage_arp_cache(iwdev->rf, mac_addr, local_ipaddr6, IRDMA_ARP_ADD); return (0); } /** * irdma_add_ipv6_addr - add ipv6 address to the hw arp table * @iwdev: irdma device * @ifp: interface network device pointer */ static void irdma_add_ipv6_addr(struct irdma_device *iwdev, struct ifnet *ifp) { if_addr_rlock(ifp); if_foreach_addr_type(ifp, AF_INET6, irdma_add_ipv6_cb, iwdev); if_addr_runlock(ifp); } static u_int irdma_add_ipv4_cb(void *arg, struct ifaddr *addr, u_int count __unused) { struct irdma_device *iwdev = arg; struct sockaddr_in *sin; u32 ip_addr[4] = {}; uint8_t *mac_addr; sin = (struct sockaddr_in *)addr->ifa_addr; ip_addr[0] = ntohl(sin->sin_addr.s_addr); mac_addr = if_getlladdr(addr->ifa_ifp); printf("%s:%d IP=%d.%d.%d.%d, MAC=%02x:%02x:%02x:%02x:%02x:%02x\n", __func__, __LINE__, ip_addr[0] >> 24, (ip_addr[0] >> 16) & 0xFF, (ip_addr[0] >> 8) & 0xFF, ip_addr[0] & 0xFF, mac_addr[0], mac_addr[1], mac_addr[2], mac_addr[3], mac_addr[4], mac_addr[5]); irdma_manage_arp_cache(iwdev->rf, mac_addr, ip_addr, IRDMA_ARP_ADD); return (0); } /** * irdma_add_ipv4_addr - add ipv4 address to the hw arp table * @iwdev: irdma device * @ifp: interface network device pointer */ static void irdma_add_ipv4_addr(struct irdma_device *iwdev, struct ifnet *ifp) { if_addr_rlock(ifp); if_foreach_addr_type(ifp, AF_INET, irdma_add_ipv4_cb, iwdev); if_addr_runlock(ifp); } /** * irdma_add_ip - add ip addresses * @iwdev: irdma device * * Add ipv4/ipv6 addresses to the arp cache */ void irdma_add_ip(struct irdma_device *iwdev) { struct ifnet *ifp = iwdev->netdev; struct ifnet *ifv; + struct epoch_tracker et; int i; irdma_add_ipv4_addr(iwdev, ifp); irdma_add_ipv6_addr(iwdev, ifp); for (i = 0; if_getvlantrunk(ifp) != NULL && i < VLAN_N_VID; ++i) { + NET_EPOCH_ENTER(et); ifv = VLAN_DEVAT(ifp, i); + NET_EPOCH_EXIT(et); if (!ifv) continue; irdma_add_ipv4_addr(iwdev, ifv); irdma_add_ipv6_addr(iwdev, ifv); } } static void irdma_ifaddrevent_handler(void *arg, struct ifnet *ifp, struct ifaddr *ifa, int event) { struct irdma_pci_f *rf = arg; struct ifnet *ifv = NULL; struct sockaddr_in *sin; struct epoch_tracker et; int arp_index = 0, i = 0; u32 ip[4] = {}; if (!ifa || !ifa->ifa_addr || !ifp) return; if (rf->iwdev->netdev != ifp) { for (i = 0; if_getvlantrunk(rf->iwdev->netdev) != NULL && i < VLAN_N_VID; ++i) { NET_EPOCH_ENTER(et); ifv = VLAN_DEVAT(rf->iwdev->netdev, i); NET_EPOCH_EXIT(et); if (ifv == ifp) break; } if (ifv != ifp) return; } sin = (struct sockaddr_in *)ifa->ifa_addr; switch (event) { case IFADDR_EVENT_ADD: if (sin->sin_family == AF_INET) irdma_add_ipv4_addr(rf->iwdev, ifp); else if (sin->sin_family == AF_INET6) irdma_add_ipv6_addr(rf->iwdev, ifp); break; case IFADDR_EVENT_DEL: if (sin->sin_family == AF_INET) { ip[0] = ntohl(sin->sin_addr.s_addr); } else if (sin->sin_family == AF_INET6) { irdma_copy_ip_ntohl(ip, (u32 *)&((struct sockaddr_in6 *)sin)->sin6_addr); } else { break; } for_each_set_bit(arp_index, rf->allocated_arps, rf->arp_table_size) { if (!memcmp(rf->arp_table[arp_index].ip_addr, ip, sizeof(ip))) { irdma_manage_arp_cache(rf, rf->arp_table[arp_index].mac_addr, rf->arp_table[arp_index].ip_addr, IRDMA_ARP_DELETE); } } break; default: break; } } void irdma_reg_ipaddr_event_cb(struct 
irdma_pci_f *rf) { rf->irdma_ifaddr_event = EVENTHANDLER_REGISTER(ifaddr_event_ext, irdma_ifaddrevent_handler, rf, EVENTHANDLER_PRI_ANY); } void irdma_dereg_ipaddr_event_cb(struct irdma_pci_f *rf) { EVENTHANDLER_DEREGISTER(ifaddr_event_ext, rf->irdma_ifaddr_event); } static int irdma_get_route_ifp(struct sockaddr *dst_sin, struct ifnet *netdev, struct ifnet **ifp, struct sockaddr **nexthop, bool *gateway) { struct nhop_object *nh; if (dst_sin->sa_family == AF_INET6) - nh = fib6_lookup(RT_DEFAULT_FIB, &((struct sockaddr_in6 *)dst_sin)->sin6_addr, 0, NHR_NONE, 0); + nh = fib6_lookup(RT_DEFAULT_FIB, &((struct sockaddr_in6 *)dst_sin)->sin6_addr, + ((struct sockaddr_in6 *)dst_sin)->sin6_scope_id, NHR_NONE, 0); else nh = fib4_lookup(RT_DEFAULT_FIB, ((struct sockaddr_in *)dst_sin)->sin_addr, 0, NHR_NONE, 0); if (!nh || (nh->nh_ifp != netdev && rdma_vlan_dev_real_dev(nh->nh_ifp) != netdev)) goto rt_not_found; *gateway = (nh->nh_flags & NHF_GATEWAY) ? true : false; *nexthop = (*gateway) ? &nh->gw_sa : dst_sin; *ifp = nh->nh_ifp; return 0; rt_not_found: pr_err("irdma: route not found\n"); return -ENETUNREACH; } /** * irdma_get_dst_mac - get destination mac address * @cm_node: connection's node * @dst_sin: destination address information * @dst_mac: mac address array to return */ int irdma_get_dst_mac(struct irdma_cm_node *cm_node, struct sockaddr *dst_sin, u8 *dst_mac) { struct ifnet *netdev = cm_node->iwdev->netdev; #ifdef VIMAGE struct rdma_cm_id *rdma_id = (struct rdma_cm_id *)cm_node->cm_id->context; struct vnet *vnet = rdma_id->route.addr.dev_addr.net; #endif struct ifnet *ifp; struct llentry *lle; struct sockaddr *nexthop; struct epoch_tracker et; int err; bool gateway; NET_EPOCH_ENTER(et); CURVNET_SET_QUIET(vnet); err = irdma_get_route_ifp(dst_sin, netdev, &ifp, &nexthop, &gateway); if (err) goto get_route_fail; if (dst_sin->sa_family == AF_INET) { err = arpresolve(ifp, gateway, NULL, nexthop, dst_mac, NULL, &lle); } else if (dst_sin->sa_family == AF_INET6) { err = nd6_resolve(ifp, LLE_SF(AF_INET6, gateway), NULL, nexthop, dst_mac, NULL, &lle); } else { err = -EPROTONOSUPPORT; } get_route_fail: CURVNET_RESTORE(); NET_EPOCH_EXIT(et); if (err) { pr_err("failed to resolve neighbor address (err=%d)\n", err); return -ENETUNREACH; } return 0; } /** * irdma_addr_resolve_neigh - resolve neighbor address * @cm_node: connection's node * @dst_ip: remote ip address * @arpindex: if there is an arp entry */ int irdma_addr_resolve_neigh(struct irdma_cm_node *cm_node, u32 dst_ip, int arpindex) { struct irdma_device *iwdev = cm_node->iwdev; struct sockaddr_in dst_sin = {}; int err; u32 ip[4] = {}; u8 dst_mac[MAX_ADDR_LEN]; dst_sin.sin_len = sizeof(dst_sin); dst_sin.sin_family = AF_INET; dst_sin.sin_port = 0; dst_sin.sin_addr.s_addr = htonl(dst_ip); err = irdma_get_dst_mac(cm_node, (struct sockaddr *)&dst_sin, dst_mac); if (err) return arpindex; ip[0] = dst_ip; return irdma_add_arp(iwdev->rf, ip, dst_mac); } /** * irdma_addr_resolve_neigh_ipv6 - resolve neighbor ipv6 address * @cm_node: connection's node * @dest: remote ip address * @arpindex: if there is an arp entry */ int irdma_addr_resolve_neigh_ipv6(struct irdma_cm_node *cm_node, u32 *dest, int arpindex) { struct irdma_device *iwdev = cm_node->iwdev; struct sockaddr_in6 dst_addr = {}; int err; u8 dst_mac[MAX_ADDR_LEN]; dst_addr.sin6_family = AF_INET6; dst_addr.sin6_len = sizeof(dst_addr); dst_addr.sin6_scope_id = if_getindex(iwdev->netdev); irdma_copy_ip_htonl(dst_addr.sin6_addr.__u6_addr.__u6_addr32, dest); err = irdma_get_dst_mac(cm_node, (struct 
sockaddr *)&dst_addr, dst_mac); if (err) return arpindex; return irdma_add_arp(iwdev->rf, dest, dst_mac); } int irdma_resolve_neigh_lpb_chk(struct irdma_device *iwdev, struct irdma_cm_node *cm_node, struct irdma_cm_info *cm_info) { #ifdef VIMAGE struct rdma_cm_id *rdma_id = (struct rdma_cm_id *)cm_node->cm_id->context; struct vnet *vnet = rdma_id->route.addr.dev_addr.net; #endif int arpindex; int oldarpindex; bool is_lpb = false; CURVNET_SET_QUIET(vnet); is_lpb = cm_node->ipv4 ? irdma_ipv4_is_lpb(cm_node->loc_addr[0], cm_node->rem_addr[0]) : irdma_ipv6_is_lpb(cm_node->loc_addr, cm_node->rem_addr); CURVNET_RESTORE(); if (is_lpb) { cm_node->do_lpb = true; arpindex = irdma_arp_table(iwdev->rf, cm_node->rem_addr, NULL, IRDMA_ARP_RESOLVE); } else { oldarpindex = irdma_arp_table(iwdev->rf, cm_node->rem_addr, NULL, IRDMA_ARP_RESOLVE); if (cm_node->ipv4) arpindex = irdma_addr_resolve_neigh(cm_node, cm_info->rem_addr[0], oldarpindex); else arpindex = irdma_addr_resolve_neigh_ipv6(cm_node, cm_info->rem_addr, oldarpindex); } return arpindex; } /** * irdma_add_handler - add a handler to the list * @hdl: handler to be added to the handler list */ void irdma_add_handler(struct irdma_handler *hdl) { unsigned long flags; spin_lock_irqsave(&irdma_handler_lock, flags); list_add(&hdl->list, &irdma_handlers); spin_unlock_irqrestore(&irdma_handler_lock, flags); } /** * irdma_del_handler - delete a handler from the list * @hdl: handler to be deleted from the handler list */ void irdma_del_handler(struct irdma_handler *hdl) { unsigned long flags; spin_lock_irqsave(&irdma_handler_lock, flags); list_del(&hdl->list); spin_unlock_irqrestore(&irdma_handler_lock, flags); } /** * irdma_set_rf_user_cfg_params - apply user configurable settings * @rf: RDMA PCI function */ void irdma_set_rf_user_cfg_params(struct irdma_pci_f *rf) { int en_rem_endpoint_trk = 0; int limits_sel = 4; rf->en_rem_endpoint_trk = en_rem_endpoint_trk; rf->limits_sel = limits_sel; rf->rst_to = IRDMA_RST_TIMEOUT_HZ; /* Enable DCQCN algorithm by default */ rf->dcqcn_ena = true; } /** * irdma_sysctl_dcqcn_update - handle dcqcn_ena sysctl update * @arg1: pointer to rf * @arg2: unused * @oidp: sysctl oid structure * @req: sysctl request pointer */ static int irdma_sysctl_dcqcn_update(SYSCTL_HANDLER_ARGS) { struct irdma_pci_f *rf = (struct irdma_pci_f *)arg1; int ret; u8 dcqcn_ena = rf->dcqcn_ena; ret = sysctl_handle_8(oidp, &dcqcn_ena, 0, req); if ((ret) || (req->newptr == NULL)) return ret; if (dcqcn_ena == 0) rf->dcqcn_ena = false; else rf->dcqcn_ena = true; return 0; } +enum irdma_cqp_stats_info { + IRDMA_CQP_REQ_CMDS = 28, + IRDMA_CQP_CMPL_CMDS = 29 +}; + +static int +irdma_sysctl_cqp_stats(SYSCTL_HANDLER_ARGS) +{ + struct irdma_sc_cqp *cqp = (struct irdma_sc_cqp *)arg1; + char rslt[192] = "no cqp available yet"; + int rslt_size = sizeof(rslt) - 1; + int option = (int)arg2; + + if (!cqp) { + return sysctl_handle_string(oidp, rslt, sizeof(rslt), req); + } + + snprintf(rslt, sizeof(rslt), ""); + switch (option) { + case IRDMA_CQP_REQ_CMDS: + snprintf(rslt, rslt_size, "%lu", cqp->requested_ops); + break; + case IRDMA_CQP_CMPL_CMDS: + snprintf(rslt, rslt_size, "%lu", atomic64_read(&cqp->completed_ops)); + break; + } + + return sysctl_handle_string(oidp, rslt, sizeof(rslt), req); +} + +struct irdma_sw_stats_tunable_info { + u8 op_type; + const char name[32]; + const char desc[32]; + uintptr_t value; +}; + +static const struct irdma_sw_stats_tunable_info irdma_sws_list[] = { + {IRDMA_OP_CEQ_DESTROY, "ceq_destroy", "ceq_destroy", 0}, + 
{IRDMA_OP_AEQ_DESTROY, "aeq_destroy", "aeq_destroy", 0}, + {IRDMA_OP_DELETE_ARP_CACHE_ENTRY, "delete_arp_cache_entry", + "delete_arp_cache_entry", 0}, + {IRDMA_OP_MANAGE_APBVT_ENTRY, "manage_apbvt_entry", + "manage_apbvt_entry", 0}, + {IRDMA_OP_CEQ_CREATE, "ceq_create", "ceq_create", 0}, + {IRDMA_OP_AEQ_CREATE, "aeq_create", "aeq_create", 0}, + {IRDMA_OP_MANAGE_QHASH_TABLE_ENTRY, "manage_qhash_table_entry", + "manage_qhash_table_entry", 0}, + {IRDMA_OP_QP_MODIFY, "qp_modify", "qp_modify", 0}, + {IRDMA_OP_QP_UPLOAD_CONTEXT, "qp_upload_context", "qp_upload_context", + 0}, + {IRDMA_OP_CQ_CREATE, "cq_create", "cq_create", 0}, + {IRDMA_OP_CQ_DESTROY, "cq_destroy", "cq_destroy", 0}, + {IRDMA_OP_QP_CREATE, "qp_create", "qp_create", 0}, + {IRDMA_OP_QP_DESTROY, "qp_destroy", "qp_destroy", 0}, + {IRDMA_OP_ALLOC_STAG, "alloc_stag", "alloc_stag", 0}, + {IRDMA_OP_MR_REG_NON_SHARED, "mr_reg_non_shared", "mr_reg_non_shared", + 0}, + {IRDMA_OP_DEALLOC_STAG, "dealloc_stag", "dealloc_stag", 0}, + {IRDMA_OP_MW_ALLOC, "mw_alloc", "mw_alloc", 0}, + {IRDMA_OP_QP_FLUSH_WQES, "qp_flush_wqes", "qp_flush_wqes", 0}, + {IRDMA_OP_ADD_ARP_CACHE_ENTRY, "add_arp_cache_entry", + "add_arp_cache_entry", 0}, + {IRDMA_OP_MANAGE_PUSH_PAGE, "manage_push_page", "manage_push_page", 0}, + {IRDMA_OP_UPDATE_PE_SDS, "update_pe_sds", "update_pe_sds", 0}, + {IRDMA_OP_MANAGE_HMC_PM_FUNC_TABLE, "manage_hmc_pm_func_table", + "manage_hmc_pm_func_table", 0}, + {IRDMA_OP_SUSPEND, "suspend", "suspend", 0}, + {IRDMA_OP_RESUME, "resume", "resume", 0}, + {IRDMA_OP_MANAGE_VCHNL_REQ_PBLE_BP, "manage_vchnl_req_pble_bp", + "manage_vchnl_req_pble_bp", 0}, + {IRDMA_OP_QUERY_FPM_VAL, "query_fpm_val", "query_fpm_val", 0}, + {IRDMA_OP_COMMIT_FPM_VAL, "commit_fpm_val", "commit_fpm_val", 0}, + {IRDMA_OP_AH_CREATE, "ah_create", "ah_create", 0}, + {IRDMA_OP_AH_MODIFY, "ah_modify", "ah_modify", 0}, + {IRDMA_OP_AH_DESTROY, "ah_destroy", "ah_destroy", 0}, + {IRDMA_OP_MC_CREATE, "mc_create", "mc_create", 0}, + {IRDMA_OP_MC_DESTROY, "mc_destroy", "mc_destroy", 0}, + {IRDMA_OP_MC_MODIFY, "mc_modify", "mc_modify", 0}, + {IRDMA_OP_STATS_ALLOCATE, "stats_allocate", "stats_allocate", 0}, + {IRDMA_OP_STATS_FREE, "stats_free", "stats_free", 0}, + {IRDMA_OP_STATS_GATHER, "stats_gather", "stats_gather", 0}, + {IRDMA_OP_WS_ADD_NODE, "ws_add_node", "ws_add_node", 0}, + {IRDMA_OP_WS_MODIFY_NODE, "ws_modify_node", "ws_modify_node", 0}, + {IRDMA_OP_WS_DELETE_NODE, "ws_delete_node", "ws_delete_node", 0}, + {IRDMA_OP_WS_FAILOVER_START, "ws_failover_start", "ws_failover_start", + 0}, + {IRDMA_OP_WS_FAILOVER_COMPLETE, "ws_failover_complete", + "ws_failover_complete", 0}, + {IRDMA_OP_SET_UP_MAP, "set_up_map", "set_up_map", 0}, + {IRDMA_OP_GEN_AE, "gen_ae", "gen_ae", 0}, + {IRDMA_OP_QUERY_RDMA_FEATURES, "query_rdma_features", + "query_rdma_features", 0}, + {IRDMA_OP_ALLOC_LOCAL_MAC_ENTRY, "alloc_local_mac_entry", + "alloc_local_mac_entry", 0}, + {IRDMA_OP_ADD_LOCAL_MAC_ENTRY, "add_local_mac_entry", + "add_local_mac_entry", 0}, + {IRDMA_OP_DELETE_LOCAL_MAC_ENTRY, "delete_local_mac_entry", + "delete_local_mac_entry", 0}, + {IRDMA_OP_CQ_MODIFY, "cq_modify", "cq_modify", 0} +}; + +static const struct irdma_sw_stats_tunable_info irdma_cmcs_list[] = { + {0, "cm_nodes_created", "cm_nodes_created", + offsetof(struct irdma_cm_core, stats_nodes_created)}, + {0, "cm_nodes_destroyed", "cm_nodes_destroyed", + offsetof(struct irdma_cm_core, stats_nodes_destroyed)}, + {0, "cm_listen_created", "cm_listen_created", + offsetof(struct irdma_cm_core, stats_listen_created)}, + {0, "cm_listen_destroyed", 
"cm_listen_destroyed", + offsetof(struct irdma_cm_core, stats_listen_destroyed)}, + {0, "cm_listen_nodes_created", "cm_listen_nodes_created", + offsetof(struct irdma_cm_core, stats_listen_nodes_created)}, + {0, "cm_listen_nodes_destroyed", "cm_listen_nodes_destroyed", + offsetof(struct irdma_cm_core, stats_listen_nodes_destroyed)}, + {0, "cm_lpbs", "cm_lpbs", offsetof(struct irdma_cm_core, stats_lpbs)}, + {0, "cm_accepts", "cm_accepts", offsetof(struct irdma_cm_core, + stats_accepts)}, + {0, "cm_rejects", "cm_rejects", offsetof(struct irdma_cm_core, + stats_rejects)}, + {0, "cm_connect_errs", "cm_connect_errs", + offsetof(struct irdma_cm_core, stats_connect_errs)}, + {0, "cm_passive_errs", "cm_passive_errs", + offsetof(struct irdma_cm_core, stats_passive_errs)}, + {0, "cm_pkt_retrans", "cm_pkt_retrans", offsetof(struct irdma_cm_core, + stats_pkt_retrans)}, + {0, "cm_backlog_drops", "cm_backlog_drops", + offsetof(struct irdma_cm_core, stats_backlog_drops)}, +}; + +static const struct irdma_sw_stats_tunable_info irdma_ilqs32_list[] = { + {0, "ilq_avail_buf_count", "ilq_avail_buf_count", + offsetof(struct irdma_puda_rsrc, avail_buf_count)}, + {0, "ilq_alloc_buf_count", "ilq_alloc_buf_count", + offsetof(struct irdma_puda_rsrc, alloc_buf_count)} +}; + +static const struct irdma_sw_stats_tunable_info irdma_ilqs_list[] = { + {0, "ilq_stats_buf_alloc_fail", "ilq_stats_buf_alloc_fail", + offsetof(struct irdma_puda_rsrc, stats_buf_alloc_fail)}, + {0, "ilq_stats_pkt_rcvd", "ilq_stats_pkt_rcvd", + offsetof(struct irdma_puda_rsrc, stats_pkt_rcvd)}, + {0, "ilq_stats_pkt_sent", "ilq_stats_pkt_sent", + offsetof(struct irdma_puda_rsrc, stats_pkt_sent)}, + {0, "ilq_stats_rcvd_pkt_err", "ilq_stats_rcvd_pkt_err", + offsetof(struct irdma_puda_rsrc, stats_rcvd_pkt_err)}, + {0, "ilq_stats_sent_pkt_q", "ilq_stats_sent_pkt_q", + offsetof(struct irdma_puda_rsrc, stats_sent_pkt_q)} +}; + +static const struct irdma_sw_stats_tunable_info irdma_ieqs32_list[] = { + {0, "ieq_avail_buf_count", "ieq_avail_buf_count", + offsetof(struct irdma_puda_rsrc, avail_buf_count)}, + {0, "ieq_alloc_buf_count", "ieq_alloc_buf_count", + offsetof(struct irdma_puda_rsrc, alloc_buf_count)} +}; + +static const struct irdma_sw_stats_tunable_info irdma_ieqs_list[] = { + {0, "ieq_stats_buf_alloc_fail", "ieq_stats_buf_alloc_fail", + offsetof(struct irdma_puda_rsrc, stats_buf_alloc_fail)}, + {0, "ieq_stats_pkt_rcvd", "ieq_stats_pkt_rcvd", + offsetof(struct irdma_puda_rsrc, stats_pkt_rcvd)}, + {0, "ieq_stats_pkt_sent", "ieq_stats_pkt_sent", + offsetof(struct irdma_puda_rsrc, stats_pkt_sent)}, + {0, "ieq_stats_rcvd_pkt_err", "ieq_stats_rcvd_pkt_err", + offsetof(struct irdma_puda_rsrc, stats_rcvd_pkt_err)}, + {0, "ieq_stats_sent_pkt_q", "ieq_stats_sent_pkt_q", + offsetof(struct irdma_puda_rsrc, stats_sent_pkt_q)}, + {0, "ieq_stats_bad_qp_id", "ieq_stats_bad_qp_id", + offsetof(struct irdma_puda_rsrc, stats_bad_qp_id)}, + {0, "ieq_fpdu_processed", "ieq_fpdu_processed", + offsetof(struct irdma_puda_rsrc, fpdu_processed)}, + {0, "ieq_bad_seq_num", "ieq_bad_seq_num", + offsetof(struct irdma_puda_rsrc, bad_seq_num)}, + {0, "ieq_crc_err", "ieq_crc_err", offsetof(struct irdma_puda_rsrc, + crc_err)}, + {0, "ieq_pmode_count", "ieq_pmode_count", + offsetof(struct irdma_puda_rsrc, pmode_count)}, + {0, "ieq_partials_handled", "ieq_partials_handled", + offsetof(struct irdma_puda_rsrc, partials_handled)}, +}; + /** * irdma_dcqcn_tunables_init - create tunables for dcqcn settings * @rf: RDMA PCI function * * Create DCQCN related sysctls for the driver. 
* dcqcn_ena is writeable settings and applicable to next QP creation or * context setting. * all other settings are of RDTUN type (read on driver load) and are * applicable only to CQP creation. */ void irdma_dcqcn_tunables_init(struct irdma_pci_f *rf) { struct sysctl_oid_list *irdma_sysctl_oid_list; irdma_sysctl_oid_list = SYSCTL_CHILDREN(rf->tun_info.irdma_sysctl_tree); SYSCTL_ADD_PROC(&rf->tun_info.irdma_sysctl_ctx, irdma_sysctl_oid_list, OID_AUTO, "dcqcn_enable", CTLFLAG_RW | CTLTYPE_U8, rf, 0, irdma_sysctl_dcqcn_update, "A", "enables DCQCN algorithm for RoCEv2 on all ports, default=true"); SYSCTL_ADD_U8(&rf->tun_info.irdma_sysctl_ctx, irdma_sysctl_oid_list, OID_AUTO, "dcqcn_cc_cfg_valid", CTLFLAG_RDTUN, &rf->dcqcn_params.cc_cfg_valid, 0, "set DCQCN parameters to be valid, default=false"); rf->dcqcn_params.min_dec_factor = 1; SYSCTL_ADD_U8(&rf->tun_info.irdma_sysctl_ctx, irdma_sysctl_oid_list, OID_AUTO, "dcqcn_min_dec_factor", CTLFLAG_RDTUN, &rf->dcqcn_params.min_dec_factor, 0, "set minimum percentage factor by which tx rate can be changed for CNP, Range: 1-100, default=1"); SYSCTL_ADD_U8(&rf->tun_info.irdma_sysctl_ctx, irdma_sysctl_oid_list, OID_AUTO, "dcqcn_min_rate_MBps", CTLFLAG_RDTUN, &rf->dcqcn_params.min_rate, 0, "set minimum rate limit value, in MBits per second, default=0"); rf->dcqcn_params.dcqcn_f = 5; SYSCTL_ADD_U8(&rf->tun_info.irdma_sysctl_ctx, irdma_sysctl_oid_list, OID_AUTO, "dcqcn_F", CTLFLAG_RDTUN, &rf->dcqcn_params.dcqcn_f, 0, "set number of times to stay in each stage of bandwidth recovery, default=5"); rf->dcqcn_params.dcqcn_t = 0x37; SYSCTL_ADD_U16(&rf->tun_info.irdma_sysctl_ctx, irdma_sysctl_oid_list, OID_AUTO, "dcqcn_T", CTLFLAG_RDTUN, &rf->dcqcn_params.dcqcn_t, 0, - "set number of usecs that should elapse before increasing the CWND in DCQCN mode, default=0x37"); + "number of us to elapse before increasing the CWND in DCQCN mode, default=0x37"); rf->dcqcn_params.dcqcn_b = 0x249f0; SYSCTL_ADD_U32(&rf->tun_info.irdma_sysctl_ctx, irdma_sysctl_oid_list, OID_AUTO, "dcqcn_B", CTLFLAG_RDTUN, &rf->dcqcn_params.dcqcn_b, 0, "set number of MSS to add to the congestion window in additive increase mode, default=0x249f0"); rf->dcqcn_params.rai_factor = 1; SYSCTL_ADD_U16(&rf->tun_info.irdma_sysctl_ctx, irdma_sysctl_oid_list, OID_AUTO, "dcqcn_rai_factor", CTLFLAG_RDTUN, &rf->dcqcn_params.rai_factor, 0, "set number of MSS to add to the congestion window in additive increase mode, default=1"); rf->dcqcn_params.hai_factor = 5; SYSCTL_ADD_U16(&rf->tun_info.irdma_sysctl_ctx, irdma_sysctl_oid_list, OID_AUTO, "dcqcn_hai_factor", CTLFLAG_RDTUN, &rf->dcqcn_params.hai_factor, 0, "set number of MSS to add to the congestion window in hyperactive increase mode, default=5"); rf->dcqcn_params.rreduce_mperiod = 50; SYSCTL_ADD_U32(&rf->tun_info.irdma_sysctl_ctx, irdma_sysctl_oid_list, OID_AUTO, "dcqcn_rreduce_mperiod", CTLFLAG_RDTUN, &rf->dcqcn_params.rreduce_mperiod, 0, "set minimum time between 2 consecutive rate reductions for a single flow, default=50"); } +/** + * irdma_sysctl_settings - sysctl runtime settings init + * @rf: RDMA PCI function + */ +void +irdma_sysctl_settings(struct irdma_pci_f *rf) +{ + struct sysctl_oid_list *irdma_sysctl_oid_list; + + irdma_sysctl_oid_list = SYSCTL_CHILDREN(rf->tun_info.irdma_sysctl_tree); + + SYSCTL_ADD_BOOL(&rf->tun_info.irdma_sysctl_ctx, irdma_sysctl_oid_list, + OID_AUTO, "upload_context", CTLFLAG_RWTUN, + &irdma_upload_context, 0, + "allow for generating QP's upload context, default=0"); +} + +void +irdma_sw_stats_tunables_init(struct irdma_pci_f 
*rf) +{ + struct sysctl_oid_list *sws_oid_list; + struct sysctl_ctx_list *irdma_ctx = &rf->tun_info.irdma_sysctl_ctx; + struct irdma_sc_dev *dev = &rf->sc_dev; + struct irdma_cm_core *cm_core = &rf->iwdev->cm_core; + struct irdma_puda_rsrc *ilq = rf->iwdev->vsi.ilq; + struct irdma_puda_rsrc *ieq = rf->iwdev->vsi.ieq; + u64 *ll_ptr; + u32 *l_ptr; + int cqp_stat_cnt = sizeof(irdma_sws_list) / sizeof(struct irdma_sw_stats_tunable_info); + int cmcore_stat_cnt = sizeof(irdma_cmcs_list) / sizeof(struct irdma_sw_stats_tunable_info); + int ilqs_stat_cnt = sizeof(irdma_ilqs_list) / sizeof(struct irdma_sw_stats_tunable_info); + int ilqs32_stat_cnt = sizeof(irdma_ilqs32_list) / sizeof(struct irdma_sw_stats_tunable_info); + int ieqs_stat_cnt = sizeof(irdma_ieqs_list) / sizeof(struct irdma_sw_stats_tunable_info); + int ieqs32_stat_cnt = sizeof(irdma_ieqs32_list) / sizeof(struct irdma_sw_stats_tunable_info); + int i; + + sws_oid_list = SYSCTL_CHILDREN(rf->tun_info.sws_sysctl_tree); + + for (i = 0; i < cqp_stat_cnt; ++i) { + SYSCTL_ADD_U64(irdma_ctx, sws_oid_list, OID_AUTO, + irdma_sws_list[i].name, CTLFLAG_RD, + &dev->cqp_cmd_stats[irdma_sws_list[i].op_type], + 0, irdma_sws_list[i].desc); + } + SYSCTL_ADD_PROC(irdma_ctx, sws_oid_list, OID_AUTO, + "req_cmds", CTLFLAG_RD | CTLTYPE_STRING, + dev->cqp, IRDMA_CQP_REQ_CMDS, irdma_sysctl_cqp_stats, "A", + "req_cmds"); + SYSCTL_ADD_PROC(irdma_ctx, sws_oid_list, OID_AUTO, + "cmpl_cmds", CTLFLAG_RD | CTLTYPE_STRING, + dev->cqp, IRDMA_CQP_CMPL_CMDS, irdma_sysctl_cqp_stats, "A", + "cmpl_cmds"); + for (i = 0; i < cmcore_stat_cnt; ++i) { + ll_ptr = (u64 *)((uintptr_t)cm_core + irdma_cmcs_list[i].value); + SYSCTL_ADD_U64(irdma_ctx, sws_oid_list, OID_AUTO, + irdma_cmcs_list[i].name, CTLFLAG_RD, ll_ptr, + 0, irdma_cmcs_list[i].desc); + } + for (i = 0; ilq && i < ilqs_stat_cnt; ++i) { + ll_ptr = (u64 *)((uintptr_t)ilq + irdma_ilqs_list[i].value); + SYSCTL_ADD_U64(irdma_ctx, sws_oid_list, OID_AUTO, + irdma_ilqs_list[i].name, CTLFLAG_RD, ll_ptr, + 0, irdma_ilqs_list[i].desc); + } + for (i = 0; ilq && i < ilqs32_stat_cnt; ++i) { + l_ptr = (u32 *)((uintptr_t)ilq + irdma_ilqs32_list[i].value); + SYSCTL_ADD_U32(irdma_ctx, sws_oid_list, OID_AUTO, + irdma_ilqs32_list[i].name, CTLFLAG_RD, l_ptr, + 0, irdma_ilqs32_list[i].desc); + } + for (i = 0; ieq && i < ieqs_stat_cnt; ++i) { + ll_ptr = (u64 *)((uintptr_t)ieq + irdma_ieqs_list[i].value); + SYSCTL_ADD_U64(irdma_ctx, sws_oid_list, OID_AUTO, + irdma_ieqs_list[i].name, CTLFLAG_RD, ll_ptr, + 0, irdma_ieqs_list[i].desc); + } + for (i = 0; ieq && i < ieqs32_stat_cnt; ++i) { + l_ptr = (u32 *)((uintptr_t)ieq + irdma_ieqs32_list[i].value); + SYSCTL_ADD_U32(irdma_ctx, sws_oid_list, OID_AUTO, + irdma_ieqs32_list[i].name, CTLFLAG_RD, l_ptr, + 0, irdma_ieqs32_list[i].desc); + } +} + /** * irdma_dmamap_cb - callback for bus_dmamap_load */ static void irdma_dmamap_cb(void *arg, bus_dma_segment_t * segs, int nseg, int error) { if (error) return; *(bus_addr_t *) arg = segs->ds_addr; return; } /** * irdma_allocate_dma_mem - allocate dma memory * @hw: pointer to hw structure * @mem: structure holding memory information * @size: requested size * @alignment: requested alignment */ void * irdma_allocate_dma_mem(struct irdma_hw *hw, struct irdma_dma_mem *mem, u64 size, u32 alignment) { struct irdma_dev_ctx *dev_ctx = (struct irdma_dev_ctx *)hw->dev_context; device_t dev = dev_ctx->dev; void *va; int ret; ret = bus_dma_tag_create(bus_get_dma_tag(dev), /* parent */ alignment, 0, /* alignment, bounds */ BUS_SPACE_MAXADDR, /* lowaddr */ BUS_SPACE_MAXADDR, 
/* highaddr */ NULL, NULL, /* filter, filterarg */ size, /* maxsize */ 1, /* nsegments */ size, /* maxsegsize */ BUS_DMA_ALLOCNOW, /* flags */ NULL, /* lockfunc */ NULL, /* lockfuncarg */ &mem->tag); if (ret != 0) { device_printf(dev, "%s: bus_dma_tag_create failed, error %u\n", __func__, ret); goto fail_0; } ret = bus_dmamem_alloc(mem->tag, (void **)&va, BUS_DMA_NOWAIT | BUS_DMA_ZERO, &mem->map); if (ret != 0) { device_printf(dev, "%s: bus_dmamem_alloc failed, error %u\n", __func__, ret); goto fail_1; } ret = bus_dmamap_load(mem->tag, mem->map, va, size, irdma_dmamap_cb, &mem->pa, BUS_DMA_NOWAIT); if (ret != 0) { device_printf(dev, "%s: bus_dmamap_load failed, error %u\n", __func__, ret); goto fail_2; } mem->nseg = 1; mem->size = size; bus_dmamap_sync(mem->tag, mem->map, BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); return va; fail_2: bus_dmamem_free(mem->tag, va, mem->map); fail_1: bus_dma_tag_destroy(mem->tag); fail_0: mem->map = NULL; mem->tag = NULL; return NULL; } /** * irdma_free_dma_mem - Memory free helper fn * @hw: pointer to hw structure * @mem: ptr to mem struct to free */ int irdma_free_dma_mem(struct irdma_hw *hw, struct irdma_dma_mem *mem) { if (!mem) return -EINVAL; bus_dmamap_sync(mem->tag, mem->map, BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE); bus_dmamap_unload(mem->tag, mem->map); if (!mem->va) return -ENOMEM; bus_dmamem_free(mem->tag, mem->va, mem->map); bus_dma_tag_destroy(mem->tag); mem->va = NULL; return 0; } -inline void -irdma_prm_rem_bitmapmem(struct irdma_hw *hw, struct irdma_chunk *chunk) -{ - kfree(chunk->bitmapmem.va); -} - void irdma_cleanup_dead_qps(struct irdma_sc_vsi *vsi) { struct irdma_sc_qp *qp = NULL; struct irdma_qp *iwqp; struct irdma_pci_f *rf; u8 i; for (i = 0; i < IRDMA_MAX_USER_PRIORITY; i++) { qp = irdma_get_qp_from_list(&vsi->qos[i].qplist, qp); while (qp) { if (qp->qp_uk.qp_type == IRDMA_QP_TYPE_UDA) { qp = irdma_get_qp_from_list(&vsi->qos[i].qplist, qp); continue; } iwqp = qp->qp_uk.back_qp; rf = iwqp->iwdev->rf; irdma_free_dma_mem(rf->sc_dev.hw, &iwqp->q2_ctx_mem); irdma_free_dma_mem(rf->sc_dev.hw, &iwqp->kqp.dma_mem); kfree(iwqp->kqp.sq_wrid_mem); kfree(iwqp->kqp.rq_wrid_mem); qp = irdma_get_qp_from_list(&vsi->qos[i].qplist, qp); kfree(iwqp); } } } diff --git a/sys/dev/irdma/fbsd_kcompat.h b/sys/dev/irdma/fbsd_kcompat.h index b4d7e59bdc53..eb1e615fc048 100644 --- a/sys/dev/irdma/fbsd_kcompat.h +++ b/sys/dev/irdma/fbsd_kcompat.h @@ -1,349 +1,341 @@ /*- * SPDX-License-Identifier: GPL-2.0 or Linux-OpenIB * - * Copyright (c) 2021 - 2022 Intel Corporation + * Copyright (c) 2021 - 2023 Intel Corporation * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU * General Public License (GPL) Version 2, available from the file * COPYING in the main directory of this source tree, or the * OpenFabrics.org BSD license below: * * Redistribution and use in source and binary forms, with or * without modification, are permitted provided that the following * conditions are met: * * - Redistributions of source code must retain the above * copyright notice, this list of conditions and the following * disclaimer. * * - Redistributions in binary form must reproduce the above * copyright notice, this list of conditions and the following * disclaimer in the documentation and/or other materials * provided with the distribution. 
* * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ #ifndef FBSD_KCOMPAT_H #define FBSD_KCOMPAT_H #include "ice_rdma.h" #define TASKLET_DATA_TYPE unsigned long #define TASKLET_FUNC_TYPE void (*)(TASKLET_DATA_TYPE) #ifndef tasklet_setup #define tasklet_setup(tasklet, callback) \ tasklet_init((tasklet), (TASKLET_FUNC_TYPE)(callback), \ (TASKLET_DATA_TYPE)(tasklet)) #endif #ifndef from_tasklet #define from_tasklet(var, callback_tasklet, tasklet_fieldname) \ container_of(callback_tasklet, typeof(*var), tasklet_fieldname) #endif #if __FreeBSD_version >= 1400000 #define IRDMA_SET_RDMA_OBJ_SIZE(ib_struct, drv_struct, member) \ (sizeof(struct drv_struct) + \ BUILD_BUG_ON_ZERO(offsetof(struct drv_struct, member)) + \ BUILD_BUG_ON_ZERO( \ !__same_type(((struct drv_struct *)NULL)->member, \ struct ib_struct))) #endif /* __FreeBSD_version > 1400000 */ #define set_ibdev_dma_device(ibdev, dev) \ ibdev.dma_device = (dev) #define set_max_sge(props, rf) \ ((props)->max_sge = (rf)->sc_dev.hw_attrs.uk_attrs.max_hw_wq_frags) -#define kc_set_props_ip_gid_caps(props) \ - ((props)->port_cap_flags |= IB_PORT_IP_BASED_GIDS) #define rdma_query_gid(ibdev, port, index, gid) \ ib_get_cached_gid(ibdev, port, index, gid, NULL) #define kmap(pg) page_address(pg) #define kmap_local_page(pg) page_address(pg) #define kunmap(pg) #define kunmap_local(pg) -#if __FreeBSD_version >= 1400026 -#define kc_free_lsmm_dereg_mr(iwdev, iwqp) \ - ((iwdev)->ibdev.dereg_mr((iwqp)->lsmm_mr, NULL)) -#else -#define kc_free_lsmm_dereg_mr(iwdev, iwqp) \ - ((iwdev)->ibdev.dereg_mr((iwqp)->lsmm_mr)) -#endif #define IB_UVERBS_CQ_FLAGS_TIMESTAMP_COMPLETION IB_CQ_FLAGS_TIMESTAMP_COMPLETION #if __FreeBSD_version < 1400026 #define kc_irdma_destroy_qp(ibqp, udata) irdma_destroy_qp(ibqp) #else #define kc_irdma_destroy_qp(ibqp, udata) irdma_destroy_qp(ibqp, udata) #endif #ifndef IB_QP_ATTR_STANDARD_BITS #define IB_QP_ATTR_STANDARD_BITS GENMASK(20, 0) #endif #define IRDMA_QOS_MODE_VLAN 0x0 #define IRDMA_QOS_MODE_DSCP 0x1 #define IRDMA_VER_LEN 24 void kc_set_roce_uverbs_cmd_mask(struct irdma_device *iwdev); void kc_set_rdma_uverbs_cmd_mask(struct irdma_device *iwdev); struct irdma_tunable_info { struct sysctl_ctx_list irdma_sysctl_ctx; struct sysctl_oid *irdma_sysctl_tree; + struct sysctl_oid *sws_sysctl_tree; char drv_ver[IRDMA_VER_LEN]; u8 roce_ena; }; static inline int irdma_iw_query_pkey(struct ib_device *ibdev, u8 port, u16 index, u16 *pkey) { *pkey = 0; return 0; } static inline int cq_validate_flags(u32 flags, u8 hw_rev) { /* GEN1 does not support CQ create flags */ if (hw_rev == IRDMA_GEN_1) return flags ? -EOPNOTSUPP : 0; return flags & ~IB_UVERBS_CQ_FLAGS_TIMESTAMP_COMPLETION ? 
-EOPNOTSUPP : 0; } static inline u64 *irdma_next_pbl_addr(u64 *pbl, struct irdma_pble_info **pinfo, u32 *idx) { *idx += 1; if (!(*pinfo) || *idx != (*pinfo)->cnt) return ++pbl; *idx = 0; (*pinfo)++; return (*pinfo)->addr; } #if __FreeBSD_version < 1400026 struct ib_cq *irdma_create_cq(struct ib_device *ibdev, const struct ib_cq_init_attr *attr, struct ib_ucontext *context, struct ib_udata *udata); #else int irdma_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr, struct ib_udata *udata); #endif struct ib_qp *irdma_create_qp(struct ib_pd *ibpd, struct ib_qp_init_attr *init_attr, struct ib_udata *udata); #if __FreeBSD_version >= 1400026 int irdma_create_ah(struct ib_ah *ib_ah, struct ib_ah_attr *attr, u32 flags, struct ib_udata *udata); int irdma_create_ah_stub(struct ib_ah *ib_ah, struct ib_ah_attr *attr, u32 flags, struct ib_udata *udata); #else struct ib_ah *irdma_create_ah(struct ib_pd *ibpd, struct ib_ah_attr *attr, struct ib_udata *udata); struct ib_ah *irdma_create_ah_stub(struct ib_pd *ibpd, struct ib_ah_attr *attr, struct ib_udata *udata); #endif void irdma_ether_copy(u8 *dmac, struct ib_ah_attr *attr); #if __FreeBSD_version >= 1400026 void irdma_destroy_ah(struct ib_ah *ibah, u32 flags); void irdma_destroy_ah_stub(struct ib_ah *ibah, u32 flags); #else int irdma_destroy_ah(struct ib_ah *ibah); int irdma_destroy_ah_stub(struct ib_ah *ibah); #endif #if __FreeBSD_version < 1400026 int irdma_destroy_qp(struct ib_qp *ibqp); #else int irdma_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata); #endif #if __FreeBSD_version < 1400026 int irdma_dereg_mr(struct ib_mr *ib_mr); #else int irdma_dereg_mr(struct ib_mr *ib_mr, struct ib_udata *udata); #endif int ib_get_eth_speed(struct ib_device *dev, u32 port_num, u8 *speed, u8 *width); enum rdma_link_layer irdma_get_link_layer(struct ib_device *ibdev, u8 port_num); int irdma_roce_port_immutable(struct ib_device *ibdev, u8 port_num, struct ib_port_immutable *immutable); int irdma_iw_port_immutable(struct ib_device *ibdev, u8 port_num, struct ib_port_immutable *immutable); int irdma_query_gid(struct ib_device *ibdev, u8 port, int index, union ib_gid *gid); int irdma_query_gid_roce(struct ib_device *ibdev, u8 port, int index, union ib_gid *gid); int irdma_query_pkey(struct ib_device *ibdev, u8 port, u16 index, u16 *pkey); int irdma_query_port(struct ib_device *ibdev, u8 port, struct ib_port_attr *props); struct rdma_hw_stats *irdma_alloc_hw_stats(struct ib_device *ibdev, u8 port_num); int irdma_get_hw_stats(struct ib_device *ibdev, struct rdma_hw_stats *stats, u8 port_num, int index); void irdma_request_reset(struct irdma_pci_f *rf); int irdma_register_qset(struct irdma_sc_vsi *vsi, struct irdma_ws_node *tc_node); void irdma_unregister_qset(struct irdma_sc_vsi *vsi, struct irdma_ws_node *tc_node); void ib_unregister_device(struct ib_device *ibdev); #if __FreeBSD_version < 1400026 int rdma_user_mmap_io(struct ib_ucontext *ucontext, struct vm_area_struct *vma, unsigned long pfn, unsigned long size, pgprot_t prot); #endif void irdma_disassociate_ucontext(struct ib_ucontext *context); int kc_irdma_set_roce_cm_info(struct irdma_qp *iwqp, struct ib_qp_attr *attr, u16 *vlan_id); -struct irdma_device *kc_irdma_get_device(struct ifnet *netdev); -void kc_irdma_put_device(struct irdma_device *iwdev); void kc_set_loc_seq_num_mss(struct irdma_cm_node *cm_node); u16 kc_rdma_get_udp_sport(u32 fl, u32 lqpn, u32 rqpn); void irdma_get_dev_fw_str(struct ib_device *dev, char *str, size_t str_len); int irdma_modify_port(struct ib_device *ibdev, u8 port, 
int mask, struct ib_port_modify *props); int irdma_get_dst_mac(struct irdma_cm_node *cm_node, struct sockaddr *dst_sin, u8 *dst_mac); int irdma_resolve_neigh_lpb_chk(struct irdma_device *iwdev, struct irdma_cm_node *cm_node, struct irdma_cm_info *cm_info); int irdma_addr_resolve_neigh(struct irdma_cm_node *cm_node, u32 dst_ip, int arpindex); int irdma_addr_resolve_neigh_ipv6(struct irdma_cm_node *cm_node, u32 *dest, int arpindex); void irdma_dcqcn_tunables_init(struct irdma_pci_f *rf); +void irdma_sysctl_settings(struct irdma_pci_f *rf); +void irdma_sw_stats_tunables_init(struct irdma_pci_f *rf); u32 irdma_create_stag(struct irdma_device *iwdev); void irdma_free_stag(struct irdma_device *iwdev, u32 stag); int irdma_hwdereg_mr(struct ib_mr *ib_mr); int irdma_rereg_user_mr(struct ib_mr *ib_mr, int flags, u64 start, u64 len, u64 virt, int new_access, struct ib_pd *new_pd, struct ib_udata *udata); struct irdma_mr; struct irdma_cq; struct irdma_cq_buf; #if __FreeBSD_version < 1400026 struct ib_mr *irdma_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type, u32 max_num_sg); #else struct ib_mr *irdma_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type, u32 max_num_sg, struct ib_udata *udata); #endif int irdma_hwreg_mr(struct irdma_device *iwdev, struct irdma_mr *iwmr, u16 access); struct ib_mr *irdma_rereg_mr_trans(struct irdma_mr *iwmr, u64 start, u64 len, u64 virt, struct ib_udata *udata); int irdma_hw_alloc_mw(struct irdma_device *iwdev, struct irdma_mr *iwmr); struct ib_mw *irdma_alloc_mw(struct ib_pd *pd, enum ib_mw_type type, struct ib_udata *udata); int irdma_hw_alloc_stag(struct irdma_device *iwdev, struct irdma_mr *iwmr); void irdma_cq_free_rsrc(struct irdma_pci_f *rf, struct irdma_cq *iwcq); int irdma_validate_qp_attrs(struct ib_qp_init_attr *init_attr, struct irdma_device *iwdev); void irdma_setup_virt_qp(struct irdma_device *iwdev, struct irdma_qp *iwqp, struct irdma_qp_init_info *init_info); int irdma_setup_kmode_qp(struct irdma_device *iwdev, struct irdma_qp *iwqp, struct irdma_qp_init_info *info, struct ib_qp_init_attr *init_attr); int irdma_setup_umode_qp(struct ib_udata *udata, struct irdma_device *iwdev, struct irdma_qp *iwqp, struct irdma_qp_init_info *info, struct ib_qp_init_attr *init_attr); void irdma_roce_fill_and_set_qpctx_info(struct irdma_qp *iwqp, struct irdma_qp_host_ctx_info *ctx_info); void irdma_iw_fill_and_set_qpctx_info(struct irdma_qp *iwqp, struct irdma_qp_host_ctx_info *ctx_info); int irdma_cqp_create_qp_cmd(struct irdma_qp *iwqp); void irdma_dealloc_push_page(struct irdma_pci_f *rf, struct irdma_sc_qp *qp); int irdma_process_resize_list(struct irdma_cq *iwcq, struct irdma_device *iwdev, struct irdma_cq_buf *lcqe_buf); #if __FreeBSD_version < 1400026 int irdma_destroy_cq(struct ib_cq *ib_cq); #else void irdma_destroy_cq(struct ib_cq *ib_cq, struct ib_udata *udata); #endif #if __FreeBSD_version < 1400026 struct ib_ucontext *irdma_alloc_ucontext(struct ib_device *, struct ib_udata *); #else int irdma_alloc_ucontext(struct ib_ucontext *uctx, struct ib_udata *udata); #endif #if __FreeBSD_version < 1400026 int irdma_dealloc_ucontext(struct ib_ucontext *); #else void irdma_dealloc_ucontext(struct ib_ucontext *context); #endif #if __FreeBSD_version < 1400026 struct ib_pd *irdma_alloc_pd(struct ib_device *, struct ib_ucontext *, struct ib_udata *); #else int irdma_alloc_pd(struct ib_pd *pd, struct ib_udata *udata); #endif #if __FreeBSD_version < 1400026 int irdma_dealloc_pd(struct ib_pd *); #else void irdma_dealloc_pd(struct ib_pd *ibpd, struct ib_udata *udata); #endif 
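The version-gated prototypes above follow the same pattern as the kc_irdma_destroy_qp macro earlier in this header: one call site, two verb signatures selected at compile time by __FreeBSD_version. As a minimal illustrative sketch only (kc_example_dealloc_pd is a hypothetical name, not part of this driver), a static inline wrapper can hide the split in the same way, consistent with the irdma_dealloc_pd prototypes declared above:

#if __FreeBSD_version < 1400026
static inline void
kc_example_dealloc_pd(struct ib_pd *ibpd, struct ib_udata *udata)
{
	/* pre-1400026 verb returns int and takes no udata argument */
	(void)udata;
	(void)irdma_dealloc_pd(ibpd);
}
#else
static inline void
kc_example_dealloc_pd(struct ib_pd *ibpd, struct ib_udata *udata)
{
	/* newer verb already matches the wrapper's signature */
	irdma_dealloc_pd(ibpd, udata);
}
#endif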
int irdma_add_gid(struct ib_device *, u8, unsigned int, const union ib_gid *, const struct ib_gid_attr *, void **); int irdma_del_gid(struct ib_device *, u8, unsigned int, void **); struct ib_device *ib_device_get_by_netdev(struct ifnet *ndev, int driver_id); void ib_device_put(struct ib_device *device); void ib_unregister_device_put(struct ib_device *device); enum ib_mtu ib_mtu_int_to_enum(int mtu); struct irdma_pbl *irdma_get_pbl(unsigned long va, struct list_head *pbl_list); void irdma_clean_cqes(struct irdma_qp *iwqp, struct irdma_cq *iwcq); void irdma_remove_push_mmap_entries(struct irdma_qp *iwqp); struct irdma_ucontext; void irdma_del_memlist(struct irdma_mr *iwmr, struct irdma_ucontext *ucontext); void irdma_copy_user_pgaddrs(struct irdma_mr *iwmr, u64 *pbl, enum irdma_pble_level level); void irdma_reg_ipaddr_event_cb(struct irdma_pci_f *rf); void irdma_dereg_ipaddr_event_cb(struct irdma_pci_f *rf); /* Introduced in this series https://lore.kernel.org/linux-rdma/0-v2-270386b7e60b+28f4-umem_1_jgg@nvidia.com/ * An irdma version helper doing same for older functions with difference that iova is passed in * as opposed to derived from umem->iova. */ static inline size_t irdma_ib_umem_num_dma_blocks(struct ib_umem *umem, unsigned long pgsz, u64 iova) { /* some older OFED distros do not have ALIGN_DOWN */ #ifndef ALIGN_DOWN #define ALIGN_DOWN(x, a) ALIGN((x) - ((a) - 1), (a)) #endif return (size_t)((ALIGN(iova + umem->length, pgsz) - ALIGN_DOWN(iova, pgsz))) / pgsz; } static inline void addrconf_addr_eui48(u8 *deui, const char *const addr) { memcpy(deui, addr, 3); deui[3] = 0xFF; deui[4] = 0xFE; memcpy(deui + 5, addr + 3, 3); deui[0] ^= 2; } #endif /* FBSD_KCOMPAT_H */ diff --git a/sys/dev/irdma/icrdma.c b/sys/dev/irdma/icrdma.c index 8dd16eb7f589..c46921be386a 100644 --- a/sys/dev/irdma/icrdma.c +++ b/sys/dev/irdma/icrdma.c @@ -1,719 +1,816 @@ /*- * SPDX-License-Identifier: GPL-2.0 or Linux-OpenIB * - * Copyright (c) 2021 - 2022 Intel Corporation + * Copyright (c) 2021 - 2023 Intel Corporation * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU * General Public License (GPL) Version 2, available from the file * COPYING in the main directory of this source tree, or the * OpenFabrics.org BSD license below: * * Redistribution and use in source and binary forms, with or * without modification, are permitted provided that the following * conditions are met: * * - Redistributions of source code must retain the above * copyright notice, this list of conditions and the following * disclaimer. * * - Redistributions in binary form must reproduce the above * copyright notice, this list of conditions and the following * disclaimer in the documentation and/or other materials * provided with the distribution. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
*/ #include #include #include #include #include #include #include #include #include #include "ice_rdma.h" #include "irdma_main.h" #include "icrdma_hw.h" #include "irdma_if.h" #include "irdma_di_if.h" /** * Driver version */ -char irdma_driver_version[] = "1.1.11-k"; +char irdma_driver_version[] = "1.2.17-k"; /** * irdma_init_tunable - prepare tunables * @rf: RDMA PCI function * @pf_id: id of the pf */ static void irdma_init_tunable(struct irdma_pci_f *rf, uint8_t pf_id) { - struct sysctl_oid_list *irdma_sysctl_oid_list; + struct sysctl_oid_list *irdma_oid_list; + struct irdma_tunable_info *t_info = &rf->tun_info; char pf_name[16]; snprintf(pf_name, 15, "irdma%d", pf_id); - sysctl_ctx_init(&rf->tun_info.irdma_sysctl_ctx); + sysctl_ctx_init(&t_info->irdma_sysctl_ctx); - rf->tun_info.irdma_sysctl_tree = SYSCTL_ADD_NODE(&rf->tun_info.irdma_sysctl_ctx, - SYSCTL_STATIC_CHILDREN(_dev), - OID_AUTO, pf_name, CTLFLAG_RD, - NULL, ""); + t_info->irdma_sysctl_tree = SYSCTL_ADD_NODE(&t_info->irdma_sysctl_ctx, + SYSCTL_STATIC_CHILDREN(_dev), + OID_AUTO, pf_name, + CTLFLAG_RD, NULL, ""); - irdma_sysctl_oid_list = SYSCTL_CHILDREN(rf->tun_info.irdma_sysctl_tree); + irdma_oid_list = SYSCTL_CHILDREN(t_info->irdma_sysctl_tree); + t_info->sws_sysctl_tree = SYSCTL_ADD_NODE(&t_info->irdma_sysctl_ctx, + irdma_oid_list, OID_AUTO, + "sw_stats", CTLFLAG_RD, + NULL, ""); /* * debug mask setting */ - SYSCTL_ADD_S32(&rf->tun_info.irdma_sysctl_ctx, irdma_sysctl_oid_list, + SYSCTL_ADD_S32(&t_info->irdma_sysctl_ctx, irdma_oid_list, OID_AUTO, "debug", CTLFLAG_RWTUN, &rf->sc_dev.debug_mask, 0, "irdma debug"); /* * RoCEv2/iWARP setting RoCEv2 the default mode */ - rf->tun_info.roce_ena = 1; - SYSCTL_ADD_U8(&rf->tun_info.irdma_sysctl_ctx, irdma_sysctl_oid_list, OID_AUTO, - "roce_enable", CTLFLAG_RDTUN, &rf->tun_info.roce_ena, 0, + t_info->roce_ena = 1; + SYSCTL_ADD_U8(&t_info->irdma_sysctl_ctx, irdma_oid_list, OID_AUTO, + "roce_enable", CTLFLAG_RDTUN, &t_info->roce_ena, 0, "RoCEv2 mode enable"); rf->protocol_used = IRDMA_IWARP_PROTOCOL_ONLY; - if (rf->tun_info.roce_ena == 1) + if (t_info->roce_ena == 1) rf->protocol_used = IRDMA_ROCE_PROTOCOL_ONLY; - else if (rf->tun_info.roce_ena != 0) + else if (t_info->roce_ena != 0) printf("%s:%d wrong roce_enable value (%d), using iWARP\n", - __func__, __LINE__, rf->tun_info.roce_ena); + __func__, __LINE__, t_info->roce_ena); printf("%s:%d protocol: %s, roce_enable value: %d\n", __func__, __LINE__, (rf->protocol_used == IRDMA_IWARP_PROTOCOL_ONLY) ? 
"iWARP" : "RoCEv2", - rf->tun_info.roce_ena); + t_info->roce_ena); - snprintf(rf->tun_info.drv_ver, IRDMA_VER_LEN, "%s", irdma_driver_version); - SYSCTL_ADD_STRING(&rf->tun_info.irdma_sysctl_ctx, irdma_sysctl_oid_list, - OID_AUTO, "drv_ver", CTLFLAG_RDTUN, rf->tun_info.drv_ver, + snprintf(t_info->drv_ver, IRDMA_VER_LEN, "%s", irdma_driver_version); + SYSCTL_ADD_STRING(&t_info->irdma_sysctl_ctx, irdma_oid_list, + OID_AUTO, "drv_ver", CTLFLAG_RDTUN, t_info->drv_ver, IRDMA_VER_LEN, "driver version"); irdma_dcqcn_tunables_init(rf); + irdma_sysctl_settings(rf); } /** * irdma_find_handler - obtain hdl object to identify pf * @p_dev: the peer interface structure */ static struct irdma_handler * irdma_find_handler(struct ice_rdma_peer *p_dev) { struct irdma_handler *hdl; unsigned long flags; spin_lock_irqsave(&irdma_handler_lock, flags); list_for_each_entry(hdl, &irdma_handlers, list) { - if (!hdl) - continue; if (!hdl->iwdev->rf->peer_info) continue; if (hdl->iwdev->rf->peer_info->dev == p_dev->dev) { spin_unlock_irqrestore(&irdma_handler_lock, flags); return hdl; } } spin_unlock_irqrestore(&irdma_handler_lock, flags); return NULL; } /** * peer_to_iwdev - return iwdev based on peer * @peer: the peer interface structure */ static struct irdma_device * peer_to_iwdev(struct ice_rdma_peer *peer) { struct irdma_handler *hdl; hdl = irdma_find_handler(peer); if (!hdl) { printf("%s:%d rdma handler not found\n", __func__, __LINE__); return NULL; } return hdl->iwdev; } /** * irdma_get_qos_info - save qos info from parameters to internal struct * @l2params: destination, qos, tc, mtu info structure * @qos_info: source, DCB settings structure */ static void -irdma_get_qos_info(struct irdma_l2params *l2params, struct ice_qos_params *qos_info) +irdma_get_qos_info(struct irdma_pci_f *rf, struct irdma_l2params *l2params, + struct ice_qos_params *qos_info) { int i; + char txt[7][128] = {"", "", "", "", "", "", ""}; + u8 len; l2params->num_tc = qos_info->num_tc; l2params->num_apps = qos_info->num_apps; l2params->vsi_prio_type = qos_info->vsi_priority_type; l2params->vsi_rel_bw = qos_info->vsi_relative_bw; for (i = 0; i < l2params->num_tc; i++) { l2params->tc_info[i].egress_virt_up = qos_info->tc_info[i].egress_virt_up; l2params->tc_info[i].ingress_virt_up = qos_info->tc_info[i].ingress_virt_up; l2params->tc_info[i].prio_type = qos_info->tc_info[i].prio_type; l2params->tc_info[i].rel_bw = qos_info->tc_info[i].rel_bw; l2params->tc_info[i].tc_ctx = qos_info->tc_info[i].tc_ctx; } for (i = 0; i < IRDMA_MAX_USER_PRIORITY; i++) l2params->up2tc[i] = qos_info->up2tc[i]; if (qos_info->pfc_mode == IRDMA_QOS_MODE_DSCP) { l2params->dscp_mode = true; memcpy(l2params->dscp_map, qos_info->dscp_map, sizeof(l2params->dscp_map)); } - printf("%s:%d: l2params settings:\n num_tc %d,\n num_apps %d,\n", - __func__, __LINE__, l2params->num_tc, l2params->num_apps); - printf(" vsi_prio_type %d,\n vsi_rel_bw %d,\n egress_virt_up:", - l2params->vsi_prio_type, l2params->vsi_rel_bw); - for (i = 0; i < l2params->num_tc; i++) - printf(" %d", l2params->tc_info[i].egress_virt_up); - printf("\n ingress_virt_up:"); - for (i = 0; i < l2params->num_tc; i++) - printf(" %d", l2params->tc_info[i].ingress_virt_up); - printf("\n prio_type:"); - for (i = 0; i < l2params->num_tc; i++) - printf(" %d", l2params->tc_info[i].prio_type); - printf("\n rel_bw:"); - for (i = 0; i < l2params->num_tc; i++) - printf(" %d", l2params->tc_info[i].rel_bw); - printf("\n tc_ctx:"); - for (i = 0; i < l2params->num_tc; i++) - printf(" %lu", l2params->tc_info[i].tc_ctx); - 
printf("\n up2tc:"); + if (!(rf->sc_dev.debug_mask & IRDMA_DEBUG_DCB)) + return; + for (i = 0; i < l2params->num_tc; i++) { + len = strlen(txt[0]); + snprintf(txt[0] + len, sizeof(txt[0]) - 5, " %d", + l2params->tc_info[i].egress_virt_up); + len = strlen(txt[1]); + snprintf(txt[1] + len, sizeof(txt[1]) - 5, " %d", + l2params->tc_info[i].ingress_virt_up); + len = strlen(txt[2]); + snprintf(txt[2] + len, sizeof(txt[2]) - 5, " %d", + l2params->tc_info[i].prio_type); + len = strlen(txt[3]); + snprintf(txt[3] + len, sizeof(txt[3]) - 5, " %d", + l2params->tc_info[i].rel_bw); + len = strlen(txt[4]); + snprintf(txt[4] + len, sizeof(txt[4]) - 5, " %lu", + l2params->tc_info[i].tc_ctx); + } + len = strlen(txt[5]); for (i = 0; i < IRDMA_MAX_USER_PRIORITY; i++) - printf(" %d", l2params->up2tc[i]); - printf(" dscp_mode: %d,\n", l2params->dscp_mode); + len += snprintf(txt[5] + len, sizeof(txt[5]) - 5, " %d", + l2params->up2tc[i]); + len = strlen(txt[6]); for (i = 0; i < IRDMA_DSCP_NUM_VAL; i++) - printf(" %d", l2params->dscp_map[i]); - printf("\n"); - - dump_struct(l2params, sizeof(*l2params), "l2params"); + len += snprintf(txt[6] + len, sizeof(txt[6]) - 5, " %d", + l2params->dscp_map[i]); + irdma_debug(&rf->sc_dev, IRDMA_DEBUG_DCB, "num_tc: %d\n", l2params->num_tc); + irdma_debug(&rf->sc_dev, IRDMA_DEBUG_DCB, "num_apps: %d\n", l2params->num_apps); + irdma_debug(&rf->sc_dev, IRDMA_DEBUG_DCB, "vsi_prio_type: %d\n", l2params->vsi_prio_type); + irdma_debug(&rf->sc_dev, IRDMA_DEBUG_DCB, "vsi_rel_bw: %d\n", l2params->vsi_rel_bw); + irdma_debug(&rf->sc_dev, IRDMA_DEBUG_DCB, "egress_virt_up: %s\n", txt[0]); + irdma_debug(&rf->sc_dev, IRDMA_DEBUG_DCB, "ingress_virt_up:%s\n", txt[1]); + irdma_debug(&rf->sc_dev, IRDMA_DEBUG_DCB, "prio_type: %s\n", txt[2]); + irdma_debug(&rf->sc_dev, IRDMA_DEBUG_DCB, "rel_bw: %s\n", txt[3]); + irdma_debug(&rf->sc_dev, IRDMA_DEBUG_DCB, "tc_ctx: %s\n", txt[4]); + irdma_debug(&rf->sc_dev, IRDMA_DEBUG_DCB, "up2tc: %s\n", txt[5]); + irdma_debug(&rf->sc_dev, IRDMA_DEBUG_DCB, "dscp_mode: %s\n", txt[6]); + + irdma_debug_buf(&rf->sc_dev, IRDMA_DEBUG_DCB, "l2params", l2params, sizeof(*l2params)); } /** * irdma_log_invalid_mtu - check mtu setting validity * @mtu: mtu value * @dev: hardware control device structure */ static void irdma_log_invalid_mtu(u16 mtu, struct irdma_sc_dev *dev) { if (mtu < IRDMA_MIN_MTU_IPV4) irdma_dev_warn(to_ibdev(dev), "MTU setting [%d] too low for RDMA traffic. Minimum MTU is 576 for IPv4\n", mtu); else if (mtu < IRDMA_MIN_MTU_IPV6) irdma_dev_warn(to_ibdev(dev), "MTU setting [%d] too low for RDMA traffic. 
Minimum MTU is 1280 for IPv6\n", mtu); } +/** + * irdma_get_event_name - convert type enum to string + * @type: event type enum + */ +static const char * +irdma_get_event_name(enum ice_rdma_event_type type) +{ + switch (type) { + case ICE_RDMA_EVENT_LINK_CHANGE: + return "LINK CHANGE"; + case ICE_RDMA_EVENT_MTU_CHANGE: + return "MTU CHANGE"; + case ICE_RDMA_EVENT_TC_CHANGE: + return "TC CHANGE"; + case ICE_RDMA_EVENT_API_CHANGE: + return "API CHANGE"; + case ICE_RDMA_EVENT_CRIT_ERR: + return "CRITICAL ERROR"; + case ICE_RDMA_EVENT_RESET: + return "RESET"; + case ICE_RDMA_EVENT_QSET_REGISTER: + return "QSET REGISTER"; + case ICE_RDMA_EVENT_VSI_FILTER_UPDATE: + return "VSI FILTER UPDATE"; + default: + return "UNKNOWN"; + } +} + /** * irdma_event_handler - handling events from lan driver * @peer: the peer interface structure * @event: event info structure */ static void irdma_event_handler(struct ice_rdma_peer *peer, struct ice_rdma_event *event) { struct irdma_device *iwdev; struct irdma_l2params l2params = {}; printf("%s:%d event_handler %s (%x) on pf %d (%d)\n", __func__, __LINE__, - (event->type == 1) ? "LINK CHANGE" : - (event->type == 2) ? "MTU CHANGE" : - (event->type == 3) ? "TC CHANGE" : "UNKNOWN", + irdma_get_event_name(event->type), event->type, peer->pf_id, if_getdunit(peer->ifp)); iwdev = peer_to_iwdev(peer); if (!iwdev) { printf("%s:%d rdma device not found\n", __func__, __LINE__); return; } switch (event->type) { case ICE_RDMA_EVENT_LINK_CHANGE: printf("%s:%d PF: %x (%x), state: %d, speed: %lu\n", __func__, __LINE__, peer->pf_id, if_getdunit(peer->ifp), event->linkstate, event->baudrate); break; case ICE_RDMA_EVENT_MTU_CHANGE: if (iwdev->vsi.mtu != event->mtu) { l2params.mtu = event->mtu; l2params.mtu_changed = true; irdma_log_invalid_mtu(l2params.mtu, &iwdev->rf->sc_dev); irdma_change_l2params(&iwdev->vsi, &l2params); } break; case ICE_RDMA_EVENT_TC_CHANGE: /* * 1. check if it is pre or post 2. check if it is currently being done */ if (event->prep == iwdev->vsi.tc_change_pending) { printf("%s:%d can't process %s TC change if TC change is %spending\n", __func__, __LINE__, event->prep ? "pre" : "post", event->prep ?
" " : "not "); goto done; } + if (!atomic_inc_not_zero(&iwdev->rf->dev_ctx.event_rfcnt)) { + printf("%s:%d (%d) EVENT_TC_CHANGE received, but not processed %d\n", + __func__, __LINE__, if_getdunit(peer->ifp), + atomic_read(&iwdev->rf->dev_ctx.event_rfcnt)); + break; + } if (event->prep) { iwdev->vsi.tc_change_pending = true; irdma_sc_suspend_resume_qps(&iwdev->vsi, IRDMA_OP_SUSPEND); wait_event_timeout(iwdev->suspend_wq, !atomic_read(&iwdev->vsi.qp_suspend_reqs), IRDMA_EVENT_TIMEOUT_MS * 10); irdma_ws_reset(&iwdev->vsi); printf("%s:%d TC change preparation done\n", __func__, __LINE__); } else { l2params.tc_changed = true; - irdma_get_qos_info(&l2params, &event->port_qos); + irdma_get_qos_info(iwdev->rf, &l2params, &event->port_qos); if (iwdev->rf->protocol_used != IRDMA_IWARP_PROTOCOL_ONLY) iwdev->dcb_vlan_mode = l2params.num_tc > 1 && !l2params.dscp_mode; irdma_check_fc_for_tc_update(&iwdev->vsi, &l2params); irdma_change_l2params(&iwdev->vsi, &l2params); printf("%s:%d TC change done\n", __func__, __LINE__); } + atomic_dec(&iwdev->rf->dev_ctx.event_rfcnt); break; case ICE_RDMA_EVENT_CRIT_ERR: - printf("%s:%d event type received: %d\n", __func__, __LINE__, event->type); + if (event->oicr_reg & IRDMAPFINT_OICR_PE_CRITERR_M) { + u32 pe_criterr; + +#define IRDMA_Q1_RESOURCE_ERR 0x0001024d + pe_criterr = readl(iwdev->rf->sc_dev.hw_regs[IRDMA_GLPE_CRITERR]); + if (pe_criterr != IRDMA_Q1_RESOURCE_ERR) { + irdma_pr_err("critical PE Error, GLPE_CRITERR=0x%08x\n", + pe_criterr); + iwdev->rf->reset = true; + } else { + irdma_dev_warn(to_ibdev(&iwdev->rf->sc_dev), + "Q1 Resource Check\n"); + } + } + if (event->oicr_reg & IRDMAPFINT_OICR_HMC_ERR_M) { + irdma_pr_err("HMC Error\n"); + iwdev->rf->reset = true; + } + if (iwdev->rf->reset) + iwdev->rf->gen_ops.request_reset(iwdev->rf); + break; + case ICE_RDMA_EVENT_RESET: + iwdev->rf->reset = true; break; default: printf("%s:%d event type unsupported: %d\n", __func__, __LINE__, event->type); } done: return; } /** * irdma_link_change - Callback for link state change * @peer: the peer interface structure * @linkstate: state of the link * @baudrate: speed of the link */ static void irdma_link_change(struct ice_rdma_peer *peer, int linkstate, uint64_t baudrate) { printf("%s:%d PF: %x (%x), state: %d, speed: %lu\n", __func__, __LINE__, peer->pf_id, if_getdunit(peer->ifp), linkstate, baudrate); } /** * irdma_finalize_task - Finish open or close phase in a separate thread * @context: instance holding peer and iwdev information * * Triggered from irdma_open or irdma_close to perform rt_init_hw or * rt_deinit_hw respectively. Does registration and unregistration of * the device. 
*/ static void irdma_finalize_task(void *context, int pending) { struct irdma_task_arg *task_arg = (struct irdma_task_arg *)context; struct irdma_device *iwdev = task_arg->iwdev; struct irdma_pci_f *rf = iwdev->rf; struct ice_rdma_peer *peer = task_arg->peer; struct irdma_l2params l2params = {{{0}}}; struct ice_rdma_request req = {0}; int status = 0; if (iwdev->iw_status) { irdma_debug(&rf->sc_dev, IRDMA_DEBUG_INIT, "Starting deferred closing %d (%d)\n", rf->peer_info->pf_id, if_getdunit(peer->ifp)); + atomic_dec(&rf->dev_ctx.event_rfcnt); + wait_event_timeout(iwdev->suspend_wq, + !atomic_read(&rf->dev_ctx.event_rfcnt), + IRDMA_MAX_TIMEOUT); + if (atomic_read(&rf->dev_ctx.event_rfcnt) != 0) { + printf("%s:%d (%d) waiting for event_rfcnt (%d) timeout, proceed with unload\n", + __func__, __LINE__, if_getdunit(peer->ifp), + atomic_read(&rf->dev_ctx.event_rfcnt)); + } irdma_dereg_ipaddr_event_cb(rf); irdma_ib_unregister_device(iwdev); req.type = ICE_RDMA_EVENT_VSI_FILTER_UPDATE; req.enable_filter = false; IRDMA_DI_REQ_HANDLER(peer, &req); irdma_cleanup_dead_qps(&iwdev->vsi); irdma_rt_deinit_hw(iwdev); } else { irdma_debug(&rf->sc_dev, IRDMA_DEBUG_INIT, "Starting deferred opening %d (%d)\n", rf->peer_info->pf_id, if_getdunit(peer->ifp)); - irdma_get_qos_info(&l2params, &peer->initial_qos_info); + irdma_get_qos_info(iwdev->rf, &l2params, &peer->initial_qos_info); if (iwdev->rf->protocol_used != IRDMA_IWARP_PROTOCOL_ONLY) iwdev->dcb_vlan_mode = l2params.num_tc > 1 && !l2params.dscp_mode; l2params.mtu = peer->mtu; status = irdma_rt_init_hw(iwdev, &l2params); if (status) { irdma_pr_err("RT init failed %d\n", status); ib_dealloc_device(&iwdev->ibdev); return; } status = irdma_ib_register_device(iwdev); if (status) { irdma_pr_err("Registration failed %d\n", status); irdma_rt_deinit_hw(iwdev); ib_dealloc_device(&iwdev->ibdev); } + irdma_sw_stats_tunables_init(rf); req.type = ICE_RDMA_EVENT_VSI_FILTER_UPDATE; req.enable_filter = true; IRDMA_DI_REQ_HANDLER(peer, &req); irdma_reg_ipaddr_event_cb(rf); + atomic_inc(&rf->dev_ctx.event_rfcnt); irdma_debug(&rf->sc_dev, IRDMA_DEBUG_INIT, "Deferred opening finished %d (%d)\n", rf->peer_info->pf_id, if_getdunit(peer->ifp)); } } -/** - * irdma_open - Callback for operation open for RDMA device - * @peer: the new peer interface structure - * - * Callback implementing the RDMA_OPEN function. Called by the ice driver to - * notify the RDMA client driver that a new device has been initialized. - */ -static int -irdma_open(struct ice_rdma_peer *peer) -{ - struct ice_rdma_event event = {0}; - - event.type = ICE_RDMA_EVENT_MTU_CHANGE; - event.mtu = peer->mtu; - - irdma_event_handler(peer, &event); - - return 0; -} - -/** - * irdma_close - Callback to notify that a peer device is down - * @peer: the RDMA peer device being stopped - * - * Callback implementing the RDMA_CLOSE function. Called by the ice driver to - * notify the RDMA client driver that a peer device is being stopped. - */ -static int -irdma_close(struct ice_rdma_peer *peer) -{ - /* - * This is called when ifconfig down. Keeping it for compatibility with ice. This event might be usefull for - * future. 
- */ - return 0; -} - /** * irdma_alloc_pcidev - allocate memory for pcidev and populate data * @peer: the new peer interface structure * @rf: RDMA PCI function */ static int irdma_alloc_pcidev(struct ice_rdma_peer *peer, struct irdma_pci_f *rf) { rf->pcidev = kzalloc(sizeof(struct pci_dev), GFP_KERNEL); if (!rf->pcidev) { return -ENOMEM; } if (linux_pci_attach_device(rf->dev_ctx.dev, NULL, NULL, rf->pcidev)) return -ENOMEM; return 0; } /** * irdma_dealloc_pcidev - deallocate memory for pcidev * @rf: RDMA PCI function */ static void irdma_dealloc_pcidev(struct irdma_pci_f *rf) { linux_pci_detach_device(rf->pcidev); kfree(rf->pcidev); } /** * irdma_fill_device_info - assign initial values to rf variables * @iwdev: irdma device * @peer: the peer interface structure */ static void irdma_fill_device_info(struct irdma_device *iwdev, struct ice_rdma_peer *peer) { struct irdma_pci_f *rf = iwdev->rf; rf->peer_info = peer; rf->gen_ops.register_qset = irdma_register_qset; rf->gen_ops.unregister_qset = irdma_unregister_qset; rf->rdma_ver = IRDMA_GEN_2; rf->sc_dev.hw_attrs.uk_attrs.hw_rev = IRDMA_GEN_2; rf->rsrc_profile = IRDMA_HMC_PROFILE_DEFAULT; rf->rst_to = IRDMA_RST_TIMEOUT_HZ; rf->check_fc = irdma_check_fc_for_qp; rf->gen_ops.request_reset = irdma_request_reset; irdma_set_rf_user_cfg_params(rf); rf->default_vsi.vsi_idx = peer->pf_vsi_num; rf->dev_ctx.dev = peer->dev; rf->dev_ctx.mem_bus_space_tag = rman_get_bustag(peer->pci_mem); rf->dev_ctx.mem_bus_space_handle = rman_get_bushandle(peer->pci_mem); rf->dev_ctx.mem_bus_space_size = rman_get_size(peer->pci_mem); rf->hw.dev_context = &rf->dev_ctx; rf->hw.hw_addr = (u8 *)rman_get_virtual(peer->pci_mem); rf->msix_count = peer->msix.count; rf->msix_info.entry = peer->msix.base; rf->msix_info.vector = peer->msix.count; printf("%s:%d msix_info: %d %d %d\n", __func__, __LINE__, rf->msix_count, rf->msix_info.entry, rf->msix_info.vector); rf->iwdev = iwdev; iwdev->netdev = peer->ifp; iwdev->init_state = INITIAL_STATE; iwdev->vsi_num = peer->pf_vsi_num; iwdev->rcv_wnd = IRDMA_CM_DEFAULT_RCV_WND_SCALED; iwdev->rcv_wscale = IRDMA_CM_DEFAULT_RCV_WND_SCALE; iwdev->roce_cwnd = IRDMA_ROCE_CWND_DEFAULT; iwdev->roce_ackcreds = IRDMA_ROCE_ACKCREDS_DEFAULT; iwdev->roce_rtomin = 5; if (rf->protocol_used == IRDMA_ROCE_PROTOCOL_ONLY) { iwdev->roce_mode = true; } } /** * irdma_probe - Callback to probe a new RDMA peer device * @peer: the new peer interface structure * * Callback implementing the RDMA_PROBE function. 
Called by the ice driver to * notify the RDMA client driver that a new device has been created */ static int irdma_probe(struct ice_rdma_peer *peer) { struct irdma_device *iwdev; struct irdma_pci_f *rf; struct irdma_handler *hdl; int err = 0; irdma_pr_info("probe: irdma-%s peer=%p, peer->pf_id=%d, peer->ifp=%p, peer->ifp->if_dunit=%d, peer->pci_mem->r_bustag=%p\n", irdma_driver_version, peer, peer->pf_id, peer->ifp, if_getdunit(peer->ifp), (void *)(uintptr_t)peer->pci_mem->r_bustag); hdl = irdma_find_handler(peer); if (hdl) return -EBUSY; hdl = kzalloc(sizeof(*hdl), GFP_KERNEL); if (!hdl) return -ENOMEM; iwdev = (struct irdma_device *)ib_alloc_device(sizeof(*iwdev)); if (!iwdev) { kfree(hdl); return -ENOMEM; } iwdev->rf = kzalloc(sizeof(*rf), GFP_KERNEL); if (!iwdev->rf) { ib_dealloc_device(&iwdev->ibdev); kfree(hdl); return -ENOMEM; } hdl->iwdev = iwdev; iwdev->hdl = hdl; irdma_init_tunable(iwdev->rf, if_getdunit(peer->ifp)); irdma_fill_device_info(iwdev, peer); rf = iwdev->rf; if (irdma_alloc_pcidev(peer, rf)) goto err_pcidev; irdma_add_handler(hdl); if (irdma_ctrl_init_hw(rf)) { err = -EIO; goto err_ctrl_init; } rf->dev_ctx.task_arg.peer = peer; rf->dev_ctx.task_arg.iwdev = iwdev; rf->dev_ctx.task_arg.peer = peer; TASK_INIT(&hdl->deferred_task, 0, irdma_finalize_task, &rf->dev_ctx.task_arg); hdl->deferred_tq = taskqueue_create_fast("irdma_defer", M_NOWAIT, taskqueue_thread_enqueue, &hdl->deferred_tq); taskqueue_start_threads(&hdl->deferred_tq, 1, PI_NET, "irdma_defer_t"); taskqueue_enqueue(hdl->deferred_tq, &hdl->deferred_task); return 0; err_ctrl_init: irdma_del_handler(hdl); irdma_dealloc_pcidev(rf); err_pcidev: kfree(iwdev->rf); ib_dealloc_device(&iwdev->ibdev); kfree(hdl); return err; } /** * irdma_remove - Callback to remove an RDMA peer device * @peer: the new peer interface structure * * Callback implementing the RDMA_REMOVE function. Called by the ice driver to * notify the RDMA client driver that the device wille be delated */ static int irdma_remove(struct ice_rdma_peer *peer) { struct irdma_handler *hdl; struct irdma_device *iwdev; irdma_debug((struct irdma_sc_dev *)NULL, IRDMA_DEBUG_INIT, "removing %s irdma%d\n", __func__, if_getdunit(peer->ifp)); hdl = irdma_find_handler(peer); if (!hdl) return 0; iwdev = hdl->iwdev; if (iwdev->vsi.tc_change_pending) { iwdev->vsi.tc_change_pending = false; irdma_sc_suspend_resume_qps(&iwdev->vsi, IRDMA_OP_RESUME); } taskqueue_enqueue(hdl->deferred_tq, &hdl->deferred_task); taskqueue_drain(hdl->deferred_tq, &hdl->deferred_task); taskqueue_free(hdl->deferred_tq); hdl->iwdev->rf->dev_ctx.task_arg.iwdev = NULL; hdl->iwdev->rf->dev_ctx.task_arg.peer = NULL; sysctl_ctx_free(&iwdev->rf->tun_info.irdma_sysctl_ctx); hdl->iwdev->rf->tun_info.irdma_sysctl_tree = NULL; + hdl->iwdev->rf->tun_info.sws_sysctl_tree = NULL; irdma_ctrl_deinit_hw(iwdev->rf); irdma_dealloc_pcidev(iwdev->rf); irdma_del_handler(iwdev->hdl); kfree(iwdev->hdl); kfree(iwdev->rf); ib_dealloc_device(&iwdev->ibdev); irdma_pr_info("IRDMA hardware deinitialization complete irdma%d\n", if_getdunit(peer->ifp)); return 0; } +/** + * irdma_open - Callback for operation open for RDMA device + * @peer: the new peer interface structure + * + * Callback implementing the RDMA_OPEN function. Called by the ice driver to + * notify the RDMA client driver that a new device has been initialized. 
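/*
 * Probe hands the heavyweight open/close work to a private taskqueue, and
 * remove later drains and frees it.  The sketch below condenses that
 * lifecycle using the same taskqueue(9) KPIs that appear above; struct
 * my_softc, my_deferred_fn() and the "my_defer" names are placeholders, and
 * the headers listed are the usual ones for taskqueue users rather than the
 * driver's own includes.
 */
#include <sys/param.h>
#include <sys/kernel.h>
#include <sys/malloc.h>
#include <sys/priority.h>
#include <sys/taskqueue.h>

struct my_softc {
	struct task		 deferred_task;
	struct taskqueue	*deferred_tq;
};

static void
my_deferred_fn(void *context, int pending __unused)
{
	struct my_softc *sc = context;

	(void)sc;	/* heavyweight init/teardown runs here, off-context */
}

static void
my_defer_start(struct my_softc *sc)
{
	TASK_INIT(&sc->deferred_task, 0, my_deferred_fn, sc);
	sc->deferred_tq = taskqueue_create_fast("my_defer", M_NOWAIT,
	    taskqueue_thread_enqueue, &sc->deferred_tq);
	taskqueue_start_threads(&sc->deferred_tq, 1, PI_NET, "my_defer_t");
	taskqueue_enqueue(sc->deferred_tq, &sc->deferred_task);
}

static void
my_defer_stop(struct my_softc *sc)
{
	taskqueue_drain(sc->deferred_tq, &sc->deferred_task);	/* wait for it */
	taskqueue_free(sc->deferred_tq);
}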
+ */ +static int +irdma_open(struct ice_rdma_peer *peer) +{ + struct irdma_device *iwdev; + struct ice_rdma_event event = {0}; + + iwdev = peer_to_iwdev(peer); + if (iwdev) { + event.type = ICE_RDMA_EVENT_MTU_CHANGE; + event.mtu = peer->mtu; + + irdma_event_handler(peer, &event); + } else { + irdma_probe(peer); + } + + return 0; +} + +/** + * irdma_close - Callback to notify that a peer device is down + * @peer: the RDMA peer device being stopped + * + * Callback implementing the RDMA_CLOSE function. Called by the ice driver to + * notify the RDMA client driver that a peer device is being stopped. + */ +static int +irdma_close(struct ice_rdma_peer *peer) +{ + /* + * This is called when ifconfig down or pf-reset is about to happen. + */ + struct irdma_device *iwdev; + + iwdev = peer_to_iwdev(peer); + if (iwdev && iwdev->rf->reset) + irdma_remove(peer); + + return 0; +} + /** * irdma_prep_for_unregister - ensure the driver is ready to unregister */ static void irdma_prep_for_unregister(void) { struct irdma_handler *hdl; unsigned long flags; bool hdl_valid; do { hdl_valid = false; spin_lock_irqsave(&irdma_handler_lock, flags); list_for_each_entry(hdl, &irdma_handlers, list) { - if (!hdl) - continue; if (!hdl->iwdev->rf->peer_info) continue; hdl_valid = true; break; } spin_unlock_irqrestore(&irdma_handler_lock, flags); if (!hdl || !hdl_valid) break; IRDMA_CLOSE(hdl->iwdev->rf->peer_info); IRDMA_REMOVE(hdl->iwdev->rf->peer_info); } while (1); } static kobj_method_t irdma_methods[] = { KOBJMETHOD(irdma_probe, irdma_probe), KOBJMETHOD(irdma_open, irdma_open), KOBJMETHOD(irdma_close, irdma_close), KOBJMETHOD(irdma_remove, irdma_remove), KOBJMETHOD(irdma_link_change, irdma_link_change), KOBJMETHOD(irdma_event_handler, irdma_event_handler), KOBJMETHOD_END }; /* declare irdma_class which extends the ice_rdma_di class */ DEFINE_CLASS_1(irdma, irdma_class, irdma_methods, sizeof(struct ice_rdma_peer), ice_rdma_di_class); static struct ice_rdma_info irdma_info = { .major_version = ICE_RDMA_MAJOR_VERSION, .minor_version = ICE_RDMA_MINOR_VERSION, .patch_version = ICE_RDMA_PATCH_VERSION, .rdma_class = &irdma_class, }; /** * irdma_module_event_handler - Module event handler callback * @mod: unused mod argument * @what: the module event to handle * @arg: unused module event argument * * Callback used by the FreeBSD module stack to notify the driver of module * events. Used to implement custom handling for certain module events such as * load and unload. */ static int irdma_module_event_handler(module_t __unused mod, int what, void __unused * arg) { switch (what) { case MOD_LOAD: printf("Loading irdma module\n"); return ice_rdma_register(&irdma_info); case MOD_UNLOAD: printf("Unloading irdma module\n"); irdma_prep_for_unregister(); ice_rdma_unregister(); return (0); default: return (EOPNOTSUPP); } return (0); } static moduledata_t irdma_moduledata = { "irdma", irdma_module_event_handler, NULL }; DECLARE_MODULE(irdma, irdma_moduledata, SI_SUB_LAST, SI_ORDER_ANY); MODULE_VERSION(irdma, 1); MODULE_DEPEND(irdma, ice, 1, 1, 1); MODULE_DEPEND(irdma, ibcore, 1, 1, 1); diff --git a/sys/dev/irdma/irdma.h b/sys/dev/irdma/irdma.h index d30bf5f20955..39f406180d34 100644 --- a/sys/dev/irdma/irdma.h +++ b/sys/dev/irdma/irdma.h @@ -1,233 +1,233 @@ /*- * SPDX-License-Identifier: GPL-2.0 or Linux-OpenIB * - * Copyright (c) 2017 - 2022 Intel Corporation + * Copyright (c) 2017 - 2023 Intel Corporation * * This software is available to you under a choice of one of two * licenses. 
You may choose to be licensed under the terms of the GNU * General Public License (GPL) Version 2, available from the file * COPYING in the main directory of this source tree, or the * OpenFabrics.org BSD license below: * * Redistribution and use in source and binary forms, with or * without modification, are permitted provided that the following * conditions are met: * * - Redistributions of source code must retain the above * copyright notice, this list of conditions and the following * disclaimer. * * - Redistributions in binary form must reproduce the above * copyright notice, this list of conditions and the following * disclaimer in the documentation and/or other materials * provided with the distribution. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ #ifndef IRDMA_H #define IRDMA_H #define RDMA_BIT2(type, a) ((u##type) 1UL << a) #define RDMA_MASK3(type, mask, shift) ((u##type) mask << shift) #define MAKEMASK(m, s) ((m) << (s)) #define IRDMA_WQEALLOC_WQE_DESC_INDEX_S 20 #define IRDMA_WQEALLOC_WQE_DESC_INDEX GENMASK(31, 20) #define IRDMA_CQPTAIL_WQTAIL_S 0 #define IRDMA_CQPTAIL_WQTAIL GENMASK(10, 0) #define IRDMA_CQPTAIL_CQP_OP_ERR_S 31 #define IRDMA_CQPTAIL_CQP_OP_ERR BIT(31) #define IRDMA_CQPERRCODES_CQP_MINOR_CODE_S 0 #define IRDMA_CQPERRCODES_CQP_MINOR_CODE GENMASK(15, 0) #define IRDMA_CQPERRCODES_CQP_MAJOR_CODE_S 16 #define IRDMA_CQPERRCODES_CQP_MAJOR_CODE GENMASK(31, 16) #define IRDMA_GLPCI_LBARCTRL_PE_DB_SIZE_S 4 #define IRDMA_GLPCI_LBARCTRL_PE_DB_SIZE GENMASK(5, 4) #define IRDMA_GLINT_RATE_INTERVAL_S 0 #define IRDMA_GLINT_RATE_INTERVAL GENMASK(4, 0) #define IRDMA_GLINT_RATE_INTRL_ENA_S 6 #define IRDMA_GLINT_RATE_INTRL_ENA_M BIT(6) #define IRDMA_GLINT_RATE_INTRL_ENA BIT(6) #define IRDMA_GLINT_DYN_CTL_INTENA_S 0 #define IRDMA_GLINT_DYN_CTL_INTENA BIT(0) #define IRDMA_GLINT_DYN_CTL_CLEARPBA_S 1 #define IRDMA_GLINT_DYN_CTL_CLEARPBA BIT(1) #define IRDMA_GLINT_DYN_CTL_ITR_INDX_S 3 #define IRDMA_GLINT_DYN_CTL_ITR_INDX GENMASK(4, 3) #define IRDMA_GLINT_DYN_CTL_INTERVAL_S 5 #define IRDMA_GLINT_DYN_CTL_INTERVAL GENMASK(16, 5) #define IRDMA_GLINT_CEQCTL_ITR_INDX_S 11 #define IRDMA_GLINT_CEQCTL_ITR_INDX GENMASK(12, 11) #define IRDMA_GLINT_CEQCTL_CAUSE_ENA_S 30 #define IRDMA_GLINT_CEQCTL_CAUSE_ENA BIT(30) #define IRDMA_GLINT_CEQCTL_MSIX_INDX_S 0 #define IRDMA_GLINT_CEQCTL_MSIX_INDX GENMASK(10, 0) #define IRDMA_PFINT_AEQCTL_MSIX_INDX_S 0 #define IRDMA_PFINT_AEQCTL_MSIX_INDX GENMASK(10, 0) #define IRDMA_PFINT_AEQCTL_ITR_INDX_S 11 #define IRDMA_PFINT_AEQCTL_ITR_INDX GENMASK(12, 11) #define IRDMA_PFINT_AEQCTL_CAUSE_ENA_S 30 #define IRDMA_PFINT_AEQCTL_CAUSE_ENA BIT(30) #define IRDMA_PFHMC_PDINV_PMSDIDX_S 0 #define IRDMA_PFHMC_PDINV_PMSDIDX GENMASK(11, 0) #define IRDMA_PFHMC_PDINV_PMSDPARTSEL_S 15 #define IRDMA_PFHMC_PDINV_PMSDPARTSEL BIT(15) #define IRDMA_PFHMC_PDINV_PMPDIDX_S 16 #define IRDMA_PFHMC_PDINV_PMPDIDX GENMASK(24, 16) #define IRDMA_PFHMC_SDDATALOW_PMSDVALID_S 0 #define IRDMA_PFHMC_SDDATALOW_PMSDVALID BIT(0) #define IRDMA_PFHMC_SDDATALOW_PMSDTYPE_S 1 #define IRDMA_PFHMC_SDDATALOW_PMSDTYPE BIT(1) #define IRDMA_PFHMC_SDDATALOW_PMSDBPCOUNT_S 2 #define 
IRDMA_PFHMC_SDDATALOW_PMSDBPCOUNT GENMASK(11, 2) #define IRDMA_PFHMC_SDDATALOW_PMSDDATALOW_S 12 #define IRDMA_PFHMC_SDDATALOW_PMSDDATALOW GENMASK(31, 12) #define IRDMA_PFHMC_SDCMD_PMSDWR_S 31 #define IRDMA_PFHMC_SDCMD_PMSDWR BIT(31) #define IRDMA_PFHMC_SDCMD_PMSDPARTSEL_S 15 #define IRDMA_PFHMC_SDCMD_PMSDPARTSEL BIT(15) #define IRDMA_INVALID_CQ_IDX 0xffffffff enum irdma_dyn_idx_t { IRDMA_IDX_ITR0 = 0, IRDMA_IDX_ITR1 = 1, IRDMA_IDX_ITR2 = 2, IRDMA_IDX_NOITR = 3, }; enum irdma_registers { IRDMA_CQPTAIL, IRDMA_CQPDB, IRDMA_CCQPSTATUS, IRDMA_CCQPHIGH, IRDMA_CCQPLOW, IRDMA_CQARM, IRDMA_CQACK, IRDMA_AEQALLOC, IRDMA_CQPERRCODES, IRDMA_WQEALLOC, IRDMA_GLINT_DYN_CTL, IRDMA_DB_ADDR_OFFSET, IRDMA_GLPCI_LBARCTRL, IRDMA_GLPE_CPUSTATUS0, IRDMA_GLPE_CPUSTATUS1, IRDMA_GLPE_CPUSTATUS2, IRDMA_PFINT_AEQCTL, IRDMA_GLINT_CEQCTL, IRDMA_VSIQF_PE_CTL1, IRDMA_PFHMC_PDINV, IRDMA_GLHMC_VFPDINV, IRDMA_GLPE_CRITERR, IRDMA_GLINT_RATE, IRDMA_MAX_REGS, /* Must be last entry */ }; enum irdma_shifts { IRDMA_CCQPSTATUS_CCQP_DONE_S, IRDMA_CCQPSTATUS_CCQP_ERR_S, IRDMA_CQPSQ_STAG_PDID_S, IRDMA_CQPSQ_CQ_CEQID_S, IRDMA_CQPSQ_CQ_CQID_S, IRDMA_COMMIT_FPM_CQCNT_S, IRDMA_CQPSQ_UPESD_HMCFNID_S, IRDMA_MAX_SHIFTS, }; enum irdma_masks { IRDMA_CCQPSTATUS_CCQP_DONE_M, IRDMA_CCQPSTATUS_CCQP_ERR_M, IRDMA_CQPSQ_STAG_PDID_M, IRDMA_CQPSQ_CQ_CEQID_M, IRDMA_CQPSQ_CQ_CQID_M, IRDMA_COMMIT_FPM_CQCNT_M, IRDMA_CQPSQ_UPESD_HMCFNID_M, IRDMA_MAX_MASKS, /* Must be last entry */ }; #define IRDMA_MAX_MGS_PER_CTX 8 struct irdma_mcast_grp_ctx_entry_info { u32 qp_id; bool valid_entry; u16 dest_port; u32 use_cnt; }; struct irdma_mcast_grp_info { - u8 dest_mac_addr[ETH_ALEN]; + u8 dest_mac_addr[ETHER_ADDR_LEN]; u16 vlan_id; u16 hmc_fcn_id; bool ipv4_valid:1; bool vlan_valid:1; u16 mg_id; u32 no_of_mgs; u32 dest_ip_addr[4]; u16 qs_handle; struct irdma_dma_mem dma_mem_mc; struct irdma_mcast_grp_ctx_entry_info mg_ctx_info[IRDMA_MAX_MGS_PER_CTX]; }; enum irdma_vers { IRDMA_GEN_RSVD = 0, IRDMA_GEN_1 = 1, IRDMA_GEN_2 = 2, IRDMA_GEN_MAX = 2, }; struct irdma_uk_attrs { u64 feature_flags; u32 max_hw_wq_frags; u32 max_hw_read_sges; u32 max_hw_inline; u32 max_hw_rq_quanta; u32 max_hw_wq_quanta; u32 min_hw_cq_size; u32 max_hw_cq_size; u16 max_hw_sq_chunk; u16 min_hw_wq_size; u8 hw_rev; }; struct irdma_hw_attrs { struct irdma_uk_attrs uk_attrs; u64 max_hw_outbound_msg_size; u64 max_hw_inbound_msg_size; u64 max_mr_size; u64 page_size_cap; u32 min_hw_qp_id; u32 min_hw_aeq_size; u32 max_hw_aeq_size; u32 min_hw_ceq_size; u32 max_hw_ceq_size; u32 max_hw_device_pages; u32 max_hw_vf_fpm_id; u32 first_hw_vf_fpm_id; u32 max_hw_ird; u32 max_hw_ord; u32 max_hw_wqes; u32 max_hw_pds; u32 max_hw_ena_vf_count; u32 max_qp_wr; u32 max_pe_ready_count; u32 max_done_count; u32 max_sleep_count; u32 max_cqp_compl_wait_time_ms; u16 max_stat_inst; u16 max_stat_idx; }; void icrdma_init_hw(struct irdma_sc_dev *dev); void irdma_check_fc_for_qp(struct irdma_sc_vsi *vsi, struct irdma_sc_qp *sc_qp); #endif /* IRDMA_H*/ diff --git a/sys/dev/irdma/irdma_cm.c b/sys/dev/irdma/irdma_cm.c index b4a26090ce92..48aefda681cb 100644 --- a/sys/dev/irdma/irdma_cm.c +++ b/sys/dev/irdma/irdma_cm.c @@ -1,4249 +1,4260 @@ /*- * SPDX-License-Identifier: GPL-2.0 or Linux-OpenIB * - * Copyright (c) 2015 - 2022 Intel Corporation + * Copyright (c) 2015 - 2023 Intel Corporation * * This software is available to you under a choice of one of two * licenses. 
You may choose to be licensed under the terms of the GNU * General Public License (GPL) Version 2, available from the file * COPYING in the main directory of this source tree, or the * OpenFabrics.org BSD license below: * * Redistribution and use in source and binary forms, with or * without modification, are permitted provided that the following * conditions are met: * * - Redistributions of source code must retain the above * copyright notice, this list of conditions and the following * disclaimer. * * - Redistributions in binary form must reproduce the above * copyright notice, this list of conditions and the following * disclaimer in the documentation and/or other materials * provided with the distribution. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ #include "irdma_main.h" static void irdma_cm_post_event(struct irdma_cm_event *event); static void irdma_disconnect_worker(struct work_struct *work); /** * irdma_free_sqbuf - put back puda buffer if refcount is 0 * @vsi: The VSI structure of the device * @bufp: puda buffer to free */ void irdma_free_sqbuf(struct irdma_sc_vsi *vsi, void *bufp) { struct irdma_puda_buf *buf = bufp; struct irdma_puda_rsrc *ilq = vsi->ilq; if (atomic_dec_and_test(&buf->refcount)) irdma_puda_ret_bufpool(ilq, buf); } /** * irdma_record_ird_ord - Record IRD/ORD passed in * @cm_node: connection's node * @conn_ird: connection IRD * @conn_ord: connection ORD */ static void irdma_record_ird_ord(struct irdma_cm_node *cm_node, u32 conn_ird, u32 conn_ord) { if (conn_ird > cm_node->dev->hw_attrs.max_hw_ird) conn_ird = cm_node->dev->hw_attrs.max_hw_ird; if (conn_ord > cm_node->dev->hw_attrs.max_hw_ord) conn_ord = cm_node->dev->hw_attrs.max_hw_ord; else if (!conn_ord && cm_node->send_rdma0_op == SEND_RDMA_READ_ZERO) conn_ord = 1; cm_node->ird_size = conn_ird; cm_node->ord_size = conn_ord; } /** * irdma_copy_ip_ntohl - copy IP address from network to host * @dst: IP address in host order * @src: IP address in network order (big endian) */ void irdma_copy_ip_ntohl(u32 *dst, __be32 *src) { *dst++ = ntohl(*src++); *dst++ = ntohl(*src++); *dst++ = ntohl(*src++); *dst = ntohl(*src); } /** * irdma_copy_ip_htonl - copy IP address from host to network order * @dst: IP address in network order (big endian) * @src: IP address in host order */ void irdma_copy_ip_htonl(__be32 *dst, u32 *src) { *dst++ = htonl(*src++); *dst++ = htonl(*src++); *dst++ = htonl(*src++); *dst = htonl(*src); } /** * irdma_get_addr_info * @cm_node: contains ip/tcp info * @cm_info: to get a copy of the cm_node ip/tcp info */ static void irdma_get_addr_info(struct irdma_cm_node *cm_node, struct irdma_cm_info *cm_info) { memset(cm_info, 0, sizeof(*cm_info)); cm_info->ipv4 = cm_node->ipv4; cm_info->vlan_id = cm_node->vlan_id; memcpy(cm_info->loc_addr, cm_node->loc_addr, sizeof(cm_info->loc_addr)); memcpy(cm_info->rem_addr, cm_node->rem_addr, sizeof(cm_info->rem_addr)); cm_info->loc_port = cm_node->loc_port; cm_info->rem_port = cm_node->rem_port; } /** * irdma_fill_sockaddr4 - fill in addr info for IPv4 connection * @cm_node: connection's node * @event: upper layer's cm event */ static 
inline void irdma_fill_sockaddr4(struct irdma_cm_node *cm_node, struct iw_cm_event *event) { struct sockaddr_in *laddr = (struct sockaddr_in *)&event->local_addr; struct sockaddr_in *raddr = (struct sockaddr_in *)&event->remote_addr; laddr->sin_family = AF_INET; raddr->sin_family = AF_INET; laddr->sin_port = htons(cm_node->loc_port); raddr->sin_port = htons(cm_node->rem_port); laddr->sin_addr.s_addr = htonl(cm_node->loc_addr[0]); raddr->sin_addr.s_addr = htonl(cm_node->rem_addr[0]); } /** * irdma_fill_sockaddr6 - fill in addr info for IPv6 connection * @cm_node: connection's node * @event: upper layer's cm event */ static inline void irdma_fill_sockaddr6(struct irdma_cm_node *cm_node, struct iw_cm_event *event) { struct sockaddr_in6 *laddr6 = (struct sockaddr_in6 *)&event->local_addr; struct sockaddr_in6 *raddr6 = (struct sockaddr_in6 *)&event->remote_addr; laddr6->sin6_family = AF_INET6; raddr6->sin6_family = AF_INET6; laddr6->sin6_port = htons(cm_node->loc_port); raddr6->sin6_port = htons(cm_node->rem_port); irdma_copy_ip_htonl(laddr6->sin6_addr.__u6_addr.__u6_addr32, cm_node->loc_addr); irdma_copy_ip_htonl(raddr6->sin6_addr.__u6_addr.__u6_addr32, cm_node->rem_addr); } /** * irdma_get_cmevent_info - for cm event upcall * @cm_node: connection's node * @cm_id: upper layers cm struct for the event * @event: upper layer's cm event */ static inline void irdma_get_cmevent_info(struct irdma_cm_node *cm_node, struct iw_cm_id *cm_id, struct iw_cm_event *event) { memcpy(&event->local_addr, &cm_id->m_local_addr, sizeof(event->local_addr)); memcpy(&event->remote_addr, &cm_id->m_remote_addr, sizeof(event->remote_addr)); if (cm_node) { event->private_data = cm_node->pdata_buf; event->private_data_len = (u8)cm_node->pdata.size; event->ird = cm_node->ird_size; event->ord = cm_node->ord_size; } } /** * irdma_send_cm_event - upcall cm's event handler * @cm_node: connection's node * @cm_id: upper layer's cm info struct * @type: Event type to indicate * @status: status for the event type */ static int irdma_send_cm_event(struct irdma_cm_node *cm_node, struct iw_cm_id *cm_id, enum iw_cm_event_type type, int status) { struct iw_cm_event event = {0}; event.event = type; event.status = status; irdma_debug(&cm_node->iwdev->rf->sc_dev, IRDMA_DEBUG_CM, "cm_node %p cm_id=%p state=%d accel=%d event_type=%d status=%d\n", cm_node, cm_id, cm_node->accelerated, cm_node->state, type, status); switch (type) { case IW_CM_EVENT_CONNECT_REQUEST: if (cm_node->ipv4) irdma_fill_sockaddr4(cm_node, &event); else irdma_fill_sockaddr6(cm_node, &event); event.provider_data = cm_node; event.private_data = cm_node->pdata_buf; event.private_data_len = (u8)cm_node->pdata.size; event.ird = cm_node->ird_size; break; case IW_CM_EVENT_CONNECT_REPLY: irdma_get_cmevent_info(cm_node, cm_id, &event); break; case IW_CM_EVENT_ESTABLISHED: event.ird = cm_node->ird_size; event.ord = cm_node->ord_size; break; case IW_CM_EVENT_DISCONNECT: case IW_CM_EVENT_CLOSE: /* Wait if we are in RTS but havent issued the iwcm event upcall */ if (!cm_node->accelerated) wait_for_completion(&cm_node->establish_comp); break; default: return -EINVAL; } return cm_id->event_handler(cm_id, &event); } /** * irdma_timer_list_prep - add connection nodes to a list to perform timer tasks * @cm_core: cm's core * @timer_list: a timer list to which cm_node will be selected */ static void irdma_timer_list_prep(struct irdma_cm_core *cm_core, struct list_head *timer_list) { struct irdma_cm_node *cm_node; int bkt; HASH_FOR_EACH_RCU(cm_core->cm_hash_tbl, bkt, cm_node, list) { if 
((cm_node->close_entry || cm_node->send_entry) && atomic_inc_not_zero(&cm_node->refcnt)) list_add(&cm_node->timer_entry, timer_list); } } /** * irdma_create_event - create cm event * @cm_node: connection's node * @type: Event type to generate */ static struct irdma_cm_event * irdma_create_event(struct irdma_cm_node *cm_node, enum irdma_cm_event_type type) { struct irdma_cm_event *event; if (!cm_node->cm_id) return NULL; event = kzalloc(sizeof(*event), GFP_ATOMIC); if (!event) return NULL; event->type = type; event->cm_node = cm_node; memcpy(event->cm_info.rem_addr, cm_node->rem_addr, sizeof(event->cm_info.rem_addr)); memcpy(event->cm_info.loc_addr, cm_node->loc_addr, sizeof(event->cm_info.loc_addr)); event->cm_info.rem_port = cm_node->rem_port; event->cm_info.loc_port = cm_node->loc_port; event->cm_info.cm_id = cm_node->cm_id; irdma_debug(&cm_node->iwdev->rf->sc_dev, IRDMA_DEBUG_CM, - "node=%p event=%p type=%u dst=%pI4 src=%pI4\n", - cm_node, - event, type, event->cm_info.loc_addr, - event->cm_info.rem_addr); + "node=%p event=%p type=%u dst=%x src=%x\n", cm_node, event, + type, event->cm_info.loc_addr[0], + event->cm_info.rem_addr[0]); irdma_cm_post_event(event); return event; } /** * irdma_free_retrans_entry - free send entry * @cm_node: connection's node */ static void irdma_free_retrans_entry(struct irdma_cm_node *cm_node) { struct irdma_device *iwdev = cm_node->iwdev; struct irdma_timer_entry *send_entry; send_entry = cm_node->send_entry; if (!send_entry) return; cm_node->send_entry = NULL; irdma_free_sqbuf(&iwdev->vsi, send_entry->sqbuf); kfree(send_entry); atomic_dec(&cm_node->refcnt); } /** * irdma_cleanup_retrans_entry - free send entry with lock * @cm_node: connection's node */ static void irdma_cleanup_retrans_entry(struct irdma_cm_node *cm_node) { unsigned long flags; spin_lock_irqsave(&cm_node->retrans_list_lock, flags); irdma_free_retrans_entry(cm_node); spin_unlock_irqrestore(&cm_node->retrans_list_lock, flags); } /** * irdma_form_ah_cm_frame - get a free packet and build frame with address handle * @cm_node: connection's node ionfo to use in frame * @options: pointer to options info * @hdr: pointer mpa header * @pdata: pointer to private data * @flags: indicates FIN or ACK */ static struct irdma_puda_buf * irdma_form_ah_cm_frame(struct irdma_cm_node *cm_node, struct irdma_kmem_info *options, struct irdma_kmem_info *hdr, struct irdma_mpa_priv_info *pdata, u8 flags) { struct irdma_puda_buf *sqbuf; struct irdma_sc_vsi *vsi = &cm_node->iwdev->vsi; u8 *buf; struct tcphdr *tcph; u16 pktsize; u32 opts_len = 0; u32 pd_len = 0; u32 hdr_len = 0; if (!cm_node->ah || !cm_node->ah->ah_info.ah_valid) { irdma_debug(&cm_node->iwdev->rf->sc_dev, IRDMA_DEBUG_CM, "AH invalid\n"); return NULL; } sqbuf = irdma_puda_get_bufpool(vsi->ilq); if (!sqbuf) { irdma_debug(&cm_node->iwdev->rf->sc_dev, IRDMA_DEBUG_CM, "SQ buf NULL\n"); return NULL; } sqbuf->ah_id = cm_node->ah->ah_info.ah_idx; buf = sqbuf->mem.va; if (options) opts_len = (u32)options->size; if (hdr) hdr_len = hdr->size; if (pdata) pd_len = pdata->size; pktsize = sizeof(*tcph) + opts_len + hdr_len + pd_len; memset(buf, 0, pktsize); sqbuf->totallen = pktsize; sqbuf->tcphlen = sizeof(*tcph) + opts_len; sqbuf->scratch = cm_node; tcph = (struct tcphdr *)buf; buf += sizeof(*tcph); tcph->th_sport = htons(cm_node->loc_port); tcph->th_dport = htons(cm_node->rem_port); tcph->th_seq = htonl(cm_node->tcp_cntxt.loc_seq_num); if (flags & SET_ACK) { cm_node->tcp_cntxt.loc_ack_num = cm_node->tcp_cntxt.rcv_nxt; tcph->th_ack = 
htonl(cm_node->tcp_cntxt.loc_ack_num); tcph->th_flags |= TH_ACK; } else { tcph->th_ack = 0; } if (flags & SET_SYN) { cm_node->tcp_cntxt.loc_seq_num++; tcph->th_flags |= TH_SYN; } else { cm_node->tcp_cntxt.loc_seq_num += hdr_len + pd_len; } if (flags & SET_FIN) { cm_node->tcp_cntxt.loc_seq_num++; tcph->th_flags |= TH_FIN; } if (flags & SET_RST) tcph->th_flags |= TH_RST; tcph->th_off = (u16)((sizeof(*tcph) + opts_len + 3) >> 2); sqbuf->tcphlen = tcph->th_off << 2; tcph->th_win = htons(cm_node->tcp_cntxt.rcv_wnd); tcph->th_urp = 0; if (opts_len) { memcpy(buf, options->addr, opts_len); buf += opts_len; } if (hdr_len) { memcpy(buf, hdr->addr, hdr_len); buf += hdr_len; } if (pdata && pdata->addr) memcpy(buf, pdata->addr, pdata->size); atomic_set(&sqbuf->refcount, 1); irdma_debug_buf(vsi->dev, IRDMA_DEBUG_ILQ, "TRANSMIT ILQ BUFFER", sqbuf->mem.va, sqbuf->totallen); return sqbuf; } /** * irdma_form_uda_cm_frame - get a free packet and build frame full tcpip packet * @cm_node: connection's node ionfo to use in frame * @options: pointer to options info * @hdr: pointer mpa header * @pdata: pointer to private data * @flags: indicates FIN or ACK */ static struct irdma_puda_buf * irdma_form_uda_cm_frame(struct irdma_cm_node *cm_node, struct irdma_kmem_info *options, struct irdma_kmem_info *hdr, struct irdma_mpa_priv_info *pdata, u8 flags) { struct irdma_puda_buf *sqbuf; struct irdma_sc_vsi *vsi = &cm_node->iwdev->vsi; u8 *buf; struct tcphdr *tcph; struct ip *iph; struct ip6_hdr *ip6h; struct ether_header *ethh; u16 pktsize; u16 eth_hlen = ETH_HLEN; u32 opts_len = 0; u32 pd_len = 0; u32 hdr_len = 0; u16 vtag; sqbuf = irdma_puda_get_bufpool(vsi->ilq); if (!sqbuf) return NULL; buf = sqbuf->mem.va; if (options) opts_len = (u32)options->size; if (hdr) hdr_len = hdr->size; if (pdata) pd_len = pdata->size; if (cm_node->vlan_id < VLAN_N_VID) eth_hlen += 4; if (cm_node->ipv4) pktsize = sizeof(*iph) + sizeof(*tcph); else pktsize = sizeof(*ip6h) + sizeof(*tcph); pktsize += opts_len + hdr_len + pd_len; memset(buf, 0, eth_hlen + pktsize); sqbuf->totallen = pktsize + eth_hlen; sqbuf->maclen = eth_hlen; sqbuf->tcphlen = sizeof(*tcph) + opts_len; sqbuf->scratch = cm_node; ethh = (struct ether_header *)buf; buf += eth_hlen; if (cm_node->do_lpb) sqbuf->do_lpb = true; if (cm_node->ipv4) { sqbuf->ipv4 = true; iph = (struct ip *)buf; buf += sizeof(*iph); tcph = (struct tcphdr *)buf; buf += sizeof(*tcph); ether_addr_copy(ethh->ether_dhost, cm_node->rem_mac); ether_addr_copy(ethh->ether_shost, cm_node->loc_mac); if (cm_node->vlan_id < VLAN_N_VID) { ((struct ether_vlan_header *)ethh)->evl_proto = htons(ETH_P_8021Q); vtag = (cm_node->user_pri << VLAN_PRIO_SHIFT) | cm_node->vlan_id; ((struct ether_vlan_header *)ethh)->evl_tag = htons(vtag); ((struct ether_vlan_header *)ethh)->evl_encap_proto = htons(ETH_P_IP); } else { ethh->ether_type = htons(ETH_P_IP); } iph->ip_v = IPVERSION; iph->ip_hl = 5; /* 5 * 4Byte words, IP headr len */ iph->ip_tos = cm_node->tos; iph->ip_len = htons(pktsize); iph->ip_id = htons(++cm_node->tcp_cntxt.loc_id); iph->ip_off = htons(0x4000); iph->ip_ttl = 0x40; iph->ip_p = IPPROTO_TCP; iph->ip_src.s_addr = htonl(cm_node->loc_addr[0]); iph->ip_dst.s_addr = htonl(cm_node->rem_addr[0]); } else { sqbuf->ipv4 = false; ip6h = (struct ip6_hdr *)buf; buf += sizeof(*ip6h); tcph = (struct tcphdr *)buf; buf += sizeof(*tcph); ether_addr_copy(ethh->ether_dhost, cm_node->rem_mac); ether_addr_copy(ethh->ether_shost, cm_node->loc_mac); if (cm_node->vlan_id < VLAN_N_VID) { ((struct ether_vlan_header *)ethh)->evl_proto = 
htons(ETH_P_8021Q); vtag = (cm_node->user_pri << VLAN_PRIO_SHIFT) | cm_node->vlan_id; ((struct ether_vlan_header *)ethh)->evl_tag = htons(vtag); ((struct ether_vlan_header *)ethh)->evl_encap_proto = htons(ETH_P_IPV6); } else { ethh->ether_type = htons(ETH_P_IPV6); } ip6h->ip6_vfc = 6 << 4; ip6h->ip6_vfc |= cm_node->tos >> 4; ip6h->ip6_flow = cm_node->tos << 20; ip6h->ip6_plen = htons(pktsize - sizeof(*ip6h)); ip6h->ip6_nxt = 6; ip6h->ip6_hops = 128; irdma_copy_ip_htonl(ip6h->ip6_src.__u6_addr.__u6_addr32, cm_node->loc_addr); irdma_copy_ip_htonl(ip6h->ip6_dst.__u6_addr.__u6_addr32, cm_node->rem_addr); } tcph->th_sport = htons(cm_node->loc_port); tcph->th_dport = htons(cm_node->rem_port); tcph->th_seq = htonl(cm_node->tcp_cntxt.loc_seq_num); if (flags & SET_ACK) { cm_node->tcp_cntxt.loc_ack_num = cm_node->tcp_cntxt.rcv_nxt; tcph->th_ack = htonl(cm_node->tcp_cntxt.loc_ack_num); tcph->th_flags |= TH_ACK; } else { tcph->th_ack = 0; } if (flags & SET_SYN) { cm_node->tcp_cntxt.loc_seq_num++; tcph->th_flags |= TH_SYN; } else { cm_node->tcp_cntxt.loc_seq_num += hdr_len + pd_len; } if (flags & SET_FIN) { cm_node->tcp_cntxt.loc_seq_num++; tcph->th_flags |= TH_FIN; } if (flags & SET_RST) tcph->th_flags |= TH_RST; tcph->th_off = (u16)((sizeof(*tcph) + opts_len + 3) >> 2); sqbuf->tcphlen = tcph->th_off << 2; tcph->th_win = htons(cm_node->tcp_cntxt.rcv_wnd); tcph->th_urp = 0; if (opts_len) { memcpy(buf, options->addr, opts_len); buf += opts_len; } if (hdr_len) { memcpy(buf, hdr->addr, hdr_len); buf += hdr_len; } if (pdata && pdata->addr) memcpy(buf, pdata->addr, pdata->size); atomic_set(&sqbuf->refcount, 1); irdma_debug_buf(vsi->dev, IRDMA_DEBUG_ILQ, "TRANSMIT ILQ BUFFER", sqbuf->mem.va, sqbuf->totallen); return sqbuf; } /** * irdma_send_reset - Send RST packet * @cm_node: connection's node */ int irdma_send_reset(struct irdma_cm_node *cm_node) { struct irdma_puda_buf *sqbuf; int flags = SET_RST | SET_ACK; sqbuf = cm_node->cm_core->form_cm_frame(cm_node, NULL, NULL, NULL, flags); if (!sqbuf) return -ENOMEM; irdma_debug(&cm_node->iwdev->rf->sc_dev, IRDMA_DEBUG_CM, - "caller: %pS cm_node %p cm_id=%p accel=%d state=%d rem_port=0x%04x, loc_port=0x%04x rem_addr=%pI4 loc_addr=%pI4\n", + "caller: %pS cm_node %p cm_id=%p accel=%d state=%d rem_port=0x%04x, loc_port=0x%04x rem_addr=%x loc_addr=%x\n", __builtin_return_address(0), cm_node, cm_node->cm_id, cm_node->accelerated, cm_node->state, cm_node->rem_port, - cm_node->loc_port, cm_node->rem_addr, cm_node->loc_addr); + cm_node->loc_port, cm_node->rem_addr[0], + cm_node->loc_addr[0]); return irdma_schedule_cm_timer(cm_node, sqbuf, IRDMA_TIMER_TYPE_SEND, 0, 1); } /** * irdma_active_open_err - send event for active side cm error * @cm_node: connection's node * @reset: Flag to send reset or not */ static void irdma_active_open_err(struct irdma_cm_node *cm_node, bool reset) { irdma_cleanup_retrans_entry(cm_node); cm_node->cm_core->stats_connect_errs++; if (reset) { - irdma_debug(&cm_node->iwdev->rf->sc_dev, - IRDMA_DEBUG_CM, "cm_node=%p state=%d\n", cm_node, - cm_node->state); + irdma_debug(&cm_node->iwdev->rf->sc_dev, IRDMA_DEBUG_CM, + "cm_node=%p state=%d\n", cm_node, cm_node->state); atomic_inc(&cm_node->refcnt); irdma_send_reset(cm_node); } cm_node->state = IRDMA_CM_STATE_CLOSED; irdma_create_event(cm_node, IRDMA_CM_EVENT_ABORTED); } /** * irdma_passive_open_err - handle passive side cm error * @cm_node: connection's node * @reset: send reset or just free cm_node */ static void irdma_passive_open_err(struct irdma_cm_node *cm_node, bool reset) { 
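/*
 * The CM frame builders above advance the local TCP sequence number the way a
 * regular stack does: SYN and FIN each consume one sequence number, plain data
 * advances it by the payload length, and RST/ACK consume none.  A standalone
 * sketch of that accounting; FLAG_SYN/FLAG_FIN and advance_seq() are made-up
 * names mirroring the SET_SYN/SET_FIN handling above.
 */
#include <stdint.h>

#define FLAG_SYN	0x01
#define FLAG_FIN	0x02

/* Returns the next sequence number we expect the peer to acknowledge. */
static uint32_t
advance_seq(uint32_t loc_seq, uint8_t flags, uint32_t payload_len)
{
	if (flags & FLAG_SYN)
		loc_seq++;			/* SYN occupies one number */
	else
		loc_seq += payload_len;		/* data advances by its length */
	if (flags & FLAG_FIN)
		loc_seq++;			/* FIN occupies one as well */
	return (loc_seq);
}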
irdma_cleanup_retrans_entry(cm_node); cm_node->cm_core->stats_passive_errs++; cm_node->state = IRDMA_CM_STATE_CLOSED; irdma_debug(&cm_node->iwdev->rf->sc_dev, IRDMA_DEBUG_CM, - "cm_node=%p state =%d\n", - cm_node, cm_node->state); + "cm_node=%p state=%d\n", cm_node, cm_node->state); if (reset) irdma_send_reset(cm_node); else irdma_rem_ref_cm_node(cm_node); } /** * irdma_event_connect_error - to create connect error event * @event: cm information for connect event */ static void irdma_event_connect_error(struct irdma_cm_event *event) { struct irdma_qp *iwqp; struct iw_cm_id *cm_id; cm_id = event->cm_node->cm_id; if (!cm_id) return; iwqp = cm_id->provider_data; if (!iwqp || !iwqp->iwdev) return; iwqp->cm_id = NULL; cm_id->provider_data = NULL; irdma_send_cm_event(event->cm_node, cm_id, IW_CM_EVENT_CONNECT_REPLY, -ECONNRESET); irdma_rem_ref_cm_node(event->cm_node); } /** * irdma_process_options - process options from TCP header * @cm_node: connection's node * @optionsloc: point to start of options * @optionsize: size of all options * @syn_pkt: flag if syn packet */ static int irdma_process_options(struct irdma_cm_node *cm_node, u8 *optionsloc, u32 optionsize, u32 syn_pkt) { u32 tmp; u32 offset = 0; union all_known_options *all_options; char got_mss_option = 0; while (offset < optionsize) { all_options = (union all_known_options *)(optionsloc + offset); switch (all_options->base.optionnum) { case OPTION_NUM_EOL: offset = optionsize; break; case OPTION_NUM_NONE: offset += 1; continue; case OPTION_NUM_MSS: irdma_debug(&cm_node->iwdev->rf->sc_dev, IRDMA_DEBUG_CM, "MSS Length: %d Offset: %d Size: %d\n", all_options->mss.len, offset, optionsize); got_mss_option = 1; if (all_options->mss.len != 4) return -EINVAL; tmp = ntohs(all_options->mss.mss); if ((cm_node->ipv4 && (tmp + IRDMA_MTU_TO_MSS_IPV4) < IRDMA_MIN_MTU_IPV4) || (!cm_node->ipv4 && (tmp + IRDMA_MTU_TO_MSS_IPV6) < IRDMA_MIN_MTU_IPV6)) return -EINVAL; if (tmp < cm_node->tcp_cntxt.mss) cm_node->tcp_cntxt.mss = tmp; break; case OPTION_NUM_WINDOW_SCALE: cm_node->tcp_cntxt.snd_wscale = all_options->windowscale.shiftcount; break; default: irdma_debug(&cm_node->iwdev->rf->sc_dev, IRDMA_DEBUG_CM, "Unsupported TCP Option: %x\n", all_options->base.optionnum); break; } offset += all_options->base.len; } if (!got_mss_option && syn_pkt) cm_node->tcp_cntxt.mss = IRDMA_CM_DEFAULT_MSS; return 0; } /** * irdma_handle_tcp_options - setup TCP context info after parsing TCP options * @cm_node: connection's node * @tcph: pointer tcp header * @optionsize: size of options rcvd * @passive: active or passive flag */ static int irdma_handle_tcp_options(struct irdma_cm_node *cm_node, struct tcphdr *tcph, int optionsize, int passive) { u8 *optionsloc = (u8 *)&tcph[1]; int ret; if (optionsize) { ret = irdma_process_options(cm_node, optionsloc, optionsize, (u32)tcph->th_flags & TH_SYN); if (ret) { irdma_debug(&cm_node->iwdev->rf->sc_dev, IRDMA_DEBUG_CM, - "Node %p, Sending Reset\n", - cm_node); + "Node %p, Sending Reset\n", cm_node); if (passive) irdma_passive_open_err(cm_node, true); else irdma_active_open_err(cm_node, true); return ret; } } cm_node->tcp_cntxt.snd_wnd = ntohs(tcph->th_win) << cm_node->tcp_cntxt.snd_wscale; if (cm_node->tcp_cntxt.snd_wnd > cm_node->tcp_cntxt.max_snd_wnd) cm_node->tcp_cntxt.max_snd_wnd = cm_node->tcp_cntxt.snd_wnd; return 0; } /** * irdma_build_mpa_v1 - build a MPA V1 frame * @cm_node: connection's node * @start_addr: address where to build frame * @mpa_key: to do read0 or write0 */ static void irdma_build_mpa_v1(struct irdma_cm_node 
*cm_node, void *start_addr, u8 mpa_key) { struct ietf_mpa_v1 *mpa_frame = start_addr; switch (mpa_key) { case MPA_KEY_REQUEST: memcpy(mpa_frame->key, IEFT_MPA_KEY_REQ, IETF_MPA_KEY_SIZE); break; case MPA_KEY_REPLY: memcpy(mpa_frame->key, IEFT_MPA_KEY_REP, IETF_MPA_KEY_SIZE); break; default: break; } mpa_frame->flags = IETF_MPA_FLAGS_CRC; mpa_frame->rev = cm_node->mpa_frame_rev; mpa_frame->priv_data_len = htons(cm_node->pdata.size); } /** * irdma_build_mpa_v2 - build a MPA V2 frame * @cm_node: connection's node * @start_addr: buffer start address * @mpa_key: to do read0 or write0 */ static void irdma_build_mpa_v2(struct irdma_cm_node *cm_node, void *start_addr, u8 mpa_key) { struct ietf_mpa_v2 *mpa_frame = start_addr; struct ietf_rtr_msg *rtr_msg = &mpa_frame->rtr_msg; u16 ctrl_ird, ctrl_ord; /* initialize the upper 5 bytes of the frame */ irdma_build_mpa_v1(cm_node, start_addr, mpa_key); mpa_frame->flags |= IETF_MPA_V2_FLAG; if (cm_node->iwdev->iw_ooo) { mpa_frame->flags |= IETF_MPA_FLAGS_MARKERS; cm_node->rcv_mark_en = true; } mpa_frame->priv_data_len = cpu_to_be16(be16_to_cpu(mpa_frame->priv_data_len) + IETF_RTR_MSG_SIZE); /* initialize RTR msg */ if (cm_node->mpav2_ird_ord == IETF_NO_IRD_ORD) { ctrl_ird = IETF_NO_IRD_ORD; ctrl_ord = IETF_NO_IRD_ORD; } else { ctrl_ird = (cm_node->ird_size > IETF_NO_IRD_ORD) ? IETF_NO_IRD_ORD : cm_node->ird_size; ctrl_ord = (cm_node->ord_size > IETF_NO_IRD_ORD) ? IETF_NO_IRD_ORD : cm_node->ord_size; } ctrl_ird |= IETF_PEER_TO_PEER; switch (mpa_key) { case MPA_KEY_REQUEST: ctrl_ord |= IETF_RDMA0_WRITE; ctrl_ord |= IETF_RDMA0_READ; break; case MPA_KEY_REPLY: switch (cm_node->send_rdma0_op) { case SEND_RDMA_WRITE_ZERO: ctrl_ord |= IETF_RDMA0_WRITE; break; case SEND_RDMA_READ_ZERO: ctrl_ord |= IETF_RDMA0_READ; break; } break; default: break; } rtr_msg->ctrl_ird = htons(ctrl_ird); rtr_msg->ctrl_ord = htons(ctrl_ord); } /** * irdma_cm_build_mpa_frame - build mpa frame for mpa version 1 or version 2 * @cm_node: connection's node * @mpa: mpa: data buffer * @mpa_key: to do read0 or write0 */ static int irdma_cm_build_mpa_frame(struct irdma_cm_node *cm_node, struct irdma_kmem_info *mpa, u8 mpa_key) { int hdr_len = 0; switch (cm_node->mpa_frame_rev) { case IETF_MPA_V1: hdr_len = sizeof(struct ietf_mpa_v1); irdma_build_mpa_v1(cm_node, mpa->addr, mpa_key); break; case IETF_MPA_V2: hdr_len = sizeof(struct ietf_mpa_v2); irdma_build_mpa_v2(cm_node, mpa->addr, mpa_key); break; default: break; } return hdr_len; } /** * irdma_send_mpa_request - active node send mpa request to passive node * @cm_node: connection's node */ static int irdma_send_mpa_request(struct irdma_cm_node *cm_node) { struct irdma_puda_buf *sqbuf; cm_node->mpa_hdr.addr = &cm_node->mpa_v2_frame; cm_node->mpa_hdr.size = irdma_cm_build_mpa_frame(cm_node, &cm_node->mpa_hdr, MPA_KEY_REQUEST); if (!cm_node->mpa_hdr.size) { irdma_debug(&cm_node->iwdev->rf->sc_dev, IRDMA_DEBUG_CM, - "mpa size = %d\n", - cm_node->mpa_hdr.size); + "mpa size = %d\n", cm_node->mpa_hdr.size); return -EINVAL; } sqbuf = cm_node->cm_core->form_cm_frame(cm_node, NULL, &cm_node->mpa_hdr, &cm_node->pdata, SET_ACK); if (!sqbuf) return -ENOMEM; return irdma_schedule_cm_timer(cm_node, sqbuf, IRDMA_TIMER_TYPE_SEND, 1, 0); } /** * irdma_send_mpa_reject - * @cm_node: connection's node * @pdata: reject data for connection * @plen: length of reject data */ static int irdma_send_mpa_reject(struct irdma_cm_node *cm_node, const void *pdata, u8 plen) { struct irdma_puda_buf *sqbuf; struct irdma_mpa_priv_info priv_info; cm_node->mpa_hdr.addr = 
&cm_node->mpa_v2_frame; cm_node->mpa_hdr.size = irdma_cm_build_mpa_frame(cm_node, &cm_node->mpa_hdr, MPA_KEY_REPLY); cm_node->mpa_v2_frame.flags |= IETF_MPA_FLAGS_REJECT; priv_info.addr = pdata; priv_info.size = plen; sqbuf = cm_node->cm_core->form_cm_frame(cm_node, NULL, &cm_node->mpa_hdr, &priv_info, SET_ACK | SET_FIN); if (!sqbuf) return -ENOMEM; cm_node->state = IRDMA_CM_STATE_FIN_WAIT1; return irdma_schedule_cm_timer(cm_node, sqbuf, IRDMA_TIMER_TYPE_SEND, 1, 0); } /** * irdma_negotiate_mpa_v2_ird_ord - negotiate MPAv2 IRD/ORD * @cm_node: connection's node * @buf: Data pointer */ static int irdma_negotiate_mpa_v2_ird_ord(struct irdma_cm_node *cm_node, u8 *buf) { struct ietf_mpa_v2 *mpa_v2_frame; struct ietf_rtr_msg *rtr_msg; u16 ird_size; u16 ord_size; u16 ctrl_ord; u16 ctrl_ird; mpa_v2_frame = (struct ietf_mpa_v2 *)buf; rtr_msg = &mpa_v2_frame->rtr_msg; /* parse rtr message */ ctrl_ord = ntohs(rtr_msg->ctrl_ord); ctrl_ird = ntohs(rtr_msg->ctrl_ird); ird_size = ctrl_ird & IETF_NO_IRD_ORD; ord_size = ctrl_ord & IETF_NO_IRD_ORD; if (!(ctrl_ird & IETF_PEER_TO_PEER)) return -EOPNOTSUPP; if (ird_size == IETF_NO_IRD_ORD || ord_size == IETF_NO_IRD_ORD) { cm_node->mpav2_ird_ord = IETF_NO_IRD_ORD; goto negotiate_done; } if (cm_node->state != IRDMA_CM_STATE_MPAREQ_SENT) { /* responder */ if (!ord_size && (ctrl_ord & IETF_RDMA0_READ)) cm_node->ird_size = 1; if (cm_node->ord_size > ird_size) cm_node->ord_size = ird_size; } else { /* initiator */ if (!ird_size && (ctrl_ord & IETF_RDMA0_READ)) /* Remote peer doesn't support RDMA0_READ */ return -EOPNOTSUPP; if (cm_node->ord_size > ird_size) cm_node->ord_size = ird_size; if (cm_node->ird_size < ord_size) /* no resources available */ return -EINVAL; } negotiate_done: if (ctrl_ord & IETF_RDMA0_READ) cm_node->send_rdma0_op = SEND_RDMA_READ_ZERO; else if (ctrl_ord & IETF_RDMA0_WRITE) cm_node->send_rdma0_op = SEND_RDMA_WRITE_ZERO; else /* Not supported RDMA0 operation */ return -EOPNOTSUPP; - irdma_debug(&cm_node->iwdev->rf->sc_dev, - IRDMA_DEBUG_CM, "MPAV2 Negotiated ORD: %d, IRD: %d\n", - cm_node->ord_size, cm_node->ird_size); + irdma_debug(&cm_node->iwdev->rf->sc_dev, IRDMA_DEBUG_CM, + "MPAV2 Negotiated ORD: %d, IRD: %d\n", cm_node->ord_size, + cm_node->ird_size); return 0; } /** * irdma_parse_mpa - process an IETF MPA frame * @cm_node: connection's node * @buf: Data pointer * @type: to return accept or reject * @len: Len of mpa buffer */ static int irdma_parse_mpa(struct irdma_cm_node *cm_node, u8 *buf, u32 *type, u32 len) { struct ietf_mpa_v1 *mpa_frame; int mpa_hdr_len, priv_data_len, ret; *type = IRDMA_MPA_REQUEST_ACCEPT; if (len < sizeof(struct ietf_mpa_v1)) { - irdma_debug(&cm_node->iwdev->rf->sc_dev, - IRDMA_DEBUG_CM, "ietf buffer small (%x)\n", len); + irdma_debug(&cm_node->iwdev->rf->sc_dev, IRDMA_DEBUG_CM, + "ietf buffer small (%x)\n", len); return -EINVAL; } mpa_frame = (struct ietf_mpa_v1 *)buf; mpa_hdr_len = sizeof(struct ietf_mpa_v1); priv_data_len = ntohs(mpa_frame->priv_data_len); if (priv_data_len > IETF_MAX_PRIV_DATA_LEN) { irdma_debug(&cm_node->iwdev->rf->sc_dev, IRDMA_DEBUG_CM, - "private_data too big %d\n", - priv_data_len); + "private_data too big %d\n", priv_data_len); return -EOVERFLOW; } if (mpa_frame->rev != IETF_MPA_V1 && mpa_frame->rev != IETF_MPA_V2) { irdma_debug(&cm_node->iwdev->rf->sc_dev, IRDMA_DEBUG_CM, - "unsupported mpa rev = %d\n", - mpa_frame->rev); + "unsupported mpa rev = %d\n", mpa_frame->rev); return -EINVAL; } if (mpa_frame->rev > cm_node->mpa_frame_rev) { irdma_debug(&cm_node->iwdev->rf->sc_dev, 
IRDMA_DEBUG_CM, - "rev %d\n", - mpa_frame->rev); + "rev %d\n", mpa_frame->rev); return -EINVAL; } cm_node->mpa_frame_rev = mpa_frame->rev; if (cm_node->state != IRDMA_CM_STATE_MPAREQ_SENT) { if (memcmp(mpa_frame->key, IEFT_MPA_KEY_REQ, IETF_MPA_KEY_SIZE)) { - irdma_debug(&cm_node->iwdev->rf->sc_dev, - IRDMA_DEBUG_CM, "Unexpected MPA Key received\n"); + irdma_debug(&cm_node->iwdev->rf->sc_dev, IRDMA_DEBUG_CM, + "Unexpected MPA Key received\n"); return -EINVAL; } } else { if (memcmp(mpa_frame->key, IEFT_MPA_KEY_REP, IETF_MPA_KEY_SIZE)) { - irdma_debug(&cm_node->iwdev->rf->sc_dev, - IRDMA_DEBUG_CM, "Unexpected MPA Key received\n"); + irdma_debug(&cm_node->iwdev->rf->sc_dev, IRDMA_DEBUG_CM, + "Unexpected MPA Key received\n"); return -EINVAL; } } if (priv_data_len + mpa_hdr_len > len) { - irdma_debug(&cm_node->iwdev->rf->sc_dev, - IRDMA_DEBUG_CM, "ietf buffer len(%x + %x != %x)\n", - priv_data_len, mpa_hdr_len, len); + irdma_debug(&cm_node->iwdev->rf->sc_dev, IRDMA_DEBUG_CM, + "ietf buffer len(%x + %x != %x)\n", priv_data_len, + mpa_hdr_len, len); return -EOVERFLOW; } if (len > IRDMA_MAX_CM_BUF) { - irdma_debug(&cm_node->iwdev->rf->sc_dev, - IRDMA_DEBUG_CM, "ietf buffer large len = %d\n", len); + irdma_debug(&cm_node->iwdev->rf->sc_dev, IRDMA_DEBUG_CM, + "ietf buffer large len = %d\n", len); return -EOVERFLOW; } switch (mpa_frame->rev) { case IETF_MPA_V2: mpa_hdr_len += IETF_RTR_MSG_SIZE; ret = irdma_negotiate_mpa_v2_ird_ord(cm_node, buf); if (ret) return ret; break; case IETF_MPA_V1: default: break; } memcpy(cm_node->pdata_buf, buf + mpa_hdr_len, priv_data_len); cm_node->pdata.size = priv_data_len; if (mpa_frame->flags & IETF_MPA_FLAGS_REJECT) *type = IRDMA_MPA_REQUEST_REJECT; if (mpa_frame->flags & IETF_MPA_FLAGS_MARKERS) cm_node->snd_mark_en = true; return 0; } /** * irdma_schedule_cm_timer * @cm_node: connection's node * @sqbuf: buffer to send * @type: if it is send or close * @send_retrans: if rexmits to be done * @close_when_complete: is cm_node to be removed * * note - cm_node needs to be protected before calling this. Encase in: * irdma_rem_ref_cm_node(cm_core, cm_node); * irdma_schedule_cm_timer(...) 
* atomic_inc(&cm_node->refcnt); */ int irdma_schedule_cm_timer(struct irdma_cm_node *cm_node, struct irdma_puda_buf *sqbuf, enum irdma_timer_type type, int send_retrans, int close_when_complete) { struct irdma_sc_vsi *vsi = &cm_node->iwdev->vsi; struct irdma_cm_core *cm_core = cm_node->cm_core; struct irdma_timer_entry *new_send; u32 was_timer_set; unsigned long flags; new_send = kzalloc(sizeof(*new_send), GFP_ATOMIC); if (!new_send) { if (type != IRDMA_TIMER_TYPE_CLOSE) irdma_free_sqbuf(vsi, sqbuf); return -ENOMEM; } new_send->retrycount = IRDMA_DEFAULT_RETRYS; new_send->retranscount = IRDMA_DEFAULT_RETRANS; new_send->sqbuf = sqbuf; new_send->timetosend = jiffies; new_send->type = type; new_send->send_retrans = send_retrans; new_send->close_when_complete = close_when_complete; if (type == IRDMA_TIMER_TYPE_CLOSE) { new_send->timetosend += (HZ / 10); if (cm_node->close_entry) { kfree(new_send); - irdma_debug(&cm_node->iwdev->rf->sc_dev, - IRDMA_DEBUG_CM, "already close entry\n"); + irdma_debug(&cm_node->iwdev->rf->sc_dev, IRDMA_DEBUG_CM, + "already close entry\n"); return -EINVAL; } cm_node->close_entry = new_send; } else { /* type == IRDMA_TIMER_TYPE_SEND */ spin_lock_irqsave(&cm_node->retrans_list_lock, flags); cm_node->send_entry = new_send; atomic_inc(&cm_node->refcnt); spin_unlock_irqrestore(&cm_node->retrans_list_lock, flags); new_send->timetosend = jiffies + IRDMA_RETRY_TIMEOUT; atomic_inc(&sqbuf->refcount); irdma_puda_send_buf(vsi->ilq, sqbuf); if (!send_retrans) { irdma_cleanup_retrans_entry(cm_node); if (close_when_complete) irdma_rem_ref_cm_node(cm_node); return 0; } } spin_lock_irqsave(&cm_core->ht_lock, flags); was_timer_set = timer_pending(&cm_core->tcp_timer); if (!was_timer_set) { cm_core->tcp_timer.expires = new_send->timetosend; add_timer(&cm_core->tcp_timer); } spin_unlock_irqrestore(&cm_core->ht_lock, flags); return 0; } /** * irdma_retrans_expired - Could not rexmit the packet * @cm_node: connection's node */ static void irdma_retrans_expired(struct irdma_cm_node *cm_node) { enum irdma_cm_node_state state = cm_node->state; cm_node->state = IRDMA_CM_STATE_CLOSED; switch (state) { case IRDMA_CM_STATE_SYN_RCVD: case IRDMA_CM_STATE_CLOSING: irdma_rem_ref_cm_node(cm_node); break; case IRDMA_CM_STATE_FIN_WAIT1: case IRDMA_CM_STATE_LAST_ACK: irdma_send_reset(cm_node); break; default: atomic_inc(&cm_node->refcnt); irdma_send_reset(cm_node); irdma_create_event(cm_node, IRDMA_CM_EVENT_ABORTED); break; } } /** * irdma_handle_close_entry - for handling retry/timeouts * @cm_node: connection's node * @rem_node: flag for remove cm_node */ static void irdma_handle_close_entry(struct irdma_cm_node *cm_node, u32 rem_node) { struct irdma_timer_entry *close_entry = cm_node->close_entry; struct irdma_qp *iwqp; unsigned long flags; if (!close_entry) return; iwqp = (struct irdma_qp *)close_entry->sqbuf; if (iwqp) { spin_lock_irqsave(&iwqp->lock, flags); if (iwqp->cm_id) { iwqp->hw_tcp_state = IRDMA_TCP_STATE_CLOSED; iwqp->hw_iwarp_state = IRDMA_QP_STATE_ERROR; iwqp->last_aeq = IRDMA_AE_RESET_SENT; iwqp->ibqp_state = IB_QPS_ERR; spin_unlock_irqrestore(&iwqp->lock, flags); irdma_cm_disconn(iwqp); } else { spin_unlock_irqrestore(&iwqp->lock, flags); } } else if (rem_node) { /* TIME_WAIT state */ irdma_rem_ref_cm_node(cm_node); } kfree(close_entry); cm_node->close_entry = NULL; } /** * irdma_cm_timer_tick - system's timer expired callback * @t: Pointer to timer_list */ static void irdma_cm_timer_tick(struct timer_list *t) { unsigned long nexttimeout = jiffies + IRDMA_LONG_TIME; struct 
irdma_cm_node *cm_node; struct irdma_timer_entry *send_entry, *close_entry; struct list_head *list_core_temp; struct list_head *list_node; struct irdma_cm_core *cm_core = from_timer(cm_core, t, tcp_timer); struct irdma_sc_vsi *vsi; u32 settimer = 0; unsigned long timetosend; unsigned long flags; struct list_head timer_list; INIT_LIST_HEAD(&timer_list); rcu_read_lock(); irdma_timer_list_prep(cm_core, &timer_list); rcu_read_unlock(); list_for_each_safe(list_node, list_core_temp, &timer_list) { cm_node = container_of(list_node, struct irdma_cm_node, timer_entry); close_entry = cm_node->close_entry; if (close_entry) { if (time_after(close_entry->timetosend, jiffies)) { if (nexttimeout > close_entry->timetosend || !settimer) { nexttimeout = close_entry->timetosend; settimer = 1; } } else { irdma_handle_close_entry(cm_node, 1); } } spin_lock_irqsave(&cm_node->retrans_list_lock, flags); send_entry = cm_node->send_entry; if (!send_entry) goto done; if (time_after(send_entry->timetosend, jiffies)) { if (cm_node->state != IRDMA_CM_STATE_OFFLOADED) { if (nexttimeout > send_entry->timetosend || !settimer) { nexttimeout = send_entry->timetosend; settimer = 1; } } else { irdma_free_retrans_entry(cm_node); } goto done; } if (cm_node->state == IRDMA_CM_STATE_OFFLOADED || cm_node->state == IRDMA_CM_STATE_CLOSED) { irdma_free_retrans_entry(cm_node); goto done; } if (!send_entry->retranscount || !send_entry->retrycount) { irdma_free_retrans_entry(cm_node); spin_unlock_irqrestore(&cm_node->retrans_list_lock, flags); irdma_retrans_expired(cm_node); cm_node->state = IRDMA_CM_STATE_CLOSED; spin_lock_irqsave(&cm_node->retrans_list_lock, flags); goto done; } spin_unlock_irqrestore(&cm_node->retrans_list_lock, flags); vsi = &cm_node->iwdev->vsi; if (!cm_node->ack_rcvd) { atomic_inc(&send_entry->sqbuf->refcount); irdma_puda_send_buf(vsi->ilq, send_entry->sqbuf); cm_node->cm_core->stats_pkt_retrans++; } spin_lock_irqsave(&cm_node->retrans_list_lock, flags); if (send_entry->send_retrans) { send_entry->retranscount--; timetosend = (IRDMA_RETRY_TIMEOUT << (IRDMA_DEFAULT_RETRANS - send_entry->retranscount)); send_entry->timetosend = jiffies + min(timetosend, IRDMA_MAX_TIMEOUT); if (nexttimeout > send_entry->timetosend || !settimer) { nexttimeout = send_entry->timetosend; settimer = 1; } } else { int close_when_complete; close_when_complete = send_entry->close_when_complete; irdma_free_retrans_entry(cm_node); if (close_when_complete) irdma_rem_ref_cm_node(cm_node); } done: spin_unlock_irqrestore(&cm_node->retrans_list_lock, flags); irdma_rem_ref_cm_node(cm_node); } if (settimer) { spin_lock_irqsave(&cm_core->ht_lock, flags); if (!timer_pending(&cm_core->tcp_timer)) { cm_core->tcp_timer.expires = nexttimeout; add_timer(&cm_core->tcp_timer); } spin_unlock_irqrestore(&cm_core->ht_lock, flags); } } /** * irdma_send_syn - send SYN packet * @cm_node: connection's node * @sendack: flag to set ACK bit or not */ int irdma_send_syn(struct irdma_cm_node *cm_node, u32 sendack) { struct irdma_puda_buf *sqbuf; int flags = SET_SYN; char optionsbuf[sizeof(struct option_mss) + sizeof(struct option_windowscale) + sizeof(struct option_base) + TCP_OPTIONS_PADDING]; struct irdma_kmem_info opts; int optionssize = 0; /* Sending MSS option */ union all_known_options *options; opts.addr = optionsbuf; if (!cm_node) return -EINVAL; options = (union all_known_options *)&optionsbuf[optionssize]; options->mss.optionnum = OPTION_NUM_MSS; options->mss.len = sizeof(struct option_mss); options->mss.mss = htons(cm_node->tcp_cntxt.mss); optionssize += 
sizeof(struct option_mss); options = (union all_known_options *)&optionsbuf[optionssize]; options->windowscale.optionnum = OPTION_NUM_WINDOW_SCALE; options->windowscale.len = sizeof(struct option_windowscale); options->windowscale.shiftcount = cm_node->tcp_cntxt.rcv_wscale; optionssize += sizeof(struct option_windowscale); options = (union all_known_options *)&optionsbuf[optionssize]; options->eol = OPTION_NUM_EOL; optionssize += 1; if (sendack) flags |= SET_ACK; opts.size = optionssize; sqbuf = cm_node->cm_core->form_cm_frame(cm_node, &opts, NULL, NULL, flags); if (!sqbuf) return -ENOMEM; return irdma_schedule_cm_timer(cm_node, sqbuf, IRDMA_TIMER_TYPE_SEND, 1, 0); } /** * irdma_send_ack - Send ACK packet * @cm_node: connection's node */ void irdma_send_ack(struct irdma_cm_node *cm_node) { struct irdma_puda_buf *sqbuf; struct irdma_sc_vsi *vsi = &cm_node->iwdev->vsi; sqbuf = cm_node->cm_core->form_cm_frame(cm_node, NULL, NULL, NULL, SET_ACK); if (sqbuf) irdma_puda_send_buf(vsi->ilq, sqbuf); } /** * irdma_send_fin - Send FIN pkt * @cm_node: connection's node */ static int irdma_send_fin(struct irdma_cm_node *cm_node) { struct irdma_puda_buf *sqbuf; sqbuf = cm_node->cm_core->form_cm_frame(cm_node, NULL, NULL, NULL, SET_ACK | SET_FIN); if (!sqbuf) return -ENOMEM; return irdma_schedule_cm_timer(cm_node, sqbuf, IRDMA_TIMER_TYPE_SEND, 1, 0); } /** * irdma_find_listener - find a cm node listening on this addr-port pair * @cm_core: cm's core * @dst_addr: listener ip addr * @ipv4: flag indicating IPv4 when true * @dst_port: listener tcp port num * @vlan_id: virtual LAN ID * @listener_state: state to match with listen node's */ static struct irdma_cm_listener * irdma_find_listener(struct irdma_cm_core *cm_core, u32 *dst_addr, bool ipv4, u16 dst_port, u16 vlan_id, enum irdma_cm_listener_state listener_state) { struct irdma_cm_listener *listen_node; static const u32 ip_zero[4] = {0, 0, 0, 0}; u32 listen_addr[4]; u16 listen_port; unsigned long flags; /* walk list and find cm_node associated with this session ID */ spin_lock_irqsave(&cm_core->listen_list_lock, flags); list_for_each_entry(listen_node, &cm_core->listen_list, list) { memcpy(listen_addr, listen_node->loc_addr, sizeof(listen_addr)); listen_port = listen_node->loc_port; if (listen_node->ipv4 != ipv4 || listen_port != dst_port || !(listener_state & listen_node->listener_state)) continue; /* compare node pair, return node handle if a match */ if (!memcmp(listen_addr, ip_zero, sizeof(listen_addr)) || (!memcmp(listen_addr, dst_addr, sizeof(listen_addr)) && vlan_id == listen_node->vlan_id)) { atomic_inc(&listen_node->refcnt); spin_unlock_irqrestore(&cm_core->listen_list_lock, flags); return listen_node; } } spin_unlock_irqrestore(&cm_core->listen_list_lock, flags); return NULL; } /** * irdma_del_multiple_qhash - Remove qhash and child listens * @iwdev: iWarp device * @cm_info: CM info for parent listen node * @cm_parent_listen_node: The parent listen node */ static int irdma_del_multiple_qhash(struct irdma_device *iwdev, struct irdma_cm_info *cm_info, struct irdma_cm_listener *cm_parent_listen_node) { struct irdma_cm_listener *child_listen_node; struct list_head *pos, *tpos; unsigned long flags; int ret = -EINVAL; spin_lock_irqsave(&iwdev->cm_core.listen_list_lock, flags); list_for_each_safe(pos, tpos, &cm_parent_listen_node->child_listen_list) { child_listen_node = list_entry(pos, struct irdma_cm_listener, child_listen_list); if (child_listen_node->ipv4) irdma_debug(&iwdev->rf->sc_dev, IRDMA_DEBUG_CM, - "removing child listen for IP=%pI4, 
port=%d, vlan=%d\n", - child_listen_node->loc_addr, + "removing child listen for IP=%x, port=%d, vlan=%d\n", + child_listen_node->loc_addr[0], child_listen_node->loc_port, child_listen_node->vlan_id); else irdma_debug(&iwdev->rf->sc_dev, IRDMA_DEBUG_CM, - "removing child listen for IP=%pI6, port=%d, vlan=%d\n", - child_listen_node->loc_addr, + "removing child listen for IP=%x:%x:%x:%x, port=%d, vlan=%d\n", + IRDMA_PRINT_IP6(child_listen_node->loc_addr), child_listen_node->loc_port, child_listen_node->vlan_id); list_del(pos); memcpy(cm_info->loc_addr, child_listen_node->loc_addr, sizeof(cm_info->loc_addr)); cm_info->vlan_id = child_listen_node->vlan_id; if (child_listen_node->qhash_set) { ret = irdma_manage_qhash(iwdev, cm_info, IRDMA_QHASH_TYPE_TCP_SYN, IRDMA_QHASH_MANAGE_TYPE_DELETE, NULL, false); child_listen_node->qhash_set = false; } else { ret = 0; } - irdma_debug(&iwdev->rf->sc_dev, - IRDMA_DEBUG_CM, "Child listen node freed = %p\n", + irdma_debug(&iwdev->rf->sc_dev, IRDMA_DEBUG_CM, + "Child listen node freed = %p\n", child_listen_node); kfree(child_listen_node); cm_parent_listen_node->cm_core->stats_listen_nodes_destroyed++; } spin_unlock_irqrestore(&iwdev->cm_core.listen_list_lock, flags); return ret; } -static u8 irdma_get_egress_vlan_prio(u32 *loc_addr, u8 prio, bool ipv4){ +static u8 irdma_iw_get_vlan_prio(u32 *loc_addr, u8 prio, bool ipv4) +{ return prio; } /** * irdma_netdev_vlan_ipv6 - Gets the netdev and mac * @addr: local IPv6 address * @vlan_id: vlan id for the given IPv6 address * @mac: mac address for the given IPv6 address * * Returns the net_device of the IPv6 address and also sets the * vlan id and mac for that address. */ if_t irdma_netdev_vlan_ipv6(u32 *addr, u16 *vlan_id, u8 *mac) { if_t ip_dev = NULL; struct in6_addr laddr6; struct ifaddr *ifa; u16 scope_id = 0; irdma_copy_ip_htonl(laddr6.__u6_addr.__u6_addr32, addr); if (vlan_id) *vlan_id = 0xFFFF; /* Match rdma_vlan_dev_vlan_id() */ if (mac) eth_zero_addr(mac); if (IN6_IS_SCOPE_LINKLOCAL(&laddr6) || IN6_IS_ADDR_MC_INTFACELOCAL(&laddr6)) scope_id = ntohs(laddr6.__u6_addr.__u6_addr16[1]); ip_dev = ip6_ifp_find(&init_net, laddr6, scope_id); if (ip_dev) { if (vlan_id) *vlan_id = rdma_vlan_dev_vlan_id(ip_dev); ifa = if_getifaddr(ip_dev); if (ifa && ifa->ifa_addr && mac) ether_addr_copy(mac, if_getlladdr(ip_dev)); } return ip_dev; } /** * irdma_get_vlan_ipv4 - Returns the vlan_id for IPv4 address * @addr: local IPv4 address */ u16 irdma_get_vlan_ipv4(u32 *addr) { if_t netdev; u16 vlan_id = 0xFFFF; netdev = ip_ifp_find(&init_net, htonl(addr[0])); if (netdev) { vlan_id = rdma_vlan_dev_vlan_id(netdev); dev_put(netdev); } return vlan_id; } -struct irdma_add_mqh_cbs { - struct irdma_device *iwdev; - struct irdma_cm_info *cm_info; - struct irdma_cm_listener *cm_listen_node; -}; +static int +irdma_manage_qhash_wait(struct irdma_pci_f *rf, struct irdma_cm_info *cm_info) +{ + struct irdma_cqp_request *cqp_request = cm_info->cqp_request; + int cnt = rf->sc_dev.hw_attrs.max_cqp_compl_wait_time_ms * CQP_TIMEOUT_THRESHOLD; + u32 ret_val; + + if (!cqp_request) + return -ENOMEM; + do { + irdma_cqp_ce_handler(rf, &rf->ccq.sc_cq); + mdelay(1); + } while (!READ_ONCE(cqp_request->request_done) && --cnt); + + ret_val = cqp_request->compl_info.op_ret_val; + irdma_put_cqp_request(&rf->cqp, cqp_request); + if (cnt) { + if (!ret_val) + return 0; + return -EINVAL; + } + + return -ETIMEDOUT; +} /** - * irdma_add_mqh_ifa_cb - Adds multiple qhashes for IPV4/IPv6 + * irdma_add_mqh_ifa_cb - Adds multiple qhashes for IPv4/IPv6 * @arg: Calback 
argument structure from irdma_add_mqh * @ifa: Current address to compute against * @count: Current cumulative output of all callbacks in this iteration * * Adds a qhash and a child listen node for a single IPv4/IPv6 address * on the adapter and adds the associated qhash filter */ static u_int irdma_add_mqh_ifa_cb(void *arg, struct ifaddr *ifa, u_int count) { struct irdma_add_mqh_cbs *cbs = arg; struct irdma_cm_listener *child_listen_node; struct irdma_cm_info *cm_info = cbs->cm_info; struct irdma_device *iwdev = cbs->iwdev; struct irdma_cm_listener *cm_parent_listen_node = cbs->cm_listen_node; if_t ip_dev = ifa->ifa_ifp; unsigned long flags; int ret; if (count) return 0; - if (cm_info->ipv4) - irdma_debug(&iwdev->rf->sc_dev, IRDMA_DEBUG_CM, - "Allocating child CM Listener forIP=%pI4, vlan_id=%d, MAC=%pM\n", - &ifa->ifa_addr, - rdma_vlan_dev_vlan_id(ip_dev), if_getlladdr(ip_dev)); - else - irdma_debug(&iwdev->rf->sc_dev, IRDMA_DEBUG_CM, - "IP=%pI6, vlan_id=%d, MAC=%pM\n", - &((struct sockaddr_in6 *)ifa->ifa_addr)->sin6_addr, - rdma_vlan_dev_vlan_id(ip_dev), - if_getlladdr(ip_dev)); - child_listen_node = kzalloc(sizeof(*child_listen_node), GFP_KERNEL); - irdma_debug(&iwdev->rf->sc_dev, IRDMA_DEBUG_CM, - "Allocating child listener %p\n", - child_listen_node); + child_listen_node = kzalloc(sizeof(*child_listen_node), GFP_ATOMIC); if (!child_listen_node) { - irdma_debug(&iwdev->rf->sc_dev, - IRDMA_DEBUG_CM, + irdma_debug(&iwdev->rf->sc_dev, IRDMA_DEBUG_CM, "listener memory allocation\n"); return -ENOMEM; } memcpy(child_listen_node, cm_parent_listen_node, sizeof(*child_listen_node)); cm_info->vlan_id = rdma_vlan_dev_vlan_id(ip_dev); child_listen_node->vlan_id = cm_info->vlan_id; - if (cm_info->ipv4) + if (cm_info->ipv4) { + irdma_debug(&iwdev->rf->sc_dev, IRDMA_DEBUG_CM, + "Allocating child CM Listener forIP=%x, vlan_id=%d, MAC=%x:%x:%x:%x:%x:%x\n", + ((struct sockaddr_in *)&ifa->ifa_addr)->sin_addr.s_addr, + rdma_vlan_dev_vlan_id(ip_dev), + if_getlladdr(ip_dev)[0], if_getlladdr(ip_dev)[1], + if_getlladdr(ip_dev)[2], if_getlladdr(ip_dev)[3], + if_getlladdr(ip_dev)[4], if_getlladdr(ip_dev)[5]); child_listen_node->loc_addr[0] = ntohl(((struct sockaddr_in *)ifa->ifa_addr)->sin_addr.s_addr); - else + } else { + irdma_debug(&iwdev->rf->sc_dev, IRDMA_DEBUG_CM, + "IP=%x:%x:%x:%x, vlan_id=%d, MAC=%x:%x:%x:%x:%x:%x\n", + IRDMA_PRINT_IP6(&((struct sockaddr_in6 *)ifa->ifa_addr)->sin6_addr), + rdma_vlan_dev_vlan_id(ip_dev), + if_getlladdr(ip_dev)[0], if_getlladdr(ip_dev)[1], + if_getlladdr(ip_dev)[2], if_getlladdr(ip_dev)[3], + if_getlladdr(ip_dev)[4], if_getlladdr(ip_dev)[5]); irdma_copy_ip_ntohl(child_listen_node->loc_addr, ((struct sockaddr_in6 *)ifa->ifa_addr)->sin6_addr.__u6_addr.__u6_addr32); + } memcpy(cm_info->loc_addr, child_listen_node->loc_addr, sizeof(cm_info->loc_addr)); if (!iwdev->vsi.dscp_mode) cm_info->user_pri = - irdma_get_egress_vlan_prio(child_listen_node->loc_addr, - cm_info->user_pri, - false); + irdma_iw_get_vlan_prio(child_listen_node->loc_addr, + cm_info->user_pri, + cm_info->ipv4); ret = irdma_manage_qhash(iwdev, cm_info, IRDMA_QHASH_TYPE_TCP_SYN, IRDMA_QHASH_MANAGE_TYPE_ADD, - NULL, true); + NULL, false); + if (ret) { + kfree(child_listen_node); + return ret; + } + /* wait for qhash finish */ + ret = irdma_manage_qhash_wait(iwdev->rf, cm_info); if (ret) { kfree(child_listen_node); return ret; } child_listen_node->qhash_set = true; spin_lock_irqsave(&iwdev->cm_core.listen_list_lock, flags); list_add(&child_listen_node->child_listen_list, &cm_parent_listen_node->child_listen_list); 
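/* the child listen node now hangs off the parent's child_listen_list; it is unlinked and freed by irdma_del_multiple_qhash() when the parent listener is torn down */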
spin_unlock_irqrestore(&iwdev->cm_core.listen_list_lock, flags); cm_parent_listen_node->cm_core->stats_listen_nodes_created++; return 0; } /** * irdma_add_mqh - Adds multiple qhashes * @iwdev: iWarp device * @cm_info: CM info for parent listen node * @cm_listen_node: The parent listen node */ static int irdma_add_mqh(struct irdma_device *iwdev, struct irdma_cm_info *cm_info, struct irdma_cm_listener *cm_listen_node) { struct epoch_tracker et; struct irdma_add_mqh_cbs cbs; struct if_iter iter; if_t ifp; - int err; + int err = -ENOENT; cbs.iwdev = iwdev; cbs.cm_info = cm_info; cbs.cm_listen_node = cm_listen_node; VNET_ITERATOR_DECL(vnet_iter); VNET_LIST_RLOCK(); NET_EPOCH_ENTER(et); VNET_FOREACH(vnet_iter) { CURVNET_SET_QUIET(vnet_iter); for (ifp = if_iter_start(&iter); ifp != NULL; ifp = if_iter_next(&iter)) { if (!(if_getflags(ifp) & IFF_UP)) continue; if (((rdma_vlan_dev_vlan_id(ifp) >= VLAN_N_VID) || (rdma_vlan_dev_real_dev(ifp) != iwdev->netdev)) && ifp != iwdev->netdev) continue; if_addr_rlock(ifp); if (cm_info->ipv4) err = if_foreach_addr_type(ifp, AF_INET, irdma_add_mqh_ifa_cb, &cbs); else err = if_foreach_addr_type(ifp, AF_INET6, irdma_add_mqh_ifa_cb, &cbs); if_addr_runlock(ifp); } if_iter_finish(&iter); CURVNET_RESTORE(); } NET_EPOCH_EXIT(et); VNET_LIST_RUNLOCK(); return err; } /** * irdma_reset_list_prep - add connection nodes slated for reset to list * @cm_core: cm's core * @listener: pointer to listener node * @reset_list: a list to which cm_node will be selected */ static void irdma_reset_list_prep(struct irdma_cm_core *cm_core, struct irdma_cm_listener *listener, struct list_head *reset_list) { struct irdma_cm_node *cm_node; int bkt; HASH_FOR_EACH_RCU(cm_core->cm_hash_tbl, bkt, cm_node, list) { if (cm_node->listener == listener && !cm_node->accelerated && atomic_inc_not_zero(&cm_node->refcnt)) list_add(&cm_node->reset_entry, reset_list); } } /** * irdma_dec_refcnt_listen - delete listener and associated cm nodes * @cm_core: cm's core * @listener: pointer to listener node * @free_hanging_nodes: to free associated cm_nodes * @apbvt_del: flag to delete the apbvt */ static int irdma_dec_refcnt_listen(struct irdma_cm_core *cm_core, struct irdma_cm_listener *listener, int free_hanging_nodes, bool apbvt_del) { struct list_head *list_pos; struct list_head *list_temp; struct irdma_cm_node *cm_node; struct list_head reset_list; struct irdma_cm_info nfo; enum irdma_cm_node_state old_state; unsigned long flags; int err; /* free non-accelerated child nodes for this listener */ INIT_LIST_HEAD(&reset_list); if (free_hanging_nodes) { rcu_read_lock(); irdma_reset_list_prep(cm_core, listener, &reset_list); rcu_read_unlock(); } list_for_each_safe(list_pos, list_temp, &reset_list) { cm_node = container_of(list_pos, struct irdma_cm_node, reset_entry); if (cm_node->state >= IRDMA_CM_STATE_FIN_WAIT1) { irdma_rem_ref_cm_node(cm_node); continue; } irdma_cleanup_retrans_entry(cm_node); err = irdma_send_reset(cm_node); if (err) { cm_node->state = IRDMA_CM_STATE_CLOSED; - irdma_debug(&cm_node->iwdev->rf->sc_dev, - IRDMA_DEBUG_CM, "send reset failed\n"); + irdma_debug(&cm_node->iwdev->rf->sc_dev, IRDMA_DEBUG_CM, + "send reset failed\n"); } else { old_state = cm_node->state; cm_node->state = IRDMA_CM_STATE_LISTENER_DESTROYED; if (old_state != IRDMA_CM_STATE_MPAREQ_RCVD) irdma_rem_ref_cm_node(cm_node); } } if (atomic_dec_and_test(&listener->refcnt)) { spin_lock_irqsave(&cm_core->listen_list_lock, flags); list_del(&listener->list); spin_unlock_irqrestore(&cm_core->listen_list_lock, flags); if (apbvt_del) 
irdma_del_apbvt(listener->iwdev, listener->apbvt_entry); memcpy(nfo.loc_addr, listener->loc_addr, sizeof(nfo.loc_addr)); nfo.loc_port = listener->loc_port; nfo.ipv4 = listener->ipv4; nfo.vlan_id = listener->vlan_id; nfo.user_pri = listener->user_pri; nfo.qh_qpid = listener->iwdev->vsi.ilq->qp_id; if (!list_empty(&listener->child_listen_list)) { irdma_del_multiple_qhash(listener->iwdev, &nfo, listener); } else { if (listener->qhash_set) irdma_manage_qhash(listener->iwdev, &nfo, IRDMA_QHASH_TYPE_TCP_SYN, IRDMA_QHASH_MANAGE_TYPE_DELETE, NULL, false); } cm_core->stats_listen_destroyed++; cm_core->stats_listen_nodes_destroyed++; irdma_debug(&listener->iwdev->rf->sc_dev, IRDMA_DEBUG_CM, - "loc_port=0x%04x loc_addr=%pI4 cm_listen_node=%p cm_id=%p qhash_set=%d vlan_id=%d apbvt_del=%d\n", - listener->loc_port, listener->loc_addr, listener, + "loc_port=0x%04x loc_addr=%x cm_listen_node=%p cm_id=%p qhash_set=%d vlan_id=%d apbvt_del=%d\n", + listener->loc_port, listener->loc_addr[0], listener, listener->cm_id, listener->qhash_set, listener->vlan_id, apbvt_del); kfree(listener); listener = NULL; return 0; } return -EINVAL; } /** * irdma_cm_del_listen - delete a listener * @cm_core: cm's core * @listener: passive connection's listener * @apbvt_del: flag to delete apbvt */ static int irdma_cm_del_listen(struct irdma_cm_core *cm_core, struct irdma_cm_listener *listener, bool apbvt_del) { listener->listener_state = IRDMA_CM_LISTENER_PASSIVE_STATE; listener->cm_id = NULL; return irdma_dec_refcnt_listen(cm_core, listener, 1, apbvt_del); } /** * irdma_find_node - find a cm node that matches the reference cm node * @cm_core: cm's core * @rem_port: remote tcp port num * @rem_addr: remote ip addr * @loc_port: local tcp port num * @loc_addr: local ip addr * @vlan_id: local VLAN ID */ struct irdma_cm_node * irdma_find_node(struct irdma_cm_core *cm_core, u16 rem_port, u32 *rem_addr, u16 loc_port, u32 *loc_addr, u16 vlan_id) { struct irdma_cm_node *cm_node; u32 key = (rem_port << 16) | loc_port; rcu_read_lock(); HASH_FOR_EACH_POSSIBLE_RCU(cm_core->cm_hash_tbl, cm_node, list, key) { if (cm_node->vlan_id == vlan_id && cm_node->loc_port == loc_port && cm_node->rem_port == rem_port && !memcmp(cm_node->loc_addr, loc_addr, sizeof(cm_node->loc_addr)) && !memcmp(cm_node->rem_addr, rem_addr, sizeof(cm_node->rem_addr))) { if (!atomic_inc_not_zero(&cm_node->refcnt)) goto exit; rcu_read_unlock(); return cm_node; } } exit: rcu_read_unlock(); /* no owner node */ return NULL; } /** * irdma_add_hte_node - add a cm node to the hash table * @cm_core: cm's core * @cm_node: connection's node */ static void irdma_add_hte_node(struct irdma_cm_core *cm_core, struct irdma_cm_node *cm_node) { unsigned long flags; u32 key = (cm_node->rem_port << 16) | cm_node->loc_port; spin_lock_irqsave(&cm_core->ht_lock, flags); HASH_ADD_RCU(cm_core->cm_hash_tbl, &cm_node->list, key); spin_unlock_irqrestore(&cm_core->ht_lock, flags); } /** * irdma_ipv4_is_lpb - check if loopback * @loc_addr: local addr to compare * @rem_addr: remote address */ bool irdma_ipv4_is_lpb(u32 loc_addr, u32 rem_addr) { return ipv4_is_loopback(htonl(rem_addr)) || (loc_addr == rem_addr); } /** * irdma_ipv6_is_lpb - check if loopback * @loc_addr: local addr to compare * @rem_addr: remote address */ bool irdma_ipv6_is_lpb(u32 *loc_addr, u32 *rem_addr) { struct in6_addr raddr6; irdma_copy_ip_htonl(raddr6.__u6_addr.__u6_addr32, rem_addr); return !memcmp(loc_addr, rem_addr, 16) || ipv6_addr_loopback(&raddr6); } /** * irdma_cm_create_ah - create a cm address handle * @cm_node: The 
connection manager node to create AH for * @wait: Provides option to wait for ah creation or not */ static int irdma_cm_create_ah(struct irdma_cm_node *cm_node, bool wait) { struct irdma_ah_info ah_info = {0}; struct irdma_device *iwdev = cm_node->iwdev; #ifdef VIMAGE struct rdma_cm_id *rdma_id = (struct rdma_cm_id *)cm_node->cm_id->context; struct vnet *vnet = rdma_id->route.addr.dev_addr.net; #endif ether_addr_copy(ah_info.mac_addr, if_getlladdr(iwdev->netdev)); ah_info.hop_ttl = 0x40; ah_info.tc_tos = cm_node->tos; ah_info.vsi = &iwdev->vsi; if (cm_node->ipv4) { ah_info.ipv4_valid = true; ah_info.dest_ip_addr[0] = cm_node->rem_addr[0]; ah_info.src_ip_addr[0] = cm_node->loc_addr[0]; CURVNET_SET_QUIET(vnet); ah_info.do_lpbk = irdma_ipv4_is_lpb(ah_info.src_ip_addr[0], ah_info.dest_ip_addr[0]); CURVNET_RESTORE(); } else { memcpy(ah_info.dest_ip_addr, cm_node->rem_addr, sizeof(ah_info.dest_ip_addr)); memcpy(ah_info.src_ip_addr, cm_node->loc_addr, sizeof(ah_info.src_ip_addr)); ah_info.do_lpbk = irdma_ipv6_is_lpb(ah_info.src_ip_addr, ah_info.dest_ip_addr); } ah_info.vlan_tag = cm_node->vlan_id; if (cm_node->vlan_id < VLAN_N_VID) { ah_info.insert_vlan_tag = 1; ah_info.vlan_tag |= cm_node->user_pri << VLAN_PRIO_SHIFT; } ah_info.dst_arpindex = irdma_arp_table(iwdev->rf, ah_info.dest_ip_addr, NULL, IRDMA_ARP_RESOLVE); if (irdma_puda_create_ah(&iwdev->rf->sc_dev, &ah_info, wait, IRDMA_PUDA_RSRC_TYPE_ILQ, cm_node, &cm_node->ah)) return -ENOMEM; return 0; } /** * irdma_cm_free_ah - free a cm address handle * @cm_node: The connection manager node to create AH for */ static void irdma_cm_free_ah(struct irdma_cm_node *cm_node) { struct irdma_device *iwdev = cm_node->iwdev; irdma_puda_free_ah(&iwdev->rf->sc_dev, cm_node->ah); cm_node->ah = NULL; } /** * irdma_make_cm_node - create a new instance of a cm node * @cm_core: cm's core * @iwdev: iwarp device structure * @cm_info: quad info for connection * @listener: passive connection's listener */ static struct irdma_cm_node * irdma_make_cm_node(struct irdma_cm_core *cm_core, struct irdma_device *iwdev, struct irdma_cm_info *cm_info, struct irdma_cm_listener *listener) { struct irdma_cm_node *cm_node; int arpindex; if_t netdev = iwdev->netdev; /* create an hte and cm_node for this instance */ cm_node = kzalloc(sizeof(*cm_node), GFP_ATOMIC); if (!cm_node) return NULL; /* set our node specific transport info */ cm_node->ipv4 = cm_info->ipv4; cm_node->vlan_id = cm_info->vlan_id; if (cm_node->vlan_id >= VLAN_N_VID && iwdev->dcb_vlan_mode) cm_node->vlan_id = 0; cm_node->tos = cm_info->tos; cm_node->user_pri = cm_info->user_pri; if (listener) { if (listener->tos != cm_info->tos) irdma_dev_warn(&iwdev->ibdev, "application TOS[%d] and remote client TOS[%d] mismatch\n", listener->tos, cm_info->tos); if (iwdev->vsi.dscp_mode) { cm_node->user_pri = listener->user_pri; } else { cm_node->tos = max(listener->tos, cm_info->tos); cm_node->user_pri = rt_tos2priority(cm_node->tos); cm_node->user_pri = - irdma_get_egress_vlan_prio(cm_info->loc_addr, - cm_node->user_pri, - cm_info->ipv4); + irdma_iw_get_vlan_prio(cm_info->loc_addr, + cm_node->user_pri, + cm_info->ipv4); } irdma_debug(&iwdev->rf->sc_dev, IRDMA_DEBUG_DCB, - "listener: TOS:[%d] UP:[%d]\n", - cm_node->tos, + "listener: TOS:[%d] UP:[%d]\n", cm_node->tos, cm_node->user_pri); } memcpy(cm_node->loc_addr, cm_info->loc_addr, sizeof(cm_node->loc_addr)); memcpy(cm_node->rem_addr, cm_info->rem_addr, sizeof(cm_node->rem_addr)); cm_node->loc_port = cm_info->loc_port; cm_node->rem_port = cm_info->rem_port; 
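/* remaining transport defaults for the node: MPA revision, RDMA0 op, and the IRD/ORD limits advertised by the hardware */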
cm_node->mpa_frame_rev = IRDMA_CM_DEFAULT_MPA_VER; cm_node->send_rdma0_op = SEND_RDMA_READ_ZERO; cm_node->iwdev = iwdev; cm_node->dev = &iwdev->rf->sc_dev; cm_node->ird_size = cm_node->dev->hw_attrs.max_hw_ird; cm_node->ord_size = cm_node->dev->hw_attrs.max_hw_ord; cm_node->listener = listener; cm_node->cm_id = cm_info->cm_id; ether_addr_copy(cm_node->loc_mac, if_getlladdr(netdev)); spin_lock_init(&cm_node->retrans_list_lock); cm_node->ack_rcvd = false; init_completion(&cm_node->establish_comp); atomic_set(&cm_node->refcnt, 1); /* associate our parent CM core */ cm_node->cm_core = cm_core; cm_node->tcp_cntxt.loc_id = IRDMA_CM_DEFAULT_LOCAL_ID; cm_node->tcp_cntxt.rcv_wscale = iwdev->rcv_wscale; cm_node->tcp_cntxt.rcv_wnd = iwdev->rcv_wnd >> cm_node->tcp_cntxt.rcv_wscale; kc_set_loc_seq_num_mss(cm_node); arpindex = irdma_resolve_neigh_lpb_chk(iwdev, cm_node, cm_info); if (arpindex < 0) goto err; ether_addr_copy(cm_node->rem_mac, iwdev->rf->arp_table[arpindex].mac_addr); irdma_add_hte_node(cm_core, cm_node); cm_core->stats_nodes_created++; return cm_node; err: kfree(cm_node); return NULL; } static void irdma_destroy_connection(struct irdma_cm_node *cm_node) { struct irdma_cm_core *cm_core = cm_node->cm_core; struct irdma_qp *iwqp; struct irdma_cm_info nfo; /* if the node is destroyed before connection was accelerated */ if (!cm_node->accelerated && cm_node->accept_pend) { - irdma_debug(&cm_node->iwdev->rf->sc_dev, - IRDMA_DEBUG_CM, "node destroyed before established\n"); + irdma_debug(&cm_node->iwdev->rf->sc_dev, IRDMA_DEBUG_CM, + "node destroyed before established\n"); atomic_dec(&cm_node->listener->pend_accepts_cnt); } if (cm_node->close_entry) irdma_handle_close_entry(cm_node, 0); if (cm_node->listener) { irdma_dec_refcnt_listen(cm_core, cm_node->listener, 0, true); } else { if (cm_node->apbvt_set) { irdma_del_apbvt(cm_node->iwdev, cm_node->apbvt_entry); cm_node->apbvt_set = 0; } irdma_get_addr_info(cm_node, &nfo); if (cm_node->qhash_set) { nfo.qh_qpid = cm_node->iwdev->vsi.ilq->qp_id; irdma_manage_qhash(cm_node->iwdev, &nfo, IRDMA_QHASH_TYPE_TCP_ESTABLISHED, IRDMA_QHASH_MANAGE_TYPE_DELETE, NULL, false); cm_node->qhash_set = 0; } } iwqp = cm_node->iwqp; if (iwqp) { cm_node->cm_id->rem_ref(cm_node->cm_id); cm_node->cm_id = NULL; iwqp->cm_id = NULL; irdma_qp_rem_ref(&iwqp->ibqp); cm_node->iwqp = NULL; } else if (cm_node->qhash_set) { irdma_get_addr_info(cm_node, &nfo); nfo.qh_qpid = cm_node->iwdev->vsi.ilq->qp_id; irdma_manage_qhash(cm_node->iwdev, &nfo, IRDMA_QHASH_TYPE_TCP_ESTABLISHED, IRDMA_QHASH_MANAGE_TYPE_DELETE, NULL, false); cm_node->qhash_set = 0; } cm_core->cm_free_ah(cm_node); } /** * irdma_rem_ref_cm_node - destroy an instance of a cm node * @cm_node: connection's node */ void irdma_rem_ref_cm_node(struct irdma_cm_node *cm_node) { struct irdma_cm_core *cm_core = cm_node->cm_core; unsigned long flags; spin_lock_irqsave(&cm_core->ht_lock, flags); if (!atomic_dec_and_test(&cm_node->refcnt)) { spin_unlock_irqrestore(&cm_core->ht_lock, flags); return; } if (cm_node->iwqp) { cm_node->iwqp->cm_node = NULL; cm_node->iwqp->cm_id = NULL; } HASH_DEL_RCU(cm_core->cm_hash_tbl, &cm_node->list); cm_node->cm_core->stats_nodes_destroyed++; spin_unlock_irqrestore(&cm_core->ht_lock, flags); irdma_destroy_connection(cm_node); kfree_rcu(cm_node, rcu_head); } /** * irdma_handle_fin_pkt - FIN packet received * @cm_node: connection's node */ static void irdma_handle_fin_pkt(struct irdma_cm_node *cm_node) { switch (cm_node->state) { case IRDMA_CM_STATE_SYN_RCVD: case IRDMA_CM_STATE_SYN_SENT: case 
IRDMA_CM_STATE_ESTABLISHED: case IRDMA_CM_STATE_MPAREJ_RCVD: cm_node->tcp_cntxt.rcv_nxt++; irdma_cleanup_retrans_entry(cm_node); cm_node->state = IRDMA_CM_STATE_LAST_ACK; irdma_send_fin(cm_node); break; case IRDMA_CM_STATE_MPAREQ_SENT: irdma_create_event(cm_node, IRDMA_CM_EVENT_ABORTED); cm_node->tcp_cntxt.rcv_nxt++; irdma_cleanup_retrans_entry(cm_node); cm_node->state = IRDMA_CM_STATE_CLOSED; atomic_inc(&cm_node->refcnt); irdma_send_reset(cm_node); break; case IRDMA_CM_STATE_FIN_WAIT1: cm_node->tcp_cntxt.rcv_nxt++; irdma_cleanup_retrans_entry(cm_node); cm_node->state = IRDMA_CM_STATE_CLOSING; irdma_send_ack(cm_node); /* * Wait for ACK as this is simultaneous close. After we receive ACK, do not send anything. Just rm the * node. */ break; case IRDMA_CM_STATE_FIN_WAIT2: cm_node->tcp_cntxt.rcv_nxt++; irdma_cleanup_retrans_entry(cm_node); cm_node->state = IRDMA_CM_STATE_TIME_WAIT; irdma_send_ack(cm_node); irdma_schedule_cm_timer(cm_node, NULL, IRDMA_TIMER_TYPE_CLOSE, 1, 0); break; case IRDMA_CM_STATE_TIME_WAIT: cm_node->tcp_cntxt.rcv_nxt++; irdma_cleanup_retrans_entry(cm_node); cm_node->state = IRDMA_CM_STATE_CLOSED; irdma_rem_ref_cm_node(cm_node); break; case IRDMA_CM_STATE_OFFLOADED: default: irdma_debug(&cm_node->iwdev->rf->sc_dev, IRDMA_DEBUG_CM, - "bad state node state = %d\n", - cm_node->state); + "bad state node state = %d\n", cm_node->state); break; } } /** * irdma_handle_rst_pkt - process received RST packet * @cm_node: connection's node * @rbuf: receive buffer */ static void irdma_handle_rst_pkt(struct irdma_cm_node *cm_node, struct irdma_puda_buf *rbuf) { irdma_debug(&cm_node->iwdev->rf->sc_dev, IRDMA_DEBUG_CM, - "caller: %pS cm_node=%p state=%d rem_port=0x%04x loc_port=0x%04x rem_addr=%pI4 loc_addr=%pI4\n", + "caller: %pS cm_node=%p state=%d rem_port=0x%04x loc_port=0x%04x rem_addr=%x loc_addr=%x\n", __builtin_return_address(0), cm_node, cm_node->state, - cm_node->rem_port, cm_node->loc_port, cm_node->rem_addr, - cm_node->loc_addr); + cm_node->rem_port, cm_node->loc_port, cm_node->rem_addr[0], + cm_node->loc_addr[0]); irdma_cleanup_retrans_entry(cm_node); switch (cm_node->state) { case IRDMA_CM_STATE_SYN_SENT: case IRDMA_CM_STATE_MPAREQ_SENT: switch (cm_node->mpa_frame_rev) { case IETF_MPA_V2: /* Drop down to MPA_V1 */ cm_node->mpa_frame_rev = IETF_MPA_V1; /* send a syn and goto syn sent state */ cm_node->state = IRDMA_CM_STATE_SYN_SENT; if (irdma_send_syn(cm_node, 0)) irdma_active_open_err(cm_node, false); break; case IETF_MPA_V1: default: irdma_active_open_err(cm_node, false); break; } break; case IRDMA_CM_STATE_MPAREQ_RCVD: atomic_inc(&cm_node->passive_state); break; case IRDMA_CM_STATE_ESTABLISHED: case IRDMA_CM_STATE_SYN_RCVD: case IRDMA_CM_STATE_LISTENING: irdma_passive_open_err(cm_node, false); break; case IRDMA_CM_STATE_OFFLOADED: irdma_active_open_err(cm_node, false); break; case IRDMA_CM_STATE_CLOSED: break; case IRDMA_CM_STATE_FIN_WAIT2: case IRDMA_CM_STATE_FIN_WAIT1: case IRDMA_CM_STATE_LAST_ACK: case IRDMA_CM_STATE_TIME_WAIT: cm_node->state = IRDMA_CM_STATE_CLOSED; irdma_rem_ref_cm_node(cm_node); break; default: break; } } /** * irdma_handle_rcv_mpa - Process a recv'd mpa buffer * @cm_node: connection's node * @rbuf: receive buffer */ static void irdma_handle_rcv_mpa(struct irdma_cm_node *cm_node, struct irdma_puda_buf *rbuf) { int err; int datasize = rbuf->datalen; u8 *dataloc = rbuf->data; enum irdma_cm_event_type type = IRDMA_CM_EVENT_UNKNOWN; u32 res_type; err = irdma_parse_mpa(cm_node, dataloc, &res_type, datasize); if (err) { if (cm_node->state == 
IRDMA_CM_STATE_MPAREQ_SENT) irdma_active_open_err(cm_node, true); else irdma_passive_open_err(cm_node, true); return; } switch (cm_node->state) { case IRDMA_CM_STATE_ESTABLISHED: if (res_type == IRDMA_MPA_REQUEST_REJECT) - irdma_debug(&cm_node->iwdev->rf->sc_dev, - IRDMA_DEBUG_CM, "state for reject\n"); + irdma_debug(&cm_node->iwdev->rf->sc_dev, IRDMA_DEBUG_CM, + "state for reject\n"); cm_node->state = IRDMA_CM_STATE_MPAREQ_RCVD; type = IRDMA_CM_EVENT_MPA_REQ; irdma_send_ack(cm_node); /* ACK received MPA request */ atomic_set(&cm_node->passive_state, IRDMA_PASSIVE_STATE_INDICATED); break; case IRDMA_CM_STATE_MPAREQ_SENT: irdma_cleanup_retrans_entry(cm_node); if (res_type == IRDMA_MPA_REQUEST_REJECT) { type = IRDMA_CM_EVENT_MPA_REJECT; cm_node->state = IRDMA_CM_STATE_MPAREJ_RCVD; } else { type = IRDMA_CM_EVENT_CONNECTED; cm_node->state = IRDMA_CM_STATE_OFFLOADED; } irdma_send_ack(cm_node); break; default: irdma_debug(&cm_node->iwdev->rf->sc_dev, IRDMA_DEBUG_CM, - "wrong cm_node state =%d\n", - cm_node->state); + "wrong cm_node state=%d\n", cm_node->state); break; } irdma_create_event(cm_node, type); } /** * irdma_check_syn - Check for error on received syn ack * @cm_node: connection's node * @tcph: pointer tcp header */ static int irdma_check_syn(struct irdma_cm_node *cm_node, struct tcphdr *tcph) { if (ntohl(tcph->th_ack) != cm_node->tcp_cntxt.loc_seq_num) { irdma_active_open_err(cm_node, true); return 1; } return 0; } /** * irdma_check_seq - check seq numbers if OK * @cm_node: connection's node * @tcph: pointer tcp header */ static int irdma_check_seq(struct irdma_cm_node *cm_node, struct tcphdr *tcph) { u32 seq; u32 ack_seq; u32 loc_seq_num = cm_node->tcp_cntxt.loc_seq_num; u32 rcv_nxt = cm_node->tcp_cntxt.rcv_nxt; u32 rcv_wnd; int err = 0; seq = ntohl(tcph->th_seq); ack_seq = ntohl(tcph->th_ack); rcv_wnd = cm_node->tcp_cntxt.rcv_wnd; if (ack_seq != loc_seq_num || !between(seq, rcv_nxt, (rcv_nxt + rcv_wnd))) err = -1; if (err) - irdma_debug(&cm_node->iwdev->rf->sc_dev, - IRDMA_DEBUG_CM, "seq number err\n"); + irdma_debug(&cm_node->iwdev->rf->sc_dev, IRDMA_DEBUG_CM, + "seq number err\n"); return err; } void irdma_add_conn_est_qh(struct irdma_cm_node *cm_node) { struct irdma_cm_info nfo; irdma_get_addr_info(cm_node, &nfo); nfo.qh_qpid = cm_node->iwdev->vsi.ilq->qp_id; irdma_manage_qhash(cm_node->iwdev, &nfo, IRDMA_QHASH_TYPE_TCP_ESTABLISHED, IRDMA_QHASH_MANAGE_TYPE_ADD, cm_node, false); cm_node->qhash_set = true; } /** * irdma_handle_syn_pkt - is for Passive node * @cm_node: connection's node * @rbuf: receive buffer */ static void irdma_handle_syn_pkt(struct irdma_cm_node *cm_node, struct irdma_puda_buf *rbuf) { struct tcphdr *tcph = (struct tcphdr *)rbuf->tcph; int err; u32 inc_sequence; int optionsize; optionsize = (tcph->th_off << 2) - sizeof(struct tcphdr); inc_sequence = ntohl(tcph->th_seq); switch (cm_node->state) { case IRDMA_CM_STATE_SYN_SENT: case IRDMA_CM_STATE_MPAREQ_SENT: /* Rcvd syn on active open connection */ irdma_active_open_err(cm_node, 1); break; case IRDMA_CM_STATE_LISTENING: /* Passive OPEN */ if (atomic_read(&cm_node->listener->pend_accepts_cnt) > cm_node->listener->backlog) { cm_node->cm_core->stats_backlog_drops++; irdma_passive_open_err(cm_node, false); break; } err = irdma_handle_tcp_options(cm_node, tcph, optionsize, 1); if (err) { irdma_passive_open_err(cm_node, false); /* drop pkt */ break; } err = cm_node->cm_core->cm_create_ah(cm_node, false); if (err) { irdma_passive_open_err(cm_node, false); /* drop pkt */ break; } cm_node->tcp_cntxt.rcv_nxt = inc_sequence 
+ 1; cm_node->accept_pend = 1; atomic_inc(&cm_node->listener->pend_accepts_cnt); cm_node->state = IRDMA_CM_STATE_SYN_RCVD; break; case IRDMA_CM_STATE_CLOSED: irdma_cleanup_retrans_entry(cm_node); atomic_inc(&cm_node->refcnt); irdma_send_reset(cm_node); break; case IRDMA_CM_STATE_OFFLOADED: case IRDMA_CM_STATE_ESTABLISHED: case IRDMA_CM_STATE_FIN_WAIT1: case IRDMA_CM_STATE_FIN_WAIT2: case IRDMA_CM_STATE_MPAREQ_RCVD: case IRDMA_CM_STATE_LAST_ACK: case IRDMA_CM_STATE_CLOSING: case IRDMA_CM_STATE_UNKNOWN: default: break; } } /** * irdma_handle_synack_pkt - Process SYN+ACK packet (active side) * @cm_node: connection's node * @rbuf: receive buffer */ static void irdma_handle_synack_pkt(struct irdma_cm_node *cm_node, struct irdma_puda_buf *rbuf) { struct tcphdr *tcph = (struct tcphdr *)rbuf->tcph; int err; u32 inc_sequence; int optionsize; optionsize = (tcph->th_off << 2) - sizeof(struct tcphdr); inc_sequence = ntohl(tcph->th_seq); switch (cm_node->state) { case IRDMA_CM_STATE_SYN_SENT: irdma_cleanup_retrans_entry(cm_node); /* active open */ if (irdma_check_syn(cm_node, tcph)) { - irdma_debug(&cm_node->iwdev->rf->sc_dev, - IRDMA_DEBUG_CM, "check syn fail\n"); + irdma_debug(&cm_node->iwdev->rf->sc_dev, IRDMA_DEBUG_CM, + "check syn fail\n"); return; } cm_node->tcp_cntxt.rem_ack_num = ntohl(tcph->th_ack); /* setup options */ err = irdma_handle_tcp_options(cm_node, tcph, optionsize, 0); if (err) { irdma_debug(&cm_node->iwdev->rf->sc_dev, IRDMA_DEBUG_CM, - "cm_node=%p tcp_options failed\n", - cm_node); + "cm_node=%p tcp_options failed\n", cm_node); break; } irdma_cleanup_retrans_entry(cm_node); cm_node->tcp_cntxt.rcv_nxt = inc_sequence + 1; irdma_send_ack(cm_node); /* ACK for the syn_ack */ err = irdma_send_mpa_request(cm_node); if (err) { irdma_debug(&cm_node->iwdev->rf->sc_dev, IRDMA_DEBUG_CM, "cm_node=%p irdma_send_mpa_request failed\n", cm_node); break; } cm_node->state = IRDMA_CM_STATE_MPAREQ_SENT; break; case IRDMA_CM_STATE_MPAREQ_RCVD: irdma_passive_open_err(cm_node, true); break; case IRDMA_CM_STATE_LISTENING: cm_node->tcp_cntxt.loc_seq_num = ntohl(tcph->th_ack); irdma_cleanup_retrans_entry(cm_node); cm_node->state = IRDMA_CM_STATE_CLOSED; irdma_send_reset(cm_node); break; case IRDMA_CM_STATE_CLOSED: cm_node->tcp_cntxt.loc_seq_num = ntohl(tcph->th_ack); irdma_cleanup_retrans_entry(cm_node); atomic_inc(&cm_node->refcnt); irdma_send_reset(cm_node); break; case IRDMA_CM_STATE_ESTABLISHED: case IRDMA_CM_STATE_FIN_WAIT1: case IRDMA_CM_STATE_FIN_WAIT2: case IRDMA_CM_STATE_LAST_ACK: case IRDMA_CM_STATE_OFFLOADED: case IRDMA_CM_STATE_CLOSING: case IRDMA_CM_STATE_UNKNOWN: case IRDMA_CM_STATE_MPAREQ_SENT: default: break; } } /** * irdma_handle_ack_pkt - process packet with ACK * @cm_node: connection's node * @rbuf: receive buffer */ static int irdma_handle_ack_pkt(struct irdma_cm_node *cm_node, struct irdma_puda_buf *rbuf) { struct tcphdr *tcph = (struct tcphdr *)rbuf->tcph; u32 inc_sequence; int ret; int optionsize; u32 datasize = rbuf->datalen; optionsize = (tcph->th_off << 2) - sizeof(struct tcphdr); if (irdma_check_seq(cm_node, tcph)) return -EINVAL; inc_sequence = ntohl(tcph->th_seq); switch (cm_node->state) { case IRDMA_CM_STATE_SYN_RCVD: irdma_cleanup_retrans_entry(cm_node); ret = irdma_handle_tcp_options(cm_node, tcph, optionsize, 1); if (ret) return ret; cm_node->tcp_cntxt.rem_ack_num = ntohl(tcph->th_ack); cm_node->state = IRDMA_CM_STATE_ESTABLISHED; if (datasize) { cm_node->tcp_cntxt.rcv_nxt = inc_sequence + datasize; irdma_handle_rcv_mpa(cm_node, rbuf); } break; case 
IRDMA_CM_STATE_ESTABLISHED: irdma_cleanup_retrans_entry(cm_node); if (datasize) { cm_node->tcp_cntxt.rcv_nxt = inc_sequence + datasize; irdma_handle_rcv_mpa(cm_node, rbuf); } break; case IRDMA_CM_STATE_MPAREQ_SENT: cm_node->tcp_cntxt.rem_ack_num = ntohl(tcph->th_ack); if (datasize) { cm_node->tcp_cntxt.rcv_nxt = inc_sequence + datasize; cm_node->ack_rcvd = false; irdma_handle_rcv_mpa(cm_node, rbuf); } else { cm_node->ack_rcvd = true; } break; case IRDMA_CM_STATE_LISTENING: irdma_cleanup_retrans_entry(cm_node); cm_node->state = IRDMA_CM_STATE_CLOSED; irdma_send_reset(cm_node); break; case IRDMA_CM_STATE_CLOSED: irdma_cleanup_retrans_entry(cm_node); atomic_inc(&cm_node->refcnt); irdma_send_reset(cm_node); break; case IRDMA_CM_STATE_LAST_ACK: case IRDMA_CM_STATE_CLOSING: irdma_cleanup_retrans_entry(cm_node); cm_node->state = IRDMA_CM_STATE_CLOSED; irdma_rem_ref_cm_node(cm_node); break; case IRDMA_CM_STATE_FIN_WAIT1: irdma_cleanup_retrans_entry(cm_node); cm_node->state = IRDMA_CM_STATE_FIN_WAIT2; break; case IRDMA_CM_STATE_SYN_SENT: case IRDMA_CM_STATE_FIN_WAIT2: case IRDMA_CM_STATE_OFFLOADED: case IRDMA_CM_STATE_MPAREQ_RCVD: case IRDMA_CM_STATE_UNKNOWN: default: irdma_cleanup_retrans_entry(cm_node); break; } return 0; } /** * irdma_process_pkt - process cm packet * @cm_node: connection's node * @rbuf: receive buffer */ static void irdma_process_pkt(struct irdma_cm_node *cm_node, struct irdma_puda_buf *rbuf) { enum irdma_tcpip_pkt_type pkt_type = IRDMA_PKT_TYPE_UNKNOWN; struct tcphdr *tcph = (struct tcphdr *)rbuf->tcph; u32 fin_set = 0; int err; if (tcph->th_flags & TH_RST) { pkt_type = IRDMA_PKT_TYPE_RST; } else if (tcph->th_flags & TH_SYN) { pkt_type = IRDMA_PKT_TYPE_SYN; if (tcph->th_flags & TH_ACK) pkt_type = IRDMA_PKT_TYPE_SYNACK; } else if (tcph->th_flags & TH_ACK) { pkt_type = IRDMA_PKT_TYPE_ACK; } if (tcph->th_flags & TH_FIN) fin_set = 1; switch (pkt_type) { case IRDMA_PKT_TYPE_SYN: irdma_handle_syn_pkt(cm_node, rbuf); break; case IRDMA_PKT_TYPE_SYNACK: irdma_handle_synack_pkt(cm_node, rbuf); break; case IRDMA_PKT_TYPE_ACK: err = irdma_handle_ack_pkt(cm_node, rbuf); if (fin_set && !err) irdma_handle_fin_pkt(cm_node); break; case IRDMA_PKT_TYPE_RST: irdma_handle_rst_pkt(cm_node, rbuf); break; default: if (fin_set && (!irdma_check_seq(cm_node, (struct tcphdr *)rbuf->tcph))) irdma_handle_fin_pkt(cm_node); break; } } /** * irdma_make_listen_node - create a listen node with params * @cm_core: cm's core * @iwdev: iwarp device structure * @cm_info: quad info for connection */ static struct irdma_cm_listener * irdma_make_listen_node(struct irdma_cm_core *cm_core, struct irdma_device *iwdev, struct irdma_cm_info *cm_info) { struct irdma_cm_listener *listener; unsigned long flags; /* cannot have multiple matching listeners */ listener = irdma_find_listener(cm_core, cm_info->loc_addr, cm_info->ipv4, cm_info->loc_port, cm_info->vlan_id, IRDMA_CM_LISTENER_EITHER_STATE); if (listener && listener->listener_state == IRDMA_CM_LISTENER_ACTIVE_STATE) { atomic_dec(&listener->refcnt); return NULL; } if (!listener) { /* * create a CM listen node 1/2 node to compare incoming traffic to */ listener = kzalloc(sizeof(*listener), GFP_KERNEL); if (!listener) return NULL; cm_core->stats_listen_nodes_created++; memcpy(listener->loc_addr, cm_info->loc_addr, sizeof(listener->loc_addr)); listener->loc_port = cm_info->loc_port; INIT_LIST_HEAD(&listener->child_listen_list); atomic_set(&listener->refcnt, 1); } else { listener->reused_node = 1; } listener->cm_id = cm_info->cm_id; listener->ipv4 = cm_info->ipv4; 
listener->vlan_id = cm_info->vlan_id; atomic_set(&listener->pend_accepts_cnt, 0); listener->cm_core = cm_core; listener->iwdev = iwdev; listener->backlog = cm_info->backlog; listener->listener_state = IRDMA_CM_LISTENER_ACTIVE_STATE; if (!listener->reused_node) { spin_lock_irqsave(&cm_core->listen_list_lock, flags); list_add(&listener->list, &cm_core->listen_list); spin_unlock_irqrestore(&cm_core->listen_list_lock, flags); } return listener; } /** * irdma_create_cm_node - make a connection node with params * @cm_core: cm's core * @iwdev: iwarp device structure * @conn_param: connection parameters * @cm_info: quad info for connection * @caller_cm_node: pointer to cm_node structure to return */ static int irdma_create_cm_node(struct irdma_cm_core *cm_core, struct irdma_device *iwdev, struct iw_cm_conn_param *conn_param, struct irdma_cm_info *cm_info, struct irdma_cm_node **caller_cm_node) { struct irdma_cm_node *cm_node; u16 private_data_len = conn_param->private_data_len; const void *private_data = conn_param->private_data; /* create a CM connection node */ cm_node = irdma_make_cm_node(cm_core, iwdev, cm_info, NULL); if (!cm_node) return -ENOMEM; /* set our node side to client (active) side */ cm_node->tcp_cntxt.client = 1; cm_node->tcp_cntxt.rcv_wscale = IRDMA_CM_DEFAULT_RCV_WND_SCALE; irdma_record_ird_ord(cm_node, conn_param->ird, conn_param->ord); cm_node->pdata.size = private_data_len; cm_node->pdata.addr = cm_node->pdata_buf; memcpy(cm_node->pdata_buf, private_data, private_data_len); *caller_cm_node = cm_node; return 0; } /** * irdma_cm_reject - reject and teardown a connection * @cm_node: connection's node * @pdata: ptr to private data for reject * @plen: size of private data */ static int irdma_cm_reject(struct irdma_cm_node *cm_node, const void *pdata, u8 plen) { int ret; int passive_state; if (cm_node->tcp_cntxt.client) return 0; irdma_cleanup_retrans_entry(cm_node); passive_state = atomic_add_return(1, &cm_node->passive_state); if (passive_state == IRDMA_SEND_RESET_EVENT) { cm_node->state = IRDMA_CM_STATE_CLOSED; irdma_rem_ref_cm_node(cm_node); return 0; } if (cm_node->state == IRDMA_CM_STATE_LISTENER_DESTROYED) { irdma_rem_ref_cm_node(cm_node); return 0; } ret = irdma_send_mpa_reject(cm_node, pdata, plen); if (!ret) return 0; cm_node->state = IRDMA_CM_STATE_CLOSED; if (irdma_send_reset(cm_node)) - irdma_debug(&cm_node->iwdev->rf->sc_dev, - IRDMA_DEBUG_CM, "send reset failed\n"); + irdma_debug(&cm_node->iwdev->rf->sc_dev, IRDMA_DEBUG_CM, + "send reset failed\n"); return ret; } /** * irdma_cm_close - close of cm connection * @cm_node: connection's node */ static int irdma_cm_close(struct irdma_cm_node *cm_node) { switch (cm_node->state) { case IRDMA_CM_STATE_SYN_RCVD: case IRDMA_CM_STATE_SYN_SENT: case IRDMA_CM_STATE_ONE_SIDE_ESTABLISHED: case IRDMA_CM_STATE_ESTABLISHED: case IRDMA_CM_STATE_ACCEPTING: case IRDMA_CM_STATE_MPAREQ_SENT: case IRDMA_CM_STATE_MPAREQ_RCVD: irdma_cleanup_retrans_entry(cm_node); irdma_send_reset(cm_node); break; case IRDMA_CM_STATE_CLOSE_WAIT: cm_node->state = IRDMA_CM_STATE_LAST_ACK; irdma_send_fin(cm_node); break; case IRDMA_CM_STATE_FIN_WAIT1: case IRDMA_CM_STATE_FIN_WAIT2: case IRDMA_CM_STATE_LAST_ACK: case IRDMA_CM_STATE_TIME_WAIT: case IRDMA_CM_STATE_CLOSING: return -EINVAL; case IRDMA_CM_STATE_LISTENING: irdma_cleanup_retrans_entry(cm_node); irdma_send_reset(cm_node); break; case IRDMA_CM_STATE_MPAREJ_RCVD: case IRDMA_CM_STATE_UNKNOWN: case IRDMA_CM_STATE_INITED: case IRDMA_CM_STATE_CLOSED: case IRDMA_CM_STATE_LISTENER_DESTROYED: 
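/* nothing left on the wire to tear down in these states; just drop the node reference */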
irdma_rem_ref_cm_node(cm_node); break; case IRDMA_CM_STATE_OFFLOADED: if (cm_node->send_entry) - irdma_debug(&cm_node->iwdev->rf->sc_dev, - IRDMA_DEBUG_CM, "CM send_entry in OFFLOADED state\n"); + irdma_debug(&cm_node->iwdev->rf->sc_dev, IRDMA_DEBUG_CM, + "CM send_entry in OFFLOADED state\n"); irdma_rem_ref_cm_node(cm_node); break; } return 0; } /** * irdma_receive_ilq - recv an ETHERNET packet, and process it * through CM * @vsi: VSI structure of dev * @rbuf: receive buffer */ void irdma_receive_ilq(struct irdma_sc_vsi *vsi, struct irdma_puda_buf *rbuf) { struct irdma_cm_node *cm_node; struct irdma_cm_listener *listener; struct ip *iph; struct ip6_hdr *ip6h; struct tcphdr *tcph; struct irdma_cm_info cm_info = {0}; struct irdma_device *iwdev = vsi->back_vsi; struct irdma_cm_core *cm_core = &iwdev->cm_core; struct ether_vlan_header *ethh; u16 vtag; /* if vlan, then maclen = 18 else 14 */ iph = (struct ip *)rbuf->iph; irdma_debug_buf(vsi->dev, IRDMA_DEBUG_ILQ, "RECEIVE ILQ BUFFER", rbuf->mem.va, rbuf->totallen); if (iwdev->rf->sc_dev.hw_attrs.uk_attrs.hw_rev >= IRDMA_GEN_2) { if (rbuf->vlan_valid) { vtag = rbuf->vlan_id; cm_info.user_pri = (vtag & EVL_PRI_MASK) >> VLAN_PRIO_SHIFT; cm_info.vlan_id = vtag & EVL_VLID_MASK; } else { cm_info.vlan_id = 0xFFFF; } } else { ethh = rbuf->mem.va; if (ethh->evl_proto == htons(ETH_P_8021Q)) { vtag = ntohs(ethh->evl_tag); cm_info.user_pri = (vtag & EVL_PRI_MASK) >> VLAN_PRIO_SHIFT; cm_info.vlan_id = vtag & EVL_VLID_MASK; irdma_debug(&cm_core->iwdev->rf->sc_dev, IRDMA_DEBUG_CM, - "vlan_id=%d\n", - cm_info.vlan_id); + "vlan_id=%d\n", cm_info.vlan_id); } else { cm_info.vlan_id = 0xFFFF; } } tcph = (struct tcphdr *)rbuf->tcph; if (rbuf->ipv4) { cm_info.loc_addr[0] = ntohl(iph->ip_dst.s_addr); cm_info.rem_addr[0] = ntohl(iph->ip_src.s_addr); cm_info.ipv4 = true; cm_info.tos = iph->ip_tos; } else { ip6h = (struct ip6_hdr *)rbuf->iph; irdma_copy_ip_ntohl(cm_info.loc_addr, ip6h->ip6_dst.__u6_addr.__u6_addr32); irdma_copy_ip_ntohl(cm_info.rem_addr, ip6h->ip6_src.__u6_addr.__u6_addr32); cm_info.ipv4 = false; cm_info.tos = (ip6h->ip6_vfc << 4) | ip6h->ip6_flow; } cm_info.loc_port = ntohs(tcph->th_dport); cm_info.rem_port = ntohs(tcph->th_sport); cm_node = irdma_find_node(cm_core, cm_info.rem_port, cm_info.rem_addr, cm_info.loc_port, cm_info.loc_addr, cm_info.vlan_id); if (!cm_node) { /* * Only type of packet accepted are for the PASSIVE open (syn only) */ if (!(tcph->th_flags & TH_SYN) || tcph->th_flags & TH_ACK) return; listener = irdma_find_listener(cm_core, cm_info.loc_addr, cm_info.ipv4, cm_info.loc_port, cm_info.vlan_id, IRDMA_CM_LISTENER_ACTIVE_STATE); if (!listener) { cm_info.cm_id = NULL; - irdma_debug(&cm_core->iwdev->rf->sc_dev, - IRDMA_DEBUG_CM, "no listener found\n"); + irdma_debug(&cm_core->iwdev->rf->sc_dev, IRDMA_DEBUG_CM, + "no listener found\n"); return; } cm_info.cm_id = listener->cm_id; cm_node = irdma_make_cm_node(cm_core, iwdev, &cm_info, listener); if (!cm_node) { - irdma_debug(&cm_core->iwdev->rf->sc_dev, - IRDMA_DEBUG_CM, "allocate node failed\n"); + irdma_debug(&cm_core->iwdev->rf->sc_dev, IRDMA_DEBUG_CM, + "allocate node failed\n"); atomic_dec(&listener->refcnt); return; } if (!(tcph->th_flags & (TH_RST | TH_FIN))) { cm_node->state = IRDMA_CM_STATE_LISTENING; } else { irdma_rem_ref_cm_node(cm_node); return; } atomic_inc(&cm_node->refcnt); } else if (cm_node->state == IRDMA_CM_STATE_OFFLOADED) { irdma_rem_ref_cm_node(cm_node); return; } irdma_process_pkt(cm_node, rbuf); irdma_rem_ref_cm_node(cm_node); } static int irdma_add_qh(struct 
irdma_cm_node *cm_node, bool active) { if (!active) irdma_add_conn_est_qh(cm_node); return 0; } static void irdma_cm_free_ah_nop(struct irdma_cm_node *cm_node) { } /** * irdma_setup_cm_core - setup top level instance of a cm core * @iwdev: iwarp device structure * @rdma_ver: HW version */ int irdma_setup_cm_core(struct irdma_device *iwdev, u8 rdma_ver) { struct irdma_cm_core *cm_core = &iwdev->cm_core; cm_core->iwdev = iwdev; cm_core->dev = &iwdev->rf->sc_dev; /* Handles CM event work items send to Iwarp core */ cm_core->event_wq = alloc_ordered_workqueue("iwarp-event-wq", 0); if (!cm_core->event_wq) return -ENOMEM; INIT_LIST_HEAD(&cm_core->listen_list); timer_setup(&cm_core->tcp_timer, irdma_cm_timer_tick, 0); spin_lock_init(&cm_core->ht_lock); spin_lock_init(&cm_core->listen_list_lock); spin_lock_init(&cm_core->apbvt_lock); switch (rdma_ver) { case IRDMA_GEN_1: cm_core->form_cm_frame = irdma_form_uda_cm_frame; cm_core->cm_create_ah = irdma_add_qh; cm_core->cm_free_ah = irdma_cm_free_ah_nop; break; case IRDMA_GEN_2: default: cm_core->form_cm_frame = irdma_form_ah_cm_frame; cm_core->cm_create_ah = irdma_cm_create_ah; cm_core->cm_free_ah = irdma_cm_free_ah; } return 0; } /** * irdma_cleanup_cm_core - deallocate a top level instance of a * cm core * @cm_core: cm's core */ void irdma_cleanup_cm_core(struct irdma_cm_core *cm_core) { if (!cm_core) return; del_timer_sync(&cm_core->tcp_timer); destroy_workqueue(cm_core->event_wq); cm_core->dev->ws_reset(&cm_core->iwdev->vsi); } /** * irdma_init_tcp_ctx - setup qp context * @cm_node: connection's node * @tcp_info: offload info for tcp * @iwqp: associate qp for the connection */ static void irdma_init_tcp_ctx(struct irdma_cm_node *cm_node, struct irdma_tcp_offload_info *tcp_info, struct irdma_qp *iwqp) { tcp_info->ipv4 = cm_node->ipv4; tcp_info->drop_ooo_seg = !iwqp->iwdev->iw_ooo; tcp_info->wscale = true; tcp_info->ignore_tcp_opt = true; tcp_info->ignore_tcp_uns_opt = true; tcp_info->no_nagle = false; tcp_info->ttl = IRDMA_DEFAULT_TTL; tcp_info->rtt_var = IRDMA_DEFAULT_RTT_VAR; tcp_info->ss_thresh = IRDMA_DEFAULT_SS_THRESH; tcp_info->rexmit_thresh = IRDMA_DEFAULT_REXMIT_THRESH; tcp_info->tcp_state = IRDMA_TCP_STATE_ESTABLISHED; tcp_info->snd_wscale = cm_node->tcp_cntxt.snd_wscale; tcp_info->rcv_wscale = cm_node->tcp_cntxt.rcv_wscale; tcp_info->snd_nxt = cm_node->tcp_cntxt.loc_seq_num; tcp_info->snd_wnd = cm_node->tcp_cntxt.snd_wnd; tcp_info->rcv_nxt = cm_node->tcp_cntxt.rcv_nxt; tcp_info->snd_max = cm_node->tcp_cntxt.loc_seq_num; tcp_info->snd_una = cm_node->tcp_cntxt.loc_seq_num; tcp_info->cwnd = 2 * cm_node->tcp_cntxt.mss; tcp_info->snd_wl1 = cm_node->tcp_cntxt.rcv_nxt; tcp_info->snd_wl2 = cm_node->tcp_cntxt.loc_seq_num; tcp_info->max_snd_window = cm_node->tcp_cntxt.max_snd_wnd; tcp_info->rcv_wnd = cm_node->tcp_cntxt.rcv_wnd << cm_node->tcp_cntxt.rcv_wscale; tcp_info->flow_label = 0; tcp_info->snd_mss = (u32)cm_node->tcp_cntxt.mss; tcp_info->tos = cm_node->tos; if (cm_node->vlan_id < VLAN_N_VID) { tcp_info->insert_vlan_tag = true; tcp_info->vlan_tag = cm_node->vlan_id; tcp_info->vlan_tag |= cm_node->user_pri << VLAN_PRIO_SHIFT; } tcp_info->src_port = cm_node->loc_port; tcp_info->dst_port = cm_node->rem_port; tcp_info->arp_idx = (u16)irdma_arp_table(iwqp->iwdev->rf, cm_node->rem_addr, NULL, IRDMA_ARP_RESOLVE); if (cm_node->ipv4) { tcp_info->dest_ip_addr[3] = cm_node->rem_addr[0]; tcp_info->local_ipaddr[3] = cm_node->loc_addr[0]; } else { memcpy(tcp_info->dest_ip_addr, cm_node->rem_addr, sizeof(tcp_info->dest_ip_addr)); 
memcpy(tcp_info->local_ipaddr, cm_node->loc_addr, sizeof(tcp_info->local_ipaddr)); } } /** * irdma_cm_init_tsa_conn - setup qp for RTS * @iwqp: associate qp for the connection * @cm_node: connection's node */ static void irdma_cm_init_tsa_conn(struct irdma_qp *iwqp, struct irdma_cm_node *cm_node) { struct irdma_iwarp_offload_info *iwarp_info; struct irdma_qp_host_ctx_info *ctx_info; iwarp_info = &iwqp->iwarp_info; ctx_info = &iwqp->ctx_info; ctx_info->tcp_info = &iwqp->tcp_info; ctx_info->send_cq_num = iwqp->iwscq->sc_cq.cq_uk.cq_id; ctx_info->rcv_cq_num = iwqp->iwrcq->sc_cq.cq_uk.cq_id; iwarp_info->ord_size = cm_node->ord_size; iwarp_info->ird_size = cm_node->ird_size; iwarp_info->rd_en = true; iwarp_info->rdmap_ver = 1; iwarp_info->ddp_ver = 1; iwarp_info->pd_id = iwqp->iwpd->sc_pd.pd_id; ctx_info->tcp_info_valid = true; ctx_info->iwarp_info_valid = true; ctx_info->user_pri = cm_node->user_pri; irdma_init_tcp_ctx(cm_node, &iwqp->tcp_info, iwqp); if (cm_node->snd_mark_en) { iwarp_info->snd_mark_en = true; iwarp_info->snd_mark_offset = (iwqp->tcp_info.snd_nxt & SNDMARKER_SEQNMASK) + cm_node->lsmm_size; } cm_node->state = IRDMA_CM_STATE_OFFLOADED; iwqp->tcp_info.tcp_state = IRDMA_TCP_STATE_ESTABLISHED; iwqp->tcp_info.src_mac_addr_idx = iwqp->iwdev->mac_ip_table_idx; if (cm_node->rcv_mark_en) { iwarp_info->rcv_mark_en = true; iwarp_info->align_hdrs = true; } irdma_sc_qp_setctx(&iwqp->sc_qp, iwqp->host_ctx.va, ctx_info); /* once tcp_info is set, no need to do it again */ ctx_info->tcp_info_valid = false; ctx_info->iwarp_info_valid = false; } /** * irdma_cm_disconn - when a connection is being closed * @iwqp: associated qp for the connection */ void irdma_cm_disconn(struct irdma_qp *iwqp) { struct irdma_device *iwdev = iwqp->iwdev; struct disconn_work *work; unsigned long flags; work = kzalloc(sizeof(*work), GFP_ATOMIC); if (!work) return; spin_lock_irqsave(&iwdev->rf->qptable_lock, flags); if (!iwdev->rf->qp_table[iwqp->ibqp.qp_num]) { spin_unlock_irqrestore(&iwdev->rf->qptable_lock, flags); - irdma_debug(&iwdev->rf->sc_dev, - IRDMA_DEBUG_CM, "qp_id %d is already freed\n", - iwqp->ibqp.qp_num); + irdma_debug(&iwdev->rf->sc_dev, IRDMA_DEBUG_CM, + "qp_id %d is already freed\n", iwqp->ibqp.qp_num); kfree(work); return; } irdma_qp_add_ref(&iwqp->ibqp); spin_unlock_irqrestore(&iwdev->rf->qptable_lock, flags); work->iwqp = iwqp; INIT_WORK(&work->work, irdma_disconnect_worker); queue_work(iwdev->cleanup_wq, &work->work); } /** * irdma_qp_disconnect - free qp and close cm * @iwqp: associate qp for the connection */ static void irdma_qp_disconnect(struct irdma_qp *iwqp) { struct irdma_device *iwdev = iwqp->iwdev; iwqp->active_conn = 0; /* close the CM node down if it is still active */ irdma_debug(&iwdev->rf->sc_dev, IRDMA_DEBUG_CM, "Call close API\n"); irdma_cm_close(iwqp->cm_node); } /** * irdma_cm_disconn_true - called by worker thread to disconnect qp * @iwqp: associate qp for the connection */ static void irdma_cm_disconn_true(struct irdma_qp *iwqp) { struct iw_cm_id *cm_id; struct irdma_device *iwdev; struct irdma_sc_qp *qp = &iwqp->sc_qp; u16 last_ae; u8 original_hw_tcp_state; u8 original_ibqp_state; int disconn_status = 0; int issue_disconn = 0; int issue_close = 0; int issue_flush = 0; unsigned long flags; int err; iwdev = iwqp->iwdev; spin_lock_irqsave(&iwqp->lock, flags); if (rdma_protocol_roce(&iwdev->ibdev, 1)) { struct ib_qp_attr attr; if (iwqp->flush_issued || iwqp->sc_qp.qp_uk.destroy_pending) { spin_unlock_irqrestore(&iwqp->lock, flags); return; } spin_unlock_irqrestore(&iwqp->lock, 
flags); attr.qp_state = IB_QPS_ERR; irdma_modify_qp_roce(&iwqp->ibqp, &attr, IB_QP_STATE, NULL); irdma_ib_qp_event(iwqp, qp->event_type); return; } cm_id = iwqp->cm_id; original_hw_tcp_state = iwqp->hw_tcp_state; original_ibqp_state = iwqp->ibqp_state; last_ae = iwqp->last_aeq; if (qp->term_flags) { issue_disconn = 1; issue_close = 1; iwqp->cm_id = NULL; irdma_terminate_del_timer(qp); if (!iwqp->flush_issued) { iwqp->flush_issued = 1; issue_flush = 1; } } else if ((original_hw_tcp_state == IRDMA_TCP_STATE_CLOSE_WAIT) || ((original_ibqp_state == IB_QPS_RTS) && (last_ae == IRDMA_AE_LLP_CONNECTION_RESET))) { issue_disconn = 1; if (last_ae == IRDMA_AE_LLP_CONNECTION_RESET) disconn_status = -ECONNRESET; } if (original_hw_tcp_state == IRDMA_TCP_STATE_CLOSED || original_hw_tcp_state == IRDMA_TCP_STATE_TIME_WAIT || last_ae == IRDMA_AE_RDMAP_ROE_BAD_LLP_CLOSE || last_ae == IRDMA_AE_BAD_CLOSE || last_ae == IRDMA_AE_LLP_CONNECTION_RESET || iwdev->rf->reset || !cm_id) { issue_close = 1; iwqp->cm_id = NULL; qp->term_flags = 0; if (!iwqp->flush_issued) { iwqp->flush_issued = 1; issue_flush = 1; } } spin_unlock_irqrestore(&iwqp->lock, flags); if (issue_flush && !iwqp->sc_qp.qp_uk.destroy_pending) { irdma_flush_wqes(iwqp, IRDMA_FLUSH_SQ | IRDMA_FLUSH_RQ | IRDMA_FLUSH_WAIT); if (qp->term_flags) irdma_ib_qp_event(iwqp, qp->event_type); } if (!cm_id || !cm_id->event_handler) return; spin_lock_irqsave(&iwdev->cm_core.ht_lock, flags); if (!iwqp->cm_node) { spin_unlock_irqrestore(&iwdev->cm_core.ht_lock, flags); return; } atomic_inc(&iwqp->cm_node->refcnt); spin_unlock_irqrestore(&iwdev->cm_core.ht_lock, flags); if (issue_disconn) { err = irdma_send_cm_event(iwqp->cm_node, cm_id, IW_CM_EVENT_DISCONNECT, disconn_status); if (err) irdma_debug(&iwdev->rf->sc_dev, IRDMA_DEBUG_CM, "disconnect event failed: - cm_id = %p\n", cm_id); } if (issue_close) { cm_id->provider_data = iwqp; err = irdma_send_cm_event(iwqp->cm_node, cm_id, IW_CM_EVENT_CLOSE, 0); if (err) irdma_debug(&iwdev->rf->sc_dev, IRDMA_DEBUG_CM, "close event failed: - cm_id = %p\n", cm_id); irdma_qp_disconnect(iwqp); } irdma_rem_ref_cm_node(iwqp->cm_node); } /** * irdma_disconnect_worker - worker for connection close * @work: points or disconn structure */ static void irdma_disconnect_worker(struct work_struct *work) { struct disconn_work *dwork = container_of(work, struct disconn_work, work); struct irdma_qp *iwqp = dwork->iwqp; kfree(dwork); irdma_cm_disconn_true(iwqp); irdma_qp_rem_ref(&iwqp->ibqp); } /** * irdma_free_lsmm_rsrc - free lsmm memory and deregister * @iwqp: associate qp for the connection */ void irdma_free_lsmm_rsrc(struct irdma_qp *iwqp) { struct irdma_device *iwdev; iwdev = iwqp->iwdev; if (iwqp->ietf_mem.va) { if (iwqp->lsmm_mr) - kc_free_lsmm_dereg_mr(iwdev, iwqp); + iwdev->ibdev.dereg_mr(iwqp->lsmm_mr, NULL); irdma_free_dma_mem(iwdev->rf->sc_dev.hw, &iwqp->ietf_mem); iwqp->ietf_mem.va = NULL; } } /** * irdma_accept - registered call for connection to be accepted * @cm_id: cm information for passive connection * @conn_param: accpet parameters */ int irdma_accept(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) { struct ib_qp *ibqp; struct irdma_qp *iwqp; struct irdma_device *iwdev; struct irdma_sc_dev *dev; struct irdma_cm_node *cm_node; struct ib_qp_attr attr = {0}; int passive_state; struct ib_mr *ibmr; struct irdma_pd *iwpd; u16 buf_len = 0; struct irdma_kmem_info accept; u64 tagged_offset; int wait_ret; int ret = 0; ibqp = irdma_get_qp(cm_id->device, conn_param->qpn); if (!ibqp) return -EINVAL; iwqp = to_iwqp(ibqp); 
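/* provider_data on the passive cm_id carries the cm_node built for this connect request */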
iwdev = iwqp->iwdev; dev = &iwdev->rf->sc_dev; cm_node = cm_id->provider_data; if (((struct sockaddr_in *)&cm_id->local_addr)->sin_family == AF_INET) { cm_node->ipv4 = true; cm_node->vlan_id = irdma_get_vlan_ipv4(cm_node->loc_addr); } else { cm_node->ipv4 = false; irdma_netdev_vlan_ipv6(cm_node->loc_addr, &cm_node->vlan_id, NULL); } irdma_debug(&iwdev->rf->sc_dev, IRDMA_DEBUG_CM, "Accept vlan_id=%d\n", cm_node->vlan_id); if (cm_node->state == IRDMA_CM_STATE_LISTENER_DESTROYED) { ret = -EINVAL; goto error; } passive_state = atomic_add_return(1, &cm_node->passive_state); if (passive_state == IRDMA_SEND_RESET_EVENT) { ret = -ECONNRESET; goto error; } buf_len = conn_param->private_data_len + IRDMA_MAX_IETF_SIZE; iwqp->ietf_mem.size = buf_len; iwqp->ietf_mem.va = irdma_allocate_dma_mem(dev->hw, &iwqp->ietf_mem, iwqp->ietf_mem.size, 1); if (!iwqp->ietf_mem.va) { ret = -ENOMEM; goto error; } cm_node->pdata.size = conn_param->private_data_len; accept.addr = iwqp->ietf_mem.va; accept.size = irdma_cm_build_mpa_frame(cm_node, &accept, MPA_KEY_REPLY); memcpy((u8 *)accept.addr + accept.size, conn_param->private_data, conn_param->private_data_len); if (cm_node->dev->ws_add(iwqp->sc_qp.vsi, cm_node->user_pri)) { ret = -ENOMEM; goto error; } iwqp->sc_qp.user_pri = cm_node->user_pri; irdma_qp_add_qos(&iwqp->sc_qp); if (cm_node->dev->hw_attrs.uk_attrs.hw_rev == IRDMA_GEN_2) iwdev->rf->check_fc(&iwdev->vsi, &iwqp->sc_qp); /* setup our first outgoing iWarp send WQE (the IETF frame response) */ iwpd = iwqp->iwpd; tagged_offset = (uintptr_t)iwqp->ietf_mem.va; ibmr = irdma_reg_phys_mr(&iwpd->ibpd, iwqp->ietf_mem.pa, buf_len, IB_ACCESS_LOCAL_WRITE, &tagged_offset); if (IS_ERR(ibmr)) { ret = -ENOMEM; goto error; } ibmr->pd = &iwpd->ibpd; ibmr->device = iwpd->ibpd.device; iwqp->lsmm_mr = ibmr; if (iwqp->page) iwqp->sc_qp.qp_uk.sq_base = kmap_local_page(iwqp->page); cm_node->lsmm_size = accept.size + conn_param->private_data_len; irdma_sc_send_lsmm(&iwqp->sc_qp, iwqp->ietf_mem.va, cm_node->lsmm_size, ibmr->lkey); if (iwqp->page) kunmap_local(iwqp->sc_qp.qp_uk.sq_base); iwqp->cm_id = cm_id; cm_node->cm_id = cm_id; cm_id->provider_data = iwqp; iwqp->active_conn = 0; iwqp->cm_node = cm_node; cm_node->iwqp = iwqp; irdma_cm_init_tsa_conn(iwqp, cm_node); irdma_qp_add_ref(&iwqp->ibqp); cm_id->add_ref(cm_id); attr.qp_state = IB_QPS_RTS; cm_node->qhash_set = false; cm_node->cm_core->cm_free_ah(cm_node); irdma_modify_qp(&iwqp->ibqp, &attr, IB_QP_STATE, NULL); if (dev->hw_attrs.uk_attrs.feature_flags & IRDMA_FEATURE_RTS_AE) { wait_ret = wait_event_interruptible_timeout(iwqp->waitq, iwqp->rts_ae_rcvd, IRDMA_MAX_TIMEOUT); if (!wait_ret) { irdma_debug(&iwdev->rf->sc_dev, IRDMA_DEBUG_CM, "Slow Connection: cm_node=%p, loc_port=%d, rem_port=%d, cm_id=%p\n", cm_node, cm_node->loc_port, cm_node->rem_port, cm_node->cm_id); ret = -ECONNRESET; goto error; } } irdma_send_cm_event(cm_node, cm_id, IW_CM_EVENT_ESTABLISHED, 0); cm_node->accelerated = true; complete(&cm_node->establish_comp); if (cm_node->accept_pend) { atomic_dec(&cm_node->listener->pend_accepts_cnt); cm_node->accept_pend = 0; } irdma_debug(&iwdev->rf->sc_dev, IRDMA_DEBUG_CM, - "rem_port=0x%04x, loc_port=0x%04x rem_addr=%pI4 loc_addr=%pI4 cm_node=%p cm_id=%p qp_id = %d\n\n", - cm_node->rem_port, cm_node->loc_port, cm_node->rem_addr, - cm_node->loc_addr, cm_node, cm_id, ibqp->qp_num); + "rem_port=0x%04x, loc_port=0x%04x rem_addr=%x loc_addr=%x cm_node=%p cm_id=%p qp_id=%d\n\n", + cm_node->rem_port, cm_node->loc_port, cm_node->rem_addr[0], + cm_node->loc_addr[0], cm_node, 
cm_id, ibqp->qp_num); cm_node->cm_core->stats_accepts++; return 0; error: irdma_free_lsmm_rsrc(iwqp); irdma_rem_ref_cm_node(cm_node); return ret; } /** * irdma_reject - registered call for connection to be rejected * @cm_id: cm information for passive connection * @pdata: private data to be sent * @pdata_len: private data length */ int irdma_reject(struct iw_cm_id *cm_id, const void *pdata, u8 pdata_len) { struct irdma_device *iwdev; struct irdma_cm_node *cm_node; cm_node = cm_id->provider_data; cm_node->pdata.size = pdata_len; iwdev = to_iwdev(cm_id->device); if (!iwdev) return -EINVAL; cm_node->cm_core->stats_rejects++; if (pdata_len + sizeof(struct ietf_mpa_v2) > IRDMA_MAX_CM_BUF) return -EINVAL; return irdma_cm_reject(cm_node, pdata, pdata_len); } /** * irdma_connect - registered call for connection to be established * @cm_id: cm information for passive connection * @conn_param: Information about the connection */ int irdma_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) { struct ib_qp *ibqp; struct irdma_qp *iwqp; struct irdma_device *iwdev; struct irdma_cm_node *cm_node; struct irdma_cm_info cm_info; struct sockaddr_in *laddr; struct sockaddr_in *raddr; struct sockaddr_in6 *laddr6; struct sockaddr_in6 *raddr6; int ret = 0; ibqp = irdma_get_qp(cm_id->device, conn_param->qpn); if (!ibqp) return -EINVAL; iwqp = to_iwqp(ibqp); if (!iwqp) return -EINVAL; iwdev = iwqp->iwdev; if (!iwdev) return -EINVAL; laddr = (struct sockaddr_in *)&cm_id->m_local_addr; raddr = (struct sockaddr_in *)&cm_id->m_remote_addr; laddr6 = (struct sockaddr_in6 *)&cm_id->m_local_addr; raddr6 = (struct sockaddr_in6 *)&cm_id->m_remote_addr; if (!(laddr->sin_port) || !(raddr->sin_port)) return -EINVAL; iwqp->active_conn = 1; iwqp->cm_id = NULL; cm_id->provider_data = iwqp; /* set up the connection params for the node */ if (cm_id->remote_addr.ss_family == AF_INET) { if (iwdev->vsi.mtu < IRDMA_MIN_MTU_IPV4) return -EINVAL; cm_info.ipv4 = true; memset(cm_info.loc_addr, 0, sizeof(cm_info.loc_addr)); memset(cm_info.rem_addr, 0, sizeof(cm_info.rem_addr)); cm_info.loc_addr[0] = ntohl(laddr->sin_addr.s_addr); cm_info.rem_addr[0] = ntohl(raddr->sin_addr.s_addr); cm_info.loc_port = ntohs(laddr->sin_port); cm_info.rem_port = ntohs(raddr->sin_port); cm_info.vlan_id = irdma_get_vlan_ipv4(cm_info.loc_addr); } else { if (iwdev->vsi.mtu < IRDMA_MIN_MTU_IPV6) return -EINVAL; cm_info.ipv4 = false; irdma_copy_ip_ntohl(cm_info.loc_addr, laddr6->sin6_addr.__u6_addr.__u6_addr32); irdma_copy_ip_ntohl(cm_info.rem_addr, raddr6->sin6_addr.__u6_addr.__u6_addr32); cm_info.loc_port = ntohs(laddr6->sin6_port); cm_info.rem_port = ntohs(raddr6->sin6_port); irdma_netdev_vlan_ipv6(cm_info.loc_addr, &cm_info.vlan_id, NULL); } cm_info.cm_id = cm_id; cm_info.qh_qpid = iwdev->vsi.ilq->qp_id; cm_info.tos = cm_id->tos; if (iwdev->vsi.dscp_mode) { cm_info.user_pri = iwqp->sc_qp.vsi->dscp_map[irdma_tos2dscp(cm_info.tos)]; } else { cm_info.user_pri = rt_tos2priority(cm_id->tos); - cm_info.user_pri = irdma_get_egress_vlan_prio(cm_info.loc_addr, - cm_info.user_pri, - cm_info.ipv4); + cm_info.user_pri = + irdma_iw_get_vlan_prio(cm_info.loc_addr, + cm_info.user_pri, + cm_info.ipv4); } if (iwqp->sc_qp.dev->ws_add(iwqp->sc_qp.vsi, cm_info.user_pri)) return -ENOMEM; iwqp->sc_qp.user_pri = cm_info.user_pri; irdma_qp_add_qos(&iwqp->sc_qp); if (iwdev->rf->sc_dev.hw_attrs.uk_attrs.hw_rev == IRDMA_GEN_2) iwdev->rf->check_fc(&iwdev->vsi, &iwqp->sc_qp); - irdma_debug(&iwdev->rf->sc_dev, IRDMA_DEBUG_DCB, - "TOS:[%d] UP:[%d]\n", cm_id->tos, - 
cm_info.user_pri); + irdma_debug(&iwdev->rf->sc_dev, IRDMA_DEBUG_DCB, "TOS:[%d] UP:[%d]\n", + cm_id->tos, cm_info.user_pri); ret = irdma_create_cm_node(&iwdev->cm_core, iwdev, conn_param, &cm_info, &cm_node); if (ret) return ret; ret = cm_node->cm_core->cm_create_ah(cm_node, true); if (ret) goto err; if (irdma_manage_qhash(iwdev, &cm_info, IRDMA_QHASH_TYPE_TCP_ESTABLISHED, IRDMA_QHASH_MANAGE_TYPE_ADD, NULL, true)) { ret = -EINVAL; goto err; } cm_node->qhash_set = true; cm_node->apbvt_entry = irdma_add_apbvt(iwdev, cm_info.loc_port); if (!cm_node->apbvt_entry) { ret = -EINVAL; goto err; } cm_node->apbvt_set = true; iwqp->cm_node = cm_node; cm_node->iwqp = iwqp; iwqp->cm_id = cm_id; irdma_qp_add_ref(&iwqp->ibqp); cm_id->add_ref(cm_id); if (cm_node->state != IRDMA_CM_STATE_OFFLOADED) { cm_node->state = IRDMA_CM_STATE_SYN_SENT; ret = irdma_send_syn(cm_node, 0); if (ret) goto err; } irdma_debug(&iwdev->rf->sc_dev, IRDMA_DEBUG_CM, - "rem_port=0x%04x, loc_port=0x%04x rem_addr=%pI4 loc_addr=%pI4 cm_node=%p cm_id=%p qp_id = %d\n\n", - cm_node->rem_port, cm_node->loc_port, cm_node->rem_addr, - cm_node->loc_addr, cm_node, cm_id, ibqp->qp_num); + "rem_port=0x%04x, loc_port=0x%04x rem_addr=%x loc_addr=%x cm_node=%p cm_id=%p qp_id = %d\n\n", + cm_node->rem_port, cm_node->loc_port, cm_node->rem_addr[0], + cm_node->loc_addr[0], cm_node, cm_id, ibqp->qp_num); return 0; err: if (cm_info.ipv4) - irdma_debug(&iwdev->rf->sc_dev, - IRDMA_DEBUG_CM, "connect() FAILED: dest addr=%pI4", - cm_info.rem_addr); + irdma_debug(&iwdev->rf->sc_dev, IRDMA_DEBUG_CM, + "connect() FAILED: dest addr=%x", + cm_info.rem_addr[0]); else - irdma_debug(&iwdev->rf->sc_dev, - IRDMA_DEBUG_CM, "connect() FAILED: dest addr=%pI6", - cm_info.rem_addr); + irdma_debug(&iwdev->rf->sc_dev, IRDMA_DEBUG_CM, + "connect() FAILED: dest addr=%x:%x:%x:%x", + IRDMA_PRINT_IP6(cm_info.rem_addr)); irdma_rem_ref_cm_node(cm_node); iwdev->cm_core.stats_connect_errs++; return ret; } /** * irdma_create_listen - registered call creating listener * @cm_id: cm information for passive connection * @backlog: to max accept pending count */ int irdma_create_listen(struct iw_cm_id *cm_id, int backlog) { struct irdma_device *iwdev; struct irdma_cm_listener *cm_listen_node; struct irdma_cm_info cm_info = {0}; struct sockaddr_in *laddr; struct sockaddr_in6 *laddr6; bool wildcard = false; int err; iwdev = to_iwdev(cm_id->device); if (!iwdev) return -EINVAL; laddr = (struct sockaddr_in *)&cm_id->m_local_addr; laddr6 = (struct sockaddr_in6 *)&cm_id->m_local_addr; cm_info.qh_qpid = iwdev->vsi.ilq->qp_id; if (laddr->sin_family == AF_INET) { if (iwdev->vsi.mtu < IRDMA_MIN_MTU_IPV4) return -EINVAL; cm_info.ipv4 = true; cm_info.loc_addr[0] = ntohl(laddr->sin_addr.s_addr); cm_info.loc_port = ntohs(laddr->sin_port); if (laddr->sin_addr.s_addr != htonl(INADDR_ANY)) { cm_info.vlan_id = irdma_get_vlan_ipv4(cm_info.loc_addr); } else { cm_info.vlan_id = 0xFFFF; wildcard = true; } } else { if (iwdev->vsi.mtu < IRDMA_MIN_MTU_IPV6) return -EINVAL; cm_info.ipv4 = false; irdma_copy_ip_ntohl(cm_info.loc_addr, laddr6->sin6_addr.__u6_addr.__u6_addr32); cm_info.loc_port = ntohs(laddr6->sin6_port); if (!IN6_IS_ADDR_UNSPECIFIED(&laddr6->sin6_addr)) { irdma_netdev_vlan_ipv6(cm_info.loc_addr, &cm_info.vlan_id, NULL); } else { cm_info.vlan_id = 0xFFFF; wildcard = true; } } if (cm_info.vlan_id >= VLAN_N_VID && iwdev->dcb_vlan_mode) cm_info.vlan_id = 0; cm_info.backlog = backlog; cm_info.cm_id = cm_id; cm_listen_node = irdma_make_listen_node(&iwdev->cm_core, iwdev, &cm_info); if (!cm_listen_node) { - 
irdma_debug(&iwdev->rf->sc_dev, - IRDMA_DEBUG_CM, "cm_listen_node == NULL\n"); + irdma_debug(&iwdev->rf->sc_dev, IRDMA_DEBUG_CM, + "cm_listen_node == NULL\n"); return -ENOMEM; } cm_id->provider_data = cm_listen_node; cm_listen_node->tos = cm_id->tos; if (iwdev->vsi.dscp_mode) cm_listen_node->user_pri = iwdev->vsi.dscp_map[irdma_tos2dscp(cm_id->tos)]; else cm_listen_node->user_pri = rt_tos2priority(cm_id->tos); cm_info.user_pri = cm_listen_node->user_pri; if (!cm_listen_node->reused_node) { if (wildcard) { err = irdma_add_mqh(iwdev, &cm_info, cm_listen_node); if (err) goto error; } else { if (!iwdev->vsi.dscp_mode) cm_info.user_pri = cm_listen_node->user_pri = - irdma_get_egress_vlan_prio(cm_info.loc_addr, - cm_info.user_pri, - cm_info.ipv4); + irdma_iw_get_vlan_prio(cm_info.loc_addr, + cm_info.user_pri, + cm_info.ipv4); err = irdma_manage_qhash(iwdev, &cm_info, IRDMA_QHASH_TYPE_TCP_SYN, IRDMA_QHASH_MANAGE_TYPE_ADD, NULL, true); if (err) goto error; cm_listen_node->qhash_set = true; } cm_listen_node->apbvt_entry = irdma_add_apbvt(iwdev, cm_info.loc_port); if (!cm_listen_node->apbvt_entry) goto error; } cm_id->add_ref(cm_id); cm_listen_node->cm_core->stats_listen_created++; irdma_debug(&iwdev->rf->sc_dev, IRDMA_DEBUG_CM, - "loc_port=0x%04x loc_addr=%pI4 cm_listen_node=%p cm_id=%p qhash_set=%d vlan_id=%d\n", - cm_listen_node->loc_port, cm_listen_node->loc_addr, + "loc_port=0x%04x loc_addr=%x cm_listen_node=%p cm_id=%p qhash_set=%d vlan_id=%d\n", + cm_listen_node->loc_port, cm_listen_node->loc_addr[0], cm_listen_node, cm_listen_node->cm_id, cm_listen_node->qhash_set, cm_listen_node->vlan_id); return 0; error: irdma_cm_del_listen(&iwdev->cm_core, cm_listen_node, false); return -EINVAL; } /** * irdma_destroy_listen - registered call to destroy listener * @cm_id: cm information for passive connection */ int irdma_destroy_listen(struct iw_cm_id *cm_id) { struct irdma_device *iwdev; iwdev = to_iwdev(cm_id->device); if (cm_id->provider_data) irdma_cm_del_listen(&iwdev->cm_core, cm_id->provider_data, true); else - irdma_debug(&iwdev->rf->sc_dev, - IRDMA_DEBUG_CM, "cm_id->provider_data was NULL\n"); + irdma_debug(&iwdev->rf->sc_dev, IRDMA_DEBUG_CM, + "cm_id->provider_data was NULL\n"); cm_id->rem_ref(cm_id); return 0; } /** * irdma_iw_teardown_list_prep - add conn nodes slated for tear * down to list * @cm_core: cm's core * @teardown_list: a list to which cm_node will be selected * @ipaddr: pointer to ip address * @nfo: pointer to cm_info structure instance * @disconnect_all: flag indicating disconnect all QPs */ static void irdma_iw_teardown_list_prep(struct irdma_cm_core *cm_core, struct list_head *teardown_list, u32 *ipaddr, struct irdma_cm_info *nfo, bool disconnect_all) { struct irdma_cm_node *cm_node; int bkt; HASH_FOR_EACH_RCU(cm_core->cm_hash_tbl, bkt, cm_node, list) { if ((disconnect_all || (nfo->vlan_id == cm_node->vlan_id && !memcmp(cm_node->loc_addr, ipaddr, nfo->ipv4 ? 4 : 16))) && atomic_inc_not_zero(&cm_node->refcnt)) list_add(&cm_node->teardown_entry, teardown_list); } } static inline bool irdma_ip_vlan_match(u32 *ip1, u16 vlan_id1, bool check_vlan, u32 *ip2, u16 vlan_id2, bool ipv4) { return (!check_vlan || vlan_id1 == vlan_id2) && !memcmp(ip1, ip2, ipv4 ? 
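/*
 * Reviewer note (illustrative, not part of the patch): irdma_iw_teardown_list_prep()
 * above walks the CM hash table under rcu_read_lock() and only queues a node after
 * atomic_inc_not_zero(&cm_node->refcnt) succeeds, i.e. it refuses to take a reference
 * on a node whose last reference is already gone and which is only waiting out its
 * RCU grace period. A minimal sketch of the same idiom, using a hypothetical object
 * with an atomic_t refcnt (names are placeholders, not driver symbols):
 *
 *	rcu_read_lock();
 *	list_for_each_entry_rcu(obj, &live_list, entry) {
 *		if (!atomic_inc_not_zero(&obj->refcnt))
 *			continue;	(already dying, skip it)
 *		list_add(&obj->work_entry, &local_list);
 *	}
 *	rcu_read_unlock();
 *	(process local_list outside the RCU read section, then drop the references)
 */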
4 : 16); } /** * irdma_roce_teardown_list_prep - add conn nodes slated for * tear down to list * @iwdev: RDMA device * @teardown_list: a list to which cm_node will be selected * @ipaddr: pointer to ip address * @nfo: pointer to cm_info structure instance * @disconnect_all: flag indicating disconnect all QPs */ static void irdma_roce_teardown_list_prep(struct irdma_device *iwdev, struct list_head *teardown_list, u32 *ipaddr, struct irdma_cm_info *nfo, bool disconnect_all) { struct irdma_sc_vsi *vsi = &iwdev->vsi; struct irdma_sc_qp *sc_qp; struct list_head *list_node; struct irdma_qp *qp; unsigned long flags; int i; for (i = 0; i < IRDMA_MAX_USER_PRIORITY; i++) { mutex_lock(&vsi->qos[i].qos_mutex); list_for_each(list_node, &vsi->qos[i].qplist) { u32 qp_ip[4]; sc_qp = container_of(list_node, struct irdma_sc_qp, list); if (sc_qp->qp_uk.qp_type != IRDMA_QP_TYPE_ROCE_RC) continue; qp = sc_qp->qp_uk.back_qp; if (!disconnect_all) { if (nfo->ipv4) qp_ip[0] = qp->udp_info.local_ipaddr[3]; else memcpy(qp_ip, &qp->udp_info.local_ipaddr[0], sizeof(qp_ip)); } if (disconnect_all || irdma_ip_vlan_match(qp_ip, qp->udp_info.vlan_tag & EVL_VLID_MASK, qp->udp_info.insert_vlan_tag, ipaddr, nfo->vlan_id, nfo->ipv4)) { spin_lock_irqsave(&iwdev->rf->qptable_lock, flags); if (iwdev->rf->qp_table[sc_qp->qp_uk.qp_id]) { irdma_qp_add_ref(&qp->ibqp); list_add(&qp->teardown_entry, teardown_list); } spin_unlock_irqrestore(&iwdev->rf->qptable_lock, flags); } } mutex_unlock(&vsi->qos[i].qos_mutex); } } /** * irdma_cm_event_connected - handle connected active node * @event: the info for cm_node of connection */ static void irdma_cm_event_connected(struct irdma_cm_event *event) { struct irdma_qp *iwqp; struct irdma_device *iwdev; struct irdma_cm_node *cm_node; struct irdma_sc_dev *dev; struct ib_qp_attr attr = {0}; struct iw_cm_id *cm_id; int status; bool read0; int wait_ret = 0; cm_node = event->cm_node; cm_id = cm_node->cm_id; iwqp = cm_id->provider_data; iwdev = iwqp->iwdev; dev = &iwdev->rf->sc_dev; if (iwqp->sc_qp.qp_uk.destroy_pending) { status = -ETIMEDOUT; goto error; } irdma_cm_init_tsa_conn(iwqp, cm_node); read0 = (cm_node->send_rdma0_op == SEND_RDMA_READ_ZERO); if (iwqp->page) iwqp->sc_qp.qp_uk.sq_base = kmap_local_page(iwqp->page); irdma_sc_send_rtt(&iwqp->sc_qp, read0); if (iwqp->page) kunmap_local(iwqp->sc_qp.qp_uk.sq_base); attr.qp_state = IB_QPS_RTS; cm_node->qhash_set = false; irdma_modify_qp(&iwqp->ibqp, &attr, IB_QP_STATE, NULL); if (dev->hw_attrs.uk_attrs.feature_flags & IRDMA_FEATURE_RTS_AE) { wait_ret = wait_event_interruptible_timeout(iwqp->waitq, iwqp->rts_ae_rcvd, IRDMA_MAX_TIMEOUT); if (!wait_ret) irdma_debug(&iwdev->rf->sc_dev, IRDMA_DEBUG_CM, "Slow Connection: cm_node=%p, loc_port=%d, rem_port=%d, cm_id=%p\n", cm_node, cm_node->loc_port, cm_node->rem_port, cm_node->cm_id); } irdma_send_cm_event(cm_node, cm_id, IW_CM_EVENT_CONNECT_REPLY, 0); cm_node->accelerated = true; complete(&cm_node->establish_comp); cm_node->cm_core->cm_free_ah(cm_node); return; error: iwqp->cm_id = NULL; cm_id->provider_data = NULL; irdma_send_cm_event(event->cm_node, cm_id, IW_CM_EVENT_CONNECT_REPLY, status); irdma_rem_ref_cm_node(event->cm_node); } /** * irdma_cm_event_reset - handle reset * @event: the info for cm_node of connection */ static void irdma_cm_event_reset(struct irdma_cm_event *event) { struct irdma_cm_node *cm_node = event->cm_node; struct iw_cm_id *cm_id = cm_node->cm_id; struct irdma_qp *iwqp; if (!cm_id) return; iwqp = cm_id->provider_data; if (!iwqp) return; irdma_debug(&cm_node->iwdev->rf->sc_dev, 
IRDMA_DEBUG_CM, - "reset event %p - cm_id = %p\n", - event->cm_node, cm_id); + "reset event %p - cm_id = %p\n", event->cm_node, cm_id); iwqp->cm_id = NULL; irdma_send_cm_event(cm_node, cm_node->cm_id, IW_CM_EVENT_DISCONNECT, -ECONNRESET); irdma_send_cm_event(cm_node, cm_node->cm_id, IW_CM_EVENT_CLOSE, 0); } /** * irdma_cm_event_handler - send event to cm upper layer * @work: pointer of cm event info. */ static void irdma_cm_event_handler(struct work_struct *work) { struct irdma_cm_event *event = container_of(work, struct irdma_cm_event, event_work); struct irdma_cm_node *cm_node; if (!event || !event->cm_node || !event->cm_node->cm_core) return; cm_node = event->cm_node; switch (event->type) { case IRDMA_CM_EVENT_MPA_REQ: irdma_send_cm_event(cm_node, cm_node->cm_id, IW_CM_EVENT_CONNECT_REQUEST, 0); break; case IRDMA_CM_EVENT_RESET: irdma_cm_event_reset(event); break; case IRDMA_CM_EVENT_CONNECTED: if (!event->cm_node->cm_id || event->cm_node->state != IRDMA_CM_STATE_OFFLOADED) break; irdma_cm_event_connected(event); break; case IRDMA_CM_EVENT_MPA_REJECT: if (!event->cm_node->cm_id || cm_node->state == IRDMA_CM_STATE_OFFLOADED) break; irdma_send_cm_event(cm_node, cm_node->cm_id, IW_CM_EVENT_CONNECT_REPLY, -ECONNREFUSED); break; case IRDMA_CM_EVENT_ABORTED: if (!event->cm_node->cm_id || event->cm_node->state == IRDMA_CM_STATE_OFFLOADED) break; irdma_event_connect_error(event); break; default: irdma_debug(&cm_node->iwdev->rf->sc_dev, IRDMA_DEBUG_CM, - "bad event type = %d\n", - event->type); + "bad event type = %d\n", event->type); break; } irdma_rem_ref_cm_node(event->cm_node); kfree(event); } /** * irdma_cm_post_event - queue event request for worker thread * @event: cm node's info for up event call */ static void irdma_cm_post_event(struct irdma_cm_event *event) { atomic_inc(&event->cm_node->refcnt); INIT_WORK(&event->event_work, irdma_cm_event_handler); queue_work(event->cm_node->cm_core->event_wq, &event->event_work); } /** * irdma_cm_teardown_connections - teardown QPs * @iwdev: device pointer * @ipaddr: Pointer to IPv4 or IPv6 address * @nfo: Connection info * @disconnect_all: flag indicating disconnect all QPs * * teardown QPs where source or destination addr matches ip addr */ static void __unused irdma_cm_teardown_connections(struct irdma_device *iwdev, u32 *ipaddr, struct irdma_cm_info *nfo, bool disconnect_all) { struct irdma_cm_core *cm_core = &iwdev->cm_core; struct list_head *list_core_temp; struct list_head *list_node; struct irdma_cm_node *cm_node; struct list_head teardown_list; struct ib_qp_attr attr; struct irdma_qp *qp; INIT_LIST_HEAD(&teardown_list); rcu_read_lock(); irdma_iw_teardown_list_prep(cm_core, &teardown_list, ipaddr, nfo, disconnect_all); rcu_read_unlock(); attr.qp_state = IB_QPS_ERR; list_for_each_safe(list_node, list_core_temp, &teardown_list) { cm_node = container_of(list_node, struct irdma_cm_node, teardown_entry); irdma_modify_qp(&cm_node->iwqp->ibqp, &attr, IB_QP_STATE, NULL); if (iwdev->rf->reset) irdma_cm_disconn(cm_node->iwqp); irdma_rem_ref_cm_node(cm_node); } if (!rdma_protocol_roce(&iwdev->ibdev, 1)) return; INIT_LIST_HEAD(&teardown_list); irdma_roce_teardown_list_prep(iwdev, &teardown_list, ipaddr, nfo, disconnect_all); list_for_each_safe(list_node, list_core_temp, &teardown_list) { qp = container_of(list_node, struct irdma_qp, teardown_entry); irdma_modify_qp_roce(&qp->ibqp, &attr, IB_QP_STATE, NULL); irdma_ib_qp_event(qp, IRDMA_QP_EVENT_CATASTROPHIC); irdma_qp_rem_ref(&qp->ibqp); } } diff --git a/sys/dev/irdma/irdma_cm.h 
b/sys/dev/irdma/irdma_cm.h index 96c4f4c6e29d..36cebdb5bf19 100644 --- a/sys/dev/irdma/irdma_cm.h +++ b/sys/dev/irdma/irdma_cm.h @@ -1,441 +1,448 @@ /*- * SPDX-License-Identifier: GPL-2.0 or Linux-OpenIB * - * Copyright (c) 2015 - 2022 Intel Corporation + * Copyright (c) 2015 - 2023 Intel Corporation * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU * General Public License (GPL) Version 2, available from the file * COPYING in the main directory of this source tree, or the * OpenFabrics.org BSD license below: * * Redistribution and use in source and binary forms, with or * without modification, are permitted provided that the following * conditions are met: * * - Redistributions of source code must retain the above * copyright notice, this list of conditions and the following * disclaimer. * * - Redistributions in binary form must reproduce the above * copyright notice, this list of conditions and the following * disclaimer in the documentation and/or other materials * provided with the distribution. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ #ifndef IRDMA_CM_H #define IRDMA_CM_H #define IRDMA_MPA_REQUEST_ACCEPT 1 #define IRDMA_MPA_REQUEST_REJECT 2 /* IETF MPA -- defines */ #define IEFT_MPA_KEY_REQ "MPA ID Req Frame" #define IEFT_MPA_KEY_REP "MPA ID Rep Frame" #define IETF_MPA_KEY_SIZE 16 #define IETF_MPA_VER 1 #define IETF_MAX_PRIV_DATA_LEN 512 #define IETF_MPA_FRAME_SIZE 20 #define IETF_RTR_MSG_SIZE 4 #define IETF_MPA_V2_FLAG 0x10 #define SNDMARKER_SEQNMASK 0x000001ff #define IRDMA_MAX_IETF_SIZE 32 /* IETF RTR MSG Fields */ #define IETF_PEER_TO_PEER 0x8000 #define IETF_FLPDU_ZERO_LEN 0x4000 #define IETF_RDMA0_WRITE 0x8000 #define IETF_RDMA0_READ 0x4000 #define IETF_NO_IRD_ORD 0x3fff #define MAX_PORTS 65536 #define IRDMA_PASSIVE_STATE_INDICATED 0 #define IRDMA_DO_NOT_SEND_RESET_EVENT 1 #define IRDMA_SEND_RESET_EVENT 2 #define MAX_IRDMA_IFS 4 #define SET_ACK 1 #define SET_SYN 2 #define SET_FIN 4 #define SET_RST 8 #define TCP_OPTIONS_PADDING 3 #define IRDMA_DEFAULT_RETRYS 64 #define IRDMA_DEFAULT_RETRANS 32 #define IRDMA_DEFAULT_TTL 0x40 #define IRDMA_DEFAULT_RTT_VAR 6 #define IRDMA_DEFAULT_SS_THRESH 0x3fffffff #define IRDMA_DEFAULT_REXMIT_THRESH 8 #define IRDMA_RETRY_TIMEOUT HZ #define IRDMA_SHORT_TIME 10 #define IRDMA_LONG_TIME (2 * HZ) #define IRDMA_MAX_TIMEOUT ((unsigned long)(12 * HZ)) #define IRDMA_CM_HASHTABLE_SIZE 1024 #define IRDMA_CM_TCP_TIMER_INTERVAL 3000 #define IRDMA_CM_DEFAULT_MTU 1540 #define IRDMA_CM_DEFAULT_FRAME_CNT 10 #define IRDMA_CM_THREAD_STACK_SIZE 256 #define IRDMA_CM_DEFAULT_RCV_WND 64240 #define IRDMA_CM_DEFAULT_RCV_WND_SCALED 0x3FFFC #define IRDMA_CM_DEFAULT_RCV_WND_SCALE 2 #define IRDMA_CM_DEFAULT_FREE_PKTS 10 #define IRDMA_CM_FREE_PKT_LO_WATERMARK 2 #define IRDMA_CM_DEFAULT_MSS 536 #define IRDMA_CM_DEFAULT_MPA_VER 2 #define IRDMA_CM_DEFAULT_SEQ 0x159bf75f #define IRDMA_CM_DEFAULT_LOCAL_ID 0x3b47 #define IRDMA_CM_DEFAULT_SEQ2 0x18ed5740 #define IRDMA_CM_DEFAULT_LOCAL_ID2 0xb807 #define IRDMA_MAX_CM_BUF (IRDMA_MAX_IETF_SIZE + 
IETF_MAX_PRIV_DATA_LEN) enum ietf_mpa_flags { IETF_MPA_FLAGS_REJECT = 0x20, IETF_MPA_FLAGS_CRC = 0x40, IETF_MPA_FLAGS_MARKERS = 0x80, }; enum irdma_timer_type { IRDMA_TIMER_TYPE_SEND, IRDMA_TIMER_TYPE_CLOSE, }; enum option_nums { OPTION_NUM_EOL, OPTION_NUM_NONE, OPTION_NUM_MSS, OPTION_NUM_WINDOW_SCALE, OPTION_NUM_SACK_PERM, OPTION_NUM_SACK, OPTION_NUM_WRITE0 = 0xbc, }; /* cm node transition states */ enum irdma_cm_node_state { IRDMA_CM_STATE_UNKNOWN, IRDMA_CM_STATE_INITED, IRDMA_CM_STATE_LISTENING, IRDMA_CM_STATE_SYN_RCVD, IRDMA_CM_STATE_SYN_SENT, IRDMA_CM_STATE_ONE_SIDE_ESTABLISHED, IRDMA_CM_STATE_ESTABLISHED, IRDMA_CM_STATE_ACCEPTING, IRDMA_CM_STATE_MPAREQ_SENT, IRDMA_CM_STATE_MPAREQ_RCVD, IRDMA_CM_STATE_MPAREJ_RCVD, IRDMA_CM_STATE_OFFLOADED, IRDMA_CM_STATE_FIN_WAIT1, IRDMA_CM_STATE_FIN_WAIT2, IRDMA_CM_STATE_CLOSE_WAIT, IRDMA_CM_STATE_TIME_WAIT, IRDMA_CM_STATE_LAST_ACK, IRDMA_CM_STATE_CLOSING, IRDMA_CM_STATE_LISTENER_DESTROYED, IRDMA_CM_STATE_CLOSED, }; enum mpa_frame_ver { IETF_MPA_V1 = 1, IETF_MPA_V2 = 2, }; enum mpa_frame_key { MPA_KEY_REQUEST, MPA_KEY_REPLY, }; enum send_rdma0 { SEND_RDMA_READ_ZERO = 1, SEND_RDMA_WRITE_ZERO = 2, }; enum irdma_tcpip_pkt_type { IRDMA_PKT_TYPE_UNKNOWN, IRDMA_PKT_TYPE_SYN, IRDMA_PKT_TYPE_SYNACK, IRDMA_PKT_TYPE_ACK, IRDMA_PKT_TYPE_FIN, IRDMA_PKT_TYPE_RST, }; enum irdma_cm_listener_state { IRDMA_CM_LISTENER_PASSIVE_STATE = 1, IRDMA_CM_LISTENER_ACTIVE_STATE = 2, IRDMA_CM_LISTENER_EITHER_STATE = 3, }; /* CM event codes */ enum irdma_cm_event_type { IRDMA_CM_EVENT_UNKNOWN, IRDMA_CM_EVENT_ESTABLISHED, IRDMA_CM_EVENT_MPA_REQ, IRDMA_CM_EVENT_MPA_CONNECT, IRDMA_CM_EVENT_MPA_ACCEPT, IRDMA_CM_EVENT_MPA_REJECT, IRDMA_CM_EVENT_MPA_ESTABLISHED, IRDMA_CM_EVENT_CONNECTED, IRDMA_CM_EVENT_RESET, IRDMA_CM_EVENT_ABORTED, }; struct ietf_mpa_v1 { u8 key[IETF_MPA_KEY_SIZE]; u8 flags; u8 rev; __be16 priv_data_len; u8 priv_data[]; }; struct ietf_rtr_msg { __be16 ctrl_ird; __be16 ctrl_ord; }; struct ietf_mpa_v2 { u8 key[IETF_MPA_KEY_SIZE]; u8 flags; u8 rev; __be16 priv_data_len; struct ietf_rtr_msg rtr_msg; u8 priv_data[]; }; struct option_base { u8 optionnum; u8 len; }; struct option_mss { u8 optionnum; u8 len; __be16 mss; }; struct option_windowscale { u8 optionnum; u8 len; u8 shiftcount; }; union all_known_options { char eol; struct option_base base; struct option_mss mss; struct option_windowscale windowscale; }; struct irdma_timer_entry { struct list_head list; unsigned long timetosend; /* jiffies */ struct irdma_puda_buf *sqbuf; u32 type; u32 retrycount; u32 retranscount; u32 context; u32 send_retrans; int close_when_complete; }; /* CM context params */ struct irdma_cm_tcp_context { u8 client; u32 loc_seq_num; u32 loc_ack_num; u32 rem_ack_num; u32 rcv_nxt; u32 loc_id; u32 rem_id; u32 snd_wnd; u32 max_snd_wnd; u32 rcv_wnd; u32 mss; u8 snd_wscale; u8 rcv_wscale; }; struct irdma_apbvt_entry { struct hlist_node hlist; u32 use_cnt; u16 port; }; struct irdma_cm_listener { struct list_head list; struct iw_cm_id *cm_id; struct irdma_cm_core *cm_core; struct irdma_device *iwdev; struct list_head child_listen_list; struct irdma_apbvt_entry *apbvt_entry; enum irdma_cm_listener_state listener_state; atomic_t refcnt; atomic_t pend_accepts_cnt; u32 loc_addr[4]; u32 reused_node; int backlog; u16 loc_port; u16 vlan_id; - u8 loc_mac[ETH_ALEN]; + u8 loc_mac[ETHER_ADDR_LEN]; u8 user_pri; u8 tos; bool qhash_set:1; bool ipv4:1; }; struct irdma_kmem_info { void *addr; u32 size; }; struct irdma_mpa_priv_info { const void *addr; u32 size; }; struct irdma_cm_node { struct irdma_qp *iwqp; struct 
irdma_device *iwdev; struct irdma_sc_dev *dev; struct irdma_cm_tcp_context tcp_cntxt; struct irdma_cm_core *cm_core; struct irdma_timer_entry *send_entry; struct irdma_timer_entry *close_entry; struct irdma_cm_listener *listener; struct list_head timer_entry; struct list_head reset_entry; struct list_head teardown_entry; struct irdma_apbvt_entry *apbvt_entry; struct rcu_head rcu_head; struct irdma_mpa_priv_info pdata; struct irdma_sc_ah *ah; struct irdma_kmem_info mpa_hdr; struct iw_cm_id *cm_id; struct hlist_node list; struct completion establish_comp; spinlock_t retrans_list_lock; /* protect CM node rexmit updates*/ atomic_t passive_state; atomic_t refcnt; enum irdma_cm_node_state state; enum send_rdma0 send_rdma0_op; enum mpa_frame_ver mpa_frame_rev; u32 loc_addr[4], rem_addr[4]; u16 loc_port, rem_port; int apbvt_set; int accept_pend; u16 vlan_id; u16 ird_size; u16 ord_size; u16 mpav2_ird_ord; u16 lsmm_size; u8 pdata_buf[IETF_MAX_PRIV_DATA_LEN]; - u8 loc_mac[ETH_ALEN]; - u8 rem_mac[ETH_ALEN]; + u8 loc_mac[ETHER_ADDR_LEN]; + u8 rem_mac[ETHER_ADDR_LEN]; u8 user_pri; u8 tos; bool ack_rcvd:1; bool qhash_set:1; bool ipv4:1; bool snd_mark_en:1; bool rcv_mark_en:1; bool do_lpb:1; bool accelerated:1; struct ietf_mpa_v2 mpa_v2_frame; }; /* Used by internal CM APIs to pass CM information*/ struct irdma_cm_info { struct iw_cm_id *cm_id; + struct irdma_cqp_request *cqp_request; u16 loc_port; u16 rem_port; u32 loc_addr[4]; u32 rem_addr[4]; u32 qh_qpid; u16 vlan_id; int backlog; u8 user_pri; u8 tos; bool ipv4; }; struct irdma_cm_event { enum irdma_cm_event_type type; struct irdma_cm_info cm_info; struct work_struct event_work; struct irdma_cm_node *cm_node; }; struct irdma_cm_core { struct irdma_device *iwdev; struct irdma_sc_dev *dev; struct list_head listen_list; DECLARE_HASHTABLE(cm_hash_tbl, 8); DECLARE_HASHTABLE(apbvt_hash_tbl, 8); struct timer_list tcp_timer; struct workqueue_struct *event_wq; spinlock_t ht_lock; /* protect CM node (active side) list */ spinlock_t listen_list_lock; /* protect listener list */ spinlock_t apbvt_lock; /*serialize apbvt add/del entries*/ u64 stats_nodes_created; u64 stats_nodes_destroyed; u64 stats_listen_created; u64 stats_listen_destroyed; u64 stats_listen_nodes_created; u64 stats_listen_nodes_destroyed; u64 stats_lpbs; u64 stats_accepts; u64 stats_rejects; u64 stats_connect_errs; u64 stats_passive_errs; u64 stats_pkt_retrans; u64 stats_backlog_drops; struct irdma_puda_buf *(*form_cm_frame)(struct irdma_cm_node *cm_node, struct irdma_kmem_info *options, struct irdma_kmem_info *hdr, struct irdma_mpa_priv_info *pdata, u8 flags); int (*cm_create_ah)(struct irdma_cm_node *cm_node, bool wait); void (*cm_free_ah)(struct irdma_cm_node *cm_node); }; +struct irdma_add_mqh_cbs { + struct irdma_device *iwdev; + struct irdma_cm_info *cm_info; + struct irdma_cm_listener *cm_listen_node; +}; + int irdma_schedule_cm_timer(struct irdma_cm_node *cm_node, struct irdma_puda_buf *sqbuf, enum irdma_timer_type type, int send_retrans, int close_when_complete); static inline u8 irdma_tos2dscp(u8 tos) { #define IRDMA_DSCP_S 2 #define IRDMA_DSCP GENMASK(7, 2) return FIELD_GET(IRDMA_DSCP, tos); } int irdma_accept(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param); int irdma_reject(struct iw_cm_id *cm_id, const void *pdata, u8 pdata_len); int irdma_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param); int irdma_create_listen(struct iw_cm_id *cm_id, int backlog); int irdma_destroy_listen(struct iw_cm_id *cm_id); int irdma_add_arp(struct irdma_pci_f *rf, u32 *ip, 
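/*
 * Reviewer note (illustrative, not part of the patch): irdma_tos2dscp() above simply
 * extracts the 6-bit DSCP field, bits 7..2 of the IP TOS byte. Worked example:
 * for TOS 0xB8 (the usual Expedited Forwarding marking),
 * FIELD_GET(IRDMA_DSCP, 0xB8) == (0xB8 & 0xFC) >> 2 == 46, so dscp_map[46] is the
 * entry consulted when vsi.dscp_mode is set (see irdma_create_listen() earlier in
 * this diff).
 */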
const u8 *mac); int irdma_cm_start(struct irdma_device *dev); int irdma_cm_stop(struct irdma_device *dev); bool irdma_ipv4_is_lpb(u32 loc_addr, u32 rem_addr); bool irdma_ipv6_is_lpb(u32 *loc_addr, u32 *rem_addr); int irdma_arp_table(struct irdma_pci_f *rf, u32 *ip_addr, const u8 *mac_addr, u32 action); bool irdma_port_in_use(struct irdma_cm_core *cm_core, u16 port); void irdma_send_ack(struct irdma_cm_node *cm_node); void irdma_lpb_nop(struct irdma_sc_qp *qp); void irdma_rem_ref_cm_node(struct irdma_cm_node *cm_node); void irdma_add_conn_est_qh(struct irdma_cm_node *cm_node); #endif /* IRDMA_CM_H */ diff --git a/sys/dev/irdma/irdma_ctrl.c b/sys/dev/irdma/irdma_ctrl.c index 6bd0520e9bb8..675ce800eb6b 100644 --- a/sys/dev/irdma/irdma_ctrl.c +++ b/sys/dev/irdma/irdma_ctrl.c @@ -1,5644 +1,5596 @@ /*- * SPDX-License-Identifier: GPL-2.0 or Linux-OpenIB * - * Copyright (c) 2015 - 2022 Intel Corporation + * Copyright (c) 2015 - 2023 Intel Corporation * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU * General Public License (GPL) Version 2, available from the file * COPYING in the main directory of this source tree, or the * OpenFabrics.org BSD license below: * * Redistribution and use in source and binary forms, with or * without modification, are permitted provided that the following * conditions are met: * * - Redistributions of source code must retain the above * copyright notice, this list of conditions and the following * disclaimer. * * - Redistributions in binary form must reproduce the above * copyright notice, this list of conditions and the following * disclaimer in the documentation and/or other materials * provided with the distribution. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
*/ #include "osdep.h" #include "irdma_hmc.h" #include "irdma_defs.h" #include "irdma_type.h" #include "irdma_ws.h" #include "irdma_protos.h" /** * irdma_qp_from_entry - Given entry, get to the qp structure * @entry: Points to list of qp structure */ static struct irdma_sc_qp * irdma_qp_from_entry(struct list_head *entry) { if (!entry) return NULL; return (struct irdma_sc_qp *)((char *)entry - offsetof(struct irdma_sc_qp, list)); } /** * irdma_get_qp_from_list - get next qp from a list * @head: Listhead of qp's * @qp: current qp */ struct irdma_sc_qp * irdma_get_qp_from_list(struct list_head *head, struct irdma_sc_qp *qp) { struct list_head *lastentry; struct list_head *entry = NULL; if (list_empty(head)) return NULL; if (!qp) { entry = (head)->next; } else { lastentry = &qp->list; entry = (lastentry)->next; if (entry == head) return NULL; } return irdma_qp_from_entry(entry); } /** * irdma_sc_suspend_resume_qps - suspend/resume all qp's on VSI * @vsi: the VSI struct pointer * @op: Set to IRDMA_OP_RESUME or IRDMA_OP_SUSPEND */ void irdma_sc_suspend_resume_qps(struct irdma_sc_vsi *vsi, u8 op) { struct irdma_sc_qp *qp = NULL; u8 i; for (i = 0; i < IRDMA_MAX_USER_PRIORITY; i++) { mutex_lock(&vsi->qos[i].qos_mutex); qp = irdma_get_qp_from_list(&vsi->qos[i].qplist, qp); while (qp) { if (op == IRDMA_OP_RESUME) { if (!qp->dev->ws_add(vsi, i)) { qp->qs_handle = vsi->qos[qp->user_pri].qs_handle; irdma_cqp_qp_suspend_resume(qp, op); } else { irdma_cqp_qp_suspend_resume(qp, op); irdma_modify_qp_to_err(qp); } } else if (op == IRDMA_OP_SUSPEND) { /* issue cqp suspend command */ if (!irdma_cqp_qp_suspend_resume(qp, op)) atomic_inc(&vsi->qp_suspend_reqs); } qp = irdma_get_qp_from_list(&vsi->qos[i].qplist, qp); } mutex_unlock(&vsi->qos[i].qos_mutex); } } static void irdma_set_qos_info(struct irdma_sc_vsi *vsi, struct irdma_l2params *l2p) { u8 i; vsi->qos_rel_bw = l2p->vsi_rel_bw; vsi->qos_prio_type = l2p->vsi_prio_type; vsi->dscp_mode = l2p->dscp_mode; if (l2p->dscp_mode) { irdma_memcpy(vsi->dscp_map, l2p->dscp_map, sizeof(vsi->dscp_map)); for (i = 0; i < IRDMA_MAX_USER_PRIORITY; i++) l2p->up2tc[i] = i; } for (i = 0; i < IRDMA_MAX_TRAFFIC_CLASS; i++) vsi->tc_print_warning[i] = true; for (i = 0; i < IRDMA_MAX_USER_PRIORITY; i++) { if (vsi->dev->hw_attrs.uk_attrs.hw_rev == IRDMA_GEN_1) vsi->qos[i].qs_handle = l2p->qs_handle_list[i]; if (vsi->dev->hw_attrs.uk_attrs.hw_rev == IRDMA_GEN_2) irdma_init_config_check(&vsi->cfg_check[i], l2p->up2tc[i], l2p->qs_handle_list[i]); vsi->qos[i].traffic_class = l2p->up2tc[i]; vsi->qos[i].rel_bw = l2p->tc_info[vsi->qos[i].traffic_class].rel_bw; vsi->qos[i].prio_type = l2p->tc_info[vsi->qos[i].traffic_class].prio_type; vsi->qos[i].valid = false; } } /** * irdma_change_l2params - given the new l2 parameters, change all qp * @vsi: RDMA VSI pointer * @l2params: New parameters from l2 */ void irdma_change_l2params(struct irdma_sc_vsi *vsi, struct irdma_l2params *l2params) { if (l2params->tc_changed) { vsi->tc_change_pending = false; irdma_set_qos_info(vsi, l2params); irdma_sc_suspend_resume_qps(vsi, IRDMA_OP_RESUME); } if (l2params->mtu_changed) { vsi->mtu = l2params->mtu; if (vsi->ieq) irdma_reinitialize_ieq(vsi); } } /** * irdma_qp_rem_qos - remove qp from qos lists during destroy qp * @qp: qp to be removed from qos */ void irdma_qp_rem_qos(struct irdma_sc_qp *qp) { struct irdma_sc_vsi *vsi = qp->vsi; irdma_debug(qp->dev, IRDMA_DEBUG_DCB, "DCB: Remove qp[%d] UP[%d] qset[%d] on_qoslist[%d]\n", - qp->qp_uk.qp_id, qp->user_pri, qp->qs_handle, qp->on_qoslist); + qp->qp_uk.qp_id, 
qp->user_pri, qp->qs_handle, + qp->on_qoslist); mutex_lock(&vsi->qos[qp->user_pri].qos_mutex); if (qp->on_qoslist) { qp->on_qoslist = false; list_del(&qp->list); } mutex_unlock(&vsi->qos[qp->user_pri].qos_mutex); } /** * irdma_qp_add_qos - called during setctx for qp to be added to qos * @qp: qp to be added to qos */ void irdma_qp_add_qos(struct irdma_sc_qp *qp) { struct irdma_sc_vsi *vsi = qp->vsi; irdma_debug(qp->dev, IRDMA_DEBUG_DCB, "DCB: Add qp[%d] UP[%d] qset[%d] on_qoslist[%d]\n", - qp->qp_uk.qp_id, qp->user_pri, qp->qs_handle, qp->on_qoslist); + qp->qp_uk.qp_id, qp->user_pri, qp->qs_handle, + qp->on_qoslist); mutex_lock(&vsi->qos[qp->user_pri].qos_mutex); if (!qp->on_qoslist) { list_add(&qp->list, &vsi->qos[qp->user_pri].qplist); qp->on_qoslist = true; qp->qs_handle = vsi->qos[qp->user_pri].qs_handle; } mutex_unlock(&vsi->qos[qp->user_pri].qos_mutex); } /** * irdma_sc_pd_init - initialize sc pd struct * @dev: sc device struct * @pd: sc pd ptr * @pd_id: pd_id for allocated pd * @abi_ver: User/Kernel ABI version */ void irdma_sc_pd_init(struct irdma_sc_dev *dev, struct irdma_sc_pd *pd, u32 pd_id, int abi_ver) { pd->pd_id = pd_id; pd->abi_ver = abi_ver; pd->dev = dev; } /** * irdma_sc_add_arp_cache_entry - cqp wqe add arp cache entry * @cqp: struct for cqp hw * @info: arp entry information * @scratch: u64 saved to be used during cqp completion * @post_sq: flag for cqp db to ring */ static int irdma_sc_add_arp_cache_entry(struct irdma_sc_cqp *cqp, struct irdma_add_arp_cache_entry_info *info, u64 scratch, bool post_sq) { __le64 *wqe; - u64 temp, hdr; + u64 hdr; wqe = irdma_sc_cqp_get_next_send_wqe(cqp, scratch); if (!wqe) return -ENOSPC; set_64bit_val(wqe, IRDMA_BYTE_8, info->reach_max); - temp = info->mac_addr[5] | LS_64_1(info->mac_addr[4], 8) | - LS_64_1(info->mac_addr[3], 16) | LS_64_1(info->mac_addr[2], 24) | - LS_64_1(info->mac_addr[1], 32) | LS_64_1(info->mac_addr[0], 40); - set_64bit_val(wqe, IRDMA_BYTE_16, temp); + set_64bit_val(wqe, IRDMA_BYTE_16, irdma_mac_to_u64(info->mac_addr)); hdr = info->arp_index | FIELD_PREP(IRDMA_CQPSQ_OPCODE, IRDMA_CQP_OP_MANAGE_ARP) | FIELD_PREP(IRDMA_CQPSQ_MAT_PERMANENT, info->permanent) | FIELD_PREP(IRDMA_CQPSQ_MAT_ENTRYVALID, true) | FIELD_PREP(IRDMA_CQPSQ_WQEVALID, cqp->polarity); irdma_wmb(); /* make sure WQE is written before valid bit is set */ set_64bit_val(wqe, IRDMA_BYTE_24, hdr); irdma_debug_buf(cqp->dev, IRDMA_DEBUG_WQE, "ARP_CACHE_ENTRY WQE", wqe, IRDMA_CQP_WQE_SIZE * 8); if (post_sq) irdma_sc_cqp_post_sq(cqp); return 0; } /** * irdma_sc_del_arp_cache_entry - dele arp cache entry * @cqp: struct for cqp hw * @scratch: u64 saved to be used during cqp completion * @arp_index: arp index to delete arp entry * @post_sq: flag for cqp db to ring */ static int irdma_sc_del_arp_cache_entry(struct irdma_sc_cqp *cqp, u64 scratch, u16 arp_index, bool post_sq) { __le64 *wqe; u64 hdr; wqe = irdma_sc_cqp_get_next_send_wqe(cqp, scratch); if (!wqe) return -ENOSPC; hdr = arp_index | FIELD_PREP(IRDMA_CQPSQ_OPCODE, IRDMA_CQP_OP_MANAGE_ARP) | FIELD_PREP(IRDMA_CQPSQ_WQEVALID, cqp->polarity); irdma_wmb(); /* make sure WQE is written before valid bit is set */ set_64bit_val(wqe, IRDMA_BYTE_24, hdr); irdma_debug_buf(cqp->dev, IRDMA_DEBUG_WQE, "ARP_CACHE_DEL_ENTRY WQE", wqe, IRDMA_CQP_WQE_SIZE * 8); if (post_sq) irdma_sc_cqp_post_sq(cqp); return 0; } /** * irdma_sc_manage_apbvt_entry - for adding and deleting apbvt entries * @cqp: struct for cqp hw * @info: info for apbvt entry to add or delete * @scratch: u64 saved to be used during cqp completion * @post_sq: 
flag for cqp db to ring */ static int irdma_sc_manage_apbvt_entry(struct irdma_sc_cqp *cqp, struct irdma_apbvt_info *info, u64 scratch, bool post_sq) { __le64 *wqe; u64 hdr; wqe = irdma_sc_cqp_get_next_send_wqe(cqp, scratch); if (!wqe) return -ENOSPC; set_64bit_val(wqe, IRDMA_BYTE_16, info->port); hdr = FIELD_PREP(IRDMA_CQPSQ_OPCODE, IRDMA_CQP_OP_MANAGE_APBVT) | FIELD_PREP(IRDMA_CQPSQ_MAPT_ADDPORT, info->add) | FIELD_PREP(IRDMA_CQPSQ_WQEVALID, cqp->polarity); irdma_wmb(); /* make sure WQE is written before valid bit is set */ set_64bit_val(wqe, IRDMA_BYTE_24, hdr); irdma_debug_buf(cqp->dev, IRDMA_DEBUG_WQE, "MANAGE_APBVT WQE", wqe, IRDMA_CQP_WQE_SIZE * 8); if (post_sq) irdma_sc_cqp_post_sq(cqp); return 0; } /** * irdma_sc_manage_qhash_table_entry - manage quad hash entries * @cqp: struct for cqp hw * @info: info for quad hash to manage * @scratch: u64 saved to be used during cqp completion * @post_sq: flag for cqp db to ring * * This is called before connection establishment is started. * For passive connections, when listener is created, it will * call with entry type of IRDMA_QHASH_TYPE_TCP_SYN with local * ip address and tcp port. When SYN is received (passive * connections) or sent (active connections), this routine is * called with entry type of IRDMA_QHASH_TYPE_TCP_ESTABLISHED * and quad is passed in info. * * When iwarp connection is done and its state moves to RTS, the * quad hash entry in the hardware will point to iwarp's qp * number and requires no calls from the driver. */ static int irdma_sc_manage_qhash_table_entry(struct irdma_sc_cqp *cqp, struct irdma_qhash_table_info *info, u64 scratch, bool post_sq) { __le64 *wqe; u64 qw1 = 0; u64 qw2 = 0; u64 temp; struct irdma_sc_vsi *vsi = info->vsi; wqe = irdma_sc_cqp_get_next_send_wqe(cqp, scratch); if (!wqe) return -ENOSPC; - temp = info->mac_addr[5] | LS_64_1(info->mac_addr[4], 8) | - LS_64_1(info->mac_addr[3], 16) | LS_64_1(info->mac_addr[2], 24) | - LS_64_1(info->mac_addr[1], 32) | LS_64_1(info->mac_addr[0], 40); - set_64bit_val(wqe, IRDMA_BYTE_0, temp); + set_64bit_val(wqe, IRDMA_BYTE_0, irdma_mac_to_u64(info->mac_addr)); qw1 = FIELD_PREP(IRDMA_CQPSQ_QHASH_QPN, info->qp_num) | FIELD_PREP(IRDMA_CQPSQ_QHASH_DEST_PORT, info->dest_port); if (info->ipv4_valid) { set_64bit_val(wqe, IRDMA_BYTE_48, FIELD_PREP(IRDMA_CQPSQ_QHASH_ADDR3, info->dest_ip[0])); } else { set_64bit_val(wqe, IRDMA_BYTE_56, FIELD_PREP(IRDMA_CQPSQ_QHASH_ADDR0, info->dest_ip[0]) | FIELD_PREP(IRDMA_CQPSQ_QHASH_ADDR1, info->dest_ip[1])); set_64bit_val(wqe, IRDMA_BYTE_48, FIELD_PREP(IRDMA_CQPSQ_QHASH_ADDR2, info->dest_ip[2]) | FIELD_PREP(IRDMA_CQPSQ_QHASH_ADDR3, info->dest_ip[3])); } qw2 = FIELD_PREP(IRDMA_CQPSQ_QHASH_QS_HANDLE, vsi->qos[info->user_pri].qs_handle); if (info->vlan_valid) qw2 |= FIELD_PREP(IRDMA_CQPSQ_QHASH_VLANID, info->vlan_id); set_64bit_val(wqe, IRDMA_BYTE_16, qw2); if (info->entry_type == IRDMA_QHASH_TYPE_TCP_ESTABLISHED) { qw1 |= FIELD_PREP(IRDMA_CQPSQ_QHASH_SRC_PORT, info->src_port); if (!info->ipv4_valid) { set_64bit_val(wqe, IRDMA_BYTE_40, FIELD_PREP(IRDMA_CQPSQ_QHASH_ADDR0, info->src_ip[0]) | FIELD_PREP(IRDMA_CQPSQ_QHASH_ADDR1, info->src_ip[1])); set_64bit_val(wqe, IRDMA_BYTE_32, FIELD_PREP(IRDMA_CQPSQ_QHASH_ADDR2, info->src_ip[2]) | FIELD_PREP(IRDMA_CQPSQ_QHASH_ADDR3, info->src_ip[3])); } else { set_64bit_val(wqe, IRDMA_BYTE_32, FIELD_PREP(IRDMA_CQPSQ_QHASH_ADDR3, info->src_ip[0])); } } set_64bit_val(wqe, IRDMA_BYTE_8, qw1); temp = FIELD_PREP(IRDMA_CQPSQ_QHASH_WQEVALID, cqp->polarity) | FIELD_PREP(IRDMA_CQPSQ_QHASH_OPCODE, 
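/*
 * Reviewer note (illustrative, not part of the patch): the two entry types described
 * in the comment above correspond to the two driver-level call sites visible in the
 * CM code earlier in this diff. A listener adds a "half open" entry keyed only on the
 * local address and port:
 *
 *	irdma_manage_qhash(iwdev, &cm_info, IRDMA_QHASH_TYPE_TCP_SYN,
 *			   IRDMA_QHASH_MANAGE_TYPE_ADD, NULL, true);
 *
 * while an active connect adds the full four-tuple once the quad is known:
 *
 *	irdma_manage_qhash(iwdev, &cm_info, IRDMA_QHASH_TYPE_TCP_ESTABLISHED,
 *			   IRDMA_QHASH_MANAGE_TYPE_ADD, NULL, true);
 *
 * Both calls are quoted from the CM code shown above, not introduced here.
 */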
IRDMA_CQP_OP_MANAGE_QUAD_HASH_TABLE_ENTRY) | FIELD_PREP(IRDMA_CQPSQ_QHASH_MANAGE, info->manage) | FIELD_PREP(IRDMA_CQPSQ_QHASH_IPV4VALID, info->ipv4_valid) | FIELD_PREP(IRDMA_CQPSQ_QHASH_VLANVALID, info->vlan_valid) | FIELD_PREP(IRDMA_CQPSQ_QHASH_ENTRYTYPE, info->entry_type); irdma_wmb(); /* make sure WQE is written before valid bit is set */ set_64bit_val(wqe, IRDMA_BYTE_24, temp); irdma_debug_buf(cqp->dev, IRDMA_DEBUG_WQE, "MANAGE_QHASH WQE", wqe, IRDMA_CQP_WQE_SIZE * 8); if (post_sq) irdma_sc_cqp_post_sq(cqp); return 0; } /** * irdma_sc_qp_init - initialize qp * @qp: sc qp * @info: initialization qp info */ int irdma_sc_qp_init(struct irdma_sc_qp *qp, struct irdma_qp_init_info *info) { int ret_code; u32 pble_obj_cnt; u16 wqe_size; if (info->qp_uk_init_info.max_sq_frag_cnt > info->pd->dev->hw_attrs.uk_attrs.max_hw_wq_frags || info->qp_uk_init_info.max_rq_frag_cnt > info->pd->dev->hw_attrs.uk_attrs.max_hw_wq_frags) return -EINVAL; qp->dev = info->pd->dev; qp->vsi = info->vsi; qp->ieq_qp = info->vsi->exception_lan_q; qp->sq_pa = info->sq_pa; qp->rq_pa = info->rq_pa; qp->hw_host_ctx_pa = info->host_ctx_pa; qp->q2_pa = info->q2_pa; qp->shadow_area_pa = info->shadow_area_pa; qp->q2_buf = info->q2; qp->pd = info->pd; qp->hw_host_ctx = info->host_ctx; info->qp_uk_init_info.wqe_alloc_db = qp->pd->dev->wqe_alloc_db; ret_code = irdma_uk_qp_init(&qp->qp_uk, &info->qp_uk_init_info); if (ret_code) return ret_code; qp->virtual_map = info->virtual_map; pble_obj_cnt = info->pd->dev->hmc_info->hmc_obj[IRDMA_HMC_IW_PBLE].cnt; if ((info->virtual_map && info->sq_pa >= pble_obj_cnt) || (info->virtual_map && info->rq_pa >= pble_obj_cnt)) return -EINVAL; qp->llp_stream_handle = (void *)(-1); qp->hw_sq_size = irdma_get_encoded_wqe_size(qp->qp_uk.sq_ring.size, IRDMA_QUEUE_TYPE_SQ_RQ); irdma_debug(qp->dev, IRDMA_DEBUG_WQE, "hw_sq_size[%04d] sq_ring.size[%04d]\n", qp->hw_sq_size, qp->qp_uk.sq_ring.size); if (qp->qp_uk.uk_attrs->hw_rev == IRDMA_GEN_1) wqe_size = IRDMA_WQE_SIZE_128; else ret_code = irdma_fragcnt_to_wqesize_rq(qp->qp_uk.max_rq_frag_cnt, &wqe_size); if (ret_code) return ret_code; qp->hw_rq_size = irdma_get_encoded_wqe_size(qp->qp_uk.rq_size * (wqe_size / IRDMA_QP_WQE_MIN_SIZE), IRDMA_QUEUE_TYPE_SQ_RQ); irdma_debug(qp->dev, IRDMA_DEBUG_WQE, "hw_rq_size[%04d] qp_uk.rq_size[%04d] wqe_size[%04d]\n", qp->hw_rq_size, qp->qp_uk.rq_size, wqe_size); qp->sq_tph_val = info->sq_tph_val; qp->rq_tph_val = info->rq_tph_val; qp->sq_tph_en = info->sq_tph_en; qp->rq_tph_en = info->rq_tph_en; qp->rcv_tph_en = info->rcv_tph_en; qp->xmit_tph_en = info->xmit_tph_en; qp->qp_uk.first_sq_wq = info->qp_uk_init_info.first_sq_wq; qp->qs_handle = qp->vsi->qos[qp->user_pri].qs_handle; return 0; } /** * irdma_sc_qp_create - create qp * @qp: sc qp * @info: qp create info * @scratch: u64 saved to be used during cqp completion * @post_sq: flag for cqp db to ring */ int irdma_sc_qp_create(struct irdma_sc_qp *qp, struct irdma_create_qp_info *info, u64 scratch, bool post_sq) { struct irdma_sc_cqp *cqp; __le64 *wqe; u64 hdr; cqp = qp->dev->cqp; if (qp->qp_uk.qp_id < cqp->dev->hw_attrs.min_hw_qp_id || qp->qp_uk.qp_id > (cqp->dev->hmc_info->hmc_obj[IRDMA_HMC_IW_QP].max_cnt - 1)) return -EINVAL; wqe = irdma_sc_cqp_get_next_send_wqe(cqp, scratch); if (!wqe) return -ENOSPC; set_64bit_val(wqe, IRDMA_BYTE_16, qp->hw_host_ctx_pa); set_64bit_val(wqe, IRDMA_BYTE_40, qp->shadow_area_pa); hdr = qp->qp_uk.qp_id | FIELD_PREP(IRDMA_CQPSQ_OPCODE, IRDMA_CQP_OP_CREATE_QP) | FIELD_PREP(IRDMA_CQPSQ_QP_ORDVALID, info->ord_valid) | 
FIELD_PREP(IRDMA_CQPSQ_QP_TOECTXVALID, info->tcp_ctx_valid) | FIELD_PREP(IRDMA_CQPSQ_QP_MACVALID, info->mac_valid) | FIELD_PREP(IRDMA_CQPSQ_QP_QPTYPE, qp->qp_uk.qp_type) | FIELD_PREP(IRDMA_CQPSQ_QP_VQ, qp->virtual_map) | FIELD_PREP(IRDMA_CQPSQ_QP_FORCELOOPBACK, info->force_lpb) | FIELD_PREP(IRDMA_CQPSQ_QP_CQNUMVALID, info->cq_num_valid) | FIELD_PREP(IRDMA_CQPSQ_QP_ARPTABIDXVALID, info->arp_cache_idx_valid) | FIELD_PREP(IRDMA_CQPSQ_QP_NEXTIWSTATE, info->next_iwarp_state) | FIELD_PREP(IRDMA_CQPSQ_WQEVALID, cqp->polarity); irdma_wmb(); /* make sure WQE is written before valid bit is set */ set_64bit_val(wqe, IRDMA_BYTE_24, hdr); irdma_debug_buf(cqp->dev, IRDMA_DEBUG_WQE, "QP_CREATE WQE", wqe, IRDMA_CQP_WQE_SIZE * 8); if (post_sq) irdma_sc_cqp_post_sq(cqp); return 0; } /** * irdma_sc_qp_modify - modify qp cqp wqe * @qp: sc qp * @info: modify qp info * @scratch: u64 saved to be used during cqp completion * @post_sq: flag for cqp db to ring */ int irdma_sc_qp_modify(struct irdma_sc_qp *qp, struct irdma_modify_qp_info *info, u64 scratch, bool post_sq) { __le64 *wqe; struct irdma_sc_cqp *cqp; u64 hdr; u8 term_actions = 0; u8 term_len = 0; cqp = qp->dev->cqp; wqe = irdma_sc_cqp_get_next_send_wqe(cqp, scratch); if (!wqe) return -ENOSPC; if (info->next_iwarp_state == IRDMA_QP_STATE_TERMINATE) { if (info->dont_send_fin) term_actions += IRDMAQP_TERM_SEND_TERM_ONLY; if (info->dont_send_term) term_actions += IRDMAQP_TERM_SEND_FIN_ONLY; if (term_actions == IRDMAQP_TERM_SEND_TERM_AND_FIN || term_actions == IRDMAQP_TERM_SEND_TERM_ONLY) term_len = info->termlen; } set_64bit_val(wqe, IRDMA_BYTE_8, FIELD_PREP(IRDMA_CQPSQ_QP_NEWMSS, info->new_mss) | FIELD_PREP(IRDMA_CQPSQ_QP_TERMLEN, term_len)); set_64bit_val(wqe, IRDMA_BYTE_16, qp->hw_host_ctx_pa); set_64bit_val(wqe, IRDMA_BYTE_40, qp->shadow_area_pa); hdr = qp->qp_uk.qp_id | FIELD_PREP(IRDMA_CQPSQ_OPCODE, IRDMA_CQP_OP_MODIFY_QP) | FIELD_PREP(IRDMA_CQPSQ_QP_ORDVALID, info->ord_valid) | FIELD_PREP(IRDMA_CQPSQ_QP_TOECTXVALID, info->tcp_ctx_valid) | FIELD_PREP(IRDMA_CQPSQ_QP_CACHEDVARVALID, info->cached_var_valid) | FIELD_PREP(IRDMA_CQPSQ_QP_VQ, qp->virtual_map) | FIELD_PREP(IRDMA_CQPSQ_QP_FORCELOOPBACK, info->force_lpb) | FIELD_PREP(IRDMA_CQPSQ_QP_CQNUMVALID, info->cq_num_valid) | FIELD_PREP(IRDMA_CQPSQ_QP_MACVALID, info->mac_valid) | FIELD_PREP(IRDMA_CQPSQ_QP_QPTYPE, qp->qp_uk.qp_type) | FIELD_PREP(IRDMA_CQPSQ_QP_MSSCHANGE, info->mss_change) | FIELD_PREP(IRDMA_CQPSQ_QP_REMOVEHASHENTRY, info->remove_hash_idx) | FIELD_PREP(IRDMA_CQPSQ_QP_TERMACT, term_actions) | FIELD_PREP(IRDMA_CQPSQ_QP_RESETCON, info->reset_tcp_conn) | FIELD_PREP(IRDMA_CQPSQ_QP_ARPTABIDXVALID, info->arp_cache_idx_valid) | FIELD_PREP(IRDMA_CQPSQ_QP_NEXTIWSTATE, info->next_iwarp_state) | FIELD_PREP(IRDMA_CQPSQ_WQEVALID, cqp->polarity); irdma_wmb(); /* make sure WQE is written before valid bit is set */ set_64bit_val(wqe, IRDMA_BYTE_24, hdr); irdma_debug_buf(cqp->dev, IRDMA_DEBUG_WQE, "QP_MODIFY WQE", wqe, IRDMA_CQP_WQE_SIZE * 8); if (post_sq) irdma_sc_cqp_post_sq(cqp); return 0; } /** * irdma_sc_qp_destroy - cqp destroy qp * @qp: sc qp * @scratch: u64 saved to be used during cqp completion * @remove_hash_idx: flag if to remove hash idx * @ignore_mw_bnd: memory window bind flag * @post_sq: flag for cqp db to ring */ int irdma_sc_qp_destroy(struct irdma_sc_qp *qp, u64 scratch, bool remove_hash_idx, bool ignore_mw_bnd, bool post_sq) { __le64 *wqe; struct irdma_sc_cqp *cqp; u64 hdr; cqp = qp->dev->cqp; wqe = irdma_sc_cqp_get_next_send_wqe(cqp, scratch); if (!wqe) return -ENOSPC; set_64bit_val(wqe, 
IRDMA_BYTE_16, qp->hw_host_ctx_pa); set_64bit_val(wqe, IRDMA_BYTE_40, qp->shadow_area_pa); hdr = qp->qp_uk.qp_id | FIELD_PREP(IRDMA_CQPSQ_OPCODE, IRDMA_CQP_OP_DESTROY_QP) | FIELD_PREP(IRDMA_CQPSQ_QP_QPTYPE, qp->qp_uk.qp_type) | FIELD_PREP(IRDMA_CQPSQ_QP_IGNOREMWBOUND, ignore_mw_bnd) | FIELD_PREP(IRDMA_CQPSQ_QP_REMOVEHASHENTRY, remove_hash_idx) | FIELD_PREP(IRDMA_CQPSQ_WQEVALID, cqp->polarity); irdma_wmb(); /* make sure WQE is written before valid bit is set */ set_64bit_val(wqe, IRDMA_BYTE_24, hdr); irdma_debug_buf(cqp->dev, IRDMA_DEBUG_WQE, "QP_DESTROY WQE", wqe, IRDMA_CQP_WQE_SIZE * 8); if (post_sq) irdma_sc_cqp_post_sq(cqp); return 0; } /** * irdma_sc_get_encoded_ird_size - * @ird_size: IRD size * The ird from the connection is rounded to a supported HW setting and then encoded * for ird_size field of qp_ctx. Consumers are expected to provide valid ird size based * on hardware attributes. IRD size defaults to a value of 4 in case of invalid input */ static u8 irdma_sc_get_encoded_ird_size(u16 ird_size) { switch (ird_size ? roundup_pow_of_two(2 * ird_size) : 4) { case 256: return IRDMA_IRD_HW_SIZE_256; case 128: return IRDMA_IRD_HW_SIZE_128; case 64: case 32: return IRDMA_IRD_HW_SIZE_64; case 16: case 8: return IRDMA_IRD_HW_SIZE_16; case 4: default: break; } return IRDMA_IRD_HW_SIZE_4; } /** * irdma_sc_qp_setctx_roce - set qp's context * @qp: sc qp * @qp_ctx: context ptr * @info: ctx info */ void irdma_sc_qp_setctx_roce(struct irdma_sc_qp *qp, __le64 * qp_ctx, struct irdma_qp_host_ctx_info *info) { struct irdma_roce_offload_info *roce_info; struct irdma_udp_offload_info *udp; u8 push_mode_en; u32 push_idx; - u64 mac; roce_info = info->roce_info; udp = info->udp_info; - mac = LS_64_1(roce_info->mac_addr[5], 16) | - LS_64_1(roce_info->mac_addr[4], 24) | - LS_64_1(roce_info->mac_addr[3], 32) | - LS_64_1(roce_info->mac_addr[2], 40) | - LS_64_1(roce_info->mac_addr[1], 48) | - LS_64_1(roce_info->mac_addr[0], 56); - qp->user_pri = info->user_pri; if (qp->push_idx == IRDMA_INVALID_PUSH_PAGE_INDEX) { push_mode_en = 0; push_idx = 0; } else { push_mode_en = 1; push_idx = qp->push_idx; } set_64bit_val(qp_ctx, IRDMA_BYTE_0, FIELD_PREP(IRDMAQPC_RQWQESIZE, qp->qp_uk.rq_wqe_size) | FIELD_PREP(IRDMAQPC_RCVTPHEN, qp->rcv_tph_en) | FIELD_PREP(IRDMAQPC_XMITTPHEN, qp->xmit_tph_en) | FIELD_PREP(IRDMAQPC_RQTPHEN, qp->rq_tph_en) | FIELD_PREP(IRDMAQPC_SQTPHEN, qp->sq_tph_en) | FIELD_PREP(IRDMAQPC_PPIDX, push_idx) | FIELD_PREP(IRDMAQPC_PMENA, push_mode_en) | FIELD_PREP(IRDMAQPC_PDIDXHI, roce_info->pd_id >> 16) | FIELD_PREP(IRDMAQPC_DC_TCP_EN, roce_info->dctcp_en) | FIELD_PREP(IRDMAQPC_ERR_RQ_IDX_VALID, roce_info->err_rq_idx_valid) | FIELD_PREP(IRDMAQPC_ISQP1, roce_info->is_qp1) | FIELD_PREP(IRDMAQPC_ROCE_TVER, roce_info->roce_tver) | FIELD_PREP(IRDMAQPC_IPV4, udp->ipv4) | FIELD_PREP(IRDMAQPC_INSERTVLANTAG, udp->insert_vlan_tag)); set_64bit_val(qp_ctx, IRDMA_BYTE_8, qp->sq_pa); set_64bit_val(qp_ctx, IRDMA_BYTE_16, qp->rq_pa); if (roce_info->dcqcn_en || roce_info->dctcp_en) { udp->tos &= ~ECN_CODE_PT_MASK; udp->tos |= ECN_CODE_PT_VAL; } set_64bit_val(qp_ctx, IRDMA_BYTE_24, FIELD_PREP(IRDMAQPC_RQSIZE, qp->hw_rq_size) | FIELD_PREP(IRDMAQPC_SQSIZE, qp->hw_sq_size) | FIELD_PREP(IRDMAQPC_TTL, udp->ttl) | FIELD_PREP(IRDMAQPC_TOS, udp->tos) | FIELD_PREP(IRDMAQPC_SRCPORTNUM, udp->src_port) | FIELD_PREP(IRDMAQPC_DESTPORTNUM, udp->dst_port)); set_64bit_val(qp_ctx, IRDMA_BYTE_32, FIELD_PREP(IRDMAQPC_DESTIPADDR2, udp->dest_ip_addr[2]) | FIELD_PREP(IRDMAQPC_DESTIPADDR3, udp->dest_ip_addr[3])); set_64bit_val(qp_ctx, 
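/*
 * Reviewer note (worked examples, not part of the patch): for
 * irdma_sc_get_encoded_ird_size() above, the requested IRD is doubled, rounded up to
 * a power of two, then mapped onto the nearest hardware bucket:
 *
 *	ird_size = 10 -> roundup_pow_of_two(20) = 32 -> IRDMA_IRD_HW_SIZE_64
 *	ird_size = 3  -> roundup_pow_of_two(6)  = 8  -> IRDMA_IRD_HW_SIZE_16
 *	ird_size = 0  -> treated as 4                -> IRDMA_IRD_HW_SIZE_4
 *
 * Values that round up beyond 256 fall through the switch and also end up as
 * IRDMA_IRD_HW_SIZE_4, which is why callers are expected to validate the IRD against
 * the reported hardware attributes first.
 */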
IRDMA_BYTE_40, FIELD_PREP(IRDMAQPC_DESTIPADDR0, udp->dest_ip_addr[0]) | FIELD_PREP(IRDMAQPC_DESTIPADDR1, udp->dest_ip_addr[1])); set_64bit_val(qp_ctx, IRDMA_BYTE_48, FIELD_PREP(IRDMAQPC_SNDMSS, udp->snd_mss) | FIELD_PREP(IRDMAQPC_VLANTAG, udp->vlan_tag) | FIELD_PREP(IRDMAQPC_ARPIDX, udp->arp_idx)); set_64bit_val(qp_ctx, IRDMA_BYTE_56, FIELD_PREP(IRDMAQPC_PKEY, roce_info->p_key) | FIELD_PREP(IRDMAQPC_PDIDX, roce_info->pd_id) | FIELD_PREP(IRDMAQPC_ACKCREDITS, roce_info->ack_credits) | FIELD_PREP(IRDMAQPC_FLOWLABEL, udp->flow_label)); set_64bit_val(qp_ctx, IRDMA_BYTE_64, FIELD_PREP(IRDMAQPC_QKEY, roce_info->qkey) | FIELD_PREP(IRDMAQPC_DESTQP, roce_info->dest_qp)); set_64bit_val(qp_ctx, IRDMA_BYTE_80, FIELD_PREP(IRDMAQPC_PSNNXT, udp->psn_nxt) | FIELD_PREP(IRDMAQPC_LSN, udp->lsn)); set_64bit_val(qp_ctx, IRDMA_BYTE_88, FIELD_PREP(IRDMAQPC_EPSN, udp->epsn)); set_64bit_val(qp_ctx, IRDMA_BYTE_96, FIELD_PREP(IRDMAQPC_PSNMAX, udp->psn_max) | FIELD_PREP(IRDMAQPC_PSNUNA, udp->psn_una)); set_64bit_val(qp_ctx, IRDMA_BYTE_112, FIELD_PREP(IRDMAQPC_CWNDROCE, udp->cwnd)); set_64bit_val(qp_ctx, IRDMA_BYTE_128, FIELD_PREP(IRDMAQPC_ERR_RQ_IDX, roce_info->err_rq_idx) | FIELD_PREP(IRDMAQPC_RNRNAK_THRESH, udp->rnr_nak_thresh) | FIELD_PREP(IRDMAQPC_REXMIT_THRESH, udp->rexmit_thresh) | FIELD_PREP(IRDMAQPC_RTOMIN, roce_info->rtomin)); set_64bit_val(qp_ctx, IRDMA_BYTE_136, FIELD_PREP(IRDMAQPC_TXCQNUM, info->send_cq_num) | FIELD_PREP(IRDMAQPC_RXCQNUM, info->rcv_cq_num)); set_64bit_val(qp_ctx, IRDMA_BYTE_144, FIELD_PREP(IRDMAQPC_STAT_INDEX, info->stats_idx)); - set_64bit_val(qp_ctx, IRDMA_BYTE_152, mac); + set_64bit_val(qp_ctx, IRDMA_BYTE_152, + FIELD_PREP(IRDMAQPC_MACADDRESS, irdma_mac_to_u64(roce_info->mac_addr))); set_64bit_val(qp_ctx, IRDMA_BYTE_160, FIELD_PREP(IRDMAQPC_ORDSIZE, roce_info->ord_size) | FIELD_PREP(IRDMAQPC_IRDSIZE, irdma_sc_get_encoded_ird_size(roce_info->ird_size)) | FIELD_PREP(IRDMAQPC_WRRDRSPOK, roce_info->wr_rdresp_en) | FIELD_PREP(IRDMAQPC_RDOK, roce_info->rd_en) | FIELD_PREP(IRDMAQPC_USESTATSINSTANCE, info->stats_idx_valid) | FIELD_PREP(IRDMAQPC_BINDEN, roce_info->bind_en) | FIELD_PREP(IRDMAQPC_FASTREGEN, roce_info->fast_reg_en) | FIELD_PREP(IRDMAQPC_DCQCNENABLE, roce_info->dcqcn_en) | FIELD_PREP(IRDMAQPC_RCVNOICRC, roce_info->rcv_no_icrc) | FIELD_PREP(IRDMAQPC_FW_CC_ENABLE, roce_info->fw_cc_enable) | FIELD_PREP(IRDMAQPC_UDPRIVCQENABLE, roce_info->udprivcq_en) | FIELD_PREP(IRDMAQPC_PRIVEN, roce_info->priv_mode_en) | FIELD_PREP(IRDMAQPC_TIMELYENABLE, roce_info->timely_en)); set_64bit_val(qp_ctx, IRDMA_BYTE_168, FIELD_PREP(IRDMAQPC_QPCOMPCTX, info->qp_compl_ctx)); set_64bit_val(qp_ctx, IRDMA_BYTE_176, FIELD_PREP(IRDMAQPC_SQTPHVAL, qp->sq_tph_val) | FIELD_PREP(IRDMAQPC_RQTPHVAL, qp->rq_tph_val) | FIELD_PREP(IRDMAQPC_QSHANDLE, qp->qs_handle)); set_64bit_val(qp_ctx, IRDMA_BYTE_184, FIELD_PREP(IRDMAQPC_LOCAL_IPADDR3, udp->local_ipaddr[3]) | FIELD_PREP(IRDMAQPC_LOCAL_IPADDR2, udp->local_ipaddr[2])); set_64bit_val(qp_ctx, IRDMA_BYTE_192, FIELD_PREP(IRDMAQPC_LOCAL_IPADDR1, udp->local_ipaddr[1]) | FIELD_PREP(IRDMAQPC_LOCAL_IPADDR0, udp->local_ipaddr[0])); set_64bit_val(qp_ctx, IRDMA_BYTE_200, FIELD_PREP(IRDMAQPC_THIGH, roce_info->t_high) | FIELD_PREP(IRDMAQPC_TLOW, roce_info->t_low)); set_64bit_val(qp_ctx, IRDMA_BYTE_208, FIELD_PREP(IRDMAQPC_REMENDPOINTIDX, info->rem_endpoint_idx)); irdma_debug_buf(qp->dev, IRDMA_DEBUG_WQE, "QP_HOST CTX WQE", qp_ctx, IRDMA_QP_CTX_SIZE); } /* * irdma_sc_alloc_local_mac_entry - allocate a mac entry @cqp: struct for cqp hw @scratch: u64 saved to be used during * cqp 
completion @post_sq: flag for cqp db to ring */ static int irdma_sc_alloc_local_mac_entry(struct irdma_sc_cqp *cqp, u64 scratch, bool post_sq) { __le64 *wqe; u64 hdr; wqe = irdma_sc_cqp_get_next_send_wqe(cqp, scratch); if (!wqe) return -ENOSPC; hdr = FIELD_PREP(IRDMA_CQPSQ_OPCODE, IRDMA_CQP_OP_ALLOCATE_LOC_MAC_TABLE_ENTRY) | FIELD_PREP(IRDMA_CQPSQ_WQEVALID, cqp->polarity); irdma_wmb(); /* make sure WQE is written before valid bit is set */ set_64bit_val(wqe, IRDMA_BYTE_24, hdr); irdma_debug_buf(cqp->dev, IRDMA_DEBUG_WQE, "ALLOCATE_LOCAL_MAC WQE", wqe, IRDMA_CQP_WQE_SIZE * 8); if (post_sq) irdma_sc_cqp_post_sq(cqp); return 0; } /** * irdma_sc_add_local_mac_entry - add mac enry * @cqp: struct for cqp hw * @info:mac addr info * @scratch: u64 saved to be used during cqp completion * @post_sq: flag for cqp db to ring */ static int irdma_sc_add_local_mac_entry(struct irdma_sc_cqp *cqp, struct irdma_local_mac_entry_info *info, u64 scratch, bool post_sq) { __le64 *wqe; - u64 temp, header; + u64 header; wqe = irdma_sc_cqp_get_next_send_wqe(cqp, scratch); if (!wqe) return -ENOSPC; - temp = info->mac_addr[5] | LS_64_1(info->mac_addr[4], 8) | - LS_64_1(info->mac_addr[3], 16) | LS_64_1(info->mac_addr[2], 24) | - LS_64_1(info->mac_addr[1], 32) | LS_64_1(info->mac_addr[0], 40); - set_64bit_val(wqe, IRDMA_BYTE_32, temp); + set_64bit_val(wqe, IRDMA_BYTE_32, irdma_mac_to_u64(info->mac_addr)); header = FIELD_PREP(IRDMA_CQPSQ_MLM_TABLEIDX, info->entry_idx) | FIELD_PREP(IRDMA_CQPSQ_OPCODE, IRDMA_CQP_OP_MANAGE_LOC_MAC_TABLE) | FIELD_PREP(IRDMA_CQPSQ_WQEVALID, cqp->polarity); irdma_wmb(); /* make sure WQE is written before valid bit is set */ set_64bit_val(wqe, IRDMA_BYTE_24, header); irdma_debug_buf(cqp->dev, IRDMA_DEBUG_WQE, "ADD_LOCAL_MAC WQE", wqe, IRDMA_CQP_WQE_SIZE * 8); if (post_sq) irdma_sc_cqp_post_sq(cqp); return 0; } /** * irdma_sc_del_local_mac_entry - cqp wqe to dele local mac * @cqp: struct for cqp hw * @scratch: u64 saved to be used during cqp completion * @entry_idx: index of mac entry * @ignore_ref_count: to force mac adde delete * @post_sq: flag for cqp db to ring */ static int irdma_sc_del_local_mac_entry(struct irdma_sc_cqp *cqp, u64 scratch, u16 entry_idx, u8 ignore_ref_count, bool post_sq) { __le64 *wqe; u64 header; wqe = irdma_sc_cqp_get_next_send_wqe(cqp, scratch); if (!wqe) return -ENOSPC; header = FIELD_PREP(IRDMA_CQPSQ_MLM_TABLEIDX, entry_idx) | FIELD_PREP(IRDMA_CQPSQ_OPCODE, IRDMA_CQP_OP_MANAGE_LOC_MAC_TABLE) | FIELD_PREP(IRDMA_CQPSQ_MLM_FREEENTRY, 1) | FIELD_PREP(IRDMA_CQPSQ_WQEVALID, cqp->polarity) | FIELD_PREP(IRDMA_CQPSQ_MLM_IGNORE_REF_CNT, ignore_ref_count); irdma_wmb(); /* make sure WQE is written before valid bit is set */ set_64bit_val(wqe, IRDMA_BYTE_24, header); irdma_debug_buf(cqp->dev, IRDMA_DEBUG_WQE, "DEL_LOCAL_MAC_IPADDR WQE", wqe, IRDMA_CQP_WQE_SIZE * 8); if (post_sq) irdma_sc_cqp_post_sq(cqp); return 0; } /** * irdma_sc_qp_setctx - set qp's context * @qp: sc qp * @qp_ctx: context ptr * @info: ctx info */ void irdma_sc_qp_setctx(struct irdma_sc_qp *qp, __le64 * qp_ctx, struct irdma_qp_host_ctx_info *info) { struct irdma_iwarp_offload_info *iw; struct irdma_tcp_offload_info *tcp; struct irdma_sc_dev *dev; u8 push_mode_en; u32 push_idx; u64 qw0, qw3, qw7 = 0, qw16 = 0; u64 mac = 0; iw = info->iwarp_info; tcp = info->tcp_info; dev = qp->dev; if (iw->rcv_mark_en) { qp->pfpdu.marker_len = 4; qp->pfpdu.rcv_start_seq = tcp->rcv_nxt; } qp->user_pri = info->user_pri; if (qp->push_idx == IRDMA_INVALID_PUSH_PAGE_INDEX) { push_mode_en = 0; push_idx = 0; } else { push_mode_en = 1; 
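/*
 * Reviewer note (assumption, not part of the patch): irdma_mac_to_u64(), used by the
 * new hunks in this file, is not defined in this diff. Judging from the open-coded
 * packing it replaces (mac[5] in the low byte up through mac[0] at bits 47..40), an
 * equivalent helper would look roughly like the sketch below:
 *
 *	static inline u64 irdma_mac_to_u64(const u8 *mac)
 *	{
 *		u64 v = 0;
 *		int i;
 *
 *		(mac[0] ends up in the most significant of the six bytes)
 *		for (i = 0; i < ETHER_ADDR_LEN; i++)
 *			v = (v << 8) | mac[i];
 *		return v;
 *	}
 *
 * The QP context then positions that 48-bit value with
 * FIELD_PREP(IRDMAQPC_MACADDRESS, ...), which matches the removed shifts by 16.
 */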
push_idx = qp->push_idx; } qw0 = FIELD_PREP(IRDMAQPC_RQWQESIZE, qp->qp_uk.rq_wqe_size) | FIELD_PREP(IRDMAQPC_RCVTPHEN, qp->rcv_tph_en) | FIELD_PREP(IRDMAQPC_XMITTPHEN, qp->xmit_tph_en) | FIELD_PREP(IRDMAQPC_RQTPHEN, qp->rq_tph_en) | FIELD_PREP(IRDMAQPC_SQTPHEN, qp->sq_tph_en) | FIELD_PREP(IRDMAQPC_PPIDX, push_idx) | FIELD_PREP(IRDMAQPC_PMENA, push_mode_en); set_64bit_val(qp_ctx, IRDMA_BYTE_8, qp->sq_pa); set_64bit_val(qp_ctx, IRDMA_BYTE_16, qp->rq_pa); qw3 = FIELD_PREP(IRDMAQPC_RQSIZE, qp->hw_rq_size) | FIELD_PREP(IRDMAQPC_SQSIZE, qp->hw_sq_size); if (dev->hw_attrs.uk_attrs.hw_rev == IRDMA_GEN_1) qw3 |= FIELD_PREP(IRDMAQPC_GEN1_SRCMACADDRIDX, qp->src_mac_addr_idx); set_64bit_val(qp_ctx, IRDMA_BYTE_136, FIELD_PREP(IRDMAQPC_TXCQNUM, info->send_cq_num) | FIELD_PREP(IRDMAQPC_RXCQNUM, info->rcv_cq_num)); set_64bit_val(qp_ctx, IRDMA_BYTE_168, FIELD_PREP(IRDMAQPC_QPCOMPCTX, info->qp_compl_ctx)); set_64bit_val(qp_ctx, IRDMA_BYTE_176, FIELD_PREP(IRDMAQPC_SQTPHVAL, qp->sq_tph_val) | FIELD_PREP(IRDMAQPC_RQTPHVAL, qp->rq_tph_val) | FIELD_PREP(IRDMAQPC_QSHANDLE, qp->qs_handle) | FIELD_PREP(IRDMAQPC_EXCEPTION_LAN_QUEUE, qp->ieq_qp)); if (info->iwarp_info_valid) { qw0 |= FIELD_PREP(IRDMAQPC_DDP_VER, iw->ddp_ver) | FIELD_PREP(IRDMAQPC_RDMAP_VER, iw->rdmap_ver) | FIELD_PREP(IRDMAQPC_DC_TCP_EN, iw->dctcp_en) | FIELD_PREP(IRDMAQPC_ECN_EN, iw->ecn_en) | FIELD_PREP(IRDMAQPC_IBRDENABLE, iw->ib_rd_en) | FIELD_PREP(IRDMAQPC_PDIDXHI, iw->pd_id >> 16) | FIELD_PREP(IRDMAQPC_ERR_RQ_IDX_VALID, iw->err_rq_idx_valid); qw7 |= FIELD_PREP(IRDMAQPC_PDIDX, iw->pd_id); qw16 |= FIELD_PREP(IRDMAQPC_ERR_RQ_IDX, iw->err_rq_idx) | FIELD_PREP(IRDMAQPC_RTOMIN, iw->rtomin); set_64bit_val(qp_ctx, IRDMA_BYTE_144, FIELD_PREP(IRDMAQPC_Q2ADDR, qp->q2_pa >> 8) | FIELD_PREP(IRDMAQPC_STAT_INDEX, info->stats_idx)); - if (dev->hw_attrs.uk_attrs.hw_rev >= IRDMA_GEN_2) { - mac = LS_64_1(iw->mac_addr[5], 16) | - LS_64_1(iw->mac_addr[4], 24) | - LS_64_1(iw->mac_addr[3], 32) | - LS_64_1(iw->mac_addr[2], 40) | - LS_64_1(iw->mac_addr[1], 48) | - LS_64_1(iw->mac_addr[0], 56); - } + if (dev->hw_attrs.uk_attrs.hw_rev >= IRDMA_GEN_2) + mac = FIELD_PREP(IRDMAQPC_MACADDRESS, + irdma_mac_to_u64(iw->mac_addr)); set_64bit_val(qp_ctx, IRDMA_BYTE_152, mac | FIELD_PREP(IRDMAQPC_LASTBYTESENT, iw->last_byte_sent)); set_64bit_val(qp_ctx, IRDMA_BYTE_160, FIELD_PREP(IRDMAQPC_ORDSIZE, iw->ord_size) | FIELD_PREP(IRDMAQPC_IRDSIZE, irdma_sc_get_encoded_ird_size(iw->ird_size)) | FIELD_PREP(IRDMAQPC_WRRDRSPOK, iw->wr_rdresp_en) | FIELD_PREP(IRDMAQPC_RDOK, iw->rd_en) | FIELD_PREP(IRDMAQPC_SNDMARKERS, iw->snd_mark_en) | FIELD_PREP(IRDMAQPC_BINDEN, iw->bind_en) | FIELD_PREP(IRDMAQPC_FASTREGEN, iw->fast_reg_en) | FIELD_PREP(IRDMAQPC_PRIVEN, iw->priv_mode_en) | FIELD_PREP(IRDMAQPC_USESTATSINSTANCE, info->stats_idx_valid) | FIELD_PREP(IRDMAQPC_IWARPMODE, 1) | FIELD_PREP(IRDMAQPC_RCVMARKERS, iw->rcv_mark_en) | FIELD_PREP(IRDMAQPC_ALIGNHDRS, iw->align_hdrs) | FIELD_PREP(IRDMAQPC_RCVNOMPACRC, iw->rcv_no_mpa_crc) | FIELD_PREP(IRDMAQPC_RCVMARKOFFSET, iw->rcv_mark_offset) | FIELD_PREP(IRDMAQPC_SNDMARKOFFSET, iw->snd_mark_offset) | FIELD_PREP(IRDMAQPC_TIMELYENABLE, iw->timely_en)); } if (info->tcp_info_valid) { qw0 |= FIELD_PREP(IRDMAQPC_IPV4, tcp->ipv4) | FIELD_PREP(IRDMAQPC_NONAGLE, tcp->no_nagle) | FIELD_PREP(IRDMAQPC_INSERTVLANTAG, tcp->insert_vlan_tag) | FIELD_PREP(IRDMAQPC_TIMESTAMP, tcp->time_stamp) | FIELD_PREP(IRDMAQPC_LIMIT, tcp->cwnd_inc_limit) | FIELD_PREP(IRDMAQPC_DROPOOOSEG, tcp->drop_ooo_seg) | FIELD_PREP(IRDMAQPC_DUPACK_THRESH, tcp->dup_ack_thresh); if (iw->ecn_en || 
iw->dctcp_en) { tcp->tos &= ~ECN_CODE_PT_MASK; tcp->tos |= ECN_CODE_PT_VAL; } qw3 |= FIELD_PREP(IRDMAQPC_TTL, tcp->ttl) | FIELD_PREP(IRDMAQPC_AVOIDSTRETCHACK, tcp->avoid_stretch_ack) | FIELD_PREP(IRDMAQPC_TOS, tcp->tos) | FIELD_PREP(IRDMAQPC_SRCPORTNUM, tcp->src_port) | FIELD_PREP(IRDMAQPC_DESTPORTNUM, tcp->dst_port); if (dev->hw_attrs.uk_attrs.hw_rev == IRDMA_GEN_1) { qw3 |= FIELD_PREP(IRDMAQPC_GEN1_SRCMACADDRIDX, tcp->src_mac_addr_idx); qp->src_mac_addr_idx = tcp->src_mac_addr_idx; } set_64bit_val(qp_ctx, IRDMA_BYTE_32, FIELD_PREP(IRDMAQPC_DESTIPADDR2, tcp->dest_ip_addr[2]) | FIELD_PREP(IRDMAQPC_DESTIPADDR3, tcp->dest_ip_addr[3])); set_64bit_val(qp_ctx, IRDMA_BYTE_40, FIELD_PREP(IRDMAQPC_DESTIPADDR0, tcp->dest_ip_addr[0]) | FIELD_PREP(IRDMAQPC_DESTIPADDR1, tcp->dest_ip_addr[1])); set_64bit_val(qp_ctx, IRDMA_BYTE_48, FIELD_PREP(IRDMAQPC_SNDMSS, tcp->snd_mss) | FIELD_PREP(IRDMAQPC_SYN_RST_HANDLING, tcp->syn_rst_handling) | FIELD_PREP(IRDMAQPC_VLANTAG, tcp->vlan_tag) | FIELD_PREP(IRDMAQPC_ARPIDX, tcp->arp_idx)); qw7 |= FIELD_PREP(IRDMAQPC_FLOWLABEL, tcp->flow_label) | FIELD_PREP(IRDMAQPC_WSCALE, tcp->wscale) | FIELD_PREP(IRDMAQPC_IGNORE_TCP_OPT, tcp->ignore_tcp_opt) | FIELD_PREP(IRDMAQPC_IGNORE_TCP_UNS_OPT, tcp->ignore_tcp_uns_opt) | FIELD_PREP(IRDMAQPC_TCPSTATE, tcp->tcp_state) | FIELD_PREP(IRDMAQPC_RCVSCALE, tcp->rcv_wscale) | FIELD_PREP(IRDMAQPC_SNDSCALE, tcp->snd_wscale); set_64bit_val(qp_ctx, IRDMA_BYTE_72, FIELD_PREP(IRDMAQPC_TIMESTAMP_RECENT, tcp->time_stamp_recent) | FIELD_PREP(IRDMAQPC_TIMESTAMP_AGE, tcp->time_stamp_age)); set_64bit_val(qp_ctx, IRDMA_BYTE_80, FIELD_PREP(IRDMAQPC_SNDNXT, tcp->snd_nxt) | FIELD_PREP(IRDMAQPC_SNDWND, tcp->snd_wnd)); set_64bit_val(qp_ctx, IRDMA_BYTE_88, FIELD_PREP(IRDMAQPC_RCVNXT, tcp->rcv_nxt) | FIELD_PREP(IRDMAQPC_RCVWND, tcp->rcv_wnd)); set_64bit_val(qp_ctx, IRDMA_BYTE_96, FIELD_PREP(IRDMAQPC_SNDMAX, tcp->snd_max) | FIELD_PREP(IRDMAQPC_SNDUNA, tcp->snd_una)); set_64bit_val(qp_ctx, IRDMA_BYTE_104, FIELD_PREP(IRDMAQPC_SRTT, tcp->srtt) | FIELD_PREP(IRDMAQPC_RTTVAR, tcp->rtt_var)); set_64bit_val(qp_ctx, IRDMA_BYTE_112, FIELD_PREP(IRDMAQPC_SSTHRESH, tcp->ss_thresh) | FIELD_PREP(IRDMAQPC_CWND, tcp->cwnd)); set_64bit_val(qp_ctx, IRDMA_BYTE_120, FIELD_PREP(IRDMAQPC_SNDWL1, tcp->snd_wl1) | FIELD_PREP(IRDMAQPC_SNDWL2, tcp->snd_wl2)); qw16 |= FIELD_PREP(IRDMAQPC_MAXSNDWND, tcp->max_snd_window) | FIELD_PREP(IRDMAQPC_REXMIT_THRESH, tcp->rexmit_thresh); set_64bit_val(qp_ctx, IRDMA_BYTE_184, FIELD_PREP(IRDMAQPC_LOCAL_IPADDR3, tcp->local_ipaddr[3]) | FIELD_PREP(IRDMAQPC_LOCAL_IPADDR2, tcp->local_ipaddr[2])); set_64bit_val(qp_ctx, IRDMA_BYTE_192, FIELD_PREP(IRDMAQPC_LOCAL_IPADDR1, tcp->local_ipaddr[1]) | FIELD_PREP(IRDMAQPC_LOCAL_IPADDR0, tcp->local_ipaddr[0])); set_64bit_val(qp_ctx, IRDMA_BYTE_200, FIELD_PREP(IRDMAQPC_THIGH, iw->t_high) | FIELD_PREP(IRDMAQPC_TLOW, iw->t_low)); set_64bit_val(qp_ctx, IRDMA_BYTE_208, FIELD_PREP(IRDMAQPC_REMENDPOINTIDX, info->rem_endpoint_idx)); } set_64bit_val(qp_ctx, IRDMA_BYTE_0, qw0); set_64bit_val(qp_ctx, IRDMA_BYTE_24, qw3); set_64bit_val(qp_ctx, IRDMA_BYTE_56, qw7); set_64bit_val(qp_ctx, IRDMA_BYTE_128, qw16); irdma_debug_buf(qp->dev, IRDMA_DEBUG_WQE, "QP_HOST CTX", qp_ctx, IRDMA_QP_CTX_SIZE); } /** * irdma_sc_alloc_stag - mr stag alloc * @dev: sc device struct * @info: stag info * @scratch: u64 saved to be used during cqp completion * @post_sq: flag for cqp db to ring */ static int irdma_sc_alloc_stag(struct irdma_sc_dev *dev, struct irdma_allocate_stag_info *info, u64 scratch, bool post_sq) { __le64 *wqe; struct irdma_sc_cqp 
*cqp; u64 hdr; enum irdma_page_size page_size; if (!info->total_len && !info->all_memory) return -EINVAL; if (info->page_size == 0x40000000) page_size = IRDMA_PAGE_SIZE_1G; else if (info->page_size == 0x200000) page_size = IRDMA_PAGE_SIZE_2M; else page_size = IRDMA_PAGE_SIZE_4K; cqp = dev->cqp; wqe = irdma_sc_cqp_get_next_send_wqe(cqp, scratch); if (!wqe) return -ENOSPC; set_64bit_val(wqe, IRDMA_BYTE_8, FLD_LS_64(dev, info->pd_id, IRDMA_CQPSQ_STAG_PDID) | FIELD_PREP(IRDMA_CQPSQ_STAG_STAGLEN, info->total_len)); set_64bit_val(wqe, IRDMA_BYTE_16, FIELD_PREP(IRDMA_CQPSQ_STAG_IDX, info->stag_idx)); set_64bit_val(wqe, IRDMA_BYTE_40, FIELD_PREP(IRDMA_CQPSQ_STAG_HMCFNIDX, info->hmc_fcn_index)); if (info->chunk_size) set_64bit_val(wqe, IRDMA_BYTE_48, FIELD_PREP(IRDMA_CQPSQ_STAG_FIRSTPMPBLIDX, info->first_pm_pbl_idx)); hdr = FIELD_PREP(IRDMA_CQPSQ_OPCODE, IRDMA_CQP_OP_ALLOC_STAG) | FIELD_PREP(IRDMA_CQPSQ_STAG_MR, 1) | FIELD_PREP(IRDMA_CQPSQ_STAG_ARIGHTS, info->access_rights) | FIELD_PREP(IRDMA_CQPSQ_STAG_LPBLSIZE, info->chunk_size) | FIELD_PREP(IRDMA_CQPSQ_STAG_HPAGESIZE, page_size) | FIELD_PREP(IRDMA_CQPSQ_STAG_REMACCENABLED, info->remote_access) | FIELD_PREP(IRDMA_CQPSQ_STAG_USEHMCFNIDX, info->use_hmc_fcn_index) | FIELD_PREP(IRDMA_CQPSQ_STAG_USEPFRID, info->use_pf_rid) | FIELD_PREP(IRDMA_CQPSQ_WQEVALID, cqp->polarity); irdma_wmb(); /* make sure WQE is written before valid bit is set */ set_64bit_val(wqe, IRDMA_BYTE_24, hdr); irdma_debug_buf(dev, IRDMA_DEBUG_WQE, "ALLOC_STAG WQE", wqe, IRDMA_CQP_WQE_SIZE * 8); if (post_sq) irdma_sc_cqp_post_sq(cqp); return 0; } /** * irdma_sc_mr_reg_non_shared - non-shared mr registration * @dev: sc device struct * @info: mr info * @scratch: u64 saved to be used during cqp completion * @post_sq: flag for cqp db to ring */ static int irdma_sc_mr_reg_non_shared(struct irdma_sc_dev *dev, struct irdma_reg_ns_stag_info *info, u64 scratch, bool post_sq) { __le64 *wqe; u64 fbo; struct irdma_sc_cqp *cqp; u64 hdr; u32 pble_obj_cnt; bool remote_access; u8 addr_type; enum irdma_page_size page_size; if (!info->total_len && !info->all_memory) return -EINVAL; if (info->page_size == 0x40000000) page_size = IRDMA_PAGE_SIZE_1G; else if (info->page_size == 0x200000) page_size = IRDMA_PAGE_SIZE_2M; else if (info->page_size == 0x1000) page_size = IRDMA_PAGE_SIZE_4K; else return -EINVAL; if (info->access_rights & (IRDMA_ACCESS_FLAGS_REMOTEREAD_ONLY | IRDMA_ACCESS_FLAGS_REMOTEWRITE_ONLY)) remote_access = true; else remote_access = false; pble_obj_cnt = dev->hmc_info->hmc_obj[IRDMA_HMC_IW_PBLE].cnt; if (info->chunk_size && info->first_pm_pbl_index >= pble_obj_cnt) return -EINVAL; cqp = dev->cqp; wqe = irdma_sc_cqp_get_next_send_wqe(cqp, scratch); if (!wqe) return -ENOSPC; fbo = info->va & (info->page_size - 1); set_64bit_val(wqe, IRDMA_BYTE_0, (info->addr_type == IRDMA_ADDR_TYPE_VA_BASED ? info->va : fbo)); set_64bit_val(wqe, IRDMA_BYTE_8, FIELD_PREP(IRDMA_CQPSQ_STAG_STAGLEN, info->total_len) | FLD_LS_64(dev, info->pd_id, IRDMA_CQPSQ_STAG_PDID)); set_64bit_val(wqe, IRDMA_BYTE_16, FIELD_PREP(IRDMA_CQPSQ_STAG_KEY, info->stag_key) | FIELD_PREP(IRDMA_CQPSQ_STAG_IDX, info->stag_idx)); if (!info->chunk_size) set_64bit_val(wqe, IRDMA_BYTE_32, info->reg_addr_pa); else set_64bit_val(wqe, IRDMA_BYTE_48, FIELD_PREP(IRDMA_CQPSQ_STAG_FIRSTPMPBLIDX, info->first_pm_pbl_index)); set_64bit_val(wqe, IRDMA_BYTE_40, info->hmc_fcn_index); addr_type = (info->addr_type == IRDMA_ADDR_TYPE_VA_BASED) ? 
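
irdma_sc_alloc_stag() and irdma_sc_mr_reg_non_shared() both translate a byte page size into a HW encoding, and the non-shared path also derives the first-byte offset (fbo) as the VA masked by the page size. A small sketch of that mapping; the demo_* names and enum are hypothetical stand-ins for the driver's irdma_page_size values:

#include <assert.h>
#include <stdint.h>

/* Hypothetical stand-ins for the driver's IRDMA_PAGE_SIZE_* encodings. */
enum demo_page_size { DEMO_PAGE_4K, DEMO_PAGE_2M, DEMO_PAGE_1G };

static enum demo_page_size
demo_encode_page_size(uint64_t page_size)
{
    if (page_size == 0x40000000)  /* 1 GiB */
        return DEMO_PAGE_1G;
    if (page_size == 0x200000)    /* 2 MiB */
        return DEMO_PAGE_2M;
    return DEMO_PAGE_4K;          /* default: 4 KiB */
}

/* fbo: offset of the VA within its first page, as in the MR registration. */
static uint64_t
demo_fbo(uint64_t va, uint64_t page_size)
{
    return va & (page_size - 1);
}

int
main(void)
{
    assert(demo_encode_page_size(0x200000) == DEMO_PAGE_2M);
    assert(demo_fbo(0x12345678, 0x1000) == 0x678);
    return 0;
}
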
1 : 0; hdr = FIELD_PREP(IRDMA_CQPSQ_OPCODE, IRDMA_CQP_OP_REG_MR) | FIELD_PREP(IRDMA_CQPSQ_STAG_MR, 1) | FIELD_PREP(IRDMA_CQPSQ_STAG_LPBLSIZE, info->chunk_size) | FIELD_PREP(IRDMA_CQPSQ_STAG_HPAGESIZE, page_size) | FIELD_PREP(IRDMA_CQPSQ_STAG_ARIGHTS, info->access_rights) | FIELD_PREP(IRDMA_CQPSQ_STAG_REMACCENABLED, remote_access) | FIELD_PREP(IRDMA_CQPSQ_STAG_VABASEDTO, addr_type) | FIELD_PREP(IRDMA_CQPSQ_STAG_USEHMCFNIDX, info->use_hmc_fcn_index) | FIELD_PREP(IRDMA_CQPSQ_STAG_USEPFRID, info->use_pf_rid) | FIELD_PREP(IRDMA_CQPSQ_WQEVALID, cqp->polarity); irdma_wmb(); /* make sure WQE is written before valid bit is set */ set_64bit_val(wqe, IRDMA_BYTE_24, hdr); irdma_debug_buf(dev, IRDMA_DEBUG_WQE, "MR_REG_NS WQE", wqe, IRDMA_CQP_WQE_SIZE * 8); if (post_sq) irdma_sc_cqp_post_sq(cqp); return 0; } /** * irdma_sc_dealloc_stag - deallocate stag * @dev: sc device struct * @info: dealloc stag info * @scratch: u64 saved to be used during cqp completion * @post_sq: flag for cqp db to ring */ static int irdma_sc_dealloc_stag(struct irdma_sc_dev *dev, struct irdma_dealloc_stag_info *info, u64 scratch, bool post_sq) { u64 hdr; __le64 *wqe; struct irdma_sc_cqp *cqp; cqp = dev->cqp; wqe = irdma_sc_cqp_get_next_send_wqe(cqp, scratch); if (!wqe) return -ENOSPC; set_64bit_val(wqe, IRDMA_BYTE_8, FLD_LS_64(dev, info->pd_id, IRDMA_CQPSQ_STAG_PDID)); set_64bit_val(wqe, IRDMA_BYTE_16, FIELD_PREP(IRDMA_CQPSQ_STAG_IDX, info->stag_idx)); hdr = FIELD_PREP(IRDMA_CQPSQ_OPCODE, IRDMA_CQP_OP_DEALLOC_STAG) | FIELD_PREP(IRDMA_CQPSQ_STAG_MR, info->mr) | FIELD_PREP(IRDMA_CQPSQ_WQEVALID, cqp->polarity); irdma_wmb(); /* make sure WQE is written before valid bit is set */ set_64bit_val(wqe, IRDMA_BYTE_24, hdr); irdma_debug_buf(dev, IRDMA_DEBUG_WQE, "DEALLOC_STAG WQE", wqe, IRDMA_CQP_WQE_SIZE * 8); if (post_sq) irdma_sc_cqp_post_sq(cqp); return 0; } /** * irdma_sc_mw_alloc - mw allocate * @dev: sc device struct * @info: memory window allocation information * @scratch: u64 saved to be used during cqp completion * @post_sq: flag for cqp db to ring */ static int irdma_sc_mw_alloc(struct irdma_sc_dev *dev, struct irdma_mw_alloc_info *info, u64 scratch, bool post_sq) { u64 hdr; struct irdma_sc_cqp *cqp; __le64 *wqe; cqp = dev->cqp; wqe = irdma_sc_cqp_get_next_send_wqe(cqp, scratch); if (!wqe) return -ENOSPC; set_64bit_val(wqe, IRDMA_BYTE_8, FLD_LS_64(dev, info->pd_id, IRDMA_CQPSQ_STAG_PDID)); set_64bit_val(wqe, IRDMA_BYTE_16, FIELD_PREP(IRDMA_CQPSQ_STAG_IDX, info->mw_stag_index)); hdr = FIELD_PREP(IRDMA_CQPSQ_OPCODE, IRDMA_CQP_OP_ALLOC_STAG) | FIELD_PREP(IRDMA_CQPSQ_STAG_MWTYPE, info->mw_wide) | FIELD_PREP(IRDMA_CQPSQ_STAG_MW1_BIND_DONT_VLDT_KEY, info->mw1_bind_dont_vldt_key) | FIELD_PREP(IRDMA_CQPSQ_WQEVALID, cqp->polarity); irdma_wmb(); /* make sure WQE is written before valid bit is set */ set_64bit_val(wqe, IRDMA_BYTE_24, hdr); irdma_debug_buf(dev, IRDMA_DEBUG_WQE, "MW_ALLOC WQE", wqe, IRDMA_CQP_WQE_SIZE * 8); if (post_sq) irdma_sc_cqp_post_sq(cqp); return 0; } /** * irdma_sc_mr_fast_register - Posts RDMA fast register mr WR to iwarp qp * @qp: sc qp struct * @info: fast mr info * @post_sq: flag for cqp db to ring */ int irdma_sc_mr_fast_register(struct irdma_sc_qp *qp, struct irdma_fast_reg_stag_info *info, bool post_sq) { u64 temp, hdr; __le64 *wqe; u32 wqe_idx; u16 quanta = IRDMA_QP_WQE_MIN_QUANTA; enum irdma_page_size page_size; struct irdma_post_sq_info sq_info = {0}; if (info->page_size == 0x40000000) page_size = IRDMA_PAGE_SIZE_1G; else if (info->page_size == 0x200000) page_size = IRDMA_PAGE_SIZE_2M; else page_size = 
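
Each CQP WQE built here follows the same ordering: payload quadwords first, a write barrier, then the header at byte 24 carrying the valid/polarity bit, and only then the doorbell. A compressed sketch of that pattern; demo_wmb() and demo_ring_doorbell() are placeholders for the driver's irdma_wmb() and irdma_sc_cqp_post_sq():

#include <stdint.h>

/* Placeholders standing in for the driver's barrier and doorbell. */
#define demo_wmb() __atomic_thread_fence(__ATOMIC_RELEASE)
static void demo_ring_doorbell(void) { /* MMIO write in the real driver */ }

#define DEMO_VALID_BIT (1ULL << 63)

static void
demo_post_wqe(uint64_t *wqe, uint64_t payload, uint64_t hdr, int polarity,
              int post_sq)
{
    wqe[1] = payload;              /* 1. fill a payload quadword            */
    demo_wmb();                    /* 2. make it globally visible           */
    wqe[3] = hdr |                 /* 3. header (byte 24) is written last,  */
        (polarity ? DEMO_VALID_BIT : 0);  /* it carries the valid bit       */
    if (post_sq)
        demo_ring_doorbell();      /* 4. finally tell the HW                */
}

int
main(void)
{
    uint64_t wqe[4] = {0};

    demo_post_wqe(wqe, 0xabcd, 0x1, 1, 1);
    return 0;
}
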
IRDMA_PAGE_SIZE_4K; sq_info.wr_id = info->wr_id; sq_info.signaled = info->signaled; sq_info.push_wqe = info->push_wqe; wqe = irdma_qp_get_next_send_wqe(&qp->qp_uk, &wqe_idx, &quanta, 0, &sq_info); if (!wqe) return -ENOSPC; qp->qp_uk.sq_wrtrk_array[wqe_idx].signaled = info->signaled; irdma_debug(qp->dev, IRDMA_DEBUG_MR, - "wr_id[%llxh] wqe_idx[%04d] location[%p]\n", (unsigned long long)info->wr_id, - wqe_idx, &qp->qp_uk.sq_wrtrk_array[wqe_idx].wrid); + "wr_id[%llxh] wqe_idx[%04d] location[%p]\n", + (unsigned long long)info->wr_id, wqe_idx, + &qp->qp_uk.sq_wrtrk_array[wqe_idx].wrid); temp = (info->addr_type == IRDMA_ADDR_TYPE_VA_BASED) ? (uintptr_t)info->va : info->fbo; set_64bit_val(wqe, IRDMA_BYTE_0, temp); temp = FIELD_GET(IRDMAQPSQ_FIRSTPMPBLIDXHI, info->first_pm_pbl_index >> 16); set_64bit_val(wqe, IRDMA_BYTE_8, FIELD_PREP(IRDMAQPSQ_FIRSTPMPBLIDXHI, temp) | FIELD_PREP(IRDMAQPSQ_PBLADDR, info->reg_addr_pa >> IRDMA_HW_PAGE_SHIFT)); set_64bit_val(wqe, IRDMA_BYTE_16, info->total_len | FIELD_PREP(IRDMAQPSQ_FIRSTPMPBLIDXLO, info->first_pm_pbl_index)); hdr = FIELD_PREP(IRDMAQPSQ_STAGKEY, info->stag_key) | FIELD_PREP(IRDMAQPSQ_STAGINDEX, info->stag_idx) | FIELD_PREP(IRDMAQPSQ_OPCODE, IRDMAQP_OP_FAST_REGISTER) | FIELD_PREP(IRDMAQPSQ_LPBLSIZE, info->chunk_size) | FIELD_PREP(IRDMAQPSQ_HPAGESIZE, page_size) | FIELD_PREP(IRDMAQPSQ_STAGRIGHTS, info->access_rights) | FIELD_PREP(IRDMAQPSQ_VABASEDTO, info->addr_type) | FIELD_PREP(IRDMAQPSQ_PUSHWQE, (sq_info.push_wqe ? 1 : 0)) | FIELD_PREP(IRDMAQPSQ_READFENCE, info->read_fence) | FIELD_PREP(IRDMAQPSQ_LOCALFENCE, info->local_fence) | FIELD_PREP(IRDMAQPSQ_SIGCOMPL, info->signaled) | FIELD_PREP(IRDMAQPSQ_VALID, qp->qp_uk.swqe_polarity); irdma_wmb(); /* make sure WQE is written before valid bit is set */ set_64bit_val(wqe, IRDMA_BYTE_24, hdr); irdma_debug_buf(qp->dev, IRDMA_DEBUG_WQE, "FAST_REG WQE", wqe, IRDMA_QP_WQE_MIN_SIZE); if (sq_info.push_wqe) irdma_qp_push_wqe(&qp->qp_uk, wqe, quanta, wqe_idx, post_sq); else if (post_sq) irdma_uk_qp_post_wr(&qp->qp_uk); return 0; } /** * irdma_sc_gen_rts_ae - request AE generated after RTS * @qp: sc qp struct */ static void irdma_sc_gen_rts_ae(struct irdma_sc_qp *qp) { __le64 *wqe; u64 hdr; struct irdma_qp_uk *qp_uk; qp_uk = &qp->qp_uk; wqe = qp_uk->sq_base[1].elem; hdr = FIELD_PREP(IRDMAQPSQ_OPCODE, IRDMAQP_OP_NOP) | FIELD_PREP(IRDMAQPSQ_LOCALFENCE, 1) | FIELD_PREP(IRDMAQPSQ_VALID, qp->qp_uk.swqe_polarity); irdma_wmb(); /* make sure WQE is written before valid bit is set */ set_64bit_val(wqe, IRDMA_BYTE_24, hdr); irdma_debug_buf(qp->dev, IRDMA_DEBUG_QP, "NOP W/LOCAL FENCE WQE", wqe, IRDMA_QP_WQE_MIN_SIZE); wqe = qp_uk->sq_base[2].elem; hdr = FIELD_PREP(IRDMAQPSQ_OPCODE, IRDMAQP_OP_GEN_RTS_AE) | FIELD_PREP(IRDMAQPSQ_VALID, qp->qp_uk.swqe_polarity); irdma_wmb(); /* make sure WQE is written before valid bit is set */ set_64bit_val(wqe, IRDMA_BYTE_24, hdr); irdma_debug_buf(qp->dev, IRDMA_DEBUG_QP, "CONN EST WQE", wqe, IRDMA_QP_WQE_MIN_SIZE); } /** * irdma_sc_send_lsmm - send last streaming mode message * @qp: sc qp struct * @lsmm_buf: buffer with lsmm message * @size: size of lsmm buffer * @stag: stag of lsmm buffer */ void irdma_sc_send_lsmm(struct irdma_sc_qp *qp, void *lsmm_buf, u32 size, irdma_stag stag) { __le64 *wqe; u64 hdr; struct irdma_qp_uk *qp_uk; qp_uk = &qp->qp_uk; wqe = qp_uk->sq_base->elem; set_64bit_val(wqe, IRDMA_BYTE_0, (uintptr_t)lsmm_buf); if (qp->qp_uk.uk_attrs->hw_rev == IRDMA_GEN_1) { set_64bit_val(wqe, IRDMA_BYTE_8, FIELD_PREP(IRDMAQPSQ_GEN1_FRAG_LEN, size) | FIELD_PREP(IRDMAQPSQ_GEN1_FRAG_STAG, 
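
The IRDMAQPSQ_VALID bit written above is checked against a polarity that software flips on every ring wrap, so stale WQEs can be told apart from fresh ones without clearing the queue. A toy model of that convention; ring size, field position and the demo_* types are illustrative only:

#include <assert.h>
#include <stdbool.h>
#include <stdint.h>

#define DEMO_RING_SIZE 4

struct demo_ring {
    uint64_t wqe_hdr[DEMO_RING_SIZE];
    uint32_t head;
    bool polarity;          /* value stamped into the valid bit */
};

static void
demo_post(struct demo_ring *r, uint64_t hdr)
{
    /* Stamp the current polarity into bit 63, then advance. */
    r->wqe_hdr[r->head] = hdr | ((uint64_t)r->polarity << 63);
    if (++r->head == DEMO_RING_SIZE) {
        r->head = 0;
        r->polarity = !r->polarity;  /* flip on wrap */
    }
}

static bool
demo_entry_is_valid(const struct demo_ring *r, uint32_t idx, bool polarity)
{
    return ((r->wqe_hdr[idx] >> 63) & 1) == polarity;
}

int
main(void)
{
    struct demo_ring r = { .polarity = true };

    demo_post(&r, 0x1);
    assert(demo_entry_is_valid(&r, 0, true));
    return 0;
}
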
stag)); } else { set_64bit_val(wqe, IRDMA_BYTE_8, FIELD_PREP(IRDMAQPSQ_FRAG_LEN, size) | FIELD_PREP(IRDMAQPSQ_FRAG_STAG, stag) | FIELD_PREP(IRDMAQPSQ_VALID, qp->qp_uk.swqe_polarity)); } set_64bit_val(wqe, IRDMA_BYTE_16, 0); hdr = FIELD_PREP(IRDMAQPSQ_OPCODE, IRDMAQP_OP_RDMA_SEND) | FIELD_PREP(IRDMAQPSQ_STREAMMODE, 1) | FIELD_PREP(IRDMAQPSQ_WAITFORRCVPDU, 1) | FIELD_PREP(IRDMAQPSQ_VALID, qp->qp_uk.swqe_polarity); irdma_wmb(); /* make sure WQE is written before valid bit is set */ set_64bit_val(wqe, IRDMA_BYTE_24, hdr); irdma_debug_buf(qp->dev, IRDMA_DEBUG_WQE, "SEND_LSMM WQE", wqe, IRDMA_QP_WQE_MIN_SIZE); if (qp->dev->hw_attrs.uk_attrs.feature_flags & IRDMA_FEATURE_RTS_AE) irdma_sc_gen_rts_ae(qp); } -/** - * irdma_sc_send_lsmm_nostag - for privilege qp - * @qp: sc qp struct - * @lsmm_buf: buffer with lsmm message - * @size: size of lsmm buffer - */ -void -irdma_sc_send_lsmm_nostag(struct irdma_sc_qp *qp, void *lsmm_buf, u32 size) -{ - __le64 *wqe; - u64 hdr; - struct irdma_qp_uk *qp_uk; - - qp_uk = &qp->qp_uk; - wqe = qp_uk->sq_base->elem; - - set_64bit_val(wqe, IRDMA_BYTE_0, (uintptr_t)lsmm_buf); - - if (qp->qp_uk.uk_attrs->hw_rev == IRDMA_GEN_1) - set_64bit_val(wqe, IRDMA_BYTE_8, - FIELD_PREP(IRDMAQPSQ_GEN1_FRAG_LEN, size)); - else - set_64bit_val(wqe, IRDMA_BYTE_8, - FIELD_PREP(IRDMAQPSQ_FRAG_LEN, size) | - FIELD_PREP(IRDMAQPSQ_VALID, qp->qp_uk.swqe_polarity)); - set_64bit_val(wqe, IRDMA_BYTE_16, 0); - - hdr = FIELD_PREP(IRDMAQPSQ_OPCODE, IRDMAQP_OP_RDMA_SEND) | - FIELD_PREP(IRDMAQPSQ_STREAMMODE, 1) | - FIELD_PREP(IRDMAQPSQ_WAITFORRCVPDU, 1) | - FIELD_PREP(IRDMAQPSQ_VALID, qp->qp_uk.swqe_polarity); - irdma_wmb(); /* make sure WQE is written before valid bit is set */ - - set_64bit_val(wqe, IRDMA_BYTE_24, hdr); - - irdma_debug_buf(qp->dev, IRDMA_DEBUG_WQE, "SEND_LSMM_NOSTAG WQE", wqe, - IRDMA_QP_WQE_MIN_SIZE); -} - /** * irdma_sc_send_rtt - send last read0 or write0 * @qp: sc qp struct * @read: Do read0 or write0 */ void irdma_sc_send_rtt(struct irdma_sc_qp *qp, bool read) { __le64 *wqe; u64 hdr; struct irdma_qp_uk *qp_uk; qp_uk = &qp->qp_uk; wqe = qp_uk->sq_base->elem; set_64bit_val(wqe, IRDMA_BYTE_0, 0); set_64bit_val(wqe, IRDMA_BYTE_16, 0); if (read) { if (qp->qp_uk.uk_attrs->hw_rev == IRDMA_GEN_1) { set_64bit_val(wqe, IRDMA_BYTE_8, FIELD_PREP(IRDMAQPSQ_GEN1_FRAG_STAG, 0xabcd)); } else { set_64bit_val(wqe, IRDMA_BYTE_8, (u64)0xabcd | FIELD_PREP(IRDMAQPSQ_VALID, qp->qp_uk.swqe_polarity)); } hdr = FIELD_PREP(IRDMAQPSQ_REMSTAG, 0x1234) | FIELD_PREP(IRDMAQPSQ_OPCODE, IRDMAQP_OP_RDMA_READ) | FIELD_PREP(IRDMAQPSQ_VALID, qp->qp_uk.swqe_polarity); } else { if (qp->qp_uk.uk_attrs->hw_rev == IRDMA_GEN_1) { set_64bit_val(wqe, IRDMA_BYTE_8, 0); } else { set_64bit_val(wqe, IRDMA_BYTE_8, FIELD_PREP(IRDMAQPSQ_VALID, qp->qp_uk.swqe_polarity)); } hdr = FIELD_PREP(IRDMAQPSQ_OPCODE, IRDMAQP_OP_RDMA_WRITE) | FIELD_PREP(IRDMAQPSQ_VALID, qp->qp_uk.swqe_polarity); } irdma_wmb(); /* make sure WQE is written before valid bit is set */ set_64bit_val(wqe, IRDMA_BYTE_24, hdr); irdma_debug_buf(qp->dev, IRDMA_DEBUG_WQE, "RTR WQE", wqe, IRDMA_QP_WQE_MIN_SIZE); if (qp->dev->hw_attrs.uk_attrs.feature_flags & IRDMA_FEATURE_RTS_AE) irdma_sc_gen_rts_ae(qp); } /** * irdma_iwarp_opcode - determine if incoming is rdma layer * @info: aeq info for the packet * @pkt: packet for error */ static u32 irdma_iwarp_opcode(struct irdma_aeqe_info *info, u8 *pkt){ BE16 *mpa; u32 opcode = 0xffffffff; if (info->q2_data_written) { mpa = (BE16 *) pkt; opcode = IRDMA_NTOHS(mpa[1]) & 0xf; } return opcode; } /** * irdma_locate_mpa - return 
pointer to mpa in the pkt * @pkt: packet with data */ static u8 *irdma_locate_mpa(u8 *pkt) { /* skip over ethernet header */ pkt += IRDMA_MAC_HLEN; /* Skip over IP and TCP headers */ pkt += 4 * (pkt[0] & 0x0f); pkt += 4 * ((pkt[12] >> 4) & 0x0f); return pkt; } /** * irdma_bld_termhdr_ctrl - setup terminate hdr control fields * @qp: sc qp ptr for pkt * @hdr: term hdr * @opcode: flush opcode for termhdr * @layer_etype: error layer + error type * @err: error cod ein the header */ static void irdma_bld_termhdr_ctrl(struct irdma_sc_qp *qp, struct irdma_terminate_hdr *hdr, enum irdma_flush_opcode opcode, u8 layer_etype, u8 err) { qp->flush_code = opcode; hdr->layer_etype = layer_etype; hdr->error_code = err; } /** * irdma_bld_termhdr_ddp_rdma - setup ddp and rdma hdrs in terminate hdr * @pkt: ptr to mpa in offending pkt * @hdr: term hdr * @copy_len: offending pkt length to be copied to term hdr * @is_tagged: DDP tagged or untagged */ static void irdma_bld_termhdr_ddp_rdma(u8 *pkt, struct irdma_terminate_hdr *hdr, int *copy_len, u8 *is_tagged) { u16 ddp_seg_len; ddp_seg_len = IRDMA_NTOHS(*(BE16 *) pkt); if (ddp_seg_len) { *copy_len = 2; hdr->hdrct = DDP_LEN_FLAG; if (pkt[2] & 0x80) { *is_tagged = 1; if (ddp_seg_len >= TERM_DDP_LEN_TAGGED) { *copy_len += TERM_DDP_LEN_TAGGED; hdr->hdrct |= DDP_HDR_FLAG; } } else { if (ddp_seg_len >= TERM_DDP_LEN_UNTAGGED) { *copy_len += TERM_DDP_LEN_UNTAGGED; hdr->hdrct |= DDP_HDR_FLAG; } if (ddp_seg_len >= (TERM_DDP_LEN_UNTAGGED + TERM_RDMA_LEN) && ((pkt[3] & RDMA_OPCODE_M) == RDMA_READ_REQ_OPCODE)) { *copy_len += TERM_RDMA_LEN; hdr->hdrct |= RDMA_HDR_FLAG; } } } } /** * irdma_bld_terminate_hdr - build terminate message header * @qp: qp associated with received terminate AE * @info: the struct contiaing AE information */ static int irdma_bld_terminate_hdr(struct irdma_sc_qp *qp, struct irdma_aeqe_info *info) { u8 *pkt = qp->q2_buf + Q2_BAD_FRAME_OFFSET; int copy_len = 0; u8 is_tagged = 0; u32 opcode; struct irdma_terminate_hdr *termhdr; termhdr = (struct irdma_terminate_hdr *)qp->q2_buf; memset(termhdr, 0, Q2_BAD_FRAME_OFFSET); if (info->q2_data_written) { pkt = irdma_locate_mpa(pkt); irdma_bld_termhdr_ddp_rdma(pkt, termhdr, ©_len, &is_tagged); } opcode = irdma_iwarp_opcode(info, pkt); qp->event_type = IRDMA_QP_EVENT_CATASTROPHIC; qp->sq_flush_code = info->sq; qp->rq_flush_code = info->rq; switch (info->ae_id) { case IRDMA_AE_AMP_UNALLOCATED_STAG: qp->event_type = IRDMA_QP_EVENT_ACCESS_ERR; if (opcode == IRDMA_OP_TYPE_RDMA_WRITE) irdma_bld_termhdr_ctrl(qp, termhdr, FLUSH_PROT_ERR, (LAYER_DDP << 4) | DDP_TAGGED_BUF, DDP_TAGGED_INV_STAG); else irdma_bld_termhdr_ctrl(qp, termhdr, FLUSH_REM_ACCESS_ERR, (LAYER_RDMA << 4) | RDMAP_REMOTE_PROT, RDMAP_INV_STAG); break; case IRDMA_AE_AMP_BOUNDS_VIOLATION: qp->event_type = IRDMA_QP_EVENT_ACCESS_ERR; if (info->q2_data_written) irdma_bld_termhdr_ctrl(qp, termhdr, FLUSH_PROT_ERR, (LAYER_DDP << 4) | DDP_TAGGED_BUF, DDP_TAGGED_BOUNDS); else irdma_bld_termhdr_ctrl(qp, termhdr, FLUSH_REM_ACCESS_ERR, (LAYER_RDMA << 4) | RDMAP_REMOTE_PROT, RDMAP_INV_BOUNDS); break; case IRDMA_AE_AMP_BAD_PD: switch (opcode) { case IRDMA_OP_TYPE_RDMA_WRITE: irdma_bld_termhdr_ctrl(qp, termhdr, FLUSH_PROT_ERR, (LAYER_DDP << 4) | DDP_TAGGED_BUF, DDP_TAGGED_UNASSOC_STAG); break; case IRDMA_OP_TYPE_SEND_INV: case IRDMA_OP_TYPE_SEND_SOL_INV: irdma_bld_termhdr_ctrl(qp, termhdr, FLUSH_REM_ACCESS_ERR, (LAYER_RDMA << 4) | RDMAP_REMOTE_PROT, RDMAP_CANT_INV_STAG); break; default: irdma_bld_termhdr_ctrl(qp, termhdr, FLUSH_REM_ACCESS_ERR, (LAYER_RDMA << 4) | 
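
irdma_locate_mpa() steps over the Ethernet, IP and TCP headers using only the IPv4 IHL nibble and the TCP data-offset nibble, both counted in 32-bit words. The same arithmetic on a fabricated frame; the 14-byte Ethernet header length is an assumption standing in for IRDMA_MAC_HLEN:

#include <assert.h>
#include <stddef.h>
#include <stdint.h>

#define DEMO_MAC_HLEN 14  /* assumed Ethernet header length */

/* Return the offset of the MPA/DDP payload inside an Ethernet frame. */
static size_t
demo_mpa_offset(const uint8_t *pkt)
{
    size_t off = DEMO_MAC_HLEN;

    off += 4 * (pkt[off] & 0x0f);             /* IPv4 IHL, in 32-bit words */
    off += 4 * ((pkt[off + 12] >> 4) & 0x0f); /* TCP data-offset nibble    */
    return off;
}

int
main(void)
{
    uint8_t frame[64] = {0};

    frame[14] = 0x45;               /* IPv4, IHL = 5 -> 20-byte IP header  */
    frame[14 + 20 + 12] = 0x50;     /* TCP data offset = 5 -> 20 bytes     */
    assert(demo_mpa_offset(frame) == 14 + 20 + 20);
    return 0;
}
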
RDMAP_REMOTE_PROT, RDMAP_UNASSOC_STAG); } break; case IRDMA_AE_AMP_INVALID_STAG: qp->event_type = IRDMA_QP_EVENT_ACCESS_ERR; irdma_bld_termhdr_ctrl(qp, termhdr, FLUSH_REM_ACCESS_ERR, (LAYER_RDMA << 4) | RDMAP_REMOTE_PROT, RDMAP_INV_STAG); break; case IRDMA_AE_AMP_BAD_QP: irdma_bld_termhdr_ctrl(qp, termhdr, FLUSH_LOC_QP_OP_ERR, (LAYER_DDP << 4) | DDP_UNTAGGED_BUF, DDP_UNTAGGED_INV_QN); break; case IRDMA_AE_AMP_BAD_STAG_KEY: case IRDMA_AE_AMP_BAD_STAG_INDEX: qp->event_type = IRDMA_QP_EVENT_ACCESS_ERR; switch (opcode) { case IRDMA_OP_TYPE_SEND_INV: case IRDMA_OP_TYPE_SEND_SOL_INV: irdma_bld_termhdr_ctrl(qp, termhdr, FLUSH_REM_OP_ERR, (LAYER_RDMA << 4) | RDMAP_REMOTE_OP, RDMAP_CANT_INV_STAG); break; default: irdma_bld_termhdr_ctrl(qp, termhdr, FLUSH_REM_ACCESS_ERR, (LAYER_RDMA << 4) | RDMAP_REMOTE_OP, RDMAP_INV_STAG); } break; case IRDMA_AE_AMP_RIGHTS_VIOLATION: case IRDMA_AE_AMP_INVALIDATE_NO_REMOTE_ACCESS_RIGHTS: case IRDMA_AE_PRIV_OPERATION_DENIED: qp->event_type = IRDMA_QP_EVENT_ACCESS_ERR; irdma_bld_termhdr_ctrl(qp, termhdr, FLUSH_REM_ACCESS_ERR, (LAYER_RDMA << 4) | RDMAP_REMOTE_PROT, RDMAP_ACCESS); break; case IRDMA_AE_AMP_TO_WRAP: qp->event_type = IRDMA_QP_EVENT_ACCESS_ERR; irdma_bld_termhdr_ctrl(qp, termhdr, FLUSH_REM_ACCESS_ERR, (LAYER_RDMA << 4) | RDMAP_REMOTE_PROT, RDMAP_TO_WRAP); break; case IRDMA_AE_LLP_RECEIVED_MPA_CRC_ERROR: irdma_bld_termhdr_ctrl(qp, termhdr, FLUSH_GENERAL_ERR, (LAYER_MPA << 4) | DDP_LLP, MPA_CRC); break; case IRDMA_AE_LLP_SEGMENT_TOO_SMALL: irdma_bld_termhdr_ctrl(qp, termhdr, FLUSH_LOC_LEN_ERR, (LAYER_DDP << 4) | DDP_CATASTROPHIC, DDP_CATASTROPHIC_LOCAL); break; case IRDMA_AE_LCE_QP_CATASTROPHIC: case IRDMA_AE_DDP_NO_L_BIT: irdma_bld_termhdr_ctrl(qp, termhdr, FLUSH_FATAL_ERR, (LAYER_DDP << 4) | DDP_CATASTROPHIC, DDP_CATASTROPHIC_LOCAL); break; case IRDMA_AE_DDP_INVALID_MSN_GAP_IN_MSN: irdma_bld_termhdr_ctrl(qp, termhdr, FLUSH_GENERAL_ERR, (LAYER_DDP << 4) | DDP_UNTAGGED_BUF, DDP_UNTAGGED_INV_MSN_RANGE); break; case IRDMA_AE_DDP_UBE_DDP_MESSAGE_TOO_LONG_FOR_AVAILABLE_BUFFER: qp->event_type = IRDMA_QP_EVENT_ACCESS_ERR; irdma_bld_termhdr_ctrl(qp, termhdr, FLUSH_LOC_LEN_ERR, (LAYER_DDP << 4) | DDP_UNTAGGED_BUF, DDP_UNTAGGED_INV_TOO_LONG); break; case IRDMA_AE_DDP_UBE_INVALID_DDP_VERSION: if (is_tagged) irdma_bld_termhdr_ctrl(qp, termhdr, FLUSH_GENERAL_ERR, (LAYER_DDP << 4) | DDP_TAGGED_BUF, DDP_TAGGED_INV_DDP_VER); else irdma_bld_termhdr_ctrl(qp, termhdr, FLUSH_GENERAL_ERR, (LAYER_DDP << 4) | DDP_UNTAGGED_BUF, DDP_UNTAGGED_INV_DDP_VER); break; case IRDMA_AE_DDP_UBE_INVALID_MO: irdma_bld_termhdr_ctrl(qp, termhdr, FLUSH_GENERAL_ERR, (LAYER_DDP << 4) | DDP_UNTAGGED_BUF, DDP_UNTAGGED_INV_MO); break; case IRDMA_AE_DDP_UBE_INVALID_MSN_NO_BUFFER_AVAILABLE: irdma_bld_termhdr_ctrl(qp, termhdr, FLUSH_REM_OP_ERR, (LAYER_DDP << 4) | DDP_UNTAGGED_BUF, DDP_UNTAGGED_INV_MSN_NO_BUF); break; case IRDMA_AE_DDP_UBE_INVALID_QN: irdma_bld_termhdr_ctrl(qp, termhdr, FLUSH_GENERAL_ERR, (LAYER_DDP << 4) | DDP_UNTAGGED_BUF, DDP_UNTAGGED_INV_QN); break; case IRDMA_AE_RDMAP_ROE_INVALID_RDMAP_VERSION: irdma_bld_termhdr_ctrl(qp, termhdr, FLUSH_GENERAL_ERR, (LAYER_RDMA << 4) | RDMAP_REMOTE_OP, RDMAP_INV_RDMAP_VER); break; default: irdma_bld_termhdr_ctrl(qp, termhdr, FLUSH_FATAL_ERR, (LAYER_RDMA << 4) | RDMAP_REMOTE_OP, RDMAP_UNSPECIFIED); break; } if (copy_len) irdma_memcpy(termhdr + 1, pkt, copy_len); return sizeof(struct irdma_terminate_hdr) + copy_len; } /** * irdma_terminate_send_fin() - Send fin for terminate message * @qp: qp associated with received terminate AE */ void 
irdma_terminate_send_fin(struct irdma_sc_qp *qp) { irdma_term_modify_qp(qp, IRDMA_QP_STATE_TERMINATE, IRDMAQP_TERM_SEND_FIN_ONLY, 0); } /** * irdma_terminate_connection() - Bad AE and send terminate to remote QP * @qp: qp associated with received terminate AE * @info: the struct contiaing AE information */ void irdma_terminate_connection(struct irdma_sc_qp *qp, struct irdma_aeqe_info *info) { u8 termlen = 0; if (qp->term_flags & IRDMA_TERM_SENT) return; termlen = irdma_bld_terminate_hdr(qp, info); irdma_terminate_start_timer(qp); qp->term_flags |= IRDMA_TERM_SENT; irdma_term_modify_qp(qp, IRDMA_QP_STATE_TERMINATE, IRDMAQP_TERM_SEND_TERM_ONLY, termlen); } /** * irdma_terminate_received - handle terminate received AE * @qp: qp associated with received terminate AE * @info: the struct contiaing AE information */ void irdma_terminate_received(struct irdma_sc_qp *qp, struct irdma_aeqe_info *info) { u8 *pkt = qp->q2_buf + Q2_BAD_FRAME_OFFSET; BE32 *mpa; u8 ddp_ctl; u8 rdma_ctl; u16 aeq_id = 0; struct irdma_terminate_hdr *termhdr; mpa = (BE32 *) irdma_locate_mpa(pkt); if (info->q2_data_written) { /* did not validate the frame - do it now */ ddp_ctl = (ntohl(mpa[0]) >> 8) & 0xff; rdma_ctl = ntohl(mpa[0]) & 0xff; if ((ddp_ctl & 0xc0) != 0x40) aeq_id = IRDMA_AE_LCE_QP_CATASTROPHIC; else if ((ddp_ctl & 0x03) != 1) aeq_id = IRDMA_AE_DDP_UBE_INVALID_DDP_VERSION; else if (ntohl(mpa[2]) != 2) aeq_id = IRDMA_AE_DDP_UBE_INVALID_QN; else if (ntohl(mpa[3]) != 1) aeq_id = IRDMA_AE_DDP_INVALID_MSN_GAP_IN_MSN; else if (ntohl(mpa[4]) != 0) aeq_id = IRDMA_AE_DDP_UBE_INVALID_MO; else if ((rdma_ctl & 0xc0) != 0x40) aeq_id = IRDMA_AE_RDMAP_ROE_INVALID_RDMAP_VERSION; info->ae_id = aeq_id; if (info->ae_id) { /* Bad terminate recvd - send back a terminate */ irdma_terminate_connection(qp, info); return; } } qp->term_flags |= IRDMA_TERM_RCVD; qp->event_type = IRDMA_QP_EVENT_CATASTROPHIC; termhdr = (struct irdma_terminate_hdr *)&mpa[5]; if (termhdr->layer_etype == RDMAP_REMOTE_PROT || termhdr->layer_etype == RDMAP_REMOTE_OP) { irdma_terminate_done(qp, 0); } else { irdma_terminate_start_timer(qp); irdma_terminate_send_fin(qp); } } static int irdma_null_ws_add(struct irdma_sc_vsi *vsi, u8 user_pri) { return 0; } static void irdma_null_ws_remove(struct irdma_sc_vsi *vsi, u8 user_pri) { /* do nothing */ } static void irdma_null_ws_reset(struct irdma_sc_vsi *vsi) { /* do nothing */ } /** * irdma_sc_vsi_init - Init the vsi structure * @vsi: pointer to vsi structure to initialize * @info: the info used to initialize the vsi struct */ void irdma_sc_vsi_init(struct irdma_sc_vsi *vsi, struct irdma_vsi_init_info *info) { u8 i; vsi->dev = info->dev; vsi->back_vsi = info->back_vsi; vsi->register_qset = info->register_qset; vsi->unregister_qset = info->unregister_qset; vsi->mtu = info->params->mtu; vsi->exception_lan_q = info->exception_lan_q; vsi->vsi_idx = info->pf_data_vsi_num; irdma_set_qos_info(vsi, info->params); for (i = 0; i < IRDMA_MAX_USER_PRIORITY; i++) { mutex_init(&vsi->qos[i].qos_mutex); INIT_LIST_HEAD(&vsi->qos[i].qplist); } if (vsi->register_qset) { vsi->dev->ws_add = irdma_ws_add; vsi->dev->ws_remove = irdma_ws_remove; vsi->dev->ws_reset = irdma_ws_reset; } else { vsi->dev->ws_add = irdma_null_ws_add; vsi->dev->ws_remove = irdma_null_ws_remove; vsi->dev->ws_reset = irdma_null_ws_reset; } } /** * irdma_get_stats_idx - Return stats index * @vsi: pointer to the vsi */ static u16 irdma_get_stats_idx(struct irdma_sc_vsi *vsi){ struct irdma_stats_inst_info stats_info = {0}; struct irdma_sc_dev *dev = vsi->dev; if 
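
irdma_sc_vsi_init() points the device's work-scheduler hooks either at the real irdma_ws_* handlers or at no-op stubs, so callers never have to NULL-check them. The same null-object pattern in miniature, with hypothetical demo_* names:

#include <stdbool.h>
#include <stdio.h>

struct demo_dev {
    int  (*ws_add)(int pri);
    void (*ws_remove)(int pri);
};

static int  demo_ws_add(int pri)         { printf("add %d\n", pri); return 0; }
static void demo_ws_remove(int pri)      { printf("remove %d\n", pri); }
static int  demo_null_ws_add(int pri)    { (void)pri; return 0; }
static void demo_null_ws_remove(int pri) { (void)pri; }

static void
demo_vsi_init(struct demo_dev *dev, bool have_qset_registration)
{
    /* Callers never need to NULL-check: a stub is always installed. */
    if (have_qset_registration) {
        dev->ws_add = demo_ws_add;
        dev->ws_remove = demo_ws_remove;
    } else {
        dev->ws_add = demo_null_ws_add;
        dev->ws_remove = demo_null_ws_remove;
    }
}

int
main(void)
{
    struct demo_dev dev;

    demo_vsi_init(&dev, false);
    dev.ws_add(3);  /* safe no-op */
    return 0;
}
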
(dev->hw_attrs.uk_attrs.hw_rev >= IRDMA_GEN_2) { if (!irdma_cqp_stats_inst_cmd(vsi, IRDMA_OP_STATS_ALLOCATE, &stats_info)) return stats_info.stats_idx; } return IRDMA_INVALID_STATS_IDX; } /** * irdma_vsi_stats_init - Initialize the vsi statistics * @vsi: pointer to the vsi structure * @info: The info structure used for initialization */ int irdma_vsi_stats_init(struct irdma_sc_vsi *vsi, struct irdma_vsi_stats_info *info) { struct irdma_dma_mem *stats_buff_mem; vsi->pestat = info->pestat; vsi->pestat->hw = vsi->dev->hw; vsi->pestat->vsi = vsi; stats_buff_mem = &vsi->pestat->gather_info.stats_buff_mem; stats_buff_mem->size = IRDMA_GATHER_STATS_BUF_SIZE * 2; stats_buff_mem->va = irdma_allocate_dma_mem(vsi->pestat->hw, stats_buff_mem, stats_buff_mem->size, 1); if (!stats_buff_mem->va) return -ENOMEM; vsi->pestat->gather_info.gather_stats_va = stats_buff_mem->va; vsi->pestat->gather_info.last_gather_stats_va = (void *)((uintptr_t)stats_buff_mem->va + IRDMA_GATHER_STATS_BUF_SIZE); irdma_hw_stats_start_timer(vsi); /* when stat allocation is not required default to fcn_id. */ vsi->stats_idx = info->fcn_id; if (info->alloc_stats_inst) { u16 stats_idx = irdma_get_stats_idx(vsi); if (stats_idx != IRDMA_INVALID_STATS_IDX) { vsi->stats_inst_alloc = true; vsi->stats_idx = stats_idx; vsi->pestat->gather_info.use_stats_inst = true; vsi->pestat->gather_info.stats_inst_index = stats_idx; } } return 0; } /** * irdma_vsi_stats_free - Free the vsi stats * @vsi: pointer to the vsi structure */ void irdma_vsi_stats_free(struct irdma_sc_vsi *vsi) { struct irdma_stats_inst_info stats_info = {0}; struct irdma_sc_dev *dev = vsi->dev; if (dev->hw_attrs.uk_attrs.hw_rev >= IRDMA_GEN_2) { if (vsi->stats_inst_alloc) { stats_info.stats_idx = vsi->stats_idx; irdma_cqp_stats_inst_cmd(vsi, IRDMA_OP_STATS_FREE, &stats_info); } } if (!vsi->pestat) return; irdma_hw_stats_stop_timer(vsi); irdma_free_dma_mem(vsi->pestat->hw, &vsi->pestat->gather_info.stats_buff_mem); } /** * irdma_get_encoded_wqe_size - given wq size, returns hardware encoded size * @wqsize: size of the wq (sq, rq) to encoded_size * @queue_type: queue type selected for the calculation algorithm */ u8 irdma_get_encoded_wqe_size(u32 wqsize, enum irdma_queue_type queue_type) { u8 encoded_size = 0; /* * cqp sq's hw coded value starts from 1 for size of 4 while it starts from 0 for qp' wq's. 
*/ if (queue_type == IRDMA_QUEUE_TYPE_CQP) encoded_size = 1; wqsize >>= 2; while (wqsize >>= 1) encoded_size++; return encoded_size; } /** * irdma_sc_gather_stats - collect the statistics * @cqp: struct for cqp hw * @info: gather stats info structure * @scratch: u64 saved to be used during cqp completion */ static int irdma_sc_gather_stats(struct irdma_sc_cqp *cqp, struct irdma_stats_gather_info *info, u64 scratch) { __le64 *wqe; u64 temp; if (info->stats_buff_mem.size < IRDMA_GATHER_STATS_BUF_SIZE) return -ENOSPC; wqe = irdma_sc_cqp_get_next_send_wqe(cqp, scratch); if (!wqe) return -ENOSPC; set_64bit_val(wqe, IRDMA_BYTE_40, FIELD_PREP(IRDMA_CQPSQ_STATS_HMC_FCN_INDEX, info->hmc_fcn_index)); set_64bit_val(wqe, IRDMA_BYTE_32, info->stats_buff_mem.pa); temp = FIELD_PREP(IRDMA_CQPSQ_STATS_WQEVALID, cqp->polarity) | FIELD_PREP(IRDMA_CQPSQ_STATS_USE_INST, info->use_stats_inst) | FIELD_PREP(IRDMA_CQPSQ_STATS_INST_INDEX, info->stats_inst_index) | FIELD_PREP(IRDMA_CQPSQ_STATS_USE_HMC_FCN_INDEX, info->use_hmc_fcn_index) | FIELD_PREP(IRDMA_CQPSQ_STATS_OP, IRDMA_CQP_OP_GATHER_STATS); irdma_wmb(); /* make sure WQE is written before valid bit is set */ set_64bit_val(wqe, IRDMA_BYTE_24, temp); irdma_debug_buf(cqp->dev, IRDMA_DEBUG_STATS, "GATHER_STATS WQE", wqe, IRDMA_CQP_WQE_SIZE * 8); irdma_sc_cqp_post_sq(cqp); irdma_debug(cqp->dev, IRDMA_DEBUG_STATS, "CQP SQ head 0x%x tail 0x%x size 0x%x\n", cqp->sq_ring.head, cqp->sq_ring.tail, cqp->sq_ring.size); return 0; } /** * irdma_sc_manage_stats_inst - allocate or free stats instance * @cqp: struct for cqp hw * @info: stats info structure * @alloc: alloc vs. delete flag * @scratch: u64 saved to be used during cqp completion */ static int irdma_sc_manage_stats_inst(struct irdma_sc_cqp *cqp, struct irdma_stats_inst_info *info, bool alloc, u64 scratch) { __le64 *wqe; u64 temp; wqe = irdma_sc_cqp_get_next_send_wqe(cqp, scratch); if (!wqe) return -ENOSPC; set_64bit_val(wqe, IRDMA_BYTE_40, FIELD_PREP(IRDMA_CQPSQ_STATS_HMC_FCN_INDEX, info->hmc_fn_id)); temp = FIELD_PREP(IRDMA_CQPSQ_STATS_WQEVALID, cqp->polarity) | FIELD_PREP(IRDMA_CQPSQ_STATS_ALLOC_INST, alloc) | FIELD_PREP(IRDMA_CQPSQ_STATS_USE_HMC_FCN_INDEX, info->use_hmc_fcn_index) | FIELD_PREP(IRDMA_CQPSQ_STATS_INST_INDEX, info->stats_idx) | FIELD_PREP(IRDMA_CQPSQ_STATS_OP, IRDMA_CQP_OP_MANAGE_STATS); irdma_wmb(); /* make sure WQE is written before valid bit is set */ set_64bit_val(wqe, IRDMA_BYTE_24, temp); irdma_debug_buf(cqp->dev, IRDMA_DEBUG_WQE, "MANAGE_STATS WQE", wqe, IRDMA_CQP_WQE_SIZE * 8); irdma_sc_cqp_post_sq(cqp); return 0; } /** * irdma_sc_set_up_map - set the up map table * @cqp: struct for cqp hw * @info: User priority map info * @scratch: u64 saved to be used during cqp completion */ static int irdma_sc_set_up_map(struct irdma_sc_cqp *cqp, struct irdma_up_info *info, u64 scratch) { __le64 *wqe; - u64 temp; + u64 temp = 0; + int i; wqe = irdma_sc_cqp_get_next_send_wqe(cqp, scratch); if (!wqe) return -ENOSPC; - temp = info->map[0] | LS_64_1(info->map[1], 8) | - LS_64_1(info->map[2], 16) | LS_64_1(info->map[3], 24) | - LS_64_1(info->map[4], 32) | LS_64_1(info->map[5], 40) | - LS_64_1(info->map[6], 48) | LS_64_1(info->map[7], 56); + for (i = 0; i < IRDMA_MAX_USER_PRIORITY; i++) + temp |= (u64)info->map[i] << (i * 8); set_64bit_val(wqe, IRDMA_BYTE_0, temp); set_64bit_val(wqe, IRDMA_BYTE_40, FIELD_PREP(IRDMA_CQPSQ_UP_CNPOVERRIDE, info->cnp_up_override) | FIELD_PREP(IRDMA_CQPSQ_UP_HMCFCNIDX, info->hmc_fcn_idx)); temp = FIELD_PREP(IRDMA_CQPSQ_UP_WQEVALID, cqp->polarity) | 
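
The rewritten UP-map packing above replaces the unrolled LS_64_1() chain with a loop that drops map[i] into byte lane i of the quadword. A stand-alone check of that packing; DEMO_MAX_USER_PRIORITY is assumed to be 8, mirroring IRDMA_MAX_USER_PRIORITY:

#include <assert.h>
#include <stdint.h>

#define DEMO_MAX_USER_PRIORITY 8  /* assumed to match the driver */

static uint64_t
demo_pack_up_map(const uint8_t map[DEMO_MAX_USER_PRIORITY])
{
    uint64_t temp = 0;
    int i;

    for (i = 0; i < DEMO_MAX_USER_PRIORITY; i++)
        temp |= (uint64_t)map[i] << (i * 8);  /* one byte lane per UP */
    return temp;
}

int
main(void)
{
    const uint8_t map[DEMO_MAX_USER_PRIORITY] = {0, 1, 2, 3, 4, 5, 6, 7};

    assert(demo_pack_up_map(map) == 0x0706050403020100ULL);
    return 0;
}
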
FIELD_PREP(IRDMA_CQPSQ_UP_USEVLAN, info->use_vlan) | FIELD_PREP(IRDMA_CQPSQ_UP_USEOVERRIDE, info->use_cnp_up_override) | FIELD_PREP(IRDMA_CQPSQ_UP_OP, IRDMA_CQP_OP_UP_MAP); irdma_wmb(); /* make sure WQE is written before valid bit is set */ set_64bit_val(wqe, IRDMA_BYTE_24, temp); irdma_debug_buf(cqp->dev, IRDMA_DEBUG_WQE, "UPMAP WQE", wqe, IRDMA_CQP_WQE_SIZE * 8); irdma_sc_cqp_post_sq(cqp); return 0; } /** * irdma_sc_manage_ws_node - create/modify/destroy WS node * @cqp: struct for cqp hw * @info: node info structure * @node_op: 0 for add 1 for modify, 2 for delete * @scratch: u64 saved to be used during cqp completion */ static int irdma_sc_manage_ws_node(struct irdma_sc_cqp *cqp, struct irdma_ws_node_info *info, enum irdma_ws_node_op node_op, u64 scratch) { __le64 *wqe; u64 temp = 0; wqe = irdma_sc_cqp_get_next_send_wqe(cqp, scratch); if (!wqe) return -ENOSPC; set_64bit_val(wqe, IRDMA_BYTE_32, FIELD_PREP(IRDMA_CQPSQ_WS_VSI, info->vsi) | FIELD_PREP(IRDMA_CQPSQ_WS_WEIGHT, info->weight)); temp = FIELD_PREP(IRDMA_CQPSQ_WS_WQEVALID, cqp->polarity) | FIELD_PREP(IRDMA_CQPSQ_WS_NODEOP, node_op) | FIELD_PREP(IRDMA_CQPSQ_WS_ENABLENODE, info->enable) | FIELD_PREP(IRDMA_CQPSQ_WS_NODETYPE, info->type_leaf) | FIELD_PREP(IRDMA_CQPSQ_WS_PRIOTYPE, info->prio_type) | FIELD_PREP(IRDMA_CQPSQ_WS_TC, info->tc) | FIELD_PREP(IRDMA_CQPSQ_WS_OP, IRDMA_CQP_OP_WORK_SCHED_NODE) | FIELD_PREP(IRDMA_CQPSQ_WS_PARENTID, info->parent_id) | FIELD_PREP(IRDMA_CQPSQ_WS_NODEID, info->id); irdma_wmb(); /* make sure WQE is written before valid bit is set */ set_64bit_val(wqe, IRDMA_BYTE_24, temp); irdma_debug_buf(cqp->dev, IRDMA_DEBUG_WQE, "MANAGE_WS WQE", wqe, IRDMA_CQP_WQE_SIZE * 8); irdma_sc_cqp_post_sq(cqp); return 0; } /** * irdma_sc_qp_flush_wqes - flush qp's wqe * @qp: sc qp * @info: dlush information * @scratch: u64 saved to be used during cqp completion * @post_sq: flag for cqp db to ring */ int irdma_sc_qp_flush_wqes(struct irdma_sc_qp *qp, struct irdma_qp_flush_info *info, u64 scratch, bool post_sq) { u64 temp = 0; __le64 *wqe; struct irdma_sc_cqp *cqp; u64 hdr; bool flush_sq = false, flush_rq = false; if (info->rq && !qp->flush_rq) flush_rq = true; if (info->sq && !qp->flush_sq) flush_sq = true; qp->flush_sq |= flush_sq; qp->flush_rq |= flush_rq; if (!flush_sq && !flush_rq) { irdma_debug(qp->dev, IRDMA_DEBUG_CQP, - "Additional flush request ignored for qp %x\n", qp->qp_uk.qp_id); + "Additional flush request ignored for qp %x\n", + qp->qp_uk.qp_id); return -EALREADY; } cqp = qp->pd->dev->cqp; wqe = irdma_sc_cqp_get_next_send_wqe(cqp, scratch); if (!wqe) return -ENOSPC; if (info->userflushcode) { if (flush_rq) temp |= FIELD_PREP(IRDMA_CQPSQ_FWQE_RQMNERR, info->rq_minor_code) | FIELD_PREP(IRDMA_CQPSQ_FWQE_RQMJERR, info->rq_major_code); if (flush_sq) temp |= FIELD_PREP(IRDMA_CQPSQ_FWQE_SQMNERR, info->sq_minor_code) | FIELD_PREP(IRDMA_CQPSQ_FWQE_SQMJERR, info->sq_major_code); } set_64bit_val(wqe, IRDMA_BYTE_16, temp); temp = (info->generate_ae) ? 
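
irdma_sc_qp_flush_wqes() keeps per-QP flush_sq/flush_rq latches so a repeated flush of an already-flushed queue is dropped with -EALREADY rather than posting another CQP WQE. The guard reduced to its essentials, with a hypothetical demo_qp in place of the sc qp:

#include <assert.h>
#include <errno.h>
#include <stdbool.h>

struct demo_qp {
    bool flush_sq;
    bool flush_rq;
};

static int
demo_flush_wqes(struct demo_qp *qp, bool want_sq, bool want_rq)
{
    bool flush_sq = want_sq && !qp->flush_sq;
    bool flush_rq = want_rq && !qp->flush_rq;

    qp->flush_sq |= flush_sq;
    qp->flush_rq |= flush_rq;
    if (!flush_sq && !flush_rq)
        return -EALREADY;  /* nothing new to flush */
    /* ...post the flush WQE for the newly flushed queues here... */
    return 0;
}

int
main(void)
{
    struct demo_qp qp = {0};

    assert(demo_flush_wqes(&qp, true, true) == 0);
    assert(demo_flush_wqes(&qp, true, false) == -EALREADY);
    return 0;
}
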
info->ae_code | FIELD_PREP(IRDMA_CQPSQ_FWQE_AESOURCE, info->ae_src) : 0; set_64bit_val(wqe, IRDMA_BYTE_8, temp); hdr = qp->qp_uk.qp_id | FIELD_PREP(IRDMA_CQPSQ_OPCODE, IRDMA_CQP_OP_FLUSH_WQES) | FIELD_PREP(IRDMA_CQPSQ_FWQE_GENERATE_AE, info->generate_ae) | FIELD_PREP(IRDMA_CQPSQ_FWQE_USERFLCODE, info->userflushcode) | FIELD_PREP(IRDMA_CQPSQ_FWQE_FLUSHSQ, flush_sq) | FIELD_PREP(IRDMA_CQPSQ_FWQE_FLUSHRQ, flush_rq) | FIELD_PREP(IRDMA_CQPSQ_WQEVALID, cqp->polarity); irdma_wmb(); /* make sure WQE is written before valid bit is set */ set_64bit_val(wqe, IRDMA_BYTE_24, hdr); irdma_debug_buf(cqp->dev, IRDMA_DEBUG_WQE, "QP_FLUSH WQE", wqe, IRDMA_CQP_WQE_SIZE * 8); if (post_sq) irdma_sc_cqp_post_sq(cqp); return 0; } /** * irdma_sc_gen_ae - generate AE, uses flush WQE CQP OP * @qp: sc qp * @info: gen ae information * @scratch: u64 saved to be used during cqp completion * @post_sq: flag for cqp db to ring */ static int irdma_sc_gen_ae(struct irdma_sc_qp *qp, struct irdma_gen_ae_info *info, u64 scratch, bool post_sq) { u64 temp; __le64 *wqe; struct irdma_sc_cqp *cqp; u64 hdr; cqp = qp->pd->dev->cqp; wqe = irdma_sc_cqp_get_next_send_wqe(cqp, scratch); if (!wqe) return -ENOSPC; temp = info->ae_code | FIELD_PREP(IRDMA_CQPSQ_FWQE_AESOURCE, info->ae_src); set_64bit_val(wqe, IRDMA_BYTE_8, temp); hdr = qp->qp_uk.qp_id | FIELD_PREP(IRDMA_CQPSQ_OPCODE, IRDMA_CQP_OP_GEN_AE) | FIELD_PREP(IRDMA_CQPSQ_FWQE_GENERATE_AE, 1) | FIELD_PREP(IRDMA_CQPSQ_WQEVALID, cqp->polarity); irdma_wmb(); /* make sure WQE is written before valid bit is set */ set_64bit_val(wqe, IRDMA_BYTE_24, hdr); irdma_debug_buf(cqp->dev, IRDMA_DEBUG_WQE, "GEN_AE WQE", wqe, IRDMA_CQP_WQE_SIZE * 8); if (post_sq) irdma_sc_cqp_post_sq(cqp); return 0; } /*** irdma_sc_qp_upload_context - upload qp's context * @dev: sc device struct * @info: upload context info ptr for return * @scratch: u64 saved to be used during cqp completion * @post_sq: flag for cqp db to ring */ static int irdma_sc_qp_upload_context(struct irdma_sc_dev *dev, struct irdma_upload_context_info *info, u64 scratch, bool post_sq) { __le64 *wqe; struct irdma_sc_cqp *cqp; u64 hdr; cqp = dev->cqp; wqe = irdma_sc_cqp_get_next_send_wqe(cqp, scratch); if (!wqe) return -ENOSPC; set_64bit_val(wqe, IRDMA_BYTE_16, info->buf_pa); hdr = FIELD_PREP(IRDMA_CQPSQ_UCTX_QPID, info->qp_id) | FIELD_PREP(IRDMA_CQPSQ_OPCODE, IRDMA_CQP_OP_UPLOAD_CONTEXT) | FIELD_PREP(IRDMA_CQPSQ_UCTX_QPTYPE, info->qp_type) | FIELD_PREP(IRDMA_CQPSQ_UCTX_RAWFORMAT, info->raw_format) | FIELD_PREP(IRDMA_CQPSQ_UCTX_FREEZEQP, info->freeze_qp) | FIELD_PREP(IRDMA_CQPSQ_WQEVALID, cqp->polarity); irdma_wmb(); /* make sure WQE is written before valid bit is set */ set_64bit_val(wqe, IRDMA_BYTE_24, hdr); irdma_debug_buf(dev, IRDMA_DEBUG_WQE, "QP_UPLOAD_CTX WQE", wqe, IRDMA_CQP_WQE_SIZE * 8); if (post_sq) irdma_sc_cqp_post_sq(cqp); return 0; } /** * irdma_sc_manage_push_page - Handle push page * @cqp: struct for cqp hw * @info: push page info * @scratch: u64 saved to be used during cqp completion * @post_sq: flag for cqp db to ring */ static int irdma_sc_manage_push_page(struct irdma_sc_cqp *cqp, struct irdma_cqp_manage_push_page_info *info, u64 scratch, bool post_sq) { __le64 *wqe; u64 hdr; if (info->free_page && info->push_idx >= cqp->dev->hw_attrs.max_hw_device_pages) return -EINVAL; wqe = irdma_sc_cqp_get_next_send_wqe(cqp, scratch); if (!wqe) return -ENOSPC; set_64bit_val(wqe, IRDMA_BYTE_16, info->qs_handle); hdr = FIELD_PREP(IRDMA_CQPSQ_MPP_PPIDX, info->push_idx) | FIELD_PREP(IRDMA_CQPSQ_MPP_PPTYPE, info->push_page_type) | 
    FIELD_PREP(IRDMA_CQPSQ_OPCODE, IRDMA_CQP_OP_MANAGE_PUSH_PAGES) |
    FIELD_PREP(IRDMA_CQPSQ_WQEVALID, cqp->polarity) |
    FIELD_PREP(IRDMA_CQPSQ_MPP_FREE_PAGE, info->free_page);

	irdma_wmb();		/* make sure WQE is written before valid bit is set */

	set_64bit_val(wqe, IRDMA_BYTE_24, hdr);

	irdma_debug_buf(cqp->dev, IRDMA_DEBUG_WQE, "MANAGE_PUSH_PAGES WQE", wqe,
			IRDMA_CQP_WQE_SIZE * 8);
	if (post_sq)
		irdma_sc_cqp_post_sq(cqp);

	return 0;
}

/**
 * irdma_sc_suspend_qp - suspend qp for param change
 * @cqp: struct for cqp hw
 * @qp: sc qp struct
 * @scratch: u64 saved to be used during cqp completion
 */
static int
irdma_sc_suspend_qp(struct irdma_sc_cqp *cqp, struct irdma_sc_qp *qp,
		    u64 scratch)
{
	u64 hdr;
	__le64 *wqe;

	wqe = irdma_sc_cqp_get_next_send_wqe(cqp, scratch);
	if (!wqe)
		return -ENOSPC;

	hdr = FIELD_PREP(IRDMA_CQPSQ_SUSPENDQP_QPID, qp->qp_uk.qp_id) |
	    FIELD_PREP(IRDMA_CQPSQ_OPCODE, IRDMA_CQP_OP_SUSPEND_QP) |
	    FIELD_PREP(IRDMA_CQPSQ_WQEVALID, cqp->polarity);
	irdma_wmb();		/* make sure WQE is written before valid bit is set */

	set_64bit_val(wqe, IRDMA_BYTE_24, hdr);

	irdma_debug_buf(cqp->dev, IRDMA_DEBUG_WQE, "SUSPEND_QP WQE", wqe,
			IRDMA_CQP_WQE_SIZE * 8);
	irdma_sc_cqp_post_sq(cqp);

	return 0;
}

/**
 * irdma_sc_resume_qp - resume qp after suspend
 * @cqp: struct for cqp hw
 * @qp: sc qp struct
 * @scratch: u64 saved to be used during cqp completion
 */
static int
irdma_sc_resume_qp(struct irdma_sc_cqp *cqp, struct irdma_sc_qp *qp,
		   u64 scratch)
{
	u64 hdr;
	__le64 *wqe;

	wqe = irdma_sc_cqp_get_next_send_wqe(cqp, scratch);
	if (!wqe)
		return -ENOSPC;

	set_64bit_val(wqe, IRDMA_BYTE_16,
		      FIELD_PREP(IRDMA_CQPSQ_RESUMEQP_QSHANDLE, qp->qs_handle));

	hdr = FIELD_PREP(IRDMA_CQPSQ_RESUMEQP_QPID, qp->qp_uk.qp_id) |
	    FIELD_PREP(IRDMA_CQPSQ_OPCODE, IRDMA_CQP_OP_RESUME_QP) |
	    FIELD_PREP(IRDMA_CQPSQ_WQEVALID, cqp->polarity);
	irdma_wmb();		/* make sure WQE is written before valid bit is set */

	set_64bit_val(wqe, IRDMA_BYTE_24, hdr);

	irdma_debug_buf(cqp->dev, IRDMA_DEBUG_WQE, "RESUME_QP WQE", wqe,
			IRDMA_CQP_WQE_SIZE * 8);
	irdma_sc_cqp_post_sq(cqp);

	return 0;
}

/**
 * irdma_sc_cq_ack - acknowledge completion q
 * @cq: cq struct
 */
static inline void
irdma_sc_cq_ack(struct irdma_sc_cq *cq)
{
	db_wr32(cq->cq_uk.cq_id, cq->cq_uk.cq_ack_db);
}

/**
 * irdma_sc_cq_init - initialize completion q
 * @cq: cq struct
 * @info: cq initialization info
 */
int
irdma_sc_cq_init(struct irdma_sc_cq *cq, struct irdma_cq_init_info *info)
{
	int ret_code;
	u32 pble_obj_cnt;

	pble_obj_cnt = info->dev->hmc_info->hmc_obj[IRDMA_HMC_IW_PBLE].cnt;
	if (info->virtual_map && info->first_pm_pbl_idx >= pble_obj_cnt)
		return -EINVAL;

	cq->cq_pa = info->cq_base_pa;
	cq->dev = info->dev;
	cq->ceq_id = info->ceq_id;
	info->cq_uk_init_info.cqe_alloc_db = cq->dev->cq_arm_db;
	info->cq_uk_init_info.cq_ack_db = cq->dev->cq_ack_db;
	ret_code = irdma_uk_cq_init(&cq->cq_uk, &info->cq_uk_init_info);
	if (ret_code)
		return ret_code;

	cq->virtual_map = info->virtual_map;
	cq->pbl_chunk_size = info->pbl_chunk_size;
	cq->ceqe_mask = info->ceqe_mask;
	cq->cq_type = (info->type) ?
info->type : IRDMA_CQ_TYPE_IWARP; cq->shadow_area_pa = info->shadow_area_pa; cq->shadow_read_threshold = info->shadow_read_threshold; cq->ceq_id_valid = info->ceq_id_valid; cq->tph_en = info->tph_en; cq->tph_val = info->tph_val; cq->first_pm_pbl_idx = info->first_pm_pbl_idx; cq->vsi = info->vsi; return 0; } /** * irdma_sc_cq_create - create completion q * @cq: cq struct * @scratch: u64 saved to be used during cqp completion * @check_overflow: flag for overflow check * @post_sq: flag for cqp db to ring */ static int irdma_sc_cq_create(struct irdma_sc_cq *cq, u64 scratch, bool check_overflow, bool post_sq) { __le64 *wqe; struct irdma_sc_cqp *cqp; u64 hdr; struct irdma_sc_ceq *ceq; int ret_code = 0; cqp = cq->dev->cqp; if (cq->cq_uk.cq_id > (cqp->dev->hmc_info->hmc_obj[IRDMA_HMC_IW_CQ].max_cnt - 1)) return -EINVAL; if (cq->ceq_id > (cq->dev->hmc_fpm_misc.max_ceqs - 1)) return -EINVAL; ceq = cq->dev->ceq[cq->ceq_id]; - if (ceq && ceq->reg_cq) + if (ceq && ceq->reg_cq) { ret_code = irdma_sc_add_cq_ctx(ceq, cq); - - if (ret_code) - return ret_code; + if (ret_code) + return ret_code; + } wqe = irdma_sc_cqp_get_next_send_wqe(cqp, scratch); if (!wqe) { if (ceq && ceq->reg_cq) irdma_sc_remove_cq_ctx(ceq, cq); return -ENOSPC; } set_64bit_val(wqe, IRDMA_BYTE_0, cq->cq_uk.cq_size); set_64bit_val(wqe, IRDMA_BYTE_8, RS_64_1(cq, 1)); set_64bit_val(wqe, IRDMA_BYTE_16, FIELD_PREP(IRDMA_CQPSQ_CQ_SHADOW_READ_THRESHOLD, cq->shadow_read_threshold)); set_64bit_val(wqe, IRDMA_BYTE_32, (cq->virtual_map ? 0 : cq->cq_pa)); set_64bit_val(wqe, IRDMA_BYTE_40, cq->shadow_area_pa); set_64bit_val(wqe, IRDMA_BYTE_48, FIELD_PREP(IRDMA_CQPSQ_CQ_FIRSTPMPBLIDX, (cq->virtual_map ? cq->first_pm_pbl_idx : 0))); set_64bit_val(wqe, IRDMA_BYTE_56, FIELD_PREP(IRDMA_CQPSQ_TPHVAL, cq->tph_val) | FIELD_PREP(IRDMA_CQPSQ_VSIIDX, cq->vsi->vsi_idx)); hdr = FLD_LS_64(cq->dev, cq->cq_uk.cq_id, IRDMA_CQPSQ_CQ_CQID) | FLD_LS_64(cq->dev, (cq->ceq_id_valid ? cq->ceq_id : 0), IRDMA_CQPSQ_CQ_CEQID) | FIELD_PREP(IRDMA_CQPSQ_OPCODE, IRDMA_CQP_OP_CREATE_CQ) | FIELD_PREP(IRDMA_CQPSQ_CQ_LPBLSIZE, cq->pbl_chunk_size) | FIELD_PREP(IRDMA_CQPSQ_CQ_CHKOVERFLOW, check_overflow) | FIELD_PREP(IRDMA_CQPSQ_CQ_VIRTMAP, cq->virtual_map) | FIELD_PREP(IRDMA_CQPSQ_CQ_ENCEQEMASK, cq->ceqe_mask) | FIELD_PREP(IRDMA_CQPSQ_CQ_CEQIDVALID, cq->ceq_id_valid) | FIELD_PREP(IRDMA_CQPSQ_TPHEN, cq->tph_en) | FIELD_PREP(IRDMA_CQPSQ_CQ_AVOIDMEMCNFLCT, cq->cq_uk.avoid_mem_cflct) | FIELD_PREP(IRDMA_CQPSQ_WQEVALID, cqp->polarity); irdma_wmb(); /* make sure WQE is written before valid bit is set */ set_64bit_val(wqe, IRDMA_BYTE_24, hdr); irdma_debug_buf(cqp->dev, IRDMA_DEBUG_WQE, "CQ_CREATE WQE", wqe, IRDMA_CQP_WQE_SIZE * 8); if (post_sq) irdma_sc_cqp_post_sq(cqp); return 0; } /** * irdma_sc_cq_destroy - destroy completion q * @cq: cq struct * @scratch: u64 saved to be used during cqp completion * @post_sq: flag for cqp db to ring */ int irdma_sc_cq_destroy(struct irdma_sc_cq *cq, u64 scratch, bool post_sq) { struct irdma_sc_cqp *cqp; __le64 *wqe; u64 hdr; struct irdma_sc_ceq *ceq; cqp = cq->dev->cqp; wqe = irdma_sc_cqp_get_next_send_wqe(cqp, scratch); if (!wqe) return -ENOSPC; ceq = cq->dev->ceq[cq->ceq_id]; if (ceq && ceq->reg_cq) irdma_sc_remove_cq_ctx(ceq, cq); set_64bit_val(wqe, IRDMA_BYTE_0, cq->cq_uk.cq_size); set_64bit_val(wqe, IRDMA_BYTE_8, RS_64_1(cq, 1)); set_64bit_val(wqe, IRDMA_BYTE_40, cq->shadow_area_pa); set_64bit_val(wqe, IRDMA_BYTE_48, (cq->virtual_map ? cq->first_pm_pbl_idx : 0)); hdr = cq->cq_uk.cq_id | FLD_LS_64(cq->dev, (cq->ceq_id_valid ? 
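
The corrected hunk above keeps the ceq registration check inside the reg_cq branch and, when no CQP WQE can be reserved, removes the just-added CQ context before failing. The acquire-then-roll-back shape of that path, abstracted from the irdma types into hypothetical demo_* helpers:

#include <assert.h>
#include <errno.h>
#include <stdbool.h>

/* Hypothetical stand-ins for irdma_sc_add_cq_ctx()/irdma_sc_remove_cq_ctx(). */
static int  demo_register(bool *registered)   { *registered = true; return 0; }
static void demo_unregister(bool *registered) { *registered = false; }

static int
demo_cq_create(bool need_registration, bool wqe_available, bool *registered)
{
    int ret;

    if (need_registration) {
        ret = demo_register(registered);
        if (ret)
            return ret;
    }
    if (!wqe_available) {
        /* Roll back the side effect before reporting the failure. */
        if (need_registration)
            demo_unregister(registered);
        return -ENOSPC;
    }
    /* ...build and post the CQ_CREATE WQE here... */
    return 0;
}

int
main(void)
{
    bool registered = false;

    assert(demo_cq_create(true, false, &registered) == -ENOSPC);
    assert(!registered);  /* rollback left no stale registration */
    return 0;
}
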
cq->ceq_id : 0), IRDMA_CQPSQ_CQ_CEQID) | FIELD_PREP(IRDMA_CQPSQ_OPCODE, IRDMA_CQP_OP_DESTROY_CQ) | FIELD_PREP(IRDMA_CQPSQ_CQ_LPBLSIZE, cq->pbl_chunk_size) | FIELD_PREP(IRDMA_CQPSQ_CQ_VIRTMAP, cq->virtual_map) | FIELD_PREP(IRDMA_CQPSQ_CQ_ENCEQEMASK, cq->ceqe_mask) | FIELD_PREP(IRDMA_CQPSQ_CQ_CEQIDVALID, cq->ceq_id_valid) | FIELD_PREP(IRDMA_CQPSQ_TPHEN, cq->tph_en) | FIELD_PREP(IRDMA_CQPSQ_CQ_AVOIDMEMCNFLCT, cq->cq_uk.avoid_mem_cflct) | FIELD_PREP(IRDMA_CQPSQ_WQEVALID, cqp->polarity); irdma_wmb(); /* make sure WQE is written before valid bit is set */ set_64bit_val(wqe, IRDMA_BYTE_24, hdr); irdma_debug_buf(cqp->dev, IRDMA_DEBUG_WQE, "CQ_DESTROY WQE", wqe, IRDMA_CQP_WQE_SIZE * 8); if (post_sq) irdma_sc_cqp_post_sq(cqp); return 0; } /** * irdma_sc_cq_resize - set resized cq buffer info * @cq: resized cq * @info: resized cq buffer info */ void irdma_sc_cq_resize(struct irdma_sc_cq *cq, struct irdma_modify_cq_info *info) { cq->virtual_map = info->virtual_map; cq->cq_pa = info->cq_pa; cq->first_pm_pbl_idx = info->first_pm_pbl_idx; cq->pbl_chunk_size = info->pbl_chunk_size; irdma_uk_cq_resize(&cq->cq_uk, info->cq_base, info->cq_size); } /** * irdma_sc_cq_modify - modify a Completion Queue * @cq: cq struct * @info: modification info struct * @scratch: u64 saved to be used during cqp completion * @post_sq: flag to post to sq */ static int irdma_sc_cq_modify(struct irdma_sc_cq *cq, struct irdma_modify_cq_info *info, u64 scratch, bool post_sq) { struct irdma_sc_cqp *cqp; __le64 *wqe; u64 hdr; u32 pble_obj_cnt; pble_obj_cnt = cq->dev->hmc_info->hmc_obj[IRDMA_HMC_IW_PBLE].cnt; if (info->cq_resize && info->virtual_map && info->first_pm_pbl_idx >= pble_obj_cnt) return -EINVAL; cqp = cq->dev->cqp; wqe = irdma_sc_cqp_get_next_send_wqe(cqp, scratch); if (!wqe) return -ENOSPC; set_64bit_val(wqe, IRDMA_BYTE_0, info->cq_size); set_64bit_val(wqe, IRDMA_BYTE_8, RS_64_1(cq, 1)); set_64bit_val(wqe, IRDMA_BYTE_16, FIELD_PREP(IRDMA_CQPSQ_CQ_SHADOW_READ_THRESHOLD, info->shadow_read_threshold)); set_64bit_val(wqe, IRDMA_BYTE_32, info->cq_pa); set_64bit_val(wqe, IRDMA_BYTE_40, cq->shadow_area_pa); set_64bit_val(wqe, IRDMA_BYTE_48, info->first_pm_pbl_idx); set_64bit_val(wqe, IRDMA_BYTE_56, FIELD_PREP(IRDMA_CQPSQ_TPHVAL, cq->tph_val) | FIELD_PREP(IRDMA_CQPSQ_VSIIDX, cq->vsi->vsi_idx)); hdr = cq->cq_uk.cq_id | FIELD_PREP(IRDMA_CQPSQ_OPCODE, IRDMA_CQP_OP_MODIFY_CQ) | FIELD_PREP(IRDMA_CQPSQ_CQ_CQRESIZE, info->cq_resize) | FIELD_PREP(IRDMA_CQPSQ_CQ_LPBLSIZE, info->pbl_chunk_size) | FIELD_PREP(IRDMA_CQPSQ_CQ_CHKOVERFLOW, info->check_overflow) | FIELD_PREP(IRDMA_CQPSQ_CQ_VIRTMAP, info->virtual_map) | FIELD_PREP(IRDMA_CQPSQ_CQ_ENCEQEMASK, cq->ceqe_mask) | FIELD_PREP(IRDMA_CQPSQ_TPHEN, cq->tph_en) | FIELD_PREP(IRDMA_CQPSQ_CQ_AVOIDMEMCNFLCT, cq->cq_uk.avoid_mem_cflct) | FIELD_PREP(IRDMA_CQPSQ_WQEVALID, cqp->polarity); irdma_wmb(); /* make sure WQE is written before valid bit is set */ set_64bit_val(wqe, IRDMA_BYTE_24, hdr); irdma_debug_buf(cqp->dev, IRDMA_DEBUG_WQE, "CQ_MODIFY WQE", wqe, IRDMA_CQP_WQE_SIZE * 8); if (post_sq) irdma_sc_cqp_post_sq(cqp); return 0; } /** * irdma_check_cqp_progress - check cqp processing progress * @timeout: timeout info struct * @dev: sc device struct */ void irdma_check_cqp_progress(struct irdma_cqp_timeout *timeout, struct irdma_sc_dev *dev) { - if (timeout->compl_cqp_cmds != dev->cqp_cmd_stats[IRDMA_OP_CMPL_CMDS]) { - timeout->compl_cqp_cmds = dev->cqp_cmd_stats[IRDMA_OP_CMPL_CMDS]; + u64 completed_ops = atomic64_read(&dev->cqp->completed_ops); + + if (timeout->compl_cqp_cmds != completed_ops) { + 
timeout->compl_cqp_cmds = completed_ops; timeout->count = 0; - } else if (timeout->compl_cqp_cmds != - dev->cqp_cmd_stats[IRDMA_OP_REQ_CMDS]) { + } else if (timeout->compl_cqp_cmds != dev->cqp->requested_ops) { timeout->count++; } } /** * irdma_get_cqp_reg_info - get head and tail for cqp using registers * @cqp: struct for cqp hw * @val: cqp tail register value * @tail: wqtail register value * @error: cqp processing err */ static inline void irdma_get_cqp_reg_info(struct irdma_sc_cqp *cqp, u32 *val, u32 *tail, u32 *error) { *val = readl(cqp->dev->hw_regs[IRDMA_CQPTAIL]); *tail = FIELD_GET(IRDMA_CQPTAIL_WQTAIL, *val); *error = FIELD_GET(IRDMA_CQPTAIL_CQP_OP_ERR, *val); } /** * irdma_cqp_poll_registers - poll cqp registers * @cqp: struct for cqp hw * @tail: wqtail register value * @count: how many times to try for completion */ static int irdma_cqp_poll_registers(struct irdma_sc_cqp *cqp, u32 tail, u32 count) { u32 i = 0; u32 newtail, error, val; while (i++ < count) { irdma_get_cqp_reg_info(cqp, &val, &newtail, &error); if (error) { error = readl(cqp->dev->hw_regs[IRDMA_CQPERRCODES]); irdma_debug(cqp->dev, IRDMA_DEBUG_CQP, "CQPERRCODES error_code[x%08X]\n", error); return -EIO; } if (newtail != tail) { /* SUCCESS */ IRDMA_RING_MOVE_TAIL(cqp->sq_ring); - cqp->dev->cqp_cmd_stats[IRDMA_OP_CMPL_CMDS]++; + atomic64_inc(&cqp->completed_ops); return 0; } irdma_usec_delay(cqp->dev->hw_attrs.max_sleep_count); } return -ETIMEDOUT; } /** * irdma_sc_decode_fpm_commit - decode a 64 bit value into count and base * @dev: sc device struct * @buf: pointer to commit buffer * @buf_idx: buffer index * @obj_info: object info pointer * @rsrc_idx: indexs of memory resource */ static u64 irdma_sc_decode_fpm_commit(struct irdma_sc_dev *dev, __le64 * buf, u32 buf_idx, struct irdma_hmc_obj_info *obj_info, u32 rsrc_idx){ u64 temp; get_64bit_val(buf, buf_idx, &temp); switch (rsrc_idx) { case IRDMA_HMC_IW_QP: obj_info[rsrc_idx].cnt = (u32)FIELD_GET(IRDMA_COMMIT_FPM_QPCNT, temp); break; case IRDMA_HMC_IW_CQ: obj_info[rsrc_idx].cnt = (u32)FLD_RS_64(dev, temp, IRDMA_COMMIT_FPM_CQCNT); break; case IRDMA_HMC_IW_APBVT_ENTRY: if (dev->hw_attrs.uk_attrs.hw_rev <= IRDMA_GEN_2) obj_info[rsrc_idx].cnt = 1; else obj_info[rsrc_idx].cnt = 0; break; default: obj_info[rsrc_idx].cnt = (u32)temp; break; } obj_info[rsrc_idx].base = (u64)RS_64_1(temp, IRDMA_COMMIT_FPM_BASE_S) * 512; return temp; } /** * irdma_sc_parse_fpm_commit_buf - parse fpm commit buffer * @dev: pointer to dev struct * @buf: ptr to fpm commit buffer * @info: ptr to irdma_hmc_obj_info struct * @sd: number of SDs for HMC objects * * parses fpm commit info and copy base value * of hmc objects in hmc_info */ static void irdma_sc_parse_fpm_commit_buf(struct irdma_sc_dev *dev, __le64 * buf, struct irdma_hmc_obj_info *info, u32 *sd) { u64 size; u32 i; u64 max_base = 0; u32 last_hmc_obj = 0; irdma_sc_decode_fpm_commit(dev, buf, IRDMA_BYTE_0, info, IRDMA_HMC_IW_QP); irdma_sc_decode_fpm_commit(dev, buf, IRDMA_BYTE_8, info, IRDMA_HMC_IW_CQ); /* skiping RSRVD */ irdma_sc_decode_fpm_commit(dev, buf, IRDMA_BYTE_24, info, IRDMA_HMC_IW_HTE); irdma_sc_decode_fpm_commit(dev, buf, IRDMA_BYTE_32, info, IRDMA_HMC_IW_ARP); irdma_sc_decode_fpm_commit(dev, buf, IRDMA_BYTE_40, info, IRDMA_HMC_IW_APBVT_ENTRY); irdma_sc_decode_fpm_commit(dev, buf, IRDMA_BYTE_48, info, IRDMA_HMC_IW_MR); irdma_sc_decode_fpm_commit(dev, buf, IRDMA_BYTE_56, info, IRDMA_HMC_IW_XF); irdma_sc_decode_fpm_commit(dev, buf, IRDMA_BYTE_64, info, IRDMA_HMC_IW_XFFL); irdma_sc_decode_fpm_commit(dev, buf, IRDMA_BYTE_72, info, 
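
With the change above, progress is judged by comparing the tracker's snapshot against the atomically maintained completed-operations counter: any movement resets the stall count, while an outstanding gap between requested and completed operations increments it. A plain, single-threaded model of that bookkeeping (no atomics, illustrative types only):

#include <assert.h>
#include <stdint.h>

struct demo_cqp {
    uint64_t requested_ops;
    uint64_t completed_ops;
};

struct demo_timeout {
    uint64_t compl_cqp_cmds;  /* last completed count we saw */
    uint32_t count;           /* consecutive stalled checks  */
};

static void
demo_check_progress(struct demo_timeout *t, const struct demo_cqp *cqp)
{
    if (t->compl_cqp_cmds != cqp->completed_ops) {
        t->compl_cqp_cmds = cqp->completed_ops;
        t->count = 0;   /* something completed: not stuck   */
    } else if (t->compl_cqp_cmds != cqp->requested_ops) {
        t->count++;     /* work outstanding, none finished  */
    }
}

int
main(void)
{
    struct demo_cqp cqp = { .requested_ops = 2, .completed_ops = 1 };
    struct demo_timeout t = {0};

    demo_check_progress(&t, &cqp);  /* sees one completion: resets    */
    demo_check_progress(&t, &cqp);  /* no new completion, one pending */
    assert(t.count == 1);
    return 0;
}
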
IRDMA_HMC_IW_Q1); irdma_sc_decode_fpm_commit(dev, buf, IRDMA_BYTE_80, info, IRDMA_HMC_IW_Q1FL); irdma_sc_decode_fpm_commit(dev, buf, IRDMA_BYTE_88, info, IRDMA_HMC_IW_TIMER); irdma_sc_decode_fpm_commit(dev, buf, IRDMA_BYTE_112, info, IRDMA_HMC_IW_PBLE); /* skipping RSVD. */ if (dev->hw_attrs.uk_attrs.hw_rev != IRDMA_GEN_1) { irdma_sc_decode_fpm_commit(dev, buf, IRDMA_BYTE_96, info, IRDMA_HMC_IW_FSIMC); irdma_sc_decode_fpm_commit(dev, buf, IRDMA_BYTE_104, info, IRDMA_HMC_IW_FSIAV); irdma_sc_decode_fpm_commit(dev, buf, IRDMA_BYTE_128, info, IRDMA_HMC_IW_RRF); irdma_sc_decode_fpm_commit(dev, buf, IRDMA_BYTE_136, info, IRDMA_HMC_IW_RRFFL); irdma_sc_decode_fpm_commit(dev, buf, IRDMA_BYTE_144, info, IRDMA_HMC_IW_HDR); irdma_sc_decode_fpm_commit(dev, buf, IRDMA_BYTE_152, info, IRDMA_HMC_IW_MD); irdma_sc_decode_fpm_commit(dev, buf, IRDMA_BYTE_160, info, IRDMA_HMC_IW_OOISC); irdma_sc_decode_fpm_commit(dev, buf, IRDMA_BYTE_168, info, IRDMA_HMC_IW_OOISCFFL); } /* searching for the last object in HMC to find the size of the HMC area. */ for (i = IRDMA_HMC_IW_QP; i < IRDMA_HMC_IW_MAX; i++) { if (info[i].base > max_base) { max_base = info[i].base; last_hmc_obj = i; } } size = info[last_hmc_obj].cnt * info[last_hmc_obj].size + info[last_hmc_obj].base; if (size & 0x1FFFFF) *sd = (u32)((size >> 21) + 1); /* add 1 for remainder */ else *sd = (u32)(size >> 21); } /** * irdma_sc_decode_fpm_query() - Decode a 64 bit value into max count and size * @buf: ptr to fpm query buffer * @buf_idx: index into buf * @obj_info: ptr to irdma_hmc_obj_info struct * @rsrc_idx: resource index into info * * Decode a 64 bit value from fpm query buffer into max count and size */ static u64 irdma_sc_decode_fpm_query(__le64 * buf, u32 buf_idx, struct irdma_hmc_obj_info *obj_info, u32 rsrc_idx){ u64 temp; u32 size; get_64bit_val(buf, buf_idx, &temp); obj_info[rsrc_idx].max_cnt = (u32)temp; size = (u32)RS_64_1(temp, 32); obj_info[rsrc_idx].size = LS_64_1(1, size); return temp; } /** * irdma_sc_parse_fpm_query_buf() - parses fpm query buffer * @dev: ptr to shared code device * @buf: ptr to fpm query buffer * @hmc_info: ptr to irdma_hmc_obj_info struct * @hmc_fpm_misc: ptr to fpm data * * parses fpm query buffer and copy max_cnt and * size value of hmc objects in hmc_info */ static int irdma_sc_parse_fpm_query_buf(struct irdma_sc_dev *dev, __le64 * buf, struct irdma_hmc_info *hmc_info, struct irdma_hmc_fpm_misc *hmc_fpm_misc) { struct irdma_hmc_obj_info *obj_info; u64 temp; u32 size; u16 max_pe_sds; obj_info = hmc_info->hmc_obj; get_64bit_val(buf, IRDMA_BYTE_0, &temp); hmc_info->first_sd_index = (u16)FIELD_GET(IRDMA_QUERY_FPM_FIRST_PE_SD_INDEX, temp); max_pe_sds = (u16)FIELD_GET(IRDMA_QUERY_FPM_MAX_PE_SDS, temp); hmc_fpm_misc->max_sds = max_pe_sds; hmc_info->sd_table.sd_cnt = max_pe_sds + hmc_info->first_sd_index; get_64bit_val(buf, 8, &temp); obj_info[IRDMA_HMC_IW_QP].max_cnt = (u32)FIELD_GET(IRDMA_QUERY_FPM_MAX_QPS, temp); size = (u32)RS_64_1(temp, 32); obj_info[IRDMA_HMC_IW_QP].size = LS_64_1(1, size); get_64bit_val(buf, 16, &temp); obj_info[IRDMA_HMC_IW_CQ].max_cnt = (u32)FIELD_GET(IRDMA_QUERY_FPM_MAX_CQS, temp); size = (u32)RS_64_1(temp, 32); obj_info[IRDMA_HMC_IW_CQ].size = LS_64_1(1, size); irdma_sc_decode_fpm_query(buf, 32, obj_info, IRDMA_HMC_IW_HTE); irdma_sc_decode_fpm_query(buf, 40, obj_info, IRDMA_HMC_IW_ARP); obj_info[IRDMA_HMC_IW_APBVT_ENTRY].size = 8192; obj_info[IRDMA_HMC_IW_APBVT_ENTRY].max_cnt = 1; irdma_sc_decode_fpm_query(buf, 48, obj_info, IRDMA_HMC_IW_MR); irdma_sc_decode_fpm_query(buf, 56, obj_info, 
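
irdma_sc_parse_fpm_commit_buf() sizes the HMC area from the highest-based object and converts bytes into 2 MB segment descriptors, rounding up when the size is not SD-aligned. That conversion in isolation; demo_sd_count() is just an illustrative wrapper around the same shift-and-remainder test:

#include <assert.h>
#include <stdint.h>

/* Bytes -> number of 2 MB segment descriptors, rounding up. */
static uint32_t
demo_sd_count(uint64_t size)
{
    if (size & 0x1FFFFF)               /* partial last SD */
        return (uint32_t)((size >> 21) + 1);
    return (uint32_t)(size >> 21);
}

int
main(void)
{
    assert(demo_sd_count(2 * 1024 * 1024) == 1);
    assert(demo_sd_count(2 * 1024 * 1024 + 1) == 2);
    return 0;
}
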
IRDMA_HMC_IW_XF); get_64bit_val(buf, 64, &temp); obj_info[IRDMA_HMC_IW_XFFL].max_cnt = (u32)temp; obj_info[IRDMA_HMC_IW_XFFL].size = 4; hmc_fpm_misc->xf_block_size = FIELD_GET(IRDMA_QUERY_FPM_XFBLOCKSIZE, temp); if (!hmc_fpm_misc->xf_block_size) return -EINVAL; irdma_sc_decode_fpm_query(buf, 72, obj_info, IRDMA_HMC_IW_Q1); get_64bit_val(buf, 80, &temp); obj_info[IRDMA_HMC_IW_Q1FL].max_cnt = (u32)temp; obj_info[IRDMA_HMC_IW_Q1FL].size = 4; hmc_fpm_misc->q1_block_size = FIELD_GET(IRDMA_QUERY_FPM_Q1BLOCKSIZE, temp); if (!hmc_fpm_misc->q1_block_size) return -EINVAL; irdma_sc_decode_fpm_query(buf, 88, obj_info, IRDMA_HMC_IW_TIMER); get_64bit_val(buf, 112, &temp); obj_info[IRDMA_HMC_IW_PBLE].max_cnt = (u32)temp; obj_info[IRDMA_HMC_IW_PBLE].size = 8; get_64bit_val(buf, 120, &temp); hmc_fpm_misc->max_ceqs = FIELD_GET(IRDMA_QUERY_FPM_MAX_CEQS, temp); hmc_fpm_misc->ht_multiplier = FIELD_GET(IRDMA_QUERY_FPM_HTMULTIPLIER, temp); hmc_fpm_misc->timer_bucket = FIELD_GET(IRDMA_QUERY_FPM_TIMERBUCKET, temp); if (dev->hw_attrs.uk_attrs.hw_rev == IRDMA_GEN_1) return 0; irdma_sc_decode_fpm_query(buf, 96, obj_info, IRDMA_HMC_IW_FSIMC); irdma_sc_decode_fpm_query(buf, 104, obj_info, IRDMA_HMC_IW_FSIAV); irdma_sc_decode_fpm_query(buf, 128, obj_info, IRDMA_HMC_IW_RRF); get_64bit_val(buf, IRDMA_BYTE_136, &temp); obj_info[IRDMA_HMC_IW_RRFFL].max_cnt = (u32)temp; obj_info[IRDMA_HMC_IW_RRFFL].size = 4; hmc_fpm_misc->rrf_block_size = FIELD_GET(IRDMA_QUERY_FPM_RRFBLOCKSIZE, temp); if (!hmc_fpm_misc->rrf_block_size && obj_info[IRDMA_HMC_IW_RRFFL].max_cnt) return -EINVAL; irdma_sc_decode_fpm_query(buf, 144, obj_info, IRDMA_HMC_IW_HDR); irdma_sc_decode_fpm_query(buf, 152, obj_info, IRDMA_HMC_IW_MD); irdma_sc_decode_fpm_query(buf, 160, obj_info, IRDMA_HMC_IW_OOISC); get_64bit_val(buf, IRDMA_BYTE_168, &temp); obj_info[IRDMA_HMC_IW_OOISCFFL].max_cnt = (u32)temp; obj_info[IRDMA_HMC_IW_OOISCFFL].size = 4; hmc_fpm_misc->ooiscf_block_size = FIELD_GET(IRDMA_QUERY_FPM_OOISCFBLOCKSIZE, temp); if (!hmc_fpm_misc->ooiscf_block_size && obj_info[IRDMA_HMC_IW_OOISCFFL].max_cnt) return -EINVAL; return 0; } /** * irdma_sc_find_reg_cq - find cq ctx index * @ceq: ceq sc structure * @cq: cq sc structure */ static u32 irdma_sc_find_reg_cq(struct irdma_sc_ceq *ceq, struct irdma_sc_cq *cq){ u32 i; for (i = 0; i < ceq->reg_cq_size; i++) { if (cq == ceq->reg_cq[i]) return i; } return IRDMA_INVALID_CQ_IDX; } /** * irdma_sc_add_cq_ctx - add cq ctx tracking for ceq * @ceq: ceq sc structure * @cq: cq sc structure */ int irdma_sc_add_cq_ctx(struct irdma_sc_ceq *ceq, struct irdma_sc_cq *cq) { unsigned long flags; spin_lock_irqsave(&ceq->req_cq_lock, flags); if (ceq->reg_cq_size == ceq->elem_cnt) { spin_unlock_irqrestore(&ceq->req_cq_lock, flags); return -ENOSPC; } ceq->reg_cq[ceq->reg_cq_size++] = cq; spin_unlock_irqrestore(&ceq->req_cq_lock, flags); return 0; } /** * irdma_sc_remove_cq_ctx - remove cq ctx tracking for ceq * @ceq: ceq sc structure * @cq: cq sc structure */ void irdma_sc_remove_cq_ctx(struct irdma_sc_ceq *ceq, struct irdma_sc_cq *cq) { unsigned long flags; u32 cq_ctx_idx; spin_lock_irqsave(&ceq->req_cq_lock, flags); cq_ctx_idx = irdma_sc_find_reg_cq(ceq, cq); if (cq_ctx_idx == IRDMA_INVALID_CQ_IDX) goto exit; ceq->reg_cq_size--; if (cq_ctx_idx != ceq->reg_cq_size) ceq->reg_cq[cq_ctx_idx] = ceq->reg_cq[ceq->reg_cq_size]; ceq->reg_cq[ceq->reg_cq_size] = NULL; exit: spin_unlock_irqrestore(&ceq->req_cq_lock, flags); } /** * irdma_sc_cqp_init - Initialize buffers for a control Queue Pair * @cqp: IWARP control queue pair pointer * @info: IWARP 
control queue pair init info pointer * * Initializes the object and context buffers for a control Queue Pair. */ int irdma_sc_cqp_init(struct irdma_sc_cqp *cqp, struct irdma_cqp_init_info *info) { u8 hw_sq_size; if (info->sq_size > IRDMA_CQP_SW_SQSIZE_2048 || info->sq_size < IRDMA_CQP_SW_SQSIZE_4 || ((info->sq_size & (info->sq_size - 1)))) return -EINVAL; hw_sq_size = irdma_get_encoded_wqe_size(info->sq_size, IRDMA_QUEUE_TYPE_CQP); cqp->size = sizeof(*cqp); cqp->sq_size = info->sq_size; cqp->hw_sq_size = hw_sq_size; cqp->sq_base = info->sq; cqp->host_ctx = info->host_ctx; cqp->sq_pa = info->sq_pa; cqp->host_ctx_pa = info->host_ctx_pa; cqp->dev = info->dev; cqp->struct_ver = info->struct_ver; cqp->hw_maj_ver = info->hw_maj_ver; cqp->hw_min_ver = info->hw_min_ver; cqp->scratch_array = info->scratch_array; cqp->polarity = 0; cqp->en_datacenter_tcp = info->en_datacenter_tcp; cqp->ena_vf_count = info->ena_vf_count; cqp->hmc_profile = info->hmc_profile; cqp->ceqs_per_vf = info->ceqs_per_vf; cqp->disable_packed = info->disable_packed; cqp->rocev2_rto_policy = info->rocev2_rto_policy; cqp->protocol_used = info->protocol_used; irdma_memcpy(&cqp->dcqcn_params, &info->dcqcn_params, sizeof(cqp->dcqcn_params)); cqp->en_rem_endpoint_trk = info->en_rem_endpoint_trk; info->dev->cqp = cqp; IRDMA_RING_INIT(cqp->sq_ring, cqp->sq_size); - cqp->dev->cqp_cmd_stats[IRDMA_OP_REQ_CMDS] = 0; - cqp->dev->cqp_cmd_stats[IRDMA_OP_CMPL_CMDS] = 0; + cqp->requested_ops = 0; + atomic64_set(&cqp->completed_ops, 0); /* for the cqp commands backlog. */ INIT_LIST_HEAD(&cqp->dev->cqp_cmd_head); writel(0, cqp->dev->hw_regs[IRDMA_CQPTAIL]); writel(0, cqp->dev->hw_regs[IRDMA_CQPDB]); writel(0, cqp->dev->hw_regs[IRDMA_CCQPSTATUS]); irdma_debug(cqp->dev, IRDMA_DEBUG_WQE, "sq_size[%04d] hw_sq_size[%04d] sq_base[%p] sq_pa[%llxh] cqp[%p] polarity[x%04x]\n", - cqp->sq_size, cqp->hw_sq_size, cqp->sq_base, (unsigned long long)cqp->sq_pa, cqp, - cqp->polarity); + cqp->sq_size, cqp->hw_sq_size, cqp->sq_base, + (unsigned long long)cqp->sq_pa, cqp, cqp->polarity); return 0; } /** * irdma_sc_cqp_create - create cqp during bringup * @cqp: struct for cqp hw * @maj_err: If error, major err number * @min_err: If error, minor err number */ int irdma_sc_cqp_create(struct irdma_sc_cqp *cqp, u16 *maj_err, u16 *min_err) { u64 temp; u8 hw_rev; u32 cnt = 0, p1, p2, val = 0, err_code; int ret_code; hw_rev = cqp->dev->hw_attrs.uk_attrs.hw_rev; cqp->sdbuf.size = IRDMA_UPDATE_SD_BUFF_SIZE * cqp->sq_size; cqp->sdbuf.va = irdma_allocate_dma_mem(cqp->dev->hw, &cqp->sdbuf, cqp->sdbuf.size, IRDMA_SD_BUF_ALIGNMENT); if (!cqp->sdbuf.va) return -ENOMEM; spin_lock_init(&cqp->dev->cqp_lock); temp = FIELD_PREP(IRDMA_CQPHC_SQSIZE, cqp->hw_sq_size) | FIELD_PREP(IRDMA_CQPHC_SVER, cqp->struct_ver) | FIELD_PREP(IRDMA_CQPHC_DISABLE_PFPDUS, cqp->disable_packed) | FIELD_PREP(IRDMA_CQPHC_CEQPERVF, cqp->ceqs_per_vf); if (hw_rev >= IRDMA_GEN_2) { temp |= FIELD_PREP(IRDMA_CQPHC_ROCEV2_RTO_POLICY, cqp->rocev2_rto_policy) | FIELD_PREP(IRDMA_CQPHC_PROTOCOL_USED, cqp->protocol_used); } set_64bit_val(cqp->host_ctx, IRDMA_BYTE_0, temp); set_64bit_val(cqp->host_ctx, IRDMA_BYTE_8, cqp->sq_pa); temp = FIELD_PREP(IRDMA_CQPHC_ENABLED_VFS, cqp->ena_vf_count) | FIELD_PREP(IRDMA_CQPHC_HMC_PROFILE, cqp->hmc_profile); if (hw_rev >= IRDMA_GEN_2) temp |= FIELD_PREP(IRDMA_CQPHC_EN_REM_ENDPOINT_TRK, cqp->en_rem_endpoint_trk); set_64bit_val(cqp->host_ctx, IRDMA_BYTE_16, temp); set_64bit_val(cqp->host_ctx, IRDMA_BYTE_24, (uintptr_t)cqp); temp = FIELD_PREP(IRDMA_CQPHC_HW_MAJVER, cqp->hw_maj_ver) | 
FIELD_PREP(IRDMA_CQPHC_HW_MINVER, cqp->hw_min_ver); if (hw_rev >= IRDMA_GEN_2) { temp |= FIELD_PREP(IRDMA_CQPHC_MIN_RATE, cqp->dcqcn_params.min_rate) | FIELD_PREP(IRDMA_CQPHC_MIN_DEC_FACTOR, cqp->dcqcn_params.min_dec_factor); } set_64bit_val(cqp->host_ctx, IRDMA_BYTE_32, temp); set_64bit_val(cqp->host_ctx, IRDMA_BYTE_40, 0); temp = 0; if (hw_rev >= IRDMA_GEN_2) { temp |= FIELD_PREP(IRDMA_CQPHC_DCQCN_T, cqp->dcqcn_params.dcqcn_t) | FIELD_PREP(IRDMA_CQPHC_RAI_FACTOR, cqp->dcqcn_params.rai_factor) | FIELD_PREP(IRDMA_CQPHC_HAI_FACTOR, cqp->dcqcn_params.hai_factor); } set_64bit_val(cqp->host_ctx, IRDMA_BYTE_48, temp); temp = 0; if (hw_rev >= IRDMA_GEN_2) { temp |= FIELD_PREP(IRDMA_CQPHC_DCQCN_B, cqp->dcqcn_params.dcqcn_b) | FIELD_PREP(IRDMA_CQPHC_DCQCN_F, cqp->dcqcn_params.dcqcn_f) | FIELD_PREP(IRDMA_CQPHC_CC_CFG_VALID, cqp->dcqcn_params.cc_cfg_valid) | FIELD_PREP(IRDMA_CQPHC_RREDUCE_MPERIOD, cqp->dcqcn_params.rreduce_mperiod); } set_64bit_val(cqp->host_ctx, IRDMA_BYTE_56, temp); irdma_debug_buf(cqp->dev, IRDMA_DEBUG_WQE, "CQP_HOST_CTX WQE", cqp->host_ctx, IRDMA_CQP_CTX_SIZE * 8); p1 = RS_32_1(cqp->host_ctx_pa, 32); p2 = (u32)cqp->host_ctx_pa; writel(p1, cqp->dev->hw_regs[IRDMA_CCQPHIGH]); writel(p2, cqp->dev->hw_regs[IRDMA_CCQPLOW]); do { if (cnt++ > cqp->dev->hw_attrs.max_done_count) { ret_code = -ETIMEDOUT; goto err; } irdma_usec_delay(cqp->dev->hw_attrs.max_sleep_count); val = readl(cqp->dev->hw_regs[IRDMA_CCQPSTATUS]); } while (!val); if (FLD_RS_32(cqp->dev, val, IRDMA_CCQPSTATUS_CCQP_ERR)) { ret_code = -EOPNOTSUPP; goto err; } cqp->process_cqp_sds = irdma_update_sds_noccq; return 0; err: spin_lock_destroy(&cqp->dev->cqp_lock); irdma_free_dma_mem(cqp->dev->hw, &cqp->sdbuf); err_code = readl(cqp->dev->hw_regs[IRDMA_CQPERRCODES]); *min_err = FIELD_GET(IRDMA_CQPERRCODES_CQP_MINOR_CODE, err_code); *maj_err = FIELD_GET(IRDMA_CQPERRCODES_CQP_MAJOR_CODE, err_code); return ret_code; } /** * irdma_sc_cqp_post_sq - post of cqp's sq * @cqp: struct for cqp hw */ void irdma_sc_cqp_post_sq(struct irdma_sc_cqp *cqp) { db_wr32(IRDMA_RING_CURRENT_HEAD(cqp->sq_ring), cqp->dev->cqp_db); irdma_debug(cqp->dev, IRDMA_DEBUG_WQE, "CQP SQ head 0x%x tail 0x%x size 0x%x\n", cqp->sq_ring.head, cqp->sq_ring.tail, cqp->sq_ring.size); } /** * irdma_sc_cqp_get_next_send_wqe_idx - get next wqe on cqp sq * and pass back index * @cqp: CQP HW structure * @scratch: private data for CQP WQE * @wqe_idx: WQE index of CQP SQ */ __le64 * irdma_sc_cqp_get_next_send_wqe_idx(struct irdma_sc_cqp *cqp, u64 scratch, u32 *wqe_idx) { __le64 *wqe = NULL; int ret_code; if (IRDMA_RING_FULL_ERR(cqp->sq_ring)) { irdma_debug(cqp->dev, IRDMA_DEBUG_WQE, "CQP SQ is full, head 0x%x tail 0x%x size 0x%x\n", cqp->sq_ring.head, cqp->sq_ring.tail, cqp->sq_ring.size); return NULL; } IRDMA_ATOMIC_RING_MOVE_HEAD(cqp->sq_ring, *wqe_idx, ret_code); if (ret_code) return NULL; - cqp->dev->cqp_cmd_stats[IRDMA_OP_REQ_CMDS]++; + cqp->requested_ops++; if (!*wqe_idx) cqp->polarity = !cqp->polarity; wqe = cqp->sq_base[*wqe_idx].elem; cqp->scratch_array[*wqe_idx] = scratch; memset(&wqe[0], 0, 24); memset(&wqe[4], 0, 32); return wqe; } /** * irdma_sc_cqp_destroy - destroy cqp during close * @cqp: struct for cqp hw * @free_hwcqp: true for regular cqp destroy; false for reset path */ int irdma_sc_cqp_destroy(struct irdma_sc_cqp *cqp, bool free_hwcqp) { u32 cnt = 0, val; int ret_code = 0; if (free_hwcqp) { writel(0, cqp->dev->hw_regs[IRDMA_CCQPHIGH]); writel(0, cqp->dev->hw_regs[IRDMA_CCQPLOW]); do { if (cnt++ > cqp->dev->hw_attrs.max_done_count) { ret_code = -ETIMEDOUT; 
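/*
 * Both the create path above and the destroy path that follows busy-wait on
 * CCQPSTATUS with an iteration cap. A minimal stand-alone sketch of that
 * poll-with-timeout pattern (names here are illustrative, not driver API):
 */
#include <stdint.h>
#include <stdbool.h>

/*
 * Poll a memory-mapped status word until any bit in @mask is set or
 * @max_tries reads elapse. A real caller would also delay between reads,
 * as the irdma_usec_delay() calls above do, and map failure to -ETIMEDOUT.
 */
static bool demo_poll_status(volatile const uint32_t *status, uint32_t mask,
			     uint32_t max_tries)
{
	uint32_t tries = 0;

	while (!(*status & mask)) {
		if (tries++ > max_tries)
			return false;
	}
	return true;
}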
break; } irdma_usec_delay(cqp->dev->hw_attrs.max_sleep_count); val = readl(cqp->dev->hw_regs[IRDMA_CCQPSTATUS]); } while (FLD_RS_32(cqp->dev, val, IRDMA_CCQPSTATUS_CCQP_DONE)); } irdma_free_dma_mem(cqp->dev->hw, &cqp->sdbuf); spin_lock_destroy(&cqp->dev->cqp_lock); return ret_code; } /** * irdma_sc_ccq_arm - enable intr for control cq * @ccq: ccq sc struct */ void irdma_sc_ccq_arm(struct irdma_sc_cq *ccq) { u64 temp_val; u16 sw_cq_sel; u8 arm_next_se; u8 arm_seq_num; get_64bit_val(ccq->cq_uk.shadow_area, IRDMA_BYTE_32, &temp_val); sw_cq_sel = (u16)FIELD_GET(IRDMA_CQ_DBSA_SW_CQ_SELECT, temp_val); arm_next_se = (u8)FIELD_GET(IRDMA_CQ_DBSA_ARM_NEXT_SE, temp_val); arm_seq_num = (u8)FIELD_GET(IRDMA_CQ_DBSA_ARM_SEQ_NUM, temp_val); arm_seq_num++; temp_val = FIELD_PREP(IRDMA_CQ_DBSA_ARM_SEQ_NUM, arm_seq_num) | FIELD_PREP(IRDMA_CQ_DBSA_SW_CQ_SELECT, sw_cq_sel) | FIELD_PREP(IRDMA_CQ_DBSA_ARM_NEXT_SE, arm_next_se) | FIELD_PREP(IRDMA_CQ_DBSA_ARM_NEXT, 1); set_64bit_val(ccq->cq_uk.shadow_area, IRDMA_BYTE_32, temp_val); irdma_wmb(); /* make sure shadow area is updated before arming */ db_wr32(ccq->cq_uk.cq_id, ccq->dev->cq_arm_db); } /** * irdma_sc_ccq_get_cqe_info - get ccq's cq entry * @ccq: ccq sc struct * @info: completion q entry to return */ int irdma_sc_ccq_get_cqe_info(struct irdma_sc_cq *ccq, struct irdma_ccq_cqe_info *info) { u64 qp_ctx, temp, temp1; __le64 *cqe; struct irdma_sc_cqp *cqp; u32 wqe_idx; u32 error; u8 polarity; int ret_code = 0; if (ccq->cq_uk.avoid_mem_cflct) cqe = IRDMA_GET_CURRENT_EXTENDED_CQ_ELEM(&ccq->cq_uk); else cqe = IRDMA_GET_CURRENT_CQ_ELEM(&ccq->cq_uk); get_64bit_val(cqe, IRDMA_BYTE_24, &temp); polarity = (u8)FIELD_GET(IRDMA_CQ_VALID, temp); if (polarity != ccq->cq_uk.polarity) return -ENOENT; + /* Ensure CEQE contents are read after valid bit is checked */ + rmb(); + get_64bit_val(cqe, IRDMA_BYTE_8, &qp_ctx); cqp = (struct irdma_sc_cqp *)(irdma_uintptr) qp_ctx; info->error = (bool)FIELD_GET(IRDMA_CQ_ERROR, temp); info->maj_err_code = IRDMA_CQPSQ_MAJ_NO_ERROR; info->min_err_code = (u16)FIELD_GET(IRDMA_CQ_MINERR, temp); if (info->error) { info->maj_err_code = (u16)FIELD_GET(IRDMA_CQ_MAJERR, temp); error = readl(cqp->dev->hw_regs[IRDMA_CQPERRCODES]); irdma_debug(cqp->dev, IRDMA_DEBUG_CQP, "CQPERRCODES error_code[x%08X]\n", error); } wqe_idx = (u32)FIELD_GET(IRDMA_CQ_WQEIDX, temp); info->scratch = cqp->scratch_array[wqe_idx]; get_64bit_val(cqe, IRDMA_BYTE_16, &temp1); info->op_ret_val = (u32)FIELD_GET(IRDMA_CCQ_OPRETVAL, temp1); get_64bit_val(cqp->sq_base[wqe_idx].elem, IRDMA_BYTE_24, &temp1); info->op_code = (u8)FIELD_GET(IRDMA_CQPSQ_OPCODE, temp1); info->cqp = cqp; /* move the head for cq */ IRDMA_RING_MOVE_HEAD(ccq->cq_uk.cq_ring, ret_code); if (!IRDMA_RING_CURRENT_HEAD(ccq->cq_uk.cq_ring)) ccq->cq_uk.polarity ^= 1; /* update cq tail in cq shadow memory also */ IRDMA_RING_MOVE_TAIL(ccq->cq_uk.cq_ring); set_64bit_val(ccq->cq_uk.shadow_area, IRDMA_BYTE_0, IRDMA_RING_CURRENT_HEAD(ccq->cq_uk.cq_ring)); irdma_wmb(); /* make sure shadow area is updated before moving tail */ IRDMA_RING_MOVE_TAIL(cqp->sq_ring); - ccq->dev->cqp_cmd_stats[IRDMA_OP_CMPL_CMDS]++; + atomic64_inc(&cqp->completed_ops); return ret_code; } /** * irdma_sc_poll_for_cqp_op_done - Waits for last write to complete in CQP SQ * @cqp: struct for cqp hw * @op_code: cqp opcode for completion * @compl_info: completion q entry to return */ int irdma_sc_poll_for_cqp_op_done(struct irdma_sc_cqp *cqp, u8 op_code, struct irdma_ccq_cqe_info *compl_info) { struct irdma_ccq_cqe_info info = {0}; struct irdma_sc_cq *ccq; 
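/*
 * irdma_sc_ccq_get_cqe_info() above (and the AEQ path later in this file)
 * consume ring entries by comparing a hardware-written valid bit against a
 * software polarity that flips on every ring wrap, and only then reading the
 * rest of the entry. A self-contained sketch of that pattern with made-up
 * types; the driver's rmb() would sit where the comment marks it:
 */
#include <stdint.h>
#include <stddef.h>

struct demo_cqe {
	uint64_t qword[4];		/* illustrative 32-byte entry */
};

struct demo_cq {
	struct demo_cqe *ring;
	uint32_t size;
	uint32_t head;
	uint8_t polarity;		/* value the valid bit must have this pass */
};

/* Return the next completed entry, or NULL if HW has not written it yet. */
static struct demo_cqe *demo_cq_poll(struct demo_cq *cq)
{
	struct demo_cqe *cqe = &cq->ring[cq->head];
	uint8_t valid = (uint8_t)(cqe->qword[3] >> 63);	/* bit 63, like IRDMA_CQ_VALID */

	if (valid != cq->polarity)
		return NULL;

	/* a read barrier belongs here so the payload is read after the valid bit */

	if (++cq->head == cq->size) {
		cq->head = 0;
		cq->polarity ^= 1;	/* expectation flips each time the ring wraps */
	}
	return cqe;
}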
int ret_code = 0; u32 cnt = 0; ccq = cqp->dev->ccq; while (1) { if (cnt++ > 100 * cqp->dev->hw_attrs.max_done_count) return -ETIMEDOUT; if (irdma_sc_ccq_get_cqe_info(ccq, &info)) { irdma_usec_delay(cqp->dev->hw_attrs.max_sleep_count); continue; } if (info.error && info.op_code != IRDMA_CQP_OP_QUERY_STAG) { ret_code = -EIO; break; } /* make sure op code matches */ if (op_code == info.op_code) break; irdma_debug(cqp->dev, IRDMA_DEBUG_WQE, "opcode mismatch for my op code 0x%x, returned opcode %x\n", op_code, info.op_code); } if (compl_info) irdma_memcpy(compl_info, &info, sizeof(*compl_info)); return ret_code; } /** * irdma_sc_manage_hmc_pm_func_table - manage of function table * @cqp: struct for cqp hw * @scratch: u64 saved to be used during cqp completion * @info: info for the manage function table operation * @post_sq: flag for cqp db to ring */ static int irdma_sc_manage_hmc_pm_func_table(struct irdma_sc_cqp *cqp, struct irdma_hmc_fcn_info *info, u64 scratch, bool post_sq) { __le64 *wqe; u64 hdr; wqe = irdma_sc_cqp_get_next_send_wqe(cqp, scratch); if (!wqe) return -ENOSPC; hdr = FIELD_PREP(IRDMA_CQPSQ_MHMC_VFIDX, info->vf_id) | FIELD_PREP(IRDMA_CQPSQ_OPCODE, IRDMA_CQP_OP_MANAGE_HMC_PM_FUNC_TABLE) | FIELD_PREP(IRDMA_CQPSQ_MHMC_FREEPMFN, info->free_fcn) | FIELD_PREP(IRDMA_CQPSQ_WQEVALID, cqp->polarity); irdma_wmb(); /* make sure WQE is written before valid bit is set */ set_64bit_val(wqe, IRDMA_BYTE_24, hdr); irdma_debug_buf(cqp->dev, IRDMA_DEBUG_WQE, "MANAGE_HMC_PM_FUNC_TABLE WQE", wqe, IRDMA_CQP_WQE_SIZE * 8); if (post_sq) irdma_sc_cqp_post_sq(cqp); return 0; } /** * irdma_sc_commit_fpm_val_done - wait for cqp eqe completion * for fpm commit * @cqp: struct for cqp hw */ static int irdma_sc_commit_fpm_val_done(struct irdma_sc_cqp *cqp) { return irdma_sc_poll_for_cqp_op_done(cqp, IRDMA_CQP_OP_COMMIT_FPM_VAL, NULL); } /** * irdma_sc_commit_fpm_val - cqp wqe for commit fpm values * @cqp: struct for cqp hw * @scratch: u64 saved to be used during cqp completion * @hmc_fn_id: hmc function id * @commit_fpm_mem: Memory for fpm values * @post_sq: flag for cqp db to ring * @wait_type: poll ccq or cqp registers for cqp completion */ static int irdma_sc_commit_fpm_val(struct irdma_sc_cqp *cqp, u64 scratch, u16 hmc_fn_id, struct irdma_dma_mem *commit_fpm_mem, bool post_sq, u8 wait_type) { __le64 *wqe; u64 hdr; u32 tail, val, error; int ret_code = 0; wqe = irdma_sc_cqp_get_next_send_wqe(cqp, scratch); if (!wqe) return -ENOSPC; set_64bit_val(wqe, IRDMA_BYTE_16, hmc_fn_id); set_64bit_val(wqe, IRDMA_BYTE_32, commit_fpm_mem->pa); hdr = FIELD_PREP(IRDMA_CQPSQ_BUFSIZE, IRDMA_COMMIT_FPM_BUF_SIZE) | FIELD_PREP(IRDMA_CQPSQ_OPCODE, IRDMA_CQP_OP_COMMIT_FPM_VAL) | FIELD_PREP(IRDMA_CQPSQ_WQEVALID, cqp->polarity); irdma_wmb(); /* make sure WQE is written before valid bit is set */ set_64bit_val(wqe, IRDMA_BYTE_24, hdr); irdma_debug_buf(cqp->dev, IRDMA_DEBUG_WQE, "COMMIT_FPM_VAL WQE", wqe, IRDMA_CQP_WQE_SIZE * 8); irdma_get_cqp_reg_info(cqp, &val, &tail, &error); if (post_sq) { irdma_sc_cqp_post_sq(cqp); if (wait_type == IRDMA_CQP_WAIT_POLL_REGS) ret_code = irdma_cqp_poll_registers(cqp, tail, cqp->dev->hw_attrs.max_done_count); else if (wait_type == IRDMA_CQP_WAIT_POLL_CQ) ret_code = irdma_sc_commit_fpm_val_done(cqp); } return ret_code; } /** * irdma_sc_query_fpm_val_done - poll for cqp wqe completion for * query fpm * @cqp: struct for cqp hw */ static int irdma_sc_query_fpm_val_done(struct irdma_sc_cqp *cqp) { return irdma_sc_poll_for_cqp_op_done(cqp, IRDMA_CQP_OP_QUERY_FPM_VAL, NULL); } /** * irdma_sc_query_fpm_val 
- cqp wqe query fpm values * @cqp: struct for cqp hw * @scratch: u64 saved to be used during cqp completion * @hmc_fn_id: hmc function id * @query_fpm_mem: memory for return fpm values * @post_sq: flag for cqp db to ring * @wait_type: poll ccq or cqp registers for cqp completion */ static int irdma_sc_query_fpm_val(struct irdma_sc_cqp *cqp, u64 scratch, u16 hmc_fn_id, struct irdma_dma_mem *query_fpm_mem, bool post_sq, u8 wait_type) { __le64 *wqe; u64 hdr; u32 tail, val, error; int ret_code = 0; wqe = irdma_sc_cqp_get_next_send_wqe(cqp, scratch); if (!wqe) return -ENOSPC; set_64bit_val(wqe, IRDMA_BYTE_16, hmc_fn_id); set_64bit_val(wqe, IRDMA_BYTE_32, query_fpm_mem->pa); hdr = FIELD_PREP(IRDMA_CQPSQ_OPCODE, IRDMA_CQP_OP_QUERY_FPM_VAL) | FIELD_PREP(IRDMA_CQPSQ_WQEVALID, cqp->polarity); irdma_wmb(); /* make sure WQE is written before valid bit is set */ set_64bit_val(wqe, IRDMA_BYTE_24, hdr); irdma_debug_buf(cqp->dev, IRDMA_DEBUG_WQE, "QUERY_FPM WQE", wqe, IRDMA_CQP_WQE_SIZE * 8); irdma_get_cqp_reg_info(cqp, &val, &tail, &error); if (post_sq) { irdma_sc_cqp_post_sq(cqp); if (wait_type == IRDMA_CQP_WAIT_POLL_REGS) ret_code = irdma_cqp_poll_registers(cqp, tail, cqp->dev->hw_attrs.max_done_count); else if (wait_type == IRDMA_CQP_WAIT_POLL_CQ) ret_code = irdma_sc_query_fpm_val_done(cqp); } return ret_code; } /** * irdma_sc_ceq_init - initialize ceq * @ceq: ceq sc structure * @info: ceq initialization info */ int irdma_sc_ceq_init(struct irdma_sc_ceq *ceq, struct irdma_ceq_init_info *info) { u32 pble_obj_cnt; if (info->elem_cnt < info->dev->hw_attrs.min_hw_ceq_size || info->elem_cnt > info->dev->hw_attrs.max_hw_ceq_size) return -EINVAL; if (info->ceq_id > (info->dev->hmc_fpm_misc.max_ceqs - 1)) return -EINVAL; pble_obj_cnt = info->dev->hmc_info->hmc_obj[IRDMA_HMC_IW_PBLE].cnt; if (info->virtual_map && info->first_pm_pbl_idx >= pble_obj_cnt) return -EINVAL; ceq->size = sizeof(*ceq); ceq->ceqe_base = (struct irdma_ceqe *)info->ceqe_base; ceq->ceq_id = info->ceq_id; ceq->dev = info->dev; ceq->elem_cnt = info->elem_cnt; ceq->ceq_elem_pa = info->ceqe_pa; ceq->virtual_map = info->virtual_map; ceq->itr_no_expire = info->itr_no_expire; ceq->reg_cq = info->reg_cq; ceq->reg_cq_size = 0; spin_lock_init(&ceq->req_cq_lock); ceq->pbl_chunk_size = (ceq->virtual_map ? info->pbl_chunk_size : 0); ceq->first_pm_pbl_idx = (ceq->virtual_map ? info->first_pm_pbl_idx : 0); ceq->pbl_list = (ceq->virtual_map ? info->pbl_list : NULL); ceq->tph_en = info->tph_en; ceq->tph_val = info->tph_val; ceq->vsi = info->vsi; ceq->polarity = 1; IRDMA_RING_INIT(ceq->ceq_ring, ceq->elem_cnt); ceq->dev->ceq[info->ceq_id] = ceq; return 0; } /** * irdma_sc_ceq_create - create ceq wqe * @ceq: ceq sc structure * @scratch: u64 saved to be used during cqp completion * @post_sq: flag for cqp db to ring */ static int irdma_sc_ceq_create(struct irdma_sc_ceq *ceq, u64 scratch, bool post_sq) { struct irdma_sc_cqp *cqp; __le64 *wqe; u64 hdr; cqp = ceq->dev->cqp; wqe = irdma_sc_cqp_get_next_send_wqe(cqp, scratch); if (!wqe) return -ENOSPC; set_64bit_val(wqe, IRDMA_BYTE_16, ceq->elem_cnt); set_64bit_val(wqe, IRDMA_BYTE_32, (ceq->virtual_map ? 0 : ceq->ceq_elem_pa)); set_64bit_val(wqe, IRDMA_BYTE_48, (ceq->virtual_map ? 
ceq->first_pm_pbl_idx : 0)); set_64bit_val(wqe, IRDMA_BYTE_56, FIELD_PREP(IRDMA_CQPSQ_TPHVAL, ceq->tph_val) | FIELD_PREP(IRDMA_CQPSQ_VSIIDX, ceq->vsi->vsi_idx)); hdr = FIELD_PREP(IRDMA_CQPSQ_CEQ_CEQID, ceq->ceq_id) | FIELD_PREP(IRDMA_CQPSQ_OPCODE, IRDMA_CQP_OP_CREATE_CEQ) | FIELD_PREP(IRDMA_CQPSQ_CEQ_LPBLSIZE, ceq->pbl_chunk_size) | FIELD_PREP(IRDMA_CQPSQ_CEQ_VMAP, ceq->virtual_map) | FIELD_PREP(IRDMA_CQPSQ_CEQ_ITRNOEXPIRE, ceq->itr_no_expire) | FIELD_PREP(IRDMA_CQPSQ_TPHEN, ceq->tph_en) | FIELD_PREP(IRDMA_CQPSQ_WQEVALID, cqp->polarity); irdma_wmb(); /* make sure WQE is written before valid bit is set */ set_64bit_val(wqe, IRDMA_BYTE_24, hdr); irdma_debug_buf(cqp->dev, IRDMA_DEBUG_WQE, "CEQ_CREATE WQE", wqe, IRDMA_CQP_WQE_SIZE * 8); if (post_sq) irdma_sc_cqp_post_sq(cqp); return 0; } /** * irdma_sc_cceq_create_done - poll for control ceq wqe to complete * @ceq: ceq sc structure */ static int irdma_sc_cceq_create_done(struct irdma_sc_ceq *ceq) { struct irdma_sc_cqp *cqp; cqp = ceq->dev->cqp; return irdma_sc_poll_for_cqp_op_done(cqp, IRDMA_CQP_OP_CREATE_CEQ, NULL); } /** * irdma_sc_cceq_destroy_done - poll for destroy cceq to complete * @ceq: ceq sc structure */ int irdma_sc_cceq_destroy_done(struct irdma_sc_ceq *ceq) { struct irdma_sc_cqp *cqp; if (ceq->reg_cq) irdma_sc_remove_cq_ctx(ceq, ceq->dev->ccq); cqp = ceq->dev->cqp; cqp->process_cqp_sds = irdma_update_sds_noccq; return irdma_sc_poll_for_cqp_op_done(cqp, IRDMA_CQP_OP_DESTROY_CEQ, NULL); } /** * irdma_sc_cceq_create - create cceq * @ceq: ceq sc structure * @scratch: u64 saved to be used during cqp completion */ int irdma_sc_cceq_create(struct irdma_sc_ceq *ceq, u64 scratch) { int ret_code; struct irdma_sc_dev *dev = ceq->dev; dev->ccq->vsi = ceq->vsi; if (ceq->reg_cq) { ret_code = irdma_sc_add_cq_ctx(ceq, ceq->dev->ccq); if (ret_code) return ret_code; } ret_code = irdma_sc_ceq_create(ceq, scratch, true); if (!ret_code) return irdma_sc_cceq_create_done(ceq); return ret_code; } /** * irdma_sc_ceq_destroy - destroy ceq * @ceq: ceq sc structure * @scratch: u64 saved to be used during cqp completion * @post_sq: flag for cqp db to ring */ int irdma_sc_ceq_destroy(struct irdma_sc_ceq *ceq, u64 scratch, bool post_sq) { struct irdma_sc_cqp *cqp; __le64 *wqe; u64 hdr; cqp = ceq->dev->cqp; wqe = irdma_sc_cqp_get_next_send_wqe(cqp, scratch); if (!wqe) return -ENOSPC; set_64bit_val(wqe, IRDMA_BYTE_16, ceq->elem_cnt); set_64bit_val(wqe, IRDMA_BYTE_48, ceq->first_pm_pbl_idx); hdr = ceq->ceq_id | FIELD_PREP(IRDMA_CQPSQ_OPCODE, IRDMA_CQP_OP_DESTROY_CEQ) | FIELD_PREP(IRDMA_CQPSQ_CEQ_LPBLSIZE, ceq->pbl_chunk_size) | FIELD_PREP(IRDMA_CQPSQ_CEQ_VMAP, ceq->virtual_map) | FIELD_PREP(IRDMA_CQPSQ_TPHEN, ceq->tph_en) | FIELD_PREP(IRDMA_CQPSQ_WQEVALID, cqp->polarity); irdma_wmb(); /* make sure WQE is written before valid bit is set */ set_64bit_val(wqe, IRDMA_BYTE_24, hdr); irdma_debug_buf(cqp->dev, IRDMA_DEBUG_WQE, "CEQ_DESTROY WQE", wqe, IRDMA_CQP_WQE_SIZE * 8); ceq->dev->ceq[ceq->ceq_id] = NULL; if (post_sq) irdma_sc_cqp_post_sq(cqp); return 0; } /** * irdma_sc_process_ceq - process ceq * @dev: sc device struct * @ceq: ceq sc structure * * It is expected caller serializes this function with cleanup_ceqes() * because these functions manipulate the same ceq */ void * irdma_sc_process_ceq(struct irdma_sc_dev *dev, struct irdma_sc_ceq *ceq) { u64 temp; __le64 *ceqe; struct irdma_sc_cq *cq = NULL; struct irdma_sc_cq *temp_cq; u8 polarity; u32 cq_idx; unsigned long flags; do { cq_idx = 0; ceqe = IRDMA_GET_CURRENT_CEQ_ELEM(ceq); get_64bit_val(ceqe, IRDMA_BYTE_0, 
&temp); polarity = (u8)FIELD_GET(IRDMA_CEQE_VALID, temp); if (polarity != ceq->polarity) return NULL; temp_cq = (struct irdma_sc_cq *)(irdma_uintptr) LS_64_1(temp, 1); if (!temp_cq) { cq_idx = IRDMA_INVALID_CQ_IDX; IRDMA_RING_MOVE_TAIL(ceq->ceq_ring); if (!IRDMA_RING_CURRENT_TAIL(ceq->ceq_ring)) ceq->polarity ^= 1; continue; } cq = temp_cq; if (ceq->reg_cq) { spin_lock_irqsave(&ceq->req_cq_lock, flags); cq_idx = irdma_sc_find_reg_cq(ceq, cq); spin_unlock_irqrestore(&ceq->req_cq_lock, flags); } IRDMA_RING_MOVE_TAIL(ceq->ceq_ring); if (!IRDMA_RING_CURRENT_TAIL(ceq->ceq_ring)) ceq->polarity ^= 1; } while (cq_idx == IRDMA_INVALID_CQ_IDX); if (cq) irdma_sc_cq_ack(cq); return cq; } /** * irdma_sc_cleanup_ceqes - clear the valid ceqes ctx matching the cq * @cq: cq for which the ceqes need to be cleaned up * @ceq: ceq ptr * * The function is called after the cq is destroyed to cleanup * its pending ceqe entries. It is expected caller serializes this * function with process_ceq() in interrupt context. */ void irdma_sc_cleanup_ceqes(struct irdma_sc_cq *cq, struct irdma_sc_ceq *ceq) { struct irdma_sc_cq *next_cq; u8 ceq_polarity = ceq->polarity; __le64 *ceqe; u8 polarity; u64 temp; int next; u32 i; next = IRDMA_RING_GET_NEXT_TAIL(ceq->ceq_ring, 0); for (i = 1; i <= IRDMA_RING_SIZE(*ceq); i++) { ceqe = IRDMA_GET_CEQ_ELEM_AT_POS(ceq, next); get_64bit_val(ceqe, IRDMA_BYTE_0, &temp); polarity = (u8)FIELD_GET(IRDMA_CEQE_VALID, temp); if (polarity != ceq_polarity) return; next_cq = (struct irdma_sc_cq *)(irdma_uintptr) LS_64_1(temp, 1); if (cq == next_cq) set_64bit_val(ceqe, IRDMA_BYTE_0, temp & IRDMA_CEQE_VALID); next = IRDMA_RING_GET_NEXT_TAIL(ceq->ceq_ring, i); if (!next) ceq_polarity ^= 1; } } /** * irdma_sc_aeq_init - initialize aeq * @aeq: aeq structure ptr * @info: aeq initialization info */ int irdma_sc_aeq_init(struct irdma_sc_aeq *aeq, struct irdma_aeq_init_info *info) { u32 pble_obj_cnt; if (info->elem_cnt < info->dev->hw_attrs.min_hw_aeq_size || info->elem_cnt > info->dev->hw_attrs.max_hw_aeq_size) return -EINVAL; pble_obj_cnt = info->dev->hmc_info->hmc_obj[IRDMA_HMC_IW_PBLE].cnt; if (info->virtual_map && info->first_pm_pbl_idx >= pble_obj_cnt) return -EINVAL; aeq->size = sizeof(*aeq); aeq->polarity = 1; aeq->aeqe_base = (struct irdma_sc_aeqe *)info->aeqe_base; aeq->dev = info->dev; aeq->elem_cnt = info->elem_cnt; aeq->aeq_elem_pa = info->aeq_elem_pa; IRDMA_RING_INIT(aeq->aeq_ring, aeq->elem_cnt); aeq->virtual_map = info->virtual_map; aeq->pbl_list = (aeq->virtual_map ? info->pbl_list : NULL); aeq->pbl_chunk_size = (aeq->virtual_map ? info->pbl_chunk_size : 0); aeq->first_pm_pbl_idx = (aeq->virtual_map ? info->first_pm_pbl_idx : 0); aeq->msix_idx = info->msix_idx; info->dev->aeq = aeq; return 0; } /** * irdma_sc_aeq_create - create aeq * @aeq: aeq structure ptr * @scratch: u64 saved to be used during cqp completion * @post_sq: flag for cqp db to ring */ static int irdma_sc_aeq_create(struct irdma_sc_aeq *aeq, u64 scratch, bool post_sq) { __le64 *wqe; struct irdma_sc_cqp *cqp; u64 hdr; cqp = aeq->dev->cqp; wqe = irdma_sc_cqp_get_next_send_wqe(cqp, scratch); if (!wqe) return -ENOSPC; set_64bit_val(wqe, IRDMA_BYTE_16, aeq->elem_cnt); set_64bit_val(wqe, IRDMA_BYTE_32, (aeq->virtual_map ? 0 : aeq->aeq_elem_pa)); set_64bit_val(wqe, IRDMA_BYTE_48, (aeq->virtual_map ? 
aeq->first_pm_pbl_idx : 0)); hdr = FIELD_PREP(IRDMA_CQPSQ_OPCODE, IRDMA_CQP_OP_CREATE_AEQ) | FIELD_PREP(IRDMA_CQPSQ_AEQ_LPBLSIZE, aeq->pbl_chunk_size) | FIELD_PREP(IRDMA_CQPSQ_AEQ_VMAP, aeq->virtual_map) | FIELD_PREP(IRDMA_CQPSQ_WQEVALID, cqp->polarity); irdma_wmb(); /* make sure WQE is written before valid bit is set */ set_64bit_val(wqe, IRDMA_BYTE_24, hdr); irdma_debug_buf(cqp->dev, IRDMA_DEBUG_WQE, "AEQ_CREATE WQE", wqe, IRDMA_CQP_WQE_SIZE * 8); if (post_sq) irdma_sc_cqp_post_sq(cqp); return 0; } /** * irdma_sc_aeq_destroy - destroy aeq during close * @aeq: aeq structure ptr * @scratch: u64 saved to be used during cqp completion * @post_sq: flag for cqp db to ring */ int irdma_sc_aeq_destroy(struct irdma_sc_aeq *aeq, u64 scratch, bool post_sq) { __le64 *wqe; struct irdma_sc_cqp *cqp; struct irdma_sc_dev *dev; u64 hdr; dev = aeq->dev; writel(0, dev->hw_regs[IRDMA_PFINT_AEQCTL]); cqp = dev->cqp; wqe = irdma_sc_cqp_get_next_send_wqe(cqp, scratch); if (!wqe) return -ENOSPC; set_64bit_val(wqe, IRDMA_BYTE_16, aeq->elem_cnt); set_64bit_val(wqe, IRDMA_BYTE_48, aeq->first_pm_pbl_idx); hdr = FIELD_PREP(IRDMA_CQPSQ_OPCODE, IRDMA_CQP_OP_DESTROY_AEQ) | FIELD_PREP(IRDMA_CQPSQ_AEQ_LPBLSIZE, aeq->pbl_chunk_size) | FIELD_PREP(IRDMA_CQPSQ_AEQ_VMAP, aeq->virtual_map) | FIELD_PREP(IRDMA_CQPSQ_WQEVALID, cqp->polarity); irdma_wmb(); /* make sure WQE is written before valid bit is set */ set_64bit_val(wqe, IRDMA_BYTE_24, hdr); irdma_debug_buf(dev, IRDMA_DEBUG_WQE, "AEQ_DESTROY WQE", wqe, IRDMA_CQP_WQE_SIZE * 8); if (post_sq) irdma_sc_cqp_post_sq(cqp); return 0; } /** * irdma_sc_get_next_aeqe - get next aeq entry * @aeq: aeq structure ptr * @info: aeqe info to be returned */ int irdma_sc_get_next_aeqe(struct irdma_sc_aeq *aeq, struct irdma_aeqe_info *info) { u64 temp, compl_ctx; __le64 *aeqe; u8 ae_src; u8 polarity; aeqe = IRDMA_GET_CURRENT_AEQ_ELEM(aeq); - get_64bit_val(aeqe, IRDMA_BYTE_0, &compl_ctx); get_64bit_val(aeqe, IRDMA_BYTE_8, &temp); polarity = (u8)FIELD_GET(IRDMA_AEQE_VALID, temp); if (aeq->polarity != polarity) return -ENOENT; + /* Ensure AEQE contents are read after valid bit is checked */ + rmb(); + + get_64bit_val(aeqe, IRDMA_BYTE_0, &compl_ctx); + irdma_debug_buf(aeq->dev, IRDMA_DEBUG_WQE, "AEQ_ENTRY WQE", aeqe, 16); ae_src = (u8)FIELD_GET(IRDMA_AEQE_AESRC, temp); info->wqe_idx = (u16)FIELD_GET(IRDMA_AEQE_WQDESCIDX, temp); info->qp_cq_id = (u32)FIELD_GET(IRDMA_AEQE_QPCQID_LOW, temp) | ((u32)FIELD_GET(IRDMA_AEQE_QPCQID_HI, temp) << 18); info->ae_id = (u16)FIELD_GET(IRDMA_AEQE_AECODE, temp); info->tcp_state = (u8)FIELD_GET(IRDMA_AEQE_TCPSTATE, temp); info->iwarp_state = (u8)FIELD_GET(IRDMA_AEQE_IWSTATE, temp); info->q2_data_written = (u8)FIELD_GET(IRDMA_AEQE_Q2DATA, temp); info->aeqe_overflow = (bool)FIELD_GET(IRDMA_AEQE_OVERFLOW, temp); info->ae_src = ae_src; switch (info->ae_id) { case IRDMA_AE_PRIV_OPERATION_DENIED: case IRDMA_AE_AMP_INVALIDATE_TYPE1_MW: case IRDMA_AE_AMP_MWBIND_ZERO_BASED_TYPE1_MW: case IRDMA_AE_AMP_FASTREG_INVALID_PBL_HPS_CFG: case IRDMA_AE_AMP_FASTREG_PBLE_MISMATCH: case IRDMA_AE_UDA_XMIT_DGRAM_TOO_LONG: case IRDMA_AE_UDA_XMIT_BAD_PD: case IRDMA_AE_UDA_XMIT_DGRAM_TOO_SHORT: case IRDMA_AE_BAD_CLOSE: case IRDMA_AE_RDMA_READ_WHILE_ORD_ZERO: case IRDMA_AE_STAG_ZERO_INVALID: case IRDMA_AE_IB_RREQ_AND_Q1_FULL: case IRDMA_AE_IB_INVALID_REQUEST: case IRDMA_AE_WQE_UNEXPECTED_OPCODE: case IRDMA_AE_IB_REMOTE_ACCESS_ERROR: case IRDMA_AE_IB_REMOTE_OP_ERROR: case IRDMA_AE_DDP_UBE_INVALID_DDP_VERSION: case IRDMA_AE_DDP_UBE_INVALID_MO: case IRDMA_AE_DDP_UBE_INVALID_QN: case 
IRDMA_AE_DDP_NO_L_BIT: case IRDMA_AE_RDMAP_ROE_INVALID_RDMAP_VERSION: case IRDMA_AE_RDMAP_ROE_UNEXPECTED_OPCODE: case IRDMA_AE_ROE_INVALID_RDMA_READ_REQUEST: case IRDMA_AE_ROE_INVALID_RDMA_WRITE_OR_READ_RESP: case IRDMA_AE_ROCE_RSP_LENGTH_ERROR: case IRDMA_AE_INVALID_ARP_ENTRY: case IRDMA_AE_INVALID_TCP_OPTION_RCVD: case IRDMA_AE_STALE_ARP_ENTRY: case IRDMA_AE_INVALID_AH_ENTRY: case IRDMA_AE_LLP_RECEIVED_MPA_CRC_ERROR: case IRDMA_AE_LLP_SEGMENT_TOO_SMALL: case IRDMA_AE_LLP_TOO_MANY_RETRIES: case IRDMA_AE_LCE_QP_CATASTROPHIC: case IRDMA_AE_LLP_DOUBT_REACHABILITY: case IRDMA_AE_LLP_CONNECTION_ESTABLISHED: case IRDMA_AE_RESET_SENT: case IRDMA_AE_TERMINATE_SENT: case IRDMA_AE_RESET_NOT_SENT: case IRDMA_AE_QP_SUSPEND_COMPLETE: case IRDMA_AE_UDA_L4LEN_INVALID: info->qp = true; info->compl_ctx = compl_ctx; break; case IRDMA_AE_LCE_CQ_CATASTROPHIC: info->cq = true; info->compl_ctx = LS_64_1(compl_ctx, 1); ae_src = IRDMA_AE_SOURCE_RSVD; break; case IRDMA_AE_ROCE_EMPTY_MCG: case IRDMA_AE_ROCE_BAD_MC_IP_ADDR: case IRDMA_AE_ROCE_BAD_MC_QPID: case IRDMA_AE_MCG_QP_PROTOCOL_MISMATCH: /* fallthrough */ case IRDMA_AE_LLP_CONNECTION_RESET: case IRDMA_AE_LLP_SYN_RECEIVED: case IRDMA_AE_LLP_FIN_RECEIVED: case IRDMA_AE_LLP_CLOSE_COMPLETE: case IRDMA_AE_LLP_TERMINATE_RECEIVED: case IRDMA_AE_RDMAP_ROE_BAD_LLP_CLOSE: ae_src = IRDMA_AE_SOURCE_RSVD; info->qp = true; info->compl_ctx = compl_ctx; break; case IRDMA_AE_RESOURCE_EXHAUSTION: /* * ae_src contains the exhausted resource with a unique decoding. Set RSVD here to prevent matching * with a CQ or QP. */ ae_src = IRDMA_AE_SOURCE_RSVD; break; default: break; } switch (ae_src) { case IRDMA_AE_SOURCE_RQ: case IRDMA_AE_SOURCE_RQ_0011: info->qp = true; info->rq = true; info->compl_ctx = compl_ctx; info->err_rq_idx_valid = true; break; case IRDMA_AE_SOURCE_CQ: case IRDMA_AE_SOURCE_CQ_0110: case IRDMA_AE_SOURCE_CQ_1010: case IRDMA_AE_SOURCE_CQ_1110: info->cq = true; info->compl_ctx = LS_64_1(compl_ctx, 1); break; case IRDMA_AE_SOURCE_SQ: case IRDMA_AE_SOURCE_SQ_0111: info->qp = true; info->sq = true; info->compl_ctx = compl_ctx; break; case IRDMA_AE_SOURCE_IN_WR: info->qp = true; info->compl_ctx = compl_ctx; info->in_rdrsp_wr = true; break; case IRDMA_AE_SOURCE_IN_RR: info->qp = true; info->compl_ctx = compl_ctx; info->in_rdrsp_wr = true; break; case IRDMA_AE_SOURCE_OUT_RR: case IRDMA_AE_SOURCE_OUT_RR_1111: info->qp = true; info->compl_ctx = compl_ctx; info->out_rdrsp = true; break; case IRDMA_AE_SOURCE_RSVD: default: break; } IRDMA_RING_MOVE_TAIL(aeq->aeq_ring); if (!IRDMA_RING_CURRENT_TAIL(aeq->aeq_ring)) aeq->polarity ^= 1; return 0; } /** * irdma_sc_repost_aeq_entries - repost completed aeq entries * @dev: sc device struct * @count: allocate count */ void irdma_sc_repost_aeq_entries(struct irdma_sc_dev *dev, u32 count) { db_wr32(count, dev->aeq_alloc_db); } /** * irdma_sc_ccq_init - initialize control cq * @cq: sc's cq ctruct * @info: info for control cq initialization */ int irdma_sc_ccq_init(struct irdma_sc_cq *cq, struct irdma_ccq_init_info *info) { u32 pble_obj_cnt; if (info->num_elem < info->dev->hw_attrs.uk_attrs.min_hw_cq_size || info->num_elem > info->dev->hw_attrs.uk_attrs.max_hw_cq_size) return -EINVAL; if (info->ceq_id > (info->dev->hmc_fpm_misc.max_ceqs - 1)) return -EINVAL; pble_obj_cnt = info->dev->hmc_info->hmc_obj[IRDMA_HMC_IW_PBLE].cnt; if (info->virtual_map && info->first_pm_pbl_idx >= pble_obj_cnt) return -EINVAL; cq->cq_pa = info->cq_pa; cq->cq_uk.cq_base = info->cq_base; cq->shadow_area_pa = info->shadow_area_pa; cq->cq_uk.shadow_area = 
info->shadow_area; cq->shadow_read_threshold = info->shadow_read_threshold; cq->dev = info->dev; cq->ceq_id = info->ceq_id; cq->cq_uk.cq_size = info->num_elem; cq->cq_type = IRDMA_CQ_TYPE_CQP; cq->ceqe_mask = info->ceqe_mask; IRDMA_RING_INIT(cq->cq_uk.cq_ring, info->num_elem); cq->cq_uk.cq_id = 0; /* control cq is id 0 always */ cq->ceq_id_valid = info->ceq_id_valid; cq->tph_en = info->tph_en; cq->tph_val = info->tph_val; cq->cq_uk.avoid_mem_cflct = info->avoid_mem_cflct; cq->pbl_list = info->pbl_list; cq->virtual_map = info->virtual_map; cq->pbl_chunk_size = info->pbl_chunk_size; cq->first_pm_pbl_idx = info->first_pm_pbl_idx; cq->cq_uk.polarity = true; cq->vsi = info->vsi; cq->cq_uk.cq_ack_db = cq->dev->cq_ack_db; /* Only applicable to CQs other than CCQ so initialize to zero */ cq->cq_uk.cqe_alloc_db = NULL; info->dev->ccq = cq; return 0; } /** * irdma_sc_ccq_create_done - poll cqp for ccq create * @ccq: ccq sc struct */ static inline int irdma_sc_ccq_create_done(struct irdma_sc_cq *ccq) { struct irdma_sc_cqp *cqp; cqp = ccq->dev->cqp; return irdma_sc_poll_for_cqp_op_done(cqp, IRDMA_CQP_OP_CREATE_CQ, NULL); } /** * irdma_sc_ccq_create - create control cq * @ccq: ccq sc struct * @scratch: u64 saved to be used during cqp completion * @check_overflow: overflow flag for ccq * @post_sq: flag for cqp db to ring */ int irdma_sc_ccq_create(struct irdma_sc_cq *ccq, u64 scratch, bool check_overflow, bool post_sq) { int ret_code; ret_code = irdma_sc_cq_create(ccq, scratch, check_overflow, post_sq); if (ret_code) return ret_code; if (post_sq) { ret_code = irdma_sc_ccq_create_done(ccq); if (ret_code) return ret_code; } ccq->dev->cqp->process_cqp_sds = irdma_cqp_sds_cmd; return 0; } /** * irdma_sc_ccq_destroy - destroy ccq during close * @ccq: ccq sc struct * @scratch: u64 saved to be used during cqp completion * @post_sq: flag for cqp db to ring */ int irdma_sc_ccq_destroy(struct irdma_sc_cq *ccq, u64 scratch, bool post_sq) { struct irdma_sc_cqp *cqp; __le64 *wqe; u64 hdr; int ret_code = 0; u32 tail, val, error; cqp = ccq->dev->cqp; wqe = irdma_sc_cqp_get_next_send_wqe(cqp, scratch); if (!wqe) return -ENOSPC; set_64bit_val(wqe, IRDMA_BYTE_0, ccq->cq_uk.cq_size); set_64bit_val(wqe, IRDMA_BYTE_8, RS_64_1(ccq, 1)); set_64bit_val(wqe, IRDMA_BYTE_40, ccq->shadow_area_pa); hdr = ccq->cq_uk.cq_id | FLD_LS_64(ccq->dev, (ccq->ceq_id_valid ?
ccq->ceq_id : 0), IRDMA_CQPSQ_CQ_CEQID) | FIELD_PREP(IRDMA_CQPSQ_OPCODE, IRDMA_CQP_OP_DESTROY_CQ) | FIELD_PREP(IRDMA_CQPSQ_CQ_ENCEQEMASK, ccq->ceqe_mask) | FIELD_PREP(IRDMA_CQPSQ_CQ_CEQIDVALID, ccq->ceq_id_valid) | FIELD_PREP(IRDMA_CQPSQ_TPHEN, ccq->tph_en) | FIELD_PREP(IRDMA_CQPSQ_CQ_AVOIDMEMCNFLCT, ccq->cq_uk.avoid_mem_cflct) | FIELD_PREP(IRDMA_CQPSQ_WQEVALID, cqp->polarity); irdma_wmb(); /* make sure WQE is written before valid bit is set */ set_64bit_val(wqe, IRDMA_BYTE_24, hdr); irdma_debug_buf(cqp->dev, IRDMA_DEBUG_WQE, "CCQ_DESTROY WQE", wqe, IRDMA_CQP_WQE_SIZE * 8); irdma_get_cqp_reg_info(cqp, &val, &tail, &error); if (post_sq) { irdma_sc_cqp_post_sq(cqp); ret_code = irdma_cqp_poll_registers(cqp, tail, cqp->dev->hw_attrs.max_done_count); } cqp->process_cqp_sds = irdma_update_sds_noccq; return ret_code; } /** * irdma_sc_init_iw_hmc() - queries fpm values using cqp and populates hmc_info * @dev : ptr to irdma_dev struct * @hmc_fn_id: hmc function id */ int irdma_sc_init_iw_hmc(struct irdma_sc_dev *dev, u16 hmc_fn_id) { struct irdma_hmc_info *hmc_info; struct irdma_hmc_fpm_misc *hmc_fpm_misc; struct irdma_dma_mem query_fpm_mem; int ret_code = 0; u8 wait_type; hmc_info = dev->hmc_info; hmc_fpm_misc = &dev->hmc_fpm_misc; query_fpm_mem.pa = dev->fpm_query_buf_pa; query_fpm_mem.va = dev->fpm_query_buf; hmc_info->hmc_fn_id = hmc_fn_id; wait_type = (u8)IRDMA_CQP_WAIT_POLL_REGS; ret_code = irdma_sc_query_fpm_val(dev->cqp, 0, hmc_info->hmc_fn_id, &query_fpm_mem, true, wait_type); if (ret_code) return ret_code; /* parse the fpm_query_buf and fill hmc obj info */ ret_code = irdma_sc_parse_fpm_query_buf(dev, query_fpm_mem.va, hmc_info, hmc_fpm_misc); irdma_debug_buf(dev, IRDMA_DEBUG_HMC, "QUERY FPM BUFFER", query_fpm_mem.va, IRDMA_QUERY_FPM_BUF_SIZE); return ret_code; } /** * irdma_sc_cfg_iw_fpm() - commits hmc obj cnt values using cqp * command and populates fpm base address in hmc_info * @dev : ptr to irdma_dev struct * @hmc_fn_id: hmc function id */ static int irdma_sc_cfg_iw_fpm(struct irdma_sc_dev *dev, u16 hmc_fn_id) { struct irdma_hmc_obj_info *obj_info; __le64 *buf; struct irdma_hmc_info *hmc_info; struct irdma_dma_mem commit_fpm_mem; int ret_code = 0; u8 wait_type; hmc_info = dev->hmc_info; obj_info = hmc_info->hmc_obj; buf = dev->fpm_commit_buf; set_64bit_val(buf, IRDMA_BYTE_0, (u64)obj_info[IRDMA_HMC_IW_QP].cnt); set_64bit_val(buf, IRDMA_BYTE_8, (u64)obj_info[IRDMA_HMC_IW_CQ].cnt); set_64bit_val(buf, IRDMA_BYTE_16, (u64)0); /* RSRVD */ set_64bit_val(buf, IRDMA_BYTE_24, (u64)obj_info[IRDMA_HMC_IW_HTE].cnt); set_64bit_val(buf, IRDMA_BYTE_32, (u64)obj_info[IRDMA_HMC_IW_ARP].cnt); set_64bit_val(buf, IRDMA_BYTE_40, (u64)0); /* RSVD */ set_64bit_val(buf, IRDMA_BYTE_48, (u64)obj_info[IRDMA_HMC_IW_MR].cnt); set_64bit_val(buf, IRDMA_BYTE_56, (u64)obj_info[IRDMA_HMC_IW_XF].cnt); set_64bit_val(buf, IRDMA_BYTE_64, (u64)obj_info[IRDMA_HMC_IW_XFFL].cnt); set_64bit_val(buf, IRDMA_BYTE_72, (u64)obj_info[IRDMA_HMC_IW_Q1].cnt); set_64bit_val(buf, IRDMA_BYTE_80, (u64)obj_info[IRDMA_HMC_IW_Q1FL].cnt); set_64bit_val(buf, IRDMA_BYTE_88, (u64)obj_info[IRDMA_HMC_IW_TIMER].cnt); set_64bit_val(buf, IRDMA_BYTE_96, (u64)obj_info[IRDMA_HMC_IW_FSIMC].cnt); set_64bit_val(buf, IRDMA_BYTE_104, (u64)obj_info[IRDMA_HMC_IW_FSIAV].cnt); set_64bit_val(buf, IRDMA_BYTE_112, (u64)obj_info[IRDMA_HMC_IW_PBLE].cnt); set_64bit_val(buf, IRDMA_BYTE_120, (u64)0); /* RSVD */ set_64bit_val(buf, IRDMA_BYTE_128, (u64)obj_info[IRDMA_HMC_IW_RRF].cnt); set_64bit_val(buf, IRDMA_BYTE_136, (u64)obj_info[IRDMA_HMC_IW_RRFFL].cnt); 
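/*
 * The commit-FPM buffer being filled here is a flat array of 64-bit
 * little-endian words, one HMC object count per fixed byte offset, with the
 * reserved slots written as zero. A host-endian sketch of that packing
 * (demo_* names are invented; the driver's set_64bit_val() also handles the
 * little-endian conversion):
 */
#include <stdint.h>
#include <string.h>

static void demo_set_64(uint8_t *buf, uint32_t byte_off, uint64_t val)
{
	memcpy(buf + byte_off, &val, sizeof(val));
}

/* Usage mirroring the layout above: QP count at offset 0, CQ count at 8, ... */
static void demo_fill_commit_buf(uint8_t *buf, uint64_t qp_cnt, uint64_t cq_cnt)
{
	demo_set_64(buf, 0, qp_cnt);
	demo_set_64(buf, 8, cq_cnt);
	demo_set_64(buf, 16, 0);	/* reserved slot stays zero */
}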
set_64bit_val(buf, IRDMA_BYTE_144, (u64)obj_info[IRDMA_HMC_IW_HDR].cnt); set_64bit_val(buf, IRDMA_BYTE_152, (u64)obj_info[IRDMA_HMC_IW_MD].cnt); set_64bit_val(buf, IRDMA_BYTE_160, (u64)obj_info[IRDMA_HMC_IW_OOISC].cnt); set_64bit_val(buf, IRDMA_BYTE_168, (u64)obj_info[IRDMA_HMC_IW_OOISCFFL].cnt); commit_fpm_mem.pa = dev->fpm_commit_buf_pa; commit_fpm_mem.va = dev->fpm_commit_buf; wait_type = (u8)IRDMA_CQP_WAIT_POLL_REGS; irdma_debug_buf(dev, IRDMA_DEBUG_HMC, "COMMIT FPM BUFFER", commit_fpm_mem.va, IRDMA_COMMIT_FPM_BUF_SIZE); ret_code = irdma_sc_commit_fpm_val(dev->cqp, 0, hmc_info->hmc_fn_id, &commit_fpm_mem, true, wait_type); if (!ret_code) irdma_sc_parse_fpm_commit_buf(dev, dev->fpm_commit_buf, hmc_info->hmc_obj, &hmc_info->sd_table.sd_cnt); irdma_debug_buf(dev, IRDMA_DEBUG_HMC, "COMMIT FPM BUFFER", commit_fpm_mem.va, IRDMA_COMMIT_FPM_BUF_SIZE); return ret_code; } /** * cqp_sds_wqe_fill - fill cqp wqe doe sd * @cqp: struct for cqp hw * @info: sd info for wqe * @scratch: u64 saved to be used during cqp completion */ static int cqp_sds_wqe_fill(struct irdma_sc_cqp *cqp, struct irdma_update_sds_info *info, u64 scratch) { u64 data; u64 hdr; __le64 *wqe; int mem_entries, wqe_entries; struct irdma_dma_mem *sdbuf = &cqp->sdbuf; u64 offset = 0; u32 wqe_idx; wqe = irdma_sc_cqp_get_next_send_wqe_idx(cqp, scratch, &wqe_idx); if (!wqe) return -ENOSPC; wqe_entries = (info->cnt > 3) ? 3 : info->cnt; mem_entries = info->cnt - wqe_entries; if (mem_entries) { offset = wqe_idx * IRDMA_UPDATE_SD_BUFF_SIZE; irdma_memcpy(((char *)sdbuf->va + offset), &info->entry[3], mem_entries << 4); data = (u64)sdbuf->pa + offset; } else { data = 0; } data |= FLD_LS_64(cqp->dev, info->hmc_fn_id, IRDMA_CQPSQ_UPESD_HMCFNID); set_64bit_val(wqe, IRDMA_BYTE_16, data); switch (wqe_entries) { case 3: set_64bit_val(wqe, IRDMA_BYTE_48, (FIELD_PREP(IRDMA_CQPSQ_UPESD_SDCMD, info->entry[2].cmd) | FIELD_PREP(IRDMA_CQPSQ_UPESD_ENTRY_VALID, 1))); set_64bit_val(wqe, IRDMA_BYTE_56, info->entry[2].data); /* fallthrough */ case 2: set_64bit_val(wqe, IRDMA_BYTE_32, (FIELD_PREP(IRDMA_CQPSQ_UPESD_SDCMD, info->entry[1].cmd) | FIELD_PREP(IRDMA_CQPSQ_UPESD_ENTRY_VALID, 1))); set_64bit_val(wqe, IRDMA_BYTE_40, info->entry[1].data); /* fallthrough */ case 1: set_64bit_val(wqe, IRDMA_BYTE_0, FIELD_PREP(IRDMA_CQPSQ_UPESD_SDCMD, info->entry[0].cmd)); set_64bit_val(wqe, IRDMA_BYTE_8, info->entry[0].data); break; default: break; } hdr = FIELD_PREP(IRDMA_CQPSQ_OPCODE, IRDMA_CQP_OP_UPDATE_PE_SDS) | FIELD_PREP(IRDMA_CQPSQ_WQEVALID, cqp->polarity) | FIELD_PREP(IRDMA_CQPSQ_UPESD_ENTRY_COUNT, mem_entries); irdma_wmb(); /* make sure WQE is written before valid bit is set */ set_64bit_val(wqe, IRDMA_BYTE_24, hdr); if (mem_entries) irdma_debug_buf(cqp->dev, IRDMA_DEBUG_WQE, "UPDATE_PE_SDS WQE Buffer", (char *)sdbuf->va + offset, mem_entries << 4); irdma_debug_buf(cqp->dev, IRDMA_DEBUG_WQE, "UPDATE_PE_SDS WQE", wqe, IRDMA_CQP_WQE_SIZE * 8); return 0; } /** * irdma_update_pe_sds - cqp wqe for sd * @dev: ptr to irdma_dev struct * @info: sd info for sd's * @scratch: u64 saved to be used during cqp completion */ static int irdma_update_pe_sds(struct irdma_sc_dev *dev, struct irdma_update_sds_info *info, u64 scratch) { struct irdma_sc_cqp *cqp = dev->cqp; int ret_code; ret_code = cqp_sds_wqe_fill(cqp, info, scratch); if (!ret_code) irdma_sc_cqp_post_sq(cqp); return ret_code; } /** * irdma_update_sds_noccq - update sd before ccq created * @dev: sc device struct * @info: sd info for sd's */ int irdma_update_sds_noccq(struct irdma_sc_dev *dev, struct irdma_update_sds_info 
*info) { u32 error, val, tail; struct irdma_sc_cqp *cqp = dev->cqp; int ret_code; ret_code = cqp_sds_wqe_fill(cqp, info, 0); if (ret_code) return ret_code; irdma_get_cqp_reg_info(cqp, &val, &tail, &error); irdma_sc_cqp_post_sq(cqp); return irdma_cqp_poll_registers(cqp, tail, cqp->dev->hw_attrs.max_done_count); } /** * irdma_sc_static_hmc_pages_allocated - cqp wqe to allocate hmc pages * @cqp: struct for cqp hw * @scratch: u64 saved to be used during cqp completion * @hmc_fn_id: hmc function id * @post_sq: flag for cqp db to ring * @poll_registers: flag to poll register for cqp completion */ int irdma_sc_static_hmc_pages_allocated(struct irdma_sc_cqp *cqp, u64 scratch, u16 hmc_fn_id, bool post_sq, bool poll_registers) { u64 hdr; __le64 *wqe; u32 tail, val, error; wqe = irdma_sc_cqp_get_next_send_wqe(cqp, scratch); if (!wqe) return -ENOSPC; set_64bit_val(wqe, IRDMA_BYTE_16, FIELD_PREP(IRDMA_SHMC_PAGE_ALLOCATED_HMC_FN_ID, hmc_fn_id)); hdr = FIELD_PREP(IRDMA_CQPSQ_OPCODE, IRDMA_CQP_OP_SHMC_PAGES_ALLOCATED) | FIELD_PREP(IRDMA_CQPSQ_WQEVALID, cqp->polarity); irdma_wmb(); /* make sure WQE is written before valid bit is set */ set_64bit_val(wqe, IRDMA_BYTE_24, hdr); irdma_debug_buf(cqp->dev, IRDMA_DEBUG_WQE, "SHMC_PAGES_ALLOCATED WQE", wqe, IRDMA_CQP_WQE_SIZE * 8); irdma_get_cqp_reg_info(cqp, &val, &tail, &error); if (post_sq) { irdma_sc_cqp_post_sq(cqp); if (poll_registers) /* check for cqp sq tail update */ return irdma_cqp_poll_registers(cqp, tail, cqp->dev->hw_attrs.max_done_count); else return irdma_sc_poll_for_cqp_op_done(cqp, IRDMA_CQP_OP_SHMC_PAGES_ALLOCATED, NULL); } return 0; } /** * irdma_cqp_ring_full - check if cqp ring is full * @cqp: struct for cqp hw */ static bool irdma_cqp_ring_full(struct irdma_sc_cqp *cqp) { return IRDMA_RING_FULL_ERR(cqp->sq_ring); } /** * irdma_est_sd - returns approximate number of SDs for HMC * @dev: sc device struct * @hmc_info: hmc structure, size and count for HMC objects */ static u32 irdma_est_sd(struct irdma_sc_dev *dev, struct irdma_hmc_info *hmc_info){ - int i; + struct irdma_hmc_obj_info *pble_info; u64 size = 0; u64 sd; + int i; for (i = IRDMA_HMC_IW_QP; i < IRDMA_HMC_IW_MAX; i++) if (i != IRDMA_HMC_IW_PBLE) size += round_up(hmc_info->hmc_obj[i].cnt * hmc_info->hmc_obj[i].size, 512); - size += round_up(hmc_info->hmc_obj[IRDMA_HMC_IW_PBLE].cnt * - hmc_info->hmc_obj[IRDMA_HMC_IW_PBLE].size, 512); + + pble_info = &hmc_info->hmc_obj[IRDMA_HMC_IW_PBLE]; + size += round_up(pble_info->cnt * pble_info->size, 512); if (size & 0x1FFFFF) sd = (size >> 21) + 1; /* add 1 for remainder */ else sd = size >> 21; if (sd > 0xFFFFFFFF) { irdma_debug(dev, IRDMA_DEBUG_HMC, "sd overflow[%ld]\n", sd); sd = 0xFFFFFFFF - 1; } return (u32)sd; } /** * irdma_sc_query_rdma_features - query RDMA features and FW ver * @cqp: struct for cqp hw * @buf: buffer to hold query info * @scratch: u64 saved to be used during cqp completion */ static int irdma_sc_query_rdma_features(struct irdma_sc_cqp *cqp, struct irdma_dma_mem *buf, u64 scratch) { __le64 *wqe; u64 temp; u32 tail, val, error; int status; wqe = irdma_sc_cqp_get_next_send_wqe(cqp, scratch); if (!wqe) return -ENOSPC; temp = buf->pa; set_64bit_val(wqe, IRDMA_BYTE_32, temp); temp = FIELD_PREP(IRDMA_CQPSQ_QUERY_RDMA_FEATURES_WQEVALID, cqp->polarity) | FIELD_PREP(IRDMA_CQPSQ_QUERY_RDMA_FEATURES_BUF_LEN, buf->size) | FIELD_PREP(IRDMA_CQPSQ_UP_OP, IRDMA_CQP_OP_QUERY_RDMA_FEATURES); irdma_wmb(); /* make sure WQE is written before valid bit is set */ set_64bit_val(wqe, IRDMA_BYTE_24, temp); irdma_debug_buf(cqp->dev, IRDMA_DEBUG_WQE, 
"QUERY RDMA FEATURES", wqe, IRDMA_CQP_WQE_SIZE * 8); irdma_get_cqp_reg_info(cqp, &val, &tail, &error); irdma_sc_cqp_post_sq(cqp); status = irdma_cqp_poll_registers(cqp, tail, cqp->dev->hw_attrs.max_done_count); if (error || status) status = -EIO; return status; } /** * irdma_get_rdma_features - get RDMA features * @dev: sc device struct */ int irdma_get_rdma_features(struct irdma_sc_dev *dev) { int ret_code; struct irdma_dma_mem feat_buf; u64 temp; u16 byte_idx, feat_type, feat_cnt, feat_idx; feat_buf.size = IRDMA_FEATURE_BUF_SIZE; feat_buf.va = irdma_allocate_dma_mem(dev->hw, &feat_buf, feat_buf.size, IRDMA_FEATURE_BUF_ALIGNMENT); if (!feat_buf.va) return -ENOMEM; ret_code = irdma_sc_query_rdma_features(dev->cqp, &feat_buf, 0); if (ret_code) goto exit; get_64bit_val(feat_buf.va, IRDMA_BYTE_0, &temp); feat_cnt = (u16)FIELD_GET(IRDMA_FEATURE_CNT, temp); if (feat_cnt < IRDMA_MIN_FEATURES) { ret_code = -EINVAL; goto exit; } else if (feat_cnt > IRDMA_MAX_FEATURES) { irdma_debug(dev, IRDMA_DEBUG_DEV, - "feature buf size insufficient," - "retrying with larger buffer\n"); + "feature buf size insufficient, retrying with larger buffer\n"); irdma_free_dma_mem(dev->hw, &feat_buf); feat_buf.size = 8 * feat_cnt; feat_buf.va = irdma_allocate_dma_mem(dev->hw, &feat_buf, feat_buf.size, IRDMA_FEATURE_BUF_ALIGNMENT); if (!feat_buf.va) return -ENOMEM; ret_code = irdma_sc_query_rdma_features(dev->cqp, &feat_buf, 0); if (ret_code) goto exit; get_64bit_val(feat_buf.va, IRDMA_BYTE_0, &temp); feat_cnt = (u16)FIELD_GET(IRDMA_FEATURE_CNT, temp); if (feat_cnt < IRDMA_MIN_FEATURES) { ret_code = -EINVAL; goto exit; } } irdma_debug_buf(dev, IRDMA_DEBUG_WQE, "QUERY RDMA FEATURES", feat_buf.va, feat_cnt * 8); for (byte_idx = 0, feat_idx = 0; feat_idx < min(feat_cnt, (u16)IRDMA_MAX_FEATURES); feat_idx++, byte_idx += 8) { get_64bit_val(feat_buf.va, byte_idx, &temp); feat_type = FIELD_GET(IRDMA_FEATURE_TYPE, temp); dev->feature_info[feat_type] = temp; } exit: irdma_free_dma_mem(dev->hw, &feat_buf); return ret_code; } static u32 irdma_q1_cnt(struct irdma_sc_dev *dev, struct irdma_hmc_info *hmc_info, u32 qpwanted){ u32 q1_cnt; if (dev->hw_attrs.uk_attrs.hw_rev == IRDMA_GEN_1) { q1_cnt = roundup_pow_of_two(dev->hw_attrs.max_hw_ird * 2 * qpwanted); } else { if (dev->cqp->protocol_used != IRDMA_IWARP_PROTOCOL_ONLY) q1_cnt = roundup_pow_of_two(dev->hw_attrs.max_hw_ird * 2 * qpwanted + 512); else q1_cnt = dev->hw_attrs.max_hw_ird * 2 * qpwanted; } return q1_cnt; } static void cfg_fpm_value_gen_1(struct irdma_sc_dev *dev, struct irdma_hmc_info *hmc_info, u32 qpwanted) { hmc_info->hmc_obj[IRDMA_HMC_IW_XF].cnt = roundup_pow_of_two(qpwanted * dev->hw_attrs.max_hw_wqes); } static void cfg_fpm_value_gen_2(struct irdma_sc_dev *dev, struct irdma_hmc_info *hmc_info, u32 qpwanted) { struct irdma_hmc_fpm_misc *hmc_fpm_misc = &dev->hmc_fpm_misc; hmc_info->hmc_obj[IRDMA_HMC_IW_XF].cnt = 4 * hmc_fpm_misc->xf_block_size * qpwanted; hmc_info->hmc_obj[IRDMA_HMC_IW_HDR].cnt = qpwanted; if (hmc_info->hmc_obj[IRDMA_HMC_IW_RRF].max_cnt) hmc_info->hmc_obj[IRDMA_HMC_IW_RRF].cnt = 32 * qpwanted; if (hmc_info->hmc_obj[IRDMA_HMC_IW_RRFFL].max_cnt) hmc_info->hmc_obj[IRDMA_HMC_IW_RRFFL].cnt = hmc_info->hmc_obj[IRDMA_HMC_IW_RRF].cnt / hmc_fpm_misc->rrf_block_size; if (dev->cqp->protocol_used == IRDMA_IWARP_PROTOCOL_ONLY) { if (hmc_info->hmc_obj[IRDMA_HMC_IW_OOISC].max_cnt) hmc_info->hmc_obj[IRDMA_HMC_IW_OOISC].cnt = 32 * qpwanted; if (hmc_info->hmc_obj[IRDMA_HMC_IW_OOISCFFL].max_cnt) hmc_info->hmc_obj[IRDMA_HMC_IW_OOISCFFL].cnt = 
hmc_info->hmc_obj[IRDMA_HMC_IW_OOISC].cnt / hmc_fpm_misc->ooiscf_block_size; } } /** * irdma_cfg_fpm_val - configure HMC objects * @dev: sc device struct * @qp_count: desired qp count */ int irdma_cfg_fpm_val(struct irdma_sc_dev *dev, u32 qp_count) { struct irdma_virt_mem virt_mem; u32 i, mem_size; u32 qpwanted, mrwanted, pblewanted; u32 powerof2, hte; u32 sd_needed; u32 sd_diff; u32 loop_count = 0; struct irdma_hmc_info *hmc_info; struct irdma_hmc_fpm_misc *hmc_fpm_misc; int ret_code = 0; u32 max_sds; hmc_info = dev->hmc_info; hmc_fpm_misc = &dev->hmc_fpm_misc; ret_code = irdma_sc_init_iw_hmc(dev, dev->hmc_fn_id); if (ret_code) { irdma_debug(dev, IRDMA_DEBUG_HMC, "irdma_sc_init_iw_hmc returned error_code = %d\n", ret_code); return ret_code; } max_sds = hmc_fpm_misc->max_sds; for (i = IRDMA_HMC_IW_QP; i < IRDMA_HMC_IW_MAX; i++) hmc_info->hmc_obj[i].cnt = hmc_info->hmc_obj[i].max_cnt; sd_needed = irdma_est_sd(dev, hmc_info); irdma_debug(dev, IRDMA_DEBUG_HMC, "sd count %d where max sd is %d\n", hmc_info->sd_table.sd_cnt, max_sds); qpwanted = min(qp_count, hmc_info->hmc_obj[IRDMA_HMC_IW_QP].max_cnt); powerof2 = 1; while (powerof2 <= qpwanted) powerof2 *= 2; powerof2 /= 2; qpwanted = powerof2; mrwanted = hmc_info->hmc_obj[IRDMA_HMC_IW_MR].max_cnt; pblewanted = hmc_info->hmc_obj[IRDMA_HMC_IW_PBLE].max_cnt; irdma_debug(dev, IRDMA_DEBUG_HMC, "req_qp=%d max_sd=%d, max_qp = %d, max_cq=%d, max_mr=%d, max_pble=%d, mc=%d, av=%d\n", qp_count, max_sds, hmc_info->hmc_obj[IRDMA_HMC_IW_QP].max_cnt, hmc_info->hmc_obj[IRDMA_HMC_IW_CQ].max_cnt, hmc_info->hmc_obj[IRDMA_HMC_IW_MR].max_cnt, hmc_info->hmc_obj[IRDMA_HMC_IW_PBLE].max_cnt, hmc_info->hmc_obj[IRDMA_HMC_IW_FSIMC].max_cnt, hmc_info->hmc_obj[IRDMA_HMC_IW_FSIAV].max_cnt); hmc_info->hmc_obj[IRDMA_HMC_IW_FSIMC].cnt = hmc_info->hmc_obj[IRDMA_HMC_IW_FSIMC].max_cnt; hmc_info->hmc_obj[IRDMA_HMC_IW_FSIAV].cnt = hmc_info->hmc_obj[IRDMA_HMC_IW_FSIAV].max_cnt; hmc_info->hmc_obj[IRDMA_HMC_IW_ARP].cnt = hmc_info->hmc_obj[IRDMA_HMC_IW_ARP].max_cnt; if (dev->hw_attrs.uk_attrs.hw_rev <= IRDMA_GEN_2) hmc_info->hmc_obj[IRDMA_HMC_IW_APBVT_ENTRY].cnt = 1; while (irdma_q1_cnt(dev, hmc_info, qpwanted) > hmc_info->hmc_obj[IRDMA_HMC_IW_Q1].max_cnt) qpwanted /= 2; if (dev->hw_attrs.uk_attrs.hw_rev == IRDMA_GEN_1) { cfg_fpm_value_gen_1(dev, hmc_info, qpwanted); while (hmc_info->hmc_obj[IRDMA_HMC_IW_XF].cnt > hmc_info->hmc_obj[IRDMA_HMC_IW_XF].max_cnt) { qpwanted /= 2; cfg_fpm_value_gen_1(dev, hmc_info, qpwanted); } } do { ++loop_count; hmc_info->hmc_obj[IRDMA_HMC_IW_QP].cnt = qpwanted; hmc_info->hmc_obj[IRDMA_HMC_IW_CQ].cnt = min(2 * qpwanted, hmc_info->hmc_obj[IRDMA_HMC_IW_CQ].cnt); hmc_info->hmc_obj[IRDMA_HMC_IW_RESERVED].cnt = 0; /* Reserved */ hmc_info->hmc_obj[IRDMA_HMC_IW_MR].cnt = mrwanted; hte = round_up(qpwanted + hmc_info->hmc_obj[IRDMA_HMC_IW_FSIMC].cnt, 512); powerof2 = 1; while (powerof2 < hte) powerof2 *= 2; hmc_info->hmc_obj[IRDMA_HMC_IW_HTE].cnt = powerof2 * hmc_fpm_misc->ht_multiplier; if (dev->hw_attrs.uk_attrs.hw_rev == IRDMA_GEN_1) cfg_fpm_value_gen_1(dev, hmc_info, qpwanted); else cfg_fpm_value_gen_2(dev, hmc_info, qpwanted); hmc_info->hmc_obj[IRDMA_HMC_IW_Q1].cnt = irdma_q1_cnt(dev, hmc_info, qpwanted); hmc_info->hmc_obj[IRDMA_HMC_IW_XFFL].cnt = hmc_info->hmc_obj[IRDMA_HMC_IW_XF].cnt / hmc_fpm_misc->xf_block_size; hmc_info->hmc_obj[IRDMA_HMC_IW_Q1FL].cnt = hmc_info->hmc_obj[IRDMA_HMC_IW_Q1].cnt / hmc_fpm_misc->q1_block_size; hmc_info->hmc_obj[IRDMA_HMC_IW_TIMER].cnt = (round_up(qpwanted, 512) / 512 + 1) * hmc_fpm_misc->timer_bucket; 
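/*
 * The sd_needed value computed just below comes from irdma_est_sd(): each HMC
 * object region is rounded up to 512 bytes, the regions are summed, and the
 * total is divided by the 2MB span of one segment descriptor, rounding up.
 * A stand-alone sketch of that arithmetic (names invented for the example):
 */
#include <stdint.h>

#define DEMO_SD_SPAN	(1ULL << 21)	/* one segment descriptor maps 2MB */

static uint32_t demo_est_sd(const uint64_t *region_bytes, unsigned int nregions)
{
	uint64_t total = 0;
	unsigned int i;

	for (i = 0; i < nregions; i++)
		total += (region_bytes[i] + 511) & ~511ULL;	/* round_up(x, 512) */

	/* divide by 2MB, adding one SD for any remainder */
	return (uint32_t)((total + DEMO_SD_SPAN - 1) >> 21);
}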
hmc_info->hmc_obj[IRDMA_HMC_IW_PBLE].cnt = pblewanted; sd_needed = irdma_est_sd(dev, hmc_info); irdma_debug(dev, IRDMA_DEBUG_HMC, "sd_needed = %d, max_sds=%d, mrwanted=%d, pblewanted=%d qpwanted=%d\n", sd_needed, max_sds, mrwanted, pblewanted, qpwanted); /* Do not reduce resources further. All objects fit with max SDs */ if (sd_needed <= max_sds) break; sd_diff = sd_needed - max_sds; if (sd_diff > 128) { if (!(loop_count % 2) && qpwanted > 128) { qpwanted /= 2; } else { mrwanted /= 2; pblewanted /= 2; } continue; } if (dev->cqp->hmc_profile != IRDMA_HMC_PROFILE_FAVOR_VF && pblewanted > (512 * FPM_MULTIPLIER * sd_diff)) { pblewanted -= 256 * FPM_MULTIPLIER * sd_diff; continue; - } else if (pblewanted > (100 * FPM_MULTIPLIER)) { + } else if (pblewanted > 100 * FPM_MULTIPLIER) { pblewanted -= 10 * FPM_MULTIPLIER; - } else if (pblewanted > FPM_MULTIPLIER) { + } else if (pblewanted > 16 * FPM_MULTIPLIER) { pblewanted -= FPM_MULTIPLIER; } else if (qpwanted <= 128) { if (hmc_info->hmc_obj[IRDMA_HMC_IW_FSIMC].cnt > 256) hmc_info->hmc_obj[IRDMA_HMC_IW_FSIMC].cnt /= 2; if (hmc_info->hmc_obj[IRDMA_HMC_IW_FSIAV].cnt > 256) hmc_info->hmc_obj[IRDMA_HMC_IW_FSIAV].cnt /= 2; } if (mrwanted > FPM_MULTIPLIER) mrwanted -= FPM_MULTIPLIER; if (!(loop_count % 10) && qpwanted > 128) { qpwanted /= 2; if (hmc_info->hmc_obj[IRDMA_HMC_IW_FSIAV].cnt > 256) hmc_info->hmc_obj[IRDMA_HMC_IW_FSIAV].cnt /= 2; } } while (loop_count < 2000); if (sd_needed > max_sds) { irdma_debug(dev, IRDMA_DEBUG_HMC, "cfg_fpm failed loop_cnt=%d, sd_needed=%d, max sd count %d\n", loop_count, sd_needed, hmc_info->sd_table.sd_cnt); return -EINVAL; } if (loop_count > 1 && sd_needed < max_sds) { pblewanted += (max_sds - sd_needed) * 256 * FPM_MULTIPLIER; hmc_info->hmc_obj[IRDMA_HMC_IW_PBLE].cnt = pblewanted; sd_needed = irdma_est_sd(dev, hmc_info); } irdma_debug(dev, IRDMA_DEBUG_HMC, "loop_cnt=%d, sd_needed=%d, qpcnt = %d, cqcnt=%d, mrcnt=%d, pblecnt=%d, mc=%d, ah=%d, max sd count %d, first sd index %d\n", loop_count, sd_needed, hmc_info->hmc_obj[IRDMA_HMC_IW_QP].cnt, hmc_info->hmc_obj[IRDMA_HMC_IW_CQ].cnt, hmc_info->hmc_obj[IRDMA_HMC_IW_MR].cnt, hmc_info->hmc_obj[IRDMA_HMC_IW_PBLE].cnt, hmc_info->hmc_obj[IRDMA_HMC_IW_FSIMC].cnt, hmc_info->hmc_obj[IRDMA_HMC_IW_FSIAV].cnt, hmc_info->sd_table.sd_cnt, hmc_info->first_sd_index); ret_code = irdma_sc_cfg_iw_fpm(dev, dev->hmc_fn_id); if (ret_code) { irdma_debug(dev, IRDMA_DEBUG_HMC, "cfg_iw_fpm returned error_code[x%08X]\n", readl(dev->hw_regs[IRDMA_CQPERRCODES])); return ret_code; } mem_size = sizeof(struct irdma_hmc_sd_entry) * (hmc_info->sd_table.sd_cnt + hmc_info->first_sd_index + 1); virt_mem.size = mem_size; virt_mem.va = kzalloc(virt_mem.size, GFP_KERNEL); if (!virt_mem.va) { irdma_debug(dev, IRDMA_DEBUG_HMC, "failed to allocate memory for sd_entry buffer\n"); return -ENOMEM; } hmc_info->sd_table.sd_entry = virt_mem.va; return ret_code; } /** * irdma_exec_cqp_cmd - execute cqp cmd when wqe are available * @dev: rdma device * @pcmdinfo: cqp command info */ static int irdma_exec_cqp_cmd(struct irdma_sc_dev *dev, struct cqp_cmds_info *pcmdinfo) { int status; struct irdma_dma_mem val_mem; bool alloc = false; dev->cqp_cmd_stats[pcmdinfo->cqp_cmd]++; switch (pcmdinfo->cqp_cmd) { case IRDMA_OP_CEQ_DESTROY: status = irdma_sc_ceq_destroy(pcmdinfo->in.u.ceq_destroy.ceq, pcmdinfo->in.u.ceq_destroy.scratch, pcmdinfo->post_sq); break; case IRDMA_OP_AEQ_DESTROY: status = irdma_sc_aeq_destroy(pcmdinfo->in.u.aeq_destroy.aeq, pcmdinfo->in.u.aeq_destroy.scratch, pcmdinfo->post_sq); break; case IRDMA_OP_CEQ_CREATE: 
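/*
 * The surrounding irdma_exec_cqp_cmd() is a plain opcode switch: each
 * cqp_cmds_info entry selects one WQE-building helper, and unknown opcodes
 * are rejected. A toy version of that dispatch shape (opcodes and handlers
 * are invented for the sketch):
 */
#include <errno.h>

enum demo_op {
	DEMO_OP_CREATE = 1,
	DEMO_OP_DESTROY,
};

static int demo_do_create(void) { return 0; }	/* stand-ins for the WQE builders */
static int demo_do_destroy(void) { return 0; }

static int demo_exec_cmd(enum demo_op op)
{
	switch (op) {
	case DEMO_OP_CREATE:
		return demo_do_create();
	case DEMO_OP_DESTROY:
		return demo_do_destroy();
	default:
		return -EOPNOTSUPP;	/* same fallback as the driver's default case */
	}
}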
status = irdma_sc_ceq_create(pcmdinfo->in.u.ceq_create.ceq, pcmdinfo->in.u.ceq_create.scratch, pcmdinfo->post_sq); break; case IRDMA_OP_AEQ_CREATE: status = irdma_sc_aeq_create(pcmdinfo->in.u.aeq_create.aeq, pcmdinfo->in.u.aeq_create.scratch, pcmdinfo->post_sq); break; case IRDMA_OP_QP_UPLOAD_CONTEXT: status = irdma_sc_qp_upload_context(pcmdinfo->in.u.qp_upload_context.dev, &pcmdinfo->in.u.qp_upload_context.info, pcmdinfo->in.u.qp_upload_context.scratch, pcmdinfo->post_sq); break; case IRDMA_OP_CQ_CREATE: status = irdma_sc_cq_create(pcmdinfo->in.u.cq_create.cq, pcmdinfo->in.u.cq_create.scratch, pcmdinfo->in.u.cq_create.check_overflow, pcmdinfo->post_sq); break; case IRDMA_OP_CQ_MODIFY: status = irdma_sc_cq_modify(pcmdinfo->in.u.cq_modify.cq, &pcmdinfo->in.u.cq_modify.info, pcmdinfo->in.u.cq_modify.scratch, pcmdinfo->post_sq); break; case IRDMA_OP_CQ_DESTROY: status = irdma_sc_cq_destroy(pcmdinfo->in.u.cq_destroy.cq, pcmdinfo->in.u.cq_destroy.scratch, pcmdinfo->post_sq); break; case IRDMA_OP_QP_FLUSH_WQES: status = irdma_sc_qp_flush_wqes(pcmdinfo->in.u.qp_flush_wqes.qp, &pcmdinfo->in.u.qp_flush_wqes.info, pcmdinfo->in.u.qp_flush_wqes.scratch, pcmdinfo->post_sq); break; case IRDMA_OP_GEN_AE: status = irdma_sc_gen_ae(pcmdinfo->in.u.gen_ae.qp, &pcmdinfo->in.u.gen_ae.info, pcmdinfo->in.u.gen_ae.scratch, pcmdinfo->post_sq); break; case IRDMA_OP_MANAGE_PUSH_PAGE: status = irdma_sc_manage_push_page(pcmdinfo->in.u.manage_push_page.cqp, &pcmdinfo->in.u.manage_push_page.info, pcmdinfo->in.u.manage_push_page.scratch, pcmdinfo->post_sq); break; case IRDMA_OP_UPDATE_PE_SDS: status = irdma_update_pe_sds(pcmdinfo->in.u.update_pe_sds.dev, &pcmdinfo->in.u.update_pe_sds.info, pcmdinfo->in.u.update_pe_sds.scratch); break; case IRDMA_OP_MANAGE_HMC_PM_FUNC_TABLE: /* switch to calling through the call table */ status = irdma_sc_manage_hmc_pm_func_table(pcmdinfo->in.u.manage_hmc_pm.dev->cqp, &pcmdinfo->in.u.manage_hmc_pm.info, pcmdinfo->in.u.manage_hmc_pm.scratch, true); break; case IRDMA_OP_SUSPEND: status = irdma_sc_suspend_qp(pcmdinfo->in.u.suspend_resume.cqp, pcmdinfo->in.u.suspend_resume.qp, pcmdinfo->in.u.suspend_resume.scratch); break; case IRDMA_OP_RESUME: status = irdma_sc_resume_qp(pcmdinfo->in.u.suspend_resume.cqp, pcmdinfo->in.u.suspend_resume.qp, pcmdinfo->in.u.suspend_resume.scratch); break; case IRDMA_OP_QUERY_FPM_VAL: val_mem.pa = pcmdinfo->in.u.query_fpm_val.fpm_val_pa; val_mem.va = pcmdinfo->in.u.query_fpm_val.fpm_val_va; status = irdma_sc_query_fpm_val(pcmdinfo->in.u.query_fpm_val.cqp, pcmdinfo->in.u.query_fpm_val.scratch, pcmdinfo->in.u.query_fpm_val.hmc_fn_id, &val_mem, true, IRDMA_CQP_WAIT_EVENT); break; case IRDMA_OP_COMMIT_FPM_VAL: val_mem.pa = pcmdinfo->in.u.commit_fpm_val.fpm_val_pa; val_mem.va = pcmdinfo->in.u.commit_fpm_val.fpm_val_va; status = irdma_sc_commit_fpm_val(pcmdinfo->in.u.commit_fpm_val.cqp, pcmdinfo->in.u.commit_fpm_val.scratch, pcmdinfo->in.u.commit_fpm_val.hmc_fn_id, &val_mem, true, IRDMA_CQP_WAIT_EVENT); break; case IRDMA_OP_STATS_ALLOCATE: alloc = true; /* fallthrough */ case IRDMA_OP_STATS_FREE: status = irdma_sc_manage_stats_inst(pcmdinfo->in.u.stats_manage.cqp, &pcmdinfo->in.u.stats_manage.info, alloc, pcmdinfo->in.u.stats_manage.scratch); break; case IRDMA_OP_STATS_GATHER: status = irdma_sc_gather_stats(pcmdinfo->in.u.stats_gather.cqp, &pcmdinfo->in.u.stats_gather.info, pcmdinfo->in.u.stats_gather.scratch); break; case IRDMA_OP_WS_MODIFY_NODE: status = irdma_sc_manage_ws_node(pcmdinfo->in.u.ws_node.cqp, &pcmdinfo->in.u.ws_node.info, IRDMA_MODIFY_NODE, 
pcmdinfo->in.u.ws_node.scratch); break; case IRDMA_OP_WS_DELETE_NODE: status = irdma_sc_manage_ws_node(pcmdinfo->in.u.ws_node.cqp, &pcmdinfo->in.u.ws_node.info, IRDMA_DEL_NODE, pcmdinfo->in.u.ws_node.scratch); break; case IRDMA_OP_WS_ADD_NODE: status = irdma_sc_manage_ws_node(pcmdinfo->in.u.ws_node.cqp, &pcmdinfo->in.u.ws_node.info, IRDMA_ADD_NODE, pcmdinfo->in.u.ws_node.scratch); break; case IRDMA_OP_SET_UP_MAP: status = irdma_sc_set_up_map(pcmdinfo->in.u.up_map.cqp, &pcmdinfo->in.u.up_map.info, pcmdinfo->in.u.up_map.scratch); break; case IRDMA_OP_QUERY_RDMA_FEATURES: status = irdma_sc_query_rdma_features(pcmdinfo->in.u.query_rdma.cqp, &pcmdinfo->in.u.query_rdma.query_buff_mem, pcmdinfo->in.u.query_rdma.scratch); break; case IRDMA_OP_DELETE_ARP_CACHE_ENTRY: status = irdma_sc_del_arp_cache_entry(pcmdinfo->in.u.del_arp_cache_entry.cqp, pcmdinfo->in.u.del_arp_cache_entry.scratch, pcmdinfo->in.u.del_arp_cache_entry.arp_index, pcmdinfo->post_sq); break; case IRDMA_OP_MANAGE_APBVT_ENTRY: status = irdma_sc_manage_apbvt_entry(pcmdinfo->in.u.manage_apbvt_entry.cqp, &pcmdinfo->in.u.manage_apbvt_entry.info, pcmdinfo->in.u.manage_apbvt_entry.scratch, pcmdinfo->post_sq); break; case IRDMA_OP_MANAGE_QHASH_TABLE_ENTRY: status = irdma_sc_manage_qhash_table_entry(pcmdinfo->in.u.manage_qhash_table_entry.cqp, &pcmdinfo->in.u.manage_qhash_table_entry.info, pcmdinfo->in.u.manage_qhash_table_entry.scratch, pcmdinfo->post_sq); break; case IRDMA_OP_QP_MODIFY: status = irdma_sc_qp_modify(pcmdinfo->in.u.qp_modify.qp, &pcmdinfo->in.u.qp_modify.info, pcmdinfo->in.u.qp_modify.scratch, pcmdinfo->post_sq); break; case IRDMA_OP_QP_CREATE: status = irdma_sc_qp_create(pcmdinfo->in.u.qp_create.qp, &pcmdinfo->in.u.qp_create.info, pcmdinfo->in.u.qp_create.scratch, pcmdinfo->post_sq); break; case IRDMA_OP_QP_DESTROY: status = irdma_sc_qp_destroy(pcmdinfo->in.u.qp_destroy.qp, pcmdinfo->in.u.qp_destroy.scratch, pcmdinfo->in.u.qp_destroy.remove_hash_idx, pcmdinfo->in.u.qp_destroy.ignore_mw_bnd, pcmdinfo->post_sq); break; case IRDMA_OP_ALLOC_STAG: status = irdma_sc_alloc_stag(pcmdinfo->in.u.alloc_stag.dev, &pcmdinfo->in.u.alloc_stag.info, pcmdinfo->in.u.alloc_stag.scratch, pcmdinfo->post_sq); break; case IRDMA_OP_MR_REG_NON_SHARED: status = irdma_sc_mr_reg_non_shared(pcmdinfo->in.u.mr_reg_non_shared.dev, &pcmdinfo->in.u.mr_reg_non_shared.info, pcmdinfo->in.u.mr_reg_non_shared.scratch, pcmdinfo->post_sq); break; case IRDMA_OP_DEALLOC_STAG: status = irdma_sc_dealloc_stag(pcmdinfo->in.u.dealloc_stag.dev, &pcmdinfo->in.u.dealloc_stag.info, pcmdinfo->in.u.dealloc_stag.scratch, pcmdinfo->post_sq); break; case IRDMA_OP_MW_ALLOC: status = irdma_sc_mw_alloc(pcmdinfo->in.u.mw_alloc.dev, &pcmdinfo->in.u.mw_alloc.info, pcmdinfo->in.u.mw_alloc.scratch, pcmdinfo->post_sq); break; case IRDMA_OP_ADD_ARP_CACHE_ENTRY: status = irdma_sc_add_arp_cache_entry(pcmdinfo->in.u.add_arp_cache_entry.cqp, &pcmdinfo->in.u.add_arp_cache_entry.info, pcmdinfo->in.u.add_arp_cache_entry.scratch, pcmdinfo->post_sq); break; case IRDMA_OP_ALLOC_LOCAL_MAC_ENTRY: status = irdma_sc_alloc_local_mac_entry(pcmdinfo->in.u.alloc_local_mac_entry.cqp, pcmdinfo->in.u.alloc_local_mac_entry.scratch, pcmdinfo->post_sq); break; case IRDMA_OP_ADD_LOCAL_MAC_ENTRY: status = irdma_sc_add_local_mac_entry(pcmdinfo->in.u.add_local_mac_entry.cqp, &pcmdinfo->in.u.add_local_mac_entry.info, pcmdinfo->in.u.add_local_mac_entry.scratch, pcmdinfo->post_sq); break; case IRDMA_OP_DELETE_LOCAL_MAC_ENTRY: status = irdma_sc_del_local_mac_entry(pcmdinfo->in.u.del_local_mac_entry.cqp, 
pcmdinfo->in.u.del_local_mac_entry.scratch, pcmdinfo->in.u.del_local_mac_entry.entry_idx, pcmdinfo->in.u.del_local_mac_entry.ignore_ref_count, pcmdinfo->post_sq); break; case IRDMA_OP_AH_CREATE: status = irdma_sc_create_ah(pcmdinfo->in.u.ah_create.cqp, &pcmdinfo->in.u.ah_create.info, pcmdinfo->in.u.ah_create.scratch); break; case IRDMA_OP_AH_DESTROY: status = irdma_sc_destroy_ah(pcmdinfo->in.u.ah_destroy.cqp, &pcmdinfo->in.u.ah_destroy.info, pcmdinfo->in.u.ah_destroy.scratch); break; case IRDMA_OP_MC_CREATE: status = irdma_sc_create_mcast_grp(pcmdinfo->in.u.mc_create.cqp, &pcmdinfo->in.u.mc_create.info, pcmdinfo->in.u.mc_create.scratch); break; case IRDMA_OP_MC_DESTROY: status = irdma_sc_destroy_mcast_grp(pcmdinfo->in.u.mc_destroy.cqp, &pcmdinfo->in.u.mc_destroy.info, pcmdinfo->in.u.mc_destroy.scratch); break; case IRDMA_OP_MC_MODIFY: status = irdma_sc_modify_mcast_grp(pcmdinfo->in.u.mc_modify.cqp, &pcmdinfo->in.u.mc_modify.info, pcmdinfo->in.u.mc_modify.scratch); break; default: status = -EOPNOTSUPP; break; } return status; } /** * irdma_process_cqp_cmd - process all cqp commands * @dev: sc device struct * @pcmdinfo: cqp command info */ int irdma_process_cqp_cmd(struct irdma_sc_dev *dev, struct cqp_cmds_info *pcmdinfo) { int status = 0; unsigned long flags; spin_lock_irqsave(&dev->cqp_lock, flags); if (list_empty(&dev->cqp_cmd_head) && !irdma_cqp_ring_full(dev->cqp)) status = irdma_exec_cqp_cmd(dev, pcmdinfo); else list_add_tail(&pcmdinfo->cqp_cmd_entry, &dev->cqp_cmd_head); spin_unlock_irqrestore(&dev->cqp_lock, flags); return status; } /** * irdma_process_bh - called from tasklet for cqp list * @dev: sc device struct */ int irdma_process_bh(struct irdma_sc_dev *dev) { int status = 0; struct cqp_cmds_info *pcmdinfo; unsigned long flags; spin_lock_irqsave(&dev->cqp_lock, flags); while (!list_empty(&dev->cqp_cmd_head) && !irdma_cqp_ring_full(dev->cqp)) { pcmdinfo = (struct cqp_cmds_info *)irdma_remove_cqp_head(dev); status = irdma_exec_cqp_cmd(dev, pcmdinfo); if (status) break; } spin_unlock_irqrestore(&dev->cqp_lock, flags); return status; } /** * irdma_cfg_aeq- Configure AEQ interrupt * @dev: pointer to the device structure * @idx: vector index * @enable: True to enable, False disables */ void irdma_cfg_aeq(struct irdma_sc_dev *dev, u32 idx, bool enable) { u32 reg_val; reg_val = FIELD_PREP(IRDMA_PFINT_AEQCTL_CAUSE_ENA, enable) | FIELD_PREP(IRDMA_PFINT_AEQCTL_MSIX_INDX, idx) | FIELD_PREP(IRDMA_PFINT_AEQCTL_ITR_INDX, IRDMA_IDX_NOITR); writel(reg_val, dev->hw_regs[IRDMA_PFINT_AEQCTL]); } /** * sc_vsi_update_stats - Update statistics * @vsi: sc_vsi instance to update */ void sc_vsi_update_stats(struct irdma_sc_vsi *vsi) { struct irdma_gather_stats *gather_stats; struct irdma_gather_stats *last_gather_stats; gather_stats = vsi->pestat->gather_info.gather_stats_va; last_gather_stats = vsi->pestat->gather_info.last_gather_stats_va; irdma_update_stats(&vsi->pestat->hw_stats, gather_stats, last_gather_stats, vsi->dev->hw_stats_map, vsi->dev->hw_attrs.max_stat_idx); } /** * irdma_wait_pe_ready - Check if firmware is ready * @dev: provides access to registers */ static int irdma_wait_pe_ready(struct irdma_sc_dev *dev) { u32 statuscpu0; u32 statuscpu1; u32 statuscpu2; u32 retrycount = 0; do { statuscpu0 = readl(dev->hw_regs[IRDMA_GLPE_CPUSTATUS0]); statuscpu1 = readl(dev->hw_regs[IRDMA_GLPE_CPUSTATUS1]); statuscpu2 = readl(dev->hw_regs[IRDMA_GLPE_CPUSTATUS2]); if (statuscpu0 == 0x80 && statuscpu1 == 0x80 && statuscpu2 == 0x80) return 0; mdelay(1000); } while (retrycount++ < 
dev->hw_attrs.max_pe_ready_count); return -1; } static inline void irdma_sc_init_hw(struct irdma_sc_dev *dev) { switch (dev->hw_attrs.uk_attrs.hw_rev) { case IRDMA_GEN_2: icrdma_init_hw(dev); break; } } /** * irdma_sc_dev_init - Initialize control part of device * @dev: Device pointer * @info: Device init info */ int irdma_sc_dev_init(struct irdma_sc_dev *dev, struct irdma_device_init_info *info) { u32 val; int ret_code = 0; u8 db_size; INIT_LIST_HEAD(&dev->cqp_cmd_head); /* for CQP command backlog */ mutex_init(&dev->ws_mutex); dev->debug_mask = info->debug_mask; dev->hmc_fn_id = info->hmc_fn_id; dev->fpm_query_buf_pa = info->fpm_query_buf_pa; dev->fpm_query_buf = info->fpm_query_buf; dev->fpm_commit_buf_pa = info->fpm_commit_buf_pa; dev->fpm_commit_buf = info->fpm_commit_buf; dev->hw = info->hw; dev->hw->hw_addr = info->bar0; /* Setup the hardware limits, hmc may limit further */ dev->hw_attrs.min_hw_qp_id = IRDMA_MIN_IW_QP_ID; dev->hw_attrs.min_hw_aeq_size = IRDMA_MIN_AEQ_ENTRIES; dev->hw_attrs.max_hw_aeq_size = IRDMA_MAX_AEQ_ENTRIES; dev->hw_attrs.min_hw_ceq_size = IRDMA_MIN_CEQ_ENTRIES; dev->hw_attrs.max_hw_ceq_size = IRDMA_MAX_CEQ_ENTRIES; dev->hw_attrs.uk_attrs.min_hw_cq_size = IRDMA_MIN_CQ_SIZE; dev->hw_attrs.uk_attrs.max_hw_cq_size = IRDMA_MAX_CQ_SIZE; dev->hw_attrs.max_hw_outbound_msg_size = IRDMA_MAX_OUTBOUND_MSG_SIZE; dev->hw_attrs.max_mr_size = IRDMA_MAX_MR_SIZE; dev->hw_attrs.max_hw_inbound_msg_size = IRDMA_MAX_INBOUND_MSG_SIZE; dev->hw_attrs.max_hw_device_pages = IRDMA_MAX_PUSH_PAGE_COUNT; dev->hw_attrs.uk_attrs.max_hw_inline = IRDMA_MAX_INLINE_DATA_SIZE; dev->hw_attrs.max_hw_wqes = IRDMA_MAX_WQ_ENTRIES; dev->hw_attrs.max_qp_wr = IRDMA_MAX_QP_WRS(IRDMA_MAX_QUANTA_PER_WR); dev->hw_attrs.uk_attrs.max_hw_rq_quanta = IRDMA_QP_SW_MAX_RQ_QUANTA; dev->hw_attrs.uk_attrs.max_hw_wq_quanta = IRDMA_QP_SW_MAX_WQ_QUANTA; dev->hw_attrs.max_hw_pds = IRDMA_MAX_PDS; dev->hw_attrs.max_hw_ena_vf_count = IRDMA_MAX_PE_ENA_VF_COUNT; dev->hw_attrs.max_pe_ready_count = 14; dev->hw_attrs.max_done_count = IRDMA_DONE_COUNT; dev->hw_attrs.max_sleep_count = IRDMA_SLEEP_COUNT; dev->hw_attrs.max_cqp_compl_wait_time_ms = CQP_COMPL_WAIT_TIME_MS; irdma_sc_init_hw(dev); if (irdma_wait_pe_ready(dev)) return -ETIMEDOUT; val = readl(dev->hw_regs[IRDMA_GLPCI_LBARCTRL]); db_size = (u8)FIELD_GET(IRDMA_GLPCI_LBARCTRL_PE_DB_SIZE, val); if (db_size != IRDMA_PE_DB_SIZE_4M && db_size != IRDMA_PE_DB_SIZE_8M) { irdma_debug(dev, IRDMA_DEBUG_DEV, "RDMA PE doorbell is not enabled in CSR val 0x%x db_size=%d\n", val, db_size); return -ENODEV; } dev->db_addr = dev->hw->hw_addr + (uintptr_t)dev->hw_regs[IRDMA_DB_ADDR_OFFSET]; return ret_code; } /** * irdma_stat_val - Extract HW counter value from statistics buffer * @stats_val: pointer to statistics buffer * @byteoff: byte offset of counter value in the buffer (8B-aligned) * @bitoff: bit offset of counter value within 8B entry * @bitmask: maximum counter value (e.g. 0xffffff for 24-bit counter) */ static inline u64 irdma_stat_val(const u64 *stats_val, u16 byteoff, u8 bitoff, u64 bitmask){ u16 idx = byteoff / sizeof(*stats_val); return (stats_val[idx] >> bitoff) & bitmask; } /** * irdma_stat_delta - Calculate counter delta * @new_val: updated counter value * @old_val: last counter value * @max_val: maximum counter value (e.g. 
0xffffff for 24-bit counter) */ static inline u64 irdma_stat_delta(u64 new_val, u64 old_val, u64 max_val) { if (new_val >= old_val) return new_val - old_val; else /* roll-over case */ return max_val - old_val + new_val + 1; } /** * irdma_update_stats - Update statistics * @hw_stats: hw_stats instance to update * @gather_stats: updated stat counters * @last_gather_stats: last stat counters * @map: HW stat map (hw_stats => gather_stats) * @max_stat_idx: number of HW stats */ void irdma_update_stats(struct irdma_dev_hw_stats *hw_stats, struct irdma_gather_stats *gather_stats, struct irdma_gather_stats *last_gather_stats, const struct irdma_hw_stat_map *map, u16 max_stat_idx) { u64 *stats_val = hw_stats->stats_val; u16 i; for (i = 0; i < max_stat_idx; i++) { u64 new_val = irdma_stat_val(gather_stats->val, map[i].byteoff, map[i].bitoff, map[i].bitmask); u64 last_val = irdma_stat_val(last_gather_stats->val, map[i].byteoff, map[i].bitoff, map[i].bitmask); stats_val[i] += irdma_stat_delta(new_val, last_val, map[i].bitmask); } irdma_memcpy(last_gather_stats, gather_stats, sizeof(*last_gather_stats)); } diff --git a/sys/dev/irdma/irdma_defs.h b/sys/dev/irdma/irdma_defs.h index 37e664ba69b3..dcd6a0b5956b 100644 --- a/sys/dev/irdma/irdma_defs.h +++ b/sys/dev/irdma/irdma_defs.h @@ -1,1631 +1,1634 @@ /*- * SPDX-License-Identifier: GPL-2.0 or Linux-OpenIB * - * Copyright (c) 2015 - 2022 Intel Corporation + * Copyright (c) 2015 - 2023 Intel Corporation * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU * General Public License (GPL) Version 2, available from the file * COPYING in the main directory of this source tree, or the * OpenFabrics.org BSD license below: * * Redistribution and use in source and binary forms, with or * without modification, are permitted provided that the following * conditions are met: * * - Redistributions of source code must retain the above * copyright notice, this list of conditions and the following * disclaimer. * * - Redistributions in binary form must reproduce the above * copyright notice, this list of conditions and the following * disclaimer in the documentation and/or other materials * provided with the distribution. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
*/ #ifndef IRDMA_DEFS_H #define IRDMA_DEFS_H #define IRDMA_FIRST_USER_QP_ID 3 #define ECN_CODE_PT_MASK 3 #define ECN_CODE_PT_VAL 2 #define IRDMA_PUSH_OFFSET (8 * 1024 * 1024) #define IRDMA_PF_FIRST_PUSH_PAGE_INDEX 16 #define IRDMA_PF_BAR_RSVD (60 * 1024) #define IRDMA_PE_DB_SIZE_4M 1 #define IRDMA_PE_DB_SIZE_8M 2 #define IRDMA_IRD_HW_SIZE_4 0 #define IRDMA_IRD_HW_SIZE_16 1 #define IRDMA_IRD_HW_SIZE_64 2 #define IRDMA_IRD_HW_SIZE_128 3 #define IRDMA_IRD_HW_SIZE_256 4 #define IRDMA_QP_STATE_INVALID 0 #define IRDMA_QP_STATE_IDLE 1 #define IRDMA_QP_STATE_RTS 2 #define IRDMA_QP_STATE_CLOSING 3 #define IRDMA_QP_STATE_SQD 3 #define IRDMA_QP_STATE_RTR 4 #define IRDMA_QP_STATE_TERMINATE 5 #define IRDMA_QP_STATE_ERROR 6 #define IRDMA_MAX_USER_PRIORITY 8 #define IRDMA_DSCP_NUM_VAL 64 #define IRDMA_MAX_TRAFFIC_CLASS 8 #define IRDMA_MAX_STATS_COUNT 128 #define IRDMA_FIRST_NON_PF_STAT 4 #define IRDMA_MIN_MTU_IPV4 576 #define IRDMA_MIN_MTU_IPV6 1280 #define IRDMA_MTU_TO_MSS_IPV4 40 #define IRDMA_MTU_TO_MSS_IPV6 60 #define IRDMA_DEFAULT_MTU 1500 #define Q2_FPSN_OFFSET 64 #define TERM_DDP_LEN_TAGGED 14 #define TERM_DDP_LEN_UNTAGGED 18 #define TERM_RDMA_LEN 28 #define RDMA_OPCODE_M 0x0f #define RDMA_READ_REQ_OPCODE 1 #define Q2_BAD_FRAME_OFFSET 72 #define CQE_MAJOR_DRV 0x8000 #define IRDMA_TERM_SENT 1 #define IRDMA_TERM_RCVD 2 #define IRDMA_TERM_DONE 4 #define IRDMA_MAC_HLEN 14 #define IRDMA_BYTE_0 0 #define IRDMA_BYTE_8 8 #define IRDMA_BYTE_16 16 #define IRDMA_BYTE_24 24 #define IRDMA_BYTE_32 32 #define IRDMA_BYTE_40 40 #define IRDMA_BYTE_48 48 #define IRDMA_BYTE_56 56 #define IRDMA_BYTE_64 64 #define IRDMA_BYTE_72 72 #define IRDMA_BYTE_80 80 #define IRDMA_BYTE_88 88 #define IRDMA_BYTE_96 96 #define IRDMA_BYTE_104 104 #define IRDMA_BYTE_112 112 #define IRDMA_BYTE_120 120 #define IRDMA_BYTE_128 128 #define IRDMA_BYTE_136 136 #define IRDMA_BYTE_144 144 #define IRDMA_BYTE_152 152 #define IRDMA_BYTE_160 160 #define IRDMA_BYTE_168 168 #define IRDMA_BYTE_176 176 #define IRDMA_BYTE_184 184 #define IRDMA_BYTE_192 192 #define IRDMA_BYTE_200 200 #define IRDMA_BYTE_208 208 #define IRDMA_BYTE_216 216 #define IRDMA_CQP_WAIT_POLL_REGS 1 #define IRDMA_CQP_WAIT_POLL_CQ 2 #define IRDMA_CQP_WAIT_EVENT 3 #define IRDMA_AE_SOURCE_RSVD 0x0 #define IRDMA_AE_SOURCE_RQ 0x1 #define IRDMA_AE_SOURCE_RQ_0011 0x3 #define IRDMA_AE_SOURCE_CQ 0x2 #define IRDMA_AE_SOURCE_CQ_0110 0x6 #define IRDMA_AE_SOURCE_CQ_1010 0xa #define IRDMA_AE_SOURCE_CQ_1110 0xe #define IRDMA_AE_SOURCE_SQ 0x5 #define IRDMA_AE_SOURCE_SQ_0111 0x7 #define IRDMA_AE_SOURCE_IN_WR 0x9 #define IRDMA_AE_SOURCE_IN_RR 0xb #define IRDMA_AE_SOURCE_OUT_RR 0xd #define IRDMA_AE_SOURCE_OUT_RR_1111 0xf #define IRDMA_AE_SOURCE_RSRC_EXHT_Q1 0x1 #define IRDMA_AE_SOURCE_RSRC_EXHT_XT_RR 0x5 #define IRDMA_TCP_STATE_NON_EXISTENT 0 #define IRDMA_TCP_STATE_CLOSED 1 #define IRDMA_TCP_STATE_LISTEN 2 #define IRDMA_STATE_SYN_SEND 3 #define IRDMA_TCP_STATE_SYN_RECEIVED 4 #define IRDMA_TCP_STATE_ESTABLISHED 5 #define IRDMA_TCP_STATE_CLOSE_WAIT 6 #define IRDMA_TCP_STATE_FIN_WAIT_1 7 #define IRDMA_TCP_STATE_CLOSING 8 #define IRDMA_TCP_STATE_LAST_ACK 9 #define IRDMA_TCP_STATE_FIN_WAIT_2 10 #define IRDMA_TCP_STATE_TIME_WAIT 11 #define IRDMA_TCP_STATE_RESERVED_1 12 #define IRDMA_TCP_STATE_RESERVED_2 13 #define IRDMA_TCP_STATE_RESERVED_3 14 #define IRDMA_TCP_STATE_RESERVED_4 15 #define IRDMA_CQP_SW_SQSIZE_4 4 #define IRDMA_CQP_SW_SQSIZE_2048 2048 #define IRDMA_CQ_TYPE_IWARP 1 #define IRDMA_CQ_TYPE_ILQ 2 #define IRDMA_CQ_TYPE_IEQ 3 #define IRDMA_CQ_TYPE_CQP 4 #define IRDMA_DONE_COUNT 1000 #define 
IRDMA_SLEEP_COUNT 10 #define IRDMA_UPDATE_SD_BUFF_SIZE 128 #define IRDMA_FEATURE_BUF_SIZE (8 * IRDMA_MAX_FEATURES) #define IRDMA_MAX_QUANTA_PER_WR 8 #define IRDMA_QP_SW_MAX_WQ_QUANTA 32768 #define IRDMA_QP_SW_MAX_SQ_QUANTA 32768 #define IRDMA_QP_SW_MAX_RQ_QUANTA 32768 #define IRDMA_MAX_QP_WRS(max_quanta_per_wr) \ ((IRDMA_QP_SW_MAX_WQ_QUANTA - IRDMA_SQ_RSVD) / (max_quanta_per_wr)) #define IRDMAQP_TERM_SEND_TERM_AND_FIN 0 #define IRDMAQP_TERM_SEND_TERM_ONLY 1 #define IRDMAQP_TERM_SEND_FIN_ONLY 2 #define IRDMAQP_TERM_DONOT_SEND_TERM_OR_FIN 3 #define IRDMA_QP_TYPE_IWARP 1 #define IRDMA_QP_TYPE_UDA 2 #define IRDMA_QP_TYPE_ROCE_RC 3 #define IRDMA_QP_TYPE_ROCE_UD 4 #define IRDMA_HW_PAGE_SIZE 4096 #define IRDMA_HW_PAGE_SHIFT 12 #define IRDMA_CQE_QTYPE_RQ 0 #define IRDMA_CQE_QTYPE_SQ 1 #define IRDMA_QP_SW_MIN_WQSIZE 8 /* in WRs*/ #define IRDMA_QP_WQE_MIN_SIZE 32 #define IRDMA_QP_WQE_MAX_SIZE 256 #define IRDMA_QP_WQE_MIN_QUANTA 1 #define IRDMA_MAX_RQ_WQE_SHIFT_GEN1 2 #define IRDMA_MAX_RQ_WQE_SHIFT_GEN2 3 #define IRDMA_SQ_RSVD 258 #define IRDMA_RQ_RSVD 1 #define IRDMA_FEATURE_RTS_AE BIT_ULL(0) #define IRDMA_FEATURE_CQ_RESIZE BIT_ULL(1) #define IRDMA_FEATURE_RELAX_RQ_ORDER BIT_ULL(2) #define IRDMA_FEATURE_64_BYTE_CQE BIT_ULL(5) #define IRDMAQP_OP_RDMA_WRITE 0x00 #define IRDMAQP_OP_RDMA_READ 0x01 #define IRDMAQP_OP_RDMA_SEND 0x03 #define IRDMAQP_OP_RDMA_SEND_INV 0x04 #define IRDMAQP_OP_RDMA_SEND_SOL_EVENT 0x05 #define IRDMAQP_OP_RDMA_SEND_SOL_EVENT_INV 0x06 #define IRDMAQP_OP_BIND_MW 0x08 #define IRDMAQP_OP_FAST_REGISTER 0x09 #define IRDMAQP_OP_LOCAL_INVALIDATE 0x0a #define IRDMAQP_OP_RDMA_READ_LOC_INV 0x0b #define IRDMAQP_OP_NOP 0x0c #define IRDMAQP_OP_RDMA_WRITE_SOL 0x0d #define IRDMAQP_OP_GEN_RTS_AE 0x30 enum irdma_cqp_op_type { IRDMA_OP_CEQ_DESTROY = 1, IRDMA_OP_AEQ_DESTROY = 2, IRDMA_OP_DELETE_ARP_CACHE_ENTRY = 3, IRDMA_OP_MANAGE_APBVT_ENTRY = 4, IRDMA_OP_CEQ_CREATE = 5, IRDMA_OP_AEQ_CREATE = 6, IRDMA_OP_MANAGE_QHASH_TABLE_ENTRY = 7, IRDMA_OP_QP_MODIFY = 8, IRDMA_OP_QP_UPLOAD_CONTEXT = 9, IRDMA_OP_CQ_CREATE = 10, IRDMA_OP_CQ_DESTROY = 11, IRDMA_OP_QP_CREATE = 12, IRDMA_OP_QP_DESTROY = 13, IRDMA_OP_ALLOC_STAG = 14, IRDMA_OP_MR_REG_NON_SHARED = 15, IRDMA_OP_DEALLOC_STAG = 16, IRDMA_OP_MW_ALLOC = 17, IRDMA_OP_QP_FLUSH_WQES = 18, IRDMA_OP_ADD_ARP_CACHE_ENTRY = 19, IRDMA_OP_MANAGE_PUSH_PAGE = 20, IRDMA_OP_UPDATE_PE_SDS = 21, IRDMA_OP_MANAGE_HMC_PM_FUNC_TABLE = 22, IRDMA_OP_SUSPEND = 23, IRDMA_OP_RESUME = 24, - IRDMA_OP_MANAGE_VF_PBLE_BP = 25, + IRDMA_OP_MANAGE_VCHNL_REQ_PBLE_BP = 25, IRDMA_OP_QUERY_FPM_VAL = 26, IRDMA_OP_COMMIT_FPM_VAL = 27, - IRDMA_OP_REQ_CMDS = 28, - IRDMA_OP_CMPL_CMDS = 29, - IRDMA_OP_AH_CREATE = 30, - IRDMA_OP_AH_MODIFY = 31, - IRDMA_OP_AH_DESTROY = 32, - IRDMA_OP_MC_CREATE = 33, - IRDMA_OP_MC_DESTROY = 34, - IRDMA_OP_MC_MODIFY = 35, - IRDMA_OP_STATS_ALLOCATE = 36, - IRDMA_OP_STATS_FREE = 37, - IRDMA_OP_STATS_GATHER = 38, - IRDMA_OP_WS_ADD_NODE = 39, - IRDMA_OP_WS_MODIFY_NODE = 40, - IRDMA_OP_WS_DELETE_NODE = 41, - IRDMA_OP_WS_FAILOVER_START = 42, - IRDMA_OP_WS_FAILOVER_COMPLETE = 43, - IRDMA_OP_SET_UP_MAP = 44, - IRDMA_OP_GEN_AE = 45, - IRDMA_OP_QUERY_RDMA_FEATURES = 46, - IRDMA_OP_ALLOC_LOCAL_MAC_ENTRY = 47, - IRDMA_OP_ADD_LOCAL_MAC_ENTRY = 48, - IRDMA_OP_DELETE_LOCAL_MAC_ENTRY = 49, - IRDMA_OP_CQ_MODIFY = 50, + IRDMA_OP_AH_CREATE = 28, + IRDMA_OP_AH_MODIFY = 29, + IRDMA_OP_AH_DESTROY = 30, + IRDMA_OP_MC_CREATE = 31, + IRDMA_OP_MC_DESTROY = 32, + IRDMA_OP_MC_MODIFY = 33, + IRDMA_OP_STATS_ALLOCATE = 34, + IRDMA_OP_STATS_FREE = 35, + IRDMA_OP_STATS_GATHER = 36, + 
IRDMA_OP_WS_ADD_NODE = 37, + IRDMA_OP_WS_MODIFY_NODE = 38, + IRDMA_OP_WS_DELETE_NODE = 39, + IRDMA_OP_WS_FAILOVER_START = 40, + IRDMA_OP_WS_FAILOVER_COMPLETE = 41, + IRDMA_OP_SET_UP_MAP = 42, + IRDMA_OP_GEN_AE = 43, + IRDMA_OP_QUERY_RDMA_FEATURES = 44, + IRDMA_OP_ALLOC_LOCAL_MAC_ENTRY = 45, + IRDMA_OP_ADD_LOCAL_MAC_ENTRY = 46, + IRDMA_OP_DELETE_LOCAL_MAC_ENTRY = 47, + IRDMA_OP_CQ_MODIFY = 48, /* Must be last entry */ - IRDMA_MAX_CQP_OPS = 51, + IRDMA_MAX_CQP_OPS = 49, }; /* CQP SQ WQES */ -#define IRDMA_CQP_OP_CREATE_QP 0 -#define IRDMA_CQP_OP_MODIFY_QP 0x1 +#define IRDMA_CQP_OP_CREATE_QP 0x00 +#define IRDMA_CQP_OP_MODIFY_QP 0x01 #define IRDMA_CQP_OP_DESTROY_QP 0x02 #define IRDMA_CQP_OP_CREATE_CQ 0x03 #define IRDMA_CQP_OP_MODIFY_CQ 0x04 #define IRDMA_CQP_OP_DESTROY_CQ 0x05 #define IRDMA_CQP_OP_ALLOC_STAG 0x09 #define IRDMA_CQP_OP_REG_MR 0x0a #define IRDMA_CQP_OP_QUERY_STAG 0x0b #define IRDMA_CQP_OP_REG_SMR 0x0c #define IRDMA_CQP_OP_DEALLOC_STAG 0x0d #define IRDMA_CQP_OP_MANAGE_LOC_MAC_TABLE 0x0e #define IRDMA_CQP_OP_MANAGE_ARP 0x0f -#define IRDMA_CQP_OP_MANAGE_VF_PBLE_BP 0x10 +#define IRDMA_CQP_OP_MANAGE_VCHNL_REQ_PBLE_BP 0x10 #define IRDMA_CQP_OP_MANAGE_PUSH_PAGES 0x11 #define IRDMA_CQP_OP_QUERY_RDMA_FEATURES 0x12 #define IRDMA_CQP_OP_UPLOAD_CONTEXT 0x13 #define IRDMA_CQP_OP_ALLOCATE_LOC_MAC_TABLE_ENTRY 0x14 -#define IRDMA_CQP_OP_UPLOAD_CONTEXT 0x13 #define IRDMA_CQP_OP_MANAGE_HMC_PM_FUNC_TABLE 0x15 #define IRDMA_CQP_OP_CREATE_CEQ 0x16 #define IRDMA_CQP_OP_DESTROY_CEQ 0x18 #define IRDMA_CQP_OP_CREATE_AEQ 0x19 #define IRDMA_CQP_OP_DESTROY_AEQ 0x1b #define IRDMA_CQP_OP_CREATE_ADDR_HANDLE 0x1c #define IRDMA_CQP_OP_MODIFY_ADDR_HANDLE 0x1d #define IRDMA_CQP_OP_DESTROY_ADDR_HANDLE 0x1e #define IRDMA_CQP_OP_UPDATE_PE_SDS 0x1f #define IRDMA_CQP_OP_QUERY_FPM_VAL 0x20 #define IRDMA_CQP_OP_COMMIT_FPM_VAL 0x21 #define IRDMA_CQP_OP_FLUSH_WQES 0x22 /* IRDMA_CQP_OP_GEN_AE is the same value as IRDMA_CQP_OP_FLUSH_WQES */ #define IRDMA_CQP_OP_GEN_AE 0x22 #define IRDMA_CQP_OP_MANAGE_APBVT 0x23 #define IRDMA_CQP_OP_NOP 0x24 #define IRDMA_CQP_OP_MANAGE_QUAD_HASH_TABLE_ENTRY 0x25 #define IRDMA_CQP_OP_CREATE_MCAST_GRP 0x26 #define IRDMA_CQP_OP_MODIFY_MCAST_GRP 0x27 #define IRDMA_CQP_OP_DESTROY_MCAST_GRP 0x28 #define IRDMA_CQP_OP_SUSPEND_QP 0x29 #define IRDMA_CQP_OP_RESUME_QP 0x2a #define IRDMA_CQP_OP_SHMC_PAGES_ALLOCATED 0x2b #define IRDMA_CQP_OP_WORK_SCHED_NODE 0x2c #define IRDMA_CQP_OP_MANAGE_STATS 0x2d #define IRDMA_CQP_OP_GATHER_STATS 0x2e #define IRDMA_CQP_OP_UP_MAP 0x2f #ifndef LS_64_1 #define LS_64_1(val, bits) ((u64)(uintptr_t)(val) << (bits)) #define RS_64_1(val, bits) ((u64)(uintptr_t)(val) >> (bits)) #define LS_32_1(val, bits) ((u32)((val) << (bits))) #define RS_32_1(val, bits) ((u32)((val) >> (bits))) #endif #ifndef GENMASK_ULL #define GENMASK_ULL(high, low) ((0xFFFFFFFFFFFFFFFFULL >> (64ULL - ((high) - (low) + 1ULL))) << (low)) #endif /* GENMASK_ULL */ #ifndef GENMASK #define GENMASK(high, low) ((0xFFFFFFFFUL >> (32UL - ((high) - (low) + 1UL))) << (low)) #endif /* GENMASK */ #ifndef FIELD_PREP #define FIELD_PREP(mask, val) (((u64)(val) << mask##_S) & (mask)) #define FIELD_GET(mask, val) (((val) & mask) >> mask##_S) #endif /* FIELD_PREP */ #define FLD_LS_64(dev, val, field) \ (((u64)(val) << (dev)->hw_shifts[field ## _S]) & (dev)->hw_masks[field ## _M]) #define FLD_RS_64(dev, val, field) \ ((u64)((val) & (dev)->hw_masks[field ## _M]) >> (dev)->hw_shifts[field ## _S]) #define FLD_LS_32(dev, val, field) \ (((val) << (dev)->hw_shifts[field ## _S]) & (dev)->hw_masks[field ## _M]) #define FLD_RS_32(dev, 
val, field) \ ((u64)((val) & (dev)->hw_masks[field ## _M]) >> (dev)->hw_shifts[field ## _S]) #define IRDMA_MAX_STATS_16 0xffffULL #define IRDMA_MAX_STATS_24 0xffffffULL #define IRDMA_MAX_STATS_32 0xffffffffULL #define IRDMA_MAX_STATS_48 0xffffffffffffULL #define IRDMA_MAX_STATS_56 0xffffffffffffffULL #define IRDMA_MAX_STATS_64 0xffffffffffffffffULL #define IRDMA_MAX_CQ_READ_THRESH 0x3FFFF #define IRDMA_CQPSQ_QHASH_VLANID_S 32 #define IRDMA_CQPSQ_QHASH_VLANID GENMASK_ULL(43, 32) #define IRDMA_CQPSQ_QHASH_QPN_S 32 #define IRDMA_CQPSQ_QHASH_QPN GENMASK_ULL(49, 32) #define IRDMA_CQPSQ_QHASH_QS_HANDLE_S 0 #define IRDMA_CQPSQ_QHASH_QS_HANDLE GENMASK_ULL(9, 0) #define IRDMA_CQPSQ_QHASH_SRC_PORT_S 16 #define IRDMA_CQPSQ_QHASH_SRC_PORT GENMASK_ULL(31, 16) #define IRDMA_CQPSQ_QHASH_DEST_PORT_S 0 #define IRDMA_CQPSQ_QHASH_DEST_PORT GENMASK_ULL(15, 0) #define IRDMA_CQPSQ_QHASH_ADDR0_S 32 #define IRDMA_CQPSQ_QHASH_ADDR0 GENMASK_ULL(63, 32) #define IRDMA_CQPSQ_QHASH_ADDR1_S 0 #define IRDMA_CQPSQ_QHASH_ADDR1 GENMASK_ULL(31, 0) #define IRDMA_CQPSQ_QHASH_ADDR2_S 32 #define IRDMA_CQPSQ_QHASH_ADDR2 GENMASK_ULL(63, 32) #define IRDMA_CQPSQ_QHASH_ADDR3_S 0 #define IRDMA_CQPSQ_QHASH_ADDR3 GENMASK_ULL(31, 0) #define IRDMA_CQPSQ_QHASH_WQEVALID_S 63 #define IRDMA_CQPSQ_QHASH_WQEVALID BIT_ULL(63) #define IRDMA_CQPSQ_QHASH_OPCODE_S 32 #define IRDMA_CQPSQ_QHASH_OPCODE GENMASK_ULL(37, 32) #define IRDMA_CQPSQ_QHASH_MANAGE_S 61 #define IRDMA_CQPSQ_QHASH_MANAGE GENMASK_ULL(62, 61) #define IRDMA_CQPSQ_QHASH_IPV4VALID_S 60 #define IRDMA_CQPSQ_QHASH_IPV4VALID BIT_ULL(60) #define IRDMA_CQPSQ_QHASH_VLANVALID_S 59 #define IRDMA_CQPSQ_QHASH_VLANVALID BIT_ULL(59) #define IRDMA_CQPSQ_QHASH_ENTRYTYPE_S 42 #define IRDMA_CQPSQ_QHASH_ENTRYTYPE GENMASK_ULL(44, 42) #define IRDMA_CQPSQ_STATS_WQEVALID_S 63 #define IRDMA_CQPSQ_STATS_WQEVALID BIT_ULL(63) #define IRDMA_CQPSQ_STATS_ALLOC_INST_S 62 #define IRDMA_CQPSQ_STATS_ALLOC_INST BIT_ULL(62) #define IRDMA_CQPSQ_STATS_USE_HMC_FCN_INDEX_S 60 #define IRDMA_CQPSQ_STATS_USE_HMC_FCN_INDEX BIT_ULL(60) #define IRDMA_CQPSQ_STATS_USE_INST_S 61 #define IRDMA_CQPSQ_STATS_USE_INST BIT_ULL(61) #define IRDMA_CQPSQ_STATS_OP_S 32 #define IRDMA_CQPSQ_STATS_OP GENMASK_ULL(37, 32) #define IRDMA_CQPSQ_STATS_INST_INDEX_S 0 #define IRDMA_CQPSQ_STATS_INST_INDEX GENMASK_ULL(6, 0) #define IRDMA_CQPSQ_STATS_HMC_FCN_INDEX_S 0 #define IRDMA_CQPSQ_STATS_HMC_FCN_INDEX GENMASK_ULL(15, 0) #define IRDMA_CQPSQ_WS_WQEVALID_S 63 #define IRDMA_CQPSQ_WS_WQEVALID BIT_ULL(63) #define IRDMA_CQPSQ_WS_NODEOP_S 52 #define IRDMA_CQPSQ_WS_NODEOP GENMASK_ULL(55, 52) #define IRDMA_CQPSQ_WS_ENABLENODE_S 62 #define IRDMA_CQPSQ_WS_ENABLENODE BIT_ULL(62) #define IRDMA_CQPSQ_WS_NODETYPE_S 61 #define IRDMA_CQPSQ_WS_NODETYPE BIT_ULL(61) #define IRDMA_CQPSQ_WS_PRIOTYPE_S 59 #define IRDMA_CQPSQ_WS_PRIOTYPE GENMASK_ULL(60, 59) #define IRDMA_CQPSQ_WS_TC_S 56 #define IRDMA_CQPSQ_WS_TC GENMASK_ULL(58, 56) #define IRDMA_CQPSQ_WS_VMVFTYPE_S 54 #define IRDMA_CQPSQ_WS_VMVFTYPE GENMASK_ULL(55, 54) #define IRDMA_CQPSQ_WS_VMVFNUM_S 42 #define IRDMA_CQPSQ_WS_VMVFNUM GENMASK_ULL(51, 42) #define IRDMA_CQPSQ_WS_OP_S 32 #define IRDMA_CQPSQ_WS_OP GENMASK_ULL(37, 32) #define IRDMA_CQPSQ_WS_PARENTID_S 16 #define IRDMA_CQPSQ_WS_PARENTID GENMASK_ULL(25, 16) #define IRDMA_CQPSQ_WS_NODEID_S 0 #define IRDMA_CQPSQ_WS_NODEID GENMASK_ULL(9, 0) #define IRDMA_CQPSQ_WS_VSI_S 48 #define IRDMA_CQPSQ_WS_VSI GENMASK_ULL(57, 48) #define IRDMA_CQPSQ_WS_WEIGHT_S 32 #define IRDMA_CQPSQ_WS_WEIGHT GENMASK_ULL(38, 32) #define IRDMA_CQPSQ_UP_WQEVALID_S 63 #define IRDMA_CQPSQ_UP_WQEVALID 
BIT_ULL(63) #define IRDMA_CQPSQ_UP_USEVLAN_S 62 #define IRDMA_CQPSQ_UP_USEVLAN BIT_ULL(62) #define IRDMA_CQPSQ_UP_USEOVERRIDE_S 61 #define IRDMA_CQPSQ_UP_USEOVERRIDE BIT_ULL(61) #define IRDMA_CQPSQ_UP_OP_S 32 #define IRDMA_CQPSQ_UP_OP GENMASK_ULL(37, 32) #define IRDMA_CQPSQ_UP_HMCFCNIDX_S 0 #define IRDMA_CQPSQ_UP_HMCFCNIDX GENMASK_ULL(5, 0) #define IRDMA_CQPSQ_UP_CNPOVERRIDE_S 32 #define IRDMA_CQPSQ_UP_CNPOVERRIDE GENMASK_ULL(37, 32) #define IRDMA_CQPSQ_QUERY_RDMA_FEATURES_WQEVALID_S 63 #define IRDMA_CQPSQ_QUERY_RDMA_FEATURES_WQEVALID BIT_ULL(63) #define IRDMA_CQPSQ_QUERY_RDMA_FEATURES_BUF_LEN_S 0 #define IRDMA_CQPSQ_QUERY_RDMA_FEATURES_BUF_LEN GENMASK_ULL(31, 0) #define IRDMA_CQPSQ_QUERY_RDMA_FEATURES_OP_S 32 #define IRDMA_CQPSQ_QUERY_RDMA_FEATURES_OP GENMASK_ULL(37, 32) #define IRDMA_CQPSQ_QUERY_RDMA_FEATURES_HW_MODEL_USED_S 32 #define IRDMA_CQPSQ_QUERY_RDMA_FEATURES_HW_MODEL_USED GENMASK_ULL(47, 32) #define IRDMA_CQPSQ_QUERY_RDMA_FEATURES_HW_MAJOR_VERSION_S 16 #define IRDMA_CQPSQ_QUERY_RDMA_FEATURES_HW_MAJOR_VERSION GENMASK_ULL(23, 16) #define IRDMA_CQPSQ_QUERY_RDMA_FEATURES_HW_MINOR_VERSION_S 0 #define IRDMA_CQPSQ_QUERY_RDMA_FEATURES_HW_MINOR_VERSION GENMASK_ULL(7, 0) #define IRDMA_CQPHC_SQSIZE_S 8 #define IRDMA_CQPHC_SQSIZE GENMASK_ULL(11, 8) #define IRDMA_CQPHC_DISABLE_PFPDUS_S 1 #define IRDMA_CQPHC_DISABLE_PFPDUS BIT_ULL(1) #define IRDMA_CQPHC_ROCEV2_RTO_POLICY_S 2 #define IRDMA_CQPHC_ROCEV2_RTO_POLICY BIT_ULL(2) #define IRDMA_CQPHC_PROTOCOL_USED_S 3 #define IRDMA_CQPHC_PROTOCOL_USED GENMASK_ULL(4, 3) #define IRDMA_CQPHC_MIN_RATE_S 48 #define IRDMA_CQPHC_MIN_RATE GENMASK_ULL(51, 48) #define IRDMA_CQPHC_MIN_DEC_FACTOR_S 56 #define IRDMA_CQPHC_MIN_DEC_FACTOR GENMASK_ULL(59, 56) #define IRDMA_CQPHC_DCQCN_T_S 0 #define IRDMA_CQPHC_DCQCN_T GENMASK_ULL(15, 0) #define IRDMA_CQPHC_HAI_FACTOR_S 32 #define IRDMA_CQPHC_HAI_FACTOR GENMASK_ULL(47, 32) #define IRDMA_CQPHC_RAI_FACTOR_S 48 #define IRDMA_CQPHC_RAI_FACTOR GENMASK_ULL(63, 48) #define IRDMA_CQPHC_DCQCN_B_S 0 #define IRDMA_CQPHC_DCQCN_B GENMASK_ULL(24, 0) #define IRDMA_CQPHC_DCQCN_F_S 25 #define IRDMA_CQPHC_DCQCN_F GENMASK_ULL(27, 25) #define IRDMA_CQPHC_CC_CFG_VALID_S 31 #define IRDMA_CQPHC_CC_CFG_VALID BIT_ULL(31) #define IRDMA_CQPHC_RREDUCE_MPERIOD_S 32 #define IRDMA_CQPHC_RREDUCE_MPERIOD GENMASK_ULL(63, 32) #define IRDMA_CQPHC_HW_MINVER_S 0 #define IRDMA_CQPHC_HW_MINVER GENMASK_ULL(15, 0) #define IRDMA_CQPHC_HW_MAJVER_GEN_1 0 #define IRDMA_CQPHC_HW_MAJVER_GEN_2 1 #define IRDMA_CQPHC_HW_MAJVER_GEN_3 2 #define IRDMA_CQPHC_HW_MAJVER_S 16 #define IRDMA_CQPHC_HW_MAJVER GENMASK_ULL(31, 16) #define IRDMA_CQPHC_CEQPERVF_S 32 #define IRDMA_CQPHC_CEQPERVF GENMASK_ULL(39, 32) #define IRDMA_CQPHC_EN_REM_ENDPOINT_TRK_S 3 #define IRDMA_CQPHC_EN_REM_ENDPOINT_TRK BIT_ULL(3) #define IRDMA_CQPHC_ENABLED_VFS_S 32 #define IRDMA_CQPHC_ENABLED_VFS GENMASK_ULL(37, 32) #define IRDMA_CQPHC_HMC_PROFILE_S 0 #define IRDMA_CQPHC_HMC_PROFILE GENMASK_ULL(2, 0) #define IRDMA_CQPHC_SVER_S 24 #define IRDMA_CQPHC_SVER GENMASK_ULL(31, 24) #define IRDMA_CQPHC_SQBASE_S 9 #define IRDMA_CQPHC_SQBASE GENMASK_ULL(63, 9) #define IRDMA_CQPHC_QPCTX_S 0 #define IRDMA_CQPHC_QPCTX GENMASK_ULL(63, 0) #define IRDMA_QP_DBSA_HW_SQ_TAIL_S 0 #define IRDMA_QP_DBSA_HW_SQ_TAIL GENMASK_ULL(14, 0) #define IRDMA_CQ_DBSA_CQEIDX_S 0 #define IRDMA_CQ_DBSA_CQEIDX GENMASK_ULL(19, 0) #define IRDMA_CQ_DBSA_SW_CQ_SELECT_S 0 #define IRDMA_CQ_DBSA_SW_CQ_SELECT GENMASK_ULL(13, 0) #define IRDMA_CQ_DBSA_ARM_NEXT_S 14 #define IRDMA_CQ_DBSA_ARM_NEXT BIT_ULL(14) #define IRDMA_CQ_DBSA_ARM_NEXT_SE_S 15 
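/*
 * Illustrative sketch: every field FOO in this header is paired with a
 * FOO_S shift because the FIELD_PREP()/FIELD_GET() fallbacks above
 * token-paste "_S" onto the mask name, so the first argument must be a
 * bare macro name rather than an expression.  A hypothetical helper that
 * composes the CQ doorbell shadow-area arm word could look like this;
 * the example_* name is invented, only the IRDMA_* macros come from this
 * header.
 */
static inline u64 example_cq_dbsa_arm_word(u16 sw_cq_sel, u8 arm_seq_num,
					   bool arm_next_se, bool arm_next)
{
	/* FIELD_PREP(IRDMA_CQ_DBSA_ARM_NEXT, x) expands to
	 * ((u64)(x) << IRDMA_CQ_DBSA_ARM_NEXT_S) & IRDMA_CQ_DBSA_ARM_NEXT.
	 */
	return FIELD_PREP(IRDMA_CQ_DBSA_SW_CQ_SELECT, sw_cq_sel) |
	       FIELD_PREP(IRDMA_CQ_DBSA_ARM_SEQ_NUM, arm_seq_num) |
	       FIELD_PREP(IRDMA_CQ_DBSA_ARM_NEXT_SE, arm_next_se) |
	       FIELD_PREP(IRDMA_CQ_DBSA_ARM_NEXT, arm_next);
}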
#define IRDMA_CQ_DBSA_ARM_NEXT_SE BIT_ULL(15) #define IRDMA_CQ_DBSA_ARM_SEQ_NUM_S 16 #define IRDMA_CQ_DBSA_ARM_SEQ_NUM GENMASK_ULL(17, 16) /* CQP and iWARP Completion Queue */ #define IRDMA_CQ_QPCTX_S IRDMA_CQPHC_QPCTX_S #define IRDMA_CQ_QPCTX IRDMA_CQPHC_QPCTX #define IRDMA_CCQ_OPRETVAL_S 0 #define IRDMA_CCQ_OPRETVAL GENMASK_ULL(31, 0) #define IRDMA_CQ_MINERR_S 0 #define IRDMA_CQ_MINERR GENMASK_ULL(15, 0) #define IRDMA_CQ_MAJERR_S 16 #define IRDMA_CQ_MAJERR GENMASK_ULL(31, 16) #define IRDMA_CQ_WQEIDX_S 32 #define IRDMA_CQ_WQEIDX GENMASK_ULL(46, 32) #define IRDMA_CQ_EXTCQE_S 50 #define IRDMA_CQ_EXTCQE BIT_ULL(50) #define IRDMA_OOO_CMPL_S 54 #define IRDMA_OOO_CMPL BIT_ULL(54) #define IRDMA_CQ_ERROR_S 55 #define IRDMA_CQ_ERROR BIT_ULL(55) #define IRDMA_CQ_SQ_S 62 #define IRDMA_CQ_SQ BIT_ULL(62) #define IRDMA_CQ_VALID_S 63 #define IRDMA_CQ_VALID BIT_ULL(63) #define IRDMA_CQ_IMMVALID BIT_ULL(62) #define IRDMA_CQ_UDSMACVALID_S 61 #define IRDMA_CQ_UDSMACVALID BIT_ULL(61) #define IRDMA_CQ_UDVLANVALID_S 60 #define IRDMA_CQ_UDVLANVALID BIT_ULL(60) #define IRDMA_CQ_UDSMAC_S 0 #define IRDMA_CQ_UDSMAC GENMASK_ULL(47, 0) #define IRDMA_CQ_UDVLAN_S 48 #define IRDMA_CQ_UDVLAN GENMASK_ULL(63, 48) #define IRDMA_CQ_IMMDATA_S 0 #define IRDMA_CQ_IMMVALID_S 62 #define IRDMA_CQ_IMMDATA GENMASK_ULL(125, 62) #define IRDMA_CQ_IMMDATALOW32_S 0 #define IRDMA_CQ_IMMDATALOW32 GENMASK_ULL(31, 0) #define IRDMA_CQ_IMMDATAUP32_S 32 #define IRDMA_CQ_IMMDATAUP32 GENMASK_ULL(63, 32) #define IRDMACQ_PAYLDLEN_S 0 #define IRDMACQ_PAYLDLEN GENMASK_ULL(31, 0) #define IRDMACQ_TCPSQN_ROCEPSN_RTT_TS_S 32 #define IRDMACQ_TCPSQN_ROCEPSN_RTT_TS GENMASK_ULL(63, 32) #define IRDMACQ_INVSTAG_S 0 #define IRDMACQ_INVSTAG GENMASK_ULL(31, 0) #define IRDMACQ_QPID_S 32 #define IRDMACQ_QPID GENMASK_ULL(55, 32) #define IRDMACQ_UDSRCQPN_S 0 #define IRDMACQ_UDSRCQPN GENMASK_ULL(31, 0) #define IRDMACQ_PSHDROP_S 51 #define IRDMACQ_PSHDROP BIT_ULL(51) #define IRDMACQ_STAG_S 53 #define IRDMACQ_STAG BIT_ULL(53) #define IRDMACQ_IPV4_S 53 #define IRDMACQ_IPV4 BIT_ULL(53) #define IRDMACQ_SOEVENT_S 54 #define IRDMACQ_SOEVENT BIT_ULL(54) #define IRDMACQ_OP_S 56 #define IRDMACQ_OP GENMASK_ULL(61, 56) #define IRDMA_CEQE_CQCTX_S 0 #define IRDMA_CEQE_CQCTX GENMASK_ULL(62, 0) #define IRDMA_CEQE_VALID_S 63 #define IRDMA_CEQE_VALID BIT_ULL(63) /* AEQE format */ #define IRDMA_AEQE_COMPCTX_S IRDMA_CQPHC_QPCTX_S #define IRDMA_AEQE_COMPCTX IRDMA_CQPHC_QPCTX #define IRDMA_AEQE_QPCQID_LOW_S 0 #define IRDMA_AEQE_QPCQID_LOW GENMASK_ULL(17, 0) #define IRDMA_AEQE_QPCQID_HI_S 46 #define IRDMA_AEQE_QPCQID_HI BIT_ULL(46) #define IRDMA_AEQE_WQDESCIDX_S 18 #define IRDMA_AEQE_WQDESCIDX GENMASK_ULL(32, 18) #define IRDMA_AEQE_OVERFLOW_S 33 #define IRDMA_AEQE_OVERFLOW BIT_ULL(33) #define IRDMA_AEQE_AECODE_S 34 #define IRDMA_AEQE_AECODE GENMASK_ULL(45, 34) #define IRDMA_AEQE_AESRC_S 50 #define IRDMA_AEQE_AESRC GENMASK_ULL(53, 50) #define IRDMA_AEQE_IWSTATE_S 54 #define IRDMA_AEQE_IWSTATE GENMASK_ULL(56, 54) #define IRDMA_AEQE_TCPSTATE_S 57 #define IRDMA_AEQE_TCPSTATE GENMASK_ULL(60, 57) #define IRDMA_AEQE_Q2DATA_S 61 #define IRDMA_AEQE_Q2DATA GENMASK_ULL(62, 61) #define IRDMA_AEQE_VALID_S 63 #define IRDMA_AEQE_VALID BIT_ULL(63) #define IRDMA_UDA_QPSQ_NEXT_HDR_S 16 #define IRDMA_UDA_QPSQ_NEXT_HDR GENMASK_ULL(23, 16) #define IRDMA_UDA_QPSQ_OPCODE_S 32 #define IRDMA_UDA_QPSQ_OPCODE GENMASK_ULL(37, 32) #define IRDMA_UDA_QPSQ_L4LEN_S 42 #define IRDMA_UDA_QPSQ_L4LEN GENMASK_ULL(45, 42) #define IRDMA_GEN1_UDA_QPSQ_L4LEN_S 24 #define IRDMA_GEN1_UDA_QPSQ_L4LEN GENMASK_ULL(27, 24) #define 
IRDMA_UDA_QPSQ_AHIDX_S 0 #define IRDMA_UDA_QPSQ_AHIDX GENMASK_ULL(16, 0) #define IRDMA_UDA_QPSQ_VALID_S 63 #define IRDMA_UDA_QPSQ_VALID BIT_ULL(63) #define IRDMA_UDA_QPSQ_SIGCOMPL_S 62 #define IRDMA_UDA_QPSQ_SIGCOMPL BIT_ULL(62) #define IRDMA_UDA_QPSQ_MACLEN_S 56 #define IRDMA_UDA_QPSQ_MACLEN GENMASK_ULL(62, 56) #define IRDMA_UDA_QPSQ_IPLEN_S 48 #define IRDMA_UDA_QPSQ_IPLEN GENMASK_ULL(54, 48) #define IRDMA_UDA_QPSQ_L4T_S 30 #define IRDMA_UDA_QPSQ_L4T GENMASK_ULL(31, 30) #define IRDMA_UDA_QPSQ_IIPT_S 28 #define IRDMA_UDA_QPSQ_IIPT GENMASK_ULL(29, 28) #define IRDMA_UDA_PAYLOADLEN_S 0 #define IRDMA_UDA_PAYLOADLEN GENMASK_ULL(13, 0) #define IRDMA_UDA_HDRLEN_S 16 #define IRDMA_UDA_HDRLEN GENMASK_ULL(24, 16) #define IRDMA_VLAN_TAG_VALID_S 50 #define IRDMA_VLAN_TAG_VALID BIT_ULL(50) #define IRDMA_UDA_L3PROTO_S 0 #define IRDMA_UDA_L3PROTO GENMASK_ULL(1, 0) #define IRDMA_UDA_L4PROTO_S 16 #define IRDMA_UDA_L4PROTO GENMASK_ULL(17, 16) #define IRDMA_UDA_QPSQ_DOLOOPBACK_S 44 #define IRDMA_UDA_QPSQ_DOLOOPBACK BIT_ULL(44) #define IRDMA_CQPSQ_BUFSIZE_S 0 #define IRDMA_CQPSQ_BUFSIZE GENMASK_ULL(31, 0) #define IRDMA_CQPSQ_OPCODE_S 32 #define IRDMA_CQPSQ_OPCODE GENMASK_ULL(37, 32) #define IRDMA_CQPSQ_WQEVALID_S 63 #define IRDMA_CQPSQ_WQEVALID BIT_ULL(63) #define IRDMA_CQPSQ_TPHVAL_S 0 #define IRDMA_CQPSQ_TPHVAL GENMASK_ULL(7, 0) #define IRDMA_CQPSQ_VSIIDX_S 8 #define IRDMA_CQPSQ_VSIIDX GENMASK_ULL(17, 8) #define IRDMA_CQPSQ_TPHEN_S 60 #define IRDMA_CQPSQ_TPHEN BIT_ULL(60) #define IRDMA_CQPSQ_PBUFADDR_S IRDMA_CQPHC_QPCTX_S #define IRDMA_CQPSQ_PBUFADDR IRDMA_CQPHC_QPCTX /* Create/Modify/Destroy QP */ #define IRDMA_CQPSQ_QP_NEWMSS_S 32 #define IRDMA_CQPSQ_QP_NEWMSS GENMASK_ULL(45, 32) #define IRDMA_CQPSQ_QP_TERMLEN_S 48 #define IRDMA_CQPSQ_QP_TERMLEN GENMASK_ULL(51, 48) #define IRDMA_CQPSQ_QP_QPCTX_S IRDMA_CQPHC_QPCTX_S #define IRDMA_CQPSQ_QP_QPCTX IRDMA_CQPHC_QPCTX #define IRDMA_CQPSQ_QP_QPID_S 0 #define IRDMA_CQPSQ_QP_QPID_M (0xFFFFFFUL) #define IRDMA_CQPSQ_QP_OP_S 32 #define IRDMA_CQPSQ_QP_OP_M IRDMACQ_OP_M #define IRDMA_CQPSQ_QP_ORDVALID_S 42 #define IRDMA_CQPSQ_QP_ORDVALID BIT_ULL(42) #define IRDMA_CQPSQ_QP_TOECTXVALID_S 43 #define IRDMA_CQPSQ_QP_TOECTXVALID BIT_ULL(43) #define IRDMA_CQPSQ_QP_CACHEDVARVALID_S 44 #define IRDMA_CQPSQ_QP_CACHEDVARVALID BIT_ULL(44) #define IRDMA_CQPSQ_QP_VQ_S 45 #define IRDMA_CQPSQ_QP_VQ BIT_ULL(45) #define IRDMA_CQPSQ_QP_FORCELOOPBACK_S 46 #define IRDMA_CQPSQ_QP_FORCELOOPBACK BIT_ULL(46) #define IRDMA_CQPSQ_QP_CQNUMVALID_S 47 #define IRDMA_CQPSQ_QP_CQNUMVALID BIT_ULL(47) #define IRDMA_CQPSQ_QP_QPTYPE_S 48 #define IRDMA_CQPSQ_QP_QPTYPE GENMASK_ULL(50, 48) #define IRDMA_CQPSQ_QP_MACVALID_S 51 #define IRDMA_CQPSQ_QP_MACVALID BIT_ULL(51) #define IRDMA_CQPSQ_QP_MSSCHANGE_S 52 #define IRDMA_CQPSQ_QP_MSSCHANGE BIT_ULL(52) - #define IRDMA_CQPSQ_QP_IGNOREMWBOUND_S 54 #define IRDMA_CQPSQ_QP_IGNOREMWBOUND BIT_ULL(54) #define IRDMA_CQPSQ_QP_REMOVEHASHENTRY_S 55 #define IRDMA_CQPSQ_QP_REMOVEHASHENTRY BIT_ULL(55) #define IRDMA_CQPSQ_QP_TERMACT_S 56 #define IRDMA_CQPSQ_QP_TERMACT GENMASK_ULL(57, 56) #define IRDMA_CQPSQ_QP_RESETCON_S 58 #define IRDMA_CQPSQ_QP_RESETCON BIT_ULL(58) #define IRDMA_CQPSQ_QP_ARPTABIDXVALID_S 59 #define IRDMA_CQPSQ_QP_ARPTABIDXVALID BIT_ULL(59) #define IRDMA_CQPSQ_QP_NEXTIWSTATE_S 60 #define IRDMA_CQPSQ_QP_NEXTIWSTATE GENMASK_ULL(62, 60) #define IRDMA_CQPSQ_QP_DBSHADOWADDR_S IRDMA_CQPHC_QPCTX_S #define IRDMA_CQPSQ_QP_DBSHADOWADDR IRDMA_CQPHC_QPCTX #define IRDMA_CQPSQ_CQ_CQSIZE_S 0 #define IRDMA_CQPSQ_CQ_CQSIZE GENMASK_ULL(20, 0) #define IRDMA_CQPSQ_CQ_CQCTX_S 0 
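/*
 * Illustrative sketch: FIELD_GET() is the read-side counterpart of
 * FIELD_PREP(), used to pull the IRDMA_CQ_ and IRDMACQ_ fields back out
 * of a CQE quadword.  Which 64-bit word of the CQE carries which field
 * is fixed by the hardware CQE layout and not shown here; the struct and
 * example_* names are invented for illustration only.
 */
struct example_cqe_status {
	u16 major_err;
	u16 minor_err;
	u8 op;
	bool is_sq;
	bool error;
};

static inline bool example_decode_cqe_status(u64 qword, u8 polarity,
					     struct example_cqe_status *st)
{
	/* The valid bit must match the CQ ring polarity before any other
	 * field of the entry may be trusted.
	 */
	if (FIELD_GET(IRDMA_CQ_VALID, qword) != polarity)
		return false;

	st->error = (bool)FIELD_GET(IRDMA_CQ_ERROR, qword);
	st->is_sq = (bool)FIELD_GET(IRDMA_CQ_SQ, qword);
	st->major_err = (u16)FIELD_GET(IRDMA_CQ_MAJERR, qword);
	st->minor_err = (u16)FIELD_GET(IRDMA_CQ_MINERR, qword);
	st->op = (u8)FIELD_GET(IRDMACQ_OP, qword);
	return true;
}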
#define IRDMA_CQPSQ_CQ_CQCTX GENMASK_ULL(62, 0) #define IRDMA_CQPSQ_CQ_SHADOW_READ_THRESHOLD_S 0 #define IRDMA_CQPSQ_CQ_SHADOW_READ_THRESHOLD GENMASK(17, 0) #define IRDMA_CQPSQ_CQ_OP_S 32 #define IRDMA_CQPSQ_CQ_OP GENMASK_ULL(37, 32) #define IRDMA_CQPSQ_CQ_CQRESIZE_S 43 #define IRDMA_CQPSQ_CQ_CQRESIZE BIT_ULL(43) #define IRDMA_CQPSQ_CQ_LPBLSIZE_S 44 #define IRDMA_CQPSQ_CQ_LPBLSIZE GENMASK_ULL(45, 44) #define IRDMA_CQPSQ_CQ_CHKOVERFLOW_S 46 #define IRDMA_CQPSQ_CQ_CHKOVERFLOW BIT_ULL(46) #define IRDMA_CQPSQ_CQ_VIRTMAP_S 47 #define IRDMA_CQPSQ_CQ_VIRTMAP BIT_ULL(47) #define IRDMA_CQPSQ_CQ_ENCEQEMASK_S 48 #define IRDMA_CQPSQ_CQ_ENCEQEMASK BIT_ULL(48) #define IRDMA_CQPSQ_CQ_CEQIDVALID_S 49 #define IRDMA_CQPSQ_CQ_CEQIDVALID BIT_ULL(49) #define IRDMA_CQPSQ_CQ_AVOIDMEMCNFLCT_S 61 #define IRDMA_CQPSQ_CQ_AVOIDMEMCNFLCT BIT_ULL(61) #define IRDMA_CQPSQ_CQ_FIRSTPMPBLIDX_S 0 #define IRDMA_CQPSQ_CQ_FIRSTPMPBLIDX GENMASK_ULL(27, 0) /* Allocate/Register/Register Shared/Deallocate Stag */ #define IRDMA_CQPSQ_STAG_VA_FBO_S IRDMA_CQPHC_QPCTX_S #define IRDMA_CQPSQ_STAG_VA_FBO IRDMA_CQPHC_QPCTX #define IRDMA_CQPSQ_STAG_STAGLEN_S 0 #define IRDMA_CQPSQ_STAG_STAGLEN GENMASK_ULL(45, 0) #define IRDMA_CQPSQ_STAG_KEY_S 0 #define IRDMA_CQPSQ_STAG_KEY GENMASK_ULL(7, 0) #define IRDMA_CQPSQ_STAG_IDX_S 8 #define IRDMA_CQPSQ_STAG_IDX GENMASK_ULL(31, 8) #define IRDMA_CQPSQ_STAG_PARENTSTAGIDX_S 32 #define IRDMA_CQPSQ_STAG_PARENTSTAGIDX GENMASK_ULL(55, 32) #define IRDMA_CQPSQ_STAG_MR_S 43 #define IRDMA_CQPSQ_STAG_MR BIT_ULL(43) #define IRDMA_CQPSQ_STAG_MWTYPE_S 42 #define IRDMA_CQPSQ_STAG_MWTYPE BIT_ULL(42) #define IRDMA_CQPSQ_STAG_MW1_BIND_DONT_VLDT_KEY_S 58 #define IRDMA_CQPSQ_STAG_MW1_BIND_DONT_VLDT_KEY BIT_ULL(58) #define IRDMA_CQPSQ_STAG_LPBLSIZE_S IRDMA_CQPSQ_CQ_LPBLSIZE_S #define IRDMA_CQPSQ_STAG_LPBLSIZE_M IRDMA_CQPSQ_CQ_LPBLSIZE_M #define IRDMA_CQPSQ_STAG_LPBLSIZE IRDMA_CQPSQ_CQ_LPBLSIZE #define IRDMA_CQPSQ_STAG_HPAGESIZE_S 46 #define IRDMA_CQPSQ_STAG_HPAGESIZE GENMASK_ULL(47, 46) #define IRDMA_CQPSQ_STAG_ARIGHTS_S 48 #define IRDMA_CQPSQ_STAG_ARIGHTS GENMASK_ULL(52, 48) #define IRDMA_CQPSQ_STAG_REMACCENABLED_S 53 #define IRDMA_CQPSQ_STAG_REMACCENABLED BIT_ULL(53) #define IRDMA_CQPSQ_STAG_VABASEDTO_S 59 #define IRDMA_CQPSQ_STAG_VABASEDTO BIT_ULL(59) #define IRDMA_CQPSQ_STAG_USEHMCFNIDX_S 60 #define IRDMA_CQPSQ_STAG_USEHMCFNIDX BIT_ULL(60) #define IRDMA_CQPSQ_STAG_USEPFRID_S 61 #define IRDMA_CQPSQ_STAG_USEPFRID BIT_ULL(61) #define IRDMA_CQPSQ_STAG_PBA_S IRDMA_CQPHC_QPCTX_S #define IRDMA_CQPSQ_STAG_PBA IRDMA_CQPHC_QPCTX #define IRDMA_CQPSQ_STAG_HMCFNIDX_S 0 #define IRDMA_CQPSQ_STAG_HMCFNIDX GENMASK_ULL(5, 0) #define IRDMA_CQPSQ_STAG_FIRSTPMPBLIDX_S 0 #define IRDMA_CQPSQ_STAG_FIRSTPMPBLIDX GENMASK_ULL(27, 0) #define IRDMA_CQPSQ_QUERYSTAG_IDX_S IRDMA_CQPSQ_STAG_IDX_S #define IRDMA_CQPSQ_QUERYSTAG_IDX IRDMA_CQPSQ_STAG_IDX #define IRDMA_CQPSQ_MLM_TABLEIDX_S 0 #define IRDMA_CQPSQ_MLM_TABLEIDX GENMASK_ULL(5, 0) #define IRDMA_CQPSQ_MLM_FREEENTRY_S 62 #define IRDMA_CQPSQ_MLM_FREEENTRY BIT_ULL(62) #define IRDMA_CQPSQ_MLM_IGNORE_REF_CNT_S 61 #define IRDMA_CQPSQ_MLM_IGNORE_REF_CNT BIT_ULL(61) #define IRDMA_CQPSQ_MLM_MAC0_S 0 #define IRDMA_CQPSQ_MLM_MAC0 GENMASK_ULL(7, 0) #define IRDMA_CQPSQ_MLM_MAC1_S 8 #define IRDMA_CQPSQ_MLM_MAC1 GENMASK_ULL(15, 8) #define IRDMA_CQPSQ_MLM_MAC2_S 16 #define IRDMA_CQPSQ_MLM_MAC2 GENMASK_ULL(23, 16) #define IRDMA_CQPSQ_MLM_MAC3_S 24 #define IRDMA_CQPSQ_MLM_MAC3 GENMASK_ULL(31, 24) #define IRDMA_CQPSQ_MLM_MAC4_S 32 #define IRDMA_CQPSQ_MLM_MAC4 GENMASK_ULL(39, 32) #define IRDMA_CQPSQ_MLM_MAC5_S 40 
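/*
 * Illustrative sketch: the STAG KEY/IDX split above suggests the usual
 * (index << 8) | key packing of a 32-bit STag, with the allocator index
 * in bits 31:8 and an 8-bit consumer key in bits 7:0.  The example_*
 * helpers are hypothetical and only meant to make that layout concrete.
 */
static inline u32 example_make_stag(u32 stag_index, u8 key)
{
	return (u32)(FIELD_PREP(IRDMA_CQPSQ_STAG_IDX, stag_index) |
		     FIELD_PREP(IRDMA_CQPSQ_STAG_KEY, key));
}

static inline u32 example_stag_to_index(u32 stag)
{
	return (u32)FIELD_GET(IRDMA_CQPSQ_STAG_IDX, stag);
}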
#define IRDMA_CQPSQ_MLM_MAC5 GENMASK_ULL(47, 40) #define IRDMA_CQPSQ_MAT_REACHMAX_S 0 #define IRDMA_CQPSQ_MAT_REACHMAX GENMASK_ULL(31, 0) #define IRDMA_CQPSQ_MAT_MACADDR_S 0 #define IRDMA_CQPSQ_MAT_MACADDR GENMASK_ULL(47, 0) #define IRDMA_CQPSQ_MAT_ARPENTRYIDX_S 0 #define IRDMA_CQPSQ_MAT_ARPENTRYIDX GENMASK_ULL(11, 0) #define IRDMA_CQPSQ_MAT_ENTRYVALID_S 42 #define IRDMA_CQPSQ_MAT_ENTRYVALID BIT_ULL(42) #define IRDMA_CQPSQ_MAT_PERMANENT_S 43 #define IRDMA_CQPSQ_MAT_PERMANENT BIT_ULL(43) #define IRDMA_CQPSQ_MAT_QUERY_S 44 #define IRDMA_CQPSQ_MAT_QUERY BIT_ULL(44) #define IRDMA_CQPSQ_MVPBP_PD_ENTRY_CNT_S 0 #define IRDMA_CQPSQ_MVPBP_PD_ENTRY_CNT GENMASK_ULL(9, 0) #define IRDMA_CQPSQ_MVPBP_FIRST_PD_INX_S 16 #define IRDMA_CQPSQ_MVPBP_FIRST_PD_INX GENMASK_ULL(24, 16) #define IRDMA_CQPSQ_MVPBP_SD_INX_S 32 #define IRDMA_CQPSQ_MVPBP_SD_INX GENMASK_ULL(43, 32) #define IRDMA_CQPSQ_MVPBP_INV_PD_ENT_S 62 #define IRDMA_CQPSQ_MVPBP_INV_PD_ENT BIT_ULL(62) #define IRDMA_CQPSQ_MVPBP_PD_PLPBA_S 3 #define IRDMA_CQPSQ_MVPBP_PD_PLPBA GENMASK_ULL(63, 3) /* Manage Push Page - MPP */ #define IRDMA_INVALID_PUSH_PAGE_INDEX_GEN_1 0xffff #define IRDMA_INVALID_PUSH_PAGE_INDEX 0xffffffff #define IRDMA_CQPSQ_MPP_QS_HANDLE_S 0 #define IRDMA_CQPSQ_MPP_QS_HANDLE GENMASK_ULL(9, 0) #define IRDMA_CQPSQ_MPP_PPIDX_S 0 #define IRDMA_CQPSQ_MPP_PPIDX GENMASK_ULL(9, 0) #define IRDMA_CQPSQ_MPP_PPTYPE_S 60 #define IRDMA_CQPSQ_MPP_PPTYPE GENMASK_ULL(61, 60) #define IRDMA_CQPSQ_MPP_FREE_PAGE_S 62 #define IRDMA_CQPSQ_MPP_FREE_PAGE BIT_ULL(62) /* Upload Context - UCTX */ #define IRDMA_CQPSQ_UCTX_QPCTXADDR_S IRDMA_CQPHC_QPCTX_S #define IRDMA_CQPSQ_UCTX_QPCTXADDR IRDMA_CQPHC_QPCTX #define IRDMA_CQPSQ_UCTX_QPID_S 0 #define IRDMA_CQPSQ_UCTX_QPID GENMASK_ULL(23, 0) #define IRDMA_CQPSQ_UCTX_QPTYPE_S 48 #define IRDMA_CQPSQ_UCTX_QPTYPE GENMASK_ULL(51, 48) #define IRDMA_CQPSQ_UCTX_RAWFORMAT_S 61 #define IRDMA_CQPSQ_UCTX_RAWFORMAT BIT_ULL(61) #define IRDMA_CQPSQ_UCTX_FREEZEQP_S 62 #define IRDMA_CQPSQ_UCTX_FREEZEQP BIT_ULL(62) #define IRDMA_CQPSQ_MHMC_VFIDX_S 0 #define IRDMA_CQPSQ_MHMC_VFIDX GENMASK_ULL(15, 0) #define IRDMA_CQPSQ_MHMC_FREEPMFN_S 62 #define IRDMA_CQPSQ_MHMC_FREEPMFN BIT_ULL(62) #define IRDMA_CQPSQ_SHMCRP_HMC_PROFILE_S 0 #define IRDMA_CQPSQ_SHMCRP_HMC_PROFILE GENMASK_ULL(2, 0) #define IRDMA_CQPSQ_SHMCRP_VFNUM_S 32 #define IRDMA_CQPSQ_SHMCRP_VFNUM GENMASK_ULL(37, 32) #define IRDMA_CQPSQ_CEQ_CEQSIZE_S 0 #define IRDMA_CQPSQ_CEQ_CEQSIZE GENMASK_ULL(21, 0) #define IRDMA_CQPSQ_CEQ_CEQID_S 0 #define IRDMA_CQPSQ_CEQ_CEQID GENMASK_ULL(9, 0) #define IRDMA_CQPSQ_CEQ_LPBLSIZE_S IRDMA_CQPSQ_CQ_LPBLSIZE_S #define IRDMA_CQPSQ_CEQ_LPBLSIZE_M IRDMA_CQPSQ_CQ_LPBLSIZE_M #define IRDMA_CQPSQ_CEQ_LPBLSIZE IRDMA_CQPSQ_CQ_LPBLSIZE #define IRDMA_CQPSQ_CEQ_VMAP_S 47 #define IRDMA_CQPSQ_CEQ_VMAP BIT_ULL(47) #define IRDMA_CQPSQ_CEQ_ITRNOEXPIRE_S 46 #define IRDMA_CQPSQ_CEQ_ITRNOEXPIRE BIT_ULL(46) #define IRDMA_CQPSQ_CEQ_FIRSTPMPBLIDX_S 0 #define IRDMA_CQPSQ_CEQ_FIRSTPMPBLIDX GENMASK_ULL(27, 0) #define IRDMA_CQPSQ_AEQ_AEQECNT_S 0 #define IRDMA_CQPSQ_AEQ_AEQECNT GENMASK_ULL(18, 0) #define IRDMA_CQPSQ_AEQ_LPBLSIZE_S IRDMA_CQPSQ_CQ_LPBLSIZE_S #define IRDMA_CQPSQ_AEQ_LPBLSIZE_M IRDMA_CQPSQ_CQ_LPBLSIZE_M #define IRDMA_CQPSQ_AEQ_LPBLSIZE IRDMA_CQPSQ_CQ_LPBLSIZE #define IRDMA_CQPSQ_AEQ_VMAP_S 47 #define IRDMA_CQPSQ_AEQ_VMAP BIT_ULL(47) #define IRDMA_CQPSQ_AEQ_FIRSTPMPBLIDX_S 0 #define IRDMA_CQPSQ_AEQ_FIRSTPMPBLIDX GENMASK_ULL(27, 0) #define IRDMA_COMMIT_FPM_QPCNT_S 0 #define IRDMA_COMMIT_FPM_QPCNT GENMASK_ULL(18, 0) #define IRDMA_COMMIT_FPM_BASE_S 32 #define 
IRDMA_CQPSQ_CFPM_HMCFNID_S 0 #define IRDMA_CQPSQ_CFPM_HMCFNID GENMASK_ULL(5, 0) #define IRDMA_CQPSQ_FWQE_AECODE_S 0 #define IRDMA_CQPSQ_FWQE_AECODE GENMASK_ULL(15, 0) #define IRDMA_CQPSQ_FWQE_AESOURCE_S 16 #define IRDMA_CQPSQ_FWQE_AESOURCE GENMASK_ULL(19, 16) #define IRDMA_CQPSQ_FWQE_RQMNERR_S 0 #define IRDMA_CQPSQ_FWQE_RQMNERR GENMASK_ULL(15, 0) #define IRDMA_CQPSQ_FWQE_RQMJERR_S 16 #define IRDMA_CQPSQ_FWQE_RQMJERR GENMASK_ULL(31, 16) #define IRDMA_CQPSQ_FWQE_SQMNERR_S 32 #define IRDMA_CQPSQ_FWQE_SQMNERR GENMASK_ULL(47, 32) #define IRDMA_CQPSQ_FWQE_SQMJERR_S 48 #define IRDMA_CQPSQ_FWQE_SQMJERR GENMASK_ULL(63, 48) #define IRDMA_CQPSQ_FWQE_QPID_S 0 #define IRDMA_CQPSQ_FWQE_QPID GENMASK_ULL(23, 0) #define IRDMA_CQPSQ_FWQE_GENERATE_AE_S 59 #define IRDMA_CQPSQ_FWQE_GENERATE_AE BIT_ULL(59) #define IRDMA_CQPSQ_FWQE_USERFLCODE_S 60 #define IRDMA_CQPSQ_FWQE_USERFLCODE BIT_ULL(60) #define IRDMA_CQPSQ_FWQE_FLUSHSQ_S 61 #define IRDMA_CQPSQ_FWQE_FLUSHSQ BIT_ULL(61) #define IRDMA_CQPSQ_FWQE_FLUSHRQ_S 62 #define IRDMA_CQPSQ_FWQE_FLUSHRQ BIT_ULL(62) #define IRDMA_CQPSQ_MAPT_PORT_S 0 #define IRDMA_CQPSQ_MAPT_PORT GENMASK_ULL(15, 0) #define IRDMA_CQPSQ_MAPT_ADDPORT_S 62 #define IRDMA_CQPSQ_MAPT_ADDPORT BIT_ULL(62) #define IRDMA_CQPSQ_UPESD_SDCMD_S 0 #define IRDMA_CQPSQ_UPESD_SDCMD GENMASK_ULL(31, 0) #define IRDMA_CQPSQ_UPESD_SDDATALOW_S 0 #define IRDMA_CQPSQ_UPESD_SDDATALOW GENMASK_ULL(31, 0) #define IRDMA_CQPSQ_UPESD_SDDATAHI_S 32 #define IRDMA_CQPSQ_UPESD_SDDATAHI GENMASK_ULL(63, 32) #define IRDMA_CQPSQ_UPESD_ENTRY_VALID_S 63 #define IRDMA_CQPSQ_UPESD_ENTRY_VALID BIT_ULL(63) #define IRDMA_CQPSQ_UPESD_BM_PF 0 #define IRDMA_CQPSQ_UPESD_BM_CP_LM 1 #define IRDMA_CQPSQ_UPESD_BM_AXF 2 #define IRDMA_CQPSQ_UPESD_BM_LM 4 #define IRDMA_CQPSQ_UPESD_BM_S 32 #define IRDMA_CQPSQ_UPESD_BM GENMASK_ULL(34, 32) #define IRDMA_CQPSQ_UPESD_ENTRY_COUNT_S 0 #define IRDMA_CQPSQ_UPESD_ENTRY_COUNT GENMASK_ULL(3, 0) #define IRDMA_CQPSQ_UPESD_SKIP_ENTRY_S 7 #define IRDMA_CQPSQ_UPESD_SKIP_ENTRY BIT_ULL(7) /* Suspend QP */ #define IRDMA_CQPSQ_SUSPENDQP_QPID_S 0 #define IRDMA_CQPSQ_SUSPENDQP_QPID GENMASK_ULL(23, 0) #define IRDMA_CQPSQ_RESUMEQP_QSHANDLE_S 0 #define IRDMA_CQPSQ_RESUMEQP_QSHANDLE GENMASK_ULL(31, 0) #define IRDMA_CQPSQ_RESUMEQP_QPID_S IRDMA_CQPSQ_SUSPENDQP_QPID_S #define IRDMA_CQPSQ_RESUMEQP_QPID_M IRDMA_CQPSQ_SUSPENDQP_QPID_M #define IRDMA_CQPSQ_RESUMEQP_QPID IRDMA_CQPSQ_SUSPENDQP_QPID #define IRDMA_CQPSQ_MIN_STAG_INVALID 0x0001 #define IRDMA_CQPSQ_MIN_SUSPEND_PND 0x0005 #define IRDMA_CQPSQ_MAJ_NO_ERROR 0x0000 #define IRDMA_CQPSQ_MAJ_OBJCACHE_ERROR 0xF000 #define IRDMA_CQPSQ_MAJ_CNTXTCACHE_ERROR 0xF001 #define IRDMA_CQPSQ_MAJ_ERROR 0xFFFF #define IRDMAQPC_DDP_VER_S 0 #define IRDMAQPC_DDP_VER GENMASK_ULL(1, 0) #define IRDMAQPC_IBRDENABLE_S 2 #define IRDMAQPC_IBRDENABLE BIT_ULL(2) #define IRDMAQPC_IPV4_S 3 #define IRDMAQPC_IPV4 BIT_ULL(3) #define IRDMAQPC_NONAGLE_S 4 #define IRDMAQPC_NONAGLE BIT_ULL(4) #define IRDMAQPC_INSERTVLANTAG_S 5 #define IRDMAQPC_INSERTVLANTAG BIT_ULL(5) #define IRDMAQPC_ISQP1_S 6 #define IRDMAQPC_ISQP1 BIT_ULL(6) #define IRDMAQPC_TIMESTAMP_S 7 #define IRDMAQPC_TIMESTAMP BIT_ULL(7) #define IRDMAQPC_RQWQESIZE_S 8 #define IRDMAQPC_RQWQESIZE GENMASK_ULL(9, 8) #define IRDMAQPC_INSERTL2TAG2_S 11 #define IRDMAQPC_INSERTL2TAG2 BIT_ULL(11) #define IRDMAQPC_LIMIT_S 12 #define IRDMAQPC_LIMIT GENMASK_ULL(13, 12) #define IRDMAQPC_ECN_EN_S 14 #define IRDMAQPC_ECN_EN BIT_ULL(14) #define IRDMAQPC_DROPOOOSEG_S 15 #define IRDMAQPC_DROPOOOSEG BIT_ULL(15) #define IRDMAQPC_DUPACK_THRESH_S 16 #define 
IRDMAQPC_DUPACK_THRESH GENMASK_ULL(18, 16) #define IRDMAQPC_ERR_RQ_IDX_VALID_S 19 #define IRDMAQPC_ERR_RQ_IDX_VALID BIT_ULL(19) #define IRDMAQPC_DIS_VLAN_CHECKS_S 19 #define IRDMAQPC_DIS_VLAN_CHECKS GENMASK_ULL(21, 19) #define IRDMAQPC_DC_TCP_EN_S 25 #define IRDMAQPC_DC_TCP_EN BIT_ULL(25) #define IRDMAQPC_RCVTPHEN_S 28 #define IRDMAQPC_RCVTPHEN BIT_ULL(28) #define IRDMAQPC_XMITTPHEN_S 29 #define IRDMAQPC_XMITTPHEN BIT_ULL(29) #define IRDMAQPC_RQTPHEN_S 30 #define IRDMAQPC_RQTPHEN BIT_ULL(30) #define IRDMAQPC_SQTPHEN_S 31 #define IRDMAQPC_SQTPHEN BIT_ULL(31) #define IRDMAQPC_PPIDX_S 32 #define IRDMAQPC_PPIDX GENMASK_ULL(41, 32) #define IRDMAQPC_PMENA_S 47 #define IRDMAQPC_PMENA BIT_ULL(47) #define IRDMAQPC_RDMAP_VER_S 62 #define IRDMAQPC_RDMAP_VER GENMASK_ULL(63, 62) #define IRDMAQPC_ROCE_TVER_S 60 #define IRDMAQPC_ROCE_TVER GENMASK_ULL(63, 60) #define IRDMAQPC_SQADDR_S IRDMA_CQPHC_QPCTX_S #define IRDMAQPC_SQADDR IRDMA_CQPHC_QPCTX #define IRDMAQPC_RQADDR_S IRDMA_CQPHC_QPCTX_S #define IRDMAQPC_RQADDR IRDMA_CQPHC_QPCTX #define IRDMAQPC_TTL_S 0 #define IRDMAQPC_TTL GENMASK_ULL(7, 0) #define IRDMAQPC_RQSIZE_S 8 #define IRDMAQPC_RQSIZE GENMASK_ULL(11, 8) #define IRDMAQPC_SQSIZE_S 12 #define IRDMAQPC_SQSIZE GENMASK_ULL(15, 12) #define IRDMAQPC_GEN1_SRCMACADDRIDX_S 16 #define IRDMAQPC_GEN1_SRCMACADDRIDX GENMASK(21, 16) #define IRDMAQPC_AVOIDSTRETCHACK_S 23 #define IRDMAQPC_AVOIDSTRETCHACK BIT_ULL(23) #define IRDMAQPC_TOS_S 24 #define IRDMAQPC_TOS GENMASK_ULL(31, 24) #define IRDMAQPC_SRCPORTNUM_S 32 #define IRDMAQPC_SRCPORTNUM GENMASK_ULL(47, 32) #define IRDMAQPC_DESTPORTNUM_S 48 #define IRDMAQPC_DESTPORTNUM GENMASK_ULL(63, 48) #define IRDMAQPC_DESTIPADDR0_S 32 #define IRDMAQPC_DESTIPADDR0 GENMASK_ULL(63, 32) #define IRDMAQPC_DESTIPADDR1_S 0 #define IRDMAQPC_DESTIPADDR1 GENMASK_ULL(31, 0) #define IRDMAQPC_DESTIPADDR2_S 32 #define IRDMAQPC_DESTIPADDR2 GENMASK_ULL(63, 32) #define IRDMAQPC_DESTIPADDR3_S 0 #define IRDMAQPC_DESTIPADDR3 GENMASK_ULL(31, 0) #define IRDMAQPC_SNDMSS_S 16 #define IRDMAQPC_SNDMSS GENMASK_ULL(29, 16) #define IRDMAQPC_SYN_RST_HANDLING_S 30 #define IRDMAQPC_SYN_RST_HANDLING GENMASK_ULL(31, 30) #define IRDMAQPC_VLANTAG_S 32 #define IRDMAQPC_VLANTAG GENMASK_ULL(47, 32) #define IRDMAQPC_ARPIDX_S 48 #define IRDMAQPC_ARPIDX GENMASK_ULL(63, 48) #define IRDMAQPC_FLOWLABEL_S 0 #define IRDMAQPC_FLOWLABEL GENMASK_ULL(19, 0) #define IRDMAQPC_WSCALE_S 20 #define IRDMAQPC_WSCALE BIT_ULL(20) #define IRDMAQPC_KEEPALIVE_S 21 #define IRDMAQPC_KEEPALIVE BIT_ULL(21) #define IRDMAQPC_IGNORE_TCP_OPT_S 22 #define IRDMAQPC_IGNORE_TCP_OPT BIT_ULL(22) #define IRDMAQPC_IGNORE_TCP_UNS_OPT_S 23 #define IRDMAQPC_IGNORE_TCP_UNS_OPT BIT_ULL(23) #define IRDMAQPC_TCPSTATE_S 28 #define IRDMAQPC_TCPSTATE GENMASK_ULL(31, 28) #define IRDMAQPC_RCVSCALE_S 32 #define IRDMAQPC_RCVSCALE GENMASK_ULL(35, 32) #define IRDMAQPC_SNDSCALE_S 40 #define IRDMAQPC_SNDSCALE GENMASK_ULL(43, 40) #define IRDMAQPC_PDIDX_S 48 #define IRDMAQPC_PDIDX GENMASK_ULL(63, 48) #define IRDMAQPC_PDIDXHI_S 20 #define IRDMAQPC_PDIDXHI GENMASK_ULL(21, 20) #define IRDMAQPC_PKEY_S 32 #define IRDMAQPC_PKEY GENMASK_ULL(47, 32) #define IRDMAQPC_ACKCREDITS_S 20 #define IRDMAQPC_ACKCREDITS GENMASK_ULL(24, 20) #define IRDMAQPC_QKEY_S 32 #define IRDMAQPC_QKEY GENMASK_ULL(63, 32) #define IRDMAQPC_DESTQP_S 0 #define IRDMAQPC_DESTQP GENMASK_ULL(23, 0) #define IRDMAQPC_KALIVE_TIMER_MAX_PROBES_S 16 #define IRDMAQPC_KALIVE_TIMER_MAX_PROBES GENMASK_ULL(23, 16) #define IRDMAQPC_KEEPALIVE_INTERVAL_S 24 #define IRDMAQPC_KEEPALIVE_INTERVAL GENMASK_ULL(31, 24) #define 
IRDMAQPC_TIMESTAMP_RECENT_S 0 #define IRDMAQPC_TIMESTAMP_RECENT GENMASK_ULL(31, 0) #define IRDMAQPC_TIMESTAMP_AGE_S 32 #define IRDMAQPC_TIMESTAMP_AGE GENMASK_ULL(63, 32) #define IRDMAQPC_SNDNXT_S 0 #define IRDMAQPC_SNDNXT GENMASK_ULL(31, 0) #define IRDMAQPC_ISN_S 32 #define IRDMAQPC_ISN GENMASK_ULL(55, 32) #define IRDMAQPC_PSNNXT_S 0 #define IRDMAQPC_PSNNXT GENMASK_ULL(23, 0) #define IRDMAQPC_LSN_S 32 #define IRDMAQPC_LSN GENMASK_ULL(55, 32) #define IRDMAQPC_SNDWND_S 32 #define IRDMAQPC_SNDWND GENMASK_ULL(63, 32) #define IRDMAQPC_RCVNXT_S 0 #define IRDMAQPC_RCVNXT GENMASK_ULL(31, 0) #define IRDMAQPC_EPSN_S 0 #define IRDMAQPC_EPSN GENMASK_ULL(23, 0) #define IRDMAQPC_RCVWND_S 32 #define IRDMAQPC_RCVWND GENMASK_ULL(63, 32) #define IRDMAQPC_SNDMAX_S 0 #define IRDMAQPC_SNDMAX GENMASK_ULL(31, 0) #define IRDMAQPC_SNDUNA_S 32 #define IRDMAQPC_SNDUNA GENMASK_ULL(63, 32) #define IRDMAQPC_PSNMAX_S 0 #define IRDMAQPC_PSNMAX GENMASK_ULL(23, 0) #define IRDMAQPC_PSNUNA_S 32 #define IRDMAQPC_PSNUNA GENMASK_ULL(55, 32) #define IRDMAQPC_SRTT_S 0 #define IRDMAQPC_SRTT GENMASK_ULL(31, 0) #define IRDMAQPC_RTTVAR_S 32 #define IRDMAQPC_RTTVAR GENMASK_ULL(63, 32) #define IRDMAQPC_SSTHRESH_S 0 #define IRDMAQPC_SSTHRESH GENMASK_ULL(31, 0) #define IRDMAQPC_CWND_S 32 #define IRDMAQPC_CWND GENMASK_ULL(63, 32) #define IRDMAQPC_CWNDROCE_S 32 #define IRDMAQPC_CWNDROCE GENMASK_ULL(55, 32) #define IRDMAQPC_SNDWL1_S 0 #define IRDMAQPC_SNDWL1 GENMASK_ULL(31, 0) #define IRDMAQPC_SNDWL2_S 32 #define IRDMAQPC_SNDWL2 GENMASK_ULL(63, 32) #define IRDMAQPC_ERR_RQ_IDX_S 32 #define IRDMAQPC_ERR_RQ_IDX GENMASK_ULL(46, 32) #define IRDMAQPC_RTOMIN_S 57 #define IRDMAQPC_RTOMIN GENMASK_ULL(63, 57) #define IRDMAQPC_MAXSNDWND_S 0 #define IRDMAQPC_MAXSNDWND GENMASK_ULL(31, 0) #define IRDMAQPC_REXMIT_THRESH_S 48 #define IRDMAQPC_REXMIT_THRESH GENMASK_ULL(53, 48) #define IRDMAQPC_RNRNAK_THRESH_S 54 #define IRDMAQPC_RNRNAK_THRESH GENMASK_ULL(56, 54) #define IRDMAQPC_TXCQNUM_S 0 #define IRDMAQPC_TXCQNUM GENMASK_ULL(18, 0) #define IRDMAQPC_RXCQNUM_S 32 #define IRDMAQPC_RXCQNUM GENMASK_ULL(50, 32) #define IRDMAQPC_STAT_INDEX_S 0 #define IRDMAQPC_STAT_INDEX GENMASK_ULL(6, 0) #define IRDMAQPC_Q2ADDR_S 8 #define IRDMAQPC_Q2ADDR GENMASK_ULL(63, 8) #define IRDMAQPC_LASTBYTESENT_S 0 #define IRDMAQPC_LASTBYTESENT GENMASK_ULL(7, 0) #define IRDMAQPC_MACADDRESS_S 16 #define IRDMAQPC_MACADDRESS GENMASK_ULL(63, 16) #define IRDMAQPC_ORDSIZE_S 0 #define IRDMAQPC_ORDSIZE GENMASK_ULL(7, 0) #define IRDMAQPC_IRDSIZE_S 16 #define IRDMAQPC_IRDSIZE GENMASK_ULL(18, 16) #define IRDMAQPC_UDPRIVCQENABLE_S 19 #define IRDMAQPC_UDPRIVCQENABLE BIT_ULL(19) #define IRDMAQPC_WRRDRSPOK_S 20 #define IRDMAQPC_WRRDRSPOK BIT_ULL(20) #define IRDMAQPC_RDOK_S 21 #define IRDMAQPC_RDOK BIT_ULL(21) #define IRDMAQPC_SNDMARKERS_S 22 #define IRDMAQPC_SNDMARKERS BIT_ULL(22) #define IRDMAQPC_DCQCNENABLE_S 22 #define IRDMAQPC_DCQCNENABLE BIT_ULL(22) #define IRDMAQPC_FW_CC_ENABLE_S 28 #define IRDMAQPC_FW_CC_ENABLE BIT_ULL(28) #define IRDMAQPC_RCVNOICRC_S 31 #define IRDMAQPC_RCVNOICRC BIT_ULL(31) #define IRDMAQPC_BINDEN_S 23 #define IRDMAQPC_BINDEN BIT_ULL(23) #define IRDMAQPC_FASTREGEN_S 24 #define IRDMAQPC_FASTREGEN BIT_ULL(24) #define IRDMAQPC_PRIVEN_S 25 #define IRDMAQPC_PRIVEN BIT_ULL(25) #define IRDMAQPC_TIMELYENABLE_S 27 #define IRDMAQPC_TIMELYENABLE BIT_ULL(27) #define IRDMAQPC_THIGH_S 52 #define IRDMAQPC_THIGH GENMASK_ULL(63, 52) #define IRDMAQPC_TLOW_S 32 #define IRDMAQPC_TLOW GENMASK_ULL(39, 32) #define IRDMAQPC_REMENDPOINTIDX_S 0 #define IRDMAQPC_REMENDPOINTIDX GENMASK_ULL(16, 0) 
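/*
 * Illustrative sketch: ORD is carried in the QP context as a raw count
 * (IRDMAQPC_ORDSIZE is 8 bits wide) while IRD has to fit the 3-bit
 * IRDMAQPC_IRDSIZE field, so it is stored as one of the encoded
 * IRDMA_IRD_HW_SIZE_* buckets defined earlier in this header.  The
 * bucket thresholds below are a guess for illustration only, as is the
 * example_* naming.
 */
static inline u8 example_encode_ird(u16 ird)
{
	if (ird > 128)
		return IRDMA_IRD_HW_SIZE_256;
	if (ird > 64)
		return IRDMA_IRD_HW_SIZE_128;
	if (ird > 16)
		return IRDMA_IRD_HW_SIZE_64;
	if (ird > 4)
		return IRDMA_IRD_HW_SIZE_16;
	return IRDMA_IRD_HW_SIZE_4;
}

static inline u64 example_ord_ird_word(u8 ord, u16 ird)
{
	return FIELD_PREP(IRDMAQPC_ORDSIZE, ord) |
	       FIELD_PREP(IRDMAQPC_IRDSIZE, example_encode_ird(ird));
}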
#define IRDMAQPC_USESTATSINSTANCE_S 26 #define IRDMAQPC_USESTATSINSTANCE BIT_ULL(26) #define IRDMAQPC_IWARPMODE_S 28 #define IRDMAQPC_IWARPMODE BIT_ULL(28) #define IRDMAQPC_RCVMARKERS_S 29 #define IRDMAQPC_RCVMARKERS BIT_ULL(29) #define IRDMAQPC_ALIGNHDRS_S 30 #define IRDMAQPC_ALIGNHDRS BIT_ULL(30) #define IRDMAQPC_RCVNOMPACRC_S 31 #define IRDMAQPC_RCVNOMPACRC BIT_ULL(31) #define IRDMAQPC_RCVMARKOFFSET_S 32 #define IRDMAQPC_RCVMARKOFFSET GENMASK_ULL(40, 32) #define IRDMAQPC_SNDMARKOFFSET_S 48 #define IRDMAQPC_SNDMARKOFFSET GENMASK_ULL(56, 48) #define IRDMAQPC_QPCOMPCTX_S IRDMA_CQPHC_QPCTX_S #define IRDMAQPC_QPCOMPCTX IRDMA_CQPHC_QPCTX #define IRDMAQPC_SQTPHVAL_S 0 #define IRDMAQPC_SQTPHVAL GENMASK_ULL(7, 0) #define IRDMAQPC_RQTPHVAL_S 8 #define IRDMAQPC_RQTPHVAL GENMASK_ULL(15, 8) #define IRDMAQPC_QSHANDLE_S 16 #define IRDMAQPC_QSHANDLE GENMASK_ULL(25, 16) #define IRDMAQPC_EXCEPTION_LAN_QUEUE_S 32 #define IRDMAQPC_EXCEPTION_LAN_QUEUE GENMASK_ULL(43, 32) #define IRDMAQPC_LOCAL_IPADDR3_S 0 #define IRDMAQPC_LOCAL_IPADDR3 GENMASK_ULL(31, 0) #define IRDMAQPC_LOCAL_IPADDR2_S 32 #define IRDMAQPC_LOCAL_IPADDR2 GENMASK_ULL(63, 32) #define IRDMAQPC_LOCAL_IPADDR1_S 0 #define IRDMAQPC_LOCAL_IPADDR1 GENMASK_ULL(31, 0) #define IRDMAQPC_LOCAL_IPADDR0_S 32 #define IRDMAQPC_LOCAL_IPADDR0 GENMASK_ULL(63, 32) #define IRDMA_FW_VER_MINOR_S 0 #define IRDMA_FW_VER_MINOR GENMASK_ULL(15, 0) #define IRDMA_FW_VER_MAJOR_S 16 #define IRDMA_FW_VER_MAJOR GENMASK_ULL(31, 16) #define IRDMA_FEATURE_INFO_S 0 #define IRDMA_FEATURE_INFO GENMASK_ULL(47, 0) #define IRDMA_FEATURE_CNT_S 32 #define IRDMA_FEATURE_CNT GENMASK_ULL(47, 32) #define IRDMA_FEATURE_TYPE_S 48 #define IRDMA_FEATURE_TYPE GENMASK_ULL(63, 48) #define IRDMA_RSVD_S 41 #define IRDMA_RSVD GENMASK_ULL(55, 41) #define IRDMAQPSQ_OPCODE_S 32 #define IRDMAQPSQ_OPCODE GENMASK_ULL(37, 32) #define IRDMAQPSQ_COPY_HOST_PBL_S 43 #define IRDMAQPSQ_COPY_HOST_PBL BIT_ULL(43) #define IRDMAQPSQ_ADDFRAGCNT_S 38 #define IRDMAQPSQ_ADDFRAGCNT GENMASK_ULL(41, 38) #define IRDMAQPSQ_PUSHWQE_S 56 #define IRDMAQPSQ_PUSHWQE BIT_ULL(56) #define IRDMAQPSQ_STREAMMODE_S 58 #define IRDMAQPSQ_STREAMMODE BIT_ULL(58) #define IRDMAQPSQ_WAITFORRCVPDU_S 59 #define IRDMAQPSQ_WAITFORRCVPDU BIT_ULL(59) #define IRDMAQPSQ_READFENCE_S 60 #define IRDMAQPSQ_READFENCE BIT_ULL(60) #define IRDMAQPSQ_LOCALFENCE_S 61 #define IRDMAQPSQ_LOCALFENCE BIT_ULL(61) #define IRDMAQPSQ_UDPHEADER_S 61 #define IRDMAQPSQ_UDPHEADER BIT_ULL(61) #define IRDMAQPSQ_L4LEN_S 42 #define IRDMAQPSQ_L4LEN GENMASK_ULL(45, 42) #define IRDMAQPSQ_SIGCOMPL_S 62 #define IRDMAQPSQ_SIGCOMPL BIT_ULL(62) #define IRDMAQPSQ_VALID_S 63 #define IRDMAQPSQ_VALID BIT_ULL(63) #define IRDMAQPSQ_FRAG_TO_S IRDMA_CQPHC_QPCTX_S #define IRDMAQPSQ_FRAG_TO IRDMA_CQPHC_QPCTX #define IRDMAQPSQ_FRAG_VALID_S 63 #define IRDMAQPSQ_FRAG_VALID BIT_ULL(63) #define IRDMAQPSQ_FRAG_LEN_S 32 #define IRDMAQPSQ_FRAG_LEN GENMASK_ULL(62, 32) #define IRDMAQPSQ_FRAG_STAG_S 0 #define IRDMAQPSQ_FRAG_STAG GENMASK_ULL(31, 0) #define IRDMAQPSQ_GEN1_FRAG_LEN_S 0 #define IRDMAQPSQ_GEN1_FRAG_LEN GENMASK_ULL(31, 0) #define IRDMAQPSQ_GEN1_FRAG_STAG_S 32 #define IRDMAQPSQ_GEN1_FRAG_STAG GENMASK_ULL(63, 32) #define IRDMAQPSQ_REMSTAGINV_S 0 #define IRDMAQPSQ_REMSTAGINV GENMASK_ULL(31, 0) #define IRDMAQPSQ_DESTQKEY_S 0 #define IRDMAQPSQ_DESTQKEY GENMASK_ULL(31, 0) #define IRDMAQPSQ_DESTQPN_S 32 #define IRDMAQPSQ_DESTQPN GENMASK_ULL(55, 32) #define IRDMAQPSQ_AHID_S 0 #define IRDMAQPSQ_AHID GENMASK_ULL(16, 0) #define IRDMAQPSQ_INLINEDATAFLAG_S 57 #define IRDMAQPSQ_INLINEDATAFLAG BIT_ULL(57) 
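/*
 * Illustrative sketch: the control bits above all land in the final
 * quadword of an SQ WQE.  The valid bit carries the ring polarity and is
 * what hands the WQE to hardware, so it is written last, after the rest
 * of the WQE and behind a DMA write barrier; the hypothetical helper
 * below only shows how the bits compose.
 */
static inline u64 example_send_wqe_hdr(bool inline_data, bool signaled,
				       bool read_fence, bool local_fence,
				       u8 polarity)
{
	return FIELD_PREP(IRDMAQPSQ_OPCODE, IRDMAQP_OP_RDMA_SEND) |
	       FIELD_PREP(IRDMAQPSQ_INLINEDATAFLAG, inline_data) |
	       FIELD_PREP(IRDMAQPSQ_READFENCE, read_fence) |
	       FIELD_PREP(IRDMAQPSQ_LOCALFENCE, local_fence) |
	       FIELD_PREP(IRDMAQPSQ_SIGCOMPL, signaled) |
	       FIELD_PREP(IRDMAQPSQ_VALID, polarity);
}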
#define IRDMA_INLINE_VALID_S 7 #define IRDMAQPSQ_INLINEDATALEN_S 48 #define IRDMAQPSQ_INLINEDATALEN GENMASK_ULL(55, 48) #define IRDMAQPSQ_IMMDATAFLAG_S 47 #define IRDMAQPSQ_IMMDATAFLAG BIT_ULL(47) #define IRDMAQPSQ_REPORTRTT_S 46 #define IRDMAQPSQ_REPORTRTT BIT_ULL(46) #define IRDMAQPSQ_IMMDATA_S 0 #define IRDMAQPSQ_IMMDATA GENMASK_ULL(63, 0) #define IRDMAQPSQ_REMSTAG_S 0 #define IRDMAQPSQ_REMSTAG GENMASK_ULL(31, 0) #define IRDMAQPSQ_REMTO_S IRDMA_CQPHC_QPCTX_S #define IRDMAQPSQ_REMTO IRDMA_CQPHC_QPCTX #define IRDMAQPSQ_STAGRIGHTS_S 48 #define IRDMAQPSQ_STAGRIGHTS GENMASK_ULL(52, 48) #define IRDMAQPSQ_VABASEDTO_S 53 #define IRDMAQPSQ_VABASEDTO BIT_ULL(53) #define IRDMAQPSQ_MEMWINDOWTYPE_S 54 #define IRDMAQPSQ_MEMWINDOWTYPE BIT_ULL(54) #define IRDMAQPSQ_MWLEN_S IRDMA_CQPHC_QPCTX_S #define IRDMAQPSQ_MWLEN IRDMA_CQPHC_QPCTX #define IRDMAQPSQ_PARENTMRSTAG_S 32 #define IRDMAQPSQ_PARENTMRSTAG GENMASK_ULL(63, 32) #define IRDMAQPSQ_MWSTAG_S 0 #define IRDMAQPSQ_MWSTAG GENMASK_ULL(31, 0) #define IRDMAQPSQ_BASEVA_TO_FBO_S IRDMA_CQPHC_QPCTX_S #define IRDMAQPSQ_BASEVA_TO_FBO IRDMA_CQPHC_QPCTX #define IRDMAQPSQ_LOCSTAG_S 0 #define IRDMAQPSQ_LOCSTAG GENMASK_ULL(31, 0) #define IRDMAQPSQ_STAGKEY_S 0 #define IRDMAQPSQ_STAGKEY GENMASK_ULL(7, 0) #define IRDMAQPSQ_STAGINDEX_S 8 #define IRDMAQPSQ_STAGINDEX GENMASK_ULL(31, 8) #define IRDMAQPSQ_COPYHOSTPBLS_S 43 #define IRDMAQPSQ_COPYHOSTPBLS BIT_ULL(43) #define IRDMAQPSQ_LPBLSIZE_S 44 #define IRDMAQPSQ_LPBLSIZE GENMASK_ULL(45, 44) #define IRDMAQPSQ_HPAGESIZE_S 46 #define IRDMAQPSQ_HPAGESIZE GENMASK_ULL(47, 46) #define IRDMAQPSQ_STAGLEN_S 0 #define IRDMAQPSQ_STAGLEN GENMASK_ULL(40, 0) #define IRDMAQPSQ_FIRSTPMPBLIDXLO_S 48 #define IRDMAQPSQ_FIRSTPMPBLIDXLO GENMASK_ULL(63, 48) #define IRDMAQPSQ_FIRSTPMPBLIDXHI_S 0 #define IRDMAQPSQ_FIRSTPMPBLIDXHI GENMASK_ULL(11, 0) #define IRDMAQPSQ_PBLADDR_S 12 #define IRDMAQPSQ_PBLADDR GENMASK_ULL(63, 12) /* iwarp QP RQ WQE common fields */ #define IRDMAQPRQ_ADDFRAGCNT_S IRDMAQPSQ_ADDFRAGCNT_S #define IRDMAQPRQ_ADDFRAGCNT IRDMAQPSQ_ADDFRAGCNT #define IRDMAQPRQ_VALID_S IRDMAQPSQ_VALID_S #define IRDMAQPRQ_VALID IRDMAQPSQ_VALID #define IRDMAQPRQ_COMPLCTX_S IRDMA_CQPHC_QPCTX_S #define IRDMAQPRQ_COMPLCTX IRDMA_CQPHC_QPCTX #define IRDMAQPRQ_FRAG_LEN_S IRDMAQPSQ_FRAG_LEN_S #define IRDMAQPRQ_FRAG_LEN IRDMAQPSQ_FRAG_LEN #define IRDMAQPRQ_STAG_S IRDMAQPSQ_FRAG_STAG_S #define IRDMAQPRQ_STAG IRDMAQPSQ_FRAG_STAG #define IRDMAQPRQ_TO_S IRDMAQPSQ_FRAG_TO_S #define IRDMAQPRQ_TO IRDMAQPSQ_FRAG_TO #define IRDMAPFINT_OICR_HMC_ERR_M BIT(26) #define IRDMAPFINT_OICR_PE_PUSH_M BIT(27) #define IRDMAPFINT_OICR_PE_CRITERR_M BIT(28) #define IRDMA_QUERY_FPM_MAX_QPS_S 0 #define IRDMA_QUERY_FPM_MAX_QPS GENMASK_ULL(18, 0) #define IRDMA_QUERY_FPM_MAX_CQS_S 0 #define IRDMA_QUERY_FPM_MAX_CQS GENMASK_ULL(19, 0) #define IRDMA_QUERY_FPM_FIRST_PE_SD_INDEX_S 0 #define IRDMA_QUERY_FPM_FIRST_PE_SD_INDEX GENMASK_ULL(13, 0) #define IRDMA_QUERY_FPM_MAX_PE_SDS_S 32 #define IRDMA_QUERY_FPM_MAX_PE_SDS GENMASK_ULL(45, 32) #define IRDMA_QUERY_FPM_MAX_CEQS_S 0 #define IRDMA_QUERY_FPM_MAX_CEQS GENMASK_ULL(9, 0) #define IRDMA_QUERY_FPM_XFBLOCKSIZE_S 32 #define IRDMA_QUERY_FPM_XFBLOCKSIZE GENMASK_ULL(63, 32) #define IRDMA_QUERY_FPM_Q1BLOCKSIZE_S 32 #define IRDMA_QUERY_FPM_Q1BLOCKSIZE GENMASK_ULL(63, 32) #define IRDMA_QUERY_FPM_HTMULTIPLIER_S 16 #define IRDMA_QUERY_FPM_HTMULTIPLIER GENMASK_ULL(19, 16) #define IRDMA_QUERY_FPM_TIMERBUCKET_S 32 #define IRDMA_QUERY_FPM_TIMERBUCKET GENMASK_ULL(47, 32) #define IRDMA_QUERY_FPM_RRFBLOCKSIZE_S 32 #define IRDMA_QUERY_FPM_RRFBLOCKSIZE 
GENMASK_ULL(63, 32) #define IRDMA_QUERY_FPM_RRFFLBLOCKSIZE_S 32 #define IRDMA_QUERY_FPM_RRFFLBLOCKSIZE GENMASK_ULL(63, 32) #define IRDMA_QUERY_FPM_OOISCFBLOCKSIZE_S 32 #define IRDMA_QUERY_FPM_OOISCFBLOCKSIZE GENMASK_ULL(63, 32) #define IRDMA_SHMC_PAGE_ALLOCATED_HMC_FN_ID_S 0 #define IRDMA_SHMC_PAGE_ALLOCATED_HMC_FN_ID GENMASK_ULL(15, 0) #define IRDMA_GET_CURRENT_AEQ_ELEM(_aeq) \ ( \ (_aeq)->aeqe_base[IRDMA_RING_CURRENT_TAIL((_aeq)->aeq_ring)].buf \ ) #define IRDMA_GET_CURRENT_CEQ_ELEM(_ceq) \ ( \ (_ceq)->ceqe_base[IRDMA_RING_CURRENT_TAIL((_ceq)->ceq_ring)].buf \ ) #define IRDMA_GET_CEQ_ELEM_AT_POS(_ceq, _pos) \ ( \ (_ceq)->ceqe_base[_pos].buf \ ) #define IRDMA_RING_GET_NEXT_TAIL(_ring, _idx) \ ( \ ((_ring).tail + (_idx)) % (_ring).size \ ) #define IRDMA_GET_RING_OFFSET(_ring, _i) \ ( \ ((_ring).head + (_i)) % (_ring).size \ ) #define IRDMA_GET_CQ_ELEM_AT_OFFSET(_cq, _i, _cqe) \ { \ - register __u32 offset; \ + __u32 offset; \ offset = IRDMA_GET_RING_OFFSET((_cq)->cq_ring, _i); \ (_cqe) = (_cq)->cq_base[offset].buf; \ } #define IRDMA_GET_CURRENT_CQ_ELEM(_cq) \ ( \ (_cq)->cq_base[IRDMA_RING_CURRENT_HEAD((_cq)->cq_ring)].buf \ ) #define IRDMA_GET_CURRENT_EXTENDED_CQ_ELEM(_cq) \ ( \ ((struct irdma_extended_cqe *) \ ((_cq)->cq_base))[IRDMA_RING_CURRENT_HEAD((_cq)->cq_ring)].buf \ ) #define IRDMA_RING_INIT(_ring, _size) \ { \ (_ring).head = 0; \ (_ring).tail = 0; \ (_ring).size = (_size); \ } #define IRDMA_RING_SIZE(_ring) ((_ring).size) #define IRDMA_RING_CURRENT_HEAD(_ring) ((_ring).head) #define IRDMA_RING_CURRENT_TAIL(_ring) ((_ring).tail) #define IRDMA_RING_MOVE_HEAD(_ring, _retcode) \ { \ - register u32 size; \ + u32 size; \ size = (_ring).size; \ if (!IRDMA_RING_FULL_ERR(_ring)) { \ (_ring).head = ((_ring).head + 1) % size; \ (_retcode) = 0; \ } else { \ (_retcode) = -ENOSPC; \ } \ } #define IRDMA_RING_MOVE_HEAD_BY_COUNT(_ring, _count, _retcode) \ { \ - register u32 size; \ + u32 size; \ size = (_ring).size; \ if ((IRDMA_RING_USED_QUANTA(_ring) + (_count)) < size) { \ (_ring).head = ((_ring).head + (_count)) % size; \ (_retcode) = 0; \ } else { \ (_retcode) = -ENOSPC; \ } \ } #define IRDMA_SQ_RING_MOVE_HEAD(_ring, _retcode) \ { \ - register u32 size; \ + u32 size; \ size = (_ring).size; \ if (!IRDMA_SQ_RING_FULL_ERR(_ring)) { \ (_ring).head = ((_ring).head + 1) % size; \ (_retcode) = 0; \ } else { \ (_retcode) = -ENOSPC; \ } \ } #define IRDMA_SQ_RING_MOVE_HEAD_BY_COUNT(_ring, _count, _retcode) \ { \ - register u32 size; \ + u32 size; \ size = (_ring).size; \ if ((IRDMA_RING_USED_QUANTA(_ring) + (_count)) < (size - 256)) { \ (_ring).head = ((_ring).head + (_count)) % size; \ (_retcode) = 0; \ } else { \ (_retcode) = -ENOSPC; \ } \ } #define IRDMA_RING_MOVE_HEAD_BY_COUNT_NOCHECK(_ring, _count) \ (_ring).head = ((_ring).head + (_count)) % (_ring).size #define IRDMA_RING_MOVE_TAIL(_ring) \ (_ring).tail = ((_ring).tail + 1) % (_ring).size #define IRDMA_RING_MOVE_HEAD_NOCHECK(_ring) \ (_ring).head = ((_ring).head + 1) % (_ring).size #define IRDMA_RING_MOVE_TAIL_BY_COUNT(_ring, _count) \ (_ring).tail = ((_ring).tail + (_count)) % (_ring).size #define IRDMA_RING_SET_TAIL(_ring, _pos) \ (_ring).tail = (_pos) % (_ring).size #define IRDMA_RING_FULL_ERR(_ring) \ ( \ (IRDMA_RING_USED_QUANTA(_ring) == ((_ring).size - 1)) \ ) #define IRDMA_ERR_RING_FULL2(_ring) \ ( \ (IRDMA_RING_USED_QUANTA(_ring) == ((_ring).size - 2)) \ ) #define IRDMA_ERR_RING_FULL3(_ring) \ ( \ (IRDMA_RING_USED_QUANTA(_ring) == ((_ring).size - 3)) \ ) #define IRDMA_SQ_RING_FULL_ERR(_ring) \ ( \ (IRDMA_RING_USED_QUANTA(_ring) == 
((_ring).size - 257)) \ ) #define IRDMA_ERR_SQ_RING_FULL2(_ring) \ ( \ (IRDMA_RING_USED_QUANTA(_ring) == ((_ring).size - 258)) \ ) #define IRDMA_ERR_SQ_RING_FULL3(_ring) \ ( \ (IRDMA_RING_USED_QUANTA(_ring) == ((_ring).size - 259)) \ ) #define IRDMA_RING_MORE_WORK(_ring) \ ( \ (IRDMA_RING_USED_QUANTA(_ring) != 0) \ ) #define IRDMA_RING_USED_QUANTA(_ring) \ ( \ (((_ring).head + (_ring).size - (_ring).tail) % (_ring).size) \ ) #define IRDMA_RING_FREE_QUANTA(_ring) \ ( \ ((_ring).size - IRDMA_RING_USED_QUANTA(_ring) - 1) \ ) #define IRDMA_SQ_RING_FREE_QUANTA(_ring) \ ( \ ((_ring).size - IRDMA_RING_USED_QUANTA(_ring) - 257) \ ) #define IRDMA_ATOMIC_RING_MOVE_HEAD(_ring, index, _retcode) \ { \ index = IRDMA_RING_CURRENT_HEAD(_ring); \ IRDMA_RING_MOVE_HEAD(_ring, _retcode); \ } enum irdma_protocol_used { IRDMA_ANY_PROTOCOL = 0, IRDMA_IWARP_PROTOCOL_ONLY = 1, IRDMA_ROCE_PROTOCOL_ONLY = 2, }; enum irdma_qp_wqe_size { IRDMA_WQE_SIZE_32 = 32, IRDMA_WQE_SIZE_64 = 64, IRDMA_WQE_SIZE_96 = 96, IRDMA_WQE_SIZE_128 = 128, IRDMA_WQE_SIZE_256 = 256, }; +enum irdma_ws_op_type { + IRDMA_WS_OP_TYPE_NODE = 0, + IRDMA_WS_OP_TYPE_LEAF_NODE_GROUP, +}; + +enum irdma_ws_rate_limit_flags { + IRDMA_WS_RATE_LIMIT_FLAGS_VALID = 0x1, + IRDMA_WS_NO_RDMA_RATE_LIMIT = 0x2, + IRDMA_WS_LEAF_NODE_IS_PART_GROUP = 0x4, + IRDMA_WS_TREE_RATE_LIMITING = 0x8, + IRDMA_WS_PACING_CONTROL = 0x10, +}; + enum irdma_ws_node_op { IRDMA_ADD_NODE = 0, IRDMA_MODIFY_NODE, IRDMA_DEL_NODE, }; enum { IRDMA_Q_ALIGNMENT_M = (128 - 1), IRDMA_AEQ_ALIGNMENT_M = (256 - 1), IRDMA_Q2_ALIGNMENT_M = (256 - 1), IRDMA_CEQ_ALIGNMENT_M = (256 - 1), IRDMA_CQ0_ALIGNMENT_M = (256 - 1), IRDMA_HOST_CTX_ALIGNMENT_M = (4 - 1), IRDMA_SHADOWAREA_M = (128 - 1), IRDMA_FPM_QUERY_BUF_ALIGNMENT_M = (4 - 1), IRDMA_FPM_COMMIT_BUF_ALIGNMENT_M = (4 - 1), }; enum irdma_alignment { IRDMA_CQP_ALIGNMENT = 0x200, IRDMA_AEQ_ALIGNMENT = 0x100, IRDMA_CEQ_ALIGNMENT = 0x100, IRDMA_CQ0_ALIGNMENT = 0x100, IRDMA_SD_BUF_ALIGNMENT = 0x80, IRDMA_FEATURE_BUF_ALIGNMENT = 0x10, }; -enum icrdma_protocol_used { - ICRDMA_ANY_PROTOCOL = 0, - ICRDMA_IWARP_PROTOCOL_ONLY = 1, - ICRDMA_ROCE_PROTOCOL_ONLY = 2, -}; - /** * set_64bit_val - set 64 bit value to hw wqe * @wqe_words: wqe addr to write * @byte_index: index in wqe * @val: value to write **/ static inline void set_64bit_val(__le64 *wqe_words, u32 byte_index, u64 val) { wqe_words[byte_index >> 3] = cpu_to_le64(val); } /** * set_32bit_val - set 32 bit value to hw wqe * @wqe_words: wqe addr to write * @byte_index: index in wqe * @val: value to write **/ static inline void set_32bit_val(__le32 *wqe_words, u32 byte_index, u32 val) { wqe_words[byte_index >> 2] = cpu_to_le32(val); } /** * get_64bit_val - read 64 bit value from wqe * @wqe_words: wqe addr * @byte_index: index to read from * @val: read value **/ static inline void get_64bit_val(__le64 *wqe_words, u32 byte_index, u64 *val) { *val = le64_to_cpu(wqe_words[byte_index >> 3]); } /** * get_32bit_val - read 32 bit value from wqe * @wqe_words: wqe addr * @byte_index: index to reaad from * @val: return 32 bit value **/ static inline void get_32bit_val(__le32 *wqe_words, u32 byte_index, u32 *val) { *val = le32_to_cpu(wqe_words[byte_index >> 2]); } #endif /* IRDMA_DEFS_H */ diff --git a/sys/dev/irdma/irdma_hmc.c b/sys/dev/irdma/irdma_hmc.c index 1da0c9da9746..a3c47c8b1434 100644 --- a/sys/dev/irdma/irdma_hmc.c +++ b/sys/dev/irdma/irdma_hmc.c @@ -1,740 +1,740 @@ /*- * SPDX-License-Identifier: GPL-2.0 or Linux-OpenIB * - * Copyright (c) 2015 - 2022 Intel Corporation + * Copyright (c) 2015 - 2023 Intel 
Corporation * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU * General Public License (GPL) Version 2, available from the file * COPYING in the main directory of this source tree, or the * OpenFabrics.org BSD license below: * * Redistribution and use in source and binary forms, with or * without modification, are permitted provided that the following * conditions are met: * * - Redistributions of source code must retain the above * copyright notice, this list of conditions and the following * disclaimer. * * - Redistributions in binary form must reproduce the above * copyright notice, this list of conditions and the following * disclaimer in the documentation and/or other materials * provided with the distribution. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ #include "osdep.h" #include "irdma_hmc.h" #include "irdma_defs.h" #include "irdma_type.h" #include "irdma_protos.h" /** * irdma_find_sd_index_limit - finds segment descriptor index limit * @hmc_info: pointer to the HMC configuration information structure * @type: type of HMC resources we're searching * @idx: starting index for the object * @cnt: number of objects we're trying to create * @sd_idx: pointer to return index of the segment descriptor in question * @sd_limit: pointer to return the maximum number of segment descriptors * * This function calculates the segment descriptor index and index limit * for the resource defined by irdma_hmc_rsrc_type. */ static void irdma_find_sd_index_limit(struct irdma_hmc_info *hmc_info, u32 type, u32 idx, u32 cnt, u32 *sd_idx, u32 *sd_limit) { u64 fpm_addr, fpm_limit; fpm_addr = hmc_info->hmc_obj[(type)].base + hmc_info->hmc_obj[type].size * idx; fpm_limit = fpm_addr + hmc_info->hmc_obj[type].size * cnt; *sd_idx = (u32)(fpm_addr / IRDMA_HMC_DIRECT_BP_SIZE); *sd_limit = (u32)((fpm_limit - 1) / IRDMA_HMC_DIRECT_BP_SIZE); *sd_limit += 1; } /** * irdma_find_pd_index_limit - finds page descriptor index limit * @hmc_info: pointer to the HMC configuration information struct * @type: HMC resource type we're examining * @idx: starting index for the object * @cnt: number of objects we're trying to create * @pd_idx: pointer to return page descriptor index * @pd_limit: pointer to return page descriptor index limit * * Calculates the page descriptor index and index limit for the resource * defined by irdma_hmc_rsrc_type. 
*/ static void irdma_find_pd_index_limit(struct irdma_hmc_info *hmc_info, u32 type, u32 idx, u32 cnt, u32 *pd_idx, u32 *pd_limit) { u64 fpm_adr, fpm_limit; fpm_adr = hmc_info->hmc_obj[type].base + hmc_info->hmc_obj[type].size * idx; fpm_limit = fpm_adr + (hmc_info)->hmc_obj[(type)].size * (cnt); *pd_idx = (u32)(fpm_adr / IRDMA_HMC_PAGED_BP_SIZE); *pd_limit = (u32)((fpm_limit - 1) / IRDMA_HMC_PAGED_BP_SIZE); *pd_limit += 1; } /** * irdma_set_sd_entry - setup entry for sd programming * @pa: physical addr * @idx: sd index * @type: paged or direct sd * @entry: sd entry ptr */ static void irdma_set_sd_entry(u64 pa, u32 idx, enum irdma_sd_entry_type type, struct irdma_update_sd_entry *entry) { entry->data = pa | FIELD_PREP(IRDMA_PFHMC_SDDATALOW_PMSDBPCOUNT, IRDMA_HMC_MAX_BP_COUNT) | FIELD_PREP(IRDMA_PFHMC_SDDATALOW_PMSDTYPE, type == IRDMA_SD_TYPE_PAGED ? 0 : 1) | FIELD_PREP(IRDMA_PFHMC_SDDATALOW_PMSDVALID, 1); entry->cmd = idx | FIELD_PREP(IRDMA_PFHMC_SDCMD_PMSDWR, 1) | IRDMA_PFHMC_SDCMD_PMSDPARTSEL; } /** * irdma_clr_sd_entry - setup entry for sd clear * @idx: sd index * @type: paged or direct sd * @entry: sd entry ptr */ static void irdma_clr_sd_entry(u32 idx, enum irdma_sd_entry_type type, struct irdma_update_sd_entry *entry) { entry->data = FIELD_PREP(IRDMA_PFHMC_SDDATALOW_PMSDBPCOUNT, IRDMA_HMC_MAX_BP_COUNT) | FIELD_PREP(IRDMA_PFHMC_SDDATALOW_PMSDTYPE, type == IRDMA_SD_TYPE_PAGED ? 0 : 1); entry->cmd = idx | FIELD_PREP(IRDMA_PFHMC_SDCMD_PMSDWR, 1) | IRDMA_PFHMC_SDCMD_PMSDPARTSEL; } /** * irdma_invalidate_pf_hmc_pd - Invalidates the pd cache in the hardware for PF * @dev: pointer to our device struct * @sd_idx: segment descriptor index * @pd_idx: page descriptor index */ static inline void irdma_invalidate_pf_hmc_pd(struct irdma_sc_dev *dev, u32 sd_idx, u32 pd_idx) { u32 val = FIELD_PREP(IRDMA_PFHMC_PDINV_PMSDIDX, sd_idx) | FIELD_PREP(IRDMA_PFHMC_PDINV_PMSDPARTSEL, 1) | FIELD_PREP(IRDMA_PFHMC_PDINV_PMPDIDX, pd_idx); writel(val, dev->hw_regs[IRDMA_PFHMC_PDINV]); } /** * irdma_hmc_sd_one - setup 1 sd entry for cqp * @dev: pointer to the device structure * @hmc_fn_id: hmc's function id * @pa: physical addr * @sd_idx: sd index * @type: paged or direct sd * @setsd: flag to set or clear sd */ int irdma_hmc_sd_one(struct irdma_sc_dev *dev, u16 hmc_fn_id, u64 pa, u32 sd_idx, enum irdma_sd_entry_type type, bool setsd) { struct irdma_update_sds_info sdinfo; sdinfo.cnt = 1; sdinfo.hmc_fn_id = hmc_fn_id; if (setsd) irdma_set_sd_entry(pa, sd_idx, type, sdinfo.entry); else irdma_clr_sd_entry(sd_idx, type, sdinfo.entry); return dev->cqp->process_cqp_sds(dev, &sdinfo); } /** * irdma_hmc_sd_grp - setup group of sd entries for cqp * @dev: pointer to the device structure * @hmc_info: pointer to the HMC configuration information struct * @sd_index: sd index * @sd_cnt: number of sd entries * @setsd: flag to set or clear sd */ static int irdma_hmc_sd_grp(struct irdma_sc_dev *dev, struct irdma_hmc_info *hmc_info, u32 sd_index, u32 sd_cnt, bool setsd) { struct irdma_hmc_sd_entry *sd_entry; struct irdma_update_sds_info sdinfo = {0}; u64 pa; u32 i; int ret_code = 0; sdinfo.hmc_fn_id = hmc_info->hmc_fn_id; for (i = sd_index; i < sd_index + sd_cnt; i++) { sd_entry = &hmc_info->sd_table.sd_entry[i]; if (!sd_entry || (!sd_entry->valid && setsd) || (sd_entry->valid && !setsd)) continue; if (setsd) { pa = (sd_entry->entry_type == IRDMA_SD_TYPE_PAGED) ? 
sd_entry->u.pd_table.pd_page_addr.pa : sd_entry->u.bp.addr.pa; irdma_set_sd_entry(pa, i, sd_entry->entry_type, &sdinfo.entry[sdinfo.cnt]); } else { irdma_clr_sd_entry(i, sd_entry->entry_type, &sdinfo.entry[sdinfo.cnt]); } sdinfo.cnt++; if (sdinfo.cnt == IRDMA_MAX_SD_ENTRIES) { ret_code = dev->cqp->process_cqp_sds(dev, &sdinfo); if (ret_code) { irdma_debug(dev, IRDMA_DEBUG_HMC, "sd_programming failed err=%d\n", ret_code); return ret_code; } sdinfo.cnt = 0; } } if (sdinfo.cnt) ret_code = dev->cqp->process_cqp_sds(dev, &sdinfo); return ret_code; } /** * irdma_hmc_finish_add_sd_reg - program sd entries for objects * @dev: pointer to the device structure * @info: create obj info */ static int irdma_hmc_finish_add_sd_reg(struct irdma_sc_dev *dev, struct irdma_hmc_create_obj_info *info) { if (info->start_idx >= info->hmc_info->hmc_obj[info->rsrc_type].cnt) return -EINVAL; if ((info->start_idx + info->count) > info->hmc_info->hmc_obj[info->rsrc_type].cnt) return -EINVAL; if (!info->add_sd_cnt) return 0; return irdma_hmc_sd_grp(dev, info->hmc_info, info->hmc_info->sd_indexes[0], info->add_sd_cnt, true); } /** * irdma_sc_create_hmc_obj - allocate backing store for hmc objects * @dev: pointer to the device structure * @info: pointer to irdma_hmc_create_obj_info struct * * This will allocate memory for PDs and backing pages and populate * the sd and pd entries. */ int irdma_sc_create_hmc_obj(struct irdma_sc_dev *dev, struct irdma_hmc_create_obj_info *info) { struct irdma_hmc_sd_entry *sd_entry; u32 sd_idx, sd_lmt; u32 pd_idx = 0, pd_lmt = 0; u32 pd_idx1 = 0, pd_lmt1 = 0; u32 i, j; bool pd_error = false; int ret_code = 0; if (info->start_idx >= info->hmc_info->hmc_obj[info->rsrc_type].cnt) return -EINVAL; if ((info->start_idx + info->count) > info->hmc_info->hmc_obj[info->rsrc_type].cnt) { irdma_debug(dev, IRDMA_DEBUG_HMC, "error type %u, start = %u, req cnt %u, cnt = %u\n", info->rsrc_type, info->start_idx, info->count, info->hmc_info->hmc_obj[info->rsrc_type].cnt); return -EINVAL; } irdma_find_sd_index_limit(info->hmc_info, info->rsrc_type, info->start_idx, info->count, &sd_idx, &sd_lmt); if (sd_idx >= info->hmc_info->sd_table.sd_cnt || sd_lmt > info->hmc_info->sd_table.sd_cnt) { return -EINVAL; } irdma_find_pd_index_limit(info->hmc_info, info->rsrc_type, info->start_idx, info->count, &pd_idx, &pd_lmt); for (j = sd_idx; j < sd_lmt; j++) { ret_code = irdma_add_sd_table_entry(dev->hw, info->hmc_info, j, info->entry_type, IRDMA_HMC_DIRECT_BP_SIZE); if (ret_code) goto exit_sd_error; sd_entry = &info->hmc_info->sd_table.sd_entry[j]; if (sd_entry->entry_type == IRDMA_SD_TYPE_PAGED && (dev->hmc_info == info->hmc_info && info->rsrc_type != IRDMA_HMC_IW_PBLE)) { pd_idx1 = max(pd_idx, (j * IRDMA_HMC_MAX_BP_COUNT)); pd_lmt1 = min(pd_lmt, (j + 1) * IRDMA_HMC_MAX_BP_COUNT); for (i = pd_idx1; i < pd_lmt1; i++) { /* update the pd table entry */ ret_code = irdma_add_pd_table_entry(dev, info->hmc_info, i, NULL); if (ret_code) { pd_error = true; break; } } if (pd_error) { while (i && (i > pd_idx1)) { irdma_remove_pd_bp(dev, info->hmc_info, i - 1); i--; } } } if (sd_entry->valid) continue; info->hmc_info->sd_indexes[info->add_sd_cnt] = (u16)j; info->add_sd_cnt++; sd_entry->valid = true; } return irdma_hmc_finish_add_sd_reg(dev, info); exit_sd_error: while (j && (j > sd_idx)) { sd_entry = &info->hmc_info->sd_table.sd_entry[j - 1]; switch (sd_entry->entry_type) { case IRDMA_SD_TYPE_PAGED: pd_idx1 = max(pd_idx, (j - 1) * IRDMA_HMC_MAX_BP_COUNT); pd_lmt1 = min(pd_lmt, (j * IRDMA_HMC_MAX_BP_COUNT)); for (i = pd_idx1; i < 
pd_lmt1; i++) irdma_prep_remove_pd_page(info->hmc_info, i); break; case IRDMA_SD_TYPE_DIRECT: irdma_prep_remove_pd_page(info->hmc_info, (j - 1)); break; default: ret_code = -EINVAL; break; } j--; } return ret_code; } /** * irdma_finish_del_sd_reg - delete sd entries for objects * @dev: pointer to the device structure * @info: dele obj info * @reset: true if called before reset */ static int irdma_finish_del_sd_reg(struct irdma_sc_dev *dev, struct irdma_hmc_del_obj_info *info, bool reset) { struct irdma_hmc_sd_entry *sd_entry; int ret_code = 0; struct irdma_dma_mem *mem; u32 i, sd_idx; if (!reset) ret_code = irdma_hmc_sd_grp(dev, info->hmc_info, info->hmc_info->sd_indexes[0], info->del_sd_cnt, false); if (ret_code) irdma_debug(dev, IRDMA_DEBUG_HMC, "error cqp sd sd_grp\n"); for (i = 0; i < info->del_sd_cnt; i++) { sd_idx = info->hmc_info->sd_indexes[i]; sd_entry = &info->hmc_info->sd_table.sd_entry[sd_idx]; mem = (sd_entry->entry_type == IRDMA_SD_TYPE_PAGED) ? &sd_entry->u.pd_table.pd_page_addr : &sd_entry->u.bp.addr; if (!mem || !mem->va) irdma_debug(dev, IRDMA_DEBUG_HMC, "error cqp sd mem\n"); else irdma_free_dma_mem(dev->hw, mem); } return ret_code; } /** * irdma_sc_del_hmc_obj - remove pe hmc objects * @dev: pointer to the device structure * @info: pointer to irdma_hmc_del_obj_info struct * @reset: true if called before reset * * This will de-populate the SDs and PDs. It frees * the memory for PDS and backing storage. After this function is returned, * caller should deallocate memory allocated previously for * book-keeping information about PDs and backing storage. */ int irdma_sc_del_hmc_obj(struct irdma_sc_dev *dev, struct irdma_hmc_del_obj_info *info, bool reset) { struct irdma_hmc_pd_table *pd_table; u32 sd_idx, sd_lmt; u32 pd_idx, pd_lmt, rel_pd_idx; u32 i, j; int ret_code = 0; if (info->start_idx >= info->hmc_info->hmc_obj[info->rsrc_type].cnt) { irdma_debug(dev, IRDMA_DEBUG_HMC, - "error start_idx[%04d] >= [type %04d].cnt[%04d]\n", + "error start_idx[%04d] >= [type %04d].cnt[%04d]\n", info->start_idx, info->rsrc_type, info->hmc_info->hmc_obj[info->rsrc_type].cnt); return -EINVAL; } if ((info->start_idx + info->count) > info->hmc_info->hmc_obj[info->rsrc_type].cnt) { irdma_debug(dev, IRDMA_DEBUG_HMC, - "error start_idx[%04d] + count %04d >= [type %04d].cnt[%04d]\n", + "error start_idx[%04d] + count %04d >= [type %04d].cnt[%04d]\n", info->start_idx, info->count, info->rsrc_type, info->hmc_info->hmc_obj[info->rsrc_type].cnt); return -EINVAL; } irdma_find_pd_index_limit(info->hmc_info, info->rsrc_type, info->start_idx, info->count, &pd_idx, &pd_lmt); for (j = pd_idx; j < pd_lmt; j++) { sd_idx = j / IRDMA_HMC_PD_CNT_IN_SD; if (!info->hmc_info->sd_table.sd_entry[sd_idx].valid) continue; if (info->hmc_info->sd_table.sd_entry[sd_idx].entry_type != IRDMA_SD_TYPE_PAGED) continue; rel_pd_idx = j % IRDMA_HMC_PD_CNT_IN_SD; pd_table = &info->hmc_info->sd_table.sd_entry[sd_idx].u.pd_table; if (pd_table->pd_entry && pd_table->pd_entry[rel_pd_idx].valid) { ret_code = irdma_remove_pd_bp(dev, info->hmc_info, j); if (ret_code) { irdma_debug(dev, IRDMA_DEBUG_HMC, "remove_pd_bp error\n"); return ret_code; } } } irdma_find_sd_index_limit(info->hmc_info, info->rsrc_type, info->start_idx, info->count, &sd_idx, &sd_lmt); if (sd_idx >= info->hmc_info->sd_table.sd_cnt || sd_lmt > info->hmc_info->sd_table.sd_cnt) { irdma_debug(dev, IRDMA_DEBUG_HMC, "invalid sd_idx\n"); return -EINVAL; } for (i = sd_idx; i < sd_lmt; i++) { pd_table = &info->hmc_info->sd_table.sd_entry[i].u.pd_table; if 
(!info->hmc_info->sd_table.sd_entry[i].valid) continue; switch (info->hmc_info->sd_table.sd_entry[i].entry_type) { case IRDMA_SD_TYPE_DIRECT: ret_code = irdma_prep_remove_sd_bp(info->hmc_info, i); if (!ret_code) { info->hmc_info->sd_indexes[info->del_sd_cnt] = (u16)i; info->del_sd_cnt++; } break; case IRDMA_SD_TYPE_PAGED: ret_code = irdma_prep_remove_pd_page(info->hmc_info, i); if (ret_code) break; if (dev->hmc_info != info->hmc_info && info->rsrc_type == IRDMA_HMC_IW_PBLE && pd_table->pd_entry) { kfree(pd_table->pd_entry_virt_mem.va); pd_table->pd_entry = NULL; } info->hmc_info->sd_indexes[info->del_sd_cnt] = (u16)i; info->del_sd_cnt++; break; default: break; } } return irdma_finish_del_sd_reg(dev, info, reset); } /** * irdma_add_sd_table_entry - Adds a segment descriptor to the table * @hw: pointer to our hw struct * @hmc_info: pointer to the HMC configuration information struct * @sd_index: segment descriptor index to manipulate * @type: what type of segment descriptor we're manipulating * @direct_mode_sz: size to alloc in direct mode */ int irdma_add_sd_table_entry(struct irdma_hw *hw, struct irdma_hmc_info *hmc_info, u32 sd_index, enum irdma_sd_entry_type type, u64 direct_mode_sz) { struct irdma_hmc_sd_entry *sd_entry; struct irdma_dma_mem dma_mem; u64 alloc_len; sd_entry = &hmc_info->sd_table.sd_entry[sd_index]; if (!sd_entry->valid) { if (type == IRDMA_SD_TYPE_PAGED) alloc_len = IRDMA_HMC_PAGED_BP_SIZE; else alloc_len = direct_mode_sz; /* allocate a 4K pd page or 2M backing page */ dma_mem.size = alloc_len; dma_mem.va = irdma_allocate_dma_mem(hw, &dma_mem, dma_mem.size, IRDMA_HMC_PD_BP_BUF_ALIGNMENT); if (!dma_mem.va) return -ENOMEM; if (type == IRDMA_SD_TYPE_PAGED) { struct irdma_virt_mem *vmem = &sd_entry->u.pd_table.pd_entry_virt_mem; vmem->size = sizeof(struct irdma_hmc_pd_entry) * 512; vmem->va = kzalloc(vmem->size, GFP_KERNEL); if (!vmem->va) { irdma_free_dma_mem(hw, &dma_mem); return -ENOMEM; } sd_entry->u.pd_table.pd_entry = vmem->va; irdma_memcpy(&sd_entry->u.pd_table.pd_page_addr, &dma_mem, sizeof(sd_entry->u.pd_table.pd_page_addr)); } else { irdma_memcpy(&sd_entry->u.bp.addr, &dma_mem, sizeof(sd_entry->u.bp.addr)); sd_entry->u.bp.sd_pd_index = sd_index; } hmc_info->sd_table.sd_entry[sd_index].entry_type = type; hmc_info->sd_table.use_cnt++; } if (sd_entry->entry_type == IRDMA_SD_TYPE_DIRECT) sd_entry->u.bp.use_cnt++; return 0; } /** * irdma_add_pd_table_entry - Adds page descriptor to the specified table * @dev: pointer to our device structure * @hmc_info: pointer to the HMC configuration information structure * @pd_index: which page descriptor index to manipulate * @rsrc_pg: if not NULL, use preallocated page instead of allocating new one. * * This function: * 1. Initializes the pd entry * 2. Adds pd_entry in the pd_table * 3. Mark the entry valid in irdma_hmc_pd_entry structure * 4. Initializes the pd_entry's ref count to 1 * assumptions: * 1. The memory for pd should be pinned down, physically contiguous and * aligned on 4K boundary and zeroed memory. * 2. It should be 4K in size. 
*/ int irdma_add_pd_table_entry(struct irdma_sc_dev *dev, struct irdma_hmc_info *hmc_info, u32 pd_index, struct irdma_dma_mem *rsrc_pg) { struct irdma_hmc_pd_table *pd_table; struct irdma_hmc_pd_entry *pd_entry; struct irdma_dma_mem mem; struct irdma_dma_mem *page = &mem; u32 sd_idx, rel_pd_idx; u64 *pd_addr; u64 page_desc; if (pd_index / IRDMA_HMC_PD_CNT_IN_SD >= hmc_info->sd_table.sd_cnt) return -EINVAL; sd_idx = (pd_index / IRDMA_HMC_PD_CNT_IN_SD); if (hmc_info->sd_table.sd_entry[sd_idx].entry_type != IRDMA_SD_TYPE_PAGED) return 0; rel_pd_idx = (pd_index % IRDMA_HMC_PD_CNT_IN_SD); pd_table = &hmc_info->sd_table.sd_entry[sd_idx].u.pd_table; pd_entry = &pd_table->pd_entry[rel_pd_idx]; if (!pd_entry->valid) { if (rsrc_pg) { pd_entry->rsrc_pg = true; page = rsrc_pg; } else { page->size = IRDMA_HMC_PAGED_BP_SIZE; page->va = irdma_allocate_dma_mem(dev->hw, page, page->size, IRDMA_HMC_PD_BP_BUF_ALIGNMENT); if (!page->va) return -ENOMEM; pd_entry->rsrc_pg = false; } irdma_memcpy(&pd_entry->bp.addr, page, sizeof(pd_entry->bp.addr)); pd_entry->bp.sd_pd_index = pd_index; pd_entry->bp.entry_type = IRDMA_SD_TYPE_PAGED; page_desc = page->pa | 0x1; pd_addr = pd_table->pd_page_addr.va; pd_addr += rel_pd_idx; irdma_memcpy(pd_addr, &page_desc, sizeof(*pd_addr)); pd_entry->sd_index = sd_idx; pd_entry->valid = true; pd_table->use_cnt++; irdma_invalidate_pf_hmc_pd(dev, sd_idx, rel_pd_idx); } pd_entry->bp.use_cnt++; return 0; } /** * irdma_remove_pd_bp - remove a backing page from a page descriptor * @dev: pointer to our HW structure * @hmc_info: pointer to the HMC configuration information structure * @idx: the page index * * This function: * 1. Marks the entry in pd table (for paged address mode) or in sd table * (for direct address mode) invalid. * 2. Write to register PMPDINV to invalidate the backing page in FV cache * 3. Decrement the ref count for the pd _entry * assumptions: * 1. Caller can deallocate the memory used by backing storage after this * function returns. 
*/ int irdma_remove_pd_bp(struct irdma_sc_dev *dev, struct irdma_hmc_info *hmc_info, u32 idx) { struct irdma_hmc_pd_entry *pd_entry; struct irdma_hmc_pd_table *pd_table; struct irdma_hmc_sd_entry *sd_entry; u32 sd_idx, rel_pd_idx; struct irdma_dma_mem *mem; u64 *pd_addr; sd_idx = idx / IRDMA_HMC_PD_CNT_IN_SD; rel_pd_idx = idx % IRDMA_HMC_PD_CNT_IN_SD; if (sd_idx >= hmc_info->sd_table.sd_cnt) return -EINVAL; sd_entry = &hmc_info->sd_table.sd_entry[sd_idx]; if (sd_entry->entry_type != IRDMA_SD_TYPE_PAGED) return -EINVAL; pd_table = &hmc_info->sd_table.sd_entry[sd_idx].u.pd_table; pd_entry = &pd_table->pd_entry[rel_pd_idx]; if (--pd_entry->bp.use_cnt) return 0; pd_entry->valid = false; pd_table->use_cnt--; pd_addr = pd_table->pd_page_addr.va; pd_addr += rel_pd_idx; irdma_memset(pd_addr, 0, sizeof(u64)); irdma_invalidate_pf_hmc_pd(dev, sd_idx, idx); if (!pd_entry->rsrc_pg) { mem = &pd_entry->bp.addr; if (!mem || !mem->va) return -EINVAL; irdma_free_dma_mem(dev->hw, mem); } if (!pd_table->use_cnt) kfree(pd_table->pd_entry_virt_mem.va); return 0; } /** * irdma_prep_remove_sd_bp - Prepares to remove a backing page from a sd entry * @hmc_info: pointer to the HMC configuration information structure * @idx: the page index */ int irdma_prep_remove_sd_bp(struct irdma_hmc_info *hmc_info, u32 idx) { struct irdma_hmc_sd_entry *sd_entry; sd_entry = &hmc_info->sd_table.sd_entry[idx]; if (--sd_entry->u.bp.use_cnt) return -EBUSY; hmc_info->sd_table.use_cnt--; sd_entry->valid = false; return 0; } /** * irdma_prep_remove_pd_page - Prepares to remove a PD page from sd entry. * @hmc_info: pointer to the HMC configuration information structure * @idx: segment descriptor index to find the relevant page descriptor */ int irdma_prep_remove_pd_page(struct irdma_hmc_info *hmc_info, u32 idx) { struct irdma_hmc_sd_entry *sd_entry; sd_entry = &hmc_info->sd_table.sd_entry[idx]; if (sd_entry->u.pd_table.use_cnt) return -EBUSY; sd_entry->valid = false; hmc_info->sd_table.use_cnt--; return 0; } diff --git a/sys/dev/irdma/irdma_hmc.h b/sys/dev/irdma/irdma_hmc.h index 7babecfcaf59..f0b0eff5d127 100644 --- a/sys/dev/irdma/irdma_hmc.h +++ b/sys/dev/irdma/irdma_hmc.h @@ -1,201 +1,197 @@ /*- * SPDX-License-Identifier: GPL-2.0 or Linux-OpenIB * - * Copyright (c) 2015 - 2022 Intel Corporation + * Copyright (c) 2015 - 2023 Intel Corporation * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU * General Public License (GPL) Version 2, available from the file * COPYING in the main directory of this source tree, or the * OpenFabrics.org BSD license below: * * Redistribution and use in source and binary forms, with or * without modification, are permitted provided that the following * conditions are met: * * - Redistributions of source code must retain the above * copyright notice, this list of conditions and the following * disclaimer. * * - Redistributions in binary form must reproduce the above * copyright notice, this list of conditions and the following * disclaimer in the documentation and/or other materials * provided with the distribution. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ #ifndef IRDMA_HMC_H #define IRDMA_HMC_H #include "irdma_defs.h" #define IRDMA_HMC_MAX_BP_COUNT 512 #define IRDMA_MAX_SD_ENTRIES 11 #define IRDMA_HW_DBG_HMC_INVALID_BP_MARK 0xca #define IRDMA_HMC_INFO_SIGNATURE 0x484d5347 #define IRDMA_HMC_PD_CNT_IN_SD 512 #define IRDMA_HMC_DIRECT_BP_SIZE 0x200000 #define IRDMA_HMC_MAX_SD_COUNT 8192 #define IRDMA_HMC_PAGED_BP_SIZE 4096 #define IRDMA_HMC_PD_BP_BUF_ALIGNMENT 4096 #define IRDMA_FIRST_VF_FPM_ID 8 #define FPM_MULTIPLIER 1024 enum irdma_hmc_rsrc_type { IRDMA_HMC_IW_QP = 0, IRDMA_HMC_IW_CQ = 1, IRDMA_HMC_IW_RESERVED = 2, IRDMA_HMC_IW_HTE = 3, IRDMA_HMC_IW_ARP = 4, IRDMA_HMC_IW_APBVT_ENTRY = 5, IRDMA_HMC_IW_MR = 6, IRDMA_HMC_IW_XF = 7, IRDMA_HMC_IW_XFFL = 8, IRDMA_HMC_IW_Q1 = 9, IRDMA_HMC_IW_Q1FL = 10, IRDMA_HMC_IW_TIMER = 11, IRDMA_HMC_IW_FSIMC = 12, IRDMA_HMC_IW_FSIAV = 13, IRDMA_HMC_IW_PBLE = 14, IRDMA_HMC_IW_RRF = 15, IRDMA_HMC_IW_RRFFL = 16, IRDMA_HMC_IW_HDR = 17, IRDMA_HMC_IW_MD = 18, IRDMA_HMC_IW_OOISC = 19, IRDMA_HMC_IW_OOISCFFL = 20, IRDMA_HMC_IW_MAX, /* Must be last entry */ }; enum irdma_sd_entry_type { IRDMA_SD_TYPE_INVALID = 0, IRDMA_SD_TYPE_PAGED = 1, IRDMA_SD_TYPE_DIRECT = 2, }; struct irdma_hmc_obj_info { u64 base; u32 max_cnt; u32 cnt; u64 size; }; struct irdma_hmc_bp { enum irdma_sd_entry_type entry_type; struct irdma_dma_mem addr; u32 sd_pd_index; u32 use_cnt; }; struct irdma_hmc_pd_entry { struct irdma_hmc_bp bp; u32 sd_index; bool rsrc_pg:1; bool valid:1; }; struct irdma_hmc_pd_table { struct irdma_dma_mem pd_page_addr; struct irdma_hmc_pd_entry *pd_entry; struct irdma_virt_mem pd_entry_virt_mem; u32 use_cnt; u32 sd_index; }; struct irdma_hmc_sd_entry { enum irdma_sd_entry_type entry_type; bool valid; union { struct irdma_hmc_pd_table pd_table; struct irdma_hmc_bp bp; } u; }; struct irdma_hmc_sd_table { struct irdma_virt_mem addr; u32 sd_cnt; u32 use_cnt; struct irdma_hmc_sd_entry *sd_entry; }; struct irdma_hmc_info { u32 signature; u16 hmc_fn_id; u16 first_sd_index; struct irdma_hmc_obj_info *hmc_obj; struct irdma_virt_mem hmc_obj_virt_mem; struct irdma_hmc_sd_table sd_table; u16 sd_indexes[IRDMA_HMC_MAX_SD_COUNT]; }; struct irdma_update_sd_entry { u64 cmd; u64 data; }; struct irdma_update_sds_info { u32 cnt; u16 hmc_fn_id; struct irdma_update_sd_entry entry[IRDMA_MAX_SD_ENTRIES]; }; struct irdma_ccq_cqe_info; struct irdma_hmc_fcn_info { u32 vf_id; u8 free_fcn; }; struct irdma_hmc_create_obj_info { struct irdma_hmc_info *hmc_info; struct irdma_virt_mem add_sd_virt_mem; u32 rsrc_type; u32 start_idx; u32 count; u32 add_sd_cnt; enum irdma_sd_entry_type entry_type; bool privileged; }; struct irdma_hmc_del_obj_info { struct irdma_hmc_info *hmc_info; struct irdma_virt_mem del_sd_virt_mem; u32 rsrc_type; u32 start_idx; u32 count; u32 del_sd_cnt; bool privileged; }; int irdma_copy_dma_mem(struct irdma_hw *hw, void *dest_buf, struct irdma_dma_mem *src_mem, u64 src_offset, u64 size); int irdma_sc_create_hmc_obj(struct irdma_sc_dev *dev, struct irdma_hmc_create_obj_info *info); int irdma_sc_del_hmc_obj(struct irdma_sc_dev *dev, struct irdma_hmc_del_obj_info *info, bool reset); int irdma_hmc_sd_one(struct irdma_sc_dev *dev, u16 hmc_fn_id, u64 pa, u32 sd_idx, enum irdma_sd_entry_type type, bool setsd); int irdma_update_sds_noccq(struct irdma_sc_dev *dev, struct 
irdma_update_sds_info *info); -struct irdma_vfdev *irdma_vfdev_from_fpm(struct irdma_sc_dev *dev, - u16 hmc_fn_id); -struct irdma_hmc_info *irdma_vf_hmcinfo_from_fpm(struct irdma_sc_dev *dev, - u16 hmc_fn_id); int irdma_add_sd_table_entry(struct irdma_hw *hw, struct irdma_hmc_info *hmc_info, u32 sd_index, enum irdma_sd_entry_type type, u64 direct_mode_sz); int irdma_add_pd_table_entry(struct irdma_sc_dev *dev, struct irdma_hmc_info *hmc_info, u32 pd_index, struct irdma_dma_mem *rsrc_pg); int irdma_remove_pd_bp(struct irdma_sc_dev *dev, struct irdma_hmc_info *hmc_info, u32 idx); int irdma_prep_remove_sd_bp(struct irdma_hmc_info *hmc_info, u32 idx); int irdma_prep_remove_pd_page(struct irdma_hmc_info *hmc_info, u32 idx); #endif /* IRDMA_HMC_H */ diff --git a/sys/dev/irdma/irdma_hw.c b/sys/dev/irdma/irdma_hw.c index fa34e4297527..8d30ad279861 100644 --- a/sys/dev/irdma/irdma_hw.c +++ b/sys/dev/irdma/irdma_hw.c @@ -1,2800 +1,2809 @@ /*- * SPDX-License-Identifier: GPL-2.0 or Linux-OpenIB * - * Copyright (c) 2015 - 2022 Intel Corporation + * Copyright (c) 2015 - 2023 Intel Corporation * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU * General Public License (GPL) Version 2, available from the file * COPYING in the main directory of this source tree, or the * OpenFabrics.org BSD license below: * * Redistribution and use in source and binary forms, with or * without modification, are permitted provided that the following * conditions are met: * * - Redistributions of source code must retain the above * copyright notice, this list of conditions and the following * disclaimer. * * - Redistributions in binary form must reproduce the above * copyright notice, this list of conditions and the following * disclaimer in the documentation and/or other materials * provided with the distribution. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
*/ #include "irdma_main.h" static struct irdma_rsrc_limits rsrc_limits_table[] = { [0] = { .qplimit = SZ_128, }, [1] = { .qplimit = SZ_1K, }, [2] = { .qplimit = SZ_2K, }, [3] = { .qplimit = SZ_4K, }, [4] = { .qplimit = SZ_16K, }, [5] = { .qplimit = SZ_64K, }, [6] = { .qplimit = SZ_128K, }, [7] = { .qplimit = SZ_256K, }, }; /* types of hmc objects */ static enum irdma_hmc_rsrc_type iw_hmc_obj_types[] = { IRDMA_HMC_IW_QP, IRDMA_HMC_IW_CQ, IRDMA_HMC_IW_HTE, IRDMA_HMC_IW_ARP, IRDMA_HMC_IW_APBVT_ENTRY, IRDMA_HMC_IW_MR, IRDMA_HMC_IW_XF, IRDMA_HMC_IW_XFFL, IRDMA_HMC_IW_Q1, IRDMA_HMC_IW_Q1FL, IRDMA_HMC_IW_PBLE, IRDMA_HMC_IW_TIMER, IRDMA_HMC_IW_FSIMC, IRDMA_HMC_IW_FSIAV, IRDMA_HMC_IW_RRF, IRDMA_HMC_IW_RRFFL, IRDMA_HMC_IW_HDR, IRDMA_HMC_IW_MD, IRDMA_HMC_IW_OOISC, IRDMA_HMC_IW_OOISCFFL, }; /** * irdma_iwarp_ce_handler - handle iwarp completions * @iwcq: iwarp cq receiving event */ static void irdma_iwarp_ce_handler(struct irdma_sc_cq *iwcq) { struct irdma_cq *cq = iwcq->back_cq; if (!cq->user_mode) atomic_set(&cq->armed, 0); if (cq->ibcq.comp_handler) cq->ibcq.comp_handler(&cq->ibcq, cq->ibcq.cq_context); } /** * irdma_puda_ce_handler - handle puda completion events * @rf: RDMA PCI function * @cq: puda completion q for event */ static void irdma_puda_ce_handler(struct irdma_pci_f *rf, struct irdma_sc_cq *cq) { struct irdma_sc_dev *dev = &rf->sc_dev; u32 compl_error; int status; do { status = irdma_puda_poll_cmpl(dev, cq, &compl_error); if (status == -ENOENT) break; if (status) { irdma_debug(dev, IRDMA_DEBUG_ERR, "puda status = %d\n", status); break; } if (compl_error) { irdma_debug(dev, IRDMA_DEBUG_ERR, - "puda compl_err =0x%x\n", - compl_error); + "puda compl_err = 0x%x\n", compl_error); break; } } while (1); irdma_sc_ccq_arm(cq); } /** * irdma_process_ceq - handle ceq for completions * @rf: RDMA PCI function * @ceq: ceq having cq for completion */ static void irdma_process_ceq(struct irdma_pci_f *rf, struct irdma_ceq *ceq) { struct irdma_sc_dev *dev = &rf->sc_dev; struct irdma_sc_ceq *sc_ceq; struct irdma_sc_cq *cq; unsigned long flags; sc_ceq = &ceq->sc_ceq; do { spin_lock_irqsave(&ceq->ce_lock, flags); cq = irdma_sc_process_ceq(dev, sc_ceq); if (!cq) { spin_unlock_irqrestore(&ceq->ce_lock, flags); break; } if (cq->cq_type == IRDMA_CQ_TYPE_IWARP) irdma_iwarp_ce_handler(cq); spin_unlock_irqrestore(&ceq->ce_lock, flags); if (cq->cq_type == IRDMA_CQ_TYPE_CQP) queue_work(rf->cqp_cmpl_wq, &rf->cqp_cmpl_work); else if (cq->cq_type == IRDMA_CQ_TYPE_ILQ || cq->cq_type == IRDMA_CQ_TYPE_IEQ) irdma_puda_ce_handler(rf, cq); } while (1); } static void irdma_set_flush_fields(struct irdma_sc_qp *qp, struct irdma_aeqe_info *info) { struct qp_err_code qp_err; qp->sq_flush_code = info->sq; qp->rq_flush_code = info->rq; qp_err = irdma_ae_to_qp_err_code(info->ae_id); qp->flush_code = qp_err.flush_code; qp->event_type = qp_err.event_type; } /** * irdma_complete_cqp_request - perform post-completion cleanup * @cqp: device CQP * @cqp_request: CQP request * * Mark CQP request as done, wake up waiting thread or invoke * callback function and release/free CQP request. 
*/ static void irdma_complete_cqp_request(struct irdma_cqp *cqp, struct irdma_cqp_request *cqp_request) { - if (cqp_request->waiting) { - cqp_request->request_done = true; + WRITE_ONCE(cqp_request->request_done, true); + if (cqp_request->waiting) wake_up(&cqp_request->waitq); - } else if (cqp_request->callback_fcn) { + else if (cqp_request->callback_fcn) cqp_request->callback_fcn(cqp_request); - } irdma_put_cqp_request(cqp, cqp_request); } /** * irdma_process_aeq - handle aeq events * @rf: RDMA PCI function */ static void irdma_process_aeq(struct irdma_pci_f *rf) { struct irdma_sc_dev *dev = &rf->sc_dev; struct irdma_aeq *aeq = &rf->aeq; struct irdma_sc_aeq *sc_aeq = &aeq->sc_aeq; struct irdma_aeqe_info aeinfo; struct irdma_aeqe_info *info = &aeinfo; int ret; struct irdma_qp *iwqp = NULL; struct irdma_cq *iwcq = NULL; struct irdma_sc_qp *qp = NULL; struct irdma_device *iwdev = rf->iwdev; struct irdma_qp_host_ctx_info *ctx_info = NULL; unsigned long flags; u32 aeqcnt = 0; if (!sc_aeq->size) return; do { memset(info, 0, sizeof(*info)); ret = irdma_sc_get_next_aeqe(sc_aeq, info); if (ret) break; aeqcnt++; irdma_debug(&iwdev->rf->sc_dev, IRDMA_DEBUG_AEQ, - "ae_id = 0x%x bool qp=%d qp_id = %d tcp_state=%d iwarp_state=%d ae_src=%d\n", - info->ae_id, info->qp, info->qp_cq_id, info->tcp_state, + "ae_id = 0x%x (%s), is_qp = %d, qp_id = %d, tcp_state = %d, iwarp_state = %d, ae_src = %d\n", + info->ae_id, irdma_get_ae_desc(info->ae_id), + info->qp, info->qp_cq_id, info->tcp_state, info->iwarp_state, info->ae_src); if (info->qp) { spin_lock_irqsave(&rf->qptable_lock, flags); iwqp = rf->qp_table[info->qp_cq_id]; if (!iwqp) { spin_unlock_irqrestore(&rf->qptable_lock, flags); if (info->ae_id == IRDMA_AE_QP_SUSPEND_COMPLETE) { struct irdma_device *iwdev = rf->iwdev; if (!iwdev->vsi.tc_change_pending) continue; atomic_dec(&iwdev->vsi.qp_suspend_reqs); wake_up(&iwdev->suspend_wq); continue; } irdma_debug(&iwdev->rf->sc_dev, IRDMA_DEBUG_AEQ, "qp_id %d is already freed\n", info->qp_cq_id); continue; } irdma_qp_add_ref(&iwqp->ibqp); spin_unlock_irqrestore(&rf->qptable_lock, flags); qp = &iwqp->sc_qp; spin_lock_irqsave(&iwqp->lock, flags); iwqp->hw_tcp_state = info->tcp_state; iwqp->hw_iwarp_state = info->iwarp_state; if (info->ae_id != IRDMA_AE_QP_SUSPEND_COMPLETE) iwqp->last_aeq = info->ae_id; spin_unlock_irqrestore(&iwqp->lock, flags); ctx_info = &iwqp->ctx_info; } else { if (info->ae_id != IRDMA_AE_CQ_OPERATION_ERROR) continue; } switch (info->ae_id) { struct irdma_cm_node *cm_node; case IRDMA_AE_LLP_CONNECTION_ESTABLISHED: cm_node = iwqp->cm_node; if (cm_node->accept_pend) { atomic_dec(&cm_node->listener->pend_accepts_cnt); cm_node->accept_pend = 0; } iwqp->rts_ae_rcvd = 1; wake_up_interruptible(&iwqp->waitq); break; case IRDMA_AE_LLP_FIN_RECEIVED: if (qp->term_flags) break; if (atomic_inc_return(&iwqp->close_timer_started) == 1) { iwqp->hw_tcp_state = IRDMA_TCP_STATE_CLOSE_WAIT; if (iwqp->ibqp_state == IB_QPS_RTS) { irdma_next_iw_state(iwqp, IRDMA_QP_STATE_CLOSING, 0, 0, 0); irdma_cm_disconn(iwqp); } irdma_schedule_cm_timer(iwqp->cm_node, (struct irdma_puda_buf *)iwqp, IRDMA_TIMER_TYPE_CLOSE, 1, 0); } break; case IRDMA_AE_LLP_CLOSE_COMPLETE: if (qp->term_flags) irdma_terminate_done(qp, 0); else irdma_cm_disconn(iwqp); break; case IRDMA_AE_BAD_CLOSE: case IRDMA_AE_RESET_SENT: irdma_next_iw_state(iwqp, IRDMA_QP_STATE_ERROR, 1, 0, 0); irdma_cm_disconn(iwqp); break; case IRDMA_AE_LLP_CONNECTION_RESET: if (atomic_read(&iwqp->close_timer_started)) break; irdma_cm_disconn(iwqp); break; case 
IRDMA_AE_QP_SUSPEND_COMPLETE: if (iwqp->iwdev->vsi.tc_change_pending) { - atomic_dec(&iwqp->sc_qp.vsi->qp_suspend_reqs); + if (!atomic_dec_return(&iwqp->sc_qp.vsi->qp_suspend_reqs)) + wake_up(&iwqp->iwdev->suspend_wq); + } + if (iwqp->suspend_pending) { + iwqp->suspend_pending = false; wake_up(&iwqp->iwdev->suspend_wq); } break; case IRDMA_AE_TERMINATE_SENT: irdma_terminate_send_fin(qp); break; case IRDMA_AE_LLP_TERMINATE_RECEIVED: irdma_terminate_received(qp, info); break; case IRDMA_AE_LCE_CQ_CATASTROPHIC: case IRDMA_AE_CQ_OPERATION_ERROR: irdma_dev_err(&iwdev->ibdev, "Processing CQ[0x%x] op error, AE 0x%04X\n", info->qp_cq_id, info->ae_id); spin_lock_irqsave(&rf->cqtable_lock, flags); iwcq = rf->cq_table[info->qp_cq_id]; if (!iwcq) { spin_unlock_irqrestore(&rf->cqtable_lock, flags); irdma_debug(&iwdev->rf->sc_dev, IRDMA_DEBUG_AEQ, "cq_id %d is already freed\n", info->qp_cq_id); continue; } irdma_cq_add_ref(&iwcq->ibcq); spin_unlock_irqrestore(&rf->cqtable_lock, flags); if (iwcq->ibcq.event_handler) { struct ib_event ibevent; ibevent.device = iwcq->ibcq.device; ibevent.event = IB_EVENT_CQ_ERR; ibevent.element.cq = &iwcq->ibcq; iwcq->ibcq.event_handler(&ibevent, iwcq->ibcq.cq_context); } irdma_cq_rem_ref(&iwcq->ibcq); break; case IRDMA_AE_RESET_NOT_SENT: case IRDMA_AE_LLP_DOUBT_REACHABILITY: break; case IRDMA_AE_RESOURCE_EXHAUSTION: irdma_dev_err(&iwdev->ibdev, "Resource exhaustion reason: q1 = %d xmit or rreq = %d\n", info->ae_src == IRDMA_AE_SOURCE_RSRC_EXHT_Q1, info->ae_src == IRDMA_AE_SOURCE_RSRC_EXHT_XT_RR); break; case IRDMA_AE_PRIV_OPERATION_DENIED: case IRDMA_AE_RDMAP_ROE_BAD_LLP_CLOSE: case IRDMA_AE_STAG_ZERO_INVALID: case IRDMA_AE_IB_RREQ_AND_Q1_FULL: case IRDMA_AE_DDP_UBE_INVALID_DDP_VERSION: case IRDMA_AE_DDP_UBE_INVALID_MO: case IRDMA_AE_DDP_UBE_INVALID_QN: case IRDMA_AE_DDP_NO_L_BIT: case IRDMA_AE_RDMAP_ROE_INVALID_RDMAP_VERSION: case IRDMA_AE_RDMAP_ROE_UNEXPECTED_OPCODE: case IRDMA_AE_ROE_INVALID_RDMA_READ_REQUEST: case IRDMA_AE_ROE_INVALID_RDMA_WRITE_OR_READ_RESP: case IRDMA_AE_INVALID_ARP_ENTRY: case IRDMA_AE_INVALID_TCP_OPTION_RCVD: case IRDMA_AE_STALE_ARP_ENTRY: case IRDMA_AE_LLP_RECEIVED_MPA_CRC_ERROR: case IRDMA_AE_LLP_SEGMENT_TOO_SMALL: case IRDMA_AE_LLP_SYN_RECEIVED: case IRDMA_AE_LLP_TOO_MANY_RETRIES: case IRDMA_AE_LCE_QP_CATASTROPHIC: case IRDMA_AE_LCE_FUNCTION_CATASTROPHIC: case IRDMA_AE_UDA_XMIT_DGRAM_TOO_LONG: default: - irdma_dev_err(&iwdev->ibdev, "abnormal ae_id = 0x%x bool qp=%d qp_id = %d ae_source=%d\n", - info->ae_id, info->qp, info->qp_cq_id, info->ae_src); + irdma_dev_err(&iwdev->ibdev, + "AEQ: abnormal ae_id = 0x%x (%s), is_qp = %d, qp_id = %d, ae_source = %d\n", + info->ae_id, irdma_get_ae_desc(info->ae_id), + info->qp, info->qp_cq_id, info->ae_src); if (rdma_protocol_roce(&iwqp->iwdev->ibdev, 1)) { ctx_info->roce_info->err_rq_idx_valid = info->err_rq_idx_valid; if (info->rq) { ctx_info->roce_info->err_rq_idx = info->wqe_idx; irdma_sc_qp_setctx_roce(&iwqp->sc_qp, iwqp->host_ctx.va, ctx_info); } irdma_set_flush_fields(qp, info); irdma_cm_disconn(iwqp); break; } ctx_info->iwarp_info->err_rq_idx_valid = info->err_rq_idx_valid; if (info->rq) { ctx_info->iwarp_info->err_rq_idx = info->wqe_idx; ctx_info->tcp_info_valid = false; ctx_info->iwarp_info_valid = true; irdma_sc_qp_setctx(&iwqp->sc_qp, iwqp->host_ctx.va, ctx_info); } if (iwqp->hw_iwarp_state != IRDMA_QP_STATE_RTS && iwqp->hw_iwarp_state != IRDMA_QP_STATE_TERMINATE) { irdma_next_iw_state(iwqp, IRDMA_QP_STATE_ERROR, 1, 0, 0); irdma_cm_disconn(iwqp); } else { irdma_terminate_connection(qp, info); } 
break; } if (info->qp) irdma_qp_rem_ref(&iwqp->ibqp); } while (1); if (aeqcnt) irdma_sc_repost_aeq_entries(dev, aeqcnt); } /** * irdma_ena_intr - set up device interrupts * @dev: hardware control device structure * @msix_id: id of the interrupt to be enabled */ static void irdma_ena_intr(struct irdma_sc_dev *dev, u32 msix_id) { dev->irq_ops->irdma_en_irq(dev, msix_id); } /** * irdma_dpc - tasklet for aeq and ceq 0 * @t: tasklet_struct ptr */ static void irdma_dpc(unsigned long t) { struct irdma_pci_f *rf = from_tasklet(rf, (struct tasklet_struct *)t, dpc_tasklet); if (rf->msix_shared) irdma_process_ceq(rf, rf->ceqlist); irdma_process_aeq(rf); irdma_ena_intr(&rf->sc_dev, rf->iw_msixtbl[0].idx); } /** * irdma_ceq_dpc - dpc handler for CEQ * @t: tasklet_struct ptr */ static void irdma_ceq_dpc(unsigned long t) { struct irdma_ceq *iwceq = from_tasklet(iwceq, (struct tasklet_struct *)t, dpc_tasklet); struct irdma_pci_f *rf = iwceq->rf; irdma_process_ceq(rf, iwceq); irdma_ena_intr(&rf->sc_dev, iwceq->msix_idx); } /** * irdma_save_msix_info - copy msix vector information to iwarp device * @rf: RDMA PCI function * * Allocate iwdev msix table and copy the msix info to the table * Return 0 if successful, otherwise return error */ static int irdma_save_msix_info(struct irdma_pci_f *rf) { struct irdma_qvlist_info *iw_qvlist; struct irdma_qv_info *iw_qvinfo; u32 ceq_idx; u32 i; u32 size; if (!rf->msix_count) { irdma_dev_err(to_ibdev(&rf->sc_dev), "No MSI-X vectors reserved for RDMA.\n"); return -EINVAL; } size = sizeof(struct irdma_msix_vector) * rf->msix_count; size += sizeof(struct irdma_qvlist_info); size += sizeof(struct irdma_qv_info) * rf->msix_count - 1; rf->iw_msixtbl = kzalloc(size, GFP_KERNEL); if (!rf->iw_msixtbl) return -ENOMEM; rf->iw_qvlist = (struct irdma_qvlist_info *) (&rf->iw_msixtbl[rf->msix_count]); iw_qvlist = rf->iw_qvlist; iw_qvinfo = iw_qvlist->qv_info; iw_qvlist->num_vectors = rf->msix_count; if (rf->msix_count <= num_online_cpus()) rf->msix_shared = true; else if (rf->msix_count > num_online_cpus() + 1) rf->msix_count = num_online_cpus() + 1; for (i = 0, ceq_idx = 0; i < rf->msix_count; i++, iw_qvinfo++) { rf->iw_msixtbl[i].idx = rf->msix_info.entry + i; rf->iw_msixtbl[i].cpu_affinity = ceq_idx; if (!i) { iw_qvinfo->aeq_idx = 0; if (rf->msix_shared) iw_qvinfo->ceq_idx = ceq_idx++; else iw_qvinfo->ceq_idx = IRDMA_Q_INVALID_IDX; } else { iw_qvinfo->aeq_idx = IRDMA_Q_INVALID_IDX; iw_qvinfo->ceq_idx = ceq_idx++; } iw_qvinfo->itr_idx = IRDMA_IDX_NOITR; iw_qvinfo->v_idx = rf->iw_msixtbl[i].idx; } return 0; } /** * irdma_irq_handler - interrupt handler for aeq and ceq0 * @data: RDMA PCI function */ static void irdma_irq_handler(void *data) { struct irdma_pci_f *rf = data; tasklet_schedule(&rf->dpc_tasklet); } /** * irdma_ceq_handler - interrupt handler for ceq * @data: ceq pointer */ static void irdma_ceq_handler(void *data) { struct irdma_ceq *iwceq = data; tasklet_schedule(&iwceq->dpc_tasklet); } /** * irdma_free_irq - free device interrupts in FreeBSD manner * @rf: RDMA PCI function * @msix_vec: msix vector to disable irq * * The function is called when destroying irq. It tearsdown * the interrupt and release resources. 
*/ static void irdma_free_irq(struct irdma_pci_f *rf, struct irdma_msix_vector *msix_vec) { if (msix_vec->tag) { bus_teardown_intr(rf->dev_ctx.dev, msix_vec->res, msix_vec->tag); msix_vec->tag = NULL; } if (msix_vec->res) { bus_release_resource(rf->dev_ctx.dev, SYS_RES_IRQ, msix_vec->idx + 1, msix_vec->res); msix_vec->res = NULL; } } /** * irdma_destroy_irq - destroy device interrupts * @rf: RDMA PCI function * @msix_vec: msix vector to disable irq * @dev_id: parameter to pass to free_irq (used during irq setup) * * The function is called when destroying aeq/ceq */ static void irdma_destroy_irq(struct irdma_pci_f *rf, struct irdma_msix_vector *msix_vec, void *dev_id) { struct irdma_sc_dev *dev = &rf->sc_dev; dev->irq_ops->irdma_dis_irq(dev, msix_vec->idx); irdma_free_irq(rf, msix_vec); } /** * irdma_destroy_cqp - destroy control qp * @rf: RDMA PCI function * @free_hwcqp: 1 if hw cqp should be freed * * Issue destroy cqp request and * free the resources associated with the cqp */ static void irdma_destroy_cqp(struct irdma_pci_f *rf, bool free_hwcqp) { struct irdma_sc_dev *dev = &rf->sc_dev; struct irdma_cqp *cqp = &rf->cqp; int status = 0; if (rf->cqp_cmpl_wq) destroy_workqueue(rf->cqp_cmpl_wq); status = irdma_sc_cqp_destroy(dev->cqp, free_hwcqp); if (status) irdma_debug(dev, IRDMA_DEBUG_ERR, "Destroy CQP failed %d\n", status); irdma_cleanup_pending_cqp_op(rf); irdma_free_dma_mem(dev->hw, &cqp->sq); kfree(cqp->scratch_array); cqp->scratch_array = NULL; kfree(cqp->cqp_requests); cqp->cqp_requests = NULL; } static void irdma_destroy_virt_aeq(struct irdma_pci_f *rf) { struct irdma_aeq *aeq = &rf->aeq; u32 pg_cnt = DIV_ROUND_UP(aeq->mem.size, PAGE_SIZE); dma_addr_t *pg_arr = (dma_addr_t *) aeq->palloc.level1.addr; irdma_unmap_vm_page_list(&rf->hw, pg_arr, pg_cnt); irdma_free_pble(rf->pble_rsrc, &aeq->palloc); vfree(aeq->mem.va); } /** * irdma_destroy_aeq - destroy aeq * @rf: RDMA PCI function * * Issue a destroy aeq request and * free the resources associated with the aeq * The function is called during driver unload */ static void irdma_destroy_aeq(struct irdma_pci_f *rf) { struct irdma_sc_dev *dev = &rf->sc_dev; struct irdma_aeq *aeq = &rf->aeq; int status = -EBUSY; if (!rf->msix_shared) { rf->sc_dev.irq_ops->irdma_cfg_aeq(&rf->sc_dev, rf->iw_msixtbl->idx, false); irdma_destroy_irq(rf, rf->iw_msixtbl, rf); } if (rf->reset) goto exit; aeq->sc_aeq.size = 0; status = irdma_cqp_aeq_cmd(dev, &aeq->sc_aeq, IRDMA_OP_AEQ_DESTROY); if (status) irdma_debug(dev, IRDMA_DEBUG_ERR, "Destroy AEQ failed %d\n", status); exit: if (aeq->virtual_map) irdma_destroy_virt_aeq(rf); else irdma_free_dma_mem(dev->hw, &aeq->mem); } /** * irdma_destroy_ceq - destroy ceq * @rf: RDMA PCI function * @iwceq: ceq to be destroyed * * Issue a destroy ceq request and * free the resources associated with the ceq */ static void irdma_destroy_ceq(struct irdma_pci_f *rf, struct irdma_ceq *iwceq) { struct irdma_sc_dev *dev = &rf->sc_dev; int status; if (rf->reset) goto exit; status = irdma_sc_ceq_destroy(&iwceq->sc_ceq, 0, 1); if (status) { irdma_debug(dev, IRDMA_DEBUG_ERR, "CEQ destroy command failed %d\n", status); goto exit; } status = irdma_sc_cceq_destroy_done(&iwceq->sc_ceq); if (status) irdma_debug(dev, IRDMA_DEBUG_ERR, - "CEQ destroy completion failed %d\n", - status); + "CEQ destroy completion failed %d\n", status); exit: spin_lock_destroy(&iwceq->ce_lock); spin_lock_destroy(&iwceq->sc_ceq.req_cq_lock); kfree(iwceq->sc_ceq.reg_cq); irdma_free_dma_mem(dev->hw, &iwceq->mem); } /** * irdma_del_ceq_0 - destroy ceq 0 * @rf: RDMA 
PCI function * * Disable the ceq 0 interrupt and destroy the ceq 0 */ static void irdma_del_ceq_0(struct irdma_pci_f *rf) { struct irdma_ceq *iwceq = rf->ceqlist; struct irdma_msix_vector *msix_vec; if (rf->msix_shared) { msix_vec = &rf->iw_msixtbl[0]; rf->sc_dev.irq_ops->irdma_cfg_ceq(&rf->sc_dev, msix_vec->ceq_id, msix_vec->idx, false); irdma_destroy_irq(rf, msix_vec, rf); } else { msix_vec = &rf->iw_msixtbl[1]; irdma_destroy_irq(rf, msix_vec, iwceq); } irdma_destroy_ceq(rf, iwceq); rf->sc_dev.ceq_valid = false; rf->ceqs_count = 0; } /** * irdma_del_ceqs - destroy all ceq's except CEQ 0 * @rf: RDMA PCI function * * Go through all of the device ceq's, except 0, and for each * ceq disable the ceq interrupt and destroy the ceq */ static void irdma_del_ceqs(struct irdma_pci_f *rf) { struct irdma_ceq *iwceq = &rf->ceqlist[1]; struct irdma_msix_vector *msix_vec; u32 i = 0; if (rf->msix_shared) msix_vec = &rf->iw_msixtbl[1]; else msix_vec = &rf->iw_msixtbl[2]; for (i = 1; i < rf->ceqs_count; i++, msix_vec++, iwceq++) { rf->sc_dev.irq_ops->irdma_cfg_ceq(&rf->sc_dev, msix_vec->ceq_id, msix_vec->idx, false); irdma_destroy_irq(rf, msix_vec, iwceq); irdma_cqp_ceq_cmd(&rf->sc_dev, &iwceq->sc_ceq, IRDMA_OP_CEQ_DESTROY); spin_lock_destroy(&iwceq->ce_lock); spin_lock_destroy(&iwceq->sc_ceq.req_cq_lock); kfree(iwceq->sc_ceq.reg_cq); irdma_free_dma_mem(rf->sc_dev.hw, &iwceq->mem); } rf->ceqs_count = 1; } /** * irdma_destroy_ccq - destroy control cq * @rf: RDMA PCI function * * Issue destroy ccq request and * free the resources associated with the ccq */ static void irdma_destroy_ccq(struct irdma_pci_f *rf) { struct irdma_sc_dev *dev = &rf->sc_dev; struct irdma_ccq *ccq = &rf->ccq; int status = 0; if (!rf->reset) status = irdma_sc_ccq_destroy(dev->ccq, 0, true); if (status) irdma_debug(dev, IRDMA_DEBUG_ERR, "CCQ destroy failed %d\n", status); irdma_free_dma_mem(dev->hw, &ccq->mem_cq); } /** * irdma_close_hmc_objects_type - delete hmc objects of a given type * @dev: iwarp device * @obj_type: the hmc object type to be deleted * @hmc_info: host memory info struct * @privileged: permission to close HMC objects * @reset: true if called before reset */ static void irdma_close_hmc_objects_type(struct irdma_sc_dev *dev, enum irdma_hmc_rsrc_type obj_type, struct irdma_hmc_info *hmc_info, bool privileged, bool reset) { struct irdma_hmc_del_obj_info info = {0}; info.hmc_info = hmc_info; info.rsrc_type = obj_type; info.count = hmc_info->hmc_obj[obj_type].cnt; info.privileged = privileged; if (irdma_sc_del_hmc_obj(dev, &info, reset)) irdma_debug(dev, IRDMA_DEBUG_ERR, - "del HMC obj of type %d failed\n", - obj_type); + "del HMC obj of type %d failed\n", obj_type); } /** * irdma_del_hmc_objects - remove all device hmc objects * @dev: iwarp device * @hmc_info: hmc_info to free * @privileged: permission to delete HMC objects * @reset: true if called before reset * @vers: hardware version */ void irdma_del_hmc_objects(struct irdma_sc_dev *dev, struct irdma_hmc_info *hmc_info, bool privileged, bool reset, enum irdma_vers vers) { unsigned int i; for (i = 0; i < IW_HMC_OBJ_TYPE_NUM; i++) { if (dev->hmc_info->hmc_obj[iw_hmc_obj_types[i]].cnt) irdma_close_hmc_objects_type(dev, iw_hmc_obj_types[i], hmc_info, privileged, reset); if (vers == IRDMA_GEN_1 && i == IRDMA_HMC_IW_TIMER) break; } } /** * irdma_create_hmc_obj_type - create hmc object of a given type * @dev: hardware control device structure * @info: information for the hmc object to create */ static int irdma_create_hmc_obj_type(struct irdma_sc_dev *dev, struct 
irdma_hmc_create_obj_info *info) { return irdma_sc_create_hmc_obj(dev, info); } /** * irdma_create_hmc_objs - create all hmc objects for the device * @rf: RDMA PCI function * @privileged: permission to create HMC objects * @vers: HW version * * Create the device hmc objects and allocate hmc pages * Return 0 if successful, otherwise clean up and return error */ static int irdma_create_hmc_objs(struct irdma_pci_f *rf, bool privileged, enum irdma_vers vers) { struct irdma_sc_dev *dev = &rf->sc_dev; struct irdma_hmc_create_obj_info info = {0}; int i, status = 0; info.hmc_info = dev->hmc_info; info.privileged = privileged; info.entry_type = rf->sd_type; for (i = 0; i < IW_HMC_OBJ_TYPE_NUM; i++) { if (iw_hmc_obj_types[i] == IRDMA_HMC_IW_PBLE) continue; if (dev->hmc_info->hmc_obj[iw_hmc_obj_types[i]].cnt) { info.rsrc_type = iw_hmc_obj_types[i]; info.count = dev->hmc_info->hmc_obj[info.rsrc_type].cnt; info.add_sd_cnt = 0; status = irdma_create_hmc_obj_type(dev, &info); if (status) { irdma_debug(dev, IRDMA_DEBUG_ERR, "create obj type %d status = %d\n", iw_hmc_obj_types[i], status); break; } } if (vers == IRDMA_GEN_1 && i == IRDMA_HMC_IW_TIMER) break; } if (!status) return irdma_sc_static_hmc_pages_allocated(dev->cqp, 0, dev->hmc_fn_id, true, true); while (i) { i--; /* destroy the hmc objects of a given type */ if (dev->hmc_info->hmc_obj[iw_hmc_obj_types[i]].cnt) irdma_close_hmc_objects_type(dev, iw_hmc_obj_types[i], dev->hmc_info, privileged, false); } return status; } /** * irdma_obj_aligned_mem - get aligned memory from device allocated memory * @rf: RDMA PCI function * @memptr: points to the memory addresses * @size: size of memory needed * @mask: mask for the aligned memory * * Get aligned memory of the requested size and * update the memptr to point to the new aligned memory * Return 0 if successful, otherwise return no memory error */ static int irdma_obj_aligned_mem(struct irdma_pci_f *rf, struct irdma_dma_mem *memptr, u32 size, u32 mask) { unsigned long va, newva; unsigned long extra; va = (unsigned long)rf->obj_next.va; newva = va; if (mask) newva = ALIGN(va, (unsigned long)mask + 1ULL); extra = newva - va; memptr->va = (u8 *)va + extra; memptr->pa = rf->obj_next.pa + extra; memptr->size = size; if (((u8 *)memptr->va + size) > ((u8 *)rf->obj_mem.va + rf->obj_mem.size)) return -ENOMEM; rf->obj_next.va = (u8 *)memptr->va + size; rf->obj_next.pa = memptr->pa + size; return 0; } /** * irdma_create_cqp - create control qp * @rf: RDMA PCI function * * Return 0, if the cqp and all the resources associated with it * are successfully created, otherwise return error */ static int irdma_create_cqp(struct irdma_pci_f *rf) { u32 sqsize = IRDMA_CQP_SW_SQSIZE_2048; struct irdma_dma_mem mem; struct irdma_sc_dev *dev = &rf->sc_dev; struct irdma_cqp_init_info cqp_init_info = {0}; struct irdma_cqp *cqp = &rf->cqp; u16 maj_err, min_err; int i, status; cqp->cqp_requests = kcalloc(sqsize, sizeof(*cqp->cqp_requests), GFP_KERNEL); if (!cqp->cqp_requests) return -ENOMEM; memset(cqp->cqp_requests, 0, sqsize * sizeof(*cqp->cqp_requests)); cqp->scratch_array = kcalloc(sqsize, sizeof(*cqp->scratch_array), GFP_KERNEL); if (!cqp->scratch_array) { status = -ENOMEM; goto err_scratch; } memset(cqp->scratch_array, 0, sqsize * sizeof(*cqp->scratch_array)); dev->cqp = &cqp->sc_cqp; dev->cqp->dev = dev; cqp->sq.size = sizeof(struct irdma_cqp_sq_wqe) * sqsize; cqp->sq.va = irdma_allocate_dma_mem(dev->hw, &cqp->sq, cqp->sq.size, IRDMA_CQP_ALIGNMENT); if (!cqp->sq.va) { status = -ENOMEM; goto err_sq; } status =
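/*
 * irdma_obj_aligned_mem() carves an aligned sub-buffer out of the
 * pre-allocated rf->obj_mem region by rounding the rf->obj_next
 * cursor up to the requested boundary.  Illustrative walk-through
 * with hypothetical values (mask 0x3F, i.e. 64-byte alignment, and
 * obj_next.va == 0x1010):
 *
 *   newva = ALIGN(0x1010, 0x3F + 1) = 0x1040
 *   extra = newva - va = 0x30
 *   memptr->va = (u8 *)va + extra = (u8 *)0x1040
 *
 * The cursor then advances past the returned block, so successive
 * callers (the CQP host context here, the FPM query/commit buffers in
 * irdma_initialize_dev(), the CCQ shadow area in irdma_create_ccq())
 * receive non-overlapping slices of obj_mem.
 */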
irdma_obj_aligned_mem(rf, &mem, sizeof(struct irdma_cqp_ctx), IRDMA_HOST_CTX_ALIGNMENT_M); if (status) goto err_ctx; dev->cqp->host_ctx_pa = mem.pa; dev->cqp->host_ctx = mem.va; /* populate the cqp init info */ cqp_init_info.dev = dev; cqp_init_info.sq_size = sqsize; cqp_init_info.sq = cqp->sq.va; cqp_init_info.sq_pa = cqp->sq.pa; cqp_init_info.host_ctx_pa = mem.pa; cqp_init_info.host_ctx = mem.va; cqp_init_info.hmc_profile = rf->rsrc_profile; cqp_init_info.scratch_array = cqp->scratch_array; cqp_init_info.protocol_used = rf->protocol_used; cqp_init_info.en_rem_endpoint_trk = rf->en_rem_endpoint_trk; memcpy(&cqp_init_info.dcqcn_params, &rf->dcqcn_params, sizeof(cqp_init_info.dcqcn_params)); switch (rf->rdma_ver) { case IRDMA_GEN_1: cqp_init_info.hw_maj_ver = IRDMA_CQPHC_HW_MAJVER_GEN_1; break; case IRDMA_GEN_2: cqp_init_info.hw_maj_ver = IRDMA_CQPHC_HW_MAJVER_GEN_2; break; } status = irdma_sc_cqp_init(dev->cqp, &cqp_init_info); if (status) { irdma_debug(dev, IRDMA_DEBUG_ERR, "cqp init status %d\n", status); goto err_ctx; } spin_lock_init(&cqp->req_lock); spin_lock_init(&cqp->compl_lock); status = irdma_sc_cqp_create(dev->cqp, &maj_err, &min_err); if (status) { irdma_debug(dev, IRDMA_DEBUG_ERR, "cqp create failed - status %d maj_err %d min_err %d\n", status, maj_err, min_err); - goto err_create; + goto err_ctx; } INIT_LIST_HEAD(&cqp->cqp_avail_reqs); INIT_LIST_HEAD(&cqp->cqp_pending_reqs); /* init the waitqueue of the cqp_requests and add them to the list */ for (i = 0; i < sqsize; i++) { init_waitqueue_head(&cqp->cqp_requests[i].waitq); list_add_tail(&cqp->cqp_requests[i].list, &cqp->cqp_avail_reqs); } init_waitqueue_head(&cqp->remove_wq); return 0; -err_create: err_ctx: irdma_free_dma_mem(dev->hw, &cqp->sq); err_sq: kfree(cqp->scratch_array); cqp->scratch_array = NULL; err_scratch: kfree(cqp->cqp_requests); cqp->cqp_requests = NULL; return status; } /** * irdma_create_ccq - create control cq * @rf: RDMA PCI function * * Return 0, if the ccq and the resources associated with it * are successfully created, otherwise return error */ static int irdma_create_ccq(struct irdma_pci_f *rf) { struct irdma_sc_dev *dev = &rf->sc_dev; struct irdma_ccq_init_info info = {0}; struct irdma_ccq *ccq = &rf->ccq; int status; dev->ccq = &ccq->sc_cq; dev->ccq->dev = dev; info.dev = dev; ccq->shadow_area.size = sizeof(struct irdma_cq_shadow_area); ccq->mem_cq.size = sizeof(struct irdma_cqe) * IW_CCQ_SIZE; ccq->mem_cq.va = irdma_allocate_dma_mem(dev->hw, &ccq->mem_cq, ccq->mem_cq.size, IRDMA_CQ0_ALIGNMENT); if (!ccq->mem_cq.va) return -ENOMEM; status = irdma_obj_aligned_mem(rf, &ccq->shadow_area, ccq->shadow_area.size, IRDMA_SHADOWAREA_M); if (status) goto exit; ccq->sc_cq.back_cq = ccq; /* populate the ccq init info */ info.cq_base = ccq->mem_cq.va; info.cq_pa = ccq->mem_cq.pa; info.num_elem = IW_CCQ_SIZE; info.shadow_area = ccq->shadow_area.va; info.shadow_area_pa = ccq->shadow_area.pa; info.ceqe_mask = false; info.ceq_id_valid = true; info.shadow_read_threshold = 16; info.vsi = &rf->default_vsi; status = irdma_sc_ccq_init(dev->ccq, &info); if (!status) status = irdma_sc_ccq_create(dev->ccq, 0, true, true); exit: if (status) irdma_free_dma_mem(dev->hw, &ccq->mem_cq); return status; } /** * irdma_alloc_set_mac - set up a mac address table entry * @iwdev: irdma device * * Allocate a mac ip entry and add it to the hw table Return 0 * if successful, otherwise return error */ static int irdma_alloc_set_mac(struct irdma_device *iwdev) { int status; status = irdma_alloc_local_mac_entry(iwdev->rf, 
&iwdev->mac_ip_table_idx); if (!status) { status = irdma_add_local_mac_entry(iwdev->rf, (const u8 *)if_getlladdr(iwdev->netdev), (u8)iwdev->mac_ip_table_idx); if (status) irdma_del_local_mac_entry(iwdev->rf, (u8)iwdev->mac_ip_table_idx); } return status; } /** * irdma_irq_request - set up the msix interrupt vector * @rf: RDMA PCI function * @msix_vec: interrupt vector information * @handler: function pointer to associate with interrupt * @argument: argument passed to the handler * * Allocate interrupt resources and setup interrupt * Return 0 if successful, otherwise return error * Note that after this function bus_describe_intr shall * be called. */ static int irdma_irq_request(struct irdma_pci_f *rf, struct irdma_msix_vector *msix_vec, driver_intr_t handler, void *argument) { device_t dev = rf->dev_ctx.dev; int rid = msix_vec->idx + 1; int err, status; msix_vec->res = bus_alloc_resource_any(dev, SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE); if (!msix_vec->res) { irdma_debug(&rf->sc_dev, IRDMA_DEBUG_ERR, - "Unable to allocate bus resource int[%d]\n", - rid); + "Unable to allocate bus resource int[%d]\n", rid); return -EINVAL; } err = bus_setup_intr(dev, msix_vec->res, INTR_TYPE_NET | INTR_MPSAFE, NULL, handler, argument, &msix_vec->tag); if (err) { irdma_debug(&rf->sc_dev, IRDMA_DEBUG_ERR, - "Unable to register handler with %x status\n", - err); + "Unable to register handler with %x status\n", err); status = -EINVAL; goto fail_intr; } return 0; fail_intr: bus_release_resource(dev, SYS_RES_IRQ, rid, msix_vec->res); msix_vec->res = NULL; return status; } /** * irdma_cfg_ceq_vector - set up the msix interrupt vector for * ceq * @rf: RDMA PCI function * @iwceq: ceq associated with the vector * @ceq_id: the id number of the iwceq * @msix_vec: interrupt vector information * * Allocate interrupt resources and enable irq handling * Return 0 if successful, otherwise return error */ static int irdma_cfg_ceq_vector(struct irdma_pci_f *rf, struct irdma_ceq *iwceq, u32 ceq_id, struct irdma_msix_vector *msix_vec) { int status; if (rf->msix_shared && !ceq_id) { snprintf(msix_vec->name, sizeof(msix_vec->name) - 1, "irdma-%s-AEQCEQ-0", dev_name(&rf->pcidev->dev)); tasklet_setup(&rf->dpc_tasklet, irdma_dpc); status = irdma_irq_request(rf, msix_vec, irdma_irq_handler, rf); if (status) return status; bus_describe_intr(rf->dev_ctx.dev, msix_vec->res, msix_vec->tag, "%s", msix_vec->name); } else { snprintf(msix_vec->name, sizeof(msix_vec->name) - 1, "irdma-%s-CEQ-%d", dev_name(&rf->pcidev->dev), ceq_id); tasklet_setup(&iwceq->dpc_tasklet, irdma_ceq_dpc); status = irdma_irq_request(rf, msix_vec, irdma_ceq_handler, iwceq); if (status) return status; bus_describe_intr(rf->dev_ctx.dev, msix_vec->res, msix_vec->tag, "%s", msix_vec->name); } msix_vec->ceq_id = ceq_id; rf->sc_dev.irq_ops->irdma_cfg_ceq(&rf->sc_dev, ceq_id, msix_vec->idx, true); return 0; } /** * irdma_cfg_aeq_vector - set up the msix vector for aeq * @rf: RDMA PCI function * * Allocate interrupt resources and enable irq handling * Return 0 if successful, otherwise return error */ static int irdma_cfg_aeq_vector(struct irdma_pci_f *rf) { struct irdma_msix_vector *msix_vec = rf->iw_msixtbl; - u32 ret = 0; + int status = 0; if (!rf->msix_shared) { snprintf(msix_vec->name, sizeof(msix_vec->name) - 1, "irdma-%s-AEQ", dev_name(&rf->pcidev->dev)); tasklet_setup(&rf->dpc_tasklet, irdma_dpc); - ret = irdma_irq_request(rf, msix_vec, irdma_irq_handler, rf); - if (ret) - return ret; + status = irdma_irq_request(rf, msix_vec, irdma_irq_handler, rf); + if (status) 
+ return status; bus_describe_intr(rf->dev_ctx.dev, msix_vec->res, msix_vec->tag, "%s", msix_vec->name); } - if (ret) { + + if (status) { irdma_debug(&rf->sc_dev, IRDMA_DEBUG_ERR, "aeq irq config fail\n"); - return -EINVAL; + return status; } rf->sc_dev.irq_ops->irdma_cfg_aeq(&rf->sc_dev, msix_vec->idx, true); return 0; } /** * irdma_create_ceq - create completion event queue * @rf: RDMA PCI function * @iwceq: pointer to the ceq resources to be created * @ceq_id: the id number of the iwceq * @vsi: SC vsi struct * * Return 0, if the ceq and the resources associated with it * are successfully created, otherwise return error */ static int irdma_create_ceq(struct irdma_pci_f *rf, struct irdma_ceq *iwceq, u32 ceq_id, struct irdma_sc_vsi *vsi) { int status; struct irdma_ceq_init_info info = {0}; struct irdma_sc_dev *dev = &rf->sc_dev; u64 scratch; u32 ceq_size; info.ceq_id = ceq_id; iwceq->rf = rf; ceq_size = min(rf->sc_dev.hmc_info->hmc_obj[IRDMA_HMC_IW_CQ].cnt, dev->hw_attrs.max_hw_ceq_size); iwceq->mem.size = sizeof(struct irdma_ceqe) * ceq_size; iwceq->mem.va = irdma_allocate_dma_mem(dev->hw, &iwceq->mem, iwceq->mem.size, IRDMA_CEQ_ALIGNMENT); if (!iwceq->mem.va) return -ENOMEM; info.ceq_id = ceq_id; info.ceqe_base = iwceq->mem.va; info.ceqe_pa = iwceq->mem.pa; info.elem_cnt = ceq_size; info.reg_cq = kzalloc(sizeof(struct irdma_sc_cq *) * info.elem_cnt, GFP_KERNEL); iwceq->sc_ceq.ceq_id = ceq_id; info.dev = dev; info.vsi = vsi; scratch = (uintptr_t)&rf->cqp.sc_cqp; status = irdma_sc_ceq_init(&iwceq->sc_ceq, &info); if (!status) { if (dev->ceq_valid) status = irdma_cqp_ceq_cmd(&rf->sc_dev, &iwceq->sc_ceq, IRDMA_OP_CEQ_CREATE); else status = irdma_sc_cceq_create(&iwceq->sc_ceq, scratch); } if (status) { kfree(info.reg_cq); irdma_free_dma_mem(dev->hw, &iwceq->mem); } return status; } /** * irdma_setup_ceq_0 - create CEQ 0 and its interrupt resource * @rf: RDMA PCI function * * Allocate a list for all device completion event queues * Create the ceq 0 and configure its msix interrupt vector * Return 0, if successfully set up, otherwise return error */ static int irdma_setup_ceq_0(struct irdma_pci_f *rf) { struct irdma_ceq *iwceq; struct irdma_msix_vector *msix_vec; u32 i; int status = 0; u32 num_ceqs; num_ceqs = min(rf->msix_count, rf->sc_dev.hmc_fpm_misc.max_ceqs); rf->ceqlist = kcalloc(num_ceqs, sizeof(*rf->ceqlist), GFP_KERNEL); if (!rf->ceqlist) { status = -ENOMEM; goto exit; } memset(rf->ceqlist, 0, num_ceqs * sizeof(*rf->ceqlist)); iwceq = &rf->ceqlist[0]; status = irdma_create_ceq(rf, iwceq, 0, &rf->default_vsi); if (status) { irdma_debug(&rf->sc_dev, IRDMA_DEBUG_ERR, - "create ceq status = %d\n", - status); + "create ceq status = %d\n", status); goto exit; } spin_lock_init(&iwceq->ce_lock); i = rf->msix_shared ?
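/*
 * Vector index selection: when MSI-X vector 0 is shared, CEQ 0 rides
 * on the same interrupt as the AEQ (iw_msixtbl[0]); otherwise the AEQ
 * keeps vector 0 and CEQ 0 takes the first dedicated vector,
 * iw_msixtbl[1].  irdma_del_ceq_0() makes the mirror-image choice
 * when tearing the interrupt back down.
 */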
0 : 1; msix_vec = &rf->iw_msixtbl[i]; iwceq->irq = msix_vec->irq; iwceq->msix_idx = msix_vec->idx; status = irdma_cfg_ceq_vector(rf, iwceq, 0, msix_vec); if (status) { irdma_destroy_ceq(rf, iwceq); goto exit; } irdma_ena_intr(&rf->sc_dev, msix_vec->idx); rf->ceqs_count++; exit: if (status && !rf->ceqs_count) { kfree(rf->ceqlist); rf->ceqlist = NULL; return status; } rf->sc_dev.ceq_valid = true; return 0; } /** * irdma_setup_ceqs - manage the device ceq's and their interrupt resources * @rf: RDMA PCI function * @vsi: VSI structure for this CEQ * * Allocate a list for all device completion event queues * Create the ceq's and configure their msix interrupt vectors * Return 0, if ceqs are successfully set up, otherwise return error */ static int irdma_setup_ceqs(struct irdma_pci_f *rf, struct irdma_sc_vsi *vsi) { u32 i; u32 ceq_id; struct irdma_ceq *iwceq; struct irdma_msix_vector *msix_vec; int status; u32 num_ceqs; num_ceqs = min(rf->msix_count, rf->sc_dev.hmc_fpm_misc.max_ceqs); i = (rf->msix_shared) ? 1 : 2; for (ceq_id = 1; i < num_ceqs; i++, ceq_id++) { iwceq = &rf->ceqlist[ceq_id]; status = irdma_create_ceq(rf, iwceq, ceq_id, vsi); if (status) { irdma_debug(&rf->sc_dev, IRDMA_DEBUG_ERR, - "create ceq status = %d\n", - status); + "create ceq status = %d\n", status); goto del_ceqs; } spin_lock_init(&iwceq->ce_lock); msix_vec = &rf->iw_msixtbl[i]; iwceq->irq = msix_vec->irq; iwceq->msix_idx = msix_vec->idx; status = irdma_cfg_ceq_vector(rf, iwceq, ceq_id, msix_vec); if (status) { irdma_destroy_ceq(rf, iwceq); goto del_ceqs; } irdma_ena_intr(&rf->sc_dev, msix_vec->idx); rf->ceqs_count++; } return 0; del_ceqs: irdma_del_ceqs(rf); return status; } static int irdma_create_virt_aeq(struct irdma_pci_f *rf, u32 size) { struct irdma_aeq *aeq = &rf->aeq; dma_addr_t *pg_arr; u32 pg_cnt; int status; if (rf->rdma_ver < IRDMA_GEN_2) return -EOPNOTSUPP; aeq->mem.size = sizeof(struct irdma_sc_aeqe) * size; aeq->mem.va = vzalloc(aeq->mem.size); if (!aeq->mem.va) return -ENOMEM; pg_cnt = DIV_ROUND_UP(aeq->mem.size, PAGE_SIZE); status = irdma_get_pble(rf->pble_rsrc, &aeq->palloc, pg_cnt, true); if (status) { vfree(aeq->mem.va); return status; } pg_arr = (dma_addr_t *) aeq->palloc.level1.addr; status = irdma_map_vm_page_list(&rf->hw, aeq->mem.va, pg_arr, pg_cnt); if (status) { irdma_free_pble(rf->pble_rsrc, &aeq->palloc); vfree(aeq->mem.va); return status; } return 0; } /** * irdma_create_aeq - create async event queue * @rf: RDMA PCI function * * Return 0, if the aeq and the resources associated with it * are successfully created, otherwise return error */ static int irdma_create_aeq(struct irdma_pci_f *rf) { struct irdma_aeq_init_info info = {0}; struct irdma_sc_dev *dev = &rf->sc_dev; struct irdma_aeq *aeq = &rf->aeq; struct irdma_hmc_info *hmc_info = rf->sc_dev.hmc_info; u32 aeq_size; u8 multiplier = (rf->protocol_used == IRDMA_IWARP_PROTOCOL_ONLY) ? 2 : 1; int status; aeq_size = multiplier * hmc_info->hmc_obj[IRDMA_HMC_IW_QP].cnt + hmc_info->hmc_obj[IRDMA_HMC_IW_CQ].cnt; aeq_size = min(aeq_size, dev->hw_attrs.max_hw_aeq_size); aeq->mem.size = sizeof(struct irdma_sc_aeqe) * aeq_size; aeq->mem.va = irdma_allocate_dma_mem(dev->hw, &aeq->mem, aeq->mem.size, IRDMA_AEQ_ALIGNMENT); if (aeq->mem.va) goto skip_virt_aeq; /* physically mapped aeq failed. 
setup virtual aeq */ status = irdma_create_virt_aeq(rf, aeq_size); if (status) return status; info.virtual_map = true; aeq->virtual_map = info.virtual_map; info.pbl_chunk_size = 1; info.first_pm_pbl_idx = aeq->palloc.level1.idx; skip_virt_aeq: info.aeqe_base = aeq->mem.va; info.aeq_elem_pa = aeq->mem.pa; info.elem_cnt = aeq_size; info.dev = dev; info.msix_idx = rf->iw_msixtbl->idx; status = irdma_sc_aeq_init(&aeq->sc_aeq, &info); if (status) goto err; status = irdma_cqp_aeq_cmd(dev, &aeq->sc_aeq, IRDMA_OP_AEQ_CREATE); if (status) goto err; return 0; err: if (aeq->virtual_map) irdma_destroy_virt_aeq(rf); else irdma_free_dma_mem(dev->hw, &aeq->mem); return status; } /** * irdma_setup_aeq - set up the device aeq * @rf: RDMA PCI function * * Create the aeq and configure its msix interrupt vector * Return 0 if successful, otherwise return error */ static int irdma_setup_aeq(struct irdma_pci_f *rf) { struct irdma_sc_dev *dev = &rf->sc_dev; int status; status = irdma_create_aeq(rf); if (status) return status; status = irdma_cfg_aeq_vector(rf); if (status) { irdma_destroy_aeq(rf); return status; } if (!rf->msix_shared) irdma_ena_intr(dev, rf->iw_msixtbl[0].idx); return 0; } /** * irdma_initialize_ilq - create iwarp local queue for cm * @iwdev: irdma device * * Return 0 if successful, otherwise return error */ static int irdma_initialize_ilq(struct irdma_device *iwdev) { struct irdma_puda_rsrc_info info = {0}; int status; info.type = IRDMA_PUDA_RSRC_TYPE_ILQ; info.cq_id = 1; info.qp_id = 1; info.count = 1; info.pd_id = 1; info.abi_ver = IRDMA_ABI_VER; info.sq_size = min(iwdev->rf->max_qp / 2, (u32)32768); info.rq_size = info.sq_size; info.buf_size = 1024; info.tx_buf_cnt = 2 * info.sq_size; info.receive = irdma_receive_ilq; info.xmit_complete = irdma_free_sqbuf; status = irdma_puda_create_rsrc(&iwdev->vsi, &info); if (status) irdma_debug(&iwdev->rf->sc_dev, IRDMA_DEBUG_ERR, "ilq create fail\n"); return status; } /** * irdma_initialize_ieq - create iwarp exception queue * @iwdev: irdma device * * Return 0 if successful, otherwise return error */ static int irdma_initialize_ieq(struct irdma_device *iwdev) { struct irdma_puda_rsrc_info info = {0}; int status; info.type = IRDMA_PUDA_RSRC_TYPE_IEQ; info.cq_id = 2; info.qp_id = iwdev->vsi.exception_lan_q; info.count = 1; info.pd_id = 2; info.abi_ver = IRDMA_ABI_VER; info.sq_size = min(iwdev->rf->max_qp / 2, (u32)32768); info.rq_size = info.sq_size; info.buf_size = iwdev->vsi.mtu + IRDMA_IPV4_PAD; info.tx_buf_cnt = 4096; status = irdma_puda_create_rsrc(&iwdev->vsi, &info); if (status) irdma_debug(&iwdev->rf->sc_dev, IRDMA_DEBUG_ERR, "ieq create fail\n"); return status; } /** * irdma_reinitialize_ieq - destroy and re-create ieq * @vsi: VSI structure */ void irdma_reinitialize_ieq(struct irdma_sc_vsi *vsi) { struct irdma_device *iwdev = vsi->back_vsi; struct irdma_pci_f *rf = iwdev->rf; irdma_puda_dele_rsrc(vsi, IRDMA_PUDA_RSRC_TYPE_IEQ, false); if (irdma_initialize_ieq(iwdev)) { iwdev->rf->reset = true; rf->gen_ops.request_reset(rf); } } /** * irdma_hmc_setup - create hmc objects for the device * @rf: RDMA PCI function * * Set up the device private memory space for the number and size of * the hmc objects and create the objects * Return 0 if successful, otherwise return error */ static int irdma_hmc_setup(struct irdma_pci_f *rf) { - int status; struct irdma_sc_dev *dev = &rf->sc_dev; + int status; u32 qpcnt; qpcnt = rsrc_limits_table[rf->limits_sel].qplimit; rf->sd_type = IRDMA_SD_TYPE_DIRECT; status = irdma_cfg_fpm_val(dev, qpcnt); if (status) return 
status; status = irdma_create_hmc_objs(rf, true, rf->rdma_ver); return status; } /** * irdma_del_init_mem - deallocate memory resources * @rf: RDMA PCI function */ static void irdma_del_init_mem(struct irdma_pci_f *rf) { struct irdma_sc_dev *dev = &rf->sc_dev; kfree(dev->hmc_info->sd_table.sd_entry); dev->hmc_info->sd_table.sd_entry = NULL; vfree(rf->mem_rsrc); rf->mem_rsrc = NULL; irdma_free_dma_mem(&rf->hw, &rf->obj_mem); if (rf->rdma_ver != IRDMA_GEN_1) { kfree(rf->allocated_ws_nodes); rf->allocated_ws_nodes = NULL; } mutex_destroy(&dev->ws_mutex); kfree(rf->ceqlist); rf->ceqlist = NULL; kfree(rf->iw_msixtbl); rf->iw_msixtbl = NULL; kfree(rf->hmc_info_mem); rf->hmc_info_mem = NULL; } /** * irdma_initialize_dev - initialize device * @rf: RDMA PCI function * * Allocate memory for the hmc objects and initialize iwdev * Return 0 if successful, otherwise clean up the resources * and return error */ static int irdma_initialize_dev(struct irdma_pci_f *rf) { int status; struct irdma_sc_dev *dev = &rf->sc_dev; struct irdma_device_init_info info = {0}; struct irdma_dma_mem mem; u32 size; size = sizeof(struct irdma_hmc_pble_rsrc) + sizeof(struct irdma_hmc_info) + (sizeof(struct irdma_hmc_obj_info) * IRDMA_HMC_IW_MAX); rf->hmc_info_mem = kzalloc(size, GFP_KERNEL); if (!rf->hmc_info_mem) return -ENOMEM; rf->pble_rsrc = (struct irdma_hmc_pble_rsrc *)rf->hmc_info_mem; dev->hmc_info = &rf->hw.hmc; dev->hmc_info->hmc_obj = (struct irdma_hmc_obj_info *) (rf->pble_rsrc + 1); status = irdma_obj_aligned_mem(rf, &mem, IRDMA_QUERY_FPM_BUF_SIZE, IRDMA_FPM_QUERY_BUF_ALIGNMENT_M); if (status) goto error; info.fpm_query_buf_pa = mem.pa; info.fpm_query_buf = mem.va; status = irdma_obj_aligned_mem(rf, &mem, IRDMA_COMMIT_FPM_BUF_SIZE, IRDMA_FPM_COMMIT_BUF_ALIGNMENT_M); if (status) goto error; info.fpm_commit_buf_pa = mem.pa; info.fpm_commit_buf = mem.va; info.bar0 = rf->hw.hw_addr; info.hmc_fn_id = rf->peer_info->pf_id; /* * the debug_mask is already assigned at this point through sysctl and so the value shouldn't be overwritten */ info.debug_mask = rf->sc_dev.debug_mask; info.hw = &rf->hw; status = irdma_sc_dev_init(&rf->sc_dev, &info); if (status) goto error; return status; error: kfree(rf->hmc_info_mem); rf->hmc_info_mem = NULL; return status; } /** * irdma_rt_deinit_hw - clean up the irdma device resources * @iwdev: irdma device * * remove the mac ip entry and ipv4/ipv6 addresses, destroy the * device queues and free the pble and the hmc objects */ void irdma_rt_deinit_hw(struct irdma_device *iwdev) { struct irdma_sc_qp qp = {{0}}; irdma_debug(&iwdev->rf->sc_dev, IRDMA_DEBUG_INIT, "state = %d\n", iwdev->init_state); switch (iwdev->init_state) { case IP_ADDR_REGISTERED: if (iwdev->rf->sc_dev.hw_attrs.uk_attrs.hw_rev == IRDMA_GEN_1) irdma_del_local_mac_entry(iwdev->rf, (u8)iwdev->mac_ip_table_idx); /* fallthrough */ case AEQ_CREATED: case PBLE_CHUNK_MEM: case CEQS_CREATED: case REM_ENDPOINT_TRK_CREATED: if (iwdev->rf->en_rem_endpoint_trk) { qp.dev = &iwdev->rf->sc_dev; qp.qp_uk.qp_id = IRDMA_REM_ENDPOINT_TRK_QPID; qp.qp_uk.qp_type = IRDMA_QP_TYPE_IWARP; irdma_cqp_qp_destroy_cmd(qp.dev, &qp); } /* fallthrough */ case IEQ_CREATED: if (!iwdev->roce_mode) irdma_puda_dele_rsrc(&iwdev->vsi, IRDMA_PUDA_RSRC_TYPE_IEQ, iwdev->rf->reset); /* fallthrough */ case ILQ_CREATED: if (!iwdev->roce_mode) irdma_puda_dele_rsrc(&iwdev->vsi, IRDMA_PUDA_RSRC_TYPE_ILQ, iwdev->rf->reset); break; default: irdma_dev_warn(&iwdev->ibdev, "bad init_state = %d\n", iwdev->init_state); break; } irdma_cleanup_cm_core(&iwdev->cm_core); if 
(iwdev->vsi.pestat) { irdma_vsi_stats_free(&iwdev->vsi); kfree(iwdev->vsi.pestat); } if (iwdev->cleanup_wq) destroy_workqueue(iwdev->cleanup_wq); } static int irdma_setup_init_state(struct irdma_pci_f *rf) { int status; status = irdma_save_msix_info(rf); if (status) return status; rf->obj_mem.size = 8192; rf->obj_mem.va = irdma_allocate_dma_mem(&rf->hw, &rf->obj_mem, rf->obj_mem.size, IRDMA_HW_PAGE_SIZE); if (!rf->obj_mem.va) { status = -ENOMEM; goto clean_msixtbl; } rf->obj_next = rf->obj_mem; status = irdma_initialize_dev(rf); if (status) goto clean_obj_mem; return 0; clean_obj_mem: irdma_free_dma_mem(&rf->hw, &rf->obj_mem); clean_msixtbl: kfree(rf->iw_msixtbl); rf->iw_msixtbl = NULL; return status; } /** * irdma_get_used_rsrc - determine resources used internally * @iwdev: irdma device * * Called at the end of open to get all internal allocations */ static void irdma_get_used_rsrc(struct irdma_device *iwdev) { iwdev->rf->used_pds = find_first_zero_bit(iwdev->rf->allocated_pds, iwdev->rf->max_pd); iwdev->rf->used_qps = find_first_zero_bit(iwdev->rf->allocated_qps, iwdev->rf->max_qp); iwdev->rf->used_cqs = find_first_zero_bit(iwdev->rf->allocated_cqs, iwdev->rf->max_cq); iwdev->rf->used_mrs = find_first_zero_bit(iwdev->rf->allocated_mrs, iwdev->rf->max_mr); } void irdma_ctrl_deinit_hw(struct irdma_pci_f *rf) { enum init_completion_state state = rf->init_state; rf->init_state = INVALID_STATE; if (rf->rsrc_created) { irdma_destroy_aeq(rf); irdma_destroy_pble_prm(rf->pble_rsrc); irdma_del_ceqs(rf); rf->rsrc_created = false; } switch (state) { case CEQ0_CREATED: irdma_del_ceq_0(rf); /* fallthrough */ case CCQ_CREATED: irdma_destroy_ccq(rf); /* fallthrough */ case HW_RSRC_INITIALIZED: case HMC_OBJS_CREATED: irdma_del_hmc_objects(&rf->sc_dev, rf->sc_dev.hmc_info, true, rf->reset, rf->rdma_ver); /* fallthrough */ case CQP_CREATED: irdma_destroy_cqp(rf, !rf->reset); /* fallthrough */ case INITIAL_STATE: irdma_del_init_mem(rf); break; case INVALID_STATE: default: irdma_dev_warn(&rf->iwdev->ibdev, "bad init_state = %d\n", rf->init_state); break; } } /** * irdma_rt_init_hw - Initializes runtime portion of HW * @iwdev: irdma device * @l2params: qos, tc, mtu info from netdev driver * * Create device queues ILQ, IEQ, CEQs and PBLEs. Setup irdma * device resource objects. 
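 * The bring-up below is ordered and tracked via iwdev->init_state:
 * ILQ and IEQ (iWARP mode only), the remote endpoint tracker QP,
 * then CEQs, PBLE chunks and finally the AEQ.  On any failure the
 * do/while(0) block falls out to irdma_rt_deinit_hw(), which unwinds
 * in reverse order based on the same state value.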
*/ int irdma_rt_init_hw(struct irdma_device *iwdev, struct irdma_l2params *l2params) { struct irdma_pci_f *rf = iwdev->rf; struct irdma_sc_dev *dev = &rf->sc_dev; struct irdma_sc_qp qp = {{0}}; struct irdma_vsi_init_info vsi_info = {0}; struct irdma_vsi_stats_info stats_info = {0}; int status; vsi_info.dev = dev; vsi_info.back_vsi = iwdev; vsi_info.params = l2params; vsi_info.pf_data_vsi_num = iwdev->vsi_num; vsi_info.register_qset = rf->gen_ops.register_qset; vsi_info.unregister_qset = rf->gen_ops.unregister_qset; vsi_info.exception_lan_q = 2; irdma_sc_vsi_init(&iwdev->vsi, &vsi_info); status = irdma_setup_cm_core(iwdev, rf->rdma_ver); if (status) return status; stats_info.pestat = kzalloc(sizeof(*stats_info.pestat), GFP_KERNEL); if (!stats_info.pestat) { irdma_cleanup_cm_core(&iwdev->cm_core); return -ENOMEM; } stats_info.fcn_id = dev->hmc_fn_id; status = irdma_vsi_stats_init(&iwdev->vsi, &stats_info); if (status) { irdma_cleanup_cm_core(&iwdev->cm_core); kfree(stats_info.pestat); return status; } do { if (!iwdev->roce_mode) { status = irdma_initialize_ilq(iwdev); if (status) break; iwdev->init_state = ILQ_CREATED; status = irdma_initialize_ieq(iwdev); if (status) break; iwdev->init_state = IEQ_CREATED; } if (iwdev->rf->en_rem_endpoint_trk) { qp.dev = dev; qp.qp_uk.qp_id = IRDMA_REM_ENDPOINT_TRK_QPID; qp.qp_uk.qp_type = IRDMA_QP_TYPE_IWARP; status = irdma_cqp_qp_create_cmd(dev, &qp); if (status) break; iwdev->init_state = REM_ENDPOINT_TRK_CREATED; } if (!rf->rsrc_created) { status = irdma_setup_ceqs(rf, &iwdev->vsi); if (status) break; iwdev->init_state = CEQS_CREATED; status = irdma_hmc_init_pble(&rf->sc_dev, rf->pble_rsrc); if (status) { irdma_del_ceqs(rf); break; } iwdev->init_state = PBLE_CHUNK_MEM; status = irdma_setup_aeq(rf); if (status) { irdma_destroy_pble_prm(rf->pble_rsrc); irdma_del_ceqs(rf); break; } iwdev->init_state = AEQ_CREATED; rf->rsrc_created = true; } if (iwdev->rf->sc_dev.hw_attrs.uk_attrs.hw_rev == IRDMA_GEN_1) irdma_alloc_set_mac(iwdev); irdma_add_ip(iwdev); iwdev->init_state = IP_ADDR_REGISTERED; /* * handles asynchronous cleanup tasks - disconnect CM, free qp, free cq bufs */ iwdev->cleanup_wq = alloc_workqueue("irdma-cleanup-wq", WQ_UNBOUND, WQ_UNBOUND_MAX_ACTIVE); if (!iwdev->cleanup_wq) return -ENOMEM; irdma_get_used_rsrc(iwdev); init_waitqueue_head(&iwdev->suspend_wq); return 0; } while (0); dev_err(&rf->pcidev->dev, "HW runtime init FAIL status = %d last cmpl = %d\n", status, iwdev->init_state); irdma_rt_deinit_hw(iwdev); return status; } /** * irdma_ctrl_init_hw - Initializes control portion of HW * @rf: RDMA PCI function * * Create admin queues, HMC objects and RF resource objects */ int irdma_ctrl_init_hw(struct irdma_pci_f *rf) { struct irdma_sc_dev *dev = &rf->sc_dev; int status; do { status = irdma_setup_init_state(rf); if (status) break; rf->init_state = INITIAL_STATE; status = irdma_create_cqp(rf); if (status) break; rf->init_state = CQP_CREATED; dev->feature_info[IRDMA_FEATURE_FW_INFO] = IRDMA_FW_VER_DEFAULT; if (rf->rdma_ver != IRDMA_GEN_1) { status = irdma_get_rdma_features(dev); if (status) break; } status = irdma_hmc_setup(rf); if (status) break; rf->init_state = HMC_OBJS_CREATED; status = irdma_initialize_hw_rsrc(rf); if (status) break; rf->init_state = HW_RSRC_INITIALIZED; status = irdma_create_ccq(rf); if (status) break; rf->init_state = CCQ_CREATED; status = irdma_setup_ceq_0(rf); if (status) break; rf->init_state = CEQ0_CREATED; /* Handles processing of CQP completions */ rf->cqp_cmpl_wq = alloc_ordered_workqueue("cqp_cmpl_wq", WQ_HIGHPRI |
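/*
 * An ordered workqueue executes at most one work item at a time, so
 * CQP completion processing (cqp_compl_worker -> irdma_cqp_ce_handler)
 * is serialized, while WQ_HIGHPRI runs it on the high-priority worker
 * pool.  irdma_destroy_cqp() destroys this workqueue before issuing
 * the CQP destroy itself.
 */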
WQ_UNBOUND); if (!rf->cqp_cmpl_wq) { status = -ENOMEM; break; } INIT_WORK(&rf->cqp_cmpl_work, cqp_compl_worker); irdma_sc_ccq_arm(dev->ccq); return 0; } while (0); pr_err("IRDMA hardware initialization FAILED init_state=%d status=%d\n", rf->init_state, status); irdma_ctrl_deinit_hw(rf); return status; } /** * irdma_set_hw_rsrc - set hw memory resources. * @rf: RDMA PCI function */ static void irdma_set_hw_rsrc(struct irdma_pci_f *rf) { rf->allocated_qps = (void *)(rf->mem_rsrc + (sizeof(struct irdma_arp_entry) * rf->arp_table_size)); rf->allocated_cqs = &rf->allocated_qps[BITS_TO_LONGS(rf->max_qp)]; rf->allocated_mrs = &rf->allocated_cqs[BITS_TO_LONGS(rf->max_cq)]; rf->allocated_pds = &rf->allocated_mrs[BITS_TO_LONGS(rf->max_mr)]; rf->allocated_ahs = &rf->allocated_pds[BITS_TO_LONGS(rf->max_pd)]; rf->allocated_mcgs = &rf->allocated_ahs[BITS_TO_LONGS(rf->max_ah)]; rf->allocated_arps = &rf->allocated_mcgs[BITS_TO_LONGS(rf->max_mcg)]; rf->qp_table = (struct irdma_qp **) (&rf->allocated_arps[BITS_TO_LONGS(rf->arp_table_size)]); rf->cq_table = (struct irdma_cq **)(&rf->qp_table[rf->max_qp]); spin_lock_init(&rf->rsrc_lock); spin_lock_init(&rf->arp_lock); spin_lock_init(&rf->qptable_lock); spin_lock_init(&rf->cqtable_lock); spin_lock_init(&rf->qh_list_lock); } /** * irdma_calc_mem_rsrc_size - calculate memory resources size. * @rf: RDMA PCI function */ static u32 irdma_calc_mem_rsrc_size(struct irdma_pci_f *rf){ u32 rsrc_size; rsrc_size = sizeof(struct irdma_arp_entry) * rf->arp_table_size; rsrc_size += sizeof(unsigned long) * BITS_TO_LONGS(rf->max_qp); rsrc_size += sizeof(unsigned long) * BITS_TO_LONGS(rf->max_mr); rsrc_size += sizeof(unsigned long) * BITS_TO_LONGS(rf->max_cq); rsrc_size += sizeof(unsigned long) * BITS_TO_LONGS(rf->max_pd); rsrc_size += sizeof(unsigned long) * BITS_TO_LONGS(rf->arp_table_size); rsrc_size += sizeof(unsigned long) * BITS_TO_LONGS(rf->max_ah); rsrc_size += sizeof(unsigned long) * BITS_TO_LONGS(rf->max_mcg); rsrc_size += sizeof(struct irdma_qp **) * rf->max_qp; rsrc_size += sizeof(struct irdma_cq **) * rf->max_cq; return rsrc_size; } /** * irdma_initialize_hw_rsrc - initialize hw resource tracking array * @rf: RDMA PCI function */ u32 irdma_initialize_hw_rsrc(struct irdma_pci_f *rf) { u32 rsrc_size; u32 mrdrvbits; u32 ret; if (rf->rdma_ver != IRDMA_GEN_1) { rf->allocated_ws_nodes = kcalloc(BITS_TO_LONGS(IRDMA_MAX_WS_NODES), sizeof(unsigned long), GFP_KERNEL); if (!rf->allocated_ws_nodes) return -ENOMEM; set_bit(0, rf->allocated_ws_nodes); rf->max_ws_node_id = IRDMA_MAX_WS_NODES; } rf->max_cqe = rf->sc_dev.hw_attrs.uk_attrs.max_hw_cq_size; rf->max_qp = rf->sc_dev.hmc_info->hmc_obj[IRDMA_HMC_IW_QP].cnt; rf->max_mr = rf->sc_dev.hmc_info->hmc_obj[IRDMA_HMC_IW_MR].cnt; rf->max_cq = rf->sc_dev.hmc_info->hmc_obj[IRDMA_HMC_IW_CQ].cnt; rf->max_pd = rf->sc_dev.hw_attrs.max_hw_pds; rf->arp_table_size = rf->sc_dev.hmc_info->hmc_obj[IRDMA_HMC_IW_ARP].cnt; rf->max_ah = rf->sc_dev.hmc_info->hmc_obj[IRDMA_HMC_IW_FSIAV].cnt; rf->max_mcg = rf->max_qp; rsrc_size = irdma_calc_mem_rsrc_size(rf); rf->mem_rsrc = vzalloc(rsrc_size); if (!rf->mem_rsrc) { ret = -ENOMEM; goto mem_rsrc_vmalloc_fail; } rf->arp_table = (struct irdma_arp_entry *)rf->mem_rsrc; irdma_set_hw_rsrc(rf); set_bit(0, rf->allocated_mrs); set_bit(0, rf->allocated_qps); set_bit(0, rf->allocated_cqs); set_bit(0, rf->allocated_pds); set_bit(0, rf->allocated_arps); set_bit(0, rf->allocated_ahs); set_bit(0, rf->allocated_mcgs); set_bit(2, rf->allocated_qps); /* qp 2 IEQ */ set_bit(1, rf->allocated_qps); /* qp 1 ILQ */ 
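/*
 * QP ids 1-3 and CQ/PD ids 1-2 are marked busy up front because they
 * are consumed internally: the ILQ and IEQ puda resources created in
 * irdma_initialize_ilq()/irdma_initialize_ieq() use qp/cq/pd 1 and 2,
 * and IRDMA_REM_ENDPOINT_TRK_QPID (qp 3 per the comment below) is
 * reserved for the remote endpoint tracker QP.  Bit 0 of every bitmap
 * is reserved as well, so a valid index is never 0.
 */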
set_bit(IRDMA_REM_ENDPOINT_TRK_QPID, rf->allocated_qps); /* qp 3 Remote Endpt trk */ set_bit(1, rf->allocated_cqs); set_bit(1, rf->allocated_pds); set_bit(2, rf->allocated_cqs); set_bit(2, rf->allocated_pds); INIT_LIST_HEAD(&rf->mc_qht_list.list); /* stag index mask has a minimum of 14 bits */ mrdrvbits = 24 - max(get_count_order(rf->max_mr), 14); rf->mr_stagmask = ~(((1 << mrdrvbits) - 1) << (32 - mrdrvbits)); return 0; mem_rsrc_vmalloc_fail: kfree(rf->allocated_ws_nodes); rf->allocated_ws_nodes = NULL; return ret; } /** * irdma_cqp_ce_handler - handle cqp completions * @rf: RDMA PCI function * @cq: cq for cqp completions */ void irdma_cqp_ce_handler(struct irdma_pci_f *rf, struct irdma_sc_cq *cq) { struct irdma_cqp_request *cqp_request; struct irdma_sc_dev *dev = &rf->sc_dev; u32 cqe_count = 0; struct irdma_ccq_cqe_info info; unsigned long flags; int ret; do { memset(&info, 0, sizeof(info)); spin_lock_irqsave(&rf->cqp.compl_lock, flags); ret = irdma_sc_ccq_get_cqe_info(cq, &info); spin_unlock_irqrestore(&rf->cqp.compl_lock, flags); if (ret) break; cqp_request = (struct irdma_cqp_request *) (uintptr_t)info.scratch; if (info.error && irdma_cqp_crit_err(dev, cqp_request->info.cqp_cmd, info.maj_err_code, info.min_err_code)) irdma_dev_err(&rf->iwdev->ibdev, "cqp opcode = 0x%x maj_err_code = 0x%x min_err_code = 0x%x\n", info.op_code, info.maj_err_code, info.min_err_code); if (cqp_request) { cqp_request->compl_info.maj_err_code = info.maj_err_code; cqp_request->compl_info.min_err_code = info.min_err_code; cqp_request->compl_info.op_ret_val = info.op_ret_val; cqp_request->compl_info.error = info.error; irdma_complete_cqp_request(&rf->cqp, cqp_request); } cqe_count++; } while (1); if (cqe_count) { irdma_process_bh(dev); irdma_sc_ccq_arm(dev->ccq); } } /** * cqp_compl_worker - Handle cqp completions * @work: Pointer to work structure */ void cqp_compl_worker(struct work_struct *work) { struct irdma_pci_f *rf = container_of(work, struct irdma_pci_f, cqp_cmpl_work); struct irdma_sc_cq *cq = &rf->ccq.sc_cq; irdma_cqp_ce_handler(rf, cq); } /** * irdma_lookup_apbvt_entry - lookup hash table for an existing apbvt entry corresponding to port * @cm_core: cm's core * @port: port to identify apbvt entry */ static struct irdma_apbvt_entry * irdma_lookup_apbvt_entry(struct irdma_cm_core *cm_core, u16 port) { struct irdma_apbvt_entry *entry; HASH_FOR_EACH_POSSIBLE(cm_core->apbvt_hash_tbl, entry, hlist, port) { if (entry->port == port) { entry->use_cnt++; return entry; } } return NULL; } /** * irdma_next_iw_state - modify qp state * @iwqp: iwarp qp to modify * @state: next state for qp * @del_hash: del hash * @term: term message * @termlen: length of term message */ void irdma_next_iw_state(struct irdma_qp *iwqp, u8 state, u8 del_hash, u8 term, u8 termlen) { struct irdma_modify_qp_info info = {0}; info.next_iwarp_state = state; info.remove_hash_idx = del_hash; info.cq_num_valid = true; info.arp_cache_idx_valid = true; info.dont_send_term = true; info.dont_send_fin = true; info.termlen = termlen; if (term & IRDMAQP_TERM_SEND_TERM_ONLY) info.dont_send_term = false; if (term & IRDMAQP_TERM_SEND_FIN_ONLY) info.dont_send_fin = false; if (iwqp->sc_qp.term_flags && state == IRDMA_QP_STATE_ERROR) info.reset_tcp_conn = true; iwqp->hw_iwarp_state = state; irdma_hw_modify_qp(iwqp->iwdev, iwqp, &info, 0); iwqp->iwarp_state = info.next_iwarp_state; } /** * irdma_del_local_mac_entry - remove a mac entry from the hw * table * @rf: RDMA PCI function * @idx: the index of the mac ip address to delete */ void 
irdma_del_local_mac_entry(struct irdma_pci_f *rf, u16 idx) { struct irdma_cqp *iwcqp = &rf->cqp; struct irdma_cqp_request *cqp_request; struct cqp_cmds_info *cqp_info; cqp_request = irdma_alloc_and_get_cqp_request(iwcqp, true); if (!cqp_request) return; cqp_info = &cqp_request->info; cqp_info->cqp_cmd = IRDMA_OP_DELETE_LOCAL_MAC_ENTRY; cqp_info->post_sq = 1; cqp_info->in.u.del_local_mac_entry.cqp = &iwcqp->sc_cqp; cqp_info->in.u.del_local_mac_entry.scratch = (uintptr_t)cqp_request; cqp_info->in.u.del_local_mac_entry.entry_idx = idx; cqp_info->in.u.del_local_mac_entry.ignore_ref_count = 0; irdma_handle_cqp_op(rf, cqp_request); irdma_put_cqp_request(iwcqp, cqp_request); } /** * irdma_add_local_mac_entry - add a mac ip address entry to the * hw table * @rf: RDMA PCI function * @mac_addr: pointer to mac address * @idx: the index of the mac ip address to add */ int irdma_add_local_mac_entry(struct irdma_pci_f *rf, const u8 *mac_addr, u16 idx) { struct irdma_local_mac_entry_info *info; struct irdma_cqp *iwcqp = &rf->cqp; struct irdma_cqp_request *cqp_request; struct cqp_cmds_info *cqp_info; int status; cqp_request = irdma_alloc_and_get_cqp_request(iwcqp, true); if (!cqp_request) return -ENOMEM; cqp_info = &cqp_request->info; cqp_info->post_sq = 1; info = &cqp_info->in.u.add_local_mac_entry.info; ether_addr_copy(info->mac_addr, mac_addr); info->entry_idx = idx; cqp_info->in.u.add_local_mac_entry.scratch = (uintptr_t)cqp_request; cqp_info->cqp_cmd = IRDMA_OP_ADD_LOCAL_MAC_ENTRY; cqp_info->in.u.add_local_mac_entry.cqp = &iwcqp->sc_cqp; cqp_info->in.u.add_local_mac_entry.scratch = (uintptr_t)cqp_request; status = irdma_handle_cqp_op(rf, cqp_request); irdma_put_cqp_request(iwcqp, cqp_request); return status; } /** * irdma_alloc_local_mac_entry - allocate a mac entry * @rf: RDMA PCI function * @mac_tbl_idx: the index of the new mac address * * Allocate a mac address entry and update the mac_tbl_idx * to hold the index of the newly created mac address * Return 0 if successful, otherwise return error */ int irdma_alloc_local_mac_entry(struct irdma_pci_f *rf, u16 *mac_tbl_idx) { struct irdma_cqp *iwcqp = &rf->cqp; struct irdma_cqp_request *cqp_request; struct cqp_cmds_info *cqp_info; int status = 0; cqp_request = irdma_alloc_and_get_cqp_request(iwcqp, true); if (!cqp_request) return -ENOMEM; cqp_info = &cqp_request->info; cqp_info->cqp_cmd = IRDMA_OP_ALLOC_LOCAL_MAC_ENTRY; cqp_info->post_sq = 1; cqp_info->in.u.alloc_local_mac_entry.cqp = &iwcqp->sc_cqp; cqp_info->in.u.alloc_local_mac_entry.scratch = (uintptr_t)cqp_request; status = irdma_handle_cqp_op(rf, cqp_request); if (!status) *mac_tbl_idx = (u16)cqp_request->compl_info.op_ret_val; irdma_put_cqp_request(iwcqp, cqp_request); return status; } /** * irdma_cqp_manage_apbvt_cmd - send cqp command manage apbvt * @iwdev: irdma device * @accel_local_port: port for apbvt * @add_port: add ordelete port */ static int irdma_cqp_manage_apbvt_cmd(struct irdma_device *iwdev, u16 accel_local_port, bool add_port) { struct irdma_apbvt_info *info; struct irdma_cqp_request *cqp_request; struct cqp_cmds_info *cqp_info; int status; cqp_request = irdma_alloc_and_get_cqp_request(&iwdev->rf->cqp, add_port); if (!cqp_request) return -ENOMEM; cqp_info = &cqp_request->info; info = &cqp_info->in.u.manage_apbvt_entry.info; memset(info, 0, sizeof(*info)); info->add = add_port; info->port = accel_local_port; cqp_info->cqp_cmd = IRDMA_OP_MANAGE_APBVT_ENTRY; cqp_info->post_sq = 1; cqp_info->in.u.manage_apbvt_entry.cqp = &iwdev->rf->cqp.sc_cqp; 
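/*
 * This function follows the control-path pattern used throughout the
 * rest of this file: irdma_alloc_and_get_cqp_request() pulls a request
 * from the free list built in irdma_create_cqp(), the caller fills in
 * cqp_info with the op code and a scratch value (the request pointer
 * itself, echoed back in the CQE so irdma_cqp_ce_handler() can
 * complete the right waiter), irdma_handle_cqp_op() posts it, and
 * irdma_put_cqp_request() drops the reference.
 */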
cqp_info->in.u.manage_apbvt_entry.scratch = (uintptr_t)cqp_request; - irdma_debug(&iwdev->rf->sc_dev, IRDMA_DEBUG_DEV, "%s: port=0x%04x\n", - (!add_port) ? "DELETE" : "ADD", accel_local_port); + irdma_debug(&iwdev->rf->sc_dev, IRDMA_DEBUG_DEV, + "%s: port=0x%04x\n", (!add_port) ? "DELETE" : "ADD", + accel_local_port); status = irdma_handle_cqp_op(iwdev->rf, cqp_request); irdma_put_cqp_request(&iwdev->rf->cqp, cqp_request); return status; } /** * irdma_add_apbvt - add tcp port to HW apbvt table * @iwdev: irdma device * @port: port for apbvt */ struct irdma_apbvt_entry * irdma_add_apbvt(struct irdma_device *iwdev, u16 port) { struct irdma_cm_core *cm_core = &iwdev->cm_core; struct irdma_apbvt_entry *entry; unsigned long flags; spin_lock_irqsave(&cm_core->apbvt_lock, flags); entry = irdma_lookup_apbvt_entry(cm_core, port); if (entry) { spin_unlock_irqrestore(&cm_core->apbvt_lock, flags); return entry; } entry = kzalloc(sizeof(*entry), GFP_ATOMIC); if (!entry) { spin_unlock_irqrestore(&cm_core->apbvt_lock, flags); return NULL; } entry->port = port; entry->use_cnt = 1; HASH_ADD(cm_core->apbvt_hash_tbl, &entry->hlist, entry->port); spin_unlock_irqrestore(&cm_core->apbvt_lock, flags); if (irdma_cqp_manage_apbvt_cmd(iwdev, port, true)) { kfree(entry); return NULL; } return entry; } /** * irdma_del_apbvt - delete tcp port from HW apbvt table * @iwdev: irdma device * @entry: apbvt entry object */ void irdma_del_apbvt(struct irdma_device *iwdev, struct irdma_apbvt_entry *entry) { struct irdma_cm_core *cm_core = &iwdev->cm_core; unsigned long flags; spin_lock_irqsave(&cm_core->apbvt_lock, flags); if (--entry->use_cnt) { spin_unlock_irqrestore(&cm_core->apbvt_lock, flags); return; } HASH_DEL(cm_core->apbvt_hash_tbl, &entry->hlist); /* * apbvt_lock is held across CQP delete APBVT OP (non-waiting) to protect against race where add APBVT CQP can * race ahead of the delete APBVT for same port. 
*/ irdma_cqp_manage_apbvt_cmd(iwdev, entry->port, false); kfree(entry); spin_unlock_irqrestore(&cm_core->apbvt_lock, flags); } /** * irdma_manage_arp_cache - manage hw arp cache * @rf: RDMA PCI function * @mac_addr: mac address ptr * @ip_addr: ip addr for arp cache * @action: add, delete or modify */ void irdma_manage_arp_cache(struct irdma_pci_f *rf, const unsigned char *mac_addr, u32 *ip_addr, u32 action) { struct irdma_add_arp_cache_entry_info *info; struct irdma_cqp_request *cqp_request; struct cqp_cmds_info *cqp_info; int arp_index; arp_index = irdma_arp_table(rf, ip_addr, mac_addr, action); if (arp_index == -1) return; cqp_request = irdma_alloc_and_get_cqp_request(&rf->cqp, false); if (!cqp_request) return; cqp_info = &cqp_request->info; if (action == IRDMA_ARP_ADD) { cqp_info->cqp_cmd = IRDMA_OP_ADD_ARP_CACHE_ENTRY; info = &cqp_info->in.u.add_arp_cache_entry.info; memset(info, 0, sizeof(*info)); info->arp_index = (u16)arp_index; info->permanent = true; ether_addr_copy(info->mac_addr, mac_addr); cqp_info->in.u.add_arp_cache_entry.scratch = (uintptr_t)cqp_request; cqp_info->in.u.add_arp_cache_entry.cqp = &rf->cqp.sc_cqp; } else { cqp_info->cqp_cmd = IRDMA_OP_DELETE_ARP_CACHE_ENTRY; cqp_info->in.u.del_arp_cache_entry.scratch = (uintptr_t)cqp_request; cqp_info->in.u.del_arp_cache_entry.cqp = &rf->cqp.sc_cqp; cqp_info->in.u.del_arp_cache_entry.arp_index = arp_index; } cqp_info->post_sq = 1; irdma_handle_cqp_op(rf, cqp_request); irdma_put_cqp_request(&rf->cqp, cqp_request); } /** * irdma_send_syn_cqp_callback - do syn/ack after qhash * @cqp_request: qhash cqp completion */ static void irdma_send_syn_cqp_callback(struct irdma_cqp_request *cqp_request) { struct irdma_cm_node *cm_node = cqp_request->param; irdma_send_syn(cm_node, 1); irdma_rem_ref_cm_node(cm_node); } /** * irdma_manage_qhash - add or modify qhash * @iwdev: irdma device * @cminfo: cm info for qhash * @etype: type (syn or quad) * @mtype: type of qhash * @cmnode: cmnode associated with connection * @wait: wait for completion */ int irdma_manage_qhash(struct irdma_device *iwdev, struct irdma_cm_info *cminfo, enum irdma_quad_entry_type etype, enum irdma_quad_hash_manage_type mtype, void *cmnode, bool wait) { struct irdma_qhash_table_info *info; struct irdma_cqp *iwcqp = &iwdev->rf->cqp; struct irdma_cqp_request *cqp_request; struct cqp_cmds_info *cqp_info; struct irdma_cm_node *cm_node = cmnode; int status; cqp_request = irdma_alloc_and_get_cqp_request(iwcqp, wait); if (!cqp_request) return -ENOMEM; + cminfo->cqp_request = cqp_request; + if (!wait) + atomic_inc(&cqp_request->refcnt); cqp_info = &cqp_request->info; info = &cqp_info->in.u.manage_qhash_table_entry.info; memset(info, 0, sizeof(*info)); info->vsi = &iwdev->vsi; info->manage = mtype; info->entry_type = etype; if (cminfo->vlan_id < VLAN_N_VID) { info->vlan_valid = true; info->vlan_id = cminfo->vlan_id; } else { info->vlan_valid = false; } info->ipv4_valid = cminfo->ipv4; info->user_pri = cminfo->user_pri; ether_addr_copy(info->mac_addr, if_getlladdr(iwdev->netdev)); info->qp_num = cminfo->qh_qpid; info->dest_port = cminfo->loc_port; info->dest_ip[0] = cminfo->loc_addr[0]; info->dest_ip[1] = cminfo->loc_addr[1]; info->dest_ip[2] = cminfo->loc_addr[2]; info->dest_ip[3] = cminfo->loc_addr[3]; if (etype == IRDMA_QHASH_TYPE_TCP_ESTABLISHED || etype == IRDMA_QHASH_TYPE_UDP_UNICAST || etype == IRDMA_QHASH_TYPE_UDP_MCAST || etype == IRDMA_QHASH_TYPE_ROCE_MCAST || etype == IRDMA_QHASH_TYPE_ROCEV2_HW) { info->src_port = cminfo->rem_port; info->src_ip[0] = cminfo->rem_addr[0]; 
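/*
 * Note the direction swap relative to cm_info: the local address and
 * port are written to the hardware dest_ip/dest_port fields and the
 * remote side to src_ip/src_port (remote fields only for the entry
 * types checked above), presumably because the qhash entry describes
 * the tuple as seen on an inbound packet.
 */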
info->src_ip[1] = cminfo->rem_addr[1]; info->src_ip[2] = cminfo->rem_addr[2]; info->src_ip[3] = cminfo->rem_addr[3]; } if (cmnode) { cqp_request->callback_fcn = irdma_send_syn_cqp_callback; cqp_request->param = cmnode; if (!wait) atomic_inc(&cm_node->refcnt); } if (info->ipv4_valid) irdma_debug(&iwdev->rf->sc_dev, IRDMA_DEBUG_CM, - "%s caller: %pS loc_port=0x%04x rem_port=0x%04x loc_addr=%pI4 rem_addr=%pI4 mac=%pM, vlan_id=%d cm_node=%p\n", + "%s caller: %pS loc_port=0x%04x rem_port=0x%04x loc_addr=%x rem_addr=%x mac=%x:%x:%x:%x:%x:%x, vlan_id=%d cm_node=%p\n", (!mtype) ? "DELETE" : "ADD", - __builtin_return_address(0), info->dest_port, - info->src_port, info->dest_ip, info->src_ip, - info->mac_addr, cminfo->vlan_id, - cmnode ? cmnode : NULL); + __builtin_return_address(0), info->src_port, + info->dest_port, info->src_ip[0], info->dest_ip[0], + info->mac_addr[0], info->mac_addr[1], + info->mac_addr[2], info->mac_addr[3], + info->mac_addr[4], info->mac_addr[5], + cminfo->vlan_id, cmnode ? cmnode : NULL); else irdma_debug(&iwdev->rf->sc_dev, IRDMA_DEBUG_CM, - "%s caller: %pS loc_port=0x%04x rem_port=0x%04x loc_addr=%pI6 rem_addr=%pI6 mac=%pM, vlan_id=%d cm_node=%p\n", + "%s caller: %pS loc_port=0x%04x rem_port=0x%04x loc_addr=%x:%x:%x:%x rem_addr=%x:%x:%x:%x mac=%x:%x:%x:%x:%x:%x, vlan_id=%d cm_node=%p\n", (!mtype) ? "DELETE" : "ADD", - __builtin_return_address(0), info->dest_port, - info->src_port, info->dest_ip, info->src_ip, - info->mac_addr, cminfo->vlan_id, + __builtin_return_address(0), info->src_port, + info->dest_port, IRDMA_PRINT_IP6(info->src_ip), + IRDMA_PRINT_IP6(info->dest_ip), info->mac_addr[0], + info->mac_addr[1], info->mac_addr[2], + info->mac_addr[3], info->mac_addr[4], + info->mac_addr[5], cminfo->vlan_id, cmnode ? cmnode : NULL); cqp_info->in.u.manage_qhash_table_entry.cqp = &iwdev->rf->cqp.sc_cqp; cqp_info->in.u.manage_qhash_table_entry.scratch = (uintptr_t)cqp_request; cqp_info->cqp_cmd = IRDMA_OP_MANAGE_QHASH_TABLE_ENTRY; cqp_info->post_sq = 1; status = irdma_handle_cqp_op(iwdev->rf, cqp_request); if (status && cm_node && !wait) irdma_rem_ref_cm_node(cm_node); irdma_put_cqp_request(iwcqp, cqp_request); return status; } /** * irdma_hw_flush_wqes - flush qp's wqe * @rf: RDMA PCI function * @qp: hardware control qp * @info: info for flush * @wait: flag wait for completion */ int irdma_hw_flush_wqes(struct irdma_pci_f *rf, struct irdma_sc_qp *qp, struct irdma_qp_flush_info *info, bool wait) { int status; struct irdma_qp_flush_info *hw_info; struct irdma_cqp_request *cqp_request; struct cqp_cmds_info *cqp_info; struct irdma_qp *iwqp = qp->qp_uk.back_qp; cqp_request = irdma_alloc_and_get_cqp_request(&rf->cqp, wait); if (!cqp_request) return -ENOMEM; cqp_info = &cqp_request->info; hw_info = &cqp_request->info.in.u.qp_flush_wqes.info; memcpy(hw_info, info, sizeof(*hw_info)); cqp_info->cqp_cmd = IRDMA_OP_QP_FLUSH_WQES; cqp_info->post_sq = 1; cqp_info->in.u.qp_flush_wqes.qp = qp; cqp_info->in.u.qp_flush_wqes.scratch = (uintptr_t)cqp_request; status = irdma_handle_cqp_op(rf, cqp_request); if (status) { qp->qp_uk.sq_flush_complete = true; qp->qp_uk.rq_flush_complete = true; irdma_put_cqp_request(&rf->cqp, cqp_request); return status; } if (!wait || cqp_request->compl_info.maj_err_code) goto put_cqp; if (info->rq) { if (cqp_request->compl_info.min_err_code == IRDMA_CQP_COMPL_SQ_WQE_FLUSHED || cqp_request->compl_info.min_err_code == 0) { /* RQ WQE flush was requested but did not happen */ qp->qp_uk.rq_flush_complete = true; } } if (info->sq) { if 
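/*
 * Mirror of the RQ case above: the completion's minor code reports
 * what the hardware actually flushed.  If an SQ flush was requested
 * but the code shows only an RQ flush (or no error), the SQ flush did
 * not happen, so sq_flush_complete is set right away.
 */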
(cqp_request->compl_info.min_err_code == IRDMA_CQP_COMPL_RQ_WQE_FLUSHED || cqp_request->compl_info.min_err_code == 0) { /* SQ WQE flush was requested but did not happen */ qp->qp_uk.sq_flush_complete = true; } } irdma_debug(&rf->sc_dev, IRDMA_DEBUG_VERBS, "qp_id=%d qp_type=%d qpstate=%d ibqpstate=%d last_aeq=%d hw_iw_state=%d maj_err_code=%d min_err_code=%d\n", iwqp->ibqp.qp_num, rf->protocol_used, iwqp->iwarp_state, iwqp->ibqp_state, iwqp->last_aeq, iwqp->hw_iwarp_state, - cqp_request->compl_info.maj_err_code, cqp_request->compl_info.min_err_code); + cqp_request->compl_info.maj_err_code, + cqp_request->compl_info.min_err_code); put_cqp: irdma_put_cqp_request(&rf->cqp, cqp_request); return status; } /** * irdma_gen_ae - generate AE * @rf: RDMA PCI function * @qp: qp associated with AE * @info: info for ae * @wait: wait for completion */ void irdma_gen_ae(struct irdma_pci_f *rf, struct irdma_sc_qp *qp, struct irdma_gen_ae_info *info, bool wait) { struct irdma_gen_ae_info *ae_info; struct irdma_cqp_request *cqp_request; struct cqp_cmds_info *cqp_info; cqp_request = irdma_alloc_and_get_cqp_request(&rf->cqp, wait); if (!cqp_request) return; cqp_info = &cqp_request->info; ae_info = &cqp_request->info.in.u.gen_ae.info; memcpy(ae_info, info, sizeof(*ae_info)); cqp_info->cqp_cmd = IRDMA_OP_GEN_AE; cqp_info->post_sq = 1; cqp_info->in.u.gen_ae.qp = qp; cqp_info->in.u.gen_ae.scratch = (uintptr_t)cqp_request; irdma_handle_cqp_op(rf, cqp_request); irdma_put_cqp_request(&rf->cqp, cqp_request); } void irdma_flush_wqes(struct irdma_qp *iwqp, u32 flush_mask) { struct irdma_qp_flush_info info = {0}; struct irdma_pci_f *rf = iwqp->iwdev->rf; u8 flush_code = iwqp->sc_qp.flush_code; if (!(flush_mask & IRDMA_FLUSH_SQ) && !(flush_mask & IRDMA_FLUSH_RQ)) return; /* Set flush info fields */ info.sq = flush_mask & IRDMA_FLUSH_SQ; info.rq = flush_mask & IRDMA_FLUSH_RQ; /* Generate userflush errors in CQE */ info.sq_major_code = IRDMA_FLUSH_MAJOR_ERR; info.sq_minor_code = FLUSH_GENERAL_ERR; info.rq_major_code = IRDMA_FLUSH_MAJOR_ERR; info.rq_minor_code = FLUSH_GENERAL_ERR; info.userflushcode = true; if (flush_mask & IRDMA_REFLUSH) { if (info.sq) iwqp->sc_qp.flush_sq = false; if (info.rq) iwqp->sc_qp.flush_rq = false; } else { if (flush_code) { if (info.sq && iwqp->sc_qp.sq_flush_code) info.sq_minor_code = flush_code; if (info.rq && iwqp->sc_qp.rq_flush_code) info.rq_minor_code = flush_code; } if (irdma_upload_context && irdma_upload_qp_context(iwqp, 0, 1)) irdma_dev_warn(&iwqp->iwdev->ibdev, "failed to upload QP context\n"); if (!iwqp->user_mode) irdma_sched_qp_flush_work(iwqp); } /* Issue flush */ (void)irdma_hw_flush_wqes(rf, &iwqp->sc_qp, &info, flush_mask & IRDMA_FLUSH_WAIT); iwqp->flush_issued = true; } diff --git a/sys/dev/irdma/irdma_kcompat.c b/sys/dev/irdma/irdma_kcompat.c index c1b39060a09b..8eb3f40771f1 100644 --- a/sys/dev/irdma/irdma_kcompat.c +++ b/sys/dev/irdma/irdma_kcompat.c @@ -1,2382 +1,2385 @@ /*- * SPDX-License-Identifier: GPL-2.0 or Linux-OpenIB * - * Copyright (c) 2018 - 2022 Intel Corporation + * Copyright (c) 2018 - 2023 Intel Corporation * * This software is available to you under a choice of one of two * licenses. 
You may choose to be licensed under the terms of the GNU * General Public License (GPL) Version 2, available from the file * COPYING in the main directory of this source tree, or the * OpenFabrics.org BSD license below: * * Redistribution and use in source and binary forms, with or * without modification, are permitted provided that the following * conditions are met: * * - Redistributions of source code must retain the above * copyright notice, this list of conditions and the following * disclaimer. * * - Redistributions in binary form must reproduce the above * copyright notice, this list of conditions and the following * disclaimer in the documentation and/or other materials * provided with the distribution. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ #include "irdma_main.h" #define IRDMA_ROCE_UDP_ENCAP_VALID_PORT_MIN (0xC000) static u16 kc_rdma_flow_label_to_udp_sport(u32 fl) { u32 fl_low = fl & 0x03FFF; u32 fl_high = fl & 0xFC000; fl_low ^= fl_high >> 14; return (u16)(fl_low | IRDMA_ROCE_UDP_ENCAP_VALID_PORT_MIN); } #define IRDMA_GRH_FLOWLABEL_MASK (0x000FFFFF) static u32 kc_rdma_calc_flow_label(u32 lqpn, u32 rqpn) { u64 fl = (u64)lqpn * rqpn; fl ^= fl >> 20; fl ^= fl >> 40; return (u32)(fl & IRDMA_GRH_FLOWLABEL_MASK); } u16 kc_rdma_get_udp_sport(u32 fl, u32 lqpn, u32 rqpn) { if (!fl) fl = kc_rdma_calc_flow_label(lqpn, rqpn); return kc_rdma_flow_label_to_udp_sport(fl); } void irdma_get_dev_fw_str(struct ib_device *dev, char *str, size_t str_len) { struct irdma_device *iwdev = to_iwdev(dev); snprintf(str, str_len, "%u.%u", irdma_fw_major_ver(&iwdev->rf->sc_dev), irdma_fw_minor_ver(&iwdev->rf->sc_dev)); } int irdma_add_gid(struct ib_device *device, u8 port_num, unsigned int index, const union ib_gid *gid, const struct ib_gid_attr *attr, void **context) { return 0; } int irdma_del_gid(struct ib_device *device, u8 port_num, unsigned int index, void **context) { return 0; } #if __FreeBSD_version >= 1400026 /** * irdma_alloc_mr - register stag for fast memory registration * @pd: ibpd pointer * @mr_type: memory for stag registrion * @max_num_sg: man number of pages * @udata: user data */ struct ib_mr * irdma_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type, u32 max_num_sg, struct ib_udata *udata) { #else /** * irdma_alloc_mr - register stag for fast memory registration * @pd: ibpd pointer * @mr_type: memory for stag registrion * @max_num_sg: man number of pages */ struct ib_mr * irdma_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type, u32 max_num_sg) { #endif struct irdma_device *iwdev = to_iwdev(pd->device); struct irdma_pble_alloc *palloc; struct irdma_pbl *iwpbl; struct irdma_mr *iwmr; int status; u32 stag; int err_code = -ENOMEM; iwmr = kzalloc(sizeof(*iwmr), GFP_KERNEL); if (!iwmr) return ERR_PTR(-ENOMEM); stag = irdma_create_stag(iwdev); if (!stag) { err_code = -ENOMEM; goto err; } iwmr->stag = stag; iwmr->ibmr.rkey = stag; iwmr->ibmr.lkey = stag; iwmr->ibmr.pd = pd; iwmr->ibmr.device = pd->device; iwpbl = &iwmr->iwpbl; iwpbl->iwmr = iwmr; iwmr->type = IRDMA_MEMREG_TYPE_MEM; palloc = &iwpbl->pble_alloc; iwmr->page_cnt = max_num_sg; /* Assume system 
PAGE_SIZE as the sg page sizes are unknown. */ iwmr->len = max_num_sg * PAGE_SIZE; status = irdma_get_pble(iwdev->rf->pble_rsrc, palloc, iwmr->page_cnt, false); if (status) goto err_get_pble; err_code = irdma_hw_alloc_stag(iwdev, iwmr); if (err_code) goto err_alloc_stag; iwpbl->pbl_allocated = true; return &iwmr->ibmr; err_alloc_stag: irdma_free_pble(iwdev->rf->pble_rsrc, palloc); err_get_pble: irdma_free_stag(iwdev, stag); err: kfree(iwmr); return ERR_PTR(err_code); } #define IRDMA_ALLOC_UCTX_MIN_REQ_LEN offsetofend(struct irdma_alloc_ucontext_req, rsvd8) #define IRDMA_ALLOC_UCTX_MIN_RESP_LEN offsetofend(struct irdma_alloc_ucontext_resp, rsvd) #if __FreeBSD_version >= 1400026 /** * irdma_alloc_ucontext - Allocate the user context data structure * @uctx: context * @udata: user data * * This keeps track of all objects associated with a particular * user-mode client. */ int irdma_alloc_ucontext(struct ib_ucontext *uctx, struct ib_udata *udata) { struct ib_device *ibdev = uctx->device; struct irdma_device *iwdev = to_iwdev(ibdev); struct irdma_alloc_ucontext_req req = {0}; struct irdma_alloc_ucontext_resp uresp = {0}; struct irdma_ucontext *ucontext = to_ucontext(uctx); struct irdma_uk_attrs *uk_attrs = &iwdev->rf->sc_dev.hw_attrs.uk_attrs; if (udata->inlen < IRDMA_ALLOC_UCTX_MIN_REQ_LEN || udata->outlen < IRDMA_ALLOC_UCTX_MIN_RESP_LEN) return -EINVAL; if (ib_copy_from_udata(&req, udata, min(sizeof(req), udata->inlen))) return -EINVAL; if (req.userspace_ver < 4 || req.userspace_ver > IRDMA_ABI_VER) goto ver_error; ucontext->iwdev = iwdev; ucontext->abi_ver = req.userspace_ver; if (req.comp_mask & IRDMA_ALLOC_UCTX_USE_RAW_ATTR) ucontext->use_raw_attrs = true; /* GEN_1 support for libi40iw */ if (udata->outlen == IRDMA_ALLOC_UCTX_MIN_RESP_LEN) { if (uk_attrs->hw_rev != IRDMA_GEN_1) return -EOPNOTSUPP; ucontext->legacy_mode = true; uresp.max_qps = iwdev->rf->max_qp; uresp.max_pds = iwdev->rf->sc_dev.hw_attrs.max_hw_pds; uresp.wq_size = iwdev->rf->sc_dev.hw_attrs.max_qp_wr * 2; uresp.kernel_ver = req.userspace_ver; if (ib_copy_to_udata(udata, &uresp, min(sizeof(uresp), udata->outlen))) return -EFAULT; } else { u64 bar_off; uresp.kernel_ver = IRDMA_ABI_VER; uresp.feature_flags = uk_attrs->feature_flags; uresp.max_hw_wq_frags = uk_attrs->max_hw_wq_frags; uresp.max_hw_read_sges = uk_attrs->max_hw_read_sges; uresp.max_hw_inline = uk_attrs->max_hw_inline; uresp.max_hw_rq_quanta = uk_attrs->max_hw_rq_quanta; uresp.max_hw_wq_quanta = uk_attrs->max_hw_wq_quanta; uresp.max_hw_sq_chunk = uk_attrs->max_hw_sq_chunk; uresp.max_hw_cq_size = uk_attrs->max_hw_cq_size; uresp.min_hw_cq_size = uk_attrs->min_hw_cq_size; uresp.hw_rev = uk_attrs->hw_rev; uresp.comp_mask |= IRDMA_ALLOC_UCTX_USE_RAW_ATTR; bar_off = (uintptr_t)iwdev->rf->sc_dev.hw_regs[IRDMA_DB_ADDR_OFFSET]; ucontext->db_mmap_entry = irdma_user_mmap_entry_insert(ucontext, bar_off, IRDMA_MMAP_IO_NC, &uresp.db_mmap_key); if (!ucontext->db_mmap_entry) { return -ENOMEM; } if (ib_copy_to_udata(udata, &uresp, min(sizeof(uresp), udata->outlen))) { rdma_user_mmap_entry_remove(ucontext->db_mmap_entry); return -EFAULT; } } INIT_LIST_HEAD(&ucontext->cq_reg_mem_list); spin_lock_init(&ucontext->cq_reg_mem_list_lock); INIT_LIST_HEAD(&ucontext->qp_reg_mem_list); spin_lock_init(&ucontext->qp_reg_mem_list_lock); INIT_LIST_HEAD(&ucontext->vma_list); mutex_init(&ucontext->vma_list_mutex); return 0; ver_error: irdma_dev_err(&iwdev->ibdev, "Invalid userspace driver version detected. 
Detected version %d, should be %d\n", req.userspace_ver, IRDMA_ABI_VER); return -EINVAL; } #endif #if __FreeBSD_version < 1400026 /** * irdma_alloc_ucontext - Allocate the user context data structure * @ibdev: ib device pointer * @udata: user data * * This keeps track of all objects associated with a particular * user-mode client. */ struct ib_ucontext * irdma_alloc_ucontext(struct ib_device *ibdev, struct ib_udata *udata) { struct irdma_device *iwdev = to_iwdev(ibdev); struct irdma_alloc_ucontext_req req = {0}; struct irdma_alloc_ucontext_resp uresp = {0}; struct irdma_ucontext *ucontext; struct irdma_uk_attrs *uk_attrs = &iwdev->rf->sc_dev.hw_attrs.uk_attrs; if (udata->inlen < IRDMA_ALLOC_UCTX_MIN_REQ_LEN || udata->outlen < IRDMA_ALLOC_UCTX_MIN_RESP_LEN) return ERR_PTR(-EINVAL); if (ib_copy_from_udata(&req, udata, min(sizeof(req), udata->inlen))) return ERR_PTR(-EINVAL); if (req.userspace_ver < 4 || req.userspace_ver > IRDMA_ABI_VER) goto ver_error; ucontext = kzalloc(sizeof(*ucontext), GFP_KERNEL); if (!ucontext) return ERR_PTR(-ENOMEM); ucontext->iwdev = iwdev; ucontext->abi_ver = req.userspace_ver; if (req.comp_mask & IRDMA_ALLOC_UCTX_USE_RAW_ATTR) ucontext->use_raw_attrs = true; /* GEN_1 legacy support with libi40iw */ if (udata->outlen == IRDMA_ALLOC_UCTX_MIN_RESP_LEN) { if (uk_attrs->hw_rev != IRDMA_GEN_1) { kfree(ucontext); return ERR_PTR(-EOPNOTSUPP); } ucontext->legacy_mode = true; uresp.max_qps = iwdev->rf->max_qp; uresp.max_pds = iwdev->rf->sc_dev.hw_attrs.max_hw_pds; uresp.wq_size = iwdev->rf->sc_dev.hw_attrs.max_qp_wr * 2; uresp.kernel_ver = req.userspace_ver; if (ib_copy_to_udata(udata, &uresp, min(sizeof(uresp), udata->outlen))) { kfree(ucontext); return ERR_PTR(-EFAULT); } } else { u64 bar_off; uresp.kernel_ver = IRDMA_ABI_VER; uresp.feature_flags = uk_attrs->feature_flags; uresp.max_hw_wq_frags = uk_attrs->max_hw_wq_frags; uresp.max_hw_read_sges = uk_attrs->max_hw_read_sges; uresp.max_hw_inline = uk_attrs->max_hw_inline; uresp.max_hw_rq_quanta = uk_attrs->max_hw_rq_quanta; uresp.max_hw_wq_quanta = uk_attrs->max_hw_wq_quanta; uresp.max_hw_sq_chunk = uk_attrs->max_hw_sq_chunk; uresp.max_hw_cq_size = uk_attrs->max_hw_cq_size; uresp.min_hw_cq_size = uk_attrs->min_hw_cq_size; uresp.hw_rev = uk_attrs->hw_rev; uresp.comp_mask |= IRDMA_ALLOC_UCTX_USE_RAW_ATTR; bar_off = (uintptr_t)iwdev->rf->sc_dev.hw_regs[IRDMA_DB_ADDR_OFFSET]; spin_lock_init(&ucontext->mmap_tbl_lock); ucontext->db_mmap_entry = irdma_user_mmap_entry_add_hash(ucontext, bar_off, IRDMA_MMAP_IO_NC, &uresp.db_mmap_key); if (!ucontext->db_mmap_entry) { spin_lock_destroy(&ucontext->mmap_tbl_lock); kfree(ucontext); return ERR_PTR(-ENOMEM); } if (ib_copy_to_udata(udata, &uresp, min(sizeof(uresp), udata->outlen))) { irdma_user_mmap_entry_del_hash(ucontext->db_mmap_entry); spin_lock_destroy(&ucontext->mmap_tbl_lock); kfree(ucontext); return ERR_PTR(-EFAULT); } } INIT_LIST_HEAD(&ucontext->cq_reg_mem_list); spin_lock_init(&ucontext->cq_reg_mem_list_lock); INIT_LIST_HEAD(&ucontext->qp_reg_mem_list); spin_lock_init(&ucontext->qp_reg_mem_list_lock); INIT_LIST_HEAD(&ucontext->vma_list); mutex_init(&ucontext->vma_list_mutex); return &ucontext->ibucontext; ver_error: irdma_dev_err(&iwdev->ibdev, "Invalid userspace driver version detected. 
Detected version %d, should be %d\n", req.userspace_ver, IRDMA_ABI_VER); return ERR_PTR(-EINVAL); } #endif #if __FreeBSD_version >= 1400026 /** * irdma_dealloc_ucontext - deallocate the user context data structure * @context: user context created during alloc */ void irdma_dealloc_ucontext(struct ib_ucontext *context) { struct irdma_ucontext *ucontext = to_ucontext(context); rdma_user_mmap_entry_remove(ucontext->db_mmap_entry); return; } #endif #if __FreeBSD_version < 1400026 /** * irdma_dealloc_ucontext - deallocate the user context data structure * @context: user context created during alloc */ int irdma_dealloc_ucontext(struct ib_ucontext *context) { struct irdma_ucontext *ucontext = to_ucontext(context); irdma_user_mmap_entry_del_hash(ucontext->db_mmap_entry); spin_lock_destroy(&ucontext->mmap_tbl_lock); kfree(ucontext); return 0; } #endif #define IRDMA_ALLOC_PD_MIN_RESP_LEN offsetofend(struct irdma_alloc_pd_resp, rsvd) #if __FreeBSD_version >= 1400026 /** * irdma_alloc_pd - allocate protection domain * @pd: protection domain * @udata: user data */ int irdma_alloc_pd(struct ib_pd *pd, struct ib_udata *udata) { struct irdma_pd *iwpd = to_iwpd(pd); struct irdma_device *iwdev = to_iwdev(pd->device); struct irdma_sc_dev *dev = &iwdev->rf->sc_dev; struct irdma_pci_f *rf = iwdev->rf; struct irdma_alloc_pd_resp uresp = {0}; struct irdma_sc_pd *sc_pd; u32 pd_id = 0; int err; if (udata && udata->outlen < IRDMA_ALLOC_PD_MIN_RESP_LEN) return -EINVAL; err = irdma_alloc_rsrc(rf, rf->allocated_pds, rf->max_pd, &pd_id, &rf->next_pd); if (err) return err; sc_pd = &iwpd->sc_pd; if (udata) { struct irdma_ucontext *ucontext = rdma_udata_to_drv_context(udata, struct irdma_ucontext, ibucontext); irdma_sc_pd_init(dev, sc_pd, pd_id, ucontext->abi_ver); uresp.pd_id = pd_id; if (ib_copy_to_udata(udata, &uresp, min(sizeof(uresp), udata->outlen))) { err = -EFAULT; goto error; } } else { irdma_sc_pd_init(dev, sc_pd, pd_id, IRDMA_ABI_VER); } spin_lock_init(&iwpd->udqp_list_lock); INIT_LIST_HEAD(&iwpd->udqp_list); return 0; error: irdma_free_rsrc(rf, rf->allocated_pds, pd_id); return err; } #endif #if __FreeBSD_version < 1400026 /** * irdma_alloc_pd - allocate protection domain * @ibdev: IB device * @context: user context * @udata: user data */ struct ib_pd * irdma_alloc_pd(struct ib_device *ibdev, struct ib_ucontext *context, struct ib_udata *udata) { struct irdma_pd *iwpd; struct irdma_device *iwdev = to_iwdev(ibdev); struct irdma_sc_dev *dev = &iwdev->rf->sc_dev; struct irdma_pci_f *rf = iwdev->rf; struct irdma_alloc_pd_resp uresp = {0}; struct irdma_sc_pd *sc_pd; u32 pd_id = 0; int err; err = irdma_alloc_rsrc(rf, rf->allocated_pds, rf->max_pd, &pd_id, &rf->next_pd); if (err) return ERR_PTR(err); iwpd = kzalloc(sizeof(*iwpd), GFP_KERNEL); if (!iwpd) { err = -ENOMEM; goto free_res; } sc_pd = &iwpd->sc_pd; if (udata) { struct irdma_ucontext *ucontext = to_ucontext(context); irdma_sc_pd_init(dev, sc_pd, pd_id, ucontext->abi_ver); uresp.pd_id = pd_id; if (ib_copy_to_udata(udata, &uresp, min(sizeof(uresp), udata->outlen))) { err = -EFAULT; goto error; } } else { irdma_sc_pd_init(dev, sc_pd, pd_id, IRDMA_ABI_VER); } spin_lock_init(&iwpd->udqp_list_lock); INIT_LIST_HEAD(&iwpd->udqp_list); return &iwpd->ibpd; error: kfree(iwpd); free_res: irdma_free_rsrc(rf, rf->allocated_pds, pd_id); return ERR_PTR(err); } #endif #if __FreeBSD_version >= 1400026 void irdma_dealloc_pd(struct ib_pd *ibpd, struct ib_udata *udata) { struct irdma_pd *iwpd = to_iwpd(ibpd); struct irdma_device *iwdev = to_iwdev(ibpd->device); 
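	/*
	 * Only the PD index needs returning to the driver's resource bitmap
	 * here; on this newer-ABI path the irdma_pd object itself is owned
	 * and freed by the ib core, unlike the pre-1400026 variant below,
	 * which must kfree() the PD it allocated.
	 */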
irdma_free_rsrc(iwdev->rf, iwdev->rf->allocated_pds, iwpd->sc_pd.pd_id); } #endif #if __FreeBSD_version < 1400026 int irdma_dealloc_pd(struct ib_pd *ibpd) { struct irdma_pd *iwpd = to_iwpd(ibpd); struct irdma_device *iwdev = to_iwdev(ibpd->device); irdma_free_rsrc(iwdev->rf, iwdev->rf->allocated_pds, iwpd->sc_pd.pd_id); kfree(iwpd); return 0; } #endif /** * irdma_find_qp_update_qs - update QS handle for UD QPs * @rf: RDMA PCI function * @pd: protection domain object * @user_pri: selected user priority */ static void irdma_find_qp_update_qs(struct irdma_pci_f *rf, struct irdma_pd *pd, u8 user_pri) { struct irdma_qp *iwqp; struct list_head *tmp_node, *list_node; struct irdma_udqs_work *work; unsigned long flags; bool qs_change; spin_lock_irqsave(&pd->udqp_list_lock, flags); list_for_each_safe(list_node, tmp_node, &pd->udqp_list) { qs_change = true; iwqp = list_entry(list_node, struct irdma_qp, ud_list_elem); irdma_qp_add_ref(&iwqp->ibqp); /* check if qs_handle needs to be changed */ if (iwqp->sc_qp.qs_handle == iwqp->sc_qp.vsi->qos[user_pri].qs_handle) { if (iwqp->ctx_info.user_pri == user_pri) { /* qs_handle and user_pri don't change */ irdma_qp_rem_ref(&iwqp->ibqp); continue; } qs_change = false; } /* perform qp qos change */ work = kzalloc(sizeof(*work), GFP_ATOMIC); if (!work) { irdma_qp_rem_ref(&iwqp->ibqp); spin_unlock_irqrestore(&pd->udqp_list_lock, flags); return; } work->iwqp = iwqp; work->user_prio = user_pri; work->qs_change = qs_change; INIT_WORK(&work->work, irdma_udqp_qs_worker); if (qs_change) irdma_cqp_qp_suspend_resume(&iwqp->sc_qp, IRDMA_OP_SUSPEND); queue_work(rf->iwdev->cleanup_wq, &work->work); } spin_unlock_irqrestore(&pd->udqp_list_lock, flags); } static void irdma_fill_ah_info(struct vnet *vnet, struct irdma_ah_info *ah_info, const struct ib_gid_attr *sgid_attr, - struct sockaddr *sgid_addr, struct sockaddr *dgid_addr, + union irdma_sockaddr *sgid_addr, + union irdma_sockaddr *dgid_addr, u8 *dmac, u8 net_type) { if (net_type == RDMA_NETWORK_IPV4) { ah_info->ipv4_valid = true; ah_info->dest_ip_addr[0] = - ntohl(((struct sockaddr_in *)dgid_addr)->sin_addr.s_addr); + ntohl(dgid_addr->saddr_in.sin_addr.s_addr); ah_info->src_ip_addr[0] = - ntohl(((struct sockaddr_in *)sgid_addr)->sin_addr.s_addr); + ntohl(sgid_addr->saddr_in.sin_addr.s_addr); CURVNET_SET_QUIET(vnet); ah_info->do_lpbk = irdma_ipv4_is_lpb(ah_info->src_ip_addr[0], ah_info->dest_ip_addr[0]); CURVNET_RESTORE(); - if (ipv4_is_multicast(((struct sockaddr_in *)dgid_addr)->sin_addr.s_addr)) { + if (ipv4_is_multicast(dgid_addr->saddr_in.sin_addr.s_addr)) { irdma_mcast_mac_v4(ah_info->dest_ip_addr, dmac); } } else { irdma_copy_ip_ntohl(ah_info->dest_ip_addr, - ((struct sockaddr_in6 *)dgid_addr)->sin6_addr.__u6_addr.__u6_addr32); + dgid_addr->saddr_in6.sin6_addr.__u6_addr.__u6_addr32); irdma_copy_ip_ntohl(ah_info->src_ip_addr, - ((struct sockaddr_in6 *)sgid_addr)->sin6_addr.__u6_addr.__u6_addr32); + sgid_addr->saddr_in6.sin6_addr.__u6_addr.__u6_addr32); ah_info->do_lpbk = irdma_ipv6_is_lpb(ah_info->src_ip_addr, ah_info->dest_ip_addr); - if (rdma_is_multicast_addr(&((struct sockaddr_in6 *)dgid_addr)->sin6_addr)) { + if (rdma_is_multicast_addr(&dgid_addr->saddr_in6.sin6_addr)) { irdma_mcast_mac_v6(ah_info->dest_ip_addr, dmac); } } } -static inline u8 irdma_get_vlan_ndev_prio(if_t ndev, u8 prio) +static inline u8 irdma_roce_get_vlan_prio(if_t ndev, u8 prio) { return prio; } static int irdma_create_ah_vlan_tag(struct irdma_device *iwdev, struct irdma_pd *pd, struct irdma_ah_info *ah_info, const struct ib_gid_attr 
*sgid_attr, u8 *dmac) { u16 vlan_prio; if (sgid_attr->ndev && is_vlan_dev(sgid_attr->ndev)) ah_info->vlan_tag = vlan_dev_vlan_id(sgid_attr->ndev); else ah_info->vlan_tag = VLAN_N_VID; ah_info->dst_arpindex = irdma_add_arp(iwdev->rf, ah_info->dest_ip_addr, dmac); if (ah_info->dst_arpindex == -1) return -EINVAL; if (ah_info->vlan_tag >= VLAN_N_VID && iwdev->dcb_vlan_mode) ah_info->vlan_tag = 0; if (ah_info->vlan_tag < VLAN_N_VID) { - if_t ndev = sgid_attr->ndev; - ah_info->insert_vlan_tag = true; - vlan_prio = (u16)irdma_get_vlan_ndev_prio(ndev, rt_tos2priority(ah_info->tc_tos)); + vlan_prio = (u16)irdma_roce_get_vlan_prio(sgid_attr->ndev, + rt_tos2priority(ah_info->tc_tos)); ah_info->vlan_tag |= vlan_prio << VLAN_PRIO_SHIFT; irdma_find_qp_update_qs(iwdev->rf, pd, vlan_prio); } if (iwdev->roce_dcqcn_en) { ah_info->tc_tos &= ~ECN_CODE_PT_MASK; ah_info->tc_tos |= ECN_CODE_PT_VAL; } return 0; } static int irdma_create_ah_wait(struct irdma_pci_f *rf, struct irdma_sc_ah *sc_ah, bool sleep) { if (!sleep) { int cnt = rf->sc_dev.hw_attrs.max_cqp_compl_wait_time_ms * CQP_TIMEOUT_THRESHOLD; do { irdma_cqp_ce_handler(rf, &rf->ccq.sc_cq); mdelay(1); } while (!sc_ah->ah_info.ah_valid && --cnt); if (!cnt) return -ETIMEDOUT; } return 0; } #define IRDMA_CREATE_AH_MIN_RESP_LEN offsetofend(struct irdma_create_ah_resp, rsvd) #if __FreeBSD_version >= 1400026 /** * irdma_create_ah - create address handle * @ib_ah: ptr to AH * @attr: address handle attributes * @flags: AH flags to wait * @udata: user data * * returns 0 on success, error otherwise */ int irdma_create_ah(struct ib_ah *ib_ah, struct ib_ah_attr *attr, u32 flags, struct ib_udata *udata) { struct irdma_pd *pd = to_iwpd(ib_ah->pd); struct irdma_ah *ah = container_of(ib_ah, struct irdma_ah, ibah); struct irdma_device *iwdev = to_iwdev(ib_ah->pd->device); union ib_gid sgid; struct ib_gid_attr sgid_attr; struct irdma_pci_f *rf = iwdev->rf; struct irdma_sc_ah *sc_ah; u32 ah_id = 0; struct irdma_ah_info *ah_info; - struct irdma_create_ah_resp uresp; - union { - struct sockaddr saddr; - struct sockaddr_in saddr_in; - struct sockaddr_in6 saddr_in6; - } sgid_addr, dgid_addr; + struct irdma_create_ah_resp uresp = {}; + union irdma_sockaddr sgid_addr, dgid_addr; int err; - u8 dmac[ETH_ALEN]; + u8 dmac[ETHER_ADDR_LEN]; bool sleep = (flags & RDMA_CREATE_AH_SLEEPABLE) != 0; if (udata && udata->outlen < IRDMA_CREATE_AH_MIN_RESP_LEN) return -EINVAL; err = irdma_alloc_rsrc(rf, rf->allocated_ahs, rf->max_ah, &ah_id, &rf->next_ah); if (err) return err; ah->pd = pd; sc_ah = &ah->sc_ah; sc_ah->ah_info.ah_idx = ah_id; sc_ah->ah_info.vsi = &iwdev->vsi; irdma_sc_init_ah(&rf->sc_dev, sc_ah); ah->sgid_index = attr->grh.sgid_index; memcpy(&ah->dgid, &attr->grh.dgid, sizeof(ah->dgid)); rcu_read_lock(); err = ib_get_cached_gid(&iwdev->ibdev, attr->port_num, attr->grh.sgid_index, &sgid, &sgid_attr); rcu_read_unlock(); if (err) { irdma_debug(&iwdev->rf->sc_dev, IRDMA_DEBUG_VERBS, "GID lookup at idx=%d with port=%d failed\n", attr->grh.sgid_index, attr->port_num); err = -EINVAL; goto err_gid_l2; } rdma_gid2ip((struct sockaddr *)&sgid_addr, &sgid); rdma_gid2ip((struct sockaddr *)&dgid_addr, &attr->grh.dgid); ah->av.attrs = *attr; - ah->av.net_type = kc_rdma_gid_attr_network_type(sgid_attr, - sgid_attr.gid_type, - &sgid); + ah->av.net_type = ib_gid_to_network_type(sgid_attr.gid_type, &sgid); if (sgid_attr.ndev) dev_put(sgid_attr.ndev); - ah->av.sgid_addr.saddr = sgid_addr.saddr; - ah->av.dgid_addr.saddr = dgid_addr.saddr; ah_info = &sc_ah->ah_info; ah_info->ah_idx = ah_id; 
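	/*
	 * Fill in the AH info consumed by the IRDMA_OP_AH_CREATE CQP request
	 * issued below: PD index, source MAC taken from the netdev, and, when
	 * a GRH is present, the flow label, hop limit and traffic class
	 * supplied by the caller.
	 */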
ah_info->pd_idx = pd->sc_pd.pd_id; ether_addr_copy(ah_info->mac_addr, if_getlladdr(iwdev->netdev)); if (attr->ah_flags & IB_AH_GRH) { ah_info->flow_label = attr->grh.flow_label; ah_info->hop_ttl = attr->grh.hop_limit; ah_info->tc_tos = attr->grh.traffic_class; } ether_addr_copy(dmac, attr->dmac); - irdma_fill_ah_info(if_getvnet(iwdev->netdev), ah_info, &sgid_attr, &sgid_addr.saddr, &dgid_addr.saddr, + irdma_fill_ah_info(if_getvnet(iwdev->netdev), ah_info, &sgid_attr, &sgid_addr, &dgid_addr, dmac, ah->av.net_type); err = irdma_create_ah_vlan_tag(iwdev, pd, ah_info, &sgid_attr, dmac); if (err) goto err_gid_l2; err = irdma_ah_cqp_op(iwdev->rf, sc_ah, IRDMA_OP_AH_CREATE, sleep, irdma_gsi_ud_qp_ah_cb, sc_ah); if (err) { irdma_debug(&iwdev->rf->sc_dev, IRDMA_DEBUG_DEV, "CQP-OP Create AH fail"); goto err_gid_l2; } err = irdma_create_ah_wait(rf, sc_ah, sleep); if (err) { irdma_debug(&iwdev->rf->sc_dev, IRDMA_DEBUG_DEV, "CQP create AH timed out"); goto err_gid_l2; } if (udata) { uresp.ah_id = ah->sc_ah.ah_info.ah_idx; - err = ib_copy_to_udata(udata, &uresp, sizeof(uresp)); + err = ib_copy_to_udata(udata, &uresp, min(sizeof(uresp), udata->outlen)); if (err) { irdma_ah_cqp_op(iwdev->rf, &ah->sc_ah, IRDMA_OP_AH_DESTROY, false, NULL, ah); goto err_gid_l2; } } return 0; err_gid_l2: irdma_free_rsrc(iwdev->rf, iwdev->rf->allocated_ahs, ah_id); return err; } #endif void irdma_ether_copy(u8 *dmac, struct ib_ah_attr *attr) { ether_addr_copy(dmac, attr->dmac); } #if __FreeBSD_version < 1400026 struct ib_ah * irdma_create_ah_stub(struct ib_pd *ibpd, struct ib_ah_attr *attr, struct ib_udata *udata) #else int irdma_create_ah_stub(struct ib_ah *ib_ah, struct ib_ah_attr *attr, u32 flags, struct ib_udata *udata) #endif { #if __FreeBSD_version >= 1400026 return -ENOSYS; #else return ERR_PTR(-ENOSYS); #endif } #if __FreeBSD_version >= 1400026 void irdma_destroy_ah_stub(struct ib_ah *ibah, u32 flags) { return; } #else int irdma_destroy_ah_stub(struct ib_ah *ibah) { return -ENOSYS; } #endif #if __FreeBSD_version < 1400026 /** * irdma_create_ah - create address handle * @ibpd: ptr to pd * @attr: address handle attributes * @udata: user data * * returns a pointer to an address handle */ struct ib_ah * irdma_create_ah(struct ib_pd *ibpd, struct ib_ah_attr *attr, struct ib_udata *udata) { struct irdma_pd *pd = to_iwpd(ibpd); struct irdma_device *iwdev = to_iwdev(ibpd->device); struct irdma_ah *ah; union ib_gid sgid; struct ib_gid_attr sgid_attr; struct irdma_pci_f *rf = iwdev->rf; struct irdma_sc_ah *sc_ah; u32 ah_id = 0; struct irdma_ah_info *ah_info; - struct irdma_create_ah_resp uresp; - union { - struct sockaddr saddr; - struct sockaddr_in saddr_in; - struct sockaddr_in6 saddr_in6; - } sgid_addr, dgid_addr; + struct irdma_create_ah_resp uresp = {}; + union irdma_sockaddr sgid_addr, dgid_addr; int err; - u8 dmac[ETH_ALEN]; + u8 dmac[ETHER_ADDR_LEN]; bool sleep = udata ? 
true : false; if (udata && udata->outlen < IRDMA_CREATE_AH_MIN_RESP_LEN) return ERR_PTR(-EINVAL); err = irdma_alloc_rsrc(rf, rf->allocated_ahs, rf->max_ah, &ah_id, &rf->next_ah); if (err) return ERR_PTR(err); ah = kzalloc(sizeof(*ah), GFP_ATOMIC); if (!ah) { irdma_free_rsrc(rf, rf->allocated_ahs, ah_id); return ERR_PTR(-ENOMEM); } ah->pd = pd; sc_ah = &ah->sc_ah; sc_ah->ah_info.ah_idx = ah_id; sc_ah->ah_info.vsi = &iwdev->vsi; irdma_sc_init_ah(&rf->sc_dev, sc_ah); ah->sgid_index = attr->grh.sgid_index; memcpy(&ah->dgid, &attr->grh.dgid, sizeof(ah->dgid)); rcu_read_lock(); err = ib_get_cached_gid(&iwdev->ibdev, attr->port_num, attr->grh.sgid_index, &sgid, &sgid_attr); rcu_read_unlock(); if (err) { irdma_debug(&iwdev->rf->sc_dev, IRDMA_DEBUG_VERBS, "GID lookup at idx=%d with port=%d failed\n", attr->grh.sgid_index, attr->port_num); err = -EINVAL; goto err_gid_l2; } rdma_gid2ip((struct sockaddr *)&sgid_addr, &sgid); rdma_gid2ip((struct sockaddr *)&dgid_addr, &attr->grh.dgid); ah->av.attrs = *attr; - ah->av.net_type = kc_rdma_gid_attr_network_type(sgid_attr, - sgid_attr.gid_type, - &sgid); + ah->av.net_type = ib_gid_to_network_type(sgid_attr.gid_type, &sgid); if (sgid_attr.ndev) dev_put(sgid_attr.ndev); - ah->av.sgid_addr.saddr = sgid_addr.saddr; - ah->av.dgid_addr.saddr = dgid_addr.saddr; ah_info = &sc_ah->ah_info; ah_info->ah_idx = ah_id; ah_info->pd_idx = pd->sc_pd.pd_id; ether_addr_copy(ah_info->mac_addr, if_getlladdr(iwdev->netdev)); if (attr->ah_flags & IB_AH_GRH) { ah_info->flow_label = attr->grh.flow_label; ah_info->hop_ttl = attr->grh.hop_limit; ah_info->tc_tos = attr->grh.traffic_class; } if (udata) ib_resolve_eth_dmac(ibpd->device, attr); irdma_ether_copy(dmac, attr); - irdma_fill_ah_info(if_getvnet(iwdev->netdev), ah_info, &sgid_attr, &sgid_addr.saddr, &dgid_addr.saddr, + irdma_fill_ah_info(if_getvnet(iwdev->netdev), ah_info, &sgid_attr, &sgid_addr, &dgid_addr, dmac, ah->av.net_type); err = irdma_create_ah_vlan_tag(iwdev, pd, ah_info, &sgid_attr, dmac); if (err) goto err_gid_l2; err = irdma_ah_cqp_op(iwdev->rf, sc_ah, IRDMA_OP_AH_CREATE, sleep, irdma_gsi_ud_qp_ah_cb, sc_ah); if (err) { irdma_debug(&iwdev->rf->sc_dev, IRDMA_DEBUG_VERBS, "CQP-OP Create AH fail"); goto err_gid_l2; } err = irdma_create_ah_wait(rf, sc_ah, sleep); if (err) { irdma_debug(&iwdev->rf->sc_dev, IRDMA_DEBUG_DEV, "CQP create AH timed out"); goto err_gid_l2; } if (udata) { uresp.ah_id = ah->sc_ah.ah_info.ah_idx; - err = ib_copy_to_udata(udata, &uresp, sizeof(uresp)); + err = ib_copy_to_udata(udata, &uresp, min(sizeof(uresp), udata->outlen)); if (err) { irdma_ah_cqp_op(iwdev->rf, &ah->sc_ah, IRDMA_OP_AH_DESTROY, false, NULL, ah); goto err_gid_l2; } } return &ah->ibah; err_gid_l2: kfree(ah); irdma_free_rsrc(iwdev->rf, iwdev->rf->allocated_ahs, ah_id); return ERR_PTR(err); } #endif /** * irdma_free_qp_rsrc - free up memory resources for qp * @iwqp: qp ptr (user or kernel) */ void irdma_free_qp_rsrc(struct irdma_qp *iwqp) { struct irdma_device *iwdev = iwqp->iwdev; struct irdma_pci_f *rf = iwdev->rf; u32 qp_num = iwqp->ibqp.qp_num; irdma_ieq_cleanup_qp(iwdev->vsi.ieq, &iwqp->sc_qp); irdma_dealloc_push_page(rf, &iwqp->sc_qp); if (iwqp->sc_qp.vsi) { irdma_qp_rem_qos(&iwqp->sc_qp); iwqp->sc_qp.dev->ws_remove(iwqp->sc_qp.vsi, iwqp->sc_qp.user_pri); } if (qp_num > 2) irdma_free_rsrc(rf, rf->allocated_qps, qp_num); irdma_free_dma_mem(rf->sc_dev.hw, &iwqp->q2_ctx_mem); irdma_free_dma_mem(rf->sc_dev.hw, &iwqp->kqp.dma_mem); kfree(iwqp->kqp.sig_trk_mem); iwqp->kqp.sig_trk_mem = NULL; kfree(iwqp->kqp.sq_wrid_mem); 
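	/*
	 * Like sq_wrid_mem above, rq_wrid_mem is only allocated for
	 * kernel-mode QPs; kfree() tolerates a NULL pointer, so this is
	 * harmless for user-mode QPs.
	 */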
kfree(iwqp->kqp.rq_wrid_mem); kfree(iwqp->sg_list); kfree(iwqp); } /** * irdma_create_qp - create qp * @ibpd: ptr of pd * @init_attr: attributes for qp * @udata: user data for create qp */ struct ib_qp * irdma_create_qp(struct ib_pd *ibpd, struct ib_qp_init_attr *init_attr, struct ib_udata *udata) { #define IRDMA_CREATE_QP_MIN_REQ_LEN offsetofend(struct irdma_create_qp_req, user_compl_ctx) #define IRDMA_CREATE_QP_MIN_RESP_LEN offsetofend(struct irdma_create_qp_resp, rsvd) struct irdma_pd *iwpd = to_iwpd(ibpd); struct irdma_device *iwdev = to_iwdev(ibpd->device); struct irdma_pci_f *rf = iwdev->rf; struct irdma_qp *iwqp; struct irdma_create_qp_resp uresp = {0}; u32 qp_num = 0; int ret; int err_code; struct irdma_sc_qp *qp; struct irdma_sc_dev *dev = &rf->sc_dev; struct irdma_uk_attrs *uk_attrs = &dev->hw_attrs.uk_attrs; struct irdma_qp_init_info init_info = {{0}}; struct irdma_qp_host_ctx_info *ctx_info; unsigned long flags; err_code = irdma_validate_qp_attrs(init_attr, iwdev); if (err_code) return ERR_PTR(err_code); if (udata && (udata->inlen < IRDMA_CREATE_QP_MIN_REQ_LEN || udata->outlen < IRDMA_CREATE_QP_MIN_RESP_LEN)) return ERR_PTR(-EINVAL); init_info.vsi = &iwdev->vsi; init_info.qp_uk_init_info.uk_attrs = uk_attrs; init_info.qp_uk_init_info.sq_size = init_attr->cap.max_send_wr; init_info.qp_uk_init_info.rq_size = init_attr->cap.max_recv_wr; init_info.qp_uk_init_info.max_sq_frag_cnt = init_attr->cap.max_send_sge; init_info.qp_uk_init_info.max_rq_frag_cnt = init_attr->cap.max_recv_sge; init_info.qp_uk_init_info.max_inline_data = init_attr->cap.max_inline_data; iwqp = kzalloc(sizeof(*iwqp), GFP_KERNEL); if (!iwqp) return ERR_PTR(-ENOMEM); iwqp->sg_list = kcalloc(uk_attrs->max_hw_wq_frags, sizeof(*iwqp->sg_list), GFP_KERNEL); if (!iwqp->sg_list) { kfree(iwqp); return ERR_PTR(-ENOMEM); } qp = &iwqp->sc_qp; qp->qp_uk.back_qp = iwqp; qp->qp_uk.lock = &iwqp->lock; qp->push_idx = IRDMA_INVALID_PUSH_PAGE_INDEX; iwqp->iwdev = iwdev; iwqp->q2_ctx_mem.size = IRDMA_Q2_BUF_SIZE + IRDMA_QP_CTX_SIZE; iwqp->q2_ctx_mem.va = irdma_allocate_dma_mem(dev->hw, &iwqp->q2_ctx_mem, iwqp->q2_ctx_mem.size, 256); if (!iwqp->q2_ctx_mem.va) { kfree(iwqp->sg_list); kfree(iwqp); return ERR_PTR(-ENOMEM); } init_info.q2 = iwqp->q2_ctx_mem.va; init_info.q2_pa = iwqp->q2_ctx_mem.pa; init_info.host_ctx = (__le64 *) (init_info.q2 + IRDMA_Q2_BUF_SIZE); init_info.host_ctx_pa = init_info.q2_pa + IRDMA_Q2_BUF_SIZE; if (init_attr->qp_type == IB_QPT_GSI) qp_num = 1; else err_code = irdma_alloc_rsrc(rf, rf->allocated_qps, rf->max_qp, &qp_num, &rf->next_qp); if (err_code) goto error; iwqp->iwpd = iwpd; iwqp->ibqp.qp_num = qp_num; qp = &iwqp->sc_qp; iwqp->iwscq = to_iwcq(init_attr->send_cq); iwqp->iwrcq = to_iwcq(init_attr->recv_cq); iwqp->host_ctx.va = init_info.host_ctx; iwqp->host_ctx.pa = init_info.host_ctx_pa; iwqp->host_ctx.size = IRDMA_QP_CTX_SIZE; init_info.pd = &iwpd->sc_pd; init_info.qp_uk_init_info.qp_id = iwqp->ibqp.qp_num; if (!rdma_protocol_roce(&iwdev->ibdev, 1)) init_info.qp_uk_init_info.first_sq_wq = 1; iwqp->ctx_info.qp_compl_ctx = (uintptr_t)qp; init_waitqueue_head(&iwqp->waitq); init_waitqueue_head(&iwqp->mod_qp_waitq); + spin_lock_init(&iwqp->dwork_flush_lock); + if (udata) { init_info.qp_uk_init_info.abi_ver = iwpd->sc_pd.abi_ver; err_code = irdma_setup_umode_qp(udata, iwdev, iwqp, &init_info, init_attr); } else { INIT_DELAYED_WORK(&iwqp->dwork_flush, irdma_flush_worker); init_info.qp_uk_init_info.abi_ver = IRDMA_ABI_VER; err_code = irdma_setup_kmode_qp(iwdev, iwqp, &init_info, init_attr); } if (err_code) { 
irdma_debug(&iwdev->rf->sc_dev, IRDMA_DEBUG_VERBS, "setup qp failed\n"); goto error; } if (rdma_protocol_roce(&iwdev->ibdev, 1)) { if (init_attr->qp_type == IB_QPT_RC) { init_info.qp_uk_init_info.type = IRDMA_QP_TYPE_ROCE_RC; init_info.qp_uk_init_info.qp_caps = IRDMA_SEND_WITH_IMM | IRDMA_WRITE_WITH_IMM | IRDMA_ROCE; } else { init_info.qp_uk_init_info.type = IRDMA_QP_TYPE_ROCE_UD; init_info.qp_uk_init_info.qp_caps = IRDMA_SEND_WITH_IMM | IRDMA_ROCE; } } else { init_info.qp_uk_init_info.type = IRDMA_QP_TYPE_IWARP; init_info.qp_uk_init_info.qp_caps = IRDMA_WRITE_WITH_IMM; } ret = irdma_sc_qp_init(qp, &init_info); if (ret) { err_code = -EPROTO; irdma_debug(&iwdev->rf->sc_dev, IRDMA_DEBUG_VERBS, "qp_init fail\n"); goto error; } ctx_info = &iwqp->ctx_info; ctx_info->send_cq_num = iwqp->iwscq->sc_cq.cq_uk.cq_id; ctx_info->rcv_cq_num = iwqp->iwrcq->sc_cq.cq_uk.cq_id; if (rdma_protocol_roce(&iwdev->ibdev, 1)) irdma_roce_fill_and_set_qpctx_info(iwqp, ctx_info); else irdma_iw_fill_and_set_qpctx_info(iwqp, ctx_info); err_code = irdma_cqp_create_qp_cmd(iwqp); if (err_code) goto error; atomic_set(&iwqp->refcnt, 1); spin_lock_init(&iwqp->lock); spin_lock_init(&iwqp->sc_qp.pfpdu.lock); iwqp->sig_all = (init_attr->sq_sig_type == IB_SIGNAL_ALL_WR) ? 1 : 0; rf->qp_table[qp_num] = iwqp; if (rdma_protocol_roce(&iwdev->ibdev, 1)) { if (dev->ws_add(&iwdev->vsi, 0)) { irdma_cqp_qp_destroy_cmd(&rf->sc_dev, &iwqp->sc_qp); err_code = -EINVAL; goto error; } irdma_qp_add_qos(&iwqp->sc_qp); spin_lock_irqsave(&iwpd->udqp_list_lock, flags); if (iwqp->sc_qp.qp_uk.qp_type == IRDMA_QP_TYPE_ROCE_UD) list_add_tail(&iwqp->ud_list_elem, &iwpd->udqp_list); spin_unlock_irqrestore(&iwpd->udqp_list_lock, flags); } if (udata) { /* GEN_1 legacy support with libi40iw does not have expanded uresp struct */ if (udata->outlen < sizeof(uresp)) { uresp.lsmm = 1; uresp.push_idx = IRDMA_INVALID_PUSH_PAGE_INDEX_GEN_1; } else { if (rdma_protocol_iwarp(&iwdev->ibdev, 1)) uresp.lsmm = 1; } uresp.actual_sq_size = init_info.qp_uk_init_info.sq_size; uresp.actual_rq_size = init_info.qp_uk_init_info.rq_size; uresp.qp_id = qp_num; uresp.qp_caps = qp->qp_uk.qp_caps; err_code = ib_copy_to_udata(udata, &uresp, min(sizeof(uresp), udata->outlen)); if (err_code) { irdma_debug(&iwdev->rf->sc_dev, IRDMA_DEBUG_VERBS, "copy_to_udata failed\n"); kc_irdma_destroy_qp(&iwqp->ibqp, udata); return ERR_PTR(err_code); } } init_completion(&iwqp->free_qp); return &iwqp->ibqp; error: irdma_free_qp_rsrc(iwqp); return ERR_PTR(err_code); } /** * irdma_destroy_qp - destroy qp * @ibqp: qp's ib pointer also to get to device's qp address * @udata: user data */ #if __FreeBSD_version >= 1400026 int irdma_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata) #else int irdma_destroy_qp(struct ib_qp *ibqp) #endif { struct irdma_qp *iwqp = to_iwqp(ibqp); struct irdma_device *iwdev = iwqp->iwdev; unsigned long flags; if (iwqp->sc_qp.qp_uk.destroy_pending) goto free_rsrc; iwqp->sc_qp.qp_uk.destroy_pending = true; spin_lock_irqsave(&iwqp->iwpd->udqp_list_lock, flags); if (iwqp->sc_qp.qp_uk.qp_type == IRDMA_QP_TYPE_ROCE_UD) list_del(&iwqp->ud_list_elem); spin_unlock_irqrestore(&iwqp->iwpd->udqp_list_lock, flags); - if (iwqp->iwarp_state == IRDMA_QP_STATE_RTS) + if (iwqp->iwarp_state >= IRDMA_QP_STATE_IDLE) irdma_modify_qp_to_err(&iwqp->sc_qp); - irdma_qp_rem_ref(&iwqp->ibqp); - wait_for_completion(&iwqp->free_qp); - irdma_free_lsmm_rsrc(iwqp); - if (!iwdev->rf->reset && irdma_cqp_qp_destroy_cmd(&iwdev->rf->sc_dev, &iwqp->sc_qp)) - return (iwdev->rf->rdma_ver <= IRDMA_GEN_2 && 
!iwqp->user_mode) ? 0 : -ENOTRECOVERABLE; -free_rsrc: if (!iwqp->user_mode) { if (iwqp->iwscq) { irdma_clean_cqes(iwqp, iwqp->iwscq); if (iwqp->iwrcq != iwqp->iwscq) irdma_clean_cqes(iwqp, iwqp->iwrcq); } } + irdma_qp_rem_ref(&iwqp->ibqp); + wait_for_completion(&iwqp->free_qp); + irdma_free_lsmm_rsrc(iwqp); + if (!iwdev->rf->reset && irdma_cqp_qp_destroy_cmd(&iwdev->rf->sc_dev, &iwqp->sc_qp)) + return (iwdev->rf->rdma_ver <= IRDMA_GEN_2 && !iwqp->user_mode) ? 0 : -ENOTRECOVERABLE; +free_rsrc: irdma_remove_push_mmap_entries(iwqp); irdma_free_qp_rsrc(iwqp); return 0; } /** * irdma_create_cq - create cq * @ibcq: CQ allocated * @attr: attributes for cq * @udata: user data */ #if __FreeBSD_version >= 1400026 int irdma_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr, struct ib_udata *udata) #else struct ib_cq * irdma_create_cq(struct ib_device *ibdev, const struct ib_cq_init_attr *attr, struct ib_ucontext *context, struct ib_udata *udata) #endif { #define IRDMA_CREATE_CQ_MIN_REQ_LEN offsetofend(struct irdma_create_cq_req, user_cq_buf) #define IRDMA_CREATE_CQ_MIN_RESP_LEN offsetofend(struct irdma_create_cq_resp, cq_size) #if __FreeBSD_version >= 1400026 struct ib_device *ibdev = ibcq->device; #endif struct irdma_device *iwdev = to_iwdev(ibdev); struct irdma_pci_f *rf = iwdev->rf; #if __FreeBSD_version >= 1400026 struct irdma_cq *iwcq = to_iwcq(ibcq); #else struct irdma_cq *iwcq; #endif u32 cq_num = 0; struct irdma_sc_cq *cq; struct irdma_sc_dev *dev = &rf->sc_dev; struct irdma_cq_init_info info = {0}; int status; struct irdma_cqp_request *cqp_request; struct cqp_cmds_info *cqp_info; struct irdma_cq_uk_init_info *ukinfo = &info.cq_uk_init_info; unsigned long flags; int err_code; int entries = attr->cqe; bool cqe_64byte_ena; #if __FreeBSD_version >= 1400026 err_code = cq_validate_flags(attr->flags, dev->hw_attrs.uk_attrs.hw_rev); if (err_code) return err_code; if (udata && (udata->inlen < IRDMA_CREATE_CQ_MIN_REQ_LEN || udata->outlen < IRDMA_CREATE_CQ_MIN_RESP_LEN)) return -EINVAL; #else err_code = cq_validate_flags(attr->flags, dev->hw_attrs.uk_attrs.hw_rev); if (err_code) return ERR_PTR(err_code); if (udata && (udata->inlen < IRDMA_CREATE_CQ_MIN_REQ_LEN || udata->outlen < IRDMA_CREATE_CQ_MIN_RESP_LEN)) return ERR_PTR(-EINVAL); iwcq = kzalloc(sizeof(*iwcq), GFP_KERNEL); if (!iwcq) return ERR_PTR(-ENOMEM); #endif err_code = irdma_alloc_rsrc(rf, rf->allocated_cqs, rf->max_cq, &cq_num, &rf->next_cq); if (err_code) #if __FreeBSD_version >= 1400026 return err_code; #else goto error; #endif cq = &iwcq->sc_cq; cq->back_cq = iwcq; atomic_set(&iwcq->refcnt, 1); spin_lock_init(&iwcq->lock); INIT_LIST_HEAD(&iwcq->resize_list); INIT_LIST_HEAD(&iwcq->cmpl_generated); info.dev = dev; ukinfo->cq_size = max(entries, 4); ukinfo->cq_id = cq_num; cqe_64byte_ena = (dev->hw_attrs.uk_attrs.feature_flags & IRDMA_FEATURE_64_BYTE_CQE) ? 
true : false; ukinfo->avoid_mem_cflct = cqe_64byte_ena; iwcq->ibcq.cqe = info.cq_uk_init_info.cq_size; atomic_set(&iwcq->armed, 0); if (attr->comp_vector < rf->ceqs_count) info.ceq_id = attr->comp_vector; info.ceq_id_valid = true; info.ceqe_mask = 1; info.type = IRDMA_CQ_TYPE_IWARP; info.vsi = &iwdev->vsi; if (udata) { struct irdma_ucontext *ucontext; struct irdma_create_cq_req req = {0}; struct irdma_cq_mr *cqmr; struct irdma_pbl *iwpbl; struct irdma_pbl *iwpbl_shadow; struct irdma_cq_mr *cqmr_shadow; iwcq->user_mode = true; #if __FreeBSD_version >= 1400026 ucontext = rdma_udata_to_drv_context(udata, struct irdma_ucontext, ibucontext); #else ucontext = to_ucontext(context); #endif if (ib_copy_from_udata(&req, udata, min(sizeof(req), udata->inlen))) { err_code = -EFAULT; goto cq_free_rsrc; } spin_lock_irqsave(&ucontext->cq_reg_mem_list_lock, flags); iwpbl = irdma_get_pbl((unsigned long)req.user_cq_buf, &ucontext->cq_reg_mem_list); spin_unlock_irqrestore(&ucontext->cq_reg_mem_list_lock, flags); if (!iwpbl) { err_code = -EPROTO; goto cq_free_rsrc; } iwcq->iwpbl = iwpbl; iwcq->cq_mem_size = 0; cqmr = &iwpbl->cq_mr; if (rf->sc_dev.hw_attrs.uk_attrs.feature_flags & IRDMA_FEATURE_CQ_RESIZE && !ucontext->legacy_mode) { spin_lock_irqsave(&ucontext->cq_reg_mem_list_lock, flags); iwpbl_shadow = irdma_get_pbl((unsigned long)req.user_shadow_area, &ucontext->cq_reg_mem_list); spin_unlock_irqrestore(&ucontext->cq_reg_mem_list_lock, flags); if (!iwpbl_shadow) { err_code = -EPROTO; goto cq_free_rsrc; } iwcq->iwpbl_shadow = iwpbl_shadow; cqmr_shadow = &iwpbl_shadow->cq_mr; info.shadow_area_pa = cqmr_shadow->cq_pbl.addr; cqmr->split = true; } else { info.shadow_area_pa = cqmr->shadow; } if (iwpbl->pbl_allocated) { info.virtual_map = true; info.pbl_chunk_size = 1; info.first_pm_pbl_idx = cqmr->cq_pbl.idx; } else { info.cq_base_pa = cqmr->cq_pbl.addr; } } else { /* Kmode allocations */ int rsize; if (entries < 1 || entries > rf->max_cqe) { err_code = -EINVAL; goto cq_free_rsrc; } entries++; if (!cqe_64byte_ena && dev->hw_attrs.uk_attrs.hw_rev >= IRDMA_GEN_2) entries *= 2; ukinfo->cq_size = entries; if (cqe_64byte_ena) rsize = info.cq_uk_init_info.cq_size * sizeof(struct irdma_extended_cqe); else rsize = info.cq_uk_init_info.cq_size * sizeof(struct irdma_cqe); iwcq->kmem.size = round_up(rsize, IRDMA_HW_PAGE_SIZE); iwcq->kmem.va = irdma_allocate_dma_mem(dev->hw, &iwcq->kmem, iwcq->kmem.size, IRDMA_HW_PAGE_SIZE); if (!iwcq->kmem.va) { err_code = -ENOMEM; goto cq_free_rsrc; } iwcq->kmem_shadow.size = IRDMA_SHADOW_AREA_SIZE << 3; iwcq->kmem_shadow.va = irdma_allocate_dma_mem(dev->hw, &iwcq->kmem_shadow, iwcq->kmem_shadow.size, 64); if (!iwcq->kmem_shadow.va) { err_code = -ENOMEM; - goto cq_free_rsrc; + goto cq_kmem_free; } info.shadow_area_pa = iwcq->kmem_shadow.pa; ukinfo->shadow_area = iwcq->kmem_shadow.va; ukinfo->cq_base = iwcq->kmem.va; info.cq_base_pa = iwcq->kmem.pa; } - if (dev->hw_attrs.uk_attrs.hw_rev >= IRDMA_GEN_2) - info.shadow_read_threshold = min(info.cq_uk_init_info.cq_size / 2, - (u32)IRDMA_MAX_CQ_READ_THRESH); + info.shadow_read_threshold = min(info.cq_uk_init_info.cq_size / 2, + (u32)IRDMA_MAX_CQ_READ_THRESH); if (irdma_sc_cq_init(cq, &info)) { irdma_debug(&iwdev->rf->sc_dev, IRDMA_DEBUG_VERBS, "init cq fail\n"); err_code = -EPROTO; - goto cq_free_rsrc; + goto cq_kmem_free; } cqp_request = irdma_alloc_and_get_cqp_request(&rf->cqp, true); if (!cqp_request) { err_code = -ENOMEM; - goto cq_free_rsrc; + goto cq_kmem_free; } cqp_info = &cqp_request->info; cqp_info->cqp_cmd = IRDMA_OP_CQ_CREATE; 
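	/*
	 * Create the CQ in hardware through the control QP: the
	 * IRDMA_OP_CQ_CREATE request assembled here is posted by
	 * irdma_handle_cqp_op(), which waits for its completion since the
	 * request was allocated as a waiting one.
	 */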
cqp_info->post_sq = 1; cqp_info->in.u.cq_create.cq = cq; cqp_info->in.u.cq_create.check_overflow = true; cqp_info->in.u.cq_create.scratch = (uintptr_t)cqp_request; status = irdma_handle_cqp_op(rf, cqp_request); irdma_put_cqp_request(&rf->cqp, cqp_request); if (status) { err_code = -ENOMEM; - goto cq_free_rsrc; + goto cq_kmem_free; } if (udata) { struct irdma_create_cq_resp resp = {0}; resp.cq_id = info.cq_uk_init_info.cq_id; resp.cq_size = info.cq_uk_init_info.cq_size; if (ib_copy_to_udata(udata, &resp, min(sizeof(resp), udata->outlen))) { irdma_debug(&iwdev->rf->sc_dev, IRDMA_DEBUG_VERBS, "copy to user data\n"); err_code = -EPROTO; goto cq_destroy; } } rf->cq_table[cq_num] = iwcq; init_completion(&iwcq->free_cq); #if __FreeBSD_version >= 1400026 return 0; #else return &iwcq->ibcq; #endif cq_destroy: irdma_cq_wq_destroy(rf, cq); +cq_kmem_free: + if (!iwcq->user_mode) { + irdma_free_dma_mem(dev->hw, &iwcq->kmem); + irdma_free_dma_mem(dev->hw, &iwcq->kmem_shadow); + } cq_free_rsrc: - irdma_cq_free_rsrc(rf, iwcq); + irdma_free_rsrc(rf, rf->allocated_cqs, cq_num); #if __FreeBSD_version >= 1400026 return err_code; #else error: kfree(iwcq); return ERR_PTR(err_code); #endif } /** * irdma_copy_user_pgaddrs - copy user page address to pble's os locally * @iwmr: iwmr for IB's user page addresses * @pbl: ple pointer to save 1 level or 0 level pble * @level: indicated level 0, 1 or 2 */ void irdma_copy_user_pgaddrs(struct irdma_mr *iwmr, u64 *pbl, enum irdma_pble_level level) { struct ib_umem *region = iwmr->region; struct irdma_pbl *iwpbl = &iwmr->iwpbl; int chunk_pages, entry, i; struct scatterlist *sg; u64 pg_addr = 0; struct irdma_pble_alloc *palloc = &iwpbl->pble_alloc; struct irdma_pble_info *pinfo; u32 idx = 0; u32 pbl_cnt = 0; pinfo = (level == PBLE_LEVEL_1) ? 
NULL : palloc->level2.leaf; for_each_sg(region->sg_head.sgl, sg, region->nmap, entry) { chunk_pages = DIV_ROUND_UP(sg_dma_len(sg), iwmr->page_size); if (iwmr->type == IRDMA_MEMREG_TYPE_QP && !iwpbl->qp_mr.sq_page) iwpbl->qp_mr.sq_page = sg_page(sg); for (i = 0; i < chunk_pages; i++) { pg_addr = sg_dma_address(sg) + (i * iwmr->page_size); if ((entry + i) == 0) *pbl = pg_addr & iwmr->page_msk; else if (!(pg_addr & ~iwmr->page_msk)) *pbl = pg_addr; else continue; if (++pbl_cnt == palloc->total_cnt) break; pbl = irdma_next_pbl_addr(pbl, &pinfo, &idx); } } } /** * irdma_destroy_ah - Destroy address handle * @ibah: pointer to address handle * @ah_flags: destroy flags */ #if __FreeBSD_version >= 1400026 void irdma_destroy_ah(struct ib_ah *ibah, u32 ah_flags) { struct irdma_device *iwdev = to_iwdev(ibah->device); struct irdma_ah *ah = to_iwah(ibah); irdma_ah_cqp_op(iwdev->rf, &ah->sc_ah, IRDMA_OP_AH_DESTROY, false, NULL, ah); irdma_free_rsrc(iwdev->rf, iwdev->rf->allocated_ahs, ah->sc_ah.ah_info.ah_idx); } #endif #if __FreeBSD_version < 1400026 int irdma_destroy_ah(struct ib_ah *ibah) { struct irdma_device *iwdev = to_iwdev(ibah->device); struct irdma_ah *ah = to_iwah(ibah); irdma_ah_cqp_op(iwdev->rf, &ah->sc_ah, IRDMA_OP_AH_DESTROY, false, NULL, ah); irdma_free_rsrc(iwdev->rf, iwdev->rf->allocated_ahs, ah->sc_ah.ah_info.ah_idx); kfree(ah); return 0; } #endif #if __FreeBSD_version >= 1400026 int irdma_dereg_mr(struct ib_mr *ib_mr, struct ib_udata *udata) #else int irdma_dereg_mr(struct ib_mr *ib_mr) #endif { struct irdma_mr *iwmr = to_iwmr(ib_mr); struct irdma_device *iwdev = to_iwdev(ib_mr->device); struct irdma_pbl *iwpbl = &iwmr->iwpbl; int ret; if (iwmr->type != IRDMA_MEMREG_TYPE_MEM) { if (iwmr->region) { struct irdma_ucontext *ucontext; #if __FreeBSD_version >= 1400026 ucontext = rdma_udata_to_drv_context(udata, struct irdma_ucontext, ibucontext); #else struct ib_pd *ibpd = ib_mr->pd; ucontext = to_ucontext(ibpd->uobject->context); #endif irdma_del_memlist(iwmr, ucontext); } goto done; } ret = irdma_hwdereg_mr(ib_mr); if (ret) return ret; irdma_free_stag(iwdev, iwmr->stag); done: if (iwpbl->pbl_allocated) irdma_free_pble(iwdev->rf->pble_rsrc, &iwpbl->pble_alloc); if (iwmr->region) ib_umem_release(iwmr->region); kfree(iwmr); return 0; } /* * irdma_rereg_user_mr - Re-Register a user memory region @ibmr: ib mem to access iwarp mr pointer @flags: bit mask to * indicate which of the attr's of MR modified @start: virtual start address @len: length of mr @virt: virtual address * @new access flags: bit mask of access flags @new_pd: ptr of pd @udata: user data */ int irdma_rereg_user_mr(struct ib_mr *ib_mr, int flags, u64 start, u64 len, u64 virt, int new_access, struct ib_pd *new_pd, struct ib_udata *udata) { struct irdma_device *iwdev = to_iwdev(ib_mr->device); struct irdma_mr *iwmr = to_iwmr(ib_mr); struct irdma_pbl *iwpbl = &iwmr->iwpbl; int ret; if (len > iwdev->rf->sc_dev.hw_attrs.max_mr_size) return -EINVAL; if (flags & ~(IB_MR_REREG_TRANS | IB_MR_REREG_PD | IB_MR_REREG_ACCESS)) return -EOPNOTSUPP; ret = irdma_hwdereg_mr(ib_mr); if (ret) return ret; if (flags & IB_MR_REREG_ACCESS) iwmr->access = new_access; if (flags & IB_MR_REREG_PD) { iwmr->ibmr.pd = new_pd; iwmr->ibmr.device = new_pd->device; } if (flags & IB_MR_REREG_TRANS) { if (iwpbl->pbl_allocated) { irdma_free_pble(iwdev->rf->pble_rsrc, &iwpbl->pble_alloc); iwpbl->pbl_allocated = false; } if (iwmr->region) { ib_umem_release(iwmr->region); iwmr->region = NULL; } ib_mr = irdma_rereg_mr_trans(iwmr, start, len, virt, udata); if 
(IS_ERR(ib_mr)) return PTR_ERR(ib_mr); } else { ret = irdma_hwreg_mr(iwdev, iwmr, iwmr->access); if (ret) return ret; } return 0; } int kc_irdma_set_roce_cm_info(struct irdma_qp *iwqp, struct ib_qp_attr *attr, u16 *vlan_id) { int ret; union ib_gid sgid; struct ib_gid_attr sgid_attr; struct irdma_av *av = &iwqp->roce_ah.av; ret = ib_get_cached_gid(iwqp->ibqp.device, attr->ah_attr.port_num, attr->ah_attr.grh.sgid_index, &sgid, &sgid_attr); if (ret) return ret; if (sgid_attr.ndev) { *vlan_id = rdma_vlan_dev_vlan_id(sgid_attr.ndev); ether_addr_copy(iwqp->ctx_info.roce_info->mac_addr, if_getlladdr(sgid_attr.ndev)); } - av->net_type = kc_rdma_gid_attr_network_type(sgid_attr, - sgid_attr.gid_type, - &sgid); + av->net_type = ib_gid_to_network_type(sgid_attr.gid_type, &sgid); rdma_gid2ip((struct sockaddr *)&av->sgid_addr, &sgid); dev_put(sgid_attr.ndev); iwqp->sc_qp.user_pri = iwqp->ctx_info.user_pri; return 0; } #if __FreeBSD_version >= 1400026 /** * irdma_destroy_cq - destroy cq * @ib_cq: cq pointer * @udata: user data */ void irdma_destroy_cq(struct ib_cq *ib_cq, struct ib_udata *udata) { struct irdma_device *iwdev = to_iwdev(ib_cq->device); struct irdma_cq *iwcq = to_iwcq(ib_cq); struct irdma_sc_cq *cq = &iwcq->sc_cq; struct irdma_sc_dev *dev = cq->dev; struct irdma_sc_ceq *ceq = dev->ceq[cq->ceq_id]; struct irdma_ceq *iwceq = container_of(ceq, struct irdma_ceq, sc_ceq); unsigned long flags; spin_lock_irqsave(&iwcq->lock, flags); if (!list_empty(&iwcq->cmpl_generated)) irdma_remove_cmpls_list(iwcq); if (!list_empty(&iwcq->resize_list)) irdma_process_resize_list(iwcq, iwdev, NULL); spin_unlock_irqrestore(&iwcq->lock, flags); irdma_cq_rem_ref(ib_cq); wait_for_completion(&iwcq->free_cq); irdma_cq_wq_destroy(iwdev->rf, cq); spin_lock_irqsave(&iwceq->ce_lock, flags); irdma_sc_cleanup_ceqes(cq, ceq); spin_unlock_irqrestore(&iwceq->ce_lock, flags); irdma_cq_free_rsrc(iwdev->rf, iwcq); } #endif #if __FreeBSD_version < 1400026 /** * irdma_destroy_cq - destroy cq * @ib_cq: cq pointer */ int irdma_destroy_cq(struct ib_cq *ib_cq) { struct irdma_device *iwdev = to_iwdev(ib_cq->device); struct irdma_cq *iwcq = to_iwcq(ib_cq); struct irdma_sc_cq *cq = &iwcq->sc_cq; struct irdma_sc_dev *dev = cq->dev; struct irdma_sc_ceq *ceq = dev->ceq[cq->ceq_id]; struct irdma_ceq *iwceq = container_of(ceq, struct irdma_ceq, sc_ceq); unsigned long flags; spin_lock_irqsave(&iwcq->lock, flags); if (!list_empty(&iwcq->cmpl_generated)) irdma_remove_cmpls_list(iwcq); if (!list_empty(&iwcq->resize_list)) irdma_process_resize_list(iwcq, iwdev, NULL); spin_unlock_irqrestore(&iwcq->lock, flags); irdma_cq_rem_ref(ib_cq); wait_for_completion(&iwcq->free_cq); irdma_cq_wq_destroy(iwdev->rf, cq); spin_lock_irqsave(&iwceq->ce_lock, flags); irdma_sc_cleanup_ceqes(cq, ceq); spin_unlock_irqrestore(&iwceq->ce_lock, flags); irdma_cq_free_rsrc(iwdev->rf, iwcq); kfree(iwcq); return 0; } #endif /** * irdma_alloc_mw - Allocate memory window * @pd: Protection domain * @type: Window type * @udata: user data pointer */ struct ib_mw * irdma_alloc_mw(struct ib_pd *pd, enum ib_mw_type type, struct ib_udata *udata) { struct irdma_device *iwdev = to_iwdev(pd->device); struct irdma_mr *iwmr; int err_code; u32 stag; + if (type != IB_MW_TYPE_1 && type != IB_MW_TYPE_2) + return ERR_PTR(-EINVAL); + iwmr = kzalloc(sizeof(*iwmr), GFP_KERNEL); if (!iwmr) return ERR_PTR(-ENOMEM); stag = irdma_create_stag(iwdev); if (!stag) { kfree(iwmr); return ERR_PTR(-ENOMEM); } iwmr->stag = stag; iwmr->ibmw.rkey = stag; iwmr->ibmw.pd = pd; iwmr->ibmw.type = type; 
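	/*
	 * Consumers reference the window through its rkey, which is the HW
	 * STag allocated above; irdma_hw_alloc_mw() below completes the
	 * allocation on the device, and on failure both the STag and the
	 * iwmr are released.
	 */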
iwmr->ibmw.device = pd->device; err_code = irdma_hw_alloc_mw(iwdev, iwmr); if (err_code) { irdma_free_stag(iwdev, stag); kfree(iwmr); return ERR_PTR(err_code); } return &iwmr->ibmw; } /** * kc_set_loc_seq_num_mss - Set local seq number and mss * @cm_node: cm node info */ void kc_set_loc_seq_num_mss(struct irdma_cm_node *cm_node) { struct timespec ts; getnanotime(&ts); cm_node->tcp_cntxt.loc_seq_num = ts.tv_nsec; if (cm_node->iwdev->vsi.mtu > 1500 && 2 * cm_node->iwdev->vsi.mtu > cm_node->iwdev->rcv_wnd) cm_node->tcp_cntxt.mss = (cm_node->ipv4) ? (1500 - IRDMA_MTU_TO_MSS_IPV4) : (1500 - IRDMA_MTU_TO_MSS_IPV6); else cm_node->tcp_cntxt.mss = (cm_node->ipv4) ? (cm_node->iwdev->vsi.mtu - IRDMA_MTU_TO_MSS_IPV4) : (cm_node->iwdev->vsi.mtu - IRDMA_MTU_TO_MSS_IPV6); } #if __FreeBSD_version < 1400026 struct irdma_vma_data { struct list_head list; struct vm_area_struct *vma; struct mutex *vma_list_mutex; /* protect the vma_list */ }; /** * irdma_vma_open - * @vma: User VMA */ static void irdma_vma_open(struct vm_area_struct *vma) { vma->vm_ops = NULL; } /** * irdma_vma_close - Remove vma data from vma list * @vma: User VMA */ static void irdma_vma_close(struct vm_area_struct *vma) { struct irdma_vma_data *vma_data; vma_data = vma->vm_private_data; vma->vm_private_data = NULL; vma_data->vma = NULL; mutex_lock(vma_data->vma_list_mutex); list_del(&vma_data->list); mutex_unlock(vma_data->vma_list_mutex); kfree(vma_data); } static const struct vm_operations_struct irdma_vm_ops = { .open = irdma_vma_open, .close = irdma_vma_close }; /** * irdma_set_vma_data - Save vma data in context list * @vma: User VMA * @context: ib user context */ static int irdma_set_vma_data(struct vm_area_struct *vma, struct irdma_ucontext *context) { struct list_head *vma_head = &context->vma_list; struct irdma_vma_data *vma_entry; vma_entry = kzalloc(sizeof(*vma_entry), GFP_KERNEL); if (!vma_entry) return -ENOMEM; vma->vm_private_data = vma_entry; vma->vm_ops = &irdma_vm_ops; vma_entry->vma = vma; vma_entry->vma_list_mutex = &context->vma_list_mutex; mutex_lock(&context->vma_list_mutex); list_add(&vma_entry->list, vma_head); mutex_unlock(&context->vma_list_mutex); return 0; } /** * irdma_disassociate_ucontext - Disassociate user context * @context: ib user context */ void irdma_disassociate_ucontext(struct ib_ucontext *context) { struct irdma_ucontext *ucontext = to_ucontext(context); struct irdma_vma_data *vma_data, *n; struct vm_area_struct *vma; mutex_lock(&ucontext->vma_list_mutex); list_for_each_entry_safe(vma_data, n, &ucontext->vma_list, list) { vma = vma_data->vma; zap_vma_ptes(vma, vma->vm_start, PAGE_SIZE); vma->vm_ops = NULL; list_del(&vma_data->list); kfree(vma_data); } mutex_unlock(&ucontext->vma_list_mutex); } int rdma_user_mmap_io(struct ib_ucontext *context, struct vm_area_struct *vma, unsigned long pfn, unsigned long size, pgprot_t prot) { if (io_remap_pfn_range(vma, vma->vm_start, pfn, size, prot)) return -EAGAIN; return irdma_set_vma_data(vma, to_ucontext(context)); } #else /** * irdma_disassociate_ucontext - Disassociate user context * @context: ib user context */ void irdma_disassociate_ucontext(struct ib_ucontext *context) { } #endif struct ib_device * ib_device_get_by_netdev(if_t netdev, int driver_id) { struct irdma_device *iwdev; struct irdma_handler *hdl; unsigned long flags; spin_lock_irqsave(&irdma_handler_lock, flags); list_for_each_entry(hdl, &irdma_handlers, list) { iwdev = hdl->iwdev; if (netdev == iwdev->netdev) { spin_unlock_irqrestore(&irdma_handler_lock, flags); return &iwdev->ibdev; } } 
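	/* No registered irdma device is bound to this netdev. */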
spin_unlock_irqrestore(&irdma_handler_lock, flags); return NULL; } void ib_unregister_device_put(struct ib_device *device) { ib_unregister_device(device); } /** * irdma_query_gid_roce - Query port GID for Roce * @ibdev: device pointer from stack * @port: port number * @index: Entry index * @gid: Global ID */ int irdma_query_gid_roce(struct ib_device *ibdev, u8 port, int index, union ib_gid *gid) { int ret; ret = rdma_query_gid(ibdev, port, index, gid); if (ret == -EAGAIN) { memcpy(gid, &zgid, sizeof(*gid)); return 0; } return ret; } /** * irdma_modify_port - modify port attributes * @ibdev: device pointer from stack * @port: port number for query * @mask: Property mask * @props: returning device attributes */ int irdma_modify_port(struct ib_device *ibdev, u8 port, int mask, struct ib_port_modify *props) { if (port > 1) return -EINVAL; return 0; } /** * irdma_query_pkey - Query partition key * @ibdev: device pointer from stack * @port: port number * @index: index of pkey * @pkey: pointer to store the pkey */ int irdma_query_pkey(struct ib_device *ibdev, u8 port, u16 index, u16 *pkey) { if (index >= IRDMA_PKEY_TBL_SZ) return -EINVAL; *pkey = IRDMA_DEFAULT_PKEY; return 0; } int irdma_roce_port_immutable(struct ib_device *ibdev, u8 port_num, struct ib_port_immutable *immutable) { struct ib_port_attr attr; int err; immutable->core_cap_flags = RDMA_CORE_PORT_IBA_ROCE_UDP_ENCAP; err = ib_query_port(ibdev, port_num, &attr); if (err) return err; immutable->max_mad_size = IB_MGMT_MAD_SIZE; immutable->pkey_tbl_len = attr.pkey_tbl_len; immutable->gid_tbl_len = attr.gid_tbl_len; return 0; } int irdma_iw_port_immutable(struct ib_device *ibdev, u8 port_num, struct ib_port_immutable *immutable) { struct ib_port_attr attr; int err; immutable->core_cap_flags = RDMA_CORE_PORT_IWARP; err = ib_query_port(ibdev, port_num, &attr); if (err) return err; immutable->gid_tbl_len = 1; return 0; } /** * irdma_query_port - get port attributes * @ibdev: device pointer from stack * @port: port number for query * @props: returning device attributes */ int irdma_query_port(struct ib_device *ibdev, u8 port, struct ib_port_attr *props) { struct irdma_device *iwdev = to_iwdev(ibdev); if_t netdev = iwdev->netdev; /* no need to zero out pros here. 
done by caller */ props->max_mtu = IB_MTU_4096; props->active_mtu = ib_mtu_int_to_enum(if_getmtu(netdev)); props->lid = 1; props->lmc = 0; props->sm_lid = 0; props->sm_sl = 0; if ((if_getlinkstate(netdev) == LINK_STATE_UP) && (if_getdrvflags(netdev) & IFF_DRV_RUNNING)) { props->state = IB_PORT_ACTIVE; props->phys_state = IB_PORT_PHYS_STATE_LINK_UP; } else { props->state = IB_PORT_DOWN; props->phys_state = IB_PORT_PHYS_STATE_DISABLED; } ib_get_eth_speed(ibdev, port, &props->active_speed, &props->active_width); if (rdma_protocol_roce(ibdev, 1)) { props->gid_tbl_len = 32; - kc_set_props_ip_gid_caps(props); + props->port_cap_flags |= IB_PORT_IP_BASED_GIDS; props->pkey_tbl_len = IRDMA_PKEY_TBL_SZ; } else { props->gid_tbl_len = 1; } props->qkey_viol_cntr = 0; props->port_cap_flags |= IB_PORT_CM_SUP | IB_PORT_REINIT_SUP; props->max_msg_sz = iwdev->rf->sc_dev.hw_attrs.max_hw_outbound_msg_size; return 0; } static const char *const irdma_hw_stat_names[] = { /* gen1 - 32-bit */ [IRDMA_HW_STAT_INDEX_IP4RXDISCARD] = "ip4InDiscards", [IRDMA_HW_STAT_INDEX_IP4RXTRUNC] = "ip4InTruncatedPkts", [IRDMA_HW_STAT_INDEX_IP4TXNOROUTE] = "ip4OutNoRoutes", [IRDMA_HW_STAT_INDEX_IP6RXDISCARD] = "ip6InDiscards", [IRDMA_HW_STAT_INDEX_IP6RXTRUNC] = "ip6InTruncatedPkts", [IRDMA_HW_STAT_INDEX_IP6TXNOROUTE] = "ip6OutNoRoutes", [IRDMA_HW_STAT_INDEX_RXVLANERR] = "rxVlanErrors", /* gen1 - 64-bit */ [IRDMA_HW_STAT_INDEX_IP4RXOCTS] = "ip4InOctets", [IRDMA_HW_STAT_INDEX_IP4RXPKTS] = "ip4InPkts", [IRDMA_HW_STAT_INDEX_IP4RXFRAGS] = "ip4InReasmRqd", [IRDMA_HW_STAT_INDEX_IP4RXMCPKTS] = "ip4InMcastPkts", [IRDMA_HW_STAT_INDEX_IP4TXOCTS] = "ip4OutOctets", [IRDMA_HW_STAT_INDEX_IP4TXPKTS] = "ip4OutPkts", [IRDMA_HW_STAT_INDEX_IP4TXFRAGS] = "ip4OutSegRqd", [IRDMA_HW_STAT_INDEX_IP4TXMCPKTS] = "ip4OutMcastPkts", [IRDMA_HW_STAT_INDEX_IP6RXOCTS] = "ip6InOctets", [IRDMA_HW_STAT_INDEX_IP6RXPKTS] = "ip6InPkts", [IRDMA_HW_STAT_INDEX_IP6RXFRAGS] = "ip6InReasmRqd", [IRDMA_HW_STAT_INDEX_IP6RXMCPKTS] = "ip6InMcastPkts", [IRDMA_HW_STAT_INDEX_IP6TXOCTS] = "ip6OutOctets", [IRDMA_HW_STAT_INDEX_IP6TXPKTS] = "ip6OutPkts", [IRDMA_HW_STAT_INDEX_IP6TXFRAGS] = "ip6OutSegRqd", [IRDMA_HW_STAT_INDEX_IP6TXMCPKTS] = "ip6OutMcastPkts", [IRDMA_HW_STAT_INDEX_RDMARXRDS] = "InRdmaReads", [IRDMA_HW_STAT_INDEX_RDMARXSNDS] = "InRdmaSends", [IRDMA_HW_STAT_INDEX_RDMARXWRS] = "InRdmaWrites", [IRDMA_HW_STAT_INDEX_RDMATXRDS] = "OutRdmaReads", [IRDMA_HW_STAT_INDEX_RDMATXSNDS] = "OutRdmaSends", [IRDMA_HW_STAT_INDEX_RDMATXWRS] = "OutRdmaWrites", [IRDMA_HW_STAT_INDEX_RDMAVBND] = "RdmaBnd", [IRDMA_HW_STAT_INDEX_RDMAVINV] = "RdmaInv", /* gen2 - 32-bit */ [IRDMA_HW_STAT_INDEX_RXRPCNPHANDLED] = "cnpHandled", [IRDMA_HW_STAT_INDEX_RXRPCNPIGNORED] = "cnpIgnored", [IRDMA_HW_STAT_INDEX_TXNPCNPSENT] = "cnpSent", /* gen2 - 64-bit */ [IRDMA_HW_STAT_INDEX_IP4RXMCOCTS] = "ip4InMcastOctets", [IRDMA_HW_STAT_INDEX_IP4TXMCOCTS] = "ip4OutMcastOctets", [IRDMA_HW_STAT_INDEX_IP6RXMCOCTS] = "ip6InMcastOctets", [IRDMA_HW_STAT_INDEX_IP6TXMCOCTS] = "ip6OutMcastOctets", [IRDMA_HW_STAT_INDEX_UDPRXPKTS] = "RxUDP", [IRDMA_HW_STAT_INDEX_UDPTXPKTS] = "TxUDP", [IRDMA_HW_STAT_INDEX_RXNPECNMARKEDPKTS] = "RxECNMrkd", [IRDMA_HW_STAT_INDEX_TCPRTXSEG] = "RetransSegs", [IRDMA_HW_STAT_INDEX_TCPRXOPTERR] = "InOptErrors", [IRDMA_HW_STAT_INDEX_TCPRXPROTOERR] = "InProtoErrors", [IRDMA_HW_STAT_INDEX_TCPRXSEGS] = "InSegs", [IRDMA_HW_STAT_INDEX_TCPTXSEG] = "OutSegs", }; /** * irdma_alloc_hw_stats - Allocate a hw stats structure * @ibdev: device pointer from stack * @port_num: port number */ struct rdma_hw_stats * 
irdma_alloc_hw_stats(struct ib_device *ibdev, u8 port_num) { struct irdma_device *iwdev = to_iwdev(ibdev); struct irdma_sc_dev *dev = &iwdev->rf->sc_dev; int num_counters = dev->hw_attrs.max_stat_idx; unsigned long lifespan = RDMA_HW_STATS_DEFAULT_LIFESPAN; return rdma_alloc_hw_stats_struct(irdma_hw_stat_names, num_counters, lifespan); } /** * irdma_get_hw_stats - Populates the rdma_hw_stats structure * @ibdev: device pointer from stack * @stats: stats pointer from stack * @port_num: port number * @index: which hw counter the stack is requesting we update */ int irdma_get_hw_stats(struct ib_device *ibdev, struct rdma_hw_stats *stats, u8 port_num, int index) { struct irdma_device *iwdev = to_iwdev(ibdev); struct irdma_dev_hw_stats *hw_stats = &iwdev->vsi.pestat->hw_stats; if (iwdev->rf->rdma_ver >= IRDMA_GEN_2) irdma_cqp_gather_stats_cmd(&iwdev->rf->sc_dev, iwdev->vsi.pestat, true); memcpy(&stats->value[0], hw_stats, sizeof(u64)* stats->num_counters); return stats->num_counters; } /** * irdma_query_gid - Query port GID * @ibdev: device pointer from stack * @port: port number * @index: Entry index * @gid: Global ID */ int irdma_query_gid(struct ib_device *ibdev, u8 port, int index, union ib_gid *gid) { struct irdma_device *iwdev = to_iwdev(ibdev); memset(gid->raw, 0, sizeof(gid->raw)); ether_addr_copy(gid->raw, if_getlladdr(iwdev->netdev)); return 0; } enum rdma_link_layer irdma_get_link_layer(struct ib_device *ibdev, u8 port_num) { return IB_LINK_LAYER_ETHERNET; } inline enum ib_mtu ib_mtu_int_to_enum(int mtu) { if (mtu >= 4096) return IB_MTU_4096; else if (mtu >= 2048) return IB_MTU_2048; else if (mtu >= 1024) return IB_MTU_1024; else if (mtu >= 512) return IB_MTU_512; else return IB_MTU_256; } inline void kc_set_roce_uverbs_cmd_mask(struct irdma_device *iwdev) { iwdev->ibdev.uverbs_cmd_mask |= BIT_ULL(IB_USER_VERBS_CMD_ATTACH_MCAST) | BIT_ULL(IB_USER_VERBS_CMD_CREATE_AH) | BIT_ULL(IB_USER_VERBS_CMD_DESTROY_AH) | BIT_ULL(IB_USER_VERBS_CMD_DETACH_MCAST); } inline void kc_set_rdma_uverbs_cmd_mask(struct irdma_device *iwdev) { iwdev->ibdev.uverbs_cmd_mask = BIT_ULL(IB_USER_VERBS_CMD_GET_CONTEXT) | BIT_ULL(IB_USER_VERBS_CMD_QUERY_DEVICE) | BIT_ULL(IB_USER_VERBS_CMD_QUERY_PORT) | BIT_ULL(IB_USER_VERBS_CMD_ALLOC_PD) | BIT_ULL(IB_USER_VERBS_CMD_DEALLOC_PD) | BIT_ULL(IB_USER_VERBS_CMD_REG_MR) | BIT_ULL(IB_USER_VERBS_CMD_REREG_MR) | BIT_ULL(IB_USER_VERBS_CMD_DEREG_MR) | BIT_ULL(IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL) | BIT_ULL(IB_USER_VERBS_CMD_CREATE_CQ) | BIT_ULL(IB_USER_VERBS_CMD_RESIZE_CQ) | BIT_ULL(IB_USER_VERBS_CMD_DESTROY_CQ) | BIT_ULL(IB_USER_VERBS_CMD_REQ_NOTIFY_CQ) | BIT_ULL(IB_USER_VERBS_CMD_CREATE_QP) | BIT_ULL(IB_USER_VERBS_CMD_MODIFY_QP) | BIT_ULL(IB_USER_VERBS_CMD_QUERY_QP) | BIT_ULL(IB_USER_VERBS_CMD_POLL_CQ) | BIT_ULL(IB_USER_VERBS_CMD_DESTROY_QP) | BIT_ULL(IB_USER_VERBS_CMD_ALLOC_MW) | BIT_ULL(IB_USER_VERBS_CMD_BIND_MW) | BIT_ULL(IB_USER_VERBS_CMD_DEALLOC_MW) | BIT_ULL(IB_USER_VERBS_CMD_POST_RECV) | BIT_ULL(IB_USER_VERBS_CMD_POST_SEND); iwdev->ibdev.uverbs_ex_cmd_mask = BIT_ULL(IB_USER_VERBS_EX_CMD_MODIFY_QP) | BIT_ULL(IB_USER_VERBS_EX_CMD_QUERY_DEVICE); if (iwdev->rf->rdma_ver >= IRDMA_GEN_2) iwdev->ibdev.uverbs_ex_cmd_mask |= BIT_ULL(IB_USER_VERBS_EX_CMD_CREATE_CQ); } int ib_get_eth_speed(struct ib_device *ibdev, u32 port_num, u8 *speed, u8 *width) { if_t netdev = ibdev->get_netdev(ibdev, port_num); u32 netdev_speed; if (!netdev) return -ENODEV; netdev_speed = if_getbaudrate(netdev); dev_put(netdev); if (netdev_speed <= SPEED_1000) { *width = IB_WIDTH_1X; *speed = IB_SPEED_SDR; } 
else if (netdev_speed <= SPEED_10000) { *width = IB_WIDTH_1X; *speed = IB_SPEED_FDR10; } else if (netdev_speed <= SPEED_20000) { *width = IB_WIDTH_4X; *speed = IB_SPEED_DDR; } else if (netdev_speed <= SPEED_25000) { *width = IB_WIDTH_1X; *speed = IB_SPEED_EDR; } else if (netdev_speed <= SPEED_40000) { *width = IB_WIDTH_4X; *speed = IB_SPEED_FDR10; } else { *width = IB_WIDTH_4X; *speed = IB_SPEED_EDR; } return 0; } + +u64 +irdma_mac_to_u64(const u8 *eth_add) +{ + int idx; + u64 u64_eth_add; + + for (idx = 0, u64_eth_add = 0; idx < ETHER_ADDR_LEN; idx++) + u64_eth_add = u64_eth_add << 8 | eth_add[idx]; + + return u64_eth_add; +} diff --git a/sys/dev/irdma/irdma_main.h b/sys/dev/irdma/irdma_main.h index 6a56050bc808..25c3fcfc939e 100644 --- a/sys/dev/irdma/irdma_main.h +++ b/sys/dev/irdma/irdma_main.h @@ -1,607 +1,613 @@ /*- * SPDX-License-Identifier: GPL-2.0 or Linux-OpenIB * - * Copyright (c) 2015 - 2022 Intel Corporation + * Copyright (c) 2015 - 2023 Intel Corporation * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU * General Public License (GPL) Version 2, available from the file * COPYING in the main directory of this source tree, or the * OpenFabrics.org BSD license below: * * Redistribution and use in source and binary forms, with or * without modification, are permitted provided that the following * conditions are met: * * - Redistributions of source code must retain the above * copyright notice, this list of conditions and the following * disclaimer. * * - Redistributions in binary form must reproduce the above * copyright notice, this list of conditions and the following * disclaimer in the documentation and/or other materials * provided with the distribution. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
*/ #ifndef IRDMA_MAIN_H #define IRDMA_MAIN_H #include #include #include #include #include #include #include #include #if __FreeBSD_version >= 1400000 #include #endif #include #include #include #include #include #include #include #include #include "osdep.h" #include "irdma_defs.h" #include "irdma_hmc.h" #include "irdma_type.h" #include "irdma_ws.h" #include "irdma_protos.h" #include "irdma_pble.h" #include "irdma_cm.h" #include "fbsd_kcompat.h" #include "irdma-abi.h" #include "irdma_verbs.h" #include "irdma_user.h" #include "irdma_puda.h" extern struct list_head irdma_handlers; extern spinlock_t irdma_handler_lock; extern bool irdma_upload_context; #define IRDMA_FW_VER_DEFAULT 2 #define IRDMA_HW_VER 2 #define IRDMA_ARP_ADD 1 #define IRDMA_ARP_DELETE 2 #define IRDMA_ARP_RESOLVE 3 #define IRDMA_MACIP_ADD 1 #define IRDMA_MACIP_DELETE 2 #define IW_CCQ_SIZE (IRDMA_CQP_SW_SQSIZE_2048 + 1) #define IW_CEQ_SIZE 2048 #define IW_AEQ_SIZE 2048 #define RX_BUF_SIZE (1536 + 8) #define IW_REG0_SIZE (4 * 1024) #define IW_TX_TIMEOUT (6 * HZ) #define IW_FIRST_QPN 1 #define IW_SW_CONTEXT_ALIGN 1024 #define MAX_DPC_ITERATIONS 128 #define IRDMA_EVENT_TIMEOUT_MS 5000 #define IRDMA_VCHNL_EVENT_TIMEOUT_MS 10000 #define IRDMA_RST_TIMEOUT_HZ 4 #define IRDMA_NO_QSET 0xffff #define IW_CFG_FPM_QP_COUNT 32768 #define IRDMA_MAX_PAGES_PER_FMR 262144 #define IRDMA_MIN_PAGES_PER_FMR 1 #define IRDMA_CQP_COMPL_RQ_WQE_FLUSHED 2 #define IRDMA_CQP_COMPL_SQ_WQE_FLUSHED 3 #define IRDMA_Q_TYPE_PE_AEQ 0x80 #define IRDMA_Q_INVALID_IDX 0xffff #define IRDMA_REM_ENDPOINT_TRK_QPID 3 #define IRDMA_DRV_OPT_ENA_MPA_VER_0 0x00000001 #define IRDMA_DRV_OPT_DISABLE_MPA_CRC 0x00000002 #define IRDMA_DRV_OPT_DISABLE_FIRST_WRITE 0x00000004 #define IRDMA_DRV_OPT_DISABLE_INTF 0x00000008 #define IRDMA_DRV_OPT_ENA_MSI 0x00000010 #define IRDMA_DRV_OPT_DUAL_LOGICAL_PORT 0x00000020 #define IRDMA_DRV_OPT_NO_INLINE_DATA 0x00000080 #define IRDMA_DRV_OPT_DISABLE_INT_MOD 0x00000100 #define IRDMA_DRV_OPT_DISABLE_VIRT_WQ 0x00000200 #define IRDMA_DRV_OPT_ENA_PAU 0x00000400 #define IRDMA_DRV_OPT_MCAST_LOGPORT_MAP 0x00000800 #define IW_HMC_OBJ_TYPE_NUM ARRAY_SIZE(iw_hmc_obj_types) #define IRDMA_ROCE_CWND_DEFAULT 0x400 #define IRDMA_ROCE_ACKCREDS_DEFAULT 0x1E #define IRDMA_FLUSH_SQ BIT(0) #define IRDMA_FLUSH_RQ BIT(1) #define IRDMA_REFLUSH BIT(2) #define IRDMA_FLUSH_WAIT BIT(3) #define IRDMA_IRQ_NAME_STR_LEN 64 enum init_completion_state { INVALID_STATE = 0, INITIAL_STATE, CQP_CREATED, HMC_OBJS_CREATED, HW_RSRC_INITIALIZED, CCQ_CREATED, CEQ0_CREATED, /* Last state of probe */ ILQ_CREATED, IEQ_CREATED, REM_ENDPOINT_TRK_CREATED, CEQS_CREATED, PBLE_CHUNK_MEM, AEQ_CREATED, IP_ADDR_REGISTERED, /* Last state of open */ }; +struct ae_desc { + u16 id; + const char *desc; +}; + struct irdma_rsrc_limits { u32 qplimit; u32 mrlimit; u32 cqlimit; }; struct irdma_cqp_err_info { u16 maj; u16 min; const char *desc; }; struct irdma_cqp_compl_info { u32 op_ret_val; u16 maj_err_code; u16 min_err_code; bool error; u8 op_code; }; struct irdma_cqp_request { struct cqp_cmds_info info; wait_queue_head_t waitq; struct list_head list; atomic_t refcnt; void (*callback_fcn)(struct irdma_cqp_request *cqp_request); void *param; struct irdma_cqp_compl_info compl_info; + bool request_done; /* READ/WRITE_ONCE macros operate on it */ bool waiting:1; - bool request_done:1; bool dynamic:1; }; struct irdma_cqp { struct irdma_sc_cqp sc_cqp; spinlock_t req_lock; /* protect CQP request list */ spinlock_t compl_lock; /* protect CQP completion processing */ wait_queue_head_t waitq; wait_queue_head_t 
remove_wq; struct irdma_dma_mem sq; struct irdma_dma_mem host_ctx; u64 *scratch_array; struct irdma_cqp_request *cqp_requests; struct list_head cqp_avail_reqs; struct list_head cqp_pending_reqs; }; struct irdma_ccq { struct irdma_sc_cq sc_cq; struct irdma_dma_mem mem_cq; struct irdma_dma_mem shadow_area; }; struct irdma_ceq { struct irdma_sc_ceq sc_ceq; struct irdma_dma_mem mem; u32 irq; u32 msix_idx; struct irdma_pci_f *rf; struct tasklet_struct dpc_tasklet; spinlock_t ce_lock; /* sync cq destroy with cq completion event notification */ }; struct irdma_aeq { struct irdma_sc_aeq sc_aeq; struct irdma_dma_mem mem; struct irdma_pble_alloc palloc; bool virtual_map; }; struct irdma_arp_entry { u32 ip_addr[4]; - u8 mac_addr[ETH_ALEN]; + u8 mac_addr[ETHER_ADDR_LEN]; }; struct irdma_msix_vector { u32 idx; u32 irq; u32 cpu_affinity; u32 ceq_id; char name[IRDMA_IRQ_NAME_STR_LEN]; struct resource *res; void *tag; }; struct irdma_mc_table_info { u32 mgn; u32 dest_ip[4]; bool lan_fwd:1; bool ipv4_valid:1; }; struct mc_table_list { struct list_head list; struct irdma_mc_table_info mc_info; struct irdma_mcast_grp_info mc_grp_ctx; }; struct irdma_qv_info { u32 v_idx; /* msix_vector */ u16 ceq_idx; u16 aeq_idx; u8 itr_idx; }; struct irdma_qvlist_info { u32 num_vectors; struct irdma_qv_info qv_info[1]; }; struct irdma_gen_ops { void (*request_reset)(struct irdma_pci_f *rf); int (*register_qset)(struct irdma_sc_vsi *vsi, struct irdma_ws_node *tc_node); void (*unregister_qset)(struct irdma_sc_vsi *vsi, struct irdma_ws_node *tc_node); }; struct irdma_pci_f { bool reset:1; bool rsrc_created:1; bool msix_shared:1; bool ftype:1; u8 rsrc_profile; u8 *hmc_info_mem; u8 *mem_rsrc; u8 rdma_ver; u8 rst_to; /* Not used in SRIOV VF mode */ u8 pf_id; enum irdma_protocol_used protocol_used; bool en_rem_endpoint_trk:1; bool dcqcn_ena:1; u32 sd_type; u32 msix_count; u32 max_mr; u32 max_qp; u32 max_cq; u32 max_ah; u32 next_ah; u32 max_mcg; u32 next_mcg; u32 max_pd; u32 next_qp; u32 next_cq; u32 next_pd; u32 max_mr_size; u32 max_cqe; u32 mr_stagmask; u32 used_pds; u32 used_cqs; u32 used_mrs; u32 used_qps; u32 arp_table_size; u32 next_arp_index; u32 ceqs_count; u32 next_ws_node_id; u32 max_ws_node_id; u32 limits_sel; unsigned long *allocated_ws_nodes; unsigned long *allocated_qps; unsigned long *allocated_cqs; unsigned long *allocated_mrs; unsigned long *allocated_pds; unsigned long *allocated_mcgs; unsigned long *allocated_ahs; unsigned long *allocated_arps; enum init_completion_state init_state; struct irdma_sc_dev sc_dev; struct irdma_dev_ctx dev_ctx; struct irdma_tunable_info tun_info; eventhandler_tag irdma_ifaddr_event; struct irdma_handler *hdl; struct pci_dev *pcidev; struct ice_rdma_peer *peer_info; struct irdma_hw hw; struct irdma_cqp cqp; struct irdma_ccq ccq; struct irdma_aeq aeq; struct irdma_ceq *ceqlist; struct irdma_hmc_pble_rsrc *pble_rsrc; struct irdma_arp_entry *arp_table; spinlock_t arp_lock; /*protect ARP table access*/ spinlock_t rsrc_lock; /* protect HW resource array access */ spinlock_t qptable_lock; /*protect QP table access*/ spinlock_t cqtable_lock; /*protect CQ table access*/ struct irdma_qp **qp_table; struct irdma_cq **cq_table; spinlock_t qh_list_lock; /* protect mc_qht_list */ struct mc_table_list mc_qht_list; struct irdma_msix_vector *iw_msixtbl; struct irdma_qvlist_info *iw_qvlist; struct tasklet_struct dpc_tasklet; struct msix_entry msix_info; struct irdma_dma_mem obj_mem; struct irdma_dma_mem obj_next; atomic_t vchnl_msgs; wait_queue_head_t vchnl_waitq; struct workqueue_struct *cqp_cmpl_wq; 
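/* cqp_cmpl_work below: deferred CQP completion processing queued on cqp_cmpl_wq; see cqp_compl_worker() declared later in this header */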
struct work_struct cqp_cmpl_work; struct irdma_sc_vsi default_vsi; void *back_fcn; struct irdma_gen_ops gen_ops; void (*check_fc)(struct irdma_sc_vsi *vsi, struct irdma_sc_qp *sc_qp); struct irdma_dcqcn_cc_params dcqcn_params; struct irdma_device *iwdev; }; struct irdma_device { struct ib_device ibdev; struct irdma_pci_f *rf; if_t netdev; struct notifier_block nb_netdevice_event; struct irdma_handler *hdl; struct workqueue_struct *cleanup_wq; struct irdma_sc_vsi vsi; struct irdma_cm_core cm_core; u32 roce_cwnd; u32 roce_ackcreds; u32 vendor_id; u32 vendor_part_id; u32 push_mode; u32 rcv_wnd; u16 mac_ip_table_idx; u16 vsi_num; u8 rcv_wscale; u8 iw_status; u8 roce_rtomin; u8 rd_fence_rate; bool override_rcv_wnd:1; bool override_cwnd:1; bool override_ackcreds:1; bool override_ooo:1; bool override_rd_fence_rate:1; bool override_rtomin:1; bool roce_mode:1; bool roce_dcqcn_en:1; bool dcb_vlan_mode:1; bool iw_ooo:1; enum init_completion_state init_state; wait_queue_head_t suspend_wq; }; struct irdma_handler { struct list_head list; struct irdma_device *iwdev; struct task deferred_task; struct taskqueue *deferred_tq; bool shared_res_created; }; static inline struct irdma_device *to_iwdev(struct ib_device *ibdev) { return container_of(ibdev, struct irdma_device, ibdev); } static inline struct irdma_ucontext *to_ucontext(struct ib_ucontext *ibucontext) { return container_of(ibucontext, struct irdma_ucontext, ibucontext); } #if __FreeBSD_version >= 1400026 static inline struct irdma_user_mmap_entry * to_irdma_mmap_entry(struct rdma_user_mmap_entry *rdma_entry) { return container_of(rdma_entry, struct irdma_user_mmap_entry, rdma_entry); } #endif static inline struct irdma_pd *to_iwpd(struct ib_pd *ibpd) { return container_of(ibpd, struct irdma_pd, ibpd); } static inline struct irdma_ah *to_iwah(struct ib_ah *ibah) { return container_of(ibah, struct irdma_ah, ibah); } static inline struct irdma_mr *to_iwmr(struct ib_mr *ibmr) { return container_of(ibmr, struct irdma_mr, ibmr); } static inline struct irdma_mr *to_iwmw(struct ib_mw *ibmw) { return container_of(ibmw, struct irdma_mr, ibmw); } static inline struct irdma_cq *to_iwcq(struct ib_cq *ibcq) { return container_of(ibcq, struct irdma_cq, ibcq); } static inline struct irdma_qp *to_iwqp(struct ib_qp *ibqp) { return container_of(ibqp, struct irdma_qp, ibqp); } static inline struct irdma_pci_f *dev_to_rf(struct irdma_sc_dev *dev) { return container_of(dev, struct irdma_pci_f, sc_dev); } /** * irdma_alloc_resource - allocate a resource * @iwdev: device pointer * @resource_array: resource bit array: * @max_resources: maximum resource number * @req_resources_num: Allocated resource number * @next: next free id **/ static inline int irdma_alloc_rsrc(struct irdma_pci_f *rf, unsigned long *rsrc_array, u32 max_rsrc, u32 *req_rsrc_num, u32 *next) { u32 rsrc_num; unsigned long flags; spin_lock_irqsave(&rf->rsrc_lock, flags); rsrc_num = find_next_zero_bit(rsrc_array, max_rsrc, *next); if (rsrc_num >= max_rsrc) { rsrc_num = find_first_zero_bit(rsrc_array, max_rsrc); if (rsrc_num >= max_rsrc) { spin_unlock_irqrestore(&rf->rsrc_lock, flags); irdma_debug(&rf->sc_dev, IRDMA_DEBUG_ERR, "resource [%d] allocation failed\n", rsrc_num); return -EOVERFLOW; } } __set_bit(rsrc_num, rsrc_array); *next = rsrc_num + 1; if (*next == max_rsrc) *next = 0; *req_rsrc_num = rsrc_num; spin_unlock_irqrestore(&rf->rsrc_lock, flags); return 0; } /** * irdma_free_resource - free a resource * @iwdev: device pointer * @resource_array: resource array for the resource_num * @resource_num: 
resource number to free **/ static inline void irdma_free_rsrc(struct irdma_pci_f *rf, unsigned long *rsrc_array, u32 rsrc_num) { unsigned long flags; spin_lock_irqsave(&rf->rsrc_lock, flags); __clear_bit(rsrc_num, rsrc_array); spin_unlock_irqrestore(&rf->rsrc_lock, flags); } int irdma_ctrl_init_hw(struct irdma_pci_f *rf); void irdma_ctrl_deinit_hw(struct irdma_pci_f *rf); int irdma_rt_init_hw(struct irdma_device *iwdev, struct irdma_l2params *l2params); void irdma_rt_deinit_hw(struct irdma_device *iwdev); void irdma_qp_add_ref(struct ib_qp *ibqp); void irdma_qp_rem_ref(struct ib_qp *ibqp); void irdma_free_lsmm_rsrc(struct irdma_qp *iwqp); struct ib_qp *irdma_get_qp(struct ib_device *ibdev, int qpn); void irdma_flush_wqes(struct irdma_qp *iwqp, u32 flush_mask); void irdma_manage_arp_cache(struct irdma_pci_f *rf, const unsigned char *mac_addr, u32 *ip_addr, u32 action); struct irdma_apbvt_entry *irdma_add_apbvt(struct irdma_device *iwdev, u16 port); void irdma_del_apbvt(struct irdma_device *iwdev, struct irdma_apbvt_entry *entry); struct irdma_cqp_request *irdma_alloc_and_get_cqp_request(struct irdma_cqp *cqp, bool wait); void irdma_free_cqp_request(struct irdma_cqp *cqp, struct irdma_cqp_request *cqp_request); void irdma_put_cqp_request(struct irdma_cqp *cqp, struct irdma_cqp_request *cqp_request); int irdma_alloc_local_mac_entry(struct irdma_pci_f *rf, u16 *mac_tbl_idx); int irdma_add_local_mac_entry(struct irdma_pci_f *rf, const u8 *mac_addr, u16 idx); void irdma_del_local_mac_entry(struct irdma_pci_f *rf, u16 idx); +const char *irdma_get_ae_desc(u16 ae_id); u32 irdma_initialize_hw_rsrc(struct irdma_pci_f *rf); void irdma_port_ibevent(struct irdma_device *iwdev); void irdma_cm_disconn(struct irdma_qp *qp); bool irdma_cqp_crit_err(struct irdma_sc_dev *dev, u8 cqp_cmd, u16 maj_err_code, u16 min_err_code); int irdma_handle_cqp_op(struct irdma_pci_f *rf, struct irdma_cqp_request *cqp_request); int irdma_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask, struct ib_udata *udata); int irdma_modify_qp_roce(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask, struct ib_udata *udata); void irdma_cq_add_ref(struct ib_cq *ibcq); void irdma_cq_rem_ref(struct ib_cq *ibcq); void irdma_cq_wq_destroy(struct irdma_pci_f *rf, struct irdma_sc_cq *cq); void irdma_cleanup_pending_cqp_op(struct irdma_pci_f *rf); int irdma_hw_modify_qp(struct irdma_device *iwdev, struct irdma_qp *iwqp, struct irdma_modify_qp_info *info, bool wait); int irdma_qp_suspend_resume(struct irdma_sc_qp *qp, bool suspend); int irdma_manage_qhash(struct irdma_device *iwdev, struct irdma_cm_info *cminfo, enum irdma_quad_entry_type etype, enum irdma_quad_hash_manage_type mtype, void *cmnode, bool wait); void irdma_receive_ilq(struct irdma_sc_vsi *vsi, struct irdma_puda_buf *rbuf); void irdma_free_sqbuf(struct irdma_sc_vsi *vsi, void *bufp); void irdma_free_qp_rsrc(struct irdma_qp *iwqp); int irdma_setup_cm_core(struct irdma_device *iwdev, u8 ver); void irdma_cleanup_cm_core(struct irdma_cm_core *cm_core); void irdma_next_iw_state(struct irdma_qp *iwqp, u8 state, u8 del_hash, u8 term, u8 term_len); int irdma_send_syn(struct irdma_cm_node *cm_node, u32 sendack); int irdma_send_reset(struct irdma_cm_node *cm_node); struct irdma_cm_node *irdma_find_node(struct irdma_cm_core *cm_core, u16 rem_port, u32 *rem_addr, u16 loc_port, u32 *loc_addr, u16 vlan_id); int irdma_hw_flush_wqes(struct irdma_pci_f *rf, struct irdma_sc_qp *qp, struct irdma_qp_flush_info *info, bool wait); void irdma_gen_ae(struct irdma_pci_f *rf, struct 
irdma_sc_qp *qp, struct irdma_gen_ae_info *info, bool wait); void irdma_copy_ip_ntohl(u32 *dst, __be32 *src); void irdma_copy_ip_htonl(__be32 *dst, u32 *src); u16 irdma_get_vlan_ipv4(u32 *addr); if_t irdma_netdev_vlan_ipv6(u32 *addr, u16 *vlan_id, u8 *mac); struct ib_mr *irdma_reg_phys_mr(struct ib_pd *ib_pd, u64 addr, u64 size, int acc, u64 *iova_start); int irdma_upload_qp_context(struct irdma_qp *iwqp, bool freeze, bool raw); void irdma_del_hmc_objects(struct irdma_sc_dev *dev, struct irdma_hmc_info *hmc_info, bool privileged, bool reset, enum irdma_vers vers); void irdma_cqp_ce_handler(struct irdma_pci_f *rf, struct irdma_sc_cq *cq); int irdma_ah_cqp_op(struct irdma_pci_f *rf, struct irdma_sc_ah *sc_ah, u8 cmd, bool wait, void (*callback_fcn)(struct irdma_cqp_request *cqp_request), void *cb_param); void irdma_gsi_ud_qp_ah_cb(struct irdma_cqp_request *cqp_request); void irdma_udqp_qs_worker(struct work_struct *work); bool irdma_cq_empty(struct irdma_cq *iwcq); int irdma_netdevice_event(struct notifier_block *notifier, unsigned long event, void *ptr); void irdma_unregister_notifiers(struct irdma_device *iwdev); int irdma_register_notifiers(struct irdma_device *iwdev); void irdma_set_rf_user_cfg_params(struct irdma_pci_f *rf); void irdma_add_ip(struct irdma_device *iwdev); void irdma_add_handler(struct irdma_handler *hdl); void irdma_del_handler(struct irdma_handler *hdl); void cqp_compl_worker(struct work_struct *work); void irdma_cleanup_dead_qps(struct irdma_sc_vsi *vsi); #endif /* IRDMA_MAIN_H */ diff --git a/sys/dev/irdma/irdma_pble.c b/sys/dev/irdma/irdma_pble.c index 4ab998bb3e90..aaf9d8917622 100644 --- a/sys/dev/irdma/irdma_pble.c +++ b/sys/dev/irdma/irdma_pble.c @@ -1,556 +1,556 @@ /*- * SPDX-License-Identifier: GPL-2.0 or Linux-OpenIB * - * Copyright (c) 2015 - 2022 Intel Corporation + * Copyright (c) 2015 - 2023 Intel Corporation * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU * General Public License (GPL) Version 2, available from the file * COPYING in the main directory of this source tree, or the * OpenFabrics.org BSD license below: * * Redistribution and use in source and binary forms, with or * without modification, are permitted provided that the following * conditions are met: * * - Redistributions of source code must retain the above * copyright notice, this list of conditions and the following * disclaimer. * * - Redistributions in binary form must reproduce the above * copyright notice, this list of conditions and the following * disclaimer in the documentation and/or other materials * provided with the distribution. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
*/ #include "osdep.h" #include "irdma_hmc.h" #include "irdma_defs.h" #include "irdma_type.h" #include "irdma_protos.h" #include "irdma_pble.h" static int add_pble_prm(struct irdma_hmc_pble_rsrc *pble_rsrc); /** * irdma_destroy_pble_prm - destroy prm during module unload * @pble_rsrc: pble resources */ void irdma_destroy_pble_prm(struct irdma_hmc_pble_rsrc *pble_rsrc) { struct irdma_chunk *chunk; struct irdma_pble_prm *pinfo = &pble_rsrc->pinfo; while (!list_empty(&pinfo->clist)) { chunk = (struct irdma_chunk *)(&pinfo->clist)->next; list_del(&chunk->list); if (chunk->type == PBLE_SD_PAGED) irdma_pble_free_paged_mem(chunk); - if (chunk->bitmapbuf) - irdma_prm_rem_bitmapmem(pble_rsrc->dev->hw, chunk); + bitmap_free(chunk->bitmapbuf); kfree(chunk->chunkmem.va); } spin_lock_destroy(&pinfo->prm_lock); mutex_destroy(&pble_rsrc->pble_mutex_lock); } /** * irdma_hmc_init_pble - Initialize pble resources during module load * @dev: irdma_sc_dev struct * @pble_rsrc: pble resources */ int irdma_hmc_init_pble(struct irdma_sc_dev *dev, struct irdma_hmc_pble_rsrc *pble_rsrc) { struct irdma_hmc_info *hmc_info; u32 fpm_idx = 0; int status = 0; hmc_info = dev->hmc_info; pble_rsrc->dev = dev; pble_rsrc->fpm_base_addr = hmc_info->hmc_obj[IRDMA_HMC_IW_PBLE].base; /* Start pble' on 4k boundary */ if (pble_rsrc->fpm_base_addr & 0xfff) fpm_idx = (4096 - (pble_rsrc->fpm_base_addr & 0xfff)) >> 3; pble_rsrc->unallocated_pble = hmc_info->hmc_obj[IRDMA_HMC_IW_PBLE].cnt - fpm_idx; pble_rsrc->next_fpm_addr = pble_rsrc->fpm_base_addr + (fpm_idx << 3); pble_rsrc->pinfo.pble_shift = PBLE_SHIFT; mutex_init(&pble_rsrc->pble_mutex_lock); spin_lock_init(&pble_rsrc->pinfo.prm_lock); INIT_LIST_HEAD(&pble_rsrc->pinfo.clist); if (add_pble_prm(pble_rsrc)) { irdma_destroy_pble_prm(pble_rsrc); status = -ENOMEM; } return status; } /** * get_sd_pd_idx - Returns sd index, pd index and rel_pd_idx from fpm address * @pble_rsrc: structure containing fpm address * @idx: where to return indexes */ static void get_sd_pd_idx(struct irdma_hmc_pble_rsrc *pble_rsrc, struct sd_pd_idx *idx) { idx->sd_idx = (u32)pble_rsrc->next_fpm_addr / IRDMA_HMC_DIRECT_BP_SIZE; idx->pd_idx = (u32)(pble_rsrc->next_fpm_addr / IRDMA_HMC_PAGED_BP_SIZE); idx->rel_pd_idx = (idx->pd_idx % IRDMA_HMC_PD_CNT_IN_SD); } /** * add_sd_direct - add sd direct for pble * @pble_rsrc: pble resource ptr * @info: page info for sd */ static int add_sd_direct(struct irdma_hmc_pble_rsrc *pble_rsrc, struct irdma_add_page_info *info) { struct irdma_sc_dev *dev = pble_rsrc->dev; int ret_code = 0; struct sd_pd_idx *idx = &info->idx; struct irdma_chunk *chunk = info->chunk; struct irdma_hmc_info *hmc_info = info->hmc_info; struct irdma_hmc_sd_entry *sd_entry = info->sd_entry; u32 offset = 0; if (!sd_entry->valid) { ret_code = irdma_add_sd_table_entry(dev->hw, hmc_info, info->idx.sd_idx, IRDMA_SD_TYPE_DIRECT, IRDMA_HMC_DIRECT_BP_SIZE); if (ret_code) return ret_code; chunk->type = PBLE_SD_CONTIGOUS; } offset = idx->rel_pd_idx << HMC_PAGED_BP_SHIFT; chunk->size = info->pages << HMC_PAGED_BP_SHIFT; chunk->vaddr = (u8 *)sd_entry->u.bp.addr.va + offset; chunk->fpm_addr = pble_rsrc->next_fpm_addr; irdma_debug(dev, IRDMA_DEBUG_PBLE, "chunk_size[%ld] = 0x%lx vaddr=0x%p fpm_addr = %lx\n", chunk->size, chunk->size, chunk->vaddr, chunk->fpm_addr); return 0; } /** * fpm_to_idx - given fpm address, get pble index * @pble_rsrc: pble resource management * @addr: fpm address for index */ static u32 fpm_to_idx(struct irdma_hmc_pble_rsrc *pble_rsrc, u64 addr){ u64 idx; idx = (addr - (pble_rsrc->fpm_base_addr)) 
>> 3; return (u32)idx; } /** * add_bp_pages - add backing pages for sd * @pble_rsrc: pble resource management * @info: page info for sd */ static int add_bp_pages(struct irdma_hmc_pble_rsrc *pble_rsrc, struct irdma_add_page_info *info) { struct irdma_sc_dev *dev = pble_rsrc->dev; u8 *addr; struct irdma_dma_mem mem; struct irdma_hmc_pd_entry *pd_entry; struct irdma_hmc_sd_entry *sd_entry = info->sd_entry; struct irdma_hmc_info *hmc_info = info->hmc_info; struct irdma_chunk *chunk = info->chunk; int status = 0; u32 rel_pd_idx = info->idx.rel_pd_idx; u32 pd_idx = info->idx.pd_idx; u32 i; if (irdma_pble_get_paged_mem(chunk, info->pages)) return -ENOMEM; status = irdma_add_sd_table_entry(dev->hw, hmc_info, info->idx.sd_idx, IRDMA_SD_TYPE_PAGED, IRDMA_HMC_DIRECT_BP_SIZE); if (status) goto error; addr = chunk->vaddr; for (i = 0; i < info->pages; i++) { mem.pa = (u64)chunk->dmainfo.dmaaddrs[i]; mem.size = 4096; mem.va = addr; pd_entry = &sd_entry->u.pd_table.pd_entry[rel_pd_idx++]; if (!pd_entry->valid) { status = irdma_add_pd_table_entry(dev, hmc_info, pd_idx++, &mem); if (status) goto error; addr += 4096; } } chunk->fpm_addr = pble_rsrc->next_fpm_addr; return 0; error: irdma_pble_free_paged_mem(chunk); return status; } /** * irdma_get_type - add a sd entry type for sd * @dev: irdma_sc_dev struct * @idx: index of sd * @pages: pages in the sd */ static enum irdma_sd_entry_type irdma_get_type(struct irdma_sc_dev *dev, struct sd_pd_idx *idx, u32 pages) { enum irdma_sd_entry_type sd_entry_type; sd_entry_type = !idx->rel_pd_idx && pages == IRDMA_HMC_PD_CNT_IN_SD ? IRDMA_SD_TYPE_DIRECT : IRDMA_SD_TYPE_PAGED; return sd_entry_type; } /** * add_pble_prm - add a sd entry for pble resoure * @pble_rsrc: pble resource management */ static int add_pble_prm(struct irdma_hmc_pble_rsrc *pble_rsrc) { struct irdma_sc_dev *dev = pble_rsrc->dev; struct irdma_hmc_sd_entry *sd_entry; struct irdma_hmc_info *hmc_info; struct irdma_chunk *chunk; struct irdma_add_page_info info; struct sd_pd_idx *idx = &info.idx; int ret_code = 0; enum irdma_sd_entry_type sd_entry_type; u64 sd_reg_val = 0; struct irdma_virt_mem chunkmem; u32 pages; if (pble_rsrc->unallocated_pble < PBLE_PER_PAGE) return -ENOMEM; if (pble_rsrc->next_fpm_addr & 0xfff) return -EINVAL; chunkmem.size = sizeof(*chunk); chunkmem.va = kzalloc(chunkmem.size, GFP_KERNEL); if (!chunkmem.va) return -ENOMEM; chunk = chunkmem.va; chunk->chunkmem = chunkmem; hmc_info = dev->hmc_info; chunk->dev = dev; chunk->fpm_addr = pble_rsrc->next_fpm_addr; get_sd_pd_idx(pble_rsrc, idx); sd_entry = &hmc_info->sd_table.sd_entry[idx->sd_idx]; pages = (idx->rel_pd_idx) ? 
(IRDMA_HMC_PD_CNT_IN_SD - idx->rel_pd_idx) : IRDMA_HMC_PD_CNT_IN_SD; pages = min(pages, pble_rsrc->unallocated_pble >> PBLE_512_SHIFT); info.chunk = chunk; info.hmc_info = hmc_info; info.pages = pages; info.sd_entry = sd_entry; if (!sd_entry->valid) sd_entry_type = irdma_get_type(dev, idx, pages); else sd_entry_type = sd_entry->entry_type; irdma_debug(dev, IRDMA_DEBUG_PBLE, "pages = %d, unallocated_pble[%d] current_fpm_addr = %lx\n", - pages, pble_rsrc->unallocated_pble, pble_rsrc->next_fpm_addr); + pages, pble_rsrc->unallocated_pble, + pble_rsrc->next_fpm_addr); irdma_debug(dev, IRDMA_DEBUG_PBLE, "sd_entry_type = %d\n", sd_entry_type); if (sd_entry_type == IRDMA_SD_TYPE_DIRECT) ret_code = add_sd_direct(pble_rsrc, &info); if (ret_code) sd_entry_type = IRDMA_SD_TYPE_PAGED; else pble_rsrc->stats_direct_sds++; if (sd_entry_type == IRDMA_SD_TYPE_PAGED) { ret_code = add_bp_pages(pble_rsrc, &info); if (ret_code) - goto error; + goto err_bp_pages; else pble_rsrc->stats_paged_sds++; } ret_code = irdma_prm_add_pble_mem(&pble_rsrc->pinfo, chunk); if (ret_code) - goto error; + goto err_bp_pages; pble_rsrc->next_fpm_addr += chunk->size; irdma_debug(dev, IRDMA_DEBUG_PBLE, "next_fpm_addr = %lx chunk_size[%lu] = 0x%lx\n", pble_rsrc->next_fpm_addr, chunk->size, chunk->size); pble_rsrc->unallocated_pble -= (u32)(chunk->size >> 3); sd_reg_val = (sd_entry_type == IRDMA_SD_TYPE_PAGED) ? sd_entry->u.pd_table.pd_page_addr.pa : sd_entry->u.bp.addr.pa; if (!sd_entry->valid) { ret_code = irdma_hmc_sd_one(dev, hmc_info->hmc_fn_id, sd_reg_val, idx->sd_idx, sd_entry->entry_type, true); if (ret_code) goto error; } sd_entry->valid = true; list_add(&chunk->list, &pble_rsrc->pinfo.clist); return 0; error: - if (chunk->bitmapbuf) - irdma_prm_rem_bitmapmem(pble_rsrc->dev->hw, chunk); + bitmap_free(chunk->bitmapbuf); +err_bp_pages: kfree(chunk->chunkmem.va); return ret_code; } /** * free_lvl2 - free level 2 pble * @pble_rsrc: pble resource management * @palloc: level 2 pble allocation */ static void free_lvl2(struct irdma_hmc_pble_rsrc *pble_rsrc, struct irdma_pble_alloc *palloc) { u32 i; struct irdma_pble_level2 *lvl2 = &palloc->level2; struct irdma_pble_info *root = &lvl2->root; struct irdma_pble_info *leaf = lvl2->leaf; for (i = 0; i < lvl2->leaf_cnt; i++, leaf++) { if (leaf->addr) irdma_prm_return_pbles(&pble_rsrc->pinfo, &leaf->chunkinfo); else break; } if (root->addr) irdma_prm_return_pbles(&pble_rsrc->pinfo, &root->chunkinfo); kfree(lvl2->leafmem.va); lvl2->leaf = NULL; } /** * get_lvl2_pble - get level 2 pble resource * @pble_rsrc: pble resource management * @palloc: level 2 pble allocation */ static int get_lvl2_pble(struct irdma_hmc_pble_rsrc *pble_rsrc, struct irdma_pble_alloc *palloc) { u32 lf4k, lflast, total, i; u32 pblcnt = PBLE_PER_PAGE; u64 *addr; struct irdma_pble_level2 *lvl2 = &palloc->level2; struct irdma_pble_info *root = &lvl2->root; struct irdma_pble_info *leaf; int ret_code; u64 fpm_addr; /* number of full 512 (4K) leafs */ lf4k = palloc->total_cnt >> 9; lflast = palloc->total_cnt % PBLE_PER_PAGE; total = (lflast == 0) ?
lf4k : lf4k + 1; lvl2->leaf_cnt = total; lvl2->leafmem.size = (sizeof(*leaf) * total); lvl2->leafmem.va = kzalloc(lvl2->leafmem.size, GFP_KERNEL); if (!lvl2->leafmem.va) return -ENOMEM; lvl2->leaf = lvl2->leafmem.va; leaf = lvl2->leaf; ret_code = irdma_prm_get_pbles(&pble_rsrc->pinfo, &root->chunkinfo, total << 3, &root->addr, &fpm_addr); if (ret_code) { kfree(lvl2->leafmem.va); lvl2->leaf = NULL; return -ENOMEM; } root->idx = fpm_to_idx(pble_rsrc, fpm_addr); root->cnt = total; addr = root->addr; for (i = 0; i < total; i++, leaf++) { pblcnt = (lflast && ((i + 1) == total)) ? lflast : PBLE_PER_PAGE; ret_code = irdma_prm_get_pbles(&pble_rsrc->pinfo, &leaf->chunkinfo, pblcnt << 3, &leaf->addr, &fpm_addr); if (ret_code) goto error; leaf->idx = fpm_to_idx(pble_rsrc, fpm_addr); leaf->cnt = pblcnt; *addr = (u64)leaf->idx; addr++; } palloc->level = PBLE_LEVEL_2; pble_rsrc->stats_lvl2++; return 0; error: free_lvl2(pble_rsrc, palloc); return -ENOMEM; } /** * get_lvl1_pble - get level 1 pble resource * @pble_rsrc: pble resource management * @palloc: level 1 pble allocation */ static int get_lvl1_pble(struct irdma_hmc_pble_rsrc *pble_rsrc, struct irdma_pble_alloc *palloc) { int ret_code; u64 fpm_addr; struct irdma_pble_info *lvl1 = &palloc->level1; ret_code = irdma_prm_get_pbles(&pble_rsrc->pinfo, &lvl1->chunkinfo, palloc->total_cnt << 3, &lvl1->addr, &fpm_addr); if (ret_code) return -ENOMEM; palloc->level = PBLE_LEVEL_1; lvl1->idx = fpm_to_idx(pble_rsrc, fpm_addr); lvl1->cnt = palloc->total_cnt; pble_rsrc->stats_lvl1++; return 0; } /** * get_lvl1_lvl2_pble - calls get_lvl1 and get_lvl2 pble routine * @pble_rsrc: pble resources * @palloc: contains all information regarding pble (idx + pble addr) * @lvl: Bitmask for requested pble level */ static int get_lvl1_lvl2_pble(struct irdma_hmc_pble_rsrc *pble_rsrc, struct irdma_pble_alloc *palloc, u8 lvl) { int status = 0; status = get_lvl1_pble(pble_rsrc, palloc); if (!status || lvl == PBLE_LEVEL_1 || palloc->total_cnt <= PBLE_PER_PAGE) return status; status = get_lvl2_pble(pble_rsrc, palloc); return status; } /** * irdma_get_pble - allocate pbles from the prm * @pble_rsrc: pble resources * @palloc: contains all information regarding pble (idx + pble addr) * @pble_cnt: #of pbles requested * @lvl: requested pble level mask */ int irdma_get_pble(struct irdma_hmc_pble_rsrc *pble_rsrc, struct irdma_pble_alloc *palloc, u32 pble_cnt, u8 lvl) { int status = 0; int max_sds = 0; int i; palloc->total_cnt = pble_cnt; palloc->level = PBLE_LEVEL_0; mutex_lock(&pble_rsrc->pble_mutex_lock); /* * check first to see if we can get pble's without acquiring additional sd's */ status = get_lvl1_lvl2_pble(pble_rsrc, palloc, lvl); if (!status) goto exit; max_sds = (palloc->total_cnt >> 18) + 1; for (i = 0; i < max_sds; i++) { status = add_pble_prm(pble_rsrc); if (status) break; status = get_lvl1_lvl2_pble(pble_rsrc, palloc, lvl); /* if level1_only, only go through it once */ if (!status || lvl == PBLE_LEVEL_1) break; } exit: if (!status) { pble_rsrc->allocdpbles += pble_cnt; pble_rsrc->stats_alloc_ok++; } else { pble_rsrc->stats_alloc_fail++; } mutex_unlock(&pble_rsrc->pble_mutex_lock); return status; } /** * irdma_free_pble - put pbles back into prm * @pble_rsrc: pble resources * @palloc: contains all information regarding pble resource being freed */ void irdma_free_pble(struct irdma_hmc_pble_rsrc *pble_rsrc, struct irdma_pble_alloc *palloc) { pble_rsrc->freedpbles += palloc->total_cnt; if (palloc->level == PBLE_LEVEL_2) free_lvl2(pble_rsrc, palloc); else
irdma_prm_return_pbles(&pble_rsrc->pinfo, &palloc->level1.chunkinfo); pble_rsrc->stats_alloc_freed++; } diff --git a/sys/dev/irdma/irdma_pble.h b/sys/dev/irdma/irdma_pble.h index 6f6ac2341e1c..44e43a8437a1 100644 --- a/sys/dev/irdma/irdma_pble.h +++ b/sys/dev/irdma/irdma_pble.h @@ -1,165 +1,163 @@ /*- * SPDX-License-Identifier: GPL-2.0 or Linux-OpenIB * - * Copyright (c) 2015 - 2022 Intel Corporation + * Copyright (c) 2015 - 2023 Intel Corporation * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU * General Public License (GPL) Version 2, available from the file * COPYING in the main directory of this source tree, or the * OpenFabrics.org BSD license below: * * Redistribution and use in source and binary forms, with or * without modification, are permitted provided that the following * conditions are met: * * - Redistributions of source code must retain the above * copyright notice, this list of conditions and the following * disclaimer. * * - Redistributions in binary form must reproduce the above * copyright notice, this list of conditions and the following * disclaimer in the documentation and/or other materials * provided with the distribution. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ #ifndef IRDMA_PBLE_H #define IRDMA_PBLE_H #define PBLE_SHIFT 6 #define PBLE_PER_PAGE 512 #define HMC_PAGED_BP_SHIFT 12 #define PBLE_512_SHIFT 9 #define PBLE_INVALID_IDX 0xffffffff enum irdma_pble_level { PBLE_LEVEL_0 = 0, PBLE_LEVEL_1 = 1, PBLE_LEVEL_2 = 2, }; enum irdma_alloc_type { PBLE_NO_ALLOC = 0, PBLE_SD_CONTIGOUS = 1, PBLE_SD_PAGED = 2, }; struct irdma_chunk; struct irdma_pble_chunkinfo { struct irdma_chunk *pchunk; u64 bit_idx; u64 bits_used; }; struct irdma_pble_info { u64 *addr; u32 idx; u32 cnt; struct irdma_pble_chunkinfo chunkinfo; }; struct irdma_pble_level2 { struct irdma_pble_info root; struct irdma_pble_info *leaf; struct irdma_virt_mem leafmem; u32 leaf_cnt; }; struct irdma_pble_alloc { u32 total_cnt; enum irdma_pble_level level; union { struct irdma_pble_info level1; struct irdma_pble_level2 level2; }; }; struct sd_pd_idx { u32 sd_idx; u32 pd_idx; u32 rel_pd_idx; }; struct irdma_add_page_info { struct irdma_chunk *chunk; struct irdma_hmc_sd_entry *sd_entry; struct irdma_hmc_info *hmc_info; struct sd_pd_idx idx; u32 pages; }; struct irdma_chunk { struct list_head list; struct irdma_dma_info dmainfo; - void *bitmapbuf; + unsigned long *bitmapbuf; u32 sizeofbitmap; u64 size; void *vaddr; u64 fpm_addr; u32 pg_cnt; enum irdma_alloc_type type; struct irdma_sc_dev *dev; - struct irdma_virt_mem bitmapmem; struct irdma_virt_mem chunkmem; }; struct irdma_pble_prm { struct list_head clist; spinlock_t prm_lock; /* protect prm bitmap */ u64 total_pble_alloc; u64 free_pble_cnt; u8 pble_shift; }; struct irdma_hmc_pble_rsrc { u32 unallocated_pble; struct mutex pble_mutex_lock; /* protect PBLE resource */ struct irdma_sc_dev *dev; u64 fpm_base_addr; u64 next_fpm_addr; struct irdma_pble_prm pinfo; u64 allocdpbles; u64 freedpbles; u32 stats_direct_sds; u32 stats_paged_sds; u64 
stats_alloc_ok; u64 stats_alloc_fail; u64 stats_alloc_freed; u64 stats_lvl1; u64 stats_lvl2; }; void irdma_destroy_pble_prm(struct irdma_hmc_pble_rsrc *pble_rsrc); int irdma_hmc_init_pble(struct irdma_sc_dev *dev, struct irdma_hmc_pble_rsrc *pble_rsrc); void irdma_free_pble(struct irdma_hmc_pble_rsrc *pble_rsrc, struct irdma_pble_alloc *palloc); int irdma_get_pble(struct irdma_hmc_pble_rsrc *pble_rsrc, struct irdma_pble_alloc *palloc, u32 pble_cnt, u8 lvl); int irdma_prm_add_pble_mem(struct irdma_pble_prm *pprm, struct irdma_chunk *pchunk); int irdma_prm_get_pbles(struct irdma_pble_prm *pprm, struct irdma_pble_chunkinfo *chunkinfo, u64 mem_size, u64 **vaddr, u64 *fpm_addr); void irdma_prm_return_pbles(struct irdma_pble_prm *pprm, struct irdma_pble_chunkinfo *chunkinfo); void irdma_pble_acquire_lock(struct irdma_hmc_pble_rsrc *pble_rsrc, unsigned long *flags); void irdma_pble_release_lock(struct irdma_hmc_pble_rsrc *pble_rsrc, unsigned long *flags); void irdma_pble_free_paged_mem(struct irdma_chunk *chunk); int irdma_pble_get_paged_mem(struct irdma_chunk *chunk, u32 pg_cnt); -void irdma_prm_rem_bitmapmem(struct irdma_hw *hw, struct irdma_chunk *chunk); #endif /* IRDMA_PBLE_H */ diff --git a/sys/dev/irdma/irdma_protos.h b/sys/dev/irdma/irdma_protos.h index 6f248c9da5e9..0663f9591d52 100644 --- a/sys/dev/irdma/irdma_protos.h +++ b/sys/dev/irdma/irdma_protos.h @@ -1,137 +1,133 @@ /*- * SPDX-License-Identifier: GPL-2.0 or Linux-OpenIB * - * Copyright (c) 2016 - 2022 Intel Corporation + * Copyright (c) 2016 - 2023 Intel Corporation * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU * General Public License (GPL) Version 2, available from the file * COPYING in the main directory of this source tree, or the * OpenFabrics.org BSD license below: * * Redistribution and use in source and binary forms, with or * without modification, are permitted provided that the following * conditions are met: * * - Redistributions of source code must retain the above * copyright notice, this list of conditions and the following * disclaimer. * * - Redistributions in binary form must reproduce the above * copyright notice, this list of conditions and the following * disclaimer in the documentation and/or other materials * provided with the distribution. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
*/ #ifndef IRDMA_PROTOS_H #define IRDMA_PROTOS_H #define PAUSE_TIMER_VAL 0xffff #define REFRESH_THRESHOLD 0x7fff #define HIGH_THRESHOLD 0x800 #define LOW_THRESHOLD 0x200 #define ALL_TC2PFC 0xff #define CQP_COMPL_WAIT_TIME_MS 10 #define CQP_TIMEOUT_THRESHOLD 500 /* init operations */ int irdma_sc_dev_init(struct irdma_sc_dev *dev, struct irdma_device_init_info *info); void irdma_sc_cqp_post_sq(struct irdma_sc_cqp *cqp); __le64 *irdma_sc_cqp_get_next_send_wqe(struct irdma_sc_cqp *cqp, u64 scratch); int irdma_sc_mr_fast_register(struct irdma_sc_qp *qp, struct irdma_fast_reg_stag_info *info, bool post_sq); void irdma_init_config_check(struct irdma_config_check *cc, u8 traffic_class, u16 qs_handle); /* HMC/FPM functions */ int irdma_sc_init_iw_hmc(struct irdma_sc_dev *dev, u16 hmc_fn_id); /* stats misc */ int irdma_cqp_gather_stats_cmd(struct irdma_sc_dev *dev, struct irdma_vsi_pestat *pestat, bool wait); int irdma_cqp_ws_node_cmd(struct irdma_sc_dev *dev, u8 cmd, struct irdma_ws_node_info *node_info); -int irdma_cqp_up_map_cmd(struct irdma_sc_dev *dev, u8 cmd, - struct irdma_up_info *map_info); int irdma_cqp_ceq_cmd(struct irdma_sc_dev *dev, struct irdma_sc_ceq *sc_ceq, u8 op); int irdma_cqp_aeq_cmd(struct irdma_sc_dev *dev, struct irdma_sc_aeq *sc_aeq, u8 op); int irdma_cqp_stats_inst_cmd(struct irdma_sc_vsi *vsi, u8 cmd, struct irdma_stats_inst_info *stats_info); void irdma_update_stats(struct irdma_dev_hw_stats *hw_stats, struct irdma_gather_stats *gather_stats, struct irdma_gather_stats *last_gather_stats, const struct irdma_hw_stat_map *map, u16 max_stat_idx); /* vsi functions */ int irdma_vsi_stats_init(struct irdma_sc_vsi *vsi, struct irdma_vsi_stats_info *info); void irdma_vsi_stats_free(struct irdma_sc_vsi *vsi); void irdma_sc_vsi_init(struct irdma_sc_vsi *vsi, struct irdma_vsi_init_info *info); int irdma_sc_add_cq_ctx(struct irdma_sc_ceq *ceq, struct irdma_sc_cq *cq); void irdma_sc_remove_cq_ctx(struct irdma_sc_ceq *ceq, struct irdma_sc_cq *cq); /* misc L2 param change functions */ void irdma_change_l2params(struct irdma_sc_vsi *vsi, struct irdma_l2params *l2params); void irdma_sc_suspend_resume_qps(struct irdma_sc_vsi *vsi, u8 suspend); int irdma_cqp_qp_suspend_resume(struct irdma_sc_qp *qp, u8 cmd); void irdma_qp_add_qos(struct irdma_sc_qp *qp); void irdma_qp_rem_qos(struct irdma_sc_qp *qp); struct irdma_sc_qp *irdma_get_qp_from_list(struct list_head *head, struct irdma_sc_qp *qp); void irdma_reinitialize_ieq(struct irdma_sc_vsi *vsi); u16 irdma_alloc_ws_node_id(struct irdma_sc_dev *dev); void irdma_free_ws_node_id(struct irdma_sc_dev *dev, u16 node_id); /* terminate functions*/ void irdma_terminate_send_fin(struct irdma_sc_qp *qp); void irdma_terminate_connection(struct irdma_sc_qp *qp, struct irdma_aeqe_info *info); void irdma_terminate_received(struct irdma_sc_qp *qp, struct irdma_aeqe_info *info); /* dynamic memory allocation */ void *irdma_allocate_dma_mem(struct irdma_hw *hw, struct irdma_dma_mem *mem, u64 size, u32 alignment); void *irdma_allocate_virt_mem(struct irdma_hw *hw, struct irdma_virt_mem *mem, u32 size); int irdma_free_dma_mem(struct irdma_hw *hw, struct irdma_dma_mem *mem); /* misc */ u8 irdma_get_encoded_wqe_size(u32 wqsize, enum irdma_queue_type queue_type); void irdma_modify_qp_to_err(struct irdma_sc_qp *sc_qp); int irdma_sc_static_hmc_pages_allocated(struct irdma_sc_cqp *cqp, u64 scratch, u16 hmc_fn_id, bool post_sq, bool poll_registers); int irdma_cfg_fpm_val(struct irdma_sc_dev *dev, u32 qp_count); int irdma_get_rdma_features(struct irdma_sc_dev *dev); void 
free_sd_mem(struct irdma_sc_dev *dev); int irdma_process_cqp_cmd(struct irdma_sc_dev *dev, struct cqp_cmds_info *pcmdinfo); int irdma_process_bh(struct irdma_sc_dev *dev); extern void dump_ctx(struct irdma_sc_dev *dev, u32 pf_num, u32 qp_num); void dumpCSR(struct irdma_sc_dev *dev); void dumpCSRx(struct irdma_sc_dev *dev); void dumpcls(struct irdma_sc_dev *dev); int irdma_cqp_sds_cmd(struct irdma_sc_dev *dev, struct irdma_update_sds_info *info); int irdma_cqp_query_fpm_val_cmd(struct irdma_sc_dev *dev, struct irdma_dma_mem *val_mem, u16 hmc_fn_id); int irdma_cqp_commit_fpm_val_cmd(struct irdma_sc_dev *dev, struct irdma_dma_mem *val_mem, u16 hmc_fn_id); int irdma_alloc_query_fpm_buf(struct irdma_sc_dev *dev, struct irdma_dma_mem *mem); -int irdma_cqp_manage_hmc_fcn_cmd(struct irdma_sc_dev *dev, - struct irdma_hmc_fcn_info *hmcfcninfo, - u16 *pmf_idx); void *irdma_remove_cqp_head(struct irdma_sc_dev *dev); +u64 irdma_mac_to_u64(const u8 *eth_add); #endif /* IRDMA_PROTOS_H */ diff --git a/sys/dev/irdma/irdma_puda.c b/sys/dev/irdma/irdma_puda.c index 45f37f4a750c..1fd8080a9d78 100644 --- a/sys/dev/irdma/irdma_puda.c +++ b/sys/dev/irdma/irdma_puda.c @@ -1,1851 +1,1857 @@ /*- * SPDX-License-Identifier: GPL-2.0 or Linux-OpenIB * - * Copyright (c) 2015 - 2022 Intel Corporation + * Copyright (c) 2015 - 2023 Intel Corporation * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU * General Public License (GPL) Version 2, available from the file * COPYING in the main directory of this source tree, or the * OpenFabrics.org BSD license below: * * Redistribution and use in source and binary forms, with or * without modification, are permitted provided that the following * conditions are met: * * - Redistributions of source code must retain the above * copyright notice, this list of conditions and the following * disclaimer. * * - Redistributions in binary form must reproduce the above * copyright notice, this list of conditions and the following * disclaimer in the documentation and/or other materials * provided with the distribution. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
*/ #include "osdep.h" #include "irdma_hmc.h" #include "irdma_defs.h" #include "irdma_type.h" #include "irdma_protos.h" #include "irdma_puda.h" #include "irdma_ws.h" static void irdma_ieq_receive(struct irdma_sc_vsi *vsi, struct irdma_puda_buf *buf); static void irdma_ieq_tx_compl(struct irdma_sc_vsi *vsi, void *sqwrid); static void irdma_ilq_putback_rcvbuf(struct irdma_sc_qp *qp, struct irdma_puda_buf *buf, u32 wqe_idx); /** * irdma_puda_get_listbuf - get buffer from puda list * @list: list to use for buffers (ILQ or IEQ) */ static struct irdma_puda_buf * irdma_puda_get_listbuf(struct list_head *list) { struct irdma_puda_buf *buf = NULL; if (!list_empty(list)) { buf = (struct irdma_puda_buf *)(list)->next; list_del((struct list_head *)&buf->list); } return buf; } /** * irdma_puda_get_bufpool - return buffer from resource * @rsrc: resource to use for buffer */ struct irdma_puda_buf * irdma_puda_get_bufpool(struct irdma_puda_rsrc *rsrc) { struct irdma_puda_buf *buf = NULL; struct list_head *list = &rsrc->bufpool; unsigned long flags; spin_lock_irqsave(&rsrc->bufpool_lock, flags); buf = irdma_puda_get_listbuf(list); if (buf) { rsrc->avail_buf_count--; buf->vsi = rsrc->vsi; } else { rsrc->stats_buf_alloc_fail++; } spin_unlock_irqrestore(&rsrc->bufpool_lock, flags); return buf; } /** * irdma_puda_ret_bufpool - return buffer to rsrc list * @rsrc: resource to use for buffer * @buf: buffer to return to resource */ void irdma_puda_ret_bufpool(struct irdma_puda_rsrc *rsrc, struct irdma_puda_buf *buf) { unsigned long flags; buf->do_lpb = false; spin_lock_irqsave(&rsrc->bufpool_lock, flags); list_add(&buf->list, &rsrc->bufpool); spin_unlock_irqrestore(&rsrc->bufpool_lock, flags); rsrc->avail_buf_count++; } /** * irdma_puda_post_recvbuf - set wqe for rcv buffer * @rsrc: resource ptr * @wqe_idx: wqe index to use * @buf: puda buffer for rcv q * @initial: flag if during init time */ static void irdma_puda_post_recvbuf(struct irdma_puda_rsrc *rsrc, u32 wqe_idx, struct irdma_puda_buf *buf, bool initial) { __le64 *wqe; struct irdma_sc_qp *qp = &rsrc->qp; u64 offset24 = 0; /* Synch buffer for use by device */ dma_sync_single_for_device(hw_to_dev(rsrc->dev->hw), buf->mem.pa, buf->mem.size, DMA_BIDIRECTIONAL); qp->qp_uk.rq_wrid_array[wqe_idx] = (uintptr_t)buf; wqe = qp->qp_uk.rq_base[wqe_idx].elem; if (!initial) get_64bit_val(wqe, IRDMA_BYTE_24, &offset24); offset24 = (offset24) ? 
0 : FIELD_PREP(IRDMAQPSQ_VALID, 1); set_64bit_val(wqe, IRDMA_BYTE_16, 0); set_64bit_val(wqe, 0, buf->mem.pa); if (qp->qp_uk.uk_attrs->hw_rev == IRDMA_GEN_1) { set_64bit_val(wqe, IRDMA_BYTE_8, FIELD_PREP(IRDMAQPSQ_GEN1_FRAG_LEN, buf->mem.size)); } else { set_64bit_val(wqe, IRDMA_BYTE_8, FIELD_PREP(IRDMAQPSQ_FRAG_LEN, buf->mem.size) | offset24); } irdma_wmb(); /* make sure WQE is written before valid bit is set */ set_64bit_val(wqe, IRDMA_BYTE_24, offset24); } /** * irdma_puda_replenish_rq - post rcv buffers * @rsrc: resource to use for buffer * @initial: flag if during init time */ static int irdma_puda_replenish_rq(struct irdma_puda_rsrc *rsrc, bool initial) { u32 i; u32 invalid_cnt = rsrc->rxq_invalid_cnt; struct irdma_puda_buf *buf = NULL; for (i = 0; i < invalid_cnt; i++) { buf = irdma_puda_get_bufpool(rsrc); if (!buf) return -ENOBUFS; irdma_puda_post_recvbuf(rsrc, rsrc->rx_wqe_idx, buf, initial); rsrc->rx_wqe_idx = ((rsrc->rx_wqe_idx + 1) % rsrc->rq_size); rsrc->rxq_invalid_cnt--; } return 0; } /** * irdma_puda_alloc_buf - allocate mem for buffer * @dev: iwarp device * @len: length of buffer */ static struct irdma_puda_buf * irdma_puda_alloc_buf(struct irdma_sc_dev *dev, u32 len) { struct irdma_puda_buf *buf; struct irdma_virt_mem buf_mem; buf_mem.size = sizeof(struct irdma_puda_buf); buf_mem.va = kzalloc(buf_mem.size, GFP_KERNEL); if (!buf_mem.va) return NULL; buf = buf_mem.va; buf->mem.size = len; buf->mem.va = kzalloc(buf->mem.size, GFP_KERNEL); if (!buf->mem.va) goto free_virt; buf->mem.pa = dma_map_single(hw_to_dev(dev->hw), buf->mem.va, buf->mem.size, DMA_BIDIRECTIONAL); if (dma_mapping_error(hw_to_dev(dev->hw), buf->mem.pa)) { kfree(buf->mem.va); goto free_virt; } buf->buf_mem.va = buf_mem.va; buf->buf_mem.size = buf_mem.size; return buf; free_virt: kfree(buf_mem.va); return NULL; } /** * irdma_puda_dele_buf - delete buffer back to system * @dev: iwarp device * @buf: buffer to free */ static void irdma_puda_dele_buf(struct irdma_sc_dev *dev, struct irdma_puda_buf *buf) { if (!buf->virtdma) { irdma_free_dma_mem(dev->hw, &buf->mem); kfree(buf->buf_mem.va); } } /** * irdma_puda_get_next_send_wqe - return next wqe for processing * @qp: puda qp for wqe * @wqe_idx: wqe index for caller */ static __le64 * irdma_puda_get_next_send_wqe(struct irdma_qp_uk *qp, u32 *wqe_idx){ int ret_code = 0; *wqe_idx = IRDMA_RING_CURRENT_HEAD(qp->sq_ring); if (!*wqe_idx) qp->swqe_polarity = !qp->swqe_polarity; IRDMA_RING_MOVE_HEAD(qp->sq_ring, ret_code); if (ret_code) return NULL; return qp->sq_base[*wqe_idx].elem; } /** * irdma_puda_poll_info - poll cq for completion * @cq: cq for poll * @info: info return for successful completion */ static int irdma_puda_poll_info(struct irdma_sc_cq *cq, struct irdma_puda_cmpl_info *info) { struct irdma_cq_uk *cq_uk = &cq->cq_uk; u64 qword0, qword2, qword3, qword6; __le64 *cqe; __le64 *ext_cqe = NULL; u64 qword7 = 0; u64 comp_ctx; bool valid_bit; bool ext_valid = 0; u32 major_err, minor_err; u32 peek_head; bool error; u8 polarity; cqe = IRDMA_GET_CURRENT_CQ_ELEM(&cq->cq_uk); get_64bit_val(cqe, IRDMA_BYTE_24, &qword3); valid_bit = (bool)FIELD_GET(IRDMA_CQ_VALID, qword3); if (valid_bit != cq_uk->polarity) return -ENOENT; + /* Ensure CQE contents are read after valid bit is checked */ + rmb(); + if (cq->dev->hw_attrs.uk_attrs.hw_rev >= IRDMA_GEN_2) ext_valid = (bool)FIELD_GET(IRDMA_CQ_EXTCQE, qword3); if (ext_valid) { peek_head = (cq_uk->cq_ring.head + 1) % cq_uk->cq_ring.size; ext_cqe = cq_uk->cq_base[peek_head].buf; get_64bit_val(ext_cqe, IRDMA_BYTE_24, &qword7); 
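/* the extended CQE occupies the next CQ ring slot; its valid bit must match the expected polarity (inverted when the ring wraps) before it is consumed */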
polarity = (u8)FIELD_GET(IRDMA_CQ_VALID, qword7); if (!peek_head) polarity ^= 1; if (polarity != cq_uk->polarity) return -ENOENT; + /* Ensure ext CQE contents are read after ext valid bit is checked */ + rmb(); + IRDMA_RING_MOVE_HEAD_NOCHECK(cq_uk->cq_ring); if (!IRDMA_RING_CURRENT_HEAD(cq_uk->cq_ring)) cq_uk->polarity = !cq_uk->polarity; /* update cq tail in cq shadow memory also */ IRDMA_RING_MOVE_TAIL(cq_uk->cq_ring); } irdma_debug_buf(cq->dev, IRDMA_DEBUG_PUDA, "PUDA CQE", cqe, 32); if (ext_valid) irdma_debug_buf(cq->dev, IRDMA_DEBUG_PUDA, "PUDA EXT-CQE", ext_cqe, 32); error = (bool)FIELD_GET(IRDMA_CQ_ERROR, qword3); if (error) { irdma_debug(cq->dev, IRDMA_DEBUG_PUDA, "receive error\n"); major_err = (u32)(FIELD_GET(IRDMA_CQ_MAJERR, qword3)); minor_err = (u32)(FIELD_GET(IRDMA_CQ_MINERR, qword3)); info->compl_error = major_err << 16 | minor_err; return -EIO; } get_64bit_val(cqe, IRDMA_BYTE_0, &qword0); get_64bit_val(cqe, IRDMA_BYTE_16, &qword2); info->q_type = (u8)FIELD_GET(IRDMA_CQ_SQ, qword3); info->qp_id = (u32)FIELD_GET(IRDMACQ_QPID, qword2); if (cq->dev->hw_attrs.uk_attrs.hw_rev >= IRDMA_GEN_2) info->ipv4 = (bool)FIELD_GET(IRDMACQ_IPV4, qword3); get_64bit_val(cqe, IRDMA_BYTE_8, &comp_ctx); info->qp = (struct irdma_qp_uk *)(irdma_uintptr) comp_ctx; info->wqe_idx = (u32)FIELD_GET(IRDMA_CQ_WQEIDX, qword3); if (info->q_type == IRDMA_CQE_QTYPE_RQ) { if (ext_valid) { info->vlan_valid = (bool)FIELD_GET(IRDMA_CQ_UDVLANVALID, qword7); if (info->vlan_valid) { get_64bit_val(ext_cqe, IRDMA_BYTE_16, &qword6); info->vlan = (u16)FIELD_GET(IRDMA_CQ_UDVLAN, qword6); } info->smac_valid = (bool)FIELD_GET(IRDMA_CQ_UDSMACVALID, qword7); if (info->smac_valid) { get_64bit_val(ext_cqe, IRDMA_BYTE_16, &qword6); info->smac[0] = (u8)((qword6 >> 40) & 0xFF); info->smac[1] = (u8)((qword6 >> 32) & 0xFF); info->smac[2] = (u8)((qword6 >> 24) & 0xFF); info->smac[3] = (u8)((qword6 >> 16) & 0xFF); info->smac[4] = (u8)((qword6 >> 8) & 0xFF); info->smac[5] = (u8)(qword6 & 0xFF); } } if (cq->dev->hw_attrs.uk_attrs.hw_rev == IRDMA_GEN_1) { info->vlan_valid = (bool)FIELD_GET(IRDMA_VLAN_TAG_VALID, qword3); info->l4proto = (u8)FIELD_GET(IRDMA_UDA_L4PROTO, qword2); info->l3proto = (u8)FIELD_GET(IRDMA_UDA_L3PROTO, qword2); } info->payload_len = (u32)FIELD_GET(IRDMACQ_PAYLDLEN, qword0); } return 0; } /** * irdma_puda_poll_cmpl - processes completion for cq * @dev: iwarp device * @cq: cq getting interrupt * @compl_err: return any completion err */ int irdma_puda_poll_cmpl(struct irdma_sc_dev *dev, struct irdma_sc_cq *cq, u32 *compl_err) { struct irdma_qp_uk *qp; struct irdma_cq_uk *cq_uk = &cq->cq_uk; struct irdma_puda_cmpl_info info = {0}; int ret = 0; struct irdma_puda_buf *buf; struct irdma_puda_rsrc *rsrc; u8 cq_type = cq->cq_type; unsigned long flags; if (cq_type == IRDMA_CQ_TYPE_ILQ || cq_type == IRDMA_CQ_TYPE_IEQ) { rsrc = (cq_type == IRDMA_CQ_TYPE_ILQ) ? 
cq->vsi->ilq : cq->vsi->ieq; } else { irdma_debug(dev, IRDMA_DEBUG_PUDA, "qp_type error\n"); return -EFAULT; } ret = irdma_puda_poll_info(cq, &info); *compl_err = info.compl_error; if (ret == -ENOENT) return ret; if (ret) goto done; qp = info.qp; if (!qp || !rsrc) { ret = -EFAULT; goto done; } if (qp->qp_id != rsrc->qp_id) { ret = -EFAULT; goto done; } if (info.q_type == IRDMA_CQE_QTYPE_RQ) { buf = (struct irdma_puda_buf *)(uintptr_t) qp->rq_wrid_array[info.wqe_idx]; /* reusing so synch the buffer for CPU use */ dma_sync_single_for_cpu(hw_to_dev(dev->hw), buf->mem.pa, buf->mem.size, DMA_BIDIRECTIONAL); /* Get all the tcpip information in the buf header */ ret = irdma_puda_get_tcpip_info(&info, buf); if (ret) { rsrc->stats_rcvd_pkt_err++; if (cq_type == IRDMA_CQ_TYPE_ILQ) { irdma_ilq_putback_rcvbuf(&rsrc->qp, buf, info.wqe_idx); } else { irdma_puda_ret_bufpool(rsrc, buf); irdma_puda_replenish_rq(rsrc, false); } goto done; } rsrc->stats_pkt_rcvd++; rsrc->compl_rxwqe_idx = info.wqe_idx; irdma_debug(dev, IRDMA_DEBUG_PUDA, "RQ completion\n"); rsrc->receive(rsrc->vsi, buf); if (cq_type == IRDMA_CQ_TYPE_ILQ) irdma_ilq_putback_rcvbuf(&rsrc->qp, buf, info.wqe_idx); else irdma_puda_replenish_rq(rsrc, false); } else { irdma_debug(dev, IRDMA_DEBUG_PUDA, "SQ completion\n"); buf = (struct irdma_puda_buf *)(uintptr_t) qp->sq_wrtrk_array[info.wqe_idx].wrid; /* reusing so synch the buffer for CPU use */ dma_sync_single_for_cpu(hw_to_dev(dev->hw), buf->mem.pa, buf->mem.size, DMA_BIDIRECTIONAL); IRDMA_RING_SET_TAIL(qp->sq_ring, info.wqe_idx); rsrc->xmit_complete(rsrc->vsi, buf); spin_lock_irqsave(&rsrc->bufpool_lock, flags); rsrc->tx_wqe_avail_cnt++; spin_unlock_irqrestore(&rsrc->bufpool_lock, flags); if (!list_empty(&rsrc->txpend)) irdma_puda_send_buf(rsrc, NULL); } done: IRDMA_RING_MOVE_HEAD_NOCHECK(cq_uk->cq_ring); if (!IRDMA_RING_CURRENT_HEAD(cq_uk->cq_ring)) cq_uk->polarity = !cq_uk->polarity; /* update cq tail in cq shadow memory also */ IRDMA_RING_MOVE_TAIL(cq_uk->cq_ring); set_64bit_val(cq_uk->shadow_area, IRDMA_BYTE_0, IRDMA_RING_CURRENT_HEAD(cq_uk->cq_ring)); return ret; } /** * irdma_puda_send - complete send wqe for transmit * @qp: puda qp for send * @info: buffer information for transmit */ int irdma_puda_send(struct irdma_sc_qp *qp, struct irdma_puda_send_info *info) { __le64 *wqe; u32 iplen, l4len; u64 hdr[2]; u32 wqe_idx; u8 iipt; /* number of 32 bits DWORDS in header */ l4len = info->tcplen >> 2; if (info->ipv4) { iipt = 3; iplen = 5; } else { iipt = 1; iplen = 10; } wqe = irdma_puda_get_next_send_wqe(&qp->qp_uk, &wqe_idx); if (!wqe) return -ENOSPC; qp->qp_uk.sq_wrtrk_array[wqe_idx].wrid = (uintptr_t)info->scratch; /* Third line of WQE descriptor */ /* maclen is in words */ if (qp->dev->hw_attrs.uk_attrs.hw_rev >= IRDMA_GEN_2) { hdr[0] = 0; /* Dest_QPN and Dest_QKey only for UD */ hdr[1] = FIELD_PREP(IRDMA_UDA_QPSQ_OPCODE, IRDMA_OP_TYPE_SEND) | FIELD_PREP(IRDMA_UDA_QPSQ_L4LEN, l4len) | FIELD_PREP(IRDMAQPSQ_AHID, info->ah_id) | FIELD_PREP(IRDMA_UDA_QPSQ_SIGCOMPL, 1) | FIELD_PREP(IRDMA_UDA_QPSQ_VALID, qp->qp_uk.swqe_polarity); /* Forth line of WQE descriptor */ set_64bit_val(wqe, IRDMA_BYTE_0, info->paddr); set_64bit_val(wqe, IRDMA_BYTE_8, FIELD_PREP(IRDMAQPSQ_FRAG_LEN, info->len) | FIELD_PREP(IRDMA_UDA_QPSQ_VALID, qp->qp_uk.swqe_polarity)); } else { hdr[0] = FIELD_PREP(IRDMA_UDA_QPSQ_MACLEN, info->maclen >> 1) | FIELD_PREP(IRDMA_UDA_QPSQ_IPLEN, iplen) | FIELD_PREP(IRDMA_UDA_QPSQ_L4T, 1) | FIELD_PREP(IRDMA_UDA_QPSQ_IIPT, iipt) | FIELD_PREP(IRDMA_GEN1_UDA_QPSQ_L4LEN, l4len); hdr[1] = 
FIELD_PREP(IRDMA_UDA_QPSQ_OPCODE, IRDMA_OP_TYPE_SEND) | FIELD_PREP(IRDMA_UDA_QPSQ_SIGCOMPL, 1) | FIELD_PREP(IRDMA_UDA_QPSQ_DOLOOPBACK, info->do_lpb) | FIELD_PREP(IRDMA_UDA_QPSQ_VALID, qp->qp_uk.swqe_polarity); /* Forth line of WQE descriptor */ set_64bit_val(wqe, IRDMA_BYTE_0, info->paddr); set_64bit_val(wqe, IRDMA_BYTE_8, FIELD_PREP(IRDMAQPSQ_GEN1_FRAG_LEN, info->len)); } set_64bit_val(wqe, IRDMA_BYTE_16, hdr[0]); irdma_wmb(); /* make sure WQE is written before valid bit is set */ set_64bit_val(wqe, IRDMA_BYTE_24, hdr[1]); irdma_debug_buf(qp->dev, IRDMA_DEBUG_PUDA, "PUDA SEND WQE", wqe, 32); irdma_uk_qp_post_wr(&qp->qp_uk); return 0; } /** * irdma_puda_send_buf - transmit puda buffer * @rsrc: resource to use for buffer * @buf: puda buffer to transmit */ void irdma_puda_send_buf(struct irdma_puda_rsrc *rsrc, struct irdma_puda_buf *buf) { struct irdma_puda_send_info info; int ret = 0; unsigned long flags; spin_lock_irqsave(&rsrc->bufpool_lock, flags); /* * if no wqe available or not from a completion and we have pending buffers, we must queue new buffer */ if (!rsrc->tx_wqe_avail_cnt || (buf && !list_empty(&rsrc->txpend))) { list_add_tail(&buf->list, &rsrc->txpend); spin_unlock_irqrestore(&rsrc->bufpool_lock, flags); rsrc->stats_sent_pkt_q++; if (rsrc->type == IRDMA_PUDA_RSRC_TYPE_ILQ) irdma_debug(rsrc->dev, IRDMA_DEBUG_PUDA, "adding to txpend\n"); return; } rsrc->tx_wqe_avail_cnt--; /* * if we are coming from a completion and have pending buffers then Get one from pending list */ if (!buf) { buf = irdma_puda_get_listbuf(&rsrc->txpend); if (!buf) goto done; } info.scratch = buf; info.paddr = buf->mem.pa; info.len = buf->totallen; info.tcplen = buf->tcphlen; info.ipv4 = buf->ipv4; if (rsrc->dev->hw_attrs.uk_attrs.hw_rev >= IRDMA_GEN_2) { info.ah_id = buf->ah_id; } else { info.maclen = buf->maclen; info.do_lpb = buf->do_lpb; } /* Synch buffer for use by device */ dma_sync_single_for_cpu(hw_to_dev(rsrc->dev->hw), buf->mem.pa, buf->mem.size, DMA_BIDIRECTIONAL); ret = irdma_puda_send(&rsrc->qp, &info); if (ret) { rsrc->tx_wqe_avail_cnt++; rsrc->stats_sent_pkt_q++; list_add(&buf->list, &rsrc->txpend); if (rsrc->type == IRDMA_PUDA_RSRC_TYPE_ILQ) irdma_debug(rsrc->dev, IRDMA_DEBUG_PUDA, "adding to puda_send\n"); } else { rsrc->stats_pkt_sent++; } done: spin_unlock_irqrestore(&rsrc->bufpool_lock, flags); } /** * irdma_puda_qp_setctx - during init, set qp's context * @rsrc: qp's resource */ static void irdma_puda_qp_setctx(struct irdma_puda_rsrc *rsrc) { struct irdma_sc_qp *qp = &rsrc->qp; __le64 *qp_ctx = qp->hw_host_ctx; set_64bit_val(qp_ctx, IRDMA_BYTE_8, qp->sq_pa); set_64bit_val(qp_ctx, IRDMA_BYTE_16, qp->rq_pa); set_64bit_val(qp_ctx, IRDMA_BYTE_24, FIELD_PREP(IRDMAQPC_RQSIZE, qp->hw_rq_size) | FIELD_PREP(IRDMAQPC_SQSIZE, qp->hw_sq_size)); set_64bit_val(qp_ctx, IRDMA_BYTE_48, FIELD_PREP(IRDMAQPC_SNDMSS, rsrc->buf_size)); set_64bit_val(qp_ctx, IRDMA_BYTE_56, 0); if (qp->dev->hw_attrs.uk_attrs.hw_rev == IRDMA_GEN_1) set_64bit_val(qp_ctx, IRDMA_BYTE_64, 1); set_64bit_val(qp_ctx, IRDMA_BYTE_136, FIELD_PREP(IRDMAQPC_TXCQNUM, rsrc->cq_id) | FIELD_PREP(IRDMAQPC_RXCQNUM, rsrc->cq_id)); set_64bit_val(qp_ctx, IRDMA_BYTE_144, FIELD_PREP(IRDMAQPC_STAT_INDEX, rsrc->stats_idx)); set_64bit_val(qp_ctx, IRDMA_BYTE_160, FIELD_PREP(IRDMAQPC_PRIVEN, 1) | FIELD_PREP(IRDMAQPC_USESTATSINSTANCE, rsrc->stats_idx_valid)); set_64bit_val(qp_ctx, IRDMA_BYTE_168, FIELD_PREP(IRDMAQPC_QPCOMPCTX, (uintptr_t)qp)); set_64bit_val(qp_ctx, IRDMA_BYTE_176, FIELD_PREP(IRDMAQPC_SQTPHVAL, qp->sq_tph_val) | FIELD_PREP(IRDMAQPC_RQTPHVAL, 
qp->rq_tph_val) | FIELD_PREP(IRDMAQPC_QSHANDLE, qp->qs_handle)); irdma_debug_buf(rsrc->dev, IRDMA_DEBUG_PUDA, "PUDA QP CONTEXT", qp_ctx, IRDMA_QP_CTX_SIZE); } /** * irdma_puda_qp_wqe - setup wqe for qp create * @dev: Device * @qp: Resource qp */ static int irdma_puda_qp_wqe(struct irdma_sc_dev *dev, struct irdma_sc_qp *qp) { struct irdma_sc_cqp *cqp; __le64 *wqe; u64 hdr; struct irdma_ccq_cqe_info compl_info; int status = 0; cqp = dev->cqp; wqe = irdma_sc_cqp_get_next_send_wqe(cqp, 0); if (!wqe) return -ENOSPC; set_64bit_val(wqe, IRDMA_BYTE_16, qp->hw_host_ctx_pa); set_64bit_val(wqe, IRDMA_BYTE_40, qp->shadow_area_pa); hdr = qp->qp_uk.qp_id | FIELD_PREP(IRDMA_CQPSQ_OPCODE, IRDMA_CQP_OP_CREATE_QP) | FIELD_PREP(IRDMA_CQPSQ_QP_QPTYPE, IRDMA_QP_TYPE_UDA) | FIELD_PREP(IRDMA_CQPSQ_QP_CQNUMVALID, 1) | FIELD_PREP(IRDMA_CQPSQ_QP_NEXTIWSTATE, 2) | FIELD_PREP(IRDMA_CQPSQ_WQEVALID, cqp->polarity); irdma_wmb(); /* make sure WQE is written before valid bit is set */ set_64bit_val(wqe, IRDMA_BYTE_24, hdr); irdma_debug_buf(cqp->dev, IRDMA_DEBUG_PUDA, "PUDA QP CREATE", wqe, 40); irdma_sc_cqp_post_sq(cqp); status = irdma_sc_poll_for_cqp_op_done(dev->cqp, IRDMA_CQP_OP_CREATE_QP, &compl_info); return status; } /** * irdma_puda_qp_create - create qp for resource * @rsrc: resource to use for buffer */ static int irdma_puda_qp_create(struct irdma_puda_rsrc *rsrc) { struct irdma_sc_qp *qp = &rsrc->qp; struct irdma_qp_uk *ukqp = &qp->qp_uk; int ret = 0; u32 sq_size, rq_size; struct irdma_dma_mem *mem; sq_size = rsrc->sq_size * IRDMA_QP_WQE_MIN_SIZE; rq_size = rsrc->rq_size * IRDMA_QP_WQE_MIN_SIZE; rsrc->qpmem.size = (sq_size + rq_size + (IRDMA_SHADOW_AREA_SIZE << 3) + IRDMA_QP_CTX_SIZE); rsrc->qpmem.va = irdma_allocate_dma_mem(rsrc->dev->hw, &rsrc->qpmem, rsrc->qpmem.size, IRDMA_HW_PAGE_SIZE); if (!rsrc->qpmem.va) return -ENOMEM; mem = &rsrc->qpmem; memset(mem->va, 0, rsrc->qpmem.size); qp->hw_sq_size = irdma_get_encoded_wqe_size(rsrc->sq_size, IRDMA_QUEUE_TYPE_SQ_RQ); qp->hw_rq_size = irdma_get_encoded_wqe_size(rsrc->rq_size, IRDMA_QUEUE_TYPE_SQ_RQ); qp->pd = &rsrc->sc_pd; qp->qp_uk.qp_type = IRDMA_QP_TYPE_UDA; qp->dev = rsrc->dev; qp->qp_uk.back_qp = rsrc; qp->sq_pa = mem->pa; qp->rq_pa = qp->sq_pa + sq_size; qp->vsi = rsrc->vsi; ukqp->sq_base = mem->va; ukqp->rq_base = &ukqp->sq_base[rsrc->sq_size]; ukqp->shadow_area = ukqp->rq_base[rsrc->rq_size].elem; ukqp->uk_attrs = &qp->dev->hw_attrs.uk_attrs; qp->shadow_area_pa = qp->rq_pa + rq_size; qp->hw_host_ctx = ukqp->shadow_area + IRDMA_SHADOW_AREA_SIZE; qp->hw_host_ctx_pa = qp->shadow_area_pa + (IRDMA_SHADOW_AREA_SIZE << 3); qp->push_idx = IRDMA_INVALID_PUSH_PAGE_INDEX; ukqp->qp_id = rsrc->qp_id; ukqp->sq_wrtrk_array = rsrc->sq_wrtrk_array; ukqp->rq_wrid_array = rsrc->rq_wrid_array; ukqp->sq_size = rsrc->sq_size; ukqp->rq_size = rsrc->rq_size; IRDMA_RING_INIT(ukqp->sq_ring, ukqp->sq_size); IRDMA_RING_INIT(ukqp->initial_ring, ukqp->sq_size); IRDMA_RING_INIT(ukqp->rq_ring, ukqp->rq_size); ukqp->wqe_alloc_db = qp->pd->dev->wqe_alloc_db; ret = rsrc->dev->ws_add(qp->vsi, qp->user_pri); if (ret) { irdma_free_dma_mem(rsrc->dev->hw, &rsrc->qpmem); return ret; } irdma_qp_add_qos(qp); irdma_puda_qp_setctx(rsrc); if (rsrc->dev->ceq_valid) ret = irdma_cqp_qp_create_cmd(rsrc->dev, qp); else ret = irdma_puda_qp_wqe(rsrc->dev, qp); if (ret) { irdma_qp_rem_qos(qp); rsrc->dev->ws_remove(qp->vsi, qp->user_pri); irdma_free_dma_mem(rsrc->dev->hw, &rsrc->qpmem); } return ret; } /** * irdma_puda_cq_wqe - setup wqe for CQ create * @dev: Device * @cq: resource for cq */ static int 
irdma_puda_cq_wqe(struct irdma_sc_dev *dev, struct irdma_sc_cq *cq) { __le64 *wqe; struct irdma_sc_cqp *cqp; u64 hdr; struct irdma_ccq_cqe_info compl_info; int status = 0; cqp = dev->cqp; wqe = irdma_sc_cqp_get_next_send_wqe(cqp, 0); if (!wqe) return -ENOSPC; set_64bit_val(wqe, IRDMA_BYTE_0, cq->cq_uk.cq_size); set_64bit_val(wqe, IRDMA_BYTE_8, RS_64_1(cq, 1)); set_64bit_val(wqe, IRDMA_BYTE_16, FIELD_PREP(IRDMA_CQPSQ_CQ_SHADOW_READ_THRESHOLD, cq->shadow_read_threshold)); set_64bit_val(wqe, IRDMA_BYTE_32, cq->cq_pa); set_64bit_val(wqe, IRDMA_BYTE_40, cq->shadow_area_pa); set_64bit_val(wqe, IRDMA_BYTE_56, FIELD_PREP(IRDMA_CQPSQ_TPHVAL, cq->tph_val) | FIELD_PREP(IRDMA_CQPSQ_VSIIDX, cq->vsi->vsi_idx)); hdr = cq->cq_uk.cq_id | FIELD_PREP(IRDMA_CQPSQ_OPCODE, IRDMA_CQP_OP_CREATE_CQ) | FIELD_PREP(IRDMA_CQPSQ_CQ_CHKOVERFLOW, 1) | FIELD_PREP(IRDMA_CQPSQ_CQ_ENCEQEMASK, 1) | FIELD_PREP(IRDMA_CQPSQ_CQ_CEQIDVALID, 1) | FIELD_PREP(IRDMA_CQPSQ_WQEVALID, cqp->polarity); irdma_wmb(); /* make sure WQE is written before valid bit is set */ set_64bit_val(wqe, IRDMA_BYTE_24, hdr); irdma_debug_buf(dev, IRDMA_DEBUG_PUDA, "PUDA CREATE CQ", wqe, IRDMA_CQP_WQE_SIZE * 8); irdma_sc_cqp_post_sq(dev->cqp); status = irdma_sc_poll_for_cqp_op_done(dev->cqp, IRDMA_CQP_OP_CREATE_CQ, &compl_info); if (!status) { struct irdma_sc_ceq *ceq = dev->ceq[0]; if (ceq && ceq->reg_cq) status = irdma_sc_add_cq_ctx(ceq, cq); } return status; } /** * irdma_puda_cq_create - create cq for resource * @rsrc: resource for which cq to create */ static int irdma_puda_cq_create(struct irdma_puda_rsrc *rsrc) { struct irdma_sc_dev *dev = rsrc->dev; struct irdma_sc_cq *cq = &rsrc->cq; int ret = 0; u32 cqsize; struct irdma_dma_mem *mem; struct irdma_cq_init_info info = {0}; struct irdma_cq_uk_init_info *init_info = &info.cq_uk_init_info; cq->vsi = rsrc->vsi; cqsize = rsrc->cq_size * (sizeof(struct irdma_cqe)); rsrc->cqmem.size = cqsize + sizeof(struct irdma_cq_shadow_area); rsrc->cqmem.va = irdma_allocate_dma_mem(dev->hw, &rsrc->cqmem, rsrc->cqmem.size, IRDMA_CQ0_ALIGNMENT); if (!rsrc->cqmem.va) return -ENOMEM; mem = &rsrc->cqmem; info.dev = dev; info.type = (rsrc->type == IRDMA_PUDA_RSRC_TYPE_ILQ) ? 
IRDMA_CQ_TYPE_ILQ : IRDMA_CQ_TYPE_IEQ; info.shadow_read_threshold = rsrc->cq_size >> 2; info.cq_base_pa = mem->pa; info.shadow_area_pa = mem->pa + cqsize; init_info->cq_base = mem->va; init_info->shadow_area = (__le64 *) ((u8 *)mem->va + cqsize); init_info->cq_size = rsrc->cq_size; init_info->cq_id = rsrc->cq_id; info.ceqe_mask = true; info.ceq_id_valid = true; info.vsi = rsrc->vsi; ret = irdma_sc_cq_init(cq, &info); if (ret) goto error; if (rsrc->dev->ceq_valid) ret = irdma_cqp_cq_create_cmd(dev, cq); else ret = irdma_puda_cq_wqe(dev, cq); error: if (ret) irdma_free_dma_mem(dev->hw, &rsrc->cqmem); return ret; } /** * irdma_puda_free_qp - free qp for resource * @rsrc: resource for which qp to free */ static void irdma_puda_free_qp(struct irdma_puda_rsrc *rsrc) { int ret; struct irdma_ccq_cqe_info compl_info; struct irdma_sc_dev *dev = rsrc->dev; if (rsrc->dev->ceq_valid) { irdma_cqp_qp_destroy_cmd(dev, &rsrc->qp); rsrc->dev->ws_remove(rsrc->qp.vsi, rsrc->qp.user_pri); return; } ret = irdma_sc_qp_destroy(&rsrc->qp, 0, false, true, true); if (ret) irdma_debug(dev, IRDMA_DEBUG_PUDA, "error puda qp destroy wqe, status = %d\n", ret); if (!ret) { ret = irdma_sc_poll_for_cqp_op_done(dev->cqp, IRDMA_CQP_OP_DESTROY_QP, &compl_info); if (ret) irdma_debug(dev, IRDMA_DEBUG_PUDA, "error puda qp destroy failed, status = %d\n", ret); } rsrc->dev->ws_remove(rsrc->qp.vsi, rsrc->qp.user_pri); } /** * irdma_puda_free_cq - free cq for resource * @rsrc: resource for which cq to free */ static void irdma_puda_free_cq(struct irdma_puda_rsrc *rsrc) { int ret; struct irdma_ccq_cqe_info compl_info; struct irdma_sc_dev *dev = rsrc->dev; if (rsrc->dev->ceq_valid) { irdma_cqp_cq_destroy_cmd(dev, &rsrc->cq); return; } ret = irdma_sc_cq_destroy(&rsrc->cq, 0, true); if (ret) irdma_debug(dev, IRDMA_DEBUG_PUDA, "error ieq cq destroy\n"); if (!ret) { ret = irdma_sc_poll_for_cqp_op_done(dev->cqp, IRDMA_CQP_OP_DESTROY_CQ, &compl_info); if (ret) irdma_debug(dev, IRDMA_DEBUG_PUDA, "error ieq qp destroy done\n"); } } /** * irdma_puda_dele_rsrc - delete all resources during close * @vsi: VSI structure of device * @type: type of resource to dele * @reset: true if reset chip */ void irdma_puda_dele_rsrc(struct irdma_sc_vsi *vsi, enum puda_rsrc_type type, bool reset) { struct irdma_sc_dev *dev = vsi->dev; struct irdma_puda_rsrc *rsrc; struct irdma_puda_buf *buf = NULL; struct irdma_puda_buf *nextbuf = NULL; struct irdma_virt_mem *vmem; struct irdma_sc_ceq *ceq; ceq = vsi->dev->ceq[0]; switch (type) { case IRDMA_PUDA_RSRC_TYPE_ILQ: rsrc = vsi->ilq; vmem = &vsi->ilq_mem; vsi->ilq = NULL; if (ceq && ceq->reg_cq) irdma_sc_remove_cq_ctx(ceq, &rsrc->cq); break; case IRDMA_PUDA_RSRC_TYPE_IEQ: rsrc = vsi->ieq; vmem = &vsi->ieq_mem; vsi->ieq = NULL; if (ceq && ceq->reg_cq) irdma_sc_remove_cq_ctx(ceq, &rsrc->cq); break; default: irdma_debug(dev, IRDMA_DEBUG_PUDA, "error resource type = 0x%x\n", type); return; } spin_lock_destroy(&rsrc->bufpool_lock); switch (rsrc->cmpl) { case PUDA_HASH_CRC_COMPLETE: irdma_free_hash_desc(rsrc->hash_desc); /* fallthrough */ case PUDA_QP_CREATED: irdma_qp_rem_qos(&rsrc->qp); if (!reset) irdma_puda_free_qp(rsrc); irdma_free_dma_mem(dev->hw, &rsrc->qpmem); /* fallthrough */ case PUDA_CQ_CREATED: if (!reset) irdma_puda_free_cq(rsrc); irdma_free_dma_mem(dev->hw, &rsrc->cqmem); break; default: irdma_debug(rsrc->dev, IRDMA_DEBUG_PUDA, "error no resources\n"); break; } /* Free all allocated puda buffers for both tx and rx */ buf = rsrc->alloclist; while (buf) { nextbuf = buf->next; irdma_puda_dele_buf(dev, buf); buf = 
nextbuf; rsrc->alloc_buf_count--; } kfree(vmem->va); } /** * irdma_puda_allocbufs - allocate buffers for resource * @rsrc: resource for buffer allocation * @count: number of buffers to create */ static int irdma_puda_allocbufs(struct irdma_puda_rsrc *rsrc, u32 count) { u32 i; struct irdma_puda_buf *buf; struct irdma_puda_buf *nextbuf; struct irdma_virt_mem buf_mem; struct irdma_dma_mem *dma_mem; bool virtdma = false; unsigned long flags; buf_mem.size = count * sizeof(struct irdma_puda_buf); buf_mem.va = kzalloc(buf_mem.size, GFP_KERNEL); if (!buf_mem.va) { irdma_debug(rsrc->dev, IRDMA_DEBUG_PUDA, "error virt_mem for buf\n"); rsrc->stats_buf_alloc_fail++; goto trysmall; } /* * Allocate the large dma chunk and setup dma attributes into first puda buffer. This is required during free */ buf = (struct irdma_puda_buf *)buf_mem.va; buf->mem.va = irdma_allocate_dma_mem(rsrc->dev->hw, &buf->mem, rsrc->buf_size * count, 1); if (!buf->mem.va) { irdma_debug(rsrc->dev, IRDMA_DEBUG_PUDA, "error dma_mem for buf\n"); kfree(buf_mem.va); rsrc->stats_buf_alloc_fail++; goto trysmall; } /* * dma_mem points to start of the large DMA chunk */ dma_mem = &buf->mem; spin_lock_irqsave(&rsrc->bufpool_lock, flags); for (i = 0; i < count; i++) { buf = ((struct irdma_puda_buf *)buf_mem.va) + i; buf->mem.va = (char *)dma_mem->va + (i * rsrc->buf_size); buf->mem.pa = dma_mem->pa + (i * rsrc->buf_size); buf->mem.size = rsrc->buf_size; buf->virtdma = virtdma; virtdma = true; buf->buf_mem.va = buf_mem.va; buf->buf_mem.size = buf_mem.size; list_add(&buf->list, &rsrc->bufpool); rsrc->alloc_buf_count++; if (!rsrc->alloclist) { rsrc->alloclist = buf; } else { nextbuf = rsrc->alloclist; rsrc->alloclist = buf; buf->next = nextbuf; } } spin_unlock_irqrestore(&rsrc->bufpool_lock, flags); rsrc->avail_buf_count = rsrc->alloc_buf_count; return 0; trysmall: for (i = 0; i < count; i++) { buf = irdma_puda_alloc_buf(rsrc->dev, rsrc->buf_size); if (!buf) { rsrc->stats_buf_alloc_fail++; return -ENOMEM; } irdma_puda_ret_bufpool(rsrc, buf); rsrc->alloc_buf_count++; if (!rsrc->alloclist) { rsrc->alloclist = buf; } else { nextbuf = rsrc->alloclist; rsrc->alloclist = buf; buf->next = nextbuf; } } rsrc->avail_buf_count = rsrc->alloc_buf_count; return 0; } /** * irdma_puda_create_rsrc - create resource (ilq or ieq) * @vsi: sc VSI struct * @info: resource information */ int irdma_puda_create_rsrc(struct irdma_sc_vsi *vsi, struct irdma_puda_rsrc_info *info) { struct irdma_sc_dev *dev = vsi->dev; int ret = 0; struct irdma_puda_rsrc *rsrc; u32 pudasize; u32 sqwridsize, rqwridsize; struct irdma_virt_mem *vmem; info->count = 1; pudasize = sizeof(struct irdma_puda_rsrc); sqwridsize = info->sq_size * sizeof(struct irdma_sq_uk_wr_trk_info); rqwridsize = info->rq_size * 8; switch (info->type) { case IRDMA_PUDA_RSRC_TYPE_ILQ: vmem = &vsi->ilq_mem; break; case IRDMA_PUDA_RSRC_TYPE_IEQ: vmem = &vsi->ieq_mem; break; default: return -EOPNOTSUPP; } vmem->size = pudasize + sqwridsize + rqwridsize; vmem->va = kzalloc(vmem->size, GFP_KERNEL); if (!vmem->va) return -ENOMEM; rsrc = vmem->va; spin_lock_init(&rsrc->bufpool_lock); switch (info->type) { case IRDMA_PUDA_RSRC_TYPE_ILQ: vsi->ilq = vmem->va; vsi->ilq_count = info->count; rsrc->receive = info->receive; rsrc->xmit_complete = info->xmit_complete; break; case IRDMA_PUDA_RSRC_TYPE_IEQ: vsi->ieq_count = info->count; vsi->ieq = vmem->va; rsrc->receive = irdma_ieq_receive; rsrc->xmit_complete = irdma_ieq_tx_compl; break; default: return -EOPNOTSUPP; } rsrc->type = info->type; rsrc->sq_wrtrk_array = (struct 
irdma_sq_uk_wr_trk_info *) ((u8 *)vmem->va + pudasize); rsrc->rq_wrid_array = (u64 *)((u8 *)vmem->va + pudasize + sqwridsize); /* Initialize all ieq lists */ INIT_LIST_HEAD(&rsrc->bufpool); INIT_LIST_HEAD(&rsrc->txpend); rsrc->tx_wqe_avail_cnt = info->sq_size - 1; irdma_sc_pd_init(dev, &rsrc->sc_pd, info->pd_id, info->abi_ver); rsrc->qp_id = info->qp_id; rsrc->cq_id = info->cq_id; rsrc->sq_size = info->sq_size; rsrc->rq_size = info->rq_size; rsrc->cq_size = info->rq_size + info->sq_size; if (dev->hw_attrs.uk_attrs.hw_rev >= IRDMA_GEN_2) { if (rsrc->type == IRDMA_PUDA_RSRC_TYPE_ILQ) rsrc->cq_size += info->rq_size; } rsrc->buf_size = info->buf_size; rsrc->dev = dev; rsrc->vsi = vsi; rsrc->stats_idx = info->stats_idx; rsrc->stats_idx_valid = info->stats_idx_valid; ret = irdma_puda_cq_create(rsrc); if (!ret) { rsrc->cmpl = PUDA_CQ_CREATED; ret = irdma_puda_qp_create(rsrc); } if (ret) { irdma_debug(dev, IRDMA_DEBUG_PUDA, "error qp_create type=%d, status=%d\n", rsrc->type, ret); goto error; } rsrc->cmpl = PUDA_QP_CREATED; ret = irdma_puda_allocbufs(rsrc, info->tx_buf_cnt + info->rq_size); if (ret) { irdma_debug(dev, IRDMA_DEBUG_PUDA, "error alloc_buf\n"); goto error; } rsrc->rxq_invalid_cnt = info->rq_size; ret = irdma_puda_replenish_rq(rsrc, true); if (ret) goto error; if (info->type == IRDMA_PUDA_RSRC_TYPE_IEQ) { if (!irdma_init_hash_desc(&rsrc->hash_desc)) { rsrc->check_crc = true; rsrc->cmpl = PUDA_HASH_CRC_COMPLETE; ret = 0; } } irdma_sc_ccq_arm(&rsrc->cq); return ret; error: irdma_puda_dele_rsrc(vsi, info->type, false); return ret; } /** * irdma_ilq_putback_rcvbuf - ilq buffer to put back on rq * @qp: ilq's qp resource * @buf: puda buffer for rcv q * @wqe_idx: wqe index of completed rcvbuf */ static void irdma_ilq_putback_rcvbuf(struct irdma_sc_qp *qp, struct irdma_puda_buf *buf, u32 wqe_idx) { __le64 *wqe; u64 offset8, offset24; /* Synch buffer for use by device */ dma_sync_single_for_device(hw_to_dev(qp->dev->hw), buf->mem.pa, buf->mem.size, DMA_BIDIRECTIONAL); wqe = qp->qp_uk.rq_base[wqe_idx].elem; get_64bit_val(wqe, IRDMA_BYTE_24, &offset24); if (qp->dev->hw_attrs.uk_attrs.hw_rev >= IRDMA_GEN_2) { get_64bit_val(wqe, IRDMA_BYTE_8, &offset8); if (offset24) offset8 &= ~FIELD_PREP(IRDMAQPSQ_VALID, 1); else offset8 |= FIELD_PREP(IRDMAQPSQ_VALID, 1); set_64bit_val(wqe, IRDMA_BYTE_8, offset8); irdma_wmb(); /* make sure WQE is written before valid bit is set */ } if (offset24) offset24 = 0; else offset24 = FIELD_PREP(IRDMAQPSQ_VALID, 1); set_64bit_val(wqe, IRDMA_BYTE_24, offset24); } /** * irdma_ieq_get_fpdu_len - get length of fpdu with or without marker * @pfpdu: pointer to fpdu * @datap: pointer to data in the buffer * @rcv_seq: seqnum of the data buffer */ static u16 irdma_ieq_get_fpdu_len(struct irdma_pfpdu *pfpdu, u8 *datap, u32 rcv_seq){ u32 marker_seq, end_seq, blk_start; u8 marker_len = pfpdu->marker_len; u16 total_len = 0; u16 fpdu_len; blk_start = (pfpdu->rcv_start_seq - rcv_seq) & (IRDMA_MRK_BLK_SZ - 1); if (!blk_start) { total_len = marker_len; marker_seq = rcv_seq + IRDMA_MRK_BLK_SZ; if (marker_len && *(u32 *)datap) return 0; } else { marker_seq = rcv_seq + blk_start; } datap += total_len; fpdu_len = IRDMA_NTOHS(*(__be16 *) datap); fpdu_len += IRDMA_IEQ_MPA_FRAMING; fpdu_len = (fpdu_len + 3) & 0xfffc; if (fpdu_len > pfpdu->max_fpdu_data) return 0; total_len += fpdu_len; end_seq = rcv_seq + total_len; while ((int)(marker_seq - end_seq) < 0) { total_len += marker_len; end_seq += marker_len; marker_seq += IRDMA_MRK_BLK_SZ; } return total_len; } /** * irdma_ieq_copy_to_txbuf - 
copy data from rcv buf to tx buf * @buf: rcv buffer with partial * @txbuf: tx buffer for sending back * @buf_offset: rcv buffer offset to copy from * @txbuf_offset: at offset in tx buf to copy * @len: length of data to copy */ static void irdma_ieq_copy_to_txbuf(struct irdma_puda_buf *buf, struct irdma_puda_buf *txbuf, u16 buf_offset, u32 txbuf_offset, u32 len) { void *mem1 = (u8 *)buf->mem.va + buf_offset; void *mem2 = (u8 *)txbuf->mem.va + txbuf_offset; irdma_memcpy(mem2, mem1, len); } /** * irdma_ieq_setup_tx_buf - setup tx buffer for partial handling * @buf: receive buffer with partial * @txbuf: buffer to prepare */ static void irdma_ieq_setup_tx_buf(struct irdma_puda_buf *buf, struct irdma_puda_buf *txbuf) { txbuf->tcphlen = buf->tcphlen; txbuf->ipv4 = buf->ipv4; if (buf->vsi->dev->hw_attrs.uk_attrs.hw_rev >= IRDMA_GEN_2) { txbuf->hdrlen = txbuf->tcphlen; irdma_ieq_copy_to_txbuf(buf, txbuf, IRDMA_TCP_OFFSET, 0, txbuf->hdrlen); } else { txbuf->maclen = buf->maclen; txbuf->hdrlen = buf->hdrlen; irdma_ieq_copy_to_txbuf(buf, txbuf, 0, 0, buf->hdrlen); } } /** * irdma_ieq_check_first_buf - check if rcv buffer's seq is in range * @buf: receive exception buffer * @fps: first partial sequence number */ static void irdma_ieq_check_first_buf(struct irdma_puda_buf *buf, u32 fps) { u32 offset; if (buf->seqnum < fps) { offset = fps - buf->seqnum; if (offset > buf->datalen) return; buf->data += offset; buf->datalen -= (u16)offset; buf->seqnum = fps; } } /** * irdma_ieq_compl_pfpdu - write txbuf with full fpdu * @ieq: ieq resource * @rxlist: ieq's received buffer list * @pbufl: temporary list for buffers for fpdu * @txbuf: tx buffer for fpdu * @fpdu_len: total length of fpdu */ static void irdma_ieq_compl_pfpdu(struct irdma_puda_rsrc *ieq, struct list_head *rxlist, struct list_head *pbufl, struct irdma_puda_buf *txbuf, u16 fpdu_len) { struct irdma_puda_buf *buf; u32 nextseqnum; u16 txoffset, bufoffset; buf = irdma_puda_get_listbuf(pbufl); if (!buf) return; nextseqnum = buf->seqnum + fpdu_len; irdma_ieq_setup_tx_buf(buf, txbuf); if (buf->vsi->dev->hw_attrs.uk_attrs.hw_rev >= IRDMA_GEN_2) { txoffset = txbuf->hdrlen; txbuf->totallen = txbuf->hdrlen + fpdu_len; txbuf->data = (u8 *)txbuf->mem.va + txoffset; } else { txoffset = buf->hdrlen; txbuf->totallen = buf->hdrlen + fpdu_len; txbuf->data = (u8 *)txbuf->mem.va + buf->hdrlen; } bufoffset = (u16)(buf->data - (u8 *)buf->mem.va); do { if (buf->datalen >= fpdu_len) { /* copied full fpdu */ irdma_ieq_copy_to_txbuf(buf, txbuf, bufoffset, txoffset, fpdu_len); buf->datalen -= fpdu_len; buf->data += fpdu_len; buf->seqnum = nextseqnum; break; } /* copy partial fpdu */ irdma_ieq_copy_to_txbuf(buf, txbuf, bufoffset, txoffset, buf->datalen); txoffset += buf->datalen; fpdu_len -= buf->datalen; irdma_puda_ret_bufpool(ieq, buf); buf = irdma_puda_get_listbuf(pbufl); if (!buf) return; bufoffset = (u16)(buf->data - (u8 *)buf->mem.va); } while (1); /* last buffer on the list */ if (buf->datalen) list_add(&buf->list, rxlist); else irdma_puda_ret_bufpool(ieq, buf); } /** * irdma_ieq_create_pbufl - create buffer list for single fpdu * @pfpdu: pointer to fpdu * @rxlist: resource list for receive ieq buffers * @pbufl: temp.
list for buffers for fpddu * @buf: first receive buffer * @fpdu_len: total length of fpdu */ static int irdma_ieq_create_pbufl(struct irdma_pfpdu *pfpdu, struct list_head *rxlist, struct list_head *pbufl, struct irdma_puda_buf *buf, u16 fpdu_len) { int status = 0; struct irdma_puda_buf *nextbuf; u32 nextseqnum; u16 plen = fpdu_len - buf->datalen; bool done = false; nextseqnum = buf->seqnum + buf->datalen; do { nextbuf = irdma_puda_get_listbuf(rxlist); if (!nextbuf) { status = -ENOBUFS; break; } list_add_tail(&nextbuf->list, pbufl); if (nextbuf->seqnum != nextseqnum) { pfpdu->bad_seq_num++; status = -ERANGE; break; } if (nextbuf->datalen >= plen) { done = true; } else { plen -= nextbuf->datalen; nextseqnum = nextbuf->seqnum + nextbuf->datalen; } } while (!done); return status; } /** * irdma_ieq_handle_partial - process partial fpdu buffer * @ieq: ieq resource * @pfpdu: partial management per user qp * @buf: receive buffer * @fpdu_len: fpdu len in the buffer */ static int irdma_ieq_handle_partial(struct irdma_puda_rsrc *ieq, struct irdma_pfpdu *pfpdu, struct irdma_puda_buf *buf, u16 fpdu_len) { int status = 0; u8 *crcptr; u32 mpacrc; u32 seqnum = buf->seqnum; struct list_head pbufl; /* partial buffer list */ struct irdma_puda_buf *txbuf = NULL; struct list_head *rxlist = &pfpdu->rxlist; ieq->partials_handled++; INIT_LIST_HEAD(&pbufl); list_add(&buf->list, &pbufl); status = irdma_ieq_create_pbufl(pfpdu, rxlist, &pbufl, buf, fpdu_len); if (status) goto error; txbuf = irdma_puda_get_bufpool(ieq); if (!txbuf) { pfpdu->no_tx_bufs++; status = -ENOBUFS; goto error; } irdma_ieq_compl_pfpdu(ieq, rxlist, &pbufl, txbuf, fpdu_len); irdma_ieq_update_tcpip_info(txbuf, fpdu_len, seqnum); crcptr = txbuf->data + fpdu_len - 4; mpacrc = *(u32 *)crcptr; if (ieq->check_crc) { status = irdma_ieq_check_mpacrc(ieq->hash_desc, txbuf->data, (fpdu_len - 4), mpacrc); if (status) { irdma_debug(ieq->dev, IRDMA_DEBUG_IEQ, "error bad crc\n"); pfpdu->mpa_crc_err = true; goto error; } } irdma_debug_buf(ieq->dev, IRDMA_DEBUG_IEQ, "IEQ TX BUFFER", txbuf->mem.va, txbuf->totallen); if (ieq->dev->hw_attrs.uk_attrs.hw_rev >= IRDMA_GEN_2) txbuf->ah_id = pfpdu->ah->ah_info.ah_idx; txbuf->do_lpb = true; irdma_puda_send_buf(ieq, txbuf); pfpdu->rcv_nxt = seqnum + fpdu_len; return status; error: while (!list_empty(&pbufl)) { buf = (struct irdma_puda_buf *)(&pbufl)->prev; list_move(&buf->list, rxlist); } if (txbuf) irdma_puda_ret_bufpool(ieq, txbuf); return status; } /** * irdma_ieq_process_buf - process buffer rcvd for ieq * @ieq: ieq resource * @pfpdu: partial management per user qp * @buf: receive buffer */ static int irdma_ieq_process_buf(struct irdma_puda_rsrc *ieq, struct irdma_pfpdu *pfpdu, struct irdma_puda_buf *buf) { u16 fpdu_len = 0; u16 datalen = buf->datalen; u8 *datap = buf->data; u8 *crcptr; u16 ioffset = 0; u32 mpacrc; u32 seqnum = buf->seqnum; u16 len = 0; u16 full = 0; bool partial = false; struct irdma_puda_buf *txbuf; struct list_head *rxlist = &pfpdu->rxlist; int ret = 0; ioffset = (u16)(buf->data - (u8 *)buf->mem.va); while (datalen) { fpdu_len = irdma_ieq_get_fpdu_len(pfpdu, datap, buf->seqnum); if (!fpdu_len) { irdma_debug(ieq->dev, IRDMA_DEBUG_IEQ, "error bad fpdu len\n"); list_add(&buf->list, rxlist); pfpdu->mpa_crc_err = true; return -EINVAL; } if (datalen < fpdu_len) { partial = true; break; } crcptr = datap + fpdu_len - 4; mpacrc = *(u32 *)crcptr; if (ieq->check_crc) ret = irdma_ieq_check_mpacrc(ieq->hash_desc, datap, fpdu_len - 4, mpacrc); if (ret) { list_add(&buf->list, rxlist); irdma_debug(ieq->dev, 
IRDMA_DEBUG_ERR, "IRDMA_ERR_MPA_CRC\n"); pfpdu->mpa_crc_err = true; return ret; } full++; pfpdu->fpdu_processed++; ieq->fpdu_processed++; datap += fpdu_len; len += fpdu_len; datalen -= fpdu_len; } if (full) { /* copy full fpdus into the txbuf and send them out */ txbuf = irdma_puda_get_bufpool(ieq); if (!txbuf) { pfpdu->no_tx_bufs++; list_add(&buf->list, rxlist); return -ENOBUFS; } /* modify txbuf's buffer header */ irdma_ieq_setup_tx_buf(buf, txbuf); /* copy full fpdus to the new buffer */ if (ieq->dev->hw_attrs.uk_attrs.hw_rev >= IRDMA_GEN_2) { irdma_ieq_copy_to_txbuf(buf, txbuf, ioffset, txbuf->hdrlen, len); txbuf->totallen = txbuf->hdrlen + len; txbuf->ah_id = pfpdu->ah->ah_info.ah_idx; } else { irdma_ieq_copy_to_txbuf(buf, txbuf, ioffset, buf->hdrlen, len); txbuf->totallen = buf->hdrlen + len; } irdma_ieq_update_tcpip_info(txbuf, len, buf->seqnum); irdma_debug_buf(ieq->dev, IRDMA_DEBUG_IEQ, "IEQ TX BUFFER", txbuf->mem.va, txbuf->totallen); txbuf->do_lpb = true; irdma_puda_send_buf(ieq, txbuf); if (!datalen) { pfpdu->rcv_nxt = buf->seqnum + len; irdma_puda_ret_bufpool(ieq, buf); return 0; } buf->data = datap; buf->seqnum = seqnum + len; buf->datalen = datalen; pfpdu->rcv_nxt = buf->seqnum; } if (partial) return irdma_ieq_handle_partial(ieq, pfpdu, buf, fpdu_len); return 0; } /** * irdma_ieq_process_fpdus - process fpdu buffers on the qp's list * @qp: qp for which partial fpdus are queued * @ieq: ieq resource */ void irdma_ieq_process_fpdus(struct irdma_sc_qp *qp, struct irdma_puda_rsrc *ieq) { struct irdma_pfpdu *pfpdu = &qp->pfpdu; struct list_head *rxlist = &pfpdu->rxlist; struct irdma_puda_buf *buf; int status; do { if (list_empty(rxlist)) break; buf = irdma_puda_get_listbuf(rxlist); if (!buf) { irdma_debug(ieq->dev, IRDMA_DEBUG_IEQ, "error no buf\n"); break; } if (buf->seqnum != pfpdu->rcv_nxt) { /* This could be an out of order or missing packet */ pfpdu->out_of_order++; list_add(&buf->list, rxlist); break; } /* keep processing buffers from the head of the list */ status = irdma_ieq_process_buf(ieq, pfpdu, buf); if (status && pfpdu->mpa_crc_err) { while (!list_empty(rxlist)) { buf = irdma_puda_get_listbuf(rxlist); irdma_puda_ret_bufpool(ieq, buf); pfpdu->crc_err++; ieq->crc_err++; } /* create CQP for AE */ irdma_ieq_mpa_crc_ae(ieq->dev, qp); } } while (!status); } /** * irdma_ieq_create_ah - create an address handle for IEQ * @qp: qp pointer * @buf: buf received on IEQ used to create AH */ static int irdma_ieq_create_ah(struct irdma_sc_qp *qp, struct irdma_puda_buf *buf) { struct irdma_ah_info ah_info = {0}; qp->pfpdu.ah_buf = buf; irdma_puda_ieq_get_ah_info(qp, &ah_info); return irdma_puda_create_ah(qp->vsi->dev, &ah_info, false, IRDMA_PUDA_RSRC_TYPE_IEQ, qp, &qp->pfpdu.ah); } /** * irdma_ieq_handle_exception - handle qp's exception * @ieq: ieq resource * @qp: qp receiving exception * @buf: receive buffer */ static void irdma_ieq_handle_exception(struct irdma_puda_rsrc *ieq, struct irdma_sc_qp *qp, struct irdma_puda_buf *buf) { struct irdma_pfpdu *pfpdu = &qp->pfpdu; u32 *hw_host_ctx = (u32 *)qp->hw_host_ctx; u32 rcv_wnd = hw_host_ctx[23]; /* first partial seq # in q2 */ u32 fps = *(u32 *)(qp->q2_buf + Q2_FPSN_OFFSET); struct list_head *rxlist = &pfpdu->rxlist; struct list_head *plist; struct irdma_puda_buf *tmpbuf = NULL; unsigned long flags = 0; u8 hw_rev = qp->dev->hw_attrs.uk_attrs.hw_rev; irdma_debug_buf(ieq->dev, IRDMA_DEBUG_IEQ, "IEQ RX BUFFER", buf->mem.va, buf->totallen); spin_lock_irqsave(&pfpdu->lock, flags); pfpdu->total_ieq_bufs++; if (pfpdu->mpa_crc_err) { pfpdu->crc_err++; goto error; }
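	/*
	 * fps is the first partial sequence number HW reported in the q2
	 * buffer; if it no longer matches the tracked pfpdu->fps, the peer
	 * has started a new partial FPDU stream, so the per-QP tracking
	 * state is reset below before this buffer is queued.
	 */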
if (pfpdu->mode && fps != pfpdu->fps) { /* clean up qp as it is new partial sequence */ irdma_ieq_cleanup_qp(ieq, qp); irdma_debug(ieq->dev, IRDMA_DEBUG_IEQ, "restarting new partial\n"); pfpdu->mode = false; } if (!pfpdu->mode) { irdma_debug_buf(ieq->dev, IRDMA_DEBUG_IEQ, "Q2 BUFFER", (u64 *)qp->q2_buf, 128); /* First_Partial_Sequence_Number check */ pfpdu->rcv_nxt = fps; pfpdu->fps = fps; pfpdu->mode = true; pfpdu->max_fpdu_data = (buf->ipv4) ? (ieq->vsi->mtu - IRDMA_MTU_TO_MSS_IPV4) : (ieq->vsi->mtu - IRDMA_MTU_TO_MSS_IPV6); pfpdu->pmode_count++; ieq->pmode_count++; INIT_LIST_HEAD(rxlist); irdma_ieq_check_first_buf(buf, fps); } if (!(rcv_wnd >= (buf->seqnum - pfpdu->rcv_nxt))) { pfpdu->bad_seq_num++; ieq->bad_seq_num++; goto error; } if (!list_empty(rxlist)) { tmpbuf = (struct irdma_puda_buf *)(rxlist)->next; while ((struct list_head *)tmpbuf != rxlist) { if (buf->seqnum == tmpbuf->seqnum) goto error; if ((int)(buf->seqnum - tmpbuf->seqnum) < 0) break; plist = &tmpbuf->list; tmpbuf = (struct irdma_puda_buf *)(plist)->next; } /* Insert buf before tmpbuf */ list_add_tail(&buf->list, &tmpbuf->list); } else { list_add_tail(&buf->list, rxlist); } pfpdu->nextseqnum = buf->seqnum + buf->datalen; pfpdu->lastrcv_buf = buf; if (hw_rev >= IRDMA_GEN_2 && !pfpdu->ah) { irdma_ieq_create_ah(qp, buf); if (!pfpdu->ah) goto error; goto exit; } if (hw_rev == IRDMA_GEN_1) irdma_ieq_process_fpdus(qp, ieq); else if (pfpdu->ah && pfpdu->ah->ah_info.ah_valid) irdma_ieq_process_fpdus(qp, ieq); exit: spin_unlock_irqrestore(&pfpdu->lock, flags); return; error: irdma_puda_ret_bufpool(ieq, buf); spin_unlock_irqrestore(&pfpdu->lock, flags); } /** * irdma_ieq_receive - received exception buffer * @vsi: VSI of device * @buf: exception buffer received */ static void irdma_ieq_receive(struct irdma_sc_vsi *vsi, struct irdma_puda_buf *buf) { struct irdma_puda_rsrc *ieq = vsi->ieq; struct irdma_sc_qp *qp = NULL; u32 wqe_idx = ieq->compl_rxwqe_idx; qp = irdma_ieq_get_qp(vsi->dev, buf); if (!qp) { ieq->stats_bad_qp_id++; irdma_puda_ret_bufpool(ieq, buf); } else { irdma_ieq_handle_exception(ieq, qp, buf); } /* * ieq->rx_wqe_idx is used by irdma_puda_replenish_rq() on which wqe_idx to start replenish rq */ if (!ieq->rxq_invalid_cnt) ieq->rx_wqe_idx = wqe_idx; ieq->rxq_invalid_cnt++; } /** * irdma_ieq_tx_compl - put back after sending completed exception buffer * @vsi: sc VSI struct * @sqwrid: pointer to puda buffer */ static void irdma_ieq_tx_compl(struct irdma_sc_vsi *vsi, void *sqwrid) { struct irdma_puda_rsrc *ieq = vsi->ieq; struct irdma_puda_buf *buf = sqwrid; irdma_puda_ret_bufpool(ieq, buf); } /** * irdma_ieq_cleanup_qp - qp is being destroyed * @ieq: ieq resource * @qp: all pending fpdu buffers */ void irdma_ieq_cleanup_qp(struct irdma_puda_rsrc *ieq, struct irdma_sc_qp *qp) { struct irdma_puda_buf *buf; struct irdma_pfpdu *pfpdu = &qp->pfpdu; struct list_head *rxlist = &pfpdu->rxlist; if (qp->pfpdu.ah) { irdma_puda_free_ah(ieq->dev, qp->pfpdu.ah); qp->pfpdu.ah = NULL; qp->pfpdu.ah_buf = NULL; } if (!pfpdu->mode) return; while (!list_empty(rxlist)) { buf = irdma_puda_get_listbuf(rxlist); irdma_puda_ret_bufpool(ieq, buf); } } diff --git a/sys/dev/irdma/irdma_puda.h b/sys/dev/irdma/irdma_puda.h index 20fb9df9a25e..c853047d20f5 100644 --- a/sys/dev/irdma/irdma_puda.h +++ b/sys/dev/irdma/irdma_puda.h @@ -1,220 +1,220 @@ /*- * SPDX-License-Identifier: GPL-2.0 or Linux-OpenIB * - * Copyright (c) 2015 - 2022 Intel Corporation + * Copyright (c) 2015 - 2023 Intel Corporation * * This software is available to you under a choice 
of one of two * licenses. You may choose to be licensed under the terms of the GNU * General Public License (GPL) Version 2, available from the file * COPYING in the main directory of this source tree, or the * OpenFabrics.org BSD license below: * * Redistribution and use in source and binary forms, with or * without modification, are permitted provided that the following * conditions are met: * * - Redistributions of source code must retain the above * copyright notice, this list of conditions and the following * disclaimer. * * - Redistributions in binary form must reproduce the above * copyright notice, this list of conditions and the following * disclaimer in the documentation and/or other materials * provided with the distribution. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ #ifndef IRDMA_PUDA_H #define IRDMA_PUDA_H #define IRDMA_IEQ_MPA_FRAMING 6 #define IRDMA_TCP_OFFSET 40 #define IRDMA_IPV4_PAD 20 #define IRDMA_MRK_BLK_SZ 512 enum puda_rsrc_type { IRDMA_PUDA_RSRC_TYPE_ILQ = 1, IRDMA_PUDA_RSRC_TYPE_IEQ, IRDMA_PUDA_RSRC_TYPE_MAX, /* Must be last entry */ }; enum puda_rsrc_complete { PUDA_CQ_CREATED = 1, PUDA_QP_CREATED, PUDA_TX_COMPLETE, PUDA_RX_COMPLETE, PUDA_HASH_CRC_COMPLETE, }; struct irdma_sc_dev; struct irdma_sc_qp; struct irdma_sc_cq; struct irdma_puda_cmpl_info { struct irdma_qp_uk *qp; u8 q_type; u8 l3proto; u8 l4proto; u16 vlan; u32 payload_len; u32 compl_error; /* No_err=0, else major and minor err code */ u32 qp_id; u32 wqe_idx; bool ipv4:1; bool smac_valid:1; bool vlan_valid:1; - u8 smac[ETH_ALEN]; + u8 smac[ETHER_ADDR_LEN]; }; struct irdma_puda_send_info { u64 paddr; /* Physical address */ u32 len; u32 ah_id; u8 tcplen; u8 maclen; bool ipv4:1; bool do_lpb:1; void *scratch; }; struct irdma_puda_buf { struct list_head list; /* MUST be first entry */ struct irdma_dma_mem mem; /* DMA memory for the buffer */ struct irdma_puda_buf *next; /* for alloclist in rsrc struct */ struct irdma_virt_mem buf_mem; /* Buffer memory for this buffer */ void *scratch; u8 *iph; u8 *tcph; u8 *data; u16 datalen; u16 vlan_id; u8 tcphlen; /* tcp length in bytes */ u8 maclen; /* mac length in bytes */ u32 totallen; /* machlen+iphlen+tcphlen+datalen */ atomic_t refcount; u8 hdrlen; bool virtdma:1; bool ipv4:1; bool vlan_valid:1; bool do_lpb:1; /* Loopback buffer */ bool smac_valid:1; u32 seqnum; u32 ah_id; - u8 smac[ETH_ALEN]; + u8 smac[ETHER_ADDR_LEN]; struct irdma_sc_vsi *vsi; }; struct irdma_puda_rsrc_info { void (*receive)(struct irdma_sc_vsi *vsi, struct irdma_puda_buf *buf); void (*xmit_complete)(struct irdma_sc_vsi *vsi, void *sqwrid); enum puda_rsrc_type type; /* ILQ or IEQ */ u32 count; u32 pd_id; u32 cq_id; u32 qp_id; u32 sq_size; u32 rq_size; u32 tx_buf_cnt; /* total bufs allocated will be rq_size + tx_buf_cnt */ u16 buf_size; u16 mss; /* FIXME: Windows driver still using this */ u16 stats_idx; bool stats_idx_valid:1; int abi_ver; }; struct irdma_puda_rsrc { struct irdma_sc_cq cq; struct irdma_sc_qp qp; struct irdma_sc_pd sc_pd; struct irdma_sc_dev *dev; struct irdma_sc_vsi *vsi; struct irdma_dma_mem cqmem; struct irdma_dma_mem qpmem; struct irdma_virt_mem 
ilq_mem; enum puda_rsrc_complete cmpl; enum puda_rsrc_type type; u16 buf_size; /*buf must be max datalen + tcpip hdr + mac */ u32 cq_id; u32 qp_id; u32 sq_size; u32 rq_size; u32 cq_size; struct irdma_sq_uk_wr_trk_info *sq_wrtrk_array; u64 *rq_wrid_array; u32 compl_rxwqe_idx; u32 rx_wqe_idx; u32 rxq_invalid_cnt; u32 tx_wqe_avail_cnt; void *hash_desc; struct list_head txpend; struct list_head bufpool; /* free buffers pool list for recv and xmit */ u32 alloc_buf_count; u32 avail_buf_count; /* snapshot of currently available buffers */ spinlock_t bufpool_lock; struct irdma_puda_buf *alloclist; void (*receive)(struct irdma_sc_vsi *vsi, struct irdma_puda_buf *buf); void (*xmit_complete)(struct irdma_sc_vsi *vsi, void *sqwrid); /* puda stats */ u64 stats_buf_alloc_fail; u64 stats_pkt_rcvd; u64 stats_pkt_sent; u64 stats_rcvd_pkt_err; u64 stats_sent_pkt_q; u64 stats_bad_qp_id; /* IEQ stats */ u64 fpdu_processed; u64 bad_seq_num; u64 crc_err; u64 pmode_count; u64 partials_handled; u16 mss; /* FIXME: Windows driver still using this */ u16 stats_idx; bool check_crc:1; bool stats_idx_valid:1; }; struct irdma_puda_buf *irdma_puda_get_bufpool(struct irdma_puda_rsrc *rsrc); void irdma_puda_ret_bufpool(struct irdma_puda_rsrc *rsrc, struct irdma_puda_buf *buf); void irdma_puda_send_buf(struct irdma_puda_rsrc *rsrc, struct irdma_puda_buf *buf); int irdma_puda_send(struct irdma_sc_qp *qp, struct irdma_puda_send_info *info); int irdma_puda_create_rsrc(struct irdma_sc_vsi *vsi, struct irdma_puda_rsrc_info *info); void irdma_puda_dele_rsrc(struct irdma_sc_vsi *vsi, enum puda_rsrc_type type, bool reset); int irdma_puda_poll_cmpl(struct irdma_sc_dev *dev, struct irdma_sc_cq *cq, u32 *compl_err); struct irdma_sc_qp *irdma_ieq_get_qp(struct irdma_sc_dev *dev, struct irdma_puda_buf *buf); int irdma_puda_get_tcpip_info(struct irdma_puda_cmpl_info *info, struct irdma_puda_buf *buf); int irdma_ieq_check_mpacrc(void *desc, void *addr, u32 len, u32 val); int irdma_init_hash_desc(void **desc); void irdma_ieq_mpa_crc_ae(struct irdma_sc_dev *dev, struct irdma_sc_qp *qp); void irdma_free_hash_desc(void *desc); void irdma_ieq_update_tcpip_info(struct irdma_puda_buf *buf, u16 len, u32 seqnum); int irdma_cqp_qp_create_cmd(struct irdma_sc_dev *dev, struct irdma_sc_qp *qp); int irdma_cqp_cq_create_cmd(struct irdma_sc_dev *dev, struct irdma_sc_cq *cq); int irdma_cqp_qp_destroy_cmd(struct irdma_sc_dev *dev, struct irdma_sc_qp *qp); void irdma_cqp_cq_destroy_cmd(struct irdma_sc_dev *dev, struct irdma_sc_cq *cq); void irdma_puda_ieq_get_ah_info(struct irdma_sc_qp *qp, struct irdma_ah_info *ah_info); int irdma_puda_create_ah(struct irdma_sc_dev *dev, struct irdma_ah_info *ah_info, bool wait, enum puda_rsrc_type type, void *cb_param, struct irdma_sc_ah **ah); void irdma_puda_free_ah(struct irdma_sc_dev *dev, struct irdma_sc_ah *ah); void irdma_ieq_process_fpdus(struct irdma_sc_qp *qp, struct irdma_puda_rsrc *ieq); void irdma_ieq_cleanup_qp(struct irdma_puda_rsrc *ieq, struct irdma_sc_qp *qp); #endif /*IRDMA_PROTOS_H */ diff --git a/sys/dev/irdma/irdma_type.h b/sys/dev/irdma/irdma_type.h index ac9860c956d4..9d1242dafea4 100644 --- a/sys/dev/irdma/irdma_type.h +++ b/sys/dev/irdma/irdma_type.h @@ -1,1531 +1,1528 @@ /*- * SPDX-License-Identifier: GPL-2.0 or Linux-OpenIB * - * Copyright (c) 2015 - 2022 Intel Corporation + * Copyright (c) 2015 - 2023 Intel Corporation * * This software is available to you under a choice of one of two * licenses. 
You may choose to be licensed under the terms of the GNU * General Public License (GPL) Version 2, available from the file * COPYING in the main directory of this source tree, or the * OpenFabrics.org BSD license below: * * Redistribution and use in source and binary forms, with or * without modification, are permitted provided that the following * conditions are met: * * - Redistributions of source code must retain the above * copyright notice, this list of conditions and the following * disclaimer. * * - Redistributions in binary form must reproduce the above * copyright notice, this list of conditions and the following * disclaimer in the documentation and/or other materials * provided with the distribution. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ #ifndef IRDMA_TYPE_H #define IRDMA_TYPE_H #include "osdep.h" #include "irdma.h" #include "irdma_user.h" #include "irdma_hmc.h" #include "irdma_uda.h" #include "irdma_ws.h" enum irdma_debug_flag { IRDMA_DEBUG_NONE = 0x00000000, IRDMA_DEBUG_ERR = 0x00000001, IRDMA_DEBUG_INIT = 0x00000002, IRDMA_DEBUG_DEV = 0x00000004, IRDMA_DEBUG_CM = 0x00000008, IRDMA_DEBUG_VERBS = 0x00000010, IRDMA_DEBUG_PUDA = 0x00000020, IRDMA_DEBUG_ILQ = 0x00000040, IRDMA_DEBUG_IEQ = 0x00000080, IRDMA_DEBUG_QP = 0x00000100, IRDMA_DEBUG_CQ = 0x00000200, IRDMA_DEBUG_MR = 0x00000400, IRDMA_DEBUG_PBLE = 0x00000800, IRDMA_DEBUG_WQE = 0x00001000, IRDMA_DEBUG_AEQ = 0x00002000, IRDMA_DEBUG_CQP = 0x00004000, IRDMA_DEBUG_HMC = 0x00008000, IRDMA_DEBUG_USER = 0x00010000, IRDMA_DEBUG_VIRT = 0x00020000, IRDMA_DEBUG_DCB = 0x00040000, IRDMA_DEBUG_CQE = 0x00800000, IRDMA_DEBUG_CLNT = 0x01000000, IRDMA_DEBUG_WS = 0x02000000, IRDMA_DEBUG_STATS = 0x04000000, IRDMA_DEBUG_ALL = 0xFFFFFFFF, }; enum irdma_page_size { IRDMA_PAGE_SIZE_4K = 0, IRDMA_PAGE_SIZE_2M, IRDMA_PAGE_SIZE_1G, }; enum irdma_hdrct_flags { DDP_LEN_FLAG = 0x80, DDP_HDR_FLAG = 0x40, RDMA_HDR_FLAG = 0x20, }; enum irdma_term_layers { LAYER_RDMA = 0, LAYER_DDP = 1, LAYER_MPA = 2, }; enum irdma_term_error_types { RDMAP_REMOTE_PROT = 1, RDMAP_REMOTE_OP = 2, DDP_CATASTROPHIC = 0, DDP_TAGGED_BUF = 1, DDP_UNTAGGED_BUF = 2, DDP_LLP = 3, }; enum irdma_term_rdma_errors { RDMAP_INV_STAG = 0x00, RDMAP_INV_BOUNDS = 0x01, RDMAP_ACCESS = 0x02, RDMAP_UNASSOC_STAG = 0x03, RDMAP_TO_WRAP = 0x04, RDMAP_INV_RDMAP_VER = 0x05, RDMAP_UNEXPECTED_OP = 0x06, RDMAP_CATASTROPHIC_LOCAL = 0x07, RDMAP_CATASTROPHIC_GLOBAL = 0x08, RDMAP_CANT_INV_STAG = 0x09, RDMAP_UNSPECIFIED = 0xff, }; enum irdma_term_ddp_errors { DDP_CATASTROPHIC_LOCAL = 0x00, DDP_TAGGED_INV_STAG = 0x00, DDP_TAGGED_BOUNDS = 0x01, DDP_TAGGED_UNASSOC_STAG = 0x02, DDP_TAGGED_TO_WRAP = 0x03, DDP_TAGGED_INV_DDP_VER = 0x04, DDP_UNTAGGED_INV_QN = 0x01, DDP_UNTAGGED_INV_MSN_NO_BUF = 0x02, DDP_UNTAGGED_INV_MSN_RANGE = 0x03, DDP_UNTAGGED_INV_MO = 0x04, DDP_UNTAGGED_INV_TOO_LONG = 0x05, DDP_UNTAGGED_INV_DDP_VER = 0x06, }; enum irdma_term_mpa_errors { MPA_CLOSED = 0x01, MPA_CRC = 0x02, MPA_MARKER = 0x03, MPA_REQ_RSP = 0x04, }; enum irdma_hw_stats_index { /* gen1 - 32-bit */ IRDMA_HW_STAT_INDEX_IP4RXDISCARD = 0, IRDMA_HW_STAT_INDEX_IP4RXTRUNC = 1, 
IRDMA_HW_STAT_INDEX_IP4TXNOROUTE = 2, IRDMA_HW_STAT_INDEX_IP6RXDISCARD = 3, IRDMA_HW_STAT_INDEX_IP6RXTRUNC = 4, IRDMA_HW_STAT_INDEX_IP6TXNOROUTE = 5, IRDMA_HW_STAT_INDEX_TCPRTXSEG = 6, IRDMA_HW_STAT_INDEX_TCPRXOPTERR = 7, IRDMA_HW_STAT_INDEX_TCPRXPROTOERR = 8, IRDMA_HW_STAT_INDEX_RXVLANERR = 9, /* gen1 - 64-bit */ IRDMA_HW_STAT_INDEX_IP4RXOCTS = 10, IRDMA_HW_STAT_INDEX_IP4RXPKTS = 11, IRDMA_HW_STAT_INDEX_IP4RXFRAGS = 12, IRDMA_HW_STAT_INDEX_IP4RXMCPKTS = 13, IRDMA_HW_STAT_INDEX_IP4TXOCTS = 14, IRDMA_HW_STAT_INDEX_IP4TXPKTS = 15, IRDMA_HW_STAT_INDEX_IP4TXFRAGS = 16, IRDMA_HW_STAT_INDEX_IP4TXMCPKTS = 17, IRDMA_HW_STAT_INDEX_IP6RXOCTS = 18, IRDMA_HW_STAT_INDEX_IP6RXPKTS = 19, IRDMA_HW_STAT_INDEX_IP6RXFRAGS = 20, IRDMA_HW_STAT_INDEX_IP6RXMCPKTS = 21, IRDMA_HW_STAT_INDEX_IP6TXOCTS = 22, IRDMA_HW_STAT_INDEX_IP6TXPKTS = 23, IRDMA_HW_STAT_INDEX_IP6TXFRAGS = 24, IRDMA_HW_STAT_INDEX_IP6TXMCPKTS = 25, IRDMA_HW_STAT_INDEX_TCPRXSEGS = 26, IRDMA_HW_STAT_INDEX_TCPTXSEG = 27, IRDMA_HW_STAT_INDEX_RDMARXRDS = 28, IRDMA_HW_STAT_INDEX_RDMARXSNDS = 29, IRDMA_HW_STAT_INDEX_RDMARXWRS = 30, IRDMA_HW_STAT_INDEX_RDMATXRDS = 31, IRDMA_HW_STAT_INDEX_RDMATXSNDS = 32, IRDMA_HW_STAT_INDEX_RDMATXWRS = 33, IRDMA_HW_STAT_INDEX_RDMAVBND = 34, IRDMA_HW_STAT_INDEX_RDMAVINV = 35, IRDMA_HW_STAT_INDEX_IP4RXMCOCTS = 36, IRDMA_HW_STAT_INDEX_IP4TXMCOCTS = 37, IRDMA_HW_STAT_INDEX_IP6RXMCOCTS = 38, IRDMA_HW_STAT_INDEX_IP6TXMCOCTS = 39, IRDMA_HW_STAT_INDEX_UDPRXPKTS = 40, IRDMA_HW_STAT_INDEX_UDPTXPKTS = 41, IRDMA_HW_STAT_INDEX_MAX_GEN_1 = 42, /* Must be same value as next entry */ /* gen2 - 64-bit */ IRDMA_HW_STAT_INDEX_RXNPECNMARKEDPKTS = 42, /* gen2 - 32-bit */ IRDMA_HW_STAT_INDEX_RXRPCNPHANDLED = 43, IRDMA_HW_STAT_INDEX_RXRPCNPIGNORED = 44, IRDMA_HW_STAT_INDEX_TXNPCNPSENT = 45, IRDMA_HW_STAT_INDEX_MAX_GEN_2 = 46, }; #define IRDMA_MIN_FEATURES 2 enum irdma_feature_type { IRDMA_FEATURE_FW_INFO = 0, IRDMA_HW_VERSION_INFO = 1, IRDMA_QSETS_MAX = 26, IRDMA_MAX_FEATURES, /* Must be last entry */ }; enum irdma_sched_prio_type { IRDMA_PRIO_WEIGHTED_RR = 1, IRDMA_PRIO_STRICT = 2, IRDMA_PRIO_WEIGHTED_STRICT = 3, }; enum irdma_vm_vf_type { IRDMA_VF_TYPE = 0, IRDMA_VM_TYPE, IRDMA_PF_TYPE, }; enum irdma_cqp_hmc_profile { IRDMA_HMC_PROFILE_DEFAULT = 1, IRDMA_HMC_PROFILE_FAVOR_VF = 2, IRDMA_HMC_PROFILE_EQUAL = 3, }; enum irdma_quad_entry_type { IRDMA_QHASH_TYPE_TCP_ESTABLISHED = 1, IRDMA_QHASH_TYPE_TCP_SYN, IRDMA_QHASH_TYPE_UDP_UNICAST, IRDMA_QHASH_TYPE_UDP_MCAST, IRDMA_QHASH_TYPE_ROCE_MCAST, IRDMA_QHASH_TYPE_ROCEV2_HW, }; enum irdma_quad_hash_manage_type { IRDMA_QHASH_MANAGE_TYPE_DELETE = 0, IRDMA_QHASH_MANAGE_TYPE_ADD, IRDMA_QHASH_MANAGE_TYPE_MODIFY, }; enum irdma_syn_rst_handling { IRDMA_SYN_RST_HANDLING_HW_TCP_SECURE = 0, IRDMA_SYN_RST_HANDLING_HW_TCP, IRDMA_SYN_RST_HANDLING_FW_TCP_SECURE, IRDMA_SYN_RST_HANDLING_FW_TCP, }; enum irdma_queue_type { IRDMA_QUEUE_TYPE_SQ_RQ = 0, IRDMA_QUEUE_TYPE_CQP, }; struct irdma_sc_dev; struct irdma_vsi_pestat; struct irdma_dcqcn_cc_params { u8 cc_cfg_valid; u8 min_dec_factor; u8 min_rate; u8 dcqcn_f; u16 rai_factor; u16 hai_factor; u16 dcqcn_t; u32 dcqcn_b; u32 rreduce_mperiod; }; struct irdma_cqp_init_info { u64 cqp_compl_ctx; u64 host_ctx_pa; u64 sq_pa; struct irdma_sc_dev *dev; struct irdma_cqp_quanta *sq; struct irdma_dcqcn_cc_params dcqcn_params; __le64 *host_ctx; u64 *scratch_array; u32 sq_size; u16 hw_maj_ver; u16 hw_min_ver; u8 struct_ver; u8 hmc_profile; u8 ena_vf_count; u8 ceqs_per_vf; bool en_datacenter_tcp:1; bool disable_packed:1; bool rocev2_rto_policy:1; bool en_rem_endpoint_trk:1; enum 
irdma_protocol_used protocol_used; }; struct irdma_terminate_hdr { u8 layer_etype; u8 error_code; u8 hdrct; u8 rsvd; }; struct irdma_cqp_sq_wqe { __le64 buf[IRDMA_CQP_WQE_SIZE]; }; struct irdma_sc_aeqe { __le64 buf[IRDMA_AEQE_SIZE]; }; struct irdma_ceqe { __le64 buf[IRDMA_CEQE_SIZE]; }; struct irdma_cqp_ctx { __le64 buf[IRDMA_CQP_CTX_SIZE]; }; struct irdma_cq_shadow_area { __le64 buf[IRDMA_SHADOW_AREA_SIZE]; }; struct irdma_dev_hw_stats { u64 stats_val[IRDMA_GATHER_STATS_BUF_SIZE / sizeof(u64)]; }; struct irdma_gather_stats { u64 val[IRDMA_GATHER_STATS_BUF_SIZE / sizeof(u64)]; }; struct irdma_hw_stat_map { u16 byteoff; u8 bitoff; u64 bitmask; }; struct irdma_stats_gather_info { bool use_hmc_fcn_index:1; bool use_stats_inst:1; u16 hmc_fcn_index; u16 stats_inst_index; struct irdma_dma_mem stats_buff_mem; void *gather_stats_va; void *last_gather_stats_va; }; struct irdma_vsi_pestat { struct irdma_hw *hw; struct irdma_dev_hw_stats hw_stats; struct irdma_stats_gather_info gather_info; struct OS_TIMER stats_timer; struct irdma_sc_vsi *vsi; spinlock_t lock; /* rdma stats lock */ }; struct irdma_hw { u8 IOMEM *hw_addr; u8 IOMEM *priv_hw_addr; void *dev_context; struct irdma_hmc_info hmc; }; struct irdma_pfpdu { struct list_head rxlist; u32 rcv_nxt; u32 fps; u32 max_fpdu_data; u32 nextseqnum; u32 rcv_start_seq; bool mode:1; bool mpa_crc_err:1; u8 marker_len; u64 total_ieq_bufs; u64 fpdu_processed; u64 bad_seq_num; u64 crc_err; u64 no_tx_bufs; u64 tx_err; u64 out_of_order; u64 pmode_count; struct irdma_sc_ah *ah; struct irdma_puda_buf *ah_buf; spinlock_t lock; /* fpdu processing lock */ struct irdma_puda_buf *lastrcv_buf; }; struct irdma_sc_pd { struct irdma_sc_dev *dev; u32 pd_id; int abi_ver; }; struct irdma_cqp_quanta { __le64 elem[IRDMA_CQP_WQE_SIZE]; }; struct irdma_sc_cqp { u32 size; u64 sq_pa; u64 host_ctx_pa; void *back_cqp; struct irdma_sc_dev *dev; int (*process_cqp_sds)(struct irdma_sc_dev *dev, struct irdma_update_sds_info *info); struct irdma_dma_mem sdbuf; struct irdma_ring sq_ring; struct irdma_cqp_quanta *sq_base; struct irdma_dcqcn_cc_params dcqcn_params; __le64 *host_ctx; u64 *scratch_array; + u64 requested_ops; + atomic64_t completed_ops; u32 cqp_id; u32 sq_size; u32 hw_sq_size; u16 hw_maj_ver; u16 hw_min_ver; u8 struct_ver; u8 polarity; u8 hmc_profile; u8 ena_vf_count; u8 timeout_count; u8 ceqs_per_vf; bool en_datacenter_tcp:1; bool disable_packed:1; bool rocev2_rto_policy:1; bool en_rem_endpoint_trk:1; enum irdma_protocol_used protocol_used; }; struct irdma_sc_aeq { u32 size; u64 aeq_elem_pa; struct irdma_sc_dev *dev; struct irdma_sc_aeqe *aeqe_base; void *pbl_list; u32 elem_cnt; struct irdma_ring aeq_ring; u8 pbl_chunk_size; u32 first_pm_pbl_idx; u32 msix_idx; u8 polarity; bool virtual_map:1; }; struct irdma_sc_ceq { u32 size; u64 ceq_elem_pa; struct irdma_sc_dev *dev; struct irdma_ceqe *ceqe_base; void *pbl_list; u32 ceq_id; u32 elem_cnt; struct irdma_ring ceq_ring; u8 pbl_chunk_size; u8 tph_val; u32 first_pm_pbl_idx; u8 polarity; struct irdma_sc_vsi *vsi; struct irdma_sc_cq **reg_cq; u32 reg_cq_size; spinlock_t req_cq_lock; /* protect access to reg_cq array */ bool virtual_map:1; bool tph_en:1; bool itr_no_expire:1; }; struct irdma_sc_cq { struct irdma_cq_uk cq_uk; u64 cq_pa; u64 shadow_area_pa; struct irdma_sc_dev *dev; struct irdma_sc_vsi *vsi; void *pbl_list; void *back_cq; u32 ceq_id; u32 shadow_read_threshold; u8 pbl_chunk_size; u8 cq_type; u8 tph_val; u32 first_pm_pbl_idx; bool ceqe_mask:1; bool virtual_map:1; bool check_overflow:1; bool ceq_id_valid:1; bool tph_en; }; 
struct irdma_sc_qp { struct irdma_qp_uk qp_uk; u64 sq_pa; u64 rq_pa; u64 hw_host_ctx_pa; u64 shadow_area_pa; u64 q2_pa; struct irdma_sc_dev *dev; struct irdma_sc_vsi *vsi; struct irdma_sc_pd *pd; __le64 *hw_host_ctx; void *llp_stream_handle; struct irdma_pfpdu pfpdu; u32 ieq_qp; u8 *q2_buf; u64 qp_compl_ctx; u32 push_idx; u16 qs_handle; u16 push_offset; u8 flush_wqes_count; u8 sq_tph_val; u8 rq_tph_val; u8 qp_state; u8 hw_sq_size; u8 hw_rq_size; u8 src_mac_addr_idx; bool on_qoslist:1; bool ieq_pass_thru:1; bool sq_tph_en:1; bool rq_tph_en:1; bool rcv_tph_en:1; bool xmit_tph_en:1; bool virtual_map:1; bool flush_sq:1; bool flush_rq:1; bool sq_flush_code:1; bool rq_flush_code:1; enum irdma_flush_opcode flush_code; enum irdma_qp_event_type event_type; u8 term_flags; u8 user_pri; struct list_head list; }; struct irdma_stats_inst_info { bool use_hmc_fcn_index; u16 hmc_fn_id; u16 stats_idx; }; struct irdma_up_info { u8 map[8]; u8 cnp_up_override; u16 hmc_fcn_idx; bool use_vlan:1; bool use_cnp_up_override:1; }; -#define IRDMA_MAX_WS_NODES 0x3FF +#define IRDMA_MAX_WS_NODES 0x3FF #define IRDMA_WS_NODE_INVALID 0xFFFF struct irdma_ws_node_info { u16 id; u16 vsi; u16 parent_id; u16 qs_handle; bool type_leaf:1; bool enable:1; u8 prio_type; u8 tc; u8 weight; }; struct irdma_hmc_fpm_misc { u32 max_ceqs; u32 max_sds; u32 xf_block_size; u32 q1_block_size; u32 ht_multiplier; u32 timer_bucket; u32 rrf_block_size; u32 ooiscf_block_size; }; #define IRDMA_LEAF_DEFAULT_REL_BW 64 #define IRDMA_PARENT_DEFAULT_REL_BW 1 struct irdma_qos { struct list_head qplist; struct mutex qos_mutex; /* protect QoS attributes per QoS level */ u32 l2_sched_node_id; u16 qs_handle; u8 traffic_class; u8 rel_bw; u8 prio_type; bool valid; }; struct irdma_config_check { bool config_ok:1; bool lfc_set:1; bool pfc_set:1; u8 traffic_class; u16 qs_handle; }; #define IRDMA_INVALID_STATS_IDX 0xff struct irdma_sc_vsi { u16 vsi_idx; struct irdma_sc_dev *dev; void *back_vsi; u32 ilq_count; struct irdma_virt_mem ilq_mem; struct irdma_puda_rsrc *ilq; u32 ieq_count; struct irdma_virt_mem ieq_mem; struct irdma_puda_rsrc *ieq; u32 exception_lan_q; u16 mtu; enum irdma_vm_vf_type vm_vf_type; bool stats_inst_alloc:1; bool tc_change_pending:1; bool mtu_change_pending:1; struct irdma_vsi_pestat *pestat; - ATOMIC qp_suspend_reqs; + atomic_t qp_suspend_reqs; int (*register_qset)(struct irdma_sc_vsi *vsi, struct irdma_ws_node *tc_node); void (*unregister_qset)(struct irdma_sc_vsi *vsi, struct irdma_ws_node *tc_node); struct irdma_config_check cfg_check[IRDMA_MAX_USER_PRIORITY]; bool tc_print_warning[IRDMA_MAX_TRAFFIC_CLASS]; u8 qos_rel_bw; u8 qos_prio_type; u16 stats_idx; u8 dscp_map[IRDMA_DSCP_NUM_VAL]; struct irdma_qos qos[IRDMA_MAX_USER_PRIORITY]; bool dscp_mode:1; }; struct irdma_sc_dev { struct list_head cqp_cmd_head; /* head of the CQP command list */ spinlock_t cqp_lock; /* protect CQP list access */ struct irdma_dma_mem vf_fpm_query_buf[IRDMA_MAX_PE_ENA_VF_COUNT]; u64 fpm_query_buf_pa; u64 fpm_commit_buf_pa; __le64 *fpm_query_buf; __le64 *fpm_commit_buf; struct irdma_hw *hw; u8 IOMEM *db_addr; u32 IOMEM *wqe_alloc_db; u32 IOMEM *cq_arm_db; u32 IOMEM *aeq_alloc_db; u32 IOMEM *cqp_db; u32 IOMEM *cq_ack_db; - u32 IOMEM *ceq_itr_mask_db; - u32 IOMEM *aeq_itr_mask_db; u32 IOMEM *hw_regs[IRDMA_MAX_REGS]; u32 ceq_itr; /* Interrupt throttle, usecs between interrupts: 0 disabled. 
2 - 8160 */ u64 hw_masks[IRDMA_MAX_MASKS]; u8 hw_shifts[IRDMA_MAX_SHIFTS]; const struct irdma_hw_stat_map *hw_stats_map; u64 feature_info[IRDMA_MAX_FEATURES]; u64 cqp_cmd_stats[IRDMA_MAX_CQP_OPS]; struct irdma_hw_attrs hw_attrs; struct irdma_hmc_info *hmc_info; struct irdma_sc_cqp *cqp; struct irdma_sc_aeq *aeq; struct irdma_sc_ceq *ceq[IRDMA_CEQ_MAX_COUNT]; struct irdma_sc_cq *ccq; const struct irdma_irq_ops *irq_ops; struct irdma_hmc_fpm_misc hmc_fpm_misc; struct irdma_ws_node *ws_tree_root; struct mutex ws_mutex; /* ws tree mutex */ u32 debug_mask; u16 num_vfs; u16 hmc_fn_id; u8 vf_id; bool vchnl_up:1; bool ceq_valid:1; u8 pci_rev; int (*ws_add)(struct irdma_sc_vsi *vsi, u8 user_pri); void (*ws_remove)(struct irdma_sc_vsi *vsi, u8 user_pri); void (*ws_reset)(struct irdma_sc_vsi *vsi); }; struct irdma_modify_cq_info { u64 cq_pa; struct irdma_cqe *cq_base; u32 cq_size; u32 shadow_read_threshold; u8 pbl_chunk_size; u32 first_pm_pbl_idx; bool virtual_map:1; bool check_overflow; bool cq_resize:1; }; struct irdma_create_qp_info { bool ord_valid:1; bool tcp_ctx_valid:1; bool cq_num_valid:1; bool arp_cache_idx_valid:1; bool mac_valid:1; bool force_lpb; u8 next_iwarp_state; }; struct irdma_modify_qp_info { u64 rx_win0; u64 rx_win1; u16 new_mss; u8 next_iwarp_state; u8 curr_iwarp_state; u8 termlen; bool ord_valid:1; bool tcp_ctx_valid:1; bool udp_ctx_valid:1; bool cq_num_valid:1; bool arp_cache_idx_valid:1; bool reset_tcp_conn:1; bool remove_hash_idx:1; bool dont_send_term:1; bool dont_send_fin:1; bool cached_var_valid:1; bool mss_change:1; bool force_lpb:1; bool mac_valid:1; }; struct irdma_ccq_cqe_info { struct irdma_sc_cqp *cqp; u64 scratch; u32 op_ret_val; u16 maj_err_code; u16 min_err_code; u8 op_code; bool error; }; struct irdma_qos_tc_info { u64 tc_ctx; u8 rel_bw; u8 prio_type; u8 egress_virt_up; u8 ingress_virt_up; }; struct irdma_l2params { struct irdma_qos_tc_info tc_info[IRDMA_MAX_USER_PRIORITY]; u32 num_apps; u16 qs_handle_list[IRDMA_MAX_USER_PRIORITY]; u16 mtu; u8 up2tc[IRDMA_MAX_USER_PRIORITY]; u8 dscp_map[IRDMA_DSCP_NUM_VAL]; u8 num_tc; u8 vsi_rel_bw; u8 vsi_prio_type; bool mtu_changed:1; bool tc_changed:1; bool dscp_mode:1; }; struct irdma_vsi_init_info { struct irdma_sc_dev *dev; void *back_vsi; struct irdma_l2params *params; u16 exception_lan_q; u16 pf_data_vsi_num; enum irdma_vm_vf_type vm_vf_type; int (*register_qset)(struct irdma_sc_vsi *vsi, struct irdma_ws_node *tc_node); void (*unregister_qset)(struct irdma_sc_vsi *vsi, struct irdma_ws_node *tc_node); }; struct irdma_vsi_stats_info { struct irdma_vsi_pestat *pestat; u8 fcn_id; bool alloc_stats_inst; }; struct irdma_device_init_info { u64 fpm_query_buf_pa; u64 fpm_commit_buf_pa; __le64 *fpm_query_buf; __le64 *fpm_commit_buf; struct irdma_hw *hw; void IOMEM *bar0; u16 max_vfs; u16 hmc_fn_id; u32 debug_mask; }; struct irdma_ceq_init_info { u64 ceqe_pa; struct irdma_sc_dev *dev; u64 *ceqe_base; void *pbl_list; u32 elem_cnt; u32 ceq_id; bool virtual_map:1; bool tph_en:1; bool itr_no_expire:1; u8 pbl_chunk_size; u8 tph_val; u32 first_pm_pbl_idx; struct irdma_sc_vsi *vsi; struct irdma_sc_cq **reg_cq; - u32 reg_cq_idx; }; struct irdma_aeq_init_info { u64 aeq_elem_pa; struct irdma_sc_dev *dev; u32 *aeqe_base; void *pbl_list; u32 elem_cnt; bool virtual_map; u8 pbl_chunk_size; u32 first_pm_pbl_idx; u32 msix_idx; }; struct irdma_ccq_init_info { u64 cq_pa; u64 shadow_area_pa; struct irdma_sc_dev *dev; struct irdma_cqe *cq_base; __le64 *shadow_area; void *pbl_list; u32 num_elem; u32 ceq_id; u32 shadow_read_threshold; bool ceqe_mask:1; 
bool ceq_id_valid:1; bool avoid_mem_cflct:1; bool virtual_map:1; bool tph_en:1; u8 tph_val; u8 pbl_chunk_size; u32 first_pm_pbl_idx; struct irdma_sc_vsi *vsi; }; struct irdma_udp_offload_info { bool ipv4:1; bool insert_vlan_tag:1; u8 ttl; u8 tos; u16 src_port; u16 dst_port; u32 dest_ip_addr[4]; u32 snd_mss; u16 vlan_tag; u16 arp_idx; u32 flow_label; u8 udp_state; u32 psn_nxt; u32 lsn; u32 epsn; u32 psn_max; u32 psn_una; u32 local_ipaddr[4]; u32 cwnd; u8 rexmit_thresh; u8 rnr_nak_thresh; }; struct irdma_roce_offload_info { u16 p_key; u32 err_rq_idx; u32 qkey; u32 dest_qp; u8 roce_tver; u8 ack_credits; u8 err_rq_idx_valid; u32 pd_id; u16 ord_size; u16 ird_size; bool is_qp1:1; bool udprivcq_en:1; bool dcqcn_en:1; bool rcv_no_icrc:1; bool wr_rdresp_en:1; bool bind_en:1; bool fast_reg_en:1; bool priv_mode_en:1; bool rd_en:1; bool timely_en:1; bool dctcp_en:1; bool fw_cc_enable:1; bool use_stats_inst:1; u16 t_high; u16 t_low; u8 last_byte_sent; - u8 mac_addr[ETH_ALEN]; + u8 mac_addr[ETHER_ADDR_LEN]; u8 rtomin; }; struct irdma_iwarp_offload_info { u16 rcv_mark_offset; u16 snd_mark_offset; u8 ddp_ver; u8 rdmap_ver; u8 iwarp_mode; u32 err_rq_idx; u32 pd_id; u16 ord_size; u16 ird_size; bool ib_rd_en:1; bool align_hdrs:1; bool rcv_no_mpa_crc:1; bool err_rq_idx_valid:1; bool snd_mark_en:1; bool rcv_mark_en:1; bool wr_rdresp_en:1; bool bind_en:1; bool fast_reg_en:1; bool priv_mode_en:1; bool rd_en:1; bool timely_en:1; bool use_stats_inst:1; bool ecn_en:1; bool dctcp_en:1; u16 t_high; u16 t_low; u8 last_byte_sent; - u8 mac_addr[ETH_ALEN]; + u8 mac_addr[ETHER_ADDR_LEN]; u8 rtomin; }; struct irdma_tcp_offload_info { bool ipv4:1; bool no_nagle:1; bool insert_vlan_tag:1; bool time_stamp:1; bool drop_ooo_seg:1; bool avoid_stretch_ack:1; bool wscale:1; bool ignore_tcp_opt:1; bool ignore_tcp_uns_opt:1; u8 cwnd_inc_limit; u8 dup_ack_thresh; u8 ttl; u8 src_mac_addr_idx; u8 tos; u16 src_port; u16 dst_port; u32 dest_ip_addr[4]; //u32 dest_ip_addr0; //u32 dest_ip_addr1; //u32 dest_ip_addr2; //u32 dest_ip_addr3; u32 snd_mss; u16 syn_rst_handling; u16 vlan_tag; u16 arp_idx; u32 flow_label; u8 tcp_state; u8 snd_wscale; u8 rcv_wscale; u32 time_stamp_recent; u32 time_stamp_age; u32 snd_nxt; u32 snd_wnd; u32 rcv_nxt; u32 rcv_wnd; u32 snd_max; u32 snd_una; u32 srtt; u32 rtt_var; u32 ss_thresh; u32 cwnd; u32 snd_wl1; u32 snd_wl2; u32 max_snd_window; u8 rexmit_thresh; u32 local_ipaddr[4]; }; struct irdma_qp_host_ctx_info { u64 qp_compl_ctx; union { struct irdma_tcp_offload_info *tcp_info; struct irdma_udp_offload_info *udp_info; }; union { struct irdma_iwarp_offload_info *iwarp_info; struct irdma_roce_offload_info *roce_info; }; u32 send_cq_num; u32 rcv_cq_num; u32 rem_endpoint_idx; u16 stats_idx; - bool srq_valid:1; bool tcp_info_valid:1; bool iwarp_info_valid:1; bool stats_idx_valid:1; u8 user_pri; }; struct irdma_aeqe_info { u64 compl_ctx; u32 qp_cq_id; u32 wqe_idx; u16 ae_id; u8 tcp_state; u8 iwarp_state; bool qp:1; bool cq:1; bool sq:1; bool rq:1; bool in_rdrsp_wr:1; bool out_rdrsp:1; bool aeqe_overflow:1; /* This flag is used to determine if we should pass the rq tail * in the QP context for FW/HW. 
It is set when ae_src is rq for GEN1/GEN2 * And additionally set for inbound atomic, read and write for GEN3 */ bool err_rq_idx_valid:1; u8 q2_data_written; u8 ae_src; }; struct irdma_allocate_stag_info { u64 total_len; u64 first_pm_pbl_idx; u32 chunk_size; u32 stag_idx; u32 page_size; u32 pd_id; u16 access_rights; bool remote_access:1; bool use_hmc_fcn_index:1; bool use_pf_rid:1; bool all_memory:1; u16 hmc_fcn_index; }; struct irdma_mw_alloc_info { u32 mw_stag_index; u32 page_size; u32 pd_id; bool remote_access:1; bool mw_wide:1; bool mw1_bind_dont_vldt_key:1; }; struct irdma_reg_ns_stag_info { u64 reg_addr_pa; u64 va; u64 total_len; u32 page_size; u32 chunk_size; u32 first_pm_pbl_index; enum irdma_addressing_type addr_type; irdma_stag_index stag_idx; u16 access_rights; u32 pd_id; irdma_stag_key stag_key; bool use_hmc_fcn_index:1; u16 hmc_fcn_index; bool use_pf_rid:1; bool all_memory:1; }; struct irdma_fast_reg_stag_info { u64 wr_id; u64 reg_addr_pa; u64 fbo; void *va; u64 total_len; u32 page_size; u32 chunk_size; u32 first_pm_pbl_index; enum irdma_addressing_type addr_type; irdma_stag_index stag_idx; u16 access_rights; u32 pd_id; irdma_stag_key stag_key; bool local_fence:1; bool read_fence:1; bool signaled:1; bool push_wqe:1; bool use_hmc_fcn_index:1; u16 hmc_fcn_index; bool use_pf_rid:1; bool defer_flag:1; }; struct irdma_dealloc_stag_info { u32 stag_idx; u32 pd_id; bool mr:1; bool dealloc_pbl:1; }; struct irdma_register_shared_stag { u64 va; enum irdma_addressing_type addr_type; irdma_stag_index new_stag_idx; irdma_stag_index parent_stag_idx; u32 access_rights; u32 pd_id; u32 page_size; irdma_stag_key new_stag_key; }; struct irdma_qp_init_info { struct irdma_qp_uk_init_info qp_uk_init_info; struct irdma_sc_pd *pd; struct irdma_sc_vsi *vsi; __le64 *host_ctx; u8 *q2; u64 sq_pa; u64 rq_pa; u64 host_ctx_pa; u64 q2_pa; u64 shadow_area_pa; u8 sq_tph_val; u8 rq_tph_val; bool sq_tph_en:1; bool rq_tph_en:1; bool rcv_tph_en:1; bool xmit_tph_en:1; bool virtual_map:1; }; struct irdma_cq_init_info { struct irdma_sc_dev *dev; u64 cq_base_pa; u64 shadow_area_pa; u32 ceq_id; u32 shadow_read_threshold; u8 pbl_chunk_size; u32 first_pm_pbl_idx; bool virtual_map:1; bool ceqe_mask:1; bool ceq_id_valid:1; bool tph_en:1; u8 tph_val; u8 type; struct irdma_cq_uk_init_info cq_uk_init_info; struct irdma_sc_vsi *vsi; }; struct irdma_upload_context_info { u64 buf_pa; u32 qp_id; u8 qp_type; bool freeze_qp:1; bool raw_format:1; }; struct irdma_local_mac_entry_info { u8 mac_addr[6]; u16 entry_idx; }; struct irdma_add_arp_cache_entry_info { - u8 mac_addr[ETH_ALEN]; + u8 mac_addr[ETHER_ADDR_LEN]; u32 reach_max; u16 arp_index; bool permanent; }; struct irdma_apbvt_info { u16 port; bool add; }; struct irdma_qhash_table_info { struct irdma_sc_vsi *vsi; enum irdma_quad_hash_manage_type manage; enum irdma_quad_entry_type entry_type; bool vlan_valid:1; bool ipv4_valid:1; - u8 mac_addr[ETH_ALEN]; + u8 mac_addr[ETHER_ADDR_LEN]; u16 vlan_id; u8 user_pri; u32 qp_num; u32 dest_ip[4]; u32 src_ip[4]; u16 dest_port; u16 src_port; }; struct irdma_cqp_manage_push_page_info { u32 push_idx; u16 qs_handle; u8 free_page; u8 push_page_type; }; struct irdma_qp_flush_info { u16 sq_minor_code; u16 sq_major_code; u16 rq_minor_code; u16 rq_major_code; u16 ae_code; u8 ae_src; bool sq:1; bool rq:1; bool userflushcode:1; bool generate_ae:1; }; struct irdma_gen_ae_info { u16 ae_code; u8 ae_src; }; struct irdma_cqp_timeout { u64 compl_cqp_cmds; u32 count; }; struct irdma_irq_ops { void (*irdma_cfg_aeq)(struct irdma_sc_dev *dev, u32 idx, bool 
enable); void (*irdma_cfg_ceq)(struct irdma_sc_dev *dev, u32 ceq_id, u32 idx, bool enable); void (*irdma_dis_irq)(struct irdma_sc_dev *dev, u32 idx); void (*irdma_en_irq)(struct irdma_sc_dev *dev, u32 idx); }; void irdma_sc_ccq_arm(struct irdma_sc_cq *ccq); int irdma_sc_ccq_create(struct irdma_sc_cq *ccq, u64 scratch, bool check_overflow, bool post_sq); int irdma_sc_ccq_destroy(struct irdma_sc_cq *ccq, u64 scratch, bool post_sq); int irdma_sc_ccq_get_cqe_info(struct irdma_sc_cq *ccq, struct irdma_ccq_cqe_info *info); int irdma_sc_ccq_init(struct irdma_sc_cq *ccq, struct irdma_ccq_init_info *info); int irdma_sc_cceq_create(struct irdma_sc_ceq *ceq, u64 scratch); int irdma_sc_cceq_destroy_done(struct irdma_sc_ceq *ceq); int irdma_sc_ceq_destroy(struct irdma_sc_ceq *ceq, u64 scratch, bool post_sq); int irdma_sc_ceq_init(struct irdma_sc_ceq *ceq, struct irdma_ceq_init_info *info); void irdma_sc_cleanup_ceqes(struct irdma_sc_cq *cq, struct irdma_sc_ceq *ceq); void *irdma_sc_process_ceq(struct irdma_sc_dev *dev, struct irdma_sc_ceq *ceq); int irdma_sc_aeq_init(struct irdma_sc_aeq *aeq, struct irdma_aeq_init_info *info); int irdma_sc_get_next_aeqe(struct irdma_sc_aeq *aeq, struct irdma_aeqe_info *info); void irdma_sc_repost_aeq_entries(struct irdma_sc_dev *dev, u32 count); void irdma_sc_pd_init(struct irdma_sc_dev *dev, struct irdma_sc_pd *pd, u32 pd_id, int abi_ver); void irdma_cfg_aeq(struct irdma_sc_dev *dev, u32 idx, bool enable); void irdma_check_cqp_progress(struct irdma_cqp_timeout *cqp_timeout, struct irdma_sc_dev *dev); int irdma_sc_cqp_create(struct irdma_sc_cqp *cqp, u16 *maj_err, u16 *min_err); int irdma_sc_cqp_destroy(struct irdma_sc_cqp *cqp, bool free_hwcqp); int irdma_sc_cqp_init(struct irdma_sc_cqp *cqp, struct irdma_cqp_init_info *info); void irdma_sc_cqp_post_sq(struct irdma_sc_cqp *cqp); int irdma_sc_poll_for_cqp_op_done(struct irdma_sc_cqp *cqp, u8 opcode, struct irdma_ccq_cqe_info *cmpl_info); int irdma_sc_qp_create(struct irdma_sc_qp *qp, struct irdma_create_qp_info *info, u64 scratch, bool post_sq); int irdma_sc_qp_destroy(struct irdma_sc_qp *qp, u64 scratch, bool remove_hash_idx, bool ignore_mw_bnd, bool post_sq); int irdma_sc_qp_flush_wqes(struct irdma_sc_qp *qp, struct irdma_qp_flush_info *info, u64 scratch, bool post_sq); int irdma_sc_qp_init(struct irdma_sc_qp *qp, struct irdma_qp_init_info *info); int irdma_sc_qp_modify(struct irdma_sc_qp *qp, struct irdma_modify_qp_info *info, u64 scratch, bool post_sq); void irdma_sc_send_lsmm(struct irdma_sc_qp *qp, void *lsmm_buf, u32 size, irdma_stag stag); -void irdma_sc_send_lsmm_nostag(struct irdma_sc_qp *qp, void *lsmm_buf, u32 size); void irdma_sc_send_rtt(struct irdma_sc_qp *qp, bool read); void irdma_sc_qp_setctx(struct irdma_sc_qp *qp, __le64 *qp_ctx, struct irdma_qp_host_ctx_info *info); void irdma_sc_qp_setctx_roce(struct irdma_sc_qp *qp, __le64 *qp_ctx, struct irdma_qp_host_ctx_info *info); int irdma_sc_cq_destroy(struct irdma_sc_cq *cq, u64 scratch, bool post_sq); int irdma_sc_cq_init(struct irdma_sc_cq *cq, struct irdma_cq_init_info *info); void irdma_sc_cq_resize(struct irdma_sc_cq *cq, struct irdma_modify_cq_info *info); int irdma_sc_aeq_destroy(struct irdma_sc_aeq *aeq, u64 scratch, bool post_sq); int irdma_sc_static_hmc_pages_allocated(struct irdma_sc_cqp *cqp, u64 scratch, u16 hmc_fn_id, bool post_sq, bool poll_registers); void sc_vsi_update_stats(struct irdma_sc_vsi *vsi); struct cqp_info { union { struct { struct irdma_sc_qp *qp; struct irdma_create_qp_info info; u64 scratch; } qp_create; struct { struct 
irdma_sc_qp *qp; struct irdma_modify_qp_info info; u64 scratch; } qp_modify; struct { struct irdma_sc_qp *qp; u64 scratch; bool remove_hash_idx; bool ignore_mw_bnd; } qp_destroy; struct { struct irdma_sc_cq *cq; u64 scratch; bool check_overflow; } cq_create; struct { struct irdma_sc_cq *cq; struct irdma_modify_cq_info info; u64 scratch; } cq_modify; struct { struct irdma_sc_cq *cq; u64 scratch; } cq_destroy; struct { struct irdma_sc_dev *dev; struct irdma_allocate_stag_info info; u64 scratch; } alloc_stag; struct { struct irdma_sc_dev *dev; struct irdma_mw_alloc_info info; u64 scratch; } mw_alloc; struct { struct irdma_sc_dev *dev; struct irdma_reg_ns_stag_info info; u64 scratch; } mr_reg_non_shared; struct { struct irdma_sc_dev *dev; struct irdma_dealloc_stag_info info; u64 scratch; } dealloc_stag; struct { struct irdma_sc_cqp *cqp; struct irdma_add_arp_cache_entry_info info; u64 scratch; } add_arp_cache_entry; struct { struct irdma_sc_cqp *cqp; u64 scratch; u16 arp_index; } del_arp_cache_entry; struct { struct irdma_sc_cqp *cqp; struct irdma_local_mac_entry_info info; u64 scratch; } add_local_mac_entry; struct { struct irdma_sc_cqp *cqp; u64 scratch; u8 entry_idx; u8 ignore_ref_count; } del_local_mac_entry; struct { struct irdma_sc_cqp *cqp; u64 scratch; } alloc_local_mac_entry; struct { struct irdma_sc_cqp *cqp; struct irdma_cqp_manage_push_page_info info; u64 scratch; } manage_push_page; struct { struct irdma_sc_dev *dev; struct irdma_upload_context_info info; u64 scratch; } qp_upload_context; struct { struct irdma_sc_dev *dev; struct irdma_hmc_fcn_info info; u64 scratch; } manage_hmc_pm; struct { struct irdma_sc_ceq *ceq; u64 scratch; } ceq_create; struct { struct irdma_sc_ceq *ceq; u64 scratch; } ceq_destroy; struct { struct irdma_sc_aeq *aeq; u64 scratch; } aeq_create; struct { struct irdma_sc_aeq *aeq; u64 scratch; } aeq_destroy; struct { struct irdma_sc_qp *qp; struct irdma_qp_flush_info info; u64 scratch; } qp_flush_wqes; struct { struct irdma_sc_qp *qp; struct irdma_gen_ae_info info; u64 scratch; } gen_ae; struct { struct irdma_sc_cqp *cqp; void *fpm_val_va; u64 fpm_val_pa; u16 hmc_fn_id; u64 scratch; } query_fpm_val; struct { struct irdma_sc_cqp *cqp; void *fpm_val_va; u64 fpm_val_pa; u16 hmc_fn_id; u64 scratch; } commit_fpm_val; struct { struct irdma_sc_cqp *cqp; struct irdma_apbvt_info info; u64 scratch; } manage_apbvt_entry; struct { struct irdma_sc_cqp *cqp; struct irdma_qhash_table_info info; u64 scratch; } manage_qhash_table_entry; struct { struct irdma_sc_dev *dev; struct irdma_update_sds_info info; u64 scratch; } update_pe_sds; struct { struct irdma_sc_cqp *cqp; struct irdma_sc_qp *qp; u64 scratch; } suspend_resume; struct { struct irdma_sc_cqp *cqp; struct irdma_ah_info info; u64 scratch; } ah_create; struct { struct irdma_sc_cqp *cqp; struct irdma_ah_info info; u64 scratch; } ah_destroy; struct { struct irdma_sc_cqp *cqp; struct irdma_mcast_grp_info info; u64 scratch; } mc_create; struct { struct irdma_sc_cqp *cqp; struct irdma_mcast_grp_info info; u64 scratch; } mc_destroy; struct { struct irdma_sc_cqp *cqp; struct irdma_mcast_grp_info info; u64 scratch; } mc_modify; struct { struct irdma_sc_cqp *cqp; struct irdma_stats_inst_info info; u64 scratch; } stats_manage; struct { struct irdma_sc_cqp *cqp; struct irdma_stats_gather_info info; u64 scratch; } stats_gather; struct { struct irdma_sc_cqp *cqp; struct irdma_ws_node_info info; u64 scratch; } ws_node; struct { struct irdma_sc_cqp *cqp; struct irdma_up_info info; u64 scratch; } up_map; struct { struct irdma_sc_cqp 
*cqp; struct irdma_dma_mem query_buff_mem; u64 scratch; } query_rdma; } u; }; struct cqp_cmds_info { struct list_head cqp_cmd_entry; u8 cqp_cmd; u8 post_sq; struct cqp_info in; }; __le64 *irdma_sc_cqp_get_next_send_wqe_idx(struct irdma_sc_cqp *cqp, u64 scratch, u32 *wqe_idx); /** * irdma_sc_cqp_get_next_send_wqe - get next wqe on cqp sq * @cqp: struct for cqp hw * @scratch: private data for CQP WQE */ static inline __le64 *irdma_sc_cqp_get_next_send_wqe(struct irdma_sc_cqp *cqp, u64 scratch) { u32 wqe_idx; return irdma_sc_cqp_get_next_send_wqe_idx(cqp, scratch, &wqe_idx); } #endif /* IRDMA_TYPE_H */ diff --git a/sys/dev/irdma/irdma_uda.c b/sys/dev/irdma/irdma_uda.c index 85850a726e74..6aaceac68cdb 100644 --- a/sys/dev/irdma/irdma_uda.c +++ b/sys/dev/irdma/irdma_uda.c @@ -1,311 +1,302 @@ /*- * SPDX-License-Identifier: GPL-2.0 or Linux-OpenIB * - * Copyright (c) 2016 - 2022 Intel Corporation + * Copyright (c) 2016 - 2023 Intel Corporation * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU * General Public License (GPL) Version 2, available from the file * COPYING in the main directory of this source tree, or the * OpenFabrics.org BSD license below: * * Redistribution and use in source and binary forms, with or * without modification, are permitted provided that the following * conditions are met: * * - Redistributions of source code must retain the above * copyright notice, this list of conditions and the following * disclaimer. * * - Redistributions in binary form must reproduce the above * copyright notice, this list of conditions and the following * disclaimer in the documentation and/or other materials * provided with the distribution. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
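
The cqp_info union together with cqp_cmds_info forms a tagged command descriptor: cqp_cmd records which member of in.u is meaningful, post_sq says whether the CQP SQ doorbell should be rung, and cqp_cmd_entry lets the descriptor sit on a pending list until it is dispatched. The sketch below stages an AH-create request in that form; the opcode value and the list being appended to are assumptions, since neither is defined in this hunk.

/*
 * Sketch (assumptions noted in the comments): package an AH-create
 * request as a deferred CQP command.
 */
static void
example_queue_ah_create(struct list_head *pending_list,
                        struct cqp_cmds_info *desc, struct irdma_sc_cqp *cqp,
                        const struct irdma_ah_info *ah_info, u8 cmd_opcode,
                        u64 scratch)
{
        desc->cqp_cmd = cmd_opcode;             /* stand-in for the real opcode value */
        desc->post_sq = 1;                      /* ring the CQP SQ when processed */
        desc->in.u.ah_create.cqp = cqp;
        desc->in.u.ah_create.info = *ah_info;   /* struct copy into the union */
        desc->in.u.ah_create.scratch = scratch;
        list_add_tail(&desc->cqp_cmd_entry, pending_list);
}
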
*/ #include "osdep.h" #include "irdma_hmc.h" #include "irdma_defs.h" #include "irdma_type.h" #include "irdma_protos.h" #include "irdma_uda.h" #include "irdma_uda_d.h" /** * irdma_sc_access_ah() - Create, modify or delete AH * @cqp: struct for cqp hw * @info: ah information * @op: Operation * @scratch: u64 saved to be used during cqp completion */ int irdma_sc_access_ah(struct irdma_sc_cqp *cqp, struct irdma_ah_info *info, u32 op, u64 scratch) { __le64 *wqe; u64 qw1, qw2; wqe = irdma_sc_cqp_get_next_send_wqe(cqp, scratch); if (!wqe) return -ENOSPC; - set_64bit_val(wqe, IRDMA_BYTE_0, LS_64_1(info->mac_addr[5], 16) | - LS_64_1(info->mac_addr[4], 24) | - LS_64_1(info->mac_addr[3], 32) | - LS_64_1(info->mac_addr[2], 40) | - LS_64_1(info->mac_addr[1], 48) | - LS_64_1(info->mac_addr[0], 56)); + set_64bit_val(wqe, IRDMA_BYTE_0, + FIELD_PREP(IRDMAQPC_MACADDRESS, irdma_mac_to_u64(info->mac_addr))); qw1 = FIELD_PREP(IRDMA_UDA_CQPSQ_MAV_PDINDEXLO, info->pd_idx) | FIELD_PREP(IRDMA_UDA_CQPSQ_MAV_TC, info->tc_tos) | FIELD_PREP(IRDMA_UDAQPC_VLANTAG, info->vlan_tag); qw2 = FIELD_PREP(IRDMA_UDA_CQPSQ_MAV_ARPINDEX, info->dst_arpindex) | FIELD_PREP(IRDMA_UDA_CQPSQ_MAV_FLOWLABEL, info->flow_label) | FIELD_PREP(IRDMA_UDA_CQPSQ_MAV_HOPLIMIT, info->hop_ttl) | FIELD_PREP(IRDMA_UDA_CQPSQ_MAV_PDINDEXHI, info->pd_idx >> 16); if (!info->ipv4_valid) { set_64bit_val(wqe, IRDMA_BYTE_40, FIELD_PREP(IRDMA_UDA_CQPSQ_MAV_ADDR0, info->dest_ip_addr[0]) | FIELD_PREP(IRDMA_UDA_CQPSQ_MAV_ADDR1, info->dest_ip_addr[1])); set_64bit_val(wqe, IRDMA_BYTE_32, FIELD_PREP(IRDMA_UDA_CQPSQ_MAV_ADDR2, info->dest_ip_addr[2]) | FIELD_PREP(IRDMA_UDA_CQPSQ_MAV_ADDR3, info->dest_ip_addr[3])); set_64bit_val(wqe, IRDMA_BYTE_56, FIELD_PREP(IRDMA_UDA_CQPSQ_MAV_ADDR0, info->src_ip_addr[0]) | FIELD_PREP(IRDMA_UDA_CQPSQ_MAV_ADDR1, info->src_ip_addr[1])); set_64bit_val(wqe, IRDMA_BYTE_48, FIELD_PREP(IRDMA_UDA_CQPSQ_MAV_ADDR2, info->src_ip_addr[2]) | FIELD_PREP(IRDMA_UDA_CQPSQ_MAV_ADDR3, info->src_ip_addr[3])); } else { set_64bit_val(wqe, IRDMA_BYTE_32, FIELD_PREP(IRDMA_UDA_CQPSQ_MAV_ADDR3, info->dest_ip_addr[0])); set_64bit_val(wqe, IRDMA_BYTE_48, FIELD_PREP(IRDMA_UDA_CQPSQ_MAV_ADDR3, info->src_ip_addr[0])); } set_64bit_val(wqe, IRDMA_BYTE_8, qw1); set_64bit_val(wqe, IRDMA_BYTE_16, qw2); irdma_wmb(); /* need write block before writing WQE header */ set_64bit_val( wqe, IRDMA_BYTE_24, FIELD_PREP(IRDMA_UDA_CQPSQ_MAV_WQEVALID, cqp->polarity) | FIELD_PREP(IRDMA_UDA_CQPSQ_MAV_OPCODE, op) | FIELD_PREP(IRDMA_UDA_CQPSQ_MAV_DOLOOPBACKK, info->do_lpbk) | FIELD_PREP(IRDMA_UDA_CQPSQ_MAV_IPV4VALID, info->ipv4_valid) | FIELD_PREP(IRDMA_UDA_CQPSQ_MAV_AVIDX, info->ah_idx) | FIELD_PREP(IRDMA_UDA_CQPSQ_MAV_INSERTVLANTAG, info->insert_vlan_tag)); irdma_debug_buf(cqp->dev, IRDMA_DEBUG_WQE, "MANAGE_AH WQE", wqe, IRDMA_CQP_WQE_SIZE * 8); irdma_sc_cqp_post_sq(cqp); return 0; } /** * irdma_create_mg_ctx() - create a mcg context * @info: multicast group context info */ static void irdma_create_mg_ctx(struct irdma_mcast_grp_info *info) { struct irdma_mcast_grp_ctx_entry_info *entry_info = NULL; u8 idx = 0; /* index in the array */ u8 ctx_idx = 0; /* index in the MG context */ memset(info->dma_mem_mc.va, 0, IRDMA_MAX_MGS_PER_CTX * sizeof(u64)); for (idx = 0; idx < IRDMA_MAX_MGS_PER_CTX; idx++) { entry_info = &info->mg_ctx_info[idx]; if (entry_info->valid_entry) { set_64bit_val((__le64 *) info->dma_mem_mc.va, ctx_idx * sizeof(u64), FIELD_PREP(IRDMA_UDA_MGCTX_DESTPORT, entry_info->dest_port) | FIELD_PREP(IRDMA_UDA_MGCTX_VALIDENT, entry_info->valid_entry) | 
FIELD_PREP(IRDMA_UDA_MGCTX_QPID, entry_info->qp_id)); ctx_idx++; } } } /** * irdma_access_mcast_grp() - Access mcast group based on op * @cqp: Control QP * @info: multicast group context info * @op: operation to perform * @scratch: u64 saved to be used during cqp completion */ int irdma_access_mcast_grp(struct irdma_sc_cqp *cqp, struct irdma_mcast_grp_info *info, u32 op, u64 scratch) { __le64 *wqe; if (info->mg_id >= IRDMA_UDA_MAX_FSI_MGS) { irdma_debug(cqp->dev, IRDMA_DEBUG_WQE, "mg_id out of range\n"); return -EINVAL; } wqe = irdma_sc_cqp_get_next_send_wqe(cqp, scratch); if (!wqe) { irdma_debug(cqp->dev, IRDMA_DEBUG_WQE, "ring full\n"); return -ENOSPC; } irdma_create_mg_ctx(info); set_64bit_val(wqe, IRDMA_BYTE_32, info->dma_mem_mc.pa); set_64bit_val(wqe, IRDMA_BYTE_16, FIELD_PREP(IRDMA_UDA_CQPSQ_MG_VLANID, info->vlan_id) | FIELD_PREP(IRDMA_UDA_CQPSQ_QS_HANDLE, info->qs_handle)); - set_64bit_val(wqe, IRDMA_BYTE_0, LS_64_1(info->dest_mac_addr[5], 0) | - LS_64_1(info->dest_mac_addr[4], 8) | - LS_64_1(info->dest_mac_addr[3], 16) | - LS_64_1(info->dest_mac_addr[2], 24) | - LS_64_1(info->dest_mac_addr[1], 32) | - LS_64_1(info->dest_mac_addr[0], 40)); + set_64bit_val(wqe, IRDMA_BYTE_0, irdma_mac_to_u64(info->dest_mac_addr)); set_64bit_val(wqe, IRDMA_BYTE_8, FIELD_PREP(IRDMA_UDA_CQPSQ_MG_HMC_FCN_ID, info->hmc_fcn_id)); if (!info->ipv4_valid) { set_64bit_val(wqe, IRDMA_BYTE_56, FIELD_PREP(IRDMA_UDA_CQPSQ_MAV_ADDR0, info->dest_ip_addr[0]) | FIELD_PREP(IRDMA_UDA_CQPSQ_MAV_ADDR1, info->dest_ip_addr[1])); set_64bit_val(wqe, IRDMA_BYTE_48, FIELD_PREP(IRDMA_UDA_CQPSQ_MAV_ADDR2, info->dest_ip_addr[2]) | FIELD_PREP(IRDMA_UDA_CQPSQ_MAV_ADDR3, info->dest_ip_addr[3])); } else { set_64bit_val(wqe, IRDMA_BYTE_48, FIELD_PREP(IRDMA_UDA_CQPSQ_MAV_ADDR3, info->dest_ip_addr[0])); } irdma_wmb(); /* need write memory block before writing the WQE header. 
*/ set_64bit_val(wqe, IRDMA_BYTE_24, FIELD_PREP(IRDMA_UDA_CQPSQ_MG_WQEVALID, cqp->polarity) | FIELD_PREP(IRDMA_UDA_CQPSQ_MG_OPCODE, op) | FIELD_PREP(IRDMA_UDA_CQPSQ_MG_MGIDX, info->mg_id) | FIELD_PREP(IRDMA_UDA_CQPSQ_MG_VLANVALID, info->vlan_valid) | FIELD_PREP(IRDMA_UDA_CQPSQ_MG_IPV4VALID, info->ipv4_valid)); irdma_debug_buf(cqp->dev, IRDMA_DEBUG_WQE, "MANAGE_MCG WQE", wqe, IRDMA_CQP_WQE_SIZE * 8); irdma_debug_buf(cqp->dev, IRDMA_DEBUG_WQE, "MCG_HOST CTX WQE", info->dma_mem_mc.va, IRDMA_MAX_MGS_PER_CTX * 8); irdma_sc_cqp_post_sq(cqp); return 0; } /** * irdma_compare_mgs - Compares two multicast group structures * @entry1: Multcast group info * @entry2: Multcast group info in context */ static bool irdma_compare_mgs(struct irdma_mcast_grp_ctx_entry_info *entry1, struct irdma_mcast_grp_ctx_entry_info *entry2) { if (entry1->dest_port == entry2->dest_port && entry1->qp_id == entry2->qp_id) return true; return false; } /** * irdma_sc_add_mcast_grp - Allocates mcast group entry in ctx * @ctx: Multcast group context * @mg: Multcast group info */ int irdma_sc_add_mcast_grp(struct irdma_mcast_grp_info *ctx, struct irdma_mcast_grp_ctx_entry_info *mg) { u32 idx; bool free_entry_found = false; u32 free_entry_idx = 0; /* find either an identical or a free entry for a multicast group */ for (idx = 0; idx < IRDMA_MAX_MGS_PER_CTX; idx++) { if (ctx->mg_ctx_info[idx].valid_entry) { if (irdma_compare_mgs(&ctx->mg_ctx_info[idx], mg)) { ctx->mg_ctx_info[idx].use_cnt++; return 0; } continue; } if (!free_entry_found) { free_entry_found = true; free_entry_idx = idx; } } if (free_entry_found) { ctx->mg_ctx_info[free_entry_idx] = *mg; ctx->mg_ctx_info[free_entry_idx].valid_entry = true; ctx->mg_ctx_info[free_entry_idx].use_cnt = 1; ctx->no_of_mgs++; return 0; } return -ENOMEM; } /** * irdma_sc_del_mcast_grp - Delete mcast group * @ctx: Multcast group context * @mg: Multcast group info * * Finds and removes a specific mulicast group from context, all * parameters must match to remove a multicast group. */ int irdma_sc_del_mcast_grp(struct irdma_mcast_grp_info *ctx, struct irdma_mcast_grp_ctx_entry_info *mg) { u32 idx; /* find an entry in multicast group context */ for (idx = 0; idx < IRDMA_MAX_MGS_PER_CTX; idx++) { if (!ctx->mg_ctx_info[idx].valid_entry) continue; if (irdma_compare_mgs(mg, &ctx->mg_ctx_info[idx])) { ctx->mg_ctx_info[idx].use_cnt--; if (!ctx->mg_ctx_info[idx].use_cnt) { ctx->mg_ctx_info[idx].valid_entry = false; ctx->no_of_mgs--; /* Remove gap if element was not the last */ if (idx != ctx->no_of_mgs && ctx->no_of_mgs > 0) { irdma_memcpy(&ctx->mg_ctx_info[idx], &ctx->mg_ctx_info[ctx->no_of_mgs - 1], sizeof(ctx->mg_ctx_info[idx])); ctx->mg_ctx_info[ctx->no_of_mgs - 1].valid_entry = false; } } return 0; } } return -EINVAL; } diff --git a/sys/dev/irdma/irdma_uda.h b/sys/dev/irdma/irdma_uda.h index fcf6c875ea45..8a43898ca810 100644 --- a/sys/dev/irdma/irdma_uda.h +++ b/sys/dev/irdma/irdma_uda.h @@ -1,119 +1,119 @@ /*- * SPDX-License-Identifier: GPL-2.0 or Linux-OpenIB * - * Copyright (c) 2016 - 2021 Intel Corporation + * Copyright (c) 2016 - 2023 Intel Corporation * * This software is available to you under a choice of one of two * licenses. 
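
Both WQE writers above now fold the six MAC bytes into one 64-bit value, via irdma_mac_to_u64() and, on the AH path, a FIELD_PREP() into the MAC-address field, instead of or-ing per-byte LS_64_1() shifts. The helper below is not the driver's implementation; it simply reproduces the byte order the removed shift ladder encoded, with mac_addr[0] ending up in the most significant of the six occupied bytes, which is presumably what irdma_mac_to_u64() produces before the AH path shifts it into position.

/*
 * Sketch: equivalent of the removed LS_64_1() ladder.  mac[0] lands in
 * bits 47:40 and mac[5] in bits 7:0, matching the multicast-group write;
 * the AH write additionally positions the value with FIELD_PREP().
 */
static inline u64
example_mac_to_u64(const u8 mac[ETHER_ADDR_LEN])
{
        u64 val = 0;
        int i;

        for (i = 0; i < ETHER_ADDR_LEN; i++)
                val = (val << 8) | mac[i];

        return val;
}
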
You may choose to be licensed under the terms of the GNU * General Public License (GPL) Version 2, available from the file * COPYING in the main directory of this source tree, or the * OpenFabrics.org BSD license below: * * Redistribution and use in source and binary forms, with or * without modification, are permitted provided that the following * conditions are met: * * - Redistributions of source code must retain the above * copyright notice, this list of conditions and the following * disclaimer. * * - Redistributions in binary form must reproduce the above * copyright notice, this list of conditions and the following * disclaimer in the documentation and/or other materials * provided with the distribution. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ #ifndef IRDMA_UDA_H #define IRDMA_UDA_H #define IRDMA_UDA_MAX_FSI_MGS 4096 #define IRDMA_UDA_MAX_PFS 16 #define IRDMA_UDA_MAX_VFS 128 struct irdma_sc_cqp; struct irdma_ah_info { struct irdma_sc_vsi *vsi; u32 pd_idx; u32 dst_arpindex; u32 dest_ip_addr[4]; u32 src_ip_addr[4]; u32 flow_label; u32 ah_idx; u16 vlan_tag; u8 insert_vlan_tag; u8 tc_tos; u8 hop_ttl; - u8 mac_addr[ETH_ALEN]; + u8 mac_addr[ETHER_ADDR_LEN]; bool ah_valid:1; bool ipv4_valid:1; bool do_lpbk:1; }; struct irdma_sc_ah { struct irdma_sc_dev *dev; struct irdma_ah_info ah_info; }; int irdma_sc_add_mcast_grp(struct irdma_mcast_grp_info *ctx, struct irdma_mcast_grp_ctx_entry_info *mg); int irdma_sc_del_mcast_grp(struct irdma_mcast_grp_info *ctx, struct irdma_mcast_grp_ctx_entry_info *mg); int irdma_sc_access_ah(struct irdma_sc_cqp *cqp, struct irdma_ah_info *info, u32 op, u64 scratch); int irdma_access_mcast_grp(struct irdma_sc_cqp *cqp, struct irdma_mcast_grp_info *info, u32 op, u64 scratch); static inline void irdma_sc_init_ah(struct irdma_sc_dev *dev, struct irdma_sc_ah *ah) { ah->dev = dev; } static inline int irdma_sc_create_ah(struct irdma_sc_cqp *cqp, struct irdma_ah_info *info, u64 scratch) { return irdma_sc_access_ah(cqp, info, IRDMA_CQP_OP_CREATE_ADDR_HANDLE, scratch); } static inline int irdma_sc_destroy_ah(struct irdma_sc_cqp *cqp, struct irdma_ah_info *info, u64 scratch) { return irdma_sc_access_ah(cqp, info, IRDMA_CQP_OP_DESTROY_ADDR_HANDLE, scratch); } static inline int irdma_sc_create_mcast_grp(struct irdma_sc_cqp *cqp, struct irdma_mcast_grp_info *info, u64 scratch) { return irdma_access_mcast_grp(cqp, info, IRDMA_CQP_OP_CREATE_MCAST_GRP, scratch); } static inline int irdma_sc_modify_mcast_grp(struct irdma_sc_cqp *cqp, struct irdma_mcast_grp_info *info, u64 scratch) { return irdma_access_mcast_grp(cqp, info, IRDMA_CQP_OP_MODIFY_MCAST_GRP, scratch); } static inline int irdma_sc_destroy_mcast_grp(struct irdma_sc_cqp *cqp, struct irdma_mcast_grp_info *info, u64 scratch) { return irdma_access_mcast_grp(cqp, info, IRDMA_CQP_OP_DESTROY_MCAST_GRP, scratch); } #endif /* IRDMA_UDA_H */ diff --git a/sys/dev/irdma/irdma_uk.c b/sys/dev/irdma/irdma_uk.c index 76648af33488..30dd02284277 100644 --- a/sys/dev/irdma/irdma_uk.c +++ b/sys/dev/irdma/irdma_uk.c @@ -1,1900 +1,1828 @@ /*- * SPDX-License-Identifier: GPL-2.0 or Linux-OpenIB * - * Copyright 
(c) 2015 - 2022 Intel Corporation + * Copyright (c) 2015 - 2023 Intel Corporation * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU * General Public License (GPL) Version 2, available from the file * COPYING in the main directory of this source tree, or the * OpenFabrics.org BSD license below: * * Redistribution and use in source and binary forms, with or * without modification, are permitted provided that the following * conditions are met: * * - Redistributions of source code must retain the above * copyright notice, this list of conditions and the following * disclaimer. * * - Redistributions in binary form must reproduce the above * copyright notice, this list of conditions and the following * disclaimer in the documentation and/or other materials * provided with the distribution. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ #include "osdep.h" #include "irdma_defs.h" #include "irdma_user.h" #include "irdma.h" /** * irdma_set_fragment - set fragment in wqe * @wqe: wqe for setting fragment * @offset: offset value * @sge: sge length and stag * @valid: The wqe valid */ static void irdma_set_fragment(__le64 * wqe, u32 offset, struct irdma_sge *sge, u8 valid) { if (sge) { set_64bit_val(wqe, offset, FIELD_PREP(IRDMAQPSQ_FRAG_TO, sge->tag_off)); set_64bit_val(wqe, offset + IRDMA_BYTE_8, FIELD_PREP(IRDMAQPSQ_VALID, valid) | FIELD_PREP(IRDMAQPSQ_FRAG_LEN, sge->len) | FIELD_PREP(IRDMAQPSQ_FRAG_STAG, sge->stag)); } else { set_64bit_val(wqe, offset, 0); set_64bit_val(wqe, offset + IRDMA_BYTE_8, FIELD_PREP(IRDMAQPSQ_VALID, valid)); } } /** * irdma_set_fragment_gen_1 - set fragment in wqe * @wqe: wqe for setting fragment * @offset: offset value * @sge: sge length and stag * @valid: wqe valid flag */ static void irdma_set_fragment_gen_1(__le64 * wqe, u32 offset, struct irdma_sge *sge, u8 valid) { if (sge) { set_64bit_val(wqe, offset, FIELD_PREP(IRDMAQPSQ_FRAG_TO, sge->tag_off)); set_64bit_val(wqe, offset + IRDMA_BYTE_8, FIELD_PREP(IRDMAQPSQ_GEN1_FRAG_LEN, sge->len) | FIELD_PREP(IRDMAQPSQ_GEN1_FRAG_STAG, sge->stag)); } else { set_64bit_val(wqe, offset, 0); set_64bit_val(wqe, offset + IRDMA_BYTE_8, 0); } } /** * irdma_nop_hdr - Format header section of noop WQE * @qp: hw qp ptr */ static inline u64 irdma_nop_hdr(struct irdma_qp_uk *qp){ return FIELD_PREP(IRDMAQPSQ_OPCODE, IRDMAQP_OP_NOP) | FIELD_PREP(IRDMAQPSQ_SIGCOMPL, false) | FIELD_PREP(IRDMAQPSQ_VALID, qp->swqe_polarity); } /** * irdma_nop_1 - insert a NOP wqe * @qp: hw qp ptr */ static int irdma_nop_1(struct irdma_qp_uk *qp) { __le64 *wqe; u32 wqe_idx; if (!qp->sq_ring.head) return -EINVAL; wqe_idx = IRDMA_RING_CURRENT_HEAD(qp->sq_ring); wqe = qp->sq_base[wqe_idx].elem; qp->sq_wrtrk_array[wqe_idx].quanta = IRDMA_QP_WQE_MIN_QUANTA; set_64bit_val(wqe, IRDMA_BYTE_0, 0); set_64bit_val(wqe, IRDMA_BYTE_8, 0); set_64bit_val(wqe, IRDMA_BYTE_16, 0); /* make sure WQE is written before valid bit is set */ irdma_wmb(); set_64bit_val(wqe, IRDMA_BYTE_24, irdma_nop_hdr(qp)); return 0; } /** * irdma_clr_wqes - clear next 128 sq entries * @qp: hw qp 
ptr * @qp_wqe_idx: wqe_idx */ void irdma_clr_wqes(struct irdma_qp_uk *qp, u32 qp_wqe_idx) { __le64 *wqe; u32 wqe_idx; if (!(qp_wqe_idx & 0x7F)) { wqe_idx = (qp_wqe_idx + 128) % qp->sq_ring.size; wqe = qp->sq_base[wqe_idx].elem; if (wqe_idx) memset(wqe, qp->swqe_polarity ? 0 : 0xFF, 0x1000); else memset(wqe, qp->swqe_polarity ? 0xFF : 0, 0x1000); } } /** * irdma_uk_qp_post_wr - ring doorbell * @qp: hw qp ptr */ void irdma_uk_qp_post_wr(struct irdma_qp_uk *qp) { u64 temp; u32 hw_sq_tail; u32 sw_sq_head; /* valid bit is written and loads completed before reading shadow */ irdma_mb(); /* read the doorbell shadow area */ get_64bit_val(qp->shadow_area, IRDMA_BYTE_0, &temp); hw_sq_tail = (u32)FIELD_GET(IRDMA_QP_DBSA_HW_SQ_TAIL, temp); sw_sq_head = IRDMA_RING_CURRENT_HEAD(qp->sq_ring); if (sw_sq_head != qp->initial_ring.head) { if (qp->push_dropped) { db_wr32(qp->qp_id, qp->wqe_alloc_db); qp->push_dropped = false; } else if (sw_sq_head != hw_sq_tail) { if (sw_sq_head > qp->initial_ring.head) { if (hw_sq_tail >= qp->initial_ring.head && hw_sq_tail < sw_sq_head) db_wr32(qp->qp_id, qp->wqe_alloc_db); } else { if (hw_sq_tail >= qp->initial_ring.head || hw_sq_tail < sw_sq_head) db_wr32(qp->qp_id, qp->wqe_alloc_db); } } } qp->initial_ring.head = qp->sq_ring.head; } /** * irdma_qp_ring_push_db - ring qp doorbell * @qp: hw qp ptr * @wqe_idx: wqe index */ static void irdma_qp_ring_push_db(struct irdma_qp_uk *qp, u32 wqe_idx) { set_32bit_val(qp->push_db, 0, FIELD_PREP(IRDMA_WQEALLOC_WQE_DESC_INDEX, wqe_idx >> 3) | qp->qp_id); qp->initial_ring.head = qp->sq_ring.head; qp->push_mode = true; qp->push_dropped = false; } void irdma_qp_push_wqe(struct irdma_qp_uk *qp, __le64 * wqe, u16 quanta, u32 wqe_idx, bool post_sq) { __le64 *push; if (IRDMA_RING_CURRENT_HEAD(qp->initial_ring) != IRDMA_RING_CURRENT_TAIL(qp->sq_ring) && !qp->push_mode) { if (post_sq) irdma_uk_qp_post_wr(qp); } else { push = (__le64 *) ((uintptr_t)qp->push_wqe + (wqe_idx & 0x7) * 0x20); irdma_memcpy(push, wqe, quanta * IRDMA_QP_WQE_MIN_SIZE); irdma_qp_ring_push_db(qp, wqe_idx); } } /** * irdma_qp_get_next_send_wqe - pad with NOP if needed, return where next WR should go * @qp: hw qp ptr * @wqe_idx: return wqe index * @quanta: (in/out) ptr to size of WR in quanta. 
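
The doorbell logic in irdma_uk_qp_post_wr() above rings only when hardware has not already consumed past the span of WQEs posted since the previous doorbell; the two nested branches handle the case where the software head wrapped around the ring. The predicate below restates that decision for readability. It is an illustration, not a drop-in replacement, and it deliberately omits the push_dropped shortcut and the shadow-area read.

/*
 * Illustration: should the SQ doorbell be rung?  prev_head is the head at
 * the previous doorbell (qp->initial_ring.head), new_head the current
 * software head, hw_tail what the shadow area says hardware consumed.
 */
static bool
example_need_sq_doorbell(u32 prev_head, u32 new_head, u32 hw_tail)
{
        if (new_head == prev_head)      /* nothing new posted */
                return false;
        if (new_head == hw_tail)        /* hardware already caught up */
                return false;
        if (new_head > prev_head)       /* no wrap: ring if hw_tail sits in [prev_head, new_head) */
                return hw_tail >= prev_head && hw_tail < new_head;
        /* wrapped: the posted span is [prev_head, end of ring) plus [0, new_head) */
        return hw_tail >= prev_head || hw_tail < new_head;
}
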
Modified in case pad is needed * @total_size: size of WR in bytes * @info: info on WR */ __le64 * irdma_qp_get_next_send_wqe(struct irdma_qp_uk *qp, u32 *wqe_idx, u16 *quanta, u32 total_size, struct irdma_post_sq_info *info) { __le64 *wqe; __le64 *wqe_0 = NULL; u32 nop_wqe_idx; u16 avail_quanta, wqe_quanta = *quanta; u16 i; avail_quanta = qp->uk_attrs->max_hw_sq_chunk - (IRDMA_RING_CURRENT_HEAD(qp->sq_ring) % qp->uk_attrs->max_hw_sq_chunk); if (*quanta <= avail_quanta) { /* WR fits in current chunk */ if (*quanta > IRDMA_SQ_RING_FREE_QUANTA(qp->sq_ring)) return NULL; } else { /* Need to pad with NOP */ if (*quanta + avail_quanta > IRDMA_SQ_RING_FREE_QUANTA(qp->sq_ring)) return NULL; nop_wqe_idx = IRDMA_RING_CURRENT_HEAD(qp->sq_ring); for (i = 0; i < avail_quanta; i++) { irdma_nop_1(qp); IRDMA_RING_MOVE_HEAD_NOCHECK(qp->sq_ring); } if (qp->push_db && info->push_wqe) irdma_qp_push_wqe(qp, qp->sq_base[nop_wqe_idx].elem, avail_quanta, nop_wqe_idx, true); } *wqe_idx = IRDMA_RING_CURRENT_HEAD(qp->sq_ring); if (!*wqe_idx) qp->swqe_polarity = !qp->swqe_polarity; IRDMA_RING_MOVE_HEAD_BY_COUNT_NOCHECK(qp->sq_ring, *quanta); irdma_clr_wqes(qp, *wqe_idx); wqe = qp->sq_base[*wqe_idx].elem; if (qp->uk_attrs->hw_rev == IRDMA_GEN_1 && wqe_quanta == 1 && (IRDMA_RING_CURRENT_HEAD(qp->sq_ring) & 1)) { wqe_0 = qp->sq_base[IRDMA_RING_CURRENT_HEAD(qp->sq_ring)].elem; - wqe_0[3] = cpu_to_le64(FIELD_PREP(IRDMAQPSQ_VALID, !qp->swqe_polarity)); + wqe_0[3] = cpu_to_le64(FIELD_PREP(IRDMAQPSQ_VALID, + qp->swqe_polarity ? 0 : 1)); } qp->sq_wrtrk_array[*wqe_idx].wrid = info->wr_id; qp->sq_wrtrk_array[*wqe_idx].wr_len = total_size; qp->sq_wrtrk_array[*wqe_idx].quanta = wqe_quanta; qp->sq_wrtrk_array[*wqe_idx].signaled = info->signaled; return wqe; } /** * irdma_qp_get_next_recv_wqe - get next qp's rcv wqe * @qp: hw qp ptr * @wqe_idx: return wqe index */ __le64 * irdma_qp_get_next_recv_wqe(struct irdma_qp_uk *qp, u32 *wqe_idx) { __le64 *wqe; int ret_code; if (IRDMA_RING_FULL_ERR(qp->rq_ring)) return NULL; IRDMA_ATOMIC_RING_MOVE_HEAD(qp->rq_ring, *wqe_idx, ret_code); if (ret_code) return NULL; if (!*wqe_idx) qp->rwqe_polarity = !qp->rwqe_polarity; /* rq_wqe_size_multiplier is no of 32 byte quanta in one rq wqe */ wqe = qp->rq_base[*wqe_idx * qp->rq_wqe_size_multiplier].elem; return wqe; } /** * irdma_uk_rdma_write - rdma write operation * @qp: hw qp ptr * @info: post sq information * @post_sq: flag to post sq */ int irdma_uk_rdma_write(struct irdma_qp_uk *qp, struct irdma_post_sq_info *info, bool post_sq) { u64 hdr; __le64 *wqe; struct irdma_rdma_write *op_info; u32 i, wqe_idx; u32 total_size = 0, byte_off; int ret_code; u32 frag_cnt, addl_frag_cnt; bool read_fence = false; u16 quanta; info->push_wqe = qp->push_db ? true : false; op_info = &info->op.rdma_write; if (op_info->num_lo_sges > qp->max_sq_frag_cnt) return -EINVAL; for (i = 0; i < op_info->num_lo_sges; i++) total_size += op_info->lo_sg_list[i].len; read_fence |= info->read_fence; if (info->imm_data_valid) frag_cnt = op_info->num_lo_sges + 1; else frag_cnt = op_info->num_lo_sges; addl_frag_cnt = frag_cnt > 1 ? 
(frag_cnt - 1) : 0; ret_code = irdma_fragcnt_to_quanta_sq(frag_cnt, &quanta); if (ret_code) return ret_code; wqe = irdma_qp_get_next_send_wqe(qp, &wqe_idx, &quanta, total_size, info); if (!wqe) return -ENOSPC; qp->sq_wrtrk_array[wqe_idx].signaled = info->signaled; set_64bit_val(wqe, IRDMA_BYTE_16, FIELD_PREP(IRDMAQPSQ_FRAG_TO, op_info->rem_addr.tag_off)); if (info->imm_data_valid) { set_64bit_val(wqe, IRDMA_BYTE_0, FIELD_PREP(IRDMAQPSQ_IMMDATA, info->imm_data)); i = 0; } else { qp->wqe_ops.iw_set_fragment(wqe, IRDMA_BYTE_0, op_info->lo_sg_list, qp->swqe_polarity); i = 1; } for (byte_off = IRDMA_BYTE_32; i < op_info->num_lo_sges; i++) { qp->wqe_ops.iw_set_fragment(wqe, byte_off, &op_info->lo_sg_list[i], qp->swqe_polarity); byte_off += 16; } /* if not an odd number set valid bit in next fragment */ if (qp->uk_attrs->hw_rev >= IRDMA_GEN_2 && !(frag_cnt & 0x01) && frag_cnt) { qp->wqe_ops.iw_set_fragment(wqe, byte_off, NULL, qp->swqe_polarity); if (qp->uk_attrs->hw_rev == IRDMA_GEN_2) ++addl_frag_cnt; } hdr = FIELD_PREP(IRDMAQPSQ_REMSTAG, op_info->rem_addr.stag) | FIELD_PREP(IRDMAQPSQ_OPCODE, info->op_type) | FIELD_PREP(IRDMAQPSQ_IMMDATAFLAG, info->imm_data_valid) | FIELD_PREP(IRDMAQPSQ_REPORTRTT, info->report_rtt) | FIELD_PREP(IRDMAQPSQ_ADDFRAGCNT, addl_frag_cnt) | FIELD_PREP(IRDMAQPSQ_PUSHWQE, info->push_wqe) | FIELD_PREP(IRDMAQPSQ_READFENCE, read_fence) | FIELD_PREP(IRDMAQPSQ_LOCALFENCE, info->local_fence) | FIELD_PREP(IRDMAQPSQ_SIGCOMPL, info->signaled) | FIELD_PREP(IRDMAQPSQ_VALID, qp->swqe_polarity); irdma_wmb(); /* make sure WQE is populated before valid bit is set */ set_64bit_val(wqe, IRDMA_BYTE_24, hdr); if (info->push_wqe) irdma_qp_push_wqe(qp, wqe, quanta, wqe_idx, post_sq); else if (post_sq) irdma_uk_qp_post_wr(qp); return 0; } /** * irdma_uk_rdma_read - rdma read command * @qp: hw qp ptr * @info: post sq information * @inv_stag: flag for inv_stag * @post_sq: flag to post sq */ int irdma_uk_rdma_read(struct irdma_qp_uk *qp, struct irdma_post_sq_info *info, bool inv_stag, bool post_sq) { struct irdma_rdma_read *op_info; int ret_code; u32 i, byte_off, total_size = 0; bool local_fence = false; bool ord_fence = false; u32 addl_frag_cnt; __le64 *wqe; u32 wqe_idx; u16 quanta; u64 hdr; info->push_wqe = qp->push_db ? true : false; op_info = &info->op.rdma_read; if (qp->max_sq_frag_cnt < op_info->num_lo_sges) return -EINVAL; for (i = 0; i < op_info->num_lo_sges; i++) total_size += op_info->lo_sg_list[i].len; ret_code = irdma_fragcnt_to_quanta_sq(op_info->num_lo_sges, &quanta); if (ret_code) return ret_code; wqe = irdma_qp_get_next_send_wqe(qp, &wqe_idx, &quanta, total_size, info); if (!wqe) return -ENOSPC; if (qp->rd_fence_rate && (qp->ord_cnt++ == qp->rd_fence_rate)) { ord_fence = true; qp->ord_cnt = 0; } qp->sq_wrtrk_array[wqe_idx].signaled = info->signaled; addl_frag_cnt = op_info->num_lo_sges > 1 ? 
(op_info->num_lo_sges - 1) : 0; local_fence |= info->local_fence; qp->wqe_ops.iw_set_fragment(wqe, IRDMA_BYTE_0, op_info->lo_sg_list, qp->swqe_polarity); for (i = 1, byte_off = IRDMA_BYTE_32; i < op_info->num_lo_sges; ++i) { qp->wqe_ops.iw_set_fragment(wqe, byte_off, &op_info->lo_sg_list[i], qp->swqe_polarity); byte_off += IRDMA_BYTE_16; } /* if not an odd number set valid bit in next fragment */ if (qp->uk_attrs->hw_rev >= IRDMA_GEN_2 && !(op_info->num_lo_sges & 0x01) && op_info->num_lo_sges) { qp->wqe_ops.iw_set_fragment(wqe, byte_off, NULL, qp->swqe_polarity); if (qp->uk_attrs->hw_rev == IRDMA_GEN_2) ++addl_frag_cnt; } set_64bit_val(wqe, IRDMA_BYTE_16, FIELD_PREP(IRDMAQPSQ_FRAG_TO, op_info->rem_addr.tag_off)); hdr = FIELD_PREP(IRDMAQPSQ_REMSTAG, op_info->rem_addr.stag) | FIELD_PREP(IRDMAQPSQ_REPORTRTT, (info->report_rtt ? 1 : 0)) | FIELD_PREP(IRDMAQPSQ_ADDFRAGCNT, addl_frag_cnt) | FIELD_PREP(IRDMAQPSQ_OPCODE, (inv_stag ? IRDMAQP_OP_RDMA_READ_LOC_INV : IRDMAQP_OP_RDMA_READ)) | FIELD_PREP(IRDMAQPSQ_PUSHWQE, info->push_wqe) | FIELD_PREP(IRDMAQPSQ_READFENCE, info->read_fence || ord_fence ? 1 : 0) | FIELD_PREP(IRDMAQPSQ_LOCALFENCE, local_fence) | FIELD_PREP(IRDMAQPSQ_SIGCOMPL, info->signaled) | FIELD_PREP(IRDMAQPSQ_VALID, qp->swqe_polarity); irdma_wmb(); /* make sure WQE is populated before valid bit is set */ set_64bit_val(wqe, IRDMA_BYTE_24, hdr); if (info->push_wqe) irdma_qp_push_wqe(qp, wqe, quanta, wqe_idx, post_sq); else if (post_sq) irdma_uk_qp_post_wr(qp); return 0; } /** * irdma_uk_send - rdma send command * @qp: hw qp ptr * @info: post sq information * @post_sq: flag to post sq */ int irdma_uk_send(struct irdma_qp_uk *qp, struct irdma_post_sq_info *info, bool post_sq) { __le64 *wqe; struct irdma_post_send *op_info; u64 hdr; u32 i, wqe_idx, total_size = 0, byte_off; int ret_code; u32 frag_cnt, addl_frag_cnt; bool read_fence = false; u16 quanta; info->push_wqe = qp->push_db ? true : false; op_info = &info->op.send; if (qp->max_sq_frag_cnt < op_info->num_sges) return -EINVAL; for (i = 0; i < op_info->num_sges; i++) total_size += op_info->sg_list[i].len; if (info->imm_data_valid) frag_cnt = op_info->num_sges + 1; else frag_cnt = op_info->num_sges; ret_code = irdma_fragcnt_to_quanta_sq(frag_cnt, &quanta); if (ret_code) return ret_code; wqe = irdma_qp_get_next_send_wqe(qp, &wqe_idx, &quanta, total_size, info); if (!wqe) return -ENOSPC; read_fence |= info->read_fence; addl_frag_cnt = frag_cnt > 1 ? (frag_cnt - 1) : 0; if (info->imm_data_valid) { set_64bit_val(wqe, IRDMA_BYTE_0, FIELD_PREP(IRDMAQPSQ_IMMDATA, info->imm_data)); i = 0; } else { qp->wqe_ops.iw_set_fragment(wqe, IRDMA_BYTE_0, frag_cnt ? op_info->sg_list : NULL, qp->swqe_polarity); i = 1; } for (byte_off = IRDMA_BYTE_32; i < op_info->num_sges; i++) { qp->wqe_ops.iw_set_fragment(wqe, byte_off, &op_info->sg_list[i], qp->swqe_polarity); byte_off += IRDMA_BYTE_16; } /* if not an odd number set valid bit in next fragment */ if (qp->uk_attrs->hw_rev >= IRDMA_GEN_2 && !(frag_cnt & 0x01) && frag_cnt) { qp->wqe_ops.iw_set_fragment(wqe, byte_off, NULL, qp->swqe_polarity); if (qp->uk_attrs->hw_rev == IRDMA_GEN_2) ++addl_frag_cnt; } set_64bit_val(wqe, IRDMA_BYTE_16, FIELD_PREP(IRDMAQPSQ_DESTQKEY, op_info->qkey) | FIELD_PREP(IRDMAQPSQ_DESTQPN, op_info->dest_qp)); hdr = FIELD_PREP(IRDMAQPSQ_REMSTAG, info->stag_to_inv) | FIELD_PREP(IRDMAQPSQ_AHID, op_info->ah_id) | FIELD_PREP(IRDMAQPSQ_IMMDATAFLAG, (info->imm_data_valid ? 1 : 0)) | FIELD_PREP(IRDMAQPSQ_REPORTRTT, (info->report_rtt ? 
1 : 0)) | FIELD_PREP(IRDMAQPSQ_OPCODE, info->op_type) | FIELD_PREP(IRDMAQPSQ_ADDFRAGCNT, addl_frag_cnt) | FIELD_PREP(IRDMAQPSQ_PUSHWQE, info->push_wqe) | FIELD_PREP(IRDMAQPSQ_READFENCE, read_fence) | FIELD_PREP(IRDMAQPSQ_LOCALFENCE, info->local_fence) | FIELD_PREP(IRDMAQPSQ_SIGCOMPL, info->signaled) | FIELD_PREP(IRDMAQPSQ_UDPHEADER, info->udp_hdr) | FIELD_PREP(IRDMAQPSQ_L4LEN, info->l4len) | FIELD_PREP(IRDMAQPSQ_VALID, qp->swqe_polarity); irdma_wmb(); /* make sure WQE is populated before valid bit is set */ set_64bit_val(wqe, IRDMA_BYTE_24, hdr); if (info->push_wqe) irdma_qp_push_wqe(qp, wqe, quanta, wqe_idx, post_sq); else if (post_sq) irdma_uk_qp_post_wr(qp); return 0; } -/** - * irdma_set_mw_bind_wqe_gen_1 - set mw bind wqe - * @wqe: wqe for setting fragment - * @op_info: info for setting bind wqe values - */ -static void -irdma_set_mw_bind_wqe_gen_1(__le64 * wqe, - struct irdma_bind_window *op_info) -{ - set_64bit_val(wqe, IRDMA_BYTE_0, (uintptr_t)op_info->va); - set_64bit_val(wqe, IRDMA_BYTE_8, - FIELD_PREP(IRDMAQPSQ_PARENTMRSTAG, op_info->mw_stag) | - FIELD_PREP(IRDMAQPSQ_MWSTAG, op_info->mr_stag)); - set_64bit_val(wqe, IRDMA_BYTE_16, op_info->bind_len); -} - /** * irdma_copy_inline_data_gen_1 - Copy inline data to wqe * @wqe: pointer to wqe * @sge_list: table of pointers to inline data * @num_sges: Total inline data length * @polarity: compatibility parameter */ static void irdma_copy_inline_data_gen_1(u8 *wqe, struct irdma_sge *sge_list, u32 num_sges, u8 polarity) { u32 quanta_bytes_remaining = 16; u32 i; for (i = 0; i < num_sges; i++) { u8 *cur_sge = (u8 *)(uintptr_t)sge_list[i].tag_off; u32 sge_len = sge_list[i].len; while (sge_len) { u32 bytes_copied; bytes_copied = min(sge_len, quanta_bytes_remaining); irdma_memcpy(wqe, cur_sge, bytes_copied); wqe += bytes_copied; cur_sge += bytes_copied; quanta_bytes_remaining -= bytes_copied; sge_len -= bytes_copied; if (!quanta_bytes_remaining) { /* Remaining inline bytes reside after hdr */ wqe += 16; quanta_bytes_remaining = 32; } } } } /** * irdma_inline_data_size_to_quanta_gen_1 - based on inline data, quanta * @data_size: data size for inline * * Gets the quanta based on inline and immediate data. */ static inline u16 irdma_inline_data_size_to_quanta_gen_1(u32 data_size) { return data_size <= 16 ? 
IRDMA_QP_WQE_MIN_QUANTA : 2; } -/** - * irdma_set_mw_bind_wqe - set mw bind in wqe - * @wqe: wqe for setting mw bind - * @op_info: info for setting wqe values - */ -static void -irdma_set_mw_bind_wqe(__le64 * wqe, - struct irdma_bind_window *op_info) -{ - set_64bit_val(wqe, IRDMA_BYTE_0, (uintptr_t)op_info->va); - set_64bit_val(wqe, IRDMA_BYTE_8, - FIELD_PREP(IRDMAQPSQ_PARENTMRSTAG, op_info->mr_stag) | - FIELD_PREP(IRDMAQPSQ_MWSTAG, op_info->mw_stag)); - set_64bit_val(wqe, IRDMA_BYTE_16, op_info->bind_len); -} - /** * irdma_copy_inline_data - Copy inline data to wqe * @wqe: pointer to wqe * @sge_list: table of pointers to inline data * @num_sges: number of SGE's * @polarity: polarity of wqe valid bit */ static void irdma_copy_inline_data(u8 *wqe, struct irdma_sge *sge_list, u32 num_sges, u8 polarity) { u8 inline_valid = polarity << IRDMA_INLINE_VALID_S; u32 quanta_bytes_remaining = 8; u32 i; bool first_quanta = true; wqe += 8; for (i = 0; i < num_sges; i++) { u8 *cur_sge = (u8 *)(uintptr_t)sge_list[i].tag_off; u32 sge_len = sge_list[i].len; while (sge_len) { u32 bytes_copied; bytes_copied = min(sge_len, quanta_bytes_remaining); irdma_memcpy(wqe, cur_sge, bytes_copied); wqe += bytes_copied; cur_sge += bytes_copied; quanta_bytes_remaining -= bytes_copied; sge_len -= bytes_copied; if (!quanta_bytes_remaining) { quanta_bytes_remaining = 31; /* Remaining inline bytes reside after hdr */ if (first_quanta) { first_quanta = false; wqe += 16; } else { *wqe = inline_valid; wqe++; } } } } if (!first_quanta && quanta_bytes_remaining < 31) *(wqe + quanta_bytes_remaining) = inline_valid; } /** * irdma_inline_data_size_to_quanta - based on inline data, quanta * @data_size: data size for inline * * Gets the quanta based on inline and immediate data. */ static u16 irdma_inline_data_size_to_quanta(u32 data_size) { if (data_size <= 8) return IRDMA_QP_WQE_MIN_QUANTA; else if (data_size <= 39) return 2; else if (data_size <= 70) return 3; else if (data_size <= 101) return 4; else if (data_size <= 132) return 5; else if (data_size <= 163) return 6; else if (data_size <= 194) return 7; else return 8; } /** * irdma_uk_inline_rdma_write - inline rdma write operation * @qp: hw qp ptr * @info: post sq information * @post_sq: flag to post sq */ int irdma_uk_inline_rdma_write(struct irdma_qp_uk *qp, struct irdma_post_sq_info *info, bool post_sq) { __le64 *wqe; struct irdma_rdma_write *op_info; u64 hdr = 0; u32 wqe_idx; bool read_fence = false; u16 quanta; u32 i, total_size = 0; info->push_wqe = qp->push_db ? true : false; op_info = &info->op.rdma_write; if (unlikely(qp->max_sq_frag_cnt < op_info->num_lo_sges)) return -EINVAL; for (i = 0; i < op_info->num_lo_sges; i++) total_size += op_info->lo_sg_list[i].len; if (unlikely(total_size > qp->max_inline_data)) return -EINVAL; quanta = qp->wqe_ops.iw_inline_data_size_to_quanta(total_size); wqe = irdma_qp_get_next_send_wqe(qp, &wqe_idx, &quanta, total_size, info); if (!wqe) return -ENOSPC; qp->sq_wrtrk_array[wqe_idx].signaled = info->signaled; read_fence |= info->read_fence; set_64bit_val(wqe, IRDMA_BYTE_16, FIELD_PREP(IRDMAQPSQ_FRAG_TO, op_info->rem_addr.tag_off)); hdr = FIELD_PREP(IRDMAQPSQ_REMSTAG, op_info->rem_addr.stag) | FIELD_PREP(IRDMAQPSQ_OPCODE, info->op_type) | FIELD_PREP(IRDMAQPSQ_INLINEDATALEN, total_size) | FIELD_PREP(IRDMAQPSQ_REPORTRTT, info->report_rtt ? 1 : 0) | FIELD_PREP(IRDMAQPSQ_INLINEDATAFLAG, 1) | FIELD_PREP(IRDMAQPSQ_IMMDATAFLAG, info->imm_data_valid ? 1 : 0) | FIELD_PREP(IRDMAQPSQ_PUSHWQE, info->push_wqe ? 
1 : 0) | FIELD_PREP(IRDMAQPSQ_READFENCE, read_fence) | FIELD_PREP(IRDMAQPSQ_LOCALFENCE, info->local_fence) | FIELD_PREP(IRDMAQPSQ_SIGCOMPL, info->signaled) | FIELD_PREP(IRDMAQPSQ_VALID, qp->swqe_polarity); if (info->imm_data_valid) set_64bit_val(wqe, IRDMA_BYTE_0, FIELD_PREP(IRDMAQPSQ_IMMDATA, info->imm_data)); qp->wqe_ops.iw_copy_inline_data((u8 *)wqe, op_info->lo_sg_list, op_info->num_lo_sges, qp->swqe_polarity); irdma_wmb(); /* make sure WQE is populated before valid bit is set */ set_64bit_val(wqe, IRDMA_BYTE_24, hdr); if (info->push_wqe) irdma_qp_push_wqe(qp, wqe, quanta, wqe_idx, post_sq); else if (post_sq) irdma_uk_qp_post_wr(qp); return 0; } /** * irdma_uk_inline_send - inline send operation * @qp: hw qp ptr * @info: post sq information * @post_sq: flag to post sq */ int irdma_uk_inline_send(struct irdma_qp_uk *qp, struct irdma_post_sq_info *info, bool post_sq) { __le64 *wqe; struct irdma_post_send *op_info; u64 hdr; u32 wqe_idx; bool read_fence = false; u16 quanta; u32 i, total_size = 0; info->push_wqe = qp->push_db ? true : false; op_info = &info->op.send; if (unlikely(qp->max_sq_frag_cnt < op_info->num_sges)) return -EINVAL; for (i = 0; i < op_info->num_sges; i++) total_size += op_info->sg_list[i].len; if (unlikely(total_size > qp->max_inline_data)) return -EINVAL; quanta = qp->wqe_ops.iw_inline_data_size_to_quanta(total_size); wqe = irdma_qp_get_next_send_wqe(qp, &wqe_idx, &quanta, total_size, info); if (!wqe) return -ENOSPC; set_64bit_val(wqe, IRDMA_BYTE_16, FIELD_PREP(IRDMAQPSQ_DESTQKEY, op_info->qkey) | FIELD_PREP(IRDMAQPSQ_DESTQPN, op_info->dest_qp)); read_fence |= info->read_fence; hdr = FIELD_PREP(IRDMAQPSQ_REMSTAG, info->stag_to_inv) | FIELD_PREP(IRDMAQPSQ_AHID, op_info->ah_id) | FIELD_PREP(IRDMAQPSQ_OPCODE, info->op_type) | FIELD_PREP(IRDMAQPSQ_INLINEDATALEN, total_size) | FIELD_PREP(IRDMAQPSQ_IMMDATAFLAG, (info->imm_data_valid ? 1 : 0)) | FIELD_PREP(IRDMAQPSQ_REPORTRTT, (info->report_rtt ? 1 : 0)) | FIELD_PREP(IRDMAQPSQ_INLINEDATAFLAG, 1) | FIELD_PREP(IRDMAQPSQ_PUSHWQE, info->push_wqe) | FIELD_PREP(IRDMAQPSQ_READFENCE, read_fence) | FIELD_PREP(IRDMAQPSQ_LOCALFENCE, info->local_fence) | FIELD_PREP(IRDMAQPSQ_SIGCOMPL, info->signaled) | FIELD_PREP(IRDMAQPSQ_UDPHEADER, info->udp_hdr) | FIELD_PREP(IRDMAQPSQ_L4LEN, info->l4len) | FIELD_PREP(IRDMAQPSQ_VALID, qp->swqe_polarity); if (info->imm_data_valid) set_64bit_val(wqe, IRDMA_BYTE_0, FIELD_PREP(IRDMAQPSQ_IMMDATA, info->imm_data)); qp->wqe_ops.iw_copy_inline_data((u8 *)wqe, op_info->sg_list, op_info->num_sges, qp->swqe_polarity); irdma_wmb(); /* make sure WQE is populated before valid bit is set */ set_64bit_val(wqe, IRDMA_BYTE_24, hdr); if (info->push_wqe) irdma_qp_push_wqe(qp, wqe, quanta, wqe_idx, post_sq); else if (post_sq) irdma_uk_qp_post_wr(qp); return 0; } /** * irdma_uk_stag_local_invalidate - stag invalidate operation * @qp: hw qp ptr * @info: post sq information * @post_sq: flag to post sq */ int irdma_uk_stag_local_invalidate(struct irdma_qp_uk *qp, struct irdma_post_sq_info *info, bool post_sq) { __le64 *wqe; struct irdma_inv_local_stag *op_info; u64 hdr; u32 wqe_idx; bool local_fence = false; struct irdma_sge sge = {0}; u16 quanta = IRDMA_QP_WQE_MIN_QUANTA; info->push_wqe = qp->push_db ? 
true : false; op_info = &info->op.inv_local_stag; local_fence = info->local_fence; wqe = irdma_qp_get_next_send_wqe(qp, &wqe_idx, &quanta, 0, info); if (!wqe) return -ENOSPC; sge.stag = op_info->target_stag; qp->wqe_ops.iw_set_fragment(wqe, IRDMA_BYTE_0, &sge, 0); set_64bit_val(wqe, IRDMA_BYTE_16, 0); hdr = FIELD_PREP(IRDMAQPSQ_OPCODE, IRDMA_OP_TYPE_INV_STAG) | FIELD_PREP(IRDMAQPSQ_PUSHWQE, info->push_wqe) | FIELD_PREP(IRDMAQPSQ_READFENCE, info->read_fence) | FIELD_PREP(IRDMAQPSQ_LOCALFENCE, local_fence) | FIELD_PREP(IRDMAQPSQ_SIGCOMPL, info->signaled) | FIELD_PREP(IRDMAQPSQ_VALID, qp->swqe_polarity); irdma_wmb(); /* make sure WQE is populated before valid bit is set */ set_64bit_val(wqe, IRDMA_BYTE_24, hdr); if (info->push_wqe) irdma_qp_push_wqe(qp, wqe, quanta, wqe_idx, post_sq); else if (post_sq) irdma_uk_qp_post_wr(qp); return 0; } /** * irdma_uk_post_receive - post receive wqe * @qp: hw qp ptr * @info: post rq information */ int irdma_uk_post_receive(struct irdma_qp_uk *qp, struct irdma_post_rq_info *info) { u32 wqe_idx, i, byte_off; u32 addl_frag_cnt; __le64 *wqe; u64 hdr; if (qp->max_rq_frag_cnt < info->num_sges) return -EINVAL; wqe = irdma_qp_get_next_recv_wqe(qp, &wqe_idx); if (!wqe) return -ENOSPC; qp->rq_wrid_array[wqe_idx] = info->wr_id; addl_frag_cnt = info->num_sges > 1 ? (info->num_sges - 1) : 0; qp->wqe_ops.iw_set_fragment(wqe, IRDMA_BYTE_0, info->sg_list, qp->rwqe_polarity); for (i = 1, byte_off = IRDMA_BYTE_32; i < info->num_sges; i++) { qp->wqe_ops.iw_set_fragment(wqe, byte_off, &info->sg_list[i], qp->rwqe_polarity); byte_off += 16; } /* if not an odd number set valid bit in next fragment */ if (qp->uk_attrs->hw_rev >= IRDMA_GEN_2 && !(info->num_sges & 0x01) && info->num_sges) { qp->wqe_ops.iw_set_fragment(wqe, byte_off, NULL, qp->rwqe_polarity); if (qp->uk_attrs->hw_rev == IRDMA_GEN_2) ++addl_frag_cnt; } set_64bit_val(wqe, IRDMA_BYTE_16, 0); hdr = FIELD_PREP(IRDMAQPSQ_ADDFRAGCNT, addl_frag_cnt) | FIELD_PREP(IRDMAQPSQ_VALID, qp->rwqe_polarity); irdma_wmb(); /* make sure WQE is populated before valid bit is set */ set_64bit_val(wqe, IRDMA_BYTE_24, hdr); return 0; } /** * irdma_uk_cq_resize - reset the cq buffer info * @cq: cq to resize * @cq_base: new cq buffer addr * @cq_size: number of cqes */ void irdma_uk_cq_resize(struct irdma_cq_uk *cq, void *cq_base, int cq_size) { cq->cq_base = cq_base; cq->cq_size = cq_size; IRDMA_RING_INIT(cq->cq_ring, cq->cq_size); cq->polarity = 1; } /** * irdma_uk_cq_set_resized_cnt - record the count of the resized buffers * @cq: cq to resize * @cq_cnt: the count of the resized cq buffers */ void irdma_uk_cq_set_resized_cnt(struct irdma_cq_uk *cq, u16 cq_cnt) { u64 temp_val; u16 sw_cq_sel; u8 arm_next_se; u8 arm_next; u8 arm_seq_num; get_64bit_val(cq->shadow_area, 32, &temp_val); sw_cq_sel = (u16)FIELD_GET(IRDMA_CQ_DBSA_SW_CQ_SELECT, temp_val); sw_cq_sel += cq_cnt; arm_seq_num = (u8)FIELD_GET(IRDMA_CQ_DBSA_ARM_SEQ_NUM, temp_val); arm_next_se = (u8)FIELD_GET(IRDMA_CQ_DBSA_ARM_NEXT_SE, temp_val); arm_next = (u8)FIELD_GET(IRDMA_CQ_DBSA_ARM_NEXT, temp_val); temp_val = FIELD_PREP(IRDMA_CQ_DBSA_ARM_SEQ_NUM, arm_seq_num) | FIELD_PREP(IRDMA_CQ_DBSA_SW_CQ_SELECT, sw_cq_sel) | FIELD_PREP(IRDMA_CQ_DBSA_ARM_NEXT_SE, arm_next_se) | FIELD_PREP(IRDMA_CQ_DBSA_ARM_NEXT, arm_next); set_64bit_val(cq->shadow_area, 32, temp_val); } /** * irdma_uk_cq_request_notification - cq notification request (door bell) * @cq: hw cq * @cq_notify: notification type */ void irdma_uk_cq_request_notification(struct irdma_cq_uk *cq, enum irdma_cmpl_notify cq_notify) { u64 
temp_val; u16 sw_cq_sel; u8 arm_next_se = 0; u8 arm_next = 0; u8 arm_seq_num; get_64bit_val(cq->shadow_area, IRDMA_BYTE_32, &temp_val); arm_seq_num = (u8)FIELD_GET(IRDMA_CQ_DBSA_ARM_SEQ_NUM, temp_val); arm_seq_num++; sw_cq_sel = (u16)FIELD_GET(IRDMA_CQ_DBSA_SW_CQ_SELECT, temp_val); arm_next_se = (u8)FIELD_GET(IRDMA_CQ_DBSA_ARM_NEXT_SE, temp_val); arm_next_se |= 1; if (cq_notify == IRDMA_CQ_COMPL_EVENT) arm_next = 1; temp_val = FIELD_PREP(IRDMA_CQ_DBSA_ARM_SEQ_NUM, arm_seq_num) | FIELD_PREP(IRDMA_CQ_DBSA_SW_CQ_SELECT, sw_cq_sel) | FIELD_PREP(IRDMA_CQ_DBSA_ARM_NEXT_SE, arm_next_se) | FIELD_PREP(IRDMA_CQ_DBSA_ARM_NEXT, arm_next); set_64bit_val(cq->shadow_area, IRDMA_BYTE_32, temp_val); irdma_wmb(); /* make sure WQE is populated before valid bit is set */ db_wr32(cq->cq_id, cq->cqe_alloc_db); } static int irdma_check_rq_cqe(struct irdma_qp_uk *qp, u32 *array_idx) { u32 exp_idx = (qp->last_rx_cmpl_idx + 1) % qp->rq_size; if (*array_idx != exp_idx) { *array_idx = exp_idx; qp->last_rx_cmpl_idx = exp_idx; return -1; } qp->last_rx_cmpl_idx = *array_idx; return 0; } /** * irdma_skip_duplicate_flush_cmpl - check last cmpl and update wqe if needed * * @ring: sq/rq ring * @flush_seen: information if flush for specific ring was already seen * @comp_status: completion status * @wqe_idx: new value of WQE index returned if there is more work on ring */ static inline int irdma_skip_duplicate_flush_cmpl(struct irdma_ring ring, u8 flush_seen, enum irdma_cmpl_status comp_status, u32 *wqe_idx) { if (flush_seen) { if (IRDMA_RING_MORE_WORK(ring)) *wqe_idx = ring.tail; else return -ENOENT; } return 0; } /** * irdma_detect_unsignaled_cmpls - check if unsignaled cmpl is to be reported * @cq: hw cq * @qp: hw qp * @info: cq poll information collected * @wge_idx: index of the WR in SQ ring */ static int irdma_detect_unsignaled_cmpls(struct irdma_cq_uk *cq, struct irdma_qp_uk *qp, struct irdma_cq_poll_info *info, u32 wqe_idx) { u64 qword0, qword1, qword2, qword3; __le64 *cqe, *wqe; int i; u32 widx; if (qp->sq_wrtrk_array[wqe_idx].signaled == 0) { cqe = IRDMA_GET_CURRENT_CQ_ELEM(cq); irdma_pr_err("%p %d %d\n", cqe, cq->cq_ring.head, wqe_idx); for (i = -10; i <= 10; i++) { IRDMA_GET_CQ_ELEM_AT_OFFSET(cq, i + cq->cq_ring.size, cqe); get_64bit_val(cqe, IRDMA_BYTE_0, &qword0); get_64bit_val(cqe, IRDMA_BYTE_8, &qword1); get_64bit_val(cqe, IRDMA_BYTE_16, &qword2); get_64bit_val(cqe, IRDMA_BYTE_24, &qword3); widx = (u32)FIELD_GET(IRDMA_CQ_WQEIDX, qword3); irdma_pr_err("%d %04x %p %016lx %016lx %016lx %016lx ", i, widx, cqe, qword0, qword1, qword2, qword3); if ((u8)FIELD_GET(IRDMA_CQ_SQ, qword3)) { irdma_pr_err("%lx %x %x %x ", qp->sq_wrtrk_array[widx].wrid, qp->sq_wrtrk_array[widx].wr_len, qp->sq_wrtrk_array[widx].quanta, qp->sq_wrtrk_array[widx].signaled); wqe = qp->sq_base[widx].elem; get_64bit_val(wqe, IRDMA_BYTE_0, &qword0); get_64bit_val(wqe, IRDMA_BYTE_8, &qword1); get_64bit_val(wqe, IRDMA_BYTE_16, &qword2); get_64bit_val(wqe, IRDMA_BYTE_24, &qword3); irdma_pr_err("%016lx %016lx %016lx %016lx \n", qword0, qword1, qword2, qword3); } else { irdma_pr_err("\n"); } } return -ENOENT; } return 0; } /** * irdma_uk_cq_poll_cmpl - get cq completion info * @cq: hw cq * @info: cq poll information returned */ int irdma_uk_cq_poll_cmpl(struct irdma_cq_uk *cq, struct irdma_cq_poll_info *info) { u64 comp_ctx, qword0, qword2, qword3; __le64 *cqe; struct irdma_qp_uk *qp; struct irdma_ring *pring = NULL; u32 wqe_idx; int ret_code; bool move_cq_head = true; u8 polarity; bool ext_valid; __le64 *ext_cqe; if (cq->avoid_mem_cflct) cqe = 
IRDMA_GET_CURRENT_EXTENDED_CQ_ELEM(cq); else cqe = IRDMA_GET_CURRENT_CQ_ELEM(cq); get_64bit_val(cqe, IRDMA_BYTE_24, &qword3); polarity = (u8)FIELD_GET(IRDMA_CQ_VALID, qword3); if (polarity != cq->polarity) return -ENOENT; /* Ensure CQE contents are read after valid bit is checked */ rmb(); ext_valid = (bool)FIELD_GET(IRDMA_CQ_EXTCQE, qword3); if (ext_valid) { u64 qword6, qword7; u32 peek_head; if (cq->avoid_mem_cflct) { ext_cqe = (__le64 *) ((u8 *)cqe + 32); get_64bit_val(ext_cqe, IRDMA_BYTE_24, &qword7); polarity = (u8)FIELD_GET(IRDMA_CQ_VALID, qword7); } else { peek_head = (cq->cq_ring.head + 1) % cq->cq_ring.size; ext_cqe = cq->cq_base[peek_head].buf; get_64bit_val(ext_cqe, IRDMA_BYTE_24, &qword7); polarity = (u8)FIELD_GET(IRDMA_CQ_VALID, qword7); if (!peek_head) polarity ^= 1; } if (polarity != cq->polarity) return -ENOENT; /* Ensure ext CQE contents are read after ext valid bit is checked */ rmb(); info->imm_valid = (bool)FIELD_GET(IRDMA_CQ_IMMVALID, qword7); if (info->imm_valid) { u64 qword4; get_64bit_val(ext_cqe, IRDMA_BYTE_0, &qword4); info->imm_data = (u32)FIELD_GET(IRDMA_CQ_IMMDATALOW32, qword4); } info->ud_smac_valid = (bool)FIELD_GET(IRDMA_CQ_UDSMACVALID, qword7); info->ud_vlan_valid = (bool)FIELD_GET(IRDMA_CQ_UDVLANVALID, qword7); if (info->ud_smac_valid || info->ud_vlan_valid) { get_64bit_val(ext_cqe, IRDMA_BYTE_16, &qword6); if (info->ud_vlan_valid) info->ud_vlan = (u16)FIELD_GET(IRDMA_CQ_UDVLAN, qword6); if (info->ud_smac_valid) { info->ud_smac[5] = qword6 & 0xFF; info->ud_smac[4] = (qword6 >> 8) & 0xFF; info->ud_smac[3] = (qword6 >> 16) & 0xFF; info->ud_smac[2] = (qword6 >> 24) & 0xFF; info->ud_smac[1] = (qword6 >> 32) & 0xFF; info->ud_smac[0] = (qword6 >> 40) & 0xFF; } } } else { info->imm_valid = false; info->ud_smac_valid = false; info->ud_vlan_valid = false; } info->q_type = (u8)FIELD_GET(IRDMA_CQ_SQ, qword3); info->error = (bool)FIELD_GET(IRDMA_CQ_ERROR, qword3); info->push_dropped = (bool)FIELD_GET(IRDMACQ_PSHDROP, qword3); info->ipv4 = (bool)FIELD_GET(IRDMACQ_IPV4, qword3); get_64bit_val(cqe, IRDMA_BYTE_8, &comp_ctx); qp = (struct irdma_qp_uk *)(irdma_uintptr) comp_ctx; if (info->error) { info->major_err = FIELD_GET(IRDMA_CQ_MAJERR, qword3); info->minor_err = FIELD_GET(IRDMA_CQ_MINERR, qword3); switch (info->major_err) { case IRDMA_FLUSH_MAJOR_ERR: /* Set the min error to standard flush error code for remaining cqes */ if (info->minor_err != FLUSH_GENERAL_ERR) { qword3 &= ~IRDMA_CQ_MINERR; qword3 |= FIELD_PREP(IRDMA_CQ_MINERR, FLUSH_GENERAL_ERR); set_64bit_val(cqe, IRDMA_BYTE_24, qword3); } info->comp_status = IRDMA_COMPL_STATUS_FLUSHED; break; default: info->comp_status = IRDMA_COMPL_STATUS_UNKNOWN; break; } } else { info->comp_status = IRDMA_COMPL_STATUS_SUCCESS; } get_64bit_val(cqe, IRDMA_BYTE_0, &qword0); get_64bit_val(cqe, IRDMA_BYTE_16, &qword2); info->stat.raw = (u32)FIELD_GET(IRDMACQ_TCPSQN_ROCEPSN_RTT_TS, qword0); info->qp_id = (u32)FIELD_GET(IRDMACQ_QPID, qword2); info->ud_src_qpn = (u32)FIELD_GET(IRDMACQ_UDSRCQPN, qword2); info->solicited_event = (bool)FIELD_GET(IRDMACQ_SOEVENT, qword3); if (!qp || qp->destroy_pending) { ret_code = -EFAULT; goto exit; } wqe_idx = (u32)FIELD_GET(IRDMA_CQ_WQEIDX, qword3); info->qp_handle = (irdma_qp_handle) (irdma_uintptr) qp; info->op_type = (u8)FIELD_GET(IRDMACQ_OP, qword3); if (info->q_type == IRDMA_CQE_QTYPE_RQ) { u32 array_idx; ret_code = irdma_skip_duplicate_flush_cmpl(qp->rq_ring, qp->rq_flush_seen, info->comp_status, &wqe_idx); if (ret_code != 0) goto exit; array_idx = wqe_idx / qp->rq_wqe_size_multiplier; if 
(info->comp_status == IRDMA_COMPL_STATUS_FLUSHED || info->comp_status == IRDMA_COMPL_STATUS_UNKNOWN) { if (!IRDMA_RING_MORE_WORK(qp->rq_ring)) { ret_code = -ENOENT; goto exit; } info->wr_id = qp->rq_wrid_array[qp->rq_ring.tail]; info->signaled = 1; array_idx = qp->rq_ring.tail; } else { info->wr_id = qp->rq_wrid_array[array_idx]; info->signaled = 1; if (irdma_check_rq_cqe(qp, &array_idx)) { info->wr_id = qp->rq_wrid_array[array_idx]; info->comp_status = IRDMA_COMPL_STATUS_UNKNOWN; IRDMA_RING_SET_TAIL(qp->rq_ring, array_idx + 1); return 0; } } info->bytes_xfered = (u32)FIELD_GET(IRDMACQ_PAYLDLEN, qword0); if (qword3 & IRDMACQ_STAG) { info->stag_invalid_set = true; info->inv_stag = (u32)FIELD_GET(IRDMACQ_INVSTAG, qword2); } else { info->stag_invalid_set = false; } IRDMA_RING_SET_TAIL(qp->rq_ring, array_idx + 1); if (info->comp_status == IRDMA_COMPL_STATUS_FLUSHED) { qp->rq_flush_seen = true; if (!IRDMA_RING_MORE_WORK(qp->rq_ring)) qp->rq_flush_complete = true; else move_cq_head = false; } pring = &qp->rq_ring; } else { /* q_type is IRDMA_CQE_QTYPE_SQ */ if (qp->first_sq_wq) { if (wqe_idx + 1 >= qp->conn_wqes) qp->first_sq_wq = false; if (wqe_idx < qp->conn_wqes && qp->sq_ring.head == qp->sq_ring.tail) { IRDMA_RING_MOVE_HEAD_NOCHECK(cq->cq_ring); IRDMA_RING_MOVE_TAIL(cq->cq_ring); set_64bit_val(cq->shadow_area, IRDMA_BYTE_0, IRDMA_RING_CURRENT_HEAD(cq->cq_ring)); memset(info, 0, sizeof(struct irdma_cq_poll_info)); return irdma_uk_cq_poll_cmpl(cq, info); } } /* cease posting push mode on push drop */ if (info->push_dropped) { qp->push_mode = false; qp->push_dropped = true; } ret_code = irdma_skip_duplicate_flush_cmpl(qp->sq_ring, qp->sq_flush_seen, info->comp_status, &wqe_idx); if (ret_code != 0) goto exit; if (info->comp_status != IRDMA_COMPL_STATUS_FLUSHED) { info->wr_id = qp->sq_wrtrk_array[wqe_idx].wrid; info->signaled = qp->sq_wrtrk_array[wqe_idx].signaled; if (!info->comp_status) info->bytes_xfered = qp->sq_wrtrk_array[wqe_idx].wr_len; ret_code = irdma_detect_unsignaled_cmpls(cq, qp, info, wqe_idx); if (ret_code != 0) goto exit; info->op_type = (u8)FIELD_GET(IRDMACQ_OP, qword3); IRDMA_RING_SET_TAIL(qp->sq_ring, wqe_idx + qp->sq_wrtrk_array[wqe_idx].quanta); } else { unsigned long flags; spin_lock_irqsave(qp->lock, flags); if (!IRDMA_RING_MORE_WORK(qp->sq_ring)) { spin_unlock_irqrestore(qp->lock, flags); ret_code = -ENOENT; goto exit; } do { __le64 *sw_wqe; u64 wqe_qword; u32 tail; tail = qp->sq_ring.tail; sw_wqe = qp->sq_base[tail].elem; get_64bit_val(sw_wqe, IRDMA_BYTE_24, &wqe_qword); info->op_type = (u8)FIELD_GET(IRDMAQPSQ_OPCODE, wqe_qword); IRDMA_RING_SET_TAIL(qp->sq_ring, tail + qp->sq_wrtrk_array[tail].quanta); if (info->op_type != IRDMAQP_OP_NOP) { info->wr_id = qp->sq_wrtrk_array[tail].wrid; info->signaled = qp->sq_wrtrk_array[tail].signaled; info->bytes_xfered = qp->sq_wrtrk_array[tail].wr_len; break; } } while (1); if (info->op_type == IRDMA_OP_TYPE_BIND_MW && info->minor_err == FLUSH_PROT_ERR) info->minor_err = FLUSH_MW_BIND_ERR; qp->sq_flush_seen = true; if (!IRDMA_RING_MORE_WORK(qp->sq_ring)) qp->sq_flush_complete = true; spin_unlock_irqrestore(qp->lock, flags); } pring = &qp->sq_ring; } ret_code = 0; exit: if (!ret_code && info->comp_status == IRDMA_COMPL_STATUS_FLUSHED) { if (pring && IRDMA_RING_MORE_WORK(*pring)) move_cq_head = false; } if (move_cq_head) { IRDMA_RING_MOVE_HEAD_NOCHECK(cq->cq_ring); if (!IRDMA_RING_CURRENT_HEAD(cq->cq_ring)) cq->polarity ^= 1; if (ext_valid && !cq->avoid_mem_cflct) { IRDMA_RING_MOVE_HEAD_NOCHECK(cq->cq_ring); if 
(!IRDMA_RING_CURRENT_HEAD(cq->cq_ring)) cq->polarity ^= 1; } IRDMA_RING_MOVE_TAIL(cq->cq_ring); if (!cq->avoid_mem_cflct && ext_valid) IRDMA_RING_MOVE_TAIL(cq->cq_ring); set_64bit_val(cq->shadow_area, IRDMA_BYTE_0, IRDMA_RING_CURRENT_HEAD(cq->cq_ring)); } else { qword3 &= ~IRDMA_CQ_WQEIDX; qword3 |= FIELD_PREP(IRDMA_CQ_WQEIDX, pring->tail); set_64bit_val(cqe, IRDMA_BYTE_24, qword3); } return ret_code; } /** * irdma_round_up_wq - return round up qp wq depth * @wqdepth: wq depth in quanta to round up */ static int irdma_round_up_wq(u32 wqdepth) { int scount = 1; for (wqdepth--; scount <= 16; scount *= 2) wqdepth |= wqdepth >> scount; return ++wqdepth; } /** * irdma_get_wqe_shift - get shift count for maximum wqe size * @uk_attrs: qp HW attributes * @sge: Maximum Scatter Gather Elements wqe * @inline_data: Maximum inline data size * @shift: Returns the shift needed based on sge * * Shift can be used to left shift the wqe size based on number of SGEs and inlind data size. * For 1 SGE or inline data <= 8, shift = 0 (wqe size of 32 * bytes). For 2 or 3 SGEs or inline data <= 39, shift = 1 (wqe * size of 64 bytes). * For 4-7 SGE's and inline <= 101 Shift of 2 otherwise (wqe * size of 256 bytes). */ void irdma_get_wqe_shift(struct irdma_uk_attrs *uk_attrs, u32 sge, u32 inline_data, u8 *shift) { *shift = 0; if (uk_attrs->hw_rev >= IRDMA_GEN_2) { if (sge > 1 || inline_data > 8) { if (sge < 4 && inline_data <= 39) *shift = 1; else if (sge < 8 && inline_data <= 101) *shift = 2; else *shift = 3; } } else if (sge > 1 || inline_data > 16) { *shift = (sge < 4 && inline_data <= 48) ? 1 : 2; } } /* * irdma_get_sqdepth - get SQ depth (quanta) @uk_attrs: qp HW attributes @sq_size: SQ size @shift: shift which * determines size of WQE @sqdepth: depth of SQ */ int irdma_get_sqdepth(struct irdma_uk_attrs *uk_attrs, u32 sq_size, u8 shift, u32 *sqdepth) { *sqdepth = irdma_round_up_wq((sq_size << shift) + IRDMA_SQ_RSVD); if (*sqdepth < ((u32)uk_attrs->min_hw_wq_size << shift)) *sqdepth = uk_attrs->min_hw_wq_size << shift; else if (*sqdepth > uk_attrs->max_hw_wq_quanta) return -EINVAL; return 0; } /* * irdma_get_rqdepth - get RQ depth (quanta) @uk_attrs: qp HW attributes @rq_size: SRQ size @shift: shift which * determines size of WQE @rqdepth: depth of RQ/SRQ */ int irdma_get_rqdepth(struct irdma_uk_attrs *uk_attrs, u32 rq_size, u8 shift, u32 *rqdepth) { *rqdepth = irdma_round_up_wq((rq_size << shift) + IRDMA_RQ_RSVD); if (*rqdepth < ((u32)uk_attrs->min_hw_wq_size << shift)) *rqdepth = uk_attrs->min_hw_wq_size << shift; else if (*rqdepth > uk_attrs->max_hw_rq_quanta) return -EINVAL; return 0; } static const struct irdma_wqe_uk_ops iw_wqe_uk_ops = { .iw_copy_inline_data = irdma_copy_inline_data, .iw_inline_data_size_to_quanta = irdma_inline_data_size_to_quanta, .iw_set_fragment = irdma_set_fragment, - .iw_set_mw_bind_wqe = irdma_set_mw_bind_wqe, }; static const struct irdma_wqe_uk_ops iw_wqe_uk_ops_gen_1 = { .iw_copy_inline_data = irdma_copy_inline_data_gen_1, .iw_inline_data_size_to_quanta = irdma_inline_data_size_to_quanta_gen_1, .iw_set_fragment = irdma_set_fragment_gen_1, - .iw_set_mw_bind_wqe = irdma_set_mw_bind_wqe_gen_1, }; /** * irdma_setup_connection_wqes - setup WQEs necessary to complete * connection. 
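irdma_round_up_wq() above rounds a quanta count up to the nearest power of two that is at least the requested value by smearing the highest set bit downward and incrementing, and irdma_get_sqdepth()/irdma_get_rqdepth() then raise the result to the device minimum or reject it when it exceeds the device maximum. The short program below mirrors that rounding logic on a few arbitrary sample sizes.

/* Standalone check of the bit-smearing round-up used for WQ depths.
 * The function body mirrors irdma_round_up_wq(); the sample inputs are
 * arbitrary. */
#include <stdio.h>
#include <stdint.h>

static uint32_t round_up_wq(uint32_t wqdepth)
{
    int scount = 1;

    /* smear the top set bit of (wqdepth - 1) into every lower bit,
     * then add one to land on the next power of two */
    for (wqdepth--; scount <= 16; scount *= 2)
        wqdepth |= wqdepth >> scount;

    return ++wqdepth;
}

int main(void)
{
    const uint32_t samples[] = { 1, 8, 9, 258, 300, 1024, 1025 };
    size_t i;

    for (i = 0; i < sizeof(samples) / sizeof(samples[0]); i++)
        printf("%u -> %u\n", samples[i], round_up_wq(samples[i]));
    return 0;
}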
* @qp: hw qp (user and kernel) * @info: qp initialization info */ static void irdma_setup_connection_wqes(struct irdma_qp_uk *qp, struct irdma_qp_uk_init_info *info) { u16 move_cnt = 1; if (qp->uk_attrs->feature_flags & IRDMA_FEATURE_RTS_AE) move_cnt = 3; qp->conn_wqes = move_cnt; IRDMA_RING_MOVE_HEAD_BY_COUNT_NOCHECK(qp->sq_ring, move_cnt); IRDMA_RING_MOVE_TAIL_BY_COUNT(qp->sq_ring, move_cnt); IRDMA_RING_MOVE_HEAD_BY_COUNT_NOCHECK(qp->initial_ring, move_cnt); } /** * irdma_uk_calc_shift_wq - calculate WQE shift for both SQ and RQ * @ukinfo: qp initialization info * @sq_shift: Returns shift of SQ * @rq_shift: Returns shift of RQ */ void irdma_uk_calc_shift_wq(struct irdma_qp_uk_init_info *ukinfo, u8 *sq_shift, u8 *rq_shift) { bool imm_support = ukinfo->uk_attrs->hw_rev >= IRDMA_GEN_2 ? true : false; irdma_get_wqe_shift(ukinfo->uk_attrs, imm_support ? ukinfo->max_sq_frag_cnt + 1 : ukinfo->max_sq_frag_cnt, ukinfo->max_inline_data, sq_shift); irdma_get_wqe_shift(ukinfo->uk_attrs, ukinfo->max_rq_frag_cnt, 0, rq_shift); if (ukinfo->uk_attrs->hw_rev == IRDMA_GEN_1) { if (ukinfo->abi_ver > 4) *rq_shift = IRDMA_MAX_RQ_WQE_SHIFT_GEN1; } } /** * irdma_uk_calc_depth_shift_sq - calculate depth and shift for SQ size. * @ukinfo: qp initialization info * @sq_depth: Returns depth of SQ * @sq_shift: Returns shift of SQ */ int irdma_uk_calc_depth_shift_sq(struct irdma_qp_uk_init_info *ukinfo, u32 *sq_depth, u8 *sq_shift) { bool imm_support = ukinfo->uk_attrs->hw_rev >= IRDMA_GEN_2 ? true : false; int status; irdma_get_wqe_shift(ukinfo->uk_attrs, imm_support ? ukinfo->max_sq_frag_cnt + 1 : ukinfo->max_sq_frag_cnt, ukinfo->max_inline_data, sq_shift); status = irdma_get_sqdepth(ukinfo->uk_attrs, ukinfo->sq_size, *sq_shift, sq_depth); return status; } /** * irdma_uk_calc_depth_shift_rq - calculate depth and shift for RQ size. * @ukinfo: qp initialization info * @rq_depth: Returns depth of RQ * @rq_shift: Returns shift of RQ */ int irdma_uk_calc_depth_shift_rq(struct irdma_qp_uk_init_info *ukinfo, u32 *rq_depth, u8 *rq_shift) { int status; irdma_get_wqe_shift(ukinfo->uk_attrs, ukinfo->max_rq_frag_cnt, 0, rq_shift); if (ukinfo->uk_attrs->hw_rev == IRDMA_GEN_1) { if (ukinfo->abi_ver > 4) *rq_shift = IRDMA_MAX_RQ_WQE_SHIFT_GEN1; } status = irdma_get_rqdepth(ukinfo->uk_attrs, ukinfo->rq_size, *rq_shift, rq_depth); return status; } /** * irdma_uk_qp_init - initialize shared qp * @qp: hw qp (user and kernel) * @info: qp initialization info * * initializes the vars used in both user and kernel mode. * size of the wqe depends on numbers of max. fragements * allowed. Then size of wqe * the number of wqes should be the * amount of memory allocated for sq and rq. */ int irdma_uk_qp_init(struct irdma_qp_uk *qp, struct irdma_qp_uk_init_info *info) { int ret_code = 0; u32 sq_ring_size; qp->uk_attrs = info->uk_attrs; if (info->max_sq_frag_cnt > qp->uk_attrs->max_hw_wq_frags || info->max_rq_frag_cnt > qp->uk_attrs->max_hw_wq_frags) return -EINVAL; qp->qp_caps = info->qp_caps; qp->sq_base = info->sq; qp->rq_base = info->rq; qp->qp_type = info->type ? 
info->type : IRDMA_QP_TYPE_IWARP; qp->shadow_area = info->shadow_area; qp->sq_wrtrk_array = info->sq_wrtrk_array; qp->rq_wrid_array = info->rq_wrid_array; qp->wqe_alloc_db = info->wqe_alloc_db; qp->last_rx_cmpl_idx = 0xffffffff; qp->rd_fence_rate = info->rd_fence_rate; qp->qp_id = info->qp_id; qp->sq_size = info->sq_size; qp->push_mode = false; qp->max_sq_frag_cnt = info->max_sq_frag_cnt; sq_ring_size = qp->sq_size << info->sq_shift; IRDMA_RING_INIT(qp->sq_ring, sq_ring_size); IRDMA_RING_INIT(qp->initial_ring, sq_ring_size); if (info->first_sq_wq) { irdma_setup_connection_wqes(qp, info); qp->swqe_polarity = 1; qp->first_sq_wq = true; } else { qp->swqe_polarity = 0; } qp->swqe_polarity_deferred = 1; qp->rwqe_polarity = 0; qp->rq_size = info->rq_size; qp->max_rq_frag_cnt = info->max_rq_frag_cnt; qp->max_inline_data = info->max_inline_data; qp->rq_wqe_size = info->rq_shift; IRDMA_RING_INIT(qp->rq_ring, qp->rq_size); qp->rq_wqe_size_multiplier = 1 << info->rq_shift; if (qp->uk_attrs->hw_rev == IRDMA_GEN_1) qp->wqe_ops = iw_wqe_uk_ops_gen_1; else qp->wqe_ops = iw_wqe_uk_ops; return ret_code; } /** * irdma_uk_cq_init - initialize shared cq (user and kernel) * @cq: hw cq * @info: hw cq initialization info */ int irdma_uk_cq_init(struct irdma_cq_uk *cq, struct irdma_cq_uk_init_info *info) { cq->cq_base = info->cq_base; cq->cq_id = info->cq_id; cq->cq_size = info->cq_size; cq->cqe_alloc_db = info->cqe_alloc_db; cq->cq_ack_db = info->cq_ack_db; cq->shadow_area = info->shadow_area; cq->avoid_mem_cflct = info->avoid_mem_cflct; IRDMA_RING_INIT(cq->cq_ring, cq->cq_size); cq->polarity = 1; return 0; } /** * irdma_uk_clean_cq - clean cq entries * @q: completion context * @cq: cq to clean */ int irdma_uk_clean_cq(void *q, struct irdma_cq_uk *cq) { __le64 *cqe; u64 qword3, comp_ctx; u32 cq_head; u8 polarity, temp; cq_head = cq->cq_ring.head; temp = cq->polarity; do { if (cq->avoid_mem_cflct) cqe = ((struct irdma_extended_cqe *)(cq->cq_base))[cq_head].buf; else cqe = cq->cq_base[cq_head].buf; get_64bit_val(cqe, IRDMA_BYTE_24, &qword3); polarity = (u8)FIELD_GET(IRDMA_CQ_VALID, qword3); if (polarity != temp) break; + /* Ensure CQE contents are read after valid bit is checked */ + rmb(); + get_64bit_val(cqe, IRDMA_BYTE_8, &comp_ctx); if ((void *)(irdma_uintptr) comp_ctx == q) set_64bit_val(cqe, IRDMA_BYTE_8, 0); cq_head = (cq_head + 1) % cq->cq_ring.size; if (!cq_head) temp ^= 1; } while (true); return 0; } -/** - * irdma_nop - post a nop - * @qp: hw qp ptr - * @wr_id: work request id - * @signaled: signaled for completion - * @post_sq: ring doorbell - */ -int -irdma_nop(struct irdma_qp_uk *qp, u64 wr_id, bool signaled, bool post_sq) -{ - __le64 *wqe; - u64 hdr; - u32 wqe_idx; - struct irdma_post_sq_info info = {0}; - u16 quanta = IRDMA_QP_WQE_MIN_QUANTA; - - info.push_wqe = qp->push_db ? 
true : false; - info.wr_id = wr_id; - wqe = irdma_qp_get_next_send_wqe(qp, &wqe_idx, &quanta, 0, &info); - if (!wqe) - return -ENOSPC; - - set_64bit_val(wqe, IRDMA_BYTE_0, 0); - set_64bit_val(wqe, IRDMA_BYTE_8, 0); - set_64bit_val(wqe, IRDMA_BYTE_16, 0); - - hdr = FIELD_PREP(IRDMAQPSQ_OPCODE, IRDMAQP_OP_NOP) | - FIELD_PREP(IRDMAQPSQ_SIGCOMPL, signaled) | - FIELD_PREP(IRDMAQPSQ_VALID, qp->swqe_polarity); - - irdma_wmb(); /* make sure WQE is populated before valid bit is set */ - - set_64bit_val(wqe, IRDMA_BYTE_24, hdr); - - if (info.push_wqe) - irdma_qp_push_wqe(qp, wqe, quanta, wqe_idx, post_sq); - else if (post_sq) - irdma_uk_qp_post_wr(qp); - - return 0; -} - /** * irdma_fragcnt_to_quanta_sq - calculate quanta based on fragment count for SQ * @frag_cnt: number of fragments * @quanta: quanta for frag_cnt */ int irdma_fragcnt_to_quanta_sq(u32 frag_cnt, u16 *quanta) { switch (frag_cnt) { case 0: case 1: *quanta = IRDMA_QP_WQE_MIN_QUANTA; break; case 2: case 3: *quanta = 2; break; case 4: case 5: *quanta = 3; break; case 6: case 7: *quanta = 4; break; case 8: case 9: *quanta = 5; break; case 10: case 11: *quanta = 6; break; case 12: case 13: *quanta = 7; break; case 14: case 15: /* when immediate data is present */ *quanta = 8; break; default: return -EINVAL; } return 0; } /** * irdma_fragcnt_to_wqesize_rq - calculate wqe size based on fragment count for RQ * @frag_cnt: number of fragments * @wqe_size: size in bytes given frag_cnt */ int irdma_fragcnt_to_wqesize_rq(u32 frag_cnt, u16 *wqe_size) { switch (frag_cnt) { case 0: case 1: *wqe_size = 32; break; case 2: case 3: *wqe_size = 64; break; case 4: case 5: case 6: case 7: *wqe_size = 128; break; case 8: case 9: case 10: case 11: case 12: case 13: case 14: *wqe_size = 256; break; default: return -EINVAL; } return 0; } diff --git a/sys/dev/irdma/irdma_user.h b/sys/dev/irdma/irdma_user.h index 5fe9d5cfdfbe..21bf1e7e5268 100644 --- a/sys/dev/irdma/irdma_user.h +++ b/sys/dev/irdma/irdma_user.h @@ -1,645 +1,644 @@ /*- * SPDX-License-Identifier: GPL-2.0 or Linux-OpenIB * - * Copyright (c) 2015 - 2022 Intel Corporation + * Copyright (c) 2015 - 2023 Intel Corporation * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU * General Public License (GPL) Version 2, available from the file * COPYING in the main directory of this source tree, or the * OpenFabrics.org BSD license below: * * Redistribution and use in source and binary forms, with or * without modification, are permitted provided that the following * conditions are met: * * - Redistributions of source code must retain the above * copyright notice, this list of conditions and the following * disclaimer. * * - Redistributions in binary form must reproduce the above * copyright notice, this list of conditions and the following * disclaimer in the documentation and/or other materials * provided with the distribution. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
*/ #ifndef IRDMA_USER_H #define IRDMA_USER_H #define irdma_handle void * #define irdma_adapter_handle irdma_handle #define irdma_qp_handle irdma_handle #define irdma_cq_handle irdma_handle #define irdma_pd_id irdma_handle #define irdma_stag_handle irdma_handle #define irdma_stag_index u32 #define irdma_stag u32 #define irdma_stag_key u8 #define irdma_tagged_offset u64 #define irdma_access_privileges u32 #define irdma_physical_fragment u64 #define irdma_address_list u64 * #define irdma_sgl struct irdma_sge * #define IRDMA_MAX_MR_SIZE 0x200000000000ULL #define IRDMA_ACCESS_FLAGS_LOCALREAD 0x01 #define IRDMA_ACCESS_FLAGS_LOCALWRITE 0x02 #define IRDMA_ACCESS_FLAGS_REMOTEREAD_ONLY 0x04 #define IRDMA_ACCESS_FLAGS_REMOTEREAD 0x05 #define IRDMA_ACCESS_FLAGS_REMOTEWRITE_ONLY 0x08 #define IRDMA_ACCESS_FLAGS_REMOTEWRITE 0x0a #define IRDMA_ACCESS_FLAGS_BIND_WINDOW 0x10 #define IRDMA_ACCESS_FLAGS_ZERO_BASED 0x20 #define IRDMA_ACCESS_FLAGS_ALL 0x3f #define IRDMA_OP_TYPE_RDMA_WRITE 0x00 #define IRDMA_OP_TYPE_RDMA_READ 0x01 #define IRDMA_OP_TYPE_SEND 0x03 #define IRDMA_OP_TYPE_SEND_INV 0x04 #define IRDMA_OP_TYPE_SEND_SOL 0x05 #define IRDMA_OP_TYPE_SEND_SOL_INV 0x06 #define IRDMA_OP_TYPE_RDMA_WRITE_SOL 0x0d #define IRDMA_OP_TYPE_BIND_MW 0x08 #define IRDMA_OP_TYPE_FAST_REG_NSMR 0x09 #define IRDMA_OP_TYPE_INV_STAG 0x0a #define IRDMA_OP_TYPE_RDMA_READ_INV_STAG 0x0b #define IRDMA_OP_TYPE_NOP 0x0c #define IRDMA_OP_TYPE_REC 0x3e #define IRDMA_OP_TYPE_REC_IMM 0x3f #define IRDMA_FLUSH_MAJOR_ERR 1 #define IRDMA_SRQFLUSH_RSVD_MAJOR_ERR 0xfffe /* Async Events codes */ #define IRDMA_AE_AMP_UNALLOCATED_STAG 0x0102 #define IRDMA_AE_AMP_INVALID_STAG 0x0103 #define IRDMA_AE_AMP_BAD_QP 0x0104 #define IRDMA_AE_AMP_BAD_PD 0x0105 #define IRDMA_AE_AMP_BAD_STAG_KEY 0x0106 #define IRDMA_AE_AMP_BAD_STAG_INDEX 0x0107 #define IRDMA_AE_AMP_BOUNDS_VIOLATION 0x0108 #define IRDMA_AE_AMP_RIGHTS_VIOLATION 0x0109 #define IRDMA_AE_AMP_TO_WRAP 0x010a #define IRDMA_AE_AMP_FASTREG_VALID_STAG 0x010c #define IRDMA_AE_AMP_FASTREG_MW_STAG 0x010d #define IRDMA_AE_AMP_FASTREG_INVALID_RIGHTS 0x010e #define IRDMA_AE_AMP_FASTREG_INVALID_LENGTH 0x0110 #define IRDMA_AE_AMP_INVALIDATE_SHARED 0x0111 #define IRDMA_AE_AMP_INVALIDATE_NO_REMOTE_ACCESS_RIGHTS 0x0112 #define IRDMA_AE_AMP_INVALIDATE_MR_WITH_BOUND_WINDOWS 0x0113 #define IRDMA_AE_AMP_MWBIND_VALID_STAG 0x0114 #define IRDMA_AE_AMP_MWBIND_OF_MR_STAG 0x0115 #define IRDMA_AE_AMP_MWBIND_TO_ZERO_BASED_STAG 0x0116 #define IRDMA_AE_AMP_MWBIND_TO_MW_STAG 0x0117 #define IRDMA_AE_AMP_MWBIND_INVALID_RIGHTS 0x0118 #define IRDMA_AE_AMP_MWBIND_INVALID_BOUNDS 0x0119 #define IRDMA_AE_AMP_MWBIND_TO_INVALID_PARENT 0x011a #define IRDMA_AE_AMP_MWBIND_BIND_DISABLED 0x011b #define IRDMA_AE_PRIV_OPERATION_DENIED 0x011c #define IRDMA_AE_AMP_INVALIDATE_TYPE1_MW 0x011d #define IRDMA_AE_AMP_MWBIND_ZERO_BASED_TYPE1_MW 0x011e #define IRDMA_AE_AMP_FASTREG_INVALID_PBL_HPS_CFG 0x011f #define IRDMA_AE_AMP_MWBIND_WRONG_TYPE 0x0120 #define IRDMA_AE_AMP_FASTREG_PBLE_MISMATCH 0x0121 #define IRDMA_AE_UDA_XMIT_DGRAM_TOO_LONG 0x0132 #define IRDMA_AE_UDA_XMIT_BAD_PD 0x0133 #define IRDMA_AE_UDA_XMIT_DGRAM_TOO_SHORT 0x0134 #define IRDMA_AE_UDA_L4LEN_INVALID 0x0135 #define IRDMA_AE_BAD_CLOSE 0x0201 #define IRDMA_AE_RDMAP_ROE_BAD_LLP_CLOSE 0x0202 #define IRDMA_AE_CQ_OPERATION_ERROR 0x0203 #define IRDMA_AE_RDMA_READ_WHILE_ORD_ZERO 0x0205 #define IRDMA_AE_STAG_ZERO_INVALID 0x0206 #define IRDMA_AE_IB_RREQ_AND_Q1_FULL 0x0207 #define IRDMA_AE_IB_INVALID_REQUEST 0x0208 #define IRDMA_AE_WQE_UNEXPECTED_OPCODE 0x020a #define 
IRDMA_AE_WQE_INVALID_PARAMETER 0x020b #define IRDMA_AE_WQE_INVALID_FRAG_DATA 0x020c #define IRDMA_AE_IB_REMOTE_ACCESS_ERROR 0x020d #define IRDMA_AE_IB_REMOTE_OP_ERROR 0x020e #define IRDMA_AE_WQE_LSMM_TOO_LONG 0x0220 #define IRDMA_AE_DDP_INVALID_MSN_GAP_IN_MSN 0x0301 #define IRDMA_AE_DDP_UBE_DDP_MESSAGE_TOO_LONG_FOR_AVAILABLE_BUFFER 0x0303 #define IRDMA_AE_DDP_UBE_INVALID_DDP_VERSION 0x0304 #define IRDMA_AE_DDP_UBE_INVALID_MO 0x0305 #define IRDMA_AE_DDP_UBE_INVALID_MSN_NO_BUFFER_AVAILABLE 0x0306 #define IRDMA_AE_DDP_UBE_INVALID_QN 0x0307 #define IRDMA_AE_DDP_NO_L_BIT 0x0308 #define IRDMA_AE_RDMAP_ROE_INVALID_RDMAP_VERSION 0x0311 #define IRDMA_AE_RDMAP_ROE_UNEXPECTED_OPCODE 0x0312 #define IRDMA_AE_ROE_INVALID_RDMA_READ_REQUEST 0x0313 #define IRDMA_AE_ROE_INVALID_RDMA_WRITE_OR_READ_RESP 0x0314 #define IRDMA_AE_ROCE_RSP_LENGTH_ERROR 0x0316 #define IRDMA_AE_ROCE_EMPTY_MCG 0x0380 #define IRDMA_AE_ROCE_BAD_MC_IP_ADDR 0x0381 #define IRDMA_AE_ROCE_BAD_MC_QPID 0x0382 #define IRDMA_AE_MCG_QP_PROTOCOL_MISMATCH 0x0383 #define IRDMA_AE_INVALID_ARP_ENTRY 0x0401 #define IRDMA_AE_INVALID_TCP_OPTION_RCVD 0x0402 #define IRDMA_AE_STALE_ARP_ENTRY 0x0403 #define IRDMA_AE_INVALID_AH_ENTRY 0x0406 #define IRDMA_AE_LLP_CLOSE_COMPLETE 0x0501 #define IRDMA_AE_LLP_CONNECTION_RESET 0x0502 #define IRDMA_AE_LLP_FIN_RECEIVED 0x0503 #define IRDMA_AE_LLP_RECEIVED_MARKER_AND_LENGTH_FIELDS_DONT_MATCH 0x0504 #define IRDMA_AE_LLP_RECEIVED_MPA_CRC_ERROR 0x0505 #define IRDMA_AE_LLP_SEGMENT_TOO_SMALL 0x0507 #define IRDMA_AE_LLP_SYN_RECEIVED 0x0508 #define IRDMA_AE_LLP_TERMINATE_RECEIVED 0x0509 #define IRDMA_AE_LLP_TOO_MANY_RETRIES 0x050a #define IRDMA_AE_LLP_TOO_MANY_KEEPALIVE_RETRIES 0x050b #define IRDMA_AE_LLP_DOUBT_REACHABILITY 0x050c #define IRDMA_AE_LLP_CONNECTION_ESTABLISHED 0x050e #define IRDMA_AE_RESOURCE_EXHAUSTION 0x0520 #define IRDMA_AE_RESET_SENT 0x0601 #define IRDMA_AE_TERMINATE_SENT 0x0602 #define IRDMA_AE_RESET_NOT_SENT 0x0603 #define IRDMA_AE_LCE_QP_CATASTROPHIC 0x0700 #define IRDMA_AE_LCE_FUNCTION_CATASTROPHIC 0x0701 #define IRDMA_AE_LCE_CQ_CATASTROPHIC 0x0702 #define IRDMA_AE_QP_SUSPEND_COMPLETE 0x0900 enum irdma_device_caps_const { IRDMA_WQE_SIZE = 4, IRDMA_CQP_WQE_SIZE = 8, IRDMA_CQE_SIZE = 4, IRDMA_EXTENDED_CQE_SIZE = 8, IRDMA_AEQE_SIZE = 2, IRDMA_CEQE_SIZE = 1, IRDMA_CQP_CTX_SIZE = 8, IRDMA_SHADOW_AREA_SIZE = 8, IRDMA_GATHER_STATS_BUF_SIZE = 1024, IRDMA_MIN_IW_QP_ID = 0, IRDMA_QUERY_FPM_BUF_SIZE = 176, IRDMA_COMMIT_FPM_BUF_SIZE = 176, IRDMA_MAX_IW_QP_ID = 262143, IRDMA_MIN_CEQID = 0, IRDMA_MAX_CEQID = 1023, IRDMA_CEQ_MAX_COUNT = IRDMA_MAX_CEQID + 1, IRDMA_MIN_CQID = 0, IRDMA_MAX_CQID = 524287, IRDMA_MIN_AEQ_ENTRIES = 1, IRDMA_MAX_AEQ_ENTRIES = 524287, IRDMA_MIN_CEQ_ENTRIES = 1, IRDMA_MAX_CEQ_ENTRIES = 262143, IRDMA_MIN_CQ_SIZE = 1, IRDMA_MAX_CQ_SIZE = 1048575, IRDMA_DB_ID_ZERO = 0, /* 64K + 1 */ IRDMA_MAX_OUTBOUND_MSG_SIZE = 65537, /* 64K +1 */ IRDMA_MAX_INBOUND_MSG_SIZE = 65537, IRDMA_MAX_PUSH_PAGE_COUNT = 1024, IRDMA_MAX_PE_ENA_VF_COUNT = 32, IRDMA_MAX_VF_FPM_ID = 47, IRDMA_MAX_SQ_PAYLOAD_SIZE = 2145386496, IRDMA_MAX_INLINE_DATA_SIZE = 101, IRDMA_MAX_WQ_ENTRIES = 32768, IRDMA_Q2_BUF_SIZE = 256, IRDMA_QP_CTX_SIZE = 256, IRDMA_MAX_PDS = 262144, }; enum irdma_addressing_type { IRDMA_ADDR_TYPE_ZERO_BASED = 0, IRDMA_ADDR_TYPE_VA_BASED = 1, }; enum irdma_flush_opcode { FLUSH_INVALID = 0, FLUSH_GENERAL_ERR, FLUSH_PROT_ERR, FLUSH_REM_ACCESS_ERR, FLUSH_LOC_QP_OP_ERR, FLUSH_REM_OP_ERR, FLUSH_LOC_LEN_ERR, FLUSH_FATAL_ERR, FLUSH_RETRY_EXC_ERR, FLUSH_MW_BIND_ERR, FLUSH_REM_INV_REQ_ERR, }; enum irdma_qp_event_type { 
IRDMA_QP_EVENT_CATASTROPHIC, IRDMA_QP_EVENT_ACCESS_ERR, IRDMA_QP_EVENT_REQ_ERR, }; enum irdma_cmpl_status { IRDMA_COMPL_STATUS_SUCCESS = 0, IRDMA_COMPL_STATUS_FLUSHED, IRDMA_COMPL_STATUS_INVALID_WQE, IRDMA_COMPL_STATUS_QP_CATASTROPHIC, IRDMA_COMPL_STATUS_REMOTE_TERMINATION, IRDMA_COMPL_STATUS_INVALID_STAG, IRDMA_COMPL_STATUS_BASE_BOUND_VIOLATION, IRDMA_COMPL_STATUS_ACCESS_VIOLATION, IRDMA_COMPL_STATUS_INVALID_PD_ID, IRDMA_COMPL_STATUS_WRAP_ERROR, IRDMA_COMPL_STATUS_STAG_INVALID_PDID, IRDMA_COMPL_STATUS_RDMA_READ_ZERO_ORD, IRDMA_COMPL_STATUS_QP_NOT_PRIVLEDGED, IRDMA_COMPL_STATUS_STAG_NOT_INVALID, IRDMA_COMPL_STATUS_INVALID_PHYS_BUF_SIZE, IRDMA_COMPL_STATUS_INVALID_PHYS_BUF_ENTRY, IRDMA_COMPL_STATUS_INVALID_FBO, IRDMA_COMPL_STATUS_INVALID_LEN, IRDMA_COMPL_STATUS_INVALID_ACCESS, IRDMA_COMPL_STATUS_PHYS_BUF_LIST_TOO_LONG, IRDMA_COMPL_STATUS_INVALID_VIRT_ADDRESS, IRDMA_COMPL_STATUS_INVALID_REGION, IRDMA_COMPL_STATUS_INVALID_WINDOW, IRDMA_COMPL_STATUS_INVALID_TOTAL_LEN, IRDMA_COMPL_STATUS_UNKNOWN, }; enum irdma_cmpl_notify { IRDMA_CQ_COMPL_EVENT = 0, IRDMA_CQ_COMPL_SOLICITED = 1, }; enum irdma_qp_caps { IRDMA_WRITE_WITH_IMM = 1, IRDMA_SEND_WITH_IMM = 2, IRDMA_ROCE = 4, IRDMA_PUSH_MODE = 8, }; struct irdma_qp_uk; struct irdma_cq_uk; struct irdma_qp_uk_init_info; struct irdma_cq_uk_init_info; struct irdma_sge { irdma_tagged_offset tag_off; u32 len; irdma_stag stag; }; struct irdma_ring { volatile u32 head; volatile u32 tail; /* effective tail */ u32 size; }; struct irdma_cqe { __le64 buf[IRDMA_CQE_SIZE]; }; struct irdma_extended_cqe { __le64 buf[IRDMA_EXTENDED_CQE_SIZE]; }; struct irdma_post_send { irdma_sgl sg_list; u32 num_sges; u32 qkey; u32 dest_qp; u32 ah_id; }; struct irdma_post_rq_info { u64 wr_id; irdma_sgl sg_list; u32 num_sges; }; struct irdma_rdma_write { irdma_sgl lo_sg_list; u32 num_lo_sges; struct irdma_sge rem_addr; }; struct irdma_rdma_read { irdma_sgl lo_sg_list; u32 num_lo_sges; struct irdma_sge rem_addr; }; struct irdma_bind_window { irdma_stag mr_stag; u64 bind_len; void *va; enum irdma_addressing_type addressing_type; bool ena_reads:1; bool ena_writes:1; irdma_stag mw_stag; bool mem_window_type_1:1; }; struct irdma_inv_local_stag { irdma_stag target_stag; }; struct irdma_post_sq_info { u64 wr_id; u8 op_type; u8 l4len; bool signaled:1; bool read_fence:1; bool local_fence:1; bool inline_data:1; bool imm_data_valid:1; bool push_wqe:1; bool report_rtt:1; bool udp_hdr:1; bool defer_flag:1; u32 imm_data; u32 stag_to_inv; union { struct irdma_post_send send; struct irdma_rdma_write rdma_write; struct irdma_rdma_read rdma_read; struct irdma_bind_window bind_window; struct irdma_inv_local_stag inv_local_stag; } op; }; struct irdma_cq_poll_info { u64 wr_id; irdma_qp_handle qp_handle; u32 bytes_xfered; u32 qp_id; u32 ud_src_qpn; u32 imm_data; irdma_stag inv_stag; /* or L_R_Key */ enum irdma_cmpl_status comp_status; u16 major_err; u16 minor_err; u16 ud_vlan; u8 ud_smac[6]; u8 op_type; u8 q_type; bool stag_invalid_set:1; /* or L_R_Key set */ bool push_dropped:1; bool error:1; bool solicited_event:1; bool ipv4:1; bool ud_vlan_valid:1; bool ud_smac_valid:1; bool imm_valid:1; bool signaled:1; union { u32 tcp_sqn; u32 roce_psn; u32 rtt; u32 raw; } stat; }; struct qp_err_code { enum irdma_flush_opcode flush_code; enum irdma_qp_event_type event_type; }; int irdma_uk_inline_rdma_write(struct irdma_qp_uk *qp, struct irdma_post_sq_info *info, bool post_sq); int irdma_uk_inline_send(struct irdma_qp_uk *qp, struct irdma_post_sq_info *info, bool post_sq); int irdma_uk_post_nop(struct irdma_qp_uk *qp, 
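struct irdma_ring above carries only a producer head, a consumer tail and a size. The sketch below shows the usual occupancy and wrap arithmetic for such a ring; the helper names are local inventions, and the driver's IRDMA_RING_* macros (defined elsewhere) may differ in detail, for example in how a full ring is represented.

/* Generic head/tail ring arithmetic of the kind struct irdma_ring is
 * used for.  Helper names are local; the real IRDMA_RING_* macros live
 * elsewhere in the driver and may differ. */
#include <stdio.h>
#include <stdint.h>

struct demo_ring {
    uint32_t head;      /* next slot the producer will use */
    uint32_t tail;      /* next slot the consumer will retire */
    uint32_t size;
};

static uint32_t demo_ring_used(const struct demo_ring *r)
{
    return (r->head + r->size - r->tail) % r->size;
}

static int demo_ring_full(const struct demo_ring *r)
{
    /* one slot is sacrificed so "full" and "empty" stay distinguishable */
    return demo_ring_used(r) == r->size - 1;
}

static void demo_ring_post(struct demo_ring *r)
{
    r->head = (r->head + 1) % r->size;
}

static void demo_ring_retire(struct demo_ring *r)
{
    r->tail = (r->tail + 1) % r->size;
}

int main(void)
{
    struct demo_ring r = { .head = 0, .tail = 0, .size = 8 };

    demo_ring_post(&r);
    demo_ring_post(&r);
    printf("used=%u full=%d\n", demo_ring_used(&r), demo_ring_full(&r));
    demo_ring_retire(&r);
    printf("used=%u\n", demo_ring_used(&r));
    return 0;
}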
u64 wr_id, bool signaled, bool post_sq); int irdma_uk_post_receive(struct irdma_qp_uk *qp, struct irdma_post_rq_info *info); void irdma_uk_qp_post_wr(struct irdma_qp_uk *qp); int irdma_uk_rdma_read(struct irdma_qp_uk *qp, struct irdma_post_sq_info *info, bool inv_stag, bool post_sq); int irdma_uk_rdma_write(struct irdma_qp_uk *qp, struct irdma_post_sq_info *info, bool post_sq); int irdma_uk_send(struct irdma_qp_uk *qp, struct irdma_post_sq_info *info, bool post_sq); int irdma_uk_stag_local_invalidate(struct irdma_qp_uk *qp, struct irdma_post_sq_info *info, bool post_sq); struct irdma_wqe_uk_ops { void (*iw_copy_inline_data)(u8 *dest, struct irdma_sge *sge_list, u32 num_sges, u8 polarity); u16 (*iw_inline_data_size_to_quanta)(u32 data_size); void (*iw_set_fragment)(__le64 *wqe, u32 offset, struct irdma_sge *sge, u8 valid); void (*iw_set_mw_bind_wqe)(__le64 *wqe, struct irdma_bind_window *op_info); }; int irdma_uk_cq_poll_cmpl(struct irdma_cq_uk *cq, struct irdma_cq_poll_info *info); void irdma_uk_cq_request_notification(struct irdma_cq_uk *cq, enum irdma_cmpl_notify cq_notify); void irdma_uk_cq_resize(struct irdma_cq_uk *cq, void *cq_base, int size); void irdma_uk_cq_set_resized_cnt(struct irdma_cq_uk *qp, u16 cnt); int irdma_uk_cq_init(struct irdma_cq_uk *cq, struct irdma_cq_uk_init_info *info); int irdma_uk_qp_init(struct irdma_qp_uk *qp, struct irdma_qp_uk_init_info *info); void irdma_uk_calc_shift_wq(struct irdma_qp_uk_init_info *ukinfo, u8 *sq_shift, u8 *rq_shift); int irdma_uk_calc_depth_shift_sq(struct irdma_qp_uk_init_info *ukinfo, u32 *sq_depth, u8 *sq_shift); int irdma_uk_calc_depth_shift_rq(struct irdma_qp_uk_init_info *ukinfo, u32 *rq_depth, u8 *rq_shift); struct irdma_sq_uk_wr_trk_info { u64 wrid; u32 wr_len; u16 quanta; u8 signaled; u8 reserved[1]; }; struct irdma_qp_quanta { __le64 elem[IRDMA_WQE_SIZE]; }; struct irdma_qp_uk { struct irdma_qp_quanta *sq_base; struct irdma_qp_quanta *rq_base; struct irdma_uk_attrs *uk_attrs; u32 IOMEM *wqe_alloc_db; struct irdma_sq_uk_wr_trk_info *sq_wrtrk_array; struct irdma_sig_wr_trk_info *sq_sigwrtrk_array; u64 *rq_wrid_array; __le64 *shadow_area; __le32 *push_db; __le64 *push_wqe; struct irdma_ring sq_ring; struct irdma_ring sq_sig_ring; struct irdma_ring rq_ring; struct irdma_ring initial_ring; u32 qp_id; u32 qp_caps; u32 sq_size; u32 rq_size; u32 max_sq_frag_cnt; u32 max_rq_frag_cnt; u32 max_inline_data; u32 last_rx_cmpl_idx; u32 last_tx_cmpl_idx; struct irdma_wqe_uk_ops wqe_ops; u16 conn_wqes; u8 qp_type; u8 swqe_polarity; u8 swqe_polarity_deferred; u8 rwqe_polarity; u8 rq_wqe_size; u8 rq_wqe_size_multiplier; bool deferred_flag:1; bool push_mode:1; /* whether the last post wqe was pushed */ bool push_dropped:1; bool first_sq_wq:1; bool sq_flush_complete:1; /* Indicates flush was seen and SQ was empty after the flush */ bool rq_flush_complete:1; /* Indicates flush was seen and RQ was empty after the flush */ bool destroy_pending:1; /* Indicates the QP is being destroyed */ void *back_qp; spinlock_t *lock; u8 dbg_rq_flushed; u16 ord_cnt; u8 sq_flush_seen; u8 rq_flush_seen; u8 rd_fence_rate; }; struct irdma_cq_uk { struct irdma_cqe *cq_base; u32 IOMEM *cqe_alloc_db; u32 IOMEM *cq_ack_db; __le64 *shadow_area; u32 cq_id; u32 cq_size; struct irdma_ring cq_ring; u8 polarity; bool avoid_mem_cflct:1; }; struct irdma_qp_uk_init_info { struct irdma_qp_quanta *sq; struct irdma_qp_quanta *rq; struct irdma_uk_attrs *uk_attrs; u32 IOMEM *wqe_alloc_db; __le64 *shadow_area; struct irdma_sq_uk_wr_trk_info *sq_wrtrk_array; struct irdma_sig_wr_trk_info 
*sq_sigwrtrk_array; u64 *rq_wrid_array; u32 qp_id; u32 qp_caps; u32 sq_size; u32 rq_size; u32 max_sq_frag_cnt; u32 max_rq_frag_cnt; u32 max_inline_data; u32 sq_depth; u32 rq_depth; u8 first_sq_wq; u8 type; u8 sq_shift; u8 rq_shift; u8 rd_fence_rate; int abi_ver; bool legacy_mode; }; struct irdma_cq_uk_init_info { u32 IOMEM *cqe_alloc_db; u32 IOMEM *cq_ack_db; struct irdma_cqe *cq_base; __le64 *shadow_area; u32 cq_size; u32 cq_id; bool avoid_mem_cflct; }; __le64 *irdma_qp_get_next_send_wqe(struct irdma_qp_uk *qp, u32 *wqe_idx, u16 *quanta, u32 total_size, struct irdma_post_sq_info *info); __le64 *irdma_qp_get_next_recv_wqe(struct irdma_qp_uk *qp, u32 *wqe_idx); int irdma_uk_clean_cq(void *q, struct irdma_cq_uk *cq); int irdma_nop(struct irdma_qp_uk *qp, u64 wr_id, bool signaled, bool post_sq); int irdma_fragcnt_to_quanta_sq(u32 frag_cnt, u16 *quanta); int irdma_fragcnt_to_wqesize_rq(u32 frag_cnt, u16 *wqe_size); void irdma_get_wqe_shift(struct irdma_uk_attrs *uk_attrs, u32 sge, u32 inline_data, u8 *shift); int irdma_get_sqdepth(struct irdma_uk_attrs *uk_attrs, u32 sq_size, u8 shift, u32 *sqdepth); int irdma_get_rqdepth(struct irdma_uk_attrs *uk_attrs, u32 rq_size, u8 shift, u32 *rqdepth); -int irdma_get_srqdepth(struct irdma_uk_attrs *uk_attrs, u32 srq_size, u8 shift, u32 *srqdepth); void irdma_qp_push_wqe(struct irdma_qp_uk *qp, __le64 *wqe, u16 quanta, u32 wqe_idx, bool post_sq); void irdma_clr_wqes(struct irdma_qp_uk *qp, u32 qp_wqe_idx); static inline struct qp_err_code irdma_ae_to_qp_err_code(u16 ae_id) { struct qp_err_code qp_err = { 0 }; switch (ae_id) { case IRDMA_AE_AMP_BOUNDS_VIOLATION: case IRDMA_AE_AMP_INVALID_STAG: case IRDMA_AE_AMP_RIGHTS_VIOLATION: case IRDMA_AE_AMP_UNALLOCATED_STAG: case IRDMA_AE_AMP_BAD_PD: case IRDMA_AE_AMP_BAD_QP: case IRDMA_AE_AMP_BAD_STAG_KEY: case IRDMA_AE_AMP_BAD_STAG_INDEX: case IRDMA_AE_AMP_TO_WRAP: case IRDMA_AE_PRIV_OPERATION_DENIED: qp_err.flush_code = FLUSH_PROT_ERR; qp_err.event_type = IRDMA_QP_EVENT_ACCESS_ERR; break; case IRDMA_AE_UDA_XMIT_BAD_PD: case IRDMA_AE_WQE_UNEXPECTED_OPCODE: qp_err.flush_code = FLUSH_LOC_QP_OP_ERR; qp_err.event_type = IRDMA_QP_EVENT_CATASTROPHIC; break; case IRDMA_AE_UDA_XMIT_DGRAM_TOO_SHORT: case IRDMA_AE_UDA_XMIT_DGRAM_TOO_LONG: case IRDMA_AE_UDA_L4LEN_INVALID: case IRDMA_AE_DDP_UBE_INVALID_MO: case IRDMA_AE_DDP_UBE_DDP_MESSAGE_TOO_LONG_FOR_AVAILABLE_BUFFER: qp_err.flush_code = FLUSH_LOC_LEN_ERR; qp_err.event_type = IRDMA_QP_EVENT_CATASTROPHIC; break; case IRDMA_AE_AMP_INVALIDATE_NO_REMOTE_ACCESS_RIGHTS: case IRDMA_AE_IB_REMOTE_ACCESS_ERROR: qp_err.flush_code = FLUSH_REM_ACCESS_ERR; qp_err.event_type = IRDMA_QP_EVENT_ACCESS_ERR; break; case IRDMA_AE_AMP_MWBIND_INVALID_RIGHTS: case IRDMA_AE_AMP_MWBIND_BIND_DISABLED: case IRDMA_AE_AMP_MWBIND_INVALID_BOUNDS: case IRDMA_AE_AMP_MWBIND_VALID_STAG: qp_err.flush_code = FLUSH_MW_BIND_ERR; qp_err.event_type = IRDMA_QP_EVENT_ACCESS_ERR; break; case IRDMA_AE_LLP_TOO_MANY_RETRIES: qp_err.flush_code = FLUSH_RETRY_EXC_ERR; qp_err.event_type = IRDMA_QP_EVENT_CATASTROPHIC; break; case IRDMA_AE_IB_INVALID_REQUEST: qp_err.flush_code = FLUSH_REM_INV_REQ_ERR; qp_err.event_type = IRDMA_QP_EVENT_REQ_ERR; break; case IRDMA_AE_LLP_SEGMENT_TOO_SMALL: case IRDMA_AE_LLP_RECEIVED_MPA_CRC_ERROR: case IRDMA_AE_ROCE_RSP_LENGTH_ERROR: case IRDMA_AE_IB_REMOTE_OP_ERROR: qp_err.flush_code = FLUSH_REM_OP_ERR; qp_err.event_type = IRDMA_QP_EVENT_CATASTROPHIC; break; case IRDMA_AE_LCE_QP_CATASTROPHIC: qp_err.flush_code = FLUSH_FATAL_ERR; qp_err.event_type = IRDMA_QP_EVENT_CATASTROPHIC; break; default: 
qp_err.flush_code = FLUSH_GENERAL_ERR; qp_err.event_type = IRDMA_QP_EVENT_CATASTROPHIC; break; } return qp_err; } #endif /* IRDMA_USER_H */ diff --git a/sys/dev/irdma/irdma_utils.c b/sys/dev/irdma/irdma_utils.c index 83c0dfcdbce4..b334bdf80c1a 100644 --- a/sys/dev/irdma/irdma_utils.c +++ b/sys/dev/irdma/irdma_utils.c @@ -1,2427 +1,2549 @@ /*- * SPDX-License-Identifier: GPL-2.0 or Linux-OpenIB * - * Copyright (c) 2015 - 2022 Intel Corporation + * Copyright (c) 2015 - 2023 Intel Corporation * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU * General Public License (GPL) Version 2, available from the file * COPYING in the main directory of this source tree, or the * OpenFabrics.org BSD license below: * * Redistribution and use in source and binary forms, with or * without modification, are permitted provided that the following * conditions are met: * * - Redistributions of source code must retain the above * copyright notice, this list of conditions and the following * disclaimer. * * - Redistributions in binary form must reproduce the above * copyright notice, this list of conditions and the following * disclaimer in the documentation and/or other materials * provided with the distribution. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ #include "irdma_main.h" LIST_HEAD(irdma_handlers); DEFINE_SPINLOCK(irdma_handler_lock); +static const struct ae_desc ae_desc_list[] = { + {IRDMA_AE_AMP_UNALLOCATED_STAG, "Unallocated memory key (L-Key/R-Key)"}, + {IRDMA_AE_AMP_INVALID_STAG, "Invalid memory key (L-Key/R-Key)"}, + {IRDMA_AE_AMP_BAD_QP, + "Memory protection error: Accessing Memory Window (MW) which belongs to a different QP"}, + {IRDMA_AE_AMP_BAD_PD, + "Memory protection error: Accessing Memory Window (MW)/Memory Region (MR) which belongs to a different PD"}, + {IRDMA_AE_AMP_BAD_STAG_KEY, "Bad memory key (L-Key/R-Key)"}, + {IRDMA_AE_AMP_BAD_STAG_INDEX, "Bad memory key (L-Key/R-Key): Too large memory key index"}, + {IRDMA_AE_AMP_BOUNDS_VIOLATION, "Memory Window (MW)/Memory Region (MR) bounds violation"}, + {IRDMA_AE_AMP_RIGHTS_VIOLATION, "Memory Window (MW)/Memory Region (MR) rights violation"}, + {IRDMA_AE_AMP_TO_WRAP, + "Memory protection error: The address within Memory Window (MW)/Memory Region (MR) wraps"}, + {IRDMA_AE_AMP_FASTREG_VALID_STAG, + "Fastreg error: Registration to a valid MR"}, + {IRDMA_AE_AMP_FASTREG_MW_STAG, + "Fastreg error: Registration to a valid Memory Window (MW)"}, + {IRDMA_AE_AMP_FASTREG_INVALID_RIGHTS, "Fastreg error: Invalid rights"}, + {IRDMA_AE_AMP_FASTREG_INVALID_LENGTH, "Fastreg error: Invalid length"}, + {IRDMA_AE_AMP_INVALIDATE_SHARED, "Attempt to invalidate a shared MR"}, + {IRDMA_AE_AMP_INVALIDATE_NO_REMOTE_ACCESS_RIGHTS, + "Attempt to remotely invalidate Memory Window (MW)/Memory Region (MR) without rights"}, + {IRDMA_AE_AMP_INVALIDATE_MR_WITH_BOUND_WINDOWS, + "Attempt to invalidate MR with a bound Memory Window (MW)"}, + {IRDMA_AE_AMP_MWBIND_VALID_STAG, + "Attempt to bind an Memory Window (MW) with a valid MW memory key (L-Key/R-Key)"}, + 
{IRDMA_AE_AMP_MWBIND_OF_MR_STAG, + "Attempt to bind an Memory Window (MW) with an MR memory key (L-Key/R-Key)"}, + {IRDMA_AE_AMP_MWBIND_TO_ZERO_BASED_STAG, + "Attempt to bind an Memory Window (MW) to a zero based MR"}, + {IRDMA_AE_AMP_MWBIND_TO_MW_STAG, + "Attempt to bind an Memory Window (MW) using MW memory key (L-Key/R-Key) instead of MR memory key (L-Key/R-Key)"}, + {IRDMA_AE_AMP_MWBIND_INVALID_RIGHTS, "Memory Window (MW) bind error: Invalid rights"}, + {IRDMA_AE_AMP_MWBIND_INVALID_BOUNDS, "Memory Window (MW) bind error: Invalid bounds"}, + {IRDMA_AE_AMP_MWBIND_TO_INVALID_PARENT, + "Memory Window (MW) bind error: Invalid parent MR"}, + {IRDMA_AE_AMP_MWBIND_BIND_DISABLED, + "Memory Window (MW) bind error: Disabled bind support"}, + {IRDMA_AE_PRIV_OPERATION_DENIED, + "Denying a privileged operation on a non-privileged QP"}, + {IRDMA_AE_AMP_INVALIDATE_TYPE1_MW, "Memory Window (MW) error: Invalidate type 1 MW"}, + {IRDMA_AE_AMP_MWBIND_ZERO_BASED_TYPE1_MW, + "Memory Window (MW) bind error: Zero-based addressing for type 1 MW"}, + {IRDMA_AE_AMP_FASTREG_INVALID_PBL_HPS_CFG, + "Fastreg error: Invalid host page size config"}, + {IRDMA_AE_AMP_MWBIND_WRONG_TYPE, "MB bind error: Wrong Memory Window (MW) type"}, + {IRDMA_AE_AMP_FASTREG_PBLE_MISMATCH, + "Fastreg error: Invalid request to change physical MR to virtual or vice versa"}, + {IRDMA_AE_UDA_XMIT_DGRAM_TOO_LONG, + "Userspace Direct Access (UDA) QP xmit error: Packet length exceeds the QP MTU"}, + {IRDMA_AE_UDA_XMIT_BAD_PD, + "Userspace Direct Access (UDA) QP xmit error: Attempt to access a different PD"}, + {IRDMA_AE_UDA_XMIT_DGRAM_TOO_SHORT, + "Userspace Direct Access (UDA) QP xmit error: Too short packet length"}, + {IRDMA_AE_UDA_L4LEN_INVALID, + "Userspace Direct Access (UDA) error: Invalid packet length field"}, + {IRDMA_AE_BAD_CLOSE, + "iWARP error: Data is received when QP state is closing"}, + {IRDMA_AE_RDMAP_ROE_BAD_LLP_CLOSE, + "iWARP error: FIN is received when xmit data is pending"}, + {IRDMA_AE_CQ_OPERATION_ERROR, "CQ overflow"}, + {IRDMA_AE_RDMA_READ_WHILE_ORD_ZERO, + "QP error: Attempted RDMA Read when the outbound RDMA Read queue depth is zero"}, + {IRDMA_AE_STAG_ZERO_INVALID, + "Zero invalid memory key (L-Key/R-Key) on inbound RDMA R/W"}, + {IRDMA_AE_IB_RREQ_AND_Q1_FULL, + "QP error: Received RDMA Read request when the inbound RDMA Read queue is full"}, + {IRDMA_AE_IB_INVALID_REQUEST, + "QP error: Invalid operation detected by the remote peer"}, + {IRDMA_AE_WQE_UNEXPECTED_OPCODE, + "QP error: Invalid opcode in SQ WQE"}, + {IRDMA_AE_WQE_INVALID_PARAMETER, + "QP error: Invalid parameter in a WQE"}, + {IRDMA_AE_WQE_INVALID_FRAG_DATA, + "QP error: Invalid fragment in a WQE"}, + {IRDMA_AE_IB_REMOTE_ACCESS_ERROR, + "RoCEv2 error: Remote access error"}, + {IRDMA_AE_IB_REMOTE_OP_ERROR, + "RoCEv2 error: Remote operation error"}, + {IRDMA_AE_WQE_LSMM_TOO_LONG, "iWARP error: Connection error"}, + {IRDMA_AE_DDP_INVALID_MSN_GAP_IN_MSN, + "iWARP error: Invalid message sequence number"}, + {IRDMA_AE_DDP_UBE_DDP_MESSAGE_TOO_LONG_FOR_AVAILABLE_BUFFER, + "iWARP error: Inbound message is too long for the available buffer"}, + {IRDMA_AE_DDP_UBE_INVALID_DDP_VERSION, "iWARP error: Invalid DDP protocol version"}, + {IRDMA_AE_DDP_UBE_INVALID_MO, "Received message with too large offset"}, + {IRDMA_AE_DDP_UBE_INVALID_MSN_NO_BUFFER_AVAILABLE, + "iWARP error: Inbound Send message when no receive buffer is available"}, + {IRDMA_AE_DDP_UBE_INVALID_QN, "iWARP error: Invalid QP number in inbound packet"}, + {IRDMA_AE_DDP_NO_L_BIT, + "iWARP error: Last bit 
not set in an inbound packet which completes RDMA Read"}, + {IRDMA_AE_RDMAP_ROE_INVALID_RDMAP_VERSION, "iWARP error: Invalid RDMAP protocol version"}, + {IRDMA_AE_RDMAP_ROE_UNEXPECTED_OPCODE, "QP error: Invalid opcode"}, + {IRDMA_AE_ROE_INVALID_RDMA_READ_REQUEST, "Inbound Read request when QP isn't enabled for RDMA Read"}, + {IRDMA_AE_ROE_INVALID_RDMA_WRITE_OR_READ_RESP, + "Inbound RDMA Read response or RDMA Write when QP isn't enabled for RDMA R/W"}, + {IRDMA_AE_ROCE_RSP_LENGTH_ERROR, "RoCEv2 error: Received packet with incorrect length field"}, + {IRDMA_AE_ROCE_EMPTY_MCG, "RoCEv2 error: Multicast group has no valid members"}, + {IRDMA_AE_ROCE_BAD_MC_IP_ADDR, "RoCEv2 error: Multicast IP address doesn't match"}, + {IRDMA_AE_ROCE_BAD_MC_QPID, "RoCEv2 error: Multicast packet QP number isn't 0xffffff"}, + {IRDMA_AE_MCG_QP_PROTOCOL_MISMATCH, "RoCEv2 error: Multicast packet protocol mismatch"}, + {IRDMA_AE_INVALID_ARP_ENTRY, "Invalid ARP entry"}, + {IRDMA_AE_INVALID_TCP_OPTION_RCVD, "iWARP error: Invalid TCP option"}, + {IRDMA_AE_STALE_ARP_ENTRY, "Stale ARP entry"}, + {IRDMA_AE_INVALID_AH_ENTRY, "Invalid AH entry"}, + {IRDMA_AE_LLP_CLOSE_COMPLETE, + "iWARP event: Graceful close complete"}, + {IRDMA_AE_LLP_CONNECTION_RESET, + "iWARP event: Received a TCP packet with a RST bit set"}, + {IRDMA_AE_LLP_FIN_RECEIVED, + "iWARP event: Received a TCP packet with a FIN bit set"}, + {IRDMA_AE_LLP_RECEIVED_MARKER_AND_LENGTH_FIELDS_DONT_MATCH, + "iWARP error: Unable to close a gap in the TCP sequence"}, + {IRDMA_AE_LLP_RECEIVED_MPA_CRC_ERROR, "Received an ICRC error"}, + {IRDMA_AE_LLP_SEGMENT_TOO_SMALL, + "iWARP error: Received a packet with insufficient space for protocol headers"}, + {IRDMA_AE_LLP_SYN_RECEIVED, + "iWARP event: Received a TCP packet with a SYN bit set"}, + {IRDMA_AE_LLP_TERMINATE_RECEIVED, + "iWARP error: Received a terminate message"}, + {IRDMA_AE_LLP_TOO_MANY_RETRIES, "Connection error: The max number of retries has been reached"}, + {IRDMA_AE_LLP_TOO_MANY_KEEPALIVE_RETRIES, + "Connection error: The max number of keepalive retries has been reached"}, + {IRDMA_AE_LLP_DOUBT_REACHABILITY, + "Connection error: Doubt reachability (usually occurs after the max number of retries has been reached)"}, + {IRDMA_AE_LLP_CONNECTION_ESTABLISHED, + "iWARP event: Connection established"}, + {IRDMA_AE_RESOURCE_EXHAUSTION, + "QP error: Resource exhaustion"}, + {IRDMA_AE_RESET_SENT, + "Reset sent (as requested via Modify QP)"}, + {IRDMA_AE_TERMINATE_SENT, + "Terminate sent (as requested via Modify QP)"}, + {IRDMA_AE_RESET_NOT_SENT, + "Reset not sent (but requested via Modify QP)"}, + {IRDMA_AE_LCE_QP_CATASTROPHIC, + "QP error: HW transaction resulted in catastrophic error"}, + {IRDMA_AE_LCE_FUNCTION_CATASTROPHIC, + "PCIe function error: HW transaction resulted in catastrophic error"}, + {IRDMA_AE_LCE_CQ_CATASTROPHIC, + "CQ error: HW transaction resulted in catastrophic error"}, + {IRDMA_AE_QP_SUSPEND_COMPLETE, "QP event: Suspend complete"}, +}; + +/** + * irdma_get_ae_desc - returns AE description + * @ae_id: the AE number + */ +const char * +irdma_get_ae_desc(u16 ae_id) +{ + const char *desc = ""; + int i; + + for (i = 0; i < ARRAY_SIZE(ae_desc_list); i++) { + if (ae_desc_list[i].id == ae_id) { + desc = ae_desc_list[i].desc; + break; + } + } + return desc; +} + /** * irdma_arp_table -manage arp table * @rf: RDMA PCI function * @ip_addr: ip address for device * @mac_addr: mac address ptr * @action: modify, delete or add */ int irdma_arp_table(struct irdma_pci_f *rf, u32 *ip_addr, const u8 *mac_addr, u32 
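irdma_get_ae_desc() above is a plain linear scan of ae_desc_list that falls back to an empty string for unknown codes. The standalone version below reproduces the same pattern with a three-entry table borrowed from the list above; everything else is illustrative.

/* Minimal stand-alone version of the id -> description scan performed by
 * irdma_get_ae_desc(); the table holds a few entries taken from the new
 * ae_desc_list. */
#include <stdio.h>
#include <stdint.h>

struct demo_ae_desc {
    uint16_t id;
    const char *desc;
};

static const struct demo_ae_desc demo_ae_desc_list[] = {
    { 0x0102, "Unallocated memory key (L-Key/R-Key)" },
    { 0x0203, "CQ overflow" },
    { 0x0900, "QP event: Suspend complete" },
};

static const char *demo_get_ae_desc(uint16_t ae_id)
{
    const char *desc = "";
    size_t i;

    for (i = 0; i < sizeof(demo_ae_desc_list) / sizeof(demo_ae_desc_list[0]); i++) {
        if (demo_ae_desc_list[i].id == ae_id) {
            desc = demo_ae_desc_list[i].desc;
            break;
        }
    }
    return desc;        /* unknown codes yield "" */
}

int main(void)
{
    printf("0x0203: \"%s\"\n", demo_get_ae_desc(0x0203));
    printf("0xffff: \"%s\"\n", demo_get_ae_desc(0xffff));
    return 0;
}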
action) { unsigned long flags; int arp_index; u32 ip[4] = {}; memcpy(ip, ip_addr, sizeof(ip)); spin_lock_irqsave(&rf->arp_lock, flags); for (arp_index = 0; (u32)arp_index < rf->arp_table_size; arp_index++) { if (!memcmp(rf->arp_table[arp_index].ip_addr, ip, sizeof(ip))) break; } switch (action) { case IRDMA_ARP_ADD: if (arp_index != rf->arp_table_size) { arp_index = -1; break; } arp_index = 0; if (irdma_alloc_rsrc(rf, rf->allocated_arps, rf->arp_table_size, (u32 *)&arp_index, &rf->next_arp_index)) { arp_index = -1; break; } memcpy(rf->arp_table[arp_index].ip_addr, ip, sizeof(rf->arp_table[arp_index].ip_addr)); ether_addr_copy(rf->arp_table[arp_index].mac_addr, mac_addr); break; case IRDMA_ARP_RESOLVE: if (arp_index == rf->arp_table_size) arp_index = -1; break; case IRDMA_ARP_DELETE: if (arp_index == rf->arp_table_size) { arp_index = -1; break; } memset(rf->arp_table[arp_index].ip_addr, 0, sizeof(rf->arp_table[arp_index].ip_addr)); eth_zero_addr(rf->arp_table[arp_index].mac_addr); irdma_free_rsrc(rf, rf->allocated_arps, arp_index); break; default: arp_index = -1; break; } spin_unlock_irqrestore(&rf->arp_lock, flags); return arp_index; } /** * irdma_add_arp - add a new arp entry if needed * @rf: RDMA function * @ip: IP address * @mac: MAC address */ int irdma_add_arp(struct irdma_pci_f *rf, u32 *ip, const u8 *mac) { int arpidx; arpidx = irdma_arp_table(rf, &ip[0], NULL, IRDMA_ARP_RESOLVE); if (arpidx >= 0) { if (ether_addr_equal(rf->arp_table[arpidx].mac_addr, mac)) return arpidx; irdma_manage_arp_cache(rf, rf->arp_table[arpidx].mac_addr, ip, IRDMA_ARP_DELETE); } irdma_manage_arp_cache(rf, mac, ip, IRDMA_ARP_ADD); return irdma_arp_table(rf, ip, NULL, IRDMA_ARP_RESOLVE); } /** * irdma_netdevice_event - system notifier for netdev events * @notifier: not used * @event: event for notifier * @ptr: netdev */ int irdma_netdevice_event(struct notifier_block *notifier, unsigned long event, void *ptr) { struct irdma_device *iwdev; struct ifnet *netdev = netdev_notifier_info_to_ifp(ptr); iwdev = container_of(notifier, struct irdma_device, nb_netdevice_event); if (iwdev->netdev != netdev) return NOTIFY_DONE; iwdev->iw_status = 1; switch (event) { case NETDEV_DOWN: iwdev->iw_status = 0; /* fallthrough */ case NETDEV_UP: irdma_port_ibevent(iwdev); break; default: break; } return NOTIFY_DONE; } void irdma_unregister_notifiers(struct irdma_device *iwdev) { unregister_netdevice_notifier(&iwdev->nb_netdevice_event); } int irdma_register_notifiers(struct irdma_device *iwdev) { int ret; iwdev->nb_netdevice_event.notifier_call = irdma_netdevice_event; ret = register_netdevice_notifier(&iwdev->nb_netdevice_event); if (ret) { irdma_dev_err(&iwdev->ibdev, "register_netdevice_notifier failed\n"); return ret; } return ret; } /** * irdma_alloc_and_get_cqp_request - get cqp struct * @cqp: device cqp ptr * @wait: cqp to be used in wait mode */ struct irdma_cqp_request * irdma_alloc_and_get_cqp_request(struct irdma_cqp *cqp, bool wait) { struct irdma_cqp_request *cqp_request = NULL; unsigned long flags; spin_lock_irqsave(&cqp->req_lock, flags); if (!list_empty(&cqp->cqp_avail_reqs)) { cqp_request = list_entry(cqp->cqp_avail_reqs.next, struct irdma_cqp_request, list); list_del_init(&cqp_request->list); } spin_unlock_irqrestore(&cqp->req_lock, flags); if (!cqp_request) { cqp_request = kzalloc(sizeof(*cqp_request), GFP_ATOMIC); if (cqp_request) { cqp_request->dynamic = true; if (wait) init_waitqueue_head(&cqp_request->waitq); } } if (!cqp_request) { irdma_debug(cqp->sc_cqp.dev, IRDMA_DEBUG_ERR, "CQP Request Fail: No 
Memory"); return NULL; } cqp_request->waiting = wait; atomic_set(&cqp_request->refcnt, 1); memset(&cqp_request->compl_info, 0, sizeof(cqp_request->compl_info)); return cqp_request; } /** * irdma_get_cqp_request - increase refcount for cqp_request * @cqp_request: pointer to cqp_request instance */ static inline void irdma_get_cqp_request(struct irdma_cqp_request *cqp_request) { atomic_inc(&cqp_request->refcnt); } /** * irdma_free_cqp_request - free cqp request * @cqp: cqp ptr * @cqp_request: to be put back in cqp list */ void irdma_free_cqp_request(struct irdma_cqp *cqp, struct irdma_cqp_request *cqp_request) { unsigned long flags; if (cqp_request->dynamic) { kfree(cqp_request); } else { - cqp_request->request_done = false; + WRITE_ONCE(cqp_request->request_done, false); cqp_request->callback_fcn = NULL; cqp_request->waiting = false; spin_lock_irqsave(&cqp->req_lock, flags); list_add_tail(&cqp_request->list, &cqp->cqp_avail_reqs); spin_unlock_irqrestore(&cqp->req_lock, flags); } wake_up(&cqp->remove_wq); } /** * irdma_put_cqp_request - dec ref count and free if 0 * @cqp: cqp ptr * @cqp_request: to be put back in cqp list */ void irdma_put_cqp_request(struct irdma_cqp *cqp, struct irdma_cqp_request *cqp_request) { if (atomic_dec_and_test(&cqp_request->refcnt)) irdma_free_cqp_request(cqp, cqp_request); } /** * irdma_free_pending_cqp_request -free pending cqp request objs * @cqp: cqp ptr * @cqp_request: to be put back in cqp list */ static void irdma_free_pending_cqp_request(struct irdma_cqp *cqp, struct irdma_cqp_request *cqp_request) { if (cqp_request->waiting) { cqp_request->compl_info.error = true; - cqp_request->request_done = true; + WRITE_ONCE(cqp_request->request_done, true); wake_up(&cqp_request->waitq); } wait_event_timeout(cqp->remove_wq, atomic_read(&cqp_request->refcnt) == 1, 1000); irdma_put_cqp_request(cqp, cqp_request); } /** * irdma_cleanup_pending_cqp_op - clean-up cqp with no * completions * @rf: RDMA PCI function */ void irdma_cleanup_pending_cqp_op(struct irdma_pci_f *rf) { struct irdma_sc_dev *dev = &rf->sc_dev; struct irdma_cqp *cqp = &rf->cqp; struct irdma_cqp_request *cqp_request = NULL; struct cqp_cmds_info *pcmdinfo = NULL; u32 i, pending_work, wqe_idx; pending_work = IRDMA_RING_USED_QUANTA(cqp->sc_cqp.sq_ring); wqe_idx = IRDMA_RING_CURRENT_TAIL(cqp->sc_cqp.sq_ring); for (i = 0; i < pending_work; i++) { cqp_request = (struct irdma_cqp_request *)(uintptr_t) cqp->scratch_array[wqe_idx]; if (cqp_request) irdma_free_pending_cqp_request(cqp, cqp_request); wqe_idx = (wqe_idx + 1) % IRDMA_RING_SIZE(cqp->sc_cqp.sq_ring); } while (!list_empty(&dev->cqp_cmd_head)) { pcmdinfo = irdma_remove_cqp_head(dev); cqp_request = container_of(pcmdinfo, struct irdma_cqp_request, info); if (cqp_request) irdma_free_pending_cqp_request(cqp, cqp_request); } } /** * irdma_wait_event - wait for completion * @rf: RDMA PCI function * @cqp_request: cqp request to wait */ static int irdma_wait_event(struct irdma_pci_f *rf, struct irdma_cqp_request *cqp_request) { struct irdma_cqp_timeout cqp_timeout = {0}; - int timeout_threshold = CQP_TIMEOUT_THRESHOLD; bool cqp_error = false; int err_code = 0; - cqp_timeout.compl_cqp_cmds = rf->sc_dev.cqp_cmd_stats[IRDMA_OP_CMPL_CMDS]; + cqp_timeout.compl_cqp_cmds = atomic64_read(&rf->sc_dev.cqp->completed_ops); do { int wait_time_ms = rf->sc_dev.hw_attrs.max_cqp_compl_wait_time_ms; irdma_cqp_ce_handler(rf, &rf->ccq.sc_cq); if (wait_event_timeout(cqp_request->waitq, - cqp_request->request_done, + READ_ONCE(cqp_request->request_done), 
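The hunks above convert the request_done flag to WRITE_ONCE()/READ_ONCE() accesses, which appears intended to make the completion flag that irdma_wait_event() sleeps on a single, untearable load and store on each side. Below is a userspace analogue of that completer/waiter handshake, using C11 atomics and a condition variable in place of the kernel's wait queue; it illustrates the pattern only and is not driver code (build with -pthread).

/* Userspace analogue of the completion-flag pattern: the completer
 * publishes "done" with an atomic store and wakes the waiter; the waiter
 * re-checks the flag each time it is woken. */
#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>
#include <unistd.h>

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t cond = PTHREAD_COND_INITIALIZER;
static atomic_bool request_done;

static void *completer(void *arg)
{
    (void)arg;
    usleep(10000);                          /* pretend the CQP did some work */
    atomic_store(&request_done, true);      /* publish completion */
    pthread_mutex_lock(&lock);
    pthread_cond_broadcast(&cond);          /* wake the waiter */
    pthread_mutex_unlock(&lock);
    return NULL;
}

int main(void)
{
    pthread_t thr;

    pthread_create(&thr, NULL, completer, NULL);

    pthread_mutex_lock(&lock);
    while (!atomic_load(&request_done))     /* always re-check the flag */
        pthread_cond_wait(&cond, &lock);
    pthread_mutex_unlock(&lock);

    printf("request completed\n");
    pthread_join(thr, NULL);
    return 0;
}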
msecs_to_jiffies(wait_time_ms))) break; irdma_check_cqp_progress(&cqp_timeout, &rf->sc_dev); - if (cqp_timeout.count < timeout_threshold) + if (cqp_timeout.count < CQP_TIMEOUT_THRESHOLD) continue; if (!rf->reset) { rf->reset = true; rf->gen_ops.request_reset(rf); } return -ETIMEDOUT; } while (1); cqp_error = cqp_request->compl_info.error; if (cqp_error) { err_code = -EIO; if (cqp_request->compl_info.maj_err_code == 0xFFFF) { if (cqp_request->compl_info.min_err_code == 0x8002) { err_code = -EBUSY; } else if (cqp_request->compl_info.min_err_code == 0x8029) { if (!rf->reset) { rf->reset = true; rf->gen_ops.request_reset(rf); } } } } return err_code; } static const char *const irdma_cqp_cmd_names[IRDMA_MAX_CQP_OPS] = { [IRDMA_OP_CEQ_DESTROY] = "Destroy CEQ Cmd", [IRDMA_OP_AEQ_DESTROY] = "Destroy AEQ Cmd", [IRDMA_OP_DELETE_ARP_CACHE_ENTRY] = "Delete ARP Cache Cmd", [IRDMA_OP_MANAGE_APBVT_ENTRY] = "Manage APBV Table Entry Cmd", [IRDMA_OP_CEQ_CREATE] = "CEQ Create Cmd", [IRDMA_OP_AEQ_CREATE] = "AEQ Destroy Cmd", [IRDMA_OP_MANAGE_QHASH_TABLE_ENTRY] = "Manage Quad Hash Table Entry Cmd", [IRDMA_OP_QP_MODIFY] = "Modify QP Cmd", [IRDMA_OP_QP_UPLOAD_CONTEXT] = "Upload Context Cmd", [IRDMA_OP_CQ_CREATE] = "Create CQ Cmd", [IRDMA_OP_CQ_DESTROY] = "Destroy CQ Cmd", [IRDMA_OP_QP_CREATE] = "Create QP Cmd", [IRDMA_OP_QP_DESTROY] = "Destroy QP Cmd", [IRDMA_OP_ALLOC_STAG] = "Allocate STag Cmd", [IRDMA_OP_MR_REG_NON_SHARED] = "Register Non-Shared MR Cmd", [IRDMA_OP_DEALLOC_STAG] = "Deallocate STag Cmd", [IRDMA_OP_MW_ALLOC] = "Allocate Memory Window Cmd", [IRDMA_OP_QP_FLUSH_WQES] = "Flush QP Cmd", [IRDMA_OP_ADD_ARP_CACHE_ENTRY] = "Add ARP Cache Cmd", [IRDMA_OP_MANAGE_PUSH_PAGE] = "Manage Push Page Cmd", [IRDMA_OP_UPDATE_PE_SDS] = "Update PE SDs Cmd", [IRDMA_OP_MANAGE_HMC_PM_FUNC_TABLE] = "Manage HMC PM Function Table Cmd", [IRDMA_OP_SUSPEND] = "Suspend QP Cmd", [IRDMA_OP_RESUME] = "Resume QP Cmd", - [IRDMA_OP_MANAGE_VF_PBLE_BP] = "Manage VF PBLE Backing Pages Cmd", + [IRDMA_OP_MANAGE_VCHNL_REQ_PBLE_BP] = + "Manage Virtual Channel Requester Function PBLE Backing Pages Cmd", [IRDMA_OP_QUERY_FPM_VAL] = "Query FPM Values Cmd", [IRDMA_OP_COMMIT_FPM_VAL] = "Commit FPM Values Cmd", [IRDMA_OP_AH_CREATE] = "Create Address Handle Cmd", [IRDMA_OP_AH_MODIFY] = "Modify Address Handle Cmd", [IRDMA_OP_AH_DESTROY] = "Destroy Address Handle Cmd", [IRDMA_OP_MC_CREATE] = "Create Multicast Group Cmd", [IRDMA_OP_MC_DESTROY] = "Destroy Multicast Group Cmd", [IRDMA_OP_MC_MODIFY] = "Modify Multicast Group Cmd", [IRDMA_OP_STATS_ALLOCATE] = "Add Statistics Instance Cmd", [IRDMA_OP_STATS_FREE] = "Free Statistics Instance Cmd", [IRDMA_OP_STATS_GATHER] = "Gather Statistics Cmd", [IRDMA_OP_WS_ADD_NODE] = "Add Work Scheduler Node Cmd", [IRDMA_OP_WS_MODIFY_NODE] = "Modify Work Scheduler Node Cmd", [IRDMA_OP_WS_DELETE_NODE] = "Delete Work Scheduler Node Cmd", [IRDMA_OP_WS_FAILOVER_START] = "Failover Start Cmd", [IRDMA_OP_WS_FAILOVER_COMPLETE] = "Failover Complete Cmd", [IRDMA_OP_SET_UP_MAP] = "Set UP-UP Mapping Cmd", [IRDMA_OP_GEN_AE] = "Generate AE Cmd", [IRDMA_OP_QUERY_RDMA_FEATURES] = "RDMA Get Features Cmd", [IRDMA_OP_ALLOC_LOCAL_MAC_ENTRY] = "Allocate Local MAC Entry Cmd", [IRDMA_OP_ADD_LOCAL_MAC_ENTRY] = "Add Local MAC Entry Cmd", [IRDMA_OP_DELETE_LOCAL_MAC_ENTRY] = "Delete Local MAC Entry Cmd", [IRDMA_OP_CQ_MODIFY] = "CQ Modify Cmd", }; static const struct irdma_cqp_err_info irdma_noncrit_err_list[] = { {0xffff, 0x8002, "Invalid State"}, {0xffff, 0x8006, "Flush No Wqe Pending"}, {0xffff, 0x8007, "Modify QP Bad Close"}, {0xffff, 0x8009, 
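irdma_cqp_crit_err() just below treats a CQP completion error as critical unless its (major, minor) pair appears in irdma_noncrit_err_list, so only unexpected failures get logged at error severity. The standalone sketch reproduces that whitelist check with two entries copied from the list; the rest is illustrative.

/* Standalone sketch of the (major, minor) whitelist check performed by
 * irdma_cqp_crit_err(); two table entries are copied from the driver,
 * everything else is illustrative. */
#include <stdbool.h>
#include <stdio.h>
#include <stdint.h>

struct demo_cqp_err {
    uint16_t maj;
    uint16_t min;
    const char *desc;
};

static const struct demo_cqp_err demo_noncrit[] = {
    { 0xffff, 0x8002, "Invalid State" },
    { 0xffff, 0x8006, "Flush No Wqe Pending" },
};

static bool demo_cqp_crit_err(uint16_t maj, uint16_t min)
{
    size_t i;

    for (i = 0; i < sizeof(demo_noncrit) / sizeof(demo_noncrit[0]); i++) {
        if (maj == demo_noncrit[i].maj && min == demo_noncrit[i].min) {
            printf("non-critical: %s\n", demo_noncrit[i].desc);
            return false;
        }
    }
    return true;    /* anything not whitelisted is treated as critical */
}

int main(void)
{
    printf("0xffff/0x8002 critical? %d\n", demo_cqp_crit_err(0xffff, 0x8002));
    printf("0xffff/0x1234 critical? %d\n", demo_cqp_crit_err(0xffff, 0x1234));
    return 0;
}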
"LLP Closed"}, {0xffff, 0x800a, "Reset Not Sent"}, {0xffff, 0x200, "Failover Pending"} }; /** * irdma_cqp_crit_err - check if CQP error is critical * @dev: pointer to dev structure * @cqp_cmd: code for last CQP operation * @maj_err_code: major error code * @min_err_code: minot error code */ bool irdma_cqp_crit_err(struct irdma_sc_dev *dev, u8 cqp_cmd, u16 maj_err_code, u16 min_err_code) { int i; for (i = 0; i < ARRAY_SIZE(irdma_noncrit_err_list); ++i) { if (maj_err_code == irdma_noncrit_err_list[i].maj && min_err_code == irdma_noncrit_err_list[i].min) { irdma_debug(dev, IRDMA_DEBUG_CQP, "[%s Error][%s] maj=0x%x min=0x%x\n", irdma_noncrit_err_list[i].desc, irdma_cqp_cmd_names[cqp_cmd], maj_err_code, min_err_code); return false; } } return true; } /** * irdma_handle_cqp_op - process cqp command * @rf: RDMA PCI function * @cqp_request: cqp request to process */ int irdma_handle_cqp_op(struct irdma_pci_f *rf, struct irdma_cqp_request *cqp_request) { struct irdma_sc_dev *dev = &rf->sc_dev; struct cqp_cmds_info *info = &cqp_request->info; int status; bool put_cqp_request = true; if (rf->reset) return 0; irdma_get_cqp_request(cqp_request); status = irdma_process_cqp_cmd(dev, info); if (status) goto err; if (cqp_request->waiting) { put_cqp_request = false; status = irdma_wait_event(rf, cqp_request); if (status) goto err; } return 0; err: if (irdma_cqp_crit_err(dev, info->cqp_cmd, cqp_request->compl_info.maj_err_code, cqp_request->compl_info.min_err_code)) irdma_dev_err(&rf->iwdev->ibdev, "[%s Error][op_code=%d] status=%d waiting=%d completion_err=%d maj=0x%x min=0x%x\n", irdma_cqp_cmd_names[info->cqp_cmd], info->cqp_cmd, status, cqp_request->waiting, cqp_request->compl_info.error, cqp_request->compl_info.maj_err_code, cqp_request->compl_info.min_err_code); if (put_cqp_request) irdma_put_cqp_request(&rf->cqp, cqp_request); return status; } void irdma_qp_add_ref(struct ib_qp *ibqp) { struct irdma_qp *iwqp = to_iwqp(ibqp); atomic_inc(&iwqp->refcnt); } void irdma_qp_rem_ref(struct ib_qp *ibqp) { struct irdma_qp *iwqp = to_iwqp(ibqp); struct irdma_device *iwdev = iwqp->iwdev; unsigned long flags; spin_lock_irqsave(&iwdev->rf->qptable_lock, flags); if (!atomic_dec_and_test(&iwqp->refcnt)) { spin_unlock_irqrestore(&iwdev->rf->qptable_lock, flags); return; } iwdev->rf->qp_table[iwqp->ibqp.qp_num] = NULL; spin_unlock_irqrestore(&iwdev->rf->qptable_lock, flags); complete(&iwqp->free_qp); } void irdma_cq_add_ref(struct ib_cq *ibcq) { struct irdma_cq *iwcq = to_iwcq(ibcq); atomic_inc(&iwcq->refcnt); } void irdma_cq_rem_ref(struct ib_cq *ibcq) { struct irdma_cq *iwcq = to_iwcq(ibcq); struct irdma_pci_f *rf = container_of(iwcq->sc_cq.dev, struct irdma_pci_f, sc_dev); unsigned long flags; spin_lock_irqsave(&rf->cqtable_lock, flags); if (!atomic_dec_and_test(&iwcq->refcnt)) { spin_unlock_irqrestore(&rf->cqtable_lock, flags); return; } rf->cq_table[iwcq->cq_num] = NULL; spin_unlock_irqrestore(&rf->cqtable_lock, flags); complete(&iwcq->free_cq); } struct ib_device * to_ibdev(struct irdma_sc_dev *dev) { return &(container_of(dev, struct irdma_pci_f, sc_dev))->iwdev->ibdev; } /** * irdma_get_qp - get qp address * @device: iwarp device * @qpn: qp number */ struct ib_qp * irdma_get_qp(struct ib_device *device, int qpn) { struct irdma_device *iwdev = to_iwdev(device); if (qpn < IW_FIRST_QPN || qpn >= iwdev->rf->max_qp) return NULL; return &iwdev->rf->qp_table[qpn]->ibqp; } /** * irdma_remove_cqp_head - return head entry and remove * @dev: device */ void * irdma_remove_cqp_head(struct irdma_sc_dev *dev) { struct 
list_head *entry; struct list_head *list = &dev->cqp_cmd_head; if (list_empty(list)) return NULL; entry = list->next; list_del(entry); return entry; } /** * irdma_cqp_sds_cmd - create cqp command for sd * @dev: hardware control device structure * @sdinfo: information for sd cqp * */ int irdma_cqp_sds_cmd(struct irdma_sc_dev *dev, struct irdma_update_sds_info *sdinfo) { struct irdma_cqp_request *cqp_request; struct cqp_cmds_info *cqp_info; struct irdma_pci_f *rf = dev_to_rf(dev); int status; cqp_request = irdma_alloc_and_get_cqp_request(&rf->cqp, true); if (!cqp_request) return -ENOMEM; cqp_info = &cqp_request->info; memcpy(&cqp_info->in.u.update_pe_sds.info, sdinfo, sizeof(cqp_info->in.u.update_pe_sds.info)); cqp_info->cqp_cmd = IRDMA_OP_UPDATE_PE_SDS; cqp_info->post_sq = 1; cqp_info->in.u.update_pe_sds.dev = dev; cqp_info->in.u.update_pe_sds.scratch = (uintptr_t)cqp_request; status = irdma_handle_cqp_op(rf, cqp_request); irdma_put_cqp_request(&rf->cqp, cqp_request); return status; } /** * irdma_cqp_qp_suspend_resume - cqp command for suspend/resume * @qp: hardware control qp * @op: suspend or resume */ int irdma_cqp_qp_suspend_resume(struct irdma_sc_qp *qp, u8 op) { struct irdma_sc_dev *dev = qp->dev; struct irdma_cqp_request *cqp_request; struct irdma_sc_cqp *cqp = dev->cqp; struct cqp_cmds_info *cqp_info; struct irdma_pci_f *rf = dev_to_rf(dev); int status; cqp_request = irdma_alloc_and_get_cqp_request(&rf->cqp, false); if (!cqp_request) return -ENOMEM; cqp_info = &cqp_request->info; cqp_info->cqp_cmd = op; cqp_info->in.u.suspend_resume.cqp = cqp; cqp_info->in.u.suspend_resume.qp = qp; cqp_info->in.u.suspend_resume.scratch = (uintptr_t)cqp_request; status = irdma_handle_cqp_op(rf, cqp_request); irdma_put_cqp_request(&rf->cqp, cqp_request); return status; } /** * irdma_term_modify_qp - modify qp for term message * @qp: hardware control qp * @next_state: qp's next state * @term: terminate code * @term_len: length */ void irdma_term_modify_qp(struct irdma_sc_qp *qp, u8 next_state, u8 term, u8 term_len) { struct irdma_qp *iwqp; iwqp = qp->qp_uk.back_qp; irdma_next_iw_state(iwqp, next_state, 0, term, term_len); }; /** * irdma_terminate_done - after terminate is completed * @qp: hardware control qp * @timeout_occurred: indicates if terminate timer expired */ void irdma_terminate_done(struct irdma_sc_qp *qp, int timeout_occurred) { struct irdma_qp *iwqp; u8 hte = 0; bool first_time; unsigned long flags; iwqp = qp->qp_uk.back_qp; spin_lock_irqsave(&iwqp->lock, flags); if (iwqp->hte_added) { iwqp->hte_added = 0; hte = 1; } first_time = !(qp->term_flags & IRDMA_TERM_DONE); qp->term_flags |= IRDMA_TERM_DONE; spin_unlock_irqrestore(&iwqp->lock, flags); if (first_time) { if (!timeout_occurred) irdma_terminate_del_timer(qp); irdma_next_iw_state(iwqp, IRDMA_QP_STATE_ERROR, hte, 0, 0); irdma_cm_disconn(iwqp); } } static void irdma_terminate_timeout(struct timer_list *t) { struct irdma_qp *iwqp = from_timer(iwqp, t, terminate_timer); struct irdma_sc_qp *qp = &iwqp->sc_qp; irdma_terminate_done(qp, 1); irdma_qp_rem_ref(&iwqp->ibqp); } /** * irdma_terminate_start_timer - start terminate timeout * @qp: hardware control qp */ void irdma_terminate_start_timer(struct irdma_sc_qp *qp) { struct irdma_qp *iwqp; iwqp = qp->qp_uk.back_qp; irdma_qp_add_ref(&iwqp->ibqp); timer_setup(&iwqp->terminate_timer, irdma_terminate_timeout, 0); iwqp->terminate_timer.expires = jiffies + HZ; add_timer(&iwqp->terminate_timer); } /** * irdma_terminate_del_timer - delete terminate timeout * @qp: hardware control qp */ void 
irdma_terminate_del_timer(struct irdma_sc_qp *qp) { struct irdma_qp *iwqp; int ret; iwqp = qp->qp_uk.back_qp; ret = irdma_del_timer_compat(&iwqp->terminate_timer); if (ret) irdma_qp_rem_ref(&iwqp->ibqp); } /** * irdma_cqp_query_fpm_val_cmd - send cqp command for fpm * @dev: function device struct * @val_mem: buffer for fpm * @hmc_fn_id: function id for fpm */ int irdma_cqp_query_fpm_val_cmd(struct irdma_sc_dev *dev, struct irdma_dma_mem *val_mem, u16 hmc_fn_id) { struct irdma_cqp_request *cqp_request; struct cqp_cmds_info *cqp_info; struct irdma_pci_f *rf = dev_to_rf(dev); int status; cqp_request = irdma_alloc_and_get_cqp_request(&rf->cqp, true); if (!cqp_request) return -ENOMEM; cqp_info = &cqp_request->info; cqp_request->param = NULL; cqp_info->in.u.query_fpm_val.cqp = dev->cqp; cqp_info->in.u.query_fpm_val.fpm_val_pa = val_mem->pa; cqp_info->in.u.query_fpm_val.fpm_val_va = val_mem->va; cqp_info->in.u.query_fpm_val.hmc_fn_id = hmc_fn_id; cqp_info->cqp_cmd = IRDMA_OP_QUERY_FPM_VAL; cqp_info->post_sq = 1; cqp_info->in.u.query_fpm_val.scratch = (uintptr_t)cqp_request; status = irdma_handle_cqp_op(rf, cqp_request); irdma_put_cqp_request(&rf->cqp, cqp_request); return status; } /** * irdma_cqp_commit_fpm_val_cmd - commit fpm values in hw * @dev: hardware control device structure * @val_mem: buffer with fpm values * @hmc_fn_id: function id for fpm */ int irdma_cqp_commit_fpm_val_cmd(struct irdma_sc_dev *dev, struct irdma_dma_mem *val_mem, u16 hmc_fn_id) { struct irdma_cqp_request *cqp_request; struct cqp_cmds_info *cqp_info; struct irdma_pci_f *rf = dev_to_rf(dev); int status; cqp_request = irdma_alloc_and_get_cqp_request(&rf->cqp, true); if (!cqp_request) return -ENOMEM; cqp_info = &cqp_request->info; cqp_request->param = NULL; cqp_info->in.u.commit_fpm_val.cqp = dev->cqp; cqp_info->in.u.commit_fpm_val.fpm_val_pa = val_mem->pa; cqp_info->in.u.commit_fpm_val.fpm_val_va = val_mem->va; cqp_info->in.u.commit_fpm_val.hmc_fn_id = hmc_fn_id; cqp_info->cqp_cmd = IRDMA_OP_COMMIT_FPM_VAL; cqp_info->post_sq = 1; cqp_info->in.u.commit_fpm_val.scratch = (uintptr_t)cqp_request; status = irdma_handle_cqp_op(rf, cqp_request); irdma_put_cqp_request(&rf->cqp, cqp_request); return status; } /** * irdma_cqp_cq_create_cmd - create a cq for the cqp * @dev: device pointer * @cq: pointer to created cq */ int irdma_cqp_cq_create_cmd(struct irdma_sc_dev *dev, struct irdma_sc_cq *cq) { struct irdma_pci_f *rf = dev_to_rf(dev); struct irdma_cqp *iwcqp = &rf->cqp; struct irdma_cqp_request *cqp_request; struct cqp_cmds_info *cqp_info; int status; cqp_request = irdma_alloc_and_get_cqp_request(iwcqp, true); if (!cqp_request) return -ENOMEM; cqp_info = &cqp_request->info; cqp_info->cqp_cmd = IRDMA_OP_CQ_CREATE; cqp_info->post_sq = 1; cqp_info->in.u.cq_create.cq = cq; cqp_info->in.u.cq_create.scratch = (uintptr_t)cqp_request; status = irdma_handle_cqp_op(rf, cqp_request); irdma_put_cqp_request(iwcqp, cqp_request); return status; } /** * irdma_cqp_qp_create_cmd - create a qp for the cqp * @dev: device pointer * @qp: pointer to created qp */ int irdma_cqp_qp_create_cmd(struct irdma_sc_dev *dev, struct irdma_sc_qp *qp) { struct irdma_pci_f *rf = dev_to_rf(dev); struct irdma_cqp *iwcqp = &rf->cqp; struct irdma_cqp_request *cqp_request; struct cqp_cmds_info *cqp_info; struct irdma_create_qp_info *qp_info; int status; cqp_request = irdma_alloc_and_get_cqp_request(iwcqp, true); if (!cqp_request) return -ENOMEM; cqp_info = &cqp_request->info; qp_info = &cqp_request->info.in.u.qp_create.info; memset(qp_info, 0, sizeof(*qp_info)); 
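/*
 * Editor's note: nearly every control-path helper in this driver follows the
 * CQP pattern visible here -- allocate a request (waiting or not), fill
 * cqp_info with the op code and its parameters, stash the request pointer in
 * 'scratch' so the completion can be matched back to it, post it with
 * irdma_handle_cqp_op(), and drop the reference with irdma_put_cqp_request().
 * Illustrative sketch only, using a hypothetical op "foo" that is not part of
 * this driver:
 *
 *	cqp_request = irdma_alloc_and_get_cqp_request(&rf->cqp, true);
 *	if (!cqp_request)
 *		return -ENOMEM;
 *	cqp_info = &cqp_request->info;
 *	cqp_info->cqp_cmd = IRDMA_OP_FOO;
 *	cqp_info->post_sq = 1;
 *	cqp_info->in.u.foo.scratch = (uintptr_t)cqp_request;
 *	status = irdma_handle_cqp_op(rf, cqp_request);
 *	irdma_put_cqp_request(&rf->cqp, cqp_request);
 */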
qp_info->cq_num_valid = true; qp_info->next_iwarp_state = IRDMA_QP_STATE_RTS; cqp_info->cqp_cmd = IRDMA_OP_QP_CREATE; cqp_info->post_sq = 1; cqp_info->in.u.qp_create.qp = qp; cqp_info->in.u.qp_create.scratch = (uintptr_t)cqp_request; status = irdma_handle_cqp_op(rf, cqp_request); irdma_put_cqp_request(iwcqp, cqp_request); return status; } /** * irdma_dealloc_push_page - free a push page for qp * @rf: RDMA PCI function * @qp: hardware control qp */ void irdma_dealloc_push_page(struct irdma_pci_f *rf, struct irdma_sc_qp *qp) { struct irdma_cqp_request *cqp_request; struct cqp_cmds_info *cqp_info; int status; if (qp->push_idx == IRDMA_INVALID_PUSH_PAGE_INDEX) return; cqp_request = irdma_alloc_and_get_cqp_request(&rf->cqp, false); if (!cqp_request) return; cqp_info = &cqp_request->info; cqp_info->cqp_cmd = IRDMA_OP_MANAGE_PUSH_PAGE; cqp_info->post_sq = 1; cqp_info->in.u.manage_push_page.info.push_idx = qp->push_idx; cqp_info->in.u.manage_push_page.info.qs_handle = qp->qs_handle; cqp_info->in.u.manage_push_page.info.free_page = 1; cqp_info->in.u.manage_push_page.info.push_page_type = 0; cqp_info->in.u.manage_push_page.cqp = &rf->cqp.sc_cqp; cqp_info->in.u.manage_push_page.scratch = (uintptr_t)cqp_request; status = irdma_handle_cqp_op(rf, cqp_request); if (!status) qp->push_idx = IRDMA_INVALID_PUSH_PAGE_INDEX; irdma_put_cqp_request(&rf->cqp, cqp_request); } /** * irdma_cq_wq_destroy - send cq destroy cqp * @rf: RDMA PCI function * @cq: hardware control cq */ void irdma_cq_wq_destroy(struct irdma_pci_f *rf, struct irdma_sc_cq *cq) { struct irdma_cqp_request *cqp_request; struct cqp_cmds_info *cqp_info; cqp_request = irdma_alloc_and_get_cqp_request(&rf->cqp, true); if (!cqp_request) return; cqp_info = &cqp_request->info; cqp_info->cqp_cmd = IRDMA_OP_CQ_DESTROY; cqp_info->post_sq = 1; cqp_info->in.u.cq_destroy.cq = cq; cqp_info->in.u.cq_destroy.scratch = (uintptr_t)cqp_request; irdma_handle_cqp_op(rf, cqp_request); irdma_put_cqp_request(&rf->cqp, cqp_request); } /** * irdma_hw_modify_qp_callback - handle state for modifyQPs that don't wait * @cqp_request: modify QP completion */ static void irdma_hw_modify_qp_callback(struct irdma_cqp_request *cqp_request) { struct cqp_cmds_info *cqp_info; struct irdma_qp *iwqp; cqp_info = &cqp_request->info; iwqp = cqp_info->in.u.qp_modify.qp->qp_uk.back_qp; atomic_dec(&iwqp->hw_mod_qp_pend); wake_up(&iwqp->mod_qp_waitq); } /** * irdma_hw_modify_qp - setup cqp for modify qp * @iwdev: RDMA device * @iwqp: qp ptr (user or kernel) * @info: info for modify qp * @wait: flag to wait or not for modify qp completion */ int irdma_hw_modify_qp(struct irdma_device *iwdev, struct irdma_qp *iwqp, struct irdma_modify_qp_info *info, bool wait) { int status; struct irdma_pci_f *rf = iwdev->rf; struct irdma_cqp_request *cqp_request; struct cqp_cmds_info *cqp_info; struct irdma_modify_qp_info *m_info; cqp_request = irdma_alloc_and_get_cqp_request(&rf->cqp, wait); if (!cqp_request) return -ENOMEM; if (!wait) { cqp_request->callback_fcn = irdma_hw_modify_qp_callback; atomic_inc(&iwqp->hw_mod_qp_pend); } cqp_info = &cqp_request->info; m_info = &cqp_info->in.u.qp_modify.info; memcpy(m_info, info, sizeof(*m_info)); cqp_info->cqp_cmd = IRDMA_OP_QP_MODIFY; cqp_info->post_sq = 1; cqp_info->in.u.qp_modify.qp = &iwqp->sc_qp; cqp_info->in.u.qp_modify.scratch = (uintptr_t)cqp_request; status = irdma_handle_cqp_op(rf, cqp_request); irdma_put_cqp_request(&rf->cqp, cqp_request); if (status) { if (rdma_protocol_roce(&iwdev->ibdev, 1)) return status; switch (m_info->next_iwarp_state) { struct 
irdma_gen_ae_info ae_info; case IRDMA_QP_STATE_RTS: case IRDMA_QP_STATE_IDLE: case IRDMA_QP_STATE_TERMINATE: case IRDMA_QP_STATE_CLOSING: if (info->curr_iwarp_state == IRDMA_QP_STATE_IDLE) irdma_send_reset(iwqp->cm_node); else iwqp->sc_qp.term_flags = IRDMA_TERM_DONE; if (!wait) { ae_info.ae_code = IRDMA_AE_BAD_CLOSE; ae_info.ae_src = 0; irdma_gen_ae(rf, &iwqp->sc_qp, &ae_info, false); } else { cqp_request = irdma_alloc_and_get_cqp_request(&rf->cqp, wait); if (!cqp_request) return -ENOMEM; cqp_info = &cqp_request->info; m_info = &cqp_info->in.u.qp_modify.info; memcpy(m_info, info, sizeof(*m_info)); cqp_info->cqp_cmd = IRDMA_OP_QP_MODIFY; cqp_info->post_sq = 1; cqp_info->in.u.qp_modify.qp = &iwqp->sc_qp; cqp_info->in.u.qp_modify.scratch = (uintptr_t)cqp_request; m_info->next_iwarp_state = IRDMA_QP_STATE_ERROR; m_info->reset_tcp_conn = true; irdma_handle_cqp_op(rf, cqp_request); irdma_put_cqp_request(&rf->cqp, cqp_request); } break; case IRDMA_QP_STATE_ERROR: default: break; } } return status; } /** * irdma_cqp_cq_destroy_cmd - destroy the cqp cq * @dev: device pointer * @cq: pointer to cq */ void irdma_cqp_cq_destroy_cmd(struct irdma_sc_dev *dev, struct irdma_sc_cq *cq) { struct irdma_pci_f *rf = dev_to_rf(dev); irdma_cq_wq_destroy(rf, cq); } /** * irdma_cqp_qp_destroy_cmd - destroy the cqp * @dev: device pointer * @qp: pointer to qp */ int irdma_cqp_qp_destroy_cmd(struct irdma_sc_dev *dev, struct irdma_sc_qp *qp) { struct irdma_pci_f *rf = dev_to_rf(dev); struct irdma_cqp *iwcqp = &rf->cqp; struct irdma_cqp_request *cqp_request; struct cqp_cmds_info *cqp_info; int status; cqp_request = irdma_alloc_and_get_cqp_request(iwcqp, true); if (!cqp_request) return -ENOMEM; cqp_info = &cqp_request->info; memset(cqp_info, 0, sizeof(*cqp_info)); cqp_info->cqp_cmd = IRDMA_OP_QP_DESTROY; cqp_info->post_sq = 1; cqp_info->in.u.qp_destroy.qp = qp; cqp_info->in.u.qp_destroy.scratch = (uintptr_t)cqp_request; cqp_info->in.u.qp_destroy.remove_hash_idx = true; status = irdma_handle_cqp_op(rf, cqp_request); irdma_put_cqp_request(&rf->cqp, cqp_request); return status; } /** * irdma_ieq_mpa_crc_ae - generate AE for crc error * @dev: hardware control device structure * @qp: hardware control qp */ void irdma_ieq_mpa_crc_ae(struct irdma_sc_dev *dev, struct irdma_sc_qp *qp) { struct irdma_gen_ae_info info = {0}; struct irdma_pci_f *rf = dev_to_rf(dev); irdma_debug(&rf->sc_dev, IRDMA_DEBUG_AEQ, "Generate MPA CRC AE\n"); info.ae_code = IRDMA_AE_LLP_RECEIVED_MPA_CRC_ERROR; info.ae_src = IRDMA_AE_SOURCE_RQ; irdma_gen_ae(rf, qp, &info, false); } /** * irdma_ieq_get_qp - get qp based on quad in puda buffer * @dev: hardware control device structure * @buf: receive puda buffer on exception q */ struct irdma_sc_qp * irdma_ieq_get_qp(struct irdma_sc_dev *dev, struct irdma_puda_buf *buf) { struct irdma_qp *iwqp; struct irdma_cm_node *cm_node; struct irdma_device *iwdev = buf->vsi->back_vsi; u32 loc_addr[4] = {0}; u32 rem_addr[4] = {0}; u16 loc_port, rem_port; struct ip6_hdr *ip6h; struct ip *iph = (struct ip *)buf->iph; struct tcphdr *tcph = (struct tcphdr *)buf->tcph; if (iph->ip_v == 4) { loc_addr[0] = ntohl(iph->ip_dst.s_addr); rem_addr[0] = ntohl(iph->ip_src.s_addr); } else { ip6h = (struct ip6_hdr *)buf->iph; irdma_copy_ip_ntohl(loc_addr, ip6h->ip6_dst.__u6_addr.__u6_addr32); irdma_copy_ip_ntohl(rem_addr, ip6h->ip6_src.__u6_addr.__u6_addr32); } loc_port = ntohs(tcph->th_dport); rem_port = ntohs(tcph->th_sport); cm_node = irdma_find_node(&iwdev->cm_core, rem_port, rem_addr, loc_port, loc_addr, buf->vlan_valid ? 
buf->vlan_id : 0xFFFF); if (!cm_node) return NULL; iwqp = cm_node->iwqp; irdma_rem_ref_cm_node(cm_node); return &iwqp->sc_qp; } /** * irdma_send_ieq_ack - ACKs for duplicate or OOO partials FPDUs * @qp: qp ptr */ void irdma_send_ieq_ack(struct irdma_sc_qp *qp) { struct irdma_cm_node *cm_node = ((struct irdma_qp *)qp->qp_uk.back_qp)->cm_node; struct irdma_puda_buf *buf = qp->pfpdu.lastrcv_buf; struct tcphdr *tcph = (struct tcphdr *)buf->tcph; cm_node->tcp_cntxt.rcv_nxt = qp->pfpdu.nextseqnum; cm_node->tcp_cntxt.loc_seq_num = ntohl(tcph->th_ack); irdma_send_ack(cm_node); } /** * irdma_puda_ieq_get_ah_info - get AH info from IEQ buffer * @qp: qp pointer * @ah_info: AH info pointer */ void irdma_puda_ieq_get_ah_info(struct irdma_sc_qp *qp, struct irdma_ah_info *ah_info) { struct irdma_puda_buf *buf = qp->pfpdu.ah_buf; struct ip *iph; struct ip6_hdr *ip6h; memset(ah_info, 0, sizeof(*ah_info)); ah_info->do_lpbk = true; ah_info->vlan_tag = buf->vlan_id; ah_info->insert_vlan_tag = buf->vlan_valid; ah_info->ipv4_valid = buf->ipv4; ah_info->vsi = qp->vsi; if (buf->smac_valid) ether_addr_copy(ah_info->mac_addr, buf->smac); if (buf->ipv4) { ah_info->ipv4_valid = true; iph = (struct ip *)buf->iph; ah_info->hop_ttl = iph->ip_ttl; ah_info->tc_tos = iph->ip_tos; ah_info->dest_ip_addr[0] = ntohl(iph->ip_dst.s_addr); ah_info->src_ip_addr[0] = ntohl(iph->ip_src.s_addr); } else { ip6h = (struct ip6_hdr *)buf->iph; ah_info->hop_ttl = ip6h->ip6_hops; ah_info->tc_tos = ip6h->ip6_vfc; irdma_copy_ip_ntohl(ah_info->dest_ip_addr, ip6h->ip6_dst.__u6_addr.__u6_addr32); irdma_copy_ip_ntohl(ah_info->src_ip_addr, ip6h->ip6_src.__u6_addr.__u6_addr32); } ah_info->dst_arpindex = irdma_arp_table(dev_to_rf(qp->dev), ah_info->dest_ip_addr, NULL, IRDMA_ARP_RESOLVE); } /** * irdma_gen1_ieq_update_tcpip_info - update tcpip in the buffer * @buf: puda to update * @len: length of buffer * @seqnum: seq number for tcp */ static void irdma_gen1_ieq_update_tcpip_info(struct irdma_puda_buf *buf, u16 len, u32 seqnum) { struct tcphdr *tcph; struct ip *iph; u16 iphlen; u16 pktsize; u8 *addr = buf->mem.va; iphlen = (buf->ipv4) ? 20 : 40; iph = (struct ip *)(addr + buf->maclen); tcph = (struct tcphdr *)(addr + buf->maclen + iphlen); pktsize = len + buf->tcphlen + iphlen; iph->ip_len = htons(pktsize); tcph->th_seq = htonl(seqnum); } /** * irdma_ieq_update_tcpip_info - update tcpip in the buffer * @buf: puda to update * @len: length of buffer * @seqnum: seq number for tcp */ void irdma_ieq_update_tcpip_info(struct irdma_puda_buf *buf, u16 len, u32 seqnum) { struct tcphdr *tcph; u8 *addr; if (buf->vsi->dev->hw_attrs.uk_attrs.hw_rev == IRDMA_GEN_1) return irdma_gen1_ieq_update_tcpip_info(buf, len, seqnum); addr = buf->mem.va; tcph = (struct tcphdr *)addr; tcph->th_seq = htonl(seqnum); } /** * irdma_gen1_puda_get_tcpip_info - get tcpip info from puda * buffer * @info: to get information * @buf: puda buffer */ static int irdma_gen1_puda_get_tcpip_info(struct irdma_puda_cmpl_info *info, struct irdma_puda_buf *buf) { struct ip *iph; struct ip6_hdr *ip6h; struct tcphdr *tcph; u16 iphlen; u16 pkt_len; u8 *mem = buf->mem.va; struct ether_header *ethh = buf->mem.va; if (ethh->ether_type == htons(0x8100)) { info->vlan_valid = true; buf->vlan_id = ntohs(((struct ether_vlan_header *)ethh)->evl_tag) & EVL_VLID_MASK; } buf->maclen = (info->vlan_valid) ? 18 : 14; iphlen = (info->l3proto) ? 40 : 20; buf->ipv4 = (info->l3proto) ? 
false : true; buf->iph = mem + buf->maclen; iph = (struct ip *)buf->iph; buf->tcph = buf->iph + iphlen; tcph = (struct tcphdr *)buf->tcph; if (buf->ipv4) { pkt_len = ntohs(iph->ip_len); } else { ip6h = (struct ip6_hdr *)buf->iph; pkt_len = ntohs(ip6h->ip6_plen) + iphlen; } buf->totallen = pkt_len + buf->maclen; if (info->payload_len < buf->totallen) { irdma_debug(buf->vsi->dev, IRDMA_DEBUG_ERR, "payload_len = 0x%x totallen expected0x%x\n", info->payload_len, buf->totallen); return -EINVAL; } buf->tcphlen = tcph->th_off << 2; buf->datalen = pkt_len - iphlen - buf->tcphlen; buf->data = buf->datalen ? buf->tcph + buf->tcphlen : NULL; buf->hdrlen = buf->maclen + iphlen + buf->tcphlen; buf->seqnum = ntohl(tcph->th_seq); return 0; } /** * irdma_puda_get_tcpip_info - get tcpip info from puda buffer * @info: to get information * @buf: puda buffer */ int irdma_puda_get_tcpip_info(struct irdma_puda_cmpl_info *info, struct irdma_puda_buf *buf) { struct tcphdr *tcph; u32 pkt_len; u8 *mem; if (buf->vsi->dev->hw_attrs.uk_attrs.hw_rev == IRDMA_GEN_1) return irdma_gen1_puda_get_tcpip_info(info, buf); mem = buf->mem.va; buf->vlan_valid = info->vlan_valid; if (info->vlan_valid) buf->vlan_id = info->vlan; buf->ipv4 = info->ipv4; if (buf->ipv4) buf->iph = mem + IRDMA_IPV4_PAD; else buf->iph = mem; buf->tcph = mem + IRDMA_TCP_OFFSET; tcph = (struct tcphdr *)buf->tcph; pkt_len = info->payload_len; buf->totallen = pkt_len; buf->tcphlen = tcph->th_off << 2; buf->datalen = pkt_len - IRDMA_TCP_OFFSET - buf->tcphlen; buf->data = buf->datalen ? buf->tcph + buf->tcphlen : NULL; buf->hdrlen = IRDMA_TCP_OFFSET + buf->tcphlen; buf->seqnum = ntohl(tcph->th_seq); if (info->smac_valid) { ether_addr_copy(buf->smac, info->smac); buf->smac_valid = true; } return 0; } /** * irdma_hw_stats_timeout - Stats timer-handler which updates all HW stats * @t: timer_list pointer */ static void irdma_hw_stats_timeout(struct timer_list *t) { struct irdma_vsi_pestat *pf_devstat = from_timer(pf_devstat, t, stats_timer); struct irdma_sc_vsi *sc_vsi = pf_devstat->vsi; if (sc_vsi->dev->hw_attrs.uk_attrs.hw_rev >= IRDMA_GEN_2) irdma_cqp_gather_stats_cmd(sc_vsi->dev, sc_vsi->pestat, false); mod_timer(&pf_devstat->stats_timer, jiffies + msecs_to_jiffies(STATS_TIMER_DELAY)); } /** * irdma_hw_stats_start_timer - Start periodic stats timer * @vsi: vsi structure pointer */ void irdma_hw_stats_start_timer(struct irdma_sc_vsi *vsi) { struct irdma_vsi_pestat *devstat = vsi->pestat; timer_setup(&devstat->stats_timer, irdma_hw_stats_timeout, 0); mod_timer(&devstat->stats_timer, jiffies + msecs_to_jiffies(STATS_TIMER_DELAY)); } /** * irdma_hw_stats_stop_timer - Delete periodic stats timer * @vsi: pointer to vsi structure */ void irdma_hw_stats_stop_timer(struct irdma_sc_vsi *vsi) { struct irdma_vsi_pestat *devstat = vsi->pestat; del_timer_sync(&devstat->stats_timer); } /** * irdma_process_stats - Checking for wrap and update stats * @pestat: stats structure pointer */ static inline void irdma_process_stats(struct irdma_vsi_pestat *pestat) { sc_vsi_update_stats(pestat->vsi); } /** * irdma_process_cqp_stats - Checking for wrap and update stats * @cqp_request: cqp_request structure pointer */ static void irdma_process_cqp_stats(struct irdma_cqp_request *cqp_request) { struct irdma_vsi_pestat *pestat = cqp_request->param; irdma_process_stats(pestat); } /** * irdma_cqp_gather_stats_cmd - Gather stats * @dev: pointer to device structure * @pestat: pointer to stats info * @wait: flag to wait or not wait for stats */ int irdma_cqp_gather_stats_cmd(struct 
irdma_sc_dev *dev, struct irdma_vsi_pestat *pestat, bool wait) { struct irdma_pci_f *rf = dev_to_rf(dev); struct irdma_cqp *iwcqp = &rf->cqp; struct irdma_cqp_request *cqp_request; struct cqp_cmds_info *cqp_info; int status; cqp_request = irdma_alloc_and_get_cqp_request(iwcqp, wait); if (!cqp_request) return -ENOMEM; cqp_info = &cqp_request->info; memset(cqp_info, 0, sizeof(*cqp_info)); cqp_info->cqp_cmd = IRDMA_OP_STATS_GATHER; cqp_info->post_sq = 1; cqp_info->in.u.stats_gather.info = pestat->gather_info; cqp_info->in.u.stats_gather.scratch = (uintptr_t)cqp_request; cqp_info->in.u.stats_gather.cqp = &rf->cqp.sc_cqp; cqp_request->param = pestat; if (!wait) cqp_request->callback_fcn = irdma_process_cqp_stats; status = irdma_handle_cqp_op(rf, cqp_request); if (wait) irdma_process_stats(pestat); irdma_put_cqp_request(&rf->cqp, cqp_request); return status; } /** * irdma_cqp_stats_inst_cmd - Allocate/free stats instance * @vsi: pointer to vsi structure * @cmd: command to allocate or free * @stats_info: pointer to allocate stats info */ int irdma_cqp_stats_inst_cmd(struct irdma_sc_vsi *vsi, u8 cmd, struct irdma_stats_inst_info *stats_info) { struct irdma_pci_f *rf = dev_to_rf(vsi->dev); struct irdma_cqp *iwcqp = &rf->cqp; struct irdma_cqp_request *cqp_request; struct cqp_cmds_info *cqp_info; int status; bool wait = false; if (cmd == IRDMA_OP_STATS_ALLOCATE) wait = true; cqp_request = irdma_alloc_and_get_cqp_request(iwcqp, wait); if (!cqp_request) return -ENOMEM; cqp_info = &cqp_request->info; memset(cqp_info, 0, sizeof(*cqp_info)); cqp_info->cqp_cmd = cmd; cqp_info->post_sq = 1; cqp_info->in.u.stats_manage.info = *stats_info; cqp_info->in.u.stats_manage.scratch = (uintptr_t)cqp_request; cqp_info->in.u.stats_manage.cqp = &rf->cqp.sc_cqp; status = irdma_handle_cqp_op(rf, cqp_request); if (wait) stats_info->stats_idx = cqp_request->compl_info.op_ret_val; irdma_put_cqp_request(iwcqp, cqp_request); return status; } /** * irdma_cqp_ceq_cmd - Create/Destroy CEQ's after CEQ 0 * @dev: pointer to device info * @sc_ceq: pointer to ceq structure * @op: Create or Destroy */ int irdma_cqp_ceq_cmd(struct irdma_sc_dev *dev, struct irdma_sc_ceq *sc_ceq, u8 op) { struct irdma_cqp_request *cqp_request; struct cqp_cmds_info *cqp_info; struct irdma_pci_f *rf = dev_to_rf(dev); int status; cqp_request = irdma_alloc_and_get_cqp_request(&rf->cqp, true); if (!cqp_request) return -ENOMEM; cqp_info = &cqp_request->info; cqp_info->post_sq = 1; cqp_info->cqp_cmd = op; cqp_info->in.u.ceq_create.ceq = sc_ceq; cqp_info->in.u.ceq_create.scratch = (uintptr_t)cqp_request; status = irdma_handle_cqp_op(rf, cqp_request); irdma_put_cqp_request(&rf->cqp, cqp_request); return status; } /** * irdma_cqp_aeq_cmd - Create/Destroy AEQ * @dev: pointer to device info * @sc_aeq: pointer to aeq structure * @op: Create or Destroy */ int irdma_cqp_aeq_cmd(struct irdma_sc_dev *dev, struct irdma_sc_aeq *sc_aeq, u8 op) { struct irdma_cqp_request *cqp_request; struct cqp_cmds_info *cqp_info; struct irdma_pci_f *rf = dev_to_rf(dev); int status; cqp_request = irdma_alloc_and_get_cqp_request(&rf->cqp, true); if (!cqp_request) return -ENOMEM; cqp_info = &cqp_request->info; cqp_info->post_sq = 1; cqp_info->cqp_cmd = op; cqp_info->in.u.aeq_create.aeq = sc_aeq; cqp_info->in.u.aeq_create.scratch = (uintptr_t)cqp_request; status = irdma_handle_cqp_op(rf, cqp_request); irdma_put_cqp_request(&rf->cqp, cqp_request); return status; } /** * irdma_cqp_ws_node_cmd - Add/modify/delete ws node * @dev: pointer to device structure * @cmd: Add, modify or delete * 
@node_info: pointer to ws node info */ int irdma_cqp_ws_node_cmd(struct irdma_sc_dev *dev, u8 cmd, struct irdma_ws_node_info *node_info) { struct irdma_pci_f *rf = dev_to_rf(dev); struct irdma_cqp *iwcqp = &rf->cqp; struct irdma_sc_cqp *cqp = &iwcqp->sc_cqp; struct irdma_cqp_request *cqp_request; struct cqp_cmds_info *cqp_info; int status; bool poll; if (!rf->sc_dev.ceq_valid) poll = true; else poll = false; cqp_request = irdma_alloc_and_get_cqp_request(iwcqp, !poll); if (!cqp_request) return -ENOMEM; cqp_info = &cqp_request->info; memset(cqp_info, 0, sizeof(*cqp_info)); cqp_info->cqp_cmd = cmd; cqp_info->post_sq = 1; cqp_info->in.u.ws_node.info = *node_info; cqp_info->in.u.ws_node.cqp = cqp; cqp_info->in.u.ws_node.scratch = (uintptr_t)cqp_request; status = irdma_handle_cqp_op(rf, cqp_request); if (status) goto exit; if (poll) { struct irdma_ccq_cqe_info compl_info; status = irdma_sc_poll_for_cqp_op_done(cqp, IRDMA_CQP_OP_WORK_SCHED_NODE, &compl_info); node_info->qs_handle = compl_info.op_ret_val; irdma_debug(&rf->sc_dev, IRDMA_DEBUG_DCB, "opcode=%d, compl_info.retval=%d\n", compl_info.op_code, compl_info.op_ret_val); } else { node_info->qs_handle = cqp_request->compl_info.op_ret_val; } exit: irdma_put_cqp_request(&rf->cqp, cqp_request); return status; } -/** - * irdma_cqp_up_map_cmd - Set the up-up mapping - * @dev: pointer to device structure - * @cmd: map command - * @map_info: pointer to up map info - */ -int -irdma_cqp_up_map_cmd(struct irdma_sc_dev *dev, u8 cmd, - struct irdma_up_info *map_info) -{ - struct irdma_pci_f *rf = dev_to_rf(dev); - struct irdma_cqp *iwcqp = &rf->cqp; - struct irdma_sc_cqp *cqp = &iwcqp->sc_cqp; - struct irdma_cqp_request *cqp_request; - struct cqp_cmds_info *cqp_info; - int status; - - cqp_request = irdma_alloc_and_get_cqp_request(iwcqp, false); - if (!cqp_request) - return -ENOMEM; - - cqp_info = &cqp_request->info; - memset(cqp_info, 0, sizeof(*cqp_info)); - cqp_info->cqp_cmd = cmd; - cqp_info->post_sq = 1; - cqp_info->in.u.up_map.info = *map_info; - cqp_info->in.u.up_map.cqp = cqp; - cqp_info->in.u.up_map.scratch = (uintptr_t)cqp_request; - - status = irdma_handle_cqp_op(rf, cqp_request); - irdma_put_cqp_request(&rf->cqp, cqp_request); - - return status; -} - /** * irdma_ah_cqp_op - perform an AH cqp operation * @rf: RDMA PCI function * @sc_ah: address handle * @cmd: AH operation * @wait: wait if true * @callback_fcn: Callback function on CQP op completion * @cb_param: parameter for callback function * * returns errno */ int irdma_ah_cqp_op(struct irdma_pci_f *rf, struct irdma_sc_ah *sc_ah, u8 cmd, bool wait, void (*callback_fcn) (struct irdma_cqp_request *), void *cb_param) { struct irdma_cqp_request *cqp_request; struct cqp_cmds_info *cqp_info; int status; if (cmd != IRDMA_OP_AH_CREATE && cmd != IRDMA_OP_AH_DESTROY) return -EINVAL; cqp_request = irdma_alloc_and_get_cqp_request(&rf->cqp, wait); if (!cqp_request) return -ENOMEM; cqp_info = &cqp_request->info; cqp_info->cqp_cmd = cmd; cqp_info->post_sq = 1; if (cmd == IRDMA_OP_AH_CREATE) { cqp_info->in.u.ah_create.info = sc_ah->ah_info; cqp_info->in.u.ah_create.scratch = (uintptr_t)cqp_request; cqp_info->in.u.ah_create.cqp = &rf->cqp.sc_cqp; } else if (cmd == IRDMA_OP_AH_DESTROY) { cqp_info->in.u.ah_destroy.info = sc_ah->ah_info; cqp_info->in.u.ah_destroy.scratch = (uintptr_t)cqp_request; cqp_info->in.u.ah_destroy.cqp = &rf->cqp.sc_cqp; } if (!wait) { cqp_request->callback_fcn = callback_fcn; cqp_request->param = cb_param; } status = irdma_handle_cqp_op(rf, cqp_request); irdma_put_cqp_request(&rf->cqp, 
cqp_request); if (status) return -ENOMEM; if (wait) - sc_ah->ah_info.ah_valid = (cmd == IRDMA_OP_AH_CREATE); + sc_ah->ah_info.ah_valid = (cmd != IRDMA_OP_AH_DESTROY); return 0; } /** * irdma_ieq_ah_cb - callback after creation of AH for IEQ * @cqp_request: pointer to cqp_request of create AH */ static void irdma_ieq_ah_cb(struct irdma_cqp_request *cqp_request) { struct irdma_sc_qp *qp = cqp_request->param; struct irdma_sc_ah *sc_ah = qp->pfpdu.ah; unsigned long flags; spin_lock_irqsave(&qp->pfpdu.lock, flags); if (!cqp_request->compl_info.op_ret_val) { sc_ah->ah_info.ah_valid = true; irdma_ieq_process_fpdus(qp, qp->vsi->ieq); } else { sc_ah->ah_info.ah_valid = false; irdma_ieq_cleanup_qp(qp->vsi->ieq, qp); } spin_unlock_irqrestore(&qp->pfpdu.lock, flags); } /** * irdma_ilq_ah_cb - callback after creation of AH for ILQ * @cqp_request: pointer to cqp_request of create AH */ static void irdma_ilq_ah_cb(struct irdma_cqp_request *cqp_request) { struct irdma_cm_node *cm_node = cqp_request->param; struct irdma_sc_ah *sc_ah = cm_node->ah; sc_ah->ah_info.ah_valid = !cqp_request->compl_info.op_ret_val; irdma_add_conn_est_qh(cm_node); } /** * irdma_puda_create_ah - create AH for ILQ/IEQ qp's * @dev: device pointer * @ah_info: Address handle info * @wait: When true will wait for operation to complete * @type: ILQ/IEQ * @cb_param: Callback param when not waiting * @ah_ret: Returned pointer to address handle if created * */ int irdma_puda_create_ah(struct irdma_sc_dev *dev, struct irdma_ah_info *ah_info, bool wait, enum puda_rsrc_type type, void *cb_param, struct irdma_sc_ah **ah_ret) { struct irdma_sc_ah *ah; struct irdma_pci_f *rf = dev_to_rf(dev); int err; ah = kzalloc(sizeof(*ah), GFP_ATOMIC); *ah_ret = ah; if (!ah) return -ENOMEM; err = irdma_alloc_rsrc(rf, rf->allocated_ahs, rf->max_ah, &ah_info->ah_idx, &rf->next_ah); if (err) goto err_free; ah->dev = dev; ah->ah_info = *ah_info; if (type == IRDMA_PUDA_RSRC_TYPE_ILQ) err = irdma_ah_cqp_op(rf, ah, IRDMA_OP_AH_CREATE, wait, irdma_ilq_ah_cb, cb_param); else err = irdma_ah_cqp_op(rf, ah, IRDMA_OP_AH_CREATE, wait, irdma_ieq_ah_cb, cb_param); if (err) goto error; return 0; error: irdma_free_rsrc(rf, rf->allocated_ahs, ah->ah_info.ah_idx); err_free: kfree(ah); *ah_ret = NULL; return -ENOMEM; } /** * irdma_puda_free_ah - free a puda address handle * @dev: device pointer * @ah: The address handle to free */ void irdma_puda_free_ah(struct irdma_sc_dev *dev, struct irdma_sc_ah *ah) { struct irdma_pci_f *rf = dev_to_rf(dev); if (!ah) return; if (ah->ah_info.ah_valid) { irdma_ah_cqp_op(rf, ah, IRDMA_OP_AH_DESTROY, false, NULL, NULL); irdma_free_rsrc(rf, rf->allocated_ahs, ah->ah_info.ah_idx); } kfree(ah); } /** * irdma_gsi_ud_qp_ah_cb - callback after creation of AH for GSI/UD QP * @cqp_request: pointer to cqp_request of create AH */ void irdma_gsi_ud_qp_ah_cb(struct irdma_cqp_request *cqp_request) { struct irdma_sc_ah *sc_ah = cqp_request->param; if (!cqp_request->compl_info.op_ret_val) sc_ah->ah_info.ah_valid = true; else sc_ah->ah_info.ah_valid = false; } /** * irdma_prm_add_pble_mem - add memory to pble resources * @pprm: pble resource manager * @pchunk: chunk of memory to add */ int irdma_prm_add_pble_mem(struct irdma_pble_prm *pprm, struct irdma_chunk *pchunk) { u64 sizeofbitmap; if (pchunk->size & 0xfff) return -EINVAL; sizeofbitmap = (u64)pchunk->size >> pprm->pble_shift; - pchunk->bitmapmem.size = sizeofbitmap >> 3; - pchunk->bitmapmem.va = kzalloc(pchunk->bitmapmem.size, GFP_KERNEL); - - if (!pchunk->bitmapmem.va) + pchunk->bitmapbuf =
bitmap_zalloc(sizeofbitmap, GFP_KERNEL); + if (!pchunk->bitmapbuf) return -ENOMEM; - pchunk->bitmapbuf = pchunk->bitmapmem.va; - bitmap_zero(pchunk->bitmapbuf, sizeofbitmap); - pchunk->sizeofbitmap = sizeofbitmap; /* each pble is 8 bytes hence shift by 3 */ pprm->total_pble_alloc += pchunk->size >> 3; pprm->free_pble_cnt += pchunk->size >> 3; return 0; } /** * irdma_prm_get_pbles - get pble's from prm * @pprm: pble resource manager * @chunkinfo: information about chunk where pble's were acquired * @mem_size: size of pble memory needed * @vaddr: returns virtual address of pble memory * @fpm_addr: returns fpm address of pble memory */ int irdma_prm_get_pbles(struct irdma_pble_prm *pprm, struct irdma_pble_chunkinfo *chunkinfo, u64 mem_size, u64 **vaddr, u64 *fpm_addr) { u64 bits_needed; u64 bit_idx = PBLE_INVALID_IDX; struct irdma_chunk *pchunk = NULL; struct list_head *chunk_entry = (&pprm->clist)->next; u32 offset; unsigned long flags; *vaddr = NULL; *fpm_addr = 0; bits_needed = DIV_ROUND_UP_ULL(mem_size, BIT_ULL(pprm->pble_shift)); spin_lock_irqsave(&pprm->prm_lock, flags); while (chunk_entry != &pprm->clist) { pchunk = (struct irdma_chunk *)chunk_entry; bit_idx = bitmap_find_next_zero_area(pchunk->bitmapbuf, pchunk->sizeofbitmap, 0, bits_needed, 0); if (bit_idx < pchunk->sizeofbitmap) break; /* list.next used macro */ chunk_entry = (&pchunk->list)->next; } if (!pchunk || bit_idx >= pchunk->sizeofbitmap) { spin_unlock_irqrestore(&pprm->prm_lock, flags); return -ENOMEM; } bitmap_set(pchunk->bitmapbuf, bit_idx, bits_needed); offset = bit_idx << pprm->pble_shift; *vaddr = (u64 *)((u8 *)pchunk->vaddr + offset); *fpm_addr = pchunk->fpm_addr + offset; chunkinfo->pchunk = pchunk; chunkinfo->bit_idx = bit_idx; chunkinfo->bits_used = bits_needed; /* 3 is sizeof pble divide */ pprm->free_pble_cnt -= chunkinfo->bits_used << (pprm->pble_shift - 3); spin_unlock_irqrestore(&pprm->prm_lock, flags); return 0; } /** * irdma_prm_return_pbles - return pbles back to prm * @pprm: pble resource manager * @chunkinfo: chunk where pble's were acquired and to be freed */ void irdma_prm_return_pbles(struct irdma_pble_prm *pprm, struct irdma_pble_chunkinfo *chunkinfo) { unsigned long flags; spin_lock_irqsave(&pprm->prm_lock, flags); pprm->free_pble_cnt += chunkinfo->bits_used << (pprm->pble_shift - 3); bitmap_clear(chunkinfo->pchunk->bitmapbuf, chunkinfo->bit_idx, chunkinfo->bits_used); spin_unlock_irqrestore(&pprm->prm_lock, flags); } int irdma_map_vm_page_list(struct irdma_hw *hw, void *va, dma_addr_t * pg_dma, u32 pg_cnt) { struct page *vm_page; int i; u8 *addr; addr = (u8 *)(uintptr_t)va; for (i = 0; i < pg_cnt; i++) { vm_page = vmalloc_to_page(addr); if (!vm_page) goto err; pg_dma[i] = dma_map_page(hw_to_dev(hw), vm_page, 0, PAGE_SIZE, DMA_BIDIRECTIONAL); if (dma_mapping_error(hw_to_dev(hw), pg_dma[i])) goto err; addr += PAGE_SIZE; } return 0; err: irdma_unmap_vm_page_list(hw, pg_dma, i); return -ENOMEM; } void irdma_unmap_vm_page_list(struct irdma_hw *hw, dma_addr_t * pg_dma, u32 pg_cnt) { int i; for (i = 0; i < pg_cnt; i++) dma_unmap_page(hw_to_dev(hw), pg_dma[i], PAGE_SIZE, DMA_BIDIRECTIONAL); } /** * irdma_pble_free_paged_mem - free virtual paged memory * @chunk: chunk to free with paged memory */ void irdma_pble_free_paged_mem(struct irdma_chunk *chunk) { if (!chunk->pg_cnt) goto done; irdma_unmap_vm_page_list(chunk->dev->hw, chunk->dmainfo.dmaaddrs, chunk->pg_cnt); done: kfree(chunk->dmainfo.dmaaddrs); chunk->dmainfo.dmaaddrs = NULL; vfree(chunk->vaddr); chunk->vaddr = NULL; chunk->type = 0; } /** *
irdma_pble_get_paged_mem -allocate paged memory for pbles * @chunk: chunk to add for paged memory * @pg_cnt: number of pages needed */ int irdma_pble_get_paged_mem(struct irdma_chunk *chunk, u32 pg_cnt) { u32 size; void *va; chunk->dmainfo.dmaaddrs = kzalloc(pg_cnt << 3, GFP_KERNEL); if (!chunk->dmainfo.dmaaddrs) return -ENOMEM; size = PAGE_SIZE * pg_cnt; va = vmalloc(size); if (!va) goto err; if (irdma_map_vm_page_list(chunk->dev->hw, va, chunk->dmainfo.dmaaddrs, pg_cnt)) { vfree(va); goto err; } chunk->vaddr = va; chunk->size = size; chunk->pg_cnt = pg_cnt; chunk->type = PBLE_SD_PAGED; return 0; err: kfree(chunk->dmainfo.dmaaddrs); chunk->dmainfo.dmaaddrs = NULL; return -ENOMEM; } /** * irdma_alloc_ws_node_id - Allocate a tx scheduler node ID * @dev: device pointer */ u16 irdma_alloc_ws_node_id(struct irdma_sc_dev *dev) { struct irdma_pci_f *rf = dev_to_rf(dev); u32 next = 1; u32 node_id; if (irdma_alloc_rsrc(rf, rf->allocated_ws_nodes, rf->max_ws_node_id, &node_id, &next)) return IRDMA_WS_NODE_INVALID; return (u16)node_id; } /** * irdma_free_ws_node_id - Free a tx scheduler node ID * @dev: device pointer * @node_id: Work scheduler node ID */ void irdma_free_ws_node_id(struct irdma_sc_dev *dev, u16 node_id) { struct irdma_pci_f *rf = dev_to_rf(dev); irdma_free_rsrc(rf, rf->allocated_ws_nodes, (u32)node_id); } /** * irdma_modify_qp_to_err - Modify a QP to error * @sc_qp: qp structure */ void irdma_modify_qp_to_err(struct irdma_sc_qp *sc_qp) { struct irdma_qp *qp = sc_qp->qp_uk.back_qp; struct ib_qp_attr attr; if (qp->iwdev->rf->reset) return; attr.qp_state = IB_QPS_ERR; if (rdma_protocol_roce(qp->ibqp.device, 1)) irdma_modify_qp_roce(&qp->ibqp, &attr, IB_QP_STATE, NULL); else irdma_modify_qp(&qp->ibqp, &attr, IB_QP_STATE, NULL); } void irdma_ib_qp_event(struct irdma_qp *iwqp, enum irdma_qp_event_type event) { struct ib_event ibevent; if (!iwqp->ibqp.event_handler) return; switch (event) { case IRDMA_QP_EVENT_CATASTROPHIC: ibevent.event = IB_EVENT_QP_FATAL; break; case IRDMA_QP_EVENT_ACCESS_ERR: ibevent.event = IB_EVENT_QP_ACCESS_ERR; break; case IRDMA_QP_EVENT_REQ_ERR: ibevent.event = IB_EVENT_QP_REQ_ERR; break; } ibevent.device = iwqp->ibqp.device; ibevent.element.qp = &iwqp->ibqp; iwqp->ibqp.event_handler(&ibevent, iwqp->ibqp.qp_context); } static void clear_qp_ctx_addr(__le64 * ctx) { u64 tmp; get_64bit_val(ctx, 272, &tmp); tmp &= GENMASK_ULL(63, 58); set_64bit_val(ctx, 272, tmp); get_64bit_val(ctx, 296, &tmp); tmp &= GENMASK_ULL(7, 0); set_64bit_val(ctx, 296, tmp); get_64bit_val(ctx, 312, &tmp); tmp &= GENMASK_ULL(7, 0); set_64bit_val(ctx, 312, tmp); set_64bit_val(ctx, 368, 0); } /** * irdma_upload_qp_context - upload raw QP context * @iwqp: QP pointer * @freeze: freeze QP * @raw: raw context flag */ int irdma_upload_qp_context(struct irdma_qp *iwqp, bool freeze, bool raw) { struct irdma_dma_mem dma_mem; struct irdma_sc_dev *dev; struct irdma_sc_qp *qp; struct irdma_cqp *iwcqp; struct irdma_cqp_request *cqp_request; struct cqp_cmds_info *cqp_info; struct irdma_upload_context_info *info; struct irdma_pci_f *rf; int ret; u32 *ctx; rf = iwqp->iwdev->rf; if (!rf) return -EINVAL; qp = &iwqp->sc_qp; dev = &rf->sc_dev; iwcqp = &rf->cqp; cqp_request = irdma_alloc_and_get_cqp_request(iwcqp, true); if (!cqp_request) return -EINVAL; cqp_info = &cqp_request->info; info = &cqp_info->in.u.qp_upload_context.info; memset(info, 0, sizeof(struct irdma_upload_context_info)); cqp_info->cqp_cmd = IRDMA_OP_QP_UPLOAD_CONTEXT; cqp_info->post_sq = 1; cqp_info->in.u.qp_upload_context.dev = dev; 
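/*
 * Editor's note: 'scratch' is an opaque cookie written into the CQP WQE and
 * echoed back in the CCQ completion; the completion side casts it back to
 * the originating cqp_request to wake a waiter or run its callback. Hedged
 * sketch of that consumer side (field names as used elsewhere in this diff,
 * control flow illustrative rather than verbatim driver code):
 *
 *	cqp_request = (struct irdma_cqp_request *)(uintptr_t)info.scratch;
 *	cqp_request->compl_info.maj_err_code = info.maj_err_code;
 *	cqp_request->compl_info.min_err_code = info.min_err_code;
 *	if (cqp_request->waiting)
 *		... wake the thread blocked in irdma_wait_event() ...
 *	else if (cqp_request->callback_fcn)
 *		cqp_request->callback_fcn(cqp_request);
 */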
cqp_info->in.u.qp_upload_context.scratch = (uintptr_t)cqp_request; dma_mem.size = PAGE_SIZE; dma_mem.va = irdma_allocate_dma_mem(dev->hw, &dma_mem, dma_mem.size, PAGE_SIZE); if (!dma_mem.va) { irdma_put_cqp_request(&rf->cqp, cqp_request); return -ENOMEM; } ctx = dma_mem.va; info->buf_pa = dma_mem.pa; info->raw_format = raw; info->freeze_qp = freeze; info->qp_type = qp->qp_uk.qp_type; /* 1 is iWARP and 2 UDA */ info->qp_id = qp->qp_uk.qp_id; ret = irdma_handle_cqp_op(rf, cqp_request); if (ret) goto error; irdma_debug(dev, IRDMA_DEBUG_QP, "PRINT CONTXT QP [%d]\n", info->qp_id); { u32 i, j; clear_qp_ctx_addr(dma_mem.va); for (i = 0, j = 0; i < 32; i++, j += 4) irdma_debug(dev, IRDMA_DEBUG_QP, - "%d:\t [%08X %08x %08X %08X]\n", - (j * 4), ctx[j], ctx[j + 1], ctx[j + 2], - ctx[j + 3]); + "%d:\t [%08X %08x %08X %08X]\n", (j * 4), + ctx[j], ctx[j + 1], ctx[j + 2], ctx[j + 3]); } error: irdma_put_cqp_request(iwcqp, cqp_request); irdma_free_dma_mem(dev->hw, &dma_mem); return ret; } bool irdma_cq_empty(struct irdma_cq *iwcq) { struct irdma_cq_uk *ukcq; u64 qword3; __le64 *cqe; u8 polarity; ukcq = &iwcq->sc_cq.cq_uk; cqe = IRDMA_GET_CURRENT_CQ_ELEM(ukcq); get_64bit_val(cqe, 24, &qword3); polarity = (u8)FIELD_GET(IRDMA_CQ_VALID, qword3); return polarity != ukcq->polarity; } void irdma_remove_cmpls_list(struct irdma_cq *iwcq) { struct irdma_cmpl_gen *cmpl_node; struct list_head *tmp_node, *list_node; list_for_each_safe(list_node, tmp_node, &iwcq->cmpl_generated) { cmpl_node = list_entry(list_node, struct irdma_cmpl_gen, list); list_del(&cmpl_node->list); kfree(cmpl_node); } } int irdma_generated_cmpls(struct irdma_cq *iwcq, struct irdma_cq_poll_info *cq_poll_info) { struct irdma_cmpl_gen *cmpl; if (list_empty(&iwcq->cmpl_generated)) return -ENOENT; cmpl = list_first_entry_or_null(&iwcq->cmpl_generated, struct irdma_cmpl_gen, list); list_del(&cmpl->list); memcpy(cq_poll_info, &cmpl->cpi, sizeof(*cq_poll_info)); kfree(cmpl); irdma_debug(iwcq->sc_cq.dev, IRDMA_DEBUG_VERBS, "%s: Poll artificially generated completion for QP 0x%X, op %u, wr_id=0x%lx\n", __func__, cq_poll_info->qp_id, cq_poll_info->op_type, cq_poll_info->wr_id); return 0; } /** * irdma_set_cpi_common_values - fill in values for polling info struct * @cpi: resulting structure of cq_poll_info type * @qp: QPair * @qp_num: id of the QP */ static void irdma_set_cpi_common_values(struct irdma_cq_poll_info *cpi, struct irdma_qp_uk *qp, u32 qp_num) { cpi->comp_status = IRDMA_COMPL_STATUS_FLUSHED; cpi->error = 1; cpi->major_err = IRDMA_FLUSH_MAJOR_ERR; cpi->minor_err = FLUSH_GENERAL_ERR; cpi->qp_handle = (irdma_qp_handle) (uintptr_t)qp; cpi->qp_id = qp_num; } static inline void irdma_comp_handler(struct irdma_cq *cq) { if (!cq->ibcq.comp_handler) return; if (atomic_cmpxchg(&cq->armed, 1, 0)) cq->ibcq.comp_handler(&cq->ibcq, cq->ibcq.cq_context); } /** * irdma_generate_flush_completions - generate completion from WRs * @iwqp: pointer to QP */ void irdma_generate_flush_completions(struct irdma_qp *iwqp) { struct irdma_qp_uk *qp = &iwqp->sc_qp.qp_uk; struct irdma_ring *sq_ring = &qp->sq_ring; struct irdma_ring *rq_ring = &qp->rq_ring; struct irdma_cmpl_gen *cmpl; __le64 *sw_wqe; u64 wqe_qword; u32 wqe_idx; bool compl_generated = false; unsigned long flags1; spin_lock_irqsave(&iwqp->iwscq->lock, flags1); if (irdma_cq_empty(iwqp->iwscq)) { unsigned long flags2; spin_lock_irqsave(&iwqp->lock, flags2); while (IRDMA_RING_MORE_WORK(*sq_ring)) { cmpl = kzalloc(sizeof(*cmpl), GFP_ATOMIC); if (!cmpl) { spin_unlock_irqrestore(&iwqp->lock, flags2); 
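/*
 * Editor's note: lock order in this function is CQ lock (iwscq->lock) outer,
 * QP lock (iwqp->lock) inner. In this allocation-failure bail-out the inner
 * QP lock has just been released above, and the outer CQ lock is released
 * next, mirroring the acquisition order. Intended nesting, as a sketch:
 *
 *	lock(iwscq->lock)        -- CQ lock, outer
 *	    lock(iwqp->lock)     -- QP lock, inner
 *	    ... generate software flush completions ...
 *	    unlock(iwqp->lock)
 *	unlock(iwscq->lock)
 */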
spin_unlock_irqrestore(&iwqp->iwscq->lock, flags1); return; } wqe_idx = sq_ring->tail; irdma_set_cpi_common_values(&cmpl->cpi, qp, qp->qp_id); cmpl->cpi.wr_id = qp->sq_wrtrk_array[wqe_idx].wrid; cmpl->cpi.signaled = qp->sq_wrtrk_array[wqe_idx].signaled; sw_wqe = qp->sq_base[wqe_idx].elem; get_64bit_val(sw_wqe, IRDMA_BYTE_24, &wqe_qword); cmpl->cpi.op_type = (u8)FIELD_GET(IRDMAQPSQ_OPCODE, wqe_qword); cmpl->cpi.q_type = IRDMA_CQE_QTYPE_SQ; /* remove the SQ WR by moving SQ tail */ IRDMA_RING_SET_TAIL(*sq_ring, sq_ring->tail + qp->sq_wrtrk_array[sq_ring->tail].quanta); if (cmpl->cpi.op_type == IRDMAQP_OP_NOP) { kfree(cmpl); continue; } irdma_debug(iwqp->sc_qp.dev, IRDMA_DEBUG_DEV, "%s: adding wr_id = 0x%lx SQ Completion to list qp_id=%d\n", __func__, cmpl->cpi.wr_id, qp->qp_id); list_add_tail(&cmpl->list, &iwqp->iwscq->cmpl_generated); compl_generated = true; } spin_unlock_irqrestore(&iwqp->lock, flags2); spin_unlock_irqrestore(&iwqp->iwscq->lock, flags1); if (compl_generated) { irdma_comp_handler(iwqp->iwscq); compl_generated = false; } } else { spin_unlock_irqrestore(&iwqp->iwscq->lock, flags1); irdma_sched_qp_flush_work(iwqp); } spin_lock_irqsave(&iwqp->iwrcq->lock, flags1); if (irdma_cq_empty(iwqp->iwrcq)) { unsigned long flags2; spin_lock_irqsave(&iwqp->lock, flags2); while (IRDMA_RING_MORE_WORK(*rq_ring)) { cmpl = kzalloc(sizeof(*cmpl), GFP_ATOMIC); if (!cmpl) { spin_unlock_irqrestore(&iwqp->lock, flags2); spin_unlock_irqrestore(&iwqp->iwrcq->lock, flags1); return; } wqe_idx = rq_ring->tail; irdma_set_cpi_common_values(&cmpl->cpi, qp, qp->qp_id); cmpl->cpi.wr_id = qp->rq_wrid_array[wqe_idx]; cmpl->cpi.signaled = 1; cmpl->cpi.op_type = IRDMA_OP_TYPE_REC; cmpl->cpi.q_type = IRDMA_CQE_QTYPE_RQ; /* remove the RQ WR by moving RQ tail */ IRDMA_RING_SET_TAIL(*rq_ring, rq_ring->tail + 1); irdma_debug(iwqp->sc_qp.dev, IRDMA_DEBUG_DEV, "%s: adding wr_id = 0x%lx RQ Completion to list qp_id=%d, wqe_idx=%d\n", __func__, cmpl->cpi.wr_id, qp->qp_id, wqe_idx); list_add_tail(&cmpl->list, &iwqp->iwrcq->cmpl_generated); compl_generated = true; } spin_unlock_irqrestore(&iwqp->lock, flags2); spin_unlock_irqrestore(&iwqp->iwrcq->lock, flags1); if (compl_generated) irdma_comp_handler(iwqp->iwrcq); } else { spin_unlock_irqrestore(&iwqp->iwrcq->lock, flags1); irdma_sched_qp_flush_work(iwqp); } } /** * irdma_udqp_qs_change - change qs for UD QP in a worker thread * @iwqp: QP pointer * @user_prio: new user priority value * @qs_change: when false, only user priority changes, QS handle do not need to change */ static void irdma_udqp_qs_change(struct irdma_qp *iwqp, u8 user_prio, bool qs_change) { irdma_qp_rem_qos(&iwqp->sc_qp); if (qs_change) iwqp->sc_qp.dev->ws_remove(iwqp->sc_qp.vsi, iwqp->ctx_info.user_pri); iwqp->ctx_info.user_pri = user_prio; iwqp->sc_qp.user_pri = user_prio; if (qs_change) if (iwqp->sc_qp.dev->ws_add(iwqp->sc_qp.vsi, user_prio)) irdma_dev_warn(&iwqp->iwdev->ibdev, "WS add failed during %s, qp_id: %x user_pri: %x", __func__, iwqp->ibqp.qp_num, user_prio); irdma_qp_add_qos(&iwqp->sc_qp); } void irdma_udqp_qs_worker(struct work_struct *work) { struct irdma_udqs_work *udqs_work = container_of(work, struct irdma_udqs_work, work); irdma_udqp_qs_change(udqs_work->iwqp, udqs_work->user_prio, udqs_work->qs_change); if (udqs_work->qs_change) irdma_cqp_qp_suspend_resume(&udqs_work->iwqp->sc_qp, IRDMA_OP_RESUME); irdma_qp_rem_ref(&udqs_work->iwqp->ibqp); kfree(udqs_work); } diff --git a/sys/dev/irdma/irdma_verbs.c b/sys/dev/irdma/irdma_verbs.c index f0792696b490..8b7b34f7b2fb 100644 --- 
a/sys/dev/irdma/irdma_verbs.c +++ b/sys/dev/irdma/irdma_verbs.c @@ -1,3760 +1,3793 @@ /*- * SPDX-License-Identifier: GPL-2.0 or Linux-OpenIB * - * Copyright (c) 2015 - 2022 Intel Corporation + * Copyright (c) 2015 - 2023 Intel Corporation * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU * General Public License (GPL) Version 2, available from the file * COPYING in the main directory of this source tree, or the * OpenFabrics.org BSD license below: * * Redistribution and use in source and binary forms, with or * without modification, are permitted provided that the following * conditions are met: * * - Redistributions of source code must retain the above * copyright notice, this list of conditions and the following * disclaimer. * * - Redistributions in binary form must reproduce the above * copyright notice, this list of conditions and the following * disclaimer in the documentation and/or other materials * provided with the distribution. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ #include "irdma_main.h" /** * irdma_query_device - get device attributes * @ibdev: device pointer from stack * @props: returning device attributes * @udata: user data */ static int irdma_query_device(struct ib_device *ibdev, struct ib_device_attr *props, struct ib_udata *udata) { struct irdma_device *iwdev = to_iwdev(ibdev); struct irdma_pci_f *rf = iwdev->rf; struct pci_dev *pcidev = iwdev->rf->pcidev; struct irdma_hw_attrs *hw_attrs = &rf->sc_dev.hw_attrs; if (udata->inlen || udata->outlen) return -EINVAL; memset(props, 0, sizeof(*props)); addrconf_addr_eui48((u8 *)&props->sys_image_guid, if_getlladdr(iwdev->netdev)); props->fw_ver = (u64)irdma_fw_major_ver(&rf->sc_dev) << 32 | irdma_fw_minor_ver(&rf->sc_dev); props->device_cap_flags = IB_DEVICE_MEM_WINDOW | IB_DEVICE_MEM_MGT_EXTENSIONS; props->device_cap_flags |= IB_DEVICE_LOCAL_DMA_LKEY; props->vendor_id = pcidev->vendor; props->vendor_part_id = pcidev->device; props->hw_ver = pcidev->revision; props->page_size_cap = hw_attrs->page_size_cap; props->max_mr_size = hw_attrs->max_mr_size; props->max_qp = rf->max_qp - rf->used_qps; props->max_qp_wr = hw_attrs->max_qp_wr; set_max_sge(props, rf); props->max_cq = rf->max_cq - rf->used_cqs; props->max_cqe = rf->max_cqe - 1; props->max_mr = rf->max_mr - rf->used_mrs; props->max_mw = props->max_mr; props->max_pd = rf->max_pd - rf->used_pds; props->max_sge_rd = hw_attrs->uk_attrs.max_hw_read_sges; props->max_qp_rd_atom = hw_attrs->max_hw_ird; props->max_qp_init_rd_atom = hw_attrs->max_hw_ord; if (rdma_protocol_roce(ibdev, 1)) { props->device_cap_flags |= IB_DEVICE_RC_RNR_NAK_GEN; props->max_pkeys = IRDMA_PKEY_TBL_SZ; props->max_ah = rf->max_ah; if (hw_attrs->uk_attrs.hw_rev == IRDMA_GEN_2) { props->max_mcast_grp = rf->max_mcg; props->max_mcast_qp_attach = IRDMA_MAX_MGS_PER_CTX; props->max_total_mcast_qp_attach = rf->max_qp * IRDMA_MAX_MGS_PER_CTX; } } props->max_fast_reg_page_list_len = IRDMA_MAX_PAGES_PER_FMR; if (hw_attrs->uk_attrs.hw_rev >= IRDMA_GEN_2) props->device_cap_flags |= 
IB_DEVICE_MEM_WINDOW_TYPE_2B; return 0; } static int irdma_mmap_legacy(struct irdma_ucontext *ucontext, struct vm_area_struct *vma) { u64 pfn; if (vma->vm_pgoff || vma->vm_end - vma->vm_start != PAGE_SIZE) return -EINVAL; vma->vm_private_data = ucontext; pfn = ((uintptr_t)ucontext->iwdev->rf->sc_dev.hw_regs[IRDMA_DB_ADDR_OFFSET] + pci_resource_start(ucontext->iwdev->rf->pcidev, 0)) >> PAGE_SHIFT; #if __FreeBSD_version >= 1400026 return rdma_user_mmap_io(&ucontext->ibucontext, vma, pfn, PAGE_SIZE, pgprot_noncached(vma->vm_page_prot), NULL); #else return rdma_user_mmap_io(&ucontext->ibucontext, vma, pfn, PAGE_SIZE, pgprot_noncached(vma->vm_page_prot)); #endif } #if __FreeBSD_version >= 1400026 static void irdma_mmap_free(struct rdma_user_mmap_entry *rdma_entry) { struct irdma_user_mmap_entry *entry = to_irdma_mmap_entry(rdma_entry); kfree(entry); } struct rdma_user_mmap_entry * irdma_user_mmap_entry_insert(struct irdma_ucontext *ucontext, u64 bar_offset, enum irdma_mmap_flag mmap_flag, u64 *mmap_offset) { struct irdma_user_mmap_entry *entry = kzalloc(sizeof(*entry), GFP_KERNEL); int ret; if (!entry) return NULL; entry->bar_offset = bar_offset; entry->mmap_flag = mmap_flag; ret = rdma_user_mmap_entry_insert(&ucontext->ibucontext, &entry->rdma_entry, PAGE_SIZE); if (ret) { kfree(entry); return NULL; } *mmap_offset = rdma_user_mmap_get_offset(&entry->rdma_entry); return &entry->rdma_entry; } #else static inline bool find_key_in_mmap_tbl(struct irdma_ucontext *ucontext, u64 key) { struct irdma_user_mmap_entry *entry; HASH_FOR_EACH_POSSIBLE(ucontext->mmap_hash_tbl, entry, hlist, key) { if (entry->pgoff_key == key) return true; } return false; } struct irdma_user_mmap_entry * irdma_user_mmap_entry_add_hash(struct irdma_ucontext *ucontext, u64 bar_offset, enum irdma_mmap_flag mmap_flag, u64 *mmap_offset) { struct irdma_user_mmap_entry *entry = kzalloc(sizeof(*entry), GFP_KERNEL); unsigned long flags; int retry_cnt = 0; if (!entry) return NULL; entry->bar_offset = bar_offset; entry->mmap_flag = mmap_flag; entry->ucontext = ucontext; do { get_random_bytes(&entry->pgoff_key, sizeof(entry->pgoff_key)); /* The key is a page offset */ entry->pgoff_key >>= PAGE_SHIFT; /* In the event of a collision in the hash table, retry a new key */ spin_lock_irqsave(&ucontext->mmap_tbl_lock, flags); if (!find_key_in_mmap_tbl(ucontext, entry->pgoff_key)) { HASH_ADD(ucontext->mmap_hash_tbl, &entry->hlist, entry->pgoff_key); spin_unlock_irqrestore(&ucontext->mmap_tbl_lock, flags); goto hash_add_done; } spin_unlock_irqrestore(&ucontext->mmap_tbl_lock, flags); } while (retry_cnt++ < 10); irdma_debug(&ucontext->iwdev->rf->sc_dev, IRDMA_DEBUG_VERBS, "mmap table add failed: Cannot find a unique key\n"); kfree(entry); return NULL; hash_add_done: /* libc mmap uses a byte offset */ *mmap_offset = entry->pgoff_key << PAGE_SHIFT; return entry; } static struct irdma_user_mmap_entry * irdma_find_user_mmap_entry(struct irdma_ucontext *ucontext, struct vm_area_struct *vma) { struct irdma_user_mmap_entry *entry; unsigned long flags; if (vma->vm_end - vma->vm_start != PAGE_SIZE) return NULL; spin_lock_irqsave(&ucontext->mmap_tbl_lock, flags); HASH_FOR_EACH_POSSIBLE(ucontext->mmap_hash_tbl, entry, hlist, vma->vm_pgoff) { if (entry->pgoff_key == vma->vm_pgoff) { spin_unlock_irqrestore(&ucontext->mmap_tbl_lock, flags); return entry; } } spin_unlock_irqrestore(&ucontext->mmap_tbl_lock, flags); return NULL; } void irdma_user_mmap_entry_del_hash(struct irdma_user_mmap_entry *entry) { struct irdma_ucontext *ucontext; unsigned long flags; if 
(!entry) return; ucontext = entry->ucontext; spin_lock_irqsave(&ucontext->mmap_tbl_lock, flags); HASH_DEL(ucontext->mmap_hash_tbl, &entry->hlist); spin_unlock_irqrestore(&ucontext->mmap_tbl_lock, flags); kfree(entry); } #endif /** * irdma_mmap - user memory map * @context: context created during alloc * @vma: kernel info for user memory map */ static int irdma_mmap(struct ib_ucontext *context, struct vm_area_struct *vma) { #if __FreeBSD_version >= 1400026 struct rdma_user_mmap_entry *rdma_entry; #endif struct irdma_user_mmap_entry *entry; struct irdma_ucontext *ucontext; u64 pfn; int ret; ucontext = to_ucontext(context); /* Legacy support for libi40iw with hard-coded mmap key */ if (ucontext->legacy_mode) return irdma_mmap_legacy(ucontext, vma); #if __FreeBSD_version >= 1400026 rdma_entry = rdma_user_mmap_entry_get(&ucontext->ibucontext, vma); if (!rdma_entry) { irdma_debug(&ucontext->iwdev->rf->sc_dev, IRDMA_DEBUG_VERBS, "pgoff[0x%lx] does not have valid entry\n", vma->vm_pgoff); return -EINVAL; } entry = to_irdma_mmap_entry(rdma_entry); #else entry = irdma_find_user_mmap_entry(ucontext, vma); if (!entry) { irdma_debug(&ucontext->iwdev->rf->sc_dev, IRDMA_DEBUG_VERBS, "pgoff[0x%lx] does not have valid entry\n", vma->vm_pgoff); return -EINVAL; } #endif - irdma_debug(&ucontext->iwdev->rf->sc_dev, - IRDMA_DEBUG_VERBS, "bar_offset [0x%lx] mmap_flag [%d]\n", - entry->bar_offset, entry->mmap_flag); + irdma_debug(&ucontext->iwdev->rf->sc_dev, IRDMA_DEBUG_VERBS, + "bar_offset [0x%lx] mmap_flag [%d]\n", entry->bar_offset, + entry->mmap_flag); pfn = (entry->bar_offset + pci_resource_start(ucontext->iwdev->rf->pcidev, 0)) >> PAGE_SHIFT; switch (entry->mmap_flag) { case IRDMA_MMAP_IO_NC: #if __FreeBSD_version >= 1400026 ret = rdma_user_mmap_io(context, vma, pfn, PAGE_SIZE, pgprot_noncached(vma->vm_page_prot), rdma_entry); #else ret = rdma_user_mmap_io(context, vma, pfn, PAGE_SIZE, pgprot_noncached(vma->vm_page_prot)); #endif break; case IRDMA_MMAP_IO_WC: #if __FreeBSD_version >= 1400026 ret = rdma_user_mmap_io(context, vma, pfn, PAGE_SIZE, pgprot_writecombine(vma->vm_page_prot), rdma_entry); #else ret = rdma_user_mmap_io(context, vma, pfn, PAGE_SIZE, pgprot_writecombine(vma->vm_page_prot)); #endif break; default: ret = -EINVAL; } if (ret) irdma_debug(&ucontext->iwdev->rf->sc_dev, IRDMA_DEBUG_VERBS, "bar_offset [0x%lx] mmap_flag[%d] err[%d]\n", entry->bar_offset, entry->mmap_flag, ret); #if __FreeBSD_version >= 1400026 rdma_user_mmap_entry_put(rdma_entry); #endif return ret; } /** * irdma_alloc_push_page - allocate a push page for qp * @iwqp: qp pointer */ static void irdma_alloc_push_page(struct irdma_qp *iwqp) { struct irdma_cqp_request *cqp_request; struct cqp_cmds_info *cqp_info; struct irdma_device *iwdev = iwqp->iwdev; struct irdma_sc_qp *qp = &iwqp->sc_qp; int status; cqp_request = irdma_alloc_and_get_cqp_request(&iwdev->rf->cqp, true); if (!cqp_request) return; cqp_info = &cqp_request->info; cqp_info->cqp_cmd = IRDMA_OP_MANAGE_PUSH_PAGE; cqp_info->post_sq = 1; cqp_info->in.u.manage_push_page.info.push_idx = 0; cqp_info->in.u.manage_push_page.info.qs_handle = qp->vsi->qos[qp->user_pri].qs_handle; cqp_info->in.u.manage_push_page.info.free_page = 0; cqp_info->in.u.manage_push_page.info.push_page_type = 0; cqp_info->in.u.manage_push_page.cqp = &iwdev->rf->cqp.sc_cqp; cqp_info->in.u.manage_push_page.scratch = (uintptr_t)cqp_request; status = irdma_handle_cqp_op(iwdev->rf, cqp_request); if (!status && cqp_request->compl_info.op_ret_val < iwdev->rf->sc_dev.hw_attrs.max_hw_device_pages) { qp->push_idx 
= cqp_request->compl_info.op_ret_val; qp->push_offset = 0; } irdma_put_cqp_request(&iwdev->rf->cqp, cqp_request); } /** * irdma_get_pbl - Retrieve pbl from a list given a virtual * address * @va: user virtual address * @pbl_list: pbl list to search in (QP's or CQ's) */ struct irdma_pbl * irdma_get_pbl(unsigned long va, struct list_head *pbl_list) { struct irdma_pbl *iwpbl; list_for_each_entry(iwpbl, pbl_list, list) { if (iwpbl->user_base == va) { list_del(&iwpbl->list); iwpbl->on_list = false; return iwpbl; } } return NULL; } /** * irdma_clean_cqes - clean cq entries for qp * @iwqp: qp ptr (user or kernel) * @iwcq: cq ptr */ void irdma_clean_cqes(struct irdma_qp *iwqp, struct irdma_cq *iwcq) { struct irdma_cq_uk *ukcq = &iwcq->sc_cq.cq_uk; unsigned long flags; spin_lock_irqsave(&iwcq->lock, flags); irdma_uk_clean_cq(&iwqp->sc_qp.qp_uk, ukcq); spin_unlock_irqrestore(&iwcq->lock, flags); } static u64 irdma_compute_push_wqe_offset(struct irdma_device *iwdev, u32 page_idx){ u64 bar_off = (uintptr_t)iwdev->rf->sc_dev.hw_regs[IRDMA_DB_ADDR_OFFSET]; if (iwdev->rf->sc_dev.hw_attrs.uk_attrs.hw_rev == IRDMA_GEN_2) { /* skip over db page */ bar_off += IRDMA_HW_PAGE_SIZE; /* skip over reserved space */ bar_off += IRDMA_PF_BAR_RSVD; } /* push wqe page */ bar_off += (u64)page_idx * IRDMA_HW_PAGE_SIZE; return bar_off; } void irdma_remove_push_mmap_entries(struct irdma_qp *iwqp) { if (iwqp->push_db_mmap_entry) { #if __FreeBSD_version >= 1400026 rdma_user_mmap_entry_remove(iwqp->push_db_mmap_entry); #else irdma_user_mmap_entry_del_hash(iwqp->push_db_mmap_entry); #endif iwqp->push_db_mmap_entry = NULL; } if (iwqp->push_wqe_mmap_entry) { #if __FreeBSD_version >= 1400026 rdma_user_mmap_entry_remove(iwqp->push_wqe_mmap_entry); #else irdma_user_mmap_entry_del_hash(iwqp->push_wqe_mmap_entry); #endif iwqp->push_wqe_mmap_entry = NULL; } } static int irdma_setup_push_mmap_entries(struct irdma_ucontext *ucontext, struct irdma_qp *iwqp, u64 *push_wqe_mmap_key, u64 *push_db_mmap_key) { struct irdma_device *iwdev = ucontext->iwdev; u64 bar_off; WARN_ON_ONCE(iwdev->rf->sc_dev.hw_attrs.uk_attrs.hw_rev < IRDMA_GEN_2); bar_off = irdma_compute_push_wqe_offset(iwdev, iwqp->sc_qp.push_idx); #if __FreeBSD_version >= 1400026 iwqp->push_wqe_mmap_entry = irdma_user_mmap_entry_insert(ucontext, bar_off, IRDMA_MMAP_IO_WC, push_wqe_mmap_key); #else iwqp->push_wqe_mmap_entry = irdma_user_mmap_entry_add_hash(ucontext, bar_off, IRDMA_MMAP_IO_WC, push_wqe_mmap_key); #endif if (!iwqp->push_wqe_mmap_entry) return -ENOMEM; /* push doorbell page */ bar_off += IRDMA_HW_PAGE_SIZE; #if __FreeBSD_version >= 1400026 iwqp->push_db_mmap_entry = irdma_user_mmap_entry_insert(ucontext, bar_off, IRDMA_MMAP_IO_NC, push_db_mmap_key); #else iwqp->push_db_mmap_entry = irdma_user_mmap_entry_add_hash(ucontext, bar_off, IRDMA_MMAP_IO_NC, push_db_mmap_key); #endif if (!iwqp->push_db_mmap_entry) { #if __FreeBSD_version >= 1400026 rdma_user_mmap_entry_remove(iwqp->push_wqe_mmap_entry); #else irdma_user_mmap_entry_del_hash(iwqp->push_wqe_mmap_entry); #endif return -ENOMEM; } return 0; } /** * irdma_setup_virt_qp - setup for allocation of virtual qp * @iwdev: irdma device * @iwqp: qp ptr * @init_info: initialize info to return */ void irdma_setup_virt_qp(struct irdma_device *iwdev, struct irdma_qp *iwqp, struct irdma_qp_init_info *init_info) { struct irdma_pbl *iwpbl = iwqp->iwpbl; struct irdma_qp_mr *qpmr = &iwpbl->qp_mr; iwqp->page = qpmr->sq_page; init_info->shadow_area_pa = qpmr->shadow; if (iwpbl->pbl_allocated) { init_info->virtual_map = true; 
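/*
 * Editor's note: with virtual_map set, the sq_pa/rq_pa fields below are
 * reused to carry PBLE indices rather than physical addresses; the else
 * branch (no PBL allocated) passes real physical addresses instead. The
 * contrast, read directly from the two branches (no new driver code implied):
 *
 *	pbl_allocated:  init_info->sq_pa = qpmr->sq_pbl.idx;   // PBLE index
 *	contiguous:     init_info->sq_pa = qpmr->sq_pbl.addr;  // physical addr
 */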
init_info->sq_pa = qpmr->sq_pbl.idx; init_info->rq_pa = qpmr->rq_pbl.idx; } else { init_info->sq_pa = qpmr->sq_pbl.addr; init_info->rq_pa = qpmr->rq_pbl.addr; } } /** * irdma_setup_umode_qp - setup sq and rq size in user mode qp * @udata: user data * @iwdev: iwarp device * @iwqp: qp ptr (user or kernel) * @info: initialize info to return * @init_attr: Initial QP create attributes */ int irdma_setup_umode_qp(struct ib_udata *udata, struct irdma_device *iwdev, struct irdma_qp *iwqp, struct irdma_qp_init_info *info, struct ib_qp_init_attr *init_attr) { #if __FreeBSD_version >= 1400026 struct irdma_ucontext *ucontext = rdma_udata_to_drv_context(udata, struct irdma_ucontext, ibucontext); #else struct irdma_ucontext *ucontext = to_ucontext(iwqp->iwpd->ibpd.uobject->context); #endif struct irdma_qp_uk_init_info *ukinfo = &info->qp_uk_init_info; struct irdma_create_qp_req req = {0}; unsigned long flags; int ret; ret = ib_copy_from_udata(&req, udata, min(sizeof(req), udata->inlen)); if (ret) { irdma_debug(&iwdev->rf->sc_dev, IRDMA_DEBUG_VERBS, "ib_copy_from_data fail\n"); return ret; } iwqp->ctx_info.qp_compl_ctx = req.user_compl_ctx; iwqp->user_mode = 1; if (req.user_wqe_bufs) { info->qp_uk_init_info.legacy_mode = ucontext->legacy_mode; spin_lock_irqsave(&ucontext->qp_reg_mem_list_lock, flags); iwqp->iwpbl = irdma_get_pbl((unsigned long)req.user_wqe_bufs, &ucontext->qp_reg_mem_list); spin_unlock_irqrestore(&ucontext->qp_reg_mem_list_lock, flags); if (!iwqp->iwpbl) { ret = -ENODATA; irdma_debug(&iwdev->rf->sc_dev, IRDMA_DEBUG_VERBS, "no pbl info\n"); return ret; } } if (!ucontext->use_raw_attrs) { /** * Maintain backward compat with older ABI which passes sq and * rq depth in quanta in cap.max_send_wr and cap.max_recv_wr. * There is no way to compute the correct value of * iwqp->max_send_wr/max_recv_wr in the kernel. 
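* The user-supplied values are therefore used and reported back unchanged.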
*/ iwqp->max_send_wr = init_attr->cap.max_send_wr; iwqp->max_recv_wr = init_attr->cap.max_recv_wr; ukinfo->sq_size = init_attr->cap.max_send_wr; ukinfo->rq_size = init_attr->cap.max_recv_wr; irdma_uk_calc_shift_wq(ukinfo, &ukinfo->sq_shift, &ukinfo->rq_shift); } else { ret = irdma_uk_calc_depth_shift_sq(ukinfo, &ukinfo->sq_depth, &ukinfo->sq_shift); if (ret) return ret; ret = irdma_uk_calc_depth_shift_rq(ukinfo, &ukinfo->rq_depth, &ukinfo->rq_shift); if (ret) return ret; iwqp->max_send_wr = (ukinfo->sq_depth - IRDMA_SQ_RSVD) >> ukinfo->sq_shift; iwqp->max_recv_wr = (ukinfo->rq_depth - IRDMA_RQ_RSVD) >> ukinfo->rq_shift; ukinfo->sq_size = ukinfo->sq_depth >> ukinfo->sq_shift; ukinfo->rq_size = ukinfo->rq_depth >> ukinfo->rq_shift; } irdma_setup_virt_qp(iwdev, iwqp, info); return 0; } /** * irdma_setup_kmode_qp - setup initialization for kernel mode qp * @iwdev: iwarp device * @iwqp: qp ptr (user or kernel) * @info: initialize info to return * @init_attr: Initial QP create attributes */ int irdma_setup_kmode_qp(struct irdma_device *iwdev, struct irdma_qp *iwqp, struct irdma_qp_init_info *info, struct ib_qp_init_attr *init_attr) { struct irdma_dma_mem *mem = &iwqp->kqp.dma_mem; u32 size; int status; struct irdma_qp_uk_init_info *ukinfo = &info->qp_uk_init_info; status = irdma_uk_calc_depth_shift_sq(ukinfo, &ukinfo->sq_depth, &ukinfo->sq_shift); if (status) return status; status = irdma_uk_calc_depth_shift_rq(ukinfo, &ukinfo->rq_depth, &ukinfo->rq_shift); if (status) return status; iwqp->kqp.sq_wrid_mem = kcalloc(ukinfo->sq_depth, sizeof(*iwqp->kqp.sq_wrid_mem), GFP_KERNEL); if (!iwqp->kqp.sq_wrid_mem) return -ENOMEM; iwqp->kqp.rq_wrid_mem = kcalloc(ukinfo->rq_depth, sizeof(*iwqp->kqp.rq_wrid_mem), GFP_KERNEL); if (!iwqp->kqp.rq_wrid_mem) { kfree(iwqp->kqp.sq_wrid_mem); iwqp->kqp.sq_wrid_mem = NULL; return -ENOMEM; } iwqp->kqp.sig_trk_mem = kcalloc(ukinfo->sq_depth, sizeof(u32), GFP_KERNEL); memset(iwqp->kqp.sig_trk_mem, 0, ukinfo->sq_depth * sizeof(u32)); if (!iwqp->kqp.sig_trk_mem) { kfree(iwqp->kqp.sq_wrid_mem); iwqp->kqp.sq_wrid_mem = NULL; kfree(iwqp->kqp.rq_wrid_mem); iwqp->kqp.rq_wrid_mem = NULL; return -ENOMEM; } ukinfo->sq_sigwrtrk_array = (void *)iwqp->kqp.sig_trk_mem; ukinfo->sq_wrtrk_array = iwqp->kqp.sq_wrid_mem; ukinfo->rq_wrid_array = iwqp->kqp.rq_wrid_mem; size = (ukinfo->sq_depth + ukinfo->rq_depth) * IRDMA_QP_WQE_MIN_SIZE; size += (IRDMA_SHADOW_AREA_SIZE << 3); mem->size = size; mem->va = irdma_allocate_dma_mem(&iwdev->rf->hw, mem, mem->size, 256); if (!mem->va) { kfree(iwqp->kqp.sq_wrid_mem); iwqp->kqp.sq_wrid_mem = NULL; kfree(iwqp->kqp.rq_wrid_mem); iwqp->kqp.rq_wrid_mem = NULL; return -ENOMEM; } ukinfo->sq = mem->va; info->sq_pa = mem->pa; ukinfo->rq = &ukinfo->sq[ukinfo->sq_depth]; info->rq_pa = info->sq_pa + (ukinfo->sq_depth * IRDMA_QP_WQE_MIN_SIZE); ukinfo->shadow_area = ukinfo->rq[ukinfo->rq_depth].elem; info->shadow_area_pa = info->rq_pa + (ukinfo->rq_depth * IRDMA_QP_WQE_MIN_SIZE); ukinfo->sq_size = ukinfo->sq_depth >> ukinfo->sq_shift; ukinfo->rq_size = ukinfo->rq_depth >> ukinfo->rq_shift; ukinfo->qp_id = iwqp->ibqp.qp_num; iwqp->max_send_wr = (ukinfo->sq_depth - IRDMA_SQ_RSVD) >> ukinfo->sq_shift; iwqp->max_recv_wr = (ukinfo->rq_depth - IRDMA_RQ_RSVD) >> ukinfo->rq_shift; init_attr->cap.max_send_wr = iwqp->max_send_wr; init_attr->cap.max_recv_wr = iwqp->max_recv_wr; return 0; } int irdma_cqp_create_qp_cmd(struct irdma_qp *iwqp) { struct irdma_pci_f *rf = iwqp->iwdev->rf; struct irdma_cqp_request *cqp_request; struct cqp_cmds_info *cqp_info; struct 
irdma_create_qp_info *qp_info; int status; cqp_request = irdma_alloc_and_get_cqp_request(&rf->cqp, true); if (!cqp_request) return -ENOMEM; cqp_info = &cqp_request->info; qp_info = &cqp_request->info.in.u.qp_create.info; memset(qp_info, 0, sizeof(*qp_info)); qp_info->mac_valid = true; qp_info->cq_num_valid = true; qp_info->next_iwarp_state = IRDMA_QP_STATE_IDLE; cqp_info->cqp_cmd = IRDMA_OP_QP_CREATE; cqp_info->post_sq = 1; cqp_info->in.u.qp_create.qp = &iwqp->sc_qp; cqp_info->in.u.qp_create.scratch = (uintptr_t)cqp_request; status = irdma_handle_cqp_op(rf, cqp_request); irdma_put_cqp_request(&rf->cqp, cqp_request); return status; } void irdma_roce_fill_and_set_qpctx_info(struct irdma_qp *iwqp, struct irdma_qp_host_ctx_info *ctx_info) { struct irdma_device *iwdev = iwqp->iwdev; struct irdma_sc_dev *dev = &iwdev->rf->sc_dev; struct irdma_roce_offload_info *roce_info; struct irdma_udp_offload_info *udp_info; udp_info = &iwqp->udp_info; udp_info->snd_mss = ib_mtu_enum_to_int(ib_mtu_int_to_enum(iwdev->vsi.mtu)); udp_info->cwnd = iwdev->roce_cwnd; udp_info->rexmit_thresh = 2; udp_info->rnr_nak_thresh = 2; udp_info->src_port = 0xc000; udp_info->dst_port = ROCE_V2_UDP_DPORT; roce_info = &iwqp->roce_info; ether_addr_copy(roce_info->mac_addr, if_getlladdr(iwdev->netdev)); roce_info->rd_en = true; roce_info->wr_rdresp_en = true; roce_info->bind_en = true; roce_info->dcqcn_en = false; roce_info->rtomin = iwdev->roce_rtomin; roce_info->ack_credits = iwdev->roce_ackcreds; roce_info->ird_size = dev->hw_attrs.max_hw_ird; roce_info->ord_size = dev->hw_attrs.max_hw_ord; if (!iwqp->user_mode) { roce_info->priv_mode_en = true; roce_info->fast_reg_en = true; roce_info->udprivcq_en = true; } roce_info->roce_tver = 0; ctx_info->roce_info = &iwqp->roce_info; ctx_info->udp_info = &iwqp->udp_info; irdma_sc_qp_setctx_roce(&iwqp->sc_qp, iwqp->host_ctx.va, ctx_info); } void irdma_iw_fill_and_set_qpctx_info(struct irdma_qp *iwqp, struct irdma_qp_host_ctx_info *ctx_info) { struct irdma_device *iwdev = iwqp->iwdev; struct irdma_sc_dev *dev = &iwdev->rf->sc_dev; struct irdma_iwarp_offload_info *iwarp_info; iwarp_info = &iwqp->iwarp_info; ether_addr_copy(iwarp_info->mac_addr, if_getlladdr(iwdev->netdev)); iwarp_info->rd_en = true; iwarp_info->wr_rdresp_en = true; iwarp_info->bind_en = true; iwarp_info->ecn_en = true; iwarp_info->rtomin = 5; if (dev->hw_attrs.uk_attrs.hw_rev >= IRDMA_GEN_2) iwarp_info->ib_rd_en = true; if (!iwqp->user_mode) { iwarp_info->priv_mode_en = true; iwarp_info->fast_reg_en = true; } iwarp_info->ddp_ver = 1; iwarp_info->rdmap_ver = 1; ctx_info->iwarp_info = &iwqp->iwarp_info; ctx_info->iwarp_info_valid = true; irdma_sc_qp_setctx(&iwqp->sc_qp, iwqp->host_ctx.va, ctx_info); ctx_info->iwarp_info_valid = false; } int irdma_validate_qp_attrs(struct ib_qp_init_attr *init_attr, struct irdma_device *iwdev) { struct irdma_sc_dev *dev = &iwdev->rf->sc_dev; struct irdma_uk_attrs *uk_attrs = &dev->hw_attrs.uk_attrs; if (init_attr->create_flags) return -EOPNOTSUPP; if (init_attr->cap.max_inline_data > uk_attrs->max_hw_inline || init_attr->cap.max_send_sge > uk_attrs->max_hw_wq_frags || init_attr->cap.max_recv_sge > uk_attrs->max_hw_wq_frags) return -EINVAL; if (rdma_protocol_roce(&iwdev->ibdev, 1)) { if (init_attr->qp_type != IB_QPT_RC && init_attr->qp_type != IB_QPT_UD && init_attr->qp_type != IB_QPT_GSI) return -EOPNOTSUPP; } else { if (init_attr->qp_type != IB_QPT_RC) return -EOPNOTSUPP; } return 0; } void irdma_sched_qp_flush_work(struct irdma_qp *iwqp) { + unsigned long flags; + if 
(iwqp->sc_qp.qp_uk.destroy_pending) return; irdma_qp_add_ref(&iwqp->ibqp); + spin_lock_irqsave(&iwqp->dwork_flush_lock, flags); if (mod_delayed_work(iwqp->iwdev->cleanup_wq, &iwqp->dwork_flush, msecs_to_jiffies(IRDMA_FLUSH_DELAY_MS))) irdma_qp_rem_ref(&iwqp->ibqp); + spin_unlock_irqrestore(&iwqp->dwork_flush_lock, flags); } void irdma_flush_worker(struct work_struct *work) { struct delayed_work *dwork = to_delayed_work(work); struct irdma_qp *iwqp = container_of(dwork, struct irdma_qp, dwork_flush); irdma_generate_flush_completions(iwqp); /* For the add in irdma_sched_qp_flush_work */ irdma_qp_rem_ref(&iwqp->ibqp); } static int irdma_get_ib_acc_flags(struct irdma_qp *iwqp) { int acc_flags = 0; if (rdma_protocol_roce(iwqp->ibqp.device, 1)) { if (iwqp->roce_info.wr_rdresp_en) { acc_flags |= IB_ACCESS_LOCAL_WRITE; acc_flags |= IB_ACCESS_REMOTE_WRITE; } if (iwqp->roce_info.rd_en) acc_flags |= IB_ACCESS_REMOTE_READ; if (iwqp->roce_info.bind_en) acc_flags |= IB_ACCESS_MW_BIND; } else { if (iwqp->iwarp_info.wr_rdresp_en) { acc_flags |= IB_ACCESS_LOCAL_WRITE; acc_flags |= IB_ACCESS_REMOTE_WRITE; } if (iwqp->iwarp_info.rd_en) acc_flags |= IB_ACCESS_REMOTE_READ; if (iwqp->iwarp_info.bind_en) acc_flags |= IB_ACCESS_MW_BIND; } return acc_flags; } /** * irdma_query_qp - query qp attributes * @ibqp: qp pointer * @attr: attributes pointer * @attr_mask: Not used * @init_attr: qp attributes to return */ static int irdma_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask, struct ib_qp_init_attr *init_attr) { struct irdma_qp *iwqp = to_iwqp(ibqp); struct irdma_sc_qp *qp = &iwqp->sc_qp; memset(attr, 0, sizeof(*attr)); memset(init_attr, 0, sizeof(*init_attr)); attr->qp_state = iwqp->ibqp_state; attr->cur_qp_state = iwqp->ibqp_state; attr->cap.max_send_wr = iwqp->max_send_wr; attr->cap.max_recv_wr = iwqp->max_recv_wr; attr->cap.max_inline_data = qp->qp_uk.max_inline_data; attr->cap.max_send_sge = qp->qp_uk.max_sq_frag_cnt; attr->cap.max_recv_sge = qp->qp_uk.max_rq_frag_cnt; attr->qp_access_flags = irdma_get_ib_acc_flags(iwqp); attr->port_num = 1; if (rdma_protocol_roce(ibqp->device, 1)) { attr->path_mtu = ib_mtu_int_to_enum(iwqp->udp_info.snd_mss); attr->qkey = iwqp->roce_info.qkey; attr->rq_psn = iwqp->udp_info.epsn; attr->sq_psn = iwqp->udp_info.psn_nxt; attr->dest_qp_num = iwqp->roce_info.dest_qp; attr->pkey_index = iwqp->roce_info.p_key; attr->retry_cnt = iwqp->udp_info.rexmit_thresh; attr->rnr_retry = iwqp->udp_info.rnr_nak_thresh; attr->max_rd_atomic = iwqp->roce_info.ord_size; attr->max_dest_rd_atomic = iwqp->roce_info.ird_size; } init_attr->event_handler = iwqp->ibqp.event_handler; init_attr->qp_context = iwqp->ibqp.qp_context; init_attr->send_cq = iwqp->ibqp.send_cq; init_attr->recv_cq = iwqp->ibqp.recv_cq; init_attr->cap = attr->cap; return 0; } +static int +irdma_wait_for_suspend(struct irdma_qp *iwqp) +{ + if (!wait_event_timeout(iwqp->iwdev->suspend_wq, + !iwqp->suspend_pending, + msecs_to_jiffies(IRDMA_EVENT_TIMEOUT_MS))) { + iwqp->suspend_pending = false; + irdma_dev_warn(&iwqp->iwdev->ibdev, + "modify_qp timed out waiting for suspend. 
qp_id = %d, last_ae = 0x%x\n", + iwqp->ibqp.qp_num, iwqp->last_aeq); + return -EBUSY; + } + + return 0; +} + /** * irdma_modify_qp_roce - modify qp request * @ibqp: qp's pointer for modify * @attr: access attributes * @attr_mask: state mask * @udata: user data */ int irdma_modify_qp_roce(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask, struct ib_udata *udata) { #define IRDMA_MODIFY_QP_MIN_REQ_LEN offsetofend(struct irdma_modify_qp_req, rq_flush) #define IRDMA_MODIFY_QP_MIN_RESP_LEN offsetofend(struct irdma_modify_qp_resp, push_valid) struct irdma_pd *iwpd = to_iwpd(ibqp->pd); struct irdma_qp *iwqp = to_iwqp(ibqp); struct irdma_device *iwdev = iwqp->iwdev; struct irdma_sc_dev *dev = &iwdev->rf->sc_dev; struct irdma_qp_host_ctx_info *ctx_info; struct irdma_roce_offload_info *roce_info; struct irdma_udp_offload_info *udp_info; struct irdma_modify_qp_info info = {0}; struct irdma_modify_qp_resp uresp = {}; struct irdma_modify_qp_req ureq; unsigned long flags; u8 issue_modify_qp = 0; int ret = 0; ctx_info = &iwqp->ctx_info; roce_info = &iwqp->roce_info; udp_info = &iwqp->udp_info; if (udata) { if ((udata->inlen && udata->inlen < IRDMA_MODIFY_QP_MIN_REQ_LEN) || (udata->outlen && udata->outlen < IRDMA_MODIFY_QP_MIN_RESP_LEN)) return -EINVAL; } if (attr_mask & ~IB_QP_ATTR_STANDARD_BITS) return -EOPNOTSUPP; if (attr_mask & IB_QP_DEST_QPN) roce_info->dest_qp = attr->dest_qp_num; if (attr_mask & IB_QP_PKEY_INDEX) { ret = irdma_query_pkey(ibqp->device, 0, attr->pkey_index, &roce_info->p_key); if (ret) return ret; } if (attr_mask & IB_QP_QKEY) roce_info->qkey = attr->qkey; if (attr_mask & IB_QP_PATH_MTU) udp_info->snd_mss = ib_mtu_enum_to_int(attr->path_mtu); if (attr_mask & IB_QP_SQ_PSN) { udp_info->psn_nxt = attr->sq_psn; udp_info->lsn = 0xffff; udp_info->psn_una = attr->sq_psn; udp_info->psn_max = attr->sq_psn; } if (attr_mask & IB_QP_RQ_PSN) udp_info->epsn = attr->rq_psn; if (attr_mask & IB_QP_RNR_RETRY) udp_info->rnr_nak_thresh = attr->rnr_retry; if (attr_mask & IB_QP_RETRY_CNT) udp_info->rexmit_thresh = attr->retry_cnt; ctx_info->roce_info->pd_id = iwpd->sc_pd.pd_id; if (attr_mask & IB_QP_AV) { struct irdma_av *av = &iwqp->roce_ah.av; u16 vlan_id = VLAN_N_VID; u32 local_ip[4] = {}; memset(&iwqp->roce_ah, 0, sizeof(iwqp->roce_ah)); if (attr->ah_attr.ah_flags & IB_AH_GRH) { udp_info->ttl = attr->ah_attr.grh.hop_limit; udp_info->flow_label = attr->ah_attr.grh.flow_label; udp_info->tos = attr->ah_attr.grh.traffic_class; udp_info->src_port = kc_rdma_get_udp_sport(udp_info->flow_label, ibqp->qp_num, roce_info->dest_qp); irdma_qp_rem_qos(&iwqp->sc_qp); dev->ws_remove(iwqp->sc_qp.vsi, ctx_info->user_pri); if (iwqp->sc_qp.vsi->dscp_mode) ctx_info->user_pri = iwqp->sc_qp.vsi->dscp_map[irdma_tos2dscp(udp_info->tos)]; else ctx_info->user_pri = rt_tos2priority(udp_info->tos); } ret = kc_irdma_set_roce_cm_info(iwqp, attr, &vlan_id); if (ret) return ret; if (dev->ws_add(iwqp->sc_qp.vsi, ctx_info->user_pri)) return -ENOMEM; iwqp->sc_qp.user_pri = ctx_info->user_pri; irdma_qp_add_qos(&iwqp->sc_qp); if (vlan_id >= VLAN_N_VID && iwdev->dcb_vlan_mode) vlan_id = 0; if (vlan_id < VLAN_N_VID) { udp_info->insert_vlan_tag = true; udp_info->vlan_tag = vlan_id | ctx_info->user_pri << VLAN_PRIO_SHIFT; } else { udp_info->insert_vlan_tag = false; } av->attrs = attr->ah_attr; rdma_gid2ip((struct sockaddr *)&av->dgid_addr, &attr->ah_attr.grh.dgid); if (av->net_type == RDMA_NETWORK_IPV6) { __be32 *daddr = av->dgid_addr.saddr_in6.sin6_addr.__u6_addr.__u6_addr32; __be32 *saddr = 
av->sgid_addr.saddr_in6.sin6_addr.__u6_addr.__u6_addr32; irdma_copy_ip_ntohl(&udp_info->dest_ip_addr[0], daddr); irdma_copy_ip_ntohl(&udp_info->local_ipaddr[0], saddr); udp_info->ipv4 = false; irdma_copy_ip_ntohl(local_ip, daddr); } else if (av->net_type == RDMA_NETWORK_IPV4) { __be32 saddr = av->sgid_addr.saddr_in.sin_addr.s_addr; __be32 daddr = av->dgid_addr.saddr_in.sin_addr.s_addr; local_ip[0] = ntohl(daddr); udp_info->ipv4 = true; udp_info->dest_ip_addr[0] = 0; udp_info->dest_ip_addr[1] = 0; udp_info->dest_ip_addr[2] = 0; udp_info->dest_ip_addr[3] = local_ip[0]; udp_info->local_ipaddr[0] = 0; udp_info->local_ipaddr[1] = 0; udp_info->local_ipaddr[2] = 0; udp_info->local_ipaddr[3] = ntohl(saddr); } else { return -EINVAL; } udp_info->arp_idx = irdma_add_arp(iwdev->rf, local_ip, ah_attr_to_dmac(attr->ah_attr)); } if (attr_mask & IB_QP_MAX_QP_RD_ATOMIC) { if (attr->max_rd_atomic > dev->hw_attrs.max_hw_ord) { irdma_dev_err(&iwdev->ibdev, "rd_atomic = %d, above max_hw_ord=%d\n", attr->max_rd_atomic, dev->hw_attrs.max_hw_ord); return -EINVAL; } if (attr->max_rd_atomic) roce_info->ord_size = attr->max_rd_atomic; info.ord_valid = true; } if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC) { if (attr->max_dest_rd_atomic > dev->hw_attrs.max_hw_ird) { irdma_dev_err(&iwdev->ibdev, "rd_atomic = %d, above max_hw_ird=%d\n", attr->max_rd_atomic, dev->hw_attrs.max_hw_ird); return -EINVAL; } if (attr->max_dest_rd_atomic) roce_info->ird_size = attr->max_dest_rd_atomic; } if (attr_mask & IB_QP_ACCESS_FLAGS) { if (attr->qp_access_flags & IB_ACCESS_LOCAL_WRITE) roce_info->wr_rdresp_en = true; if (attr->qp_access_flags & IB_ACCESS_REMOTE_WRITE) roce_info->wr_rdresp_en = true; if (attr->qp_access_flags & IB_ACCESS_REMOTE_READ) roce_info->rd_en = true; } wait_event(iwqp->mod_qp_waitq, !atomic_read(&iwqp->hw_mod_qp_pend)); irdma_debug(&iwdev->rf->sc_dev, IRDMA_DEBUG_VERBS, "caller: %pS qp_id=%d to_ibqpstate=%d ibqpstate=%d irdma_qpstate=%d attr_mask=0x%x\n", __builtin_return_address(0), ibqp->qp_num, attr->qp_state, iwqp->ibqp_state, iwqp->iwarp_state, attr_mask); spin_lock_irqsave(&iwqp->lock, flags); if (attr_mask & IB_QP_STATE) { - if (!kc_ib_modify_qp_is_ok(iwqp->ibqp_state, attr->qp_state, - iwqp->ibqp.qp_type, attr_mask, - IB_LINK_LAYER_ETHERNET)) { + if (!ib_modify_qp_is_ok(iwqp->ibqp_state, attr->qp_state, + iwqp->ibqp.qp_type, attr_mask)) { irdma_dev_warn(&iwdev->ibdev, "modify_qp invalid for qp_id=%d, old_state=0x%x, new_state=0x%x\n", iwqp->ibqp.qp_num, iwqp->ibqp_state, attr->qp_state); ret = -EINVAL; goto exit; } info.curr_iwarp_state = iwqp->iwarp_state; switch (attr->qp_state) { case IB_QPS_INIT: if (iwqp->iwarp_state > IRDMA_QP_STATE_IDLE) { ret = -EINVAL; goto exit; } if (iwqp->iwarp_state == IRDMA_QP_STATE_INVALID) { info.next_iwarp_state = IRDMA_QP_STATE_IDLE; issue_modify_qp = 1; } break; case IB_QPS_RTR: if (iwqp->iwarp_state > IRDMA_QP_STATE_IDLE) { ret = -EINVAL; goto exit; } info.arp_cache_idx_valid = true; info.cq_num_valid = true; info.next_iwarp_state = IRDMA_QP_STATE_RTR; issue_modify_qp = 1; break; case IB_QPS_RTS: if (iwqp->ibqp_state < IB_QPS_RTR || iwqp->ibqp_state == IB_QPS_ERR) { ret = -EINVAL; goto exit; } info.arp_cache_idx_valid = true; info.cq_num_valid = true; info.ord_valid = true; info.next_iwarp_state = IRDMA_QP_STATE_RTS; issue_modify_qp = 1; if (dev->hw_attrs.uk_attrs.hw_rev == IRDMA_GEN_2) iwdev->rf->check_fc(&iwdev->vsi, &iwqp->sc_qp); udp_info->cwnd = iwdev->roce_cwnd; roce_info->ack_credits = iwdev->roce_ackcreds; if (iwdev->push_mode && udata && iwqp->sc_qp.push_idx == 
IRDMA_INVALID_PUSH_PAGE_INDEX && dev->hw_attrs.uk_attrs.hw_rev >= IRDMA_GEN_2) { spin_unlock_irqrestore(&iwqp->lock, flags); irdma_alloc_push_page(iwqp); spin_lock_irqsave(&iwqp->lock, flags); } break; case IB_QPS_SQD: if (iwqp->iwarp_state == IRDMA_QP_STATE_SQD) goto exit; if (iwqp->iwarp_state != IRDMA_QP_STATE_RTS) { ret = -EINVAL; goto exit; } info.next_iwarp_state = IRDMA_QP_STATE_SQD; issue_modify_qp = 1; + iwqp->suspend_pending = true; break; case IB_QPS_SQE: case IB_QPS_ERR: case IB_QPS_RESET: - if (iwqp->iwarp_state == IRDMA_QP_STATE_RTS) { - if (dev->hw_attrs.uk_attrs.hw_rev <= IRDMA_GEN_2) - irdma_cqp_qp_suspend_resume(&iwqp->sc_qp, IRDMA_OP_SUSPEND); - spin_unlock_irqrestore(&iwqp->lock, flags); - info.next_iwarp_state = IRDMA_QP_STATE_SQD; - irdma_hw_modify_qp(iwdev, iwqp, &info, true); - spin_lock_irqsave(&iwqp->lock, flags); - } - if (iwqp->iwarp_state == IRDMA_QP_STATE_ERROR) { spin_unlock_irqrestore(&iwqp->lock, flags); if (udata && udata->inlen) { if (ib_copy_from_udata(&ureq, udata, min(sizeof(ureq), udata->inlen))) return -EINVAL; irdma_flush_wqes(iwqp, (ureq.sq_flush ? IRDMA_FLUSH_SQ : 0) | (ureq.rq_flush ? IRDMA_FLUSH_RQ : 0) | IRDMA_REFLUSH); } return 0; } info.next_iwarp_state = IRDMA_QP_STATE_ERROR; issue_modify_qp = 1; break; default: ret = -EINVAL; goto exit; } iwqp->ibqp_state = attr->qp_state; } ctx_info->send_cq_num = iwqp->iwscq->sc_cq.cq_uk.cq_id; ctx_info->rcv_cq_num = iwqp->iwrcq->sc_cq.cq_uk.cq_id; irdma_sc_qp_setctx_roce(&iwqp->sc_qp, iwqp->host_ctx.va, ctx_info); spin_unlock_irqrestore(&iwqp->lock, flags); if (attr_mask & IB_QP_STATE) { if (issue_modify_qp) { ctx_info->rem_endpoint_idx = udp_info->arp_idx; if (irdma_hw_modify_qp(iwdev, iwqp, &info, true)) return -EINVAL; + if (info.next_iwarp_state == IRDMA_QP_STATE_SQD) { + ret = irdma_wait_for_suspend(iwqp); + if (ret) + return ret; + } spin_lock_irqsave(&iwqp->lock, flags); if (iwqp->iwarp_state == info.curr_iwarp_state) { iwqp->iwarp_state = info.next_iwarp_state; iwqp->ibqp_state = attr->qp_state; } if (iwqp->ibqp_state > IB_QPS_RTS && !iwqp->flush_issued) { spin_unlock_irqrestore(&iwqp->lock, flags); irdma_flush_wqes(iwqp, IRDMA_FLUSH_SQ | IRDMA_FLUSH_RQ | IRDMA_FLUSH_WAIT); iwqp->flush_issued = 1; } else { spin_unlock_irqrestore(&iwqp->lock, flags); } } else { iwqp->ibqp_state = attr->qp_state; } if (udata && udata->outlen && dev->hw_attrs.uk_attrs.hw_rev >= IRDMA_GEN_2) { struct irdma_ucontext *ucontext; #if __FreeBSD_version >= 1400026 ucontext = rdma_udata_to_drv_context(udata, struct irdma_ucontext, ibucontext); #else ucontext = to_ucontext(ibqp->uobject->context); #endif if (iwqp->sc_qp.push_idx != IRDMA_INVALID_PUSH_PAGE_INDEX && !iwqp->push_wqe_mmap_entry && !irdma_setup_push_mmap_entries(ucontext, iwqp, &uresp.push_wqe_mmap_key, &uresp.push_db_mmap_key)) { uresp.push_valid = 1; uresp.push_offset = iwqp->sc_qp.push_offset; } uresp.rd_fence_rate = iwdev->rd_fence_rate; ret = ib_copy_to_udata(udata, &uresp, min(sizeof(uresp), udata->outlen)); if (ret) { irdma_remove_push_mmap_entries(iwqp); irdma_debug(&iwdev->rf->sc_dev, IRDMA_DEBUG_VERBS, "copy_to_udata failed\n"); return ret; } } } return 0; exit: spin_unlock_irqrestore(&iwqp->lock, flags); return ret; } /** * irdma_modify_qp - modify qp request * @ibqp: qp's pointer for modify * @attr: access attributes * @attr_mask: state mask * @udata: user data */ int irdma_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask, struct ib_udata *udata) { #define IRDMA_MODIFY_QP_MIN_REQ_LEN offsetofend(struct irdma_modify_qp_req, 
rq_flush) #define IRDMA_MODIFY_QP_MIN_RESP_LEN offsetofend(struct irdma_modify_qp_resp, push_valid) struct irdma_qp *iwqp = to_iwqp(ibqp); struct irdma_device *iwdev = iwqp->iwdev; struct irdma_sc_dev *dev = &iwdev->rf->sc_dev; struct irdma_qp_host_ctx_info *ctx_info; struct irdma_tcp_offload_info *tcp_info; struct irdma_iwarp_offload_info *offload_info; struct irdma_modify_qp_info info = {0}; struct irdma_modify_qp_resp uresp = {}; struct irdma_modify_qp_req ureq = {}; u8 issue_modify_qp = 0; u8 dont_wait = 0; int err; unsigned long flags; if (udata) { if ((udata->inlen && udata->inlen < IRDMA_MODIFY_QP_MIN_REQ_LEN) || (udata->outlen && udata->outlen < IRDMA_MODIFY_QP_MIN_RESP_LEN)) return -EINVAL; } if (attr_mask & ~IB_QP_ATTR_STANDARD_BITS) return -EOPNOTSUPP; ctx_info = &iwqp->ctx_info; offload_info = &iwqp->iwarp_info; tcp_info = &iwqp->tcp_info; wait_event(iwqp->mod_qp_waitq, !atomic_read(&iwqp->hw_mod_qp_pend)); irdma_debug(&iwdev->rf->sc_dev, IRDMA_DEBUG_VERBS, "caller: %pS qp_id=%d to_ibqpstate=%d ibqpstate=%d irdma_qpstate=%d last_aeq=%d hw_tcp_state=%d hw_iwarp_state=%d attr_mask=0x%x\n", __builtin_return_address(0), ibqp->qp_num, attr->qp_state, iwqp->ibqp_state, iwqp->iwarp_state, iwqp->last_aeq, iwqp->hw_tcp_state, iwqp->hw_iwarp_state, attr_mask); spin_lock_irqsave(&iwqp->lock, flags); if (attr_mask & IB_QP_STATE) { info.curr_iwarp_state = iwqp->iwarp_state; switch (attr->qp_state) { case IB_QPS_INIT: case IB_QPS_RTR: if (iwqp->iwarp_state > IRDMA_QP_STATE_IDLE) { err = -EINVAL; goto exit; } if (iwqp->iwarp_state == IRDMA_QP_STATE_INVALID) { info.next_iwarp_state = IRDMA_QP_STATE_IDLE; issue_modify_qp = 1; } if (iwdev->push_mode && udata && iwqp->sc_qp.push_idx == IRDMA_INVALID_PUSH_PAGE_INDEX && dev->hw_attrs.uk_attrs.hw_rev >= IRDMA_GEN_2) { spin_unlock_irqrestore(&iwqp->lock, flags); irdma_alloc_push_page(iwqp); spin_lock_irqsave(&iwqp->lock, flags); } break; case IB_QPS_RTS: if (iwqp->iwarp_state > IRDMA_QP_STATE_RTS || !iwqp->cm_id) { err = -EINVAL; goto exit; } issue_modify_qp = 1; iwqp->hw_tcp_state = IRDMA_TCP_STATE_ESTABLISHED; iwqp->hte_added = 1; info.next_iwarp_state = IRDMA_QP_STATE_RTS; info.tcp_ctx_valid = true; info.ord_valid = true; info.arp_cache_idx_valid = true; info.cq_num_valid = true; break; case IB_QPS_SQD: if (iwqp->hw_iwarp_state > IRDMA_QP_STATE_RTS) { err = 0; goto exit; } if (iwqp->iwarp_state == IRDMA_QP_STATE_CLOSING || iwqp->iwarp_state < IRDMA_QP_STATE_RTS) { err = 0; goto exit; } if (iwqp->iwarp_state > IRDMA_QP_STATE_CLOSING) { err = -EINVAL; goto exit; } info.next_iwarp_state = IRDMA_QP_STATE_CLOSING; issue_modify_qp = 1; break; case IB_QPS_SQE: if (iwqp->iwarp_state >= IRDMA_QP_STATE_TERMINATE) { err = -EINVAL; goto exit; } info.next_iwarp_state = IRDMA_QP_STATE_TERMINATE; issue_modify_qp = 1; break; case IB_QPS_ERR: case IB_QPS_RESET: if (iwqp->iwarp_state == IRDMA_QP_STATE_ERROR) { spin_unlock_irqrestore(&iwqp->lock, flags); if (udata && udata->inlen) { if (ib_copy_from_udata(&ureq, udata, min(sizeof(ureq), udata->inlen))) return -EINVAL; irdma_flush_wqes(iwqp, (ureq.sq_flush ? IRDMA_FLUSH_SQ : 0) | (ureq.rq_flush ? 
IRDMA_FLUSH_RQ : 0) | IRDMA_REFLUSH); } return 0; } if (iwqp->sc_qp.term_flags) { spin_unlock_irqrestore(&iwqp->lock, flags); irdma_terminate_del_timer(&iwqp->sc_qp); spin_lock_irqsave(&iwqp->lock, flags); } info.next_iwarp_state = IRDMA_QP_STATE_ERROR; if (iwqp->hw_tcp_state > IRDMA_TCP_STATE_CLOSED && iwdev->iw_status && iwqp->hw_tcp_state != IRDMA_TCP_STATE_TIME_WAIT) info.reset_tcp_conn = true; else dont_wait = 1; issue_modify_qp = 1; info.next_iwarp_state = IRDMA_QP_STATE_ERROR; break; default: err = -EINVAL; goto exit; } iwqp->ibqp_state = attr->qp_state; } if (attr_mask & IB_QP_ACCESS_FLAGS) { ctx_info->iwarp_info_valid = true; if (attr->qp_access_flags & IB_ACCESS_LOCAL_WRITE) offload_info->wr_rdresp_en = true; if (attr->qp_access_flags & IB_ACCESS_REMOTE_WRITE) offload_info->wr_rdresp_en = true; if (attr->qp_access_flags & IB_ACCESS_REMOTE_READ) offload_info->rd_en = true; } if (ctx_info->iwarp_info_valid) { ctx_info->send_cq_num = iwqp->iwscq->sc_cq.cq_uk.cq_id; ctx_info->rcv_cq_num = iwqp->iwrcq->sc_cq.cq_uk.cq_id; irdma_sc_qp_setctx(&iwqp->sc_qp, iwqp->host_ctx.va, ctx_info); } spin_unlock_irqrestore(&iwqp->lock, flags); if (attr_mask & IB_QP_STATE) { if (issue_modify_qp) { ctx_info->rem_endpoint_idx = tcp_info->arp_idx; if (irdma_hw_modify_qp(iwdev, iwqp, &info, true)) return -EINVAL; } spin_lock_irqsave(&iwqp->lock, flags); if (iwqp->iwarp_state == info.curr_iwarp_state) { iwqp->iwarp_state = info.next_iwarp_state; iwqp->ibqp_state = attr->qp_state; } spin_unlock_irqrestore(&iwqp->lock, flags); } if (issue_modify_qp && iwqp->ibqp_state > IB_QPS_RTS) { if (dont_wait) { if (iwqp->hw_tcp_state) { spin_lock_irqsave(&iwqp->lock, flags); iwqp->hw_tcp_state = IRDMA_TCP_STATE_CLOSED; iwqp->last_aeq = IRDMA_AE_RESET_SENT; spin_unlock_irqrestore(&iwqp->lock, flags); } irdma_cm_disconn(iwqp); } else { int close_timer_started; spin_lock_irqsave(&iwdev->cm_core.ht_lock, flags); if (iwqp->cm_node) { atomic_inc(&iwqp->cm_node->refcnt); spin_unlock_irqrestore(&iwdev->cm_core.ht_lock, flags); close_timer_started = atomic_inc_return(&iwqp->close_timer_started); if (iwqp->cm_id && close_timer_started == 1) irdma_schedule_cm_timer(iwqp->cm_node, (struct irdma_puda_buf *)iwqp, IRDMA_TIMER_TYPE_CLOSE, 1, 0); irdma_rem_ref_cm_node(iwqp->cm_node); } else { spin_unlock_irqrestore(&iwdev->cm_core.ht_lock, flags); } } } if (attr_mask & IB_QP_STATE && udata && udata->outlen && dev->hw_attrs.uk_attrs.hw_rev >= IRDMA_GEN_2) { struct irdma_ucontext *ucontext; #if __FreeBSD_version >= 1400026 ucontext = rdma_udata_to_drv_context(udata, struct irdma_ucontext, ibucontext); #else ucontext = to_ucontext(ibqp->uobject->context); #endif if (iwqp->sc_qp.push_idx != IRDMA_INVALID_PUSH_PAGE_INDEX && !iwqp->push_wqe_mmap_entry && !irdma_setup_push_mmap_entries(ucontext, iwqp, &uresp.push_wqe_mmap_key, &uresp.push_db_mmap_key)) { uresp.push_valid = 1; uresp.push_offset = iwqp->sc_qp.push_offset; } uresp.rd_fence_rate = iwdev->rd_fence_rate; err = ib_copy_to_udata(udata, &uresp, min(sizeof(uresp), udata->outlen)); if (err) { irdma_remove_push_mmap_entries(iwqp); - irdma_debug(&iwdev->rf->sc_dev, - IRDMA_DEBUG_VERBS, "copy_to_udata failed\n"); + irdma_debug(&iwdev->rf->sc_dev, IRDMA_DEBUG_VERBS, + "copy_to_udata failed\n"); return err; } } return 0; exit: spin_unlock_irqrestore(&iwqp->lock, flags); return err; } /** * irdma_cq_free_rsrc - free up resources for cq * @rf: RDMA PCI function * @iwcq: cq ptr */ void irdma_cq_free_rsrc(struct irdma_pci_f *rf, struct irdma_cq *iwcq) { struct irdma_sc_cq *cq = &iwcq->sc_cq; if 
(!iwcq->user_mode) { irdma_free_dma_mem(rf->sc_dev.hw, &iwcq->kmem); irdma_free_dma_mem(rf->sc_dev.hw, &iwcq->kmem_shadow); } irdma_free_rsrc(rf, rf->allocated_cqs, cq->cq_uk.cq_id); } /** * irdma_free_cqbuf - worker to free a cq buffer * @work: provides access to the cq buffer to free */ static void irdma_free_cqbuf(struct work_struct *work) { struct irdma_cq_buf *cq_buf = container_of(work, struct irdma_cq_buf, work); irdma_free_dma_mem(cq_buf->hw, &cq_buf->kmem_buf); kfree(cq_buf); } /** * irdma_process_resize_list - remove resized cq buffers from the resize_list * @iwcq: cq which owns the resize_list * @iwdev: irdma device * @lcqe_buf: the buffer where the last cqe is received */ int irdma_process_resize_list(struct irdma_cq *iwcq, struct irdma_device *iwdev, struct irdma_cq_buf *lcqe_buf) { struct list_head *tmp_node, *list_node; struct irdma_cq_buf *cq_buf; int cnt = 0; list_for_each_safe(list_node, tmp_node, &iwcq->resize_list) { cq_buf = list_entry(list_node, struct irdma_cq_buf, list); if (cq_buf == lcqe_buf) return cnt; list_del(&cq_buf->list); queue_work(iwdev->cleanup_wq, &cq_buf->work); cnt++; } return cnt; } /** * irdma_resize_cq - resize cq * @ibcq: cq to be resized * @entries: desired cq size * @udata: user data */ static int irdma_resize_cq(struct ib_cq *ibcq, int entries, struct ib_udata *udata) { #define IRDMA_RESIZE_CQ_MIN_REQ_LEN offsetofend(struct irdma_resize_cq_req, user_cq_buffer) struct irdma_cq *iwcq = to_iwcq(ibcq); struct irdma_sc_dev *dev = iwcq->sc_cq.dev; struct irdma_cqp_request *cqp_request; struct cqp_cmds_info *cqp_info; struct irdma_modify_cq_info *m_info; struct irdma_modify_cq_info info = {0}; struct irdma_dma_mem kmem_buf; struct irdma_cq_mr *cqmr_buf; struct irdma_pbl *iwpbl_buf; struct irdma_device *iwdev; struct irdma_pci_f *rf; struct irdma_cq_buf *cq_buf = NULL; unsigned long flags; int ret; iwdev = to_iwdev(ibcq->device); rf = iwdev->rf; if (!(rf->sc_dev.hw_attrs.uk_attrs.feature_flags & IRDMA_FEATURE_CQ_RESIZE)) return -EOPNOTSUPP; if (udata && udata->inlen < IRDMA_RESIZE_CQ_MIN_REQ_LEN) return -EINVAL; if (entries > rf->max_cqe) return -EINVAL; if (!iwcq->user_mode) { entries++; if (rf->sc_dev.hw_attrs.uk_attrs.hw_rev >= IRDMA_GEN_2) entries *= 2; } info.cq_size = max(entries, 4); if (info.cq_size == iwcq->sc_cq.cq_uk.cq_size - 1) return 0; if (udata) { struct irdma_resize_cq_req req = {}; struct irdma_ucontext *ucontext = #if __FreeBSD_version >= 1400026 rdma_udata_to_drv_context(udata, struct irdma_ucontext, ibucontext); #else to_ucontext(ibcq->uobject->context); #endif /* CQ resize not supported with legacy GEN_1 libi40iw */ if (ucontext->legacy_mode) return -EOPNOTSUPP; if (ib_copy_from_udata(&req, udata, min(sizeof(req), udata->inlen))) return -EINVAL; spin_lock_irqsave(&ucontext->cq_reg_mem_list_lock, flags); iwpbl_buf = irdma_get_pbl((unsigned long)req.user_cq_buffer, &ucontext->cq_reg_mem_list); spin_unlock_irqrestore(&ucontext->cq_reg_mem_list_lock, flags); if (!iwpbl_buf) return -ENOMEM; cqmr_buf = &iwpbl_buf->cq_mr; if (iwpbl_buf->pbl_allocated) { info.virtual_map = true; info.pbl_chunk_size = 1; info.first_pm_pbl_idx = cqmr_buf->cq_pbl.idx; } else { info.cq_pa = cqmr_buf->cq_pbl.addr; } } else { /* Kmode CQ resize */ int rsize; rsize = info.cq_size * sizeof(struct irdma_cqe); kmem_buf.size = round_up(rsize, 256); kmem_buf.va = irdma_allocate_dma_mem(dev->hw, &kmem_buf, kmem_buf.size, 256); if (!kmem_buf.va) return -ENOMEM; info.cq_base = kmem_buf.va; info.cq_pa = kmem_buf.pa; cq_buf = kzalloc(sizeof(*cq_buf), GFP_KERNEL); if 
(!cq_buf) { ret = -ENOMEM; goto error; } } cqp_request = irdma_alloc_and_get_cqp_request(&rf->cqp, true); if (!cqp_request) { ret = -ENOMEM; goto error; } info.shadow_read_threshold = iwcq->sc_cq.shadow_read_threshold; info.cq_resize = true; cqp_info = &cqp_request->info; m_info = &cqp_info->in.u.cq_modify.info; memcpy(m_info, &info, sizeof(*m_info)); cqp_info->cqp_cmd = IRDMA_OP_CQ_MODIFY; cqp_info->in.u.cq_modify.cq = &iwcq->sc_cq; cqp_info->in.u.cq_modify.scratch = (uintptr_t)cqp_request; cqp_info->post_sq = 1; ret = irdma_handle_cqp_op(rf, cqp_request); irdma_put_cqp_request(&rf->cqp, cqp_request); if (ret) goto error; spin_lock_irqsave(&iwcq->lock, flags); if (cq_buf) { cq_buf->kmem_buf = iwcq->kmem; cq_buf->hw = dev->hw; memcpy(&cq_buf->cq_uk, &iwcq->sc_cq.cq_uk, sizeof(cq_buf->cq_uk)); INIT_WORK(&cq_buf->work, irdma_free_cqbuf); list_add_tail(&cq_buf->list, &iwcq->resize_list); iwcq->kmem = kmem_buf; } irdma_sc_cq_resize(&iwcq->sc_cq, &info); ibcq->cqe = info.cq_size - 1; spin_unlock_irqrestore(&iwcq->lock, flags); return 0; error: if (!udata) irdma_free_dma_mem(dev->hw, &kmem_buf); kfree(cq_buf); return ret; } /** * irdma_get_mr_access - get hw MR access permissions from IB access flags * @access: IB access flags */ static inline u16 irdma_get_mr_access(int access){ u16 hw_access = 0; hw_access |= (access & IB_ACCESS_LOCAL_WRITE) ? IRDMA_ACCESS_FLAGS_LOCALWRITE : 0; hw_access |= (access & IB_ACCESS_REMOTE_WRITE) ? IRDMA_ACCESS_FLAGS_REMOTEWRITE : 0; hw_access |= (access & IB_ACCESS_REMOTE_READ) ? IRDMA_ACCESS_FLAGS_REMOTEREAD : 0; hw_access |= (access & IB_ACCESS_MW_BIND) ? IRDMA_ACCESS_FLAGS_BIND_WINDOW : 0; hw_access |= (access & IB_ZERO_BASED) ? IRDMA_ACCESS_FLAGS_ZERO_BASED : 0; hw_access |= IRDMA_ACCESS_FLAGS_LOCALREAD; return hw_access; } /** * irdma_free_stag - free stag resource * @iwdev: irdma device * @stag: stag to free */ void irdma_free_stag(struct irdma_device *iwdev, u32 stag) { u32 stag_idx; stag_idx = (stag & iwdev->rf->mr_stagmask) >> IRDMA_CQPSQ_STAG_IDX_S; irdma_free_rsrc(iwdev->rf, iwdev->rf->allocated_mrs, stag_idx); } /** * irdma_create_stag - create random stag * @iwdev: irdma device */ u32 irdma_create_stag(struct irdma_device *iwdev) { u32 stag; u32 stag_index = 0; u32 next_stag_index; u32 driver_key; u32 random; u8 consumer_key; int ret; get_random_bytes(&random, sizeof(random)); consumer_key = (u8)random; driver_key = random & ~iwdev->rf->mr_stagmask; next_stag_index = (random & iwdev->rf->mr_stagmask) >> 8; next_stag_index %= iwdev->rf->max_mr; ret = irdma_alloc_rsrc(iwdev->rf, iwdev->rf->allocated_mrs, iwdev->rf->max_mr, &stag_index, &next_stag_index); if (ret) return 0; stag = stag_index << IRDMA_CQPSQ_STAG_IDX_S; stag |= driver_key; stag += (u32)consumer_key; return stag; } /** * irdma_check_mem_contiguous - check if pbls stored in arr are contiguous * @arr: lvl1 pbl array * @npages: page count * @pg_size: page size * */ static bool irdma_check_mem_contiguous(u64 *arr, u32 npages, u32 pg_size) { u32 pg_idx; for (pg_idx = 0; pg_idx < npages; pg_idx++) { if ((*arr + (pg_size * pg_idx)) != arr[pg_idx]) return false; } return true; } /** * irdma_check_mr_contiguous - check if MR is physically contiguous * @palloc: pbl allocation struct * @pg_size: page size */ static bool irdma_check_mr_contiguous(struct irdma_pble_alloc *palloc, u32 pg_size) { struct irdma_pble_level2 *lvl2 = &palloc->level2; struct irdma_pble_info *leaf = lvl2->leaf; u64 *arr = NULL; u64 *start_addr = NULL; int i; bool ret; if (palloc->level == PBLE_LEVEL_1) { arr = 
palloc->level1.addr; ret = irdma_check_mem_contiguous(arr, palloc->total_cnt, pg_size); return ret; } start_addr = leaf->addr; for (i = 0; i < lvl2->leaf_cnt; i++, leaf++) { arr = leaf->addr; if ((*start_addr + (i * pg_size * PBLE_PER_PAGE)) != *arr) return false; ret = irdma_check_mem_contiguous(arr, leaf->cnt, pg_size); if (!ret) return false; } return true; } /** * irdma_setup_pbles - copy user pg address to pble's * @rf: RDMA PCI function * @iwmr: mr pointer for this memory registration * @lvl: requested pble levels */ static int irdma_setup_pbles(struct irdma_pci_f *rf, struct irdma_mr *iwmr, u8 lvl) { struct irdma_pbl *iwpbl = &iwmr->iwpbl; struct irdma_pble_alloc *palloc = &iwpbl->pble_alloc; struct irdma_pble_info *pinfo; u64 *pbl; int status; enum irdma_pble_level level = PBLE_LEVEL_1; if (lvl) { status = irdma_get_pble(rf->pble_rsrc, palloc, iwmr->page_cnt, lvl); if (status) return status; iwpbl->pbl_allocated = true; level = palloc->level; pinfo = (level == PBLE_LEVEL_1) ? &palloc->level1 : palloc->level2.leaf; pbl = pinfo->addr; } else { pbl = iwmr->pgaddrmem; } irdma_copy_user_pgaddrs(iwmr, pbl, level); if (lvl) iwmr->pgaddrmem[0] = *pbl; return 0; } /** * irdma_handle_q_mem - handle memory for qp and cq * @iwdev: irdma device * @req: information for q memory management * @iwpbl: pble struct * @lvl: pble level mask */ static int irdma_handle_q_mem(struct irdma_device *iwdev, struct irdma_mem_reg_req *req, struct irdma_pbl *iwpbl, u8 lvl) { struct irdma_pble_alloc *palloc = &iwpbl->pble_alloc; struct irdma_mr *iwmr = iwpbl->iwmr; struct irdma_qp_mr *qpmr = &iwpbl->qp_mr; struct irdma_cq_mr *cqmr = &iwpbl->cq_mr; struct irdma_hmc_pble *hmc_p; u64 *arr = iwmr->pgaddrmem; u32 pg_size, total; int err = 0; bool ret = true; pg_size = iwmr->page_size; err = irdma_setup_pbles(iwdev->rf, iwmr, lvl); if (err) return err; if (lvl) arr = palloc->level1.addr; switch (iwmr->type) { case IRDMA_MEMREG_TYPE_QP: total = req->sq_pages + req->rq_pages; hmc_p = &qpmr->sq_pbl; qpmr->shadow = (dma_addr_t) arr[total]; if (lvl) { ret = irdma_check_mem_contiguous(arr, req->sq_pages, pg_size); if (ret) ret = irdma_check_mem_contiguous(&arr[req->sq_pages], req->rq_pages, pg_size); } if (!ret) { hmc_p->idx = palloc->level1.idx; hmc_p = &qpmr->rq_pbl; hmc_p->idx = palloc->level1.idx + req->sq_pages; } else { hmc_p->addr = arr[0]; hmc_p = &qpmr->rq_pbl; hmc_p->addr = arr[req->sq_pages]; } break; case IRDMA_MEMREG_TYPE_CQ: hmc_p = &cqmr->cq_pbl; if (!cqmr->split) cqmr->shadow = (dma_addr_t) arr[req->cq_pages]; if (lvl) ret = irdma_check_mem_contiguous(arr, req->cq_pages, pg_size); if (!ret) hmc_p->idx = palloc->level1.idx; else hmc_p->addr = arr[0]; break; default: irdma_debug(&iwdev->rf->sc_dev, IRDMA_DEBUG_VERBS, "MR type error\n"); err = -EINVAL; } if (lvl && ret) { irdma_free_pble(iwdev->rf->pble_rsrc, palloc); iwpbl->pbl_allocated = false; } return err; } /** * irdma_hw_alloc_mw - create the hw memory window * @iwdev: irdma device * @iwmr: pointer to memory window info */ int irdma_hw_alloc_mw(struct irdma_device *iwdev, struct irdma_mr *iwmr) { struct irdma_mw_alloc_info *info; struct irdma_pd *iwpd = to_iwpd(iwmr->ibmr.pd); struct irdma_cqp_request *cqp_request; struct cqp_cmds_info *cqp_info; int status; cqp_request = irdma_alloc_and_get_cqp_request(&iwdev->rf->cqp, true); if (!cqp_request) return -ENOMEM; cqp_info = &cqp_request->info; info = &cqp_info->in.u.mw_alloc.info; memset(info, 0, sizeof(*info)); if (iwmr->ibmw.type == IB_MW_TYPE_1) info->mw_wide = true; info->page_size = PAGE_SIZE; 
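/* Pass only the STag index, i.e. the STag with its consumer-key byte stripped. */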
info->mw_stag_index = iwmr->stag >> IRDMA_CQPSQ_STAG_IDX_S; info->pd_id = iwpd->sc_pd.pd_id; info->remote_access = true; cqp_info->cqp_cmd = IRDMA_OP_MW_ALLOC; cqp_info->post_sq = 1; cqp_info->in.u.mw_alloc.dev = &iwdev->rf->sc_dev; cqp_info->in.u.mw_alloc.scratch = (uintptr_t)cqp_request; status = irdma_handle_cqp_op(iwdev->rf, cqp_request); irdma_put_cqp_request(&iwdev->rf->cqp, cqp_request); return status; } /** * irdma_dealloc_mw - Dealloc memory window * @ibmw: memory window structure. */ static int irdma_dealloc_mw(struct ib_mw *ibmw) { struct ib_pd *ibpd = ibmw->pd; struct irdma_pd *iwpd = to_iwpd(ibpd); struct irdma_mr *iwmr = to_iwmr((struct ib_mr *)ibmw); struct irdma_device *iwdev = to_iwdev(ibmw->device); struct irdma_cqp_request *cqp_request; struct cqp_cmds_info *cqp_info; struct irdma_dealloc_stag_info *info; cqp_request = irdma_alloc_and_get_cqp_request(&iwdev->rf->cqp, true); if (!cqp_request) return -ENOMEM; cqp_info = &cqp_request->info; info = &cqp_info->in.u.dealloc_stag.info; memset(info, 0, sizeof(*info)); info->pd_id = iwpd->sc_pd.pd_id; info->stag_idx = RS_64_1(ibmw->rkey, IRDMA_CQPSQ_STAG_IDX_S); info->mr = false; cqp_info->cqp_cmd = IRDMA_OP_DEALLOC_STAG; cqp_info->post_sq = 1; cqp_info->in.u.dealloc_stag.dev = &iwdev->rf->sc_dev; cqp_info->in.u.dealloc_stag.scratch = (uintptr_t)cqp_request; irdma_handle_cqp_op(iwdev->rf, cqp_request); irdma_put_cqp_request(&iwdev->rf->cqp, cqp_request); irdma_free_stag(iwdev, iwmr->stag); kfree(iwmr); return 0; } /** * irdma_hw_alloc_stag - cqp command to allocate stag * @iwdev: irdma device * @iwmr: irdma mr pointer */ int irdma_hw_alloc_stag(struct irdma_device *iwdev, struct irdma_mr *iwmr) { struct irdma_allocate_stag_info *info; struct ib_pd *pd = iwmr->ibmr.pd; struct irdma_pd *iwpd = to_iwpd(pd); struct irdma_cqp_request *cqp_request; struct cqp_cmds_info *cqp_info; int status; cqp_request = irdma_alloc_and_get_cqp_request(&iwdev->rf->cqp, true); if (!cqp_request) return -ENOMEM; cqp_info = &cqp_request->info; info = &cqp_info->in.u.alloc_stag.info; memset(info, 0, sizeof(*info)); info->page_size = PAGE_SIZE; info->stag_idx = iwmr->stag >> IRDMA_CQPSQ_STAG_IDX_S; info->pd_id = iwpd->sc_pd.pd_id; info->total_len = iwmr->len; info->all_memory = (pd->flags & IB_PD_UNSAFE_GLOBAL_RKEY) ? 
true : false; info->remote_access = true; cqp_info->cqp_cmd = IRDMA_OP_ALLOC_STAG; cqp_info->post_sq = 1; cqp_info->in.u.alloc_stag.dev = &iwdev->rf->sc_dev; cqp_info->in.u.alloc_stag.scratch = (uintptr_t)cqp_request; status = irdma_handle_cqp_op(iwdev->rf, cqp_request); irdma_put_cqp_request(&iwdev->rf->cqp, cqp_request); if (!status) iwmr->is_hwreg = 1; return status; } /** * irdma_set_page - populate pbl list for fmr * @ibmr: ib mem to access iwarp mr pointer * @addr: page dma address fro pbl list */ static int irdma_set_page(struct ib_mr *ibmr, u64 addr) { struct irdma_mr *iwmr = to_iwmr(ibmr); struct irdma_pbl *iwpbl = &iwmr->iwpbl; struct irdma_pble_alloc *palloc = &iwpbl->pble_alloc; u64 *pbl; if (unlikely(iwmr->npages == iwmr->page_cnt)) return -ENOMEM; if (palloc->level == PBLE_LEVEL_2) { struct irdma_pble_info *palloc_info = palloc->level2.leaf + (iwmr->npages >> PBLE_512_SHIFT); palloc_info->addr[iwmr->npages & (PBLE_PER_PAGE - 1)] = addr; } else { pbl = palloc->level1.addr; pbl[iwmr->npages] = addr; } iwmr->npages++; return 0; } /** * irdma_map_mr_sg - map of sg list for fmr * @ibmr: ib mem to access iwarp mr pointer * @sg: scatter gather list * @sg_nents: number of sg pages * @sg_offset: scatter gather list for fmr */ static int irdma_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, int sg_nents, unsigned int *sg_offset) { struct irdma_mr *iwmr = to_iwmr(ibmr); iwmr->npages = 0; return ib_sg_to_pages(ibmr, sg, sg_nents, sg_offset, irdma_set_page); } /** * irdma_hwreg_mr - send cqp command for memory registration * @iwdev: irdma device * @iwmr: irdma mr pointer * @access: access for MR */ int irdma_hwreg_mr(struct irdma_device *iwdev, struct irdma_mr *iwmr, u16 access) { struct irdma_pbl *iwpbl = &iwmr->iwpbl; struct irdma_reg_ns_stag_info *stag_info; struct ib_pd *pd = iwmr->ibmr.pd; struct irdma_pd *iwpd = to_iwpd(pd); struct irdma_pble_alloc *palloc = &iwpbl->pble_alloc; struct irdma_cqp_request *cqp_request; struct cqp_cmds_info *cqp_info; int ret; cqp_request = irdma_alloc_and_get_cqp_request(&iwdev->rf->cqp, true); if (!cqp_request) return -ENOMEM; cqp_info = &cqp_request->info; stag_info = &cqp_info->in.u.mr_reg_non_shared.info; memset(stag_info, 0, sizeof(*stag_info)); stag_info->va = iwpbl->user_base; stag_info->stag_idx = iwmr->stag >> IRDMA_CQPSQ_STAG_IDX_S; stag_info->stag_key = (u8)iwmr->stag; stag_info->total_len = iwmr->len; stag_info->all_memory = (pd->flags & IB_PD_UNSAFE_GLOBAL_RKEY) ? 
true : false; stag_info->access_rights = irdma_get_mr_access(access); stag_info->pd_id = iwpd->sc_pd.pd_id; if (stag_info->access_rights & IRDMA_ACCESS_FLAGS_ZERO_BASED) stag_info->addr_type = IRDMA_ADDR_TYPE_ZERO_BASED; else stag_info->addr_type = IRDMA_ADDR_TYPE_VA_BASED; stag_info->page_size = iwmr->page_size; if (iwpbl->pbl_allocated) { if (palloc->level == PBLE_LEVEL_1) { stag_info->first_pm_pbl_index = palloc->level1.idx; stag_info->chunk_size = 1; } else { stag_info->first_pm_pbl_index = palloc->level2.root.idx; stag_info->chunk_size = 3; } } else { stag_info->reg_addr_pa = iwmr->pgaddrmem[0]; } cqp_info->cqp_cmd = IRDMA_OP_MR_REG_NON_SHARED; cqp_info->post_sq = 1; cqp_info->in.u.mr_reg_non_shared.dev = &iwdev->rf->sc_dev; cqp_info->in.u.mr_reg_non_shared.scratch = (uintptr_t)cqp_request; ret = irdma_handle_cqp_op(iwdev->rf, cqp_request); irdma_put_cqp_request(&iwdev->rf->cqp, cqp_request); if (!ret) iwmr->is_hwreg = 1; return ret; } +/* + * irdma_alloc_iwmr - Allocate iwmr @region - memory region @pd - protection domain @virt - virtual address @reg_type - + * registration type + */ +static struct irdma_mr * +irdma_alloc_iwmr(struct ib_umem *region, + struct ib_pd *pd, u64 virt, + enum irdma_memreg_type reg_type) +{ + struct irdma_pbl *iwpbl; + struct irdma_mr *iwmr; + + iwmr = kzalloc(sizeof(*iwmr), GFP_KERNEL); + if (!iwmr) + return ERR_PTR(-ENOMEM); + + iwpbl = &iwmr->iwpbl; + iwpbl->iwmr = iwmr; + iwmr->region = region; + iwmr->ibmr.pd = pd; + iwmr->ibmr.device = pd->device; + iwmr->ibmr.iova = virt; + iwmr->type = reg_type; + + /* Some OOT versions of irdma_copy_user_pg_addr require the pg mask */ + iwmr->page_msk = ~(IRDMA_HW_PAGE_SIZE - 1); + iwmr->page_size = IRDMA_HW_PAGE_SIZE; + iwmr->len = region->length; + iwpbl->user_base = virt; + iwmr->page_cnt = irdma_ib_umem_num_dma_blocks(region, iwmr->page_size, virt); + + return iwmr; +} + +static void +irdma_free_iwmr(struct irdma_mr *iwmr) +{ + kfree(iwmr); +} + +/* + * irdma_reg_user_mr_type_mem - Handle memory registration @iwmr - irdma mr @access - access rights + */ +static int +irdma_reg_user_mr_type_mem(struct irdma_mr *iwmr, int access) +{ + struct irdma_device *iwdev = to_iwdev(iwmr->ibmr.device); + struct irdma_pbl *iwpbl = &iwmr->iwpbl; + u32 stag; + int err; + u8 lvl; + + lvl = iwmr->page_cnt != 1 ? 
PBLE_LEVEL_1 | PBLE_LEVEL_2 : PBLE_LEVEL_0; + + err = irdma_setup_pbles(iwdev->rf, iwmr, lvl); + if (err) + return err; + + if (lvl) { + err = irdma_check_mr_contiguous(&iwpbl->pble_alloc, + iwmr->page_size); + if (err) { + irdma_free_pble(iwdev->rf->pble_rsrc, &iwpbl->pble_alloc); + iwpbl->pbl_allocated = false; + } + } + + stag = irdma_create_stag(iwdev); + if (!stag) { + err = -ENOMEM; + goto free_pble; + } + + iwmr->stag = stag; + iwmr->ibmr.rkey = stag; + iwmr->ibmr.lkey = stag; + iwmr->access = access; + err = irdma_hwreg_mr(iwdev, iwmr, access); + if (err) + goto err_hwreg; + + return 0; + +err_hwreg: + irdma_free_stag(iwdev, stag); + +free_pble: + if (iwpbl->pble_alloc.level != PBLE_LEVEL_0 && iwpbl->pbl_allocated) + irdma_free_pble(iwdev->rf->pble_rsrc, &iwpbl->pble_alloc); + + return err; +} + +/* + * irdma_reg_user_mr_type_qp - Handle QP memory registration @req - memory reg req @udata - user info @iwmr - irdma mr + */ +static int +irdma_reg_user_mr_type_qp(struct irdma_mem_reg_req req, + struct ib_udata *udata, + struct irdma_mr *iwmr) +{ + struct irdma_device *iwdev = to_iwdev(iwmr->ibmr.device); + struct irdma_pbl *iwpbl = &iwmr->iwpbl; + struct irdma_ucontext *ucontext; + unsigned long flags; + u32 total; + int err; + u8 lvl; + + total = req.sq_pages + req.rq_pages + IRDMA_SHADOW_PGCNT; + if (total > iwmr->page_cnt) + return -EINVAL; + + total = req.sq_pages + req.rq_pages; + lvl = total > 2 ? PBLE_LEVEL_1 : PBLE_LEVEL_0; + err = irdma_handle_q_mem(iwdev, &req, iwpbl, lvl); + if (err) + return err; + +#if __FreeBSD_version >= 1400026 + ucontext = rdma_udata_to_drv_context(udata, struct irdma_ucontext, ibucontext); +#else + ucontext = to_ucontext(iwmr->ibpd.pd->uobject->context); +#endif + spin_lock_irqsave(&ucontext->qp_reg_mem_list_lock, flags); + list_add_tail(&iwpbl->list, &ucontext->qp_reg_mem_list); + iwpbl->on_list = true; + spin_unlock_irqrestore(&ucontext->qp_reg_mem_list_lock, flags); + + return 0; +} + +/* + * irdma_reg_user_mr_type_cq - Handle CQ memory registration @req - memory reg req @udata - user info @iwmr - irdma mr + */ +static int +irdma_reg_user_mr_type_cq(struct irdma_mem_reg_req req, + struct ib_udata *udata, + struct irdma_mr *iwmr) +{ + struct irdma_device *iwdev = to_iwdev(iwmr->ibmr.device); + struct irdma_pbl *iwpbl = &iwmr->iwpbl; + struct irdma_ucontext *ucontext; + unsigned long flags; + u32 total; + int err; + u8 lvl; + + total = req.cq_pages + + ((iwdev->rf->sc_dev.hw_attrs.uk_attrs.feature_flags & IRDMA_FEATURE_CQ_RESIZE) ? 0 : IRDMA_SHADOW_PGCNT); + if (total > iwmr->page_cnt) + return -EINVAL; + + lvl = req.cq_pages > 1 ? 
PBLE_LEVEL_1 : PBLE_LEVEL_0; + err = irdma_handle_q_mem(iwdev, &req, iwpbl, lvl); + if (err) + return err; + +#if __FreeBSD_version >= 1400026 + ucontext = rdma_udata_to_drv_context(udata, struct irdma_ucontext, ibucontext); +#else + ucontext = to_ucontext(iwmr->ibmr.pd->uobject->context); +#endif + spin_lock_irqsave(&ucontext->cq_reg_mem_list_lock, flags); + list_add_tail(&iwpbl->list, &ucontext->cq_reg_mem_list); + iwpbl->on_list = true; + spin_unlock_irqrestore(&ucontext->cq_reg_mem_list_lock, flags); + + return 0; +} + /** * irdma_reg_user_mr - Register a user memory region * @pd: ptr of pd * @start: virtual start address * @len: length of mr * @virt: virtual address * @access: access of mr * @udata: user data */ static struct ib_mr * irdma_reg_user_mr(struct ib_pd *pd, u64 start, u64 len, u64 virt, int access, struct ib_udata *udata) { #define IRDMA_MEM_REG_MIN_REQ_LEN offsetofend(struct irdma_mem_reg_req, sq_pages) struct irdma_device *iwdev = to_iwdev(pd->device); - struct irdma_ucontext *ucontext; - struct irdma_pble_alloc *palloc; - struct irdma_pbl *iwpbl; - struct irdma_mr *iwmr; - struct ib_umem *region; struct irdma_mem_reg_req req = {}; - u32 total, stag = 0; - u8 shadow_pgcnt = 1; - unsigned long flags; - int err = -EINVAL; - u8 lvl; - int ret; + struct ib_umem *region; + struct irdma_mr *iwmr; + int err; if (len > iwdev->rf->sc_dev.hw_attrs.max_mr_size) return ERR_PTR(-EINVAL); if (udata->inlen < IRDMA_MEM_REG_MIN_REQ_LEN) return ERR_PTR(-EINVAL); region = ib_umem_get(pd->uobject->context, start, len, access, 0); if (IS_ERR(region)) { irdma_debug(&iwdev->rf->sc_dev, IRDMA_DEBUG_VERBS, "Failed to create ib_umem region\n"); return (struct ib_mr *)region; } if (ib_copy_from_udata(&req, udata, min(sizeof(req), udata->inlen))) { ib_umem_release(region); return ERR_PTR(-EFAULT); } - iwmr = kzalloc(sizeof(*iwmr), GFP_KERNEL); - if (!iwmr) { + iwmr = irdma_alloc_iwmr(region, pd, virt, req.reg_type); + if (IS_ERR(iwmr)) { ib_umem_release(region); - return ERR_PTR(-ENOMEM); + return (struct ib_mr *)iwmr; } - iwpbl = &iwmr->iwpbl; - iwpbl->iwmr = iwmr; - iwmr->region = region; - iwmr->ibmr.pd = pd; - iwmr->ibmr.device = pd->device; - iwmr->ibmr.iova = virt; - iwmr->page_size = IRDMA_HW_PAGE_SIZE; - iwmr->page_msk = ~(IRDMA_HW_PAGE_SIZE - 1); - - iwmr->len = region->length; - iwpbl->user_base = virt; - palloc = &iwpbl->pble_alloc; - iwmr->type = req.reg_type; - iwmr->page_cnt = irdma_ib_umem_num_dma_blocks(region, iwmr->page_size, virt); - switch (req.reg_type) { case IRDMA_MEMREG_TYPE_QP: - total = req.sq_pages + req.rq_pages + shadow_pgcnt; - if (total > iwmr->page_cnt) { - err = -EINVAL; - goto error; - } - total = req.sq_pages + req.rq_pages; - lvl = total > 2 ? PBLE_LEVEL_1 : PBLE_LEVEL_0; - err = irdma_handle_q_mem(iwdev, &req, iwpbl, lvl); + err = irdma_reg_user_mr_type_qp(req, udata, iwmr); if (err) goto error; -#if __FreeBSD_version >= 1400026 - ucontext = rdma_udata_to_drv_context(udata, struct irdma_ucontext, ibucontext); -#else - ucontext = to_ucontext(pd->uobject->context); -#endif - spin_lock_irqsave(&ucontext->qp_reg_mem_list_lock, flags); - list_add_tail(&iwpbl->list, &ucontext->qp_reg_mem_list); - iwpbl->on_list = true; - spin_unlock_irqrestore(&ucontext->qp_reg_mem_list_lock, flags); break; case IRDMA_MEMREG_TYPE_CQ: - if (iwdev->rf->sc_dev.hw_attrs.uk_attrs.feature_flags & IRDMA_FEATURE_CQ_RESIZE) - shadow_pgcnt = 0; - total = req.cq_pages + shadow_pgcnt; - if (total > iwmr->page_cnt) { - err = -EINVAL; - goto error; - } - - lvl = req.cq_pages > 1 ? 
PBLE_LEVEL_1 : PBLE_LEVEL_0; - err = irdma_handle_q_mem(iwdev, &req, iwpbl, lvl); + err = irdma_reg_user_mr_type_cq(req, udata, iwmr); if (err) goto error; -#if __FreeBSD_version >= 1400026 - ucontext = rdma_udata_to_drv_context(udata, struct irdma_ucontext, ibucontext); -#else - ucontext = to_ucontext(pd->uobject->context); -#endif - spin_lock_irqsave(&ucontext->cq_reg_mem_list_lock, flags); - list_add_tail(&iwpbl->list, &ucontext->cq_reg_mem_list); - iwpbl->on_list = true; - spin_unlock_irqrestore(&ucontext->cq_reg_mem_list_lock, flags); break; case IRDMA_MEMREG_TYPE_MEM: - lvl = iwmr->page_cnt != 1 ? PBLE_LEVEL_1 | PBLE_LEVEL_2 : PBLE_LEVEL_0; - err = irdma_setup_pbles(iwdev->rf, iwmr, lvl); + err = irdma_reg_user_mr_type_mem(iwmr, access); if (err) goto error; - if (lvl) { - ret = irdma_check_mr_contiguous(palloc, - iwmr->page_size); - if (ret) { - irdma_free_pble(iwdev->rf->pble_rsrc, palloc); - iwpbl->pbl_allocated = false; - } - } - - stag = irdma_create_stag(iwdev); - if (!stag) { - err = -ENOMEM; - goto error; - } - - iwmr->stag = stag; - iwmr->ibmr.rkey = stag; - iwmr->ibmr.lkey = stag; - iwmr->access = access; - err = irdma_hwreg_mr(iwdev, iwmr, access); - if (err) { - irdma_free_stag(iwdev, stag); - goto error; - } - break; default: + err = -EINVAL; goto error; } - iwmr->type = req.reg_type; - return &iwmr->ibmr; error: - if (palloc->level != PBLE_LEVEL_0 && iwpbl->pbl_allocated) - irdma_free_pble(iwdev->rf->pble_rsrc, palloc); ib_umem_release(region); - kfree(iwmr); + irdma_free_iwmr(iwmr); return ERR_PTR(err); } int irdma_hwdereg_mr(struct ib_mr *ib_mr) { struct irdma_device *iwdev = to_iwdev(ib_mr->device); struct irdma_mr *iwmr = to_iwmr(ib_mr); struct irdma_pd *iwpd = to_iwpd(ib_mr->pd); struct irdma_dealloc_stag_info *info; struct irdma_pbl *iwpbl = &iwmr->iwpbl; struct irdma_cqp_request *cqp_request; struct cqp_cmds_info *cqp_info; int status; /* * Skip HW MR de-register when it is already de-registered during an MR re-reregister and the re-registration * fails */ if (!iwmr->is_hwreg) return 0; cqp_request = irdma_alloc_and_get_cqp_request(&iwdev->rf->cqp, true); if (!cqp_request) return -ENOMEM; cqp_info = &cqp_request->info; info = &cqp_info->in.u.dealloc_stag.info; memset(info, 0, sizeof(*info)); info->pd_id = iwpd->sc_pd.pd_id; info->stag_idx = RS_64_1(ib_mr->rkey, IRDMA_CQPSQ_STAG_IDX_S); info->mr = true; if (iwpbl->pbl_allocated) info->dealloc_pbl = true; cqp_info->cqp_cmd = IRDMA_OP_DEALLOC_STAG; cqp_info->post_sq = 1; cqp_info->in.u.dealloc_stag.dev = &iwdev->rf->sc_dev; cqp_info->in.u.dealloc_stag.scratch = (uintptr_t)cqp_request; status = irdma_handle_cqp_op(iwdev->rf, cqp_request); irdma_put_cqp_request(&iwdev->rf->cqp, cqp_request); if (!status) iwmr->is_hwreg = 0; return status; } /* * irdma_rereg_mr_trans - Re-register a user MR for a change translation. @iwmr: ptr of iwmr @start: virtual start * address @len: length of mr @virt: virtual address * * Re-register a user memory region when a change translation is requested. Re-register a new region while reusing the * stag from the original registration. 
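* The new translation is built with irdma_setup_pbles() and programmed to HW via irdma_hwreg_mr().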
*/ struct ib_mr * irdma_rereg_mr_trans(struct irdma_mr *iwmr, u64 start, u64 len, u64 virt, struct ib_udata *udata) { struct irdma_device *iwdev = to_iwdev(iwmr->ibmr.device); struct irdma_pbl *iwpbl = &iwmr->iwpbl; struct irdma_pble_alloc *palloc = &iwpbl->pble_alloc; struct ib_pd *pd = iwmr->ibmr.pd; struct ib_umem *region; u8 lvl; int err; region = ib_umem_get(pd->uobject->context, start, len, iwmr->access, 0); if (IS_ERR(region)) { irdma_debug(&iwdev->rf->sc_dev, IRDMA_DEBUG_VERBS, "Failed to create ib_umem region\n"); return (struct ib_mr *)region; } iwmr->region = region; iwmr->ibmr.iova = virt; iwmr->ibmr.pd = pd; iwmr->page_size = PAGE_SIZE; iwmr->len = region->length; iwpbl->user_base = virt; iwmr->page_cnt = irdma_ib_umem_num_dma_blocks(region, iwmr->page_size, virt); lvl = iwmr->page_cnt != 1 ? PBLE_LEVEL_1 | PBLE_LEVEL_2 : PBLE_LEVEL_0; err = irdma_setup_pbles(iwdev->rf, iwmr, lvl); if (err) goto error; if (lvl) { err = irdma_check_mr_contiguous(palloc, iwmr->page_size); if (err) { irdma_free_pble(iwdev->rf->pble_rsrc, palloc); iwpbl->pbl_allocated = false; } } err = irdma_hwreg_mr(iwdev, iwmr, iwmr->access); if (err) goto error; return &iwmr->ibmr; error: if (palloc->level != PBLE_LEVEL_0 && iwpbl->pbl_allocated) { irdma_free_pble(iwdev->rf->pble_rsrc, palloc); iwpbl->pbl_allocated = false; } ib_umem_release(region); iwmr->region = NULL; return ERR_PTR(err); } /** * irdma_reg_phys_mr - register kernel physical memory * @pd: ibpd pointer * @addr: physical address of memory to register * @size: size of memory to register * @access: Access rights * @iova_start: start of virtual address for physical buffers */ struct ib_mr * irdma_reg_phys_mr(struct ib_pd *pd, u64 addr, u64 size, int access, u64 *iova_start) { struct irdma_device *iwdev = to_iwdev(pd->device); struct irdma_pbl *iwpbl; struct irdma_mr *iwmr; u32 stag; int ret; iwmr = kzalloc(sizeof(*iwmr), GFP_KERNEL); if (!iwmr) return ERR_PTR(-ENOMEM); iwmr->ibmr.pd = pd; iwmr->ibmr.device = pd->device; iwpbl = &iwmr->iwpbl; iwpbl->iwmr = iwmr; iwmr->type = IRDMA_MEMREG_TYPE_MEM; iwpbl->user_base = *iova_start; stag = irdma_create_stag(iwdev); if (!stag) { ret = -ENOMEM; goto err; } iwmr->stag = stag; iwmr->ibmr.iova = *iova_start; iwmr->ibmr.rkey = stag; iwmr->ibmr.lkey = stag; iwmr->page_cnt = 1; iwmr->pgaddrmem[0] = addr; iwmr->len = size; iwmr->page_size = SZ_4K; ret = irdma_hwreg_mr(iwdev, iwmr, access); if (ret) { irdma_free_stag(iwdev, stag); goto err; } return &iwmr->ibmr; err: kfree(iwmr); return ERR_PTR(ret); } /** * irdma_get_dma_mr - register physical mem * @pd: ptr of pd * @acc: access for memory */ static struct ib_mr * irdma_get_dma_mr(struct ib_pd *pd, int acc) { u64 kva = 0; return irdma_reg_phys_mr(pd, 0, 0, acc, &kva); } /** * irdma_del_memlist - Deleting pbl list entries for CQ/QP * @iwmr: iwmr for IB's user page addresses * @ucontext: ptr to user context */ void irdma_del_memlist(struct irdma_mr *iwmr, struct irdma_ucontext *ucontext) { struct irdma_pbl *iwpbl = &iwmr->iwpbl; unsigned long flags; switch (iwmr->type) { case IRDMA_MEMREG_TYPE_CQ: spin_lock_irqsave(&ucontext->cq_reg_mem_list_lock, flags); if (iwpbl->on_list) { iwpbl->on_list = false; list_del(&iwpbl->list); } spin_unlock_irqrestore(&ucontext->cq_reg_mem_list_lock, flags); break; case IRDMA_MEMREG_TYPE_QP: spin_lock_irqsave(&ucontext->qp_reg_mem_list_lock, flags); if (iwpbl->on_list) { iwpbl->on_list = false; list_del(&iwpbl->list); } spin_unlock_irqrestore(&ucontext->qp_reg_mem_list_lock, flags); break; default: break; } } /** * 
irdma_copy_sg_list - copy sg list for qp * @sg_list: copied into sg_list * @sgl: copy from sgl * @num_sges: count of sg entries */ static void irdma_copy_sg_list(struct irdma_sge *sg_list, struct ib_sge *sgl, int num_sges) { unsigned int i; for (i = 0; i < num_sges; i++) { sg_list[i].tag_off = sgl[i].addr; sg_list[i].len = sgl[i].length; sg_list[i].stag = sgl[i].lkey; } } /** * irdma_post_send - kernel application wr * @ibqp: qp ptr for wr * @ib_wr: work request ptr * @bad_wr: return of bad wr if err */ static int irdma_post_send(struct ib_qp *ibqp, const struct ib_send_wr *ib_wr, const struct ib_send_wr **bad_wr) { struct irdma_qp *iwqp; struct irdma_qp_uk *ukqp; struct irdma_sc_dev *dev; struct irdma_post_sq_info info; int err = 0; unsigned long flags; bool inv_stag; struct irdma_ah *ah; iwqp = to_iwqp(ibqp); ukqp = &iwqp->sc_qp.qp_uk; dev = &iwqp->iwdev->rf->sc_dev; spin_lock_irqsave(&iwqp->lock, flags); while (ib_wr) { memset(&info, 0, sizeof(info)); inv_stag = false; info.wr_id = (ib_wr->wr_id); if ((ib_wr->send_flags & IB_SEND_SIGNALED) || iwqp->sig_all) info.signaled = true; if (ib_wr->send_flags & IB_SEND_FENCE) info.read_fence = true; switch (ib_wr->opcode) { case IB_WR_SEND_WITH_IMM: if (ukqp->qp_caps & IRDMA_SEND_WITH_IMM) { info.imm_data_valid = true; info.imm_data = ntohl(ib_wr->ex.imm_data); } else { err = -EINVAL; break; } /* fallthrough */ case IB_WR_SEND: case IB_WR_SEND_WITH_INV: if (ib_wr->opcode == IB_WR_SEND || ib_wr->opcode == IB_WR_SEND_WITH_IMM) { if (ib_wr->send_flags & IB_SEND_SOLICITED) info.op_type = IRDMA_OP_TYPE_SEND_SOL; else info.op_type = IRDMA_OP_TYPE_SEND; } else { if (ib_wr->send_flags & IB_SEND_SOLICITED) info.op_type = IRDMA_OP_TYPE_SEND_SOL_INV; else info.op_type = IRDMA_OP_TYPE_SEND_INV; info.stag_to_inv = ib_wr->ex.invalidate_rkey; } info.op.send.num_sges = ib_wr->num_sge; info.op.send.sg_list = (struct irdma_sge *)ib_wr->sg_list; if (iwqp->ibqp.qp_type == IB_QPT_UD || iwqp->ibqp.qp_type == IB_QPT_GSI) { ah = to_iwah(ud_wr(ib_wr)->ah); info.op.send.ah_id = ah->sc_ah.ah_info.ah_idx; info.op.send.qkey = ud_wr(ib_wr)->remote_qkey; info.op.send.dest_qp = ud_wr(ib_wr)->remote_qpn; } if (ib_wr->send_flags & IB_SEND_INLINE) err = irdma_uk_inline_send(ukqp, &info, false); else err = irdma_uk_send(ukqp, &info, false); break; case IB_WR_RDMA_WRITE_WITH_IMM: if (ukqp->qp_caps & IRDMA_WRITE_WITH_IMM) { info.imm_data_valid = true; info.imm_data = ntohl(ib_wr->ex.imm_data); } else { err = -EINVAL; break; } /* fallthrough */ case IB_WR_RDMA_WRITE: if (ib_wr->send_flags & IB_SEND_SOLICITED) info.op_type = IRDMA_OP_TYPE_RDMA_WRITE_SOL; else info.op_type = IRDMA_OP_TYPE_RDMA_WRITE; info.op.rdma_write.num_lo_sges = ib_wr->num_sge; info.op.rdma_write.lo_sg_list = (void *)ib_wr->sg_list; info.op.rdma_write.rem_addr.tag_off = rdma_wr(ib_wr)->remote_addr; info.op.rdma_write.rem_addr.stag = rdma_wr(ib_wr)->rkey; if (ib_wr->send_flags & IB_SEND_INLINE) err = irdma_uk_inline_rdma_write(ukqp, &info, false); else err = irdma_uk_rdma_write(ukqp, &info, false); break; case IB_WR_RDMA_READ_WITH_INV: inv_stag = true; /* fallthrough */ case IB_WR_RDMA_READ: if (ib_wr->num_sge > dev->hw_attrs.uk_attrs.max_hw_read_sges) { err = -EINVAL; break; } info.op_type = IRDMA_OP_TYPE_RDMA_READ; info.op.rdma_read.rem_addr.tag_off = rdma_wr(ib_wr)->remote_addr; info.op.rdma_read.rem_addr.stag = rdma_wr(ib_wr)->rkey; info.op.rdma_read.lo_sg_list = (void *)ib_wr->sg_list; info.op.rdma_read.num_lo_sges = ib_wr->num_sge; err = irdma_uk_rdma_read(ukqp, &info, inv_stag, false); break; case 
IB_WR_LOCAL_INV: info.op_type = IRDMA_OP_TYPE_INV_STAG; info.local_fence = info.read_fence; info.op.inv_local_stag.target_stag = ib_wr->ex.invalidate_rkey; err = irdma_uk_stag_local_invalidate(ukqp, &info, true); break; case IB_WR_REG_MR:{ struct irdma_mr *iwmr = to_iwmr(reg_wr(ib_wr)->mr); struct irdma_pble_alloc *palloc = &iwmr->iwpbl.pble_alloc; struct irdma_fast_reg_stag_info stag_info = {0}; stag_info.signaled = info.signaled; stag_info.read_fence = info.read_fence; stag_info.access_rights = irdma_get_mr_access(reg_wr(ib_wr)->access); stag_info.stag_key = reg_wr(ib_wr)->key & 0xff; stag_info.stag_idx = reg_wr(ib_wr)->key >> 8; stag_info.page_size = reg_wr(ib_wr)->mr->page_size; stag_info.wr_id = ib_wr->wr_id; stag_info.addr_type = IRDMA_ADDR_TYPE_VA_BASED; stag_info.va = (void *)(uintptr_t)iwmr->ibmr.iova; stag_info.total_len = iwmr->ibmr.length; if (palloc->level == PBLE_LEVEL_2) { stag_info.chunk_size = 3; stag_info.first_pm_pbl_index = palloc->level2.root.idx; } else { stag_info.chunk_size = 1; stag_info.first_pm_pbl_index = palloc->level1.idx; } stag_info.local_fence = ib_wr->send_flags & IB_SEND_FENCE; err = irdma_sc_mr_fast_register(&iwqp->sc_qp, &stag_info, true); break; } default: err = -EINVAL; irdma_debug(&iwqp->iwdev->rf->sc_dev, IRDMA_DEBUG_VERBS, "upost_send bad opcode = 0x%x\n", ib_wr->opcode); break; } if (err) break; ib_wr = ib_wr->next; } if (!iwqp->flush_issued) { if (iwqp->hw_iwarp_state <= IRDMA_QP_STATE_RTS) irdma_uk_qp_post_wr(ukqp); spin_unlock_irqrestore(&iwqp->lock, flags); } else { spin_unlock_irqrestore(&iwqp->lock, flags); irdma_sched_qp_flush_work(iwqp); } if (err) *bad_wr = ib_wr; return err; } /** * irdma_post_recv - post receive wr for kernel application * @ibqp: ib qp pointer * @ib_wr: work request for receive * @bad_wr: bad wr caused an error */ static int irdma_post_recv(struct ib_qp *ibqp, const struct ib_recv_wr *ib_wr, const struct ib_recv_wr **bad_wr) { struct irdma_qp *iwqp = to_iwqp(ibqp); struct irdma_qp_uk *ukqp = &iwqp->sc_qp.qp_uk; struct irdma_post_rq_info post_recv = {0}; struct irdma_sge *sg_list = iwqp->sg_list; unsigned long flags; int err = 0; spin_lock_irqsave(&iwqp->lock, flags); while (ib_wr) { if (ib_wr->num_sge > ukqp->max_rq_frag_cnt) { err = -EINVAL; goto out; } post_recv.num_sges = ib_wr->num_sge; post_recv.wr_id = ib_wr->wr_id; irdma_copy_sg_list(sg_list, ib_wr->sg_list, ib_wr->num_sge); post_recv.sg_list = sg_list; err = irdma_uk_post_receive(ukqp, &post_recv); if (err) { irdma_debug(&iwqp->iwdev->rf->sc_dev, IRDMA_DEBUG_VERBS, - "post_recv err %d\n", - err); + "post_recv err %d\n", err); goto out; } ib_wr = ib_wr->next; } out: spin_unlock_irqrestore(&iwqp->lock, flags); if (iwqp->flush_issued) irdma_sched_qp_flush_work(iwqp); if (err) *bad_wr = ib_wr; return err; } /** * irdma_flush_err_to_ib_wc_status - return change flush error code to IB status * @opcode: iwarp flush code */ static enum ib_wc_status irdma_flush_err_to_ib_wc_status(enum irdma_flush_opcode opcode) { switch (opcode) { case FLUSH_PROT_ERR: return IB_WC_LOC_PROT_ERR; case FLUSH_REM_ACCESS_ERR: return IB_WC_REM_ACCESS_ERR; case FLUSH_LOC_QP_OP_ERR: return IB_WC_LOC_QP_OP_ERR; case FLUSH_REM_OP_ERR: return IB_WC_REM_OP_ERR; case FLUSH_LOC_LEN_ERR: return IB_WC_LOC_LEN_ERR; case FLUSH_GENERAL_ERR: return IB_WC_WR_FLUSH_ERR; case FLUSH_MW_BIND_ERR: return IB_WC_MW_BIND_ERR; case FLUSH_REM_INV_REQ_ERR: return IB_WC_REM_INV_REQ_ERR; case FLUSH_RETRY_EXC_ERR: return IB_WC_RETRY_EXC_ERR; case FLUSH_FATAL_ERR: default: return IB_WC_FATAL_ERR; } } -static inline void 
-set_ib_wc_op_sq(struct irdma_cq_poll_info *cq_poll_info, - struct ib_wc *entry) -{ - struct irdma_sc_qp *qp; - - switch (cq_poll_info->op_type) { - case IRDMA_OP_TYPE_RDMA_WRITE: - case IRDMA_OP_TYPE_RDMA_WRITE_SOL: - entry->opcode = IB_WC_RDMA_WRITE; - break; - case IRDMA_OP_TYPE_RDMA_READ_INV_STAG: - case IRDMA_OP_TYPE_RDMA_READ: - entry->opcode = IB_WC_RDMA_READ; - break; - case IRDMA_OP_TYPE_SEND_SOL: - case IRDMA_OP_TYPE_SEND_SOL_INV: - case IRDMA_OP_TYPE_SEND_INV: - case IRDMA_OP_TYPE_SEND: - entry->opcode = IB_WC_SEND; - break; - case IRDMA_OP_TYPE_FAST_REG_NSMR: - entry->opcode = IB_WC_REG_MR; - break; - case IRDMA_OP_TYPE_INV_STAG: - entry->opcode = IB_WC_LOCAL_INV; - break; - default: - qp = cq_poll_info->qp_handle; - irdma_dev_err(to_ibdev(qp->dev), "Invalid opcode = %d in CQE\n", - cq_poll_info->op_type); - entry->status = IB_WC_GENERAL_ERR; - } -} - -static inline void -set_ib_wc_op_rq(struct irdma_cq_poll_info *cq_poll_info, - struct ib_wc *entry, bool send_imm_support) -{ - /** - * iWARP does not support sendImm, so the presence of Imm data - * must be WriteImm. - */ - if (!send_imm_support) { - entry->opcode = cq_poll_info->imm_valid ? - IB_WC_RECV_RDMA_WITH_IMM : IB_WC_RECV; - return; - } - switch (cq_poll_info->op_type) { - case IB_OPCODE_RDMA_WRITE_ONLY_WITH_IMMEDIATE: - case IB_OPCODE_RDMA_WRITE_LAST_WITH_IMMEDIATE: - entry->opcode = IB_WC_RECV_RDMA_WITH_IMM; - break; - default: - entry->opcode = IB_WC_RECV; - } -} - /** * irdma_process_cqe - process cqe info * @entry: processed cqe * @cq_poll_info: cqe info */ static void irdma_process_cqe(struct ib_wc *entry, struct irdma_cq_poll_info *cq_poll_info) { struct irdma_sc_qp *qp; entry->wc_flags = 0; entry->pkey_index = 0; entry->wr_id = cq_poll_info->wr_id; qp = cq_poll_info->qp_handle; entry->qp = qp->qp_uk.back_qp; if (cq_poll_info->error) { entry->status = (cq_poll_info->comp_status == IRDMA_COMPL_STATUS_FLUSHED) ? irdma_flush_err_to_ib_wc_status(cq_poll_info->minor_err) : IB_WC_GENERAL_ERR; entry->vendor_err = cq_poll_info->major_err << 16 | cq_poll_info->minor_err; } else { entry->status = IB_WC_SUCCESS; if (cq_poll_info->imm_valid) { entry->ex.imm_data = htonl(cq_poll_info->imm_data); entry->wc_flags |= IB_WC_WITH_IMM; } if (cq_poll_info->ud_smac_valid) { ether_addr_copy(entry->smac, cq_poll_info->ud_smac); entry->wc_flags |= IB_WC_WITH_SMAC; } if (cq_poll_info->ud_vlan_valid) { u16 vlan = cq_poll_info->ud_vlan & EVL_VLID_MASK; entry->sl = cq_poll_info->ud_vlan >> VLAN_PRIO_SHIFT; if (vlan) { entry->vlan_id = vlan; entry->wc_flags |= IB_WC_WITH_VLAN; } } else { entry->sl = 0; } } if (cq_poll_info->q_type == IRDMA_CQE_QTYPE_SQ) { set_ib_wc_op_sq(cq_poll_info, entry); } else { set_ib_wc_op_rq(cq_poll_info, entry, qp->qp_uk.qp_caps & IRDMA_SEND_WITH_IMM ? true : false); if (qp->qp_uk.qp_type != IRDMA_QP_TYPE_ROCE_UD && cq_poll_info->stag_invalid_set) { entry->ex.invalidate_rkey = cq_poll_info->inv_stag; entry->wc_flags |= IB_WC_WITH_INVALIDATE; } } if (qp->qp_uk.qp_type == IRDMA_QP_TYPE_ROCE_UD) { entry->src_qp = cq_poll_info->ud_src_qpn; entry->slid = 0; entry->wc_flags |= (IB_WC_GRH | IB_WC_WITH_NETWORK_HDR_TYPE); entry->network_hdr_type = cq_poll_info->ipv4 ? 
RDMA_NETWORK_IPV4 : RDMA_NETWORK_IPV6; } else { entry->src_qp = cq_poll_info->qp_id; } entry->byte_len = cq_poll_info->bytes_xfered; } /** * irdma_poll_one - poll one entry of the CQ * @ukcq: ukcq to poll * @cur_cqe: current CQE info to be filled in * @entry: ibv_wc object to be filled for non-extended CQ or NULL for extended CQ * * Returns the internal irdma device error code or 0 on success */ static inline int irdma_poll_one(struct irdma_cq_uk *ukcq, struct irdma_cq_poll_info *cur_cqe, struct ib_wc *entry) { int ret = irdma_uk_cq_poll_cmpl(ukcq, cur_cqe); if (ret) return ret; irdma_process_cqe(entry, cur_cqe); return 0; } /** * __irdma_poll_cq - poll cq for completion (kernel apps) * @iwcq: cq to poll * @num_entries: number of entries to poll * @entry: wr of a completed entry */ static int __irdma_poll_cq(struct irdma_cq *iwcq, int num_entries, struct ib_wc *entry) { struct list_head *tmp_node, *list_node; struct irdma_cq_buf *last_buf = NULL; struct irdma_cq_poll_info *cur_cqe = &iwcq->cur_cqe; struct irdma_cq_buf *cq_buf; int ret; struct irdma_device *iwdev; struct irdma_cq_uk *ukcq; bool cq_new_cqe = false; int resized_bufs = 0; int npolled = 0; iwdev = to_iwdev(iwcq->ibcq.device); ukcq = &iwcq->sc_cq.cq_uk; /* go through the list of previously resized CQ buffers */ list_for_each_safe(list_node, tmp_node, &iwcq->resize_list) { cq_buf = container_of(list_node, struct irdma_cq_buf, list); while (npolled < num_entries) { ret = irdma_poll_one(&cq_buf->cq_uk, cur_cqe, entry + npolled); if (!ret) { ++npolled; cq_new_cqe = true; continue; } if (ret == -ENOENT) break; /* QP using the CQ is destroyed. Skip reporting this CQE */ if (ret == -EFAULT) { cq_new_cqe = true; continue; } goto error; } /* save the resized CQ buffer which received the last cqe */ if (cq_new_cqe) last_buf = cq_buf; cq_new_cqe = false; } /* check the current CQ for new cqes */ while (npolled < num_entries) { ret = irdma_poll_one(ukcq, cur_cqe, entry + npolled); if (ret == -ENOENT) { ret = irdma_generated_cmpls(iwcq, cur_cqe); if (!ret) irdma_process_cqe(entry + npolled, cur_cqe); } if (!ret) { ++npolled; cq_new_cqe = true; continue; } if (ret == -ENOENT) break; /* QP using the CQ is destroyed. 
Skip reporting this CQE */ if (ret == -EFAULT) { cq_new_cqe = true; continue; } goto error; } if (cq_new_cqe) /* all previous CQ resizes are complete */ resized_bufs = irdma_process_resize_list(iwcq, iwdev, NULL); else if (last_buf) /* only CQ resizes up to the last_buf are complete */ resized_bufs = irdma_process_resize_list(iwcq, iwdev, last_buf); if (resized_bufs) /* report to the HW the number of complete CQ resizes */ irdma_uk_cq_set_resized_cnt(ukcq, resized_bufs); return npolled; error: irdma_debug(&iwdev->rf->sc_dev, IRDMA_DEBUG_VERBS, - "%s: Error polling CQ, irdma_err: %d\n", - __func__, ret); + "%s: Error polling CQ, irdma_err: %d\n", __func__, ret); return ret; } /** * irdma_poll_cq - poll cq for completion (kernel apps) * @ibcq: cq to poll * @num_entries: number of entries to poll * @entry: wr of a completed entry */ static int irdma_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *entry) { struct irdma_cq *iwcq; unsigned long flags; int ret; iwcq = to_iwcq(ibcq); spin_lock_irqsave(&iwcq->lock, flags); ret = __irdma_poll_cq(iwcq, num_entries, entry); spin_unlock_irqrestore(&iwcq->lock, flags); return ret; } /** * irdma_req_notify_cq - arm cq kernel application * @ibcq: cq to arm * @notify_flags: notofication flags */ static int irdma_req_notify_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags notify_flags) { struct irdma_cq *iwcq; struct irdma_cq_uk *ukcq; unsigned long flags; enum irdma_cmpl_notify cq_notify = IRDMA_CQ_COMPL_EVENT; bool promo_event = false; int ret = 0; iwcq = to_iwcq(ibcq); ukcq = &iwcq->sc_cq.cq_uk; spin_lock_irqsave(&iwcq->lock, flags); if (notify_flags == IB_CQ_SOLICITED) { cq_notify = IRDMA_CQ_COMPL_SOLICITED; } else { if (iwcq->last_notify == IRDMA_CQ_COMPL_SOLICITED) promo_event = true; } if (!atomic_cmpxchg(&iwcq->armed, 0, 1) || promo_event) { iwcq->last_notify = cq_notify; irdma_uk_cq_request_notification(ukcq, cq_notify); } if ((notify_flags & IB_CQ_REPORT_MISSED_EVENTS) && (!irdma_cq_empty(iwcq) || !list_empty(&iwcq->cmpl_generated))) ret = 1; spin_unlock_irqrestore(&iwcq->lock, flags); return ret; } /** * mcast_list_add - Add a new mcast item to list * @rf: RDMA PCI function * @new_elem: pointer to element to add */ static void mcast_list_add(struct irdma_pci_f *rf, struct mc_table_list *new_elem) { list_add(&new_elem->list, &rf->mc_qht_list.list); } /** * mcast_list_del - Remove an mcast item from list * @mc_qht_elem: pointer to mcast table list element */ static void mcast_list_del(struct mc_table_list *mc_qht_elem) { if (mc_qht_elem) list_del(&mc_qht_elem->list); } /** * mcast_list_lookup_ip - Search mcast list for address * @rf: RDMA PCI function * @ip_mcast: pointer to mcast IP address */ static struct mc_table_list * mcast_list_lookup_ip(struct irdma_pci_f *rf, u32 *ip_mcast) { struct mc_table_list *mc_qht_el; struct list_head *pos, *q; list_for_each_safe(pos, q, &rf->mc_qht_list.list) { mc_qht_el = list_entry(pos, struct mc_table_list, list); if (!memcmp(mc_qht_el->mc_info.dest_ip, ip_mcast, sizeof(mc_qht_el->mc_info.dest_ip))) return mc_qht_el; } return NULL; } /** * irdma_mcast_cqp_op - perform a mcast cqp operation * @iwdev: irdma device * @mc_grp_ctx: mcast group info * @op: operation * * returns error status */ static int irdma_mcast_cqp_op(struct irdma_device *iwdev, struct irdma_mcast_grp_info *mc_grp_ctx, u8 op) { struct cqp_cmds_info *cqp_info; struct irdma_cqp_request *cqp_request; int status; cqp_request = irdma_alloc_and_get_cqp_request(&iwdev->rf->cqp, true); if (!cqp_request) return -ENOMEM; 
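/*
 * Standard CQP command lifecycle, as in irdma_hwdereg_mr() above: take a
 * reference on a CQP request (the boolean selects the waiting variant),
 * fill in cqp_info and cqp_cmd, post it with irdma_handle_cqp_op(), then
 * drop the reference with irdma_put_cqp_request() regardless of status.
 */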
cqp_request->info.in.u.mc_create.info = *mc_grp_ctx; cqp_info = &cqp_request->info; cqp_info->cqp_cmd = op; cqp_info->post_sq = 1; cqp_info->in.u.mc_create.scratch = (uintptr_t)cqp_request; cqp_info->in.u.mc_create.cqp = &iwdev->rf->cqp.sc_cqp; status = irdma_handle_cqp_op(iwdev->rf, cqp_request); irdma_put_cqp_request(&iwdev->rf->cqp, cqp_request); return status; } /** * irdma_attach_mcast - attach a qp to a multicast group * @ibqp: ptr to qp * @ibgid: pointer to global ID * @lid: local ID * * returns error status */ static int irdma_attach_mcast(struct ib_qp *ibqp, union ib_gid *ibgid, u16 lid) { struct irdma_qp *iwqp = to_iwqp(ibqp); struct irdma_device *iwdev = iwqp->iwdev; struct irdma_pci_f *rf = iwdev->rf; struct mc_table_list *mc_qht_elem; struct irdma_mcast_grp_ctx_entry_info mcg_info = {0}; unsigned long flags; u32 ip_addr[4] = {0}; u32 mgn; u32 no_mgs; int ret = 0; bool ipv4; u16 vlan_id; - union { - struct sockaddr saddr; - struct sockaddr_in saddr_in; - struct sockaddr_in6 saddr_in6; - } sgid_addr; - unsigned char dmac[ETH_ALEN]; + union irdma_sockaddr sgid_addr; + unsigned char dmac[ETHER_ADDR_LEN]; rdma_gid2ip((struct sockaddr *)&sgid_addr, ibgid); if (!ipv6_addr_v4mapped((struct in6_addr *)ibgid)) { irdma_copy_ip_ntohl(ip_addr, sgid_addr.saddr_in6.sin6_addr.__u6_addr.__u6_addr32); irdma_netdev_vlan_ipv6(ip_addr, &vlan_id, NULL); ipv4 = false; irdma_debug(&iwdev->rf->sc_dev, IRDMA_DEBUG_VERBS, - "qp_id=%d, IP6address=%pI6\n", - ibqp->qp_num, - ip_addr); + "qp_id=%d, IP6address=%x:%x:%x:%x\n", ibqp->qp_num, + IRDMA_PRINT_IP6(ip_addr)); irdma_mcast_mac_v6(ip_addr, dmac); } else { ip_addr[0] = ntohl(sgid_addr.saddr_in.sin_addr.s_addr); ipv4 = true; vlan_id = irdma_get_vlan_ipv4(ip_addr); irdma_mcast_mac_v4(ip_addr, dmac); irdma_debug(&iwdev->rf->sc_dev, IRDMA_DEBUG_VERBS, - "qp_id=%d, IP4address=%pI4, MAC=%pM\n", - ibqp->qp_num, ip_addr, dmac); + "qp_id=%d, IP4address=%x, MAC=%x:%x:%x:%x:%x:%x\n", + ibqp->qp_num, ip_addr[0], dmac[0], dmac[1], dmac[2], + dmac[3], dmac[4], dmac[5]); } spin_lock_irqsave(&rf->qh_list_lock, flags); mc_qht_elem = mcast_list_lookup_ip(rf, ip_addr); if (!mc_qht_elem) { struct irdma_dma_mem *dma_mem_mc; spin_unlock_irqrestore(&rf->qh_list_lock, flags); mc_qht_elem = kzalloc(sizeof(*mc_qht_elem), GFP_KERNEL); if (!mc_qht_elem) return -ENOMEM; mc_qht_elem->mc_info.ipv4_valid = ipv4; memcpy(mc_qht_elem->mc_info.dest_ip, ip_addr, sizeof(mc_qht_elem->mc_info.dest_ip)); ret = irdma_alloc_rsrc(rf, rf->allocated_mcgs, rf->max_mcg, &mgn, &rf->next_mcg); if (ret) { kfree(mc_qht_elem); return -ENOMEM; } mc_qht_elem->mc_info.mgn = mgn; dma_mem_mc = &mc_qht_elem->mc_grp_ctx.dma_mem_mc; dma_mem_mc->size = sizeof(u64)* IRDMA_MAX_MGS_PER_CTX; dma_mem_mc->va = irdma_allocate_dma_mem(&rf->hw, dma_mem_mc, dma_mem_mc->size, IRDMA_HW_PAGE_SIZE); if (!dma_mem_mc->va) { irdma_free_rsrc(rf, rf->allocated_mcgs, mgn); kfree(mc_qht_elem); return -ENOMEM; } mc_qht_elem->mc_grp_ctx.mg_id = (u16)mgn; memcpy(mc_qht_elem->mc_grp_ctx.dest_ip_addr, ip_addr, sizeof(mc_qht_elem->mc_grp_ctx.dest_ip_addr)); mc_qht_elem->mc_grp_ctx.ipv4_valid = ipv4; mc_qht_elem->mc_grp_ctx.vlan_id = vlan_id; if (vlan_id < VLAN_N_VID) mc_qht_elem->mc_grp_ctx.vlan_valid = true; mc_qht_elem->mc_grp_ctx.hmc_fcn_id = iwdev->rf->sc_dev.hmc_fn_id; mc_qht_elem->mc_grp_ctx.qs_handle = iwqp->sc_qp.vsi->qos[iwqp->sc_qp.user_pri].qs_handle; ether_addr_copy(mc_qht_elem->mc_grp_ctx.dest_mac_addr, dmac); spin_lock_irqsave(&rf->qh_list_lock, flags); mcast_list_add(rf, mc_qht_elem); } else { if 
(mc_qht_elem->mc_grp_ctx.no_of_mgs == IRDMA_MAX_MGS_PER_CTX) { spin_unlock_irqrestore(&rf->qh_list_lock, flags); return -ENOMEM; } } mcg_info.qp_id = iwqp->ibqp.qp_num; no_mgs = mc_qht_elem->mc_grp_ctx.no_of_mgs; irdma_sc_add_mcast_grp(&mc_qht_elem->mc_grp_ctx, &mcg_info); spin_unlock_irqrestore(&rf->qh_list_lock, flags); /* Only if there is a change do we need to modify or create */ if (!no_mgs) { ret = irdma_mcast_cqp_op(iwdev, &mc_qht_elem->mc_grp_ctx, IRDMA_OP_MC_CREATE); } else if (no_mgs != mc_qht_elem->mc_grp_ctx.no_of_mgs) { ret = irdma_mcast_cqp_op(iwdev, &mc_qht_elem->mc_grp_ctx, IRDMA_OP_MC_MODIFY); } else { return 0; } if (ret) goto error; return 0; error: irdma_sc_del_mcast_grp(&mc_qht_elem->mc_grp_ctx, &mcg_info); if (!mc_qht_elem->mc_grp_ctx.no_of_mgs) { mcast_list_del(mc_qht_elem); irdma_free_dma_mem(&rf->hw, &mc_qht_elem->mc_grp_ctx.dma_mem_mc); irdma_free_rsrc(rf, rf->allocated_mcgs, mc_qht_elem->mc_grp_ctx.mg_id); kfree(mc_qht_elem); } return ret; } /** * irdma_detach_mcast - detach a qp from a multicast group * @ibqp: ptr to qp * @ibgid: pointer to global ID * @lid: local ID * * returns error status */ static int irdma_detach_mcast(struct ib_qp *ibqp, union ib_gid *ibgid, u16 lid) { struct irdma_qp *iwqp = to_iwqp(ibqp); struct irdma_device *iwdev = iwqp->iwdev; struct irdma_pci_f *rf = iwdev->rf; u32 ip_addr[4] = {0}; struct mc_table_list *mc_qht_elem; struct irdma_mcast_grp_ctx_entry_info mcg_info = {0}; int ret; unsigned long flags; - union { - struct sockaddr saddr; - struct sockaddr_in saddr_in; - struct sockaddr_in6 saddr_in6; - } sgid_addr; + union irdma_sockaddr sgid_addr; rdma_gid2ip((struct sockaddr *)&sgid_addr, ibgid); if (!ipv6_addr_v4mapped((struct in6_addr *)ibgid)) irdma_copy_ip_ntohl(ip_addr, sgid_addr.saddr_in6.sin6_addr.__u6_addr.__u6_addr32); else ip_addr[0] = ntohl(sgid_addr.saddr_in.sin_addr.s_addr); spin_lock_irqsave(&rf->qh_list_lock, flags); mc_qht_elem = mcast_list_lookup_ip(rf, ip_addr); if (!mc_qht_elem) { spin_unlock_irqrestore(&rf->qh_list_lock, flags); - irdma_debug(&iwdev->rf->sc_dev, - IRDMA_DEBUG_VERBS, "address not found MCG\n"); + irdma_debug(&iwdev->rf->sc_dev, IRDMA_DEBUG_VERBS, + "address not found MCG\n"); return 0; } mcg_info.qp_id = iwqp->ibqp.qp_num; irdma_sc_del_mcast_grp(&mc_qht_elem->mc_grp_ctx, &mcg_info); if (!mc_qht_elem->mc_grp_ctx.no_of_mgs) { mcast_list_del(mc_qht_elem); spin_unlock_irqrestore(&rf->qh_list_lock, flags); ret = irdma_mcast_cqp_op(iwdev, &mc_qht_elem->mc_grp_ctx, IRDMA_OP_MC_DESTROY); if (ret) { - irdma_debug(&iwdev->rf->sc_dev, - IRDMA_DEBUG_VERBS, "failed MC_DESTROY MCG\n"); + irdma_debug(&iwdev->rf->sc_dev, IRDMA_DEBUG_VERBS, + "failed MC_DESTROY MCG\n"); spin_lock_irqsave(&rf->qh_list_lock, flags); mcast_list_add(rf, mc_qht_elem); spin_unlock_irqrestore(&rf->qh_list_lock, flags); return -EAGAIN; } irdma_free_dma_mem(&rf->hw, &mc_qht_elem->mc_grp_ctx.dma_mem_mc); irdma_free_rsrc(rf, rf->allocated_mcgs, mc_qht_elem->mc_grp_ctx.mg_id); kfree(mc_qht_elem); } else { spin_unlock_irqrestore(&rf->qh_list_lock, flags); ret = irdma_mcast_cqp_op(iwdev, &mc_qht_elem->mc_grp_ctx, IRDMA_OP_MC_MODIFY); if (ret) { - irdma_debug(&iwdev->rf->sc_dev, - IRDMA_DEBUG_VERBS, "failed Modify MCG\n"); + irdma_debug(&iwdev->rf->sc_dev, IRDMA_DEBUG_VERBS, + "failed Modify MCG\n"); return ret; } } return 0; } /** * irdma_query_ah - Query address handle * @ibah: pointer to address handle * @ah_attr: address handle attributes */ static int irdma_query_ah(struct ib_ah *ibah, struct ib_ah_attr *ah_attr) { struct irdma_ah *ah = 
to_iwah(ibah); memset(ah_attr, 0, sizeof(*ah_attr)); if (ah->av.attrs.ah_flags & IB_AH_GRH) { ah_attr->ah_flags = IB_AH_GRH; ah_attr->grh.flow_label = ah->sc_ah.ah_info.flow_label; ah_attr->grh.traffic_class = ah->sc_ah.ah_info.tc_tos; ah_attr->grh.hop_limit = ah->sc_ah.ah_info.hop_ttl; ah_attr->grh.sgid_index = ah->sgid_index; ah_attr->grh.sgid_index = ah->sgid_index; memcpy(&ah_attr->grh.dgid, &ah->dgid, sizeof(ah_attr->grh.dgid)); } return 0; } static if_t irdma_get_netdev(struct ib_device *ibdev, u8 port_num) { struct irdma_device *iwdev = to_iwdev(ibdev); if (iwdev->netdev) { dev_hold(iwdev->netdev); return iwdev->netdev; } return NULL; } static void irdma_set_device_ops(struct ib_device *ibdev) { struct ib_device *dev_ops = ibdev; #if __FreeBSD_version >= 1400000 dev_ops->ops.driver_id = RDMA_DRIVER_I40IW; dev_ops->ops.size_ib_ah = IRDMA_SET_RDMA_OBJ_SIZE(ib_ah, irdma_ah, ibah); dev_ops->ops.size_ib_cq = IRDMA_SET_RDMA_OBJ_SIZE(ib_cq, irdma_cq, ibcq); dev_ops->ops.size_ib_pd = IRDMA_SET_RDMA_OBJ_SIZE(ib_pd, irdma_pd, ibpd); dev_ops->ops.size_ib_ucontext = IRDMA_SET_RDMA_OBJ_SIZE(ib_ucontext, irdma_ucontext, ibucontext); #endif /* __FreeBSD_version >= 1400000 */ dev_ops->alloc_hw_stats = irdma_alloc_hw_stats; dev_ops->alloc_mr = irdma_alloc_mr; dev_ops->alloc_mw = irdma_alloc_mw; dev_ops->alloc_pd = irdma_alloc_pd; dev_ops->alloc_ucontext = irdma_alloc_ucontext; dev_ops->create_cq = irdma_create_cq; dev_ops->create_qp = irdma_create_qp; dev_ops->dealloc_mw = irdma_dealloc_mw; dev_ops->dealloc_pd = irdma_dealloc_pd; dev_ops->dealloc_ucontext = irdma_dealloc_ucontext; dev_ops->dereg_mr = irdma_dereg_mr; dev_ops->destroy_cq = irdma_destroy_cq; dev_ops->destroy_qp = irdma_destroy_qp; dev_ops->disassociate_ucontext = irdma_disassociate_ucontext; dev_ops->get_dev_fw_str = irdma_get_dev_fw_str; dev_ops->get_dma_mr = irdma_get_dma_mr; dev_ops->get_hw_stats = irdma_get_hw_stats; dev_ops->get_netdev = irdma_get_netdev; dev_ops->map_mr_sg = irdma_map_mr_sg; dev_ops->mmap = irdma_mmap; #if __FreeBSD_version >= 1400026 dev_ops->mmap_free = irdma_mmap_free; #endif dev_ops->poll_cq = irdma_poll_cq; dev_ops->post_recv = irdma_post_recv; dev_ops->post_send = irdma_post_send; dev_ops->query_device = irdma_query_device; dev_ops->query_port = irdma_query_port; dev_ops->modify_port = irdma_modify_port; dev_ops->query_qp = irdma_query_qp; dev_ops->reg_user_mr = irdma_reg_user_mr; dev_ops->rereg_user_mr = irdma_rereg_user_mr; dev_ops->req_notify_cq = irdma_req_notify_cq; dev_ops->resize_cq = irdma_resize_cq; } static void irdma_set_device_mcast_ops(struct ib_device *ibdev) { struct ib_device *dev_ops = ibdev; dev_ops->attach_mcast = irdma_attach_mcast; dev_ops->detach_mcast = irdma_detach_mcast; } static void irdma_set_device_roce_ops(struct ib_device *ibdev) { struct ib_device *dev_ops = ibdev; dev_ops->create_ah = irdma_create_ah; dev_ops->destroy_ah = irdma_destroy_ah; dev_ops->get_link_layer = irdma_get_link_layer; dev_ops->get_port_immutable = irdma_roce_port_immutable; dev_ops->modify_qp = irdma_modify_qp_roce; dev_ops->query_ah = irdma_query_ah; dev_ops->query_gid = irdma_query_gid_roce; dev_ops->query_pkey = irdma_query_pkey; ibdev->add_gid = irdma_add_gid; ibdev->del_gid = irdma_del_gid; } static void irdma_set_device_iw_ops(struct ib_device *ibdev) { struct ib_device *dev_ops = ibdev; ibdev->uverbs_cmd_mask |= (1ull << IB_USER_VERBS_CMD_CREATE_AH) | (1ull << IB_USER_VERBS_CMD_DESTROY_AH); dev_ops->create_ah = irdma_create_ah_stub; dev_ops->destroy_ah = irdma_destroy_ah_stub; 
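/*
 * iWARP does not use address handles the way RoCE does, so the AH ops
 * above point at stubs while the matching uverbs commands remain
 * advertised in uverbs_cmd_mask.
 */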
dev_ops->get_port_immutable = irdma_iw_port_immutable; dev_ops->modify_qp = irdma_modify_qp; dev_ops->query_gid = irdma_query_gid; dev_ops->query_pkey = irdma_iw_query_pkey; } static inline void irdma_set_device_gen1_ops(struct ib_device *ibdev) { } /** * irdma_init_roce_device - initialization of roce rdma device * @iwdev: irdma device */ static void irdma_init_roce_device(struct irdma_device *iwdev) { kc_set_roce_uverbs_cmd_mask(iwdev); iwdev->ibdev.node_type = RDMA_NODE_IB_CA; addrconf_addr_eui48((u8 *)&iwdev->ibdev.node_guid, if_getlladdr(iwdev->netdev)); irdma_set_device_roce_ops(&iwdev->ibdev); if (iwdev->rf->rdma_ver == IRDMA_GEN_2) irdma_set_device_mcast_ops(&iwdev->ibdev); } /** * irdma_init_iw_device - initialization of iwarp rdma device * @iwdev: irdma device */ static int irdma_init_iw_device(struct irdma_device *iwdev) { if_t netdev = iwdev->netdev; iwdev->ibdev.node_type = RDMA_NODE_RNIC; addrconf_addr_eui48((u8 *)&iwdev->ibdev.node_guid, if_getlladdr(netdev)); iwdev->ibdev.iwcm = kzalloc(sizeof(*iwdev->ibdev.iwcm), GFP_KERNEL); if (!iwdev->ibdev.iwcm) return -ENOMEM; iwdev->ibdev.iwcm->add_ref = irdma_qp_add_ref; iwdev->ibdev.iwcm->rem_ref = irdma_qp_rem_ref; iwdev->ibdev.iwcm->get_qp = irdma_get_qp; iwdev->ibdev.iwcm->connect = irdma_connect; iwdev->ibdev.iwcm->accept = irdma_accept; iwdev->ibdev.iwcm->reject = irdma_reject; iwdev->ibdev.iwcm->create_listen = irdma_create_listen; iwdev->ibdev.iwcm->destroy_listen = irdma_destroy_listen; memcpy(iwdev->ibdev.iwcm->ifname, if_name(netdev), sizeof(iwdev->ibdev.iwcm->ifname)); irdma_set_device_iw_ops(&iwdev->ibdev); return 0; } /** * irdma_init_rdma_device - initialization of rdma device * @iwdev: irdma device */ static int irdma_init_rdma_device(struct irdma_device *iwdev) { int ret; iwdev->ibdev.owner = THIS_MODULE; iwdev->ibdev.uverbs_abi_ver = IRDMA_ABI_VER; kc_set_rdma_uverbs_cmd_mask(iwdev); if (iwdev->roce_mode) { irdma_init_roce_device(iwdev); } else { ret = irdma_init_iw_device(iwdev); if (ret) return ret; } iwdev->ibdev.phys_port_cnt = 1; iwdev->ibdev.num_comp_vectors = iwdev->rf->ceqs_count; iwdev->ibdev.dev.parent = iwdev->rf->dev_ctx.dev; set_ibdev_dma_device(iwdev->ibdev, &iwdev->rf->pcidev->dev); irdma_set_device_ops(&iwdev->ibdev); if (iwdev->rf->rdma_ver == IRDMA_GEN_1) irdma_set_device_gen1_ops(&iwdev->ibdev); return 0; } /** * irdma_port_ibevent - indicate port event * @iwdev: irdma device */ void irdma_port_ibevent(struct irdma_device *iwdev) { struct ib_event event; event.device = &iwdev->ibdev; event.element.port_num = 1; event.event = iwdev->iw_status ? 
IB_EVENT_PORT_ACTIVE : IB_EVENT_PORT_ERR; ib_dispatch_event(&event); } /** * irdma_ib_unregister_device - unregister rdma device from IB * core * @iwdev: irdma device */ void irdma_ib_unregister_device(struct irdma_device *iwdev) { iwdev->iw_status = 0; irdma_port_ibevent(iwdev); ib_unregister_device(&iwdev->ibdev); dev_put(iwdev->netdev); kfree(iwdev->ibdev.iwcm); iwdev->ibdev.iwcm = NULL; } /** * irdma_ib_register_device - register irdma device to IB core * @iwdev: irdma device */ int irdma_ib_register_device(struct irdma_device *iwdev) { int ret; ret = irdma_init_rdma_device(iwdev); if (ret) return ret; dev_hold(iwdev->netdev); sprintf(iwdev->ibdev.name, "irdma-%s", if_name(iwdev->netdev)); ret = ib_register_device(&iwdev->ibdev, NULL); if (ret) goto error; iwdev->iw_status = 1; irdma_port_ibevent(iwdev); return 0; error: kfree(iwdev->ibdev.iwcm); iwdev->ibdev.iwcm = NULL; irdma_debug(&iwdev->rf->sc_dev, IRDMA_DEBUG_VERBS, "Register RDMA device fail\n"); return ret; } diff --git a/sys/dev/irdma/irdma_verbs.h b/sys/dev/irdma/irdma_verbs.h index fd96d6950b34..2457572583db 100644 --- a/sys/dev/irdma/irdma_verbs.h +++ b/sys/dev/irdma/irdma_verbs.h @@ -1,357 +1,420 @@ /*- * SPDX-License-Identifier: GPL-2.0 or Linux-OpenIB * - * Copyright (c) 2015 - 2022 Intel Corporation + * Copyright (c) 2015 - 2023 Intel Corporation * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU * General Public License (GPL) Version 2, available from the file * COPYING in the main directory of this source tree, or the * OpenFabrics.org BSD license below: * * Redistribution and use in source and binary forms, with or * without modification, are permitted provided that the following * conditions are met: * * - Redistributions of source code must retain the above * copyright notice, this list of conditions and the following * disclaimer. * * - Redistributions in binary form must reproduce the above * copyright notice, this list of conditions and the following * disclaimer in the documentation and/or other materials * provided with the distribution. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ #ifndef IRDMA_VERBS_H #define IRDMA_VERBS_H #define IRDMA_MAX_SAVED_PHY_PGADDR 4 #define IRDMA_FLUSH_DELAY_MS 20 #define IRDMA_PKEY_TBL_SZ 1 #define IRDMA_DEFAULT_PKEY 0xFFFF +#define IRDMA_SHADOW_PGCNT 1 + #define iwdev_to_idev(iwdev) (&(iwdev)->rf->sc_dev) struct irdma_ucontext { struct ib_ucontext ibucontext; struct irdma_device *iwdev; #if __FreeBSD_version >= 1400026 struct rdma_user_mmap_entry *db_mmap_entry; #else struct irdma_user_mmap_entry *db_mmap_entry; DECLARE_HASHTABLE(mmap_hash_tbl, 6); spinlock_t mmap_tbl_lock; /* protect mmap hash table entries */ #endif struct list_head cq_reg_mem_list; spinlock_t cq_reg_mem_list_lock; /* protect CQ memory list */ struct list_head qp_reg_mem_list; spinlock_t qp_reg_mem_list_lock; /* protect QP memory list */ /* FIXME: Move to kcompat ideally. 
Used < 4.20.0 for old diassasscoaite flow */ struct list_head vma_list; struct mutex vma_list_mutex; /* protect the vma_list */ int abi_ver; bool legacy_mode:1; bool use_raw_attrs:1; }; struct irdma_pd { struct ib_pd ibpd; struct irdma_sc_pd sc_pd; struct list_head udqp_list; spinlock_t udqp_list_lock; }; +union irdma_sockaddr { + struct sockaddr_in saddr_in; + struct sockaddr_in6 saddr_in6; +}; + struct irdma_av { u8 macaddr[16]; struct ib_ah_attr attrs; - union { - struct sockaddr saddr; - struct sockaddr_in saddr_in; - struct sockaddr_in6 saddr_in6; - } sgid_addr, dgid_addr; + union irdma_sockaddr sgid_addr; + union irdma_sockaddr dgid_addr; u8 net_type; }; struct irdma_ah { struct ib_ah ibah; struct irdma_sc_ah sc_ah; struct irdma_pd *pd; struct irdma_av av; u8 sgid_index; union ib_gid dgid; }; struct irdma_hmc_pble { union { u32 idx; dma_addr_t addr; }; }; struct irdma_cq_mr { struct irdma_hmc_pble cq_pbl; dma_addr_t shadow; bool split; }; struct irdma_qp_mr { struct irdma_hmc_pble sq_pbl; struct irdma_hmc_pble rq_pbl; dma_addr_t shadow; struct page *sq_page; }; struct irdma_cq_buf { struct irdma_dma_mem kmem_buf; struct irdma_cq_uk cq_uk; struct irdma_hw *hw; struct list_head list; struct work_struct work; }; struct irdma_pbl { struct list_head list; union { struct irdma_qp_mr qp_mr; struct irdma_cq_mr cq_mr; }; bool pbl_allocated:1; bool on_list:1; u64 user_base; struct irdma_pble_alloc pble_alloc; struct irdma_mr *iwmr; }; struct irdma_mr { union { struct ib_mr ibmr; struct ib_mw ibmw; }; struct ib_umem *region; int access; u8 is_hwreg; u16 type; u32 page_cnt; u64 page_size; u64 page_msk; u32 npages; u32 stag; u64 len; u64 pgaddrmem[IRDMA_MAX_SAVED_PHY_PGADDR]; struct irdma_pbl iwpbl; }; struct irdma_cq { struct ib_cq ibcq; struct irdma_sc_cq sc_cq; u16 cq_head; u16 cq_size; u16 cq_num; bool user_mode; atomic_t armed; enum irdma_cmpl_notify last_notify; u32 polled_cmpls; u32 cq_mem_size; struct irdma_dma_mem kmem; struct irdma_dma_mem kmem_shadow; struct completion free_cq; atomic_t refcnt; spinlock_t lock; /* for poll cq */ struct irdma_pbl *iwpbl; struct irdma_pbl *iwpbl_shadow; struct list_head resize_list; struct irdma_cq_poll_info cur_cqe; struct list_head cmpl_generated; }; struct irdma_cmpl_gen { struct list_head list; struct irdma_cq_poll_info cpi; }; struct disconn_work { struct work_struct work; struct irdma_qp *iwqp; }; struct if_notify_work { struct work_struct work; struct irdma_device *iwdev; u32 ipaddr[4]; u16 vlan_id; bool ipv4:1; bool ifup:1; }; struct iw_cm_id; struct irdma_qp_kmode { struct irdma_dma_mem dma_mem; u32 *sig_trk_mem; struct irdma_sq_uk_wr_trk_info *sq_wrid_mem; u64 *rq_wrid_mem; }; struct irdma_qp { struct ib_qp ibqp; struct irdma_sc_qp sc_qp; struct irdma_device *iwdev; struct irdma_cq *iwscq; struct irdma_cq *iwrcq; struct irdma_pd *iwpd; #if __FreeBSD_version >= 1400026 struct rdma_user_mmap_entry *push_wqe_mmap_entry; struct rdma_user_mmap_entry *push_db_mmap_entry; #else struct irdma_user_mmap_entry *push_wqe_mmap_entry; struct irdma_user_mmap_entry *push_db_mmap_entry; #endif struct irdma_qp_host_ctx_info ctx_info; union { struct irdma_iwarp_offload_info iwarp_info; struct irdma_roce_offload_info roce_info; }; union { struct irdma_tcp_offload_info tcp_info; struct irdma_udp_offload_info udp_info; }; struct irdma_ah roce_ah; struct list_head teardown_entry; struct list_head ud_list_elem; atomic_t refcnt; struct iw_cm_id *cm_id; struct irdma_cm_node *cm_node; struct delayed_work dwork_flush; struct ib_mr *lsmm_mr; atomic_t hw_mod_qp_pend; enum 
ib_qp_state ibqp_state; u32 qp_mem_size; u32 last_aeq; int max_send_wr; int max_recv_wr; atomic_t close_timer_started; spinlock_t lock; /* serialize posting WRs to SQ/RQ */ + spinlock_t dwork_flush_lock; /* protect mod_delayed_work */ struct irdma_qp_context *iwqp_context; void *pbl_vbase; dma_addr_t pbl_pbase; struct page *page; u8 iwarp_state; u16 term_sq_flush_code; u16 term_rq_flush_code; u8 hw_iwarp_state; u8 hw_tcp_state; struct irdma_qp_kmode kqp; struct irdma_dma_mem host_ctx; struct timer_list terminate_timer; struct irdma_pbl *iwpbl; struct irdma_sge *sg_list; struct irdma_dma_mem q2_ctx_mem; struct irdma_dma_mem ietf_mem; struct completion free_qp; wait_queue_head_t waitq; wait_queue_head_t mod_qp_waitq; u8 rts_ae_rcvd; - u8 active_conn : 1; - u8 user_mode : 1; - u8 hte_added : 1; - u8 flush_issued : 1; - u8 sig_all : 1; - u8 pau_mode : 1; + bool active_conn:1; + bool user_mode:1; + bool hte_added:1; + bool flush_issued:1; + bool sig_all:1; + bool pau_mode:1; + bool suspend_pending:1; }; struct irdma_udqs_work { struct work_struct work; struct irdma_qp *iwqp; u8 user_prio; bool qs_change:1; }; enum irdma_mmap_flag { IRDMA_MMAP_IO_NC, IRDMA_MMAP_IO_WC, }; struct irdma_user_mmap_entry { #if __FreeBSD_version >= 1400026 struct rdma_user_mmap_entry rdma_entry; #else struct irdma_ucontext *ucontext; struct hlist_node hlist; u64 pgoff_key; /* Used to compute offset (in bytes) returned to user libc's mmap */ #endif u64 bar_offset; u8 mmap_flag; }; static inline u16 irdma_fw_major_ver(struct irdma_sc_dev *dev) { return (u16)FIELD_GET(IRDMA_FW_VER_MAJOR, dev->feature_info[IRDMA_FEATURE_FW_INFO]); } static inline u16 irdma_fw_minor_ver(struct irdma_sc_dev *dev) { return (u16)FIELD_GET(IRDMA_FW_VER_MINOR, dev->feature_info[IRDMA_FEATURE_FW_INFO]); } +static inline void set_ib_wc_op_sq(struct irdma_cq_poll_info *cq_poll_info, + struct ib_wc *entry) +{ + struct irdma_sc_qp *qp; + + switch (cq_poll_info->op_type) { + case IRDMA_OP_TYPE_RDMA_WRITE: + case IRDMA_OP_TYPE_RDMA_WRITE_SOL: + entry->opcode = IB_WC_RDMA_WRITE; + break; + case IRDMA_OP_TYPE_RDMA_READ_INV_STAG: + case IRDMA_OP_TYPE_RDMA_READ: + entry->opcode = IB_WC_RDMA_READ; + break; + case IRDMA_OP_TYPE_SEND_SOL: + case IRDMA_OP_TYPE_SEND_SOL_INV: + case IRDMA_OP_TYPE_SEND_INV: + case IRDMA_OP_TYPE_SEND: + entry->opcode = IB_WC_SEND; + break; + case IRDMA_OP_TYPE_FAST_REG_NSMR: + entry->opcode = IB_WC_REG_MR; + break; + case IRDMA_OP_TYPE_INV_STAG: + entry->opcode = IB_WC_LOCAL_INV; + break; + default: + qp = cq_poll_info->qp_handle; + irdma_dev_err(to_ibdev(qp->dev), "Invalid opcode = %d in CQE\n", + cq_poll_info->op_type); + entry->status = IB_WC_GENERAL_ERR; + } +} + +static inline void set_ib_wc_op_rq(struct irdma_cq_poll_info *cq_poll_info, + struct ib_wc *entry, bool send_imm_support) +{ + /** + * iWARP does not support sendImm, so the presence of Imm data + * must be WriteImm. + */ + if (!send_imm_support) { + entry->opcode = cq_poll_info->imm_valid ? 
+ IB_WC_RECV_RDMA_WITH_IMM : + IB_WC_RECV; + return; + } + switch (cq_poll_info->op_type) { + case IB_OPCODE_RDMA_WRITE_ONLY_WITH_IMMEDIATE: + case IB_OPCODE_RDMA_WRITE_LAST_WITH_IMMEDIATE: + entry->opcode = IB_WC_RECV_RDMA_WITH_IMM; + break; + default: + entry->opcode = IB_WC_RECV; + } +} + /** * irdma_mcast_mac_v4 - Get the multicast MAC for an IP address * @ip_addr: IPv4 address * @mac: pointer to result MAC address * */ static inline void irdma_mcast_mac_v4(u32 *ip_addr, u8 *mac) { u8 *ip = (u8 *)ip_addr; - unsigned char mac4[ETH_ALEN] = {0x01, 0x00, 0x5E, ip[2] & 0x7F, ip[1], + unsigned char mac4[ETHER_ADDR_LEN] = {0x01, 0x00, 0x5E, ip[2] & 0x7F, ip[1], ip[0]}; ether_addr_copy(mac, mac4); } /** * irdma_mcast_mac_v6 - Get the multicast MAC for an IP address * @ip_addr: IPv6 address * @mac: pointer to result MAC address * */ static inline void irdma_mcast_mac_v6(u32 *ip_addr, u8 *mac) { u8 *ip = (u8 *)ip_addr; - unsigned char mac6[ETH_ALEN] = {0x33, 0x33, ip[3], ip[2], ip[1], ip[0]}; + unsigned char mac6[ETHER_ADDR_LEN] = {0x33, 0x33, ip[3], ip[2], ip[1], ip[0]}; ether_addr_copy(mac, mac6); } #if __FreeBSD_version >= 1400026 struct rdma_user_mmap_entry* irdma_user_mmap_entry_insert(struct irdma_ucontext *ucontext, u64 bar_offset, enum irdma_mmap_flag mmap_flag, u64 *mmap_offset); #else struct irdma_user_mmap_entry * irdma_user_mmap_entry_add_hash(struct irdma_ucontext *ucontext, u64 bar_offset, enum irdma_mmap_flag mmap_flag, u64 *mmap_offset); void irdma_user_mmap_entry_del_hash(struct irdma_user_mmap_entry *entry); #endif int irdma_ib_register_device(struct irdma_device *iwdev); void irdma_ib_unregister_device(struct irdma_device *iwdev); void irdma_ib_qp_event(struct irdma_qp *iwqp, enum irdma_qp_event_type event); void irdma_generate_flush_completions(struct irdma_qp *iwqp); void irdma_remove_cmpls_list(struct irdma_cq *iwcq); int irdma_generated_cmpls(struct irdma_cq *iwcq, struct irdma_cq_poll_info *cq_poll_info); void irdma_sched_qp_flush_work(struct irdma_qp *iwqp); void irdma_flush_worker(struct work_struct *work); #endif /* IRDMA_VERBS_H */ diff --git a/sys/dev/irdma/irdma_ws.c b/sys/dev/irdma/irdma_ws.c index a9a0bd38e1b2..a2afba5c738a 100644 --- a/sys/dev/irdma/irdma_ws.c +++ b/sys/dev/irdma/irdma_ws.c @@ -1,444 +1,444 @@ /*- * SPDX-License-Identifier: GPL-2.0 or Linux-OpenIB * - * Copyright (c) 2017 - 2022 Intel Corporation + * Copyright (c) 2017 - 2023 Intel Corporation * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU * General Public License (GPL) Version 2, available from the file * COPYING in the main directory of this source tree, or the * OpenFabrics.org BSD license below: * * Redistribution and use in source and binary forms, with or * without modification, are permitted provided that the following * conditions are met: * * - Redistributions of source code must retain the above * copyright notice, this list of conditions and the following * disclaimer. * * - Redistributions in binary form must reproduce the above * copyright notice, this list of conditions and the following * disclaimer in the documentation and/or other materials * provided with the distribution. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ #include "osdep.h" #include "irdma_hmc.h" #include "irdma_defs.h" #include "irdma_type.h" #include "irdma_protos.h" #include "irdma_ws.h" /** * irdma_alloc_node - Allocate a WS node and init * @vsi: vsi pointer * @user_pri: user priority * @node_type: Type of node, leaf or parent * @parent: parent node pointer */ static struct irdma_ws_node * irdma_alloc_node(struct irdma_sc_vsi *vsi, u8 user_pri, enum irdma_ws_node_type node_type, struct irdma_ws_node *parent) { struct irdma_virt_mem ws_mem; struct irdma_ws_node *node; u16 node_index = 0; ws_mem.size = sizeof(struct irdma_ws_node); ws_mem.va = kzalloc(ws_mem.size, GFP_KERNEL); if (!ws_mem.va) return NULL; if (parent) { node_index = irdma_alloc_ws_node_id(vsi->dev); if (node_index == IRDMA_WS_NODE_INVALID) { kfree(ws_mem.va); return NULL; } } node = ws_mem.va; node->index = node_index; node->vsi_index = vsi->vsi_idx; INIT_LIST_HEAD(&node->child_list_head); if (node_type == WS_NODE_TYPE_LEAF) { node->type_leaf = true; node->traffic_class = vsi->qos[user_pri].traffic_class; node->user_pri = user_pri; node->rel_bw = vsi->qos[user_pri].rel_bw; if (!node->rel_bw) node->rel_bw = 1; node->prio_type = IRDMA_PRIO_WEIGHTED_RR; } else { node->rel_bw = 1; node->prio_type = IRDMA_PRIO_WEIGHTED_RR; node->enable = true; } node->parent = parent; return node; } /** * irdma_free_node - Free a WS node * @vsi: VSI stricture of device * @node: Pointer to node to free */ static void irdma_free_node(struct irdma_sc_vsi *vsi, struct irdma_ws_node *node) { struct irdma_virt_mem ws_mem; if (node->index) irdma_free_ws_node_id(vsi->dev, node->index); ws_mem.va = node; ws_mem.size = sizeof(struct irdma_ws_node); kfree(ws_mem.va); } /** * irdma_ws_cqp_cmd - Post CQP work scheduler node cmd * @vsi: vsi pointer * @node: pointer to node * @cmd: add, remove or modify */ static int irdma_ws_cqp_cmd(struct irdma_sc_vsi *vsi, struct irdma_ws_node *node, u8 cmd) { struct irdma_ws_node_info node_info = {0}; node_info.id = node->index; node_info.vsi = node->vsi_index; if (node->parent) node_info.parent_id = node->parent->index; else node_info.parent_id = node_info.id; node_info.weight = node->rel_bw; node_info.tc = node->traffic_class; node_info.prio_type = node->prio_type; node_info.type_leaf = node->type_leaf; node_info.enable = node->enable; if (irdma_cqp_ws_node_cmd(vsi->dev, cmd, &node_info)) { irdma_debug(vsi->dev, IRDMA_DEBUG_WS, "CQP WS CMD failed\n"); return -ENOMEM; } if (node->type_leaf && cmd == IRDMA_OP_WS_ADD_NODE) { node->qs_handle = node_info.qs_handle; vsi->qos[node->user_pri].qs_handle = node_info.qs_handle; } return 0; } /** * ws_find_node - Find SC WS node based on VSI id or TC * @parent: parent node of First VSI or TC node * @match_val: value to match * @type: match type VSI/TC */ static struct irdma_ws_node * ws_find_node(struct irdma_ws_node *parent, u16 match_val, enum irdma_ws_match_type type) { struct irdma_ws_node *node; switch (type) { case WS_MATCH_TYPE_VSI: list_for_each_entry(node, &parent->child_list_head, siblings) { if (node->vsi_index == match_val) return node; } break; case WS_MATCH_TYPE_TC: list_for_each_entry(node, &parent->child_list_head, siblings) { if (node->traffic_class == match_val) return node; } break; default: break; } return NULL; } /** * irdma_ws_in_use - Checks to 
see if a leaf node is in use * @vsi: vsi pointer * @user_pri: user priority */ static bool irdma_ws_in_use(struct irdma_sc_vsi *vsi, u8 user_pri) { int i; mutex_lock(&vsi->qos[user_pri].qos_mutex); if (!list_empty(&vsi->qos[user_pri].qplist)) { mutex_unlock(&vsi->qos[user_pri].qos_mutex); return true; } /* * Check if the qs handle associated with the given user priority is in use by any other user priority. If so, * nothing left to do */ for (i = 0; i < IRDMA_MAX_USER_PRIORITY; i++) { if (vsi->qos[i].qs_handle == vsi->qos[user_pri].qs_handle && !list_empty(&vsi->qos[i].qplist)) { mutex_unlock(&vsi->qos[user_pri].qos_mutex); return true; } } mutex_unlock(&vsi->qos[user_pri].qos_mutex); return false; } /** * irdma_remove_leaf - Remove leaf node unconditionally * @vsi: vsi pointer * @user_pri: user priority */ static void irdma_remove_leaf(struct irdma_sc_vsi *vsi, u8 user_pri) { struct irdma_ws_node *ws_tree_root, *vsi_node, *tc_node; u16 qs_handle; int i; qs_handle = vsi->qos[user_pri].qs_handle; for (i = 0; i < IRDMA_MAX_USER_PRIORITY; i++) if (vsi->qos[i].qs_handle == qs_handle) vsi->qos[i].valid = false; ws_tree_root = vsi->dev->ws_tree_root; if (!ws_tree_root) return; vsi_node = ws_find_node(ws_tree_root, vsi->vsi_idx, WS_MATCH_TYPE_VSI); if (!vsi_node) return; tc_node = ws_find_node(vsi_node, vsi->qos[user_pri].traffic_class, WS_MATCH_TYPE_TC); if (!tc_node) return; irdma_ws_cqp_cmd(vsi, tc_node, IRDMA_OP_WS_DELETE_NODE); vsi->unregister_qset(vsi, tc_node); list_del(&tc_node->siblings); irdma_free_node(vsi, tc_node); /* Check if VSI node can be freed */ if (list_empty(&vsi_node->child_list_head)) { irdma_ws_cqp_cmd(vsi, vsi_node, IRDMA_OP_WS_DELETE_NODE); list_del(&vsi_node->siblings); irdma_free_node(vsi, vsi_node); /* Free head node there are no remaining VSI nodes */ if (list_empty(&ws_tree_root->child_list_head)) { irdma_ws_cqp_cmd(vsi, ws_tree_root, IRDMA_OP_WS_DELETE_NODE); irdma_free_node(vsi, ws_tree_root); vsi->dev->ws_tree_root = NULL; } } } /** * irdma_ws_add - Build work scheduler tree, set RDMA qs_handle * @vsi: vsi pointer * @user_pri: user priority */ int irdma_ws_add(struct irdma_sc_vsi *vsi, u8 user_pri) { struct irdma_ws_node *ws_tree_root; struct irdma_ws_node *vsi_node; struct irdma_ws_node *tc_node; u16 traffic_class; int ret = 0; int i; mutex_lock(&vsi->dev->ws_mutex); if (vsi->tc_change_pending) { ret = -EBUSY; goto exit; } if (vsi->qos[user_pri].valid) goto exit; ws_tree_root = vsi->dev->ws_tree_root; if (!ws_tree_root) { - irdma_debug(vsi->dev, IRDMA_DEBUG_WS, "Creating root node\n"); ws_tree_root = irdma_alloc_node(vsi, user_pri, WS_NODE_TYPE_PARENT, NULL); if (!ws_tree_root) { ret = -ENOMEM; goto exit; } + irdma_debug(vsi->dev, IRDMA_DEBUG_WS, "Creating root node = %d\n", ws_tree_root->index); ret = irdma_ws_cqp_cmd(vsi, ws_tree_root, IRDMA_OP_WS_ADD_NODE); if (ret) { irdma_free_node(vsi, ws_tree_root); goto exit; } vsi->dev->ws_tree_root = ws_tree_root; } /* Find a second tier node that matches the VSI */ vsi_node = ws_find_node(ws_tree_root, vsi->vsi_idx, WS_MATCH_TYPE_VSI); /* If VSI node doesn't exist, add one */ if (!vsi_node) { irdma_debug(vsi->dev, IRDMA_DEBUG_WS, "Node not found matching VSI %d\n", vsi->vsi_idx); vsi_node = irdma_alloc_node(vsi, user_pri, WS_NODE_TYPE_PARENT, ws_tree_root); if (!vsi_node) { ret = -ENOMEM; goto vsi_add_err; } ret = irdma_ws_cqp_cmd(vsi, vsi_node, IRDMA_OP_WS_ADD_NODE); if (ret) { irdma_free_node(vsi, vsi_node); goto vsi_add_err; } list_add(&vsi_node->siblings, &ws_tree_root->child_list_head); } irdma_debug(vsi->dev, 
IRDMA_DEBUG_WS, "Using node %d which represents VSI %d\n", vsi_node->index, vsi->vsi_idx); traffic_class = vsi->qos[user_pri].traffic_class; tc_node = ws_find_node(vsi_node, traffic_class, WS_MATCH_TYPE_TC); if (!tc_node) { /* Add leaf node */ irdma_debug(vsi->dev, IRDMA_DEBUG_WS, "Node not found matching VSI %d and TC %d\n", vsi->vsi_idx, traffic_class); tc_node = irdma_alloc_node(vsi, user_pri, WS_NODE_TYPE_LEAF, vsi_node); if (!tc_node) { ret = -ENOMEM; goto leaf_add_err; } ret = irdma_ws_cqp_cmd(vsi, tc_node, IRDMA_OP_WS_ADD_NODE); if (ret) { irdma_free_node(vsi, tc_node); goto leaf_add_err; } list_add(&tc_node->siblings, &vsi_node->child_list_head); /* * callback to LAN to update the LAN tree with our node */ ret = vsi->register_qset(vsi, tc_node); if (ret) goto reg_err; tc_node->enable = true; ret = irdma_ws_cqp_cmd(vsi, tc_node, IRDMA_OP_WS_MODIFY_NODE); if (ret) { vsi->unregister_qset(vsi, tc_node); goto reg_err; } } irdma_debug(vsi->dev, IRDMA_DEBUG_WS, "Using node %d which represents VSI %d TC %d\n", tc_node->index, vsi->vsi_idx, traffic_class); /* * Iterate through other UPs and update the QS handle if they have a matching traffic class. */ for (i = 0; i < IRDMA_MAX_USER_PRIORITY; i++) { if (vsi->qos[i].traffic_class == traffic_class) { vsi->qos[i].qs_handle = tc_node->qs_handle; vsi->qos[i].l2_sched_node_id = tc_node->l2_sched_node_id; vsi->qos[i].valid = true; } } goto exit; reg_err: irdma_ws_cqp_cmd(vsi, tc_node, IRDMA_OP_WS_DELETE_NODE); list_del(&tc_node->siblings); irdma_free_node(vsi, tc_node); leaf_add_err: if (list_empty(&vsi_node->child_list_head)) { if (irdma_ws_cqp_cmd(vsi, vsi_node, IRDMA_OP_WS_DELETE_NODE)) goto exit; list_del(&vsi_node->siblings); irdma_free_node(vsi, vsi_node); } vsi_add_err: /* Free head node there are no remaining VSI nodes */ if (list_empty(&ws_tree_root->child_list_head)) { irdma_ws_cqp_cmd(vsi, ws_tree_root, IRDMA_OP_WS_DELETE_NODE); vsi->dev->ws_tree_root = NULL; irdma_free_node(vsi, ws_tree_root); } exit: mutex_unlock(&vsi->dev->ws_mutex); return ret; } /** * irdma_ws_remove - Free WS scheduler node, update WS tree * @vsi: vsi pointer * @user_pri: user priority */ void irdma_ws_remove(struct irdma_sc_vsi *vsi, u8 user_pri) { mutex_lock(&vsi->dev->ws_mutex); if (irdma_ws_in_use(vsi, user_pri)) goto exit; irdma_remove_leaf(vsi, user_pri); exit: mutex_unlock(&vsi->dev->ws_mutex); } /** * irdma_ws_reset - Reset entire WS tree * @vsi: vsi pointer */ void irdma_ws_reset(struct irdma_sc_vsi *vsi) { u8 i; mutex_lock(&vsi->dev->ws_mutex); for (i = 0; i < IRDMA_MAX_USER_PRIORITY; ++i) irdma_remove_leaf(vsi, i); mutex_unlock(&vsi->dev->ws_mutex); } diff --git a/sys/dev/irdma/osdep.h b/sys/dev/irdma/osdep.h index d5566c5d4883..5e6a76f6a186 100644 --- a/sys/dev/irdma/osdep.h +++ b/sys/dev/irdma/osdep.h @@ -1,250 +1,241 @@ /*- * SPDX-License-Identifier: GPL-2.0 or Linux-OpenIB * - * Copyright (c) 2021 - 2022 Intel Corporation + * Copyright (c) 2021 - 2023 Intel Corporation * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU * General Public License (GPL) Version 2, available from the file * COPYING in the main directory of this source tree, or the * OpenFabrics.org BSD license below: * * Redistribution and use in source and binary forms, with or * without modification, are permitted provided that the following * conditions are met: * * - Redistributions of source code must retain the above * copyright notice, this list of conditions and the following * disclaimer. 
* * - Redistributions in binary form must reproduce the above * copyright notice, this list of conditions and the following * disclaimer in the documentation and/or other materials * provided with the distribution. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ #ifndef _ICRDMA_OSDEP_H_ #define _ICRDMA_OSDEP_H_ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include +#include #include #include -#define ATOMIC atomic_t #define IOMEM #define IRDMA_NTOHS(a) ntohs(a) #define MAKEMASK(m, s) ((m) << (s)) #define OS_TIMER timer_list #define DECLARE_HASHTABLE(n, b) struct hlist_head (n)[1 << (b)] #define HASH_MIN(v, b) (sizeof(v) <= 4 ? hash_32(v, b) : hash_long(v, b)) #define HASH_FOR_EACH_RCU(n, b, o, m) for ((b) = 0, o = NULL; o == NULL && (b) < ARRAY_SIZE(n);\ (b)++)\ hlist_for_each_entry_rcu(o, &n[(b)], m) #define HASH_FOR_EACH_POSSIBLE_RCU(n, o, m, k) \ hlist_for_each_entry_rcu(o, &n[jhash(&k, sizeof(k), 0) >> (32 - ilog2(ARRAY_SIZE(n)))],\ m) #define HASH_FOR_EACH_POSSIBLE(n, o, m, k) \ hlist_for_each_entry(o, &n[jhash(&k, sizeof(k), 0) >> (32 - ilog2(ARRAY_SIZE(n)))],\ m) #define HASH_ADD_RCU(h, n, k) \ hlist_add_head_rcu(n, &h[jhash(&k, sizeof(k), 0) >> (32 - ilog2(ARRAY_SIZE(h)))]) #define HASH_DEL_RCU(tbl, node) hlist_del_rcu(node) #define HASH_ADD(h, n, k) \ hlist_add_head(n, &h[jhash(&k, sizeof(k), 0) >> (32 - ilog2(ARRAY_SIZE(h)))]) #define HASH_DEL(tbl, node) hlist_del(node) #define WQ_UNBOUND_MAX_ACTIVE max_t(int, 512, num_possible_cpus() * 4) #define if_addr_rlock(x) #define if_addr_runlock(x) /* constants */ #define STATS_TIMER_DELAY 60000 /* a couple of linux size defines */ #define SZ_128 128 #define SPEED_1000 1000 #define SPEED_10000 10000 #define SPEED_20000 20000 #define SPEED_25000 25000 #define SPEED_40000 40000 #define SPEED_100000 100000 #define irdma_mb() mb() #define irdma_wmb() wmb() +#ifndef smp_mb +#define smp_mb() mb() +#endif #define irdma_get_virt_to_phy vtophys #define __aligned_u64 uint64_t __aligned(8) #define VLAN_PRIO_SHIFT 13 #if __FreeBSD_version < 1400000 #define IB_USER_VERBS_EX_CMD_MODIFY_QP IB_USER_VERBS_CMD_MODIFY_QP #endif /* * debug definition section */ #define irdma_print(S, ...) printf("%s:%d "S, __FUNCTION__, __LINE__, ##__VA_ARGS__) #define irdma_debug_buf(dev, mask, desc, buf, size) \ do { \ - u32 i; \ + u32 i; \ if (!((mask) & (dev)->debug_mask)) { \ break; \ } \ irdma_debug(dev, mask, "%s\n", desc); \ irdma_debug(dev, mask, "starting address virt=%p phy=%lxh\n", buf, irdma_get_virt_to_phy(buf)); \ for (i = 0; i < size ; i += 8) \ irdma_debug(dev, mask, "index %03d val: %016lx\n", i, ((unsigned long *)(buf))[i / 8]); \ } while(0) -#define irdma_debug(h, m, s, ...) \ -do { \ - if (!(h)) { \ - if ((m) == IRDMA_DEBUG_INIT) \ +#define irdma_debug(h, m, s, ...) 
\ +do { \ + if (!(h)) { \ + if ((m) == IRDMA_DEBUG_INIT) \ printf("irdma INIT " s, ##__VA_ARGS__); \ - } else if (((m) & (h)->debug_mask)) { \ - printf("irdma " s, ##__VA_ARGS__); \ - } \ + } else if (((m) & (h)->debug_mask)) { \ + printf("irdma " s, ##__VA_ARGS__); \ + } \ } while (0) #define irdma_dev_err(ibdev, fmt, ...) \ pr_err("%s:%s:%d ERR "fmt, (ibdev)->name, __func__, __LINE__, ##__VA_ARGS__) #define irdma_dev_warn(ibdev, fmt, ...) \ pr_warn("%s:%s:%d WARN "fmt, (ibdev)->name, __func__, __LINE__, ##__VA_ARGS__) #define irdma_dev_info(a, b, ...) printf(b, ##__VA_ARGS__) #define irdma_pr_warn printf -#define dump_struct(s, sz, name) \ -do { \ - unsigned char *a; \ - printf("%s %u", (name), (unsigned int)(sz)); \ - for (a = (unsigned char*)(s); a < (unsigned char *)(s) + (sz) ; a ++) { \ - if ((u64)a % 8 == 0) \ - printf("\n%p ", a); \ - printf("%2x ", *a); \ - } \ - printf("\n"); \ -}while(0) +#define IRDMA_PRINT_IP6(ip6) \ + ((u32*)ip6)[0], ((u32*)ip6)[1], ((u32*)ip6)[2], ((u32*)ip6)[3] /* * debug definition end */ typedef __be16 BE16; typedef __be32 BE32; typedef uintptr_t irdma_uintptr; struct irdma_hw; struct irdma_pci_f; struct irdma_sc_dev; struct irdma_sc_qp; struct irdma_sc_vsi; struct irdma_task_arg { struct irdma_device *iwdev; struct ice_rdma_peer *peer; atomic_t open_ongoing; atomic_t close_ongoing; }; struct irdma_dev_ctx { bus_space_tag_t mem_bus_space_tag; bus_space_handle_t mem_bus_space_handle; bus_size_t mem_bus_space_size; void *dev; struct irdma_task_arg task_arg; + atomic_t event_rfcnt; }; #define irdma_pr_info(fmt, args ...) printf("%s: WARN "fmt, __func__, ## args) #define irdma_pr_err(fmt, args ...) printf("%s: ERR "fmt, __func__, ## args) #define irdma_memcpy(a, b, c) memcpy((a), (b), (c)) #define irdma_memset(a, b, c) memset((a), (b), (c)) #define irdma_usec_delay(x) DELAY(x) #define mdelay(x) DELAY((x) * 1000) #define rt_tos2priority(tos) (tos >> 5) #define ah_attr_to_dmac(attr) ((attr).dmac) -#define kc_ib_modify_qp_is_ok(cur_state, next_state, type, mask, ll) \ - ib_modify_qp_is_ok(cur_state, next_state, type, mask) -#define kc_rdma_gid_attr_network_type(sgid_attr, gid_type, gid) \ - ib_gid_to_network_type(gid_type, gid) #define irdma_del_timer_compat(tt) del_timer((tt)) #define IRDMA_TAILQ_FOREACH CK_STAILQ_FOREACH #define IRDMA_TAILQ_FOREACH_SAFE CK_STAILQ_FOREACH_SAFE #define between(a, b, c) (bool)(c-a >= b-a) -#define rd32(a, reg) irdma_rd32((a)->dev_context, (reg)) -#define wr32(a, reg, value) irdma_wr32((a)->dev_context, (reg), (value)) +#define rd32(a, reg) irdma_rd32((a)->dev_context, (reg)) +#define wr32(a, reg, value) irdma_wr32((a)->dev_context, (reg), (value)) -#define rd64(a, reg) irdma_rd64((a)->dev_context, (reg)) -#define wr64(a, reg, value) irdma_wr64((a)->dev_context, (reg), (value)) +#define rd64(a, reg) irdma_rd64((a)->dev_context, (reg)) +#define wr64(a, reg, value) irdma_wr64((a)->dev_context, (reg), (value)) #define db_wr32(value, a) writel((value), (a)) void *hw_to_dev(struct irdma_hw *hw); struct irdma_dma_mem { - void *va; - u64 pa; + void *va; + u64 pa; bus_dma_tag_t tag; bus_dmamap_t map; bus_dma_segment_t seg; bus_size_t size; - int nseg; - int flags; + int nseg; + int flags; }; struct irdma_virt_mem { - void *va; - u32 size; + void *va; + u32 size; }; struct irdma_dma_info { dma_addr_t *dmaaddrs; }; struct list_head; u32 irdma_rd32(struct irdma_dev_ctx *dev_ctx, u32 reg); void irdma_wr32(struct irdma_dev_ctx *dev_ctx, u32 reg, u32 value); u64 irdma_rd64(struct irdma_dev_ctx *dev_ctx, u32 reg); void irdma_wr64(struct 
irdma_dev_ctx *dev_ctx, u32 reg, u64 value); void irdma_term_modify_qp(struct irdma_sc_qp *qp, u8 next_state, u8 term, u8 term_len); void irdma_terminate_done(struct irdma_sc_qp *qp, int timeout_occurred); void irdma_terminate_start_timer(struct irdma_sc_qp *qp); void irdma_terminate_del_timer(struct irdma_sc_qp *qp); void irdma_hw_stats_start_timer(struct irdma_sc_vsi *vsi); void irdma_hw_stats_stop_timer(struct irdma_sc_vsi *vsi); void irdma_send_ieq_ack(struct irdma_sc_qp *qp); u8* irdma_get_hw_addr(void *par); void irdma_unmap_vm_page_list(struct irdma_hw *hw, u64 *pg_arr, u32 pg_cnt); int irdma_map_vm_page_list(struct irdma_hw *hw, void *va, u64 *pg_arr, u32 pg_cnt); struct ib_device *to_ibdev(struct irdma_sc_dev *dev); #endif /* _ICRDMA_OSDEP_H_ */
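The patch drops the Linux-style %pI6/%pI4/%pM conversions from the debug prints in favor of explicit formats, adding IRDMA_PRINT_IP6() in osdep.h to expand an IPv6 address into four u32 arguments, as done in irdma_attach_mcast() above. A minimal sketch of how a caller pairs it with irdma_debug(); the function name is illustrative and it assumes the driver headers (osdep.h, irdma_type.h) are in scope:

/*
 * Illustrative sketch only: "dev" is any struct irdma_sc_dev with
 * IRDMA_DEBUG_VERBS set in its debug_mask; ip6 holds the address as
 * four u32 words, as produced by irdma_copy_ip_ntohl().
 */
static void
example_log_ip6(struct irdma_sc_dev *dev, u32 *ip6)
{
	irdma_debug(dev, IRDMA_DEBUG_VERBS, "IP6address=%x:%x:%x:%x\n",
		    IRDMA_PRINT_IP6(ip6));
}

The MAC address prints follow the same pattern, passing the six octets of the destination MAC as individual arguments.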