diff --git a/sys/dev/mlx5/cq.h b/sys/dev/mlx5/cq.h
index 55386422efc2..d5e167498fd2 100644
--- a/sys/dev/mlx5/cq.h
+++ b/sys/dev/mlx5/cq.h
@@ -1,175 +1,175 @@
/*-
 * Copyright (c) 2013-2017, Mellanox Technologies, Ltd. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS `AS IS' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $FreeBSD$
 */

#ifndef MLX5_CORE_CQ_H
#define MLX5_CORE_CQ_H

#include
#include
#include

struct mlx5_eqe;
struct mlx5_core_cq {
	u32 cqn;
	int cqe_sz;
	__be32 *set_ci_db;
	__be32 *arm_db;
	unsigned vector;
	int irqn;
	void (*comp) (struct mlx5_core_cq *, struct mlx5_eqe *);
	void (*event) (struct mlx5_core_cq *, int);
-	struct mlx5_uar *uar;
+	struct mlx5_uars_page *uar;
	u32 cons_index;
	unsigned arm_sn;
	struct mlx5_rsc_debug *dbg;
	int pid;
	int reset_notify_added;
	struct list_head reset_notify;
};

enum {
	MLX5_CQE_SYNDROME_LOCAL_LENGTH_ERR = 0x01,
	MLX5_CQE_SYNDROME_LOCAL_QP_OP_ERR = 0x02,
	MLX5_CQE_SYNDROME_LOCAL_PROT_ERR = 0x04,
	MLX5_CQE_SYNDROME_WR_FLUSH_ERR = 0x05,
	MLX5_CQE_SYNDROME_MW_BIND_ERR = 0x06,
	MLX5_CQE_SYNDROME_BAD_RESP_ERR = 0x10,
	MLX5_CQE_SYNDROME_LOCAL_ACCESS_ERR = 0x11,
	MLX5_CQE_SYNDROME_REMOTE_INVAL_REQ_ERR = 0x12,
	MLX5_CQE_SYNDROME_REMOTE_ACCESS_ERR = 0x13,
	MLX5_CQE_SYNDROME_REMOTE_OP_ERR = 0x14,
	MLX5_CQE_SYNDROME_TRANSPORT_RETRY_EXC_ERR = 0x15,
	MLX5_CQE_SYNDROME_RNR_RETRY_EXC_ERR = 0x16,
	MLX5_CQE_SYNDROME_REMOTE_ABORTED_ERR = 0x22,
};

enum {
	MLX5_CQE_OWNER_MASK = 1,
	MLX5_CQE_REQ = 0,
	MLX5_CQE_RESP_WR_IMM = 1,
	MLX5_CQE_RESP_SEND = 2,
	MLX5_CQE_RESP_SEND_IMM = 3,
	MLX5_CQE_RESP_SEND_INV = 4,
	MLX5_CQE_RESIZE_CQ = 5,
	MLX5_CQE_SIG_ERR = 12,
	MLX5_CQE_REQ_ERR = 13,
	MLX5_CQE_RESP_ERR = 14,
	MLX5_CQE_INVALID = 15,
};

enum {
	MLX5_CQ_MODIFY_PERIOD = 1 << 0,
	MLX5_CQ_MODIFY_COUNT = 1 << 1,
	MLX5_CQ_MODIFY_OVERRUN = 1 << 2,
	MLX5_CQ_MODIFY_PERIOD_MODE = 1 << 4,
};

enum {
	MLX5_CQ_OPMOD_RESIZE = 1,
	MLX5_MODIFY_CQ_MASK_LOG_SIZE = 1 << 0,
	MLX5_MODIFY_CQ_MASK_PG_OFFSET = 1 << 1,
	MLX5_MODIFY_CQ_MASK_PG_SIZE = 1 << 2,
};

struct mlx5_cq_modify_params {
	int type;
	union {
		struct {
			u32 page_offset;
			u8 log_cq_size;
		} resize;
		struct {
		} moder;
		struct {
		} mapping;
	} params;
};

static inline int cqe_sz_to_mlx_sz(u8 size)
{
	return size == 64 ?
CQE_SIZE_64 : CQE_SIZE_128; } static inline void mlx5_cq_set_ci(struct mlx5_core_cq *cq) { *cq->set_ci_db = cpu_to_be32(cq->cons_index & 0xffffff); } enum { MLX5_CQ_DB_REQ_NOT_SOL = 1 << 24, MLX5_CQ_DB_REQ_NOT = 0 << 24 }; static inline void mlx5_cq_arm(struct mlx5_core_cq *cq, u32 cmd, void __iomem *uar_page, spinlock_t *doorbell_lock, u32 cons_index) { __be32 doorbell[2]; u32 sn; u32 ci; sn = cq->arm_sn & 3; ci = cons_index & 0xffffff; *cq->arm_db = cpu_to_be32(sn << 28 | cmd | ci); /* Make sure that the doorbell record in host memory is * written before ringing the doorbell via PCI MMIO. */ wmb(); doorbell[0] = cpu_to_be32(sn << 28 | cmd | ci); doorbell[1] = cpu_to_be32(cq->cqn); mlx5_write64(doorbell, uar_page + MLX5_CQ_DOORBELL, doorbell_lock); } int mlx5_init_cq_table(struct mlx5_core_dev *dev); void mlx5_cleanup_cq_table(struct mlx5_core_dev *dev); int mlx5_core_create_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq, u32 *in, int inlen, u32 *out, int outlen); int mlx5_core_destroy_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq); int mlx5_core_query_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq, u32 *out, int outlen); int mlx5_core_modify_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq, u32 *in, int inlen); int mlx5_core_modify_cq_moderation(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq, u16 cq_period, u16 cq_max_count); int mlx5_core_modify_cq_moderation_mode(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq, u16 cq_period, u16 cq_max_count, u8 cq_mode); int mlx5_debug_cq_add(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq); void mlx5_debug_cq_remove(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq); #endif /* MLX5_CORE_CQ_H */ diff --git a/sys/dev/mlx5/device.h b/sys/dev/mlx5/device.h index 3431193cdafd..69057d5f2470 100644 --- a/sys/dev/mlx5/device.h +++ b/sys/dev/mlx5/device.h @@ -1,1265 +1,1262 @@ /*- * Copyright (c) 2013-2019, Mellanox Technologies, Ltd. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS `AS IS' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
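
The mlx5_cq_set_ci()/mlx5_cq_arm() helpers in cq.h above are unchanged by this patch; only the uar member of struct mlx5_core_cq switches from mlx5_uar to the new mlx5_uars_page. A minimal re-arm sketch, assuming a hypothetical poll_one_cqe() helper and a UAR mapping taken from the new uars_page machinery in driver.h below:

/* Sketch only: poll_one_cqe() is a hypothetical helper, not part of this patch. */
static void
example_cq_poll_and_rearm(struct mlx5_core_cq *cq, void __iomem *uar_map,
    spinlock_t *db_lock)
{
        /* Consume whatever completions are ready. */
        while (poll_one_cqe(cq) == 0)
                cq->cons_index++;

        /* Publish the new consumer index, then request the next event. */
        mlx5_cq_set_ci(cq);
        mlx5_cq_arm(cq, MLX5_CQ_DB_REQ_NOT, uar_map, db_lock, cq->cons_index);
}

mlx5_cq_arm() itself writes the doorbell record, issues wmb(), and then performs the MMIO write, exactly as shown in the function body above.
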
* * $FreeBSD$ */ #ifndef MLX5_DEVICE_H #define MLX5_DEVICE_H #include #include #include #define FW_INIT_TIMEOUT_MILI 2000 #define FW_INIT_WAIT_MS 2 #define FW_PRE_INIT_TIMEOUT_MILI 120000 #define FW_INIT_WARN_MESSAGE_INTERVAL 20000 #if defined(__LITTLE_ENDIAN) #define MLX5_SET_HOST_ENDIANNESS 0 #elif defined(__BIG_ENDIAN) #define MLX5_SET_HOST_ENDIANNESS 0x80 #else #error Host endianness not defined #endif /* helper macros */ #define __mlx5_nullp(typ) ((struct mlx5_ifc_##typ##_bits *)0) #define __mlx5_bit_sz(typ, fld) sizeof(__mlx5_nullp(typ)->fld) #define __mlx5_bit_off(typ, fld) __offsetof(struct mlx5_ifc_##typ##_bits, fld) #define __mlx5_16_off(typ, fld) (__mlx5_bit_off(typ, fld) / 16) #define __mlx5_dw_off(typ, fld) (__mlx5_bit_off(typ, fld) / 32) #define __mlx5_64_off(typ, fld) (__mlx5_bit_off(typ, fld) / 64) #define __mlx5_16_bit_off(typ, fld) (16 - __mlx5_bit_sz(typ, fld) - (__mlx5_bit_off(typ, fld) & 0xf)) #define __mlx5_dw_bit_off(typ, fld) (32 - __mlx5_bit_sz(typ, fld) - (__mlx5_bit_off(typ, fld) & 0x1f)) #define __mlx5_mask(typ, fld) ((u32)((1ull << __mlx5_bit_sz(typ, fld)) - 1)) #define __mlx5_dw_mask(typ, fld) (__mlx5_mask(typ, fld) << __mlx5_dw_bit_off(typ, fld)) #define __mlx5_mask16(typ, fld) ((u16)((1ull << __mlx5_bit_sz(typ, fld)) - 1)) #define __mlx5_16_mask(typ, fld) (__mlx5_mask16(typ, fld) << __mlx5_16_bit_off(typ, fld)) #define __mlx5_st_sz_bits(typ) sizeof(struct mlx5_ifc_##typ##_bits) #define MLX5_FLD_SZ_BYTES(typ, fld) (__mlx5_bit_sz(typ, fld) / 8) #define MLX5_ST_SZ_BYTES(typ) (sizeof(struct mlx5_ifc_##typ##_bits) / 8) #define MLX5_ST_SZ_DW(typ) (sizeof(struct mlx5_ifc_##typ##_bits) / 32) #define MLX5_ST_SZ_QW(typ) (sizeof(struct mlx5_ifc_##typ##_bits) / 64) #define MLX5_UN_SZ_BYTES(typ) (sizeof(union mlx5_ifc_##typ##_bits) / 8) #define MLX5_UN_SZ_DW(typ) (sizeof(union mlx5_ifc_##typ##_bits) / 32) #define MLX5_BYTE_OFF(typ, fld) (__mlx5_bit_off(typ, fld) / 8) #define MLX5_ADDR_OF(typ, p, fld) ((char *)(p) + MLX5_BYTE_OFF(typ, fld)) /* insert a value to a struct */ #define MLX5_SET(typ, p, fld, v) do { \ BUILD_BUG_ON(__mlx5_st_sz_bits(typ) % 32); \ BUILD_BUG_ON(__mlx5_bit_sz(typ, fld) > 32); \ *((__be32 *)(p) + __mlx5_dw_off(typ, fld)) = \ cpu_to_be32((be32_to_cpu(*((__be32 *)(p) + __mlx5_dw_off(typ, fld))) & \ (~__mlx5_dw_mask(typ, fld))) | (((v) & __mlx5_mask(typ, fld)) \ << __mlx5_dw_bit_off(typ, fld))); \ } while (0) #define MLX5_SET_TO_ONES(typ, p, fld) do { \ BUILD_BUG_ON(__mlx5_st_sz_bits(typ) % 32); \ BUILD_BUG_ON(__mlx5_bit_sz(typ, fld) > 32); \ *((__be32 *)(p) + __mlx5_dw_off(typ, fld)) = \ cpu_to_be32((be32_to_cpu(*((__be32 *)(p) + __mlx5_dw_off(typ, fld))) & \ (~__mlx5_dw_mask(typ, fld))) | ((__mlx5_mask(typ, fld)) \ << __mlx5_dw_bit_off(typ, fld))); \ } while (0) #define MLX5_GET(typ, p, fld) ((be32_to_cpu(*((__be32 *)(p) +\ __mlx5_dw_off(typ, fld))) >> __mlx5_dw_bit_off(typ, fld)) & \ __mlx5_mask(typ, fld)) #define MLX5_GET_PR(typ, p, fld) ({ \ u32 ___t = MLX5_GET(typ, p, fld); \ pr_debug(#fld " = 0x%x\n", ___t); \ ___t; \ }) #define __MLX5_SET64(typ, p, fld, v) do { \ BUILD_BUG_ON(__mlx5_bit_sz(typ, fld) != 64); \ *((__be64 *)(p) + __mlx5_64_off(typ, fld)) = cpu_to_be64(v); \ } while (0) #define MLX5_SET64(typ, p, fld, v) do { \ BUILD_BUG_ON(__mlx5_bit_off(typ, fld) % 64); \ __MLX5_SET64(typ, p, fld, v); \ } while (0) #define MLX5_ARRAY_SET64(typ, p, fld, idx, v) do { \ BUILD_BUG_ON(__mlx5_bit_off(typ, fld) % 64); \ __MLX5_SET64(typ, p, fld[idx], v); \ } while (0) #define MLX5_GET64(typ, p, fld) be64_to_cpu(*((__be64 *)(p) + __mlx5_64_off(typ, 
fld))) #define MLX5_GET16(typ, p, fld) ((be16_to_cpu(*((__be16 *)(p) +\ __mlx5_16_off(typ, fld))) >> __mlx5_16_bit_off(typ, fld)) & \ __mlx5_mask16(typ, fld)) #define MLX5_SET16(typ, p, fld, v) do { \ u16 _v = v; \ BUILD_BUG_ON(__mlx5_st_sz_bits(typ) % 16); \ *((__be16 *)(p) + __mlx5_16_off(typ, fld)) = \ cpu_to_be16((be16_to_cpu(*((__be16 *)(p) + __mlx5_16_off(typ, fld))) & \ (~__mlx5_16_mask(typ, fld))) | (((_v) & __mlx5_mask16(typ, fld)) \ << __mlx5_16_bit_off(typ, fld))); \ } while (0) #define MLX5_GET64_BE(typ, p, fld) (*((__be64 *)(p) +\ __mlx5_64_off(typ, fld))) #define MLX5_GET_BE(type_t, typ, p, fld) ({ \ type_t tmp; \ switch (sizeof(tmp)) { \ case sizeof(u8): \ tmp = (__force type_t)MLX5_GET(typ, p, fld); \ break; \ case sizeof(u16): \ tmp = (__force type_t)cpu_to_be16(MLX5_GET(typ, p, fld)); \ break; \ case sizeof(u32): \ tmp = (__force type_t)cpu_to_be32(MLX5_GET(typ, p, fld)); \ break; \ case sizeof(u64): \ tmp = (__force type_t)MLX5_GET64_BE(typ, p, fld); \ break; \ } \ tmp; \ }) #define MLX5_BY_PASS_NUM_REGULAR_PRIOS 8 #define MLX5_BY_PASS_NUM_DONT_TRAP_PRIOS 8 #define MLX5_BY_PASS_NUM_MULTICAST_PRIOS 1 #define MLX5_BY_PASS_NUM_PRIOS (MLX5_BY_PASS_NUM_REGULAR_PRIOS +\ MLX5_BY_PASS_NUM_DONT_TRAP_PRIOS +\ MLX5_BY_PASS_NUM_MULTICAST_PRIOS) /* insert a value to a struct */ #define MLX5_VSC_SET(typ, p, fld, v) do { \ BUILD_BUG_ON(__mlx5_st_sz_bits(typ) % 32); \ BUILD_BUG_ON(__mlx5_bit_sz(typ, fld) > 32); \ *((__le32 *)(p) + __mlx5_dw_off(typ, fld)) = \ cpu_to_le32((le32_to_cpu(*((__le32 *)(p) + __mlx5_dw_off(typ, fld))) & \ (~__mlx5_dw_mask(typ, fld))) | (((v) & __mlx5_mask(typ, fld)) \ << __mlx5_dw_bit_off(typ, fld))); \ } while (0) #define MLX5_VSC_GET(typ, p, fld) ((le32_to_cpu(*((__le32 *)(p) +\ __mlx5_dw_off(typ, fld))) >> __mlx5_dw_bit_off(typ, fld)) & \ __mlx5_mask(typ, fld)) #define MLX5_VSC_GET_PR(typ, p, fld) ({ \ u32 ___t = MLX5_VSC_GET(typ, p, fld); \ pr_debug(#fld " = 0x%x\n", ___t); \ ___t; \ }) enum { MLX5_MAX_COMMANDS = 32, MLX5_CMD_DATA_BLOCK_SIZE = 512, MLX5_CMD_MBOX_SIZE = 1024, MLX5_PCI_CMD_XPORT = 7, MLX5_MKEY_BSF_OCTO_SIZE = 4, MLX5_MAX_PSVS = 4, }; enum { MLX5_EXTENDED_UD_AV = 0x80000000, }; enum { MLX5_CQ_FLAGS_OI = 2, }; enum { MLX5_STAT_RATE_OFFSET = 5, }; enum { MLX5_INLINE_SEG = 0x80000000, }; enum { MLX5_HW_START_PADDING = MLX5_INLINE_SEG, }; enum { MLX5_MIN_PKEY_TABLE_SIZE = 128, MLX5_MAX_LOG_PKEY_TABLE = 5, }; enum { MLX5_MKEY_INBOX_PG_ACCESS = 1U << 31 }; enum { MLX5_PERM_LOCAL_READ = 1 << 2, MLX5_PERM_LOCAL_WRITE = 1 << 3, MLX5_PERM_REMOTE_READ = 1 << 4, MLX5_PERM_REMOTE_WRITE = 1 << 5, MLX5_PERM_ATOMIC = 1 << 6, MLX5_PERM_UMR_EN = 1 << 7, }; enum { MLX5_PCIE_CTRL_SMALL_FENCE = 1 << 0, MLX5_PCIE_CTRL_RELAXED_ORDERING = 1 << 2, MLX5_PCIE_CTRL_NO_SNOOP = 1 << 3, MLX5_PCIE_CTRL_TLP_PROCE_EN = 1 << 6, MLX5_PCIE_CTRL_TPH_MASK = 3 << 4, }; enum { MLX5_MKEY_REMOTE_INVAL = 1 << 24, MLX5_MKEY_FLAG_SYNC_UMR = 1 << 29, MLX5_MKEY_BSF_EN = 1 << 30, MLX5_MKEY_LEN64 = 1U << 31, }; enum { MLX5_EN_RD = (u64)1, MLX5_EN_WR = (u64)2 }; enum { - MLX5_BF_REGS_PER_PAGE = 4, - MLX5_MAX_UAR_PAGES = 1 << 8, - MLX5_NON_FP_BF_REGS_PER_PAGE = 2, - MLX5_MAX_UUARS = MLX5_MAX_UAR_PAGES * MLX5_NON_FP_BF_REGS_PER_PAGE, + MLX5_ADAPTER_PAGE_SHIFT = 12, + MLX5_ADAPTER_PAGE_SIZE = 1 << MLX5_ADAPTER_PAGE_SHIFT, +}; + +enum { + MLX5_BFREGS_PER_UAR = 4, + MLX5_MAX_UARS = 1 << 8, + MLX5_NON_FP_BFREGS_PER_UAR = 2, + MLX5_FP_BFREGS_PER_UAR = MLX5_BFREGS_PER_UAR - + MLX5_NON_FP_BFREGS_PER_UAR, + MLX5_MAX_BFREGS = MLX5_MAX_UARS * + MLX5_NON_FP_BFREGS_PER_UAR, + MLX5_UARS_IN_PAGE = PAGE_SIZE / 
MLX5_ADAPTER_PAGE_SIZE, + MLX5_NON_FP_BFREGS_IN_PAGE = MLX5_NON_FP_BFREGS_PER_UAR * MLX5_UARS_IN_PAGE, + MLX5_MIN_DYN_BFREGS = 512, + MLX5_MAX_DYN_BFREGS = 1024, }; enum { MLX5_MKEY_MASK_LEN = 1ull << 0, MLX5_MKEY_MASK_PAGE_SIZE = 1ull << 1, MLX5_MKEY_MASK_START_ADDR = 1ull << 6, MLX5_MKEY_MASK_PD = 1ull << 7, MLX5_MKEY_MASK_EN_RINVAL = 1ull << 8, MLX5_MKEY_MASK_EN_SIGERR = 1ull << 9, MLX5_MKEY_MASK_BSF_EN = 1ull << 12, MLX5_MKEY_MASK_KEY = 1ull << 13, MLX5_MKEY_MASK_QPN = 1ull << 14, MLX5_MKEY_MASK_LR = 1ull << 17, MLX5_MKEY_MASK_LW = 1ull << 18, MLX5_MKEY_MASK_RR = 1ull << 19, MLX5_MKEY_MASK_RW = 1ull << 20, MLX5_MKEY_MASK_A = 1ull << 21, MLX5_MKEY_MASK_SMALL_FENCE = 1ull << 23, MLX5_MKEY_MASK_FREE = 1ull << 29, }; enum { MLX5_UMR_TRANSLATION_OFFSET_EN = (1 << 4), MLX5_UMR_CHECK_NOT_FREE = (1 << 5), MLX5_UMR_CHECK_FREE = (2 << 5), MLX5_UMR_INLINE = (1 << 7), }; #define MLX5_UMR_MTT_ALIGNMENT 0x40 #define MLX5_UMR_MTT_MASK (MLX5_UMR_MTT_ALIGNMENT - 1) #define MLX5_UMR_MTT_MIN_CHUNK_SIZE MLX5_UMR_MTT_ALIGNMENT enum { MLX5_EVENT_QUEUE_TYPE_QP = 0, MLX5_EVENT_QUEUE_TYPE_RQ = 1, MLX5_EVENT_QUEUE_TYPE_SQ = 2, }; enum { MLX5_PORT_CHANGE_SUBTYPE_DOWN = 1, MLX5_PORT_CHANGE_SUBTYPE_ACTIVE = 4, MLX5_PORT_CHANGE_SUBTYPE_INITIALIZED = 5, MLX5_PORT_CHANGE_SUBTYPE_LID = 6, MLX5_PORT_CHANGE_SUBTYPE_PKEY = 7, MLX5_PORT_CHANGE_SUBTYPE_GUID = 8, MLX5_PORT_CHANGE_SUBTYPE_CLIENT_REREG = 9, }; enum { MLX5_DCBX_EVENT_SUBTYPE_ERROR_STATE_DCBX = 1, MLX5_DCBX_EVENT_SUBTYPE_REMOTE_CONFIG_CHANGE, MLX5_DCBX_EVENT_SUBTYPE_LOCAL_OPER_CHANGE, MLX5_DCBX_EVENT_SUBTYPE_REMOTE_CONFIG_APP_PRIORITY_CHANGE, MLX5_MAX_INLINE_RECEIVE_SIZE = 64 }; enum { MLX5_DEV_CAP_FLAG_XRC = 1LL << 3, MLX5_DEV_CAP_FLAG_BAD_PKEY_CNTR = 1LL << 8, MLX5_DEV_CAP_FLAG_BAD_QKEY_CNTR = 1LL << 9, MLX5_DEV_CAP_FLAG_APM = 1LL << 17, MLX5_DEV_CAP_FLAG_SCQE_BRK_MOD = 1LL << 21, MLX5_DEV_CAP_FLAG_BLOCK_MCAST = 1LL << 23, MLX5_DEV_CAP_FLAG_CQ_MODER = 1LL << 29, MLX5_DEV_CAP_FLAG_RESIZE_CQ = 1LL << 30, MLX5_DEV_CAP_FLAG_ATOMIC = 1LL << 33, MLX5_DEV_CAP_FLAG_ROCE = 1LL << 34, MLX5_DEV_CAP_FLAG_DCT = 1LL << 37, MLX5_DEV_CAP_FLAG_SIG_HAND_OVER = 1LL << 40, MLX5_DEV_CAP_FLAG_CMDIF_CSUM = 3LL << 46, MLX5_DEV_CAP_FLAG_DRAIN_SIGERR = 1LL << 48, }; enum { MLX5_ROCE_VERSION_1 = 0, MLX5_ROCE_VERSION_1_5 = 1, MLX5_ROCE_VERSION_2 = 2, }; enum { MLX5_ROCE_VERSION_1_CAP = 1 << MLX5_ROCE_VERSION_1, MLX5_ROCE_VERSION_1_5_CAP = 1 << MLX5_ROCE_VERSION_1_5, MLX5_ROCE_VERSION_2_CAP = 1 << MLX5_ROCE_VERSION_2, }; enum { MLX5_ROCE_L3_TYPE_IPV4 = 0, MLX5_ROCE_L3_TYPE_IPV6 = 1, }; enum { MLX5_ROCE_L3_TYPE_IPV4_CAP = 1 << 1, MLX5_ROCE_L3_TYPE_IPV6_CAP = 1 << 2, }; enum { MLX5_OPCODE_NOP = 0x00, MLX5_OPCODE_SEND_INVAL = 0x01, MLX5_OPCODE_RDMA_WRITE = 0x08, MLX5_OPCODE_RDMA_WRITE_IMM = 0x09, MLX5_OPCODE_SEND = 0x0a, MLX5_OPCODE_SEND_IMM = 0x0b, MLX5_OPCODE_LSO = 0x0e, MLX5_OPCODE_RDMA_READ = 0x10, MLX5_OPCODE_ATOMIC_CS = 0x11, MLX5_OPCODE_ATOMIC_FA = 0x12, MLX5_OPCODE_ATOMIC_MASKED_CS = 0x14, MLX5_OPCODE_ATOMIC_MASKED_FA = 0x15, MLX5_OPCODE_BIND_MW = 0x18, MLX5_OPCODE_CONFIG_CMD = 0x1f, MLX5_OPCODE_DUMP = 0x23, MLX5_RECV_OPCODE_RDMA_WRITE_IMM = 0x00, MLX5_RECV_OPCODE_SEND = 0x01, MLX5_RECV_OPCODE_SEND_IMM = 0x02, MLX5_RECV_OPCODE_SEND_INVAL = 0x03, MLX5_CQE_OPCODE_ERROR = 0x1e, MLX5_CQE_OPCODE_RESIZE = 0x16, MLX5_OPCODE_SET_PSV = 0x20, MLX5_OPCODE_GET_PSV = 0x21, MLX5_OPCODE_CHECK_PSV = 0x22, MLX5_OPCODE_RGET_PSV = 0x26, MLX5_OPCODE_RCHECK_PSV = 0x27, MLX5_OPCODE_UMR = 0x25, MLX5_OPCODE_SIGNATURE_CANCELED = (1 << 15), }; enum { MLX5_OPCODE_MOD_UMR_UMR = 0x0, 
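
For reference, the MLX5_SET()/MLX5_GET() accessors defined earlier in device.h operate on the PRM *_bits layouts, where each u8 array element stands for one bit; the field offset and width are computed at compile time and a big-endian 32-bit read-modify-write happens at run time. A small sketch against a made-up layout (mlx5_ifc_example_bits is not a real structure, it only illustrates the mechanics):

struct mlx5_ifc_example_bits {
        u8 field_a[0x10];       /* 16-bit field, first dword */
        u8 field_b[0x10];       /* 16-bit field, first dword */
        u8 field_c[0x20];       /* 32-bit field, second dword */
};

static void
example_set_get(void)
{
        u32 buf[MLX5_ST_SZ_DW(example)] = {0};
        u32 a;

        MLX5_SET(example, buf, field_a, 0x1234);        /* RMW of dword 0 */
        MLX5_SET(example, buf, field_c, 0xdeadbeef);    /* whole dword 1 */

        a = MLX5_GET(example, buf, field_a);            /* a == 0x1234 */
        (void)a;
}
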
MLX5_OPCODE_MOD_UMR_TLS_TIS_STATIC_PARAMS = 0x1, MLX5_OPCODE_MOD_UMR_TLS_TIR_STATIC_PARAMS = 0x2, }; enum { MLX5_OPCODE_MOD_PSV_PSV = 0x0, MLX5_OPCODE_MOD_PSV_TLS_TIS_PROGRESS_PARAMS = 0x1, MLX5_OPCODE_MOD_PSV_TLS_TIR_PROGRESS_PARAMS = 0x2, }; enum { MLX5_SET_PORT_RESET_QKEY = 0, MLX5_SET_PORT_GUID0 = 16, MLX5_SET_PORT_NODE_GUID = 17, MLX5_SET_PORT_SYS_GUID = 18, MLX5_SET_PORT_GID_TABLE = 19, MLX5_SET_PORT_PKEY_TABLE = 20, }; enum { MLX5_MAX_PAGE_SHIFT = 31 }; -enum { - MLX5_ADAPTER_PAGE_SHIFT = 12, - MLX5_ADAPTER_PAGE_SIZE = 1 << MLX5_ADAPTER_PAGE_SHIFT, -}; - enum { MLX5_CAP_OFF_CMDIF_CSUM = 46, }; enum { /* * Max wqe size for rdma read is 512 bytes, so this * limits our max_sge_rd as the wqe needs to fit: * - ctrl segment (16 bytes) * - rdma segment (16 bytes) * - scatter elements (16 bytes each) */ MLX5_MAX_SGE_RD = (512 - 16 - 16) / 16 }; struct mlx5_cmd_layout { u8 type; u8 rsvd0[3]; __be32 inlen; __be64 in_ptr; __be32 in[4]; __be32 out[4]; __be64 out_ptr; __be32 outlen; u8 token; u8 sig; u8 rsvd1; u8 status_own; }; enum mlx5_fatal_assert_bit_offsets { MLX5_RFR_OFFSET = 31, }; struct mlx5_health_buffer { __be32 assert_var[5]; __be32 rsvd0[3]; __be32 assert_exit_ptr; __be32 assert_callra; __be32 rsvd1[2]; __be32 fw_ver; __be32 hw_id; __be32 rfr; u8 irisc_index; u8 synd; __be16 ext_synd; }; enum mlx5_initializing_bit_offsets { MLX5_FW_RESET_SUPPORTED_OFFSET = 30, }; enum mlx5_cmd_addr_l_sz_offset { MLX5_NIC_IFC_OFFSET = 8, }; struct mlx5_init_seg { __be32 fw_rev; __be32 cmdif_rev_fw_sub; __be32 rsvd0[2]; __be32 cmdq_addr_h; __be32 cmdq_addr_l_sz; __be32 cmd_dbell; __be32 rsvd1[120]; __be32 initializing; struct mlx5_health_buffer health; __be32 rsvd2[880]; __be32 internal_timer_h; __be32 internal_timer_l; __be32 rsvd3[2]; __be32 health_counter; __be32 rsvd4[1019]; __be64 ieee1588_clk; __be32 ieee1588_clk_type; __be32 clr_intx; }; struct mlx5_eqe_comp { __be32 reserved[6]; __be32 cqn; }; struct mlx5_eqe_qp_srq { __be32 reserved[6]; __be32 qp_srq_n; }; struct mlx5_eqe_cq_err { __be32 cqn; u8 reserved1[7]; u8 syndrome; }; struct mlx5_eqe_port_state { u8 reserved0[8]; u8 port; }; struct mlx5_eqe_gpio { __be32 reserved0[2]; __be64 gpio_event; }; struct mlx5_eqe_congestion { u8 type; u8 rsvd0; u8 congestion_level; }; struct mlx5_eqe_stall_vl { u8 rsvd0[3]; u8 port_vl; }; struct mlx5_eqe_cmd { __be32 vector; __be32 rsvd[6]; }; struct mlx5_eqe_page_req { u8 rsvd0[2]; __be16 func_id; __be32 num_pages; __be32 rsvd1[5]; }; struct mlx5_eqe_vport_change { u8 rsvd0[2]; __be16 vport_num; __be32 rsvd1[6]; }; #define PORT_MODULE_EVENT_MODULE_STATUS_MASK 0xF #define PORT_MODULE_EVENT_ERROR_TYPE_MASK 0xF enum { MLX5_MODULE_STATUS_PLUGGED_ENABLED = 0x1, MLX5_MODULE_STATUS_UNPLUGGED = 0x2, MLX5_MODULE_STATUS_ERROR = 0x3, MLX5_MODULE_STATUS_NUM , }; enum { MLX5_MODULE_EVENT_ERROR_POWER_BUDGET_EXCEEDED = 0x0, MLX5_MODULE_EVENT_ERROR_LONG_RANGE_FOR_NON_MLNX_CABLE_MODULE = 0x1, MLX5_MODULE_EVENT_ERROR_BUS_STUCK = 0x2, MLX5_MODULE_EVENT_ERROR_NO_EEPROM_RETRY_TIMEOUT = 0x3, MLX5_MODULE_EVENT_ERROR_ENFORCE_PART_NUMBER_LIST = 0x4, MLX5_MODULE_EVENT_ERROR_UNSUPPORTED_CABLE = 0x5, MLX5_MODULE_EVENT_ERROR_HIGH_TEMPERATURE = 0x6, MLX5_MODULE_EVENT_ERROR_CABLE_IS_SHORTED = 0x7, MLX5_MODULE_EVENT_ERROR_PMD_TYPE_NOT_ENABLED = 0x8, MLX5_MODULE_EVENT_ERROR_LASTER_TEC_FAILURE = 0x9, MLX5_MODULE_EVENT_ERROR_HIGH_CURRENT = 0xa, MLX5_MODULE_EVENT_ERROR_HIGH_VOLTAGE = 0xb, MLX5_MODULE_EVENT_ERROR_PCIE_SYS_POWER_SLOT_EXCEEDED = 0xc, MLX5_MODULE_EVENT_ERROR_HIGH_POWER = 0xd, MLX5_MODULE_EVENT_ERROR_MODULE_STATE_MACHINE_FAULT = 
0xe, MLX5_MODULE_EVENT_ERROR_NUM , }; struct mlx5_eqe_port_module_event { u8 rsvd0; u8 module; u8 rsvd1; u8 module_status; u8 rsvd2[2]; u8 error_type; }; struct mlx5_eqe_general_notification_event { u32 rq_user_index_delay_drop; u32 rsvd0[6]; }; struct mlx5_eqe_temp_warning { __be64 sensor_warning_msb; __be64 sensor_warning_lsb; } __packed; union ev_data { __be32 raw[7]; struct mlx5_eqe_cmd cmd; struct mlx5_eqe_comp comp; struct mlx5_eqe_qp_srq qp_srq; struct mlx5_eqe_cq_err cq_err; struct mlx5_eqe_port_state port; struct mlx5_eqe_gpio gpio; struct mlx5_eqe_congestion cong; struct mlx5_eqe_stall_vl stall_vl; struct mlx5_eqe_page_req req_pages; struct mlx5_eqe_port_module_event port_module_event; struct mlx5_eqe_vport_change vport_change; struct mlx5_eqe_general_notification_event general_notifications; struct mlx5_eqe_temp_warning temp_warning; } __packed; struct mlx5_eqe { u8 rsvd0; u8 type; u8 rsvd1; u8 sub_type; __be32 rsvd2[7]; union ev_data data; __be16 rsvd3; u8 signature; u8 owner; } __packed; struct mlx5_cmd_prot_block { u8 data[MLX5_CMD_DATA_BLOCK_SIZE]; u8 rsvd0[48]; __be64 next; __be32 block_num; u8 rsvd1; u8 token; u8 ctrl_sig; u8 sig; }; #define MLX5_NUM_CMDS_IN_ADAPTER_PAGE \ (MLX5_ADAPTER_PAGE_SIZE / MLX5_CMD_MBOX_SIZE) CTASSERT(MLX5_CMD_MBOX_SIZE >= sizeof(struct mlx5_cmd_prot_block)); CTASSERT(MLX5_CMD_MBOX_SIZE <= MLX5_ADAPTER_PAGE_SIZE); enum { MLX5_CQE_SYND_FLUSHED_IN_ERROR = 5, }; struct mlx5_err_cqe { u8 rsvd0[32]; __be32 srqn; u8 rsvd1[18]; u8 vendor_err_synd; u8 syndrome; __be32 s_wqe_opcode_qpn; __be16 wqe_counter; u8 signature; u8 op_own; }; struct mlx5_cqe64 { u8 tunneled_etc; u8 rsvd0[3]; u8 lro_tcppsh_abort_dupack; u8 lro_min_ttl; __be16 lro_tcp_win; __be32 lro_ack_seq_num; __be32 rss_hash_result; u8 rss_hash_type; u8 ml_path; u8 rsvd20[2]; __be16 check_sum; __be16 slid; __be32 flags_rqpn; u8 hds_ip_ext; u8 l4_hdr_type_etc; __be16 vlan_info; __be32 srqn; /* [31:24]: lro_num_seg, [23:0]: srqn */ __be32 imm_inval_pkey; u8 rsvd40[4]; __be32 byte_cnt; __be64 timestamp; __be32 sop_drop_qpn; __be16 wqe_counter; u8 signature; u8 op_own; }; #define MLX5_CQE_TSTMP_PTP (1ULL << 63) static inline bool get_cqe_lro_timestamp_valid(struct mlx5_cqe64 *cqe) { return (cqe->lro_tcppsh_abort_dupack >> 7) & 1; } static inline bool get_cqe_lro_tcppsh(struct mlx5_cqe64 *cqe) { return (cqe->lro_tcppsh_abort_dupack >> 6) & 1; } static inline u8 get_cqe_l4_hdr_type(struct mlx5_cqe64 *cqe) { return (cqe->l4_hdr_type_etc >> 4) & 0x7; } static inline u16 get_cqe_vlan(struct mlx5_cqe64 *cqe) { return be16_to_cpu(cqe->vlan_info) & 0xfff; } static inline void get_cqe_smac(struct mlx5_cqe64 *cqe, u8 *smac) { memcpy(smac, &cqe->rss_hash_type , 4); memcpy(smac + 4, &cqe->slid , 2); } static inline bool cqe_has_vlan(struct mlx5_cqe64 *cqe) { return cqe->l4_hdr_type_etc & 0x1; } static inline bool cqe_is_tunneled(struct mlx5_cqe64 *cqe) { return cqe->tunneled_etc & 0x1; } enum { CQE_L4_HDR_TYPE_NONE = 0x0, CQE_L4_HDR_TYPE_TCP_NO_ACK = 0x1, CQE_L4_HDR_TYPE_UDP = 0x2, CQE_L4_HDR_TYPE_TCP_ACK_NO_DATA = 0x3, CQE_L4_HDR_TYPE_TCP_ACK_AND_DATA = 0x4, }; enum { /* source L3 hash types */ CQE_RSS_SRC_HTYPE_IP = 0x3 << 0, CQE_RSS_SRC_HTYPE_IPV4 = 0x1 << 0, CQE_RSS_SRC_HTYPE_IPV6 = 0x2 << 0, /* destination L3 hash types */ CQE_RSS_DST_HTYPE_IP = 0x3 << 2, CQE_RSS_DST_HTYPE_IPV4 = 0x1 << 2, CQE_RSS_DST_HTYPE_IPV6 = 0x2 << 2, /* source L4 hash types */ CQE_RSS_SRC_HTYPE_L4 = 0x3 << 4, CQE_RSS_SRC_HTYPE_TCP = 0x1 << 4, CQE_RSS_SRC_HTYPE_UDP = 0x2 << 4, CQE_RSS_SRC_HTYPE_IPSEC = 0x3 << 4, /* destination L4 hash 
types */ CQE_RSS_DST_HTYPE_L4 = 0x3 << 6, CQE_RSS_DST_HTYPE_TCP = 0x1 << 6, CQE_RSS_DST_HTYPE_UDP = 0x2 << 6, CQE_RSS_DST_HTYPE_IPSEC = 0x3 << 6, }; enum { MLX5_CQE_ROCE_L3_HEADER_TYPE_GRH = 0x0, MLX5_CQE_ROCE_L3_HEADER_TYPE_IPV6 = 0x1, MLX5_CQE_ROCE_L3_HEADER_TYPE_IPV4 = 0x2, }; enum { CQE_L2_OK = 1 << 0, CQE_L3_OK = 1 << 1, CQE_L4_OK = 1 << 2, }; struct mlx5_sig_err_cqe { u8 rsvd0[16]; __be32 expected_trans_sig; __be32 actual_trans_sig; __be32 expected_reftag; __be32 actual_reftag; __be16 syndrome; u8 rsvd22[2]; __be32 mkey; __be64 err_offset; u8 rsvd30[8]; __be32 qpn; u8 rsvd38[2]; u8 signature; u8 op_own; }; struct mlx5_wqe_srq_next_seg { u8 rsvd0[2]; __be16 next_wqe_index; u8 signature; u8 rsvd1[11]; }; union mlx5_ext_cqe { struct ib_grh grh; u8 inl[64]; }; struct mlx5_cqe128 { union mlx5_ext_cqe inl_grh; struct mlx5_cqe64 cqe64; }; enum { MLX5_MKEY_STATUS_FREE = 1 << 6, }; struct mlx5_mkey_seg { /* This is a two bit field occupying bits 31-30. * bit 31 is always 0, * bit 30 is zero for regular MRs and 1 (e.g free) for UMRs that do not have tanslation */ u8 status; u8 pcie_control; u8 flags; u8 version; __be32 qpn_mkey7_0; u8 rsvd1[4]; __be32 flags_pd; __be64 start_addr; __be64 len; __be32 bsfs_octo_size; u8 rsvd2[16]; __be32 xlt_oct_size; u8 rsvd3[3]; u8 log2_page_size; u8 rsvd4[4]; }; #define MLX5_ATTR_EXTENDED_PORT_INFO cpu_to_be16(0xff90) enum { MLX_EXT_PORT_CAP_FLAG_EXTENDED_PORT_INFO = 1 << 0 }; static inline int mlx5_host_is_le(void) { #if defined(__LITTLE_ENDIAN) return 1; #elif defined(__BIG_ENDIAN) return 0; #else #error Host endianness not defined #endif } #define MLX5_CMD_OP_MAX 0x939 enum { VPORT_STATE_DOWN = 0x0, VPORT_STATE_UP = 0x1, VPORT_STATE_FOLLOW = 0x2, }; enum { MLX5_L3_PROT_TYPE_IPV4 = 0, MLX5_L3_PROT_TYPE_IPV6 = 1, }; enum { MLX5_L4_PROT_TYPE_TCP = 0, MLX5_L4_PROT_TYPE_UDP = 1, }; enum { MLX5_HASH_FIELD_SEL_SRC_IP = 1 << 0, MLX5_HASH_FIELD_SEL_DST_IP = 1 << 1, MLX5_HASH_FIELD_SEL_L4_SPORT = 1 << 2, MLX5_HASH_FIELD_SEL_L4_DPORT = 1 << 3, MLX5_HASH_FIELD_SEL_IPSEC_SPI = 1 << 4, }; enum { MLX5_MATCH_OUTER_HEADERS = 1 << 0, MLX5_MATCH_MISC_PARAMETERS = 1 << 1, MLX5_MATCH_INNER_HEADERS = 1 << 2, }; enum { MLX5_FLOW_TABLE_TYPE_NIC_RCV = 0, MLX5_FLOW_TABLE_TYPE_EGRESS_ACL = 2, MLX5_FLOW_TABLE_TYPE_INGRESS_ACL = 3, MLX5_FLOW_TABLE_TYPE_ESWITCH = 4, MLX5_FLOW_TABLE_TYPE_SNIFFER_RX = 5, MLX5_FLOW_TABLE_TYPE_SNIFFER_TX = 6, MLX5_FLOW_TABLE_TYPE_NIC_RX_RDMA = 7, }; enum { MLX5_MODIFY_ESW_VPORT_CONTEXT_CVLAN_INSERT_NONE = 0, MLX5_MODIFY_ESW_VPORT_CONTEXT_CVLAN_INSERT_IF_NO_VLAN = 1, MLX5_MODIFY_ESW_VPORT_CONTEXT_CVLAN_INSERT_OVERWRITE = 2 }; enum { MLX5_MODIFY_ESW_VPORT_CONTEXT_FIELD_SELECT_SVLAN_STRIP = 1 << 0, MLX5_MODIFY_ESW_VPORT_CONTEXT_FIELD_SELECT_CVLAN_STRIP = 1 << 1, MLX5_MODIFY_ESW_VPORT_CONTEXT_FIELD_SELECT_SVLAN_INSERT = 1 << 2, MLX5_MODIFY_ESW_VPORT_CONTEXT_FIELD_SELECT_CVLAN_INSERT = 1 << 3 }; enum { MLX5_UC_ADDR_CHANGE = (1 << 0), MLX5_MC_ADDR_CHANGE = (1 << 1), MLX5_VLAN_CHANGE = (1 << 2), MLX5_PROMISC_CHANGE = (1 << 3), MLX5_MTU_CHANGE = (1 << 4), }; enum mlx5_list_type { MLX5_NIC_VPORT_LIST_TYPE_UC = 0x0, MLX5_NIC_VPORT_LIST_TYPE_MC = 0x1, MLX5_NIC_VPORT_LIST_TYPE_VLAN = 0x2, }; enum { MLX5_ESW_VPORT_ADMIN_STATE_DOWN = 0x0, MLX5_ESW_VPORT_ADMIN_STATE_UP = 0x1, MLX5_ESW_VPORT_ADMIN_STATE_AUTO = 0x2, }; /* MLX5 DEV CAPs */ /* TODO: EAT.ME */ enum mlx5_cap_mode { HCA_CAP_OPMOD_GET_MAX = 0, HCA_CAP_OPMOD_GET_CUR = 1, }; enum mlx5_cap_type { MLX5_CAP_GENERAL = 0, MLX5_CAP_ETHERNET_OFFLOADS, MLX5_CAP_ODP, MLX5_CAP_ATOMIC, MLX5_CAP_ROCE, MLX5_CAP_IPOIB_OFFLOADS, 
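
The mlx5_cqe64 accessors defined above (cqe_has_vlan(), get_cqe_vlan(), get_cqe_l4_hdr_type(), cqe_is_tunneled(), ...) are how a receive path classifies a completion. A brief sketch, assuming the caller already owns a valid CQE; the real ethernet RX path does considerably more (checksum, LRO, tunnel handling via CQE_L2_OK/CQE_L3_OK/CQE_L4_OK in hds_ip_ext):

static bool
example_rx_cqe_is_tcp(struct mlx5_cqe64 *cqe, u16 *vlan_tci)
{
        u8 l4_type;

        /* With VLAN stripping enabled the C-VLAN is reported in the CQE. */
        *vlan_tci = cqe_has_vlan(cqe) ? get_cqe_vlan(cqe) : 0;

        /* L4 classification drives LRO/ACK handling in the real RX path. */
        l4_type = get_cqe_l4_hdr_type(cqe);
        return (l4_type == CQE_L4_HDR_TYPE_TCP_NO_ACK ||
            l4_type == CQE_L4_HDR_TYPE_TCP_ACK_NO_DATA ||
            l4_type == CQE_L4_HDR_TYPE_TCP_ACK_AND_DATA);
}
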
MLX5_CAP_EOIB_OFFLOADS, MLX5_CAP_FLOW_TABLE, MLX5_CAP_ESWITCH_FLOW_TABLE, MLX5_CAP_ESWITCH, MLX5_CAP_SNAPSHOT, MLX5_CAP_VECTOR_CALC, MLX5_CAP_QOS, MLX5_CAP_DEBUG, MLX5_CAP_NVME, MLX5_CAP_DMC, MLX5_CAP_DEC, MLX5_CAP_TLS, /* NUM OF CAP Types */ MLX5_CAP_NUM }; enum mlx5_qcam_reg_groups { MLX5_QCAM_REGS_FIRST_128 = 0x0, }; enum mlx5_qcam_feature_groups { MLX5_QCAM_FEATURE_ENHANCED_FEATURES = 0x0, }; enum mlx5_pcam_reg_groups { MLX5_PCAM_REGS_5000_TO_507F = 0x0, }; enum mlx5_pcam_feature_groups { MLX5_PCAM_FEATURE_ENHANCED_FEATURES = 0x0, }; enum mlx5_mcam_reg_groups { MLX5_MCAM_REGS_FIRST_128 = 0x0, }; enum mlx5_mcam_feature_groups { MLX5_MCAM_FEATURE_ENHANCED_FEATURES = 0x0, }; /* GET Dev Caps macros */ #define MLX5_CAP_GEN(mdev, cap) \ MLX5_GET(cmd_hca_cap, mdev->hca_caps_cur[MLX5_CAP_GENERAL], cap) #define MLX5_CAP_GEN_64(mdev, cap) \ MLX5_GET64(cmd_hca_cap, mdev->hca_caps_cur[MLX5_CAP_GENERAL], cap) #define MLX5_CAP_GEN_MAX(mdev, cap) \ MLX5_GET(cmd_hca_cap, mdev->hca_caps_max[MLX5_CAP_GENERAL], cap) #define MLX5_CAP_ETH(mdev, cap) \ MLX5_GET(per_protocol_networking_offload_caps,\ mdev->hca_caps_cur[MLX5_CAP_ETHERNET_OFFLOADS], cap) #define MLX5_CAP_ETH_MAX(mdev, cap) \ MLX5_GET(per_protocol_networking_offload_caps,\ mdev->hca_caps_max[MLX5_CAP_ETHERNET_OFFLOADS], cap) #define MLX5_CAP_ROCE(mdev, cap) \ MLX5_GET(roce_cap, mdev->hca_caps_cur[MLX5_CAP_ROCE], cap) #define MLX5_CAP_ROCE_MAX(mdev, cap) \ MLX5_GET(roce_cap, mdev->hca_caps_max[MLX5_CAP_ROCE], cap) #define MLX5_CAP_ATOMIC(mdev, cap) \ MLX5_GET(atomic_caps, mdev->hca_caps_cur[MLX5_CAP_ATOMIC], cap) #define MLX5_CAP_ATOMIC_MAX(mdev, cap) \ MLX5_GET(atomic_caps, mdev->hca_caps_max[MLX5_CAP_ATOMIC], cap) #define MLX5_CAP_FLOWTABLE(mdev, cap) \ MLX5_GET(flow_table_nic_cap, mdev->hca_caps_cur[MLX5_CAP_FLOW_TABLE], cap) #define MLX5_CAP_FLOWTABLE_MAX(mdev, cap) \ MLX5_GET(flow_table_nic_cap, mdev->hca_caps_max[MLX5_CAP_FLOW_TABLE], cap) #define MLX5_CAP_ESW_FLOWTABLE(mdev, cap) \ MLX5_GET(flow_table_eswitch_cap, \ mdev->hca_caps_cur[MLX5_CAP_ESWITCH_FLOW_TABLE], cap) #define MLX5_CAP_ESW_FLOWTABLE_MAX(mdev, cap) \ MLX5_GET(flow_table_eswitch_cap, \ mdev->hca_caps_max[MLX5_CAP_ESWITCH_FLOW_TABLE], cap) #define MLX5_CAP_ESW_FLOWTABLE_FDB(mdev, cap) \ MLX5_CAP_ESW_FLOWTABLE(mdev, flow_table_properties_nic_esw_fdb.cap) #define MLX5_CAP_ESW_FLOWTABLE_FDB_MAX(mdev, cap) \ MLX5_CAP_ESW_FLOWTABLE_MAX(mdev, flow_table_properties_nic_esw_fdb.cap) #define MLX5_CAP_ESW_EGRESS_ACL(mdev, cap) \ MLX5_CAP_ESW_FLOWTABLE(mdev, flow_table_properties_esw_acl_egress.cap) #define MLX5_CAP_ESW_EGRESS_ACL_MAX(mdev, cap) \ MLX5_CAP_ESW_FLOWTABLE_MAX(mdev, flow_table_properties_esw_acl_egress.cap) #define MLX5_CAP_ESW_INGRESS_ACL(mdev, cap) \ MLX5_CAP_ESW_FLOWTABLE(mdev, flow_table_properties_esw_acl_ingress.cap) #define MLX5_CAP_ESW_INGRESS_ACL_MAX(mdev, cap) \ MLX5_CAP_ESW_FLOWTABLE_MAX(mdev, flow_table_properties_esw_acl_ingress.cap) #define MLX5_CAP_ESW(mdev, cap) \ MLX5_GET(e_switch_cap, \ mdev->hca_caps_cur[MLX5_CAP_ESWITCH], cap) #define MLX5_CAP_ESW_MAX(mdev, cap) \ MLX5_GET(e_switch_cap, \ mdev->hca_caps_max[MLX5_CAP_ESWITCH], cap) #define MLX5_CAP_ODP(mdev, cap)\ MLX5_GET(odp_cap, mdev->hca_caps_cur[MLX5_CAP_ODP], cap) #define MLX5_CAP_ODP_MAX(mdev, cap)\ MLX5_GET(odp_cap, mdev->hca_caps_max[MLX5_CAP_ODP], cap) #define MLX5_CAP_SNAPSHOT(mdev, cap) \ MLX5_GET(snapshot_cap, \ mdev->hca_caps_cur[MLX5_CAP_SNAPSHOT], cap) #define MLX5_CAP_SNAPSHOT_MAX(mdev, cap) \ MLX5_GET(snapshot_cap, \ mdev->hca_caps_max[MLX5_CAP_SNAPSHOT], cap) #define 
MLX5_CAP_EOIB_OFFLOADS(mdev, cap) \ MLX5_GET(per_protocol_networking_offload_caps,\ mdev->hca_caps_cur[MLX5_CAP_EOIB_OFFLOADS], cap) #define MLX5_CAP_EOIB_OFFLOADS_MAX(mdev, cap) \ MLX5_GET(per_protocol_networking_offload_caps,\ mdev->hca_caps_max[MLX5_CAP_EOIB_OFFLOADS], cap) #define MLX5_CAP_DEBUG(mdev, cap) \ MLX5_GET(debug_cap, \ mdev->hca_caps_cur[MLX5_CAP_DEBUG], cap) #define MLX5_CAP_DEBUG_MAX(mdev, cap) \ MLX5_GET(debug_cap, \ mdev->hca_caps_max[MLX5_CAP_DEBUG], cap) #define MLX5_CAP_QOS(mdev, cap) \ MLX5_GET(qos_cap,\ mdev->hca_caps_cur[MLX5_CAP_QOS], cap) #define MLX5_CAP_QOS_MAX(mdev, cap) \ MLX5_GET(qos_cap,\ mdev->hca_caps_max[MLX5_CAP_QOS], cap) #define MLX5_CAP_PCAM_FEATURE(mdev, fld) \ MLX5_GET(pcam_reg, (mdev)->caps.pcam, feature_cap_mask.enhanced_features.fld) #define MLX5_CAP_PCAM_REG(mdev, reg) \ MLX5_GET(pcam_reg, (mdev)->caps.pcam, port_access_reg_cap_mask.regs_5000_to_507f.reg) #define MLX5_CAP_MCAM_FEATURE(mdev, fld) \ MLX5_GET(mcam_reg, (mdev)->caps.mcam, mng_feature_cap_mask.enhanced_features.fld) #define MLX5_CAP_MCAM_REG(mdev, reg) \ MLX5_GET(mcam_reg, (mdev)->caps.mcam, mng_access_reg_cap_mask.access_regs.reg) #define MLX5_CAP_QCAM_REG(mdev, fld) \ MLX5_GET(qcam_reg, (mdev)->caps.qcam, qos_access_reg_cap_mask.reg_cap.fld) #define MLX5_CAP_QCAM_FEATURE(mdev, fld) \ MLX5_GET(qcam_reg, (mdev)->caps.qcam, qos_feature_cap_mask.feature_cap.fld) #define MLX5_CAP_FPGA(mdev, cap) \ MLX5_GET(fpga_cap, (mdev)->caps.fpga, cap) #define MLX5_CAP64_FPGA(mdev, cap) \ MLX5_GET64(fpga_cap, (mdev)->caps.fpga, cap) #define MLX5_CAP_TLS(mdev, cap) \ MLX5_GET(tls_capabilities, (mdev)->hca_caps_cur[MLX5_CAP_TLS], cap) enum { MLX5_CMD_STAT_OK = 0x0, MLX5_CMD_STAT_INT_ERR = 0x1, MLX5_CMD_STAT_BAD_OP_ERR = 0x2, MLX5_CMD_STAT_BAD_PARAM_ERR = 0x3, MLX5_CMD_STAT_BAD_SYS_STATE_ERR = 0x4, MLX5_CMD_STAT_BAD_RES_ERR = 0x5, MLX5_CMD_STAT_RES_BUSY = 0x6, MLX5_CMD_STAT_LIM_ERR = 0x8, MLX5_CMD_STAT_BAD_RES_STATE_ERR = 0x9, MLX5_CMD_STAT_IX_ERR = 0xa, MLX5_CMD_STAT_NO_RES_ERR = 0xf, MLX5_CMD_STAT_BAD_INP_LEN_ERR = 0x50, MLX5_CMD_STAT_BAD_OUTP_LEN_ERR = 0x51, MLX5_CMD_STAT_BAD_QP_STATE_ERR = 0x10, MLX5_CMD_STAT_BAD_PKT_ERR = 0x30, MLX5_CMD_STAT_BAD_SIZE_OUTS_CQES_ERR = 0x40, }; enum { MLX5_IEEE_802_3_COUNTERS_GROUP = 0x0, MLX5_RFC_2863_COUNTERS_GROUP = 0x1, MLX5_RFC_2819_COUNTERS_GROUP = 0x2, MLX5_RFC_3635_COUNTERS_GROUP = 0x3, MLX5_ETHERNET_EXTENDED_COUNTERS_GROUP = 0x5, MLX5_ETHERNET_DISCARD_COUNTERS_GROUP = 0x6, MLX5_PER_PRIORITY_COUNTERS_GROUP = 0x10, MLX5_PER_TRAFFIC_CLASS_COUNTERS_GROUP = 0x11, MLX5_PHYSICAL_LAYER_COUNTERS_GROUP = 0x12, MLX5_PHYSICAL_LAYER_STATISTICAL_GROUP = 0x16, MLX5_INFINIBAND_PORT_COUNTERS_GROUP = 0x20, }; enum { MLX5_PCIE_PERFORMANCE_COUNTERS_GROUP = 0x0, MLX5_PCIE_LANE_COUNTERS_GROUP = 0x1, MLX5_PCIE_TIMERS_AND_STATES_COUNTERS_GROUP = 0x2, }; -enum { - MLX5_NUM_UUARS_PER_PAGE = MLX5_NON_FP_BF_REGS_PER_PAGE, - MLX5_DEF_TOT_UUARS = 8 * MLX5_NUM_UUARS_PER_PAGE, -}; - -enum { - NUM_DRIVER_UARS = 4, - NUM_LOW_LAT_UUARS = 4, -}; - enum { MLX5_CAP_PORT_TYPE_IB = 0x0, MLX5_CAP_PORT_TYPE_ETH = 0x1, }; enum { MLX5_CMD_HCA_CAP_MIN_WQE_INLINE_MODE_L2 = 0x0, MLX5_CMD_HCA_CAP_MIN_WQE_INLINE_MODE_VPORT_CONFIG = 0x1, MLX5_CMD_HCA_CAP_MIN_WQE_INLINE_MODE_NOT_REQUIRED = 0x2 }; enum mlx5_inline_modes { MLX5_INLINE_MODE_NONE, MLX5_INLINE_MODE_L2, MLX5_INLINE_MODE_IP, MLX5_INLINE_MODE_TCP_UDP, }; enum { MLX5_QUERY_VPORT_STATE_OUT_STATE_FOLLOW = 0x2, }; static inline u16 mlx5_to_sw_pkey_sz(int pkey_sz) { if (pkey_sz > MLX5_MAX_LOG_PKEY_TABLE) return 0; return MLX5_MIN_PKEY_TABLE_SIZE << 
pkey_sz; } struct mlx5_ifc_mcia_reg_bits { u8 l[0x1]; u8 reserved_0[0x7]; u8 module[0x8]; u8 reserved_1[0x8]; u8 status[0x8]; u8 i2c_device_address[0x8]; u8 page_number[0x8]; u8 device_address[0x10]; u8 reserved_2[0x10]; u8 size[0x10]; u8 reserved_3[0x20]; u8 dword_0[0x20]; u8 dword_1[0x20]; u8 dword_2[0x20]; u8 dword_3[0x20]; u8 dword_4[0x20]; u8 dword_5[0x20]; u8 dword_6[0x20]; u8 dword_7[0x20]; u8 dword_8[0x20]; u8 dword_9[0x20]; u8 dword_10[0x20]; u8 dword_11[0x20]; }; #define MLX5_CMD_OP_QUERY_EEPROM 0x93c struct mlx5_mini_cqe8 { union { __be32 rx_hash_result; __be16 checksum; __be16 rsvd; struct { __be16 wqe_counter; u8 s_wqe_opcode; u8 reserved; } s_wqe_info; }; __be32 byte_cnt; }; enum { MLX5_NO_INLINE_DATA, MLX5_INLINE_DATA32_SEG, MLX5_INLINE_DATA64_SEG, MLX5_COMPRESSED, }; enum mlx5_exp_cqe_zip_recv_type { MLX5_CQE_FORMAT_HASH, MLX5_CQE_FORMAT_CSUM, }; #define MLX5E_CQE_FORMAT_MASK 0xc static inline int mlx5_get_cqe_format(const struct mlx5_cqe64 *cqe) { return (cqe->op_own & MLX5E_CQE_FORMAT_MASK) >> 2; } enum { MLX5_GEN_EVENT_SUBTYPE_DELAY_DROP_TIMEOUT = 0x1, MLX5_GEN_EVENT_SUBTYPE_PCI_POWER_CHANGE_EVENT = 0x5, }; enum { MLX5_FRL_LEVEL3 = 0x8, MLX5_FRL_LEVEL6 = 0x40, }; /* 8 regular priorities + 1 for multicast */ #define MLX5_NUM_BYPASS_FTS 9 #endif /* MLX5_DEVICE_H */ diff --git a/sys/dev/mlx5/driver.h b/sys/dev/mlx5/driver.h index ec9c4f618f67..5d4f58d7e1fd 100644 --- a/sys/dev/mlx5/driver.h +++ b/sys/dev/mlx5/driver.h @@ -1,1210 +1,1205 @@ /*- * Copyright (c) 2013-2019, Mellanox Technologies, Ltd. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS `AS IS' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
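
mlx5_get_cqe_format() above separates regular CQEs from compressed ones; with CQE compression the hardware packs follow-on completions as mlx5_mini_cqe8 entries. A trivial check, as a sketch:

static inline bool
example_cqe_is_compressed(const struct mlx5_cqe64 *cqe)
{
        /* MLX5E_CQE_FORMAT_MASK covers bits 2..3 of op_own. */
        return (mlx5_get_cqe_format(cqe) == MLX5_COMPRESSED);
}
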
* * $FreeBSD$ */ #ifndef MLX5_DRIVER_H #define MLX5_DRIVER_H #include "opt_ratelimit.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #define MLX5_QCOUNTER_SETS_NETDEV 64 #define MLX5_MAX_NUMBER_OF_VFS 128 enum { MLX5_BOARD_ID_LEN = 64, MLX5_MAX_NAME_LEN = 16, }; enum { MLX5_CMD_TIMEOUT_MSEC = 60 * 1000, }; enum { CMD_OWNER_SW = 0x0, CMD_OWNER_HW = 0x1, CMD_STATUS_SUCCESS = 0, }; enum mlx5_sqp_t { MLX5_SQP_SMI = 0, MLX5_SQP_GSI = 1, MLX5_SQP_IEEE_1588 = 2, MLX5_SQP_SNIFFER = 3, MLX5_SQP_SYNC_UMR = 4, }; enum { MLX5_MAX_PORTS = 2, }; enum { MLX5_EQ_VEC_PAGES = 0, MLX5_EQ_VEC_CMD = 1, MLX5_EQ_VEC_ASYNC = 2, MLX5_EQ_VEC_COMP_BASE, }; enum { MLX5_ATOMIC_MODE_OFF = 16, MLX5_ATOMIC_MODE_NONE = 0 << MLX5_ATOMIC_MODE_OFF, MLX5_ATOMIC_MODE_IB_COMP = 1 << MLX5_ATOMIC_MODE_OFF, MLX5_ATOMIC_MODE_CX = 2 << MLX5_ATOMIC_MODE_OFF, MLX5_ATOMIC_MODE_8B = 3 << MLX5_ATOMIC_MODE_OFF, MLX5_ATOMIC_MODE_16B = 4 << MLX5_ATOMIC_MODE_OFF, MLX5_ATOMIC_MODE_32B = 5 << MLX5_ATOMIC_MODE_OFF, MLX5_ATOMIC_MODE_64B = 6 << MLX5_ATOMIC_MODE_OFF, MLX5_ATOMIC_MODE_128B = 7 << MLX5_ATOMIC_MODE_OFF, MLX5_ATOMIC_MODE_256B = 8 << MLX5_ATOMIC_MODE_OFF, }; enum { MLX5_ATOMIC_MODE_DCT_OFF = 20, MLX5_ATOMIC_MODE_DCT_NONE = 0 << MLX5_ATOMIC_MODE_DCT_OFF, MLX5_ATOMIC_MODE_DCT_IB_COMP = 1 << MLX5_ATOMIC_MODE_DCT_OFF, MLX5_ATOMIC_MODE_DCT_CX = 2 << MLX5_ATOMIC_MODE_DCT_OFF, MLX5_ATOMIC_MODE_DCT_8B = 3 << MLX5_ATOMIC_MODE_DCT_OFF, MLX5_ATOMIC_MODE_DCT_16B = 4 << MLX5_ATOMIC_MODE_DCT_OFF, MLX5_ATOMIC_MODE_DCT_32B = 5 << MLX5_ATOMIC_MODE_DCT_OFF, MLX5_ATOMIC_MODE_DCT_64B = 6 << MLX5_ATOMIC_MODE_DCT_OFF, MLX5_ATOMIC_MODE_DCT_128B = 7 << MLX5_ATOMIC_MODE_DCT_OFF, MLX5_ATOMIC_MODE_DCT_256B = 8 << MLX5_ATOMIC_MODE_DCT_OFF, }; enum { MLX5_ATOMIC_OPS_CMP_SWAP = 1 << 0, MLX5_ATOMIC_OPS_FETCH_ADD = 1 << 1, MLX5_ATOMIC_OPS_MASKED_CMP_SWAP = 1 << 2, MLX5_ATOMIC_OPS_MASKED_FETCH_ADD = 1 << 3, }; enum { MLX5_REG_QPTS = 0x4002, MLX5_REG_QETCR = 0x4005, MLX5_REG_QPDP = 0x4007, MLX5_REG_QTCT = 0x400A, MLX5_REG_QPDPM = 0x4013, MLX5_REG_QHLL = 0x4016, MLX5_REG_QCAM = 0x4019, MLX5_REG_DCBX_PARAM = 0x4020, MLX5_REG_DCBX_APP = 0x4021, MLX5_REG_FPGA_CAP = 0x4022, MLX5_REG_FPGA_CTRL = 0x4023, MLX5_REG_FPGA_ACCESS_REG = 0x4024, MLX5_REG_FPGA_SHELL_CNTR = 0x4025, MLX5_REG_PCAP = 0x5001, MLX5_REG_PMLP = 0x5002, MLX5_REG_PMTU = 0x5003, MLX5_REG_PTYS = 0x5004, MLX5_REG_PAOS = 0x5006, MLX5_REG_PFCC = 0x5007, MLX5_REG_PPCNT = 0x5008, MLX5_REG_PUDE = 0x5009, MLX5_REG_PPTB = 0x500B, MLX5_REG_PBMC = 0x500C, MLX5_REG_PELC = 0x500E, MLX5_REG_PVLC = 0x500F, MLX5_REG_PMPE = 0x5010, MLX5_REG_PMAOS = 0x5012, MLX5_REG_PPLM = 0x5023, MLX5_REG_PDDR = 0x5031, MLX5_REG_PBSR = 0x5038, MLX5_REG_PCAM = 0x507f, MLX5_REG_NODE_DESC = 0x6001, MLX5_REG_HOST_ENDIANNESS = 0x7004, MLX5_REG_MTMP = 0x900a, MLX5_REG_MCIA = 0x9014, MLX5_REG_MFRL = 0x9028, MLX5_REG_MPCNT = 0x9051, MLX5_REG_MCQI = 0x9061, MLX5_REG_MCC = 0x9062, MLX5_REG_MCDA = 0x9063, MLX5_REG_MCAM = 0x907f, }; enum dbg_rsc_type { MLX5_DBG_RSC_QP, MLX5_DBG_RSC_EQ, MLX5_DBG_RSC_CQ, }; enum { MLX5_INTERFACE_PROTOCOL_IB = 0, MLX5_INTERFACE_PROTOCOL_ETH = 1, MLX5_INTERFACE_NUMBER = 2, }; struct mlx5_field_desc { struct dentry *dent; int i; }; struct mlx5_rsc_debug { struct mlx5_core_dev *dev; void *object; enum dbg_rsc_type type; struct dentry *root; struct mlx5_field_desc fields[0]; }; enum mlx5_dev_event { MLX5_DEV_EVENT_SYS_ERROR, MLX5_DEV_EVENT_PORT_UP, MLX5_DEV_EVENT_PORT_DOWN, MLX5_DEV_EVENT_PORT_INITIALIZED, MLX5_DEV_EVENT_LID_CHANGE, 
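
The MLX5_REG_* identifiers above are the register ids passed to mlx5_core_access_reg(), declared near the end of this header. A sketch of a module EEPROM dword read through the MCIA register layout from device.h; error/status checking and the i2c address and page selection are simplified:

static int
example_read_module_eeprom(struct mlx5_core_dev *dev, u8 module,
    u16 offset, u32 *dword_out)
{
        u32 in[MLX5_ST_SZ_DW(mcia_reg)] = {0};
        u32 out[MLX5_ST_SZ_DW(mcia_reg)] = {0};
        int err;

        MLX5_SET(mcia_reg, in, module, module);
        MLX5_SET(mcia_reg, in, device_address, offset);
        MLX5_SET(mcia_reg, in, size, sizeof(u32));

        /* Last argument 0 selects a register read. */
        err = mlx5_core_access_reg(dev, in, sizeof(in), out,
            sizeof(out), MLX5_REG_MCIA, 0, 0);
        if (err == 0)
                *dword_out = MLX5_GET(mcia_reg, out, dword_0);
        return (err);
}
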
MLX5_DEV_EVENT_PKEY_CHANGE, MLX5_DEV_EVENT_GUID_CHANGE, MLX5_DEV_EVENT_CLIENT_REREG, MLX5_DEV_EVENT_VPORT_CHANGE, MLX5_DEV_EVENT_ERROR_STATE_DCBX, MLX5_DEV_EVENT_REMOTE_CONFIG_CHANGE, MLX5_DEV_EVENT_LOCAL_OPER_CHANGE, MLX5_DEV_EVENT_REMOTE_CONFIG_APPLICATION_PRIORITY_CHANGE, }; enum mlx5_port_status { MLX5_PORT_UP = 1 << 0, MLX5_PORT_DOWN = 1 << 1, }; enum { MLX5_VSC_SPACE_SUPPORTED = 0x1, MLX5_VSC_SPACE_OFFSET = 0x4, MLX5_VSC_COUNTER_OFFSET = 0x8, MLX5_VSC_SEMA_OFFSET = 0xC, MLX5_VSC_ADDR_OFFSET = 0x10, MLX5_VSC_DATA_OFFSET = 0x14, MLX5_VSC_MAX_RETRIES = 0x1000, }; #define MLX5_PROT_MASK(link_mode) (1 << link_mode) -struct mlx5_uuar_info { - struct mlx5_uar *uars; - int num_uars; - int num_low_latency_uuars; - unsigned long *bitmap; - unsigned int *count; - struct mlx5_bf *bfs; - - /* - * protect uuar allocation data structs - */ - struct mutex lock; - u32 ver; -}; - -struct mlx5_bf { - void __iomem *reg; - void __iomem *regreg; - int buf_size; - struct mlx5_uar *uar; - unsigned long offset; - int need_lock; - /* protect blue flame buffer selection when needed - */ - spinlock_t lock; - - /* serialize 64 bit writes when done as two 32 bit accesses - */ - spinlock_t lock32; - int uuarn; -}; - struct mlx5_cmd_first { __be32 data[4]; }; struct cache_ent; struct mlx5_fw_page { union { struct rb_node rb_node; struct list_head list; }; struct mlx5_cmd_first first; struct mlx5_core_dev *dev; bus_dmamap_t dma_map; bus_addr_t dma_addr; void *virt_addr; struct cache_ent *cache; u32 numpages; u16 load_done; #define MLX5_LOAD_ST_NONE 0 #define MLX5_LOAD_ST_SUCCESS 1 #define MLX5_LOAD_ST_FAILURE 2 u16 func_id; }; #define mlx5_cmd_msg mlx5_fw_page struct mlx5_cmd_debug { struct dentry *dbg_root; struct dentry *dbg_in; struct dentry *dbg_out; struct dentry *dbg_outlen; struct dentry *dbg_status; struct dentry *dbg_run; void *in_msg; void *out_msg; u8 status; u16 inlen; u16 outlen; }; struct cache_ent { /* protect block chain allocations */ spinlock_t lock; struct list_head head; }; struct cmd_msg_cache { struct cache_ent large; struct cache_ent med; }; struct mlx5_traffic_counter { u64 packets; u64 octets; }; enum mlx5_cmd_mode { MLX5_CMD_MODE_POLLING, MLX5_CMD_MODE_EVENTS }; struct mlx5_cmd_stats { u64 sum; u64 n; struct dentry *root; struct dentry *avg; struct dentry *count; /* protect command average calculations */ spinlock_t lock; }; struct mlx5_cmd { struct mlx5_fw_page *cmd_page; bus_dma_tag_t dma_tag; struct sx dma_sx; struct mtx dma_mtx; #define MLX5_DMA_OWNED(dev) mtx_owned(&(dev)->cmd.dma_mtx) #define MLX5_DMA_LOCK(dev) mtx_lock(&(dev)->cmd.dma_mtx) #define MLX5_DMA_UNLOCK(dev) mtx_unlock(&(dev)->cmd.dma_mtx) struct cv dma_cv; #define MLX5_DMA_DONE(dev) cv_broadcast(&(dev)->cmd.dma_cv) #define MLX5_DMA_WAIT(dev) cv_wait(&(dev)->cmd.dma_cv, &(dev)->cmd.dma_mtx) void *cmd_buf; dma_addr_t dma; u16 cmdif_rev; u8 log_sz; u8 log_stride; int max_reg_cmds; int events; u32 __iomem *vector; /* protect command queue allocations */ spinlock_t alloc_lock; /* protect token allocations */ spinlock_t token_lock; u8 token; unsigned long bitmask; struct semaphore sem; struct semaphore pages_sem; enum mlx5_cmd_mode mode; struct mlx5_cmd_work_ent * volatile ent_arr[MLX5_MAX_COMMANDS]; volatile enum mlx5_cmd_mode ent_mode[MLX5_MAX_COMMANDS]; struct mlx5_cmd_debug dbg; struct cmd_msg_cache cache; int checksum_disabled; struct mlx5_cmd_stats stats[MLX5_CMD_OP_MAX]; }; struct mlx5_port_caps { int gid_table_len; int pkey_table_len; u8 ext_port_cap; }; struct mlx5_buf { bus_dma_tag_t dma_tag; bus_dmamap_t dma_map; 
struct mlx5_core_dev *dev; struct { void *buf; } direct; u64 *page_list; int npages; int size; u8 page_shift; u8 load_done; }; struct mlx5_frag_buf { struct mlx5_buf_list *frags; int npages; int size; u8 page_shift; }; struct mlx5_eq { struct mlx5_core_dev *dev; __be32 __iomem *doorbell; u32 cons_index; struct mlx5_buf buf; int size; u8 irqn; u8 eqn; int nent; u64 mask; struct list_head list; int index; struct mlx5_rsc_debug *dbg; }; struct mlx5_core_psv { u32 psv_idx; struct psv_layout { u32 pd; u16 syndrome; u16 reserved; u16 bg; u16 app_tag; u32 ref_tag; } psv; }; struct mlx5_core_sig_ctx { struct mlx5_core_psv psv_memory; struct mlx5_core_psv psv_wire; #if (__FreeBSD_version >= 1100000) struct ib_sig_err err_item; #endif bool sig_status_checked; bool sig_err_exists; u32 sigerr_count; }; enum { MLX5_MKEY_MR = 1, MLX5_MKEY_MW, MLX5_MKEY_MR_USER, }; struct mlx5_core_mkey { u64 iova; u64 size; u32 key; u32 pd; u32 type; }; struct mlx5_core_mr { u64 iova; u64 size; u32 key; u32 pd; }; enum mlx5_res_type { MLX5_RES_QP = MLX5_EVENT_QUEUE_TYPE_QP, MLX5_RES_RQ = MLX5_EVENT_QUEUE_TYPE_RQ, MLX5_RES_SQ = MLX5_EVENT_QUEUE_TYPE_SQ, MLX5_RES_SRQ = 3, MLX5_RES_XSRQ = 4, MLX5_RES_DCT = 5, }; struct mlx5_core_rsc_common { enum mlx5_res_type res; atomic_t refcount; struct completion free; }; +struct mlx5_uars_page { + void __iomem *map; + bool wc; + u32 index; + struct list_head list; + unsigned int bfregs; + unsigned long *reg_bitmap; /* for non fast path bf regs */ + unsigned long *fp_bitmap; + unsigned int reg_avail; + unsigned int fp_avail; + struct kref ref_count; + struct mlx5_core_dev *mdev; +}; + +struct mlx5_bfreg_head { + /* protect blue flame registers allocations */ + struct mutex lock; + struct list_head list; +}; + +struct mlx5_bfreg_data { + struct mlx5_bfreg_head reg_head; + struct mlx5_bfreg_head wc_head; +}; + +struct mlx5_sq_bfreg { + void __iomem *map; + struct mlx5_uars_page *up; + bool wc; + u32 index; + unsigned int offset; +}; + struct mlx5_core_srq { struct mlx5_core_rsc_common common; /* must be first */ u32 srqn; int max; size_t max_gs; size_t max_avail_gather; int wqe_shift; void (*event)(struct mlx5_core_srq *, int); atomic_t refcount; struct completion free; }; struct mlx5_eq_table { void __iomem *update_ci; void __iomem *update_arm_ci; struct list_head comp_eqs_list; struct mlx5_eq pages_eq; struct mlx5_eq async_eq; struct mlx5_eq cmd_eq; int num_comp_vectors; /* protect EQs list */ spinlock_t lock; }; -struct mlx5_uar { - u32 index; - void __iomem *bf_map; - void __iomem *map; -}; - - struct mlx5_core_health { struct mlx5_health_buffer __iomem *health; __be32 __iomem *health_counter; struct timer_list timer; u32 prev; int miss_counter; u32 fatal_error; struct workqueue_struct *wq_watchdog; struct work_struct work_watchdog; /* wq spinlock to synchronize draining */ spinlock_t wq_lock; struct workqueue_struct *wq; unsigned long flags; struct work_struct work; struct delayed_work recover_work; unsigned int last_reset_req; struct work_struct work_cmd_completion; struct workqueue_struct *wq_cmd; }; #define MLX5_CQ_LINEAR_ARRAY_SIZE 1024 struct mlx5_cq_linear_array_entry { struct mlx5_core_cq * volatile cq; }; struct mlx5_cq_table { /* protect radix tree */ spinlock_t writerlock; atomic_t writercount; struct radix_tree_root tree; struct mlx5_cq_linear_array_entry linear_array[MLX5_CQ_LINEAR_ARRAY_SIZE]; }; struct mlx5_qp_table { /* protect radix tree */ spinlock_t lock; struct radix_tree_root tree; }; struct mlx5_srq_table { /* protect radix tree */ spinlock_t lock; struct 
radix_tree_root tree; }; struct mlx5_mr_table { /* protect radix tree */ spinlock_t lock; struct radix_tree_root tree; }; #ifdef RATELIMIT struct mlx5_rl_entry { u32 rate; u16 burst; u16 index; u32 refcount; }; struct mlx5_rl_table { struct mutex rl_lock; u16 max_size; u32 max_rate; u32 min_rate; struct mlx5_rl_entry *rl_entry; }; #endif struct mlx5_pme_stats { u64 status_counters[MLX5_MODULE_STATUS_NUM]; u64 error_counters[MLX5_MODULE_EVENT_ERROR_NUM]; }; struct mlx5_priv { char name[MLX5_MAX_NAME_LEN]; struct mlx5_eq_table eq_table; struct msix_entry *msix_arr; - struct mlx5_uuar_info uuari; MLX5_DECLARE_DOORBELL_LOCK(cq_uar_lock); int disable_irqs; - struct io_mapping *bf_mapping; - /* pages stuff */ struct workqueue_struct *pg_wq; struct rb_root page_root; s64 fw_pages; atomic_t reg_pages; s64 pages_per_func[MLX5_MAX_NUMBER_OF_VFS]; struct mlx5_core_health health; struct mlx5_srq_table srq_table; /* start: qp staff */ struct mlx5_qp_table qp_table; struct dentry *qp_debugfs; struct dentry *eq_debugfs; struct dentry *cq_debugfs; struct dentry *cmdif_debugfs; /* end: qp staff */ /* start: cq staff */ struct mlx5_cq_table cq_table; /* end: cq staff */ /* start: mr staff */ struct mlx5_mr_table mr_table; /* end: mr staff */ /* start: alloc staff */ int numa_node; struct mutex pgdir_mutex; struct list_head pgdir_list; /* end: alloc staff */ struct dentry *dbg_root; /* protect mkey key part */ spinlock_t mkey_lock; u8 mkey_key; struct list_head dev_list; struct list_head ctx_list; spinlock_t ctx_lock; unsigned long pci_dev_data; #ifdef RATELIMIT struct mlx5_rl_table rl_table; #endif struct mlx5_pme_stats pme_stats; struct mlx5_eswitch *eswitch; + + struct mlx5_bfreg_data bfregs; + struct mlx5_uars_page *uar; }; enum mlx5_device_state { MLX5_DEVICE_STATE_UP, MLX5_DEVICE_STATE_INTERNAL_ERROR, }; enum mlx5_interface_state { MLX5_INTERFACE_STATE_UP = 0x1, MLX5_INTERFACE_STATE_TEARDOWN = 0x2, }; enum mlx5_pci_status { MLX5_PCI_STATUS_DISABLED, MLX5_PCI_STATUS_ENABLED, }; #define MLX5_MAX_RESERVED_GIDS 8 struct mlx5_rsvd_gids { unsigned int start; unsigned int count; struct ida ida; }; struct mlx5_special_contexts { int resd_lkey; }; struct mlx5_flow_root_namespace; struct mlx5_core_dev { struct pci_dev *pdev; /* sync pci state */ struct mutex pci_status_mutex; enum mlx5_pci_status pci_status; char board_id[MLX5_BOARD_ID_LEN]; struct mlx5_cmd cmd; struct mlx5_port_caps port_caps[MLX5_MAX_PORTS]; u32 hca_caps_cur[MLX5_CAP_NUM][MLX5_UN_SZ_DW(hca_cap_union)]; u32 hca_caps_max[MLX5_CAP_NUM][MLX5_UN_SZ_DW(hca_cap_union)]; struct { u32 pcam[MLX5_ST_SZ_DW(pcam_reg)]; u32 mcam[MLX5_ST_SZ_DW(mcam_reg)]; u32 qcam[MLX5_ST_SZ_DW(qcam_reg)]; u32 fpga[MLX5_ST_SZ_DW(fpga_cap)]; } caps; phys_addr_t iseg_base; struct mlx5_init_seg __iomem *iseg; enum mlx5_device_state state; /* sync interface state */ struct mutex intf_state_mutex; unsigned long intf_state; void (*event) (struct mlx5_core_dev *dev, enum mlx5_dev_event event, unsigned long param); struct mlx5_priv priv; struct mlx5_profile *profile; atomic_t num_qps; u32 vsc_addr; u32 issi; struct mlx5_special_contexts special_contexts; unsigned int module_status[MLX5_MAX_PORTS]; struct mlx5_flow_root_namespace *root_ns; struct mlx5_flow_root_namespace *fdb_root_ns; struct mlx5_flow_root_namespace *esw_egress_root_ns; struct mlx5_flow_root_namespace *esw_ingress_root_ns; struct mlx5_flow_root_namespace *sniffer_rx_root_ns; struct mlx5_flow_root_namespace *sniffer_tx_root_ns; u32 num_q_counter_allocated[MLX5_INTERFACE_NUMBER]; struct mlx5_crspace_regmap *dump_rege; 
uint32_t *dump_data; unsigned dump_size; bool dump_valid; bool dump_copyout; struct mtx dump_lock; struct sysctl_ctx_list sysctl_ctx; int msix_eqvec; int pwr_status; int pwr_value; struct { struct mlx5_rsvd_gids reserved_gids; atomic_t roce_en; } roce; struct { spinlock_t spinlock; #define MLX5_MPFS_TABLE_MAX 32 long bitmap[BITS_TO_LONGS(MLX5_MPFS_TABLE_MAX)]; } mpfs; #ifdef CONFIG_MLX5_FPGA struct mlx5_fpga_device *fpga; #endif }; enum { MLX5_WOL_DISABLE = 0, MLX5_WOL_SECURED_MAGIC = 1 << 1, MLX5_WOL_MAGIC = 1 << 2, MLX5_WOL_ARP = 1 << 3, MLX5_WOL_BROADCAST = 1 << 4, MLX5_WOL_MULTICAST = 1 << 5, MLX5_WOL_UNICAST = 1 << 6, MLX5_WOL_PHY_ACTIVITY = 1 << 7, }; struct mlx5_db { __be32 *db; union { struct mlx5_db_pgdir *pgdir; struct mlx5_ib_user_db_page *user_page; } u; dma_addr_t dma; int index; }; struct mlx5_net_counters { u64 packets; u64 octets; }; struct mlx5_ptys_reg { u8 an_dis_admin; u8 an_dis_ap; u8 local_port; u8 proto_mask; u32 eth_proto_cap; u16 ib_link_width_cap; u16 ib_proto_cap; u32 eth_proto_admin; u16 ib_link_width_admin; u16 ib_proto_admin; u32 eth_proto_oper; u16 ib_link_width_oper; u16 ib_proto_oper; u32 eth_proto_lp_advertise; }; struct mlx5_pvlc_reg { u8 local_port; u8 vl_hw_cap; u8 vl_admin; u8 vl_operational; }; struct mlx5_pmtu_reg { u8 local_port; u16 max_mtu; u16 admin_mtu; u16 oper_mtu; }; struct mlx5_vport_counters { struct mlx5_net_counters received_errors; struct mlx5_net_counters transmit_errors; struct mlx5_net_counters received_ib_unicast; struct mlx5_net_counters transmitted_ib_unicast; struct mlx5_net_counters received_ib_multicast; struct mlx5_net_counters transmitted_ib_multicast; struct mlx5_net_counters received_eth_broadcast; struct mlx5_net_counters transmitted_eth_broadcast; struct mlx5_net_counters received_eth_unicast; struct mlx5_net_counters transmitted_eth_unicast; struct mlx5_net_counters received_eth_multicast; struct mlx5_net_counters transmitted_eth_multicast; }; enum { MLX5_DB_PER_PAGE = MLX5_ADAPTER_PAGE_SIZE / L1_CACHE_BYTES, }; struct mlx5_core_dct { struct mlx5_core_rsc_common common; /* must be first */ void (*event)(struct mlx5_core_dct *, int); int dctn; struct completion drained; struct mlx5_rsc_debug *dbg; int pid; }; enum { MLX5_COMP_EQ_SIZE = 1024, }; enum { MLX5_PTYS_IB = 1 << 0, MLX5_PTYS_EN = 1 << 2, }; struct mlx5_db_pgdir { struct list_head list; DECLARE_BITMAP(bitmap, MLX5_DB_PER_PAGE); struct mlx5_fw_page *fw_page; __be32 *db_page; dma_addr_t db_dma; }; typedef void (*mlx5_cmd_cbk_t)(int status, void *context); struct mlx5_cmd_work_ent { struct mlx5_cmd_msg *in; struct mlx5_cmd_msg *out; int uin_size; void *uout; int uout_size; mlx5_cmd_cbk_t callback; struct delayed_work cb_timeout_work; void *context; int idx; struct completion done; struct mlx5_cmd *cmd; struct work_struct work; struct mlx5_cmd_layout *lay; int ret; int page_queue; u8 status; u8 token; u64 ts1; u64 ts2; u16 op; u8 busy; bool polling; }; struct mlx5_pas { u64 pa; u8 log_sz; }; enum port_state_policy { MLX5_POLICY_DOWN = 0, MLX5_POLICY_UP = 1, MLX5_POLICY_FOLLOW = 2, MLX5_POLICY_INVALID = 0xffffffff }; static inline void * mlx5_buf_offset(struct mlx5_buf *buf, int offset) { return ((char *)buf->direct.buf + offset); } extern struct workqueue_struct *mlx5_core_wq; #define STRUCT_FIELD(header, field) \ .struct_offset_bytes = offsetof(struct ib_unpacked_ ## header, field), \ .struct_size_bytes = sizeof((struct ib_unpacked_ ## header *)0)->field static inline struct mlx5_core_dev *pci2mlx5_core_dev(struct pci_dev *pdev) { return pci_get_drvdata(pdev); } 
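
This is the consumer-visible core of the patch: the per-driver mlx5_uuar_info/mlx5_bf/mlx5_uar trio is replaced by reference-counted mlx5_uars_page objects plus per-queue mlx5_sq_bfreg blue-flame registers, allocated with mlx5_alloc_bfreg()/mlx5_free_bfreg() (declared just below). A minimal sketch of the new allocation pattern, assuming a hypothetical per-SQ context structure:

/* Sketch only: "my_sq" is a hypothetical queue context, not part of this patch. */
struct my_sq {
        struct mlx5_sq_bfreg bfreg;
        /* ... WQ, CQ, DMA resources ... */
};

static int
example_sq_bfreg_setup(struct mlx5_core_dev *mdev, struct my_sq *sq)
{
        int err;

        /* Prefer a write-combining mapping; fall back to non-WC if unavailable. */
        err = mlx5_alloc_bfreg(mdev, &sq->bfreg, true, false);
        if (err != 0)
                err = mlx5_alloc_bfreg(mdev, &sq->bfreg, false, false);
        if (err != 0)
                return (err);

        /*
         * sq->bfreg.index is what gets programmed into the queue context
         * (the field the old code filled from mlx5_uar); the MMIO doorbell
         * write is issued through the bfreg's map/offset pair.
         */
        return (0);
}

static void
example_sq_bfreg_teardown(struct mlx5_core_dev *mdev, struct my_sq *sq)
{
        mlx5_free_bfreg(mdev, &sq->bfreg);
}

Note that on hosts whose PAGE_SIZE equals MLX5_ADAPTER_PAGE_SIZE (4KB), MLX5_UARS_IN_PAGE is 1 and each allocated system page therefore carries MLX5_NON_FP_BFREGS_IN_PAGE == 2 regular blue-flame registers, per the constants added to device.h above.
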
extern struct dentry *mlx5_debugfs_root; static inline u16 fw_rev_maj(struct mlx5_core_dev *dev) { return ioread32be(&dev->iseg->fw_rev) & 0xffff; } static inline u16 fw_rev_min(struct mlx5_core_dev *dev) { return ioread32be(&dev->iseg->fw_rev) >> 16; } static inline u16 fw_rev_sub(struct mlx5_core_dev *dev) { return ioread32be(&dev->iseg->cmdif_rev_fw_sub) & 0xffff; } static inline u16 cmdif_rev_get(struct mlx5_core_dev *dev) { return ioread32be(&dev->iseg->cmdif_rev_fw_sub) >> 16; } static inline int mlx5_get_gid_table_len(u16 param) { if (param > 4) { printf("M4_CORE_DRV_NAME: WARN: ""gid table length is zero\n"); return 0; } return 8 * (1 << param); } static inline void *mlx5_vzalloc(unsigned long size) { void *rtn; rtn = kzalloc(size, GFP_KERNEL | __GFP_NOWARN); return rtn; } static inline void *mlx5_vmalloc(unsigned long size) { void *rtn; rtn = kmalloc(size, GFP_KERNEL | __GFP_NOWARN); if (!rtn) rtn = vmalloc(size); return rtn; } static inline u32 mlx5_base_mkey(const u32 key) { return key & 0xffffff00u; } int mlx5_cmd_init(struct mlx5_core_dev *dev); void mlx5_cmd_cleanup(struct mlx5_core_dev *dev); void mlx5_cmd_use_events(struct mlx5_core_dev *dev); void mlx5_cmd_use_polling(struct mlx5_core_dev *dev); void mlx5_cmd_mbox_status(void *out, u8 *status, u32 *syndrome); int mlx5_core_get_caps(struct mlx5_core_dev *dev, enum mlx5_cap_type cap_type); struct mlx5_async_ctx { struct mlx5_core_dev *dev; atomic_t num_inflight; struct wait_queue_head wait; }; struct mlx5_async_work; typedef void (*mlx5_async_cbk_t)(int status, struct mlx5_async_work *context); struct mlx5_async_work { struct mlx5_async_ctx *ctx; mlx5_async_cbk_t user_callback; }; void mlx5_cmd_init_async_ctx(struct mlx5_core_dev *dev, struct mlx5_async_ctx *ctx); void mlx5_cmd_cleanup_async_ctx(struct mlx5_async_ctx *ctx); int mlx5_cmd_exec_cb(struct mlx5_async_ctx *ctx, void *in, int in_size, void *out, int out_size, mlx5_async_cbk_t callback, struct mlx5_async_work *work); int mlx5_cmd_exec(struct mlx5_core_dev *dev, void *in, int in_size, void *out, int out_size); int mlx5_cmd_exec_polling(struct mlx5_core_dev *dev, void *in, int in_size, void *out, int out_size); int mlx5_cmd_alloc_uar(struct mlx5_core_dev *dev, u32 *uarn); int mlx5_cmd_free_uar(struct mlx5_core_dev *dev, u32 uarn); -int mlx5_alloc_uuars(struct mlx5_core_dev *dev, struct mlx5_uuar_info *uuari); -int mlx5_free_uuars(struct mlx5_core_dev *dev, struct mlx5_uuar_info *uuari); -int mlx5_alloc_map_uar(struct mlx5_core_dev *mdev, struct mlx5_uar *uar); -void mlx5_unmap_free_uar(struct mlx5_core_dev *mdev, struct mlx5_uar *uar); +int mlx5_alloc_bfreg(struct mlx5_core_dev *mdev, struct mlx5_sq_bfreg *bfreg, + bool map_wc, bool fast_path); +void mlx5_free_bfreg(struct mlx5_core_dev *mdev, struct mlx5_sq_bfreg *bfreg); +struct mlx5_uars_page *mlx5_get_uars_page(struct mlx5_core_dev *mdev); +void mlx5_put_uars_page(struct mlx5_core_dev *mdev, struct mlx5_uars_page *up); void mlx5_health_cleanup(struct mlx5_core_dev *dev); int mlx5_health_init(struct mlx5_core_dev *dev); void mlx5_start_health_poll(struct mlx5_core_dev *dev); void mlx5_stop_health_poll(struct mlx5_core_dev *dev, bool disable_health); void mlx5_drain_health_wq(struct mlx5_core_dev *dev); void mlx5_drain_health_recovery(struct mlx5_core_dev *dev); void mlx5_trigger_health_work(struct mlx5_core_dev *dev); void mlx5_trigger_health_watchdog(struct mlx5_core_dev *dev); #define mlx5_buf_alloc_node(dev, size, direct, buf, node) \ mlx5_buf_alloc(dev, size, direct, buf) int mlx5_buf_alloc(struct mlx5_core_dev 
*dev, int size, int max_direct, struct mlx5_buf *buf); void mlx5_buf_free(struct mlx5_core_dev *dev, struct mlx5_buf *buf); int mlx5_core_create_srq(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq, struct mlx5_srq_attr *in); int mlx5_core_destroy_srq(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq); int mlx5_core_query_srq(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq, struct mlx5_srq_attr *out); int mlx5_core_query_vendor_id(struct mlx5_core_dev *mdev, u32 *vendor_id); int mlx5_core_arm_srq(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq, u16 lwm, int is_srq); void mlx5_init_mr_table(struct mlx5_core_dev *dev); void mlx5_cleanup_mr_table(struct mlx5_core_dev *dev); int mlx5_core_create_mkey_cb(struct mlx5_core_dev *dev, struct mlx5_core_mr *mkey, struct mlx5_async_ctx *async_ctx, u32 *in, int inlen, u32 *out, int outlen, mlx5_async_cbk_t callback, struct mlx5_async_work *context); int mlx5_core_create_mkey(struct mlx5_core_dev *dev, struct mlx5_core_mr *mr, u32 *in, int inlen); int mlx5_core_destroy_mkey(struct mlx5_core_dev *dev, struct mlx5_core_mr *mkey); int mlx5_core_query_mkey(struct mlx5_core_dev *dev, struct mlx5_core_mr *mkey, u32 *out, int outlen); int mlx5_core_dump_fill_mkey(struct mlx5_core_dev *dev, struct mlx5_core_mr *mr, u32 *mkey); int mlx5_core_alloc_pd(struct mlx5_core_dev *dev, u32 *pdn); int mlx5_core_dealloc_pd(struct mlx5_core_dev *dev, u32 pdn); int mlx5_core_mad_ifc(struct mlx5_core_dev *dev, const void *inb, void *outb, u16 opmod, u8 port); void mlx5_fwp_flush(struct mlx5_fw_page *fwp); void mlx5_fwp_invalidate(struct mlx5_fw_page *fwp); struct mlx5_fw_page *mlx5_fwp_alloc(struct mlx5_core_dev *dev, gfp_t flags, unsigned num); void mlx5_fwp_free(struct mlx5_fw_page *fwp); u64 mlx5_fwp_get_dma(struct mlx5_fw_page *fwp, size_t offset); void *mlx5_fwp_get_virt(struct mlx5_fw_page *fwp, size_t offset); void mlx5_pagealloc_init(struct mlx5_core_dev *dev); void mlx5_pagealloc_cleanup(struct mlx5_core_dev *dev); int mlx5_pagealloc_start(struct mlx5_core_dev *dev); void mlx5_pagealloc_stop(struct mlx5_core_dev *dev); void mlx5_core_req_pages_handler(struct mlx5_core_dev *dev, u16 func_id, s32 npages); int mlx5_satisfy_startup_pages(struct mlx5_core_dev *dev, int boot); int mlx5_reclaim_startup_pages(struct mlx5_core_dev *dev); s64 mlx5_wait_for_reclaim_vfs_pages(struct mlx5_core_dev *dev); void mlx5_register_debugfs(void); void mlx5_unregister_debugfs(void); int mlx5_eq_init(struct mlx5_core_dev *dev); void mlx5_eq_cleanup(struct mlx5_core_dev *dev); void mlx5_fill_page_array(struct mlx5_buf *buf, __be64 *pas); void mlx5_cq_completion(struct mlx5_core_dev *dev, struct mlx5_eqe *eqe); void mlx5_rsc_event(struct mlx5_core_dev *dev, u32 rsn, int event_type); void mlx5_srq_event(struct mlx5_core_dev *dev, u32 srqn, int event_type); struct mlx5_core_srq *mlx5_core_get_srq(struct mlx5_core_dev *dev, u32 srqn); void mlx5_cmd_comp_handler(struct mlx5_core_dev *dev, u64 vector, enum mlx5_cmd_mode mode); void mlx5_cq_event(struct mlx5_core_dev *dev, u32 cqn, int event_type); int mlx5_create_map_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq, u8 vecidx, - int nent, u64 mask, struct mlx5_uar *uar); + int nent, u64 mask); int mlx5_destroy_unmap_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq); int mlx5_start_eqs(struct mlx5_core_dev *dev); int mlx5_stop_eqs(struct mlx5_core_dev *dev); int mlx5_vector2eqn(struct mlx5_core_dev *dev, int vector, int *eqn, int *irqn); int mlx5_core_attach_mcg(struct mlx5_core_dev *dev, union ib_gid *mgid, u32 qpn); int 
mlx5_core_detach_mcg(struct mlx5_core_dev *dev, union ib_gid *mgid, u32 qpn); int mlx5_core_set_dc_cnak_trace(struct mlx5_core_dev *dev, int enable, u64 addr); int mlx5_qp_debugfs_init(struct mlx5_core_dev *dev); void mlx5_qp_debugfs_cleanup(struct mlx5_core_dev *dev); int mlx5_core_access_reg(struct mlx5_core_dev *dev, void *data_in, int size_in, void *data_out, int size_out, u16 reg_num, int arg, int write); void mlx5_toggle_port_link(struct mlx5_core_dev *dev); int mlx5_debug_eq_add(struct mlx5_core_dev *dev, struct mlx5_eq *eq); void mlx5_debug_eq_remove(struct mlx5_core_dev *dev, struct mlx5_eq *eq); int mlx5_core_eq_query(struct mlx5_core_dev *dev, struct mlx5_eq *eq, u32 *out, int outlen); int mlx5_eq_debugfs_init(struct mlx5_core_dev *dev); void mlx5_eq_debugfs_cleanup(struct mlx5_core_dev *dev); int mlx5_cq_debugfs_init(struct mlx5_core_dev *dev); void mlx5_cq_debugfs_cleanup(struct mlx5_core_dev *dev); int mlx5_db_alloc(struct mlx5_core_dev *dev, struct mlx5_db *db); int mlx5_db_alloc_node(struct mlx5_core_dev *dev, struct mlx5_db *db, int node); void mlx5_db_free(struct mlx5_core_dev *dev, struct mlx5_db *db); const char *mlx5_command_str(int command); int mlx5_cmdif_debugfs_init(struct mlx5_core_dev *dev); void mlx5_cmdif_debugfs_cleanup(struct mlx5_core_dev *dev); int mlx5_core_create_psv(struct mlx5_core_dev *dev, u32 pdn, int npsvs, u32 *sig_index); int mlx5_core_destroy_psv(struct mlx5_core_dev *dev, int psv_num); void mlx5_core_put_rsc(struct mlx5_core_rsc_common *common); u8 mlx5_is_wol_supported(struct mlx5_core_dev *dev); int mlx5_set_wol(struct mlx5_core_dev *dev, u8 wol_mode); int mlx5_set_dropless_mode(struct mlx5_core_dev *dev, u16 timeout); int mlx5_query_dropless_mode(struct mlx5_core_dev *dev, u16 *timeout); int mlx5_query_wol(struct mlx5_core_dev *dev, u8 *wol_mode); int mlx5_core_access_pvlc(struct mlx5_core_dev *dev, struct mlx5_pvlc_reg *pvlc, int write); int mlx5_core_access_ptys(struct mlx5_core_dev *dev, struct mlx5_ptys_reg *ptys, int write); int mlx5_core_access_pmtu(struct mlx5_core_dev *dev, struct mlx5_pmtu_reg *pmtu, int write); int mlx5_vxlan_udp_port_add(struct mlx5_core_dev *dev, u16 port); int mlx5_vxlan_udp_port_delete(struct mlx5_core_dev *dev, u16 port); int mlx5_query_port_cong_status(struct mlx5_core_dev *mdev, int protocol, int priority, int *is_enable); int mlx5_modify_port_cong_status(struct mlx5_core_dev *mdev, int protocol, int priority, int enable); int mlx5_query_port_cong_params(struct mlx5_core_dev *mdev, int protocol, void *out, int out_size); int mlx5_modify_port_cong_params(struct mlx5_core_dev *mdev, void *in, int in_size); int mlx5_query_port_cong_statistics(struct mlx5_core_dev *mdev, int clear, void *out, int out_size); int mlx5_set_diagnostic_params(struct mlx5_core_dev *mdev, void *in, int in_size); int mlx5_query_diagnostic_counters(struct mlx5_core_dev *mdev, u8 num_of_samples, u16 sample_index, void *out, int out_size); int mlx5_vsc_find_cap(struct mlx5_core_dev *mdev); int mlx5_vsc_lock(struct mlx5_core_dev *mdev); void mlx5_vsc_unlock(struct mlx5_core_dev *mdev); int mlx5_vsc_set_space(struct mlx5_core_dev *mdev, u16 space); int mlx5_vsc_wait_on_flag(struct mlx5_core_dev *mdev, u32 expected); int mlx5_vsc_write(struct mlx5_core_dev *mdev, u32 addr, const u32 *data); int mlx5_vsc_read(struct mlx5_core_dev *mdev, u32 addr, u32 *data); int mlx5_vsc_lock_addr_space(struct mlx5_core_dev *mdev, u32 addr); int mlx5_vsc_unlock_addr_space(struct mlx5_core_dev *mdev, u32 addr); int mlx5_pci_read_power_status(struct mlx5_core_dev 
*mdev, u16 *p_power, u8 *p_status); static inline u32 mlx5_mkey_to_idx(u32 mkey) { return mkey >> 8; } static inline u32 mlx5_idx_to_mkey(u32 mkey_idx) { return mkey_idx << 8; } static inline u8 mlx5_mkey_variant(u32 mkey) { return mkey & 0xff; } enum { MLX5_PROF_MASK_QP_SIZE = (u64)1 << 0, MLX5_PROF_MASK_MR_CACHE = (u64)1 << 1, }; enum { MAX_MR_CACHE_ENTRIES = 15, }; struct mlx5_interface { void * (*add)(struct mlx5_core_dev *dev); void (*remove)(struct mlx5_core_dev *dev, void *context); void (*event)(struct mlx5_core_dev *dev, void *context, enum mlx5_dev_event event, unsigned long param); void * (*get_dev)(void *context); int protocol; struct list_head list; }; void *mlx5_get_protocol_dev(struct mlx5_core_dev *mdev, int protocol); int mlx5_register_interface(struct mlx5_interface *intf); void mlx5_unregister_interface(struct mlx5_interface *intf); unsigned int mlx5_core_reserved_gids_count(struct mlx5_core_dev *dev); int mlx5_core_roce_gid_set(struct mlx5_core_dev *dev, unsigned int index, u8 roce_version, u8 roce_l3_type, const u8 *gid, const u8 *mac, bool vlan, u16 vlan_id); struct mlx5_profile { u64 mask; u8 log_max_qp; struct { int size; int limit; } mr_cache[MAX_MR_CACHE_ENTRIES]; }; enum { MLX5_PCI_DEV_IS_VF = 1 << 0, }; enum { MLX5_TRIGGERED_CMD_COMP = (u64)1 << 32, }; static inline int mlx5_core_is_pf(struct mlx5_core_dev *dev) { return !(dev->priv.pci_dev_data & MLX5_PCI_DEV_IS_VF); } #ifdef RATELIMIT int mlx5_init_rl_table(struct mlx5_core_dev *dev); void mlx5_cleanup_rl_table(struct mlx5_core_dev *dev); int mlx5_rl_add_rate(struct mlx5_core_dev *dev, u32 rate, u32 burst, u16 *index); void mlx5_rl_remove_rate(struct mlx5_core_dev *dev, u32 rate, u32 burst); bool mlx5_rl_is_in_range(const struct mlx5_core_dev *dev, u32 rate, u32 burst); static inline bool mlx5_rl_is_supported(struct mlx5_core_dev *dev) { return !!(dev->priv.rl_table.max_size); } #endif void mlx5_disable_interrupts(struct mlx5_core_dev *); void mlx5_poll_interrupts(struct mlx5_core_dev *); #endif /* MLX5_DRIVER_H */ diff --git a/sys/dev/mlx5/mlx5_core/mlx5_cq.c b/sys/dev/mlx5/mlx5_core/mlx5_cq.c index 929fd1d6bc31..248d8c9d75c8 100644 --- a/sys/dev/mlx5/mlx5_core/mlx5_cq.c +++ b/sys/dev/mlx5/mlx5_core/mlx5_cq.c @@ -1,266 +1,267 @@ /*- * Copyright (c) 2013-2017, Mellanox Technologies, Ltd. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS `AS IS' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #include #include #include #include #include #include #include "mlx5_core.h" #include static void mlx5_cq_table_write_lock(struct mlx5_cq_table *table) { atomic_inc(&table->writercount); /* make sure all see the updated writercount */ NET_EPOCH_WAIT(); spin_lock(&table->writerlock); } static void mlx5_cq_table_write_unlock(struct mlx5_cq_table *table) { spin_unlock(&table->writerlock); atomic_dec(&table->writercount); /* drain all pending CQ callers */ NET_EPOCH_WAIT(); } void mlx5_cq_completion(struct mlx5_core_dev *dev, struct mlx5_eqe *eqe) { struct mlx5_cq_table *table = &dev->priv.cq_table; struct mlx5_core_cq *cq; struct epoch_tracker et; u32 cqn; bool do_lock; cqn = be32_to_cpu(eqe->data.comp.cqn) & 0xffffff; NET_EPOCH_ENTER(et); do_lock = atomic_read(&table->writercount) != 0; if (unlikely(do_lock)) spin_lock(&table->writerlock); if (likely(cqn < MLX5_CQ_LINEAR_ARRAY_SIZE)) cq = table->linear_array[cqn].cq; else cq = radix_tree_lookup(&table->tree, cqn); if (unlikely(do_lock)) spin_unlock(&table->writerlock); if (likely(cq != NULL)) { ++cq->arm_sn; cq->comp(cq, eqe); } else { mlx5_core_warn(dev, "Completion event for bogus CQ 0x%x\n", cqn); } NET_EPOCH_EXIT(et); } void mlx5_cq_event(struct mlx5_core_dev *dev, u32 cqn, int event_type) { struct mlx5_cq_table *table = &dev->priv.cq_table; struct mlx5_core_cq *cq; struct epoch_tracker et; bool do_lock; NET_EPOCH_ENTER(et); do_lock = atomic_read(&table->writercount) != 0; if (unlikely(do_lock)) spin_lock(&table->writerlock); if (likely(cqn < MLX5_CQ_LINEAR_ARRAY_SIZE)) cq = table->linear_array[cqn].cq; else cq = radix_tree_lookup(&table->tree, cqn); if (unlikely(do_lock)) spin_unlock(&table->writerlock); if (likely(cq != NULL)) { cq->event(cq, event_type); } else { mlx5_core_warn(dev, "Asynchronous event for bogus CQ 0x%x\n", cqn); } NET_EPOCH_EXIT(et); } int mlx5_core_create_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq, u32 *in, int inlen, u32 *out, int outlen) { struct mlx5_cq_table *table = &dev->priv.cq_table; u32 din[MLX5_ST_SZ_DW(destroy_cq_in)] = {0}; u32 dout[MLX5_ST_SZ_DW(destroy_cq_out)] = {0}; int err; memset(out, 0, outlen); MLX5_SET(create_cq_in, in, opcode, MLX5_CMD_OP_CREATE_CQ); err = mlx5_cmd_exec(dev, in, inlen, out, outlen); if (err) return err; cq->cqn = MLX5_GET(create_cq_out, out, cqn); cq->cons_index = 0; cq->arm_sn = 0; mlx5_cq_table_write_lock(table); err = radix_tree_insert(&table->tree, cq->cqn, cq); if (likely(err == 0 && cq->cqn < MLX5_CQ_LINEAR_ARRAY_SIZE)) table->linear_array[cq->cqn].cq = cq; mlx5_cq_table_write_unlock(table); if (err) goto err_cmd; cq->pid = curthread->td_proc->p_pid; + cq->uar = dev->priv.uar; return 0; err_cmd: MLX5_SET(destroy_cq_in, din, opcode, MLX5_CMD_OP_DESTROY_CQ); MLX5_SET(destroy_cq_in, din, cqn, cq->cqn); mlx5_cmd_exec(dev, din, sizeof(din), dout, sizeof(dout)); return err; } EXPORT_SYMBOL(mlx5_core_create_cq); int mlx5_core_destroy_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq) { struct mlx5_cq_table *table = &dev->priv.cq_table; u32 
out[MLX5_ST_SZ_DW(destroy_cq_out)] = {0}; u32 in[MLX5_ST_SZ_DW(destroy_cq_in)] = {0}; struct mlx5_core_cq *tmp; mlx5_cq_table_write_lock(table); if (likely(cq->cqn < MLX5_CQ_LINEAR_ARRAY_SIZE)) table->linear_array[cq->cqn].cq = NULL; tmp = radix_tree_delete(&table->tree, cq->cqn); mlx5_cq_table_write_unlock(table); if (unlikely(tmp == NULL)) { mlx5_core_warn(dev, "cq 0x%x not found in tree\n", cq->cqn); return -EINVAL; } else if (unlikely(tmp != cq)) { mlx5_core_warn(dev, "corrupted cqn 0x%x\n", cq->cqn); return -EINVAL; } MLX5_SET(destroy_cq_in, in, opcode, MLX5_CMD_OP_DESTROY_CQ); MLX5_SET(destroy_cq_in, in, cqn, cq->cqn); return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); } EXPORT_SYMBOL(mlx5_core_destroy_cq); int mlx5_core_query_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq, u32 *out, int outlen) { u32 in[MLX5_ST_SZ_DW(query_cq_in)] = {0}; MLX5_SET(query_cq_in, in, opcode, MLX5_CMD_OP_QUERY_CQ); MLX5_SET(query_cq_in, in, cqn, cq->cqn); return mlx5_cmd_exec(dev, in, sizeof(in), out, outlen); } EXPORT_SYMBOL(mlx5_core_query_cq); int mlx5_core_modify_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq, u32 *in, int inlen) { u32 out[MLX5_ST_SZ_DW(modify_cq_out)] = {0}; MLX5_SET(modify_cq_in, in, opcode, MLX5_CMD_OP_MODIFY_CQ); return mlx5_cmd_exec(dev, in, inlen, out, sizeof(out)); } EXPORT_SYMBOL(mlx5_core_modify_cq); int mlx5_core_modify_cq_moderation(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq, u16 cq_period, u16 cq_max_count) { u32 in[MLX5_ST_SZ_DW(modify_cq_in)] = {0}; void *cqc; MLX5_SET(modify_cq_in, in, cqn, cq->cqn); cqc = MLX5_ADDR_OF(modify_cq_in, in, cq_context); MLX5_SET(cqc, cqc, cq_period, cq_period); MLX5_SET(cqc, cqc, cq_max_count, cq_max_count); MLX5_SET(modify_cq_in, in, modify_field_select_resize_field_select.modify_field_select.modify_field_select, MLX5_CQ_MODIFY_PERIOD | MLX5_CQ_MODIFY_COUNT); return mlx5_core_modify_cq(dev, cq, in, sizeof(in)); } int mlx5_core_modify_cq_moderation_mode(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq, u16 cq_period, u16 cq_max_count, u8 cq_mode) { u32 in[MLX5_ST_SZ_DW(modify_cq_in)] = {0}; void *cqc; MLX5_SET(modify_cq_in, in, cqn, cq->cqn); cqc = MLX5_ADDR_OF(modify_cq_in, in, cq_context); MLX5_SET(cqc, cqc, cq_period, cq_period); MLX5_SET(cqc, cqc, cq_max_count, cq_max_count); MLX5_SET(cqc, cqc, cq_period_mode, cq_mode); MLX5_SET(modify_cq_in, in, modify_field_select_resize_field_select.modify_field_select.modify_field_select, MLX5_CQ_MODIFY_PERIOD | MLX5_CQ_MODIFY_COUNT | MLX5_CQ_MODIFY_PERIOD_MODE); return mlx5_core_modify_cq(dev, cq, in, sizeof(in)); } int mlx5_init_cq_table(struct mlx5_core_dev *dev) { struct mlx5_cq_table *table = &dev->priv.cq_table; memset(table, 0, sizeof(*table)); spin_lock_init(&table->writerlock); INIT_RADIX_TREE(&table->tree, GFP_ATOMIC); return 0; } void mlx5_cleanup_cq_table(struct mlx5_core_dev *dev) { } diff --git a/sys/dev/mlx5/mlx5_core/mlx5_eq.c b/sys/dev/mlx5/mlx5_core/mlx5_eq.c index 9e744e132c36..2dc134d0f9c4 100644 --- a/sys/dev/mlx5/mlx5_core/mlx5_eq.c +++ b/sys/dev/mlx5/mlx5_core/mlx5_eq.c @@ -1,784 +1,781 @@ /*- * Copyright (c) 2013-2017, Mellanox Technologies, Ltd. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. 
Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS `AS IS' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #include #include #include #include #include #include "mlx5_core.h" #include "eswitch.h" #include "opt_rss.h" #ifdef RSS #include #include #endif enum { MLX5_EQE_SIZE = sizeof(struct mlx5_eqe), MLX5_EQE_OWNER_INIT_VAL = 0x1, }; enum { MLX5_NUM_SPARE_EQE = 0x80, MLX5_NUM_ASYNC_EQE = 0x100, MLX5_NUM_CMD_EQE = 32, }; enum { MLX5_EQ_DOORBEL_OFFSET = 0x40, }; #define MLX5_ASYNC_EVENT_MASK ((1ull << MLX5_EVENT_TYPE_PATH_MIG) | \ (1ull << MLX5_EVENT_TYPE_COMM_EST) | \ (1ull << MLX5_EVENT_TYPE_SQ_DRAINED) | \ (1ull << MLX5_EVENT_TYPE_CQ_ERROR) | \ (1ull << MLX5_EVENT_TYPE_WQ_CATAS_ERROR) | \ (1ull << MLX5_EVENT_TYPE_PATH_MIG_FAILED) | \ (1ull << MLX5_EVENT_TYPE_WQ_INVAL_REQ_ERROR) | \ (1ull << MLX5_EVENT_TYPE_WQ_ACCESS_ERROR) | \ (1ull << MLX5_EVENT_TYPE_PORT_CHANGE) | \ (1ull << MLX5_EVENT_TYPE_SRQ_CATAS_ERROR) | \ (1ull << MLX5_EVENT_TYPE_SRQ_LAST_WQE) | \ (1ull << MLX5_EVENT_TYPE_SRQ_RQ_LIMIT) | \ (1ull << MLX5_EVENT_TYPE_NIC_VPORT_CHANGE)) struct map_eq_in { u64 mask; u32 reserved; u32 unmap_eqn; }; struct cre_des_eq { u8 reserved[15]; u8 eqn; }; /*Function prototype*/ static void mlx5_port_module_event(struct mlx5_core_dev *dev, struct mlx5_eqe *eqe); static void mlx5_port_general_notification_event(struct mlx5_core_dev *dev, struct mlx5_eqe *eqe); static int mlx5_cmd_destroy_eq(struct mlx5_core_dev *dev, u8 eqn) { u32 in[MLX5_ST_SZ_DW(destroy_eq_in)] = {0}; u32 out[MLX5_ST_SZ_DW(destroy_eq_out)] = {0}; MLX5_SET(destroy_eq_in, in, opcode, MLX5_CMD_OP_DESTROY_EQ); MLX5_SET(destroy_eq_in, in, eq_number, eqn); return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); } static struct mlx5_eqe *get_eqe(struct mlx5_eq *eq, u32 entry) { return mlx5_buf_offset(&eq->buf, entry * MLX5_EQE_SIZE); } static struct mlx5_eqe *next_eqe_sw(struct mlx5_eq *eq) { struct mlx5_eqe *eqe = get_eqe(eq, eq->cons_index & (eq->nent - 1)); return ((eqe->owner & 1) ^ !!(eq->cons_index & eq->nent)) ? 
NULL : eqe; } static const char *eqe_type_str(u8 type) { switch (type) { case MLX5_EVENT_TYPE_COMP: return "MLX5_EVENT_TYPE_COMP"; case MLX5_EVENT_TYPE_PATH_MIG: return "MLX5_EVENT_TYPE_PATH_MIG"; case MLX5_EVENT_TYPE_COMM_EST: return "MLX5_EVENT_TYPE_COMM_EST"; case MLX5_EVENT_TYPE_SQ_DRAINED: return "MLX5_EVENT_TYPE_SQ_DRAINED"; case MLX5_EVENT_TYPE_SRQ_LAST_WQE: return "MLX5_EVENT_TYPE_SRQ_LAST_WQE"; case MLX5_EVENT_TYPE_SRQ_RQ_LIMIT: return "MLX5_EVENT_TYPE_SRQ_RQ_LIMIT"; case MLX5_EVENT_TYPE_CQ_ERROR: return "MLX5_EVENT_TYPE_CQ_ERROR"; case MLX5_EVENT_TYPE_WQ_CATAS_ERROR: return "MLX5_EVENT_TYPE_WQ_CATAS_ERROR"; case MLX5_EVENT_TYPE_PATH_MIG_FAILED: return "MLX5_EVENT_TYPE_PATH_MIG_FAILED"; case MLX5_EVENT_TYPE_WQ_INVAL_REQ_ERROR: return "MLX5_EVENT_TYPE_WQ_INVAL_REQ_ERROR"; case MLX5_EVENT_TYPE_WQ_ACCESS_ERROR: return "MLX5_EVENT_TYPE_WQ_ACCESS_ERROR"; case MLX5_EVENT_TYPE_SRQ_CATAS_ERROR: return "MLX5_EVENT_TYPE_SRQ_CATAS_ERROR"; case MLX5_EVENT_TYPE_INTERNAL_ERROR: return "MLX5_EVENT_TYPE_INTERNAL_ERROR"; case MLX5_EVENT_TYPE_PORT_CHANGE: return "MLX5_EVENT_TYPE_PORT_CHANGE"; case MLX5_EVENT_TYPE_GPIO_EVENT: return "MLX5_EVENT_TYPE_GPIO_EVENT"; case MLX5_EVENT_TYPE_CODING_PORT_MODULE_EVENT: return "MLX5_EVENT_TYPE_PORT_MODULE_EVENT"; case MLX5_EVENT_TYPE_TEMP_WARN_EVENT: return "MLX5_EVENT_TYPE_TEMP_WARN_EVENT"; case MLX5_EVENT_TYPE_REMOTE_CONFIG: return "MLX5_EVENT_TYPE_REMOTE_CONFIG"; case MLX5_EVENT_TYPE_DB_BF_CONGESTION: return "MLX5_EVENT_TYPE_DB_BF_CONGESTION"; case MLX5_EVENT_TYPE_STALL_EVENT: return "MLX5_EVENT_TYPE_STALL_EVENT"; case MLX5_EVENT_TYPE_CMD: return "MLX5_EVENT_TYPE_CMD"; case MLX5_EVENT_TYPE_PAGE_REQUEST: return "MLX5_EVENT_TYPE_PAGE_REQUEST"; case MLX5_EVENT_TYPE_NIC_VPORT_CHANGE: return "MLX5_EVENT_TYPE_NIC_VPORT_CHANGE"; case MLX5_EVENT_TYPE_FPGA_ERROR: return "MLX5_EVENT_TYPE_FPGA_ERROR"; case MLX5_EVENT_TYPE_FPGA_QP_ERROR: return "MLX5_EVENT_TYPE_FPGA_QP_ERROR"; case MLX5_EVENT_TYPE_CODING_DCBX_CHANGE_EVENT: return "MLX5_EVENT_TYPE_CODING_DCBX_CHANGE_EVENT"; case MLX5_EVENT_TYPE_CODING_GENERAL_NOTIFICATION_EVENT: return "MLX5_EVENT_TYPE_CODING_GENERAL_NOTIFICATION_EVENT"; default: return "Unrecognized event"; } } static enum mlx5_dev_event port_subtype_event(u8 subtype) { switch (subtype) { case MLX5_PORT_CHANGE_SUBTYPE_DOWN: return MLX5_DEV_EVENT_PORT_DOWN; case MLX5_PORT_CHANGE_SUBTYPE_ACTIVE: return MLX5_DEV_EVENT_PORT_UP; case MLX5_PORT_CHANGE_SUBTYPE_INITIALIZED: return MLX5_DEV_EVENT_PORT_INITIALIZED; case MLX5_PORT_CHANGE_SUBTYPE_LID: return MLX5_DEV_EVENT_LID_CHANGE; case MLX5_PORT_CHANGE_SUBTYPE_PKEY: return MLX5_DEV_EVENT_PKEY_CHANGE; case MLX5_PORT_CHANGE_SUBTYPE_GUID: return MLX5_DEV_EVENT_GUID_CHANGE; case MLX5_PORT_CHANGE_SUBTYPE_CLIENT_REREG: return MLX5_DEV_EVENT_CLIENT_REREG; } return -1; } static enum mlx5_dev_event dcbx_subevent(u8 subtype) { switch (subtype) { case MLX5_DCBX_EVENT_SUBTYPE_ERROR_STATE_DCBX: return MLX5_DEV_EVENT_ERROR_STATE_DCBX; case MLX5_DCBX_EVENT_SUBTYPE_REMOTE_CONFIG_CHANGE: return MLX5_DEV_EVENT_REMOTE_CONFIG_CHANGE; case MLX5_DCBX_EVENT_SUBTYPE_LOCAL_OPER_CHANGE: return MLX5_DEV_EVENT_LOCAL_OPER_CHANGE; case MLX5_DCBX_EVENT_SUBTYPE_REMOTE_CONFIG_APP_PRIORITY_CHANGE: return MLX5_DEV_EVENT_REMOTE_CONFIG_APPLICATION_PRIORITY_CHANGE; } return -1; } static void eq_update_ci(struct mlx5_eq *eq, int arm) { __be32 __iomem *addr = eq->doorbell + (arm ? 
0 : 2); u32 val = (eq->cons_index & 0xffffff) | (eq->eqn << 24); __raw_writel((__force u32) cpu_to_be32(val), addr); /* We still want ordering, just not swabbing, so add a barrier */ mb(); } static void mlx5_temp_warning_event(struct mlx5_core_dev *dev, struct mlx5_eqe *eqe) { mlx5_core_warn(dev, "High temperature on sensors with bit set %#jx %#jx\n", (uintmax_t)be64_to_cpu(eqe->data.temp_warning.sensor_warning_msb), (uintmax_t)be64_to_cpu(eqe->data.temp_warning.sensor_warning_lsb)); } static int mlx5_eq_int(struct mlx5_core_dev *dev, struct mlx5_eq *eq) { struct mlx5_eqe *eqe; int eqes_found = 0; int set_ci = 0; u32 cqn; u32 rsn; u8 port; while ((eqe = next_eqe_sw(eq))) { /* * Make sure we read EQ entry contents after we've * checked the ownership bit. */ atomic_thread_fence_acq(); mlx5_core_dbg(eq->dev, "eqn %d, eqe type %s\n", eq->eqn, eqe_type_str(eqe->type)); switch (eqe->type) { case MLX5_EVENT_TYPE_COMP: mlx5_cq_completion(dev, eqe); break; case MLX5_EVENT_TYPE_PATH_MIG: case MLX5_EVENT_TYPE_COMM_EST: case MLX5_EVENT_TYPE_SQ_DRAINED: case MLX5_EVENT_TYPE_SRQ_LAST_WQE: case MLX5_EVENT_TYPE_WQ_CATAS_ERROR: case MLX5_EVENT_TYPE_PATH_MIG_FAILED: case MLX5_EVENT_TYPE_WQ_INVAL_REQ_ERROR: case MLX5_EVENT_TYPE_WQ_ACCESS_ERROR: rsn = be32_to_cpu(eqe->data.qp_srq.qp_srq_n) & 0xffffff; mlx5_core_dbg(dev, "event %s(%d) arrived on resource 0x%x\n", eqe_type_str(eqe->type), eqe->type, rsn); mlx5_rsc_event(dev, rsn, eqe->type); break; case MLX5_EVENT_TYPE_SRQ_RQ_LIMIT: case MLX5_EVENT_TYPE_SRQ_CATAS_ERROR: rsn = be32_to_cpu(eqe->data.qp_srq.qp_srq_n) & 0xffffff; mlx5_core_dbg(dev, "SRQ event %s(%d): srqn 0x%x\n", eqe_type_str(eqe->type), eqe->type, rsn); mlx5_srq_event(dev, rsn, eqe->type); break; case MLX5_EVENT_TYPE_CMD: if (dev->state != MLX5_DEVICE_STATE_INTERNAL_ERROR) { mlx5_cmd_comp_handler(dev, be32_to_cpu(eqe->data.cmd.vector), MLX5_CMD_MODE_EVENTS); } break; case MLX5_EVENT_TYPE_PORT_CHANGE: port = (eqe->data.port.port >> 4) & 0xf; switch (eqe->sub_type) { case MLX5_PORT_CHANGE_SUBTYPE_DOWN: case MLX5_PORT_CHANGE_SUBTYPE_ACTIVE: case MLX5_PORT_CHANGE_SUBTYPE_LID: case MLX5_PORT_CHANGE_SUBTYPE_PKEY: case MLX5_PORT_CHANGE_SUBTYPE_GUID: case MLX5_PORT_CHANGE_SUBTYPE_CLIENT_REREG: case MLX5_PORT_CHANGE_SUBTYPE_INITIALIZED: if (dev->event) dev->event(dev, port_subtype_event(eqe->sub_type), (unsigned long)port); break; default: mlx5_core_warn(dev, "Port event with unrecognized subtype: port %d, sub_type %d\n", port, eqe->sub_type); } break; case MLX5_EVENT_TYPE_CODING_DCBX_CHANGE_EVENT: port = (eqe->data.port.port >> 4) & 0xf; switch (eqe->sub_type) { case MLX5_DCBX_EVENT_SUBTYPE_ERROR_STATE_DCBX: case MLX5_DCBX_EVENT_SUBTYPE_REMOTE_CONFIG_CHANGE: case MLX5_DCBX_EVENT_SUBTYPE_LOCAL_OPER_CHANGE: case MLX5_DCBX_EVENT_SUBTYPE_REMOTE_CONFIG_APP_PRIORITY_CHANGE: if (dev->event) dev->event(dev, dcbx_subevent(eqe->sub_type), 0); break; default: mlx5_core_warn(dev, "dcbx event with unrecognized subtype: port %d, sub_type %d\n", port, eqe->sub_type); } break; case MLX5_EVENT_TYPE_CODING_GENERAL_NOTIFICATION_EVENT: mlx5_port_general_notification_event(dev, eqe); break; case MLX5_EVENT_TYPE_CQ_ERROR: cqn = be32_to_cpu(eqe->data.cq_err.cqn) & 0xffffff; mlx5_core_warn(dev, "CQ error on CQN 0x%x, syndrom 0x%x\n", cqn, eqe->data.cq_err.syndrome); mlx5_cq_event(dev, cqn, eqe->type); break; case MLX5_EVENT_TYPE_PAGE_REQUEST: { u16 func_id = be16_to_cpu(eqe->data.req_pages.func_id); s32 npages = be32_to_cpu(eqe->data.req_pages.num_pages); mlx5_core_dbg(dev, "page request for func 0x%x, npages %d\n", func_id, 
npages); mlx5_core_req_pages_handler(dev, func_id, npages); } break; case MLX5_EVENT_TYPE_CODING_PORT_MODULE_EVENT: mlx5_port_module_event(dev, eqe); break; case MLX5_EVENT_TYPE_NIC_VPORT_CHANGE: { struct mlx5_eqe_vport_change *vc_eqe = &eqe->data.vport_change; u16 vport_num = be16_to_cpu(vc_eqe->vport_num); if (dev->event) dev->event(dev, MLX5_DEV_EVENT_VPORT_CHANGE, (unsigned long)vport_num); } if (dev->priv.eswitch != NULL) mlx5_eswitch_vport_event(dev->priv.eswitch, eqe); break; case MLX5_EVENT_TYPE_FPGA_ERROR: case MLX5_EVENT_TYPE_FPGA_QP_ERROR: mlx5_fpga_event(dev, eqe->type, &eqe->data.raw); break; case MLX5_EVENT_TYPE_TEMP_WARN_EVENT: mlx5_temp_warning_event(dev, eqe); break; default: mlx5_core_warn(dev, "Unhandled event 0x%x on EQ 0x%x\n", eqe->type, eq->eqn); break; } ++eq->cons_index; eqes_found = 1; ++set_ci; /* The HCA will think the queue has overflowed if we * don't tell it we've been processing events. We * create our EQs with MLX5_NUM_SPARE_EQE extra * entries, so we must update our consumer index at * least that often. */ if (unlikely(set_ci >= MLX5_NUM_SPARE_EQE)) { eq_update_ci(eq, 0); set_ci = 0; } } eq_update_ci(eq, 1); return eqes_found; } static irqreturn_t mlx5_msix_handler(int irq, void *eq_ptr) { struct mlx5_eq *eq = eq_ptr; struct mlx5_core_dev *dev = eq->dev; /* check if IRQs are not disabled */ if (likely(dev->priv.disable_irqs == 0)) mlx5_eq_int(dev, eq); /* MSI-X vectors always belong to us */ return IRQ_HANDLED; } static void init_eq_buf(struct mlx5_eq *eq) { struct mlx5_eqe *eqe; int i; for (i = 0; i < eq->nent; i++) { eqe = get_eqe(eq, i); eqe->owner = MLX5_EQE_OWNER_INIT_VAL; } } int mlx5_create_map_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq, u8 vecidx, - int nent, u64 mask, struct mlx5_uar *uar) + int nent, u64 mask) { u32 out[MLX5_ST_SZ_DW(create_eq_out)] = {0}; struct mlx5_priv *priv = &dev->priv; __be64 *pas; void *eqc; int inlen; u32 *in; int err; eq->nent = roundup_pow_of_two(nent + MLX5_NUM_SPARE_EQE); eq->cons_index = 0; err = mlx5_buf_alloc(dev, eq->nent * MLX5_EQE_SIZE, 2 * PAGE_SIZE, &eq->buf); if (err) return err; init_eq_buf(eq); inlen = MLX5_ST_SZ_BYTES(create_eq_in) + MLX5_FLD_SZ_BYTES(create_eq_in, pas[0]) * eq->buf.npages; in = mlx5_vzalloc(inlen); if (!in) { err = -ENOMEM; goto err_buf; } pas = (__be64 *)MLX5_ADDR_OF(create_eq_in, in, pas); mlx5_fill_page_array(&eq->buf, pas); MLX5_SET(create_eq_in, in, opcode, MLX5_CMD_OP_CREATE_EQ); MLX5_SET64(create_eq_in, in, event_bitmask, mask); eqc = MLX5_ADDR_OF(create_eq_in, in, eq_context_entry); MLX5_SET(eqc, eqc, log_eq_size, ilog2(eq->nent)); - MLX5_SET(eqc, eqc, uar_page, uar->index); + MLX5_SET(eqc, eqc, uar_page, priv->uar->index); MLX5_SET(eqc, eqc, intr, vecidx); MLX5_SET(eqc, eqc, log_page_size, eq->buf.page_shift - MLX5_ADAPTER_PAGE_SHIFT); err = mlx5_cmd_exec(dev, in, inlen, out, sizeof(out)); if (err) goto err_in; eq->eqn = MLX5_GET(create_eq_out, out, eq_number); eq->irqn = vecidx; eq->dev = dev; - eq->doorbell = uar->map + MLX5_EQ_DOORBEL_OFFSET; + eq->doorbell = priv->uar->map + MLX5_EQ_DOORBEL_OFFSET; err = request_irq(priv->msix_arr[vecidx].vector, mlx5_msix_handler, 0, "mlx5_core", eq); if (err) goto err_eq; #ifdef RSS if (vecidx >= MLX5_EQ_VEC_COMP_BASE) { u8 bucket = vecidx - MLX5_EQ_VEC_COMP_BASE; err = bind_irq_to_cpu(priv->msix_arr[vecidx].vector, rss_getcpu(bucket % rss_getnumbuckets())); if (err) goto err_irq; } #else if (0) goto err_irq; #endif /* EQs are created in ARMED state */ eq_update_ci(eq, 1); kvfree(in); return 0; err_irq: 
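/*
 * Error unwind for mlx5_create_map_eq(): undo the setup steps in
 * reverse order -- release the IRQ, destroy the EQ in firmware,
 * free the command input buffer, then free the EQ buffer.
 */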
free_irq(priv->msix_arr[vecidx].vector, eq); err_eq: mlx5_cmd_destroy_eq(dev, eq->eqn); err_in: kvfree(in); err_buf: mlx5_buf_free(dev, &eq->buf); return err; } EXPORT_SYMBOL_GPL(mlx5_create_map_eq); int mlx5_destroy_unmap_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq) { int err; free_irq(dev->priv.msix_arr[eq->irqn].vector, eq); err = mlx5_cmd_destroy_eq(dev, eq->eqn); if (err) mlx5_core_warn(dev, "failed to destroy a previously created eq: eqn %d\n", eq->eqn); mlx5_buf_free(dev, &eq->buf); return err; } EXPORT_SYMBOL_GPL(mlx5_destroy_unmap_eq); int mlx5_eq_init(struct mlx5_core_dev *dev) { int err; spin_lock_init(&dev->priv.eq_table.lock); err = 0; return err; } void mlx5_eq_cleanup(struct mlx5_core_dev *dev) { } int mlx5_start_eqs(struct mlx5_core_dev *dev) { struct mlx5_eq_table *table = &dev->priv.eq_table; u64 async_event_mask = MLX5_ASYNC_EVENT_MASK; int err; if (MLX5_CAP_GEN(dev, port_module_event)) async_event_mask |= (1ull << MLX5_EVENT_TYPE_CODING_PORT_MODULE_EVENT); if (MLX5_CAP_GEN(dev, nic_vport_change_event)) async_event_mask |= (1ull << MLX5_EVENT_TYPE_NIC_VPORT_CHANGE); if (MLX5_CAP_GEN(dev, dcbx)) async_event_mask |= (1ull << MLX5_EVENT_TYPE_CODING_DCBX_CHANGE_EVENT); if (MLX5_CAP_GEN(dev, fpga)) async_event_mask |= (1ull << MLX5_EVENT_TYPE_FPGA_ERROR) | (1ull << MLX5_EVENT_TYPE_FPGA_QP_ERROR); if (MLX5_CAP_GEN(dev, temp_warn_event)) async_event_mask |= (1ull << MLX5_EVENT_TYPE_TEMP_WARN_EVENT); if (MLX5_CAP_GEN(dev, general_notification_event)) { async_event_mask |= (1ull << MLX5_EVENT_TYPE_CODING_GENERAL_NOTIFICATION_EVENT); } err = mlx5_create_map_eq(dev, &table->cmd_eq, MLX5_EQ_VEC_CMD, - MLX5_NUM_CMD_EQE, 1ull << MLX5_EVENT_TYPE_CMD, - &dev->priv.uuari.uars[0]); + MLX5_NUM_CMD_EQE, 1ull << MLX5_EVENT_TYPE_CMD); if (err) { mlx5_core_warn(dev, "failed to create cmd EQ %d\n", err); return err; } mlx5_cmd_use_events(dev); err = mlx5_create_map_eq(dev, &table->async_eq, MLX5_EQ_VEC_ASYNC, - MLX5_NUM_ASYNC_EQE, async_event_mask, - &dev->priv.uuari.uars[0]); + MLX5_NUM_ASYNC_EQE, async_event_mask); if (err) { mlx5_core_warn(dev, "failed to create async EQ %d\n", err); goto err1; } err = mlx5_create_map_eq(dev, &table->pages_eq, MLX5_EQ_VEC_PAGES, /* TODO: sriov max_vf + */ 1, - 1 << MLX5_EVENT_TYPE_PAGE_REQUEST, - &dev->priv.uuari.uars[0]); + 1 << MLX5_EVENT_TYPE_PAGE_REQUEST); if (err) { mlx5_core_warn(dev, "failed to create pages EQ %d\n", err); goto err2; } return err; err2: mlx5_destroy_unmap_eq(dev, &table->async_eq); err1: mlx5_cmd_use_polling(dev); mlx5_destroy_unmap_eq(dev, &table->cmd_eq); return err; } int mlx5_stop_eqs(struct mlx5_core_dev *dev) { struct mlx5_eq_table *table = &dev->priv.eq_table; int err; err = mlx5_destroy_unmap_eq(dev, &table->pages_eq); if (err) return err; mlx5_destroy_unmap_eq(dev, &table->async_eq); mlx5_cmd_use_polling(dev); err = mlx5_destroy_unmap_eq(dev, &table->cmd_eq); if (err) mlx5_cmd_use_events(dev); return err; } int mlx5_core_eq_query(struct mlx5_core_dev *dev, struct mlx5_eq *eq, u32 *out, int outlen) { u32 in[MLX5_ST_SZ_DW(query_eq_in)] = {0}; memset(out, 0, outlen); MLX5_SET(query_eq_in, in, opcode, MLX5_CMD_OP_QUERY_EQ); MLX5_SET(query_eq_in, in, eq_number, eq->eqn); return mlx5_cmd_exec(dev, in, sizeof(in), out, outlen); } EXPORT_SYMBOL_GPL(mlx5_core_eq_query); static const char *mlx5_port_module_event_error_type_to_string(u8 error_type) { switch (error_type) { case MLX5_MODULE_EVENT_ERROR_POWER_BUDGET_EXCEEDED: return "Power budget exceeded"; case MLX5_MODULE_EVENT_ERROR_LONG_RANGE_FOR_NON_MLNX_CABLE_MODULE: return 
"Long Range for non MLNX cable"; case MLX5_MODULE_EVENT_ERROR_BUS_STUCK: return "Bus stuck(I2C or data shorted)"; case MLX5_MODULE_EVENT_ERROR_NO_EEPROM_RETRY_TIMEOUT: return "No EEPROM/retry timeout"; case MLX5_MODULE_EVENT_ERROR_ENFORCE_PART_NUMBER_LIST: return "Enforce part number list"; case MLX5_MODULE_EVENT_ERROR_UNSUPPORTED_CABLE: return "Unknown identifier"; case MLX5_MODULE_EVENT_ERROR_HIGH_TEMPERATURE: return "High Temperature"; case MLX5_MODULE_EVENT_ERROR_CABLE_IS_SHORTED: return "Bad or shorted cable/module"; case MLX5_MODULE_EVENT_ERROR_PMD_TYPE_NOT_ENABLED: return "PMD type is not enabled"; case MLX5_MODULE_EVENT_ERROR_LASTER_TEC_FAILURE: return "Laster_TEC_failure"; case MLX5_MODULE_EVENT_ERROR_HIGH_CURRENT: return "High_current"; case MLX5_MODULE_EVENT_ERROR_HIGH_VOLTAGE: return "High_voltage"; case MLX5_MODULE_EVENT_ERROR_PCIE_SYS_POWER_SLOT_EXCEEDED: return "pcie_system_power_slot_Exceeded"; case MLX5_MODULE_EVENT_ERROR_HIGH_POWER: return "High_power"; case MLX5_MODULE_EVENT_ERROR_MODULE_STATE_MACHINE_FAULT: return "Module_state_machine_fault"; default: return "Unknown error type"; } } unsigned int mlx5_query_module_status(struct mlx5_core_dev *dev, int module_num) { if (module_num < 0 || module_num >= MLX5_MAX_PORTS) return 0; /* undefined */ return dev->module_status[module_num]; } static void mlx5_port_module_event(struct mlx5_core_dev *dev, struct mlx5_eqe *eqe) { unsigned int module_num; unsigned int module_status; unsigned int error_type; struct mlx5_eqe_port_module_event *module_event_eqe; module_event_eqe = &eqe->data.port_module_event; module_num = (unsigned int)module_event_eqe->module; module_status = (unsigned int)module_event_eqe->module_status & PORT_MODULE_EVENT_MODULE_STATUS_MASK; error_type = (unsigned int)module_event_eqe->error_type & PORT_MODULE_EVENT_ERROR_TYPE_MASK; if (module_status < MLX5_MODULE_STATUS_NUM) dev->priv.pme_stats.status_counters[module_status]++; switch (module_status) { case MLX5_MODULE_STATUS_PLUGGED_ENABLED: mlx5_core_info(dev, "Module %u, status: plugged and enabled\n", module_num); break; case MLX5_MODULE_STATUS_UNPLUGGED: mlx5_core_info(dev, "Module %u, status: unplugged\n", module_num); break; case MLX5_MODULE_STATUS_ERROR: mlx5_core_err(dev, "Module %u, status: error, %s (%d)\n", module_num, mlx5_port_module_event_error_type_to_string(error_type), error_type); if (error_type < MLX5_MODULE_EVENT_ERROR_NUM) dev->priv.pme_stats.error_counters[error_type]++; break; default: mlx5_core_info(dev, "Module %u, unknown status %d\n", module_num, module_status); } /* store module status */ if (module_num < MLX5_MAX_PORTS) dev->module_status[module_num] = module_status; } static void mlx5_port_general_notification_event(struct mlx5_core_dev *dev, struct mlx5_eqe *eqe) { u8 port = (eqe->data.port.port >> 4) & 0xf; u32 rqn; struct mlx5_eqe_general_notification_event *general_event; switch (eqe->sub_type) { case MLX5_GEN_EVENT_SUBTYPE_DELAY_DROP_TIMEOUT: general_event = &eqe->data.general_notifications; rqn = be32_to_cpu(general_event->rq_user_index_delay_drop) & 0xffffff; break; case MLX5_GEN_EVENT_SUBTYPE_PCI_POWER_CHANGE_EVENT: mlx5_trigger_health_watchdog(dev); break; default: mlx5_core_warn(dev, "general event with unrecognized subtype: port %d, sub_type %d\n", port, eqe->sub_type); break; } } void mlx5_disable_interrupts(struct mlx5_core_dev *dev) { int nvec = dev->priv.eq_table.num_comp_vectors + MLX5_EQ_VEC_COMP_BASE; int x; for (x = 0; x != nvec; x++) disable_irq(dev->priv.msix_arr[x].vector); } void mlx5_poll_interrupts(struct 
mlx5_core_dev *dev) { struct mlx5_eq *eq; if (unlikely(dev->priv.disable_irqs != 0)) return; mlx5_eq_int(dev, &dev->priv.eq_table.cmd_eq); mlx5_eq_int(dev, &dev->priv.eq_table.async_eq); mlx5_eq_int(dev, &dev->priv.eq_table.pages_eq); list_for_each_entry(eq, &dev->priv.eq_table.comp_eqs_list, list) mlx5_eq_int(dev, eq); } diff --git a/sys/dev/mlx5/mlx5_core/mlx5_main.c b/sys/dev/mlx5/mlx5_core/mlx5_main.c index 843227f928fa..27326d692261 100644 --- a/sys/dev/mlx5/mlx5_core/mlx5_main.c +++ b/sys/dev/mlx5/mlx5_core/mlx5_main.c @@ -1,2071 +1,2062 @@ /*- - * Copyright (c) 2013-2019, Mellanox Technologies, Ltd. All rights reserved. + * Copyright (c) 2013-2020, Mellanox Technologies, Ltd. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS `AS IS' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "mlx5_core.h" #include "eswitch.h" #include "fs_core.h" #ifdef PCI_IOV #include #include #include #endif static const char mlx5_version[] = "Mellanox Core driver " DRIVER_VERSION " (" DRIVER_RELDATE ")"; MODULE_AUTHOR("Eli Cohen "); MODULE_DESCRIPTION("Mellanox Connect-IB, ConnectX-4 core driver"); MODULE_LICENSE("Dual BSD/GPL"); MODULE_DEPEND(mlx5, linuxkpi, 1, 1, 1); MODULE_DEPEND(mlx5, mlxfw, 1, 1, 1); MODULE_DEPEND(mlx5, firmware, 1, 1, 1); MODULE_VERSION(mlx5, 1); SYSCTL_NODE(_hw, OID_AUTO, mlx5, CTLFLAG_RW | CTLFLAG_MPSAFE, 0, "mlx5 hardware controls"); int mlx5_core_debug_mask; SYSCTL_INT(_hw_mlx5, OID_AUTO, debug_mask, CTLFLAG_RWTUN, &mlx5_core_debug_mask, 0, "debug mask: 1 = dump cmd data, 2 = dump cmd exec time, 3 = both. Default=0"); #define MLX5_DEFAULT_PROF 2 static int mlx5_prof_sel = MLX5_DEFAULT_PROF; SYSCTL_INT(_hw_mlx5, OID_AUTO, prof_sel, CTLFLAG_RWTUN, &mlx5_prof_sel, 0, "profile selector. Valid range 0 - 2"); static int mlx5_fast_unload_enabled = 1; SYSCTL_INT(_hw_mlx5, OID_AUTO, fast_unload_enabled, CTLFLAG_RWTUN, &mlx5_fast_unload_enabled, 0, "Set to enable fast unload. 
Clear to disable."); #define NUMA_NO_NODE -1 static LIST_HEAD(intf_list); static LIST_HEAD(dev_list); static DEFINE_MUTEX(intf_mutex); struct mlx5_device_context { struct list_head list; struct mlx5_interface *intf; void *context; }; enum { MLX5_ATOMIC_REQ_MODE_BE = 0x0, MLX5_ATOMIC_REQ_MODE_HOST_ENDIANNESS = 0x1, }; static struct mlx5_profile profiles[] = { [0] = { .mask = 0, }, [1] = { .mask = MLX5_PROF_MASK_QP_SIZE, .log_max_qp = 12, }, [2] = { .mask = MLX5_PROF_MASK_QP_SIZE | MLX5_PROF_MASK_MR_CACHE, .log_max_qp = 17, .mr_cache[0] = { .size = 500, .limit = 250 }, .mr_cache[1] = { .size = 500, .limit = 250 }, .mr_cache[2] = { .size = 500, .limit = 250 }, .mr_cache[3] = { .size = 500, .limit = 250 }, .mr_cache[4] = { .size = 500, .limit = 250 }, .mr_cache[5] = { .size = 500, .limit = 250 }, .mr_cache[6] = { .size = 500, .limit = 250 }, .mr_cache[7] = { .size = 500, .limit = 250 }, .mr_cache[8] = { .size = 500, .limit = 250 }, .mr_cache[9] = { .size = 500, .limit = 250 }, .mr_cache[10] = { .size = 500, .limit = 250 }, .mr_cache[11] = { .size = 500, .limit = 250 }, .mr_cache[12] = { .size = 64, .limit = 32 }, .mr_cache[13] = { .size = 32, .limit = 16 }, .mr_cache[14] = { .size = 16, .limit = 8 }, }, [3] = { .mask = MLX5_PROF_MASK_QP_SIZE, .log_max_qp = 17, }, }; #ifdef PCI_IOV static const char iov_mac_addr_name[] = "mac-addr"; static const char iov_node_guid_name[] = "node-guid"; static const char iov_port_guid_name[] = "port-guid"; #endif static int set_dma_caps(struct pci_dev *pdev) { struct mlx5_core_dev *dev = pci_get_drvdata(pdev); int err; err = pci_set_dma_mask(pdev, DMA_BIT_MASK(64)); if (err) { mlx5_core_warn(dev, "couldn't set 64-bit PCI DMA mask\n"); err = pci_set_dma_mask(pdev, DMA_BIT_MASK(32)); if (err) { mlx5_core_err(dev, "Can't set PCI DMA mask, aborting\n"); return err; } } err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64)); if (err) { mlx5_core_warn(dev, "couldn't set 64-bit consistent PCI DMA mask\n"); err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32)); if (err) { mlx5_core_err(dev, "Can't set consistent PCI DMA mask, aborting\n"); return err; } } dma_set_max_seg_size(&pdev->dev, 2u * 1024 * 1024 * 1024); return err; } int mlx5_pci_read_power_status(struct mlx5_core_dev *dev, u16 *p_power, u8 *p_status) { u32 in[MLX5_ST_SZ_DW(mpein_reg)] = {}; u32 out[MLX5_ST_SZ_DW(mpein_reg)] = {}; int err; err = mlx5_core_access_reg(dev, in, sizeof(in), out, sizeof(out), MLX5_ACCESS_REG_SUMMARY_CTRL_ID_MPEIN, 0, 0); *p_status = MLX5_GET(mpein_reg, out, pwr_status); *p_power = MLX5_GET(mpein_reg, out, pci_power); return err; } static int mlx5_pci_enable_device(struct mlx5_core_dev *dev) { struct pci_dev *pdev = dev->pdev; int err = 0; mutex_lock(&dev->pci_status_mutex); if (dev->pci_status == MLX5_PCI_STATUS_DISABLED) { err = pci_enable_device(pdev); if (!err) dev->pci_status = MLX5_PCI_STATUS_ENABLED; } mutex_unlock(&dev->pci_status_mutex); return err; } static void mlx5_pci_disable_device(struct mlx5_core_dev *dev) { struct pci_dev *pdev = dev->pdev; mutex_lock(&dev->pci_status_mutex); if (dev->pci_status == MLX5_PCI_STATUS_ENABLED) { pci_disable_device(pdev); dev->pci_status = MLX5_PCI_STATUS_DISABLED; } mutex_unlock(&dev->pci_status_mutex); } static int request_bar(struct pci_dev *pdev) { struct mlx5_core_dev *dev = pci_get_drvdata(pdev); int err = 0; if (!(pci_resource_flags(pdev, 0) & IORESOURCE_MEM)) { mlx5_core_err(dev, "Missing registers BAR, aborting\n"); return -ENODEV; } err = pci_request_regions(pdev, DRIVER_NAME); if (err) mlx5_core_err(dev, "Couldn't get 
PCI resources, aborting\n"); return err; } static void release_bar(struct pci_dev *pdev) { pci_release_regions(pdev); } static int mlx5_enable_msix(struct mlx5_core_dev *dev) { struct mlx5_priv *priv = &dev->priv; struct mlx5_eq_table *table = &priv->eq_table; int num_eqs = 1 << MLX5_CAP_GEN(dev, log_max_eq); int limit = dev->msix_eqvec; int nvec = MLX5_EQ_VEC_COMP_BASE; int i; if (limit > 0) nvec += limit; else nvec += MLX5_CAP_GEN(dev, num_ports) * num_online_cpus(); if (nvec > num_eqs) nvec = num_eqs; if (nvec > 256) nvec = 256; /* limit of firmware API */ if (nvec <= MLX5_EQ_VEC_COMP_BASE) return -ENOMEM; priv->msix_arr = kzalloc(nvec * sizeof(*priv->msix_arr), GFP_KERNEL); for (i = 0; i < nvec; i++) priv->msix_arr[i].entry = i; nvec = pci_enable_msix_range(dev->pdev, priv->msix_arr, MLX5_EQ_VEC_COMP_BASE + 1, nvec); if (nvec < 0) return nvec; table->num_comp_vectors = nvec - MLX5_EQ_VEC_COMP_BASE; return 0; } static void mlx5_disable_msix(struct mlx5_core_dev *dev) { struct mlx5_priv *priv = &dev->priv; pci_disable_msix(dev->pdev); kfree(priv->msix_arr); } struct mlx5_reg_host_endianess { u8 he; u8 rsvd[15]; }; #define CAP_MASK(pos, size) ((u64)((1 << (size)) - 1) << (pos)) enum { MLX5_CAP_BITS_RW_MASK = CAP_MASK(MLX5_CAP_OFF_CMDIF_CSUM, 2) | MLX5_DEV_CAP_FLAG_DCT | MLX5_DEV_CAP_FLAG_DRAIN_SIGERR, }; static u16 to_fw_pkey_sz(struct mlx5_core_dev *dev, u32 size) { switch (size) { case 128: return 0; case 256: return 1; case 512: return 2; case 1024: return 3; case 2048: return 4; case 4096: return 5; default: mlx5_core_warn(dev, "invalid pkey table size %d\n", size); return 0; } } static int mlx5_core_get_caps_mode(struct mlx5_core_dev *dev, enum mlx5_cap_type cap_type, enum mlx5_cap_mode cap_mode) { u8 in[MLX5_ST_SZ_BYTES(query_hca_cap_in)]; int out_sz = MLX5_ST_SZ_BYTES(query_hca_cap_out); void *out, *hca_caps; u16 opmod = (cap_type << 1) | (cap_mode & 0x01); int err; memset(in, 0, sizeof(in)); out = kzalloc(out_sz, GFP_KERNEL); MLX5_SET(query_hca_cap_in, in, opcode, MLX5_CMD_OP_QUERY_HCA_CAP); MLX5_SET(query_hca_cap_in, in, op_mod, opmod); err = mlx5_cmd_exec(dev, in, sizeof(in), out, out_sz); if (err) { mlx5_core_warn(dev, "QUERY_HCA_CAP : type(%x) opmode(%x) Failed(%d)\n", cap_type, cap_mode, err); goto query_ex; } hca_caps = MLX5_ADDR_OF(query_hca_cap_out, out, capability); switch (cap_mode) { case HCA_CAP_OPMOD_GET_MAX: memcpy(dev->hca_caps_max[cap_type], hca_caps, MLX5_UN_SZ_BYTES(hca_cap_union)); break; case HCA_CAP_OPMOD_GET_CUR: memcpy(dev->hca_caps_cur[cap_type], hca_caps, MLX5_UN_SZ_BYTES(hca_cap_union)); break; default: mlx5_core_warn(dev, "Tried to query dev cap type(%x) with wrong opmode(%x)\n", cap_type, cap_mode); err = -EINVAL; break; } query_ex: kfree(out); return err; } int mlx5_core_get_caps(struct mlx5_core_dev *dev, enum mlx5_cap_type cap_type) { int ret; ret = mlx5_core_get_caps_mode(dev, cap_type, HCA_CAP_OPMOD_GET_CUR); if (ret) return ret; return mlx5_core_get_caps_mode(dev, cap_type, HCA_CAP_OPMOD_GET_MAX); } static int set_caps(struct mlx5_core_dev *dev, void *in, int in_sz) { u32 out[MLX5_ST_SZ_DW(set_hca_cap_out)] = {0}; MLX5_SET(set_hca_cap_in, in, opcode, MLX5_CMD_OP_SET_HCA_CAP); return mlx5_cmd_exec(dev, in, in_sz, out, sizeof(out)); } static int handle_hca_cap(struct mlx5_core_dev *dev) { void *set_ctx = NULL; struct mlx5_profile *prof = dev->profile; int err = -ENOMEM; int set_sz = MLX5_ST_SZ_BYTES(set_hca_cap_in); void *set_hca_cap; set_ctx = kzalloc(set_sz, GFP_KERNEL); err = mlx5_core_get_caps(dev, MLX5_CAP_GENERAL); if (err) goto query_ex; 
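/*
 * Build the SET_HCA_CAP payload from the currently reported
 * capabilities, then override the fields this driver tunes:
 * pkey table size (limited to 128 entries), log_max_qp (from the
 * selected profile), cmdif checksum (disabled), 4K UAR (when
 * supported and PAGE_SIZE > 4096), drain_sigerr and the UAR page size.
 */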
set_hca_cap = MLX5_ADDR_OF(set_hca_cap_in, set_ctx, capability); memcpy(set_hca_cap, dev->hca_caps_cur[MLX5_CAP_GENERAL], MLX5_ST_SZ_BYTES(cmd_hca_cap)); mlx5_core_dbg(dev, "Current Pkey table size %d Setting new size %d\n", mlx5_to_sw_pkey_sz(MLX5_CAP_GEN(dev, pkey_table_size)), 128); /* we limit the size of the pkey table to 128 entries for now */ MLX5_SET(cmd_hca_cap, set_hca_cap, pkey_table_size, to_fw_pkey_sz(dev, 128)); if (prof->mask & MLX5_PROF_MASK_QP_SIZE) MLX5_SET(cmd_hca_cap, set_hca_cap, log_max_qp, prof->log_max_qp); /* disable cmdif checksum */ MLX5_SET(cmd_hca_cap, set_hca_cap, cmdif_checksum, 0); + /* Enable 4K UAR only when HCA supports it and page size is bigger + * than 4K. + */ + if (MLX5_CAP_GEN_MAX(dev, uar_4k) && PAGE_SIZE > 4096) + MLX5_SET(cmd_hca_cap, set_hca_cap, uar_4k, 1); + /* enable drain sigerr */ MLX5_SET(cmd_hca_cap, set_hca_cap, drain_sigerr, 1); MLX5_SET(cmd_hca_cap, set_hca_cap, log_uar_page_sz, PAGE_SHIFT - 12); err = set_caps(dev, set_ctx, set_sz); query_ex: kfree(set_ctx); return err; } static int handle_hca_cap_atomic(struct mlx5_core_dev *dev) { void *set_ctx; void *set_hca_cap; int set_sz = MLX5_ST_SZ_BYTES(set_hca_cap_in); int req_endianness; int err; if (MLX5_CAP_GEN(dev, atomic)) { err = mlx5_core_get_caps(dev, MLX5_CAP_ATOMIC); if (err) return err; } else { return 0; } req_endianness = MLX5_CAP_ATOMIC(dev, supported_atomic_req_8B_endianess_mode_1); if (req_endianness != MLX5_ATOMIC_REQ_MODE_HOST_ENDIANNESS) return 0; set_ctx = kzalloc(set_sz, GFP_KERNEL); if (!set_ctx) return -ENOMEM; MLX5_SET(set_hca_cap_in, set_ctx, op_mod, MLX5_SET_HCA_CAP_OP_MOD_ATOMIC << 1); set_hca_cap = MLX5_ADDR_OF(set_hca_cap_in, set_ctx, capability); /* Set requestor to host endianness */ MLX5_SET(atomic_caps, set_hca_cap, atomic_req_8B_endianess_mode, MLX5_ATOMIC_REQ_MODE_HOST_ENDIANNESS); err = set_caps(dev, set_ctx, set_sz); kfree(set_ctx); return err; } static int set_hca_ctrl(struct mlx5_core_dev *dev) { struct mlx5_reg_host_endianess he_in; struct mlx5_reg_host_endianess he_out; int err; if (MLX5_CAP_GEN(dev, port_type) == MLX5_CAP_PORT_TYPE_ETH && !MLX5_CAP_GEN(dev, roce)) return 0; memset(&he_in, 0, sizeof(he_in)); he_in.he = MLX5_SET_HOST_ENDIANNESS; err = mlx5_core_access_reg(dev, &he_in, sizeof(he_in), &he_out, sizeof(he_out), MLX5_REG_HOST_ENDIANNESS, 0, 1); return err; } static int mlx5_core_enable_hca(struct mlx5_core_dev *dev, u16 func_id) { u32 out[MLX5_ST_SZ_DW(enable_hca_out)] = {0}; u32 in[MLX5_ST_SZ_DW(enable_hca_in)] = {0}; MLX5_SET(enable_hca_in, in, opcode, MLX5_CMD_OP_ENABLE_HCA); MLX5_SET(enable_hca_in, in, function_id, func_id); return mlx5_cmd_exec(dev, &in, sizeof(in), &out, sizeof(out)); } static int mlx5_core_disable_hca(struct mlx5_core_dev *dev) { u32 out[MLX5_ST_SZ_DW(disable_hca_out)] = {0}; u32 in[MLX5_ST_SZ_DW(disable_hca_in)] = {0}; MLX5_SET(disable_hca_in, in, opcode, MLX5_CMD_OP_DISABLE_HCA); return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); } static int mlx5_core_set_issi(struct mlx5_core_dev *dev) { u32 query_in[MLX5_ST_SZ_DW(query_issi_in)] = {0}; u32 query_out[MLX5_ST_SZ_DW(query_issi_out)] = {0}; u32 sup_issi; int err; MLX5_SET(query_issi_in, query_in, opcode, MLX5_CMD_OP_QUERY_ISSI); err = mlx5_cmd_exec(dev, query_in, sizeof(query_in), query_out, sizeof(query_out)); if (err) { u32 syndrome; u8 status; mlx5_cmd_mbox_status(query_out, &status, &syndrome); if (status == MLX5_CMD_STAT_BAD_OP_ERR) { mlx5_core_dbg(dev, "Only ISSI 0 is supported\n"); return 0; } mlx5_core_err(dev, "failed to query ISSI\n"); return 
err; } sup_issi = MLX5_GET(query_issi_out, query_out, supported_issi_dw0); if (sup_issi & (1 << 1)) { u32 set_in[MLX5_ST_SZ_DW(set_issi_in)] = {0}; u32 set_out[MLX5_ST_SZ_DW(set_issi_out)] = {0}; MLX5_SET(set_issi_in, set_in, opcode, MLX5_CMD_OP_SET_ISSI); MLX5_SET(set_issi_in, set_in, current_issi, 1); err = mlx5_cmd_exec(dev, set_in, sizeof(set_in), set_out, sizeof(set_out)); if (err) { mlx5_core_err(dev, "failed to set ISSI=1 err(%d)\n", err); return err; } dev->issi = 1; return 0; } else if (sup_issi & (1 << 0)) { return 0; } return -ENOTSUPP; } int mlx5_vector2eqn(struct mlx5_core_dev *dev, int vector, int *eqn, int *irqn) { struct mlx5_eq_table *table = &dev->priv.eq_table; struct mlx5_eq *eq; int err = -ENOENT; spin_lock(&table->lock); list_for_each_entry(eq, &table->comp_eqs_list, list) { if (eq->index == vector) { *eqn = eq->eqn; *irqn = eq->irqn; err = 0; break; } } spin_unlock(&table->lock); return err; } EXPORT_SYMBOL(mlx5_vector2eqn); static void free_comp_eqs(struct mlx5_core_dev *dev) { struct mlx5_eq_table *table = &dev->priv.eq_table; struct mlx5_eq *eq, *n; spin_lock(&table->lock); list_for_each_entry_safe(eq, n, &table->comp_eqs_list, list) { list_del(&eq->list); spin_unlock(&table->lock); if (mlx5_destroy_unmap_eq(dev, eq)) mlx5_core_warn(dev, "failed to destroy EQ 0x%x\n", eq->eqn); kfree(eq); spin_lock(&table->lock); } spin_unlock(&table->lock); } static int alloc_comp_eqs(struct mlx5_core_dev *dev) { struct mlx5_eq_table *table = &dev->priv.eq_table; struct mlx5_eq *eq; int ncomp_vec; int nent; int err; int i; INIT_LIST_HEAD(&table->comp_eqs_list); ncomp_vec = table->num_comp_vectors; nent = MLX5_COMP_EQ_SIZE; for (i = 0; i < ncomp_vec; i++) { eq = kzalloc(sizeof(*eq), GFP_KERNEL); err = mlx5_create_map_eq(dev, eq, - i + MLX5_EQ_VEC_COMP_BASE, nent, 0, - &dev->priv.uuari.uars[0]); + i + MLX5_EQ_VEC_COMP_BASE, nent, 0); if (err) { kfree(eq); goto clean; } mlx5_core_dbg(dev, "allocated completion EQN %d\n", eq->eqn); eq->index = i; spin_lock(&table->lock); list_add_tail(&eq->list, &table->comp_eqs_list); spin_unlock(&table->lock); } return 0; clean: free_comp_eqs(dev); return err; } -static int map_bf_area(struct mlx5_core_dev *dev) -{ - resource_size_t bf_start = pci_resource_start(dev->pdev, 0); - resource_size_t bf_len = pci_resource_len(dev->pdev, 0); - - dev->priv.bf_mapping = io_mapping_create_wc(bf_start, bf_len); - - return dev->priv.bf_mapping ? 
0 : -ENOMEM; -} - -static void unmap_bf_area(struct mlx5_core_dev *dev) -{ - if (dev->priv.bf_mapping) - io_mapping_free(dev->priv.bf_mapping); -} - static inline int fw_initializing(struct mlx5_core_dev *dev) { return ioread32be(&dev->iseg->initializing) >> 31; } static int wait_fw_init(struct mlx5_core_dev *dev, u32 max_wait_mili, u32 warn_time_mili) { int warn = jiffies + msecs_to_jiffies(warn_time_mili); int end = jiffies + msecs_to_jiffies(max_wait_mili); int err = 0; MPASS(max_wait_mili > warn_time_mili); while (fw_initializing(dev) == 1) { if (time_after(jiffies, end)) { err = -EBUSY; break; } if (warn_time_mili && time_after(jiffies, warn)) { mlx5_core_warn(dev, "Waiting for FW initialization, timeout abort in %u s\n", (unsigned)(jiffies_to_msecs(end - warn) / 1000)); warn = jiffies + msecs_to_jiffies(warn_time_mili); } msleep(FW_INIT_WAIT_MS); } if (err != 0) mlx5_core_dbg(dev, "Full initializing bit dword = 0x%x\n", ioread32be(&dev->iseg->initializing)); return err; } static void mlx5_add_device(struct mlx5_interface *intf, struct mlx5_priv *priv) { struct mlx5_device_context *dev_ctx; struct mlx5_core_dev *dev = container_of(priv, struct mlx5_core_dev, priv); dev_ctx = kzalloc(sizeof(*dev_ctx), GFP_KERNEL); if (!dev_ctx) return; dev_ctx->intf = intf; CURVNET_SET_QUIET(vnet0); dev_ctx->context = intf->add(dev); CURVNET_RESTORE(); if (dev_ctx->context) { spin_lock_irq(&priv->ctx_lock); list_add_tail(&dev_ctx->list, &priv->ctx_list); spin_unlock_irq(&priv->ctx_lock); } else { kfree(dev_ctx); } } static void mlx5_remove_device(struct mlx5_interface *intf, struct mlx5_priv *priv) { struct mlx5_device_context *dev_ctx; struct mlx5_core_dev *dev = container_of(priv, struct mlx5_core_dev, priv); list_for_each_entry(dev_ctx, &priv->ctx_list, list) if (dev_ctx->intf == intf) { spin_lock_irq(&priv->ctx_lock); list_del(&dev_ctx->list); spin_unlock_irq(&priv->ctx_lock); intf->remove(dev, dev_ctx->context); kfree(dev_ctx); return; } } int mlx5_register_device(struct mlx5_core_dev *dev) { struct mlx5_priv *priv = &dev->priv; struct mlx5_interface *intf; mutex_lock(&intf_mutex); list_add_tail(&priv->dev_list, &dev_list); list_for_each_entry(intf, &intf_list, list) mlx5_add_device(intf, priv); mutex_unlock(&intf_mutex); return 0; } void mlx5_unregister_device(struct mlx5_core_dev *dev) { struct mlx5_priv *priv = &dev->priv; struct mlx5_interface *intf; mutex_lock(&intf_mutex); list_for_each_entry(intf, &intf_list, list) mlx5_remove_device(intf, priv); list_del(&priv->dev_list); mutex_unlock(&intf_mutex); } int mlx5_register_interface(struct mlx5_interface *intf) { struct mlx5_priv *priv; if (!intf->add || !intf->remove) return -EINVAL; mutex_lock(&intf_mutex); list_add_tail(&intf->list, &intf_list); list_for_each_entry(priv, &dev_list, dev_list) mlx5_add_device(intf, priv); mutex_unlock(&intf_mutex); return 0; } EXPORT_SYMBOL(mlx5_register_interface); void mlx5_unregister_interface(struct mlx5_interface *intf) { struct mlx5_priv *priv; mutex_lock(&intf_mutex); list_for_each_entry(priv, &dev_list, dev_list) mlx5_remove_device(intf, priv); list_del(&intf->list); mutex_unlock(&intf_mutex); } EXPORT_SYMBOL(mlx5_unregister_interface); void *mlx5_get_protocol_dev(struct mlx5_core_dev *mdev, int protocol) { struct mlx5_priv *priv = &mdev->priv; struct mlx5_device_context *dev_ctx; unsigned long flags; void *result = NULL; spin_lock_irqsave(&priv->ctx_lock, flags); list_for_each_entry(dev_ctx, &mdev->priv.ctx_list, list) if ((dev_ctx->intf->protocol == protocol) && dev_ctx->intf->get_dev) { result = 
dev_ctx->intf->get_dev(dev_ctx->context); break; } spin_unlock_irqrestore(&priv->ctx_lock, flags); return result; } EXPORT_SYMBOL(mlx5_get_protocol_dev); static int mlx5_auto_fw_update; SYSCTL_INT(_hw_mlx5, OID_AUTO, auto_fw_update, CTLFLAG_RDTUN | CTLFLAG_NOFETCH, &mlx5_auto_fw_update, 0, "Allow automatic firmware update on driver start"); static int mlx5_firmware_update(struct mlx5_core_dev *dev) { const struct firmware *fw; int err; TUNABLE_INT_FETCH("hw.mlx5.auto_fw_update", &mlx5_auto_fw_update); if (!mlx5_auto_fw_update) return (0); fw = firmware_get("mlx5fw_mfa"); if (fw) { err = mlx5_firmware_flash(dev, fw); firmware_put(fw, FIRMWARE_UNLOAD); } else return (-ENOENT); return err; } static int mlx5_pci_init(struct mlx5_core_dev *dev, struct mlx5_priv *priv) { struct pci_dev *pdev = dev->pdev; device_t bsddev; int err; pdev = dev->pdev; bsddev = pdev->dev.bsddev; pci_set_drvdata(dev->pdev, dev); strncpy(priv->name, dev_name(&pdev->dev), MLX5_MAX_NAME_LEN); priv->name[MLX5_MAX_NAME_LEN - 1] = 0; mutex_init(&priv->pgdir_mutex); INIT_LIST_HEAD(&priv->pgdir_list); spin_lock_init(&priv->mkey_lock); priv->numa_node = NUMA_NO_NODE; err = mlx5_pci_enable_device(dev); if (err) { mlx5_core_err(dev, "Cannot enable PCI device, aborting\n"); goto err_dbg; } err = request_bar(pdev); if (err) { mlx5_core_err(dev, "error requesting BARs, aborting\n"); goto err_disable; } pci_set_master(pdev); err = set_dma_caps(pdev); if (err) { mlx5_core_err(dev, "Failed setting DMA capabilities mask, aborting\n"); goto err_clr_master; } dev->iseg_base = pci_resource_start(dev->pdev, 0); dev->iseg = ioremap(dev->iseg_base, sizeof(*dev->iseg)); if (!dev->iseg) { err = -ENOMEM; mlx5_core_err(dev, "Failed mapping initialization segment, aborting\n"); goto err_clr_master; } return 0; err_clr_master: release_bar(dev->pdev); err_disable: mlx5_pci_disable_device(dev); err_dbg: return err; } static void mlx5_pci_close(struct mlx5_core_dev *dev, struct mlx5_priv *priv) { #ifdef PCI_IOV if (MLX5_CAP_GEN(dev, eswitch_flow_table)) pci_iov_detach(dev->pdev->dev.bsddev); #endif iounmap(dev->iseg); release_bar(dev->pdev); mlx5_pci_disable_device(dev); } static int mlx5_init_once(struct mlx5_core_dev *dev, struct mlx5_priv *priv) { int err; err = mlx5_vsc_find_cap(dev); if (err) mlx5_core_warn(dev, "Unable to find vendor specific capabilities\n"); err = mlx5_query_hca_caps(dev); if (err) { mlx5_core_err(dev, "query hca failed\n"); goto out; } err = mlx5_query_board_id(dev); if (err) { mlx5_core_err(dev, "query board id failed\n"); goto out; } err = mlx5_eq_init(dev); if (err) { mlx5_core_err(dev, "failed to initialize eq\n"); goto out; } MLX5_INIT_DOORBELL_LOCK(&priv->cq_uar_lock); err = mlx5_init_cq_table(dev); if (err) { mlx5_core_err(dev, "failed to initialize cq table\n"); goto err_eq_cleanup; } mlx5_init_qp_table(dev); mlx5_init_srq_table(dev); mlx5_init_mr_table(dev); mlx5_init_reserved_gids(dev); mlx5_fpga_init(dev); #ifdef RATELIMIT err = mlx5_init_rl_table(dev); if (err) { mlx5_core_err(dev, "Failed to init rate limiting\n"); goto err_tables_cleanup; } #endif return 0; #ifdef RATELIMIT err_tables_cleanup: mlx5_cleanup_mr_table(dev); mlx5_cleanup_srq_table(dev); mlx5_cleanup_qp_table(dev); mlx5_cleanup_cq_table(dev); #endif err_eq_cleanup: mlx5_eq_cleanup(dev); out: return err; } static void mlx5_cleanup_once(struct mlx5_core_dev *dev) { #ifdef RATELIMIT mlx5_cleanup_rl_table(dev); #endif mlx5_fpga_cleanup(dev); mlx5_cleanup_reserved_gids(dev); mlx5_cleanup_mr_table(dev); mlx5_cleanup_srq_table(dev); 
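/*
 * Teardown here mirrors mlx5_init_once() in reverse order; the QP, CQ
 * and EQ tables below are the last software objects to be released.
 */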
mlx5_cleanup_qp_table(dev); mlx5_cleanup_cq_table(dev); mlx5_eq_cleanup(dev); } static int mlx5_load_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv, bool boot) { int err; mutex_lock(&dev->intf_state_mutex); if (test_bit(MLX5_INTERFACE_STATE_UP, &dev->intf_state)) { mlx5_core_warn(dev, "interface is up, NOP\n"); goto out; } mlx5_core_dbg(dev, "firmware version: %d.%d.%d\n", fw_rev_maj(dev), fw_rev_min(dev), fw_rev_sub(dev)); /* * On load removing any previous indication of internal error, * device is up */ dev->state = MLX5_DEVICE_STATE_UP; /* wait for firmware to accept initialization segments configurations */ err = wait_fw_init(dev, FW_PRE_INIT_TIMEOUT_MILI, FW_INIT_WARN_MESSAGE_INTERVAL); if (err) { dev_err(&dev->pdev->dev, "Firmware over %d MS in pre-initializing state, aborting\n", FW_PRE_INIT_TIMEOUT_MILI); goto out_err; } err = mlx5_cmd_init(dev); if (err) { mlx5_core_err(dev, "Failed initializing command interface, aborting\n"); goto out_err; } err = wait_fw_init(dev, FW_INIT_TIMEOUT_MILI, 0); if (err) { mlx5_core_err(dev, "Firmware over %d MS in initializing state, aborting\n", FW_INIT_TIMEOUT_MILI); goto err_cmd_cleanup; } err = mlx5_core_enable_hca(dev, 0); if (err) { mlx5_core_err(dev, "enable hca failed\n"); goto err_cmd_cleanup; } err = mlx5_core_set_issi(dev); if (err) { mlx5_core_err(dev, "failed to set issi\n"); goto err_disable_hca; } err = mlx5_pagealloc_start(dev); if (err) { mlx5_core_err(dev, "mlx5_pagealloc_start failed\n"); goto err_disable_hca; } err = mlx5_satisfy_startup_pages(dev, 1); if (err) { mlx5_core_err(dev, "failed to allocate boot pages\n"); goto err_pagealloc_stop; } err = set_hca_ctrl(dev); if (err) { mlx5_core_err(dev, "set_hca_ctrl failed\n"); goto reclaim_boot_pages; } err = handle_hca_cap(dev); if (err) { mlx5_core_err(dev, "handle_hca_cap failed\n"); goto reclaim_boot_pages; } err = handle_hca_cap_atomic(dev); if (err) { mlx5_core_err(dev, "handle_hca_cap_atomic failed\n"); goto reclaim_boot_pages; } err = mlx5_satisfy_startup_pages(dev, 0); if (err) { mlx5_core_err(dev, "failed to allocate init pages\n"); goto reclaim_boot_pages; } err = mlx5_cmd_init_hca(dev); if (err) { mlx5_core_err(dev, "init hca failed\n"); goto reclaim_boot_pages; } mlx5_start_health_poll(dev); if (boot && mlx5_init_once(dev, priv)) { mlx5_core_err(dev, "sw objs init failed\n"); goto err_stop_poll; } - err = mlx5_enable_msix(dev); - if (err) { - mlx5_core_err(dev, "enable msix failed\n"); + dev->priv.uar = mlx5_get_uars_page(dev); + if (IS_ERR(dev->priv.uar)) { + mlx5_core_err(dev, "Failed allocating uar, aborting\n"); + err = PTR_ERR(dev->priv.uar); goto err_cleanup_once; } - err = mlx5_alloc_uuars(dev, &priv->uuari); + err = mlx5_enable_msix(dev); if (err) { - mlx5_core_err(dev, "Failed allocating uar, aborting\n"); - goto err_disable_msix; + mlx5_core_err(dev, "enable msix failed\n"); + goto err_cleanup_uar; } err = mlx5_start_eqs(dev); if (err) { mlx5_core_err(dev, "Failed to start pages and async EQs\n"); - goto err_free_uar; + goto err_disable_msix; } err = alloc_comp_eqs(dev); if (err) { mlx5_core_err(dev, "Failed to alloc completion EQs\n"); goto err_stop_eqs; } - if (map_bf_area(dev)) - mlx5_core_err(dev, "Failed to map blue flame area\n"); - err = mlx5_init_fs(dev); if (err) { mlx5_core_err(dev, "flow steering init %d\n", err); goto err_free_comp_eqs; } err = mlx5_mpfs_init(dev); if (err) { mlx5_core_err(dev, "mpfs init failed %d\n", err); goto err_fs; } err = mlx5_fpga_device_start(dev); if (err) { mlx5_core_err(dev, "fpga device start failed %d\n", err); 
goto err_mpfs; } err = mlx5_register_device(dev); if (err) { mlx5_core_err(dev, "mlx5_register_device failed %d\n", err); goto err_fpga; } set_bit(MLX5_INTERFACE_STATE_UP, &dev->intf_state); out: mutex_unlock(&dev->intf_state_mutex); return 0; err_fpga: mlx5_fpga_device_stop(dev); err_mpfs: mlx5_mpfs_destroy(dev); err_fs: mlx5_cleanup_fs(dev); err_free_comp_eqs: free_comp_eqs(dev); - unmap_bf_area(dev); err_stop_eqs: mlx5_stop_eqs(dev); -err_free_uar: - mlx5_free_uuars(dev, &priv->uuari); - err_disable_msix: mlx5_disable_msix(dev); +err_cleanup_uar: + mlx5_put_uars_page(dev, dev->priv.uar); + err_cleanup_once: if (boot) mlx5_cleanup_once(dev); err_stop_poll: mlx5_stop_health_poll(dev, boot); if (mlx5_cmd_teardown_hca(dev)) { mlx5_core_err(dev, "tear_down_hca failed, skip cleanup\n"); goto out_err; } reclaim_boot_pages: mlx5_reclaim_startup_pages(dev); err_pagealloc_stop: mlx5_pagealloc_stop(dev); err_disable_hca: mlx5_core_disable_hca(dev); err_cmd_cleanup: mlx5_cmd_cleanup(dev); out_err: dev->state = MLX5_DEVICE_STATE_INTERNAL_ERROR; mutex_unlock(&dev->intf_state_mutex); return err; } static int mlx5_unload_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv, bool cleanup) { int err = 0; if (cleanup) mlx5_drain_health_recovery(dev); mutex_lock(&dev->intf_state_mutex); if (!test_bit(MLX5_INTERFACE_STATE_UP, &dev->intf_state)) { mlx5_core_warn(dev, "%s: interface is down, NOP\n", __func__); if (cleanup) mlx5_cleanup_once(dev); goto out; } mlx5_unregister_device(dev); mlx5_eswitch_cleanup(dev->priv.eswitch); mlx5_fpga_device_stop(dev); mlx5_mpfs_destroy(dev); mlx5_cleanup_fs(dev); - unmap_bf_area(dev); mlx5_wait_for_reclaim_vfs_pages(dev); free_comp_eqs(dev); mlx5_stop_eqs(dev); - mlx5_free_uuars(dev, &priv->uuari); mlx5_disable_msix(dev); + mlx5_put_uars_page(dev, dev->priv.uar); if (cleanup) mlx5_cleanup_once(dev); mlx5_stop_health_poll(dev, cleanup); err = mlx5_cmd_teardown_hca(dev); if (err) { mlx5_core_err(dev, "tear_down_hca failed, skip cleanup\n"); goto out; } mlx5_pagealloc_stop(dev); mlx5_reclaim_startup_pages(dev); mlx5_core_disable_hca(dev); mlx5_cmd_cleanup(dev); out: clear_bit(MLX5_INTERFACE_STATE_UP, &dev->intf_state); mutex_unlock(&dev->intf_state_mutex); return err; } void mlx5_core_event(struct mlx5_core_dev *dev, enum mlx5_dev_event event, unsigned long param) { struct mlx5_priv *priv = &dev->priv; struct mlx5_device_context *dev_ctx; unsigned long flags; spin_lock_irqsave(&priv->ctx_lock, flags); list_for_each_entry(dev_ctx, &priv->ctx_list, list) if (dev_ctx->intf->event) dev_ctx->intf->event(dev, dev_ctx->context, event, param); spin_unlock_irqrestore(&priv->ctx_lock, flags); } struct mlx5_core_event_handler { void (*event)(struct mlx5_core_dev *dev, enum mlx5_dev_event event, void *data); }; #define MLX5_STATS_DESC(a, b, c, d, e, ...) 
d, e, #define MLX5_PORT_MODULE_ERROR_STATS(m) \ m(+1, u64, power_budget_exceeded, "power_budget", "Module Power Budget Exceeded") \ m(+1, u64, long_range, "long_range", "Module Long Range for non MLNX cable/module") \ m(+1, u64, bus_stuck, "bus_stuck", "Module Bus stuck(I2C or data shorted)") \ m(+1, u64, no_eeprom, "no_eeprom", "No EEPROM/retry timeout") \ m(+1, u64, enforce_part_number, "enforce_part_number", "Module Enforce part number list") \ m(+1, u64, unknown_id, "unknown_id", "Module Unknown identifier") \ m(+1, u64, high_temp, "high_temp", "Module High Temperature") \ m(+1, u64, cable_shorted, "cable_shorted", "Module Cable is shorted") \ m(+1, u64, pmd_type_not_enabled, "pmd_type_not_enabled", "PMD type is not enabled") \ m(+1, u64, laster_tec_failure, "laster_tec_failure", "Laster TEC failure") \ m(+1, u64, high_current, "high_current", "High current") \ m(+1, u64, high_voltage, "high_voltage", "High voltage") \ m(+1, u64, pcie_sys_power_slot_exceeded, "pcie_sys_power_slot_exceeded", "PCIe system power slot Exceeded") \ m(+1, u64, high_power, "high_power", "High power") \ m(+1, u64, module_state_machine_fault, "module_state_machine_fault", "Module State Machine fault") static const char *mlx5_pme_err_desc[] = { MLX5_PORT_MODULE_ERROR_STATS(MLX5_STATS_DESC) }; static int init_one(struct pci_dev *pdev, const struct pci_device_id *id) { struct mlx5_core_dev *dev; struct mlx5_priv *priv; device_t bsddev = pdev->dev.bsddev; #ifdef PCI_IOV nvlist_t *pf_schema, *vf_schema; int num_vfs, sriov_pos; #endif int i,err; struct sysctl_oid *pme_sysctl_node; struct sysctl_oid *pme_err_sysctl_node; struct sysctl_oid *cap_sysctl_node; struct sysctl_oid *current_cap_sysctl_node; struct sysctl_oid *max_cap_sysctl_node; dev = kzalloc(sizeof(*dev), GFP_KERNEL); priv = &dev->priv; if (id) priv->pci_dev_data = id->driver_data; if (mlx5_prof_sel < 0 || mlx5_prof_sel >= ARRAY_SIZE(profiles)) { device_printf(bsddev, "WARN: selected profile out of range, selecting default (%d)\n", MLX5_DEFAULT_PROF); mlx5_prof_sel = MLX5_DEFAULT_PROF; } dev->profile = &profiles[mlx5_prof_sel]; dev->pdev = pdev; dev->event = mlx5_core_event; /* Set desc */ device_set_desc(bsddev, mlx5_version); sysctl_ctx_init(&dev->sysctl_ctx); SYSCTL_ADD_INT(&dev->sysctl_ctx, SYSCTL_CHILDREN(device_get_sysctl_tree(bsddev)), OID_AUTO, "msix_eqvec", CTLFLAG_RDTUN, &dev->msix_eqvec, 0, "Maximum number of MSIX event queue vectors, if set"); SYSCTL_ADD_INT(&dev->sysctl_ctx, SYSCTL_CHILDREN(device_get_sysctl_tree(bsddev)), OID_AUTO, "power_status", CTLFLAG_RD, &dev->pwr_status, 0, "0:Invalid 1:Sufficient 2:Insufficient"); SYSCTL_ADD_INT(&dev->sysctl_ctx, SYSCTL_CHILDREN(device_get_sysctl_tree(bsddev)), OID_AUTO, "power_value", CTLFLAG_RD, &dev->pwr_value, 0, "Current power value in Watts"); pme_sysctl_node = SYSCTL_ADD_NODE(&dev->sysctl_ctx, SYSCTL_CHILDREN(device_get_sysctl_tree(bsddev)), OID_AUTO, "pme_stats", CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "Port module event statistics"); if (pme_sysctl_node == NULL) { err = -ENOMEM; goto clean_sysctl_ctx; } pme_err_sysctl_node = SYSCTL_ADD_NODE(&dev->sysctl_ctx, SYSCTL_CHILDREN(pme_sysctl_node), OID_AUTO, "errors", CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "Port module event error statistics"); if (pme_err_sysctl_node == NULL) { err = -ENOMEM; goto clean_sysctl_ctx; } SYSCTL_ADD_U64(&dev->sysctl_ctx, SYSCTL_CHILDREN(pme_sysctl_node), OID_AUTO, "module_plug", CTLFLAG_RD | CTLFLAG_MPSAFE, &dev->priv.pme_stats.status_counters[MLX5_MODULE_STATUS_PLUGGED_ENABLED], 0, "Number of time module plugged"); 
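/*
 * The remaining port-module-event counters are exported read-only under
 * the device's sysctl tree (e.g. dev.<driver>.<unit>.pme_stats.*); the
 * per-error entries below reuse the name/description pairs generated
 * from MLX5_PORT_MODULE_ERROR_STATS via mlx5_pme_err_desc[].
 */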
SYSCTL_ADD_U64(&dev->sysctl_ctx, SYSCTL_CHILDREN(pme_sysctl_node), OID_AUTO, "module_unplug", CTLFLAG_RD | CTLFLAG_MPSAFE, &dev->priv.pme_stats.status_counters[MLX5_MODULE_STATUS_UNPLUGGED], 0, "Number of time module unplugged"); for (i = 0 ; i < MLX5_MODULE_EVENT_ERROR_NUM; i++) { SYSCTL_ADD_U64(&dev->sysctl_ctx, SYSCTL_CHILDREN(pme_err_sysctl_node), OID_AUTO, mlx5_pme_err_desc[2 * i], CTLFLAG_RD | CTLFLAG_MPSAFE, &dev->priv.pme_stats.error_counters[i], 0, mlx5_pme_err_desc[2 * i + 1]); } cap_sysctl_node = SYSCTL_ADD_NODE(&dev->sysctl_ctx, SYSCTL_CHILDREN(device_get_sysctl_tree(bsddev)), OID_AUTO, "caps", CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "hardware capabilities raw bitstrings"); if (cap_sysctl_node == NULL) { err = -ENOMEM; goto clean_sysctl_ctx; } current_cap_sysctl_node = SYSCTL_ADD_NODE(&dev->sysctl_ctx, SYSCTL_CHILDREN(cap_sysctl_node), OID_AUTO, "current", CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, ""); if (current_cap_sysctl_node == NULL) { err = -ENOMEM; goto clean_sysctl_ctx; } max_cap_sysctl_node = SYSCTL_ADD_NODE(&dev->sysctl_ctx, SYSCTL_CHILDREN(cap_sysctl_node), OID_AUTO, "max", CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, ""); if (max_cap_sysctl_node == NULL) { err = -ENOMEM; goto clean_sysctl_ctx; } SYSCTL_ADD_OPAQUE(&dev->sysctl_ctx, SYSCTL_CHILDREN(current_cap_sysctl_node), OID_AUTO, "general", CTLFLAG_RD | CTLFLAG_MPSAFE, &dev->hca_caps_cur[MLX5_CAP_GENERAL], MLX5_UN_SZ_DW(hca_cap_union) * sizeof(u32), "IU", ""); SYSCTL_ADD_OPAQUE(&dev->sysctl_ctx, SYSCTL_CHILDREN(max_cap_sysctl_node), OID_AUTO, "general", CTLFLAG_RD | CTLFLAG_MPSAFE, &dev->hca_caps_max[MLX5_CAP_GENERAL], MLX5_UN_SZ_DW(hca_cap_union) * sizeof(u32), "IU", ""); SYSCTL_ADD_OPAQUE(&dev->sysctl_ctx, SYSCTL_CHILDREN(current_cap_sysctl_node), OID_AUTO, "ether", CTLFLAG_RD | CTLFLAG_MPSAFE, &dev->hca_caps_cur[MLX5_CAP_ETHERNET_OFFLOADS], MLX5_UN_SZ_DW(hca_cap_union) * sizeof(u32), "IU", ""); SYSCTL_ADD_OPAQUE(&dev->sysctl_ctx, SYSCTL_CHILDREN(max_cap_sysctl_node), OID_AUTO, "ether", CTLFLAG_RD | CTLFLAG_MPSAFE, &dev->hca_caps_max[MLX5_CAP_ETHERNET_OFFLOADS], MLX5_UN_SZ_DW(hca_cap_union) * sizeof(u32), "IU", ""); SYSCTL_ADD_OPAQUE(&dev->sysctl_ctx, SYSCTL_CHILDREN(current_cap_sysctl_node), OID_AUTO, "odp", CTLFLAG_RD | CTLFLAG_MPSAFE, &dev->hca_caps_cur[MLX5_CAP_ODP], MLX5_UN_SZ_DW(hca_cap_union) * sizeof(u32), "IU", ""); SYSCTL_ADD_OPAQUE(&dev->sysctl_ctx, SYSCTL_CHILDREN(max_cap_sysctl_node), OID_AUTO, "odp", CTLFLAG_RD | CTLFLAG_MPSAFE, &dev->hca_caps_max[MLX5_CAP_ODP], MLX5_UN_SZ_DW(hca_cap_union) * sizeof(u32), "IU", ""); SYSCTL_ADD_OPAQUE(&dev->sysctl_ctx, SYSCTL_CHILDREN(current_cap_sysctl_node), OID_AUTO, "atomic", CTLFLAG_RD | CTLFLAG_MPSAFE, &dev->hca_caps_cur[MLX5_CAP_ATOMIC], MLX5_UN_SZ_DW(hca_cap_union) * sizeof(u32), "IU", ""); SYSCTL_ADD_OPAQUE(&dev->sysctl_ctx, SYSCTL_CHILDREN(max_cap_sysctl_node), OID_AUTO, "atomic", CTLFLAG_RD | CTLFLAG_MPSAFE, &dev->hca_caps_max[MLX5_CAP_ATOMIC], MLX5_UN_SZ_DW(hca_cap_union) * sizeof(u32), "IU", ""); SYSCTL_ADD_OPAQUE(&dev->sysctl_ctx, SYSCTL_CHILDREN(current_cap_sysctl_node), OID_AUTO, "roce", CTLFLAG_RD | CTLFLAG_MPSAFE, &dev->hca_caps_cur[MLX5_CAP_ROCE], MLX5_UN_SZ_DW(hca_cap_union) * sizeof(u32), "IU", ""); SYSCTL_ADD_OPAQUE(&dev->sysctl_ctx, SYSCTL_CHILDREN(max_cap_sysctl_node), OID_AUTO, "roce", CTLFLAG_RD | CTLFLAG_MPSAFE, &dev->hca_caps_max[MLX5_CAP_ROCE], MLX5_UN_SZ_DW(hca_cap_union) * sizeof(u32), "IU", ""); SYSCTL_ADD_OPAQUE(&dev->sysctl_ctx, SYSCTL_CHILDREN(current_cap_sysctl_node), OID_AUTO, "ipoib", CTLFLAG_RD | CTLFLAG_MPSAFE, 
&dev->hca_caps_cur[MLX5_CAP_IPOIB_OFFLOADS], MLX5_UN_SZ_DW(hca_cap_union) * sizeof(u32), "IU", ""); SYSCTL_ADD_OPAQUE(&dev->sysctl_ctx, SYSCTL_CHILDREN(max_cap_sysctl_node), OID_AUTO, "ipoib", CTLFLAG_RD | CTLFLAG_MPSAFE, &dev->hca_caps_max[MLX5_CAP_IPOIB_OFFLOADS], MLX5_UN_SZ_DW(hca_cap_union) * sizeof(u32), "IU", ""); SYSCTL_ADD_OPAQUE(&dev->sysctl_ctx, SYSCTL_CHILDREN(current_cap_sysctl_node), OID_AUTO, "eoib", CTLFLAG_RD | CTLFLAG_MPSAFE, &dev->hca_caps_cur[MLX5_CAP_EOIB_OFFLOADS], MLX5_UN_SZ_DW(hca_cap_union) * sizeof(u32), "IU", ""); SYSCTL_ADD_OPAQUE(&dev->sysctl_ctx, SYSCTL_CHILDREN(max_cap_sysctl_node), OID_AUTO, "eoib", CTLFLAG_RD | CTLFLAG_MPSAFE, &dev->hca_caps_max[MLX5_CAP_EOIB_OFFLOADS], MLX5_UN_SZ_DW(hca_cap_union) * sizeof(u32), "IU", ""); SYSCTL_ADD_OPAQUE(&dev->sysctl_ctx, SYSCTL_CHILDREN(current_cap_sysctl_node), OID_AUTO, "flow_table", CTLFLAG_RD | CTLFLAG_MPSAFE, &dev->hca_caps_cur[MLX5_CAP_FLOW_TABLE], MLX5_UN_SZ_DW(hca_cap_union) * sizeof(u32), "IU", ""); SYSCTL_ADD_OPAQUE(&dev->sysctl_ctx, SYSCTL_CHILDREN(max_cap_sysctl_node), OID_AUTO, "flow_table", CTLFLAG_RD | CTLFLAG_MPSAFE, &dev->hca_caps_max[MLX5_CAP_FLOW_TABLE], MLX5_UN_SZ_DW(hca_cap_union) * sizeof(u32), "IU", ""); SYSCTL_ADD_OPAQUE(&dev->sysctl_ctx, SYSCTL_CHILDREN(current_cap_sysctl_node), OID_AUTO, "eswitch_flow_table", CTLFLAG_RD | CTLFLAG_MPSAFE, &dev->hca_caps_cur[MLX5_CAP_ESWITCH_FLOW_TABLE], MLX5_UN_SZ_DW(hca_cap_union) * sizeof(u32), "IU", ""); SYSCTL_ADD_OPAQUE(&dev->sysctl_ctx, SYSCTL_CHILDREN(max_cap_sysctl_node), OID_AUTO, "eswitch_flow_table", CTLFLAG_RD | CTLFLAG_MPSAFE, &dev->hca_caps_max[MLX5_CAP_ESWITCH_FLOW_TABLE], MLX5_UN_SZ_DW(hca_cap_union) * sizeof(u32), "IU", ""); SYSCTL_ADD_OPAQUE(&dev->sysctl_ctx, SYSCTL_CHILDREN(current_cap_sysctl_node), OID_AUTO, "eswitch", CTLFLAG_RD | CTLFLAG_MPSAFE, &dev->hca_caps_cur[MLX5_CAP_ESWITCH], MLX5_UN_SZ_DW(hca_cap_union) * sizeof(u32), "IU", ""); SYSCTL_ADD_OPAQUE(&dev->sysctl_ctx, SYSCTL_CHILDREN(max_cap_sysctl_node), OID_AUTO, "eswitch", CTLFLAG_RD | CTLFLAG_MPSAFE, &dev->hca_caps_max[MLX5_CAP_ESWITCH], MLX5_UN_SZ_DW(hca_cap_union) * sizeof(u32), "IU", ""); SYSCTL_ADD_OPAQUE(&dev->sysctl_ctx, SYSCTL_CHILDREN(current_cap_sysctl_node), OID_AUTO, "snapshot", CTLFLAG_RD | CTLFLAG_MPSAFE, &dev->hca_caps_cur[MLX5_CAP_SNAPSHOT], MLX5_UN_SZ_DW(hca_cap_union) * sizeof(u32), "IU", ""); SYSCTL_ADD_OPAQUE(&dev->sysctl_ctx, SYSCTL_CHILDREN(max_cap_sysctl_node), OID_AUTO, "snapshot", CTLFLAG_RD | CTLFLAG_MPSAFE, &dev->hca_caps_max[MLX5_CAP_SNAPSHOT], MLX5_UN_SZ_DW(hca_cap_union) * sizeof(u32), "IU", ""); SYSCTL_ADD_OPAQUE(&dev->sysctl_ctx, SYSCTL_CHILDREN(current_cap_sysctl_node), OID_AUTO, "vector_calc", CTLFLAG_RD | CTLFLAG_MPSAFE, &dev->hca_caps_cur[MLX5_CAP_VECTOR_CALC], MLX5_UN_SZ_DW(hca_cap_union) * sizeof(u32), "IU", ""); SYSCTL_ADD_OPAQUE(&dev->sysctl_ctx, SYSCTL_CHILDREN(max_cap_sysctl_node), OID_AUTO, "vector_calc", CTLFLAG_RD | CTLFLAG_MPSAFE, &dev->hca_caps_max[MLX5_CAP_VECTOR_CALC], MLX5_UN_SZ_DW(hca_cap_union) * sizeof(u32), "IU", ""); SYSCTL_ADD_OPAQUE(&dev->sysctl_ctx, SYSCTL_CHILDREN(current_cap_sysctl_node), OID_AUTO, "qos", CTLFLAG_RD | CTLFLAG_MPSAFE, &dev->hca_caps_cur[MLX5_CAP_QOS], MLX5_UN_SZ_DW(hca_cap_union) * sizeof(u32), "IU", ""); SYSCTL_ADD_OPAQUE(&dev->sysctl_ctx, SYSCTL_CHILDREN(max_cap_sysctl_node), OID_AUTO, "qos", CTLFLAG_RD | CTLFLAG_MPSAFE, &dev->hca_caps_max[MLX5_CAP_QOS], MLX5_UN_SZ_DW(hca_cap_union) * sizeof(u32), "IU", ""); SYSCTL_ADD_OPAQUE(&dev->sysctl_ctx, SYSCTL_CHILDREN(current_cap_sysctl_node), OID_AUTO, "debug", 
CTLFLAG_RD | CTLFLAG_MPSAFE, &dev->hca_caps_cur[MLX5_CAP_DEBUG], MLX5_UN_SZ_DW(hca_cap_union) * sizeof(u32), "IU", ""); SYSCTL_ADD_OPAQUE(&dev->sysctl_ctx, SYSCTL_CHILDREN(max_cap_sysctl_node), OID_AUTO, "debug", CTLFLAG_RD | CTLFLAG_MPSAFE, &dev->hca_caps_max[MLX5_CAP_DEBUG], MLX5_UN_SZ_DW(hca_cap_union) * sizeof(u32), "IU", ""); SYSCTL_ADD_OPAQUE(&dev->sysctl_ctx, SYSCTL_CHILDREN(cap_sysctl_node), OID_AUTO, "pcam", CTLFLAG_RD | CTLFLAG_MPSAFE, &dev->caps.pcam, sizeof(dev->caps.pcam), "IU", ""); SYSCTL_ADD_OPAQUE(&dev->sysctl_ctx, SYSCTL_CHILDREN(cap_sysctl_node), OID_AUTO, "mcam", CTLFLAG_RD | CTLFLAG_MPSAFE, &dev->caps.mcam, sizeof(dev->caps.mcam), "IU", ""); SYSCTL_ADD_OPAQUE(&dev->sysctl_ctx, SYSCTL_CHILDREN(cap_sysctl_node), OID_AUTO, "qcam", CTLFLAG_RD | CTLFLAG_MPSAFE, &dev->caps.qcam, sizeof(dev->caps.qcam), "IU", ""); SYSCTL_ADD_OPAQUE(&dev->sysctl_ctx, SYSCTL_CHILDREN(cap_sysctl_node), OID_AUTO, "fpga", CTLFLAG_RD | CTLFLAG_MPSAFE, &dev->caps.fpga, sizeof(dev->caps.fpga), "IU", ""); INIT_LIST_HEAD(&priv->ctx_list); spin_lock_init(&priv->ctx_lock); mutex_init(&dev->pci_status_mutex); mutex_init(&dev->intf_state_mutex); + + mutex_init(&priv->bfregs.reg_head.lock); + mutex_init(&priv->bfregs.wc_head.lock); + INIT_LIST_HEAD(&priv->bfregs.reg_head.list); + INIT_LIST_HEAD(&priv->bfregs.wc_head.list); + mtx_init(&dev->dump_lock, "mlx5dmp", NULL, MTX_DEF | MTX_NEW); err = mlx5_pci_init(dev, priv); if (err) { mlx5_core_err(dev, "mlx5_pci_init failed %d\n", err); goto clean_dev; } err = mlx5_health_init(dev); if (err) { mlx5_core_err(dev, "mlx5_health_init failed %d\n", err); goto close_pci; } mlx5_pagealloc_init(dev); err = mlx5_load_one(dev, priv, true); if (err) { mlx5_core_err(dev, "mlx5_load_one failed %d\n", err); goto clean_health; } mlx5_fwdump_prep(dev); mlx5_firmware_update(dev); #ifdef PCI_IOV if (MLX5_CAP_GEN(dev, vport_group_manager)) { if (pci_find_extcap(bsddev, PCIZ_SRIOV, &sriov_pos) == 0) { num_vfs = pci_read_config(bsddev, sriov_pos + PCIR_SRIOV_TOTAL_VFS, 2); } else { mlx5_core_info(dev, "cannot find SR-IOV PCIe cap\n"); num_vfs = 0; } err = mlx5_eswitch_init(dev, 1 + num_vfs); if (err == 0) { pf_schema = pci_iov_schema_alloc_node(); vf_schema = pci_iov_schema_alloc_node(); pci_iov_schema_add_unicast_mac(vf_schema, iov_mac_addr_name, 0, NULL); pci_iov_schema_add_uint64(vf_schema, iov_node_guid_name, 0, 0); pci_iov_schema_add_uint64(vf_schema, iov_port_guid_name, 0, 0); err = pci_iov_attach(bsddev, pf_schema, vf_schema); if (err != 0) { device_printf(bsddev, "Failed to initialize SR-IOV support, error %d\n", err); } } else { mlx5_core_err(dev, "eswitch init failed, error %d\n", err); } } #endif pci_save_state(bsddev); return 0; clean_health: mlx5_pagealloc_cleanup(dev); mlx5_health_cleanup(dev); close_pci: mlx5_pci_close(dev, priv); clean_dev: mtx_destroy(&dev->dump_lock); clean_sysctl_ctx: sysctl_ctx_free(&dev->sysctl_ctx); kfree(dev); return err; } static void remove_one(struct pci_dev *pdev) { struct mlx5_core_dev *dev = pci_get_drvdata(pdev); struct mlx5_priv *priv = &dev->priv; #ifdef PCI_IOV pci_iov_detach(pdev->dev.bsddev); mlx5_eswitch_disable_sriov(priv->eswitch); #endif if (mlx5_unload_one(dev, priv, true)) { mlx5_core_err(dev, "mlx5_unload_one() failed, leaked %lld bytes\n", (long long)(dev->priv.fw_pages * MLX5_ADAPTER_PAGE_SIZE)); } mlx5_pagealloc_cleanup(dev); mlx5_health_cleanup(dev); mlx5_fwdump_clean(dev); mlx5_pci_close(dev, priv); mtx_destroy(&dev->dump_lock); pci_set_drvdata(pdev, NULL); sysctl_ctx_free(&dev->sysctl_ctx); kfree(dev); } static 
pci_ers_result_t mlx5_pci_err_detected(struct pci_dev *pdev, pci_channel_state_t state) { struct mlx5_core_dev *dev = pci_get_drvdata(pdev); struct mlx5_priv *priv = &dev->priv; mlx5_core_info(dev, "%s was called\n", __func__); mlx5_enter_error_state(dev, false); mlx5_unload_one(dev, priv, false); if (state) { mlx5_drain_health_wq(dev); mlx5_pci_disable_device(dev); } return state == pci_channel_io_perm_failure ? PCI_ERS_RESULT_DISCONNECT : PCI_ERS_RESULT_NEED_RESET; } static pci_ers_result_t mlx5_pci_slot_reset(struct pci_dev *pdev) { struct mlx5_core_dev *dev = pci_get_drvdata(pdev); int err = 0; mlx5_core_info(dev,"%s was called\n", __func__); err = mlx5_pci_enable_device(dev); if (err) { mlx5_core_err(dev, "mlx5_pci_enable_device failed with error code: %d\n" ,err); return PCI_ERS_RESULT_DISCONNECT; } pci_set_master(pdev); pci_set_powerstate(pdev->dev.bsddev, PCI_POWERSTATE_D0); pci_restore_state(pdev->dev.bsddev); pci_save_state(pdev->dev.bsddev); return err ? PCI_ERS_RESULT_DISCONNECT : PCI_ERS_RESULT_RECOVERED; } /* wait for the device to show vital signs. For now we check * that we can read the device ID and that the health buffer * shows a non zero value which is different than 0xffffffff */ static void wait_vital(struct pci_dev *pdev) { struct mlx5_core_dev *dev = pci_get_drvdata(pdev); struct mlx5_core_health *health = &dev->priv.health; const int niter = 100; u32 count; u16 did; int i; /* Wait for firmware to be ready after reset */ msleep(1000); for (i = 0; i < niter; i++) { if (pci_read_config_word(pdev, 2, &did)) { mlx5_core_warn(dev, "failed reading config word\n"); break; } if (did == pdev->device) { mlx5_core_info(dev, "device ID correctly read after %d iterations\n", i); break; } msleep(50); } if (i == niter) mlx5_core_warn(dev, "could not read device ID\n"); for (i = 0; i < niter; i++) { count = ioread32be(health->health_counter); if (count && count != 0xffffffff) { mlx5_core_info(dev, "Counter value 0x%x after %d iterations\n", count, i); break; } msleep(50); } if (i == niter) mlx5_core_warn(dev, "could not read device ID\n"); } static void mlx5_pci_resume(struct pci_dev *pdev) { struct mlx5_core_dev *dev = pci_get_drvdata(pdev); struct mlx5_priv *priv = &dev->priv; int err; mlx5_core_info(dev,"%s was called\n", __func__); wait_vital(pdev); err = mlx5_load_one(dev, priv, false); if (err) mlx5_core_err(dev, "mlx5_load_one failed with error code: %d\n" ,err); else mlx5_core_info(dev,"device recovered\n"); } static const struct pci_error_handlers mlx5_err_handler = { .error_detected = mlx5_pci_err_detected, .slot_reset = mlx5_pci_slot_reset, .resume = mlx5_pci_resume }; #ifdef PCI_IOV static int mlx5_iov_init(device_t dev, uint16_t num_vfs, const nvlist_t *pf_config) { struct pci_dev *pdev; struct mlx5_core_dev *core_dev; struct mlx5_priv *priv; int err; pdev = device_get_softc(dev); core_dev = pci_get_drvdata(pdev); priv = &core_dev->priv; if (priv->eswitch == NULL) return (ENXIO); if (priv->eswitch->total_vports < num_vfs + 1) num_vfs = priv->eswitch->total_vports - 1; err = mlx5_eswitch_enable_sriov(priv->eswitch, num_vfs); return (-err); } static void mlx5_iov_uninit(device_t dev) { struct pci_dev *pdev; struct mlx5_core_dev *core_dev; struct mlx5_priv *priv; pdev = device_get_softc(dev); core_dev = pci_get_drvdata(pdev); priv = &core_dev->priv; mlx5_eswitch_disable_sriov(priv->eswitch); } static int mlx5_iov_add_vf(device_t dev, uint16_t vfnum, const nvlist_t *vf_config) { struct pci_dev *pdev; struct mlx5_core_dev *core_dev; struct mlx5_priv *priv; const void *mac; 
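/*
 * Per-VF settings (MAC address, node and port GUID) arrive in the
 * iovctl(8)-supplied nvlist and are applied below before the VF's HCA
 * function is enabled.
 */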
size_t mac_size; uint64_t node_guid, port_guid; int error; pdev = device_get_softc(dev); core_dev = pci_get_drvdata(pdev); priv = &core_dev->priv; if (vfnum + 1 >= priv->eswitch->total_vports) return (ENXIO); if (nvlist_exists_binary(vf_config, iov_mac_addr_name)) { mac = nvlist_get_binary(vf_config, iov_mac_addr_name, &mac_size); error = -mlx5_eswitch_set_vport_mac(priv->eswitch, vfnum + 1, __DECONST(u8 *, mac)); if (error != 0) { mlx5_core_err(core_dev, "setting MAC for VF %d failed, error %d\n", vfnum + 1, error); } } if (nvlist_exists_number(vf_config, iov_node_guid_name)) { node_guid = nvlist_get_number(vf_config, iov_node_guid_name); error = -mlx5_modify_nic_vport_node_guid(core_dev, vfnum + 1, node_guid); if (error != 0) { mlx5_core_err(core_dev, "modifying node GUID for VF %d failed, error %d\n", vfnum + 1, error); } } if (nvlist_exists_number(vf_config, iov_port_guid_name)) { port_guid = nvlist_get_number(vf_config, iov_port_guid_name); error = -mlx5_modify_nic_vport_port_guid(core_dev, vfnum + 1, port_guid); if (error != 0) { mlx5_core_err(core_dev, "modifying port GUID for VF %d failed, error %d\n", vfnum + 1, error); } } error = -mlx5_eswitch_set_vport_state(priv->eswitch, vfnum + 1, VPORT_STATE_FOLLOW); if (error != 0) { mlx5_core_err(core_dev, "upping vport for VF %d failed, error %d\n", vfnum + 1, error); } error = -mlx5_core_enable_hca(core_dev, vfnum + 1); if (error != 0) { mlx5_core_err(core_dev, "enabling VF %d failed, error %d\n", vfnum + 1, error); } return (error); } #endif static int mlx5_try_fast_unload(struct mlx5_core_dev *dev) { bool fast_teardown, force_teardown; int err; if (!mlx5_fast_unload_enabled) { mlx5_core_dbg(dev, "fast unload is disabled by user\n"); return -EOPNOTSUPP; } fast_teardown = MLX5_CAP_GEN(dev, fast_teardown); force_teardown = MLX5_CAP_GEN(dev, force_teardown); mlx5_core_dbg(dev, "force teardown firmware support=%d\n", force_teardown); mlx5_core_dbg(dev, "fast teardown firmware support=%d\n", fast_teardown); if (!fast_teardown && !force_teardown) return -EOPNOTSUPP; if (dev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR) { mlx5_core_dbg(dev, "Device in internal error state, giving up\n"); return -EAGAIN; } /* Panic tear down fw command will stop the PCI bus communication * with the HCA, so the health polll is no longer needed. 
*/ mlx5_drain_health_wq(dev); mlx5_stop_health_poll(dev, false); err = mlx5_cmd_fast_teardown_hca(dev); if (!err) goto done; err = mlx5_cmd_force_teardown_hca(dev); if (!err) goto done; mlx5_core_dbg(dev, "Firmware couldn't do fast unload error: %d\n", err); mlx5_start_health_poll(dev); return err; done: mlx5_enter_error_state(dev, true); return 0; } static void mlx5_shutdown_disable_interrupts(struct mlx5_core_dev *mdev) { int nvec = mdev->priv.eq_table.num_comp_vectors + MLX5_EQ_VEC_COMP_BASE; int x; mdev->priv.disable_irqs = 1; /* wait for all IRQ handlers to finish processing */ for (x = 0; x != nvec; x++) synchronize_irq(mdev->priv.msix_arr[x].vector); } static void shutdown_one(struct pci_dev *pdev) { struct mlx5_core_dev *dev = pci_get_drvdata(pdev); struct mlx5_priv *priv = &dev->priv; int err; /* enter polling mode */ mlx5_cmd_use_polling(dev); set_bit(MLX5_INTERFACE_STATE_TEARDOWN, &dev->intf_state); /* disable all interrupts */ mlx5_shutdown_disable_interrupts(dev); err = mlx5_try_fast_unload(dev); if (err) mlx5_unload_one(dev, priv, false); mlx5_pci_disable_device(dev); } static const struct pci_device_id mlx5_core_pci_table[] = { { PCI_VDEVICE(MELLANOX, 4113) }, /* Connect-IB */ { PCI_VDEVICE(MELLANOX, 4114) }, /* Connect-IB VF */ { PCI_VDEVICE(MELLANOX, 4115) }, /* ConnectX-4 */ { PCI_VDEVICE(MELLANOX, 4116) }, /* ConnectX-4 VF */ { PCI_VDEVICE(MELLANOX, 4117) }, /* ConnectX-4LX */ { PCI_VDEVICE(MELLANOX, 4118) }, /* ConnectX-4LX VF */ { PCI_VDEVICE(MELLANOX, 4119) }, /* ConnectX-5, PCIe 3.0 */ { PCI_VDEVICE(MELLANOX, 4120) }, /* ConnectX-5 VF */ { PCI_VDEVICE(MELLANOX, 4121) }, /* ConnectX-5 Ex */ { PCI_VDEVICE(MELLANOX, 4122) }, /* ConnectX-5 Ex VF */ { PCI_VDEVICE(MELLANOX, 4123) }, /* ConnectX-6 */ { PCI_VDEVICE(MELLANOX, 4124) }, /* ConnectX-6 VF */ { PCI_VDEVICE(MELLANOX, 4125) }, /* ConnectX-6 Dx */ { PCI_VDEVICE(MELLANOX, 4126) }, /* ConnectX Family mlx5Gen Virtual Function */ { PCI_VDEVICE(MELLANOX, 4127) }, /* ConnectX-6 LX */ { PCI_VDEVICE(MELLANOX, 4128) }, { PCI_VDEVICE(MELLANOX, 4129) }, { PCI_VDEVICE(MELLANOX, 4130) }, { PCI_VDEVICE(MELLANOX, 4131) }, { PCI_VDEVICE(MELLANOX, 4132) }, { PCI_VDEVICE(MELLANOX, 4133) }, { PCI_VDEVICE(MELLANOX, 4134) }, { PCI_VDEVICE(MELLANOX, 4135) }, { PCI_VDEVICE(MELLANOX, 4136) }, { PCI_VDEVICE(MELLANOX, 4137) }, { PCI_VDEVICE(MELLANOX, 4138) }, { PCI_VDEVICE(MELLANOX, 4139) }, { PCI_VDEVICE(MELLANOX, 4140) }, { PCI_VDEVICE(MELLANOX, 4141) }, { PCI_VDEVICE(MELLANOX, 4142) }, { PCI_VDEVICE(MELLANOX, 4143) }, { PCI_VDEVICE(MELLANOX, 4144) }, { PCI_VDEVICE(MELLANOX, 0xa2d2) }, /* BlueField integrated ConnectX-5 network controller */ { PCI_VDEVICE(MELLANOX, 0xa2d3) }, /* BlueField integrated ConnectX-5 network controller VF */ { PCI_VDEVICE(MELLANOX, 0xa2d6) }, /* BlueField-2 integrated ConnectX-6 Dx network controller */ { } }; MODULE_DEVICE_TABLE(pci, mlx5_core_pci_table); void mlx5_disable_device(struct mlx5_core_dev *dev) { mlx5_pci_err_detected(dev->pdev, 0); } void mlx5_recover_device(struct mlx5_core_dev *dev) { mlx5_pci_disable_device(dev); if (mlx5_pci_slot_reset(dev->pdev) == PCI_ERS_RESULT_RECOVERED) mlx5_pci_resume(dev->pdev); } struct pci_driver mlx5_core_driver = { .name = DRIVER_NAME, .id_table = mlx5_core_pci_table, .shutdown = shutdown_one, .probe = init_one, .remove = remove_one, .err_handler = &mlx5_err_handler, #ifdef PCI_IOV .bsd_iov_init = mlx5_iov_init, .bsd_iov_uninit = mlx5_iov_uninit, .bsd_iov_add_vf = mlx5_iov_add_vf, #endif }; static int __init init(void) { int err; err = 
pci_register_driver(&mlx5_core_driver); if (err) goto err_debug; err = mlx5_ctl_init(); if (err) goto err_ctl; return 0; err_ctl: pci_unregister_driver(&mlx5_core_driver); err_debug: return err; } static void __exit cleanup(void) { mlx5_ctl_fini(); pci_unregister_driver(&mlx5_core_driver); } module_init_order(init, SI_ORDER_FIRST); module_exit_order(cleanup, SI_ORDER_FIRST); diff --git a/sys/dev/mlx5/mlx5_core/mlx5_uar.c b/sys/dev/mlx5/mlx5_core/mlx5_uar.c index 43a95d64bc88..1a662ade0aee 100644 --- a/sys/dev/mlx5/mlx5_core/mlx5_uar.c +++ b/sys/dev/mlx5/mlx5_core/mlx5_uar.c @@ -1,203 +1,323 @@ /*- - * Copyright (c) 2013-2017, Mellanox Technologies, Ltd. All rights reserved. + * Copyright (c) 2013-2020, Mellanox Technologies. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS `AS IS' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* * $FreeBSD$ */ #include #include #include #include #include "mlx5_core.h" int mlx5_cmd_alloc_uar(struct mlx5_core_dev *dev, u32 *uarn) { - u32 in[MLX5_ST_SZ_DW(alloc_uar_in)] = {0}; u32 out[MLX5_ST_SZ_DW(alloc_uar_out)] = {0}; + u32 in[MLX5_ST_SZ_DW(alloc_uar_in)] = {0}; int err; MLX5_SET(alloc_uar_in, in, opcode, MLX5_CMD_OP_ALLOC_UAR); - err = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); - if (err) - return err; - - *uarn = MLX5_GET(alloc_uar_out, out, uar); - - return 0; + if (!err) + *uarn = MLX5_GET(alloc_uar_out, out, uar); + return err; } EXPORT_SYMBOL(mlx5_cmd_alloc_uar); int mlx5_cmd_free_uar(struct mlx5_core_dev *dev, u32 uarn) { - u32 in[MLX5_ST_SZ_DW(dealloc_uar_in)] = {0}; u32 out[MLX5_ST_SZ_DW(dealloc_uar_out)] = {0}; + u32 in[MLX5_ST_SZ_DW(dealloc_uar_in)] = {0}; MLX5_SET(dealloc_uar_in, in, opcode, MLX5_CMD_OP_DEALLOC_UAR); MLX5_SET(dealloc_uar_in, in, uar, uarn); - return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); } EXPORT_SYMBOL(mlx5_cmd_free_uar); -static int need_uuar_lock(int uuarn) +static int uars_per_sys_page(struct mlx5_core_dev *mdev) { - int tot_uuars = NUM_DRIVER_UARS * MLX5_BF_REGS_PER_PAGE; - - if (uuarn == 0 || tot_uuars - NUM_LOW_LAT_UUARS) - return 0; + if (MLX5_CAP_GEN(mdev, uar_4k)) + return MLX5_CAP_GEN(mdev, num_of_uars_per_page); return 1; } -int mlx5_alloc_uuars(struct mlx5_core_dev *dev, struct mlx5_uuar_info *uuari) +static u64 uar2pfn(struct mlx5_core_dev *mdev, u32 index) { - int tot_uuars = NUM_DRIVER_UARS * MLX5_BF_REGS_PER_PAGE; - struct mlx5_bf *bf; - phys_addr_t addr; - int err; - int i; + u32 system_page_index; - uuari->num_uars = NUM_DRIVER_UARS; - uuari->num_low_latency_uuars = NUM_LOW_LAT_UUARS; + if (MLX5_CAP_GEN(mdev, uar_4k)) + system_page_index = index >> (PAGE_SHIFT - MLX5_ADAPTER_PAGE_SHIFT); + else + system_page_index = index; - mutex_init(&uuari->lock); - uuari->uars = kcalloc(uuari->num_uars, sizeof(*uuari->uars), GFP_KERNEL); + return (pci_resource_start(mdev->pdev, 0) >> PAGE_SHIFT) + system_page_index; +} - uuari->bfs = kcalloc(tot_uuars, sizeof(*uuari->bfs), GFP_KERNEL); +static void up_rel_func(struct kref *kref) +{ + struct mlx5_uars_page *up = container_of(kref, struct mlx5_uars_page, ref_count); + + list_del(&up->list); + iounmap(up->map); + if (mlx5_cmd_free_uar(up->mdev, up->index)) + mlx5_core_warn(up->mdev, "failed to free uar index %d\n", up->index); + bitmap_free(up->reg_bitmap); + bitmap_free(up->fp_bitmap); + kfree(up); +} - uuari->bitmap = kcalloc(BITS_TO_LONGS(tot_uuars), sizeof(*uuari->bitmap), - GFP_KERNEL); +static struct mlx5_uars_page *alloc_uars_page(struct mlx5_core_dev *mdev, + bool map_wc) +{ + struct mlx5_uars_page *up; + int err = -ENOMEM; + phys_addr_t pfn; + int bfregs; + int i; - uuari->count = kcalloc(tot_uuars, sizeof(*uuari->count), GFP_KERNEL); + bfregs = uars_per_sys_page(mdev) * MLX5_BFREGS_PER_UAR; + up = kzalloc(sizeof(*up), GFP_KERNEL); + if (!up) + return ERR_PTR(err); - for (i = 0; i < uuari->num_uars; i++) { - err = mlx5_cmd_alloc_uar(dev, &uuari->uars[i].index); - if (err) - goto out_count; + up->mdev = mdev; + up->reg_bitmap = bitmap_zalloc(bfregs, GFP_KERNEL); + if (!up->reg_bitmap) + goto error1; - addr = pci_resource_start(dev->pdev, 0) + - ((phys_addr_t)(uuari->uars[i].index) << PAGE_SHIFT); - uuari->uars[i].map = ioremap(addr, PAGE_SIZE); - if (!uuari->uars[i].map) { - mlx5_cmd_free_uar(dev, uuari->uars[i].index); - err = -ENOMEM; - goto out_count; - } - mlx5_core_dbg(dev, "allocated uar index 0x%x, mmaped at %p\n", - uuari->uars[i].index, 
uuari->uars[i].map); - } + up->fp_bitmap = bitmap_zalloc(bfregs, GFP_KERNEL); + if (!up->fp_bitmap) + goto error1; - for (i = 0; i < tot_uuars; i++) { - bf = &uuari->bfs[i]; - - bf->buf_size = (1 << MLX5_CAP_GEN(dev, log_bf_reg_size)) / 2; - bf->uar = &uuari->uars[i / MLX5_BF_REGS_PER_PAGE]; - bf->regreg = uuari->uars[i / MLX5_BF_REGS_PER_PAGE].map; - bf->reg = NULL; /* Add WC support */ - bf->offset = (i % MLX5_BF_REGS_PER_PAGE) * - (1 << MLX5_CAP_GEN(dev, log_bf_reg_size)) + - MLX5_BF_OFFSET; - bf->need_lock = need_uuar_lock(i); - spin_lock_init(&bf->lock); - spin_lock_init(&bf->lock32); - bf->uuarn = i; - } + for (i = 0; i < bfregs; i++) + if ((i % MLX5_BFREGS_PER_UAR) < MLX5_NON_FP_BFREGS_PER_UAR) + set_bit(i, up->reg_bitmap); + else + set_bit(i, up->fp_bitmap); - return 0; + up->bfregs = bfregs; + up->fp_avail = bfregs * MLX5_FP_BFREGS_PER_UAR / MLX5_BFREGS_PER_UAR; + up->reg_avail = bfregs * MLX5_NON_FP_BFREGS_PER_UAR / MLX5_BFREGS_PER_UAR; -out_count: - for (i--; i >= 0; i--) { - iounmap(uuari->uars[i].map); - mlx5_cmd_free_uar(dev, uuari->uars[i].index); + err = mlx5_cmd_alloc_uar(mdev, &up->index); + if (err) { + mlx5_core_warn(mdev, "mlx5_cmd_alloc_uar() failed, %d\n", err); + goto error1; } - kfree(uuari->count); - kfree(uuari->bitmap); + pfn = uar2pfn(mdev, up->index); + if (map_wc) { + up->map = ioremap_wc(pfn << PAGE_SHIFT, PAGE_SIZE); + if (!up->map) { + err = -EAGAIN; + goto error2; + } + } else { + up->map = ioremap(pfn << PAGE_SHIFT, PAGE_SIZE); + if (!up->map) { + err = -ENOMEM; + goto error2; + } + } + kref_init(&up->ref_count); + mlx5_core_dbg(mdev, "allocated UAR page: index %d, total bfregs %d\n", + up->index, up->bfregs); + return up; + +error2: + if (mlx5_cmd_free_uar(mdev, up->index)) + mlx5_core_warn(mdev, "failed to free uar index %d\n", up->index); +error1: + bitmap_free(up->fp_bitmap); + bitmap_free(up->reg_bitmap); + kfree(up); + return ERR_PTR(err); +} - kfree(uuari->bfs); +struct mlx5_uars_page *mlx5_get_uars_page(struct mlx5_core_dev *mdev) +{ + struct mlx5_uars_page *ret; + + mutex_lock(&mdev->priv.bfregs.reg_head.lock); + if (!list_empty(&mdev->priv.bfregs.reg_head.list)) { + ret = list_first_entry(&mdev->priv.bfregs.reg_head.list, + struct mlx5_uars_page, list); + kref_get(&ret->ref_count); + goto out; + } + ret = alloc_uars_page(mdev, false); + if (IS_ERR(ret)) + goto out; + list_add(&ret->list, &mdev->priv.bfregs.reg_head.list); +out: + mutex_unlock(&mdev->priv.bfregs.reg_head.lock); + + return ret; +} +EXPORT_SYMBOL(mlx5_get_uars_page); - kfree(uuari->uars); - return err; +void mlx5_put_uars_page(struct mlx5_core_dev *mdev, struct mlx5_uars_page *up) +{ + mutex_lock(&mdev->priv.bfregs.reg_head.lock); + kref_put(&up->ref_count, up_rel_func); + mutex_unlock(&mdev->priv.bfregs.reg_head.lock); } +EXPORT_SYMBOL(mlx5_put_uars_page); -int mlx5_free_uuars(struct mlx5_core_dev *dev, struct mlx5_uuar_info *uuari) +static unsigned long map_offset(struct mlx5_core_dev *mdev, int dbi) { - int i = uuari->num_uars; + /* return the offset in bytes from the start of the page to the + * blue flame area of the UAR + */ + return dbi / MLX5_BFREGS_PER_UAR * MLX5_ADAPTER_PAGE_SIZE + + (dbi % MLX5_BFREGS_PER_UAR) * + (1 << MLX5_CAP_GEN(mdev, log_bf_reg_size)) + MLX5_BF_OFFSET; +} - for (i--; i >= 0; i--) { - iounmap(uuari->uars[i].map); - mlx5_cmd_free_uar(dev, uuari->uars[i].index); +static int alloc_bfreg(struct mlx5_core_dev *mdev, struct mlx5_sq_bfreg *bfreg, + bool map_wc, bool fast_path) +{ + struct mlx5_bfreg_data *bfregs; + struct mlx5_uars_page *up; + struct 
list_head *head; + unsigned long *bitmap; + unsigned int *avail; + struct mutex *lock; /* pointer to right mutex */ + int dbi; + + bfregs = &mdev->priv.bfregs; + if (map_wc) { + head = &bfregs->wc_head.list; + lock = &bfregs->wc_head.lock; + } else { + head = &bfregs->reg_head.list; + lock = &bfregs->reg_head.lock; } - - kfree(uuari->count); - kfree(uuari->bitmap); - kfree(uuari->bfs); - kfree(uuari->uars); + mutex_lock(lock); + if (list_empty(head)) { + up = alloc_uars_page(mdev, map_wc); + if (IS_ERR(up)) { + mutex_unlock(lock); + return PTR_ERR(up); + } + list_add(&up->list, head); + } else { + up = list_entry(head->next, struct mlx5_uars_page, list); + kref_get(&up->ref_count); + } + if (fast_path) { + bitmap = up->fp_bitmap; + avail = &up->fp_avail; + } else { + bitmap = up->reg_bitmap; + avail = &up->reg_avail; + } + dbi = find_first_bit(bitmap, up->bfregs); + clear_bit(dbi, bitmap); + (*avail)--; + if (!(*avail)) + list_del(&up->list); + + bfreg->map = up->map + map_offset(mdev, dbi); + bfreg->up = up; + bfreg->wc = map_wc; + bfreg->index = up->index + dbi / MLX5_BFREGS_PER_UAR; + mutex_unlock(lock); return 0; } -int mlx5_alloc_map_uar(struct mlx5_core_dev *mdev, struct mlx5_uar *uar) +int mlx5_alloc_bfreg(struct mlx5_core_dev *mdev, struct mlx5_sq_bfreg *bfreg, + bool map_wc, bool fast_path) { - phys_addr_t pfn; - phys_addr_t uar_bar_start; int err; - err = mlx5_cmd_alloc_uar(mdev, &uar->index); - if (err) { - mlx5_core_warn(mdev, "mlx5_cmd_alloc_uar() failed, %d\n", err); - return err; - } + err = alloc_bfreg(mdev, bfreg, map_wc, fast_path); + if (!err) + return 0; - uar_bar_start = pci_resource_start(mdev->pdev, 0); - pfn = (uar_bar_start >> PAGE_SHIFT) + uar->index; - uar->map = ioremap(pfn << PAGE_SHIFT, PAGE_SIZE); - if (!uar->map) { - mlx5_core_warn(mdev, "ioremap() failed, %d\n", err); - err = -ENOMEM; - goto err_free_uar; - } + if (err == -EAGAIN && map_wc) + return alloc_bfreg(mdev, bfreg, false, fast_path); - if (mdev->priv.bf_mapping) - uar->bf_map = io_mapping_map_wc(mdev->priv.bf_mapping, - uar->index << PAGE_SHIFT, - PAGE_SIZE); + return err; +} +EXPORT_SYMBOL(mlx5_alloc_bfreg); - return 0; +static unsigned int addr_to_dbi_in_syspage(struct mlx5_core_dev *dev, + struct mlx5_uars_page *up, + struct mlx5_sq_bfreg *bfreg) +{ + unsigned int uar_idx; + unsigned int bfreg_idx; + unsigned int bf_reg_size; -err_free_uar: - mlx5_cmd_free_uar(mdev, uar->index); + bf_reg_size = 1 << MLX5_CAP_GEN(dev, log_bf_reg_size); - return err; + uar_idx = (bfreg->map - up->map) >> MLX5_ADAPTER_PAGE_SHIFT; + bfreg_idx = (((uintptr_t)bfreg->map % MLX5_ADAPTER_PAGE_SIZE) - MLX5_BF_OFFSET) / bf_reg_size; + + return uar_idx * MLX5_BFREGS_PER_UAR + bfreg_idx; } -EXPORT_SYMBOL(mlx5_alloc_map_uar); -void mlx5_unmap_free_uar(struct mlx5_core_dev *mdev, struct mlx5_uar *uar) +void mlx5_free_bfreg(struct mlx5_core_dev *mdev, struct mlx5_sq_bfreg *bfreg) { - io_mapping_unmap(uar->bf_map); - iounmap(uar->map); - mlx5_cmd_free_uar(mdev, uar->index); + struct mlx5_bfreg_data *bfregs; + struct mlx5_uars_page *up; + struct mutex *lock; /* pointer to right mutex */ + unsigned int dbi; + bool fp; + unsigned int *avail; + unsigned long *bitmap; + struct list_head *head; + + bfregs = &mdev->priv.bfregs; + if (bfreg->wc) { + head = &bfregs->wc_head.list; + lock = &bfregs->wc_head.lock; + } else { + head = &bfregs->reg_head.list; + lock = &bfregs->reg_head.lock; + } + up = bfreg->up; + dbi = addr_to_dbi_in_syspage(mdev, up, bfreg); + fp = (dbi % MLX5_BFREGS_PER_UAR) >= MLX5_NON_FP_BFREGS_PER_UAR; + if (fp) { + 
avail = &up->fp_avail; + bitmap = up->fp_bitmap; + } else { + avail = &up->reg_avail; + bitmap = up->reg_bitmap; + } + mutex_lock(lock); + (*avail)++; + set_bit(dbi, bitmap); + if (*avail == 1) + list_add_tail(&up->list, head); + + kref_put(&up->ref_count, up_rel_func); + mutex_unlock(lock); } -EXPORT_SYMBOL(mlx5_unmap_free_uar); +EXPORT_SYMBOL(mlx5_free_bfreg); diff --git a/sys/dev/mlx5/mlx5_en/en.h b/sys/dev/mlx5/mlx5_en/en.h index 4c3d696ed41c..b7c05264008a 100644 --- a/sys/dev/mlx5/mlx5_en/en.h +++ b/sys/dev/mlx5/mlx5_en/en.h @@ -1,1206 +1,1199 @@ /*- * Copyright (c) 2015-2019 Mellanox Technologies. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS `AS IS' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* * $FreeBSD$ */ #ifndef _MLX5_EN_H_ #define _MLX5_EN_H_ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "opt_rss.h" #ifdef RSS #include #include #endif #include #include #include #include #include #include #include #include #include #include #define MLX5E_MAX_PRIORITY 8 #define MLX5E_MAX_FEC_10X_25X 4 #define MLX5E_MAX_FEC_50X 4 /* IEEE 802.1Qaz standard supported values */ #define IEEE_8021QAZ_MAX_TCS 8 #define MLX5E_PARAMS_MINIMUM_LOG_SQ_SIZE 0x7 #define MLX5E_PARAMS_DEFAULT_LOG_SQ_SIZE 0xa #define MLX5E_PARAMS_MAXIMUM_LOG_SQ_SIZE 0xe #define MLX5E_PARAMS_MINIMUM_LOG_RQ_SIZE 0x7 #define MLX5E_PARAMS_DEFAULT_LOG_RQ_SIZE 0xa #define MLX5E_PARAMS_MAXIMUM_LOG_RQ_SIZE 0xe #define MLX5E_MAX_BUSDMA_RX_SEGS 15 #ifndef MLX5E_MAX_RX_BYTES #define MLX5E_MAX_RX_BYTES MCLBYTES #endif #define MLX5E_PARAMS_DEFAULT_LRO_WQE_SZ \ MIN(65535, 7 * MLX5E_MAX_RX_BYTES) #define MLX5E_DIM_DEFAULT_PROFILE 3 #define MLX5E_DIM_MAX_RX_CQ_MODERATION_PKTS_WITH_LRO 16 #define MLX5E_PARAMS_DEFAULT_RX_CQ_MODERATION_USEC 0x10 #define MLX5E_PARAMS_DEFAULT_RX_CQ_MODERATION_USEC_FROM_CQE 0x3 #define MLX5E_PARAMS_DEFAULT_RX_CQ_MODERATION_PKTS 0x20 #define MLX5E_PARAMS_DEFAULT_TX_CQ_MODERATION_USEC 0x10 #define MLX5E_PARAMS_DEFAULT_TX_CQ_MODERATION_PKTS 0x20 #define MLX5E_PARAMS_DEFAULT_MIN_RX_WQES 0x80 #define MLX5E_PARAMS_DEFAULT_RX_HASH_LOG_TBL_SZ 0x7 #define MLX5E_CACHELINE_SIZE CACHE_LINE_SIZE #define MLX5E_HW2SW_MTU(hwmtu) \ ((hwmtu) - (ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN + ETHER_CRC_LEN)) #define MLX5E_SW2HW_MTU(swmtu) \ ((swmtu) + (ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN + ETHER_CRC_LEN)) #define MLX5E_SW2MB_MTU(swmtu) \ (MLX5E_SW2HW_MTU(swmtu) + MLX5E_NET_IP_ALIGN) #define MLX5E_MTU_MIN 72 /* Min MTU allowed by the kernel */ #define MLX5E_MTU_MAX MIN(ETHERMTU_JUMBO, MJUM16BYTES) /* Max MTU of Ethernet * jumbo frames */ #define MLX5E_BUDGET_MAX 8192 /* RX and TX */ #define MLX5E_RX_BUDGET_MAX 256 #define MLX5E_SQ_BF_BUDGET 16 #define MLX5E_SQ_TX_QUEUE_SIZE 4096 /* SQ drbr queue size */ #define MLX5E_MAX_TX_NUM_TC 8 /* units */ #define MLX5E_MAX_TX_HEADER 128 /* bytes */ #define MLX5E_MAX_TX_PAYLOAD_SIZE 65536 /* bytes */ #define MLX5E_MAX_TX_MBUF_SIZE 65536 /* bytes */ #define MLX5E_MAX_TX_MBUF_FRAGS \ ((MLX5_SEND_WQE_MAX_WQEBBS * MLX5_SEND_WQEBB_NUM_DS) - \ (MLX5E_MAX_TX_HEADER / MLX5_SEND_WQE_DS) - \ 1 /* the maximum value of the DS counter is 0x3F and not 0x40 */) /* units */ #define MLX5E_MAX_TX_INLINE \ (MLX5E_MAX_TX_HEADER - sizeof(struct mlx5e_tx_wqe) + \ sizeof(((struct mlx5e_tx_wqe *)0)->eth.inline_hdr_start)) /* bytes */ #define MLX5E_100MB (100000) #define MLX5E_1GB (1000000) #define MLX5E_ZERO(ptr, field) \ memset(&(ptr)->field, 0, \ sizeof(*(ptr)) - __offsetof(__typeof(*(ptr)), field)) MALLOC_DECLARE(M_MLX5EN); struct mlx5_core_dev; struct mlx5e_cq; typedef void (mlx5e_cq_comp_t)(struct mlx5_core_cq *, struct mlx5_eqe *); #define mlx5_en_err(_dev, format, ...) \ if_printf(_dev, "ERR: ""%s:%d:(pid %d): " format, \ __func__, __LINE__, curthread->td_proc->p_pid, \ ##__VA_ARGS__) #define mlx5_en_warn(_dev, format, ...) \ if_printf(_dev, "WARN: ""%s:%d:(pid %d): " format, \ __func__, __LINE__, curthread->td_proc->p_pid, \ ##__VA_ARGS__) #define mlx5_en_info(_dev, format, ...) \ if_printf(_dev, "INFO: ""%s:%d:(pid %d): " format, \ __func__, __LINE__, curthread->td_proc->p_pid, \ ##__VA_ARGS__) #define MLX5E_STATS_COUNT(a, ...) 
a #define MLX5E_STATS_VAR(a, b, c, ...) b c; #define MLX5E_STATS_COUNTER(a, b, c, ...) counter_##b##_t c; #define MLX5E_STATS_DESC(a, b, c, d, e, ...) d, e, #define MLX5E_VPORT_STATS(m) \ /* HW counters */ \ m(+1, u64, rx_packets, "rx_packets", "Received packets") \ m(+1, u64, rx_bytes, "rx_bytes", "Received bytes") \ m(+1, u64, tx_packets, "tx_packets", "Transmitted packets") \ m(+1, u64, tx_bytes, "tx_bytes", "Transmitted bytes") \ m(+1, u64, rx_error_packets, "rx_error_packets", "Received error packets") \ m(+1, u64, rx_error_bytes, "rx_error_bytes", "Received error bytes") \ m(+1, u64, tx_error_packets, "tx_error_packets", "Transmitted error packets") \ m(+1, u64, tx_error_bytes, "tx_error_bytes", "Transmitted error bytes") \ m(+1, u64, rx_unicast_packets, "rx_unicast_packets", "Received unicast packets") \ m(+1, u64, rx_unicast_bytes, "rx_unicast_bytes", "Received unicast bytes") \ m(+1, u64, tx_unicast_packets, "tx_unicast_packets", "Transmitted unicast packets") \ m(+1, u64, tx_unicast_bytes, "tx_unicast_bytes", "Transmitted unicast bytes") \ m(+1, u64, rx_multicast_packets, "rx_multicast_packets", "Received multicast packets") \ m(+1, u64, rx_multicast_bytes, "rx_multicast_bytes", "Received multicast bytes") \ m(+1, u64, tx_multicast_packets, "tx_multicast_packets", "Transmitted multicast packets") \ m(+1, u64, tx_multicast_bytes, "tx_multicast_bytes", "Transmitted multicast bytes") \ m(+1, u64, rx_broadcast_packets, "rx_broadcast_packets", "Received broadcast packets") \ m(+1, u64, rx_broadcast_bytes, "rx_broadcast_bytes", "Received broadcast bytes") \ m(+1, u64, tx_broadcast_packets, "tx_broadcast_packets", "Transmitted broadcast packets") \ m(+1, u64, tx_broadcast_bytes, "tx_broadcast_bytes", "Transmitted broadcast bytes") \ m(+1, u64, rx_out_of_buffer, "rx_out_of_buffer", "Receive out of buffer, no recv wqes events") \ /* SW counters */ \ m(+1, u64, tso_packets, "tso_packets", "Transmitted TSO packets") \ m(+1, u64, tso_bytes, "tso_bytes", "Transmitted TSO bytes") \ m(+1, u64, lro_packets, "lro_packets", "Received LRO packets") \ m(+1, u64, lro_bytes, "lro_bytes", "Received LRO bytes") \ m(+1, u64, sw_lro_queued, "sw_lro_queued", "Packets queued for SW LRO") \ m(+1, u64, sw_lro_flushed, "sw_lro_flushed", "Packets flushed from SW LRO") \ m(+1, u64, rx_csum_good, "rx_csum_good", "Received checksum valid packets") \ m(+1, u64, rx_csum_none, "rx_csum_none", "Received no checksum packets") \ m(+1, u64, tx_csum_offload, "tx_csum_offload", "Transmit checksum offload packets") \ m(+1, u64, tx_queue_dropped, "tx_queue_dropped", "Transmit queue dropped") \ m(+1, u64, tx_defragged, "tx_defragged", "Transmit queue defragged") \ m(+1, u64, rx_wqe_err, "rx_wqe_err", "Receive WQE errors") \ m(+1, u64, tx_jumbo_packets, "tx_jumbo_packets", "TX packets greater than 1518 octets") \ m(+1, u64, rx_steer_missed_packets, "rx_steer_missed_packets", "RX packets dropped by steering rule(s)") #define MLX5E_VPORT_STATS_NUM (0 MLX5E_VPORT_STATS(MLX5E_STATS_COUNT)) struct mlx5e_vport_stats { struct sysctl_ctx_list ctx; u64 arg [0]; MLX5E_VPORT_STATS(MLX5E_STATS_VAR) }; #define MLX5E_PPORT_IEEE802_3_STATS(m) \ m(+1, u64, frames_tx, "frames_tx", "Frames transmitted") \ m(+1, u64, frames_rx, "frames_rx", "Frames received") \ m(+1, u64, check_seq_err, "check_seq_err", "Sequence errors") \ m(+1, u64, alignment_err, "alignment_err", "Alignment errors") \ m(+1, u64, octets_tx, "octets_tx", "Bytes transmitted") \ m(+1, u64, octets_received, "octets_received", "Bytes received") \ m(+1, u64, multicast_xmitted, 
"multicast_xmitted", "Multicast transmitted") \ m(+1, u64, broadcast_xmitted, "broadcast_xmitted", "Broadcast transmitted") \ m(+1, u64, multicast_rx, "multicast_rx", "Multicast received") \ m(+1, u64, broadcast_rx, "broadcast_rx", "Broadcast received") \ m(+1, u64, in_range_len_errors, "in_range_len_errors", "In range length errors") \ m(+1, u64, out_of_range_len, "out_of_range_len", "Out of range length errors") \ m(+1, u64, too_long_errors, "too_long_errors", "Too long errors") \ m(+1, u64, symbol_err, "symbol_err", "Symbol errors") \ m(+1, u64, mac_control_tx, "mac_control_tx", "MAC control transmitted") \ m(+1, u64, mac_control_rx, "mac_control_rx", "MAC control received") \ m(+1, u64, unsupported_op_rx, "unsupported_op_rx", "Unsupported operation received") \ m(+1, u64, pause_ctrl_rx, "pause_ctrl_rx", "Pause control received") \ m(+1, u64, pause_ctrl_tx, "pause_ctrl_tx", "Pause control transmitted") #define MLX5E_PPORT_RFC2819_STATS(m) \ m(+1, u64, drop_events, "drop_events", "Dropped events") \ m(+1, u64, octets, "octets", "Octets") \ m(+1, u64, pkts, "pkts", "Packets") \ m(+1, u64, broadcast_pkts, "broadcast_pkts", "Broadcast packets") \ m(+1, u64, multicast_pkts, "multicast_pkts", "Multicast packets") \ m(+1, u64, crc_align_errors, "crc_align_errors", "CRC alignment errors") \ m(+1, u64, undersize_pkts, "undersize_pkts", "Undersized packets") \ m(+1, u64, oversize_pkts, "oversize_pkts", "Oversized packets") \ m(+1, u64, fragments, "fragments", "Fragments") \ m(+1, u64, jabbers, "jabbers", "Jabbers") \ m(+1, u64, collisions, "collisions", "Collisions") #define MLX5E_PPORT_RFC2819_STATS_DEBUG(m) \ m(+1, u64, p64octets, "p64octets", "Bytes") \ m(+1, u64, p65to127octets, "p65to127octets", "Bytes") \ m(+1, u64, p128to255octets, "p128to255octets", "Bytes") \ m(+1, u64, p256to511octets, "p256to511octets", "Bytes") \ m(+1, u64, p512to1023octets, "p512to1023octets", "Bytes") \ m(+1, u64, p1024to1518octets, "p1024to1518octets", "Bytes") \ m(+1, u64, p1519to2047octets, "p1519to2047octets", "Bytes") \ m(+1, u64, p2048to4095octets, "p2048to4095octets", "Bytes") \ m(+1, u64, p4096to8191octets, "p4096to8191octets", "Bytes") \ m(+1, u64, p8192to10239octets, "p8192to10239octets", "Bytes") #define MLX5E_PPORT_RFC2863_STATS_DEBUG(m) \ m(+1, u64, in_octets, "in_octets", "In octets") \ m(+1, u64, in_ucast_pkts, "in_ucast_pkts", "In unicast packets") \ m(+1, u64, in_discards, "in_discards", "In discards") \ m(+1, u64, in_errors, "in_errors", "In errors") \ m(+1, u64, in_unknown_protos, "in_unknown_protos", "In unknown protocols") \ m(+1, u64, out_octets, "out_octets", "Out octets") \ m(+1, u64, out_ucast_pkts, "out_ucast_pkts", "Out unicast packets") \ m(+1, u64, out_discards, "out_discards", "Out discards") \ m(+1, u64, out_errors, "out_errors", "Out errors") \ m(+1, u64, in_multicast_pkts, "in_multicast_pkts", "In multicast packets") \ m(+1, u64, in_broadcast_pkts, "in_broadcast_pkts", "In broadcast packets") \ m(+1, u64, out_multicast_pkts, "out_multicast_pkts", "Out multicast packets") \ m(+1, u64, out_broadcast_pkts, "out_broadcast_pkts", "Out broadcast packets") #define MLX5E_PPORT_ETHERNET_EXTENDED_STATS_DEBUG(m) \ m(+1, u64, port_transmit_wait, "port_transmit_wait", "Port transmit wait") \ m(+1, u64, ecn_marked, "ecn_marked", "ECN marked") \ m(+1, u64, no_buffer_discard_mc, "no_buffer_discard_mc", "No buffer discard mc") \ m(+1, u64, rx_ebp, "rx_ebp", "RX EBP") \ m(+1, u64, tx_ebp, "tx_ebp", "TX EBP") \ m(+1, u64, rx_buffer_almost_full, "rx_buffer_almost_full", "RX buffer almost full") \ m(+1, 
u64, rx_buffer_full, "rx_buffer_full", "RX buffer full") \ m(+1, u64, rx_icrc_encapsulated, "rx_icrc_encapsulated", "RX ICRC encapsulated") \ m(+1, u64, ex_reserved_0, "ex_reserved_0", "Reserved") \ m(+1, u64, ex_reserved_1, "ex_reserved_1", "Reserved") \ m(+1, u64, tx_stat_p64octets, "tx_stat_p64octets", "Bytes") \ m(+1, u64, tx_stat_p65to127octets, "tx_stat_p65to127octets", "Bytes") \ m(+1, u64, tx_stat_p128to255octets, "tx_stat_p128to255octets", "Bytes") \ m(+1, u64, tx_stat_p256to511octets, "tx_stat_p256to511octets", "Bytes") \ m(+1, u64, tx_stat_p512to1023octets, "tx_stat_p512to1023octets", "Bytes") \ m(+1, u64, tx_stat_p1024to1518octets, "tx_stat_p1024to1518octets", "Bytes") \ m(+1, u64, tx_stat_p1519to2047octets, "tx_stat_p1519to2047octets", "Bytes") \ m(+1, u64, tx_stat_p2048to4095octets, "tx_stat_p2048to4095octets", "Bytes") \ m(+1, u64, tx_stat_p4096to8191octets, "tx_stat_p4096to8191octets", "Bytes") \ m(+1, u64, tx_stat_p8192to10239octets, "tx_stat_p8192to10239octets", "Bytes") #define MLX5E_PPORT_STATISTICAL_DEBUG(m) \ m(+1, u64, phy_time_since_last_clear, "phy_time_since_last_clear", \ "Time since last clear in milliseconds") \ m(+1, u64, phy_received_bits, "phy_received_bits", \ "Total amount of traffic received in bits before error correction") \ m(+1, u64, phy_symbol_errors, "phy_symbol_errors", \ "Total number of symbol errors before error correction") \ m(+1, u64, phy_corrected_bits, "phy_corrected_bits", \ "Total number of corrected bits ") \ m(+1, u64, phy_corrected_bits_lane0, "phy_corrected_bits_lane0", \ "Total number of corrected bits for lane 0") \ m(+1, u64, phy_corrected_bits_lane1, "phy_corrected_bits_lane1", \ "Total number of corrected bits for lane 1") \ m(+1, u64, phy_corrected_bits_lane2, "phy_corrected_bits_lane2", \ "Total number of corrected bits for lane 2") \ m(+1, u64, phy_corrected_bits_lane3, "phy_corrected_bits_lane3", \ "Total number of corrected bits for lane 3") #define MLX5E_PPORT_PHYSICAL_LAYER_STATS_DEBUG(m) \ m(+1, u64, time_since_last_clear, "time_since_last_clear", \ "Time since the last counters clear event (msec)") \ m(+1, u64, symbol_errors, "symbol_errors", "Symbol errors") \ m(+1, u64, sync_headers_errors, "sync_headers_errors", \ "Sync header error counter") \ m(+1, u64, bip_errors_lane0, "edpl_bip_errors_lane0", \ "Indicates the number of PRBS errors on lane 0") \ m(+1, u64, bip_errors_lane1, "edpl_bip_errors_lane1", \ "Indicates the number of PRBS errors on lane 1") \ m(+1, u64, bip_errors_lane2, "edpl_bip_errors_lane2", \ "Indicates the number of PRBS errors on lane 2") \ m(+1, u64, bip_errors_lane3, "edpl_bip_errors_lane3", \ "Indicates the number of PRBS errors on lane 3") \ m(+1, u64, fc_corrected_blocks_lane0, "fc_corrected_blocks_lane0", \ "FEC correctable block counter lane 0") \ m(+1, u64, fc_corrected_blocks_lane1, "fc_corrected_blocks_lane1", \ "FEC correctable block counter lane 1") \ m(+1, u64, fc_corrected_blocks_lane2, "fc_corrected_blocks_lane2", \ "FEC correctable block counter lane 2") \ m(+1, u64, fc_corrected_blocks_lane3, "fc_corrected_blocks_lane3", \ "FEC correctable block counter lane 3") \ m(+1, u64, rs_corrected_blocks, "rs_corrected_blocks", \ "FEC correcable block counter") \ m(+1, u64, rs_uncorrectable_blocks, "rs_uncorrectable_blocks", \ "FEC uncorrecable block counter") \ m(+1, u64, rs_no_errors_blocks, "rs_no_errors_blocks", \ "The number of RS-FEC blocks received that had no errors") \ m(+1, u64, rs_single_error_blocks, "rs_single_error_blocks", \ "The number of corrected RS-FEC blocks received that 
had" \ "exactly 1 error symbol") \ m(+1, u64, rs_corrected_symbols_total, "rs_corrected_symbols_total", \ "Port FEC corrected symbol counter") \ m(+1, u64, rs_corrected_symbols_lane0, "rs_corrected_symbols_lane0", \ "FEC corrected symbol counter lane 0") \ m(+1, u64, rs_corrected_symbols_lane1, "rs_corrected_symbols_lane1", \ "FEC corrected symbol counter lane 1") \ m(+1, u64, rs_corrected_symbols_lane2, "rs_corrected_symbols_lane2", \ "FEC corrected symbol counter lane 2") \ m(+1, u64, rs_corrected_symbols_lane3, "rs_corrected_symbols_lane3", \ "FEC corrected symbol counter lane 3") /* Per priority statistics for PFC */ #define MLX5E_PPORT_PER_PRIO_STATS_SUB(m,n,p) \ m(n, p, +1, u64, rx_octets, "rx_octets", "Received octets") \ m(n, p, +1, u64, rx_uc_frames, "rx_uc_frames", "Received unicast frames") \ m(n, p, +1, u64, rx_mc_frames, "rx_mc_frames", "Received multicast frames") \ m(n, p, +1, u64, rx_bc_frames, "rx_bc_frames", "Received broadcast frames") \ m(n, p, +1, u64, rx_frames, "rx_frames", "Received frames") \ m(n, p, +1, u64, tx_octets, "tx_octets", "Transmitted octets") \ m(n, p, +1, u64, tx_uc_frames, "tx_uc_frames", "Transmitted unicast frames") \ m(n, p, +1, u64, tx_mc_frames, "tx_mc_frames", "Transmitted multicast frames") \ m(n, p, +1, u64, tx_bc_frames, "tx_bc_frames", "Transmitted broadcast frames") \ m(n, p, +1, u64, tx_frames, "tx_frames", "Transmitted frames") \ m(n, p, +1, u64, rx_pause, "rx_pause", "Received pause frames") \ m(n, p, +1, u64, rx_pause_duration, "rx_pause_duration", \ "Received pause duration") \ m(n, p, +1, u64, tx_pause, "tx_pause", "Transmitted pause frames") \ m(n, p, +1, u64, tx_pause_duration, "tx_pause_duration", \ "Transmitted pause duration") \ m(n, p, +1, u64, rx_pause_transition, "rx_pause_transition", \ "Received pause transitions") \ m(n, p, +1, u64, rx_discards, "rx_discards", "Discarded received frames") \ m(n, p, +1, u64, device_stall_minor_watermark, \ "device_stall_minor_watermark", "Device stall minor watermark") \ m(n, p, +1, u64, device_stall_critical_watermark, \ "device_stall_critical_watermark", "Device stall critical watermark") #define MLX5E_PPORT_PER_PRIO_STATS_PREFIX(m,p,c,t,f,s,d) \ m(c, t, pri_##p##_##f, "prio" #p "_" s, "Priority " #p " - " d) #define MLX5E_PPORT_PER_PRIO_STATS_NUM_PRIO 8 #define MLX5E_PPORT_PER_PRIO_STATS(m) \ MLX5E_PPORT_PER_PRIO_STATS_SUB(MLX5E_PPORT_PER_PRIO_STATS_PREFIX,m,0) \ MLX5E_PPORT_PER_PRIO_STATS_SUB(MLX5E_PPORT_PER_PRIO_STATS_PREFIX,m,1) \ MLX5E_PPORT_PER_PRIO_STATS_SUB(MLX5E_PPORT_PER_PRIO_STATS_PREFIX,m,2) \ MLX5E_PPORT_PER_PRIO_STATS_SUB(MLX5E_PPORT_PER_PRIO_STATS_PREFIX,m,3) \ MLX5E_PPORT_PER_PRIO_STATS_SUB(MLX5E_PPORT_PER_PRIO_STATS_PREFIX,m,4) \ MLX5E_PPORT_PER_PRIO_STATS_SUB(MLX5E_PPORT_PER_PRIO_STATS_PREFIX,m,5) \ MLX5E_PPORT_PER_PRIO_STATS_SUB(MLX5E_PPORT_PER_PRIO_STATS_PREFIX,m,6) \ MLX5E_PPORT_PER_PRIO_STATS_SUB(MLX5E_PPORT_PER_PRIO_STATS_PREFIX,m,7) #define MLX5E_PCIE_PERFORMANCE_COUNTERS_64(m) \ m(+1, u64, life_time_counter_high, "life_time_counter", \ "Life time counter.", pcie_perf_counters) \ m(+1, u64, tx_overflow_buffer_pkt, "tx_overflow_buffer_pkt", \ "The number of packets dropped due to lack of PCIe buffers " \ "in receive path from NIC port toward the hosts.", \ pcie_perf_counters) \ m(+1, u64, tx_overflow_buffer_marked_pkt, \ "tx_overflow_buffer_marked_pkt", \ "The number of packets marked due to lack of PCIe buffers " \ "in receive path from NIC port toward the hosts.", \ pcie_perf_counters) #define MLX5E_PCIE_PERFORMANCE_COUNTERS_32(m) \ m(+1, u64, rx_errors, 
"rx_errors", \ "Number of transitions to recovery due to Framing " \ "errors and CRC errors.", pcie_perf_counters) \ m(+1, u64, tx_errors, "tx_errors", "Number of transitions " \ "to recovery due to EIEOS and TS errors.", pcie_perf_counters) \ m(+1, u64, l0_to_recovery_eieos, "l0_to_recovery_eieos", "Number of " \ "transitions to recovery due to getting EIEOS.", pcie_perf_counters)\ m(+1, u64, l0_to_recovery_ts, "l0_to_recovery_ts", "Number of " \ "transitions to recovery due to getting TS.", pcie_perf_counters) \ m(+1, u64, l0_to_recovery_framing, "l0_to_recovery_framing", "Number "\ "of transitions to recovery due to identifying framing " \ "errors at gen3/4.", pcie_perf_counters) \ m(+1, u64, l0_to_recovery_retrain, "l0_to_recovery_retrain", \ "Number of transitions to recovery due to link retrain request " \ "from data link.", pcie_perf_counters) \ m(+1, u64, crc_error_dllp, "crc_error_dllp", "Number of transitions " \ "to recovery due to identifying CRC DLLP errors.", \ pcie_perf_counters) \ m(+1, u64, crc_error_tlp, "crc_error_tlp", "Number of transitions to "\ "recovery due to identifying CRC TLP errors.", pcie_perf_counters) \ m(+1, u64, outbound_stalled_reads, "outbound_stalled_reads", \ "The percentage of time within the last second that the NIC had " \ "outbound non-posted read requests but could not perform the " \ "operation due to insufficient non-posted credits.", \ pcie_perf_counters) \ m(+1, u64, outbound_stalled_writes, "outbound_stalled_writes", \ "The percentage of time within the last second that the NIC had " \ "outbound posted writes requests but could not perform the " \ "operation due to insufficient posted credits.", \ pcie_perf_counters) \ m(+1, u64, outbound_stalled_reads_events, \ "outbound_stalled_reads_events", "The number of events where " \ "outbound_stalled_reads was above a threshold.", \ pcie_perf_counters) \ m(+1, u64, outbound_stalled_writes_events, \ "outbound_stalled_writes_events", \ "The number of events where outbound_stalled_writes was above " \ "a threshold.", pcie_perf_counters) #define MLX5E_PCIE_TIMERS_AND_STATES_COUNTERS_32(m) \ m(+1, u64, time_to_boot_image_start, "time_to_boot_image_start", \ "Time from start until FW boot image starts running in usec.", \ pcie_timers_states) \ m(+1, u64, time_to_link_image, "time_to_link_image", \ "Time from start until FW pci_link image starts running in usec.", \ pcie_timers_states) \ m(+1, u64, calibration_time, "calibration_time", \ "Time it took FW to do calibration in usec.", \ pcie_timers_states) \ m(+1, u64, time_to_first_perst, "time_to_first_perst", \ "Time form start until FW handle first perst. 
in usec.", \ pcie_timers_states) \ m(+1, u64, time_to_detect_state, "time_to_detect_state", \ "Time from start until first transition to LTSSM.Detect_Q in usec", \ pcie_timers_states) \ m(+1, u64, time_to_l0, "time_to_l0", \ "Time from start until first transition to LTSSM.L0 in usec", \ pcie_timers_states) \ m(+1, u64, time_to_crs_en, "time_to_crs_en", \ "Time from start until crs is enabled in usec", \ pcie_timers_states) \ m(+1, u64, time_to_plastic_image_start, "time_to_plastic_image_start",\ "Time form start until FW plastic image starts running in usec.", \ pcie_timers_states) \ m(+1, u64, time_to_iron_image_start, "time_to_iron_image_start", \ "Time form start until FW iron image starts running in usec.", \ pcie_timers_states) \ m(+1, u64, perst_handler, "perst_handler", \ "Number of persts arrived.", pcie_timers_states) \ m(+1, u64, times_in_l1, "times_in_l1", \ "Number of times LTSSM entered L1 flow.", pcie_timers_states) \ m(+1, u64, times_in_l23, "times_in_l23", \ "Number of times LTSSM entered L23 flow.", pcie_timers_states) \ m(+1, u64, dl_down, "dl_down", \ "Number of moves for DL_active to DL_down.", pcie_timers_states) \ m(+1, u64, config_cycle1usec, "config_cycle1usec", \ "Number of configuration requests that firmware " \ "handled in less than 1 usec.", pcie_timers_states) \ m(+1, u64, config_cycle2to7usec, "config_cycle2to7usec", \ "Number of configuration requests that firmware " \ "handled within 2 to 7 usec.", pcie_timers_states) \ m(+1, u64, config_cycle8to15usec, "config_cycle8to15usec", \ "Number of configuration requests that firmware " \ "handled within 8 to 15 usec.", pcie_timers_states) \ m(+1, u64, config_cycle16to63usec, "config_cycle16to63usec", \ "Number of configuration requests that firmware " \ "handled within 16 to 63 usec.", pcie_timers_states) \ m(+1, u64, config_cycle64usec, "config_cycle64usec", \ "Number of configuration requests that firmware " \ "handled took more than 64 usec.", pcie_timers_states) \ m(+1, u64, correctable_err_msg_sent, "correctable_err_msg_sent", \ "Number of correctable error messages sent.", pcie_timers_states) \ m(+1, u64, non_fatal_err_msg_sent, "non_fatal_err_msg_sent", \ "Number of non-Fatal error msg sent.", pcie_timers_states) \ m(+1, u64, fatal_err_msg_sent, "fatal_err_msg_sent", \ "Number of fatal error msg sent.", pcie_timers_states) #define MLX5E_PCIE_LANE_COUNTERS_32(m) \ m(+1, u64, error_counter_lane0, "error_counter_lane0", \ "Error counter for PCI lane 0", pcie_lanes_counters) \ m(+1, u64, error_counter_lane1, "error_counter_lane1", \ "Error counter for PCI lane 1", pcie_lanes_counters) \ m(+1, u64, error_counter_lane2, "error_counter_lane2", \ "Error counter for PCI lane 2", pcie_lanes_counters) \ m(+1, u64, error_counter_lane3, "error_counter_lane3", \ "Error counter for PCI lane 3", pcie_lanes_counters) \ m(+1, u64, error_counter_lane4, "error_counter_lane4", \ "Error counter for PCI lane 4", pcie_lanes_counters) \ m(+1, u64, error_counter_lane5, "error_counter_lane5", \ "Error counter for PCI lane 5", pcie_lanes_counters) \ m(+1, u64, error_counter_lane6, "error_counter_lane6", \ "Error counter for PCI lane 6", pcie_lanes_counters) \ m(+1, u64, error_counter_lane7, "error_counter_lane7", \ "Error counter for PCI lane 7", pcie_lanes_counters) \ m(+1, u64, error_counter_lane8, "error_counter_lane8", \ "Error counter for PCI lane 8", pcie_lanes_counters) \ m(+1, u64, error_counter_lane9, "error_counter_lane9", \ "Error counter for PCI lane 9", pcie_lanes_counters) \ m(+1, u64, error_counter_lane10, 
"error_counter_lane10", \ "Error counter for PCI lane 10", pcie_lanes_counters) \ m(+1, u64, error_counter_lane11, "error_counter_lane11", \ "Error counter for PCI lane 11", pcie_lanes_counters) \ m(+1, u64, error_counter_lane12, "error_counter_lane12", \ "Error counter for PCI lane 12", pcie_lanes_counters) \ m(+1, u64, error_counter_lane13, "error_counter_lane13", \ "Error counter for PCI lane 13", pcie_lanes_counters) \ m(+1, u64, error_counter_lane14, "error_counter_lane14", \ "Error counter for PCI lane 14", pcie_lanes_counters) \ m(+1, u64, error_counter_lane15, "error_counter_lane15", \ "Error counter for PCI lane 15", pcie_lanes_counters) /* * Make sure to update mlx5e_update_pport_counters() * when adding a new MLX5E_PPORT_STATS block */ #define MLX5E_PPORT_STATS(m) \ MLX5E_PPORT_PER_PRIO_STATS(m) \ MLX5E_PPORT_IEEE802_3_STATS(m) \ MLX5E_PPORT_RFC2819_STATS(m) #define MLX5E_PORT_STATS_DEBUG(m) \ MLX5E_PPORT_RFC2819_STATS_DEBUG(m) \ MLX5E_PPORT_RFC2863_STATS_DEBUG(m) \ MLX5E_PPORT_PHYSICAL_LAYER_STATS_DEBUG(m) \ MLX5E_PPORT_ETHERNET_EXTENDED_STATS_DEBUG(m) \ MLX5E_PPORT_STATISTICAL_DEBUG(m) \ MLX5E_PCIE_PERFORMANCE_COUNTERS_64(m) \ MLX5E_PCIE_PERFORMANCE_COUNTERS_32(m) \ MLX5E_PCIE_TIMERS_AND_STATES_COUNTERS_32(m) \ MLX5E_PCIE_LANE_COUNTERS_32(m) #define MLX5E_PPORT_IEEE802_3_STATS_NUM \ (0 MLX5E_PPORT_IEEE802_3_STATS(MLX5E_STATS_COUNT)) #define MLX5E_PPORT_RFC2819_STATS_NUM \ (0 MLX5E_PPORT_RFC2819_STATS(MLX5E_STATS_COUNT)) #define MLX5E_PPORT_STATS_NUM \ (0 MLX5E_PPORT_STATS(MLX5E_STATS_COUNT)) #define MLX5E_PPORT_PER_PRIO_STATS_NUM \ (0 MLX5E_PPORT_PER_PRIO_STATS(MLX5E_STATS_COUNT)) #define MLX5E_PPORT_RFC2819_STATS_DEBUG_NUM \ (0 MLX5E_PPORT_RFC2819_STATS_DEBUG(MLX5E_STATS_COUNT)) #define MLX5E_PPORT_RFC2863_STATS_DEBUG_NUM \ (0 MLX5E_PPORT_RFC2863_STATS_DEBUG(MLX5E_STATS_COUNT)) #define MLX5E_PPORT_PHYSICAL_LAYER_STATS_DEBUG_NUM \ (0 MLX5E_PPORT_PHYSICAL_LAYER_STATS_DEBUG(MLX5E_STATS_COUNT)) #define MLX5E_PPORT_ETHERNET_EXTENDED_STATS_DEBUG_NUM \ (0 MLX5E_PPORT_ETHERNET_EXTENDED_STATS_DEBUG(MLX5E_STATS_COUNT)) #define MLX5E_PPORT_STATISTICAL_DEBUG_NUM \ (0 MLX5E_PPORT_STATISTICAL_DEBUG(MLX5E_STATS_COUNT)) #define MLX5E_PORT_STATS_DEBUG_NUM \ (0 MLX5E_PORT_STATS_DEBUG(MLX5E_STATS_COUNT)) struct mlx5e_pport_stats { struct sysctl_ctx_list ctx; u64 arg [0]; MLX5E_PPORT_STATS(MLX5E_STATS_VAR) }; struct mlx5e_port_stats_debug { struct sysctl_ctx_list ctx; u64 arg [0]; MLX5E_PORT_STATS_DEBUG(MLX5E_STATS_VAR) }; #define MLX5E_RQ_STATS(m) \ m(+1, u64, packets, "packets", "Received packets") \ m(+1, u64, bytes, "bytes", "Received bytes") \ m(+1, u64, csum_none, "csum_none", "Received packets") \ m(+1, u64, lro_packets, "lro_packets", "Received LRO packets") \ m(+1, u64, lro_bytes, "lro_bytes", "Received LRO bytes") \ m(+1, u64, sw_lro_queued, "sw_lro_queued", "Packets queued for SW LRO") \ m(+1, u64, sw_lro_flushed, "sw_lro_flushed", "Packets flushed from SW LRO") \ m(+1, u64, wqe_err, "wqe_err", "Received packets") #define MLX5E_RQ_STATS_NUM (0 MLX5E_RQ_STATS(MLX5E_STATS_COUNT)) struct mlx5e_rq_stats { struct sysctl_ctx_list ctx; u64 arg [0]; MLX5E_RQ_STATS(MLX5E_STATS_VAR) }; #define MLX5E_SQ_STATS(m) \ m(+1, u64, packets, "packets", "Transmitted packets") \ m(+1, u64, bytes, "bytes", "Transmitted bytes") \ m(+1, u64, tso_packets, "tso_packets", "Transmitted packets") \ m(+1, u64, tso_bytes, "tso_bytes", "Transmitted bytes") \ m(+1, u64, csum_offload_none, "csum_offload_none", "Transmitted packets") \ m(+1, u64, defragged, "defragged", "Transmitted packets") \ m(+1, u64, dropped, 
"dropped", "Transmitted packets") \ m(+1, u64, enobuf, "enobuf", "Transmitted packets") \ m(+1, u64, nop, "nop", "Transmitted packets") #define MLX5E_SQ_STATS_NUM (0 MLX5E_SQ_STATS(MLX5E_STATS_COUNT)) struct mlx5e_sq_stats { struct sysctl_ctx_list ctx; u64 arg [0]; MLX5E_SQ_STATS(MLX5E_STATS_VAR) }; struct mlx5e_stats { struct mlx5e_vport_stats vport; struct mlx5e_pport_stats pport; struct mlx5e_port_stats_debug port_stats_debug; }; struct mlx5e_rq_param { u32 rqc [MLX5_ST_SZ_DW(rqc)]; struct mlx5_wq_param wq; }; struct mlx5e_sq_param { u32 sqc [MLX5_ST_SZ_DW(sqc)]; struct mlx5_wq_param wq; }; struct mlx5e_cq_param { u32 cqc [MLX5_ST_SZ_DW(cqc)]; struct mlx5_wq_param wq; }; struct mlx5e_params { u8 log_sq_size; u8 log_rq_size; u16 num_channels; u8 default_vlan_prio; u8 num_tc; u8 rx_cq_moderation_mode; u8 tx_cq_moderation_mode; u16 rx_cq_moderation_usec; u16 rx_cq_moderation_pkts; u16 tx_cq_moderation_usec; u16 tx_cq_moderation_pkts; u16 min_rx_wqes; bool hw_lro_en; bool cqe_zipping_en; u32 lro_wqe_sz; u16 rx_hash_log_tbl_sz; u32 tx_pauseframe_control __aligned(4); u32 rx_pauseframe_control __aligned(4); u16 tx_max_inline; u8 tx_min_inline_mode; u8 tx_priority_flow_control; u8 rx_priority_flow_control; u8 channels_rsss; }; #define MLX5E_PARAMS(m) \ m(+1, u64, tx_queue_size_max, "tx_queue_size_max", "Max send queue size") \ m(+1, u64, rx_queue_size_max, "rx_queue_size_max", "Max receive queue size") \ m(+1, u64, tx_queue_size, "tx_queue_size", "Default send queue size") \ m(+1, u64, rx_queue_size, "rx_queue_size", "Default receive queue size") \ m(+1, u64, channels, "channels", "Default number of channels") \ m(+1, u64, channels_rsss, "channels_rsss", "Default channels receive side scaling stride") \ m(+1, u64, coalesce_usecs_max, "coalesce_usecs_max", "Maximum usecs for joining packets") \ m(+1, u64, coalesce_pkts_max, "coalesce_pkts_max", "Maximum packets to join") \ m(+1, u64, rx_coalesce_usecs, "rx_coalesce_usecs", "Limit in usec for joining rx packets") \ m(+1, u64, rx_coalesce_pkts, "rx_coalesce_pkts", "Maximum number of rx packets to join") \ m(+1, u64, rx_coalesce_mode, "rx_coalesce_mode", "0: EQE fixed mode 1: CQE fixed mode 2: EQE auto mode 3: CQE auto mode") \ m(+1, u64, tx_coalesce_usecs, "tx_coalesce_usecs", "Limit in usec for joining tx packets") \ m(+1, u64, tx_coalesce_pkts, "tx_coalesce_pkts", "Maximum number of tx packets to join") \ m(+1, u64, tx_coalesce_mode, "tx_coalesce_mode", "0: EQE mode 1: CQE mode") \ m(+1, u64, tx_completion_fact, "tx_completion_fact", "1..MAX: Completion event ratio") \ m(+1, u64, tx_completion_fact_max, "tx_completion_fact_max", "Maximum completion event ratio") \ m(+1, u64, hw_lro, "hw_lro", "set to enable hw_lro") \ m(+1, u64, cqe_zipping, "cqe_zipping", "0 : CQE zipping disabled") \ m(+1, u64, modify_tx_dma, "modify_tx_dma", "0: Enable TX 1: Disable TX") \ m(+1, u64, modify_rx_dma, "modify_rx_dma", "0: Enable RX 1: Disable RX") \ m(+1, u64, diag_pci_enable, "diag_pci_enable", "0: Disabled 1: Enabled") \ m(+1, u64, diag_general_enable, "diag_general_enable", "0: Disabled 1: Enabled") \ m(+1, u64, hw_mtu, "hw_mtu", "Current hardware MTU value") \ m(+1, u64, mc_local_lb, "mc_local_lb", "0: Local multicast loopback enabled 1: Disabled") \ m(+1, u64, uc_local_lb, "uc_local_lb", "0: Local unicast loopback enabled 1: Disabled") #define MLX5E_PARAMS_NUM (0 MLX5E_PARAMS(MLX5E_STATS_COUNT)) struct mlx5e_params_ethtool { u64 arg [0]; MLX5E_PARAMS(MLX5E_STATS_VAR) u64 max_bw_value[IEEE_8021QAZ_MAX_TCS]; u8 max_bw_share[IEEE_8021QAZ_MAX_TCS]; u8 
prio_tc[MLX5E_MAX_PRIORITY]; u8 dscp2prio[MLX5_MAX_SUPPORTED_DSCP]; u8 trust_state; u8 fec_mask_10x_25x[MLX5E_MAX_FEC_10X_25X]; u16 fec_mask_50x[MLX5E_MAX_FEC_50X]; u8 fec_avail_10x_25x[MLX5E_MAX_FEC_10X_25X]; u16 fec_avail_50x[MLX5E_MAX_FEC_50X]; u32 fec_mode_active; u32 hw_mtu_msb; s32 hw_val_temp[MLX5_MAX_TEMPERATURE]; u32 hw_num_temp; }; struct mlx5e_cq { /* data path - accessed per cqe */ struct mlx5_cqwq wq; /* data path - accessed per HW polling */ struct mlx5_core_cq mcq; /* control */ struct mlx5e_priv *priv; struct mlx5_wq_ctrl wq_ctrl; } __aligned(MLX5E_CACHELINE_SIZE); struct mlx5e_rq_mbuf { bus_dmamap_t dma_map; caddr_t data; struct mbuf *mbuf; }; struct mlx5e_rq { /* persistent fields */ struct mtx mtx; struct mlx5e_rq_stats stats; struct callout watchdog; /* data path */ #define mlx5e_rq_zero_start wq struct mlx5_wq_ll wq; bus_dma_tag_t dma_tag; u32 wqe_sz; u32 nsegs; struct mlx5e_rq_mbuf *mbuf; struct ifnet *ifp; struct mlx5e_cq cq; struct lro_ctrl lro; volatile int enabled; int ix; /* Dynamic Interrupt Moderation */ struct net_dim dim; /* control */ struct mlx5_wq_ctrl wq_ctrl; u32 rqn; struct mlx5e_channel *channel; } __aligned(MLX5E_CACHELINE_SIZE); struct mlx5e_sq_mbuf { bus_dmamap_t dma_map; struct mbuf *mbuf; volatile s32 *p_refcount; /* in use refcount, if any */ u32 num_bytes; u32 num_wqebbs; }; enum { MLX5E_SQ_READY, MLX5E_SQ_FULL }; struct mlx5e_sq { /* persistent fields */ struct mtx lock; struct mtx comp_lock; struct mlx5e_sq_stats stats; struct callout cev_callout; /* data path */ #define mlx5e_sq_zero_start dma_tag bus_dma_tag_t dma_tag; /* dirtied @completion */ u16 cc; /* dirtied @xmit */ u16 pc __aligned(MLX5E_CACHELINE_SIZE); u16 bf_offset; u16 cev_counter; /* completion event counter */ u16 cev_factor; /* completion event factor */ u16 cev_next_state; /* next completion event state */ #define MLX5E_CEV_STATE_INITIAL 0 /* timer not started */ #define MLX5E_CEV_STATE_SEND_NOPS 1 /* send NOPs */ #define MLX5E_CEV_STATE_HOLD_NOPS 2 /* don't send NOPs yet */ u16 running; /* set if SQ is running */ union { u32 d32[2]; u64 d64; } doorbell; struct mlx5e_cq cq; /* pointers to per packet info: write@xmit, read@completion */ struct mlx5e_sq_mbuf *mbuf; /* read only */ struct mlx5_wq_cyc wq; - struct mlx5_uar uar; + void __iomem *uar_map; struct ifnet *ifp; u32 sqn; u32 bf_buf_size; u32 mkey_be; u16 max_inline; u8 min_inline_mode; u8 min_insert_caps; #define MLX5E_INSERT_VLAN 1 #define MLX5E_INSERT_NON_VLAN 2 /* control path */ struct mlx5_wq_ctrl wq_ctrl; struct mlx5e_priv *priv; int tc; } __aligned(MLX5E_CACHELINE_SIZE); static inline bool mlx5e_sq_has_room_for(struct mlx5e_sq *sq, u16 n) { u16 cc = sq->cc; u16 pc = sq->pc; return ((sq->wq.sz_m1 & (cc - pc)) >= n || cc == pc); } static inline u32 mlx5e_sq_queue_level(struct mlx5e_sq *sq) { u16 cc; u16 pc; if (sq == NULL) return (0); cc = sq->cc; pc = sq->pc; return (((sq->wq.sz_m1 & (pc - cc)) * IF_SND_QUEUE_LEVEL_MAX) / sq->wq.sz_m1); } struct mlx5e_channel { struct mlx5e_rq rq; struct m_snd_tag tag; struct mlx5e_sq sq[MLX5E_MAX_TX_NUM_TC]; struct mlx5e_priv *priv; struct completion completion; int ix; } __aligned(MLX5E_CACHELINE_SIZE); enum mlx5e_traffic_types { MLX5E_TT_IPV4_TCP, MLX5E_TT_IPV6_TCP, MLX5E_TT_IPV4_UDP, MLX5E_TT_IPV6_UDP, MLX5E_TT_IPV4_IPSEC_AH, MLX5E_TT_IPV6_IPSEC_AH, MLX5E_TT_IPV4_IPSEC_ESP, MLX5E_TT_IPV6_IPSEC_ESP, MLX5E_TT_IPV4, MLX5E_TT_IPV6, MLX5E_TT_ANY, MLX5E_NUM_TT, }; enum { MLX5E_RQT_SPREADING = 0, MLX5E_RQT_DEFAULT_RQ = 1, MLX5E_NUM_RQT = 2, }; struct mlx5_flow_rule; struct
mlx5e_eth_addr_info { u8 addr [ETH_ALEN + 2]; u32 tt_vec; /* flow table rule per traffic type */ struct mlx5_flow_rule *ft_rule[MLX5E_NUM_TT]; }; #define MLX5E_ETH_ADDR_HASH_SIZE (1 << BITS_PER_BYTE) struct mlx5e_eth_addr_hash_node; struct mlx5e_eth_addr_hash_head { struct mlx5e_eth_addr_hash_node *lh_first; }; struct mlx5e_eth_addr_db { struct mlx5e_eth_addr_hash_head if_uc[MLX5E_ETH_ADDR_HASH_SIZE]; struct mlx5e_eth_addr_hash_head if_mc[MLX5E_ETH_ADDR_HASH_SIZE]; struct mlx5e_eth_addr_info broadcast; struct mlx5e_eth_addr_info allmulti; struct mlx5e_eth_addr_info promisc; bool broadcast_enabled; bool allmulti_enabled; bool promisc_enabled; }; enum { MLX5E_STATE_ASYNC_EVENTS_ENABLE, MLX5E_STATE_OPENED, }; enum { MLX5_BW_NO_LIMIT = 0, MLX5_100_MBPS_UNIT = 3, MLX5_GBPS_UNIT = 4, }; struct mlx5e_vlan_db { unsigned long active_vlans[BITS_TO_LONGS(VLAN_N_VID)]; struct mlx5_flow_rule *active_vlans_ft_rule[VLAN_N_VID]; struct mlx5_flow_rule *untagged_ft_rule; struct mlx5_flow_rule *any_cvlan_ft_rule; struct mlx5_flow_rule *any_svlan_ft_rule; bool filter_disabled; }; struct mlx5e_flow_table { int num_groups; struct mlx5_flow_table *t; struct mlx5_flow_group **g; }; struct mlx5e_flow_tables { struct mlx5_flow_namespace *ns; struct mlx5e_flow_table vlan; struct mlx5e_flow_table main; struct mlx5e_flow_table inner_rss; }; struct mlx5e_xmit_args { volatile s32 *pref; u32 tisn; u16 ihs; }; #include "en_rl.h" #include "en_hw_tls.h" #define MLX5E_TSTMP_PREC 10 struct mlx5e_clbr_point { uint64_t base_curr; uint64_t base_prev; uint64_t clbr_hw_prev; uint64_t clbr_hw_curr; u_int clbr_gen; }; struct mlx5e_dcbx { u32 cable_len; u32 xoff; }; struct mlx5e_priv { struct mlx5_core_dev *mdev; /* must be first */ /* priv data path fields - start */ int order_base_2_num_channels; int queue_mapping_channel_mask; int num_tc; int default_vlan_prio; /* priv data path fields - end */ unsigned long state; int gone; #define PRIV_LOCK(priv) sx_xlock(&(priv)->state_lock) #define PRIV_UNLOCK(priv) sx_xunlock(&(priv)->state_lock) #define PRIV_LOCKED(priv) sx_xlocked(&(priv)->state_lock) #define PRIV_ASSERT_LOCKED(priv) sx_assert(&(priv)->state_lock, SA_XLOCKED) struct sx state_lock; /* Protects Interface state */ - struct mlx5_uar cq_uar; u32 pdn; u32 tdn; struct mlx5_core_mr mr; u32 tisn[MLX5E_MAX_TX_NUM_TC]; u32 rqtn; u32 tirn[MLX5E_NUM_TT]; struct mlx5e_flow_tables fts; struct mlx5e_eth_addr_db eth_addr; struct mlx5e_vlan_db vlan; struct mlx5e_params params; struct mlx5e_params_ethtool params_ethtool; union mlx5_core_pci_diagnostics params_pci; union mlx5_core_general_diagnostics params_general; struct mtx async_events_mtx; /* sync hw events */ struct work_struct update_stats_work; struct work_struct update_carrier_work; struct work_struct set_rx_mode_work; MLX5_DECLARE_DOORBELL_LOCK(doorbell_lock) struct ifnet *ifp; struct sysctl_ctx_list sysctl_ctx; struct sysctl_oid *sysctl_ifnet; struct sysctl_oid *sysctl_hw; int sysctl_debug; struct mlx5e_stats stats; int counter_set_id; struct workqueue_struct *wq; eventhandler_tag vlan_detach; eventhandler_tag vlan_attach; struct ifmedia media; int media_status_last; int media_active_last; struct callout watchdog; struct mlx5e_rl_priv_data rl; struct mlx5e_tls tls; struct callout tstmp_clbr; int clbr_done; int clbr_curr; struct mlx5e_clbr_point clbr_points[2]; u_int clbr_gen; struct mlx5e_dcbx dcbx; bool sw_is_port_buf_owner; + struct mlx5_sq_bfreg bfreg; + struct pfil_head *pfil; struct mlx5e_channel channel[]; }; #define MLX5E_NET_IP_ALIGN 2 struct mlx5e_tx_wqe { struct 
mlx5_wqe_ctrl_seg ctrl; struct mlx5_wqe_eth_seg eth; }; struct mlx5e_tx_umr_wqe { struct mlx5_wqe_ctrl_seg ctrl; struct mlx5_wqe_umr_ctrl_seg umr; uint8_t mkc[64]; }; struct mlx5e_tx_psv_wqe { struct mlx5_wqe_ctrl_seg ctrl; struct mlx5_seg_set_psv psv; }; struct mlx5e_rx_wqe { struct mlx5_wqe_srq_next_seg next; struct mlx5_wqe_data_seg data[]; }; /* the size of the structure above must be power of two */ CTASSERT(powerof2(sizeof(struct mlx5e_rx_wqe))); struct mlx5e_eeprom { int lock_bit; int i2c_addr; int page_num; int device_addr; int module_num; int len; int type; int page_valid; u32 *data; }; #define MLX5E_FLD_MAX(typ, fld) ((1ULL << __mlx5_bit_sz(typ, fld)) - 1ULL) bool mlx5e_do_send_cqe(struct mlx5e_sq *); int mlx5e_get_full_header_size(const struct mbuf *, const struct tcphdr **); int mlx5e_xmit(struct ifnet *, struct mbuf *); int mlx5e_open_locked(struct ifnet *); int mlx5e_close_locked(struct ifnet *); void mlx5e_cq_error_event(struct mlx5_core_cq *mcq, int event); mlx5e_cq_comp_t mlx5e_rx_cq_comp; mlx5e_cq_comp_t mlx5e_tx_cq_comp; struct mlx5_cqe64 *mlx5e_get_cqe(struct mlx5e_cq *cq); void mlx5e_dim_work(struct work_struct *); void mlx5e_dim_build_cq_param(struct mlx5e_priv *, struct mlx5e_cq_param *); int mlx5e_open_flow_table(struct mlx5e_priv *priv); void mlx5e_close_flow_table(struct mlx5e_priv *priv); void mlx5e_set_rx_mode_core(struct mlx5e_priv *priv); void mlx5e_set_rx_mode_work(struct work_struct *work); void mlx5e_vlan_rx_add_vid(void *, struct ifnet *, u16); void mlx5e_vlan_rx_kill_vid(void *, struct ifnet *, u16); void mlx5e_enable_vlan_filter(struct mlx5e_priv *priv); void mlx5e_disable_vlan_filter(struct mlx5e_priv *priv); int mlx5e_add_all_vlan_rules(struct mlx5e_priv *priv); void mlx5e_del_all_vlan_rules(struct mlx5e_priv *priv); static inline void -mlx5e_tx_notify_hw(struct mlx5e_sq *sq, u32 *wqe, int bf_sz) +mlx5e_tx_notify_hw(struct mlx5e_sq *sq, u32 *wqe) { u16 ofst = MLX5_BF_OFFSET + sq->bf_offset; /* ensure wqe is visible to device before updating doorbell record */ wmb(); *sq->wq.db = cpu_to_be32(sq->pc); /* * Ensure the doorbell record is visible to device before ringing * the doorbell: */ wmb(); - if (bf_sz) { - __iowrite64_copy(sq->uar.bf_map + ofst, wqe, bf_sz); - - /* flush the write-combining mapped buffer */ - wmb(); - - } else { - mlx5_write64(wqe, sq->uar.map + ofst, - MLX5_GET_DOORBELL_LOCK(&sq->priv->doorbell_lock)); - } + mlx5_write64(wqe, sq->uar_map + ofst, + MLX5_GET_DOORBELL_LOCK(&sq->priv->doorbell_lock)); sq->bf_offset ^= sq->bf_buf_size; } static inline void mlx5e_cq_arm(struct mlx5e_cq *cq, spinlock_t *dblock) { struct mlx5_core_cq *mcq; mcq = &cq->mcq; mlx5_cq_arm(mcq, MLX5_CQ_DB_REQ_NOT, mcq->uar->map, dblock, cq->wq.cc); } #define mlx5e_dbg(_IGN, _priv, ...) 
mlx5_core_dbg((_priv)->mdev, __VA_ARGS__) extern const struct ethtool_ops mlx5e_ethtool_ops; void mlx5e_create_ethtool(struct mlx5e_priv *); void mlx5e_create_stats(struct sysctl_ctx_list *, struct sysctl_oid_list *, const char *, const char **, unsigned, u64 *); void mlx5e_create_counter_stats(struct sysctl_ctx_list *, struct sysctl_oid_list *, const char *, const char **, unsigned, counter_u64_t *); void mlx5e_send_nop(struct mlx5e_sq *, u32); int mlx5e_sq_dump_xmit(struct mlx5e_sq *, struct mlx5e_xmit_args *, struct mbuf **); int mlx5e_sq_xmit(struct mlx5e_sq *, struct mbuf **); void mlx5e_sq_cev_timeout(void *); int mlx5e_refresh_channel_params(struct mlx5e_priv *); int mlx5e_open_cq(struct mlx5e_priv *, struct mlx5e_cq_param *, struct mlx5e_cq *, mlx5e_cq_comp_t *, int eq_ix); void mlx5e_close_cq(struct mlx5e_cq *); void mlx5e_free_sq_db(struct mlx5e_sq *); int mlx5e_alloc_sq_db(struct mlx5e_sq *); int mlx5e_enable_sq(struct mlx5e_sq *, struct mlx5e_sq_param *, int tis_num); int mlx5e_modify_sq(struct mlx5e_sq *, int curr_state, int next_state); void mlx5e_disable_sq(struct mlx5e_sq *); void mlx5e_drain_sq(struct mlx5e_sq *); void mlx5e_modify_tx_dma(struct mlx5e_priv *priv, uint8_t value); void mlx5e_modify_rx_dma(struct mlx5e_priv *priv, uint8_t value); void mlx5e_resume_sq(struct mlx5e_sq *sq); void mlx5e_update_sq_inline(struct mlx5e_sq *sq); void mlx5e_refresh_sq_inline(struct mlx5e_priv *priv); int mlx5e_update_buf_lossy(struct mlx5e_priv *priv); int mlx5e_fec_update(struct mlx5e_priv *priv); int mlx5e_hw_temperature_update(struct mlx5e_priv *priv); if_snd_tag_alloc_t mlx5e_ul_snd_tag_alloc; if_snd_tag_modify_t mlx5e_ul_snd_tag_modify; if_snd_tag_query_t mlx5e_ul_snd_tag_query; if_snd_tag_free_t mlx5e_ul_snd_tag_free; #endif /* _MLX5_EN_H_ */ diff --git a/sys/dev/mlx5/mlx5_en/en_rl.h b/sys/dev/mlx5/mlx5_en/en_rl.h index 6ad2f26e5a12..f30e8ba8cc07 100644 --- a/sys/dev/mlx5/mlx5_en/en_rl.h +++ b/sys/dev/mlx5/mlx5_en/en_rl.h @@ -1,176 +1,175 @@ /*- * Copyright (c) 2016 Mellanox Technologies. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS `AS IS' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* * $FreeBSD$ */ #ifndef __MLX5_EN_RL_H__ #define __MLX5_EN_RL_H__ #include #include #include #include #include #include #include #include #include #define MLX5E_RL_MAX_WORKERS 128 /* limited by Toeplitz hash */ #define MLX5E_RL_MAX_TX_RATES (64 * 1024) /* software limit */ #define MLX5E_RL_DEF_SQ_PER_WORKER (12 * 1024) /* software limit */ #define MLX5E_RL_MAX_SQS (120 * 1024) /* software limit */ #define MLX5E_RL_TX_COAL_USEC_DEFAULT 32 #define MLX5E_RL_TX_COAL_PKTS_DEFAULT 4 #define MLX5E_RL_TX_COAL_MODE_DEFAULT 0 #define MLX5E_RL_TX_COMP_FACT_DEFAULT 1 #define MLX5E_RL_WORKER_LOCK(rlw) mtx_lock(&(rlw)->mtx) #define MLX5E_RL_WORKER_UNLOCK(rlw) mtx_unlock(&(rlw)->mtx) #define MLX5E_RL_RLOCK(rl) sx_slock(&(rl)->rl_sxlock) #define MLX5E_RL_RUNLOCK(rl) sx_sunlock(&(rl)->rl_sxlock) #define MLX5E_RL_WLOCK(rl) sx_xlock(&(rl)->rl_sxlock) #define MLX5E_RL_WUNLOCK(rl) sx_xunlock(&(rl)->rl_sxlock) #define MLX5E_RL_PARAMS(m) \ m(+1, u64, tx_queue_size, "tx_queue_size", "Default send queue size") \ m(+1, u64, tx_coalesce_usecs, "tx_coalesce_usecs", "Limit in usec for joining TX packets") \ m(+1, u64, tx_coalesce_pkts, "tx_coalesce_pkts", "Maximum number of TX packets to join") \ m(+1, u64, tx_coalesce_mode, "tx_coalesce_mode", "0: EQE mode 1: CQE mode") \ m(+1, u64, tx_completion_fact, "tx_completion_fact", "1..MAX: Completion event ratio") \ m(+1, u64, tx_completion_fact_max, "tx_completion_fact_max", "Maximum completion event ratio") \ m(+1, u64, tx_worker_threads_max, "tx_worker_threads_max", "Max number of TX worker threads") \ m(+1, u64, tx_worker_threads_def, "tx_worker_threads_def", "Default number of TX worker threads") \ m(+1, u64, tx_channels_per_worker_max, "tx_channels_per_worker_max", "Max number of TX channels per worker") \ m(+1, u64, tx_channels_per_worker_def, "tx_channels_per_worker_def", "Default number of TX channels per worker") \ m(+1, u64, tx_rates_max, "tx_rates_max", "Max number of TX rates") \ m(+1, u64, tx_rates_def, "tx_rates_def", "Default number of TX rates") \ m(+1, u64, tx_limit_min, "tx_limit_min", "Minimum TX rate in bits/s") \ m(+1, u64, tx_limit_max, "tx_limit_max", "Maximum TX rate in bits/s") \ m(+1, u64, tx_burst_size, "tx_burst_size", "Current burst size in number of packets. 
A value of zero means use firmware default.") \ m(+1, u64, tx_burst_size_max, "tx_burst_size_max", "Maximum burst size in number of packets") \ m(+1, u64, tx_burst_size_min, "tx_burst_size_min", "Minimum burst size in number of packets") #define MLX5E_RL_PARAMS_NUM (0 MLX5E_RL_PARAMS(MLX5E_STATS_COUNT)) #define MLX5E_RL_STATS(m) \ m(+1, u64, tx_allocate_resource_failure, "tx_allocate_resource_failure", "Number of times firmware resource allocation failed") \ m(+1, u64, tx_add_new_rate_failure, "tx_add_new_rate_failure", "Number of times adding a new firmware rate failed") \ m(+1, u64, tx_modify_rate_failure, "tx_modify_rate_failure", "Number of times modifying a firmware rate failed") \ m(+1, u64, tx_active_connections, "tx_active_connections", "Number of active connections") \ m(+1, u64, tx_open_queues, "tx_open_queues", "Number of open TX queues") \ m(+1, u64, tx_available_resource_failure, "tx_available_resource_failure", "Number of times TX resources were not available") #define MLX5E_RL_STATS_NUM (0 MLX5E_RL_STATS(MLX5E_STATS_COUNT)) #define MLX5E_RL_TABLE_PARAMS(m) \ m(+1, u64, tx_limit_add, "tx_limit_add", "Add TX rate limit in bits/s to empty slot") \ m(+1, u64, tx_limit_clr, "tx_limit_clr", "Clear all TX rates in table") \ m(+1, u64, tx_allowed_deviation, "tx_allowed_deviation", "Relative rate deviation allowed in 1/1000") \ m(+1, u64, tx_allowed_deviation_min, "tx_allowed_deviation_min", "Minimum allowed rate deviation in 1/1000") \ m(+1, u64, tx_allowed_deviation_max, "tx_allowed_deviation_max", "Maximum allowed rate deviation in 1/1000") #define MLX5E_RL_TABLE_PARAMS_NUM (0 MLX5E_RL_TABLE_PARAMS(MLX5E_STATS_COUNT)) #define MLX5E_RL_PARAMS_INDEX(n) \ (__offsetof(struct mlx5e_rl_params, n) / sizeof(uint64_t)) struct mlx5e_priv; /* Indicates channel's state */ enum { MLX5E_RL_ST_FREE, MLX5E_RL_ST_USED, MLX5E_RL_ST_MODIFY, MLX5E_RL_ST_DESTROY, }; struct mlx5e_rl_stats { u64 arg [0]; MLX5E_RL_STATS(MLX5E_STATS_VAR) }; struct mlx5e_rl_params { u64 arg [0]; MLX5E_RL_PARAMS(MLX5E_STATS_VAR) u64 table_arg [0]; MLX5E_RL_TABLE_PARAMS(MLX5E_STATS_VAR) }; struct mlx5e_rl_channel_param { struct mlx5e_sq_param sq; struct mlx5e_cq_param cq; }; struct mlx5e_rl_channel { struct m_snd_tag tag; STAILQ_ENTRY(mlx5e_rl_channel) entry; struct mlx5e_sq * volatile sq; struct mlx5e_rl_worker *worker; uint64_t new_rate; uint64_t init_rate; uint64_t last_rate; uint16_t last_burst; uint16_t state; }; struct mlx5e_rl_worker { struct mtx mtx; struct cv cv; STAILQ_HEAD(, mlx5e_rl_channel) index_list_head; STAILQ_HEAD(, mlx5e_rl_channel) process_head; struct mlx5e_priv *priv; struct mlx5e_rl_channel *channels; unsigned worker_done; }; struct mlx5e_rl_priv_data { struct sx rl_sxlock; struct sysctl_ctx_list ctx; struct mlx5e_rl_channel_param chan_param; struct mlx5e_rl_params param; struct mlx5e_rl_stats stats; - struct mlx5_uar sq_uar; struct mlx5e_rl_worker *workers; struct mlx5e_priv *priv; uint64_t *rate_limit_table; unsigned opened; uint32_t tisn; }; int mlx5e_rl_init(struct mlx5e_priv *priv); void mlx5e_rl_cleanup(struct mlx5e_priv *priv); void mlx5e_rl_refresh_sq_inline(struct mlx5e_rl_priv_data *rl); if_snd_tag_alloc_t mlx5e_rl_snd_tag_alloc; if_snd_tag_modify_t mlx5e_rl_snd_tag_modify; if_snd_tag_query_t mlx5e_rl_snd_tag_query; if_snd_tag_free_t mlx5e_rl_snd_tag_free; #endif /* __MLX5_EN_RL_H__ */ diff --git a/sys/dev/mlx5/mlx5_en/mlx5_en_main.c b/sys/dev/mlx5/mlx5_en/mlx5_en_main.c index 1e6562fed667..9fc3af186c0d 100644 --- a/sys/dev/mlx5/mlx5_en/mlx5_en_main.c +++ b/sys/dev/mlx5/mlx5_en/mlx5_en_main.c 
@@ -1,4815 +1,4807 @@ /*- * Copyright (c) 2015-2018 Mellanox Technologies. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS `AS IS' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #include "opt_kern_tls.h" #include "en.h" #include #include #include #include #ifndef ETH_DRIVER_VERSION #define ETH_DRIVER_VERSION "3.6.0" #endif #define DRIVER_RELDATE "December 2020" static const char mlx5e_version[] = "mlx5en: Mellanox Ethernet driver " ETH_DRIVER_VERSION " (" DRIVER_RELDATE ")\n"; static int mlx5e_get_wqe_sz(struct mlx5e_priv *priv, u32 *wqe_sz, u32 *nsegs); struct mlx5e_channel_param { struct mlx5e_rq_param rq; struct mlx5e_sq_param sq; struct mlx5e_cq_param rx_cq; struct mlx5e_cq_param tx_cq; }; struct media { u32 subtype; u64 baudrate; }; static const struct media mlx5e_mode_table[MLX5E_LINK_SPEEDS_NUMBER][MLX5E_LINK_MODES_NUMBER] = { [MLX5E_1000BASE_CX_SGMII][MLX5E_SGMII] = { .subtype = IFM_1000_CX_SGMII, .baudrate = IF_Mbps(1000ULL), }, [MLX5E_1000BASE_KX][MLX5E_KX] = { .subtype = IFM_1000_KX, .baudrate = IF_Mbps(1000ULL), }, [MLX5E_10GBASE_CX4][MLX5E_CX4] = { .subtype = IFM_10G_CX4, .baudrate = IF_Gbps(10ULL), }, [MLX5E_10GBASE_KX4][MLX5E_KX4] = { .subtype = IFM_10G_KX4, .baudrate = IF_Gbps(10ULL), }, [MLX5E_10GBASE_KR][MLX5E_KR] = { .subtype = IFM_10G_KR, .baudrate = IF_Gbps(10ULL), }, [MLX5E_20GBASE_KR2][MLX5E_KR2] = { .subtype = IFM_20G_KR2, .baudrate = IF_Gbps(20ULL), }, [MLX5E_40GBASE_CR4][MLX5E_CR4] = { .subtype = IFM_40G_CR4, .baudrate = IF_Gbps(40ULL), }, [MLX5E_40GBASE_KR4][MLX5E_KR4] = { .subtype = IFM_40G_KR4, .baudrate = IF_Gbps(40ULL), }, [MLX5E_56GBASE_R4][MLX5E_R] = { .subtype = IFM_56G_R4, .baudrate = IF_Gbps(56ULL), }, [MLX5E_10GBASE_CR][MLX5E_CR1] = { .subtype = IFM_10G_CR1, .baudrate = IF_Gbps(10ULL), }, [MLX5E_10GBASE_SR][MLX5E_SR] = { .subtype = IFM_10G_SR, .baudrate = IF_Gbps(10ULL), }, [MLX5E_10GBASE_ER_LR][MLX5E_ER] = { .subtype = IFM_10G_ER, .baudrate = IF_Gbps(10ULL), }, [MLX5E_10GBASE_ER_LR][MLX5E_LR] = { .subtype = IFM_10G_LR, .baudrate = IF_Gbps(10ULL), }, [MLX5E_40GBASE_SR4][MLX5E_SR4] = { .subtype = IFM_40G_SR4, .baudrate = IF_Gbps(40ULL), }, [MLX5E_40GBASE_LR4_ER4][MLX5E_LR4] = { .subtype = IFM_40G_LR4, .baudrate = IF_Gbps(40ULL), }, [MLX5E_40GBASE_LR4_ER4][MLX5E_ER4] = { .subtype = IFM_40G_ER4, .baudrate = IF_Gbps(40ULL), }, [MLX5E_100GBASE_CR4][MLX5E_CR4] = { .subtype = IFM_100G_CR4, .baudrate = 
IF_Gbps(100ULL), }, [MLX5E_100GBASE_SR4][MLX5E_SR4] = { .subtype = IFM_100G_SR4, .baudrate = IF_Gbps(100ULL), }, [MLX5E_100GBASE_KR4][MLX5E_KR4] = { .subtype = IFM_100G_KR4, .baudrate = IF_Gbps(100ULL), }, [MLX5E_100GBASE_LR4][MLX5E_LR4] = { .subtype = IFM_100G_LR4, .baudrate = IF_Gbps(100ULL), }, [MLX5E_100BASE_TX][MLX5E_TX] = { .subtype = IFM_100_TX, .baudrate = IF_Mbps(100ULL), }, [MLX5E_1000BASE_T][MLX5E_T] = { .subtype = IFM_1000_T, .baudrate = IF_Mbps(1000ULL), }, [MLX5E_10GBASE_T][MLX5E_T] = { .subtype = IFM_10G_T, .baudrate = IF_Gbps(10ULL), }, [MLX5E_25GBASE_CR][MLX5E_CR] = { .subtype = IFM_25G_CR, .baudrate = IF_Gbps(25ULL), }, [MLX5E_25GBASE_KR][MLX5E_KR] = { .subtype = IFM_25G_KR, .baudrate = IF_Gbps(25ULL), }, [MLX5E_25GBASE_SR][MLX5E_SR] = { .subtype = IFM_25G_SR, .baudrate = IF_Gbps(25ULL), }, [MLX5E_50GBASE_CR2][MLX5E_CR2] = { .subtype = IFM_50G_CR2, .baudrate = IF_Gbps(50ULL), }, [MLX5E_50GBASE_KR2][MLX5E_KR2] = { .subtype = IFM_50G_KR2, .baudrate = IF_Gbps(50ULL), }, [MLX5E_50GBASE_KR4][MLX5E_KR4] = { .subtype = IFM_50G_KR4, .baudrate = IF_Gbps(50ULL), }, }; static const struct media mlx5e_ext_mode_table[MLX5E_EXT_LINK_SPEEDS_NUMBER][MLX5E_LINK_MODES_NUMBER] = { [MLX5E_SGMII_100M][MLX5E_SGMII] = { .subtype = IFM_100_SGMII, .baudrate = IF_Mbps(100), }, [MLX5E_1000BASE_X_SGMII][MLX5E_KX] = { .subtype = IFM_1000_KX, .baudrate = IF_Mbps(1000), }, [MLX5E_1000BASE_X_SGMII][MLX5E_CX_SGMII] = { .subtype = IFM_1000_CX_SGMII, .baudrate = IF_Mbps(1000), }, [MLX5E_1000BASE_X_SGMII][MLX5E_CX] = { .subtype = IFM_1000_CX, .baudrate = IF_Mbps(1000), }, [MLX5E_1000BASE_X_SGMII][MLX5E_LX] = { .subtype = IFM_1000_LX, .baudrate = IF_Mbps(1000), }, [MLX5E_1000BASE_X_SGMII][MLX5E_SX] = { .subtype = IFM_1000_SX, .baudrate = IF_Mbps(1000), }, [MLX5E_1000BASE_X_SGMII][MLX5E_T] = { .subtype = IFM_1000_T, .baudrate = IF_Mbps(1000), }, [MLX5E_5GBASE_R][MLX5E_T] = { .subtype = IFM_5000_T, .baudrate = IF_Mbps(5000), }, [MLX5E_5GBASE_R][MLX5E_KR] = { .subtype = IFM_5000_KR, .baudrate = IF_Mbps(5000), }, [MLX5E_5GBASE_R][MLX5E_KR1] = { .subtype = IFM_5000_KR1, .baudrate = IF_Mbps(5000), }, [MLX5E_5GBASE_R][MLX5E_KR_S] = { .subtype = IFM_5000_KR_S, .baudrate = IF_Mbps(5000), }, [MLX5E_10GBASE_XFI_XAUI_1][MLX5E_ER] = { .subtype = IFM_10G_ER, .baudrate = IF_Gbps(10ULL), }, [MLX5E_10GBASE_XFI_XAUI_1][MLX5E_KR] = { .subtype = IFM_10G_KR, .baudrate = IF_Gbps(10ULL), }, [MLX5E_10GBASE_XFI_XAUI_1][MLX5E_LR] = { .subtype = IFM_10G_LR, .baudrate = IF_Gbps(10ULL), }, [MLX5E_10GBASE_XFI_XAUI_1][MLX5E_SR] = { .subtype = IFM_10G_SR, .baudrate = IF_Gbps(10ULL), }, [MLX5E_10GBASE_XFI_XAUI_1][MLX5E_T] = { .subtype = IFM_10G_T, .baudrate = IF_Gbps(10ULL), }, [MLX5E_10GBASE_XFI_XAUI_1][MLX5E_AOC] = { .subtype = IFM_10G_AOC, .baudrate = IF_Gbps(10ULL), }, [MLX5E_10GBASE_XFI_XAUI_1][MLX5E_CR1] = { .subtype = IFM_10G_CR1, .baudrate = IF_Gbps(10ULL), }, [MLX5E_40GBASE_XLAUI_4_XLPPI_4][MLX5E_CR4] = { .subtype = IFM_40G_CR4, .baudrate = IF_Gbps(40ULL), }, [MLX5E_40GBASE_XLAUI_4_XLPPI_4][MLX5E_KR4] = { .subtype = IFM_40G_KR4, .baudrate = IF_Gbps(40ULL), }, [MLX5E_40GBASE_XLAUI_4_XLPPI_4][MLX5E_LR4] = { .subtype = IFM_40G_LR4, .baudrate = IF_Gbps(40ULL), }, [MLX5E_40GBASE_XLAUI_4_XLPPI_4][MLX5E_SR4] = { .subtype = IFM_40G_SR4, .baudrate = IF_Gbps(40ULL), }, [MLX5E_40GBASE_XLAUI_4_XLPPI_4][MLX5E_ER4] = { .subtype = IFM_40G_ER4, .baudrate = IF_Gbps(40ULL), }, [MLX5E_25GAUI_1_25GBASE_CR_KR][MLX5E_CR] = { .subtype = IFM_25G_CR, .baudrate = IF_Gbps(25ULL), }, [MLX5E_25GAUI_1_25GBASE_CR_KR][MLX5E_KR] = { .subtype = IFM_25G_KR, 
.baudrate = IF_Gbps(25ULL), }, [MLX5E_25GAUI_1_25GBASE_CR_KR][MLX5E_SR] = { .subtype = IFM_25G_SR, .baudrate = IF_Gbps(25ULL), }, [MLX5E_25GAUI_1_25GBASE_CR_KR][MLX5E_ACC] = { .subtype = IFM_25G_ACC, .baudrate = IF_Gbps(25ULL), }, [MLX5E_25GAUI_1_25GBASE_CR_KR][MLX5E_AOC] = { .subtype = IFM_25G_AOC, .baudrate = IF_Gbps(25ULL), }, [MLX5E_25GAUI_1_25GBASE_CR_KR][MLX5E_CR1] = { .subtype = IFM_25G_CR1, .baudrate = IF_Gbps(25ULL), }, [MLX5E_25GAUI_1_25GBASE_CR_KR][MLX5E_CR_S] = { .subtype = IFM_25G_CR_S, .baudrate = IF_Gbps(25ULL), }, [MLX5E_25GAUI_1_25GBASE_CR_KR][MLX5E_KR1] = { .subtype = IFM_5000_KR1, .baudrate = IF_Gbps(25ULL), }, [MLX5E_25GAUI_1_25GBASE_CR_KR][MLX5E_KR_S] = { .subtype = IFM_25G_KR_S, .baudrate = IF_Gbps(25ULL), }, [MLX5E_25GAUI_1_25GBASE_CR_KR][MLX5E_LR] = { .subtype = IFM_25G_LR, .baudrate = IF_Gbps(25ULL), }, [MLX5E_25GAUI_1_25GBASE_CR_KR][MLX5E_T] = { .subtype = IFM_25G_T, .baudrate = IF_Gbps(25ULL), }, [MLX5E_50GAUI_2_LAUI_2_50GBASE_CR2_KR2][MLX5E_CR2] = { .subtype = IFM_50G_CR2, .baudrate = IF_Gbps(50ULL), }, [MLX5E_50GAUI_2_LAUI_2_50GBASE_CR2_KR2][MLX5E_KR2] = { .subtype = IFM_50G_KR2, .baudrate = IF_Gbps(50ULL), }, [MLX5E_50GAUI_2_LAUI_2_50GBASE_CR2_KR2][MLX5E_KR4] = { .subtype = IFM_50G_KR4, .baudrate = IF_Gbps(50ULL), }, [MLX5E_50GAUI_2_LAUI_2_50GBASE_CR2_KR2][MLX5E_SR2] = { .subtype = IFM_50G_SR2, .baudrate = IF_Gbps(50ULL), }, [MLX5E_50GAUI_2_LAUI_2_50GBASE_CR2_KR2][MLX5E_LR2] = { .subtype = IFM_50G_LR2, .baudrate = IF_Gbps(50ULL), }, [MLX5E_50GAUI_1_LAUI_1_50GBASE_CR_KR][MLX5E_LR] = { .subtype = IFM_50G_LR, .baudrate = IF_Gbps(50ULL), }, [MLX5E_50GAUI_1_LAUI_1_50GBASE_CR_KR][MLX5E_SR] = { .subtype = IFM_50G_SR, .baudrate = IF_Gbps(50ULL), }, [MLX5E_50GAUI_1_LAUI_1_50GBASE_CR_KR][MLX5E_CP] = { .subtype = IFM_50G_CP, .baudrate = IF_Gbps(50ULL), }, [MLX5E_50GAUI_1_LAUI_1_50GBASE_CR_KR][MLX5E_FR] = { .subtype = IFM_50G_FR, .baudrate = IF_Gbps(50ULL), }, [MLX5E_50GAUI_1_LAUI_1_50GBASE_CR_KR][MLX5E_KR_PAM4] = { .subtype = IFM_50G_KR_PAM4, .baudrate = IF_Gbps(50ULL), }, [MLX5E_CAUI_4_100GBASE_CR4_KR4][MLX5E_CR4] = { .subtype = IFM_100G_CR4, .baudrate = IF_Gbps(100ULL), }, [MLX5E_CAUI_4_100GBASE_CR4_KR4][MLX5E_KR4] = { .subtype = IFM_100G_KR4, .baudrate = IF_Gbps(100ULL), }, [MLX5E_CAUI_4_100GBASE_CR4_KR4][MLX5E_LR4] = { .subtype = IFM_100G_LR4, .baudrate = IF_Gbps(100ULL), }, [MLX5E_CAUI_4_100GBASE_CR4_KR4][MLX5E_SR4] = { .subtype = IFM_100G_SR4, .baudrate = IF_Gbps(100ULL), }, [MLX5E_100GAUI_2_100GBASE_CR2_KR2][MLX5E_SR2] = { .subtype = IFM_100G_SR2, .baudrate = IF_Gbps(100ULL), }, [MLX5E_100GAUI_2_100GBASE_CR2_KR2][MLX5E_CP2] = { .subtype = IFM_100G_CP2, .baudrate = IF_Gbps(100ULL), }, [MLX5E_100GAUI_2_100GBASE_CR2_KR2][MLX5E_KR2_PAM4] = { .subtype = IFM_100G_KR2_PAM4, .baudrate = IF_Gbps(100ULL), }, [MLX5E_200GAUI_4_200GBASE_CR4_KR4][MLX5E_DR4] = { .subtype = IFM_200G_DR4, .baudrate = IF_Gbps(200ULL), }, [MLX5E_200GAUI_4_200GBASE_CR4_KR4][MLX5E_LR4] = { .subtype = IFM_200G_LR4, .baudrate = IF_Gbps(200ULL), }, [MLX5E_200GAUI_4_200GBASE_CR4_KR4][MLX5E_SR4] = { .subtype = IFM_200G_SR4, .baudrate = IF_Gbps(200ULL), }, [MLX5E_200GAUI_4_200GBASE_CR4_KR4][MLX5E_FR4] = { .subtype = IFM_200G_FR4, .baudrate = IF_Gbps(200ULL), }, [MLX5E_200GAUI_4_200GBASE_CR4_KR4][MLX5E_CR4_PAM4] = { .subtype = IFM_200G_CR4_PAM4, .baudrate = IF_Gbps(200ULL), }, [MLX5E_200GAUI_4_200GBASE_CR4_KR4][MLX5E_KR4_PAM4] = { .subtype = IFM_200G_KR4_PAM4, .baudrate = IF_Gbps(200ULL), }, }; DEBUGNET_DEFINE(mlx5_en); MALLOC_DEFINE(M_MLX5EN, "MLX5EN", "MLX5 Ethernet"); static void 
mlx5e_update_carrier(struct mlx5e_priv *priv) { struct mlx5_core_dev *mdev = priv->mdev; u32 out[MLX5_ST_SZ_DW(ptys_reg)]; u32 eth_proto_oper; int error; u8 port_state; u8 is_er_type; u8 i, j; bool ext; struct media media_entry = {}; port_state = mlx5_query_vport_state(mdev, MLX5_QUERY_VPORT_STATE_IN_OP_MOD_VNIC_VPORT, 0); if (port_state == VPORT_STATE_UP) { priv->media_status_last |= IFM_ACTIVE; } else { priv->media_status_last &= ~IFM_ACTIVE; priv->media_active_last = IFM_ETHER; if_link_state_change(priv->ifp, LINK_STATE_DOWN); return; } error = mlx5_query_port_ptys(mdev, out, sizeof(out), MLX5_PTYS_EN, 1); if (error) { priv->media_active_last = IFM_ETHER; priv->ifp->if_baudrate = 1; mlx5_en_err(priv->ifp, "query port ptys failed: 0x%x\n", error); return; } ext = MLX5_CAP_PCAM_FEATURE(mdev, ptys_extended_ethernet); eth_proto_oper = MLX5_GET_ETH_PROTO(ptys_reg, out, ext, eth_proto_oper); i = ilog2(eth_proto_oper); for (j = 0; j != MLX5E_LINK_MODES_NUMBER; j++) { media_entry = ext ? mlx5e_ext_mode_table[i][j] : mlx5e_mode_table[i][j]; if (media_entry.baudrate != 0) break; } if (media_entry.subtype == 0) { mlx5_en_err(priv->ifp, "Could not find operational media subtype\n"); return; } switch (media_entry.subtype) { case IFM_10G_ER: error = mlx5_query_pddr_range_info(mdev, 1, &is_er_type); if (error != 0) { mlx5_en_err(priv->ifp, "query port pddr failed: %d\n", error); } if (error != 0 || is_er_type == 0) media_entry.subtype = IFM_10G_LR; break; case IFM_40G_LR4: error = mlx5_query_pddr_range_info(mdev, 1, &is_er_type); if (error != 0) { mlx5_en_err(priv->ifp, "query port pddr failed: %d\n", error); } if (error == 0 && is_er_type != 0) media_entry.subtype = IFM_40G_ER4; break; } priv->media_active_last = media_entry.subtype | IFM_ETHER | IFM_FDX; priv->ifp->if_baudrate = media_entry.baudrate; if_link_state_change(priv->ifp, LINK_STATE_UP); } static void mlx5e_media_status(struct ifnet *dev, struct ifmediareq *ifmr) { struct mlx5e_priv *priv = dev->if_softc; ifmr->ifm_status = priv->media_status_last; ifmr->ifm_current = ifmr->ifm_active = priv->media_active_last | (priv->params.rx_pauseframe_control ? IFM_ETH_RXPAUSE : 0) | (priv->params.tx_pauseframe_control ? IFM_ETH_TXPAUSE : 0); } static u32 mlx5e_find_link_mode(u32 subtype, bool ext) { u32 i; u32 j; u32 link_mode = 0; u32 speeds_num = 0; struct media media_entry = {}; switch (subtype) { case IFM_10G_LR: subtype = IFM_10G_ER; break; case IFM_40G_ER4: subtype = IFM_40G_LR4; break; } speeds_num = ext ? MLX5E_EXT_LINK_SPEEDS_NUMBER : MLX5E_LINK_SPEEDS_NUMBER; for (i = 0; i != speeds_num; i++) { for (j = 0; j < MLX5E_LINK_MODES_NUMBER ; ++j) { media_entry = ext ? 
mlx5e_ext_mode_table[i][j] : mlx5e_mode_table[i][j]; if (media_entry.baudrate == 0) continue; if (media_entry.subtype == subtype) { link_mode |= MLX5E_PROT_MASK(i); } } } return (link_mode); } static int mlx5e_set_port_pause_and_pfc(struct mlx5e_priv *priv) { return (mlx5_set_port_pause_and_pfc(priv->mdev, 1, priv->params.rx_pauseframe_control, priv->params.tx_pauseframe_control, priv->params.rx_priority_flow_control, priv->params.tx_priority_flow_control)); } static int mlx5e_set_port_pfc(struct mlx5e_priv *priv) { int error; if (priv->gone != 0) { error = -ENXIO; } else if (priv->params.rx_pauseframe_control || priv->params.tx_pauseframe_control) { mlx5_en_err(priv->ifp, "Global pauseframes must be disabled before enabling PFC.\n"); error = -EINVAL; } else { error = mlx5e_set_port_pause_and_pfc(priv); } return (error); } static int mlx5e_media_change(struct ifnet *dev) { struct mlx5e_priv *priv = dev->if_softc; struct mlx5_core_dev *mdev = priv->mdev; u32 eth_proto_cap; u32 link_mode; u32 out[MLX5_ST_SZ_DW(ptys_reg)]; int was_opened; int locked; int error; bool ext; locked = PRIV_LOCKED(priv); if (!locked) PRIV_LOCK(priv); if (IFM_TYPE(priv->media.ifm_media) != IFM_ETHER) { error = EINVAL; goto done; } error = mlx5_query_port_ptys(mdev, out, sizeof(out), MLX5_PTYS_EN, 1); if (error != 0) { mlx5_en_err(dev, "Query port media capability failed\n"); goto done; } ext = MLX5_CAP_PCAM_FEATURE(mdev, ptys_extended_ethernet); link_mode = mlx5e_find_link_mode(IFM_SUBTYPE(priv->media.ifm_media), ext); /* query supported capabilities */ eth_proto_cap = MLX5_GET_ETH_PROTO(ptys_reg, out, ext, eth_proto_capability); /* check for autoselect */ if (IFM_SUBTYPE(priv->media.ifm_media) == IFM_AUTO) { link_mode = eth_proto_cap; if (link_mode == 0) { mlx5_en_err(dev, "Port media capability is zero\n"); error = EINVAL; goto done; } } else { link_mode = link_mode & eth_proto_cap; if (link_mode == 0) { mlx5_en_err(dev, "Not supported link mode requested\n"); error = EINVAL; goto done; } } if (priv->media.ifm_media & (IFM_ETH_RXPAUSE | IFM_ETH_TXPAUSE)) { /* check if PFC is enabled */ if (priv->params.rx_priority_flow_control || priv->params.tx_priority_flow_control) { mlx5_en_err(dev, "PFC must be disabled before enabling global pauseframes.\n"); error = EINVAL; goto done; } } /* update pauseframe control bits */ priv->params.rx_pauseframe_control = (priv->media.ifm_media & IFM_ETH_RXPAUSE) ? 1 : 0; priv->params.tx_pauseframe_control = (priv->media.ifm_media & IFM_ETH_TXPAUSE) ? 
1 : 0; /* check if device is opened */ was_opened = test_bit(MLX5E_STATE_OPENED, &priv->state); /* reconfigure the hardware */ mlx5_set_port_status(mdev, MLX5_PORT_DOWN); mlx5_set_port_proto(mdev, link_mode, MLX5_PTYS_EN, ext); error = -mlx5e_set_port_pause_and_pfc(priv); if (was_opened) mlx5_set_port_status(mdev, MLX5_PORT_UP); done: if (!locked) PRIV_UNLOCK(priv); return (error); } static void mlx5e_update_carrier_work(struct work_struct *work) { struct mlx5e_priv *priv = container_of(work, struct mlx5e_priv, update_carrier_work); PRIV_LOCK(priv); if (test_bit(MLX5E_STATE_OPENED, &priv->state)) mlx5e_update_carrier(priv); PRIV_UNLOCK(priv); } #define MLX5E_PCIE_PERF_GET_64(a,b,c,d,e,f) \ s_debug->c = MLX5_GET64(mpcnt_reg, out, counter_set.f.c); #define MLX5E_PCIE_PERF_GET_32(a,b,c,d,e,f) \ s_debug->c = MLX5_GET(mpcnt_reg, out, counter_set.f.c); static void mlx5e_update_pcie_counters(struct mlx5e_priv *priv) { struct mlx5_core_dev *mdev = priv->mdev; struct mlx5e_port_stats_debug *s_debug = &priv->stats.port_stats_debug; const unsigned sz = MLX5_ST_SZ_BYTES(mpcnt_reg); void *out; void *in; int err; /* allocate firmware request structures */ in = mlx5_vzalloc(sz); out = mlx5_vzalloc(sz); if (in == NULL || out == NULL) goto free_out; MLX5_SET(mpcnt_reg, in, grp, MLX5_PCIE_PERFORMANCE_COUNTERS_GROUP); err = mlx5_core_access_reg(mdev, in, sz, out, sz, MLX5_REG_MPCNT, 0, 0); if (err != 0) goto free_out; MLX5E_PCIE_PERFORMANCE_COUNTERS_64(MLX5E_PCIE_PERF_GET_64) MLX5E_PCIE_PERFORMANCE_COUNTERS_32(MLX5E_PCIE_PERF_GET_32) MLX5_SET(mpcnt_reg, in, grp, MLX5_PCIE_TIMERS_AND_STATES_COUNTERS_GROUP); err = mlx5_core_access_reg(mdev, in, sz, out, sz, MLX5_REG_MPCNT, 0, 0); if (err != 0) goto free_out; MLX5E_PCIE_TIMERS_AND_STATES_COUNTERS_32(MLX5E_PCIE_PERF_GET_32) MLX5_SET(mpcnt_reg, in, grp, MLX5_PCIE_LANE_COUNTERS_GROUP); err = mlx5_core_access_reg(mdev, in, sz, out, sz, MLX5_REG_MPCNT, 0, 0); if (err != 0) goto free_out; MLX5E_PCIE_LANE_COUNTERS_32(MLX5E_PCIE_PERF_GET_32) free_out: /* free firmware request structures */ kvfree(in); kvfree(out); } /* * This function reads the physical port counters from the firmware * using a pre-defined layout defined by various MLX5E_PPORT_XXX() * macros. The output is converted from big-endian 64-bit values into * host endian ones and stored in the "priv->stats.pport" structure. 
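* * Each counter group below is read into the same "out" buffer, so "ptr" always points at the fixed "counter_set" offset, while the running index "y" is carried from one group to the next so that every group lands in consecutive slots of the "arg[]" arrays generated by the statistics macros.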
*/ static void mlx5e_update_pport_counters(struct mlx5e_priv *priv) { struct mlx5_core_dev *mdev = priv->mdev; struct mlx5e_pport_stats *s = &priv->stats.pport; struct mlx5e_port_stats_debug *s_debug = &priv->stats.port_stats_debug; u32 *in; u32 *out; const u64 *ptr; unsigned sz = MLX5_ST_SZ_BYTES(ppcnt_reg); unsigned x; unsigned y; unsigned z; /* allocate firmware request structures */ in = mlx5_vzalloc(sz); out = mlx5_vzalloc(sz); if (in == NULL || out == NULL) goto free_out; /* * Get pointer to the 64-bit counter set which is located at a * fixed offset in the output firmware request structure: */ ptr = (const uint64_t *)MLX5_ADDR_OF(ppcnt_reg, out, counter_set); MLX5_SET(ppcnt_reg, in, local_port, 1); /* read IEEE802_3 counter group using predefined counter layout */ MLX5_SET(ppcnt_reg, in, grp, MLX5_IEEE_802_3_COUNTERS_GROUP); mlx5_core_access_reg(mdev, in, sz, out, sz, MLX5_REG_PPCNT, 0, 0); for (x = 0, y = MLX5E_PPORT_PER_PRIO_STATS_NUM; x != MLX5E_PPORT_IEEE802_3_STATS_NUM; x++, y++) s->arg[y] = be64toh(ptr[x]); /* read RFC2819 counter group using predefined counter layout */ MLX5_SET(ppcnt_reg, in, grp, MLX5_RFC_2819_COUNTERS_GROUP); mlx5_core_access_reg(mdev, in, sz, out, sz, MLX5_REG_PPCNT, 0, 0); for (x = 0; x != MLX5E_PPORT_RFC2819_STATS_NUM; x++, y++) s->arg[y] = be64toh(ptr[x]); for (y = 0; x != MLX5E_PPORT_RFC2819_STATS_NUM + MLX5E_PPORT_RFC2819_STATS_DEBUG_NUM; x++, y++) s_debug->arg[y] = be64toh(ptr[x]); /* read RFC2863 counter group using predefined counter layout */ MLX5_SET(ppcnt_reg, in, grp, MLX5_RFC_2863_COUNTERS_GROUP); mlx5_core_access_reg(mdev, in, sz, out, sz, MLX5_REG_PPCNT, 0, 0); for (x = 0; x != MLX5E_PPORT_RFC2863_STATS_DEBUG_NUM; x++, y++) s_debug->arg[y] = be64toh(ptr[x]); /* read physical layer stats counter group using predefined counter layout */ MLX5_SET(ppcnt_reg, in, grp, MLX5_PHYSICAL_LAYER_COUNTERS_GROUP); mlx5_core_access_reg(mdev, in, sz, out, sz, MLX5_REG_PPCNT, 0, 0); for (x = 0; x != MLX5E_PPORT_PHYSICAL_LAYER_STATS_DEBUG_NUM; x++, y++) s_debug->arg[y] = be64toh(ptr[x]); /* read Extended Ethernet counter group using predefined counter layout */ MLX5_SET(ppcnt_reg, in, grp, MLX5_ETHERNET_EXTENDED_COUNTERS_GROUP); mlx5_core_access_reg(mdev, in, sz, out, sz, MLX5_REG_PPCNT, 0, 0); for (x = 0; x != MLX5E_PPORT_ETHERNET_EXTENDED_STATS_DEBUG_NUM; x++, y++) s_debug->arg[y] = be64toh(ptr[x]); /* read Extended Statistical Group */ if (MLX5_CAP_GEN(mdev, pcam_reg) && MLX5_CAP_PCAM_FEATURE(mdev, ppcnt_statistical_group) && MLX5_CAP_PCAM_FEATURE(mdev, per_lane_error_counters)) { /* read Extended Statistical counter group using predefined counter layout */ MLX5_SET(ppcnt_reg, in, grp, MLX5_PHYSICAL_LAYER_STATISTICAL_GROUP); mlx5_core_access_reg(mdev, in, sz, out, sz, MLX5_REG_PPCNT, 0, 0); for (x = 0; x != MLX5E_PPORT_STATISTICAL_DEBUG_NUM; x++, y++) s_debug->arg[y] = be64toh(ptr[x]); } /* read PCIE counters */ mlx5e_update_pcie_counters(priv); /* read per-priority counters */ MLX5_SET(ppcnt_reg, in, grp, MLX5_PER_PRIORITY_COUNTERS_GROUP); /* iterate all the priorities */ for (y = z = 0; z != MLX5E_PPORT_PER_PRIO_STATS_NUM_PRIO; z++) { MLX5_SET(ppcnt_reg, in, prio_tc, z); mlx5_core_access_reg(mdev, in, sz, out, sz, MLX5_REG_PPCNT, 0, 0); /* read per priority stats counter group using predefined counter layout */ for (x = 0; x != (MLX5E_PPORT_PER_PRIO_STATS_NUM / MLX5E_PPORT_PER_PRIO_STATS_NUM_PRIO); x++, y++) s->arg[y] = be64toh(ptr[x]); } free_out: /* free firmware request structures */ kvfree(in); kvfree(out); } static void 
mlx5e_grp_vnic_env_update_stats(struct mlx5e_priv *priv) { u32 out[MLX5_ST_SZ_DW(query_vnic_env_out)] = {}; u32 in[MLX5_ST_SZ_DW(query_vnic_env_in)] = {}; if (!MLX5_CAP_GEN(priv->mdev, nic_receive_steering_discard)) return; MLX5_SET(query_vnic_env_in, in, opcode, MLX5_CMD_OP_QUERY_VNIC_ENV); MLX5_SET(query_vnic_env_in, in, op_mod, 0); MLX5_SET(query_vnic_env_in, in, other_vport, 0); if (mlx5_cmd_exec(priv->mdev, in, sizeof(in), out, sizeof(out)) != 0) return; priv->stats.vport.rx_steer_missed_packets = MLX5_GET64(query_vnic_env_out, out, vport_env.nic_receive_steering_discard); } /* * This function is called regularly to collect all statistics * counters from the firmware. The values can be viewed through the * sysctl interface. Execution is serialized using the priv's global * configuration lock. */ static void mlx5e_update_stats_locked(struct mlx5e_priv *priv) { struct mlx5_core_dev *mdev = priv->mdev; struct mlx5e_vport_stats *s = &priv->stats.vport; struct mlx5e_sq_stats *sq_stats; #if (__FreeBSD_version < 1100000) struct ifnet *ifp = priv->ifp; #endif u32 in[MLX5_ST_SZ_DW(query_vport_counter_in)]; u32 *out; int outlen = MLX5_ST_SZ_BYTES(query_vport_counter_out); u64 tso_packets = 0; u64 tso_bytes = 0; u64 tx_queue_dropped = 0; u64 tx_defragged = 0; u64 tx_offload_none = 0; u64 lro_packets = 0; u64 lro_bytes = 0; u64 sw_lro_queued = 0; u64 sw_lro_flushed = 0; u64 rx_csum_none = 0; u64 rx_wqe_err = 0; u64 rx_packets = 0; u64 rx_bytes = 0; u32 rx_out_of_buffer = 0; int error; int i; int j; out = mlx5_vzalloc(outlen); if (out == NULL) goto free_out; /* Collect firts the SW counters and then HW for consistency */ for (i = 0; i < priv->params.num_channels; i++) { struct mlx5e_channel *pch = priv->channel + i; struct mlx5e_rq *rq = &pch->rq; struct mlx5e_rq_stats *rq_stats = &pch->rq.stats; /* collect stats from LRO */ rq_stats->sw_lro_queued = rq->lro.lro_queued; rq_stats->sw_lro_flushed = rq->lro.lro_flushed; sw_lro_queued += rq_stats->sw_lro_queued; sw_lro_flushed += rq_stats->sw_lro_flushed; lro_packets += rq_stats->lro_packets; lro_bytes += rq_stats->lro_bytes; rx_csum_none += rq_stats->csum_none; rx_wqe_err += rq_stats->wqe_err; rx_packets += rq_stats->packets; rx_bytes += rq_stats->bytes; for (j = 0; j < priv->num_tc; j++) { sq_stats = &pch->sq[j].stats; tso_packets += sq_stats->tso_packets; tso_bytes += sq_stats->tso_bytes; tx_queue_dropped += sq_stats->dropped; tx_queue_dropped += sq_stats->enobuf; tx_defragged += sq_stats->defragged; tx_offload_none += sq_stats->csum_offload_none; } } #ifdef RATELIMIT /* Collect statistics from all rate-limit queues */ for (j = 0; j < priv->rl.param.tx_worker_threads_def; j++) { struct mlx5e_rl_worker *rlw = priv->rl.workers + j; for (i = 0; i < priv->rl.param.tx_channels_per_worker_def; i++) { struct mlx5e_rl_channel *channel = rlw->channels + i; struct mlx5e_sq *sq = channel->sq; if (sq == NULL) continue; sq_stats = &sq->stats; tso_packets += sq_stats->tso_packets; tso_bytes += sq_stats->tso_bytes; tx_queue_dropped += sq_stats->dropped; tx_queue_dropped += sq_stats->enobuf; tx_defragged += sq_stats->defragged; tx_offload_none += sq_stats->csum_offload_none; } } #endif /* update counters */ s->tso_packets = tso_packets; s->tso_bytes = tso_bytes; s->tx_queue_dropped = tx_queue_dropped; s->tx_defragged = tx_defragged; s->lro_packets = lro_packets; s->lro_bytes = lro_bytes; s->sw_lro_queued = sw_lro_queued; s->sw_lro_flushed = sw_lro_flushed; s->rx_csum_none = rx_csum_none; s->rx_wqe_err = rx_wqe_err; s->rx_packets = rx_packets; s->rx_bytes = 
rx_bytes; mlx5e_grp_vnic_env_update_stats(priv); /* HW counters */ memset(in, 0, sizeof(in)); MLX5_SET(query_vport_counter_in, in, opcode, MLX5_CMD_OP_QUERY_VPORT_COUNTER); MLX5_SET(query_vport_counter_in, in, op_mod, 0); MLX5_SET(query_vport_counter_in, in, other_vport, 0); memset(out, 0, outlen); /* get number of out-of-buffer drops first */ if (test_bit(MLX5E_STATE_OPENED, &priv->state) != 0 && mlx5_vport_query_out_of_rx_buffer(mdev, priv->counter_set_id, &rx_out_of_buffer) == 0) { s->rx_out_of_buffer = rx_out_of_buffer; } /* get port statistics */ if (mlx5_cmd_exec(mdev, in, sizeof(in), out, outlen) == 0) { #define MLX5_GET_CTR(out, x) \ MLX5_GET64(query_vport_counter_out, out, x) s->rx_error_packets = MLX5_GET_CTR(out, received_errors.packets); s->rx_error_bytes = MLX5_GET_CTR(out, received_errors.octets); s->tx_error_packets = MLX5_GET_CTR(out, transmit_errors.packets); s->tx_error_bytes = MLX5_GET_CTR(out, transmit_errors.octets); s->rx_unicast_packets = MLX5_GET_CTR(out, received_eth_unicast.packets); s->rx_unicast_bytes = MLX5_GET_CTR(out, received_eth_unicast.octets); s->tx_unicast_packets = MLX5_GET_CTR(out, transmitted_eth_unicast.packets); s->tx_unicast_bytes = MLX5_GET_CTR(out, transmitted_eth_unicast.octets); s->rx_multicast_packets = MLX5_GET_CTR(out, received_eth_multicast.packets); s->rx_multicast_bytes = MLX5_GET_CTR(out, received_eth_multicast.octets); s->tx_multicast_packets = MLX5_GET_CTR(out, transmitted_eth_multicast.packets); s->tx_multicast_bytes = MLX5_GET_CTR(out, transmitted_eth_multicast.octets); s->rx_broadcast_packets = MLX5_GET_CTR(out, received_eth_broadcast.packets); s->rx_broadcast_bytes = MLX5_GET_CTR(out, received_eth_broadcast.octets); s->tx_broadcast_packets = MLX5_GET_CTR(out, transmitted_eth_broadcast.packets); s->tx_broadcast_bytes = MLX5_GET_CTR(out, transmitted_eth_broadcast.octets); s->tx_packets = s->tx_unicast_packets + s->tx_multicast_packets + s->tx_broadcast_packets; s->tx_bytes = s->tx_unicast_bytes + s->tx_multicast_bytes + s->tx_broadcast_bytes; /* Update calculated offload counters */ s->tx_csum_offload = s->tx_packets - tx_offload_none; s->rx_csum_good = s->rx_packets - s->rx_csum_none; } /* Get physical port counters */ mlx5e_update_pport_counters(priv); s->tx_jumbo_packets = priv->stats.port_stats_debug.tx_stat_p1519to2047octets + priv->stats.port_stats_debug.tx_stat_p2048to4095octets + priv->stats.port_stats_debug.tx_stat_p4096to8191octets + priv->stats.port_stats_debug.tx_stat_p8192to10239octets; #if (__FreeBSD_version < 1100000) /* no get_counters interface in fbsd 10 */ ifp->if_ipackets = s->rx_packets; ifp->if_ierrors = priv->stats.pport.in_range_len_errors + priv->stats.pport.out_of_range_len + priv->stats.pport.too_long_errors + priv->stats.pport.check_seq_err + priv->stats.pport.alignment_err; ifp->if_iqdrops = s->rx_out_of_buffer; ifp->if_opackets = s->tx_packets; ifp->if_oerrors = priv->stats.port_stats_debug.out_discards; ifp->if_snd.ifq_drops = s->tx_queue_dropped; ifp->if_ibytes = s->rx_bytes; ifp->if_obytes = s->tx_bytes; ifp->if_collisions = priv->stats.pport.collisions; #endif free_out: kvfree(out); /* Update diagnostics, if any */ if (priv->params_ethtool.diag_pci_enable || priv->params_ethtool.diag_general_enable) { error = mlx5_core_get_diagnostics_full(mdev, priv->params_ethtool.diag_pci_enable ? &priv->params_pci : NULL, priv->params_ethtool.diag_general_enable ? 
&priv->params_general : NULL); if (error != 0) mlx5_en_err(priv->ifp, "Failed reading diagnostics: %d\n", error); } /* Update FEC, if any */ error = mlx5e_fec_update(priv); if (error != 0 && error != EOPNOTSUPP) { mlx5_en_err(priv->ifp, "Updating FEC failed: %d\n", error); } /* Update temperature, if any */ if (priv->params_ethtool.hw_num_temp != 0) { error = mlx5e_hw_temperature_update(priv); if (error != 0 && error != EOPNOTSUPP) { mlx5_en_err(priv->ifp, "Updating temperature failed: %d\n", error); } } } static void mlx5e_update_stats_work(struct work_struct *work) { struct mlx5e_priv *priv; priv = container_of(work, struct mlx5e_priv, update_stats_work); PRIV_LOCK(priv); if (test_bit(MLX5E_STATE_OPENED, &priv->state) != 0 && !test_bit(MLX5_INTERFACE_STATE_TEARDOWN, &priv->mdev->intf_state)) mlx5e_update_stats_locked(priv); PRIV_UNLOCK(priv); } static void mlx5e_update_stats(void *arg) { struct mlx5e_priv *priv = arg; queue_work(priv->wq, &priv->update_stats_work); callout_reset(&priv->watchdog, hz / 4, &mlx5e_update_stats, priv); } static void mlx5e_async_event_sub(struct mlx5e_priv *priv, enum mlx5_dev_event event) { switch (event) { case MLX5_DEV_EVENT_PORT_UP: case MLX5_DEV_EVENT_PORT_DOWN: queue_work(priv->wq, &priv->update_carrier_work); break; default: break; } } static void mlx5e_async_event(struct mlx5_core_dev *mdev, void *vpriv, enum mlx5_dev_event event, unsigned long param) { struct mlx5e_priv *priv = vpriv; mtx_lock(&priv->async_events_mtx); if (test_bit(MLX5E_STATE_ASYNC_EVENTS_ENABLE, &priv->state)) mlx5e_async_event_sub(priv, event); mtx_unlock(&priv->async_events_mtx); } static void mlx5e_enable_async_events(struct mlx5e_priv *priv) { set_bit(MLX5E_STATE_ASYNC_EVENTS_ENABLE, &priv->state); } static void mlx5e_disable_async_events(struct mlx5e_priv *priv) { mtx_lock(&priv->async_events_mtx); clear_bit(MLX5E_STATE_ASYNC_EVENTS_ENABLE, &priv->state); mtx_unlock(&priv->async_events_mtx); } static void mlx5e_calibration_callout(void *arg); static int mlx5e_calibration_duration = 20; static int mlx5e_fast_calibration = 1; static int mlx5e_normal_calibration = 30; static SYSCTL_NODE(_hw_mlx5, OID_AUTO, calibr, CTLFLAG_RW | CTLFLAG_MPSAFE, 0, "MLX5 timestamp calibration parameteres"); SYSCTL_INT(_hw_mlx5_calibr, OID_AUTO, duration, CTLFLAG_RWTUN, &mlx5e_calibration_duration, 0, "Duration of initial calibration"); SYSCTL_INT(_hw_mlx5_calibr, OID_AUTO, fast, CTLFLAG_RWTUN, &mlx5e_fast_calibration, 0, "Recalibration interval during initial calibration"); SYSCTL_INT(_hw_mlx5_calibr, OID_AUTO, normal, CTLFLAG_RWTUN, &mlx5e_normal_calibration, 0, "Recalibration interval during normal operations"); /* * Ignites the calibration process. */ static void mlx5e_reset_calibration_callout(struct mlx5e_priv *priv) { if (priv->clbr_done == 0) mlx5e_calibration_callout(priv); else callout_reset_curcpu(&priv->tstmp_clbr, (priv->clbr_done < mlx5e_calibration_duration ? 
mlx5e_fast_calibration : mlx5e_normal_calibration) * hz, mlx5e_calibration_callout, priv); } static uint64_t mlx5e_timespec2usec(const struct timespec *ts) { return ((uint64_t)ts->tv_sec * 1000000000 + ts->tv_nsec); } static uint64_t mlx5e_hw_clock(struct mlx5e_priv *priv) { struct mlx5_init_seg *iseg; uint32_t hw_h, hw_h1, hw_l; iseg = priv->mdev->iseg; do { hw_h = ioread32be(&iseg->internal_timer_h); hw_l = ioread32be(&iseg->internal_timer_l); hw_h1 = ioread32be(&iseg->internal_timer_h); } while (hw_h1 != hw_h); return (((uint64_t)hw_h << 32) | hw_l); } /* * The calibration callout, it runs either in the context of the * thread which enables calibration, or in callout. It takes the * snapshot of system and adapter clocks, then advances the pointers to * the calibration point to allow rx path to read the consistent data * lockless. */ static void mlx5e_calibration_callout(void *arg) { struct mlx5e_priv *priv; struct mlx5e_clbr_point *next, *curr; struct timespec ts; int clbr_curr_next; priv = arg; curr = &priv->clbr_points[priv->clbr_curr]; clbr_curr_next = priv->clbr_curr + 1; if (clbr_curr_next >= nitems(priv->clbr_points)) clbr_curr_next = 0; next = &priv->clbr_points[clbr_curr_next]; next->base_prev = curr->base_curr; next->clbr_hw_prev = curr->clbr_hw_curr; next->clbr_hw_curr = mlx5e_hw_clock(priv); if (((next->clbr_hw_curr - curr->clbr_hw_curr) >> MLX5E_TSTMP_PREC) == 0) { if (priv->clbr_done != 0) { mlx5_en_err(priv->ifp, "HW failed tstmp frozen %#jx %#jx, disabling\n", next->clbr_hw_curr, curr->clbr_hw_prev); priv->clbr_done = 0; } atomic_store_rel_int(&curr->clbr_gen, 0); return; } nanouptime(&ts); next->base_curr = mlx5e_timespec2usec(&ts); curr->clbr_gen = 0; atomic_thread_fence_rel(); priv->clbr_curr = clbr_curr_next; atomic_store_rel_int(&next->clbr_gen, ++(priv->clbr_gen)); if (priv->clbr_done < mlx5e_calibration_duration) priv->clbr_done++; mlx5e_reset_calibration_callout(priv); } static const char *mlx5e_rq_stats_desc[] = { MLX5E_RQ_STATS(MLX5E_STATS_DESC) }; static int mlx5e_create_rq(struct mlx5e_channel *c, struct mlx5e_rq_param *param, struct mlx5e_rq *rq) { struct mlx5e_priv *priv = c->priv; struct mlx5_core_dev *mdev = priv->mdev; char buffer[16]; void *rqc = param->rqc; void *rqc_wq = MLX5_ADDR_OF(rqc, rqc, wq); int wq_sz; int err; int i; u32 nsegs, wqe_sz; err = mlx5e_get_wqe_sz(priv, &wqe_sz, &nsegs); if (err != 0) goto done; /* Create DMA descriptor TAG */ if ((err = -bus_dma_tag_create( bus_get_dma_tag(mdev->pdev->dev.bsddev), 1, /* any alignment */ 0, /* no boundary */ BUS_SPACE_MAXADDR, /* lowaddr */ BUS_SPACE_MAXADDR, /* highaddr */ NULL, NULL, /* filter, filterarg */ nsegs * MLX5E_MAX_RX_BYTES, /* maxsize */ nsegs, /* nsegments */ nsegs * MLX5E_MAX_RX_BYTES, /* maxsegsize */ 0, /* flags */ NULL, NULL, /* lockfunc, lockfuncarg */ &rq->dma_tag))) goto done; err = mlx5_wq_ll_create(mdev, ¶m->wq, rqc_wq, &rq->wq, &rq->wq_ctrl); if (err) goto err_free_dma_tag; rq->wq.db = &rq->wq.db[MLX5_RCV_DBR]; err = mlx5e_get_wqe_sz(priv, &rq->wqe_sz, &rq->nsegs); if (err != 0) goto err_rq_wq_destroy; wq_sz = mlx5_wq_ll_get_size(&rq->wq); err = -tcp_lro_init_args(&rq->lro, priv->ifp, TCP_LRO_ENTRIES, wq_sz); if (err) goto err_rq_wq_destroy; rq->mbuf = malloc(wq_sz * sizeof(rq->mbuf[0]), M_MLX5EN, M_WAITOK | M_ZERO); for (i = 0; i != wq_sz; i++) { struct mlx5e_rx_wqe *wqe = mlx5_wq_ll_get_wqe(&rq->wq, i); int j; err = -bus_dmamap_create(rq->dma_tag, 0, &rq->mbuf[i].dma_map); if (err != 0) { while (i--) bus_dmamap_destroy(rq->dma_tag, rq->mbuf[i].dma_map); goto 
err_rq_mbuf_free; } /* set value for constant fields */ for (j = 0; j < rq->nsegs; j++) wqe->data[j].lkey = cpu_to_be32(priv->mr.key); } INIT_WORK(&rq->dim.work, mlx5e_dim_work); if (priv->params.rx_cq_moderation_mode < 2) { rq->dim.mode = NET_DIM_CQ_PERIOD_MODE_DISABLED; } else { void *cqc = container_of(param, struct mlx5e_channel_param, rq)->rx_cq.cqc; switch (MLX5_GET(cqc, cqc, cq_period_mode)) { case MLX5_CQ_PERIOD_MODE_START_FROM_EQE: rq->dim.mode = NET_DIM_CQ_PERIOD_MODE_START_FROM_EQE; break; case MLX5_CQ_PERIOD_MODE_START_FROM_CQE: rq->dim.mode = NET_DIM_CQ_PERIOD_MODE_START_FROM_CQE; break; default: rq->dim.mode = NET_DIM_CQ_PERIOD_MODE_DISABLED; break; } } rq->ifp = priv->ifp; rq->channel = c; rq->ix = c->ix; snprintf(buffer, sizeof(buffer), "rxstat%d", c->ix); mlx5e_create_stats(&rq->stats.ctx, SYSCTL_CHILDREN(priv->sysctl_ifnet), buffer, mlx5e_rq_stats_desc, MLX5E_RQ_STATS_NUM, rq->stats.arg); return (0); err_rq_mbuf_free: free(rq->mbuf, M_MLX5EN); tcp_lro_free(&rq->lro); err_rq_wq_destroy: mlx5_wq_destroy(&rq->wq_ctrl); err_free_dma_tag: bus_dma_tag_destroy(rq->dma_tag); done: return (err); } static void mlx5e_destroy_rq(struct mlx5e_rq *rq) { int wq_sz; int i; /* destroy all sysctl nodes */ sysctl_ctx_free(&rq->stats.ctx); /* free leftover LRO packets, if any */ tcp_lro_free(&rq->lro); wq_sz = mlx5_wq_ll_get_size(&rq->wq); for (i = 0; i != wq_sz; i++) { if (rq->mbuf[i].mbuf != NULL) { bus_dmamap_unload(rq->dma_tag, rq->mbuf[i].dma_map); m_freem(rq->mbuf[i].mbuf); } bus_dmamap_destroy(rq->dma_tag, rq->mbuf[i].dma_map); } free(rq->mbuf, M_MLX5EN); mlx5_wq_destroy(&rq->wq_ctrl); bus_dma_tag_destroy(rq->dma_tag); } static int mlx5e_enable_rq(struct mlx5e_rq *rq, struct mlx5e_rq_param *param) { struct mlx5e_channel *c = rq->channel; struct mlx5e_priv *priv = c->priv; struct mlx5_core_dev *mdev = priv->mdev; void *in; void *rqc; void *wq; int inlen; int err; inlen = MLX5_ST_SZ_BYTES(create_rq_in) + sizeof(u64) * rq->wq_ctrl.buf.npages; in = mlx5_vzalloc(inlen); if (in == NULL) return (-ENOMEM); rqc = MLX5_ADDR_OF(create_rq_in, in, ctx); wq = MLX5_ADDR_OF(rqc, rqc, wq); memcpy(rqc, param->rqc, sizeof(param->rqc)); MLX5_SET(rqc, rqc, cqn, c->rq.cq.mcq.cqn); MLX5_SET(rqc, rqc, state, MLX5_RQC_STATE_RST); MLX5_SET(rqc, rqc, flush_in_error_en, 1); if (priv->counter_set_id >= 0) MLX5_SET(rqc, rqc, counter_set_id, priv->counter_set_id); MLX5_SET(wq, wq, log_wq_pg_sz, rq->wq_ctrl.buf.page_shift - PAGE_SHIFT); MLX5_SET64(wq, wq, dbr_addr, rq->wq_ctrl.db.dma); mlx5_fill_page_array(&rq->wq_ctrl.buf, (__be64 *) MLX5_ADDR_OF(wq, wq, pas)); err = mlx5_core_create_rq(mdev, in, inlen, &rq->rqn); kvfree(in); return (err); } static int mlx5e_modify_rq(struct mlx5e_rq *rq, int curr_state, int next_state) { struct mlx5e_channel *c = rq->channel; struct mlx5e_priv *priv = c->priv; struct mlx5_core_dev *mdev = priv->mdev; void *in; void *rqc; int inlen; int err; inlen = MLX5_ST_SZ_BYTES(modify_rq_in); in = mlx5_vzalloc(inlen); if (in == NULL) return (-ENOMEM); rqc = MLX5_ADDR_OF(modify_rq_in, in, ctx); MLX5_SET(modify_rq_in, in, rqn, rq->rqn); MLX5_SET(modify_rq_in, in, rq_state, curr_state); MLX5_SET(rqc, rqc, state, next_state); err = mlx5_core_modify_rq(mdev, in, inlen); kvfree(in); return (err); } static void mlx5e_disable_rq(struct mlx5e_rq *rq) { struct mlx5e_channel *c = rq->channel; struct mlx5e_priv *priv = c->priv; struct mlx5_core_dev *mdev = priv->mdev; mlx5_core_destroy_rq(mdev, rq->rqn); } static int mlx5e_wait_for_min_rx_wqes(struct mlx5e_rq *rq) { struct mlx5e_channel *c = rq->channel; 
struct mlx5e_priv *priv = c->priv; struct mlx5_wq_ll *wq = &rq->wq; int i; for (i = 0; i < 1000; i++) { if (wq->cur_sz >= priv->params.min_rx_wqes) return (0); msleep(4); } return (-ETIMEDOUT); } static int mlx5e_open_rq(struct mlx5e_channel *c, struct mlx5e_rq_param *param, struct mlx5e_rq *rq) { int err; err = mlx5e_create_rq(c, param, rq); if (err) return (err); err = mlx5e_enable_rq(rq, param); if (err) goto err_destroy_rq; err = mlx5e_modify_rq(rq, MLX5_RQC_STATE_RST, MLX5_RQC_STATE_RDY); if (err) goto err_disable_rq; c->rq.enabled = 1; return (0); err_disable_rq: mlx5e_disable_rq(rq); err_destroy_rq: mlx5e_destroy_rq(rq); return (err); } static void mlx5e_close_rq(struct mlx5e_rq *rq) { mtx_lock(&rq->mtx); rq->enabled = 0; callout_stop(&rq->watchdog); mtx_unlock(&rq->mtx); mlx5e_modify_rq(rq, MLX5_RQC_STATE_RDY, MLX5_RQC_STATE_ERR); } static void mlx5e_close_rq_wait(struct mlx5e_rq *rq) { mlx5e_disable_rq(rq); mlx5e_close_cq(&rq->cq); cancel_work_sync(&rq->dim.work); mlx5e_destroy_rq(rq); } void mlx5e_free_sq_db(struct mlx5e_sq *sq) { int wq_sz = mlx5_wq_cyc_get_size(&sq->wq); int x; for (x = 0; x != wq_sz; x++) { if (unlikely(sq->mbuf[x].p_refcount != NULL)) { atomic_add_int(sq->mbuf[x].p_refcount, -1); sq->mbuf[x].p_refcount = NULL; } if (sq->mbuf[x].mbuf != NULL) { bus_dmamap_unload(sq->dma_tag, sq->mbuf[x].dma_map); m_freem(sq->mbuf[x].mbuf); } bus_dmamap_destroy(sq->dma_tag, sq->mbuf[x].dma_map); } free(sq->mbuf, M_MLX5EN); } int mlx5e_alloc_sq_db(struct mlx5e_sq *sq) { int wq_sz = mlx5_wq_cyc_get_size(&sq->wq); int err; int x; sq->mbuf = malloc(wq_sz * sizeof(sq->mbuf[0]), M_MLX5EN, M_WAITOK | M_ZERO); /* Create DMA descriptor MAPs */ for (x = 0; x != wq_sz; x++) { err = -bus_dmamap_create(sq->dma_tag, 0, &sq->mbuf[x].dma_map); if (err != 0) { while (x--) bus_dmamap_destroy(sq->dma_tag, sq->mbuf[x].dma_map); free(sq->mbuf, M_MLX5EN); return (err); } } return (0); } static const char *mlx5e_sq_stats_desc[] = { MLX5E_SQ_STATS(MLX5E_STATS_DESC) }; void mlx5e_update_sq_inline(struct mlx5e_sq *sq) { sq->max_inline = sq->priv->params.tx_max_inline; sq->min_inline_mode = sq->priv->params.tx_min_inline_mode; /* * Check if trust state is DSCP or if inline mode is NONE which * indicates CX-5 or newer hardware. 
*/ if (sq->priv->params_ethtool.trust_state != MLX5_QPTS_TRUST_PCP || sq->min_inline_mode == MLX5_INLINE_MODE_NONE) { if (MLX5_CAP_ETH(sq->priv->mdev, wqe_vlan_insert)) sq->min_insert_caps = MLX5E_INSERT_VLAN | MLX5E_INSERT_NON_VLAN; else sq->min_insert_caps = MLX5E_INSERT_NON_VLAN; } else { sq->min_insert_caps = 0; } } static void mlx5e_refresh_sq_inline_sub(struct mlx5e_priv *priv, struct mlx5e_channel *c) { int i; for (i = 0; i != priv->num_tc; i++) { mtx_lock(&c->sq[i].lock); mlx5e_update_sq_inline(&c->sq[i]); mtx_unlock(&c->sq[i].lock); } } void mlx5e_refresh_sq_inline(struct mlx5e_priv *priv) { int i; /* check if channels are closed */ if (test_bit(MLX5E_STATE_OPENED, &priv->state) == 0) return; for (i = 0; i < priv->params.num_channels; i++) mlx5e_refresh_sq_inline_sub(priv, &priv->channel[i]); } static int mlx5e_create_sq(struct mlx5e_channel *c, int tc, struct mlx5e_sq_param *param, struct mlx5e_sq *sq) { struct mlx5e_priv *priv = c->priv; struct mlx5_core_dev *mdev = priv->mdev; char buffer[16]; void *sqc = param->sqc; void *sqc_wq = MLX5_ADDR_OF(sqc, sqc, wq); int err; /* Create DMA descriptor TAG */ if ((err = -bus_dma_tag_create( bus_get_dma_tag(mdev->pdev->dev.bsddev), 1, /* any alignment */ 0, /* no boundary */ BUS_SPACE_MAXADDR, /* lowaddr */ BUS_SPACE_MAXADDR, /* highaddr */ NULL, NULL, /* filter, filterarg */ MLX5E_MAX_TX_PAYLOAD_SIZE, /* maxsize */ MLX5E_MAX_TX_MBUF_FRAGS, /* nsegments */ MLX5E_MAX_TX_MBUF_SIZE, /* maxsegsize */ 0, /* flags */ NULL, NULL, /* lockfunc, lockfuncarg */ &sq->dma_tag))) goto done; - err = mlx5_alloc_map_uar(mdev, &sq->uar); - if (err) - goto err_free_dma_tag; + sq->uar_map = priv->bfreg.map; err = mlx5_wq_cyc_create(mdev, ¶m->wq, sqc_wq, &sq->wq, &sq->wq_ctrl); if (err) - goto err_unmap_free_uar; + goto err_free_dma_tag; sq->wq.db = &sq->wq.db[MLX5_SND_DBR]; sq->bf_buf_size = (1 << MLX5_CAP_GEN(mdev, log_bf_reg_size)) / 2; err = mlx5e_alloc_sq_db(sq); if (err) goto err_sq_wq_destroy; sq->mkey_be = cpu_to_be32(priv->mr.key); sq->ifp = priv->ifp; sq->priv = priv; sq->tc = tc; mlx5e_update_sq_inline(sq); snprintf(buffer, sizeof(buffer), "txstat%dtc%d", c->ix, tc); mlx5e_create_stats(&sq->stats.ctx, SYSCTL_CHILDREN(priv->sysctl_ifnet), buffer, mlx5e_sq_stats_desc, MLX5E_SQ_STATS_NUM, sq->stats.arg); return (0); err_sq_wq_destroy: mlx5_wq_destroy(&sq->wq_ctrl); -err_unmap_free_uar: - mlx5_unmap_free_uar(mdev, &sq->uar); - err_free_dma_tag: bus_dma_tag_destroy(sq->dma_tag); done: return (err); } static void mlx5e_destroy_sq(struct mlx5e_sq *sq) { /* destroy all sysctl nodes */ sysctl_ctx_free(&sq->stats.ctx); mlx5e_free_sq_db(sq); mlx5_wq_destroy(&sq->wq_ctrl); - mlx5_unmap_free_uar(sq->priv->mdev, &sq->uar); bus_dma_tag_destroy(sq->dma_tag); } int mlx5e_enable_sq(struct mlx5e_sq *sq, struct mlx5e_sq_param *param, int tis_num) { void *in; void *sqc; void *wq; int inlen; int err; inlen = MLX5_ST_SZ_BYTES(create_sq_in) + sizeof(u64) * sq->wq_ctrl.buf.npages; in = mlx5_vzalloc(inlen); if (in == NULL) return (-ENOMEM); sqc = MLX5_ADDR_OF(create_sq_in, in, ctx); wq = MLX5_ADDR_OF(sqc, sqc, wq); memcpy(sqc, param->sqc, sizeof(param->sqc)); MLX5_SET(sqc, sqc, tis_num_0, tis_num); MLX5_SET(sqc, sqc, cqn, sq->cq.mcq.cqn); MLX5_SET(sqc, sqc, state, MLX5_SQC_STATE_RST); MLX5_SET(sqc, sqc, tis_lst_sz, 1); MLX5_SET(sqc, sqc, flush_in_error_en, 1); MLX5_SET(wq, wq, wq_type, MLX5_WQ_TYPE_CYCLIC); - MLX5_SET(wq, wq, uar_page, sq->uar.index); + MLX5_SET(wq, wq, uar_page, sq->priv->bfreg.index); MLX5_SET(wq, wq, log_wq_pg_sz, sq->wq_ctrl.buf.page_shift - 
PAGE_SHIFT); MLX5_SET64(wq, wq, dbr_addr, sq->wq_ctrl.db.dma); mlx5_fill_page_array(&sq->wq_ctrl.buf, (__be64 *) MLX5_ADDR_OF(wq, wq, pas)); err = mlx5_core_create_sq(sq->priv->mdev, in, inlen, &sq->sqn); kvfree(in); return (err); } int mlx5e_modify_sq(struct mlx5e_sq *sq, int curr_state, int next_state) { void *in; void *sqc; int inlen; int err; inlen = MLX5_ST_SZ_BYTES(modify_sq_in); in = mlx5_vzalloc(inlen); if (in == NULL) return (-ENOMEM); sqc = MLX5_ADDR_OF(modify_sq_in, in, ctx); MLX5_SET(modify_sq_in, in, sqn, sq->sqn); MLX5_SET(modify_sq_in, in, sq_state, curr_state); MLX5_SET(sqc, sqc, state, next_state); err = mlx5_core_modify_sq(sq->priv->mdev, in, inlen); kvfree(in); return (err); } void mlx5e_disable_sq(struct mlx5e_sq *sq) { mlx5_core_destroy_sq(sq->priv->mdev, sq->sqn); } static int mlx5e_open_sq(struct mlx5e_channel *c, int tc, struct mlx5e_sq_param *param, struct mlx5e_sq *sq) { int err; sq->cev_factor = c->priv->params_ethtool.tx_completion_fact; /* ensure the TX completion event factor is not zero */ if (sq->cev_factor == 0) sq->cev_factor = 1; err = mlx5e_create_sq(c, tc, param, sq); if (err) return (err); err = mlx5e_enable_sq(sq, param, c->priv->tisn[tc]); if (err) goto err_destroy_sq; err = mlx5e_modify_sq(sq, MLX5_SQC_STATE_RST, MLX5_SQC_STATE_RDY); if (err) goto err_disable_sq; WRITE_ONCE(sq->running, 1); return (0); err_disable_sq: mlx5e_disable_sq(sq); err_destroy_sq: mlx5e_destroy_sq(sq); return (err); } static void mlx5e_sq_send_nops_locked(struct mlx5e_sq *sq, int can_sleep) { /* fill up remainder with NOPs */ while (sq->cev_counter != 0) { while (!mlx5e_sq_has_room_for(sq, 1)) { if (can_sleep != 0) { mtx_unlock(&sq->lock); msleep(4); mtx_lock(&sq->lock); } else { goto done; } } /* send a single NOP */ mlx5e_send_nop(sq, 1); atomic_thread_fence_rel(); } done: /* Check if we need to write the doorbell */ if (likely(sq->doorbell.d64 != 0)) { - mlx5e_tx_notify_hw(sq, sq->doorbell.d32, 0); + mlx5e_tx_notify_hw(sq, sq->doorbell.d32); sq->doorbell.d64 = 0; } } void mlx5e_sq_cev_timeout(void *arg) { struct mlx5e_sq *sq = arg; mtx_assert(&sq->lock, MA_OWNED); /* check next state */ switch (sq->cev_next_state) { case MLX5E_CEV_STATE_SEND_NOPS: /* fill TX ring with NOPs, if any */ mlx5e_sq_send_nops_locked(sq, 0); /* check if completed */ if (sq->cev_counter == 0) { sq->cev_next_state = MLX5E_CEV_STATE_INITIAL; return; } break; default: /* send NOPs on next timeout */ sq->cev_next_state = MLX5E_CEV_STATE_SEND_NOPS; break; } /* restart timer */ callout_reset_curcpu(&sq->cev_callout, hz, mlx5e_sq_cev_timeout, sq); } void mlx5e_drain_sq(struct mlx5e_sq *sq) { int error; struct mlx5_core_dev *mdev= sq->priv->mdev; /* * Check if already stopped. * * NOTE: Serialization of this function is managed by the * caller ensuring the priv's state lock is locked or in case * of rate limit support, a single thread manages drain and * resume of SQs. The "running" variable can therefore safely * be read without any locks. 
*/ if (READ_ONCE(sq->running) == 0) return; /* don't put more packets into the SQ */ WRITE_ONCE(sq->running, 0); /* serialize access to DMA rings */ mtx_lock(&sq->lock); /* teardown event factor timer, if any */ sq->cev_next_state = MLX5E_CEV_STATE_HOLD_NOPS; callout_stop(&sq->cev_callout); /* send dummy NOPs in order to flush the transmit ring */ mlx5e_sq_send_nops_locked(sq, 1); mtx_unlock(&sq->lock); /* wait till SQ is empty or link is down */ mtx_lock(&sq->lock); while (sq->cc != sq->pc && (sq->priv->media_status_last & IFM_ACTIVE) != 0 && mdev->state != MLX5_DEVICE_STATE_INTERNAL_ERROR) { mtx_unlock(&sq->lock); msleep(1); sq->cq.mcq.comp(&sq->cq.mcq, NULL); mtx_lock(&sq->lock); } mtx_unlock(&sq->lock); /* error out remaining requests */ error = mlx5e_modify_sq(sq, MLX5_SQC_STATE_RDY, MLX5_SQC_STATE_ERR); if (error != 0) { mlx5_en_err(sq->ifp, "mlx5e_modify_sq() from RDY to ERR failed: %d\n", error); } /* wait till SQ is empty */ mtx_lock(&sq->lock); while (sq->cc != sq->pc && mdev->state != MLX5_DEVICE_STATE_INTERNAL_ERROR) { mtx_unlock(&sq->lock); msleep(1); sq->cq.mcq.comp(&sq->cq.mcq, NULL); mtx_lock(&sq->lock); } mtx_unlock(&sq->lock); } static void mlx5e_close_sq_wait(struct mlx5e_sq *sq) { mlx5e_drain_sq(sq); mlx5e_disable_sq(sq); mlx5e_destroy_sq(sq); } static int mlx5e_create_cq(struct mlx5e_priv *priv, struct mlx5e_cq_param *param, struct mlx5e_cq *cq, mlx5e_cq_comp_t *comp, int eq_ix) { struct mlx5_core_dev *mdev = priv->mdev; struct mlx5_core_cq *mcq = &cq->mcq; int eqn_not_used; int irqn; int err; u32 i; param->wq.buf_numa_node = 0; param->wq.db_numa_node = 0; err = mlx5_vector2eqn(mdev, eq_ix, &eqn_not_used, &irqn); if (err) return (err); err = mlx5_cqwq_create(mdev, ¶m->wq, param->cqc, &cq->wq, &cq->wq_ctrl); if (err) return (err); mcq->cqe_sz = 64; mcq->set_ci_db = cq->wq_ctrl.db.db; mcq->arm_db = cq->wq_ctrl.db.db + 1; *mcq->set_ci_db = 0; *mcq->arm_db = 0; mcq->vector = eq_ix; mcq->comp = comp; mcq->event = mlx5e_cq_error_event; mcq->irqn = irqn; - mcq->uar = &priv->cq_uar; for (i = 0; i < mlx5_cqwq_get_size(&cq->wq); i++) { struct mlx5_cqe64 *cqe = mlx5_cqwq_get_wqe(&cq->wq, i); cqe->op_own = 0xf1; } cq->priv = priv; return (0); } static void mlx5e_destroy_cq(struct mlx5e_cq *cq) { mlx5_wq_destroy(&cq->wq_ctrl); } static int mlx5e_enable_cq(struct mlx5e_cq *cq, struct mlx5e_cq_param *param, int eq_ix) { struct mlx5_core_cq *mcq = &cq->mcq; u32 out[MLX5_ST_SZ_DW(create_cq_out)]; void *in; void *cqc; int inlen; int irqn_not_used; int eqn; int err; inlen = MLX5_ST_SZ_BYTES(create_cq_in) + sizeof(u64) * cq->wq_ctrl.buf.npages; in = mlx5_vzalloc(inlen); if (in == NULL) return (-ENOMEM); cqc = MLX5_ADDR_OF(create_cq_in, in, cq_context); memcpy(cqc, param->cqc, sizeof(param->cqc)); mlx5_fill_page_array(&cq->wq_ctrl.buf, (__be64 *) MLX5_ADDR_OF(create_cq_in, in, pas)); mlx5_vector2eqn(cq->priv->mdev, eq_ix, &eqn, &irqn_not_used); MLX5_SET(cqc, cqc, c_eqn, eqn); - MLX5_SET(cqc, cqc, uar_page, mcq->uar->index); MLX5_SET(cqc, cqc, log_page_size, cq->wq_ctrl.buf.page_shift - PAGE_SHIFT); MLX5_SET64(cqc, cqc, dbr_addr, cq->wq_ctrl.db.dma); err = mlx5_core_create_cq(cq->priv->mdev, mcq, in, inlen, out, sizeof(out)); kvfree(in); if (err) return (err); mlx5e_cq_arm(cq, MLX5_GET_DOORBELL_LOCK(&cq->priv->doorbell_lock)); return (0); } static void mlx5e_disable_cq(struct mlx5e_cq *cq) { mlx5_core_destroy_cq(cq->priv->mdev, &cq->mcq); } int mlx5e_open_cq(struct mlx5e_priv *priv, struct mlx5e_cq_param *param, struct mlx5e_cq *cq, mlx5e_cq_comp_t *comp, int eq_ix) { int err; err = 
mlx5e_create_cq(priv, param, cq, comp, eq_ix); if (err) return (err); err = mlx5e_enable_cq(cq, param, eq_ix); if (err) goto err_destroy_cq; return (0); err_destroy_cq: mlx5e_destroy_cq(cq); return (err); } void mlx5e_close_cq(struct mlx5e_cq *cq) { mlx5e_disable_cq(cq); mlx5e_destroy_cq(cq); } static int mlx5e_open_tx_cqs(struct mlx5e_channel *c, struct mlx5e_channel_param *cparam) { int err; int tc; for (tc = 0; tc < c->priv->num_tc; tc++) { /* open completion queue */ err = mlx5e_open_cq(c->priv, &cparam->tx_cq, &c->sq[tc].cq, &mlx5e_tx_cq_comp, c->ix); if (err) goto err_close_tx_cqs; } return (0); err_close_tx_cqs: for (tc--; tc >= 0; tc--) mlx5e_close_cq(&c->sq[tc].cq); return (err); } static void mlx5e_close_tx_cqs(struct mlx5e_channel *c) { int tc; for (tc = 0; tc < c->priv->num_tc; tc++) mlx5e_close_cq(&c->sq[tc].cq); } static int mlx5e_open_sqs(struct mlx5e_channel *c, struct mlx5e_channel_param *cparam) { int err; int tc; for (tc = 0; tc < c->priv->num_tc; tc++) { err = mlx5e_open_sq(c, tc, &cparam->sq, &c->sq[tc]); if (err) goto err_close_sqs; } return (0); err_close_sqs: for (tc--; tc >= 0; tc--) mlx5e_close_sq_wait(&c->sq[tc]); return (err); } static void mlx5e_close_sqs_wait(struct mlx5e_channel *c) { int tc; for (tc = 0; tc < c->priv->num_tc; tc++) mlx5e_close_sq_wait(&c->sq[tc]); } static void mlx5e_chan_static_init(struct mlx5e_priv *priv, struct mlx5e_channel *c, int ix) { int tc; /* setup priv and channel number */ c->priv = priv; c->ix = ix; /* setup send tag */ m_snd_tag_init(&c->tag, c->priv->ifp, IF_SND_TAG_TYPE_UNLIMITED); init_completion(&c->completion); mtx_init(&c->rq.mtx, "mlx5rx", MTX_NETWORK_LOCK, MTX_DEF); callout_init_mtx(&c->rq.watchdog, &c->rq.mtx, 0); for (tc = 0; tc != MLX5E_MAX_TX_NUM_TC; tc++) { struct mlx5e_sq *sq = c->sq + tc; mtx_init(&sq->lock, "mlx5tx", MTX_NETWORK_LOCK " TX", MTX_DEF); mtx_init(&sq->comp_lock, "mlx5comp", MTX_NETWORK_LOCK " TX", MTX_DEF); callout_init_mtx(&sq->cev_callout, &sq->lock, 0); } } static void mlx5e_chan_wait_for_completion(struct mlx5e_channel *c) { m_snd_tag_rele(&c->tag); wait_for_completion(&c->completion); } static void mlx5e_priv_wait_for_completion(struct mlx5e_priv *priv, const uint32_t channels) { uint32_t x; for (x = 0; x != channels; x++) mlx5e_chan_wait_for_completion(&priv->channel[x]); } static void mlx5e_chan_static_destroy(struct mlx5e_channel *c) { int tc; callout_drain(&c->rq.watchdog); mtx_destroy(&c->rq.mtx); for (tc = 0; tc != MLX5E_MAX_TX_NUM_TC; tc++) { callout_drain(&c->sq[tc].cev_callout); mtx_destroy(&c->sq[tc].lock); mtx_destroy(&c->sq[tc].comp_lock); } } static int mlx5e_open_channel(struct mlx5e_priv *priv, struct mlx5e_channel_param *cparam, struct mlx5e_channel *c) { struct epoch_tracker et; int i, err; /* zero non-persistant data */ MLX5E_ZERO(&c->rq, mlx5e_rq_zero_start); for (i = 0; i != priv->num_tc; i++) MLX5E_ZERO(&c->sq[i], mlx5e_sq_zero_start); /* open transmit completion queue */ err = mlx5e_open_tx_cqs(c, cparam); if (err) goto err_free; /* open receive completion queue */ err = mlx5e_open_cq(c->priv, &cparam->rx_cq, &c->rq.cq, &mlx5e_rx_cq_comp, c->ix); if (err) goto err_close_tx_cqs; err = mlx5e_open_sqs(c, cparam); if (err) goto err_close_rx_cq; err = mlx5e_open_rq(c, &cparam->rq, &c->rq); if (err) goto err_close_sqs; /* poll receive queue initially */ NET_EPOCH_ENTER(et); c->rq.cq.mcq.comp(&c->rq.cq.mcq, NULL); NET_EPOCH_EXIT(et); return (0); err_close_sqs: mlx5e_close_sqs_wait(c); err_close_rx_cq: mlx5e_close_cq(&c->rq.cq); err_close_tx_cqs: mlx5e_close_tx_cqs(c); err_free: 
return (err); } static void mlx5e_close_channel(struct mlx5e_channel *c) { mlx5e_close_rq(&c->rq); } static void mlx5e_close_channel_wait(struct mlx5e_channel *c) { mlx5e_close_rq_wait(&c->rq); mlx5e_close_sqs_wait(c); mlx5e_close_tx_cqs(c); } static int mlx5e_get_wqe_sz(struct mlx5e_priv *priv, u32 *wqe_sz, u32 *nsegs) { u32 r, n; r = priv->params.hw_lro_en ? priv->params.lro_wqe_sz : MLX5E_SW2MB_MTU(priv->ifp->if_mtu); if (r > MJUM16BYTES) return (-ENOMEM); if (r > MJUM9BYTES) r = MJUM16BYTES; else if (r > MJUMPAGESIZE) r = MJUM9BYTES; else if (r > MCLBYTES) r = MJUMPAGESIZE; else r = MCLBYTES; /* * n + 1 must be a power of two, because stride size must be. * Stride size is 16 * (n + 1), as the first segment is * control. */ for (n = howmany(r, MLX5E_MAX_RX_BYTES); !powerof2(n + 1); n++) ; if (n > MLX5E_MAX_BUSDMA_RX_SEGS) return (-ENOMEM); *wqe_sz = r; *nsegs = n; return (0); } static void mlx5e_build_rq_param(struct mlx5e_priv *priv, struct mlx5e_rq_param *param) { void *rqc = param->rqc; void *wq = MLX5_ADDR_OF(rqc, rqc, wq); u32 wqe_sz, nsegs; mlx5e_get_wqe_sz(priv, &wqe_sz, &nsegs); MLX5_SET(wq, wq, wq_type, MLX5_WQ_TYPE_LINKED_LIST); MLX5_SET(wq, wq, end_padding_mode, MLX5_WQ_END_PAD_MODE_ALIGN); MLX5_SET(wq, wq, log_wq_stride, ilog2(sizeof(struct mlx5e_rx_wqe) + nsegs * sizeof(struct mlx5_wqe_data_seg))); MLX5_SET(wq, wq, log_wq_sz, priv->params.log_rq_size); MLX5_SET(wq, wq, pd, priv->pdn); param->wq.buf_numa_node = 0; param->wq.db_numa_node = 0; param->wq.linear = 1; } static void mlx5e_build_sq_param(struct mlx5e_priv *priv, struct mlx5e_sq_param *param) { void *sqc = param->sqc; void *wq = MLX5_ADDR_OF(sqc, sqc, wq); MLX5_SET(wq, wq, log_wq_sz, priv->params.log_sq_size); MLX5_SET(wq, wq, log_wq_stride, ilog2(MLX5_SEND_WQE_BB)); MLX5_SET(wq, wq, pd, priv->pdn); param->wq.buf_numa_node = 0; param->wq.db_numa_node = 0; param->wq.linear = 1; } static void mlx5e_build_common_cq_param(struct mlx5e_priv *priv, struct mlx5e_cq_param *param) { void *cqc = param->cqc; - MLX5_SET(cqc, cqc, uar_page, priv->cq_uar.index); + MLX5_SET(cqc, cqc, uar_page, priv->mdev->priv.uar->index); } static void mlx5e_get_default_profile(struct mlx5e_priv *priv, int mode, struct net_dim_cq_moder *ptr) { *ptr = net_dim_get_profile(mode, MLX5E_DIM_DEFAULT_PROFILE); /* apply LRO restrictions */ if (priv->params.hw_lro_en && ptr->pkts > MLX5E_DIM_MAX_RX_CQ_MODERATION_PKTS_WITH_LRO) { ptr->pkts = MLX5E_DIM_MAX_RX_CQ_MODERATION_PKTS_WITH_LRO; } } static void mlx5e_build_rx_cq_param(struct mlx5e_priv *priv, struct mlx5e_cq_param *param) { struct net_dim_cq_moder curr; void *cqc = param->cqc; /* * We use MLX5_CQE_FORMAT_HASH because the RX hash mini CQE * format is more beneficial for FreeBSD use case. * * Adding support for MLX5_CQE_FORMAT_CSUM will require changes * in mlx5e_decompress_cqe. 
*/ if (priv->params.cqe_zipping_en) { MLX5_SET(cqc, cqc, mini_cqe_res_format, MLX5_CQE_FORMAT_HASH); MLX5_SET(cqc, cqc, cqe_compression_en, 1); } MLX5_SET(cqc, cqc, log_cq_size, priv->params.log_rq_size); switch (priv->params.rx_cq_moderation_mode) { case 0: MLX5_SET(cqc, cqc, cq_period, priv->params.rx_cq_moderation_usec); MLX5_SET(cqc, cqc, cq_max_count, priv->params.rx_cq_moderation_pkts); MLX5_SET(cqc, cqc, cq_period_mode, MLX5_CQ_PERIOD_MODE_START_FROM_EQE); break; case 1: MLX5_SET(cqc, cqc, cq_period, priv->params.rx_cq_moderation_usec); MLX5_SET(cqc, cqc, cq_max_count, priv->params.rx_cq_moderation_pkts); if (MLX5_CAP_GEN(priv->mdev, cq_period_start_from_cqe)) MLX5_SET(cqc, cqc, cq_period_mode, MLX5_CQ_PERIOD_MODE_START_FROM_CQE); else MLX5_SET(cqc, cqc, cq_period_mode, MLX5_CQ_PERIOD_MODE_START_FROM_EQE); break; case 2: mlx5e_get_default_profile(priv, NET_DIM_CQ_PERIOD_MODE_START_FROM_EQE, &curr); MLX5_SET(cqc, cqc, cq_period, curr.usec); MLX5_SET(cqc, cqc, cq_max_count, curr.pkts); MLX5_SET(cqc, cqc, cq_period_mode, MLX5_CQ_PERIOD_MODE_START_FROM_EQE); break; case 3: mlx5e_get_default_profile(priv, NET_DIM_CQ_PERIOD_MODE_START_FROM_CQE, &curr); MLX5_SET(cqc, cqc, cq_period, curr.usec); MLX5_SET(cqc, cqc, cq_max_count, curr.pkts); if (MLX5_CAP_GEN(priv->mdev, cq_period_start_from_cqe)) MLX5_SET(cqc, cqc, cq_period_mode, MLX5_CQ_PERIOD_MODE_START_FROM_CQE); else MLX5_SET(cqc, cqc, cq_period_mode, MLX5_CQ_PERIOD_MODE_START_FROM_EQE); break; default: break; } mlx5e_dim_build_cq_param(priv, param); mlx5e_build_common_cq_param(priv, param); } static void mlx5e_build_tx_cq_param(struct mlx5e_priv *priv, struct mlx5e_cq_param *param) { void *cqc = param->cqc; MLX5_SET(cqc, cqc, log_cq_size, priv->params.log_sq_size); MLX5_SET(cqc, cqc, cq_period, priv->params.tx_cq_moderation_usec); MLX5_SET(cqc, cqc, cq_max_count, priv->params.tx_cq_moderation_pkts); switch (priv->params.tx_cq_moderation_mode) { case 0: MLX5_SET(cqc, cqc, cq_period_mode, MLX5_CQ_PERIOD_MODE_START_FROM_EQE); break; default: if (MLX5_CAP_GEN(priv->mdev, cq_period_start_from_cqe)) MLX5_SET(cqc, cqc, cq_period_mode, MLX5_CQ_PERIOD_MODE_START_FROM_CQE); else MLX5_SET(cqc, cqc, cq_period_mode, MLX5_CQ_PERIOD_MODE_START_FROM_EQE); break; } mlx5e_build_common_cq_param(priv, param); } static void mlx5e_build_channel_param(struct mlx5e_priv *priv, struct mlx5e_channel_param *cparam) { memset(cparam, 0, sizeof(*cparam)); mlx5e_build_rq_param(priv, &cparam->rq); mlx5e_build_sq_param(priv, &cparam->sq); mlx5e_build_rx_cq_param(priv, &cparam->rx_cq); mlx5e_build_tx_cq_param(priv, &cparam->tx_cq); } static int mlx5e_open_channels(struct mlx5e_priv *priv) { struct mlx5e_channel_param *cparam; int err; int i; int j; cparam = malloc(sizeof(*cparam), M_MLX5EN, M_WAITOK); mlx5e_build_channel_param(priv, cparam); for (i = 0; i < priv->params.num_channels; i++) { err = mlx5e_open_channel(priv, cparam, &priv->channel[i]); if (err) goto err_close_channels; } for (j = 0; j < priv->params.num_channels; j++) { err = mlx5e_wait_for_min_rx_wqes(&priv->channel[j].rq); if (err) goto err_close_channels; } free(cparam, M_MLX5EN); return (0); err_close_channels: while (i--) { mlx5e_close_channel(&priv->channel[i]); mlx5e_close_channel_wait(&priv->channel[i]); } free(cparam, M_MLX5EN); return (err); } static void mlx5e_close_channels(struct mlx5e_priv *priv) { int i; for (i = 0; i < priv->params.num_channels; i++) mlx5e_close_channel(&priv->channel[i]); for (i = 0; i < priv->params.num_channels; i++) mlx5e_close_channel_wait(&priv->channel[i]); } static 
int mlx5e_refresh_sq_params(struct mlx5e_priv *priv, struct mlx5e_sq *sq) { if (MLX5_CAP_GEN(priv->mdev, cq_period_mode_modify)) { uint8_t cq_mode; switch (priv->params.tx_cq_moderation_mode) { case 0: case 2: cq_mode = MLX5_CQ_PERIOD_MODE_START_FROM_EQE; break; default: cq_mode = MLX5_CQ_PERIOD_MODE_START_FROM_CQE; break; } return (mlx5_core_modify_cq_moderation_mode(priv->mdev, &sq->cq.mcq, priv->params.tx_cq_moderation_usec, priv->params.tx_cq_moderation_pkts, cq_mode)); } return (mlx5_core_modify_cq_moderation(priv->mdev, &sq->cq.mcq, priv->params.tx_cq_moderation_usec, priv->params.tx_cq_moderation_pkts)); } static int mlx5e_refresh_rq_params(struct mlx5e_priv *priv, struct mlx5e_rq *rq) { if (MLX5_CAP_GEN(priv->mdev, cq_period_mode_modify)) { uint8_t cq_mode; uint8_t dim_mode; int retval; switch (priv->params.rx_cq_moderation_mode) { case 0: case 2: cq_mode = MLX5_CQ_PERIOD_MODE_START_FROM_EQE; dim_mode = NET_DIM_CQ_PERIOD_MODE_START_FROM_EQE; break; default: cq_mode = MLX5_CQ_PERIOD_MODE_START_FROM_CQE; dim_mode = NET_DIM_CQ_PERIOD_MODE_START_FROM_CQE; break; } /* tear down dynamic interrupt moderation */ mtx_lock(&rq->mtx); rq->dim.mode = NET_DIM_CQ_PERIOD_MODE_DISABLED; mtx_unlock(&rq->mtx); /* wait for dynamic interrupt moderation work task, if any */ cancel_work_sync(&rq->dim.work); if (priv->params.rx_cq_moderation_mode >= 2) { struct net_dim_cq_moder curr; mlx5e_get_default_profile(priv, dim_mode, &curr); retval = mlx5_core_modify_cq_moderation_mode(priv->mdev, &rq->cq.mcq, curr.usec, curr.pkts, cq_mode); /* set dynamic interrupt moderation mode and zero defaults */ mtx_lock(&rq->mtx); rq->dim.mode = dim_mode; rq->dim.state = 0; rq->dim.profile_ix = MLX5E_DIM_DEFAULT_PROFILE; mtx_unlock(&rq->mtx); } else { retval = mlx5_core_modify_cq_moderation_mode(priv->mdev, &rq->cq.mcq, priv->params.rx_cq_moderation_usec, priv->params.rx_cq_moderation_pkts, cq_mode); } return (retval); } return (mlx5_core_modify_cq_moderation(priv->mdev, &rq->cq.mcq, priv->params.rx_cq_moderation_usec, priv->params.rx_cq_moderation_pkts)); } static int mlx5e_refresh_channel_params_sub(struct mlx5e_priv *priv, struct mlx5e_channel *c) { int err; int i; err = mlx5e_refresh_rq_params(priv, &c->rq); if (err) goto done; for (i = 0; i != priv->num_tc; i++) { err = mlx5e_refresh_sq_params(priv, &c->sq[i]); if (err) goto done; } done: return (err); } int mlx5e_refresh_channel_params(struct mlx5e_priv *priv) { int i; /* check if channels are closed */ if (test_bit(MLX5E_STATE_OPENED, &priv->state) == 0) return (EINVAL); for (i = 0; i < priv->params.num_channels; i++) { int err; err = mlx5e_refresh_channel_params_sub(priv, &priv->channel[i]); if (err) return (err); } return (0); } static int mlx5e_open_tis(struct mlx5e_priv *priv, int tc) { struct mlx5_core_dev *mdev = priv->mdev; u32 in[MLX5_ST_SZ_DW(create_tis_in)]; void *tisc = MLX5_ADDR_OF(create_tis_in, in, ctx); memset(in, 0, sizeof(in)); MLX5_SET(tisc, tisc, prio, tc); MLX5_SET(tisc, tisc, transport_domain, priv->tdn); return (mlx5_core_create_tis(mdev, in, sizeof(in), &priv->tisn[tc])); } static void mlx5e_close_tis(struct mlx5e_priv *priv, int tc) { mlx5_core_destroy_tis(priv->mdev, priv->tisn[tc]); } static int mlx5e_open_tises(struct mlx5e_priv *priv) { int num_tc = priv->num_tc; int err; int tc; for (tc = 0; tc < num_tc; tc++) { err = mlx5e_open_tis(priv, tc); if (err) goto err_close_tises; } return (0); err_close_tises: for (tc--; tc >= 0; tc--) mlx5e_close_tis(priv, tc); return (err); } static void mlx5e_close_tises(struct mlx5e_priv *priv) { int 
num_tc = priv->num_tc; int tc; for (tc = 0; tc < num_tc; tc++) mlx5e_close_tis(priv, tc); } static int mlx5e_open_rqt(struct mlx5e_priv *priv) { struct mlx5_core_dev *mdev = priv->mdev; u32 *in; u32 out[MLX5_ST_SZ_DW(create_rqt_out)] = {0}; void *rqtc; int inlen; int err; int sz; int i; sz = 1 << priv->params.rx_hash_log_tbl_sz; inlen = MLX5_ST_SZ_BYTES(create_rqt_in) + sizeof(u32) * sz; in = mlx5_vzalloc(inlen); if (in == NULL) return (-ENOMEM); rqtc = MLX5_ADDR_OF(create_rqt_in, in, rqt_context); MLX5_SET(rqtc, rqtc, rqt_actual_size, sz); MLX5_SET(rqtc, rqtc, rqt_max_size, sz); for (i = 0; i < sz; i++) { int ix = i; #ifdef RSS ix = rss_get_indirection_to_bucket(ix); #endif /* ensure we don't overflow */ ix %= priv->params.num_channels; /* apply receive side scaling stride, if any */ ix -= ix % (int)priv->params.channels_rsss; MLX5_SET(rqtc, rqtc, rq_num[i], priv->channel[ix].rq.rqn); } MLX5_SET(create_rqt_in, in, opcode, MLX5_CMD_OP_CREATE_RQT); err = mlx5_cmd_exec(mdev, in, inlen, out, sizeof(out)); if (!err) priv->rqtn = MLX5_GET(create_rqt_out, out, rqtn); kvfree(in); return (err); } static void mlx5e_close_rqt(struct mlx5e_priv *priv) { u32 in[MLX5_ST_SZ_DW(destroy_rqt_in)] = {0}; u32 out[MLX5_ST_SZ_DW(destroy_rqt_out)] = {0}; MLX5_SET(destroy_rqt_in, in, opcode, MLX5_CMD_OP_DESTROY_RQT); MLX5_SET(destroy_rqt_in, in, rqtn, priv->rqtn); mlx5_cmd_exec(priv->mdev, in, sizeof(in), out, sizeof(out)); } #define MLX5E_RSS_KEY_SIZE (10 * 4) /* bytes */ static void mlx5e_get_rss_key(void *key_ptr) { #ifdef RSS rss_getkey(key_ptr); #else static const u32 rsskey[] = { cpu_to_be32(0xD181C62C), cpu_to_be32(0xF7F4DB5B), cpu_to_be32(0x1983A2FC), cpu_to_be32(0x943E1ADB), cpu_to_be32(0xD9389E6B), cpu_to_be32(0xD1039C2C), cpu_to_be32(0xA74499AD), cpu_to_be32(0x593D56D9), cpu_to_be32(0xF3253C06), cpu_to_be32(0x2ADC1FFC), }; CTASSERT(sizeof(rsskey) == MLX5E_RSS_KEY_SIZE); memcpy(key_ptr, rsskey, MLX5E_RSS_KEY_SIZE); #endif } static void mlx5e_build_tir_ctx(struct mlx5e_priv *priv, u32 * tirc, int tt) { void *hfso = MLX5_ADDR_OF(tirc, tirc, rx_hash_field_selector_outer); __be32 *hkey; MLX5_SET(tirc, tirc, transport_domain, priv->tdn); #define ROUGH_MAX_L2_L3_HDR_SZ 256 #define MLX5_HASH_IP (MLX5_HASH_FIELD_SEL_SRC_IP |\ MLX5_HASH_FIELD_SEL_DST_IP) #define MLX5_HASH_ALL (MLX5_HASH_FIELD_SEL_SRC_IP |\ MLX5_HASH_FIELD_SEL_DST_IP |\ MLX5_HASH_FIELD_SEL_L4_SPORT |\ MLX5_HASH_FIELD_SEL_L4_DPORT) #define MLX5_HASH_IP_IPSEC_SPI (MLX5_HASH_FIELD_SEL_SRC_IP |\ MLX5_HASH_FIELD_SEL_DST_IP |\ MLX5_HASH_FIELD_SEL_IPSEC_SPI) if (priv->params.hw_lro_en) { MLX5_SET(tirc, tirc, lro_enable_mask, MLX5_TIRC_LRO_ENABLE_MASK_IPV4_LRO | MLX5_TIRC_LRO_ENABLE_MASK_IPV6_LRO); MLX5_SET(tirc, tirc, lro_max_msg_sz, (priv->params.lro_wqe_sz - ROUGH_MAX_L2_L3_HDR_SZ) >> 8); /* TODO: add the option to choose timer value dynamically */ MLX5_SET(tirc, tirc, lro_timeout_period_usecs, MLX5_CAP_ETH(priv->mdev, lro_timer_supported_periods[2])); } /* setup parameters for hashing TIR type, if any */ switch (tt) { case MLX5E_TT_ANY: MLX5_SET(tirc, tirc, disp_type, MLX5_TIRC_DISP_TYPE_DIRECT); MLX5_SET(tirc, tirc, inline_rqn, priv->channel[0].rq.rqn); break; default: MLX5_SET(tirc, tirc, disp_type, MLX5_TIRC_DISP_TYPE_INDIRECT); MLX5_SET(tirc, tirc, indirect_table, priv->rqtn); MLX5_SET(tirc, tirc, rx_hash_fn, MLX5_TIRC_RX_HASH_FN_HASH_TOEPLITZ); hkey = (__be32 *) MLX5_ADDR_OF(tirc, tirc, rx_hash_toeplitz_key); CTASSERT(MLX5_FLD_SZ_BYTES(tirc, rx_hash_toeplitz_key) >= MLX5E_RSS_KEY_SIZE); #ifdef RSS /* * The FreeBSD RSS implementation does 
currently not * support symmetric Toeplitz hashes: */ MLX5_SET(tirc, tirc, rx_hash_symmetric, 0); #else MLX5_SET(tirc, tirc, rx_hash_symmetric, 1); #endif mlx5e_get_rss_key(hkey); break; } switch (tt) { case MLX5E_TT_IPV4_TCP: MLX5_SET(rx_hash_field_select, hfso, l3_prot_type, MLX5_L3_PROT_TYPE_IPV4); MLX5_SET(rx_hash_field_select, hfso, l4_prot_type, MLX5_L4_PROT_TYPE_TCP); #ifdef RSS if (!(rss_gethashconfig() & RSS_HASHTYPE_RSS_TCP_IPV4)) { MLX5_SET(rx_hash_field_select, hfso, selected_fields, MLX5_HASH_IP); } else #endif MLX5_SET(rx_hash_field_select, hfso, selected_fields, MLX5_HASH_ALL); break; case MLX5E_TT_IPV6_TCP: MLX5_SET(rx_hash_field_select, hfso, l3_prot_type, MLX5_L3_PROT_TYPE_IPV6); MLX5_SET(rx_hash_field_select, hfso, l4_prot_type, MLX5_L4_PROT_TYPE_TCP); #ifdef RSS if (!(rss_gethashconfig() & RSS_HASHTYPE_RSS_TCP_IPV6)) { MLX5_SET(rx_hash_field_select, hfso, selected_fields, MLX5_HASH_IP); } else #endif MLX5_SET(rx_hash_field_select, hfso, selected_fields, MLX5_HASH_ALL); break; case MLX5E_TT_IPV4_UDP: MLX5_SET(rx_hash_field_select, hfso, l3_prot_type, MLX5_L3_PROT_TYPE_IPV4); MLX5_SET(rx_hash_field_select, hfso, l4_prot_type, MLX5_L4_PROT_TYPE_UDP); #ifdef RSS if (!(rss_gethashconfig() & RSS_HASHTYPE_RSS_UDP_IPV4)) { MLX5_SET(rx_hash_field_select, hfso, selected_fields, MLX5_HASH_IP); } else #endif MLX5_SET(rx_hash_field_select, hfso, selected_fields, MLX5_HASH_ALL); break; case MLX5E_TT_IPV6_UDP: MLX5_SET(rx_hash_field_select, hfso, l3_prot_type, MLX5_L3_PROT_TYPE_IPV6); MLX5_SET(rx_hash_field_select, hfso, l4_prot_type, MLX5_L4_PROT_TYPE_UDP); #ifdef RSS if (!(rss_gethashconfig() & RSS_HASHTYPE_RSS_UDP_IPV6)) { MLX5_SET(rx_hash_field_select, hfso, selected_fields, MLX5_HASH_IP); } else #endif MLX5_SET(rx_hash_field_select, hfso, selected_fields, MLX5_HASH_ALL); break; case MLX5E_TT_IPV4_IPSEC_AH: MLX5_SET(rx_hash_field_select, hfso, l3_prot_type, MLX5_L3_PROT_TYPE_IPV4); MLX5_SET(rx_hash_field_select, hfso, selected_fields, MLX5_HASH_IP_IPSEC_SPI); break; case MLX5E_TT_IPV6_IPSEC_AH: MLX5_SET(rx_hash_field_select, hfso, l3_prot_type, MLX5_L3_PROT_TYPE_IPV6); MLX5_SET(rx_hash_field_select, hfso, selected_fields, MLX5_HASH_IP_IPSEC_SPI); break; case MLX5E_TT_IPV4_IPSEC_ESP: MLX5_SET(rx_hash_field_select, hfso, l3_prot_type, MLX5_L3_PROT_TYPE_IPV4); MLX5_SET(rx_hash_field_select, hfso, selected_fields, MLX5_HASH_IP_IPSEC_SPI); break; case MLX5E_TT_IPV6_IPSEC_ESP: MLX5_SET(rx_hash_field_select, hfso, l3_prot_type, MLX5_L3_PROT_TYPE_IPV6); MLX5_SET(rx_hash_field_select, hfso, selected_fields, MLX5_HASH_IP_IPSEC_SPI); break; case MLX5E_TT_IPV4: MLX5_SET(rx_hash_field_select, hfso, l3_prot_type, MLX5_L3_PROT_TYPE_IPV4); MLX5_SET(rx_hash_field_select, hfso, selected_fields, MLX5_HASH_IP); break; case MLX5E_TT_IPV6: MLX5_SET(rx_hash_field_select, hfso, l3_prot_type, MLX5_L3_PROT_TYPE_IPV6); MLX5_SET(rx_hash_field_select, hfso, selected_fields, MLX5_HASH_IP); break; default: break; } } static int mlx5e_open_tir(struct mlx5e_priv *priv, int tt) { struct mlx5_core_dev *mdev = priv->mdev; u32 *in; void *tirc; int inlen; int err; inlen = MLX5_ST_SZ_BYTES(create_tir_in); in = mlx5_vzalloc(inlen); if (in == NULL) return (-ENOMEM); tirc = MLX5_ADDR_OF(create_tir_in, in, tir_context); mlx5e_build_tir_ctx(priv, tirc, tt); err = mlx5_core_create_tir(mdev, in, inlen, &priv->tirn[tt]); kvfree(in); return (err); } static void mlx5e_close_tir(struct mlx5e_priv *priv, int tt) { mlx5_core_destroy_tir(priv->mdev, priv->tirn[tt]); } static int mlx5e_open_tirs(struct mlx5e_priv *priv) { int err; 
int i; for (i = 0; i < MLX5E_NUM_TT; i++) { err = mlx5e_open_tir(priv, i); if (err) goto err_close_tirs; } return (0); err_close_tirs: for (i--; i >= 0; i--) mlx5e_close_tir(priv, i); return (err); } static void mlx5e_close_tirs(struct mlx5e_priv *priv) { int i; for (i = 0; i < MLX5E_NUM_TT; i++) mlx5e_close_tir(priv, i); } /* * SW MTU does not include headers, * HW MTU includes all headers and checksums. */ static int mlx5e_set_dev_port_mtu(struct ifnet *ifp, int sw_mtu) { struct mlx5e_priv *priv = ifp->if_softc; struct mlx5_core_dev *mdev = priv->mdev; int hw_mtu; int err; hw_mtu = MLX5E_SW2HW_MTU(sw_mtu); err = mlx5_set_port_mtu(mdev, hw_mtu); if (err) { mlx5_en_err(ifp, "mlx5_set_port_mtu failed setting %d, err=%d\n", sw_mtu, err); return (err); } /* Update vport context MTU */ err = mlx5_set_vport_mtu(mdev, hw_mtu); if (err) { mlx5_en_err(ifp, "Failed updating vport context with MTU size, err=%d\n", err); } ifp->if_mtu = sw_mtu; err = mlx5_query_vport_mtu(mdev, &hw_mtu); if (err || !hw_mtu) { /* fallback to port oper mtu */ err = mlx5_query_port_oper_mtu(mdev, &hw_mtu); } if (err) { mlx5_en_err(ifp, "Query port MTU, after setting new MTU value, failed\n"); return (err); } else if (MLX5E_HW2SW_MTU(hw_mtu) < sw_mtu) { err = -E2BIG, mlx5_en_err(ifp, "Port MTU %d is smaller than ifp mtu %d\n", hw_mtu, sw_mtu); } else if (MLX5E_HW2SW_MTU(hw_mtu) > sw_mtu) { err = -EINVAL; mlx5_en_err(ifp, "Port MTU %d is bigger than ifp mtu %d\n", hw_mtu, sw_mtu); } priv->params_ethtool.hw_mtu = hw_mtu; /* compute MSB */ while (hw_mtu & (hw_mtu - 1)) hw_mtu &= (hw_mtu - 1); priv->params_ethtool.hw_mtu_msb = hw_mtu; return (err); } int mlx5e_open_locked(struct ifnet *ifp) { struct mlx5e_priv *priv = ifp->if_softc; int err; u16 set_id; /* check if already opened */ if (test_bit(MLX5E_STATE_OPENED, &priv->state) != 0) return (0); #ifdef RSS if (rss_getnumbuckets() > priv->params.num_channels) { mlx5_en_info(ifp, "NOTE: There are more RSS buckets(%u) than channels(%u) available\n", rss_getnumbuckets(), priv->params.num_channels); } #endif err = mlx5e_open_tises(priv); if (err) { mlx5_en_err(ifp, "mlx5e_open_tises failed, %d\n", err); return (err); } err = mlx5_vport_alloc_q_counter(priv->mdev, MLX5_INTERFACE_PROTOCOL_ETH, &set_id); if (err) { mlx5_en_err(priv->ifp, "mlx5_vport_alloc_q_counter failed: %d\n", err); goto err_close_tises; } /* store counter set ID */ priv->counter_set_id = set_id; err = mlx5e_open_channels(priv); if (err) { mlx5_en_err(ifp, "mlx5e_open_channels failed, %d\n", err); goto err_dalloc_q_counter; } err = mlx5e_open_rqt(priv); if (err) { mlx5_en_err(ifp, "mlx5e_open_rqt failed, %d\n", err); goto err_close_channels; } err = mlx5e_open_tirs(priv); if (err) { mlx5_en_err(ifp, "mlx5e_open_tir failed, %d\n", err); goto err_close_rqls; } err = mlx5e_open_flow_table(priv); if (err) { mlx5_en_err(ifp, "mlx5e_open_flow_table failed, %d\n", err); goto err_close_tirs; } err = mlx5e_add_all_vlan_rules(priv); if (err) { mlx5_en_err(ifp, "mlx5e_add_all_vlan_rules failed, %d\n", err); goto err_close_flow_table; } set_bit(MLX5E_STATE_OPENED, &priv->state); mlx5e_update_carrier(priv); mlx5e_set_rx_mode_core(priv); return (0); err_close_flow_table: mlx5e_close_flow_table(priv); err_close_tirs: mlx5e_close_tirs(priv); err_close_rqls: mlx5e_close_rqt(priv); err_close_channels: mlx5e_close_channels(priv); err_dalloc_q_counter: mlx5_vport_dealloc_q_counter(priv->mdev, MLX5_INTERFACE_PROTOCOL_ETH, priv->counter_set_id); err_close_tises: mlx5e_close_tises(priv); return (err); } static void mlx5e_open(void 
*arg) { struct mlx5e_priv *priv = arg; PRIV_LOCK(priv); if (mlx5_set_port_status(priv->mdev, MLX5_PORT_UP)) mlx5_en_err(priv->ifp, "Setting port status to up failed\n"); mlx5e_open_locked(priv->ifp); priv->ifp->if_drv_flags |= IFF_DRV_RUNNING; PRIV_UNLOCK(priv); } int mlx5e_close_locked(struct ifnet *ifp) { struct mlx5e_priv *priv = ifp->if_softc; /* check if already closed */ if (test_bit(MLX5E_STATE_OPENED, &priv->state) == 0) return (0); clear_bit(MLX5E_STATE_OPENED, &priv->state); mlx5e_set_rx_mode_core(priv); mlx5e_del_all_vlan_rules(priv); if_link_state_change(priv->ifp, LINK_STATE_DOWN); mlx5e_close_flow_table(priv); mlx5e_close_tirs(priv); mlx5e_close_rqt(priv); mlx5e_close_channels(priv); mlx5_vport_dealloc_q_counter(priv->mdev, MLX5_INTERFACE_PROTOCOL_ETH, priv->counter_set_id); mlx5e_close_tises(priv); return (0); } #if (__FreeBSD_version >= 1100000) static uint64_t mlx5e_get_counter(struct ifnet *ifp, ift_counter cnt) { struct mlx5e_priv *priv = ifp->if_softc; u64 retval; /* PRIV_LOCK(priv); XXX not allowed */ switch (cnt) { case IFCOUNTER_IPACKETS: retval = priv->stats.vport.rx_packets; break; case IFCOUNTER_IERRORS: retval = priv->stats.pport.in_range_len_errors + priv->stats.pport.out_of_range_len + priv->stats.pport.too_long_errors + priv->stats.pport.check_seq_err + priv->stats.pport.alignment_err; break; case IFCOUNTER_IQDROPS: retval = priv->stats.vport.rx_out_of_buffer; break; case IFCOUNTER_OPACKETS: retval = priv->stats.vport.tx_packets; break; case IFCOUNTER_OERRORS: retval = priv->stats.port_stats_debug.out_discards; break; case IFCOUNTER_IBYTES: retval = priv->stats.vport.rx_bytes; break; case IFCOUNTER_OBYTES: retval = priv->stats.vport.tx_bytes; break; case IFCOUNTER_IMCASTS: retval = priv->stats.vport.rx_multicast_packets; break; case IFCOUNTER_OMCASTS: retval = priv->stats.vport.tx_multicast_packets; break; case IFCOUNTER_OQDROPS: retval = priv->stats.vport.tx_queue_dropped; break; case IFCOUNTER_COLLISIONS: retval = priv->stats.pport.collisions; break; default: retval = if_get_counter_default(ifp, cnt); break; } /* PRIV_UNLOCK(priv); XXX not allowed */ return (retval); } #endif static void mlx5e_set_rx_mode(struct ifnet *ifp) { struct mlx5e_priv *priv = ifp->if_softc; queue_work(priv->wq, &priv->set_rx_mode_work); } static int mlx5e_ioctl(struct ifnet *ifp, u_long command, caddr_t data) { struct mlx5e_priv *priv; struct ifreq *ifr; struct ifdownreason *ifdr; struct ifi2creq i2c; struct ifrsskey *ifrk; struct ifrsshash *ifrh; int error = 0; int mask = 0; int size_read = 0; int module_status; int module_num; int max_mtu; uint8_t read_addr; priv = ifp->if_softc; /* check if detaching */ if (priv == NULL || priv->gone != 0) return (ENXIO); switch (command) { case SIOCSIFMTU: ifr = (struct ifreq *)data; PRIV_LOCK(priv); mlx5_query_port_max_mtu(priv->mdev, &max_mtu); if (ifr->ifr_mtu >= MLX5E_MTU_MIN && ifr->ifr_mtu <= MIN(MLX5E_MTU_MAX, max_mtu)) { int was_opened; was_opened = test_bit(MLX5E_STATE_OPENED, &priv->state); if (was_opened) mlx5e_close_locked(ifp); /* set new MTU */ mlx5e_set_dev_port_mtu(ifp, ifr->ifr_mtu); if (was_opened) mlx5e_open_locked(ifp); } else { error = EINVAL; mlx5_en_err(ifp, "Invalid MTU value. 
Min val: %d, Max val: %d\n", MLX5E_MTU_MIN, MIN(MLX5E_MTU_MAX, max_mtu)); } PRIV_UNLOCK(priv); break; case SIOCSIFFLAGS: if ((ifp->if_flags & IFF_UP) && (ifp->if_drv_flags & IFF_DRV_RUNNING)) { mlx5e_set_rx_mode(ifp); break; } PRIV_LOCK(priv); if (ifp->if_flags & IFF_UP) { if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) { if (test_bit(MLX5E_STATE_OPENED, &priv->state) == 0) mlx5e_open_locked(ifp); ifp->if_drv_flags |= IFF_DRV_RUNNING; mlx5_set_port_status(priv->mdev, MLX5_PORT_UP); } } else { if (ifp->if_drv_flags & IFF_DRV_RUNNING) { mlx5_set_port_status(priv->mdev, MLX5_PORT_DOWN); if (test_bit(MLX5E_STATE_OPENED, &priv->state) != 0) mlx5e_close_locked(ifp); mlx5e_update_carrier(priv); ifp->if_drv_flags &= ~IFF_DRV_RUNNING; } } PRIV_UNLOCK(priv); break; case SIOCADDMULTI: case SIOCDELMULTI: mlx5e_set_rx_mode(ifp); break; case SIOCSIFMEDIA: case SIOCGIFMEDIA: case SIOCGIFXMEDIA: ifr = (struct ifreq *)data; error = ifmedia_ioctl(ifp, ifr, &priv->media, command); break; case SIOCSIFCAP: ifr = (struct ifreq *)data; PRIV_LOCK(priv); mask = ifr->ifr_reqcap ^ ifp->if_capenable; if (mask & IFCAP_TXCSUM) { ifp->if_capenable ^= IFCAP_TXCSUM; ifp->if_hwassist ^= (CSUM_TCP | CSUM_UDP | CSUM_IP); if (IFCAP_TSO4 & ifp->if_capenable && !(IFCAP_TXCSUM & ifp->if_capenable)) { mask &= ~IFCAP_TSO4; ifp->if_capenable &= ~IFCAP_TSO4; ifp->if_hwassist &= ~CSUM_IP_TSO; mlx5_en_err(ifp, "tso4 disabled due to -txcsum.\n"); } } if (mask & IFCAP_TXCSUM_IPV6) { ifp->if_capenable ^= IFCAP_TXCSUM_IPV6; ifp->if_hwassist ^= (CSUM_UDP_IPV6 | CSUM_TCP_IPV6); if (IFCAP_TSO6 & ifp->if_capenable && !(IFCAP_TXCSUM_IPV6 & ifp->if_capenable)) { mask &= ~IFCAP_TSO6; ifp->if_capenable &= ~IFCAP_TSO6; ifp->if_hwassist &= ~CSUM_IP6_TSO; mlx5_en_err(ifp, "tso6 disabled due to -txcsum6.\n"); } } if (mask & IFCAP_NOMAP) ifp->if_capenable ^= IFCAP_NOMAP; if (mask & IFCAP_TXTLS4) ifp->if_capenable ^= IFCAP_TXTLS4; if (mask & IFCAP_TXTLS6) ifp->if_capenable ^= IFCAP_TXTLS6; #ifdef RATELIMIT if (mask & IFCAP_TXTLS_RTLMT) ifp->if_capenable ^= IFCAP_TXTLS_RTLMT; #endif if (mask & IFCAP_RXCSUM) ifp->if_capenable ^= IFCAP_RXCSUM; if (mask & IFCAP_RXCSUM_IPV6) ifp->if_capenable ^= IFCAP_RXCSUM_IPV6; if (mask & IFCAP_TSO4) { if (!(IFCAP_TSO4 & ifp->if_capenable) && !(IFCAP_TXCSUM & ifp->if_capenable)) { mlx5_en_err(ifp, "enable txcsum first.\n"); error = EAGAIN; goto out; } ifp->if_capenable ^= IFCAP_TSO4; ifp->if_hwassist ^= CSUM_IP_TSO; } if (mask & IFCAP_TSO6) { if (!(IFCAP_TSO6 & ifp->if_capenable) && !(IFCAP_TXCSUM_IPV6 & ifp->if_capenable)) { mlx5_en_err(ifp, "enable txcsum6 first.\n"); error = EAGAIN; goto out; } ifp->if_capenable ^= IFCAP_TSO6; ifp->if_hwassist ^= CSUM_IP6_TSO; } if (mask & IFCAP_VLAN_HWFILTER) { if (ifp->if_capenable & IFCAP_VLAN_HWFILTER) mlx5e_disable_vlan_filter(priv); else mlx5e_enable_vlan_filter(priv); ifp->if_capenable ^= IFCAP_VLAN_HWFILTER; } if (mask & IFCAP_VLAN_HWTAGGING) ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING; if (mask & IFCAP_WOL_MAGIC) ifp->if_capenable ^= IFCAP_WOL_MAGIC; VLAN_CAPABILITIES(ifp); /* turn off LRO means also turn of HW LRO - if it's on */ if (mask & IFCAP_LRO) { int was_opened = test_bit(MLX5E_STATE_OPENED, &priv->state); bool need_restart = false; ifp->if_capenable ^= IFCAP_LRO; /* figure out if updating HW LRO is needed */ if (!(ifp->if_capenable & IFCAP_LRO)) { if (priv->params.hw_lro_en) { priv->params.hw_lro_en = false; need_restart = true; } } else { if (priv->params.hw_lro_en == false && priv->params_ethtool.hw_lro != 0) { priv->params.hw_lro_en = true; need_restart = true; } } 
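/*
 * Editor's illustration (not part of this patch): the SIOCSIFCAP case above
 * computes "mask = ifr->ifr_reqcap ^ ifp->if_capenable", so every bit set in
 * "mask" is a capability userland wants toggled, and a capability whose
 * prerequisite is being turned off (TSO4 needs TXCSUM, TSO6 needs
 * TXCSUM_IPV6) is force-cleared along with it.  The standalone userland
 * sketch below demonstrates that XOR-toggle pattern; the CAP_* bits are
 * hypothetical example flags, not ifnet capability values.
 */
#include <stdio.h>

#define CAP_TXCSUM	0x1u
#define CAP_TSO		0x2u	/* depends on CAP_TXCSUM, like TSO4/TXCSUM */
#define CAP_LRO		0x4u

static unsigned int
toggle_caps(unsigned int enabled, unsigned int requested)
{
	unsigned int mask = requested ^ enabled;	/* bits to flip */

	if (mask & CAP_TXCSUM) {
		enabled ^= CAP_TXCSUM;
		/* dropping TXCSUM also drops the dependent TSO bit */
		if ((enabled & CAP_TSO) && !(enabled & CAP_TXCSUM)) {
			mask &= ~CAP_TSO;
			enabled &= ~CAP_TSO;
		}
	}
	if (mask & CAP_TSO) {
		/* refuse to enable TSO without TXCSUM, like the driver does */
		if (!(enabled & CAP_TSO) && !(enabled & CAP_TXCSUM))
			return (enabled);	/* driver would return EAGAIN */
		enabled ^= CAP_TSO;
	}
	if (mask & CAP_LRO)
		enabled ^= CAP_LRO;
	return (enabled);
}

int
main(void)
{
	/* start with TXCSUM+TSO enabled, request LRO only: TSO falls with TXCSUM */
	printf("0x%x\n", toggle_caps(CAP_TXCSUM | CAP_TSO, CAP_LRO));
	return (0);
}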
if (was_opened && need_restart) { mlx5e_close_locked(ifp); mlx5e_open_locked(ifp); } } if (mask & IFCAP_HWRXTSTMP) { ifp->if_capenable ^= IFCAP_HWRXTSTMP; if (ifp->if_capenable & IFCAP_HWRXTSTMP) { if (priv->clbr_done == 0) mlx5e_reset_calibration_callout(priv); } else { callout_drain(&priv->tstmp_clbr); priv->clbr_done = 0; } } out: PRIV_UNLOCK(priv); break; case SIOCGI2C: ifr = (struct ifreq *)data; /* * Copy from the user-space address ifr_data to the * kernel-space address i2c */ error = copyin(ifr_data_get_ptr(ifr), &i2c, sizeof(i2c)); if (error) break; if (i2c.len > sizeof(i2c.data)) { error = EINVAL; break; } PRIV_LOCK(priv); /* Get module_num which is required for the query_eeprom */ error = mlx5_query_module_num(priv->mdev, &module_num); if (error) { mlx5_en_err(ifp, "Query module num failed, eeprom reading is not supported\n"); error = EINVAL; goto err_i2c; } /* Check if module is present before doing an access */ module_status = mlx5_query_module_status(priv->mdev, module_num); if (module_status != MLX5_MODULE_STATUS_PLUGGED_ENABLED) { error = EINVAL; goto err_i2c; } /* * Currently 0XA0 and 0xA2 are the only addresses permitted. * The internal conversion is as follows: */ if (i2c.dev_addr == 0xA0) read_addr = MLX5_I2C_ADDR_LOW; else if (i2c.dev_addr == 0xA2) read_addr = MLX5_I2C_ADDR_HIGH; else { mlx5_en_err(ifp, "Query eeprom failed, Invalid Address: %X\n", i2c.dev_addr); error = EINVAL; goto err_i2c; } error = mlx5_query_eeprom(priv->mdev, read_addr, MLX5_EEPROM_LOW_PAGE, (uint32_t)i2c.offset, (uint32_t)i2c.len, module_num, (uint32_t *)i2c.data, &size_read); if (error) { mlx5_en_err(ifp, "Query eeprom failed, eeprom reading is not supported\n"); error = EINVAL; goto err_i2c; } if (i2c.len > MLX5_EEPROM_MAX_BYTES) { error = mlx5_query_eeprom(priv->mdev, read_addr, MLX5_EEPROM_LOW_PAGE, (uint32_t)(i2c.offset + size_read), (uint32_t)(i2c.len - size_read), module_num, (uint32_t *)(i2c.data + size_read), &size_read); } if (error) { mlx5_en_err(ifp, "Query eeprom failed, eeprom reading is not supported\n"); error = EINVAL; goto err_i2c; } error = copyout(&i2c, ifr_data_get_ptr(ifr), sizeof(i2c)); err_i2c: PRIV_UNLOCK(priv); break; case SIOCGIFDOWNREASON: ifdr = (struct ifdownreason *)data; bzero(ifdr->ifdr_msg, sizeof(ifdr->ifdr_msg)); PRIV_LOCK(priv); error = -mlx5_query_pddr_troubleshooting_info(priv->mdev, NULL, ifdr->ifdr_msg, sizeof(ifdr->ifdr_msg)); PRIV_UNLOCK(priv); if (error == 0) ifdr->ifdr_reason = IFDR_REASON_MSG; break; case SIOCGIFRSSKEY: ifrk = (struct ifrsskey *)data; ifrk->ifrk_func = RSS_FUNC_TOEPLITZ; ifrk->ifrk_keylen = MLX5E_RSS_KEY_SIZE; CTASSERT(sizeof(ifrk->ifrk_key) >= MLX5E_RSS_KEY_SIZE); mlx5e_get_rss_key(ifrk->ifrk_key); break; case SIOCGIFRSSHASH: ifrh = (struct ifrsshash *)data; ifrh->ifrh_func = RSS_FUNC_TOEPLITZ; ifrh->ifrh_types = RSS_TYPE_IPV4 | RSS_TYPE_TCP_IPV4 | RSS_TYPE_UDP_IPV4 | RSS_TYPE_IPV6 | RSS_TYPE_TCP_IPV6 | RSS_TYPE_UDP_IPV6; break; default: error = ether_ioctl(ifp, command, data); break; } return (error); } static int mlx5e_check_required_hca_cap(struct mlx5_core_dev *mdev) { /* * TODO: uncoment once FW really sets all these bits if * (!mdev->caps.eth.rss_ind_tbl_cap || !mdev->caps.eth.csum_cap || * !mdev->caps.eth.max_lso_cap || !mdev->caps.eth.vlan_cap || * !(mdev->caps.gen.flags & MLX5_DEV_CAP_FLAG_SCQE_BRK_MOD)) return * -ENOTSUPP; */ /* TODO: add more must-to-have features */ if (MLX5_CAP_GEN(mdev, port_type) != MLX5_CAP_PORT_TYPE_ETH) return (-ENODEV); return (0); } static u16 mlx5e_get_max_inline_cap(struct mlx5_core_dev *mdev) 
{ const int min_size = ETHER_VLAN_ENCAP_LEN + ETHER_HDR_LEN; const int max_size = MLX5E_MAX_TX_INLINE; const int bf_buf_size = ((1U << MLX5_CAP_GEN(mdev, log_bf_reg_size)) / 2U) - (sizeof(struct mlx5e_tx_wqe) - 2); /* verify against driver limits */ if (bf_buf_size > max_size) return (max_size); else if (bf_buf_size < min_size) return (min_size); else return (bf_buf_size); } static int mlx5e_build_ifp_priv(struct mlx5_core_dev *mdev, struct mlx5e_priv *priv, int num_comp_vectors) { int err; /* * TODO: Consider link speed for setting "log_sq_size", * "log_rq_size" and "cq_moderation_xxx": */ priv->params.log_sq_size = MLX5E_PARAMS_DEFAULT_LOG_SQ_SIZE; priv->params.log_rq_size = MLX5E_PARAMS_DEFAULT_LOG_RQ_SIZE; priv->params.rx_cq_moderation_usec = MLX5_CAP_GEN(mdev, cq_period_start_from_cqe) ? MLX5E_PARAMS_DEFAULT_RX_CQ_MODERATION_USEC_FROM_CQE : MLX5E_PARAMS_DEFAULT_RX_CQ_MODERATION_USEC; priv->params.rx_cq_moderation_mode = MLX5_CAP_GEN(mdev, cq_period_start_from_cqe) ? 1 : 0; priv->params.rx_cq_moderation_pkts = MLX5E_PARAMS_DEFAULT_RX_CQ_MODERATION_PKTS; priv->params.tx_cq_moderation_usec = MLX5E_PARAMS_DEFAULT_TX_CQ_MODERATION_USEC; priv->params.tx_cq_moderation_pkts = MLX5E_PARAMS_DEFAULT_TX_CQ_MODERATION_PKTS; priv->params.min_rx_wqes = MLX5E_PARAMS_DEFAULT_MIN_RX_WQES; priv->params.rx_hash_log_tbl_sz = (order_base_2(num_comp_vectors) > MLX5E_PARAMS_DEFAULT_RX_HASH_LOG_TBL_SZ) ? order_base_2(num_comp_vectors) : MLX5E_PARAMS_DEFAULT_RX_HASH_LOG_TBL_SZ; priv->params.num_tc = 1; priv->params.default_vlan_prio = 0; priv->counter_set_id = -1; priv->params.tx_max_inline = mlx5e_get_max_inline_cap(mdev); err = mlx5_query_min_inline(mdev, &priv->params.tx_min_inline_mode); if (err) return (err); /* * hw lro is currently defaulted to off. when it won't anymore we * will consider the HW capability: "!!MLX5_CAP_ETH(mdev, lro_cap)" */ priv->params.hw_lro_en = false; priv->params.lro_wqe_sz = MLX5E_PARAMS_DEFAULT_LRO_WQE_SZ; /* * CQE zipping is currently defaulted to off. 
when it won't * anymore we will consider the HW capability: * "!!MLX5_CAP_GEN(mdev, cqe_compression)" */ priv->params.cqe_zipping_en = false; priv->mdev = mdev; priv->params.num_channels = num_comp_vectors; priv->params.channels_rsss = 1; priv->order_base_2_num_channels = order_base_2(num_comp_vectors); priv->queue_mapping_channel_mask = roundup_pow_of_two(num_comp_vectors) - 1; priv->num_tc = priv->params.num_tc; priv->default_vlan_prio = priv->params.default_vlan_prio; INIT_WORK(&priv->update_stats_work, mlx5e_update_stats_work); INIT_WORK(&priv->update_carrier_work, mlx5e_update_carrier_work); INIT_WORK(&priv->set_rx_mode_work, mlx5e_set_rx_mode_work); return (0); } static int mlx5e_create_mkey(struct mlx5e_priv *priv, u32 pdn, struct mlx5_core_mr *mkey) { struct ifnet *ifp = priv->ifp; struct mlx5_core_dev *mdev = priv->mdev; int inlen = MLX5_ST_SZ_BYTES(create_mkey_in); void *mkc; u32 *in; int err; in = mlx5_vzalloc(inlen); if (in == NULL) { mlx5_en_err(ifp, "failed to allocate inbox\n"); return (-ENOMEM); } mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry); MLX5_SET(mkc, mkc, access_mode, MLX5_ACCESS_MODE_PA); MLX5_SET(mkc, mkc, umr_en, 1); /* used by HW TLS */ MLX5_SET(mkc, mkc, lw, 1); MLX5_SET(mkc, mkc, lr, 1); MLX5_SET(mkc, mkc, pd, pdn); MLX5_SET(mkc, mkc, length64, 1); MLX5_SET(mkc, mkc, qpn, 0xffffff); err = mlx5_core_create_mkey(mdev, mkey, in, inlen); if (err) mlx5_en_err(ifp, "mlx5_core_create_mkey failed, %d\n", err); kvfree(in); return (err); } static const char *mlx5e_vport_stats_desc[] = { MLX5E_VPORT_STATS(MLX5E_STATS_DESC) }; static const char *mlx5e_pport_stats_desc[] = { MLX5E_PPORT_STATS(MLX5E_STATS_DESC) }; static void mlx5e_priv_static_init(struct mlx5e_priv *priv, const uint32_t channels) { uint32_t x; mtx_init(&priv->async_events_mtx, "mlx5async", MTX_NETWORK_LOCK, MTX_DEF); sx_init(&priv->state_lock, "mlx5state"); callout_init_mtx(&priv->watchdog, &priv->async_events_mtx, 0); MLX5_INIT_DOORBELL_LOCK(&priv->doorbell_lock); for (x = 0; x != channels; x++) mlx5e_chan_static_init(priv, &priv->channel[x], x); } static void mlx5e_priv_static_destroy(struct mlx5e_priv *priv, const uint32_t channels) { uint32_t x; for (x = 0; x != channels; x++) mlx5e_chan_static_destroy(&priv->channel[x]); callout_drain(&priv->watchdog); mtx_destroy(&priv->async_events_mtx); sx_destroy(&priv->state_lock); } static int sysctl_firmware(SYSCTL_HANDLER_ARGS) { /* * %d.%d%.d the string format. * fw_rev_{maj,min,sub} return u16, 2^16 = 65536. * We need at most 5 chars to store that. * It also has: two "." and NULL at the end, which means we need 18 * (5*3 + 3) chars at most. 
*/ char fw[18]; struct mlx5e_priv *priv = arg1; int error; snprintf(fw, sizeof(fw), "%d.%d.%d", fw_rev_maj(priv->mdev), fw_rev_min(priv->mdev), fw_rev_sub(priv->mdev)); error = sysctl_handle_string(oidp, fw, sizeof(fw), req); return (error); } static void mlx5e_disable_tx_dma(struct mlx5e_channel *ch) { int i; for (i = 0; i < ch->priv->num_tc; i++) mlx5e_drain_sq(&ch->sq[i]); } static void mlx5e_reset_sq_doorbell_record(struct mlx5e_sq *sq) { sq->doorbell.d32[0] = cpu_to_be32(MLX5_OPCODE_NOP); sq->doorbell.d32[1] = cpu_to_be32(sq->sqn << 8); - mlx5e_tx_notify_hw(sq, sq->doorbell.d32, 0); + mlx5e_tx_notify_hw(sq, sq->doorbell.d32); sq->doorbell.d64 = 0; } void mlx5e_resume_sq(struct mlx5e_sq *sq) { int err; /* check if already enabled */ if (READ_ONCE(sq->running) != 0) return; err = mlx5e_modify_sq(sq, MLX5_SQC_STATE_ERR, MLX5_SQC_STATE_RST); if (err != 0) { mlx5_en_err(sq->ifp, "mlx5e_modify_sq() from ERR to RST failed: %d\n", err); } sq->cc = 0; sq->pc = 0; /* reset doorbell prior to moving from RST to RDY */ mlx5e_reset_sq_doorbell_record(sq); err = mlx5e_modify_sq(sq, MLX5_SQC_STATE_RST, MLX5_SQC_STATE_RDY); if (err != 0) { mlx5_en_err(sq->ifp, "mlx5e_modify_sq() from RST to RDY failed: %d\n", err); } sq->cev_next_state = MLX5E_CEV_STATE_INITIAL; WRITE_ONCE(sq->running, 1); } static void mlx5e_enable_tx_dma(struct mlx5e_channel *ch) { int i; for (i = 0; i < ch->priv->num_tc; i++) mlx5e_resume_sq(&ch->sq[i]); } static void mlx5e_disable_rx_dma(struct mlx5e_channel *ch) { struct mlx5e_rq *rq = &ch->rq; struct epoch_tracker et; int err; mtx_lock(&rq->mtx); rq->enabled = 0; callout_stop(&rq->watchdog); mtx_unlock(&rq->mtx); err = mlx5e_modify_rq(rq, MLX5_RQC_STATE_RDY, MLX5_RQC_STATE_ERR); if (err != 0) { mlx5_en_err(rq->ifp, "mlx5e_modify_rq() from RDY to RST failed: %d\n", err); } while (!mlx5_wq_ll_is_empty(&rq->wq)) { msleep(1); NET_EPOCH_ENTER(et); rq->cq.mcq.comp(&rq->cq.mcq, NULL); NET_EPOCH_EXIT(et); } /* * Transitioning into RST state will allow the FW to track less ERR state queues, * thus reducing the recv queue flushing time */ err = mlx5e_modify_rq(rq, MLX5_RQC_STATE_ERR, MLX5_RQC_STATE_RST); if (err != 0) { mlx5_en_err(rq->ifp, "mlx5e_modify_rq() from ERR to RST failed: %d\n", err); } } static void mlx5e_enable_rx_dma(struct mlx5e_channel *ch) { struct mlx5e_rq *rq = &ch->rq; struct epoch_tracker et; int err; rq->wq.wqe_ctr = 0; mlx5_wq_ll_update_db_record(&rq->wq); err = mlx5e_modify_rq(rq, MLX5_RQC_STATE_RST, MLX5_RQC_STATE_RDY); if (err != 0) { mlx5_en_err(rq->ifp, "mlx5e_modify_rq() from RST to RDY failed: %d\n", err); } rq->enabled = 1; NET_EPOCH_ENTER(et); rq->cq.mcq.comp(&rq->cq.mcq, NULL); NET_EPOCH_EXIT(et); } void mlx5e_modify_tx_dma(struct mlx5e_priv *priv, uint8_t value) { int i; if (test_bit(MLX5E_STATE_OPENED, &priv->state) == 0) return; for (i = 0; i < priv->params.num_channels; i++) { if (value) mlx5e_disable_tx_dma(&priv->channel[i]); else mlx5e_enable_tx_dma(&priv->channel[i]); } } void mlx5e_modify_rx_dma(struct mlx5e_priv *priv, uint8_t value) { int i; if (test_bit(MLX5E_STATE_OPENED, &priv->state) == 0) return; for (i = 0; i < priv->params.num_channels; i++) { if (value) mlx5e_disable_rx_dma(&priv->channel[i]); else mlx5e_enable_rx_dma(&priv->channel[i]); } } static void mlx5e_add_hw_stats(struct mlx5e_priv *priv) { SYSCTL_ADD_PROC(&priv->sysctl_ctx, SYSCTL_CHILDREN(priv->sysctl_hw), OID_AUTO, "fw_version", CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, priv, 0, sysctl_firmware, "A", "HCA firmware version"); SYSCTL_ADD_STRING(&priv->sysctl_ctx, 
SYSCTL_CHILDREN(priv->sysctl_hw), OID_AUTO, "board_id", CTLFLAG_RD, priv->mdev->board_id, 0, "Board ID"); } static int mlx5e_sysctl_tx_priority_flow_control(SYSCTL_HANDLER_ARGS) { struct mlx5e_priv *priv = arg1; uint8_t temp[MLX5E_MAX_PRIORITY]; uint32_t tx_pfc; int err; int i; PRIV_LOCK(priv); tx_pfc = priv->params.tx_priority_flow_control; for (i = 0; i != MLX5E_MAX_PRIORITY; i++) temp[i] = (tx_pfc >> i) & 1; err = SYSCTL_OUT(req, temp, MLX5E_MAX_PRIORITY); if (err || !req->newptr) goto done; err = SYSCTL_IN(req, temp, MLX5E_MAX_PRIORITY); if (err) goto done; priv->params.tx_priority_flow_control = 0; /* range check input value */ for (i = 0; i != MLX5E_MAX_PRIORITY; i++) { if (temp[i] > 1) { err = ERANGE; goto done; } priv->params.tx_priority_flow_control |= (temp[i] << i); } /* check if update is required */ if (tx_pfc != priv->params.tx_priority_flow_control) err = -mlx5e_set_port_pfc(priv); done: if (err != 0) priv->params.tx_priority_flow_control= tx_pfc; PRIV_UNLOCK(priv); return (err); } static int mlx5e_sysctl_rx_priority_flow_control(SYSCTL_HANDLER_ARGS) { struct mlx5e_priv *priv = arg1; uint8_t temp[MLX5E_MAX_PRIORITY]; uint32_t rx_pfc; int err; int i; PRIV_LOCK(priv); rx_pfc = priv->params.rx_priority_flow_control; for (i = 0; i != MLX5E_MAX_PRIORITY; i++) temp[i] = (rx_pfc >> i) & 1; err = SYSCTL_OUT(req, temp, MLX5E_MAX_PRIORITY); if (err || !req->newptr) goto done; err = SYSCTL_IN(req, temp, MLX5E_MAX_PRIORITY); if (err) goto done; priv->params.rx_priority_flow_control = 0; /* range check input value */ for (i = 0; i != MLX5E_MAX_PRIORITY; i++) { if (temp[i] > 1) { err = ERANGE; goto done; } priv->params.rx_priority_flow_control |= (temp[i] << i); } /* check if update is required */ if (rx_pfc != priv->params.rx_priority_flow_control) { err = -mlx5e_set_port_pfc(priv); if (err == 0 && priv->sw_is_port_buf_owner) err = mlx5e_update_buf_lossy(priv); } done: if (err != 0) priv->params.rx_priority_flow_control= rx_pfc; PRIV_UNLOCK(priv); return (err); } static void mlx5e_setup_pauseframes(struct mlx5e_priv *priv) { #if (__FreeBSD_version < 1100000) char path[96]; #endif int error; /* enable pauseframes by default */ priv->params.tx_pauseframe_control = 1; priv->params.rx_pauseframe_control = 1; /* disable ports flow control, PFC, by default */ priv->params.tx_priority_flow_control = 0; priv->params.rx_priority_flow_control = 0; #if (__FreeBSD_version < 1100000) /* compute path for sysctl */ snprintf(path, sizeof(path), "dev.mce.%d.tx_pauseframe_control", device_get_unit(priv->mdev->pdev->dev.bsddev)); /* try to fetch tunable, if any */ TUNABLE_INT_FETCH(path, &priv->params.tx_pauseframe_control); /* compute path for sysctl */ snprintf(path, sizeof(path), "dev.mce.%d.rx_pauseframe_control", device_get_unit(priv->mdev->pdev->dev.bsddev)); /* try to fetch tunable, if any */ TUNABLE_INT_FETCH(path, &priv->params.rx_pauseframe_control); #endif /* register pauseframe SYSCTLs */ SYSCTL_ADD_INT(&priv->sysctl_ctx, SYSCTL_CHILDREN(priv->sysctl_ifnet), OID_AUTO, "tx_pauseframe_control", CTLFLAG_RDTUN, &priv->params.tx_pauseframe_control, 0, "Set to enable TX pause frames. Clear to disable."); SYSCTL_ADD_INT(&priv->sysctl_ctx, SYSCTL_CHILDREN(priv->sysctl_ifnet), OID_AUTO, "rx_pauseframe_control", CTLFLAG_RDTUN, &priv->params.rx_pauseframe_control, 0, "Set to enable RX pause frames. 
Clear to disable."); /* register priority flow control, PFC, SYSCTLs */ SYSCTL_ADD_PROC(&priv->sysctl_ctx, SYSCTL_CHILDREN(priv->sysctl_ifnet), OID_AUTO, "tx_priority_flow_control", CTLTYPE_U8 | CTLFLAG_RWTUN | CTLFLAG_MPSAFE, priv, 0, &mlx5e_sysctl_tx_priority_flow_control, "CU", "Set to enable TX ports flow control frames for priorities 0..7. Clear to disable."); SYSCTL_ADD_PROC(&priv->sysctl_ctx, SYSCTL_CHILDREN(priv->sysctl_ifnet), OID_AUTO, "rx_priority_flow_control", CTLTYPE_U8 | CTLFLAG_RWTUN | CTLFLAG_MPSAFE, priv, 0, &mlx5e_sysctl_rx_priority_flow_control, "CU", "Set to enable RX ports flow control frames for priorities 0..7. Clear to disable."); PRIV_LOCK(priv); /* range check */ priv->params.tx_pauseframe_control = priv->params.tx_pauseframe_control ? 1 : 0; priv->params.rx_pauseframe_control = priv->params.rx_pauseframe_control ? 1 : 0; /* update firmware */ error = mlx5e_set_port_pause_and_pfc(priv); if (error == -EINVAL) { mlx5_en_err(priv->ifp, "Global pauseframes must be disabled before enabling PFC.\n"); priv->params.rx_priority_flow_control = 0; priv->params.tx_priority_flow_control = 0; /* update firmware */ (void) mlx5e_set_port_pause_and_pfc(priv); } PRIV_UNLOCK(priv); } int mlx5e_ul_snd_tag_alloc(struct ifnet *ifp, union if_snd_tag_alloc_params *params, struct m_snd_tag **ppmt) { struct mlx5e_priv *priv; struct mlx5e_channel *pch; priv = ifp->if_softc; if (unlikely(priv->gone || params->hdr.flowtype == M_HASHTYPE_NONE)) { return (EOPNOTSUPP); } else { /* keep this code synced with mlx5e_select_queue() */ u32 ch = priv->params.num_channels; #ifdef RSS u32 temp; if (rss_hash2bucket(params->hdr.flowid, params->hdr.flowtype, &temp) == 0) ch = temp % ch; else #endif ch = (params->hdr.flowid % 128) % ch; /* * NOTE: The channels array is only freed at detach * and it safe to return a pointer to the send tag * inside the channels structure as long as we * reference the priv. 
*/ pch = priv->channel + ch; /* check if send queue is not running */ if (unlikely(pch->sq[0].running == 0)) return (ENXIO); m_snd_tag_ref(&pch->tag); *ppmt = &pch->tag; return (0); } } int mlx5e_ul_snd_tag_query(struct m_snd_tag *pmt, union if_snd_tag_query_params *params) { struct mlx5e_channel *pch = container_of(pmt, struct mlx5e_channel, tag); params->unlimited.max_rate = -1ULL; params->unlimited.queue_level = mlx5e_sq_queue_level(&pch->sq[0]); return (0); } void mlx5e_ul_snd_tag_free(struct m_snd_tag *pmt) { struct mlx5e_channel *pch = container_of(pmt, struct mlx5e_channel, tag); complete(&pch->completion); } static int mlx5e_snd_tag_alloc(struct ifnet *ifp, union if_snd_tag_alloc_params *params, struct m_snd_tag **ppmt) { switch (params->hdr.type) { #ifdef RATELIMIT case IF_SND_TAG_TYPE_RATE_LIMIT: return (mlx5e_rl_snd_tag_alloc(ifp, params, ppmt)); #ifdef KERN_TLS case IF_SND_TAG_TYPE_TLS_RATE_LIMIT: return (mlx5e_tls_snd_tag_alloc(ifp, params, ppmt)); #endif #endif case IF_SND_TAG_TYPE_UNLIMITED: return (mlx5e_ul_snd_tag_alloc(ifp, params, ppmt)); #ifdef KERN_TLS case IF_SND_TAG_TYPE_TLS: return (mlx5e_tls_snd_tag_alloc(ifp, params, ppmt)); #endif default: return (EOPNOTSUPP); } } static int mlx5e_snd_tag_modify(struct m_snd_tag *pmt, union if_snd_tag_modify_params *params) { switch (pmt->type) { #ifdef RATELIMIT case IF_SND_TAG_TYPE_RATE_LIMIT: return (mlx5e_rl_snd_tag_modify(pmt, params)); #ifdef KERN_TLS case IF_SND_TAG_TYPE_TLS_RATE_LIMIT: return (mlx5e_tls_snd_tag_modify(pmt, params)); #endif #endif case IF_SND_TAG_TYPE_UNLIMITED: #ifdef KERN_TLS case IF_SND_TAG_TYPE_TLS: #endif default: return (EOPNOTSUPP); } } static int mlx5e_snd_tag_query(struct m_snd_tag *pmt, union if_snd_tag_query_params *params) { switch (pmt->type) { #ifdef RATELIMIT case IF_SND_TAG_TYPE_RATE_LIMIT: return (mlx5e_rl_snd_tag_query(pmt, params)); #ifdef KERN_TLS case IF_SND_TAG_TYPE_TLS_RATE_LIMIT: return (mlx5e_tls_snd_tag_query(pmt, params)); #endif #endif case IF_SND_TAG_TYPE_UNLIMITED: return (mlx5e_ul_snd_tag_query(pmt, params)); #ifdef KERN_TLS case IF_SND_TAG_TYPE_TLS: return (mlx5e_tls_snd_tag_query(pmt, params)); #endif default: return (EOPNOTSUPP); } } #ifdef RATELIMIT #define NUM_HDWR_RATES_MLX 13 static const uint64_t adapter_rates_mlx[NUM_HDWR_RATES_MLX] = { 135375, /* 1,083,000 */ 180500, /* 1,444,000 */ 270750, /* 2,166,000 */ 361000, /* 2,888,000 */ 541500, /* 4,332,000 */ 721875, /* 5,775,000 */ 1082875, /* 8,663,000 */ 1443875, /* 11,551,000 */ 2165750, /* 17,326,000 */ 2887750, /* 23,102,000 */ 4331625, /* 34,653,000 */ 5775500, /* 46,204,000 */ 8663125 /* 69,305,000 */ }; static void mlx5e_ratelimit_query(struct ifnet *ifp __unused, struct if_ratelimit_query_results *q) { /* * This function needs updating by the driver maintainer! * For the MLX card there are currently (ConectX-4?) 13 * pre-set rates and others i.e. ConnectX-5, 6, 7?? * * This will change based on later adapters * and this code should be updated to look at ifp * and figure out the specific adapter type * settings i.e. how many rates as well * as if they are fixed (as is shown here) or * if they are dynamic (example chelsio t4). Also if there * is a maximum number of flows that the adapter * can handle that too needs to be updated in * the max_flows field. 
*/ q->rate_table = adapter_rates_mlx; q->flags = RT_IS_FIXED_TABLE; q->max_flows = 0; /* mlx has no limit */ q->number_of_rates = NUM_HDWR_RATES_MLX; q->min_segment_burst = 1; } #endif static void mlx5e_snd_tag_free(struct m_snd_tag *pmt) { switch (pmt->type) { #ifdef RATELIMIT case IF_SND_TAG_TYPE_RATE_LIMIT: mlx5e_rl_snd_tag_free(pmt); break; #ifdef KERN_TLS case IF_SND_TAG_TYPE_TLS_RATE_LIMIT: mlx5e_tls_snd_tag_free(pmt); break; #endif #endif case IF_SND_TAG_TYPE_UNLIMITED: mlx5e_ul_snd_tag_free(pmt); break; #ifdef KERN_TLS case IF_SND_TAG_TYPE_TLS: mlx5e_tls_snd_tag_free(pmt); break; #endif default: break; } } static void mlx5e_ifm_add(struct mlx5e_priv *priv, int type) { ifmedia_add(&priv->media, type | IFM_ETHER, 0, NULL); ifmedia_add(&priv->media, type | IFM_ETHER | IFM_ETH_RXPAUSE | IFM_ETH_TXPAUSE, 0, NULL); ifmedia_add(&priv->media, type | IFM_ETHER | IFM_ETH_RXPAUSE, 0, NULL); ifmedia_add(&priv->media, type | IFM_ETHER | IFM_ETH_TXPAUSE, 0, NULL); ifmedia_add(&priv->media, type | IFM_ETHER | IFM_FDX, 0, NULL); ifmedia_add(&priv->media, type | IFM_ETHER | IFM_FDX | IFM_ETH_RXPAUSE, 0, NULL); ifmedia_add(&priv->media, type | IFM_ETHER | IFM_FDX | IFM_ETH_TXPAUSE, 0, NULL); ifmedia_add(&priv->media, type | IFM_ETHER | IFM_FDX | IFM_ETH_RXPAUSE | IFM_ETH_TXPAUSE, 0, NULL); } static void * mlx5e_create_ifp(struct mlx5_core_dev *mdev) { struct ifnet *ifp; struct mlx5e_priv *priv; u8 dev_addr[ETHER_ADDR_LEN] __aligned(4); u8 connector_type; struct sysctl_oid_list *child; int ncv = mdev->priv.eq_table.num_comp_vectors; char unit[16]; struct pfil_head_args pa; int err; int i,j; u32 eth_proto_cap; u32 out[MLX5_ST_SZ_DW(ptys_reg)]; bool ext = 0; u32 speeds_num; struct media media_entry = {}; if (mlx5e_check_required_hca_cap(mdev)) { mlx5_core_dbg(mdev, "mlx5e_check_required_hca_cap() failed\n"); return (NULL); } /* * Try to allocate the priv and make room for worst-case * number of channel structures: */ priv = malloc(sizeof(*priv) + (sizeof(priv->channel[0]) * mdev->priv.eq_table.num_comp_vectors), M_MLX5EN, M_WAITOK | M_ZERO); ifp = priv->ifp = if_alloc_dev(IFT_ETHER, mdev->pdev->dev.bsddev); if (ifp == NULL) { mlx5_core_err(mdev, "if_alloc() failed\n"); goto err_free_priv; } /* setup all static fields */ mlx5e_priv_static_init(priv, mdev->priv.eq_table.num_comp_vectors); ifp->if_softc = priv; if_initname(ifp, "mce", device_get_unit(mdev->pdev->dev.bsddev)); ifp->if_mtu = ETHERMTU; ifp->if_init = mlx5e_open; ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST | IFF_KNOWSEPOCH; ifp->if_ioctl = mlx5e_ioctl; ifp->if_transmit = mlx5e_xmit; ifp->if_qflush = if_qflush; #if (__FreeBSD_version >= 1100000) ifp->if_get_counter = mlx5e_get_counter; #endif ifp->if_snd.ifq_maxlen = ifqmaxlen; /* * Set driver features */ ifp->if_capabilities |= IFCAP_HWCSUM | IFCAP_HWCSUM_IPV6; ifp->if_capabilities |= IFCAP_VLAN_MTU | IFCAP_VLAN_HWTAGGING; ifp->if_capabilities |= IFCAP_VLAN_HWCSUM | IFCAP_VLAN_HWFILTER; ifp->if_capabilities |= IFCAP_LINKSTATE | IFCAP_JUMBO_MTU; ifp->if_capabilities |= IFCAP_LRO; ifp->if_capabilities |= IFCAP_TSO | IFCAP_VLAN_HWTSO; ifp->if_capabilities |= IFCAP_HWSTATS | IFCAP_HWRXTSTMP; ifp->if_capabilities |= IFCAP_NOMAP; ifp->if_capabilities |= IFCAP_TXTLS4 | IFCAP_TXTLS6; #ifdef RATELIMIT ifp->if_capabilities |= IFCAP_TXRTLMT | IFCAP_TXTLS_RTLMT; #endif ifp->if_snd_tag_alloc = mlx5e_snd_tag_alloc; ifp->if_snd_tag_free = mlx5e_snd_tag_free; ifp->if_snd_tag_modify = mlx5e_snd_tag_modify; ifp->if_snd_tag_query = mlx5e_snd_tag_query; #ifdef RATELIMIT ifp->if_ratelimit_query = 
mlx5e_ratelimit_query; #endif /* set TSO limits so that we don't have to drop TX packets */ ifp->if_hw_tsomax = MLX5E_MAX_TX_PAYLOAD_SIZE - (ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN); ifp->if_hw_tsomaxsegcount = MLX5E_MAX_TX_MBUF_FRAGS - 1 /* hdr */; ifp->if_hw_tsomaxsegsize = MLX5E_MAX_TX_MBUF_SIZE; ifp->if_capenable = ifp->if_capabilities; ifp->if_hwassist = 0; if (ifp->if_capenable & IFCAP_TSO) ifp->if_hwassist |= CSUM_TSO; if (ifp->if_capenable & IFCAP_TXCSUM) ifp->if_hwassist |= (CSUM_TCP | CSUM_UDP | CSUM_IP); if (ifp->if_capenable & IFCAP_TXCSUM_IPV6) ifp->if_hwassist |= (CSUM_UDP_IPV6 | CSUM_TCP_IPV6); /* ifnet sysctl tree */ sysctl_ctx_init(&priv->sysctl_ctx); priv->sysctl_ifnet = SYSCTL_ADD_NODE(&priv->sysctl_ctx, SYSCTL_STATIC_CHILDREN(_dev), OID_AUTO, ifp->if_dname, CTLFLAG_RD | CTLFLAG_MPSAFE, 0, "MLX5 ethernet - interface name"); if (priv->sysctl_ifnet == NULL) { mlx5_core_err(mdev, "SYSCTL_ADD_NODE() failed\n"); goto err_free_sysctl; } snprintf(unit, sizeof(unit), "%d", ifp->if_dunit); priv->sysctl_ifnet = SYSCTL_ADD_NODE(&priv->sysctl_ctx, SYSCTL_CHILDREN(priv->sysctl_ifnet), OID_AUTO, unit, CTLFLAG_RD | CTLFLAG_MPSAFE, 0, "MLX5 ethernet - interface unit"); if (priv->sysctl_ifnet == NULL) { mlx5_core_err(mdev, "SYSCTL_ADD_NODE() failed\n"); goto err_free_sysctl; } /* HW sysctl tree */ child = SYSCTL_CHILDREN(device_get_sysctl_tree(mdev->pdev->dev.bsddev)); priv->sysctl_hw = SYSCTL_ADD_NODE(&priv->sysctl_ctx, child, OID_AUTO, "hw", CTLFLAG_RD | CTLFLAG_MPSAFE, 0, "MLX5 ethernet dev hw"); if (priv->sysctl_hw == NULL) { mlx5_core_err(mdev, "SYSCTL_ADD_NODE() failed\n"); goto err_free_sysctl; } err = mlx5e_build_ifp_priv(mdev, priv, ncv); if (err) { mlx5_core_err(mdev, "mlx5e_build_ifp_priv() failed (%d)\n", err); goto err_free_sysctl; } /* reuse mlx5core's watchdog workqueue */ priv->wq = mdev->priv.health.wq_watchdog; - err = mlx5_alloc_map_uar(mdev, &priv->cq_uar); - if (err) { - mlx5_en_err(ifp, "mlx5_alloc_map_uar failed, %d\n", err); - goto err_free_wq; - } err = mlx5_core_alloc_pd(mdev, &priv->pdn); if (err) { mlx5_en_err(ifp, "mlx5_core_alloc_pd failed, %d\n", err); - goto err_unmap_free_uar; + goto err_free_wq; } err = mlx5_alloc_transport_domain(mdev, &priv->tdn); if (err) { mlx5_en_err(ifp, "mlx5_alloc_transport_domain failed, %d\n", err); goto err_dealloc_pd; } err = mlx5e_create_mkey(priv, priv->pdn, &priv->mr); if (err) { mlx5_en_err(ifp, "mlx5e_create_mkey failed, %d\n", err); goto err_dealloc_transport_domain; } + err = mlx5_alloc_bfreg(mdev, &priv->bfreg, false, false); + if (err) { + mlx5_en_err(ifp, "alloc bfreg failed, %d\n", err); + goto err_create_mkey; + } mlx5_query_nic_vport_mac_address(priv->mdev, 0, dev_addr); /* check if we should generate a random MAC address */ if (MLX5_CAP_GEN(priv->mdev, vport_group_manager) == 0 && is_zero_ether_addr(dev_addr)) { random_ether_addr(dev_addr); mlx5_en_err(ifp, "Assigned random MAC address\n"); } err = mlx5e_rl_init(priv); if (err) { mlx5_en_err(ifp, "mlx5e_rl_init failed, %d\n", err); - goto err_create_mkey; + goto err_alloc_bfreg; } err = mlx5e_tls_init(priv); if (err) { if_printf(ifp, "%s: mlx5e_tls_init failed\n", __func__); goto err_rl_init; } /* set default MTU */ mlx5e_set_dev_port_mtu(ifp, ifp->if_mtu); /* Set default media status */ priv->media_status_last = IFM_AVALID; priv->media_active_last = IFM_ETHER | IFM_AUTO | IFM_FDX; /* setup default pauseframes configuration */ mlx5e_setup_pauseframes(priv); /* Setup supported medias */ //TODO: If we failed to query ptys is it ok to proceed?? 
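/*
 * Editor's illustration (not part of this patch): mlx5e_create_ifp() above
 * acquires its resources in a fixed order (PD, transport domain, mkey, and
 * with this patch the shared bfreg) and unwinds them through the chain of
 * err_* labels in exactly the reverse order, which is why the new
 * err_alloc_bfreg label is inserted between err_rl_init and err_create_mkey.
 * The standalone sketch below shows the goto-unwind idiom with placeholder
 * acquire()/release() helpers and made-up resource names; it is not the
 * driver's allocation code.
 */
#include <stdio.h>

static int
acquire(const char *name, int fail)
{
	if (fail) {
		printf("acquire %s: failed\n", name);
		return (-1);
	}
	printf("acquire %s\n", name);
	return (0);
}

static void
release(const char *name)
{
	printf("release %s\n", name);
}

static int
setup(void)
{
	int err;

	if ((err = acquire("pd", 0)) != 0)
		goto err_done;
	if ((err = acquire("tdn", 0)) != 0)
		goto err_pd;
	if ((err = acquire("mkey", 0)) != 0)
		goto err_tdn;
	/* simulate the newly added bfreg step failing */
	if ((err = acquire("bfreg", 1)) != 0)
		goto err_mkey;
	return (0);

	/* unwind strictly in reverse order of acquisition */
err_mkey:
	release("mkey");
err_tdn:
	release("tdn");
err_pd:
	release("pd");
err_done:
	return (err);
}

int
main(void)
{
	return (setup() != 0);
}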
if (!mlx5_query_port_ptys(mdev, out, sizeof(out), MLX5_PTYS_EN, 1)) { ext = MLX5_CAP_PCAM_FEATURE(mdev, ptys_extended_ethernet); eth_proto_cap = MLX5_GET_ETH_PROTO(ptys_reg, out, ext, eth_proto_capability); if (MLX5_CAP_PCAM_FEATURE(mdev, ptys_connector_type)) connector_type = MLX5_GET(ptys_reg, out, connector_type); } else { eth_proto_cap = 0; mlx5_en_err(ifp, "Query port media capability failed, %d\n", err); } ifmedia_init(&priv->media, IFM_IMASK, mlx5e_media_change, mlx5e_media_status); speeds_num = ext ? MLX5E_EXT_LINK_SPEEDS_NUMBER : MLX5E_LINK_SPEEDS_NUMBER; for (i = 0; i != speeds_num; i++) { for (j = 0; j < MLX5E_LINK_MODES_NUMBER ; ++j) { media_entry = ext ? mlx5e_ext_mode_table[i][j] : mlx5e_mode_table[i][j]; if (media_entry.baudrate == 0) continue; if (MLX5E_PROT_MASK(i) & eth_proto_cap) mlx5e_ifm_add(priv, media_entry.subtype); } } mlx5e_ifm_add(priv, IFM_AUTO); /* Set autoselect by default */ ifmedia_set(&priv->media, IFM_ETHER | IFM_AUTO | IFM_FDX | IFM_ETH_RXPAUSE | IFM_ETH_TXPAUSE); DEBUGNET_SET(ifp, mlx5_en); ether_ifattach(ifp, dev_addr); /* Register for VLAN events */ priv->vlan_attach = EVENTHANDLER_REGISTER(vlan_config, mlx5e_vlan_rx_add_vid, priv, EVENTHANDLER_PRI_FIRST); priv->vlan_detach = EVENTHANDLER_REGISTER(vlan_unconfig, mlx5e_vlan_rx_kill_vid, priv, EVENTHANDLER_PRI_FIRST); /* Link is down by default */ if_link_state_change(ifp, LINK_STATE_DOWN); mlx5e_enable_async_events(priv); mlx5e_add_hw_stats(priv); mlx5e_create_stats(&priv->stats.vport.ctx, SYSCTL_CHILDREN(priv->sysctl_ifnet), "vstats", mlx5e_vport_stats_desc, MLX5E_VPORT_STATS_NUM, priv->stats.vport.arg); mlx5e_create_stats(&priv->stats.pport.ctx, SYSCTL_CHILDREN(priv->sysctl_ifnet), "pstats", mlx5e_pport_stats_desc, MLX5E_PPORT_STATS_NUM, priv->stats.pport.arg); mlx5e_create_ethtool(priv); mtx_lock(&priv->async_events_mtx); mlx5e_update_stats(priv); mtx_unlock(&priv->async_events_mtx); SYSCTL_ADD_INT(&priv->sysctl_ctx, SYSCTL_CHILDREN(priv->sysctl_ifnet), OID_AUTO, "rx_clbr_done", CTLFLAG_RD, &priv->clbr_done, 0, "RX timestamps calibration state"); callout_init(&priv->tstmp_clbr, CALLOUT_DIRECT); mlx5e_reset_calibration_callout(priv); pa.pa_version = PFIL_VERSION; pa.pa_flags = PFIL_IN; pa.pa_type = PFIL_TYPE_ETHERNET; pa.pa_headname = ifp->if_xname; priv->pfil = pfil_head_register(&pa); return (priv); err_rl_init: mlx5e_rl_cleanup(priv); +err_alloc_bfreg: + mlx5_free_bfreg(mdev, &priv->bfreg); + err_create_mkey: mlx5_core_destroy_mkey(priv->mdev, &priv->mr); err_dealloc_transport_domain: mlx5_dealloc_transport_domain(mdev, priv->tdn); err_dealloc_pd: mlx5_core_dealloc_pd(mdev, priv->pdn); -err_unmap_free_uar: - mlx5_unmap_free_uar(mdev, &priv->cq_uar); - err_free_wq: flush_workqueue(priv->wq); err_free_sysctl: sysctl_ctx_free(&priv->sysctl_ctx); if (priv->sysctl_debug) sysctl_ctx_free(&priv->stats.port_stats_debug.ctx); mlx5e_priv_static_destroy(priv, mdev->priv.eq_table.num_comp_vectors); if_free(ifp); err_free_priv: free(priv, M_MLX5EN); return (NULL); } static void mlx5e_destroy_ifp(struct mlx5_core_dev *mdev, void *vpriv) { struct mlx5e_priv *priv = vpriv; struct ifnet *ifp = priv->ifp; /* don't allow more IOCTLs */ priv->gone = 1; /* XXX wait a bit to allow IOCTL handlers to complete */ pause("W", hz); #ifdef RATELIMIT /* * The kernel can have reference(s) via the m_snd_tag's into * the ratelimit channels, and these must go away before * detaching: */ while (READ_ONCE(priv->rl.stats.tx_active_connections) != 0) { mlx5_en_err(priv->ifp, "Waiting for all ratelimit connections to terminate\n"); 
pause("W", hz); } #endif /* wait for all unlimited send tags to complete */ mlx5e_priv_wait_for_completion(priv, mdev->priv.eq_table.num_comp_vectors); /* stop watchdog timer */ callout_drain(&priv->watchdog); callout_drain(&priv->tstmp_clbr); if (priv->vlan_attach != NULL) EVENTHANDLER_DEREGISTER(vlan_config, priv->vlan_attach); if (priv->vlan_detach != NULL) EVENTHANDLER_DEREGISTER(vlan_unconfig, priv->vlan_detach); /* make sure device gets closed */ PRIV_LOCK(priv); mlx5e_close_locked(ifp); PRIV_UNLOCK(priv); /* deregister pfil */ if (priv->pfil != NULL) { pfil_head_unregister(priv->pfil); priv->pfil = NULL; } /* unregister device */ ifmedia_removeall(&priv->media); ether_ifdetach(ifp); mlx5e_tls_cleanup(priv); mlx5e_rl_cleanup(priv); /* destroy all remaining sysctl nodes */ sysctl_ctx_free(&priv->stats.vport.ctx); sysctl_ctx_free(&priv->stats.pport.ctx); if (priv->sysctl_debug) sysctl_ctx_free(&priv->stats.port_stats_debug.ctx); sysctl_ctx_free(&priv->sysctl_ctx); + mlx5_free_bfreg(priv->mdev, &priv->bfreg); mlx5_core_destroy_mkey(priv->mdev, &priv->mr); mlx5_dealloc_transport_domain(priv->mdev, priv->tdn); mlx5_core_dealloc_pd(priv->mdev, priv->pdn); - mlx5_unmap_free_uar(priv->mdev, &priv->cq_uar); mlx5e_disable_async_events(priv); flush_workqueue(priv->wq); mlx5e_priv_static_destroy(priv, mdev->priv.eq_table.num_comp_vectors); if_free(ifp); free(priv, M_MLX5EN); } #ifdef DEBUGNET static void mlx5_en_debugnet_init(struct ifnet *dev, int *nrxr, int *ncl, int *clsize) { struct mlx5e_priv *priv = if_getsoftc(dev); PRIV_LOCK(priv); *nrxr = priv->params.num_channels; *ncl = DEBUGNET_MAX_IN_FLIGHT; *clsize = MLX5E_MAX_RX_BYTES; PRIV_UNLOCK(priv); } static void mlx5_en_debugnet_event(struct ifnet *dev, enum debugnet_ev event) { } static int mlx5_en_debugnet_transmit(struct ifnet *dev, struct mbuf *m) { struct mlx5e_priv *priv = if_getsoftc(dev); struct mlx5e_sq *sq; int err; if ((if_getdrvflags(dev) & (IFF_DRV_RUNNING | IFF_DRV_OACTIVE)) != IFF_DRV_RUNNING || (priv->media_status_last & IFM_ACTIVE) == 0) return (ENOENT); sq = &priv->channel[0].sq[0]; if (sq->running == 0) { m_freem(m); return (ENOENT); } if (mlx5e_sq_xmit(sq, &m) != 0) { m_freem(m); err = ENOBUFS; } else { err = 0; } if (likely(sq->doorbell.d64 != 0)) { - mlx5e_tx_notify_hw(sq, sq->doorbell.d32, 0); + mlx5e_tx_notify_hw(sq, sq->doorbell.d32); sq->doorbell.d64 = 0; } return (err); } static int mlx5_en_debugnet_poll(struct ifnet *dev, int count) { struct mlx5e_priv *priv = if_getsoftc(dev); if ((if_getdrvflags(dev) & IFF_DRV_RUNNING) == 0 || (priv->media_status_last & IFM_ACTIVE) == 0) return (ENOENT); mlx5_poll_interrupts(priv->mdev); return (0); } #endif /* DEBUGNET */ static void * mlx5e_get_ifp(void *vpriv) { struct mlx5e_priv *priv = vpriv; return (priv->ifp); } static struct mlx5_interface mlx5e_interface = { .add = mlx5e_create_ifp, .remove = mlx5e_destroy_ifp, .event = mlx5e_async_event, .protocol = MLX5_INTERFACE_PROTOCOL_ETH, .get_dev = mlx5e_get_ifp, }; void mlx5e_init(void) { mlx5_register_interface(&mlx5e_interface); } void mlx5e_cleanup(void) { mlx5_unregister_interface(&mlx5e_interface); } static void mlx5e_show_version(void __unused *arg) { printf("%s", mlx5e_version); } SYSINIT(mlx5e_show_version, SI_SUB_DRIVERS, SI_ORDER_ANY, mlx5e_show_version, NULL); module_init_order(mlx5e_init, SI_ORDER_SIXTH); module_exit_order(mlx5e_cleanup, SI_ORDER_SIXTH); #if (__FreeBSD_version >= 1100000) MODULE_DEPEND(mlx5en, linuxkpi, 1, 1, 1); #endif MODULE_DEPEND(mlx5en, mlx5, 1, 1, 1); MODULE_VERSION(mlx5en, 1); diff --git 
a/sys/dev/mlx5/mlx5_en/mlx5_en_rl.c b/sys/dev/mlx5/mlx5_en/mlx5_en_rl.c index 0e34403f2bf9..7291253b8168 100644 --- a/sys/dev/mlx5/mlx5_en/mlx5_en_rl.c +++ b/sys/dev/mlx5/mlx5_en/mlx5_en_rl.c @@ -1,1591 +1,1581 @@ /*- - * Copyright (c) 2016 Mellanox Technologies. All rights reserved. + * Copyright (c) 2016-2020 Mellanox Technologies. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS `AS IS' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #include "en.h" #ifdef RATELIMIT static int mlx5e_rl_open_workers(struct mlx5e_priv *); static void mlx5e_rl_close_workers(struct mlx5e_priv *); static int mlx5e_rl_sysctl_show_rate_table(SYSCTL_HANDLER_ARGS); static void mlx5e_rl_sysctl_add_u64_oid(struct mlx5e_rl_priv_data *, unsigned x, struct sysctl_oid *, const char *name, const char *desc); static void mlx5e_rl_sysctl_add_stats_u64_oid(struct mlx5e_rl_priv_data *rl, unsigned x, struct sysctl_oid *node, const char *name, const char *desc); static int mlx5e_rl_tx_limit_add(struct mlx5e_rl_priv_data *, uint64_t value); static int mlx5e_rl_tx_limit_clr(struct mlx5e_rl_priv_data *, uint64_t value); static void mlx5e_rl_build_sq_param(struct mlx5e_rl_priv_data *rl, struct mlx5e_sq_param *param) { void *sqc = param->sqc; void *wq = MLX5_ADDR_OF(sqc, sqc, wq); uint8_t log_sq_size = order_base_2(rl->param.tx_queue_size); MLX5_SET(wq, wq, log_wq_sz, log_sq_size); MLX5_SET(wq, wq, log_wq_stride, ilog2(MLX5_SEND_WQE_BB)); MLX5_SET(wq, wq, pd, rl->priv->pdn); param->wq.buf_numa_node = 0; param->wq.db_numa_node = 0; param->wq.linear = 1; } static void mlx5e_rl_build_cq_param(struct mlx5e_rl_priv_data *rl, struct mlx5e_cq_param *param) { void *cqc = param->cqc; uint8_t log_sq_size = order_base_2(rl->param.tx_queue_size); MLX5_SET(cqc, cqc, log_cq_size, log_sq_size); MLX5_SET(cqc, cqc, cq_period, rl->param.tx_coalesce_usecs); MLX5_SET(cqc, cqc, cq_max_count, rl->param.tx_coalesce_pkts); switch (rl->param.tx_coalesce_mode) { case 0: MLX5_SET(cqc, cqc, cq_period_mode, MLX5_CQ_PERIOD_MODE_START_FROM_EQE); break; default: if (MLX5_CAP_GEN(rl->priv->mdev, cq_period_start_from_cqe)) MLX5_SET(cqc, cqc, cq_period_mode, MLX5_CQ_PERIOD_MODE_START_FROM_CQE); else MLX5_SET(cqc, cqc, cq_period_mode, MLX5_CQ_PERIOD_MODE_START_FROM_EQE); break; } } static void mlx5e_rl_build_channel_param(struct mlx5e_rl_priv_data *rl, struct mlx5e_rl_channel_param *cparam) { memset(cparam, 
0, sizeof(*cparam)); mlx5e_rl_build_sq_param(rl, &cparam->sq); mlx5e_rl_build_cq_param(rl, &cparam->cq); } static int mlx5e_rl_create_sq(struct mlx5e_priv *priv, struct mlx5e_sq *sq, struct mlx5e_sq_param *param, int ix) { struct mlx5_core_dev *mdev = priv->mdev; void *sqc = param->sqc; void *sqc_wq = MLX5_ADDR_OF(sqc, sqc, wq); int err; /* Create DMA descriptor TAG */ if ((err = -bus_dma_tag_create( bus_get_dma_tag(mdev->pdev->dev.bsddev), 1, /* any alignment */ 0, /* no boundary */ BUS_SPACE_MAXADDR, /* lowaddr */ BUS_SPACE_MAXADDR, /* highaddr */ NULL, NULL, /* filter, filterarg */ MLX5E_MAX_TX_PAYLOAD_SIZE, /* maxsize */ MLX5E_MAX_TX_MBUF_FRAGS, /* nsegments */ MLX5E_MAX_TX_MBUF_SIZE, /* maxsegsize */ 0, /* flags */ NULL, NULL, /* lockfunc, lockfuncarg */ &sq->dma_tag))) goto done; /* use shared UAR */ - sq->uar = priv->rl.sq_uar; + sq->uar_map = priv->bfreg.map; err = mlx5_wq_cyc_create(mdev, ¶m->wq, sqc_wq, &sq->wq, &sq->wq_ctrl); if (err) goto err_free_dma_tag; sq->wq.db = &sq->wq.db[MLX5_SND_DBR]; /* * The sq->bf_buf_size variable is intentionally left zero so * that the doorbell writes will occur at the same memory * location. */ err = mlx5e_alloc_sq_db(sq); if (err) goto err_sq_wq_destroy; sq->mkey_be = cpu_to_be32(priv->mr.key); sq->ifp = priv->ifp; sq->priv = priv; mlx5e_update_sq_inline(sq); return (0); err_sq_wq_destroy: mlx5_wq_destroy(&sq->wq_ctrl); err_free_dma_tag: bus_dma_tag_destroy(sq->dma_tag); done: return (err); } static void mlx5e_rl_destroy_sq(struct mlx5e_sq *sq) { mlx5e_free_sq_db(sq); mlx5_wq_destroy(&sq->wq_ctrl); bus_dma_tag_destroy(sq->dma_tag); } static int mlx5e_rl_open_sq(struct mlx5e_priv *priv, struct mlx5e_sq *sq, struct mlx5e_sq_param *param, int ix) { int err; err = mlx5e_rl_create_sq(priv, sq, param, ix); if (err) return (err); err = mlx5e_enable_sq(sq, param, priv->rl.tisn); if (err) goto err_destroy_sq; err = mlx5e_modify_sq(sq, MLX5_SQC_STATE_RST, MLX5_SQC_STATE_RDY); if (err) goto err_disable_sq; WRITE_ONCE(sq->running, 1); return (0); err_disable_sq: mlx5e_disable_sq(sq); err_destroy_sq: mlx5e_rl_destroy_sq(sq); return (err); } static void mlx5e_rl_chan_mtx_init(struct mlx5e_priv *priv, struct mlx5e_sq *sq) { mtx_init(&sq->lock, "mlx5tx-rl", NULL, MTX_DEF); mtx_init(&sq->comp_lock, "mlx5comp-rl", NULL, MTX_DEF); callout_init_mtx(&sq->cev_callout, &sq->lock, 0); sq->cev_factor = priv->rl.param.tx_completion_fact; /* ensure the TX completion event factor is not zero */ if (sq->cev_factor == 0) sq->cev_factor = 1; } static int mlx5e_rl_open_channel(struct mlx5e_rl_worker *rlw, int eq_ix, struct mlx5e_rl_channel_param *cparam, struct mlx5e_sq *volatile *ppsq) { struct mlx5e_priv *priv = rlw->priv; struct mlx5e_sq *sq; int err; sq = malloc(sizeof(*sq), M_MLX5EN, M_WAITOK | M_ZERO); /* init mutexes */ mlx5e_rl_chan_mtx_init(priv, sq); /* open TX completion queue */ err = mlx5e_open_cq(priv, &cparam->cq, &sq->cq, &mlx5e_tx_cq_comp, eq_ix); if (err) goto err_free; err = mlx5e_rl_open_sq(priv, sq, &cparam->sq, eq_ix); if (err) goto err_close_tx_cq; /* store TX channel pointer */ *ppsq = sq; /* poll TX queue initially */ sq->cq.mcq.comp(&sq->cq.mcq, NULL); return (0); err_close_tx_cq: mlx5e_close_cq(&sq->cq); err_free: /* destroy mutexes */ mtx_destroy(&sq->lock); mtx_destroy(&sq->comp_lock); free(sq, M_MLX5EN); atomic_add_64(&priv->rl.stats.tx_allocate_resource_failure, 1ULL); return (err); } static void mlx5e_rl_close_channel(struct mlx5e_sq *volatile *ppsq) { struct mlx5e_sq *sq = *ppsq; /* check if channel is already closed */ if (sq == NULL) return; 
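/*
 * Editor's illustration (not part of this patch): the helper
 * mlx5e_rl_sync_tx_completion_fact() that follows bounds the spacing between
 * TX completion events to half the rate-limit queue size, expressed in
 * worst-case packets of MLX5_SEND_WQE_MAX_WQEBBS queue entries each, and
 * clamps the result to the non-zero 16-bit range.  The standalone sketch
 * below reproduces that arithmetic; EXAMPLE_MAX_WQEBBS = 16 is an assumed
 * stand-in value for the example, not a statement of the real constant.
 */
#include <stdint.h>
#include <stdio.h>

#define EXAMPLE_MAX_WQEBBS	16ULL	/* stand-in for MLX5_SEND_WQE_MAX_WQEBBS */

static uint64_t
completion_fact_max(uint64_t tx_queue_size)
{
	/* half the queue, divided by the worst-case WQEBBs one packet needs */
	uint64_t max = tx_queue_size / (2 * EXAMPLE_MAX_WQEBBS);

	/* keep the result in the valid non-zero 16-bit range */
	if (max < 1)
		max = 1;
	else if (max > 65535)
		max = 65535;
	return (max);
}

int
main(void)
{
	/* a 1024-entry rate-limit SQ allows at most 32 outstanding completions */
	printf("%llu\n", (unsigned long long)completion_fact_max(1024));
	return (0);
}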
/* ensure channel pointer is no longer used */ *ppsq = NULL; /* teardown and destroy SQ */ mlx5e_drain_sq(sq); mlx5e_disable_sq(sq); mlx5e_rl_destroy_sq(sq); /* close CQ */ mlx5e_close_cq(&sq->cq); /* destroy mutexes */ mtx_destroy(&sq->lock); mtx_destroy(&sq->comp_lock); free(sq, M_MLX5EN); } static void mlx5e_rl_sync_tx_completion_fact(struct mlx5e_rl_priv_data *rl) { /* * Limit the maximum distance between completion events to * half of the currently set TX queue size. * * The maximum number of queue entries a single IP packet can * consume is given by MLX5_SEND_WQE_MAX_WQEBBS. * * The worst case max value is then given as below: */ uint64_t max = rl->param.tx_queue_size / (2 * MLX5_SEND_WQE_MAX_WQEBBS); /* * Update the maximum completion factor value in case the * tx_queue_size field changed. Ensure we don't overflow * 16-bits. */ if (max < 1) max = 1; else if (max > 65535) max = 65535; rl->param.tx_completion_fact_max = max; /* * Verify that the current TX completion factor is within the * given limits: */ if (rl->param.tx_completion_fact < 1) rl->param.tx_completion_fact = 1; else if (rl->param.tx_completion_fact > max) rl->param.tx_completion_fact = max; } static int mlx5e_rl_modify_sq(struct mlx5e_sq *sq, uint16_t rl_index) { struct mlx5e_priv *priv = sq->priv; struct mlx5_core_dev *mdev = priv->mdev; void *in; void *sqc; int inlen; int err; inlen = MLX5_ST_SZ_BYTES(modify_sq_in); in = mlx5_vzalloc(inlen); if (in == NULL) return (-ENOMEM); sqc = MLX5_ADDR_OF(modify_sq_in, in, ctx); MLX5_SET(modify_sq_in, in, sqn, sq->sqn); MLX5_SET(modify_sq_in, in, sq_state, MLX5_SQC_STATE_RDY); MLX5_SET64(modify_sq_in, in, modify_bitmask, 1); MLX5_SET(sqc, sqc, state, MLX5_SQC_STATE_RDY); MLX5_SET(sqc, sqc, packet_pacing_rate_limit_index, rl_index); err = mlx5_core_modify_sq(mdev, in, inlen); kvfree(in); return (err); } /* * This function will search the configured rate limit table for the * best match to avoid that a single socket based application can * allocate all the available hardware rates. If the user selected * rate deviates too much from the closes rate available in the rate * limit table, unlimited rate will be selected. */ static uint64_t mlx5e_rl_find_best_rate_locked(struct mlx5e_rl_priv_data *rl, uint64_t user_rate) { uint64_t distance = -1ULL; uint64_t diff; uint64_t retval = 0; /* unlimited */ uint64_t x; /* search for closest rate */ for (x = 0; x != rl->param.tx_rates_def; x++) { uint64_t rate = rl->rate_limit_table[x]; if (rate == 0) continue; if (rate > user_rate) diff = rate - user_rate; else diff = user_rate - rate; /* check if distance is smaller than previous rate */ if (diff < distance) { distance = diff; retval = rate; } } /* range check for multiplication below */ if (user_rate > rl->param.tx_limit_max) user_rate = rl->param.tx_limit_max; /* fallback to unlimited, if rate deviates too much */ if (distance > howmany(user_rate * rl->param.tx_allowed_deviation, 1000ULL)) retval = 0; return (retval); } /* * This function sets the requested rate for a rate limit channel, in * bits per second. The requested rate will be filtered through the * find best rate function above. 
*/ static int mlx5e_rlw_channel_set_rate_locked(struct mlx5e_rl_worker *rlw, struct mlx5e_rl_channel *channel, uint64_t rate) { struct mlx5e_rl_priv_data *rl = &rlw->priv->rl; struct mlx5e_sq *sq; uint64_t temp; uint16_t index; uint16_t burst; int error; if (rate != 0) { MLX5E_RL_WORKER_UNLOCK(rlw); MLX5E_RL_RLOCK(rl); /* get current burst size in bytes */ temp = rl->param.tx_burst_size * MLX5E_SW2HW_MTU(rlw->priv->ifp->if_mtu); /* limit burst size to 64K currently */ if (temp > 65535) temp = 65535; burst = temp; /* find best rate */ rate = mlx5e_rl_find_best_rate_locked(rl, rate); MLX5E_RL_RUNLOCK(rl); if (rate == 0) { /* rate doesn't exist, fallback to unlimited */ index = 0; rate = 0; atomic_add_64(&rlw->priv->rl.stats.tx_modify_rate_failure, 1ULL); } else { /* get a reference on the new rate */ error = -mlx5_rl_add_rate(rlw->priv->mdev, howmany(rate, 1000), burst, &index); if (error != 0) { /* adding rate failed, fallback to unlimited */ index = 0; rate = 0; atomic_add_64(&rlw->priv->rl.stats.tx_add_new_rate_failure, 1ULL); } } MLX5E_RL_WORKER_LOCK(rlw); } else { index = 0; burst = 0; /* default */ } /* atomically swap rates */ temp = channel->last_rate; channel->last_rate = rate; rate = temp; /* atomically swap burst size */ temp = channel->last_burst; channel->last_burst = burst; burst = temp; MLX5E_RL_WORKER_UNLOCK(rlw); /* put reference on the old rate, if any */ if (rate != 0) { mlx5_rl_remove_rate(rlw->priv->mdev, howmany(rate, 1000), burst); } /* set new rate, if SQ is running */ sq = channel->sq; if (sq != NULL && READ_ONCE(sq->running) != 0) { error = mlx5e_rl_modify_sq(sq, index); if (error != 0) atomic_add_64(&rlw->priv->rl.stats.tx_modify_rate_failure, 1ULL); } else error = 0; MLX5E_RL_WORKER_LOCK(rlw); return (-error); } static void mlx5e_rl_worker(void *arg) { struct thread *td; struct mlx5e_rl_worker *rlw = arg; struct mlx5e_rl_channel *channel; struct mlx5e_priv *priv; unsigned ix; uint64_t x; int error; /* set thread priority */ td = curthread; thread_lock(td); sched_prio(td, PI_SWI(SWI_NET)); thread_unlock(td); priv = rlw->priv; /* compute completion vector */ ix = (rlw - priv->rl.workers) % priv->mdev->priv.eq_table.num_comp_vectors; /* TODO bind to CPU */ /* open all the SQs */ MLX5E_RL_WORKER_LOCK(rlw); for (x = 0; x < priv->rl.param.tx_channels_per_worker_def; x++) { struct mlx5e_rl_channel *channel = rlw->channels + x; #if !defined(HAVE_RL_PRE_ALLOCATE_CHANNELS) if (channel->state == MLX5E_RL_ST_FREE) continue; #endif MLX5E_RL_WORKER_UNLOCK(rlw); MLX5E_RL_RLOCK(&priv->rl); error = mlx5e_rl_open_channel(rlw, ix, &priv->rl.chan_param, &channel->sq); MLX5E_RL_RUNLOCK(&priv->rl); MLX5E_RL_WORKER_LOCK(rlw); if (error != 0) { mlx5_en_err(priv->ifp, "mlx5e_rl_open_channel failed: %d\n", error); break; } mlx5e_rlw_channel_set_rate_locked(rlw, channel, channel->init_rate); } while (1) { if (STAILQ_FIRST(&rlw->process_head) == NULL) { /* check if we are tearing down */ if (rlw->worker_done != 0) break; cv_wait(&rlw->cv, &rlw->mtx); } /* check if we are tearing down */ if (rlw->worker_done != 0) break; channel = STAILQ_FIRST(&rlw->process_head); if (channel != NULL) { STAILQ_REMOVE_HEAD(&rlw->process_head, entry); switch (channel->state) { case MLX5E_RL_ST_MODIFY: channel->state = MLX5E_RL_ST_USED; MLX5E_RL_WORKER_UNLOCK(rlw); /* create channel by demand */ if (channel->sq == NULL) { MLX5E_RL_RLOCK(&priv->rl); error = mlx5e_rl_open_channel(rlw, ix, &priv->rl.chan_param, &channel->sq); MLX5E_RL_RUNLOCK(&priv->rl); if (error != 0) { mlx5_en_err(priv->ifp, 
"mlx5e_rl_open_channel failed: %d\n", error); } else { atomic_add_64(&rlw->priv->rl.stats.tx_open_queues, 1ULL); } } else { mlx5e_resume_sq(channel->sq); } MLX5E_RL_WORKER_LOCK(rlw); /* convert from bytes/s to bits/s and set new rate */ error = mlx5e_rlw_channel_set_rate_locked(rlw, channel, channel->new_rate * 8ULL); if (error != 0) { mlx5_en_err(priv->ifp, "mlx5e_rlw_channel_set_rate_locked failed: %d\n", error); } break; case MLX5E_RL_ST_DESTROY: error = mlx5e_rlw_channel_set_rate_locked(rlw, channel, 0); if (error != 0) { mlx5_en_err(priv->ifp, "mlx5e_rlw_channel_set_rate_locked failed: %d\n", error); } if (channel->sq != NULL) { /* * Make sure all packets are * transmitted before SQ is * returned to free list: */ MLX5E_RL_WORKER_UNLOCK(rlw); mlx5e_drain_sq(channel->sq); MLX5E_RL_WORKER_LOCK(rlw); } /* put the channel back into the free list */ STAILQ_INSERT_HEAD(&rlw->index_list_head, channel, entry); channel->state = MLX5E_RL_ST_FREE; atomic_add_64(&priv->rl.stats.tx_active_connections, -1ULL); break; default: /* NOP */ break; } } } /* close all the SQs */ for (x = 0; x < priv->rl.param.tx_channels_per_worker_def; x++) { struct mlx5e_rl_channel *channel = rlw->channels + x; /* update the initial rate */ channel->init_rate = channel->last_rate; /* make sure we free up the rate resource */ mlx5e_rlw_channel_set_rate_locked(rlw, channel, 0); if (channel->sq != NULL) { MLX5E_RL_WORKER_UNLOCK(rlw); mlx5e_rl_close_channel(&channel->sq); atomic_add_64(&rlw->priv->rl.stats.tx_open_queues, -1ULL); MLX5E_RL_WORKER_LOCK(rlw); } } rlw->worker_done = 0; cv_broadcast(&rlw->cv); MLX5E_RL_WORKER_UNLOCK(rlw); kthread_exit(); } static int mlx5e_rl_open_tis(struct mlx5e_priv *priv) { struct mlx5_core_dev *mdev = priv->mdev; u32 in[MLX5_ST_SZ_DW(create_tis_in)]; void *tisc = MLX5_ADDR_OF(create_tis_in, in, ctx); memset(in, 0, sizeof(in)); MLX5_SET(tisc, tisc, prio, 0); MLX5_SET(tisc, tisc, transport_domain, priv->tdn); return (mlx5_core_create_tis(mdev, in, sizeof(in), &priv->rl.tisn)); } static void mlx5e_rl_close_tis(struct mlx5e_priv *priv) { mlx5_core_destroy_tis(priv->mdev, priv->rl.tisn); } static void mlx5e_rl_set_default_params(struct mlx5e_rl_params *param, struct mlx5_core_dev *mdev) { /* ratelimit workers */ param->tx_worker_threads_def = mdev->priv.eq_table.num_comp_vectors; param->tx_worker_threads_max = MLX5E_RL_MAX_WORKERS; /* range check */ if (param->tx_worker_threads_def == 0 || param->tx_worker_threads_def > param->tx_worker_threads_max) param->tx_worker_threads_def = param->tx_worker_threads_max; /* ratelimit channels */ param->tx_channels_per_worker_def = MLX5E_RL_MAX_SQS / param->tx_worker_threads_def; param->tx_channels_per_worker_max = MLX5E_RL_MAX_SQS; /* range check */ if (param->tx_channels_per_worker_def > MLX5E_RL_DEF_SQ_PER_WORKER) param->tx_channels_per_worker_def = MLX5E_RL_DEF_SQ_PER_WORKER; /* set default burst size */ param->tx_burst_size = 4; /* MTUs */ /* * Set maximum burst size * * The burst size is multiplied by the MTU and clamped to the * range 0 ... 65535 bytes inclusivly before fed into the * firmware. * * NOTE: If the burst size or MTU is changed only ratelimit * connections made after the change will use the new burst * size. 
*/ param->tx_burst_size_max = 255; /* get firmware rate limits in 1000bit/s and convert them to bit/s */ param->tx_limit_min = mdev->priv.rl_table.min_rate * 1000ULL; param->tx_limit_max = mdev->priv.rl_table.max_rate * 1000ULL; /* ratelimit table size */ param->tx_rates_max = mdev->priv.rl_table.max_size; /* range check */ if (param->tx_rates_max > MLX5E_RL_MAX_TX_RATES) param->tx_rates_max = MLX5E_RL_MAX_TX_RATES; /* set default number of rates */ param->tx_rates_def = param->tx_rates_max; /* set maximum allowed rate deviation */ if (param->tx_limit_max != 0) { /* * Make sure the deviation multiplication doesn't * overflow unsigned 64-bit: */ param->tx_allowed_deviation_max = -1ULL / param->tx_limit_max; } /* set default rate deviation */ param->tx_allowed_deviation = 50; /* 5.0% */ /* channel parameters */ param->tx_queue_size = (1 << MLX5E_PARAMS_MINIMUM_LOG_SQ_SIZE); param->tx_coalesce_usecs = MLX5E_RL_TX_COAL_USEC_DEFAULT; param->tx_coalesce_pkts = MLX5E_RL_TX_COAL_PKTS_DEFAULT; param->tx_coalesce_mode = MLX5E_RL_TX_COAL_MODE_DEFAULT; param->tx_completion_fact = MLX5E_RL_TX_COMP_FACT_DEFAULT; } static const char *mlx5e_rl_params_desc[] = { MLX5E_RL_PARAMS(MLX5E_STATS_DESC) }; static const char *mlx5e_rl_table_params_desc[] = { MLX5E_RL_TABLE_PARAMS(MLX5E_STATS_DESC) }; static const char *mlx5e_rl_stats_desc[] = { MLX5E_RL_STATS(MLX5E_STATS_DESC) }; int mlx5e_rl_init(struct mlx5e_priv *priv) { struct mlx5e_rl_priv_data *rl = &priv->rl; struct sysctl_oid *node; struct sysctl_oid *stats; char buf[64]; uint64_t i; uint64_t j; int error; /* check if there is support for packet pacing */ if (!MLX5_CAP_GEN(priv->mdev, qos) || !MLX5_CAP_QOS(priv->mdev, packet_pacing)) return (0); rl->priv = priv; sysctl_ctx_init(&rl->ctx); sx_init(&rl->rl_sxlock, "ratelimit-sxlock"); - /* allocate shared UAR for SQs */ - error = mlx5_alloc_map_uar(priv->mdev, &rl->sq_uar); - if (error) - goto done; - /* open own TIS domain for ratelimit SQs */ error = mlx5e_rl_open_tis(priv); if (error) - goto err_uar; + goto done; /* setup default value for parameters */ mlx5e_rl_set_default_params(&rl->param, priv->mdev); /* update the completion factor */ mlx5e_rl_sync_tx_completion_fact(rl); /* create root node */ node = SYSCTL_ADD_NODE(&rl->ctx, SYSCTL_CHILDREN(priv->sysctl_ifnet), OID_AUTO, "rate_limit", CTLFLAG_RW | CTLFLAG_MPSAFE, NULL, "Rate limiting support"); if (node != NULL) { /* create SYSCTLs */ for (i = 0; i != MLX5E_RL_PARAMS_NUM; i++) { mlx5e_rl_sysctl_add_u64_oid(rl, MLX5E_RL_PARAMS_INDEX(arg[i]), node, mlx5e_rl_params_desc[2 * i], mlx5e_rl_params_desc[2 * i + 1]); } stats = SYSCTL_ADD_NODE(&rl->ctx, SYSCTL_CHILDREN(node), OID_AUTO, "stats", CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "Rate limiting statistics"); if (stats != NULL) { /* create SYSCTLs */ for (i = 0; i != MLX5E_RL_STATS_NUM; i++) { mlx5e_rl_sysctl_add_stats_u64_oid(rl, i, stats, mlx5e_rl_stats_desc[2 * i], mlx5e_rl_stats_desc[2 * i + 1]); } } } /* allocate workers array */ rl->workers = malloc(sizeof(rl->workers[0]) * rl->param.tx_worker_threads_def, M_MLX5EN, M_WAITOK | M_ZERO); /* allocate rate limit array */ rl->rate_limit_table = malloc(sizeof(rl->rate_limit_table[0]) * rl->param.tx_rates_def, M_MLX5EN, M_WAITOK | M_ZERO); if (node != NULL) { /* create more SYSCTls */ SYSCTL_ADD_PROC(&rl->ctx, SYSCTL_CHILDREN(node), OID_AUTO, "tx_rate_show", CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, rl, 0, &mlx5e_rl_sysctl_show_rate_table, "A", "Show table of all configured TX rates"); /* try to fetch rate table from kernel environment */ for (i = 0; i != 
rl->param.tx_rates_def; i++) { /* compute path for tunable */ snprintf(buf, sizeof(buf), "dev.mce.%d.rate_limit.tx_rate_add_%d", device_get_unit(priv->mdev->pdev->dev.bsddev), (int)i); if (TUNABLE_QUAD_FETCH(buf, &j)) mlx5e_rl_tx_limit_add(rl, j); } /* setup rate table sysctls */ for (i = 0; i != MLX5E_RL_TABLE_PARAMS_NUM; i++) { mlx5e_rl_sysctl_add_u64_oid(rl, MLX5E_RL_PARAMS_INDEX(table_arg[i]), node, mlx5e_rl_table_params_desc[2 * i], mlx5e_rl_table_params_desc[2 * i + 1]); } } for (j = 0; j < rl->param.tx_worker_threads_def; j++) { struct mlx5e_rl_worker *rlw = rl->workers + j; rlw->priv = priv; cv_init(&rlw->cv, "mlx5-worker-cv"); mtx_init(&rlw->mtx, "mlx5-worker-mtx", NULL, MTX_DEF); STAILQ_INIT(&rlw->index_list_head); STAILQ_INIT(&rlw->process_head); rlw->channels = malloc(sizeof(rlw->channels[0]) * rl->param.tx_channels_per_worker_def, M_MLX5EN, M_WAITOK | M_ZERO); MLX5E_RL_WORKER_LOCK(rlw); for (i = 0; i < rl->param.tx_channels_per_worker_def; i++) { struct mlx5e_rl_channel *channel = rlw->channels + i; channel->worker = rlw; channel->tag.type = IF_SND_TAG_TYPE_RATE_LIMIT; STAILQ_INSERT_TAIL(&rlw->index_list_head, channel, entry); } MLX5E_RL_WORKER_UNLOCK(rlw); } PRIV_LOCK(priv); error = mlx5e_rl_open_workers(priv); PRIV_UNLOCK(priv); if (error != 0) { mlx5_en_err(priv->ifp, "mlx5e_rl_open_workers failed: %d\n", error); } return (0); -err_uar: - mlx5_unmap_free_uar(priv->mdev, &rl->sq_uar); done: sysctl_ctx_free(&rl->ctx); sx_destroy(&rl->rl_sxlock); return (error); } static int mlx5e_rl_open_workers(struct mlx5e_priv *priv) { struct mlx5e_rl_priv_data *rl = &priv->rl; struct thread *rl_thread = NULL; struct proc *rl_proc = NULL; uint64_t j; int error; if (priv->gone || rl->opened) return (-EINVAL); MLX5E_RL_WLOCK(rl); /* compute channel parameters once */ mlx5e_rl_build_channel_param(rl, &rl->chan_param); MLX5E_RL_WUNLOCK(rl); for (j = 0; j < rl->param.tx_worker_threads_def; j++) { struct mlx5e_rl_worker *rlw = rl->workers + j; /* start worker thread */ error = kproc_kthread_add(mlx5e_rl_worker, rlw, &rl_proc, &rl_thread, RFHIGHPID, 0, "mlx5-ratelimit", "mlx5-rl-worker-thread-%d", (int)j); if (error != 0) { mlx5_en_err(rl->priv->ifp, "kproc_kthread_add failed: %d\n", error); rlw->worker_done = 1; } } rl->opened = 1; return (0); } static void mlx5e_rl_close_workers(struct mlx5e_priv *priv) { struct mlx5e_rl_priv_data *rl = &priv->rl; uint64_t y; if (rl->opened == 0) return; /* tear down worker threads simultaneously */ for (y = 0; y < rl->param.tx_worker_threads_def; y++) { struct mlx5e_rl_worker *rlw = rl->workers + y; /* tear down worker before freeing SQs */ MLX5E_RL_WORKER_LOCK(rlw); if (rlw->worker_done == 0) { rlw->worker_done = 1; cv_broadcast(&rlw->cv); } else { /* XXX thread not started */ rlw->worker_done = 0; } MLX5E_RL_WORKER_UNLOCK(rlw); } /* wait for worker threads to exit */ for (y = 0; y < rl->param.tx_worker_threads_def; y++) { struct mlx5e_rl_worker *rlw = rl->workers + y; /* tear down worker before freeing SQs */ MLX5E_RL_WORKER_LOCK(rlw); while (rlw->worker_done != 0) cv_wait(&rlw->cv, &rlw->mtx); MLX5E_RL_WORKER_UNLOCK(rlw); } rl->opened = 0; } static void mlx5e_rl_reset_rates(struct mlx5e_rl_priv_data *rl) { unsigned x; MLX5E_RL_WLOCK(rl); for (x = 0; x != rl->param.tx_rates_def; x++) rl->rate_limit_table[x] = 0; MLX5E_RL_WUNLOCK(rl); } void mlx5e_rl_cleanup(struct mlx5e_priv *priv) { struct mlx5e_rl_priv_data *rl = &priv->rl; uint64_t y; /* check if there is support for packet pacing */ if (!MLX5_CAP_GEN(priv->mdev, qos) || !MLX5_CAP_QOS(priv->mdev, 
packet_pacing)) return; /* TODO check if there is support for packet pacing */ sysctl_ctx_free(&rl->ctx); PRIV_LOCK(priv); mlx5e_rl_close_workers(priv); PRIV_UNLOCK(priv); mlx5e_rl_reset_rates(rl); - /* free shared UAR for SQs */ - mlx5_unmap_free_uar(priv->mdev, &rl->sq_uar); - /* close TIS domain */ mlx5e_rl_close_tis(priv); for (y = 0; y < rl->param.tx_worker_threads_def; y++) { struct mlx5e_rl_worker *rlw = rl->workers + y; cv_destroy(&rlw->cv); mtx_destroy(&rlw->mtx); free(rlw->channels, M_MLX5EN); } free(rl->rate_limit_table, M_MLX5EN); free(rl->workers, M_MLX5EN); sx_destroy(&rl->rl_sxlock); } static void mlx5e_rlw_queue_channel_locked(struct mlx5e_rl_worker *rlw, struct mlx5e_rl_channel *channel) { STAILQ_INSERT_TAIL(&rlw->process_head, channel, entry); cv_broadcast(&rlw->cv); } static void mlx5e_rl_free(struct mlx5e_rl_worker *rlw, struct mlx5e_rl_channel *channel) { if (channel == NULL) return; MLX5E_RL_WORKER_LOCK(rlw); switch (channel->state) { case MLX5E_RL_ST_MODIFY: channel->state = MLX5E_RL_ST_DESTROY; break; case MLX5E_RL_ST_USED: channel->state = MLX5E_RL_ST_DESTROY; mlx5e_rlw_queue_channel_locked(rlw, channel); break; default: break; } MLX5E_RL_WORKER_UNLOCK(rlw); } static int mlx5e_rl_modify(struct mlx5e_rl_worker *rlw, struct mlx5e_rl_channel *channel, uint64_t rate) { MLX5E_RL_WORKER_LOCK(rlw); channel->new_rate = rate; switch (channel->state) { case MLX5E_RL_ST_USED: channel->state = MLX5E_RL_ST_MODIFY; mlx5e_rlw_queue_channel_locked(rlw, channel); break; default: break; } MLX5E_RL_WORKER_UNLOCK(rlw); return (0); } static int mlx5e_rl_query(struct mlx5e_rl_worker *rlw, struct mlx5e_rl_channel *channel, union if_snd_tag_query_params *params) { int retval; MLX5E_RL_WORKER_LOCK(rlw); switch (channel->state) { case MLX5E_RL_ST_USED: params->rate_limit.max_rate = channel->last_rate; params->rate_limit.queue_level = mlx5e_sq_queue_level(channel->sq); retval = 0; break; case MLX5E_RL_ST_MODIFY: params->rate_limit.max_rate = channel->last_rate; params->rate_limit.queue_level = mlx5e_sq_queue_level(channel->sq); retval = EBUSY; break; default: retval = EINVAL; break; } MLX5E_RL_WORKER_UNLOCK(rlw); return (retval); } static int mlx5e_find_available_tx_ring_index(struct mlx5e_rl_worker *rlw, struct mlx5e_rl_channel **pchannel) { struct mlx5e_rl_channel *channel; int retval = ENOMEM; MLX5E_RL_WORKER_LOCK(rlw); /* Check for available channel in free list */ if ((channel = STAILQ_FIRST(&rlw->index_list_head)) != NULL) { retval = 0; /* Remove head index from available list */ STAILQ_REMOVE_HEAD(&rlw->index_list_head, entry); channel->state = MLX5E_RL_ST_USED; atomic_add_64(&rlw->priv->rl.stats.tx_active_connections, 1ULL); } else { atomic_add_64(&rlw->priv->rl.stats.tx_available_resource_failure, 1ULL); } MLX5E_RL_WORKER_UNLOCK(rlw); *pchannel = channel; #ifdef RATELIMIT_DEBUG mlx5_en_info(rlw->priv->ifp, "Channel pointer for rate limit connection is %p\n", channel); #endif return (retval); } int mlx5e_rl_snd_tag_alloc(struct ifnet *ifp, union if_snd_tag_alloc_params *params, struct m_snd_tag **ppmt) { struct mlx5e_rl_channel *channel; struct mlx5e_rl_worker *rlw; struct mlx5e_priv *priv; int error; priv = ifp->if_softc; /* check if there is support for packet pacing or if device is going away */ if (!MLX5_CAP_GEN(priv->mdev, qos) || !MLX5_CAP_QOS(priv->mdev, packet_pacing) || priv->gone || params->rate_limit.hdr.type != IF_SND_TAG_TYPE_RATE_LIMIT) return (EOPNOTSUPP); /* compute worker thread this TCP connection belongs to */ rlw = priv->rl.workers + 
((params->rate_limit.hdr.flowid % 128) % priv->rl.param.tx_worker_threads_def); error = mlx5e_find_available_tx_ring_index(rlw, &channel); if (error != 0) goto done; error = mlx5e_rl_modify(rlw, channel, params->rate_limit.max_rate); if (error != 0) { mlx5e_rl_free(rlw, channel); goto done; } /* store pointer to mbuf tag */ MPASS(channel->tag.refcount == 0); m_snd_tag_init(&channel->tag, ifp, IF_SND_TAG_TYPE_RATE_LIMIT); *ppmt = &channel->tag; done: return (error); } int mlx5e_rl_snd_tag_modify(struct m_snd_tag *pmt, union if_snd_tag_modify_params *params) { struct mlx5e_rl_channel *channel = container_of(pmt, struct mlx5e_rl_channel, tag); return (mlx5e_rl_modify(channel->worker, channel, params->rate_limit.max_rate)); } int mlx5e_rl_snd_tag_query(struct m_snd_tag *pmt, union if_snd_tag_query_params *params) { struct mlx5e_rl_channel *channel = container_of(pmt, struct mlx5e_rl_channel, tag); return (mlx5e_rl_query(channel->worker, channel, params)); } void mlx5e_rl_snd_tag_free(struct m_snd_tag *pmt) { struct mlx5e_rl_channel *channel = container_of(pmt, struct mlx5e_rl_channel, tag); mlx5e_rl_free(channel->worker, channel); } static int mlx5e_rl_sysctl_show_rate_table(SYSCTL_HANDLER_ARGS) { struct mlx5e_rl_priv_data *rl = arg1; struct mlx5e_priv *priv = rl->priv; struct sbuf sbuf; unsigned x; int error; error = sysctl_wire_old_buffer(req, 0); if (error != 0) return (error); PRIV_LOCK(priv); sbuf_new_for_sysctl(&sbuf, NULL, 128 * rl->param.tx_rates_def, req); sbuf_printf(&sbuf, "\n\n" "\t" "ENTRY" "\t" "BURST" "\t" "RATE [bit/s]\n" "\t" "--------------------------------------------\n"); MLX5E_RL_RLOCK(rl); for (x = 0; x != rl->param.tx_rates_def; x++) { if (rl->rate_limit_table[x] == 0) continue; sbuf_printf(&sbuf, "\t" "%3u" "\t" "%3u" "\t" "%lld\n", x, (unsigned)rl->param.tx_burst_size, (long long)rl->rate_limit_table[x]); } MLX5E_RL_RUNLOCK(rl); error = sbuf_finish(&sbuf); sbuf_delete(&sbuf); PRIV_UNLOCK(priv); return (error); } static int mlx5e_rl_refresh_channel_params(struct mlx5e_rl_priv_data *rl) { uint64_t x; uint64_t y; MLX5E_RL_WLOCK(rl); /* compute channel parameters once */ mlx5e_rl_build_channel_param(rl, &rl->chan_param); MLX5E_RL_WUNLOCK(rl); for (y = 0; y != rl->param.tx_worker_threads_def; y++) { struct mlx5e_rl_worker *rlw = rl->workers + y; for (x = 0; x != rl->param.tx_channels_per_worker_def; x++) { struct mlx5e_rl_channel *channel; struct mlx5e_sq *sq; channel = rlw->channels + x; sq = channel->sq; if (sq == NULL) continue; if (MLX5_CAP_GEN(rl->priv->mdev, cq_period_mode_modify)) { mlx5_core_modify_cq_moderation_mode(rl->priv->mdev, &sq->cq.mcq, rl->param.tx_coalesce_usecs, rl->param.tx_coalesce_pkts, rl->param.tx_coalesce_mode); } else { mlx5_core_modify_cq_moderation(rl->priv->mdev, &sq->cq.mcq, rl->param.tx_coalesce_usecs, rl->param.tx_coalesce_pkts); } } } return (0); } void mlx5e_rl_refresh_sq_inline(struct mlx5e_rl_priv_data *rl) { uint64_t x; uint64_t y; for (y = 0; y != rl->param.tx_worker_threads_def; y++) { struct mlx5e_rl_worker *rlw = rl->workers + y; for (x = 0; x != rl->param.tx_channels_per_worker_def; x++) { struct mlx5e_rl_channel *channel; struct mlx5e_sq *sq; channel = rlw->channels + x; sq = channel->sq; if (sq == NULL) continue; mtx_lock(&sq->lock); mlx5e_update_sq_inline(sq); mtx_unlock(&sq->lock); } } } static int mlx5e_rl_tx_limit_add(struct mlx5e_rl_priv_data *rl, uint64_t value) { unsigned x; int error; if (value < 1000 || mlx5_rl_is_in_range(rl->priv->mdev, howmany(value, 1000), 0) == 0) return (EINVAL); MLX5E_RL_WLOCK(rl); error = 
ENOMEM; /* check if rate already exists */ for (x = 0; x != rl->param.tx_rates_def; x++) { if (rl->rate_limit_table[x] != value) continue; error = EEXIST; break; } /* check if there is a free rate entry */ if (x == rl->param.tx_rates_def) { for (x = 0; x != rl->param.tx_rates_def; x++) { if (rl->rate_limit_table[x] != 0) continue; rl->rate_limit_table[x] = value; error = 0; break; } } MLX5E_RL_WUNLOCK(rl); return (error); } static int mlx5e_rl_tx_limit_clr(struct mlx5e_rl_priv_data *rl, uint64_t value) { unsigned x; int error; if (value == 0) return (EINVAL); MLX5E_RL_WLOCK(rl); /* check if rate already exists */ for (x = 0; x != rl->param.tx_rates_def; x++) { if (rl->rate_limit_table[x] != value) continue; /* free up rate */ rl->rate_limit_table[x] = 0; break; } /* check if there is a free rate entry */ if (x == rl->param.tx_rates_def) error = ENOENT; else error = 0; MLX5E_RL_WUNLOCK(rl); return (error); } static int mlx5e_rl_sysctl_handler(SYSCTL_HANDLER_ARGS) { struct mlx5e_rl_priv_data *rl = arg1; struct mlx5e_priv *priv = rl->priv; unsigned mode_modify; unsigned was_opened; uint64_t value; uint64_t old; int error; PRIV_LOCK(priv); MLX5E_RL_RLOCK(rl); value = rl->param.arg[arg2]; MLX5E_RL_RUNLOCK(rl); if (req != NULL) { old = value; error = sysctl_handle_64(oidp, &value, 0, req); if (error || req->newptr == NULL || value == rl->param.arg[arg2]) goto done; } else { old = 0; error = 0; } /* check if device is gone */ if (priv->gone) { error = ENXIO; goto done; } was_opened = rl->opened; mode_modify = MLX5_CAP_GEN(priv->mdev, cq_period_mode_modify); switch (MLX5E_RL_PARAMS_INDEX(arg[arg2])) { case MLX5E_RL_PARAMS_INDEX(tx_worker_threads_def): if (value > rl->param.tx_worker_threads_max) value = rl->param.tx_worker_threads_max; else if (value < 1) value = 1; /* store new value */ rl->param.arg[arg2] = value; break; case MLX5E_RL_PARAMS_INDEX(tx_channels_per_worker_def): if (value > rl->param.tx_channels_per_worker_max) value = rl->param.tx_channels_per_worker_max; else if (value < 1) value = 1; /* store new value */ rl->param.arg[arg2] = value; break; case MLX5E_RL_PARAMS_INDEX(tx_rates_def): if (value > rl->param.tx_rates_max) value = rl->param.tx_rates_max; else if (value < 1) value = 1; /* store new value */ rl->param.arg[arg2] = value; break; case MLX5E_RL_PARAMS_INDEX(tx_coalesce_usecs): /* range check */ if (value < 1) value = 0; else if (value > MLX5E_FLD_MAX(cqc, cq_period)) value = MLX5E_FLD_MAX(cqc, cq_period); /* store new value */ rl->param.arg[arg2] = value; /* check to avoid down and up the network interface */ if (was_opened) error = mlx5e_rl_refresh_channel_params(rl); break; case MLX5E_RL_PARAMS_INDEX(tx_coalesce_pkts): /* import TX coal pkts */ if (value < 1) value = 0; else if (value > MLX5E_FLD_MAX(cqc, cq_max_count)) value = MLX5E_FLD_MAX(cqc, cq_max_count); /* store new value */ rl->param.arg[arg2] = value; /* check to avoid down and up the network interface */ if (was_opened) error = mlx5e_rl_refresh_channel_params(rl); break; case MLX5E_RL_PARAMS_INDEX(tx_coalesce_mode): /* network interface must be down */ if (was_opened != 0 && mode_modify == 0) mlx5e_rl_close_workers(priv); /* import TX coalesce mode */ if (value != 0) value = 1; /* store new value */ rl->param.arg[arg2] = value; /* restart network interface, if any */ if (was_opened != 0) { if (mode_modify == 0) mlx5e_rl_open_workers(priv); else error = mlx5e_rl_refresh_channel_params(rl); } break; case MLX5E_RL_PARAMS_INDEX(tx_queue_size): /* network interface must be down */ if (was_opened) 
mlx5e_rl_close_workers(priv); /* import TX queue size */ if (value < (1 << MLX5E_PARAMS_MINIMUM_LOG_SQ_SIZE)) value = (1 << MLX5E_PARAMS_MINIMUM_LOG_SQ_SIZE); else if (value > priv->params_ethtool.tx_queue_size_max) value = priv->params_ethtool.tx_queue_size_max; /* store actual TX queue size */ value = 1ULL << order_base_2(value); /* store new value */ rl->param.arg[arg2] = value; /* verify TX completion factor */ mlx5e_rl_sync_tx_completion_fact(rl); /* restart network interface, if any */ if (was_opened) mlx5e_rl_open_workers(priv); break; case MLX5E_RL_PARAMS_INDEX(tx_completion_fact): /* network interface must be down */ if (was_opened) mlx5e_rl_close_workers(priv); /* store new value */ rl->param.arg[arg2] = value; /* verify parameter */ mlx5e_rl_sync_tx_completion_fact(rl); /* restart network interface, if any */ if (was_opened) mlx5e_rl_open_workers(priv); break; case MLX5E_RL_PARAMS_INDEX(tx_limit_add): error = mlx5e_rl_tx_limit_add(rl, value); break; case MLX5E_RL_PARAMS_INDEX(tx_limit_clr): error = mlx5e_rl_tx_limit_clr(rl, value); break; case MLX5E_RL_PARAMS_INDEX(tx_allowed_deviation): /* range check */ if (value > rl->param.tx_allowed_deviation_max) value = rl->param.tx_allowed_deviation_max; else if (value < rl->param.tx_allowed_deviation_min) value = rl->param.tx_allowed_deviation_min; MLX5E_RL_WLOCK(rl); rl->param.arg[arg2] = value; MLX5E_RL_WUNLOCK(rl); break; case MLX5E_RL_PARAMS_INDEX(tx_burst_size): /* range check */ if (value > rl->param.tx_burst_size_max) value = rl->param.tx_burst_size_max; else if (value < rl->param.tx_burst_size_min) value = rl->param.tx_burst_size_min; MLX5E_RL_WLOCK(rl); rl->param.arg[arg2] = value; MLX5E_RL_WUNLOCK(rl); break; default: break; } done: PRIV_UNLOCK(priv); return (error); } static void mlx5e_rl_sysctl_add_u64_oid(struct mlx5e_rl_priv_data *rl, unsigned x, struct sysctl_oid *node, const char *name, const char *desc) { /* * NOTE: In FreeBSD-11 and newer the CTLFLAG_RWTUN flag will * take care of loading default sysctl value from the kernel * environment, if any: */ if (strstr(name, "_max") != 0 || strstr(name, "_min") != 0) { /* read-only SYSCTLs */ SYSCTL_ADD_PROC(&rl->ctx, SYSCTL_CHILDREN(node), OID_AUTO, name, CTLTYPE_U64 | CTLFLAG_RD | CTLFLAG_MPSAFE, rl, x, &mlx5e_rl_sysctl_handler, "QU", desc); } else { if (strstr(name, "_def") != 0) { #ifdef RATELIMIT_DEBUG /* tunable read-only advanced SYSCTLs */ SYSCTL_ADD_PROC(&rl->ctx, SYSCTL_CHILDREN(node), OID_AUTO, name, CTLTYPE_U64 | CTLFLAG_RDTUN | CTLFLAG_MPSAFE, rl, x, &mlx5e_rl_sysctl_handler, "QU", desc); #endif } else { /* read-write SYSCTLs */ SYSCTL_ADD_PROC(&rl->ctx, SYSCTL_CHILDREN(node), OID_AUTO, name, CTLTYPE_U64 | CTLFLAG_RWTUN | CTLFLAG_MPSAFE, rl, x, &mlx5e_rl_sysctl_handler, "QU", desc); } } } static void mlx5e_rl_sysctl_add_stats_u64_oid(struct mlx5e_rl_priv_data *rl, unsigned x, struct sysctl_oid *node, const char *name, const char *desc) { /* read-only SYSCTLs */ SYSCTL_ADD_U64(&rl->ctx, SYSCTL_CHILDREN(node), OID_AUTO, name, CTLFLAG_RD, &rl->stats.arg[x], 0, desc); } #else int mlx5e_rl_init(struct mlx5e_priv *priv) { return (0); } void mlx5e_rl_cleanup(struct mlx5e_priv *priv) { /* NOP */ } #endif /* RATELIMIT */ diff --git a/sys/dev/mlx5/mlx5_en/mlx5_en_tx.c b/sys/dev/mlx5/mlx5_en/mlx5_en_tx.c index 0da68fc5ac24..c8dd21f6aadc 100644 --- a/sys/dev/mlx5/mlx5_en/mlx5_en_tx.c +++ b/sys/dev/mlx5/mlx5_en/mlx5_en_tx.c @@ -1,882 +1,882 @@ /*- * Copyright (c) 2015-2019 Mellanox Technologies. All rights reserved. 
* * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS `AS IS' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #include "opt_kern_tls.h" #include "en.h" #include static inline bool mlx5e_do_send_cqe_inline(struct mlx5e_sq *sq) { sq->cev_counter++; /* interleave the CQEs */ if (sq->cev_counter >= sq->cev_factor) { sq->cev_counter = 0; return (true); } return (false); } bool mlx5e_do_send_cqe(struct mlx5e_sq *sq) { return (mlx5e_do_send_cqe_inline(sq)); } void mlx5e_send_nop(struct mlx5e_sq *sq, u32 ds_cnt) { u16 pi = sq->pc & sq->wq.sz_m1; struct mlx5e_tx_wqe *wqe = mlx5_wq_cyc_get_wqe(&sq->wq, pi); memset(&wqe->ctrl, 0, sizeof(wqe->ctrl)); wqe->ctrl.opmod_idx_opcode = cpu_to_be32((sq->pc << 8) | MLX5_OPCODE_NOP); wqe->ctrl.qpn_ds = cpu_to_be32((sq->sqn << 8) | ds_cnt); if (mlx5e_do_send_cqe_inline(sq)) wqe->ctrl.fm_ce_se = MLX5_WQE_CTRL_CQ_UPDATE; else wqe->ctrl.fm_ce_se = 0; /* Copy data for doorbell */ memcpy(sq->doorbell.d32, &wqe->ctrl, sizeof(sq->doorbell.d32)); sq->mbuf[pi].mbuf = NULL; sq->mbuf[pi].num_bytes = 0; sq->mbuf[pi].num_wqebbs = DIV_ROUND_UP(ds_cnt, MLX5_SEND_WQEBB_NUM_DS); sq->pc += sq->mbuf[pi].num_wqebbs; } #if (__FreeBSD_version >= 1100000) static uint32_t mlx5e_hash_value; static void mlx5e_hash_init(void *arg) { mlx5e_hash_value = m_ether_tcpip_hash_init(); } /* Make kernel call mlx5e_hash_init after the random stack finished initializing */ SYSINIT(mlx5e_hash_init, SI_SUB_RANDOM, SI_ORDER_ANY, &mlx5e_hash_init, NULL); #endif static struct mlx5e_sq * mlx5e_select_queue_by_send_tag(struct ifnet *ifp, struct mbuf *mb) { struct m_snd_tag *mb_tag; struct mlx5e_sq *sq; mb_tag = mb->m_pkthdr.snd_tag; #ifdef KERN_TLS top: #endif /* get pointer to sendqueue */ switch (mb_tag->type) { #ifdef RATELIMIT case IF_SND_TAG_TYPE_RATE_LIMIT: sq = container_of(mb_tag, struct mlx5e_rl_channel, tag)->sq; break; #ifdef KERN_TLS case IF_SND_TAG_TYPE_TLS_RATE_LIMIT: mb_tag = container_of(mb_tag, struct mlx5e_tls_tag, tag)->rl_tag; goto top; #endif #endif case IF_SND_TAG_TYPE_UNLIMITED: sq = &container_of(mb_tag, struct mlx5e_channel, tag)->sq[0]; KASSERT((mb_tag->refcount > 0), ("mlx5e_select_queue: Channel refs are zero for unlimited tag")); break; #ifdef KERN_TLS case IF_SND_TAG_TYPE_TLS: mb_tag = container_of(mb_tag, struct mlx5e_tls_tag, tag)->rl_tag; goto top; #endif default: sq = NULL; break; } /* check if valid */ if (sq != NULL && 
READ_ONCE(sq->running) != 0) return (sq); return (NULL); } static struct mlx5e_sq * mlx5e_select_queue(struct ifnet *ifp, struct mbuf *mb) { struct mlx5e_priv *priv = ifp->if_softc; struct mlx5e_sq *sq; u32 ch; u32 tc; /* obtain VLAN information if present */ if (mb->m_flags & M_VLANTAG) { tc = (mb->m_pkthdr.ether_vtag >> 13); if (tc >= priv->num_tc) tc = priv->default_vlan_prio; } else { tc = priv->default_vlan_prio; } ch = priv->params.num_channels; /* check if flowid is set */ if (M_HASHTYPE_GET(mb) != M_HASHTYPE_NONE) { #ifdef RSS u32 temp; if (rss_hash2bucket(mb->m_pkthdr.flowid, M_HASHTYPE_GET(mb), &temp) == 0) ch = temp % ch; else #endif ch = (mb->m_pkthdr.flowid % 128) % ch; } else { #if (__FreeBSD_version >= 1100000) ch = m_ether_tcpip_hash(MBUF_HASHFLAG_L3 | MBUF_HASHFLAG_L4, mb, mlx5e_hash_value) % ch; #else /* * m_ether_tcpip_hash not present in stable, so just * throw unhashed mbufs on queue 0 */ ch = 0; #endif } /* check if send queue is running */ sq = &priv->channel[ch].sq[tc]; if (likely(READ_ONCE(sq->running) != 0)) return (sq); return (NULL); } static inline u16 mlx5e_get_l2_header_size(struct mlx5e_sq *sq, struct mbuf *mb) { struct ether_vlan_header *eh; uint16_t eth_type; int min_inline; eh = mtod(mb, struct ether_vlan_header *); if (unlikely(mb->m_len < ETHER_HDR_LEN)) { goto max_inline; } else if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) { if (unlikely(mb->m_len < (ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN))) goto max_inline; eth_type = ntohs(eh->evl_proto); min_inline = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN; } else { eth_type = ntohs(eh->evl_encap_proto); min_inline = ETHER_HDR_LEN; } switch (eth_type) { case ETHERTYPE_IP: case ETHERTYPE_IPV6: /* * Make sure the TOS(IPv4) or traffic class(IPv6) * field gets inlined. Else the SQ may stall. */ min_inline += 4; break; default: goto max_inline; } /* * m_copydata() will be used on the remaining header which * does not need to reside within the first m_len bytes of * data: */ if (mb->m_pkthdr.len < min_inline) goto max_inline; return (min_inline); max_inline: return (MIN(mb->m_pkthdr.len, sq->max_inline)); } /* * This function parse IPv4 and IPv6 packets looking for TCP and UDP * headers. * * Upon return the pointer at which the "ppth" argument points, is set * to the location of the TCP header. NULL is used if no TCP header is * present. * * The return value indicates the number of bytes from the beginning * of the packet until the first byte after the TCP or UDP header. If * this function returns zero, the parsing failed. 
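 *
 * For a plain untagged TCP/IPv4 frame without IP or TCP options the
 * returned size is ETHER_HDR_LEN (14) + 20 (IPv4) + 20 (TCP) = 54
 * bytes; a VLAN tag adds ETHER_VLAN_ENCAP_LEN (4) and a UDP frame
 * stops right after the 8 byte UDP header, i.e. 14 + 20 + 8 = 42
 * bytes, with *ppth left set to NULL.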
*/ int mlx5e_get_full_header_size(const struct mbuf *mb, const struct tcphdr **ppth) { const struct ether_vlan_header *eh; const struct tcphdr *th; const struct ip *ip; int ip_hlen, tcp_hlen; const struct ip6_hdr *ip6; uint16_t eth_type; int eth_hdr_len; eh = mtod(mb, const struct ether_vlan_header *); if (unlikely(mb->m_len < ETHER_HDR_LEN)) goto failure; if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) { if (unlikely(mb->m_len < (ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN))) goto failure; eth_type = ntohs(eh->evl_proto); eth_hdr_len = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN; } else { eth_type = ntohs(eh->evl_encap_proto); eth_hdr_len = ETHER_HDR_LEN; } switch (eth_type) { case ETHERTYPE_IP: ip = (const struct ip *)(mb->m_data + eth_hdr_len); if (unlikely(mb->m_len < eth_hdr_len + sizeof(*ip))) goto failure; switch (ip->ip_p) { case IPPROTO_TCP: ip_hlen = ip->ip_hl << 2; eth_hdr_len += ip_hlen; goto tcp_packet; case IPPROTO_UDP: ip_hlen = ip->ip_hl << 2; eth_hdr_len += ip_hlen + 8; th = NULL; goto udp_packet; default: goto failure; } break; case ETHERTYPE_IPV6: ip6 = (const struct ip6_hdr *)(mb->m_data + eth_hdr_len); if (unlikely(mb->m_len < eth_hdr_len + sizeof(*ip6))) goto failure; switch (ip6->ip6_nxt) { case IPPROTO_TCP: eth_hdr_len += sizeof(*ip6); goto tcp_packet; case IPPROTO_UDP: eth_hdr_len += sizeof(*ip6) + 8; th = NULL; goto udp_packet; default: goto failure; } break; default: goto failure; } tcp_packet: if (unlikely(mb->m_len < eth_hdr_len + sizeof(*th))) { const struct mbuf *m_th = mb->m_next; if (unlikely(mb->m_len != eth_hdr_len || m_th == NULL || m_th->m_len < sizeof(*th))) goto failure; th = (const struct tcphdr *)(m_th->m_data); } else { th = (const struct tcphdr *)(mb->m_data + eth_hdr_len); } tcp_hlen = th->th_off << 2; eth_hdr_len += tcp_hlen; udp_packet: /* * m_copydata() will be used on the remaining header which * does not need to reside within the first m_len bytes of * data: */ if (unlikely(mb->m_pkthdr.len < eth_hdr_len)) goto failure; if (ppth != NULL) *ppth = th; return (eth_hdr_len); failure: if (ppth != NULL) *ppth = NULL; return (0); } struct mlx5_wqe_dump_seg { struct mlx5_wqe_ctrl_seg ctrl; struct mlx5_wqe_data_seg data; } __aligned(MLX5_SEND_WQE_BB); CTASSERT(DIV_ROUND_UP(2, MLX5_SEND_WQEBB_NUM_DS) == 1); int mlx5e_sq_dump_xmit(struct mlx5e_sq *sq, struct mlx5e_xmit_args *parg, struct mbuf **mbp) { bus_dma_segment_t segs[MLX5E_MAX_TX_MBUF_FRAGS]; struct mlx5_wqe_dump_seg *wqe; struct mlx5_wqe_dump_seg *wqe_last; int nsegs; int xsegs; u32 off; u32 msb; int err; int x; struct mbuf *mb; const u32 ds_cnt = 2; u16 pi; const u8 opcode = MLX5_OPCODE_DUMP; /* get pointer to mbuf */ mb = *mbp; /* get producer index */ pi = sq->pc & sq->wq.sz_m1; sq->mbuf[pi].num_bytes = mb->m_pkthdr.len; sq->mbuf[pi].num_wqebbs = 0; /* check number of segments in mbuf */ err = bus_dmamap_load_mbuf_sg(sq->dma_tag, sq->mbuf[pi].dma_map, mb, segs, &nsegs, BUS_DMA_NOWAIT); if (err == EFBIG) { /* update statistics */ sq->stats.defragged++; /* too many mbuf fragments */ mb = m_defrag(*mbp, M_NOWAIT); if (mb == NULL) { mb = *mbp; goto tx_drop; } /* try again */ err = bus_dmamap_load_mbuf_sg(sq->dma_tag, sq->mbuf[pi].dma_map, mb, segs, &nsegs, BUS_DMA_NOWAIT); } if (err != 0) goto tx_drop; /* make sure all mbuf data, if any, is visible to the bus */ bus_dmamap_sync(sq->dma_tag, sq->mbuf[pi].dma_map, BUS_DMASYNC_PREWRITE); /* compute number of real DUMP segments */ msb = sq->priv->params_ethtool.hw_mtu_msb; for (x = xsegs = 0; x != nsegs; x++) xsegs += howmany((u32)segs[x].ds_len, msb); /* check 
if there are no segments */ if (unlikely(xsegs == 0)) { bus_dmamap_unload(sq->dma_tag, sq->mbuf[pi].dma_map); m_freem(mb); *mbp = NULL; /* safety clear */ return (0); } /* return ENOBUFS if the queue is full */ if (unlikely(!mlx5e_sq_has_room_for(sq, xsegs))) { sq->stats.enobuf++; bus_dmamap_unload(sq->dma_tag, sq->mbuf[pi].dma_map); m_freem(mb); *mbp = NULL; /* safety clear */ return (ENOBUFS); } wqe = mlx5_wq_cyc_get_wqe(&sq->wq, pi); wqe_last = mlx5_wq_cyc_get_wqe(&sq->wq, sq->wq.sz_m1); for (x = 0; x != nsegs; x++) { for (off = 0; off < segs[x].ds_len; off += msb) { u32 len = segs[x].ds_len - off; /* limit length */ if (likely(len > msb)) len = msb; memset(&wqe->ctrl, 0, sizeof(wqe->ctrl)); /* fill control segment */ wqe->ctrl.opmod_idx_opcode = cpu_to_be32((sq->pc << 8) | opcode); wqe->ctrl.qpn_ds = cpu_to_be32((sq->sqn << 8) | ds_cnt); wqe->ctrl.imm = cpu_to_be32(parg->tisn << 8); /* fill data segment */ wqe->data.addr = cpu_to_be64((uint64_t)segs[x].ds_addr + off); wqe->data.lkey = sq->mkey_be; wqe->data.byte_count = cpu_to_be32(len); /* advance to next building block */ if (unlikely(wqe == wqe_last)) wqe = mlx5_wq_cyc_get_wqe(&sq->wq, 0); else wqe++; sq->mbuf[pi].num_wqebbs++; sq->pc++; } } wqe = mlx5_wq_cyc_get_wqe(&sq->wq, pi); wqe_last = mlx5_wq_cyc_get_wqe(&sq->wq, (sq->pc - 1) & sq->wq.sz_m1); /* put in place data fence */ wqe->ctrl.fm_ce_se |= MLX5_FENCE_MODE_INITIATOR_SMALL; /* check if we should generate a completion event */ if (mlx5e_do_send_cqe_inline(sq)) wqe_last->ctrl.fm_ce_se |= MLX5_WQE_CTRL_CQ_UPDATE; /* copy data for doorbell */ memcpy(sq->doorbell.d32, wqe_last, sizeof(sq->doorbell.d32)); /* store pointer to mbuf */ sq->mbuf[pi].mbuf = mb; sq->mbuf[pi].p_refcount = parg->pref; atomic_add_int(parg->pref, 1); /* count all traffic going out */ sq->stats.packets++; sq->stats.bytes += sq->mbuf[pi].num_bytes; *mbp = NULL; /* safety clear */ return (0); tx_drop: sq->stats.dropped++; *mbp = NULL; m_freem(mb); return err; } int mlx5e_sq_xmit(struct mlx5e_sq *sq, struct mbuf **mbp) { bus_dma_segment_t segs[MLX5E_MAX_TX_MBUF_FRAGS]; struct mlx5e_xmit_args args = {}; struct mlx5_wqe_data_seg *dseg; struct mlx5e_tx_wqe *wqe; struct ifnet *ifp; int nsegs; int err; int x; struct mbuf *mb; u16 ds_cnt; u16 pi; u8 opcode; #ifdef KERN_TLS top: #endif /* Return ENOBUFS if the queue is full */ if (unlikely(!mlx5e_sq_has_room_for(sq, 2 * MLX5_SEND_WQE_MAX_WQEBBS))) { sq->stats.enobuf++; return (ENOBUFS); } /* Align SQ edge with NOPs to avoid WQE wrap around */ pi = ((~sq->pc) & sq->wq.sz_m1); if (pi < (MLX5_SEND_WQE_MAX_WQEBBS - 1)) { /* Send one multi NOP message instead of many */ mlx5e_send_nop(sq, (pi + 1) * MLX5_SEND_WQEBB_NUM_DS); pi = ((~sq->pc) & sq->wq.sz_m1); if (pi < (MLX5_SEND_WQE_MAX_WQEBBS - 1)) { sq->stats.enobuf++; return (ENOMEM); } } #ifdef KERN_TLS /* Special handling for TLS packets, if any */ switch (mlx5e_sq_tls_xmit(sq, &args, mbp)) { case MLX5E_TLS_LOOP: goto top; case MLX5E_TLS_FAILURE: mb = *mbp; err = ENOMEM; goto tx_drop; case MLX5E_TLS_DEFERRED: return (0); case MLX5E_TLS_CONTINUE: default: break; } #endif /* Setup local variables */ pi = sq->pc & sq->wq.sz_m1; wqe = mlx5_wq_cyc_get_wqe(&sq->wq, pi); ifp = sq->ifp; memset(wqe, 0, sizeof(*wqe)); /* get pointer to mbuf */ mb = *mbp; /* Send a copy of the frame to the BPF listener, if any */ if (ifp != NULL && ifp->if_bpf != NULL) ETHER_BPF_MTAP(ifp, mb); if (mb->m_pkthdr.csum_flags & (CSUM_IP | CSUM_TSO)) { wqe->eth.cs_flags |= MLX5_ETH_WQE_L3_CSUM; } if (mb->m_pkthdr.csum_flags & (CSUM_TCP | CSUM_UDP | 
CSUM_UDP_IPV6 | CSUM_TCP_IPV6 | CSUM_TSO)) { wqe->eth.cs_flags |= MLX5_ETH_WQE_L4_CSUM; } if (wqe->eth.cs_flags == 0) { sq->stats.csum_offload_none++; } if (mb->m_pkthdr.csum_flags & CSUM_TSO) { u32 payload_len; u32 mss = mb->m_pkthdr.tso_segsz; u32 num_pkts; wqe->eth.mss = cpu_to_be16(mss); opcode = MLX5_OPCODE_LSO; if (args.ihs == 0) args.ihs = mlx5e_get_full_header_size(mb, NULL); if (unlikely(args.ihs == 0)) { err = EINVAL; goto tx_drop; } payload_len = mb->m_pkthdr.len - args.ihs; if (payload_len == 0) num_pkts = 1; else num_pkts = DIV_ROUND_UP(payload_len, mss); sq->mbuf[pi].num_bytes = payload_len + (num_pkts * args.ihs); sq->stats.tso_packets++; sq->stats.tso_bytes += payload_len; } else { opcode = MLX5_OPCODE_SEND; if (args.ihs == 0) { switch (sq->min_inline_mode) { case MLX5_INLINE_MODE_IP: case MLX5_INLINE_MODE_TCP_UDP: args.ihs = mlx5e_get_full_header_size(mb, NULL); if (unlikely(args.ihs == 0)) args.ihs = mlx5e_get_l2_header_size(sq, mb); break; case MLX5_INLINE_MODE_L2: args.ihs = mlx5e_get_l2_header_size(sq, mb); break; case MLX5_INLINE_MODE_NONE: /* FALLTHROUGH */ default: if ((mb->m_flags & M_VLANTAG) != 0 && (sq->min_insert_caps & MLX5E_INSERT_VLAN) != 0) { /* inlining VLAN data is not required */ wqe->eth.vlan_cmd = htons(0x8000); /* bit 0 CVLAN */ wqe->eth.vlan_hdr = htons(mb->m_pkthdr.ether_vtag); args.ihs = 0; } else if ((mb->m_flags & M_VLANTAG) == 0 && (sq->min_insert_caps & MLX5E_INSERT_NON_VLAN) != 0) { /* inlining non-VLAN data is not required */ args.ihs = 0; } else { /* we are forced to inlining L2 header, if any */ args.ihs = mlx5e_get_l2_header_size(sq, mb); } break; } } sq->mbuf[pi].num_bytes = max_t (unsigned int, mb->m_pkthdr.len, ETHER_MIN_LEN - ETHER_CRC_LEN); } if (likely(args.ihs == 0)) { /* nothing to inline */ } else if ((mb->m_flags & M_VLANTAG) != 0) { struct ether_vlan_header *eh = (struct ether_vlan_header *) wqe->eth.inline_hdr_start; /* Range checks */ if (unlikely(args.ihs > (sq->max_inline - ETHER_VLAN_ENCAP_LEN))) { if (mb->m_pkthdr.csum_flags & CSUM_TSO) { err = EINVAL; goto tx_drop; } args.ihs = (sq->max_inline - ETHER_VLAN_ENCAP_LEN); } else if (unlikely(args.ihs < ETHER_HDR_LEN)) { err = EINVAL; goto tx_drop; } m_copydata(mb, 0, ETHER_HDR_LEN, (caddr_t)eh); m_adj(mb, ETHER_HDR_LEN); /* Insert 4 bytes VLAN tag into data stream */ eh->evl_proto = eh->evl_encap_proto; eh->evl_encap_proto = htons(ETHERTYPE_VLAN); eh->evl_tag = htons(mb->m_pkthdr.ether_vtag); /* Copy rest of header data, if any */ m_copydata(mb, 0, args.ihs - ETHER_HDR_LEN, (caddr_t)(eh + 1)); m_adj(mb, args.ihs - ETHER_HDR_LEN); /* Extend header by 4 bytes */ args.ihs += ETHER_VLAN_ENCAP_LEN; wqe->eth.inline_hdr_sz = cpu_to_be16(args.ihs); } else { /* check if inline header size is too big */ if (unlikely(args.ihs > sq->max_inline)) { if (unlikely(mb->m_pkthdr.csum_flags & CSUM_TSO)) { err = EINVAL; goto tx_drop; } args.ihs = sq->max_inline; } m_copydata(mb, 0, args.ihs, wqe->eth.inline_hdr_start); m_adj(mb, args.ihs); wqe->eth.inline_hdr_sz = cpu_to_be16(args.ihs); } ds_cnt = sizeof(*wqe) / MLX5_SEND_WQE_DS; if (args.ihs > sizeof(wqe->eth.inline_hdr_start)) { ds_cnt += DIV_ROUND_UP(args.ihs - sizeof(wqe->eth.inline_hdr_start), MLX5_SEND_WQE_DS); } dseg = ((struct mlx5_wqe_data_seg *)&wqe->ctrl) + ds_cnt; err = bus_dmamap_load_mbuf_sg(sq->dma_tag, sq->mbuf[pi].dma_map, mb, segs, &nsegs, BUS_DMA_NOWAIT); if (err == EFBIG) { /* Update statistics */ sq->stats.defragged++; /* Too many mbuf fragments */ mb = m_defrag(*mbp, M_NOWAIT); if (mb == NULL) { mb = *mbp; goto tx_drop; } 
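 /*
  * m_defrag() collapsed the mbuf chain into as few clusters as
  * possible, so the reload below should normally fit within
  * MLX5E_MAX_TX_MBUF_FRAGS segments; if it still fails, the
  * frame is dropped via tx_drop further down.
  */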
/* Try again */ err = bus_dmamap_load_mbuf_sg(sq->dma_tag, sq->mbuf[pi].dma_map, mb, segs, &nsegs, BUS_DMA_NOWAIT); } /* Catch errors */ if (err != 0) goto tx_drop; /* Make sure all mbuf data, if any, is visible to the bus */ if (nsegs != 0) { bus_dmamap_sync(sq->dma_tag, sq->mbuf[pi].dma_map, BUS_DMASYNC_PREWRITE); } else { /* All data was inlined, free the mbuf. */ bus_dmamap_unload(sq->dma_tag, sq->mbuf[pi].dma_map); m_freem(mb); mb = NULL; } for (x = 0; x != nsegs; x++) { if (segs[x].ds_len == 0) continue; dseg->addr = cpu_to_be64((uint64_t)segs[x].ds_addr); dseg->lkey = sq->mkey_be; dseg->byte_count = cpu_to_be32((uint32_t)segs[x].ds_len); dseg++; } ds_cnt = (dseg - ((struct mlx5_wqe_data_seg *)&wqe->ctrl)); wqe->ctrl.opmod_idx_opcode = cpu_to_be32((sq->pc << 8) | opcode); wqe->ctrl.qpn_ds = cpu_to_be32((sq->sqn << 8) | ds_cnt); wqe->ctrl.imm = cpu_to_be32(args.tisn << 8); if (mlx5e_do_send_cqe_inline(sq)) wqe->ctrl.fm_ce_se = MLX5_WQE_CTRL_CQ_UPDATE; else wqe->ctrl.fm_ce_se = 0; /* Copy data for doorbell */ memcpy(sq->doorbell.d32, &wqe->ctrl, sizeof(sq->doorbell.d32)); /* Store pointer to mbuf */ sq->mbuf[pi].mbuf = mb; sq->mbuf[pi].num_wqebbs = DIV_ROUND_UP(ds_cnt, MLX5_SEND_WQEBB_NUM_DS); sq->mbuf[pi].p_refcount = args.pref; if (unlikely(args.pref != NULL)) atomic_add_int(args.pref, 1); sq->pc += sq->mbuf[pi].num_wqebbs; /* Count all traffic going out */ sq->stats.packets++; sq->stats.bytes += sq->mbuf[pi].num_bytes; *mbp = NULL; /* safety clear */ return (0); tx_drop: sq->stats.dropped++; *mbp = NULL; m_freem(mb); return err; } static void mlx5e_poll_tx_cq(struct mlx5e_sq *sq, int budget) { u16 sqcc; /* * sq->cc must be updated only after mlx5_cqwq_update_db_record(), * otherwise a cq overrun may occur */ sqcc = sq->cc; while (budget > 0) { struct mlx5_cqe64 *cqe; struct mbuf *mb; u16 x; u16 ci; cqe = mlx5e_get_cqe(&sq->cq); if (!cqe) break; mlx5_cqwq_pop(&sq->cq.wq); /* update budget according to the event factor */ budget -= sq->cev_factor; for (x = 0; x != sq->cev_factor; x++) { ci = sqcc & sq->wq.sz_m1; mb = sq->mbuf[ci].mbuf; sq->mbuf[ci].mbuf = NULL; if (unlikely(sq->mbuf[ci].p_refcount != NULL)) { atomic_add_int(sq->mbuf[ci].p_refcount, -1); sq->mbuf[ci].p_refcount = NULL; } if (mb == NULL) { if (sq->mbuf[ci].num_bytes == 0) { /* NOP */ sq->stats.nop++; } } else { bus_dmamap_sync(sq->dma_tag, sq->mbuf[ci].dma_map, BUS_DMASYNC_POSTWRITE); bus_dmamap_unload(sq->dma_tag, sq->mbuf[ci].dma_map); /* Free transmitted mbuf */ m_freem(mb); } sqcc += sq->mbuf[ci].num_wqebbs; } } mlx5_cqwq_update_db_record(&sq->cq.wq); /* Ensure cq space is freed before enabling more cqes */ atomic_thread_fence_rel(); sq->cc = sqcc; } static int mlx5e_xmit_locked(struct ifnet *ifp, struct mlx5e_sq *sq, struct mbuf *mb) { int err = 0; if (unlikely((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0 || READ_ONCE(sq->running) == 0)) { m_freem(mb); return (ENETDOWN); } /* Do transmit */ if (mlx5e_sq_xmit(sq, &mb) != 0) { /* NOTE: m_freem() is NULL safe */ m_freem(mb); err = ENOBUFS; } /* Check if we need to write the doorbell */ if (likely(sq->doorbell.d64 != 0)) { - mlx5e_tx_notify_hw(sq, sq->doorbell.d32, 0); + mlx5e_tx_notify_hw(sq, sq->doorbell.d32); sq->doorbell.d64 = 0; } /* * Check if we need to start the event timer which flushes the * transmit ring on timeout: */ if (unlikely(sq->cev_next_state == MLX5E_CEV_STATE_INITIAL && sq->cev_factor != 1)) { /* start the timer */ mlx5e_sq_cev_timeout(sq); } else { /* don't send NOPs yet */ sq->cev_next_state = MLX5E_CEV_STATE_HOLD_NOPS; } return (err); } int 
mlx5e_xmit(struct ifnet *ifp, struct mbuf *mb) { struct mlx5e_sq *sq; int ret; if (mb->m_pkthdr.csum_flags & CSUM_SND_TAG) { MPASS(mb->m_pkthdr.snd_tag->ifp == ifp); sq = mlx5e_select_queue_by_send_tag(ifp, mb); if (unlikely(sq == NULL)) { goto select_queue; } } else { select_queue: sq = mlx5e_select_queue(ifp, mb); if (unlikely(sq == NULL)) { /* Free mbuf */ m_freem(mb); /* Invalid send queue */ return (ENXIO); } } mtx_lock(&sq->lock); ret = mlx5e_xmit_locked(ifp, sq, mb); mtx_unlock(&sq->lock); return (ret); } void mlx5e_tx_cq_comp(struct mlx5_core_cq *mcq, struct mlx5_eqe *eqe __unused) { struct mlx5e_sq *sq = container_of(mcq, struct mlx5e_sq, cq.mcq); mtx_lock(&sq->comp_lock); mlx5e_poll_tx_cq(sq, MLX5E_BUDGET_MAX); mlx5e_cq_arm(&sq->cq, MLX5_GET_DOORBELL_LOCK(&sq->priv->doorbell_lock)); mtx_unlock(&sq->comp_lock); } diff --git a/sys/dev/mlx5/mlx5_ib/mlx5_ib.h b/sys/dev/mlx5/mlx5_ib/mlx5_ib.h index 974d39308c79..b9828f1da466 100644 --- a/sys/dev/mlx5/mlx5_ib/mlx5_ib.h +++ b/sys/dev/mlx5/mlx5_ib/mlx5_ib.h @@ -1,1058 +1,1105 @@ /*- * Copyright (c) 2013-2015, Mellanox Technologies, Ltd. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS `AS IS' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #ifndef MLX5_IB_H #define MLX5_IB_H #include #include #include #include #include #include #include #include #include #include #include #include #define mlx5_ib_dbg(dev, format, arg...) \ pr_debug("%s:%s:%d:(pid %d): " format, (dev)->ib_dev.name, __func__, \ __LINE__, current->pid, ##arg) #define mlx5_ib_err(dev, format, arg...) \ pr_err("%s: ERR: %s:%d:(pid %d): " format, (dev)->ib_dev.name, __func__, \ __LINE__, current->pid, ##arg) #define mlx5_ib_warn(dev, format, arg...) 
\ pr_warn("%s: WARN: %s:%d:(pid %d): " format, (dev)->ib_dev.name, __func__, \ __LINE__, current->pid, ##arg) #define field_avail(type, fld, sz) (offsetof(type, fld) + \ sizeof(((type *)0)->fld) <= (sz)) #define MLX5_IB_DEFAULT_UIDX 0xffffff #define MLX5_USER_ASSIGNED_UIDX_MASK __mlx5_mask(qpc, user_index) enum { MLX5_IB_MMAP_CMD_SHIFT = 8, MLX5_IB_MMAP_CMD_MASK = 0xff, }; enum mlx5_ib_mmap_cmd { MLX5_IB_MMAP_REGULAR_PAGE = 0, MLX5_IB_MMAP_GET_CONTIGUOUS_PAGES = 1, MLX5_IB_MMAP_WC_PAGE = 2, MLX5_IB_MMAP_NC_PAGE = 3, /* 5 is chosen in order to be compatible with old versions of libmlx5 */ MLX5_IB_MMAP_CORE_CLOCK = 5, }; enum { MLX5_RES_SCAT_DATA32_CQE = 0x1, MLX5_RES_SCAT_DATA64_CQE = 0x2, MLX5_REQ_SCAT_DATA32_CQE = 0x11, MLX5_REQ_SCAT_DATA64_CQE = 0x22, }; enum mlx5_ib_latency_class { MLX5_IB_LATENCY_CLASS_LOW, MLX5_IB_LATENCY_CLASS_MEDIUM, MLX5_IB_LATENCY_CLASS_HIGH, MLX5_IB_LATENCY_CLASS_FAST_PATH }; enum mlx5_ib_mad_ifc_flags { MLX5_MAD_IFC_IGNORE_MKEY = 1, MLX5_MAD_IFC_IGNORE_BKEY = 2, MLX5_MAD_IFC_NET_VIEW = 4, }; enum { - MLX5_CROSS_CHANNEL_UUAR = 0, + MLX5_CROSS_CHANNEL_BFREG = 0, }; enum { MLX5_CQE_VERSION_V0, MLX5_CQE_VERSION_V1, }; +enum { + MLX5_IB_INVALID_UAR_INDEX = BIT(31), + MLX5_IB_INVALID_BFREG = BIT(31), +}; + struct mlx5_ib_vma_private_data { struct list_head list; struct vm_area_struct *vma; }; +struct mlx5_bfreg_info { + u32 *sys_pages; + int num_low_latency_bfregs; + unsigned int *count; + + /* + * protect bfreg allocation data structs + */ + struct mutex lock; + u32 ver; + u8 lib_uar_4k : 1; + u8 lib_uar_dyn : 1; + u32 num_sys_pages; + u32 num_static_sys_pages; + u32 total_num_bfregs; + u32 num_dyn_bfregs; +}; + struct mlx5_ib_ucontext { struct ib_ucontext ibucontext; struct list_head db_page_list; /* protect doorbell record alloc/free */ struct mutex db_page_mutex; - struct mlx5_uuar_info uuari; + struct mlx5_bfreg_info bfregi; u8 cqe_version; /* Transport Domain number */ u32 tdn; struct list_head vma_private_list; }; static inline struct mlx5_ib_ucontext *to_mucontext(struct ib_ucontext *ibucontext) { return container_of(ibucontext, struct mlx5_ib_ucontext, ibucontext); } struct mlx5_ib_pd { struct ib_pd ibpd; u32 pdn; }; #define MLX5_IB_FLOW_MCAST_PRIO (MLX5_BY_PASS_NUM_PRIOS - 1) #define MLX5_IB_FLOW_LAST_PRIO (MLX5_BY_PASS_NUM_REGULAR_PRIOS - 1) #if (MLX5_IB_FLOW_LAST_PRIO <= 0) #error "Invalid number of bypass priorities" #endif #define MLX5_IB_FLOW_LEFTOVERS_PRIO (MLX5_IB_FLOW_MCAST_PRIO + 1) #define MLX5_IB_NUM_FLOW_FT (MLX5_IB_FLOW_LEFTOVERS_PRIO + 1) #define MLX5_IB_NUM_SNIFFER_FTS 2 struct mlx5_ib_flow_prio { struct mlx5_flow_table *flow_table; unsigned int refcount; }; struct mlx5_ib_flow_handler { struct list_head list; struct ib_flow ibflow; struct mlx5_ib_flow_prio *prio; struct mlx5_flow_rule *rule; }; struct mlx5_ib_flow_db { struct mlx5_ib_flow_prio prios[MLX5_IB_NUM_FLOW_FT]; struct mlx5_ib_flow_prio sniffer[MLX5_IB_NUM_SNIFFER_FTS]; struct mlx5_flow_table *lag_demux_ft; /* Protect flow steering bypass flow tables * when add/del flow rules. * only single add/removal of flow steering rule could be done * simultaneously. 
*/ struct mutex lock; }; /* Use macros here so that don't have to duplicate * enum ib_send_flags and enum ib_qp_type for low-level driver */ #define MLX5_IB_SEND_UMR_UNREG IB_SEND_RESERVED_START #define MLX5_IB_SEND_UMR_FAIL_IF_FREE (IB_SEND_RESERVED_START << 1) #define MLX5_IB_SEND_UMR_UPDATE_MTT (IB_SEND_RESERVED_START << 2) #define MLX5_IB_SEND_UMR_UPDATE_TRANSLATION (IB_SEND_RESERVED_START << 3) #define MLX5_IB_SEND_UMR_UPDATE_PD (IB_SEND_RESERVED_START << 4) #define MLX5_IB_SEND_UMR_UPDATE_ACCESS IB_SEND_RESERVED_END #define MLX5_IB_QPT_REG_UMR IB_QPT_RESERVED1 /* * IB_QPT_GSI creates the software wrapper around GSI, and MLX5_IB_QPT_HW_GSI * creates the actual hardware QP. */ #define MLX5_IB_QPT_HW_GSI IB_QPT_RESERVED2 #define MLX5_IB_WR_UMR IB_WR_RESERVED1 /* Private QP creation flags to be passed in ib_qp_init_attr.create_flags. * * These flags are intended for internal use by the mlx5_ib driver, and they * rely on the range reserved for that use in the ib_qp_create_flags enum. */ - -/* Create a UD QP whose source QP number is 1 */ -static inline enum ib_qp_create_flags mlx5_ib_create_qp_sqpn_qp1(void) -{ - return IB_QP_CREATE_RESERVED_START; -} +#define MLX5_IB_QP_CREATE_SQPN_QP1 IB_QP_CREATE_RESERVED_START +#define MLX5_IB_QP_CREATE_WC_TEST (IB_QP_CREATE_RESERVED_START << 1) struct wr_list { u16 opcode; u16 next; }; struct mlx5_ib_wq { u64 *wrid; u32 *wr_data; struct wr_list *w_list; unsigned *wqe_head; u16 unsig_count; /* serialize post to the work queue */ spinlock_t lock; int wqe_cnt; int max_post; int max_gs; int offset; int wqe_shift; unsigned head; unsigned tail; u16 cur_post; u16 last_poll; void *qend; }; struct mlx5_ib_rwq { struct ib_wq ibwq; struct mlx5_core_qp core_qp; u32 rq_num_pas; u32 log_rq_stride; u32 log_rq_size; u32 rq_page_offset; u32 log_page_size; struct ib_umem *umem; size_t buf_size; unsigned int page_shift; int create_type; struct mlx5_db db; u32 user_index; u32 wqe_count; u32 wqe_shift; int wq_sig; }; enum { MLX5_QP_USER, MLX5_QP_KERNEL, MLX5_QP_EMPTY }; enum { MLX5_WQ_USER, MLX5_WQ_KERNEL }; struct mlx5_ib_rwq_ind_table { struct ib_rwq_ind_table ib_rwq_ind_tbl; u32 rqtn; }; /* * Connect-IB can trigger up to four concurrent pagefaults * per-QP. 
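 *
 * The four contexts are the combinations of responder/requestor and
 * read/write, selected from a pagefault's MLX5_PFAULT_REQUESTOR and
 * MLX5_PFAULT_WRITE flag bits as done in
 * mlx5_ib_get_pagefault_context() below.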
*/ enum mlx5_ib_pagefault_context { MLX5_IB_PAGEFAULT_RESPONDER_READ, MLX5_IB_PAGEFAULT_REQUESTOR_READ, MLX5_IB_PAGEFAULT_RESPONDER_WRITE, MLX5_IB_PAGEFAULT_REQUESTOR_WRITE, MLX5_IB_PAGEFAULT_CONTEXTS }; static inline enum mlx5_ib_pagefault_context mlx5_ib_get_pagefault_context(struct mlx5_pagefault *pagefault) { return pagefault->flags & (MLX5_PFAULT_REQUESTOR | MLX5_PFAULT_WRITE); } struct mlx5_ib_pfault { struct work_struct work; struct mlx5_pagefault mpfault; }; struct mlx5_ib_ubuffer { struct ib_umem *umem; int buf_size; u64 buf_addr; }; struct mlx5_ib_qp_base { struct mlx5_ib_qp *container_mibqp; struct mlx5_core_qp mqp; struct mlx5_ib_ubuffer ubuffer; }; struct mlx5_ib_qp_trans { struct mlx5_ib_qp_base base; u16 xrcdn; u8 alt_port; u8 atomic_rd_en; u8 resp_depth; }; struct mlx5_ib_rss_qp { u32 tirn; }; struct mlx5_ib_rq { struct mlx5_ib_qp_base base; struct mlx5_ib_wq *rq; struct mlx5_ib_ubuffer ubuffer; struct mlx5_db *doorbell; u32 tirn; u8 state; }; struct mlx5_ib_sq { struct mlx5_ib_qp_base base; struct mlx5_ib_wq *sq; struct mlx5_ib_ubuffer ubuffer; struct mlx5_db *doorbell; u32 tisn; u8 state; }; struct mlx5_ib_raw_packet_qp { struct mlx5_ib_sq sq; struct mlx5_ib_rq rq; }; +struct mlx5_bf { + int buf_size; + unsigned long offset; + struct mlx5_sq_bfreg *bfreg; + spinlock_t lock32; +}; + struct mlx5_ib_qp { struct ib_qp ibqp; union { struct mlx5_ib_qp_trans trans_qp; struct mlx5_ib_raw_packet_qp raw_packet_qp; struct mlx5_ib_rss_qp rss_qp; }; struct mlx5_buf buf; struct mlx5_db db; struct mlx5_ib_wq rq; u8 sq_signal_bits; u8 fm_cache; struct mlx5_ib_wq sq; /* serialize qp state modifications */ struct mutex mutex; u32 flags; u8 port; u8 state; int wq_sig; int scat_cqe; int max_inline_data; - struct mlx5_bf *bf; + struct mlx5_bf bf; int has_rq; /* only for user space QPs. For kernel * we have it from the bf object */ - int uuarn; + int bfregn; int create_type; /* Store signature errors */ bool signature_en; #ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING /* * A flag that is true for QP's that are in a state that doesn't * allow page faults, and shouldn't schedule any more faults. */ int disable_page_faults; /* * The disable_page_faults_lock protects a QP's disable_page_faults * field, allowing for a thread to atomically check whether the QP * allows page faults, and if so schedule a page fault. 
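 * In other words, a fault handler is expected to take this lock,
 * check that disable_page_faults is still clear and only then queue
 * the matching pagefaults[] work item, so that disabling faults and
 * scheduling new ones cannot race.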
*/ spinlock_t disable_page_faults_lock; struct mlx5_ib_pfault pagefaults[MLX5_IB_PAGEFAULT_CONTEXTS]; #endif struct list_head qps_list; struct list_head cq_recv_list; struct list_head cq_send_list; }; struct mlx5_ib_cq_buf { struct mlx5_buf buf; struct ib_umem *umem; int cqe_size; int nent; }; enum mlx5_ib_qp_flags { MLX5_IB_QP_LSO = IB_QP_CREATE_IPOIB_UD_LSO, MLX5_IB_QP_BLOCK_MULTICAST_LOOPBACK = IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK, MLX5_IB_QP_CROSS_CHANNEL = IB_QP_CREATE_CROSS_CHANNEL, MLX5_IB_QP_MANAGED_SEND = IB_QP_CREATE_MANAGED_SEND, MLX5_IB_QP_MANAGED_RECV = IB_QP_CREATE_MANAGED_RECV, MLX5_IB_QP_SIGNATURE_HANDLING = 1 << 5, /* QP uses 1 as its source QP number */ MLX5_IB_QP_SQPN_QP1 = 1 << 6, MLX5_IB_QP_CAP_SCATTER_FCS = 1 << 7, MLX5_IB_QP_RSS = 1 << 8, }; struct mlx5_umr_wr { struct ib_send_wr wr; union { u64 virt_addr; u64 offset; } target; struct ib_pd *pd; unsigned int page_shift; unsigned int npages; u32 length; int access_flags; u32 mkey; }; static inline struct mlx5_umr_wr *umr_wr(struct ib_send_wr *wr) { return container_of(wr, struct mlx5_umr_wr, wr); } struct mlx5_shared_mr_info { int mr_id; struct ib_umem *umem; }; struct mlx5_ib_cq { struct ib_cq ibcq; struct mlx5_core_cq mcq; struct mlx5_ib_cq_buf buf; struct mlx5_db db; /* serialize access to the CQ */ spinlock_t lock; /* protect resize cq */ struct mutex resize_mutex; struct mlx5_ib_cq_buf *resize_buf; struct ib_umem *resize_umem; int cqe_size; struct list_head list_send_qp; struct list_head list_recv_qp; u32 create_flags; struct list_head wc_list; enum ib_cq_notify_flags notify_flags; struct work_struct notify_work; }; struct mlx5_ib_wc { struct ib_wc wc; struct list_head list; }; struct mlx5_ib_srq { struct ib_srq ibsrq; struct mlx5_core_srq msrq; struct mlx5_buf buf; struct mlx5_db db; u64 *wrid; /* protect SRQ hanlding */ spinlock_t lock; int head; int tail; u16 wqe_ctr; struct ib_umem *umem; /* serialize arming a SRQ */ struct mutex mutex; int wq_sig; }; struct mlx5_ib_xrcd { struct ib_xrcd ibxrcd; u32 xrcdn; }; enum mlx5_ib_mtt_access_flags { MLX5_IB_MTT_READ = (1 << 0), MLX5_IB_MTT_WRITE = (1 << 1), }; #define MLX5_IB_MTT_PRESENT (MLX5_IB_MTT_READ | MLX5_IB_MTT_WRITE) struct mlx5_ib_mr { struct ib_mr ibmr; void *descs; dma_addr_t desc_map; int ndescs; int max_descs; int desc_size; int access_mode; struct mlx5_core_mr mmkey; struct ib_umem *umem; struct mlx5_shared_mr_info *smr_info; struct list_head list; int order; int umred; int npages; struct mlx5_ib_dev *dev; u32 out[MLX5_ST_SZ_DW(create_mkey_out)]; struct mlx5_core_sig_ctx *sig; int live; void *descs_alloc; int access_flags; /* Needed for rereg MR */ struct mlx5_async_work cb_work; }; struct mlx5_ib_mw { struct ib_mw ibmw; struct mlx5_core_mr mmkey; }; struct mlx5_ib_umr_context { struct ib_cqe cqe; enum ib_wc_status status; struct completion done; }; struct umr_common { struct ib_pd *pd; struct ib_cq *cq; struct ib_qp *qp; /* control access to UMR QP */ struct semaphore sem; }; enum { MLX5_FMR_INVALID, MLX5_FMR_VALID, MLX5_FMR_BUSY, }; struct mlx5_cache_ent { struct list_head head; /* sync access to the cahce entry */ spinlock_t lock; struct dentry *dir; char name[4]; u32 order; u32 size; u32 cur; u32 miss; u32 limit; struct dentry *fsize; struct dentry *fcur; struct dentry *fmiss; struct dentry *flimit; struct mlx5_ib_dev *dev; struct work_struct work; struct delayed_work dwork; int pending; }; struct mlx5_mr_cache { struct workqueue_struct *wq; struct mlx5_cache_ent ent[MAX_MR_CACHE_ENTRIES]; int stopped; struct dentry *root; unsigned long last_add; 
}; struct mlx5_ib_gsi_qp; struct mlx5_ib_port_resources { struct mlx5_ib_resources *devr; struct mlx5_ib_gsi_qp *gsi; struct work_struct pkey_change_work; }; struct mlx5_ib_resources { struct ib_cq *c0; struct ib_xrcd *x0; struct ib_xrcd *x1; struct ib_pd *p0; struct ib_srq *s0; struct ib_srq *s1; struct mlx5_ib_port_resources ports[2]; /* Protects changes to the port resources */ struct mutex mutex; }; struct mlx5_ib_port { u16 q_cnt_id; }; struct mlx5_roce { /* Protect mlx5_ib_get_netdev from invoking dev_hold() with a NULL * netdev pointer */ rwlock_t netdev_lock; struct net_device *netdev; struct notifier_block nb; atomic_t next_port; }; #define MLX5_IB_STATS_COUNT(a,b,c,d) a #define MLX5_IB_STATS_VAR(a,b,c,d) b; #define MLX5_IB_STATS_DESC(a,b,c,d) c, d, #define MLX5_IB_CONG_PARAMS(m) \ /* ECN RP */ \ m(+1, u64 rp_clamp_tgt_rate, "rp_clamp_tgt_rate", "If set, whenever a CNP is processed, the target rate is updated to be the current rate") \ m(+1, u64 rp_clamp_tgt_rate_ati, "rp_clamp_tgt_rate_ati", "If set, when receiving a CNP, the target rate should be updated if the transission rate was increased due to the timer, and not only due to the byte counter") \ m(+1, u64 rp_time_reset, "rp_time_reset", "Time in microseconds between rate increases if no CNPs are received") \ m(+1, u64 rp_byte_reset, "rp_byte_reset", "Transmitted data in bytes between rate increases if no CNP's are received. A value of zero means disabled.") \ m(+1, u64 rp_threshold, "rp_threshold", "The number of times rpByteStage or rpTimeStage can count before the RP rate control state machine advances states") \ m(+1, u64 rp_ai_rate, "rp_ai_rate", "The rate, in Mbits per second, used to increase rpTargetRate in the active increase state") \ m(+1, u64 rp_hai_rate, "rp_hai_rate", "The rate, in Mbits per second, used to increase rpTargetRate in the hyper increase state") \ m(+1, u64 rp_min_dec_fac, "rp_min_dec_fac", "The minimum factor by which the current transmit rate can be changed when processing a CNP. Value is given as a percentage, [1 .. 100]") \ m(+1, u64 rp_min_rate, "rp_min_rate", "The minimum value, in Mbps per second, for rate to limit") \ m(+1, u64 rp_rate_to_set_on_first_cnp, "rp_rate_to_set_on_first_cnp", "The rate that is set for the flow when a rate limiter is allocated to it upon first CNP received, in Mbps. 
A value of zero means use full port speed") \ m(+1, u64 rp_dce_tcp_g, "rp_dce_tcp_g", "Used to update the congestion estimator, alpha, once every dce_tcp_rtt microseconds") \ m(+1, u64 rp_dce_tcp_rtt, "rp_dce_tcp_rtt", "The time between updates of the alpha value, in microseconds") \ m(+1, u64 rp_rate_reduce_monitor_period, "rp_rate_reduce_monitor_period", "The minimum time between two consecutive rate reductions for a single flow") \ m(+1, u64 rp_initial_alpha_value, "rp_initial_alpha_value", "The initial value of alpha to use when receiving the first CNP for a flow") \ m(+1, u64 rp_gd, "rp_gd", "If a CNP is received, the flow rate is reduced at the beginning of the next rate_reduce_monitor_period interval") \ /* ECN NP */ \ m(+1, u64 np_cnp_dscp, "np_cnp_dscp", "The DiffServ Code Point of the generated CNP for this port") \ m(+1, u64 np_cnp_prio_mode, "np_cnp_prio_mode", "The 802.1p priority value of the generated CNP for this port") \ m(+1, u64 np_cnp_prio, "np_cnp_prio", "The 802.1p priority value of the generated CNP for this port") #define MLX5_IB_CONG_PARAMS_NUM (0 MLX5_IB_CONG_PARAMS(MLX5_IB_STATS_COUNT)) #define MLX5_IB_CONG_STATS(m) \ m(+1, u64 syndrome, "syndrome", "Syndrome number") \ m(+1, u64 rp_cur_flows, "rp_cur_flows", "Number of flows limited") \ m(+1, u64 sum_flows, "sum_flows", "Sum of the number of flows limited over time") \ m(+1, u64 rp_cnp_ignored, "rp_cnp_ignored", "Number of CNPs and CNMs ignored") \ m(+1, u64 rp_cnp_handled, "rp_cnp_handled", "Number of CNPs and CNMs successfully handled") \ m(+1, u64 time_stamp, "time_stamp", "Time stamp in microseconds") \ m(+1, u64 accumulators_period, "accumulators_period", "The value of X variable for accumulating counters") \ m(+1, u64 np_ecn_marked_roce_packets, "np_ecn_marked_roce_packets", "Number of ECN marked packets seen") \ m(+1, u64 np_cnp_sent, "np_cnp_sent", "Number of CNPs sent") #define MLX5_IB_CONG_STATS_NUM (0 MLX5_IB_CONG_STATS(MLX5_IB_STATS_COUNT)) struct mlx5_ib_congestion { struct sysctl_ctx_list ctx; struct sx lock; struct delayed_work dwork; union { u64 arg[1]; struct { MLX5_IB_CONG_PARAMS(MLX5_IB_STATS_VAR) MLX5_IB_CONG_STATS(MLX5_IB_STATS_VAR) }; }; }; struct mlx5_ib_dev { struct ib_device ib_dev; struct mlx5_core_dev *mdev; struct mlx5_roce roce; MLX5_DECLARE_DOORBELL_LOCK(uar_lock); int num_ports; /* serialize update of capability mask */ struct mutex cap_mask_mutex; bool ib_active; struct umr_common umrc; /* sync used page count stats */ struct mlx5_ib_resources devr; struct mlx5_mr_cache cache; struct timer_list delay_timer; /* Prevents soft lock on massive reg MRs */ struct mutex slow_path_mutex; int fill_delay; #ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING struct ib_odp_caps odp_caps; /* * Sleepable RCU that prevents destruction of MRs while they are still * being used by a page fault handler.
*/ struct srcu_struct mr_srcu; #endif struct mlx5_ib_flow_db flow_db; /* protect resources needed as part of reset flow */ spinlock_t reset_flow_resource_lock; struct list_head qp_list; /* Array with num_ports elements */ struct mlx5_ib_port *port; + struct mlx5_sq_bfreg bfreg; + struct mlx5_sq_bfreg wc_bfreg; + struct mlx5_sq_bfreg fp_bfreg; struct mlx5_ib_congestion congestion; struct mlx5_async_ctx async_ctx; }; static inline struct mlx5_ib_cq *to_mibcq(struct mlx5_core_cq *mcq) { return container_of(mcq, struct mlx5_ib_cq, mcq); } static inline struct mlx5_ib_xrcd *to_mxrcd(struct ib_xrcd *ibxrcd) { return container_of(ibxrcd, struct mlx5_ib_xrcd, ibxrcd); } static inline struct mlx5_ib_dev *to_mdev(struct ib_device *ibdev) { return container_of(ibdev, struct mlx5_ib_dev, ib_dev); } static inline struct mlx5_ib_cq *to_mcq(struct ib_cq *ibcq) { return container_of(ibcq, struct mlx5_ib_cq, ibcq); } static inline struct mlx5_ib_qp *to_mibqp(struct mlx5_core_qp *mqp) { return container_of(mqp, struct mlx5_ib_qp_base, mqp)->container_mibqp; } static inline struct mlx5_ib_rwq *to_mibrwq(struct mlx5_core_qp *core_qp) { return container_of(core_qp, struct mlx5_ib_rwq, core_qp); } static inline struct mlx5_ib_mr *to_mibmr(struct mlx5_core_mr *mmkey) { return container_of(mmkey, struct mlx5_ib_mr, mmkey); } static inline struct mlx5_ib_pd *to_mpd(struct ib_pd *ibpd) { return container_of(ibpd, struct mlx5_ib_pd, ibpd); } static inline struct mlx5_ib_srq *to_msrq(struct ib_srq *ibsrq) { return container_of(ibsrq, struct mlx5_ib_srq, ibsrq); } static inline struct mlx5_ib_qp *to_mqp(struct ib_qp *ibqp) { return container_of(ibqp, struct mlx5_ib_qp, ibqp); } static inline struct mlx5_ib_rwq *to_mrwq(struct ib_wq *ibwq) { return container_of(ibwq, struct mlx5_ib_rwq, ibwq); } static inline struct mlx5_ib_rwq_ind_table *to_mrwq_ind_table(struct ib_rwq_ind_table *ib_rwq_ind_tbl) { return container_of(ib_rwq_ind_tbl, struct mlx5_ib_rwq_ind_table, ib_rwq_ind_tbl); } static inline struct mlx5_ib_srq *to_mibsrq(struct mlx5_core_srq *msrq) { return container_of(msrq, struct mlx5_ib_srq, msrq); } static inline struct mlx5_ib_mr *to_mmr(struct ib_mr *ibmr) { return container_of(ibmr, struct mlx5_ib_mr, ibmr); } static inline struct mlx5_ib_mw *to_mmw(struct ib_mw *ibmw) { return container_of(ibmw, struct mlx5_ib_mw, ibmw); } struct mlx5_ib_ah { struct ib_ah ibah; struct mlx5_av av; }; static inline struct mlx5_ib_ah *to_mah(struct ib_ah *ibah) { return container_of(ibah, struct mlx5_ib_ah, ibah); } int mlx5_ib_db_map_user(struct mlx5_ib_ucontext *context, unsigned long virt, struct mlx5_db *db); void mlx5_ib_db_unmap_user(struct mlx5_ib_ucontext *context, struct mlx5_db *db); void __mlx5_ib_cq_clean(struct mlx5_ib_cq *cq, u32 qpn, struct mlx5_ib_srq *srq); void mlx5_ib_cq_clean(struct mlx5_ib_cq *cq, u32 qpn, struct mlx5_ib_srq *srq); void mlx5_ib_free_srq_wqe(struct mlx5_ib_srq *srq, int wqe_index); int mlx5_MAD_IFC(struct mlx5_ib_dev *dev, int ignore_mkey, int ignore_bkey, u8 port, const struct ib_wc *in_wc, const struct ib_grh *in_grh, const void *in_mad, void *response_mad); struct ib_ah *mlx5_ib_create_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr, struct ib_udata *udata); int mlx5_ib_query_ah(struct ib_ah *ibah, struct ib_ah_attr *ah_attr); int mlx5_ib_destroy_ah(struct ib_ah *ah); struct ib_srq *mlx5_ib_create_srq(struct ib_pd *pd, struct ib_srq_init_attr *init_attr, struct ib_udata *udata); int mlx5_ib_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr, enum ib_srq_attr_mask attr_mask, 
struct ib_udata *udata); int mlx5_ib_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *srq_attr); int mlx5_ib_destroy_srq(struct ib_srq *srq); int mlx5_ib_post_srq_recv(struct ib_srq *ibsrq, struct ib_recv_wr *wr, struct ib_recv_wr **bad_wr); struct ib_qp *mlx5_ib_create_qp(struct ib_pd *pd, struct ib_qp_init_attr *init_attr, struct ib_udata *udata); int mlx5_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask, struct ib_udata *udata); int mlx5_ib_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr, int qp_attr_mask, struct ib_qp_init_attr *qp_init_attr); int mlx5_ib_destroy_qp(struct ib_qp *qp); int mlx5_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, struct ib_send_wr **bad_wr); int mlx5_ib_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr, struct ib_recv_wr **bad_wr); void *mlx5_get_send_wqe(struct mlx5_ib_qp *qp, int n); int mlx5_ib_read_user_wqe(struct mlx5_ib_qp *qp, int send, int wqe_index, void *buffer, u32 length, struct mlx5_ib_qp_base *base); struct ib_cq *mlx5_ib_create_cq(struct ib_device *ibdev, const struct ib_cq_init_attr *attr, struct ib_ucontext *context, struct ib_udata *udata); int mlx5_ib_destroy_cq(struct ib_cq *cq); int mlx5_ib_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc); int mlx5_ib_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags); int mlx5_ib_modify_cq(struct ib_cq *cq, u16 cq_count, u16 cq_period); int mlx5_ib_resize_cq(struct ib_cq *ibcq, int entries, struct ib_udata *udata); struct ib_mr *mlx5_ib_get_dma_mr(struct ib_pd *pd, int acc); struct ib_mr *mlx5_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, u64 virt_addr, int access_flags, struct ib_udata *udata); struct ib_mw *mlx5_ib_alloc_mw(struct ib_pd *pd, enum ib_mw_type type, struct ib_udata *udata); int mlx5_ib_dealloc_mw(struct ib_mw *mw); int mlx5_ib_update_mtt(struct mlx5_ib_mr *mr, u64 start_page_index, int npages, int zap); int mlx5_ib_rereg_user_mr(struct ib_mr *ib_mr, int flags, u64 start, u64 length, u64 virt_addr, int access_flags, struct ib_pd *pd, struct ib_udata *udata); int mlx5_ib_dereg_mr(struct ib_mr *ibmr); struct ib_mr *mlx5_ib_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type, u32 max_num_sg); int mlx5_ib_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, int sg_nents, unsigned int *sg_offset); int mlx5_ib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num, const struct ib_wc *in_wc, const struct ib_grh *in_grh, const struct ib_mad_hdr *in, size_t in_mad_size, struct ib_mad_hdr *out, size_t *out_mad_size, u16 *out_mad_pkey_index); struct ib_xrcd *mlx5_ib_alloc_xrcd(struct ib_device *ibdev, struct ib_ucontext *context, struct ib_udata *udata); int mlx5_ib_dealloc_xrcd(struct ib_xrcd *xrcd); int mlx5_ib_get_buf_offset(u64 addr, int page_shift, u32 *offset); int mlx5_query_ext_port_caps(struct mlx5_ib_dev *dev, u8 port); int mlx5_query_mad_ifc_smp_attr_node_info(struct ib_device *ibdev, struct ib_smp *out_mad); int mlx5_query_mad_ifc_system_image_guid(struct ib_device *ibdev, __be64 *sys_image_guid); int mlx5_query_mad_ifc_max_pkeys(struct ib_device *ibdev, u16 *max_pkeys); int mlx5_query_mad_ifc_vendor_id(struct ib_device *ibdev, u32 *vendor_id); int mlx5_query_mad_ifc_node_desc(struct mlx5_ib_dev *dev, char *node_desc); int mlx5_query_mad_ifc_node_guid(struct mlx5_ib_dev *dev, __be64 *node_guid); int mlx5_query_mad_ifc_pkey(struct ib_device *ibdev, u8 port, u16 index, u16 *pkey); int mlx5_query_mad_ifc_gids(struct ib_device *ibdev, u8 port, int index, union ib_gid *gid); int mlx5_query_mad_ifc_port(struct 
ib_device *ibdev, u8 port, struct ib_port_attr *props); int mlx5_ib_query_port(struct ib_device *ibdev, u8 port, struct ib_port_attr *props); int mlx5_ib_init_fmr(struct mlx5_ib_dev *dev); void mlx5_ib_cleanup_fmr(struct mlx5_ib_dev *dev); void mlx5_ib_cont_pages(struct ib_umem *umem, u64 addr, int *count, int *shift, int *ncont, int *order); void __mlx5_ib_populate_pas(struct mlx5_ib_dev *dev, struct ib_umem *umem, int page_shift, size_t offset, size_t num_pages, __be64 *pas, int access_flags); void mlx5_ib_populate_pas(struct mlx5_ib_dev *dev, struct ib_umem *umem, int page_shift, __be64 *pas, int access_flags); void mlx5_ib_copy_pas(u64 *old, u64 *new, int step, int num); int mlx5_ib_get_cqe_size(struct mlx5_ib_dev *dev, struct ib_cq *ibcq); int mlx5_mr_cache_init(struct mlx5_ib_dev *dev); int mlx5_mr_cache_cleanup(struct mlx5_ib_dev *dev); int mlx5_mr_ib_cont_pages(struct ib_umem *umem, u64 addr, int *count, int *shift); int mlx5_ib_check_mr_status(struct ib_mr *ibmr, u32 check_mask, struct ib_mr_status *mr_status); struct ib_wq *mlx5_ib_create_wq(struct ib_pd *pd, struct ib_wq_init_attr *init_attr, struct ib_udata *udata); int mlx5_ib_destroy_wq(struct ib_wq *wq); int mlx5_ib_modify_wq(struct ib_wq *wq, struct ib_wq_attr *wq_attr, u32 wq_attr_mask, struct ib_udata *udata); struct ib_rwq_ind_table *mlx5_ib_create_rwq_ind_table(struct ib_device *device, struct ib_rwq_ind_table_init_attr *init_attr, struct ib_udata *udata); int mlx5_ib_destroy_rwq_ind_table(struct ib_rwq_ind_table *wq_ind_table); #ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING extern struct workqueue_struct *mlx5_ib_page_fault_wq; void mlx5_ib_internal_fill_odp_caps(struct mlx5_ib_dev *dev); void mlx5_ib_mr_pfault_handler(struct mlx5_ib_qp *qp, struct mlx5_ib_pfault *pfault); void mlx5_ib_odp_create_qp(struct mlx5_ib_qp *qp); int mlx5_ib_odp_init_one(struct mlx5_ib_dev *ibdev); void mlx5_ib_odp_remove_one(struct mlx5_ib_dev *ibdev); int __init mlx5_ib_odp_init(void); void mlx5_ib_odp_cleanup(void); void mlx5_ib_qp_disable_pagefaults(struct mlx5_ib_qp *qp); void mlx5_ib_qp_enable_pagefaults(struct mlx5_ib_qp *qp); void mlx5_ib_invalidate_range(struct ib_umem *umem, unsigned long start, unsigned long end); #else /* CONFIG_INFINIBAND_ON_DEMAND_PAGING */ static inline void mlx5_ib_internal_fill_odp_caps(struct mlx5_ib_dev *dev) { return; } static inline void mlx5_ib_odp_create_qp(struct mlx5_ib_qp *qp) {} static inline int mlx5_ib_odp_init_one(struct mlx5_ib_dev *ibdev) { return 0; } static inline void mlx5_ib_odp_remove_one(struct mlx5_ib_dev *ibdev) {} static inline int mlx5_ib_odp_init(void) { return 0; } static inline void mlx5_ib_odp_cleanup(void) {} static inline void mlx5_ib_qp_disable_pagefaults(struct mlx5_ib_qp *qp) {} static inline void mlx5_ib_qp_enable_pagefaults(struct mlx5_ib_qp *qp) {} #endif /* CONFIG_INFINIBAND_ON_DEMAND_PAGING */ int mlx5_ib_get_vf_config(struct ib_device *device, int vf, u8 port, struct ifla_vf_info *info); int mlx5_ib_set_vf_link_state(struct ib_device *device, int vf, u8 port, int state); int mlx5_ib_get_vf_stats(struct ib_device *device, int vf, u8 port, struct ifla_vf_stats *stats); int mlx5_ib_set_vf_guid(struct ib_device *device, int vf, u8 port, u64 guid, int type); __be16 mlx5_get_roce_udp_sport(struct mlx5_ib_dev *dev, u8 port_num, int index); int mlx5_get_roce_gid_type(struct mlx5_ib_dev *dev, u8 port_num, int index, enum ib_gid_type *gid_type); /* GSI QP helper functions */ struct ib_qp *mlx5_ib_gsi_create_qp(struct ib_pd *pd, struct ib_qp_init_attr *init_attr); int 
mlx5_ib_gsi_destroy_qp(struct ib_qp *qp); int mlx5_ib_gsi_modify_qp(struct ib_qp *qp, struct ib_qp_attr *attr, int attr_mask); int mlx5_ib_gsi_query_qp(struct ib_qp *qp, struct ib_qp_attr *qp_attr, int qp_attr_mask, struct ib_qp_init_attr *qp_init_attr); int mlx5_ib_gsi_post_send(struct ib_qp *qp, struct ib_send_wr *wr, struct ib_send_wr **bad_wr); int mlx5_ib_gsi_post_recv(struct ib_qp *qp, struct ib_recv_wr *wr, struct ib_recv_wr **bad_wr); void mlx5_ib_gsi_pkey_change(struct mlx5_ib_gsi_qp *gsi); int mlx5_ib_generate_wc(struct ib_cq *ibcq, struct ib_wc *wc); +void mlx5_ib_free_bfreg(struct mlx5_ib_dev *dev, struct mlx5_bfreg_info *bfregi, + int bfregn); static inline void init_query_mad(struct ib_smp *mad) { mad->base_version = 1; mad->mgmt_class = IB_MGMT_CLASS_SUBN_LID_ROUTED; mad->class_version = 1; mad->method = IB_MGMT_METHOD_GET; } static inline u8 convert_access(int acc) { return (acc & IB_ACCESS_REMOTE_ATOMIC ? MLX5_PERM_ATOMIC : 0) | (acc & IB_ACCESS_REMOTE_WRITE ? MLX5_PERM_REMOTE_WRITE : 0) | (acc & IB_ACCESS_REMOTE_READ ? MLX5_PERM_REMOTE_READ : 0) | (acc & IB_ACCESS_LOCAL_WRITE ? MLX5_PERM_LOCAL_WRITE : 0) | MLX5_PERM_LOCAL_READ; } static inline int is_qp1(enum ib_qp_type qp_type) { return qp_type == MLX5_IB_QPT_HW_GSI; } #define MLX5_MAX_UMR_SHIFT 16 #define MLX5_MAX_UMR_PAGES (1 << MLX5_MAX_UMR_SHIFT) static inline u32 check_cq_create_flags(u32 flags) { /* * It returns non-zero value for unsupported CQ * create flags, otherwise it returns zero. */ return (flags & ~(IB_CQ_FLAGS_IGNORE_OVERRUN | IB_CQ_FLAGS_TIMESTAMP_COMPLETION)); } static inline int verify_assign_uidx(u8 cqe_version, u32 cmd_uidx, u32 *user_index) { if (cqe_version) { if ((cmd_uidx == MLX5_IB_DEFAULT_UIDX) || (cmd_uidx & ~MLX5_USER_ASSIGNED_UIDX_MASK)) return -EINVAL; *user_index = cmd_uidx; } else { *user_index = MLX5_IB_DEFAULT_UIDX; } return 0; } static inline int get_qp_user_index(struct mlx5_ib_ucontext *ucontext, struct mlx5_ib_create_qp *ucmd, int inlen, u32 *user_index) { u8 cqe_version = ucontext->cqe_version; if (field_avail(struct mlx5_ib_create_qp, uidx, inlen) && !cqe_version && (ucmd->uidx == MLX5_IB_DEFAULT_UIDX)) return 0; if (!!(field_avail(struct mlx5_ib_create_qp, uidx, inlen) != !!cqe_version)) return -EINVAL; return verify_assign_uidx(cqe_version, ucmd->uidx, user_index); } static inline int get_srq_user_index(struct mlx5_ib_ucontext *ucontext, struct mlx5_ib_create_srq *ucmd, int inlen, u32 *user_index) { u8 cqe_version = ucontext->cqe_version; if (field_avail(struct mlx5_ib_create_srq, uidx, inlen) && !cqe_version && (ucmd->uidx == MLX5_IB_DEFAULT_UIDX)) return 0; if (!!(field_avail(struct mlx5_ib_create_srq, uidx, inlen) != !!cqe_version)) return -EINVAL; return verify_assign_uidx(cqe_version, ucmd->uidx, user_index); } void mlx5_ib_cleanup_congestion(struct mlx5_ib_dev *); int mlx5_ib_init_congestion(struct mlx5_ib_dev *); +static inline int get_uars_per_sys_page(struct mlx5_ib_dev *dev, bool lib_support) +{ + return lib_support && MLX5_CAP_GEN(dev->mdev, uar_4k) ? 
+ MLX5_UARS_IN_PAGE : 1; +} + +static inline int get_num_static_uars(struct mlx5_ib_dev *dev, + struct mlx5_bfreg_info *bfregi) +{ + return get_uars_per_sys_page(dev, bfregi->lib_uar_4k) * bfregi->num_static_sys_pages; +} + +int bfregn_to_uar_index(struct mlx5_ib_dev *dev, + struct mlx5_bfreg_info *bfregi, u32 bfregn, + bool dyn_bfreg); + #endif /* MLX5_IB_H */ diff --git a/sys/dev/mlx5/mlx5_ib/mlx5_ib_cq.c b/sys/dev/mlx5/mlx5_ib/mlx5_ib_cq.c index 78d0518fe2bb..15c33fdee440 100644 --- a/sys/dev/mlx5/mlx5_ib/mlx5_ib_cq.c +++ b/sys/dev/mlx5/mlx5_ib/mlx5_ib_cq.c @@ -1,1403 +1,1411 @@ /*- * Copyright (c) 2013-2015, Mellanox Technologies, Ltd. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS `AS IS' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #include #include #include #include #include "mlx5_ib.h" static void mlx5_ib_cq_comp(struct mlx5_core_cq *cq, struct mlx5_eqe *eqe __unused) { struct ib_cq *ibcq = &to_mibcq(cq)->ibcq; ibcq->comp_handler(ibcq, ibcq->cq_context); } static void mlx5_ib_cq_event(struct mlx5_core_cq *mcq, int type) { struct mlx5_ib_cq *cq = container_of(mcq, struct mlx5_ib_cq, mcq); struct mlx5_ib_dev *dev = to_mdev(cq->ibcq.device); struct ib_cq *ibcq = &cq->ibcq; struct ib_event event; if (type != MLX5_EVENT_TYPE_CQ_ERROR) { mlx5_ib_warn(dev, "Unexpected event type %d on CQ %06x\n", type, mcq->cqn); return; } if (ibcq->event_handler) { event.device = &dev->ib_dev; event.event = IB_EVENT_CQ_ERR; event.element.cq = ibcq; ibcq->event_handler(&event, ibcq->cq_context); } } static void *get_cqe_from_buf(struct mlx5_ib_cq_buf *buf, int n, int size) { return mlx5_buf_offset(&buf->buf, n * size); } static void *get_cqe(struct mlx5_ib_cq *cq, int n) { return get_cqe_from_buf(&cq->buf, n, cq->mcq.cqe_sz); } static u8 sw_ownership_bit(int n, int nent) { return (n & nent) ? 1 : 0; } static void *get_sw_cqe(struct mlx5_ib_cq *cq, int n) { void *cqe = get_cqe(cq, n & cq->ibcq.cqe); struct mlx5_cqe64 *cqe64; cqe64 = (cq->mcq.cqe_sz == 64) ? 
cqe : cqe + 64; if (likely((cqe64->op_own) >> 4 != MLX5_CQE_INVALID) && !((cqe64->op_own & MLX5_CQE_OWNER_MASK) ^ !!(n & (cq->ibcq.cqe + 1)))) { return cqe; } else { return NULL; } } static void *next_cqe_sw(struct mlx5_ib_cq *cq) { return get_sw_cqe(cq, cq->mcq.cons_index); } static enum ib_wc_opcode get_umr_comp(struct mlx5_ib_wq *wq, int idx) { switch (wq->wr_data[idx]) { case MLX5_IB_WR_UMR: return 0; case IB_WR_LOCAL_INV: return IB_WC_LOCAL_INV; case IB_WR_REG_MR: return IB_WC_REG_MR; default: pr_warn("unknown completion status\n"); return 0; } } static void handle_good_req(struct ib_wc *wc, struct mlx5_cqe64 *cqe, struct mlx5_ib_wq *wq, int idx) { wc->wc_flags = 0; switch (be32_to_cpu(cqe->sop_drop_qpn) >> 24) { case MLX5_OPCODE_RDMA_WRITE_IMM: wc->wc_flags |= IB_WC_WITH_IMM; case MLX5_OPCODE_RDMA_WRITE: wc->opcode = IB_WC_RDMA_WRITE; break; case MLX5_OPCODE_SEND_IMM: wc->wc_flags |= IB_WC_WITH_IMM; case MLX5_OPCODE_SEND: case MLX5_OPCODE_SEND_INVAL: wc->opcode = IB_WC_SEND; break; case MLX5_OPCODE_RDMA_READ: wc->opcode = IB_WC_RDMA_READ; wc->byte_len = be32_to_cpu(cqe->byte_cnt); break; case MLX5_OPCODE_ATOMIC_CS: wc->opcode = IB_WC_COMP_SWAP; wc->byte_len = 8; break; case MLX5_OPCODE_ATOMIC_FA: wc->opcode = IB_WC_FETCH_ADD; wc->byte_len = 8; break; case MLX5_OPCODE_ATOMIC_MASKED_CS: wc->opcode = IB_WC_MASKED_COMP_SWAP; wc->byte_len = 8; break; case MLX5_OPCODE_ATOMIC_MASKED_FA: wc->opcode = IB_WC_MASKED_FETCH_ADD; wc->byte_len = 8; break; case MLX5_OPCODE_UMR: wc->opcode = get_umr_comp(wq, idx); break; } } enum { MLX5_GRH_IN_BUFFER = 1, MLX5_GRH_IN_CQE = 2, }; static void handle_responder(struct ib_wc *wc, struct mlx5_cqe64 *cqe, struct mlx5_ib_qp *qp) { enum rdma_link_layer ll = rdma_port_get_link_layer(qp->ibqp.device, 1); struct mlx5_ib_dev *dev = to_mdev(qp->ibqp.device); struct mlx5_ib_srq *srq; struct mlx5_ib_wq *wq; u16 wqe_ctr; u8 roce_packet_type; bool vlan_present; u8 g; if (qp->ibqp.srq || qp->ibqp.xrcd) { struct mlx5_core_srq *msrq = NULL; if (qp->ibqp.xrcd) { msrq = mlx5_core_get_srq(dev->mdev, be32_to_cpu(cqe->srqn)); srq = to_mibsrq(msrq); } else { srq = to_msrq(qp->ibqp.srq); } if (srq) { wqe_ctr = be16_to_cpu(cqe->wqe_counter); wc->wr_id = srq->wrid[wqe_ctr]; mlx5_ib_free_srq_wqe(srq, wqe_ctr); if (msrq && atomic_dec_and_test(&msrq->refcount)) complete(&msrq->free); } } else { wq = &qp->rq; wc->wr_id = wq->wrid[wq->tail & (wq->wqe_cnt - 1)]; ++wq->tail; } wc->byte_len = be32_to_cpu(cqe->byte_cnt); switch (cqe->op_own >> 4) { case MLX5_CQE_RESP_WR_IMM: wc->opcode = IB_WC_RECV_RDMA_WITH_IMM; wc->wc_flags = IB_WC_WITH_IMM; wc->ex.imm_data = cqe->imm_inval_pkey; break; case MLX5_CQE_RESP_SEND: wc->opcode = IB_WC_RECV; wc->wc_flags = IB_WC_IP_CSUM_OK; if (unlikely(!((cqe->hds_ip_ext & CQE_L3_OK) && (cqe->hds_ip_ext & CQE_L4_OK)))) wc->wc_flags = 0; break; case MLX5_CQE_RESP_SEND_IMM: wc->opcode = IB_WC_RECV; wc->wc_flags = IB_WC_WITH_IMM; wc->ex.imm_data = cqe->imm_inval_pkey; break; case MLX5_CQE_RESP_SEND_INV: wc->opcode = IB_WC_RECV; wc->wc_flags = IB_WC_WITH_INVALIDATE; wc->ex.invalidate_rkey = be32_to_cpu(cqe->imm_inval_pkey); break; } wc->slid = be16_to_cpu(cqe->slid); wc->src_qp = be32_to_cpu(cqe->flags_rqpn) & 0xffffff; wc->dlid_path_bits = cqe->ml_path; g = (be32_to_cpu(cqe->flags_rqpn) >> 28) & 3; wc->wc_flags |= g ? 
IB_WC_GRH : 0; if (unlikely(is_qp1(qp->ibqp.qp_type))) { u16 pkey = be32_to_cpu(cqe->imm_inval_pkey) & 0xffff; ib_find_cached_pkey(&dev->ib_dev, qp->port, pkey, &wc->pkey_index); } else { wc->pkey_index = 0; } if (ll != IB_LINK_LAYER_ETHERNET) { wc->sl = (be32_to_cpu(cqe->flags_rqpn) >> 24) & 0xf; return; } vlan_present = cqe_has_vlan(cqe); roce_packet_type = (be32_to_cpu(cqe->flags_rqpn) >> 24) & 0x3; if (vlan_present) { wc->vlan_id = (be16_to_cpu(cqe->vlan_info)) & 0xfff; wc->sl = (be16_to_cpu(cqe->vlan_info) >> 13) & 0x7; wc->wc_flags |= IB_WC_WITH_VLAN; } else { wc->sl = 0; } switch (roce_packet_type) { case MLX5_CQE_ROCE_L3_HEADER_TYPE_GRH: wc->network_hdr_type = RDMA_NETWORK_IB; break; case MLX5_CQE_ROCE_L3_HEADER_TYPE_IPV6: wc->network_hdr_type = RDMA_NETWORK_IPV6; break; case MLX5_CQE_ROCE_L3_HEADER_TYPE_IPV4: wc->network_hdr_type = RDMA_NETWORK_IPV4; break; } wc->wc_flags |= IB_WC_WITH_NETWORK_HDR_TYPE; } static void dump_cqe(struct mlx5_ib_dev *dev, struct mlx5_err_cqe *cqe) { __be32 *p = (__be32 *)cqe; int i; mlx5_ib_warn(dev, "dump error cqe\n"); for (i = 0; i < sizeof(*cqe) / 16; i++, p += 4) pr_info("%08x %08x %08x %08x\n", be32_to_cpu(p[0]), be32_to_cpu(p[1]), be32_to_cpu(p[2]), be32_to_cpu(p[3])); } static void mlx5_handle_error_cqe(struct mlx5_ib_dev *dev, struct mlx5_err_cqe *cqe, struct ib_wc *wc) { int dump = 1; switch (cqe->syndrome) { case MLX5_CQE_SYNDROME_LOCAL_LENGTH_ERR: wc->status = IB_WC_LOC_LEN_ERR; break; case MLX5_CQE_SYNDROME_LOCAL_QP_OP_ERR: wc->status = IB_WC_LOC_QP_OP_ERR; break; case MLX5_CQE_SYNDROME_LOCAL_PROT_ERR: wc->status = IB_WC_LOC_PROT_ERR; break; case MLX5_CQE_SYNDROME_WR_FLUSH_ERR: dump = 0; wc->status = IB_WC_WR_FLUSH_ERR; break; case MLX5_CQE_SYNDROME_MW_BIND_ERR: wc->status = IB_WC_MW_BIND_ERR; break; case MLX5_CQE_SYNDROME_BAD_RESP_ERR: wc->status = IB_WC_BAD_RESP_ERR; break; case MLX5_CQE_SYNDROME_LOCAL_ACCESS_ERR: wc->status = IB_WC_LOC_ACCESS_ERR; break; case MLX5_CQE_SYNDROME_REMOTE_INVAL_REQ_ERR: wc->status = IB_WC_REM_INV_REQ_ERR; break; case MLX5_CQE_SYNDROME_REMOTE_ACCESS_ERR: wc->status = IB_WC_REM_ACCESS_ERR; break; case MLX5_CQE_SYNDROME_REMOTE_OP_ERR: wc->status = IB_WC_REM_OP_ERR; break; case MLX5_CQE_SYNDROME_TRANSPORT_RETRY_EXC_ERR: wc->status = IB_WC_RETRY_EXC_ERR; dump = 0; break; case MLX5_CQE_SYNDROME_RNR_RETRY_EXC_ERR: wc->status = IB_WC_RNR_RETRY_EXC_ERR; dump = 0; break; case MLX5_CQE_SYNDROME_REMOTE_ABORTED_ERR: wc->status = IB_WC_REM_ABORT_ERR; break; default: wc->status = IB_WC_GENERAL_ERR; break; } wc->vendor_err = cqe->vendor_err_synd; if (dump) dump_cqe(dev, cqe); } static int is_atomic_response(struct mlx5_ib_qp *qp, uint16_t idx) { /* TBD: waiting decision */ return 0; } static void *mlx5_get_atomic_laddr(struct mlx5_ib_qp *qp, uint16_t idx) { struct mlx5_wqe_data_seg *dpseg; void *addr; dpseg = mlx5_get_send_wqe(qp, idx) + sizeof(struct mlx5_wqe_ctrl_seg) + sizeof(struct mlx5_wqe_raddr_seg) + sizeof(struct mlx5_wqe_atomic_seg); addr = (void *)(unsigned long)be64_to_cpu(dpseg->addr); return addr; } static void handle_atomic(struct mlx5_ib_qp *qp, struct mlx5_cqe64 *cqe64, uint16_t idx) { void *addr; int byte_count; int i; if (!is_atomic_response(qp, idx)) return; byte_count = be32_to_cpu(cqe64->byte_cnt); addr = mlx5_get_atomic_laddr(qp, idx); if (byte_count == 4) { *(uint32_t *)addr = be32_to_cpu(*((__be32 *)addr)); } else { for (i = 0; i < byte_count; i += 8) { *(uint64_t *)addr = be64_to_cpu(*((__be64 *)addr)); addr += 8; } } return; } static void handle_atomics(struct mlx5_ib_qp *qp, struct mlx5_cqe64 
*cqe64, u16 tail, u16 head) { u16 idx; do { idx = tail & (qp->sq.wqe_cnt - 1); handle_atomic(qp, cqe64, idx); if (idx == head) break; tail = qp->sq.w_list[idx].next; } while (1); tail = qp->sq.w_list[idx].next; qp->sq.last_poll = tail; } static void free_cq_buf(struct mlx5_ib_dev *dev, struct mlx5_ib_cq_buf *buf) { mlx5_buf_free(dev->mdev, &buf->buf); } static void get_sig_err_item(struct mlx5_sig_err_cqe *cqe, struct ib_sig_err *item) { u16 syndrome = be16_to_cpu(cqe->syndrome); #define GUARD_ERR (1 << 13) #define APPTAG_ERR (1 << 12) #define REFTAG_ERR (1 << 11) if (syndrome & GUARD_ERR) { item->err_type = IB_SIG_BAD_GUARD; item->expected = be32_to_cpu(cqe->expected_trans_sig) >> 16; item->actual = be32_to_cpu(cqe->actual_trans_sig) >> 16; } else if (syndrome & REFTAG_ERR) { item->err_type = IB_SIG_BAD_REFTAG; item->expected = be32_to_cpu(cqe->expected_reftag); item->actual = be32_to_cpu(cqe->actual_reftag); } else if (syndrome & APPTAG_ERR) { item->err_type = IB_SIG_BAD_APPTAG; item->expected = be32_to_cpu(cqe->expected_trans_sig) & 0xffff; item->actual = be32_to_cpu(cqe->actual_trans_sig) & 0xffff; } else { pr_err("Got signature completion error with bad syndrome %04x\n", syndrome); } item->sig_err_offset = be64_to_cpu(cqe->err_offset); item->key = be32_to_cpu(cqe->mkey); } static void sw_send_comp(struct mlx5_ib_qp *qp, int num_entries, struct ib_wc *wc, int *npolled) { struct mlx5_ib_wq *wq; unsigned int cur; unsigned int idx; int np; int i; wq = &qp->sq; cur = wq->head - wq->tail; np = *npolled; if (cur == 0) return; for (i = 0; i < cur && np < num_entries; i++) { idx = wq->last_poll & (wq->wqe_cnt - 1); wc->wr_id = wq->wrid[idx]; wc->status = IB_WC_WR_FLUSH_ERR; wc->vendor_err = MLX5_CQE_SYNDROME_WR_FLUSH_ERR; wq->tail++; np++; wc->qp = &qp->ibqp; wc++; wq->last_poll = wq->w_list[idx].next; } *npolled = np; } static void sw_recv_comp(struct mlx5_ib_qp *qp, int num_entries, struct ib_wc *wc, int *npolled) { struct mlx5_ib_wq *wq; unsigned int cur; int np; int i; wq = &qp->rq; cur = wq->head - wq->tail; np = *npolled; if (cur == 0) return; for (i = 0; i < cur && np < num_entries; i++) { wc->wr_id = wq->wrid[wq->tail & (wq->wqe_cnt - 1)]; wc->status = IB_WC_WR_FLUSH_ERR; wc->vendor_err = MLX5_CQE_SYNDROME_WR_FLUSH_ERR; wq->tail++; np++; wc->qp = &qp->ibqp; wc++; } *npolled = np; } static void mlx5_ib_poll_sw_comp(struct mlx5_ib_cq *cq, int num_entries, struct ib_wc *wc, int *npolled) { struct mlx5_ib_qp *qp; *npolled = 0; /* Find uncompleted WQEs belonging to that CQ and return software-generated flush completions for them */ list_for_each_entry(qp, &cq->list_send_qp, cq_send_list) { sw_send_comp(qp, num_entries, wc + *npolled, npolled); if (*npolled >= num_entries) return; } list_for_each_entry(qp, &cq->list_recv_qp, cq_recv_list) { sw_recv_comp(qp, num_entries, wc + *npolled, npolled); if (*npolled >= num_entries) return; } } static int mlx5_poll_one(struct mlx5_ib_cq *cq, struct mlx5_ib_qp **cur_qp, struct ib_wc *wc) { struct mlx5_ib_dev *dev = to_mdev(cq->ibcq.device); struct mlx5_err_cqe *err_cqe; struct mlx5_cqe64 *cqe64; struct mlx5_core_qp *mqp; struct mlx5_ib_wq *wq; struct mlx5_sig_err_cqe *sig_err_cqe; struct mlx5_core_mr *mmkey; struct mlx5_ib_mr *mr; unsigned long flags; uint8_t opcode; uint32_t qpn; u16 wqe_ctr; void *cqe; int idx; repoll: cqe = next_cqe_sw(cq); if (!cqe) return -EAGAIN; cqe64 = (cq->mcq.cqe_sz == 64) ? cqe : cqe + 64; ++cq->mcq.cons_index; /* Make sure we read CQ entry contents after we've checked the * ownership bit.
*/ rmb(); opcode = cqe64->op_own >> 4; if (unlikely(opcode == MLX5_CQE_RESIZE_CQ)) { if (likely(cq->resize_buf)) { free_cq_buf(dev, &cq->buf); cq->buf = *cq->resize_buf; kfree(cq->resize_buf); cq->resize_buf = NULL; goto repoll; } else { mlx5_ib_warn(dev, "unexpected resize cqe\n"); } } qpn = ntohl(cqe64->sop_drop_qpn) & 0xffffff; if (!*cur_qp || (qpn != (*cur_qp)->ibqp.qp_num)) { /* We do not have to take the QP table lock here, * because CQs will be locked while QPs are removed * from the table. */ mqp = __mlx5_qp_lookup(dev->mdev, qpn); *cur_qp = to_mibqp(mqp); } wc->qp = &(*cur_qp)->ibqp; switch (opcode) { case MLX5_CQE_REQ: wq = &(*cur_qp)->sq; wqe_ctr = be16_to_cpu(cqe64->wqe_counter); idx = wqe_ctr & (wq->wqe_cnt - 1); handle_good_req(wc, cqe64, wq, idx); handle_atomics(*cur_qp, cqe64, wq->last_poll, idx); wc->wr_id = wq->wrid[idx]; wq->tail = wq->wqe_head[idx] + 1; wc->status = IB_WC_SUCCESS; break; case MLX5_CQE_RESP_WR_IMM: case MLX5_CQE_RESP_SEND: case MLX5_CQE_RESP_SEND_IMM: case MLX5_CQE_RESP_SEND_INV: handle_responder(wc, cqe64, *cur_qp); wc->status = IB_WC_SUCCESS; break; case MLX5_CQE_RESIZE_CQ: break; case MLX5_CQE_REQ_ERR: case MLX5_CQE_RESP_ERR: err_cqe = (struct mlx5_err_cqe *)cqe64; mlx5_handle_error_cqe(dev, err_cqe, wc); mlx5_ib_dbg(dev, "%s error cqe on cqn 0x%x:\n", opcode == MLX5_CQE_REQ_ERR ? "Requestor" : "Responder", cq->mcq.cqn); mlx5_ib_dbg(dev, "syndrome 0x%x, vendor syndrome 0x%x\n", err_cqe->syndrome, err_cqe->vendor_err_synd); if (opcode == MLX5_CQE_REQ_ERR) { wq = &(*cur_qp)->sq; wqe_ctr = be16_to_cpu(cqe64->wqe_counter); idx = wqe_ctr & (wq->wqe_cnt - 1); wc->wr_id = wq->wrid[idx]; wq->tail = wq->wqe_head[idx] + 1; } else { struct mlx5_ib_srq *srq; if ((*cur_qp)->ibqp.srq) { srq = to_msrq((*cur_qp)->ibqp.srq); wqe_ctr = be16_to_cpu(cqe64->wqe_counter); wc->wr_id = srq->wrid[wqe_ctr]; mlx5_ib_free_srq_wqe(srq, wqe_ctr); } else { wq = &(*cur_qp)->rq; wc->wr_id = wq->wrid[wq->tail & (wq->wqe_cnt - 1)]; ++wq->tail; } } break; case MLX5_CQE_SIG_ERR: sig_err_cqe = (struct mlx5_sig_err_cqe *)cqe64; spin_lock_irqsave(&dev->mdev->priv.mr_table.lock, flags); mmkey = __mlx5_mr_lookup(dev->mdev, mlx5_base_mkey(be32_to_cpu(sig_err_cqe->mkey))); mr = to_mibmr(mmkey); get_sig_err_item(sig_err_cqe, &mr->sig->err_item); mr->sig->sig_err_exists = true; mr->sig->sigerr_count++; mlx5_ib_warn(dev, "CQN: 0x%x Got SIGERR on key: 0x%x err_type %x err_offset %llx expected %x actual %x\n", cq->mcq.cqn, mr->sig->err_item.key, mr->sig->err_item.err_type, (long long)mr->sig->err_item.sig_err_offset, mr->sig->err_item.expected, mr->sig->err_item.actual); spin_unlock_irqrestore(&dev->mdev->priv.mr_table.lock, flags); goto repoll; } return 0; } static int poll_soft_wc(struct mlx5_ib_cq *cq, int num_entries, struct ib_wc *wc) { struct mlx5_ib_dev *dev = to_mdev(cq->ibcq.device); struct mlx5_ib_wc *soft_wc, *next; int npolled = 0; list_for_each_entry_safe(soft_wc, next, &cq->wc_list, list) { if (npolled >= num_entries) break; mlx5_ib_dbg(dev, "polled software generated completion on CQ 0x%x\n", cq->mcq.cqn); wc[npolled++] = soft_wc->wc; list_del(&soft_wc->list); kfree(soft_wc); } return npolled; } int mlx5_ib_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc) { struct mlx5_ib_cq *cq = to_mcq(ibcq); struct mlx5_ib_qp *cur_qp = NULL; struct mlx5_ib_dev *dev = to_mdev(cq->ibcq.device); struct mlx5_core_dev *mdev = dev->mdev; unsigned long flags; int soft_polled = 0; int npolled; spin_lock_irqsave(&cq->lock, flags); if (unlikely(mdev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR)) 
{ mlx5_ib_poll_sw_comp(cq, num_entries, wc, &npolled); goto out; } if (unlikely(!list_empty(&cq->wc_list))) soft_polled = poll_soft_wc(cq, num_entries, wc); for (npolled = 0; npolled < num_entries - soft_polled; npolled++) { if (mlx5_poll_one(cq, &cur_qp, wc + soft_polled + npolled)) break; } if (npolled) mlx5_cq_set_ci(&cq->mcq); out: spin_unlock_irqrestore(&cq->lock, flags); return soft_polled + npolled; } int mlx5_ib_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags) { struct mlx5_core_dev *mdev = to_mdev(ibcq->device)->mdev; struct mlx5_ib_cq *cq = to_mcq(ibcq); - void __iomem *uar_page = mdev->priv.uuari.uars[0].map; + void __iomem *uar_page = mdev->priv.uar->map; unsigned long irq_flags; int ret = 0; if (unlikely(mdev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR)) return -1; spin_lock_irqsave(&cq->lock, irq_flags); if (cq->notify_flags != IB_CQ_NEXT_COMP) cq->notify_flags = flags & IB_CQ_SOLICITED_MASK; if ((flags & IB_CQ_REPORT_MISSED_EVENTS) && !list_empty(&cq->wc_list)) ret = 1; spin_unlock_irqrestore(&cq->lock, irq_flags); mlx5_cq_arm(&cq->mcq, (flags & IB_CQ_SOLICITED_MASK) == IB_CQ_SOLICITED ? MLX5_CQ_DB_REQ_NOT_SOL : MLX5_CQ_DB_REQ_NOT, uar_page, MLX5_GET_DOORBELL_LOCK(&mdev->priv.cq_uar_lock), cq->mcq.cons_index); return ret; } static int alloc_cq_buf(struct mlx5_ib_dev *dev, struct mlx5_ib_cq_buf *buf, int nent, int cqe_size) { int err; err = mlx5_buf_alloc(dev->mdev, nent * cqe_size, 2 * PAGE_SIZE, &buf->buf); if (err) return err; buf->cqe_size = cqe_size; buf->nent = nent; return 0; } static int create_cq_user(struct mlx5_ib_dev *dev, struct ib_udata *udata, struct ib_ucontext *context, struct mlx5_ib_cq *cq, int entries, u32 **cqb, int *cqe_size, int *index, int *inlen) { struct mlx5_ib_create_cq ucmd; size_t ucmdlen; int page_shift; __be64 *pas; int npages; int ncont; void *cqc; int err; - ucmdlen = - (udata->inlen - sizeof(struct ib_uverbs_cmd_hdr) < - sizeof(ucmd)) ? 
(sizeof(ucmd) - - sizeof(ucmd.reserved)) : sizeof(ucmd); + ucmdlen = min(udata->inlen, sizeof(ucmd)); + if (ucmdlen < offsetof(struct mlx5_ib_create_cq, flags)) + return -EINVAL; if (ib_copy_from_udata(&ucmd, udata, ucmdlen)) return -EFAULT; - if (ucmdlen == sizeof(ucmd) && - ucmd.reserved != 0) + if ((ucmd.flags & ~(MLX5_IB_CREATE_CQ_FLAGS_UAR_PAGE_INDEX))) return -EINVAL; if (ucmd.cqe_size != 64 && ucmd.cqe_size != 128) return -EINVAL; *cqe_size = ucmd.cqe_size; cq->buf.umem = ib_umem_get(context, ucmd.buf_addr, entries * ucmd.cqe_size, IB_ACCESS_LOCAL_WRITE, 1); if (IS_ERR(cq->buf.umem)) { err = PTR_ERR(cq->buf.umem); return err; } err = mlx5_ib_db_map_user(to_mucontext(context), ucmd.db_addr, &cq->db); if (err) goto err_umem; mlx5_ib_cont_pages(cq->buf.umem, ucmd.buf_addr, &npages, &page_shift, &ncont, NULL); mlx5_ib_dbg(dev, "addr 0x%llx, size %u, npages %d, page_shift %d, ncont %d\n", (long long)ucmd.buf_addr, entries * ucmd.cqe_size, npages, page_shift, ncont); *inlen = MLX5_ST_SZ_BYTES(create_cq_in) + MLX5_FLD_SZ_BYTES(create_cq_in, pas[0]) * ncont; *cqb = mlx5_vzalloc(*inlen); if (!*cqb) { err = -ENOMEM; goto err_db; } pas = (__be64 *)MLX5_ADDR_OF(create_cq_in, *cqb, pas); mlx5_ib_populate_pas(dev, cq->buf.umem, page_shift, pas, 0); cqc = MLX5_ADDR_OF(create_cq_in, *cqb, cq_context); MLX5_SET(cqc, cqc, log_page_size, page_shift - MLX5_ADAPTER_PAGE_SHIFT); - *index = to_mucontext(context)->uuari.uars[0].index; + if (ucmd.flags & MLX5_IB_CREATE_CQ_FLAGS_UAR_PAGE_INDEX) { + *index = ucmd.uar_page_index; + } else if (to_mucontext(context)->bfregi.lib_uar_dyn) { + err = -EINVAL; + goto err_cqb; + } else { + *index = to_mucontext(context)->bfregi.sys_pages[0]; + } return 0; +err_cqb: + kvfree(*cqb); + err_db: mlx5_ib_db_unmap_user(to_mucontext(context), &cq->db); err_umem: ib_umem_release(cq->buf.umem); return err; } static void destroy_cq_user(struct mlx5_ib_cq *cq, struct ib_ucontext *context) { mlx5_ib_db_unmap_user(to_mucontext(context), &cq->db); ib_umem_release(cq->buf.umem); } static void init_cq_buf(struct mlx5_ib_cq *cq, struct mlx5_ib_cq_buf *buf) { int i; void *cqe; struct mlx5_cqe64 *cqe64; for (i = 0; i < buf->nent; i++) { cqe = get_cqe_from_buf(buf, i, buf->cqe_size); cqe64 = buf->cqe_size == 64 ? 
cqe : cqe + 64; cqe64->op_own = MLX5_CQE_INVALID << 4; } } static int create_cq_kernel(struct mlx5_ib_dev *dev, struct mlx5_ib_cq *cq, int entries, int cqe_size, u32 **cqb, int *index, int *inlen) { __be64 *pas; void *cqc; int err; err = mlx5_db_alloc(dev->mdev, &cq->db); if (err) return err; cq->mcq.set_ci_db = cq->db.db; cq->mcq.arm_db = cq->db.db + 1; cq->mcq.cqe_sz = cqe_size; err = alloc_cq_buf(dev, &cq->buf, entries, cqe_size); if (err) goto err_db; init_cq_buf(cq, &cq->buf); *inlen = MLX5_ST_SZ_BYTES(create_cq_in) + MLX5_FLD_SZ_BYTES(create_cq_in, pas[0]) * cq->buf.buf.npages; *cqb = mlx5_vzalloc(*inlen); if (!*cqb) { err = -ENOMEM; goto err_buf; } pas = (__be64 *)MLX5_ADDR_OF(create_cq_in, *cqb, pas); mlx5_fill_page_array(&cq->buf.buf, pas); cqc = MLX5_ADDR_OF(create_cq_in, *cqb, cq_context); MLX5_SET(cqc, cqc, log_page_size, cq->buf.buf.page_shift - MLX5_ADAPTER_PAGE_SHIFT); - *index = dev->mdev->priv.uuari.uars[0].index; + *index = dev->mdev->priv.uar->index; return 0; err_buf: free_cq_buf(dev, &cq->buf); err_db: mlx5_db_free(dev->mdev, &cq->db); return err; } static void destroy_cq_kernel(struct mlx5_ib_dev *dev, struct mlx5_ib_cq *cq) { free_cq_buf(dev, &cq->buf); mlx5_db_free(dev->mdev, &cq->db); } static void notify_soft_wc_handler(struct work_struct *work) { struct mlx5_ib_cq *cq = container_of(work, struct mlx5_ib_cq, notify_work); cq->ibcq.comp_handler(&cq->ibcq, cq->ibcq.cq_context); } struct ib_cq *mlx5_ib_create_cq(struct ib_device *ibdev, const struct ib_cq_init_attr *attr, struct ib_ucontext *context, struct ib_udata *udata) { int entries = attr->cqe; int vector = attr->comp_vector; struct mlx5_ib_dev *dev = to_mdev(ibdev); u32 out[MLX5_ST_SZ_DW(create_cq_out)]; struct mlx5_ib_cq *cq; int uninitialized_var(index); int uninitialized_var(inlen); u32 *cqb = NULL; void *cqc; int cqe_size; unsigned int irqn; int eqn; int err; if (entries < 0 || (entries > (1 << MLX5_CAP_GEN(dev->mdev, log_max_cq_sz)))) return ERR_PTR(-EINVAL); if (check_cq_create_flags(attr->flags)) return ERR_PTR(-EOPNOTSUPP); entries = roundup_pow_of_two(entries + 1); if (entries > (1 << MLX5_CAP_GEN(dev->mdev, log_max_cq_sz))) return ERR_PTR(-EINVAL); cq = kzalloc(sizeof(*cq), GFP_KERNEL); if (!cq) return ERR_PTR(-ENOMEM); cq->ibcq.cqe = entries - 1; mutex_init(&cq->resize_mutex); spin_lock_init(&cq->lock); cq->resize_buf = NULL; cq->resize_umem = NULL; cq->create_flags = attr->flags; INIT_LIST_HEAD(&cq->list_send_qp); INIT_LIST_HEAD(&cq->list_recv_qp); if (context) { err = create_cq_user(dev, udata, context, cq, entries, &cqb, &cqe_size, &index, &inlen); if (err) goto err_create; } else { cqe_size = cache_line_size() == 128 ? 
128 : 64; err = create_cq_kernel(dev, cq, entries, cqe_size, &cqb, &index, &inlen); if (err) goto err_create; INIT_WORK(&cq->notify_work, notify_soft_wc_handler); } err = mlx5_vector2eqn(dev->mdev, vector, &eqn, &irqn); if (err) goto err_cqb; cq->cqe_size = cqe_size; cqc = MLX5_ADDR_OF(create_cq_in, cqb, cq_context); MLX5_SET(cqc, cqc, cqe_sz, cqe_sz_to_mlx_sz(cqe_size)); MLX5_SET(cqc, cqc, log_cq_size, ilog2(entries)); MLX5_SET(cqc, cqc, uar_page, index); MLX5_SET(cqc, cqc, c_eqn, eqn); MLX5_SET64(cqc, cqc, dbr_addr, cq->db.dma); if (cq->create_flags & IB_CQ_FLAGS_IGNORE_OVERRUN) MLX5_SET(cqc, cqc, oi, 1); err = mlx5_core_create_cq(dev->mdev, &cq->mcq, cqb, inlen, out, sizeof(out)); if (err) goto err_cqb; mlx5_ib_dbg(dev, "cqn 0x%x\n", cq->mcq.cqn); cq->mcq.irqn = irqn; cq->mcq.comp = mlx5_ib_cq_comp; cq->mcq.event = mlx5_ib_cq_event; INIT_LIST_HEAD(&cq->wc_list); if (context) if (ib_copy_to_udata(udata, &cq->mcq.cqn, sizeof(__u32))) { err = -EFAULT; goto err_cmd; } kvfree(cqb); return &cq->ibcq; err_cmd: mlx5_core_destroy_cq(dev->mdev, &cq->mcq); err_cqb: kvfree(cqb); if (context) destroy_cq_user(cq, context); else destroy_cq_kernel(dev, cq); err_create: kfree(cq); return ERR_PTR(err); } int mlx5_ib_destroy_cq(struct ib_cq *cq) { struct mlx5_ib_dev *dev = to_mdev(cq->device); struct mlx5_ib_cq *mcq = to_mcq(cq); struct ib_ucontext *context = NULL; if (cq->uobject) context = cq->uobject->context; mlx5_core_destroy_cq(dev->mdev, &mcq->mcq); if (context) destroy_cq_user(mcq, context); else destroy_cq_kernel(dev, mcq); kfree(mcq); return 0; } static int is_equal_rsn(struct mlx5_cqe64 *cqe64, u32 rsn) { return rsn == (ntohl(cqe64->sop_drop_qpn) & 0xffffff); } void __mlx5_ib_cq_clean(struct mlx5_ib_cq *cq, u32 rsn, struct mlx5_ib_srq *srq) { struct mlx5_cqe64 *cqe64, *dest64; void *cqe, *dest; u32 prod_index; int nfreed = 0; u8 owner_bit; if (!cq) return; /* First we need to find the current producer index, so we * know where to start cleaning from. It doesn't matter if HW * adds new entries after this loop -- the QP we're worried * about is already in RESET, so the new entries won't come * from our QP and therefore don't need to be checked. */ for (prod_index = cq->mcq.cons_index; get_sw_cqe(cq, prod_index); prod_index++) if (prod_index == cq->mcq.cons_index + cq->ibcq.cqe) break; /* Now sweep backwards through the CQ, removing CQ entries * that match our QP by copying older entries on top of them. */ while ((int) --prod_index - (int) cq->mcq.cons_index >= 0) { cqe = get_cqe(cq, prod_index & cq->ibcq.cqe); cqe64 = (cq->mcq.cqe_sz == 64) ? cqe : cqe + 64; if (is_equal_rsn(cqe64, rsn)) { if (srq && (ntohl(cqe64->srqn) & 0xffffff)) mlx5_ib_free_srq_wqe(srq, be16_to_cpu(cqe64->wqe_counter)); ++nfreed; } else if (nfreed) { dest = get_cqe(cq, (prod_index + nfreed) & cq->ibcq.cqe); dest64 = (cq->mcq.cqe_sz == 64) ? dest : dest + 64; owner_bit = dest64->op_own & MLX5_CQE_OWNER_MASK; memcpy(dest, cqe, cq->mcq.cqe_sz); dest64->op_own = owner_bit | (dest64->op_own & ~MLX5_CQE_OWNER_MASK); } } if (nfreed) { cq->mcq.cons_index += nfreed; /* Make sure update of buffer contents is done before * updating consumer index. 
*/ wmb(); mlx5_cq_set_ci(&cq->mcq); } } void mlx5_ib_cq_clean(struct mlx5_ib_cq *cq, u32 qpn, struct mlx5_ib_srq *srq) { if (!cq) return; spin_lock_irq(&cq->lock); __mlx5_ib_cq_clean(cq, qpn, srq); spin_unlock_irq(&cq->lock); } int mlx5_ib_modify_cq(struct ib_cq *cq, u16 cq_count, u16 cq_period) { struct mlx5_ib_dev *dev = to_mdev(cq->device); struct mlx5_ib_cq *mcq = to_mcq(cq); int err; if (!MLX5_CAP_GEN(dev->mdev, cq_moderation)) return -ENOSYS; err = mlx5_core_modify_cq_moderation(dev->mdev, &mcq->mcq, cq_period, cq_count); if (err) mlx5_ib_warn(dev, "modify cq 0x%x failed\n", mcq->mcq.cqn); return err; } static int resize_user(struct mlx5_ib_dev *dev, struct mlx5_ib_cq *cq, int entries, struct ib_udata *udata, int *npas, int *page_shift, int *cqe_size) { struct mlx5_ib_resize_cq ucmd; struct ib_umem *umem; int err; int npages; struct ib_ucontext *context = cq->buf.umem->context; err = ib_copy_from_udata(&ucmd, udata, sizeof(ucmd)); if (err) return err; if (ucmd.reserved0 || ucmd.reserved1) return -EINVAL; /* check multiplication overflow */ if (ucmd.cqe_size && SIZE_MAX / ucmd.cqe_size <= entries - 1) return -EINVAL; umem = ib_umem_get(context, ucmd.buf_addr, (size_t)ucmd.cqe_size * entries, IB_ACCESS_LOCAL_WRITE, 1); if (IS_ERR(umem)) { err = PTR_ERR(umem); return err; } mlx5_ib_cont_pages(umem, ucmd.buf_addr, &npages, page_shift, npas, NULL); cq->resize_umem = umem; *cqe_size = ucmd.cqe_size; return 0; } static void un_resize_user(struct mlx5_ib_cq *cq) { ib_umem_release(cq->resize_umem); } static int resize_kernel(struct mlx5_ib_dev *dev, struct mlx5_ib_cq *cq, int entries, int cqe_size) { int err; cq->resize_buf = kzalloc(sizeof(*cq->resize_buf), GFP_KERNEL); if (!cq->resize_buf) return -ENOMEM; err = alloc_cq_buf(dev, cq->resize_buf, entries, cqe_size); if (err) goto ex; init_cq_buf(cq, cq->resize_buf); return 0; ex: kfree(cq->resize_buf); return err; } static void un_resize_kernel(struct mlx5_ib_dev *dev, struct mlx5_ib_cq *cq) { free_cq_buf(dev, cq->resize_buf); cq->resize_buf = NULL; } static int copy_resize_cqes(struct mlx5_ib_cq *cq) { struct mlx5_ib_dev *dev = to_mdev(cq->ibcq.device); struct mlx5_cqe64 *scqe64; struct mlx5_cqe64 *dcqe64; void *start_cqe; void *scqe; void *dcqe; int ssize; int dsize; int i; u8 sw_own; ssize = cq->buf.cqe_size; dsize = cq->resize_buf->cqe_size; if (ssize != dsize) { mlx5_ib_warn(dev, "resize from different cqe size is not supported\n"); return -EINVAL; } i = cq->mcq.cons_index; scqe = get_sw_cqe(cq, i); scqe64 = ssize == 64 ? scqe : scqe + 64; start_cqe = scqe; if (!scqe) { mlx5_ib_warn(dev, "expected cqe in sw ownership\n"); return -EINVAL; } while ((scqe64->op_own >> 4) != MLX5_CQE_RESIZE_CQ) { dcqe = get_cqe_from_buf(cq->resize_buf, (i + 1) & (cq->resize_buf->nent), dsize); dcqe64 = dsize == 64 ? dcqe : dcqe + 64; sw_own = sw_ownership_bit(i + 1, cq->resize_buf->nent); memcpy(dcqe, scqe, dsize); dcqe64->op_own = (dcqe64->op_own & ~MLX5_CQE_OWNER_MASK) | sw_own; ++i; scqe = get_sw_cqe(cq, i); scqe64 = ssize == 64 ? 
scqe : scqe + 64; if (!scqe) { mlx5_ib_warn(dev, "expected cqe in sw ownership\n"); return -EINVAL; } if (scqe == start_cqe) { pr_warn("resize CQ failed to get resize CQE, CQN 0x%x\n", cq->mcq.cqn); return -ENOMEM; } } ++cq->mcq.cons_index; return 0; } int mlx5_ib_resize_cq(struct ib_cq *ibcq, int entries, struct ib_udata *udata) { struct mlx5_ib_dev *dev = to_mdev(ibcq->device); struct mlx5_ib_cq *cq = to_mcq(ibcq); void *cqc; u32 *in; int err; int npas; __be64 *pas; int page_shift; int inlen; int uninitialized_var(cqe_size); unsigned long flags; if (!MLX5_CAP_GEN(dev->mdev, cq_resize)) { pr_info("Firmware does not support resize CQ\n"); return -ENOSYS; } if (entries < 1 || entries > (1 << MLX5_CAP_GEN(dev->mdev, log_max_cq_sz))) { mlx5_ib_warn(dev, "wrong entries number %d, max %d\n", entries, 1 << MLX5_CAP_GEN(dev->mdev, log_max_cq_sz)); return -EINVAL; } entries = roundup_pow_of_two(entries + 1); if (entries > (1 << MLX5_CAP_GEN(dev->mdev, log_max_cq_sz)) + 1) return -EINVAL; if (entries == ibcq->cqe + 1) return 0; mutex_lock(&cq->resize_mutex); if (udata) { err = resize_user(dev, cq, entries, udata, &npas, &page_shift, &cqe_size); } else { cqe_size = 64; err = resize_kernel(dev, cq, entries, cqe_size); if (!err) { npas = cq->resize_buf->buf.npages; page_shift = cq->resize_buf->buf.page_shift; } } if (err) goto ex; inlen = MLX5_ST_SZ_BYTES(modify_cq_in) + MLX5_FLD_SZ_BYTES(modify_cq_in, pas[0]) * npas; in = mlx5_vzalloc(inlen); if (!in) { err = -ENOMEM; goto ex_resize; } pas = (__be64 *)MLX5_ADDR_OF(modify_cq_in, in, pas); if (udata) mlx5_ib_populate_pas(dev, cq->resize_umem, page_shift, pas, 0); else mlx5_fill_page_array(&cq->resize_buf->buf, pas); MLX5_SET(modify_cq_in, in, modify_field_select_resize_field_select.resize_field_select.resize_field_select, MLX5_MODIFY_CQ_MASK_LOG_SIZE | MLX5_MODIFY_CQ_MASK_PG_OFFSET | MLX5_MODIFY_CQ_MASK_PG_SIZE); cqc = MLX5_ADDR_OF(modify_cq_in, in, cq_context); MLX5_SET(cqc, cqc, log_page_size, page_shift - MLX5_ADAPTER_PAGE_SHIFT); MLX5_SET(cqc, cqc, cqe_sz, cqe_sz_to_mlx_sz(cqe_size)); MLX5_SET(cqc, cqc, log_cq_size, ilog2(entries)); MLX5_SET(modify_cq_in, in, op_mod, MLX5_CQ_OPMOD_RESIZE); MLX5_SET(modify_cq_in, in, cqn, cq->mcq.cqn); err = mlx5_core_modify_cq(dev->mdev, &cq->mcq, in, inlen); if (err) goto ex_alloc; if (udata) { cq->ibcq.cqe = entries - 1; ib_umem_release(cq->buf.umem); cq->buf.umem = cq->resize_umem; cq->resize_umem = NULL; } else { struct mlx5_ib_cq_buf tbuf; int resized = 0; spin_lock_irqsave(&cq->lock, flags); if (cq->resize_buf) { err = copy_resize_cqes(cq); if (!err) { tbuf = cq->buf; cq->buf = *cq->resize_buf; kfree(cq->resize_buf); cq->resize_buf = NULL; resized = 1; } } cq->ibcq.cqe = entries - 1; spin_unlock_irqrestore(&cq->lock, flags); if (resized) free_cq_buf(dev, &tbuf); } mutex_unlock(&cq->resize_mutex); kvfree(in); return 0; ex_alloc: kvfree(in); ex_resize: if (udata) un_resize_user(cq); else un_resize_kernel(dev, cq); ex: mutex_unlock(&cq->resize_mutex); return err; } int mlx5_ib_get_cqe_size(struct mlx5_ib_dev *dev, struct ib_cq *ibcq) { struct mlx5_ib_cq *cq; if (!ibcq) return 128; cq = to_mcq(ibcq); return cq->cqe_size; } /* Called from atomic context */ int mlx5_ib_generate_wc(struct ib_cq *ibcq, struct ib_wc *wc) { struct mlx5_ib_wc *soft_wc; struct mlx5_ib_cq *cq = to_mcq(ibcq); unsigned long flags; soft_wc = kmalloc(sizeof(*soft_wc), GFP_ATOMIC); if (!soft_wc) return -ENOMEM; soft_wc->wc = *wc; spin_lock_irqsave(&cq->lock, flags); list_add_tail(&soft_wc->list, &cq->wc_list); if (cq->notify_flags == 
IB_CQ_NEXT_COMP || wc->status != IB_WC_SUCCESS) { cq->notify_flags = 0; schedule_work(&cq->notify_work); } spin_unlock_irqrestore(&cq->lock, flags); return 0; } diff --git a/sys/dev/mlx5/mlx5_ib/mlx5_ib_gsi.c b/sys/dev/mlx5/mlx5_ib/mlx5_ib_gsi.c index 31d1c11a2d35..6c0417851665 100644 --- a/sys/dev/mlx5/mlx5_ib/mlx5_ib_gsi.c +++ b/sys/dev/mlx5/mlx5_ib/mlx5_ib_gsi.c @@ -1,536 +1,536 @@ /*- * Copyright (c) 2016, Mellanox Technologies, Ltd. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS `AS IS' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #include "mlx5_ib.h" struct mlx5_ib_gsi_wr { struct ib_cqe cqe; struct ib_wc wc; int send_flags; bool completed:1; }; struct mlx5_ib_gsi_qp { struct ib_qp ibqp; struct ib_qp *rx_qp; u8 port_num; struct ib_qp_cap cap; enum ib_sig_type sq_sig_type; /* Serialize qp state modifications */ struct mutex mutex; struct ib_cq *cq; struct mlx5_ib_gsi_wr *outstanding_wrs; u32 outstanding_pi, outstanding_ci; int num_qps; /* Protects access to the tx_qps. Post send operations synchronize * with tx_qp creation in setup_qp(). Also protects the * outstanding_wrs array and indices. 
*/ spinlock_t lock; struct ib_qp **tx_qps; }; static struct mlx5_ib_gsi_qp *gsi_qp(struct ib_qp *qp) { return container_of(qp, struct mlx5_ib_gsi_qp, ibqp); } static bool mlx5_ib_deth_sqpn_cap(struct mlx5_ib_dev *dev) { return MLX5_CAP_GEN(dev->mdev, set_deth_sqpn); } /* Call with gsi->lock locked */ static void generate_completions(struct mlx5_ib_gsi_qp *gsi) { struct ib_cq *gsi_cq = gsi->ibqp.send_cq; struct mlx5_ib_gsi_wr *wr; u32 index; for (index = gsi->outstanding_ci; index != gsi->outstanding_pi; index++) { wr = &gsi->outstanding_wrs[index % gsi->cap.max_send_wr]; if (!wr->completed) break; if (gsi->sq_sig_type == IB_SIGNAL_ALL_WR || wr->send_flags & IB_SEND_SIGNALED) WARN_ON_ONCE(mlx5_ib_generate_wc(gsi_cq, &wr->wc)); wr->completed = false; } gsi->outstanding_ci = index; } static void handle_single_completion(struct ib_cq *cq, struct ib_wc *wc) { struct mlx5_ib_gsi_qp *gsi = cq->cq_context; struct mlx5_ib_gsi_wr *wr = container_of(wc->wr_cqe, struct mlx5_ib_gsi_wr, cqe); u64 wr_id; unsigned long flags; spin_lock_irqsave(&gsi->lock, flags); wr->completed = true; wr_id = wr->wc.wr_id; wr->wc = *wc; wr->wc.wr_id = wr_id; wr->wc.qp = &gsi->ibqp; generate_completions(gsi); spin_unlock_irqrestore(&gsi->lock, flags); } struct ib_qp *mlx5_ib_gsi_create_qp(struct ib_pd *pd, struct ib_qp_init_attr *init_attr) { struct mlx5_ib_dev *dev = to_mdev(pd->device); struct mlx5_ib_gsi_qp *gsi; struct ib_qp_init_attr hw_init_attr = *init_attr; const u8 port_num = init_attr->port_num; const int num_pkeys = pd->device->attrs.max_pkeys; const int num_qps = mlx5_ib_deth_sqpn_cap(dev) ? num_pkeys : 0; int ret; mlx5_ib_dbg(dev, "creating GSI QP\n"); if (port_num > ARRAY_SIZE(dev->devr.ports) || port_num < 1) { mlx5_ib_warn(dev, "invalid port number %d during GSI QP creation\n", port_num); return ERR_PTR(-EINVAL); } gsi = kzalloc(sizeof(*gsi), GFP_KERNEL); if (!gsi) return ERR_PTR(-ENOMEM); gsi->tx_qps = kcalloc(num_qps, sizeof(*gsi->tx_qps), GFP_KERNEL); if (!gsi->tx_qps) { ret = -ENOMEM; goto err_free; } gsi->outstanding_wrs = kcalloc(init_attr->cap.max_send_wr, sizeof(*gsi->outstanding_wrs), GFP_KERNEL); if (!gsi->outstanding_wrs) { ret = -ENOMEM; goto err_free_tx; } mutex_init(&gsi->mutex); mutex_lock(&dev->devr.mutex); if (dev->devr.ports[port_num - 1].gsi) { mlx5_ib_warn(dev, "GSI QP already exists on port %d\n", port_num); ret = -EBUSY; goto err_free_wrs; } gsi->num_qps = num_qps; spin_lock_init(&gsi->lock); gsi->cap = init_attr->cap; gsi->sq_sig_type = init_attr->sq_sig_type; gsi->ibqp.qp_num = 1; gsi->port_num = port_num; gsi->cq = ib_alloc_cq(pd->device, gsi, init_attr->cap.max_send_wr, 0, IB_POLL_SOFTIRQ); if (IS_ERR(gsi->cq)) { mlx5_ib_warn(dev, "unable to create send CQ for GSI QP. error %ld\n", PTR_ERR(gsi->cq)); ret = PTR_ERR(gsi->cq); goto err_free_wrs; } hw_init_attr.qp_type = MLX5_IB_QPT_HW_GSI; hw_init_attr.send_cq = gsi->cq; if (num_qps) { hw_init_attr.cap.max_send_wr = 0; hw_init_attr.cap.max_send_sge = 0; hw_init_attr.cap.max_inline_data = 0; } gsi->rx_qp = ib_create_qp(pd, &hw_init_attr); if (IS_ERR(gsi->rx_qp)) { mlx5_ib_warn(dev, "unable to create hardware GSI QP. 
error %ld\n", PTR_ERR(gsi->rx_qp)); ret = PTR_ERR(gsi->rx_qp); goto err_destroy_cq; } dev->devr.ports[init_attr->port_num - 1].gsi = gsi; mutex_unlock(&dev->devr.mutex); return &gsi->ibqp; err_destroy_cq: ib_free_cq(gsi->cq); err_free_wrs: mutex_unlock(&dev->devr.mutex); kfree(gsi->outstanding_wrs); err_free_tx: kfree(gsi->tx_qps); err_free: kfree(gsi); return ERR_PTR(ret); } int mlx5_ib_gsi_destroy_qp(struct ib_qp *qp) { struct mlx5_ib_dev *dev = to_mdev(qp->device); struct mlx5_ib_gsi_qp *gsi = gsi_qp(qp); const int port_num = gsi->port_num; int qp_index; int ret; mlx5_ib_dbg(dev, "destroying GSI QP\n"); mutex_lock(&dev->devr.mutex); ret = ib_destroy_qp(gsi->rx_qp); if (ret) { mlx5_ib_warn(dev, "unable to destroy hardware GSI QP. error %d\n", ret); mutex_unlock(&dev->devr.mutex); return ret; } dev->devr.ports[port_num - 1].gsi = NULL; mutex_unlock(&dev->devr.mutex); gsi->rx_qp = NULL; for (qp_index = 0; qp_index < gsi->num_qps; ++qp_index) { if (!gsi->tx_qps[qp_index]) continue; WARN_ON_ONCE(ib_destroy_qp(gsi->tx_qps[qp_index])); gsi->tx_qps[qp_index] = NULL; } ib_free_cq(gsi->cq); kfree(gsi->outstanding_wrs); kfree(gsi->tx_qps); kfree(gsi); return 0; } static struct ib_qp *create_gsi_ud_qp(struct mlx5_ib_gsi_qp *gsi) { struct ib_pd *pd = gsi->rx_qp->pd; struct ib_qp_init_attr init_attr = { .event_handler = gsi->rx_qp->event_handler, .qp_context = gsi->rx_qp->qp_context, .send_cq = gsi->cq, .recv_cq = gsi->rx_qp->recv_cq, .cap = { .max_send_wr = gsi->cap.max_send_wr, .max_send_sge = gsi->cap.max_send_sge, .max_inline_data = gsi->cap.max_inline_data, }, .sq_sig_type = gsi->sq_sig_type, .qp_type = IB_QPT_UD, - .create_flags = mlx5_ib_create_qp_sqpn_qp1(), + .create_flags = MLX5_IB_QP_CREATE_SQPN_QP1, }; return ib_create_qp(pd, &init_attr); } static int modify_to_rts(struct mlx5_ib_gsi_qp *gsi, struct ib_qp *qp, u16 qp_index) { struct mlx5_ib_dev *dev = to_mdev(qp->device); struct ib_qp_attr attr; int mask; int ret; mask = IB_QP_STATE | IB_QP_PKEY_INDEX | IB_QP_QKEY | IB_QP_PORT; attr.qp_state = IB_QPS_INIT; attr.pkey_index = qp_index; attr.qkey = IB_QP1_QKEY; attr.port_num = gsi->port_num; ret = ib_modify_qp(qp, &attr, mask); if (ret) { mlx5_ib_err(dev, "could not change QP%d state to INIT: %d\n", qp->qp_num, ret); return ret; } attr.qp_state = IB_QPS_RTR; ret = ib_modify_qp(qp, &attr, IB_QP_STATE); if (ret) { mlx5_ib_err(dev, "could not change QP%d state to RTR: %d\n", qp->qp_num, ret); return ret; } attr.qp_state = IB_QPS_RTS; attr.sq_psn = 0; ret = ib_modify_qp(qp, &attr, IB_QP_STATE | IB_QP_SQ_PSN); if (ret) { mlx5_ib_err(dev, "could not change QP%d state to RTS: %d\n", qp->qp_num, ret); return ret; } return 0; } static void setup_qp(struct mlx5_ib_gsi_qp *gsi, u16 qp_index) { struct ib_device *device = gsi->rx_qp->device; struct mlx5_ib_dev *dev = to_mdev(device); struct ib_qp *qp; unsigned long flags; u16 pkey; int ret; ret = ib_query_pkey(device, gsi->port_num, qp_index, &pkey); if (ret) { mlx5_ib_warn(dev, "unable to read P_Key at port %d, index %d\n", gsi->port_num, qp_index); return; } if (!pkey) { mlx5_ib_dbg(dev, "invalid P_Key at port %d, index %d. Skipping.\n", gsi->port_num, qp_index); return; } spin_lock_irqsave(&gsi->lock, flags); qp = gsi->tx_qps[qp_index]; spin_unlock_irqrestore(&gsi->lock, flags); if (qp) { mlx5_ib_dbg(dev, "already existing GSI TX QP at port %d, index %d. 
Skipping\n", gsi->port_num, qp_index); return; } qp = create_gsi_ud_qp(gsi); if (IS_ERR(qp)) { mlx5_ib_warn(dev, "unable to create hardware UD QP for GSI: %ld\n", PTR_ERR(qp)); return; } ret = modify_to_rts(gsi, qp, qp_index); if (ret) goto err_destroy_qp; spin_lock_irqsave(&gsi->lock, flags); WARN_ON_ONCE(gsi->tx_qps[qp_index]); gsi->tx_qps[qp_index] = qp; spin_unlock_irqrestore(&gsi->lock, flags); return; err_destroy_qp: WARN_ON_ONCE(qp); } static void setup_qps(struct mlx5_ib_gsi_qp *gsi) { u16 qp_index; for (qp_index = 0; qp_index < gsi->num_qps; ++qp_index) setup_qp(gsi, qp_index); } int mlx5_ib_gsi_modify_qp(struct ib_qp *qp, struct ib_qp_attr *attr, int attr_mask) { struct mlx5_ib_dev *dev = to_mdev(qp->device); struct mlx5_ib_gsi_qp *gsi = gsi_qp(qp); int ret; mlx5_ib_dbg(dev, "modifying GSI QP to state %d\n", attr->qp_state); mutex_lock(&gsi->mutex); ret = ib_modify_qp(gsi->rx_qp, attr, attr_mask); if (ret) { mlx5_ib_warn(dev, "unable to modify GSI rx QP: %d\n", ret); goto unlock; } if (to_mqp(gsi->rx_qp)->state == IB_QPS_RTS) setup_qps(gsi); unlock: mutex_unlock(&gsi->mutex); return ret; } int mlx5_ib_gsi_query_qp(struct ib_qp *qp, struct ib_qp_attr *qp_attr, int qp_attr_mask, struct ib_qp_init_attr *qp_init_attr) { struct mlx5_ib_gsi_qp *gsi = gsi_qp(qp); int ret; mutex_lock(&gsi->mutex); ret = ib_query_qp(gsi->rx_qp, qp_attr, qp_attr_mask, qp_init_attr); qp_init_attr->cap = gsi->cap; mutex_unlock(&gsi->mutex); return ret; } /* Call with gsi->lock locked */ static int mlx5_ib_add_outstanding_wr(struct mlx5_ib_gsi_qp *gsi, struct ib_ud_wr *wr, struct ib_wc *wc) { struct mlx5_ib_dev *dev = to_mdev(gsi->rx_qp->device); struct mlx5_ib_gsi_wr *gsi_wr; if (gsi->outstanding_pi == gsi->outstanding_ci + gsi->cap.max_send_wr) { mlx5_ib_warn(dev, "no available GSI work request.\n"); return -ENOMEM; } gsi_wr = &gsi->outstanding_wrs[gsi->outstanding_pi % gsi->cap.max_send_wr]; gsi->outstanding_pi++; if (!wc) { memset(&gsi_wr->wc, 0, sizeof(gsi_wr->wc)); gsi_wr->wc.pkey_index = wr->pkey_index; gsi_wr->wc.wr_id = wr->wr.wr_id; } else { gsi_wr->wc = *wc; gsi_wr->completed = true; } gsi_wr->cqe.done = &handle_single_completion; wr->wr.wr_cqe = &gsi_wr->cqe; return 0; } /* Call with gsi->lock locked */ static int mlx5_ib_gsi_silent_drop(struct mlx5_ib_gsi_qp *gsi, struct ib_ud_wr *wr) { struct ib_wc wc = { { .wr_id = wr->wr.wr_id }, .status = IB_WC_SUCCESS, .opcode = IB_WC_SEND, .qp = &gsi->ibqp, }; int ret; ret = mlx5_ib_add_outstanding_wr(gsi, wr, &wc); if (ret) return ret; generate_completions(gsi); return 0; } /* Call with gsi->lock locked */ static struct ib_qp *get_tx_qp(struct mlx5_ib_gsi_qp *gsi, struct ib_ud_wr *wr) { struct mlx5_ib_dev *dev = to_mdev(gsi->rx_qp->device); int qp_index = wr->pkey_index; if (!mlx5_ib_deth_sqpn_cap(dev)) return gsi->rx_qp; if (qp_index >= gsi->num_qps) return NULL; return gsi->tx_qps[qp_index]; } int mlx5_ib_gsi_post_send(struct ib_qp *qp, struct ib_send_wr *wr, struct ib_send_wr **bad_wr) { struct mlx5_ib_gsi_qp *gsi = gsi_qp(qp); struct ib_qp *tx_qp; unsigned long flags; int ret; for (; wr; wr = wr->next) { struct ib_ud_wr cur_wr = *ud_wr(wr); cur_wr.wr.next = NULL; spin_lock_irqsave(&gsi->lock, flags); tx_qp = get_tx_qp(gsi, &cur_wr); if (!tx_qp) { ret = mlx5_ib_gsi_silent_drop(gsi, &cur_wr); if (ret) goto err; spin_unlock_irqrestore(&gsi->lock, flags); continue; } ret = mlx5_ib_add_outstanding_wr(gsi, &cur_wr, NULL); if (ret) goto err; ret = ib_post_send(tx_qp, &cur_wr.wr, bad_wr); if (ret) { /* Undo the effect of adding the outstanding wr */ 
gsi->outstanding_pi = (gsi->outstanding_pi - 1) % gsi->cap.max_send_wr; goto err; } spin_unlock_irqrestore(&gsi->lock, flags); } return 0; err: spin_unlock_irqrestore(&gsi->lock, flags); *bad_wr = wr; return ret; } int mlx5_ib_gsi_post_recv(struct ib_qp *qp, struct ib_recv_wr *wr, struct ib_recv_wr **bad_wr) { struct mlx5_ib_gsi_qp *gsi = gsi_qp(qp); return ib_post_recv(gsi->rx_qp, wr, bad_wr); } void mlx5_ib_gsi_pkey_change(struct mlx5_ib_gsi_qp *gsi) { if (!gsi) return; mutex_lock(&gsi->mutex); setup_qps(gsi); mutex_unlock(&gsi->mutex); } diff --git a/sys/dev/mlx5/mlx5_ib/mlx5_ib_main.c b/sys/dev/mlx5/mlx5_ib/mlx5_ib_main.c index 9f891d5e9599..58f9379e867b 100644 --- a/sys/dev/mlx5/mlx5_ib/mlx5_ib_main.c +++ b/sys/dev/mlx5/mlx5_ib/mlx5_ib_main.c @@ -1,3403 +1,3545 @@ /*- * Copyright (c) 2013-2015, Mellanox Technologies, Ltd. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS `AS IS' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* * $FreeBSD$ */ #include #include #include #include #include #if defined(CONFIG_X86) #include #endif #include #include #include #undef inode #include #include #include #include #include #include #include #include #include #include #include #include "mlx5_ib.h" #define DRIVER_NAME "mlx5ib" #ifndef DRIVER_VERSION #define DRIVER_VERSION "3.6.0" #endif #define DRIVER_RELDATE "December 2020" MODULE_DESCRIPTION("Mellanox Connect-IB HCA IB driver"); MODULE_LICENSE("Dual BSD/GPL"); MODULE_DEPEND(mlx5ib, linuxkpi, 1, 1, 1); MODULE_DEPEND(mlx5ib, mlx5, 1, 1, 1); MODULE_DEPEND(mlx5ib, ibcore, 1, 1, 1); MODULE_VERSION(mlx5ib, 1); static const char mlx5_version[] = DRIVER_NAME ": Mellanox Connect-IB Infiniband driver " DRIVER_VERSION " (" DRIVER_RELDATE ")\n"; enum { MLX5_ATOMIC_SIZE_QP_8BYTES = 1 << 3, }; static enum rdma_link_layer mlx5_port_type_cap_to_rdma_ll(int port_type_cap) { switch (port_type_cap) { case MLX5_CAP_PORT_TYPE_IB: return IB_LINK_LAYER_INFINIBAND; case MLX5_CAP_PORT_TYPE_ETH: return IB_LINK_LAYER_ETHERNET; default: return IB_LINK_LAYER_UNSPECIFIED; } } static enum rdma_link_layer mlx5_ib_port_link_layer(struct ib_device *device, u8 port_num) { struct mlx5_ib_dev *dev = to_mdev(device); int port_type_cap = MLX5_CAP_GEN(dev->mdev, port_type); return mlx5_port_type_cap_to_rdma_ll(port_type_cap); } static bool mlx5_netdev_match(struct net_device *ndev, struct mlx5_core_dev *mdev, const char *dname) { return ndev->if_type == IFT_ETHER && ndev->if_dname != NULL && strcmp(ndev->if_dname, dname) == 0 && ndev->if_softc != NULL && *(struct mlx5_core_dev **)ndev->if_softc == mdev; } static int mlx5_netdev_event(struct notifier_block *this, unsigned long event, void *ptr) { struct net_device *ndev = netdev_notifier_info_to_dev(ptr); struct mlx5_ib_dev *ibdev = container_of(this, struct mlx5_ib_dev, roce.nb); switch (event) { case NETDEV_REGISTER: case NETDEV_UNREGISTER: write_lock(&ibdev->roce.netdev_lock); /* check if network interface belongs to mlx5en */ if (mlx5_netdev_match(ndev, ibdev->mdev, "mce")) ibdev->roce.netdev = (event == NETDEV_UNREGISTER) ? NULL : ndev; write_unlock(&ibdev->roce.netdev_lock); break; case NETDEV_UP: case NETDEV_DOWN: { struct net_device *upper = NULL; if ((upper == ndev || (!upper && ndev == ibdev->roce.netdev)) && ibdev->ib_active) { struct ib_event ibev = {0}; ibev.device = &ibdev->ib_dev; ibev.event = (event == NETDEV_UP) ? 
IB_EVENT_PORT_ACTIVE : IB_EVENT_PORT_ERR; ibev.element.port_num = 1; ib_dispatch_event(&ibev); } break; } default: break; } return NOTIFY_DONE; } static struct net_device *mlx5_ib_get_netdev(struct ib_device *device, u8 port_num) { struct mlx5_ib_dev *ibdev = to_mdev(device); struct net_device *ndev; /* Ensure ndev does not disappear before we invoke dev_hold() */ read_lock(&ibdev->roce.netdev_lock); ndev = ibdev->roce.netdev; if (ndev) dev_hold(ndev); read_unlock(&ibdev->roce.netdev_lock); return ndev; } static int translate_eth_proto_oper(u32 eth_proto_oper, u8 *active_speed, u8 *active_width) { switch (eth_proto_oper) { case MLX5E_PROT_MASK(MLX5E_1000BASE_CX_SGMII): case MLX5E_PROT_MASK(MLX5E_1000BASE_KX): case MLX5E_PROT_MASK(MLX5E_100BASE_TX): case MLX5E_PROT_MASK(MLX5E_1000BASE_T): *active_width = IB_WIDTH_1X; *active_speed = IB_SPEED_SDR; break; case MLX5E_PROT_MASK(MLX5E_10GBASE_T): case MLX5E_PROT_MASK(MLX5E_10GBASE_CX4): case MLX5E_PROT_MASK(MLX5E_10GBASE_KX4): case MLX5E_PROT_MASK(MLX5E_10GBASE_KR): case MLX5E_PROT_MASK(MLX5E_10GBASE_CR): case MLX5E_PROT_MASK(MLX5E_10GBASE_SR): case MLX5E_PROT_MASK(MLX5E_10GBASE_ER_LR): *active_width = IB_WIDTH_1X; *active_speed = IB_SPEED_QDR; break; case MLX5E_PROT_MASK(MLX5E_25GBASE_CR): case MLX5E_PROT_MASK(MLX5E_25GBASE_KR): case MLX5E_PROT_MASK(MLX5E_25GBASE_SR): *active_width = IB_WIDTH_1X; *active_speed = IB_SPEED_EDR; break; case MLX5E_PROT_MASK(MLX5E_40GBASE_CR4): case MLX5E_PROT_MASK(MLX5E_40GBASE_KR4): case MLX5E_PROT_MASK(MLX5E_40GBASE_SR4): case MLX5E_PROT_MASK(MLX5E_40GBASE_LR4_ER4): *active_width = IB_WIDTH_4X; *active_speed = IB_SPEED_QDR; break; case MLX5E_PROT_MASK(MLX5E_50GBASE_CR2): case MLX5E_PROT_MASK(MLX5E_50GBASE_KR2): case MLX5E_PROT_MASK(MLX5E_50GBASE_KR4): case MLX5E_PROT_MASK(MLX5E_50GBASE_SR2): *active_width = IB_WIDTH_1X; *active_speed = IB_SPEED_HDR; break; case MLX5E_PROT_MASK(MLX5E_56GBASE_R4): *active_width = IB_WIDTH_4X; *active_speed = IB_SPEED_FDR; break; case MLX5E_PROT_MASK(MLX5E_100GBASE_CR4): case MLX5E_PROT_MASK(MLX5E_100GBASE_SR4): case MLX5E_PROT_MASK(MLX5E_100GBASE_KR4): case MLX5E_PROT_MASK(MLX5E_100GBASE_LR4): *active_width = IB_WIDTH_4X; *active_speed = IB_SPEED_EDR; break; default: *active_width = IB_WIDTH_4X; *active_speed = IB_SPEED_QDR; return -EINVAL; } return 0; } static int translate_eth_ext_proto_oper(u32 eth_proto_oper, u8 *active_speed, u8 *active_width) { switch (eth_proto_oper) { case MLX5E_PROT_MASK(MLX5E_SGMII_100M): case MLX5E_PROT_MASK(MLX5E_1000BASE_X_SGMII): *active_width = IB_WIDTH_1X; *active_speed = IB_SPEED_SDR; break; case MLX5E_PROT_MASK(MLX5E_5GBASE_R): *active_width = IB_WIDTH_1X; *active_speed = IB_SPEED_DDR; break; case MLX5E_PROT_MASK(MLX5E_10GBASE_XFI_XAUI_1): *active_width = IB_WIDTH_1X; *active_speed = IB_SPEED_QDR; break; case MLX5E_PROT_MASK(MLX5E_40GBASE_XLAUI_4_XLPPI_4): *active_width = IB_WIDTH_4X; *active_speed = IB_SPEED_QDR; break; case MLX5E_PROT_MASK(MLX5E_25GAUI_1_25GBASE_CR_KR): *active_width = IB_WIDTH_1X; *active_speed = IB_SPEED_EDR; break; case MLX5E_PROT_MASK(MLX5E_50GAUI_2_LAUI_2_50GBASE_CR2_KR2): *active_width = IB_WIDTH_2X; *active_speed = IB_SPEED_EDR; break; case MLX5E_PROT_MASK(MLX5E_50GAUI_1_LAUI_1_50GBASE_CR_KR): *active_width = IB_WIDTH_1X; *active_speed = IB_SPEED_HDR; break; case MLX5E_PROT_MASK(MLX5E_CAUI_4_100GBASE_CR4_KR4): *active_width = IB_WIDTH_4X; *active_speed = IB_SPEED_EDR; break; case MLX5E_PROT_MASK(MLX5E_100GAUI_2_100GBASE_CR2_KR2): *active_width = IB_WIDTH_2X; *active_speed = IB_SPEED_HDR; break; case 
MLX5E_PROT_MASK(MLX5E_200GAUI_4_200GBASE_CR4_KR4): *active_width = IB_WIDTH_4X; *active_speed = IB_SPEED_HDR; break; default: *active_width = IB_WIDTH_4X; *active_speed = IB_SPEED_QDR; return -EINVAL; } return 0; } static int mlx5_query_port_roce(struct ib_device *device, u8 port_num, struct ib_port_attr *props) { struct mlx5_ib_dev *dev = to_mdev(device); u32 out[MLX5_ST_SZ_DW(ptys_reg)] = {}; struct net_device *ndev; enum ib_mtu ndev_ib_mtu; u16 qkey_viol_cntr; u32 eth_prot_oper; bool ext; int err; memset(props, 0, sizeof(*props)); /* Possible bad flows are checked before filling out props so in case * of an error it will still be zeroed out. */ err = mlx5_query_port_ptys(dev->mdev, out, sizeof(out), MLX5_PTYS_EN, port_num); if (err) return err; ext = MLX5_CAP_PCAM_FEATURE(dev->mdev, ptys_extended_ethernet); eth_prot_oper = MLX5_GET_ETH_PROTO(ptys_reg, out, ext, eth_proto_oper); if (ext) translate_eth_ext_proto_oper(eth_prot_oper, &props->active_speed, &props->active_width); else translate_eth_proto_oper(eth_prot_oper, &props->active_speed, &props->active_width); props->port_cap_flags |= IB_PORT_CM_SUP; props->port_cap_flags |= IB_PORT_IP_BASED_GIDS; props->gid_tbl_len = MLX5_CAP_ROCE(dev->mdev, roce_address_table_size); props->max_mtu = IB_MTU_4096; props->max_msg_sz = 1 << MLX5_CAP_GEN(dev->mdev, log_max_msg); props->pkey_tbl_len = 1; props->state = IB_PORT_DOWN; props->phys_state = 3; mlx5_query_nic_vport_qkey_viol_cntr(dev->mdev, &qkey_viol_cntr); props->qkey_viol_cntr = qkey_viol_cntr; ndev = mlx5_ib_get_netdev(device, port_num); if (!ndev) return 0; if (netif_running(ndev) && netif_carrier_ok(ndev)) { props->state = IB_PORT_ACTIVE; props->phys_state = 5; } ndev_ib_mtu = iboe_get_mtu(ndev->if_mtu); dev_put(ndev); props->active_mtu = min(props->max_mtu, ndev_ib_mtu); return 0; } static void ib_gid_to_mlx5_roce_addr(const union ib_gid *gid, const struct ib_gid_attr *attr, void *mlx5_addr) { #define MLX5_SET_RA(p, f, v) MLX5_SET(roce_addr_layout, p, f, v) char *mlx5_addr_l3_addr = MLX5_ADDR_OF(roce_addr_layout, mlx5_addr, source_l3_address); void *mlx5_addr_mac = MLX5_ADDR_OF(roce_addr_layout, mlx5_addr, source_mac_47_32); u16 vlan_id; if (!gid) return; ether_addr_copy(mlx5_addr_mac, IF_LLADDR(attr->ndev)); vlan_id = rdma_vlan_dev_vlan_id(attr->ndev); if (vlan_id != 0xffff) { MLX5_SET_RA(mlx5_addr, vlan_valid, 1); MLX5_SET_RA(mlx5_addr, vlan_id, vlan_id); } switch (attr->gid_type) { case IB_GID_TYPE_IB: MLX5_SET_RA(mlx5_addr, roce_version, MLX5_ROCE_VERSION_1); break; case IB_GID_TYPE_ROCE_UDP_ENCAP: MLX5_SET_RA(mlx5_addr, roce_version, MLX5_ROCE_VERSION_2); break; default: WARN_ON(true); } if (attr->gid_type != IB_GID_TYPE_IB) { if (ipv6_addr_v4mapped((void *)gid)) MLX5_SET_RA(mlx5_addr, roce_l3_type, MLX5_ROCE_L3_TYPE_IPV4); else MLX5_SET_RA(mlx5_addr, roce_l3_type, MLX5_ROCE_L3_TYPE_IPV6); } if ((attr->gid_type == IB_GID_TYPE_IB) || !ipv6_addr_v4mapped((void *)gid)) memcpy(mlx5_addr_l3_addr, gid, sizeof(*gid)); else memcpy(&mlx5_addr_l3_addr[12], &gid->raw[12], 4); } static int set_roce_addr(struct ib_device *device, u8 port_num, unsigned int index, const union ib_gid *gid, const struct ib_gid_attr *attr) { struct mlx5_ib_dev *dev = to_mdev(device); u32 in[MLX5_ST_SZ_DW(set_roce_address_in)] = {0}; u32 out[MLX5_ST_SZ_DW(set_roce_address_out)] = {0}; void *in_addr = MLX5_ADDR_OF(set_roce_address_in, in, roce_address); enum rdma_link_layer ll = mlx5_ib_port_link_layer(device, port_num); if (ll != IB_LINK_LAYER_ETHERNET) return -EINVAL; ib_gid_to_mlx5_roce_addr(gid, attr, in_addr); 
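/*
 * For reference, a v4-mapped GID (::ffff:a.b.c.d) carries the IPv4 address
 * in bytes 12..15 of the 16-byte GID, which is why ib_gid_to_mlx5_roce_addr()
 * above copies only &gid->raw[12] for RoCEv2/IPv4 entries. A minimal sketch
 * of that classification, using a hypothetical helper (the code above relies
 * on ipv6_addr_v4mapped() instead):
 *
 *	static inline bool gid_is_v4_mapped(const union ib_gid *gid)
 *	{
 *		static const u8 pfx[12] =
 *		    { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0xff, 0xff };
 *
 *		return (memcmp(gid->raw, pfx, sizeof(pfx)) == 0);
 *	}
 */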
MLX5_SET(set_roce_address_in, in, roce_address_index, index); MLX5_SET(set_roce_address_in, in, opcode, MLX5_CMD_OP_SET_ROCE_ADDRESS); return mlx5_cmd_exec(dev->mdev, in, sizeof(in), out, sizeof(out)); } static int mlx5_ib_add_gid(struct ib_device *device, u8 port_num, unsigned int index, const union ib_gid *gid, const struct ib_gid_attr *attr, __always_unused void **context) { return set_roce_addr(device, port_num, index, gid, attr); } static int mlx5_ib_del_gid(struct ib_device *device, u8 port_num, unsigned int index, __always_unused void **context) { return set_roce_addr(device, port_num, index, NULL, NULL); } __be16 mlx5_get_roce_udp_sport(struct mlx5_ib_dev *dev, u8 port_num, int index) { struct ib_gid_attr attr; union ib_gid gid; if (ib_get_cached_gid(&dev->ib_dev, port_num, index, &gid, &attr)) return 0; if (!attr.ndev) return 0; dev_put(attr.ndev); if (attr.gid_type != IB_GID_TYPE_ROCE_UDP_ENCAP) return 0; return cpu_to_be16(MLX5_CAP_ROCE(dev->mdev, r_roce_min_src_udp_port)); } int mlx5_get_roce_gid_type(struct mlx5_ib_dev *dev, u8 port_num, int index, enum ib_gid_type *gid_type) { struct ib_gid_attr attr; union ib_gid gid; int ret; ret = ib_get_cached_gid(&dev->ib_dev, port_num, index, &gid, &attr); if (ret) return ret; if (!attr.ndev) return -ENODEV; dev_put(attr.ndev); *gid_type = attr.gid_type; return 0; } static int mlx5_use_mad_ifc(struct mlx5_ib_dev *dev) { if (MLX5_CAP_GEN(dev->mdev, port_type) == MLX5_CAP_PORT_TYPE_IB) return !MLX5_CAP_GEN(dev->mdev, ib_virt); return 0; } enum { MLX5_VPORT_ACCESS_METHOD_MAD, MLX5_VPORT_ACCESS_METHOD_HCA, MLX5_VPORT_ACCESS_METHOD_NIC, }; static int mlx5_get_vport_access_method(struct ib_device *ibdev) { if (mlx5_use_mad_ifc(to_mdev(ibdev))) return MLX5_VPORT_ACCESS_METHOD_MAD; if (mlx5_ib_port_link_layer(ibdev, 1) == IB_LINK_LAYER_ETHERNET) return MLX5_VPORT_ACCESS_METHOD_NIC; return MLX5_VPORT_ACCESS_METHOD_HCA; } static void get_atomic_caps(struct mlx5_ib_dev *dev, struct ib_device_attr *props) { u8 tmp; u8 atomic_operations = MLX5_CAP_ATOMIC(dev->mdev, atomic_operations); u8 atomic_size_qp = MLX5_CAP_ATOMIC(dev->mdev, atomic_size_qp); u8 atomic_req_8B_endianness_mode = MLX5_CAP_ATOMIC(dev->mdev, atomic_req_8B_endianess_mode); /* Check if HW supports 8 bytes standard atomic operations and capable * of host endianness respond */ tmp = MLX5_ATOMIC_OPS_CMP_SWAP | MLX5_ATOMIC_OPS_FETCH_ADD; if (((atomic_operations & tmp) == tmp) && (atomic_size_qp & MLX5_ATOMIC_SIZE_QP_8BYTES) && (atomic_req_8B_endianness_mode)) { props->atomic_cap = IB_ATOMIC_HCA; } else { props->atomic_cap = IB_ATOMIC_NONE; } } static int mlx5_query_system_image_guid(struct ib_device *ibdev, __be64 *sys_image_guid) { struct mlx5_ib_dev *dev = to_mdev(ibdev); struct mlx5_core_dev *mdev = dev->mdev; u64 tmp; int err; switch (mlx5_get_vport_access_method(ibdev)) { case MLX5_VPORT_ACCESS_METHOD_MAD: return mlx5_query_mad_ifc_system_image_guid(ibdev, sys_image_guid); case MLX5_VPORT_ACCESS_METHOD_HCA: err = mlx5_query_hca_vport_system_image_guid(mdev, &tmp); break; case MLX5_VPORT_ACCESS_METHOD_NIC: err = mlx5_query_nic_vport_system_image_guid(mdev, &tmp); break; default: return -EINVAL; } if (!err) *sys_image_guid = cpu_to_be64(tmp); return err; } static int mlx5_query_max_pkeys(struct ib_device *ibdev, u16 *max_pkeys) { struct mlx5_ib_dev *dev = to_mdev(ibdev); struct mlx5_core_dev *mdev = dev->mdev; switch (mlx5_get_vport_access_method(ibdev)) { case MLX5_VPORT_ACCESS_METHOD_MAD: return mlx5_query_mad_ifc_max_pkeys(ibdev, max_pkeys); case MLX5_VPORT_ACCESS_METHOD_HCA: case 
MLX5_VPORT_ACCESS_METHOD_NIC: *max_pkeys = mlx5_to_sw_pkey_sz(MLX5_CAP_GEN(mdev, pkey_table_size)); return 0; default: return -EINVAL; } } static int mlx5_query_vendor_id(struct ib_device *ibdev, u32 *vendor_id) { struct mlx5_ib_dev *dev = to_mdev(ibdev); switch (mlx5_get_vport_access_method(ibdev)) { case MLX5_VPORT_ACCESS_METHOD_MAD: return mlx5_query_mad_ifc_vendor_id(ibdev, vendor_id); case MLX5_VPORT_ACCESS_METHOD_HCA: case MLX5_VPORT_ACCESS_METHOD_NIC: return mlx5_core_query_vendor_id(dev->mdev, vendor_id); default: return -EINVAL; } } static int mlx5_query_node_guid(struct mlx5_ib_dev *dev, __be64 *node_guid) { u64 tmp; int err; switch (mlx5_get_vport_access_method(&dev->ib_dev)) { case MLX5_VPORT_ACCESS_METHOD_MAD: return mlx5_query_mad_ifc_node_guid(dev, node_guid); case MLX5_VPORT_ACCESS_METHOD_HCA: err = mlx5_query_hca_vport_node_guid(dev->mdev, &tmp); break; case MLX5_VPORT_ACCESS_METHOD_NIC: err = mlx5_query_nic_vport_node_guid(dev->mdev, &tmp); break; default: return -EINVAL; } if (!err) *node_guid = cpu_to_be64(tmp); return err; } struct mlx5_reg_node_desc { u8 desc[IB_DEVICE_NODE_DESC_MAX]; }; static int mlx5_query_node_desc(struct mlx5_ib_dev *dev, char *node_desc) { struct mlx5_reg_node_desc in; if (mlx5_use_mad_ifc(dev)) return mlx5_query_mad_ifc_node_desc(dev, node_desc); memset(&in, 0, sizeof(in)); return mlx5_core_access_reg(dev->mdev, &in, sizeof(in), node_desc, sizeof(struct mlx5_reg_node_desc), MLX5_REG_NODE_DESC, 0, 0); } static int mlx5_ib_query_device(struct ib_device *ibdev, struct ib_device_attr *props, struct ib_udata *uhw) { struct mlx5_ib_dev *dev = to_mdev(ibdev); struct mlx5_core_dev *mdev = dev->mdev; int err = -ENOMEM; int max_sq_desc; int max_rq_sg; int max_sq_sg; u64 min_page_size = 1ull << MLX5_CAP_GEN(mdev, log_pg_sz); struct mlx5_ib_query_device_resp resp = {}; size_t resp_len; u64 max_tso; resp_len = sizeof(resp.comp_mask) + sizeof(resp.response_length); if (uhw->outlen && uhw->outlen < resp_len) return -EINVAL; else resp.response_length = resp_len; if (uhw->inlen && !ib_is_udata_cleared(uhw, 0, uhw->inlen)) return -EINVAL; memset(props, 0, sizeof(*props)); err = mlx5_query_system_image_guid(ibdev, &props->sys_image_guid); if (err) return err; err = mlx5_query_max_pkeys(ibdev, &props->max_pkeys); if (err) return err; err = mlx5_query_vendor_id(ibdev, &props->vendor_id); if (err) return err; props->fw_ver = ((u64)fw_rev_maj(dev->mdev) << 32) | ((u32)fw_rev_min(dev->mdev) << 16) | fw_rev_sub(dev->mdev); props->device_cap_flags = IB_DEVICE_CHANGE_PHY_PORT | IB_DEVICE_PORT_ACTIVE_EVENT | IB_DEVICE_SYS_IMAGE_GUID | IB_DEVICE_RC_RNR_NAK_GEN; if (MLX5_CAP_GEN(mdev, pkv)) props->device_cap_flags |= IB_DEVICE_BAD_PKEY_CNTR; if (MLX5_CAP_GEN(mdev, qkv)) props->device_cap_flags |= IB_DEVICE_BAD_QKEY_CNTR; if (MLX5_CAP_GEN(mdev, apm)) props->device_cap_flags |= IB_DEVICE_AUTO_PATH_MIG; if (MLX5_CAP_GEN(mdev, xrc)) props->device_cap_flags |= IB_DEVICE_XRC; if (MLX5_CAP_GEN(mdev, imaicl)) { props->device_cap_flags |= IB_DEVICE_MEM_WINDOW | IB_DEVICE_MEM_WINDOW_TYPE_2B; props->max_mw = 1 << MLX5_CAP_GEN(mdev, log_max_mkey); /* We support 'Gappy' memory registration too */ props->device_cap_flags |= IB_DEVICE_SG_GAPS_REG; } props->device_cap_flags |= IB_DEVICE_MEM_MGT_EXTENSIONS; if (MLX5_CAP_GEN(mdev, sho)) { props->device_cap_flags |= IB_DEVICE_SIGNATURE_HANDOVER; /* At this stage no support for signature handover */ props->sig_prot_cap = IB_PROT_T10DIF_TYPE_1 | IB_PROT_T10DIF_TYPE_2 | IB_PROT_T10DIF_TYPE_3; props->sig_guard_cap = IB_GUARD_T10DIF_CRC | 
IB_GUARD_T10DIF_CSUM; } if (MLX5_CAP_GEN(mdev, block_lb_mc)) props->device_cap_flags |= IB_DEVICE_BLOCK_MULTICAST_LOOPBACK; if (MLX5_CAP_GEN(dev->mdev, eth_net_offloads)) { if (MLX5_CAP_ETH(mdev, csum_cap)) props->device_cap_flags |= IB_DEVICE_RAW_IP_CSUM; if (field_avail(typeof(resp), tso_caps, uhw->outlen)) { max_tso = MLX5_CAP_ETH(mdev, max_lso_cap); if (max_tso) { resp.tso_caps.max_tso = 1 << max_tso; resp.tso_caps.supported_qpts |= 1 << IB_QPT_RAW_PACKET; resp.response_length += sizeof(resp.tso_caps); } } if (field_avail(typeof(resp), rss_caps, uhw->outlen)) { resp.rss_caps.rx_hash_function = MLX5_RX_HASH_FUNC_TOEPLITZ; resp.rss_caps.rx_hash_fields_mask = MLX5_RX_HASH_SRC_IPV4 | MLX5_RX_HASH_DST_IPV4 | MLX5_RX_HASH_SRC_IPV6 | MLX5_RX_HASH_DST_IPV6 | MLX5_RX_HASH_SRC_PORT_TCP | MLX5_RX_HASH_DST_PORT_TCP | MLX5_RX_HASH_SRC_PORT_UDP | MLX5_RX_HASH_DST_PORT_UDP; resp.response_length += sizeof(resp.rss_caps); } } else { if (field_avail(typeof(resp), tso_caps, uhw->outlen)) resp.response_length += sizeof(resp.tso_caps); if (field_avail(typeof(resp), rss_caps, uhw->outlen)) resp.response_length += sizeof(resp.rss_caps); } if (MLX5_CAP_GEN(mdev, ipoib_ipoib_offloads)) { props->device_cap_flags |= IB_DEVICE_UD_IP_CSUM; props->device_cap_flags |= IB_DEVICE_UD_TSO; } if (MLX5_CAP_GEN(dev->mdev, eth_net_offloads) && MLX5_CAP_ETH(dev->mdev, scatter_fcs)) props->device_cap_flags |= IB_DEVICE_RAW_SCATTER_FCS; if (mlx5_get_flow_namespace(dev->mdev, MLX5_FLOW_NAMESPACE_BYPASS)) props->device_cap_flags |= IB_DEVICE_MANAGED_FLOW_STEERING; props->vendor_part_id = mdev->pdev->device; props->hw_ver = mdev->pdev->revision; props->max_mr_size = ~0ull; props->page_size_cap = ~(min_page_size - 1); props->max_qp = 1 << MLX5_CAP_GEN(mdev, log_max_qp); props->max_qp_wr = 1 << MLX5_CAP_GEN(mdev, log_max_qp_sz); max_rq_sg = MLX5_CAP_GEN(mdev, max_wqe_sz_rq) / sizeof(struct mlx5_wqe_data_seg); max_sq_desc = min_t(int, MLX5_CAP_GEN(mdev, max_wqe_sz_sq), 512); max_sq_sg = (max_sq_desc - sizeof(struct mlx5_wqe_ctrl_seg) - sizeof(struct mlx5_wqe_raddr_seg)) / sizeof(struct mlx5_wqe_data_seg); props->max_sge = min(max_rq_sg, max_sq_sg); props->max_sge_rd = MLX5_MAX_SGE_RD; props->max_cq = 1 << MLX5_CAP_GEN(mdev, log_max_cq); props->max_cqe = (1 << MLX5_CAP_GEN(mdev, log_max_cq_sz)) - 1; props->max_mr = 1 << MLX5_CAP_GEN(mdev, log_max_mkey); props->max_pd = 1 << MLX5_CAP_GEN(mdev, log_max_pd); props->max_qp_rd_atom = 1 << MLX5_CAP_GEN(mdev, log_max_ra_req_qp); props->max_qp_init_rd_atom = 1 << MLX5_CAP_GEN(mdev, log_max_ra_res_qp); props->max_srq = 1 << MLX5_CAP_GEN(mdev, log_max_srq); props->max_srq_wr = (1 << MLX5_CAP_GEN(mdev, log_max_srq_sz)) - 1; props->local_ca_ack_delay = MLX5_CAP_GEN(mdev, local_ca_ack_delay); props->max_res_rd_atom = props->max_qp_rd_atom * props->max_qp; props->max_srq_sge = max_rq_sg - 1; props->max_fast_reg_page_list_len = 1 << MLX5_CAP_GEN(mdev, log_max_klm_list_size); get_atomic_caps(dev, props); props->masked_atomic_cap = IB_ATOMIC_NONE; props->max_mcast_grp = 1 << MLX5_CAP_GEN(mdev, log_max_mcg); props->max_mcast_qp_attach = MLX5_CAP_GEN(mdev, max_qp_mcg); props->max_total_mcast_qp_attach = props->max_mcast_qp_attach * props->max_mcast_grp; props->max_map_per_fmr = INT_MAX; /* no limit in ConnectIB */ props->hca_core_clock = MLX5_CAP_GEN(mdev, device_frequency_khz); props->timestamp_mask = 0x7FFFFFFFFFFFFFFFULL; #ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING if (MLX5_CAP_GEN(mdev, pg)) props->device_cap_flags |= IB_DEVICE_ON_DEMAND_PAGING; props->odp_caps = dev->odp_caps; #endif if 
(MLX5_CAP_GEN(mdev, cd)) props->device_cap_flags |= IB_DEVICE_CROSS_CHANNEL; if (!mlx5_core_is_pf(mdev)) props->device_cap_flags |= IB_DEVICE_VIRTUAL_FUNCTION; if (mlx5_ib_port_link_layer(ibdev, 1) == IB_LINK_LAYER_ETHERNET) { props->rss_caps.max_rwq_indirection_tables = 1 << MLX5_CAP_GEN(dev->mdev, log_max_rqt); props->rss_caps.max_rwq_indirection_table_size = 1 << MLX5_CAP_GEN(dev->mdev, log_max_rqt_size); props->rss_caps.supported_qpts = 1 << IB_QPT_RAW_PACKET; props->max_wq_type_rq = 1 << MLX5_CAP_GEN(dev->mdev, log_max_rq); } if (uhw->outlen) { err = ib_copy_to_udata(uhw, &resp, resp.response_length); if (err) return err; } return 0; } enum mlx5_ib_width { MLX5_IB_WIDTH_1X = 1 << 0, MLX5_IB_WIDTH_2X = 1 << 1, MLX5_IB_WIDTH_4X = 1 << 2, MLX5_IB_WIDTH_8X = 1 << 3, MLX5_IB_WIDTH_12X = 1 << 4 }; static int translate_active_width(struct ib_device *ibdev, u8 active_width, u8 *ib_width) { struct mlx5_ib_dev *dev = to_mdev(ibdev); int err = 0; if (active_width & MLX5_IB_WIDTH_1X) { *ib_width = IB_WIDTH_1X; } else if (active_width & MLX5_IB_WIDTH_2X) { *ib_width = IB_WIDTH_2X; } else if (active_width & MLX5_IB_WIDTH_4X) { *ib_width = IB_WIDTH_4X; } else if (active_width & MLX5_IB_WIDTH_8X) { *ib_width = IB_WIDTH_8X; } else if (active_width & MLX5_IB_WIDTH_12X) { *ib_width = IB_WIDTH_12X; } else { mlx5_ib_dbg(dev, "Invalid active_width %d\n", (int)active_width); err = -EINVAL; } return err; } enum ib_max_vl_num { __IB_MAX_VL_0 = 1, __IB_MAX_VL_0_1 = 2, __IB_MAX_VL_0_3 = 3, __IB_MAX_VL_0_7 = 4, __IB_MAX_VL_0_14 = 5, }; enum mlx5_vl_hw_cap { MLX5_VL_HW_0 = 1, MLX5_VL_HW_0_1 = 2, MLX5_VL_HW_0_2 = 3, MLX5_VL_HW_0_3 = 4, MLX5_VL_HW_0_4 = 5, MLX5_VL_HW_0_5 = 6, MLX5_VL_HW_0_6 = 7, MLX5_VL_HW_0_7 = 8, MLX5_VL_HW_0_14 = 15 }; static int translate_max_vl_num(struct ib_device *ibdev, u8 vl_hw_cap, u8 *max_vl_num) { switch (vl_hw_cap) { case MLX5_VL_HW_0: *max_vl_num = __IB_MAX_VL_0; break; case MLX5_VL_HW_0_1: *max_vl_num = __IB_MAX_VL_0_1; break; case MLX5_VL_HW_0_3: *max_vl_num = __IB_MAX_VL_0_3; break; case MLX5_VL_HW_0_7: *max_vl_num = __IB_MAX_VL_0_7; break; case MLX5_VL_HW_0_14: *max_vl_num = __IB_MAX_VL_0_14; break; default: return -EINVAL; } return 0; } static int mlx5_query_hca_port(struct ib_device *ibdev, u8 port, struct ib_port_attr *props) { struct mlx5_ib_dev *dev = to_mdev(ibdev); struct mlx5_core_dev *mdev = dev->mdev; u32 *rep; int replen = MLX5_ST_SZ_BYTES(query_hca_vport_context_out); struct mlx5_ptys_reg *ptys; struct mlx5_pmtu_reg *pmtu; struct mlx5_pvlc_reg pvlc; void *ctx; int err; rep = mlx5_vzalloc(replen); ptys = kzalloc(sizeof(*ptys), GFP_KERNEL); pmtu = kzalloc(sizeof(*pmtu), GFP_KERNEL); if (!rep || !ptys || !pmtu) { err = -ENOMEM; goto out; } memset(props, 0, sizeof(*props)); err = mlx5_query_hca_vport_context(mdev, port, 0, rep, replen); if (err) goto out; ctx = MLX5_ADDR_OF(query_hca_vport_context_out, rep, hca_vport_context); props->lid = MLX5_GET(hca_vport_context, ctx, lid); props->lmc = MLX5_GET(hca_vport_context, ctx, lmc); props->sm_lid = MLX5_GET(hca_vport_context, ctx, sm_lid); props->sm_sl = MLX5_GET(hca_vport_context, ctx, sm_sl); props->state = MLX5_GET(hca_vport_context, ctx, vport_state); props->phys_state = MLX5_GET(hca_vport_context, ctx, port_physical_state); props->port_cap_flags = MLX5_GET(hca_vport_context, ctx, cap_mask1); props->gid_tbl_len = mlx5_get_gid_table_len(MLX5_CAP_GEN(mdev, gid_table_size)); props->max_msg_sz = 1 << MLX5_CAP_GEN(mdev, log_max_msg); props->pkey_tbl_len = mlx5_to_sw_pkey_sz(MLX5_CAP_GEN(mdev, pkey_table_size)); 
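/*
 * The gid_tbl_len and pkey_tbl_len values above are derived from log-style
 * firmware capabilities. A minimal sketch of that conversion, assuming the
 * helpers behave as in the mlx5 core headers (an 8-entry GID base table and
 * a 128-entry P_Key base table, each scaled by the reported exponent):
 *
 *	static inline int sw_gid_table_len(u16 gid_table_size_cap)
 *	{
 *		return 8 * (1 << gid_table_size_cap);
 *	}
 *
 *	static inline int sw_pkey_table_len(u16 pkey_table_size_cap)
 *	{
 *		return 128 << pkey_table_size_cap;
 *	}
 */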
props->bad_pkey_cntr = MLX5_GET(hca_vport_context, ctx, pkey_violation_counter); props->qkey_viol_cntr = MLX5_GET(hca_vport_context, ctx, qkey_violation_counter); props->subnet_timeout = MLX5_GET(hca_vport_context, ctx, subnet_timeout); props->init_type_reply = MLX5_GET(hca_vport_context, ctx, init_type_reply); props->grh_required = MLX5_GET(hca_vport_context, ctx, grh_required); ptys->proto_mask |= MLX5_PTYS_IB; ptys->local_port = port; err = mlx5_core_access_ptys(mdev, ptys, 0); if (err) goto out; err = translate_active_width(ibdev, ptys->ib_link_width_oper, &props->active_width); if (err) goto out; props->active_speed = (u8)ptys->ib_proto_oper; pmtu->local_port = port; err = mlx5_core_access_pmtu(mdev, pmtu, 0); if (err) goto out; props->max_mtu = pmtu->max_mtu; props->active_mtu = pmtu->oper_mtu; memset(&pvlc, 0, sizeof(pvlc)); pvlc.local_port = port; err = mlx5_core_access_pvlc(mdev, &pvlc, 0); if (err) goto out; err = translate_max_vl_num(ibdev, pvlc.vl_hw_cap, &props->max_vl_num); out: kvfree(rep); kfree(ptys); kfree(pmtu); return err; } int mlx5_ib_query_port(struct ib_device *ibdev, u8 port, struct ib_port_attr *props) { switch (mlx5_get_vport_access_method(ibdev)) { case MLX5_VPORT_ACCESS_METHOD_MAD: return mlx5_query_mad_ifc_port(ibdev, port, props); case MLX5_VPORT_ACCESS_METHOD_HCA: return mlx5_query_hca_port(ibdev, port, props); case MLX5_VPORT_ACCESS_METHOD_NIC: return mlx5_query_port_roce(ibdev, port, props); default: return -EINVAL; } } static int mlx5_ib_query_gid(struct ib_device *ibdev, u8 port, int index, union ib_gid *gid) { struct mlx5_ib_dev *dev = to_mdev(ibdev); struct mlx5_core_dev *mdev = dev->mdev; switch (mlx5_get_vport_access_method(ibdev)) { case MLX5_VPORT_ACCESS_METHOD_MAD: return mlx5_query_mad_ifc_gids(ibdev, port, index, gid); case MLX5_VPORT_ACCESS_METHOD_HCA: return mlx5_query_hca_vport_gid(mdev, port, 0, index, gid); default: return -EINVAL; } } static int mlx5_ib_query_pkey(struct ib_device *ibdev, u8 port, u16 index, u16 *pkey) { struct mlx5_ib_dev *dev = to_mdev(ibdev); struct mlx5_core_dev *mdev = dev->mdev; switch (mlx5_get_vport_access_method(ibdev)) { case MLX5_VPORT_ACCESS_METHOD_MAD: return mlx5_query_mad_ifc_pkey(ibdev, port, index, pkey); case MLX5_VPORT_ACCESS_METHOD_HCA: case MLX5_VPORT_ACCESS_METHOD_NIC: return mlx5_query_hca_vport_pkey(mdev, 0, port, 0, index, pkey); default: return -EINVAL; } } static int mlx5_ib_modify_device(struct ib_device *ibdev, int mask, struct ib_device_modify *props) { struct mlx5_ib_dev *dev = to_mdev(ibdev); struct mlx5_reg_node_desc in; struct mlx5_reg_node_desc out; int err; if (mask & ~IB_DEVICE_MODIFY_NODE_DESC) return -EOPNOTSUPP; if (!(mask & IB_DEVICE_MODIFY_NODE_DESC)) return 0; /* * If possible, pass node desc to FW, so it can generate * a 144 trap. If cmd fails, just ignore. */ memcpy(&in, props->node_desc, IB_DEVICE_NODE_DESC_MAX); err = mlx5_core_access_reg(dev->mdev, &in, sizeof(in), &out, sizeof(out), MLX5_REG_NODE_DESC, 0, 1); if (err) return err; memcpy(ibdev->node_desc, props->node_desc, IB_DEVICE_NODE_DESC_MAX); return err; } static int mlx5_ib_modify_port(struct ib_device *ibdev, u8 port, int mask, struct ib_port_modify *props) { struct mlx5_ib_dev *dev = to_mdev(ibdev); struct ib_port_attr attr; u32 tmp; int err; /* * CM layer calls ib_modify_port() regardless of the link * layer. For Ethernet ports, qkey violation and Port * capabilities are meaningless. 
*/ if (mlx5_ib_port_link_layer(ibdev, port) == IB_LINK_LAYER_ETHERNET) return 0; mutex_lock(&dev->cap_mask_mutex); err = mlx5_ib_query_port(ibdev, port, &attr); if (err) goto out; tmp = (attr.port_cap_flags | props->set_port_cap_mask) & ~props->clr_port_cap_mask; err = mlx5_set_port_caps(dev->mdev, port, tmp); out: mutex_unlock(&dev->cap_mask_mutex); return err; } +static u16 calc_dynamic_bfregs(int uars_per_sys_page) +{ + /* Large page with non 4k uar support might limit the dynamic size */ + if (uars_per_sys_page == 1 && PAGE_SIZE > 4096) + return MLX5_MIN_DYN_BFREGS; + + return MLX5_MAX_DYN_BFREGS; +} + +static int calc_total_bfregs(struct mlx5_ib_dev *dev, bool lib_uar_4k, + struct mlx5_ib_alloc_ucontext_req_v2 *req, + struct mlx5_bfreg_info *bfregi) +{ + int uars_per_sys_page; + int bfregs_per_sys_page; + int ref_bfregs = req->total_num_bfregs; + + if (req->total_num_bfregs == 0) + return -EINVAL; + + BUILD_BUG_ON(MLX5_MAX_BFREGS % MLX5_NON_FP_BFREGS_IN_PAGE); + BUILD_BUG_ON(MLX5_MAX_BFREGS < MLX5_NON_FP_BFREGS_IN_PAGE); + + if (req->total_num_bfregs > MLX5_MAX_BFREGS) + return -ENOMEM; + + uars_per_sys_page = get_uars_per_sys_page(dev, lib_uar_4k); + bfregs_per_sys_page = uars_per_sys_page * MLX5_NON_FP_BFREGS_PER_UAR; + /* This holds the required static allocation asked by the user */ + req->total_num_bfregs = ALIGN(req->total_num_bfregs, bfregs_per_sys_page); + if (req->num_low_latency_bfregs > req->total_num_bfregs - 1) + return -EINVAL; + + bfregi->num_static_sys_pages = req->total_num_bfregs / bfregs_per_sys_page; + bfregi->num_dyn_bfregs = ALIGN(calc_dynamic_bfregs(uars_per_sys_page), bfregs_per_sys_page); + bfregi->total_num_bfregs = req->total_num_bfregs + bfregi->num_dyn_bfregs; + bfregi->num_sys_pages = bfregi->total_num_bfregs / bfregs_per_sys_page; + + mlx5_ib_dbg(dev, "uar_4k: fw support %s, lib support %s, user requested %d bfregs, allocated %d, total bfregs %d, using %d sys pages\n", + MLX5_CAP_GEN(dev->mdev, uar_4k) ? "yes" : "no", + lib_uar_4k ? 
"yes" : "no", ref_bfregs, + req->total_num_bfregs, bfregi->total_num_bfregs, + bfregi->num_sys_pages); + + return 0; +} + +static int allocate_uars(struct mlx5_ib_dev *dev, struct mlx5_ib_ucontext *context) +{ + struct mlx5_bfreg_info *bfregi; + int err; + int i; + + bfregi = &context->bfregi; + for (i = 0; i < bfregi->num_static_sys_pages; i++) { + err = mlx5_cmd_alloc_uar(dev->mdev, &bfregi->sys_pages[i]); + if (err) + goto error; + + mlx5_ib_dbg(dev, "allocated uar %d\n", bfregi->sys_pages[i]); + } + + for (i = bfregi->num_static_sys_pages; i < bfregi->num_sys_pages; i++) + bfregi->sys_pages[i] = MLX5_IB_INVALID_UAR_INDEX; + + return 0; + +error: + for (--i; i >= 0; i--) + if (mlx5_cmd_free_uar(dev->mdev, bfregi->sys_pages[i])) + mlx5_ib_warn(dev, "failed to free uar %d\n", i); + + return err; +} + +static void deallocate_uars(struct mlx5_ib_dev *dev, + struct mlx5_ib_ucontext *context) +{ + struct mlx5_bfreg_info *bfregi; + int i; + + bfregi = &context->bfregi; + for (i = 0; i < bfregi->num_sys_pages; i++) + if (i < bfregi->num_static_sys_pages || + bfregi->sys_pages[i] != MLX5_IB_INVALID_UAR_INDEX) + mlx5_cmd_free_uar(dev->mdev, bfregi->sys_pages[i]); +} + static struct ib_ucontext *mlx5_ib_alloc_ucontext(struct ib_device *ibdev, struct ib_udata *udata) { struct mlx5_ib_dev *dev = to_mdev(ibdev); struct mlx5_ib_alloc_ucontext_req_v2 req = {}; struct mlx5_ib_alloc_ucontext_resp resp = {}; struct mlx5_ib_ucontext *context; - struct mlx5_uuar_info *uuari; - struct mlx5_uar *uars; - int gross_uuars; - int num_uars; + struct mlx5_bfreg_info *bfregi; int ver; - int uuarn; int err; - int i; size_t reqlen; size_t min_req_v2 = offsetof(struct mlx5_ib_alloc_ucontext_req_v2, max_cqe_version); + bool lib_uar_4k; + bool lib_uar_dyn; if (!dev->ib_active) return ERR_PTR(-EAGAIN); if (udata->inlen < sizeof(struct ib_uverbs_cmd_hdr)) return ERR_PTR(-EINVAL); reqlen = udata->inlen - sizeof(struct ib_uverbs_cmd_hdr); if (reqlen == sizeof(struct mlx5_ib_alloc_ucontext_req)) ver = 0; else if (reqlen >= min_req_v2) ver = 2; else return ERR_PTR(-EINVAL); err = ib_copy_from_udata(&req, udata, min(reqlen, sizeof(req))); if (err) return ERR_PTR(err); if (req.flags) return ERR_PTR(-EINVAL); - if (req.total_num_uuars > MLX5_MAX_UUARS) - return ERR_PTR(-ENOMEM); - - if (req.total_num_uuars == 0) - return ERR_PTR(-EINVAL); - if (req.comp_mask || req.reserved0 || req.reserved1 || req.reserved2) return ERR_PTR(-EOPNOTSUPP); + req.total_num_bfregs = ALIGN(req.total_num_bfregs, + MLX5_NON_FP_BFREGS_PER_UAR); + if (req.num_low_latency_bfregs > req.total_num_bfregs - 1) + return ERR_PTR(-EINVAL); + if (reqlen > sizeof(req) && !ib_is_udata_cleared(udata, sizeof(req), reqlen - sizeof(req))) return ERR_PTR(-EOPNOTSUPP); - req.total_num_uuars = ALIGN(req.total_num_uuars, - MLX5_NON_FP_BF_REGS_PER_PAGE); - if (req.num_low_latency_uuars > req.total_num_uuars - 1) - return ERR_PTR(-EINVAL); - - num_uars = req.total_num_uuars / MLX5_NON_FP_BF_REGS_PER_PAGE; - gross_uuars = num_uars * MLX5_BF_REGS_PER_PAGE; resp.qp_tab_size = 1 << MLX5_CAP_GEN(dev->mdev, log_max_qp); if (mlx5_core_is_pf(dev->mdev) && MLX5_CAP_GEN(dev->mdev, bf)) resp.bf_reg_size = 1 << MLX5_CAP_GEN(dev->mdev, log_bf_reg_size); resp.cache_line_size = cache_line_size(); resp.max_sq_desc_sz = MLX5_CAP_GEN(dev->mdev, max_wqe_sz_sq); resp.max_rq_desc_sz = MLX5_CAP_GEN(dev->mdev, max_wqe_sz_rq); resp.max_send_wqebb = 1 << MLX5_CAP_GEN(dev->mdev, log_max_qp_sz); resp.max_recv_wr = 1 << MLX5_CAP_GEN(dev->mdev, log_max_qp_sz); resp.max_srq_recv_wr = 1 << 
MLX5_CAP_GEN(dev->mdev, log_max_srq_sz); resp.cqe_version = min_t(__u8, (__u8)MLX5_CAP_GEN(dev->mdev, cqe_version), req.max_cqe_version); + resp.log_uar_size = MLX5_CAP_GEN(dev->mdev, uar_4k) ? + MLX5_ADAPTER_PAGE_SHIFT : PAGE_SHIFT; + resp.num_uars_per_page = MLX5_CAP_GEN(dev->mdev, uar_4k) ? + MLX5_CAP_GEN(dev->mdev, num_of_uars_per_page) : 1; resp.response_length = min(offsetof(typeof(resp), response_length) + sizeof(resp.response_length), udata->outlen); context = kzalloc(sizeof(*context), GFP_KERNEL); if (!context) return ERR_PTR(-ENOMEM); - uuari = &context->uuari; - mutex_init(&uuari->lock); - uars = kcalloc(num_uars, sizeof(*uars), GFP_KERNEL); - if (!uars) { - err = -ENOMEM; - goto out_ctx; + lib_uar_4k = req.lib_caps & MLX5_LIB_CAP_4K_UAR; + lib_uar_dyn = req.lib_caps & MLX5_LIB_CAP_DYN_UAR; + bfregi = &context->bfregi; + + if (lib_uar_dyn) { + bfregi->lib_uar_dyn = lib_uar_dyn; + goto uar_done; } - uuari->bitmap = kcalloc(BITS_TO_LONGS(gross_uuars), - sizeof(*uuari->bitmap), + /* updates req->total_num_bfregs */ + err = calc_total_bfregs(dev, lib_uar_4k, &req, bfregi); + if (err) + goto out_ctx; + + mutex_init(&bfregi->lock); + bfregi->lib_uar_4k = lib_uar_4k; + bfregi->count = kcalloc(bfregi->total_num_bfregs, sizeof(*bfregi->count), GFP_KERNEL); - if (!uuari->bitmap) { + if (!bfregi->count) { err = -ENOMEM; - goto out_uar_ctx; - } - /* - * clear all fast path uuars - */ - for (i = 0; i < gross_uuars; i++) { - uuarn = i & 3; - if (uuarn == 2 || uuarn == 3) - set_bit(i, uuari->bitmap); + goto out_ctx; } - uuari->count = kcalloc(gross_uuars, sizeof(*uuari->count), GFP_KERNEL); - if (!uuari->count) { + bfregi->sys_pages = kcalloc(bfregi->num_sys_pages, + sizeof(*bfregi->sys_pages), + GFP_KERNEL); + if (!bfregi->sys_pages) { err = -ENOMEM; - goto out_bitmap; + goto out_count; } - for (i = 0; i < num_uars; i++) { - err = mlx5_cmd_alloc_uar(dev->mdev, &uars[i].index); - if (err) - goto out_count; - } + err = allocate_uars(dev, context); + if (err) + goto out_sys_pages; + +uar_done: #ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING context->ibucontext.invalidate_range = &mlx5_ib_invalidate_range; #endif if (MLX5_CAP_GEN(dev->mdev, log_max_transport_domain)) { err = mlx5_alloc_transport_domain(dev->mdev, &context->tdn); if (err) goto out_uars; } INIT_LIST_HEAD(&context->vma_private_list); INIT_LIST_HEAD(&context->db_page_list); mutex_init(&context->db_page_mutex); - resp.tot_uuars = req.total_num_uuars; + resp.tot_bfregs = lib_uar_dyn ? 0 : req.total_num_bfregs; resp.num_ports = MLX5_CAP_GEN(dev->mdev, num_ports); if (field_avail(typeof(resp), cqe_version, udata->outlen)) resp.response_length += sizeof(resp.cqe_version); if (field_avail(typeof(resp), cmds_supp_uhw, udata->outlen)) { resp.cmds_supp_uhw |= MLX5_USER_CMDS_SUPP_UHW_QUERY_DEVICE | MLX5_USER_CMDS_SUPP_UHW_CREATE_AH; resp.response_length += sizeof(resp.cmds_supp_uhw); } /* * We don't want to expose information from the PCI bar that is located * after 4096 bytes, so if the arch only supports larger pages, let's * pretend we don't support reading the HCA's core clock. This is also * forced by mmap function. 
*/ - if (PAGE_SIZE <= 4096 && - field_avail(typeof(resp), hca_core_clock_offset, udata->outlen)) { - resp.comp_mask |= - MLX5_IB_ALLOC_UCONTEXT_RESP_MASK_CORE_CLOCK_OFFSET; - resp.hca_core_clock_offset = - offsetof(struct mlx5_init_seg, internal_timer_h) % - PAGE_SIZE; - resp.response_length += sizeof(resp.hca_core_clock_offset) + - sizeof(resp.reserved2); + if (offsetofend(typeof(resp), hca_core_clock_offset) <= udata->outlen) { + if (PAGE_SIZE <= 4096) { + resp.comp_mask |= + MLX5_IB_ALLOC_UCONTEXT_RESP_MASK_CORE_CLOCK_OFFSET; + resp.hca_core_clock_offset = + offsetof(struct mlx5_init_seg, internal_timer_h) % PAGE_SIZE; + } + resp.response_length += sizeof(resp.hca_core_clock_offset); + } + + if (offsetofend(typeof(resp), log_uar_size) <= udata->outlen) + resp.response_length += sizeof(resp.log_uar_size); + + if (offsetofend(typeof(resp), num_uars_per_page) <= udata->outlen) + resp.response_length += sizeof(resp.num_uars_per_page); + + if (offsetofend(typeof(resp), num_dyn_bfregs) <= udata->outlen) { + resp.num_dyn_bfregs = bfregi->num_dyn_bfregs; + resp.response_length += sizeof(resp.num_dyn_bfregs); } err = ib_copy_to_udata(udata, &resp, resp.response_length); if (err) goto out_td; - uuari->ver = ver; - uuari->num_low_latency_uuars = req.num_low_latency_uuars; - uuari->uars = uars; - uuari->num_uars = num_uars; + bfregi->ver = ver; + bfregi->num_low_latency_bfregs = req.num_low_latency_bfregs; context->cqe_version = resp.cqe_version; return &context->ibucontext; out_td: if (MLX5_CAP_GEN(dev->mdev, log_max_transport_domain)) mlx5_dealloc_transport_domain(dev->mdev, context->tdn); out_uars: - for (i--; i >= 0; i--) - mlx5_cmd_free_uar(dev->mdev, uars[i].index); -out_count: - kfree(uuari->count); + deallocate_uars(dev, context); -out_bitmap: - kfree(uuari->bitmap); +out_sys_pages: + kfree(bfregi->sys_pages); -out_uar_ctx: - kfree(uars); +out_count: + kfree(bfregi->count); out_ctx: kfree(context); return ERR_PTR(err); } static int mlx5_ib_dealloc_ucontext(struct ib_ucontext *ibcontext) { struct mlx5_ib_ucontext *context = to_mucontext(ibcontext); struct mlx5_ib_dev *dev = to_mdev(ibcontext->device); - struct mlx5_uuar_info *uuari = &context->uuari; - int i; + struct mlx5_bfreg_info *bfregi; + bfregi = &context->bfregi; if (MLX5_CAP_GEN(dev->mdev, log_max_transport_domain)) mlx5_dealloc_transport_domain(dev->mdev, context->tdn); - for (i = 0; i < uuari->num_uars; i++) { - if (mlx5_cmd_free_uar(dev->mdev, uuari->uars[i].index)) - mlx5_ib_warn(dev, "failed to free UAR 0x%x\n", uuari->uars[i].index); - } - - kfree(uuari->count); - kfree(uuari->bitmap); - kfree(uuari->uars); + deallocate_uars(dev, context); + kfree(bfregi->sys_pages); + kfree(bfregi->count); kfree(context); return 0; } -static phys_addr_t uar_index2pfn(struct mlx5_ib_dev *dev, int index) +static phys_addr_t uar_index2pfn(struct mlx5_ib_dev *dev, + int uar_idx) { - return (pci_resource_start(dev->mdev->pdev, 0) >> PAGE_SHIFT) + index; + int fw_uars_per_page; + + fw_uars_per_page = MLX5_CAP_GEN(dev->mdev, uar_4k) ? 
MLX5_UARS_IN_PAGE : 1; + + return (pci_resource_start(dev->mdev->pdev, 0) >> PAGE_SHIFT) + uar_idx / fw_uars_per_page; } static int get_command(unsigned long offset) { return (offset >> MLX5_IB_MMAP_CMD_SHIFT) & MLX5_IB_MMAP_CMD_MASK; } static int get_arg(unsigned long offset) { return offset & ((1 << MLX5_IB_MMAP_CMD_SHIFT) - 1); } static int get_index(unsigned long offset) { return get_arg(offset); } +/* Index resides in an extra byte to enable larger values than 255 */ +static int get_extended_index(unsigned long offset) +{ + return get_arg(offset) | ((offset >> 16) & 0xff) << 8; +} + static void mlx5_ib_vma_open(struct vm_area_struct *area) { /* vma_open is called when a new VMA is created on top of our VMA. This * is done through either mremap flow or split_vma (usually due to * mlock, madvise, munmap, etc.) We do not support a clone of the VMA, * as this VMA is strongly hardware related. Therefore we set the * vm_ops of the newly created/cloned VMA to NULL, to prevent it from * calling us again and trying to do incorrect actions. We assume that * the original VMA size is exactly a single page, and therefore all * "splitting" operation will not happen to it. */ area->vm_ops = NULL; } static void mlx5_ib_vma_close(struct vm_area_struct *area) { struct mlx5_ib_vma_private_data *mlx5_ib_vma_priv_data; /* It's guaranteed that all VMAs opened on a FD are closed before the * file itself is closed, therefore no sync is needed with the regular * closing flow. (e.g. mlx5 ib_dealloc_ucontext) * However need a sync with accessing the vma as part of * mlx5_ib_disassociate_ucontext. * The close operation is usually called under mm->mmap_sem except when * process is exiting. * The exiting case is handled explicitly as part of * mlx5_ib_disassociate_ucontext. */ mlx5_ib_vma_priv_data = (struct mlx5_ib_vma_private_data *)area->vm_private_data; /* setting the vma context pointer to null in the mlx5_ib driver's * private data, to protect a race condition in * mlx5_ib_disassociate_ucontext(). */ mlx5_ib_vma_priv_data->vma = NULL; list_del(&mlx5_ib_vma_priv_data->list); kfree(mlx5_ib_vma_priv_data); } static const struct vm_operations_struct mlx5_ib_vm_ops = { .open = mlx5_ib_vma_open, .close = mlx5_ib_vma_close }; static int mlx5_ib_set_vma_data(struct vm_area_struct *vma, struct mlx5_ib_ucontext *ctx) { struct mlx5_ib_vma_private_data *vma_prv; struct list_head *vma_head = &ctx->vma_private_list; vma_prv = kzalloc(sizeof(*vma_prv), GFP_KERNEL); if (!vma_prv) return -ENOMEM; vma_prv->vma = vma; vma->vm_private_data = vma_prv; vma->vm_ops = &mlx5_ib_vm_ops; list_add(&vma_prv->list, vma_head); return 0; } static void mlx5_ib_disassociate_ucontext(struct ib_ucontext *ibcontext) { int ret; struct vm_area_struct *vma; struct mlx5_ib_vma_private_data *vma_private, *n; struct mlx5_ib_ucontext *context = to_mucontext(ibcontext); struct task_struct *owning_process = NULL; struct mm_struct *owning_mm = NULL; owning_process = get_pid_task(ibcontext->tgid, PIDTYPE_PID); if (!owning_process) return; owning_mm = get_task_mm(owning_process); if (!owning_mm) { pr_info("no mm, disassociate ucontext is pending task termination\n"); while (1) { put_task_struct(owning_process); usleep_range(1000, 2000); owning_process = get_pid_task(ibcontext->tgid, PIDTYPE_PID); if (!owning_process || owning_process->task_thread-> td_proc->p_state == PRS_ZOMBIE) { pr_info("disassociate ucontext done, task was terminated\n"); /* in case task was dead need to release the * task struct. 
*/ if (owning_process) put_task_struct(owning_process); return; } } } /* need to protect from a race on closing the vma as part of * mlx5_ib_vma_close. */ down_write(&owning_mm->mmap_sem); list_for_each_entry_safe(vma_private, n, &context->vma_private_list, list) { vma = vma_private->vma; ret = zap_vma_ptes(vma, vma->vm_start, PAGE_SIZE); if (ret == -ENOTSUP) { if (bootverbose) WARN_ONCE( "%s: zap_vma_ptes not implemented for unmanaged mappings", __func__); } else { WARN(ret, "%s: zap_vma_ptes failed, error %d", __func__, -ret); } /* context going to be destroyed, should * not access ops any more. */ /* XXXKIB vma->vm_flags &= ~(VM_SHARED | VM_MAYSHARE); */ vma->vm_ops = NULL; list_del(&vma_private->list); kfree(vma_private); } up_write(&owning_mm->mmap_sem); mmput(owning_mm); put_task_struct(owning_process); } static inline char *mmap_cmd2str(enum mlx5_ib_mmap_cmd cmd) { switch (cmd) { case MLX5_IB_MMAP_WC_PAGE: return "WC"; case MLX5_IB_MMAP_REGULAR_PAGE: return "best effort WC"; case MLX5_IB_MMAP_NC_PAGE: return "NC"; default: return NULL; } } static int uar_mmap(struct mlx5_ib_dev *dev, enum mlx5_ib_mmap_cmd cmd, struct vm_area_struct *vma, struct mlx5_ib_ucontext *context) { - struct mlx5_uuar_info *uuari = &context->uuari; + struct mlx5_bfreg_info *bfregi = &context->bfregi; int err; unsigned long idx; - phys_addr_t pfn, pa; + phys_addr_t pfn; pgprot_t prot; + u32 bfreg_dyn_idx = 0; + u32 uar_index; + int dyn_uar = (cmd == MLX5_IB_MMAP_WC_PAGE); + int max_valid_idx = dyn_uar ? bfregi->num_sys_pages : + bfregi->num_static_sys_pages; + + if (bfregi->lib_uar_dyn) + return -EINVAL; + + if (vma->vm_end - vma->vm_start != PAGE_SIZE) + return -EINVAL; + + if (dyn_uar) + idx = get_extended_index(vma->vm_pgoff) + bfregi->num_static_sys_pages; + else + idx = get_index(vma->vm_pgoff); + + if (idx >= max_valid_idx) { + mlx5_ib_warn(dev, "invalid uar index %lu, max=%d\n", + idx, max_valid_idx); + return -EINVAL; + } switch (cmd) { case MLX5_IB_MMAP_WC_PAGE: -/* Some architectures don't support WC memory */ -#if defined(CONFIG_X86) - if (!pat_enabled()) - return -EPERM; -#elif !(defined(CONFIG_PPC) || (defined(CONFIG_ARM) && defined(CONFIG_MMU))) - return -EPERM; -#endif - /* fall through */ case MLX5_IB_MMAP_REGULAR_PAGE: /* For MLX5_IB_MMAP_REGULAR_PAGE do the best effort to get WC */ prot = pgprot_writecombine(vma->vm_page_prot); break; case MLX5_IB_MMAP_NC_PAGE: prot = pgprot_noncached(vma->vm_page_prot); break; default: return -EINVAL; } - if (vma->vm_end - vma->vm_start != PAGE_SIZE) - return -EINVAL; + if (dyn_uar) { + int uars_per_page; - idx = get_index(vma->vm_pgoff); - if (idx >= uuari->num_uars) - return -EINVAL; + uars_per_page = get_uars_per_sys_page(dev, bfregi->lib_uar_4k); + bfreg_dyn_idx = idx * (uars_per_page * MLX5_NON_FP_BFREGS_PER_UAR); + if (bfreg_dyn_idx >= bfregi->total_num_bfregs) { + mlx5_ib_warn(dev, "invalid bfreg_dyn_idx %u, max=%u\n", + bfreg_dyn_idx, bfregi->total_num_bfregs); + return -EINVAL; + } + + mutex_lock(&bfregi->lock); + /* Fail if uar already allocated, first bfreg index of each + * page holds its count. 
+ */ + if (bfregi->count[bfreg_dyn_idx]) { + mlx5_ib_warn(dev, "wrong offset, idx %lu is busy, bfregn=%u\n", idx, bfreg_dyn_idx); + mutex_unlock(&bfregi->lock); + return -EINVAL; + } + + bfregi->count[bfreg_dyn_idx]++; + mutex_unlock(&bfregi->lock); - pfn = uar_index2pfn(dev, uuari->uars[idx].index); + err = mlx5_cmd_alloc_uar(dev->mdev, &uar_index); + if (err) { + mlx5_ib_warn(dev, "UAR alloc failed\n"); + goto free_bfreg; + } + } else { + uar_index = bfregi->sys_pages[idx]; + } + + pfn = uar_index2pfn(dev, uar_index); mlx5_ib_dbg(dev, "uar idx 0x%lx, pfn %pa\n", idx, &pfn); vma->vm_page_prot = prot; err = io_remap_pfn_range(vma, vma->vm_start, pfn, PAGE_SIZE, vma->vm_page_prot); if (err) { mlx5_ib_err(dev, "io_remap_pfn_range failed with error=%d, vm_start=0x%llx, pfn=%pa, mmap_cmd=%s\n", err, (unsigned long long)vma->vm_start, &pfn, mmap_cmd2str(cmd)); - return -EAGAIN; + goto err; } - pa = pfn << PAGE_SHIFT; - mlx5_ib_dbg(dev, "mapped %s at 0x%llx, PA %pa\n", mmap_cmd2str(cmd), - (unsigned long long)vma->vm_start, &pa); - + if (dyn_uar) + bfregi->sys_pages[idx] = uar_index; return mlx5_ib_set_vma_data(vma, context); + +err: + if (!dyn_uar) + return err; + + mlx5_cmd_free_uar(dev->mdev, idx); + +free_bfreg: + mlx5_ib_free_bfreg(dev, bfregi, bfreg_dyn_idx); + + return err; } static int mlx5_ib_mmap(struct ib_ucontext *ibcontext, struct vm_area_struct *vma) { struct mlx5_ib_ucontext *context = to_mucontext(ibcontext); struct mlx5_ib_dev *dev = to_mdev(ibcontext->device); unsigned long command; phys_addr_t pfn; command = get_command(vma->vm_pgoff); switch (command) { case MLX5_IB_MMAP_WC_PAGE: case MLX5_IB_MMAP_NC_PAGE: case MLX5_IB_MMAP_REGULAR_PAGE: return uar_mmap(dev, command, vma, context); case MLX5_IB_MMAP_GET_CONTIGUOUS_PAGES: return -ENOSYS; case MLX5_IB_MMAP_CORE_CLOCK: if (vma->vm_end - vma->vm_start != PAGE_SIZE) return -EINVAL; if (vma->vm_flags & VM_WRITE) return -EPERM; /* Don't expose to user-space information it shouldn't have */ if (PAGE_SIZE > 4096) return -EOPNOTSUPP; vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); pfn = (dev->mdev->iseg_base + offsetof(struct mlx5_init_seg, internal_timer_h)) >> PAGE_SHIFT; if (io_remap_pfn_range(vma, vma->vm_start, pfn, PAGE_SIZE, vma->vm_page_prot)) return -EAGAIN; mlx5_ib_dbg(dev, "mapped internal timer at 0x%llx, PA 0x%llx\n", (unsigned long long)vma->vm_start, (unsigned long long)pfn << PAGE_SHIFT); break; default: return -EINVAL; } return 0; } static struct ib_pd *mlx5_ib_alloc_pd(struct ib_device *ibdev, struct ib_ucontext *context, struct ib_udata *udata) { struct mlx5_ib_alloc_pd_resp resp; struct mlx5_ib_pd *pd; int err; pd = kmalloc(sizeof(*pd), GFP_KERNEL); if (!pd) return ERR_PTR(-ENOMEM); err = mlx5_core_alloc_pd(to_mdev(ibdev)->mdev, &pd->pdn); if (err) { kfree(pd); return ERR_PTR(err); } if (context) { resp.pdn = pd->pdn; if (ib_copy_to_udata(udata, &resp, sizeof(resp))) { mlx5_core_dealloc_pd(to_mdev(ibdev)->mdev, pd->pdn); kfree(pd); return ERR_PTR(-EFAULT); } } return &pd->ibpd; } static int mlx5_ib_dealloc_pd(struct ib_pd *pd) { struct mlx5_ib_dev *mdev = to_mdev(pd->device); struct mlx5_ib_pd *mpd = to_mpd(pd); mlx5_core_dealloc_pd(mdev->mdev, mpd->pdn); kfree(mpd); return 0; } enum { MATCH_CRITERIA_ENABLE_OUTER_BIT, MATCH_CRITERIA_ENABLE_MISC_BIT, MATCH_CRITERIA_ENABLE_INNER_BIT }; #define HEADER_IS_ZERO(match_criteria, headers) \ !(memchr_inv(MLX5_ADDR_OF(fte_match_param, match_criteria, headers), \ 0, MLX5_FLD_SZ_BYTES(fte_match_param, headers))) \ static u8 get_match_criteria_enable(u32 
*match_criteria) { u8 match_criteria_enable; match_criteria_enable = (!HEADER_IS_ZERO(match_criteria, outer_headers)) << MATCH_CRITERIA_ENABLE_OUTER_BIT; match_criteria_enable |= (!HEADER_IS_ZERO(match_criteria, misc_parameters)) << MATCH_CRITERIA_ENABLE_MISC_BIT; match_criteria_enable |= (!HEADER_IS_ZERO(match_criteria, inner_headers)) << MATCH_CRITERIA_ENABLE_INNER_BIT; return match_criteria_enable; } static void set_proto(void *outer_c, void *outer_v, u8 mask, u8 val) { MLX5_SET(fte_match_set_lyr_2_4, outer_c, ip_protocol, mask); MLX5_SET(fte_match_set_lyr_2_4, outer_v, ip_protocol, val); } static void set_tos(void *outer_c, void *outer_v, u8 mask, u8 val) { MLX5_SET(fte_match_set_lyr_2_4, outer_c, ip_ecn, mask); MLX5_SET(fte_match_set_lyr_2_4, outer_v, ip_ecn, val); MLX5_SET(fte_match_set_lyr_2_4, outer_c, ip_dscp, mask >> 2); MLX5_SET(fte_match_set_lyr_2_4, outer_v, ip_dscp, val >> 2); } #define LAST_ETH_FIELD vlan_tag #define LAST_IB_FIELD sl #define LAST_IPV4_FIELD tos #define LAST_IPV6_FIELD traffic_class #define LAST_TCP_UDP_FIELD src_port /* Field is the last supported field */ #define FIELDS_NOT_SUPPORTED(filter, field)\ memchr_inv((void *)&filter.field +\ sizeof(filter.field), 0,\ sizeof(filter) -\ offsetof(typeof(filter), field) -\ sizeof(filter.field)) static int parse_flow_attr(u32 *match_c, u32 *match_v, const union ib_flow_spec *ib_spec) { void *outer_headers_c = MLX5_ADDR_OF(fte_match_param, match_c, outer_headers); void *outer_headers_v = MLX5_ADDR_OF(fte_match_param, match_v, outer_headers); void *misc_params_c = MLX5_ADDR_OF(fte_match_param, match_c, misc_parameters); void *misc_params_v = MLX5_ADDR_OF(fte_match_param, match_v, misc_parameters); switch (ib_spec->type) { case IB_FLOW_SPEC_ETH: if (FIELDS_NOT_SUPPORTED(ib_spec->eth.mask, LAST_ETH_FIELD)) return -ENOTSUPP; ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, outer_headers_c, dmac_47_16), ib_spec->eth.mask.dst_mac); ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, outer_headers_v, dmac_47_16), ib_spec->eth.val.dst_mac); ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, outer_headers_c, smac_47_16), ib_spec->eth.mask.src_mac); ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, outer_headers_v, smac_47_16), ib_spec->eth.val.src_mac); if (ib_spec->eth.mask.vlan_tag) { MLX5_SET(fte_match_set_lyr_2_4, outer_headers_c, cvlan_tag, 1); MLX5_SET(fte_match_set_lyr_2_4, outer_headers_v, cvlan_tag, 1); MLX5_SET(fte_match_set_lyr_2_4, outer_headers_c, first_vid, ntohs(ib_spec->eth.mask.vlan_tag)); MLX5_SET(fte_match_set_lyr_2_4, outer_headers_v, first_vid, ntohs(ib_spec->eth.val.vlan_tag)); MLX5_SET(fte_match_set_lyr_2_4, outer_headers_c, first_cfi, ntohs(ib_spec->eth.mask.vlan_tag) >> 12); MLX5_SET(fte_match_set_lyr_2_4, outer_headers_v, first_cfi, ntohs(ib_spec->eth.val.vlan_tag) >> 12); MLX5_SET(fte_match_set_lyr_2_4, outer_headers_c, first_prio, ntohs(ib_spec->eth.mask.vlan_tag) >> 13); MLX5_SET(fte_match_set_lyr_2_4, outer_headers_v, first_prio, ntohs(ib_spec->eth.val.vlan_tag) >> 13); } MLX5_SET(fte_match_set_lyr_2_4, outer_headers_c, ethertype, ntohs(ib_spec->eth.mask.ether_type)); MLX5_SET(fte_match_set_lyr_2_4, outer_headers_v, ethertype, ntohs(ib_spec->eth.val.ether_type)); break; case IB_FLOW_SPEC_IPV4: if (FIELDS_NOT_SUPPORTED(ib_spec->ipv4.mask, LAST_IPV4_FIELD)) return -ENOTSUPP; MLX5_SET(fte_match_set_lyr_2_4, outer_headers_c, ethertype, 0xffff); MLX5_SET(fte_match_set_lyr_2_4, outer_headers_v, ethertype, ETH_P_IP); memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, outer_headers_c, 
src_ipv4_src_ipv6.ipv4_layout.ipv4), &ib_spec->ipv4.mask.src_ip, sizeof(ib_spec->ipv4.mask.src_ip)); memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, outer_headers_v, src_ipv4_src_ipv6.ipv4_layout.ipv4), &ib_spec->ipv4.val.src_ip, sizeof(ib_spec->ipv4.val.src_ip)); memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, outer_headers_c, dst_ipv4_dst_ipv6.ipv4_layout.ipv4), &ib_spec->ipv4.mask.dst_ip, sizeof(ib_spec->ipv4.mask.dst_ip)); memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, outer_headers_v, dst_ipv4_dst_ipv6.ipv4_layout.ipv4), &ib_spec->ipv4.val.dst_ip, sizeof(ib_spec->ipv4.val.dst_ip)); set_tos(outer_headers_c, outer_headers_v, ib_spec->ipv4.mask.tos, ib_spec->ipv4.val.tos); set_proto(outer_headers_c, outer_headers_v, ib_spec->ipv4.mask.proto, ib_spec->ipv4.val.proto); break; case IB_FLOW_SPEC_IPV6: if (FIELDS_NOT_SUPPORTED(ib_spec->ipv6.mask, LAST_IPV6_FIELD)) return -ENOTSUPP; MLX5_SET(fte_match_set_lyr_2_4, outer_headers_c, ethertype, 0xffff); MLX5_SET(fte_match_set_lyr_2_4, outer_headers_v, ethertype, IPPROTO_IPV6); memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, outer_headers_c, src_ipv4_src_ipv6.ipv6_layout.ipv6), &ib_spec->ipv6.mask.src_ip, sizeof(ib_spec->ipv6.mask.src_ip)); memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, outer_headers_v, src_ipv4_src_ipv6.ipv6_layout.ipv6), &ib_spec->ipv6.val.src_ip, sizeof(ib_spec->ipv6.val.src_ip)); memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, outer_headers_c, dst_ipv4_dst_ipv6.ipv6_layout.ipv6), &ib_spec->ipv6.mask.dst_ip, sizeof(ib_spec->ipv6.mask.dst_ip)); memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, outer_headers_v, dst_ipv4_dst_ipv6.ipv6_layout.ipv6), &ib_spec->ipv6.val.dst_ip, sizeof(ib_spec->ipv6.val.dst_ip)); set_tos(outer_headers_c, outer_headers_v, ib_spec->ipv6.mask.traffic_class, ib_spec->ipv6.val.traffic_class); set_proto(outer_headers_c, outer_headers_v, ib_spec->ipv6.mask.next_hdr, ib_spec->ipv6.val.next_hdr); MLX5_SET(fte_match_set_misc, misc_params_c, outer_ipv6_flow_label, ntohl(ib_spec->ipv6.mask.flow_label)); MLX5_SET(fte_match_set_misc, misc_params_v, outer_ipv6_flow_label, ntohl(ib_spec->ipv6.val.flow_label)); break; case IB_FLOW_SPEC_TCP: if (FIELDS_NOT_SUPPORTED(ib_spec->tcp_udp.mask, LAST_TCP_UDP_FIELD)) return -ENOTSUPP; MLX5_SET(fte_match_set_lyr_2_4, outer_headers_c, ip_protocol, 0xff); MLX5_SET(fte_match_set_lyr_2_4, outer_headers_v, ip_protocol, IPPROTO_TCP); MLX5_SET(fte_match_set_lyr_2_4, outer_headers_c, tcp_sport, ntohs(ib_spec->tcp_udp.mask.src_port)); MLX5_SET(fte_match_set_lyr_2_4, outer_headers_v, tcp_sport, ntohs(ib_spec->tcp_udp.val.src_port)); MLX5_SET(fte_match_set_lyr_2_4, outer_headers_c, tcp_dport, ntohs(ib_spec->tcp_udp.mask.dst_port)); MLX5_SET(fte_match_set_lyr_2_4, outer_headers_v, tcp_dport, ntohs(ib_spec->tcp_udp.val.dst_port)); break; case IB_FLOW_SPEC_UDP: if (FIELDS_NOT_SUPPORTED(ib_spec->tcp_udp.mask, LAST_TCP_UDP_FIELD)) return -ENOTSUPP; MLX5_SET(fte_match_set_lyr_2_4, outer_headers_c, ip_protocol, 0xff); MLX5_SET(fte_match_set_lyr_2_4, outer_headers_v, ip_protocol, IPPROTO_UDP); MLX5_SET(fte_match_set_lyr_2_4, outer_headers_c, udp_sport, ntohs(ib_spec->tcp_udp.mask.src_port)); MLX5_SET(fte_match_set_lyr_2_4, outer_headers_v, udp_sport, ntohs(ib_spec->tcp_udp.val.src_port)); MLX5_SET(fte_match_set_lyr_2_4, outer_headers_c, udp_dport, ntohs(ib_spec->tcp_udp.mask.dst_port)); MLX5_SET(fte_match_set_lyr_2_4, outer_headers_v, udp_dport, ntohs(ib_spec->tcp_udp.val.dst_port)); break; default: return -EINVAL; } return 0; } /* If a flow could catch both multicast and unicast packets, * it won't fall into the multicast flow 
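/*
 * Illustrative sketch (not part of the patch): how the FIELDS_NOT_SUPPORTED()
 * check used throughout parse_flow_attr() above works.  It treats the
 * ib_flow_spec mask as a byte array and rejects the spec when any byte after
 * the last field this driver can program is non-zero, i.e. user space asked
 * to match on something the hardware rule will silently ignore.  The kernel
 * uses memchr_inv(); the stand-alone demo below uses a plain loop so it can
 * be compiled and run in user space, and the struct layout is a hypothetical
 * stand-in, not the real ib_flow_spec mask.
 */
#include <stddef.h>
#include <stdio.h>
#include <string.h>

struct demo_tcp_udp_mask {              /* hypothetical layout for the demo */
        unsigned short dst_port;
        unsigned short src_port;        /* "last supported field" */
        unsigned int   future_field;    /* not supported by this driver */
};

static int fields_not_supported(const void *mask, size_t last_end, size_t total)
{
        const unsigned char *p = mask;
        size_t i;

        for (i = last_end; i < total; i++)
                if (p[i] != 0)
                        return 1;       /* unsupported field requested */
        return 0;
}

int main(void)
{
        struct demo_tcp_udp_mask m;
        size_t last_end = offsetof(struct demo_tcp_udp_mask, future_field);

        memset(&m, 0, sizeof(m));
        m.src_port = 0xffff;
        printf("only supported fields set -> %d\n",
            fields_not_supported(&m, last_end, sizeof(m)));

        m.future_field = 1;
        printf("unsupported field set     -> %d\n",
            fields_not_supported(&m, last_end, sizeof(m)));
        return 0;
}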
steering table and this rule * could steal other multicast packets. */ static bool flow_is_multicast_only(struct ib_flow_attr *ib_attr) { struct ib_flow_spec_eth *eth_spec; if (ib_attr->type != IB_FLOW_ATTR_NORMAL || ib_attr->size < sizeof(struct ib_flow_attr) + sizeof(struct ib_flow_spec_eth) || ib_attr->num_of_specs < 1) return false; eth_spec = (struct ib_flow_spec_eth *)(ib_attr + 1); if (eth_spec->type != IB_FLOW_SPEC_ETH || eth_spec->size != sizeof(*eth_spec)) return false; return is_multicast_ether_addr(eth_spec->mask.dst_mac) && is_multicast_ether_addr(eth_spec->val.dst_mac); } static bool is_valid_attr(const struct ib_flow_attr *flow_attr) { union ib_flow_spec *ib_spec = (union ib_flow_spec *)(flow_attr + 1); bool has_ipv4_spec = false; bool eth_type_ipv4 = true; unsigned int spec_index; /* Validate that ethertype is correct */ for (spec_index = 0; spec_index < flow_attr->num_of_specs; spec_index++) { if (ib_spec->type == IB_FLOW_SPEC_ETH && ib_spec->eth.mask.ether_type) { if (!((ib_spec->eth.mask.ether_type == htons(0xffff)) && ib_spec->eth.val.ether_type == htons(ETH_P_IP))) eth_type_ipv4 = false; } else if (ib_spec->type == IB_FLOW_SPEC_IPV4) { has_ipv4_spec = true; } ib_spec = (void *)ib_spec + ib_spec->size; } return !has_ipv4_spec || eth_type_ipv4; } static void put_flow_table(struct mlx5_ib_dev *dev, struct mlx5_ib_flow_prio *prio, bool ft_added) { prio->refcount -= !!ft_added; if (!prio->refcount) { mlx5_destroy_flow_table(prio->flow_table); prio->flow_table = NULL; } } static int mlx5_ib_destroy_flow(struct ib_flow *flow_id) { struct mlx5_ib_dev *dev = to_mdev(flow_id->qp->device); struct mlx5_ib_flow_handler *handler = container_of(flow_id, struct mlx5_ib_flow_handler, ibflow); struct mlx5_ib_flow_handler *iter, *tmp; mutex_lock(&dev->flow_db.lock); list_for_each_entry_safe(iter, tmp, &handler->list, list) { mlx5_del_flow_rule(iter->rule); put_flow_table(dev, iter->prio, true); list_del(&iter->list); kfree(iter); } mlx5_del_flow_rule(handler->rule); put_flow_table(dev, handler->prio, true); mutex_unlock(&dev->flow_db.lock); kfree(handler); return 0; } static int ib_prio_to_core_prio(unsigned int priority, bool dont_trap) { priority *= 2; if (!dont_trap) priority++; return priority; } enum flow_table_type { MLX5_IB_FT_RX, MLX5_IB_FT_TX }; #define MLX5_FS_MAX_TYPES 10 #define MLX5_FS_MAX_ENTRIES 32000UL static struct mlx5_ib_flow_prio *get_flow_table(struct mlx5_ib_dev *dev, struct ib_flow_attr *flow_attr, enum flow_table_type ft_type) { bool dont_trap = flow_attr->flags & IB_FLOW_ATTR_FLAGS_DONT_TRAP; struct mlx5_flow_namespace *ns = NULL; struct mlx5_ib_flow_prio *prio; struct mlx5_flow_table *ft; int num_entries; int num_groups; int priority; int err = 0; if (flow_attr->type == IB_FLOW_ATTR_NORMAL) { if (flow_is_multicast_only(flow_attr) && !dont_trap) priority = MLX5_IB_FLOW_MCAST_PRIO; else priority = ib_prio_to_core_prio(flow_attr->priority, dont_trap); ns = mlx5_get_flow_namespace(dev->mdev, MLX5_FLOW_NAMESPACE_BYPASS); num_entries = MLX5_FS_MAX_ENTRIES; num_groups = MLX5_FS_MAX_TYPES; prio = &dev->flow_db.prios[priority]; } else if (flow_attr->type == IB_FLOW_ATTR_ALL_DEFAULT || flow_attr->type == IB_FLOW_ATTR_MC_DEFAULT) { ns = mlx5_get_flow_namespace(dev->mdev, MLX5_FLOW_NAMESPACE_LEFTOVERS); build_leftovers_ft_param("bypass", &priority, &num_entries, &num_groups); prio = &dev->flow_db.prios[MLX5_IB_FLOW_LEFTOVERS_PRIO]; } else if (flow_attr->type == IB_FLOW_ATTR_SNIFFER) { if (!MLX5_CAP_FLOWTABLE(dev->mdev, allow_sniffer_and_nic_rx_shared_tir)) return 
ERR_PTR(-ENOTSUPP); ns = mlx5_get_flow_namespace(dev->mdev, ft_type == MLX5_IB_FT_RX ? MLX5_FLOW_NAMESPACE_SNIFFER_RX : MLX5_FLOW_NAMESPACE_SNIFFER_TX); prio = &dev->flow_db.sniffer[ft_type]; priority = 0; num_entries = 1; num_groups = 1; } if (!ns) return ERR_PTR(-ENOTSUPP); ft = prio->flow_table; if (!ft) { ft = mlx5_create_auto_grouped_flow_table(ns, priority, "bypass", num_entries, num_groups); if (!IS_ERR(ft)) { prio->refcount = 0; prio->flow_table = ft; } else { err = PTR_ERR(ft); } } return err ? ERR_PTR(err) : prio; } static struct mlx5_ib_flow_handler *create_flow_rule(struct mlx5_ib_dev *dev, struct mlx5_ib_flow_prio *ft_prio, const struct ib_flow_attr *flow_attr, struct mlx5_flow_destination *dst) { struct mlx5_flow_table *ft = ft_prio->flow_table; struct mlx5_ib_flow_handler *handler; struct mlx5_flow_spec *spec; const void *ib_flow = (const void *)flow_attr + sizeof(*flow_attr); unsigned int spec_index; u32 action; int err = 0; if (!is_valid_attr(flow_attr)) return ERR_PTR(-EINVAL); spec = mlx5_vzalloc(sizeof(*spec)); handler = kzalloc(sizeof(*handler), GFP_KERNEL); if (!handler || !spec) { err = -ENOMEM; goto free; } INIT_LIST_HEAD(&handler->list); for (spec_index = 0; spec_index < flow_attr->num_of_specs; spec_index++) { err = parse_flow_attr(spec->match_criteria, spec->match_value, ib_flow); if (err < 0) goto free; ib_flow += ((union ib_flow_spec *)ib_flow)->size; } spec->match_criteria_enable = get_match_criteria_enable(spec->match_criteria); action = dst ? MLX5_FLOW_CONTEXT_ACTION_FWD_DEST : MLX5_FLOW_CONTEXT_ACTION_FWD_NEXT_PRIO; handler->rule = mlx5_add_flow_rule(ft, spec->match_criteria_enable, spec->match_criteria, spec->match_value, action, MLX5_FS_DEFAULT_FLOW_TAG, dst); if (IS_ERR(handler->rule)) { err = PTR_ERR(handler->rule); goto free; } ft_prio->refcount++; handler->prio = ft_prio; ft_prio->flow_table = ft; free: if (err) kfree(handler); kvfree(spec); return err ? 
ERR_PTR(err) : handler; } static struct mlx5_ib_flow_handler *create_dont_trap_rule(struct mlx5_ib_dev *dev, struct mlx5_ib_flow_prio *ft_prio, struct ib_flow_attr *flow_attr, struct mlx5_flow_destination *dst) { struct mlx5_ib_flow_handler *handler_dst = NULL; struct mlx5_ib_flow_handler *handler = NULL; handler = create_flow_rule(dev, ft_prio, flow_attr, NULL); if (!IS_ERR(handler)) { handler_dst = create_flow_rule(dev, ft_prio, flow_attr, dst); if (IS_ERR(handler_dst)) { mlx5_del_flow_rule(handler->rule); ft_prio->refcount--; kfree(handler); handler = handler_dst; } else { list_add(&handler_dst->list, &handler->list); } } return handler; } enum { LEFTOVERS_MC, LEFTOVERS_UC, }; static struct mlx5_ib_flow_handler *create_leftovers_rule(struct mlx5_ib_dev *dev, struct mlx5_ib_flow_prio *ft_prio, struct ib_flow_attr *flow_attr, struct mlx5_flow_destination *dst) { struct mlx5_ib_flow_handler *handler_ucast = NULL; struct mlx5_ib_flow_handler *handler = NULL; static struct { struct ib_flow_attr flow_attr; struct ib_flow_spec_eth eth_flow; } leftovers_specs[] = { [LEFTOVERS_MC] = { .flow_attr = { .num_of_specs = 1, .size = sizeof(leftovers_specs[0]) }, .eth_flow = { .type = IB_FLOW_SPEC_ETH, .size = sizeof(struct ib_flow_spec_eth), .mask = {.dst_mac = {0x1} }, .val = {.dst_mac = {0x1} } } }, [LEFTOVERS_UC] = { .flow_attr = { .num_of_specs = 1, .size = sizeof(leftovers_specs[0]) }, .eth_flow = { .type = IB_FLOW_SPEC_ETH, .size = sizeof(struct ib_flow_spec_eth), .mask = {.dst_mac = {0x1} }, .val = {.dst_mac = {} } } } }; handler = create_flow_rule(dev, ft_prio, &leftovers_specs[LEFTOVERS_MC].flow_attr, dst); if (!IS_ERR(handler) && flow_attr->type == IB_FLOW_ATTR_ALL_DEFAULT) { handler_ucast = create_flow_rule(dev, ft_prio, &leftovers_specs[LEFTOVERS_UC].flow_attr, dst); if (IS_ERR(handler_ucast)) { mlx5_del_flow_rule(handler->rule); ft_prio->refcount--; kfree(handler); handler = handler_ucast; } else { list_add(&handler_ucast->list, &handler->list); } } return handler; } static struct mlx5_ib_flow_handler *create_sniffer_rule(struct mlx5_ib_dev *dev, struct mlx5_ib_flow_prio *ft_rx, struct mlx5_ib_flow_prio *ft_tx, struct mlx5_flow_destination *dst) { struct mlx5_ib_flow_handler *handler_rx; struct mlx5_ib_flow_handler *handler_tx; int err; static const struct ib_flow_attr flow_attr = { .num_of_specs = 0, .size = sizeof(flow_attr) }; handler_rx = create_flow_rule(dev, ft_rx, &flow_attr, dst); if (IS_ERR(handler_rx)) { err = PTR_ERR(handler_rx); goto err; } handler_tx = create_flow_rule(dev, ft_tx, &flow_attr, dst); if (IS_ERR(handler_tx)) { err = PTR_ERR(handler_tx); goto err_tx; } list_add(&handler_tx->list, &handler_rx->list); return handler_rx; err_tx: mlx5_del_flow_rule(handler_rx->rule); ft_rx->refcount--; kfree(handler_rx); err: return ERR_PTR(err); } static struct ib_flow *mlx5_ib_create_flow(struct ib_qp *qp, struct ib_flow_attr *flow_attr, int domain) { struct mlx5_ib_dev *dev = to_mdev(qp->device); struct mlx5_ib_qp *mqp = to_mqp(qp); struct mlx5_ib_flow_handler *handler = NULL; struct mlx5_flow_destination *dst = NULL; struct mlx5_ib_flow_prio *ft_prio_tx = NULL; struct mlx5_ib_flow_prio *ft_prio; int err; if (flow_attr->priority > MLX5_IB_FLOW_LAST_PRIO) return ERR_PTR(-ENOSPC); if (domain != IB_FLOW_DOMAIN_USER || flow_attr->port > MLX5_CAP_GEN(dev->mdev, num_ports) || (flow_attr->flags & ~IB_FLOW_ATTR_FLAGS_DONT_TRAP)) return ERR_PTR(-EINVAL); dst = kzalloc(sizeof(*dst), GFP_KERNEL); if (!dst) return ERR_PTR(-ENOMEM); mutex_lock(&dev->flow_db.lock); ft_prio = get_flow_table(dev, 
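/*
 * Illustrative sketch (not part of the patch): why the leftover rules above
 * use a dst_mac mask of 01:00:00:00:00:00 with a value of either 01 or 00.
 * Bit 0 of the first MAC octet is the group (multicast) bit, so masking only
 * that bit catches all multicast frames when the value is 1 and all unicast
 * frames when it is 0 -- the same property flow_is_multicast_only() relies
 * on via is_multicast_ether_addr().  Compile-and-run user-space demo:
 */
#include <stdio.h>

static int group_bit(const unsigned char *mac)
{
        return mac[0] & 0x01;   /* 1 = multicast/broadcast, 0 = unicast */
}

int main(void)
{
        const unsigned char bcast[6] = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff };
        const unsigned char ucast[6] = { 0x00, 0x25, 0x90, 0x12, 0x34, 0x56 };

        printf("ff:ff:ff:ff:ff:ff -> group bit %d\n", group_bit(bcast));
        printf("00:25:90:12:34:56 -> group bit %d\n", group_bit(ucast));
        return 0;
}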
flow_attr, MLX5_IB_FT_RX); if (IS_ERR(ft_prio)) { err = PTR_ERR(ft_prio); goto unlock; } if (flow_attr->type == IB_FLOW_ATTR_SNIFFER) { ft_prio_tx = get_flow_table(dev, flow_attr, MLX5_IB_FT_TX); if (IS_ERR(ft_prio_tx)) { err = PTR_ERR(ft_prio_tx); ft_prio_tx = NULL; goto destroy_ft; } } dst->type = MLX5_FLOW_DESTINATION_TYPE_TIR; if (mqp->flags & MLX5_IB_QP_RSS) dst->tir_num = mqp->rss_qp.tirn; else dst->tir_num = mqp->raw_packet_qp.rq.tirn; if (flow_attr->type == IB_FLOW_ATTR_NORMAL) { if (flow_attr->flags & IB_FLOW_ATTR_FLAGS_DONT_TRAP) { handler = create_dont_trap_rule(dev, ft_prio, flow_attr, dst); } else { handler = create_flow_rule(dev, ft_prio, flow_attr, dst); } } else if (flow_attr->type == IB_FLOW_ATTR_ALL_DEFAULT || flow_attr->type == IB_FLOW_ATTR_MC_DEFAULT) { handler = create_leftovers_rule(dev, ft_prio, flow_attr, dst); } else if (flow_attr->type == IB_FLOW_ATTR_SNIFFER) { handler = create_sniffer_rule(dev, ft_prio, ft_prio_tx, dst); } else { err = -EINVAL; goto destroy_ft; } if (IS_ERR(handler)) { err = PTR_ERR(handler); handler = NULL; goto destroy_ft; } mutex_unlock(&dev->flow_db.lock); kfree(dst); return &handler->ibflow; destroy_ft: put_flow_table(dev, ft_prio, false); if (ft_prio_tx) put_flow_table(dev, ft_prio_tx, false); unlock: mutex_unlock(&dev->flow_db.lock); kfree(dst); kfree(handler); return ERR_PTR(err); } static int mlx5_ib_mcg_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid) { struct mlx5_ib_dev *dev = to_mdev(ibqp->device); int err; err = mlx5_core_attach_mcg(dev->mdev, gid, ibqp->qp_num); if (err) mlx5_ib_warn(dev, "failed attaching QPN 0x%x, MGID %pI6\n", ibqp->qp_num, gid->raw); return err; } static int mlx5_ib_mcg_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid) { struct mlx5_ib_dev *dev = to_mdev(ibqp->device); int err; err = mlx5_core_detach_mcg(dev->mdev, gid, ibqp->qp_num); if (err) mlx5_ib_warn(dev, "failed detaching QPN 0x%x, MGID %pI6\n", ibqp->qp_num, gid->raw); return err; } static int init_node_data(struct mlx5_ib_dev *dev) { int err; err = mlx5_query_node_desc(dev, dev->ib_dev.node_desc); if (err) return err; return mlx5_query_node_guid(dev, &dev->ib_dev.node_guid); } static ssize_t show_fw_pages(struct device *device, struct device_attribute *attr, char *buf) { struct mlx5_ib_dev *dev = container_of(device, struct mlx5_ib_dev, ib_dev.dev); return sprintf(buf, "%lld\n", (long long)dev->mdev->priv.fw_pages); } static ssize_t show_reg_pages(struct device *device, struct device_attribute *attr, char *buf) { struct mlx5_ib_dev *dev = container_of(device, struct mlx5_ib_dev, ib_dev.dev); return sprintf(buf, "%d\n", atomic_read(&dev->mdev->priv.reg_pages)); } static ssize_t show_hca(struct device *device, struct device_attribute *attr, char *buf) { struct mlx5_ib_dev *dev = container_of(device, struct mlx5_ib_dev, ib_dev.dev); return sprintf(buf, "MT%d\n", dev->mdev->pdev->device); } static ssize_t show_rev(struct device *device, struct device_attribute *attr, char *buf) { struct mlx5_ib_dev *dev = container_of(device, struct mlx5_ib_dev, ib_dev.dev); return sprintf(buf, "%x\n", dev->mdev->pdev->revision); } static ssize_t show_board(struct device *device, struct device_attribute *attr, char *buf) { struct mlx5_ib_dev *dev = container_of(device, struct mlx5_ib_dev, ib_dev.dev); return sprintf(buf, "%.*s\n", MLX5_BOARD_ID_LEN, dev->mdev->board_id); } static DEVICE_ATTR(hw_rev, S_IRUGO, show_rev, NULL); static DEVICE_ATTR(hca_type, S_IRUGO, show_hca, NULL); static DEVICE_ATTR(board_id, S_IRUGO, show_board, NULL); static 
DEVICE_ATTR(fw_pages, S_IRUGO, show_fw_pages, NULL); static DEVICE_ATTR(reg_pages, S_IRUGO, show_reg_pages, NULL); static struct device_attribute *mlx5_class_attributes[] = { &dev_attr_hw_rev, &dev_attr_hca_type, &dev_attr_board_id, &dev_attr_fw_pages, &dev_attr_reg_pages, }; static void pkey_change_handler(struct work_struct *work) { struct mlx5_ib_port_resources *ports = container_of(work, struct mlx5_ib_port_resources, pkey_change_work); mutex_lock(&ports->devr->mutex); mlx5_ib_gsi_pkey_change(ports->gsi); mutex_unlock(&ports->devr->mutex); } static void mlx5_ib_handle_internal_error(struct mlx5_ib_dev *ibdev) { struct mlx5_ib_qp *mqp; struct mlx5_ib_cq *send_mcq, *recv_mcq; struct mlx5_core_cq *mcq; struct list_head cq_armed_list; unsigned long flags_qp; unsigned long flags_cq; unsigned long flags; INIT_LIST_HEAD(&cq_armed_list); /* Go over qp list reside on that ibdev, sync with create/destroy qp.*/ spin_lock_irqsave(&ibdev->reset_flow_resource_lock, flags); list_for_each_entry(mqp, &ibdev->qp_list, qps_list) { spin_lock_irqsave(&mqp->sq.lock, flags_qp); if (mqp->sq.tail != mqp->sq.head) { send_mcq = to_mcq(mqp->ibqp.send_cq); spin_lock_irqsave(&send_mcq->lock, flags_cq); if (send_mcq->mcq.comp && mqp->ibqp.send_cq->comp_handler) { if (!send_mcq->mcq.reset_notify_added) { send_mcq->mcq.reset_notify_added = 1; list_add_tail(&send_mcq->mcq.reset_notify, &cq_armed_list); } } spin_unlock_irqrestore(&send_mcq->lock, flags_cq); } spin_unlock_irqrestore(&mqp->sq.lock, flags_qp); spin_lock_irqsave(&mqp->rq.lock, flags_qp); /* no handling is needed for SRQ */ if (!mqp->ibqp.srq) { if (mqp->rq.tail != mqp->rq.head) { recv_mcq = to_mcq(mqp->ibqp.recv_cq); spin_lock_irqsave(&recv_mcq->lock, flags_cq); if (recv_mcq->mcq.comp && mqp->ibqp.recv_cq->comp_handler) { if (!recv_mcq->mcq.reset_notify_added) { recv_mcq->mcq.reset_notify_added = 1; list_add_tail(&recv_mcq->mcq.reset_notify, &cq_armed_list); } } spin_unlock_irqrestore(&recv_mcq->lock, flags_cq); } } spin_unlock_irqrestore(&mqp->rq.lock, flags_qp); } /*At that point all inflight post send were put to be executed as of we * lock/unlock above locks Now need to arm all involved CQs. */ list_for_each_entry(mcq, &cq_armed_list, reset_notify) { mcq->comp(mcq, NULL); } spin_unlock_irqrestore(&ibdev->reset_flow_resource_lock, flags); } static void mlx5_ib_event(struct mlx5_core_dev *dev, void *context, enum mlx5_dev_event event, unsigned long param) { struct mlx5_ib_dev *ibdev = (struct mlx5_ib_dev *)context; struct ib_event ibev; bool fatal = false; u8 port = (u8)param; switch (event) { case MLX5_DEV_EVENT_SYS_ERROR: ibev.event = IB_EVENT_DEVICE_FATAL; mlx5_ib_handle_internal_error(ibdev); fatal = true; break; case MLX5_DEV_EVENT_PORT_UP: case MLX5_DEV_EVENT_PORT_DOWN: case MLX5_DEV_EVENT_PORT_INITIALIZED: /* In RoCE, port up/down events are handled in * mlx5_netdev_event(). */ if (mlx5_ib_port_link_layer(&ibdev->ib_dev, port) == IB_LINK_LAYER_ETHERNET) return; ibev.event = (event == MLX5_DEV_EVENT_PORT_UP) ? 
IB_EVENT_PORT_ACTIVE : IB_EVENT_PORT_ERR; break; case MLX5_DEV_EVENT_LID_CHANGE: ibev.event = IB_EVENT_LID_CHANGE; break; case MLX5_DEV_EVENT_PKEY_CHANGE: ibev.event = IB_EVENT_PKEY_CHANGE; schedule_work(&ibdev->devr.ports[port - 1].pkey_change_work); break; case MLX5_DEV_EVENT_GUID_CHANGE: ibev.event = IB_EVENT_GID_CHANGE; break; case MLX5_DEV_EVENT_CLIENT_REREG: ibev.event = IB_EVENT_CLIENT_REREGISTER; break; default: /* unsupported event */ return; } ibev.device = &ibdev->ib_dev; ibev.element.port_num = port; if (!rdma_is_port_valid(&ibdev->ib_dev, port)) { mlx5_ib_warn(ibdev, "warning: event(%d) on port %d\n", event, port); return; } if (ibdev->ib_active) ib_dispatch_event(&ibev); if (fatal) ibdev->ib_active = false; } static void get_ext_port_caps(struct mlx5_ib_dev *dev) { int port; for (port = 1; port <= MLX5_CAP_GEN(dev->mdev, num_ports); port++) mlx5_query_ext_port_caps(dev, port); } static int get_port_caps(struct mlx5_ib_dev *dev) { struct ib_device_attr *dprops = NULL; struct ib_port_attr *pprops = NULL; int err = -ENOMEM; int port; struct ib_udata uhw = {.inlen = 0, .outlen = 0}; pprops = kmalloc(sizeof(*pprops), GFP_KERNEL); if (!pprops) goto out; dprops = kmalloc(sizeof(*dprops), GFP_KERNEL); if (!dprops) goto out; err = mlx5_ib_query_device(&dev->ib_dev, dprops, &uhw); if (err) { mlx5_ib_warn(dev, "query_device failed %d\n", err); goto out; } for (port = 1; port <= MLX5_CAP_GEN(dev->mdev, num_ports); port++) { err = mlx5_ib_query_port(&dev->ib_dev, port, pprops); if (err) { mlx5_ib_warn(dev, "query_port %d failed %d\n", port, err); break; } dev->mdev->port_caps[port - 1].pkey_table_len = dprops->max_pkeys; dev->mdev->port_caps[port - 1].gid_table_len = pprops->gid_tbl_len; mlx5_ib_dbg(dev, "pkey_table_len %d, gid_table_len %d\n", dprops->max_pkeys, pprops->gid_tbl_len); } out: kfree(pprops); kfree(dprops); return err; } static void destroy_umrc_res(struct mlx5_ib_dev *dev) { int err; err = mlx5_mr_cache_cleanup(dev); if (err) mlx5_ib_warn(dev, "mr cache cleanup failed\n"); mlx5_ib_destroy_qp(dev->umrc.qp); ib_free_cq(dev->umrc.cq); ib_dealloc_pd(dev->umrc.pd); } enum { MAX_UMR_WR = 128, }; static int create_umr_res(struct mlx5_ib_dev *dev) { struct ib_qp_init_attr *init_attr = NULL; struct ib_qp_attr *attr = NULL; struct ib_pd *pd; struct ib_cq *cq; struct ib_qp *qp; int ret; attr = kzalloc(sizeof(*attr), GFP_KERNEL); init_attr = kzalloc(sizeof(*init_attr), GFP_KERNEL); if (!attr || !init_attr) { ret = -ENOMEM; goto error_0; } pd = ib_alloc_pd(&dev->ib_dev, 0); if (IS_ERR(pd)) { mlx5_ib_dbg(dev, "Couldn't create PD for sync UMR QP\n"); ret = PTR_ERR(pd); goto error_0; } cq = ib_alloc_cq(&dev->ib_dev, NULL, 128, 0, IB_POLL_SOFTIRQ); if (IS_ERR(cq)) { mlx5_ib_dbg(dev, "Couldn't create CQ for sync UMR QP\n"); ret = PTR_ERR(cq); goto error_2; } init_attr->send_cq = cq; init_attr->recv_cq = cq; init_attr->sq_sig_type = IB_SIGNAL_ALL_WR; init_attr->cap.max_send_wr = MAX_UMR_WR; init_attr->cap.max_send_sge = 1; init_attr->qp_type = MLX5_IB_QPT_REG_UMR; init_attr->port_num = 1; qp = mlx5_ib_create_qp(pd, init_attr, NULL); if (IS_ERR(qp)) { mlx5_ib_dbg(dev, "Couldn't create sync UMR QP\n"); ret = PTR_ERR(qp); goto error_3; } qp->device = &dev->ib_dev; qp->real_qp = qp; qp->uobject = NULL; qp->qp_type = MLX5_IB_QPT_REG_UMR; attr->qp_state = IB_QPS_INIT; attr->port_num = 1; ret = mlx5_ib_modify_qp(qp, attr, IB_QP_STATE | IB_QP_PKEY_INDEX | IB_QP_PORT, NULL); if (ret) { mlx5_ib_dbg(dev, "Couldn't modify UMR QP\n"); goto error_4; } memset(attr, 0, sizeof(*attr)); attr->qp_state = 
IB_QPS_RTR; attr->path_mtu = IB_MTU_256; ret = mlx5_ib_modify_qp(qp, attr, IB_QP_STATE, NULL); if (ret) { mlx5_ib_dbg(dev, "Couldn't modify umr QP to rtr\n"); goto error_4; } memset(attr, 0, sizeof(*attr)); attr->qp_state = IB_QPS_RTS; ret = mlx5_ib_modify_qp(qp, attr, IB_QP_STATE, NULL); if (ret) { mlx5_ib_dbg(dev, "Couldn't modify umr QP to rts\n"); goto error_4; } dev->umrc.qp = qp; dev->umrc.cq = cq; dev->umrc.pd = pd; sema_init(&dev->umrc.sem, MAX_UMR_WR); ret = mlx5_mr_cache_init(dev); if (ret) { mlx5_ib_warn(dev, "mr cache init failed %d\n", ret); goto error_4; } kfree(attr); kfree(init_attr); return 0; error_4: mlx5_ib_destroy_qp(qp); error_3: ib_free_cq(cq); error_2: ib_dealloc_pd(pd); error_0: kfree(attr); kfree(init_attr); return ret; } static int create_dev_resources(struct mlx5_ib_resources *devr) { struct ib_srq_init_attr attr; struct mlx5_ib_dev *dev; struct ib_cq_init_attr cq_attr = {.cqe = 1}; int port; int ret = 0; dev = container_of(devr, struct mlx5_ib_dev, devr); mutex_init(&devr->mutex); devr->p0 = mlx5_ib_alloc_pd(&dev->ib_dev, NULL, NULL); if (IS_ERR(devr->p0)) { ret = PTR_ERR(devr->p0); goto error0; } devr->p0->device = &dev->ib_dev; devr->p0->uobject = NULL; atomic_set(&devr->p0->usecnt, 0); devr->c0 = mlx5_ib_create_cq(&dev->ib_dev, &cq_attr, NULL, NULL); if (IS_ERR(devr->c0)) { ret = PTR_ERR(devr->c0); goto error1; } devr->c0->device = &dev->ib_dev; devr->c0->uobject = NULL; devr->c0->comp_handler = NULL; devr->c0->event_handler = NULL; devr->c0->cq_context = NULL; atomic_set(&devr->c0->usecnt, 0); devr->x0 = mlx5_ib_alloc_xrcd(&dev->ib_dev, NULL, NULL); if (IS_ERR(devr->x0)) { ret = PTR_ERR(devr->x0); goto error2; } devr->x0->device = &dev->ib_dev; devr->x0->inode = NULL; atomic_set(&devr->x0->usecnt, 0); mutex_init(&devr->x0->tgt_qp_mutex); INIT_LIST_HEAD(&devr->x0->tgt_qp_list); devr->x1 = mlx5_ib_alloc_xrcd(&dev->ib_dev, NULL, NULL); if (IS_ERR(devr->x1)) { ret = PTR_ERR(devr->x1); goto error3; } devr->x1->device = &dev->ib_dev; devr->x1->inode = NULL; atomic_set(&devr->x1->usecnt, 0); mutex_init(&devr->x1->tgt_qp_mutex); INIT_LIST_HEAD(&devr->x1->tgt_qp_list); memset(&attr, 0, sizeof(attr)); attr.attr.max_sge = 1; attr.attr.max_wr = 1; attr.srq_type = IB_SRQT_XRC; attr.ext.xrc.cq = devr->c0; attr.ext.xrc.xrcd = devr->x0; devr->s0 = mlx5_ib_create_srq(devr->p0, &attr, NULL); if (IS_ERR(devr->s0)) { ret = PTR_ERR(devr->s0); goto error4; } devr->s0->device = &dev->ib_dev; devr->s0->pd = devr->p0; devr->s0->uobject = NULL; devr->s0->event_handler = NULL; devr->s0->srq_context = NULL; devr->s0->srq_type = IB_SRQT_XRC; devr->s0->ext.xrc.xrcd = devr->x0; devr->s0->ext.xrc.cq = devr->c0; atomic_inc(&devr->s0->ext.xrc.xrcd->usecnt); atomic_inc(&devr->s0->ext.xrc.cq->usecnt); atomic_inc(&devr->p0->usecnt); atomic_set(&devr->s0->usecnt, 0); memset(&attr, 0, sizeof(attr)); attr.attr.max_sge = 1; attr.attr.max_wr = 1; attr.srq_type = IB_SRQT_BASIC; devr->s1 = mlx5_ib_create_srq(devr->p0, &attr, NULL); if (IS_ERR(devr->s1)) { ret = PTR_ERR(devr->s1); goto error5; } devr->s1->device = &dev->ib_dev; devr->s1->pd = devr->p0; devr->s1->uobject = NULL; devr->s1->event_handler = NULL; devr->s1->srq_context = NULL; devr->s1->srq_type = IB_SRQT_BASIC; devr->s1->ext.xrc.cq = devr->c0; atomic_inc(&devr->p0->usecnt); atomic_set(&devr->s0->usecnt, 0); for (port = 0; port < ARRAY_SIZE(devr->ports); ++port) { INIT_WORK(&devr->ports[port].pkey_change_work, pkey_change_handler); devr->ports[port].devr = devr; } return 0; error5: mlx5_ib_destroy_srq(devr->s0); error4: 
mlx5_ib_dealloc_xrcd(devr->x1); error3: mlx5_ib_dealloc_xrcd(devr->x0); error2: mlx5_ib_destroy_cq(devr->c0); error1: mlx5_ib_dealloc_pd(devr->p0); error0: return ret; } static void destroy_dev_resources(struct mlx5_ib_resources *devr) { struct mlx5_ib_dev *dev = container_of(devr, struct mlx5_ib_dev, devr); int port; mlx5_ib_destroy_srq(devr->s1); mlx5_ib_destroy_srq(devr->s0); mlx5_ib_dealloc_xrcd(devr->x0); mlx5_ib_dealloc_xrcd(devr->x1); mlx5_ib_destroy_cq(devr->c0); mlx5_ib_dealloc_pd(devr->p0); /* Make sure no change P_Key work items are still executing */ for (port = 0; port < dev->num_ports; ++port) cancel_work_sync(&devr->ports[port].pkey_change_work); } static u32 get_core_cap_flags(struct ib_device *ibdev) { struct mlx5_ib_dev *dev = to_mdev(ibdev); enum rdma_link_layer ll = mlx5_ib_port_link_layer(ibdev, 1); u8 l3_type_cap = MLX5_CAP_ROCE(dev->mdev, l3_type); u8 roce_version_cap = MLX5_CAP_ROCE(dev->mdev, roce_version); u32 ret = 0; if (ll == IB_LINK_LAYER_INFINIBAND) return RDMA_CORE_PORT_IBA_IB; if (!(l3_type_cap & MLX5_ROCE_L3_TYPE_IPV4_CAP)) return 0; if (!(l3_type_cap & MLX5_ROCE_L3_TYPE_IPV6_CAP)) return 0; if (roce_version_cap & MLX5_ROCE_VERSION_1_CAP) ret |= RDMA_CORE_PORT_IBA_ROCE; if (roce_version_cap & MLX5_ROCE_VERSION_2_CAP) ret |= RDMA_CORE_PORT_IBA_ROCE_UDP_ENCAP; return ret; } static int mlx5_port_immutable(struct ib_device *ibdev, u8 port_num, struct ib_port_immutable *immutable) { struct ib_port_attr attr; int err; err = mlx5_ib_query_port(ibdev, port_num, &attr); if (err) return err; immutable->pkey_tbl_len = attr.pkey_tbl_len; immutable->gid_tbl_len = attr.gid_tbl_len; immutable->core_cap_flags = get_core_cap_flags(ibdev); immutable->max_mad_size = IB_MGMT_MAD_SIZE; return 0; } static void get_dev_fw_str(struct ib_device *ibdev, char *str, size_t str_len) { struct mlx5_ib_dev *dev = container_of(ibdev, struct mlx5_ib_dev, ib_dev); snprintf(str, str_len, "%d.%d.%04d", fw_rev_maj(dev->mdev), fw_rev_min(dev->mdev), fw_rev_sub(dev->mdev)); } static int mlx5_roce_lag_init(struct mlx5_ib_dev *dev) { return 0; } static void mlx5_roce_lag_cleanup(struct mlx5_ib_dev *dev) { } static void mlx5_remove_roce_notifier(struct mlx5_ib_dev *dev) { if (dev->roce.nb.notifier_call) { unregister_netdevice_notifier(&dev->roce.nb); dev->roce.nb.notifier_call = NULL; } } static int mlx5_enable_roce(struct mlx5_ib_dev *dev) { VNET_ITERATOR_DECL(vnet_iter); struct net_device *idev; int err; /* Check if mlx5en net device already exists */ VNET_LIST_RLOCK(); VNET_FOREACH(vnet_iter) { IFNET_RLOCK(); CURVNET_SET_QUIET(vnet_iter); CK_STAILQ_FOREACH(idev, &V_ifnet, if_link) { /* check if network interface belongs to mlx5en */ if (!mlx5_netdev_match(idev, dev->mdev, "mce")) continue; write_lock(&dev->roce.netdev_lock); dev->roce.netdev = idev; write_unlock(&dev->roce.netdev_lock); } CURVNET_RESTORE(); IFNET_RUNLOCK(); } VNET_LIST_RUNLOCK(); dev->roce.nb.notifier_call = mlx5_netdev_event; err = register_netdevice_notifier(&dev->roce.nb); if (err) { dev->roce.nb.notifier_call = NULL; return err; } err = mlx5_nic_vport_enable_roce(dev->mdev); if (err) goto err_unregister_netdevice_notifier; err = mlx5_roce_lag_init(dev); if (err) goto err_disable_roce; return 0; err_disable_roce: mlx5_nic_vport_disable_roce(dev->mdev); err_unregister_netdevice_notifier: mlx5_remove_roce_notifier(dev); return err; } static void mlx5_disable_roce(struct mlx5_ib_dev *dev) { mlx5_roce_lag_cleanup(dev); mlx5_nic_vport_disable_roce(dev->mdev); } static void mlx5_ib_dealloc_q_port_counter(struct mlx5_ib_dev *dev, u8 
port_num) { mlx5_vport_dealloc_q_counter(dev->mdev, MLX5_INTERFACE_PROTOCOL_IB, dev->port[port_num].q_cnt_id); dev->port[port_num].q_cnt_id = 0; } static void mlx5_ib_dealloc_q_counters(struct mlx5_ib_dev *dev) { unsigned int i; for (i = 0; i < dev->num_ports; i++) mlx5_ib_dealloc_q_port_counter(dev, i); } static int mlx5_ib_alloc_q_counters(struct mlx5_ib_dev *dev) { int i; int ret; for (i = 0; i < dev->num_ports; i++) { ret = mlx5_vport_alloc_q_counter(dev->mdev, MLX5_INTERFACE_PROTOCOL_IB, &dev->port[i].q_cnt_id); if (ret) { mlx5_ib_warn(dev, "couldn't allocate queue counter for port %d, err %d\n", i + 1, ret); goto dealloc_counters; } } return 0; dealloc_counters: while (--i >= 0) mlx5_ib_dealloc_q_port_counter(dev, i); return ret; } static const char * const names[] = { "rx_write_requests", "rx_read_requests", "rx_atomic_requests", "out_of_buffer", "out_of_sequence", "duplicate_request", "rnr_nak_retry_err", "packet_seq_err", "implied_nak_seq_err", "local_ack_timeout_err", }; static const size_t stats_offsets[] = { MLX5_BYTE_OFF(query_q_counter_out, rx_write_requests), MLX5_BYTE_OFF(query_q_counter_out, rx_read_requests), MLX5_BYTE_OFF(query_q_counter_out, rx_atomic_requests), MLX5_BYTE_OFF(query_q_counter_out, out_of_buffer), MLX5_BYTE_OFF(query_q_counter_out, out_of_sequence), MLX5_BYTE_OFF(query_q_counter_out, duplicate_request), MLX5_BYTE_OFF(query_q_counter_out, rnr_nak_retry_err), MLX5_BYTE_OFF(query_q_counter_out, packet_seq_err), MLX5_BYTE_OFF(query_q_counter_out, implied_nak_seq_err), MLX5_BYTE_OFF(query_q_counter_out, local_ack_timeout_err), }; static struct rdma_hw_stats *mlx5_ib_alloc_hw_stats(struct ib_device *ibdev, u8 port_num) { BUILD_BUG_ON(ARRAY_SIZE(names) != ARRAY_SIZE(stats_offsets)); /* We support only per port stats */ if (port_num == 0) return NULL; return rdma_alloc_hw_stats_struct(names, ARRAY_SIZE(names), RDMA_HW_STATS_DEFAULT_LIFESPAN); } static int mlx5_ib_get_hw_stats(struct ib_device *ibdev, struct rdma_hw_stats *stats, u8 port, int index) { struct mlx5_ib_dev *dev = to_mdev(ibdev); int outlen = MLX5_ST_SZ_BYTES(query_q_counter_out); void *out; __be32 val; int ret; int i; if (!port || !stats) return -ENOSYS; out = mlx5_vzalloc(outlen); if (!out) return -ENOMEM; ret = mlx5_vport_query_q_counter(dev->mdev, dev->port[port - 1].q_cnt_id, 0, out, outlen); if (ret) goto free; for (i = 0; i < ARRAY_SIZE(names); i++) { val = *(__be32 *)(out + stats_offsets[i]); stats->value[i] = (u64)be32_to_cpu(val); } free: kvfree(out); return ARRAY_SIZE(names); } static void *mlx5_ib_add(struct mlx5_core_dev *mdev) { struct mlx5_ib_dev *dev; enum rdma_link_layer ll; int port_type_cap; int err; int i; port_type_cap = MLX5_CAP_GEN(mdev, port_type); ll = mlx5_port_type_cap_to_rdma_ll(port_type_cap); if ((ll == IB_LINK_LAYER_ETHERNET) && !MLX5_CAP_GEN(mdev, roce)) return NULL; dev = (struct mlx5_ib_dev *)ib_alloc_device(sizeof(*dev)); if (!dev) return NULL; dev->mdev = mdev; dev->port = kcalloc(MLX5_CAP_GEN(mdev, num_ports), sizeof(*dev->port), GFP_KERNEL); if (!dev->port) goto err_dealloc; rwlock_init(&dev->roce.netdev_lock); err = get_port_caps(dev); if (err) goto err_free_port; if (mlx5_use_mad_ifc(dev)) get_ext_port_caps(dev); MLX5_INIT_DOORBELL_LOCK(&dev->uar_lock); snprintf(dev->ib_dev.name, IB_DEVICE_NAME_MAX, "mlx5_%d", device_get_unit(mdev->pdev->dev.bsddev)); dev->ib_dev.owner = THIS_MODULE; dev->ib_dev.node_type = RDMA_NODE_IB_CA; dev->ib_dev.local_dma_lkey = 0 /* not supported for now */; dev->num_ports = MLX5_CAP_GEN(mdev, num_ports); dev->ib_dev.phys_port_cnt = 
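/*
 * Illustrative sketch (not part of the patch): the counter read loop in
 * mlx5_ib_get_hw_stats() above keeps two parallel arrays, names[] and
 * stats_offsets[], kept the same length by BUILD_BUG_ON(), and copies one
 * big-endian 32-bit counter per entry out of the query_q_counter_out
 * mailbox.  The stand-alone demo below shows the same pattern on a fake
 * 16-byte "mailbox"; the names and offsets here are made up for the example.
 */
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

static uint32_t read_be32(const unsigned char *buf, size_t off)
{
        return ((uint32_t)buf[off] << 24) | ((uint32_t)buf[off + 1] << 16) |
               ((uint32_t)buf[off + 2] << 8) | (uint32_t)buf[off + 3];
}

int main(void)
{
        static const char *const demo_names[] = {
                "out_of_buffer", "duplicate_request"
        };
        static const size_t demo_offsets[] = { 4, 12 };  /* hypothetical */
        unsigned char out[16] = { 0 };
        size_t i;

        out[4 + 3] = 7;         /* pretend out_of_buffer = 7 */
        out[12 + 3] = 2;        /* pretend duplicate_request = 2 */

        for (i = 0; i < sizeof(demo_names) / sizeof(demo_names[0]); i++)
                printf("%s = %u\n", demo_names[i],
                    (unsigned)read_be32(out, demo_offsets[i]));
        return 0;
}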
dev->num_ports; dev->ib_dev.num_comp_vectors = dev->mdev->priv.eq_table.num_comp_vectors; dev->ib_dev.dma_device = &mdev->pdev->dev; dev->ib_dev.uverbs_abi_ver = MLX5_IB_UVERBS_ABI_VERSION; dev->ib_dev.uverbs_cmd_mask = (1ull << IB_USER_VERBS_CMD_GET_CONTEXT) | (1ull << IB_USER_VERBS_CMD_QUERY_DEVICE) | (1ull << IB_USER_VERBS_CMD_QUERY_PORT) | (1ull << IB_USER_VERBS_CMD_ALLOC_PD) | (1ull << IB_USER_VERBS_CMD_DEALLOC_PD) | (1ull << IB_USER_VERBS_CMD_CREATE_AH) | (1ull << IB_USER_VERBS_CMD_DESTROY_AH) | (1ull << IB_USER_VERBS_CMD_REG_MR) | (1ull << IB_USER_VERBS_CMD_REREG_MR) | (1ull << IB_USER_VERBS_CMD_DEREG_MR) | (1ull << IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL) | (1ull << IB_USER_VERBS_CMD_CREATE_CQ) | (1ull << IB_USER_VERBS_CMD_RESIZE_CQ) | (1ull << IB_USER_VERBS_CMD_DESTROY_CQ) | (1ull << IB_USER_VERBS_CMD_CREATE_QP) | (1ull << IB_USER_VERBS_CMD_MODIFY_QP) | (1ull << IB_USER_VERBS_CMD_QUERY_QP) | (1ull << IB_USER_VERBS_CMD_DESTROY_QP) | (1ull << IB_USER_VERBS_CMD_ATTACH_MCAST) | (1ull << IB_USER_VERBS_CMD_DETACH_MCAST) | (1ull << IB_USER_VERBS_CMD_CREATE_SRQ) | (1ull << IB_USER_VERBS_CMD_MODIFY_SRQ) | (1ull << IB_USER_VERBS_CMD_QUERY_SRQ) | (1ull << IB_USER_VERBS_CMD_DESTROY_SRQ) | (1ull << IB_USER_VERBS_CMD_CREATE_XSRQ) | (1ull << IB_USER_VERBS_CMD_OPEN_QP); dev->ib_dev.uverbs_ex_cmd_mask = (1ull << IB_USER_VERBS_EX_CMD_QUERY_DEVICE) | (1ull << IB_USER_VERBS_EX_CMD_CREATE_CQ) | (1ull << IB_USER_VERBS_EX_CMD_CREATE_QP); dev->ib_dev.query_device = mlx5_ib_query_device; dev->ib_dev.query_port = mlx5_ib_query_port; dev->ib_dev.get_link_layer = mlx5_ib_port_link_layer; if (ll == IB_LINK_LAYER_ETHERNET) dev->ib_dev.get_netdev = mlx5_ib_get_netdev; dev->ib_dev.query_gid = mlx5_ib_query_gid; dev->ib_dev.add_gid = mlx5_ib_add_gid; dev->ib_dev.del_gid = mlx5_ib_del_gid; dev->ib_dev.query_pkey = mlx5_ib_query_pkey; dev->ib_dev.modify_device = mlx5_ib_modify_device; dev->ib_dev.modify_port = mlx5_ib_modify_port; dev->ib_dev.alloc_ucontext = mlx5_ib_alloc_ucontext; dev->ib_dev.dealloc_ucontext = mlx5_ib_dealloc_ucontext; dev->ib_dev.mmap = mlx5_ib_mmap; dev->ib_dev.alloc_pd = mlx5_ib_alloc_pd; dev->ib_dev.dealloc_pd = mlx5_ib_dealloc_pd; dev->ib_dev.create_ah = mlx5_ib_create_ah; dev->ib_dev.query_ah = mlx5_ib_query_ah; dev->ib_dev.destroy_ah = mlx5_ib_destroy_ah; dev->ib_dev.create_srq = mlx5_ib_create_srq; dev->ib_dev.modify_srq = mlx5_ib_modify_srq; dev->ib_dev.query_srq = mlx5_ib_query_srq; dev->ib_dev.destroy_srq = mlx5_ib_destroy_srq; dev->ib_dev.post_srq_recv = mlx5_ib_post_srq_recv; dev->ib_dev.create_qp = mlx5_ib_create_qp; dev->ib_dev.modify_qp = mlx5_ib_modify_qp; dev->ib_dev.query_qp = mlx5_ib_query_qp; dev->ib_dev.destroy_qp = mlx5_ib_destroy_qp; dev->ib_dev.post_send = mlx5_ib_post_send; dev->ib_dev.post_recv = mlx5_ib_post_recv; dev->ib_dev.create_cq = mlx5_ib_create_cq; dev->ib_dev.modify_cq = mlx5_ib_modify_cq; dev->ib_dev.resize_cq = mlx5_ib_resize_cq; dev->ib_dev.destroy_cq = mlx5_ib_destroy_cq; dev->ib_dev.poll_cq = mlx5_ib_poll_cq; dev->ib_dev.req_notify_cq = mlx5_ib_arm_cq; dev->ib_dev.get_dma_mr = mlx5_ib_get_dma_mr; dev->ib_dev.reg_user_mr = mlx5_ib_reg_user_mr; dev->ib_dev.rereg_user_mr = mlx5_ib_rereg_user_mr; dev->ib_dev.dereg_mr = mlx5_ib_dereg_mr; dev->ib_dev.attach_mcast = mlx5_ib_mcg_attach; dev->ib_dev.detach_mcast = mlx5_ib_mcg_detach; dev->ib_dev.process_mad = mlx5_ib_process_mad; dev->ib_dev.alloc_mr = mlx5_ib_alloc_mr; dev->ib_dev.map_mr_sg = mlx5_ib_map_mr_sg; dev->ib_dev.check_mr_status = mlx5_ib_check_mr_status; dev->ib_dev.get_port_immutable = 
mlx5_port_immutable; dev->ib_dev.get_dev_fw_str = get_dev_fw_str; if (mlx5_core_is_pf(mdev)) { dev->ib_dev.get_vf_config = mlx5_ib_get_vf_config; dev->ib_dev.set_vf_link_state = mlx5_ib_set_vf_link_state; dev->ib_dev.get_vf_stats = mlx5_ib_get_vf_stats; dev->ib_dev.set_vf_guid = mlx5_ib_set_vf_guid; } dev->ib_dev.disassociate_ucontext = mlx5_ib_disassociate_ucontext; mlx5_ib_internal_fill_odp_caps(dev); if (MLX5_CAP_GEN(mdev, imaicl)) { dev->ib_dev.alloc_mw = mlx5_ib_alloc_mw; dev->ib_dev.dealloc_mw = mlx5_ib_dealloc_mw; dev->ib_dev.uverbs_cmd_mask |= (1ull << IB_USER_VERBS_CMD_ALLOC_MW) | (1ull << IB_USER_VERBS_CMD_DEALLOC_MW); } if (MLX5_CAP_GEN(dev->mdev, out_of_seq_cnt) && MLX5_CAP_GEN(dev->mdev, retransmission_q_counters)) { dev->ib_dev.get_hw_stats = mlx5_ib_get_hw_stats; dev->ib_dev.alloc_hw_stats = mlx5_ib_alloc_hw_stats; } if (MLX5_CAP_GEN(mdev, xrc)) { dev->ib_dev.alloc_xrcd = mlx5_ib_alloc_xrcd; dev->ib_dev.dealloc_xrcd = mlx5_ib_dealloc_xrcd; dev->ib_dev.uverbs_cmd_mask |= (1ull << IB_USER_VERBS_CMD_OPEN_XRCD) | (1ull << IB_USER_VERBS_CMD_CLOSE_XRCD); } if (mlx5_ib_port_link_layer(&dev->ib_dev, 1) == IB_LINK_LAYER_ETHERNET) { dev->ib_dev.create_flow = mlx5_ib_create_flow; dev->ib_dev.destroy_flow = mlx5_ib_destroy_flow; dev->ib_dev.create_wq = mlx5_ib_create_wq; dev->ib_dev.modify_wq = mlx5_ib_modify_wq; dev->ib_dev.destroy_wq = mlx5_ib_destroy_wq; dev->ib_dev.create_rwq_ind_table = mlx5_ib_create_rwq_ind_table; dev->ib_dev.destroy_rwq_ind_table = mlx5_ib_destroy_rwq_ind_table; dev->ib_dev.uverbs_ex_cmd_mask |= (1ull << IB_USER_VERBS_EX_CMD_CREATE_FLOW) | (1ull << IB_USER_VERBS_EX_CMD_DESTROY_FLOW) | (1ull << IB_USER_VERBS_EX_CMD_CREATE_WQ) | (1ull << IB_USER_VERBS_EX_CMD_MODIFY_WQ) | (1ull << IB_USER_VERBS_EX_CMD_DESTROY_WQ) | (1ull << IB_USER_VERBS_EX_CMD_CREATE_RWQ_IND_TBL) | (1ull << IB_USER_VERBS_EX_CMD_DESTROY_RWQ_IND_TBL); } err = init_node_data(dev); if (err) goto err_free_port; mutex_init(&dev->flow_db.lock); mutex_init(&dev->cap_mask_mutex); INIT_LIST_HEAD(&dev->qp_list); spin_lock_init(&dev->reset_flow_resource_lock); if (ll == IB_LINK_LAYER_ETHERNET) { err = mlx5_enable_roce(dev); if (err) goto err_free_port; } err = create_dev_resources(&dev->devr); if (err) goto err_disable_roce; err = mlx5_ib_odp_init_one(dev); if (err) goto err_rsrc; err = mlx5_ib_alloc_q_counters(dev); if (err) goto err_odp; err = ib_register_device(&dev->ib_dev, NULL); if (err) goto err_q_cnt; err = create_umr_res(dev); if (err) goto err_dev; for (i = 0; i < ARRAY_SIZE(mlx5_class_attributes); i++) { err = device_create_file(&dev->ib_dev.dev, mlx5_class_attributes[i]); if (err) goto err_umrc; } err = mlx5_ib_init_congestion(dev); if (err) goto err_umrc; dev->ib_active = true; return dev; err_umrc: destroy_umrc_res(dev); err_dev: ib_unregister_device(&dev->ib_dev); err_q_cnt: mlx5_ib_dealloc_q_counters(dev); err_odp: mlx5_ib_odp_remove_one(dev); err_rsrc: destroy_dev_resources(&dev->devr); err_disable_roce: if (ll == IB_LINK_LAYER_ETHERNET) { mlx5_disable_roce(dev); mlx5_remove_roce_notifier(dev); } err_free_port: kfree(dev->port); err_dealloc: ib_dealloc_device((struct ib_device *)dev); return NULL; } static void mlx5_ib_remove(struct mlx5_core_dev *mdev, void *context) { struct mlx5_ib_dev *dev = context; enum rdma_link_layer ll = mlx5_ib_port_link_layer(&dev->ib_dev, 1); mlx5_ib_cleanup_congestion(dev); mlx5_remove_roce_notifier(dev); ib_unregister_device(&dev->ib_dev); mlx5_ib_dealloc_q_counters(dev); destroy_umrc_res(dev); mlx5_ib_odp_remove_one(dev); destroy_dev_resources(&dev->devr); if 
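/*
 * Illustrative sketch (not part of the patch): the error-unwind style used
 * by mlx5_ib_add() above (err_umrc/err_dev/err_q_cnt/...).  Each init step
 * that can fail jumps to a label that tears down only the steps that already
 * succeeded, in reverse order, so a failure at any point leaves nothing
 * allocated.  Stand-alone demo with made-up step names:
 */
#include <stdio.h>

static int step_ok(const char *name)  { printf("init %s\n", name); return 0; }
static int step_bad(const char *name) { printf("init %s (fails)\n", name); return -1; }
static void undo(const char *name)    { printf("undo %s\n", name); }

static int demo_add(void)
{
        int err;

        err = step_ok("resources");
        if (err)
                goto out;
        err = step_ok("counters");
        if (err)
                goto err_resources;
        err = step_bad("register device");
        if (err)
                goto err_counters;
        return 0;

err_counters:
        undo("counters");
err_resources:
        undo("resources");
out:
        return err;
}

int main(void)
{
        return demo_add() ? 1 : 0;
}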
(ll == IB_LINK_LAYER_ETHERNET) mlx5_disable_roce(dev); kfree(dev->port); ib_dealloc_device(&dev->ib_dev); } static struct mlx5_interface mlx5_ib_interface = { .add = mlx5_ib_add, .remove = mlx5_ib_remove, .event = mlx5_ib_event, .protocol = MLX5_INTERFACE_PROTOCOL_IB, }; static int __init mlx5_ib_init(void) { int err; err = mlx5_ib_odp_init(); if (err) return err; err = mlx5_register_interface(&mlx5_ib_interface); if (err) goto clean_odp; return err; clean_odp: mlx5_ib_odp_cleanup(); return err; } static void __exit mlx5_ib_cleanup(void) { mlx5_unregister_interface(&mlx5_ib_interface); mlx5_ib_odp_cleanup(); } static void mlx5_ib_show_version(void __unused *arg) { printf("%s", mlx5_version); } SYSINIT(mlx5_ib_show_version, SI_SUB_DRIVERS, SI_ORDER_ANY, mlx5_ib_show_version, NULL); module_init_order(mlx5_ib_init, SI_ORDER_SEVENTH); module_exit_order(mlx5_ib_cleanup, SI_ORDER_SEVENTH); diff --git a/sys/dev/mlx5/mlx5_ib/mlx5_ib_qp.c b/sys/dev/mlx5/mlx5_ib/mlx5_ib_qp.c index 68a1c8cc25df..94561d32acc1 100644 --- a/sys/dev/mlx5/mlx5_ib/mlx5_ib_qp.c +++ b/sys/dev/mlx5/mlx5_ib/mlx5_ib_qp.c @@ -1,4951 +1,4924 @@ /*- - * Copyright (c) 2013-2015, Mellanox Technologies, Ltd. All rights reserved. + * Copyright (c) 2013-2020, Mellanox Technologies. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS `AS IS' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* * $FreeBSD$ */ #include #include #include #include #include "mlx5_ib.h" /* not supported currently */ static int wq_signature; enum { MLX5_IB_ACK_REQ_FREQ = 8, }; enum { MLX5_IB_DEFAULT_SCHED_QUEUE = 0x83, MLX5_IB_DEFAULT_QP0_SCHED_QUEUE = 0x3f, MLX5_IB_LINK_TYPE_IB = 0, MLX5_IB_LINK_TYPE_ETH = 1 }; enum { MLX5_IB_SQ_STRIDE = 6, }; static const u32 mlx5_ib_opcode[] = { [IB_WR_SEND] = MLX5_OPCODE_SEND, [IB_WR_LSO] = MLX5_OPCODE_LSO, [IB_WR_SEND_WITH_IMM] = MLX5_OPCODE_SEND_IMM, [IB_WR_RDMA_WRITE] = MLX5_OPCODE_RDMA_WRITE, [IB_WR_RDMA_WRITE_WITH_IMM] = MLX5_OPCODE_RDMA_WRITE_IMM, [IB_WR_RDMA_READ] = MLX5_OPCODE_RDMA_READ, [IB_WR_ATOMIC_CMP_AND_SWP] = MLX5_OPCODE_ATOMIC_CS, [IB_WR_ATOMIC_FETCH_AND_ADD] = MLX5_OPCODE_ATOMIC_FA, [IB_WR_SEND_WITH_INV] = MLX5_OPCODE_SEND_INVAL, [IB_WR_LOCAL_INV] = MLX5_OPCODE_UMR, [IB_WR_REG_MR] = MLX5_OPCODE_UMR, [IB_WR_MASKED_ATOMIC_CMP_AND_SWP] = MLX5_OPCODE_ATOMIC_MASKED_CS, [IB_WR_MASKED_ATOMIC_FETCH_AND_ADD] = MLX5_OPCODE_ATOMIC_MASKED_FA, [MLX5_IB_WR_UMR] = MLX5_OPCODE_UMR, }; struct mlx5_wqe_eth_pad { u8 rsvd0[16]; }; enum raw_qp_set_mask_map { MLX5_RAW_QP_MOD_SET_RQ_Q_CTR_ID = 1UL << 0, }; struct mlx5_modify_raw_qp_param { u16 operation; u32 set_mask; /* raw_qp_set_mask_map */ u8 rq_q_ctr_id; }; static void get_cqs(enum ib_qp_type qp_type, struct ib_cq *ib_send_cq, struct ib_cq *ib_recv_cq, struct mlx5_ib_cq **send_cq, struct mlx5_ib_cq **recv_cq); static int is_qp0(enum ib_qp_type qp_type) { return qp_type == IB_QPT_SMI; } static int is_sqp(enum ib_qp_type qp_type) { return is_qp0(qp_type) || is_qp1(qp_type); } static void *get_wqe(struct mlx5_ib_qp *qp, int offset) { return mlx5_buf_offset(&qp->buf, offset); } static void *get_recv_wqe(struct mlx5_ib_qp *qp, int n) { return get_wqe(qp, qp->rq.offset + (n << qp->rq.wqe_shift)); } void *mlx5_get_send_wqe(struct mlx5_ib_qp *qp, int n) { return get_wqe(qp, qp->sq.offset + (n << MLX5_IB_SQ_STRIDE)); } /** * mlx5_ib_read_user_wqe() - Copy a user-space WQE to kernel space. * * @qp: QP to copy from. * @send: copy from the send queue when non-zero, use the receive queue * otherwise. * @wqe_index: index to start copying from. For send work queues, the * wqe_index is in units of MLX5_SEND_WQE_BB. * For receive work queue, it is the number of work queue * element in the queue. * @buffer: destination buffer. * @length: maximum number of bytes to copy. * * Copies at least a single WQE, but may copy more data. * * Return: the number of bytes copied, or an error code. */ int mlx5_ib_read_user_wqe(struct mlx5_ib_qp *qp, int send, int wqe_index, void *buffer, u32 length, struct mlx5_ib_qp_base *base) { struct ib_device *ibdev = qp->ibqp.device; struct mlx5_ib_dev *dev = to_mdev(ibdev); struct mlx5_ib_wq *wq = send ? &qp->sq : &qp->rq; size_t offset; size_t wq_end; struct ib_umem *umem = base->ubuffer.umem; u32 first_copy_length; int wqe_length; int ret; if (wq->wqe_cnt == 0) { mlx5_ib_dbg(dev, "mlx5_ib_read_user_wqe for a QP with wqe_cnt == 0. 
qp_type: 0x%x\n", qp->ibqp.qp_type); return -EINVAL; } offset = wq->offset + ((wqe_index % wq->wqe_cnt) << wq->wqe_shift); wq_end = wq->offset + (wq->wqe_cnt << wq->wqe_shift); if (send && length < sizeof(struct mlx5_wqe_ctrl_seg)) return -EINVAL; if (offset > umem->length || (send && offset + sizeof(struct mlx5_wqe_ctrl_seg) > umem->length)) return -EINVAL; first_copy_length = min_t(u32, offset + length, wq_end) - offset; ret = ib_umem_copy_from(buffer, umem, offset, first_copy_length); if (ret) return ret; if (send) { struct mlx5_wqe_ctrl_seg *ctrl = buffer; int ds = be32_to_cpu(ctrl->qpn_ds) & MLX5_WQE_CTRL_DS_MASK; wqe_length = ds * MLX5_WQE_DS_UNITS; } else { wqe_length = 1 << wq->wqe_shift; } if (wqe_length <= first_copy_length) return first_copy_length; ret = ib_umem_copy_from(buffer + first_copy_length, umem, wq->offset, wqe_length - first_copy_length); if (ret) return ret; return wqe_length; } static void mlx5_ib_qp_event(struct mlx5_core_qp *qp, int type) { struct ib_qp *ibqp = &to_mibqp(qp)->ibqp; struct ib_event event; if (type == MLX5_EVENT_TYPE_PATH_MIG) { /* This event is only valid for trans_qps */ to_mibqp(qp)->port = to_mibqp(qp)->trans_qp.alt_port; } if (ibqp->event_handler) { event.device = ibqp->device; event.element.qp = ibqp; switch (type) { case MLX5_EVENT_TYPE_PATH_MIG: event.event = IB_EVENT_PATH_MIG; break; case MLX5_EVENT_TYPE_COMM_EST: event.event = IB_EVENT_COMM_EST; break; case MLX5_EVENT_TYPE_SQ_DRAINED: event.event = IB_EVENT_SQ_DRAINED; break; case MLX5_EVENT_TYPE_SRQ_LAST_WQE: event.event = IB_EVENT_QP_LAST_WQE_REACHED; break; case MLX5_EVENT_TYPE_WQ_CATAS_ERROR: event.event = IB_EVENT_QP_FATAL; break; case MLX5_EVENT_TYPE_PATH_MIG_FAILED: event.event = IB_EVENT_PATH_MIG_ERR; break; case MLX5_EVENT_TYPE_WQ_INVAL_REQ_ERROR: event.event = IB_EVENT_QP_REQ_ERR; break; case MLX5_EVENT_TYPE_WQ_ACCESS_ERROR: event.event = IB_EVENT_QP_ACCESS_ERR; break; default: pr_warn("mlx5_ib: Unexpected event type %d on QP %06x\n", type, qp->qpn); return; } ibqp->event_handler(&event, ibqp->qp_context); } } static int set_rq_size(struct mlx5_ib_dev *dev, struct ib_qp_cap *cap, int has_rq, struct mlx5_ib_qp *qp, struct mlx5_ib_create_qp *ucmd) { int wqe_size; int wq_size; /* Sanity check RQ size before proceeding */ if (cap->max_recv_wr > (1 << MLX5_CAP_GEN(dev->mdev, log_max_qp_sz))) return -EINVAL; if (!has_rq) { qp->rq.max_gs = 0; qp->rq.wqe_cnt = 0; qp->rq.wqe_shift = 0; cap->max_recv_wr = 0; cap->max_recv_sge = 0; } else { if (ucmd) { qp->rq.wqe_cnt = ucmd->rq_wqe_count; qp->rq.wqe_shift = ucmd->rq_wqe_shift; qp->rq.max_gs = (1 << qp->rq.wqe_shift) / sizeof(struct mlx5_wqe_data_seg) - qp->wq_sig; qp->rq.max_post = qp->rq.wqe_cnt; } else { wqe_size = qp->wq_sig ? 
sizeof(struct mlx5_wqe_signature_seg) : 0; wqe_size += cap->max_recv_sge * sizeof(struct mlx5_wqe_data_seg); wqe_size = roundup_pow_of_two(wqe_size); wq_size = roundup_pow_of_two(cap->max_recv_wr) * wqe_size; wq_size = max_t(int, wq_size, MLX5_SEND_WQE_BB); qp->rq.wqe_cnt = wq_size / wqe_size; if (wqe_size > MLX5_CAP_GEN(dev->mdev, max_wqe_sz_rq)) { mlx5_ib_dbg(dev, "wqe_size %d, max %d\n", wqe_size, MLX5_CAP_GEN(dev->mdev, max_wqe_sz_rq)); return -EINVAL; } qp->rq.wqe_shift = ilog2(wqe_size); qp->rq.max_gs = (1 << qp->rq.wqe_shift) / sizeof(struct mlx5_wqe_data_seg) - qp->wq_sig; qp->rq.max_post = qp->rq.wqe_cnt; } } return 0; } static int sq_overhead(struct ib_qp_init_attr *attr) { int size = 0; switch (attr->qp_type) { case IB_QPT_XRC_INI: size += sizeof(struct mlx5_wqe_xrc_seg); /* fall through */ case IB_QPT_RC: size += sizeof(struct mlx5_wqe_ctrl_seg) + max(sizeof(struct mlx5_wqe_atomic_seg) + sizeof(struct mlx5_wqe_raddr_seg), sizeof(struct mlx5_wqe_umr_ctrl_seg) + sizeof(struct mlx5_mkey_seg)); break; case IB_QPT_XRC_TGT: return 0; case IB_QPT_UC: size += sizeof(struct mlx5_wqe_ctrl_seg) + max(sizeof(struct mlx5_wqe_raddr_seg), sizeof(struct mlx5_wqe_umr_ctrl_seg) + sizeof(struct mlx5_mkey_seg)); break; case IB_QPT_UD: if (attr->create_flags & IB_QP_CREATE_IPOIB_UD_LSO) size += sizeof(struct mlx5_wqe_eth_pad) + sizeof(struct mlx5_wqe_eth_seg); /* fall through */ case IB_QPT_SMI: case MLX5_IB_QPT_HW_GSI: size += sizeof(struct mlx5_wqe_ctrl_seg) + sizeof(struct mlx5_wqe_datagram_seg); break; case MLX5_IB_QPT_REG_UMR: size += sizeof(struct mlx5_wqe_ctrl_seg) + sizeof(struct mlx5_wqe_umr_ctrl_seg) + sizeof(struct mlx5_mkey_seg); break; default: return -EINVAL; } return size; } static int calc_send_wqe(struct ib_qp_init_attr *attr) { int inl_size = 0; int size; size = sq_overhead(attr); if (size < 0) return size; if (attr->cap.max_inline_data) { inl_size = size + sizeof(struct mlx5_wqe_inline_seg) + attr->cap.max_inline_data; } size += attr->cap.max_send_sge * sizeof(struct mlx5_wqe_data_seg); if (attr->create_flags & IB_QP_CREATE_SIGNATURE_EN && ALIGN(max_t(int, inl_size, size), MLX5_SEND_WQE_BB) < MLX5_SIG_WQE_SIZE) return MLX5_SIG_WQE_SIZE; else return ALIGN(max_t(int, inl_size, size), MLX5_SEND_WQE_BB); } static int get_send_sge(struct ib_qp_init_attr *attr, int wqe_size) { int max_sge; if (attr->qp_type == IB_QPT_RC) max_sge = (min_t(int, wqe_size, 512) - sizeof(struct mlx5_wqe_ctrl_seg) - sizeof(struct mlx5_wqe_raddr_seg)) / sizeof(struct mlx5_wqe_data_seg); else if (attr->qp_type == IB_QPT_XRC_INI) max_sge = (min_t(int, wqe_size, 512) - sizeof(struct mlx5_wqe_ctrl_seg) - sizeof(struct mlx5_wqe_xrc_seg) - sizeof(struct mlx5_wqe_raddr_seg)) / sizeof(struct mlx5_wqe_data_seg); else max_sge = (wqe_size - sq_overhead(attr)) / sizeof(struct mlx5_wqe_data_seg); return min_t(int, max_sge, wqe_size - sq_overhead(attr) / sizeof(struct mlx5_wqe_data_seg)); } static int calc_sq_size(struct mlx5_ib_dev *dev, struct ib_qp_init_attr *attr, struct mlx5_ib_qp *qp) { int wqe_size; int wq_size; if (!attr->cap.max_send_wr) return 0; wqe_size = calc_send_wqe(attr); mlx5_ib_dbg(dev, "wqe_size %d\n", wqe_size); if (wqe_size < 0) return wqe_size; if (wqe_size > MLX5_CAP_GEN(dev->mdev, max_wqe_sz_sq)) { mlx5_ib_dbg(dev, "wqe_size(%d) > max_sq_desc_sz(%d)\n", wqe_size, MLX5_CAP_GEN(dev->mdev, max_wqe_sz_sq)); return -EINVAL; } qp->max_inline_data = wqe_size - sq_overhead(attr) - sizeof(struct mlx5_wqe_inline_seg); attr->cap.max_inline_data = qp->max_inline_data; if (attr->create_flags & 
IB_QP_CREATE_SIGNATURE_EN) qp->signature_en = true; wq_size = roundup_pow_of_two(attr->cap.max_send_wr * wqe_size); qp->sq.wqe_cnt = wq_size / MLX5_SEND_WQE_BB; if (qp->sq.wqe_cnt > (1 << MLX5_CAP_GEN(dev->mdev, log_max_qp_sz))) { mlx5_ib_dbg(dev, "wqe count(%d) exceeds limits(%d)\n", qp->sq.wqe_cnt, 1 << MLX5_CAP_GEN(dev->mdev, log_max_qp_sz)); return -ENOMEM; } qp->sq.wqe_shift = ilog2(MLX5_SEND_WQE_BB); qp->sq.max_gs = get_send_sge(attr, wqe_size); if (qp->sq.max_gs < attr->cap.max_send_sge) return -ENOMEM; attr->cap.max_send_sge = qp->sq.max_gs; qp->sq.max_post = wq_size / wqe_size; attr->cap.max_send_wr = qp->sq.max_post; return wq_size; } static int set_user_buf_size(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp, struct mlx5_ib_create_qp *ucmd, struct mlx5_ib_qp_base *base, struct ib_qp_init_attr *attr) { int desc_sz = 1 << qp->sq.wqe_shift; if (desc_sz > MLX5_CAP_GEN(dev->mdev, max_wqe_sz_sq)) { mlx5_ib_warn(dev, "desc_sz %d, max_sq_desc_sz %d\n", desc_sz, MLX5_CAP_GEN(dev->mdev, max_wqe_sz_sq)); return -EINVAL; } if (ucmd->sq_wqe_count && ((1 << ilog2(ucmd->sq_wqe_count)) != ucmd->sq_wqe_count)) { mlx5_ib_warn(dev, "sq_wqe_count %d, sq_wqe_count %d\n", ucmd->sq_wqe_count, ucmd->sq_wqe_count); return -EINVAL; } qp->sq.wqe_cnt = ucmd->sq_wqe_count; if (qp->sq.wqe_cnt > (1 << MLX5_CAP_GEN(dev->mdev, log_max_qp_sz))) { mlx5_ib_warn(dev, "wqe_cnt %d, max_wqes %d\n", qp->sq.wqe_cnt, 1 << MLX5_CAP_GEN(dev->mdev, log_max_qp_sz)); return -EINVAL; } if (attr->qp_type == IB_QPT_RAW_PACKET) { base->ubuffer.buf_size = qp->rq.wqe_cnt << qp->rq.wqe_shift; qp->raw_packet_qp.sq.ubuffer.buf_size = qp->sq.wqe_cnt << 6; } else { base->ubuffer.buf_size = (qp->rq.wqe_cnt << qp->rq.wqe_shift) + (qp->sq.wqe_cnt << 6); } return 0; } static int qp_has_rq(struct ib_qp_init_attr *attr) { if (attr->qp_type == IB_QPT_XRC_INI || attr->qp_type == IB_QPT_XRC_TGT || attr->srq || attr->qp_type == MLX5_IB_QPT_REG_UMR || !attr->cap.max_recv_wr) return 0; return 1; } -static int first_med_uuar(void) -{ - return 1; -} +enum { + /* this is the first blue flame register in the array of bfregs assigned + * to a processes. Since we do not use it for blue flame but rather + * regular 64 bit doorbells, we do not need a lock for maintaiing + * "odd/even" order + */ + NUM_NON_BLUE_FLAME_BFREGS = 1, +}; -static int next_uuar(int n) +static int max_bfregs(struct mlx5_ib_dev *dev, struct mlx5_bfreg_info *bfregi) { - n++; - - while (((n % 4) & 2)) - n++; - - return n; + return get_num_static_uars(dev, bfregi) * MLX5_NON_FP_BFREGS_PER_UAR; } -static int num_med_uuar(struct mlx5_uuar_info *uuari) +static int num_med_bfreg(struct mlx5_ib_dev *dev, + struct mlx5_bfreg_info *bfregi) { int n; - n = uuari->num_uars * MLX5_NON_FP_BF_REGS_PER_PAGE - - uuari->num_low_latency_uuars - 1; + n = max_bfregs(dev, bfregi) - bfregi->num_low_latency_bfregs - + NUM_NON_BLUE_FLAME_BFREGS; return n >= 0 ? n : 0; } -static int max_uuari(struct mlx5_uuar_info *uuari) +static int first_med_bfreg(struct mlx5_ib_dev *dev, + struct mlx5_bfreg_info *bfregi) { - return uuari->num_uars * 4; + return num_med_bfreg(dev, bfregi) ? 
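/*
 * Illustrative sketch (not part of the patch): the send-queue sizing done by
 * calc_sq_size() above.  The per-WR WQE size is aligned up to the 64-byte
 * basic block (MLX5_SEND_WQE_BB), the total ring size is
 * max_send_wr * wqe_size rounded up to a power of two, and the ring is then
 * counted in basic blocks (sq.wqe_cnt) and in whole WQEs (sq.max_post).
 * The inputs below are made up; only the arithmetic is the point.
 */
#include <stdio.h>

#define DEMO_SEND_WQE_BB 64u    /* same value as MLX5_SEND_WQE_BB */

static unsigned int roundup_pow_of_two_u(unsigned int v)
{
        unsigned int r = 1;

        while (r < v)
                r <<= 1;
        return r;
}

int main(void)
{
        unsigned int max_send_wr = 100;  /* hypothetical ib_qp_cap input */
        unsigned int wqe_size = 192;     /* hypothetical, already BB-aligned */
        unsigned int wq_size = roundup_pow_of_two_u(max_send_wr * wqe_size);
        unsigned int wqe_cnt = wq_size / DEMO_SEND_WQE_BB;
        unsigned int max_post = wq_size / wqe_size;

        printf("wq_size %u bytes = %u basic blocks, max_post %u WRs\n",
            wq_size, wqe_cnt, max_post);
        return 0;
}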
1 : -ENOMEM; } -static int first_hi_uuar(struct mlx5_uuar_info *uuari) +static int first_hi_bfreg(struct mlx5_ib_dev *dev, + struct mlx5_bfreg_info *bfregi) { int med; - int i; - int t; - - med = num_med_uuar(uuari); - for (t = 0, i = first_med_uuar();; i = next_uuar(i)) { - t++; - if (t == med) - return next_uuar(i); - } - return 0; + med = num_med_bfreg(dev, bfregi); + return ++med; } -static int alloc_high_class_uuar(struct mlx5_uuar_info *uuari) +static int alloc_high_class_bfreg(struct mlx5_ib_dev *dev, + struct mlx5_bfreg_info *bfregi) { int i; - for (i = first_hi_uuar(uuari); i < max_uuari(uuari); i = next_uuar(i)) { - if (!test_bit(i, uuari->bitmap)) { - set_bit(i, uuari->bitmap); - uuari->count[i]++; + for (i = first_hi_bfreg(dev, bfregi); i < max_bfregs(dev, bfregi); i++) { + if (!bfregi->count[i]) { + bfregi->count[i]++; return i; } } return -ENOMEM; } -static int alloc_med_class_uuar(struct mlx5_uuar_info *uuari) +static int alloc_med_class_bfreg(struct mlx5_ib_dev *dev, + struct mlx5_bfreg_info *bfregi) { - int minidx = first_med_uuar(); + int minidx = first_med_bfreg(dev, bfregi); int i; - for (i = first_med_uuar(); i < first_hi_uuar(uuari); i = next_uuar(i)) { - if (uuari->count[i] < uuari->count[minidx]) + if (minidx < 0) + return minidx; + + for (i = minidx; i < first_hi_bfreg(dev, bfregi); i++) { + if (bfregi->count[i] < bfregi->count[minidx]) minidx = i; + if (!bfregi->count[minidx]) + break; } - uuari->count[minidx]++; + bfregi->count[minidx]++; return minidx; } -static int alloc_uuar(struct mlx5_uuar_info *uuari, - enum mlx5_ib_latency_class lat) +static int alloc_bfreg(struct mlx5_ib_dev *dev, + struct mlx5_bfreg_info *bfregi) { - int uuarn = -EINVAL; + int bfregn = -ENOMEM; - mutex_lock(&uuari->lock); - switch (lat) { - case MLX5_IB_LATENCY_CLASS_LOW: - uuarn = 0; - uuari->count[uuarn]++; - break; - - case MLX5_IB_LATENCY_CLASS_MEDIUM: - if (uuari->ver < 2) - uuarn = -ENOMEM; - else - uuarn = alloc_med_class_uuar(uuari); - break; - - case MLX5_IB_LATENCY_CLASS_HIGH: - if (uuari->ver < 2) - uuarn = -ENOMEM; - else - uuarn = alloc_high_class_uuar(uuari); - break; + if (bfregi->lib_uar_dyn) + return -EINVAL; - case MLX5_IB_LATENCY_CLASS_FAST_PATH: - uuarn = 2; - break; + mutex_lock(&bfregi->lock); + if (bfregi->ver >= 2) { + bfregn = alloc_high_class_bfreg(dev, bfregi); + if (bfregn < 0) + bfregn = alloc_med_class_bfreg(dev, bfregi); } - mutex_unlock(&uuari->lock); - return uuarn; -} + if (bfregn < 0) { + BUILD_BUG_ON(NUM_NON_BLUE_FLAME_BFREGS != 1); + bfregn = 0; + bfregi->count[bfregn]++; + } + mutex_unlock(&bfregi->lock); -static void free_med_class_uuar(struct mlx5_uuar_info *uuari, int uuarn) -{ - clear_bit(uuarn, uuari->bitmap); - --uuari->count[uuarn]; + return bfregn; } -static void free_high_class_uuar(struct mlx5_uuar_info *uuari, int uuarn) +void mlx5_ib_free_bfreg(struct mlx5_ib_dev *dev, struct mlx5_bfreg_info *bfregi, int bfregn) { - clear_bit(uuarn, uuari->bitmap); - --uuari->count[uuarn]; -} - -static void free_uuar(struct mlx5_uuar_info *uuari, int uuarn) -{ - int nuuars = uuari->num_uars * MLX5_BF_REGS_PER_PAGE; - int high_uuar = nuuars - uuari->num_low_latency_uuars; - - mutex_lock(&uuari->lock); - if (uuarn == 0) { - --uuari->count[uuarn]; - goto out; - } - - if (uuarn < high_uuar) { - free_med_class_uuar(uuari, uuarn); - goto out; - } - - free_high_class_uuar(uuari, uuarn); - -out: - mutex_unlock(&uuari->lock); + mutex_lock(&bfregi->lock); + bfregi->count[bfregn]--; + mutex_unlock(&bfregi->lock); } static enum mlx5_qp_state to_mlx5_state(enum 
ib_qp_state state) { switch (state) { case IB_QPS_RESET: return MLX5_QP_STATE_RST; case IB_QPS_INIT: return MLX5_QP_STATE_INIT; case IB_QPS_RTR: return MLX5_QP_STATE_RTR; case IB_QPS_RTS: return MLX5_QP_STATE_RTS; case IB_QPS_SQD: return MLX5_QP_STATE_SQD; case IB_QPS_SQE: return MLX5_QP_STATE_SQER; case IB_QPS_ERR: return MLX5_QP_STATE_ERR; default: return -1; } } static int to_mlx5_st(enum ib_qp_type type) { switch (type) { case IB_QPT_RC: return MLX5_QP_ST_RC; case IB_QPT_UC: return MLX5_QP_ST_UC; case IB_QPT_UD: return MLX5_QP_ST_UD; case MLX5_IB_QPT_REG_UMR: return MLX5_QP_ST_REG_UMR; case IB_QPT_XRC_INI: case IB_QPT_XRC_TGT: return MLX5_QP_ST_XRC; case IB_QPT_SMI: return MLX5_QP_ST_QP0; case MLX5_IB_QPT_HW_GSI: return MLX5_QP_ST_QP1; case IB_QPT_RAW_IPV6: return MLX5_QP_ST_RAW_IPV6; case IB_QPT_RAW_PACKET: case IB_QPT_RAW_ETHERTYPE: return MLX5_QP_ST_RAW_ETHERTYPE; case IB_QPT_MAX: default: return -EINVAL; } } static void mlx5_ib_lock_cqs(struct mlx5_ib_cq *send_cq, struct mlx5_ib_cq *recv_cq); static void mlx5_ib_unlock_cqs(struct mlx5_ib_cq *send_cq, struct mlx5_ib_cq *recv_cq); -static int uuarn_to_uar_index(struct mlx5_uuar_info *uuari, int uuarn) +int bfregn_to_uar_index(struct mlx5_ib_dev *dev, + struct mlx5_bfreg_info *bfregi, u32 bfregn, + bool dyn_bfreg) { - return uuari->uars[uuarn / MLX5_BF_REGS_PER_PAGE].index; + unsigned int bfregs_per_sys_page; + u32 index_of_sys_page; + u32 offset; + + if (bfregi->lib_uar_dyn) + return -EINVAL; + + bfregs_per_sys_page = get_uars_per_sys_page(dev, bfregi->lib_uar_4k) * + MLX5_NON_FP_BFREGS_PER_UAR; + index_of_sys_page = bfregn / bfregs_per_sys_page; + + if (dyn_bfreg) { + index_of_sys_page += bfregi->num_static_sys_pages; + + if (index_of_sys_page >= bfregi->num_sys_pages) + return -EINVAL; + + if (bfregn > bfregi->num_dyn_bfregs || + bfregi->sys_pages[index_of_sys_page] == MLX5_IB_INVALID_UAR_INDEX) { + mlx5_ib_dbg(dev, "Invalid dynamic uar index\n"); + return -EINVAL; + } + } + + offset = bfregn % bfregs_per_sys_page / MLX5_NON_FP_BFREGS_PER_UAR; + return bfregi->sys_pages[index_of_sys_page] + offset; } static int mlx5_ib_umem_get(struct mlx5_ib_dev *dev, struct ib_pd *pd, unsigned long addr, size_t size, struct ib_umem **umem, int *npages, int *page_shift, int *ncont, u32 *offset) { int err; *umem = ib_umem_get(pd->uobject->context, addr, size, 0, 0); if (IS_ERR(*umem)) { mlx5_ib_dbg(dev, "umem_get failed\n"); return PTR_ERR(*umem); } mlx5_ib_cont_pages(*umem, addr, npages, page_shift, ncont, NULL); err = mlx5_ib_get_buf_offset(addr, *page_shift, offset); if (err) { mlx5_ib_warn(dev, "bad offset\n"); goto err_umem; } mlx5_ib_dbg(dev, "addr 0x%lx, size %zu, npages %d, page_shift %d, ncont %d, offset %d\n", addr, size, *npages, *page_shift, *ncont, *offset); return 0; err_umem: ib_umem_release(*umem); *umem = NULL; return err; } static void destroy_user_rq(struct ib_pd *pd, struct mlx5_ib_rwq *rwq) { struct mlx5_ib_ucontext *context; context = to_mucontext(pd->uobject->context); mlx5_ib_db_unmap_user(context, &rwq->db); if (rwq->umem) ib_umem_release(rwq->umem); } static int create_user_rq(struct mlx5_ib_dev *dev, struct ib_pd *pd, struct mlx5_ib_rwq *rwq, struct mlx5_ib_create_wq *ucmd) { struct mlx5_ib_ucontext *context; int page_shift = 0; int npages; u32 offset = 0; int ncont = 0; int err; if (!ucmd->buf_addr) return -EINVAL; context = to_mucontext(pd->uobject->context); rwq->umem = ib_umem_get(pd->uobject->context, ucmd->buf_addr, rwq->buf_size, 0, 0); if (IS_ERR(rwq->umem)) { mlx5_ib_dbg(dev, "umem_get failed\n"); err = 
PTR_ERR(rwq->umem); return err; } mlx5_ib_cont_pages(rwq->umem, ucmd->buf_addr, &npages, &page_shift, &ncont, NULL); err = mlx5_ib_get_buf_offset(ucmd->buf_addr, page_shift, &rwq->rq_page_offset); if (err) { mlx5_ib_warn(dev, "bad offset\n"); goto err_umem; } rwq->rq_num_pas = ncont; rwq->page_shift = page_shift; rwq->log_page_size = page_shift - MLX5_ADAPTER_PAGE_SHIFT; rwq->wq_sig = !!(ucmd->flags & MLX5_WQ_FLAG_SIGNATURE); mlx5_ib_dbg(dev, "addr 0x%llx, size %zd, npages %d, page_shift %d, ncont %d, offset %d\n", (unsigned long long)ucmd->buf_addr, rwq->buf_size, npages, page_shift, ncont, offset); err = mlx5_ib_db_map_user(context, ucmd->db_addr, &rwq->db); if (err) { mlx5_ib_dbg(dev, "map failed\n"); goto err_umem; } rwq->create_type = MLX5_WQ_USER; return 0; err_umem: ib_umem_release(rwq->umem); return err; } +static int adjust_bfregn(struct mlx5_ib_dev *dev, + struct mlx5_bfreg_info *bfregi, int bfregn) +{ + return bfregn / MLX5_NON_FP_BFREGS_PER_UAR * MLX5_BFREGS_PER_UAR + + bfregn % MLX5_NON_FP_BFREGS_PER_UAR; +} + static int create_user_qp(struct mlx5_ib_dev *dev, struct ib_pd *pd, struct mlx5_ib_qp *qp, struct ib_udata *udata, struct ib_qp_init_attr *attr, u32 **in, struct mlx5_ib_create_qp_resp *resp, int *inlen, struct mlx5_ib_qp_base *base) { struct mlx5_ib_ucontext *context; struct mlx5_ib_create_qp ucmd; struct mlx5_ib_ubuffer *ubuffer = &base->ubuffer; int page_shift = 0; - int uar_index; + int uar_index = 0; int npages; u32 offset = 0; - int uuarn; + int bfregn; int ncont = 0; __be64 *pas; void *qpc; int err; + u32 uar_flags; err = ib_copy_from_udata(&ucmd, udata, sizeof(ucmd)); if (err) { mlx5_ib_dbg(dev, "copy failed\n"); return err; } context = to_mucontext(pd->uobject->context); - /* - * TBD: should come from the verbs when we have the API - */ - if (qp->flags & MLX5_IB_QP_CROSS_CHANNEL) - /* In CROSS_CHANNEL CQ and QP must use the same UAR */ - uuarn = MLX5_CROSS_CHANNEL_UUAR; - else { - uuarn = alloc_uuar(&context->uuari, MLX5_IB_LATENCY_CLASS_HIGH); - if (uuarn < 0) { - mlx5_ib_dbg(dev, "failed to allocate low latency UUAR\n"); - mlx5_ib_dbg(dev, "reverting to medium latency\n"); - uuarn = alloc_uuar(&context->uuari, MLX5_IB_LATENCY_CLASS_MEDIUM); - if (uuarn < 0) { - mlx5_ib_dbg(dev, "failed to allocate medium latency UUAR\n"); - mlx5_ib_dbg(dev, "reverting to high latency\n"); - uuarn = alloc_uuar(&context->uuari, MLX5_IB_LATENCY_CLASS_LOW); - if (uuarn < 0) { - mlx5_ib_warn(dev, "uuar allocation failed\n"); - return uuarn; - } - } - } + uar_flags = ucmd.flags & (MLX5_QP_FLAG_UAR_PAGE_INDEX | + MLX5_QP_FLAG_BFREG_INDEX); + switch (uar_flags) { + case MLX5_QP_FLAG_UAR_PAGE_INDEX: + uar_index = ucmd.bfreg_index; + bfregn = MLX5_IB_INVALID_BFREG; + break; + case MLX5_QP_FLAG_BFREG_INDEX: + uar_index = bfregn_to_uar_index(dev, &context->bfregi, + ucmd.bfreg_index, true); + if (uar_index < 0) + return uar_index; + bfregn = MLX5_IB_INVALID_BFREG; + break; + case 0: + if (qp->flags & MLX5_IB_QP_CROSS_CHANNEL) + return -EINVAL; + bfregn = alloc_bfreg(dev, &context->bfregi); + if (bfregn < 0) + return bfregn; + break; + default: + return -EINVAL; } - uar_index = uuarn_to_uar_index(&context->uuari, uuarn); - mlx5_ib_dbg(dev, "uuarn 0x%x, uar_index 0x%x\n", uuarn, uar_index); + mlx5_ib_dbg(dev, "bfregn 0x%x, uar_index 0x%x\n", bfregn, uar_index); + if (bfregn != MLX5_IB_INVALID_BFREG) + uar_index = bfregn_to_uar_index(dev, &context->bfregi, bfregn, + false); qp->rq.offset = 0; qp->sq.wqe_shift = ilog2(MLX5_SEND_WQE_BB); qp->sq.offset = qp->rq.wqe_cnt << qp->rq.wqe_shift; err 
= set_user_buf_size(dev, qp, &ucmd, base, attr); if (err) - goto err_uuar; + goto err_bfreg; if (ucmd.buf_addr && ubuffer->buf_size) { ubuffer->buf_addr = ucmd.buf_addr; err = mlx5_ib_umem_get(dev, pd, ubuffer->buf_addr, ubuffer->buf_size, &ubuffer->umem, &npages, &page_shift, &ncont, &offset); if (err) - goto err_uuar; + goto err_bfreg; } else { ubuffer->umem = NULL; } *inlen = MLX5_ST_SZ_BYTES(create_qp_in) + MLX5_FLD_SZ_BYTES(create_qp_in, pas[0]) * ncont; *in = mlx5_vzalloc(*inlen); if (!*in) { err = -ENOMEM; goto err_umem; } pas = (__be64 *)MLX5_ADDR_OF(create_qp_in, *in, pas); if (ubuffer->umem) mlx5_ib_populate_pas(dev, ubuffer->umem, page_shift, pas, 0); qpc = MLX5_ADDR_OF(create_qp_in, *in, qpc); MLX5_SET(qpc, qpc, log_page_size, page_shift - MLX5_ADAPTER_PAGE_SHIFT); MLX5_SET(qpc, qpc, page_offset, offset); MLX5_SET(qpc, qpc, uar_page, uar_index); - resp->uuar_index = uuarn; - qp->uuarn = uuarn; + if (bfregn != MLX5_IB_INVALID_BFREG) + resp->bfreg_index = adjust_bfregn(dev, &context->bfregi, bfregn); + else + resp->bfreg_index = MLX5_IB_INVALID_BFREG; + qp->bfregn = bfregn; err = mlx5_ib_db_map_user(context, ucmd.db_addr, &qp->db); if (err) { mlx5_ib_dbg(dev, "map failed\n"); goto err_free; } err = ib_copy_to_udata(udata, resp, sizeof(*resp)); if (err) { mlx5_ib_dbg(dev, "copy failed\n"); goto err_unmap; } qp->create_type = MLX5_QP_USER; return 0; err_unmap: mlx5_ib_db_unmap_user(context, &qp->db); err_free: kvfree(*in); err_umem: if (ubuffer->umem) ib_umem_release(ubuffer->umem); -err_uuar: - free_uuar(&context->uuari, uuarn); +err_bfreg: + if (bfregn != MLX5_IB_INVALID_BFREG) + mlx5_ib_free_bfreg(dev, &context->bfregi, bfregn); return err; } -static void destroy_qp_user(struct ib_pd *pd, struct mlx5_ib_qp *qp, +static void destroy_qp_user(struct mlx5_ib_dev *dev, struct ib_pd *pd, struct mlx5_ib_qp *qp, struct mlx5_ib_qp_base *base) { struct mlx5_ib_ucontext *context; context = to_mucontext(pd->uobject->context); mlx5_ib_db_unmap_user(context, &qp->db); if (base->ubuffer.umem) ib_umem_release(base->ubuffer.umem); - free_uuar(&context->uuari, qp->uuarn); + + /* + * Free only the BFREGs which are handled by the kernel. + * BFREGs of UARs allocated dynamically are handled by user. 
+ */ + if (qp->bfregn != MLX5_IB_INVALID_BFREG) + mlx5_ib_free_bfreg(dev, &context->bfregi, qp->bfregn); } static int create_kernel_qp(struct mlx5_ib_dev *dev, struct ib_qp_init_attr *init_attr, struct mlx5_ib_qp *qp, u32 **in, int *inlen, struct mlx5_ib_qp_base *base) { - enum mlx5_ib_latency_class lc = MLX5_IB_LATENCY_CLASS_LOW; - struct mlx5_uuar_info *uuari; int uar_index; void *qpc; - int uuarn; int err; - uuari = &dev->mdev->priv.uuari; if (init_attr->create_flags & ~(IB_QP_CREATE_SIGNATURE_EN | IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK | IB_QP_CREATE_IPOIB_UD_LSO | - mlx5_ib_create_qp_sqpn_qp1())) + MLX5_IB_QP_CREATE_SQPN_QP1 | + MLX5_IB_QP_CREATE_WC_TEST)) return -EINVAL; - if (init_attr->qp_type == MLX5_IB_QPT_REG_UMR) - lc = MLX5_IB_LATENCY_CLASS_FAST_PATH; + spin_lock_init(&qp->bf.lock32); - uuarn = alloc_uuar(uuari, lc); - if (uuarn < 0) { - mlx5_ib_dbg(dev, "\n"); - return -ENOMEM; - } + if (init_attr->qp_type == MLX5_IB_QPT_REG_UMR) + qp->bf.bfreg = &dev->fp_bfreg; + else if (init_attr->create_flags & MLX5_IB_QP_CREATE_WC_TEST) + qp->bf.bfreg = &dev->wc_bfreg; + else + qp->bf.bfreg = &dev->bfreg; - qp->bf = &uuari->bfs[uuarn]; - uar_index = qp->bf->uar->index; + /* We need to divide by two since each register is comprised of + * two buffers of identical size, namely odd and even + */ + qp->bf.buf_size = (1 << MLX5_CAP_GEN(dev->mdev, log_bf_reg_size)) / 2; + uar_index = qp->bf.bfreg->index; err = calc_sq_size(dev, init_attr, qp); if (err < 0) { mlx5_ib_dbg(dev, "err %d\n", err); - goto err_uuar; + return err; } qp->rq.offset = 0; qp->sq.offset = qp->rq.wqe_cnt << qp->rq.wqe_shift; base->ubuffer.buf_size = err + (qp->rq.wqe_cnt << qp->rq.wqe_shift); err = mlx5_buf_alloc(dev->mdev, base->ubuffer.buf_size, 2 * PAGE_SIZE, &qp->buf); if (err) { mlx5_ib_dbg(dev, "err %d\n", err); - goto err_uuar; + return err; } qp->sq.qend = mlx5_get_send_wqe(qp, qp->sq.wqe_cnt); *inlen = MLX5_ST_SZ_BYTES(create_qp_in) + MLX5_FLD_SZ_BYTES(create_qp_in, pas[0]) * qp->buf.npages; *in = mlx5_vzalloc(*inlen); if (!*in) { err = -ENOMEM; goto err_buf; } qpc = MLX5_ADDR_OF(create_qp_in, *in, qpc); MLX5_SET(qpc, qpc, uar_page, uar_index); MLX5_SET(qpc, qpc, log_page_size, qp->buf.page_shift - MLX5_ADAPTER_PAGE_SHIFT); /* Set "fast registration enabled" for all kernel QPs */ MLX5_SET(qpc, qpc, fre, 1); MLX5_SET(qpc, qpc, rlky, 1); - if (init_attr->create_flags & mlx5_ib_create_qp_sqpn_qp1()) { + if (init_attr->create_flags & MLX5_IB_QP_CREATE_SQPN_QP1) { MLX5_SET(qpc, qpc, deth_sqpn, 1); qp->flags |= MLX5_IB_QP_SQPN_QP1; } mlx5_fill_page_array(&qp->buf, (__be64 *)MLX5_ADDR_OF(create_qp_in, *in, pas)); err = mlx5_db_alloc(dev->mdev, &qp->db); if (err) { mlx5_ib_dbg(dev, "err %d\n", err); goto err_free; } qp->sq.wrid = kmalloc(qp->sq.wqe_cnt * sizeof(*qp->sq.wrid), GFP_KERNEL); qp->sq.wr_data = kmalloc(qp->sq.wqe_cnt * sizeof(*qp->sq.wr_data), GFP_KERNEL); qp->rq.wrid = kmalloc(qp->rq.wqe_cnt * sizeof(*qp->rq.wrid), GFP_KERNEL); qp->sq.w_list = kmalloc(qp->sq.wqe_cnt * sizeof(*qp->sq.w_list), GFP_KERNEL); qp->sq.wqe_head = kmalloc(qp->sq.wqe_cnt * sizeof(*qp->sq.wqe_head), GFP_KERNEL); if (!qp->sq.wrid || !qp->sq.wr_data || !qp->rq.wrid || !qp->sq.w_list || !qp->sq.wqe_head) { err = -ENOMEM; goto err_wrid; } qp->create_type = MLX5_QP_KERNEL; return 0; err_wrid: kfree(qp->sq.wqe_head); kfree(qp->sq.w_list); kfree(qp->sq.wrid); kfree(qp->sq.wr_data); kfree(qp->rq.wrid); mlx5_db_free(dev->mdev, &qp->db); err_free: kvfree(*in); err_buf: mlx5_buf_free(dev->mdev, &qp->buf); - -err_uuar: - 
free_uuar(&dev->mdev->priv.uuari, uuarn); return err; } static void destroy_qp_kernel(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp) { kfree(qp->sq.wqe_head); kfree(qp->sq.w_list); kfree(qp->sq.wrid); kfree(qp->sq.wr_data); kfree(qp->rq.wrid); mlx5_db_free(dev->mdev, &qp->db); mlx5_buf_free(dev->mdev, &qp->buf); - free_uuar(&dev->mdev->priv.uuari, qp->bf->uuarn); } static u32 get_rx_type(struct mlx5_ib_qp *qp, struct ib_qp_init_attr *attr) { if (attr->srq || (attr->qp_type == IB_QPT_XRC_TGT) || (attr->qp_type == IB_QPT_XRC_INI)) return MLX5_SRQ_RQ; else if (!qp->has_rq) return MLX5_ZERO_LEN_RQ; else return MLX5_NON_ZERO_RQ; } static int is_connected(enum ib_qp_type qp_type) { if (qp_type == IB_QPT_RC || qp_type == IB_QPT_UC) return 1; return 0; } static int create_raw_packet_qp_tis(struct mlx5_ib_dev *dev, struct mlx5_ib_sq *sq, u32 tdn) { u32 in[MLX5_ST_SZ_DW(create_tis_in)] = {0}; void *tisc = MLX5_ADDR_OF(create_tis_in, in, ctx); MLX5_SET(tisc, tisc, transport_domain, tdn); return mlx5_core_create_tis(dev->mdev, in, sizeof(in), &sq->tisn); } static void destroy_raw_packet_qp_tis(struct mlx5_ib_dev *dev, struct mlx5_ib_sq *sq) { mlx5_core_destroy_tis(dev->mdev, sq->tisn); } static int create_raw_packet_qp_sq(struct mlx5_ib_dev *dev, struct mlx5_ib_sq *sq, void *qpin, struct ib_pd *pd) { struct mlx5_ib_ubuffer *ubuffer = &sq->ubuffer; __be64 *pas; void *in; void *sqc; void *qpc = MLX5_ADDR_OF(create_qp_in, qpin, qpc); void *wq; int inlen; int err; int page_shift = 0; int npages; int ncont = 0; u32 offset = 0; err = mlx5_ib_umem_get(dev, pd, ubuffer->buf_addr, ubuffer->buf_size, &sq->ubuffer.umem, &npages, &page_shift, &ncont, &offset); if (err) return err; inlen = MLX5_ST_SZ_BYTES(create_sq_in) + sizeof(u64) * ncont; in = mlx5_vzalloc(inlen); if (!in) { err = -ENOMEM; goto err_umem; } sqc = MLX5_ADDR_OF(create_sq_in, in, ctx); MLX5_SET(sqc, sqc, flush_in_error_en, 1); MLX5_SET(sqc, sqc, state, MLX5_SQC_STATE_RST); MLX5_SET(sqc, sqc, user_index, MLX5_GET(qpc, qpc, user_index)); MLX5_SET(sqc, sqc, cqn, MLX5_GET(qpc, qpc, cqn_snd)); MLX5_SET(sqc, sqc, tis_lst_sz, 1); MLX5_SET(sqc, sqc, tis_num_0, sq->tisn); wq = MLX5_ADDR_OF(sqc, sqc, wq); MLX5_SET(wq, wq, wq_type, MLX5_WQ_TYPE_CYCLIC); MLX5_SET(wq, wq, pd, MLX5_GET(qpc, qpc, pd)); MLX5_SET(wq, wq, uar_page, MLX5_GET(qpc, qpc, uar_page)); MLX5_SET64(wq, wq, dbr_addr, MLX5_GET64(qpc, qpc, dbr_addr)); MLX5_SET(wq, wq, log_wq_stride, ilog2(MLX5_SEND_WQE_BB)); MLX5_SET(wq, wq, log_wq_sz, MLX5_GET(qpc, qpc, log_sq_size)); MLX5_SET(wq, wq, log_wq_pg_sz, page_shift - MLX5_ADAPTER_PAGE_SHIFT); MLX5_SET(wq, wq, page_offset, offset); pas = (__be64 *)MLX5_ADDR_OF(wq, wq, pas); mlx5_ib_populate_pas(dev, sq->ubuffer.umem, page_shift, pas, 0); err = mlx5_core_create_sq_tracked(dev->mdev, in, inlen, &sq->base.mqp); kvfree(in); if (err) goto err_umem; return 0; err_umem: ib_umem_release(sq->ubuffer.umem); sq->ubuffer.umem = NULL; return err; } static void destroy_raw_packet_qp_sq(struct mlx5_ib_dev *dev, struct mlx5_ib_sq *sq) { mlx5_core_destroy_sq_tracked(dev->mdev, &sq->base.mqp); ib_umem_release(sq->ubuffer.umem); } static int get_rq_pas_size(void *qpc) { u32 log_page_size = MLX5_GET(qpc, qpc, log_page_size) + 12; u32 log_rq_stride = MLX5_GET(qpc, qpc, log_rq_stride); u32 log_rq_size = MLX5_GET(qpc, qpc, log_rq_size); u32 page_offset = MLX5_GET(qpc, qpc, page_offset); u32 po_quanta = 1 << (log_page_size - 6); u32 rq_sz = 1 << (log_rq_size + 4 + log_rq_stride); u32 page_size = 1 << log_page_size; u32 rq_sz_po = rq_sz + (page_offset * po_quanta); u32 
rq_num_pas = (rq_sz_po + page_size - 1) / page_size; return rq_num_pas * sizeof(u64); } static int create_raw_packet_qp_rq(struct mlx5_ib_dev *dev, struct mlx5_ib_rq *rq, void *qpin) { struct mlx5_ib_qp *mqp = rq->base.container_mibqp; __be64 *pas; __be64 *qp_pas; void *in; void *rqc; void *wq; void *qpc = MLX5_ADDR_OF(create_qp_in, qpin, qpc); int inlen; int err; u32 rq_pas_size = get_rq_pas_size(qpc); inlen = MLX5_ST_SZ_BYTES(create_rq_in) + rq_pas_size; in = mlx5_vzalloc(inlen); if (!in) return -ENOMEM; rqc = MLX5_ADDR_OF(create_rq_in, in, ctx); MLX5_SET(rqc, rqc, vlan_strip_disable, 1); MLX5_SET(rqc, rqc, mem_rq_type, MLX5_RQC_RQ_TYPE_MEMORY_RQ_INLINE); MLX5_SET(rqc, rqc, state, MLX5_RQC_STATE_RST); MLX5_SET(rqc, rqc, flush_in_error_en, 1); MLX5_SET(rqc, rqc, user_index, MLX5_GET(qpc, qpc, user_index)); MLX5_SET(rqc, rqc, cqn, MLX5_GET(qpc, qpc, cqn_rcv)); if (mqp->flags & MLX5_IB_QP_CAP_SCATTER_FCS) MLX5_SET(rqc, rqc, scatter_fcs, 1); wq = MLX5_ADDR_OF(rqc, rqc, wq); MLX5_SET(wq, wq, wq_type, MLX5_WQ_TYPE_CYCLIC); MLX5_SET(wq, wq, end_padding_mode, MLX5_GET(qpc, qpc, end_padding_mode)); MLX5_SET(wq, wq, page_offset, MLX5_GET(qpc, qpc, page_offset)); MLX5_SET(wq, wq, pd, MLX5_GET(qpc, qpc, pd)); MLX5_SET64(wq, wq, dbr_addr, MLX5_GET64(qpc, qpc, dbr_addr)); MLX5_SET(wq, wq, log_wq_stride, MLX5_GET(qpc, qpc, log_rq_stride) + 4); MLX5_SET(wq, wq, log_wq_pg_sz, MLX5_GET(qpc, qpc, log_page_size)); MLX5_SET(wq, wq, log_wq_sz, MLX5_GET(qpc, qpc, log_rq_size)); pas = (__be64 *)MLX5_ADDR_OF(wq, wq, pas); qp_pas = (__be64 *)MLX5_ADDR_OF(create_qp_in, qpin, pas); memcpy(pas, qp_pas, rq_pas_size); err = mlx5_core_create_rq_tracked(dev->mdev, in, inlen, &rq->base.mqp); kvfree(in); return err; } static void destroy_raw_packet_qp_rq(struct mlx5_ib_dev *dev, struct mlx5_ib_rq *rq) { mlx5_core_destroy_rq_tracked(dev->mdev, &rq->base.mqp); } static int create_raw_packet_qp_tir(struct mlx5_ib_dev *dev, struct mlx5_ib_rq *rq, u32 tdn) { u32 *in; void *tirc; int inlen; int err; inlen = MLX5_ST_SZ_BYTES(create_tir_in); in = mlx5_vzalloc(inlen); if (!in) return -ENOMEM; tirc = MLX5_ADDR_OF(create_tir_in, in, tir_context); MLX5_SET(tirc, tirc, disp_type, MLX5_TIRC_DISP_TYPE_DIRECT); MLX5_SET(tirc, tirc, inline_rqn, rq->base.mqp.qpn); MLX5_SET(tirc, tirc, transport_domain, tdn); err = mlx5_core_create_tir(dev->mdev, in, inlen, &rq->tirn); kvfree(in); return err; } static void destroy_raw_packet_qp_tir(struct mlx5_ib_dev *dev, struct mlx5_ib_rq *rq) { mlx5_core_destroy_tir(dev->mdev, rq->tirn); } static int create_raw_packet_qp(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp, u32 *in, struct ib_pd *pd) { struct mlx5_ib_raw_packet_qp *raw_packet_qp = &qp->raw_packet_qp; struct mlx5_ib_sq *sq = &raw_packet_qp->sq; struct mlx5_ib_rq *rq = &raw_packet_qp->rq; struct ib_uobject *uobj = pd->uobject; struct ib_ucontext *ucontext = uobj->context; struct mlx5_ib_ucontext *mucontext = to_mucontext(ucontext); int err; u32 tdn = mucontext->tdn; if (qp->sq.wqe_cnt) { err = create_raw_packet_qp_tis(dev, sq, tdn); if (err) return err; err = create_raw_packet_qp_sq(dev, sq, in, pd); if (err) goto err_destroy_tis; sq->base.container_mibqp = qp; } if (qp->rq.wqe_cnt) { rq->base.container_mibqp = qp; err = create_raw_packet_qp_rq(dev, rq, in); if (err) goto err_destroy_sq; err = create_raw_packet_qp_tir(dev, rq, tdn); if (err) goto err_destroy_rq; } qp->trans_qp.base.mqp.qpn = qp->sq.wqe_cnt ? 
sq->base.mqp.qpn : rq->base.mqp.qpn; return 0; err_destroy_rq: destroy_raw_packet_qp_rq(dev, rq); err_destroy_sq: if (!qp->sq.wqe_cnt) return err; destroy_raw_packet_qp_sq(dev, sq); err_destroy_tis: destroy_raw_packet_qp_tis(dev, sq); return err; } static void destroy_raw_packet_qp(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp) { struct mlx5_ib_raw_packet_qp *raw_packet_qp = &qp->raw_packet_qp; struct mlx5_ib_sq *sq = &raw_packet_qp->sq; struct mlx5_ib_rq *rq = &raw_packet_qp->rq; if (qp->rq.wqe_cnt) { destroy_raw_packet_qp_tir(dev, rq); destroy_raw_packet_qp_rq(dev, rq); } if (qp->sq.wqe_cnt) { destroy_raw_packet_qp_sq(dev, sq); destroy_raw_packet_qp_tis(dev, sq); } } static void raw_packet_qp_copy_info(struct mlx5_ib_qp *qp, struct mlx5_ib_raw_packet_qp *raw_packet_qp) { struct mlx5_ib_sq *sq = &raw_packet_qp->sq; struct mlx5_ib_rq *rq = &raw_packet_qp->rq; sq->sq = &qp->sq; rq->rq = &qp->rq; sq->doorbell = &qp->db; rq->doorbell = &qp->db; } static void destroy_rss_raw_qp_tir(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp) { mlx5_core_destroy_tir(dev->mdev, qp->rss_qp.tirn); } static int create_rss_raw_qp_tir(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp, struct ib_pd *pd, struct ib_qp_init_attr *init_attr, struct ib_udata *udata) { struct ib_uobject *uobj = pd->uobject; struct ib_ucontext *ucontext = uobj->context; struct mlx5_ib_ucontext *mucontext = to_mucontext(ucontext); struct mlx5_ib_create_qp_resp resp = {}; int inlen; int err; u32 *in; void *tirc; void *hfso; u32 selected_fields = 0; size_t min_resp_len; u32 tdn = mucontext->tdn; struct mlx5_ib_create_qp_rss ucmd = {}; size_t required_cmd_sz; if (init_attr->qp_type != IB_QPT_RAW_PACKET) return -EOPNOTSUPP; if (init_attr->create_flags || init_attr->send_cq) return -EINVAL; - min_resp_len = offsetof(typeof(resp), uuar_index) + sizeof(resp.uuar_index); + min_resp_len = offsetof(typeof(resp), bfreg_index) + sizeof(resp.bfreg_index); if (udata->outlen < min_resp_len) return -EINVAL; required_cmd_sz = offsetof(typeof(ucmd), reserved1) + sizeof(ucmd.reserved1); if (udata->inlen < required_cmd_sz) { mlx5_ib_dbg(dev, "invalid inlen\n"); return -EINVAL; } if (udata->inlen > sizeof(ucmd) && !ib_is_udata_cleared(udata, sizeof(ucmd), udata->inlen - sizeof(ucmd))) { mlx5_ib_dbg(dev, "inlen is not supported\n"); return -EOPNOTSUPP; } if (ib_copy_from_udata(&ucmd, udata, min(sizeof(ucmd), udata->inlen))) { mlx5_ib_dbg(dev, "copy failed\n"); return -EFAULT; } if (ucmd.comp_mask) { mlx5_ib_dbg(dev, "invalid comp mask\n"); return -EOPNOTSUPP; } if (memchr_inv(ucmd.reserved, 0, sizeof(ucmd.reserved)) || ucmd.reserved1) { mlx5_ib_dbg(dev, "invalid reserved\n"); return -EOPNOTSUPP; } err = ib_copy_to_udata(udata, &resp, min_resp_len); if (err) { mlx5_ib_dbg(dev, "copy failed\n"); return -EINVAL; } inlen = MLX5_ST_SZ_BYTES(create_tir_in); in = mlx5_vzalloc(inlen); if (!in) return -ENOMEM; tirc = MLX5_ADDR_OF(create_tir_in, in, tir_context); MLX5_SET(tirc, tirc, disp_type, MLX5_TIRC_DISP_TYPE_INDIRECT); MLX5_SET(tirc, tirc, indirect_table, init_attr->rwq_ind_tbl->ind_tbl_num); MLX5_SET(tirc, tirc, transport_domain, tdn); hfso = MLX5_ADDR_OF(tirc, tirc, rx_hash_field_selector_outer); switch (ucmd.rx_hash_function) { case MLX5_RX_HASH_FUNC_TOEPLITZ: { void *rss_key = MLX5_ADDR_OF(tirc, tirc, rx_hash_toeplitz_key); size_t len = MLX5_FLD_SZ_BYTES(tirc, rx_hash_toeplitz_key); if (len != ucmd.rx_key_len) { err = -EINVAL; goto err; } MLX5_SET(tirc, tirc, rx_hash_fn, MLX5_RX_HASH_FUNC_TOEPLITZ); MLX5_SET(tirc, tirc, rx_hash_symmetric, 1); memcpy(rss_key, 
ucmd.rx_hash_key, len); break; } default: err = -EOPNOTSUPP; goto err; } if (!ucmd.rx_hash_fields_mask) { /* special case when this TIR serves as steering entry without hashing */ if (!init_attr->rwq_ind_tbl->log_ind_tbl_size) goto create_tir; err = -EINVAL; goto err; } if (((ucmd.rx_hash_fields_mask & MLX5_RX_HASH_SRC_IPV4) || (ucmd.rx_hash_fields_mask & MLX5_RX_HASH_DST_IPV4)) && ((ucmd.rx_hash_fields_mask & MLX5_RX_HASH_SRC_IPV6) || (ucmd.rx_hash_fields_mask & MLX5_RX_HASH_DST_IPV6))) { err = -EINVAL; goto err; } /* If none of IPV4 & IPV6 SRC/DST was set - this bit field is ignored */ if ((ucmd.rx_hash_fields_mask & MLX5_RX_HASH_SRC_IPV4) || (ucmd.rx_hash_fields_mask & MLX5_RX_HASH_DST_IPV4)) MLX5_SET(rx_hash_field_select, hfso, l3_prot_type, MLX5_L3_PROT_TYPE_IPV4); else if ((ucmd.rx_hash_fields_mask & MLX5_RX_HASH_SRC_IPV6) || (ucmd.rx_hash_fields_mask & MLX5_RX_HASH_DST_IPV6)) MLX5_SET(rx_hash_field_select, hfso, l3_prot_type, MLX5_L3_PROT_TYPE_IPV6); if (((ucmd.rx_hash_fields_mask & MLX5_RX_HASH_SRC_PORT_TCP) || (ucmd.rx_hash_fields_mask & MLX5_RX_HASH_DST_PORT_TCP)) && ((ucmd.rx_hash_fields_mask & MLX5_RX_HASH_SRC_PORT_UDP) || (ucmd.rx_hash_fields_mask & MLX5_RX_HASH_DST_PORT_UDP))) { err = -EINVAL; goto err; } /* If none of TCP & UDP SRC/DST was set - this bit field is ignored */ if ((ucmd.rx_hash_fields_mask & MLX5_RX_HASH_SRC_PORT_TCP) || (ucmd.rx_hash_fields_mask & MLX5_RX_HASH_DST_PORT_TCP)) MLX5_SET(rx_hash_field_select, hfso, l4_prot_type, MLX5_L4_PROT_TYPE_TCP); else if ((ucmd.rx_hash_fields_mask & MLX5_RX_HASH_SRC_PORT_UDP) || (ucmd.rx_hash_fields_mask & MLX5_RX_HASH_DST_PORT_UDP)) MLX5_SET(rx_hash_field_select, hfso, l4_prot_type, MLX5_L4_PROT_TYPE_UDP); if ((ucmd.rx_hash_fields_mask & MLX5_RX_HASH_SRC_IPV4) || (ucmd.rx_hash_fields_mask & MLX5_RX_HASH_SRC_IPV6)) selected_fields |= MLX5_HASH_FIELD_SEL_SRC_IP; if ((ucmd.rx_hash_fields_mask & MLX5_RX_HASH_DST_IPV4) || (ucmd.rx_hash_fields_mask & MLX5_RX_HASH_DST_IPV6)) selected_fields |= MLX5_HASH_FIELD_SEL_DST_IP; if ((ucmd.rx_hash_fields_mask & MLX5_RX_HASH_SRC_PORT_TCP) || (ucmd.rx_hash_fields_mask & MLX5_RX_HASH_SRC_PORT_UDP)) selected_fields |= MLX5_HASH_FIELD_SEL_L4_SPORT; if ((ucmd.rx_hash_fields_mask & MLX5_RX_HASH_DST_PORT_TCP) || (ucmd.rx_hash_fields_mask & MLX5_RX_HASH_DST_PORT_UDP)) selected_fields |= MLX5_HASH_FIELD_SEL_L4_DPORT; MLX5_SET(rx_hash_field_select, hfso, selected_fields, selected_fields); create_tir: err = mlx5_core_create_tir(dev->mdev, in, inlen, &qp->rss_qp.tirn); if (err) goto err; kvfree(in); /* qpn is reserved for that QP */ qp->trans_qp.base.mqp.qpn = 0; qp->flags |= MLX5_IB_QP_RSS; return 0; err: kvfree(in); return err; } static int create_qp_common(struct mlx5_ib_dev *dev, struct ib_pd *pd, struct ib_qp_init_attr *init_attr, struct ib_udata *udata, struct mlx5_ib_qp *qp) { struct mlx5_ib_resources *devr = &dev->devr; int inlen = MLX5_ST_SZ_BYTES(create_qp_in); struct mlx5_core_dev *mdev = dev->mdev; struct mlx5_ib_create_qp_resp resp; struct mlx5_ib_cq *send_cq; struct mlx5_ib_cq *recv_cq; unsigned long flags; u32 uidx = MLX5_IB_DEFAULT_UIDX; struct mlx5_ib_create_qp ucmd; struct mlx5_ib_qp_base *base; void *qpc; u32 *in; int err; base = init_attr->qp_type == IB_QPT_RAW_PACKET ? 
&qp->raw_packet_qp.rq.base : &qp->trans_qp.base; if (init_attr->qp_type != IB_QPT_RAW_PACKET) mlx5_ib_odp_create_qp(qp); mutex_init(&qp->mutex); spin_lock_init(&qp->sq.lock); spin_lock_init(&qp->rq.lock); if (init_attr->rwq_ind_tbl) { if (!udata) return -ENOSYS; err = create_rss_raw_qp_tir(dev, qp, pd, init_attr, udata); return err; } if (init_attr->create_flags & IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK) { if (!MLX5_CAP_GEN(mdev, block_lb_mc)) { mlx5_ib_dbg(dev, "block multicast loopback isn't supported\n"); return -EINVAL; } else { qp->flags |= MLX5_IB_QP_BLOCK_MULTICAST_LOOPBACK; } } if (init_attr->create_flags & (IB_QP_CREATE_CROSS_CHANNEL | IB_QP_CREATE_MANAGED_SEND | IB_QP_CREATE_MANAGED_RECV)) { if (!MLX5_CAP_GEN(mdev, cd)) { mlx5_ib_dbg(dev, "cross-channel isn't supported\n"); return -EINVAL; } if (init_attr->create_flags & IB_QP_CREATE_CROSS_CHANNEL) qp->flags |= MLX5_IB_QP_CROSS_CHANNEL; if (init_attr->create_flags & IB_QP_CREATE_MANAGED_SEND) qp->flags |= MLX5_IB_QP_MANAGED_SEND; if (init_attr->create_flags & IB_QP_CREATE_MANAGED_RECV) qp->flags |= MLX5_IB_QP_MANAGED_RECV; } if (init_attr->qp_type == IB_QPT_UD && (init_attr->create_flags & IB_QP_CREATE_IPOIB_UD_LSO)) if (!MLX5_CAP_GEN(mdev, ipoib_ipoib_offloads)) { mlx5_ib_dbg(dev, "ipoib UD lso qp isn't supported\n"); return -EOPNOTSUPP; } if (init_attr->create_flags & IB_QP_CREATE_SCATTER_FCS) { if (init_attr->qp_type != IB_QPT_RAW_PACKET) { mlx5_ib_dbg(dev, "Scatter FCS is supported only for Raw Packet QPs"); return -EOPNOTSUPP; } if (!MLX5_CAP_GEN(dev->mdev, eth_net_offloads) || !MLX5_CAP_ETH(dev->mdev, scatter_fcs)) { mlx5_ib_dbg(dev, "Scatter FCS isn't supported\n"); return -EOPNOTSUPP; } qp->flags |= MLX5_IB_QP_CAP_SCATTER_FCS; } if (init_attr->sq_sig_type == IB_SIGNAL_ALL_WR) qp->sq_signal_bits = MLX5_WQE_CTRL_CQ_UPDATE; if (pd && pd->uobject) { if (ib_copy_from_udata(&ucmd, udata, sizeof(ucmd))) { mlx5_ib_dbg(dev, "copy failed\n"); return -EFAULT; } err = get_qp_user_index(to_mucontext(pd->uobject->context), &ucmd, udata->inlen, &uidx); if (err) return err; qp->wq_sig = !!(ucmd.flags & MLX5_QP_FLAG_SIGNATURE); qp->scat_cqe = !!(ucmd.flags & MLX5_QP_FLAG_SCATTER_CQE); } else { qp->wq_sig = !!wq_signature; } qp->has_rq = qp_has_rq(init_attr); err = set_rq_size(dev, &init_attr->cap, qp->has_rq, qp, (pd && pd->uobject) ? 
&ucmd : NULL); if (err) { mlx5_ib_dbg(dev, "err %d\n", err); return err; } if (pd) { if (pd->uobject) { __u32 max_wqes = 1 << MLX5_CAP_GEN(mdev, log_max_qp_sz); mlx5_ib_dbg(dev, "requested sq_wqe_count (%d)\n", ucmd.sq_wqe_count); if (ucmd.rq_wqe_shift != qp->rq.wqe_shift || ucmd.rq_wqe_count != qp->rq.wqe_cnt) { mlx5_ib_dbg(dev, "invalid rq params\n"); return -EINVAL; } if (ucmd.sq_wqe_count > max_wqes) { mlx5_ib_dbg(dev, "requested sq_wqe_count (%d) > max allowed (%d)\n", ucmd.sq_wqe_count, max_wqes); return -EINVAL; } if (init_attr->create_flags & - mlx5_ib_create_qp_sqpn_qp1()) { + MLX5_IB_QP_CREATE_SQPN_QP1) { mlx5_ib_dbg(dev, "user-space is not allowed to create UD QPs spoofing as QP1\n"); return -EINVAL; } err = create_user_qp(dev, pd, qp, udata, init_attr, &in, &resp, &inlen, base); if (err) mlx5_ib_dbg(dev, "err %d\n", err); } else { err = create_kernel_qp(dev, init_attr, qp, &in, &inlen, base); if (err) mlx5_ib_dbg(dev, "err %d\n", err); } if (err) return err; } else { in = mlx5_vzalloc(inlen); if (!in) return -ENOMEM; qp->create_type = MLX5_QP_EMPTY; } if (is_sqp(init_attr->qp_type)) qp->port = init_attr->port_num; qpc = MLX5_ADDR_OF(create_qp_in, in, qpc); MLX5_SET(qpc, qpc, st, to_mlx5_st(init_attr->qp_type)); MLX5_SET(qpc, qpc, pm_state, MLX5_QP_PM_MIGRATED); if (init_attr->qp_type != MLX5_IB_QPT_REG_UMR) MLX5_SET(qpc, qpc, pd, to_mpd(pd ? pd : devr->p0)->pdn); else MLX5_SET(qpc, qpc, latency_sensitive, 1); if (qp->wq_sig) MLX5_SET(qpc, qpc, wq_signature, 1); if (qp->flags & MLX5_IB_QP_BLOCK_MULTICAST_LOOPBACK) MLX5_SET(qpc, qpc, block_lb_mc, 1); if (qp->flags & MLX5_IB_QP_CROSS_CHANNEL) MLX5_SET(qpc, qpc, cd_master, 1); if (qp->flags & MLX5_IB_QP_MANAGED_SEND) MLX5_SET(qpc, qpc, cd_slave_send, 1); if (qp->flags & MLX5_IB_QP_MANAGED_RECV) MLX5_SET(qpc, qpc, cd_slave_receive, 1); if (qp->scat_cqe && is_connected(init_attr->qp_type)) { int rcqe_sz; int scqe_sz; rcqe_sz = mlx5_ib_get_cqe_size(dev, init_attr->recv_cq); scqe_sz = mlx5_ib_get_cqe_size(dev, init_attr->send_cq); if (rcqe_sz == 128) MLX5_SET(qpc, qpc, cs_res, MLX5_RES_SCAT_DATA64_CQE); else MLX5_SET(qpc, qpc, cs_res, MLX5_RES_SCAT_DATA32_CQE); if (init_attr->sq_sig_type == IB_SIGNAL_ALL_WR) { if (scqe_sz == 128) MLX5_SET(qpc, qpc, cs_req, MLX5_REQ_SCAT_DATA64_CQE); else MLX5_SET(qpc, qpc, cs_req, MLX5_REQ_SCAT_DATA32_CQE); } } if (qp->rq.wqe_cnt) { MLX5_SET(qpc, qpc, log_rq_stride, qp->rq.wqe_shift - 4); MLX5_SET(qpc, qpc, log_rq_size, ilog2(qp->rq.wqe_cnt)); } MLX5_SET(qpc, qpc, rq_type, get_rx_type(qp, init_attr)); if (qp->sq.wqe_cnt) MLX5_SET(qpc, qpc, log_sq_size, ilog2(qp->sq.wqe_cnt)); else MLX5_SET(qpc, qpc, no_sq, 1); /* Set default resources */ switch (init_attr->qp_type) { case IB_QPT_XRC_TGT: MLX5_SET(qpc, qpc, cqn_rcv, to_mcq(devr->c0)->mcq.cqn); MLX5_SET(qpc, qpc, cqn_snd, to_mcq(devr->c0)->mcq.cqn); MLX5_SET(qpc, qpc, srqn_rmpn, to_msrq(devr->s0)->msrq.srqn); MLX5_SET(qpc, qpc, xrcd, to_mxrcd(init_attr->xrcd)->xrcdn); break; case IB_QPT_XRC_INI: MLX5_SET(qpc, qpc, cqn_rcv, to_mcq(devr->c0)->mcq.cqn); MLX5_SET(qpc, qpc, xrcd, to_mxrcd(devr->x1)->xrcdn); MLX5_SET(qpc, qpc, srqn_rmpn, to_msrq(devr->s0)->msrq.srqn); break; default: if (init_attr->srq) { MLX5_SET(qpc, qpc, xrcd, to_mxrcd(devr->x0)->xrcdn); MLX5_SET(qpc, qpc, srqn_rmpn, to_msrq(init_attr->srq)->msrq.srqn); } else { MLX5_SET(qpc, qpc, xrcd, to_mxrcd(devr->x1)->xrcdn); MLX5_SET(qpc, qpc, srqn_rmpn, to_msrq(devr->s1)->msrq.srqn); } } if (init_attr->send_cq) MLX5_SET(qpc, qpc, cqn_snd, to_mcq(init_attr->send_cq)->mcq.cqn); if (init_attr->recv_cq) 
MLX5_SET(qpc, qpc, cqn_rcv, to_mcq(init_attr->recv_cq)->mcq.cqn); MLX5_SET64(qpc, qpc, dbr_addr, qp->db.dma); /* 0xffffff means we ask to work with cqe version 0 */ if (MLX5_CAP_GEN(mdev, cqe_version) == MLX5_CQE_VERSION_V1) MLX5_SET(qpc, qpc, user_index, uidx); /* we use IB_QP_CREATE_IPOIB_UD_LSO to indicates ipoib qp */ if (init_attr->qp_type == IB_QPT_UD && (init_attr->create_flags & IB_QP_CREATE_IPOIB_UD_LSO)) { MLX5_SET(qpc, qpc, ulp_stateless_offload_mode, 1); qp->flags |= MLX5_IB_QP_LSO; } if (init_attr->qp_type == IB_QPT_RAW_PACKET) { qp->raw_packet_qp.sq.ubuffer.buf_addr = ucmd.sq_buf_addr; raw_packet_qp_copy_info(qp, &qp->raw_packet_qp); err = create_raw_packet_qp(dev, qp, in, pd); } else { err = mlx5_core_create_qp(dev->mdev, &base->mqp, in, inlen); } if (err) { mlx5_ib_dbg(dev, "create qp failed\n"); goto err_create; } kvfree(in); base->container_mibqp = qp; base->mqp.event = mlx5_ib_qp_event; get_cqs(init_attr->qp_type, init_attr->send_cq, init_attr->recv_cq, &send_cq, &recv_cq); spin_lock_irqsave(&dev->reset_flow_resource_lock, flags); mlx5_ib_lock_cqs(send_cq, recv_cq); /* Maintain device to QPs access, needed for further handling via reset * flow */ list_add_tail(&qp->qps_list, &dev->qp_list); /* Maintain CQ to QPs access, needed for further handling via reset flow */ if (send_cq) list_add_tail(&qp->cq_send_list, &send_cq->list_send_qp); if (recv_cq) list_add_tail(&qp->cq_recv_list, &recv_cq->list_recv_qp); mlx5_ib_unlock_cqs(send_cq, recv_cq); spin_unlock_irqrestore(&dev->reset_flow_resource_lock, flags); return 0; err_create: if (qp->create_type == MLX5_QP_USER) - destroy_qp_user(pd, qp, base); + destroy_qp_user(dev, pd, qp, base); else if (qp->create_type == MLX5_QP_KERNEL) destroy_qp_kernel(dev, qp); kvfree(in); return err; } static void mlx5_ib_lock_cqs(struct mlx5_ib_cq *send_cq, struct mlx5_ib_cq *recv_cq) __acquires(&send_cq->lock) __acquires(&recv_cq->lock) { if (send_cq) { if (recv_cq) { if (send_cq->mcq.cqn < recv_cq->mcq.cqn) { spin_lock(&send_cq->lock); spin_lock_nested(&recv_cq->lock, SINGLE_DEPTH_NESTING); } else if (send_cq->mcq.cqn == recv_cq->mcq.cqn) { spin_lock(&send_cq->lock); __acquire(&recv_cq->lock); } else { spin_lock(&recv_cq->lock); spin_lock_nested(&send_cq->lock, SINGLE_DEPTH_NESTING); } } else { spin_lock(&send_cq->lock); __acquire(&recv_cq->lock); } } else if (recv_cq) { spin_lock(&recv_cq->lock); __acquire(&send_cq->lock); } else { __acquire(&send_cq->lock); __acquire(&recv_cq->lock); } } static void mlx5_ib_unlock_cqs(struct mlx5_ib_cq *send_cq, struct mlx5_ib_cq *recv_cq) __releases(&send_cq->lock) __releases(&recv_cq->lock) { if (send_cq) { if (recv_cq) { if (send_cq->mcq.cqn < recv_cq->mcq.cqn) { spin_unlock(&recv_cq->lock); spin_unlock(&send_cq->lock); } else if (send_cq->mcq.cqn == recv_cq->mcq.cqn) { __release(&recv_cq->lock); spin_unlock(&send_cq->lock); } else { spin_unlock(&send_cq->lock); spin_unlock(&recv_cq->lock); } } else { __release(&recv_cq->lock); spin_unlock(&send_cq->lock); } } else if (recv_cq) { __release(&send_cq->lock); spin_unlock(&recv_cq->lock); } else { __release(&recv_cq->lock); __release(&send_cq->lock); } } static struct mlx5_ib_pd *get_pd(struct mlx5_ib_qp *qp) { return to_mpd(qp->ibqp.pd); } static void get_cqs(enum ib_qp_type qp_type, struct ib_cq *ib_send_cq, struct ib_cq *ib_recv_cq, struct mlx5_ib_cq **send_cq, struct mlx5_ib_cq **recv_cq) { switch (qp_type) { case IB_QPT_XRC_TGT: *send_cq = NULL; *recv_cq = NULL; break; case MLX5_IB_QPT_REG_UMR: case IB_QPT_XRC_INI: *send_cq = ib_send_cq ? 
to_mcq(ib_send_cq) : NULL; *recv_cq = NULL; break; case IB_QPT_SMI: case MLX5_IB_QPT_HW_GSI: case IB_QPT_RC: case IB_QPT_UC: case IB_QPT_UD: case IB_QPT_RAW_IPV6: case IB_QPT_RAW_ETHERTYPE: case IB_QPT_RAW_PACKET: *send_cq = ib_send_cq ? to_mcq(ib_send_cq) : NULL; *recv_cq = ib_recv_cq ? to_mcq(ib_recv_cq) : NULL; break; case IB_QPT_MAX: default: *send_cq = NULL; *recv_cq = NULL; break; } } static int modify_raw_packet_qp(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp, const struct mlx5_modify_raw_qp_param *raw_qp_param, u8 lag_tx_affinity); static void destroy_qp_common(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp) { struct mlx5_ib_cq *send_cq, *recv_cq; struct mlx5_ib_qp_base *base = &qp->trans_qp.base; unsigned long flags; int err; if (qp->ibqp.rwq_ind_tbl) { destroy_rss_raw_qp_tir(dev, qp); return; } base = qp->ibqp.qp_type == IB_QPT_RAW_PACKET ? &qp->raw_packet_qp.rq.base : &qp->trans_qp.base; if (qp->state != IB_QPS_RESET) { if (qp->ibqp.qp_type != IB_QPT_RAW_PACKET) { mlx5_ib_qp_disable_pagefaults(qp); err = mlx5_core_qp_modify(dev->mdev, MLX5_CMD_OP_2RST_QP, 0, NULL, &base->mqp); } else { struct mlx5_modify_raw_qp_param raw_qp_param = { .operation = MLX5_CMD_OP_2RST_QP }; err = modify_raw_packet_qp(dev, qp, &raw_qp_param, 0); } if (err) mlx5_ib_warn(dev, "mlx5_ib: modify QP 0x%06x to RESET failed\n", base->mqp.qpn); } get_cqs(qp->ibqp.qp_type, qp->ibqp.send_cq, qp->ibqp.recv_cq, &send_cq, &recv_cq); spin_lock_irqsave(&dev->reset_flow_resource_lock, flags); mlx5_ib_lock_cqs(send_cq, recv_cq); /* del from lists under both locks above to protect reset flow paths */ list_del(&qp->qps_list); if (send_cq) list_del(&qp->cq_send_list); if (recv_cq) list_del(&qp->cq_recv_list); if (qp->create_type == MLX5_QP_KERNEL) { __mlx5_ib_cq_clean(recv_cq, base->mqp.qpn, qp->ibqp.srq ? 
to_msrq(qp->ibqp.srq) : NULL); if (send_cq != recv_cq) __mlx5_ib_cq_clean(send_cq, base->mqp.qpn, NULL); } mlx5_ib_unlock_cqs(send_cq, recv_cq); spin_unlock_irqrestore(&dev->reset_flow_resource_lock, flags); if (qp->ibqp.qp_type == IB_QPT_RAW_PACKET) { destroy_raw_packet_qp(dev, qp); } else { err = mlx5_core_destroy_qp(dev->mdev, &base->mqp); if (err) mlx5_ib_warn(dev, "failed to destroy QP 0x%x\n", base->mqp.qpn); } if (qp->create_type == MLX5_QP_KERNEL) destroy_qp_kernel(dev, qp); else if (qp->create_type == MLX5_QP_USER) - destroy_qp_user(&get_pd(qp)->ibpd, qp, base); + destroy_qp_user(dev, &get_pd(qp)->ibpd, qp, base); } static const char *ib_qp_type_str(enum ib_qp_type type) { switch (type) { case IB_QPT_SMI: return "IB_QPT_SMI"; case IB_QPT_GSI: return "IB_QPT_GSI"; case IB_QPT_RC: return "IB_QPT_RC"; case IB_QPT_UC: return "IB_QPT_UC"; case IB_QPT_UD: return "IB_QPT_UD"; case IB_QPT_RAW_IPV6: return "IB_QPT_RAW_IPV6"; case IB_QPT_RAW_ETHERTYPE: return "IB_QPT_RAW_ETHERTYPE"; case IB_QPT_XRC_INI: return "IB_QPT_XRC_INI"; case IB_QPT_XRC_TGT: return "IB_QPT_XRC_TGT"; case IB_QPT_RAW_PACKET: return "IB_QPT_RAW_PACKET"; case MLX5_IB_QPT_REG_UMR: return "MLX5_IB_QPT_REG_UMR"; case IB_QPT_MAX: default: return "Invalid QP type"; } } struct ib_qp *mlx5_ib_create_qp(struct ib_pd *pd, struct ib_qp_init_attr *init_attr, struct ib_udata *udata) { struct mlx5_ib_dev *dev; struct mlx5_ib_qp *qp; u16 xrcdn = 0; int err; if (pd) { dev = to_mdev(pd->device); if (init_attr->qp_type == IB_QPT_RAW_PACKET) { if (!pd->uobject) { mlx5_ib_dbg(dev, "Raw Packet QP is not supported for kernel consumers\n"); return ERR_PTR(-EINVAL); } else if (!to_mucontext(pd->uobject->context)->cqe_version) { mlx5_ib_dbg(dev, "Raw Packet QP is only supported for CQE version > 0\n"); return ERR_PTR(-EINVAL); } } } else { /* being cautious here */ if (init_attr->qp_type != IB_QPT_XRC_TGT && init_attr->qp_type != MLX5_IB_QPT_REG_UMR) { pr_warn("%s: no PD for transport %s\n", __func__, ib_qp_type_str(init_attr->qp_type)); return ERR_PTR(-EINVAL); } dev = to_mdev(to_mxrcd(init_attr->xrcd)->ibxrcd.device); } switch (init_attr->qp_type) { case IB_QPT_XRC_TGT: case IB_QPT_XRC_INI: if (!MLX5_CAP_GEN(dev->mdev, xrc)) { mlx5_ib_dbg(dev, "XRC not supported\n"); return ERR_PTR(-ENOSYS); } init_attr->recv_cq = NULL; if (init_attr->qp_type == IB_QPT_XRC_TGT) { xrcdn = to_mxrcd(init_attr->xrcd)->xrcdn; init_attr->send_cq = NULL; } /* fall through */ case IB_QPT_RAW_PACKET: case IB_QPT_RC: case IB_QPT_UC: case IB_QPT_UD: case IB_QPT_SMI: case MLX5_IB_QPT_HW_GSI: case MLX5_IB_QPT_REG_UMR: qp = kzalloc(sizeof(*qp), GFP_KERNEL); if (!qp) return ERR_PTR(-ENOMEM); err = create_qp_common(dev, pd, init_attr, udata, qp); if (err) { mlx5_ib_dbg(dev, "create_qp_common failed\n"); kfree(qp); return ERR_PTR(err); } if (is_qp0(init_attr->qp_type)) qp->ibqp.qp_num = 0; else if (is_qp1(init_attr->qp_type)) qp->ibqp.qp_num = 1; else qp->ibqp.qp_num = qp->trans_qp.base.mqp.qpn; mlx5_ib_dbg(dev, "ib qpnum 0x%x, mlx qpn 0x%x, rcqn 0x%x, scqn 0x%x\n", qp->ibqp.qp_num, qp->trans_qp.base.mqp.qpn, init_attr->recv_cq ? to_mcq(init_attr->recv_cq)->mcq.cqn : -1, init_attr->send_cq ? 
to_mcq(init_attr->send_cq)->mcq.cqn : -1); qp->trans_qp.xrcdn = xrcdn; break; case IB_QPT_GSI: return mlx5_ib_gsi_create_qp(pd, init_attr); case IB_QPT_RAW_IPV6: case IB_QPT_RAW_ETHERTYPE: case IB_QPT_MAX: default: mlx5_ib_dbg(dev, "unsupported qp type %d\n", init_attr->qp_type); /* Don't support raw QPs */ return ERR_PTR(-EINVAL); } return &qp->ibqp; } int mlx5_ib_destroy_qp(struct ib_qp *qp) { struct mlx5_ib_dev *dev = to_mdev(qp->device); struct mlx5_ib_qp *mqp = to_mqp(qp); if (unlikely(qp->qp_type == IB_QPT_GSI)) return mlx5_ib_gsi_destroy_qp(qp); destroy_qp_common(dev, mqp); kfree(mqp); return 0; } static __be32 to_mlx5_access_flags(struct mlx5_ib_qp *qp, const struct ib_qp_attr *attr, int attr_mask) { u32 hw_access_flags = 0; u8 dest_rd_atomic; u32 access_flags; if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC) dest_rd_atomic = attr->max_dest_rd_atomic; else dest_rd_atomic = qp->trans_qp.resp_depth; if (attr_mask & IB_QP_ACCESS_FLAGS) access_flags = attr->qp_access_flags; else access_flags = qp->trans_qp.atomic_rd_en; if (!dest_rd_atomic) access_flags &= IB_ACCESS_REMOTE_WRITE; if (access_flags & IB_ACCESS_REMOTE_READ) hw_access_flags |= MLX5_QP_BIT_RRE; if (access_flags & IB_ACCESS_REMOTE_ATOMIC) hw_access_flags |= (MLX5_QP_BIT_RAE | MLX5_ATOMIC_MODE_CX); if (access_flags & IB_ACCESS_REMOTE_WRITE) hw_access_flags |= MLX5_QP_BIT_RWE; return cpu_to_be32(hw_access_flags); } enum { MLX5_PATH_FLAG_FL = 1 << 0, MLX5_PATH_FLAG_FREE_AR = 1 << 1, MLX5_PATH_FLAG_COUNTER = 1 << 2, }; static int ib_rate_to_mlx5(struct mlx5_ib_dev *dev, u8 rate) { if (rate == IB_RATE_PORT_CURRENT) { return 0; } else if (rate < IB_RATE_2_5_GBPS || rate > IB_RATE_600_GBPS) { return -EINVAL; } else { while (rate != IB_RATE_2_5_GBPS && !(1 << (rate + MLX5_STAT_RATE_OFFSET) & MLX5_CAP_GEN(dev->mdev, stat_rate_support))) --rate; } return rate + MLX5_STAT_RATE_OFFSET; } static int modify_raw_packet_eth_prio(struct mlx5_core_dev *dev, struct mlx5_ib_sq *sq, u8 sl) { void *in; void *tisc; int inlen; int err; inlen = MLX5_ST_SZ_BYTES(modify_tis_in); in = mlx5_vzalloc(inlen); if (!in) return -ENOMEM; MLX5_SET(modify_tis_in, in, bitmask.prio, 1); tisc = MLX5_ADDR_OF(modify_tis_in, in, ctx); MLX5_SET(tisc, tisc, prio, ((sl & 0x7) << 1)); err = mlx5_core_modify_tis(dev, sq->tisn, in, inlen); kvfree(in); return err; } static int modify_raw_packet_tx_affinity(struct mlx5_core_dev *dev, struct mlx5_ib_sq *sq, u8 tx_affinity) { void *in; void *tisc; int inlen; int err; inlen = MLX5_ST_SZ_BYTES(modify_tis_in); in = mlx5_vzalloc(inlen); if (!in) return -ENOMEM; MLX5_SET(modify_tis_in, in, bitmask.lag_tx_port_affinity, 1); tisc = MLX5_ADDR_OF(modify_tis_in, in, ctx); MLX5_SET(tisc, tisc, lag_tx_port_affinity, tx_affinity); err = mlx5_core_modify_tis(dev, sq->tisn, in, inlen); kvfree(in); return err; } static int mlx5_set_path(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp, const struct ib_ah_attr *ah, struct mlx5_qp_path *path, u8 port, int attr_mask, u32 path_flags, const struct ib_qp_attr *attr, bool alt) { enum rdma_link_layer ll = rdma_port_get_link_layer(&dev->ib_dev, port); int err; enum ib_gid_type gid_type; if (attr_mask & IB_QP_PKEY_INDEX) path->pkey_index = cpu_to_be16(alt ? attr->alt_pkey_index : attr->pkey_index); if (ah->ah_flags & IB_AH_GRH) { if (ah->grh.sgid_index >= dev->mdev->port_caps[port - 1].gid_table_len) { pr_err("sgid_index (%u) too large. 
max is %d\n", ah->grh.sgid_index, dev->mdev->port_caps[port - 1].gid_table_len); return -EINVAL; } } if (ll == IB_LINK_LAYER_ETHERNET) { if (!(ah->ah_flags & IB_AH_GRH)) return -EINVAL; err = mlx5_get_roce_gid_type(dev, port, ah->grh.sgid_index, &gid_type); if (err) return err; memcpy(path->rmac, ah->dmac, sizeof(ah->dmac)); path->udp_sport = mlx5_get_roce_udp_sport(dev, port, ah->grh.sgid_index); path->dci_cfi_prio_sl = (ah->sl & 0x7) << 4; if (gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP) path->ecn_dscp = (ah->grh.traffic_class >> 2) & 0x3f; } else { path->fl_free_ar = (path_flags & MLX5_PATH_FLAG_FL) ? 0x80 : 0; path->fl_free_ar |= (path_flags & MLX5_PATH_FLAG_FREE_AR) ? 0x40 : 0; path->rlid = cpu_to_be16(ah->dlid); path->grh_mlid = ah->src_path_bits & 0x7f; if (ah->ah_flags & IB_AH_GRH) path->grh_mlid |= 1 << 7; path->dci_cfi_prio_sl = ah->sl & 0xf; } if (ah->ah_flags & IB_AH_GRH) { path->mgid_index = ah->grh.sgid_index; path->hop_limit = ah->grh.hop_limit; path->tclass_flowlabel = cpu_to_be32((ah->grh.traffic_class << 20) | (ah->grh.flow_label)); memcpy(path->rgid, ah->grh.dgid.raw, 16); } err = ib_rate_to_mlx5(dev, ah->static_rate); if (err < 0) return err; path->static_rate = err; path->port = port; if (attr_mask & IB_QP_TIMEOUT) path->ackto_lt = (alt ? attr->alt_timeout : attr->timeout) << 3; if ((qp->ibqp.qp_type == IB_QPT_RAW_PACKET) && qp->sq.wqe_cnt) return modify_raw_packet_eth_prio(dev->mdev, &qp->raw_packet_qp.sq, ah->sl & 0xf); return 0; } static enum mlx5_qp_optpar opt_mask[MLX5_QP_NUM_STATE][MLX5_QP_NUM_STATE][MLX5_QP_ST_MAX] = { [MLX5_QP_STATE_INIT] = { [MLX5_QP_STATE_INIT] = { [MLX5_QP_ST_RC] = MLX5_QP_OPTPAR_RRE | MLX5_QP_OPTPAR_RAE | MLX5_QP_OPTPAR_RWE | MLX5_QP_OPTPAR_PKEY_INDEX | MLX5_QP_OPTPAR_PRI_PORT, [MLX5_QP_ST_UC] = MLX5_QP_OPTPAR_RWE | MLX5_QP_OPTPAR_PKEY_INDEX | MLX5_QP_OPTPAR_PRI_PORT, [MLX5_QP_ST_UD] = MLX5_QP_OPTPAR_PKEY_INDEX | MLX5_QP_OPTPAR_Q_KEY | MLX5_QP_OPTPAR_PRI_PORT, }, [MLX5_QP_STATE_RTR] = { [MLX5_QP_ST_RC] = MLX5_QP_OPTPAR_ALT_ADDR_PATH | MLX5_QP_OPTPAR_RRE | MLX5_QP_OPTPAR_RAE | MLX5_QP_OPTPAR_RWE | MLX5_QP_OPTPAR_PKEY_INDEX, [MLX5_QP_ST_UC] = MLX5_QP_OPTPAR_ALT_ADDR_PATH | MLX5_QP_OPTPAR_RWE | MLX5_QP_OPTPAR_PKEY_INDEX, [MLX5_QP_ST_UD] = MLX5_QP_OPTPAR_PKEY_INDEX | MLX5_QP_OPTPAR_Q_KEY, [MLX5_QP_ST_MLX] = MLX5_QP_OPTPAR_PKEY_INDEX | MLX5_QP_OPTPAR_Q_KEY, [MLX5_QP_ST_XRC] = MLX5_QP_OPTPAR_ALT_ADDR_PATH | MLX5_QP_OPTPAR_RRE | MLX5_QP_OPTPAR_RAE | MLX5_QP_OPTPAR_RWE | MLX5_QP_OPTPAR_PKEY_INDEX, }, }, [MLX5_QP_STATE_RTR] = { [MLX5_QP_STATE_RTS] = { [MLX5_QP_ST_RC] = MLX5_QP_OPTPAR_ALT_ADDR_PATH | MLX5_QP_OPTPAR_RRE | MLX5_QP_OPTPAR_RAE | MLX5_QP_OPTPAR_RWE | MLX5_QP_OPTPAR_PM_STATE | MLX5_QP_OPTPAR_RNR_TIMEOUT, [MLX5_QP_ST_UC] = MLX5_QP_OPTPAR_ALT_ADDR_PATH | MLX5_QP_OPTPAR_RWE | MLX5_QP_OPTPAR_PM_STATE, [MLX5_QP_ST_UD] = MLX5_QP_OPTPAR_Q_KEY, }, }, [MLX5_QP_STATE_RTS] = { [MLX5_QP_STATE_RTS] = { [MLX5_QP_ST_RC] = MLX5_QP_OPTPAR_RRE | MLX5_QP_OPTPAR_RAE | MLX5_QP_OPTPAR_RWE | MLX5_QP_OPTPAR_RNR_TIMEOUT | MLX5_QP_OPTPAR_PM_STATE | MLX5_QP_OPTPAR_ALT_ADDR_PATH, [MLX5_QP_ST_UC] = MLX5_QP_OPTPAR_RWE | MLX5_QP_OPTPAR_PM_STATE | MLX5_QP_OPTPAR_ALT_ADDR_PATH, [MLX5_QP_ST_UD] = MLX5_QP_OPTPAR_Q_KEY | MLX5_QP_OPTPAR_SRQN | MLX5_QP_OPTPAR_CQN_RCV, }, }, [MLX5_QP_STATE_SQER] = { [MLX5_QP_STATE_RTS] = { [MLX5_QP_ST_UD] = MLX5_QP_OPTPAR_Q_KEY, [MLX5_QP_ST_MLX] = MLX5_QP_OPTPAR_Q_KEY, [MLX5_QP_ST_UC] = MLX5_QP_OPTPAR_RWE, [MLX5_QP_ST_RC] = MLX5_QP_OPTPAR_RNR_TIMEOUT | MLX5_QP_OPTPAR_RWE | MLX5_QP_OPTPAR_RAE | MLX5_QP_OPTPAR_RRE, }, }, }; static int 
ib_nr_to_mlx5_nr(int ib_mask) { switch (ib_mask) { case IB_QP_STATE: return 0; case IB_QP_CUR_STATE: return 0; case IB_QP_EN_SQD_ASYNC_NOTIFY: return 0; case IB_QP_ACCESS_FLAGS: return MLX5_QP_OPTPAR_RWE | MLX5_QP_OPTPAR_RRE | MLX5_QP_OPTPAR_RAE; case IB_QP_PKEY_INDEX: return MLX5_QP_OPTPAR_PKEY_INDEX; case IB_QP_PORT: return MLX5_QP_OPTPAR_PRI_PORT; case IB_QP_QKEY: return MLX5_QP_OPTPAR_Q_KEY; case IB_QP_AV: return MLX5_QP_OPTPAR_PRIMARY_ADDR_PATH | MLX5_QP_OPTPAR_PRI_PORT; case IB_QP_PATH_MTU: return 0; case IB_QP_TIMEOUT: return MLX5_QP_OPTPAR_ACK_TIMEOUT; case IB_QP_RETRY_CNT: return MLX5_QP_OPTPAR_RETRY_COUNT; case IB_QP_RNR_RETRY: return MLX5_QP_OPTPAR_RNR_RETRY; case IB_QP_RQ_PSN: return 0; case IB_QP_MAX_QP_RD_ATOMIC: return MLX5_QP_OPTPAR_SRA_MAX; case IB_QP_ALT_PATH: return MLX5_QP_OPTPAR_ALT_ADDR_PATH; case IB_QP_MIN_RNR_TIMER: return MLX5_QP_OPTPAR_RNR_TIMEOUT; case IB_QP_SQ_PSN: return 0; case IB_QP_MAX_DEST_RD_ATOMIC: return MLX5_QP_OPTPAR_RRA_MAX | MLX5_QP_OPTPAR_RWE | MLX5_QP_OPTPAR_RRE | MLX5_QP_OPTPAR_RAE; case IB_QP_PATH_MIG_STATE: return MLX5_QP_OPTPAR_PM_STATE; case IB_QP_CAP: return 0; case IB_QP_DEST_QPN: return 0; } return 0; } static int ib_mask_to_mlx5_opt(int ib_mask) { int result = 0; int i; for (i = 0; i < 8 * sizeof(int); i++) { if ((1 << i) & ib_mask) result |= ib_nr_to_mlx5_nr(1 << i); } return result; } static int modify_raw_packet_qp_rq(struct mlx5_ib_dev *dev, struct mlx5_ib_rq *rq, int new_state, const struct mlx5_modify_raw_qp_param *raw_qp_param) { void *in; void *rqc; int inlen; int err; inlen = MLX5_ST_SZ_BYTES(modify_rq_in); in = mlx5_vzalloc(inlen); if (!in) return -ENOMEM; MLX5_SET(modify_rq_in, in, rqn, rq->base.mqp.qpn); MLX5_SET(modify_rq_in, in, rq_state, rq->state); rqc = MLX5_ADDR_OF(modify_rq_in, in, ctx); MLX5_SET(rqc, rqc, state, new_state); if (raw_qp_param->set_mask & MLX5_RAW_QP_MOD_SET_RQ_Q_CTR_ID) { if (MLX5_CAP_GEN(dev->mdev, modify_rq_counters_set_id)) { MLX5_SET64(modify_rq_in, in, modify_bitmask, MLX5_MODIFY_RQ_IN_MODIFY_BITMASK_MODIFY_RQ_COUNTER_SET_ID); MLX5_SET(rqc, rqc, counter_set_id, raw_qp_param->rq_q_ctr_id); } else pr_info_once("%s: RAW PACKET QP counters are not supported on current FW\n", dev->ib_dev.name); } err = mlx5_core_modify_rq(dev->mdev, in, inlen); if (err) goto out; rq->state = new_state; out: kvfree(in); return err; } static int modify_raw_packet_qp_sq(struct mlx5_core_dev *dev, struct mlx5_ib_sq *sq, int new_state) { void *in; void *sqc; int inlen; int err; inlen = MLX5_ST_SZ_BYTES(modify_sq_in); in = mlx5_vzalloc(inlen); if (!in) return -ENOMEM; MLX5_SET(modify_sq_in, in, sqn, sq->base.mqp.qpn); MLX5_SET(modify_sq_in, in, sq_state, sq->state); sqc = MLX5_ADDR_OF(modify_sq_in, in, ctx); MLX5_SET(sqc, sqc, state, new_state); err = mlx5_core_modify_sq(dev, in, inlen); if (err) goto out; sq->state = new_state; out: kvfree(in); return err; } static int modify_raw_packet_qp(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp, const struct mlx5_modify_raw_qp_param *raw_qp_param, u8 tx_affinity) { struct mlx5_ib_raw_packet_qp *raw_packet_qp = &qp->raw_packet_qp; struct mlx5_ib_rq *rq = &raw_packet_qp->rq; struct mlx5_ib_sq *sq = &raw_packet_qp->sq; int rq_state; int sq_state; int err; switch (raw_qp_param->operation) { case MLX5_CMD_OP_RST2INIT_QP: rq_state = MLX5_RQC_STATE_RDY; sq_state = MLX5_SQC_STATE_RDY; break; case MLX5_CMD_OP_2ERR_QP: rq_state = MLX5_RQC_STATE_ERR; sq_state = MLX5_SQC_STATE_ERR; break; case MLX5_CMD_OP_2RST_QP: rq_state = MLX5_RQC_STATE_RST; sq_state = MLX5_SQC_STATE_RST; break; case 
MLX5_CMD_OP_INIT2INIT_QP: case MLX5_CMD_OP_INIT2RTR_QP: case MLX5_CMD_OP_RTR2RTS_QP: case MLX5_CMD_OP_RTS2RTS_QP: if (raw_qp_param->set_mask) return -EINVAL; else return 0; default: WARN_ON(1); return -EINVAL; } if (qp->rq.wqe_cnt) { err = modify_raw_packet_qp_rq(dev, rq, rq_state, raw_qp_param); if (err) return err; } if (qp->sq.wqe_cnt) { if (tx_affinity) { err = modify_raw_packet_tx_affinity(dev->mdev, sq, tx_affinity); if (err) return err; } return modify_raw_packet_qp_sq(dev->mdev, sq, sq_state); } return 0; } static int __mlx5_ib_modify_qp(struct ib_qp *ibqp, const struct ib_qp_attr *attr, int attr_mask, enum ib_qp_state cur_state, enum ib_qp_state new_state) { static const u16 optab[MLX5_QP_NUM_STATE][MLX5_QP_NUM_STATE] = { [MLX5_QP_STATE_RST] = { [MLX5_QP_STATE_RST] = MLX5_CMD_OP_2RST_QP, [MLX5_QP_STATE_ERR] = MLX5_CMD_OP_2ERR_QP, [MLX5_QP_STATE_INIT] = MLX5_CMD_OP_RST2INIT_QP, }, [MLX5_QP_STATE_INIT] = { [MLX5_QP_STATE_RST] = MLX5_CMD_OP_2RST_QP, [MLX5_QP_STATE_ERR] = MLX5_CMD_OP_2ERR_QP, [MLX5_QP_STATE_INIT] = MLX5_CMD_OP_INIT2INIT_QP, [MLX5_QP_STATE_RTR] = MLX5_CMD_OP_INIT2RTR_QP, }, [MLX5_QP_STATE_RTR] = { [MLX5_QP_STATE_RST] = MLX5_CMD_OP_2RST_QP, [MLX5_QP_STATE_ERR] = MLX5_CMD_OP_2ERR_QP, [MLX5_QP_STATE_RTS] = MLX5_CMD_OP_RTR2RTS_QP, }, [MLX5_QP_STATE_RTS] = { [MLX5_QP_STATE_RST] = MLX5_CMD_OP_2RST_QP, [MLX5_QP_STATE_ERR] = MLX5_CMD_OP_2ERR_QP, [MLX5_QP_STATE_RTS] = MLX5_CMD_OP_RTS2RTS_QP, }, [MLX5_QP_STATE_SQD] = { [MLX5_QP_STATE_RST] = MLX5_CMD_OP_2RST_QP, [MLX5_QP_STATE_ERR] = MLX5_CMD_OP_2ERR_QP, }, [MLX5_QP_STATE_SQER] = { [MLX5_QP_STATE_RST] = MLX5_CMD_OP_2RST_QP, [MLX5_QP_STATE_ERR] = MLX5_CMD_OP_2ERR_QP, [MLX5_QP_STATE_RTS] = MLX5_CMD_OP_SQERR2RTS_QP, }, [MLX5_QP_STATE_ERR] = { [MLX5_QP_STATE_RST] = MLX5_CMD_OP_2RST_QP, [MLX5_QP_STATE_ERR] = MLX5_CMD_OP_2ERR_QP, } }; struct mlx5_ib_dev *dev = to_mdev(ibqp->device); struct mlx5_ib_qp *qp = to_mqp(ibqp); struct mlx5_ib_qp_base *base = &qp->trans_qp.base; struct mlx5_ib_cq *send_cq, *recv_cq; struct mlx5_qp_context *context; struct mlx5_ib_pd *pd; struct mlx5_ib_port *mibport = NULL; enum mlx5_qp_state mlx5_cur, mlx5_new; enum mlx5_qp_optpar optpar; int sqd_event; int mlx5_st; int err; u16 op; context = kzalloc(sizeof(*context), GFP_KERNEL); if (!context) return -ENOMEM; err = to_mlx5_st(ibqp->qp_type); if (err < 0) { mlx5_ib_dbg(dev, "unsupported qp type %d\n", ibqp->qp_type); goto out; } context->flags = cpu_to_be32(err << 16); if (!(attr_mask & IB_QP_PATH_MIG_STATE)) { context->flags |= cpu_to_be32(MLX5_QP_PM_MIGRATED << 11); } else { switch (attr->path_mig_state) { case IB_MIG_MIGRATED: context->flags |= cpu_to_be32(MLX5_QP_PM_MIGRATED << 11); break; case IB_MIG_REARM: context->flags |= cpu_to_be32(MLX5_QP_PM_REARM << 11); break; case IB_MIG_ARMED: context->flags |= cpu_to_be32(MLX5_QP_PM_ARMED << 11); break; } } if (is_sqp(ibqp->qp_type)) { context->mtu_msgmax = (IB_MTU_256 << 5) | 8; } else if (ibqp->qp_type == IB_QPT_UD || ibqp->qp_type == MLX5_IB_QPT_REG_UMR) { context->mtu_msgmax = (IB_MTU_4096 << 5) | 12; } else if (attr_mask & IB_QP_PATH_MTU) { if (attr->path_mtu < IB_MTU_256 || attr->path_mtu > IB_MTU_4096) { mlx5_ib_warn(dev, "invalid mtu %d\n", attr->path_mtu); err = -EINVAL; goto out; } context->mtu_msgmax = (attr->path_mtu << 5) | (u8)MLX5_CAP_GEN(dev->mdev, log_max_msg); } if (attr_mask & IB_QP_DEST_QPN) context->log_pg_sz_remote_qpn = cpu_to_be32(attr->dest_qp_num); if (attr_mask & IB_QP_PKEY_INDEX) context->pri_path.pkey_index = cpu_to_be16(attr->pkey_index); /* todo implement counter_index 
functionality */ if (is_sqp(ibqp->qp_type)) context->pri_path.port = qp->port; if (attr_mask & IB_QP_PORT) context->pri_path.port = attr->port_num; if (attr_mask & IB_QP_AV) { err = mlx5_set_path(dev, qp, &attr->ah_attr, &context->pri_path, attr_mask & IB_QP_PORT ? attr->port_num : qp->port, attr_mask, 0, attr, false); if (err) goto out; } if (attr_mask & IB_QP_TIMEOUT) context->pri_path.ackto_lt |= attr->timeout << 3; if (attr_mask & IB_QP_ALT_PATH) { err = mlx5_set_path(dev, qp, &attr->alt_ah_attr, &context->alt_path, attr->alt_port_num, attr_mask | IB_QP_PKEY_INDEX | IB_QP_TIMEOUT, 0, attr, true); if (err) goto out; } pd = get_pd(qp); get_cqs(qp->ibqp.qp_type, qp->ibqp.send_cq, qp->ibqp.recv_cq, &send_cq, &recv_cq); context->flags_pd = cpu_to_be32(pd ? pd->pdn : to_mpd(dev->devr.p0)->pdn); context->cqn_send = send_cq ? cpu_to_be32(send_cq->mcq.cqn) : 0; context->cqn_recv = recv_cq ? cpu_to_be32(recv_cq->mcq.cqn) : 0; context->params1 = cpu_to_be32(MLX5_IB_ACK_REQ_FREQ << 28); if (attr_mask & IB_QP_RNR_RETRY) context->params1 |= cpu_to_be32(attr->rnr_retry << 13); if (attr_mask & IB_QP_RETRY_CNT) context->params1 |= cpu_to_be32(attr->retry_cnt << 16); if (attr_mask & IB_QP_MAX_QP_RD_ATOMIC) { if (attr->max_rd_atomic) context->params1 |= cpu_to_be32(fls(attr->max_rd_atomic - 1) << 21); } if (attr_mask & IB_QP_SQ_PSN) context->next_send_psn = cpu_to_be32(attr->sq_psn); if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC) { if (attr->max_dest_rd_atomic) context->params2 |= cpu_to_be32(fls(attr->max_dest_rd_atomic - 1) << 21); } if (attr_mask & (IB_QP_ACCESS_FLAGS | IB_QP_MAX_DEST_RD_ATOMIC)) context->params2 |= to_mlx5_access_flags(qp, attr, attr_mask); if (attr_mask & IB_QP_MIN_RNR_TIMER) context->rnr_nextrecvpsn |= cpu_to_be32(attr->min_rnr_timer << 24); if (attr_mask & IB_QP_RQ_PSN) context->rnr_nextrecvpsn |= cpu_to_be32(attr->rq_psn); if (attr_mask & IB_QP_QKEY) context->qkey = cpu_to_be32(attr->qkey); if (qp->rq.wqe_cnt && cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT) context->db_rec_addr = cpu_to_be64(qp->db.dma); if (cur_state == IB_QPS_RTS && new_state == IB_QPS_SQD && attr_mask & IB_QP_EN_SQD_ASYNC_NOTIFY && attr->en_sqd_async_notify) sqd_event = 1; else sqd_event = 0; if (cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT) { u8 port_num = (attr_mask & IB_QP_PORT ? attr->port_num : qp->port) - 1; mibport = &dev->port[port_num]; context->qp_counter_set_usr_page |= cpu_to_be32((u32)(mibport->q_cnt_id) << 24); } if (!ibqp->uobject && cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT) context->sq_crq_size |= cpu_to_be16(1 << 4); if (qp->flags & MLX5_IB_QP_SQPN_QP1) context->deth_sqpn = cpu_to_be32(1); mlx5_cur = to_mlx5_state(cur_state); mlx5_new = to_mlx5_state(new_state); mlx5_st = to_mlx5_st(ibqp->qp_type); if (mlx5_st < 0) goto out; /* If moving to a reset or error state, we must disable page faults on * this QP and flush all current page faults. Otherwise a stale page * fault may attempt to work on this QP after it is reset and moved * again to RTS, and may cause the driver and the device to get out of * sync. 
*/ if (cur_state != IB_QPS_RESET && cur_state != IB_QPS_ERR && (new_state == IB_QPS_RESET || new_state == IB_QPS_ERR) && (qp->ibqp.qp_type != IB_QPT_RAW_PACKET)) mlx5_ib_qp_disable_pagefaults(qp); if (mlx5_cur >= MLX5_QP_NUM_STATE || mlx5_new >= MLX5_QP_NUM_STATE || !optab[mlx5_cur][mlx5_new]) goto out; op = optab[mlx5_cur][mlx5_new]; optpar = ib_mask_to_mlx5_opt(attr_mask); optpar &= opt_mask[mlx5_cur][mlx5_new][mlx5_st]; if (qp->ibqp.qp_type == IB_QPT_RAW_PACKET) { struct mlx5_modify_raw_qp_param raw_qp_param = {}; raw_qp_param.operation = op; if (cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT) { raw_qp_param.rq_q_ctr_id = mibport->q_cnt_id; raw_qp_param.set_mask |= MLX5_RAW_QP_MOD_SET_RQ_Q_CTR_ID; } err = modify_raw_packet_qp(dev, qp, &raw_qp_param, 0); } else { err = mlx5_core_qp_modify(dev->mdev, op, optpar, context, &base->mqp); } if (err) goto out; if (cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT && (qp->ibqp.qp_type != IB_QPT_RAW_PACKET)) mlx5_ib_qp_enable_pagefaults(qp); qp->state = new_state; if (attr_mask & IB_QP_ACCESS_FLAGS) qp->trans_qp.atomic_rd_en = attr->qp_access_flags; if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC) qp->trans_qp.resp_depth = attr->max_dest_rd_atomic; if (attr_mask & IB_QP_PORT) qp->port = attr->port_num; if (attr_mask & IB_QP_ALT_PATH) qp->trans_qp.alt_port = attr->alt_port_num; /* * If we moved a kernel QP to RESET, clean up all old CQ * entries and reinitialize the QP. */ if (new_state == IB_QPS_RESET && !ibqp->uobject) { mlx5_ib_cq_clean(recv_cq, base->mqp.qpn, ibqp->srq ? to_msrq(ibqp->srq) : NULL); if (send_cq != recv_cq) mlx5_ib_cq_clean(send_cq, base->mqp.qpn, NULL); qp->rq.head = 0; qp->rq.tail = 0; qp->sq.head = 0; qp->sq.tail = 0; qp->sq.cur_post = 0; qp->sq.last_poll = 0; qp->db.db[MLX5_RCV_DBR] = 0; qp->db.db[MLX5_SND_DBR] = 0; } out: kfree(context); return err; } int mlx5_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask, struct ib_udata *udata) { struct mlx5_ib_dev *dev = to_mdev(ibqp->device); struct mlx5_ib_qp *qp = to_mqp(ibqp); enum ib_qp_type qp_type; enum ib_qp_state cur_state, new_state; int err = -EINVAL; int port; enum rdma_link_layer ll = IB_LINK_LAYER_UNSPECIFIED; if (ibqp->rwq_ind_tbl) return -ENOSYS; if (unlikely(ibqp->qp_type == IB_QPT_GSI)) return mlx5_ib_gsi_modify_qp(ibqp, attr, attr_mask); qp_type = (unlikely(ibqp->qp_type == MLX5_IB_QPT_HW_GSI)) ? IB_QPT_GSI : ibqp->qp_type; mutex_lock(&qp->mutex); cur_state = attr_mask & IB_QP_CUR_STATE ? attr->cur_qp_state : qp->state; new_state = attr_mask & IB_QP_STATE ? attr->qp_state : cur_state; if (!(cur_state == new_state && cur_state == IB_QPS_RESET)) { port = attr_mask & IB_QP_PORT ? attr->port_num : qp->port; ll = dev->ib_dev.get_link_layer(&dev->ib_dev, port); } if (qp_type != MLX5_IB_QPT_REG_UMR && !ib_modify_qp_is_ok(cur_state, new_state, qp_type, attr_mask, ll)) { mlx5_ib_dbg(dev, "invalid QP state transition from %d to %d, qp_type %d, attr_mask 0x%x\n", cur_state, new_state, ibqp->qp_type, attr_mask); goto out; } if ((attr_mask & IB_QP_PORT) && (attr->port_num == 0 || attr->port_num > MLX5_CAP_GEN(dev->mdev, num_ports))) { mlx5_ib_dbg(dev, "invalid port number %d. number of ports is %d\n", attr->port_num, dev->num_ports); goto out; } if (attr_mask & IB_QP_PKEY_INDEX) { port = attr_mask & IB_QP_PORT ? 
attr->port_num : qp->port; if (attr->pkey_index >= dev->mdev->port_caps[port - 1].pkey_table_len) { mlx5_ib_dbg(dev, "invalid pkey index %d\n", attr->pkey_index); goto out; } } if (attr_mask & IB_QP_MAX_QP_RD_ATOMIC && attr->max_rd_atomic > (1 << MLX5_CAP_GEN(dev->mdev, log_max_ra_res_qp))) { mlx5_ib_dbg(dev, "invalid max_rd_atomic value %d\n", attr->max_rd_atomic); goto out; } if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC && attr->max_dest_rd_atomic > (1 << MLX5_CAP_GEN(dev->mdev, log_max_ra_req_qp))) { mlx5_ib_dbg(dev, "invalid max_dest_rd_atomic value %d\n", attr->max_dest_rd_atomic); goto out; } if (cur_state == new_state && cur_state == IB_QPS_RESET) { err = 0; goto out; } err = __mlx5_ib_modify_qp(ibqp, attr, attr_mask, cur_state, new_state); out: mutex_unlock(&qp->mutex); return err; } static int mlx5_wq_overflow(struct mlx5_ib_wq *wq, int nreq, struct ib_cq *ib_cq) { struct mlx5_ib_cq *cq; unsigned cur; cur = wq->head - wq->tail; if (likely(cur + nreq < wq->max_post)) return 0; cq = to_mcq(ib_cq); spin_lock(&cq->lock); cur = wq->head - wq->tail; spin_unlock(&cq->lock); return cur + nreq >= wq->max_post; } static __always_inline void set_raddr_seg(struct mlx5_wqe_raddr_seg *rseg, u64 remote_addr, u32 rkey) { rseg->raddr = cpu_to_be64(remote_addr); rseg->rkey = cpu_to_be32(rkey); rseg->reserved = 0; } static void *set_eth_seg(struct mlx5_wqe_eth_seg *eseg, struct ib_send_wr *wr, void *qend, struct mlx5_ib_qp *qp, int *size) { void *seg = eseg; memset(eseg, 0, sizeof(struct mlx5_wqe_eth_seg)); if (wr->send_flags & IB_SEND_IP_CSUM) eseg->cs_flags = MLX5_ETH_WQE_L3_CSUM | MLX5_ETH_WQE_L4_CSUM; seg += sizeof(struct mlx5_wqe_eth_seg); *size += sizeof(struct mlx5_wqe_eth_seg) / 16; if (wr->opcode == IB_WR_LSO) { struct ib_ud_wr *ud_wr = container_of(wr, struct ib_ud_wr, wr); int size_of_inl_hdr_start = sizeof(eseg->inline_hdr_start); u64 left, leftlen, copysz; void *pdata = ud_wr->header; left = ud_wr->hlen; eseg->mss = cpu_to_be16(ud_wr->mss); eseg->inline_hdr_sz = cpu_to_be16(left); /* * check if there is space till the end of queue, if yes, * copy all in one shot, otherwise copy till the end of queue, * rollback and than the copy the left */ leftlen = qend - (void *)eseg->inline_hdr_start; copysz = min_t(u64, leftlen, left); memcpy(seg - size_of_inl_hdr_start, pdata, copysz); if (likely(copysz > size_of_inl_hdr_start)) { seg += ALIGN(copysz - size_of_inl_hdr_start, 16); *size += ALIGN(copysz - size_of_inl_hdr_start, 16) / 16; } if (unlikely(copysz < left)) { /* the last wqe in the queue */ seg = mlx5_get_send_wqe(qp, 0); left -= copysz; pdata += copysz; memcpy(seg, pdata, left); seg += ALIGN(left, 16); *size += ALIGN(left, 16) / 16; } } return seg; } static void set_datagram_seg(struct mlx5_wqe_datagram_seg *dseg, struct ib_send_wr *wr) { memcpy(&dseg->av, &to_mah(ud_wr(wr)->ah)->av, sizeof(struct mlx5_av)); dseg->av.dqp_dct = cpu_to_be32(ud_wr(wr)->remote_qpn | MLX5_EXTENDED_UD_AV); dseg->av.key.qkey.qkey = cpu_to_be32(ud_wr(wr)->remote_qkey); } static void set_data_ptr_seg(struct mlx5_wqe_data_seg *dseg, struct ib_sge *sg) { dseg->byte_count = cpu_to_be32(sg->length); dseg->lkey = cpu_to_be32(sg->lkey); dseg->addr = cpu_to_be64(sg->addr); } static __be16 get_klm_octo(int npages) { return cpu_to_be16(ALIGN(npages, 8) / 2); } static __be64 frwr_mkey_mask(void) { u64 result; result = MLX5_MKEY_MASK_LEN | MLX5_MKEY_MASK_PAGE_SIZE | MLX5_MKEY_MASK_START_ADDR | MLX5_MKEY_MASK_EN_RINVAL | MLX5_MKEY_MASK_KEY | MLX5_MKEY_MASK_LR | MLX5_MKEY_MASK_LW | MLX5_MKEY_MASK_RR | MLX5_MKEY_MASK_RW | 
MLX5_MKEY_MASK_A | MLX5_MKEY_MASK_SMALL_FENCE | MLX5_MKEY_MASK_FREE; return cpu_to_be64(result); } static __be64 sig_mkey_mask(void) { u64 result; result = MLX5_MKEY_MASK_LEN | MLX5_MKEY_MASK_PAGE_SIZE | MLX5_MKEY_MASK_START_ADDR | MLX5_MKEY_MASK_EN_SIGERR | MLX5_MKEY_MASK_EN_RINVAL | MLX5_MKEY_MASK_KEY | MLX5_MKEY_MASK_LR | MLX5_MKEY_MASK_LW | MLX5_MKEY_MASK_RR | MLX5_MKEY_MASK_RW | MLX5_MKEY_MASK_SMALL_FENCE | MLX5_MKEY_MASK_FREE | MLX5_MKEY_MASK_BSF_EN; return cpu_to_be64(result); } static void set_reg_umr_seg(struct mlx5_wqe_umr_ctrl_seg *umr, struct mlx5_ib_mr *mr) { int ndescs = mr->ndescs; memset(umr, 0, sizeof(*umr)); if (mr->access_mode == MLX5_ACCESS_MODE_KLM) /* KLMs take twice the size of MTTs */ ndescs *= 2; umr->flags = MLX5_UMR_CHECK_NOT_FREE; umr->klm_octowords = get_klm_octo(ndescs); umr->mkey_mask = frwr_mkey_mask(); } static void set_linv_umr_seg(struct mlx5_wqe_umr_ctrl_seg *umr) { memset(umr, 0, sizeof(*umr)); umr->mkey_mask = cpu_to_be64(MLX5_MKEY_MASK_FREE); umr->flags = 1 << 7; } static __be64 get_umr_reg_mr_mask(void) { u64 result; result = MLX5_MKEY_MASK_LEN | MLX5_MKEY_MASK_PAGE_SIZE | MLX5_MKEY_MASK_START_ADDR | MLX5_MKEY_MASK_PD | MLX5_MKEY_MASK_LR | MLX5_MKEY_MASK_LW | MLX5_MKEY_MASK_KEY | MLX5_MKEY_MASK_RR | MLX5_MKEY_MASK_RW | MLX5_MKEY_MASK_A | MLX5_MKEY_MASK_FREE; return cpu_to_be64(result); } static __be64 get_umr_unreg_mr_mask(void) { u64 result; result = MLX5_MKEY_MASK_FREE; return cpu_to_be64(result); } static __be64 get_umr_update_mtt_mask(void) { u64 result; result = MLX5_MKEY_MASK_FREE; return cpu_to_be64(result); } static __be64 get_umr_update_translation_mask(void) { u64 result; result = MLX5_MKEY_MASK_LEN | MLX5_MKEY_MASK_PAGE_SIZE | MLX5_MKEY_MASK_START_ADDR | MLX5_MKEY_MASK_KEY | MLX5_MKEY_MASK_FREE; return cpu_to_be64(result); } static __be64 get_umr_update_access_mask(void) { u64 result; result = MLX5_MKEY_MASK_LW | MLX5_MKEY_MASK_RR | MLX5_MKEY_MASK_RW | MLX5_MKEY_MASK_A | MLX5_MKEY_MASK_KEY | MLX5_MKEY_MASK_FREE; return cpu_to_be64(result); } static __be64 get_umr_update_pd_mask(void) { u64 result; result = MLX5_MKEY_MASK_PD | MLX5_MKEY_MASK_KEY | MLX5_MKEY_MASK_FREE; return cpu_to_be64(result); } static void set_reg_umr_segment(struct mlx5_wqe_umr_ctrl_seg *umr, struct ib_send_wr *wr) { struct mlx5_umr_wr *umrwr = umr_wr(wr); memset(umr, 0, sizeof(*umr)); if (wr->send_flags & MLX5_IB_SEND_UMR_FAIL_IF_FREE) umr->flags = MLX5_UMR_CHECK_FREE; /* fail if free */ else umr->flags = MLX5_UMR_CHECK_NOT_FREE; /* fail if not free */ if (!(wr->send_flags & MLX5_IB_SEND_UMR_UNREG)) { umr->klm_octowords = get_klm_octo(umrwr->npages); if (wr->send_flags & MLX5_IB_SEND_UMR_UPDATE_MTT) { umr->mkey_mask = get_umr_update_mtt_mask(); umr->bsf_octowords = get_klm_octo(umrwr->target.offset); umr->flags |= MLX5_UMR_TRANSLATION_OFFSET_EN; } if (wr->send_flags & MLX5_IB_SEND_UMR_UPDATE_TRANSLATION) umr->mkey_mask |= get_umr_update_translation_mask(); if (wr->send_flags & MLX5_IB_SEND_UMR_UPDATE_ACCESS) umr->mkey_mask |= get_umr_update_access_mask(); if (wr->send_flags & MLX5_IB_SEND_UMR_UPDATE_PD) umr->mkey_mask |= get_umr_update_pd_mask(); if (!umr->mkey_mask) umr->mkey_mask = get_umr_reg_mr_mask(); } else { umr->mkey_mask = get_umr_unreg_mr_mask(); } if (!wr->num_sge) umr->flags |= MLX5_UMR_INLINE; } static u8 get_umr_flags(int acc) { return (acc & IB_ACCESS_REMOTE_ATOMIC ? MLX5_PERM_ATOMIC : 0) | (acc & IB_ACCESS_REMOTE_WRITE ? MLX5_PERM_REMOTE_WRITE : 0) | (acc & IB_ACCESS_REMOTE_READ ? MLX5_PERM_REMOTE_READ : 0) | (acc & IB_ACCESS_LOCAL_WRITE ? 
MLX5_PERM_LOCAL_WRITE : 0) | MLX5_PERM_LOCAL_READ | MLX5_PERM_UMR_EN; } static void set_reg_mkey_seg(struct mlx5_mkey_seg *seg, struct mlx5_ib_mr *mr, u32 key, int access) { int ndescs = ALIGN(mr->ndescs, 8) >> 1; memset(seg, 0, sizeof(*seg)); if (mr->access_mode == MLX5_ACCESS_MODE_MTT) seg->log2_page_size = ilog2(mr->ibmr.page_size); else if (mr->access_mode == MLX5_ACCESS_MODE_KLM) /* KLMs take twice the size of MTTs */ ndescs *= 2; seg->flags = get_umr_flags(access) | mr->access_mode; seg->qpn_mkey7_0 = cpu_to_be32((key & 0xff) | 0xffffff00); seg->flags_pd = cpu_to_be32(MLX5_MKEY_REMOTE_INVAL); seg->start_addr = cpu_to_be64(mr->ibmr.iova); seg->len = cpu_to_be64(mr->ibmr.length); seg->xlt_oct_size = cpu_to_be32(ndescs); } static void set_linv_mkey_seg(struct mlx5_mkey_seg *seg) { memset(seg, 0, sizeof(*seg)); seg->status = MLX5_MKEY_STATUS_FREE; } static void set_reg_mkey_segment(struct mlx5_mkey_seg *seg, struct ib_send_wr *wr) { struct mlx5_umr_wr *umrwr = umr_wr(wr); memset(seg, 0, sizeof(*seg)); if (wr->send_flags & MLX5_IB_SEND_UMR_UNREG) { seg->status = MLX5_MKEY_STATUS_FREE; return; } seg->flags = convert_access(umrwr->access_flags); if (!(wr->send_flags & MLX5_IB_SEND_UMR_UPDATE_MTT)) { if (umrwr->pd) seg->flags_pd = cpu_to_be32(to_mpd(umrwr->pd)->pdn); seg->start_addr = cpu_to_be64(umrwr->target.virt_addr); } seg->len = cpu_to_be64(umrwr->length); seg->log2_page_size = umrwr->page_shift; seg->qpn_mkey7_0 = cpu_to_be32(0xffffff00 | mlx5_mkey_variant(umrwr->mkey)); } static void set_reg_data_seg(struct mlx5_wqe_data_seg *dseg, struct mlx5_ib_mr *mr, struct mlx5_ib_pd *pd) { int bcount = mr->desc_size * mr->ndescs; dseg->addr = cpu_to_be64(mr->desc_map); dseg->byte_count = cpu_to_be32(ALIGN(bcount, 64)); dseg->lkey = cpu_to_be32(pd->ibpd.local_dma_lkey); } static __be32 send_ieth(struct ib_send_wr *wr) { switch (wr->opcode) { case IB_WR_SEND_WITH_IMM: case IB_WR_RDMA_WRITE_WITH_IMM: return wr->ex.imm_data; case IB_WR_SEND_WITH_INV: return cpu_to_be32(wr->ex.invalidate_rkey); default: return 0; } } static u8 calc_sig(void *wqe, int size) { u8 *p = wqe; u8 res = 0; int i; for (i = 0; i < size; i++) res ^= p[i]; return ~res; } static u8 wq_sig(void *wqe) { return calc_sig(wqe, (*((u8 *)wqe + 8) & 0x3f) << 4); } static int set_data_inl_seg(struct mlx5_ib_qp *qp, struct ib_send_wr *wr, void *wqe, int *sz) { struct mlx5_wqe_inline_seg *seg; void *qend = qp->sq.qend; void *addr; int inl = 0; int copy; int len; int i; seg = wqe; wqe += sizeof(*seg); for (i = 0; i < wr->num_sge; i++) { addr = (void *)(unsigned long)(wr->sg_list[i].addr); len = wr->sg_list[i].length; inl += len; if (unlikely(inl > qp->max_inline_data)) return -ENOMEM; if (unlikely(wqe + len > qend)) { copy = qend - wqe; memcpy(wqe, addr, copy); addr += copy; len -= copy; wqe = mlx5_get_send_wqe(qp, 0); } memcpy(wqe, addr, len); wqe += len; } seg->byte_count = cpu_to_be32(inl | MLX5_INLINE_SEG); *sz = ALIGN(inl + sizeof(seg->byte_count), 16) / 16; return 0; } static u16 prot_field_size(enum ib_signature_type type) { switch (type) { case IB_SIG_TYPE_T10_DIF: return MLX5_DIF_SIZE; default: return 0; } } static u8 bs_selector(int block_size) { switch (block_size) { case 512: return 0x1; case 520: return 0x2; case 4096: return 0x3; case 4160: return 0x4; case 1073741824: return 0x5; default: return 0; } } static void mlx5_fill_inl_bsf(struct ib_sig_domain *domain, struct mlx5_bsf_inl *inl) { /* Valid inline section and allow BSF refresh */ inl->vld_refresh = cpu_to_be16(MLX5_BSF_INL_VALID | MLX5_BSF_REFRESH_DIF); 
inl->dif_apptag = cpu_to_be16(domain->sig.dif.app_tag); inl->dif_reftag = cpu_to_be32(domain->sig.dif.ref_tag); /* repeating block */ inl->rp_inv_seed = MLX5_BSF_REPEAT_BLOCK; inl->sig_type = domain->sig.dif.bg_type == IB_T10DIF_CRC ? MLX5_DIF_CRC : MLX5_DIF_IPCS; if (domain->sig.dif.ref_remap) inl->dif_inc_ref_guard_check |= MLX5_BSF_INC_REFTAG; if (domain->sig.dif.app_escape) { if (domain->sig.dif.ref_escape) inl->dif_inc_ref_guard_check |= MLX5_BSF_APPREF_ESCAPE; else inl->dif_inc_ref_guard_check |= MLX5_BSF_APPTAG_ESCAPE; } inl->dif_app_bitmask_check = cpu_to_be16(domain->sig.dif.apptag_check_mask); } static int mlx5_set_bsf(struct ib_mr *sig_mr, struct ib_sig_attrs *sig_attrs, struct mlx5_bsf *bsf, u32 data_size) { struct mlx5_core_sig_ctx *msig = to_mmr(sig_mr)->sig; struct mlx5_bsf_basic *basic = &bsf->basic; struct ib_sig_domain *mem = &sig_attrs->mem; struct ib_sig_domain *wire = &sig_attrs->wire; memset(bsf, 0, sizeof(*bsf)); /* Basic + Extended + Inline */ basic->bsf_size_sbs = 1 << 7; /* Input domain check byte mask */ basic->check_byte_mask = sig_attrs->check_mask; basic->raw_data_size = cpu_to_be32(data_size); /* Memory domain */ switch (sig_attrs->mem.sig_type) { case IB_SIG_TYPE_NONE: break; case IB_SIG_TYPE_T10_DIF: basic->mem.bs_selector = bs_selector(mem->sig.dif.pi_interval); basic->m_bfs_psv = cpu_to_be32(msig->psv_memory.psv_idx); mlx5_fill_inl_bsf(mem, &bsf->m_inl); break; default: return -EINVAL; } /* Wire domain */ switch (sig_attrs->wire.sig_type) { case IB_SIG_TYPE_NONE: break; case IB_SIG_TYPE_T10_DIF: if (mem->sig.dif.pi_interval == wire->sig.dif.pi_interval && mem->sig_type == wire->sig_type) { /* Same block structure */ basic->bsf_size_sbs |= 1 << 4; if (mem->sig.dif.bg_type == wire->sig.dif.bg_type) basic->wire.copy_byte_mask |= MLX5_CPY_GRD_MASK; if (mem->sig.dif.app_tag == wire->sig.dif.app_tag) basic->wire.copy_byte_mask |= MLX5_CPY_APP_MASK; if (mem->sig.dif.ref_tag == wire->sig.dif.ref_tag) basic->wire.copy_byte_mask |= MLX5_CPY_REF_MASK; } else basic->wire.bs_selector = bs_selector(wire->sig.dif.pi_interval); basic->w_bfs_psv = cpu_to_be32(msig->psv_wire.psv_idx); mlx5_fill_inl_bsf(wire, &bsf->w_inl); break; default: return -EINVAL; } return 0; } static int set_sig_data_segment(struct ib_sig_handover_wr *wr, struct mlx5_ib_qp *qp, void **seg, int *size) { struct ib_sig_attrs *sig_attrs = wr->sig_attrs; struct ib_mr *sig_mr = wr->sig_mr; struct mlx5_bsf *bsf; u32 data_len = wr->wr.sg_list->length; u32 data_key = wr->wr.sg_list->lkey; u64 data_va = wr->wr.sg_list->addr; int ret; int wqe_size; if (!wr->prot || (data_key == wr->prot->lkey && data_va == wr->prot->addr && data_len == wr->prot->length)) { /** * Source domain doesn't contain signature information * or data and protection are interleaved in memory. 
* So need construct: * ------------------ * | data_klm | * ------------------ * | BSF | * ------------------ **/ struct mlx5_klm *data_klm = *seg; data_klm->bcount = cpu_to_be32(data_len); data_klm->key = cpu_to_be32(data_key); data_klm->va = cpu_to_be64(data_va); wqe_size = ALIGN(sizeof(*data_klm), 64); } else { /** * Source domain contains signature information * So need construct a strided block format: * --------------------------- * | stride_block_ctrl | * --------------------------- * | data_klm | * --------------------------- * | prot_klm | * --------------------------- * | BSF | * --------------------------- **/ struct mlx5_stride_block_ctrl_seg *sblock_ctrl; struct mlx5_stride_block_entry *data_sentry; struct mlx5_stride_block_entry *prot_sentry; u32 prot_key = wr->prot->lkey; u64 prot_va = wr->prot->addr; u16 block_size = sig_attrs->mem.sig.dif.pi_interval; int prot_size; sblock_ctrl = *seg; data_sentry = (void *)sblock_ctrl + sizeof(*sblock_ctrl); prot_sentry = (void *)data_sentry + sizeof(*data_sentry); prot_size = prot_field_size(sig_attrs->mem.sig_type); if (!prot_size) { pr_err("Bad block size given: %u\n", block_size); return -EINVAL; } sblock_ctrl->bcount_per_cycle = cpu_to_be32(block_size + prot_size); sblock_ctrl->op = cpu_to_be32(MLX5_STRIDE_BLOCK_OP); sblock_ctrl->repeat_count = cpu_to_be32(data_len / block_size); sblock_ctrl->num_entries = cpu_to_be16(2); data_sentry->bcount = cpu_to_be16(block_size); data_sentry->key = cpu_to_be32(data_key); data_sentry->va = cpu_to_be64(data_va); data_sentry->stride = cpu_to_be16(block_size); prot_sentry->bcount = cpu_to_be16(prot_size); prot_sentry->key = cpu_to_be32(prot_key); prot_sentry->va = cpu_to_be64(prot_va); prot_sentry->stride = cpu_to_be16(prot_size); wqe_size = ALIGN(sizeof(*sblock_ctrl) + sizeof(*data_sentry) + sizeof(*prot_sentry), 64); } *seg += wqe_size; *size += wqe_size / 16; if (unlikely((*seg == qp->sq.qend))) *seg = mlx5_get_send_wqe(qp, 0); bsf = *seg; ret = mlx5_set_bsf(sig_mr, sig_attrs, bsf, data_len); if (ret) return -EINVAL; *seg += sizeof(*bsf); *size += sizeof(*bsf) / 16; if (unlikely((*seg == qp->sq.qend))) *seg = mlx5_get_send_wqe(qp, 0); return 0; } static void set_sig_mkey_segment(struct mlx5_mkey_seg *seg, struct ib_sig_handover_wr *wr, u32 nelements, u32 length, u32 pdn) { struct ib_mr *sig_mr = wr->sig_mr; u32 sig_key = sig_mr->rkey; u8 sigerr = to_mmr(sig_mr)->sig->sigerr_count & 1; memset(seg, 0, sizeof(*seg)); seg->flags = get_umr_flags(wr->access_flags) | MLX5_ACCESS_MODE_KLM; seg->qpn_mkey7_0 = cpu_to_be32((sig_key & 0xff) | 0xffffff00); seg->flags_pd = cpu_to_be32(MLX5_MKEY_REMOTE_INVAL | sigerr << 26 | MLX5_MKEY_BSF_EN | pdn); seg->len = cpu_to_be64(length); seg->xlt_oct_size = cpu_to_be32(be16_to_cpu(get_klm_octo(nelements))); seg->bsfs_octo_size = cpu_to_be32(MLX5_MKEY_BSF_OCTO_SIZE); } static void set_sig_umr_segment(struct mlx5_wqe_umr_ctrl_seg *umr, u32 nelements) { memset(umr, 0, sizeof(*umr)); umr->flags = MLX5_FLAGS_INLINE | MLX5_FLAGS_CHECK_FREE; umr->klm_octowords = get_klm_octo(nelements); umr->bsf_octowords = cpu_to_be16(MLX5_MKEY_BSF_OCTO_SIZE); umr->mkey_mask = sig_mkey_mask(); } static int set_sig_umr_wr(struct ib_send_wr *send_wr, struct mlx5_ib_qp *qp, void **seg, int *size) { struct ib_sig_handover_wr *wr = sig_handover_wr(send_wr); struct mlx5_ib_mr *sig_mr = to_mmr(wr->sig_mr); u32 pdn = get_pd(qp)->pdn; u32 klm_oct_size; int region_len, ret; if (unlikely(wr->wr.num_sge != 1) || unlikely(wr->access_flags & IB_ACCESS_REMOTE_ATOMIC) || unlikely(!sig_mr->sig) || 
unlikely(!qp->signature_en) || unlikely(!sig_mr->sig->sig_status_checked)) return -EINVAL; /* length of the protected region, data + protection */ region_len = wr->wr.sg_list->length; if (wr->prot && (wr->prot->lkey != wr->wr.sg_list->lkey || wr->prot->addr != wr->wr.sg_list->addr || wr->prot->length != wr->wr.sg_list->length)) region_len += wr->prot->length; /** * KLM octoword size - if protection was provided * then we use strided block format (3 octowords), * else we use single KLM (1 octoword) **/ klm_oct_size = wr->prot ? 3 : 1; set_sig_umr_segment(*seg, klm_oct_size); *seg += sizeof(struct mlx5_wqe_umr_ctrl_seg); *size += sizeof(struct mlx5_wqe_umr_ctrl_seg) / 16; if (unlikely((*seg == qp->sq.qend))) *seg = mlx5_get_send_wqe(qp, 0); set_sig_mkey_segment(*seg, wr, klm_oct_size, region_len, pdn); *seg += sizeof(struct mlx5_mkey_seg); *size += sizeof(struct mlx5_mkey_seg) / 16; if (unlikely((*seg == qp->sq.qend))) *seg = mlx5_get_send_wqe(qp, 0); ret = set_sig_data_segment(wr, qp, seg, size); if (ret) return ret; sig_mr->sig->sig_status_checked = false; return 0; } static int set_psv_wr(struct ib_sig_domain *domain, u32 psv_idx, void **seg, int *size) { struct mlx5_seg_set_psv *psv_seg = *seg; memset(psv_seg, 0, sizeof(*psv_seg)); psv_seg->psv_num = cpu_to_be32(psv_idx); switch (domain->sig_type) { case IB_SIG_TYPE_NONE: break; case IB_SIG_TYPE_T10_DIF: psv_seg->transient_sig = cpu_to_be32(domain->sig.dif.bg << 16 | domain->sig.dif.app_tag); psv_seg->ref_tag = cpu_to_be32(domain->sig.dif.ref_tag); break; default: pr_err("Bad signature type given.\n"); return 1; } *seg += sizeof(*psv_seg); *size += sizeof(*psv_seg) / 16; return 0; } static int set_reg_wr(struct mlx5_ib_qp *qp, struct ib_reg_wr *wr, void **seg, int *size) { struct mlx5_ib_mr *mr = to_mmr(wr->mr); struct mlx5_ib_pd *pd = to_mpd(qp->ibqp.pd); if (unlikely(wr->wr.send_flags & IB_SEND_INLINE)) { mlx5_ib_warn(to_mdev(qp->ibqp.device), "Invalid IB_SEND_INLINE send flag\n"); return -EINVAL; } set_reg_umr_seg(*seg, mr); *seg += sizeof(struct mlx5_wqe_umr_ctrl_seg); *size += sizeof(struct mlx5_wqe_umr_ctrl_seg) / 16; if (unlikely((*seg == qp->sq.qend))) *seg = mlx5_get_send_wqe(qp, 0); set_reg_mkey_seg(*seg, mr, wr->key, wr->access); *seg += sizeof(struct mlx5_mkey_seg); *size += sizeof(struct mlx5_mkey_seg) / 16; if (unlikely((*seg == qp->sq.qend))) *seg = mlx5_get_send_wqe(qp, 0); set_reg_data_seg(*seg, mr, pd); *seg += sizeof(struct mlx5_wqe_data_seg); *size += (sizeof(struct mlx5_wqe_data_seg) / 16); return 0; } static void set_linv_wr(struct mlx5_ib_qp *qp, void **seg, int *size) { set_linv_umr_seg(*seg); *seg += sizeof(struct mlx5_wqe_umr_ctrl_seg); *size += sizeof(struct mlx5_wqe_umr_ctrl_seg) / 16; if (unlikely((*seg == qp->sq.qend))) *seg = mlx5_get_send_wqe(qp, 0); set_linv_mkey_seg(*seg); *seg += sizeof(struct mlx5_mkey_seg); *size += sizeof(struct mlx5_mkey_seg) / 16; if (unlikely((*seg == qp->sq.qend))) *seg = mlx5_get_send_wqe(qp, 0); } static void dump_wqe(struct mlx5_ib_qp *qp, int idx, int size_16) { __be32 *p = NULL; int tidx = idx; int i, j; pr_debug("dump wqe at %p\n", mlx5_get_send_wqe(qp, tidx)); for (i = 0, j = 0; i < size_16 * 4; i += 4, j += 4) { if ((i & 0xf) == 0) { void *buf = mlx5_get_send_wqe(qp, tidx); tidx = (tidx + 1) & (qp->sq.wqe_cnt - 1); p = buf; j = 0; } pr_debug("%08x %08x %08x %08x\n", be32_to_cpu(p[j]), be32_to_cpu(p[j + 1]), be32_to_cpu(p[j + 2]), be32_to_cpu(p[j + 3])); } } -static void mlx5_bf_copy(u64 __iomem *dst, u64 *src, - unsigned bytecnt, struct mlx5_ib_qp *qp) -{ - while (bytecnt 
> 0) { - __iowrite64_copy(dst++, src++, 8); - __iowrite64_copy(dst++, src++, 8); - __iowrite64_copy(dst++, src++, 8); - __iowrite64_copy(dst++, src++, 8); - __iowrite64_copy(dst++, src++, 8); - __iowrite64_copy(dst++, src++, 8); - __iowrite64_copy(dst++, src++, 8); - __iowrite64_copy(dst++, src++, 8); - bytecnt -= 64; - if (unlikely(src == qp->sq.qend)) - src = mlx5_get_send_wqe(qp, 0); - } -} - static u8 get_fence(u8 fence, struct ib_send_wr *wr) { if (unlikely(wr->opcode == IB_WR_LOCAL_INV && wr->send_flags & IB_SEND_FENCE)) return MLX5_FENCE_MODE_STRONG_ORDERING; if (unlikely(fence)) { if (wr->send_flags & IB_SEND_FENCE) return MLX5_FENCE_MODE_SMALL_AND_FENCE; else return fence; } else if (unlikely(wr->send_flags & IB_SEND_FENCE)) { return MLX5_FENCE_MODE_FENCE; } return 0; } static int begin_wqe(struct mlx5_ib_qp *qp, void **seg, struct mlx5_wqe_ctrl_seg **ctrl, struct ib_send_wr *wr, unsigned *idx, int *size, int nreq) { if (unlikely(mlx5_wq_overflow(&qp->sq, nreq, qp->ibqp.send_cq))) return -ENOMEM; *idx = qp->sq.cur_post & (qp->sq.wqe_cnt - 1); *seg = mlx5_get_send_wqe(qp, *idx); *ctrl = *seg; *(uint32_t *)(*seg + 8) = 0; (*ctrl)->imm = send_ieth(wr); (*ctrl)->fm_ce_se = qp->sq_signal_bits | (wr->send_flags & IB_SEND_SIGNALED ? MLX5_WQE_CTRL_CQ_UPDATE : 0) | (wr->send_flags & IB_SEND_SOLICITED ? MLX5_WQE_CTRL_SOLICITED : 0); *seg += sizeof(**ctrl); *size = sizeof(**ctrl) / 16; return 0; } static void finish_wqe(struct mlx5_ib_qp *qp, struct mlx5_wqe_ctrl_seg *ctrl, u8 size, unsigned idx, u64 wr_id, int nreq, u8 fence, u8 next_fence, u32 mlx5_opcode) { u8 opmod = 0; ctrl->opmod_idx_opcode = cpu_to_be32(((u32)(qp->sq.cur_post) << 8) | mlx5_opcode | ((u32)opmod << 24)); ctrl->qpn_ds = cpu_to_be32(size | (qp->trans_qp.base.mqp.qpn << 8)); ctrl->fm_ce_se |= fence; qp->fm_cache = next_fence; if (unlikely(qp->wq_sig)) ctrl->signature = wq_sig(ctrl); qp->sq.wrid[idx] = wr_id; qp->sq.w_list[idx].opcode = mlx5_opcode; qp->sq.wqe_head[idx] = qp->sq.head + nreq; qp->sq.cur_post += DIV_ROUND_UP(size * 16, MLX5_SEND_WQE_BB); qp->sq.w_list[idx].next = qp->sq.cur_post; } int mlx5_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, struct ib_send_wr **bad_wr) { struct mlx5_wqe_ctrl_seg *ctrl = NULL; /* compiler warning */ struct mlx5_ib_dev *dev = to_mdev(ibqp->device); struct mlx5_core_dev *mdev = dev->mdev; struct mlx5_ib_qp *qp; struct mlx5_ib_mr *mr; struct mlx5_wqe_data_seg *dpseg; struct mlx5_wqe_xrc_seg *xrc; struct mlx5_bf *bf; int uninitialized_var(size); void *qend; unsigned long flags; unsigned idx; int err = 0; int inl = 0; int num_sge; void *seg; int nreq; int i; u8 next_fence = 0; u8 fence; if (unlikely(ibqp->qp_type == IB_QPT_GSI)) return mlx5_ib_gsi_post_send(ibqp, wr, bad_wr); qp = to_mqp(ibqp); - bf = qp->bf; + bf = &qp->bf; qend = qp->sq.qend; spin_lock_irqsave(&qp->sq.lock, flags); if (mdev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR) { err = -EIO; *bad_wr = wr; nreq = 0; goto out; } for (nreq = 0; wr; nreq++, wr = wr->next) { if (unlikely(wr->opcode < 0 || wr->opcode >= ARRAY_SIZE(mlx5_ib_opcode))) { mlx5_ib_warn(dev, "\n"); err = -EINVAL; *bad_wr = wr; goto out; } fence = qp->fm_cache; num_sge = wr->num_sge; if (unlikely(num_sge > qp->sq.max_gs)) { mlx5_ib_warn(dev, "\n"); err = -EINVAL; *bad_wr = wr; goto out; } err = begin_wqe(qp, &seg, &ctrl, wr, &idx, &size, nreq); if (err) { mlx5_ib_warn(dev, "\n"); err = -ENOMEM; *bad_wr = wr; goto out; } switch (ibqp->qp_type) { case IB_QPT_XRC_INI: xrc = seg; seg += sizeof(*xrc); size += sizeof(*xrc) / 16; /* fall through */ case 
IB_QPT_RC: switch (wr->opcode) { case IB_WR_RDMA_READ: case IB_WR_RDMA_WRITE: case IB_WR_RDMA_WRITE_WITH_IMM: set_raddr_seg(seg, rdma_wr(wr)->remote_addr, rdma_wr(wr)->rkey); seg += sizeof(struct mlx5_wqe_raddr_seg); size += sizeof(struct mlx5_wqe_raddr_seg) / 16; break; case IB_WR_ATOMIC_CMP_AND_SWP: case IB_WR_ATOMIC_FETCH_AND_ADD: case IB_WR_MASKED_ATOMIC_CMP_AND_SWP: mlx5_ib_warn(dev, "Atomic operations are not supported yet\n"); err = -ENOSYS; *bad_wr = wr; goto out; case IB_WR_LOCAL_INV: next_fence = MLX5_FENCE_MODE_INITIATOR_SMALL; qp->sq.wr_data[idx] = IB_WR_LOCAL_INV; ctrl->imm = cpu_to_be32(wr->ex.invalidate_rkey); set_linv_wr(qp, &seg, &size); num_sge = 0; break; case IB_WR_REG_MR: next_fence = MLX5_FENCE_MODE_INITIATOR_SMALL; qp->sq.wr_data[idx] = IB_WR_REG_MR; ctrl->imm = cpu_to_be32(reg_wr(wr)->key); err = set_reg_wr(qp, reg_wr(wr), &seg, &size); if (err) { *bad_wr = wr; goto out; } num_sge = 0; break; case IB_WR_REG_SIG_MR: qp->sq.wr_data[idx] = IB_WR_REG_SIG_MR; mr = to_mmr(sig_handover_wr(wr)->sig_mr); ctrl->imm = cpu_to_be32(mr->ibmr.rkey); err = set_sig_umr_wr(wr, qp, &seg, &size); if (err) { mlx5_ib_warn(dev, "\n"); *bad_wr = wr; goto out; } finish_wqe(qp, ctrl, size, idx, wr->wr_id, nreq, get_fence(fence, wr), next_fence, MLX5_OPCODE_UMR); /* * SET_PSV WQEs are not signaled and solicited * on error */ wr->send_flags &= ~IB_SEND_SIGNALED; wr->send_flags |= IB_SEND_SOLICITED; err = begin_wqe(qp, &seg, &ctrl, wr, &idx, &size, nreq); if (err) { mlx5_ib_warn(dev, "\n"); err = -ENOMEM; *bad_wr = wr; goto out; } err = set_psv_wr(&sig_handover_wr(wr)->sig_attrs->mem, mr->sig->psv_memory.psv_idx, &seg, &size); if (err) { mlx5_ib_warn(dev, "\n"); *bad_wr = wr; goto out; } finish_wqe(qp, ctrl, size, idx, wr->wr_id, nreq, get_fence(fence, wr), next_fence, MLX5_OPCODE_SET_PSV); err = begin_wqe(qp, &seg, &ctrl, wr, &idx, &size, nreq); if (err) { mlx5_ib_warn(dev, "\n"); err = -ENOMEM; *bad_wr = wr; goto out; } next_fence = MLX5_FENCE_MODE_INITIATOR_SMALL; err = set_psv_wr(&sig_handover_wr(wr)->sig_attrs->wire, mr->sig->psv_wire.psv_idx, &seg, &size); if (err) { mlx5_ib_warn(dev, "\n"); *bad_wr = wr; goto out; } finish_wqe(qp, ctrl, size, idx, wr->wr_id, nreq, get_fence(fence, wr), next_fence, MLX5_OPCODE_SET_PSV); num_sge = 0; goto skip_psv; default: break; } break; case IB_QPT_UC: switch (wr->opcode) { case IB_WR_RDMA_WRITE: case IB_WR_RDMA_WRITE_WITH_IMM: set_raddr_seg(seg, rdma_wr(wr)->remote_addr, rdma_wr(wr)->rkey); seg += sizeof(struct mlx5_wqe_raddr_seg); size += sizeof(struct mlx5_wqe_raddr_seg) / 16; break; default: break; } break; case IB_QPT_SMI: case MLX5_IB_QPT_HW_GSI: set_datagram_seg(seg, wr); seg += sizeof(struct mlx5_wqe_datagram_seg); size += sizeof(struct mlx5_wqe_datagram_seg) / 16; if (unlikely((seg == qend))) seg = mlx5_get_send_wqe(qp, 0); break; case IB_QPT_UD: set_datagram_seg(seg, wr); seg += sizeof(struct mlx5_wqe_datagram_seg); size += sizeof(struct mlx5_wqe_datagram_seg) / 16; if (unlikely((seg == qend))) seg = mlx5_get_send_wqe(qp, 0); /* handle qp that supports ud offload */ if (qp->flags & IB_QP_CREATE_IPOIB_UD_LSO) { struct mlx5_wqe_eth_pad *pad; pad = seg; memset(pad, 0, sizeof(struct mlx5_wqe_eth_pad)); seg += sizeof(struct mlx5_wqe_eth_pad); size += sizeof(struct mlx5_wqe_eth_pad) / 16; seg = set_eth_seg(seg, wr, qend, qp, &size); if (unlikely((seg == qend))) seg = mlx5_get_send_wqe(qp, 0); } break; case MLX5_IB_QPT_REG_UMR: if (wr->opcode != MLX5_IB_WR_UMR) { err = -EINVAL; mlx5_ib_warn(dev, "bad opcode\n"); goto out; } qp->sq.wr_data[idx] = 
MLX5_IB_WR_UMR; ctrl->imm = cpu_to_be32(umr_wr(wr)->mkey); set_reg_umr_segment(seg, wr); seg += sizeof(struct mlx5_wqe_umr_ctrl_seg); size += sizeof(struct mlx5_wqe_umr_ctrl_seg) / 16; if (unlikely((seg == qend))) seg = mlx5_get_send_wqe(qp, 0); set_reg_mkey_segment(seg, wr); seg += sizeof(struct mlx5_mkey_seg); size += sizeof(struct mlx5_mkey_seg) / 16; if (unlikely((seg == qend))) seg = mlx5_get_send_wqe(qp, 0); break; default: break; } if (wr->send_flags & IB_SEND_INLINE && num_sge) { int uninitialized_var(sz); err = set_data_inl_seg(qp, wr, seg, &sz); if (unlikely(err)) { mlx5_ib_warn(dev, "\n"); *bad_wr = wr; goto out; } inl = 1; size += sz; } else { dpseg = seg; for (i = 0; i < num_sge; i++) { if (unlikely(dpseg == qend)) { seg = mlx5_get_send_wqe(qp, 0); dpseg = seg; } if (likely(wr->sg_list[i].length)) { set_data_ptr_seg(dpseg, wr->sg_list + i); size += sizeof(struct mlx5_wqe_data_seg) / 16; dpseg++; } } } finish_wqe(qp, ctrl, size, idx, wr->wr_id, nreq, get_fence(fence, wr), next_fence, mlx5_ib_opcode[wr->opcode]); skip_psv: if (0) dump_wqe(qp, idx, size); } out: if (likely(nreq)) { qp->sq.head += nreq; /* Make sure that descriptors are written before * updating doorbell record and ringing the doorbell */ wmb(); qp->db.db[MLX5_SND_DBR] = cpu_to_be32(qp->sq.cur_post); /* Make sure doorbell record is visible to the HCA before * we hit doorbell */ wmb(); - if (bf->need_lock) - spin_lock(&bf->lock); - else - __acquire(&bf->lock); - - /* TBD enable WC */ - if (0 && nreq == 1 && bf->uuarn && inl && size > 1 && size <= bf->buf_size / 16) { - mlx5_bf_copy(bf->reg + bf->offset, (u64 *)ctrl, ALIGN(size * 16, 64), qp); - /* wc_wmb(); */ - } else { - mlx5_write64((__be32 *)ctrl, bf->regreg + bf->offset, - MLX5_GET_DOORBELL_LOCK(&bf->lock32)); - /* Make sure doorbells don't leak out of SQ spinlock - * and reach the HCA out of order. - */ - mmiowb(); - } + mlx5_write64((__be32 *)ctrl, bf->bfreg->map + bf->offset, + MLX5_GET_DOORBELL_LOCK(&bf->lock32)); + /* Make sure doorbells don't leak out of SQ spinlock + * and reach the HCA out of order. 
+ */ bf->offset ^= bf->buf_size; - if (bf->need_lock) - spin_unlock(&bf->lock); - else - __release(&bf->lock); } spin_unlock_irqrestore(&qp->sq.lock, flags); return err; } static void set_sig_seg(struct mlx5_rwqe_sig *sig, int size) { sig->signature = calc_sig(sig, size); } int mlx5_ib_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr, struct ib_recv_wr **bad_wr) { struct mlx5_ib_qp *qp = to_mqp(ibqp); struct mlx5_wqe_data_seg *scat; struct mlx5_rwqe_sig *sig; struct mlx5_ib_dev *dev = to_mdev(ibqp->device); struct mlx5_core_dev *mdev = dev->mdev; unsigned long flags; int err = 0; int nreq; int ind; int i; if (unlikely(ibqp->qp_type == IB_QPT_GSI)) return mlx5_ib_gsi_post_recv(ibqp, wr, bad_wr); spin_lock_irqsave(&qp->rq.lock, flags); if (mdev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR) { err = -EIO; *bad_wr = wr; nreq = 0; goto out; } ind = qp->rq.head & (qp->rq.wqe_cnt - 1); for (nreq = 0; wr; nreq++, wr = wr->next) { if (mlx5_wq_overflow(&qp->rq, nreq, qp->ibqp.recv_cq)) { err = -ENOMEM; *bad_wr = wr; goto out; } if (unlikely(wr->num_sge > qp->rq.max_gs)) { err = -EINVAL; *bad_wr = wr; goto out; } scat = get_recv_wqe(qp, ind); if (qp->wq_sig) scat++; for (i = 0; i < wr->num_sge; i++) set_data_ptr_seg(scat + i, wr->sg_list + i); if (i < qp->rq.max_gs) { scat[i].byte_count = 0; scat[i].lkey = cpu_to_be32(MLX5_INVALID_LKEY); scat[i].addr = 0; } if (qp->wq_sig) { sig = (struct mlx5_rwqe_sig *)scat; set_sig_seg(sig, (qp->rq.max_gs + 1) << 2); } qp->rq.wrid[ind] = wr->wr_id; ind = (ind + 1) & (qp->rq.wqe_cnt - 1); } out: if (likely(nreq)) { qp->rq.head += nreq; /* Make sure that descriptors are written before * doorbell record. */ wmb(); *qp->db.db = cpu_to_be32(qp->rq.head & 0xffff); } spin_unlock_irqrestore(&qp->rq.lock, flags); return err; } static inline enum ib_qp_state to_ib_qp_state(enum mlx5_qp_state mlx5_state) { switch (mlx5_state) { case MLX5_QP_STATE_RST: return IB_QPS_RESET; case MLX5_QP_STATE_INIT: return IB_QPS_INIT; case MLX5_QP_STATE_RTR: return IB_QPS_RTR; case MLX5_QP_STATE_RTS: return IB_QPS_RTS; case MLX5_QP_STATE_SQ_DRAINING: case MLX5_QP_STATE_SQD: return IB_QPS_SQD; case MLX5_QP_STATE_SQER: return IB_QPS_SQE; case MLX5_QP_STATE_ERR: return IB_QPS_ERR; default: return -1; } } static inline enum ib_mig_state to_ib_mig_state(int mlx5_mig_state) { switch (mlx5_mig_state) { case MLX5_QP_PM_ARMED: return IB_MIG_ARMED; case MLX5_QP_PM_REARM: return IB_MIG_REARM; case MLX5_QP_PM_MIGRATED: return IB_MIG_MIGRATED; default: return -1; } } static int to_ib_qp_access_flags(int mlx5_flags) { int ib_flags = 0; if (mlx5_flags & MLX5_QP_BIT_RRE) ib_flags |= IB_ACCESS_REMOTE_READ; if (mlx5_flags & MLX5_QP_BIT_RWE) ib_flags |= IB_ACCESS_REMOTE_WRITE; if (mlx5_flags & MLX5_QP_BIT_RAE) ib_flags |= IB_ACCESS_REMOTE_ATOMIC; return ib_flags; } static void to_ib_ah_attr(struct mlx5_ib_dev *ibdev, struct ib_ah_attr *ib_ah_attr, struct mlx5_qp_path *path) { struct mlx5_core_dev *dev = ibdev->mdev; memset(ib_ah_attr, 0, sizeof(*ib_ah_attr)); ib_ah_attr->port_num = path->port; if (ib_ah_attr->port_num == 0 || ib_ah_attr->port_num > MLX5_CAP_GEN(dev, num_ports)) return; ib_ah_attr->sl = path->dci_cfi_prio_sl & 0xf; ib_ah_attr->dlid = be16_to_cpu(path->rlid); ib_ah_attr->src_path_bits = path->grh_mlid & 0x7f; ib_ah_attr->static_rate = path->static_rate ? path->static_rate - 5 : 0; ib_ah_attr->ah_flags = (path->grh_mlid & (1 << 7)) ? 
IB_AH_GRH : 0; if (ib_ah_attr->ah_flags) { ib_ah_attr->grh.sgid_index = path->mgid_index; ib_ah_attr->grh.hop_limit = path->hop_limit; ib_ah_attr->grh.traffic_class = (be32_to_cpu(path->tclass_flowlabel) >> 20) & 0xff; ib_ah_attr->grh.flow_label = be32_to_cpu(path->tclass_flowlabel) & 0xfffff; memcpy(ib_ah_attr->grh.dgid.raw, path->rgid, sizeof(ib_ah_attr->grh.dgid.raw)); } } static int query_raw_packet_qp_sq_state(struct mlx5_ib_dev *dev, struct mlx5_ib_sq *sq, u8 *sq_state) { void *out; void *sqc; int inlen; int err; inlen = MLX5_ST_SZ_BYTES(query_sq_out); out = mlx5_vzalloc(inlen); if (!out) return -ENOMEM; err = mlx5_core_query_sq(dev->mdev, sq->base.mqp.qpn, out); if (err) goto out; sqc = MLX5_ADDR_OF(query_sq_out, out, sq_context); *sq_state = MLX5_GET(sqc, sqc, state); sq->state = *sq_state; out: kvfree(out); return err; } static int query_raw_packet_qp_rq_state(struct mlx5_ib_dev *dev, struct mlx5_ib_rq *rq, u8 *rq_state) { void *out; void *rqc; int inlen; int err; inlen = MLX5_ST_SZ_BYTES(query_rq_out); out = mlx5_vzalloc(inlen); if (!out) return -ENOMEM; err = mlx5_core_query_rq(dev->mdev, rq->base.mqp.qpn, out); if (err) goto out; rqc = MLX5_ADDR_OF(query_rq_out, out, rq_context); *rq_state = MLX5_GET(rqc, rqc, state); rq->state = *rq_state; out: kvfree(out); return err; } static int sqrq_state_to_qp_state(u8 sq_state, u8 rq_state, struct mlx5_ib_qp *qp, u8 *qp_state) { static const u8 sqrq_trans[MLX5_RQ_NUM_STATE][MLX5_SQ_NUM_STATE] = { [MLX5_RQC_STATE_RST] = { [MLX5_SQC_STATE_RST] = IB_QPS_RESET, [MLX5_SQC_STATE_RDY] = MLX5_QP_STATE_BAD, [MLX5_SQC_STATE_ERR] = MLX5_QP_STATE_BAD, [MLX5_SQ_STATE_NA] = IB_QPS_RESET, }, [MLX5_RQC_STATE_RDY] = { [MLX5_SQC_STATE_RST] = MLX5_QP_STATE_BAD, [MLX5_SQC_STATE_RDY] = MLX5_QP_STATE, [MLX5_SQC_STATE_ERR] = IB_QPS_SQE, [MLX5_SQ_STATE_NA] = MLX5_QP_STATE, }, [MLX5_RQC_STATE_ERR] = { [MLX5_SQC_STATE_RST] = MLX5_QP_STATE_BAD, [MLX5_SQC_STATE_RDY] = MLX5_QP_STATE_BAD, [MLX5_SQC_STATE_ERR] = IB_QPS_ERR, [MLX5_SQ_STATE_NA] = IB_QPS_ERR, }, [MLX5_RQ_STATE_NA] = { [MLX5_SQC_STATE_RST] = IB_QPS_RESET, [MLX5_SQC_STATE_RDY] = MLX5_QP_STATE, [MLX5_SQC_STATE_ERR] = MLX5_QP_STATE, [MLX5_SQ_STATE_NA] = MLX5_QP_STATE_BAD, }, }; *qp_state = sqrq_trans[rq_state][sq_state]; if (*qp_state == MLX5_QP_STATE_BAD) { WARN(1, "Buggy Raw Packet QP state, SQ 0x%x state: 0x%x, RQ 0x%x state: 0x%x", qp->raw_packet_qp.sq.base.mqp.qpn, sq_state, qp->raw_packet_qp.rq.base.mqp.qpn, rq_state); return -EINVAL; } if (*qp_state == MLX5_QP_STATE) *qp_state = qp->state; return 0; } static int query_raw_packet_qp_state(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp, u8 *raw_packet_qp_state) { struct mlx5_ib_raw_packet_qp *raw_packet_qp = &qp->raw_packet_qp; struct mlx5_ib_sq *sq = &raw_packet_qp->sq; struct mlx5_ib_rq *rq = &raw_packet_qp->rq; int err; u8 sq_state = MLX5_SQ_STATE_NA; u8 rq_state = MLX5_RQ_STATE_NA; if (qp->sq.wqe_cnt) { err = query_raw_packet_qp_sq_state(dev, sq, &sq_state); if (err) return err; } if (qp->rq.wqe_cnt) { err = query_raw_packet_qp_rq_state(dev, rq, &rq_state); if (err) return err; } return sqrq_state_to_qp_state(sq_state, rq_state, qp, raw_packet_qp_state); } static int query_qp_attr(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp, struct ib_qp_attr *qp_attr) { int outlen = MLX5_ST_SZ_BYTES(query_qp_out); struct mlx5_qp_context *context; int mlx5_state; u32 *outb; int err = 0; outb = kzalloc(outlen, GFP_KERNEL); if (!outb) return -ENOMEM; err = mlx5_core_qp_query(dev->mdev, &qp->trans_qp.base.mqp, outb, outlen); if (err) goto out; /* FIXME: use MLX5_GET 
rather than mlx5_qp_context manual struct */ context = (struct mlx5_qp_context *)MLX5_ADDR_OF(query_qp_out, outb, qpc); mlx5_state = be32_to_cpu(context->flags) >> 28; qp->state = to_ib_qp_state(mlx5_state); qp_attr->path_mtu = context->mtu_msgmax >> 5; qp_attr->path_mig_state = to_ib_mig_state((be32_to_cpu(context->flags) >> 11) & 0x3); qp_attr->qkey = be32_to_cpu(context->qkey); qp_attr->rq_psn = be32_to_cpu(context->rnr_nextrecvpsn) & 0xffffff; qp_attr->sq_psn = be32_to_cpu(context->next_send_psn) & 0xffffff; qp_attr->dest_qp_num = be32_to_cpu(context->log_pg_sz_remote_qpn) & 0xffffff; qp_attr->qp_access_flags = to_ib_qp_access_flags(be32_to_cpu(context->params2)); if (qp->ibqp.qp_type == IB_QPT_RC || qp->ibqp.qp_type == IB_QPT_UC) { to_ib_ah_attr(dev, &qp_attr->ah_attr, &context->pri_path); to_ib_ah_attr(dev, &qp_attr->alt_ah_attr, &context->alt_path); qp_attr->alt_pkey_index = be16_to_cpu(context->alt_path.pkey_index); qp_attr->alt_port_num = qp_attr->alt_ah_attr.port_num; } qp_attr->pkey_index = be16_to_cpu(context->pri_path.pkey_index); qp_attr->port_num = context->pri_path.port; /* qp_attr->en_sqd_async_notify is only applicable in modify qp */ qp_attr->sq_draining = mlx5_state == MLX5_QP_STATE_SQ_DRAINING; qp_attr->max_rd_atomic = 1 << ((be32_to_cpu(context->params1) >> 21) & 0x7); qp_attr->max_dest_rd_atomic = 1 << ((be32_to_cpu(context->params2) >> 21) & 0x7); qp_attr->min_rnr_timer = (be32_to_cpu(context->rnr_nextrecvpsn) >> 24) & 0x1f; qp_attr->timeout = context->pri_path.ackto_lt >> 3; qp_attr->retry_cnt = (be32_to_cpu(context->params1) >> 16) & 0x7; qp_attr->rnr_retry = (be32_to_cpu(context->params1) >> 13) & 0x7; qp_attr->alt_timeout = context->alt_path.ackto_lt >> 3; out: kfree(outb); return err; } int mlx5_ib_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr, int qp_attr_mask, struct ib_qp_init_attr *qp_init_attr) { struct mlx5_ib_dev *dev = to_mdev(ibqp->device); struct mlx5_ib_qp *qp = to_mqp(ibqp); int err = 0; u8 raw_packet_qp_state; if (ibqp->rwq_ind_tbl) return -ENOSYS; if (unlikely(ibqp->qp_type == IB_QPT_GSI)) return mlx5_ib_gsi_query_qp(ibqp, qp_attr, qp_attr_mask, qp_init_attr); #ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING /* * Wait for any outstanding page faults, in case the user frees memory * based upon this query's result. 
*/ flush_workqueue(mlx5_ib_page_fault_wq); #endif mutex_lock(&qp->mutex); if (qp->ibqp.qp_type == IB_QPT_RAW_PACKET) { err = query_raw_packet_qp_state(dev, qp, &raw_packet_qp_state); if (err) goto out; qp->state = raw_packet_qp_state; qp_attr->port_num = 1; } else { err = query_qp_attr(dev, qp, qp_attr); if (err) goto out; } qp_attr->qp_state = qp->state; qp_attr->cur_qp_state = qp_attr->qp_state; qp_attr->cap.max_recv_wr = qp->rq.wqe_cnt; qp_attr->cap.max_recv_sge = qp->rq.max_gs; if (!ibqp->uobject) { qp_attr->cap.max_send_wr = qp->sq.max_post; qp_attr->cap.max_send_sge = qp->sq.max_gs; qp_init_attr->qp_context = ibqp->qp_context; } else { qp_attr->cap.max_send_wr = 0; qp_attr->cap.max_send_sge = 0; } qp_init_attr->qp_type = ibqp->qp_type; qp_init_attr->recv_cq = ibqp->recv_cq; qp_init_attr->send_cq = ibqp->send_cq; qp_init_attr->srq = ibqp->srq; qp_attr->cap.max_inline_data = qp->max_inline_data; qp_init_attr->cap = qp_attr->cap; qp_init_attr->create_flags = 0; if (qp->flags & MLX5_IB_QP_BLOCK_MULTICAST_LOOPBACK) qp_init_attr->create_flags |= IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK; if (qp->flags & MLX5_IB_QP_CROSS_CHANNEL) qp_init_attr->create_flags |= IB_QP_CREATE_CROSS_CHANNEL; if (qp->flags & MLX5_IB_QP_MANAGED_SEND) qp_init_attr->create_flags |= IB_QP_CREATE_MANAGED_SEND; if (qp->flags & MLX5_IB_QP_MANAGED_RECV) qp_init_attr->create_flags |= IB_QP_CREATE_MANAGED_RECV; if (qp->flags & MLX5_IB_QP_SQPN_QP1) - qp_init_attr->create_flags |= mlx5_ib_create_qp_sqpn_qp1(); + qp_init_attr->create_flags |= MLX5_IB_QP_CREATE_SQPN_QP1; qp_init_attr->sq_sig_type = qp->sq_signal_bits & MLX5_WQE_CTRL_CQ_UPDATE ? IB_SIGNAL_ALL_WR : IB_SIGNAL_REQ_WR; out: mutex_unlock(&qp->mutex); return err; } struct ib_xrcd *mlx5_ib_alloc_xrcd(struct ib_device *ibdev, struct ib_ucontext *context, struct ib_udata *udata) { struct mlx5_ib_dev *dev = to_mdev(ibdev); struct mlx5_ib_xrcd *xrcd; int err; if (!MLX5_CAP_GEN(dev->mdev, xrc)) return ERR_PTR(-ENOSYS); xrcd = kmalloc(sizeof(*xrcd), GFP_KERNEL); if (!xrcd) return ERR_PTR(-ENOMEM); err = mlx5_core_xrcd_alloc(dev->mdev, &xrcd->xrcdn); if (err) { kfree(xrcd); return ERR_PTR(-ENOMEM); } return &xrcd->ibxrcd; } int mlx5_ib_dealloc_xrcd(struct ib_xrcd *xrcd) { struct mlx5_ib_dev *dev = to_mdev(xrcd->device); u32 xrcdn = to_mxrcd(xrcd)->xrcdn; int err; err = mlx5_core_xrcd_dealloc(dev->mdev, xrcdn); if (err) { mlx5_ib_warn(dev, "failed to dealloc xrcdn 0x%x\n", xrcdn); return err; } kfree(xrcd); return 0; } static void mlx5_ib_wq_event(struct mlx5_core_qp *core_qp, int type) { struct mlx5_ib_rwq *rwq = to_mibrwq(core_qp); struct mlx5_ib_dev *dev = to_mdev(rwq->ibwq.device); struct ib_event event; if (rwq->ibwq.event_handler) { event.device = rwq->ibwq.device; event.element.wq = &rwq->ibwq; switch (type) { case MLX5_EVENT_TYPE_WQ_CATAS_ERROR: event.event = IB_EVENT_WQ_FATAL; break; default: mlx5_ib_warn(dev, "Unexpected event type %d on WQ %06x\n", type, core_qp->qpn); return; } rwq->ibwq.event_handler(&event, rwq->ibwq.wq_context); } } static int create_rq(struct mlx5_ib_rwq *rwq, struct ib_pd *pd, struct ib_wq_init_attr *init_attr) { struct mlx5_ib_dev *dev; __be64 *rq_pas0; void *in; void *rqc; void *wq; int inlen; int err; dev = to_mdev(pd->device); inlen = MLX5_ST_SZ_BYTES(create_rq_in) + sizeof(u64) * rwq->rq_num_pas; in = mlx5_vzalloc(inlen); if (!in) return -ENOMEM; rqc = MLX5_ADDR_OF(create_rq_in, in, ctx); MLX5_SET(rqc, rqc, mem_rq_type, MLX5_RQC_RQ_TYPE_MEMORY_RQ_INLINE); MLX5_SET(rqc, rqc, user_index, rwq->user_index); MLX5_SET(rqc, rqc, cqn, 
to_mcq(init_attr->cq)->mcq.cqn); MLX5_SET(rqc, rqc, state, MLX5_RQC_STATE_RST); MLX5_SET(rqc, rqc, flush_in_error_en, 1); wq = MLX5_ADDR_OF(rqc, rqc, wq); MLX5_SET(wq, wq, wq_type, MLX5_WQ_TYPE_CYCLIC); MLX5_SET(wq, wq, end_padding_mode, MLX5_WQ_END_PAD_MODE_ALIGN); MLX5_SET(wq, wq, log_wq_stride, rwq->log_rq_stride); MLX5_SET(wq, wq, log_wq_sz, rwq->log_rq_size); MLX5_SET(wq, wq, pd, to_mpd(pd)->pdn); MLX5_SET(wq, wq, page_offset, rwq->rq_page_offset); MLX5_SET(wq, wq, log_wq_pg_sz, rwq->log_page_size); MLX5_SET(wq, wq, wq_signature, rwq->wq_sig); MLX5_SET64(wq, wq, dbr_addr, rwq->db.dma); rq_pas0 = (__be64 *)MLX5_ADDR_OF(wq, wq, pas); mlx5_ib_populate_pas(dev, rwq->umem, rwq->page_shift, rq_pas0, 0); err = mlx5_core_create_rq_tracked(dev->mdev, in, inlen, &rwq->core_qp); kvfree(in); return err; } static int set_user_rq_size(struct mlx5_ib_dev *dev, struct ib_wq_init_attr *wq_init_attr, struct mlx5_ib_create_wq *ucmd, struct mlx5_ib_rwq *rwq) { /* Sanity check RQ size before proceeding */ if (wq_init_attr->max_wr > (1 << MLX5_CAP_GEN(dev->mdev, log_max_wq_sz))) return -EINVAL; if (!ucmd->rq_wqe_count) return -EINVAL; rwq->wqe_count = ucmd->rq_wqe_count; rwq->wqe_shift = ucmd->rq_wqe_shift; rwq->buf_size = (rwq->wqe_count << rwq->wqe_shift); rwq->log_rq_stride = rwq->wqe_shift; rwq->log_rq_size = ilog2(rwq->wqe_count); return 0; } static int prepare_user_rq(struct ib_pd *pd, struct ib_wq_init_attr *init_attr, struct ib_udata *udata, struct mlx5_ib_rwq *rwq) { struct mlx5_ib_dev *dev = to_mdev(pd->device); struct mlx5_ib_create_wq ucmd = {}; int err; size_t required_cmd_sz; required_cmd_sz = offsetof(typeof(ucmd), reserved) + sizeof(ucmd.reserved); if (udata->inlen < required_cmd_sz) { mlx5_ib_dbg(dev, "invalid inlen\n"); return -EINVAL; } if (udata->inlen > sizeof(ucmd) && !ib_is_udata_cleared(udata, sizeof(ucmd), udata->inlen - sizeof(ucmd))) { mlx5_ib_dbg(dev, "inlen is not supported\n"); return -EOPNOTSUPP; } if (ib_copy_from_udata(&ucmd, udata, min(sizeof(ucmd), udata->inlen))) { mlx5_ib_dbg(dev, "copy failed\n"); return -EFAULT; } if (ucmd.comp_mask) { mlx5_ib_dbg(dev, "invalid comp mask\n"); return -EOPNOTSUPP; } if (ucmd.reserved) { mlx5_ib_dbg(dev, "invalid reserved\n"); return -EOPNOTSUPP; } err = set_user_rq_size(dev, init_attr, &ucmd, rwq); if (err) { mlx5_ib_dbg(dev, "err %d\n", err); return err; } err = create_user_rq(dev, pd, rwq, &ucmd); if (err) { mlx5_ib_dbg(dev, "err %d\n", err); if (err) return err; } rwq->user_index = ucmd.user_index; return 0; } struct ib_wq *mlx5_ib_create_wq(struct ib_pd *pd, struct ib_wq_init_attr *init_attr, struct ib_udata *udata) { struct mlx5_ib_dev *dev; struct mlx5_ib_rwq *rwq; struct mlx5_ib_create_wq_resp resp = {}; size_t min_resp_len; int err; if (!udata) return ERR_PTR(-ENOSYS); min_resp_len = offsetof(typeof(resp), reserved) + sizeof(resp.reserved); if (udata->outlen && udata->outlen < min_resp_len) return ERR_PTR(-EINVAL); dev = to_mdev(pd->device); switch (init_attr->wq_type) { case IB_WQT_RQ: rwq = kzalloc(sizeof(*rwq), GFP_KERNEL); if (!rwq) return ERR_PTR(-ENOMEM); err = prepare_user_rq(pd, init_attr, udata, rwq); if (err) goto err; err = create_rq(rwq, pd, init_attr); if (err) goto err_user_rq; break; default: mlx5_ib_dbg(dev, "unsupported wq type %d\n", init_attr->wq_type); return ERR_PTR(-EINVAL); } rwq->ibwq.wq_num = rwq->core_qp.qpn; rwq->ibwq.state = IB_WQS_RESET; if (udata->outlen) { resp.response_length = offsetof(typeof(resp), response_length) + sizeof(resp.response_length); err = ib_copy_to_udata(udata, &resp, 
resp.response_length); if (err) goto err_copy; } rwq->core_qp.event = mlx5_ib_wq_event; rwq->ibwq.event_handler = init_attr->event_handler; return &rwq->ibwq; err_copy: mlx5_core_destroy_rq_tracked(dev->mdev, &rwq->core_qp); err_user_rq: destroy_user_rq(pd, rwq); err: kfree(rwq); return ERR_PTR(err); } int mlx5_ib_destroy_wq(struct ib_wq *wq) { struct mlx5_ib_dev *dev = to_mdev(wq->device); struct mlx5_ib_rwq *rwq = to_mrwq(wq); mlx5_core_destroy_rq_tracked(dev->mdev, &rwq->core_qp); destroy_user_rq(wq->pd, rwq); kfree(rwq); return 0; } struct ib_rwq_ind_table *mlx5_ib_create_rwq_ind_table(struct ib_device *device, struct ib_rwq_ind_table_init_attr *init_attr, struct ib_udata *udata) { struct mlx5_ib_dev *dev = to_mdev(device); struct mlx5_ib_rwq_ind_table *rwq_ind_tbl; int sz = 1 << init_attr->log_ind_tbl_size; struct mlx5_ib_create_rwq_ind_tbl_resp resp = {}; size_t min_resp_len; int inlen; int err; int i; u32 *in; void *rqtc; if (udata->inlen > 0 && !ib_is_udata_cleared(udata, 0, udata->inlen)) return ERR_PTR(-EOPNOTSUPP); if (init_attr->log_ind_tbl_size > MLX5_CAP_GEN(dev->mdev, log_max_rqt_size)) { mlx5_ib_dbg(dev, "log_ind_tbl_size = %d is bigger than supported = %d\n", init_attr->log_ind_tbl_size, MLX5_CAP_GEN(dev->mdev, log_max_rqt_size)); return ERR_PTR(-EINVAL); } min_resp_len = offsetof(typeof(resp), reserved) + sizeof(resp.reserved); if (udata->outlen && udata->outlen < min_resp_len) return ERR_PTR(-EINVAL); rwq_ind_tbl = kzalloc(sizeof(*rwq_ind_tbl), GFP_KERNEL); if (!rwq_ind_tbl) return ERR_PTR(-ENOMEM); inlen = MLX5_ST_SZ_BYTES(create_rqt_in) + sizeof(u32) * sz; in = mlx5_vzalloc(inlen); if (!in) { err = -ENOMEM; goto err; } rqtc = MLX5_ADDR_OF(create_rqt_in, in, rqt_context); MLX5_SET(rqtc, rqtc, rqt_actual_size, sz); MLX5_SET(rqtc, rqtc, rqt_max_size, sz); for (i = 0; i < sz; i++) MLX5_SET(rqtc, rqtc, rq_num[i], init_attr->ind_tbl[i]->wq_num); err = mlx5_core_create_rqt(dev->mdev, in, inlen, &rwq_ind_tbl->rqtn); kvfree(in); if (err) goto err; rwq_ind_tbl->ib_rwq_ind_tbl.ind_tbl_num = rwq_ind_tbl->rqtn; if (udata->outlen) { resp.response_length = offsetof(typeof(resp), response_length) + sizeof(resp.response_length); err = ib_copy_to_udata(udata, &resp, resp.response_length); if (err) goto err_copy; } return &rwq_ind_tbl->ib_rwq_ind_tbl; err_copy: mlx5_core_destroy_rqt(dev->mdev, rwq_ind_tbl->rqtn); err: kfree(rwq_ind_tbl); return ERR_PTR(err); } int mlx5_ib_destroy_rwq_ind_table(struct ib_rwq_ind_table *ib_rwq_ind_tbl) { struct mlx5_ib_rwq_ind_table *rwq_ind_tbl = to_mrwq_ind_table(ib_rwq_ind_tbl); struct mlx5_ib_dev *dev = to_mdev(ib_rwq_ind_tbl->device); mlx5_core_destroy_rqt(dev->mdev, rwq_ind_tbl->rqtn); kfree(rwq_ind_tbl); return 0; } int mlx5_ib_modify_wq(struct ib_wq *wq, struct ib_wq_attr *wq_attr, u32 wq_attr_mask, struct ib_udata *udata) { struct mlx5_ib_dev *dev = to_mdev(wq->device); struct mlx5_ib_rwq *rwq = to_mrwq(wq); struct mlx5_ib_modify_wq ucmd = {}; size_t required_cmd_sz; int curr_wq_state; int wq_state; int inlen; int err; void *rqc; void *in; required_cmd_sz = offsetof(typeof(ucmd), reserved) + sizeof(ucmd.reserved); if (udata->inlen < required_cmd_sz) return -EINVAL; if (udata->inlen > sizeof(ucmd) && !ib_is_udata_cleared(udata, sizeof(ucmd), udata->inlen - sizeof(ucmd))) return -EOPNOTSUPP; if (ib_copy_from_udata(&ucmd, udata, min(sizeof(ucmd), udata->inlen))) return -EFAULT; if (ucmd.comp_mask || ucmd.reserved) return -EOPNOTSUPP; inlen = MLX5_ST_SZ_BYTES(modify_rq_in); in = mlx5_vzalloc(inlen); if (!in) return -ENOMEM; rqc = 
MLX5_ADDR_OF(modify_rq_in, in, ctx); MLX5_SET(modify_rq_in, in, rqn, rwq->core_qp.qpn); curr_wq_state = (wq_attr_mask & IB_WQ_CUR_STATE) ? wq_attr->curr_wq_state : wq->state; wq_state = (wq_attr_mask & IB_WQ_STATE) ? wq_attr->wq_state : curr_wq_state; if (curr_wq_state == IB_WQS_ERR) curr_wq_state = MLX5_RQC_STATE_ERR; if (wq_state == IB_WQS_ERR) wq_state = MLX5_RQC_STATE_ERR; MLX5_SET(modify_rq_in, in, rq_state, curr_wq_state); MLX5_SET(rqc, rqc, state, wq_state); err = mlx5_core_modify_rq(dev->mdev, in, inlen); kvfree(in); if (!err) rwq->ibwq.state = (wq_state == MLX5_RQC_STATE_ERR) ? IB_WQS_ERR : wq_state; return err; } diff --git a/sys/dev/mlx5/mlx5_ifc.h b/sys/dev/mlx5/mlx5_ifc.h index 17fa0ca01bdf..f748ec876ef4 100644 --- a/sys/dev/mlx5/mlx5_ifc.h +++ b/sys/dev/mlx5/mlx5_ifc.h @@ -1,10799 +1,10802 @@ /*- * Copyright (c) 2013-2019, Mellanox Technologies, Ltd. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS `AS IS' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* * $FreeBSD$ */ #ifndef MLX5_IFC_H #define MLX5_IFC_H #include enum { MLX5_EVENT_TYPE_COMP = 0x0, MLX5_EVENT_TYPE_PATH_MIG = 0x1, MLX5_EVENT_TYPE_COMM_EST = 0x2, MLX5_EVENT_TYPE_SQ_DRAINED = 0x3, MLX5_EVENT_TYPE_SRQ_LAST_WQE = 0x13, MLX5_EVENT_TYPE_SRQ_RQ_LIMIT = 0x14, MLX5_EVENT_TYPE_DCT_DRAINED = 0x1c, MLX5_EVENT_TYPE_DCT_KEY_VIOLATION = 0x1d, MLX5_EVENT_TYPE_CQ_ERROR = 0x4, MLX5_EVENT_TYPE_WQ_CATAS_ERROR = 0x5, MLX5_EVENT_TYPE_PATH_MIG_FAILED = 0x7, MLX5_EVENT_TYPE_PAGE_FAULT = 0xc, MLX5_EVENT_TYPE_WQ_INVAL_REQ_ERROR = 0x10, MLX5_EVENT_TYPE_WQ_ACCESS_ERROR = 0x11, MLX5_EVENT_TYPE_SRQ_CATAS_ERROR = 0x12, MLX5_EVENT_TYPE_INTERNAL_ERROR = 0x8, MLX5_EVENT_TYPE_PORT_CHANGE = 0x9, MLX5_EVENT_TYPE_GPIO_EVENT = 0x15, MLX5_EVENT_TYPE_CODING_PORT_MODULE_EVENT = 0x16, MLX5_EVENT_TYPE_TEMP_WARN_EVENT = 0x17, MLX5_EVENT_TYPE_REMOTE_CONFIG = 0x19, MLX5_EVENT_TYPE_CODING_DCBX_CHANGE_EVENT = 0x1e, MLX5_EVENT_TYPE_CODING_PPS_EVENT = 0x25, MLX5_EVENT_TYPE_CODING_GENERAL_NOTIFICATION_EVENT = 0x22, MLX5_EVENT_TYPE_DB_BF_CONGESTION = 0x1a, MLX5_EVENT_TYPE_STALL_EVENT = 0x1b, MLX5_EVENT_TYPE_DROPPED_PACKET_LOGGED_EVENT = 0x1f, MLX5_EVENT_TYPE_CMD = 0xa, MLX5_EVENT_TYPE_PAGE_REQUEST = 0xb, MLX5_EVENT_TYPE_NIC_VPORT_CHANGE = 0xd, MLX5_EVENT_TYPE_FPGA_ERROR = 0x20, MLX5_EVENT_TYPE_FPGA_QP_ERROR = 0x21, MLX5_EVENT_TYPE_CODING_GENERAL_OBJ_EVENT = 0x27, }; enum { MLX5_MODIFY_TIR_BITMASK_LRO = 0x0, MLX5_MODIFY_TIR_BITMASK_INDIRECT_TABLE = 0x1, MLX5_MODIFY_TIR_BITMASK_HASH = 0x2, MLX5_MODIFY_TIR_BITMASK_TUNNELED_OFFLOAD_EN = 0x3, MLX5_MODIFY_TIR_BITMASK_SELF_LB_EN = 0x4 }; enum { MLX5_MODIFY_RQT_BITMASK_RQN_LIST = 0x1, }; enum { MLX5_SET_HCA_CAP_OP_MOD_GENERAL_DEVICE = 0x0, MLX5_SET_HCA_CAP_OP_MOD_ATOMIC = 0x3, }; enum { MLX5_CMD_OP_QUERY_HCA_CAP = 0x100, MLX5_CMD_OP_QUERY_ADAPTER = 0x101, MLX5_CMD_OP_INIT_HCA = 0x102, MLX5_CMD_OP_TEARDOWN_HCA = 0x103, MLX5_CMD_OP_ENABLE_HCA = 0x104, MLX5_CMD_OP_DISABLE_HCA = 0x105, MLX5_CMD_OP_QUERY_PAGES = 0x107, MLX5_CMD_OP_MANAGE_PAGES = 0x108, MLX5_CMD_OP_SET_HCA_CAP = 0x109, MLX5_CMD_OP_QUERY_ISSI = 0x10a, MLX5_CMD_OP_SET_ISSI = 0x10b, MLX5_CMD_OP_SET_DRIVER_VERSION = 0x10d, MLX5_CMD_OP_QUERY_OTHER_HCA_CAP = 0x10e, MLX5_CMD_OP_MODIFY_OTHER_HCA_CAP = 0x10f, MLX5_CMD_OP_CREATE_MKEY = 0x200, MLX5_CMD_OP_QUERY_MKEY = 0x201, MLX5_CMD_OP_DESTROY_MKEY = 0x202, MLX5_CMD_OP_QUERY_SPECIAL_CONTEXTS = 0x203, MLX5_CMD_OP_PAGE_FAULT_RESUME = 0x204, MLX5_CMD_OP_CREATE_EQ = 0x301, MLX5_CMD_OP_DESTROY_EQ = 0x302, MLX5_CMD_OP_QUERY_EQ = 0x303, MLX5_CMD_OP_GEN_EQE = 0x304, MLX5_CMD_OP_CREATE_CQ = 0x400, MLX5_CMD_OP_DESTROY_CQ = 0x401, MLX5_CMD_OP_QUERY_CQ = 0x402, MLX5_CMD_OP_MODIFY_CQ = 0x403, MLX5_CMD_OP_CREATE_QP = 0x500, MLX5_CMD_OP_DESTROY_QP = 0x501, MLX5_CMD_OP_RST2INIT_QP = 0x502, MLX5_CMD_OP_INIT2RTR_QP = 0x503, MLX5_CMD_OP_RTR2RTS_QP = 0x504, MLX5_CMD_OP_RTS2RTS_QP = 0x505, MLX5_CMD_OP_SQERR2RTS_QP = 0x506, MLX5_CMD_OP_2ERR_QP = 0x507, MLX5_CMD_OP_2RST_QP = 0x50a, MLX5_CMD_OP_QUERY_QP = 0x50b, MLX5_CMD_OP_SQD_RTS_QP = 0x50c, MLX5_CMD_OP_INIT2INIT_QP = 0x50e, MLX5_CMD_OP_CREATE_PSV = 0x600, MLX5_CMD_OP_DESTROY_PSV = 0x601, MLX5_CMD_OP_CREATE_SRQ = 0x700, MLX5_CMD_OP_DESTROY_SRQ = 0x701, MLX5_CMD_OP_QUERY_SRQ = 0x702, MLX5_CMD_OP_ARM_RQ = 0x703, MLX5_CMD_OP_CREATE_XRC_SRQ = 0x705, MLX5_CMD_OP_DESTROY_XRC_SRQ = 0x706, MLX5_CMD_OP_QUERY_XRC_SRQ = 0x707, MLX5_CMD_OP_ARM_XRC_SRQ = 0x708, MLX5_CMD_OP_CREATE_DCT = 0x710, MLX5_CMD_OP_DESTROY_DCT = 0x711, MLX5_CMD_OP_DRAIN_DCT = 0x712, MLX5_CMD_OP_QUERY_DCT = 0x713, MLX5_CMD_OP_ARM_DCT_FOR_KEY_VIOLATION = 0x714, MLX5_CMD_OP_SET_DC_CNAK_TRACE = 
0x715, MLX5_CMD_OP_QUERY_DC_CNAK_TRACE = 0x716, MLX5_CMD_OP_QUERY_VPORT_STATE = 0x750, MLX5_CMD_OP_MODIFY_VPORT_STATE = 0x751, MLX5_CMD_OP_QUERY_ESW_VPORT_CONTEXT = 0x752, MLX5_CMD_OP_MODIFY_ESW_VPORT_CONTEXT = 0x753, MLX5_CMD_OP_QUERY_NIC_VPORT_CONTEXT = 0x754, MLX5_CMD_OP_MODIFY_NIC_VPORT_CONTEXT = 0x755, MLX5_CMD_OP_QUERY_ROCE_ADDRESS = 0x760, MLX5_CMD_OP_SET_ROCE_ADDRESS = 0x761, MLX5_CMD_OP_QUERY_HCA_VPORT_CONTEXT = 0x762, MLX5_CMD_OP_MODIFY_HCA_VPORT_CONTEXT = 0x763, MLX5_CMD_OP_QUERY_HCA_VPORT_GID = 0x764, MLX5_CMD_OP_QUERY_HCA_VPORT_PKEY = 0x765, MLX5_CMD_OP_QUERY_VNIC_ENV = 0x76f, MLX5_CMD_OP_QUERY_VPORT_COUNTER = 0x770, MLX5_CMD_OP_ALLOC_Q_COUNTER = 0x771, MLX5_CMD_OP_DEALLOC_Q_COUNTER = 0x772, MLX5_CMD_OP_QUERY_Q_COUNTER = 0x773, MLX5_CMD_OP_SET_RATE_LIMIT = 0x780, MLX5_CMD_OP_QUERY_RATE_LIMIT = 0x781, MLX5_CMD_OP_CREATE_SCHEDULING_ELEMENT = 0x782, MLX5_CMD_OP_DESTROY_SCHEDULING_ELEMENT = 0x783, MLX5_CMD_OP_QUERY_SCHEDULING_ELEMENT = 0x784, MLX5_CMD_OP_MODIFY_SCHEDULING_ELEMENT = 0x785, MLX5_CMD_OP_CREATE_QOS_PARA_VPORT = 0x786, MLX5_CMD_OP_DESTROY_QOS_PARA_VPORT = 0x787, MLX5_CMD_OP_ALLOC_PD = 0x800, MLX5_CMD_OP_DEALLOC_PD = 0x801, MLX5_CMD_OP_ALLOC_UAR = 0x802, MLX5_CMD_OP_DEALLOC_UAR = 0x803, MLX5_CMD_OP_CONFIG_INT_MODERATION = 0x804, MLX5_CMD_OP_ACCESS_REG = 0x805, MLX5_CMD_OP_ATTACH_TO_MCG = 0x806, MLX5_CMD_OP_DETACH_FROM_MCG = 0x807, MLX5_CMD_OP_GET_DROPPED_PACKET_LOG = 0x80a, MLX5_CMD_OP_MAD_IFC = 0x50d, MLX5_CMD_OP_QUERY_MAD_DEMUX = 0x80b, MLX5_CMD_OP_SET_MAD_DEMUX = 0x80c, MLX5_CMD_OP_NOP = 0x80d, MLX5_CMD_OP_ALLOC_XRCD = 0x80e, MLX5_CMD_OP_DEALLOC_XRCD = 0x80f, MLX5_CMD_OP_SET_BURST_SIZE = 0x812, MLX5_CMD_OP_QUERY_BURST_SIZE = 0x813, MLX5_CMD_OP_ACTIVATE_TRACER = 0x814, MLX5_CMD_OP_DEACTIVATE_TRACER = 0x815, MLX5_CMD_OP_ALLOC_TRANSPORT_DOMAIN = 0x816, MLX5_CMD_OP_DEALLOC_TRANSPORT_DOMAIN = 0x817, MLX5_CMD_OP_SET_DIAGNOSTICS = 0x820, MLX5_CMD_OP_QUERY_DIAGNOSTICS = 0x821, MLX5_CMD_OP_QUERY_CONG_STATUS = 0x822, MLX5_CMD_OP_MODIFY_CONG_STATUS = 0x823, MLX5_CMD_OP_QUERY_CONG_PARAMS = 0x824, MLX5_CMD_OP_MODIFY_CONG_PARAMS = 0x825, MLX5_CMD_OP_QUERY_CONG_STATISTICS = 0x826, MLX5_CMD_OP_ADD_VXLAN_UDP_DPORT = 0x827, MLX5_CMD_OP_DELETE_VXLAN_UDP_DPORT = 0x828, MLX5_CMD_OP_SET_L2_TABLE_ENTRY = 0x829, MLX5_CMD_OP_QUERY_L2_TABLE_ENTRY = 0x82a, MLX5_CMD_OP_DELETE_L2_TABLE_ENTRY = 0x82b, MLX5_CMD_OP_SET_WOL_ROL = 0x830, MLX5_CMD_OP_QUERY_WOL_ROL = 0x831, MLX5_CMD_OP_CREATE_LAG = 0x840, MLX5_CMD_OP_MODIFY_LAG = 0x841, MLX5_CMD_OP_QUERY_LAG = 0x842, MLX5_CMD_OP_DESTROY_LAG = 0x843, MLX5_CMD_OP_CREATE_VPORT_LAG = 0x844, MLX5_CMD_OP_DESTROY_VPORT_LAG = 0x845, MLX5_CMD_OP_CREATE_TIR = 0x900, MLX5_CMD_OP_MODIFY_TIR = 0x901, MLX5_CMD_OP_DESTROY_TIR = 0x902, MLX5_CMD_OP_QUERY_TIR = 0x903, MLX5_CMD_OP_CREATE_SQ = 0x904, MLX5_CMD_OP_MODIFY_SQ = 0x905, MLX5_CMD_OP_DESTROY_SQ = 0x906, MLX5_CMD_OP_QUERY_SQ = 0x907, MLX5_CMD_OP_CREATE_RQ = 0x908, MLX5_CMD_OP_MODIFY_RQ = 0x909, MLX5_CMD_OP_DESTROY_RQ = 0x90a, MLX5_CMD_OP_QUERY_RQ = 0x90b, MLX5_CMD_OP_CREATE_RMP = 0x90c, MLX5_CMD_OP_MODIFY_RMP = 0x90d, MLX5_CMD_OP_DESTROY_RMP = 0x90e, MLX5_CMD_OP_QUERY_RMP = 0x90f, MLX5_CMD_OP_SET_DELAY_DROP_PARAMS = 0x910, MLX5_CMD_OP_QUERY_DELAY_DROP_PARAMS = 0x911, MLX5_CMD_OP_CREATE_TIS = 0x912, MLX5_CMD_OP_MODIFY_TIS = 0x913, MLX5_CMD_OP_DESTROY_TIS = 0x914, MLX5_CMD_OP_QUERY_TIS = 0x915, MLX5_CMD_OP_CREATE_RQT = 0x916, MLX5_CMD_OP_MODIFY_RQT = 0x917, MLX5_CMD_OP_DESTROY_RQT = 0x918, MLX5_CMD_OP_QUERY_RQT = 0x919, MLX5_CMD_OP_SET_FLOW_TABLE_ROOT = 0x92f, MLX5_CMD_OP_CREATE_FLOW_TABLE = 0x930, 
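/*
 * Editor's note (illustrative sketch, not part of the original header):
 * each opcode in this enum is written into the 16-bit "opcode" field at
 * the start of the matching *_in layout before the command is posted to
 * firmware.  Assuming the usual mlx5_ifc_enable_hca_in/out layouts and
 * the mlx5_cmd_exec() entry point, a minimal caller looks like:
 *
 *	u32 in[MLX5_ST_SZ_DW(enable_hca_in)] = {0};
 *	u32 out[MLX5_ST_SZ_DW(enable_hca_out)] = {0};
 *
 *	MLX5_SET(enable_hca_in, in, opcode, MLX5_CMD_OP_ENABLE_HCA);
 *	err = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
 */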
MLX5_CMD_OP_DESTROY_FLOW_TABLE = 0x931, MLX5_CMD_OP_QUERY_FLOW_TABLE = 0x932, MLX5_CMD_OP_CREATE_FLOW_GROUP = 0x933, MLX5_CMD_OP_DESTROY_FLOW_GROUP = 0x934, MLX5_CMD_OP_QUERY_FLOW_GROUP = 0x935, MLX5_CMD_OP_SET_FLOW_TABLE_ENTRY = 0x936, MLX5_CMD_OP_QUERY_FLOW_TABLE_ENTRY = 0x937, MLX5_CMD_OP_DELETE_FLOW_TABLE_ENTRY = 0x938, MLX5_CMD_OP_ALLOC_FLOW_COUNTER = 0x939, MLX5_CMD_OP_DEALLOC_FLOW_COUNTER = 0x93a, MLX5_CMD_OP_QUERY_FLOW_COUNTER = 0x93b, MLX5_CMD_OP_MODIFY_FLOW_TABLE = 0x93c, MLX5_CMD_OP_ALLOC_ENCAP_HEADER = 0x93d, MLX5_CMD_OP_DEALLOC_ENCAP_HEADER = 0x93e, MLX5_CMD_OP_FPGA_CREATE_QP = 0x960, MLX5_CMD_OP_FPGA_MODIFY_QP = 0x961, MLX5_CMD_OP_FPGA_QUERY_QP = 0x962, MLX5_CMD_OP_FPGA_DESTROY_QP = 0x963, MLX5_CMD_OP_FPGA_QUERY_QP_COUNTERS = 0x964, MLX5_CMD_OP_CREATE_GENERAL_OBJ = 0xa00, MLX5_CMD_OP_MODIFY_GENERAL_OBJ = 0xa01, MLX5_CMD_OP_QUERY_GENERAL_OBJ = 0xa02, MLX5_CMD_OP_DESTROY_GENERAL_OBJ = 0xa03, }; enum { MLX5_ICMD_CMDS_OPCODE_ICMD_OPCODE_QUERY_FW_INFO = 0x8007, MLX5_ICMD_CMDS_OPCODE_ICMD_QUERY_CAPABILITY = 0x8400, MLX5_ICMD_CMDS_OPCODE_ICMD_ACCESS_REGISTER = 0x9001, MLX5_ICMD_CMDS_OPCODE_ICMD_QUERY_VIRTUAL_MAC = 0x9003, MLX5_ICMD_CMDS_OPCODE_ICMD_SET_VIRTUAL_MAC = 0x9004, MLX5_ICMD_CMDS_OPCODE_ICMD_QUERY_WOL_ROL = 0x9005, MLX5_ICMD_CMDS_OPCODE_ICMD_SET_WOL_ROL = 0x9006, MLX5_ICMD_CMDS_OPCODE_ICMD_OCBB_INIT = 0x9007, MLX5_ICMD_CMDS_OPCODE_ICMD_OCBB_QUERY_HEADER_STATUS = 0x9008, MLX5_ICMD_CMDS_OPCODE_ICMD_OCBB_QUERY_ETOC_STATUS = 0x9009, MLX5_ICMD_CMDS_OPCODE_ICMD_OCBB_SET_EVENT = 0x900a, MLX5_ICMD_CMDS_OPCODE_ICMD_OPCODE_INIT_OCSD = 0xf004 }; enum { MLX5_GENERAL_OBJECT_TYPES_ENCRYPTION_KEY = 0xc, }; enum { MLX5_HCA_CAP_GENERAL_OBJ_TYPES_ENCRYPTION_KEY = 1 << 0xc, }; enum { MLX5_GENERAL_OBJECT_TYPE_ENCRYPTION_KEY_KEY_SIZE_128 = 0x0, MLX5_GENERAL_OBJECT_TYPE_ENCRYPTION_KEY_KEY_SIZE_256 = 0x1, }; enum { MLX5_GENERAL_OBJECT_TYPE_ENCRYPTION_KEY_TYPE_DEK = 0x1, }; struct mlx5_ifc_flow_table_fields_supported_bits { u8 outer_dmac[0x1]; u8 outer_smac[0x1]; u8 outer_ether_type[0x1]; u8 reserved_0[0x1]; u8 outer_first_prio[0x1]; u8 outer_first_cfi[0x1]; u8 outer_first_vid[0x1]; u8 reserved_1[0x1]; u8 outer_second_prio[0x1]; u8 outer_second_cfi[0x1]; u8 outer_second_vid[0x1]; u8 outer_ipv6_flow_label[0x1]; u8 outer_sip[0x1]; u8 outer_dip[0x1]; u8 outer_frag[0x1]; u8 outer_ip_protocol[0x1]; u8 outer_ip_ecn[0x1]; u8 outer_ip_dscp[0x1]; u8 outer_udp_sport[0x1]; u8 outer_udp_dport[0x1]; u8 outer_tcp_sport[0x1]; u8 outer_tcp_dport[0x1]; u8 outer_tcp_flags[0x1]; u8 outer_gre_protocol[0x1]; u8 outer_gre_key[0x1]; u8 outer_vxlan_vni[0x1]; u8 outer_geneve_vni[0x1]; u8 outer_geneve_oam[0x1]; u8 outer_geneve_protocol_type[0x1]; u8 outer_geneve_opt_len[0x1]; u8 reserved_2[0x1]; u8 source_eswitch_port[0x1]; u8 inner_dmac[0x1]; u8 inner_smac[0x1]; u8 inner_ether_type[0x1]; u8 reserved_3[0x1]; u8 inner_first_prio[0x1]; u8 inner_first_cfi[0x1]; u8 inner_first_vid[0x1]; u8 reserved_4[0x1]; u8 inner_second_prio[0x1]; u8 inner_second_cfi[0x1]; u8 inner_second_vid[0x1]; u8 inner_ipv6_flow_label[0x1]; u8 inner_sip[0x1]; u8 inner_dip[0x1]; u8 inner_frag[0x1]; u8 inner_ip_protocol[0x1]; u8 inner_ip_ecn[0x1]; u8 inner_ip_dscp[0x1]; u8 inner_udp_sport[0x1]; u8 inner_udp_dport[0x1]; u8 inner_tcp_sport[0x1]; u8 inner_tcp_dport[0x1]; u8 inner_tcp_flags[0x1]; u8 reserved_5[0x9]; u8 reserved_6[0x1a]; u8 bth_dst_qp[0x1]; u8 reserved_7[0x4]; u8 source_sqn[0x1]; u8 reserved_8[0x20]; }; struct mlx5_ifc_eth_discard_cntrs_grp_bits { u8 ingress_general_high[0x20]; u8 ingress_general_low[0x20]; u8 ingress_policy_engine_high[0x20]; 
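/*
 * Editor's note (assumed convention, for orientation only): the
 * mlx5_ifc_*_bits structures in this header are PRM bit layouts, not C
 * objects meant to be instantiated; every "u8 name[0xN]" member stands
 * for an N-bit field at that position.  They are intended to be
 * accessed through the MLX5_SET()/MLX5_GET()/MLX5_ADDR_OF() and
 * MLX5_ST_SZ_DW()/MLX5_ST_SZ_BYTES() macros from device.h, e.g. to read
 * the uar_4k capability bit introduced by this change:
 *
 *	if (MLX5_GET(cmd_hca_cap, hca_caps, uar_4k))
 *		uars_per_page = MLX5_GET(cmd_hca_cap, hca_caps,
 *		    num_of_uars_per_page);
 */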
u8 ingress_policy_engine_low[0x20]; u8 ingress_vlan_membership_high[0x20]; u8 ingress_vlan_membership_low[0x20]; u8 ingress_tag_frame_type_high[0x20]; u8 ingress_tag_frame_type_low[0x20]; u8 egress_vlan_membership_high[0x20]; u8 egress_vlan_membership_low[0x20]; u8 loopback_filter_high[0x20]; u8 loopback_filter_low[0x20]; u8 egress_general_high[0x20]; u8 egress_general_low[0x20]; u8 reserved_at_1c0[0x40]; u8 egress_hoq_high[0x20]; u8 egress_hoq_low[0x20]; u8 port_isolation_high[0x20]; u8 port_isolation_low[0x20]; u8 egress_policy_engine_high[0x20]; u8 egress_policy_engine_low[0x20]; u8 ingress_tx_link_down_high[0x20]; u8 ingress_tx_link_down_low[0x20]; u8 egress_stp_filter_high[0x20]; u8 egress_stp_filter_low[0x20]; u8 egress_hoq_stall_high[0x20]; u8 egress_hoq_stall_low[0x20]; u8 reserved_at_340[0x440]; }; struct mlx5_ifc_flow_table_prop_layout_bits { u8 ft_support[0x1]; u8 flow_tag[0x1]; u8 flow_counter[0x1]; u8 flow_modify_en[0x1]; u8 modify_root[0x1]; u8 identified_miss_table[0x1]; u8 flow_table_modify[0x1]; u8 encap[0x1]; u8 decap[0x1]; u8 reset_root_to_default[0x1]; u8 reserved_at_a[0x16]; u8 reserved_at_20[0x2]; u8 log_max_ft_size[0x6]; u8 reserved_at_28[0x10]; u8 max_ft_level[0x8]; u8 reserved_at_40[0x20]; u8 reserved_at_60[0x18]; u8 log_max_ft_num[0x8]; u8 reserved_at_80[0x10]; u8 log_max_flow_counter[0x8]; u8 log_max_destination[0x8]; u8 reserved_at_a0[0x18]; u8 log_max_flow[0x8]; u8 reserved_at_c0[0x40]; struct mlx5_ifc_flow_table_fields_supported_bits ft_field_support; struct mlx5_ifc_flow_table_fields_supported_bits ft_field_bitmask_support; }; struct mlx5_ifc_odp_per_transport_service_cap_bits { u8 send[0x1]; u8 receive[0x1]; u8 write[0x1]; u8 read[0x1]; u8 atomic[0x1]; u8 srq_receive[0x1]; u8 reserved_0[0x1a]; }; struct mlx5_ifc_flow_counter_list_bits { u8 reserved_0[0x10]; u8 flow_counter_id[0x10]; u8 reserved_1[0x20]; }; enum { MLX5_FLOW_CONTEXT_DEST_TYPE_VPORT = 0x0, MLX5_FLOW_CONTEXT_DEST_TYPE_FLOW_TABLE = 0x1, MLX5_FLOW_CONTEXT_DEST_TYPE_TIR = 0x2, MLX5_FLOW_CONTEXT_DEST_TYPE_QP = 0x3, }; struct mlx5_ifc_dest_format_struct_bits { u8 destination_type[0x8]; u8 destination_id[0x18]; u8 reserved_0[0x20]; }; struct mlx5_ifc_ipv4_layout_bits { u8 reserved_at_0[0x60]; u8 ipv4[0x20]; }; struct mlx5_ifc_ipv6_layout_bits { u8 ipv6[16][0x8]; }; union mlx5_ifc_ipv6_layout_ipv4_layout_auto_bits { struct mlx5_ifc_ipv6_layout_bits ipv6_layout; struct mlx5_ifc_ipv4_layout_bits ipv4_layout; u8 reserved_at_0[0x80]; }; struct mlx5_ifc_fte_match_set_lyr_2_4_bits { u8 smac_47_16[0x20]; u8 smac_15_0[0x10]; u8 ethertype[0x10]; u8 dmac_47_16[0x20]; u8 dmac_15_0[0x10]; u8 first_prio[0x3]; u8 first_cfi[0x1]; u8 first_vid[0xc]; u8 ip_protocol[0x8]; u8 ip_dscp[0x6]; u8 ip_ecn[0x2]; u8 cvlan_tag[0x1]; u8 svlan_tag[0x1]; u8 frag[0x1]; u8 reserved_1[0x4]; u8 tcp_flags[0x9]; u8 tcp_sport[0x10]; u8 tcp_dport[0x10]; u8 reserved_2[0x20]; u8 udp_sport[0x10]; u8 udp_dport[0x10]; union mlx5_ifc_ipv6_layout_ipv4_layout_auto_bits src_ipv4_src_ipv6; union mlx5_ifc_ipv6_layout_ipv4_layout_auto_bits dst_ipv4_dst_ipv6; }; struct mlx5_ifc_fte_match_set_misc_bits { u8 reserved_0[0x8]; u8 source_sqn[0x18]; u8 reserved_1[0x10]; u8 source_port[0x10]; u8 outer_second_prio[0x3]; u8 outer_second_cfi[0x1]; u8 outer_second_vid[0xc]; u8 inner_second_prio[0x3]; u8 inner_second_cfi[0x1]; u8 inner_second_vid[0xc]; u8 outer_second_vlan_tag[0x1]; u8 inner_second_vlan_tag[0x1]; u8 reserved_2[0xe]; u8 gre_protocol[0x10]; u8 gre_key_h[0x18]; u8 gre_key_l[0x8]; u8 vxlan_vni[0x18]; u8 reserved_3[0x8]; u8 geneve_vni[0x18]; u8 
reserved4[0x7]; u8 geneve_oam[0x1]; u8 reserved_5[0xc]; u8 outer_ipv6_flow_label[0x14]; u8 reserved_6[0xc]; u8 inner_ipv6_flow_label[0x14]; u8 reserved_7[0xa]; u8 geneve_opt_len[0x6]; u8 geneve_protocol_type[0x10]; u8 reserved_8[0x8]; u8 bth_dst_qp[0x18]; u8 reserved_9[0xa0]; }; struct mlx5_ifc_cmd_pas_bits { u8 pa_h[0x20]; u8 pa_l[0x14]; u8 reserved_0[0xc]; }; struct mlx5_ifc_uint64_bits { u8 hi[0x20]; u8 lo[0x20]; }; struct mlx5_ifc_application_prio_entry_bits { u8 reserved_0[0x8]; u8 priority[0x3]; u8 reserved_1[0x2]; u8 sel[0x3]; u8 protocol_id[0x10]; }; struct mlx5_ifc_nodnic_ring_doorbell_bits { u8 reserved_0[0x8]; u8 ring_pi[0x10]; u8 reserved_1[0x8]; }; enum { MLX5_ADS_STAT_RATE_NO_LIMIT = 0x0, MLX5_ADS_STAT_RATE_2_5GBPS = 0x7, MLX5_ADS_STAT_RATE_10GBPS = 0x8, MLX5_ADS_STAT_RATE_30GBPS = 0x9, MLX5_ADS_STAT_RATE_5GBPS = 0xa, MLX5_ADS_STAT_RATE_20GBPS = 0xb, MLX5_ADS_STAT_RATE_40GBPS = 0xc, MLX5_ADS_STAT_RATE_60GBPS = 0xd, MLX5_ADS_STAT_RATE_80GBPS = 0xe, MLX5_ADS_STAT_RATE_120GBPS = 0xf, }; struct mlx5_ifc_ads_bits { u8 fl[0x1]; u8 free_ar[0x1]; u8 reserved_0[0xe]; u8 pkey_index[0x10]; u8 reserved_1[0x8]; u8 grh[0x1]; u8 mlid[0x7]; u8 rlid[0x10]; u8 ack_timeout[0x5]; u8 reserved_2[0x3]; u8 src_addr_index[0x8]; u8 log_rtm[0x4]; u8 stat_rate[0x4]; u8 hop_limit[0x8]; u8 reserved_3[0x4]; u8 tclass[0x8]; u8 flow_label[0x14]; u8 rgid_rip[16][0x8]; u8 reserved_4[0x4]; u8 f_dscp[0x1]; u8 f_ecn[0x1]; u8 reserved_5[0x1]; u8 f_eth_prio[0x1]; u8 ecn[0x2]; u8 dscp[0x6]; u8 udp_sport[0x10]; u8 dei_cfi[0x1]; u8 eth_prio[0x3]; u8 sl[0x4]; u8 port[0x8]; u8 rmac_47_32[0x10]; u8 rmac_31_0[0x20]; }; struct mlx5_ifc_diagnostic_counter_cap_bits { u8 sync[0x1]; u8 reserved_0[0xf]; u8 counter_id[0x10]; }; struct mlx5_ifc_debug_cap_bits { u8 reserved_0[0x18]; u8 log_max_samples[0x8]; u8 single[0x1]; u8 repetitive[0x1]; u8 health_mon_rx_activity[0x1]; u8 reserved_1[0x15]; u8 log_min_sample_period[0x8]; u8 reserved_2[0x1c0]; struct mlx5_ifc_diagnostic_counter_cap_bits diagnostic_counter[0x1f0]; }; struct mlx5_ifc_qos_cap_bits { u8 packet_pacing[0x1]; u8 esw_scheduling[0x1]; u8 esw_bw_share[0x1]; u8 esw_rate_limit[0x1]; u8 hll[0x1]; u8 packet_pacing_burst_bound[0x1]; u8 packet_pacing_typical_size[0x1]; u8 reserved_at_7[0x19]; u8 reserved_at_20[0x20]; u8 packet_pacing_max_rate[0x20]; u8 packet_pacing_min_rate[0x20]; u8 reserved_at_80[0x10]; u8 packet_pacing_rate_table_size[0x10]; u8 esw_element_type[0x10]; u8 esw_tsar_type[0x10]; u8 reserved_at_c0[0x10]; u8 max_qos_para_vport[0x10]; u8 max_tsar_bw_share[0x20]; u8 reserved_at_100[0x700]; }; struct mlx5_ifc_snapshot_cap_bits { u8 reserved_0[0x1d]; u8 suspend_qp_uc[0x1]; u8 suspend_qp_ud[0x1]; u8 suspend_qp_rc[0x1]; u8 reserved_1[0x1c]; u8 restore_pd[0x1]; u8 restore_uar[0x1]; u8 restore_mkey[0x1]; u8 restore_qp[0x1]; u8 reserved_2[0x1e]; u8 named_mkey[0x1]; u8 named_qp[0x1]; u8 reserved_3[0x7a0]; }; struct mlx5_ifc_e_switch_cap_bits { u8 vport_svlan_strip[0x1]; u8 vport_cvlan_strip[0x1]; u8 vport_svlan_insert[0x1]; u8 vport_cvlan_insert_if_not_exist[0x1]; u8 vport_cvlan_insert_overwrite[0x1]; u8 reserved_0[0x19]; u8 nic_vport_node_guid_modify[0x1]; u8 nic_vport_port_guid_modify[0x1]; u8 reserved_1[0x7e0]; }; struct mlx5_ifc_flow_table_eswitch_cap_bits { u8 reserved_0[0x200]; struct mlx5_ifc_flow_table_prop_layout_bits flow_table_properties_nic_esw_fdb; struct mlx5_ifc_flow_table_prop_layout_bits flow_table_properties_esw_acl_ingress; struct mlx5_ifc_flow_table_prop_layout_bits flow_table_properties_esw_acl_egress; u8 reserved_1[0x7800]; }; struct 
mlx5_ifc_flow_table_nic_cap_bits { u8 nic_rx_multi_path_tirs[0x1]; u8 nic_rx_multi_path_tirs_fts[0x1]; u8 allow_sniffer_and_nic_rx_shared_tir[0x1]; u8 reserved_at_3[0x1fd]; struct mlx5_ifc_flow_table_prop_layout_bits flow_table_properties_nic_receive; struct mlx5_ifc_flow_table_prop_layout_bits flow_table_properties_nic_receive_rdma; struct mlx5_ifc_flow_table_prop_layout_bits flow_table_properties_nic_receive_sniffer; struct mlx5_ifc_flow_table_prop_layout_bits flow_table_properties_nic_transmit; struct mlx5_ifc_flow_table_prop_layout_bits flow_table_properties_nic_transmit_rdma; struct mlx5_ifc_flow_table_prop_layout_bits flow_table_properties_nic_transmit_sniffer; u8 reserved_1[0x7200]; }; struct mlx5_ifc_pddr_module_info_bits { u8 cable_technology[0x8]; u8 cable_breakout[0x8]; u8 ext_ethernet_compliance_code[0x8]; u8 ethernet_compliance_code[0x8]; u8 cable_type[0x4]; u8 cable_vendor[0x4]; u8 cable_length[0x8]; u8 cable_identifier[0x8]; u8 cable_power_class[0x8]; u8 reserved_at_40[0x8]; u8 cable_rx_amp[0x8]; u8 cable_rx_emphasis[0x8]; u8 cable_tx_equalization[0x8]; u8 reserved_at_60[0x8]; u8 cable_attenuation_12g[0x8]; u8 cable_attenuation_7g[0x8]; u8 cable_attenuation_5g[0x8]; u8 reserved_at_80[0x8]; u8 rx_cdr_cap[0x4]; u8 tx_cdr_cap[0x4]; u8 reserved_at_90[0x4]; u8 rx_cdr_state[0x4]; u8 reserved_at_98[0x4]; u8 tx_cdr_state[0x4]; u8 vendor_name[16][0x8]; u8 vendor_pn[16][0x8]; u8 vendor_rev[0x20]; u8 fw_version[0x20]; u8 vendor_sn[16][0x8]; u8 temperature[0x10]; u8 voltage[0x10]; u8 rx_power_lane0[0x10]; u8 rx_power_lane1[0x10]; u8 rx_power_lane2[0x10]; u8 rx_power_lane3[0x10]; u8 reserved_at_2c0[0x40]; u8 tx_power_lane0[0x10]; u8 tx_power_lane1[0x10]; u8 tx_power_lane2[0x10]; u8 tx_power_lane3[0x10]; u8 reserved_at_340[0x40]; u8 tx_bias_lane0[0x10]; u8 tx_bias_lane1[0x10]; u8 tx_bias_lane2[0x10]; u8 tx_bias_lane3[0x10]; u8 reserved_at_3c0[0x40]; u8 temperature_high_th[0x10]; u8 temperature_low_th[0x10]; u8 voltage_high_th[0x10]; u8 voltage_low_th[0x10]; u8 rx_power_high_th[0x10]; u8 rx_power_low_th[0x10]; u8 tx_power_high_th[0x10]; u8 tx_power_low_th[0x10]; u8 tx_bias_high_th[0x10]; u8 tx_bias_low_th[0x10]; u8 reserved_at_4a0[0x10]; u8 wavelength[0x10]; u8 reserved_at_4c0[0x300]; }; struct mlx5_ifc_per_protocol_networking_offload_caps_bits { u8 csum_cap[0x1]; u8 vlan_cap[0x1]; u8 lro_cap[0x1]; u8 lro_psh_flag[0x1]; u8 lro_time_stamp[0x1]; u8 lro_max_msg_sz_mode[0x2]; u8 wqe_vlan_insert[0x1]; u8 self_lb_en_modifiable[0x1]; u8 self_lb_mc[0x1]; u8 self_lb_uc[0x1]; u8 max_lso_cap[0x5]; u8 multi_pkt_send_wqe[0x2]; u8 wqe_inline_mode[0x2]; u8 rss_ind_tbl_cap[0x4]; u8 scatter_fcs[0x1]; u8 reserved_1[0x2]; u8 tunnel_lso_const_out_ip_id[0x1]; u8 tunnel_lro_gre[0x1]; u8 tunnel_lro_vxlan[0x1]; u8 tunnel_statless_gre[0x1]; u8 tunnel_stateless_vxlan[0x1]; u8 swp[0x1]; u8 swp_csum[0x1]; u8 swp_lso[0x1]; u8 reserved_2[0x1b]; u8 max_geneve_opt_len[0x1]; u8 tunnel_stateless_geneve_rx[0x1]; u8 reserved_3[0x10]; u8 lro_min_mss_size[0x10]; u8 reserved_4[0x120]; u8 lro_timer_supported_periods[4][0x20]; u8 reserved_5[0x600]; }; enum { MLX5_ROCE_CAP_L3_TYPE_GRH = 0x1, MLX5_ROCE_CAP_L3_TYPE_IPV4 = 0x2, MLX5_ROCE_CAP_L3_TYPE_IPV6 = 0x4, }; struct mlx5_ifc_roce_cap_bits { u8 roce_apm[0x1]; u8 rts2rts_primary_eth_prio[0x1]; u8 roce_rx_allow_untagged[0x1]; u8 rts2rts_src_addr_index_for_vlan_valid_vlan_id[0x1]; u8 reserved_0[0x1c]; u8 reserved_1[0x60]; u8 reserved_2[0xc]; u8 l3_type[0x4]; u8 reserved_3[0x8]; u8 roce_version[0x8]; u8 reserved_4[0x10]; u8 r_roce_dest_udp_port[0x10]; u8 
r_roce_max_src_udp_port[0x10]; u8 r_roce_min_src_udp_port[0x10]; u8 reserved_5[0x10]; u8 roce_address_table_size[0x10]; u8 reserved_6[0x700]; }; enum { MLX5_ATOMIC_CAPS_ATOMIC_SIZE_QP_1_BYTE = 0x1, MLX5_ATOMIC_CAPS_ATOMIC_SIZE_QP_2_BYTES = 0x2, MLX5_ATOMIC_CAPS_ATOMIC_SIZE_QP_4_BYTES = 0x4, MLX5_ATOMIC_CAPS_ATOMIC_SIZE_QP_8_BYTES = 0x8, MLX5_ATOMIC_CAPS_ATOMIC_SIZE_QP_16_BYTES = 0x10, MLX5_ATOMIC_CAPS_ATOMIC_SIZE_QP_32_BYTES = 0x20, MLX5_ATOMIC_CAPS_ATOMIC_SIZE_QP_64_BYTES = 0x40, MLX5_ATOMIC_CAPS_ATOMIC_SIZE_QP_128_BYTES = 0x80, MLX5_ATOMIC_CAPS_ATOMIC_SIZE_QP_256_BYTES = 0x100, }; enum { MLX5_ATOMIC_CAPS_ATOMIC_SIZE_DC_1_BYTE = 0x1, MLX5_ATOMIC_CAPS_ATOMIC_SIZE_DC_2_BYTES = 0x2, MLX5_ATOMIC_CAPS_ATOMIC_SIZE_DC_4_BYTES = 0x4, MLX5_ATOMIC_CAPS_ATOMIC_SIZE_DC_8_BYTES = 0x8, MLX5_ATOMIC_CAPS_ATOMIC_SIZE_DC_16_BYTES = 0x10, MLX5_ATOMIC_CAPS_ATOMIC_SIZE_DC_32_BYTES = 0x20, MLX5_ATOMIC_CAPS_ATOMIC_SIZE_DC_64_BYTES = 0x40, MLX5_ATOMIC_CAPS_ATOMIC_SIZE_DC_128_BYTES = 0x80, MLX5_ATOMIC_CAPS_ATOMIC_SIZE_DC_256_BYTES = 0x100, }; struct mlx5_ifc_atomic_caps_bits { u8 reserved_0[0x40]; u8 atomic_req_8B_endianess_mode[0x2]; u8 reserved_1[0x4]; u8 supported_atomic_req_8B_endianess_mode_1[0x1]; u8 reserved_2[0x19]; u8 reserved_3[0x20]; u8 reserved_4[0x10]; u8 atomic_operations[0x10]; u8 reserved_5[0x10]; u8 atomic_size_qp[0x10]; u8 reserved_6[0x10]; u8 atomic_size_dc[0x10]; u8 reserved_7[0x720]; }; struct mlx5_ifc_odp_cap_bits { u8 reserved_0[0x40]; u8 sig[0x1]; u8 reserved_1[0x1f]; u8 reserved_2[0x20]; struct mlx5_ifc_odp_per_transport_service_cap_bits rc_odp_caps; struct mlx5_ifc_odp_per_transport_service_cap_bits uc_odp_caps; struct mlx5_ifc_odp_per_transport_service_cap_bits ud_odp_caps; struct mlx5_ifc_odp_per_transport_service_cap_bits xrc_odp_caps; struct mlx5_ifc_odp_per_transport_service_cap_bits dc_odp_caps; u8 reserved_3[0x6e0]; }; enum { MLX5_CMD_HCA_CAP_GID_TABLE_SIZE_8_GID_ENTRIES = 0x0, MLX5_CMD_HCA_CAP_GID_TABLE_SIZE_16_GID_ENTRIES = 0x1, MLX5_CMD_HCA_CAP_GID_TABLE_SIZE_32_GID_ENTRIES = 0x2, MLX5_CMD_HCA_CAP_GID_TABLE_SIZE_64_GID_ENTRIES = 0x3, MLX5_CMD_HCA_CAP_GID_TABLE_SIZE_128_GID_ENTRIES = 0x4, }; enum { MLX5_CMD_HCA_CAP_PKEY_TABLE_SIZE_128_ENTRIES = 0x0, MLX5_CMD_HCA_CAP_PKEY_TABLE_SIZE_256_ENTRIES = 0x1, MLX5_CMD_HCA_CAP_PKEY_TABLE_SIZE_512_ENTRIES = 0x2, MLX5_CMD_HCA_CAP_PKEY_TABLE_SIZE_1K_ENTRIES = 0x3, MLX5_CMD_HCA_CAP_PKEY_TABLE_SIZE_2K_ENTRIES = 0x4, MLX5_CMD_HCA_CAP_PKEY_TABLE_SIZE_4K_ENTRIES = 0x5, }; enum { MLX5_CMD_HCA_CAP_PORT_TYPE_IB = 0x0, MLX5_CMD_HCA_CAP_PORT_TYPE_ETHERNET = 0x1, }; enum { MLX5_CMD_HCA_CAP_CMDIF_CHECKSUM_DISABLED = 0x0, MLX5_CMD_HCA_CAP_CMDIF_CHECKSUM_INITIAL_STATE = 0x1, MLX5_CMD_HCA_CAP_CMDIF_CHECKSUM_ENABLED = 0x3, }; struct mlx5_ifc_cmd_hca_cap_bits { u8 reserved_0[0x80]; u8 log_max_srq_sz[0x8]; u8 log_max_qp_sz[0x8]; u8 reserved_1[0xb]; u8 log_max_qp[0x5]; u8 reserved_2[0xb]; u8 log_max_srq[0x5]; u8 reserved_3[0x10]; u8 reserved_4[0x8]; u8 log_max_cq_sz[0x8]; u8 reserved_5[0xb]; u8 log_max_cq[0x5]; u8 log_max_eq_sz[0x8]; u8 relaxed_ordering_write[1]; u8 reserved_6[0x1]; u8 log_max_mkey[0x6]; u8 reserved_7[0xb]; u8 fast_teardown[0x1]; u8 log_max_eq[0x4]; u8 max_indirection[0x8]; u8 reserved_8[0x1]; u8 log_max_mrw_sz[0x7]; u8 force_teardown[0x1]; u8 reserved_9[0x1]; u8 log_max_bsf_list_size[0x6]; u8 reserved_10[0x2]; u8 log_max_klm_list_size[0x6]; u8 reserved_11[0xa]; u8 log_max_ra_req_dc[0x6]; u8 reserved_12[0xa]; u8 log_max_ra_res_dc[0x6]; u8 reserved_13[0xa]; u8 log_max_ra_req_qp[0x6]; u8 reserved_14[0xa]; u8 log_max_ra_res_qp[0x6]; u8 
pad_cap[0x1]; u8 cc_query_allowed[0x1]; u8 cc_modify_allowed[0x1]; u8 start_pad[0x1]; u8 cache_line_128byte[0x1]; u8 reserved_at_165[0xa]; u8 qcam_reg[0x1]; u8 gid_table_size[0x10]; u8 out_of_seq_cnt[0x1]; u8 vport_counters[0x1]; u8 retransmission_q_counters[0x1]; u8 debug[0x1]; u8 modify_rq_counters_set_id[0x1]; u8 rq_delay_drop[0x1]; u8 max_qp_cnt[0xa]; u8 pkey_table_size[0x10]; u8 vport_group_manager[0x1]; u8 vhca_group_manager[0x1]; u8 ib_virt[0x1]; u8 eth_virt[0x1]; u8 reserved_17[0x1]; u8 ets[0x1]; u8 nic_flow_table[0x1]; u8 eswitch_flow_table[0x1]; u8 reserved_18[0x1]; u8 mcam_reg[0x1]; u8 pcam_reg[0x1]; u8 local_ca_ack_delay[0x5]; u8 port_module_event[0x1]; u8 reserved_19[0x5]; u8 port_type[0x2]; u8 num_ports[0x8]; u8 snapshot[0x1]; u8 reserved_20[0x2]; u8 log_max_msg[0x5]; u8 reserved_21[0x4]; u8 max_tc[0x4]; u8 temp_warn_event[0x1]; u8 dcbx[0x1]; u8 general_notification_event[0x1]; u8 reserved_at_1d3[0x2]; u8 fpga[0x1]; u8 rol_s[0x1]; u8 rol_g[0x1]; u8 reserved_23[0x1]; u8 wol_s[0x1]; u8 wol_g[0x1]; u8 wol_a[0x1]; u8 wol_b[0x1]; u8 wol_m[0x1]; u8 wol_u[0x1]; u8 wol_p[0x1]; u8 stat_rate_support[0x10]; u8 reserved_24[0xc]; u8 cqe_version[0x4]; u8 compact_address_vector[0x1]; u8 striding_rq[0x1]; u8 reserved_25[0x1]; u8 ipoib_enhanced_offloads[0x1]; u8 ipoib_ipoib_offloads[0x1]; u8 reserved_26[0x8]; u8 dc_connect_qp[0x1]; u8 dc_cnak_trace[0x1]; u8 drain_sigerr[0x1]; u8 cmdif_checksum[0x2]; u8 sigerr_cqe[0x1]; u8 reserved_27[0x1]; u8 wq_signature[0x1]; u8 sctr_data_cqe[0x1]; u8 reserved_28[0x1]; u8 sho[0x1]; u8 tph[0x1]; u8 rf[0x1]; u8 dct[0x1]; u8 qos[0x1]; u8 eth_net_offloads[0x1]; u8 roce[0x1]; u8 atomic[0x1]; u8 reserved_30[0x1]; u8 cq_oi[0x1]; u8 cq_resize[0x1]; u8 cq_moderation[0x1]; u8 cq_period_mode_modify[0x1]; u8 cq_invalidate[0x1]; u8 reserved_at_225[0x1]; u8 cq_eq_remap[0x1]; u8 pg[0x1]; u8 block_lb_mc[0x1]; u8 exponential_backoff[0x1]; u8 scqe_break_moderation[0x1]; u8 cq_period_start_from_cqe[0x1]; u8 cd[0x1]; u8 atm[0x1]; u8 apm[0x1]; u8 imaicl[0x1]; u8 reserved_32[0x6]; u8 qkv[0x1]; u8 pkv[0x1]; u8 set_deth_sqpn[0x1]; u8 reserved_33[0x3]; u8 xrc[0x1]; u8 ud[0x1]; u8 uc[0x1]; u8 rc[0x1]; - u8 reserved_34[0xa]; + u8 uar_4k[0x1]; + u8 reserved_at_241[0x9]; u8 uar_sz[0x6]; u8 reserved_35[0x8]; u8 log_pg_sz[0x8]; u8 bf[0x1]; u8 driver_version[0x1]; u8 pad_tx_eth_packet[0x1]; u8 reserved_36[0x8]; u8 log_bf_reg_size[0x5]; u8 reserved_37[0x10]; u8 num_of_diagnostic_counters[0x10]; u8 max_wqe_sz_sq[0x10]; u8 reserved_38[0x10]; u8 max_wqe_sz_rq[0x10]; u8 reserved_39[0x10]; u8 max_wqe_sz_sq_dc[0x10]; u8 reserved_40[0x7]; u8 max_qp_mcg[0x19]; u8 reserved_41[0x18]; u8 log_max_mcg[0x8]; u8 reserved_42[0x3]; u8 log_max_transport_domain[0x5]; u8 reserved_43[0x3]; u8 log_max_pd[0x5]; u8 reserved_44[0xb]; u8 log_max_xrcd[0x5]; u8 nic_receive_steering_discard[0x1]; u8 reserved_45[0x7]; u8 log_max_flow_counter_bulk[0x8]; u8 max_flow_counter[0x10]; u8 reserved_46[0x3]; u8 log_max_rq[0x5]; u8 reserved_47[0x3]; u8 log_max_sq[0x5]; u8 reserved_48[0x3]; u8 log_max_tir[0x5]; u8 reserved_49[0x3]; u8 log_max_tis[0x5]; u8 basic_cyclic_rcv_wqe[0x1]; u8 reserved_50[0x2]; u8 log_max_rmp[0x5]; u8 reserved_51[0x3]; u8 log_max_rqt[0x5]; u8 reserved_52[0x3]; u8 log_max_rqt_size[0x5]; u8 reserved_53[0x3]; u8 log_max_tis_per_sq[0x5]; u8 reserved_54[0x3]; u8 log_max_stride_sz_rq[0x5]; u8 reserved_55[0x3]; u8 log_min_stride_sz_rq[0x5]; u8 reserved_56[0x3]; u8 log_max_stride_sz_sq[0x5]; u8 reserved_57[0x3]; u8 log_min_stride_sz_sq[0x5]; u8 reserved_58[0x1b]; u8 log_max_wq_sz[0x5]; u8 
nic_vport_change_event[0x1]; u8 disable_local_lb[0x1]; u8 reserved_59[0x9]; u8 log_max_vlan_list[0x5]; u8 reserved_60[0x3]; u8 log_max_current_mc_list[0x5]; u8 reserved_61[0x3]; u8 log_max_current_uc_list[0x5]; u8 general_obj_types[0x40]; u8 reserved_at_440[0x8]; u8 create_qp_start_hint[0x18]; u8 reserved_at_460[0x3]; u8 log_max_uctx[0x5]; u8 reserved_at_468[0x3]; u8 log_max_umem[0x5]; u8 max_num_eqs[0x10]; u8 reserved_at_480[0x1]; u8 tls_tx[0x1]; u8 reserved_at_482[0x1]; u8 log_max_l2_table[0x5]; u8 reserved_64[0x8]; u8 log_uar_page_sz[0x10]; u8 reserved_65[0x20]; u8 device_frequency_mhz[0x20]; u8 device_frequency_khz[0x20]; - u8 reserved_66[0x80]; + u8 reserved_at_500[0x20]; + u8 num_of_uars_per_page[0x20]; + u8 reserved_at_540[0x40]; u8 log_max_atomic_size_qp[0x8]; u8 reserved_67[0x10]; u8 log_max_atomic_size_dc[0x8]; u8 reserved_at_5a0[0x13]; u8 log_max_dek[0x5]; u8 reserved_at_5b8[0x4]; u8 mini_cqe_resp_stride_index[0x1]; u8 cqe_128_always[0x1]; u8 cqe_compression_128b[0x1]; u8 cqe_compression[0x1]; u8 cqe_compression_timeout[0x10]; u8 cqe_compression_max_num[0x10]; u8 reserved_69[0x220]; }; enum mlx5_flow_destination_type { MLX5_FLOW_DESTINATION_TYPE_VPORT = 0x0, MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE = 0x1, MLX5_FLOW_DESTINATION_TYPE_TIR = 0x2, }; union mlx5_ifc_dest_format_struct_flow_counter_list_auto_bits { struct mlx5_ifc_dest_format_struct_bits dest_format_struct; struct mlx5_ifc_flow_counter_list_bits flow_counter_list; u8 reserved_0[0x40]; }; struct mlx5_ifc_fte_match_param_bits { struct mlx5_ifc_fte_match_set_lyr_2_4_bits outer_headers; struct mlx5_ifc_fte_match_set_misc_bits misc_parameters; struct mlx5_ifc_fte_match_set_lyr_2_4_bits inner_headers; u8 reserved_0[0xa00]; }; enum { MLX5_RX_HASH_FIELD_SELECT_SELECTED_FIELDS_SRC_IP = 0x0, MLX5_RX_HASH_FIELD_SELECT_SELECTED_FIELDS_DST_IP = 0x1, MLX5_RX_HASH_FIELD_SELECT_SELECTED_FIELDS_L4_SPORT = 0x2, MLX5_RX_HASH_FIELD_SELECT_SELECTED_FIELDS_L4_DPORT = 0x3, MLX5_RX_HASH_FIELD_SELECT_SELECTED_FIELDS_IPSEC_SPI = 0x4, }; struct mlx5_ifc_rx_hash_field_select_bits { u8 l3_prot_type[0x1]; u8 l4_prot_type[0x1]; u8 selected_fields[0x1e]; }; struct mlx5_ifc_tls_capabilities_bits { u8 tls_1_2_aes_gcm_128[0x1]; u8 tls_1_3_aes_gcm_128[0x1]; u8 tls_1_2_aes_gcm_256[0x1]; u8 tls_1_3_aes_gcm_256[0x1]; u8 reserved_at_4[0x1c]; u8 reserved_at_20[0x7e0]; }; enum { MLX5_WQ_TYPE_LINKED_LIST = 0x0, MLX5_WQ_TYPE_CYCLIC = 0x1, MLX5_WQ_TYPE_STRQ_LINKED_LIST = 0x2, MLX5_WQ_TYPE_STRQ_CYCLIC = 0x3, }; enum rq_type { RQ_TYPE_NONE, RQ_TYPE_STRIDE, }; enum { MLX5_WQ_END_PAD_MODE_NONE = 0x0, MLX5_WQ_END_PAD_MODE_ALIGN = 0x1, }; struct mlx5_ifc_wq_bits { u8 wq_type[0x4]; u8 wq_signature[0x1]; u8 end_padding_mode[0x2]; u8 cd_slave[0x1]; u8 reserved_0[0x18]; u8 hds_skip_first_sge[0x1]; u8 log2_hds_buf_size[0x3]; u8 reserved_1[0x7]; u8 page_offset[0x5]; u8 lwm[0x10]; u8 reserved_2[0x8]; u8 pd[0x18]; u8 reserved_3[0x8]; u8 uar_page[0x18]; u8 dbr_addr[0x40]; u8 hw_counter[0x20]; u8 sw_counter[0x20]; u8 reserved_4[0xc]; u8 log_wq_stride[0x4]; u8 reserved_5[0x3]; u8 log_wq_pg_sz[0x5]; u8 reserved_6[0x3]; u8 log_wq_sz[0x5]; u8 reserved_7[0x15]; u8 single_wqe_log_num_of_strides[0x3]; u8 two_byte_shift_en[0x1]; u8 reserved_8[0x4]; u8 single_stride_log_num_of_bytes[0x3]; u8 reserved_9[0x4c0]; struct mlx5_ifc_cmd_pas_bits pas[0]; }; struct mlx5_ifc_rq_num_bits { u8 reserved_0[0x8]; u8 rq_num[0x18]; }; struct mlx5_ifc_mac_address_layout_bits { u8 reserved_0[0x10]; u8 mac_addr_47_32[0x10]; u8 mac_addr_31_0[0x20]; }; struct mlx5_ifc_cong_control_r_roce_ecn_np_bits { u8 
reserved_0[0xa0]; u8 min_time_between_cnps[0x20]; u8 reserved_1[0x12]; u8 cnp_dscp[0x6]; u8 reserved_2[0x4]; u8 cnp_prio_mode[0x1]; u8 cnp_802p_prio[0x3]; u8 reserved_3[0x720]; }; struct mlx5_ifc_cong_control_r_roce_ecn_rp_bits { u8 reserved_0[0x60]; u8 reserved_1[0x4]; u8 clamp_tgt_rate[0x1]; u8 reserved_2[0x3]; u8 clamp_tgt_rate_after_time_inc[0x1]; u8 reserved_3[0x17]; u8 reserved_4[0x20]; u8 rpg_time_reset[0x20]; u8 rpg_byte_reset[0x20]; u8 rpg_threshold[0x20]; u8 rpg_max_rate[0x20]; u8 rpg_ai_rate[0x20]; u8 rpg_hai_rate[0x20]; u8 rpg_gd[0x20]; u8 rpg_min_dec_fac[0x20]; u8 rpg_min_rate[0x20]; u8 reserved_5[0xe0]; u8 rate_to_set_on_first_cnp[0x20]; u8 dce_tcp_g[0x20]; u8 dce_tcp_rtt[0x20]; u8 rate_reduce_monitor_period[0x20]; u8 reserved_6[0x20]; u8 initial_alpha_value[0x20]; u8 reserved_7[0x4a0]; }; struct mlx5_ifc_cong_control_802_1qau_rp_bits { u8 reserved_0[0x80]; u8 rppp_max_rps[0x20]; u8 rpg_time_reset[0x20]; u8 rpg_byte_reset[0x20]; u8 rpg_threshold[0x20]; u8 rpg_max_rate[0x20]; u8 rpg_ai_rate[0x20]; u8 rpg_hai_rate[0x20]; u8 rpg_gd[0x20]; u8 rpg_min_dec_fac[0x20]; u8 rpg_min_rate[0x20]; u8 reserved_1[0x640]; }; enum { MLX5_RESIZE_FIELD_SELECT_RESIZE_FIELD_SELECT_LOG_CQ_SIZE = 0x1, MLX5_RESIZE_FIELD_SELECT_RESIZE_FIELD_SELECT_PAGE_OFFSET = 0x2, MLX5_RESIZE_FIELD_SELECT_RESIZE_FIELD_SELECT_LOG_PAGE_SIZE = 0x4, }; struct mlx5_ifc_resize_field_select_bits { u8 resize_field_select[0x20]; }; enum { MLX5_MODIFY_FIELD_SELECT_MODIFY_FIELD_SELECT_CQ_PERIOD = 0x1, MLX5_MODIFY_FIELD_SELECT_MODIFY_FIELD_SELECT_CQ_MAX_COUNT = 0x2, MLX5_MODIFY_FIELD_SELECT_MODIFY_FIELD_SELECT_OI = 0x4, MLX5_MODIFY_FIELD_SELECT_MODIFY_FIELD_SELECT_C_EQN = 0x8, MLX5_MODIFY_FIELD_SELECT_MODIFY_FIELD_SELECT_CQ_PERIOD_MODE = 0x10, MLX5_MODIFY_FIELD_SELECT_MODIFY_FIELD_SELECT_STATUS = 0x20, }; struct mlx5_ifc_modify_field_select_bits { u8 modify_field_select[0x20]; }; struct mlx5_ifc_field_select_r_roce_np_bits { u8 field_select_r_roce_np[0x20]; }; enum { MLX5_FIELD_SELECT_R_ROCE_RP_FIELD_SELECT_R_ROCE_RP_CLAMP_TGT_RATE = 0x2, MLX5_FIELD_SELECT_R_ROCE_RP_FIELD_SELECT_R_ROCE_RP_CLAMP_TGT_RATE_AFTER_TIME_INC = 0x4, MLX5_FIELD_SELECT_R_ROCE_RP_FIELD_SELECT_R_ROCE_RP_RPG_TIME_RESET = 0x8, MLX5_FIELD_SELECT_R_ROCE_RP_FIELD_SELECT_R_ROCE_RP_RPG_BYTE_RESET = 0x10, MLX5_FIELD_SELECT_R_ROCE_RP_FIELD_SELECT_R_ROCE_RP_RPG_THRESHOLD = 0x20, MLX5_FIELD_SELECT_R_ROCE_RP_FIELD_SELECT_R_ROCE_RP_RPG_MAX_RATE = 0x40, MLX5_FIELD_SELECT_R_ROCE_RP_FIELD_SELECT_R_ROCE_RP_RPG_AI_RATE = 0x80, MLX5_FIELD_SELECT_R_ROCE_RP_FIELD_SELECT_R_ROCE_RP_RPG_HAI_RATE = 0x100, MLX5_FIELD_SELECT_R_ROCE_RP_FIELD_SELECT_R_ROCE_RP_RPG_MIN_DEC_FAC = 0x200, MLX5_FIELD_SELECT_R_ROCE_RP_FIELD_SELECT_R_ROCE_RP_RPG_MIN_RATE = 0x400, MLX5_FIELD_SELECT_R_ROCE_RP_FIELD_SELECT_R_ROCE_RP_RATE_TO_SET_ON_FIRST_CNP = 0x800, MLX5_FIELD_SELECT_R_ROCE_RP_FIELD_SELECT_R_ROCE_RP_DCE_TCP_G = 0x1000, MLX5_FIELD_SELECT_R_ROCE_RP_FIELD_SELECT_R_ROCE_RP_DCE_TCP_RTT = 0x2000, MLX5_FIELD_SELECT_R_ROCE_RP_FIELD_SELECT_R_ROCE_RP_RATE_REDUCE_MONITOR_PERIOD = 0x4000, MLX5_FIELD_SELECT_R_ROCE_RP_FIELD_SELECT_R_ROCE_RP_INITIAL_ALPHA_VALUE = 0x8000, }; struct mlx5_ifc_field_select_r_roce_rp_bits { u8 field_select_r_roce_rp[0x20]; }; enum { MLX5_FIELD_SELECT_802_1QAU_RP_FIELD_SELECT_8021QAURP_RPPP_MAX_RPS = 0x4, MLX5_FIELD_SELECT_802_1QAU_RP_FIELD_SELECT_8021QAURP_RPG_TIME_RESET = 0x8, MLX5_FIELD_SELECT_802_1QAU_RP_FIELD_SELECT_8021QAURP_RPG_BYTE_RESET = 0x10, MLX5_FIELD_SELECT_802_1QAU_RP_FIELD_SELECT_8021QAURP_RPG_THRESHOLD = 0x20, 
MLX5_FIELD_SELECT_802_1QAU_RP_FIELD_SELECT_8021QAURP_RPG_MAX_RATE = 0x40, MLX5_FIELD_SELECT_802_1QAU_RP_FIELD_SELECT_8021QAURP_RPG_AI_RATE = 0x80, MLX5_FIELD_SELECT_802_1QAU_RP_FIELD_SELECT_8021QAURP_RPG_HAI_RATE = 0x100, MLX5_FIELD_SELECT_802_1QAU_RP_FIELD_SELECT_8021QAURP_RPG_GD = 0x200, MLX5_FIELD_SELECT_802_1QAU_RP_FIELD_SELECT_8021QAURP_RPG_MIN_DEC_FAC = 0x400, MLX5_FIELD_SELECT_802_1QAU_RP_FIELD_SELECT_8021QAURP_RPG_MIN_RATE = 0x800, }; struct mlx5_ifc_field_select_802_1qau_rp_bits { u8 field_select_8021qaurp[0x20]; }; struct mlx5_ifc_pptb_reg_bits { u8 reserved_at_0[0x2]; u8 mm[0x2]; u8 reserved_at_4[0x4]; u8 local_port[0x8]; u8 reserved_at_10[0x6]; u8 cm[0x1]; u8 um[0x1]; u8 pm[0x8]; u8 prio_x_buff[0x20]; u8 pm_msb[0x8]; u8 reserved_at_48[0x10]; u8 ctrl_buff[0x4]; u8 untagged_buff[0x4]; }; struct mlx5_ifc_dcbx_app_reg_bits { u8 reserved_0[0x8]; u8 port_number[0x8]; u8 reserved_1[0x10]; u8 reserved_2[0x1a]; u8 num_app_prio[0x6]; u8 reserved_3[0x40]; struct mlx5_ifc_application_prio_entry_bits app_prio[0]; }; struct mlx5_ifc_dcbx_param_reg_bits { u8 dcbx_cee_cap[0x1]; u8 dcbx_ieee_cap[0x1]; u8 dcbx_standby_cap[0x1]; u8 reserved_0[0x5]; u8 port_number[0x8]; u8 reserved_1[0xa]; u8 max_application_table_size[0x6]; u8 reserved_2[0x15]; u8 version_oper[0x3]; u8 reserved_3[0x5]; u8 version_admin[0x3]; u8 willing_admin[0x1]; u8 reserved_4[0x3]; u8 pfc_cap_oper[0x4]; u8 reserved_5[0x4]; u8 pfc_cap_admin[0x4]; u8 reserved_6[0x4]; u8 num_of_tc_oper[0x4]; u8 reserved_7[0x4]; u8 num_of_tc_admin[0x4]; u8 remote_willing[0x1]; u8 reserved_8[0x3]; u8 remote_pfc_cap[0x4]; u8 reserved_9[0x14]; u8 remote_num_of_tc[0x4]; u8 reserved_10[0x18]; u8 error[0x8]; u8 reserved_11[0x160]; }; struct mlx5_ifc_qhll_bits { u8 reserved_at_0[0x8]; u8 local_port[0x8]; u8 reserved_at_10[0x10]; u8 reserved_at_20[0x1b]; u8 hll_time[0x5]; u8 stall_en[0x1]; u8 reserved_at_41[0x1c]; u8 stall_cnt[0x3]; }; struct mlx5_ifc_qetcr_reg_bits { u8 operation_type[0x2]; u8 cap_local_admin[0x1]; u8 cap_remote_admin[0x1]; u8 reserved_0[0x4]; u8 port_number[0x8]; u8 reserved_1[0x10]; u8 reserved_2[0x20]; u8 tc[8][0x40]; u8 global_configuration[0x40]; }; struct mlx5_ifc_nodnic_ring_config_reg_bits { u8 queue_address_63_32[0x20]; u8 queue_address_31_12[0x14]; u8 reserved_0[0x6]; u8 log_size[0x6]; struct mlx5_ifc_nodnic_ring_doorbell_bits doorbell; u8 reserved_1[0x8]; u8 queue_number[0x18]; u8 q_key[0x20]; u8 reserved_2[0x10]; u8 pkey_index[0x10]; u8 reserved_3[0x40]; }; struct mlx5_ifc_nodnic_cq_arming_word_bits { u8 reserved_0[0x8]; u8 cq_ci[0x10]; u8 reserved_1[0x8]; }; enum { MLX5_NODNIC_EVENT_WORD_LINK_TYPE_INFINIBAND = 0x0, MLX5_NODNIC_EVENT_WORD_LINK_TYPE_ETHERNET = 0x1, }; enum { MLX5_NODNIC_EVENT_WORD_PORT_STATE_DOWN = 0x0, MLX5_NODNIC_EVENT_WORD_PORT_STATE_INITIALIZE = 0x1, MLX5_NODNIC_EVENT_WORD_PORT_STATE_ARMED = 0x2, MLX5_NODNIC_EVENT_WORD_PORT_STATE_ACTIVE = 0x3, }; struct mlx5_ifc_nodnic_event_word_bits { u8 driver_reset_needed[0x1]; u8 port_management_change_event[0x1]; u8 reserved_0[0x19]; u8 link_type[0x1]; u8 port_state[0x4]; }; struct mlx5_ifc_nic_vport_change_event_bits { u8 reserved_0[0x10]; u8 vport_num[0x10]; u8 reserved_1[0xc0]; }; struct mlx5_ifc_pages_req_event_bits { u8 reserved_0[0x10]; u8 function_id[0x10]; u8 num_pages[0x20]; u8 reserved_1[0xa0]; }; struct mlx5_ifc_cmd_inter_comp_event_bits { u8 command_completion_vector[0x20]; u8 reserved_0[0xc0]; }; struct mlx5_ifc_stall_vl_event_bits { u8 reserved_0[0x18]; u8 port_num[0x1]; u8 reserved_1[0x3]; u8 vl[0x4]; u8 reserved_2[0xa0]; }; struct 
mlx5_ifc_db_bf_congestion_event_bits { u8 event_subtype[0x8]; u8 reserved_0[0x8]; u8 congestion_level[0x8]; u8 reserved_1[0x8]; u8 reserved_2[0xa0]; }; struct mlx5_ifc_gpio_event_bits { u8 reserved_0[0x60]; u8 gpio_event_hi[0x20]; u8 gpio_event_lo[0x20]; u8 reserved_1[0x40]; }; struct mlx5_ifc_port_state_change_event_bits { u8 reserved_0[0x40]; u8 port_num[0x4]; u8 reserved_1[0x1c]; u8 reserved_2[0x80]; }; struct mlx5_ifc_dropped_packet_logged_bits { u8 reserved_0[0xe0]; }; enum { MLX5_CQ_ERROR_SYNDROME_CQ_OVERRUN = 0x1, MLX5_CQ_ERROR_SYNDROME_CQ_ACCESS_VIOLATION_ERROR = 0x2, }; struct mlx5_ifc_cq_error_bits { u8 reserved_0[0x8]; u8 cqn[0x18]; u8 reserved_1[0x20]; u8 reserved_2[0x18]; u8 syndrome[0x8]; u8 reserved_3[0x80]; }; struct mlx5_ifc_rdma_page_fault_event_bits { u8 bytes_commited[0x20]; u8 r_key[0x20]; u8 reserved_0[0x10]; u8 packet_len[0x10]; u8 rdma_op_len[0x20]; u8 rdma_va[0x40]; u8 reserved_1[0x5]; u8 rdma[0x1]; u8 write[0x1]; u8 requestor[0x1]; u8 qp_number[0x18]; }; struct mlx5_ifc_wqe_associated_page_fault_event_bits { u8 bytes_committed[0x20]; u8 reserved_0[0x10]; u8 wqe_index[0x10]; u8 reserved_1[0x10]; u8 len[0x10]; u8 reserved_2[0x60]; u8 reserved_3[0x5]; u8 rdma[0x1]; u8 write_read[0x1]; u8 requestor[0x1]; u8 qpn[0x18]; }; enum { MLX5_QP_EVENTS_TYPE_QP = 0x0, MLX5_QP_EVENTS_TYPE_RQ = 0x1, MLX5_QP_EVENTS_TYPE_SQ = 0x2, }; struct mlx5_ifc_qp_events_bits { u8 reserved_0[0xa0]; u8 type[0x8]; u8 reserved_1[0x18]; u8 reserved_2[0x8]; u8 qpn_rqn_sqn[0x18]; }; struct mlx5_ifc_dct_events_bits { u8 reserved_0[0xc0]; u8 reserved_1[0x8]; u8 dct_number[0x18]; }; struct mlx5_ifc_comp_event_bits { u8 reserved_0[0xc0]; u8 reserved_1[0x8]; u8 cq_number[0x18]; }; struct mlx5_ifc_fw_version_bits { u8 major[0x10]; u8 reserved_0[0x10]; u8 minor[0x10]; u8 subminor[0x10]; u8 second[0x8]; u8 minute[0x8]; u8 hour[0x8]; u8 reserved_1[0x8]; u8 year[0x10]; u8 month[0x8]; u8 day[0x8]; }; enum { MLX5_QPC_STATE_RST = 0x0, MLX5_QPC_STATE_INIT = 0x1, MLX5_QPC_STATE_RTR = 0x2, MLX5_QPC_STATE_RTS = 0x3, MLX5_QPC_STATE_SQER = 0x4, MLX5_QPC_STATE_SQD = 0x5, MLX5_QPC_STATE_ERR = 0x6, MLX5_QPC_STATE_SUSPENDED = 0x9, }; enum { MLX5_QPC_ST_RC = 0x0, MLX5_QPC_ST_UC = 0x1, MLX5_QPC_ST_UD = 0x2, MLX5_QPC_ST_XRC = 0x3, MLX5_QPC_ST_DCI = 0x5, MLX5_QPC_ST_QP0 = 0x7, MLX5_QPC_ST_QP1 = 0x8, MLX5_QPC_ST_RAW_DATAGRAM = 0x9, MLX5_QPC_ST_REG_UMR = 0xc, }; enum { MLX5_QP_PM_ARMED = 0x0, MLX5_QP_PM_REARM = 0x1, MLX5_QPC_PM_STATE_RESERVED = 0x2, MLX5_QP_PM_MIGRATED = 0x3, }; enum { MLX5_QPC_END_PADDING_MODE_SCATTER_AS_IS = 0x0, MLX5_QPC_END_PADDING_MODE_PAD_TO_CACHE_LINE_ALIGNMENT = 0x1, }; enum { MLX5_QPC_MTU_256_BYTES = 0x1, MLX5_QPC_MTU_512_BYTES = 0x2, MLX5_QPC_MTU_1K_BYTES = 0x3, MLX5_QPC_MTU_2K_BYTES = 0x4, MLX5_QPC_MTU_4K_BYTES = 0x5, MLX5_QPC_MTU_RAW_ETHERNET_QP = 0x7, }; enum { MLX5_QPC_ATOMIC_MODE_IB_SPEC = 0x1, MLX5_QPC_ATOMIC_MODE_ONLY_8B = 0x2, MLX5_QPC_ATOMIC_MODE_UP_TO_8B = 0x3, MLX5_QPC_ATOMIC_MODE_UP_TO_16B = 0x4, MLX5_QPC_ATOMIC_MODE_UP_TO_32B = 0x5, MLX5_QPC_ATOMIC_MODE_UP_TO_64B = 0x6, MLX5_QPC_ATOMIC_MODE_UP_TO_128B = 0x7, MLX5_QPC_ATOMIC_MODE_UP_TO_256B = 0x8, }; enum { MLX5_QPC_CS_REQ_DISABLE = 0x0, MLX5_QPC_CS_REQ_UP_TO_32B = 0x11, MLX5_QPC_CS_REQ_UP_TO_64B = 0x22, }; enum { MLX5_QPC_CS_RES_DISABLE = 0x0, MLX5_QPC_CS_RES_UP_TO_32B = 0x1, MLX5_QPC_CS_RES_UP_TO_64B = 0x2, }; struct mlx5_ifc_qpc_bits { u8 state[0x4]; u8 lag_tx_port_affinity[0x4]; u8 st[0x8]; u8 reserved_1[0x3]; u8 pm_state[0x2]; u8 reserved_2[0x7]; u8 end_padding_mode[0x2]; u8 reserved_3[0x2]; u8 wq_signature[0x1]; u8 block_lb_mc[0x1]; 
u8 atomic_like_write_en[0x1]; u8 latency_sensitive[0x1]; u8 reserved_4[0x1]; u8 drain_sigerr[0x1]; u8 reserved_5[0x2]; u8 pd[0x18]; u8 mtu[0x3]; u8 log_msg_max[0x5]; u8 reserved_6[0x1]; u8 log_rq_size[0x4]; u8 log_rq_stride[0x3]; u8 no_sq[0x1]; u8 log_sq_size[0x4]; u8 reserved_7[0x6]; u8 rlky[0x1]; u8 ulp_stateless_offload_mode[0x4]; u8 counter_set_id[0x8]; u8 uar_page[0x18]; u8 reserved_8[0x8]; u8 user_index[0x18]; u8 reserved_9[0x3]; u8 log_page_size[0x5]; u8 remote_qpn[0x18]; struct mlx5_ifc_ads_bits primary_address_path; struct mlx5_ifc_ads_bits secondary_address_path; u8 log_ack_req_freq[0x4]; u8 reserved_10[0x4]; u8 log_sra_max[0x3]; u8 reserved_11[0x2]; u8 retry_count[0x3]; u8 rnr_retry[0x3]; u8 reserved_12[0x1]; u8 fre[0x1]; u8 cur_rnr_retry[0x3]; u8 cur_retry_count[0x3]; u8 reserved_13[0x5]; u8 reserved_14[0x20]; u8 reserved_15[0x8]; u8 next_send_psn[0x18]; u8 reserved_16[0x8]; u8 cqn_snd[0x18]; u8 reserved_at_400[0x8]; u8 deth_sqpn[0x18]; u8 reserved_17[0x20]; u8 reserved_18[0x8]; u8 last_acked_psn[0x18]; u8 reserved_19[0x8]; u8 ssn[0x18]; u8 reserved_20[0x8]; u8 log_rra_max[0x3]; u8 reserved_21[0x1]; u8 atomic_mode[0x4]; u8 rre[0x1]; u8 rwe[0x1]; u8 rae[0x1]; u8 reserved_22[0x1]; u8 page_offset[0x6]; u8 reserved_23[0x3]; u8 cd_slave_receive[0x1]; u8 cd_slave_send[0x1]; u8 cd_master[0x1]; u8 reserved_24[0x3]; u8 min_rnr_nak[0x5]; u8 next_rcv_psn[0x18]; u8 reserved_25[0x8]; u8 xrcd[0x18]; u8 reserved_26[0x8]; u8 cqn_rcv[0x18]; u8 dbr_addr[0x40]; u8 q_key[0x20]; u8 reserved_27[0x5]; u8 rq_type[0x3]; u8 srqn_rmpn[0x18]; u8 reserved_28[0x8]; u8 rmsn[0x18]; u8 hw_sq_wqebb_counter[0x10]; u8 sw_sq_wqebb_counter[0x10]; u8 hw_rq_counter[0x20]; u8 sw_rq_counter[0x20]; u8 reserved_29[0x20]; u8 reserved_30[0xf]; u8 cgs[0x1]; u8 cs_req[0x8]; u8 cs_res[0x8]; u8 dc_access_key[0x40]; u8 rdma_active[0x1]; u8 comm_est[0x1]; u8 suspended[0x1]; u8 reserved_31[0x5]; u8 send_msg_psn[0x18]; u8 reserved_32[0x8]; u8 rcv_msg_psn[0x18]; u8 rdma_va[0x40]; u8 rdma_key[0x20]; u8 reserved_33[0x20]; }; struct mlx5_ifc_roce_addr_layout_bits { u8 source_l3_address[16][0x8]; u8 reserved_0[0x3]; u8 vlan_valid[0x1]; u8 vlan_id[0xc]; u8 source_mac_47_32[0x10]; u8 source_mac_31_0[0x20]; u8 reserved_1[0x14]; u8 roce_l3_type[0x4]; u8 roce_version[0x8]; u8 reserved_2[0x20]; }; struct mlx5_ifc_rdbc_bits { u8 reserved_0[0x1c]; u8 type[0x4]; u8 reserved_1[0x20]; u8 reserved_2[0x8]; u8 psn[0x18]; u8 rkey[0x20]; u8 address[0x40]; u8 byte_count[0x20]; u8 reserved_3[0x20]; u8 atomic_resp[32][0x8]; }; enum { MLX5_FLOW_CONTEXT_ACTION_ALLOW = 0x1, MLX5_FLOW_CONTEXT_ACTION_DROP = 0x2, MLX5_FLOW_CONTEXT_ACTION_FWD_DEST = 0x4, MLX5_FLOW_CONTEXT_ACTION_COUNT = 0x8, }; struct mlx5_ifc_flow_context_bits { u8 reserved_0[0x20]; u8 group_id[0x20]; u8 reserved_1[0x8]; u8 flow_tag[0x18]; u8 reserved_2[0x10]; u8 action[0x10]; u8 reserved_3[0x8]; u8 destination_list_size[0x18]; u8 reserved_4[0x8]; u8 flow_counter_list_size[0x18]; u8 reserved_5[0x140]; struct mlx5_ifc_fte_match_param_bits match_value; u8 reserved_6[0x600]; union mlx5_ifc_dest_format_struct_flow_counter_list_auto_bits destination[0]; }; enum { MLX5_XRC_SRQC_STATE_GOOD = 0x0, MLX5_XRC_SRQC_STATE_ERROR = 0x1, }; struct mlx5_ifc_xrc_srqc_bits { u8 state[0x4]; u8 log_xrc_srq_size[0x4]; u8 reserved_0[0x18]; u8 wq_signature[0x1]; u8 cont_srq[0x1]; u8 reserved_1[0x1]; u8 rlky[0x1]; u8 basic_cyclic_rcv_wqe[0x1]; u8 log_rq_stride[0x3]; u8 xrcd[0x18]; u8 page_offset[0x6]; u8 reserved_2[0x2]; u8 cqn[0x18]; u8 reserved_3[0x20]; u8 reserved_4[0x2]; u8 log_page_size[0x6]; u8 user_index[0x18]; 
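/*
 * Editor's note (sketch under the same MLX5_SET/MLX5_ADDR_OF
 * convention, hypothetical variable names): the mlx5_ifc_qpc_bits
 * layout above is carried inside the QP state-transition commands;
 * for example mlx5_ifc_sqd2rts_qp_in_bits later in this header embeds
 * a qpc next to an opt_param_mask.  A caller would typically locate
 * and fill it as:
 *
 *	void *qpc = MLX5_ADDR_OF(sqd2rts_qp_in, in, qpc);
 *
 *	MLX5_SET(sqd2rts_qp_in, in, opcode, MLX5_CMD_OP_SQD_RTS_QP);
 *	MLX5_SET(sqd2rts_qp_in, in, qpn, qpn);
 *	MLX5_SET(qpc, qpc, min_rnr_nak, min_rnr_timer);
 */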
u8 reserved_5[0x20]; u8 reserved_6[0x8]; u8 pd[0x18]; u8 lwm[0x10]; u8 wqe_cnt[0x10]; u8 reserved_7[0x40]; u8 db_record_addr_h[0x20]; u8 db_record_addr_l[0x1e]; u8 reserved_8[0x2]; u8 reserved_9[0x80]; }; struct mlx5_ifc_vnic_diagnostic_statistics_bits { u8 counter_error_queues[0x20]; u8 total_error_queues[0x20]; u8 send_queue_priority_update_flow[0x20]; u8 reserved_at_60[0x20]; u8 nic_receive_steering_discard[0x40]; u8 receive_discard_vport_down[0x40]; u8 transmit_discard_vport_down[0x40]; u8 reserved_at_140[0xec0]; }; struct mlx5_ifc_traffic_counter_bits { u8 packets[0x40]; u8 octets[0x40]; }; struct mlx5_ifc_tisc_bits { u8 strict_lag_tx_port_affinity[0x1]; u8 tls_en[0x1]; u8 reserved_at_2[0x2]; u8 lag_tx_port_affinity[0x04]; u8 reserved_at_8[0x4]; u8 prio[0x4]; u8 reserved_1[0x10]; u8 reserved_2[0x100]; u8 reserved_3[0x8]; u8 transport_domain[0x18]; u8 reserved_4[0x8]; u8 underlay_qpn[0x18]; u8 reserved_5[0x8]; u8 pd[0x18]; u8 reserved_6[0x380]; }; enum { MLX5_TIRC_DISP_TYPE_DIRECT = 0x0, MLX5_TIRC_DISP_TYPE_INDIRECT = 0x1, }; enum { MLX5_TIRC_LRO_ENABLE_MASK_IPV4_LRO = 0x1, MLX5_TIRC_LRO_ENABLE_MASK_IPV6_LRO = 0x2, }; enum { MLX5_TIRC_RX_HASH_FN_HASH_NONE = 0x0, MLX5_TIRC_RX_HASH_FN_HASH_INVERTED_XOR8 = 0x1, MLX5_TIRC_RX_HASH_FN_HASH_TOEPLITZ = 0x2, }; enum { MLX5_TIRC_SELF_LB_EN_ENABLE_UNICAST = 0x1, MLX5_TIRC_SELF_LB_EN_ENABLE_MULTICAST = 0x2, }; struct mlx5_ifc_tirc_bits { u8 reserved_0[0x20]; u8 disp_type[0x4]; u8 tls_en[0x1]; u8 reserved_at_25[0x1b]; u8 reserved_2[0x40]; u8 reserved_3[0x4]; u8 lro_timeout_period_usecs[0x10]; u8 lro_enable_mask[0x4]; u8 lro_max_msg_sz[0x8]; u8 reserved_4[0x40]; u8 reserved_5[0x8]; u8 inline_rqn[0x18]; u8 rx_hash_symmetric[0x1]; u8 reserved_6[0x1]; u8 tunneled_offload_en[0x1]; u8 reserved_7[0x5]; u8 indirect_table[0x18]; u8 rx_hash_fn[0x4]; u8 reserved_8[0x2]; u8 self_lb_en[0x2]; u8 transport_domain[0x18]; u8 rx_hash_toeplitz_key[10][0x20]; struct mlx5_ifc_rx_hash_field_select_bits rx_hash_field_selector_outer; struct mlx5_ifc_rx_hash_field_select_bits rx_hash_field_selector_inner; u8 reserved_9[0x4c0]; }; enum { MLX5_SRQC_STATE_GOOD = 0x0, MLX5_SRQC_STATE_ERROR = 0x1, }; struct mlx5_ifc_srqc_bits { u8 state[0x4]; u8 log_srq_size[0x4]; u8 reserved_0[0x18]; u8 wq_signature[0x1]; u8 cont_srq[0x1]; u8 reserved_1[0x1]; u8 rlky[0x1]; u8 reserved_2[0x1]; u8 log_rq_stride[0x3]; u8 xrcd[0x18]; u8 page_offset[0x6]; u8 reserved_3[0x2]; u8 cqn[0x18]; u8 reserved_4[0x20]; u8 reserved_5[0x2]; u8 log_page_size[0x6]; u8 reserved_6[0x18]; u8 reserved_7[0x20]; u8 reserved_8[0x8]; u8 pd[0x18]; u8 lwm[0x10]; u8 wqe_cnt[0x10]; u8 reserved_9[0x40]; u8 dbr_addr[0x40]; u8 reserved_10[0x80]; }; enum { MLX5_SQC_STATE_RST = 0x0, MLX5_SQC_STATE_RDY = 0x1, MLX5_SQC_STATE_ERR = 0x3, }; struct mlx5_ifc_sqc_bits { u8 rlkey[0x1]; u8 cd_master[0x1]; u8 fre[0x1]; u8 flush_in_error_en[0x1]; u8 allow_multi_pkt_send_wqe[0x1]; u8 min_wqe_inline_mode[0x3]; u8 state[0x4]; u8 reg_umr[0x1]; u8 allow_swp[0x1]; u8 reserved_0[0x12]; u8 reserved_1[0x8]; u8 user_index[0x18]; u8 reserved_2[0x8]; u8 cqn[0x18]; u8 reserved_3[0x80]; u8 qos_para_vport_number[0x10]; u8 packet_pacing_rate_limit_index[0x10]; u8 tis_lst_sz[0x10]; u8 reserved_4[0x10]; u8 reserved_5[0x40]; u8 reserved_6[0x8]; u8 tis_num_0[0x18]; struct mlx5_ifc_wq_bits wq; }; enum { MLX5_TSAR_TYPE_DWRR = 0, MLX5_TSAR_TYPE_ROUND_ROUBIN = 1, MLX5_TSAR_TYPE_ETS = 2 }; struct mlx5_ifc_tsar_element_attributes_bits { u8 reserved_0[0x8]; u8 tsar_type[0x8]; u8 reserved_1[0x10]; }; struct mlx5_ifc_vport_element_attributes_bits { u8 
reserved_0[0x10]; u8 vport_number[0x10]; }; struct mlx5_ifc_vport_tc_element_attributes_bits { u8 traffic_class[0x10]; u8 vport_number[0x10]; }; struct mlx5_ifc_para_vport_tc_element_attributes_bits { u8 reserved_0[0x0C]; u8 traffic_class[0x04]; u8 qos_para_vport_number[0x10]; }; enum { MLX5_SCHEDULING_CONTEXT_ELEMENT_TYPE_TSAR = 0x0, MLX5_SCHEDULING_CONTEXT_ELEMENT_TYPE_VPORT = 0x1, MLX5_SCHEDULING_CONTEXT_ELEMENT_TYPE_VPORT_TC = 0x2, MLX5_SCHEDULING_CONTEXT_ELEMENT_TYPE_PARA_VPORT_TC = 0x3, }; struct mlx5_ifc_scheduling_context_bits { u8 element_type[0x8]; u8 reserved_at_8[0x18]; u8 element_attributes[0x20]; u8 parent_element_id[0x20]; u8 reserved_at_60[0x40]; u8 bw_share[0x20]; u8 max_average_bw[0x20]; u8 reserved_at_e0[0x120]; }; struct mlx5_ifc_rqtc_bits { u8 reserved_0[0xa0]; u8 reserved_1[0x10]; u8 rqt_max_size[0x10]; u8 reserved_2[0x10]; u8 rqt_actual_size[0x10]; u8 reserved_3[0x6a0]; struct mlx5_ifc_rq_num_bits rq_num[0]; }; enum { MLX5_RQC_RQ_TYPE_MEMORY_RQ_INLINE = 0x0, MLX5_RQC_RQ_TYPE_MEMORY_RQ_RMP = 0x1, }; enum { MLX5_RQC_STATE_RST = 0x0, MLX5_RQC_STATE_RDY = 0x1, MLX5_RQC_STATE_ERR = 0x3, }; enum { MLX5_RQC_DROPLESS_MODE_DISABLE = 0x0, MLX5_RQC_DROPLESS_MODE_ENABLE = 0x1, }; struct mlx5_ifc_rqc_bits { u8 rlkey[0x1]; u8 delay_drop_en[0x1]; u8 scatter_fcs[0x1]; u8 vlan_strip_disable[0x1]; u8 mem_rq_type[0x4]; u8 state[0x4]; u8 reserved_1[0x1]; u8 flush_in_error_en[0x1]; u8 reserved_2[0x12]; u8 reserved_3[0x8]; u8 user_index[0x18]; u8 reserved_4[0x8]; u8 cqn[0x18]; u8 counter_set_id[0x8]; u8 reserved_5[0x18]; u8 reserved_6[0x8]; u8 rmpn[0x18]; u8 reserved_7[0xe0]; struct mlx5_ifc_wq_bits wq; }; enum { MLX5_RMPC_STATE_RDY = 0x1, MLX5_RMPC_STATE_ERR = 0x3, }; struct mlx5_ifc_rmpc_bits { u8 reserved_0[0x8]; u8 state[0x4]; u8 reserved_1[0x14]; u8 basic_cyclic_rcv_wqe[0x1]; u8 reserved_2[0x1f]; u8 reserved_3[0x140]; struct mlx5_ifc_wq_bits wq; }; enum { MLX5_NIC_VPORT_CONTEXT_ALLOWED_LIST_TYPE_CURRENT_UC_MAC_ADDRESS = 0x0, MLX5_NIC_VPORT_CONTEXT_ALLOWED_LIST_TYPE_CURRENT_MC_MAC_ADDRESS = 0x1, MLX5_NIC_VPORT_CONTEXT_ALLOWED_LIST_TYPE_VLAN_LIST = 0x2, }; struct mlx5_ifc_nic_vport_context_bits { u8 reserved_0[0x5]; u8 min_wqe_inline_mode[0x3]; u8 reserved_1[0x15]; u8 disable_mc_local_lb[0x1]; u8 disable_uc_local_lb[0x1]; u8 roce_en[0x1]; u8 arm_change_event[0x1]; u8 reserved_2[0x1a]; u8 event_on_mtu[0x1]; u8 event_on_promisc_change[0x1]; u8 event_on_vlan_change[0x1]; u8 event_on_mc_address_change[0x1]; u8 event_on_uc_address_change[0x1]; u8 reserved_3[0xe0]; u8 reserved_4[0x10]; u8 mtu[0x10]; u8 system_image_guid[0x40]; u8 port_guid[0x40]; u8 node_guid[0x40]; u8 reserved_5[0x140]; u8 qkey_violation_counter[0x10]; u8 reserved_6[0x10]; u8 reserved_7[0x420]; u8 promisc_uc[0x1]; u8 promisc_mc[0x1]; u8 promisc_all[0x1]; u8 reserved_8[0x2]; u8 allowed_list_type[0x3]; u8 reserved_9[0xc]; u8 allowed_list_size[0xc]; struct mlx5_ifc_mac_address_layout_bits permanent_address; u8 reserved_10[0x20]; u8 current_uc_mac_address[0][0x40]; }; enum { MLX5_ACCESS_MODE_PA = 0x0, MLX5_ACCESS_MODE_MTT = 0x1, MLX5_ACCESS_MODE_KLM = 0x2, }; struct mlx5_ifc_mkc_bits { u8 reserved_at_0[0x1]; u8 free[0x1]; u8 reserved_at_2[0x1]; u8 access_mode_4_2[0x3]; u8 reserved_at_6[0x7]; u8 relaxed_ordering_write[0x1]; u8 reserved_at_e[0x1]; u8 small_fence_on_rdma_read_response[0x1]; u8 umr_en[0x1]; u8 a[0x1]; u8 rw[0x1]; u8 rr[0x1]; u8 lw[0x1]; u8 lr[0x1]; u8 access_mode[0x2]; u8 reserved_2[0x8]; u8 qpn[0x18]; u8 mkey_7_0[0x8]; u8 reserved_3[0x20]; u8 length64[0x1]; u8 bsf_en[0x1]; u8 sync_umr[0x1]; u8 reserved_4[0x2]; u8 
expected_sigerr_count[0x1]; u8 reserved_5[0x1]; u8 en_rinval[0x1]; u8 pd[0x18]; u8 start_addr[0x40]; u8 len[0x40]; u8 bsf_octword_size[0x20]; u8 reserved_6[0x80]; u8 translations_octword_size[0x20]; u8 reserved_7[0x1b]; u8 log_page_size[0x5]; u8 reserved_8[0x20]; }; struct mlx5_ifc_pkey_bits { u8 reserved_0[0x10]; u8 pkey[0x10]; }; struct mlx5_ifc_array128_auto_bits { u8 array128_auto[16][0x8]; }; enum { MLX5_HCA_VPORT_CONTEXT_FIELD_SELECT_PORT_GUID = 0x0, MLX5_HCA_VPORT_CONTEXT_FIELD_SELECT_NODE_GUID = 0x1, MLX5_HCA_VPORT_CONTEXT_FIELD_SELECT_VPORT_STATE_POLICY = 0x2, }; enum { MLX5_HCA_VPORT_CONTEXT_PORT_PHYSICAL_STATE_SLEEP = 0x1, MLX5_HCA_VPORT_CONTEXT_PORT_PHYSICAL_STATE_POLLING = 0x2, MLX5_HCA_VPORT_CONTEXT_PORT_PHYSICAL_STATE_DISABLED = 0x3, MLX5_HCA_VPORT_CONTEXT_PORT_PHYSICAL_STATE_PORTCONFIGURATIONTRAINING = 0x4, MLX5_HCA_VPORT_CONTEXT_PORT_PHYSICAL_STATE_LINKUP = 0x5, MLX5_HCA_VPORT_CONTEXT_PORT_PHYSICAL_STATE_LINKERRORRECOVERY = 0x6, MLX5_HCA_VPORT_CONTEXT_PORT_PHYSICAL_STATE_PHYTEST = 0x7, }; enum { MLX5_HCA_VPORT_CONTEXT_VPORT_STATE_POLICY_DOWN = 0x0, MLX5_HCA_VPORT_CONTEXT_VPORT_STATE_POLICY_UP = 0x1, MLX5_HCA_VPORT_CONTEXT_VPORT_STATE_POLICY_FOLLOW = 0x2, }; enum { MLX5_HCA_VPORT_CONTEXT_PORT_STATE_DOWN = 0x1, MLX5_HCA_VPORT_CONTEXT_PORT_STATE_INIT = 0x2, MLX5_HCA_VPORT_CONTEXT_PORT_STATE_ARM = 0x3, MLX5_HCA_VPORT_CONTEXT_PORT_STATE_ACTIVE = 0x4, }; enum { MLX5_HCA_VPORT_CONTEXT_VPORT_STATE_DOWN = 0x1, MLX5_HCA_VPORT_CONTEXT_VPORT_STATE_INIT = 0x2, MLX5_HCA_VPORT_CONTEXT_VPORT_STATE_ARM = 0x3, MLX5_HCA_VPORT_CONTEXT_VPORT_STATE_ACTIVE = 0x4, }; struct mlx5_ifc_hca_vport_context_bits { u8 field_select[0x20]; u8 reserved_0[0xe0]; u8 sm_virt_aware[0x1]; u8 has_smi[0x1]; u8 has_raw[0x1]; u8 grh_required[0x1]; u8 reserved_1[0x1]; u8 min_wqe_inline_mode[0x3]; u8 reserved_2[0x8]; u8 port_physical_state[0x4]; u8 vport_state_policy[0x4]; u8 port_state[0x4]; u8 vport_state[0x4]; u8 reserved_3[0x20]; u8 system_image_guid[0x40]; u8 port_guid[0x40]; u8 node_guid[0x40]; u8 cap_mask1[0x20]; u8 cap_mask1_field_select[0x20]; u8 cap_mask2[0x20]; u8 cap_mask2_field_select[0x20]; u8 reserved_4[0x80]; u8 lid[0x10]; u8 reserved_5[0x4]; u8 init_type_reply[0x4]; u8 lmc[0x3]; u8 subnet_timeout[0x5]; u8 sm_lid[0x10]; u8 sm_sl[0x4]; u8 reserved_6[0xc]; u8 qkey_violation_counter[0x10]; u8 pkey_violation_counter[0x10]; u8 reserved_7[0xca0]; }; union mlx5_ifc_hca_cap_union_bits { struct mlx5_ifc_cmd_hca_cap_bits cmd_hca_cap; struct mlx5_ifc_odp_cap_bits odp_cap; struct mlx5_ifc_atomic_caps_bits atomic_caps; struct mlx5_ifc_roce_cap_bits roce_cap; struct mlx5_ifc_per_protocol_networking_offload_caps_bits per_protocol_networking_offload_caps; struct mlx5_ifc_flow_table_nic_cap_bits flow_table_nic_cap; struct mlx5_ifc_flow_table_eswitch_cap_bits flow_table_eswitch_cap; struct mlx5_ifc_e_switch_cap_bits e_switch_cap; struct mlx5_ifc_snapshot_cap_bits snapshot_cap; struct mlx5_ifc_debug_cap_bits diagnostic_counters_cap; struct mlx5_ifc_qos_cap_bits qos_cap; struct mlx5_ifc_tls_capabilities_bits tls_capabilities; u8 reserved_0[0x8000]; }; enum { MLX5_FLOW_TABLE_CONTEXT_TABLE_MISS_ACTION_DEFAULT = 0x0, MLX5_FLOW_TABLE_CONTEXT_TABLE_MISS_ACTION_IDENTIFIED = 0x1, }; struct mlx5_ifc_flow_table_context_bits { u8 encap_en[0x1]; u8 decap_en[0x1]; u8 reserved_at_2[0x2]; u8 table_miss_action[0x4]; u8 level[0x8]; u8 reserved_at_10[0x8]; u8 log_size[0x8]; u8 reserved_at_20[0x8]; u8 table_miss_id[0x18]; u8 reserved_at_40[0x8]; u8 lag_master_next_table_id[0x18]; u8 reserved_at_60[0xe0]; }; struct 
mlx5_ifc_esw_vport_context_bits { u8 reserved_0[0x3]; u8 vport_svlan_strip[0x1]; u8 vport_cvlan_strip[0x1]; u8 vport_svlan_insert[0x1]; u8 vport_cvlan_insert[0x2]; u8 reserved_1[0x18]; u8 reserved_2[0x20]; u8 svlan_cfi[0x1]; u8 svlan_pcp[0x3]; u8 svlan_id[0xc]; u8 cvlan_cfi[0x1]; u8 cvlan_pcp[0x3]; u8 cvlan_id[0xc]; u8 reserved_3[0x7a0]; }; enum { MLX5_EQC_STATUS_OK = 0x0, MLX5_EQC_STATUS_EQ_WRITE_FAILURE = 0xa, }; enum { MLX5_EQ_STATE_ARMED = 0x9, MLX5_EQ_STATE_FIRED = 0xa, }; struct mlx5_ifc_eqc_bits { u8 status[0x4]; u8 reserved_0[0x9]; u8 ec[0x1]; u8 oi[0x1]; u8 reserved_1[0x5]; u8 st[0x4]; u8 reserved_2[0x8]; u8 reserved_3[0x20]; u8 reserved_4[0x14]; u8 page_offset[0x6]; u8 reserved_5[0x6]; u8 reserved_6[0x3]; u8 log_eq_size[0x5]; u8 uar_page[0x18]; u8 reserved_7[0x20]; u8 reserved_8[0x18]; u8 intr[0x8]; u8 reserved_9[0x3]; u8 log_page_size[0x5]; u8 reserved_10[0x18]; u8 reserved_11[0x60]; u8 reserved_12[0x8]; u8 consumer_counter[0x18]; u8 reserved_13[0x8]; u8 producer_counter[0x18]; u8 reserved_14[0x80]; }; enum { MLX5_DCTC_STATE_ACTIVE = 0x0, MLX5_DCTC_STATE_DRAINING = 0x1, MLX5_DCTC_STATE_DRAINED = 0x2, }; enum { MLX5_DCTC_CS_RES_DISABLE = 0x0, MLX5_DCTC_CS_RES_NA = 0x1, MLX5_DCTC_CS_RES_UP_TO_64B = 0x2, }; enum { MLX5_DCTC_MTU_256_BYTES = 0x1, MLX5_DCTC_MTU_512_BYTES = 0x2, MLX5_DCTC_MTU_1K_BYTES = 0x3, MLX5_DCTC_MTU_2K_BYTES = 0x4, MLX5_DCTC_MTU_4K_BYTES = 0x5, }; struct mlx5_ifc_dctc_bits { u8 reserved_0[0x4]; u8 state[0x4]; u8 reserved_1[0x18]; u8 reserved_2[0x8]; u8 user_index[0x18]; u8 reserved_3[0x8]; u8 cqn[0x18]; u8 counter_set_id[0x8]; u8 atomic_mode[0x4]; u8 rre[0x1]; u8 rwe[0x1]; u8 rae[0x1]; u8 atomic_like_write_en[0x1]; u8 latency_sensitive[0x1]; u8 rlky[0x1]; u8 reserved_4[0xe]; u8 reserved_5[0x8]; u8 cs_res[0x8]; u8 reserved_6[0x3]; u8 min_rnr_nak[0x5]; u8 reserved_7[0x8]; u8 reserved_8[0x8]; u8 srqn[0x18]; u8 reserved_9[0x8]; u8 pd[0x18]; u8 tclass[0x8]; u8 reserved_10[0x4]; u8 flow_label[0x14]; u8 dc_access_key[0x40]; u8 reserved_11[0x5]; u8 mtu[0x3]; u8 port[0x8]; u8 pkey_index[0x10]; u8 reserved_12[0x8]; u8 my_addr_index[0x8]; u8 reserved_13[0x8]; u8 hop_limit[0x8]; u8 dc_access_key_violation_count[0x20]; u8 reserved_14[0x14]; u8 dei_cfi[0x1]; u8 eth_prio[0x3]; u8 ecn[0x2]; u8 dscp[0x6]; u8 reserved_15[0x40]; }; enum { MLX5_CQC_STATUS_OK = 0x0, MLX5_CQC_STATUS_CQ_OVERFLOW = 0x9, MLX5_CQC_STATUS_CQ_WRITE_FAIL = 0xa, }; enum { CQE_SIZE_64 = 0x0, CQE_SIZE_128 = 0x1, }; enum { MLX5_CQ_PERIOD_MODE_START_FROM_EQE = 0x0, MLX5_CQ_PERIOD_MODE_START_FROM_CQE = 0x1, }; enum { MLX5_CQ_STATE_SOLICITED_ARMED = 0x6, MLX5_CQ_STATE_ARMED = 0x9, MLX5_CQ_STATE_FIRED = 0xa, }; struct mlx5_ifc_cqc_bits { u8 status[0x4]; u8 reserved_0[0x4]; u8 cqe_sz[0x3]; u8 cc[0x1]; u8 reserved_1[0x1]; u8 scqe_break_moderation_en[0x1]; u8 oi[0x1]; u8 cq_period_mode[0x2]; u8 cqe_compression_en[0x1]; u8 mini_cqe_res_format[0x2]; u8 st[0x4]; u8 reserved_2[0x8]; u8 reserved_3[0x20]; u8 reserved_4[0x14]; u8 page_offset[0x6]; u8 reserved_5[0x6]; u8 reserved_6[0x3]; u8 log_cq_size[0x5]; u8 uar_page[0x18]; u8 reserved_7[0x4]; u8 cq_period[0xc]; u8 cq_max_count[0x10]; u8 reserved_8[0x18]; u8 c_eqn[0x8]; u8 reserved_9[0x3]; u8 log_page_size[0x5]; u8 reserved_10[0x18]; u8 reserved_11[0x20]; u8 reserved_12[0x8]; u8 last_notified_index[0x18]; u8 reserved_13[0x8]; u8 last_solicit_index[0x18]; u8 reserved_14[0x8]; u8 consumer_counter[0x18]; u8 reserved_15[0x8]; u8 producer_counter[0x18]; u8 reserved_16[0x40]; u8 dbr_addr[0x40]; }; union mlx5_ifc_cong_control_roce_ecn_auto_bits { struct 
mlx5_ifc_cong_control_802_1qau_rp_bits cong_control_802_1qau_rp; struct mlx5_ifc_cong_control_r_roce_ecn_rp_bits cong_control_r_roce_ecn_rp; struct mlx5_ifc_cong_control_r_roce_ecn_np_bits cong_control_r_roce_ecn_np; u8 reserved_0[0x800]; }; struct mlx5_ifc_query_adapter_param_block_bits { u8 reserved_0[0xc0]; u8 reserved_1[0x8]; u8 ieee_vendor_id[0x18]; u8 reserved_2[0x10]; u8 vsd_vendor_id[0x10]; u8 vsd[208][0x8]; u8 vsd_contd_psid[16][0x8]; }; union mlx5_ifc_modify_field_select_resize_field_select_auto_bits { struct mlx5_ifc_modify_field_select_bits modify_field_select; struct mlx5_ifc_resize_field_select_bits resize_field_select; u8 reserved_0[0x20]; }; union mlx5_ifc_field_select_802_1_r_roce_auto_bits { struct mlx5_ifc_field_select_802_1qau_rp_bits field_select_802_1qau_rp; struct mlx5_ifc_field_select_r_roce_rp_bits field_select_r_roce_rp; struct mlx5_ifc_field_select_r_roce_np_bits field_select_r_roce_np; u8 reserved_0[0x20]; }; struct mlx5_ifc_bufferx_reg_bits { u8 reserved_0[0x6]; u8 lossy[0x1]; u8 epsb[0x1]; u8 reserved_1[0xc]; u8 size[0xc]; u8 xoff_threshold[0x10]; u8 xon_threshold[0x10]; }; struct mlx5_ifc_config_item_bits { u8 valid[0x2]; u8 reserved_0[0x2]; u8 header_type[0x2]; u8 reserved_1[0x2]; u8 default_location[0x1]; u8 reserved_2[0x7]; u8 version[0x4]; u8 reserved_3[0x3]; u8 length[0x9]; u8 type[0x20]; u8 reserved_4[0x10]; u8 crc16[0x10]; }; struct mlx5_ifc_nodnic_port_config_reg_bits { struct mlx5_ifc_nodnic_event_word_bits event; u8 network_en[0x1]; u8 dma_en[0x1]; u8 promisc_en[0x1]; u8 promisc_multicast_en[0x1]; u8 reserved_0[0x17]; u8 receive_filter_en[0x5]; u8 reserved_1[0x10]; u8 mac_47_32[0x10]; u8 mac_31_0[0x20]; u8 receive_filters_mgid_mac[64][0x8]; u8 gid[16][0x8]; u8 reserved_2[0x10]; u8 lid[0x10]; u8 reserved_3[0xc]; u8 sm_sl[0x4]; u8 sm_lid[0x10]; u8 completion_address_63_32[0x20]; u8 completion_address_31_12[0x14]; u8 reserved_4[0x6]; u8 log_cq_size[0x6]; u8 working_buffer_address_63_32[0x20]; u8 working_buffer_address_31_12[0x14]; u8 reserved_5[0xc]; struct mlx5_ifc_nodnic_cq_arming_word_bits arm_cq; u8 pkey_index[0x10]; u8 pkey[0x10]; struct mlx5_ifc_nodnic_ring_config_reg_bits send_ring0; struct mlx5_ifc_nodnic_ring_config_reg_bits send_ring1; struct mlx5_ifc_nodnic_ring_config_reg_bits receive_ring0; struct mlx5_ifc_nodnic_ring_config_reg_bits receive_ring1; u8 reserved_6[0x400]; }; union mlx5_ifc_event_auto_bits { struct mlx5_ifc_comp_event_bits comp_event; struct mlx5_ifc_dct_events_bits dct_events; struct mlx5_ifc_qp_events_bits qp_events; struct mlx5_ifc_wqe_associated_page_fault_event_bits wqe_associated_page_fault_event; struct mlx5_ifc_rdma_page_fault_event_bits rdma_page_fault_event; struct mlx5_ifc_cq_error_bits cq_error; struct mlx5_ifc_dropped_packet_logged_bits dropped_packet_logged; struct mlx5_ifc_port_state_change_event_bits port_state_change_event; struct mlx5_ifc_gpio_event_bits gpio_event; struct mlx5_ifc_db_bf_congestion_event_bits db_bf_congestion_event; struct mlx5_ifc_stall_vl_event_bits stall_vl_event; struct mlx5_ifc_cmd_inter_comp_event_bits cmd_inter_comp_event; struct mlx5_ifc_pages_req_event_bits pages_req_event; struct mlx5_ifc_nic_vport_change_event_bits nic_vport_change_event; u8 reserved_0[0xe0]; }; struct mlx5_ifc_health_buffer_bits { u8 reserved_0[0x100]; u8 assert_existptr[0x20]; u8 assert_callra[0x20]; u8 reserved_1[0x40]; u8 fw_version[0x20]; u8 hw_id[0x20]; u8 reserved_2[0x20]; u8 irisc_index[0x8]; u8 synd[0x8]; u8 ext_synd[0x10]; }; struct mlx5_ifc_register_loopback_control_bits { u8 no_lb[0x1]; u8 
reserved_0[0x7]; u8 port[0x8]; u8 reserved_1[0x10]; u8 reserved_2[0x60]; }; struct mlx5_ifc_lrh_bits { u8 vl[4]; u8 lver[4]; u8 sl[4]; u8 reserved2[2]; u8 lnh[2]; u8 dlid[16]; u8 reserved5[5]; u8 pkt_len[11]; u8 slid[16]; }; struct mlx5_ifc_icmd_set_wol_rol_out_bits { u8 reserved_0[0x40]; u8 reserved_1[0x10]; u8 rol_mode[0x8]; u8 wol_mode[0x8]; }; struct mlx5_ifc_icmd_set_wol_rol_in_bits { u8 reserved_0[0x40]; u8 rol_mode_valid[0x1]; u8 wol_mode_valid[0x1]; u8 reserved_1[0xe]; u8 rol_mode[0x8]; u8 wol_mode[0x8]; u8 reserved_2[0x7a0]; }; struct mlx5_ifc_icmd_set_virtual_mac_in_bits { u8 virtual_mac_en[0x1]; u8 mac_aux_v[0x1]; u8 reserved_0[0x1e]; u8 reserved_1[0x40]; struct mlx5_ifc_mac_address_layout_bits virtual_mac; u8 reserved_2[0x760]; }; struct mlx5_ifc_icmd_query_virtual_mac_out_bits { u8 virtual_mac_en[0x1]; u8 mac_aux_v[0x1]; u8 reserved_0[0x1e]; struct mlx5_ifc_mac_address_layout_bits permanent_mac; struct mlx5_ifc_mac_address_layout_bits virtual_mac; u8 reserved_1[0x760]; }; struct mlx5_ifc_icmd_query_fw_info_out_bits { struct mlx5_ifc_fw_version_bits fw_version; u8 reserved_0[0x10]; u8 hash_signature[0x10]; u8 psid[16][0x8]; u8 reserved_1[0x6e0]; }; struct mlx5_ifc_icmd_query_cap_in_bits { u8 reserved_0[0x10]; u8 capability_group[0x10]; }; struct mlx5_ifc_icmd_query_cap_general_bits { u8 nv_access[0x1]; u8 fw_info_psid[0x1]; u8 reserved_0[0x1e]; u8 reserved_1[0x16]; u8 rol_s[0x1]; u8 rol_g[0x1]; u8 reserved_2[0x1]; u8 wol_s[0x1]; u8 wol_g[0x1]; u8 wol_a[0x1]; u8 wol_b[0x1]; u8 wol_m[0x1]; u8 wol_u[0x1]; u8 wol_p[0x1]; }; struct mlx5_ifc_icmd_ocbb_query_header_stats_out_bits { u8 status[0x8]; u8 reserved_0[0x18]; u8 reserved_1[0x7e0]; }; struct mlx5_ifc_icmd_ocbb_query_etoc_stats_out_bits { u8 status[0x8]; u8 reserved_0[0x18]; u8 reserved_1[0x7e0]; }; struct mlx5_ifc_icmd_ocbb_init_in_bits { u8 address_hi[0x20]; u8 address_lo[0x20]; u8 reserved_0[0x7c0]; }; struct mlx5_ifc_icmd_init_ocsd_in_bits { u8 reserved_0[0x20]; u8 address_hi[0x20]; u8 address_lo[0x20]; u8 reserved_1[0x7a0]; }; struct mlx5_ifc_icmd_access_reg_out_bits { u8 reserved_0[0x11]; u8 status[0x7]; u8 reserved_1[0x8]; u8 register_id[0x10]; u8 reserved_2[0x10]; u8 reserved_3[0x40]; u8 reserved_4[0x5]; u8 len[0xb]; u8 reserved_5[0x10]; u8 register_data[0][0x20]; }; enum { MLX5_ICMD_ACCESS_REG_IN_METHOD_QUERY = 0x1, MLX5_ICMD_ACCESS_REG_IN_METHOD_WRITE = 0x2, }; struct mlx5_ifc_icmd_access_reg_in_bits { u8 constant_1[0x5]; u8 constant_2[0xb]; u8 reserved_0[0x10]; u8 register_id[0x10]; u8 reserved_1[0x1]; u8 method[0x7]; u8 constant_3[0x8]; u8 reserved_2[0x40]; u8 constant_4[0x5]; u8 len[0xb]; u8 reserved_3[0x10]; u8 register_data[0][0x20]; }; enum { MLX5_TEARDOWN_HCA_OUT_FORCE_STATE_SUCCESS = 0x0, MLX5_TEARDOWN_HCA_OUT_FORCE_STATE_FAIL = 0x1, }; struct mlx5_ifc_teardown_hca_out_bits { u8 status[0x8]; u8 reserved_0[0x18]; u8 syndrome[0x20]; u8 reserved_1[0x3f]; u8 state[0x1]; }; enum { MLX5_TEARDOWN_HCA_IN_PROFILE_GRACEFUL_CLOSE = 0x0, MLX5_TEARDOWN_HCA_IN_PROFILE_FORCE_CLOSE = 0x1, MLX5_TEARDOWN_HCA_IN_PROFILE_PREPARE_FAST_TEARDOWN = 0x2, }; struct mlx5_ifc_teardown_hca_in_bits { u8 opcode[0x10]; u8 reserved_0[0x10]; u8 reserved_1[0x10]; u8 op_mod[0x10]; u8 reserved_2[0x10]; u8 profile[0x10]; u8 reserved_3[0x20]; }; struct mlx5_ifc_set_delay_drop_params_out_bits { u8 status[0x8]; u8 reserved_at_8[0x18]; u8 syndrome[0x20]; u8 reserved_at_40[0x40]; }; struct mlx5_ifc_set_delay_drop_params_in_bits { u8 opcode[0x10]; u8 reserved_at_10[0x10]; u8 reserved_at_20[0x10]; u8 op_mod[0x10]; u8 reserved_at_40[0x20]; u8 
reserved_at_60[0x10]; u8 delay_drop_timeout[0x10]; }; struct mlx5_ifc_query_delay_drop_params_out_bits { u8 status[0x8]; u8 reserved_at_8[0x18]; u8 syndrome[0x20]; u8 reserved_at_40[0x20]; u8 reserved_at_60[0x10]; u8 delay_drop_timeout[0x10]; }; struct mlx5_ifc_query_delay_drop_params_in_bits { u8 opcode[0x10]; u8 reserved_at_10[0x10]; u8 reserved_at_20[0x10]; u8 op_mod[0x10]; u8 reserved_at_40[0x40]; }; struct mlx5_ifc_suspend_qp_out_bits { u8 status[0x8]; u8 reserved_0[0x18]; u8 syndrome[0x20]; u8 reserved_1[0x40]; }; struct mlx5_ifc_suspend_qp_in_bits { u8 opcode[0x10]; u8 reserved_0[0x10]; u8 reserved_1[0x10]; u8 op_mod[0x10]; u8 reserved_2[0x8]; u8 qpn[0x18]; u8 reserved_3[0x20]; }; struct mlx5_ifc_sqerr2rts_qp_out_bits { u8 status[0x8]; u8 reserved_0[0x18]; u8 syndrome[0x20]; u8 reserved_1[0x40]; }; struct mlx5_ifc_sqerr2rts_qp_in_bits { u8 opcode[0x10]; u8 reserved_0[0x10]; u8 reserved_1[0x10]; u8 op_mod[0x10]; u8 reserved_2[0x8]; u8 qpn[0x18]; u8 reserved_3[0x20]; u8 opt_param_mask[0x20]; u8 reserved_4[0x20]; struct mlx5_ifc_qpc_bits qpc; u8 reserved_5[0x80]; }; struct mlx5_ifc_sqd2rts_qp_out_bits { u8 status[0x8]; u8 reserved_0[0x18]; u8 syndrome[0x20]; u8 reserved_1[0x40]; }; struct mlx5_ifc_sqd2rts_qp_in_bits { u8 opcode[0x10]; u8 reserved_0[0x10]; u8 reserved_1[0x10]; u8 op_mod[0x10]; u8 reserved_2[0x8]; u8 qpn[0x18]; u8 reserved_3[0x20]; u8 opt_param_mask[0x20]; u8 reserved_4[0x20]; struct mlx5_ifc_qpc_bits qpc; u8 reserved_5[0x80]; }; struct mlx5_ifc_set_wol_rol_out_bits { u8 status[0x8]; u8 reserved_0[0x18]; u8 syndrome[0x20]; u8 reserved_1[0x40]; }; struct mlx5_ifc_set_wol_rol_in_bits { u8 opcode[0x10]; u8 reserved_0[0x10]; u8 reserved_1[0x10]; u8 op_mod[0x10]; u8 rol_mode_valid[0x1]; u8 wol_mode_valid[0x1]; u8 reserved_2[0xe]; u8 rol_mode[0x8]; u8 wol_mode[0x8]; u8 reserved_3[0x20]; }; struct mlx5_ifc_set_roce_address_out_bits { u8 status[0x8]; u8 reserved_0[0x18]; u8 syndrome[0x20]; u8 reserved_1[0x40]; }; struct mlx5_ifc_set_roce_address_in_bits { u8 opcode[0x10]; u8 reserved_0[0x10]; u8 reserved_1[0x10]; u8 op_mod[0x10]; u8 roce_address_index[0x10]; u8 reserved_2[0x10]; u8 reserved_3[0x20]; struct mlx5_ifc_roce_addr_layout_bits roce_address; }; struct mlx5_ifc_set_rdb_out_bits { u8 status[0x8]; u8 reserved_0[0x18]; u8 syndrome[0x20]; u8 reserved_1[0x40]; }; struct mlx5_ifc_set_rdb_in_bits { u8 opcode[0x10]; u8 reserved_0[0x10]; u8 reserved_1[0x10]; u8 op_mod[0x10]; u8 reserved_2[0x8]; u8 qpn[0x18]; u8 reserved_3[0x18]; u8 rdb_list_size[0x8]; struct mlx5_ifc_rdbc_bits rdb_context[0]; }; struct mlx5_ifc_set_mad_demux_out_bits { u8 status[0x8]; u8 reserved_0[0x18]; u8 syndrome[0x20]; u8 reserved_1[0x40]; }; enum { MLX5_SET_MAD_DEMUX_IN_DEMUX_MODE_PASS_ALL = 0x0, MLX5_SET_MAD_DEMUX_IN_DEMUX_MODE_SELECTIVE = 0x2, }; struct mlx5_ifc_set_mad_demux_in_bits { u8 opcode[0x10]; u8 reserved_0[0x10]; u8 reserved_1[0x10]; u8 op_mod[0x10]; u8 reserved_2[0x20]; u8 reserved_3[0x6]; u8 demux_mode[0x2]; u8 reserved_4[0x18]; }; struct mlx5_ifc_set_l2_table_entry_out_bits { u8 status[0x8]; u8 reserved_0[0x18]; u8 syndrome[0x20]; u8 reserved_1[0x40]; }; struct mlx5_ifc_set_l2_table_entry_in_bits { u8 opcode[0x10]; u8 reserved_0[0x10]; u8 reserved_1[0x10]; u8 op_mod[0x10]; u8 reserved_2[0x60]; u8 reserved_3[0x8]; u8 table_index[0x18]; u8 reserved_4[0x20]; u8 reserved_5[0x13]; u8 vlan_valid[0x1]; u8 vlan[0xc]; struct mlx5_ifc_mac_address_layout_bits mac_address; u8 reserved_6[0xc0]; }; struct mlx5_ifc_set_issi_out_bits { u8 status[0x8]; u8 reserved_0[0x18]; u8 syndrome[0x20]; u8 
reserved_1[0x40]; }; struct mlx5_ifc_set_issi_in_bits { u8 opcode[0x10]; u8 reserved_0[0x10]; u8 reserved_1[0x10]; u8 op_mod[0x10]; u8 reserved_2[0x10]; u8 current_issi[0x10]; u8 reserved_3[0x20]; }; struct mlx5_ifc_set_hca_cap_out_bits { u8 status[0x8]; u8 reserved_0[0x18]; u8 syndrome[0x20]; u8 reserved_1[0x40]; }; struct mlx5_ifc_set_hca_cap_in_bits { u8 opcode[0x10]; u8 reserved_0[0x10]; u8 reserved_1[0x10]; u8 op_mod[0x10]; u8 reserved_2[0x40]; union mlx5_ifc_hca_cap_union_bits capability; }; enum { MLX5_SET_FTE_MODIFY_ENABLE_MASK_ACTION = 0x0, MLX5_SET_FTE_MODIFY_ENABLE_MASK_FLOW_TAG = 0x1, MLX5_SET_FTE_MODIFY_ENABLE_MASK_DESTINATION_LIST = 0x2, MLX5_SET_FTE_MODIFY_ENABLE_MASK_FLOW_COUNTERS = 0x3 }; struct mlx5_ifc_set_flow_table_root_out_bits { u8 status[0x8]; u8 reserved_0[0x18]; u8 syndrome[0x20]; u8 reserved_1[0x40]; }; struct mlx5_ifc_set_flow_table_root_in_bits { u8 opcode[0x10]; u8 reserved_0[0x10]; u8 reserved_1[0x10]; u8 op_mod[0x10]; u8 other_vport[0x1]; u8 reserved_2[0xf]; u8 vport_number[0x10]; u8 reserved_3[0x20]; u8 table_type[0x8]; u8 reserved_4[0x18]; u8 reserved_5[0x8]; u8 table_id[0x18]; u8 reserved_6[0x8]; u8 underlay_qpn[0x18]; u8 reserved_7[0x120]; }; struct mlx5_ifc_set_fte_out_bits { u8 status[0x8]; u8 reserved_0[0x18]; u8 syndrome[0x20]; u8 reserved_1[0x40]; }; struct mlx5_ifc_set_fte_in_bits { u8 opcode[0x10]; u8 reserved_0[0x10]; u8 reserved_1[0x10]; u8 op_mod[0x10]; u8 other_vport[0x1]; u8 reserved_2[0xf]; u8 vport_number[0x10]; u8 reserved_3[0x20]; u8 table_type[0x8]; u8 reserved_4[0x18]; u8 reserved_5[0x8]; u8 table_id[0x18]; u8 reserved_6[0x18]; u8 modify_enable_mask[0x8]; u8 reserved_7[0x20]; u8 flow_index[0x20]; u8 reserved_8[0xe0]; struct mlx5_ifc_flow_context_bits flow_context; }; struct mlx5_ifc_set_driver_version_out_bits { u8 status[0x8]; u8 reserved_0[0x18]; u8 syndrome[0x20]; u8 reserved_1[0x40]; }; struct mlx5_ifc_set_driver_version_in_bits { u8 opcode[0x10]; u8 reserved_0[0x10]; u8 reserved_1[0x10]; u8 op_mod[0x10]; u8 reserved_2[0x40]; u8 driver_version[64][0x8]; }; struct mlx5_ifc_set_dc_cnak_trace_out_bits { u8 status[0x8]; u8 reserved_0[0x18]; u8 syndrome[0x20]; u8 reserved_1[0x40]; }; struct mlx5_ifc_set_dc_cnak_trace_in_bits { u8 opcode[0x10]; u8 reserved_0[0x10]; u8 reserved_1[0x10]; u8 op_mod[0x10]; u8 enable[0x1]; u8 reserved_2[0x1f]; u8 reserved_3[0x160]; struct mlx5_ifc_cmd_pas_bits pas; }; struct mlx5_ifc_set_burst_size_out_bits { u8 status[0x8]; u8 reserved_0[0x18]; u8 syndrome[0x20]; u8 reserved_1[0x40]; }; struct mlx5_ifc_set_burst_size_in_bits { u8 opcode[0x10]; u8 reserved_0[0x10]; u8 reserved_1[0x10]; u8 op_mod[0x10]; u8 reserved_2[0x20]; u8 reserved_3[0x9]; u8 device_burst_size[0x17]; }; struct mlx5_ifc_rts2rts_qp_out_bits { u8 status[0x8]; u8 reserved_0[0x18]; u8 syndrome[0x20]; u8 reserved_1[0x40]; }; struct mlx5_ifc_rts2rts_qp_in_bits { u8 opcode[0x10]; u8 reserved_0[0x10]; u8 reserved_1[0x10]; u8 op_mod[0x10]; u8 reserved_2[0x8]; u8 qpn[0x18]; u8 reserved_3[0x20]; u8 opt_param_mask[0x20]; u8 reserved_4[0x20]; struct mlx5_ifc_qpc_bits qpc; u8 reserved_5[0x80]; }; struct mlx5_ifc_rtr2rts_qp_out_bits { u8 status[0x8]; u8 reserved_0[0x18]; u8 syndrome[0x20]; u8 reserved_1[0x40]; }; struct mlx5_ifc_rtr2rts_qp_in_bits { u8 opcode[0x10]; u8 reserved_0[0x10]; u8 reserved_1[0x10]; u8 op_mod[0x10]; u8 reserved_2[0x8]; u8 qpn[0x18]; u8 reserved_3[0x20]; u8 opt_param_mask[0x20]; u8 reserved_4[0x20]; struct mlx5_ifc_qpc_bits qpc; u8 reserved_5[0x80]; }; struct mlx5_ifc_rst2init_qp_out_bits { u8 status[0x8]; u8 reserved_0[0x18]; u8 
syndrome[0x20]; u8 reserved_1[0x40]; }; struct mlx5_ifc_rst2init_qp_in_bits { u8 opcode[0x10]; u8 reserved_0[0x10]; u8 reserved_1[0x10]; u8 op_mod[0x10]; u8 reserved_2[0x8]; u8 qpn[0x18]; u8 reserved_3[0x20]; u8 opt_param_mask[0x20]; u8 reserved_4[0x20]; struct mlx5_ifc_qpc_bits qpc; u8 reserved_5[0x80]; }; struct mlx5_ifc_resume_qp_out_bits { u8 status[0x8]; u8 reserved_0[0x18]; u8 syndrome[0x20]; u8 reserved_1[0x40]; }; struct mlx5_ifc_resume_qp_in_bits { u8 opcode[0x10]; u8 reserved_0[0x10]; u8 reserved_1[0x10]; u8 op_mod[0x10]; u8 reserved_2[0x8]; u8 qpn[0x18]; u8 reserved_3[0x20]; }; struct mlx5_ifc_query_xrc_srq_out_bits { u8 status[0x8]; u8 reserved_0[0x18]; u8 syndrome[0x20]; u8 reserved_1[0x40]; struct mlx5_ifc_xrc_srqc_bits xrc_srq_context_entry; u8 reserved_2[0x600]; u8 pas[0][0x40]; }; struct mlx5_ifc_query_xrc_srq_in_bits { u8 opcode[0x10]; u8 reserved_0[0x10]; u8 reserved_1[0x10]; u8 op_mod[0x10]; u8 reserved_2[0x8]; u8 xrc_srqn[0x18]; u8 reserved_3[0x20]; }; struct mlx5_ifc_query_wol_rol_out_bits { u8 status[0x8]; u8 reserved_0[0x18]; u8 syndrome[0x20]; u8 reserved_1[0x10]; u8 rol_mode[0x8]; u8 wol_mode[0x8]; u8 reserved_2[0x20]; }; struct mlx5_ifc_query_wol_rol_in_bits { u8 opcode[0x10]; u8 reserved_0[0x10]; u8 reserved_1[0x10]; u8 op_mod[0x10]; u8 reserved_2[0x40]; }; enum { MLX5_QUERY_VPORT_STATE_OUT_STATE_DOWN = 0x0, MLX5_QUERY_VPORT_STATE_OUT_STATE_UP = 0x1, }; struct mlx5_ifc_query_vport_state_out_bits { u8 status[0x8]; u8 reserved_0[0x18]; u8 syndrome[0x20]; u8 reserved_1[0x20]; u8 reserved_2[0x18]; u8 admin_state[0x4]; u8 state[0x4]; }; enum { MLX5_QUERY_VPORT_STATE_IN_OP_MOD_VNIC_VPORT = 0x0, MLX5_QUERY_VPORT_STATE_IN_OP_MOD_ESW_VPORT = 0x1, MLX5_QUERY_VPORT_STATE_IN_OP_MOD_UPLINK = 0x2, }; struct mlx5_ifc_query_vport_state_in_bits { u8 opcode[0x10]; u8 reserved_0[0x10]; u8 reserved_1[0x10]; u8 op_mod[0x10]; u8 other_vport[0x1]; u8 reserved_2[0xf]; u8 vport_number[0x10]; u8 reserved_3[0x20]; }; struct mlx5_ifc_query_vnic_env_out_bits { u8 status[0x8]; u8 reserved_at_8[0x18]; u8 syndrome[0x20]; u8 reserved_at_40[0x40]; struct mlx5_ifc_vnic_diagnostic_statistics_bits vport_env; }; enum { MLX5_QUERY_VNIC_ENV_IN_OP_MOD_VPORT_DIAG_STATISTICS = 0x0, }; struct mlx5_ifc_query_vnic_env_in_bits { u8 opcode[0x10]; u8 reserved_at_10[0x10]; u8 reserved_at_20[0x10]; u8 op_mod[0x10]; u8 other_vport[0x1]; u8 reserved_at_41[0xf]; u8 vport_number[0x10]; u8 reserved_at_60[0x20]; }; struct mlx5_ifc_query_vport_counter_out_bits { u8 status[0x8]; u8 reserved_0[0x18]; u8 syndrome[0x20]; u8 reserved_1[0x40]; struct mlx5_ifc_traffic_counter_bits received_errors; struct mlx5_ifc_traffic_counter_bits transmit_errors; struct mlx5_ifc_traffic_counter_bits received_ib_unicast; struct mlx5_ifc_traffic_counter_bits transmitted_ib_unicast; struct mlx5_ifc_traffic_counter_bits received_ib_multicast; struct mlx5_ifc_traffic_counter_bits transmitted_ib_multicast; struct mlx5_ifc_traffic_counter_bits received_eth_broadcast; struct mlx5_ifc_traffic_counter_bits transmitted_eth_broadcast; struct mlx5_ifc_traffic_counter_bits received_eth_unicast; struct mlx5_ifc_traffic_counter_bits transmitted_eth_unicast; struct mlx5_ifc_traffic_counter_bits received_eth_multicast; struct mlx5_ifc_traffic_counter_bits transmitted_eth_multicast; u8 reserved_2[0xa00]; }; enum { MLX5_QUERY_VPORT_COUNTER_IN_OP_MOD_VPORT_COUNTERS = 0x0, }; struct mlx5_ifc_query_vport_counter_in_bits { u8 opcode[0x10]; u8 reserved_0[0x10]; u8 reserved_1[0x10]; u8 op_mod[0x10]; u8 other_vport[0x1]; u8 reserved_2[0xb]; u8 port_num[0x4]; u8 
vport_number[0x10]; u8 reserved_3[0x60]; u8 clear[0x1]; u8 reserved_4[0x1f]; u8 reserved_5[0x20]; }; struct mlx5_ifc_query_tis_out_bits { u8 status[0x8]; u8 reserved_0[0x18]; u8 syndrome[0x20]; u8 reserved_1[0x40]; struct mlx5_ifc_tisc_bits tis_context; }; struct mlx5_ifc_query_tis_in_bits { u8 opcode[0x10]; u8 reserved_0[0x10]; u8 reserved_1[0x10]; u8 op_mod[0x10]; u8 reserved_2[0x8]; u8 tisn[0x18]; u8 reserved_3[0x20]; }; struct mlx5_ifc_query_tir_out_bits { u8 status[0x8]; u8 reserved_0[0x18]; u8 syndrome[0x20]; u8 reserved_1[0xc0]; struct mlx5_ifc_tirc_bits tir_context; }; struct mlx5_ifc_query_tir_in_bits { u8 opcode[0x10]; u8 reserved_0[0x10]; u8 reserved_1[0x10]; u8 op_mod[0x10]; u8 reserved_2[0x8]; u8 tirn[0x18]; u8 reserved_3[0x20]; }; struct mlx5_ifc_query_srq_out_bits { u8 status[0x8]; u8 reserved_0[0x18]; u8 syndrome[0x20]; u8 reserved_1[0x40]; struct mlx5_ifc_srqc_bits srq_context_entry; u8 reserved_2[0x600]; u8 pas[0][0x40]; }; struct mlx5_ifc_query_srq_in_bits { u8 opcode[0x10]; u8 reserved_0[0x10]; u8 reserved_1[0x10]; u8 op_mod[0x10]; u8 reserved_2[0x8]; u8 srqn[0x18]; u8 reserved_3[0x20]; }; struct mlx5_ifc_query_sq_out_bits { u8 status[0x8]; u8 reserved_0[0x18]; u8 syndrome[0x20]; u8 reserved_1[0xc0]; struct mlx5_ifc_sqc_bits sq_context; }; struct mlx5_ifc_query_sq_in_bits { u8 opcode[0x10]; u8 reserved_0[0x10]; u8 reserved_1[0x10]; u8 op_mod[0x10]; u8 reserved_2[0x8]; u8 sqn[0x18]; u8 reserved_3[0x20]; }; struct mlx5_ifc_query_special_contexts_out_bits { u8 status[0x8]; u8 reserved_0[0x18]; u8 syndrome[0x20]; u8 dump_fill_mkey[0x20]; u8 resd_lkey[0x20]; }; struct mlx5_ifc_query_special_contexts_in_bits { u8 opcode[0x10]; u8 reserved_0[0x10]; u8 reserved_1[0x10]; u8 op_mod[0x10]; u8 reserved_2[0x40]; }; struct mlx5_ifc_query_scheduling_element_out_bits { u8 status[0x8]; u8 reserved_at_8[0x18]; u8 syndrome[0x20]; u8 reserved_at_40[0xc0]; struct mlx5_ifc_scheduling_context_bits scheduling_context; u8 reserved_at_300[0x100]; }; enum { MLX5_SCHEDULING_ELEMENT_IN_HIERARCHY_E_SWITCH = 0x2, }; struct mlx5_ifc_query_scheduling_element_in_bits { u8 opcode[0x10]; u8 reserved_at_10[0x10]; u8 reserved_at_20[0x10]; u8 op_mod[0x10]; u8 scheduling_hierarchy[0x8]; u8 reserved_at_48[0x18]; u8 scheduling_element_id[0x20]; u8 reserved_at_80[0x180]; }; struct mlx5_ifc_query_rqt_out_bits { u8 status[0x8]; u8 reserved_0[0x18]; u8 syndrome[0x20]; u8 reserved_1[0xc0]; struct mlx5_ifc_rqtc_bits rqt_context; }; struct mlx5_ifc_query_rqt_in_bits { u8 opcode[0x10]; u8 reserved_0[0x10]; u8 reserved_1[0x10]; u8 op_mod[0x10]; u8 reserved_2[0x8]; u8 rqtn[0x18]; u8 reserved_3[0x20]; }; struct mlx5_ifc_query_rq_out_bits { u8 status[0x8]; u8 reserved_0[0x18]; u8 syndrome[0x20]; u8 reserved_1[0xc0]; struct mlx5_ifc_rqc_bits rq_context; }; struct mlx5_ifc_query_rq_in_bits { u8 opcode[0x10]; u8 reserved_0[0x10]; u8 reserved_1[0x10]; u8 op_mod[0x10]; u8 reserved_2[0x8]; u8 rqn[0x18]; u8 reserved_3[0x20]; }; struct mlx5_ifc_query_roce_address_out_bits { u8 status[0x8]; u8 reserved_0[0x18]; u8 syndrome[0x20]; u8 reserved_1[0x40]; struct mlx5_ifc_roce_addr_layout_bits roce_address; }; struct mlx5_ifc_query_roce_address_in_bits { u8 opcode[0x10]; u8 reserved_0[0x10]; u8 reserved_1[0x10]; u8 op_mod[0x10]; u8 roce_address_index[0x10]; u8 reserved_2[0x10]; u8 reserved_3[0x20]; }; struct mlx5_ifc_query_rmp_out_bits { u8 status[0x8]; u8 reserved_0[0x18]; u8 syndrome[0x20]; u8 reserved_1[0xc0]; struct mlx5_ifc_rmpc_bits rmp_context; }; struct mlx5_ifc_query_rmp_in_bits { u8 opcode[0x10]; u8 reserved_0[0x10]; u8 
reserved_1[0x10]; u8 op_mod[0x10]; u8 reserved_2[0x8]; u8 rmpn[0x18]; u8 reserved_3[0x20]; }; struct mlx5_ifc_query_rdb_out_bits { u8 status[0x8]; u8 reserved_0[0x18]; u8 syndrome[0x20]; u8 reserved_1[0x20]; u8 reserved_2[0x18]; u8 rdb_list_size[0x8]; struct mlx5_ifc_rdbc_bits rdb_context[0]; }; struct mlx5_ifc_query_rdb_in_bits { u8 opcode[0x10]; u8 reserved_0[0x10]; u8 reserved_1[0x10]; u8 op_mod[0x10]; u8 reserved_2[0x8]; u8 qpn[0x18]; u8 reserved_3[0x20]; }; struct mlx5_ifc_query_qp_out_bits { u8 status[0x8]; u8 reserved_0[0x18]; u8 syndrome[0x20]; u8 reserved_1[0x40]; u8 opt_param_mask[0x20]; u8 reserved_2[0x20]; struct mlx5_ifc_qpc_bits qpc; u8 reserved_3[0x80]; u8 pas[0][0x40]; }; struct mlx5_ifc_query_qp_in_bits { u8 opcode[0x10]; u8 reserved_0[0x10]; u8 reserved_1[0x10]; u8 op_mod[0x10]; u8 reserved_2[0x8]; u8 qpn[0x18]; u8 reserved_3[0x20]; }; struct mlx5_ifc_query_q_counter_out_bits { u8 status[0x8]; u8 reserved_0[0x18]; u8 syndrome[0x20]; u8 reserved_1[0x40]; u8 rx_write_requests[0x20]; u8 reserved_2[0x20]; u8 rx_read_requests[0x20]; u8 reserved_3[0x20]; u8 rx_atomic_requests[0x20]; u8 reserved_4[0x20]; u8 rx_dct_connect[0x20]; u8 reserved_5[0x20]; u8 out_of_buffer[0x20]; u8 reserved_7[0x20]; u8 out_of_sequence[0x20]; u8 reserved_8[0x20]; u8 duplicate_request[0x20]; u8 reserved_9[0x20]; u8 rnr_nak_retry_err[0x20]; u8 reserved_10[0x20]; u8 packet_seq_err[0x20]; u8 reserved_11[0x20]; u8 implied_nak_seq_err[0x20]; u8 reserved_12[0x20]; u8 local_ack_timeout_err[0x20]; u8 reserved_13[0x20]; u8 resp_rnr_nak[0x20]; u8 reserved_14[0x20]; u8 req_rnr_retries_exceeded[0x20]; u8 reserved_15[0x460]; }; struct mlx5_ifc_query_q_counter_in_bits { u8 opcode[0x10]; u8 reserved_0[0x10]; u8 reserved_1[0x10]; u8 op_mod[0x10]; u8 reserved_2[0x80]; u8 clear[0x1]; u8 reserved_3[0x1f]; u8 reserved_4[0x18]; u8 counter_set_id[0x8]; }; struct mlx5_ifc_query_pages_out_bits { u8 status[0x8]; u8 reserved_0[0x18]; u8 syndrome[0x20]; u8 reserved_1[0x10]; u8 function_id[0x10]; u8 num_pages[0x20]; }; enum { MLX5_QUERY_PAGES_IN_OP_MOD_BOOT_PAGES = 0x1, MLX5_QUERY_PAGES_IN_OP_MOD_INIT_PAGES = 0x2, MLX5_QUERY_PAGES_IN_OP_MOD_REGULAR_PAGES = 0x3, }; struct mlx5_ifc_query_pages_in_bits { u8 opcode[0x10]; u8 reserved_0[0x10]; u8 reserved_1[0x10]; u8 op_mod[0x10]; u8 reserved_2[0x10]; u8 function_id[0x10]; u8 reserved_3[0x20]; }; struct mlx5_ifc_query_nic_vport_context_out_bits { u8 status[0x8]; u8 reserved_0[0x18]; u8 syndrome[0x20]; u8 reserved_1[0x40]; struct mlx5_ifc_nic_vport_context_bits nic_vport_context; }; struct mlx5_ifc_query_nic_vport_context_in_bits { u8 opcode[0x10]; u8 reserved_0[0x10]; u8 reserved_1[0x10]; u8 op_mod[0x10]; u8 other_vport[0x1]; u8 reserved_2[0xf]; u8 vport_number[0x10]; u8 reserved_3[0x5]; u8 allowed_list_type[0x3]; u8 reserved_4[0x18]; }; struct mlx5_ifc_query_mkey_out_bits { u8 status[0x8]; u8 reserved_0[0x18]; u8 syndrome[0x20]; u8 reserved_1[0x40]; struct mlx5_ifc_mkc_bits memory_key_mkey_entry; u8 reserved_2[0x600]; u8 bsf0_klm0_pas_mtt0_1[16][0x8]; u8 bsf1_klm1_pas_mtt2_3[16][0x8]; }; struct mlx5_ifc_query_mkey_in_bits { u8 opcode[0x10]; u8 reserved_0[0x10]; u8 reserved_1[0x10]; u8 op_mod[0x10]; u8 reserved_2[0x8]; u8 mkey_index[0x18]; u8 pg_access[0x1]; u8 reserved_3[0x1f]; }; struct mlx5_ifc_query_mad_demux_out_bits { u8 status[0x8]; u8 reserved_0[0x18]; u8 syndrome[0x20]; u8 reserved_1[0x40]; u8 mad_dumux_parameters_block[0x20]; }; struct mlx5_ifc_query_mad_demux_in_bits { u8 opcode[0x10]; u8 reserved_0[0x10]; u8 reserved_1[0x10]; u8 op_mod[0x10]; u8 reserved_2[0x40]; }; struct 
mlx5_ifc_query_l2_table_entry_out_bits { u8 status[0x8]; u8 reserved_0[0x18]; u8 syndrome[0x20]; u8 reserved_1[0xa0]; u8 reserved_2[0x13]; u8 vlan_valid[0x1]; u8 vlan[0xc]; struct mlx5_ifc_mac_address_layout_bits mac_address; u8 reserved_3[0xc0]; }; struct mlx5_ifc_query_l2_table_entry_in_bits { u8 opcode[0x10]; u8 reserved_0[0x10]; u8 reserved_1[0x10]; u8 op_mod[0x10]; u8 reserved_2[0x60]; u8 reserved_3[0x8]; u8 table_index[0x18]; u8 reserved_4[0x140]; }; struct mlx5_ifc_query_issi_out_bits { u8 status[0x8]; u8 reserved_0[0x18]; u8 syndrome[0x20]; u8 reserved_1[0x10]; u8 current_issi[0x10]; u8 reserved_2[0xa0]; u8 supported_issi_reserved[76][0x8]; u8 supported_issi_dw0[0x20]; }; struct mlx5_ifc_query_issi_in_bits { u8 opcode[0x10]; u8 reserved_0[0x10]; u8 reserved_1[0x10]; u8 op_mod[0x10]; u8 reserved_2[0x40]; }; struct mlx5_ifc_query_hca_vport_pkey_out_bits { u8 status[0x8]; u8 reserved_0[0x18]; u8 syndrome[0x20]; u8 reserved_1[0x40]; struct mlx5_ifc_pkey_bits pkey[0]; }; struct mlx5_ifc_query_hca_vport_pkey_in_bits { u8 opcode[0x10]; u8 reserved_0[0x10]; u8 reserved_1[0x10]; u8 op_mod[0x10]; u8 other_vport[0x1]; u8 reserved_2[0xb]; u8 port_num[0x4]; u8 vport_number[0x10]; u8 reserved_3[0x10]; u8 pkey_index[0x10]; }; struct mlx5_ifc_query_hca_vport_gid_out_bits { u8 status[0x8]; u8 reserved_0[0x18]; u8 syndrome[0x20]; u8 reserved_1[0x20]; u8 gids_num[0x10]; u8 reserved_2[0x10]; struct mlx5_ifc_array128_auto_bits gid[0]; }; struct mlx5_ifc_query_hca_vport_gid_in_bits { u8 opcode[0x10]; u8 reserved_0[0x10]; u8 reserved_1[0x10]; u8 op_mod[0x10]; u8 other_vport[0x1]; u8 reserved_2[0xb]; u8 port_num[0x4]; u8 vport_number[0x10]; u8 reserved_3[0x10]; u8 gid_index[0x10]; }; struct mlx5_ifc_query_hca_vport_context_out_bits { u8 status[0x8]; u8 reserved_0[0x18]; u8 syndrome[0x20]; u8 reserved_1[0x40]; struct mlx5_ifc_hca_vport_context_bits hca_vport_context; }; struct mlx5_ifc_query_hca_vport_context_in_bits { u8 opcode[0x10]; u8 reserved_0[0x10]; u8 reserved_1[0x10]; u8 op_mod[0x10]; u8 other_vport[0x1]; u8 reserved_2[0xb]; u8 port_num[0x4]; u8 vport_number[0x10]; u8 reserved_3[0x20]; }; struct mlx5_ifc_query_hca_cap_out_bits { u8 status[0x8]; u8 reserved_0[0x18]; u8 syndrome[0x20]; u8 reserved_1[0x40]; union mlx5_ifc_hca_cap_union_bits capability; }; struct mlx5_ifc_query_hca_cap_in_bits { u8 opcode[0x10]; u8 reserved_0[0x10]; u8 reserved_1[0x10]; u8 op_mod[0x10]; u8 reserved_2[0x40]; }; struct mlx5_ifc_query_flow_table_out_bits { u8 status[0x8]; u8 reserved_at_8[0x18]; u8 syndrome[0x20]; u8 reserved_at_40[0x80]; struct mlx5_ifc_flow_table_context_bits flow_table_context; }; struct mlx5_ifc_query_flow_table_in_bits { u8 opcode[0x10]; u8 reserved_0[0x10]; u8 reserved_1[0x10]; u8 op_mod[0x10]; u8 other_vport[0x1]; u8 reserved_2[0xf]; u8 vport_number[0x10]; u8 reserved_3[0x20]; u8 table_type[0x8]; u8 reserved_4[0x18]; u8 reserved_5[0x8]; u8 table_id[0x18]; u8 reserved_6[0x140]; }; struct mlx5_ifc_query_fte_out_bits { u8 status[0x8]; u8 reserved_0[0x18]; u8 syndrome[0x20]; u8 reserved_1[0x1c0]; struct mlx5_ifc_flow_context_bits flow_context; }; struct mlx5_ifc_query_fte_in_bits { u8 opcode[0x10]; u8 reserved_0[0x10]; u8 reserved_1[0x10]; u8 op_mod[0x10]; u8 other_vport[0x1]; u8 reserved_2[0xf]; u8 vport_number[0x10]; u8 reserved_3[0x20]; u8 table_type[0x8]; u8 reserved_4[0x18]; u8 reserved_5[0x8]; u8 table_id[0x18]; u8 reserved_6[0x40]; u8 flow_index[0x20]; u8 reserved_7[0xe0]; }; enum { MLX5_QUERY_FLOW_GROUP_OUT_MATCH_CRITERIA_ENABLE_OUTER_HEADERS = 0x0, 
MLX5_QUERY_FLOW_GROUP_OUT_MATCH_CRITERIA_ENABLE_MISC_PARAMETERS = 0x1, MLX5_QUERY_FLOW_GROUP_OUT_MATCH_CRITERIA_ENABLE_INNER_HEADERS = 0x2, }; struct mlx5_ifc_query_flow_group_out_bits { u8 status[0x8]; u8 reserved_0[0x18]; u8 syndrome[0x20]; u8 reserved_1[0xa0]; u8 start_flow_index[0x20]; u8 reserved_2[0x20]; u8 end_flow_index[0x20]; u8 reserved_3[0xa0]; u8 reserved_4[0x18]; u8 match_criteria_enable[0x8]; struct mlx5_ifc_fte_match_param_bits match_criteria; u8 reserved_5[0xe00]; }; struct mlx5_ifc_query_flow_group_in_bits { u8 opcode[0x10]; u8 reserved_0[0x10]; u8 reserved_1[0x10]; u8 op_mod[0x10]; u8 other_vport[0x1]; u8 reserved_2[0xf]; u8 vport_number[0x10]; u8 reserved_3[0x20]; u8 table_type[0x8]; u8 reserved_4[0x18]; u8 reserved_5[0x8]; u8 table_id[0x18]; u8 group_id[0x20]; u8 reserved_6[0x120]; }; struct mlx5_ifc_query_flow_counter_out_bits { u8 status[0x8]; u8 reserved_at_8[0x18]; u8 syndrome[0x20]; u8 reserved_at_40[0x40]; struct mlx5_ifc_traffic_counter_bits flow_statistics[0]; }; struct mlx5_ifc_query_flow_counter_in_bits { u8 opcode[0x10]; u8 reserved_at_10[0x10]; u8 reserved_at_20[0x10]; u8 op_mod[0x10]; u8 reserved_at_40[0x80]; u8 clear[0x1]; u8 reserved_at_c1[0xf]; u8 num_of_counters[0x10]; u8 reserved_at_e0[0x10]; u8 flow_counter_id[0x10]; }; struct mlx5_ifc_query_esw_vport_context_out_bits { u8 status[0x8]; u8 reserved_0[0x18]; u8 syndrome[0x20]; u8 reserved_1[0x40]; struct mlx5_ifc_esw_vport_context_bits esw_vport_context; }; struct mlx5_ifc_query_esw_vport_context_in_bits { u8 opcode[0x10]; u8 reserved_0[0x10]; u8 reserved_1[0x10]; u8 op_mod[0x10]; u8 other_vport[0x1]; u8 reserved_2[0xf]; u8 vport_number[0x10]; u8 reserved_3[0x20]; }; struct mlx5_ifc_query_eq_out_bits { u8 status[0x8]; u8 reserved_0[0x18]; u8 syndrome[0x20]; u8 reserved_1[0x40]; struct mlx5_ifc_eqc_bits eq_context_entry; u8 reserved_2[0x40]; u8 event_bitmask[0x40]; u8 reserved_3[0x580]; u8 pas[0][0x40]; }; struct mlx5_ifc_query_eq_in_bits { u8 opcode[0x10]; u8 reserved_0[0x10]; u8 reserved_1[0x10]; u8 op_mod[0x10]; u8 reserved_2[0x18]; u8 eq_number[0x8]; u8 reserved_3[0x20]; }; struct mlx5_ifc_query_dct_out_bits { u8 status[0x8]; u8 reserved_0[0x18]; u8 syndrome[0x20]; u8 reserved_1[0x40]; struct mlx5_ifc_dctc_bits dct_context_entry; u8 reserved_2[0x180]; }; struct mlx5_ifc_query_dct_in_bits { u8 opcode[0x10]; u8 reserved_0[0x10]; u8 reserved_1[0x10]; u8 op_mod[0x10]; u8 reserved_2[0x8]; u8 dctn[0x18]; u8 reserved_3[0x20]; }; struct mlx5_ifc_query_dc_cnak_trace_out_bits { u8 status[0x8]; u8 reserved_0[0x18]; u8 syndrome[0x20]; u8 enable[0x1]; u8 reserved_1[0x1f]; u8 reserved_2[0x160]; struct mlx5_ifc_cmd_pas_bits pas; }; struct mlx5_ifc_query_dc_cnak_trace_in_bits { u8 opcode[0x10]; u8 reserved_0[0x10]; u8 reserved_1[0x10]; u8 op_mod[0x10]; u8 reserved_2[0x40]; }; struct mlx5_ifc_query_cq_out_bits { u8 status[0x8]; u8 reserved_0[0x18]; u8 syndrome[0x20]; u8 reserved_1[0x40]; struct mlx5_ifc_cqc_bits cq_context; u8 reserved_2[0x600]; u8 pas[0][0x40]; }; struct mlx5_ifc_query_cq_in_bits { u8 opcode[0x10]; u8 reserved_0[0x10]; u8 reserved_1[0x10]; u8 op_mod[0x10]; u8 reserved_2[0x8]; u8 cqn[0x18]; u8 reserved_3[0x20]; }; struct mlx5_ifc_query_cong_status_out_bits { u8 status[0x8]; u8 reserved_0[0x18]; u8 syndrome[0x20]; u8 reserved_1[0x20]; u8 enable[0x1]; u8 tag_enable[0x1]; u8 reserved_2[0x1e]; }; struct mlx5_ifc_query_cong_status_in_bits { u8 opcode[0x10]; u8 reserved_0[0x10]; u8 reserved_1[0x10]; u8 op_mod[0x10]; u8 reserved_2[0x18]; u8 priority[0x4]; u8 cong_protocol[0x4]; u8 reserved_3[0x20]; }; 
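/*
 * Illustrative sketch, not part of the patch: in the mlx5_ifc layouts above,
 * each "u8 name[0xN]" describes an N-bit field of a big-endian PRM structure,
 * not a byte array.  The driver normally accesses such fields through its
 * MLX5_GET()/MLX5_SET() helpers; the hypothetical helper below spells out the
 * same dword-at-a-time access by hand, assuming the requested field does not
 * straddle a 32-bit boundary (true for the common status/syndrome command
 * header shared by the *_out_bits layouts).
 */
#include <stdint.h>

static inline uint32_t
example_prm_get(const void *buf, unsigned int bit_off, unsigned int bit_sz)
{
	/* Locate and load the big-endian 32-bit dword holding the field. */
	const uint8_t *p = (const uint8_t *)buf + (bit_off / 32) * 4;
	uint32_t dw = ((uint32_t)p[0] << 24) | ((uint32_t)p[1] << 16) |
	    ((uint32_t)p[2] << 8) | (uint32_t)p[3];
	/* Fields are packed MSB-first within each dword. */
	unsigned int shift = 32 - (bit_off % 32) - bit_sz;
	uint32_t mask = (bit_sz >= 32) ? 0xffffffffu : ((1u << bit_sz) - 1);

	return ((dw >> shift) & mask);
}

/*
 * Example use against any of the *_out_bits command layouts above, e.g.
 * query_cong_status_out, where status occupies bits [0,8) and syndrome
 * bits [32,64):
 *
 *	uint32_t status   = example_prm_get(out, 0, 8);
 *	uint32_t syndrome = example_prm_get(out, 32, 32);
 */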
struct mlx5_ifc_query_cong_statistics_out_bits { u8 status[0x8]; u8 reserved_0[0x18]; u8 syndrome[0x20]; u8 reserved_1[0x40]; u8 rp_cur_flows[0x20]; u8 sum_flows[0x20]; u8 rp_cnp_ignored_high[0x20]; u8 rp_cnp_ignored_low[0x20]; u8 rp_cnp_handled_high[0x20]; u8 rp_cnp_handled_low[0x20]; u8 reserved_2[0x100]; u8 time_stamp_high[0x20]; u8 time_stamp_low[0x20]; u8 accumulators_period[0x20]; u8 np_ecn_marked_roce_packets_high[0x20]; u8 np_ecn_marked_roce_packets_low[0x20]; u8 np_cnp_sent_high[0x20]; u8 np_cnp_sent_low[0x20]; u8 reserved_3[0x560]; }; struct mlx5_ifc_query_cong_statistics_in_bits { u8 opcode[0x10]; u8 reserved_0[0x10]; u8 reserved_1[0x10]; u8 op_mod[0x10]; u8 clear[0x1]; u8 reserved_2[0x1f]; u8 reserved_3[0x20]; }; struct mlx5_ifc_query_cong_params_out_bits { u8 status[0x8]; u8 reserved_0[0x18]; u8 syndrome[0x20]; u8 reserved_1[0x40]; union mlx5_ifc_cong_control_roce_ecn_auto_bits congestion_parameters; }; struct mlx5_ifc_query_cong_params_in_bits { u8 opcode[0x10]; u8 reserved_0[0x10]; u8 reserved_1[0x10]; u8 op_mod[0x10]; u8 reserved_2[0x1c]; u8 cong_protocol[0x4]; u8 reserved_3[0x20]; }; struct mlx5_ifc_query_burst_size_out_bits { u8 status[0x8]; u8 reserved_0[0x18]; u8 syndrome[0x20]; u8 reserved_1[0x20]; u8 reserved_2[0x9]; u8 device_burst_size[0x17]; }; struct mlx5_ifc_query_burst_size_in_bits { u8 opcode[0x10]; u8 reserved_0[0x10]; u8 reserved_1[0x10]; u8 op_mod[0x10]; u8 reserved_2[0x40]; }; struct mlx5_ifc_query_adapter_out_bits { u8 status[0x8]; u8 reserved_0[0x18]; u8 syndrome[0x20]; u8 reserved_1[0x40]; struct mlx5_ifc_query_adapter_param_block_bits query_adapter_struct; }; struct mlx5_ifc_query_adapter_in_bits { u8 opcode[0x10]; u8 reserved_0[0x10]; u8 reserved_1[0x10]; u8 op_mod[0x10]; u8 reserved_2[0x40]; }; struct mlx5_ifc_qp_2rst_out_bits { u8 status[0x8]; u8 reserved_0[0x18]; u8 syndrome[0x20]; u8 reserved_1[0x40]; }; struct mlx5_ifc_qp_2rst_in_bits { u8 opcode[0x10]; u8 reserved_0[0x10]; u8 reserved_1[0x10]; u8 op_mod[0x10]; u8 reserved_2[0x8]; u8 qpn[0x18]; u8 reserved_3[0x20]; }; struct mlx5_ifc_qp_2err_out_bits { u8 status[0x8]; u8 reserved_0[0x18]; u8 syndrome[0x20]; u8 reserved_1[0x40]; }; struct mlx5_ifc_qp_2err_in_bits { u8 opcode[0x10]; u8 reserved_0[0x10]; u8 reserved_1[0x10]; u8 op_mod[0x10]; u8 reserved_2[0x8]; u8 qpn[0x18]; u8 reserved_3[0x20]; }; struct mlx5_ifc_para_vport_element_bits { u8 reserved_at_0[0xc]; u8 traffic_class[0x4]; u8 qos_para_vport_number[0x10]; }; struct mlx5_ifc_page_fault_resume_out_bits { u8 status[0x8]; u8 reserved_0[0x18]; u8 syndrome[0x20]; u8 reserved_1[0x40]; }; struct mlx5_ifc_page_fault_resume_in_bits { u8 opcode[0x10]; u8 reserved_0[0x10]; u8 reserved_1[0x10]; u8 op_mod[0x10]; u8 error[0x1]; u8 reserved_2[0x4]; u8 rdma[0x1]; u8 read_write[0x1]; u8 req_res[0x1]; u8 qpn[0x18]; u8 reserved_3[0x20]; }; struct mlx5_ifc_nop_out_bits { u8 status[0x8]; u8 reserved_0[0x18]; u8 syndrome[0x20]; u8 reserved_1[0x40]; }; struct mlx5_ifc_nop_in_bits { u8 opcode[0x10]; u8 reserved_0[0x10]; u8 reserved_1[0x10]; u8 op_mod[0x10]; u8 reserved_2[0x40]; }; struct mlx5_ifc_modify_vport_state_out_bits { u8 status[0x8]; u8 reserved_0[0x18]; u8 syndrome[0x20]; u8 reserved_1[0x40]; }; enum { MLX5_MODIFY_VPORT_STATE_IN_OP_MOD_NIC_VPORT = 0x0, MLX5_MODIFY_VPORT_STATE_IN_OP_MOD_ESW_VPORT = 0x1, MLX5_MODIFY_VPORT_STATE_IN_OP_MOD_UPLINK = 0x2, }; enum { MLX5_MODIFY_VPORT_STATE_IN_ADMIN_STATE_DOWN = 0x0, MLX5_MODIFY_VPORT_STATE_IN_ADMIN_STATE_UP = 0x1, MLX5_MODIFY_VPORT_STATE_IN_ADMIN_STATE_FOLLOW = 0x2, }; struct mlx5_ifc_modify_vport_state_in_bits 
{ u8 opcode[0x10]; u8 reserved_0[0x10]; u8 reserved_1[0x10]; u8 op_mod[0x10]; u8 other_vport[0x1]; u8 reserved_2[0xf]; u8 vport_number[0x10]; u8 reserved_3[0x18]; u8 admin_state[0x4]; u8 reserved_4[0x4]; }; struct mlx5_ifc_modify_tis_out_bits { u8 status[0x8]; u8 reserved_0[0x18]; u8 syndrome[0x20]; u8 reserved_1[0x40]; }; struct mlx5_ifc_modify_tis_bitmask_bits { u8 reserved_at_0[0x20]; u8 reserved_at_20[0x1d]; u8 lag_tx_port_affinity[0x1]; u8 strict_lag_tx_port_affinity[0x1]; u8 prio[0x1]; }; struct mlx5_ifc_modify_tis_in_bits { u8 opcode[0x10]; u8 reserved_0[0x10]; u8 reserved_1[0x10]; u8 op_mod[0x10]; u8 reserved_2[0x8]; u8 tisn[0x18]; u8 reserved_3[0x20]; struct mlx5_ifc_modify_tis_bitmask_bits bitmask; u8 reserved_4[0x40]; struct mlx5_ifc_tisc_bits ctx; }; struct mlx5_ifc_modify_tir_out_bits { u8 status[0x8]; u8 reserved_0[0x18]; u8 syndrome[0x20]; u8 reserved_1[0x40]; }; enum { MLX5_MODIFY_SQ_BITMASK_PACKET_PACING_RATE_LIMIT_INDEX = 0x1 << 0, MLX5_MODIFY_SQ_BITMASK_QOS_PARA_VPORT_NUMBER = 0x1 << 1 }; struct mlx5_ifc_modify_tir_in_bits { u8 opcode[0x10]; u8 reserved_0[0x10]; u8 reserved_1[0x10]; u8 op_mod[0x10]; u8 reserved_2[0x8]; u8 tirn[0x18]; u8 reserved_3[0x20]; u8 modify_bitmask[0x40]; u8 reserved_4[0x40]; struct mlx5_ifc_tirc_bits tir_context; }; struct mlx5_ifc_modify_sq_out_bits { u8 status[0x8]; u8 reserved_0[0x18]; u8 syndrome[0x20]; u8 reserved_1[0x40]; }; struct mlx5_ifc_modify_sq_in_bits { u8 opcode[0x10]; u8 reserved_0[0x10]; u8 reserved_1[0x10]; u8 op_mod[0x10]; u8 sq_state[0x4]; u8 reserved_2[0x4]; u8 sqn[0x18]; u8 reserved_3[0x20]; u8 modify_bitmask[0x40]; u8 reserved_4[0x40]; struct mlx5_ifc_sqc_bits ctx; }; struct mlx5_ifc_modify_scheduling_element_out_bits { u8 status[0x8]; u8 reserved_at_8[0x18]; u8 syndrome[0x20]; u8 reserved_at_40[0x1c0]; }; enum { MLX5_MODIFY_SCHEDULING_ELEMENT_IN_SCHEDULING_HIERARCHY_E_SWITCH = 0x2, }; enum { MLX5_MODIFY_SCHEDULING_ELEMENT_BITMASK_BW_SHARE = 0x1, MLX5_MODIFY_SCHEDULING_ELEMENT_BITMASK_MAX_AVERAGE_BW = 0x2, }; struct mlx5_ifc_modify_scheduling_element_in_bits { u8 opcode[0x10]; u8 reserved_at_10[0x10]; u8 reserved_at_20[0x10]; u8 op_mod[0x10]; u8 scheduling_hierarchy[0x8]; u8 reserved_at_48[0x18]; u8 scheduling_element_id[0x20]; u8 reserved_at_80[0x20]; u8 modify_bitmask[0x20]; u8 reserved_at_c0[0x40]; struct mlx5_ifc_scheduling_context_bits scheduling_context; u8 reserved_at_300[0x100]; }; struct mlx5_ifc_modify_rqt_out_bits { u8 status[0x8]; u8 reserved_0[0x18]; u8 syndrome[0x20]; u8 reserved_1[0x40]; }; struct mlx5_ifc_modify_rqt_in_bits { u8 opcode[0x10]; u8 reserved_0[0x10]; u8 reserved_1[0x10]; u8 op_mod[0x10]; u8 reserved_2[0x8]; u8 rqtn[0x18]; u8 reserved_3[0x20]; u8 modify_bitmask[0x40]; u8 reserved_4[0x40]; struct mlx5_ifc_rqtc_bits ctx; }; struct mlx5_ifc_modify_rq_out_bits { u8 status[0x8]; u8 reserved_0[0x18]; u8 syndrome[0x20]; u8 reserved_1[0x40]; }; enum { MLX5_MODIFY_RQ_IN_MODIFY_BITMASK_VSD = 1ULL << 1, MLX5_MODIFY_RQ_IN_MODIFY_BITMASK_MODIFY_RQ_COUNTER_SET_ID = 1ULL << 3, }; struct mlx5_ifc_modify_rq_in_bits { u8 opcode[0x10]; u8 reserved_0[0x10]; u8 reserved_1[0x10]; u8 op_mod[0x10]; u8 rq_state[0x4]; u8 reserved_2[0x4]; u8 rqn[0x18]; u8 reserved_3[0x20]; u8 modify_bitmask[0x40]; u8 reserved_4[0x40]; struct mlx5_ifc_rqc_bits ctx; }; struct mlx5_ifc_modify_rmp_out_bits { u8 status[0x8]; u8 reserved_0[0x18]; u8 syndrome[0x20]; u8 reserved_1[0x40]; }; struct mlx5_ifc_rmp_bitmask_bits { u8 reserved[0x20]; u8 reserved1[0x1f]; u8 lwm[0x1]; }; struct mlx5_ifc_modify_rmp_in_bits { u8 opcode[0x10]; u8 
reserved_0[0x10]; u8 reserved_1[0x10]; u8 op_mod[0x10]; u8 rmp_state[0x4]; u8 reserved_2[0x4]; u8 rmpn[0x18]; u8 reserved_3[0x20]; struct mlx5_ifc_rmp_bitmask_bits bitmask; u8 reserved_4[0x40]; struct mlx5_ifc_rmpc_bits ctx; }; struct mlx5_ifc_modify_nic_vport_context_out_bits { u8 status[0x8]; u8 reserved_0[0x18]; u8 syndrome[0x20]; u8 reserved_1[0x40]; }; struct mlx5_ifc_modify_nic_vport_field_select_bits { u8 reserved_0[0x14]; u8 disable_uc_local_lb[0x1]; u8 disable_mc_local_lb[0x1]; u8 node_guid[0x1]; u8 port_guid[0x1]; u8 min_wqe_inline_mode[0x1]; u8 mtu[0x1]; u8 change_event[0x1]; u8 promisc[0x1]; u8 permanent_address[0x1]; u8 addresses_list[0x1]; u8 roce_en[0x1]; u8 reserved_1[0x1]; }; struct mlx5_ifc_modify_nic_vport_context_in_bits { u8 opcode[0x10]; u8 reserved_0[0x10]; u8 reserved_1[0x10]; u8 op_mod[0x10]; u8 other_vport[0x1]; u8 reserved_2[0xf]; u8 vport_number[0x10]; struct mlx5_ifc_modify_nic_vport_field_select_bits field_select; u8 reserved_3[0x780]; struct mlx5_ifc_nic_vport_context_bits nic_vport_context; }; struct mlx5_ifc_modify_hca_vport_context_out_bits { u8 status[0x8]; u8 reserved_0[0x18]; u8 syndrome[0x20]; u8 reserved_1[0x40]; }; struct mlx5_ifc_grh_bits { u8 ip_version[4]; u8 traffic_class[8]; u8 flow_label[20]; u8 payload_length[16]; u8 next_header[8]; u8 hop_limit[8]; u8 sgid[128]; u8 dgid[128]; }; struct mlx5_ifc_bth_bits { u8 opcode[8]; u8 se[1]; u8 migreq[1]; u8 pad_count[2]; u8 tver[4]; u8 p_key[16]; u8 reserved8[8]; u8 dest_qp[24]; u8 ack_req[1]; u8 reserved7[7]; u8 psn[24]; }; struct mlx5_ifc_aeth_bits { u8 syndrome[8]; u8 msn[24]; }; struct mlx5_ifc_dceth_bits { u8 reserved0[8]; u8 session_id[24]; u8 reserved1[8]; u8 dci_dct[24]; }; struct mlx5_ifc_modify_hca_vport_context_in_bits { u8 opcode[0x10]; u8 reserved_0[0x10]; u8 reserved_1[0x10]; u8 op_mod[0x10]; u8 other_vport[0x1]; u8 reserved_2[0xb]; u8 port_num[0x4]; u8 vport_number[0x10]; u8 reserved_3[0x20]; struct mlx5_ifc_hca_vport_context_bits hca_vport_context; }; struct mlx5_ifc_modify_flow_table_out_bits { u8 status[0x8]; u8 reserved_at_8[0x18]; u8 syndrome[0x20]; u8 reserved_at_40[0x40]; }; enum { MLX5_MODIFY_FLOW_TABLE_SELECT_MISS_ACTION_AND_ID = 0x1, MLX5_MODIFY_FLOW_TABLE_SELECT_LAG_MASTER_NEXT_TABLE_ID = 0x8000, }; struct mlx5_ifc_modify_flow_table_in_bits { u8 opcode[0x10]; u8 reserved_at_10[0x10]; u8 reserved_at_20[0x10]; u8 op_mod[0x10]; u8 other_vport[0x1]; u8 reserved_at_41[0xf]; u8 vport_number[0x10]; u8 reserved_at_60[0x10]; u8 modify_field_select[0x10]; u8 table_type[0x8]; u8 reserved_at_88[0x18]; u8 reserved_at_a0[0x8]; u8 table_id[0x18]; struct mlx5_ifc_flow_table_context_bits flow_table_context; }; struct mlx5_ifc_modify_esw_vport_context_out_bits { u8 status[0x8]; u8 reserved_0[0x18]; u8 syndrome[0x20]; u8 reserved_1[0x40]; }; struct mlx5_ifc_esw_vport_context_fields_select_bits { u8 reserved[0x1c]; u8 vport_cvlan_insert[0x1]; u8 vport_svlan_insert[0x1]; u8 vport_cvlan_strip[0x1]; u8 vport_svlan_strip[0x1]; }; struct mlx5_ifc_modify_esw_vport_context_in_bits { u8 opcode[0x10]; u8 reserved_0[0x10]; u8 reserved_1[0x10]; u8 op_mod[0x10]; u8 other_vport[0x1]; u8 reserved_2[0xf]; u8 vport_number[0x10]; struct mlx5_ifc_esw_vport_context_fields_select_bits field_select; struct mlx5_ifc_esw_vport_context_bits esw_vport_context; }; struct mlx5_ifc_modify_cq_out_bits { u8 status[0x8]; u8 reserved_0[0x18]; u8 syndrome[0x20]; u8 reserved_1[0x40]; }; enum { MLX5_MODIFY_CQ_IN_OP_MOD_MODIFY_CQ = 0x0, MLX5_MODIFY_CQ_IN_OP_MOD_RESIZE_CQ = 0x1, }; struct mlx5_ifc_modify_cq_in_bits { u8 opcode[0x10]; 
u8 reserved_0[0x10]; u8 reserved_1[0x10]; u8 op_mod[0x10]; u8 reserved_2[0x8]; u8 cqn[0x18]; union mlx5_ifc_modify_field_select_resize_field_select_auto_bits modify_field_select_resize_field_select; struct mlx5_ifc_cqc_bits cq_context; u8 reserved_3[0x600]; u8 pas[0][0x40]; }; struct mlx5_ifc_modify_cong_status_out_bits { u8 status[0x8]; u8 reserved_0[0x18]; u8 syndrome[0x20]; u8 reserved_1[0x40]; }; struct mlx5_ifc_modify_cong_status_in_bits { u8 opcode[0x10]; u8 reserved_0[0x10]; u8 reserved_1[0x10]; u8 op_mod[0x10]; u8 reserved_2[0x18]; u8 priority[0x4]; u8 cong_protocol[0x4]; u8 enable[0x1]; u8 tag_enable[0x1]; u8 reserved_3[0x1e]; }; struct mlx5_ifc_modify_cong_params_out_bits { u8 status[0x8]; u8 reserved_0[0x18]; u8 syndrome[0x20]; u8 reserved_1[0x40]; }; struct mlx5_ifc_modify_cong_params_in_bits { u8 opcode[0x10]; u8 reserved_0[0x10]; u8 reserved_1[0x10]; u8 op_mod[0x10]; u8 reserved_2[0x1c]; u8 cong_protocol[0x4]; union mlx5_ifc_field_select_802_1_r_roce_auto_bits field_select; u8 reserved_3[0x80]; union mlx5_ifc_cong_control_roce_ecn_auto_bits congestion_parameters; }; struct mlx5_ifc_manage_pages_out_bits { u8 status[0x8]; u8 reserved_0[0x18]; u8 syndrome[0x20]; u8 output_num_entries[0x20]; u8 reserved_1[0x20]; u8 pas[0][0x40]; }; enum { MLX5_PAGES_CANT_GIVE = 0x0, MLX5_PAGES_GIVE = 0x1, MLX5_PAGES_TAKE = 0x2, }; struct mlx5_ifc_manage_pages_in_bits { u8 opcode[0x10]; u8 reserved_0[0x10]; u8 reserved_1[0x10]; u8 op_mod[0x10]; u8 reserved_2[0x10]; u8 function_id[0x10]; u8 input_num_entries[0x20]; u8 pas[0][0x40]; }; struct mlx5_ifc_mad_ifc_out_bits { u8 status[0x8]; u8 reserved_0[0x18]; u8 syndrome[0x20]; u8 reserved_1[0x40]; u8 response_mad_packet[256][0x8]; }; struct mlx5_ifc_mad_ifc_in_bits { u8 opcode[0x10]; u8 reserved_0[0x10]; u8 reserved_1[0x10]; u8 op_mod[0x10]; u8 remote_lid[0x10]; u8 reserved_2[0x8]; u8 port[0x8]; u8 reserved_3[0x20]; u8 mad[256][0x8]; }; struct mlx5_ifc_init_hca_out_bits { u8 status[0x8]; u8 reserved_0[0x18]; u8 syndrome[0x20]; u8 reserved_1[0x40]; }; enum { MLX5_INIT_HCA_IN_OP_MOD_INIT = 0x0, MLX5_INIT_HCA_IN_OP_MOD_PRE_INIT = 0x1, }; struct mlx5_ifc_init_hca_in_bits { u8 opcode[0x10]; u8 reserved_0[0x10]; u8 reserved_1[0x10]; u8 op_mod[0x10]; u8 reserved_2[0x40]; }; struct mlx5_ifc_init2rtr_qp_out_bits { u8 status[0x8]; u8 reserved_0[0x18]; u8 syndrome[0x20]; u8 reserved_1[0x40]; }; struct mlx5_ifc_init2rtr_qp_in_bits { u8 opcode[0x10]; u8 reserved_0[0x10]; u8 reserved_1[0x10]; u8 op_mod[0x10]; u8 reserved_2[0x8]; u8 qpn[0x18]; u8 reserved_3[0x20]; u8 opt_param_mask[0x20]; u8 reserved_4[0x20]; struct mlx5_ifc_qpc_bits qpc; u8 reserved_5[0x80]; }; struct mlx5_ifc_init2init_qp_out_bits { u8 status[0x8]; u8 reserved_0[0x18]; u8 syndrome[0x20]; u8 reserved_1[0x40]; }; struct mlx5_ifc_init2init_qp_in_bits { u8 opcode[0x10]; u8 reserved_0[0x10]; u8 reserved_1[0x10]; u8 op_mod[0x10]; u8 reserved_2[0x8]; u8 qpn[0x18]; u8 reserved_3[0x20]; u8 opt_param_mask[0x20]; u8 reserved_4[0x20]; struct mlx5_ifc_qpc_bits qpc; u8 reserved_5[0x80]; }; struct mlx5_ifc_get_dropped_packet_log_out_bits { u8 status[0x8]; u8 reserved_0[0x18]; u8 syndrome[0x20]; u8 reserved_1[0x40]; u8 packet_headers_log[128][0x8]; u8 packet_syndrome[64][0x8]; }; struct mlx5_ifc_get_dropped_packet_log_in_bits { u8 opcode[0x10]; u8 reserved_0[0x10]; u8 reserved_1[0x10]; u8 op_mod[0x10]; u8 reserved_2[0x40]; }; struct mlx5_ifc_encryption_key_obj_bits { u8 modify_field_select[0x40]; u8 reserved_at_40[0x14]; u8 key_size[0x4]; u8 reserved_at_58[0x4]; u8 key_type[0x4]; u8 reserved_at_60[0x8]; u8 
pd[0x18]; u8 reserved_at_80[0x180]; u8 key[8][0x20]; u8 reserved_at_300[0x500]; }; struct mlx5_ifc_gen_eqe_in_bits { u8 opcode[0x10]; u8 reserved_0[0x10]; u8 reserved_1[0x10]; u8 op_mod[0x10]; u8 reserved_2[0x18]; u8 eq_number[0x8]; u8 reserved_3[0x20]; u8 eqe[64][0x8]; }; struct mlx5_ifc_gen_eq_out_bits { u8 status[0x8]; u8 reserved_0[0x18]; u8 syndrome[0x20]; u8 reserved_1[0x40]; }; struct mlx5_ifc_enable_hca_out_bits { u8 status[0x8]; u8 reserved_0[0x18]; u8 syndrome[0x20]; u8 reserved_1[0x20]; }; struct mlx5_ifc_enable_hca_in_bits { u8 opcode[0x10]; u8 reserved_0[0x10]; u8 reserved_1[0x10]; u8 op_mod[0x10]; u8 reserved_2[0x10]; u8 function_id[0x10]; u8 reserved_3[0x20]; }; struct mlx5_ifc_drain_dct_out_bits { u8 status[0x8]; u8 reserved_0[0x18]; u8 syndrome[0x20]; u8 reserved_1[0x40]; }; struct mlx5_ifc_drain_dct_in_bits { u8 opcode[0x10]; u8 reserved_0[0x10]; u8 reserved_1[0x10]; u8 op_mod[0x10]; u8 reserved_2[0x8]; u8 dctn[0x18]; u8 reserved_3[0x20]; }; struct mlx5_ifc_disable_hca_out_bits { u8 status[0x8]; u8 reserved_0[0x18]; u8 syndrome[0x20]; u8 reserved_1[0x20]; }; struct mlx5_ifc_disable_hca_in_bits { u8 opcode[0x10]; u8 reserved_0[0x10]; u8 reserved_1[0x10]; u8 op_mod[0x10]; u8 reserved_2[0x10]; u8 function_id[0x10]; u8 reserved_3[0x20]; }; struct mlx5_ifc_detach_from_mcg_out_bits { u8 status[0x8]; u8 reserved_0[0x18]; u8 syndrome[0x20]; u8 reserved_1[0x40]; }; struct mlx5_ifc_detach_from_mcg_in_bits { u8 opcode[0x10]; u8 reserved_0[0x10]; u8 reserved_1[0x10]; u8 op_mod[0x10]; u8 reserved_2[0x8]; u8 qpn[0x18]; u8 reserved_3[0x20]; u8 multicast_gid[16][0x8]; }; struct mlx5_ifc_destroy_xrc_srq_out_bits { u8 status[0x8]; u8 reserved_0[0x18]; u8 syndrome[0x20]; u8 reserved_1[0x40]; }; struct mlx5_ifc_destroy_xrc_srq_in_bits { u8 opcode[0x10]; u8 reserved_0[0x10]; u8 reserved_1[0x10]; u8 op_mod[0x10]; u8 reserved_2[0x8]; u8 xrc_srqn[0x18]; u8 reserved_3[0x20]; }; struct mlx5_ifc_destroy_tis_out_bits { u8 status[0x8]; u8 reserved_0[0x18]; u8 syndrome[0x20]; u8 reserved_1[0x40]; }; struct mlx5_ifc_destroy_tis_in_bits { u8 opcode[0x10]; u8 reserved_0[0x10]; u8 reserved_1[0x10]; u8 op_mod[0x10]; u8 reserved_2[0x8]; u8 tisn[0x18]; u8 reserved_3[0x20]; }; struct mlx5_ifc_destroy_tir_out_bits { u8 status[0x8]; u8 reserved_0[0x18]; u8 syndrome[0x20]; u8 reserved_1[0x40]; }; struct mlx5_ifc_destroy_tir_in_bits { u8 opcode[0x10]; u8 reserved_0[0x10]; u8 reserved_1[0x10]; u8 op_mod[0x10]; u8 reserved_2[0x8]; u8 tirn[0x18]; u8 reserved_3[0x20]; }; struct mlx5_ifc_destroy_srq_out_bits { u8 status[0x8]; u8 reserved_0[0x18]; u8 syndrome[0x20]; u8 reserved_1[0x40]; }; struct mlx5_ifc_destroy_srq_in_bits { u8 opcode[0x10]; u8 reserved_0[0x10]; u8 reserved_1[0x10]; u8 op_mod[0x10]; u8 reserved_2[0x8]; u8 srqn[0x18]; u8 reserved_3[0x20]; }; struct mlx5_ifc_destroy_sq_out_bits { u8 status[0x8]; u8 reserved_0[0x18]; u8 syndrome[0x20]; u8 reserved_1[0x40]; }; struct mlx5_ifc_destroy_sq_in_bits { u8 opcode[0x10]; u8 reserved_0[0x10]; u8 reserved_1[0x10]; u8 op_mod[0x10]; u8 reserved_2[0x8]; u8 sqn[0x18]; u8 reserved_3[0x20]; }; struct mlx5_ifc_destroy_scheduling_element_out_bits { u8 status[0x8]; u8 reserved_at_8[0x18]; u8 syndrome[0x20]; u8 reserved_at_40[0x1c0]; }; enum { MLX5_DESTROY_SCHEDULING_ELEMENT_IN_SCHEDULING_HIERARCHY_E_SWITCH = 0x2, }; struct mlx5_ifc_destroy_scheduling_element_in_bits { u8 opcode[0x10]; u8 reserved_at_10[0x10]; u8 reserved_at_20[0x10]; u8 op_mod[0x10]; u8 scheduling_hierarchy[0x8]; u8 reserved_at_48[0x18]; u8 scheduling_element_id[0x20]; u8 reserved_at_80[0x180]; }; struct 
mlx5_ifc_destroy_rqt_out_bits { u8 status[0x8]; u8 reserved_0[0x18]; u8 syndrome[0x20]; u8 reserved_1[0x40]; }; struct mlx5_ifc_destroy_rqt_in_bits { u8 opcode[0x10]; u8 reserved_0[0x10]; u8 reserved_1[0x10]; u8 op_mod[0x10]; u8 reserved_2[0x8]; u8 rqtn[0x18]; u8 reserved_3[0x20]; }; struct mlx5_ifc_destroy_rq_out_bits { u8 status[0x8]; u8 reserved_0[0x18]; u8 syndrome[0x20]; u8 reserved_1[0x40]; }; struct mlx5_ifc_destroy_rq_in_bits { u8 opcode[0x10]; u8 reserved_0[0x10]; u8 reserved_1[0x10]; u8 op_mod[0x10]; u8 reserved_2[0x8]; u8 rqn[0x18]; u8 reserved_3[0x20]; }; struct mlx5_ifc_destroy_rmp_out_bits { u8 status[0x8]; u8 reserved_0[0x18]; u8 syndrome[0x20]; u8 reserved_1[0x40]; }; struct mlx5_ifc_destroy_rmp_in_bits { u8 opcode[0x10]; u8 reserved_0[0x10]; u8 reserved_1[0x10]; u8 op_mod[0x10]; u8 reserved_2[0x8]; u8 rmpn[0x18]; u8 reserved_3[0x20]; }; struct mlx5_ifc_destroy_qp_out_bits { u8 status[0x8]; u8 reserved_0[0x18]; u8 syndrome[0x20]; u8 reserved_1[0x40]; }; struct mlx5_ifc_destroy_qp_in_bits { u8 opcode[0x10]; u8 reserved_0[0x10]; u8 reserved_1[0x10]; u8 op_mod[0x10]; u8 reserved_2[0x8]; u8 qpn[0x18]; u8 reserved_3[0x20]; }; struct mlx5_ifc_destroy_qos_para_vport_out_bits { u8 status[0x8]; u8 reserved_at_8[0x18]; u8 syndrome[0x20]; u8 reserved_at_40[0x1c0]; }; struct mlx5_ifc_destroy_qos_para_vport_in_bits { u8 opcode[0x10]; u8 reserved_at_10[0x10]; u8 reserved_at_20[0x10]; u8 op_mod[0x10]; u8 reserved_at_40[0x20]; u8 reserved_at_60[0x10]; u8 qos_para_vport_number[0x10]; u8 reserved_at_80[0x180]; }; struct mlx5_ifc_destroy_psv_out_bits { u8 status[0x8]; u8 reserved_0[0x18]; u8 syndrome[0x20]; u8 reserved_1[0x40]; }; struct mlx5_ifc_destroy_psv_in_bits { u8 opcode[0x10]; u8 reserved_0[0x10]; u8 reserved_1[0x10]; u8 op_mod[0x10]; u8 reserved_2[0x8]; u8 psvn[0x18]; u8 reserved_3[0x20]; }; struct mlx5_ifc_destroy_mkey_out_bits { u8 status[0x8]; u8 reserved_0[0x18]; u8 syndrome[0x20]; u8 reserved_1[0x40]; }; struct mlx5_ifc_destroy_mkey_in_bits { u8 opcode[0x10]; u8 reserved_0[0x10]; u8 reserved_1[0x10]; u8 op_mod[0x10]; u8 reserved_2[0x8]; u8 mkey_index[0x18]; u8 reserved_3[0x20]; }; struct mlx5_ifc_destroy_flow_table_out_bits { u8 status[0x8]; u8 reserved_0[0x18]; u8 syndrome[0x20]; u8 reserved_1[0x40]; }; struct mlx5_ifc_destroy_flow_table_in_bits { u8 opcode[0x10]; u8 reserved_0[0x10]; u8 reserved_1[0x10]; u8 op_mod[0x10]; u8 other_vport[0x1]; u8 reserved_2[0xf]; u8 vport_number[0x10]; u8 reserved_3[0x20]; u8 table_type[0x8]; u8 reserved_4[0x18]; u8 reserved_5[0x8]; u8 table_id[0x18]; u8 reserved_6[0x140]; }; struct mlx5_ifc_destroy_flow_group_out_bits { u8 status[0x8]; u8 reserved_0[0x18]; u8 syndrome[0x20]; u8 reserved_1[0x40]; }; struct mlx5_ifc_destroy_flow_group_in_bits { u8 opcode[0x10]; u8 reserved_0[0x10]; u8 reserved_1[0x10]; u8 op_mod[0x10]; u8 other_vport[0x1]; u8 reserved_2[0xf]; u8 vport_number[0x10]; u8 reserved_3[0x20]; u8 table_type[0x8]; u8 reserved_4[0x18]; u8 reserved_5[0x8]; u8 table_id[0x18]; u8 group_id[0x20]; u8 reserved_6[0x120]; }; struct mlx5_ifc_destroy_encryption_key_out_bits { u8 status[0x8]; u8 reserved_at_8[0x18]; u8 syndrome[0x20]; u8 reserved_at_40[0x40]; }; struct mlx5_ifc_destroy_encryption_key_in_bits { u8 opcode[0x10]; u8 reserved_at_10[0x10]; u8 reserved_at_20[0x10]; u8 obj_type[0x10]; u8 obj_id[0x20]; u8 reserved_at_60[0x20]; }; struct mlx5_ifc_destroy_eq_out_bits { u8 status[0x8]; u8 reserved_0[0x18]; u8 syndrome[0x20]; u8 reserved_1[0x40]; }; struct mlx5_ifc_destroy_eq_in_bits { u8 opcode[0x10]; u8 reserved_0[0x10]; u8 reserved_1[0x10]; 
u8 op_mod[0x10]; u8 reserved_2[0x18]; u8 eq_number[0x8]; u8 reserved_3[0x20]; }; struct mlx5_ifc_destroy_dct_out_bits { u8 status[0x8]; u8 reserved_0[0x18]; u8 syndrome[0x20]; u8 reserved_1[0x40]; }; struct mlx5_ifc_destroy_dct_in_bits { u8 opcode[0x10]; u8 reserved_0[0x10]; u8 reserved_1[0x10]; u8 op_mod[0x10]; u8 reserved_2[0x8]; u8 dctn[0x18]; u8 reserved_3[0x20]; }; struct mlx5_ifc_destroy_cq_out_bits { u8 status[0x8]; u8 reserved_0[0x18]; u8 syndrome[0x20]; u8 reserved_1[0x40]; }; struct mlx5_ifc_destroy_cq_in_bits { u8 opcode[0x10]; u8 reserved_0[0x10]; u8 reserved_1[0x10]; u8 op_mod[0x10]; u8 reserved_2[0x8]; u8 cqn[0x18]; u8 reserved_3[0x20]; }; struct mlx5_ifc_delete_vxlan_udp_dport_out_bits { u8 status[0x8]; u8 reserved_0[0x18]; u8 syndrome[0x20]; u8 reserved_1[0x40]; }; struct mlx5_ifc_delete_vxlan_udp_dport_in_bits { u8 opcode[0x10]; u8 reserved_0[0x10]; u8 reserved_1[0x10]; u8 op_mod[0x10]; u8 reserved_2[0x20]; u8 reserved_3[0x10]; u8 vxlan_udp_port[0x10]; }; struct mlx5_ifc_delete_l2_table_entry_out_bits { u8 status[0x8]; u8 reserved_0[0x18]; u8 syndrome[0x20]; u8 reserved_1[0x40]; }; struct mlx5_ifc_delete_l2_table_entry_in_bits { u8 opcode[0x10]; u8 reserved_0[0x10]; u8 reserved_1[0x10]; u8 op_mod[0x10]; u8 reserved_2[0x60]; u8 reserved_3[0x8]; u8 table_index[0x18]; u8 reserved_4[0x140]; }; struct mlx5_ifc_delete_fte_out_bits { u8 status[0x8]; u8 reserved_0[0x18]; u8 syndrome[0x20]; u8 reserved_1[0x40]; }; struct mlx5_ifc_delete_fte_in_bits { u8 opcode[0x10]; u8 reserved_0[0x10]; u8 reserved_1[0x10]; u8 op_mod[0x10]; u8 other_vport[0x1]; u8 reserved_2[0xf]; u8 vport_number[0x10]; u8 reserved_3[0x20]; u8 table_type[0x8]; u8 reserved_4[0x18]; u8 reserved_5[0x8]; u8 table_id[0x18]; u8 reserved_6[0x40]; u8 flow_index[0x20]; u8 reserved_7[0xe0]; }; struct mlx5_ifc_dealloc_xrcd_out_bits { u8 status[0x8]; u8 reserved_0[0x18]; u8 syndrome[0x20]; u8 reserved_1[0x40]; }; struct mlx5_ifc_dealloc_xrcd_in_bits { u8 opcode[0x10]; u8 reserved_0[0x10]; u8 reserved_1[0x10]; u8 op_mod[0x10]; u8 reserved_2[0x8]; u8 xrcd[0x18]; u8 reserved_3[0x20]; }; struct mlx5_ifc_dealloc_uar_out_bits { u8 status[0x8]; u8 reserved_0[0x18]; u8 syndrome[0x20]; u8 reserved_1[0x40]; }; struct mlx5_ifc_dealloc_uar_in_bits { u8 opcode[0x10]; u8 reserved_0[0x10]; u8 reserved_1[0x10]; u8 op_mod[0x10]; u8 reserved_2[0x8]; u8 uar[0x18]; u8 reserved_3[0x20]; }; struct mlx5_ifc_dealloc_transport_domain_out_bits { u8 status[0x8]; u8 reserved_0[0x18]; u8 syndrome[0x20]; u8 reserved_1[0x40]; }; struct mlx5_ifc_dealloc_transport_domain_in_bits { u8 opcode[0x10]; u8 reserved_0[0x10]; u8 reserved_1[0x10]; u8 op_mod[0x10]; u8 reserved_2[0x8]; u8 transport_domain[0x18]; u8 reserved_3[0x20]; }; struct mlx5_ifc_dealloc_q_counter_out_bits { u8 status[0x8]; u8 reserved_0[0x18]; u8 syndrome[0x20]; u8 reserved_1[0x40]; }; struct mlx5_ifc_counter_id_bits { u8 reserved[0x10]; u8 counter_id[0x10]; }; struct mlx5_ifc_diagnostic_params_context_bits { u8 num_of_counters[0x10]; u8 reserved_2[0x8]; u8 log_num_of_samples[0x8]; u8 single[0x1]; u8 repetitive[0x1]; u8 sync[0x1]; u8 clear[0x1]; u8 on_demand[0x1]; u8 enable[0x1]; u8 reserved_3[0x12]; u8 log_sample_period[0x8]; u8 reserved_4[0x80]; struct mlx5_ifc_counter_id_bits counter_id[0]; }; struct mlx5_ifc_set_diagnostic_params_in_bits { u8 opcode[0x10]; u8 reserved_0[0x10]; u8 reserved_1[0x10]; u8 op_mod[0x10]; struct mlx5_ifc_diagnostic_params_context_bits diagnostic_params_ctx; }; struct mlx5_ifc_set_diagnostic_params_out_bits { u8 status[0x8]; u8 reserved_0[0x18]; u8 syndrome[0x20]; u8 
reserved_1[0x40]; }; struct mlx5_ifc_query_diagnostic_counters_in_bits { u8 opcode[0x10]; u8 reserved_0[0x10]; u8 reserved_1[0x10]; u8 op_mod[0x10]; u8 num_of_samples[0x10]; u8 sample_index[0x10]; u8 reserved_2[0x20]; }; struct mlx5_ifc_diagnostic_counter_bits { u8 counter_id[0x10]; u8 sample_id[0x10]; u8 time_stamp_31_0[0x20]; u8 counter_value_h[0x20]; u8 counter_value_l[0x20]; }; struct mlx5_ifc_query_diagnostic_counters_out_bits { u8 status[0x8]; u8 reserved_0[0x18]; u8 syndrome[0x20]; u8 reserved_1[0x40]; struct mlx5_ifc_diagnostic_counter_bits diag_counter[0]; }; struct mlx5_ifc_dealloc_q_counter_in_bits { u8 opcode[0x10]; u8 reserved_0[0x10]; u8 reserved_1[0x10]; u8 op_mod[0x10]; u8 reserved_2[0x18]; u8 counter_set_id[0x8]; u8 reserved_3[0x20]; }; struct mlx5_ifc_dealloc_pd_out_bits { u8 status[0x8]; u8 reserved_0[0x18]; u8 syndrome[0x20]; u8 reserved_1[0x40]; }; struct mlx5_ifc_dealloc_pd_in_bits { u8 opcode[0x10]; u8 reserved_0[0x10]; u8 reserved_1[0x10]; u8 op_mod[0x10]; u8 reserved_2[0x8]; u8 pd[0x18]; u8 reserved_3[0x20]; }; struct mlx5_ifc_dealloc_flow_counter_out_bits { u8 status[0x8]; u8 reserved_0[0x18]; u8 syndrome[0x20]; u8 reserved_1[0x40]; }; struct mlx5_ifc_dealloc_flow_counter_in_bits { u8 opcode[0x10]; u8 reserved_0[0x10]; u8 reserved_1[0x10]; u8 op_mod[0x10]; u8 reserved_2[0x10]; u8 flow_counter_id[0x10]; u8 reserved_3[0x20]; }; struct mlx5_ifc_deactivate_tracer_out_bits { u8 status[0x8]; u8 reserved_0[0x18]; u8 syndrome[0x20]; u8 reserved_1[0x40]; }; struct mlx5_ifc_deactivate_tracer_in_bits { u8 opcode[0x10]; u8 reserved_0[0x10]; u8 reserved_1[0x10]; u8 op_mod[0x10]; u8 mkey[0x20]; u8 reserved_2[0x20]; }; struct mlx5_ifc_create_xrc_srq_out_bits { u8 status[0x8]; u8 reserved_0[0x18]; u8 syndrome[0x20]; u8 reserved_1[0x8]; u8 xrc_srqn[0x18]; u8 reserved_2[0x20]; }; struct mlx5_ifc_create_xrc_srq_in_bits { u8 opcode[0x10]; u8 reserved_0[0x10]; u8 reserved_1[0x10]; u8 op_mod[0x10]; u8 reserved_2[0x40]; struct mlx5_ifc_xrc_srqc_bits xrc_srq_context_entry; u8 reserved_3[0x600]; u8 pas[0][0x40]; }; struct mlx5_ifc_create_tis_out_bits { u8 status[0x8]; u8 reserved_0[0x18]; u8 syndrome[0x20]; u8 reserved_1[0x8]; u8 tisn[0x18]; u8 reserved_2[0x20]; }; struct mlx5_ifc_create_tis_in_bits { u8 opcode[0x10]; u8 reserved_0[0x10]; u8 reserved_1[0x10]; u8 op_mod[0x10]; u8 reserved_2[0xc0]; struct mlx5_ifc_tisc_bits ctx; }; struct mlx5_ifc_create_tir_out_bits { u8 status[0x8]; u8 reserved_0[0x18]; u8 syndrome[0x20]; u8 reserved_1[0x8]; u8 tirn[0x18]; u8 reserved_2[0x20]; }; struct mlx5_ifc_create_tir_in_bits { u8 opcode[0x10]; u8 reserved_0[0x10]; u8 reserved_1[0x10]; u8 op_mod[0x10]; u8 reserved_2[0xc0]; struct mlx5_ifc_tirc_bits tir_context; }; struct mlx5_ifc_create_srq_out_bits { u8 status[0x8]; u8 reserved_0[0x18]; u8 syndrome[0x20]; u8 reserved_1[0x8]; u8 srqn[0x18]; u8 reserved_2[0x20]; }; struct mlx5_ifc_create_srq_in_bits { u8 opcode[0x10]; u8 reserved_0[0x10]; u8 reserved_1[0x10]; u8 op_mod[0x10]; u8 reserved_2[0x40]; struct mlx5_ifc_srqc_bits srq_context_entry; u8 reserved_3[0x600]; u8 pas[0][0x40]; }; struct mlx5_ifc_create_sq_out_bits { u8 status[0x8]; u8 reserved_0[0x18]; u8 syndrome[0x20]; u8 reserved_1[0x8]; u8 sqn[0x18]; u8 reserved_2[0x20]; }; struct mlx5_ifc_create_sq_in_bits { u8 opcode[0x10]; u8 reserved_0[0x10]; u8 reserved_1[0x10]; u8 op_mod[0x10]; u8 reserved_2[0xc0]; struct mlx5_ifc_sqc_bits ctx; }; struct mlx5_ifc_create_scheduling_element_out_bits { u8 status[0x8]; u8 reserved_at_8[0x18]; u8 syndrome[0x20]; u8 reserved_at_40[0x40]; u8 
scheduling_element_id[0x20]; u8 reserved_at_a0[0x160]; }; enum { MLX5_CREATE_SCHEDULING_ELEMENT_IN_SCHEDULING_HIERARCHY_E_SWITCH = 0x2, }; struct mlx5_ifc_create_scheduling_element_in_bits { u8 opcode[0x10]; u8 reserved_at_10[0x10]; u8 reserved_at_20[0x10]; u8 op_mod[0x10]; u8 scheduling_hierarchy[0x8]; u8 reserved_at_48[0x18]; u8 reserved_at_60[0xa0]; struct mlx5_ifc_scheduling_context_bits scheduling_context; u8 reserved_at_300[0x100]; }; struct mlx5_ifc_create_rqt_out_bits { u8 status[0x8]; u8 reserved_0[0x18]; u8 syndrome[0x20]; u8 reserved_1[0x8]; u8 rqtn[0x18]; u8 reserved_2[0x20]; }; struct mlx5_ifc_create_rqt_in_bits { u8 opcode[0x10]; u8 reserved_0[0x10]; u8 reserved_1[0x10]; u8 op_mod[0x10]; u8 reserved_2[0xc0]; struct mlx5_ifc_rqtc_bits rqt_context; }; struct mlx5_ifc_create_rq_out_bits { u8 status[0x8]; u8 reserved_0[0x18]; u8 syndrome[0x20]; u8 reserved_1[0x8]; u8 rqn[0x18]; u8 reserved_2[0x20]; }; struct mlx5_ifc_create_rq_in_bits { u8 opcode[0x10]; u8 reserved_0[0x10]; u8 reserved_1[0x10]; u8 op_mod[0x10]; u8 reserved_2[0xc0]; struct mlx5_ifc_rqc_bits ctx; }; struct mlx5_ifc_create_rmp_out_bits { u8 status[0x8]; u8 reserved_0[0x18]; u8 syndrome[0x20]; u8 reserved_1[0x8]; u8 rmpn[0x18]; u8 reserved_2[0x20]; }; struct mlx5_ifc_create_rmp_in_bits { u8 opcode[0x10]; u8 reserved_0[0x10]; u8 reserved_1[0x10]; u8 op_mod[0x10]; u8 reserved_2[0xc0]; struct mlx5_ifc_rmpc_bits ctx; }; struct mlx5_ifc_create_qp_out_bits { u8 status[0x8]; u8 reserved_0[0x18]; u8 syndrome[0x20]; u8 reserved_1[0x8]; u8 qpn[0x18]; u8 reserved_2[0x20]; }; struct mlx5_ifc_create_qp_in_bits { u8 opcode[0x10]; u8 reserved_0[0x10]; u8 reserved_1[0x10]; u8 op_mod[0x10]; u8 reserved_2[0x8]; u8 input_qpn[0x18]; u8 reserved_3[0x20]; u8 opt_param_mask[0x20]; u8 reserved_4[0x20]; struct mlx5_ifc_qpc_bits qpc; u8 reserved_5[0x80]; u8 pas[0][0x40]; }; struct mlx5_ifc_create_qos_para_vport_out_bits { u8 status[0x8]; u8 reserved_at_8[0x18]; u8 syndrome[0x20]; u8 reserved_at_40[0x20]; u8 reserved_at_60[0x10]; u8 qos_para_vport_number[0x10]; u8 reserved_at_80[0x180]; }; struct mlx5_ifc_create_qos_para_vport_in_bits { u8 opcode[0x10]; u8 reserved_at_10[0x10]; u8 reserved_at_20[0x10]; u8 op_mod[0x10]; u8 reserved_at_40[0x1c0]; }; struct mlx5_ifc_create_psv_out_bits { u8 status[0x8]; u8 reserved_0[0x18]; u8 syndrome[0x20]; u8 reserved_1[0x40]; u8 reserved_2[0x8]; u8 psv0_index[0x18]; u8 reserved_3[0x8]; u8 psv1_index[0x18]; u8 reserved_4[0x8]; u8 psv2_index[0x18]; u8 reserved_5[0x8]; u8 psv3_index[0x18]; }; struct mlx5_ifc_create_psv_in_bits { u8 opcode[0x10]; u8 reserved_0[0x10]; u8 reserved_1[0x10]; u8 op_mod[0x10]; u8 num_psv[0x4]; u8 reserved_2[0x4]; u8 pd[0x18]; u8 reserved_3[0x20]; }; struct mlx5_ifc_create_mkey_out_bits { u8 status[0x8]; u8 reserved_0[0x18]; u8 syndrome[0x20]; u8 reserved_1[0x8]; u8 mkey_index[0x18]; u8 reserved_2[0x20]; }; struct mlx5_ifc_create_mkey_in_bits { u8 opcode[0x10]; u8 reserved_0[0x10]; u8 reserved_1[0x10]; u8 op_mod[0x10]; u8 reserved_2[0x20]; u8 pg_access[0x1]; u8 reserved_3[0x1f]; struct mlx5_ifc_mkc_bits memory_key_mkey_entry; u8 reserved_4[0x80]; u8 translations_octword_actual_size[0x20]; u8 reserved_5[0x560]; u8 klm_pas_mtt[0][0x20]; }; struct mlx5_ifc_create_flow_table_out_bits { u8 status[0x8]; u8 reserved_0[0x18]; u8 syndrome[0x20]; u8 reserved_1[0x8]; u8 table_id[0x18]; u8 reserved_2[0x20]; }; struct mlx5_ifc_create_flow_table_in_bits { u8 opcode[0x10]; u8 reserved_at_10[0x10]; u8 reserved_at_20[0x10]; u8 op_mod[0x10]; u8 other_vport[0x1]; u8 reserved_at_41[0xf]; u8 
vport_number[0x10]; u8 reserved_at_60[0x20]; u8 table_type[0x8]; u8 reserved_at_88[0x18]; u8 reserved_at_a0[0x20]; struct mlx5_ifc_flow_table_context_bits flow_table_context; }; struct mlx5_ifc_create_flow_group_out_bits { u8 status[0x8]; u8 reserved_0[0x18]; u8 syndrome[0x20]; u8 reserved_1[0x8]; u8 group_id[0x18]; u8 reserved_2[0x20]; }; enum { MLX5_CREATE_FLOW_GROUP_IN_MATCH_CRITERIA_ENABLE_OUTER_HEADERS = 0x0, MLX5_CREATE_FLOW_GROUP_IN_MATCH_CRITERIA_ENABLE_MISC_PARAMETERS = 0x1, MLX5_CREATE_FLOW_GROUP_IN_MATCH_CRITERIA_ENABLE_INNER_HEADERS = 0x2, }; struct mlx5_ifc_create_flow_group_in_bits { u8 opcode[0x10]; u8 reserved_0[0x10]; u8 reserved_1[0x10]; u8 op_mod[0x10]; u8 other_vport[0x1]; u8 reserved_2[0xf]; u8 vport_number[0x10]; u8 reserved_3[0x20]; u8 table_type[0x8]; u8 reserved_4[0x18]; u8 reserved_5[0x8]; u8 table_id[0x18]; u8 reserved_6[0x20]; u8 start_flow_index[0x20]; u8 reserved_7[0x20]; u8 end_flow_index[0x20]; u8 reserved_8[0xa0]; u8 reserved_9[0x18]; u8 match_criteria_enable[0x8]; struct mlx5_ifc_fte_match_param_bits match_criteria; u8 reserved_10[0xe00]; }; struct mlx5_ifc_create_encryption_key_out_bits { u8 status[0x8]; u8 reserved_at_8[0x18]; u8 syndrome[0x20]; u8 obj_id[0x20]; u8 reserved_at_60[0x20]; }; struct mlx5_ifc_create_encryption_key_in_bits { u8 opcode[0x10]; u8 reserved_at_10[0x10]; u8 reserved_at_20[0x10]; u8 obj_type[0x10]; u8 reserved_at_40[0x40]; struct mlx5_ifc_encryption_key_obj_bits encryption_key_object; }; struct mlx5_ifc_create_eq_out_bits { u8 status[0x8]; u8 reserved_0[0x18]; u8 syndrome[0x20]; u8 reserved_1[0x18]; u8 eq_number[0x8]; u8 reserved_2[0x20]; }; struct mlx5_ifc_create_eq_in_bits { u8 opcode[0x10]; u8 reserved_0[0x10]; u8 reserved_1[0x10]; u8 op_mod[0x10]; u8 reserved_2[0x40]; struct mlx5_ifc_eqc_bits eq_context_entry; u8 reserved_3[0x40]; u8 event_bitmask[0x40]; u8 reserved_4[0x580]; u8 pas[0][0x40]; }; struct mlx5_ifc_create_dct_out_bits { u8 status[0x8]; u8 reserved_0[0x18]; u8 syndrome[0x20]; u8 reserved_1[0x8]; u8 dctn[0x18]; u8 reserved_2[0x20]; }; struct mlx5_ifc_create_dct_in_bits { u8 opcode[0x10]; u8 reserved_0[0x10]; u8 reserved_1[0x10]; u8 op_mod[0x10]; u8 reserved_2[0x40]; struct mlx5_ifc_dctc_bits dct_context_entry; u8 reserved_3[0x180]; }; struct mlx5_ifc_create_cq_out_bits { u8 status[0x8]; u8 reserved_0[0x18]; u8 syndrome[0x20]; u8 reserved_1[0x8]; u8 cqn[0x18]; u8 reserved_2[0x20]; }; struct mlx5_ifc_create_cq_in_bits { u8 opcode[0x10]; u8 reserved_0[0x10]; u8 reserved_1[0x10]; u8 op_mod[0x10]; u8 reserved_2[0x40]; struct mlx5_ifc_cqc_bits cq_context; u8 reserved_3[0x600]; u8 pas[0][0x40]; }; struct mlx5_ifc_config_int_moderation_out_bits { u8 status[0x8]; u8 reserved_0[0x18]; u8 syndrome[0x20]; u8 reserved_1[0x4]; u8 min_delay[0xc]; u8 int_vector[0x10]; u8 reserved_2[0x20]; }; enum { MLX5_CONFIG_INT_MODERATION_IN_OP_MOD_WRITE = 0x0, MLX5_CONFIG_INT_MODERATION_IN_OP_MOD_READ = 0x1, }; struct mlx5_ifc_config_int_moderation_in_bits { u8 opcode[0x10]; u8 reserved_0[0x10]; u8 reserved_1[0x10]; u8 op_mod[0x10]; u8 reserved_2[0x4]; u8 min_delay[0xc]; u8 int_vector[0x10]; u8 reserved_3[0x20]; }; struct mlx5_ifc_attach_to_mcg_out_bits { u8 status[0x8]; u8 reserved_0[0x18]; u8 syndrome[0x20]; u8 reserved_1[0x40]; }; struct mlx5_ifc_attach_to_mcg_in_bits { u8 opcode[0x10]; u8 reserved_0[0x10]; u8 reserved_1[0x10]; u8 op_mod[0x10]; u8 reserved_2[0x8]; u8 qpn[0x18]; u8 reserved_3[0x20]; u8 multicast_gid[16][0x8]; }; struct mlx5_ifc_arm_xrc_srq_out_bits { u8 status[0x8]; u8 reserved_0[0x18]; u8 syndrome[0x20]; u8 reserved_1[0x40]; 
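/*
 * Editor's note (illustrative comment, not part of the original header): in
 * the mlx5_ifc_* layouts the bracketed size of each "u8" member is a width in
 * bits, not bytes, so "u8 syndrome[0x20]" is a 32-bit field.  Command output
 * layouts such as this one begin with an 8-bit status and a 32-bit syndrome,
 * and callers normally read them through the MLX5_GET()/MLX5_ST_SZ_DW()
 * helpers.  A minimal, hypothetical sketch (buffer and variable names are
 * assumptions):
 *
 *   u32 out[MLX5_ST_SZ_DW(arm_xrc_srq_out)] = {0};
 *   // ... execute the ARM_XRC_SRQ command into "out" ...
 *   if (MLX5_GET(arm_xrc_srq_out, out, status) != 0)
 *           syndrome = MLX5_GET(arm_xrc_srq_out, out, syndrome);
 */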
}; enum { MLX5_ARM_XRC_SRQ_IN_OP_MOD_XRC_SRQ = 0x1, }; struct mlx5_ifc_arm_xrc_srq_in_bits { u8 opcode[0x10]; u8 reserved_0[0x10]; u8 reserved_1[0x10]; u8 op_mod[0x10]; u8 reserved_2[0x8]; u8 xrc_srqn[0x18]; u8 reserved_3[0x10]; u8 lwm[0x10]; }; struct mlx5_ifc_arm_rq_out_bits { u8 status[0x8]; u8 reserved_0[0x18]; u8 syndrome[0x20]; u8 reserved_1[0x40]; }; enum { MLX5_ARM_RQ_IN_OP_MOD_SRQ = 0x1, }; struct mlx5_ifc_arm_rq_in_bits { u8 opcode[0x10]; u8 reserved_0[0x10]; u8 reserved_1[0x10]; u8 op_mod[0x10]; u8 reserved_2[0x8]; u8 srq_number[0x18]; u8 reserved_3[0x10]; u8 lwm[0x10]; }; struct mlx5_ifc_arm_dct_out_bits { u8 status[0x8]; u8 reserved_0[0x18]; u8 syndrome[0x20]; u8 reserved_1[0x40]; }; struct mlx5_ifc_arm_dct_in_bits { u8 opcode[0x10]; u8 reserved_0[0x10]; u8 reserved_1[0x10]; u8 op_mod[0x10]; u8 reserved_2[0x8]; u8 dctn[0x18]; u8 reserved_3[0x20]; }; struct mlx5_ifc_alloc_xrcd_out_bits { u8 status[0x8]; u8 reserved_0[0x18]; u8 syndrome[0x20]; u8 reserved_1[0x8]; u8 xrcd[0x18]; u8 reserved_2[0x20]; }; struct mlx5_ifc_alloc_xrcd_in_bits { u8 opcode[0x10]; u8 reserved_0[0x10]; u8 reserved_1[0x10]; u8 op_mod[0x10]; u8 reserved_2[0x40]; }; struct mlx5_ifc_alloc_uar_out_bits { u8 status[0x8]; u8 reserved_0[0x18]; u8 syndrome[0x20]; u8 reserved_1[0x8]; u8 uar[0x18]; u8 reserved_2[0x20]; }; struct mlx5_ifc_alloc_uar_in_bits { u8 opcode[0x10]; u8 reserved_0[0x10]; u8 reserved_1[0x10]; u8 op_mod[0x10]; u8 reserved_2[0x40]; }; struct mlx5_ifc_alloc_transport_domain_out_bits { u8 status[0x8]; u8 reserved_0[0x18]; u8 syndrome[0x20]; u8 reserved_1[0x8]; u8 transport_domain[0x18]; u8 reserved_2[0x20]; }; struct mlx5_ifc_alloc_transport_domain_in_bits { u8 opcode[0x10]; u8 reserved_0[0x10]; u8 reserved_1[0x10]; u8 op_mod[0x10]; u8 reserved_2[0x40]; }; struct mlx5_ifc_alloc_q_counter_out_bits { u8 status[0x8]; u8 reserved_0[0x18]; u8 syndrome[0x20]; u8 reserved_1[0x18]; u8 counter_set_id[0x8]; u8 reserved_2[0x20]; }; struct mlx5_ifc_alloc_q_counter_in_bits { u8 opcode[0x10]; u8 reserved_0[0x10]; u8 reserved_1[0x10]; u8 op_mod[0x10]; u8 reserved_2[0x40]; }; struct mlx5_ifc_alloc_pd_out_bits { u8 status[0x8]; u8 reserved_0[0x18]; u8 syndrome[0x20]; u8 reserved_1[0x8]; u8 pd[0x18]; u8 reserved_2[0x20]; }; struct mlx5_ifc_alloc_pd_in_bits { u8 opcode[0x10]; u8 reserved_0[0x10]; u8 reserved_1[0x10]; u8 op_mod[0x10]; u8 reserved_2[0x40]; }; struct mlx5_ifc_alloc_flow_counter_out_bits { u8 status[0x8]; u8 reserved_0[0x18]; u8 syndrome[0x20]; u8 reserved_1[0x10]; u8 flow_counter_id[0x10]; u8 reserved_2[0x20]; }; struct mlx5_ifc_alloc_flow_counter_in_bits { u8 opcode[0x10]; u8 reserved_0[0x10]; u8 reserved_1[0x10]; u8 op_mod[0x10]; u8 reserved_2[0x40]; }; struct mlx5_ifc_add_vxlan_udp_dport_out_bits { u8 status[0x8]; u8 reserved_0[0x18]; u8 syndrome[0x20]; u8 reserved_1[0x40]; }; struct mlx5_ifc_add_vxlan_udp_dport_in_bits { u8 opcode[0x10]; u8 reserved_0[0x10]; u8 reserved_1[0x10]; u8 op_mod[0x10]; u8 reserved_2[0x20]; u8 reserved_3[0x10]; u8 vxlan_udp_port[0x10]; }; struct mlx5_ifc_activate_tracer_out_bits { u8 status[0x8]; u8 reserved_0[0x18]; u8 syndrome[0x20]; u8 reserved_1[0x40]; }; struct mlx5_ifc_activate_tracer_in_bits { u8 opcode[0x10]; u8 reserved_0[0x10]; u8 reserved_1[0x10]; u8 op_mod[0x10]; u8 mkey[0x20]; u8 reserved_2[0x20]; }; struct mlx5_ifc_set_rate_limit_out_bits { u8 status[0x8]; u8 reserved_at_8[0x18]; u8 syndrome[0x20]; u8 reserved_at_40[0x40]; }; struct mlx5_ifc_set_rate_limit_in_bits { u8 opcode[0x10]; u8 reserved_at_10[0x10]; u8 reserved_at_20[0x10]; u8 op_mod[0x10]; u8 
reserved_at_40[0x10]; u8 rate_limit_index[0x10]; u8 reserved_at_60[0x20]; u8 rate_limit[0x20]; u8 burst_upper_bound[0x20]; u8 reserved_at_c0[0x10]; u8 typical_packet_size[0x10]; u8 reserved_at_e0[0x120]; }; struct mlx5_ifc_access_register_out_bits { u8 status[0x8]; u8 reserved_0[0x18]; u8 syndrome[0x20]; u8 reserved_1[0x40]; u8 register_data[0][0x20]; }; enum { MLX5_ACCESS_REGISTER_IN_OP_MOD_WRITE = 0x0, MLX5_ACCESS_REGISTER_IN_OP_MOD_READ = 0x1, }; struct mlx5_ifc_access_register_in_bits { u8 opcode[0x10]; u8 reserved_0[0x10]; u8 reserved_1[0x10]; u8 op_mod[0x10]; u8 reserved_2[0x10]; u8 register_id[0x10]; u8 argument[0x20]; u8 register_data[0][0x20]; }; struct mlx5_ifc_sltp_reg_bits { u8 status[0x4]; u8 version[0x4]; u8 local_port[0x8]; u8 pnat[0x2]; u8 reserved_0[0x2]; u8 lane[0x4]; u8 reserved_1[0x8]; u8 reserved_2[0x20]; u8 reserved_3[0x7]; u8 polarity[0x1]; u8 ob_tap0[0x8]; u8 ob_tap1[0x8]; u8 ob_tap2[0x8]; u8 reserved_4[0xc]; u8 ob_preemp_mode[0x4]; u8 ob_reg[0x8]; u8 ob_bias[0x8]; u8 reserved_5[0x20]; }; struct mlx5_ifc_slrp_reg_bits { u8 status[0x4]; u8 version[0x4]; u8 local_port[0x8]; u8 pnat[0x2]; u8 reserved_0[0x2]; u8 lane[0x4]; u8 reserved_1[0x8]; u8 ib_sel[0x2]; u8 reserved_2[0x11]; u8 dp_sel[0x1]; u8 dp90sel[0x4]; u8 mix90phase[0x8]; u8 ffe_tap0[0x8]; u8 ffe_tap1[0x8]; u8 ffe_tap2[0x8]; u8 ffe_tap3[0x8]; u8 ffe_tap4[0x8]; u8 ffe_tap5[0x8]; u8 ffe_tap6[0x8]; u8 ffe_tap7[0x8]; u8 ffe_tap8[0x8]; u8 mixerbias_tap_amp[0x8]; u8 reserved_3[0x7]; u8 ffe_tap_en[0x9]; u8 ffe_tap_offset0[0x8]; u8 ffe_tap_offset1[0x8]; u8 slicer_offset0[0x10]; u8 mixer_offset0[0x10]; u8 mixer_offset1[0x10]; u8 mixerbgn_inp[0x8]; u8 mixerbgn_inn[0x8]; u8 mixerbgn_refp[0x8]; u8 mixerbgn_refn[0x8]; u8 sel_slicer_lctrl_h[0x1]; u8 sel_slicer_lctrl_l[0x1]; u8 reserved_4[0x1]; u8 ref_mixer_vreg[0x5]; u8 slicer_gctrl[0x8]; u8 lctrl_input[0x8]; u8 mixer_offset_cm1[0x8]; u8 common_mode[0x6]; u8 reserved_5[0x1]; u8 mixer_offset_cm0[0x9]; u8 reserved_6[0x7]; u8 slicer_offset_cm[0x9]; }; struct mlx5_ifc_slrg_reg_bits { u8 status[0x4]; u8 version[0x4]; u8 local_port[0x8]; u8 pnat[0x2]; u8 reserved_0[0x2]; u8 lane[0x4]; u8 reserved_1[0x8]; u8 time_to_link_up[0x10]; u8 reserved_2[0xc]; u8 grade_lane_speed[0x4]; u8 grade_version[0x8]; u8 grade[0x18]; u8 reserved_3[0x4]; u8 height_grade_type[0x4]; u8 height_grade[0x18]; u8 height_dz[0x10]; u8 height_dv[0x10]; u8 reserved_4[0x10]; u8 height_sigma[0x10]; u8 reserved_5[0x20]; u8 reserved_6[0x4]; u8 phase_grade_type[0x4]; u8 phase_grade[0x18]; u8 reserved_7[0x8]; u8 phase_eo_pos[0x8]; u8 reserved_8[0x8]; u8 phase_eo_neg[0x8]; u8 ffe_set_tested[0x10]; u8 test_errors_per_lane[0x10]; }; struct mlx5_ifc_pvlc_reg_bits { u8 reserved_0[0x8]; u8 local_port[0x8]; u8 reserved_1[0x10]; u8 reserved_2[0x1c]; u8 vl_hw_cap[0x4]; u8 reserved_3[0x1c]; u8 vl_admin[0x4]; u8 reserved_4[0x1c]; u8 vl_operational[0x4]; }; struct mlx5_ifc_pude_reg_bits { u8 swid[0x8]; u8 local_port[0x8]; u8 reserved_0[0x4]; u8 admin_status[0x4]; u8 reserved_1[0x4]; u8 oper_status[0x4]; u8 reserved_2[0x60]; }; enum { MLX5_PTYS_REG_PROTO_MASK_INFINIBAND = 0x1, MLX5_PTYS_REG_PROTO_MASK_ETHERNET = 0x4, }; struct mlx5_ifc_ptys_reg_bits { u8 reserved_0[0x1]; u8 an_disable_admin[0x1]; u8 an_disable_cap[0x1]; u8 reserved_1[0x4]; u8 force_tx_aba_param[0x1]; u8 local_port[0x8]; u8 reserved_2[0xd]; u8 proto_mask[0x3]; u8 an_status[0x4]; u8 reserved_3[0xc]; u8 data_rate_oper[0x10]; u8 ext_eth_proto_capability[0x20]; u8 eth_proto_capability[0x20]; u8 ib_link_width_capability[0x10]; u8 ib_proto_capability[0x10]; u8 
ext_eth_proto_admin[0x20]; u8 eth_proto_admin[0x20]; u8 ib_link_width_admin[0x10]; u8 ib_proto_admin[0x10]; u8 ext_eth_proto_oper[0x20]; u8 eth_proto_oper[0x20]; u8 ib_link_width_oper[0x10]; u8 ib_proto_oper[0x10]; u8 reserved_4[0x1c]; u8 connector_type[0x4]; u8 eth_proto_lp_advertise[0x20]; u8 reserved_5[0x60]; }; struct mlx5_ifc_ptas_reg_bits { u8 reserved_0[0x20]; u8 algorithm_options[0x10]; u8 reserved_1[0x4]; u8 repetitions_mode[0x4]; u8 num_of_repetitions[0x8]; u8 grade_version[0x8]; u8 height_grade_type[0x4]; u8 phase_grade_type[0x4]; u8 height_grade_weight[0x8]; u8 phase_grade_weight[0x8]; u8 gisim_measure_bits[0x10]; u8 adaptive_tap_measure_bits[0x10]; u8 ber_bath_high_error_threshold[0x10]; u8 ber_bath_mid_error_threshold[0x10]; u8 ber_bath_low_error_threshold[0x10]; u8 one_ratio_high_threshold[0x10]; u8 one_ratio_high_mid_threshold[0x10]; u8 one_ratio_low_mid_threshold[0x10]; u8 one_ratio_low_threshold[0x10]; u8 ndeo_error_threshold[0x10]; u8 mixer_offset_step_size[0x10]; u8 reserved_2[0x8]; u8 mix90_phase_for_voltage_bath[0x8]; u8 mixer_offset_start[0x10]; u8 mixer_offset_end[0x10]; u8 reserved_3[0x15]; u8 ber_test_time[0xb]; }; struct mlx5_ifc_pspa_reg_bits { u8 swid[0x8]; u8 local_port[0x8]; u8 sub_port[0x8]; u8 reserved_0[0x8]; u8 reserved_1[0x20]; }; struct mlx5_ifc_ppsc_reg_bits { u8 reserved_0[0x8]; u8 local_port[0x8]; u8 reserved_1[0x10]; u8 reserved_2[0x60]; u8 reserved_3[0x1c]; u8 wrps_admin[0x4]; u8 reserved_4[0x1c]; u8 wrps_status[0x4]; u8 up_th_vld[0x1]; u8 down_th_vld[0x1]; u8 reserved_5[0x6]; u8 up_threshold[0x8]; u8 reserved_6[0x8]; u8 down_threshold[0x8]; u8 reserved_7[0x20]; u8 reserved_8[0x1c]; u8 srps_admin[0x4]; u8 reserved_9[0x60]; }; struct mlx5_ifc_pplr_reg_bits { u8 reserved_0[0x8]; u8 local_port[0x8]; u8 reserved_1[0x10]; u8 reserved_2[0x8]; u8 lb_cap[0x8]; u8 reserved_3[0x8]; u8 lb_en[0x8]; }; struct mlx5_ifc_pplm_reg_bits { u8 reserved_at_0[0x8]; u8 local_port[0x8]; u8 reserved_at_10[0x10]; u8 reserved_at_20[0x20]; u8 port_profile_mode[0x8]; u8 static_port_profile[0x8]; u8 active_port_profile[0x8]; u8 reserved_at_58[0x8]; u8 retransmission_active[0x8]; u8 fec_mode_active[0x18]; u8 rs_fec_correction_bypass_cap[0x4]; u8 reserved_at_84[0x8]; u8 fec_override_cap_56g[0x4]; u8 fec_override_cap_100g[0x4]; u8 fec_override_cap_50g[0x4]; u8 fec_override_cap_25g[0x4]; u8 fec_override_cap_10g_40g[0x4]; u8 rs_fec_correction_bypass_admin[0x4]; u8 reserved_at_a4[0x8]; u8 fec_override_admin_56g[0x4]; u8 fec_override_admin_100g[0x4]; u8 fec_override_admin_50g[0x4]; u8 fec_override_admin_25g[0x4]; u8 fec_override_admin_10g_40g[0x4]; u8 fec_override_cap_400g_8x[0x10]; u8 fec_override_cap_200g_4x[0x10]; u8 fec_override_cap_100g_2x[0x10]; u8 fec_override_cap_50g_1x[0x10]; u8 fec_override_admin_400g_8x[0x10]; u8 fec_override_admin_200g_4x[0x10]; u8 fec_override_admin_100g_2x[0x10]; u8 fec_override_admin_50g_1x[0x10]; u8 reserved_at_140[0xC0]; }; struct mlx5_ifc_ppll_reg_bits { u8 num_pll_groups[0x8]; u8 pll_group[0x8]; u8 reserved_0[0x4]; u8 num_plls[0x4]; u8 reserved_1[0x8]; u8 reserved_2[0x1f]; u8 ae[0x1]; u8 pll_status[4][0x40]; }; struct mlx5_ifc_ppad_reg_bits { u8 reserved_0[0x3]; u8 single_mac[0x1]; u8 reserved_1[0x4]; u8 local_port[0x8]; u8 mac_47_32[0x10]; u8 mac_31_0[0x20]; u8 reserved_2[0x40]; }; struct mlx5_ifc_pmtu_reg_bits { u8 reserved_0[0x8]; u8 local_port[0x8]; u8 reserved_1[0x10]; u8 max_mtu[0x10]; u8 reserved_2[0x10]; u8 admin_mtu[0x10]; u8 reserved_3[0x10]; u8 oper_mtu[0x10]; u8 reserved_4[0x10]; }; struct mlx5_ifc_pmpr_reg_bits { u8 reserved_0[0x8]; u8 
module[0x8]; u8 reserved_1[0x10]; u8 reserved_2[0x18]; u8 attenuation_5g[0x8]; u8 reserved_3[0x18]; u8 attenuation_7g[0x8]; u8 reserved_4[0x18]; u8 attenuation_12g[0x8]; }; struct mlx5_ifc_pmpe_reg_bits { u8 reserved_0[0x8]; u8 module[0x8]; u8 reserved_1[0xc]; u8 module_status[0x4]; u8 reserved_2[0x14]; u8 error_type[0x4]; u8 reserved_3[0x8]; u8 reserved_4[0x40]; }; struct mlx5_ifc_pmpc_reg_bits { u8 module_state_updated[32][0x8]; }; struct mlx5_ifc_pmlpn_reg_bits { u8 reserved_0[0x4]; u8 mlpn_status[0x4]; u8 local_port[0x8]; u8 reserved_1[0x10]; u8 e[0x1]; u8 reserved_2[0x1f]; }; struct mlx5_ifc_pmlp_reg_bits { u8 rxtx[0x1]; u8 reserved_0[0x7]; u8 local_port[0x8]; u8 reserved_1[0x8]; u8 width[0x8]; u8 lane0_module_mapping[0x20]; u8 lane1_module_mapping[0x20]; u8 lane2_module_mapping[0x20]; u8 lane3_module_mapping[0x20]; u8 reserved_2[0x160]; }; struct mlx5_ifc_pmaos_reg_bits { u8 reserved_0[0x8]; u8 module[0x8]; u8 reserved_1[0x4]; u8 admin_status[0x4]; u8 reserved_2[0x4]; u8 oper_status[0x4]; u8 ase[0x1]; u8 ee[0x1]; u8 reserved_3[0x12]; u8 error_type[0x4]; u8 reserved_4[0x6]; u8 e[0x2]; u8 reserved_5[0x40]; }; struct mlx5_ifc_plpc_reg_bits { u8 reserved_0[0x4]; u8 profile_id[0xc]; u8 reserved_1[0x4]; u8 proto_mask[0x4]; u8 reserved_2[0x8]; u8 reserved_3[0x10]; u8 lane_speed[0x10]; u8 reserved_4[0x17]; u8 lpbf[0x1]; u8 fec_mode_policy[0x8]; u8 retransmission_capability[0x8]; u8 fec_mode_capability[0x18]; u8 retransmission_support_admin[0x8]; u8 fec_mode_support_admin[0x18]; u8 retransmission_request_admin[0x8]; u8 fec_mode_request_admin[0x18]; u8 reserved_5[0x80]; }; struct mlx5_ifc_pll_status_data_bits { u8 reserved_0[0x1]; u8 lock_cal[0x1]; u8 lock_status[0x2]; u8 reserved_1[0x2]; u8 algo_f_ctrl[0xa]; u8 analog_algo_num_var[0x6]; u8 f_ctrl_measure[0xa]; u8 reserved_2[0x2]; u8 analog_var[0x6]; u8 reserved_3[0x2]; u8 high_var[0x6]; u8 reserved_4[0x2]; u8 low_var[0x6]; u8 reserved_5[0x2]; u8 mid_val[0x6]; }; struct mlx5_ifc_plib_reg_bits { u8 reserved_0[0x8]; u8 local_port[0x8]; u8 reserved_1[0x8]; u8 ib_port[0x8]; u8 reserved_2[0x60]; }; struct mlx5_ifc_plbf_reg_bits { u8 reserved_0[0x8]; u8 local_port[0x8]; u8 reserved_1[0xd]; u8 lbf_mode[0x3]; u8 reserved_2[0x20]; }; struct mlx5_ifc_pipg_reg_bits { u8 reserved_0[0x8]; u8 local_port[0x8]; u8 reserved_1[0x10]; u8 dic[0x1]; u8 reserved_2[0x19]; u8 ipg[0x4]; u8 reserved_3[0x2]; }; struct mlx5_ifc_pifr_reg_bits { u8 reserved_0[0x8]; u8 local_port[0x8]; u8 reserved_1[0x10]; u8 reserved_2[0xe0]; u8 port_filter[8][0x20]; u8 port_filter_update_en[8][0x20]; }; struct mlx5_ifc_phys_layer_cntrs_bits { u8 time_since_last_clear_high[0x20]; u8 time_since_last_clear_low[0x20]; u8 symbol_errors_high[0x20]; u8 symbol_errors_low[0x20]; u8 sync_headers_errors_high[0x20]; u8 sync_headers_errors_low[0x20]; u8 edpl_bip_errors_lane0_high[0x20]; u8 edpl_bip_errors_lane0_low[0x20]; u8 edpl_bip_errors_lane1_high[0x20]; u8 edpl_bip_errors_lane1_low[0x20]; u8 edpl_bip_errors_lane2_high[0x20]; u8 edpl_bip_errors_lane2_low[0x20]; u8 edpl_bip_errors_lane3_high[0x20]; u8 edpl_bip_errors_lane3_low[0x20]; u8 fc_fec_corrected_blocks_lane0_high[0x20]; u8 fc_fec_corrected_blocks_lane0_low[0x20]; u8 fc_fec_corrected_blocks_lane1_high[0x20]; u8 fc_fec_corrected_blocks_lane1_low[0x20]; u8 fc_fec_corrected_blocks_lane2_high[0x20]; u8 fc_fec_corrected_blocks_lane2_low[0x20]; u8 fc_fec_corrected_blocks_lane3_high[0x20]; u8 fc_fec_corrected_blocks_lane3_low[0x20]; u8 fc_fec_uncorrectable_blocks_lane0_high[0x20]; u8 fc_fec_uncorrectable_blocks_lane0_low[0x20]; u8 
fc_fec_uncorrectable_blocks_lane1_high[0x20]; u8 fc_fec_uncorrectable_blocks_lane1_low[0x20]; u8 fc_fec_uncorrectable_blocks_lane2_high[0x20]; u8 fc_fec_uncorrectable_blocks_lane2_low[0x20]; u8 fc_fec_uncorrectable_blocks_lane3_high[0x20]; u8 fc_fec_uncorrectable_blocks_lane3_low[0x20]; u8 rs_fec_corrected_blocks_high[0x20]; u8 rs_fec_corrected_blocks_low[0x20]; u8 rs_fec_uncorrectable_blocks_high[0x20]; u8 rs_fec_uncorrectable_blocks_low[0x20]; u8 rs_fec_no_errors_blocks_high[0x20]; u8 rs_fec_no_errors_blocks_low[0x20]; u8 rs_fec_single_error_blocks_high[0x20]; u8 rs_fec_single_error_blocks_low[0x20]; u8 rs_fec_corrected_symbols_total_high[0x20]; u8 rs_fec_corrected_symbols_total_low[0x20]; u8 rs_fec_corrected_symbols_lane0_high[0x20]; u8 rs_fec_corrected_symbols_lane0_low[0x20]; u8 rs_fec_corrected_symbols_lane1_high[0x20]; u8 rs_fec_corrected_symbols_lane1_low[0x20]; u8 rs_fec_corrected_symbols_lane2_high[0x20]; u8 rs_fec_corrected_symbols_lane2_low[0x20]; u8 rs_fec_corrected_symbols_lane3_high[0x20]; u8 rs_fec_corrected_symbols_lane3_low[0x20]; u8 link_down_events[0x20]; u8 successful_recovery_events[0x20]; u8 reserved_0[0x180]; }; struct mlx5_ifc_ib_port_cntrs_grp_data_layout_bits { u8 symbol_error_counter[0x10]; u8 link_error_recovery_counter[0x8]; u8 link_downed_counter[0x8]; u8 port_rcv_errors[0x10]; u8 port_rcv_remote_physical_errors[0x10]; u8 port_rcv_switch_relay_errors[0x10]; u8 port_xmit_discards[0x10]; u8 port_xmit_constraint_errors[0x8]; u8 port_rcv_constraint_errors[0x8]; u8 reserved_at_70[0x8]; u8 link_overrun_errors[0x8]; u8 reserved_at_80[0x10]; u8 vl_15_dropped[0x10]; u8 reserved_at_a0[0xa0]; }; struct mlx5_ifc_phys_layer_statistical_cntrs_bits { u8 time_since_last_clear_high[0x20]; u8 time_since_last_clear_low[0x20]; u8 phy_received_bits_high[0x20]; u8 phy_received_bits_low[0x20]; u8 phy_symbol_errors_high[0x20]; u8 phy_symbol_errors_low[0x20]; u8 phy_corrected_bits_high[0x20]; u8 phy_corrected_bits_low[0x20]; u8 phy_corrected_bits_lane0_high[0x20]; u8 phy_corrected_bits_lane0_low[0x20]; u8 phy_corrected_bits_lane1_high[0x20]; u8 phy_corrected_bits_lane1_low[0x20]; u8 phy_corrected_bits_lane2_high[0x20]; u8 phy_corrected_bits_lane2_low[0x20]; u8 phy_corrected_bits_lane3_high[0x20]; u8 phy_corrected_bits_lane3_low[0x20]; u8 reserved_at_200[0x5c0]; }; struct mlx5_ifc_infiniband_port_cntrs_bits { u8 symbol_error_counter[0x10]; u8 link_error_recovery_counter[0x8]; u8 link_downed_counter[0x8]; u8 port_rcv_errors[0x10]; u8 port_rcv_remote_physical_errors[0x10]; u8 port_rcv_switch_relay_errors[0x10]; u8 port_xmit_discards[0x10]; u8 port_xmit_constraint_errors[0x8]; u8 port_rcv_constraint_errors[0x8]; u8 reserved_0[0x8]; u8 local_link_integrity_errors[0x4]; u8 excessive_buffer_overrun_errors[0x4]; u8 reserved_1[0x10]; u8 vl_15_dropped[0x10]; u8 port_xmit_data[0x20]; u8 port_rcv_data[0x20]; u8 port_xmit_pkts[0x20]; u8 port_rcv_pkts[0x20]; u8 port_xmit_wait[0x20]; u8 reserved_2[0x680]; }; struct mlx5_ifc_phrr_reg_bits { u8 clr[0x1]; u8 reserved_0[0x7]; u8 local_port[0x8]; u8 reserved_1[0x10]; u8 hist_group[0x8]; u8 reserved_2[0x10]; u8 hist_id[0x8]; u8 reserved_3[0x40]; u8 time_since_last_clear_high[0x20]; u8 time_since_last_clear_low[0x20]; u8 bin[10][0x20]; }; struct mlx5_ifc_phbr_for_prio_reg_bits { u8 reserved_0[0x18]; u8 prio[0x8]; }; struct mlx5_ifc_phbr_for_port_tclass_reg_bits { u8 reserved_0[0x18]; u8 tclass[0x8]; }; struct mlx5_ifc_phbr_binding_reg_bits { u8 opcode[0x4]; u8 reserved_0[0x4]; u8 local_port[0x8]; u8 pnat[0x2]; u8 reserved_1[0xe]; u8 hist_group[0x8]; u8 
reserved_2[0x10]; u8 hist_id[0x8]; u8 reserved_3[0x10]; u8 hist_type[0x10]; u8 hist_parameters[0x20]; u8 hist_min_value[0x20]; u8 hist_max_value[0x20]; u8 sample_time[0x20]; }; enum { MLX5_PFCC_REG_PPAN_DISABLED = 0x0, MLX5_PFCC_REG_PPAN_ENABLED = 0x1, }; struct mlx5_ifc_pfcc_reg_bits { u8 dcbx_operation_type[0x2]; u8 cap_local_admin[0x1]; u8 cap_remote_admin[0x1]; u8 reserved_0[0x4]; u8 local_port[0x8]; u8 pnat[0x2]; u8 reserved_1[0xc]; u8 shl_cap[0x1]; u8 shl_opr[0x1]; u8 ppan[0x4]; u8 reserved_2[0x4]; u8 prio_mask_tx[0x8]; u8 reserved_3[0x8]; u8 prio_mask_rx[0x8]; u8 pptx[0x1]; u8 aptx[0x1]; u8 reserved_4[0x6]; u8 pfctx[0x8]; u8 reserved_5[0x8]; u8 cbftx[0x8]; u8 pprx[0x1]; u8 aprx[0x1]; u8 reserved_6[0x6]; u8 pfcrx[0x8]; u8 reserved_7[0x8]; u8 cbfrx[0x8]; u8 device_stall_minor_watermark[0x10]; u8 device_stall_critical_watermark[0x10]; u8 reserved_8[0x60]; }; struct mlx5_ifc_pelc_reg_bits { u8 op[0x4]; u8 reserved_0[0x4]; u8 local_port[0x8]; u8 reserved_1[0x10]; u8 op_admin[0x8]; u8 op_capability[0x8]; u8 op_request[0x8]; u8 op_active[0x8]; u8 admin[0x40]; u8 capability[0x40]; u8 request[0x40]; u8 active[0x40]; u8 reserved_2[0x80]; }; struct mlx5_ifc_peir_reg_bits { u8 reserved_0[0x8]; u8 local_port[0x8]; u8 reserved_1[0x10]; u8 reserved_2[0xc]; u8 error_count[0x4]; u8 reserved_3[0x10]; u8 reserved_4[0xc]; u8 lane[0x4]; u8 reserved_5[0x8]; u8 error_type[0x8]; }; struct mlx5_ifc_qcam_access_reg_cap_mask { u8 qcam_access_reg_cap_mask_127_to_20[0x6C]; u8 qpdpm[0x1]; u8 qcam_access_reg_cap_mask_18_to_4[0x0F]; u8 qdpm[0x1]; u8 qpts[0x1]; u8 qcap[0x1]; u8 qcam_access_reg_cap_mask_0[0x1]; }; struct mlx5_ifc_qcam_qos_feature_cap_mask { u8 qcam_qos_feature_cap_mask_127_to_1[0x7F]; u8 qpts_trust_both[0x1]; }; struct mlx5_ifc_qcam_reg_bits { u8 reserved_at_0[0x8]; u8 feature_group[0x8]; u8 reserved_at_10[0x8]; u8 access_reg_group[0x8]; u8 reserved_at_20[0x20]; union { struct mlx5_ifc_qcam_access_reg_cap_mask reg_cap; u8 reserved_at_0[0x80]; } qos_access_reg_cap_mask; u8 reserved_at_c0[0x80]; union { struct mlx5_ifc_qcam_qos_feature_cap_mask feature_cap; u8 reserved_at_0[0x80]; } qos_feature_cap_mask; u8 reserved_at_1c0[0x80]; }; struct mlx5_ifc_pcam_enhanced_features_bits { u8 reserved_at_0[0x6d]; u8 rx_icrc_encapsulated_counter[0x1]; u8 reserved_at_6e[0x4]; u8 ptys_extended_ethernet[0x1]; u8 reserved_at_73[0x3]; u8 pfcc_mask[0x1]; u8 reserved_at_77[0x3]; u8 per_lane_error_counters[0x1]; u8 rx_buffer_fullness_counters[0x1]; u8 ptys_connector_type[0x1]; u8 reserved_at_7d[0x1]; u8 ppcnt_discard_group[0x1]; u8 ppcnt_statistical_group[0x1]; }; struct mlx5_ifc_pcam_regs_5000_to_507f_bits { u8 port_access_reg_cap_mask_127_to_96[0x20]; u8 port_access_reg_cap_mask_95_to_64[0x20]; u8 reserved_at_40[0xe]; u8 pddr[0x1]; u8 reserved_at_4f[0xd]; u8 pplm[0x1]; u8 port_access_reg_cap_mask_34_to_32[0x3]; u8 port_access_reg_cap_mask_31_to_13[0x13]; u8 pbmc[0x1]; u8 pptb[0x1]; u8 port_access_reg_cap_mask_10_to_09[0x2]; u8 ppcnt[0x1]; u8 port_access_reg_cap_mask_07_to_00[0x8]; }; struct mlx5_ifc_pcam_reg_bits { u8 reserved_at_0[0x8]; u8 feature_group[0x8]; u8 reserved_at_10[0x8]; u8 access_reg_group[0x8]; u8 reserved_at_20[0x20]; union { struct mlx5_ifc_pcam_regs_5000_to_507f_bits regs_5000_to_507f; u8 reserved_at_0[0x80]; } port_access_reg_cap_mask; u8 reserved_at_c0[0x80]; union { struct mlx5_ifc_pcam_enhanced_features_bits enhanced_features; u8 reserved_at_0[0x80]; } feature_cap_mask; u8 reserved_at_1c0[0xc0]; }; struct mlx5_ifc_mcam_enhanced_features_bits { u8 reserved_at_0[0x6e]; u8 pcie_status_and_power[0x1]; 
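/*
 * Editor's note (illustrative comment, not part of the original header): the
 * QCAM/PCAM/MCAM layouts in this area describe firmware capability masks in
 * which each named member is a single-bit feature or access-register flag.
 * As in every mlx5_ifc_* structure, fields are laid out most-significant-bit
 * first within big-endian 32-bit words and the bracketed sizes are bit
 * widths; drivers typically test these bits with MLX5_GET() on the queried
 * *CAM register payload (the exact convenience macros wrapping that access
 * differ between the Linux and FreeBSD mlx5 ports).
 */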
u8 reserved_at_111[0x10]; u8 pcie_performance_group[0x1]; }; struct mlx5_ifc_mcam_access_reg_bits { u8 reserved_at_0[0x1c]; u8 mcda[0x1]; u8 mcc[0x1]; u8 mcqi[0x1]; u8 reserved_at_1f[0x1]; u8 regs_95_to_64[0x20]; u8 regs_63_to_32[0x20]; u8 regs_31_to_0[0x20]; }; struct mlx5_ifc_mcam_reg_bits { u8 reserved_at_0[0x8]; u8 feature_group[0x8]; u8 reserved_at_10[0x8]; u8 access_reg_group[0x8]; u8 reserved_at_20[0x20]; union { struct mlx5_ifc_mcam_access_reg_bits access_regs; u8 reserved_at_0[0x80]; } mng_access_reg_cap_mask; u8 reserved_at_c0[0x80]; union { struct mlx5_ifc_mcam_enhanced_features_bits enhanced_features; u8 reserved_at_0[0x80]; } mng_feature_cap_mask; u8 reserved_at_1c0[0x80]; }; struct mlx5_ifc_pcap_reg_bits { u8 reserved_0[0x8]; u8 local_port[0x8]; u8 reserved_1[0x10]; u8 port_capability_mask[4][0x20]; }; struct mlx5_ifc_pbmc_reg_bits { u8 reserved_at_0[0x8]; u8 local_port[0x8]; u8 reserved_at_10[0x10]; u8 xoff_timer_value[0x10]; u8 xoff_refresh[0x10]; u8 reserved_at_40[0x9]; u8 fullness_threshold[0x7]; u8 port_buffer_size[0x10]; struct mlx5_ifc_bufferx_reg_bits buffer[10]; u8 reserved_at_2e0[0x40]; }; struct mlx5_ifc_paos_reg_bits { u8 swid[0x8]; u8 local_port[0x8]; u8 reserved_0[0x4]; u8 admin_status[0x4]; u8 reserved_1[0x4]; u8 oper_status[0x4]; u8 ase[0x1]; u8 ee[0x1]; u8 reserved_2[0x1c]; u8 e[0x2]; u8 reserved_3[0x40]; }; struct mlx5_ifc_pamp_reg_bits { u8 reserved_0[0x8]; u8 opamp_group[0x8]; u8 reserved_1[0xc]; u8 opamp_group_type[0x4]; u8 start_index[0x10]; u8 reserved_2[0x4]; u8 num_of_indices[0xc]; u8 index_data[18][0x10]; }; struct mlx5_ifc_link_level_retrans_cntr_grp_date_bits { u8 llr_rx_cells_high[0x20]; u8 llr_rx_cells_low[0x20]; u8 llr_rx_error_high[0x20]; u8 llr_rx_error_low[0x20]; u8 llr_rx_crc_error_high[0x20]; u8 llr_rx_crc_error_low[0x20]; u8 llr_tx_cells_high[0x20]; u8 llr_tx_cells_low[0x20]; u8 llr_tx_ret_cells_high[0x20]; u8 llr_tx_ret_cells_low[0x20]; u8 llr_tx_ret_events_high[0x20]; u8 llr_tx_ret_events_low[0x20]; u8 reserved_0[0x640]; }; struct mlx5_ifc_mtmp_reg_bits { u8 i[0x1]; u8 reserved_at_1[0x18]; u8 sensor_index[0x7]; u8 reserved_at_20[0x10]; u8 temperature[0x10]; u8 mte[0x1]; u8 mtr[0x1]; u8 reserved_at_42[0x0e]; u8 max_temperature[0x10]; u8 tee[0x2]; u8 reserved_at_62[0x0e]; u8 temperature_threshold_hi[0x10]; u8 reserved_at_80[0x10]; u8 temperature_threshold_lo[0x10]; u8 reserved_at_100[0x20]; u8 sensor_name[0x40]; }; struct mlx5_ifc_lane_2_module_mapping_bits { u8 reserved_0[0x6]; u8 rx_lane[0x2]; u8 reserved_1[0x6]; u8 tx_lane[0x2]; u8 reserved_2[0x8]; u8 module[0x8]; }; struct mlx5_ifc_eth_per_traffic_class_layout_bits { u8 transmit_queue_high[0x20]; u8 transmit_queue_low[0x20]; u8 reserved_0[0x780]; }; struct mlx5_ifc_eth_per_traffic_class_cong_layout_bits { u8 no_buffer_discard_uc_high[0x20]; u8 no_buffer_discard_uc_low[0x20]; u8 wred_discard_high[0x20]; u8 wred_discard_low[0x20]; u8 reserved_0[0x740]; }; struct mlx5_ifc_eth_per_prio_grp_data_layout_bits { u8 rx_octets_high[0x20]; u8 rx_octets_low[0x20]; u8 reserved_0[0xc0]; u8 rx_frames_high[0x20]; u8 rx_frames_low[0x20]; u8 tx_octets_high[0x20]; u8 tx_octets_low[0x20]; u8 reserved_1[0xc0]; u8 tx_frames_high[0x20]; u8 tx_frames_low[0x20]; u8 rx_pause_high[0x20]; u8 rx_pause_low[0x20]; u8 rx_pause_duration_high[0x20]; u8 rx_pause_duration_low[0x20]; u8 tx_pause_high[0x20]; u8 tx_pause_low[0x20]; u8 tx_pause_duration_high[0x20]; u8 tx_pause_duration_low[0x20]; u8 rx_pause_transition_high[0x20]; u8 rx_pause_transition_low[0x20]; u8 rx_discards_high[0x20]; u8 rx_discards_low[0x20]; u8 
device_stall_minor_watermark_cnt_high[0x20]; u8 device_stall_minor_watermark_cnt_low[0x20]; u8 device_stall_critical_watermark_cnt_high[0x20]; u8 device_stall_critical_watermark_cnt_low[0x20]; u8 reserved_2[0x340]; }; struct mlx5_ifc_eth_extended_cntrs_grp_data_layout_bits { u8 port_transmit_wait_high[0x20]; u8 port_transmit_wait_low[0x20]; u8 ecn_marked_high[0x20]; u8 ecn_marked_low[0x20]; u8 no_buffer_discard_mc_high[0x20]; u8 no_buffer_discard_mc_low[0x20]; u8 rx_ebp_high[0x20]; u8 rx_ebp_low[0x20]; u8 tx_ebp_high[0x20]; u8 tx_ebp_low[0x20]; u8 rx_buffer_almost_full_high[0x20]; u8 rx_buffer_almost_full_low[0x20]; u8 rx_buffer_full_high[0x20]; u8 rx_buffer_full_low[0x20]; u8 rx_icrc_encapsulated_high[0x20]; u8 rx_icrc_encapsulated_low[0x20]; u8 reserved_0[0x80]; u8 tx_stats_pkts64octets_high[0x20]; u8 tx_stats_pkts64octets_low[0x20]; u8 tx_stats_pkts65to127octets_high[0x20]; u8 tx_stats_pkts65to127octets_low[0x20]; u8 tx_stats_pkts128to255octets_high[0x20]; u8 tx_stats_pkts128to255octets_low[0x20]; u8 tx_stats_pkts256to511octets_high[0x20]; u8 tx_stats_pkts256to511octets_low[0x20]; u8 tx_stats_pkts512to1023octets_high[0x20]; u8 tx_stats_pkts512to1023octets_low[0x20]; u8 tx_stats_pkts1024to1518octets_high[0x20]; u8 tx_stats_pkts1024to1518octets_low[0x20]; u8 tx_stats_pkts1519to2047octets_high[0x20]; u8 tx_stats_pkts1519to2047octets_low[0x20]; u8 tx_stats_pkts2048to4095octets_high[0x20]; u8 tx_stats_pkts2048to4095octets_low[0x20]; u8 tx_stats_pkts4096to8191octets_high[0x20]; u8 tx_stats_pkts4096to8191octets_low[0x20]; u8 tx_stats_pkts8192to10239octets_high[0x20]; u8 tx_stats_pkts8192to10239octets_low[0x20]; u8 reserved_1[0x2C0]; }; struct mlx5_ifc_eth_802_3_cntrs_grp_data_layout_bits { u8 a_frames_transmitted_ok_high[0x20]; u8 a_frames_transmitted_ok_low[0x20]; u8 a_frames_received_ok_high[0x20]; u8 a_frames_received_ok_low[0x20]; u8 a_frame_check_sequence_errors_high[0x20]; u8 a_frame_check_sequence_errors_low[0x20]; u8 a_alignment_errors_high[0x20]; u8 a_alignment_errors_low[0x20]; u8 a_octets_transmitted_ok_high[0x20]; u8 a_octets_transmitted_ok_low[0x20]; u8 a_octets_received_ok_high[0x20]; u8 a_octets_received_ok_low[0x20]; u8 a_multicast_frames_xmitted_ok_high[0x20]; u8 a_multicast_frames_xmitted_ok_low[0x20]; u8 a_broadcast_frames_xmitted_ok_high[0x20]; u8 a_broadcast_frames_xmitted_ok_low[0x20]; u8 a_multicast_frames_received_ok_high[0x20]; u8 a_multicast_frames_received_ok_low[0x20]; u8 a_broadcast_frames_recieved_ok_high[0x20]; u8 a_broadcast_frames_recieved_ok_low[0x20]; u8 a_in_range_length_errors_high[0x20]; u8 a_in_range_length_errors_low[0x20]; u8 a_out_of_range_length_field_high[0x20]; u8 a_out_of_range_length_field_low[0x20]; u8 a_frame_too_long_errors_high[0x20]; u8 a_frame_too_long_errors_low[0x20]; u8 a_symbol_error_during_carrier_high[0x20]; u8 a_symbol_error_during_carrier_low[0x20]; u8 a_mac_control_frames_transmitted_high[0x20]; u8 a_mac_control_frames_transmitted_low[0x20]; u8 a_mac_control_frames_received_high[0x20]; u8 a_mac_control_frames_received_low[0x20]; u8 a_unsupported_opcodes_received_high[0x20]; u8 a_unsupported_opcodes_received_low[0x20]; u8 a_pause_mac_ctrl_frames_received_high[0x20]; u8 a_pause_mac_ctrl_frames_received_low[0x20]; u8 a_pause_mac_ctrl_frames_transmitted_high[0x20]; u8 a_pause_mac_ctrl_frames_transmitted_low[0x20]; u8 reserved_0[0x300]; }; struct mlx5_ifc_eth_3635_cntrs_grp_data_layout_bits { u8 dot3stats_alignment_errors_high[0x20]; u8 dot3stats_alignment_errors_low[0x20]; u8 dot3stats_fcs_errors_high[0x20]; u8 
dot3stats_fcs_errors_low[0x20]; u8 dot3stats_single_collision_frames_high[0x20]; u8 dot3stats_single_collision_frames_low[0x20]; u8 dot3stats_multiple_collision_frames_high[0x20]; u8 dot3stats_multiple_collision_frames_low[0x20]; u8 dot3stats_sqe_test_errors_high[0x20]; u8 dot3stats_sqe_test_errors_low[0x20]; u8 dot3stats_deferred_transmissions_high[0x20]; u8 dot3stats_deferred_transmissions_low[0x20]; u8 dot3stats_late_collisions_high[0x20]; u8 dot3stats_late_collisions_low[0x20]; u8 dot3stats_excessive_collisions_high[0x20]; u8 dot3stats_excessive_collisions_low[0x20]; u8 dot3stats_internal_mac_transmit_errors_high[0x20]; u8 dot3stats_internal_mac_transmit_errors_low[0x20]; u8 dot3stats_carrier_sense_errors_high[0x20]; u8 dot3stats_carrier_sense_errors_low[0x20]; u8 dot3stats_frame_too_longs_high[0x20]; u8 dot3stats_frame_too_longs_low[0x20]; u8 dot3stats_internal_mac_receive_errors_high[0x20]; u8 dot3stats_internal_mac_receive_errors_low[0x20]; u8 dot3stats_symbol_errors_high[0x20]; u8 dot3stats_symbol_errors_low[0x20]; u8 dot3control_in_unknown_opcodes_high[0x20]; u8 dot3control_in_unknown_opcodes_low[0x20]; u8 dot3in_pause_frames_high[0x20]; u8 dot3in_pause_frames_low[0x20]; u8 dot3out_pause_frames_high[0x20]; u8 dot3out_pause_frames_low[0x20]; u8 reserved_0[0x3c0]; }; struct mlx5_ifc_eth_2863_cntrs_grp_data_layout_bits { u8 if_in_octets_high[0x20]; u8 if_in_octets_low[0x20]; u8 if_in_ucast_pkts_high[0x20]; u8 if_in_ucast_pkts_low[0x20]; u8 if_in_discards_high[0x20]; u8 if_in_discards_low[0x20]; u8 if_in_errors_high[0x20]; u8 if_in_errors_low[0x20]; u8 if_in_unknown_protos_high[0x20]; u8 if_in_unknown_protos_low[0x20]; u8 if_out_octets_high[0x20]; u8 if_out_octets_low[0x20]; u8 if_out_ucast_pkts_high[0x20]; u8 if_out_ucast_pkts_low[0x20]; u8 if_out_discards_high[0x20]; u8 if_out_discards_low[0x20]; u8 if_out_errors_high[0x20]; u8 if_out_errors_low[0x20]; u8 if_in_multicast_pkts_high[0x20]; u8 if_in_multicast_pkts_low[0x20]; u8 if_in_broadcast_pkts_high[0x20]; u8 if_in_broadcast_pkts_low[0x20]; u8 if_out_multicast_pkts_high[0x20]; u8 if_out_multicast_pkts_low[0x20]; u8 if_out_broadcast_pkts_high[0x20]; u8 if_out_broadcast_pkts_low[0x20]; u8 reserved_0[0x480]; }; struct mlx5_ifc_eth_2819_cntrs_grp_data_layout_bits { u8 ether_stats_drop_events_high[0x20]; u8 ether_stats_drop_events_low[0x20]; u8 ether_stats_octets_high[0x20]; u8 ether_stats_octets_low[0x20]; u8 ether_stats_pkts_high[0x20]; u8 ether_stats_pkts_low[0x20]; u8 ether_stats_broadcast_pkts_high[0x20]; u8 ether_stats_broadcast_pkts_low[0x20]; u8 ether_stats_multicast_pkts_high[0x20]; u8 ether_stats_multicast_pkts_low[0x20]; u8 ether_stats_crc_align_errors_high[0x20]; u8 ether_stats_crc_align_errors_low[0x20]; u8 ether_stats_undersize_pkts_high[0x20]; u8 ether_stats_undersize_pkts_low[0x20]; u8 ether_stats_oversize_pkts_high[0x20]; u8 ether_stats_oversize_pkts_low[0x20]; u8 ether_stats_fragments_high[0x20]; u8 ether_stats_fragments_low[0x20]; u8 ether_stats_jabbers_high[0x20]; u8 ether_stats_jabbers_low[0x20]; u8 ether_stats_collisions_high[0x20]; u8 ether_stats_collisions_low[0x20]; u8 ether_stats_pkts64octets_high[0x20]; u8 ether_stats_pkts64octets_low[0x20]; u8 ether_stats_pkts65to127octets_high[0x20]; u8 ether_stats_pkts65to127octets_low[0x20]; u8 ether_stats_pkts128to255octets_high[0x20]; u8 ether_stats_pkts128to255octets_low[0x20]; u8 ether_stats_pkts256to511octets_high[0x20]; u8 ether_stats_pkts256to511octets_low[0x20]; u8 ether_stats_pkts512to1023octets_high[0x20]; u8 ether_stats_pkts512to1023octets_low[0x20]; u8 
ether_stats_pkts1024to1518octets_high[0x20]; u8 ether_stats_pkts1024to1518octets_low[0x20]; u8 ether_stats_pkts1519to2047octets_high[0x20]; u8 ether_stats_pkts1519to2047octets_low[0x20]; u8 ether_stats_pkts2048to4095octets_high[0x20]; u8 ether_stats_pkts2048to4095octets_low[0x20]; u8 ether_stats_pkts4096to8191octets_high[0x20]; u8 ether_stats_pkts4096to8191octets_low[0x20]; u8 ether_stats_pkts8192to10239octets_high[0x20]; u8 ether_stats_pkts8192to10239octets_low[0x20]; u8 reserved_0[0x280]; }; struct mlx5_ifc_ib_portcntrs_attribute_grp_data_bits { u8 symbol_error_counter[0x10]; u8 link_error_recovery_counter[0x8]; u8 link_downed_counter[0x8]; u8 port_rcv_errors[0x10]; u8 port_rcv_remote_physical_errors[0x10]; u8 port_rcv_switch_relay_errors[0x10]; u8 port_xmit_discards[0x10]; u8 port_xmit_constraint_errors[0x8]; u8 port_rcv_constraint_errors[0x8]; u8 reserved_0[0x8]; u8 local_link_integrity_errors[0x4]; u8 excessive_buffer_overrun_errors[0x4]; u8 reserved_1[0x10]; u8 vl_15_dropped[0x10]; u8 port_xmit_data[0x20]; u8 port_rcv_data[0x20]; u8 port_xmit_pkts[0x20]; u8 port_rcv_pkts[0x20]; u8 port_xmit_wait[0x20]; u8 reserved_2[0x680]; }; struct mlx5_ifc_trc_tlb_reg_bits { u8 reserved_0[0x80]; u8 tlb_addr[0][0x40]; }; struct mlx5_ifc_trc_read_fifo_reg_bits { u8 reserved_0[0x10]; u8 requested_event_num[0x10]; u8 reserved_1[0x20]; u8 reserved_2[0x10]; u8 acual_event_num[0x10]; u8 reserved_3[0x20]; u8 event[0][0x40]; }; struct mlx5_ifc_trc_lock_reg_bits { u8 reserved_0[0x1f]; u8 lock[0x1]; u8 reserved_1[0x60]; }; struct mlx5_ifc_trc_filter_reg_bits { u8 status[0x1]; u8 reserved_0[0xf]; u8 filter_index[0x10]; u8 reserved_1[0x20]; u8 filter_val[0x20]; u8 reserved_2[0x1a0]; }; struct mlx5_ifc_trc_event_reg_bits { u8 status[0x1]; u8 reserved_0[0xf]; u8 event_index[0x10]; u8 reserved_1[0x20]; u8 event_id[0x20]; u8 event_selector_val[0x10]; u8 event_selector_size[0x10]; u8 reserved_2[0x180]; }; struct mlx5_ifc_trc_conf_reg_bits { u8 limit_en[0x1]; u8 reserved_0[0x3]; u8 dump_mode[0x4]; u8 reserved_1[0x15]; u8 state[0x3]; u8 reserved_2[0x20]; u8 limit_event_index[0x20]; u8 mkey[0x20]; u8 fifo_ready_ev_num[0x20]; u8 reserved_3[0x160]; }; struct mlx5_ifc_trc_cap_reg_bits { u8 reserved_0[0x18]; u8 dump_mode[0x8]; u8 reserved_1[0x20]; u8 num_of_events[0x10]; u8 num_of_filters[0x10]; u8 fifo_size[0x20]; u8 tlb_size[0x10]; u8 event_size[0x10]; u8 reserved_2[0x160]; }; struct mlx5_ifc_set_node_in_bits { u8 node_description[64][0x8]; }; struct mlx5_ifc_register_power_settings_bits { u8 reserved_0[0x18]; u8 power_settings_level[0x8]; u8 reserved_1[0x60]; }; struct mlx5_ifc_register_host_endianess_bits { u8 he[0x1]; u8 reserved_0[0x1f]; u8 reserved_1[0x60]; }; struct mlx5_ifc_register_diag_buffer_ctrl_bits { u8 physical_address[0x40]; }; struct mlx5_ifc_qtct_reg_bits { u8 operation_type[0x2]; u8 cap_local_admin[0x1]; u8 cap_remote_admin[0x1]; u8 reserved_0[0x4]; u8 port_number[0x8]; u8 reserved_1[0xd]; u8 prio[0x3]; u8 reserved_2[0x1d]; u8 tclass[0x3]; }; struct mlx5_ifc_qpdp_reg_bits { u8 reserved_0[0x8]; u8 port_number[0x8]; u8 reserved_1[0x10]; u8 reserved_2[0x1d]; u8 pprio[0x3]; }; struct mlx5_ifc_port_info_ro_fields_param_bits { u8 reserved_0[0x8]; u8 port[0x8]; u8 max_gid[0x10]; u8 reserved_1[0x20]; u8 port_guid[0x40]; }; struct mlx5_ifc_nvqc_reg_bits { u8 type[0x20]; u8 reserved_0[0x18]; u8 version[0x4]; u8 reserved_1[0x2]; u8 support_wr[0x1]; u8 support_rd[0x1]; }; struct mlx5_ifc_nvia_reg_bits { u8 reserved_0[0x1d]; u8 target[0x3]; u8 reserved_1[0x20]; }; struct mlx5_ifc_nvdi_reg_bits { struct 
mlx5_ifc_config_item_bits configuration_item_header; }; struct mlx5_ifc_nvda_reg_bits { struct mlx5_ifc_config_item_bits configuration_item_header; u8 configuration_item_data[0x20]; }; struct mlx5_ifc_node_info_ro_fields_param_bits { u8 system_image_guid[0x40]; u8 reserved_0[0x40]; u8 node_guid[0x40]; u8 reserved_1[0x10]; u8 max_pkey[0x10]; u8 reserved_2[0x20]; }; struct mlx5_ifc_ets_tcn_config_reg_bits { u8 g[0x1]; u8 b[0x1]; u8 r[0x1]; u8 reserved_0[0x9]; u8 group[0x4]; u8 reserved_1[0x9]; u8 bw_allocation[0x7]; u8 reserved_2[0xc]; u8 max_bw_units[0x4]; u8 reserved_3[0x8]; u8 max_bw_value[0x8]; }; struct mlx5_ifc_ets_global_config_reg_bits { u8 reserved_0[0x2]; u8 r[0x1]; u8 reserved_1[0x1d]; u8 reserved_2[0xc]; u8 max_bw_units[0x4]; u8 reserved_3[0x8]; u8 max_bw_value[0x8]; }; struct mlx5_ifc_qetc_reg_bits { u8 reserved_at_0[0x8]; u8 port_number[0x8]; u8 reserved_at_10[0x30]; struct mlx5_ifc_ets_tcn_config_reg_bits tc_configuration[0x8]; struct mlx5_ifc_ets_global_config_reg_bits global_configuration; }; struct mlx5_ifc_nodnic_mac_filters_bits { struct mlx5_ifc_mac_address_layout_bits mac_filter0; struct mlx5_ifc_mac_address_layout_bits mac_filter1; struct mlx5_ifc_mac_address_layout_bits mac_filter2; struct mlx5_ifc_mac_address_layout_bits mac_filter3; struct mlx5_ifc_mac_address_layout_bits mac_filter4; u8 reserved_0[0xc0]; }; struct mlx5_ifc_nodnic_gid_filters_bits { u8 mgid_filter0[16][0x8]; u8 mgid_filter1[16][0x8]; u8 mgid_filter2[16][0x8]; u8 mgid_filter3[16][0x8]; }; enum { MLX5_NODNIC_CONFIG_REG_NUM_PORTS_SINGLE_PORT = 0x0, MLX5_NODNIC_CONFIG_REG_NUM_PORTS_DUAL_PORT = 0x1, }; enum { MLX5_NODNIC_CONFIG_REG_CQE_FORMAT_LEGACY_CQE = 0x0, MLX5_NODNIC_CONFIG_REG_CQE_FORMAT_NEW_CQE = 0x1, }; struct mlx5_ifc_nodnic_config_reg_bits { u8 no_dram_nic_revision[0x8]; u8 hardware_format[0x8]; u8 support_receive_filter[0x1]; u8 support_promisc_filter[0x1]; u8 support_promisc_multicast_filter[0x1]; u8 reserved_0[0x2]; u8 log_working_buffer_size[0x3]; u8 log_pkey_table_size[0x4]; u8 reserved_1[0x3]; u8 num_ports[0x1]; u8 reserved_2[0x2]; u8 log_max_ring_size[0x6]; u8 reserved_3[0x18]; u8 lkey[0x20]; u8 cqe_format[0x4]; u8 reserved_4[0x1c]; u8 node_guid[0x40]; u8 reserved_5[0x740]; struct mlx5_ifc_nodnic_port_config_reg_bits port1_settings; struct mlx5_ifc_nodnic_port_config_reg_bits port2_settings; }; struct mlx5_ifc_vlan_layout_bits { u8 reserved_0[0x14]; u8 vlan[0xc]; u8 reserved_1[0x20]; }; struct mlx5_ifc_umr_pointer_desc_argument_bits { u8 reserved_0[0x20]; u8 mkey[0x20]; u8 addressh_63_32[0x20]; u8 addressl_31_0[0x20]; }; struct mlx5_ifc_ud_adrs_vector_bits { u8 dc_key[0x40]; u8 ext[0x1]; u8 reserved_0[0x7]; u8 destination_qp_dct[0x18]; u8 static_rate[0x4]; u8 sl_eth_prio[0x4]; u8 fl[0x1]; u8 mlid[0x7]; u8 rlid_udp_sport[0x10]; u8 reserved_1[0x20]; u8 rmac_47_16[0x20]; u8 rmac_15_0[0x10]; u8 tclass[0x8]; u8 hop_limit[0x8]; u8 reserved_2[0x1]; u8 grh[0x1]; u8 reserved_3[0x2]; u8 src_addr_index[0x8]; u8 flow_label[0x14]; u8 rgid_rip[16][0x8]; }; struct mlx5_ifc_port_module_event_bits { u8 reserved_0[0x8]; u8 module[0x8]; u8 reserved_1[0xc]; u8 module_status[0x4]; u8 reserved_2[0x14]; u8 error_type[0x4]; u8 reserved_3[0x8]; u8 reserved_4[0xa0]; }; struct mlx5_ifc_icmd_control_bits { u8 opcode[0x10]; u8 status[0x8]; u8 reserved_0[0x7]; u8 busy[0x1]; }; struct mlx5_ifc_eqe_bits { u8 reserved_0[0x8]; u8 event_type[0x8]; u8 reserved_1[0x8]; u8 event_sub_type[0x8]; u8 reserved_2[0xe0]; union mlx5_ifc_event_auto_bits event_data; u8 reserved_3[0x10]; u8 signature[0x8]; u8 reserved_4[0x7]; u8 
owner[0x1]; }; enum { MLX5_CMD_QUEUE_ENTRY_TYPE_PCIE_CMD_IF_TRANSPORT = 0x7, }; struct mlx5_ifc_cmd_queue_entry_bits { u8 type[0x8]; u8 reserved_0[0x18]; u8 input_length[0x20]; u8 input_mailbox_pointer_63_32[0x20]; u8 input_mailbox_pointer_31_9[0x17]; u8 reserved_1[0x9]; u8 command_input_inline_data[16][0x8]; u8 command_output_inline_data[16][0x8]; u8 output_mailbox_pointer_63_32[0x20]; u8 output_mailbox_pointer_31_9[0x17]; u8 reserved_2[0x9]; u8 output_length[0x20]; u8 token[0x8]; u8 signature[0x8]; u8 reserved_3[0x8]; u8 status[0x7]; u8 ownership[0x1]; }; struct mlx5_ifc_cmd_out_bits { u8 status[0x8]; u8 reserved_0[0x18]; u8 syndrome[0x20]; u8 command_output[0x20]; }; struct mlx5_ifc_cmd_in_bits { u8 opcode[0x10]; u8 reserved_0[0x10]; u8 reserved_1[0x10]; u8 op_mod[0x10]; u8 command[0][0x20]; }; struct mlx5_ifc_cmd_if_box_bits { u8 mailbox_data[512][0x8]; u8 reserved_0[0x180]; u8 next_pointer_63_32[0x20]; u8 next_pointer_31_10[0x16]; u8 reserved_1[0xa]; u8 block_number[0x20]; u8 reserved_2[0x8]; u8 token[0x8]; u8 ctrl_signature[0x8]; u8 signature[0x8]; }; struct mlx5_ifc_mtt_bits { u8 ptag_63_32[0x20]; u8 ptag_31_8[0x18]; u8 reserved_0[0x6]; u8 wr_en[0x1]; u8 rd_en[0x1]; }; struct mlx5_ifc_tls_progress_params_bits { u8 valid[0x1]; u8 reserved_at_1[0x7]; u8 pd[0x18]; u8 next_record_tcp_sn[0x20]; u8 hw_resync_tcp_sn[0x20]; u8 record_tracker_state[0x2]; u8 auth_state[0x2]; u8 reserved_at_64[0x4]; u8 hw_offset_record_number[0x18]; }; struct mlx5_ifc_tls_static_params_bits { u8 const_2[0x2]; u8 tls_version[0x4]; u8 const_1[0x2]; u8 reserved_at_8[0x14]; u8 encryption_standard[0x4]; u8 reserved_at_20[0x20]; u8 initial_record_number[0x40]; u8 resync_tcp_sn[0x20]; u8 gcm_iv[0x20]; u8 implicit_iv[0x40]; u8 reserved_at_100[0x8]; u8 dek_index[0x18]; u8 reserved_at_120[0xe0]; }; /* Vendor Specific Capabilities, VSC */ enum { MLX5_VSC_DOMAIN_ICMD = 0x1, MLX5_VSC_DOMAIN_PROTECTED_CRSPACE = 0x6, MLX5_VSC_DOMAIN_SCAN_CRSPACE = 0x7, MLX5_VSC_DOMAIN_SEMAPHORES = 0xA, }; struct mlx5_ifc_vendor_specific_cap_bits { u8 type[0x8]; u8 length[0x8]; u8 next_pointer[0x8]; u8 capability_id[0x8]; u8 status[0x3]; u8 reserved_0[0xd]; u8 space[0x10]; u8 counter[0x20]; u8 semaphore[0x20]; u8 flag[0x1]; u8 reserved_1[0x1]; u8 address[0x1e]; u8 data[0x20]; }; struct mlx5_ifc_vsc_space_bits { u8 status[0x3]; u8 reserved0[0xd]; u8 space[0x10]; }; struct mlx5_ifc_vsc_addr_bits { u8 flag[0x1]; u8 reserved0[0x1]; u8 address[0x1e]; }; enum { MLX5_INITIAL_SEG_NIC_INTERFACE_FULL_DRIVER = 0x0, MLX5_INITIAL_SEG_NIC_INTERFACE_DISABLED = 0x1, MLX5_INITIAL_SEG_NIC_INTERFACE_NO_DRAM_NIC = 0x2, }; enum { MLX5_INITIAL_SEG_NIC_INTERFACE_SUPPORTED_FULL_DRIVER = 0x0, MLX5_INITIAL_SEG_NIC_INTERFACE_SUPPORTED_DISABLED = 0x1, MLX5_INITIAL_SEG_NIC_INTERFACE_SUPPORTED_NO_DRAM_NIC = 0x2, }; enum { MLX5_HEALTH_SYNDR_FW_ERR = 0x1, MLX5_HEALTH_SYNDR_IRISC_ERR = 0x7, MLX5_HEALTH_SYNDR_HW_UNRECOVERABLE_ERR = 0x8, MLX5_HEALTH_SYNDR_CRC_ERR = 0x9, MLX5_HEALTH_SYNDR_FETCH_PCI_ERR = 0xa, MLX5_HEALTH_SYNDR_HW_FTL_ERR = 0xb, MLX5_HEALTH_SYNDR_ASYNC_EQ_OVERRUN_ERR = 0xc, MLX5_HEALTH_SYNDR_EQ_ERR = 0xd, MLX5_HEALTH_SYNDR_EQ_INV = 0xe, MLX5_HEALTH_SYNDR_FFSER_ERR = 0xf, MLX5_HEALTH_SYNDR_HIGH_TEMP = 0x10, }; struct mlx5_ifc_initial_seg_bits { u8 fw_rev_minor[0x10]; u8 fw_rev_major[0x10]; u8 cmd_interface_rev[0x10]; u8 fw_rev_subminor[0x10]; u8 reserved_0[0x40]; u8 cmdq_phy_addr_63_32[0x20]; u8 cmdq_phy_addr_31_12[0x14]; u8 reserved_1[0x2]; u8 nic_interface[0x2]; u8 log_cmdq_size[0x4]; u8 log_cmdq_stride[0x4]; u8 command_doorbell_vector[0x20]; u8 
reserved_2[0xf00]; u8 initializing[0x1]; u8 reserved_3[0x4]; u8 nic_interface_supported[0x3]; u8 reserved_4[0x18]; struct mlx5_ifc_health_buffer_bits health_buffer; u8 no_dram_nic_offset[0x20]; u8 reserved_5[0x6de0]; u8 internal_timer_h[0x20]; u8 internal_timer_l[0x20]; u8 reserved_6[0x20]; u8 reserved_7[0x1f]; u8 clear_int[0x1]; u8 health_syndrome[0x8]; u8 health_counter[0x18]; u8 reserved_8[0x17fc0]; }; union mlx5_ifc_icmd_interface_document_bits { struct mlx5_ifc_fw_version_bits fw_version; struct mlx5_ifc_icmd_access_reg_in_bits icmd_access_reg_in; struct mlx5_ifc_icmd_access_reg_out_bits icmd_access_reg_out; struct mlx5_ifc_icmd_init_ocsd_in_bits icmd_init_ocsd_in; struct mlx5_ifc_icmd_ocbb_init_in_bits icmd_ocbb_init_in; struct mlx5_ifc_icmd_ocbb_query_etoc_stats_out_bits icmd_ocbb_query_etoc_stats_out; struct mlx5_ifc_icmd_ocbb_query_header_stats_out_bits icmd_ocbb_query_header_stats_out; struct mlx5_ifc_icmd_query_cap_general_bits icmd_query_cap_general; struct mlx5_ifc_icmd_query_cap_in_bits icmd_query_cap_in; struct mlx5_ifc_icmd_query_fw_info_out_bits icmd_query_fw_info_out; struct mlx5_ifc_icmd_query_virtual_mac_out_bits icmd_query_virtual_mac_out; struct mlx5_ifc_icmd_set_virtual_mac_in_bits icmd_set_virtual_mac_in; struct mlx5_ifc_icmd_set_wol_rol_in_bits icmd_set_wol_rol_in; struct mlx5_ifc_icmd_set_wol_rol_out_bits icmd_set_wol_rol_out; u8 reserved_0[0x42c0]; }; union mlx5_ifc_eth_cntrs_grp_data_layout_auto_bits { struct mlx5_ifc_eth_802_3_cntrs_grp_data_layout_bits eth_802_3_cntrs_grp_data_layout; struct mlx5_ifc_eth_2863_cntrs_grp_data_layout_bits eth_2863_cntrs_grp_data_layout; struct mlx5_ifc_eth_2819_cntrs_grp_data_layout_bits eth_2819_cntrs_grp_data_layout; struct mlx5_ifc_eth_3635_cntrs_grp_data_layout_bits eth_3635_cntrs_grp_data_layout; struct mlx5_ifc_eth_extended_cntrs_grp_data_layout_bits eth_extended_cntrs_grp_data_layout; struct mlx5_ifc_eth_discard_cntrs_grp_bits eth_discard_cntrs_grp; struct mlx5_ifc_eth_per_prio_grp_data_layout_bits eth_per_prio_grp_data_layout; struct mlx5_ifc_phys_layer_cntrs_bits phys_layer_cntrs; struct mlx5_ifc_phys_layer_statistical_cntrs_bits phys_layer_statistical_cntrs; struct mlx5_ifc_infiniband_port_cntrs_bits infiniband_port_cntrs; u8 reserved_0[0x7c0]; }; struct mlx5_ifc_ppcnt_reg_bits { u8 swid[0x8]; u8 local_port[0x8]; u8 pnat[0x2]; u8 reserved_0[0x8]; u8 grp[0x6]; u8 clr[0x1]; u8 reserved_1[0x1c]; u8 prio_tc[0x3]; union mlx5_ifc_eth_cntrs_grp_data_layout_auto_bits counter_set; }; struct mlx5_ifc_pcie_lanes_counters_bits { u8 life_time_counter_high[0x20]; u8 life_time_counter_low[0x20]; u8 error_counter_lane0[0x20]; u8 error_counter_lane1[0x20]; u8 error_counter_lane2[0x20]; u8 error_counter_lane3[0x20]; u8 error_counter_lane4[0x20]; u8 error_counter_lane5[0x20]; u8 error_counter_lane6[0x20]; u8 error_counter_lane7[0x20]; u8 error_counter_lane8[0x20]; u8 error_counter_lane9[0x20]; u8 error_counter_lane10[0x20]; u8 error_counter_lane11[0x20]; u8 error_counter_lane12[0x20]; u8 error_counter_lane13[0x20]; u8 error_counter_lane14[0x20]; u8 error_counter_lane15[0x20]; u8 reserved_at_240[0x580]; }; struct mlx5_ifc_pcie_lanes_counters_ext_bits { u8 reserved_at_0[0x40]; u8 error_counter_lane0[0x20]; u8 error_counter_lane1[0x20]; u8 error_counter_lane2[0x20]; u8 error_counter_lane3[0x20]; u8 error_counter_lane4[0x20]; u8 error_counter_lane5[0x20]; u8 error_counter_lane6[0x20]; u8 error_counter_lane7[0x20]; u8 error_counter_lane8[0x20]; u8 error_counter_lane9[0x20]; u8 error_counter_lane10[0x20]; u8 error_counter_lane11[0x20]; u8 
error_counter_lane12[0x20]; u8 error_counter_lane13[0x20]; u8 error_counter_lane14[0x20]; u8 error_counter_lane15[0x20]; u8 reserved_at_240[0x580]; }; struct mlx5_ifc_pcie_perf_counters_bits { u8 life_time_counter_high[0x20]; u8 life_time_counter_low[0x20]; u8 rx_errors[0x20]; u8 tx_errors[0x20]; u8 l0_to_recovery_eieos[0x20]; u8 l0_to_recovery_ts[0x20]; u8 l0_to_recovery_framing[0x20]; u8 l0_to_recovery_retrain[0x20]; u8 crc_error_dllp[0x20]; u8 crc_error_tlp[0x20]; u8 tx_overflow_buffer_pkt[0x40]; u8 outbound_stalled_reads[0x20]; u8 outbound_stalled_writes[0x20]; u8 outbound_stalled_reads_events[0x20]; u8 outbound_stalled_writes_events[0x20]; u8 tx_overflow_buffer_marked_pkt[0x40]; u8 reserved_at_240[0x580]; }; struct mlx5_ifc_pcie_perf_counters_ext_bits { u8 reserved_at_0[0x40]; u8 rx_errors[0x20]; u8 tx_errors[0x20]; u8 reserved_at_80[0xc0]; u8 tx_overflow_buffer_pkt[0x40]; u8 outbound_stalled_reads[0x20]; u8 outbound_stalled_writes[0x20]; u8 outbound_stalled_reads_events[0x20]; u8 outbound_stalled_writes_events[0x20]; u8 tx_overflow_buffer_marked_pkt[0x40]; u8 reserved_at_240[0x580]; }; struct mlx5_ifc_pcie_timers_states_bits { u8 life_time_counter_high[0x20]; u8 life_time_counter_low[0x20]; u8 time_to_boot_image_start[0x20]; u8 time_to_link_image[0x20]; u8 calibration_time[0x20]; u8 time_to_first_perst[0x20]; u8 time_to_detect_state[0x20]; u8 time_to_l0[0x20]; u8 time_to_crs_en[0x20]; u8 time_to_plastic_image_start[0x20]; u8 time_to_iron_image_start[0x20]; u8 perst_handler[0x20]; u8 times_in_l1[0x20]; u8 times_in_l23[0x20]; u8 dl_down[0x20]; u8 config_cycle1usec[0x20]; u8 config_cycle2to7usec[0x20]; u8 config_cycle8to15usec[0x20]; u8 config_cycle16to63usec[0x20]; u8 config_cycle64usec[0x20]; u8 correctable_err_msg_sent[0x20]; u8 non_fatal_err_msg_sent[0x20]; u8 fatal_err_msg_sent[0x20]; u8 reserved_at_2e0[0x4e0]; }; struct mlx5_ifc_pcie_timers_states_ext_bits { u8 reserved_at_0[0x40]; u8 time_to_boot_image_start[0x20]; u8 time_to_link_image[0x20]; u8 calibration_time[0x20]; u8 time_to_first_perst[0x20]; u8 time_to_detect_state[0x20]; u8 time_to_l0[0x20]; u8 time_to_crs_en[0x20]; u8 time_to_plastic_image_start[0x20]; u8 time_to_iron_image_start[0x20]; u8 perst_handler[0x20]; u8 times_in_l1[0x20]; u8 times_in_l23[0x20]; u8 dl_down[0x20]; u8 config_cycle1usec[0x20]; u8 config_cycle2to7usec[0x20]; u8 config_cycle8to15usec[0x20]; u8 config_cycle16to63usec[0x20]; u8 config_cycle64usec[0x20]; u8 correctable_err_msg_sent[0x20]; u8 non_fatal_err_msg_sent[0x20]; u8 fatal_err_msg_sent[0x20]; u8 reserved_at_2e0[0x4e0]; }; union mlx5_ifc_mpcnt_reg_counter_set_auto_bits { struct mlx5_ifc_pcie_perf_counters_bits pcie_perf_counters; struct mlx5_ifc_pcie_lanes_counters_bits pcie_lanes_counters; struct mlx5_ifc_pcie_timers_states_bits pcie_timers_states; u8 reserved_at_0[0x7c0]; }; union mlx5_ifc_mpcnt_reg_counter_set_auto_ext_bits { struct mlx5_ifc_pcie_perf_counters_ext_bits pcie_perf_counters_ext; struct mlx5_ifc_pcie_lanes_counters_ext_bits pcie_lanes_counters_ext; struct mlx5_ifc_pcie_timers_states_ext_bits pcie_timers_states_ext; u8 reserved_at_0[0x7c0]; }; struct mlx5_ifc_mpcnt_reg_bits { u8 reserved_at_0[0x2]; u8 depth[0x6]; u8 pcie_index[0x8]; u8 node[0x8]; u8 reserved_at_18[0x2]; u8 grp[0x6]; u8 clr[0x1]; u8 reserved_at_21[0x1f]; union mlx5_ifc_mpcnt_reg_counter_set_auto_bits counter_set; }; struct mlx5_ifc_mpcnt_reg_ext_bits { u8 reserved_at_0[0x2]; u8 depth[0x6]; u8 pcie_index[0x8]; u8 node[0x8]; u8 reserved_at_18[0x2]; u8 grp[0x6]; u8 clr[0x1]; u8 reserved_at_21[0x1f]; union 
mlx5_ifc_mpcnt_reg_counter_set_auto_ext_bits counter_set;
 };

 struct mlx5_ifc_monitor_opcodes_layout_bits {
 	u8 reserved_at_0[0x10];
 	u8 monitor_opcode[0x10];
 };

 union mlx5_ifc_pddr_status_opcode_bits {
 	struct mlx5_ifc_monitor_opcodes_layout_bits monitor_opcodes;
 	u8 reserved_at_0[0x20];
 };

 struct mlx5_ifc_troubleshooting_info_page_layout_bits {
 	u8 reserved_at_0[0x10];
 	u8 group_opcode[0x10];
 	union mlx5_ifc_pddr_status_opcode_bits status_opcode;
 	u8 user_feedback_data[0x10];
 	u8 user_feedback_index[0x10];
 	u8 status_message[0x760];
 };

 union mlx5_ifc_pddr_page_data_bits {
 	struct mlx5_ifc_troubleshooting_info_page_layout_bits troubleshooting_info_page;
 	struct mlx5_ifc_pddr_module_info_bits pddr_module_info;
 	u8 reserved_at_0[0x7c0];
 };

 struct mlx5_ifc_pddr_reg_bits {
 	u8 reserved_at_0[0x8];
 	u8 local_port[0x8];
 	u8 pnat[0x2];
 	u8 reserved_at_12[0xe];
 	u8 reserved_at_20[0x18];
 	u8 page_select[0x8];
 	union mlx5_ifc_pddr_page_data_bits page_data;
 };

 enum {
 	MLX5_ACCESS_REG_SUMMARY_CTRL_ID_MPEIN = 0x9050,
 	MLX5_MPEIN_PWR_STATUS_INVALID = 0,
 	MLX5_MPEIN_PWR_STATUS_SUFFICIENT = 1,
 	MLX5_MPEIN_PWR_STATUS_INSUFFICIENT = 2,
 };

 struct mlx5_ifc_mpein_reg_bits {
 	u8 reserved_at_0[0x2];
 	u8 depth[0x6];
 	u8 pcie_index[0x8];
 	u8 node[0x8];
 	u8 reserved_at_18[0x8];
 	u8 capability_mask[0x20];
 	u8 reserved_at_40[0x8];
 	u8 link_width_enabled[0x8];
 	u8 link_speed_enabled[0x10];
 	u8 lane0_physical_position[0x8];
 	u8 link_width_active[0x8];
 	u8 link_speed_active[0x10];
 	u8 num_of_pfs[0x10];
 	u8 num_of_vfs[0x10];
 	u8 bdf0[0x10];
 	u8 reserved_at_b0[0x10];
 	u8 max_read_request_size[0x4];
 	u8 max_payload_size[0x4];
 	u8 reserved_at_c8[0x5];
 	u8 pwr_status[0x3];
 	u8 port_type[0x4];
 	u8 reserved_at_d4[0xb];
 	u8 lane_reversal[0x1];
 	u8 reserved_at_e0[0x14];
 	u8 pci_power[0xc];
 	u8 reserved_at_100[0x20];
 	u8 device_status[0x10];
 	u8 port_state[0x8];
 	u8 reserved_at_138[0x8];
 	u8 reserved_at_140[0x10];
 	u8 receiver_detect_result[0x10];
 	u8 reserved_at_160[0x20];
 };

 struct mlx5_ifc_mpein_reg_ext_bits {
 	u8 reserved_at_0[0x2];
 	u8 depth[0x6];
 	u8 pcie_index[0x8];
 	u8 node[0x8];
 	u8 reserved_at_18[0x8];
 	u8 reserved_at_20[0x20];
 	u8 reserved_at_40[0x8];
 	u8 link_width_enabled[0x8];
 	u8 link_speed_enabled[0x10];
 	u8 lane0_physical_position[0x8];
 	u8 link_width_active[0x8];
 	u8 link_speed_active[0x10];
 	u8 num_of_pfs[0x10];
 	u8 num_of_vfs[0x10];
 	u8 bdf0[0x10];
 	u8 reserved_at_b0[0x10];
 	u8 max_read_request_size[0x4];
 	u8 max_payload_size[0x4];
 	u8 reserved_at_c8[0x5];
 	u8 pwr_status[0x3];
 	u8 port_type[0x4];
 	u8 reserved_at_d4[0xb];
 	u8 lane_reversal[0x1];
 };

 struct mlx5_ifc_mcqi_cap_bits {
 	u8 supported_info_bitmask[0x20];
 	u8 component_size[0x20];
 	u8 max_component_size[0x20];
 	u8 log_mcda_word_size[0x4];
 	u8 reserved_at_64[0xc];
 	u8 mcda_max_write_size[0x10];
 	u8 rd_en[0x1];
 	u8 reserved_at_81[0x1];
 	u8 match_chip_id[0x1];
 	u8 match_psid[0x1];
 	u8 check_user_timestamp[0x1];
 	u8 match_base_guid_mac[0x1];
 	u8 reserved_at_86[0x1a];
 };

 struct mlx5_ifc_mcqi_reg_bits {
 	u8 read_pending_component[0x1];
 	u8 reserved_at_1[0xf];
 	u8 component_index[0x10];
 	u8 reserved_at_20[0x20];
 	u8 reserved_at_40[0x1b];
 	u8 info_type[0x5];
 	u8 info_size[0x20];
 	u8 offset[0x20];
 	u8 reserved_at_a0[0x10];
 	u8 data_size[0x10];
 	u8 data[0][0x20];
 };

 struct mlx5_ifc_mcc_reg_bits {
 	u8 reserved_at_0[0x4];
 	u8 time_elapsed_since_last_cmd[0xc];
 	u8 reserved_at_10[0x8];
 	u8 instruction[0x8];
 	u8 reserved_at_20[0x10];
 	u8 component_index[0x10];
 	u8 reserved_at_40[0x8];
 	u8 update_handle[0x18];
 	u8 handle_owner_type[0x4];
 	u8 handle_owner_host_id[0x4];
 	u8 reserved_at_68[0x1];
 	u8 control_progress[0x7];
 	u8 error_code[0x8];
 	u8 reserved_at_78[0x4];
 	u8 control_state[0x4];
 	u8 component_size[0x20];
 	u8 reserved_at_a0[0x60];
 };

 struct mlx5_ifc_mcda_reg_bits {
 	u8 reserved_at_0[0x8];
 	u8 update_handle[0x18];
 	u8 offset[0x20];
 	u8 reserved_at_40[0x10];
 	u8 size[0x10];
 	u8 reserved_at_60[0x20];
 	u8 data[0][0x20];
 };

 union mlx5_ifc_ports_control_registers_document_bits {
 	struct mlx5_ifc_ib_portcntrs_attribute_grp_data_bits ib_portcntrs_attribute_grp_data;
 	struct mlx5_ifc_bufferx_reg_bits bufferx_reg;
 	struct mlx5_ifc_eth_2819_cntrs_grp_data_layout_bits eth_2819_cntrs_grp_data_layout;
 	struct mlx5_ifc_eth_2863_cntrs_grp_data_layout_bits eth_2863_cntrs_grp_data_layout;
 	struct mlx5_ifc_eth_3635_cntrs_grp_data_layout_bits eth_3635_cntrs_grp_data_layout;
 	struct mlx5_ifc_eth_802_3_cntrs_grp_data_layout_bits eth_802_3_cntrs_grp_data_layout;
 	struct mlx5_ifc_eth_discard_cntrs_grp_bits eth_discard_cntrs_grp;
 	struct mlx5_ifc_eth_extended_cntrs_grp_data_layout_bits eth_extended_cntrs_grp_data_layout;
 	struct mlx5_ifc_eth_per_prio_grp_data_layout_bits eth_per_prio_grp_data_layout;
 	struct mlx5_ifc_eth_per_traffic_class_cong_layout_bits eth_per_traffic_class_cong_layout;
 	struct mlx5_ifc_eth_per_traffic_class_layout_bits eth_per_traffic_class_layout;
 	struct mlx5_ifc_lane_2_module_mapping_bits lane_2_module_mapping;
 	struct mlx5_ifc_link_level_retrans_cntr_grp_date_bits link_level_retrans_cntr_grp_date;
 	struct mlx5_ifc_pamp_reg_bits pamp_reg;
 	struct mlx5_ifc_paos_reg_bits paos_reg;
 	struct mlx5_ifc_pbmc_reg_bits pbmc_reg;
 	struct mlx5_ifc_pcap_reg_bits pcap_reg;
 	struct mlx5_ifc_peir_reg_bits peir_reg;
 	struct mlx5_ifc_pelc_reg_bits pelc_reg;
 	struct mlx5_ifc_pfcc_reg_bits pfcc_reg;
 	struct mlx5_ifc_phbr_binding_reg_bits phbr_binding_reg;
 	struct mlx5_ifc_phbr_for_port_tclass_reg_bits phbr_for_port_tclass_reg;
 	struct mlx5_ifc_phbr_for_prio_reg_bits phbr_for_prio_reg;
 	struct mlx5_ifc_phrr_reg_bits phrr_reg;
 	struct mlx5_ifc_phys_layer_cntrs_bits phys_layer_cntrs;
 	struct mlx5_ifc_pifr_reg_bits pifr_reg;
 	struct mlx5_ifc_pipg_reg_bits pipg_reg;
 	struct mlx5_ifc_plbf_reg_bits plbf_reg;
 	struct mlx5_ifc_plib_reg_bits plib_reg;
 	struct mlx5_ifc_pll_status_data_bits pll_status_data;
 	struct mlx5_ifc_plpc_reg_bits plpc_reg;
 	struct mlx5_ifc_pmaos_reg_bits pmaos_reg;
 	struct mlx5_ifc_pmlp_reg_bits pmlp_reg;
 	struct mlx5_ifc_pmlpn_reg_bits pmlpn_reg;
 	struct mlx5_ifc_pmpc_reg_bits pmpc_reg;
 	struct mlx5_ifc_pmpe_reg_bits pmpe_reg;
 	struct mlx5_ifc_pmpr_reg_bits pmpr_reg;
 	struct mlx5_ifc_pmtu_reg_bits pmtu_reg;
 	struct mlx5_ifc_ppad_reg_bits ppad_reg;
 	struct mlx5_ifc_ppcnt_reg_bits ppcnt_reg;
 	struct mlx5_ifc_ppll_reg_bits ppll_reg;
 	struct mlx5_ifc_pplm_reg_bits pplm_reg;
 	struct mlx5_ifc_pplr_reg_bits pplr_reg;
 	struct mlx5_ifc_ppsc_reg_bits ppsc_reg;
 	struct mlx5_ifc_pspa_reg_bits pspa_reg;
 	struct mlx5_ifc_ptas_reg_bits ptas_reg;
 	struct mlx5_ifc_ptys_reg_bits ptys_reg;
 	struct mlx5_ifc_pude_reg_bits pude_reg;
 	struct mlx5_ifc_pvlc_reg_bits pvlc_reg;
 	struct mlx5_ifc_slrg_reg_bits slrg_reg;
 	struct mlx5_ifc_slrp_reg_bits slrp_reg;
 	struct mlx5_ifc_sltp_reg_bits sltp_reg;
 	u8 reserved_0[0x7880];
 };

 union mlx5_ifc_debug_enhancements_document_bits {
 	struct mlx5_ifc_health_buffer_bits health_buffer;
 	u8 reserved_0[0x200];
 };

 union mlx5_ifc_no_dram_nic_document_bits {
 	struct mlx5_ifc_nodnic_config_reg_bits nodnic_config_reg;
 	struct mlx5_ifc_nodnic_cq_arming_word_bits nodnic_cq_arming_word;
 	struct mlx5_ifc_nodnic_event_word_bits nodnic_event_word;
 	struct mlx5_ifc_nodnic_gid_filters_bits nodnic_gid_filters;
 	struct mlx5_ifc_nodnic_mac_filters_bits nodnic_mac_filters;
 	struct mlx5_ifc_nodnic_port_config_reg_bits nodnic_port_config_reg;
 	struct mlx5_ifc_nodnic_ring_config_reg_bits nodnic_ring_config_reg;
 	struct mlx5_ifc_nodnic_ring_doorbell_bits nodnic_ring_doorbell;
 	u8 reserved_0[0x3160];
 };

 union mlx5_ifc_uplink_pci_interface_document_bits {
 	struct mlx5_ifc_initial_seg_bits initial_seg;
 	struct mlx5_ifc_vendor_specific_cap_bits vendor_specific_cap;
 	u8 reserved_0[0x20120];
 };

 struct mlx5_ifc_qpdpm_dscp_reg_bits {
 	u8 e[0x1];
 	u8 reserved_at_01[0x0b];
 	u8 prio[0x04];
 };

 struct mlx5_ifc_qpdpm_reg_bits {
 	u8 reserved_at_0[0x8];
 	u8 local_port[0x8];
 	u8 reserved_at_10[0x10];
 	struct mlx5_ifc_qpdpm_dscp_reg_bits dscp[64];
 };

 struct mlx5_ifc_qpts_reg_bits {
 	u8 reserved_at_0[0x8];
 	u8 local_port[0x8];
 	u8 reserved_at_10[0x2d];
 	u8 trust_state[0x3];
 };

 struct mlx5_ifc_mfrl_reg_bits {
 	u8 reserved_at_0[0x38];
 	u8 reset_level[0x8];
 };

 enum {
 	MLX5_ACCESS_REG_SUMMARY_CTRL_ID_MTCAP = 0x9009,
 	MLX5_ACCESS_REG_SUMMARY_CTRL_ID_MTECR = 0x9109,
 	MLX5_ACCESS_REG_SUMMARY_CTRL_ID_MTMP = 0x900a,
 	MLX5_ACCESS_REG_SUMMARY_CTRL_ID_MTWE = 0x900b,
 	MLX5_ACCESS_REG_SUMMARY_CTRL_ID_MTBR = 0x900f,
 	MLX5_ACCESS_REG_SUMMARY_CTRL_ID_MTEWE = 0x910b,
 	MLX5_MAX_TEMPERATURE = 16,
 };

 struct mlx5_ifc_mtbr_temp_record_bits {
 	u8 max_temperature[0x10];
 	u8 temperature[0x10];
 };

 struct mlx5_ifc_mtbr_reg_bits {
 	u8 reserved_at_0[0x14];
 	u8 base_sensor_index[0xc];
 	u8 reserved_at_20[0x18];
 	u8 num_rec[0x8];
 	u8 reserved_at_40[0x40];
 	struct mlx5_ifc_mtbr_temp_record_bits temperature_record[MLX5_MAX_TEMPERATURE];
 };

 struct mlx5_ifc_mtbr_reg_ext_bits {
 	u8 reserved_at_0[0x14];
 	u8 base_sensor_index[0xc];
 	u8 reserved_at_20[0x18];
 	u8 num_rec[0x8];
 	u8 reserved_at_40[0x40];
 	struct mlx5_ifc_mtbr_temp_record_bits temperature_record[MLX5_MAX_TEMPERATURE];
 };

 struct mlx5_ifc_mtcap_bits {
 	u8 reserved_at_0[0x19];
 	u8 sensor_count[0x7];
 	u8 reserved_at_20[0x19];
 	u8 internal_sensor_count[0x7];
 	u8 sensor_map[0x40];
 };

 struct mlx5_ifc_mtcap_ext_bits {
 	u8 reserved_at_0[0x19];
 	u8 sensor_count[0x7];
 	u8 reserved_at_20[0x20];
 	u8 sensor_map[0x40];
 };

 struct mlx5_ifc_mtecr_bits {
 	u8 reserved_at_0[0x4];
 	u8 last_sensor[0xc];
 	u8 reserved_at_10[0x4];
 	u8 sensor_count[0xc];
 	u8 reserved_at_20[0x19];
 	u8 internal_sensor_count[0x7];
 	u8 sensor_map_0[0x20];
 	u8 reserved_at_60[0x2a0];
 };

 struct mlx5_ifc_mtecr_ext_bits {
 	u8 reserved_at_0[0x4];
 	u8 last_sensor[0xc];
 	u8 reserved_at_10[0x4];
 	u8 sensor_count[0xc];
 	u8 reserved_at_20[0x20];
 	u8 sensor_map_0[0x20];
 	u8 reserved_at_60[0x2a0];
 };

 struct mlx5_ifc_mtewe_bits {
 	u8 reserved_at_0[0x4];
 	u8 last_sensor[0xc];
 	u8 reserved_at_10[0x4];
 	u8 sensor_count[0xc];
 	u8 sensor_warning_0[0x20];
 	u8 reserved_at_40[0x2a0];
 };

 struct mlx5_ifc_mtewe_ext_bits {
 	u8 reserved_at_0[0x4];
 	u8 last_sensor[0xc];
 	u8 reserved_at_10[0x4];
 	u8 sensor_count[0xc];
 	u8 sensor_warning_0[0x20];
 	u8 reserved_at_40[0x2a0];
 };

 struct mlx5_ifc_mtmp_bits {
 	u8 reserved_at_0[0x14];
 	u8 sensor_index[0xc];
 	u8 reserved_at_20[0x10];
 	u8 temperature[0x10];
 	u8 mte[0x1];
 	u8 mtr[0x1];
 	u8 reserved_at_42[0xe];
 	u8 max_temperature[0x10];
 	u8 tee[0x2];
 	u8 reserved_at_62[0xe];
 	u8 temperature_threshold_hi[0x10];
 	u8 reserved_at_80[0x10];
 	u8 temperature_threshold_lo[0x10];
 	u8 reserved_at_a0[0x20];
 	u8 sensor_name_hi[0x20];
 	u8 sensor_name_lo[0x20];
 };

 struct mlx5_ifc_mtmp_ext_bits {
 	u8 reserved_at_0[0x14];
 	u8 sensor_index[0xc];
 	u8 reserved_at_20[0x10];
 	u8 temperature[0x10];
 	u8 mte[0x1];
 	u8 mtr[0x1];
 	u8 reserved_at_42[0xe];
 	u8 max_temperature[0x10];
 	u8 tee[0x2];
 	u8 reserved_at_62[0xe];
 	u8 temperature_threshold_hi[0x10];
 	u8 reserved_at_80[0x10];
 	u8 temperature_threshold_lo[0x10];
 	u8 reserved_at_a0[0x20];
 	u8 sensor_name_hi[0x20];
 	u8 sensor_name_lo[0x20];
 };

 #endif /* MLX5_IFC_H */
diff --git a/sys/ofed/include/uapi/rdma/mlx5-abi.h b/sys/ofed/include/uapi/rdma/mlx5-abi.h
index 3ce4afeea603..abbab91c8aff 100644
--- a/sys/ofed/include/uapi/rdma/mlx5-abi.h
+++ b/sys/ofed/include/uapi/rdma/mlx5-abi.h
@@ -1,264 +1,284 @@
 /*-
  * SPDX-License-Identifier: BSD-2-Clause OR GPL-2.0
  *
  * Copyright (c) 2013-2015, Mellanox Technologies. All rights reserved.
  *
  * This software is available to you under a choice of one of two
  * licenses. You may choose to be licensed under the terms of the GNU
  * General Public License (GPL) Version 2, available from the file
  * COPYING in the main directory of this source tree, or the
  * OpenIB.org BSD license below:
  *
  * Redistribution and use in source and binary forms, with or
  * without modification, are permitted provided that the following
  * conditions are met:
  *
  * - Redistributions of source code must retain the above
  * copyright notice, this list of conditions and the following
  * disclaimer.
  *
  * - Redistributions in binary form must reproduce the above
  * copyright notice, this list of conditions and the following
  * disclaimer in the documentation and/or other materials
  * provided with the distribution.
  *
  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
  * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  * SOFTWARE.
  *
  * $FreeBSD$
  */

 #ifndef MLX5_ABI_USER_H
 #define MLX5_ABI_USER_H

 #ifdef _KERNEL
 #include
 #else
 #include
 #endif

 enum {
 	MLX5_QP_FLAG_SIGNATURE = 1 << 0,
 	MLX5_QP_FLAG_SCATTER_CQE = 1 << 1,
+	MLX5_QP_FLAG_BFREG_INDEX = 1 << 3,
+	MLX5_QP_FLAG_UAR_PAGE_INDEX = 1 << 10,
 };

 enum {
 	MLX5_SRQ_FLAG_SIGNATURE = 1 << 0,
 };

 enum {
 	MLX5_WQ_FLAG_SIGNATURE = 1 << 0,
 };

 /* Increment this value if any changes that break userspace ABI
  * compatibility are made.
  */
 #define MLX5_IB_UVERBS_ABI_VERSION 1

 /* Make sure that all structs defined in this file remain laid out so
  * that they pack the same way on 32-bit and 64-bit architectures (to
  * avoid incompatibility between 32-bit userspace and 64-bit kernels).
  * In particular do not use pointer types -- pass pointers in __u64
  * instead.
  */

 struct mlx5_ib_alloc_ucontext_req {
-	__u32 total_num_uuars;
-	__u32 num_low_latency_uuars;
+	__u32 total_num_bfregs;
+	__u32 num_low_latency_bfregs;
 };

+enum mlx5_lib_caps {
+	MLX5_LIB_CAP_4K_UAR = (__u64)1 << 0,
+	MLX5_LIB_CAP_DYN_UAR = (__u64)1 << 1,
+};
+
+enum mlx5_ib_alloc_uctx_v2_flags {
+	MLX5_IB_ALLOC_UCTX_DEVX = 1 << 0,
+};

 struct mlx5_ib_alloc_ucontext_req_v2 {
-	__u32 total_num_uuars;
-	__u32 num_low_latency_uuars;
+	__u32 total_num_bfregs;
+	__u32 num_low_latency_bfregs;
 	__u32 flags;
 	__u32 comp_mask;
 	__u8 max_cqe_version;
 	__u8 reserved0;
 	__u16 reserved1;
 	__u32 reserved2;
+	__aligned_u64 lib_caps;
 };

 enum mlx5_ib_alloc_ucontext_resp_mask {
 	MLX5_IB_ALLOC_UCONTEXT_RESP_MASK_CORE_CLOCK_OFFSET = 1UL << 0,
 };

 enum mlx5_user_cmds_supp_uhw {
 	MLX5_USER_CMDS_SUPP_UHW_QUERY_DEVICE = 1 << 0,
 	MLX5_USER_CMDS_SUPP_UHW_CREATE_AH = 1 << 1,
 };

 struct mlx5_ib_alloc_ucontext_resp {
 	__u32 qp_tab_size;
 	__u32 bf_reg_size;
-	__u32 tot_uuars;
+	__u32 tot_bfregs;
 	__u32 cache_line_size;
 	__u16 max_sq_desc_sz;
 	__u16 max_rq_desc_sz;
 	__u32 max_send_wqebb;
 	__u32 max_recv_wr;
 	__u32 max_srq_recv_wr;
 	__u16 num_ports;
 	__u16 reserved1;
 	__u32 comp_mask;
 	__u32 response_length;
 	__u8 cqe_version;
 	__u8 cmds_supp_uhw;
 	__u16 reserved2;
 	__u64 hca_core_clock_offset;
+	__u32 log_uar_size;
+	__u32 num_uars_per_page;
+	__u32 num_dyn_bfregs;
 };

 struct mlx5_ib_alloc_pd_resp {
 	__u32 pdn;
 };

 struct mlx5_ib_tso_caps {
 	__u32 max_tso; /* Maximum tso payload size in bytes */
 	/* Corresponding bit will be set if qp type from
 	 * 'enum ib_qp_type' is supported, e.g.
 	 * supported_qpts |= 1 << IB_QPT_UD
 	 */
 	__u32 supported_qpts;
 };

 struct mlx5_ib_rss_caps {
 	__u64 rx_hash_fields_mask; /* enum mlx5_rx_hash_fields */
 	__u8 rx_hash_function; /* enum mlx5_rx_hash_function_flags */
 	__u8 reserved[7];
 };

 struct mlx5_ib_query_device_resp {
 	__u32 comp_mask;
 	__u32 response_length;
 	struct mlx5_ib_tso_caps tso_caps;
 	struct mlx5_ib_rss_caps rss_caps;
 };

+enum mlx5_ib_create_cq_flags {
+	MLX5_IB_CREATE_CQ_FLAGS_CQE_128B_PAD = 1 << 0,
+	MLX5_IB_CREATE_CQ_FLAGS_UAR_PAGE_INDEX = 1 << 1,
+};
+
 struct mlx5_ib_create_cq {
 	__u64 buf_addr;
 	__u64 db_addr;
 	__u32 cqe_size;
-	__u32 reserved; /* explicit padding (optional on i386) */
+	__u16 flags;
+	__u16 uar_page_index;
 };

 struct mlx5_ib_create_cq_resp {
 	__u32 cqn;
 	__u32 reserved;
 };

 struct mlx5_ib_resize_cq {
 	__u64 buf_addr;
 	__u16 cqe_size;
 	__u16 reserved0;
 	__u32 reserved1;
 };

 struct mlx5_ib_create_srq {
 	__u64 buf_addr;
 	__u64 db_addr;
 	__u32 flags;
 	__u32 reserved0; /* explicit padding (optional on i386) */
 	__u32 uidx;
 	__u32 reserved1;
 };

 struct mlx5_ib_create_srq_resp {
 	__u32 srqn;
 	__u32 reserved;
 };

 struct mlx5_ib_create_qp {
 	__u64 buf_addr;
 	__u64 db_addr;
 	__u32 sq_wqe_count;
 	__u32 rq_wqe_count;
 	__u32 rq_wqe_shift;
 	__u32 flags;
 	__u32 uidx;
-	__u32 reserved0;
+	__u32 bfreg_index;
 	__u64 sq_buf_addr;
 };

 /* RX Hash function flags */
 enum mlx5_rx_hash_function_flags {
 	MLX5_RX_HASH_FUNC_TOEPLITZ = 1 << 0,
 };

 /*
  * RX Hash flags, these flags allows to set which incoming packet's field should
  * participates in RX Hash. Each flag represent certain packet's field,
  * when the flag is set the field that is represented by the flag will
  * participate in RX Hash calculation.
  * Note: *IPV4 and *IPV6 flags can't be enabled together on the same QP
  * and *TCP and *UDP flags can't be enabled together on the same QP.
  */
 enum mlx5_rx_hash_fields {
 	MLX5_RX_HASH_SRC_IPV4 = 1 << 0,
 	MLX5_RX_HASH_DST_IPV4 = 1 << 1,
 	MLX5_RX_HASH_SRC_IPV6 = 1 << 2,
 	MLX5_RX_HASH_DST_IPV6 = 1 << 3,
 	MLX5_RX_HASH_SRC_PORT_TCP = 1 << 4,
 	MLX5_RX_HASH_DST_PORT_TCP = 1 << 5,
 	MLX5_RX_HASH_SRC_PORT_UDP = 1 << 6,
 	MLX5_RX_HASH_DST_PORT_UDP = 1 << 7
 };

 struct mlx5_ib_create_qp_rss {
 	__u64 rx_hash_fields_mask; /* enum mlx5_rx_hash_fields */
 	__u8 rx_hash_function; /* enum mlx5_rx_hash_function_flags */
 	__u8 rx_key_len; /* valid only for Toeplitz */
 	__u8 reserved[6];
 	__u8 rx_hash_key[128]; /* valid only for Toeplitz */
 	__u32 comp_mask;
 	__u32 reserved1;
 };

 struct mlx5_ib_create_qp_resp {
-	__u32 uuar_index;
+	__u32 bfreg_index;
 };

 struct mlx5_ib_alloc_mw {
 	__u32 comp_mask;
 	__u8 num_klms;
 	__u8 reserved1;
 	__u16 reserved2;
 };

 struct mlx5_ib_create_wq {
 	__u64 buf_addr;
 	__u64 db_addr;
 	__u32 rq_wqe_count;
 	__u32 rq_wqe_shift;
 	__u32 user_index;
 	__u32 flags;
 	__u32 comp_mask;
 	__u32 reserved;
 };

 struct mlx5_ib_create_ah_resp {
 	__u32 response_length;
 	__u8 dmac[ETH_ALEN];
 	__u8 reserved[6];
 };

 struct mlx5_ib_create_wq_resp {
 	__u32 response_length;
 	__u32 reserved;
 };

 struct mlx5_ib_create_rwq_ind_tbl_resp {
 	__u32 response_length;
 	__u32 reserved;
 };

 struct mlx5_ib_modify_wq {
 	__u32 comp_mask;
 	__u32 reserved;
 };

 #endif /* MLX5_ABI_USER_H */
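Illustrative note (not part of the patch): the mlx5_ifc_*_bits structures above are bit-exact register images that the driver accesses through its MLX5_SET()/MLX5_GET() layout macros and the access-register command interface. The sketch below is a hypothetical helper, mlx5_read_mtmp_sketch(), showing how the MTMP layout and MLX5_ACCESS_REG_SUMMARY_CTRL_ID_MTMP could be combined to read one temperature sensor; the mlx5_core_access_reg() argument order is assumed to match the driver's existing prototype and is not defined by this change.

/*
 * Hypothetical example only -- not part of this change set.  Assumes the
 * driver's existing MLX5_SET()/MLX5_GET() layout macros and the
 * mlx5_core_access_reg() helper.
 */
static int
mlx5_read_mtmp_sketch(struct mlx5_core_dev *dev, u32 sensor, s16 *temp)
{
	u32 in[MLX5_ST_SZ_DW(mtmp)] = {0};
	u32 out[MLX5_ST_SZ_DW(mtmp)] = {0};
	int err;

	/* Select the sensor; field names follow mlx5_ifc_mtmp_bits above. */
	MLX5_SET(mtmp, in, sensor_index, sensor);

	err = mlx5_core_access_reg(dev, in, sizeof(in), out, sizeof(out),
	    MLX5_ACCESS_REG_SUMMARY_CTRL_ID_MTMP, 0, 0);
	if (err != 0)
		return (err);

	/* 16-bit signed reading, as laid out in mlx5_ifc_mtmp_bits. */
	*temp = (s16)MLX5_GET(mtmp, out, temperature);
	return (0);
}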
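Illustrative note (not part of the patch): the uuar -> bfreg renames and the new UAR-related fields in the uverbs ABI above are filled in by the user-space provider before it issues the alloc_ucontext, create_cq and create_qp commands. The fragment below is a hypothetical user-space sketch of that usage; fill_mlx5_abi_sketch() and the numeric values are placeholders, and the actual command transport through libibverbs is omitted.

/*
 * Hypothetical user-space example only -- not part of this change set.
 * Assumes this header has been included; values are placeholders.
 */
#include <stdint.h>
#include <string.h>

static void
fill_mlx5_abi_sketch(struct mlx5_ib_alloc_ucontext_req_v2 *uctx,
    struct mlx5_ib_create_cq *cq, struct mlx5_ib_create_qp *qp,
    uint16_t dyn_uar_page, uint32_t bfreg)
{
	memset(uctx, 0, sizeof(*uctx));
	uctx->total_num_bfregs = 16;		/* was total_num_uuars */
	uctx->num_low_latency_bfregs = 4;	/* was num_low_latency_uuars */
	uctx->lib_caps = MLX5_LIB_CAP_4K_UAR | MLX5_LIB_CAP_DYN_UAR;

	memset(cq, 0, sizeof(*cq));
	cq->cqe_size = 64;	/* buf_addr/db_addr set up as before */
	/* The old 'reserved' word now carries flags plus a dynamic UAR index. */
	cq->flags = MLX5_IB_CREATE_CQ_FLAGS_UAR_PAGE_INDEX;
	cq->uar_page_index = dyn_uar_page;

	memset(qp, 0, sizeof(*qp));
	/* Request an explicit blue-flame register instead of the default. */
	qp->flags = MLX5_QP_FLAG_BFREG_INDEX;
	qp->bfreg_index = bfreg;		/* was reserved0 */
}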