diff --git a/sys/dev/mlx5/device.h b/sys/dev/mlx5/device.h index 64d4ed87d58f..e59fb6771d83 100644 --- a/sys/dev/mlx5/device.h +++ b/sys/dev/mlx5/device.h @@ -1,1263 +1,1268 @@ /*- * Copyright (c) 2013-2019, Mellanox Technologies, Ltd. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS `AS IS' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* * $FreeBSD$ */ #ifndef MLX5_DEVICE_H #define MLX5_DEVICE_H #include #include #include #define FW_INIT_TIMEOUT_MILI 2000 #define FW_INIT_WAIT_MS 2 #define FW_PRE_INIT_TIMEOUT_MILI 120000 #define FW_INIT_WARN_MESSAGE_INTERVAL 20000 #if defined(__LITTLE_ENDIAN) #define MLX5_SET_HOST_ENDIANNESS 0 #elif defined(__BIG_ENDIAN) #define MLX5_SET_HOST_ENDIANNESS 0x80 #else #error Host endianness not defined #endif /* helper macros */ #define __mlx5_nullp(typ) ((struct mlx5_ifc_##typ##_bits *)0) #define __mlx5_bit_sz(typ, fld) sizeof(__mlx5_nullp(typ)->fld) #define __mlx5_bit_off(typ, fld) __offsetof(struct mlx5_ifc_##typ##_bits, fld) #define __mlx5_16_off(typ, fld) (__mlx5_bit_off(typ, fld) / 16) #define __mlx5_dw_off(typ, fld) (__mlx5_bit_off(typ, fld) / 32) #define __mlx5_64_off(typ, fld) (__mlx5_bit_off(typ, fld) / 64) #define __mlx5_16_bit_off(typ, fld) (16 - __mlx5_bit_sz(typ, fld) - (__mlx5_bit_off(typ, fld) & 0xf)) #define __mlx5_dw_bit_off(typ, fld) (32 - __mlx5_bit_sz(typ, fld) - (__mlx5_bit_off(typ, fld) & 0x1f)) #define __mlx5_mask(typ, fld) ((u32)((1ull << __mlx5_bit_sz(typ, fld)) - 1)) #define __mlx5_dw_mask(typ, fld) (__mlx5_mask(typ, fld) << __mlx5_dw_bit_off(typ, fld)) #define __mlx5_mask16(typ, fld) ((u16)((1ull << __mlx5_bit_sz(typ, fld)) - 1)) #define __mlx5_16_mask(typ, fld) (__mlx5_mask16(typ, fld) << __mlx5_16_bit_off(typ, fld)) #define __mlx5_st_sz_bits(typ) sizeof(struct mlx5_ifc_##typ##_bits) #define MLX5_FLD_SZ_BYTES(typ, fld) (__mlx5_bit_sz(typ, fld) / 8) #define MLX5_ST_SZ_BYTES(typ) (sizeof(struct mlx5_ifc_##typ##_bits) / 8) #define MLX5_ST_SZ_DW(typ) (sizeof(struct mlx5_ifc_##typ##_bits) / 32) #define MLX5_ST_SZ_QW(typ) (sizeof(struct mlx5_ifc_##typ##_bits) / 64) #define MLX5_UN_SZ_BYTES(typ) (sizeof(union mlx5_ifc_##typ##_bits) / 8) #define MLX5_UN_SZ_DW(typ) (sizeof(union mlx5_ifc_##typ##_bits) / 32) #define MLX5_BYTE_OFF(typ, fld) (__mlx5_bit_off(typ, fld) / 8) #define MLX5_ADDR_OF(typ, p, fld) ((char *)(p) + MLX5_BYTE_OFF(typ, fld)) /* 
insert a value to a struct */ #define MLX5_SET(typ, p, fld, v) do { \ BUILD_BUG_ON(__mlx5_st_sz_bits(typ) % 32); \ BUILD_BUG_ON(__mlx5_bit_sz(typ, fld) > 32); \ *((__be32 *)(p) + __mlx5_dw_off(typ, fld)) = \ cpu_to_be32((be32_to_cpu(*((__be32 *)(p) + __mlx5_dw_off(typ, fld))) & \ (~__mlx5_dw_mask(typ, fld))) | (((v) & __mlx5_mask(typ, fld)) \ << __mlx5_dw_bit_off(typ, fld))); \ } while (0) #define MLX5_SET_TO_ONES(typ, p, fld) do { \ BUILD_BUG_ON(__mlx5_st_sz_bits(typ) % 32); \ BUILD_BUG_ON(__mlx5_bit_sz(typ, fld) > 32); \ *((__be32 *)(p) + __mlx5_dw_off(typ, fld)) = \ cpu_to_be32((be32_to_cpu(*((__be32 *)(p) + __mlx5_dw_off(typ, fld))) & \ (~__mlx5_dw_mask(typ, fld))) | ((__mlx5_mask(typ, fld)) \ << __mlx5_dw_bit_off(typ, fld))); \ } while (0) #define MLX5_GET(typ, p, fld) ((be32_to_cpu(*((__be32 *)(p) +\ __mlx5_dw_off(typ, fld))) >> __mlx5_dw_bit_off(typ, fld)) & \ __mlx5_mask(typ, fld)) #define MLX5_GET_PR(typ, p, fld) ({ \ u32 ___t = MLX5_GET(typ, p, fld); \ pr_debug(#fld " = 0x%x\n", ___t); \ ___t; \ }) #define __MLX5_SET64(typ, p, fld, v) do { \ BUILD_BUG_ON(__mlx5_bit_sz(typ, fld) != 64); \ *((__be64 *)(p) + __mlx5_64_off(typ, fld)) = cpu_to_be64(v); \ } while (0) #define MLX5_SET64(typ, p, fld, v) do { \ BUILD_BUG_ON(__mlx5_bit_off(typ, fld) % 64); \ __MLX5_SET64(typ, p, fld, v); \ } while (0) #define MLX5_ARRAY_SET64(typ, p, fld, idx, v) do { \ BUILD_BUG_ON(__mlx5_bit_off(typ, fld) % 64); \ __MLX5_SET64(typ, p, fld[idx], v); \ } while (0) #define MLX5_GET64(typ, p, fld) be64_to_cpu(*((__be64 *)(p) + __mlx5_64_off(typ, fld))) #define MLX5_GET16(typ, p, fld) ((be16_to_cpu(*((__be16 *)(p) +\ __mlx5_16_off(typ, fld))) >> __mlx5_16_bit_off(typ, fld)) & \ __mlx5_mask16(typ, fld)) #define MLX5_SET16(typ, p, fld, v) do { \ u16 _v = v; \ BUILD_BUG_ON(__mlx5_st_sz_bits(typ) % 16); \ *((__be16 *)(p) + __mlx5_16_off(typ, fld)) = \ cpu_to_be16((be16_to_cpu(*((__be16 *)(p) + __mlx5_16_off(typ, fld))) & \ (~__mlx5_16_mask(typ, fld))) | (((_v) & __mlx5_mask16(typ, fld)) \ 
<< __mlx5_16_bit_off(typ, fld))); \ } while (0) #define MLX5_GET64_BE(typ, p, fld) (*((__be64 *)(p) +\ __mlx5_64_off(typ, fld))) #define MLX5_GET_BE(type_t, typ, p, fld) ({ \ type_t tmp; \ switch (sizeof(tmp)) { \ case sizeof(u8): \ tmp = (__force type_t)MLX5_GET(typ, p, fld); \ break; \ case sizeof(u16): \ tmp = (__force type_t)cpu_to_be16(MLX5_GET(typ, p, fld)); \ break; \ case sizeof(u32): \ tmp = (__force type_t)cpu_to_be32(MLX5_GET(typ, p, fld)); \ break; \ case sizeof(u64): \ tmp = (__force type_t)MLX5_GET64_BE(typ, p, fld); \ break; \ } \ tmp; \ }) #define MLX5_BY_PASS_NUM_REGULAR_PRIOS 8 #define MLX5_BY_PASS_NUM_DONT_TRAP_PRIOS 8 #define MLX5_BY_PASS_NUM_MULTICAST_PRIOS 1 #define MLX5_BY_PASS_NUM_PRIOS (MLX5_BY_PASS_NUM_REGULAR_PRIOS +\ MLX5_BY_PASS_NUM_DONT_TRAP_PRIOS +\ MLX5_BY_PASS_NUM_MULTICAST_PRIOS) /* insert a value to a struct */ #define MLX5_VSC_SET(typ, p, fld, v) do { \ BUILD_BUG_ON(__mlx5_st_sz_bits(typ) % 32); \ BUILD_BUG_ON(__mlx5_bit_sz(typ, fld) > 32); \ *((__le32 *)(p) + __mlx5_dw_off(typ, fld)) = \ cpu_to_le32((le32_to_cpu(*((__le32 *)(p) + __mlx5_dw_off(typ, fld))) & \ (~__mlx5_dw_mask(typ, fld))) | (((v) & __mlx5_mask(typ, fld)) \ << __mlx5_dw_bit_off(typ, fld))); \ } while (0) #define MLX5_VSC_GET(typ, p, fld) ((le32_to_cpu(*((__le32 *)(p) +\ __mlx5_dw_off(typ, fld))) >> __mlx5_dw_bit_off(typ, fld)) & \ __mlx5_mask(typ, fld)) #define MLX5_VSC_GET_PR(typ, p, fld) ({ \ u32 ___t = MLX5_VSC_GET(typ, p, fld); \ pr_debug(#fld " = 0x%x\n", ___t); \ ___t; \ }) enum { MLX5_MAX_COMMANDS = 32, MLX5_CMD_DATA_BLOCK_SIZE = 512, MLX5_CMD_MBOX_SIZE = 1024, MLX5_PCI_CMD_XPORT = 7, MLX5_MKEY_BSF_OCTO_SIZE = 4, MLX5_MAX_PSVS = 4, }; enum { MLX5_EXTENDED_UD_AV = 0x80000000, }; enum { MLX5_CQ_FLAGS_OI = 2, }; enum { MLX5_STAT_RATE_OFFSET = 5, }; enum { MLX5_INLINE_SEG = 0x80000000, }; enum { MLX5_HW_START_PADDING = MLX5_INLINE_SEG, }; enum { MLX5_MIN_PKEY_TABLE_SIZE = 128, MLX5_MAX_LOG_PKEY_TABLE = 5, }; enum { MLX5_MKEY_INBOX_PG_ACCESS = 1U << 31 }; enum { 
MLX5_PERM_LOCAL_READ = 1 << 2, MLX5_PERM_LOCAL_WRITE = 1 << 3, MLX5_PERM_REMOTE_READ = 1 << 4, MLX5_PERM_REMOTE_WRITE = 1 << 5, MLX5_PERM_ATOMIC = 1 << 6, MLX5_PERM_UMR_EN = 1 << 7, }; enum { MLX5_PCIE_CTRL_SMALL_FENCE = 1 << 0, MLX5_PCIE_CTRL_RELAXED_ORDERING = 1 << 2, MLX5_PCIE_CTRL_NO_SNOOP = 1 << 3, MLX5_PCIE_CTRL_TLP_PROCE_EN = 1 << 6, MLX5_PCIE_CTRL_TPH_MASK = 3 << 4, }; enum { MLX5_MKEY_REMOTE_INVAL = 1 << 24, MLX5_MKEY_FLAG_SYNC_UMR = 1 << 29, MLX5_MKEY_BSF_EN = 1 << 30, MLX5_MKEY_LEN64 = 1U << 31, }; enum { MLX5_EN_RD = (u64)1, MLX5_EN_WR = (u64)2 }; enum { MLX5_ADAPTER_PAGE_SHIFT = 12, MLX5_ADAPTER_PAGE_SIZE = 1 << MLX5_ADAPTER_PAGE_SHIFT, }; enum { MLX5_BFREGS_PER_UAR = 4, MLX5_MAX_UARS = 1 << 8, MLX5_NON_FP_BFREGS_PER_UAR = 2, MLX5_FP_BFREGS_PER_UAR = MLX5_BFREGS_PER_UAR - MLX5_NON_FP_BFREGS_PER_UAR, MLX5_MAX_BFREGS = MLX5_MAX_UARS * MLX5_NON_FP_BFREGS_PER_UAR, MLX5_UARS_IN_PAGE = PAGE_SIZE / MLX5_ADAPTER_PAGE_SIZE, MLX5_NON_FP_BFREGS_IN_PAGE = MLX5_NON_FP_BFREGS_PER_UAR * MLX5_UARS_IN_PAGE, MLX5_MIN_DYN_BFREGS = 512, MLX5_MAX_DYN_BFREGS = 1024, }; enum { MLX5_MKEY_MASK_LEN = 1ull << 0, MLX5_MKEY_MASK_PAGE_SIZE = 1ull << 1, MLX5_MKEY_MASK_START_ADDR = 1ull << 6, MLX5_MKEY_MASK_PD = 1ull << 7, MLX5_MKEY_MASK_EN_RINVAL = 1ull << 8, MLX5_MKEY_MASK_EN_SIGERR = 1ull << 9, MLX5_MKEY_MASK_BSF_EN = 1ull << 12, MLX5_MKEY_MASK_KEY = 1ull << 13, MLX5_MKEY_MASK_QPN = 1ull << 14, MLX5_MKEY_MASK_LR = 1ull << 17, MLX5_MKEY_MASK_LW = 1ull << 18, MLX5_MKEY_MASK_RR = 1ull << 19, MLX5_MKEY_MASK_RW = 1ull << 20, MLX5_MKEY_MASK_A = 1ull << 21, MLX5_MKEY_MASK_SMALL_FENCE = 1ull << 23, MLX5_MKEY_MASK_FREE = 1ull << 29, }; enum { MLX5_UMR_TRANSLATION_OFFSET_EN = (1 << 4), MLX5_UMR_CHECK_NOT_FREE = (1 << 5), MLX5_UMR_CHECK_FREE = (2 << 5), MLX5_UMR_INLINE = (1 << 7), }; #define MLX5_UMR_MTT_ALIGNMENT 0x40 #define MLX5_UMR_MTT_MASK (MLX5_UMR_MTT_ALIGNMENT - 1) #define MLX5_UMR_MTT_MIN_CHUNK_SIZE MLX5_UMR_MTT_ALIGNMENT enum { MLX5_EVENT_QUEUE_TYPE_QP = 0, MLX5_EVENT_QUEUE_TYPE_RQ 
= 1, MLX5_EVENT_QUEUE_TYPE_SQ = 2, }; enum { MLX5_PORT_CHANGE_SUBTYPE_DOWN = 1, MLX5_PORT_CHANGE_SUBTYPE_ACTIVE = 4, MLX5_PORT_CHANGE_SUBTYPE_INITIALIZED = 5, MLX5_PORT_CHANGE_SUBTYPE_LID = 6, MLX5_PORT_CHANGE_SUBTYPE_PKEY = 7, MLX5_PORT_CHANGE_SUBTYPE_GUID = 8, MLX5_PORT_CHANGE_SUBTYPE_CLIENT_REREG = 9, }; enum { MLX5_DCBX_EVENT_SUBTYPE_ERROR_STATE_DCBX = 1, MLX5_DCBX_EVENT_SUBTYPE_REMOTE_CONFIG_CHANGE, MLX5_DCBX_EVENT_SUBTYPE_LOCAL_OPER_CHANGE, MLX5_DCBX_EVENT_SUBTYPE_REMOTE_CONFIG_APP_PRIORITY_CHANGE, MLX5_MAX_INLINE_RECEIVE_SIZE = 64 }; enum { MLX5_DEV_CAP_FLAG_XRC = 1LL << 3, MLX5_DEV_CAP_FLAG_BAD_PKEY_CNTR = 1LL << 8, MLX5_DEV_CAP_FLAG_BAD_QKEY_CNTR = 1LL << 9, MLX5_DEV_CAP_FLAG_APM = 1LL << 17, MLX5_DEV_CAP_FLAG_SCQE_BRK_MOD = 1LL << 21, MLX5_DEV_CAP_FLAG_BLOCK_MCAST = 1LL << 23, MLX5_DEV_CAP_FLAG_CQ_MODER = 1LL << 29, MLX5_DEV_CAP_FLAG_RESIZE_CQ = 1LL << 30, MLX5_DEV_CAP_FLAG_ATOMIC = 1LL << 33, MLX5_DEV_CAP_FLAG_ROCE = 1LL << 34, MLX5_DEV_CAP_FLAG_DCT = 1LL << 37, MLX5_DEV_CAP_FLAG_SIG_HAND_OVER = 1LL << 40, MLX5_DEV_CAP_FLAG_CMDIF_CSUM = 3LL << 46, MLX5_DEV_CAP_FLAG_DRAIN_SIGERR = 1LL << 48, }; enum { MLX5_ROCE_VERSION_1 = 0, MLX5_ROCE_VERSION_1_5 = 1, MLX5_ROCE_VERSION_2 = 2, }; enum { MLX5_ROCE_VERSION_1_CAP = 1 << MLX5_ROCE_VERSION_1, MLX5_ROCE_VERSION_1_5_CAP = 1 << MLX5_ROCE_VERSION_1_5, MLX5_ROCE_VERSION_2_CAP = 1 << MLX5_ROCE_VERSION_2, }; enum { MLX5_ROCE_L3_TYPE_IPV4 = 0, MLX5_ROCE_L3_TYPE_IPV6 = 1, }; enum { MLX5_ROCE_L3_TYPE_IPV4_CAP = 1 << 1, MLX5_ROCE_L3_TYPE_IPV6_CAP = 1 << 2, }; enum { MLX5_OPCODE_NOP = 0x00, MLX5_OPCODE_SEND_INVAL = 0x01, MLX5_OPCODE_RDMA_WRITE = 0x08, MLX5_OPCODE_RDMA_WRITE_IMM = 0x09, MLX5_OPCODE_SEND = 0x0a, MLX5_OPCODE_SEND_IMM = 0x0b, MLX5_OPCODE_LSO = 0x0e, MLX5_OPCODE_RDMA_READ = 0x10, MLX5_OPCODE_ATOMIC_CS = 0x11, MLX5_OPCODE_ATOMIC_FA = 0x12, MLX5_OPCODE_ATOMIC_MASKED_CS = 0x14, MLX5_OPCODE_ATOMIC_MASKED_FA = 0x15, MLX5_OPCODE_BIND_MW = 0x18, MLX5_OPCODE_CONFIG_CMD = 0x1f, MLX5_OPCODE_DUMP = 0x23, 
MLX5_RECV_OPCODE_RDMA_WRITE_IMM = 0x00, MLX5_RECV_OPCODE_SEND = 0x01, MLX5_RECV_OPCODE_SEND_IMM = 0x02, MLX5_RECV_OPCODE_SEND_INVAL = 0x03, MLX5_CQE_OPCODE_ERROR = 0x1e, MLX5_CQE_OPCODE_RESIZE = 0x16, MLX5_OPCODE_SET_PSV = 0x20, MLX5_OPCODE_GET_PSV = 0x21, MLX5_OPCODE_CHECK_PSV = 0x22, MLX5_OPCODE_RGET_PSV = 0x26, MLX5_OPCODE_RCHECK_PSV = 0x27, MLX5_OPCODE_UMR = 0x25, MLX5_OPCODE_SIGNATURE_CANCELED = (1 << 15), }; enum { MLX5_OPCODE_MOD_UMR_UMR = 0x0, MLX5_OPCODE_MOD_UMR_TLS_TIS_STATIC_PARAMS = 0x1, MLX5_OPCODE_MOD_UMR_TLS_TIR_STATIC_PARAMS = 0x2, }; enum { MLX5_OPCODE_MOD_PSV_PSV = 0x0, MLX5_OPCODE_MOD_PSV_TLS_TIS_PROGRESS_PARAMS = 0x1, MLX5_OPCODE_MOD_PSV_TLS_TIR_PROGRESS_PARAMS = 0x2, }; enum { MLX5_SET_PORT_RESET_QKEY = 0, MLX5_SET_PORT_GUID0 = 16, MLX5_SET_PORT_NODE_GUID = 17, MLX5_SET_PORT_SYS_GUID = 18, MLX5_SET_PORT_GID_TABLE = 19, MLX5_SET_PORT_PKEY_TABLE = 20, }; enum { MLX5_MAX_PAGE_SHIFT = 31 }; enum { MLX5_CAP_OFF_CMDIF_CSUM = 46, }; enum { /* * Max wqe size for rdma read is 512 bytes, so this * limits our max_sge_rd as the wqe needs to fit: * - ctrl segment (16 bytes) * - rdma segment (16 bytes) * - scatter elements (16 bytes each) */ MLX5_MAX_SGE_RD = (512 - 16 - 16) / 16 }; struct mlx5_cmd_layout { u8 type; u8 rsvd0[3]; __be32 inlen; __be64 in_ptr; __be32 in[4]; __be32 out[4]; __be64 out_ptr; __be32 outlen; u8 token; u8 sig; u8 rsvd1; u8 status_own; }; enum mlx5_fatal_assert_bit_offsets { MLX5_RFR_OFFSET = 31, }; struct mlx5_health_buffer { __be32 assert_var[5]; __be32 rsvd0[3]; __be32 assert_exit_ptr; __be32 assert_callra; __be32 rsvd1[2]; __be32 fw_ver; __be32 hw_id; __be32 rfr; u8 irisc_index; u8 synd; __be16 ext_synd; }; enum mlx5_initializing_bit_offsets { MLX5_FW_RESET_SUPPORTED_OFFSET = 30, }; enum mlx5_cmd_addr_l_sz_offset { MLX5_NIC_IFC_OFFSET = 8, }; struct mlx5_init_seg { __be32 fw_rev; __be32 cmdif_rev_fw_sub; __be32 rsvd0[2]; __be32 cmdq_addr_h; __be32 cmdq_addr_l_sz; __be32 cmd_dbell; __be32 rsvd1[120]; __be32 initializing; struct 
mlx5_health_buffer health; __be32 rsvd2[880]; __be32 internal_timer_h; __be32 internal_timer_l; __be32 rsvd3[2]; __be32 health_counter; __be32 rsvd4[1019]; __be64 ieee1588_clk; __be32 ieee1588_clk_type; __be32 clr_intx; }; struct mlx5_eqe_comp { __be32 reserved[6]; __be32 cqn; }; struct mlx5_eqe_qp_srq { __be32 reserved[6]; __be32 qp_srq_n; }; struct mlx5_eqe_cq_err { __be32 cqn; u8 reserved1[7]; u8 syndrome; }; struct mlx5_eqe_port_state { u8 reserved0[8]; u8 port; }; struct mlx5_eqe_gpio { __be32 reserved0[2]; __be64 gpio_event; }; struct mlx5_eqe_congestion { u8 type; u8 rsvd0; u8 congestion_level; }; struct mlx5_eqe_stall_vl { u8 rsvd0[3]; u8 port_vl; }; struct mlx5_eqe_cmd { __be32 vector; __be32 rsvd[6]; }; struct mlx5_eqe_page_req { u8 rsvd0[2]; __be16 func_id; __be32 num_pages; __be32 rsvd1[5]; }; struct mlx5_eqe_vport_change { u8 rsvd0[2]; __be16 vport_num; __be32 rsvd1[6]; }; #define PORT_MODULE_EVENT_MODULE_STATUS_MASK 0xF #define PORT_MODULE_EVENT_ERROR_TYPE_MASK 0xF enum { MLX5_MODULE_STATUS_PLUGGED_ENABLED = 0x1, MLX5_MODULE_STATUS_UNPLUGGED = 0x2, MLX5_MODULE_STATUS_ERROR = 0x3, MLX5_MODULE_STATUS_NUM , }; enum { MLX5_MODULE_EVENT_ERROR_POWER_BUDGET_EXCEEDED = 0x0, MLX5_MODULE_EVENT_ERROR_LONG_RANGE_FOR_NON_MLNX_CABLE_MODULE = 0x1, MLX5_MODULE_EVENT_ERROR_BUS_STUCK = 0x2, MLX5_MODULE_EVENT_ERROR_NO_EEPROM_RETRY_TIMEOUT = 0x3, MLX5_MODULE_EVENT_ERROR_ENFORCE_PART_NUMBER_LIST = 0x4, MLX5_MODULE_EVENT_ERROR_UNSUPPORTED_CABLE = 0x5, MLX5_MODULE_EVENT_ERROR_HIGH_TEMPERATURE = 0x6, MLX5_MODULE_EVENT_ERROR_CABLE_IS_SHORTED = 0x7, MLX5_MODULE_EVENT_ERROR_PMD_TYPE_NOT_ENABLED = 0x8, MLX5_MODULE_EVENT_ERROR_LASTER_TEC_FAILURE = 0x9, MLX5_MODULE_EVENT_ERROR_HIGH_CURRENT = 0xa, MLX5_MODULE_EVENT_ERROR_HIGH_VOLTAGE = 0xb, MLX5_MODULE_EVENT_ERROR_PCIE_SYS_POWER_SLOT_EXCEEDED = 0xc, MLX5_MODULE_EVENT_ERROR_HIGH_POWER = 0xd, MLX5_MODULE_EVENT_ERROR_MODULE_STATE_MACHINE_FAULT = 0xe, MLX5_MODULE_EVENT_ERROR_NUM , }; struct mlx5_eqe_port_module_event { u8 rsvd0; u8 
module; u8 rsvd1; u8 module_status; u8 rsvd2[2]; u8 error_type; }; struct mlx5_eqe_general_notification_event { u32 rq_user_index_delay_drop; u32 rsvd0[6]; }; struct mlx5_eqe_temp_warning { __be64 sensor_warning_msb; __be64 sensor_warning_lsb; } __packed; union ev_data { __be32 raw[7]; struct mlx5_eqe_cmd cmd; struct mlx5_eqe_comp comp; struct mlx5_eqe_qp_srq qp_srq; struct mlx5_eqe_cq_err cq_err; struct mlx5_eqe_port_state port; struct mlx5_eqe_gpio gpio; struct mlx5_eqe_congestion cong; struct mlx5_eqe_stall_vl stall_vl; struct mlx5_eqe_page_req req_pages; struct mlx5_eqe_port_module_event port_module_event; struct mlx5_eqe_vport_change vport_change; struct mlx5_eqe_general_notification_event general_notifications; struct mlx5_eqe_temp_warning temp_warning; } __packed; struct mlx5_eqe { u8 rsvd0; u8 type; u8 rsvd1; u8 sub_type; __be32 rsvd2[7]; union ev_data data; __be16 rsvd3; u8 signature; u8 owner; } __packed; struct mlx5_cmd_prot_block { u8 data[MLX5_CMD_DATA_BLOCK_SIZE]; u8 rsvd0[48]; __be64 next; __be32 block_num; u8 rsvd1; u8 token; u8 ctrl_sig; u8 sig; }; #define MLX5_NUM_CMDS_IN_ADAPTER_PAGE \ (MLX5_ADAPTER_PAGE_SIZE / MLX5_CMD_MBOX_SIZE) CTASSERT(MLX5_CMD_MBOX_SIZE >= sizeof(struct mlx5_cmd_prot_block)); CTASSERT(MLX5_CMD_MBOX_SIZE <= MLX5_ADAPTER_PAGE_SIZE); enum { MLX5_CQE_SYND_FLUSHED_IN_ERROR = 5, }; struct mlx5_err_cqe { u8 rsvd0[32]; __be32 srqn; u8 rsvd1[18]; u8 vendor_err_synd; u8 syndrome; __be32 s_wqe_opcode_qpn; __be16 wqe_counter; u8 signature; u8 op_own; }; struct mlx5_cqe64 { u8 tls_outer_l3_tunneled; u8 rsvd0; __be16 wqe_id; u8 lro_tcppsh_abort_dupack; u8 lro_min_ttl; __be16 lro_tcp_win; __be32 lro_ack_seq_num; __be32 rss_hash_result; u8 rss_hash_type; u8 ml_path; u8 rsvd20[2]; __be16 check_sum; __be16 slid; __be32 flags_rqpn; u8 hds_ip_ext; u8 l4_hdr_type_etc; __be16 vlan_info; __be32 srqn; /* [31:24]: lro_num_seg, [23:0]: srqn */ __be32 imm_inval_pkey; u8 rsvd40[4]; __be32 byte_cnt; __be64 timestamp; __be32 sop_drop_qpn; __be16 
wqe_counter; u8 signature; u8 op_own; }; #define MLX5_CQE_TSTMP_PTP (1ULL << 63) /* Return the CQE opcode, i.e. the upper four bits of op_own. */ +static inline u8 get_cqe_opcode(struct mlx5_cqe64 *cqe) +{ + return (cqe->op_own >> 4); +} + /* Return bit 7 of lro_tcppsh_abort_dupack. */ static inline bool get_cqe_lro_timestamp_valid(struct mlx5_cqe64 *cqe) { return (cqe->lro_tcppsh_abort_dupack >> 7) & 1; } /* Return bit 6 of lro_tcppsh_abort_dupack. */ static inline bool get_cqe_lro_tcppsh(struct mlx5_cqe64 *cqe) { return (cqe->lro_tcppsh_abort_dupack >> 6) & 1; } /* Return bits 6:4 of l4_hdr_type_etc; compare against the CQE_L4_HDR_TYPE_* values. */ static inline u8 get_cqe_l4_hdr_type(struct mlx5_cqe64 *cqe) { return (cqe->l4_hdr_type_etc >> 4) & 0x7; } /* Return the low 12 bits of vlan_info after converting it from big-endian to host order. */ static inline u16 get_cqe_vlan(struct mlx5_cqe64 *cqe) { return be16_to_cpu(cqe->vlan_info) & 0xfff; } /* Copy six bytes into smac: four starting at rss_hash_type followed by two starting at slid. */ static inline void get_cqe_smac(struct mlx5_cqe64 *cqe, u8 *smac) { memcpy(smac, &cqe->rss_hash_type , 4); memcpy(smac + 4, &cqe->slid , 2); } /* Return bit 0 of l4_hdr_type_etc. */ static inline bool cqe_has_vlan(struct mlx5_cqe64 *cqe) { return cqe->l4_hdr_type_etc & 0x1; } /* Return bit 0 of tls_outer_l3_tunneled. */ static inline bool cqe_is_tunneled(struct mlx5_cqe64 *cqe) { return cqe->tls_outer_l3_tunneled & 0x1; } enum { CQE_L4_HDR_TYPE_NONE = 0x0, CQE_L4_HDR_TYPE_TCP_NO_ACK = 0x1, CQE_L4_HDR_TYPE_UDP = 0x2, CQE_L4_HDR_TYPE_TCP_ACK_NO_DATA = 0x3, CQE_L4_HDR_TYPE_TCP_ACK_AND_DATA = 0x4, }; enum { /* source L3 hash types */ CQE_RSS_SRC_HTYPE_IP = 0x3 << 0, CQE_RSS_SRC_HTYPE_IPV4 = 0x1 << 0, CQE_RSS_SRC_HTYPE_IPV6 = 0x2 << 0, /* destination L3 hash types */ CQE_RSS_DST_HTYPE_IP = 0x3 << 2, CQE_RSS_DST_HTYPE_IPV4 = 0x1 << 2, CQE_RSS_DST_HTYPE_IPV6 = 0x2 << 2, /* source L4 hash types */ CQE_RSS_SRC_HTYPE_L4 = 0x3 << 4, CQE_RSS_SRC_HTYPE_TCP = 0x1 << 4, CQE_RSS_SRC_HTYPE_UDP = 0x2 << 4, CQE_RSS_SRC_HTYPE_IPSEC = 0x3 << 4, /* destination L4 hash types */ CQE_RSS_DST_HTYPE_L4 = 0x3 << 6, CQE_RSS_DST_HTYPE_TCP = 0x1 << 6, CQE_RSS_DST_HTYPE_UDP = 0x2 << 6, CQE_RSS_DST_HTYPE_IPSEC = 0x3 << 6, }; enum { MLX5_CQE_ROCE_L3_HEADER_TYPE_GRH = 0x0, MLX5_CQE_ROCE_L3_HEADER_TYPE_IPV6 = 0x1, MLX5_CQE_ROCE_L3_HEADER_TYPE_IPV4 = 0x2, }; enum { CQE_L2_OK = 1 << 0, CQE_L3_OK = 1 << 1, CQE_L4_OK = 1 << 2, }; struct mlx5_sig_err_cqe { u8 rsvd0[16]; __be32 
expected_trans_sig; __be32 actual_trans_sig; __be32 expected_reftag; __be32 actual_reftag; __be16 syndrome; u8 rsvd22[2]; __be32 mkey; __be64 err_offset; u8 rsvd30[8]; __be32 qpn; u8 rsvd38[2]; u8 signature; u8 op_own; }; struct mlx5_wqe_srq_next_seg { u8 rsvd0[2]; __be16 next_wqe_index; u8 signature; u8 rsvd1[11]; }; union mlx5_ext_cqe { struct ib_grh grh; u8 inl[64]; }; struct mlx5_cqe128 { union mlx5_ext_cqe inl_grh; struct mlx5_cqe64 cqe64; }; enum { MLX5_MKEY_STATUS_FREE = 1 << 6, }; struct mlx5_mkey_seg { /* This is a two-bit field occupying bits 31-30. * Bit 31 is always 0. * Bit 30 is zero for regular MRs and 1 (i.e. free) for UMRs that do not have a translation. */ u8 status; u8 pcie_control; u8 flags; u8 version; __be32 qpn_mkey7_0; u8 rsvd1[4]; __be32 flags_pd; __be64 start_addr; __be64 len; __be32 bsfs_octo_size; u8 rsvd2[16]; __be32 xlt_oct_size; u8 rsvd3[3]; u8 log2_page_size; u8 rsvd4[4]; }; #define MLX5_ATTR_EXTENDED_PORT_INFO cpu_to_be16(0xff90) enum { MLX_EXT_PORT_CAP_FLAG_EXTENDED_PORT_INFO = 1 << 0 }; /* Return 1 when __LITTLE_ENDIAN is defined and 0 when __BIG_ENDIAN is defined; the build fails if neither macro is defined. */ static inline int mlx5_host_is_le(void) { #if defined(__LITTLE_ENDIAN) return 1; #elif defined(__BIG_ENDIAN) return 0; #else #error Host endianness not defined #endif } #define MLX5_CMD_OP_MAX 0x939 enum { VPORT_STATE_DOWN = 0x0, VPORT_STATE_UP = 0x1, VPORT_STATE_FOLLOW = 0x2, }; enum { MLX5_L3_PROT_TYPE_IPV4 = 0, MLX5_L3_PROT_TYPE_IPV6 = 1, }; enum { MLX5_L4_PROT_TYPE_TCP = 0, MLX5_L4_PROT_TYPE_UDP = 1, }; enum { MLX5_HASH_FIELD_SEL_SRC_IP = 1 << 0, MLX5_HASH_FIELD_SEL_DST_IP = 1 << 1, MLX5_HASH_FIELD_SEL_L4_SPORT = 1 << 2, MLX5_HASH_FIELD_SEL_L4_DPORT = 1 << 3, MLX5_HASH_FIELD_SEL_IPSEC_SPI = 1 << 4, }; enum { MLX5_MATCH_OUTER_HEADERS = 1 << 0, MLX5_MATCH_MISC_PARAMETERS = 1 << 1, MLX5_MATCH_INNER_HEADERS = 1 << 2, }; enum { MLX5_FLOW_TABLE_TYPE_NIC_RCV = 0, MLX5_FLOW_TABLE_TYPE_EGRESS_ACL = 2, MLX5_FLOW_TABLE_TYPE_INGRESS_ACL = 3, MLX5_FLOW_TABLE_TYPE_ESWITCH = 4, MLX5_FLOW_TABLE_TYPE_SNIFFER_RX = 5, MLX5_FLOW_TABLE_TYPE_SNIFFER_TX = 6, 
MLX5_FLOW_TABLE_TYPE_NIC_RX_RDMA = 7, }; enum { MLX5_MODIFY_ESW_VPORT_CONTEXT_CVLAN_INSERT_NONE = 0, MLX5_MODIFY_ESW_VPORT_CONTEXT_CVLAN_INSERT_IF_NO_VLAN = 1, MLX5_MODIFY_ESW_VPORT_CONTEXT_CVLAN_INSERT_OVERWRITE = 2 }; enum { MLX5_MODIFY_ESW_VPORT_CONTEXT_FIELD_SELECT_SVLAN_STRIP = 1 << 0, MLX5_MODIFY_ESW_VPORT_CONTEXT_FIELD_SELECT_CVLAN_STRIP = 1 << 1, MLX5_MODIFY_ESW_VPORT_CONTEXT_FIELD_SELECT_SVLAN_INSERT = 1 << 2, MLX5_MODIFY_ESW_VPORT_CONTEXT_FIELD_SELECT_CVLAN_INSERT = 1 << 3 }; enum { MLX5_UC_ADDR_CHANGE = (1 << 0), MLX5_MC_ADDR_CHANGE = (1 << 1), MLX5_VLAN_CHANGE = (1 << 2), MLX5_PROMISC_CHANGE = (1 << 3), MLX5_MTU_CHANGE = (1 << 4), }; enum mlx5_list_type { MLX5_NIC_VPORT_LIST_TYPE_UC = 0x0, MLX5_NIC_VPORT_LIST_TYPE_MC = 0x1, MLX5_NIC_VPORT_LIST_TYPE_VLAN = 0x2, }; enum { MLX5_ESW_VPORT_ADMIN_STATE_DOWN = 0x0, MLX5_ESW_VPORT_ADMIN_STATE_UP = 0x1, MLX5_ESW_VPORT_ADMIN_STATE_AUTO = 0x2, }; /* MLX5 DEV CAPs */ /* TODO: EAT.ME */ enum mlx5_cap_mode { HCA_CAP_OPMOD_GET_MAX = 0, HCA_CAP_OPMOD_GET_CUR = 1, }; enum mlx5_cap_type { MLX5_CAP_GENERAL = 0, MLX5_CAP_ETHERNET_OFFLOADS, MLX5_CAP_ODP, MLX5_CAP_ATOMIC, MLX5_CAP_ROCE, MLX5_CAP_IPOIB_OFFLOADS, MLX5_CAP_EOIB_OFFLOADS, MLX5_CAP_FLOW_TABLE, MLX5_CAP_ESWITCH_FLOW_TABLE, MLX5_CAP_ESWITCH, MLX5_CAP_SNAPSHOT, MLX5_CAP_VECTOR_CALC, MLX5_CAP_QOS, MLX5_CAP_DEBUG, MLX5_CAP_NVME, MLX5_CAP_DMC, MLX5_CAP_DEC, MLX5_CAP_TLS, /* NUM OF CAP Types */ MLX5_CAP_NUM }; enum mlx5_qcam_reg_groups { MLX5_QCAM_REGS_FIRST_128 = 0x0, }; enum mlx5_qcam_feature_groups { MLX5_QCAM_FEATURE_ENHANCED_FEATURES = 0x0, }; enum mlx5_pcam_reg_groups { MLX5_PCAM_REGS_5000_TO_507F = 0x0, }; enum mlx5_pcam_feature_groups { MLX5_PCAM_FEATURE_ENHANCED_FEATURES = 0x0, }; enum mlx5_mcam_reg_groups { MLX5_MCAM_REGS_FIRST_128 = 0x0, }; enum mlx5_mcam_feature_groups { MLX5_MCAM_FEATURE_ENHANCED_FEATURES = 0x0, }; /* GET Dev Caps macros */ #define MLX5_CAP_GEN(mdev, cap) \ MLX5_GET(cmd_hca_cap, mdev->hca_caps_cur[MLX5_CAP_GENERAL], cap) #define 
MLX5_CAP_GEN_64(mdev, cap) \ MLX5_GET64(cmd_hca_cap, mdev->hca_caps_cur[MLX5_CAP_GENERAL], cap) #define MLX5_CAP_GEN_MAX(mdev, cap) \ MLX5_GET(cmd_hca_cap, mdev->hca_caps_max[MLX5_CAP_GENERAL], cap) #define MLX5_CAP_ETH(mdev, cap) \ MLX5_GET(per_protocol_networking_offload_caps,\ mdev->hca_caps_cur[MLX5_CAP_ETHERNET_OFFLOADS], cap) #define MLX5_CAP_ETH_MAX(mdev, cap) \ MLX5_GET(per_protocol_networking_offload_caps,\ mdev->hca_caps_max[MLX5_CAP_ETHERNET_OFFLOADS], cap) #define MLX5_CAP_ROCE(mdev, cap) \ MLX5_GET(roce_cap, mdev->hca_caps_cur[MLX5_CAP_ROCE], cap) #define MLX5_CAP_ROCE_MAX(mdev, cap) \ MLX5_GET(roce_cap, mdev->hca_caps_max[MLX5_CAP_ROCE], cap) #define MLX5_CAP_ATOMIC(mdev, cap) \ MLX5_GET(atomic_caps, mdev->hca_caps_cur[MLX5_CAP_ATOMIC], cap) #define MLX5_CAP_ATOMIC_MAX(mdev, cap) \ MLX5_GET(atomic_caps, mdev->hca_caps_max[MLX5_CAP_ATOMIC], cap) #define MLX5_CAP_FLOWTABLE(mdev, cap) \ MLX5_GET(flow_table_nic_cap, mdev->hca_caps_cur[MLX5_CAP_FLOW_TABLE], cap) #define MLX5_CAP_FLOWTABLE_MAX(mdev, cap) \ MLX5_GET(flow_table_nic_cap, mdev->hca_caps_max[MLX5_CAP_FLOW_TABLE], cap) #define MLX5_CAP_ESW_FLOWTABLE(mdev, cap) \ MLX5_GET(flow_table_eswitch_cap, \ mdev->hca_caps_cur[MLX5_CAP_ESWITCH_FLOW_TABLE], cap) #define MLX5_CAP_ESW_FLOWTABLE_MAX(mdev, cap) \ MLX5_GET(flow_table_eswitch_cap, \ mdev->hca_caps_max[MLX5_CAP_ESWITCH_FLOW_TABLE], cap) #define MLX5_CAP_ESW_FLOWTABLE_FDB(mdev, cap) \ MLX5_CAP_ESW_FLOWTABLE(mdev, flow_table_properties_nic_esw_fdb.cap) #define MLX5_CAP_ESW_FLOWTABLE_FDB_MAX(mdev, cap) \ MLX5_CAP_ESW_FLOWTABLE_MAX(mdev, flow_table_properties_nic_esw_fdb.cap) #define MLX5_CAP_ESW_EGRESS_ACL(mdev, cap) \ MLX5_CAP_ESW_FLOWTABLE(mdev, flow_table_properties_esw_acl_egress.cap) #define MLX5_CAP_ESW_EGRESS_ACL_MAX(mdev, cap) \ MLX5_CAP_ESW_FLOWTABLE_MAX(mdev, flow_table_properties_esw_acl_egress.cap) #define MLX5_CAP_ESW_INGRESS_ACL(mdev, cap) \ MLX5_CAP_ESW_FLOWTABLE(mdev, flow_table_properties_esw_acl_ingress.cap) #define 
MLX5_CAP_ESW_INGRESS_ACL_MAX(mdev, cap) \ MLX5_CAP_ESW_FLOWTABLE_MAX(mdev, flow_table_properties_esw_acl_ingress.cap) #define MLX5_CAP_ESW(mdev, cap) \ MLX5_GET(e_switch_cap, \ mdev->hca_caps_cur[MLX5_CAP_ESWITCH], cap) #define MLX5_CAP_ESW_MAX(mdev, cap) \ MLX5_GET(e_switch_cap, \ mdev->hca_caps_max[MLX5_CAP_ESWITCH], cap) #define MLX5_CAP_ODP(mdev, cap)\ MLX5_GET(odp_cap, mdev->hca_caps_cur[MLX5_CAP_ODP], cap) #define MLX5_CAP_ODP_MAX(mdev, cap)\ MLX5_GET(odp_cap, mdev->hca_caps_max[MLX5_CAP_ODP], cap) #define MLX5_CAP_SNAPSHOT(mdev, cap) \ MLX5_GET(snapshot_cap, \ mdev->hca_caps_cur[MLX5_CAP_SNAPSHOT], cap) #define MLX5_CAP_SNAPSHOT_MAX(mdev, cap) \ MLX5_GET(snapshot_cap, \ mdev->hca_caps_max[MLX5_CAP_SNAPSHOT], cap) #define MLX5_CAP_EOIB_OFFLOADS(mdev, cap) \ MLX5_GET(per_protocol_networking_offload_caps,\ mdev->hca_caps_cur[MLX5_CAP_EOIB_OFFLOADS], cap) #define MLX5_CAP_EOIB_OFFLOADS_MAX(mdev, cap) \ MLX5_GET(per_protocol_networking_offload_caps,\ mdev->hca_caps_max[MLX5_CAP_EOIB_OFFLOADS], cap) #define MLX5_CAP_DEBUG(mdev, cap) \ MLX5_GET(debug_cap, \ mdev->hca_caps_cur[MLX5_CAP_DEBUG], cap) #define MLX5_CAP_DEBUG_MAX(mdev, cap) \ MLX5_GET(debug_cap, \ mdev->hca_caps_max[MLX5_CAP_DEBUG], cap) #define MLX5_CAP_QOS(mdev, cap) \ MLX5_GET(qos_cap,\ mdev->hca_caps_cur[MLX5_CAP_QOS], cap) #define MLX5_CAP_QOS_MAX(mdev, cap) \ MLX5_GET(qos_cap,\ mdev->hca_caps_max[MLX5_CAP_QOS], cap) #define MLX5_CAP_PCAM_FEATURE(mdev, fld) \ MLX5_GET(pcam_reg, (mdev)->caps.pcam, feature_cap_mask.enhanced_features.fld) #define MLX5_CAP_PCAM_REG(mdev, reg) \ MLX5_GET(pcam_reg, (mdev)->caps.pcam, port_access_reg_cap_mask.regs_5000_to_507f.reg) #define MLX5_CAP_MCAM_FEATURE(mdev, fld) \ MLX5_GET(mcam_reg, (mdev)->caps.mcam, mng_feature_cap_mask.enhanced_features.fld) #define MLX5_CAP_MCAM_REG(mdev, reg) \ MLX5_GET(mcam_reg, (mdev)->caps.mcam, mng_access_reg_cap_mask.access_regs.reg) #define MLX5_CAP_QCAM_REG(mdev, fld) \ MLX5_GET(qcam_reg, (mdev)->caps.qcam, 
qos_access_reg_cap_mask.reg_cap.fld) #define MLX5_CAP_QCAM_FEATURE(mdev, fld) \ MLX5_GET(qcam_reg, (mdev)->caps.qcam, qos_feature_cap_mask.feature_cap.fld) #define MLX5_CAP_FPGA(mdev, cap) \ MLX5_GET(fpga_cap, (mdev)->caps.fpga, cap) #define MLX5_CAP64_FPGA(mdev, cap) \ MLX5_GET64(fpga_cap, (mdev)->caps.fpga, cap) #define MLX5_CAP_TLS(mdev, cap) \ MLX5_GET(tls_capabilities, (mdev)->hca_caps_cur[MLX5_CAP_TLS], cap) enum { MLX5_CMD_STAT_OK = 0x0, MLX5_CMD_STAT_INT_ERR = 0x1, MLX5_CMD_STAT_BAD_OP_ERR = 0x2, MLX5_CMD_STAT_BAD_PARAM_ERR = 0x3, MLX5_CMD_STAT_BAD_SYS_STATE_ERR = 0x4, MLX5_CMD_STAT_BAD_RES_ERR = 0x5, MLX5_CMD_STAT_RES_BUSY = 0x6, MLX5_CMD_STAT_LIM_ERR = 0x8, MLX5_CMD_STAT_BAD_RES_STATE_ERR = 0x9, MLX5_CMD_STAT_IX_ERR = 0xa, MLX5_CMD_STAT_NO_RES_ERR = 0xf, MLX5_CMD_STAT_BAD_INP_LEN_ERR = 0x50, MLX5_CMD_STAT_BAD_OUTP_LEN_ERR = 0x51, MLX5_CMD_STAT_BAD_QP_STATE_ERR = 0x10, MLX5_CMD_STAT_BAD_PKT_ERR = 0x30, MLX5_CMD_STAT_BAD_SIZE_OUTS_CQES_ERR = 0x40, }; enum { MLX5_IEEE_802_3_COUNTERS_GROUP = 0x0, MLX5_RFC_2863_COUNTERS_GROUP = 0x1, MLX5_RFC_2819_COUNTERS_GROUP = 0x2, MLX5_RFC_3635_COUNTERS_GROUP = 0x3, MLX5_ETHERNET_EXTENDED_COUNTERS_GROUP = 0x5, MLX5_ETHERNET_DISCARD_COUNTERS_GROUP = 0x6, MLX5_PER_PRIORITY_COUNTERS_GROUP = 0x10, MLX5_PER_TRAFFIC_CLASS_COUNTERS_GROUP = 0x11, MLX5_PHYSICAL_LAYER_COUNTERS_GROUP = 0x12, MLX5_PHYSICAL_LAYER_STATISTICAL_GROUP = 0x16, MLX5_INFINIBAND_PORT_COUNTERS_GROUP = 0x20, }; enum { MLX5_PCIE_PERFORMANCE_COUNTERS_GROUP = 0x0, MLX5_PCIE_LANE_COUNTERS_GROUP = 0x1, MLX5_PCIE_TIMERS_AND_STATES_COUNTERS_GROUP = 0x2, }; enum { MLX5_CAP_PORT_TYPE_IB = 0x0, MLX5_CAP_PORT_TYPE_ETH = 0x1, }; enum { MLX5_CMD_HCA_CAP_MIN_WQE_INLINE_MODE_L2 = 0x0, MLX5_CMD_HCA_CAP_MIN_WQE_INLINE_MODE_VPORT_CONFIG = 0x1, MLX5_CMD_HCA_CAP_MIN_WQE_INLINE_MODE_NOT_REQUIRED = 0x2 }; enum mlx5_inline_modes { MLX5_INLINE_MODE_NONE, MLX5_INLINE_MODE_L2, MLX5_INLINE_MODE_IP, MLX5_INLINE_MODE_TCP_UDP, }; enum { MLX5_QUERY_VPORT_STATE_OUT_STATE_FOLLOW = 0x2, }; 
/* Return MLX5_MIN_PKEY_TABLE_SIZE shifted left by pkey_sz, or 0 when pkey_sz is greater than MLX5_MAX_LOG_PKEY_TABLE. NOTE(review): a negative pkey_sz is not rejected and would be an invalid shift count; confirm callers never pass one. */ static inline u16 mlx5_to_sw_pkey_sz(int pkey_sz) { if (pkey_sz > MLX5_MAX_LOG_PKEY_TABLE) return 0; return MLX5_MIN_PKEY_TABLE_SIZE << pkey_sz; } struct mlx5_ifc_mcia_reg_bits { u8 l[0x1]; u8 reserved_0[0x7]; u8 module[0x8]; u8 reserved_1[0x8]; u8 status[0x8]; u8 i2c_device_address[0x8]; u8 page_number[0x8]; u8 device_address[0x10]; u8 reserved_2[0x10]; u8 size[0x10]; u8 reserved_3[0x20]; u8 dword_0[0x20]; u8 dword_1[0x20]; u8 dword_2[0x20]; u8 dword_3[0x20]; u8 dword_4[0x20]; u8 dword_5[0x20]; u8 dword_6[0x20]; u8 dword_7[0x20]; u8 dword_8[0x20]; u8 dword_9[0x20]; u8 dword_10[0x20]; u8 dword_11[0x20]; }; #define MLX5_CMD_OP_QUERY_EEPROM 0x93c struct mlx5_mini_cqe8 { union { __be32 rx_hash_result; __be16 checksum; __be16 rsvd; struct { __be16 wqe_counter; u8 s_wqe_opcode; u8 reserved; } s_wqe_info; }; __be32 byte_cnt; }; enum { MLX5_NO_INLINE_DATA, MLX5_INLINE_DATA32_SEG, MLX5_INLINE_DATA64_SEG, MLX5_COMPRESSED, }; enum mlx5_exp_cqe_zip_recv_type { MLX5_CQE_FORMAT_HASH, MLX5_CQE_FORMAT_CSUM, }; #define MLX5E_CQE_FORMAT_MASK 0xc /* Return bits 3:2 of op_own (MLX5E_CQE_FORMAT_MASK shifted down by two). */ static inline int mlx5_get_cqe_format(const struct mlx5_cqe64 *cqe) { return (cqe->op_own & MLX5E_CQE_FORMAT_MASK) >> 2; } enum { MLX5_GEN_EVENT_SUBTYPE_DELAY_DROP_TIMEOUT = 0x1, MLX5_GEN_EVENT_SUBTYPE_PCI_POWER_CHANGE_EVENT = 0x5, }; enum { MLX5_FRL_LEVEL3 = 0x8, MLX5_FRL_LEVEL6 = 0x40, }; /* 8 regular priorities + 1 for multicast */ #define MLX5_NUM_BYPASS_FTS 9 #endif /* MLX5_DEVICE_H */ diff --git a/sys/dev/mlx5/mlx5_en/en.h b/sys/dev/mlx5/mlx5_en/en.h index e4b66bea8f60..b249a82d30ef 100644 --- a/sys/dev/mlx5/mlx5_en/en.h +++ b/sys/dev/mlx5/mlx5_en/en.h @@ -1,1221 +1,1222 @@ /*- * Copyright (c) 2015-2019 Mellanox Technologies. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. 
Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS `AS IS' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #ifndef _MLX5_EN_H_ #define _MLX5_EN_H_ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "opt_rss.h" #ifdef RSS #include #include #endif #include #include #include #include #include #include #include #include #include #include #define MLX5E_MAX_PRIORITY 8 #define MLX5E_MAX_FEC_10X_25X 4 #define MLX5E_MAX_FEC_50X 4 /* IEEE 802.1Qaz standard supported values */ #define IEEE_8021QAZ_MAX_TCS 8 #define MLX5E_PARAMS_MINIMUM_LOG_SQ_SIZE 0x7 #define MLX5E_PARAMS_DEFAULT_LOG_SQ_SIZE 0xa #define MLX5E_PARAMS_MAXIMUM_LOG_SQ_SIZE 0xe #define MLX5E_PARAMS_MINIMUM_LOG_RQ_SIZE 0x7 #define MLX5E_PARAMS_DEFAULT_LOG_RQ_SIZE 0xa #define MLX5E_PARAMS_MAXIMUM_LOG_RQ_SIZE 0xe #define MLX5E_MAX_BUSDMA_RX_SEGS 15 #ifndef MLX5E_MAX_RX_BYTES #define MLX5E_MAX_RX_BYTES MCLBYTES #endif #define MLX5E_PARAMS_DEFAULT_LRO_WQE_SZ \ MIN(65535, 7 * MLX5E_MAX_RX_BYTES) 
/* Dynamic interrupt moderation (DIM) defaults. */
#define MLX5E_DIM_DEFAULT_PROFILE 3
#define MLX5E_DIM_MAX_RX_CQ_MODERATION_PKTS_WITH_LRO 16

/* Default completion queue moderation values (microseconds / packets). */
#define MLX5E_PARAMS_DEFAULT_RX_CQ_MODERATION_USEC 0x10
#define MLX5E_PARAMS_DEFAULT_RX_CQ_MODERATION_USEC_FROM_CQE 0x3
#define MLX5E_PARAMS_DEFAULT_RX_CQ_MODERATION_PKTS 0x20
#define MLX5E_PARAMS_DEFAULT_TX_CQ_MODERATION_USEC 0x10
#define MLX5E_PARAMS_DEFAULT_TX_CQ_MODERATION_PKTS 0x20
#define MLX5E_PARAMS_DEFAULT_MIN_RX_WQES 0x80
#define MLX5E_PARAMS_DEFAULT_RX_HASH_LOG_TBL_SZ 0x7
#define MLX5E_CACHELINE_SIZE CACHE_LINE_SIZE

/*
 * MTU conversions: the hardware MTU is the software MTU plus the Ethernet
 * header, one VLAN encapsulation and the CRC; the "MB" variant additionally
 * adds MLX5E_NET_IP_ALIGN.
 */
#define MLX5E_HW2SW_MTU(hwmtu) \
    ((hwmtu) - (ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN + ETHER_CRC_LEN))
#define MLX5E_SW2HW_MTU(swmtu) \
    ((swmtu) + (ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN + ETHER_CRC_LEN))
#define MLX5E_SW2MB_MTU(swmtu) \
    (MLX5E_SW2HW_MTU(swmtu) + MLX5E_NET_IP_ALIGN)
#define MLX5E_MTU_MIN 72 /* Min MTU allowed by the kernel */
#define MLX5E_MTU_MAX MIN(ETHERMTU_JUMBO, MJUM16BYTES) /* Max MTU of Ethernet jumbo frames */

#define MLX5E_BUDGET_MAX 8192 /* RX and TX */
#define MLX5E_RX_BUDGET_MAX 256
#define MLX5E_SQ_BF_BUDGET 16
#define MLX5E_SQ_TX_QUEUE_SIZE 4096 /* SQ drbr queue size */

#define MLX5E_MAX_TX_NUM_TC 8 /* units */
#define MLX5E_MAX_TX_HEADER 192 /* bytes */
#define MLX5E_MAX_TX_PAYLOAD_SIZE 65536 /* bytes */
#define MLX5E_MAX_TX_MBUF_SIZE 65536 /* bytes */
#define MLX5E_MAX_TX_MBUF_FRAGS \
    ((MLX5_SEND_WQE_MAX_WQEBBS * MLX5_SEND_WQEBB_NUM_DS) - \
    (MLX5E_MAX_TX_HEADER / MLX5_SEND_WQE_DS) - \
    1 /* the maximum value of the DS counter is 0x3F and not 0x40 */) /* units */
/*
 * Inline budget: MLX5E_MAX_TX_HEADER minus the size of struct mlx5e_tx_wqe,
 * plus the size of its eth.inline_hdr_start member.
 */
#define MLX5E_MAX_TX_INLINE \
    (MLX5E_MAX_TX_HEADER - sizeof(struct mlx5e_tx_wqe) + \
    sizeof(((struct mlx5e_tx_wqe *)0)->eth.inline_hdr_start)) /* bytes */

/* NOTE(review): units are not stated here; presumably kbit/s -- confirm. */
#define MLX5E_100MB (100000)
#define MLX5E_1GB (1000000)

/* Zero *ptr from member `field` through the end of the structure. */
#define MLX5E_ZERO(ptr, field) \
    memset(&(ptr)->field, 0, \
    sizeof(*(ptr)) - __offsetof(__typeof(*(ptr)), field))

MALLOC_DECLARE(M_MLX5EN);

struct mlx5_core_dev;
struct mlx5e_cq;

/* Function type of a completion queue completion handler. */
typedef void (mlx5e_cq_comp_t)(struct mlx5_core_cq *, struct mlx5_eqe *);
/*
 * Logging helpers: forward to if_printf() with a severity tag and the
 * calling function, line number and current process ID prepended to the
 * caller's format string.
 */
#define mlx5_en_err(_dev, format, ...) \
    if_printf(_dev, "ERR: ""%s:%d:(pid %d): " format, \
    __func__, __LINE__, curthread->td_proc->p_pid, \
    ##__VA_ARGS__)

#define mlx5_en_warn(_dev, format, ...) \
    if_printf(_dev, "WARN: ""%s:%d:(pid %d): " format, \
    __func__, __LINE__, curthread->td_proc->p_pid, \
    ##__VA_ARGS__)

#define mlx5_en_info(_dev, format, ...) \
    if_printf(_dev, "INFO: ""%s:%d:(pid %d): " format, \
    __func__, __LINE__, curthread->td_proc->p_pid, \
    ##__VA_ARGS__)

/*
 * Expanders for the statistics lists below.  A list entry has the form
 * m(count, type, field, name-string, description-string, ...):
 *   MLX5E_STATS_COUNT   yields the count argument (entries sum up),
 *   MLX5E_STATS_VAR     yields a "type field;" member declaration,
 *   MLX5E_STATS_COUNTER yields a "counter_<type>_t field;" declaration,
 *   MLX5E_STATS_DESC    yields "name-string, description-string,".
 */
#define MLX5E_STATS_COUNT(a, ...) a
#define MLX5E_STATS_VAR(a, b, c, ...) b c;
#define MLX5E_STATS_COUNTER(a, b, c, ...) counter_##b##_t c;
#define MLX5E_STATS_DESC(a, b, c, d, e, ...) d, e,

#define MLX5E_VPORT_STATS(m) \
  /* HW counters */ \
  m(+1, u64, rx_packets, "rx_packets", "Received packets") \
  m(+1, u64, rx_bytes, "rx_bytes", "Received bytes") \
  m(+1, u64, tx_packets, "tx_packets", "Transmitted packets") \
  m(+1, u64, tx_bytes, "tx_bytes", "Transmitted bytes") \
  m(+1, u64, rx_error_packets, "rx_error_packets", "Received error packets") \
  m(+1, u64, rx_error_bytes, "rx_error_bytes", "Received error bytes") \
  m(+1, u64, tx_error_packets, "tx_error_packets", "Transmitted error packets") \
  m(+1, u64, tx_error_bytes, "tx_error_bytes", "Transmitted error bytes") \
  m(+1, u64, rx_unicast_packets, "rx_unicast_packets", "Received unicast packets") \
  m(+1, u64, rx_unicast_bytes, "rx_unicast_bytes", "Received unicast bytes") \
  m(+1, u64, tx_unicast_packets, "tx_unicast_packets", "Transmitted unicast packets") \
  m(+1, u64, tx_unicast_bytes, "tx_unicast_bytes", "Transmitted unicast bytes") \
  m(+1, u64, rx_multicast_packets, "rx_multicast_packets", "Received multicast packets") \
  m(+1, u64, rx_multicast_bytes, "rx_multicast_bytes", "Received multicast bytes") \
  m(+1, u64, tx_multicast_packets, "tx_multicast_packets", "Transmitted multicast packets") \
  m(+1, u64, tx_multicast_bytes, "tx_multicast_bytes", "Transmitted multicast bytes") \
  m(+1, u64, rx_broadcast_packets, "rx_broadcast_packets", "Received
broadcast packets") \
  m(+1, u64, rx_broadcast_bytes, "rx_broadcast_bytes", "Received broadcast bytes") \
  m(+1, u64, tx_broadcast_packets, "tx_broadcast_packets", "Transmitted broadcast packets") \
  m(+1, u64, tx_broadcast_bytes, "tx_broadcast_bytes", "Transmitted broadcast bytes") \
  m(+1, u64, rx_out_of_buffer, "rx_out_of_buffer", "Receive out of buffer, no recv wqes events") \
  /* SW counters */ \
  m(+1, u64, tso_packets, "tso_packets", "Transmitted TSO packets") \
  m(+1, u64, tso_bytes, "tso_bytes", "Transmitted TSO bytes") \
  m(+1, u64, lro_packets, "lro_packets", "Received LRO packets") \
  m(+1, u64, lro_bytes, "lro_bytes", "Received LRO bytes") \
  m(+1, u64, sw_lro_queued, "sw_lro_queued", "Packets queued for SW LRO") \
  m(+1, u64, sw_lro_flushed, "sw_lro_flushed", "Packets flushed from SW LRO") \
  m(+1, u64, rx_csum_good, "rx_csum_good", "Received checksum valid packets") \
  m(+1, u64, rx_csum_none, "rx_csum_none", "Received no checksum packets") \
  m(+1, u64, tx_csum_offload, "tx_csum_offload", "Transmit checksum offload packets") \
  m(+1, u64, tx_queue_dropped, "tx_queue_dropped", "Transmit queue dropped") \
  m(+1, u64, tx_defragged, "tx_defragged", "Transmit queue defragged") \
  m(+1, u64, rx_wqe_err, "rx_wqe_err", "Receive WQE errors") \
  m(+1, u64, tx_jumbo_packets, "tx_jumbo_packets", "TX packets greater than 1518 octets") \
  m(+1, u64, rx_steer_missed_packets, "rx_steer_missed_packets", "RX packets dropped by steering rule(s)")

/* Number of entries in MLX5E_VPORT_STATS (each entry contributes +1). */
#define MLX5E_VPORT_STATS_NUM (0 MLX5E_VPORT_STATS(MLX5E_STATS_COUNT))

/*
 * One u64 member per MLX5E_VPORT_STATS entry, preceded by a sysctl context
 * and a zero-length `arg` member placed directly before the counters.
 * NOTE(review): `arg` presumably lets the counters be addressed as an
 * array -- confirm against the users of this structure.
 */
struct mlx5e_vport_stats {
	struct sysctl_ctx_list ctx;
	u64 arg [0];
	MLX5E_VPORT_STATS(MLX5E_STATS_VAR)
};

/* Statistics list: IEEE 802.3 counter group. */
#define MLX5E_PPORT_IEEE802_3_STATS(m) \
  m(+1, u64, frames_tx, "frames_tx", "Frames transmitted") \
  m(+1, u64, frames_rx, "frames_rx", "Frames received") \
  m(+1, u64, check_seq_err, "check_seq_err", "Sequence errors") \
  m(+1, u64, alignment_err, "alignment_err", "Alignment errors") \
  m(+1, u64, octets_tx, "octets_tx", "Bytes transmitted") \
  m(+1, u64, octets_received, "octets_received", "Bytes received") \
  m(+1, u64, multicast_xmitted, "multicast_xmitted", "Multicast transmitted") \
  m(+1, u64, broadcast_xmitted, "broadcast_xmitted", "Broadcast transmitted") \
  m(+1, u64, multicast_rx, "multicast_rx", "Multicast received") \
  m(+1, u64, broadcast_rx, "broadcast_rx", "Broadcast received") \
  m(+1, u64, in_range_len_errors, "in_range_len_errors", "In range length errors") \
  m(+1, u64, out_of_range_len, "out_of_range_len", "Out of range length errors") \
  m(+1, u64, too_long_errors, "too_long_errors", "Too long errors") \
  m(+1, u64, symbol_err, "symbol_err", "Symbol errors") \
  m(+1, u64, mac_control_tx, "mac_control_tx", "MAC control transmitted") \
  m(+1, u64, mac_control_rx, "mac_control_rx", "MAC control received") \
  m(+1, u64, unsupported_op_rx, "unsupported_op_rx", "Unsupported operation received") \
  m(+1, u64, pause_ctrl_rx, "pause_ctrl_rx", "Pause control received") \
  m(+1, u64, pause_ctrl_tx, "pause_ctrl_tx", "Pause control transmitted")

/* Statistics list: RFC 2819 counter group. */
#define MLX5E_PPORT_RFC2819_STATS(m) \
  m(+1, u64, drop_events, "drop_events", "Dropped events") \
  m(+1, u64, octets, "octets", "Octets") \
  m(+1, u64, pkts, "pkts", "Packets") \
  m(+1, u64, broadcast_pkts, "broadcast_pkts", "Broadcast packets") \
  m(+1, u64, multicast_pkts, "multicast_pkts", "Multicast packets") \
  m(+1, u64, crc_align_errors, "crc_align_errors", "CRC alignment errors") \
  m(+1, u64, undersize_pkts, "undersize_pkts", "Undersized packets") \
  m(+1, u64, oversize_pkts, "oversize_pkts", "Oversized packets") \
  m(+1, u64, fragments, "fragments", "Fragments") \
  m(+1, u64, jabbers, "jabbers", "Jabbers") \
  m(+1, u64, collisions, "collisions", "Collisions")

/* Statistics list: RFC 2819 per-size-range counters (debug group). */
#define MLX5E_PPORT_RFC2819_STATS_DEBUG(m) \
  m(+1, u64, p64octets, "p64octets", "Bytes") \
  m(+1, u64, p65to127octets, "p65to127octets", "Bytes") \
  m(+1, u64, p128to255octets, "p128to255octets", "Bytes") \
  m(+1, u64, p256to511octets, "p256to511octets", "Bytes") \
  m(+1, u64, p512to1023octets, "p512to1023octets", "Bytes") \
  m(+1, u64,
p1024to1518octets, "p1024to1518octets", "Bytes") \
  m(+1, u64, p1519to2047octets, "p1519to2047octets", "Bytes") \
  m(+1, u64, p2048to4095octets, "p2048to4095octets", "Bytes") \
  m(+1, u64, p4096to8191octets, "p4096to8191octets", "Bytes") \
  m(+1, u64, p8192to10239octets, "p8192to10239octets", "Bytes")

/* Statistics list: RFC 2863 counter group (debug). */
#define MLX5E_PPORT_RFC2863_STATS_DEBUG(m) \
  m(+1, u64, in_octets, "in_octets", "In octets") \
  m(+1, u64, in_ucast_pkts, "in_ucast_pkts", "In unicast packets") \
  m(+1, u64, in_discards, "in_discards", "In discards") \
  m(+1, u64, in_errors, "in_errors", "In errors") \
  m(+1, u64, in_unknown_protos, "in_unknown_protos", "In unknown protocols") \
  m(+1, u64, out_octets, "out_octets", "Out octets") \
  m(+1, u64, out_ucast_pkts, "out_ucast_pkts", "Out unicast packets") \
  m(+1, u64, out_discards, "out_discards", "Out discards") \
  m(+1, u64, out_errors, "out_errors", "Out errors") \
  m(+1, u64, in_multicast_pkts, "in_multicast_pkts", "In multicast packets") \
  m(+1, u64, in_broadcast_pkts, "in_broadcast_pkts", "In broadcast packets") \
  m(+1, u64, out_multicast_pkts, "out_multicast_pkts", "Out multicast packets") \
  m(+1, u64, out_broadcast_pkts, "out_broadcast_pkts", "Out broadcast packets")

/*
 * Statistics list: Ethernet extended counter group (debug).  The two
 * ex_reserved_* entries are placeholders described only as "Reserved".
 */
#define MLX5E_PPORT_ETHERNET_EXTENDED_STATS_DEBUG(m) \
  m(+1, u64, port_transmit_wait, "port_transmit_wait", "Port transmit wait") \
  m(+1, u64, ecn_marked, "ecn_marked", "ECN marked") \
  m(+1, u64, no_buffer_discard_mc, "no_buffer_discard_mc", "No buffer discard mc") \
  m(+1, u64, rx_ebp, "rx_ebp", "RX EBP") \
  m(+1, u64, tx_ebp, "tx_ebp", "TX EBP") \
  m(+1, u64, rx_buffer_almost_full, "rx_buffer_almost_full", "RX buffer almost full") \
  m(+1, u64, rx_buffer_full, "rx_buffer_full", "RX buffer full") \
  m(+1, u64, rx_icrc_encapsulated, "rx_icrc_encapsulated", "RX ICRC encapsulated") \
  m(+1, u64, ex_reserved_0, "ex_reserved_0", "Reserved") \
  m(+1, u64, ex_reserved_1, "ex_reserved_1", "Reserved") \
  m(+1, u64, tx_stat_p64octets, "tx_stat_p64octets", "Bytes") \
  m(+1, u64, tx_stat_p65to127octets, "tx_stat_p65to127octets", "Bytes") \
  m(+1, u64, tx_stat_p128to255octets, "tx_stat_p128to255octets", "Bytes") \
  m(+1, u64, tx_stat_p256to511octets, "tx_stat_p256to511octets", "Bytes") \
  m(+1, u64, tx_stat_p512to1023octets, "tx_stat_p512to1023octets", "Bytes") \
  m(+1, u64, tx_stat_p1024to1518octets, "tx_stat_p1024to1518octets", "Bytes") \
  m(+1, u64, tx_stat_p1519to2047octets, "tx_stat_p1519to2047octets", "Bytes") \
  m(+1, u64, tx_stat_p2048to4095octets, "tx_stat_p2048to4095octets", "Bytes") \
  m(+1, u64, tx_stat_p4096to8191octets, "tx_stat_p4096to8191octets", "Bytes") \
  m(+1, u64, tx_stat_p8192to10239octets, "tx_stat_p8192to10239octets", "Bytes")

/* Statistics list: PHY statistical counters (debug). */
#define MLX5E_PPORT_STATISTICAL_DEBUG(m) \
  m(+1, u64, phy_time_since_last_clear, "phy_time_since_last_clear", \
      "Time since last clear in milliseconds") \
  m(+1, u64, phy_received_bits, "phy_received_bits", \
      "Total amount of traffic received in bits before error correction") \
  m(+1, u64, phy_symbol_errors, "phy_symbol_errors", \
      "Total number of symbol errors before error correction") \
  m(+1, u64, phy_corrected_bits, "phy_corrected_bits", \
      "Total number of corrected bits ") \
  m(+1, u64, phy_corrected_bits_lane0, "phy_corrected_bits_lane0", \
      "Total number of corrected bits for lane 0") \
  m(+1, u64, phy_corrected_bits_lane1, "phy_corrected_bits_lane1", \
      "Total number of corrected bits for lane 1") \
  m(+1, u64, phy_corrected_bits_lane2, "phy_corrected_bits_lane2", \
      "Total number of corrected bits for lane 2") \
  m(+1, u64, phy_corrected_bits_lane3, "phy_corrected_bits_lane3", \
      "Total number of corrected bits for lane 3")

/*
 * Statistics list: physical layer counters (debug).  Note that the
 * bip_errors_lane* fields are published under "edpl_bip_errors_lane*"
 * name strings.
 */
#define MLX5E_PPORT_PHYSICAL_LAYER_STATS_DEBUG(m) \
  m(+1, u64, time_since_last_clear, "time_since_last_clear", \
      "Time since the last counters clear event (msec)") \
  m(+1, u64, symbol_errors, "symbol_errors", "Symbol errors") \
  m(+1, u64, sync_headers_errors, "sync_headers_errors", \
      "Sync header error counter") \
  m(+1, u64, bip_errors_lane0, "edpl_bip_errors_lane0", \
      "Indicates the number of PRBS errors on
lane 0") \ m(+1, u64, bip_errors_lane1, "edpl_bip_errors_lane1", \ "Indicates the number of PRBS errors on lane 1") \ m(+1, u64, bip_errors_lane2, "edpl_bip_errors_lane2", \ "Indicates the number of PRBS errors on lane 2") \ m(+1, u64, bip_errors_lane3, "edpl_bip_errors_lane3", \ "Indicates the number of PRBS errors on lane 3") \ m(+1, u64, fc_corrected_blocks_lane0, "fc_corrected_blocks_lane0", \ "FEC correctable block counter lane 0") \ m(+1, u64, fc_corrected_blocks_lane1, "fc_corrected_blocks_lane1", \ "FEC correctable block counter lane 1") \ m(+1, u64, fc_corrected_blocks_lane2, "fc_corrected_blocks_lane2", \ "FEC correctable block counter lane 2") \ m(+1, u64, fc_corrected_blocks_lane3, "fc_corrected_blocks_lane3", \ "FEC correctable block counter lane 3") \ m(+1, u64, rs_corrected_blocks, "rs_corrected_blocks", \ "FEC correcable block counter") \ m(+1, u64, rs_uncorrectable_blocks, "rs_uncorrectable_blocks", \ "FEC uncorrecable block counter") \ m(+1, u64, rs_no_errors_blocks, "rs_no_errors_blocks", \ "The number of RS-FEC blocks received that had no errors") \ m(+1, u64, rs_single_error_blocks, "rs_single_error_blocks", \ "The number of corrected RS-FEC blocks received that had" \ "exactly 1 error symbol") \ m(+1, u64, rs_corrected_symbols_total, "rs_corrected_symbols_total", \ "Port FEC corrected symbol counter") \ m(+1, u64, rs_corrected_symbols_lane0, "rs_corrected_symbols_lane0", \ "FEC corrected symbol counter lane 0") \ m(+1, u64, rs_corrected_symbols_lane1, "rs_corrected_symbols_lane1", \ "FEC corrected symbol counter lane 1") \ m(+1, u64, rs_corrected_symbols_lane2, "rs_corrected_symbols_lane2", \ "FEC corrected symbol counter lane 2") \ m(+1, u64, rs_corrected_symbols_lane3, "rs_corrected_symbols_lane3", \ "FEC corrected symbol counter lane 3") /* Per priority statistics for PFC */ #define MLX5E_PPORT_PER_PRIO_STATS_SUB(m,n,p) \ m(n, p, +1, u64, rx_octets, "rx_octets", "Received octets") \ m(n, p, +1, u64, rx_uc_frames, "rx_uc_frames", "Received 
unicast frames") \
  m(n, p, +1, u64, rx_mc_frames, "rx_mc_frames", "Received multicast frames") \
  m(n, p, +1, u64, rx_bc_frames, "rx_bc_frames", "Received broadcast frames") \
  m(n, p, +1, u64, rx_frames, "rx_frames", "Received frames") \
  m(n, p, +1, u64, tx_octets, "tx_octets", "Transmitted octets") \
  m(n, p, +1, u64, tx_uc_frames, "tx_uc_frames", "Transmitted unicast frames") \
  m(n, p, +1, u64, tx_mc_frames, "tx_mc_frames", "Transmitted multicast frames") \
  m(n, p, +1, u64, tx_bc_frames, "tx_bc_frames", "Transmitted broadcast frames") \
  m(n, p, +1, u64, tx_frames, "tx_frames", "Transmitted frames") \
  m(n, p, +1, u64, rx_pause, "rx_pause", "Received pause frames") \
  m(n, p, +1, u64, rx_pause_duration, "rx_pause_duration", \
      "Received pause duration") \
  m(n, p, +1, u64, tx_pause, "tx_pause", "Transmitted pause frames") \
  m(n, p, +1, u64, tx_pause_duration, "tx_pause_duration", \
      "Transmitted pause duration") \
  m(n, p, +1, u64, rx_pause_transition, "rx_pause_transition", \
      "Received pause transitions") \
  m(n, p, +1, u64, rx_discards, "rx_discards", "Discarded received frames") \
  m(n, p, +1, u64, device_stall_minor_watermark, \
      "device_stall_minor_watermark", "Device stall minor watermark") \
  m(n, p, +1, u64, device_stall_critical_watermark, \
      "device_stall_critical_watermark", "Device stall critical watermark")

/*
 * Adapter that turns a per-priority entry into a regular five-argument
 * entry for `m`: the field becomes pri_<p>_<f>, the name string gets a
 * "prio<p>_" prefix and the description a "Priority <p> - " prefix.
 */
#define MLX5E_PPORT_PER_PRIO_STATS_PREFIX(m,p,c,t,f,s,d) \
  m(c, t, pri_##p##_##f, "prio" #p "_" s, "Priority " #p " - " d)

#define MLX5E_PPORT_PER_PRIO_STATS_NUM_PRIO 8

/* Statistics list: the per-priority entries instantiated for priorities 0..7. */
#define MLX5E_PPORT_PER_PRIO_STATS(m) \
  MLX5E_PPORT_PER_PRIO_STATS_SUB(MLX5E_PPORT_PER_PRIO_STATS_PREFIX,m,0) \
  MLX5E_PPORT_PER_PRIO_STATS_SUB(MLX5E_PPORT_PER_PRIO_STATS_PREFIX,m,1) \
  MLX5E_PPORT_PER_PRIO_STATS_SUB(MLX5E_PPORT_PER_PRIO_STATS_PREFIX,m,2) \
  MLX5E_PPORT_PER_PRIO_STATS_SUB(MLX5E_PPORT_PER_PRIO_STATS_PREFIX,m,3) \
  MLX5E_PPORT_PER_PRIO_STATS_SUB(MLX5E_PPORT_PER_PRIO_STATS_PREFIX,m,4) \
  MLX5E_PPORT_PER_PRIO_STATS_SUB(MLX5E_PPORT_PER_PRIO_STATS_PREFIX,m,5) \
  MLX5E_PPORT_PER_PRIO_STATS_SUB(MLX5E_PPORT_PER_PRIO_STATS_PREFIX,m,6) \
  MLX5E_PPORT_PER_PRIO_STATS_SUB(MLX5E_PPORT_PER_PRIO_STATS_PREFIX,m,7)

/*
 * Statistics list: PCIe performance counters.  Entries carry a sixth
 * argument (pcie_perf_counters) after the description string; the
 * MLX5E_STATS_* expanders accept and discard extra trailing arguments.
 * Note that the life_time_counter_high field is published under the
 * name string "life_time_counter".
 */
#define MLX5E_PCIE_PERFORMANCE_COUNTERS_64(m) \
  m(+1, u64, life_time_counter_high, "life_time_counter", \
      "Life time counter.", pcie_perf_counters) \
  m(+1, u64, tx_overflow_buffer_pkt, "tx_overflow_buffer_pkt", \
      "The number of packets dropped due to lack of PCIe buffers " \
      "in receive path from NIC port toward the hosts.", \
      pcie_perf_counters) \
  m(+1, u64, tx_overflow_buffer_marked_pkt, \
      "tx_overflow_buffer_marked_pkt", \
      "The number of packets marked due to lack of PCIe buffers " \
      "in receive path from NIC port toward the hosts.", \
      pcie_perf_counters)

/*
 * NOTE(review): the _32 suffix presumably refers to the width of the
 * hardware counter; the members declared from this list are u64 -- confirm.
 */
#define MLX5E_PCIE_PERFORMANCE_COUNTERS_32(m) \
  m(+1, u64, rx_errors, "rx_errors", \
      "Number of transitions to recovery due to Framing " \
      "errors and CRC errors.", pcie_perf_counters) \
  m(+1, u64, tx_errors, "tx_errors", "Number of transitions " \
      "to recovery due to EIEOS and TS errors.", pcie_perf_counters) \
  m(+1, u64, l0_to_recovery_eieos, "l0_to_recovery_eieos", "Number of " \
      "transitions to recovery due to getting EIEOS.", pcie_perf_counters)\
  m(+1, u64, l0_to_recovery_ts, "l0_to_recovery_ts", "Number of " \
      "transitions to recovery due to getting TS.", pcie_perf_counters) \
  m(+1, u64, l0_to_recovery_framing, "l0_to_recovery_framing", "Number "\
      "of transitions to recovery due to identifying framing " \
      "errors at gen3/4.", pcie_perf_counters) \
  m(+1, u64, l0_to_recovery_retrain, "l0_to_recovery_retrain", \
      "Number of transitions to recovery due to link retrain request " \
      "from data link.", pcie_perf_counters) \
  m(+1, u64, crc_error_dllp, "crc_error_dllp", "Number of transitions " \
      "to recovery due to identifying CRC DLLP errors.", \
      pcie_perf_counters) \
  m(+1, u64, crc_error_tlp, "crc_error_tlp", "Number of transitions to "\
      "recovery due to identifying CRC TLP errors.", pcie_perf_counters) \
  m(+1, u64, outbound_stalled_reads, "outbound_stalled_reads", \
      "The
percentage of time within the last second that the NIC had " \
      "outbound non-posted read requests but could not perform the " \
      "operation due to insufficient non-posted credits.", \
      pcie_perf_counters) \
  m(+1, u64, outbound_stalled_writes, "outbound_stalled_writes", \
      "The percentage of time within the last second that the NIC had " \
      "outbound posted writes requests but could not perform the " \
      "operation due to insufficient posted credits.", \
      pcie_perf_counters) \
  m(+1, u64, outbound_stalled_reads_events, \
      "outbound_stalled_reads_events", "The number of events where " \
      "outbound_stalled_reads was above a threshold.", \
      pcie_perf_counters) \
  m(+1, u64, outbound_stalled_writes_events, \
      "outbound_stalled_writes_events", \
      "The number of events where outbound_stalled_writes was above " \
      "a threshold.", pcie_perf_counters)

/*
 * Statistics list: PCIe timers and state counters.  Entries carry a sixth
 * argument (pcie_timers_states) after the description string.
 * Description strings previously reading "Time form start" are corrected
 * to "Time from start".
 */
#define MLX5E_PCIE_TIMERS_AND_STATES_COUNTERS_32(m) \
  m(+1, u64, time_to_boot_image_start, "time_to_boot_image_start", \
      "Time from start until FW boot image starts running in usec.", \
      pcie_timers_states) \
  m(+1, u64, time_to_link_image, "time_to_link_image", \
      "Time from start until FW pci_link image starts running in usec.", \
      pcie_timers_states) \
  m(+1, u64, calibration_time, "calibration_time", \
      "Time it took FW to do calibration in usec.", \
      pcie_timers_states) \
  m(+1, u64, time_to_first_perst, "time_to_first_perst", \
      "Time from start until FW handle first perst. in usec.", \
      pcie_timers_states) \
  m(+1, u64, time_to_detect_state, "time_to_detect_state", \
      "Time from start until first transition to LTSSM.Detect_Q in usec", \
      pcie_timers_states) \
  m(+1, u64, time_to_l0, "time_to_l0", \
      "Time from start until first transition to LTSSM.L0 in usec", \
      pcie_timers_states) \
  m(+1, u64, time_to_crs_en, "time_to_crs_en", \
      "Time from start until crs is enabled in usec", \
      pcie_timers_states) \
  m(+1, u64, time_to_plastic_image_start, "time_to_plastic_image_start",\
      "Time from start until FW plastic image starts running in usec.", \
      pcie_timers_states) \
  m(+1, u64, time_to_iron_image_start, "time_to_iron_image_start", \
      "Time from start until FW iron image starts running in usec.", \
      pcie_timers_states) \
  m(+1, u64, perst_handler, "perst_handler", \
      "Number of persts arrived.", pcie_timers_states) \
  m(+1, u64, times_in_l1, "times_in_l1", \
      "Number of times LTSSM entered L1 flow.", pcie_timers_states) \
  m(+1, u64, times_in_l23, "times_in_l23", \
      "Number of times LTSSM entered L23 flow.", pcie_timers_states) \
  m(+1, u64, dl_down, "dl_down", \
      "Number of moves for DL_active to DL_down.", pcie_timers_states) \
  m(+1, u64, config_cycle1usec, "config_cycle1usec", \
      "Number of configuration requests that firmware " \
      "handled in less than 1 usec.", pcie_timers_states) \
  m(+1, u64, config_cycle2to7usec, "config_cycle2to7usec", \
      "Number of configuration requests that firmware " \
      "handled within 2 to 7 usec.", pcie_timers_states) \
  m(+1, u64, config_cycle8to15usec, "config_cycle8to15usec", \
      "Number of configuration requests that firmware " \
      "handled within 8 to 15 usec.", pcie_timers_states) \
  m(+1, u64, config_cycle16to63usec, "config_cycle16to63usec", \
      "Number of configuration requests that firmware " \
      "handled within 16 to 63 usec.", pcie_timers_states) \
  m(+1, u64, config_cycle64usec, "config_cycle64usec", \
      "Number of configuration requests that firmware " \
      "handled took more than 64 usec.", pcie_timers_states) \
  m(+1, u64,
correctable_err_msg_sent, "correctable_err_msg_sent", \
      "Number of correctable error messages sent.", pcie_timers_states) \
  m(+1, u64, non_fatal_err_msg_sent, "non_fatal_err_msg_sent", \
      "Number of non-Fatal error msg sent.", pcie_timers_states) \
  m(+1, u64, fatal_err_msg_sent, "fatal_err_msg_sent", \
      "Number of fatal error msg sent.", pcie_timers_states)

/*
 * Statistics list: one error counter per PCI lane (lanes 0..15).  Entries
 * carry a sixth argument (pcie_lanes_counters) after the description string.
 */
#define MLX5E_PCIE_LANE_COUNTERS_32(m) \
  m(+1, u64, error_counter_lane0, "error_counter_lane0", \
      "Error counter for PCI lane 0", pcie_lanes_counters) \
  m(+1, u64, error_counter_lane1, "error_counter_lane1", \
      "Error counter for PCI lane 1", pcie_lanes_counters) \
  m(+1, u64, error_counter_lane2, "error_counter_lane2", \
      "Error counter for PCI lane 2", pcie_lanes_counters) \
  m(+1, u64, error_counter_lane3, "error_counter_lane3", \
      "Error counter for PCI lane 3", pcie_lanes_counters) \
  m(+1, u64, error_counter_lane4, "error_counter_lane4", \
      "Error counter for PCI lane 4", pcie_lanes_counters) \
  m(+1, u64, error_counter_lane5, "error_counter_lane5", \
      "Error counter for PCI lane 5", pcie_lanes_counters) \
  m(+1, u64, error_counter_lane6, "error_counter_lane6", \
      "Error counter for PCI lane 6", pcie_lanes_counters) \
  m(+1, u64, error_counter_lane7, "error_counter_lane7", \
      "Error counter for PCI lane 7", pcie_lanes_counters) \
  m(+1, u64, error_counter_lane8, "error_counter_lane8", \
      "Error counter for PCI lane 8", pcie_lanes_counters) \
  m(+1, u64, error_counter_lane9, "error_counter_lane9", \
      "Error counter for PCI lane 9", pcie_lanes_counters) \
  m(+1, u64, error_counter_lane10, "error_counter_lane10", \
      "Error counter for PCI lane 10", pcie_lanes_counters) \
  m(+1, u64, error_counter_lane11, "error_counter_lane11", \
      "Error counter for PCI lane 11", pcie_lanes_counters) \
  m(+1, u64, error_counter_lane12, "error_counter_lane12", \
      "Error counter for PCI lane 12", pcie_lanes_counters) \
  m(+1, u64, error_counter_lane13, "error_counter_lane13", \
      "Error counter for PCI lane 13", pcie_lanes_counters) \
  m(+1, u64,
error_counter_lane14, "error_counter_lane14", \
      "Error counter for PCI lane 14", pcie_lanes_counters) \
  m(+1, u64, error_counter_lane15, "error_counter_lane15", \
      "Error counter for PCI lane 15", pcie_lanes_counters)

/*
 * Make sure to update mlx5e_update_pport_counters()
 * when adding a new MLX5E_PPORT_STATS block
 */
#define MLX5E_PPORT_STATS(m) \
  MLX5E_PPORT_PER_PRIO_STATS(m) \
  MLX5E_PPORT_IEEE802_3_STATS(m) \
  MLX5E_PPORT_RFC2819_STATS(m)

/* Concatenation of all debug statistics lists, in declaration order. */
#define MLX5E_PORT_STATS_DEBUG(m) \
  MLX5E_PPORT_RFC2819_STATS_DEBUG(m) \
  MLX5E_PPORT_RFC2863_STATS_DEBUG(m) \
  MLX5E_PPORT_PHYSICAL_LAYER_STATS_DEBUG(m) \
  MLX5E_PPORT_ETHERNET_EXTENDED_STATS_DEBUG(m) \
  MLX5E_PPORT_STATISTICAL_DEBUG(m) \
  MLX5E_PCIE_PERFORMANCE_COUNTERS_64(m) \
  MLX5E_PCIE_PERFORMANCE_COUNTERS_32(m) \
  MLX5E_PCIE_TIMERS_AND_STATES_COUNTERS_32(m) \
  MLX5E_PCIE_LANE_COUNTERS_32(m)

/*
 * Entry counts: each expands its list with MLX5E_STATS_COUNT, producing
 * "0 +1 +1 ..." -- a constant expression equal to the number of entries.
 */
#define MLX5E_PPORT_IEEE802_3_STATS_NUM \
  (0 MLX5E_PPORT_IEEE802_3_STATS(MLX5E_STATS_COUNT))
#define MLX5E_PPORT_RFC2819_STATS_NUM \
  (0 MLX5E_PPORT_RFC2819_STATS(MLX5E_STATS_COUNT))
#define MLX5E_PPORT_STATS_NUM \
  (0 MLX5E_PPORT_STATS(MLX5E_STATS_COUNT))

#define MLX5E_PPORT_PER_PRIO_STATS_NUM \
  (0 MLX5E_PPORT_PER_PRIO_STATS(MLX5E_STATS_COUNT))
#define MLX5E_PPORT_RFC2819_STATS_DEBUG_NUM \
  (0 MLX5E_PPORT_RFC2819_STATS_DEBUG(MLX5E_STATS_COUNT))
#define MLX5E_PPORT_RFC2863_STATS_DEBUG_NUM \
  (0 MLX5E_PPORT_RFC2863_STATS_DEBUG(MLX5E_STATS_COUNT))
#define MLX5E_PPORT_PHYSICAL_LAYER_STATS_DEBUG_NUM \
  (0 MLX5E_PPORT_PHYSICAL_LAYER_STATS_DEBUG(MLX5E_STATS_COUNT))
#define MLX5E_PPORT_ETHERNET_EXTENDED_STATS_DEBUG_NUM \
  (0 MLX5E_PPORT_ETHERNET_EXTENDED_STATS_DEBUG(MLX5E_STATS_COUNT))
#define MLX5E_PPORT_STATISTICAL_DEBUG_NUM \
  (0 MLX5E_PPORT_STATISTICAL_DEBUG(MLX5E_STATS_COUNT))
#define MLX5E_PORT_STATS_DEBUG_NUM \
  (0 MLX5E_PORT_STATS_DEBUG(MLX5E_STATS_COUNT))

/* One member per MLX5E_PPORT_STATS entry, after `ctx` and the `arg` marker. */
struct mlx5e_pport_stats {
	struct sysctl_ctx_list ctx;
	u64 arg [0];
	MLX5E_PPORT_STATS(MLX5E_STATS_VAR)
};

struct mlx5e_port_stats_debug {
	struct sysctl_ctx_list ctx;
	u64 arg [0];
	MLX5E_PORT_STATS_DEBUG(MLX5E_STATS_VAR)
};

/*
 * Statistics list: per receive queue counters.
 * NOTE(review): csum_none and wqe_err share the generic description
 * "Received packets" -- confirm whether more specific text is intended.
 */
#define MLX5E_RQ_STATS(m) \
  m(+1, u64, packets, "packets", "Received packets") \
  m(+1, u64, bytes, "bytes", "Received bytes") \
  m(+1, u64, csum_none, "csum_none", "Received packets") \
  m(+1, u64, lro_packets, "lro_packets", "Received LRO packets") \
  m(+1, u64, lro_bytes, "lro_bytes", "Received LRO bytes") \
  m(+1, u64, sw_lro_queued, "sw_lro_queued", "Packets queued for SW LRO") \
  m(+1, u64, sw_lro_flushed, "sw_lro_flushed", "Packets flushed from SW LRO") \
  m(+1, u64, wqe_err, "wqe_err", "Received packets")

#define MLX5E_RQ_STATS_NUM (0 MLX5E_RQ_STATS(MLX5E_STATS_COUNT))

struct mlx5e_rq_stats {
	struct sysctl_ctx_list ctx;
	u64 arg [0];
	MLX5E_RQ_STATS(MLX5E_STATS_VAR)
};

/*
 * Statistics list: per send queue counters.
 * NOTE(review): the '+' in front of the cqe_err entry looks like the
 * added-line marker of the enclosing patch rather than C source -- confirm
 * before compiling this text as a header.
 */
#define MLX5E_SQ_STATS(m) \
  m(+1, u64, packets, "packets", "Transmitted packets") \
  m(+1, u64, bytes, "bytes", "Transmitted bytes") \
  m(+1, u64, tso_packets, "tso_packets", "Transmitted packets") \
  m(+1, u64, tso_bytes, "tso_bytes", "Transmitted bytes") \
  m(+1, u64, csum_offload_none, "csum_offload_none", "Transmitted packets") \
  m(+1, u64, defragged, "defragged", "Transmitted packets") \
  m(+1, u64, dropped, "dropped", "Transmitted packets") \
  m(+1, u64, enobuf, "enobuf", "Transmitted packets") \
+ m(+1, u64, cqe_err, "cqe_err", "Transmit CQE errors") \
  m(+1, u64, nop, "nop", "Transmitted packets")

#define MLX5E_SQ_STATS_NUM (0 MLX5E_SQ_STATS(MLX5E_STATS_COUNT))

struct mlx5e_sq_stats {
	struct sysctl_ctx_list ctx;
	u64 arg [0];
	MLX5E_SQ_STATS(MLX5E_STATS_VAR)
};

/* Aggregate of the vport, pport and debug statistics blocks. */
struct mlx5e_stats {
	struct mlx5e_vport_stats vport;
	struct mlx5e_pport_stats pport;
	struct mlx5e_port_stats_debug port_stats_debug;
};

/* Creation parameters: a context buffer sized by MLX5_ST_SZ_DW plus WQ parameters. */
struct mlx5e_rq_param {
	u32 rqc [MLX5_ST_SZ_DW(rqc)];
	struct mlx5_wq_param wq;
};

struct mlx5e_sq_param {
	u32 sqc [MLX5_ST_SZ_DW(sqc)];
	struct mlx5_wq_param wq;
};

struct mlx5e_cq_param {
	u32 cqc [MLX5_ST_SZ_DW(cqc)];
	struct mlx5_wq_param wq;
};

struct mlx5e_params {
	u8 log_sq_size;
	u8 log_rq_size;
	u16 num_channels;
	u8 default_vlan_prio;
	u8 num_tc;
	u8
rx_cq_moderation_mode;
	u8 tx_cq_moderation_mode;
	u16 rx_cq_moderation_usec;
	u16 rx_cq_moderation_pkts;
	u16 tx_cq_moderation_usec;
	u16 tx_cq_moderation_pkts;
	u16 min_rx_wqes;
	bool hw_lro_en;
	bool cqe_zipping_en;
	u32 lro_wqe_sz;
	u16 rx_hash_log_tbl_sz;
	/* the two pause-frame controls are explicitly 4-byte aligned */
	u32 tx_pauseframe_control __aligned(4);
	u32 rx_pauseframe_control __aligned(4);
	u16 tx_max_inline;
	u8 tx_min_inline_mode;
	u8 tx_priority_flow_control;
	u8 rx_priority_flow_control;
	u8 channels_rsss;
};

/*
 * Tunable parameter list, in the same entry format as the statistics
 * lists: m(count, type, field, name-string, description-string).
 */
#define MLX5E_PARAMS(m) \
  m(+1, u64, tx_queue_size_max, "tx_queue_size_max", "Max send queue size") \
  m(+1, u64, rx_queue_size_max, "rx_queue_size_max", "Max receive queue size") \
  m(+1, u64, tx_queue_size, "tx_queue_size", "Default send queue size") \
  m(+1, u64, rx_queue_size, "rx_queue_size", "Default receive queue size") \
  m(+1, u64, channels, "channels", "Default number of channels") \
  m(+1, u64, channels_rsss, "channels_rsss", "Default channels receive side scaling stride") \
  m(+1, u64, coalesce_usecs_max, "coalesce_usecs_max", "Maximum usecs for joining packets") \
  m(+1, u64, coalesce_pkts_max, "coalesce_pkts_max", "Maximum packets to join") \
  m(+1, u64, rx_coalesce_usecs, "rx_coalesce_usecs", "Limit in usec for joining rx packets") \
  m(+1, u64, rx_coalesce_pkts, "rx_coalesce_pkts", "Maximum number of rx packets to join") \
  m(+1, u64, rx_coalesce_mode, "rx_coalesce_mode", "0: EQE fixed mode 1: CQE fixed mode 2: EQE auto mode 3: CQE auto mode") \
  m(+1, u64, tx_coalesce_usecs, "tx_coalesce_usecs", "Limit in usec for joining tx packets") \
  m(+1, u64, tx_coalesce_pkts, "tx_coalesce_pkts", "Maximum number of tx packets to join") \
  m(+1, u64, tx_coalesce_mode, "tx_coalesce_mode", "0: EQE mode 1: CQE mode") \
  m(+1, u64, tx_completion_fact, "tx_completion_fact", "1..MAX: Completion event ratio") \
  m(+1, u64, tx_completion_fact_max, "tx_completion_fact_max", "Maximum completion event ratio") \
  m(+1, u64, hw_lro, "hw_lro", "set to enable hw_lro") \
  m(+1, u64, cqe_zipping, "cqe_zipping", "0 : CQE zipping disabled") \
  m(+1,
u64, modify_tx_dma, "modify_tx_dma", "0: Enable TX 1: Disable TX") \
  m(+1, u64, modify_rx_dma, "modify_rx_dma", "0: Enable RX 1: Disable RX") \
  m(+1, u64, diag_pci_enable, "diag_pci_enable", "0: Disabled 1: Enabled") \
  m(+1, u64, diag_general_enable, "diag_general_enable", "0: Disabled 1: Enabled") \
  m(+1, u64, hw_mtu, "hw_mtu", "Current hardware MTU value") \
  m(+1, u64, mc_local_lb, "mc_local_lb", "0: Local multicast loopback enabled 1: Disabled") \
  m(+1, u64, uc_local_lb, "uc_local_lb", "0: Local unicast loopback enabled 1: Disabled")

/* Number of entries in MLX5E_PARAMS. */
#define MLX5E_PARAMS_NUM (0 MLX5E_PARAMS(MLX5E_STATS_COUNT))

/*
 * One u64 member per MLX5E_PARAMS entry (after the zero-length `arg`
 * marker), followed by additional settings that are not part of the list.
 */
struct mlx5e_params_ethtool {
	u64 arg [0];
	MLX5E_PARAMS(MLX5E_STATS_VAR)
	u64 max_bw_value[IEEE_8021QAZ_MAX_TCS];
	u8 max_bw_share[IEEE_8021QAZ_MAX_TCS];
	u8 prio_tc[MLX5E_MAX_PRIORITY];
	u8 dscp2prio[MLX5_MAX_SUPPORTED_DSCP];
	u8 trust_state;
	u8 fec_mask_10x_25x[MLX5E_MAX_FEC_10X_25X];
	u16 fec_mask_50x[MLX5E_MAX_FEC_50X];
	u8 fec_avail_10x_25x[MLX5E_MAX_FEC_10X_25X];
	u16 fec_avail_50x[MLX5E_MAX_FEC_50X];
	u32 fec_mode_active;
	u32 hw_mtu_msb;
	s32 hw_val_temp[MLX5_MAX_TEMPERATURE];
	u32 hw_num_temp;
};

struct mlx5e_cq {
	/* data path - accessed per cqe */
	struct mlx5_cqwq wq;

	/* data path - accessed per HW polling */
	struct mlx5_core_cq mcq;

	/* control */
	struct mlx5e_priv *priv;
	struct mlx5_wq_ctrl wq_ctrl;
} __aligned(MLX5E_CACHELINE_SIZE);

/* Per receive descriptor bookkeeping: DMA map, data pointer and mbuf. */
struct mlx5e_rq_mbuf {
	bus_dmamap_t dma_map;
	caddr_t data;
	struct mbuf *mbuf;
};

struct mlx5e_rq {
	/* persistent fields */
	struct mtx mtx;
	struct mlx5e_rq_stats stats;
	struct callout watchdog;

	/* data path */
	/*
	 * mlx5e_rq_zero_start names the first member after the persistent
	 * fields (for use as the `field` argument of MLX5E_ZERO, presumably
	 * -- confirm against the callers).
	 */
#define	mlx5e_rq_zero_start wq
	struct mlx5_wq_ll wq;
	bus_dma_tag_t dma_tag;
	u32 wqe_sz;
	u32 nsegs;
	struct mlx5e_rq_mbuf *mbuf;
	struct ifnet *ifp;
	struct mlx5e_cq cq;
	struct lro_ctrl lro;
	volatile int enabled;
	int ix;

	/* Dynamic Interrupt Moderation */
	struct net_dim dim;

	/* control */
	struct mlx5_wq_ctrl wq_ctrl;
	u32 rqn;
	struct mlx5e_channel *channel;
} __aligned(MLX5E_CACHELINE_SIZE);

struct mlx5e_sq_mbuf {
	bus_dmamap_t dma_map;
	struct
mbuf *mbuf; volatile s32 *p_refcount; /* in use refcount, if any */ u32 num_bytes; u32 num_wqebbs; }; enum { MLX5E_SQ_READY, MLX5E_SQ_FULL }; struct mlx5e_sq { /* persistant fields */ struct mtx lock; struct mtx comp_lock; struct mlx5e_sq_stats stats; struct callout cev_callout; /* data path */ #define mlx5e_sq_zero_start dma_tag bus_dma_tag_t dma_tag; /* dirtied @completion */ u16 cc; /* dirtied @xmit */ u16 pc __aligned(MLX5E_CACHELINE_SIZE); u16 cev_counter; /* completion event counter */ u16 cev_factor; /* completion event factor */ u16 cev_next_state; /* next completion event state */ #define MLX5E_CEV_STATE_INITIAL 0 /* timer not started */ #define MLX5E_CEV_STATE_SEND_NOPS 1 /* send NOPs */ #define MLX5E_CEV_STATE_HOLD_NOPS 2 /* don't send NOPs yet */ u16 running; /* set if SQ is running */ union { u32 d32[2]; u64 d64; } doorbell; struct mlx5e_cq cq; /* pointers to per packet info: write@xmit, read@completion */ struct mlx5e_sq_mbuf *mbuf; /* read only */ struct mlx5_wq_cyc wq; void __iomem *uar_map; struct ifnet *ifp; u32 sqn; u32 mkey_be; u16 max_inline; u8 min_inline_mode; u8 min_insert_caps; #define MLX5E_INSERT_VLAN 1 #define MLX5E_INSERT_NON_VLAN 2 /* control path */ struct mlx5_wq_ctrl wq_ctrl; struct mlx5e_priv *priv; int tc; } __aligned(MLX5E_CACHELINE_SIZE); static inline bool mlx5e_sq_has_room_for(struct mlx5e_sq *sq, u16 n) { u16 cc = sq->cc; u16 pc = sq->pc; return ((sq->wq.sz_m1 & (cc - pc)) >= n || cc == pc); } static inline u32 mlx5e_sq_queue_level(struct mlx5e_sq *sq) { u16 cc; u16 pc; if (sq == NULL) return (0); cc = sq->cc; pc = sq->pc; return (((sq->wq.sz_m1 & (pc - cc)) * IF_SND_QUEUE_LEVEL_MAX) / sq->wq.sz_m1); } struct mlx5e_channel { struct mlx5e_rq rq; struct m_snd_tag tag; struct mlx5e_sq sq[MLX5E_MAX_TX_NUM_TC]; struct mlx5e_priv *priv; struct completion completion; int ix; } __aligned(MLX5E_CACHELINE_SIZE); enum mlx5e_traffic_types { MLX5E_TT_IPV4_TCP, MLX5E_TT_IPV6_TCP, MLX5E_TT_IPV4_UDP, MLX5E_TT_IPV6_UDP, 
	MLX5E_TT_IPV4_IPSEC_AH,
	MLX5E_TT_IPV6_IPSEC_AH,
	MLX5E_TT_IPV4_IPSEC_ESP,
	MLX5E_TT_IPV6_IPSEC_ESP,
	MLX5E_TT_IPV4,
	MLX5E_TT_IPV6,
	MLX5E_TT_ANY,
	MLX5E_NUM_TT,	/* number of traffic types; keep last */
};

enum {
	MLX5E_RQT_SPREADING = 0,
	MLX5E_RQT_DEFAULT_RQ = 1,
	MLX5E_NUM_RQT = 2,
};

struct mlx5_flow_rule;

struct mlx5e_eth_addr_info {
	u8 addr [ETH_ALEN + 2];
	u32 tt_vec;
	/* flow table rule per traffic type */
	struct mlx5_flow_rule *ft_rule[MLX5E_NUM_TT];
};

#define MLX5E_ETH_ADDR_HASH_SIZE (1 << BITS_PER_BYTE)

struct mlx5e_eth_addr_hash_node;

/* Head of one hash bucket's list of address nodes. */
struct mlx5e_eth_addr_hash_head {
	struct mlx5e_eth_addr_hash_node *lh_first;
};

/*
 * Ethernet address database: hash tables for interface unicast (if_uc)
 * and multicast (if_mc) addresses plus the broadcast, all-multicast and
 * promiscuous entries with their enable flags.
 */
struct mlx5e_eth_addr_db {
	struct mlx5e_eth_addr_hash_head if_uc[MLX5E_ETH_ADDR_HASH_SIZE];
	struct mlx5e_eth_addr_hash_head if_mc[MLX5E_ETH_ADDR_HASH_SIZE];
	struct mlx5e_eth_addr_info broadcast;
	struct mlx5e_eth_addr_info allmulti;
	struct mlx5e_eth_addr_info promisc;
	bool broadcast_enabled;
	bool allmulti_enabled;
	bool promisc_enabled;
};

enum {
	MLX5E_STATE_ASYNC_EVENTS_ENABLE,
	MLX5E_STATE_OPENED,
};

enum {
	MLX5_BW_NO_LIMIT = 0,
	MLX5_100_MBPS_UNIT = 3,
	MLX5_GBPS_UNIT = 4,
};

/* VLAN state: a bitmap of active VLAN IDs and the matching flow rules. */
struct mlx5e_vlan_db {
	unsigned long active_vlans[BITS_TO_LONGS(VLAN_N_VID)];
	struct mlx5_flow_rule *active_vlans_ft_rule[VLAN_N_VID];
	struct mlx5_flow_rule *untagged_ft_rule;
	struct mlx5_flow_rule *any_cvlan_ft_rule;
	struct mlx5_flow_rule *any_svlan_ft_rule;
	bool filter_disabled;
};

/* One VXLAN database element, linked into mlx5e_vxlan_db's tail queue. */
struct mlx5e_vxlan_db_el {
	u_int refcount;
	u_int proto;
	u_int port;
	bool installed;
	struct mlx5_flow_rule *vxlan_ft_rule;
	TAILQ_ENTRY(mlx5e_vxlan_db_el) link;
};

struct mlx5e_vxlan_db {
	TAILQ_HEAD(, mlx5e_vxlan_db_el) head;
};

struct mlx5e_flow_table {
	int num_groups;
	struct mlx5_flow_table *t;
	struct mlx5_flow_group **g;
};

struct mlx5e_flow_tables {
	struct mlx5_flow_namespace *ns;
	struct mlx5e_flow_table vlan;
	struct mlx5e_flow_table vxlan;
	struct mlx5_flow_rule *vxlan_catchall_ft_rule;
	struct mlx5e_flow_table main;
	struct mlx5e_flow_table main_vxlan;
	struct mlx5_flow_rule *main_vxlan_rule[MLX5E_NUM_TT];
	struct mlx5e_flow_table inner_rss;
};
struct mlx5e_xmit_args { /* pref: optional refcount pointer stored with the transmitted mbuf; tisn: value written (shifted left by 8) into the WQE control segment's imm field; ihs: inline header size in bytes, 0 means not yet computed */ volatile s32 *pref; u32 tisn; u16 ihs; }; #include "en_rl.h" #include "en_hw_tls.h" #define MLX5E_TSTMP_PREC 10 struct mlx5e_clbr_point { uint64_t base_curr; uint64_t base_prev; uint64_t clbr_hw_prev; uint64_t clbr_hw_curr; u_int clbr_gen; }; struct mlx5e_dcbx { u32 cable_len; u32 xoff; }; struct mlx5e_priv { struct mlx5_core_dev *mdev; /* must be first */ /* priv data path fields - start */ int order_base_2_num_channels; int queue_mapping_channel_mask; int num_tc; int default_vlan_prio; /* priv data path fields - end */ unsigned long state; int gone; /* helpers taking, releasing, testing and asserting state_lock in exclusive mode */ #define PRIV_LOCK(priv) sx_xlock(&(priv)->state_lock) #define PRIV_UNLOCK(priv) sx_xunlock(&(priv)->state_lock) #define PRIV_LOCKED(priv) sx_xlocked(&(priv)->state_lock) #define PRIV_ASSERT_LOCKED(priv) sx_assert(&(priv)->state_lock, SA_XLOCKED) struct sx state_lock; /* Protects Interface state */ u32 pdn; u32 tdn; struct mlx5_core_mr mr; u32 tisn[MLX5E_MAX_TX_NUM_TC]; u32 rqtn; u32 tirn[MLX5E_NUM_TT]; u32 tirn_inner_vxlan[MLX5E_NUM_TT]; struct mlx5e_flow_tables fts; struct mlx5e_eth_addr_db eth_addr; struct mlx5e_vlan_db vlan; struct mlx5e_vxlan_db vxlan; struct mlx5e_params params; struct mlx5e_params_ethtool params_ethtool; union mlx5_core_pci_diagnostics params_pci; union mlx5_core_general_diagnostics params_general; struct mtx async_events_mtx; /* sync hw events */ struct work_struct update_stats_work; struct work_struct update_carrier_work; struct work_struct set_rx_mode_work; MLX5_DECLARE_DOORBELL_LOCK(doorbell_lock) struct ifnet *ifp; struct sysctl_ctx_list sysctl_ctx; struct sysctl_oid *sysctl_ifnet; struct sysctl_oid *sysctl_hw; int sysctl_debug; struct mlx5e_stats stats; int counter_set_id; struct workqueue_struct *wq; eventhandler_tag vlan_detach; eventhandler_tag vlan_attach; struct ifmedia media; int media_status_last; int media_active_last; eventhandler_tag vxlan_start; eventhandler_tag vxlan_stop; struct callout watchdog; struct mlx5e_rl_priv_data rl; struct mlx5e_tls tls; 
struct callout tstmp_clbr; int clbr_done; int clbr_curr; struct mlx5e_clbr_point clbr_points[2]; u_int clbr_gen; struct mlx5e_dcbx dcbx; bool sw_is_port_buf_owner; struct mlx5_sq_bfreg bfreg; struct pfil_head *pfil; /* flexible array member; mlx5e_select_queue() indexes it by channel number */ struct mlx5e_channel channel[]; }; #define MLX5E_NET_IP_ALIGN 2 struct mlx5e_tx_wqe { struct mlx5_wqe_ctrl_seg ctrl; struct mlx5_wqe_eth_seg eth; }; struct mlx5e_tx_umr_wqe { struct mlx5_wqe_ctrl_seg ctrl; struct mlx5_wqe_umr_ctrl_seg umr; uint8_t mkc[64]; }; struct mlx5e_tx_psv_wqe { struct mlx5_wqe_ctrl_seg ctrl; struct mlx5_seg_set_psv psv; }; struct mlx5e_rx_wqe { struct mlx5_wqe_srq_next_seg next; struct mlx5_wqe_data_seg data[]; }; /* the size of the structure above must be a power of two */ CTASSERT(powerof2(sizeof(struct mlx5e_rx_wqe))); struct mlx5e_eeprom { int lock_bit; int i2c_addr; int page_num; int device_addr; int module_num; int len; int type; int page_valid; u32 *data; }; /* all-ones value as wide as the bit size of field "fld" in the mlx5_ifc layout selected by "typ" */ #define MLX5E_FLD_MAX(typ, fld) ((1ULL << __mlx5_bit_sz(typ, fld)) - 1ULL) bool mlx5e_do_send_cqe(struct mlx5e_sq *); int mlx5e_get_full_header_size(const struct mbuf *, const struct tcphdr **); int mlx5e_xmit(struct ifnet *, struct mbuf *); int mlx5e_open_locked(struct ifnet *); int mlx5e_close_locked(struct ifnet *); void mlx5e_cq_error_event(struct mlx5_core_cq *mcq, int event); mlx5e_cq_comp_t mlx5e_rx_cq_comp; mlx5e_cq_comp_t mlx5e_tx_cq_comp; struct mlx5_cqe64 *mlx5e_get_cqe(struct mlx5e_cq *cq); void mlx5e_dim_work(struct work_struct *); void mlx5e_dim_build_cq_param(struct mlx5e_priv *, struct mlx5e_cq_param *); int mlx5e_open_flow_table(struct mlx5e_priv *priv); void mlx5e_close_flow_table(struct mlx5e_priv *priv); void mlx5e_set_rx_mode_core(struct mlx5e_priv *priv); void mlx5e_set_rx_mode_work(struct work_struct *work); void mlx5e_vlan_rx_add_vid(void *, struct ifnet *, u16); void mlx5e_vlan_rx_kill_vid(void *, struct ifnet *, u16); void mlx5e_enable_vlan_filter(struct mlx5e_priv *priv); void mlx5e_disable_vlan_filter(struct mlx5e_priv *priv); int 
mlx5e_add_all_vlan_rules(struct mlx5e_priv *priv); void mlx5e_del_all_vlan_rules(struct mlx5e_priv *priv); void mlx5e_vxlan_start(void *arg, struct ifnet *ifp, sa_family_t family, u_int port); void mlx5e_vxlan_stop(void *arg, struct ifnet *ifp, sa_family_t family, u_int port); int mlx5e_add_all_vxlan_rules(struct mlx5e_priv *priv); void mlx5e_del_all_vxlan_rules(struct mlx5e_priv *priv); /* Publishes sq->pc (big endian) in the doorbell record and then writes the given WQE words to sq->uar_map under the doorbell lock; a write barrier precedes each of the two steps. */ static inline void mlx5e_tx_notify_hw(struct mlx5e_sq *sq, u32 *wqe) { /* ensure wqe is visible to device before updating doorbell record */ wmb(); *sq->wq.db = cpu_to_be32(sq->pc); /* * Ensure the doorbell record is visible to device before ringing * the doorbell: */ wmb(); mlx5_write64(wqe, sq->uar_map, MLX5_GET_DOORBELL_LOCK(&sq->priv->doorbell_lock)); } /* Arms the CQ through mlx5_cq_arm() with MLX5_CQ_DB_REQ_NOT, passing the CQ's UAR mapping, the caller's doorbell lock and the CQ work queue's consumer counter (cq->wq.cc). */ static inline void mlx5e_cq_arm(struct mlx5e_cq *cq, spinlock_t *dblock) { struct mlx5_core_cq *mcq; mcq = &cq->mcq; mlx5_cq_arm(mcq, MLX5_CQ_DB_REQ_NOT, mcq->uar->map, dblock, cq->wq.cc); } /* debug print wrapper around mlx5_core_dbg(); the first argument is ignored */ #define mlx5e_dbg(_IGN, _priv, ...) mlx5_core_dbg((_priv)->mdev, __VA_ARGS__) extern const struct ethtool_ops mlx5e_ethtool_ops; void mlx5e_create_ethtool(struct mlx5e_priv *); void mlx5e_create_stats(struct sysctl_ctx_list *, struct sysctl_oid_list *, const char *, const char **, unsigned, u64 *); void mlx5e_create_counter_stats(struct sysctl_ctx_list *, struct sysctl_oid_list *, const char *, const char **, unsigned, counter_u64_t *); void mlx5e_send_nop(struct mlx5e_sq *, u32); int mlx5e_sq_dump_xmit(struct mlx5e_sq *, struct mlx5e_xmit_args *, struct mbuf **); int mlx5e_sq_xmit(struct mlx5e_sq *, struct mbuf **); void mlx5e_sq_cev_timeout(void *); int mlx5e_refresh_channel_params(struct mlx5e_priv *); int mlx5e_open_cq(struct mlx5e_priv *, struct mlx5e_cq_param *, struct mlx5e_cq *, mlx5e_cq_comp_t *, int eq_ix); void mlx5e_close_cq(struct mlx5e_cq *); void mlx5e_free_sq_db(struct mlx5e_sq *); int mlx5e_alloc_sq_db(struct mlx5e_sq *); int mlx5e_enable_sq(struct mlx5e_sq *, struct mlx5e_sq_param *, int tis_num); int mlx5e_modify_sq(struct 
mlx5e_sq *, int curr_state, int next_state); void mlx5e_disable_sq(struct mlx5e_sq *); void mlx5e_drain_sq(struct mlx5e_sq *); void mlx5e_modify_tx_dma(struct mlx5e_priv *priv, uint8_t value); void mlx5e_modify_rx_dma(struct mlx5e_priv *priv, uint8_t value); void mlx5e_resume_sq(struct mlx5e_sq *sq); void mlx5e_update_sq_inline(struct mlx5e_sq *sq); void mlx5e_refresh_sq_inline(struct mlx5e_priv *priv); int mlx5e_update_buf_lossy(struct mlx5e_priv *priv); int mlx5e_fec_update(struct mlx5e_priv *priv); int mlx5e_hw_temperature_update(struct mlx5e_priv *priv); /* mlx5e_ul_snd_tag_* symbols declared through the if_snd_tag_*_t typedefs (presumably function types; confirm against the ifnet headers) */ if_snd_tag_alloc_t mlx5e_ul_snd_tag_alloc; if_snd_tag_modify_t mlx5e_ul_snd_tag_modify; if_snd_tag_query_t mlx5e_ul_snd_tag_query; if_snd_tag_free_t mlx5e_ul_snd_tag_free; #endif /* _MLX5_EN_H_ */ diff --git a/sys/dev/mlx5/mlx5_en/mlx5_en_tx.c b/sys/dev/mlx5/mlx5_en/mlx5_en_tx.c index 437910ee7964..753b7ea20e5b 100644 --- a/sys/dev/mlx5/mlx5_en/mlx5_en_tx.c +++ b/sys/dev/mlx5/mlx5_en/mlx5_en_tx.c @@ -1,1167 +1,1185 @@ /*- * Copyright (c) 2015-2019 Mellanox Technologies. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS `AS IS' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #include "opt_kern_tls.h" #include "en.h" #include static inline bool mlx5e_do_send_cqe_inline(struct mlx5e_sq *sq) { /* Counts one more WQE; returns true and resets the counter once it reaches sq->cev_factor. Callers in this file set MLX5_WQE_CTRL_CQ_UPDATE on the WQE when true is returned. */ sq->cev_counter++; /* interleave the CQEs */ if (sq->cev_counter >= sq->cev_factor) { sq->cev_counter = 0; return (true); } return (false); } /* Non-inline wrapper around mlx5e_do_send_cqe_inline(). */ bool mlx5e_do_send_cqe(struct mlx5e_sq *sq) { return (mlx5e_do_send_cqe_inline(sq)); } /* Builds a NOP WQE with ds_cnt data segments at the current producer index, records it in sq->mbuf[] with a NULL mbuf and zero bytes, copies the control segment into sq->doorbell and advances sq->pc. The doorbell itself is not written here. */ void mlx5e_send_nop(struct mlx5e_sq *sq, u32 ds_cnt) { u16 pi = sq->pc & sq->wq.sz_m1; struct mlx5e_tx_wqe *wqe = mlx5_wq_cyc_get_wqe(&sq->wq, pi); memset(&wqe->ctrl, 0, sizeof(wqe->ctrl)); wqe->ctrl.opmod_idx_opcode = cpu_to_be32((sq->pc << 8) | MLX5_OPCODE_NOP); wqe->ctrl.qpn_ds = cpu_to_be32((sq->sqn << 8) | ds_cnt); if (mlx5e_do_send_cqe_inline(sq)) wqe->ctrl.fm_ce_se = MLX5_WQE_CTRL_CQ_UPDATE; else wqe->ctrl.fm_ce_se = 0; /* Copy data for doorbell */ memcpy(sq->doorbell.d32, &wqe->ctrl, sizeof(sq->doorbell.d32)); sq->mbuf[pi].mbuf = NULL; sq->mbuf[pi].num_bytes = 0; sq->mbuf[pi].num_wqebbs = DIV_ROUND_UP(ds_cnt, MLX5_SEND_WQEBB_NUM_DS); sq->pc += sq->mbuf[pi].num_wqebbs; } #if (__FreeBSD_version >= 1100000) static uint32_t mlx5e_hash_value; /* Seeds mlx5e_hash_value, which mlx5e_select_queue() passes to m_ether_tcpip_hash() for mbufs without a flowid. */ static void mlx5e_hash_init(void *arg) { mlx5e_hash_value = m_ether_tcpip_hash_init(); } /* Make kernel call mlx5e_hash_init after the random stack finished initializing */ SYSINIT(mlx5e_hash_init, SI_SUB_RANDOM, SI_ORDER_ANY, &mlx5e_hash_init, NULL); #endif static struct mlx5e_sq * mlx5e_select_queue_by_send_tag(struct ifnet *ifp, struct mbuf *mb) { struct 
m_snd_tag *mb_tag; struct mlx5e_sq *sq; mb_tag = mb->m_pkthdr.snd_tag; #ifdef KERN_TLS top: #endif /* get pointer to sendqueue */ switch (mb_tag->type) { #ifdef RATELIMIT case IF_SND_TAG_TYPE_RATE_LIMIT: sq = container_of(mb_tag, struct mlx5e_rl_channel, tag)->sq; break; #ifdef KERN_TLS case IF_SND_TAG_TYPE_TLS_RATE_LIMIT: mb_tag = container_of(mb_tag, struct mlx5e_tls_tag, tag)->rl_tag; goto top; #endif #endif case IF_SND_TAG_TYPE_UNLIMITED: sq = &container_of(mb_tag, struct mlx5e_channel, tag)->sq[0]; KASSERT((mb_tag->refcount > 0), ("mlx5e_select_queue: Channel refs are zero for unlimited tag")); break; #ifdef KERN_TLS case IF_SND_TAG_TYPE_TLS: /* a TLS tag refers to another send tag (rl_tag); restart the lookup with that tag */ mb_tag = container_of(mb_tag, struct mlx5e_tls_tag, tag)->rl_tag; goto top; #endif default: sq = NULL; break; } /* check if valid */ if (sq != NULL && READ_ONCE(sq->running) != 0) return (sq); return (NULL); } /* Selects the SQ by traffic class and channel. The traffic class is taken from the top three bits of the VLAN tag (ether_vtag >> 13) when M_VLANTAG is set and the value is below num_tc, otherwise default_vlan_prio is used. The channel is derived from the flowid, or from a computed hash when no hash type is set, modulo num_channels. Returns NULL when the selected SQ is not running. */ static struct mlx5e_sq * mlx5e_select_queue(struct ifnet *ifp, struct mbuf *mb) { struct mlx5e_priv *priv = ifp->if_softc; struct mlx5e_sq *sq; u32 ch; u32 tc; /* obtain VLAN information if present */ if (mb->m_flags & M_VLANTAG) { tc = (mb->m_pkthdr.ether_vtag >> 13); if (tc >= priv->num_tc) tc = priv->default_vlan_prio; } else { tc = priv->default_vlan_prio; } ch = priv->params.num_channels; /* check if flowid is set */ if (M_HASHTYPE_GET(mb) != M_HASHTYPE_NONE) { #ifdef RSS u32 temp; if (rss_hash2bucket(mb->m_pkthdr.flowid, M_HASHTYPE_GET(mb), &temp) == 0) ch = temp % ch; else #endif ch = (mb->m_pkthdr.flowid % 128) % ch; } else { #if (__FreeBSD_version >= 1100000) ch = m_ether_tcpip_hash(MBUF_HASHFLAG_L3 | MBUF_HASHFLAG_L4, mb, mlx5e_hash_value) % ch; #else /* * m_ether_tcpip_hash not present in stable, so just * throw unhashed mbufs on queue 0 */ ch = 0; #endif } /* check if send queue is running */ sq = &priv->channel[ch].sq[tc]; if (likely(READ_ONCE(sq->running) != 0)) return (sq); return (NULL); } /* Returns the number of leading bytes to inline for L2 mode: the Ethernet header (plus VLAN encapsulation when present) and 4 more bytes for IPv4/IPv6. Frames that are too short, or carry another ethertype, get MIN(packet length, sq->max_inline) instead. */ static inline u16 mlx5e_get_l2_header_size(struct mlx5e_sq *sq, struct mbuf *mb) { struct ether_vlan_header *eh; 
uint16_t eth_type; int min_inline; /* the Ethernet header is read from the first mbuf only; frames whose first mbuf is too short take the max_inline path */ eh = mtod(mb, struct ether_vlan_header *); if (unlikely(mb->m_len < ETHER_HDR_LEN)) { goto max_inline; } else if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) { if (unlikely(mb->m_len < (ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN))) goto max_inline; eth_type = ntohs(eh->evl_proto); min_inline = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN; } else { eth_type = ntohs(eh->evl_encap_proto); min_inline = ETHER_HDR_LEN; } switch (eth_type) { case ETHERTYPE_IP: case ETHERTYPE_IPV6: /* * Make sure the TOS(IPv4) or traffic class(IPv6) * field gets inlined. Else the SQ may stall. */ min_inline += 4; break; default: goto max_inline; } /* * m_copydata() will be used on the remaining header which * does not need to reside within the first m_len bytes of * data: */ if (mb->m_pkthdr.len < min_inline) goto max_inline; return (min_inline); max_inline: return (MIN(mb->m_pkthdr.len, sq->max_inline)); } /* * This function parses IPv4 and IPv6 packets looking for TCP and UDP * headers. * * Upon return, the pointer that the "ppth" argument points to (when * "ppth" is not NULL) is set to the location of the TCP header. NULL is * used if no TCP header is present. * * The return value indicates the number of bytes from the beginning * of the packet until the first byte after the TCP or UDP header. If * this function returns zero, the parsing failed. 
*/ int mlx5e_get_full_header_size(const struct mbuf *mb, const struct tcphdr **ppth) { const struct ether_vlan_header *eh; const struct tcphdr *th; const struct ip *ip; int ip_hlen, tcp_hlen; const struct ip6_hdr *ip6; uint16_t eth_type; int eth_hdr_len; eh = mtod(mb, const struct ether_vlan_header *); if (unlikely(mb->m_len < ETHER_HDR_LEN)) goto failure; if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) { if (unlikely(mb->m_len < ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN)) goto failure; eth_type = ntohs(eh->evl_proto); eth_hdr_len = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN; } else { eth_type = ntohs(eh->evl_encap_proto); eth_hdr_len = ETHER_HDR_LEN; } /* from here on eth_hdr_len accumulates the L2, L3 and L4 header lengths */ switch (eth_type) { case ETHERTYPE_IP: ip = (const struct ip *)(mb->m_data + eth_hdr_len); if (unlikely(mb->m_len < eth_hdr_len + sizeof(*ip))) goto failure; switch (ip->ip_p) { case IPPROTO_TCP: ip_hlen = ip->ip_hl << 2; eth_hdr_len += ip_hlen; goto tcp_packet; case IPPROTO_UDP: ip_hlen = ip->ip_hl << 2; eth_hdr_len += ip_hlen + sizeof(struct udphdr); th = NULL; goto udp_packet; default: goto failure; } break; case ETHERTYPE_IPV6: /* only the fixed IPv6 header is considered; any other next-header value than TCP or UDP fails */ ip6 = (const struct ip6_hdr *)(mb->m_data + eth_hdr_len); if (unlikely(mb->m_len < eth_hdr_len + sizeof(*ip6))) goto failure; switch (ip6->ip6_nxt) { case IPPROTO_TCP: eth_hdr_len += sizeof(*ip6); goto tcp_packet; case IPPROTO_UDP: eth_hdr_len += sizeof(*ip6) + sizeof(struct udphdr); th = NULL; goto udp_packet; default: goto failure; } break; default: goto failure; } tcp_packet: /* the TCP header must either lie entirely within the first mbuf, or the first mbuf must end exactly at eth_hdr_len with the header at the start of the second mbuf */ if (unlikely(mb->m_len < eth_hdr_len + sizeof(*th))) { const struct mbuf *m_th = mb->m_next; if (unlikely(mb->m_len != eth_hdr_len || m_th == NULL || m_th->m_len < sizeof(*th))) goto failure; th = (const struct tcphdr *)(m_th->m_data); } else { th = (const struct tcphdr *)(mb->m_data + eth_hdr_len); } tcp_hlen = th->th_off << 2; eth_hdr_len += tcp_hlen; udp_packet: /* * m_copydata() will be used on the remaining header which * does not need to reside within the first m_len bytes of * data: */ if (unlikely(mb->m_pkthdr.len 
< eth_hdr_len)) goto failure; if (ppth != NULL) *ppth = th; return (eth_hdr_len); failure: if (ppth != NULL) *ppth = NULL; return (0); } /* * Locate a pointer inside an mbuf chain. Returns NULL upon failure. * * When the current mbuf's m_len equals eth_hdr_len, the search moves * on to the next mbuf and eth_hdr_len is recorded in *poffset. The * returned pointer has at least min_len contiguous bytes behind it * in the selected mbuf. */ static inline void * mlx5e_parse_mbuf_chain(const struct mbuf **mb, int *poffset, int eth_hdr_len, int min_len) { if (unlikely(mb[0]->m_len == eth_hdr_len)) { poffset[0] = eth_hdr_len; if (unlikely((mb[0] = mb[0]->m_next) == NULL)) return (NULL); } if (unlikely(mb[0]->m_len < eth_hdr_len - poffset[0] + min_len)) return (NULL); return (mb[0]->m_data + eth_hdr_len - poffset[0]); } /* * This function parses IPv4 and IPv6 packets looking for UDP, VXLAN * and TCP headers. * * The return value indicates the number of bytes from the beginning * of the packet until the first byte after the TCP header. If this * function returns zero, the parsing failed. */ static int mlx5e_get_vxlan_header_size(const struct mbuf *mb, struct mlx5e_tx_wqe *wqe, uint8_t cs_mask, uint8_t opcode) { const struct ether_vlan_header *eh; struct ip *ip4; struct ip6_hdr *ip6; struct tcphdr *th; struct udphdr *udp; bool has_outer_vlan_tag; uint16_t eth_type; uint8_t ip_type; int pkt_hdr_len; int eth_hdr_len; int tcp_hlen; int ip_hlen; int offset; pkt_hdr_len = mb->m_pkthdr.len; has_outer_vlan_tag = (mb->m_flags & M_VLANTAG) != 0; offset = 0; eh = mtod(mb, const struct ether_vlan_header *); if (unlikely(mb->m_len < ETHER_HDR_LEN)) return (0); if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) { if (unlikely(mb->m_len < ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN)) return (0); eth_type = eh->evl_proto; eth_hdr_len = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN; } else { eth_type = eh->evl_encap_proto; eth_hdr_len = ETHER_HDR_LEN; } /* eth_type is kept in network byte order here; the case labels below apply htons() to the constants instead */ switch (eth_type) { case htons(ETHERTYPE_IP): ip4 = mlx5e_parse_mbuf_chain(&mb, &offset, eth_hdr_len, sizeof(*ip4)); if (unlikely(ip4 == NULL)) return (0); ip_type = ip4->ip_p; if (unlikely(ip_type != IPPROTO_UDP)) return (0); wqe->eth.swp_outer_l3_offset = eth_hdr_len / 2; 
wqe->eth.cs_flags = MLX5_ETH_WQE_L3_CSUM | MLX5_ETH_WQE_L4_CSUM; ip_hlen = ip4->ip_hl << 2; eth_hdr_len += ip_hlen; udp = mlx5e_parse_mbuf_chain(&mb, &offset, eth_hdr_len, sizeof(*udp)); if (unlikely(udp == NULL)) return (0); wqe->eth.swp_outer_l4_offset = eth_hdr_len / 2; wqe->eth.swp_flags |= MLX5_ETH_WQE_SWP_OUTER_L4_TYPE; eth_hdr_len += sizeof(*udp); break; case htons(ETHERTYPE_IPV6): ip6 = mlx5e_parse_mbuf_chain(&mb, &offset, eth_hdr_len, sizeof(*ip6)); if (unlikely(ip6 == NULL)) return (0); ip_type = ip6->ip6_nxt; if (unlikely(ip_type != IPPROTO_UDP)) return (0); wqe->eth.swp_outer_l3_offset = eth_hdr_len / 2; wqe->eth.cs_flags = MLX5_ETH_WQE_L4_CSUM; eth_hdr_len += sizeof(*ip6); udp = mlx5e_parse_mbuf_chain(&mb, &offset, eth_hdr_len, sizeof(*udp)); if (unlikely(udp == NULL)) return (0); wqe->eth.swp_outer_l4_offset = eth_hdr_len / 2; wqe->eth.swp_flags |= MLX5_ETH_WQE_SWP_OUTER_L4_TYPE | MLX5_ETH_WQE_SWP_OUTER_L3_TYPE; eth_hdr_len += sizeof(*udp); break; default: return (0); } /* * If the hardware is not computing inner IP checksum, then * stop after the outer UDP header and skip inlining the VXLAN * and inner headers: */ if (unlikely((cs_mask & MLX5_ETH_WQE_L3_INNER_CSUM) == 0)) goto done; /* make sure the 8 bytes following the outer UDP header (the VXLAN header) are present and account for them */ if (unlikely(mlx5e_parse_mbuf_chain(&mb, &offset, eth_hdr_len, 8) == NULL)) return (0); eth_hdr_len += 8; /* Check for ethernet header again. */ eh = mlx5e_parse_mbuf_chain(&mb, &offset, eth_hdr_len, ETHER_HDR_LEN); if (unlikely(eh == NULL)) return (0); if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) { if (unlikely(mb->m_len < eth_hdr_len - offset + ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN)) return (0); eth_type = eh->evl_proto; eth_hdr_len += ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN; } else { eth_type = eh->evl_encap_proto; eth_hdr_len += ETHER_HDR_LEN; } /* Check for IP header again. 
*/ switch (eth_type) { case htons(ETHERTYPE_IP): ip4 = mlx5e_parse_mbuf_chain(&mb, &offset, eth_hdr_len, sizeof(*ip4)); if (unlikely(ip4 == NULL)) return (0); /* the swp_*_offset fields are written as byte offset divided by two */ wqe->eth.swp_inner_l3_offset = eth_hdr_len / 2; wqe->eth.cs_flags |= MLX5_ETH_WQE_L3_INNER_CSUM; ip_type = ip4->ip_p; ip_hlen = ip4->ip_hl << 2; eth_hdr_len += ip_hlen; break; case htons(ETHERTYPE_IPV6): ip6 = mlx5e_parse_mbuf_chain(&mb, &offset, eth_hdr_len, sizeof(*ip6)); if (unlikely(ip6 == NULL)) return (0); wqe->eth.swp_inner_l3_offset = eth_hdr_len / 2; wqe->eth.swp_flags |= MLX5_ETH_WQE_SWP_INNER_L3_TYPE; ip_type = ip6->ip6_nxt; eth_hdr_len += sizeof(*ip6); break; default: return (0); } /* * If the hardware is not computing inner UDP/TCP checksum, * then skip inlining the inner UDP/TCP header: */ if (unlikely((cs_mask & MLX5_ETH_WQE_L4_INNER_CSUM) == 0)) goto done; switch (ip_type) { case IPPROTO_UDP: udp = mlx5e_parse_mbuf_chain(&mb, &offset, eth_hdr_len, sizeof(*udp)); if (unlikely(udp == NULL)) return (0); wqe->eth.swp_inner_l4_offset = (eth_hdr_len / 2); wqe->eth.cs_flags |= MLX5_ETH_WQE_L4_INNER_CSUM; wqe->eth.swp_flags |= MLX5_ETH_WQE_SWP_INNER_L4_TYPE; eth_hdr_len += sizeof(*udp); break; case IPPROTO_TCP: th = mlx5e_parse_mbuf_chain(&mb, &offset, eth_hdr_len, sizeof(*th)); if (unlikely(th == NULL)) return (0); wqe->eth.swp_inner_l4_offset = eth_hdr_len / 2; wqe->eth.cs_flags |= MLX5_ETH_WQE_L4_INNER_CSUM; wqe->eth.swp_flags |= MLX5_ETH_WQE_SWP_INNER_L4_TYPE; tcp_hlen = th->th_off << 2; eth_hdr_len += tcp_hlen; break; default: return (0); } done: /* the parsed headers must not extend beyond the packet length */ if (unlikely(pkt_hdr_len < eth_hdr_len)) return (0); /* Account for software inserted VLAN tag, if any. */ if (unlikely(has_outer_vlan_tag)) { wqe->eth.swp_outer_l3_offset += ETHER_VLAN_ENCAP_LEN / 2; wqe->eth.swp_outer_l4_offset += ETHER_VLAN_ENCAP_LEN / 2; wqe->eth.swp_inner_l3_offset += ETHER_VLAN_ENCAP_LEN / 2; wqe->eth.swp_inner_l4_offset += ETHER_VLAN_ENCAP_LEN / 2; } /* * When inner checksums are set, outer L4 checksum flag must * be disabled. 
*/ if (wqe->eth.cs_flags & (MLX5_ETH_WQE_L3_INNER_CSUM | MLX5_ETH_WQE_L4_INNER_CSUM)) wqe->eth.cs_flags &= ~MLX5_ETH_WQE_L4_CSUM; return (eth_hdr_len); } /* Layout of one DUMP WQE: a control segment followed by a single data segment, aligned to MLX5_SEND_WQE_BB. */ struct mlx5_wqe_dump_seg { struct mlx5_wqe_ctrl_seg ctrl; struct mlx5_wqe_data_seg data; } __aligned(MLX5_SEND_WQE_BB); /* the two segments of a DUMP WQE must fit into exactly one WQE building block */ CTASSERT(DIV_ROUND_UP(2, MLX5_SEND_WQEBB_NUM_DS) == 1); /* Transmits the mbuf as a series of DUMP WQEs, one per chunk of at most hw_mtu_msb bytes of each DMA segment. The mbuf is always consumed and *mbp is set to NULL on every return path. Returns 0 on success or when there is nothing to send, ENOBUFS when the SQ lacks room, or the DMA load error otherwise. */ int mlx5e_sq_dump_xmit(struct mlx5e_sq *sq, struct mlx5e_xmit_args *parg, struct mbuf **mbp) { bus_dma_segment_t segs[MLX5E_MAX_TX_MBUF_FRAGS]; struct mlx5_wqe_dump_seg *wqe; struct mlx5_wqe_dump_seg *wqe_last; int nsegs; int xsegs; u32 off; u32 msb; int err; int x; struct mbuf *mb; const u32 ds_cnt = 2; u16 pi; const u8 opcode = MLX5_OPCODE_DUMP; /* get pointer to mbuf */ mb = *mbp; /* get producer index */ pi = sq->pc & sq->wq.sz_m1; sq->mbuf[pi].num_bytes = mb->m_pkthdr.len; sq->mbuf[pi].num_wqebbs = 0; /* check number of segments in mbuf */ err = bus_dmamap_load_mbuf_sg(sq->dma_tag, sq->mbuf[pi].dma_map, mb, segs, &nsegs, BUS_DMA_NOWAIT); if (err == EFBIG) { /* update statistics */ sq->stats.defragged++; /* too many mbuf fragments */ mb = m_defrag(*mbp, M_NOWAIT); if (mb == NULL) { mb = *mbp; goto tx_drop; } /* try again */ err = bus_dmamap_load_mbuf_sg(sq->dma_tag, sq->mbuf[pi].dma_map, mb, segs, &nsegs, BUS_DMA_NOWAIT); } if (err != 0) goto tx_drop; /* make sure all mbuf data, if any, is visible to the bus */ bus_dmamap_sync(sq->dma_tag, sq->mbuf[pi].dma_map, BUS_DMASYNC_PREWRITE); /* compute number of real DUMP segments */ msb = sq->priv->params_ethtool.hw_mtu_msb; for (x = xsegs = 0; x != nsegs; x++) xsegs += howmany((u32)segs[x].ds_len, msb); /* check if there are no segments */ if (unlikely(xsegs == 0)) { bus_dmamap_unload(sq->dma_tag, sq->mbuf[pi].dma_map); m_freem(mb); *mbp = NULL; /* safety clear */ return (0); } /* return ENOBUFS if the queue is full */ if (unlikely(!mlx5e_sq_has_room_for(sq, xsegs))) { sq->stats.enobuf++; bus_dmamap_unload(sq->dma_tag, sq->mbuf[pi].dma_map); m_freem(mb); *mbp = NULL; /* safety clear */ return 
(ENOBUFS); } wqe = mlx5_wq_cyc_get_wqe(&sq->wq, pi); wqe_last = mlx5_wq_cyc_get_wqe(&sq->wq, sq->wq.sz_m1); /* emit one DUMP WQE per chunk; wqe_last marks the final ring slot so that the write pointer wraps to slot 0 */ for (x = 0; x != nsegs; x++) { for (off = 0; off < segs[x].ds_len; off += msb) { u32 len = segs[x].ds_len - off; /* limit length */ if (likely(len > msb)) len = msb; memset(&wqe->ctrl, 0, sizeof(wqe->ctrl)); /* fill control segment */ wqe->ctrl.opmod_idx_opcode = cpu_to_be32((sq->pc << 8) | opcode); wqe->ctrl.qpn_ds = cpu_to_be32((sq->sqn << 8) | ds_cnt); wqe->ctrl.imm = cpu_to_be32(parg->tisn << 8); /* fill data segment */ wqe->data.addr = cpu_to_be64((uint64_t)segs[x].ds_addr + off); wqe->data.lkey = sq->mkey_be; wqe->data.byte_count = cpu_to_be32(len); /* advance to next building block */ if (unlikely(wqe == wqe_last)) wqe = mlx5_wq_cyc_get_wqe(&sq->wq, 0); else wqe++; sq->mbuf[pi].num_wqebbs++; sq->pc++; } } /* re-point wqe at the first and wqe_last at the last WQE written above */ wqe = mlx5_wq_cyc_get_wqe(&sq->wq, pi); wqe_last = mlx5_wq_cyc_get_wqe(&sq->wq, (sq->pc - 1) & sq->wq.sz_m1); /* put in place data fence */ wqe->ctrl.fm_ce_se |= MLX5_FENCE_MODE_INITIATOR_SMALL; /* check if we should generate a completion event */ if (mlx5e_do_send_cqe_inline(sq)) wqe_last->ctrl.fm_ce_se |= MLX5_WQE_CTRL_CQ_UPDATE; /* copy data for doorbell */ memcpy(sq->doorbell.d32, wqe_last, sizeof(sq->doorbell.d32)); /* store pointer to mbuf */ sq->mbuf[pi].mbuf = mb; sq->mbuf[pi].p_refcount = parg->pref; /* NOTE(review): parg->pref is dereferenced unconditionally here, whereas mlx5e_sq_xmit() checks its pref pointer for NULL first; callers presumably always supply one - confirm */ atomic_add_int(parg->pref, 1); /* count all traffic going out */ sq->stats.packets++; sq->stats.bytes += sq->mbuf[pi].num_bytes; *mbp = NULL; /* safety clear */ return (0); tx_drop: sq->stats.dropped++; *mbp = NULL; m_freem(mb); return err; } /* Builds one send WQE for the mbuf in *mbp and advances the producer counter; the doorbell data is staged in sq->doorbell but not written here. The early ENOBUFS and ENOMEM returns leave *mbp untouched; on success and on the tx_drop path *mbp is set to NULL. */ int mlx5e_sq_xmit(struct mlx5e_sq *sq, struct mbuf **mbp) { bus_dma_segment_t segs[MLX5E_MAX_TX_MBUF_FRAGS]; struct mlx5e_xmit_args args = {}; struct mlx5_wqe_data_seg *dseg; struct mlx5e_tx_wqe *wqe; struct ifnet *ifp; int nsegs; int err; int x; struct mbuf *mb; u16 ds_cnt; u16 pi; u8 opcode; #ifdef KERN_TLS top: #endif /* Return ENOBUFS if the queue is full */ if (unlikely(!mlx5e_sq_has_room_for(sq, 2 * 
MLX5_SEND_WQE_MAX_WQEBBS))) { sq->stats.enobuf++; return (ENOBUFS); } /* Align SQ edge with NOPs to avoid WQE wrap around */ pi = ((~sq->pc) & sq->wq.sz_m1); /* pi + 1 is the number of WQEBBs left before the ring index wraps to zero */ if (pi < (MLX5_SEND_WQE_MAX_WQEBBS - 1)) { /* Send one multi NOP message instead of many */ mlx5e_send_nop(sq, (pi + 1) * MLX5_SEND_WQEBB_NUM_DS); pi = ((~sq->pc) & sq->wq.sz_m1); if (pi < (MLX5_SEND_WQE_MAX_WQEBBS - 1)) { sq->stats.enobuf++; return (ENOMEM); } } #ifdef KERN_TLS /* Special handling for TLS packets, if any */ switch (mlx5e_sq_tls_xmit(sq, &args, mbp)) { case MLX5E_TLS_LOOP: goto top; case MLX5E_TLS_FAILURE: mb = *mbp; err = ENOMEM; goto tx_drop; case MLX5E_TLS_DEFERRED: return (0); case MLX5E_TLS_CONTINUE: default: break; } #endif /* Setup local variables */ pi = sq->pc & sq->wq.sz_m1; wqe = mlx5_wq_cyc_get_wqe(&sq->wq, pi); ifp = sq->ifp; memset(wqe, 0, sizeof(*wqe)); /* get pointer to mbuf */ mb = *mbp; /* Send a copy of the frame to the BPF listener, if any */ if (ifp != NULL && ifp->if_bpf != NULL) ETHER_BPF_MTAP(ifp, mb); if (mb->m_pkthdr.csum_flags & (CSUM_IP | CSUM_TSO)) { wqe->eth.cs_flags |= MLX5_ETH_WQE_L3_CSUM; } if (mb->m_pkthdr.csum_flags & (CSUM_TCP | CSUM_UDP | CSUM_UDP_IPV6 | CSUM_TCP_IPV6 | CSUM_TSO)) { wqe->eth.cs_flags |= MLX5_ETH_WQE_L4_CSUM; } if (wqe->eth.cs_flags == 0) { sq->stats.csum_offload_none++; } if (mb->m_pkthdr.csum_flags & CSUM_TSO) { u32 payload_len; u32 mss = mb->m_pkthdr.tso_segsz; u32 num_pkts; wqe->eth.mss = cpu_to_be16(mss); opcode = MLX5_OPCODE_LSO; if (args.ihs == 0) args.ihs = mlx5e_get_full_header_size(mb, NULL); if (unlikely(args.ihs == 0)) { err = EINVAL; goto tx_drop; } payload_len = mb->m_pkthdr.len - args.ihs; if (payload_len == 0) num_pkts = 1; else num_pkts = DIV_ROUND_UP(payload_len, mss); /* byte accounting: the payload plus one copy of the args.ihs header bytes per resulting segment */ sq->mbuf[pi].num_bytes = payload_len + (num_pkts * args.ihs); sq->stats.tso_packets++; sq->stats.tso_bytes += payload_len; } else if (mb->m_pkthdr.csum_flags & CSUM_ENCAP_VXLAN) { /* check for inner TCP TSO first */ if (mb->m_pkthdr.csum_flags & 
(CSUM_INNER_IP_TSO | CSUM_INNER_IP6_TSO)) { u32 payload_len; u32 mss = mb->m_pkthdr.tso_segsz; u32 num_pkts; wqe->eth.mss = cpu_to_be16(mss); opcode = MLX5_OPCODE_LSO; if (likely(args.ihs == 0)) { args.ihs = mlx5e_get_vxlan_header_size(mb, wqe, MLX5_ETH_WQE_L3_INNER_CSUM | MLX5_ETH_WQE_L4_INNER_CSUM | MLX5_ETH_WQE_L4_CSUM | MLX5_ETH_WQE_L3_CSUM, opcode); if (unlikely(args.ihs == 0)) { err = EINVAL; goto tx_drop; } } payload_len = mb->m_pkthdr.len - args.ihs; if (payload_len == 0) num_pkts = 1; else num_pkts = DIV_ROUND_UP(payload_len, mss); sq->mbuf[pi].num_bytes = payload_len + num_pkts * args.ihs; sq->stats.tso_packets++; sq->stats.tso_bytes += payload_len; } else { opcode = MLX5_OPCODE_SEND; if (likely(args.ihs == 0)) { uint8_t cs_mask; /* cs_mask tells the VXLAN parser how deep to parse: inner L3 and L4 headers, the inner L3 header only, or the outer headers only */ if (mb->m_pkthdr.csum_flags & (CSUM_INNER_IP_TCP | CSUM_INNER_IP_UDP | CSUM_INNER_IP6_TCP | CSUM_INNER_IP6_UDP)) { cs_mask = MLX5_ETH_WQE_L3_INNER_CSUM | MLX5_ETH_WQE_L4_INNER_CSUM | MLX5_ETH_WQE_L4_CSUM | MLX5_ETH_WQE_L3_CSUM; } else if (mb->m_pkthdr.csum_flags & CSUM_INNER_IP) { cs_mask = MLX5_ETH_WQE_L3_INNER_CSUM | MLX5_ETH_WQE_L4_CSUM | MLX5_ETH_WQE_L3_CSUM; } else { cs_mask = MLX5_ETH_WQE_L4_CSUM | MLX5_ETH_WQE_L3_CSUM; } args.ihs = mlx5e_get_vxlan_header_size(mb, wqe, cs_mask, opcode); if (unlikely(args.ihs == 0)) { err = EINVAL; goto tx_drop; } } sq->mbuf[pi].num_bytes = max_t (unsigned int, mb->m_pkthdr.len, ETHER_MIN_LEN - ETHER_CRC_LEN); } } else { opcode = MLX5_OPCODE_SEND; /* choose the inline header size according to the SQ's minimum inline mode, unless it was already set in args.ihs */ if (args.ihs == 0) { switch (sq->min_inline_mode) { case MLX5_INLINE_MODE_IP: case MLX5_INLINE_MODE_TCP_UDP: args.ihs = mlx5e_get_full_header_size(mb, NULL); if (unlikely(args.ihs == 0)) args.ihs = mlx5e_get_l2_header_size(sq, mb); break; case MLX5_INLINE_MODE_L2: args.ihs = mlx5e_get_l2_header_size(sq, mb); break; case MLX5_INLINE_MODE_NONE: /* FALLTHROUGH */ default: if ((mb->m_flags & M_VLANTAG) != 0 && (sq->min_insert_caps & MLX5E_INSERT_VLAN) != 0) { /* inlining VLAN data is not required */ wqe->eth.vlan_cmd = htons(0x8000); /* bit 0 CVLAN */ 
wqe->eth.vlan_hdr = htons(mb->m_pkthdr.ether_vtag); args.ihs = 0; } else if ((mb->m_flags & M_VLANTAG) == 0 && (sq->min_insert_caps & MLX5E_INSERT_NON_VLAN) != 0) { /* inlining non-VLAN data is not required */ args.ihs = 0; } else { /* we are forced to inlining L2 header, if any */ args.ihs = mlx5e_get_l2_header_size(sq, mb); } break; } } sq->mbuf[pi].num_bytes = max_t (unsigned int, mb->m_pkthdr.len, ETHER_MIN_LEN - ETHER_CRC_LEN); } if (likely(args.ihs == 0)) { /* nothing to inline */ } else if ((mb->m_flags & M_VLANTAG) != 0) { struct ether_vlan_header *eh = (struct ether_vlan_header *) wqe->eth.inline_hdr_start; /* Range checks */ if (unlikely(args.ihs > (sq->max_inline - ETHER_VLAN_ENCAP_LEN))) { if (mb->m_pkthdr.csum_flags & (CSUM_TSO | CSUM_ENCAP_VXLAN)) { err = EINVAL; goto tx_drop; } /* clamp so that the 4 byte VLAN tag added below still fits within max_inline */ args.ihs = (sq->max_inline - ETHER_VLAN_ENCAP_LEN); } else if (unlikely(args.ihs < ETHER_HDR_LEN)) { err = EINVAL; goto tx_drop; } m_copydata(mb, 0, ETHER_HDR_LEN, (caddr_t)eh); m_adj(mb, ETHER_HDR_LEN); /* Insert 4 bytes VLAN tag into data stream */ eh->evl_proto = eh->evl_encap_proto; eh->evl_encap_proto = htons(ETHERTYPE_VLAN); eh->evl_tag = htons(mb->m_pkthdr.ether_vtag); /* Copy rest of header data, if any */ m_copydata(mb, 0, args.ihs - ETHER_HDR_LEN, (caddr_t)(eh + 1)); m_adj(mb, args.ihs - ETHER_HDR_LEN); /* Extend header by 4 bytes */ args.ihs += ETHER_VLAN_ENCAP_LEN; wqe->eth.inline_hdr_sz = cpu_to_be16(args.ihs); } else { /* check if inline header size is too big */ if (unlikely(args.ihs > sq->max_inline)) { if (unlikely(mb->m_pkthdr.csum_flags & (CSUM_TSO | CSUM_ENCAP_VXLAN))) { err = EINVAL; goto tx_drop; } args.ihs = sq->max_inline; } /* the inlined bytes are copied into the WQE and trimmed from the front of the mbuf */ m_copydata(mb, 0, args.ihs, wqe->eth.inline_hdr_start); m_adj(mb, args.ihs); wqe->eth.inline_hdr_sz = cpu_to_be16(args.ihs); } /* count the data segments taken by the WQE header plus any inline bytes beyond inline_hdr_start */ ds_cnt = sizeof(*wqe) / MLX5_SEND_WQE_DS; if (args.ihs > sizeof(wqe->eth.inline_hdr_start)) { ds_cnt += DIV_ROUND_UP(args.ihs - sizeof(wqe->eth.inline_hdr_start), MLX5_SEND_WQE_DS); } dseg = ((struct 
mlx5_wqe_data_seg *)&wqe->ctrl) + ds_cnt; /* dseg now points at the first data segment slot after the control, ethernet and inline header segments */ err = bus_dmamap_load_mbuf_sg(sq->dma_tag, sq->mbuf[pi].dma_map, mb, segs, &nsegs, BUS_DMA_NOWAIT); if (err == EFBIG) { /* Update statistics */ sq->stats.defragged++; /* Too many mbuf fragments */ mb = m_defrag(*mbp, M_NOWAIT); if (mb == NULL) { mb = *mbp; goto tx_drop; } /* Try again */ err = bus_dmamap_load_mbuf_sg(sq->dma_tag, sq->mbuf[pi].dma_map, mb, segs, &nsegs, BUS_DMA_NOWAIT); } /* Catch errors */ if (err != 0) goto tx_drop; /* Make sure all mbuf data, if any, is visible to the bus */ if (nsegs != 0) { bus_dmamap_sync(sq->dma_tag, sq->mbuf[pi].dma_map, BUS_DMASYNC_PREWRITE); } else { /* All data was inlined, free the mbuf. */ bus_dmamap_unload(sq->dma_tag, sq->mbuf[pi].dma_map); m_freem(mb); mb = NULL; } for (x = 0; x != nsegs; x++) { /* zero length DMA segments do not get a data segment */ if (segs[x].ds_len == 0) continue; dseg->addr = cpu_to_be64((uint64_t)segs[x].ds_addr); dseg->lkey = sq->mkey_be; dseg->byte_count = cpu_to_be32((uint32_t)segs[x].ds_len); dseg++; } /* final data segment count, measured from the start of the control segment */ ds_cnt = (dseg - ((struct mlx5_wqe_data_seg *)&wqe->ctrl)); wqe->ctrl.opmod_idx_opcode = cpu_to_be32((sq->pc << 8) | opcode); wqe->ctrl.qpn_ds = cpu_to_be32((sq->sqn << 8) | ds_cnt); wqe->ctrl.imm = cpu_to_be32(args.tisn << 8); if (mlx5e_do_send_cqe_inline(sq)) wqe->ctrl.fm_ce_se = MLX5_WQE_CTRL_CQ_UPDATE; else wqe->ctrl.fm_ce_se = 0; /* Copy data for doorbell */ memcpy(sq->doorbell.d32, &wqe->ctrl, sizeof(sq->doorbell.d32)); /* Store pointer to mbuf */ sq->mbuf[pi].mbuf = mb; sq->mbuf[pi].num_wqebbs = DIV_ROUND_UP(ds_cnt, MLX5_SEND_WQEBB_NUM_DS); sq->mbuf[pi].p_refcount = args.pref; if (unlikely(args.pref != NULL)) atomic_add_int(args.pref, 1); sq->pc += sq->mbuf[pi].num_wqebbs; /* Count all traffic going out */ sq->stats.packets++; sq->stats.bytes += sq->mbuf[pi].num_bytes; *mbp = NULL; /* safety clear */ return (0); tx_drop: /* drop path: account the drop, clear the caller's pointer and free the mbuf */ sq->stats.dropped++; *mbp = NULL; m_freem(mb); return err; } /* Reaps TX completions from sq->cq while budget remains: for each completed WQE slot the refcount is released and the mbuf, if any, is DMA-synced, unloaded and freed. The new consumer counter is stored in sq->cc at the end. */ static void mlx5e_poll_tx_cq(struct mlx5e_sq *sq, int budget) { u16 sqcc; /* * sq->cc must be updated only after 
mlx5_cqwq_update_db_record(), * otherwise a cq overrun may occur */ sqcc = sq->cc; while (budget > 0) { struct mlx5_cqe64 *cqe; struct mbuf *mb; + bool match; + u16 sqcc_this; + u16 delta; u16 x; u16 ci; cqe = mlx5e_get_cqe(&sq->cq); if (!cqe) break; mlx5_cqwq_pop(&sq->cq.wq); + /* check if the completion event indicates an error */ + if (unlikely(get_cqe_opcode(cqe) != MLX5_CQE_REQ)) + sq->stats.cqe_err++; + + /* setup local variables */ + sqcc_this = be16toh(cqe->wqe_counter); + match = false; + /* update budget according to the event factor */ budget -= sq->cev_factor; - for (x = 0; x != sq->cev_factor; x++) { + for (x = 0;; x++) { + if (unlikely(match != false)) { + break; + } else if (unlikely(x == sq->cev_factor)) { + /* WQE counter match not found */ + sq->stats.cqe_err++; + break; + } ci = sqcc & sq->wq.sz_m1; + delta = sqcc_this - sqcc; + match = (delta < sq->mbuf[ci].num_wqebbs); mb = sq->mbuf[ci].mbuf; sq->mbuf[ci].mbuf = NULL; if (unlikely(sq->mbuf[ci].p_refcount != NULL)) { atomic_add_int(sq->mbuf[ci].p_refcount, -1); sq->mbuf[ci].p_refcount = NULL; } if (mb == NULL) { - if (sq->mbuf[ci].num_bytes == 0) { - /* NOP */ + if (unlikely(sq->mbuf[ci].num_bytes == 0)) sq->stats.nop++; - } } else { bus_dmamap_sync(sq->dma_tag, sq->mbuf[ci].dma_map, BUS_DMASYNC_POSTWRITE); bus_dmamap_unload(sq->dma_tag, sq->mbuf[ci].dma_map); /* Free transmitted mbuf */ m_freem(mb); } sqcc += sq->mbuf[ci].num_wqebbs; } } mlx5_cqwq_update_db_record(&sq->cq.wq); /* Ensure cq space is freed before enabling more cqes */ atomic_thread_fence_rel(); sq->cc = sqcc; } static int mlx5e_xmit_locked(struct ifnet *ifp, struct mlx5e_sq *sq, struct mbuf *mb) { int err = 0; if (unlikely((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0 || READ_ONCE(sq->running) == 0)) { m_freem(mb); return (ENETDOWN); } /* Do transmit */ if (mlx5e_sq_xmit(sq, &mb) != 0) { /* NOTE: m_freem() is NULL safe */ m_freem(mb); err = ENOBUFS; } /* Check if we need to write the doorbell */ if (likely(sq->doorbell.d64 != 0)) { 
mlx5e_tx_notify_hw(sq, sq->doorbell.d32); sq->doorbell.d64 = 0; } /* * Check if we need to start the event timer which flushes the * transmit ring on timeout: */ if (unlikely(sq->cev_next_state == MLX5E_CEV_STATE_INITIAL && sq->cev_factor != 1)) { /* start the timer */ mlx5e_sq_cev_timeout(sq); } else { /* don't send NOPs yet */ sq->cev_next_state = MLX5E_CEV_STATE_HOLD_NOPS; } return (err); } int mlx5e_xmit(struct ifnet *ifp, struct mbuf *mb) { struct mlx5e_sq *sq; int ret; if (mb->m_pkthdr.csum_flags & CSUM_SND_TAG) { MPASS(mb->m_pkthdr.snd_tag->ifp == ifp); sq = mlx5e_select_queue_by_send_tag(ifp, mb); if (unlikely(sq == NULL)) { goto select_queue; } } else { select_queue: sq = mlx5e_select_queue(ifp, mb); if (unlikely(sq == NULL)) { /* Free mbuf */ m_freem(mb); /* Invalid send queue */ return (ENXIO); } } mtx_lock(&sq->lock); ret = mlx5e_xmit_locked(ifp, sq, mb); mtx_unlock(&sq->lock); return (ret); } void mlx5e_tx_cq_comp(struct mlx5_core_cq *mcq, struct mlx5_eqe *eqe __unused) { struct mlx5e_sq *sq = container_of(mcq, struct mlx5e_sq, cq.mcq); mtx_lock(&sq->comp_lock); mlx5e_poll_tx_cq(sq, MLX5E_BUDGET_MAX); mlx5e_cq_arm(&sq->cq, MLX5_GET_DOORBELL_LOCK(&sq->priv->doorbell_lock)); mtx_unlock(&sq->comp_lock); }