Index: head/sys/dev/mlx5/mlx5_en/en.h =================================================================== --- head/sys/dev/mlx5/mlx5_en/en.h (revision 341583) +++ head/sys/dev/mlx5/mlx5_en/en.h (revision 341584) @@ -1,933 +1,935 @@ /*- * Copyright (c) 2015 Mellanox Technologies. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS `AS IS' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #ifndef _MLX5_EN_H_ #define _MLX5_EN_H_ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "opt_rss.h" #ifdef RSS #include #include #endif #include #include #include #include #include #include #include #include #include #include #define IEEE_8021QAZ_MAX_TCS 8 #define MLX5E_PARAMS_MINIMUM_LOG_SQ_SIZE 0x7 #define MLX5E_PARAMS_DEFAULT_LOG_SQ_SIZE 0xa #define MLX5E_PARAMS_MAXIMUM_LOG_SQ_SIZE 0xe #define MLX5E_PARAMS_MINIMUM_LOG_RQ_SIZE 0x7 #define MLX5E_PARAMS_DEFAULT_LOG_RQ_SIZE 0xa #define MLX5E_PARAMS_MAXIMUM_LOG_RQ_SIZE 0xe #define MLX5E_MAX_RX_SEGS 7 #ifndef MLX5E_MAX_RX_BYTES #define MLX5E_MAX_RX_BYTES MCLBYTES #endif #if (MLX5E_MAX_RX_SEGS == 1) /* FreeBSD HW LRO is limited by 16KB - the size of max mbuf */ #define MLX5E_PARAMS_DEFAULT_LRO_WQE_SZ MJUM16BYTES #else #define MLX5E_PARAMS_DEFAULT_LRO_WQE_SZ \ MIN(65535, MLX5E_MAX_RX_SEGS * MLX5E_MAX_RX_BYTES) #endif #define MLX5E_PARAMS_DEFAULT_RX_CQ_MODERATION_USEC 0x10 #define MLX5E_PARAMS_DEFAULT_RX_CQ_MODERATION_USEC_FROM_CQE 0x3 #define MLX5E_PARAMS_DEFAULT_RX_CQ_MODERATION_PKTS 0x20 #define MLX5E_PARAMS_DEFAULT_TX_CQ_MODERATION_USEC 0x10 #define MLX5E_PARAMS_DEFAULT_TX_CQ_MODERATION_PKTS 0x20 #define MLX5E_PARAMS_DEFAULT_MIN_RX_WQES 0x80 #define MLX5E_PARAMS_DEFAULT_RX_HASH_LOG_TBL_SZ 0x7 #define MLX5E_CACHELINE_SIZE CACHE_LINE_SIZE #define MLX5E_HW2SW_MTU(hwmtu) \ ((hwmtu) - (ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN + ETHER_CRC_LEN)) #define MLX5E_SW2HW_MTU(swmtu) \ ((swmtu) + (ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN + ETHER_CRC_LEN)) #define MLX5E_SW2MB_MTU(swmtu) \ (MLX5E_SW2HW_MTU(swmtu) + MLX5E_NET_IP_ALIGN) #define MLX5E_MTU_MIN 72 /* Min MTU allowed by the kernel */ #define MLX5E_MTU_MAX MIN(ETHERMTU_JUMBO, MJUM16BYTES) /* Max MTU of Ethernet * jumbo frames */ #define MLX5E_BUDGET_MAX 8192 /* RX and TX */ #define MLX5E_RX_BUDGET_MAX 256 #define 
MLX5E_SQ_BF_BUDGET 16 #define MLX5E_SQ_TX_QUEUE_SIZE 4096 /* SQ drbr queue size */ #define MLX5E_MAX_TX_NUM_TC 8 /* units */ #define MLX5E_MAX_TX_HEADER 128 /* bytes */ #define MLX5E_MAX_TX_PAYLOAD_SIZE 65536 /* bytes */ #define MLX5E_MAX_TX_MBUF_SIZE 65536 /* bytes */ #define MLX5E_MAX_TX_MBUF_FRAGS \ ((MLX5_SEND_WQE_MAX_WQEBBS * MLX5_SEND_WQEBB_NUM_DS) - \ (MLX5E_MAX_TX_HEADER / MLX5_SEND_WQE_DS) - \ 1 /* the maximum value of the DS counter is 0x3F and not 0x40 */) /* units */ #define MLX5E_MAX_TX_INLINE \ (MLX5E_MAX_TX_HEADER - sizeof(struct mlx5e_tx_wqe) + \ sizeof(((struct mlx5e_tx_wqe *)0)->eth.inline_hdr_start)) /* bytes */ #define MLX5E_100MB (100000) #define MLX5E_1GB (1000000) MALLOC_DECLARE(M_MLX5EN); struct mlx5_core_dev; struct mlx5e_cq; typedef void (mlx5e_cq_comp_t)(struct mlx5_core_cq *); #define MLX5E_STATS_COUNT(a,b,c,d) a #define MLX5E_STATS_VAR(a,b,c,d) b; #define MLX5E_STATS_DESC(a,b,c,d) c, d, #define MLX5E_VPORT_STATS(m) \ /* HW counters */ \ m(+1, u64 rx_packets, "rx_packets", "Received packets") \ m(+1, u64 rx_bytes, "rx_bytes", "Received bytes") \ m(+1, u64 tx_packets, "tx_packets", "Transmitted packets") \ m(+1, u64 tx_bytes, "tx_bytes", "Transmitted bytes") \ m(+1, u64 rx_error_packets, "rx_error_packets", "Received error packets") \ m(+1, u64 rx_error_bytes, "rx_error_bytes", "Received error bytes") \ m(+1, u64 tx_error_packets, "tx_error_packets", "Transmitted error packets") \ m(+1, u64 tx_error_bytes, "tx_error_bytes", "Transmitted error bytes") \ m(+1, u64 rx_unicast_packets, "rx_unicast_packets", "Received unicast packets") \ m(+1, u64 rx_unicast_bytes, "rx_unicast_bytes", "Received unicast bytes") \ m(+1, u64 tx_unicast_packets, "tx_unicast_packets", "Transmitted unicast packets") \ m(+1, u64 tx_unicast_bytes, "tx_unicast_bytes", "Transmitted unicast bytes") \ m(+1, u64 rx_multicast_packets, "rx_multicast_packets", "Received multicast packets") \ m(+1, u64 rx_multicast_bytes, "rx_multicast_bytes", "Received multicast bytes") \ m(+1, u64 tx_multicast_packets, "tx_multicast_packets", "Transmitted multicast packets") \ m(+1, u64 tx_multicast_bytes, "tx_multicast_bytes", "Transmitted multicast bytes") \ m(+1, u64 rx_broadcast_packets, "rx_broadcast_packets", "Received broadcast packets") \ m(+1, u64 rx_broadcast_bytes, "rx_broadcast_bytes", "Received broadcast bytes") \ m(+1, u64 tx_broadcast_packets, "tx_broadcast_packets", "Transmitted broadcast packets") \ m(+1, u64 tx_broadcast_bytes, "tx_broadcast_bytes", "Transmitted broadcast bytes") \ m(+1, u64 rx_out_of_buffer, "rx_out_of_buffer", "Receive out of buffer, no recv wqes events") \ /* SW counters */ \ m(+1, u64 tso_packets, "tso_packets", "Transmitted TSO packets") \ m(+1, u64 tso_bytes, "tso_bytes", "Transmitted TSO bytes") \ m(+1, u64 lro_packets, "lro_packets", "Received LRO packets") \ m(+1, u64 lro_bytes, "lro_bytes", "Received LRO bytes") \ m(+1, u64 sw_lro_queued, "sw_lro_queued", "Packets queued for SW LRO") \ m(+1, u64 sw_lro_flushed, "sw_lro_flushed", "Packets flushed from SW LRO") \ m(+1, u64 rx_csum_good, "rx_csum_good", "Received checksum valid packets") \ m(+1, u64 rx_csum_none, "rx_csum_none", "Received no checksum packets") \ m(+1, u64 tx_csum_offload, "tx_csum_offload", "Transmit checksum offload packets") \ m(+1, u64 tx_queue_dropped, "tx_queue_dropped", "Transmit queue dropped") \ m(+1, u64 tx_defragged, "tx_defragged", "Transmit queue defragged") \ m(+1, u64 rx_wqe_err, "rx_wqe_err", "Receive WQE errors") \ m(+1, u64 tx_jumbo_packets, "tx_jumbo_packets", "TX packets greater than 1518 
octets") #define MLX5E_VPORT_STATS_NUM (0 MLX5E_VPORT_STATS(MLX5E_STATS_COUNT)) struct mlx5e_vport_stats { struct sysctl_ctx_list ctx; u64 arg [0]; MLX5E_VPORT_STATS(MLX5E_STATS_VAR) u32 rx_out_of_buffer_prev; }; #define MLX5E_PPORT_IEEE802_3_STATS(m) \ m(+1, u64 frames_tx, "frames_tx", "Frames transmitted") \ m(+1, u64 frames_rx, "frames_rx", "Frames received") \ m(+1, u64 check_seq_err, "check_seq_err", "Sequence errors") \ m(+1, u64 alignment_err, "alignment_err", "Alignment errors") \ m(+1, u64 octets_tx, "octets_tx", "Bytes transmitted") \ m(+1, u64 octets_received, "octets_received", "Bytes received") \ m(+1, u64 multicast_xmitted, "multicast_xmitted", "Multicast transmitted") \ m(+1, u64 broadcast_xmitted, "broadcast_xmitted", "Broadcast transmitted") \ m(+1, u64 multicast_rx, "multicast_rx", "Multicast received") \ m(+1, u64 broadcast_rx, "broadcast_rx", "Broadcast received") \ m(+1, u64 in_range_len_errors, "in_range_len_errors", "In range length errors") \ m(+1, u64 out_of_range_len, "out_of_range_len", "Out of range length errors") \ m(+1, u64 too_long_errors, "too_long_errors", "Too long errors") \ m(+1, u64 symbol_err, "symbol_err", "Symbol errors") \ m(+1, u64 mac_control_tx, "mac_control_tx", "MAC control transmitted") \ m(+1, u64 mac_control_rx, "mac_control_rx", "MAC control received") \ m(+1, u64 unsupported_op_rx, "unsupported_op_rx", "Unsupported operation received") \ m(+1, u64 pause_ctrl_rx, "pause_ctrl_rx", "Pause control received") \ m(+1, u64 pause_ctrl_tx, "pause_ctrl_tx", "Pause control transmitted") #define MLX5E_PPORT_RFC2819_STATS(m) \ m(+1, u64 drop_events, "drop_events", "Dropped events") \ m(+1, u64 octets, "octets", "Octets") \ m(+1, u64 pkts, "pkts", "Packets") \ m(+1, u64 broadcast_pkts, "broadcast_pkts", "Broadcast packets") \ m(+1, u64 multicast_pkts, "multicast_pkts", "Multicast packets") \ m(+1, u64 crc_align_errors, "crc_align_errors", "CRC alignment errors") \ m(+1, u64 undersize_pkts, "undersize_pkts", "Undersized packets") \ m(+1, u64 oversize_pkts, "oversize_pkts", "Oversized packets") \ m(+1, u64 fragments, "fragments", "Fragments") \ m(+1, u64 jabbers, "jabbers", "Jabbers") \ m(+1, u64 collisions, "collisions", "Collisions") #define MLX5E_PPORT_RFC2819_STATS_DEBUG(m) \ m(+1, u64 p64octets, "p64octets", "Bytes") \ m(+1, u64 p65to127octets, "p65to127octets", "Bytes") \ m(+1, u64 p128to255octets, "p128to255octets", "Bytes") \ m(+1, u64 p256to511octets, "p256to511octets", "Bytes") \ m(+1, u64 p512to1023octets, "p512to1023octets", "Bytes") \ m(+1, u64 p1024to1518octets, "p1024to1518octets", "Bytes") \ m(+1, u64 p1519to2047octets, "p1519to2047octets", "Bytes") \ m(+1, u64 p2048to4095octets, "p2048to4095octets", "Bytes") \ m(+1, u64 p4096to8191octets, "p4096to8191octets", "Bytes") \ m(+1, u64 p8192to10239octets, "p8192to10239octets", "Bytes") #define MLX5E_PPORT_RFC2863_STATS_DEBUG(m) \ m(+1, u64 in_octets, "in_octets", "In octets") \ m(+1, u64 in_ucast_pkts, "in_ucast_pkts", "In unicast packets") \ m(+1, u64 in_discards, "in_discards", "In discards") \ m(+1, u64 in_errors, "in_errors", "In errors") \ m(+1, u64 in_unknown_protos, "in_unknown_protos", "In unknown protocols") \ m(+1, u64 out_octets, "out_octets", "Out octets") \ m(+1, u64 out_ucast_pkts, "out_ucast_pkts", "Out unicast packets") \ m(+1, u64 out_discards, "out_discards", "Out discards") \ m(+1, u64 out_errors, "out_errors", "Out errors") \ m(+1, u64 in_multicast_pkts, "in_multicast_pkts", "In multicast packets") \ m(+1, u64 in_broadcast_pkts, "in_broadcast_pkts", "In broadcast packets") 
\ m(+1, u64 out_multicast_pkts, "out_multicast_pkts", "Out multicast packets") \ m(+1, u64 out_broadcast_pkts, "out_broadcast_pkts", "Out broadcast packets") #define MLX5E_PPORT_PHYSICAL_LAYER_STATS_DEBUG(m) \ m(+1, u64 time_since_last_clear, "time_since_last_clear", \ "Time since the last counters clear event (msec)") \ m(+1, u64 symbol_errors, "symbol_errors", "Symbol errors") \ m(+1, u64 sync_headers_errors, "sync_headers_errors", "Sync header error counter") \ m(+1, u64 bip_errors_lane0, "edpl_bip_errors_lane0", \ "Indicates the number of PRBS errors on lane 0") \ m(+1, u64 bip_errors_lane1, "edpl_bip_errors_lane1", \ "Indicates the number of PRBS errors on lane 1") \ m(+1, u64 bip_errors_lane2, "edpl_bip_errors_lane2", \ "Indicates the number of PRBS errors on lane 2") \ m(+1, u64 bip_errors_lane3, "edpl_bip_errors_lane3", \ "Indicates the number of PRBS errors on lane 3") \ m(+1, u64 fc_corrected_blocks_lane0, "fc_corrected_blocks_lane0", \ "FEC correctable block counter lane 0") \ m(+1, u64 fc_corrected_blocks_lane1, "fc_corrected_blocks_lane1", \ "FEC correctable block counter lane 1") \ m(+1, u64 fc_corrected_blocks_lane2, "fc_corrected_blocks_lane2", \ "FEC correctable block counter lane 2") \ m(+1, u64 fc_corrected_blocks_lane3, "fc_corrected_blocks_lane3", \ "FEC correctable block counter lane 3") \ m(+1, u64 rs_corrected_blocks, "rs_corrected_blocks", \ "FEC correctable block counter") \ m(+1, u64 rs_uncorrectable_blocks, "rs_uncorrectable_blocks", \ "FEC uncorrectable block counter") \ m(+1, u64 rs_no_errors_blocks, "rs_no_errors_blocks", \ "The number of RS-FEC blocks received that had no errors") \ m(+1, u64 rs_single_error_blocks, "rs_single_error_blocks", \ "The number of corrected RS-FEC blocks received that had " \ "exactly 1 error symbol") \ m(+1, u64 rs_corrected_symbols_total, "rs_corrected_symbols_total", \ "Port FEC corrected symbol counter") \ m(+1, u64 rs_corrected_symbols_lane0, "rs_corrected_symbols_lane0", \ "FEC corrected symbol counter lane 0") \ m(+1, u64 rs_corrected_symbols_lane1, "rs_corrected_symbols_lane1", \ "FEC corrected symbol counter lane 1") \ m(+1, u64 rs_corrected_symbols_lane2, "rs_corrected_symbols_lane2", \ "FEC corrected symbol counter lane 2") \ m(+1, u64 rs_corrected_symbols_lane3, "rs_corrected_symbols_lane3", \ "FEC corrected symbol counter lane 3") /* Per priority statistics for PFC */ #define MLX5E_PPORT_PER_PRIO_STATS_SUB(m,n,p) \ m(n, p, +1, u64, rx_octets, "rx_octets", "Received octets") \ m(n, p, +1, u64, reserved_0, "reserved_0", "Reserved") \ m(n, p, +1, u64, reserved_1, "reserved_1", "Reserved") \ m(n, p, +1, u64, reserved_2, "reserved_2", "Reserved") \ m(n, p, +1, u64, rx_frames, "rx_frames", "Received frames") \ m(n, p, +1, u64, tx_octets, "tx_octets", "Transmitted octets") \ m(n, p, +1, u64, reserved_3, "reserved_3", "Reserved") \ m(n, p, +1, u64, reserved_4, "reserved_4", "Reserved") \ m(n, p, +1, u64, reserved_5, "reserved_5", "Reserved") \ m(n, p, +1, u64, tx_frames, "tx_frames", "Transmitted frames") \ m(n, p, +1, u64, rx_pause, "rx_pause", "Received pause frames") \ m(n, p, +1, u64, rx_pause_duration, "rx_pause_duration", \ "Received pause duration") \ m(n, p, +1, u64, tx_pause, "tx_pause", "Transmitted pause frames") \ m(n, p, +1, u64, tx_pause_duration, "tx_pause_duration", \ "Transmitted pause duration") \ m(n, p, +1, u64, rx_pause_transition, "rx_pause_transition", \ "Received pause transitions") \ m(n, p, +1, u64, rx_discards, "rx_discards", "Discarded received frames") \ m(n, p, +1, u64, device_stall_minor_watermark, 
\ "device_stall_minor_watermark", "Device stall minor watermark") \ m(n, p, +1, u64, device_stall_critical_watermark, \ "device_stall_critical_watermark", "Device stall critical watermark") #define MLX5E_PPORT_PER_PRIO_STATS_PREFIX(m,p,c,t,f,s,d) \ m(c, t pri_##p##_##f, "prio" #p "_" s, "Priority " #p " - " d) #define MLX5E_PPORT_PER_PRIO_STATS_NUM_PRIO 8 #define MLX5E_PPORT_PER_PRIO_STATS(m) \ MLX5E_PPORT_PER_PRIO_STATS_SUB(MLX5E_PPORT_PER_PRIO_STATS_PREFIX,m,0) \ MLX5E_PPORT_PER_PRIO_STATS_SUB(MLX5E_PPORT_PER_PRIO_STATS_PREFIX,m,1) \ MLX5E_PPORT_PER_PRIO_STATS_SUB(MLX5E_PPORT_PER_PRIO_STATS_PREFIX,m,2) \ MLX5E_PPORT_PER_PRIO_STATS_SUB(MLX5E_PPORT_PER_PRIO_STATS_PREFIX,m,3) \ MLX5E_PPORT_PER_PRIO_STATS_SUB(MLX5E_PPORT_PER_PRIO_STATS_PREFIX,m,4) \ MLX5E_PPORT_PER_PRIO_STATS_SUB(MLX5E_PPORT_PER_PRIO_STATS_PREFIX,m,5) \ MLX5E_PPORT_PER_PRIO_STATS_SUB(MLX5E_PPORT_PER_PRIO_STATS_PREFIX,m,6) \ MLX5E_PPORT_PER_PRIO_STATS_SUB(MLX5E_PPORT_PER_PRIO_STATS_PREFIX,m,7) /* * Make sure to update mlx5e_update_pport_counters() * when adding a new MLX5E_PPORT_STATS block */ #define MLX5E_PPORT_STATS(m) \ MLX5E_PPORT_PER_PRIO_STATS(m) \ MLX5E_PPORT_IEEE802_3_STATS(m) \ MLX5E_PPORT_RFC2819_STATS(m) #define MLX5E_PORT_STATS_DEBUG(m) \ MLX5E_PPORT_RFC2819_STATS_DEBUG(m) \ MLX5E_PPORT_RFC2863_STATS_DEBUG(m) \ MLX5E_PPORT_PHYSICAL_LAYER_STATS_DEBUG(m) #define MLX5E_PPORT_IEEE802_3_STATS_NUM \ (0 MLX5E_PPORT_IEEE802_3_STATS(MLX5E_STATS_COUNT)) #define MLX5E_PPORT_RFC2819_STATS_NUM \ (0 MLX5E_PPORT_RFC2819_STATS(MLX5E_STATS_COUNT)) #define MLX5E_PPORT_STATS_NUM \ (0 MLX5E_PPORT_STATS(MLX5E_STATS_COUNT)) #define MLX5E_PPORT_PER_PRIO_STATS_NUM \ (0 MLX5E_PPORT_PER_PRIO_STATS(MLX5E_STATS_COUNT)) #define MLX5E_PPORT_RFC2819_STATS_DEBUG_NUM \ (0 MLX5E_PPORT_RFC2819_STATS_DEBUG(MLX5E_STATS_COUNT)) #define MLX5E_PPORT_RFC2863_STATS_DEBUG_NUM \ (0 MLX5E_PPORT_RFC2863_STATS_DEBUG(MLX5E_STATS_COUNT)) #define MLX5E_PPORT_PHYSICAL_LAYER_STATS_DEBUG_NUM \ (0 MLX5E_PPORT_PHYSICAL_LAYER_STATS_DEBUG(MLX5E_STATS_COUNT)) #define MLX5E_PORT_STATS_DEBUG_NUM \ (0 MLX5E_PORT_STATS_DEBUG(MLX5E_STATS_COUNT)) struct mlx5e_pport_stats { struct sysctl_ctx_list ctx; u64 arg [0]; MLX5E_PPORT_STATS(MLX5E_STATS_VAR) }; struct mlx5e_port_stats_debug { struct sysctl_ctx_list ctx; u64 arg [0]; MLX5E_PORT_STATS_DEBUG(MLX5E_STATS_VAR) }; #define MLX5E_RQ_STATS(m) \ m(+1, u64 packets, "packets", "Received packets") \ + m(+1, u64 bytes, "bytes", "Received bytes") \ m(+1, u64 csum_none, "csum_none", "Received packets") \ - m(+1, u64 lro_packets, "lro_packets", "Received packets") \ - m(+1, u64 lro_bytes, "lro_bytes", "Received packets") \ + m(+1, u64 lro_packets, "lro_packets", "Received LRO packets") \ + m(+1, u64 lro_bytes, "lro_bytes", "Received LRO bytes") \ m(+1, u64 sw_lro_queued, "sw_lro_queued", "Packets queued for SW LRO") \ m(+1, u64 sw_lro_flushed, "sw_lro_flushed", "Packets flushed from SW LRO") \ m(+1, u64 wqe_err, "wqe_err", "Received packets") #define MLX5E_RQ_STATS_NUM (0 MLX5E_RQ_STATS(MLX5E_STATS_COUNT)) struct mlx5e_rq_stats { struct sysctl_ctx_list ctx; u64 arg [0]; MLX5E_RQ_STATS(MLX5E_STATS_VAR) }; #define MLX5E_SQ_STATS(m) \ m(+1, u64 packets, "packets", "Transmitted packets") \ + m(+1, u64 bytes, "bytes", "Transmitted bytes") \ m(+1, u64 tso_packets, "tso_packets", "Transmitted packets") \ m(+1, u64 tso_bytes, "tso_bytes", "Transmitted bytes") \ m(+1, u64 csum_offload_none, "csum_offload_none", "Transmitted packets") \ m(+1, u64 defragged, "defragged", "Transmitted packets") \ m(+1, u64 dropped, "dropped", "Transmitted packets") 
\ m(+1, u64 nop, "nop", "Transmitted packets") #define MLX5E_SQ_STATS_NUM (0 MLX5E_SQ_STATS(MLX5E_STATS_COUNT)) struct mlx5e_sq_stats { struct sysctl_ctx_list ctx; u64 arg [0]; MLX5E_SQ_STATS(MLX5E_STATS_VAR) }; struct mlx5e_stats { struct mlx5e_vport_stats vport; struct mlx5e_pport_stats pport; struct mlx5e_port_stats_debug port_stats_debug; }; struct mlx5e_rq_param { u32 rqc [MLX5_ST_SZ_DW(rqc)]; struct mlx5_wq_param wq; }; struct mlx5e_sq_param { u32 sqc [MLX5_ST_SZ_DW(sqc)]; struct mlx5_wq_param wq; }; struct mlx5e_cq_param { u32 cqc [MLX5_ST_SZ_DW(cqc)]; struct mlx5_wq_param wq; }; struct mlx5e_params { u8 log_sq_size; u8 log_rq_size; u16 num_channels; u8 default_vlan_prio; u8 num_tc; u8 rx_cq_moderation_mode; u8 tx_cq_moderation_mode; u16 rx_cq_moderation_usec; u16 rx_cq_moderation_pkts; u16 tx_cq_moderation_usec; u16 tx_cq_moderation_pkts; u16 min_rx_wqes; bool hw_lro_en; bool cqe_zipping_en; u32 lro_wqe_sz; u16 rx_hash_log_tbl_sz; u32 tx_pauseframe_control __aligned(4); u32 rx_pauseframe_control __aligned(4); u32 tx_priority_flow_control __aligned(4); u32 rx_priority_flow_control __aligned(4); u16 tx_max_inline; u8 tx_min_inline_mode; u8 channels_rsss; }; #define MLX5E_PARAMS(m) \ m(+1, u64 tx_queue_size_max, "tx_queue_size_max", "Max send queue size") \ m(+1, u64 rx_queue_size_max, "rx_queue_size_max", "Max receive queue size") \ m(+1, u64 tx_queue_size, "tx_queue_size", "Default send queue size") \ m(+1, u64 rx_queue_size, "rx_queue_size", "Default receive queue size") \ m(+1, u64 channels, "channels", "Default number of channels") \ m(+1, u64 channels_rsss, "channels_rsss", "Default channels receive side scaling stride") \ m(+1, u64 coalesce_usecs_max, "coalesce_usecs_max", "Maximum usecs for joining packets") \ m(+1, u64 coalesce_pkts_max, "coalesce_pkts_max", "Maximum packets to join") \ m(+1, u64 rx_coalesce_usecs, "rx_coalesce_usecs", "Limit in usec for joining rx packets") \ m(+1, u64 rx_coalesce_pkts, "rx_coalesce_pkts", "Maximum number of rx packets to join") \ m(+1, u64 rx_coalesce_mode, "rx_coalesce_mode", "0: EQE mode 1: CQE mode") \ m(+1, u64 tx_coalesce_usecs, "tx_coalesce_usecs", "Limit in usec for joining tx packets") \ m(+1, u64 tx_coalesce_pkts, "tx_coalesce_pkts", "Maximum number of tx packets to join") \ m(+1, u64 tx_coalesce_mode, "tx_coalesce_mode", "0: EQE mode 1: CQE mode") \ m(+1, u64 tx_completion_fact, "tx_completion_fact", "1..MAX: Completion event ratio") \ m(+1, u64 tx_completion_fact_max, "tx_completion_fact_max", "Maximum completion event ratio") \ m(+1, u64 hw_lro, "hw_lro", "set to enable hw_lro") \ m(+1, u64 cqe_zipping, "cqe_zipping", "0 : CQE zipping disabled") \ m(+1, u64 modify_tx_dma, "modify_tx_dma", "0: Enable TX 1: Disable TX") \ m(+1, u64 modify_rx_dma, "modify_rx_dma", "0: Enable RX 1: Disable RX") \ m(+1, u64 diag_pci_enable, "diag_pci_enable", "0: Disabled 1: Enabled") \ m(+1, u64 diag_general_enable, "diag_general_enable", "0: Disabled 1: Enabled") \ m(+1, u64 hw_mtu, "hw_mtu", "Current hardware MTU value") \ m(+1, u64 mc_local_lb, "mc_local_lb", "0: Local multicast loopback enabled 1: Disabled") \ m(+1, u64 uc_local_lb, "uc_local_lb", "0: Local unicast loopback enabled 1: Disabled") #define MLX5E_PARAMS_NUM (0 MLX5E_PARAMS(MLX5E_STATS_COUNT)) struct mlx5e_params_ethtool { u64 arg [0]; MLX5E_PARAMS(MLX5E_STATS_VAR) u64 max_bw_value[IEEE_8021QAZ_MAX_TCS]; u8 max_bw_share[IEEE_8021QAZ_MAX_TCS]; u8 prio_tc[IEEE_8021QAZ_MAX_TCS]; u8 dscp2prio[MLX5_MAX_SUPPORTED_DSCP]; u8 trust_state; }; /* EEPROM Standards for plug in modules */ #ifndef 
MLX5E_ETH_MODULE_SFF_8472 #define MLX5E_ETH_MODULE_SFF_8472 0x1 #define MLX5E_ETH_MODULE_SFF_8472_LEN 128 #endif #ifndef MLX5E_ETH_MODULE_SFF_8636 #define MLX5E_ETH_MODULE_SFF_8636 0x2 #define MLX5E_ETH_MODULE_SFF_8636_LEN 256 #endif #ifndef MLX5E_ETH_MODULE_SFF_8436 #define MLX5E_ETH_MODULE_SFF_8436 0x3 #define MLX5E_ETH_MODULE_SFF_8436_LEN 256 #endif /* EEPROM I2C Addresses */ #define MLX5E_I2C_ADDR_LOW 0x50 #define MLX5E_I2C_ADDR_HIGH 0x51 #define MLX5E_EEPROM_LOW_PAGE 0x0 #define MLX5E_EEPROM_HIGH_PAGE 0x3 #define MLX5E_EEPROM_HIGH_PAGE_OFFSET 128 #define MLX5E_EEPROM_PAGE_LENGTH 256 #define MLX5E_EEPROM_INFO_BYTES 0x3 struct mlx5e_cq { /* data path - accessed per cqe */ struct mlx5_cqwq wq; /* data path - accessed per HW polling */ struct mlx5_core_cq mcq; /* control */ struct mlx5e_priv *priv; struct mlx5_wq_ctrl wq_ctrl; } __aligned(MLX5E_CACHELINE_SIZE); struct mlx5e_rq_mbuf { bus_dmamap_t dma_map; caddr_t data; struct mbuf *mbuf; }; struct mlx5e_rq { /* data path */ struct mlx5_wq_ll wq; struct mtx mtx; bus_dma_tag_t dma_tag; u32 wqe_sz; u32 nsegs; struct mlx5e_rq_mbuf *mbuf; struct ifnet *ifp; struct mlx5e_rq_stats stats; struct mlx5e_cq cq; struct lro_ctrl lro; volatile int enabled; int ix; /* control */ struct mlx5_wq_ctrl wq_ctrl; u32 rqn; struct mlx5e_channel *channel; struct callout watchdog; } __aligned(MLX5E_CACHELINE_SIZE); struct mlx5e_sq_mbuf { bus_dmamap_t dma_map; struct mbuf *mbuf; u32 num_bytes; u32 num_wqebbs; }; enum { MLX5E_SQ_READY, MLX5E_SQ_FULL }; struct mlx5e_sq { /* data path */ struct mtx lock; bus_dma_tag_t dma_tag; struct mtx comp_lock; /* dirtied @completion */ u16 cc; /* dirtied @xmit */ u16 pc __aligned(MLX5E_CACHELINE_SIZE); u16 bf_offset; u16 cev_counter; /* completion event counter */ u16 cev_factor; /* completion event factor */ u16 cev_next_state; /* next completion event state */ #define MLX5E_CEV_STATE_INITIAL 0 /* timer not started */ #define MLX5E_CEV_STATE_SEND_NOPS 1 /* send NOPs */ #define MLX5E_CEV_STATE_HOLD_NOPS 2 /* don't send NOPs yet */ u16 running; /* set if SQ is running */ struct callout cev_callout; union { u32 d32[2]; u64 d64; } doorbell; struct mlx5e_sq_stats stats; struct mlx5e_cq cq; /* pointers to per packet info: write@xmit, read@completion */ struct mlx5e_sq_mbuf *mbuf; struct buf_ring *br; /* read only */ struct mlx5_wq_cyc wq; struct mlx5_uar uar; struct ifnet *ifp; u32 sqn; u32 bf_buf_size; u32 mkey_be; u16 max_inline; u8 min_inline_mode; u8 min_insert_caps; #define MLX5E_INSERT_VLAN 1 #define MLX5E_INSERT_NON_VLAN 2 /* control path */ struct mlx5_wq_ctrl wq_ctrl; struct mlx5e_priv *priv; int tc; } __aligned(MLX5E_CACHELINE_SIZE); static inline bool mlx5e_sq_has_room_for(struct mlx5e_sq *sq, u16 n) { u16 cc = sq->cc; u16 pc = sq->pc; return ((sq->wq.sz_m1 & (cc - pc)) >= n || cc == pc); } struct mlx5e_channel { /* data path */ struct mlx5e_rq rq; struct mlx5e_sq sq[MLX5E_MAX_TX_NUM_TC]; struct ifnet *ifp; u32 mkey_be; u8 num_tc; /* control */ struct mlx5e_priv *priv; int ix; int cpu; } __aligned(MLX5E_CACHELINE_SIZE); enum mlx5e_traffic_types { MLX5E_TT_IPV4_TCP, MLX5E_TT_IPV6_TCP, MLX5E_TT_IPV4_UDP, MLX5E_TT_IPV6_UDP, MLX5E_TT_IPV4_IPSEC_AH, MLX5E_TT_IPV6_IPSEC_AH, MLX5E_TT_IPV4_IPSEC_ESP, MLX5E_TT_IPV6_IPSEC_ESP, MLX5E_TT_IPV4, MLX5E_TT_IPV6, MLX5E_TT_ANY, MLX5E_NUM_TT, }; enum { MLX5E_RQT_SPREADING = 0, MLX5E_RQT_DEFAULT_RQ = 1, MLX5E_NUM_RQT = 2, }; struct mlx5_flow_rule; struct mlx5e_eth_addr_info { u8 addr [ETH_ALEN + 2]; u32 tt_vec; /* flow table rule per traffic type */ struct mlx5_flow_rule 
*ft_rule[MLX5E_NUM_TT]; }; #define MLX5E_ETH_ADDR_HASH_SIZE (1 << BITS_PER_BYTE) struct mlx5e_eth_addr_hash_node; struct mlx5e_eth_addr_hash_head { struct mlx5e_eth_addr_hash_node *lh_first; }; struct mlx5e_eth_addr_db { struct mlx5e_eth_addr_hash_head if_uc[MLX5E_ETH_ADDR_HASH_SIZE]; struct mlx5e_eth_addr_hash_head if_mc[MLX5E_ETH_ADDR_HASH_SIZE]; struct mlx5e_eth_addr_info broadcast; struct mlx5e_eth_addr_info allmulti; struct mlx5e_eth_addr_info promisc; bool broadcast_enabled; bool allmulti_enabled; bool promisc_enabled; }; enum { MLX5E_STATE_ASYNC_EVENTS_ENABLE, MLX5E_STATE_OPENED, }; enum { MLX5_BW_NO_LIMIT = 0, MLX5_100_MBPS_UNIT = 3, MLX5_GBPS_UNIT = 4, }; struct mlx5e_vlan_db { unsigned long active_vlans[BITS_TO_LONGS(VLAN_N_VID)]; struct mlx5_flow_rule *active_vlans_ft_rule[VLAN_N_VID]; struct mlx5_flow_rule *untagged_ft_rule; struct mlx5_flow_rule *any_cvlan_ft_rule; struct mlx5_flow_rule *any_svlan_ft_rule; bool filter_disabled; }; struct mlx5e_flow_table { int num_groups; struct mlx5_flow_table *t; struct mlx5_flow_group **g; }; struct mlx5e_flow_tables { struct mlx5_flow_namespace *ns; struct mlx5e_flow_table vlan; struct mlx5e_flow_table main; struct mlx5e_flow_table inner_rss; }; #ifdef RATELIMIT #include "en_rl.h" #endif #define MLX5E_TSTMP_PREC 10 struct mlx5e_clbr_point { uint64_t base_curr; uint64_t base_prev; uint64_t clbr_hw_prev; uint64_t clbr_hw_curr; u_int clbr_gen; }; struct mlx5e_priv { struct mlx5_core_dev *mdev; /* must be first */ /* priv data path fields - start */ int order_base_2_num_channels; int queue_mapping_channel_mask; int num_tc; int default_vlan_prio; /* priv data path fields - end */ unsigned long state; int gone; #define PRIV_LOCK(priv) sx_xlock(&(priv)->state_lock) #define PRIV_UNLOCK(priv) sx_xunlock(&(priv)->state_lock) #define PRIV_LOCKED(priv) sx_xlocked(&(priv)->state_lock) struct sx state_lock; /* Protects Interface state */ struct mlx5_uar cq_uar; u32 pdn; u32 tdn; struct mlx5_core_mr mr; u32 tisn[MLX5E_MAX_TX_NUM_TC]; u32 rqtn; u32 tirn[MLX5E_NUM_TT]; struct mlx5e_flow_tables fts; struct mlx5e_eth_addr_db eth_addr; struct mlx5e_vlan_db vlan; struct mlx5e_params params; struct mlx5e_params_ethtool params_ethtool; union mlx5_core_pci_diagnostics params_pci; union mlx5_core_general_diagnostics params_general; struct mtx async_events_mtx; /* sync hw events */ struct work_struct update_stats_work; struct work_struct update_carrier_work; struct work_struct set_rx_mode_work; MLX5_DECLARE_DOORBELL_LOCK(doorbell_lock) struct ifnet *ifp; struct sysctl_ctx_list sysctl_ctx; struct sysctl_oid *sysctl_ifnet; struct sysctl_oid *sysctl_hw; int sysctl_debug; struct mlx5e_stats stats; int counter_set_id; struct workqueue_struct *wq; eventhandler_tag vlan_detach; eventhandler_tag vlan_attach; struct ifmedia media; int media_status_last; int media_active_last; struct callout watchdog; #ifdef RATELIMIT struct mlx5e_rl_priv_data rl; #endif struct callout tstmp_clbr; int clbr_done; int clbr_curr; struct mlx5e_clbr_point clbr_points[2]; u_int clbr_gen; struct mlx5e_channel channel[]; }; #define MLX5E_NET_IP_ALIGN 2 struct mlx5e_tx_wqe { struct mlx5_wqe_ctrl_seg ctrl; struct mlx5_wqe_eth_seg eth; }; struct mlx5e_rx_wqe { struct mlx5_wqe_srq_next_seg next; struct mlx5_wqe_data_seg data[]; }; /* the size of the structure above must be power of two */ CTASSERT(powerof2(sizeof(struct mlx5e_rx_wqe))); struct mlx5e_eeprom { int lock_bit; int i2c_addr; int page_num; int device_addr; int module_num; int len; int type; int page_valid; u32 *data; }; #define 
MLX5E_FLD_MAX(typ, fld) ((1ULL << __mlx5_bit_sz(typ, fld)) - 1ULL) int mlx5e_xmit(struct ifnet *, struct mbuf *); int mlx5e_open_locked(struct ifnet *); int mlx5e_close_locked(struct ifnet *); void mlx5e_cq_error_event(struct mlx5_core_cq *mcq, int event); void mlx5e_rx_cq_comp(struct mlx5_core_cq *); void mlx5e_tx_cq_comp(struct mlx5_core_cq *); struct mlx5_cqe64 *mlx5e_get_cqe(struct mlx5e_cq *cq); int mlx5e_open_flow_table(struct mlx5e_priv *priv); void mlx5e_close_flow_table(struct mlx5e_priv *priv); void mlx5e_set_rx_mode_core(struct mlx5e_priv *priv); void mlx5e_set_rx_mode_work(struct work_struct *work); void mlx5e_vlan_rx_add_vid(void *, struct ifnet *, u16); void mlx5e_vlan_rx_kill_vid(void *, struct ifnet *, u16); void mlx5e_enable_vlan_filter(struct mlx5e_priv *priv); void mlx5e_disable_vlan_filter(struct mlx5e_priv *priv); int mlx5e_add_all_vlan_rules(struct mlx5e_priv *priv); void mlx5e_del_all_vlan_rules(struct mlx5e_priv *priv); static inline void mlx5e_tx_notify_hw(struct mlx5e_sq *sq, u32 *wqe, int bf_sz) { u16 ofst = MLX5_BF_OFFSET + sq->bf_offset; /* ensure wqe is visible to device before updating doorbell record */ wmb(); *sq->wq.db = cpu_to_be32(sq->pc); /* * Ensure the doorbell record is visible to device before ringing * the doorbell: */ wmb(); if (bf_sz) { __iowrite64_copy(sq->uar.bf_map + ofst, wqe, bf_sz); /* flush the write-combining mapped buffer */ wmb(); } else { mlx5_write64(wqe, sq->uar.map + ofst, MLX5_GET_DOORBELL_LOCK(&sq->priv->doorbell_lock)); } sq->bf_offset ^= sq->bf_buf_size; } static inline void mlx5e_cq_arm(struct mlx5e_cq *cq, spinlock_t *dblock) { struct mlx5_core_cq *mcq; mcq = &cq->mcq; mlx5_cq_arm(mcq, MLX5_CQ_DB_REQ_NOT, mcq->uar->map, dblock, cq->wq.cc); } extern const struct ethtool_ops mlx5e_ethtool_ops; void mlx5e_create_ethtool(struct mlx5e_priv *); void mlx5e_create_stats(struct sysctl_ctx_list *, struct sysctl_oid_list *, const char *, const char **, unsigned, u64 *); void mlx5e_send_nop(struct mlx5e_sq *, u32); void mlx5e_sq_cev_timeout(void *); int mlx5e_refresh_channel_params(struct mlx5e_priv *); int mlx5e_open_cq(struct mlx5e_priv *, struct mlx5e_cq_param *, struct mlx5e_cq *, mlx5e_cq_comp_t *, int eq_ix); void mlx5e_close_cq(struct mlx5e_cq *); void mlx5e_free_sq_db(struct mlx5e_sq *); int mlx5e_alloc_sq_db(struct mlx5e_sq *); int mlx5e_enable_sq(struct mlx5e_sq *, struct mlx5e_sq_param *, int tis_num); int mlx5e_modify_sq(struct mlx5e_sq *, int curr_state, int next_state); void mlx5e_disable_sq(struct mlx5e_sq *); void mlx5e_drain_sq(struct mlx5e_sq *); void mlx5e_modify_tx_dma(struct mlx5e_priv *priv, uint8_t value); void mlx5e_modify_rx_dma(struct mlx5e_priv *priv, uint8_t value); void mlx5e_resume_sq(struct mlx5e_sq *sq); void mlx5e_update_sq_inline(struct mlx5e_sq *sq); void mlx5e_refresh_sq_inline(struct mlx5e_priv *priv); #endif /* _MLX5_EN_H_ */ Index: head/sys/dev/mlx5/mlx5_en/mlx5_en_rx.c =================================================================== --- head/sys/dev/mlx5/mlx5_en/mlx5_en_rx.c (revision 341583) +++ head/sys/dev/mlx5/mlx5_en/mlx5_en_rx.c (revision 341584) @@ -1,550 +1,551 @@ /*- * Copyright (c) 2015 Mellanox Technologies. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. 
Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS `AS IS' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #include "en.h" #include static inline int mlx5e_alloc_rx_wqe(struct mlx5e_rq *rq, struct mlx5e_rx_wqe *wqe, u16 ix) { bus_dma_segment_t segs[rq->nsegs]; struct mbuf *mb; int nsegs; int err; #if (MLX5E_MAX_RX_SEGS != 1) struct mbuf *mb_head; int i; #endif if (rq->mbuf[ix].mbuf != NULL) return (0); #if (MLX5E_MAX_RX_SEGS == 1) mb = m_getjcl(M_NOWAIT, MT_DATA, M_PKTHDR, rq->wqe_sz); if (unlikely(!mb)) return (-ENOMEM); mb->m_pkthdr.len = mb->m_len = rq->wqe_sz; #else mb_head = mb = m_getjcl(M_NOWAIT, MT_DATA, M_PKTHDR, MLX5E_MAX_RX_BYTES); if (unlikely(mb == NULL)) return (-ENOMEM); mb->m_len = MLX5E_MAX_RX_BYTES; mb->m_pkthdr.len = MLX5E_MAX_RX_BYTES; for (i = 1; i < rq->nsegs; i++) { if (mb_head->m_pkthdr.len >= rq->wqe_sz) break; mb = mb->m_next = m_getjcl(M_NOWAIT, MT_DATA, 0, MLX5E_MAX_RX_BYTES); if (unlikely(mb == NULL)) { m_freem(mb_head); return (-ENOMEM); } mb->m_len = MLX5E_MAX_RX_BYTES; mb_head->m_pkthdr.len += MLX5E_MAX_RX_BYTES; } /* rewind to first mbuf in chain */ mb = mb_head; #endif /* get IP header aligned */ m_adj(mb, MLX5E_NET_IP_ALIGN); err = -bus_dmamap_load_mbuf_sg(rq->dma_tag, rq->mbuf[ix].dma_map, mb, segs, &nsegs, BUS_DMA_NOWAIT); if (err != 0) goto err_free_mbuf; if (unlikely(nsegs == 0)) { bus_dmamap_unload(rq->dma_tag, rq->mbuf[ix].dma_map); err = -ENOMEM; goto err_free_mbuf; } #if (MLX5E_MAX_RX_SEGS == 1) wqe->data[0].addr = cpu_to_be64(segs[0].ds_addr); #else wqe->data[0].addr = cpu_to_be64(segs[0].ds_addr); wqe->data[0].byte_count = cpu_to_be32(segs[0].ds_len | MLX5_HW_START_PADDING); for (i = 1; i != nsegs; i++) { wqe->data[i].addr = cpu_to_be64(segs[i].ds_addr); wqe->data[i].byte_count = cpu_to_be32(segs[i].ds_len); } for (; i < rq->nsegs; i++) { wqe->data[i].addr = 0; wqe->data[i].byte_count = 0; } #endif rq->mbuf[ix].mbuf = mb; rq->mbuf[ix].data = mb->m_data; bus_dmamap_sync(rq->dma_tag, rq->mbuf[ix].dma_map, BUS_DMASYNC_PREREAD); return (0); err_free_mbuf: m_freem(mb); return (err); } static void mlx5e_post_rx_wqes(struct mlx5e_rq *rq) { if (unlikely(rq->enabled == 0)) return; while (!mlx5_wq_ll_is_full(&rq->wq)) { struct mlx5e_rx_wqe *wqe = mlx5_wq_ll_get_wqe(&rq->wq, rq->wq.head); if (unlikely(mlx5e_alloc_rx_wqe(rq, wqe, rq->wq.head))) { callout_reset_curcpu(&rq->watchdog, 1, (void *)&mlx5e_post_rx_wqes, rq); break; } mlx5_wq_ll_push(&rq->wq, be16_to_cpu(wqe->next.next_wqe_index)); } /* ensure wqes are visible to device before updating doorbell record */ atomic_thread_fence_rel(); mlx5_wq_ll_update_db_record(&rq->wq); } static void mlx5e_lro_update_hdr(struct mbuf *mb, struct mlx5_cqe64 *cqe) { /* 
TODO: consider vlans, ip options, ... */ struct ether_header *eh; uint16_t eh_type; uint16_t tot_len; struct ip6_hdr *ip6 = NULL; struct ip *ip4 = NULL; struct tcphdr *th; uint32_t *ts_ptr; uint8_t l4_hdr_type; int tcp_ack; eh = mtod(mb, struct ether_header *); eh_type = ntohs(eh->ether_type); l4_hdr_type = get_cqe_l4_hdr_type(cqe); tcp_ack = ((CQE_L4_HDR_TYPE_TCP_ACK_NO_DATA == l4_hdr_type) || (CQE_L4_HDR_TYPE_TCP_ACK_AND_DATA == l4_hdr_type)); /* TODO: consider vlan */ tot_len = be32_to_cpu(cqe->byte_cnt) - ETHER_HDR_LEN; switch (eh_type) { case ETHERTYPE_IP: ip4 = (struct ip *)(eh + 1); th = (struct tcphdr *)(ip4 + 1); break; case ETHERTYPE_IPV6: ip6 = (struct ip6_hdr *)(eh + 1); th = (struct tcphdr *)(ip6 + 1); break; default: return; } ts_ptr = (uint32_t *)(th + 1); if (get_cqe_lro_tcppsh(cqe)) th->th_flags |= TH_PUSH; if (tcp_ack) { th->th_flags |= TH_ACK; th->th_ack = cqe->lro_ack_seq_num; th->th_win = cqe->lro_tcp_win; /* * FreeBSD handles only 32bit aligned timestamp right after * the TCP hdr * +--------+--------+--------+--------+ * | NOP | NOP | TSopt | 10 | * +--------+--------+--------+--------+ * | TSval timestamp | * +--------+--------+--------+--------+ * | TSecr timestamp | * +--------+--------+--------+--------+ */ if (get_cqe_lro_timestamp_valid(cqe) && __predict_true(*ts_ptr == ntohl(TCPOPT_NOP << 24 | TCPOPT_NOP << 16 | TCPOPT_TIMESTAMP << 8 | TCPOLEN_TIMESTAMP))) { /* * cqe->timestamp is 64bit long. * [0-31] - timestamp. * [32-63] - timestamp echo reply. */ ts_ptr[1] = *(uint32_t *)&cqe->timestamp; ts_ptr[2] = *((uint32_t *)&cqe->timestamp + 1); } } if (ip4) { ip4->ip_ttl = cqe->lro_min_ttl; ip4->ip_len = cpu_to_be16(tot_len); ip4->ip_sum = 0; ip4->ip_sum = in_cksum(mb, ip4->ip_hl << 2); } else { ip6->ip6_hlim = cqe->lro_min_ttl; ip6->ip6_plen = cpu_to_be16(tot_len - sizeof(struct ip6_hdr)); } /* TODO: handle tcp checksum */ } static uint64_t mlx5e_mbuf_tstmp(struct mlx5e_priv *priv, uint64_t hw_tstmp) { struct mlx5e_clbr_point *cp; uint64_t a1, a2, res; u_int gen; do { cp = &priv->clbr_points[priv->clbr_curr]; gen = atomic_load_acq_int(&cp->clbr_gen); a1 = (hw_tstmp - cp->clbr_hw_prev) >> MLX5E_TSTMP_PREC; a2 = (cp->base_curr - cp->base_prev) >> MLX5E_TSTMP_PREC; res = (a1 * a2) << MLX5E_TSTMP_PREC; /* * Divisor cannot be zero because calibration callback * checks for the condition and disables timestamping * if clock halted. 
*/ res /= (cp->clbr_hw_curr - cp->clbr_hw_prev) >> MLX5E_TSTMP_PREC; res += cp->base_prev; atomic_thread_fence_acq(); } while (gen == 0 || gen != cp->clbr_gen); return (res); } static inline void mlx5e_build_rx_mbuf(struct mlx5_cqe64 *cqe, struct mlx5e_rq *rq, struct mbuf *mb, u32 cqe_bcnt) { struct ifnet *ifp = rq->ifp; struct mlx5e_channel *c; #if (MLX5E_MAX_RX_SEGS != 1) struct mbuf *mb_head; #endif int lro_num_seg; /* HW LRO session aggregated packets counter */ uint64_t tstmp; lro_num_seg = be32_to_cpu(cqe->srqn) >> 24; if (lro_num_seg > 1) { mlx5e_lro_update_hdr(mb, cqe); rq->stats.lro_packets++; rq->stats.lro_bytes += cqe_bcnt; } #if (MLX5E_MAX_RX_SEGS == 1) mb->m_pkthdr.len = mb->m_len = cqe_bcnt; #else mb->m_pkthdr.len = cqe_bcnt; for (mb_head = mb; mb != NULL; mb = mb->m_next) { if (mb->m_len > cqe_bcnt) mb->m_len = cqe_bcnt; cqe_bcnt -= mb->m_len; if (likely(cqe_bcnt == 0)) { if (likely(mb->m_next != NULL)) { /* trim off empty mbufs */ m_freem(mb->m_next); mb->m_next = NULL; } break; } } /* rewind to first mbuf in chain */ mb = mb_head; #endif /* check if a Toeplitz hash was computed */ if (cqe->rss_hash_type != 0) { mb->m_pkthdr.flowid = be32_to_cpu(cqe->rss_hash_result); #ifdef RSS /* decode the RSS hash type */ switch (cqe->rss_hash_type & (CQE_RSS_DST_HTYPE_L4 | CQE_RSS_DST_HTYPE_IP)) { /* IPv4 */ case (CQE_RSS_DST_HTYPE_TCP | CQE_RSS_DST_HTYPE_IPV4): M_HASHTYPE_SET(mb, M_HASHTYPE_RSS_TCP_IPV4); break; case (CQE_RSS_DST_HTYPE_UDP | CQE_RSS_DST_HTYPE_IPV4): M_HASHTYPE_SET(mb, M_HASHTYPE_RSS_UDP_IPV4); break; case CQE_RSS_DST_HTYPE_IPV4: M_HASHTYPE_SET(mb, M_HASHTYPE_RSS_IPV4); break; /* IPv6 */ case (CQE_RSS_DST_HTYPE_TCP | CQE_RSS_DST_HTYPE_IPV6): M_HASHTYPE_SET(mb, M_HASHTYPE_RSS_TCP_IPV6); break; case (CQE_RSS_DST_HTYPE_UDP | CQE_RSS_DST_HTYPE_IPV6): M_HASHTYPE_SET(mb, M_HASHTYPE_RSS_UDP_IPV6); break; case CQE_RSS_DST_HTYPE_IPV6: M_HASHTYPE_SET(mb, M_HASHTYPE_RSS_IPV6); break; default: /* Other */ M_HASHTYPE_SET(mb, M_HASHTYPE_OPAQUE_HASH); break; } #else M_HASHTYPE_SET(mb, M_HASHTYPE_OPAQUE_HASH); #endif } else { mb->m_pkthdr.flowid = rq->ix; M_HASHTYPE_SET(mb, M_HASHTYPE_OPAQUE); } mb->m_pkthdr.rcvif = ifp; if (likely(ifp->if_capenable & (IFCAP_RXCSUM | IFCAP_RXCSUM_IPV6)) && ((cqe->hds_ip_ext & (CQE_L2_OK | CQE_L3_OK | CQE_L4_OK)) == (CQE_L2_OK | CQE_L3_OK | CQE_L4_OK))) { mb->m_pkthdr.csum_flags = CSUM_IP_CHECKED | CSUM_IP_VALID | CSUM_DATA_VALID | CSUM_PSEUDO_HDR; mb->m_pkthdr.csum_data = htons(0xffff); } else { rq->stats.csum_none++; } if (cqe_has_vlan(cqe)) { mb->m_pkthdr.ether_vtag = be16_to_cpu(cqe->vlan_info); mb->m_flags |= M_VLANTAG; } c = container_of(rq, struct mlx5e_channel, rq); if (c->priv->clbr_done >= 2) { tstmp = mlx5e_mbuf_tstmp(c->priv, be64_to_cpu(cqe->timestamp)); if ((tstmp & MLX5_CQE_TSTMP_PTP) != 0) { /* * Timestamp was taken on the packet entrance, * instead of the cqe generation. 
*/ tstmp &= ~MLX5_CQE_TSTMP_PTP; mb->m_flags |= M_TSTMP_HPREC; } mb->m_pkthdr.rcv_tstmp = tstmp; mb->m_flags |= M_TSTMP; } } static inline void mlx5e_read_cqe_slot(struct mlx5e_cq *cq, u32 cc, void *data) { memcpy(data, mlx5_cqwq_get_wqe(&cq->wq, (cc & cq->wq.sz_m1)), sizeof(struct mlx5_cqe64)); } static inline void mlx5e_write_cqe_slot(struct mlx5e_cq *cq, u32 cc, void *data) { memcpy(mlx5_cqwq_get_wqe(&cq->wq, cc & cq->wq.sz_m1), data, sizeof(struct mlx5_cqe64)); } static inline void mlx5e_decompress_cqe(struct mlx5e_cq *cq, struct mlx5_cqe64 *title, struct mlx5_mini_cqe8 *mini, u16 wqe_counter, int i) { /* * NOTE: The fields which are not set here are copied from the * initial and common title. See memcpy() in * mlx5e_write_cqe_slot(). */ title->byte_cnt = mini->byte_cnt; title->wqe_counter = cpu_to_be16((wqe_counter + i) & cq->wq.sz_m1); title->check_sum = mini->checksum; title->op_own = (title->op_own & 0xf0) | (((cq->wq.cc + i) >> cq->wq.log_sz) & 1); } #define MLX5E_MINI_ARRAY_SZ 8 /* Make sure structs are not packed differently */ CTASSERT(sizeof(struct mlx5_cqe64) == sizeof(struct mlx5_mini_cqe8) * MLX5E_MINI_ARRAY_SZ); static void mlx5e_decompress_cqes(struct mlx5e_cq *cq) { struct mlx5_mini_cqe8 mini_array[MLX5E_MINI_ARRAY_SZ]; struct mlx5_cqe64 title; u32 cqe_count; u32 i = 0; u16 title_wqe_counter; mlx5e_read_cqe_slot(cq, cq->wq.cc, &title); title_wqe_counter = be16_to_cpu(title.wqe_counter); cqe_count = be32_to_cpu(title.byte_cnt); /* Make sure we won't overflow */ KASSERT(cqe_count <= cq->wq.sz_m1, ("%s: cqe_count %u > cq->wq.sz_m1 %u", __func__, cqe_count, cq->wq.sz_m1)); mlx5e_read_cqe_slot(cq, cq->wq.cc + 1, mini_array); while (true) { mlx5e_decompress_cqe(cq, &title, &mini_array[i % MLX5E_MINI_ARRAY_SZ], title_wqe_counter, i); mlx5e_write_cqe_slot(cq, cq->wq.cc + i, &title); i++; if (i == cqe_count) break; if (i % MLX5E_MINI_ARRAY_SZ == 0) mlx5e_read_cqe_slot(cq, cq->wq.cc + i, mini_array); } } static int mlx5e_poll_rx_cq(struct mlx5e_rq *rq, int budget) { int i; for (i = 0; i < budget; i++) { struct mlx5e_rx_wqe *wqe; struct mlx5_cqe64 *cqe; struct mbuf *mb; __be16 wqe_counter_be; u16 wqe_counter; u32 byte_cnt; cqe = mlx5e_get_cqe(&rq->cq); if (!cqe) break; if (mlx5_get_cqe_format(cqe) == MLX5_COMPRESSED) mlx5e_decompress_cqes(&rq->cq); mlx5_cqwq_pop(&rq->cq.wq); wqe_counter_be = cqe->wqe_counter; wqe_counter = be16_to_cpu(wqe_counter_be); wqe = mlx5_wq_ll_get_wqe(&rq->wq, wqe_counter); byte_cnt = be32_to_cpu(cqe->byte_cnt); bus_dmamap_sync(rq->dma_tag, rq->mbuf[wqe_counter].dma_map, BUS_DMASYNC_POSTREAD); if (unlikely((cqe->op_own >> 4) != MLX5_CQE_RESP_SEND)) { rq->stats.wqe_err++; goto wq_ll_pop; } if ((MHLEN - MLX5E_NET_IP_ALIGN) >= byte_cnt && (mb = m_gethdr(M_NOWAIT, MT_DATA)) != NULL) { #if (MLX5E_MAX_RX_SEGS != 1) /* set maximum mbuf length */ mb->m_len = MHLEN - MLX5E_NET_IP_ALIGN; #endif /* get IP header aligned */ mb->m_data += MLX5E_NET_IP_ALIGN; bcopy(rq->mbuf[wqe_counter].data, mtod(mb, caddr_t), byte_cnt); } else { mb = rq->mbuf[wqe_counter].mbuf; rq->mbuf[wqe_counter].mbuf = NULL; /* safety clear */ bus_dmamap_unload(rq->dma_tag, rq->mbuf[wqe_counter].dma_map); } mlx5e_build_rx_mbuf(cqe, rq, mb, byte_cnt); + rq->stats.bytes += byte_cnt; rq->stats.packets++; #if !defined(HAVE_TCP_LRO_RX) tcp_lro_queue_mbuf(&rq->lro, mb); #else if (mb->m_pkthdr.csum_flags == 0 || (rq->ifp->if_capenable & IFCAP_LRO) == 0 || rq->lro.lro_cnt == 0 || tcp_lro_rx(&rq->lro, mb, 0) != 0) { rq->ifp->if_input(rq->ifp, mb); } #endif wq_ll_pop: mlx5_wq_ll_pop(&rq->wq, wqe_counter_be, 
&wqe->next.next_wqe_index); } mlx5_cqwq_update_db_record(&rq->cq.wq); /* ensure cq space is freed before enabling more cqes */ atomic_thread_fence_rel(); return (i); } void mlx5e_rx_cq_comp(struct mlx5_core_cq *mcq) { struct mlx5e_rq *rq = container_of(mcq, struct mlx5e_rq, cq.mcq); int i = 0; #ifdef HAVE_PER_CQ_EVENT_PACKET #if (MHLEN < 15) #error "MHLEN is too small" #endif struct mbuf *mb = m_gethdr(M_NOWAIT, MT_DATA); if (mb != NULL) { /* this code is used for debugging purpose only */ mb->m_pkthdr.len = mb->m_len = 15; memset(mb->m_data, 255, 14); mb->m_data[14] = rq->ix; mb->m_pkthdr.rcvif = rq->ifp; rq->ifp->if_input(rq->ifp, mb); } #endif mtx_lock(&rq->mtx); /* * Polling the entire CQ without posting new WQEs results in * lack of receive WQEs during heavy traffic scenarios. */ while (1) { if (mlx5e_poll_rx_cq(rq, MLX5E_RX_BUDGET_MAX) != MLX5E_RX_BUDGET_MAX) break; i += MLX5E_RX_BUDGET_MAX; if (i >= MLX5E_BUDGET_MAX) break; mlx5e_post_rx_wqes(rq); } mlx5e_post_rx_wqes(rq); mlx5e_cq_arm(&rq->cq, MLX5_GET_DOORBELL_LOCK(&rq->channel->priv->doorbell_lock)); tcp_lro_flush_all(&rq->lro); mtx_unlock(&rq->mtx); } Index: head/sys/dev/mlx5/mlx5_en/mlx5_en_tx.c =================================================================== --- head/sys/dev/mlx5/mlx5_en/mlx5_en_tx.c (revision 341583) +++ head/sys/dev/mlx5/mlx5_en/mlx5_en_tx.c (revision 341584) @@ -1,626 +1,629 @@ /*- * Copyright (c) 2015-2018 Mellanox Technologies. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS `AS IS' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* * $FreeBSD$ */ #include "en.h" #include static inline bool mlx5e_do_send_cqe(struct mlx5e_sq *sq) { sq->cev_counter++; /* interleave the CQEs */ if (sq->cev_counter >= sq->cev_factor) { sq->cev_counter = 0; return (1); } return (0); } void mlx5e_send_nop(struct mlx5e_sq *sq, u32 ds_cnt) { u16 pi = sq->pc & sq->wq.sz_m1; struct mlx5e_tx_wqe *wqe = mlx5_wq_cyc_get_wqe(&sq->wq, pi); memset(&wqe->ctrl, 0, sizeof(wqe->ctrl)); wqe->ctrl.opmod_idx_opcode = cpu_to_be32((sq->pc << 8) | MLX5_OPCODE_NOP); wqe->ctrl.qpn_ds = cpu_to_be32((sq->sqn << 8) | ds_cnt); if (mlx5e_do_send_cqe(sq)) wqe->ctrl.fm_ce_se = MLX5_WQE_CTRL_CQ_UPDATE; else wqe->ctrl.fm_ce_se = 0; /* Copy data for doorbell */ memcpy(sq->doorbell.d32, &wqe->ctrl, sizeof(sq->doorbell.d32)); sq->mbuf[pi].mbuf = NULL; sq->mbuf[pi].num_bytes = 0; sq->mbuf[pi].num_wqebbs = DIV_ROUND_UP(ds_cnt, MLX5_SEND_WQEBB_NUM_DS); sq->pc += sq->mbuf[pi].num_wqebbs; } #if (__FreeBSD_version >= 1100000) static uint32_t mlx5e_hash_value; static void mlx5e_hash_init(void *arg) { mlx5e_hash_value = m_ether_tcpip_hash_init(); } /* Make kernel call mlx5e_hash_init after the random stack finished initializing */ SYSINIT(mlx5e_hash_init, SI_SUB_RANDOM, SI_ORDER_ANY, &mlx5e_hash_init, NULL); #endif static struct mlx5e_sq * mlx5e_select_queue(struct ifnet *ifp, struct mbuf *mb) { struct mlx5e_priv *priv = ifp->if_softc; struct mlx5e_sq *sq; u32 ch; u32 tc; /* obtain VLAN information if present */ if (mb->m_flags & M_VLANTAG) { tc = (mb->m_pkthdr.ether_vtag >> 13); if (tc >= priv->num_tc) tc = priv->default_vlan_prio; } else { tc = priv->default_vlan_prio; } ch = priv->params.num_channels; #ifdef RATELIMIT if (mb->m_pkthdr.snd_tag != NULL) { struct mlx5e_sq *sq; /* check for route change */ if (mb->m_pkthdr.snd_tag->ifp != ifp) return (NULL); /* get pointer to sendqueue */ sq = container_of(mb->m_pkthdr.snd_tag, struct mlx5e_rl_channel, m_snd_tag)->sq; /* check if valid */ if (sq != NULL && sq->running != 0) return (sq); /* FALLTHROUGH */ } #endif /* check if flowid is set */ if (M_HASHTYPE_GET(mb) != M_HASHTYPE_NONE) { #ifdef RSS u32 temp; if (rss_hash2bucket(mb->m_pkthdr.flowid, M_HASHTYPE_GET(mb), &temp) == 0) ch = temp % ch; else #endif ch = (mb->m_pkthdr.flowid % 128) % ch; } else { #if (__FreeBSD_version >= 1100000) ch = m_ether_tcpip_hash(MBUF_HASHFLAG_L3 | MBUF_HASHFLAG_L4, mb, mlx5e_hash_value) % ch; #else /* * m_ether_tcpip_hash not present in stable, so just * throw unhashed mbufs on queue 0 */ ch = 0; #endif } /* check if send queue is running */ sq = &priv->channel[ch].sq[tc]; if (likely(READ_ONCE(sq->running) != 0)) return (sq); return (NULL); } static inline u16 mlx5e_get_l2_header_size(struct mlx5e_sq *sq, struct mbuf *mb) { struct ether_vlan_header *eh; uint16_t eth_type; int min_inline; eh = mtod(mb, struct ether_vlan_header *); if (unlikely(mb->m_len < ETHER_HDR_LEN)) { goto max_inline; } else if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) { if (unlikely(mb->m_len < (ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN))) goto max_inline; eth_type = ntohs(eh->evl_proto); min_inline = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN; } else { eth_type = ntohs(eh->evl_encap_proto); min_inline = ETHER_HDR_LEN; } switch (eth_type) { case ETHERTYPE_IP: case ETHERTYPE_IPV6: /* * Make sure the TOS(IPv4) or traffic class(IPv6) * field gets inlined. Else the SQ may stall. 
*/ min_inline += 4; break; default: goto max_inline; } /* * m_copydata() will be used on the remaining header which * does not need to reside within the first m_len bytes of * data: */ if (mb->m_pkthdr.len < min_inline) goto max_inline; return (min_inline); max_inline: return (MIN(mb->m_pkthdr.len, sq->max_inline)); } static int mlx5e_get_full_header_size(struct mbuf *mb) { struct ether_vlan_header *eh; struct tcphdr *th; struct ip *ip; int ip_hlen, tcp_hlen; struct ip6_hdr *ip6; uint16_t eth_type; int eth_hdr_len; eh = mtod(mb, struct ether_vlan_header *); if (mb->m_len < ETHER_HDR_LEN) return (0); if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) { if (mb->m_len < (ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN)) return (0); eth_type = ntohs(eh->evl_proto); eth_hdr_len = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN; } else { eth_type = ntohs(eh->evl_encap_proto); eth_hdr_len = ETHER_HDR_LEN; } switch (eth_type) { case ETHERTYPE_IP: ip = (struct ip *)(mb->m_data + eth_hdr_len); if (mb->m_len < eth_hdr_len + sizeof(*ip)) return (0); switch (ip->ip_p) { case IPPROTO_TCP: ip_hlen = ip->ip_hl << 2; eth_hdr_len += ip_hlen; break; case IPPROTO_UDP: ip_hlen = ip->ip_hl << 2; eth_hdr_len += ip_hlen + 8; goto done; default: return (0); } break; case ETHERTYPE_IPV6: ip6 = (struct ip6_hdr *)(mb->m_data + eth_hdr_len); if (mb->m_len < eth_hdr_len + sizeof(*ip6)) return (0); switch (ip6->ip6_nxt) { case IPPROTO_TCP: eth_hdr_len += sizeof(*ip6); break; case IPPROTO_UDP: eth_hdr_len += sizeof(*ip6) + 8; goto done; default: return (0); } break; default: return (0); } if (mb->m_len < eth_hdr_len + sizeof(*th)) return (0); th = (struct tcphdr *)(mb->m_data + eth_hdr_len); tcp_hlen = th->th_off << 2; eth_hdr_len += tcp_hlen; done: /* * m_copydata() will be used on the remaining header which * does not need to reside within the first m_len bytes of * data: */ if (mb->m_pkthdr.len < eth_hdr_len) return (0); return (eth_hdr_len); } static int mlx5e_sq_xmit(struct mlx5e_sq *sq, struct mbuf **mbp) { bus_dma_segment_t segs[MLX5E_MAX_TX_MBUF_FRAGS]; struct mlx5_wqe_data_seg *dseg; struct mlx5e_tx_wqe *wqe; struct ifnet *ifp; int nsegs; int err; int x; struct mbuf *mb = *mbp; u16 ds_cnt; u16 ihs; u16 pi; u8 opcode; /* Return ENOBUFS if the queue is full */ if (unlikely(!mlx5e_sq_has_room_for(sq, 2 * MLX5_SEND_WQE_MAX_WQEBBS))) return (ENOBUFS); /* Align SQ edge with NOPs to avoid WQE wrap around */ pi = ((~sq->pc) & sq->wq.sz_m1); if (pi < (MLX5_SEND_WQE_MAX_WQEBBS - 1)) { /* Send one multi NOP message instead of many */ mlx5e_send_nop(sq, (pi + 1) * MLX5_SEND_WQEBB_NUM_DS); pi = ((~sq->pc) & sq->wq.sz_m1); if (pi < (MLX5_SEND_WQE_MAX_WQEBBS - 1)) return (ENOMEM); } /* Setup local variables */ pi = sq->pc & sq->wq.sz_m1; wqe = mlx5_wq_cyc_get_wqe(&sq->wq, pi); ifp = sq->ifp; memset(wqe, 0, sizeof(*wqe)); /* Send a copy of the frame to the BPF listener, if any */ if (ifp != NULL && ifp->if_bpf != NULL) ETHER_BPF_MTAP(ifp, mb); if (mb->m_pkthdr.csum_flags & (CSUM_IP | CSUM_TSO)) { wqe->eth.cs_flags |= MLX5_ETH_WQE_L3_CSUM; } if (mb->m_pkthdr.csum_flags & (CSUM_TCP | CSUM_UDP | CSUM_UDP_IPV6 | CSUM_TCP_IPV6 | CSUM_TSO)) { wqe->eth.cs_flags |= MLX5_ETH_WQE_L4_CSUM; } if (wqe->eth.cs_flags == 0) { sq->stats.csum_offload_none++; } if (mb->m_pkthdr.csum_flags & CSUM_TSO) { u32 payload_len; u32 mss = mb->m_pkthdr.tso_segsz; u32 num_pkts; wqe->eth.mss = cpu_to_be16(mss); opcode = MLX5_OPCODE_LSO; ihs = mlx5e_get_full_header_size(mb); if (unlikely(ihs == 0)) { err = EINVAL; goto tx_drop; } payload_len = mb->m_pkthdr.len - ihs; if (payload_len 
== 0) num_pkts = 1; else num_pkts = DIV_ROUND_UP(payload_len, mss); sq->mbuf[pi].num_bytes = payload_len + (num_pkts * ihs); sq->stats.tso_packets++; sq->stats.tso_bytes += payload_len; } else { opcode = MLX5_OPCODE_SEND; switch (sq->min_inline_mode) { case MLX5_INLINE_MODE_IP: case MLX5_INLINE_MODE_TCP_UDP: ihs = mlx5e_get_full_header_size(mb); if (unlikely(ihs == 0)) ihs = mlx5e_get_l2_header_size(sq, mb); break; case MLX5_INLINE_MODE_L2: ihs = mlx5e_get_l2_header_size(sq, mb); break; case MLX5_INLINE_MODE_NONE: /* FALLTHROUGH */ default: if ((mb->m_flags & M_VLANTAG) != 0 && (sq->min_insert_caps & MLX5E_INSERT_VLAN) != 0) { /* inlining VLAN data is not required */ wqe->eth.vlan_cmd = htons(0x8000); /* bit 0 CVLAN */ wqe->eth.vlan_hdr = htons(mb->m_pkthdr.ether_vtag); ihs = 0; } else if ((mb->m_flags & M_VLANTAG) == 0 && (sq->min_insert_caps & MLX5E_INSERT_NON_VLAN) != 0) { /* inlining non-VLAN data is not required */ ihs = 0; } else { /* we are forced to inlining L2 header, if any */ ihs = mlx5e_get_l2_header_size(sq, mb); } break; } sq->mbuf[pi].num_bytes = max_t (unsigned int, mb->m_pkthdr.len, ETHER_MIN_LEN - ETHER_CRC_LEN); } if (likely(ihs == 0)) { /* nothing to inline */ } else if (unlikely(ihs > sq->max_inline)) { /* inline header size is too big */ err = EINVAL; goto tx_drop; } else if ((mb->m_flags & M_VLANTAG) != 0) { struct ether_vlan_header *eh = (struct ether_vlan_header *) wqe->eth.inline_hdr_start; /* Range checks */ if (unlikely(ihs > (MLX5E_MAX_TX_INLINE - ETHER_VLAN_ENCAP_LEN))) ihs = (MLX5E_MAX_TX_INLINE - ETHER_VLAN_ENCAP_LEN); else if (unlikely(ihs < ETHER_HDR_LEN)) { err = EINVAL; goto tx_drop; } m_copydata(mb, 0, ETHER_HDR_LEN, (caddr_t)eh); m_adj(mb, ETHER_HDR_LEN); /* Insert 4 bytes VLAN tag into data stream */ eh->evl_proto = eh->evl_encap_proto; eh->evl_encap_proto = htons(ETHERTYPE_VLAN); eh->evl_tag = htons(mb->m_pkthdr.ether_vtag); /* Copy rest of header data, if any */ m_copydata(mb, 0, ihs - ETHER_HDR_LEN, (caddr_t)(eh + 1)); m_adj(mb, ihs - ETHER_HDR_LEN); /* Extend header by 4 bytes */ ihs += ETHER_VLAN_ENCAP_LEN; wqe->eth.inline_hdr_sz = cpu_to_be16(ihs); } else { m_copydata(mb, 0, ihs, wqe->eth.inline_hdr_start); m_adj(mb, ihs); wqe->eth.inline_hdr_sz = cpu_to_be16(ihs); } ds_cnt = sizeof(*wqe) / MLX5_SEND_WQE_DS; if (ihs > sizeof(wqe->eth.inline_hdr_start)) { ds_cnt += DIV_ROUND_UP(ihs - sizeof(wqe->eth.inline_hdr_start), MLX5_SEND_WQE_DS); } dseg = ((struct mlx5_wqe_data_seg *)&wqe->ctrl) + ds_cnt; err = bus_dmamap_load_mbuf_sg(sq->dma_tag, sq->mbuf[pi].dma_map, mb, segs, &nsegs, BUS_DMA_NOWAIT); if (err == EFBIG) { /* Update statistics */ sq->stats.defragged++; /* Too many mbuf fragments */ mb = m_defrag(*mbp, M_NOWAIT); if (mb == NULL) { mb = *mbp; goto tx_drop; } /* Try again */ err = bus_dmamap_load_mbuf_sg(sq->dma_tag, sq->mbuf[pi].dma_map, mb, segs, &nsegs, BUS_DMA_NOWAIT); } /* Catch errors */ if (err != 0) goto tx_drop; /* Make sure all mbuf data, if any, is written to RAM */ if (nsegs != 0) { bus_dmamap_sync(sq->dma_tag, sq->mbuf[pi].dma_map, BUS_DMASYNC_PREWRITE); } else { /* All data was inlined, free the mbuf. 
*/ bus_dmamap_unload(sq->dma_tag, sq->mbuf[pi].dma_map); m_freem(mb); mb = NULL; } for (x = 0; x != nsegs; x++) { if (segs[x].ds_len == 0) continue; dseg->addr = cpu_to_be64((uint64_t)segs[x].ds_addr); dseg->lkey = sq->mkey_be; dseg->byte_count = cpu_to_be32((uint32_t)segs[x].ds_len); dseg++; } ds_cnt = (dseg - ((struct mlx5_wqe_data_seg *)&wqe->ctrl)); wqe->ctrl.opmod_idx_opcode = cpu_to_be32((sq->pc << 8) | opcode); wqe->ctrl.qpn_ds = cpu_to_be32((sq->sqn << 8) | ds_cnt); if (mlx5e_do_send_cqe(sq)) wqe->ctrl.fm_ce_se = MLX5_WQE_CTRL_CQ_UPDATE; else wqe->ctrl.fm_ce_se = 0; /* Copy data for doorbell */ memcpy(sq->doorbell.d32, &wqe->ctrl, sizeof(sq->doorbell.d32)); /* Store pointer to mbuf */ sq->mbuf[pi].mbuf = mb; sq->mbuf[pi].num_wqebbs = DIV_ROUND_UP(ds_cnt, MLX5_SEND_WQEBB_NUM_DS); sq->pc += sq->mbuf[pi].num_wqebbs; + /* Count all traffic going out */ sq->stats.packets++; + sq->stats.bytes += sq->mbuf[pi].num_bytes; + *mbp = NULL; /* safety clear */ return (0); tx_drop: sq->stats.dropped++; *mbp = NULL; m_freem(mb); return err; } static void mlx5e_poll_tx_cq(struct mlx5e_sq *sq, int budget) { u16 sqcc; /* * sq->cc must be updated only after mlx5_cqwq_update_db_record(), * otherwise a cq overrun may occur */ sqcc = sq->cc; while (budget > 0) { struct mlx5_cqe64 *cqe; struct mbuf *mb; u16 x; u16 ci; cqe = mlx5e_get_cqe(&sq->cq); if (!cqe) break; mlx5_cqwq_pop(&sq->cq.wq); /* update budget according to the event factor */ budget -= sq->cev_factor; for (x = 0; x != sq->cev_factor; x++) { ci = sqcc & sq->wq.sz_m1; mb = sq->mbuf[ci].mbuf; sq->mbuf[ci].mbuf = NULL; /* Safety clear */ if (mb == NULL) { if (sq->mbuf[ci].num_bytes == 0) { /* NOP */ sq->stats.nop++; } } else { bus_dmamap_sync(sq->dma_tag, sq->mbuf[ci].dma_map, BUS_DMASYNC_POSTWRITE); bus_dmamap_unload(sq->dma_tag, sq->mbuf[ci].dma_map); /* Free transmitted mbuf */ m_freem(mb); } sqcc += sq->mbuf[ci].num_wqebbs; } } mlx5_cqwq_update_db_record(&sq->cq.wq); /* Ensure cq space is freed before enabling more cqes */ atomic_thread_fence_rel(); sq->cc = sqcc; } static int mlx5e_xmit_locked(struct ifnet *ifp, struct mlx5e_sq *sq, struct mbuf *mb) { int err = 0; if (unlikely((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0 || READ_ONCE(sq->running) == 0)) { m_freem(mb); return (ENETDOWN); } /* Do transmit */ if (mlx5e_sq_xmit(sq, &mb) != 0) { /* NOTE: m_freem() is NULL safe */ m_freem(mb); err = ENOBUFS; } /* Check if we need to write the doorbell */ if (likely(sq->doorbell.d64 != 0)) { mlx5e_tx_notify_hw(sq, sq->doorbell.d32, 0); sq->doorbell.d64 = 0; } /* * Check if we need to start the event timer which flushes the * transmit ring on timeout: */ if (unlikely(sq->cev_next_state == MLX5E_CEV_STATE_INITIAL && sq->cev_factor != 1)) { /* start the timer */ mlx5e_sq_cev_timeout(sq); } else { /* don't send NOPs yet */ sq->cev_next_state = MLX5E_CEV_STATE_HOLD_NOPS; } return (err); } int mlx5e_xmit(struct ifnet *ifp, struct mbuf *mb) { struct mlx5e_sq *sq; int ret; sq = mlx5e_select_queue(ifp, mb); if (unlikely(sq == NULL)) { #ifdef RATELIMIT /* Check for route change */ if (mb->m_pkthdr.snd_tag != NULL && mb->m_pkthdr.snd_tag->ifp != ifp) { /* Free mbuf */ m_freem(mb); /* * Tell upper layers about route change and to * re-transmit this packet: */ return (EAGAIN); } #endif /* Free mbuf */ m_freem(mb); /* Invalid send queue */ return (ENXIO); } mtx_lock(&sq->lock); ret = mlx5e_xmit_locked(ifp, sq, mb); mtx_unlock(&sq->lock); return (ret); } void mlx5e_tx_cq_comp(struct mlx5_core_cq *mcq) { struct mlx5e_sq *sq = container_of(mcq, struct mlx5e_sq, 
cq.mcq); mtx_lock(&sq->comp_lock); mlx5e_poll_tx_cq(sq, MLX5E_BUDGET_MAX); mlx5e_cq_arm(&sq->cq, MLX5_GET_DOORBELL_LOCK(&sq->priv->doorbell_lock)); mtx_unlock(&sq->comp_lock); }
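
The change above threads a new per-queue "bytes" counter through the MLX5E_RQ_STATS and MLX5E_SQ_STATS X-macro lists in en.h, so the struct field, the sysctl name/description pair, and the compile-time element count all stay in sync from a single definition. A minimal, self-contained sketch of that macro pattern follows; the demo_ names are illustrative only, not driver code:

#include <stdio.h>

#define DEMO_STATS_COUNT(a, b, c, d) a
#define DEMO_STATS_VAR(a, b, c, d)   b;
#define DEMO_STATS_DESC(a, b, c, d)  c, d,

#define DEMO_RQ_STATS(m) \
	m(+1, unsigned long long packets, "packets", "Received packets") \
	m(+1, unsigned long long bytes, "bytes", "Received bytes")

/* expands to (0 +1 +1) == 2 */
#define DEMO_RQ_STATS_NUM (0 DEMO_RQ_STATS(DEMO_STATS_COUNT))

struct demo_rq_stats {
	DEMO_RQ_STATS(DEMO_STATS_VAR)	/* -> packets; bytes; */
};

static const char *demo_rq_desc[] = { DEMO_RQ_STATS(DEMO_STATS_DESC) };

int main(void)
{
	printf("%d counters, first is \"%s\" (%s)\n",
	    DEMO_RQ_STATS_NUM, demo_rq_desc[0], demo_rq_desc[1]);
	return (0);
}

Adding the "bytes" entry in one place is therefore the whole edit; the driver's mlx5e_create_stats() walks the generated name/description array with the count produced by the same list.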
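
mlx5e_sq_xmit() refuses work when mlx5e_sq_has_room_for() fails. That test relies on free-running 16-bit producer/consumer counters and a power-of-two ring mask: (cc - pc) masked by sz_m1 is the number of free WQEBB slots, with cc == pc special-cased as "completely empty". A standalone sketch of the same arithmetic, assuming a 64-entry ring; demo_ names are hypothetical:

#include <assert.h>
#include <stdbool.h>
#include <stdint.h>

struct demo_ring {
	uint16_t sz_m1;	/* ring size minus one; size is a power of two */
	uint16_t cc;	/* consumer counter, advanced at completion */
	uint16_t pc;	/* producer counter, advanced at transmit */
};

static bool
demo_ring_has_room_for(const struct demo_ring *r, uint16_t n)
{
	/* same expression as mlx5e_sq_has_room_for() */
	return (((uint16_t)(r->cc - r->pc) & r->sz_m1) >= n ||
	    r->cc == r->pc);
}

int main(void)
{
	struct demo_ring r = { .sz_m1 = 63, .cc = 0, .pc = 0 };

	assert(demo_ring_has_room_for(&r, 64));	/* empty ring */
	r.pc = 60;				/* 60 slots posted */
	assert(demo_ring_has_room_for(&r, 4));	/* 4 slots left */
	assert(!demo_ring_has_room_for(&r, 5));
	return (0);
}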
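
In the receive path, mlx5e_lro_update_hdr() patches TSval/TSecr from the CQE only after matching the 32-bit word that follows the TCP header against ntohl(TCPOPT_NOP << 24 | ...). A small standalone check of that option layout; the DEMO_ constants mirror the usual <netinet/tcp.h> values and the rest is illustrative:

#include <assert.h>
#include <arpa/inet.h>
#include <stdint.h>
#include <string.h>

#define DEMO_TCPOPT_NOP		1
#define DEMO_TCPOPT_TIMESTAMP	8
#define DEMO_TCPOLEN_TIMESTAMP	10

int main(void)
{
	/* option bytes as they appear on the wire: NOP, NOP, TSopt, 10 */
	const uint8_t wire[4] = {
		DEMO_TCPOPT_NOP, DEMO_TCPOPT_NOP,
		DEMO_TCPOPT_TIMESTAMP, DEMO_TCPOLEN_TIMESTAMP
	};
	uint32_t word;

	memcpy(&word, wire, sizeof(word));
	/* one 32-bit compare recognizes the option on any endianness */
	assert(word == ntohl(DEMO_TCPOPT_NOP << 24 | DEMO_TCPOPT_NOP << 16 |
	    DEMO_TCPOPT_TIMESTAMP << 8 | DEMO_TCPOLEN_TIMESTAMP));
	return (0);
}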
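
On the transmit side, mlx5e_do_send_cqe() requests a hardware completion (MLX5_WQE_CTRL_CQ_UPDATE) only on every cev_factor-th WQE, which is why mlx5e_poll_tx_cq() retires cev_factor ring entries per CQE. A sketch of that moderation logic under the same semantics; demo_ names are illustrative:

#include <assert.h>
#include <stdint.h>

struct demo_cev {
	uint16_t counter;
	uint16_t factor;	/* cf. tx_completion_fact in the params */
};

/* returns nonzero when this WQE should ask the HW for a completion */
static int
demo_do_send_cqe(struct demo_cev *c)
{
	if (++c->counter >= c->factor) {
		c->counter = 0;
		return (1);
	}
	return (0);
}

int main(void)
{
	struct demo_cev c = { .counter = 0, .factor = 4 };
	int cqes = 0;

	for (int i = 0; i < 16; i++)
		cqes += demo_do_send_cqe(&c);
	assert(cqes == 4);	/* one completion per 4 sends */
	return (0);
}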
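
Finally, the new sq->stats.bytes counter is fed from sq->mbuf[pi].num_bytes, which for TSO is payload_len + num_pkts * ihs in mlx5e_sq_xmit(), i.e. the on-wire size after segmentation (one replicated header per resulting segment) rather than the mbuf length. A sketch of that computation, assuming illustrative packet sizes:

#include <assert.h>
#include <stdint.h>

static uint32_t
demo_tso_num_bytes(uint32_t pkt_len, uint32_t ihs, uint32_t mss)
{
	uint32_t payload_len = pkt_len - ihs;
	uint32_t num_pkts;

	/* a header-only packet still produces one segment */
	num_pkts = (payload_len == 0) ? 1 :
	    (payload_len + mss - 1) / mss;	/* DIV_ROUND_UP */
	return (payload_len + num_pkts * ihs);
}

int main(void)
{
	/* 65000-byte packet, 54-byte headers, 1448-byte MSS: 45 segments */
	assert(demo_tso_num_bytes(65000, 54, 1448) ==
	    (65000 - 54) + 45 * 54);
	return (0);
}

For non-TSO sends the driver instead credits max(m_pkthdr.len, ETHER_MIN_LEN - ETHER_CRC_LEN), so short frames are counted at the minimum Ethernet frame size the hardware pads them to.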