diff --git a/sys/dev/mlx5/mlx5_en/en.h b/sys/dev/mlx5/mlx5_en/en.h index 26d92827406c..af6cd8371506 100644 --- a/sys/dev/mlx5/mlx5_en/en.h +++ b/sys/dev/mlx5/mlx5_en/en.h @@ -1,1217 +1,1217 @@ /*- * Copyright (c) 2015-2021 Mellanox Technologies. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS `AS IS' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #ifndef _MLX5_EN_H_ #define _MLX5_EN_H_ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef RSS #include #include #endif #include #include #include #include #include #include #include #include #include #include #define MLX5E_MAX_PRIORITY 8 #define MLX5E_MAX_FEC_10X_25X 4 #define MLX5E_MAX_FEC_50X 4 /* IEEE 802.1Qaz standard supported values */ #define IEEE_8021QAZ_MAX_TCS 8 #define MLX5E_PARAMS_MINIMUM_LOG_SQ_SIZE 0x7 #define MLX5E_PARAMS_DEFAULT_LOG_SQ_SIZE 0xa #define MLX5E_PARAMS_MAXIMUM_LOG_SQ_SIZE 0xe #define MLX5E_PARAMS_MINIMUM_LOG_RQ_SIZE 0x7 #define MLX5E_PARAMS_DEFAULT_LOG_RQ_SIZE 0xa #define MLX5E_PARAMS_MAXIMUM_LOG_RQ_SIZE 0xe #define MLX5E_MAX_BUSDMA_RX_SEGS 15 #ifndef MLX5E_MAX_RX_BYTES #define MLX5E_MAX_RX_BYTES MCLBYTES #endif #define MLX5E_PARAMS_DEFAULT_LRO_WQE_SZ \ MIN(65535, 7 * MLX5E_MAX_RX_BYTES) #define MLX5E_DIM_DEFAULT_PROFILE 3 #define MLX5E_DIM_MAX_RX_CQ_MODERATION_PKTS_WITH_LRO 16 #define MLX5E_PARAMS_DEFAULT_RX_CQ_MODERATION_USEC 0x10 #define MLX5E_PARAMS_DEFAULT_RX_CQ_MODERATION_USEC_FROM_CQE 0x3 #define MLX5E_PARAMS_DEFAULT_RX_CQ_MODERATION_PKTS 0x20 #define MLX5E_PARAMS_DEFAULT_TX_CQ_MODERATION_USEC 0x10 #define MLX5E_PARAMS_DEFAULT_TX_CQ_MODERATION_PKTS 0x20 #define MLX5E_PARAMS_DEFAULT_MIN_RX_WQES 0x80 #define MLX5E_PARAMS_DEFAULT_RX_HASH_LOG_TBL_SZ 0x7 #define MLX5E_CACHELINE_SIZE CACHE_LINE_SIZE #define MLX5E_HW2SW_MTU(hwmtu) \ ((hwmtu) - (ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN + ETHER_CRC_LEN)) #define MLX5E_SW2HW_MTU(swmtu) \ ((swmtu) + (ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN + ETHER_CRC_LEN)) #define MLX5E_SW2MB_MTU(swmtu) \ (MLX5E_SW2HW_MTU(swmtu) + MLX5E_NET_IP_ALIGN) #define MLX5E_MTU_MIN 72 /* Min MTU allowed by the kernel */ #define MLX5E_MTU_MAX MIN(ETHERMTU_JUMBO, MJUM16BYTES) /* Max MTU of Ethernet * jumbo frames */ #define MLX5E_BUDGET_MAX 8192 /* RX and TX */ #define MLX5E_RX_BUDGET_MAX 256 #define MLX5E_SQ_BF_BUDGET 16 #define MLX5E_SQ_TX_QUEUE_SIZE 4096 /* SQ drbr queue size */ #define MLX5E_MAX_TX_NUM_TC 8 /* units */ #define MLX5E_MAX_TX_HEADER 192 /* bytes */ #define MLX5E_MAX_TX_PAYLOAD_SIZE 65536 /* bytes */ #define MLX5E_MAX_TX_MBUF_SIZE 65536 /* bytes */ #define MLX5E_MAX_TX_MBUF_FRAGS \ ((MLX5_SEND_WQE_MAX_WQEBBS * MLX5_SEND_WQEBB_NUM_DS) - \ (MLX5E_MAX_TX_HEADER / MLX5_SEND_WQE_DS) - \ 1 /* the maximum value of the DS counter is 0x3F and not 0x40 */) /* units */ #define MLX5E_MAX_TX_INLINE \ (MLX5E_MAX_TX_HEADER - sizeof(struct mlx5e_tx_wqe) + \ sizeof(((struct mlx5e_tx_wqe *)0)->eth.inline_hdr_start)) /* bytes */ #define MLX5E_100MB (100000) #define MLX5E_1GB (1000000) #define MLX5E_ZERO(ptr, field) \ memset(&(ptr)->field, 0, \ sizeof(*(ptr)) - __offsetof(__typeof(*(ptr)), field)) MALLOC_DECLARE(M_MLX5EN); struct mlx5_core_dev; struct mlx5e_cq; typedef void (mlx5e_cq_comp_t)(struct mlx5_core_cq *, struct mlx5_eqe *); #define mlx5_en_err(_dev, format, ...) \ if_printf(_dev, "ERR: ""%s:%d:(pid %d): " format, \ __func__, __LINE__, curthread->td_proc->p_pid, \ ##__VA_ARGS__) #define mlx5_en_warn(_dev, format, ...) \ if_printf(_dev, "WARN: ""%s:%d:(pid %d): " format, \ __func__, __LINE__, curthread->td_proc->p_pid, \ ##__VA_ARGS__) #define mlx5_en_info(_dev, format, ...) \ if_printf(_dev, "INFO: ""%s:%d:(pid %d): " format, \ __func__, __LINE__, curthread->td_proc->p_pid, \ ##__VA_ARGS__) #define MLX5E_STATS_COUNT(a, ...) a #define MLX5E_STATS_VAR(a, b, c, ...) b c; #define MLX5E_STATS_COUNTER(a, b, c, ...) counter_##b##_t c; #define MLX5E_STATS_DESC(a, b, c, d, e, ...) d, e, #define MLX5E_VPORT_STATS(m) \ /* HW counters */ \ m(+1, u64, rx_packets, "rx_packets", "Received packets") \ m(+1, u64, rx_bytes, "rx_bytes", "Received bytes") \ m(+1, u64, tx_packets, "tx_packets", "Transmitted packets") \ m(+1, u64, tx_bytes, "tx_bytes", "Transmitted bytes") \ m(+1, u64, rx_error_packets, "rx_error_packets", "Received error packets") \ m(+1, u64, rx_error_bytes, "rx_error_bytes", "Received error bytes") \ m(+1, u64, tx_error_packets, "tx_error_packets", "Transmitted error packets") \ m(+1, u64, tx_error_bytes, "tx_error_bytes", "Transmitted error bytes") \ m(+1, u64, rx_unicast_packets, "rx_unicast_packets", "Received unicast packets") \ m(+1, u64, rx_unicast_bytes, "rx_unicast_bytes", "Received unicast bytes") \ m(+1, u64, tx_unicast_packets, "tx_unicast_packets", "Transmitted unicast packets") \ m(+1, u64, tx_unicast_bytes, "tx_unicast_bytes", "Transmitted unicast bytes") \ m(+1, u64, rx_multicast_packets, "rx_multicast_packets", "Received multicast packets") \ m(+1, u64, rx_multicast_bytes, "rx_multicast_bytes", "Received multicast bytes") \ m(+1, u64, tx_multicast_packets, "tx_multicast_packets", "Transmitted multicast packets") \ m(+1, u64, tx_multicast_bytes, "tx_multicast_bytes", "Transmitted multicast bytes") \ m(+1, u64, rx_broadcast_packets, "rx_broadcast_packets", "Received broadcast packets") \ m(+1, u64, rx_broadcast_bytes, "rx_broadcast_bytes", "Received broadcast bytes") \ m(+1, u64, tx_broadcast_packets, "tx_broadcast_packets", "Transmitted broadcast packets") \ m(+1, u64, tx_broadcast_bytes, "tx_broadcast_bytes", "Transmitted broadcast bytes") \ m(+1, u64, rx_out_of_buffer, "rx_out_of_buffer", "Receive out of buffer, no recv wqes events") \ /* SW counters */ \ m(+1, u64, tso_packets, "tso_packets", "Transmitted TSO packets") \ m(+1, u64, tso_bytes, "tso_bytes", "Transmitted TSO bytes") \ m(+1, u64, lro_packets, "lro_packets", "Received LRO packets") \ m(+1, u64, lro_bytes, "lro_bytes", "Received LRO bytes") \ m(+1, u64, sw_lro_queued, "sw_lro_queued", "Packets queued for SW LRO") \ m(+1, u64, sw_lro_flushed, "sw_lro_flushed", "Packets flushed from SW LRO") \ m(+1, u64, rx_csum_good, "rx_csum_good", "Received checksum valid packets") \ m(+1, u64, rx_csum_none, "rx_csum_none", "Received no checksum packets") \ m(+1, u64, tx_csum_offload, "tx_csum_offload", "Transmit checksum offload packets") \ m(+1, u64, tx_queue_dropped, "tx_queue_dropped", "Transmit queue dropped") \ m(+1, u64, tx_defragged, "tx_defragged", "Transmit queue defragged") \ m(+1, u64, rx_wqe_err, "rx_wqe_err", "Receive WQE errors") \ m(+1, u64, tx_jumbo_packets, "tx_jumbo_packets", "TX packets greater than 1518 octets") \ m(+1, u64, rx_steer_missed_packets, "rx_steer_missed_packets", "RX packets dropped by steering rule(s)") #define MLX5E_VPORT_STATS_NUM (0 MLX5E_VPORT_STATS(MLX5E_STATS_COUNT)) struct mlx5e_vport_stats { struct sysctl_ctx_list ctx; u64 arg [0]; MLX5E_VPORT_STATS(MLX5E_STATS_VAR) }; #define MLX5E_PPORT_IEEE802_3_STATS(m) \ m(+1, u64, frames_tx, "frames_tx", "Frames transmitted") \ m(+1, u64, frames_rx, "frames_rx", "Frames received") \ m(+1, u64, check_seq_err, "check_seq_err", "Sequence errors") \ m(+1, u64, alignment_err, "alignment_err", "Alignment errors") \ m(+1, u64, octets_tx, "octets_tx", "Bytes transmitted") \ m(+1, u64, octets_received, "octets_received", "Bytes received") \ m(+1, u64, multicast_xmitted, "multicast_xmitted", "Multicast transmitted") \ m(+1, u64, broadcast_xmitted, "broadcast_xmitted", "Broadcast transmitted") \ m(+1, u64, multicast_rx, "multicast_rx", "Multicast received") \ m(+1, u64, broadcast_rx, "broadcast_rx", "Broadcast received") \ m(+1, u64, in_range_len_errors, "in_range_len_errors", "In range length errors") \ m(+1, u64, out_of_range_len, "out_of_range_len", "Out of range length errors") \ m(+1, u64, too_long_errors, "too_long_errors", "Too long errors") \ m(+1, u64, symbol_err, "symbol_err", "Symbol errors") \ m(+1, u64, mac_control_tx, "mac_control_tx", "MAC control transmitted") \ m(+1, u64, mac_control_rx, "mac_control_rx", "MAC control received") \ m(+1, u64, unsupported_op_rx, "unsupported_op_rx", "Unsupported operation received") \ m(+1, u64, pause_ctrl_rx, "pause_ctrl_rx", "Pause control received") \ m(+1, u64, pause_ctrl_tx, "pause_ctrl_tx", "Pause control transmitted") #define MLX5E_PPORT_RFC2819_STATS(m) \ m(+1, u64, drop_events, "drop_events", "Dropped events") \ m(+1, u64, octets, "octets", "Octets") \ m(+1, u64, pkts, "pkts", "Packets") \ m(+1, u64, broadcast_pkts, "broadcast_pkts", "Broadcast packets") \ m(+1, u64, multicast_pkts, "multicast_pkts", "Multicast packets") \ m(+1, u64, crc_align_errors, "crc_align_errors", "CRC alignment errors") \ m(+1, u64, undersize_pkts, "undersize_pkts", "Undersized packets") \ m(+1, u64, oversize_pkts, "oversize_pkts", "Oversized packets") \ m(+1, u64, fragments, "fragments", "Fragments") \ m(+1, u64, jabbers, "jabbers", "Jabbers") \ m(+1, u64, collisions, "collisions", "Collisions") #define MLX5E_PPORT_RFC2819_STATS_DEBUG(m) \ m(+1, u64, p64octets, "p64octets", "Bytes") \ m(+1, u64, p65to127octets, "p65to127octets", "Bytes") \ m(+1, u64, p128to255octets, "p128to255octets", "Bytes") \ m(+1, u64, p256to511octets, "p256to511octets", "Bytes") \ m(+1, u64, p512to1023octets, "p512to1023octets", "Bytes") \ m(+1, u64, p1024to1518octets, "p1024to1518octets", "Bytes") \ m(+1, u64, p1519to2047octets, "p1519to2047octets", "Bytes") \ m(+1, u64, p2048to4095octets, "p2048to4095octets", "Bytes") \ m(+1, u64, p4096to8191octets, "p4096to8191octets", "Bytes") \ m(+1, u64, p8192to10239octets, "p8192to10239octets", "Bytes") #define MLX5E_PPORT_RFC2863_STATS_DEBUG(m) \ m(+1, u64, in_octets, "in_octets", "In octets") \ m(+1, u64, in_ucast_pkts, "in_ucast_pkts", "In unicast packets") \ m(+1, u64, in_discards, "in_discards", "In discards") \ m(+1, u64, in_errors, "in_errors", "In errors") \ m(+1, u64, in_unknown_protos, "in_unknown_protos", "In unknown protocols") \ m(+1, u64, out_octets, "out_octets", "Out octets") \ m(+1, u64, out_ucast_pkts, "out_ucast_pkts", "Out unicast packets") \ m(+1, u64, out_discards, "out_discards", "Out discards") \ m(+1, u64, out_errors, "out_errors", "Out errors") \ m(+1, u64, in_multicast_pkts, "in_multicast_pkts", "In multicast packets") \ m(+1, u64, in_broadcast_pkts, "in_broadcast_pkts", "In broadcast packets") \ m(+1, u64, out_multicast_pkts, "out_multicast_pkts", "Out multicast packets") \ m(+1, u64, out_broadcast_pkts, "out_broadcast_pkts", "Out broadcast packets") #define MLX5E_PPORT_ETHERNET_EXTENDED_STATS_DEBUG(m) \ m(+1, u64, port_transmit_wait, "port_transmit_wait", "Port transmit wait") \ m(+1, u64, ecn_marked, "ecn_marked", "ECN marked") \ m(+1, u64, no_buffer_discard_mc, "no_buffer_discard_mc", "No buffer discard mc") \ m(+1, u64, rx_ebp, "rx_ebp", "RX EBP") \ m(+1, u64, tx_ebp, "tx_ebp", "TX EBP") \ m(+1, u64, rx_buffer_almost_full, "rx_buffer_almost_full", "RX buffer almost full") \ m(+1, u64, rx_buffer_full, "rx_buffer_full", "RX buffer full") \ m(+1, u64, rx_icrc_encapsulated, "rx_icrc_encapsulated", "RX ICRC encapsulated") \ m(+1, u64, ex_reserved_0, "ex_reserved_0", "Reserved") \ m(+1, u64, ex_reserved_1, "ex_reserved_1", "Reserved") \ m(+1, u64, tx_stat_p64octets, "tx_stat_p64octets", "Bytes") \ m(+1, u64, tx_stat_p65to127octets, "tx_stat_p65to127octets", "Bytes") \ m(+1, u64, tx_stat_p128to255octets, "tx_stat_p128to255octets", "Bytes") \ m(+1, u64, tx_stat_p256to511octets, "tx_stat_p256to511octets", "Bytes") \ m(+1, u64, tx_stat_p512to1023octets, "tx_stat_p512to1023octets", "Bytes") \ m(+1, u64, tx_stat_p1024to1518octets, "tx_stat_p1024to1518octets", "Bytes") \ m(+1, u64, tx_stat_p1519to2047octets, "tx_stat_p1519to2047octets", "Bytes") \ m(+1, u64, tx_stat_p2048to4095octets, "tx_stat_p2048to4095octets", "Bytes") \ m(+1, u64, tx_stat_p4096to8191octets, "tx_stat_p4096to8191octets", "Bytes") \ m(+1, u64, tx_stat_p8192to10239octets, "tx_stat_p8192to10239octets", "Bytes") #define MLX5E_PPORT_STATISTICAL_DEBUG(m) \ m(+1, u64, phy_time_since_last_clear, "phy_time_since_last_clear", \ "Time since last clear in milliseconds") \ m(+1, u64, phy_received_bits, "phy_received_bits", \ "Total amount of traffic received in bits before error correction") \ m(+1, u64, phy_symbol_errors, "phy_symbol_errors", \ "Total number of symbol errors before error correction") \ m(+1, u64, phy_corrected_bits, "phy_corrected_bits", \ "Total number of corrected bits ") \ m(+1, u64, phy_corrected_bits_lane0, "phy_corrected_bits_lane0", \ "Total number of corrected bits for lane 0") \ m(+1, u64, phy_corrected_bits_lane1, "phy_corrected_bits_lane1", \ "Total number of corrected bits for lane 1") \ m(+1, u64, phy_corrected_bits_lane2, "phy_corrected_bits_lane2", \ "Total number of corrected bits for lane 2") \ m(+1, u64, phy_corrected_bits_lane3, "phy_corrected_bits_lane3", \ "Total number of corrected bits for lane 3") #define MLX5E_PPORT_PHYSICAL_LAYER_STATS_DEBUG(m) \ m(+1, u64, time_since_last_clear, "time_since_last_clear", \ "Time since the last counters clear event (msec)") \ m(+1, u64, symbol_errors, "symbol_errors", "Symbol errors") \ m(+1, u64, sync_headers_errors, "sync_headers_errors", \ "Sync header error counter") \ m(+1, u64, bip_errors_lane0, "edpl_bip_errors_lane0", \ "Indicates the number of PRBS errors on lane 0") \ m(+1, u64, bip_errors_lane1, "edpl_bip_errors_lane1", \ "Indicates the number of PRBS errors on lane 1") \ m(+1, u64, bip_errors_lane2, "edpl_bip_errors_lane2", \ "Indicates the number of PRBS errors on lane 2") \ m(+1, u64, bip_errors_lane3, "edpl_bip_errors_lane3", \ "Indicates the number of PRBS errors on lane 3") \ m(+1, u64, fc_corrected_blocks_lane0, "fc_corrected_blocks_lane0", \ "FEC correctable block counter lane 0") \ m(+1, u64, fc_corrected_blocks_lane1, "fc_corrected_blocks_lane1", \ "FEC correctable block counter lane 1") \ m(+1, u64, fc_corrected_blocks_lane2, "fc_corrected_blocks_lane2", \ "FEC correctable block counter lane 2") \ m(+1, u64, fc_corrected_blocks_lane3, "fc_corrected_blocks_lane3", \ "FEC correctable block counter lane 3") \ m(+1, u64, rs_corrected_blocks, "rs_corrected_blocks", \ "FEC correcable block counter") \ m(+1, u64, rs_uncorrectable_blocks, "rs_uncorrectable_blocks", \ "FEC uncorrecable block counter") \ m(+1, u64, rs_no_errors_blocks, "rs_no_errors_blocks", \ "The number of RS-FEC blocks received that had no errors") \ m(+1, u64, rs_single_error_blocks, "rs_single_error_blocks", \ "The number of corrected RS-FEC blocks received that had" \ "exactly 1 error symbol") \ m(+1, u64, rs_corrected_symbols_total, "rs_corrected_symbols_total", \ "Port FEC corrected symbol counter") \ m(+1, u64, rs_corrected_symbols_lane0, "rs_corrected_symbols_lane0", \ "FEC corrected symbol counter lane 0") \ m(+1, u64, rs_corrected_symbols_lane1, "rs_corrected_symbols_lane1", \ "FEC corrected symbol counter lane 1") \ m(+1, u64, rs_corrected_symbols_lane2, "rs_corrected_symbols_lane2", \ "FEC corrected symbol counter lane 2") \ m(+1, u64, rs_corrected_symbols_lane3, "rs_corrected_symbols_lane3", \ "FEC corrected symbol counter lane 3") /* Per priority statistics for PFC */ #define MLX5E_PPORT_PER_PRIO_STATS_SUB(m,n,p) \ m(n, p, +1, u64, rx_octets, "rx_octets", "Received octets") \ m(n, p, +1, u64, rx_uc_frames, "rx_uc_frames", "Received unicast frames") \ m(n, p, +1, u64, rx_mc_frames, "rx_mc_frames", "Received multicast frames") \ m(n, p, +1, u64, rx_bc_frames, "rx_bc_frames", "Received broadcast frames") \ m(n, p, +1, u64, rx_frames, "rx_frames", "Received frames") \ m(n, p, +1, u64, tx_octets, "tx_octets", "Transmitted octets") \ m(n, p, +1, u64, tx_uc_frames, "tx_uc_frames", "Transmitted unicast frames") \ m(n, p, +1, u64, tx_mc_frames, "tx_mc_frames", "Transmitted multicast frames") \ m(n, p, +1, u64, tx_bc_frames, "tx_bc_frames", "Transmitted broadcast frames") \ m(n, p, +1, u64, tx_frames, "tx_frames", "Transmitted frames") \ m(n, p, +1, u64, rx_pause, "rx_pause", "Received pause frames") \ m(n, p, +1, u64, rx_pause_duration, "rx_pause_duration", \ "Received pause duration") \ m(n, p, +1, u64, tx_pause, "tx_pause", "Transmitted pause frames") \ m(n, p, +1, u64, tx_pause_duration, "tx_pause_duration", \ "Transmitted pause duration") \ m(n, p, +1, u64, rx_pause_transition, "rx_pause_transition", \ "Received pause transitions") \ m(n, p, +1, u64, rx_discards, "rx_discards", "Discarded received frames") \ m(n, p, +1, u64, device_stall_minor_watermark, \ "device_stall_minor_watermark", "Device stall minor watermark") \ m(n, p, +1, u64, device_stall_critical_watermark, \ "device_stall_critical_watermark", "Device stall critical watermark") #define MLX5E_PPORT_PER_PRIO_STATS_PREFIX(m,p,c,t,f,s,d) \ m(c, t, pri_##p##_##f, "prio" #p "_" s, "Priority " #p " - " d) #define MLX5E_PPORT_PER_PRIO_STATS_NUM_PRIO 8 #define MLX5E_PPORT_PER_PRIO_STATS(m) \ MLX5E_PPORT_PER_PRIO_STATS_SUB(MLX5E_PPORT_PER_PRIO_STATS_PREFIX,m,0) \ MLX5E_PPORT_PER_PRIO_STATS_SUB(MLX5E_PPORT_PER_PRIO_STATS_PREFIX,m,1) \ MLX5E_PPORT_PER_PRIO_STATS_SUB(MLX5E_PPORT_PER_PRIO_STATS_PREFIX,m,2) \ MLX5E_PPORT_PER_PRIO_STATS_SUB(MLX5E_PPORT_PER_PRIO_STATS_PREFIX,m,3) \ MLX5E_PPORT_PER_PRIO_STATS_SUB(MLX5E_PPORT_PER_PRIO_STATS_PREFIX,m,4) \ MLX5E_PPORT_PER_PRIO_STATS_SUB(MLX5E_PPORT_PER_PRIO_STATS_PREFIX,m,5) \ MLX5E_PPORT_PER_PRIO_STATS_SUB(MLX5E_PPORT_PER_PRIO_STATS_PREFIX,m,6) \ MLX5E_PPORT_PER_PRIO_STATS_SUB(MLX5E_PPORT_PER_PRIO_STATS_PREFIX,m,7) #define MLX5E_PCIE_PERFORMANCE_COUNTERS_64(m) \ m(+1, u64, life_time_counter_high, "life_time_counter", \ "Life time counter.", pcie_perf_counters) \ m(+1, u64, tx_overflow_buffer_pkt, "tx_overflow_buffer_pkt", \ "The number of packets dropped due to lack of PCIe buffers " \ "in receive path from NIC port toward the hosts.", \ pcie_perf_counters) \ m(+1, u64, tx_overflow_buffer_marked_pkt, \ "tx_overflow_buffer_marked_pkt", \ "The number of packets marked due to lack of PCIe buffers " \ "in receive path from NIC port toward the hosts.", \ pcie_perf_counters) #define MLX5E_PCIE_PERFORMANCE_COUNTERS_32(m) \ m(+1, u64, rx_errors, "rx_errors", \ "Number of transitions to recovery due to Framing " \ "errors and CRC errors.", pcie_perf_counters) \ m(+1, u64, tx_errors, "tx_errors", "Number of transitions " \ "to recovery due to EIEOS and TS errors.", pcie_perf_counters) \ m(+1, u64, l0_to_recovery_eieos, "l0_to_recovery_eieos", "Number of " \ "transitions to recovery due to getting EIEOS.", pcie_perf_counters)\ m(+1, u64, l0_to_recovery_ts, "l0_to_recovery_ts", "Number of " \ "transitions to recovery due to getting TS.", pcie_perf_counters) \ m(+1, u64, l0_to_recovery_framing, "l0_to_recovery_framing", "Number "\ "of transitions to recovery due to identifying framing " \ "errors at gen3/4.", pcie_perf_counters) \ m(+1, u64, l0_to_recovery_retrain, "l0_to_recovery_retrain", \ "Number of transitions to recovery due to link retrain request " \ "from data link.", pcie_perf_counters) \ m(+1, u64, crc_error_dllp, "crc_error_dllp", "Number of transitions " \ "to recovery due to identifying CRC DLLP errors.", \ pcie_perf_counters) \ m(+1, u64, crc_error_tlp, "crc_error_tlp", "Number of transitions to "\ "recovery due to identifying CRC TLP errors.", pcie_perf_counters) \ m(+1, u64, outbound_stalled_reads, "outbound_stalled_reads", \ "The percentage of time within the last second that the NIC had " \ "outbound non-posted read requests but could not perform the " \ "operation due to insufficient non-posted credits.", \ pcie_perf_counters) \ m(+1, u64, outbound_stalled_writes, "outbound_stalled_writes", \ "The percentage of time within the last second that the NIC had " \ "outbound posted writes requests but could not perform the " \ "operation due to insufficient posted credits.", \ pcie_perf_counters) \ m(+1, u64, outbound_stalled_reads_events, \ "outbound_stalled_reads_events", "The number of events where " \ "outbound_stalled_reads was above a threshold.", \ pcie_perf_counters) \ m(+1, u64, outbound_stalled_writes_events, \ "outbound_stalled_writes_events", \ "The number of events where outbound_stalled_writes was above " \ "a threshold.", pcie_perf_counters) #define MLX5E_PCIE_TIMERS_AND_STATES_COUNTERS_32(m) \ m(+1, u64, time_to_boot_image_start, "time_to_boot_image_start", \ "Time from start until FW boot image starts running in usec.", \ pcie_timers_states) \ m(+1, u64, time_to_link_image, "time_to_link_image", \ "Time from start until FW pci_link image starts running in usec.", \ pcie_timers_states) \ m(+1, u64, calibration_time, "calibration_time", \ "Time it took FW to do calibration in usec.", \ pcie_timers_states) \ m(+1, u64, time_to_first_perst, "time_to_first_perst", \ "Time form start until FW handle first perst. in usec.", \ pcie_timers_states) \ m(+1, u64, time_to_detect_state, "time_to_detect_state", \ "Time from start until first transition to LTSSM.Detect_Q in usec", \ pcie_timers_states) \ m(+1, u64, time_to_l0, "time_to_l0", \ "Time from start until first transition to LTSSM.L0 in usec", \ pcie_timers_states) \ m(+1, u64, time_to_crs_en, "time_to_crs_en", \ "Time from start until crs is enabled in usec", \ pcie_timers_states) \ m(+1, u64, time_to_plastic_image_start, "time_to_plastic_image_start",\ "Time form start until FW plastic image starts running in usec.", \ pcie_timers_states) \ m(+1, u64, time_to_iron_image_start, "time_to_iron_image_start", \ "Time form start until FW iron image starts running in usec.", \ pcie_timers_states) \ m(+1, u64, perst_handler, "perst_handler", \ "Number of persts arrived.", pcie_timers_states) \ m(+1, u64, times_in_l1, "times_in_l1", \ "Number of times LTSSM entered L1 flow.", pcie_timers_states) \ m(+1, u64, times_in_l23, "times_in_l23", \ "Number of times LTSSM entered L23 flow.", pcie_timers_states) \ m(+1, u64, dl_down, "dl_down", \ "Number of moves for DL_active to DL_down.", pcie_timers_states) \ m(+1, u64, config_cycle1usec, "config_cycle1usec", \ "Number of configuration requests that firmware " \ "handled in less than 1 usec.", pcie_timers_states) \ m(+1, u64, config_cycle2to7usec, "config_cycle2to7usec", \ "Number of configuration requests that firmware " \ "handled within 2 to 7 usec.", pcie_timers_states) \ m(+1, u64, config_cycle8to15usec, "config_cycle8to15usec", \ "Number of configuration requests that firmware " \ "handled within 8 to 15 usec.", pcie_timers_states) \ m(+1, u64, config_cycle16to63usec, "config_cycle16to63usec", \ "Number of configuration requests that firmware " \ "handled within 16 to 63 usec.", pcie_timers_states) \ m(+1, u64, config_cycle64usec, "config_cycle64usec", \ "Number of configuration requests that firmware " \ "handled took more than 64 usec.", pcie_timers_states) \ m(+1, u64, correctable_err_msg_sent, "correctable_err_msg_sent", \ "Number of correctable error messages sent.", pcie_timers_states) \ m(+1, u64, non_fatal_err_msg_sent, "non_fatal_err_msg_sent", \ "Number of non-Fatal error msg sent.", pcie_timers_states) \ m(+1, u64, fatal_err_msg_sent, "fatal_err_msg_sent", \ "Number of fatal error msg sent.", pcie_timers_states) #define MLX5E_PCIE_LANE_COUNTERS_32(m) \ m(+1, u64, error_counter_lane0, "error_counter_lane0", \ "Error counter for PCI lane 0", pcie_lanes_counters) \ m(+1, u64, error_counter_lane1, "error_counter_lane1", \ "Error counter for PCI lane 1", pcie_lanes_counters) \ m(+1, u64, error_counter_lane2, "error_counter_lane2", \ "Error counter for PCI lane 2", pcie_lanes_counters) \ m(+1, u64, error_counter_lane3, "error_counter_lane3", \ "Error counter for PCI lane 3", pcie_lanes_counters) \ m(+1, u64, error_counter_lane4, "error_counter_lane4", \ "Error counter for PCI lane 4", pcie_lanes_counters) \ m(+1, u64, error_counter_lane5, "error_counter_lane5", \ "Error counter for PCI lane 5", pcie_lanes_counters) \ m(+1, u64, error_counter_lane6, "error_counter_lane6", \ "Error counter for PCI lane 6", pcie_lanes_counters) \ m(+1, u64, error_counter_lane7, "error_counter_lane7", \ "Error counter for PCI lane 7", pcie_lanes_counters) \ m(+1, u64, error_counter_lane8, "error_counter_lane8", \ "Error counter for PCI lane 8", pcie_lanes_counters) \ m(+1, u64, error_counter_lane9, "error_counter_lane9", \ "Error counter for PCI lane 9", pcie_lanes_counters) \ m(+1, u64, error_counter_lane10, "error_counter_lane10", \ "Error counter for PCI lane 10", pcie_lanes_counters) \ m(+1, u64, error_counter_lane11, "error_counter_lane11", \ "Error counter for PCI lane 11", pcie_lanes_counters) \ m(+1, u64, error_counter_lane12, "error_counter_lane12", \ "Error counter for PCI lane 12", pcie_lanes_counters) \ m(+1, u64, error_counter_lane13, "error_counter_lane13", \ "Error counter for PCI lane 13", pcie_lanes_counters) \ m(+1, u64, error_counter_lane14, "error_counter_lane14", \ "Error counter for PCI lane 14", pcie_lanes_counters) \ m(+1, u64, error_counter_lane15, "error_counter_lane15", \ "Error counter for PCI lane 15", pcie_lanes_counters) /* * Make sure to update mlx5e_update_pport_counters() * when adding a new MLX5E_PPORT_STATS block */ #define MLX5E_PPORT_STATS(m) \ MLX5E_PPORT_PER_PRIO_STATS(m) \ MLX5E_PPORT_IEEE802_3_STATS(m) \ MLX5E_PPORT_RFC2819_STATS(m) #define MLX5E_PORT_STATS_DEBUG(m) \ MLX5E_PPORT_RFC2819_STATS_DEBUG(m) \ MLX5E_PPORT_RFC2863_STATS_DEBUG(m) \ MLX5E_PPORT_PHYSICAL_LAYER_STATS_DEBUG(m) \ MLX5E_PPORT_ETHERNET_EXTENDED_STATS_DEBUG(m) \ MLX5E_PPORT_STATISTICAL_DEBUG(m) \ MLX5E_PCIE_PERFORMANCE_COUNTERS_64(m) \ MLX5E_PCIE_PERFORMANCE_COUNTERS_32(m) \ MLX5E_PCIE_TIMERS_AND_STATES_COUNTERS_32(m) \ MLX5E_PCIE_LANE_COUNTERS_32(m) #define MLX5E_PPORT_IEEE802_3_STATS_NUM \ (0 MLX5E_PPORT_IEEE802_3_STATS(MLX5E_STATS_COUNT)) #define MLX5E_PPORT_RFC2819_STATS_NUM \ (0 MLX5E_PPORT_RFC2819_STATS(MLX5E_STATS_COUNT)) #define MLX5E_PPORT_STATS_NUM \ (0 MLX5E_PPORT_STATS(MLX5E_STATS_COUNT)) #define MLX5E_PPORT_PER_PRIO_STATS_NUM \ (0 MLX5E_PPORT_PER_PRIO_STATS(MLX5E_STATS_COUNT)) #define MLX5E_PPORT_RFC2819_STATS_DEBUG_NUM \ (0 MLX5E_PPORT_RFC2819_STATS_DEBUG(MLX5E_STATS_COUNT)) #define MLX5E_PPORT_RFC2863_STATS_DEBUG_NUM \ (0 MLX5E_PPORT_RFC2863_STATS_DEBUG(MLX5E_STATS_COUNT)) #define MLX5E_PPORT_PHYSICAL_LAYER_STATS_DEBUG_NUM \ (0 MLX5E_PPORT_PHYSICAL_LAYER_STATS_DEBUG(MLX5E_STATS_COUNT)) #define MLX5E_PPORT_ETHERNET_EXTENDED_STATS_DEBUG_NUM \ (0 MLX5E_PPORT_ETHERNET_EXTENDED_STATS_DEBUG(MLX5E_STATS_COUNT)) #define MLX5E_PPORT_STATISTICAL_DEBUG_NUM \ (0 MLX5E_PPORT_STATISTICAL_DEBUG(MLX5E_STATS_COUNT)) #define MLX5E_PORT_STATS_DEBUG_NUM \ (0 MLX5E_PORT_STATS_DEBUG(MLX5E_STATS_COUNT)) struct mlx5e_pport_stats { struct sysctl_ctx_list ctx; u64 arg [0]; MLX5E_PPORT_STATS(MLX5E_STATS_VAR) }; struct mlx5e_port_stats_debug { struct sysctl_ctx_list ctx; u64 arg [0]; MLX5E_PORT_STATS_DEBUG(MLX5E_STATS_VAR) }; #define MLX5E_RQ_STATS(m) \ m(+1, u64, packets, "packets", "Received packets") \ m(+1, u64, bytes, "bytes", "Received bytes") \ m(+1, u64, csum_none, "csum_none", "Received packets") \ m(+1, u64, lro_packets, "lro_packets", "Received LRO packets") \ m(+1, u64, lro_bytes, "lro_bytes", "Received LRO bytes") \ m(+1, u64, sw_lro_queued, "sw_lro_queued", "Packets queued for SW LRO") \ m(+1, u64, sw_lro_flushed, "sw_lro_flushed", "Packets flushed from SW LRO") \ m(+1, u64, wqe_err, "wqe_err", "Received packets") #define MLX5E_RQ_STATS_NUM (0 MLX5E_RQ_STATS(MLX5E_STATS_COUNT)) struct mlx5e_rq_stats { struct sysctl_ctx_list ctx; u64 arg [0]; MLX5E_RQ_STATS(MLX5E_STATS_VAR) }; #define MLX5E_SQ_STATS(m) \ m(+1, u64, packets, "packets", "Transmitted packets") \ m(+1, u64, bytes, "bytes", "Transmitted bytes") \ m(+1, u64, tso_packets, "tso_packets", "Transmitted packets") \ m(+1, u64, tso_bytes, "tso_bytes", "Transmitted bytes") \ m(+1, u64, csum_offload_none, "csum_offload_none", "Transmitted packets") \ m(+1, u64, defragged, "defragged", "Transmitted packets") \ m(+1, u64, dropped, "dropped", "Transmitted packets") \ m(+1, u64, enobuf, "enobuf", "Transmitted packets") \ m(+1, u64, cqe_err, "cqe_err", "Transmit CQE errors") \ m(+1, u64, nop, "nop", "Transmitted packets") #define MLX5E_SQ_STATS_NUM (0 MLX5E_SQ_STATS(MLX5E_STATS_COUNT)) struct mlx5e_sq_stats { struct sysctl_ctx_list ctx; u64 arg [0]; MLX5E_SQ_STATS(MLX5E_STATS_VAR) }; struct mlx5e_stats { struct mlx5e_vport_stats vport; struct mlx5e_pport_stats pport; struct mlx5e_port_stats_debug port_stats_debug; }; struct mlx5e_rq_param { u32 rqc [MLX5_ST_SZ_DW(rqc)]; struct mlx5_wq_param wq; }; struct mlx5e_sq_param { u32 sqc [MLX5_ST_SZ_DW(sqc)]; struct mlx5_wq_param wq; }; struct mlx5e_cq_param { u32 cqc [MLX5_ST_SZ_DW(cqc)]; struct mlx5_wq_param wq; }; struct mlx5e_params { u8 log_sq_size; u8 log_rq_size; u16 num_channels; u8 default_vlan_prio; u8 num_tc; u8 rx_cq_moderation_mode; u8 tx_cq_moderation_mode; u16 rx_cq_moderation_usec; u16 rx_cq_moderation_pkts; u16 tx_cq_moderation_usec; u16 tx_cq_moderation_pkts; u16 min_rx_wqes; bool hw_lro_en; bool cqe_zipping_en; u32 lro_wqe_sz; u16 rx_hash_log_tbl_sz; u32 tx_pauseframe_control __aligned(4); u32 rx_pauseframe_control __aligned(4); u16 tx_max_inline; u8 tx_min_inline_mode; u8 tx_priority_flow_control; u8 rx_priority_flow_control; u8 channels_rsss; }; #define MLX5E_PARAMS(m) \ m(+1, u64, tx_queue_size_max, "tx_queue_size_max", "Max send queue size") \ m(+1, u64, rx_queue_size_max, "rx_queue_size_max", "Max receive queue size") \ m(+1, u64, tx_queue_size, "tx_queue_size", "Default send queue size") \ m(+1, u64, rx_queue_size, "rx_queue_size", "Default receive queue size") \ m(+1, u64, channels, "channels", "Default number of channels") \ m(+1, u64, channels_rsss, "channels_rsss", "Default channels receive side scaling stride") \ m(+1, u64, coalesce_usecs_max, "coalesce_usecs_max", "Maximum usecs for joining packets") \ m(+1, u64, coalesce_pkts_max, "coalesce_pkts_max", "Maximum packets to join") \ m(+1, u64, rx_coalesce_usecs, "rx_coalesce_usecs", "Limit in usec for joining rx packets") \ m(+1, u64, rx_coalesce_pkts, "rx_coalesce_pkts", "Maximum number of rx packets to join") \ m(+1, u64, rx_coalesce_mode, "rx_coalesce_mode", "0: EQE fixed mode 1: CQE fixed mode 2: EQE auto mode 3: CQE auto mode") \ m(+1, u64, tx_coalesce_usecs, "tx_coalesce_usecs", "Limit in usec for joining tx packets") \ m(+1, u64, tx_coalesce_pkts, "tx_coalesce_pkts", "Maximum number of tx packets to join") \ m(+1, u64, tx_coalesce_mode, "tx_coalesce_mode", "0: EQE mode 1: CQE mode") \ m(+1, u64, tx_completion_fact, "tx_completion_fact", "1..MAX: Completion event ratio") \ m(+1, u64, tx_completion_fact_max, "tx_completion_fact_max", "Maximum completion event ratio") \ m(+1, u64, hw_lro, "hw_lro", "set to enable hw_lro") \ m(+1, u64, cqe_zipping, "cqe_zipping", "0 : CQE zipping disabled") \ m(+1, u64, modify_tx_dma, "modify_tx_dma", "0: Enable TX 1: Disable TX") \ m(+1, u64, modify_rx_dma, "modify_rx_dma", "0: Enable RX 1: Disable RX") \ m(+1, u64, diag_pci_enable, "diag_pci_enable", "0: Disabled 1: Enabled") \ m(+1, u64, diag_general_enable, "diag_general_enable", "0: Disabled 1: Enabled") \ m(+1, u64, hw_mtu, "hw_mtu", "Current hardware MTU value") \ m(+1, u64, mc_local_lb, "mc_local_lb", "0: Local multicast loopback enabled 1: Disabled") \ m(+1, u64, uc_local_lb, "uc_local_lb", "0: Local unicast loopback enabled 1: Disabled") \ m(+1, s64, irq_cpu_base, "irq_cpu_base", "-1: Don't bind IRQ 0..NCPU-1: select this base CPU when binding IRQs") \ m(+1, s64, irq_cpu_stride, "irq_cpu_stride", "0..NCPU-1: Distance between IRQ vectors when binding them") #define MLX5E_PARAMS_NUM (0 MLX5E_PARAMS(MLX5E_STATS_COUNT)) struct mlx5e_params_ethtool { u64 arg [0]; MLX5E_PARAMS(MLX5E_STATS_VAR) u64 max_bw_value[IEEE_8021QAZ_MAX_TCS]; u8 max_bw_share[IEEE_8021QAZ_MAX_TCS]; u8 prio_tc[MLX5E_MAX_PRIORITY]; u8 dscp2prio[MLX5_MAX_SUPPORTED_DSCP]; u8 trust_state; u8 fec_mask_10x_25x[MLX5E_MAX_FEC_10X_25X]; u16 fec_mask_50x[MLX5E_MAX_FEC_50X]; u8 fec_avail_10x_25x[MLX5E_MAX_FEC_10X_25X]; u16 fec_avail_50x[MLX5E_MAX_FEC_50X]; u32 fec_mode_active; u32 hw_mtu_msb; s32 hw_val_temp[MLX5_MAX_TEMPERATURE]; u32 hw_num_temp; }; struct mlx5e_cq { /* data path - accessed per cqe */ struct mlx5_cqwq wq; /* data path - accessed per HW polling */ struct mlx5_core_cq mcq; /* control */ struct mlx5e_priv *priv; struct mlx5_wq_ctrl wq_ctrl; } __aligned(MLX5E_CACHELINE_SIZE); struct mlx5e_rq_mbuf { bus_dmamap_t dma_map; caddr_t data; struct mbuf *mbuf; }; struct mlx5e_rq { /* persistant fields */ struct mtx mtx; struct mlx5e_rq_stats stats; struct callout watchdog; /* data path */ #define mlx5e_rq_zero_start wq struct mlx5_wq_ll wq; bus_dma_tag_t dma_tag; u32 wqe_sz; u32 nsegs; struct mlx5e_rq_mbuf *mbuf; struct ifnet *ifp; struct mlx5e_cq cq; struct lro_ctrl lro; volatile int enabled; int ix; /* Dynamic Interrupt Moderation */ struct net_dim dim; /* control */ struct mlx5_wq_ctrl wq_ctrl; u32 rqn; struct mlx5e_channel *channel; } __aligned(MLX5E_CACHELINE_SIZE); struct mlx5e_sq_mbuf { bus_dmamap_t dma_map; struct mbuf *mbuf; volatile s32 *p_refcount; /* in use refcount, if any */ u32 num_bytes; u32 num_wqebbs; }; enum { MLX5E_SQ_READY, MLX5E_SQ_FULL }; struct mlx5e_sq { /* persistant fields */ struct mtx lock; struct mtx comp_lock; struct mlx5e_sq_stats stats; struct callout cev_callout; /* data path */ #define mlx5e_sq_zero_start dma_tag bus_dma_tag_t dma_tag; /* dirtied @completion */ u16 cc; /* dirtied @xmit */ u16 pc __aligned(MLX5E_CACHELINE_SIZE); u16 cev_counter; /* completion event counter */ u16 cev_factor; /* completion event factor */ u16 cev_next_state; /* next completion event state */ #define MLX5E_CEV_STATE_INITIAL 0 /* timer not started */ #define MLX5E_CEV_STATE_SEND_NOPS 1 /* send NOPs */ #define MLX5E_CEV_STATE_HOLD_NOPS 2 /* don't send NOPs yet */ u16 running; /* set if SQ is running */ union { u32 d32[2]; u64 d64; } doorbell; struct mlx5e_cq cq; /* pointers to per packet info: write@xmit, read@completion */ struct mlx5e_sq_mbuf *mbuf; /* read only */ struct mlx5_wq_cyc wq; void __iomem *uar_map; struct ifnet *ifp; u32 sqn; u32 mkey_be; u16 max_inline; u8 min_inline_mode; u8 min_insert_caps; #define MLX5E_INSERT_VLAN 1 #define MLX5E_INSERT_NON_VLAN 2 /* control path */ struct mlx5_wq_ctrl wq_ctrl; struct mlx5e_priv *priv; int tc; } __aligned(MLX5E_CACHELINE_SIZE); static inline bool mlx5e_sq_has_room_for(struct mlx5e_sq *sq, u16 n) { u16 cc = sq->cc; u16 pc = sq->pc; return ((sq->wq.sz_m1 & (cc - pc)) >= n || cc == pc); } static inline u32 mlx5e_sq_queue_level(struct mlx5e_sq *sq) { u16 cc; u16 pc; if (sq == NULL) return (0); cc = sq->cc; pc = sq->pc; return (((sq->wq.sz_m1 & (pc - cc)) * IF_SND_QUEUE_LEVEL_MAX) / sq->wq.sz_m1); } struct mlx5e_channel { struct mlx5e_rq rq; struct m_snd_tag tag; struct mlx5_sq_bfreg bfreg; struct mlx5e_sq sq[MLX5E_MAX_TX_NUM_TC]; struct mlx5e_priv *priv; struct completion completion; int ix; } __aligned(MLX5E_CACHELINE_SIZE); enum mlx5e_traffic_types { MLX5E_TT_IPV4_TCP, MLX5E_TT_IPV6_TCP, MLX5E_TT_IPV4_UDP, MLX5E_TT_IPV6_UDP, MLX5E_TT_IPV4_IPSEC_AH, MLX5E_TT_IPV6_IPSEC_AH, MLX5E_TT_IPV4_IPSEC_ESP, MLX5E_TT_IPV6_IPSEC_ESP, MLX5E_TT_IPV4, MLX5E_TT_IPV6, MLX5E_TT_ANY, MLX5E_NUM_TT, }; enum { MLX5E_RQT_SPREADING = 0, MLX5E_RQT_DEFAULT_RQ = 1, MLX5E_NUM_RQT = 2, }; struct mlx5_flow_rule; struct mlx5e_eth_addr_info { u8 addr [ETH_ALEN + 2]; u32 tt_vec; /* flow table rule per traffic type */ struct mlx5_flow_rule *ft_rule[MLX5E_NUM_TT]; }; #define MLX5E_ETH_ADDR_HASH_SIZE (1 << BITS_PER_BYTE) struct mlx5e_eth_addr_hash_node; struct mlx5e_eth_addr_hash_head { struct mlx5e_eth_addr_hash_node *lh_first; }; struct mlx5e_eth_addr_db { struct mlx5e_eth_addr_hash_head if_uc[MLX5E_ETH_ADDR_HASH_SIZE]; struct mlx5e_eth_addr_hash_head if_mc[MLX5E_ETH_ADDR_HASH_SIZE]; struct mlx5e_eth_addr_info broadcast; struct mlx5e_eth_addr_info allmulti; struct mlx5e_eth_addr_info promisc; bool broadcast_enabled; bool allmulti_enabled; bool promisc_enabled; }; enum { MLX5E_STATE_ASYNC_EVENTS_ENABLE, MLX5E_STATE_OPENED, }; enum { MLX5_BW_NO_LIMIT = 0, MLX5_100_MBPS_UNIT = 3, MLX5_GBPS_UNIT = 4, }; struct mlx5e_vlan_db { unsigned long active_vlans[BITS_TO_LONGS(VLAN_N_VID)]; struct mlx5_flow_rule *active_vlans_ft_rule[VLAN_N_VID]; struct mlx5_flow_rule *untagged_ft_rule; struct mlx5_flow_rule *any_cvlan_ft_rule; struct mlx5_flow_rule *any_svlan_ft_rule; bool filter_disabled; }; struct mlx5e_vxlan_db_el { u_int refcount; u_int proto; u_int port; bool installed; struct mlx5_flow_rule *vxlan_ft_rule; TAILQ_ENTRY(mlx5e_vxlan_db_el) link; }; struct mlx5e_vxlan_db { TAILQ_HEAD(, mlx5e_vxlan_db_el) head; }; struct mlx5e_flow_table { int num_groups; struct mlx5_flow_table *t; struct mlx5_flow_group **g; }; struct mlx5e_flow_tables { struct mlx5_flow_namespace *ns; struct mlx5e_flow_table vlan; struct mlx5e_flow_table vxlan; struct mlx5_flow_rule *vxlan_catchall_ft_rule; struct mlx5e_flow_table main; struct mlx5e_flow_table main_vxlan; struct mlx5_flow_rule *main_vxlan_rule[MLX5E_NUM_TT]; struct mlx5e_flow_table inner_rss; }; struct mlx5e_xmit_args { volatile s32 *pref; u32 tisn; u16 ihs; }; -#include "en_rl.h" -#include "en_hw_tls.h" +#include +#include #define MLX5E_TSTMP_PREC 10 struct mlx5e_clbr_point { uint64_t base_curr; uint64_t base_prev; uint64_t clbr_hw_prev; uint64_t clbr_hw_curr; u_int clbr_gen; }; struct mlx5e_dcbx { u32 cable_len; u32 xoff; }; struct mlx5e_priv { struct mlx5_core_dev *mdev; /* must be first */ /* priv data path fields - start */ int order_base_2_num_channels; int queue_mapping_channel_mask; int num_tc; int default_vlan_prio; /* priv data path fields - end */ unsigned long state; int gone; #define PRIV_LOCK(priv) sx_xlock(&(priv)->state_lock) #define PRIV_UNLOCK(priv) sx_xunlock(&(priv)->state_lock) #define PRIV_LOCKED(priv) sx_xlocked(&(priv)->state_lock) #define PRIV_ASSERT_LOCKED(priv) sx_assert(&(priv)->state_lock, SA_XLOCKED) struct sx state_lock; /* Protects Interface state */ u32 pdn; u32 tdn; struct mlx5_core_mkey mr; u32 tisn[MLX5E_MAX_TX_NUM_TC]; u32 rqtn; u32 tirn[MLX5E_NUM_TT]; u32 tirn_inner_vxlan[MLX5E_NUM_TT]; struct mlx5e_flow_tables fts; struct mlx5e_eth_addr_db eth_addr; struct mlx5e_vlan_db vlan; struct mlx5e_vxlan_db vxlan; struct mlx5e_params params; struct mlx5e_params_ethtool params_ethtool; union mlx5_core_pci_diagnostics params_pci; union mlx5_core_general_diagnostics params_general; struct mtx async_events_mtx; /* sync hw events */ struct work_struct update_stats_work; struct work_struct update_carrier_work; struct work_struct set_rx_mode_work; MLX5_DECLARE_DOORBELL_LOCK(doorbell_lock) struct ifnet *ifp; struct sysctl_ctx_list sysctl_ctx; struct sysctl_oid *sysctl_ifnet; struct sysctl_oid *sysctl_hw; int sysctl_debug; struct mlx5e_stats stats; int counter_set_id; struct workqueue_struct *wq; eventhandler_tag vlan_detach; eventhandler_tag vlan_attach; struct ifmedia media; int media_status_last; int media_active_last; eventhandler_tag vxlan_start; eventhandler_tag vxlan_stop; struct callout watchdog; struct mlx5e_rl_priv_data rl; struct mlx5e_tls tls; struct callout tstmp_clbr; int clbr_done; int clbr_curr; struct mlx5e_clbr_point clbr_points[2]; u_int clbr_gen; struct mlx5e_dcbx dcbx; bool sw_is_port_buf_owner; struct pfil_head *pfil; struct mlx5e_channel channel[]; }; #define MLX5E_NET_IP_ALIGN 2 struct mlx5e_tx_wqe { struct mlx5_wqe_ctrl_seg ctrl; struct mlx5_wqe_eth_seg eth; }; struct mlx5e_tx_umr_wqe { struct mlx5_wqe_ctrl_seg ctrl; struct mlx5_wqe_umr_ctrl_seg umr; uint8_t mkc[64]; }; struct mlx5e_tx_psv_wqe { struct mlx5_wqe_ctrl_seg ctrl; struct mlx5_seg_set_psv psv; }; struct mlx5e_rx_wqe { struct mlx5_wqe_srq_next_seg next; struct mlx5_wqe_data_seg data[]; }; /* the size of the structure above must be power of two */ CTASSERT(powerof2(sizeof(struct mlx5e_rx_wqe))); struct mlx5e_eeprom { int lock_bit; int i2c_addr; int page_num; int device_addr; int module_num; int len; int type; int page_valid; u32 *data; }; #define MLX5E_FLD_MAX(typ, fld) ((1ULL << __mlx5_bit_sz(typ, fld)) - 1ULL) bool mlx5e_do_send_cqe(struct mlx5e_sq *); int mlx5e_get_full_header_size(const struct mbuf *, const struct tcphdr **); int mlx5e_xmit(struct ifnet *, struct mbuf *); int mlx5e_open_locked(struct ifnet *); int mlx5e_close_locked(struct ifnet *); void mlx5e_cq_error_event(struct mlx5_core_cq *mcq, int event); mlx5e_cq_comp_t mlx5e_rx_cq_comp; mlx5e_cq_comp_t mlx5e_tx_cq_comp; struct mlx5_cqe64 *mlx5e_get_cqe(struct mlx5e_cq *cq); void mlx5e_dim_work(struct work_struct *); void mlx5e_dim_build_cq_param(struct mlx5e_priv *, struct mlx5e_cq_param *); int mlx5e_open_flow_table(struct mlx5e_priv *priv); void mlx5e_close_flow_table(struct mlx5e_priv *priv); void mlx5e_set_rx_mode_core(struct mlx5e_priv *priv); void mlx5e_set_rx_mode_work(struct work_struct *work); void mlx5e_vlan_rx_add_vid(void *, struct ifnet *, u16); void mlx5e_vlan_rx_kill_vid(void *, struct ifnet *, u16); void mlx5e_enable_vlan_filter(struct mlx5e_priv *priv); void mlx5e_disable_vlan_filter(struct mlx5e_priv *priv); int mlx5e_add_all_vlan_rules(struct mlx5e_priv *priv); void mlx5e_del_all_vlan_rules(struct mlx5e_priv *priv); void mlx5e_vxlan_start(void *arg, struct ifnet *ifp, sa_family_t family, u_int port); void mlx5e_vxlan_stop(void *arg, struct ifnet *ifp, sa_family_t family, u_int port); int mlx5e_add_all_vxlan_rules(struct mlx5e_priv *priv); void mlx5e_del_all_vxlan_rules(struct mlx5e_priv *priv); static inline void mlx5e_tx_notify_hw(struct mlx5e_sq *sq, u32 *wqe) { /* ensure wqe is visible to device before updating doorbell record */ wmb(); *sq->wq.db = cpu_to_be32(sq->pc); /* * Ensure the doorbell record is visible to device before ringing * the doorbell: */ wmb(); mlx5_write64(wqe, sq->uar_map, MLX5_GET_DOORBELL_LOCK(&sq->priv->doorbell_lock)); } static inline void mlx5e_cq_arm(struct mlx5e_cq *cq, spinlock_t *dblock) { struct mlx5_core_cq *mcq; mcq = &cq->mcq; mlx5_cq_arm(mcq, MLX5_CQ_DB_REQ_NOT, mcq->uar->map, dblock, cq->wq.cc); } #define mlx5e_dbg(_IGN, _priv, ...) mlx5_core_dbg((_priv)->mdev, __VA_ARGS__) extern const struct ethtool_ops mlx5e_ethtool_ops; void mlx5e_create_ethtool(struct mlx5e_priv *); void mlx5e_create_stats(struct sysctl_ctx_list *, struct sysctl_oid_list *, const char *, const char **, unsigned, u64 *); void mlx5e_create_counter_stats(struct sysctl_ctx_list *, struct sysctl_oid_list *, const char *, const char **, unsigned, counter_u64_t *); void mlx5e_send_nop(struct mlx5e_sq *, u32); int mlx5e_sq_dump_xmit(struct mlx5e_sq *, struct mlx5e_xmit_args *, struct mbuf **); int mlx5e_sq_xmit(struct mlx5e_sq *, struct mbuf **); void mlx5e_sq_cev_timeout(void *); int mlx5e_refresh_channel_params(struct mlx5e_priv *); int mlx5e_open_cq(struct mlx5e_priv *, struct mlx5e_cq_param *, struct mlx5e_cq *, mlx5e_cq_comp_t *, int eq_ix); void mlx5e_close_cq(struct mlx5e_cq *); void mlx5e_free_sq_db(struct mlx5e_sq *); int mlx5e_alloc_sq_db(struct mlx5e_sq *); int mlx5e_enable_sq(struct mlx5e_sq *, struct mlx5e_sq_param *, const struct mlx5_sq_bfreg *, int tis_num); int mlx5e_modify_sq(struct mlx5e_sq *, int curr_state, int next_state); void mlx5e_disable_sq(struct mlx5e_sq *); void mlx5e_drain_sq(struct mlx5e_sq *); void mlx5e_modify_tx_dma(struct mlx5e_priv *priv, uint8_t value); void mlx5e_modify_rx_dma(struct mlx5e_priv *priv, uint8_t value); void mlx5e_resume_sq(struct mlx5e_sq *sq); void mlx5e_update_sq_inline(struct mlx5e_sq *sq); void mlx5e_refresh_sq_inline(struct mlx5e_priv *priv); int mlx5e_update_buf_lossy(struct mlx5e_priv *priv); int mlx5e_fec_update(struct mlx5e_priv *priv); int mlx5e_hw_temperature_update(struct mlx5e_priv *priv); #endif /* _MLX5_EN_H_ */ diff --git a/sys/dev/mlx5/mlx5_en/mlx5_en_dim.c b/sys/dev/mlx5/mlx5_en/mlx5_en_dim.c index cb8ffe098095..57ca04848bd2 100644 --- a/sys/dev/mlx5/mlx5_en/mlx5_en_dim.c +++ b/sys/dev/mlx5/mlx5_en/mlx5_en_dim.c @@ -1,95 +1,95 @@ /*- * Copyright (c) 2018 Mellanox Technologies. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS `AS IS' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #include "opt_rss.h" #include "opt_ratelimit.h" -#include "en.h" +#include void mlx5e_dim_build_cq_param(struct mlx5e_priv *priv, struct mlx5e_cq_param *param) { struct net_dim_cq_moder prof; void *cqc = param->cqc; if (priv->params.rx_cq_moderation_mode < 2) return; switch (MLX5_GET(cqc, cqc, cq_period_mode)) { case MLX5_CQ_PERIOD_MODE_START_FROM_CQE: prof = net_dim_profile[NET_DIM_CQ_PERIOD_MODE_START_FROM_CQE] [NET_DIM_DEF_PROFILE_CQE]; MLX5_SET(cqc, cqc, cq_period, prof.usec); MLX5_SET(cqc, cqc, cq_max_count, prof.pkts); break; case MLX5_CQ_PERIOD_MODE_START_FROM_EQE: prof = net_dim_profile[NET_DIM_CQ_PERIOD_MODE_START_FROM_EQE] [NET_DIM_DEF_PROFILE_EQE]; MLX5_SET(cqc, cqc, cq_period, prof.usec); MLX5_SET(cqc, cqc, cq_max_count, prof.pkts); break; default: break; } } void mlx5e_dim_work(struct work_struct *work) { struct net_dim *dim = container_of(work, struct net_dim, work); struct mlx5e_rq *rq = container_of(dim, struct mlx5e_rq, dim); struct mlx5e_channel *c = container_of(rq, struct mlx5e_channel, rq); struct net_dim_cq_moder cur_profile; u8 profile_ix; u8 mode; /* copy current auto moderation settings and set new state */ mtx_lock(&rq->mtx); profile_ix = dim->profile_ix; mode = dim->mode; dim->state = NET_DIM_START_MEASURE; mtx_unlock(&rq->mtx); /* check for invalid mode */ if (mode == 255) return; /* get current profile */ cur_profile = net_dim_profile[mode][profile_ix]; /* apply LRO restrictions */ if (c->priv->params.hw_lro_en && cur_profile.pkts > MLX5E_DIM_MAX_RX_CQ_MODERATION_PKTS_WITH_LRO) { cur_profile.pkts = MLX5E_DIM_MAX_RX_CQ_MODERATION_PKTS_WITH_LRO; } /* modify CQ */ mlx5_core_modify_cq_moderation(c->priv->mdev, &rq->cq.mcq, cur_profile.usec, cur_profile.pkts); } diff --git a/sys/dev/mlx5/mlx5_en/mlx5_en_ethtool.c b/sys/dev/mlx5/mlx5_en/mlx5_en_ethtool.c index ab6392fa5fd6..11f47e3f8aa3 100644 --- a/sys/dev/mlx5/mlx5_en/mlx5_en_ethtool.c +++ b/sys/dev/mlx5/mlx5_en/mlx5_en_ethtool.c @@ -1,1647 +1,1647 @@ /*- * Copyright (c) 2015-2021 Mellanox Technologies. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS `AS IS' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #include "opt_rss.h" #include "opt_ratelimit.h" -#include "en.h" -#include "port_buffer.h" +#include +#include void mlx5e_create_stats(struct sysctl_ctx_list *ctx, struct sysctl_oid_list *parent, const char *buffer, const char **desc, unsigned num, u64 * arg) { struct sysctl_oid *node; unsigned x; sysctl_ctx_init(ctx); node = SYSCTL_ADD_NODE(ctx, parent, OID_AUTO, buffer, CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "Statistics"); if (node == NULL) return; for (x = 0; x != num; x++) { SYSCTL_ADD_UQUAD(ctx, SYSCTL_CHILDREN(node), OID_AUTO, desc[2 * x], CTLFLAG_RD, arg + x, desc[2 * x + 1]); } } void mlx5e_create_counter_stats(struct sysctl_ctx_list *ctx, struct sysctl_oid_list *parent, const char *buffer, const char **desc, unsigned num, counter_u64_t *arg) { struct sysctl_oid *node; unsigned x; sysctl_ctx_init(ctx); node = SYSCTL_ADD_NODE(ctx, parent, OID_AUTO, buffer, CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "Statistics"); if (node == NULL) return; for (x = 0; x != num; x++) { SYSCTL_ADD_COUNTER_U64(ctx, SYSCTL_CHILDREN(node), OID_AUTO, desc[2 * x], CTLFLAG_RD, arg + x, desc[2 * x + 1]); } } static void mlx5e_ethtool_sync_tx_completion_fact(struct mlx5e_priv *priv) { /* * Limit the maximum distance between completion events to * half of the currently set TX queue size. * * The maximum number of queue entries a single IP packet can * consume is given by MLX5_SEND_WQE_MAX_WQEBBS. * * The worst case max value is then given as below: */ uint64_t max = priv->params_ethtool.tx_queue_size / (2 * MLX5_SEND_WQE_MAX_WQEBBS); /* * Update the maximum completion factor value in case the * tx_queue_size field changed. Ensure we don't overflow * 16-bits. */ if (max < 1) max = 1; else if (max > 65535) max = 65535; priv->params_ethtool.tx_completion_fact_max = max; /* * Verify that the current TX completion factor is within the * given limits: */ if (priv->params_ethtool.tx_completion_fact < 1) priv->params_ethtool.tx_completion_fact = 1; else if (priv->params_ethtool.tx_completion_fact > max) priv->params_ethtool.tx_completion_fact = max; } static int mlx5e_getmaxrate(struct mlx5e_priv *priv) { struct mlx5_core_dev *mdev = priv->mdev; u8 max_bw_unit[IEEE_8021QAZ_MAX_TCS]; u8 max_bw_value[IEEE_8021QAZ_MAX_TCS]; int err; int i; PRIV_LOCK(priv); err = -mlx5_query_port_tc_rate_limit(mdev, max_bw_value, max_bw_unit); if (err) goto done; for (i = 0; i <= mlx5_max_tc(mdev); i++) { switch (max_bw_unit[i]) { case MLX5_100_MBPS_UNIT: priv->params_ethtool.max_bw_value[i] = max_bw_value[i] * MLX5E_100MB; break; case MLX5_GBPS_UNIT: priv->params_ethtool.max_bw_value[i] = max_bw_value[i] * MLX5E_1GB; break; case MLX5_BW_NO_LIMIT: priv->params_ethtool.max_bw_value[i] = 0; break; default: priv->params_ethtool.max_bw_value[i] = -1; WARN_ONCE(true, "non-supported BW unit"); break; } } done: PRIV_UNLOCK(priv); return (err); } static int mlx5e_get_max_alloc(struct mlx5e_priv *priv) { struct mlx5_core_dev *mdev = priv->mdev; int err; int x; PRIV_LOCK(priv); err = -mlx5_query_port_tc_bw_alloc(mdev, priv->params_ethtool.max_bw_share); if (err == 0) { /* set default value */ for (x = 0; x != IEEE_8021QAZ_MAX_TCS; x++) { priv->params_ethtool.max_bw_share[x] = 100 / IEEE_8021QAZ_MAX_TCS; } err = -mlx5_set_port_tc_bw_alloc(mdev, priv->params_ethtool.max_bw_share); } PRIV_UNLOCK(priv); return (err); } static int mlx5e_get_dscp(struct mlx5e_priv *priv) { struct mlx5_core_dev *mdev = priv->mdev; int err; if (MLX5_CAP_GEN(mdev, qcam_reg) == 0 || MLX5_CAP_QCAM_REG(mdev, qpts) == 0 || MLX5_CAP_QCAM_REG(mdev, qpdpm) == 0) return (EOPNOTSUPP); PRIV_LOCK(priv); err = -mlx5_query_dscp2prio(mdev, priv->params_ethtool.dscp2prio); if (err) goto done; err = -mlx5_query_trust_state(mdev, &priv->params_ethtool.trust_state); if (err) goto done; done: PRIV_UNLOCK(priv); return (err); } static void mlx5e_tc_get_parameters(struct mlx5e_priv *priv, u64 *new_bw_value, u8 *max_bw_value, u8 *max_bw_unit) { const u64 upper_limit_mbps = 255 * MLX5E_100MB; const u64 upper_limit_gbps = 255 * MLX5E_1GB; u64 temp; int i; memset(max_bw_value, 0, IEEE_8021QAZ_MAX_TCS); memset(max_bw_unit, 0, IEEE_8021QAZ_MAX_TCS); for (i = 0; i <= mlx5_max_tc(priv->mdev); i++) { temp = (new_bw_value != NULL) ? new_bw_value[i] : priv->params_ethtool.max_bw_value[i]; if (!temp) { max_bw_unit[i] = MLX5_BW_NO_LIMIT; } else if (temp > upper_limit_gbps) { max_bw_unit[i] = MLX5_BW_NO_LIMIT; } else if (temp <= upper_limit_mbps) { max_bw_value[i] = howmany(temp, MLX5E_100MB); max_bw_unit[i] = MLX5_100_MBPS_UNIT; } else { max_bw_value[i] = howmany(temp, MLX5E_1GB); max_bw_unit[i] = MLX5_GBPS_UNIT; } } } static int mlx5e_tc_maxrate_handler(SYSCTL_HANDLER_ARGS) { struct mlx5e_priv *priv = arg1; struct mlx5_core_dev *mdev = priv->mdev; u8 max_bw_unit[IEEE_8021QAZ_MAX_TCS]; u8 max_bw_value[IEEE_8021QAZ_MAX_TCS]; u64 new_bw_value[IEEE_8021QAZ_MAX_TCS]; u8 max_rates = mlx5_max_tc(mdev) + 1; u8 x; int err; PRIV_LOCK(priv); err = SYSCTL_OUT(req, priv->params_ethtool.max_bw_value, sizeof(priv->params_ethtool.max_bw_value[0]) * max_rates); if (err || !req->newptr) goto done; err = SYSCTL_IN(req, new_bw_value, sizeof(new_bw_value[0]) * max_rates); if (err) goto done; /* range check input value */ for (x = 0; x != max_rates; x++) { if (new_bw_value[x] % MLX5E_100MB) { err = ERANGE; goto done; } } mlx5e_tc_get_parameters(priv, new_bw_value, max_bw_value, max_bw_unit); err = -mlx5_modify_port_tc_rate_limit(mdev, max_bw_value, max_bw_unit); if (err) goto done; memcpy(priv->params_ethtool.max_bw_value, new_bw_value, sizeof(priv->params_ethtool.max_bw_value)); done: PRIV_UNLOCK(priv); return (err); } static int mlx5e_tc_rate_share_handler(SYSCTL_HANDLER_ARGS) { struct mlx5e_priv *priv = arg1; struct mlx5_core_dev *mdev = priv->mdev; u8 max_bw_share[IEEE_8021QAZ_MAX_TCS]; u8 max_rates = mlx5_max_tc(mdev) + 1; int i; int err; int sum; PRIV_LOCK(priv); err = SYSCTL_OUT(req, priv->params_ethtool.max_bw_share, max_rates); if (err || !req->newptr) goto done; err = SYSCTL_IN(req, max_bw_share, max_rates); if (err) goto done; /* range check input value */ for (sum = i = 0; i != max_rates; i++) { if (max_bw_share[i] < 1 || max_bw_share[i] > 100) { err = ERANGE; goto done; } sum += max_bw_share[i]; } /* sum of values should be as close to 100 as possible */ if (sum < (100 - max_rates + 1) || sum > 100) { err = ERANGE; goto done; } err = -mlx5_set_port_tc_bw_alloc(mdev, max_bw_share); if (err) goto done; memcpy(priv->params_ethtool.max_bw_share, max_bw_share, sizeof(priv->params_ethtool.max_bw_share)); done: PRIV_UNLOCK(priv); return (err); } static int mlx5e_get_prio_tc(struct mlx5e_priv *priv) { struct mlx5_core_dev *mdev = priv->mdev; int err = 0; int i; PRIV_LOCK(priv); if (!MLX5_CAP_GEN(priv->mdev, ets)) { PRIV_UNLOCK(priv); return (EOPNOTSUPP); } for (i = 0; i != MLX5E_MAX_PRIORITY; i++) { err = -mlx5_query_port_prio_tc(mdev, i, priv->params_ethtool.prio_tc + i); if (err) break; } PRIV_UNLOCK(priv); return (err); } static int mlx5e_prio_to_tc_handler(SYSCTL_HANDLER_ARGS) { struct mlx5e_priv *priv = arg1; struct mlx5_core_dev *mdev = priv->mdev; uint8_t temp[MLX5E_MAX_PRIORITY]; int err; int i; PRIV_LOCK(priv); err = SYSCTL_OUT(req, priv->params_ethtool.prio_tc, MLX5E_MAX_PRIORITY); if (err || !req->newptr) goto done; err = SYSCTL_IN(req, temp, MLX5E_MAX_PRIORITY); if (err) goto done; for (i = 0; i != MLX5E_MAX_PRIORITY; i++) { if (temp[i] > mlx5_max_tc(mdev)) { err = ERANGE; goto done; } } for (i = 0; i != MLX5E_MAX_PRIORITY; i++) { if (temp[i] == priv->params_ethtool.prio_tc[i]) continue; err = -mlx5_set_port_prio_tc(mdev, i, temp[i]); if (err) goto done; /* update cached value */ priv->params_ethtool.prio_tc[i] = temp[i]; } done: PRIV_UNLOCK(priv); return (err); } int mlx5e_fec_update(struct mlx5e_priv *priv) { struct mlx5_core_dev *mdev = priv->mdev; u32 in[MLX5_ST_SZ_DW(pplm_reg)] = {}; const int sz = MLX5_ST_SZ_BYTES(pplm_reg); int err; if (!MLX5_CAP_GEN(mdev, pcam_reg)) return (EOPNOTSUPP); if (!MLX5_CAP_PCAM_REG(mdev, pplm)) return (EOPNOTSUPP); MLX5_SET(pplm_reg, in, local_port, 1); err = -mlx5_core_access_reg(mdev, in, sz, in, sz, MLX5_REG_PPLM, 0, 0); if (err) return (err); /* get 10x..25x mask */ priv->params_ethtool.fec_mask_10x_25x[0] = MLX5_GET(pplm_reg, in, fec_override_admin_10g_40g); priv->params_ethtool.fec_mask_10x_25x[1] = MLX5_GET(pplm_reg, in, fec_override_admin_25g) & MLX5_GET(pplm_reg, in, fec_override_admin_50g); priv->params_ethtool.fec_mask_10x_25x[2] = MLX5_GET(pplm_reg, in, fec_override_admin_56g); priv->params_ethtool.fec_mask_10x_25x[3] = MLX5_GET(pplm_reg, in, fec_override_admin_100g); /* get 10x..25x available bits */ priv->params_ethtool.fec_avail_10x_25x[0] = MLX5_GET(pplm_reg, in, fec_override_cap_10g_40g); priv->params_ethtool.fec_avail_10x_25x[1] = MLX5_GET(pplm_reg, in, fec_override_cap_25g) & MLX5_GET(pplm_reg, in, fec_override_cap_50g); priv->params_ethtool.fec_avail_10x_25x[2] = MLX5_GET(pplm_reg, in, fec_override_cap_56g); priv->params_ethtool.fec_avail_10x_25x[3] = MLX5_GET(pplm_reg, in, fec_override_cap_100g); /* get 50x mask */ priv->params_ethtool.fec_mask_50x[0] = MLX5_GET(pplm_reg, in, fec_override_admin_50g_1x); priv->params_ethtool.fec_mask_50x[1] = MLX5_GET(pplm_reg, in, fec_override_admin_100g_2x); priv->params_ethtool.fec_mask_50x[2] = MLX5_GET(pplm_reg, in, fec_override_admin_200g_4x); priv->params_ethtool.fec_mask_50x[3] = MLX5_GET(pplm_reg, in, fec_override_admin_400g_8x); /* get 50x available bits */ priv->params_ethtool.fec_avail_50x[0] = MLX5_GET(pplm_reg, in, fec_override_cap_50g_1x); priv->params_ethtool.fec_avail_50x[1] = MLX5_GET(pplm_reg, in, fec_override_cap_100g_2x); priv->params_ethtool.fec_avail_50x[2] = MLX5_GET(pplm_reg, in, fec_override_cap_200g_4x); priv->params_ethtool.fec_avail_50x[3] = MLX5_GET(pplm_reg, in, fec_override_cap_400g_8x); /* get current FEC mask */ priv->params_ethtool.fec_mode_active = MLX5_GET(pplm_reg, in, fec_mode_active); return (0); } static int mlx5e_fec_mask_10x_25x_handler(SYSCTL_HANDLER_ARGS) { struct mlx5e_priv *priv = arg1; struct mlx5_core_dev *mdev = priv->mdev; u32 out[MLX5_ST_SZ_DW(pplm_reg)] = {}; u32 in[MLX5_ST_SZ_DW(pplm_reg)] = {}; const int sz = MLX5_ST_SZ_BYTES(pplm_reg); u8 fec_mask_10x_25x[MLX5E_MAX_FEC_10X_25X]; u8 fec_cap_changed = 0; u8 x; int err; PRIV_LOCK(priv); err = SYSCTL_OUT(req, priv->params_ethtool.fec_mask_10x_25x, sizeof(priv->params_ethtool.fec_mask_10x_25x)); if (err || !req->newptr) goto done; err = SYSCTL_IN(req, fec_mask_10x_25x, sizeof(fec_mask_10x_25x)); if (err) goto done; if (!MLX5_CAP_GEN(mdev, pcam_reg)) { err = EOPNOTSUPP; goto done; } if (!MLX5_CAP_PCAM_REG(mdev, pplm)) { err = EOPNOTSUPP; goto done; } MLX5_SET(pplm_reg, in, local_port, 1); err = -mlx5_core_access_reg(mdev, in, sz, in, sz, MLX5_REG_PPLM, 0, 0); if (err) goto done; /* range check input value */ for (x = 0; x != MLX5E_MAX_FEC_10X_25X; x++) { /* check only one bit is set, if any */ if (fec_mask_10x_25x[x] & (fec_mask_10x_25x[x] - 1)) { err = ERANGE; goto done; } /* check a supported bit is set, if any */ if (fec_mask_10x_25x[x] & ~priv->params_ethtool.fec_avail_10x_25x[x]) { err = ERANGE; goto done; } fec_cap_changed |= (fec_mask_10x_25x[x] ^ priv->params_ethtool.fec_mask_10x_25x[x]); } /* check for no changes */ if (fec_cap_changed == 0) goto done; memset(in, 0, sizeof(in)); MLX5_SET(pplm_reg, in, local_port, 1); /* set new values */ MLX5_SET(pplm_reg, in, fec_override_admin_10g_40g, fec_mask_10x_25x[0]); MLX5_SET(pplm_reg, in, fec_override_admin_25g, fec_mask_10x_25x[1]); MLX5_SET(pplm_reg, in, fec_override_admin_50g, fec_mask_10x_25x[1]); MLX5_SET(pplm_reg, in, fec_override_admin_56g, fec_mask_10x_25x[2]); MLX5_SET(pplm_reg, in, fec_override_admin_100g, fec_mask_10x_25x[3]); /* preserve other values */ MLX5_SET(pplm_reg, in, fec_override_admin_50g_1x, priv->params_ethtool.fec_mask_50x[0]); MLX5_SET(pplm_reg, in, fec_override_admin_100g_2x, priv->params_ethtool.fec_mask_50x[1]); MLX5_SET(pplm_reg, in, fec_override_admin_200g_4x, priv->params_ethtool.fec_mask_50x[2]); MLX5_SET(pplm_reg, in, fec_override_admin_400g_8x, priv->params_ethtool.fec_mask_50x[3]); /* send new value to the firmware */ err = -mlx5_core_access_reg(mdev, in, sz, out, sz, MLX5_REG_PPLM, 0, 1); if (err) goto done; memcpy(priv->params_ethtool.fec_mask_10x_25x, fec_mask_10x_25x, sizeof(priv->params_ethtool.fec_mask_10x_25x)); mlx5_toggle_port_link(priv->mdev); done: PRIV_UNLOCK(priv); return (err); } static int mlx5e_fec_avail_10x_25x_handler(SYSCTL_HANDLER_ARGS) { struct mlx5e_priv *priv = arg1; int err; PRIV_LOCK(priv); err = SYSCTL_OUT(req, priv->params_ethtool.fec_avail_10x_25x, sizeof(priv->params_ethtool.fec_avail_10x_25x)); PRIV_UNLOCK(priv); return (err); } static int mlx5e_fec_mask_50x_handler(SYSCTL_HANDLER_ARGS) { struct mlx5e_priv *priv = arg1; struct mlx5_core_dev *mdev = priv->mdev; u32 out[MLX5_ST_SZ_DW(pplm_reg)] = {}; u32 in[MLX5_ST_SZ_DW(pplm_reg)] = {}; const int sz = MLX5_ST_SZ_BYTES(pplm_reg); u16 fec_mask_50x[MLX5E_MAX_FEC_50X]; u16 fec_cap_changed = 0; u8 x; int err; PRIV_LOCK(priv); err = SYSCTL_OUT(req, priv->params_ethtool.fec_mask_50x, sizeof(priv->params_ethtool.fec_mask_50x)); if (err || !req->newptr) goto done; err = SYSCTL_IN(req, fec_mask_50x, sizeof(fec_mask_50x)); if (err) goto done; if (!MLX5_CAP_GEN(mdev, pcam_reg)) { err = EOPNOTSUPP; goto done; } if (!MLX5_CAP_PCAM_REG(mdev, pplm)) { err = EOPNOTSUPP; goto done; } MLX5_SET(pplm_reg, in, local_port, 1); err = -mlx5_core_access_reg(mdev, in, sz, in, sz, MLX5_REG_PPLM, 0, 0); if (err) goto done; /* range check input value */ for (x = 0; x != MLX5E_MAX_FEC_50X; x++) { /* check only one bit is set, if any */ if (fec_mask_50x[x] & (fec_mask_50x[x] - 1)) { err = ERANGE; goto done; } /* check a supported bit is set, if any */ if (fec_mask_50x[x] & ~priv->params_ethtool.fec_avail_50x[x]) { err = ERANGE; goto done; } fec_cap_changed |= (fec_mask_50x[x] ^ priv->params_ethtool.fec_mask_50x[x]); } /* check for no changes */ if (fec_cap_changed == 0) goto done; memset(in, 0, sizeof(in)); MLX5_SET(pplm_reg, in, local_port, 1); /* set new values */ MLX5_SET(pplm_reg, in, fec_override_admin_50g_1x, fec_mask_50x[0]); MLX5_SET(pplm_reg, in, fec_override_admin_100g_2x, fec_mask_50x[1]); MLX5_SET(pplm_reg, in, fec_override_admin_200g_4x, fec_mask_50x[2]); MLX5_SET(pplm_reg, in, fec_override_admin_400g_8x, fec_mask_50x[3]); /* preserve other values */ MLX5_SET(pplm_reg, in, fec_override_admin_10g_40g, priv->params_ethtool.fec_mask_10x_25x[0]); MLX5_SET(pplm_reg, in, fec_override_admin_25g, priv->params_ethtool.fec_mask_10x_25x[1]); MLX5_SET(pplm_reg, in, fec_override_admin_50g, priv->params_ethtool.fec_mask_10x_25x[1]); MLX5_SET(pplm_reg, in, fec_override_admin_56g, priv->params_ethtool.fec_mask_10x_25x[2]); MLX5_SET(pplm_reg, in, fec_override_admin_100g, priv->params_ethtool.fec_mask_10x_25x[3]); /* send new value to the firmware */ err = -mlx5_core_access_reg(mdev, in, sz, out, sz, MLX5_REG_PPLM, 0, 1); if (err) goto done; memcpy(priv->params_ethtool.fec_mask_50x, fec_mask_50x, sizeof(priv->params_ethtool.fec_mask_50x)); mlx5_toggle_port_link(priv->mdev); done: PRIV_UNLOCK(priv); return (err); } static int mlx5e_fec_avail_50x_handler(SYSCTL_HANDLER_ARGS) { struct mlx5e_priv *priv = arg1; int err; PRIV_LOCK(priv); err = SYSCTL_OUT(req, priv->params_ethtool.fec_avail_50x, sizeof(priv->params_ethtool.fec_avail_50x)); PRIV_UNLOCK(priv); return (err); } static int mlx5e_trust_state_handler(SYSCTL_HANDLER_ARGS) { struct mlx5e_priv *priv = arg1; struct mlx5_core_dev *mdev = priv->mdev; int err; u8 result; PRIV_LOCK(priv); result = priv->params_ethtool.trust_state; err = sysctl_handle_8(oidp, &result, 0, req); if (err || !req->newptr || result == priv->params_ethtool.trust_state) goto done; switch (result) { case MLX5_QPTS_TRUST_PCP: case MLX5_QPTS_TRUST_DSCP: break; case MLX5_QPTS_TRUST_BOTH: if (!MLX5_CAP_QCAM_FEATURE(mdev, qpts_trust_both)) { err = EOPNOTSUPP; goto done; } break; default: err = ERANGE; goto done; } err = -mlx5_set_trust_state(mdev, result); if (err) goto done; priv->params_ethtool.trust_state = result; /* update inline mode */ mlx5e_refresh_sq_inline(priv); #ifdef RATELIMIT mlx5e_rl_refresh_sq_inline(&priv->rl); #endif done: PRIV_UNLOCK(priv); return (err); } static int mlx5e_dscp_prio_handler(SYSCTL_HANDLER_ARGS) { struct mlx5e_priv *priv = arg1; int prio_index = arg2; struct mlx5_core_dev *mdev = priv->mdev; uint8_t dscp2prio[MLX5_MAX_SUPPORTED_DSCP]; uint8_t x; int err; PRIV_LOCK(priv); err = SYSCTL_OUT(req, priv->params_ethtool.dscp2prio + prio_index, sizeof(priv->params_ethtool.dscp2prio) / 8); if (err || !req->newptr) goto done; memcpy(dscp2prio, priv->params_ethtool.dscp2prio, sizeof(dscp2prio)); err = SYSCTL_IN(req, dscp2prio + prio_index, sizeof(dscp2prio) / 8); if (err) goto done; for (x = 0; x != MLX5_MAX_SUPPORTED_DSCP; x++) { if (dscp2prio[x] > 7) { err = ERANGE; goto done; } } err = -mlx5_set_dscp2prio(mdev, dscp2prio); if (err) goto done; /* update local array */ memcpy(priv->params_ethtool.dscp2prio, dscp2prio, sizeof(priv->params_ethtool.dscp2prio)); done: PRIV_UNLOCK(priv); return (err); } int mlx5e_update_buf_lossy(struct mlx5e_priv *priv) { struct ieee_pfc pfc; PRIV_ASSERT_LOCKED(priv); bzero(&pfc, sizeof(pfc)); pfc.pfc_en = priv->params.rx_priority_flow_control; return (-mlx5e_port_manual_buffer_config(priv, MLX5E_PORT_BUFFER_PFC, priv->params_ethtool.hw_mtu, &pfc, NULL, NULL)); } static int mlx5e_buf_size_handler(SYSCTL_HANDLER_ARGS) { struct mlx5e_priv *priv; u32 buf_size[MLX5E_MAX_BUFFER]; struct mlx5e_port_buffer port_buffer; int error, i; priv = arg1; PRIV_LOCK(priv); error = -mlx5e_port_query_buffer(priv, &port_buffer); if (error != 0) goto done; for (i = 0; i < nitems(buf_size); i++) buf_size[i] = port_buffer.buffer[i].size; error = SYSCTL_OUT(req, buf_size, sizeof(buf_size)); if (error != 0 || req->newptr == NULL) goto done; error = SYSCTL_IN(req, buf_size, sizeof(buf_size)); if (error != 0) goto done; error = -mlx5e_port_manual_buffer_config(priv, MLX5E_PORT_BUFFER_SIZE, priv->params_ethtool.hw_mtu, NULL, buf_size, NULL); done: PRIV_UNLOCK(priv); return (error); } static int mlx5e_buf_prio_handler(SYSCTL_HANDLER_ARGS) { struct mlx5e_priv *priv; struct mlx5_core_dev *mdev; u8 buffer[MLX5E_MAX_BUFFER]; int error; priv = arg1; mdev = priv->mdev; PRIV_LOCK(priv); error = -mlx5e_port_query_priority2buffer(mdev, buffer); if (error != 0) goto done; error = SYSCTL_OUT(req, buffer, MLX5E_MAX_BUFFER); if (error != 0 || req->newptr == NULL) goto done; error = SYSCTL_IN(req, buffer, MLX5E_MAX_BUFFER); if (error != 0) goto done; error = -mlx5e_port_manual_buffer_config(priv, MLX5E_PORT_BUFFER_PRIO2BUFFER, priv->params_ethtool.hw_mtu, NULL, NULL, buffer); if (error == 0) error = mlx5e_update_buf_lossy(priv); done: PRIV_UNLOCK(priv); return (error); } static int mlx5e_cable_length_handler(SYSCTL_HANDLER_ARGS) { struct mlx5e_priv *priv; u_int cable_len; int error; priv = arg1; PRIV_LOCK(priv); cable_len = priv->dcbx.cable_len; error = sysctl_handle_int(oidp, &cable_len, 0, req); if (error == 0 && req->newptr != NULL && cable_len != priv->dcbx.cable_len) { error = -mlx5e_port_manual_buffer_config(priv, MLX5E_PORT_BUFFER_CABLE_LEN, priv->params_ethtool.hw_mtu, NULL, NULL, NULL); if (error == 0) priv->dcbx.cable_len = cable_len; } PRIV_UNLOCK(priv); return (error); } static int mlx5e_hw_temperature_handler(SYSCTL_HANDLER_ARGS) { struct mlx5e_priv *priv = arg1; int err; PRIV_LOCK(priv); err = SYSCTL_OUT(req, priv->params_ethtool.hw_val_temp, sizeof(priv->params_ethtool.hw_val_temp[0]) * priv->params_ethtool.hw_num_temp); if (err == 0 && req->newptr != NULL) err = EOPNOTSUPP; PRIV_UNLOCK(priv); return (err); } int mlx5e_hw_temperature_update(struct mlx5e_priv *priv) { int err; u32 x; if (priv->params_ethtool.hw_num_temp == 0) { u32 out_cap[MLX5_ST_SZ_DW(mtcap)] = {}; const int sz_cap = MLX5_ST_SZ_BYTES(mtcap); u32 value; err = -mlx5_core_access_reg(priv->mdev, NULL, 0, out_cap, sz_cap, MLX5_ACCESS_REG_SUMMARY_CTRL_ID_MTCAP, 0, 0); if (err) goto done; value = MLX5_GET(mtcap, out_cap, sensor_count); if (value == 0) return (0); if (value > MLX5_MAX_TEMPERATURE) value = MLX5_MAX_TEMPERATURE; /* update number of temperature sensors */ priv->params_ethtool.hw_num_temp = value; } for (x = 0; x != priv->params_ethtool.hw_num_temp; x++) { u32 out_sensor[MLX5_ST_SZ_DW(mtmp_reg)] = {}; const int sz_sensor = MLX5_ST_SZ_BYTES(mtmp_reg); MLX5_SET(mtmp_reg, out_sensor, sensor_index, x); err = -mlx5_core_access_reg(priv->mdev, out_sensor, sz_sensor, out_sensor, sz_sensor, MLX5_ACCESS_REG_SUMMARY_CTRL_ID_MTMP, 0, 0); if (err) goto done; /* convert from 0.125 celsius to millicelsius */ priv->params_ethtool.hw_val_temp[x] = (s16)MLX5_GET(mtmp_reg, out_sensor, temperature) * 125; } done: return (err); } #define MLX5_PARAM_OFFSET(n) \ __offsetof(struct mlx5e_priv, params_ethtool.n) static int mlx5e_ethtool_handler(SYSCTL_HANDLER_ARGS) { struct mlx5e_priv *priv = arg1; uint64_t value; int mode_modify; int was_opened; int error; PRIV_LOCK(priv); value = priv->params_ethtool.arg[arg2]; if (req != NULL) { error = sysctl_handle_64(oidp, &value, 0, req); if (error || req->newptr == NULL || value == priv->params_ethtool.arg[arg2]) goto done; /* assign new value */ priv->params_ethtool.arg[arg2] = value; } else { error = 0; } /* check if device is gone */ if (priv->gone) { error = ENXIO; goto done; } was_opened = test_bit(MLX5E_STATE_OPENED, &priv->state); mode_modify = MLX5_CAP_GEN(priv->mdev, cq_period_mode_modify); switch (MLX5_PARAM_OFFSET(arg[arg2])) { case MLX5_PARAM_OFFSET(rx_coalesce_usecs): /* import RX coal time */ if (priv->params_ethtool.rx_coalesce_usecs < 1) priv->params_ethtool.rx_coalesce_usecs = 0; else if (priv->params_ethtool.rx_coalesce_usecs > MLX5E_FLD_MAX(cqc, cq_period)) { priv->params_ethtool.rx_coalesce_usecs = MLX5E_FLD_MAX(cqc, cq_period); } priv->params.rx_cq_moderation_usec = priv->params_ethtool.rx_coalesce_usecs; /* check to avoid down and up the network interface */ if (was_opened) error = mlx5e_refresh_channel_params(priv); break; case MLX5_PARAM_OFFSET(rx_coalesce_pkts): /* import RX coal pkts */ if (priv->params_ethtool.rx_coalesce_pkts < 1) priv->params_ethtool.rx_coalesce_pkts = 0; else if (priv->params_ethtool.rx_coalesce_pkts > MLX5E_FLD_MAX(cqc, cq_max_count)) { priv->params_ethtool.rx_coalesce_pkts = MLX5E_FLD_MAX(cqc, cq_max_count); } priv->params.rx_cq_moderation_pkts = priv->params_ethtool.rx_coalesce_pkts; /* check to avoid down and up the network interface */ if (was_opened) error = mlx5e_refresh_channel_params(priv); break; case MLX5_PARAM_OFFSET(tx_coalesce_usecs): /* import TX coal time */ if (priv->params_ethtool.tx_coalesce_usecs < 1) priv->params_ethtool.tx_coalesce_usecs = 0; else if (priv->params_ethtool.tx_coalesce_usecs > MLX5E_FLD_MAX(cqc, cq_period)) { priv->params_ethtool.tx_coalesce_usecs = MLX5E_FLD_MAX(cqc, cq_period); } priv->params.tx_cq_moderation_usec = priv->params_ethtool.tx_coalesce_usecs; /* check to avoid down and up the network interface */ if (was_opened) error = mlx5e_refresh_channel_params(priv); break; case MLX5_PARAM_OFFSET(tx_coalesce_pkts): /* import TX coal pkts */ if (priv->params_ethtool.tx_coalesce_pkts < 1) priv->params_ethtool.tx_coalesce_pkts = 0; else if (priv->params_ethtool.tx_coalesce_pkts > MLX5E_FLD_MAX(cqc, cq_max_count)) { priv->params_ethtool.tx_coalesce_pkts = MLX5E_FLD_MAX(cqc, cq_max_count); } priv->params.tx_cq_moderation_pkts = priv->params_ethtool.tx_coalesce_pkts; /* check to avoid down and up the network interface */ if (was_opened) error = mlx5e_refresh_channel_params(priv); break; case MLX5_PARAM_OFFSET(tx_queue_size): /* network interface must be down */ if (was_opened) mlx5e_close_locked(priv->ifp); /* import TX queue size */ if (priv->params_ethtool.tx_queue_size < (1 << MLX5E_PARAMS_MINIMUM_LOG_SQ_SIZE)) { priv->params_ethtool.tx_queue_size = (1 << MLX5E_PARAMS_MINIMUM_LOG_SQ_SIZE); } else if (priv->params_ethtool.tx_queue_size > priv->params_ethtool.tx_queue_size_max) { priv->params_ethtool.tx_queue_size = priv->params_ethtool.tx_queue_size_max; } /* store actual TX queue size */ priv->params.log_sq_size = order_base_2(priv->params_ethtool.tx_queue_size); priv->params_ethtool.tx_queue_size = 1 << priv->params.log_sq_size; /* verify TX completion factor */ mlx5e_ethtool_sync_tx_completion_fact(priv); /* restart network interface, if any */ if (was_opened) mlx5e_open_locked(priv->ifp); break; case MLX5_PARAM_OFFSET(rx_queue_size): /* network interface must be down */ if (was_opened) mlx5e_close_locked(priv->ifp); /* import RX queue size */ if (priv->params_ethtool.rx_queue_size < (1 << MLX5E_PARAMS_MINIMUM_LOG_RQ_SIZE)) { priv->params_ethtool.rx_queue_size = (1 << MLX5E_PARAMS_MINIMUM_LOG_RQ_SIZE); } else if (priv->params_ethtool.rx_queue_size > priv->params_ethtool.rx_queue_size_max) { priv->params_ethtool.rx_queue_size = priv->params_ethtool.rx_queue_size_max; } /* store actual RX queue size */ priv->params.log_rq_size = order_base_2(priv->params_ethtool.rx_queue_size); priv->params_ethtool.rx_queue_size = 1 << priv->params.log_rq_size; /* update least number of RX WQEs */ priv->params.min_rx_wqes = min( priv->params_ethtool.rx_queue_size - 1, MLX5E_PARAMS_DEFAULT_MIN_RX_WQES); /* restart network interface, if any */ if (was_opened) mlx5e_open_locked(priv->ifp); break; case MLX5_PARAM_OFFSET(channels_rsss): /* network interface must be down */ if (was_opened) mlx5e_close_locked(priv->ifp); /* import number of channels */ if (priv->params_ethtool.channels_rsss < 1) priv->params_ethtool.channels_rsss = 1; else if (priv->params_ethtool.channels_rsss > 128) priv->params_ethtool.channels_rsss = 128; priv->params.channels_rsss = priv->params_ethtool.channels_rsss; /* restart network interface, if any */ if (was_opened) mlx5e_open_locked(priv->ifp); break; case MLX5_PARAM_OFFSET(channels): /* network interface must be down */ if (was_opened) mlx5e_close_locked(priv->ifp); /* import number of channels */ if (priv->params_ethtool.channels < 1) priv->params_ethtool.channels = 1; else if (priv->params_ethtool.channels > (u64) priv->mdev->priv.eq_table.num_comp_vectors) { priv->params_ethtool.channels = (u64) priv->mdev->priv.eq_table.num_comp_vectors; } priv->params.num_channels = priv->params_ethtool.channels; /* restart network interface, if any */ if (was_opened) mlx5e_open_locked(priv->ifp); break; case MLX5_PARAM_OFFSET(rx_coalesce_mode): /* network interface must be down */ if (was_opened != 0 && mode_modify == 0) mlx5e_close_locked(priv->ifp); /* import RX coalesce mode */ if (priv->params_ethtool.rx_coalesce_mode > 3) priv->params_ethtool.rx_coalesce_mode = 3; priv->params.rx_cq_moderation_mode = priv->params_ethtool.rx_coalesce_mode; /* restart network interface, if any */ if (was_opened != 0) { if (mode_modify == 0) mlx5e_open_locked(priv->ifp); else error = mlx5e_refresh_channel_params(priv); } break; case MLX5_PARAM_OFFSET(tx_coalesce_mode): /* network interface must be down */ if (was_opened != 0 && mode_modify == 0) mlx5e_close_locked(priv->ifp); /* import TX coalesce mode */ if (priv->params_ethtool.tx_coalesce_mode != 0) priv->params_ethtool.tx_coalesce_mode = 1; priv->params.tx_cq_moderation_mode = priv->params_ethtool.tx_coalesce_mode; /* restart network interface, if any */ if (was_opened != 0) { if (mode_modify == 0) mlx5e_open_locked(priv->ifp); else error = mlx5e_refresh_channel_params(priv); } break; case MLX5_PARAM_OFFSET(hw_lro): /* network interface must be down */ if (was_opened) mlx5e_close_locked(priv->ifp); /* import HW LRO mode */ if (priv->params_ethtool.hw_lro != 0 && MLX5_CAP_ETH(priv->mdev, lro_cap)) { priv->params_ethtool.hw_lro = 1; /* check if feature should actually be enabled */ if (priv->ifp->if_capenable & IFCAP_LRO) { priv->params.hw_lro_en = true; } else { priv->params.hw_lro_en = false; mlx5_en_warn(priv->ifp, "To enable HW LRO " "please also enable LRO via ifconfig(8).\n"); } } else { /* return an error if HW does not support this feature */ if (priv->params_ethtool.hw_lro != 0) error = EINVAL; priv->params.hw_lro_en = false; priv->params_ethtool.hw_lro = 0; } /* restart network interface, if any */ if (was_opened) mlx5e_open_locked(priv->ifp); break; case MLX5_PARAM_OFFSET(cqe_zipping): /* network interface must be down */ if (was_opened) mlx5e_close_locked(priv->ifp); /* import CQE zipping mode */ if (priv->params_ethtool.cqe_zipping && MLX5_CAP_GEN(priv->mdev, cqe_compression)) { priv->params.cqe_zipping_en = true; priv->params_ethtool.cqe_zipping = 1; } else { priv->params.cqe_zipping_en = false; priv->params_ethtool.cqe_zipping = 0; } /* restart network interface, if any */ if (was_opened) mlx5e_open_locked(priv->ifp); break; case MLX5_PARAM_OFFSET(tx_completion_fact): /* network interface must be down */ if (was_opened) mlx5e_close_locked(priv->ifp); /* verify parameter */ mlx5e_ethtool_sync_tx_completion_fact(priv); /* restart network interface, if any */ if (was_opened) mlx5e_open_locked(priv->ifp); break; case MLX5_PARAM_OFFSET(modify_tx_dma): /* check if network interface is opened */ if (was_opened) { priv->params_ethtool.modify_tx_dma = priv->params_ethtool.modify_tx_dma ? 1 : 0; /* modify tx according to value */ mlx5e_modify_tx_dma(priv, value != 0); } else { /* if closed force enable tx */ priv->params_ethtool.modify_tx_dma = 0; } break; case MLX5_PARAM_OFFSET(modify_rx_dma): /* check if network interface is opened */ if (was_opened) { priv->params_ethtool.modify_rx_dma = priv->params_ethtool.modify_rx_dma ? 1 : 0; /* modify rx according to value */ mlx5e_modify_rx_dma(priv, value != 0); } else { /* if closed force enable rx */ priv->params_ethtool.modify_rx_dma = 0; } break; case MLX5_PARAM_OFFSET(diag_pci_enable): priv->params_ethtool.diag_pci_enable = priv->params_ethtool.diag_pci_enable ? 1 : 0; error = -mlx5_core_set_diagnostics_full(priv->mdev, priv->params_ethtool.diag_pci_enable, priv->params_ethtool.diag_general_enable); break; case MLX5_PARAM_OFFSET(diag_general_enable): priv->params_ethtool.diag_general_enable = priv->params_ethtool.diag_general_enable ? 1 : 0; error = -mlx5_core_set_diagnostics_full(priv->mdev, priv->params_ethtool.diag_pci_enable, priv->params_ethtool.diag_general_enable); break; case MLX5_PARAM_OFFSET(mc_local_lb): priv->params_ethtool.mc_local_lb = priv->params_ethtool.mc_local_lb ? 1 : 0; if (MLX5_CAP_GEN(priv->mdev, disable_local_lb)) { error = mlx5_nic_vport_modify_local_lb(priv->mdev, MLX5_LOCAL_MC_LB, priv->params_ethtool.mc_local_lb); } else { error = EOPNOTSUPP; } break; case MLX5_PARAM_OFFSET(uc_local_lb): priv->params_ethtool.uc_local_lb = priv->params_ethtool.uc_local_lb ? 1 : 0; if (MLX5_CAP_GEN(priv->mdev, disable_local_lb)) { error = mlx5_nic_vport_modify_local_lb(priv->mdev, MLX5_LOCAL_UC_LB, priv->params_ethtool.uc_local_lb); } else { error = EOPNOTSUPP; } break; case MLX5_PARAM_OFFSET(irq_cpu_base): case MLX5_PARAM_OFFSET(irq_cpu_stride): if (was_opened) { /* network interface must toggled */ mlx5e_close_locked(priv->ifp); mlx5e_open_locked(priv->ifp); } break; default: break; } done: PRIV_UNLOCK(priv); return (error); } static const char *mlx5e_params_desc[] = { MLX5E_PARAMS(MLX5E_STATS_DESC) }; static const char *mlx5e_port_stats_debug_desc[] = { MLX5E_PORT_STATS_DEBUG(MLX5E_STATS_DESC) }; static int mlx5e_ethtool_debug_channel_info(SYSCTL_HANDLER_ARGS) { struct mlx5e_priv *priv; struct sbuf sb; struct mlx5e_channel *c; struct mlx5e_sq *sq; struct mlx5e_rq *rq; int error, i, tc; bool opened; priv = arg1; error = sysctl_wire_old_buffer(req, 0); if (error != 0) return (error); if (sbuf_new_for_sysctl(&sb, NULL, 1024, req) == NULL) return (ENOMEM); sbuf_clear_flags(&sb, SBUF_INCLUDENUL); PRIV_LOCK(priv); opened = test_bit(MLX5E_STATE_OPENED, &priv->state); sbuf_printf(&sb, "pages irq %d\n", priv->mdev->priv.msix_arr[MLX5_EQ_VEC_PAGES].vector); sbuf_printf(&sb, "command irq %d\n", priv->mdev->priv.msix_arr[MLX5_EQ_VEC_CMD].vector); sbuf_printf(&sb, "async irq %d\n", priv->mdev->priv.msix_arr[MLX5_EQ_VEC_ASYNC].vector); for (i = 0; i != priv->params.num_channels; i++) { int eqn_not_used = -1; int irqn = MLX5_EQ_VEC_COMP_BASE; if (mlx5_vector2eqn(priv->mdev, i, &eqn_not_used, &irqn) != 0) continue; c = opened ? &priv->channel[i] : NULL; rq = opened ? &c->rq : NULL; sbuf_printf(&sb, "channel %d rq %d cq %d irq %d\n", i, opened ? rq->rqn : -1, opened ? rq->cq.mcq.cqn : -1, priv->mdev->priv.msix_arr[irqn].vector); for (tc = 0; tc != priv->num_tc; tc++) { sq = opened ? &c->sq[tc] : NULL; sbuf_printf(&sb, "channel %d tc %d sq %d cq %d irq %d\n", i, tc, opened ? sq->sqn : -1, opened ? sq->cq.mcq.cqn : -1, priv->mdev->priv.msix_arr[irqn].vector); } } PRIV_UNLOCK(priv); error = sbuf_finish(&sb); sbuf_delete(&sb); return (error); } static int mlx5e_ethtool_debug_stats(SYSCTL_HANDLER_ARGS) { struct mlx5e_priv *priv = arg1; int sys_debug; int error; PRIV_LOCK(priv); if (priv->gone != 0) { error = ENODEV; goto done; } sys_debug = priv->sysctl_debug; error = sysctl_handle_int(oidp, &sys_debug, 0, req); if (error != 0 || !req->newptr) goto done; sys_debug = sys_debug ? 1 : 0; if (sys_debug == priv->sysctl_debug) goto done; if ((priv->sysctl_debug = sys_debug)) { mlx5e_create_stats(&priv->stats.port_stats_debug.ctx, SYSCTL_CHILDREN(priv->sysctl_ifnet), "debug_stats", mlx5e_port_stats_debug_desc, MLX5E_PORT_STATS_DEBUG_NUM, priv->stats.port_stats_debug.arg); SYSCTL_ADD_PROC(&priv->stats.port_stats_debug.ctx, SYSCTL_CHILDREN(priv->sysctl_ifnet), OID_AUTO, "hw_ctx_debug", CTLFLAG_RD | CTLFLAG_MPSAFE | CTLTYPE_STRING, priv, 0, mlx5e_ethtool_debug_channel_info, "S", ""); } else { sysctl_ctx_free(&priv->stats.port_stats_debug.ctx); } done: PRIV_UNLOCK(priv); return (error); } static void mlx5e_create_diagnostics(struct mlx5e_priv *priv) { struct mlx5_core_diagnostics_entry entry; struct sysctl_ctx_list *ctx; struct sysctl_oid *node; int x; /* sysctl context we are using */ ctx = &priv->sysctl_ctx; /* create root node */ node = SYSCTL_ADD_NODE(ctx, SYSCTL_CHILDREN(priv->sysctl_ifnet), OID_AUTO, "diagnostics", CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "Diagnostics"); if (node == NULL) return; /* create PCI diagnostics */ for (x = 0; x != MLX5_CORE_PCI_DIAGNOSTICS_NUM; x++) { entry = mlx5_core_pci_diagnostics_table[x]; if (mlx5_core_supports_diagnostics(priv->mdev, entry.counter_id) == 0) continue; SYSCTL_ADD_UQUAD(ctx, SYSCTL_CHILDREN(node), OID_AUTO, entry.desc, CTLFLAG_RD, priv->params_pci.array + x, "PCI diagnostics counter"); } /* create general diagnostics */ for (x = 0; x != MLX5_CORE_GENERAL_DIAGNOSTICS_NUM; x++) { entry = mlx5_core_general_diagnostics_table[x]; if (mlx5_core_supports_diagnostics(priv->mdev, entry.counter_id) == 0) continue; SYSCTL_ADD_UQUAD(ctx, SYSCTL_CHILDREN(node), OID_AUTO, entry.desc, CTLFLAG_RD, priv->params_general.array + x, "General diagnostics counter"); } } void mlx5e_create_ethtool(struct mlx5e_priv *priv) { struct sysctl_oid *fec_node; struct sysctl_oid *qos_node; struct sysctl_oid *node; const char *pnameunit; struct mlx5e_port_buffer port_buffer; unsigned x; int i; /* set some defaults */ priv->params_ethtool.irq_cpu_base = -1; /* disabled */ priv->params_ethtool.irq_cpu_stride = 1; priv->params_ethtool.tx_queue_size_max = 1 << MLX5E_PARAMS_MAXIMUM_LOG_SQ_SIZE; priv->params_ethtool.rx_queue_size_max = 1 << MLX5E_PARAMS_MAXIMUM_LOG_RQ_SIZE; priv->params_ethtool.tx_queue_size = 1 << priv->params.log_sq_size; priv->params_ethtool.rx_queue_size = 1 << priv->params.log_rq_size; priv->params_ethtool.channels = priv->params.num_channels; priv->params_ethtool.channels_rsss = priv->params.channels_rsss; priv->params_ethtool.coalesce_pkts_max = MLX5E_FLD_MAX(cqc, cq_max_count); priv->params_ethtool.coalesce_usecs_max = MLX5E_FLD_MAX(cqc, cq_period); priv->params_ethtool.rx_coalesce_mode = priv->params.rx_cq_moderation_mode; priv->params_ethtool.rx_coalesce_usecs = priv->params.rx_cq_moderation_usec; priv->params_ethtool.rx_coalesce_pkts = priv->params.rx_cq_moderation_pkts; priv->params_ethtool.tx_coalesce_mode = priv->params.tx_cq_moderation_mode; priv->params_ethtool.tx_coalesce_usecs = priv->params.tx_cq_moderation_usec; priv->params_ethtool.tx_coalesce_pkts = priv->params.tx_cq_moderation_pkts; priv->params_ethtool.hw_lro = priv->params.hw_lro_en; priv->params_ethtool.cqe_zipping = priv->params.cqe_zipping_en; mlx5e_ethtool_sync_tx_completion_fact(priv); /* get default values for local loopback, if any */ if (MLX5_CAP_GEN(priv->mdev, disable_local_lb)) { int err; u8 val; err = mlx5_nic_vport_query_local_lb(priv->mdev, MLX5_LOCAL_MC_LB, &val); if (err == 0) priv->params_ethtool.mc_local_lb = val; err = mlx5_nic_vport_query_local_lb(priv->mdev, MLX5_LOCAL_UC_LB, &val); if (err == 0) priv->params_ethtool.uc_local_lb = val; } /* create root node */ node = SYSCTL_ADD_NODE(&priv->sysctl_ctx, SYSCTL_CHILDREN(priv->sysctl_ifnet), OID_AUTO, "conf", CTLFLAG_RW | CTLFLAG_MPSAFE, NULL, "Configuration"); if (node == NULL) return; for (x = 0; x != MLX5E_PARAMS_NUM; x++) { /* check for read-only parameter */ if (strstr(mlx5e_params_desc[2 * x], "_max") != NULL || strstr(mlx5e_params_desc[2 * x], "_mtu") != NULL) { SYSCTL_ADD_PROC(&priv->sysctl_ctx, SYSCTL_CHILDREN(node), OID_AUTO, mlx5e_params_desc[2 * x], CTLTYPE_U64 | CTLFLAG_RD | CTLFLAG_MPSAFE, priv, x, &mlx5e_ethtool_handler, "QU", mlx5e_params_desc[2 * x + 1]); } else { #if (__FreeBSD_version < 1100000) char path[64]; #endif /* * NOTE: In FreeBSD-11 and newer the * CTLFLAG_RWTUN flag will take care of * loading default sysctl value from the * kernel environment, if any: */ SYSCTL_ADD_PROC(&priv->sysctl_ctx, SYSCTL_CHILDREN(node), OID_AUTO, mlx5e_params_desc[2 * x], CTLTYPE_U64 | CTLFLAG_RWTUN | CTLFLAG_MPSAFE, priv, x, &mlx5e_ethtool_handler, "QU", mlx5e_params_desc[2 * x + 1]); #if (__FreeBSD_version < 1100000) /* compute path for sysctl */ snprintf(path, sizeof(path), "dev.mce.%d.conf.%s", device_get_unit(priv->mdev->pdev->dev.bsddev), mlx5e_params_desc[2 * x]); /* try to fetch tunable, if any */ if (TUNABLE_QUAD_FETCH(path, &priv->params_ethtool.arg[x])) mlx5e_ethtool_handler(NULL, priv, x, NULL); #endif } } /* create fec node */ fec_node = SYSCTL_ADD_NODE(&priv->sysctl_ctx, SYSCTL_CHILDREN(node), OID_AUTO, "fec", CTLFLAG_RW | CTLFLAG_MPSAFE, NULL, "Forward Error Correction"); if (fec_node == NULL) return; if (mlx5e_fec_update(priv) == 0) { SYSCTL_ADD_U32(&priv->sysctl_ctx, SYSCTL_CHILDREN(fec_node), OID_AUTO, "mode_active", CTLFLAG_RD | CTLFLAG_MPSAFE, &priv->params_ethtool.fec_mode_active, 0, "Current FEC mode bit, if any."); SYSCTL_ADD_PROC(&priv->sysctl_ctx, SYSCTL_CHILDREN(fec_node), OID_AUTO, "mask_10x_25x", CTLTYPE_U8 | CTLFLAG_RWTUN | CTLFLAG_MPSAFE, priv, 0, &mlx5e_fec_mask_10x_25x_handler, "CU", "Set FEC masks for 10G_40G, 25G_50G, 56G, 100G respectivly. " "0:Auto " "1:NOFEC " "2:FIRECODE " "4:RS"); SYSCTL_ADD_PROC(&priv->sysctl_ctx, SYSCTL_CHILDREN(fec_node), OID_AUTO, "avail_10x_25x", CTLTYPE_U8 | CTLFLAG_RD | CTLFLAG_MPSAFE, priv, 0, &mlx5e_fec_avail_10x_25x_handler, "CU", "Get available FEC bits for 10G_40G, 25G_50G, 56G, 100G respectivly. " "0:Auto " "1:NOFEC " "2:FIRECODE " "4:RS"); SYSCTL_ADD_PROC(&priv->sysctl_ctx, SYSCTL_CHILDREN(fec_node), OID_AUTO, "mask_50x", CTLTYPE_U16 | CTLFLAG_RWTUN | CTLFLAG_MPSAFE, priv, 0, &mlx5e_fec_mask_50x_handler, "SU", "Set FEC masks for 50G 1x, 100G 2x, 200G 4x, 400G 8x respectivly. " "0:Auto " "128:RS " "512:LL RS"); SYSCTL_ADD_PROC(&priv->sysctl_ctx, SYSCTL_CHILDREN(fec_node), OID_AUTO, "avail_50x", CTLTYPE_U16 | CTLFLAG_RD | CTLFLAG_MPSAFE, priv, 0, &mlx5e_fec_avail_50x_handler, "SU", "Get available FEC bits for 50G 1x, 100G 2x, 200G 4x, 400G 8x respectivly. " "0:Auto " "128:RS " "512:LL RS"); } SYSCTL_ADD_PROC(&priv->sysctl_ctx, SYSCTL_CHILDREN(node), OID_AUTO, "debug_stats", CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, priv, 0, &mlx5e_ethtool_debug_stats, "I", "Extended debug statistics"); pnameunit = device_get_nameunit(priv->mdev->pdev->dev.bsddev); SYSCTL_ADD_STRING(&priv->sysctl_ctx, SYSCTL_CHILDREN(node), OID_AUTO, "device_name", CTLFLAG_RD, __DECONST(void *, pnameunit), 0, "PCI device name"); /* Diagnostics support */ mlx5e_create_diagnostics(priv); /* create qos node */ qos_node = SYSCTL_ADD_NODE(&priv->sysctl_ctx, SYSCTL_CHILDREN(node), OID_AUTO, "qos", CTLFLAG_RW | CTLFLAG_MPSAFE, NULL, "Quality Of Service configuration"); if (qos_node == NULL) return; /* Priority rate limit support */ if (mlx5e_getmaxrate(priv) == 0) { SYSCTL_ADD_PROC(&priv->sysctl_ctx, SYSCTL_CHILDREN(qos_node), OID_AUTO, "tc_max_rate", CTLTYPE_U64 | CTLFLAG_RWTUN | CTLFLAG_MPSAFE, priv, 0, mlx5e_tc_maxrate_handler, "QU", "Max rate for priority, specified in kilobits, where kilo=1000, " "max_rate must be divisible by 100000"); } /* Bandwidth limiting by ratio */ if (mlx5e_get_max_alloc(priv) == 0) { SYSCTL_ADD_PROC(&priv->sysctl_ctx, SYSCTL_CHILDREN(qos_node), OID_AUTO, "tc_rate_share", CTLTYPE_U8 | CTLFLAG_RWTUN | CTLFLAG_MPSAFE, priv, 0, mlx5e_tc_rate_share_handler, "QU", "Specify bandwidth ratio from 1 to 100 " "for the available traffic classes"); } /* Priority to traffic class mapping */ if (mlx5e_get_prio_tc(priv) == 0) { SYSCTL_ADD_PROC(&priv->sysctl_ctx, SYSCTL_CHILDREN(qos_node), OID_AUTO, "prio_0_7_tc", CTLTYPE_U8 | CTLFLAG_RWTUN | CTLFLAG_MPSAFE, priv, 0, mlx5e_prio_to_tc_handler, "CU", "Set traffic class 0 to 7 for priority 0 to 7 inclusivly"); } /* DSCP support */ if (mlx5e_get_dscp(priv) == 0) { for (i = 0; i != MLX5_MAX_SUPPORTED_DSCP; i += 8) { char name[32]; snprintf(name, sizeof(name), "dscp_%d_%d_prio", i, i + 7); SYSCTL_ADD_PROC(&priv->sysctl_ctx, SYSCTL_CHILDREN(qos_node), OID_AUTO, name, CTLTYPE_U8 | CTLFLAG_RWTUN | CTLFLAG_MPSAFE, priv, i, mlx5e_dscp_prio_handler, "CU", "Set DSCP to priority mapping, 0..7"); } #define A "Set trust state, 1:PCP 2:DSCP" #define B " 3:BOTH" SYSCTL_ADD_PROC(&priv->sysctl_ctx, SYSCTL_CHILDREN(qos_node), OID_AUTO, "trust_state", CTLTYPE_U8 | CTLFLAG_RWTUN | CTLFLAG_MPSAFE, priv, 0, mlx5e_trust_state_handler, "CU", MLX5_CAP_QCAM_FEATURE(priv->mdev, qpts_trust_both) ? A B : A); #undef B #undef A } if (mlx5e_port_query_buffer(priv, &port_buffer) == 0) { SYSCTL_ADD_PROC(&priv->sysctl_ctx, SYSCTL_CHILDREN(qos_node), OID_AUTO, "buffers_size", CTLTYPE_U32 | CTLFLAG_RWTUN | CTLFLAG_MPSAFE, priv, 0, mlx5e_buf_size_handler, "IU", "Set buffers sizes"); SYSCTL_ADD_PROC(&priv->sysctl_ctx, SYSCTL_CHILDREN(qos_node), OID_AUTO, "buffers_prio", CTLTYPE_U8 | CTLFLAG_RWTUN | CTLFLAG_MPSAFE, priv, 0, mlx5e_buf_prio_handler, "CU", "Set prio to buffers mapping"); SYSCTL_ADD_PROC(&priv->sysctl_ctx, SYSCTL_CHILDREN(qos_node), OID_AUTO, "cable_length", CTLTYPE_UINT | CTLFLAG_RWTUN | CTLFLAG_MPSAFE, priv, 0, mlx5e_cable_length_handler, "IU", "Set cable length in meters for xoff threshold calculation"); } if (mlx5e_hw_temperature_update(priv) == 0) { SYSCTL_ADD_PROC(&priv->sysctl_ctx, SYSCTL_CHILDREN(priv->sysctl_ifnet), OID_AUTO, "hw_temperature", CTLTYPE_S32 | CTLFLAG_RD | CTLFLAG_MPSAFE, priv, 0, mlx5e_hw_temperature_handler, "I", "HW temperature in millicelsius"); } } diff --git a/sys/dev/mlx5/mlx5_en/mlx5_en_flow_table.c b/sys/dev/mlx5/mlx5_en/mlx5_en_flow_table.c index 5b139974c76f..da4b1cbb676d 100644 --- a/sys/dev/mlx5/mlx5_en/mlx5_en_flow_table.c +++ b/sys/dev/mlx5/mlx5_en/mlx5_en_flow_table.c @@ -1,2335 +1,2335 @@ /*- * Copyright (c) 2015 Mellanox Technologies. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS `AS IS' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #include "opt_rss.h" #include "opt_ratelimit.h" -#include "en.h" +#include #include #include #include /* * The flow tables with rules define the packet processing on receive. * Currently, the following structure is set up to handle different offloads * like VLAN decapsulation, packet classification, RSS hashing, VxLAN checksum * offloading: * * * +=========+ +=========+ +=================+ * |VLAN ft: | |VxLAN | |VxLAN Main | * |CTAG/STAG|------>| VNI|----->|Inner Proto Match|=====> Inner TIR n * |VID/noVID|/ |Catch-all|\ | | * +=========+ +=========+| +=================+ * | * | * | * v * +=================+ * |Main | * |Outer Proto Match|=====> TIR n * | | * +=================+ * * The path through flow rules directs each packet into an appropriate TIR, * according to the: * - VLAN encapsulation * - Outer protocol * - Presence of inner protocol */ #define MLX5_SET_CFG(p, f, v) MLX5_SET(create_flow_group_in, p, f, v) enum { MLX5E_FULLMATCH = 0, MLX5E_ALLMULTI = 1, MLX5E_PROMISC = 2, }; enum { MLX5E_UC = 0, MLX5E_MC_IPV4 = 1, MLX5E_MC_IPV6 = 2, MLX5E_MC_OTHER = 3, }; enum { MLX5E_ACTION_NONE = 0, MLX5E_ACTION_ADD = 1, MLX5E_ACTION_DEL = 2, }; struct mlx5e_eth_addr_hash_node { LIST_ENTRY(mlx5e_eth_addr_hash_node) hlist; u8 action; u32 mpfs_index; struct mlx5e_eth_addr_info ai; }; static inline int mlx5e_hash_eth_addr(const u8 * addr) { return (addr[5]); } static bool mlx5e_add_eth_addr_to_hash(struct mlx5e_eth_addr_hash_head *hash, struct mlx5e_eth_addr_hash_node *hn_new) { struct mlx5e_eth_addr_hash_node *hn; u32 ix = mlx5e_hash_eth_addr(hn_new->ai.addr); LIST_FOREACH(hn, &hash[ix], hlist) { if (bcmp(hn->ai.addr, hn_new->ai.addr, ETHER_ADDR_LEN) == 0) { if (hn->action == MLX5E_ACTION_DEL) hn->action = MLX5E_ACTION_NONE; free(hn_new, M_MLX5EN); return (false); } } LIST_INSERT_HEAD(&hash[ix], hn_new, hlist); return (true); } static void mlx5e_del_eth_addr_from_hash(struct mlx5e_eth_addr_hash_node *hn) { LIST_REMOVE(hn, hlist); free(hn, M_MLX5EN); } static void mlx5e_del_eth_addr_from_flow_table(struct mlx5e_priv *priv, struct mlx5e_eth_addr_info *ai) { if (ai->tt_vec & (1 << MLX5E_TT_IPV6_IPSEC_ESP)) mlx5_del_flow_rule(ai->ft_rule[MLX5E_TT_IPV6_IPSEC_ESP]); if (ai->tt_vec & (1 << MLX5E_TT_IPV4_IPSEC_ESP)) mlx5_del_flow_rule(ai->ft_rule[MLX5E_TT_IPV4_IPSEC_ESP]); if (ai->tt_vec & (1 << MLX5E_TT_IPV6_IPSEC_AH)) mlx5_del_flow_rule(ai->ft_rule[MLX5E_TT_IPV6_IPSEC_AH]); if (ai->tt_vec & (1 << MLX5E_TT_IPV4_IPSEC_AH)) mlx5_del_flow_rule(ai->ft_rule[MLX5E_TT_IPV4_IPSEC_AH]); if (ai->tt_vec & (1 << MLX5E_TT_IPV6_TCP)) mlx5_del_flow_rule(ai->ft_rule[MLX5E_TT_IPV6_TCP]); if (ai->tt_vec & (1 << MLX5E_TT_IPV4_TCP)) mlx5_del_flow_rule(ai->ft_rule[MLX5E_TT_IPV4_TCP]); if (ai->tt_vec & (1 << MLX5E_TT_IPV6_UDP)) mlx5_del_flow_rule(ai->ft_rule[MLX5E_TT_IPV6_UDP]); if (ai->tt_vec & (1 << MLX5E_TT_IPV4_UDP)) mlx5_del_flow_rule(ai->ft_rule[MLX5E_TT_IPV4_UDP]); if (ai->tt_vec & (1 << MLX5E_TT_IPV6)) mlx5_del_flow_rule(ai->ft_rule[MLX5E_TT_IPV6]); if (ai->tt_vec & (1 << MLX5E_TT_IPV4)) mlx5_del_flow_rule(ai->ft_rule[MLX5E_TT_IPV4]); if (ai->tt_vec & (1 << MLX5E_TT_ANY)) mlx5_del_flow_rule(ai->ft_rule[MLX5E_TT_ANY]); /* ensure the rules are not freed again */ ai->tt_vec = 0; } static int mlx5e_get_eth_addr_type(const u8 * addr) { if (ETHER_IS_MULTICAST(addr) == 0) return (MLX5E_UC); if ((addr[0] == 0x01) && (addr[1] == 0x00) && (addr[2] == 0x5e) && !(addr[3] & 0x80)) return (MLX5E_MC_IPV4); if ((addr[0] == 0x33) && (addr[1] == 0x33)) return (MLX5E_MC_IPV6); return (MLX5E_MC_OTHER); } static u32 mlx5e_get_tt_vec(struct mlx5e_eth_addr_info *ai, int type) { int eth_addr_type; u32 ret; switch (type) { case MLX5E_FULLMATCH: eth_addr_type = mlx5e_get_eth_addr_type(ai->addr); switch (eth_addr_type) { case MLX5E_UC: ret = (1 << MLX5E_TT_IPV4_TCP) | (1 << MLX5E_TT_IPV6_TCP) | (1 << MLX5E_TT_IPV4_UDP) | (1 << MLX5E_TT_IPV6_UDP) | (1 << MLX5E_TT_IPV4) | (1 << MLX5E_TT_IPV6) | (1 << MLX5E_TT_ANY) | 0; break; case MLX5E_MC_IPV4: ret = (1 << MLX5E_TT_IPV4_UDP) | (1 << MLX5E_TT_IPV4) | 0; break; case MLX5E_MC_IPV6: ret = (1 << MLX5E_TT_IPV6_UDP) | (1 << MLX5E_TT_IPV6) | 0; break; default: ret = (1 << MLX5E_TT_ANY) | 0; break; } break; case MLX5E_ALLMULTI: ret = (1 << MLX5E_TT_IPV4_UDP) | (1 << MLX5E_TT_IPV6_UDP) | (1 << MLX5E_TT_IPV4) | (1 << MLX5E_TT_IPV6) | (1 << MLX5E_TT_ANY) | 0; break; default: /* MLX5E_PROMISC */ ret = (1 << MLX5E_TT_IPV4_TCP) | (1 << MLX5E_TT_IPV6_TCP) | (1 << MLX5E_TT_IPV4_UDP) | (1 << MLX5E_TT_IPV6_UDP) | (1 << MLX5E_TT_IPV4) | (1 << MLX5E_TT_IPV6) | (1 << MLX5E_TT_ANY) | 0; break; } return (ret); } static int mlx5e_add_eth_addr_rule_sub(struct mlx5e_priv *priv, struct mlx5e_eth_addr_info *ai, int type, u32 *mc, u32 *mv) { struct mlx5_flow_destination dest = {}; u8 mc_enable = 0; struct mlx5_flow_rule **rule_p; struct mlx5_flow_table *ft = priv->fts.main.t; u8 *mc_dmac = MLX5_ADDR_OF(fte_match_param, mc, outer_headers.dmac_47_16); u8 *mv_dmac = MLX5_ADDR_OF(fte_match_param, mv, outer_headers.dmac_47_16); u32 *tirn = priv->tirn; u32 tt_vec; int err = 0; dest.type = MLX5_FLOW_DESTINATION_TYPE_TIR; switch (type) { case MLX5E_FULLMATCH: mc_enable = MLX5_MATCH_OUTER_HEADERS; memset(mc_dmac, 0xff, ETH_ALEN); ether_addr_copy(mv_dmac, ai->addr); break; case MLX5E_ALLMULTI: mc_enable = MLX5_MATCH_OUTER_HEADERS; mc_dmac[0] = 0x01; mv_dmac[0] = 0x01; break; case MLX5E_PROMISC: break; default: break; } tt_vec = mlx5e_get_tt_vec(ai, type); if (tt_vec & BIT(MLX5E_TT_ANY)) { rule_p = &ai->ft_rule[MLX5E_TT_ANY]; dest.tir_num = tirn[MLX5E_TT_ANY]; *rule_p = mlx5_add_flow_rule(ft, mc_enable, mc, mv, MLX5_FLOW_CONTEXT_ACTION_FWD_DEST, MLX5_FS_ETH_FLOW_TAG, &dest); if (IS_ERR_OR_NULL(*rule_p)) goto err_del_ai; ai->tt_vec |= BIT(MLX5E_TT_ANY); } mc_enable = MLX5_MATCH_OUTER_HEADERS; MLX5_SET_TO_ONES(fte_match_param, mc, outer_headers.ethertype); if (tt_vec & BIT(MLX5E_TT_IPV4)) { rule_p = &ai->ft_rule[MLX5E_TT_IPV4]; dest.tir_num = tirn[MLX5E_TT_IPV4]; MLX5_SET(fte_match_param, mv, outer_headers.ethertype, ETHERTYPE_IP); *rule_p = mlx5_add_flow_rule(ft, mc_enable, mc, mv, MLX5_FLOW_CONTEXT_ACTION_FWD_DEST, MLX5_FS_ETH_FLOW_TAG, &dest); if (IS_ERR_OR_NULL(*rule_p)) goto err_del_ai; ai->tt_vec |= BIT(MLX5E_TT_IPV4); } if (tt_vec & BIT(MLX5E_TT_IPV6)) { rule_p = &ai->ft_rule[MLX5E_TT_IPV6]; dest.tir_num = tirn[MLX5E_TT_IPV6]; MLX5_SET(fte_match_param, mv, outer_headers.ethertype, ETHERTYPE_IPV6); *rule_p = mlx5_add_flow_rule(ft, mc_enable, mc, mv, MLX5_FLOW_CONTEXT_ACTION_FWD_DEST, MLX5_FS_ETH_FLOW_TAG, &dest); if (IS_ERR_OR_NULL(*rule_p)) goto err_del_ai; ai->tt_vec |= BIT(MLX5E_TT_IPV6); } MLX5_SET_TO_ONES(fte_match_param, mc, outer_headers.ip_protocol); MLX5_SET(fte_match_param, mv, outer_headers.ip_protocol, IPPROTO_UDP); if (tt_vec & BIT(MLX5E_TT_IPV4_UDP)) { rule_p = &ai->ft_rule[MLX5E_TT_IPV4_UDP]; dest.tir_num = tirn[MLX5E_TT_IPV4_UDP]; MLX5_SET(fte_match_param, mv, outer_headers.ethertype, ETHERTYPE_IP); *rule_p = mlx5_add_flow_rule(ft, mc_enable, mc, mv, MLX5_FLOW_CONTEXT_ACTION_FWD_DEST, MLX5_FS_ETH_FLOW_TAG, &dest); if (IS_ERR_OR_NULL(*rule_p)) goto err_del_ai; ai->tt_vec |= BIT(MLX5E_TT_IPV4_UDP); } if (tt_vec & BIT(MLX5E_TT_IPV6_UDP)) { rule_p = &ai->ft_rule[MLX5E_TT_IPV6_UDP]; dest.tir_num = tirn[MLX5E_TT_IPV6_UDP]; MLX5_SET(fte_match_param, mv, outer_headers.ethertype, ETHERTYPE_IPV6); *rule_p = mlx5_add_flow_rule(ft, mc_enable, mc, mv, MLX5_FLOW_CONTEXT_ACTION_FWD_DEST, MLX5_FS_ETH_FLOW_TAG, &dest); if (IS_ERR_OR_NULL(*rule_p)) goto err_del_ai; ai->tt_vec |= BIT(MLX5E_TT_IPV6_UDP); } MLX5_SET(fte_match_param, mv, outer_headers.ip_protocol, IPPROTO_TCP); if (tt_vec & BIT(MLX5E_TT_IPV4_TCP)) { rule_p = &ai->ft_rule[MLX5E_TT_IPV4_TCP]; dest.tir_num = tirn[MLX5E_TT_IPV4_TCP]; MLX5_SET(fte_match_param, mv, outer_headers.ethertype, ETHERTYPE_IP); *rule_p = mlx5_add_flow_rule(ft, mc_enable, mc, mv, MLX5_FLOW_CONTEXT_ACTION_FWD_DEST, MLX5_FS_ETH_FLOW_TAG, &dest); if (IS_ERR_OR_NULL(*rule_p)) goto err_del_ai; ai->tt_vec |= BIT(MLX5E_TT_IPV4_TCP); } if (tt_vec & BIT(MLX5E_TT_IPV6_TCP)) { rule_p = &ai->ft_rule[MLX5E_TT_IPV6_TCP]; dest.tir_num = tirn[MLX5E_TT_IPV6_TCP]; MLX5_SET(fte_match_param, mv, outer_headers.ethertype, ETHERTYPE_IPV6); *rule_p = mlx5_add_flow_rule(ft, mc_enable, mc, mv, MLX5_FLOW_CONTEXT_ACTION_FWD_DEST, MLX5_FS_ETH_FLOW_TAG, &dest); if (IS_ERR_OR_NULL(*rule_p)) goto err_del_ai; ai->tt_vec |= BIT(MLX5E_TT_IPV6_TCP); } MLX5_SET(fte_match_param, mv, outer_headers.ip_protocol, IPPROTO_AH); if (tt_vec & BIT(MLX5E_TT_IPV4_IPSEC_AH)) { rule_p = &ai->ft_rule[MLX5E_TT_IPV4_IPSEC_AH]; dest.tir_num = tirn[MLX5E_TT_IPV4_IPSEC_AH]; MLX5_SET(fte_match_param, mv, outer_headers.ethertype, ETHERTYPE_IP); *rule_p = mlx5_add_flow_rule(ft, mc_enable, mc, mv, MLX5_FLOW_CONTEXT_ACTION_FWD_DEST, MLX5_FS_ETH_FLOW_TAG, &dest); if (IS_ERR_OR_NULL(*rule_p)) goto err_del_ai; ai->tt_vec |= BIT(MLX5E_TT_IPV4_IPSEC_AH); } if (tt_vec & BIT(MLX5E_TT_IPV6_IPSEC_AH)) { rule_p = &ai->ft_rule[MLX5E_TT_IPV6_IPSEC_AH]; dest.tir_num = tirn[MLX5E_TT_IPV6_IPSEC_AH]; MLX5_SET(fte_match_param, mv, outer_headers.ethertype, ETHERTYPE_IPV6); *rule_p = mlx5_add_flow_rule(ft, mc_enable, mc, mv, MLX5_FLOW_CONTEXT_ACTION_FWD_DEST, MLX5_FS_ETH_FLOW_TAG, &dest); if (IS_ERR_OR_NULL(*rule_p)) goto err_del_ai; ai->tt_vec |= BIT(MLX5E_TT_IPV6_IPSEC_AH); } MLX5_SET(fte_match_param, mv, outer_headers.ip_protocol, IPPROTO_ESP); if (tt_vec & BIT(MLX5E_TT_IPV4_IPSEC_ESP)) { rule_p = &ai->ft_rule[MLX5E_TT_IPV4_IPSEC_ESP]; dest.tir_num = tirn[MLX5E_TT_IPV4_IPSEC_ESP]; MLX5_SET(fte_match_param, mv, outer_headers.ethertype, ETHERTYPE_IP); *rule_p = mlx5_add_flow_rule(ft, mc_enable, mc, mv, MLX5_FLOW_CONTEXT_ACTION_FWD_DEST, MLX5_FS_ETH_FLOW_TAG, &dest); if (IS_ERR_OR_NULL(*rule_p)) goto err_del_ai; ai->tt_vec |= BIT(MLX5E_TT_IPV4_IPSEC_ESP); } if (tt_vec & BIT(MLX5E_TT_IPV6_IPSEC_ESP)) { rule_p = &ai->ft_rule[MLX5E_TT_IPV6_IPSEC_ESP]; dest.tir_num = tirn[MLX5E_TT_IPV6_IPSEC_ESP]; MLX5_SET(fte_match_param, mv, outer_headers.ethertype, ETHERTYPE_IPV6); *rule_p = mlx5_add_flow_rule(ft, mc_enable, mc, mv, MLX5_FLOW_CONTEXT_ACTION_FWD_DEST, MLX5_FS_ETH_FLOW_TAG, &dest); if (IS_ERR_OR_NULL(*rule_p)) goto err_del_ai; ai->tt_vec |= BIT(MLX5E_TT_IPV6_IPSEC_ESP); } return 0; err_del_ai: err = PTR_ERR(*rule_p); *rule_p = NULL; mlx5e_del_eth_addr_from_flow_table(priv, ai); return err; } static int mlx5e_add_eth_addr_rule(struct mlx5e_priv *priv, struct mlx5e_eth_addr_info *ai, int type) { u32 *match_criteria; u32 *match_value; int err = 0; match_value = mlx5_vzalloc(MLX5_ST_SZ_BYTES(fte_match_param)); match_criteria = mlx5_vzalloc(MLX5_ST_SZ_BYTES(fte_match_param)); if (!match_value || !match_criteria) { mlx5_en_err(priv->ifp, "alloc failed\n"); err = -ENOMEM; goto add_eth_addr_rule_out; } err = mlx5e_add_eth_addr_rule_sub(priv, ai, type, match_criteria, match_value); add_eth_addr_rule_out: kvfree(match_criteria); kvfree(match_value); return (err); } static void mlx5e_del_main_vxlan_rules(struct mlx5e_priv *priv) { struct mlx5_flow_rule **ra = priv->fts.main_vxlan_rule, **r; r = &ra[MLX5E_TT_IPV6_IPSEC_ESP]; if (*r != NULL) { mlx5_del_flow_rule(*r); *r = NULL; } r = &ra[MLX5E_TT_IPV4_IPSEC_ESP]; if (*r != NULL) { mlx5_del_flow_rule(*r); *r = NULL; } r = &ra[MLX5E_TT_IPV6_IPSEC_AH]; if (*r != NULL) { mlx5_del_flow_rule(*r); *r = NULL; } r = &ra[MLX5E_TT_IPV4_IPSEC_AH]; if (*r != NULL) { mlx5_del_flow_rule(*r); *r = NULL; } r = &ra[MLX5E_TT_IPV6_TCP]; if (*r != NULL) { mlx5_del_flow_rule(*r); *r = NULL; } r = &ra[MLX5E_TT_IPV4_TCP]; if (*r != NULL) { mlx5_del_flow_rule(*r); *r = NULL; } r = &ra[MLX5E_TT_IPV6_UDP]; if (*r != NULL) { mlx5_del_flow_rule(*r); *r = NULL; } r = &ra[MLX5E_TT_IPV4_UDP]; if (*r != NULL) { mlx5_del_flow_rule(*r); *r = NULL; } r = &ra[MLX5E_TT_IPV6]; if (*r != NULL) { mlx5_del_flow_rule(*r); *r = NULL; } r = &ra[MLX5E_TT_IPV4]; if (*r != NULL) { mlx5_del_flow_rule(*r); *r = NULL; } r = &ra[MLX5E_TT_ANY]; if (*r != NULL) { mlx5_del_flow_rule(*r); *r = NULL; } } static int mlx5e_add_main_vxlan_rules_sub(struct mlx5e_priv *priv, u32 *mc, u32 *mv) { struct mlx5_flow_destination dest = {}; u8 mc_enable = 0; struct mlx5_flow_rule **rule_p; struct mlx5_flow_table *ft = priv->fts.main_vxlan.t; u32 *tirn = priv->tirn_inner_vxlan; int err = 0; dest.type = MLX5_FLOW_DESTINATION_TYPE_TIR; mc_enable = MLX5_MATCH_INNER_HEADERS; MLX5_SET_TO_ONES(fte_match_param, mc, inner_headers.ethertype); rule_p = &priv->fts.main_vxlan_rule[MLX5E_TT_IPV4]; dest.tir_num = tirn[MLX5E_TT_IPV4]; MLX5_SET(fte_match_param, mv, inner_headers.ethertype, ETHERTYPE_IP); *rule_p = mlx5_add_flow_rule(ft, mc_enable, mc, mv, MLX5_FLOW_CONTEXT_ACTION_FWD_DEST, MLX5_FS_ETH_FLOW_TAG, &dest); if (IS_ERR_OR_NULL(*rule_p)) goto err_del_ai; rule_p = &priv->fts.main_vxlan_rule[MLX5E_TT_IPV6]; dest.tir_num = tirn[MLX5E_TT_IPV6]; MLX5_SET(fte_match_param, mv, inner_headers.ethertype, ETHERTYPE_IPV6); *rule_p = mlx5_add_flow_rule(ft, mc_enable, mc, mv, MLX5_FLOW_CONTEXT_ACTION_FWD_DEST, MLX5_FS_ETH_FLOW_TAG, &dest); if (IS_ERR_OR_NULL(*rule_p)) goto err_del_ai; MLX5_SET_TO_ONES(fte_match_param, mc, inner_headers.ip_protocol); MLX5_SET(fte_match_param, mv, inner_headers.ip_protocol, IPPROTO_UDP); rule_p = &priv->fts.main_vxlan_rule[MLX5E_TT_IPV4_UDP]; dest.tir_num = tirn[MLX5E_TT_IPV4_UDP]; MLX5_SET(fte_match_param, mv, inner_headers.ethertype, ETHERTYPE_IP); *rule_p = mlx5_add_flow_rule(ft, mc_enable, mc, mv, MLX5_FLOW_CONTEXT_ACTION_FWD_DEST, MLX5_FS_ETH_FLOW_TAG, &dest); if (IS_ERR_OR_NULL(*rule_p)) goto err_del_ai; rule_p = &priv->fts.main_vxlan_rule[MLX5E_TT_IPV6_UDP]; dest.tir_num = tirn[MLX5E_TT_IPV6_UDP]; MLX5_SET(fte_match_param, mv, inner_headers.ethertype, ETHERTYPE_IPV6); *rule_p = mlx5_add_flow_rule(ft, mc_enable, mc, mv, MLX5_FLOW_CONTEXT_ACTION_FWD_DEST, MLX5_FS_ETH_FLOW_TAG, &dest); if (IS_ERR_OR_NULL(*rule_p)) goto err_del_ai; MLX5_SET(fte_match_param, mv, inner_headers.ip_protocol, IPPROTO_TCP); rule_p = &priv->fts.main_vxlan_rule[MLX5E_TT_IPV4_TCP]; dest.tir_num = tirn[MLX5E_TT_IPV4_TCP]; MLX5_SET(fte_match_param, mv, inner_headers.ethertype, ETHERTYPE_IP); *rule_p = mlx5_add_flow_rule(ft, mc_enable, mc, mv, MLX5_FLOW_CONTEXT_ACTION_FWD_DEST, MLX5_FS_ETH_FLOW_TAG, &dest); if (IS_ERR_OR_NULL(*rule_p)) goto err_del_ai; rule_p = &priv->fts.main_vxlan_rule[MLX5E_TT_IPV6_TCP]; dest.tir_num = tirn[MLX5E_TT_IPV6_TCP]; MLX5_SET(fte_match_param, mv, inner_headers.ethertype, ETHERTYPE_IPV6); *rule_p = mlx5_add_flow_rule(ft, mc_enable, mc, mv, MLX5_FLOW_CONTEXT_ACTION_FWD_DEST, MLX5_FS_ETH_FLOW_TAG, &dest); if (IS_ERR_OR_NULL(*rule_p)) goto err_del_ai; MLX5_SET(fte_match_param, mv, inner_headers.ip_protocol, IPPROTO_AH); rule_p = &priv->fts.main_vxlan_rule[MLX5E_TT_IPV4_IPSEC_AH]; dest.tir_num = tirn[MLX5E_TT_IPV4_IPSEC_AH]; MLX5_SET(fte_match_param, mv, inner_headers.ethertype, ETHERTYPE_IP); *rule_p = mlx5_add_flow_rule(ft, mc_enable, mc, mv, MLX5_FLOW_CONTEXT_ACTION_FWD_DEST, MLX5_FS_ETH_FLOW_TAG, &dest); if (IS_ERR_OR_NULL(*rule_p)) goto err_del_ai; rule_p = &priv->fts.main_vxlan_rule[MLX5E_TT_IPV6_IPSEC_AH]; dest.tir_num = tirn[MLX5E_TT_IPV6_IPSEC_AH]; MLX5_SET(fte_match_param, mv, inner_headers.ethertype, ETHERTYPE_IPV6); *rule_p = mlx5_add_flow_rule(ft, mc_enable, mc, mv, MLX5_FLOW_CONTEXT_ACTION_FWD_DEST, MLX5_FS_ETH_FLOW_TAG, &dest); if (IS_ERR_OR_NULL(*rule_p)) goto err_del_ai; MLX5_SET(fte_match_param, mv, inner_headers.ip_protocol, IPPROTO_ESP); rule_p = &priv->fts.main_vxlan_rule[MLX5E_TT_IPV4_IPSEC_ESP]; dest.tir_num = tirn[MLX5E_TT_IPV4_IPSEC_ESP]; MLX5_SET(fte_match_param, mv, inner_headers.ethertype, ETHERTYPE_IP); *rule_p = mlx5_add_flow_rule(ft, mc_enable, mc, mv, MLX5_FLOW_CONTEXT_ACTION_FWD_DEST, MLX5_FS_ETH_FLOW_TAG, &dest); if (IS_ERR_OR_NULL(*rule_p)) goto err_del_ai; rule_p = &priv->fts.main_vxlan_rule[MLX5E_TT_IPV6_IPSEC_ESP]; dest.tir_num = tirn[MLX5E_TT_IPV6_IPSEC_ESP]; MLX5_SET(fte_match_param, mv, inner_headers.ethertype, ETHERTYPE_IPV6); *rule_p = mlx5_add_flow_rule(ft, mc_enable, mc, mv, MLX5_FLOW_CONTEXT_ACTION_FWD_DEST, MLX5_FS_ETH_FLOW_TAG, &dest); if (IS_ERR_OR_NULL(*rule_p)) goto err_del_ai; mc_enable = 0; memset(mv, 0, MLX5_ST_SZ_BYTES(fte_match_param)); memset(mc, 0, MLX5_ST_SZ_BYTES(fte_match_param)); rule_p = &priv->fts.main_vxlan_rule[MLX5E_TT_ANY]; dest.tir_num = tirn[MLX5E_TT_ANY]; *rule_p = mlx5_add_flow_rule(ft, mc_enable, mc, mv, MLX5_FLOW_CONTEXT_ACTION_FWD_DEST, MLX5_FS_ETH_FLOW_TAG, &dest); if (IS_ERR_OR_NULL(*rule_p)) goto err_del_ai; return (0); err_del_ai: err = PTR_ERR(*rule_p); *rule_p = NULL; mlx5e_del_main_vxlan_rules(priv); return (err); } static int mlx5e_add_main_vxlan_rules(struct mlx5e_priv *priv) { u32 *match_criteria; u32 *match_value; int err = 0; match_value = mlx5_vzalloc(MLX5_ST_SZ_BYTES(fte_match_param)); match_criteria = mlx5_vzalloc(MLX5_ST_SZ_BYTES(fte_match_param)); if (match_value == NULL || match_criteria == NULL) { mlx5_en_err(priv->ifp, "alloc failed\n"); err = -ENOMEM; goto add_main_vxlan_rules_out; } err = mlx5e_add_main_vxlan_rules_sub(priv, match_criteria, match_value); add_main_vxlan_rules_out: kvfree(match_criteria); kvfree(match_value); return (err); } static int mlx5e_vport_context_update_vlans(struct mlx5e_priv *priv) { struct ifnet *ifp = priv->ifp; int max_list_size; int list_size; u16 *vlans; int vlan; int err; int i; list_size = 0; for_each_set_bit(vlan, priv->vlan.active_vlans, VLAN_N_VID) list_size++; max_list_size = 1 << MLX5_CAP_GEN(priv->mdev, log_max_vlan_list); if (list_size > max_list_size) { mlx5_en_err(ifp, "ifnet vlans list size (%d) > (%d) max vport list size, some vlans will be dropped\n", list_size, max_list_size); list_size = max_list_size; } vlans = kcalloc(list_size, sizeof(*vlans), GFP_KERNEL); if (!vlans) return -ENOMEM; i = 0; for_each_set_bit(vlan, priv->vlan.active_vlans, VLAN_N_VID) { if (i >= list_size) break; vlans[i++] = vlan; } err = mlx5_modify_nic_vport_vlans(priv->mdev, vlans, list_size); if (err) mlx5_en_err(ifp, "Failed to modify vport vlans list err(%d)\n", err); kfree(vlans); return err; } enum mlx5e_vlan_rule_type { MLX5E_VLAN_RULE_TYPE_UNTAGGED, MLX5E_VLAN_RULE_TYPE_ANY_CTAG_VID, MLX5E_VLAN_RULE_TYPE_ANY_STAG_VID, MLX5E_VLAN_RULE_TYPE_MATCH_VID, }; static int mlx5e_add_vlan_rule_sub(struct mlx5e_priv *priv, enum mlx5e_vlan_rule_type rule_type, u16 vid, u32 *mc, u32 *mv) { struct mlx5_flow_table *ft = priv->fts.vlan.t; struct mlx5_flow_destination dest = {}; u8 mc_enable = 0; struct mlx5_flow_rule **rule_p; int err = 0; dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE; dest.ft = ((priv->ifp->if_capenable & IFCAP_VXLAN_HWCSUM) != 0) ? priv->fts.vxlan.t : priv->fts.main.t; mc_enable = MLX5_MATCH_OUTER_HEADERS; switch (rule_type) { case MLX5E_VLAN_RULE_TYPE_UNTAGGED: rule_p = &priv->vlan.untagged_ft_rule; MLX5_SET_TO_ONES(fte_match_param, mc, outer_headers.cvlan_tag); break; case MLX5E_VLAN_RULE_TYPE_ANY_CTAG_VID: rule_p = &priv->vlan.any_cvlan_ft_rule; MLX5_SET_TO_ONES(fte_match_param, mc, outer_headers.cvlan_tag); MLX5_SET(fte_match_param, mv, outer_headers.cvlan_tag, 1); break; case MLX5E_VLAN_RULE_TYPE_ANY_STAG_VID: rule_p = &priv->vlan.any_svlan_ft_rule; MLX5_SET_TO_ONES(fte_match_param, mc, outer_headers.svlan_tag); MLX5_SET(fte_match_param, mv, outer_headers.svlan_tag, 1); break; default: /* MLX5E_VLAN_RULE_TYPE_MATCH_VID */ rule_p = &priv->vlan.active_vlans_ft_rule[vid]; MLX5_SET_TO_ONES(fte_match_param, mc, outer_headers.cvlan_tag); MLX5_SET(fte_match_param, mv, outer_headers.cvlan_tag, 1); MLX5_SET_TO_ONES(fte_match_param, mc, outer_headers.first_vid); MLX5_SET(fte_match_param, mv, outer_headers.first_vid, vid); mlx5e_vport_context_update_vlans(priv); break; } *rule_p = mlx5_add_flow_rule(ft, mc_enable, mc, mv, MLX5_FLOW_CONTEXT_ACTION_FWD_DEST, MLX5_FS_ETH_FLOW_TAG, &dest); if (IS_ERR(*rule_p)) { err = PTR_ERR(*rule_p); *rule_p = NULL; mlx5_en_err(priv->ifp, "add rule failed\n"); } return (err); } static int mlx5e_add_vlan_rule(struct mlx5e_priv *priv, enum mlx5e_vlan_rule_type rule_type, u16 vid) { u32 *match_criteria; u32 *match_value; int err = 0; match_value = mlx5_vzalloc(MLX5_ST_SZ_BYTES(fte_match_param)); match_criteria = mlx5_vzalloc(MLX5_ST_SZ_BYTES(fte_match_param)); if (!match_value || !match_criteria) { mlx5_en_err(priv->ifp, "alloc failed\n"); err = -ENOMEM; goto add_vlan_rule_out; } err = mlx5e_add_vlan_rule_sub(priv, rule_type, vid, match_criteria, match_value); add_vlan_rule_out: kvfree(match_criteria); kvfree(match_value); return (err); } static void mlx5e_del_vlan_rule(struct mlx5e_priv *priv, enum mlx5e_vlan_rule_type rule_type, u16 vid) { switch (rule_type) { case MLX5E_VLAN_RULE_TYPE_UNTAGGED: if (priv->vlan.untagged_ft_rule) { mlx5_del_flow_rule(priv->vlan.untagged_ft_rule); priv->vlan.untagged_ft_rule = NULL; } break; case MLX5E_VLAN_RULE_TYPE_ANY_CTAG_VID: if (priv->vlan.any_cvlan_ft_rule) { mlx5_del_flow_rule(priv->vlan.any_cvlan_ft_rule); priv->vlan.any_cvlan_ft_rule = NULL; } break; case MLX5E_VLAN_RULE_TYPE_ANY_STAG_VID: if (priv->vlan.any_svlan_ft_rule) { mlx5_del_flow_rule(priv->vlan.any_svlan_ft_rule); priv->vlan.any_svlan_ft_rule = NULL; } break; case MLX5E_VLAN_RULE_TYPE_MATCH_VID: if (priv->vlan.active_vlans_ft_rule[vid]) { mlx5_del_flow_rule(priv->vlan.active_vlans_ft_rule[vid]); priv->vlan.active_vlans_ft_rule[vid] = NULL; } mlx5e_vport_context_update_vlans(priv); break; default: break; } } static void mlx5e_del_any_vid_rules(struct mlx5e_priv *priv) { mlx5e_del_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_ANY_CTAG_VID, 0); mlx5e_del_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_ANY_STAG_VID, 0); } static int mlx5e_add_any_vid_rules(struct mlx5e_priv *priv) { int err; err = mlx5e_add_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_ANY_CTAG_VID, 0); if (err) return (err); err = mlx5e_add_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_ANY_STAG_VID, 0); if (err) mlx5e_del_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_ANY_CTAG_VID, 0); return (err); } void mlx5e_enable_vlan_filter(struct mlx5e_priv *priv) { if (priv->vlan.filter_disabled) { priv->vlan.filter_disabled = false; if (priv->ifp->if_flags & IFF_PROMISC) return; if (test_bit(MLX5E_STATE_OPENED, &priv->state)) mlx5e_del_any_vid_rules(priv); } } void mlx5e_disable_vlan_filter(struct mlx5e_priv *priv) { if (!priv->vlan.filter_disabled) { priv->vlan.filter_disabled = true; if (priv->ifp->if_flags & IFF_PROMISC) return; if (test_bit(MLX5E_STATE_OPENED, &priv->state)) mlx5e_add_any_vid_rules(priv); } } void mlx5e_vlan_rx_add_vid(void *arg, struct ifnet *ifp, u16 vid) { struct mlx5e_priv *priv = arg; if (ifp != priv->ifp) return; PRIV_LOCK(priv); if (!test_and_set_bit(vid, priv->vlan.active_vlans) && test_bit(MLX5E_STATE_OPENED, &priv->state)) mlx5e_add_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_MATCH_VID, vid); PRIV_UNLOCK(priv); } void mlx5e_vlan_rx_kill_vid(void *arg, struct ifnet *ifp, u16 vid) { struct mlx5e_priv *priv = arg; if (ifp != priv->ifp) return; PRIV_LOCK(priv); clear_bit(vid, priv->vlan.active_vlans); if (test_bit(MLX5E_STATE_OPENED, &priv->state)) mlx5e_del_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_MATCH_VID, vid); PRIV_UNLOCK(priv); } int mlx5e_add_all_vlan_rules(struct mlx5e_priv *priv) { int err; int i; set_bit(0, priv->vlan.active_vlans); for_each_set_bit(i, priv->vlan.active_vlans, VLAN_N_VID) { err = mlx5e_add_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_MATCH_VID, i); if (err) goto error; } err = mlx5e_add_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_UNTAGGED, 0); if (err) goto error; if (priv->vlan.filter_disabled) { err = mlx5e_add_any_vid_rules(priv); if (err) goto error; } return (0); error: mlx5e_del_all_vlan_rules(priv); return (err); } void mlx5e_del_all_vlan_rules(struct mlx5e_priv *priv) { int i; if (priv->vlan.filter_disabled) mlx5e_del_any_vid_rules(priv); mlx5e_del_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_UNTAGGED, 0); for_each_set_bit(i, priv->vlan.active_vlans, VLAN_N_VID) mlx5e_del_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_MATCH_VID, i); clear_bit(0, priv->vlan.active_vlans); } #define mlx5e_for_each_hash_node(hn, tmp, hash, i) \ for (i = 0; i < MLX5E_ETH_ADDR_HASH_SIZE; i++) \ LIST_FOREACH_SAFE(hn, &(hash)[i], hlist, tmp) static void mlx5e_execute_action(struct mlx5e_priv *priv, struct mlx5e_eth_addr_hash_node *hn) { switch (hn->action) { case MLX5E_ACTION_ADD: mlx5e_add_eth_addr_rule(priv, &hn->ai, MLX5E_FULLMATCH); hn->action = MLX5E_ACTION_NONE; break; case MLX5E_ACTION_DEL: mlx5e_del_eth_addr_from_flow_table(priv, &hn->ai); if (hn->mpfs_index != -1U) mlx5_mpfs_del_mac(priv->mdev, hn->mpfs_index); mlx5e_del_eth_addr_from_hash(hn); break; default: break; } } static struct mlx5e_eth_addr_hash_node * mlx5e_move_hn(struct mlx5e_eth_addr_hash_head *fh, struct mlx5e_eth_addr_hash_head *uh) { struct mlx5e_eth_addr_hash_node *hn; hn = LIST_FIRST(fh); if (hn != NULL) { LIST_REMOVE(hn, hlist); LIST_INSERT_HEAD(uh, hn, hlist); } return (hn); } static struct mlx5e_eth_addr_hash_node * mlx5e_remove_hn(struct mlx5e_eth_addr_hash_head *fh) { struct mlx5e_eth_addr_hash_node *hn; hn = LIST_FIRST(fh); if (hn != NULL) LIST_REMOVE(hn, hlist); return (hn); } struct mlx5e_copy_addr_ctx { struct mlx5e_eth_addr_hash_head *free; struct mlx5e_eth_addr_hash_head *fill; bool success; }; static u_int mlx5e_copy_addr(void *arg, struct sockaddr_dl *sdl, u_int cnt) { struct mlx5e_copy_addr_ctx *ctx = arg; struct mlx5e_eth_addr_hash_node *hn; hn = mlx5e_move_hn(ctx->free, ctx->fill); if (hn == NULL) { ctx->success = false; return (0); } ether_addr_copy(hn->ai.addr, LLADDR(sdl)); return (1); } static void mlx5e_sync_ifp_addr(struct mlx5e_priv *priv) { struct mlx5e_copy_addr_ctx ctx; struct mlx5e_eth_addr_hash_head head_free; struct mlx5e_eth_addr_hash_head head_uc; struct mlx5e_eth_addr_hash_head head_mc; struct mlx5e_eth_addr_hash_node *hn; struct ifnet *ifp = priv->ifp; size_t x; size_t num; PRIV_ASSERT_LOCKED(priv); retry: LIST_INIT(&head_free); LIST_INIT(&head_uc); LIST_INIT(&head_mc); num = 1 + if_lladdr_count(ifp) + if_llmaddr_count(ifp); /* allocate place holders */ for (x = 0; x != num; x++) { hn = malloc(sizeof(*hn), M_MLX5EN, M_WAITOK | M_ZERO); hn->action = MLX5E_ACTION_ADD; hn->mpfs_index = -1U; LIST_INSERT_HEAD(&head_free, hn, hlist); } hn = mlx5e_move_hn(&head_free, &head_uc); MPASS(hn != NULL); ether_addr_copy(hn->ai.addr, LLADDR((struct sockaddr_dl *)(ifp->if_addr->ifa_addr))); ctx.free = &head_free; ctx.fill = &head_uc; ctx.success = true; if_foreach_lladdr(ifp, mlx5e_copy_addr, &ctx); if (ctx.success == false) goto cleanup; ctx.fill = &head_mc; if_foreach_llmaddr(ifp, mlx5e_copy_addr, &ctx); if (ctx.success == false) goto cleanup; /* insert L2 unicast addresses into hash list */ while ((hn = mlx5e_remove_hn(&head_uc)) != NULL) { if (mlx5e_add_eth_addr_to_hash(priv->eth_addr.if_uc, hn) == 0) continue; if (hn->mpfs_index == -1U) mlx5_mpfs_add_mac(priv->mdev, &hn->mpfs_index, hn->ai.addr, 0, 0); } /* insert L2 multicast addresses into hash list */ while ((hn = mlx5e_remove_hn(&head_mc)) != NULL) { if (mlx5e_add_eth_addr_to_hash(priv->eth_addr.if_mc, hn) == 0) continue; } cleanup: while ((hn = mlx5e_remove_hn(&head_uc)) != NULL) free(hn, M_MLX5EN); while ((hn = mlx5e_remove_hn(&head_mc)) != NULL) free(hn, M_MLX5EN); while ((hn = mlx5e_remove_hn(&head_free)) != NULL) free(hn, M_MLX5EN); if (ctx.success == false) goto retry; } static void mlx5e_fill_addr_array(struct mlx5e_priv *priv, int list_type, u8 addr_array[][ETH_ALEN], int size) { bool is_uc = (list_type == MLX5_NIC_VPORT_LIST_TYPE_UC); struct ifnet *ifp = priv->ifp; struct mlx5e_eth_addr_hash_node *hn; struct mlx5e_eth_addr_hash_head *addr_list; struct mlx5e_eth_addr_hash_node *tmp; int i = 0; int hi; addr_list = is_uc ? priv->eth_addr.if_uc : priv->eth_addr.if_mc; if (is_uc) /* Make sure our own address is pushed first */ ether_addr_copy(addr_array[i++], IF_LLADDR(ifp)); else if (priv->eth_addr.broadcast_enabled) ether_addr_copy(addr_array[i++], ifp->if_broadcastaddr); mlx5e_for_each_hash_node(hn, tmp, addr_list, hi) { if (ether_addr_equal(IF_LLADDR(ifp), hn->ai.addr)) continue; if (i >= size) break; ether_addr_copy(addr_array[i++], hn->ai.addr); } } static void mlx5e_vport_context_update_addr_list(struct mlx5e_priv *priv, int list_type) { bool is_uc = (list_type == MLX5_NIC_VPORT_LIST_TYPE_UC); struct mlx5e_eth_addr_hash_node *hn; u8 (*addr_array)[ETH_ALEN] = NULL; struct mlx5e_eth_addr_hash_head *addr_list; struct mlx5e_eth_addr_hash_node *tmp; int max_size; int size; int err; int hi; size = is_uc ? 0 : (priv->eth_addr.broadcast_enabled ? 1 : 0); max_size = is_uc ? 1 << MLX5_CAP_GEN(priv->mdev, log_max_current_uc_list) : 1 << MLX5_CAP_GEN(priv->mdev, log_max_current_mc_list); addr_list = is_uc ? priv->eth_addr.if_uc : priv->eth_addr.if_mc; mlx5e_for_each_hash_node(hn, tmp, addr_list, hi) size++; if (size > max_size) { mlx5_en_err(priv->ifp, "ifp %s list size (%d) > (%d) max vport list size, some addresses will be dropped\n", is_uc ? "UC" : "MC", size, max_size); size = max_size; } if (size) { addr_array = kcalloc(size, ETH_ALEN, GFP_KERNEL); if (!addr_array) { err = -ENOMEM; goto out; } mlx5e_fill_addr_array(priv, list_type, addr_array, size); } err = mlx5_modify_nic_vport_mac_list(priv->mdev, list_type, addr_array, size); out: if (err) mlx5_en_err(priv->ifp, "Failed to modify vport %s list err(%d)\n", is_uc ? "UC" : "MC", err); kfree(addr_array); } static void mlx5e_vport_context_update(struct mlx5e_priv *priv) { struct mlx5e_eth_addr_db *ea = &priv->eth_addr; mlx5e_vport_context_update_addr_list(priv, MLX5_NIC_VPORT_LIST_TYPE_UC); mlx5e_vport_context_update_addr_list(priv, MLX5_NIC_VPORT_LIST_TYPE_MC); mlx5_modify_nic_vport_promisc(priv->mdev, 0, ea->allmulti_enabled, ea->promisc_enabled); } static void mlx5e_apply_ifp_addr(struct mlx5e_priv *priv) { struct mlx5e_eth_addr_hash_node *hn; struct mlx5e_eth_addr_hash_node *tmp; int i; mlx5e_for_each_hash_node(hn, tmp, priv->eth_addr.if_uc, i) mlx5e_execute_action(priv, hn); mlx5e_for_each_hash_node(hn, tmp, priv->eth_addr.if_mc, i) mlx5e_execute_action(priv, hn); } static void mlx5e_handle_ifp_addr(struct mlx5e_priv *priv) { struct mlx5e_eth_addr_hash_node *hn; struct mlx5e_eth_addr_hash_node *tmp; int i; mlx5e_for_each_hash_node(hn, tmp, priv->eth_addr.if_uc, i) hn->action = MLX5E_ACTION_DEL; mlx5e_for_each_hash_node(hn, tmp, priv->eth_addr.if_mc, i) hn->action = MLX5E_ACTION_DEL; if (test_bit(MLX5E_STATE_OPENED, &priv->state)) mlx5e_sync_ifp_addr(priv); mlx5e_apply_ifp_addr(priv); } void mlx5e_set_rx_mode_core(struct mlx5e_priv *priv) { struct mlx5e_eth_addr_db *ea = &priv->eth_addr; struct ifnet *ndev = priv->ifp; bool rx_mode_enable = test_bit(MLX5E_STATE_OPENED, &priv->state); bool promisc_enabled = rx_mode_enable && (ndev->if_flags & IFF_PROMISC); bool allmulti_enabled = rx_mode_enable && (ndev->if_flags & IFF_ALLMULTI); bool broadcast_enabled = rx_mode_enable; bool enable_promisc = !ea->promisc_enabled && promisc_enabled; bool disable_promisc = ea->promisc_enabled && !promisc_enabled; bool enable_allmulti = !ea->allmulti_enabled && allmulti_enabled; bool disable_allmulti = ea->allmulti_enabled && !allmulti_enabled; bool enable_broadcast = !ea->broadcast_enabled && broadcast_enabled; bool disable_broadcast = ea->broadcast_enabled && !broadcast_enabled; /* update broadcast address */ ether_addr_copy(priv->eth_addr.broadcast.addr, priv->ifp->if_broadcastaddr); if (enable_promisc) { mlx5e_add_eth_addr_rule(priv, &ea->promisc, MLX5E_PROMISC); if (!priv->vlan.filter_disabled) mlx5e_add_any_vid_rules(priv); } if (enable_allmulti) mlx5e_add_eth_addr_rule(priv, &ea->allmulti, MLX5E_ALLMULTI); if (enable_broadcast) mlx5e_add_eth_addr_rule(priv, &ea->broadcast, MLX5E_FULLMATCH); mlx5e_handle_ifp_addr(priv); if (disable_broadcast) mlx5e_del_eth_addr_from_flow_table(priv, &ea->broadcast); if (disable_allmulti) mlx5e_del_eth_addr_from_flow_table(priv, &ea->allmulti); if (disable_promisc) { if (!priv->vlan.filter_disabled) mlx5e_del_any_vid_rules(priv); mlx5e_del_eth_addr_from_flow_table(priv, &ea->promisc); } ea->promisc_enabled = promisc_enabled; ea->allmulti_enabled = allmulti_enabled; ea->broadcast_enabled = broadcast_enabled; mlx5e_vport_context_update(priv); } void mlx5e_set_rx_mode_work(struct work_struct *work) { struct mlx5e_priv *priv = container_of(work, struct mlx5e_priv, set_rx_mode_work); PRIV_LOCK(priv); if (test_bit(MLX5E_STATE_OPENED, &priv->state)) mlx5e_set_rx_mode_core(priv); PRIV_UNLOCK(priv); } static void mlx5e_destroy_groups(struct mlx5e_flow_table *ft) { int i; for (i = ft->num_groups - 1; i >= 0; i--) { if (!IS_ERR_OR_NULL(ft->g[i])) mlx5_destroy_flow_group(ft->g[i]); ft->g[i] = NULL; } ft->num_groups = 0; } static void mlx5e_destroy_flow_table(struct mlx5e_flow_table *ft) { mlx5e_destroy_groups(ft); kfree(ft->g); mlx5_destroy_flow_table(ft->t); ft->t = NULL; } #define MLX5E_NUM_MAIN_GROUPS 10 #define MLX5E_MAIN_GROUP0_SIZE BIT(4) #define MLX5E_MAIN_GROUP1_SIZE BIT(3) #define MLX5E_MAIN_GROUP2_SIZE BIT(1) #define MLX5E_MAIN_GROUP3_SIZE BIT(0) #define MLX5E_MAIN_GROUP4_SIZE BIT(14) #define MLX5E_MAIN_GROUP5_SIZE BIT(13) #define MLX5E_MAIN_GROUP6_SIZE BIT(11) #define MLX5E_MAIN_GROUP7_SIZE BIT(2) #define MLX5E_MAIN_GROUP8_SIZE BIT(1) #define MLX5E_MAIN_GROUP9_SIZE BIT(0) #define MLX5E_MAIN_TABLE_SIZE (MLX5E_MAIN_GROUP0_SIZE +\ MLX5E_MAIN_GROUP1_SIZE +\ MLX5E_MAIN_GROUP2_SIZE +\ MLX5E_MAIN_GROUP3_SIZE +\ MLX5E_MAIN_GROUP4_SIZE +\ MLX5E_MAIN_GROUP5_SIZE +\ MLX5E_MAIN_GROUP6_SIZE +\ MLX5E_MAIN_GROUP7_SIZE +\ MLX5E_MAIN_GROUP8_SIZE +\ MLX5E_MAIN_GROUP9_SIZE +\ 0) static int mlx5e_create_main_groups_sub(struct mlx5e_flow_table *ft, u32 *in, int inlen) { u8 *mc = MLX5_ADDR_OF(create_flow_group_in, in, match_criteria); u8 *dmac = MLX5_ADDR_OF(create_flow_group_in, in, match_criteria.outer_headers.dmac_47_16); int err; int ix = 0; /* Tunnel rules need to be first in this list of groups */ /* Start tunnel rules */ memset(in, 0, inlen); MLX5_SET_CFG(in, match_criteria_enable, MLX5_MATCH_OUTER_HEADERS); MLX5_SET_TO_ONES(fte_match_param, mc, outer_headers.ethertype); MLX5_SET_TO_ONES(fte_match_param, mc, outer_headers.ip_protocol); MLX5_SET_TO_ONES(fte_match_param, mc, outer_headers.udp_dport); MLX5_SET_CFG(in, start_flow_index, ix); ix += MLX5E_MAIN_GROUP0_SIZE; MLX5_SET_CFG(in, end_flow_index, ix - 1); ft->g[ft->num_groups] = mlx5_create_flow_group(ft->t, in); if (IS_ERR(ft->g[ft->num_groups])) goto err_destory_groups; ft->num_groups++; /* End Tunnel Rules */ memset(in, 0, inlen); MLX5_SET_CFG(in, match_criteria_enable, MLX5_MATCH_OUTER_HEADERS); MLX5_SET_TO_ONES(fte_match_param, mc, outer_headers.ethertype); MLX5_SET_TO_ONES(fte_match_param, mc, outer_headers.ip_protocol); MLX5_SET_CFG(in, start_flow_index, ix); ix += MLX5E_MAIN_GROUP1_SIZE; MLX5_SET_CFG(in, end_flow_index, ix - 1); ft->g[ft->num_groups] = mlx5_create_flow_group(ft->t, in); if (IS_ERR(ft->g[ft->num_groups])) goto err_destory_groups; ft->num_groups++; memset(in, 0, inlen); MLX5_SET_CFG(in, match_criteria_enable, MLX5_MATCH_OUTER_HEADERS); MLX5_SET_TO_ONES(fte_match_param, mc, outer_headers.ethertype); MLX5_SET_CFG(in, start_flow_index, ix); ix += MLX5E_MAIN_GROUP2_SIZE; MLX5_SET_CFG(in, end_flow_index, ix - 1); ft->g[ft->num_groups] = mlx5_create_flow_group(ft->t, in); if (IS_ERR(ft->g[ft->num_groups])) goto err_destory_groups; ft->num_groups++; memset(in, 0, inlen); MLX5_SET_CFG(in, start_flow_index, ix); ix += MLX5E_MAIN_GROUP3_SIZE; MLX5_SET_CFG(in, end_flow_index, ix - 1); ft->g[ft->num_groups] = mlx5_create_flow_group(ft->t, in); if (IS_ERR(ft->g[ft->num_groups])) goto err_destory_groups; ft->num_groups++; memset(in, 0, inlen); MLX5_SET_CFG(in, match_criteria_enable, MLX5_MATCH_OUTER_HEADERS); MLX5_SET_TO_ONES(fte_match_param, mc, outer_headers.ethertype); MLX5_SET_TO_ONES(fte_match_param, mc, outer_headers.ip_protocol); memset(dmac, 0xff, ETH_ALEN); MLX5_SET_CFG(in, start_flow_index, ix); ix += MLX5E_MAIN_GROUP4_SIZE; MLX5_SET_CFG(in, end_flow_index, ix - 1); ft->g[ft->num_groups] = mlx5_create_flow_group(ft->t, in); if (IS_ERR(ft->g[ft->num_groups])) goto err_destory_groups; ft->num_groups++; memset(in, 0, inlen); MLX5_SET_CFG(in, match_criteria_enable, MLX5_MATCH_OUTER_HEADERS); MLX5_SET_TO_ONES(fte_match_param, mc, outer_headers.ethertype); memset(dmac, 0xff, ETH_ALEN); MLX5_SET_CFG(in, start_flow_index, ix); ix += MLX5E_MAIN_GROUP5_SIZE; MLX5_SET_CFG(in, end_flow_index, ix - 1); ft->g[ft->num_groups] = mlx5_create_flow_group(ft->t, in); if (IS_ERR(ft->g[ft->num_groups])) goto err_destory_groups; ft->num_groups++; memset(in, 0, inlen); MLX5_SET_CFG(in, match_criteria_enable, MLX5_MATCH_OUTER_HEADERS); memset(dmac, 0xff, ETH_ALEN); MLX5_SET_CFG(in, start_flow_index, ix); ix += MLX5E_MAIN_GROUP6_SIZE; MLX5_SET_CFG(in, end_flow_index, ix - 1); ft->g[ft->num_groups] = mlx5_create_flow_group(ft->t, in); if (IS_ERR(ft->g[ft->num_groups])) goto err_destory_groups; ft->num_groups++; memset(in, 0, inlen); MLX5_SET_CFG(in, match_criteria_enable, MLX5_MATCH_OUTER_HEADERS); MLX5_SET_TO_ONES(fte_match_param, mc, outer_headers.ethertype); MLX5_SET_TO_ONES(fte_match_param, mc, outer_headers.ip_protocol); dmac[0] = 0x01; MLX5_SET_CFG(in, start_flow_index, ix); ix += MLX5E_MAIN_GROUP7_SIZE; MLX5_SET_CFG(in, end_flow_index, ix - 1); ft->g[ft->num_groups] = mlx5_create_flow_group(ft->t, in); if (IS_ERR(ft->g[ft->num_groups])) goto err_destory_groups; ft->num_groups++; memset(in, 0, inlen); MLX5_SET_CFG(in, match_criteria_enable, MLX5_MATCH_OUTER_HEADERS); MLX5_SET_TO_ONES(fte_match_param, mc, outer_headers.ethertype); dmac[0] = 0x01; MLX5_SET_CFG(in, start_flow_index, ix); ix += MLX5E_MAIN_GROUP8_SIZE; MLX5_SET_CFG(in, end_flow_index, ix - 1); ft->g[ft->num_groups] = mlx5_create_flow_group(ft->t, in); if (IS_ERR(ft->g[ft->num_groups])) goto err_destory_groups; ft->num_groups++; memset(in, 0, inlen); MLX5_SET_CFG(in, match_criteria_enable, MLX5_MATCH_OUTER_HEADERS); dmac[0] = 0x01; MLX5_SET_CFG(in, start_flow_index, ix); ix += MLX5E_MAIN_GROUP9_SIZE; MLX5_SET_CFG(in, end_flow_index, ix - 1); ft->g[ft->num_groups] = mlx5_create_flow_group(ft->t, in); if (IS_ERR(ft->g[ft->num_groups])) goto err_destory_groups; ft->num_groups++; return (0); err_destory_groups: err = PTR_ERR(ft->g[ft->num_groups]); ft->g[ft->num_groups] = NULL; mlx5e_destroy_groups(ft); return (err); } static int mlx5e_create_main_groups(struct mlx5e_flow_table *ft) { u32 *in; int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in); int err; in = mlx5_vzalloc(inlen); if (!in) return (-ENOMEM); err = mlx5e_create_main_groups_sub(ft, in, inlen); kvfree(in); return (err); } #define MLX5E_MAIN_VXLAN_GROUP0_SIZE BIT(3) #define MLX5E_MAIN_VXLAN_GROUP1_SIZE BIT(3) #define MLX5E_MAIN_VXLAN_GROUP2_SIZE BIT(0) static int mlx5e_create_main_vxlan_groups_sub(struct mlx5e_flow_table *ft, u32 *in, int inlen) { u8 *mc = MLX5_ADDR_OF(create_flow_group_in, in, match_criteria); int err; int ix = 0; memset(in, 0, inlen); MLX5_SET_CFG(in, match_criteria_enable, MLX5_MATCH_INNER_HEADERS); MLX5_SET_TO_ONES(fte_match_param, mc, inner_headers.ethertype); MLX5_SET_TO_ONES(fte_match_param, mc, inner_headers.ip_protocol); MLX5_SET_CFG(in, start_flow_index, ix); ix += MLX5E_MAIN_VXLAN_GROUP0_SIZE; MLX5_SET_CFG(in, end_flow_index, ix - 1); ft->g[ft->num_groups] = mlx5_create_flow_group(ft->t, in); if (IS_ERR(ft->g[ft->num_groups])) goto err_destory_groups; ft->num_groups++; memset(in, 0, inlen); MLX5_SET_CFG(in, match_criteria_enable, MLX5_MATCH_INNER_HEADERS); MLX5_SET_TO_ONES(fte_match_param, mc, inner_headers.ethertype); MLX5_SET_CFG(in, start_flow_index, ix); ix += MLX5E_MAIN_VXLAN_GROUP1_SIZE; MLX5_SET_CFG(in, end_flow_index, ix - 1); ft->g[ft->num_groups] = mlx5_create_flow_group(ft->t, in); if (IS_ERR(ft->g[ft->num_groups])) goto err_destory_groups; ft->num_groups++; memset(in, 0, inlen); MLX5_SET_CFG(in, start_flow_index, ix); ix += MLX5E_MAIN_VXLAN_GROUP2_SIZE; MLX5_SET_CFG(in, end_flow_index, ix - 1); ft->g[ft->num_groups] = mlx5_create_flow_group(ft->t, in); if (IS_ERR(ft->g[ft->num_groups])) goto err_destory_groups; ft->num_groups++; return (0); err_destory_groups: err = PTR_ERR(ft->g[ft->num_groups]); ft->g[ft->num_groups] = NULL; mlx5e_destroy_groups(ft); return (err); } static int mlx5e_create_main_vxlan_groups(struct mlx5e_flow_table *ft) { u32 *in; int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in); int err; in = mlx5_vzalloc(inlen); if (!in) return (-ENOMEM); err = mlx5e_create_main_vxlan_groups_sub(ft, in, inlen); kvfree(in); return (err); } static int mlx5e_create_main_flow_table(struct mlx5e_priv *priv, bool inner_vxlan) { struct mlx5e_flow_table *ft = inner_vxlan ? &priv->fts.main_vxlan : &priv->fts.main; int err; ft->num_groups = 0; ft->t = mlx5_create_flow_table(priv->fts.ns, 0, inner_vxlan ? "vxlan_main" : "main", MLX5E_MAIN_TABLE_SIZE); if (IS_ERR(ft->t)) { err = PTR_ERR(ft->t); ft->t = NULL; return (err); } ft->g = kcalloc(MLX5E_NUM_MAIN_GROUPS, sizeof(*ft->g), GFP_KERNEL); if (!ft->g) { err = -ENOMEM; goto err_destroy_main_flow_table; } err = inner_vxlan ? mlx5e_create_main_vxlan_groups(ft) : mlx5e_create_main_groups(ft); if (err) goto err_free_g; return (0); err_free_g: kfree(ft->g); err_destroy_main_flow_table: mlx5_destroy_flow_table(ft->t); ft->t = NULL; return (err); } static void mlx5e_destroy_main_flow_table(struct mlx5e_priv *priv) { mlx5e_destroy_flow_table(&priv->fts.main); } static void mlx5e_destroy_main_vxlan_flow_table(struct mlx5e_priv *priv) { mlx5e_destroy_flow_table(&priv->fts.main_vxlan); } #define MLX5E_NUM_VLAN_GROUPS 3 #define MLX5E_VLAN_GROUP0_SIZE BIT(12) #define MLX5E_VLAN_GROUP1_SIZE BIT(1) #define MLX5E_VLAN_GROUP2_SIZE BIT(0) #define MLX5E_VLAN_TABLE_SIZE (MLX5E_VLAN_GROUP0_SIZE +\ MLX5E_VLAN_GROUP1_SIZE +\ MLX5E_VLAN_GROUP2_SIZE +\ 0) static int mlx5e_create_vlan_groups_sub(struct mlx5e_flow_table *ft, u32 *in, int inlen) { int err; int ix = 0; u8 *mc = MLX5_ADDR_OF(create_flow_group_in, in, match_criteria); memset(in, 0, inlen); MLX5_SET_CFG(in, match_criteria_enable, MLX5_MATCH_OUTER_HEADERS); MLX5_SET_TO_ONES(fte_match_param, mc, outer_headers.cvlan_tag); MLX5_SET_TO_ONES(fte_match_param, mc, outer_headers.first_vid); MLX5_SET_CFG(in, start_flow_index, ix); ix += MLX5E_VLAN_GROUP0_SIZE; MLX5_SET_CFG(in, end_flow_index, ix - 1); ft->g[ft->num_groups] = mlx5_create_flow_group(ft->t, in); if (IS_ERR(ft->g[ft->num_groups])) goto err_destory_groups; ft->num_groups++; memset(in, 0, inlen); MLX5_SET_CFG(in, match_criteria_enable, MLX5_MATCH_OUTER_HEADERS); MLX5_SET_TO_ONES(fte_match_param, mc, outer_headers.cvlan_tag); MLX5_SET_CFG(in, start_flow_index, ix); ix += MLX5E_VLAN_GROUP1_SIZE; MLX5_SET_CFG(in, end_flow_index, ix - 1); ft->g[ft->num_groups] = mlx5_create_flow_group(ft->t, in); if (IS_ERR(ft->g[ft->num_groups])) goto err_destory_groups; ft->num_groups++; memset(in, 0, inlen); MLX5_SET_CFG(in, match_criteria_enable, MLX5_MATCH_OUTER_HEADERS); MLX5_SET_TO_ONES(fte_match_param, mc, outer_headers.svlan_tag); MLX5_SET_CFG(in, start_flow_index, ix); ix += MLX5E_VLAN_GROUP2_SIZE; MLX5_SET_CFG(in, end_flow_index, ix - 1); ft->g[ft->num_groups] = mlx5_create_flow_group(ft->t, in); if (IS_ERR(ft->g[ft->num_groups])) goto err_destory_groups; ft->num_groups++; return (0); err_destory_groups: err = PTR_ERR(ft->g[ft->num_groups]); ft->g[ft->num_groups] = NULL; mlx5e_destroy_groups(ft); return (err); } static int mlx5e_create_vlan_groups(struct mlx5e_flow_table *ft) { u32 *in; int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in); int err; in = mlx5_vzalloc(inlen); if (!in) return (-ENOMEM); err = mlx5e_create_vlan_groups_sub(ft, in, inlen); kvfree(in); return (err); } static int mlx5e_create_vlan_flow_table(struct mlx5e_priv *priv) { struct mlx5e_flow_table *ft = &priv->fts.vlan; int err; ft->num_groups = 0; ft->t = mlx5_create_flow_table(priv->fts.ns, 0, "vlan", MLX5E_VLAN_TABLE_SIZE); if (IS_ERR(ft->t)) { err = PTR_ERR(ft->t); ft->t = NULL; return (err); } ft->g = kcalloc(MLX5E_NUM_VLAN_GROUPS, sizeof(*ft->g), GFP_KERNEL); if (!ft->g) { err = -ENOMEM; goto err_destroy_vlan_flow_table; } err = mlx5e_create_vlan_groups(ft); if (err) goto err_free_g; return (0); err_free_g: kfree(ft->g); err_destroy_vlan_flow_table: mlx5_destroy_flow_table(ft->t); ft->t = NULL; return (err); } static void mlx5e_destroy_vlan_flow_table(struct mlx5e_priv *priv) { mlx5e_destroy_flow_table(&priv->fts.vlan); } static int mlx5e_add_vxlan_rule_sub(struct mlx5e_priv *priv, u32 *mc, u32 *mv, struct mlx5e_vxlan_db_el *el) { struct mlx5_flow_table *ft = priv->fts.vxlan.t; struct mlx5_flow_destination dest = {}; u8 mc_enable; struct mlx5_flow_rule **rule_p; int err = 0; dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE; dest.ft = priv->fts.main_vxlan.t; mc_enable = MLX5_MATCH_OUTER_HEADERS; rule_p = &el->vxlan_ft_rule; MLX5_SET_TO_ONES(fte_match_param, mc, outer_headers.ethertype); MLX5_SET(fte_match_param, mv, outer_headers.ethertype, el->proto); MLX5_SET_TO_ONES(fte_match_param, mc, outer_headers.ip_protocol); MLX5_SET(fte_match_param, mv, outer_headers.ip_protocol, IPPROTO_UDP); MLX5_SET_TO_ONES(fte_match_param, mc, outer_headers.udp_dport); MLX5_SET(fte_match_param, mv, outer_headers.udp_dport, el->port); *rule_p = mlx5_add_flow_rule(ft, mc_enable, mc, mv, MLX5_FLOW_CONTEXT_ACTION_FWD_DEST, MLX5_FS_ETH_FLOW_TAG, &dest); if (IS_ERR(*rule_p)) { err = PTR_ERR(*rule_p); *rule_p = NULL; mlx5_en_err(priv->ifp, "add rule failed\n"); } return (err); } static struct mlx5e_vxlan_db_el * mlx5e_vxlan_find_db_el(struct mlx5e_priv *priv, u_int proto, u_int port) { struct mlx5e_vxlan_db_el *el; TAILQ_FOREACH(el, &priv->vxlan.head, link) { if (el->proto == proto && el->port == port) return (el); } return (NULL); } static struct mlx5e_vxlan_db_el * mlx5e_vxlan_alloc_db_el(struct mlx5e_priv *priv, u_int proto, u_int port) { struct mlx5e_vxlan_db_el *el; el = mlx5_vzalloc(sizeof(*el)); el->refcount = 1; el->proto = proto; el->port = port; el->vxlan_ft_rule = NULL; return (el); } static int mlx5e_vxlan_family_to_proto(sa_family_t family, u_int *proto) { switch (family) { case AF_INET: *proto = ETHERTYPE_IP; return (0); case AF_INET6: *proto = ETHERTYPE_IPV6; return (0); default: return (-EINVAL); } } static int mlx5e_add_vxlan_rule_from_db(struct mlx5e_priv *priv, struct mlx5e_vxlan_db_el *el) { u32 *match_criteria; u32 *match_value; int err; match_value = mlx5_vzalloc(MLX5_ST_SZ_BYTES(fte_match_param)); match_criteria = mlx5_vzalloc(MLX5_ST_SZ_BYTES(fte_match_param)); if (match_value == NULL || match_criteria == NULL) { mlx5_en_err(priv->ifp, "alloc failed\n"); err = -ENOMEM; goto add_vxlan_rule_out; } err = mlx5e_add_vxlan_rule_sub(priv, match_criteria, match_value, el); add_vxlan_rule_out: kvfree(match_criteria); kvfree(match_value); return (err); } static int mlx5e_add_vxlan_rule(struct mlx5e_priv *priv, sa_family_t family, u_int port) { struct mlx5e_vxlan_db_el *el; u_int proto; int err; err = mlx5e_vxlan_family_to_proto(family, &proto); if (err != 0) return (err); el = mlx5e_vxlan_find_db_el(priv, proto, port); if (el != NULL) { el->refcount++; if (el->installed) return (0); } el = mlx5e_vxlan_alloc_db_el(priv, proto, port); if ((priv->ifp->if_capenable & IFCAP_VXLAN_HWCSUM) != 0) { err = mlx5e_add_vxlan_rule_from_db(priv, el); if (err == 0) el->installed = true; } if (err == 0) TAILQ_INSERT_TAIL(&priv->vxlan.head, el, link); else kvfree(el); return (err); } static int mlx5e_add_vxlan_catchall_rule_sub(struct mlx5e_priv *priv, u32 *mc, u32 *mv) { struct mlx5_flow_table *ft = priv->fts.vxlan.t; struct mlx5_flow_destination dest = {}; u8 mc_enable = 0; struct mlx5_flow_rule **rule_p; int err = 0; dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE; dest.ft = priv->fts.main.t; rule_p = &priv->fts.vxlan_catchall_ft_rule; *rule_p = mlx5_add_flow_rule(ft, mc_enable, mc, mv, MLX5_FLOW_CONTEXT_ACTION_FWD_DEST, MLX5_FS_ETH_FLOW_TAG, &dest); if (IS_ERR(*rule_p)) { err = PTR_ERR(*rule_p); *rule_p = NULL; mlx5_en_err(priv->ifp, "add rule failed\n"); } return (err); } static int mlx5e_add_vxlan_catchall_rule(struct mlx5e_priv *priv) { u32 *match_criteria; u32 *match_value; int err; match_value = mlx5_vzalloc(MLX5_ST_SZ_BYTES(fte_match_param)); match_criteria = mlx5_vzalloc(MLX5_ST_SZ_BYTES(fte_match_param)); if (match_value == NULL || match_criteria == NULL) { mlx5_en_err(priv->ifp, "alloc failed\n"); err = -ENOMEM; goto add_vxlan_rule_out; } err = mlx5e_add_vxlan_catchall_rule_sub(priv, match_criteria, match_value); add_vxlan_rule_out: kvfree(match_criteria); kvfree(match_value); return (err); } int mlx5e_add_all_vxlan_rules(struct mlx5e_priv *priv) { struct mlx5e_vxlan_db_el *el; int err; err = 0; TAILQ_FOREACH(el, &priv->vxlan.head, link) { if (el->installed) continue; err = mlx5e_add_vxlan_rule_from_db(priv, el); if (err != 0) break; el->installed = false; } return (err); } static int mlx5e_del_vxlan_rule(struct mlx5e_priv *priv, sa_family_t family, u_int port) { struct mlx5e_vxlan_db_el *el; u_int proto; int err; err = mlx5e_vxlan_family_to_proto(family, &proto); if (err != 0) return (err); el = mlx5e_vxlan_find_db_el(priv, proto, port); if (el == NULL) return (0); if (el->refcount > 1) { el->refcount--; return (0); } mlx5_del_flow_rule(el->vxlan_ft_rule); TAILQ_REMOVE(&priv->vxlan.head, el, link); kvfree(el); return (0); } void mlx5e_del_all_vxlan_rules(struct mlx5e_priv *priv) { struct mlx5e_vxlan_db_el *el; TAILQ_FOREACH(el, &priv->vxlan.head, link) { if (!el->installed) continue; mlx5_del_flow_rule(el->vxlan_ft_rule); el->installed = false; } } static void mlx5e_del_vxlan_catchall_rule(struct mlx5e_priv *priv) { mlx5_del_flow_rule(priv->fts.vxlan_catchall_ft_rule); } void mlx5e_vxlan_start(void *arg, struct ifnet *ifp __unused, sa_family_t family, u_int port) { struct mlx5e_priv *priv = arg; int err; PRIV_LOCK(priv); err = mlx5_vxlan_udp_port_add(priv->mdev, port); if (err == 0 && test_bit(MLX5E_STATE_OPENED, &priv->state)) mlx5e_add_vxlan_rule(priv, family, port); PRIV_UNLOCK(priv); } void mlx5e_vxlan_stop(void *arg, struct ifnet *ifp __unused, sa_family_t family, u_int port) { struct mlx5e_priv *priv = arg; PRIV_LOCK(priv); if (test_bit(MLX5E_STATE_OPENED, &priv->state)) mlx5e_del_vxlan_rule(priv, family, port); (void)mlx5_vxlan_udp_port_delete(priv->mdev, port); PRIV_UNLOCK(priv); } #define MLX5E_VXLAN_GROUP0_SIZE BIT(3) /* XXXKIB */ #define MLX5E_VXLAN_GROUP1_SIZE BIT(0) #define MLX5E_NUM_VXLAN_GROUPS BIT(1) #define MLX5E_VXLAN_TABLE_SIZE \ (MLX5E_VXLAN_GROUP0_SIZE + MLX5E_VXLAN_GROUP1_SIZE) static int mlx5e_create_vxlan_groups_sub(struct mlx5e_flow_table *ft, u32 *in, int inlen) { int err; int ix = 0; u8 *mc = MLX5_ADDR_OF(create_flow_group_in, in, match_criteria); memset(in, 0, inlen); MLX5_SET_CFG(in, match_criteria_enable, MLX5_MATCH_OUTER_HEADERS); MLX5_SET_TO_ONES(fte_match_param, mc, outer_headers.ethertype); MLX5_SET_TO_ONES(fte_match_param, mc, outer_headers.ip_protocol); MLX5_SET_TO_ONES(fte_match_param, mc, outer_headers.udp_dport); MLX5_SET_CFG(in, start_flow_index, ix); ix += MLX5E_VXLAN_GROUP0_SIZE; MLX5_SET_CFG(in, end_flow_index, ix - 1); ft->g[ft->num_groups] = mlx5_create_flow_group(ft->t, in); if (IS_ERR(ft->g[ft->num_groups])) goto err_destory_groups; ft->num_groups++; memset(in, 0, inlen); MLX5_SET_CFG(in, start_flow_index, ix); ix += MLX5E_VXLAN_GROUP1_SIZE; MLX5_SET_CFG(in, end_flow_index, ix - 1); ft->g[ft->num_groups] = mlx5_create_flow_group(ft->t, in); if (IS_ERR(ft->g[ft->num_groups])) goto err_destory_groups; ft->num_groups++; return (0); err_destory_groups: err = PTR_ERR(ft->g[ft->num_groups]); ft->g[ft->num_groups] = NULL; mlx5e_destroy_groups(ft); return (err); } static int mlx5e_create_vxlan_groups(struct mlx5e_flow_table *ft) { u32 *in; int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in); int err; in = mlx5_vzalloc(inlen); if (!in) return (-ENOMEM); err = mlx5e_create_vxlan_groups_sub(ft, in, inlen); kvfree(in); return (err); } static int mlx5e_create_vxlan_flow_table(struct mlx5e_priv *priv) { struct mlx5e_flow_table *ft = &priv->fts.vxlan; int err; ft->num_groups = 0; ft->t = mlx5_create_flow_table(priv->fts.ns, 0, "vxlan", MLX5E_VXLAN_TABLE_SIZE); if (IS_ERR(ft->t)) { err = PTR_ERR(ft->t); ft->t = NULL; return (err); } ft->g = kcalloc(MLX5E_NUM_VXLAN_GROUPS, sizeof(*ft->g), GFP_KERNEL); if (!ft->g) { err = -ENOMEM; goto err_destroy_vxlan_flow_table; } err = mlx5e_create_vxlan_groups(ft); if (err) goto err_free_g; TAILQ_INIT(&priv->vxlan.head); return (0); err_free_g: kfree(ft->g); err_destroy_vxlan_flow_table: mlx5_destroy_flow_table(ft->t); ft->t = NULL; return (err); } #define MLX5E_NUM_INNER_RSS_GROUPS 3 #define MLX5E_INNER_RSS_GROUP0_SIZE BIT(3) #define MLX5E_INNER_RSS_GROUP1_SIZE BIT(1) #define MLX5E_INNER_RSS_GROUP2_SIZE BIT(0) #define MLX5E_INNER_RSS_TABLE_SIZE (MLX5E_INNER_RSS_GROUP0_SIZE +\ MLX5E_INNER_RSS_GROUP1_SIZE +\ MLX5E_INNER_RSS_GROUP2_SIZE +\ 0) static int mlx5e_create_inner_rss_groups_sub(struct mlx5e_flow_table *ft, u32 *in, int inlen) { u8 *mc = MLX5_ADDR_OF(create_flow_group_in, in, match_criteria); int err; int ix = 0; memset(in, 0, inlen); MLX5_SET_CFG(in, match_criteria_enable, MLX5_MATCH_INNER_HEADERS); MLX5_SET_TO_ONES(fte_match_param, mc, inner_headers.ethertype); MLX5_SET_TO_ONES(fte_match_param, mc, inner_headers.ip_protocol); MLX5_SET_CFG(in, start_flow_index, ix); ix += MLX5E_INNER_RSS_GROUP0_SIZE; MLX5_SET_CFG(in, end_flow_index, ix - 1); ft->g[ft->num_groups] = mlx5_create_flow_group(ft->t, in); if (IS_ERR(ft->g[ft->num_groups])) goto err_destory_groups; ft->num_groups++; memset(in, 0, inlen); MLX5_SET_CFG(in, match_criteria_enable, MLX5_MATCH_INNER_HEADERS); MLX5_SET_TO_ONES(fte_match_param, mc, inner_headers.ethertype); MLX5_SET_CFG(in, start_flow_index, ix); ix += MLX5E_INNER_RSS_GROUP1_SIZE; MLX5_SET_CFG(in, end_flow_index, ix - 1); ft->g[ft->num_groups] = mlx5_create_flow_group(ft->t, in); if (IS_ERR(ft->g[ft->num_groups])) goto err_destory_groups; ft->num_groups++; memset(in, 0, inlen); MLX5_SET_CFG(in, start_flow_index, ix); ix += MLX5E_INNER_RSS_GROUP2_SIZE; MLX5_SET_CFG(in, end_flow_index, ix - 1); ft->g[ft->num_groups] = mlx5_create_flow_group(ft->t, in); if (IS_ERR(ft->g[ft->num_groups])) goto err_destory_groups; ft->num_groups++; return (0); err_destory_groups: err = PTR_ERR(ft->g[ft->num_groups]); ft->g[ft->num_groups] = NULL; mlx5e_destroy_groups(ft); return (err); } static int mlx5e_create_inner_rss_groups(struct mlx5e_flow_table *ft) { u32 *in; int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in); int err; in = mlx5_vzalloc(inlen); if (!in) return (-ENOMEM); err = mlx5e_create_inner_rss_groups_sub(ft, in, inlen); kvfree(in); return (err); } static int mlx5e_create_inner_rss_flow_table(struct mlx5e_priv *priv) { struct mlx5e_flow_table *ft = &priv->fts.inner_rss; int err; ft->num_groups = 0; ft->t = mlx5_create_flow_table(priv->fts.ns, 0, "inner_rss", MLX5E_INNER_RSS_TABLE_SIZE); if (IS_ERR(ft->t)) { err = PTR_ERR(ft->t); ft->t = NULL; return (err); } ft->g = kcalloc(MLX5E_NUM_INNER_RSS_GROUPS, sizeof(*ft->g), GFP_KERNEL); if (!ft->g) { err = -ENOMEM; goto err_destroy_inner_rss_flow_table; } err = mlx5e_create_inner_rss_groups(ft); if (err) goto err_free_g; return (0); err_free_g: kfree(ft->g); err_destroy_inner_rss_flow_table: mlx5_destroy_flow_table(ft->t); ft->t = NULL; return (err); } static void mlx5e_destroy_inner_rss_flow_table(struct mlx5e_priv *priv) { mlx5e_destroy_flow_table(&priv->fts.inner_rss); } static void mlx5e_destroy_vxlan_flow_table(struct mlx5e_priv *priv) { mlx5e_destroy_flow_table(&priv->fts.vxlan); } int mlx5e_open_flow_table(struct mlx5e_priv *priv) { int err; priv->fts.ns = mlx5_get_flow_namespace(priv->mdev, MLX5_FLOW_NAMESPACE_KERNEL); err = mlx5e_create_vlan_flow_table(priv); if (err) return (err); if ((priv->ifp->if_capenable & IFCAP_VXLAN_HWCSUM) != 0) { err = mlx5e_create_vxlan_flow_table(priv); if (err) goto err_destroy_vlan_flow_table; } err = mlx5e_create_main_flow_table(priv, false); if (err) goto err_destroy_vxlan_flow_table; if ((priv->ifp->if_capenable & IFCAP_VXLAN_HWCSUM) != 0) { err = mlx5e_create_main_flow_table(priv, true); if (err) goto err_destroy_main_flow_table; err = mlx5e_create_inner_rss_flow_table(priv); if (err) goto err_destroy_main_vxlan_flow_table; err = mlx5e_add_vxlan_catchall_rule(priv); if (err != 0) goto err_destroy_inner_rss_flow_table; err = mlx5e_add_main_vxlan_rules(priv); if (err != 0) goto err_destroy_vxlan_catchall_rule; } return (0); err_destroy_vxlan_catchall_rule: mlx5e_del_vxlan_catchall_rule(priv); err_destroy_inner_rss_flow_table: mlx5e_destroy_inner_rss_flow_table(priv); err_destroy_main_vxlan_flow_table: mlx5e_destroy_main_vxlan_flow_table(priv); err_destroy_main_flow_table: mlx5e_destroy_main_flow_table(priv); err_destroy_vxlan_flow_table: if ((priv->ifp->if_capenable & IFCAP_VXLAN_HWCSUM) != 0) mlx5e_destroy_vxlan_flow_table(priv); err_destroy_vlan_flow_table: mlx5e_destroy_vlan_flow_table(priv); return (err); } void mlx5e_close_flow_table(struct mlx5e_priv *priv) { mlx5e_handle_ifp_addr(priv); if ((priv->ifp->if_capenable & IFCAP_VXLAN_HWCSUM) != 0) { mlx5e_destroy_inner_rss_flow_table(priv); mlx5e_del_vxlan_catchall_rule(priv); mlx5e_destroy_vxlan_flow_table(priv); mlx5e_del_main_vxlan_rules(priv); } mlx5e_destroy_main_flow_table(priv); if ((priv->ifp->if_capenable & IFCAP_VXLAN_HWCSUM) != 0) mlx5e_destroy_main_vxlan_flow_table(priv); mlx5e_destroy_vlan_flow_table(priv); } diff --git a/sys/dev/mlx5/mlx5_en/mlx5_en_hw_tls.c b/sys/dev/mlx5/mlx5_en/mlx5_en_hw_tls.c index d269b7fae64b..059703b28ce4 100644 --- a/sys/dev/mlx5/mlx5_en/mlx5_en_hw_tls.c +++ b/sys/dev/mlx5/mlx5_en/mlx5_en_hw_tls.c @@ -1,810 +1,810 @@ /*- * Copyright (c) 2019 Mellanox Technologies. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS `AS IS' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #include "opt_kern_tls.h" #include "opt_rss.h" #include "opt_ratelimit.h" -#include "en.h" +#include #include #include #include #include #ifdef KERN_TLS #ifdef RATELIMIT static if_snd_tag_modify_t mlx5e_tls_rl_snd_tag_modify; #endif static if_snd_tag_query_t mlx5e_tls_snd_tag_query; static if_snd_tag_free_t mlx5e_tls_snd_tag_free; static const struct if_snd_tag_sw mlx5e_tls_snd_tag_sw = { .snd_tag_query = mlx5e_tls_snd_tag_query, .snd_tag_free = mlx5e_tls_snd_tag_free, .type = IF_SND_TAG_TYPE_TLS }; #ifdef RATELIMIT static const struct if_snd_tag_sw mlx5e_tls_rl_snd_tag_sw = { .snd_tag_modify = mlx5e_tls_rl_snd_tag_modify, .snd_tag_query = mlx5e_tls_snd_tag_query, .snd_tag_free = mlx5e_tls_snd_tag_free, .type = IF_SND_TAG_TYPE_TLS_RATE_LIMIT }; #endif MALLOC_DEFINE(M_MLX5E_TLS, "MLX5E_TLS", "MLX5 ethernet HW TLS"); /* software TLS context */ struct mlx5_ifc_sw_tls_cntx_bits { struct mlx5_ifc_tls_static_params_bits param; struct mlx5_ifc_tls_progress_params_bits progress; struct { uint8_t key_data[8][0x20]; uint8_t key_len[0x20]; } key; }; CTASSERT(MLX5_ST_SZ_BYTES(sw_tls_cntx) <= sizeof(((struct mlx5e_tls_tag *)0)->crypto_params)); CTASSERT(MLX5_ST_SZ_BYTES(mkc) == sizeof(((struct mlx5e_tx_umr_wqe *)0)->mkc)); static const char *mlx5e_tls_stats_desc[] = { MLX5E_TLS_STATS(MLX5E_STATS_DESC) }; static void mlx5e_tls_work(struct work_struct *); static int mlx5e_tls_tag_zinit(void *mem, int size, int flags) { struct mlx5e_tls_tag *ptag = mem; MPASS(size == sizeof(*ptag)); memset(ptag, 0, sizeof(*ptag)); mtx_init(&ptag->mtx, "mlx5-tls-tag-mtx", NULL, MTX_DEF); INIT_WORK(&ptag->work, mlx5e_tls_work); return (0); } static void mlx5e_tls_tag_zfini(void *mem, int size) { struct mlx5e_tls_tag *ptag = mem; struct mlx5e_priv *priv; struct mlx5e_tls *ptls; ptls = ptag->tls; priv = container_of(ptls, struct mlx5e_priv, tls); flush_work(&ptag->work); if (ptag->tisn != 0) { mlx5_tls_close_tis(priv->mdev, ptag->tisn); atomic_add_32(&ptls->num_resources, -1U); } mtx_destroy(&ptag->mtx); } static void mlx5e_tls_tag_zfree(struct mlx5e_tls_tag *ptag) { /* reset some variables */ ptag->state = MLX5E_TLS_ST_INIT; ptag->dek_index = 0; ptag->dek_index_ok = 0; /* avoid leaking keys */ memset(ptag->crypto_params, 0, sizeof(ptag->crypto_params)); /* update number of TIS contexts */ if (ptag->tisn == 0) atomic_add_32(&ptag->tls->num_resources, -1U); /* return tag to UMA */ uma_zfree(ptag->tls->zone, ptag); } int mlx5e_tls_init(struct mlx5e_priv *priv) { struct mlx5e_tls *ptls = &priv->tls; struct sysctl_oid *node; uint32_t x; if (MLX5_CAP_GEN(priv->mdev, tls_tx) == 0) return (0); ptls->wq = create_singlethread_workqueue("mlx5-tls-wq"); if (ptls->wq == NULL) return (ENOMEM); sysctl_ctx_init(&ptls->ctx); snprintf(ptls->zname, sizeof(ptls->zname), "mlx5_%u_tls", device_get_unit(priv->mdev->pdev->dev.bsddev)); ptls->zone = uma_zcreate(ptls->zname, sizeof(struct mlx5e_tls_tag), NULL, NULL, mlx5e_tls_tag_zinit, mlx5e_tls_tag_zfini, UMA_ALIGN_CACHE, 0); ptls->max_resources = 1U << MLX5_CAP_GEN(priv->mdev, log_max_dek); for (x = 0; x != MLX5E_TLS_STATS_NUM; x++) ptls->stats.arg[x] = counter_u64_alloc(M_WAITOK); ptls->init = 1; node = SYSCTL_ADD_NODE(&priv->sysctl_ctx, SYSCTL_CHILDREN(priv->sysctl_ifnet), OID_AUTO, "tls", CTLFLAG_RW | CTLFLAG_MPSAFE, NULL, "Hardware TLS offload"); if (node == NULL) return (0); mlx5e_create_counter_stats(&ptls->ctx, SYSCTL_CHILDREN(node), "stats", mlx5e_tls_stats_desc, MLX5E_TLS_STATS_NUM, ptls->stats.arg); return (0); } void mlx5e_tls_cleanup(struct mlx5e_priv *priv) { struct mlx5e_tls *ptls = &priv->tls; uint32_t x; if (MLX5_CAP_GEN(priv->mdev, tls_tx) == 0) return; ptls->init = 0; flush_workqueue(ptls->wq); sysctl_ctx_free(&ptls->ctx); uma_zdestroy(ptls->zone); destroy_workqueue(ptls->wq); /* check if all resources are freed */ MPASS(priv->tls.num_resources == 0); for (x = 0; x != MLX5E_TLS_STATS_NUM; x++) counter_u64_free(ptls->stats.arg[x]); } static void mlx5e_tls_work(struct work_struct *work) { struct mlx5e_tls_tag *ptag; struct mlx5e_priv *priv; int err; ptag = container_of(work, struct mlx5e_tls_tag, work); priv = container_of(ptag->tls, struct mlx5e_priv, tls); switch (ptag->state) { case MLX5E_TLS_ST_INIT: /* try to open TIS, if not present */ if (ptag->tisn == 0) { err = mlx5_tls_open_tis(priv->mdev, 0, priv->tdn, priv->pdn, &ptag->tisn); if (err) { MLX5E_TLS_STAT_INC(ptag, tx_error, 1); break; } } MLX5_SET(sw_tls_cntx, ptag->crypto_params, progress.pd, ptag->tisn); /* try to allocate a DEK context ID */ err = mlx5_encryption_key_create(priv->mdev, priv->pdn, MLX5_ADDR_OF(sw_tls_cntx, ptag->crypto_params, key.key_data), MLX5_GET(sw_tls_cntx, ptag->crypto_params, key.key_len), &ptag->dek_index); if (err) { MLX5E_TLS_STAT_INC(ptag, tx_error, 1); break; } MLX5_SET(sw_tls_cntx, ptag->crypto_params, param.dek_index, ptag->dek_index); ptag->dek_index_ok = 1; MLX5E_TLS_TAG_LOCK(ptag); if (ptag->state == MLX5E_TLS_ST_INIT) ptag->state = MLX5E_TLS_ST_SETUP; MLX5E_TLS_TAG_UNLOCK(ptag); break; case MLX5E_TLS_ST_FREED: /* wait for all refs to go away */ while (ptag->refs != 0) msleep(1); /* try to destroy DEK context by ID */ if (ptag->dek_index_ok) err = mlx5_encryption_key_destroy(priv->mdev, ptag->dek_index); /* free tag */ mlx5e_tls_tag_zfree(ptag); break; default: break; } } static int mlx5e_tls_set_params(void *ctx, const struct tls_session_params *en) { MLX5_SET(sw_tls_cntx, ctx, param.const_2, 2); if (en->tls_vminor == TLS_MINOR_VER_TWO) MLX5_SET(sw_tls_cntx, ctx, param.tls_version, 2); /* v1.2 */ else MLX5_SET(sw_tls_cntx, ctx, param.tls_version, 3); /* v1.3 */ MLX5_SET(sw_tls_cntx, ctx, param.const_1, 1); MLX5_SET(sw_tls_cntx, ctx, param.encryption_standard, 1); /* TLS */ /* copy the initial vector in place */ switch (en->iv_len) { case MLX5_FLD_SZ_BYTES(sw_tls_cntx, param.gcm_iv): case MLX5_FLD_SZ_BYTES(sw_tls_cntx, param.gcm_iv) + MLX5_FLD_SZ_BYTES(sw_tls_cntx, param.implicit_iv): memcpy(MLX5_ADDR_OF(sw_tls_cntx, ctx, param.gcm_iv), en->iv, en->iv_len); break; default: return (EINVAL); } if (en->cipher_key_len <= MLX5_FLD_SZ_BYTES(sw_tls_cntx, key.key_data)) { memcpy(MLX5_ADDR_OF(sw_tls_cntx, ctx, key.key_data), en->cipher_key, en->cipher_key_len); MLX5_SET(sw_tls_cntx, ctx, key.key_len, en->cipher_key_len); } else { return (EINVAL); } return (0); } /* Verify zero default */ CTASSERT(MLX5E_TLS_ST_INIT == 0); int mlx5e_tls_snd_tag_alloc(struct ifnet *ifp, union if_snd_tag_alloc_params *params, struct m_snd_tag **ppmt) { union if_snd_tag_alloc_params rl_params; const struct if_snd_tag_sw *snd_tag_sw; struct mlx5e_priv *priv; struct mlx5e_tls_tag *ptag; const struct tls_session_params *en; int error; priv = ifp->if_softc; if (priv->gone != 0 || priv->tls.init == 0) return (EOPNOTSUPP); /* allocate new tag from zone, if any */ ptag = uma_zalloc(priv->tls.zone, M_NOWAIT); if (ptag == NULL) return (ENOMEM); /* sanity check default values */ MPASS(ptag->state == MLX5E_TLS_ST_INIT); MPASS(ptag->dek_index == 0); MPASS(ptag->dek_index_ok == 0); /* setup TLS tag */ ptag->tls = &priv->tls; /* check if there is no TIS context */ if (ptag->tisn == 0) { uint32_t value; value = atomic_fetchadd_32(&priv->tls.num_resources, 1U); /* check resource limits */ if (value >= priv->tls.max_resources) { error = ENOMEM; goto failure; } } en = ¶ms->tls.tls->params; /* only TLS v1.2 and v1.3 is currently supported */ if (en->tls_vmajor != TLS_MAJOR_VER_ONE || (en->tls_vminor != TLS_MINOR_VER_TWO #ifdef TLS_MINOR_VER_THREE && en->tls_vminor != TLS_MINOR_VER_THREE #endif )) { error = EPROTONOSUPPORT; goto failure; } switch (en->cipher_algorithm) { case CRYPTO_AES_NIST_GCM_16: switch (en->cipher_key_len) { case 128 / 8: if (en->tls_vminor == TLS_MINOR_VER_TWO) { if (MLX5_CAP_TLS(priv->mdev, tls_1_2_aes_gcm_128) == 0) { error = EPROTONOSUPPORT; goto failure; } } else { if (MLX5_CAP_TLS(priv->mdev, tls_1_3_aes_gcm_128) == 0) { error = EPROTONOSUPPORT; goto failure; } } error = mlx5e_tls_set_params(ptag->crypto_params, en); if (error) goto failure; break; case 256 / 8: if (en->tls_vminor == TLS_MINOR_VER_TWO) { if (MLX5_CAP_TLS(priv->mdev, tls_1_2_aes_gcm_256) == 0) { error = EPROTONOSUPPORT; goto failure; } } else { if (MLX5_CAP_TLS(priv->mdev, tls_1_3_aes_gcm_256) == 0) { error = EPROTONOSUPPORT; goto failure; } } error = mlx5e_tls_set_params(ptag->crypto_params, en); if (error) goto failure; break; default: error = EINVAL; goto failure; } break; default: error = EPROTONOSUPPORT; goto failure; } memset(&rl_params, 0, sizeof(rl_params)); rl_params.hdr = params->hdr; switch (params->hdr.type) { #ifdef RATELIMIT case IF_SND_TAG_TYPE_TLS_RATE_LIMIT: rl_params.hdr.type = IF_SND_TAG_TYPE_RATE_LIMIT; rl_params.rate_limit.max_rate = params->tls_rate_limit.max_rate; snd_tag_sw = &mlx5e_tls_rl_snd_tag_sw; break; #endif case IF_SND_TAG_TYPE_TLS: rl_params.hdr.type = IF_SND_TAG_TYPE_UNLIMITED; snd_tag_sw = &mlx5e_tls_snd_tag_sw; break; default: error = EOPNOTSUPP; goto failure; } error = m_snd_tag_alloc(ifp, &rl_params, &ptag->rl_tag); if (error) goto failure; /* store pointer to mbuf tag */ MPASS(ptag->tag.refcount == 0); m_snd_tag_init(&ptag->tag, ifp, snd_tag_sw); *ppmt = &ptag->tag; queue_work(priv->tls.wq, &ptag->work); flush_work(&ptag->work); return (0); failure: mlx5e_tls_tag_zfree(ptag); return (error); } #ifdef RATELIMIT static int mlx5e_tls_rl_snd_tag_modify(struct m_snd_tag *pmt, union if_snd_tag_modify_params *params) { union if_snd_tag_modify_params rl_params; struct mlx5e_tls_tag *ptag = container_of(pmt, struct mlx5e_tls_tag, tag); int error; memset(&rl_params, 0, sizeof(rl_params)); rl_params.rate_limit.max_rate = params->tls_rate_limit.max_rate; error = ptag->rl_tag->sw->snd_tag_modify(ptag->rl_tag, &rl_params); return (error); } #endif static int mlx5e_tls_snd_tag_query(struct m_snd_tag *pmt, union if_snd_tag_query_params *params) { struct mlx5e_tls_tag *ptag = container_of(pmt, struct mlx5e_tls_tag, tag); return (ptag->rl_tag->sw->snd_tag_query(ptag->rl_tag, params)); } static void mlx5e_tls_snd_tag_free(struct m_snd_tag *pmt) { struct mlx5e_tls_tag *ptag = container_of(pmt, struct mlx5e_tls_tag, tag); struct mlx5e_priv *priv; m_snd_tag_rele(ptag->rl_tag); MLX5E_TLS_TAG_LOCK(ptag); ptag->state = MLX5E_TLS_ST_FREED; MLX5E_TLS_TAG_UNLOCK(ptag); priv = ptag->tag.ifp->if_softc; queue_work(priv->tls.wq, &ptag->work); } CTASSERT((MLX5_FLD_SZ_BYTES(sw_tls_cntx, param) % 16) == 0); static void mlx5e_tls_send_static_parameters(struct mlx5e_sq *sq, struct mlx5e_tls_tag *ptag) { const u32 ds_cnt = DIV_ROUND_UP(sizeof(struct mlx5e_tx_umr_wqe) + MLX5_FLD_SZ_BYTES(sw_tls_cntx, param), MLX5_SEND_WQE_DS); struct mlx5e_tx_umr_wqe *wqe; u16 pi; pi = sq->pc & sq->wq.sz_m1; wqe = mlx5_wq_cyc_get_wqe(&sq->wq, pi); memset(wqe, 0, sizeof(*wqe)); wqe->ctrl.opmod_idx_opcode = cpu_to_be32((sq->pc << 8) | MLX5_OPCODE_UMR | (MLX5_OPCODE_MOD_UMR_TLS_TIS_STATIC_PARAMS << 24)); wqe->ctrl.qpn_ds = cpu_to_be32((sq->sqn << 8) | ds_cnt); wqe->ctrl.imm = cpu_to_be32(ptag->tisn << 8); if (mlx5e_do_send_cqe(sq)) wqe->ctrl.fm_ce_se = MLX5_WQE_CTRL_CQ_UPDATE | MLX5_FENCE_MODE_INITIATOR_SMALL; else wqe->ctrl.fm_ce_se = MLX5_FENCE_MODE_INITIATOR_SMALL; /* fill out UMR control segment */ wqe->umr.flags = 0x80; /* inline data */ wqe->umr.bsf_octowords = cpu_to_be16(MLX5_FLD_SZ_BYTES(sw_tls_cntx, param) / 16); /* copy in the static crypto parameters */ memcpy(wqe + 1, MLX5_ADDR_OF(sw_tls_cntx, ptag->crypto_params, param), MLX5_FLD_SZ_BYTES(sw_tls_cntx, param)); /* copy data for doorbell */ memcpy(sq->doorbell.d32, &wqe->ctrl, sizeof(sq->doorbell.d32)); sq->mbuf[pi].mbuf = NULL; sq->mbuf[pi].num_bytes = 0; sq->mbuf[pi].num_wqebbs = DIV_ROUND_UP(ds_cnt, MLX5_SEND_WQEBB_NUM_DS); sq->mbuf[pi].p_refcount = &ptag->refs; atomic_add_int(&ptag->refs, 1); sq->pc += sq->mbuf[pi].num_wqebbs; } CTASSERT(MLX5_FLD_SZ_BYTES(sw_tls_cntx, progress) == sizeof(((struct mlx5e_tx_psv_wqe *)0)->psv)); static void mlx5e_tls_send_progress_parameters(struct mlx5e_sq *sq, struct mlx5e_tls_tag *ptag) { const u32 ds_cnt = DIV_ROUND_UP(sizeof(struct mlx5e_tx_psv_wqe), MLX5_SEND_WQE_DS); struct mlx5e_tx_psv_wqe *wqe; u16 pi; pi = sq->pc & sq->wq.sz_m1; wqe = mlx5_wq_cyc_get_wqe(&sq->wq, pi); memset(wqe, 0, sizeof(*wqe)); wqe->ctrl.opmod_idx_opcode = cpu_to_be32((sq->pc << 8) | MLX5_OPCODE_SET_PSV | (MLX5_OPCODE_MOD_PSV_TLS_TIS_PROGRESS_PARAMS << 24)); wqe->ctrl.qpn_ds = cpu_to_be32((sq->sqn << 8) | ds_cnt); if (mlx5e_do_send_cqe(sq)) wqe->ctrl.fm_ce_se = MLX5_WQE_CTRL_CQ_UPDATE; /* copy in the PSV control segment */ memcpy(&wqe->psv, MLX5_ADDR_OF(sw_tls_cntx, ptag->crypto_params, progress), sizeof(wqe->psv)); /* copy data for doorbell */ memcpy(sq->doorbell.d32, &wqe->ctrl, sizeof(sq->doorbell.d32)); sq->mbuf[pi].mbuf = NULL; sq->mbuf[pi].num_bytes = 0; sq->mbuf[pi].num_wqebbs = DIV_ROUND_UP(ds_cnt, MLX5_SEND_WQEBB_NUM_DS); sq->mbuf[pi].p_refcount = &ptag->refs; atomic_add_int(&ptag->refs, 1); sq->pc += sq->mbuf[pi].num_wqebbs; } static void mlx5e_tls_send_nop(struct mlx5e_sq *sq, struct mlx5e_tls_tag *ptag) { const u32 ds_cnt = MLX5_SEND_WQEBB_NUM_DS; struct mlx5e_tx_wqe *wqe; u16 pi; pi = sq->pc & sq->wq.sz_m1; wqe = mlx5_wq_cyc_get_wqe(&sq->wq, pi); memset(&wqe->ctrl, 0, sizeof(wqe->ctrl)); wqe->ctrl.opmod_idx_opcode = cpu_to_be32((sq->pc << 8) | MLX5_OPCODE_NOP); wqe->ctrl.qpn_ds = cpu_to_be32((sq->sqn << 8) | ds_cnt); if (mlx5e_do_send_cqe(sq)) wqe->ctrl.fm_ce_se = MLX5_WQE_CTRL_CQ_UPDATE | MLX5_FENCE_MODE_INITIATOR_SMALL; else wqe->ctrl.fm_ce_se = MLX5_FENCE_MODE_INITIATOR_SMALL; /* Copy data for doorbell */ memcpy(sq->doorbell.d32, &wqe->ctrl, sizeof(sq->doorbell.d32)); sq->mbuf[pi].mbuf = NULL; sq->mbuf[pi].num_bytes = 0; sq->mbuf[pi].num_wqebbs = DIV_ROUND_UP(ds_cnt, MLX5_SEND_WQEBB_NUM_DS); sq->mbuf[pi].p_refcount = &ptag->refs; atomic_add_int(&ptag->refs, 1); sq->pc += sq->mbuf[pi].num_wqebbs; } #define SBTLS_MBUF_NO_DATA ((struct mbuf *)1) static struct mbuf * sbtls_recover_record(struct mbuf *mb, int wait, uint32_t tcp_old, uint32_t *ptcp_seq, bool *pis_start) { struct mbuf *mr, *top; uint32_t offset; uint32_t delta; /* check format of incoming mbuf */ if (mb->m_next == NULL || (mb->m_next->m_flags & (M_EXTPG | M_EXT)) != (M_EXTPG | M_EXT)) { top = NULL; goto done; } /* get unmapped data offset */ offset = mtod(mb->m_next, uintptr_t); /* check if we don't need to re-transmit anything */ if (offset == 0) { top = SBTLS_MBUF_NO_DATA; *pis_start = true; goto done; } /* try to get a new packet header */ top = m_gethdr(wait, MT_DATA); if (top == NULL) goto done; mr = m_get(wait, MT_DATA); if (mr == NULL) { m_free(top); top = NULL; goto done; } top->m_next = mr; mb_dupcl(mr, mb->m_next); /* the beginning of the TLS record */ mr->m_data = NULL; /* setup packet header length */ top->m_pkthdr.len = mr->m_len = offset; top->m_len = 0; /* check for partial re-transmit */ delta = *ptcp_seq - tcp_old; if (delta < offset) { m_adj(top, offset - delta); offset = delta; /* continue where we left off */ *pis_start = false; } else { *pis_start = true; } /* * Rewind the TCP sequence number by the amount of data * retransmitted: */ *ptcp_seq -= offset; done: return (top); } static int mlx5e_sq_tls_populate(struct mbuf *mb, uint64_t *pseq) { for (; mb != NULL; mb = mb->m_next) { if (!(mb->m_flags & M_EXTPG)) continue; *pseq = mb->m_epg_seqno; return (1); } return (0); } int mlx5e_sq_tls_xmit(struct mlx5e_sq *sq, struct mlx5e_xmit_args *parg, struct mbuf **ppmb) { struct mlx5e_tls_tag *ptls_tag; struct m_snd_tag *ptag; const struct tcphdr *th; struct mbuf *mb = *ppmb; u64 rcd_sn; u32 header_size; u32 mb_seq; if ((mb->m_pkthdr.csum_flags & CSUM_SND_TAG) == 0) return (MLX5E_TLS_CONTINUE); ptag = mb->m_pkthdr.snd_tag; if ( #ifdef RATELIMIT ptag->sw->type != IF_SND_TAG_TYPE_TLS_RATE_LIMIT && #endif ptag->sw->type != IF_SND_TAG_TYPE_TLS) return (MLX5E_TLS_CONTINUE); ptls_tag = container_of(ptag, struct mlx5e_tls_tag, tag); header_size = mlx5e_get_full_header_size(mb, &th); if (unlikely(header_size == 0 || th == NULL)) return (MLX5E_TLS_FAILURE); /* * Send non-TLS TCP packets AS-IS: */ if (header_size == mb->m_pkthdr.len || mlx5e_sq_tls_populate(mb, &rcd_sn) == 0) { parg->tisn = 0; parg->ihs = header_size; return (MLX5E_TLS_CONTINUE); } mb_seq = ntohl(th->th_seq); MLX5E_TLS_TAG_LOCK(ptls_tag); switch (ptls_tag->state) { case MLX5E_TLS_ST_INIT: MLX5E_TLS_TAG_UNLOCK(ptls_tag); return (MLX5E_TLS_FAILURE); case MLX5E_TLS_ST_SETUP: ptls_tag->state = MLX5E_TLS_ST_TXRDY; ptls_tag->expected_seq = ~mb_seq; /* force setup */ default: MLX5E_TLS_TAG_UNLOCK(ptls_tag); break; } if (unlikely(ptls_tag->expected_seq != mb_seq)) { bool is_start; struct mbuf *r_mb; uint32_t tcp_seq = mb_seq; r_mb = sbtls_recover_record(mb, M_NOWAIT, ptls_tag->expected_seq, &tcp_seq, &is_start); if (r_mb == NULL) { MLX5E_TLS_STAT_INC(ptls_tag, tx_error, 1); return (MLX5E_TLS_FAILURE); } MLX5E_TLS_STAT_INC(ptls_tag, tx_packets_ooo, 1); /* check if this is the first fragment of a TLS record */ if (is_start) { /* setup TLS static parameters */ MLX5_SET64(sw_tls_cntx, ptls_tag->crypto_params, param.initial_record_number, rcd_sn); /* * NOTE: The sendqueue should have enough room to * carry both the static and the progress parameters * when we get here! */ mlx5e_tls_send_static_parameters(sq, ptls_tag); mlx5e_tls_send_progress_parameters(sq, ptls_tag); if (r_mb == SBTLS_MBUF_NO_DATA) { mlx5e_tls_send_nop(sq, ptls_tag); ptls_tag->expected_seq = mb_seq; return (MLX5E_TLS_LOOP); } } MLX5E_TLS_STAT_INC(ptls_tag, tx_bytes_ooo, r_mb->m_pkthdr.len); /* setup transmit arguments */ parg->tisn = ptls_tag->tisn; parg->pref = &ptls_tag->refs; /* try to send DUMP data */ if (mlx5e_sq_dump_xmit(sq, parg, &r_mb) != 0) { m_freem(r_mb); ptls_tag->expected_seq = tcp_seq; return (MLX5E_TLS_FAILURE); } else { ptls_tag->expected_seq = mb_seq; return (MLX5E_TLS_LOOP); } } else { MLX5E_TLS_STAT_INC(ptls_tag, tx_packets, 1); MLX5E_TLS_STAT_INC(ptls_tag, tx_bytes, mb->m_pkthdr.len); } ptls_tag->expected_seq += mb->m_pkthdr.len - header_size; parg->tisn = ptls_tag->tisn; parg->ihs = header_size; parg->pref = &ptls_tag->refs; return (MLX5E_TLS_CONTINUE); } #else int mlx5e_tls_init(struct mlx5e_priv *priv) { return (0); } void mlx5e_tls_cleanup(struct mlx5e_priv *priv) { /* NOP */ } #endif /* KERN_TLS */ diff --git a/sys/dev/mlx5/mlx5_en/mlx5_en_main.c b/sys/dev/mlx5/mlx5_en/mlx5_en_main.c index 92aae3180b5e..e8d9d26efc6f 100644 --- a/sys/dev/mlx5/mlx5_en/mlx5_en_main.c +++ b/sys/dev/mlx5/mlx5_en/mlx5_en_main.c @@ -1,4845 +1,4845 @@ /*- * Copyright (c) 2015-2021 Mellanox Technologies. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS `AS IS' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #include "opt_kern_tls.h" #include "opt_rss.h" #include "opt_ratelimit.h" -#include "en.h" +#include #include #include #include #include static int mlx5e_get_wqe_sz(struct mlx5e_priv *priv, u32 *wqe_sz, u32 *nsegs); static if_snd_tag_query_t mlx5e_ul_snd_tag_query; static if_snd_tag_free_t mlx5e_ul_snd_tag_free; struct mlx5e_channel_param { struct mlx5e_rq_param rq; struct mlx5e_sq_param sq; struct mlx5e_cq_param rx_cq; struct mlx5e_cq_param tx_cq; }; struct media { u32 subtype; u64 baudrate; }; static const struct media mlx5e_mode_table[MLX5E_LINK_SPEEDS_NUMBER] = { [MLX5E_1000BASE_CX_SGMII] = { .subtype = IFM_1000_CX_SGMII, .baudrate = IF_Mbps(1000ULL), }, [MLX5E_1000BASE_KX] = { .subtype = IFM_1000_KX, .baudrate = IF_Mbps(1000ULL), }, [MLX5E_10GBASE_CX4] = { .subtype = IFM_10G_CX4, .baudrate = IF_Gbps(10ULL), }, [MLX5E_10GBASE_KX4] = { .subtype = IFM_10G_KX4, .baudrate = IF_Gbps(10ULL), }, [MLX5E_10GBASE_KR] = { .subtype = IFM_10G_KR, .baudrate = IF_Gbps(10ULL), }, [MLX5E_20GBASE_KR2] = { .subtype = IFM_20G_KR2, .baudrate = IF_Gbps(20ULL), }, [MLX5E_40GBASE_CR4] = { .subtype = IFM_40G_CR4, .baudrate = IF_Gbps(40ULL), }, [MLX5E_40GBASE_KR4] = { .subtype = IFM_40G_KR4, .baudrate = IF_Gbps(40ULL), }, [MLX5E_56GBASE_R4] = { .subtype = IFM_56G_R4, .baudrate = IF_Gbps(56ULL), }, [MLX5E_10GBASE_CR] = { .subtype = IFM_10G_CR1, .baudrate = IF_Gbps(10ULL), }, [MLX5E_10GBASE_SR] = { .subtype = IFM_10G_SR, .baudrate = IF_Gbps(10ULL), }, [MLX5E_10GBASE_ER_LR] = { .subtype = IFM_10G_ER, .baudrate = IF_Gbps(10ULL), }, [MLX5E_40GBASE_SR4] = { .subtype = IFM_40G_SR4, .baudrate = IF_Gbps(40ULL), }, [MLX5E_40GBASE_LR4_ER4] = { .subtype = IFM_40G_LR4, .baudrate = IF_Gbps(40ULL), }, [MLX5E_100GBASE_CR4] = { .subtype = IFM_100G_CR4, .baudrate = IF_Gbps(100ULL), }, [MLX5E_100GBASE_SR4] = { .subtype = IFM_100G_SR4, .baudrate = IF_Gbps(100ULL), }, [MLX5E_100GBASE_KR4] = { .subtype = IFM_100G_KR4, .baudrate = IF_Gbps(100ULL), }, [MLX5E_100GBASE_LR4] = { .subtype = IFM_100G_LR4, .baudrate = IF_Gbps(100ULL), }, [MLX5E_100BASE_TX] = { .subtype = IFM_100_TX, .baudrate = IF_Mbps(100ULL), }, [MLX5E_1000BASE_T] = { .subtype = IFM_1000_T, .baudrate = IF_Mbps(1000ULL), }, [MLX5E_10GBASE_T] = { .subtype = IFM_10G_T, .baudrate = IF_Gbps(10ULL), }, [MLX5E_25GBASE_CR] = { .subtype = IFM_25G_CR, .baudrate = IF_Gbps(25ULL), }, [MLX5E_25GBASE_KR] = { .subtype = IFM_25G_KR, .baudrate = IF_Gbps(25ULL), }, [MLX5E_25GBASE_SR] = { .subtype = IFM_25G_SR, .baudrate = IF_Gbps(25ULL), }, [MLX5E_50GBASE_CR2] = { .subtype = IFM_50G_CR2, .baudrate = IF_Gbps(50ULL), }, [MLX5E_50GBASE_KR2] = { .subtype = IFM_50G_KR2, .baudrate = IF_Gbps(50ULL), }, [MLX5E_50GBASE_KR4] = { .subtype = IFM_50G_KR4, .baudrate = IF_Gbps(50ULL), }, }; static const struct media mlx5e_ext_mode_table[MLX5E_EXT_LINK_SPEEDS_NUMBER][MLX5E_CABLE_TYPE_NUMBER] = { /**/ [MLX5E_SGMII_100M][MLX5E_CABLE_TYPE_UNKNOWN] = { .subtype = IFM_100_SGMII, .baudrate = IF_Mbps(100), }, /**/ [MLX5E_1000BASE_X_SGMII][MLX5E_CABLE_TYPE_UNKNOWN] = { .subtype = IFM_1000_CX, .baudrate = IF_Mbps(1000), }, [MLX5E_1000BASE_X_SGMII][MLX5E_CABLE_TYPE_OPTICAL_MODULE] = { .subtype = IFM_1000_SX, .baudrate = IF_Mbps(1000), }, /**/ [MLX5E_5GBASE_R][MLX5E_CABLE_TYPE_UNKNOWN] = { .subtype = IFM_5000_KR, .baudrate = IF_Mbps(5000), }, [MLX5E_5GBASE_R][MLX5E_CABLE_TYPE_TWISTED_PAIR] = { .subtype = IFM_5000_T, .baudrate = IF_Mbps(5000), }, /**/ [MLX5E_10GBASE_XFI_XAUI_1][MLX5E_CABLE_TYPE_UNKNOWN] = { .subtype = IFM_10G_KR, .baudrate = IF_Gbps(10ULL), }, [MLX5E_10GBASE_XFI_XAUI_1][MLX5E_CABLE_TYPE_PASSIVE_COPPER] = { .subtype = IFM_10G_CR1, .baudrate = IF_Gbps(10ULL), }, [MLX5E_10GBASE_XFI_XAUI_1][MLX5E_CABLE_TYPE_OPTICAL_MODULE] = { .subtype = IFM_10G_SR, .baudrate = IF_Gbps(10ULL), }, /**/ [MLX5E_40GBASE_XLAUI_4_XLPPI_4][MLX5E_CABLE_TYPE_UNKNOWN] = { .subtype = IFM_40G_KR4, .baudrate = IF_Gbps(40ULL), }, [MLX5E_40GBASE_XLAUI_4_XLPPI_4][MLX5E_CABLE_TYPE_PASSIVE_COPPER] = { .subtype = IFM_40G_CR4, .baudrate = IF_Gbps(40ULL), }, [MLX5E_40GBASE_XLAUI_4_XLPPI_4][MLX5E_CABLE_TYPE_OPTICAL_MODULE] = { .subtype = IFM_40G_SR4, .baudrate = IF_Gbps(40ULL), }, /**/ [MLX5E_25GAUI_1_25GBASE_CR_KR][MLX5E_CABLE_TYPE_UNKNOWN] = { .subtype = IFM_25G_KR, .baudrate = IF_Gbps(25ULL), }, [MLX5E_25GAUI_1_25GBASE_CR_KR][MLX5E_CABLE_TYPE_PASSIVE_COPPER] = { .subtype = IFM_25G_CR, .baudrate = IF_Gbps(25ULL), }, [MLX5E_25GAUI_1_25GBASE_CR_KR][MLX5E_CABLE_TYPE_OPTICAL_MODULE] = { .subtype = IFM_25G_SR, .baudrate = IF_Gbps(25ULL), }, [MLX5E_25GAUI_1_25GBASE_CR_KR][MLX5E_CABLE_TYPE_TWISTED_PAIR] = { .subtype = IFM_25G_T, .baudrate = IF_Gbps(25ULL), }, /**/ [MLX5E_50GAUI_2_LAUI_2_50GBASE_CR2_KR2][MLX5E_CABLE_TYPE_UNKNOWN] = { .subtype = IFM_50G_KR2, .baudrate = IF_Gbps(50ULL), }, [MLX5E_50GAUI_2_LAUI_2_50GBASE_CR2_KR2][MLX5E_CABLE_TYPE_PASSIVE_COPPER] = { .subtype = IFM_50G_CR2, .baudrate = IF_Gbps(50ULL), }, [MLX5E_50GAUI_2_LAUI_2_50GBASE_CR2_KR2][MLX5E_CABLE_TYPE_OPTICAL_MODULE] = { .subtype = IFM_50G_SR2, .baudrate = IF_Gbps(50ULL), }, /**/ [MLX5E_50GAUI_1_LAUI_1_50GBASE_CR_KR][MLX5E_CABLE_TYPE_UNKNOWN] = { .subtype = IFM_50G_KR_PAM4, .baudrate = IF_Gbps(50ULL), }, [MLX5E_50GAUI_1_LAUI_1_50GBASE_CR_KR][MLX5E_CABLE_TYPE_PASSIVE_COPPER] = { .subtype = IFM_50G_CP, .baudrate = IF_Gbps(50ULL), }, [MLX5E_50GAUI_1_LAUI_1_50GBASE_CR_KR][MLX5E_CABLE_TYPE_OPTICAL_MODULE] = { .subtype = IFM_50G_SR, .baudrate = IF_Gbps(50ULL), }, /**/ [MLX5E_CAUI_4_100GBASE_CR4_KR4][MLX5E_CABLE_TYPE_UNKNOWN] = { .subtype = IFM_100G_KR4, .baudrate = IF_Gbps(100ULL), }, [MLX5E_CAUI_4_100GBASE_CR4_KR4][MLX5E_CABLE_TYPE_PASSIVE_COPPER] = { .subtype = IFM_100G_CR4, .baudrate = IF_Gbps(100ULL), }, [MLX5E_CAUI_4_100GBASE_CR4_KR4][MLX5E_CABLE_TYPE_OPTICAL_MODULE] = { .subtype = IFM_100G_SR4, .baudrate = IF_Gbps(100ULL), }, /**/ [MLX5E_100GAUI_1_100GBASE_CR_KR][MLX5E_CABLE_TYPE_UNKNOWN] = { .subtype = IFM_100G_KR_PAM4, .baudrate = IF_Gbps(100ULL), }, [MLX5E_100GAUI_1_100GBASE_CR_KR][MLX5E_CABLE_TYPE_PASSIVE_COPPER] = { .subtype = IFM_100G_CR_PAM4, .baudrate = IF_Gbps(100ULL), }, [MLX5E_100GAUI_1_100GBASE_CR_KR][MLX5E_CABLE_TYPE_OPTICAL_MODULE] = { .subtype = IFM_100G_SR2, /* XXX */ .baudrate = IF_Gbps(100ULL), }, /**/ [MLX5E_100GAUI_2_100GBASE_CR2_KR2][MLX5E_CABLE_TYPE_UNKNOWN] = { .subtype = IFM_100G_KR4, .baudrate = IF_Gbps(100ULL), }, [MLX5E_100GAUI_2_100GBASE_CR2_KR2][MLX5E_CABLE_TYPE_PASSIVE_COPPER] = { .subtype = IFM_100G_CP2, .baudrate = IF_Gbps(100ULL), }, [MLX5E_100GAUI_2_100GBASE_CR2_KR2][MLX5E_CABLE_TYPE_OPTICAL_MODULE] = { .subtype = IFM_100G_SR2, .baudrate = IF_Gbps(100ULL), }, /**/ [MLX5E_200GAUI_2_200GBASE_CR2_KR2][MLX5E_CABLE_TYPE_UNKNOWN] = { .subtype = IFM_200G_KR4_PAM4, /* XXX */ .baudrate = IF_Gbps(200ULL), }, [MLX5E_200GAUI_2_200GBASE_CR2_KR2][MLX5E_CABLE_TYPE_PASSIVE_COPPER] = { .subtype = IFM_200G_CR4_PAM4, /* XXX */ .baudrate = IF_Gbps(200ULL), }, [MLX5E_200GAUI_2_200GBASE_CR2_KR2][MLX5E_CABLE_TYPE_OPTICAL_MODULE] = { .subtype = IFM_200G_SR4, /* XXX */ .baudrate = IF_Gbps(200ULL), }, /**/ [MLX5E_200GAUI_4_200GBASE_CR4_KR4][MLX5E_CABLE_TYPE_UNKNOWN] = { .subtype = IFM_200G_KR4_PAM4, .baudrate = IF_Gbps(200ULL), }, [MLX5E_200GAUI_4_200GBASE_CR4_KR4][MLX5E_CABLE_TYPE_PASSIVE_COPPER] = { .subtype = IFM_200G_CR4_PAM4, .baudrate = IF_Gbps(200ULL), }, [MLX5E_200GAUI_4_200GBASE_CR4_KR4][MLX5E_CABLE_TYPE_OPTICAL_MODULE] = { .subtype = IFM_200G_SR4, .baudrate = IF_Gbps(200ULL), }, /**/ [MLX5E_400GAUI_8][MLX5E_CABLE_TYPE_UNKNOWN] = { .subtype = IFM_400G_LR8, /* XXX */ .baudrate = IF_Gbps(400ULL), }, /**/ [MLX5E_400GAUI_4_400GBASE_CR4_KR4][MLX5E_CABLE_TYPE_UNKNOWN] = { .subtype = IFM_400G_LR8, /* XXX */ .baudrate = IF_Gbps(400ULL), }, }; static const struct if_snd_tag_sw mlx5e_ul_snd_tag_sw = { .snd_tag_query = mlx5e_ul_snd_tag_query, .snd_tag_free = mlx5e_ul_snd_tag_free, .type = IF_SND_TAG_TYPE_UNLIMITED }; DEBUGNET_DEFINE(mlx5_en); MALLOC_DEFINE(M_MLX5EN, "MLX5EN", "MLX5 Ethernet"); static void mlx5e_update_carrier(struct mlx5e_priv *priv) { struct mlx5_core_dev *mdev = priv->mdev; u32 out[MLX5_ST_SZ_DW(ptys_reg)]; u32 eth_proto_oper; int error; u8 i; u8 cable_type; u8 port_state; u8 is_er_type; bool ext; struct media media_entry = {}; port_state = mlx5_query_vport_state(mdev, MLX5_QUERY_VPORT_STATE_IN_OP_MOD_VNIC_VPORT, 0); if (port_state == VPORT_STATE_UP) { priv->media_status_last |= IFM_ACTIVE; } else { priv->media_status_last &= ~IFM_ACTIVE; priv->media_active_last = IFM_ETHER; if_link_state_change(priv->ifp, LINK_STATE_DOWN); return; } error = mlx5_query_port_ptys(mdev, out, sizeof(out), MLX5_PTYS_EN, 1); if (error) { priv->media_active_last = IFM_ETHER; priv->ifp->if_baudrate = 1; mlx5_en_err(priv->ifp, "query port ptys failed: 0x%x\n", error); return; } ext = MLX5_CAP_PCAM_FEATURE(mdev, ptys_extended_ethernet); eth_proto_oper = MLX5_GET_ETH_PROTO(ptys_reg, out, ext, eth_proto_oper); i = ilog2(eth_proto_oper); if (ext) { error = mlx5_query_pddr_cable_type(mdev, 1, &cable_type); if (error != 0) { /* use fallback entry */ media_entry = mlx5e_ext_mode_table[i][MLX5E_CABLE_TYPE_UNKNOWN]; mlx5_en_err(priv->ifp, "query port pddr failed: %d\n", error); } else { media_entry = mlx5e_ext_mode_table[i][cable_type]; /* check if we should use fallback entry */ if (media_entry.subtype == 0) media_entry = mlx5e_ext_mode_table[i][MLX5E_CABLE_TYPE_UNKNOWN]; } } else { media_entry = mlx5e_mode_table[i]; } if (media_entry.subtype == 0) { mlx5_en_err(priv->ifp, "Could not find operational media subtype\n"); return; } switch (media_entry.subtype) { case IFM_10G_ER: error = mlx5_query_pddr_range_info(mdev, 1, &is_er_type); if (error != 0) { mlx5_en_err(priv->ifp, "query port pddr failed: %d\n", error); } if (error != 0 || is_er_type == 0) media_entry.subtype = IFM_10G_LR; break; case IFM_40G_LR4: error = mlx5_query_pddr_range_info(mdev, 1, &is_er_type); if (error != 0) { mlx5_en_err(priv->ifp, "query port pddr failed: %d\n", error); } if (error == 0 && is_er_type != 0) media_entry.subtype = IFM_40G_ER4; break; } priv->media_active_last = media_entry.subtype | IFM_ETHER | IFM_FDX; priv->ifp->if_baudrate = media_entry.baudrate; if_link_state_change(priv->ifp, LINK_STATE_UP); } static void mlx5e_media_status(struct ifnet *dev, struct ifmediareq *ifmr) { struct mlx5e_priv *priv = dev->if_softc; ifmr->ifm_status = priv->media_status_last; ifmr->ifm_current = ifmr->ifm_active = priv->media_active_last | (priv->params.rx_pauseframe_control ? IFM_ETH_RXPAUSE : 0) | (priv->params.tx_pauseframe_control ? IFM_ETH_TXPAUSE : 0); } static u32 mlx5e_find_link_mode(u32 subtype, bool ext) { u32 link_mode = 0; switch (subtype) { case 0: goto done; case IFM_10G_LR: subtype = IFM_10G_ER; break; case IFM_40G_ER4: subtype = IFM_40G_LR4; break; default: break; } if (ext) { for (unsigned i = 0; i != MLX5E_EXT_LINK_SPEEDS_NUMBER; i++) { for (unsigned j = 0; j != MLX5E_CABLE_TYPE_NUMBER; j++) { if (mlx5e_ext_mode_table[i][j].subtype == subtype) link_mode |= MLX5E_PROT_MASK(i); } } } else { for (unsigned i = 0; i != MLX5E_LINK_SPEEDS_NUMBER; i++) { if (mlx5e_mode_table[i].subtype == subtype) link_mode |= MLX5E_PROT_MASK(i); } } done: return (link_mode); } static int mlx5e_set_port_pause_and_pfc(struct mlx5e_priv *priv) { return (mlx5_set_port_pause_and_pfc(priv->mdev, 1, priv->params.rx_pauseframe_control, priv->params.tx_pauseframe_control, priv->params.rx_priority_flow_control, priv->params.tx_priority_flow_control)); } static int mlx5e_set_port_pfc(struct mlx5e_priv *priv) { int error; if (priv->gone != 0) { error = -ENXIO; } else if (priv->params.rx_pauseframe_control || priv->params.tx_pauseframe_control) { mlx5_en_err(priv->ifp, "Global pauseframes must be disabled before enabling PFC.\n"); error = -EINVAL; } else { error = mlx5e_set_port_pause_and_pfc(priv); } return (error); } static int mlx5e_media_change(struct ifnet *dev) { struct mlx5e_priv *priv = dev->if_softc; struct mlx5_core_dev *mdev = priv->mdev; u32 eth_proto_cap; u32 link_mode; u32 out[MLX5_ST_SZ_DW(ptys_reg)]; int was_opened; int locked; int error; bool ext; locked = PRIV_LOCKED(priv); if (!locked) PRIV_LOCK(priv); if (IFM_TYPE(priv->media.ifm_media) != IFM_ETHER) { error = EINVAL; goto done; } error = mlx5_query_port_ptys(mdev, out, sizeof(out), MLX5_PTYS_EN, 1); if (error != 0) { mlx5_en_err(dev, "Query port media capability failed\n"); goto done; } ext = MLX5_CAP_PCAM_FEATURE(mdev, ptys_extended_ethernet); link_mode = mlx5e_find_link_mode(IFM_SUBTYPE(priv->media.ifm_media), ext); /* query supported capabilities */ eth_proto_cap = MLX5_GET_ETH_PROTO(ptys_reg, out, ext, eth_proto_capability); /* check for autoselect */ if (IFM_SUBTYPE(priv->media.ifm_media) == IFM_AUTO) { link_mode = eth_proto_cap; if (link_mode == 0) { mlx5_en_err(dev, "Port media capability is zero\n"); error = EINVAL; goto done; } } else { link_mode = link_mode & eth_proto_cap; if (link_mode == 0) { mlx5_en_err(dev, "Not supported link mode requested\n"); error = EINVAL; goto done; } } if (priv->media.ifm_media & (IFM_ETH_RXPAUSE | IFM_ETH_TXPAUSE)) { /* check if PFC is enabled */ if (priv->params.rx_priority_flow_control || priv->params.tx_priority_flow_control) { mlx5_en_err(dev, "PFC must be disabled before enabling global pauseframes.\n"); error = EINVAL; goto done; } } /* update pauseframe control bits */ priv->params.rx_pauseframe_control = (priv->media.ifm_media & IFM_ETH_RXPAUSE) ? 1 : 0; priv->params.tx_pauseframe_control = (priv->media.ifm_media & IFM_ETH_TXPAUSE) ? 1 : 0; /* check if device is opened */ was_opened = test_bit(MLX5E_STATE_OPENED, &priv->state); /* reconfigure the hardware */ mlx5_set_port_status(mdev, MLX5_PORT_DOWN); mlx5_set_port_proto(mdev, link_mode, MLX5_PTYS_EN, ext); error = -mlx5e_set_port_pause_and_pfc(priv); if (was_opened) mlx5_set_port_status(mdev, MLX5_PORT_UP); done: if (!locked) PRIV_UNLOCK(priv); return (error); } static void mlx5e_update_carrier_work(struct work_struct *work) { struct mlx5e_priv *priv = container_of(work, struct mlx5e_priv, update_carrier_work); PRIV_LOCK(priv); if (test_bit(MLX5E_STATE_OPENED, &priv->state)) mlx5e_update_carrier(priv); PRIV_UNLOCK(priv); } #define MLX5E_PCIE_PERF_GET_64(a,b,c,d,e,f) \ s_debug->c = MLX5_GET64(mpcnt_reg, out, counter_set.f.c); #define MLX5E_PCIE_PERF_GET_32(a,b,c,d,e,f) \ s_debug->c = MLX5_GET(mpcnt_reg, out, counter_set.f.c); static void mlx5e_update_pcie_counters(struct mlx5e_priv *priv) { struct mlx5_core_dev *mdev = priv->mdev; struct mlx5e_port_stats_debug *s_debug = &priv->stats.port_stats_debug; const unsigned sz = MLX5_ST_SZ_BYTES(mpcnt_reg); void *out; void *in; int err; /* allocate firmware request structures */ in = mlx5_vzalloc(sz); out = mlx5_vzalloc(sz); if (in == NULL || out == NULL) goto free_out; MLX5_SET(mpcnt_reg, in, grp, MLX5_PCIE_PERFORMANCE_COUNTERS_GROUP); err = mlx5_core_access_reg(mdev, in, sz, out, sz, MLX5_REG_MPCNT, 0, 0); if (err != 0) goto free_out; MLX5E_PCIE_PERFORMANCE_COUNTERS_64(MLX5E_PCIE_PERF_GET_64) MLX5E_PCIE_PERFORMANCE_COUNTERS_32(MLX5E_PCIE_PERF_GET_32) MLX5_SET(mpcnt_reg, in, grp, MLX5_PCIE_TIMERS_AND_STATES_COUNTERS_GROUP); err = mlx5_core_access_reg(mdev, in, sz, out, sz, MLX5_REG_MPCNT, 0, 0); if (err != 0) goto free_out; MLX5E_PCIE_TIMERS_AND_STATES_COUNTERS_32(MLX5E_PCIE_PERF_GET_32) MLX5_SET(mpcnt_reg, in, grp, MLX5_PCIE_LANE_COUNTERS_GROUP); err = mlx5_core_access_reg(mdev, in, sz, out, sz, MLX5_REG_MPCNT, 0, 0); if (err != 0) goto free_out; MLX5E_PCIE_LANE_COUNTERS_32(MLX5E_PCIE_PERF_GET_32) free_out: /* free firmware request structures */ kvfree(in); kvfree(out); } /* * This function reads the physical port counters from the firmware * using a pre-defined layout defined by various MLX5E_PPORT_XXX() * macros. The output is converted from big-endian 64-bit values into * host endian ones and stored in the "priv->stats.pport" structure. */ static void mlx5e_update_pport_counters(struct mlx5e_priv *priv) { struct mlx5_core_dev *mdev = priv->mdev; struct mlx5e_pport_stats *s = &priv->stats.pport; struct mlx5e_port_stats_debug *s_debug = &priv->stats.port_stats_debug; u32 *in; u32 *out; const u64 *ptr; unsigned sz = MLX5_ST_SZ_BYTES(ppcnt_reg); unsigned x; unsigned y; unsigned z; /* allocate firmware request structures */ in = mlx5_vzalloc(sz); out = mlx5_vzalloc(sz); if (in == NULL || out == NULL) goto free_out; /* * Get pointer to the 64-bit counter set which is located at a * fixed offset in the output firmware request structure: */ ptr = (const uint64_t *)MLX5_ADDR_OF(ppcnt_reg, out, counter_set); MLX5_SET(ppcnt_reg, in, local_port, 1); /* read IEEE802_3 counter group using predefined counter layout */ MLX5_SET(ppcnt_reg, in, grp, MLX5_IEEE_802_3_COUNTERS_GROUP); mlx5_core_access_reg(mdev, in, sz, out, sz, MLX5_REG_PPCNT, 0, 0); for (x = 0, y = MLX5E_PPORT_PER_PRIO_STATS_NUM; x != MLX5E_PPORT_IEEE802_3_STATS_NUM; x++, y++) s->arg[y] = be64toh(ptr[x]); /* read RFC2819 counter group using predefined counter layout */ MLX5_SET(ppcnt_reg, in, grp, MLX5_RFC_2819_COUNTERS_GROUP); mlx5_core_access_reg(mdev, in, sz, out, sz, MLX5_REG_PPCNT, 0, 0); for (x = 0; x != MLX5E_PPORT_RFC2819_STATS_NUM; x++, y++) s->arg[y] = be64toh(ptr[x]); for (y = 0; x != MLX5E_PPORT_RFC2819_STATS_NUM + MLX5E_PPORT_RFC2819_STATS_DEBUG_NUM; x++, y++) s_debug->arg[y] = be64toh(ptr[x]); /* read RFC2863 counter group using predefined counter layout */ MLX5_SET(ppcnt_reg, in, grp, MLX5_RFC_2863_COUNTERS_GROUP); mlx5_core_access_reg(mdev, in, sz, out, sz, MLX5_REG_PPCNT, 0, 0); for (x = 0; x != MLX5E_PPORT_RFC2863_STATS_DEBUG_NUM; x++, y++) s_debug->arg[y] = be64toh(ptr[x]); /* read physical layer stats counter group using predefined counter layout */ MLX5_SET(ppcnt_reg, in, grp, MLX5_PHYSICAL_LAYER_COUNTERS_GROUP); mlx5_core_access_reg(mdev, in, sz, out, sz, MLX5_REG_PPCNT, 0, 0); for (x = 0; x != MLX5E_PPORT_PHYSICAL_LAYER_STATS_DEBUG_NUM; x++, y++) s_debug->arg[y] = be64toh(ptr[x]); /* read Extended Ethernet counter group using predefined counter layout */ MLX5_SET(ppcnt_reg, in, grp, MLX5_ETHERNET_EXTENDED_COUNTERS_GROUP); mlx5_core_access_reg(mdev, in, sz, out, sz, MLX5_REG_PPCNT, 0, 0); for (x = 0; x != MLX5E_PPORT_ETHERNET_EXTENDED_STATS_DEBUG_NUM; x++, y++) s_debug->arg[y] = be64toh(ptr[x]); /* read Extended Statistical Group */ if (MLX5_CAP_GEN(mdev, pcam_reg) && MLX5_CAP_PCAM_FEATURE(mdev, ppcnt_statistical_group) && MLX5_CAP_PCAM_FEATURE(mdev, per_lane_error_counters)) { /* read Extended Statistical counter group using predefined counter layout */ MLX5_SET(ppcnt_reg, in, grp, MLX5_PHYSICAL_LAYER_STATISTICAL_GROUP); mlx5_core_access_reg(mdev, in, sz, out, sz, MLX5_REG_PPCNT, 0, 0); for (x = 0; x != MLX5E_PPORT_STATISTICAL_DEBUG_NUM; x++, y++) s_debug->arg[y] = be64toh(ptr[x]); } /* read PCIE counters */ mlx5e_update_pcie_counters(priv); /* read per-priority counters */ MLX5_SET(ppcnt_reg, in, grp, MLX5_PER_PRIORITY_COUNTERS_GROUP); /* iterate all the priorities */ for (y = z = 0; z != MLX5E_PPORT_PER_PRIO_STATS_NUM_PRIO; z++) { MLX5_SET(ppcnt_reg, in, prio_tc, z); mlx5_core_access_reg(mdev, in, sz, out, sz, MLX5_REG_PPCNT, 0, 0); /* read per priority stats counter group using predefined counter layout */ for (x = 0; x != (MLX5E_PPORT_PER_PRIO_STATS_NUM / MLX5E_PPORT_PER_PRIO_STATS_NUM_PRIO); x++, y++) s->arg[y] = be64toh(ptr[x]); } free_out: /* free firmware request structures */ kvfree(in); kvfree(out); } static void mlx5e_grp_vnic_env_update_stats(struct mlx5e_priv *priv) { u32 out[MLX5_ST_SZ_DW(query_vnic_env_out)] = {}; u32 in[MLX5_ST_SZ_DW(query_vnic_env_in)] = {}; if (!MLX5_CAP_GEN(priv->mdev, nic_receive_steering_discard)) return; MLX5_SET(query_vnic_env_in, in, opcode, MLX5_CMD_OP_QUERY_VNIC_ENV); MLX5_SET(query_vnic_env_in, in, op_mod, 0); MLX5_SET(query_vnic_env_in, in, other_vport, 0); if (mlx5_cmd_exec(priv->mdev, in, sizeof(in), out, sizeof(out)) != 0) return; priv->stats.vport.rx_steer_missed_packets = MLX5_GET64(query_vnic_env_out, out, vport_env.nic_receive_steering_discard); } /* * This function is called regularly to collect all statistics * counters from the firmware. The values can be viewed through the * sysctl interface. Execution is serialized using the priv's global * configuration lock. */ static void mlx5e_update_stats_locked(struct mlx5e_priv *priv) { struct mlx5_core_dev *mdev = priv->mdev; struct mlx5e_vport_stats *s = &priv->stats.vport; struct mlx5e_sq_stats *sq_stats; #if (__FreeBSD_version < 1100000) struct ifnet *ifp = priv->ifp; #endif u32 in[MLX5_ST_SZ_DW(query_vport_counter_in)]; u32 *out; int outlen = MLX5_ST_SZ_BYTES(query_vport_counter_out); u64 tso_packets = 0; u64 tso_bytes = 0; u64 tx_queue_dropped = 0; u64 tx_defragged = 0; u64 tx_offload_none = 0; u64 lro_packets = 0; u64 lro_bytes = 0; u64 sw_lro_queued = 0; u64 sw_lro_flushed = 0; u64 rx_csum_none = 0; u64 rx_wqe_err = 0; u64 rx_packets = 0; u64 rx_bytes = 0; u32 rx_out_of_buffer = 0; int error; int i; int j; out = mlx5_vzalloc(outlen); if (out == NULL) goto free_out; /* Collect firts the SW counters and then HW for consistency */ for (i = 0; i < priv->params.num_channels; i++) { struct mlx5e_channel *pch = priv->channel + i; struct mlx5e_rq *rq = &pch->rq; struct mlx5e_rq_stats *rq_stats = &pch->rq.stats; /* collect stats from LRO */ rq_stats->sw_lro_queued = rq->lro.lro_queued; rq_stats->sw_lro_flushed = rq->lro.lro_flushed; sw_lro_queued += rq_stats->sw_lro_queued; sw_lro_flushed += rq_stats->sw_lro_flushed; lro_packets += rq_stats->lro_packets; lro_bytes += rq_stats->lro_bytes; rx_csum_none += rq_stats->csum_none; rx_wqe_err += rq_stats->wqe_err; rx_packets += rq_stats->packets; rx_bytes += rq_stats->bytes; for (j = 0; j < priv->num_tc; j++) { sq_stats = &pch->sq[j].stats; tso_packets += sq_stats->tso_packets; tso_bytes += sq_stats->tso_bytes; tx_queue_dropped += sq_stats->dropped; tx_queue_dropped += sq_stats->enobuf; tx_defragged += sq_stats->defragged; tx_offload_none += sq_stats->csum_offload_none; } } #ifdef RATELIMIT /* Collect statistics from all rate-limit queues */ for (j = 0; j < priv->rl.param.tx_worker_threads_def; j++) { struct mlx5e_rl_worker *rlw = priv->rl.workers + j; for (i = 0; i < priv->rl.param.tx_channels_per_worker_def; i++) { struct mlx5e_rl_channel *channel = rlw->channels + i; struct mlx5e_sq *sq = channel->sq; if (sq == NULL) continue; sq_stats = &sq->stats; tso_packets += sq_stats->tso_packets; tso_bytes += sq_stats->tso_bytes; tx_queue_dropped += sq_stats->dropped; tx_queue_dropped += sq_stats->enobuf; tx_defragged += sq_stats->defragged; tx_offload_none += sq_stats->csum_offload_none; } } #endif /* update counters */ s->tso_packets = tso_packets; s->tso_bytes = tso_bytes; s->tx_queue_dropped = tx_queue_dropped; s->tx_defragged = tx_defragged; s->lro_packets = lro_packets; s->lro_bytes = lro_bytes; s->sw_lro_queued = sw_lro_queued; s->sw_lro_flushed = sw_lro_flushed; s->rx_csum_none = rx_csum_none; s->rx_wqe_err = rx_wqe_err; s->rx_packets = rx_packets; s->rx_bytes = rx_bytes; mlx5e_grp_vnic_env_update_stats(priv); /* HW counters */ memset(in, 0, sizeof(in)); MLX5_SET(query_vport_counter_in, in, opcode, MLX5_CMD_OP_QUERY_VPORT_COUNTER); MLX5_SET(query_vport_counter_in, in, op_mod, 0); MLX5_SET(query_vport_counter_in, in, other_vport, 0); memset(out, 0, outlen); /* get number of out-of-buffer drops first */ if (test_bit(MLX5E_STATE_OPENED, &priv->state) != 0 && mlx5_vport_query_out_of_rx_buffer(mdev, priv->counter_set_id, &rx_out_of_buffer) == 0) { s->rx_out_of_buffer = rx_out_of_buffer; } /* get port statistics */ if (mlx5_cmd_exec(mdev, in, sizeof(in), out, outlen) == 0) { #define MLX5_GET_CTR(out, x) \ MLX5_GET64(query_vport_counter_out, out, x) s->rx_error_packets = MLX5_GET_CTR(out, received_errors.packets); s->rx_error_bytes = MLX5_GET_CTR(out, received_errors.octets); s->tx_error_packets = MLX5_GET_CTR(out, transmit_errors.packets); s->tx_error_bytes = MLX5_GET_CTR(out, transmit_errors.octets); s->rx_unicast_packets = MLX5_GET_CTR(out, received_eth_unicast.packets); s->rx_unicast_bytes = MLX5_GET_CTR(out, received_eth_unicast.octets); s->tx_unicast_packets = MLX5_GET_CTR(out, transmitted_eth_unicast.packets); s->tx_unicast_bytes = MLX5_GET_CTR(out, transmitted_eth_unicast.octets); s->rx_multicast_packets = MLX5_GET_CTR(out, received_eth_multicast.packets); s->rx_multicast_bytes = MLX5_GET_CTR(out, received_eth_multicast.octets); s->tx_multicast_packets = MLX5_GET_CTR(out, transmitted_eth_multicast.packets); s->tx_multicast_bytes = MLX5_GET_CTR(out, transmitted_eth_multicast.octets); s->rx_broadcast_packets = MLX5_GET_CTR(out, received_eth_broadcast.packets); s->rx_broadcast_bytes = MLX5_GET_CTR(out, received_eth_broadcast.octets); s->tx_broadcast_packets = MLX5_GET_CTR(out, transmitted_eth_broadcast.packets); s->tx_broadcast_bytes = MLX5_GET_CTR(out, transmitted_eth_broadcast.octets); s->tx_packets = s->tx_unicast_packets + s->tx_multicast_packets + s->tx_broadcast_packets; s->tx_bytes = s->tx_unicast_bytes + s->tx_multicast_bytes + s->tx_broadcast_bytes; /* Update calculated offload counters */ s->tx_csum_offload = s->tx_packets - tx_offload_none; s->rx_csum_good = s->rx_packets - s->rx_csum_none; } /* Get physical port counters */ mlx5e_update_pport_counters(priv); s->tx_jumbo_packets = priv->stats.port_stats_debug.tx_stat_p1519to2047octets + priv->stats.port_stats_debug.tx_stat_p2048to4095octets + priv->stats.port_stats_debug.tx_stat_p4096to8191octets + priv->stats.port_stats_debug.tx_stat_p8192to10239octets; #if (__FreeBSD_version < 1100000) /* no get_counters interface in fbsd 10 */ ifp->if_ipackets = s->rx_packets; ifp->if_ierrors = priv->stats.pport.in_range_len_errors + priv->stats.pport.out_of_range_len + priv->stats.pport.too_long_errors + priv->stats.pport.check_seq_err + priv->stats.pport.alignment_err; ifp->if_iqdrops = s->rx_out_of_buffer; ifp->if_opackets = s->tx_packets; ifp->if_oerrors = priv->stats.port_stats_debug.out_discards; ifp->if_snd.ifq_drops = s->tx_queue_dropped; ifp->if_ibytes = s->rx_bytes; ifp->if_obytes = s->tx_bytes; ifp->if_collisions = priv->stats.pport.collisions; #endif free_out: kvfree(out); /* Update diagnostics, if any */ if (priv->params_ethtool.diag_pci_enable || priv->params_ethtool.diag_general_enable) { error = mlx5_core_get_diagnostics_full(mdev, priv->params_ethtool.diag_pci_enable ? &priv->params_pci : NULL, priv->params_ethtool.diag_general_enable ? &priv->params_general : NULL); if (error != 0) mlx5_en_err(priv->ifp, "Failed reading diagnostics: %d\n", error); } /* Update FEC, if any */ error = mlx5e_fec_update(priv); if (error != 0 && error != EOPNOTSUPP) { mlx5_en_err(priv->ifp, "Updating FEC failed: %d\n", error); } /* Update temperature, if any */ if (priv->params_ethtool.hw_num_temp != 0) { error = mlx5e_hw_temperature_update(priv); if (error != 0 && error != EOPNOTSUPP) { mlx5_en_err(priv->ifp, "Updating temperature failed: %d\n", error); } } } static void mlx5e_update_stats_work(struct work_struct *work) { struct mlx5e_priv *priv; priv = container_of(work, struct mlx5e_priv, update_stats_work); PRIV_LOCK(priv); if (test_bit(MLX5E_STATE_OPENED, &priv->state) != 0 && !test_bit(MLX5_INTERFACE_STATE_TEARDOWN, &priv->mdev->intf_state)) mlx5e_update_stats_locked(priv); PRIV_UNLOCK(priv); } static void mlx5e_update_stats(void *arg) { struct mlx5e_priv *priv = arg; queue_work(priv->wq, &priv->update_stats_work); callout_reset(&priv->watchdog, hz / 4, &mlx5e_update_stats, priv); } static void mlx5e_async_event_sub(struct mlx5e_priv *priv, enum mlx5_dev_event event) { switch (event) { case MLX5_DEV_EVENT_PORT_UP: case MLX5_DEV_EVENT_PORT_DOWN: queue_work(priv->wq, &priv->update_carrier_work); break; default: break; } } static void mlx5e_async_event(struct mlx5_core_dev *mdev, void *vpriv, enum mlx5_dev_event event, unsigned long param) { struct mlx5e_priv *priv = vpriv; mtx_lock(&priv->async_events_mtx); if (test_bit(MLX5E_STATE_ASYNC_EVENTS_ENABLE, &priv->state)) mlx5e_async_event_sub(priv, event); mtx_unlock(&priv->async_events_mtx); } static void mlx5e_enable_async_events(struct mlx5e_priv *priv) { set_bit(MLX5E_STATE_ASYNC_EVENTS_ENABLE, &priv->state); } static void mlx5e_disable_async_events(struct mlx5e_priv *priv) { mtx_lock(&priv->async_events_mtx); clear_bit(MLX5E_STATE_ASYNC_EVENTS_ENABLE, &priv->state); mtx_unlock(&priv->async_events_mtx); } static void mlx5e_calibration_callout(void *arg); static int mlx5e_calibration_duration = 20; static int mlx5e_fast_calibration = 1; static int mlx5e_normal_calibration = 30; static SYSCTL_NODE(_hw_mlx5, OID_AUTO, calibr, CTLFLAG_RW | CTLFLAG_MPSAFE, 0, "MLX5 timestamp calibration parameters"); SYSCTL_INT(_hw_mlx5_calibr, OID_AUTO, duration, CTLFLAG_RWTUN, &mlx5e_calibration_duration, 0, "Duration of initial calibration"); SYSCTL_INT(_hw_mlx5_calibr, OID_AUTO, fast, CTLFLAG_RWTUN, &mlx5e_fast_calibration, 0, "Recalibration interval during initial calibration"); SYSCTL_INT(_hw_mlx5_calibr, OID_AUTO, normal, CTLFLAG_RWTUN, &mlx5e_normal_calibration, 0, "Recalibration interval during normal operations"); /* * Ignites the calibration process. */ static void mlx5e_reset_calibration_callout(struct mlx5e_priv *priv) { if (priv->clbr_done == 0) mlx5e_calibration_callout(priv); else callout_reset_sbt_curcpu(&priv->tstmp_clbr, (priv->clbr_done < mlx5e_calibration_duration ? mlx5e_fast_calibration : mlx5e_normal_calibration) * SBT_1S, 0, mlx5e_calibration_callout, priv, C_DIRECT_EXEC); } static uint64_t mlx5e_timespec2usec(const struct timespec *ts) { return ((uint64_t)ts->tv_sec * 1000000000 + ts->tv_nsec); } static uint64_t mlx5e_hw_clock(struct mlx5e_priv *priv) { struct mlx5_init_seg *iseg; uint32_t hw_h, hw_h1, hw_l; iseg = priv->mdev->iseg; do { hw_h = ioread32be(&iseg->internal_timer_h); hw_l = ioread32be(&iseg->internal_timer_l); hw_h1 = ioread32be(&iseg->internal_timer_h); } while (hw_h1 != hw_h); return (((uint64_t)hw_h << 32) | hw_l); } /* * The calibration callout, it runs either in the context of the * thread which enables calibration, or in callout. It takes the * snapshot of system and adapter clocks, then advances the pointers to * the calibration point to allow rx path to read the consistent data * lockless. */ static void mlx5e_calibration_callout(void *arg) { struct mlx5e_priv *priv; struct mlx5e_clbr_point *next, *curr; struct timespec ts; int clbr_curr_next; priv = arg; curr = &priv->clbr_points[priv->clbr_curr]; clbr_curr_next = priv->clbr_curr + 1; if (clbr_curr_next >= nitems(priv->clbr_points)) clbr_curr_next = 0; next = &priv->clbr_points[clbr_curr_next]; next->base_prev = curr->base_curr; next->clbr_hw_prev = curr->clbr_hw_curr; next->clbr_hw_curr = mlx5e_hw_clock(priv); if (((next->clbr_hw_curr - curr->clbr_hw_curr) >> MLX5E_TSTMP_PREC) == 0) { if (priv->clbr_done != 0) { mlx5_en_err(priv->ifp, "HW failed tstmp frozen %#jx %#jx, disabling\n", next->clbr_hw_curr, curr->clbr_hw_prev); priv->clbr_done = 0; } atomic_store_rel_int(&curr->clbr_gen, 0); return; } nanouptime(&ts); next->base_curr = mlx5e_timespec2usec(&ts); curr->clbr_gen = 0; atomic_thread_fence_rel(); priv->clbr_curr = clbr_curr_next; atomic_store_rel_int(&next->clbr_gen, ++(priv->clbr_gen)); if (priv->clbr_done < mlx5e_calibration_duration) priv->clbr_done++; mlx5e_reset_calibration_callout(priv); } static const char *mlx5e_rq_stats_desc[] = { MLX5E_RQ_STATS(MLX5E_STATS_DESC) }; static int mlx5e_create_rq(struct mlx5e_channel *c, struct mlx5e_rq_param *param, struct mlx5e_rq *rq) { struct mlx5e_priv *priv = c->priv; struct mlx5_core_dev *mdev = priv->mdev; char buffer[16]; void *rqc = param->rqc; void *rqc_wq = MLX5_ADDR_OF(rqc, rqc, wq); int wq_sz; int err; int i; u32 nsegs, wqe_sz; err = mlx5e_get_wqe_sz(priv, &wqe_sz, &nsegs); if (err != 0) goto done; /* Create DMA descriptor TAG */ if ((err = -bus_dma_tag_create( bus_get_dma_tag(mdev->pdev->dev.bsddev), 1, /* any alignment */ 0, /* no boundary */ BUS_SPACE_MAXADDR, /* lowaddr */ BUS_SPACE_MAXADDR, /* highaddr */ NULL, NULL, /* filter, filterarg */ nsegs * MLX5E_MAX_RX_BYTES, /* maxsize */ nsegs, /* nsegments */ nsegs * MLX5E_MAX_RX_BYTES, /* maxsegsize */ 0, /* flags */ NULL, NULL, /* lockfunc, lockfuncarg */ &rq->dma_tag))) goto done; err = mlx5_wq_ll_create(mdev, ¶m->wq, rqc_wq, &rq->wq, &rq->wq_ctrl); if (err) goto err_free_dma_tag; rq->wq.db = &rq->wq.db[MLX5_RCV_DBR]; err = mlx5e_get_wqe_sz(priv, &rq->wqe_sz, &rq->nsegs); if (err != 0) goto err_rq_wq_destroy; wq_sz = mlx5_wq_ll_get_size(&rq->wq); err = -tcp_lro_init_args(&rq->lro, priv->ifp, TCP_LRO_ENTRIES, wq_sz); if (err) goto err_rq_wq_destroy; rq->mbuf = malloc_domainset(wq_sz * sizeof(rq->mbuf[0]), M_MLX5EN, mlx5_dev_domainset(mdev), M_WAITOK | M_ZERO); for (i = 0; i != wq_sz; i++) { struct mlx5e_rx_wqe *wqe = mlx5_wq_ll_get_wqe(&rq->wq, i); int j; err = -bus_dmamap_create(rq->dma_tag, 0, &rq->mbuf[i].dma_map); if (err != 0) { while (i--) bus_dmamap_destroy(rq->dma_tag, rq->mbuf[i].dma_map); goto err_rq_mbuf_free; } /* set value for constant fields */ for (j = 0; j < rq->nsegs; j++) wqe->data[j].lkey = cpu_to_be32(priv->mr.key); } INIT_WORK(&rq->dim.work, mlx5e_dim_work); if (priv->params.rx_cq_moderation_mode < 2) { rq->dim.mode = NET_DIM_CQ_PERIOD_MODE_DISABLED; } else { void *cqc = container_of(param, struct mlx5e_channel_param, rq)->rx_cq.cqc; switch (MLX5_GET(cqc, cqc, cq_period_mode)) { case MLX5_CQ_PERIOD_MODE_START_FROM_EQE: rq->dim.mode = NET_DIM_CQ_PERIOD_MODE_START_FROM_EQE; break; case MLX5_CQ_PERIOD_MODE_START_FROM_CQE: rq->dim.mode = NET_DIM_CQ_PERIOD_MODE_START_FROM_CQE; break; default: rq->dim.mode = NET_DIM_CQ_PERIOD_MODE_DISABLED; break; } } rq->ifp = priv->ifp; rq->channel = c; rq->ix = c->ix; snprintf(buffer, sizeof(buffer), "rxstat%d", c->ix); mlx5e_create_stats(&rq->stats.ctx, SYSCTL_CHILDREN(priv->sysctl_ifnet), buffer, mlx5e_rq_stats_desc, MLX5E_RQ_STATS_NUM, rq->stats.arg); return (0); err_rq_mbuf_free: free(rq->mbuf, M_MLX5EN); tcp_lro_free(&rq->lro); err_rq_wq_destroy: mlx5_wq_destroy(&rq->wq_ctrl); err_free_dma_tag: bus_dma_tag_destroy(rq->dma_tag); done: return (err); } static void mlx5e_destroy_rq(struct mlx5e_rq *rq) { int wq_sz; int i; /* destroy all sysctl nodes */ sysctl_ctx_free(&rq->stats.ctx); /* free leftover LRO packets, if any */ tcp_lro_free(&rq->lro); wq_sz = mlx5_wq_ll_get_size(&rq->wq); for (i = 0; i != wq_sz; i++) { if (rq->mbuf[i].mbuf != NULL) { bus_dmamap_unload(rq->dma_tag, rq->mbuf[i].dma_map); m_freem(rq->mbuf[i].mbuf); } bus_dmamap_destroy(rq->dma_tag, rq->mbuf[i].dma_map); } free(rq->mbuf, M_MLX5EN); mlx5_wq_destroy(&rq->wq_ctrl); bus_dma_tag_destroy(rq->dma_tag); } static int mlx5e_enable_rq(struct mlx5e_rq *rq, struct mlx5e_rq_param *param) { struct mlx5e_channel *c = rq->channel; struct mlx5e_priv *priv = c->priv; struct mlx5_core_dev *mdev = priv->mdev; void *in; void *rqc; void *wq; int inlen; int err; u8 ts_format; inlen = MLX5_ST_SZ_BYTES(create_rq_in) + sizeof(u64) * rq->wq_ctrl.buf.npages; in = mlx5_vzalloc(inlen); if (in == NULL) return (-ENOMEM); ts_format = mlx5_get_rq_default_ts(mdev); rqc = MLX5_ADDR_OF(create_rq_in, in, ctx); wq = MLX5_ADDR_OF(rqc, rqc, wq); memcpy(rqc, param->rqc, sizeof(param->rqc)); MLX5_SET(rqc, rqc, cqn, c->rq.cq.mcq.cqn); MLX5_SET(rqc, rqc, state, MLX5_RQC_STATE_RST); MLX5_SET(rqc, rqc, ts_format, ts_format); MLX5_SET(rqc, rqc, flush_in_error_en, 1); if (priv->counter_set_id >= 0) MLX5_SET(rqc, rqc, counter_set_id, priv->counter_set_id); MLX5_SET(wq, wq, log_wq_pg_sz, rq->wq_ctrl.buf.page_shift - PAGE_SHIFT); MLX5_SET64(wq, wq, dbr_addr, rq->wq_ctrl.db.dma); mlx5_fill_page_array(&rq->wq_ctrl.buf, (__be64 *) MLX5_ADDR_OF(wq, wq, pas)); err = mlx5_core_create_rq(mdev, in, inlen, &rq->rqn); kvfree(in); return (err); } static int mlx5e_modify_rq(struct mlx5e_rq *rq, int curr_state, int next_state) { struct mlx5e_channel *c = rq->channel; struct mlx5e_priv *priv = c->priv; struct mlx5_core_dev *mdev = priv->mdev; void *in; void *rqc; int inlen; int err; inlen = MLX5_ST_SZ_BYTES(modify_rq_in); in = mlx5_vzalloc(inlen); if (in == NULL) return (-ENOMEM); rqc = MLX5_ADDR_OF(modify_rq_in, in, ctx); MLX5_SET(modify_rq_in, in, rqn, rq->rqn); MLX5_SET(modify_rq_in, in, rq_state, curr_state); MLX5_SET(rqc, rqc, state, next_state); err = mlx5_core_modify_rq(mdev, in, inlen); kvfree(in); return (err); } static void mlx5e_disable_rq(struct mlx5e_rq *rq) { struct mlx5e_channel *c = rq->channel; struct mlx5e_priv *priv = c->priv; struct mlx5_core_dev *mdev = priv->mdev; mlx5_core_destroy_rq(mdev, rq->rqn); } static int mlx5e_wait_for_min_rx_wqes(struct mlx5e_rq *rq) { struct mlx5e_channel *c = rq->channel; struct mlx5e_priv *priv = c->priv; struct mlx5_wq_ll *wq = &rq->wq; int i; for (i = 0; i < 1000; i++) { if (wq->cur_sz >= priv->params.min_rx_wqes) return (0); msleep(4); } return (-ETIMEDOUT); } static int mlx5e_open_rq(struct mlx5e_channel *c, struct mlx5e_rq_param *param, struct mlx5e_rq *rq) { int err; err = mlx5e_create_rq(c, param, rq); if (err) return (err); err = mlx5e_enable_rq(rq, param); if (err) goto err_destroy_rq; err = mlx5e_modify_rq(rq, MLX5_RQC_STATE_RST, MLX5_RQC_STATE_RDY); if (err) goto err_disable_rq; c->rq.enabled = 1; return (0); err_disable_rq: mlx5e_disable_rq(rq); err_destroy_rq: mlx5e_destroy_rq(rq); return (err); } static void mlx5e_close_rq(struct mlx5e_rq *rq) { mtx_lock(&rq->mtx); rq->enabled = 0; callout_stop(&rq->watchdog); mtx_unlock(&rq->mtx); mlx5e_modify_rq(rq, MLX5_RQC_STATE_RDY, MLX5_RQC_STATE_ERR); } static void mlx5e_close_rq_wait(struct mlx5e_rq *rq) { mlx5e_disable_rq(rq); mlx5e_close_cq(&rq->cq); cancel_work_sync(&rq->dim.work); mlx5e_destroy_rq(rq); } void mlx5e_free_sq_db(struct mlx5e_sq *sq) { int wq_sz = mlx5_wq_cyc_get_size(&sq->wq); int x; for (x = 0; x != wq_sz; x++) { if (unlikely(sq->mbuf[x].p_refcount != NULL)) { atomic_add_int(sq->mbuf[x].p_refcount, -1); sq->mbuf[x].p_refcount = NULL; } if (sq->mbuf[x].mbuf != NULL) { bus_dmamap_unload(sq->dma_tag, sq->mbuf[x].dma_map); m_freem(sq->mbuf[x].mbuf); } bus_dmamap_destroy(sq->dma_tag, sq->mbuf[x].dma_map); } free(sq->mbuf, M_MLX5EN); } int mlx5e_alloc_sq_db(struct mlx5e_sq *sq) { int wq_sz = mlx5_wq_cyc_get_size(&sq->wq); int err; int x; sq->mbuf = malloc_domainset(wq_sz * sizeof(sq->mbuf[0]), M_MLX5EN, mlx5_dev_domainset(sq->priv->mdev), M_WAITOK | M_ZERO); /* Create DMA descriptor MAPs */ for (x = 0; x != wq_sz; x++) { err = -bus_dmamap_create(sq->dma_tag, 0, &sq->mbuf[x].dma_map); if (err != 0) { while (x--) bus_dmamap_destroy(sq->dma_tag, sq->mbuf[x].dma_map); free(sq->mbuf, M_MLX5EN); return (err); } } return (0); } static const char *mlx5e_sq_stats_desc[] = { MLX5E_SQ_STATS(MLX5E_STATS_DESC) }; void mlx5e_update_sq_inline(struct mlx5e_sq *sq) { sq->max_inline = sq->priv->params.tx_max_inline; sq->min_inline_mode = sq->priv->params.tx_min_inline_mode; /* * Check if trust state is DSCP or if inline mode is NONE which * indicates CX-5 or newer hardware. */ if (sq->priv->params_ethtool.trust_state != MLX5_QPTS_TRUST_PCP || sq->min_inline_mode == MLX5_INLINE_MODE_NONE) { if (MLX5_CAP_ETH(sq->priv->mdev, wqe_vlan_insert)) sq->min_insert_caps = MLX5E_INSERT_VLAN | MLX5E_INSERT_NON_VLAN; else sq->min_insert_caps = MLX5E_INSERT_NON_VLAN; } else { sq->min_insert_caps = 0; } } static void mlx5e_refresh_sq_inline_sub(struct mlx5e_priv *priv, struct mlx5e_channel *c) { int i; for (i = 0; i != priv->num_tc; i++) { mtx_lock(&c->sq[i].lock); mlx5e_update_sq_inline(&c->sq[i]); mtx_unlock(&c->sq[i].lock); } } void mlx5e_refresh_sq_inline(struct mlx5e_priv *priv) { int i; /* check if channels are closed */ if (test_bit(MLX5E_STATE_OPENED, &priv->state) == 0) return; for (i = 0; i < priv->params.num_channels; i++) mlx5e_refresh_sq_inline_sub(priv, &priv->channel[i]); } static int mlx5e_create_sq(struct mlx5e_channel *c, int tc, struct mlx5e_sq_param *param, struct mlx5e_sq *sq) { struct mlx5e_priv *priv = c->priv; struct mlx5_core_dev *mdev = priv->mdev; char buffer[16]; void *sqc = param->sqc; void *sqc_wq = MLX5_ADDR_OF(sqc, sqc, wq); int err; /* Create DMA descriptor TAG */ if ((err = -bus_dma_tag_create( bus_get_dma_tag(mdev->pdev->dev.bsddev), 1, /* any alignment */ 0, /* no boundary */ BUS_SPACE_MAXADDR, /* lowaddr */ BUS_SPACE_MAXADDR, /* highaddr */ NULL, NULL, /* filter, filterarg */ MLX5E_MAX_TX_PAYLOAD_SIZE, /* maxsize */ MLX5E_MAX_TX_MBUF_FRAGS, /* nsegments */ MLX5E_MAX_TX_MBUF_SIZE, /* maxsegsize */ 0, /* flags */ NULL, NULL, /* lockfunc, lockfuncarg */ &sq->dma_tag))) goto done; sq->mkey_be = cpu_to_be32(priv->mr.key); sq->ifp = priv->ifp; sq->priv = priv; sq->tc = tc; err = mlx5_wq_cyc_create(mdev, ¶m->wq, sqc_wq, &sq->wq, &sq->wq_ctrl); if (err) goto err_free_dma_tag; sq->wq.db = &sq->wq.db[MLX5_SND_DBR]; err = mlx5e_alloc_sq_db(sq); if (err) goto err_sq_wq_destroy; mlx5e_update_sq_inline(sq); snprintf(buffer, sizeof(buffer), "txstat%dtc%d", c->ix, tc); mlx5e_create_stats(&sq->stats.ctx, SYSCTL_CHILDREN(priv->sysctl_ifnet), buffer, mlx5e_sq_stats_desc, MLX5E_SQ_STATS_NUM, sq->stats.arg); return (0); err_sq_wq_destroy: mlx5_wq_destroy(&sq->wq_ctrl); err_free_dma_tag: bus_dma_tag_destroy(sq->dma_tag); done: return (err); } static void mlx5e_destroy_sq(struct mlx5e_sq *sq) { /* destroy all sysctl nodes */ sysctl_ctx_free(&sq->stats.ctx); mlx5e_free_sq_db(sq); mlx5_wq_destroy(&sq->wq_ctrl); bus_dma_tag_destroy(sq->dma_tag); } int mlx5e_enable_sq(struct mlx5e_sq *sq, struct mlx5e_sq_param *param, const struct mlx5_sq_bfreg *bfreg, int tis_num) { void *in; void *sqc; void *wq; int inlen; int err; u8 ts_format; inlen = MLX5_ST_SZ_BYTES(create_sq_in) + sizeof(u64) * sq->wq_ctrl.buf.npages; in = mlx5_vzalloc(inlen); if (in == NULL) return (-ENOMEM); sq->uar_map = bfreg->map; ts_format = mlx5_get_sq_default_ts(sq->priv->mdev); sqc = MLX5_ADDR_OF(create_sq_in, in, ctx); wq = MLX5_ADDR_OF(sqc, sqc, wq); memcpy(sqc, param->sqc, sizeof(param->sqc)); MLX5_SET(sqc, sqc, tis_num_0, tis_num); MLX5_SET(sqc, sqc, cqn, sq->cq.mcq.cqn); MLX5_SET(sqc, sqc, state, MLX5_SQC_STATE_RST); MLX5_SET(sqc, sqc, ts_format, ts_format); MLX5_SET(sqc, sqc, tis_lst_sz, 1); MLX5_SET(sqc, sqc, flush_in_error_en, 1); MLX5_SET(sqc, sqc, allow_swp, 1); MLX5_SET(wq, wq, wq_type, MLX5_WQ_TYPE_CYCLIC); MLX5_SET(wq, wq, uar_page, bfreg->index); MLX5_SET(wq, wq, log_wq_pg_sz, sq->wq_ctrl.buf.page_shift - PAGE_SHIFT); MLX5_SET64(wq, wq, dbr_addr, sq->wq_ctrl.db.dma); mlx5_fill_page_array(&sq->wq_ctrl.buf, (__be64 *) MLX5_ADDR_OF(wq, wq, pas)); err = mlx5_core_create_sq(sq->priv->mdev, in, inlen, &sq->sqn); kvfree(in); return (err); } int mlx5e_modify_sq(struct mlx5e_sq *sq, int curr_state, int next_state) { void *in; void *sqc; int inlen; int err; inlen = MLX5_ST_SZ_BYTES(modify_sq_in); in = mlx5_vzalloc(inlen); if (in == NULL) return (-ENOMEM); sqc = MLX5_ADDR_OF(modify_sq_in, in, ctx); MLX5_SET(modify_sq_in, in, sqn, sq->sqn); MLX5_SET(modify_sq_in, in, sq_state, curr_state); MLX5_SET(sqc, sqc, state, next_state); err = mlx5_core_modify_sq(sq->priv->mdev, in, inlen); kvfree(in); return (err); } void mlx5e_disable_sq(struct mlx5e_sq *sq) { mlx5_core_destroy_sq(sq->priv->mdev, sq->sqn); } static int mlx5e_open_sq(struct mlx5e_channel *c, int tc, struct mlx5e_sq_param *param, struct mlx5e_sq *sq) { int err; sq->cev_factor = c->priv->params_ethtool.tx_completion_fact; /* ensure the TX completion event factor is not zero */ if (sq->cev_factor == 0) sq->cev_factor = 1; err = mlx5e_create_sq(c, tc, param, sq); if (err) return (err); err = mlx5e_enable_sq(sq, param, &c->bfreg, c->priv->tisn[tc]); if (err) goto err_destroy_sq; err = mlx5e_modify_sq(sq, MLX5_SQC_STATE_RST, MLX5_SQC_STATE_RDY); if (err) goto err_disable_sq; WRITE_ONCE(sq->running, 1); return (0); err_disable_sq: mlx5e_disable_sq(sq); err_destroy_sq: mlx5e_destroy_sq(sq); return (err); } static void mlx5e_sq_send_nops_locked(struct mlx5e_sq *sq, int can_sleep) { /* fill up remainder with NOPs */ while (sq->cev_counter != 0) { while (!mlx5e_sq_has_room_for(sq, 1)) { if (can_sleep != 0) { mtx_unlock(&sq->lock); msleep(4); mtx_lock(&sq->lock); } else { goto done; } } /* send a single NOP */ mlx5e_send_nop(sq, 1); atomic_thread_fence_rel(); } done: /* Check if we need to write the doorbell */ if (likely(sq->doorbell.d64 != 0)) { mlx5e_tx_notify_hw(sq, sq->doorbell.d32); sq->doorbell.d64 = 0; } } void mlx5e_sq_cev_timeout(void *arg) { struct mlx5e_sq *sq = arg; mtx_assert(&sq->lock, MA_OWNED); /* check next state */ switch (sq->cev_next_state) { case MLX5E_CEV_STATE_SEND_NOPS: /* fill TX ring with NOPs, if any */ mlx5e_sq_send_nops_locked(sq, 0); /* check if completed */ if (sq->cev_counter == 0) { sq->cev_next_state = MLX5E_CEV_STATE_INITIAL; return; } break; default: /* send NOPs on next timeout */ sq->cev_next_state = MLX5E_CEV_STATE_SEND_NOPS; break; } /* restart timer */ callout_reset_curcpu(&sq->cev_callout, hz, mlx5e_sq_cev_timeout, sq); } void mlx5e_drain_sq(struct mlx5e_sq *sq) { int error; struct mlx5_core_dev *mdev= sq->priv->mdev; /* * Check if already stopped. * * NOTE: Serialization of this function is managed by the * caller ensuring the priv's state lock is locked or in case * of rate limit support, a single thread manages drain and * resume of SQs. The "running" variable can therefore safely * be read without any locks. */ if (READ_ONCE(sq->running) == 0) return; /* don't put more packets into the SQ */ WRITE_ONCE(sq->running, 0); /* serialize access to DMA rings */ mtx_lock(&sq->lock); /* teardown event factor timer, if any */ sq->cev_next_state = MLX5E_CEV_STATE_HOLD_NOPS; callout_stop(&sq->cev_callout); /* send dummy NOPs in order to flush the transmit ring */ mlx5e_sq_send_nops_locked(sq, 1); mtx_unlock(&sq->lock); /* wait till SQ is empty or link is down */ mtx_lock(&sq->lock); while (sq->cc != sq->pc && (sq->priv->media_status_last & IFM_ACTIVE) != 0 && mdev->state != MLX5_DEVICE_STATE_INTERNAL_ERROR && pci_channel_offline(mdev->pdev) == 0) { mtx_unlock(&sq->lock); msleep(1); sq->cq.mcq.comp(&sq->cq.mcq, NULL); mtx_lock(&sq->lock); } mtx_unlock(&sq->lock); /* error out remaining requests */ error = mlx5e_modify_sq(sq, MLX5_SQC_STATE_RDY, MLX5_SQC_STATE_ERR); if (error != 0) { mlx5_en_err(sq->ifp, "mlx5e_modify_sq() from RDY to ERR failed: %d\n", error); } /* wait till SQ is empty */ mtx_lock(&sq->lock); while (sq->cc != sq->pc && mdev->state != MLX5_DEVICE_STATE_INTERNAL_ERROR && pci_channel_offline(mdev->pdev) == 0) { mtx_unlock(&sq->lock); msleep(1); sq->cq.mcq.comp(&sq->cq.mcq, NULL); mtx_lock(&sq->lock); } mtx_unlock(&sq->lock); } static void mlx5e_close_sq_wait(struct mlx5e_sq *sq) { mlx5e_drain_sq(sq); mlx5e_disable_sq(sq); mlx5e_destroy_sq(sq); } static int mlx5e_create_cq(struct mlx5e_priv *priv, struct mlx5e_cq_param *param, struct mlx5e_cq *cq, mlx5e_cq_comp_t *comp, int eq_ix) { struct mlx5_core_dev *mdev = priv->mdev; struct mlx5_core_cq *mcq = &cq->mcq; int eqn_not_used; int irqn; int err; u32 i; err = mlx5_vector2eqn(mdev, eq_ix, &eqn_not_used, &irqn); if (err) return (err); err = mlx5_cqwq_create(mdev, ¶m->wq, param->cqc, &cq->wq, &cq->wq_ctrl); if (err) return (err); mcq->cqe_sz = 64; mcq->set_ci_db = cq->wq_ctrl.db.db; mcq->arm_db = cq->wq_ctrl.db.db + 1; *mcq->set_ci_db = 0; *mcq->arm_db = 0; mcq->vector = eq_ix; mcq->comp = comp; mcq->event = mlx5e_cq_error_event; mcq->irqn = irqn; for (i = 0; i < mlx5_cqwq_get_size(&cq->wq); i++) { struct mlx5_cqe64 *cqe = mlx5_cqwq_get_wqe(&cq->wq, i); cqe->op_own = 0xf1; } cq->priv = priv; return (0); } static void mlx5e_destroy_cq(struct mlx5e_cq *cq) { mlx5_wq_destroy(&cq->wq_ctrl); } static int mlx5e_enable_cq(struct mlx5e_cq *cq, struct mlx5e_cq_param *param, int eq_ix) { struct mlx5_core_cq *mcq = &cq->mcq; u32 out[MLX5_ST_SZ_DW(create_cq_out)]; void *in; void *cqc; int inlen; int irqn_not_used; int eqn; int err; inlen = MLX5_ST_SZ_BYTES(create_cq_in) + sizeof(u64) * cq->wq_ctrl.buf.npages; in = mlx5_vzalloc(inlen); if (in == NULL) return (-ENOMEM); cqc = MLX5_ADDR_OF(create_cq_in, in, cq_context); memcpy(cqc, param->cqc, sizeof(param->cqc)); mlx5_fill_page_array(&cq->wq_ctrl.buf, (__be64 *) MLX5_ADDR_OF(create_cq_in, in, pas)); mlx5_vector2eqn(cq->priv->mdev, eq_ix, &eqn, &irqn_not_used); MLX5_SET(cqc, cqc, c_eqn, eqn); MLX5_SET(cqc, cqc, log_page_size, cq->wq_ctrl.buf.page_shift - PAGE_SHIFT); MLX5_SET64(cqc, cqc, dbr_addr, cq->wq_ctrl.db.dma); err = mlx5_core_create_cq(cq->priv->mdev, mcq, in, inlen, out, sizeof(out)); kvfree(in); if (err) return (err); mlx5e_cq_arm(cq, MLX5_GET_DOORBELL_LOCK(&cq->priv->doorbell_lock)); return (0); } static void mlx5e_disable_cq(struct mlx5e_cq *cq) { mlx5_core_destroy_cq(cq->priv->mdev, &cq->mcq); } int mlx5e_open_cq(struct mlx5e_priv *priv, struct mlx5e_cq_param *param, struct mlx5e_cq *cq, mlx5e_cq_comp_t *comp, int eq_ix) { int err; err = mlx5e_create_cq(priv, param, cq, comp, eq_ix); if (err) return (err); err = mlx5e_enable_cq(cq, param, eq_ix); if (err) goto err_destroy_cq; return (0); err_destroy_cq: mlx5e_destroy_cq(cq); return (err); } void mlx5e_close_cq(struct mlx5e_cq *cq) { mlx5e_disable_cq(cq); mlx5e_destroy_cq(cq); } static int mlx5e_open_tx_cqs(struct mlx5e_channel *c, struct mlx5e_channel_param *cparam) { int err; int tc; for (tc = 0; tc < c->priv->num_tc; tc++) { /* open completion queue */ err = mlx5e_open_cq(c->priv, &cparam->tx_cq, &c->sq[tc].cq, &mlx5e_tx_cq_comp, c->ix); if (err) goto err_close_tx_cqs; } return (0); err_close_tx_cqs: for (tc--; tc >= 0; tc--) mlx5e_close_cq(&c->sq[tc].cq); return (err); } static void mlx5e_close_tx_cqs(struct mlx5e_channel *c) { int tc; for (tc = 0; tc < c->priv->num_tc; tc++) mlx5e_close_cq(&c->sq[tc].cq); } static int mlx5e_open_sqs(struct mlx5e_channel *c, struct mlx5e_channel_param *cparam) { int err; int tc; for (tc = 0; tc < c->priv->num_tc; tc++) { err = mlx5e_open_sq(c, tc, &cparam->sq, &c->sq[tc]); if (err) goto err_close_sqs; } return (0); err_close_sqs: for (tc--; tc >= 0; tc--) mlx5e_close_sq_wait(&c->sq[tc]); return (err); } static void mlx5e_close_sqs_wait(struct mlx5e_channel *c) { int tc; for (tc = 0; tc < c->priv->num_tc; tc++) mlx5e_close_sq_wait(&c->sq[tc]); } static void mlx5e_chan_static_init(struct mlx5e_priv *priv, struct mlx5e_channel *c, int ix) { int tc; /* setup priv and channel number */ c->priv = priv; c->ix = ix; /* setup send tag */ m_snd_tag_init(&c->tag, c->priv->ifp, &mlx5e_ul_snd_tag_sw); init_completion(&c->completion); mtx_init(&c->rq.mtx, "mlx5rx", MTX_NETWORK_LOCK, MTX_DEF); callout_init_mtx(&c->rq.watchdog, &c->rq.mtx, 0); for (tc = 0; tc != MLX5E_MAX_TX_NUM_TC; tc++) { struct mlx5e_sq *sq = c->sq + tc; mtx_init(&sq->lock, "mlx5tx", MTX_NETWORK_LOCK " TX", MTX_DEF); mtx_init(&sq->comp_lock, "mlx5comp", MTX_NETWORK_LOCK " TX", MTX_DEF); callout_init_mtx(&sq->cev_callout, &sq->lock, 0); } } static void mlx5e_chan_wait_for_completion(struct mlx5e_channel *c) { m_snd_tag_rele(&c->tag); wait_for_completion(&c->completion); } static void mlx5e_priv_wait_for_completion(struct mlx5e_priv *priv, const uint32_t channels) { uint32_t x; for (x = 0; x != channels; x++) mlx5e_chan_wait_for_completion(&priv->channel[x]); } static void mlx5e_chan_static_destroy(struct mlx5e_channel *c) { int tc; callout_drain(&c->rq.watchdog); mtx_destroy(&c->rq.mtx); for (tc = 0; tc != MLX5E_MAX_TX_NUM_TC; tc++) { callout_drain(&c->sq[tc].cev_callout); mtx_destroy(&c->sq[tc].lock); mtx_destroy(&c->sq[tc].comp_lock); } } static int mlx5e_open_channel(struct mlx5e_priv *priv, struct mlx5e_channel_param *cparam, struct mlx5e_channel *c) { struct epoch_tracker et; int i, err; /* zero non-persistant data */ MLX5E_ZERO(&c->rq, mlx5e_rq_zero_start); for (i = 0; i != priv->num_tc; i++) MLX5E_ZERO(&c->sq[i], mlx5e_sq_zero_start); /* open transmit completion queue */ err = mlx5e_open_tx_cqs(c, cparam); if (err) goto err_free; /* open receive completion queue */ err = mlx5e_open_cq(c->priv, &cparam->rx_cq, &c->rq.cq, &mlx5e_rx_cq_comp, c->ix); if (err) goto err_close_tx_cqs; err = mlx5e_open_sqs(c, cparam); if (err) goto err_close_rx_cq; err = mlx5e_open_rq(c, &cparam->rq, &c->rq); if (err) goto err_close_sqs; /* poll receive queue initially */ NET_EPOCH_ENTER(et); c->rq.cq.mcq.comp(&c->rq.cq.mcq, NULL); NET_EPOCH_EXIT(et); return (0); err_close_sqs: mlx5e_close_sqs_wait(c); err_close_rx_cq: mlx5e_close_cq(&c->rq.cq); err_close_tx_cqs: mlx5e_close_tx_cqs(c); err_free: return (err); } static void mlx5e_close_channel(struct mlx5e_channel *c) { mlx5e_close_rq(&c->rq); } static void mlx5e_close_channel_wait(struct mlx5e_channel *c) { mlx5e_close_rq_wait(&c->rq); mlx5e_close_sqs_wait(c); mlx5e_close_tx_cqs(c); } static int mlx5e_get_wqe_sz(struct mlx5e_priv *priv, u32 *wqe_sz, u32 *nsegs) { u32 r, n; r = priv->params.hw_lro_en ? priv->params.lro_wqe_sz : MLX5E_SW2MB_MTU(priv->ifp->if_mtu); if (r > MJUM16BYTES) return (-ENOMEM); if (r > MJUM9BYTES) r = MJUM16BYTES; else if (r > MJUMPAGESIZE) r = MJUM9BYTES; else if (r > MCLBYTES) r = MJUMPAGESIZE; else r = MCLBYTES; /* * n + 1 must be a power of two, because stride size must be. * Stride size is 16 * (n + 1), as the first segment is * control. */ for (n = howmany(r, MLX5E_MAX_RX_BYTES); !powerof2(n + 1); n++) ; if (n > MLX5E_MAX_BUSDMA_RX_SEGS) return (-ENOMEM); *wqe_sz = r; *nsegs = n; return (0); } static void mlx5e_build_rq_param(struct mlx5e_priv *priv, struct mlx5e_rq_param *param) { void *rqc = param->rqc; void *wq = MLX5_ADDR_OF(rqc, rqc, wq); u32 wqe_sz, nsegs; mlx5e_get_wqe_sz(priv, &wqe_sz, &nsegs); MLX5_SET(wq, wq, wq_type, MLX5_WQ_TYPE_LINKED_LIST); MLX5_SET(wq, wq, end_padding_mode, MLX5_WQ_END_PAD_MODE_ALIGN); MLX5_SET(wq, wq, log_wq_stride, ilog2(sizeof(struct mlx5e_rx_wqe) + nsegs * sizeof(struct mlx5_wqe_data_seg))); MLX5_SET(wq, wq, log_wq_sz, priv->params.log_rq_size); MLX5_SET(wq, wq, pd, priv->pdn); param->wq.linear = 1; } static void mlx5e_build_sq_param(struct mlx5e_priv *priv, struct mlx5e_sq_param *param) { void *sqc = param->sqc; void *wq = MLX5_ADDR_OF(sqc, sqc, wq); MLX5_SET(wq, wq, log_wq_sz, priv->params.log_sq_size); MLX5_SET(wq, wq, log_wq_stride, ilog2(MLX5_SEND_WQE_BB)); MLX5_SET(wq, wq, pd, priv->pdn); param->wq.linear = 1; } static void mlx5e_build_common_cq_param(struct mlx5e_priv *priv, struct mlx5e_cq_param *param) { void *cqc = param->cqc; MLX5_SET(cqc, cqc, uar_page, priv->mdev->priv.uar->index); } static void mlx5e_get_default_profile(struct mlx5e_priv *priv, int mode, struct net_dim_cq_moder *ptr) { *ptr = net_dim_get_profile(mode, MLX5E_DIM_DEFAULT_PROFILE); /* apply LRO restrictions */ if (priv->params.hw_lro_en && ptr->pkts > MLX5E_DIM_MAX_RX_CQ_MODERATION_PKTS_WITH_LRO) { ptr->pkts = MLX5E_DIM_MAX_RX_CQ_MODERATION_PKTS_WITH_LRO; } } static void mlx5e_build_rx_cq_param(struct mlx5e_priv *priv, struct mlx5e_cq_param *param) { struct net_dim_cq_moder curr; void *cqc = param->cqc; /* * We use MLX5_CQE_FORMAT_HASH because the RX hash mini CQE * format is more beneficial for FreeBSD use case. * * Adding support for MLX5_CQE_FORMAT_CSUM will require changes * in mlx5e_decompress_cqe. */ if (priv->params.cqe_zipping_en) { MLX5_SET(cqc, cqc, mini_cqe_res_format, MLX5_CQE_FORMAT_HASH); MLX5_SET(cqc, cqc, cqe_compression_en, 1); } MLX5_SET(cqc, cqc, log_cq_size, priv->params.log_rq_size); switch (priv->params.rx_cq_moderation_mode) { case 0: MLX5_SET(cqc, cqc, cq_period, priv->params.rx_cq_moderation_usec); MLX5_SET(cqc, cqc, cq_max_count, priv->params.rx_cq_moderation_pkts); MLX5_SET(cqc, cqc, cq_period_mode, MLX5_CQ_PERIOD_MODE_START_FROM_EQE); break; case 1: MLX5_SET(cqc, cqc, cq_period, priv->params.rx_cq_moderation_usec); MLX5_SET(cqc, cqc, cq_max_count, priv->params.rx_cq_moderation_pkts); if (MLX5_CAP_GEN(priv->mdev, cq_period_start_from_cqe)) MLX5_SET(cqc, cqc, cq_period_mode, MLX5_CQ_PERIOD_MODE_START_FROM_CQE); else MLX5_SET(cqc, cqc, cq_period_mode, MLX5_CQ_PERIOD_MODE_START_FROM_EQE); break; case 2: mlx5e_get_default_profile(priv, NET_DIM_CQ_PERIOD_MODE_START_FROM_EQE, &curr); MLX5_SET(cqc, cqc, cq_period, curr.usec); MLX5_SET(cqc, cqc, cq_max_count, curr.pkts); MLX5_SET(cqc, cqc, cq_period_mode, MLX5_CQ_PERIOD_MODE_START_FROM_EQE); break; case 3: mlx5e_get_default_profile(priv, NET_DIM_CQ_PERIOD_MODE_START_FROM_CQE, &curr); MLX5_SET(cqc, cqc, cq_period, curr.usec); MLX5_SET(cqc, cqc, cq_max_count, curr.pkts); if (MLX5_CAP_GEN(priv->mdev, cq_period_start_from_cqe)) MLX5_SET(cqc, cqc, cq_period_mode, MLX5_CQ_PERIOD_MODE_START_FROM_CQE); else MLX5_SET(cqc, cqc, cq_period_mode, MLX5_CQ_PERIOD_MODE_START_FROM_EQE); break; default: break; } mlx5e_dim_build_cq_param(priv, param); mlx5e_build_common_cq_param(priv, param); } static void mlx5e_build_tx_cq_param(struct mlx5e_priv *priv, struct mlx5e_cq_param *param) { void *cqc = param->cqc; MLX5_SET(cqc, cqc, log_cq_size, priv->params.log_sq_size); MLX5_SET(cqc, cqc, cq_period, priv->params.tx_cq_moderation_usec); MLX5_SET(cqc, cqc, cq_max_count, priv->params.tx_cq_moderation_pkts); switch (priv->params.tx_cq_moderation_mode) { case 0: MLX5_SET(cqc, cqc, cq_period_mode, MLX5_CQ_PERIOD_MODE_START_FROM_EQE); break; default: if (MLX5_CAP_GEN(priv->mdev, cq_period_start_from_cqe)) MLX5_SET(cqc, cqc, cq_period_mode, MLX5_CQ_PERIOD_MODE_START_FROM_CQE); else MLX5_SET(cqc, cqc, cq_period_mode, MLX5_CQ_PERIOD_MODE_START_FROM_EQE); break; } mlx5e_build_common_cq_param(priv, param); } static void mlx5e_build_channel_param(struct mlx5e_priv *priv, struct mlx5e_channel_param *cparam) { memset(cparam, 0, sizeof(*cparam)); mlx5e_build_rq_param(priv, &cparam->rq); mlx5e_build_sq_param(priv, &cparam->sq); mlx5e_build_rx_cq_param(priv, &cparam->rx_cq); mlx5e_build_tx_cq_param(priv, &cparam->tx_cq); } static int mlx5e_open_channels(struct mlx5e_priv *priv) { struct mlx5e_channel_param *cparam; int err; int i; int j; cparam = malloc(sizeof(*cparam), M_MLX5EN, M_WAITOK); mlx5e_build_channel_param(priv, cparam); for (i = 0; i < priv->params.num_channels; i++) { err = mlx5e_open_channel(priv, cparam, &priv->channel[i]); if (err) goto err_close_channels; /* Bind interrupt vectors, if any. */ if (priv->params_ethtool.irq_cpu_base > -1) { cpuset_t cpuset; int cpu; int irq; int eqn; int nirq; err = mlx5_vector2eqn(priv->mdev, i, &eqn, &nirq); /* error here is non-fatal */ if (err != 0) continue; irq = priv->mdev->priv.msix_arr[nirq].vector; cpu = (unsigned)(priv->params_ethtool.irq_cpu_base + i * priv->params_ethtool.irq_cpu_stride) % (unsigned)mp_ncpus; CPU_ZERO(&cpuset); CPU_SET(cpu, &cpuset); intr_setaffinity(irq, CPU_WHICH_INTRHANDLER, &cpuset); } } for (j = 0; j < priv->params.num_channels; j++) { err = mlx5e_wait_for_min_rx_wqes(&priv->channel[j].rq); if (err) goto err_close_channels; } free(cparam, M_MLX5EN); return (0); err_close_channels: while (i--) { mlx5e_close_channel(&priv->channel[i]); mlx5e_close_channel_wait(&priv->channel[i]); } free(cparam, M_MLX5EN); return (err); } static void mlx5e_close_channels(struct mlx5e_priv *priv) { int i; for (i = 0; i < priv->params.num_channels; i++) mlx5e_close_channel(&priv->channel[i]); for (i = 0; i < priv->params.num_channels; i++) mlx5e_close_channel_wait(&priv->channel[i]); } static int mlx5e_refresh_sq_params(struct mlx5e_priv *priv, struct mlx5e_sq *sq) { if (MLX5_CAP_GEN(priv->mdev, cq_period_mode_modify)) { uint8_t cq_mode; switch (priv->params.tx_cq_moderation_mode) { case 0: case 2: cq_mode = MLX5_CQ_PERIOD_MODE_START_FROM_EQE; break; default: cq_mode = MLX5_CQ_PERIOD_MODE_START_FROM_CQE; break; } return (mlx5_core_modify_cq_moderation_mode(priv->mdev, &sq->cq.mcq, priv->params.tx_cq_moderation_usec, priv->params.tx_cq_moderation_pkts, cq_mode)); } return (mlx5_core_modify_cq_moderation(priv->mdev, &sq->cq.mcq, priv->params.tx_cq_moderation_usec, priv->params.tx_cq_moderation_pkts)); } static int mlx5e_refresh_rq_params(struct mlx5e_priv *priv, struct mlx5e_rq *rq) { if (MLX5_CAP_GEN(priv->mdev, cq_period_mode_modify)) { uint8_t cq_mode; uint8_t dim_mode; int retval; switch (priv->params.rx_cq_moderation_mode) { case 0: case 2: cq_mode = MLX5_CQ_PERIOD_MODE_START_FROM_EQE; dim_mode = NET_DIM_CQ_PERIOD_MODE_START_FROM_EQE; break; default: cq_mode = MLX5_CQ_PERIOD_MODE_START_FROM_CQE; dim_mode = NET_DIM_CQ_PERIOD_MODE_START_FROM_CQE; break; } /* tear down dynamic interrupt moderation */ mtx_lock(&rq->mtx); rq->dim.mode = NET_DIM_CQ_PERIOD_MODE_DISABLED; mtx_unlock(&rq->mtx); /* wait for dynamic interrupt moderation work task, if any */ cancel_work_sync(&rq->dim.work); if (priv->params.rx_cq_moderation_mode >= 2) { struct net_dim_cq_moder curr; mlx5e_get_default_profile(priv, dim_mode, &curr); retval = mlx5_core_modify_cq_moderation_mode(priv->mdev, &rq->cq.mcq, curr.usec, curr.pkts, cq_mode); /* set dynamic interrupt moderation mode and zero defaults */ mtx_lock(&rq->mtx); rq->dim.mode = dim_mode; rq->dim.state = 0; rq->dim.profile_ix = MLX5E_DIM_DEFAULT_PROFILE; mtx_unlock(&rq->mtx); } else { retval = mlx5_core_modify_cq_moderation_mode(priv->mdev, &rq->cq.mcq, priv->params.rx_cq_moderation_usec, priv->params.rx_cq_moderation_pkts, cq_mode); } return (retval); } return (mlx5_core_modify_cq_moderation(priv->mdev, &rq->cq.mcq, priv->params.rx_cq_moderation_usec, priv->params.rx_cq_moderation_pkts)); } static int mlx5e_refresh_channel_params_sub(struct mlx5e_priv *priv, struct mlx5e_channel *c) { int err; int i; err = mlx5e_refresh_rq_params(priv, &c->rq); if (err) goto done; for (i = 0; i != priv->num_tc; i++) { err = mlx5e_refresh_sq_params(priv, &c->sq[i]); if (err) goto done; } done: return (err); } int mlx5e_refresh_channel_params(struct mlx5e_priv *priv) { int i; /* check if channels are closed */ if (test_bit(MLX5E_STATE_OPENED, &priv->state) == 0) return (EINVAL); for (i = 0; i < priv->params.num_channels; i++) { int err; err = mlx5e_refresh_channel_params_sub(priv, &priv->channel[i]); if (err) return (err); } return (0); } static int mlx5e_open_tis(struct mlx5e_priv *priv, int tc) { struct mlx5_core_dev *mdev = priv->mdev; u32 in[MLX5_ST_SZ_DW(create_tis_in)]; void *tisc = MLX5_ADDR_OF(create_tis_in, in, ctx); memset(in, 0, sizeof(in)); MLX5_SET(tisc, tisc, prio, tc); MLX5_SET(tisc, tisc, transport_domain, priv->tdn); return (mlx5_core_create_tis(mdev, in, sizeof(in), &priv->tisn[tc])); } static void mlx5e_close_tis(struct mlx5e_priv *priv, int tc) { mlx5_core_destroy_tis(priv->mdev, priv->tisn[tc], 0); } static int mlx5e_open_tises(struct mlx5e_priv *priv) { int num_tc = priv->num_tc; int err; int tc; for (tc = 0; tc < num_tc; tc++) { err = mlx5e_open_tis(priv, tc); if (err) goto err_close_tises; } return (0); err_close_tises: for (tc--; tc >= 0; tc--) mlx5e_close_tis(priv, tc); return (err); } static void mlx5e_close_tises(struct mlx5e_priv *priv) { int num_tc = priv->num_tc; int tc; for (tc = 0; tc < num_tc; tc++) mlx5e_close_tis(priv, tc); } static int mlx5e_open_rqt(struct mlx5e_priv *priv) { struct mlx5_core_dev *mdev = priv->mdev; u32 *in; u32 out[MLX5_ST_SZ_DW(create_rqt_out)] = {0}; void *rqtc; int inlen; int err; int sz; int i; sz = 1 << priv->params.rx_hash_log_tbl_sz; inlen = MLX5_ST_SZ_BYTES(create_rqt_in) + sizeof(u32) * sz; in = mlx5_vzalloc(inlen); if (in == NULL) return (-ENOMEM); rqtc = MLX5_ADDR_OF(create_rqt_in, in, rqt_context); MLX5_SET(rqtc, rqtc, rqt_actual_size, sz); MLX5_SET(rqtc, rqtc, rqt_max_size, sz); for (i = 0; i < sz; i++) { int ix = i; #ifdef RSS ix = rss_get_indirection_to_bucket(ix); #endif /* ensure we don't overflow */ ix %= priv->params.num_channels; /* apply receive side scaling stride, if any */ ix -= ix % (int)priv->params.channels_rsss; MLX5_SET(rqtc, rqtc, rq_num[i], priv->channel[ix].rq.rqn); } MLX5_SET(create_rqt_in, in, opcode, MLX5_CMD_OP_CREATE_RQT); err = mlx5_cmd_exec(mdev, in, inlen, out, sizeof(out)); if (!err) priv->rqtn = MLX5_GET(create_rqt_out, out, rqtn); kvfree(in); return (err); } static void mlx5e_close_rqt(struct mlx5e_priv *priv) { u32 in[MLX5_ST_SZ_DW(destroy_rqt_in)] = {0}; u32 out[MLX5_ST_SZ_DW(destroy_rqt_out)] = {0}; MLX5_SET(destroy_rqt_in, in, opcode, MLX5_CMD_OP_DESTROY_RQT); MLX5_SET(destroy_rqt_in, in, rqtn, priv->rqtn); mlx5_cmd_exec(priv->mdev, in, sizeof(in), out, sizeof(out)); } #define MLX5E_RSS_KEY_SIZE (10 * 4) /* bytes */ static void mlx5e_get_rss_key(void *key_ptr) { #ifdef RSS rss_getkey(key_ptr); #else static const u32 rsskey[] = { cpu_to_be32(0xD181C62C), cpu_to_be32(0xF7F4DB5B), cpu_to_be32(0x1983A2FC), cpu_to_be32(0x943E1ADB), cpu_to_be32(0xD9389E6B), cpu_to_be32(0xD1039C2C), cpu_to_be32(0xA74499AD), cpu_to_be32(0x593D56D9), cpu_to_be32(0xF3253C06), cpu_to_be32(0x2ADC1FFC), }; CTASSERT(sizeof(rsskey) == MLX5E_RSS_KEY_SIZE); memcpy(key_ptr, rsskey, MLX5E_RSS_KEY_SIZE); #endif } static void mlx5e_build_tir_ctx(struct mlx5e_priv *priv, u32 * tirc, int tt, bool inner_vxlan) { void *hfso = MLX5_ADDR_OF(tirc, tirc, rx_hash_field_selector_outer); void *hfsi = MLX5_ADDR_OF(tirc, tirc, rx_hash_field_selector_inner); void *hfs = inner_vxlan ? hfsi : hfso; __be32 *hkey; MLX5_SET(tirc, tirc, transport_domain, priv->tdn); #define ROUGH_MAX_L2_L3_HDR_SZ 256 #define MLX5_HASH_IP (MLX5_HASH_FIELD_SEL_SRC_IP |\ MLX5_HASH_FIELD_SEL_DST_IP) #define MLX5_HASH_ALL (MLX5_HASH_FIELD_SEL_SRC_IP |\ MLX5_HASH_FIELD_SEL_DST_IP |\ MLX5_HASH_FIELD_SEL_L4_SPORT |\ MLX5_HASH_FIELD_SEL_L4_DPORT) #define MLX5_HASH_IP_IPSEC_SPI (MLX5_HASH_FIELD_SEL_SRC_IP |\ MLX5_HASH_FIELD_SEL_DST_IP |\ MLX5_HASH_FIELD_SEL_IPSEC_SPI) if (priv->params.hw_lro_en) { MLX5_SET(tirc, tirc, lro_enable_mask, MLX5_TIRC_LRO_ENABLE_MASK_IPV4_LRO | MLX5_TIRC_LRO_ENABLE_MASK_IPV6_LRO); MLX5_SET(tirc, tirc, lro_max_msg_sz, (priv->params.lro_wqe_sz - ROUGH_MAX_L2_L3_HDR_SZ) >> 8); /* TODO: add the option to choose timer value dynamically */ MLX5_SET(tirc, tirc, lro_timeout_period_usecs, MLX5_CAP_ETH(priv->mdev, lro_timer_supported_periods[2])); } if (inner_vxlan) MLX5_SET(tirc, tirc, tunneled_offload_en, 1); /* setup parameters for hashing TIR type, if any */ switch (tt) { case MLX5E_TT_ANY: MLX5_SET(tirc, tirc, disp_type, MLX5_TIRC_DISP_TYPE_DIRECT); MLX5_SET(tirc, tirc, inline_rqn, priv->channel[0].rq.rqn); break; default: MLX5_SET(tirc, tirc, disp_type, MLX5_TIRC_DISP_TYPE_INDIRECT); MLX5_SET(tirc, tirc, indirect_table, priv->rqtn); MLX5_SET(tirc, tirc, rx_hash_fn, MLX5_TIRC_RX_HASH_FN_HASH_TOEPLITZ); hkey = (__be32 *) MLX5_ADDR_OF(tirc, tirc, rx_hash_toeplitz_key); CTASSERT(MLX5_FLD_SZ_BYTES(tirc, rx_hash_toeplitz_key) >= MLX5E_RSS_KEY_SIZE); #ifdef RSS /* * The FreeBSD RSS implementation does currently not * support symmetric Toeplitz hashes: */ MLX5_SET(tirc, tirc, rx_hash_symmetric, 0); #else MLX5_SET(tirc, tirc, rx_hash_symmetric, 1); #endif mlx5e_get_rss_key(hkey); break; } switch (tt) { case MLX5E_TT_IPV4_TCP: MLX5_SET(rx_hash_field_select, hfs, l3_prot_type, MLX5_L3_PROT_TYPE_IPV4); MLX5_SET(rx_hash_field_select, hfs, l4_prot_type, MLX5_L4_PROT_TYPE_TCP); #ifdef RSS if (!(rss_gethashconfig() & RSS_HASHTYPE_RSS_TCP_IPV4)) { MLX5_SET(rx_hash_field_select, hfs, selected_fields, MLX5_HASH_IP); } else #endif MLX5_SET(rx_hash_field_select, hfs, selected_fields, MLX5_HASH_ALL); break; case MLX5E_TT_IPV6_TCP: MLX5_SET(rx_hash_field_select, hfs, l3_prot_type, MLX5_L3_PROT_TYPE_IPV6); MLX5_SET(rx_hash_field_select, hfs, l4_prot_type, MLX5_L4_PROT_TYPE_TCP); #ifdef RSS if (!(rss_gethashconfig() & RSS_HASHTYPE_RSS_TCP_IPV6)) { MLX5_SET(rx_hash_field_select, hfs, selected_fields, MLX5_HASH_IP); } else #endif MLX5_SET(rx_hash_field_select, hfs, selected_fields, MLX5_HASH_ALL); break; case MLX5E_TT_IPV4_UDP: MLX5_SET(rx_hash_field_select, hfs, l3_prot_type, MLX5_L3_PROT_TYPE_IPV4); MLX5_SET(rx_hash_field_select, hfs, l4_prot_type, MLX5_L4_PROT_TYPE_UDP); #ifdef RSS if (!(rss_gethashconfig() & RSS_HASHTYPE_RSS_UDP_IPV4)) { MLX5_SET(rx_hash_field_select, hfs, selected_fields, MLX5_HASH_IP); } else #endif MLX5_SET(rx_hash_field_select, hfs, selected_fields, MLX5_HASH_ALL); break; case MLX5E_TT_IPV6_UDP: MLX5_SET(rx_hash_field_select, hfs, l3_prot_type, MLX5_L3_PROT_TYPE_IPV6); MLX5_SET(rx_hash_field_select, hfs, l4_prot_type, MLX5_L4_PROT_TYPE_UDP); #ifdef RSS if (!(rss_gethashconfig() & RSS_HASHTYPE_RSS_UDP_IPV6)) { MLX5_SET(rx_hash_field_select, hfs, selected_fields, MLX5_HASH_IP); } else #endif MLX5_SET(rx_hash_field_select, hfs, selected_fields, MLX5_HASH_ALL); break; case MLX5E_TT_IPV4_IPSEC_AH: MLX5_SET(rx_hash_field_select, hfs, l3_prot_type, MLX5_L3_PROT_TYPE_IPV4); MLX5_SET(rx_hash_field_select, hfs, selected_fields, MLX5_HASH_IP_IPSEC_SPI); break; case MLX5E_TT_IPV6_IPSEC_AH: MLX5_SET(rx_hash_field_select, hfs, l3_prot_type, MLX5_L3_PROT_TYPE_IPV6); MLX5_SET(rx_hash_field_select, hfs, selected_fields, MLX5_HASH_IP_IPSEC_SPI); break; case MLX5E_TT_IPV4_IPSEC_ESP: MLX5_SET(rx_hash_field_select, hfs, l3_prot_type, MLX5_L3_PROT_TYPE_IPV4); MLX5_SET(rx_hash_field_select, hfs, selected_fields, MLX5_HASH_IP_IPSEC_SPI); break; case MLX5E_TT_IPV6_IPSEC_ESP: MLX5_SET(rx_hash_field_select, hfs, l3_prot_type, MLX5_L3_PROT_TYPE_IPV6); MLX5_SET(rx_hash_field_select, hfs, selected_fields, MLX5_HASH_IP_IPSEC_SPI); break; case MLX5E_TT_IPV4: MLX5_SET(rx_hash_field_select, hfs, l3_prot_type, MLX5_L3_PROT_TYPE_IPV4); MLX5_SET(rx_hash_field_select, hfs, selected_fields, MLX5_HASH_IP); break; case MLX5E_TT_IPV6: MLX5_SET(rx_hash_field_select, hfs, l3_prot_type, MLX5_L3_PROT_TYPE_IPV6); MLX5_SET(rx_hash_field_select, hfs, selected_fields, MLX5_HASH_IP); break; default: break; } } static int mlx5e_open_tir(struct mlx5e_priv *priv, int tt, bool inner_vxlan) { struct mlx5_core_dev *mdev = priv->mdev; u32 *in; void *tirc; int inlen; int err; inlen = MLX5_ST_SZ_BYTES(create_tir_in); in = mlx5_vzalloc(inlen); if (in == NULL) return (-ENOMEM); tirc = MLX5_ADDR_OF(create_tir_in, in, tir_context); mlx5e_build_tir_ctx(priv, tirc, tt, inner_vxlan); err = mlx5_core_create_tir(mdev, in, inlen, inner_vxlan ? &priv->tirn_inner_vxlan[tt] : &priv->tirn[tt]); kvfree(in); return (err); } static void mlx5e_close_tir(struct mlx5e_priv *priv, int tt, bool inner_vxlan) { mlx5_core_destroy_tir(priv->mdev, inner_vxlan ? priv->tirn_inner_vxlan[tt] : priv->tirn[tt], 0); } static int mlx5e_open_tirs(struct mlx5e_priv *priv, bool inner_vxlan) { int err; int i; for (i = 0; i < MLX5E_NUM_TT; i++) { err = mlx5e_open_tir(priv, i, inner_vxlan); if (err) goto err_close_tirs; } return (0); err_close_tirs: for (i--; i >= 0; i--) mlx5e_close_tir(priv, i, inner_vxlan); return (err); } static void mlx5e_close_tirs(struct mlx5e_priv *priv, bool inner_vxlan) { int i; for (i = 0; i < MLX5E_NUM_TT; i++) mlx5e_close_tir(priv, i, inner_vxlan); } /* * SW MTU does not include headers, * HW MTU includes all headers and checksums. */ static int mlx5e_set_dev_port_mtu(struct ifnet *ifp, int sw_mtu) { struct mlx5e_priv *priv = ifp->if_softc; struct mlx5_core_dev *mdev = priv->mdev; int hw_mtu; int err; hw_mtu = MLX5E_SW2HW_MTU(sw_mtu); err = mlx5_set_port_mtu(mdev, hw_mtu); if (err) { mlx5_en_err(ifp, "mlx5_set_port_mtu failed setting %d, err=%d\n", sw_mtu, err); return (err); } /* Update vport context MTU */ err = mlx5_set_vport_mtu(mdev, hw_mtu); if (err) { mlx5_en_err(ifp, "Failed updating vport context with MTU size, err=%d\n", err); } ifp->if_mtu = sw_mtu; err = mlx5_query_vport_mtu(mdev, &hw_mtu); if (err || !hw_mtu) { /* fallback to port oper mtu */ err = mlx5_query_port_oper_mtu(mdev, &hw_mtu); } if (err) { mlx5_en_err(ifp, "Query port MTU, after setting new MTU value, failed\n"); return (err); } else if (MLX5E_HW2SW_MTU(hw_mtu) < sw_mtu) { err = -E2BIG, mlx5_en_err(ifp, "Port MTU %d is smaller than ifp mtu %d\n", hw_mtu, sw_mtu); } else if (MLX5E_HW2SW_MTU(hw_mtu) > sw_mtu) { err = -EINVAL; mlx5_en_err(ifp, "Port MTU %d is bigger than ifp mtu %d\n", hw_mtu, sw_mtu); } priv->params_ethtool.hw_mtu = hw_mtu; /* compute MSB */ while (hw_mtu & (hw_mtu - 1)) hw_mtu &= (hw_mtu - 1); priv->params_ethtool.hw_mtu_msb = hw_mtu; return (err); } int mlx5e_open_locked(struct ifnet *ifp) { struct mlx5e_priv *priv = ifp->if_softc; int err; u16 set_id; /* check if already opened */ if (test_bit(MLX5E_STATE_OPENED, &priv->state) != 0) return (0); #ifdef RSS if (rss_getnumbuckets() > priv->params.num_channels) { mlx5_en_info(ifp, "NOTE: There are more RSS buckets(%u) than channels(%u) available\n", rss_getnumbuckets(), priv->params.num_channels); } #endif err = mlx5e_open_tises(priv); if (err) { mlx5_en_err(ifp, "mlx5e_open_tises failed, %d\n", err); return (err); } err = mlx5_vport_alloc_q_counter(priv->mdev, MLX5_INTERFACE_PROTOCOL_ETH, &set_id); if (err) { mlx5_en_err(priv->ifp, "mlx5_vport_alloc_q_counter failed: %d\n", err); goto err_close_tises; } /* store counter set ID */ priv->counter_set_id = set_id; err = mlx5e_open_channels(priv); if (err) { mlx5_en_err(ifp, "mlx5e_open_channels failed, %d\n", err); goto err_dalloc_q_counter; } err = mlx5e_open_rqt(priv); if (err) { mlx5_en_err(ifp, "mlx5e_open_rqt failed, %d\n", err); goto err_close_channels; } err = mlx5e_open_tirs(priv, false); if (err) { mlx5_en_err(ifp, "mlx5e_open_tir(main) failed, %d\n", err); goto err_close_rqls; } if ((ifp->if_capenable & IFCAP_VXLAN_HWCSUM) != 0) { err = mlx5e_open_tirs(priv, true); if (err) { mlx5_en_err(ifp, "mlx5e_open_tir(inner) failed, %d\n", err); goto err_close_tirs; } } err = mlx5e_open_flow_table(priv); if (err) { mlx5_en_err(ifp, "mlx5e_open_flow_table failed, %d\n", err); goto err_close_tirs_inner; } err = mlx5e_add_all_vlan_rules(priv); if (err) { mlx5_en_err(ifp, "mlx5e_add_all_vlan_rules failed, %d\n", err); goto err_close_flow_table; } if ((ifp->if_capenable & IFCAP_VXLAN_HWCSUM) != 0) { err = mlx5e_add_all_vxlan_rules(priv); if (err) { mlx5_en_err(ifp, "mlx5e_add_all_vxlan_rules failed, %d\n", err); goto err_del_vlan_rules; } } set_bit(MLX5E_STATE_OPENED, &priv->state); mlx5e_update_carrier(priv); mlx5e_set_rx_mode_core(priv); return (0); err_del_vlan_rules: mlx5e_del_all_vlan_rules(priv); err_close_flow_table: mlx5e_close_flow_table(priv); err_close_tirs_inner: if ((ifp->if_capenable & IFCAP_VXLAN_HWCSUM) != 0) mlx5e_close_tirs(priv, true); err_close_tirs: mlx5e_close_tirs(priv, false); err_close_rqls: mlx5e_close_rqt(priv); err_close_channels: mlx5e_close_channels(priv); err_dalloc_q_counter: mlx5_vport_dealloc_q_counter(priv->mdev, MLX5_INTERFACE_PROTOCOL_ETH, priv->counter_set_id); err_close_tises: mlx5e_close_tises(priv); return (err); } static void mlx5e_open(void *arg) { struct mlx5e_priv *priv = arg; PRIV_LOCK(priv); if (mlx5_set_port_status(priv->mdev, MLX5_PORT_UP)) mlx5_en_err(priv->ifp, "Setting port status to up failed\n"); mlx5e_open_locked(priv->ifp); priv->ifp->if_drv_flags |= IFF_DRV_RUNNING; PRIV_UNLOCK(priv); } int mlx5e_close_locked(struct ifnet *ifp) { struct mlx5e_priv *priv = ifp->if_softc; /* check if already closed */ if (test_bit(MLX5E_STATE_OPENED, &priv->state) == 0) return (0); clear_bit(MLX5E_STATE_OPENED, &priv->state); mlx5e_set_rx_mode_core(priv); mlx5e_del_all_vlan_rules(priv); if ((ifp->if_capenable & IFCAP_VXLAN_HWCSUM) != 0) mlx5e_del_all_vxlan_rules(priv); if_link_state_change(priv->ifp, LINK_STATE_DOWN); mlx5e_close_flow_table(priv); if ((ifp->if_capenable & IFCAP_VXLAN_HWCSUM) != 0) mlx5e_close_tirs(priv, true); mlx5e_close_tirs(priv, false); mlx5e_close_rqt(priv); mlx5e_close_channels(priv); mlx5_vport_dealloc_q_counter(priv->mdev, MLX5_INTERFACE_PROTOCOL_ETH, priv->counter_set_id); mlx5e_close_tises(priv); return (0); } #if (__FreeBSD_version >= 1100000) static uint64_t mlx5e_get_counter(struct ifnet *ifp, ift_counter cnt) { struct mlx5e_priv *priv = ifp->if_softc; u64 retval; /* PRIV_LOCK(priv); XXX not allowed */ switch (cnt) { case IFCOUNTER_IPACKETS: retval = priv->stats.vport.rx_packets; break; case IFCOUNTER_IERRORS: retval = priv->stats.pport.in_range_len_errors + priv->stats.pport.out_of_range_len + priv->stats.pport.too_long_errors + priv->stats.pport.check_seq_err + priv->stats.pport.alignment_err; break; case IFCOUNTER_IQDROPS: retval = priv->stats.vport.rx_out_of_buffer; break; case IFCOUNTER_OPACKETS: retval = priv->stats.vport.tx_packets; break; case IFCOUNTER_OERRORS: retval = priv->stats.port_stats_debug.out_discards; break; case IFCOUNTER_IBYTES: retval = priv->stats.vport.rx_bytes; break; case IFCOUNTER_OBYTES: retval = priv->stats.vport.tx_bytes; break; case IFCOUNTER_IMCASTS: retval = priv->stats.vport.rx_multicast_packets; break; case IFCOUNTER_OMCASTS: retval = priv->stats.vport.tx_multicast_packets; break; case IFCOUNTER_OQDROPS: retval = priv->stats.vport.tx_queue_dropped; break; case IFCOUNTER_COLLISIONS: retval = priv->stats.pport.collisions; break; default: retval = if_get_counter_default(ifp, cnt); break; } /* PRIV_UNLOCK(priv); XXX not allowed */ return (retval); } #endif static void mlx5e_set_rx_mode(struct ifnet *ifp) { struct mlx5e_priv *priv = ifp->if_softc; queue_work(priv->wq, &priv->set_rx_mode_work); } static int mlx5e_ioctl(struct ifnet *ifp, u_long command, caddr_t data) { struct mlx5e_priv *priv; struct ifreq *ifr; struct ifdownreason *ifdr; struct ifi2creq i2c; struct ifrsskey *ifrk; struct ifrsshash *ifrh; int error = 0; int mask = 0; int size_read = 0; int module_status; int module_num; int max_mtu; uint8_t read_addr; priv = ifp->if_softc; /* check if detaching */ if (priv == NULL || priv->gone != 0) return (ENXIO); switch (command) { case SIOCSIFMTU: ifr = (struct ifreq *)data; PRIV_LOCK(priv); mlx5_query_port_max_mtu(priv->mdev, &max_mtu); if (ifr->ifr_mtu >= MLX5E_MTU_MIN && ifr->ifr_mtu <= MIN(MLX5E_MTU_MAX, max_mtu)) { int was_opened; was_opened = test_bit(MLX5E_STATE_OPENED, &priv->state); if (was_opened) mlx5e_close_locked(ifp); /* set new MTU */ mlx5e_set_dev_port_mtu(ifp, ifr->ifr_mtu); if (was_opened) mlx5e_open_locked(ifp); } else { error = EINVAL; mlx5_en_err(ifp, "Invalid MTU value. Min val: %d, Max val: %d\n", MLX5E_MTU_MIN, MIN(MLX5E_MTU_MAX, max_mtu)); } PRIV_UNLOCK(priv); break; case SIOCSIFFLAGS: if ((ifp->if_flags & IFF_UP) && (ifp->if_drv_flags & IFF_DRV_RUNNING)) { mlx5e_set_rx_mode(ifp); break; } PRIV_LOCK(priv); if (ifp->if_flags & IFF_UP) { if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) { if (test_bit(MLX5E_STATE_OPENED, &priv->state) == 0) mlx5e_open_locked(ifp); ifp->if_drv_flags |= IFF_DRV_RUNNING; mlx5_set_port_status(priv->mdev, MLX5_PORT_UP); } } else { if (ifp->if_drv_flags & IFF_DRV_RUNNING) { mlx5_set_port_status(priv->mdev, MLX5_PORT_DOWN); if (test_bit(MLX5E_STATE_OPENED, &priv->state) != 0) mlx5e_close_locked(ifp); mlx5e_update_carrier(priv); ifp->if_drv_flags &= ~IFF_DRV_RUNNING; } } PRIV_UNLOCK(priv); break; case SIOCADDMULTI: case SIOCDELMULTI: mlx5e_set_rx_mode(ifp); break; case SIOCSIFMEDIA: case SIOCGIFMEDIA: case SIOCGIFXMEDIA: ifr = (struct ifreq *)data; error = ifmedia_ioctl(ifp, ifr, &priv->media, command); break; case SIOCSIFCAP: ifr = (struct ifreq *)data; PRIV_LOCK(priv); mask = ifr->ifr_reqcap ^ ifp->if_capenable; if (mask & IFCAP_TXCSUM) { ifp->if_capenable ^= IFCAP_TXCSUM; ifp->if_hwassist ^= (CSUM_TCP | CSUM_UDP | CSUM_IP); if (IFCAP_TSO4 & ifp->if_capenable && !(IFCAP_TXCSUM & ifp->if_capenable)) { mask &= ~IFCAP_TSO4; ifp->if_capenable &= ~IFCAP_TSO4; ifp->if_hwassist &= ~CSUM_IP_TSO; mlx5_en_err(ifp, "tso4 disabled due to -txcsum.\n"); } } if (mask & IFCAP_TXCSUM_IPV6) { ifp->if_capenable ^= IFCAP_TXCSUM_IPV6; ifp->if_hwassist ^= (CSUM_UDP_IPV6 | CSUM_TCP_IPV6); if (IFCAP_TSO6 & ifp->if_capenable && !(IFCAP_TXCSUM_IPV6 & ifp->if_capenable)) { mask &= ~IFCAP_TSO6; ifp->if_capenable &= ~IFCAP_TSO6; ifp->if_hwassist &= ~CSUM_IP6_TSO; mlx5_en_err(ifp, "tso6 disabled due to -txcsum6.\n"); } } if (mask & IFCAP_MEXTPG) ifp->if_capenable ^= IFCAP_MEXTPG; if (mask & IFCAP_TXTLS4) ifp->if_capenable ^= IFCAP_TXTLS4; if (mask & IFCAP_TXTLS6) ifp->if_capenable ^= IFCAP_TXTLS6; #ifdef RATELIMIT if (mask & IFCAP_TXTLS_RTLMT) ifp->if_capenable ^= IFCAP_TXTLS_RTLMT; #endif if (mask & IFCAP_RXCSUM) ifp->if_capenable ^= IFCAP_RXCSUM; if (mask & IFCAP_RXCSUM_IPV6) ifp->if_capenable ^= IFCAP_RXCSUM_IPV6; if (mask & IFCAP_TSO4) { if (!(IFCAP_TSO4 & ifp->if_capenable) && !(IFCAP_TXCSUM & ifp->if_capenable)) { mlx5_en_err(ifp, "enable txcsum first.\n"); error = EAGAIN; goto out; } ifp->if_capenable ^= IFCAP_TSO4; ifp->if_hwassist ^= CSUM_IP_TSO; } if (mask & IFCAP_TSO6) { if (!(IFCAP_TSO6 & ifp->if_capenable) && !(IFCAP_TXCSUM_IPV6 & ifp->if_capenable)) { mlx5_en_err(ifp, "enable txcsum6 first.\n"); error = EAGAIN; goto out; } ifp->if_capenable ^= IFCAP_TSO6; ifp->if_hwassist ^= CSUM_IP6_TSO; } if (mask & IFCAP_VLAN_HWTSO) ifp->if_capenable ^= IFCAP_VLAN_HWTSO; if (mask & IFCAP_VLAN_HWFILTER) { if (ifp->if_capenable & IFCAP_VLAN_HWFILTER) mlx5e_disable_vlan_filter(priv); else mlx5e_enable_vlan_filter(priv); ifp->if_capenable ^= IFCAP_VLAN_HWFILTER; } if (mask & IFCAP_VLAN_HWTAGGING) ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING; if (mask & IFCAP_WOL_MAGIC) ifp->if_capenable ^= IFCAP_WOL_MAGIC; if (mask & IFCAP_VXLAN_HWCSUM) { int was_opened = test_bit(MLX5E_STATE_OPENED, &priv->state); if (was_opened) mlx5e_close_locked(ifp); ifp->if_capenable ^= IFCAP_VXLAN_HWCSUM; ifp->if_hwassist ^= CSUM_INNER_IP | CSUM_INNER_IP_UDP | CSUM_INNER_IP_TCP | CSUM_INNER_IP6_UDP | CSUM_INNER_IP6_TCP; if (was_opened) mlx5e_open_locked(ifp); } if (mask & IFCAP_VXLAN_HWTSO) { ifp->if_capenable ^= IFCAP_VXLAN_HWTSO; ifp->if_hwassist ^= CSUM_INNER_IP_TSO | CSUM_INNER_IP6_TSO; } VLAN_CAPABILITIES(ifp); /* turn off LRO means also turn of HW LRO - if it's on */ if (mask & IFCAP_LRO) { int was_opened = test_bit(MLX5E_STATE_OPENED, &priv->state); bool need_restart = false; ifp->if_capenable ^= IFCAP_LRO; /* figure out if updating HW LRO is needed */ if (!(ifp->if_capenable & IFCAP_LRO)) { if (priv->params.hw_lro_en) { priv->params.hw_lro_en = false; need_restart = true; } } else { if (priv->params.hw_lro_en == false && priv->params_ethtool.hw_lro != 0) { priv->params.hw_lro_en = true; need_restart = true; } } if (was_opened && need_restart) { mlx5e_close_locked(ifp); mlx5e_open_locked(ifp); } } if (mask & IFCAP_HWRXTSTMP) { ifp->if_capenable ^= IFCAP_HWRXTSTMP; if (ifp->if_capenable & IFCAP_HWRXTSTMP) { if (priv->clbr_done == 0) mlx5e_reset_calibration_callout(priv); } else { callout_drain(&priv->tstmp_clbr); priv->clbr_done = 0; } } out: PRIV_UNLOCK(priv); break; case SIOCGI2C: ifr = (struct ifreq *)data; /* * Copy from the user-space address ifr_data to the * kernel-space address i2c */ error = copyin(ifr_data_get_ptr(ifr), &i2c, sizeof(i2c)); if (error) break; if (i2c.len > sizeof(i2c.data)) { error = EINVAL; break; } PRIV_LOCK(priv); /* Get module_num which is required for the query_eeprom */ error = mlx5_query_module_num(priv->mdev, &module_num); if (error) { mlx5_en_err(ifp, "Query module num failed, eeprom reading is not supported\n"); error = EINVAL; goto err_i2c; } /* Check if module is present before doing an access */ module_status = mlx5_query_module_status(priv->mdev, module_num); if (module_status != MLX5_MODULE_STATUS_PLUGGED_ENABLED) { error = EINVAL; goto err_i2c; } /* * Currently 0XA0 and 0xA2 are the only addresses permitted. * The internal conversion is as follows: */ if (i2c.dev_addr == 0xA0) read_addr = MLX5_I2C_ADDR_LOW; else if (i2c.dev_addr == 0xA2) read_addr = MLX5_I2C_ADDR_HIGH; else { mlx5_en_err(ifp, "Query eeprom failed, Invalid Address: %X\n", i2c.dev_addr); error = EINVAL; goto err_i2c; } error = mlx5_query_eeprom(priv->mdev, read_addr, MLX5_EEPROM_LOW_PAGE, (uint32_t)i2c.offset, (uint32_t)i2c.len, module_num, (uint32_t *)i2c.data, &size_read); if (error) { mlx5_en_err(ifp, "Query eeprom failed, eeprom reading is not supported\n"); error = EINVAL; goto err_i2c; } if (i2c.len > MLX5_EEPROM_MAX_BYTES) { error = mlx5_query_eeprom(priv->mdev, read_addr, MLX5_EEPROM_LOW_PAGE, (uint32_t)(i2c.offset + size_read), (uint32_t)(i2c.len - size_read), module_num, (uint32_t *)(i2c.data + size_read), &size_read); } if (error) { mlx5_en_err(ifp, "Query eeprom failed, eeprom reading is not supported\n"); error = EINVAL; goto err_i2c; } error = copyout(&i2c, ifr_data_get_ptr(ifr), sizeof(i2c)); err_i2c: PRIV_UNLOCK(priv); break; case SIOCGIFDOWNREASON: ifdr = (struct ifdownreason *)data; bzero(ifdr->ifdr_msg, sizeof(ifdr->ifdr_msg)); PRIV_LOCK(priv); error = -mlx5_query_pddr_troubleshooting_info(priv->mdev, NULL, ifdr->ifdr_msg, sizeof(ifdr->ifdr_msg)); PRIV_UNLOCK(priv); if (error == 0) ifdr->ifdr_reason = IFDR_REASON_MSG; break; case SIOCGIFRSSKEY: ifrk = (struct ifrsskey *)data; ifrk->ifrk_func = RSS_FUNC_TOEPLITZ; ifrk->ifrk_keylen = MLX5E_RSS_KEY_SIZE; CTASSERT(sizeof(ifrk->ifrk_key) >= MLX5E_RSS_KEY_SIZE); mlx5e_get_rss_key(ifrk->ifrk_key); break; case SIOCGIFRSSHASH: ifrh = (struct ifrsshash *)data; ifrh->ifrh_func = RSS_FUNC_TOEPLITZ; ifrh->ifrh_types = RSS_TYPE_IPV4 | RSS_TYPE_TCP_IPV4 | RSS_TYPE_UDP_IPV4 | RSS_TYPE_IPV6 | RSS_TYPE_TCP_IPV6 | RSS_TYPE_UDP_IPV6; break; default: error = ether_ioctl(ifp, command, data); break; } return (error); } static int mlx5e_check_required_hca_cap(struct mlx5_core_dev *mdev) { /* * TODO: uncoment once FW really sets all these bits if * (!mdev->caps.eth.rss_ind_tbl_cap || !mdev->caps.eth.csum_cap || * !mdev->caps.eth.max_lso_cap || !mdev->caps.eth.vlan_cap || * !(mdev->caps.gen.flags & MLX5_DEV_CAP_FLAG_SCQE_BRK_MOD)) return * -ENOTSUPP; */ /* TODO: add more must-to-have features */ if (MLX5_CAP_GEN(mdev, port_type) != MLX5_CAP_PORT_TYPE_ETH) return (-ENODEV); return (0); } static u16 mlx5e_get_max_inline_cap(struct mlx5_core_dev *mdev) { const int min_size = ETHER_VLAN_ENCAP_LEN + ETHER_HDR_LEN; const int max_size = MLX5E_MAX_TX_INLINE; const int bf_buf_size = ((1U << MLX5_CAP_GEN(mdev, log_bf_reg_size)) / 2U) - (sizeof(struct mlx5e_tx_wqe) - 2); /* verify against driver limits */ if (bf_buf_size > max_size) return (max_size); else if (bf_buf_size < min_size) return (min_size); else return (bf_buf_size); } static int mlx5e_build_ifp_priv(struct mlx5_core_dev *mdev, struct mlx5e_priv *priv, int num_comp_vectors) { int err; /* * TODO: Consider link speed for setting "log_sq_size", * "log_rq_size" and "cq_moderation_xxx": */ priv->params.log_sq_size = MLX5E_PARAMS_DEFAULT_LOG_SQ_SIZE; priv->params.log_rq_size = MLX5E_PARAMS_DEFAULT_LOG_RQ_SIZE; priv->params.rx_cq_moderation_usec = MLX5_CAP_GEN(mdev, cq_period_start_from_cqe) ? MLX5E_PARAMS_DEFAULT_RX_CQ_MODERATION_USEC_FROM_CQE : MLX5E_PARAMS_DEFAULT_RX_CQ_MODERATION_USEC; priv->params.rx_cq_moderation_mode = MLX5_CAP_GEN(mdev, cq_period_start_from_cqe) ? 1 : 0; priv->params.rx_cq_moderation_pkts = MLX5E_PARAMS_DEFAULT_RX_CQ_MODERATION_PKTS; priv->params.tx_cq_moderation_usec = MLX5E_PARAMS_DEFAULT_TX_CQ_MODERATION_USEC; priv->params.tx_cq_moderation_pkts = MLX5E_PARAMS_DEFAULT_TX_CQ_MODERATION_PKTS; priv->params.min_rx_wqes = MLX5E_PARAMS_DEFAULT_MIN_RX_WQES; priv->params.rx_hash_log_tbl_sz = (order_base_2(num_comp_vectors) > MLX5E_PARAMS_DEFAULT_RX_HASH_LOG_TBL_SZ) ? order_base_2(num_comp_vectors) : MLX5E_PARAMS_DEFAULT_RX_HASH_LOG_TBL_SZ; priv->params.num_tc = 1; priv->params.default_vlan_prio = 0; priv->counter_set_id = -1; priv->params.tx_max_inline = mlx5e_get_max_inline_cap(mdev); err = mlx5_query_min_inline(mdev, &priv->params.tx_min_inline_mode); if (err) return (err); /* * hw lro is currently defaulted to off. when it won't anymore we * will consider the HW capability: "!!MLX5_CAP_ETH(mdev, lro_cap)" */ priv->params.hw_lro_en = false; priv->params.lro_wqe_sz = MLX5E_PARAMS_DEFAULT_LRO_WQE_SZ; /* * CQE zipping is currently defaulted to off. when it won't * anymore we will consider the HW capability: * "!!MLX5_CAP_GEN(mdev, cqe_compression)" */ priv->params.cqe_zipping_en = false; priv->mdev = mdev; priv->params.num_channels = num_comp_vectors; priv->params.channels_rsss = 1; priv->order_base_2_num_channels = order_base_2(num_comp_vectors); priv->queue_mapping_channel_mask = roundup_pow_of_two(num_comp_vectors) - 1; priv->num_tc = priv->params.num_tc; priv->default_vlan_prio = priv->params.default_vlan_prio; INIT_WORK(&priv->update_stats_work, mlx5e_update_stats_work); INIT_WORK(&priv->update_carrier_work, mlx5e_update_carrier_work); INIT_WORK(&priv->set_rx_mode_work, mlx5e_set_rx_mode_work); return (0); } static void mlx5e_mkey_set_relaxed_ordering(struct mlx5_core_dev *mdev, void *mkc) { bool ro_pci_enable = pci_get_relaxed_ordering_enabled(mdev->pdev->dev.bsddev); bool ro_write = MLX5_CAP_GEN(mdev, relaxed_ordering_write); bool ro_read = MLX5_CAP_GEN(mdev, relaxed_ordering_read); MLX5_SET(mkc, mkc, relaxed_ordering_read, ro_pci_enable && ro_read); MLX5_SET(mkc, mkc, relaxed_ordering_write, ro_pci_enable && ro_write); } static int mlx5e_create_mkey(struct mlx5e_priv *priv, u32 pdn, struct mlx5_core_mkey *mkey) { struct ifnet *ifp = priv->ifp; struct mlx5_core_dev *mdev = priv->mdev; int inlen = MLX5_ST_SZ_BYTES(create_mkey_in); void *mkc; u32 *in; int err; in = mlx5_vzalloc(inlen); if (in == NULL) { mlx5_en_err(ifp, "failed to allocate inbox\n"); return (-ENOMEM); } mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry); MLX5_SET(mkc, mkc, access_mode, MLX5_ACCESS_MODE_PA); MLX5_SET(mkc, mkc, umr_en, 1); /* used by HW TLS */ MLX5_SET(mkc, mkc, lw, 1); MLX5_SET(mkc, mkc, lr, 1); mlx5e_mkey_set_relaxed_ordering(mdev, mkc); MLX5_SET(mkc, mkc, pd, pdn); MLX5_SET(mkc, mkc, length64, 1); MLX5_SET(mkc, mkc, qpn, 0xffffff); err = mlx5_core_create_mkey(mdev, mkey, in, inlen); if (err) mlx5_en_err(ifp, "mlx5_core_create_mkey failed, %d\n", err); kvfree(in); return (err); } static const char *mlx5e_vport_stats_desc[] = { MLX5E_VPORT_STATS(MLX5E_STATS_DESC) }; static const char *mlx5e_pport_stats_desc[] = { MLX5E_PPORT_STATS(MLX5E_STATS_DESC) }; static int mlx5e_priv_static_init(struct mlx5e_priv *priv, struct mlx5_core_dev *mdev, const uint32_t channels) { uint32_t x; int err; mtx_init(&priv->async_events_mtx, "mlx5async", MTX_NETWORK_LOCK, MTX_DEF); sx_init(&priv->state_lock, "mlx5state"); callout_init_mtx(&priv->watchdog, &priv->async_events_mtx, 0); MLX5_INIT_DOORBELL_LOCK(&priv->doorbell_lock); for (x = 0; x != channels; x++) mlx5e_chan_static_init(priv, &priv->channel[x], x); for (x = 0; x != channels; x++) { err = mlx5_alloc_bfreg(mdev, &priv->channel[x].bfreg, false, false); if (err) goto err_alloc_bfreg; } return (0); err_alloc_bfreg: while (x--) mlx5_free_bfreg(mdev, &priv->channel[x].bfreg); for (x = 0; x != channels; x++) mlx5e_chan_static_destroy(&priv->channel[x]); callout_drain(&priv->watchdog); mtx_destroy(&priv->async_events_mtx); sx_destroy(&priv->state_lock); return (err); } static void mlx5e_priv_static_destroy(struct mlx5e_priv *priv, struct mlx5_core_dev *mdev, const uint32_t channels) { uint32_t x; for (x = 0; x != channels; x++) mlx5_free_bfreg(mdev, &priv->channel[x].bfreg); for (x = 0; x != channels; x++) mlx5e_chan_static_destroy(&priv->channel[x]); callout_drain(&priv->watchdog); mtx_destroy(&priv->async_events_mtx); sx_destroy(&priv->state_lock); } static int sysctl_firmware(SYSCTL_HANDLER_ARGS) { /* * %d.%d%.d the string format. * fw_rev_{maj,min,sub} return u16, 2^16 = 65536. * We need at most 5 chars to store that. * It also has: two "." and NULL at the end, which means we need 18 * (5*3 + 3) chars at most. */ char fw[18]; struct mlx5e_priv *priv = arg1; int error; snprintf(fw, sizeof(fw), "%d.%d.%d", fw_rev_maj(priv->mdev), fw_rev_min(priv->mdev), fw_rev_sub(priv->mdev)); error = sysctl_handle_string(oidp, fw, sizeof(fw), req); return (error); } static void mlx5e_disable_tx_dma(struct mlx5e_channel *ch) { int i; for (i = 0; i < ch->priv->num_tc; i++) mlx5e_drain_sq(&ch->sq[i]); } static void mlx5e_reset_sq_doorbell_record(struct mlx5e_sq *sq) { sq->doorbell.d32[0] = cpu_to_be32(MLX5_OPCODE_NOP); sq->doorbell.d32[1] = cpu_to_be32(sq->sqn << 8); mlx5e_tx_notify_hw(sq, sq->doorbell.d32); sq->doorbell.d64 = 0; } void mlx5e_resume_sq(struct mlx5e_sq *sq) { int err; /* check if already enabled */ if (READ_ONCE(sq->running) != 0) return; err = mlx5e_modify_sq(sq, MLX5_SQC_STATE_ERR, MLX5_SQC_STATE_RST); if (err != 0) { mlx5_en_err(sq->ifp, "mlx5e_modify_sq() from ERR to RST failed: %d\n", err); } sq->cc = 0; sq->pc = 0; /* reset doorbell prior to moving from RST to RDY */ mlx5e_reset_sq_doorbell_record(sq); err = mlx5e_modify_sq(sq, MLX5_SQC_STATE_RST, MLX5_SQC_STATE_RDY); if (err != 0) { mlx5_en_err(sq->ifp, "mlx5e_modify_sq() from RST to RDY failed: %d\n", err); } sq->cev_next_state = MLX5E_CEV_STATE_INITIAL; WRITE_ONCE(sq->running, 1); } static void mlx5e_enable_tx_dma(struct mlx5e_channel *ch) { int i; for (i = 0; i < ch->priv->num_tc; i++) mlx5e_resume_sq(&ch->sq[i]); } static void mlx5e_disable_rx_dma(struct mlx5e_channel *ch) { struct mlx5e_rq *rq = &ch->rq; struct epoch_tracker et; int err; mtx_lock(&rq->mtx); rq->enabled = 0; callout_stop(&rq->watchdog); mtx_unlock(&rq->mtx); err = mlx5e_modify_rq(rq, MLX5_RQC_STATE_RDY, MLX5_RQC_STATE_ERR); if (err != 0) { mlx5_en_err(rq->ifp, "mlx5e_modify_rq() from RDY to RST failed: %d\n", err); } while (!mlx5_wq_ll_is_empty(&rq->wq)) { msleep(1); NET_EPOCH_ENTER(et); rq->cq.mcq.comp(&rq->cq.mcq, NULL); NET_EPOCH_EXIT(et); } /* * Transitioning into RST state will allow the FW to track less ERR state queues, * thus reducing the recv queue flushing time */ err = mlx5e_modify_rq(rq, MLX5_RQC_STATE_ERR, MLX5_RQC_STATE_RST); if (err != 0) { mlx5_en_err(rq->ifp, "mlx5e_modify_rq() from ERR to RST failed: %d\n", err); } } static void mlx5e_enable_rx_dma(struct mlx5e_channel *ch) { struct mlx5e_rq *rq = &ch->rq; struct epoch_tracker et; int err; rq->wq.wqe_ctr = 0; mlx5_wq_ll_update_db_record(&rq->wq); err = mlx5e_modify_rq(rq, MLX5_RQC_STATE_RST, MLX5_RQC_STATE_RDY); if (err != 0) { mlx5_en_err(rq->ifp, "mlx5e_modify_rq() from RST to RDY failed: %d\n", err); } rq->enabled = 1; NET_EPOCH_ENTER(et); rq->cq.mcq.comp(&rq->cq.mcq, NULL); NET_EPOCH_EXIT(et); } void mlx5e_modify_tx_dma(struct mlx5e_priv *priv, uint8_t value) { int i; if (test_bit(MLX5E_STATE_OPENED, &priv->state) == 0) return; for (i = 0; i < priv->params.num_channels; i++) { if (value) mlx5e_disable_tx_dma(&priv->channel[i]); else mlx5e_enable_tx_dma(&priv->channel[i]); } } void mlx5e_modify_rx_dma(struct mlx5e_priv *priv, uint8_t value) { int i; if (test_bit(MLX5E_STATE_OPENED, &priv->state) == 0) return; for (i = 0; i < priv->params.num_channels; i++) { if (value) mlx5e_disable_rx_dma(&priv->channel[i]); else mlx5e_enable_rx_dma(&priv->channel[i]); } } static void mlx5e_add_hw_stats(struct mlx5e_priv *priv) { SYSCTL_ADD_PROC(&priv->sysctl_ctx, SYSCTL_CHILDREN(priv->sysctl_hw), OID_AUTO, "fw_version", CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, priv, 0, sysctl_firmware, "A", "HCA firmware version"); SYSCTL_ADD_STRING(&priv->sysctl_ctx, SYSCTL_CHILDREN(priv->sysctl_hw), OID_AUTO, "board_id", CTLFLAG_RD, priv->mdev->board_id, 0, "Board ID"); } static int mlx5e_sysctl_tx_priority_flow_control(SYSCTL_HANDLER_ARGS) { struct mlx5e_priv *priv = arg1; uint8_t temp[MLX5E_MAX_PRIORITY]; uint32_t tx_pfc; int err; int i; PRIV_LOCK(priv); tx_pfc = priv->params.tx_priority_flow_control; for (i = 0; i != MLX5E_MAX_PRIORITY; i++) temp[i] = (tx_pfc >> i) & 1; err = SYSCTL_OUT(req, temp, MLX5E_MAX_PRIORITY); if (err || !req->newptr) goto done; err = SYSCTL_IN(req, temp, MLX5E_MAX_PRIORITY); if (err) goto done; priv->params.tx_priority_flow_control = 0; /* range check input value */ for (i = 0; i != MLX5E_MAX_PRIORITY; i++) { if (temp[i] > 1) { err = ERANGE; goto done; } priv->params.tx_priority_flow_control |= (temp[i] << i); } /* check if update is required */ if (tx_pfc != priv->params.tx_priority_flow_control) err = -mlx5e_set_port_pfc(priv); done: if (err != 0) priv->params.tx_priority_flow_control= tx_pfc; PRIV_UNLOCK(priv); return (err); } static int mlx5e_sysctl_rx_priority_flow_control(SYSCTL_HANDLER_ARGS) { struct mlx5e_priv *priv = arg1; uint8_t temp[MLX5E_MAX_PRIORITY]; uint32_t rx_pfc; int err; int i; PRIV_LOCK(priv); rx_pfc = priv->params.rx_priority_flow_control; for (i = 0; i != MLX5E_MAX_PRIORITY; i++) temp[i] = (rx_pfc >> i) & 1; err = SYSCTL_OUT(req, temp, MLX5E_MAX_PRIORITY); if (err || !req->newptr) goto done; err = SYSCTL_IN(req, temp, MLX5E_MAX_PRIORITY); if (err) goto done; priv->params.rx_priority_flow_control = 0; /* range check input value */ for (i = 0; i != MLX5E_MAX_PRIORITY; i++) { if (temp[i] > 1) { err = ERANGE; goto done; } priv->params.rx_priority_flow_control |= (temp[i] << i); } /* check if update is required */ if (rx_pfc != priv->params.rx_priority_flow_control) { err = -mlx5e_set_port_pfc(priv); if (err == 0 && priv->sw_is_port_buf_owner) err = mlx5e_update_buf_lossy(priv); } done: if (err != 0) priv->params.rx_priority_flow_control= rx_pfc; PRIV_UNLOCK(priv); return (err); } static void mlx5e_setup_pauseframes(struct mlx5e_priv *priv) { #if (__FreeBSD_version < 1100000) char path[96]; #endif int error; /* enable pauseframes by default */ priv->params.tx_pauseframe_control = 1; priv->params.rx_pauseframe_control = 1; /* disable ports flow control, PFC, by default */ priv->params.tx_priority_flow_control = 0; priv->params.rx_priority_flow_control = 0; #if (__FreeBSD_version < 1100000) /* compute path for sysctl */ snprintf(path, sizeof(path), "dev.mce.%d.tx_pauseframe_control", device_get_unit(priv->mdev->pdev->dev.bsddev)); /* try to fetch tunable, if any */ TUNABLE_INT_FETCH(path, &priv->params.tx_pauseframe_control); /* compute path for sysctl */ snprintf(path, sizeof(path), "dev.mce.%d.rx_pauseframe_control", device_get_unit(priv->mdev->pdev->dev.bsddev)); /* try to fetch tunable, if any */ TUNABLE_INT_FETCH(path, &priv->params.rx_pauseframe_control); #endif /* register pauseframe SYSCTLs */ SYSCTL_ADD_INT(&priv->sysctl_ctx, SYSCTL_CHILDREN(priv->sysctl_ifnet), OID_AUTO, "tx_pauseframe_control", CTLFLAG_RDTUN, &priv->params.tx_pauseframe_control, 0, "Set to enable TX pause frames. Clear to disable."); SYSCTL_ADD_INT(&priv->sysctl_ctx, SYSCTL_CHILDREN(priv->sysctl_ifnet), OID_AUTO, "rx_pauseframe_control", CTLFLAG_RDTUN, &priv->params.rx_pauseframe_control, 0, "Set to enable RX pause frames. Clear to disable."); /* register priority flow control, PFC, SYSCTLs */ SYSCTL_ADD_PROC(&priv->sysctl_ctx, SYSCTL_CHILDREN(priv->sysctl_ifnet), OID_AUTO, "tx_priority_flow_control", CTLTYPE_U8 | CTLFLAG_RWTUN | CTLFLAG_MPSAFE, priv, 0, &mlx5e_sysctl_tx_priority_flow_control, "CU", "Set to enable TX ports flow control frames for priorities 0..7. Clear to disable."); SYSCTL_ADD_PROC(&priv->sysctl_ctx, SYSCTL_CHILDREN(priv->sysctl_ifnet), OID_AUTO, "rx_priority_flow_control", CTLTYPE_U8 | CTLFLAG_RWTUN | CTLFLAG_MPSAFE, priv, 0, &mlx5e_sysctl_rx_priority_flow_control, "CU", "Set to enable RX ports flow control frames for priorities 0..7. Clear to disable."); PRIV_LOCK(priv); /* range check */ priv->params.tx_pauseframe_control = priv->params.tx_pauseframe_control ? 1 : 0; priv->params.rx_pauseframe_control = priv->params.rx_pauseframe_control ? 1 : 0; /* update firmware */ error = mlx5e_set_port_pause_and_pfc(priv); if (error == -EINVAL) { mlx5_en_err(priv->ifp, "Global pauseframes must be disabled before enabling PFC.\n"); priv->params.rx_priority_flow_control = 0; priv->params.tx_priority_flow_control = 0; /* update firmware */ (void) mlx5e_set_port_pause_and_pfc(priv); } PRIV_UNLOCK(priv); } static int mlx5e_ul_snd_tag_alloc(struct ifnet *ifp, union if_snd_tag_alloc_params *params, struct m_snd_tag **ppmt) { struct mlx5e_priv *priv; struct mlx5e_channel *pch; priv = ifp->if_softc; if (unlikely(priv->gone || params->hdr.flowtype == M_HASHTYPE_NONE)) { return (EOPNOTSUPP); } else { /* keep this code synced with mlx5e_select_queue() */ u32 ch = priv->params.num_channels; #ifdef RSS u32 temp; if (rss_hash2bucket(params->hdr.flowid, params->hdr.flowtype, &temp) == 0) ch = temp % ch; else #endif ch = (params->hdr.flowid % 128) % ch; /* * NOTE: The channels array is only freed at detach * and it safe to return a pointer to the send tag * inside the channels structure as long as we * reference the priv. */ pch = priv->channel + ch; /* check if send queue is not running */ if (unlikely(pch->sq[0].running == 0)) return (ENXIO); m_snd_tag_ref(&pch->tag); *ppmt = &pch->tag; return (0); } } static int mlx5e_ul_snd_tag_query(struct m_snd_tag *pmt, union if_snd_tag_query_params *params) { struct mlx5e_channel *pch = container_of(pmt, struct mlx5e_channel, tag); params->unlimited.max_rate = -1ULL; params->unlimited.queue_level = mlx5e_sq_queue_level(&pch->sq[0]); return (0); } static void mlx5e_ul_snd_tag_free(struct m_snd_tag *pmt) { struct mlx5e_channel *pch = container_of(pmt, struct mlx5e_channel, tag); complete(&pch->completion); } static int mlx5e_snd_tag_alloc(struct ifnet *ifp, union if_snd_tag_alloc_params *params, struct m_snd_tag **ppmt) { switch (params->hdr.type) { #ifdef RATELIMIT case IF_SND_TAG_TYPE_RATE_LIMIT: return (mlx5e_rl_snd_tag_alloc(ifp, params, ppmt)); #ifdef KERN_TLS case IF_SND_TAG_TYPE_TLS_RATE_LIMIT: return (mlx5e_tls_snd_tag_alloc(ifp, params, ppmt)); #endif #endif case IF_SND_TAG_TYPE_UNLIMITED: return (mlx5e_ul_snd_tag_alloc(ifp, params, ppmt)); #ifdef KERN_TLS case IF_SND_TAG_TYPE_TLS: return (mlx5e_tls_snd_tag_alloc(ifp, params, ppmt)); #endif default: return (EOPNOTSUPP); } } #ifdef RATELIMIT #define NUM_HDWR_RATES_MLX 13 static const uint64_t adapter_rates_mlx[NUM_HDWR_RATES_MLX] = { 135375, /* 1,083,000 */ 180500, /* 1,444,000 */ 270750, /* 2,166,000 */ 361000, /* 2,888,000 */ 541500, /* 4,332,000 */ 721875, /* 5,775,000 */ 1082875, /* 8,663,000 */ 1443875, /* 11,551,000 */ 2165750, /* 17,326,000 */ 2887750, /* 23,102,000 */ 4331625, /* 34,653,000 */ 5775500, /* 46,204,000 */ 8663125 /* 69,305,000 */ }; static void mlx5e_ratelimit_query(struct ifnet *ifp __unused, struct if_ratelimit_query_results *q) { /* * This function needs updating by the driver maintainer! * For the MLX card there are currently (ConectX-4?) 13 * pre-set rates and others i.e. ConnectX-5, 6, 7?? * * This will change based on later adapters * and this code should be updated to look at ifp * and figure out the specific adapter type * settings i.e. how many rates as well * as if they are fixed (as is shown here) or * if they are dynamic (example chelsio t4). Also if there * is a maximum number of flows that the adapter * can handle that too needs to be updated in * the max_flows field. */ q->rate_table = adapter_rates_mlx; q->flags = RT_IS_FIXED_TABLE; q->max_flows = 0; /* mlx has no limit */ q->number_of_rates = NUM_HDWR_RATES_MLX; q->min_segment_burst = 1; } #endif static void mlx5e_ifm_add(struct mlx5e_priv *priv, int type) { ifmedia_add(&priv->media, type | IFM_ETHER, 0, NULL); ifmedia_add(&priv->media, type | IFM_ETHER | IFM_ETH_RXPAUSE | IFM_ETH_TXPAUSE, 0, NULL); ifmedia_add(&priv->media, type | IFM_ETHER | IFM_ETH_RXPAUSE, 0, NULL); ifmedia_add(&priv->media, type | IFM_ETHER | IFM_ETH_TXPAUSE, 0, NULL); ifmedia_add(&priv->media, type | IFM_ETHER | IFM_FDX, 0, NULL); ifmedia_add(&priv->media, type | IFM_ETHER | IFM_FDX | IFM_ETH_RXPAUSE, 0, NULL); ifmedia_add(&priv->media, type | IFM_ETHER | IFM_FDX | IFM_ETH_TXPAUSE, 0, NULL); ifmedia_add(&priv->media, type | IFM_ETHER | IFM_FDX | IFM_ETH_RXPAUSE | IFM_ETH_TXPAUSE, 0, NULL); } static void * mlx5e_create_ifp(struct mlx5_core_dev *mdev) { struct ifnet *ifp; struct mlx5e_priv *priv; u8 dev_addr[ETHER_ADDR_LEN] __aligned(4); struct sysctl_oid_list *child; int ncv = mdev->priv.eq_table.num_comp_vectors; char unit[16]; struct pfil_head_args pa; int err; u32 eth_proto_cap; u32 out[MLX5_ST_SZ_DW(ptys_reg)]; bool ext; struct media media_entry = {}; if (mlx5e_check_required_hca_cap(mdev)) { mlx5_core_dbg(mdev, "mlx5e_check_required_hca_cap() failed\n"); return (NULL); } /* * Try to allocate the priv and make room for worst-case * number of channel structures: */ priv = malloc_domainset(sizeof(*priv) + (sizeof(priv->channel[0]) * mdev->priv.eq_table.num_comp_vectors), M_MLX5EN, mlx5_dev_domainset(mdev), M_WAITOK | M_ZERO); ifp = priv->ifp = if_alloc_dev(IFT_ETHER, mdev->pdev->dev.bsddev); if (ifp == NULL) { mlx5_core_err(mdev, "if_alloc() failed\n"); goto err_free_priv; } /* setup all static fields */ if (mlx5e_priv_static_init(priv, mdev, mdev->priv.eq_table.num_comp_vectors)) { mlx5_core_err(mdev, "mlx5e_priv_static_init() failed\n"); goto err_free_ifp; } ifp->if_softc = priv; if_initname(ifp, "mce", device_get_unit(mdev->pdev->dev.bsddev)); ifp->if_mtu = ETHERMTU; ifp->if_init = mlx5e_open; ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST | IFF_KNOWSEPOCH; ifp->if_ioctl = mlx5e_ioctl; ifp->if_transmit = mlx5e_xmit; ifp->if_qflush = if_qflush; #if (__FreeBSD_version >= 1100000) ifp->if_get_counter = mlx5e_get_counter; #endif ifp->if_snd.ifq_maxlen = ifqmaxlen; /* * Set driver features */ ifp->if_capabilities |= IFCAP_HWCSUM | IFCAP_HWCSUM_IPV6; ifp->if_capabilities |= IFCAP_VLAN_MTU | IFCAP_VLAN_HWTAGGING; ifp->if_capabilities |= IFCAP_VLAN_HWCSUM | IFCAP_VLAN_HWFILTER; ifp->if_capabilities |= IFCAP_LINKSTATE | IFCAP_JUMBO_MTU; ifp->if_capabilities |= IFCAP_LRO; ifp->if_capabilities |= IFCAP_TSO | IFCAP_VLAN_HWTSO; ifp->if_capabilities |= IFCAP_HWSTATS | IFCAP_HWRXTSTMP; ifp->if_capabilities |= IFCAP_MEXTPG; ifp->if_capabilities |= IFCAP_TXTLS4 | IFCAP_TXTLS6; #ifdef RATELIMIT ifp->if_capabilities |= IFCAP_TXRTLMT | IFCAP_TXTLS_RTLMT; #endif ifp->if_capabilities |= IFCAP_VXLAN_HWCSUM | IFCAP_VXLAN_HWTSO; ifp->if_snd_tag_alloc = mlx5e_snd_tag_alloc; #ifdef RATELIMIT ifp->if_ratelimit_query = mlx5e_ratelimit_query; #endif /* set TSO limits so that we don't have to drop TX packets */ ifp->if_hw_tsomax = MLX5E_MAX_TX_PAYLOAD_SIZE - (ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN); ifp->if_hw_tsomaxsegcount = MLX5E_MAX_TX_MBUF_FRAGS - 1 /* hdr */; ifp->if_hw_tsomaxsegsize = MLX5E_MAX_TX_MBUF_SIZE; ifp->if_capenable = ifp->if_capabilities; ifp->if_hwassist = 0; if (ifp->if_capenable & IFCAP_TSO) ifp->if_hwassist |= CSUM_TSO; if (ifp->if_capenable & IFCAP_TXCSUM) ifp->if_hwassist |= (CSUM_TCP | CSUM_UDP | CSUM_IP); if (ifp->if_capenable & IFCAP_TXCSUM_IPV6) ifp->if_hwassist |= (CSUM_UDP_IPV6 | CSUM_TCP_IPV6); if (ifp->if_capabilities & IFCAP_VXLAN_HWCSUM) ifp->if_hwassist |= CSUM_INNER_IP6_UDP | CSUM_INNER_IP6_TCP | CSUM_INNER_IP | CSUM_INNER_IP_UDP | CSUM_INNER_IP_TCP | CSUM_ENCAP_VXLAN; if (ifp->if_capabilities & IFCAP_VXLAN_HWTSO) ifp->if_hwassist |= CSUM_INNER_IP6_TSO | CSUM_INNER_IP_TSO; /* ifnet sysctl tree */ sysctl_ctx_init(&priv->sysctl_ctx); priv->sysctl_ifnet = SYSCTL_ADD_NODE(&priv->sysctl_ctx, SYSCTL_STATIC_CHILDREN(_dev), OID_AUTO, ifp->if_dname, CTLFLAG_RD | CTLFLAG_MPSAFE, 0, "MLX5 ethernet - interface name"); if (priv->sysctl_ifnet == NULL) { mlx5_core_err(mdev, "SYSCTL_ADD_NODE() failed\n"); goto err_free_sysctl; } snprintf(unit, sizeof(unit), "%d", ifp->if_dunit); priv->sysctl_ifnet = SYSCTL_ADD_NODE(&priv->sysctl_ctx, SYSCTL_CHILDREN(priv->sysctl_ifnet), OID_AUTO, unit, CTLFLAG_RD | CTLFLAG_MPSAFE, 0, "MLX5 ethernet - interface unit"); if (priv->sysctl_ifnet == NULL) { mlx5_core_err(mdev, "SYSCTL_ADD_NODE() failed\n"); goto err_free_sysctl; } /* HW sysctl tree */ child = SYSCTL_CHILDREN(device_get_sysctl_tree(mdev->pdev->dev.bsddev)); priv->sysctl_hw = SYSCTL_ADD_NODE(&priv->sysctl_ctx, child, OID_AUTO, "hw", CTLFLAG_RD | CTLFLAG_MPSAFE, 0, "MLX5 ethernet dev hw"); if (priv->sysctl_hw == NULL) { mlx5_core_err(mdev, "SYSCTL_ADD_NODE() failed\n"); goto err_free_sysctl; } err = mlx5e_build_ifp_priv(mdev, priv, ncv); if (err) { mlx5_core_err(mdev, "mlx5e_build_ifp_priv() failed (%d)\n", err); goto err_free_sysctl; } /* reuse mlx5core's watchdog workqueue */ priv->wq = mdev->priv.health.wq_watchdog; err = mlx5_core_alloc_pd(mdev, &priv->pdn, 0); if (err) { mlx5_en_err(ifp, "mlx5_core_alloc_pd failed, %d\n", err); goto err_free_wq; } err = mlx5_alloc_transport_domain(mdev, &priv->tdn, 0); if (err) { mlx5_en_err(ifp, "mlx5_alloc_transport_domain failed, %d\n", err); goto err_dealloc_pd; } err = mlx5e_create_mkey(priv, priv->pdn, &priv->mr); if (err) { mlx5_en_err(ifp, "mlx5e_create_mkey failed, %d\n", err); goto err_dealloc_transport_domain; } mlx5_query_nic_vport_mac_address(priv->mdev, 0, dev_addr); /* check if we should generate a random MAC address */ if (MLX5_CAP_GEN(priv->mdev, vport_group_manager) == 0 && is_zero_ether_addr(dev_addr)) { random_ether_addr(dev_addr); mlx5_en_err(ifp, "Assigned random MAC address\n"); } err = mlx5e_rl_init(priv); if (err) { mlx5_en_err(ifp, "mlx5e_rl_init failed, %d\n", err); goto err_create_mkey; } err = mlx5e_tls_init(priv); if (err) { if_printf(ifp, "%s: mlx5e_tls_init failed\n", __func__); goto err_rl_init; } /* set default MTU */ mlx5e_set_dev_port_mtu(ifp, ifp->if_mtu); /* Set default media status */ priv->media_status_last = IFM_AVALID; priv->media_active_last = IFM_ETHER | IFM_AUTO | IFM_FDX; /* setup default pauseframes configuration */ mlx5e_setup_pauseframes(priv); /* Setup supported medias */ if (!mlx5_query_port_ptys(mdev, out, sizeof(out), MLX5_PTYS_EN, 1)) { ext = MLX5_CAP_PCAM_FEATURE(mdev, ptys_extended_ethernet); eth_proto_cap = MLX5_GET_ETH_PROTO(ptys_reg, out, ext, eth_proto_capability); } else { ext = false; eth_proto_cap = 0; mlx5_en_err(ifp, "Query port media capability failed, %d\n", err); } ifmedia_init(&priv->media, IFM_IMASK, mlx5e_media_change, mlx5e_media_status); if (ext) { for (unsigned i = 0; i != MLX5E_EXT_LINK_SPEEDS_NUMBER; i++) { /* check if hardware has the right capability */ if (MLX5E_PROT_MASK(i) & ~eth_proto_cap) continue; for (unsigned j = 0; j != MLX5E_CABLE_TYPE_NUMBER; j++) { media_entry = mlx5e_ext_mode_table[i][j]; if (media_entry.subtype == 0) continue; /* check if this subtype was already added */ for (unsigned k = 0; k != i; k++) { /* check if hardware has the right capability */ if (MLX5E_PROT_MASK(k) & ~eth_proto_cap) continue; for (unsigned m = 0; m != MLX5E_CABLE_TYPE_NUMBER; m++) { if (media_entry.subtype == mlx5e_ext_mode_table[k][m].subtype) goto skip_ext_media; } } mlx5e_ifm_add(priv, media_entry.subtype); skip_ext_media:; } } } else { for (unsigned i = 0; i != MLX5E_LINK_SPEEDS_NUMBER; i++) { media_entry = mlx5e_mode_table[i]; if (media_entry.subtype == 0) continue; if (MLX5E_PROT_MASK(i) & ~eth_proto_cap) continue; /* check if this subtype was already added */ for (unsigned k = 0; k != i; k++) { if (media_entry.subtype == mlx5e_mode_table[k].subtype) goto skip_media; } mlx5e_ifm_add(priv, media_entry.subtype); /* NOTE: 10G ER and LR shares the same entry */ if (media_entry.subtype == IFM_10G_ER) mlx5e_ifm_add(priv, IFM_10G_LR); skip_media:; } } mlx5e_ifm_add(priv, IFM_AUTO); /* Set autoselect by default */ ifmedia_set(&priv->media, IFM_ETHER | IFM_AUTO | IFM_FDX | IFM_ETH_RXPAUSE | IFM_ETH_TXPAUSE); DEBUGNET_SET(ifp, mlx5_en); ether_ifattach(ifp, dev_addr); /* Register for VLAN events */ priv->vlan_attach = EVENTHANDLER_REGISTER(vlan_config, mlx5e_vlan_rx_add_vid, priv, EVENTHANDLER_PRI_FIRST); priv->vlan_detach = EVENTHANDLER_REGISTER(vlan_unconfig, mlx5e_vlan_rx_kill_vid, priv, EVENTHANDLER_PRI_FIRST); /* Register for VxLAN events */ priv->vxlan_start = EVENTHANDLER_REGISTER(vxlan_start, mlx5e_vxlan_start, priv, EVENTHANDLER_PRI_ANY); priv->vxlan_stop = EVENTHANDLER_REGISTER(vxlan_stop, mlx5e_vxlan_stop, priv, EVENTHANDLER_PRI_ANY); /* Link is down by default */ if_link_state_change(ifp, LINK_STATE_DOWN); mlx5e_enable_async_events(priv); mlx5e_add_hw_stats(priv); mlx5e_create_stats(&priv->stats.vport.ctx, SYSCTL_CHILDREN(priv->sysctl_ifnet), "vstats", mlx5e_vport_stats_desc, MLX5E_VPORT_STATS_NUM, priv->stats.vport.arg); mlx5e_create_stats(&priv->stats.pport.ctx, SYSCTL_CHILDREN(priv->sysctl_ifnet), "pstats", mlx5e_pport_stats_desc, MLX5E_PPORT_STATS_NUM, priv->stats.pport.arg); mlx5e_create_ethtool(priv); mtx_lock(&priv->async_events_mtx); mlx5e_update_stats(priv); mtx_unlock(&priv->async_events_mtx); SYSCTL_ADD_INT(&priv->sysctl_ctx, SYSCTL_CHILDREN(priv->sysctl_ifnet), OID_AUTO, "rx_clbr_done", CTLFLAG_RD, &priv->clbr_done, 0, "RX timestamps calibration state"); callout_init(&priv->tstmp_clbr, 1); mlx5e_reset_calibration_callout(priv); pa.pa_version = PFIL_VERSION; pa.pa_flags = PFIL_IN; pa.pa_type = PFIL_TYPE_ETHERNET; pa.pa_headname = ifp->if_xname; priv->pfil = pfil_head_register(&pa); return (priv); err_rl_init: mlx5e_rl_cleanup(priv); err_create_mkey: mlx5_core_destroy_mkey(priv->mdev, &priv->mr); err_dealloc_transport_domain: mlx5_dealloc_transport_domain(mdev, priv->tdn, 0); err_dealloc_pd: mlx5_core_dealloc_pd(mdev, priv->pdn, 0); err_free_wq: flush_workqueue(priv->wq); err_free_sysctl: sysctl_ctx_free(&priv->sysctl_ctx); if (priv->sysctl_debug) sysctl_ctx_free(&priv->stats.port_stats_debug.ctx); mlx5e_priv_static_destroy(priv, mdev, mdev->priv.eq_table.num_comp_vectors); err_free_ifp: if_free(ifp); err_free_priv: free(priv, M_MLX5EN); return (NULL); } static void mlx5e_destroy_ifp(struct mlx5_core_dev *mdev, void *vpriv) { struct mlx5e_priv *priv = vpriv; struct ifnet *ifp = priv->ifp; /* don't allow more IOCTLs */ priv->gone = 1; /* XXX wait a bit to allow IOCTL handlers to complete */ pause("W", hz); #ifdef RATELIMIT /* * The kernel can have reference(s) via the m_snd_tag's into * the ratelimit channels, and these must go away before * detaching: */ while (READ_ONCE(priv->rl.stats.tx_active_connections) != 0) { mlx5_en_err(priv->ifp, "Waiting for all ratelimit connections to terminate\n"); pause("W", hz); } #endif #ifdef KERN_TLS /* wait for all TLS tags to get freed */ while (priv->tls.init != 0 && uma_zone_get_cur(priv->tls.zone) != 0) { mlx5_en_err(priv->ifp, "Waiting for all TLS connections to terminate\n"); pause("W", hz); } #endif /* wait for all unlimited send tags to complete */ mlx5e_priv_wait_for_completion(priv, mdev->priv.eq_table.num_comp_vectors); /* stop watchdog timer */ callout_drain(&priv->watchdog); callout_drain(&priv->tstmp_clbr); if (priv->vlan_attach != NULL) EVENTHANDLER_DEREGISTER(vlan_config, priv->vlan_attach); if (priv->vlan_detach != NULL) EVENTHANDLER_DEREGISTER(vlan_unconfig, priv->vlan_detach); if (priv->vxlan_start != NULL) EVENTHANDLER_DEREGISTER(vxlan_start, priv->vxlan_start); if (priv->vxlan_stop != NULL) EVENTHANDLER_DEREGISTER(vxlan_stop, priv->vxlan_stop); /* make sure device gets closed */ PRIV_LOCK(priv); mlx5e_close_locked(ifp); PRIV_UNLOCK(priv); /* deregister pfil */ if (priv->pfil != NULL) { pfil_head_unregister(priv->pfil); priv->pfil = NULL; } /* unregister device */ ifmedia_removeall(&priv->media); ether_ifdetach(ifp); mlx5e_tls_cleanup(priv); mlx5e_rl_cleanup(priv); /* destroy all remaining sysctl nodes */ sysctl_ctx_free(&priv->stats.vport.ctx); sysctl_ctx_free(&priv->stats.pport.ctx); if (priv->sysctl_debug) sysctl_ctx_free(&priv->stats.port_stats_debug.ctx); sysctl_ctx_free(&priv->sysctl_ctx); mlx5_core_destroy_mkey(priv->mdev, &priv->mr); mlx5_dealloc_transport_domain(priv->mdev, priv->tdn, 0); mlx5_core_dealloc_pd(priv->mdev, priv->pdn, 0); mlx5e_disable_async_events(priv); flush_workqueue(priv->wq); mlx5e_priv_static_destroy(priv, mdev, mdev->priv.eq_table.num_comp_vectors); if_free(ifp); free(priv, M_MLX5EN); } #ifdef DEBUGNET static void mlx5_en_debugnet_init(struct ifnet *dev, int *nrxr, int *ncl, int *clsize) { struct mlx5e_priv *priv = if_getsoftc(dev); PRIV_LOCK(priv); *nrxr = priv->params.num_channels; *ncl = DEBUGNET_MAX_IN_FLIGHT; *clsize = MLX5E_MAX_RX_BYTES; PRIV_UNLOCK(priv); } static void mlx5_en_debugnet_event(struct ifnet *dev, enum debugnet_ev event) { } static int mlx5_en_debugnet_transmit(struct ifnet *dev, struct mbuf *m) { struct mlx5e_priv *priv = if_getsoftc(dev); struct mlx5e_sq *sq; int err; if ((if_getdrvflags(dev) & (IFF_DRV_RUNNING | IFF_DRV_OACTIVE)) != IFF_DRV_RUNNING || (priv->media_status_last & IFM_ACTIVE) == 0) return (ENOENT); sq = &priv->channel[0].sq[0]; if (sq->running == 0) { m_freem(m); return (ENOENT); } if (mlx5e_sq_xmit(sq, &m) != 0) { m_freem(m); err = ENOBUFS; } else { err = 0; } if (likely(sq->doorbell.d64 != 0)) { mlx5e_tx_notify_hw(sq, sq->doorbell.d32); sq->doorbell.d64 = 0; } return (err); } static int mlx5_en_debugnet_poll(struct ifnet *dev, int count) { struct mlx5e_priv *priv = if_getsoftc(dev); if ((if_getdrvflags(dev) & IFF_DRV_RUNNING) == 0 || (priv->media_status_last & IFM_ACTIVE) == 0) return (ENOENT); mlx5_poll_interrupts(priv->mdev); return (0); } #endif /* DEBUGNET */ static void * mlx5e_get_ifp(void *vpriv) { struct mlx5e_priv *priv = vpriv; return (priv->ifp); } static struct mlx5_interface mlx5e_interface = { .add = mlx5e_create_ifp, .remove = mlx5e_destroy_ifp, .event = mlx5e_async_event, .protocol = MLX5_INTERFACE_PROTOCOL_ETH, .get_dev = mlx5e_get_ifp, }; void mlx5e_init(void) { mlx5_register_interface(&mlx5e_interface); } void mlx5e_cleanup(void) { mlx5_unregister_interface(&mlx5e_interface); } module_init_order(mlx5e_init, SI_ORDER_SIXTH); module_exit_order(mlx5e_cleanup, SI_ORDER_SIXTH); #if (__FreeBSD_version >= 1100000) MODULE_DEPEND(mlx5en, linuxkpi, 1, 1, 1); #endif MODULE_DEPEND(mlx5en, mlx5, 1, 1, 1); MODULE_VERSION(mlx5en, 1); diff --git a/sys/dev/mlx5/mlx5_en/mlx5_en_port_buffer.c b/sys/dev/mlx5/mlx5_en/mlx5_en_port_buffer.c index 6c7ae5a297cc..078e31cf1949 100644 --- a/sys/dev/mlx5/mlx5_en/mlx5_en_port_buffer.c +++ b/sys/dev/mlx5/mlx5_en/mlx5_en_port_buffer.c @@ -1,341 +1,341 @@ /*- * Copyright (c) 2018-2019 Mellanox Technologies. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS `AS IS' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #include "opt_rss.h" #include "opt_ratelimit.h" -#include "port_buffer.h" +#include #define MLX5E_MAX_PORT_MTU 9216 int mlx5e_port_query_buffer(struct mlx5e_priv *priv, struct mlx5e_port_buffer *port_buffer) { struct mlx5_core_dev *mdev = priv->mdev; int sz = MLX5_ST_SZ_BYTES(pbmc_reg); u32 total_used = 0; void *buffer; void *out; int err; int i; out = kzalloc(sz, GFP_KERNEL); if (!out) return -ENOMEM; err = mlx5e_port_query_pbmc(mdev, out); if (err) goto out; for (i = 0; i < MLX5E_MAX_BUFFER; i++) { buffer = MLX5_ADDR_OF(pbmc_reg, out, buffer[i]); port_buffer->buffer[i].lossy = MLX5_GET(bufferx_reg, buffer, lossy); port_buffer->buffer[i].epsb = MLX5_GET(bufferx_reg, buffer, epsb); port_buffer->buffer[i].size = MLX5_GET(bufferx_reg, buffer, size) << MLX5E_BUFFER_CELL_SHIFT; port_buffer->buffer[i].xon = MLX5_GET(bufferx_reg, buffer, xon_threshold) << MLX5E_BUFFER_CELL_SHIFT; port_buffer->buffer[i].xoff = MLX5_GET(bufferx_reg, buffer, xoff_threshold) << MLX5E_BUFFER_CELL_SHIFT; total_used += port_buffer->buffer[i].size; mlx5e_dbg(HW, priv, "buffer %d: size=%d, xon=%d, xoff=%d, epsb=%d, lossy=%d\n", i, port_buffer->buffer[i].size, port_buffer->buffer[i].xon, port_buffer->buffer[i].xoff, port_buffer->buffer[i].epsb, port_buffer->buffer[i].lossy); } port_buffer->port_buffer_size = MLX5_GET(pbmc_reg, out, port_buffer_size) << MLX5E_BUFFER_CELL_SHIFT; port_buffer->spare_buffer_size = port_buffer->port_buffer_size - total_used; mlx5e_dbg(HW, priv, "total buffer size=%d, spare buffer size=%d\n", port_buffer->port_buffer_size, port_buffer->spare_buffer_size); out: kfree(out); return err; } static int port_set_buffer(struct mlx5e_priv *priv, struct mlx5e_port_buffer *port_buffer) { struct mlx5_core_dev *mdev = priv->mdev; int sz = MLX5_ST_SZ_BYTES(pbmc_reg); void *buffer; void *in; int err; int i; in = kzalloc(sz, GFP_KERNEL); if (!in) return -ENOMEM; err = mlx5e_port_query_pbmc(mdev, in); if (err) goto out; for (i = 0; i < MLX5E_MAX_BUFFER; i++) { buffer = MLX5_ADDR_OF(pbmc_reg, in, buffer[i]); MLX5_SET(bufferx_reg, buffer, size, port_buffer->buffer[i].size >> MLX5E_BUFFER_CELL_SHIFT); MLX5_SET(bufferx_reg, buffer, lossy, port_buffer->buffer[i].lossy); MLX5_SET(bufferx_reg, buffer, xoff_threshold, port_buffer->buffer[i].xoff >> MLX5E_BUFFER_CELL_SHIFT); MLX5_SET(bufferx_reg, buffer, xon_threshold, port_buffer->buffer[i].xon >> MLX5E_BUFFER_CELL_SHIFT); } err = mlx5e_port_set_pbmc(mdev, in); out: kfree(in); return err; } /* xoff = ((301+2.16 * len [m]) * speed [Gbps] + 2.72 MTU [B]) */ static u32 calculate_xoff(struct mlx5e_priv *priv, unsigned int mtu) { u32 speed; u32 xoff; int err; err = mlx5e_port_linkspeed(priv->mdev, &speed); if (err) { mlx5_core_warn(priv->mdev, "cannot get port speed\n"); speed = SPEED_40000; } speed = max_t(u32, speed, SPEED_40000); xoff = (301 + 216 * priv->dcbx.cable_len / 100) * speed / 1000 + 272 * mtu / 100; mlx5e_dbg(HW, priv, "%s: xoff=%d\n", __func__, xoff); return xoff; } static int update_xoff_threshold(struct mlx5e_priv *priv, struct mlx5e_port_buffer *port_buffer, u32 xoff) { int i; for (i = 0; i < MLX5E_MAX_BUFFER; i++) { if (port_buffer->buffer[i].lossy) { port_buffer->buffer[i].xoff = 0; port_buffer->buffer[i].xon = 0; continue; } if (port_buffer->buffer[i].size < (xoff + MLX5E_MAX_PORT_MTU + (1 << MLX5E_BUFFER_CELL_SHIFT))) { mlx5_en_info(priv->ifp, "non-lossy buffer %d size %d less than xoff threshold %d\n", i, port_buffer->buffer[i].size, xoff + MLX5E_MAX_PORT_MTU + (1 << MLX5E_BUFFER_CELL_SHIFT)); return -ENOMEM; } port_buffer->buffer[i].xoff = port_buffer->buffer[i].size - xoff; port_buffer->buffer[i].xon = port_buffer->buffer[i].xoff - MLX5E_MAX_PORT_MTU; } return 0; } /** * update_buffer_lossy() * mtu: device's MTU * pfc_en: current pfc configuration * buffer: current prio to buffer mapping * xoff: xoff value * port_buffer: port receive buffer configuration * change: * * Update buffer configuration based on pfc configuration and priority * to buffer mapping. * Buffer's lossy bit is changed to: * lossless if there is at least one PFC enabled priority mapped to this buffer * lossy if all priorities mapped to this buffer are PFC disabled * * Return: * Return 0 if no error. * Set change to true if buffer configuration is modified. */ static int update_buffer_lossy(struct mlx5e_priv *priv, unsigned int mtu, u8 pfc_en, u8 *buffer, u32 xoff, struct mlx5e_port_buffer *port_buffer, bool *change) { bool changed = false; u8 lossy_count; u8 prio_count; u8 lossy; int prio; int err; int i; for (i = 0; i < MLX5E_MAX_BUFFER; i++) { prio_count = 0; lossy_count = 0; for (prio = 0; prio < MLX5E_MAX_PRIORITY; prio++) { if (buffer[prio] != i) continue; prio_count++; lossy_count += !(pfc_en & (1 << prio)); } if (lossy_count == prio_count) lossy = 1; else /* lossy_count < prio_count */ lossy = 0; if (lossy != port_buffer->buffer[i].lossy) { port_buffer->buffer[i].lossy = lossy; changed = true; } } if (changed) { err = update_xoff_threshold(priv, port_buffer, xoff); if (err) return err; *change = true; } return 0; } int mlx5e_port_manual_buffer_config(struct mlx5e_priv *priv, u32 change, unsigned int mtu, struct ieee_pfc *pfc, u32 *buffer_size, u8 *prio2buffer) { struct mlx5e_port_buffer port_buffer; u32 xoff = calculate_xoff(priv, mtu); bool update_prio2buffer = false; u8 buffer[MLX5E_MAX_PRIORITY]; bool update_buffer = false; u32 total_used = 0; u8 curr_pfc_en; int err; int i; mlx5e_dbg(HW, priv, "%s: change=%x\n", __func__, change); err = mlx5e_port_query_buffer(priv, &port_buffer); if (err) return err; if (change & MLX5E_PORT_BUFFER_CABLE_LEN) { update_buffer = true; err = update_xoff_threshold(priv, &port_buffer, xoff); if (err) return err; } if (change & MLX5E_PORT_BUFFER_PFC) { err = mlx5e_port_query_priority2buffer(priv->mdev, buffer); if (err) return err; priv->sw_is_port_buf_owner = true; err = update_buffer_lossy(priv, mtu, pfc->pfc_en, buffer, xoff, &port_buffer, &update_buffer); if (err) return err; } if (change & MLX5E_PORT_BUFFER_PRIO2BUFFER) { update_prio2buffer = true; err = mlx5_query_port_pfc(priv->mdev, &curr_pfc_en, NULL); if (err) return err; err = update_buffer_lossy(priv, mtu, curr_pfc_en, prio2buffer, xoff, &port_buffer, &update_buffer); if (err) return err; } if (change & MLX5E_PORT_BUFFER_SIZE) { for (i = 0; i < MLX5E_MAX_BUFFER; i++) { mlx5e_dbg(HW, priv, "%s: buffer[%d]=%d\n", __func__, i, buffer_size[i]); if (!port_buffer.buffer[i].lossy && !buffer_size[i]) { mlx5e_dbg(HW, priv, "%s: lossless buffer[%d] size cannot be zero\n", __func__, i); return -EINVAL; } port_buffer.buffer[i].size = buffer_size[i]; total_used += buffer_size[i]; } mlx5e_dbg(HW, priv, "%s: total buffer requested=%d\n", __func__, total_used); if (total_used > port_buffer.port_buffer_size) return -EINVAL; update_buffer = true; err = update_xoff_threshold(priv, &port_buffer, xoff); if (err) return err; } /* Need to update buffer configuration if xoff value is changed */ if (!update_buffer && xoff != priv->dcbx.xoff) { update_buffer = true; err = update_xoff_threshold(priv, &port_buffer, xoff); if (err) return err; } priv->dcbx.xoff = xoff; /* Apply the settings */ if (update_buffer) { priv->sw_is_port_buf_owner = true; err = port_set_buffer(priv, &port_buffer); if (err) return err; } if (update_prio2buffer) { priv->sw_is_port_buf_owner = true; err = mlx5e_port_set_priority2buffer(priv->mdev, prio2buffer); } return err; } diff --git a/sys/dev/mlx5/mlx5_en/mlx5_en_rl.c b/sys/dev/mlx5/mlx5_en/mlx5_en_rl.c index ede6b5019cff..09a894608e6d 100644 --- a/sys/dev/mlx5/mlx5_en/mlx5_en_rl.c +++ b/sys/dev/mlx5/mlx5_en/mlx5_en_rl.c @@ -1,1584 +1,1584 @@ /*- * Copyright (c) 2016-2020 Mellanox Technologies. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS `AS IS' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #include "opt_rss.h" #include "opt_ratelimit.h" -#include "en.h" +#include #ifdef RATELIMIT static int mlx5e_rl_open_workers(struct mlx5e_priv *); static void mlx5e_rl_close_workers(struct mlx5e_priv *); static int mlx5e_rl_sysctl_show_rate_table(SYSCTL_HANDLER_ARGS); static void mlx5e_rl_sysctl_add_u64_oid(struct mlx5e_rl_priv_data *, unsigned x, struct sysctl_oid *, const char *name, const char *desc); static void mlx5e_rl_sysctl_add_stats_u64_oid(struct mlx5e_rl_priv_data *rl, unsigned x, struct sysctl_oid *node, const char *name, const char *desc); static int mlx5e_rl_tx_limit_add(struct mlx5e_rl_priv_data *, uint64_t value); static int mlx5e_rl_tx_limit_clr(struct mlx5e_rl_priv_data *, uint64_t value); static if_snd_tag_modify_t mlx5e_rl_snd_tag_modify; static if_snd_tag_query_t mlx5e_rl_snd_tag_query; static if_snd_tag_free_t mlx5e_rl_snd_tag_free; static const struct if_snd_tag_sw mlx5e_rl_snd_tag_sw = { .snd_tag_modify = mlx5e_rl_snd_tag_modify, .snd_tag_query = mlx5e_rl_snd_tag_query, .snd_tag_free = mlx5e_rl_snd_tag_free, .type = IF_SND_TAG_TYPE_RATE_LIMIT }; static void mlx5e_rl_build_sq_param(struct mlx5e_rl_priv_data *rl, struct mlx5e_sq_param *param) { void *sqc = param->sqc; void *wq = MLX5_ADDR_OF(sqc, sqc, wq); uint8_t log_sq_size = order_base_2(rl->param.tx_queue_size); MLX5_SET(wq, wq, log_wq_sz, log_sq_size); MLX5_SET(wq, wq, log_wq_stride, ilog2(MLX5_SEND_WQE_BB)); MLX5_SET(wq, wq, pd, rl->priv->pdn); param->wq.linear = 1; } static void mlx5e_rl_build_cq_param(struct mlx5e_rl_priv_data *rl, struct mlx5e_cq_param *param) { void *cqc = param->cqc; uint8_t log_sq_size = order_base_2(rl->param.tx_queue_size); MLX5_SET(cqc, cqc, log_cq_size, log_sq_size); MLX5_SET(cqc, cqc, cq_period, rl->param.tx_coalesce_usecs); MLX5_SET(cqc, cqc, cq_max_count, rl->param.tx_coalesce_pkts); MLX5_SET(cqc, cqc, uar_page, rl->priv->mdev->priv.uar->index); switch (rl->param.tx_coalesce_mode) { case 0: MLX5_SET(cqc, cqc, cq_period_mode, MLX5_CQ_PERIOD_MODE_START_FROM_EQE); break; default: if (MLX5_CAP_GEN(rl->priv->mdev, cq_period_start_from_cqe)) MLX5_SET(cqc, cqc, cq_period_mode, MLX5_CQ_PERIOD_MODE_START_FROM_CQE); else MLX5_SET(cqc, cqc, cq_period_mode, MLX5_CQ_PERIOD_MODE_START_FROM_EQE); break; } } static void mlx5e_rl_build_channel_param(struct mlx5e_rl_priv_data *rl, struct mlx5e_rl_channel_param *cparam) { memset(cparam, 0, sizeof(*cparam)); mlx5e_rl_build_sq_param(rl, &cparam->sq); mlx5e_rl_build_cq_param(rl, &cparam->cq); } static int mlx5e_rl_create_sq(struct mlx5e_priv *priv, struct mlx5e_sq *sq, struct mlx5e_sq_param *param, int ix) { struct mlx5_core_dev *mdev = priv->mdev; void *sqc = param->sqc; void *sqc_wq = MLX5_ADDR_OF(sqc, sqc, wq); int err; /* Create DMA descriptor TAG */ if ((err = -bus_dma_tag_create( bus_get_dma_tag(mdev->pdev->dev.bsddev), 1, /* any alignment */ 0, /* no boundary */ BUS_SPACE_MAXADDR, /* lowaddr */ BUS_SPACE_MAXADDR, /* highaddr */ NULL, NULL, /* filter, filterarg */ MLX5E_MAX_TX_PAYLOAD_SIZE, /* maxsize */ MLX5E_MAX_TX_MBUF_FRAGS, /* nsegments */ MLX5E_MAX_TX_MBUF_SIZE, /* maxsegsize */ 0, /* flags */ NULL, NULL, /* lockfunc, lockfuncarg */ &sq->dma_tag))) goto done; sq->mkey_be = cpu_to_be32(priv->mr.key); sq->ifp = priv->ifp; sq->priv = priv; err = mlx5_wq_cyc_create(mdev, ¶m->wq, sqc_wq, &sq->wq, &sq->wq_ctrl); if (err) goto err_free_dma_tag; sq->wq.db = &sq->wq.db[MLX5_SND_DBR]; err = mlx5e_alloc_sq_db(sq); if (err) goto err_sq_wq_destroy; mlx5e_update_sq_inline(sq); return (0); err_sq_wq_destroy: mlx5_wq_destroy(&sq->wq_ctrl); err_free_dma_tag: bus_dma_tag_destroy(sq->dma_tag); done: return (err); } static void mlx5e_rl_destroy_sq(struct mlx5e_sq *sq) { mlx5e_free_sq_db(sq); mlx5_wq_destroy(&sq->wq_ctrl); bus_dma_tag_destroy(sq->dma_tag); } static int mlx5e_rl_open_sq(struct mlx5e_priv *priv, struct mlx5e_sq *sq, struct mlx5e_sq_param *param, int ix) { int err; err = mlx5e_rl_create_sq(priv, sq, param, ix); if (err) return (err); err = mlx5e_enable_sq(sq, param, &priv->channel[ix].bfreg, priv->rl.tisn); if (err) goto err_destroy_sq; err = mlx5e_modify_sq(sq, MLX5_SQC_STATE_RST, MLX5_SQC_STATE_RDY); if (err) goto err_disable_sq; WRITE_ONCE(sq->running, 1); return (0); err_disable_sq: mlx5e_disable_sq(sq); err_destroy_sq: mlx5e_rl_destroy_sq(sq); return (err); } static void mlx5e_rl_chan_mtx_init(struct mlx5e_priv *priv, struct mlx5e_sq *sq) { mtx_init(&sq->lock, "mlx5tx-rl", NULL, MTX_DEF); mtx_init(&sq->comp_lock, "mlx5comp-rl", NULL, MTX_DEF); callout_init_mtx(&sq->cev_callout, &sq->lock, 0); sq->cev_factor = priv->rl.param.tx_completion_fact; /* ensure the TX completion event factor is not zero */ if (sq->cev_factor == 0) sq->cev_factor = 1; } static int mlx5e_rl_open_channel(struct mlx5e_rl_worker *rlw, int eq_ix, struct mlx5e_rl_channel_param *cparam, struct mlx5e_sq *volatile *ppsq) { struct mlx5e_priv *priv = rlw->priv; struct mlx5e_sq *sq; int err; sq = malloc(sizeof(*sq), M_MLX5EN, M_WAITOK | M_ZERO); /* init mutexes */ mlx5e_rl_chan_mtx_init(priv, sq); /* open TX completion queue */ err = mlx5e_open_cq(priv, &cparam->cq, &sq->cq, &mlx5e_tx_cq_comp, eq_ix); if (err) goto err_free; err = mlx5e_rl_open_sq(priv, sq, &cparam->sq, eq_ix); if (err) goto err_close_tx_cq; /* store TX channel pointer */ *ppsq = sq; /* poll TX queue initially */ sq->cq.mcq.comp(&sq->cq.mcq, NULL); return (0); err_close_tx_cq: mlx5e_close_cq(&sq->cq); err_free: /* destroy mutexes */ mtx_destroy(&sq->lock); mtx_destroy(&sq->comp_lock); free(sq, M_MLX5EN); atomic_add_64(&priv->rl.stats.tx_allocate_resource_failure, 1ULL); return (err); } static void mlx5e_rl_close_channel(struct mlx5e_sq *volatile *ppsq) { struct mlx5e_sq *sq = *ppsq; /* check if channel is already closed */ if (sq == NULL) return; /* ensure channel pointer is no longer used */ *ppsq = NULL; /* teardown and destroy SQ */ mlx5e_drain_sq(sq); mlx5e_disable_sq(sq); mlx5e_rl_destroy_sq(sq); /* close CQ */ mlx5e_close_cq(&sq->cq); /* destroy mutexes */ mtx_destroy(&sq->lock); mtx_destroy(&sq->comp_lock); free(sq, M_MLX5EN); } static void mlx5e_rl_sync_tx_completion_fact(struct mlx5e_rl_priv_data *rl) { /* * Limit the maximum distance between completion events to * half of the currently set TX queue size. * * The maximum number of queue entries a single IP packet can * consume is given by MLX5_SEND_WQE_MAX_WQEBBS. * * The worst case max value is then given as below: */ uint64_t max = rl->param.tx_queue_size / (2 * MLX5_SEND_WQE_MAX_WQEBBS); /* * Update the maximum completion factor value in case the * tx_queue_size field changed. Ensure we don't overflow * 16-bits. */ if (max < 1) max = 1; else if (max > 65535) max = 65535; rl->param.tx_completion_fact_max = max; /* * Verify that the current TX completion factor is within the * given limits: */ if (rl->param.tx_completion_fact < 1) rl->param.tx_completion_fact = 1; else if (rl->param.tx_completion_fact > max) rl->param.tx_completion_fact = max; } static int mlx5e_rl_modify_sq(struct mlx5e_sq *sq, uint16_t rl_index) { struct mlx5e_priv *priv = sq->priv; struct mlx5_core_dev *mdev = priv->mdev; void *in; void *sqc; int inlen; int err; inlen = MLX5_ST_SZ_BYTES(modify_sq_in); in = mlx5_vzalloc(inlen); if (in == NULL) return (-ENOMEM); sqc = MLX5_ADDR_OF(modify_sq_in, in, ctx); MLX5_SET(modify_sq_in, in, sqn, sq->sqn); MLX5_SET(modify_sq_in, in, sq_state, MLX5_SQC_STATE_RDY); MLX5_SET64(modify_sq_in, in, modify_bitmask, 1); MLX5_SET(sqc, sqc, state, MLX5_SQC_STATE_RDY); MLX5_SET(sqc, sqc, packet_pacing_rate_limit_index, rl_index); err = mlx5_core_modify_sq(mdev, in, inlen); kvfree(in); return (err); } /* * This function will search the configured rate limit table for the * best match to avoid that a single socket based application can * allocate all the available hardware rates. If the user selected * rate deviates too much from the closes rate available in the rate * limit table, unlimited rate will be selected. */ static uint64_t mlx5e_rl_find_best_rate_locked(struct mlx5e_rl_priv_data *rl, uint64_t user_rate) { uint64_t distance = -1ULL; uint64_t diff; uint64_t retval = 0; /* unlimited */ uint64_t x; /* search for closest rate */ for (x = 0; x != rl->param.tx_rates_def; x++) { uint64_t rate = rl->rate_limit_table[x]; if (rate == 0) continue; if (rate > user_rate) diff = rate - user_rate; else diff = user_rate - rate; /* check if distance is smaller than previous rate */ if (diff < distance) { distance = diff; retval = rate; } } /* range check for multiplication below */ if (user_rate > rl->param.tx_limit_max) user_rate = rl->param.tx_limit_max; /* fallback to unlimited, if rate deviates too much */ if (distance > howmany(user_rate * rl->param.tx_allowed_deviation, 1000ULL)) retval = 0; return (retval); } /* * This function sets the requested rate for a rate limit channel, in * bits per second. The requested rate will be filtered through the * find best rate function above. */ static int mlx5e_rlw_channel_set_rate_locked(struct mlx5e_rl_worker *rlw, struct mlx5e_rl_channel *channel, uint64_t rate) { struct mlx5e_rl_priv_data *rl = &rlw->priv->rl; struct mlx5e_sq *sq; uint64_t temp; uint16_t index; uint16_t burst; int error; if (rate != 0) { MLX5E_RL_WORKER_UNLOCK(rlw); MLX5E_RL_RLOCK(rl); /* get current burst size in bytes */ temp = rl->param.tx_burst_size * MLX5E_SW2HW_MTU(rlw->priv->ifp->if_mtu); /* limit burst size to 64K currently */ if (temp > 65535) temp = 65535; burst = temp; /* find best rate */ rate = mlx5e_rl_find_best_rate_locked(rl, rate); MLX5E_RL_RUNLOCK(rl); if (rate == 0) { /* rate doesn't exist, fallback to unlimited */ index = 0; rate = 0; atomic_add_64(&rlw->priv->rl.stats.tx_modify_rate_failure, 1ULL); } else { /* get a reference on the new rate */ error = -mlx5_rl_add_rate(rlw->priv->mdev, howmany(rate, 1000), burst, &index); if (error != 0) { /* adding rate failed, fallback to unlimited */ index = 0; rate = 0; atomic_add_64(&rlw->priv->rl.stats.tx_add_new_rate_failure, 1ULL); } } MLX5E_RL_WORKER_LOCK(rlw); } else { index = 0; burst = 0; /* default */ } /* atomically swap rates */ temp = channel->last_rate; channel->last_rate = rate; rate = temp; /* atomically swap burst size */ temp = channel->last_burst; channel->last_burst = burst; burst = temp; MLX5E_RL_WORKER_UNLOCK(rlw); /* put reference on the old rate, if any */ if (rate != 0) { mlx5_rl_remove_rate(rlw->priv->mdev, howmany(rate, 1000), burst); } /* set new rate, if SQ is running */ sq = channel->sq; if (sq != NULL && READ_ONCE(sq->running) != 0) { error = mlx5e_rl_modify_sq(sq, index); if (error != 0) atomic_add_64(&rlw->priv->rl.stats.tx_modify_rate_failure, 1ULL); } else error = 0; MLX5E_RL_WORKER_LOCK(rlw); return (-error); } static void mlx5e_rl_worker(void *arg) { struct thread *td; struct mlx5e_rl_worker *rlw = arg; struct mlx5e_rl_channel *channel; struct mlx5e_priv *priv; unsigned ix; uint64_t x; int error; /* set thread priority */ td = curthread; thread_lock(td); sched_prio(td, PI_SWI(SWI_NET)); thread_unlock(td); priv = rlw->priv; /* compute completion vector */ ix = (rlw - priv->rl.workers) % priv->mdev->priv.eq_table.num_comp_vectors; /* TODO bind to CPU */ /* open all the SQs */ MLX5E_RL_WORKER_LOCK(rlw); for (x = 0; x < priv->rl.param.tx_channels_per_worker_def; x++) { struct mlx5e_rl_channel *channel = rlw->channels + x; #if !defined(HAVE_RL_PRE_ALLOCATE_CHANNELS) if (channel->state == MLX5E_RL_ST_FREE) continue; #endif MLX5E_RL_WORKER_UNLOCK(rlw); MLX5E_RL_RLOCK(&priv->rl); error = mlx5e_rl_open_channel(rlw, ix, &priv->rl.chan_param, &channel->sq); MLX5E_RL_RUNLOCK(&priv->rl); MLX5E_RL_WORKER_LOCK(rlw); if (error != 0) { mlx5_en_err(priv->ifp, "mlx5e_rl_open_channel failed: %d\n", error); break; } mlx5e_rlw_channel_set_rate_locked(rlw, channel, channel->init_rate); } while (1) { if (STAILQ_FIRST(&rlw->process_head) == NULL) { /* check if we are tearing down */ if (rlw->worker_done != 0) break; cv_wait(&rlw->cv, &rlw->mtx); } /* check if we are tearing down */ if (rlw->worker_done != 0) break; channel = STAILQ_FIRST(&rlw->process_head); if (channel != NULL) { STAILQ_REMOVE_HEAD(&rlw->process_head, entry); switch (channel->state) { case MLX5E_RL_ST_MODIFY: channel->state = MLX5E_RL_ST_USED; MLX5E_RL_WORKER_UNLOCK(rlw); /* create channel by demand */ if (channel->sq == NULL) { MLX5E_RL_RLOCK(&priv->rl); error = mlx5e_rl_open_channel(rlw, ix, &priv->rl.chan_param, &channel->sq); MLX5E_RL_RUNLOCK(&priv->rl); if (error != 0) { mlx5_en_err(priv->ifp, "mlx5e_rl_open_channel failed: %d\n", error); } else { atomic_add_64(&rlw->priv->rl.stats.tx_open_queues, 1ULL); } } else { mlx5e_resume_sq(channel->sq); } MLX5E_RL_WORKER_LOCK(rlw); /* convert from bytes/s to bits/s and set new rate */ error = mlx5e_rlw_channel_set_rate_locked(rlw, channel, channel->new_rate * 8ULL); if (error != 0) { mlx5_en_err(priv->ifp, "mlx5e_rlw_channel_set_rate_locked failed: %d\n", error); } break; case MLX5E_RL_ST_DESTROY: error = mlx5e_rlw_channel_set_rate_locked(rlw, channel, 0); if (error != 0) { mlx5_en_err(priv->ifp, "mlx5e_rlw_channel_set_rate_locked failed: %d\n", error); } if (channel->sq != NULL) { /* * Make sure all packets are * transmitted before SQ is * returned to free list: */ MLX5E_RL_WORKER_UNLOCK(rlw); mlx5e_drain_sq(channel->sq); MLX5E_RL_WORKER_LOCK(rlw); } /* put the channel back into the free list */ STAILQ_INSERT_HEAD(&rlw->index_list_head, channel, entry); channel->state = MLX5E_RL_ST_FREE; atomic_add_64(&priv->rl.stats.tx_active_connections, -1ULL); break; default: /* NOP */ break; } } } /* close all the SQs */ for (x = 0; x < priv->rl.param.tx_channels_per_worker_def; x++) { struct mlx5e_rl_channel *channel = rlw->channels + x; /* update the initial rate */ channel->init_rate = channel->last_rate; /* make sure we free up the rate resource */ mlx5e_rlw_channel_set_rate_locked(rlw, channel, 0); if (channel->sq != NULL) { MLX5E_RL_WORKER_UNLOCK(rlw); mlx5e_rl_close_channel(&channel->sq); atomic_add_64(&rlw->priv->rl.stats.tx_open_queues, -1ULL); MLX5E_RL_WORKER_LOCK(rlw); } } rlw->worker_done = 0; cv_broadcast(&rlw->cv); MLX5E_RL_WORKER_UNLOCK(rlw); kthread_exit(); } static int mlx5e_rl_open_tis(struct mlx5e_priv *priv) { struct mlx5_core_dev *mdev = priv->mdev; u32 in[MLX5_ST_SZ_DW(create_tis_in)]; void *tisc = MLX5_ADDR_OF(create_tis_in, in, ctx); memset(in, 0, sizeof(in)); MLX5_SET(tisc, tisc, prio, 0); MLX5_SET(tisc, tisc, transport_domain, priv->tdn); return (mlx5_core_create_tis(mdev, in, sizeof(in), &priv->rl.tisn)); } static void mlx5e_rl_close_tis(struct mlx5e_priv *priv) { mlx5_core_destroy_tis(priv->mdev, priv->rl.tisn, 0); } static void mlx5e_rl_set_default_params(struct mlx5e_rl_params *param, struct mlx5_core_dev *mdev) { /* ratelimit workers */ param->tx_worker_threads_def = mdev->priv.eq_table.num_comp_vectors; param->tx_worker_threads_max = MLX5E_RL_MAX_WORKERS; /* range check */ if (param->tx_worker_threads_def == 0 || param->tx_worker_threads_def > param->tx_worker_threads_max) param->tx_worker_threads_def = param->tx_worker_threads_max; /* ratelimit channels */ param->tx_channels_per_worker_def = MLX5E_RL_MAX_SQS / param->tx_worker_threads_def; param->tx_channels_per_worker_max = MLX5E_RL_MAX_SQS; /* range check */ if (param->tx_channels_per_worker_def > MLX5E_RL_DEF_SQ_PER_WORKER) param->tx_channels_per_worker_def = MLX5E_RL_DEF_SQ_PER_WORKER; /* set default burst size */ param->tx_burst_size = 4; /* MTUs */ /* * Set maximum burst size * * The burst size is multiplied by the MTU and clamped to the * range 0 ... 65535 bytes inclusivly before fed into the * firmware. * * NOTE: If the burst size or MTU is changed only ratelimit * connections made after the change will use the new burst * size. */ param->tx_burst_size_max = 255; /* get firmware rate limits in 1000bit/s and convert them to bit/s */ param->tx_limit_min = mdev->priv.rl_table.min_rate * 1000ULL; param->tx_limit_max = mdev->priv.rl_table.max_rate * 1000ULL; /* ratelimit table size */ param->tx_rates_max = mdev->priv.rl_table.max_size; /* range check */ if (param->tx_rates_max > MLX5E_RL_MAX_TX_RATES) param->tx_rates_max = MLX5E_RL_MAX_TX_RATES; /* set default number of rates */ param->tx_rates_def = param->tx_rates_max; /* set maximum allowed rate deviation */ if (param->tx_limit_max != 0) { /* * Make sure the deviation multiplication doesn't * overflow unsigned 64-bit: */ param->tx_allowed_deviation_max = -1ULL / param->tx_limit_max; } /* set default rate deviation */ param->tx_allowed_deviation = 50; /* 5.0% */ /* channel parameters */ param->tx_queue_size = (1 << MLX5E_PARAMS_MINIMUM_LOG_SQ_SIZE); param->tx_coalesce_usecs = MLX5E_RL_TX_COAL_USEC_DEFAULT; param->tx_coalesce_pkts = MLX5E_RL_TX_COAL_PKTS_DEFAULT; param->tx_coalesce_mode = MLX5E_RL_TX_COAL_MODE_DEFAULT; param->tx_completion_fact = MLX5E_RL_TX_COMP_FACT_DEFAULT; } static const char *mlx5e_rl_params_desc[] = { MLX5E_RL_PARAMS(MLX5E_STATS_DESC) }; static const char *mlx5e_rl_table_params_desc[] = { MLX5E_RL_TABLE_PARAMS(MLX5E_STATS_DESC) }; static const char *mlx5e_rl_stats_desc[] = { MLX5E_RL_STATS(MLX5E_STATS_DESC) }; int mlx5e_rl_init(struct mlx5e_priv *priv) { struct mlx5e_rl_priv_data *rl = &priv->rl; struct sysctl_oid *node; struct sysctl_oid *stats; char buf[64]; uint64_t i; uint64_t j; int error; /* check if there is support for packet pacing */ if (!MLX5_CAP_GEN(priv->mdev, qos) || !MLX5_CAP_QOS(priv->mdev, packet_pacing)) return (0); rl->priv = priv; sysctl_ctx_init(&rl->ctx); sx_init(&rl->rl_sxlock, "ratelimit-sxlock"); /* open own TIS domain for ratelimit SQs */ error = mlx5e_rl_open_tis(priv); if (error) goto done; /* setup default value for parameters */ mlx5e_rl_set_default_params(&rl->param, priv->mdev); /* update the completion factor */ mlx5e_rl_sync_tx_completion_fact(rl); /* create root node */ node = SYSCTL_ADD_NODE(&rl->ctx, SYSCTL_CHILDREN(priv->sysctl_ifnet), OID_AUTO, "rate_limit", CTLFLAG_RW | CTLFLAG_MPSAFE, NULL, "Rate limiting support"); if (node != NULL) { /* create SYSCTLs */ for (i = 0; i != MLX5E_RL_PARAMS_NUM; i++) { mlx5e_rl_sysctl_add_u64_oid(rl, MLX5E_RL_PARAMS_INDEX(arg[i]), node, mlx5e_rl_params_desc[2 * i], mlx5e_rl_params_desc[2 * i + 1]); } stats = SYSCTL_ADD_NODE(&rl->ctx, SYSCTL_CHILDREN(node), OID_AUTO, "stats", CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "Rate limiting statistics"); if (stats != NULL) { /* create SYSCTLs */ for (i = 0; i != MLX5E_RL_STATS_NUM; i++) { mlx5e_rl_sysctl_add_stats_u64_oid(rl, i, stats, mlx5e_rl_stats_desc[2 * i], mlx5e_rl_stats_desc[2 * i + 1]); } } } /* allocate workers array */ rl->workers = malloc(sizeof(rl->workers[0]) * rl->param.tx_worker_threads_def, M_MLX5EN, M_WAITOK | M_ZERO); /* allocate rate limit array */ rl->rate_limit_table = malloc(sizeof(rl->rate_limit_table[0]) * rl->param.tx_rates_def, M_MLX5EN, M_WAITOK | M_ZERO); if (node != NULL) { /* create more SYSCTls */ SYSCTL_ADD_PROC(&rl->ctx, SYSCTL_CHILDREN(node), OID_AUTO, "tx_rate_show", CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, rl, 0, &mlx5e_rl_sysctl_show_rate_table, "A", "Show table of all configured TX rates"); /* try to fetch rate table from kernel environment */ for (i = 0; i != rl->param.tx_rates_def; i++) { /* compute path for tunable */ snprintf(buf, sizeof(buf), "dev.mce.%d.rate_limit.tx_rate_add_%d", device_get_unit(priv->mdev->pdev->dev.bsddev), (int)i); if (TUNABLE_QUAD_FETCH(buf, &j)) mlx5e_rl_tx_limit_add(rl, j); } /* setup rate table sysctls */ for (i = 0; i != MLX5E_RL_TABLE_PARAMS_NUM; i++) { mlx5e_rl_sysctl_add_u64_oid(rl, MLX5E_RL_PARAMS_INDEX(table_arg[i]), node, mlx5e_rl_table_params_desc[2 * i], mlx5e_rl_table_params_desc[2 * i + 1]); } } for (j = 0; j < rl->param.tx_worker_threads_def; j++) { struct mlx5e_rl_worker *rlw = rl->workers + j; rlw->priv = priv; cv_init(&rlw->cv, "mlx5-worker-cv"); mtx_init(&rlw->mtx, "mlx5-worker-mtx", NULL, MTX_DEF); STAILQ_INIT(&rlw->index_list_head); STAILQ_INIT(&rlw->process_head); rlw->channels = malloc(sizeof(rlw->channels[0]) * rl->param.tx_channels_per_worker_def, M_MLX5EN, M_WAITOK | M_ZERO); MLX5E_RL_WORKER_LOCK(rlw); for (i = 0; i < rl->param.tx_channels_per_worker_def; i++) { struct mlx5e_rl_channel *channel = rlw->channels + i; channel->worker = rlw; STAILQ_INSERT_TAIL(&rlw->index_list_head, channel, entry); } MLX5E_RL_WORKER_UNLOCK(rlw); } PRIV_LOCK(priv); error = mlx5e_rl_open_workers(priv); PRIV_UNLOCK(priv); if (error != 0) { mlx5_en_err(priv->ifp, "mlx5e_rl_open_workers failed: %d\n", error); } return (0); done: sysctl_ctx_free(&rl->ctx); sx_destroy(&rl->rl_sxlock); return (error); } static int mlx5e_rl_open_workers(struct mlx5e_priv *priv) { struct mlx5e_rl_priv_data *rl = &priv->rl; struct thread *rl_thread = NULL; struct proc *rl_proc = NULL; uint64_t j; int error; if (priv->gone || rl->opened) return (-EINVAL); MLX5E_RL_WLOCK(rl); /* compute channel parameters once */ mlx5e_rl_build_channel_param(rl, &rl->chan_param); MLX5E_RL_WUNLOCK(rl); for (j = 0; j < rl->param.tx_worker_threads_def; j++) { struct mlx5e_rl_worker *rlw = rl->workers + j; /* start worker thread */ error = kproc_kthread_add(mlx5e_rl_worker, rlw, &rl_proc, &rl_thread, RFHIGHPID, 0, "mlx5-ratelimit", "mlx5-rl-worker-thread-%d", (int)j); if (error != 0) { mlx5_en_err(rl->priv->ifp, "kproc_kthread_add failed: %d\n", error); rlw->worker_done = 1; } } rl->opened = 1; return (0); } static void mlx5e_rl_close_workers(struct mlx5e_priv *priv) { struct mlx5e_rl_priv_data *rl = &priv->rl; uint64_t y; if (rl->opened == 0) return; /* tear down worker threads simultaneously */ for (y = 0; y < rl->param.tx_worker_threads_def; y++) { struct mlx5e_rl_worker *rlw = rl->workers + y; /* tear down worker before freeing SQs */ MLX5E_RL_WORKER_LOCK(rlw); if (rlw->worker_done == 0) { rlw->worker_done = 1; cv_broadcast(&rlw->cv); } else { /* XXX thread not started */ rlw->worker_done = 0; } MLX5E_RL_WORKER_UNLOCK(rlw); } /* wait for worker threads to exit */ for (y = 0; y < rl->param.tx_worker_threads_def; y++) { struct mlx5e_rl_worker *rlw = rl->workers + y; /* tear down worker before freeing SQs */ MLX5E_RL_WORKER_LOCK(rlw); while (rlw->worker_done != 0) cv_wait(&rlw->cv, &rlw->mtx); MLX5E_RL_WORKER_UNLOCK(rlw); } rl->opened = 0; } static void mlx5e_rl_reset_rates(struct mlx5e_rl_priv_data *rl) { unsigned x; MLX5E_RL_WLOCK(rl); for (x = 0; x != rl->param.tx_rates_def; x++) rl->rate_limit_table[x] = 0; MLX5E_RL_WUNLOCK(rl); } void mlx5e_rl_cleanup(struct mlx5e_priv *priv) { struct mlx5e_rl_priv_data *rl = &priv->rl; uint64_t y; /* check if there is support for packet pacing */ if (!MLX5_CAP_GEN(priv->mdev, qos) || !MLX5_CAP_QOS(priv->mdev, packet_pacing)) return; /* TODO check if there is support for packet pacing */ sysctl_ctx_free(&rl->ctx); PRIV_LOCK(priv); mlx5e_rl_close_workers(priv); PRIV_UNLOCK(priv); mlx5e_rl_reset_rates(rl); /* close TIS domain */ mlx5e_rl_close_tis(priv); for (y = 0; y < rl->param.tx_worker_threads_def; y++) { struct mlx5e_rl_worker *rlw = rl->workers + y; cv_destroy(&rlw->cv); mtx_destroy(&rlw->mtx); free(rlw->channels, M_MLX5EN); } free(rl->rate_limit_table, M_MLX5EN); free(rl->workers, M_MLX5EN); sx_destroy(&rl->rl_sxlock); } static void mlx5e_rlw_queue_channel_locked(struct mlx5e_rl_worker *rlw, struct mlx5e_rl_channel *channel) { STAILQ_INSERT_TAIL(&rlw->process_head, channel, entry); cv_broadcast(&rlw->cv); } static void mlx5e_rl_free(struct mlx5e_rl_worker *rlw, struct mlx5e_rl_channel *channel) { if (channel == NULL) return; MLX5E_RL_WORKER_LOCK(rlw); switch (channel->state) { case MLX5E_RL_ST_MODIFY: channel->state = MLX5E_RL_ST_DESTROY; break; case MLX5E_RL_ST_USED: channel->state = MLX5E_RL_ST_DESTROY; mlx5e_rlw_queue_channel_locked(rlw, channel); break; default: break; } MLX5E_RL_WORKER_UNLOCK(rlw); } static int mlx5e_rl_modify(struct mlx5e_rl_worker *rlw, struct mlx5e_rl_channel *channel, uint64_t rate) { MLX5E_RL_WORKER_LOCK(rlw); channel->new_rate = rate; switch (channel->state) { case MLX5E_RL_ST_USED: channel->state = MLX5E_RL_ST_MODIFY; mlx5e_rlw_queue_channel_locked(rlw, channel); break; default: break; } MLX5E_RL_WORKER_UNLOCK(rlw); return (0); } static int mlx5e_rl_query(struct mlx5e_rl_worker *rlw, struct mlx5e_rl_channel *channel, union if_snd_tag_query_params *params) { int retval; MLX5E_RL_WORKER_LOCK(rlw); switch (channel->state) { case MLX5E_RL_ST_USED: params->rate_limit.max_rate = channel->last_rate; params->rate_limit.queue_level = mlx5e_sq_queue_level(channel->sq); retval = 0; break; case MLX5E_RL_ST_MODIFY: params->rate_limit.max_rate = channel->last_rate; params->rate_limit.queue_level = mlx5e_sq_queue_level(channel->sq); retval = EBUSY; break; default: retval = EINVAL; break; } MLX5E_RL_WORKER_UNLOCK(rlw); return (retval); } static int mlx5e_find_available_tx_ring_index(struct mlx5e_rl_worker *rlw, struct mlx5e_rl_channel **pchannel) { struct mlx5e_rl_channel *channel; int retval = ENOMEM; MLX5E_RL_WORKER_LOCK(rlw); /* Check for available channel in free list */ if ((channel = STAILQ_FIRST(&rlw->index_list_head)) != NULL) { retval = 0; /* Remove head index from available list */ STAILQ_REMOVE_HEAD(&rlw->index_list_head, entry); channel->state = MLX5E_RL_ST_USED; atomic_add_64(&rlw->priv->rl.stats.tx_active_connections, 1ULL); } else { atomic_add_64(&rlw->priv->rl.stats.tx_available_resource_failure, 1ULL); } MLX5E_RL_WORKER_UNLOCK(rlw); *pchannel = channel; #ifdef RATELIMIT_DEBUG mlx5_en_info(rlw->priv->ifp, "Channel pointer for rate limit connection is %p\n", channel); #endif return (retval); } int mlx5e_rl_snd_tag_alloc(struct ifnet *ifp, union if_snd_tag_alloc_params *params, struct m_snd_tag **ppmt) { struct mlx5e_rl_channel *channel; struct mlx5e_rl_worker *rlw; struct mlx5e_priv *priv; int error; priv = ifp->if_softc; /* check if there is support for packet pacing or if device is going away */ if (!MLX5_CAP_GEN(priv->mdev, qos) || !MLX5_CAP_QOS(priv->mdev, packet_pacing) || priv->gone || params->rate_limit.hdr.type != IF_SND_TAG_TYPE_RATE_LIMIT) return (EOPNOTSUPP); /* compute worker thread this TCP connection belongs to */ rlw = priv->rl.workers + ((params->rate_limit.hdr.flowid % 128) % priv->rl.param.tx_worker_threads_def); error = mlx5e_find_available_tx_ring_index(rlw, &channel); if (error != 0) goto done; error = mlx5e_rl_modify(rlw, channel, params->rate_limit.max_rate); if (error != 0) { mlx5e_rl_free(rlw, channel); goto done; } /* store pointer to mbuf tag */ MPASS(channel->tag.refcount == 0); m_snd_tag_init(&channel->tag, ifp, &mlx5e_rl_snd_tag_sw); *ppmt = &channel->tag; done: return (error); } static int mlx5e_rl_snd_tag_modify(struct m_snd_tag *pmt, union if_snd_tag_modify_params *params) { struct mlx5e_rl_channel *channel = container_of(pmt, struct mlx5e_rl_channel, tag); return (mlx5e_rl_modify(channel->worker, channel, params->rate_limit.max_rate)); } static int mlx5e_rl_snd_tag_query(struct m_snd_tag *pmt, union if_snd_tag_query_params *params) { struct mlx5e_rl_channel *channel = container_of(pmt, struct mlx5e_rl_channel, tag); return (mlx5e_rl_query(channel->worker, channel, params)); } static void mlx5e_rl_snd_tag_free(struct m_snd_tag *pmt) { struct mlx5e_rl_channel *channel = container_of(pmt, struct mlx5e_rl_channel, tag); mlx5e_rl_free(channel->worker, channel); } static int mlx5e_rl_sysctl_show_rate_table(SYSCTL_HANDLER_ARGS) { struct mlx5e_rl_priv_data *rl = arg1; struct mlx5e_priv *priv = rl->priv; struct sbuf sbuf; unsigned x; int error; error = sysctl_wire_old_buffer(req, 0); if (error != 0) return (error); PRIV_LOCK(priv); sbuf_new_for_sysctl(&sbuf, NULL, 128 * rl->param.tx_rates_def, req); sbuf_printf(&sbuf, "\n\n" "\t" "ENTRY" "\t" "BURST" "\t" "RATE [bit/s]\n" "\t" "--------------------------------------------\n"); MLX5E_RL_RLOCK(rl); for (x = 0; x != rl->param.tx_rates_def; x++) { if (rl->rate_limit_table[x] == 0) continue; sbuf_printf(&sbuf, "\t" "%3u" "\t" "%3u" "\t" "%lld\n", x, (unsigned)rl->param.tx_burst_size, (long long)rl->rate_limit_table[x]); } MLX5E_RL_RUNLOCK(rl); error = sbuf_finish(&sbuf); sbuf_delete(&sbuf); PRIV_UNLOCK(priv); return (error); } static int mlx5e_rl_refresh_channel_params(struct mlx5e_rl_priv_data *rl) { uint64_t x; uint64_t y; MLX5E_RL_WLOCK(rl); /* compute channel parameters once */ mlx5e_rl_build_channel_param(rl, &rl->chan_param); MLX5E_RL_WUNLOCK(rl); for (y = 0; y != rl->param.tx_worker_threads_def; y++) { struct mlx5e_rl_worker *rlw = rl->workers + y; for (x = 0; x != rl->param.tx_channels_per_worker_def; x++) { struct mlx5e_rl_channel *channel; struct mlx5e_sq *sq; channel = rlw->channels + x; sq = channel->sq; if (sq == NULL) continue; if (MLX5_CAP_GEN(rl->priv->mdev, cq_period_mode_modify)) { mlx5_core_modify_cq_moderation_mode(rl->priv->mdev, &sq->cq.mcq, rl->param.tx_coalesce_usecs, rl->param.tx_coalesce_pkts, rl->param.tx_coalesce_mode); } else { mlx5_core_modify_cq_moderation(rl->priv->mdev, &sq->cq.mcq, rl->param.tx_coalesce_usecs, rl->param.tx_coalesce_pkts); } } } return (0); } void mlx5e_rl_refresh_sq_inline(struct mlx5e_rl_priv_data *rl) { uint64_t x; uint64_t y; for (y = 0; y != rl->param.tx_worker_threads_def; y++) { struct mlx5e_rl_worker *rlw = rl->workers + y; for (x = 0; x != rl->param.tx_channels_per_worker_def; x++) { struct mlx5e_rl_channel *channel; struct mlx5e_sq *sq; channel = rlw->channels + x; sq = channel->sq; if (sq == NULL) continue; mtx_lock(&sq->lock); mlx5e_update_sq_inline(sq); mtx_unlock(&sq->lock); } } } static int mlx5e_rl_tx_limit_add(struct mlx5e_rl_priv_data *rl, uint64_t value) { unsigned x; int error; if (value < 1000 || mlx5_rl_is_in_range(rl->priv->mdev, howmany(value, 1000), 0) == 0) return (EINVAL); MLX5E_RL_WLOCK(rl); error = ENOMEM; /* check if rate already exists */ for (x = 0; x != rl->param.tx_rates_def; x++) { if (rl->rate_limit_table[x] != value) continue; error = EEXIST; break; } /* check if there is a free rate entry */ if (x == rl->param.tx_rates_def) { for (x = 0; x != rl->param.tx_rates_def; x++) { if (rl->rate_limit_table[x] != 0) continue; rl->rate_limit_table[x] = value; error = 0; break; } } MLX5E_RL_WUNLOCK(rl); return (error); } static int mlx5e_rl_tx_limit_clr(struct mlx5e_rl_priv_data *rl, uint64_t value) { unsigned x; int error; if (value == 0) return (EINVAL); MLX5E_RL_WLOCK(rl); /* check if rate already exists */ for (x = 0; x != rl->param.tx_rates_def; x++) { if (rl->rate_limit_table[x] != value) continue; /* free up rate */ rl->rate_limit_table[x] = 0; break; } /* check if there is a free rate entry */ if (x == rl->param.tx_rates_def) error = ENOENT; else error = 0; MLX5E_RL_WUNLOCK(rl); return (error); } static int mlx5e_rl_sysctl_handler(SYSCTL_HANDLER_ARGS) { struct mlx5e_rl_priv_data *rl = arg1; struct mlx5e_priv *priv = rl->priv; unsigned mode_modify; unsigned was_opened; uint64_t value; uint64_t old; int error; PRIV_LOCK(priv); MLX5E_RL_RLOCK(rl); value = rl->param.arg[arg2]; MLX5E_RL_RUNLOCK(rl); if (req != NULL) { old = value; error = sysctl_handle_64(oidp, &value, 0, req); if (error || req->newptr == NULL || value == rl->param.arg[arg2]) goto done; } else { old = 0; error = 0; } /* check if device is gone */ if (priv->gone) { error = ENXIO; goto done; } was_opened = rl->opened; mode_modify = MLX5_CAP_GEN(priv->mdev, cq_period_mode_modify); switch (MLX5E_RL_PARAMS_INDEX(arg[arg2])) { case MLX5E_RL_PARAMS_INDEX(tx_worker_threads_def): if (value > rl->param.tx_worker_threads_max) value = rl->param.tx_worker_threads_max; else if (value < 1) value = 1; /* store new value */ rl->param.arg[arg2] = value; break; case MLX5E_RL_PARAMS_INDEX(tx_channels_per_worker_def): if (value > rl->param.tx_channels_per_worker_max) value = rl->param.tx_channels_per_worker_max; else if (value < 1) value = 1; /* store new value */ rl->param.arg[arg2] = value; break; case MLX5E_RL_PARAMS_INDEX(tx_rates_def): if (value > rl->param.tx_rates_max) value = rl->param.tx_rates_max; else if (value < 1) value = 1; /* store new value */ rl->param.arg[arg2] = value; break; case MLX5E_RL_PARAMS_INDEX(tx_coalesce_usecs): /* range check */ if (value < 1) value = 0; else if (value > MLX5E_FLD_MAX(cqc, cq_period)) value = MLX5E_FLD_MAX(cqc, cq_period); /* store new value */ rl->param.arg[arg2] = value; /* check to avoid down and up the network interface */ if (was_opened) error = mlx5e_rl_refresh_channel_params(rl); break; case MLX5E_RL_PARAMS_INDEX(tx_coalesce_pkts): /* import TX coal pkts */ if (value < 1) value = 0; else if (value > MLX5E_FLD_MAX(cqc, cq_max_count)) value = MLX5E_FLD_MAX(cqc, cq_max_count); /* store new value */ rl->param.arg[arg2] = value; /* check to avoid down and up the network interface */ if (was_opened) error = mlx5e_rl_refresh_channel_params(rl); break; case MLX5E_RL_PARAMS_INDEX(tx_coalesce_mode): /* network interface must be down */ if (was_opened != 0 && mode_modify == 0) mlx5e_rl_close_workers(priv); /* import TX coalesce mode */ if (value != 0) value = 1; /* store new value */ rl->param.arg[arg2] = value; /* restart network interface, if any */ if (was_opened != 0) { if (mode_modify == 0) mlx5e_rl_open_workers(priv); else error = mlx5e_rl_refresh_channel_params(rl); } break; case MLX5E_RL_PARAMS_INDEX(tx_queue_size): /* network interface must be down */ if (was_opened) mlx5e_rl_close_workers(priv); /* import TX queue size */ if (value < (1 << MLX5E_PARAMS_MINIMUM_LOG_SQ_SIZE)) value = (1 << MLX5E_PARAMS_MINIMUM_LOG_SQ_SIZE); else if (value > priv->params_ethtool.tx_queue_size_max) value = priv->params_ethtool.tx_queue_size_max; /* store actual TX queue size */ value = 1ULL << order_base_2(value); /* store new value */ rl->param.arg[arg2] = value; /* verify TX completion factor */ mlx5e_rl_sync_tx_completion_fact(rl); /* restart network interface, if any */ if (was_opened) mlx5e_rl_open_workers(priv); break; case MLX5E_RL_PARAMS_INDEX(tx_completion_fact): /* network interface must be down */ if (was_opened) mlx5e_rl_close_workers(priv); /* store new value */ rl->param.arg[arg2] = value; /* verify parameter */ mlx5e_rl_sync_tx_completion_fact(rl); /* restart network interface, if any */ if (was_opened) mlx5e_rl_open_workers(priv); break; case MLX5E_RL_PARAMS_INDEX(tx_limit_add): error = mlx5e_rl_tx_limit_add(rl, value); break; case MLX5E_RL_PARAMS_INDEX(tx_limit_clr): error = mlx5e_rl_tx_limit_clr(rl, value); break; case MLX5E_RL_PARAMS_INDEX(tx_allowed_deviation): /* range check */ if (value > rl->param.tx_allowed_deviation_max) value = rl->param.tx_allowed_deviation_max; else if (value < rl->param.tx_allowed_deviation_min) value = rl->param.tx_allowed_deviation_min; MLX5E_RL_WLOCK(rl); rl->param.arg[arg2] = value; MLX5E_RL_WUNLOCK(rl); break; case MLX5E_RL_PARAMS_INDEX(tx_burst_size): /* range check */ if (value > rl->param.tx_burst_size_max) value = rl->param.tx_burst_size_max; else if (value < rl->param.tx_burst_size_min) value = rl->param.tx_burst_size_min; MLX5E_RL_WLOCK(rl); rl->param.arg[arg2] = value; MLX5E_RL_WUNLOCK(rl); break; default: break; } done: PRIV_UNLOCK(priv); return (error); } static void mlx5e_rl_sysctl_add_u64_oid(struct mlx5e_rl_priv_data *rl, unsigned x, struct sysctl_oid *node, const char *name, const char *desc) { /* * NOTE: In FreeBSD-11 and newer the CTLFLAG_RWTUN flag will * take care of loading default sysctl value from the kernel * environment, if any: */ if (strstr(name, "_max") != 0 || strstr(name, "_min") != 0) { /* read-only SYSCTLs */ SYSCTL_ADD_PROC(&rl->ctx, SYSCTL_CHILDREN(node), OID_AUTO, name, CTLTYPE_U64 | CTLFLAG_RD | CTLFLAG_MPSAFE, rl, x, &mlx5e_rl_sysctl_handler, "QU", desc); } else { if (strstr(name, "_def") != 0) { #ifdef RATELIMIT_DEBUG /* tunable read-only advanced SYSCTLs */ SYSCTL_ADD_PROC(&rl->ctx, SYSCTL_CHILDREN(node), OID_AUTO, name, CTLTYPE_U64 | CTLFLAG_RDTUN | CTLFLAG_MPSAFE, rl, x, &mlx5e_rl_sysctl_handler, "QU", desc); #endif } else { /* read-write SYSCTLs */ SYSCTL_ADD_PROC(&rl->ctx, SYSCTL_CHILDREN(node), OID_AUTO, name, CTLTYPE_U64 | CTLFLAG_RWTUN | CTLFLAG_MPSAFE, rl, x, &mlx5e_rl_sysctl_handler, "QU", desc); } } } static void mlx5e_rl_sysctl_add_stats_u64_oid(struct mlx5e_rl_priv_data *rl, unsigned x, struct sysctl_oid *node, const char *name, const char *desc) { /* read-only SYSCTLs */ SYSCTL_ADD_U64(&rl->ctx, SYSCTL_CHILDREN(node), OID_AUTO, name, CTLFLAG_RD, &rl->stats.arg[x], 0, desc); } #else int mlx5e_rl_init(struct mlx5e_priv *priv) { return (0); } void mlx5e_rl_cleanup(struct mlx5e_priv *priv) { /* NOP */ } #endif /* RATELIMIT */ diff --git a/sys/dev/mlx5/mlx5_en/mlx5_en_rx.c b/sys/dev/mlx5/mlx5_en/mlx5_en_rx.c index d4a175bf0988..e943a65709cf 100644 --- a/sys/dev/mlx5/mlx5_en/mlx5_en_rx.c +++ b/sys/dev/mlx5/mlx5_en/mlx5_en_rx.c @@ -1,610 +1,610 @@ /*- * Copyright (c) 2015 Mellanox Technologies. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS `AS IS' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #include "opt_rss.h" #include "opt_ratelimit.h" -#include "en.h" +#include #include static inline int mlx5e_alloc_rx_wqe(struct mlx5e_rq *rq, struct mlx5e_rx_wqe *wqe, u16 ix) { bus_dma_segment_t segs[MLX5E_MAX_BUSDMA_RX_SEGS]; struct mbuf *mb; int nsegs; int err; struct mbuf *mb_head; int i; if (rq->mbuf[ix].mbuf != NULL) return (0); mb_head = mb = m_getjcl(M_NOWAIT, MT_DATA, M_PKTHDR, MLX5E_MAX_RX_BYTES); if (unlikely(mb == NULL)) return (-ENOMEM); mb->m_len = MLX5E_MAX_RX_BYTES; mb->m_pkthdr.len = MLX5E_MAX_RX_BYTES; for (i = 1; i < rq->nsegs; i++) { if (mb_head->m_pkthdr.len >= rq->wqe_sz) break; mb = mb->m_next = m_getjcl(M_NOWAIT, MT_DATA, 0, MLX5E_MAX_RX_BYTES); if (unlikely(mb == NULL)) { m_freem(mb_head); return (-ENOMEM); } mb->m_len = MLX5E_MAX_RX_BYTES; mb_head->m_pkthdr.len += MLX5E_MAX_RX_BYTES; } /* rewind to first mbuf in chain */ mb = mb_head; /* get IP header aligned */ m_adj(mb, MLX5E_NET_IP_ALIGN); err = -bus_dmamap_load_mbuf_sg(rq->dma_tag, rq->mbuf[ix].dma_map, mb, segs, &nsegs, BUS_DMA_NOWAIT); if (err != 0) goto err_free_mbuf; if (unlikely(nsegs == 0)) { bus_dmamap_unload(rq->dma_tag, rq->mbuf[ix].dma_map); err = -ENOMEM; goto err_free_mbuf; } wqe->data[0].addr = cpu_to_be64(segs[0].ds_addr); wqe->data[0].byte_count = cpu_to_be32(segs[0].ds_len | MLX5_HW_START_PADDING); for (i = 1; i != nsegs; i++) { wqe->data[i].addr = cpu_to_be64(segs[i].ds_addr); wqe->data[i].byte_count = cpu_to_be32(segs[i].ds_len); } for (; i < rq->nsegs; i++) { wqe->data[i].addr = 0; wqe->data[i].byte_count = 0; } rq->mbuf[ix].mbuf = mb; rq->mbuf[ix].data = mb->m_data; bus_dmamap_sync(rq->dma_tag, rq->mbuf[ix].dma_map, BUS_DMASYNC_PREREAD); return (0); err_free_mbuf: m_freem(mb); return (err); } static void mlx5e_post_rx_wqes(struct mlx5e_rq *rq) { if (unlikely(rq->enabled == 0)) return; while (!mlx5_wq_ll_is_full(&rq->wq)) { struct mlx5e_rx_wqe *wqe = mlx5_wq_ll_get_wqe(&rq->wq, rq->wq.head); if (unlikely(mlx5e_alloc_rx_wqe(rq, wqe, rq->wq.head))) { callout_reset_curcpu(&rq->watchdog, 1, (void *)&mlx5e_post_rx_wqes, rq); break; } mlx5_wq_ll_push(&rq->wq, be16_to_cpu(wqe->next.next_wqe_index)); } /* ensure wqes are visible to device before updating doorbell record */ atomic_thread_fence_rel(); mlx5_wq_ll_update_db_record(&rq->wq); } static void mlx5e_lro_update_hdr(struct mbuf *mb, struct mlx5_cqe64 *cqe) { /* TODO: consider vlans, ip options, ... */ struct ether_header *eh; uint16_t eh_type; uint16_t tot_len; struct ip6_hdr *ip6 = NULL; struct ip *ip4 = NULL; struct tcphdr *th; uint32_t *ts_ptr; uint8_t l4_hdr_type; int tcp_ack; eh = mtod(mb, struct ether_header *); eh_type = ntohs(eh->ether_type); l4_hdr_type = get_cqe_l4_hdr_type(cqe); tcp_ack = ((CQE_L4_HDR_TYPE_TCP_ACK_NO_DATA == l4_hdr_type) || (CQE_L4_HDR_TYPE_TCP_ACK_AND_DATA == l4_hdr_type)); /* TODO: consider vlan */ tot_len = be32_to_cpu(cqe->byte_cnt) - ETHER_HDR_LEN; switch (eh_type) { case ETHERTYPE_IP: ip4 = (struct ip *)(eh + 1); th = (struct tcphdr *)(ip4 + 1); break; case ETHERTYPE_IPV6: ip6 = (struct ip6_hdr *)(eh + 1); th = (struct tcphdr *)(ip6 + 1); break; default: return; } ts_ptr = (uint32_t *)(th + 1); if (get_cqe_lro_tcppsh(cqe)) th->th_flags |= TH_PUSH; if (tcp_ack) { th->th_flags |= TH_ACK; th->th_ack = cqe->lro_ack_seq_num; th->th_win = cqe->lro_tcp_win; /* * FreeBSD handles only 32bit aligned timestamp right after * the TCP hdr * +--------+--------+--------+--------+ * | NOP | NOP | TSopt | 10 | * +--------+--------+--------+--------+ * | TSval timestamp | * +--------+--------+--------+--------+ * | TSecr timestamp | * +--------+--------+--------+--------+ */ if (get_cqe_lro_timestamp_valid(cqe) && (__predict_true(*ts_ptr) == ntohl(TCPOPT_NOP << 24 | TCPOPT_NOP << 16 | TCPOPT_TIMESTAMP << 8 | TCPOLEN_TIMESTAMP))) { /* * cqe->timestamp is 64bit long. * [0-31] - timestamp. * [32-64] - timestamp echo replay. */ ts_ptr[1] = *(uint32_t *)&cqe->timestamp; ts_ptr[2] = *((uint32_t *)&cqe->timestamp + 1); } } if (ip4) { ip4->ip_ttl = cqe->lro_min_ttl; ip4->ip_len = cpu_to_be16(tot_len); ip4->ip_sum = 0; ip4->ip_sum = in_cksum(mb, ip4->ip_hl << 2); } else { ip6->ip6_hlim = cqe->lro_min_ttl; ip6->ip6_plen = cpu_to_be16(tot_len - sizeof(struct ip6_hdr)); } /* TODO: handle tcp checksum */ } static uint64_t mlx5e_mbuf_tstmp(struct mlx5e_priv *priv, uint64_t hw_tstmp) { struct mlx5e_clbr_point *cp, dcp; uint64_t a1, a2, res; u_int gen; do { cp = &priv->clbr_points[priv->clbr_curr]; gen = atomic_load_acq_int(&cp->clbr_gen); if (gen == 0) return (0); dcp = *cp; atomic_thread_fence_acq(); } while (gen != cp->clbr_gen); a1 = (hw_tstmp - dcp.clbr_hw_prev) >> MLX5E_TSTMP_PREC; a2 = (dcp.base_curr - dcp.base_prev) >> MLX5E_TSTMP_PREC; res = (a1 * a2) << MLX5E_TSTMP_PREC; /* * Divisor cannot be zero because calibration callback * checks for the condition and disables timestamping * if clock halted. */ res /= (dcp.clbr_hw_curr - dcp.clbr_hw_prev) >> MLX5E_TSTMP_PREC; res += dcp.base_prev; return (res); } static inline void mlx5e_build_rx_mbuf(struct mlx5_cqe64 *cqe, struct mlx5e_rq *rq, struct mbuf *mb, u32 cqe_bcnt) { struct ifnet *ifp = rq->ifp; struct mlx5e_channel *c; struct mbuf *mb_head; int lro_num_seg; /* HW LRO session aggregated packets counter */ uint64_t tstmp; lro_num_seg = be32_to_cpu(cqe->srqn) >> 24; if (lro_num_seg > 1) { mlx5e_lro_update_hdr(mb, cqe); rq->stats.lro_packets++; rq->stats.lro_bytes += cqe_bcnt; } mb->m_pkthdr.len = cqe_bcnt; for (mb_head = mb; mb != NULL; mb = mb->m_next) { if (mb->m_len > cqe_bcnt) mb->m_len = cqe_bcnt; cqe_bcnt -= mb->m_len; if (likely(cqe_bcnt == 0)) { if (likely(mb->m_next != NULL)) { /* trim off empty mbufs */ m_freem(mb->m_next); mb->m_next = NULL; } break; } } /* rewind to first mbuf in chain */ mb = mb_head; /* check if a Toeplitz hash was computed */ if (cqe->rss_hash_type != 0) { mb->m_pkthdr.flowid = be32_to_cpu(cqe->rss_hash_result); #ifdef RSS /* decode the RSS hash type */ switch (cqe->rss_hash_type & (CQE_RSS_DST_HTYPE_L4 | CQE_RSS_DST_HTYPE_IP)) { /* IPv4 */ case (CQE_RSS_DST_HTYPE_TCP | CQE_RSS_DST_HTYPE_IPV4): M_HASHTYPE_SET(mb, M_HASHTYPE_RSS_TCP_IPV4); break; case (CQE_RSS_DST_HTYPE_UDP | CQE_RSS_DST_HTYPE_IPV4): M_HASHTYPE_SET(mb, M_HASHTYPE_RSS_UDP_IPV4); break; case CQE_RSS_DST_HTYPE_IPV4: M_HASHTYPE_SET(mb, M_HASHTYPE_RSS_IPV4); break; /* IPv6 */ case (CQE_RSS_DST_HTYPE_TCP | CQE_RSS_DST_HTYPE_IPV6): M_HASHTYPE_SET(mb, M_HASHTYPE_RSS_TCP_IPV6); break; case (CQE_RSS_DST_HTYPE_UDP | CQE_RSS_DST_HTYPE_IPV6): M_HASHTYPE_SET(mb, M_HASHTYPE_RSS_UDP_IPV6); break; case CQE_RSS_DST_HTYPE_IPV6: M_HASHTYPE_SET(mb, M_HASHTYPE_RSS_IPV6); break; default: /* Other */ M_HASHTYPE_SET(mb, M_HASHTYPE_OPAQUE_HASH); break; } #else M_HASHTYPE_SET(mb, M_HASHTYPE_OPAQUE_HASH); #endif #ifdef M_HASHTYPE_SETINNER if (cqe_is_tunneled(cqe)) M_HASHTYPE_SETINNER(mb); #endif } else { mb->m_pkthdr.flowid = rq->ix; M_HASHTYPE_SET(mb, M_HASHTYPE_OPAQUE); } mb->m_pkthdr.rcvif = ifp; if (cqe_is_tunneled(cqe)) { /* * CQE can be tunneled only if TIR is configured to * enable parsing of tunneled payload, so no need to * check for capabilities. */ if (((cqe->hds_ip_ext & (CQE_L2_OK | CQE_L3_OK)) == (CQE_L2_OK | CQE_L3_OK))) { mb->m_pkthdr.csum_flags |= CSUM_INNER_L3_CALC | CSUM_INNER_L3_VALID | CSUM_IP_CHECKED | CSUM_IP_VALID | CSUM_DATA_VALID | CSUM_PSEUDO_HDR; mb->m_pkthdr.csum_data = htons(0xffff); } if (((cqe->hds_ip_ext & (CQE_L2_OK | CQE_L3_OK | CQE_L4_OK)) == (CQE_L2_OK | CQE_L3_OK | CQE_L4_OK))) { mb->m_pkthdr.csum_flags |= CSUM_INNER_L4_CALC | CSUM_INNER_L4_VALID; } } else if (likely((ifp->if_capenable & (IFCAP_RXCSUM | IFCAP_RXCSUM_IPV6)) != 0) && ((cqe->hds_ip_ext & (CQE_L2_OK | CQE_L3_OK | CQE_L4_OK)) == (CQE_L2_OK | CQE_L3_OK | CQE_L4_OK))) { mb->m_pkthdr.csum_flags = CSUM_IP_CHECKED | CSUM_IP_VALID | CSUM_DATA_VALID | CSUM_PSEUDO_HDR; mb->m_pkthdr.csum_data = htons(0xffff); } else { rq->stats.csum_none++; } if (cqe_has_vlan(cqe)) { mb->m_pkthdr.ether_vtag = be16_to_cpu(cqe->vlan_info); mb->m_flags |= M_VLANTAG; } c = container_of(rq, struct mlx5e_channel, rq); if (c->priv->clbr_done >= 2) { tstmp = mlx5e_mbuf_tstmp(c->priv, be64_to_cpu(cqe->timestamp)); if ((tstmp & MLX5_CQE_TSTMP_PTP) != 0) { /* * Timestamp was taken on the packet entrance, * instead of the cqe generation. */ tstmp &= ~MLX5_CQE_TSTMP_PTP; mb->m_flags |= M_TSTMP_HPREC; } mb->m_pkthdr.rcv_tstmp = tstmp; mb->m_flags |= M_TSTMP; } } static inline void mlx5e_read_cqe_slot(struct mlx5e_cq *cq, u32 cc, void *data) { memcpy(data, mlx5_cqwq_get_wqe(&cq->wq, (cc & cq->wq.sz_m1)), sizeof(struct mlx5_cqe64)); } static inline void mlx5e_write_cqe_slot(struct mlx5e_cq *cq, u32 cc, void *data) { memcpy(mlx5_cqwq_get_wqe(&cq->wq, cc & cq->wq.sz_m1), data, sizeof(struct mlx5_cqe64)); } static inline void mlx5e_decompress_cqe(struct mlx5e_cq *cq, struct mlx5_cqe64 *title, struct mlx5_mini_cqe8 *mini, u16 wqe_counter, int i) { /* * NOTE: The fields which are not set here are copied from the * initial and common title. See memcpy() in * mlx5e_write_cqe_slot(). */ title->byte_cnt = mini->byte_cnt; title->wqe_counter = cpu_to_be16((wqe_counter + i) & cq->wq.sz_m1); title->rss_hash_result = mini->rx_hash_result; /* * Since we use MLX5_CQE_FORMAT_HASH when creating the RX CQ, * the value of the checksum should be ignored. */ title->check_sum = 0; title->op_own = (title->op_own & 0xf0) | (((cq->wq.cc + i) >> cq->wq.log_sz) & 1); } #define MLX5E_MINI_ARRAY_SZ 8 /* Make sure structs are not packet differently */ CTASSERT(sizeof(struct mlx5_cqe64) == sizeof(struct mlx5_mini_cqe8) * MLX5E_MINI_ARRAY_SZ); static void mlx5e_decompress_cqes(struct mlx5e_cq *cq) { struct mlx5_mini_cqe8 mini_array[MLX5E_MINI_ARRAY_SZ]; struct mlx5_cqe64 title; u32 cqe_count; u32 i = 0; u16 title_wqe_counter; mlx5e_read_cqe_slot(cq, cq->wq.cc, &title); title_wqe_counter = be16_to_cpu(title.wqe_counter); cqe_count = be32_to_cpu(title.byte_cnt); /* Make sure we won't overflow */ KASSERT(cqe_count <= cq->wq.sz_m1, ("%s: cqe_count %u > cq->wq.sz_m1 %u", __func__, cqe_count, cq->wq.sz_m1)); mlx5e_read_cqe_slot(cq, cq->wq.cc + 1, mini_array); while (true) { mlx5e_decompress_cqe(cq, &title, &mini_array[i % MLX5E_MINI_ARRAY_SZ], title_wqe_counter, i); mlx5e_write_cqe_slot(cq, cq->wq.cc + i, &title); i++; if (i == cqe_count) break; if (i % MLX5E_MINI_ARRAY_SZ == 0) mlx5e_read_cqe_slot(cq, cq->wq.cc + i, mini_array); } } static int mlx5e_poll_rx_cq(struct mlx5e_rq *rq, int budget) { struct pfil_head *pfil; int i, rv; CURVNET_SET_QUIET(rq->ifp->if_vnet); pfil = rq->channel->priv->pfil; for (i = 0; i < budget; i++) { struct mlx5e_rx_wqe *wqe; struct mlx5_cqe64 *cqe; struct mbuf *mb; __be16 wqe_counter_be; u16 wqe_counter; u32 byte_cnt, seglen; cqe = mlx5e_get_cqe(&rq->cq); if (!cqe) break; if (mlx5_get_cqe_format(cqe) == MLX5_COMPRESSED) mlx5e_decompress_cqes(&rq->cq); mlx5_cqwq_pop(&rq->cq.wq); wqe_counter_be = cqe->wqe_counter; wqe_counter = be16_to_cpu(wqe_counter_be); wqe = mlx5_wq_ll_get_wqe(&rq->wq, wqe_counter); byte_cnt = be32_to_cpu(cqe->byte_cnt); bus_dmamap_sync(rq->dma_tag, rq->mbuf[wqe_counter].dma_map, BUS_DMASYNC_POSTREAD); if (unlikely((cqe->op_own >> 4) != MLX5_CQE_RESP_SEND)) { rq->stats.wqe_err++; goto wq_ll_pop; } if (pfil != NULL && PFIL_HOOKED_IN(pfil)) { seglen = MIN(byte_cnt, MLX5E_MAX_RX_BYTES); rv = pfil_run_hooks(rq->channel->priv->pfil, rq->mbuf[wqe_counter].data, rq->ifp, seglen | PFIL_MEMPTR | PFIL_IN, NULL); switch (rv) { case PFIL_DROPPED: case PFIL_CONSUMED: /* * Filter dropped or consumed it. In * either case, we can just recycle * buffer; there is no more work to do. */ rq->stats.packets++; goto wq_ll_pop; case PFIL_REALLOCED: /* * Filter copied it; recycle buffer * and receive the new mbuf allocated * by the Filter */ mb = pfil_mem2mbuf(rq->mbuf[wqe_counter].data); goto rx_common; default: /* * The Filter said it was OK, so * receive like normal. */ KASSERT(rv == PFIL_PASS, ("Filter returned %d!\n", rv)); } } if ((MHLEN - MLX5E_NET_IP_ALIGN) >= byte_cnt && (mb = m_gethdr(M_NOWAIT, MT_DATA)) != NULL) { /* set maximum mbuf length */ mb->m_len = MHLEN - MLX5E_NET_IP_ALIGN; /* get IP header aligned */ mb->m_data += MLX5E_NET_IP_ALIGN; bcopy(rq->mbuf[wqe_counter].data, mtod(mb, caddr_t), byte_cnt); } else { mb = rq->mbuf[wqe_counter].mbuf; rq->mbuf[wqe_counter].mbuf = NULL; /* safety clear */ bus_dmamap_unload(rq->dma_tag, rq->mbuf[wqe_counter].dma_map); } rx_common: mlx5e_build_rx_mbuf(cqe, rq, mb, byte_cnt); rq->stats.bytes += byte_cnt; rq->stats.packets++; #ifdef NUMA mb->m_pkthdr.numa_domain = rq->ifp->if_numa_domain; #endif #if !defined(HAVE_TCP_LRO_RX) tcp_lro_queue_mbuf(&rq->lro, mb); #else if (mb->m_pkthdr.csum_flags == 0 || (rq->ifp->if_capenable & IFCAP_LRO) == 0 || rq->lro.lro_cnt == 0 || tcp_lro_rx(&rq->lro, mb, 0) != 0) { rq->ifp->if_input(rq->ifp, mb); } #endif wq_ll_pop: mlx5_wq_ll_pop(&rq->wq, wqe_counter_be, &wqe->next.next_wqe_index); } CURVNET_RESTORE(); mlx5_cqwq_update_db_record(&rq->cq.wq); /* ensure cq space is freed before enabling more cqes */ atomic_thread_fence_rel(); return (i); } void mlx5e_rx_cq_comp(struct mlx5_core_cq *mcq, struct mlx5_eqe *eqe __unused) { struct mlx5e_rq *rq = container_of(mcq, struct mlx5e_rq, cq.mcq); int i = 0; #ifdef HAVE_PER_CQ_EVENT_PACKET #if (MHLEN < 15) #error "MHLEN is too small" #endif struct mbuf *mb = m_gethdr(M_NOWAIT, MT_DATA); if (mb != NULL) { /* this code is used for debugging purpose only */ mb->m_pkthdr.len = mb->m_len = 15; memset(mb->m_data, 255, 14); mb->m_data[14] = rq->ix; mb->m_pkthdr.rcvif = rq->ifp; rq->ifp->if_input(rq->ifp, mb); } #endif mtx_lock(&rq->mtx); /* * Polling the entire CQ without posting new WQEs results in * lack of receive WQEs during heavy traffic scenarios. */ while (1) { if (mlx5e_poll_rx_cq(rq, MLX5E_RX_BUDGET_MAX) != MLX5E_RX_BUDGET_MAX) break; i += MLX5E_RX_BUDGET_MAX; if (i >= MLX5E_BUDGET_MAX) break; mlx5e_post_rx_wqes(rq); } mlx5e_post_rx_wqes(rq); /* check for dynamic interrupt moderation callback */ if (rq->dim.mode != NET_DIM_CQ_PERIOD_MODE_DISABLED) net_dim(&rq->dim, rq->stats.packets, rq->stats.bytes); mlx5e_cq_arm(&rq->cq, MLX5_GET_DOORBELL_LOCK(&rq->channel->priv->doorbell_lock)); tcp_lro_flush_all(&rq->lro); mtx_unlock(&rq->mtx); } diff --git a/sys/dev/mlx5/mlx5_en/mlx5_en_tx.c b/sys/dev/mlx5/mlx5_en/mlx5_en_tx.c index 69b7d904f36f..7bb12d7b5b8a 100644 --- a/sys/dev/mlx5/mlx5_en/mlx5_en_tx.c +++ b/sys/dev/mlx5/mlx5_en/mlx5_en_tx.c @@ -1,1186 +1,1186 @@ /*- * Copyright (c) 2015-2019 Mellanox Technologies. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS `AS IS' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #include "opt_kern_tls.h" #include "opt_rss.h" #include "opt_ratelimit.h" -#include "en.h" +#include #include static inline bool mlx5e_do_send_cqe_inline(struct mlx5e_sq *sq) { sq->cev_counter++; /* interleave the CQEs */ if (sq->cev_counter >= sq->cev_factor) { sq->cev_counter = 0; return (true); } return (false); } bool mlx5e_do_send_cqe(struct mlx5e_sq *sq) { return (mlx5e_do_send_cqe_inline(sq)); } void mlx5e_send_nop(struct mlx5e_sq *sq, u32 ds_cnt) { u16 pi = sq->pc & sq->wq.sz_m1; struct mlx5e_tx_wqe *wqe = mlx5_wq_cyc_get_wqe(&sq->wq, pi); memset(&wqe->ctrl, 0, sizeof(wqe->ctrl)); wqe->ctrl.opmod_idx_opcode = cpu_to_be32((sq->pc << 8) | MLX5_OPCODE_NOP); wqe->ctrl.qpn_ds = cpu_to_be32((sq->sqn << 8) | ds_cnt); if (mlx5e_do_send_cqe_inline(sq)) wqe->ctrl.fm_ce_se = MLX5_WQE_CTRL_CQ_UPDATE; else wqe->ctrl.fm_ce_se = 0; /* Copy data for doorbell */ memcpy(sq->doorbell.d32, &wqe->ctrl, sizeof(sq->doorbell.d32)); sq->mbuf[pi].mbuf = NULL; sq->mbuf[pi].num_bytes = 0; sq->mbuf[pi].num_wqebbs = DIV_ROUND_UP(ds_cnt, MLX5_SEND_WQEBB_NUM_DS); sq->pc += sq->mbuf[pi].num_wqebbs; } #if (__FreeBSD_version >= 1100000) static uint32_t mlx5e_hash_value; static void mlx5e_hash_init(void *arg) { mlx5e_hash_value = m_ether_tcpip_hash_init(); } /* Make kernel call mlx5e_hash_init after the random stack finished initializing */ SYSINIT(mlx5e_hash_init, SI_SUB_RANDOM, SI_ORDER_ANY, &mlx5e_hash_init, NULL); #endif static struct mlx5e_sq * mlx5e_select_queue_by_send_tag(struct ifnet *ifp, struct mbuf *mb) { struct m_snd_tag *mb_tag; struct mlx5e_sq *sq; mb_tag = mb->m_pkthdr.snd_tag; #ifdef KERN_TLS top: #endif /* get pointer to sendqueue */ switch (mb_tag->sw->type) { #ifdef RATELIMIT case IF_SND_TAG_TYPE_RATE_LIMIT: sq = container_of(mb_tag, struct mlx5e_rl_channel, tag)->sq; break; #ifdef KERN_TLS case IF_SND_TAG_TYPE_TLS_RATE_LIMIT: mb_tag = container_of(mb_tag, struct mlx5e_tls_tag, tag)->rl_tag; goto top; #endif #endif case IF_SND_TAG_TYPE_UNLIMITED: sq = &container_of(mb_tag, struct mlx5e_channel, tag)->sq[0]; KASSERT((mb_tag->refcount > 0), ("mlx5e_select_queue: Channel refs are zero for unlimited tag")); break; #ifdef KERN_TLS case IF_SND_TAG_TYPE_TLS: mb_tag = container_of(mb_tag, struct mlx5e_tls_tag, tag)->rl_tag; goto top; #endif default: sq = NULL; break; } /* check if valid */ if (sq != NULL && READ_ONCE(sq->running) != 0) return (sq); return (NULL); } static struct mlx5e_sq * mlx5e_select_queue(struct ifnet *ifp, struct mbuf *mb) { struct mlx5e_priv *priv = ifp->if_softc; struct mlx5e_sq *sq; u32 ch; u32 tc; /* obtain VLAN information if present */ if (mb->m_flags & M_VLANTAG) { tc = (mb->m_pkthdr.ether_vtag >> 13); if (tc >= priv->num_tc) tc = priv->default_vlan_prio; } else { tc = priv->default_vlan_prio; } ch = priv->params.num_channels; /* check if flowid is set */ if (M_HASHTYPE_GET(mb) != M_HASHTYPE_NONE) { #ifdef RSS u32 temp; if (rss_hash2bucket(mb->m_pkthdr.flowid, M_HASHTYPE_GET(mb), &temp) == 0) ch = temp % ch; else #endif ch = (mb->m_pkthdr.flowid % 128) % ch; } else { #if (__FreeBSD_version >= 1100000) ch = m_ether_tcpip_hash(MBUF_HASHFLAG_L3 | MBUF_HASHFLAG_L4, mb, mlx5e_hash_value) % ch; #else /* * m_ether_tcpip_hash not present in stable, so just * throw unhashed mbufs on queue 0 */ ch = 0; #endif } /* check if send queue is running */ sq = &priv->channel[ch].sq[tc]; if (likely(READ_ONCE(sq->running) != 0)) return (sq); return (NULL); } static inline u16 mlx5e_get_l2_header_size(struct mlx5e_sq *sq, struct mbuf *mb) { struct ether_vlan_header *eh; uint16_t eth_type; int min_inline; eh = mtod(mb, struct ether_vlan_header *); if (unlikely(mb->m_len < ETHER_HDR_LEN)) { goto max_inline; } else if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) { if (unlikely(mb->m_len < (ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN))) goto max_inline; eth_type = ntohs(eh->evl_proto); min_inline = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN; } else { eth_type = ntohs(eh->evl_encap_proto); min_inline = ETHER_HDR_LEN; } switch (eth_type) { case ETHERTYPE_IP: case ETHERTYPE_IPV6: /* * Make sure the TOS(IPv4) or traffic class(IPv6) * field gets inlined. Else the SQ may stall. */ min_inline += 4; break; default: goto max_inline; } /* * m_copydata() will be used on the remaining header which * does not need to reside within the first m_len bytes of * data: */ if (mb->m_pkthdr.len < min_inline) goto max_inline; return (min_inline); max_inline: return (MIN(mb->m_pkthdr.len, sq->max_inline)); } /* * This function parse IPv4 and IPv6 packets looking for TCP and UDP * headers. * * Upon return the pointer at which the "ppth" argument points, is set * to the location of the TCP header. NULL is used if no TCP header is * present. * * The return value indicates the number of bytes from the beginning * of the packet until the first byte after the TCP or UDP header. If * this function returns zero, the parsing failed. */ int mlx5e_get_full_header_size(const struct mbuf *mb, const struct tcphdr **ppth) { const struct ether_vlan_header *eh; const struct tcphdr *th; const struct ip *ip; int ip_hlen, tcp_hlen; const struct ip6_hdr *ip6; uint16_t eth_type; int eth_hdr_len; eh = mtod(mb, const struct ether_vlan_header *); if (unlikely(mb->m_len < ETHER_HDR_LEN)) goto failure; if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) { if (unlikely(mb->m_len < ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN)) goto failure; eth_type = ntohs(eh->evl_proto); eth_hdr_len = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN; } else { eth_type = ntohs(eh->evl_encap_proto); eth_hdr_len = ETHER_HDR_LEN; } switch (eth_type) { case ETHERTYPE_IP: ip = (const struct ip *)(mb->m_data + eth_hdr_len); if (unlikely(mb->m_len < eth_hdr_len + sizeof(*ip))) goto failure; switch (ip->ip_p) { case IPPROTO_TCP: ip_hlen = ip->ip_hl << 2; eth_hdr_len += ip_hlen; goto tcp_packet; case IPPROTO_UDP: ip_hlen = ip->ip_hl << 2; eth_hdr_len += ip_hlen + sizeof(struct udphdr); th = NULL; goto udp_packet; default: goto failure; } break; case ETHERTYPE_IPV6: ip6 = (const struct ip6_hdr *)(mb->m_data + eth_hdr_len); if (unlikely(mb->m_len < eth_hdr_len + sizeof(*ip6))) goto failure; switch (ip6->ip6_nxt) { case IPPROTO_TCP: eth_hdr_len += sizeof(*ip6); goto tcp_packet; case IPPROTO_UDP: eth_hdr_len += sizeof(*ip6) + sizeof(struct udphdr); th = NULL; goto udp_packet; default: goto failure; } break; default: goto failure; } tcp_packet: if (unlikely(mb->m_len < eth_hdr_len + sizeof(*th))) { const struct mbuf *m_th = mb->m_next; if (unlikely(mb->m_len != eth_hdr_len || m_th == NULL || m_th->m_len < sizeof(*th))) goto failure; th = (const struct tcphdr *)(m_th->m_data); } else { th = (const struct tcphdr *)(mb->m_data + eth_hdr_len); } tcp_hlen = th->th_off << 2; eth_hdr_len += tcp_hlen; udp_packet: /* * m_copydata() will be used on the remaining header which * does not need to reside within the first m_len bytes of * data: */ if (unlikely(mb->m_pkthdr.len < eth_hdr_len)) goto failure; if (ppth != NULL) *ppth = th; return (eth_hdr_len); failure: if (ppth != NULL) *ppth = NULL; return (0); } /* * Locate a pointer inside a mbuf chain. Returns NULL upon failure. */ static inline void * mlx5e_parse_mbuf_chain(const struct mbuf **mb, int *poffset, int eth_hdr_len, int min_len) { if (unlikely(mb[0]->m_len == eth_hdr_len)) { poffset[0] = eth_hdr_len; if (unlikely((mb[0] = mb[0]->m_next) == NULL)) return (NULL); } if (unlikely(mb[0]->m_len < eth_hdr_len - poffset[0] + min_len)) return (NULL); return (mb[0]->m_data + eth_hdr_len - poffset[0]); } /* * This function parse IPv4 and IPv6 packets looking for UDP, VXLAN * and TCP headers. * * The return value indicates the number of bytes from the beginning * of the packet until the first byte after the TCP header. If this * function returns zero, the parsing failed. */ static int mlx5e_get_vxlan_header_size(const struct mbuf *mb, struct mlx5e_tx_wqe *wqe, uint8_t cs_mask, uint8_t opcode) { const struct ether_vlan_header *eh; struct ip *ip4; struct ip6_hdr *ip6; struct tcphdr *th; struct udphdr *udp; bool has_outer_vlan_tag; uint16_t eth_type; uint8_t ip_type; int pkt_hdr_len; int eth_hdr_len; int tcp_hlen; int ip_hlen; int offset; pkt_hdr_len = mb->m_pkthdr.len; has_outer_vlan_tag = (mb->m_flags & M_VLANTAG) != 0; offset = 0; eh = mtod(mb, const struct ether_vlan_header *); if (unlikely(mb->m_len < ETHER_HDR_LEN)) return (0); if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) { if (unlikely(mb->m_len < ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN)) return (0); eth_type = eh->evl_proto; eth_hdr_len = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN; } else { eth_type = eh->evl_encap_proto; eth_hdr_len = ETHER_HDR_LEN; } switch (eth_type) { case htons(ETHERTYPE_IP): ip4 = mlx5e_parse_mbuf_chain(&mb, &offset, eth_hdr_len, sizeof(*ip4)); if (unlikely(ip4 == NULL)) return (0); ip_type = ip4->ip_p; if (unlikely(ip_type != IPPROTO_UDP)) return (0); wqe->eth.swp_outer_l3_offset = eth_hdr_len / 2; wqe->eth.cs_flags = MLX5_ETH_WQE_L3_CSUM | MLX5_ETH_WQE_L4_CSUM; ip_hlen = ip4->ip_hl << 2; eth_hdr_len += ip_hlen; udp = mlx5e_parse_mbuf_chain(&mb, &offset, eth_hdr_len, sizeof(*udp)); if (unlikely(udp == NULL)) return (0); wqe->eth.swp_outer_l4_offset = eth_hdr_len / 2; wqe->eth.swp_flags |= MLX5_ETH_WQE_SWP_OUTER_L4_TYPE; eth_hdr_len += sizeof(*udp); break; case htons(ETHERTYPE_IPV6): ip6 = mlx5e_parse_mbuf_chain(&mb, &offset, eth_hdr_len, sizeof(*ip6)); if (unlikely(ip6 == NULL)) return (0); ip_type = ip6->ip6_nxt; if (unlikely(ip_type != IPPROTO_UDP)) return (0); wqe->eth.swp_outer_l3_offset = eth_hdr_len / 2; wqe->eth.cs_flags = MLX5_ETH_WQE_L4_CSUM; eth_hdr_len += sizeof(*ip6); udp = mlx5e_parse_mbuf_chain(&mb, &offset, eth_hdr_len, sizeof(*udp)); if (unlikely(udp == NULL)) return (0); wqe->eth.swp_outer_l4_offset = eth_hdr_len / 2; wqe->eth.swp_flags |= MLX5_ETH_WQE_SWP_OUTER_L4_TYPE | MLX5_ETH_WQE_SWP_OUTER_L3_TYPE; eth_hdr_len += sizeof(*udp); break; default: return (0); } /* * If the hardware is not computing inner IP checksum, then * skip inlining the inner outer UDP and VXLAN header: */ if (unlikely((cs_mask & MLX5_ETH_WQE_L3_INNER_CSUM) == 0)) goto done; if (unlikely(mlx5e_parse_mbuf_chain(&mb, &offset, eth_hdr_len, 8) == NULL)) return (0); eth_hdr_len += 8; /* Check for ethernet header again. */ eh = mlx5e_parse_mbuf_chain(&mb, &offset, eth_hdr_len, ETHER_HDR_LEN); if (unlikely(eh == NULL)) return (0); if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) { if (unlikely(mb->m_len < eth_hdr_len - offset + ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN)) return (0); eth_type = eh->evl_proto; eth_hdr_len += ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN; } else { eth_type = eh->evl_encap_proto; eth_hdr_len += ETHER_HDR_LEN; } /* Check for IP header again. */ switch (eth_type) { case htons(ETHERTYPE_IP): ip4 = mlx5e_parse_mbuf_chain(&mb, &offset, eth_hdr_len, sizeof(*ip4)); if (unlikely(ip4 == NULL)) return (0); wqe->eth.swp_inner_l3_offset = eth_hdr_len / 2; wqe->eth.cs_flags |= MLX5_ETH_WQE_L3_INNER_CSUM; ip_type = ip4->ip_p; ip_hlen = ip4->ip_hl << 2; eth_hdr_len += ip_hlen; break; case htons(ETHERTYPE_IPV6): ip6 = mlx5e_parse_mbuf_chain(&mb, &offset, eth_hdr_len, sizeof(*ip6)); if (unlikely(ip6 == NULL)) return (0); wqe->eth.swp_inner_l3_offset = eth_hdr_len / 2; wqe->eth.swp_flags |= MLX5_ETH_WQE_SWP_INNER_L3_TYPE; ip_type = ip6->ip6_nxt; eth_hdr_len += sizeof(*ip6); break; default: return (0); } /* * If the hardware is not computing inner UDP/TCP checksum, * then skip inlining the inner UDP/TCP header: */ if (unlikely((cs_mask & MLX5_ETH_WQE_L4_INNER_CSUM) == 0)) goto done; switch (ip_type) { case IPPROTO_UDP: udp = mlx5e_parse_mbuf_chain(&mb, &offset, eth_hdr_len, sizeof(*udp)); if (unlikely(udp == NULL)) return (0); wqe->eth.swp_inner_l4_offset = (eth_hdr_len / 2); wqe->eth.cs_flags |= MLX5_ETH_WQE_L4_INNER_CSUM; wqe->eth.swp_flags |= MLX5_ETH_WQE_SWP_INNER_L4_TYPE; eth_hdr_len += sizeof(*udp); break; case IPPROTO_TCP: th = mlx5e_parse_mbuf_chain(&mb, &offset, eth_hdr_len, sizeof(*th)); if (unlikely(th == NULL)) return (0); wqe->eth.swp_inner_l4_offset = eth_hdr_len / 2; wqe->eth.cs_flags |= MLX5_ETH_WQE_L4_INNER_CSUM; tcp_hlen = th->th_off << 2; eth_hdr_len += tcp_hlen; break; default: return (0); } done: if (unlikely(pkt_hdr_len < eth_hdr_len)) return (0); /* Account for software inserted VLAN tag, if any. */ if (unlikely(has_outer_vlan_tag)) { wqe->eth.swp_outer_l3_offset += ETHER_VLAN_ENCAP_LEN / 2; wqe->eth.swp_outer_l4_offset += ETHER_VLAN_ENCAP_LEN / 2; wqe->eth.swp_inner_l3_offset += ETHER_VLAN_ENCAP_LEN / 2; wqe->eth.swp_inner_l4_offset += ETHER_VLAN_ENCAP_LEN / 2; } /* * When inner checksums are set, outer L4 checksum flag must * be disabled. */ if (wqe->eth.cs_flags & (MLX5_ETH_WQE_L3_INNER_CSUM | MLX5_ETH_WQE_L4_INNER_CSUM)) wqe->eth.cs_flags &= ~MLX5_ETH_WQE_L4_CSUM; return (eth_hdr_len); } struct mlx5_wqe_dump_seg { struct mlx5_wqe_ctrl_seg ctrl; struct mlx5_wqe_data_seg data; } __aligned(MLX5_SEND_WQE_BB); CTASSERT(DIV_ROUND_UP(2, MLX5_SEND_WQEBB_NUM_DS) == 1); int mlx5e_sq_dump_xmit(struct mlx5e_sq *sq, struct mlx5e_xmit_args *parg, struct mbuf **mbp) { bus_dma_segment_t segs[MLX5E_MAX_TX_MBUF_FRAGS]; struct mlx5_wqe_dump_seg *wqe; struct mlx5_wqe_dump_seg *wqe_last; int nsegs; int xsegs; u32 off; u32 msb; int err; int x; struct mbuf *mb; const u32 ds_cnt = 2; u16 pi; const u8 opcode = MLX5_OPCODE_DUMP; /* get pointer to mbuf */ mb = *mbp; /* get producer index */ pi = sq->pc & sq->wq.sz_m1; sq->mbuf[pi].num_bytes = mb->m_pkthdr.len; sq->mbuf[pi].num_wqebbs = 0; /* check number of segments in mbuf */ err = bus_dmamap_load_mbuf_sg(sq->dma_tag, sq->mbuf[pi].dma_map, mb, segs, &nsegs, BUS_DMA_NOWAIT); if (err == EFBIG) { /* update statistics */ sq->stats.defragged++; /* too many mbuf fragments */ mb = m_defrag(*mbp, M_NOWAIT); if (mb == NULL) { mb = *mbp; goto tx_drop; } /* try again */ err = bus_dmamap_load_mbuf_sg(sq->dma_tag, sq->mbuf[pi].dma_map, mb, segs, &nsegs, BUS_DMA_NOWAIT); } if (err != 0) goto tx_drop; /* make sure all mbuf data, if any, is visible to the bus */ bus_dmamap_sync(sq->dma_tag, sq->mbuf[pi].dma_map, BUS_DMASYNC_PREWRITE); /* compute number of real DUMP segments */ msb = sq->priv->params_ethtool.hw_mtu_msb; for (x = xsegs = 0; x != nsegs; x++) xsegs += howmany((u32)segs[x].ds_len, msb); /* check if there are no segments */ if (unlikely(xsegs == 0)) { bus_dmamap_unload(sq->dma_tag, sq->mbuf[pi].dma_map); m_freem(mb); *mbp = NULL; /* safety clear */ return (0); } /* return ENOBUFS if the queue is full */ if (unlikely(!mlx5e_sq_has_room_for(sq, xsegs))) { sq->stats.enobuf++; bus_dmamap_unload(sq->dma_tag, sq->mbuf[pi].dma_map); m_freem(mb); *mbp = NULL; /* safety clear */ return (ENOBUFS); } wqe = mlx5_wq_cyc_get_wqe(&sq->wq, pi); wqe_last = mlx5_wq_cyc_get_wqe(&sq->wq, sq->wq.sz_m1); for (x = 0; x != nsegs; x++) { for (off = 0; off < segs[x].ds_len; off += msb) { u32 len = segs[x].ds_len - off; /* limit length */ if (likely(len > msb)) len = msb; memset(&wqe->ctrl, 0, sizeof(wqe->ctrl)); /* fill control segment */ wqe->ctrl.opmod_idx_opcode = cpu_to_be32((sq->pc << 8) | opcode); wqe->ctrl.qpn_ds = cpu_to_be32((sq->sqn << 8) | ds_cnt); wqe->ctrl.imm = cpu_to_be32(parg->tisn << 8); /* fill data segment */ wqe->data.addr = cpu_to_be64((uint64_t)segs[x].ds_addr + off); wqe->data.lkey = sq->mkey_be; wqe->data.byte_count = cpu_to_be32(len); /* advance to next building block */ if (unlikely(wqe == wqe_last)) wqe = mlx5_wq_cyc_get_wqe(&sq->wq, 0); else wqe++; sq->mbuf[pi].num_wqebbs++; sq->pc++; } } wqe = mlx5_wq_cyc_get_wqe(&sq->wq, pi); wqe_last = mlx5_wq_cyc_get_wqe(&sq->wq, (sq->pc - 1) & sq->wq.sz_m1); /* put in place data fence */ wqe->ctrl.fm_ce_se |= MLX5_FENCE_MODE_INITIATOR_SMALL; /* check if we should generate a completion event */ if (mlx5e_do_send_cqe_inline(sq)) wqe_last->ctrl.fm_ce_se |= MLX5_WQE_CTRL_CQ_UPDATE; /* copy data for doorbell */ memcpy(sq->doorbell.d32, wqe_last, sizeof(sq->doorbell.d32)); /* store pointer to mbuf */ sq->mbuf[pi].mbuf = mb; sq->mbuf[pi].p_refcount = parg->pref; atomic_add_int(parg->pref, 1); /* count all traffic going out */ sq->stats.packets++; sq->stats.bytes += sq->mbuf[pi].num_bytes; *mbp = NULL; /* safety clear */ return (0); tx_drop: sq->stats.dropped++; *mbp = NULL; m_freem(mb); return err; } int mlx5e_sq_xmit(struct mlx5e_sq *sq, struct mbuf **mbp) { bus_dma_segment_t segs[MLX5E_MAX_TX_MBUF_FRAGS]; struct mlx5e_xmit_args args = {}; struct mlx5_wqe_data_seg *dseg; struct mlx5e_tx_wqe *wqe; struct ifnet *ifp; int nsegs; int err; int x; struct mbuf *mb; u16 ds_cnt; u16 pi; u8 opcode; #ifdef KERN_TLS top: #endif /* Return ENOBUFS if the queue is full */ if (unlikely(!mlx5e_sq_has_room_for(sq, 2 * MLX5_SEND_WQE_MAX_WQEBBS))) { sq->stats.enobuf++; return (ENOBUFS); } /* Align SQ edge with NOPs to avoid WQE wrap around */ pi = ((~sq->pc) & sq->wq.sz_m1); if (pi < (MLX5_SEND_WQE_MAX_WQEBBS - 1)) { /* Send one multi NOP message instead of many */ mlx5e_send_nop(sq, (pi + 1) * MLX5_SEND_WQEBB_NUM_DS); pi = ((~sq->pc) & sq->wq.sz_m1); if (pi < (MLX5_SEND_WQE_MAX_WQEBBS - 1)) { sq->stats.enobuf++; return (ENOMEM); } } #ifdef KERN_TLS /* Special handling for TLS packets, if any */ switch (mlx5e_sq_tls_xmit(sq, &args, mbp)) { case MLX5E_TLS_LOOP: goto top; case MLX5E_TLS_FAILURE: mb = *mbp; err = ENOMEM; goto tx_drop; case MLX5E_TLS_DEFERRED: return (0); case MLX5E_TLS_CONTINUE: default: break; } #endif /* Setup local variables */ pi = sq->pc & sq->wq.sz_m1; wqe = mlx5_wq_cyc_get_wqe(&sq->wq, pi); ifp = sq->ifp; memset(wqe, 0, sizeof(*wqe)); /* get pointer to mbuf */ mb = *mbp; /* Send a copy of the frame to the BPF listener, if any */ if (ifp != NULL && ifp->if_bpf != NULL) ETHER_BPF_MTAP(ifp, mb); if (mb->m_pkthdr.csum_flags & (CSUM_IP | CSUM_TSO)) { wqe->eth.cs_flags |= MLX5_ETH_WQE_L3_CSUM; } if (mb->m_pkthdr.csum_flags & (CSUM_TCP | CSUM_UDP | CSUM_UDP_IPV6 | CSUM_TCP_IPV6 | CSUM_TSO)) { wqe->eth.cs_flags |= MLX5_ETH_WQE_L4_CSUM; } if (wqe->eth.cs_flags == 0) { sq->stats.csum_offload_none++; } if (mb->m_pkthdr.csum_flags & CSUM_TSO) { u32 payload_len; u32 mss = mb->m_pkthdr.tso_segsz; u32 num_pkts; wqe->eth.mss = cpu_to_be16(mss); opcode = MLX5_OPCODE_LSO; if (args.ihs == 0) args.ihs = mlx5e_get_full_header_size(mb, NULL); if (unlikely(args.ihs == 0)) { err = EINVAL; goto tx_drop; } payload_len = mb->m_pkthdr.len - args.ihs; if (payload_len == 0) num_pkts = 1; else num_pkts = DIV_ROUND_UP(payload_len, mss); sq->mbuf[pi].num_bytes = payload_len + (num_pkts * args.ihs); sq->stats.tso_packets++; sq->stats.tso_bytes += payload_len; } else if (mb->m_pkthdr.csum_flags & CSUM_ENCAP_VXLAN) { /* check for inner TCP TSO first */ if (mb->m_pkthdr.csum_flags & (CSUM_INNER_IP_TSO | CSUM_INNER_IP6_TSO)) { u32 payload_len; u32 mss = mb->m_pkthdr.tso_segsz; u32 num_pkts; wqe->eth.mss = cpu_to_be16(mss); opcode = MLX5_OPCODE_LSO; if (likely(args.ihs == 0)) { args.ihs = mlx5e_get_vxlan_header_size(mb, wqe, MLX5_ETH_WQE_L3_INNER_CSUM | MLX5_ETH_WQE_L4_INNER_CSUM | MLX5_ETH_WQE_L4_CSUM | MLX5_ETH_WQE_L3_CSUM, opcode); if (unlikely(args.ihs == 0)) { err = EINVAL; goto tx_drop; } } payload_len = mb->m_pkthdr.len - args.ihs; if (payload_len == 0) num_pkts = 1; else num_pkts = DIV_ROUND_UP(payload_len, mss); sq->mbuf[pi].num_bytes = payload_len + num_pkts * args.ihs; sq->stats.tso_packets++; sq->stats.tso_bytes += payload_len; } else { opcode = MLX5_OPCODE_SEND; if (likely(args.ihs == 0)) { uint8_t cs_mask; if (mb->m_pkthdr.csum_flags & (CSUM_INNER_IP_TCP | CSUM_INNER_IP_UDP | CSUM_INNER_IP6_TCP | CSUM_INNER_IP6_UDP)) { cs_mask = MLX5_ETH_WQE_L3_INNER_CSUM | MLX5_ETH_WQE_L4_INNER_CSUM | MLX5_ETH_WQE_L4_CSUM | MLX5_ETH_WQE_L3_CSUM; } else if (mb->m_pkthdr.csum_flags & CSUM_INNER_IP) { cs_mask = MLX5_ETH_WQE_L3_INNER_CSUM | MLX5_ETH_WQE_L4_CSUM | MLX5_ETH_WQE_L3_CSUM; } else { cs_mask = MLX5_ETH_WQE_L4_CSUM | MLX5_ETH_WQE_L3_CSUM; } args.ihs = mlx5e_get_vxlan_header_size(mb, wqe, cs_mask, opcode); if (unlikely(args.ihs == 0)) { err = EINVAL; goto tx_drop; } } sq->mbuf[pi].num_bytes = max_t (unsigned int, mb->m_pkthdr.len, ETHER_MIN_LEN - ETHER_CRC_LEN); } } else { opcode = MLX5_OPCODE_SEND; if (args.ihs == 0) { switch (sq->min_inline_mode) { case MLX5_INLINE_MODE_IP: case MLX5_INLINE_MODE_TCP_UDP: args.ihs = mlx5e_get_full_header_size(mb, NULL); if (unlikely(args.ihs == 0)) args.ihs = mlx5e_get_l2_header_size(sq, mb); break; case MLX5_INLINE_MODE_L2: args.ihs = mlx5e_get_l2_header_size(sq, mb); break; case MLX5_INLINE_MODE_NONE: /* FALLTHROUGH */ default: if ((mb->m_flags & M_VLANTAG) != 0 && (sq->min_insert_caps & MLX5E_INSERT_VLAN) != 0) { /* inlining VLAN data is not required */ wqe->eth.vlan_cmd = htons(0x8000); /* bit 0 CVLAN */ wqe->eth.vlan_hdr = htons(mb->m_pkthdr.ether_vtag); args.ihs = 0; } else if ((mb->m_flags & M_VLANTAG) == 0 && (sq->min_insert_caps & MLX5E_INSERT_NON_VLAN) != 0) { /* inlining non-VLAN data is not required */ args.ihs = 0; } else { /* we are forced to inlining L2 header, if any */ args.ihs = mlx5e_get_l2_header_size(sq, mb); } break; } } sq->mbuf[pi].num_bytes = max_t (unsigned int, mb->m_pkthdr.len, ETHER_MIN_LEN - ETHER_CRC_LEN); } if (likely(args.ihs == 0)) { /* nothing to inline */ } else if ((mb->m_flags & M_VLANTAG) != 0) { struct ether_vlan_header *eh = (struct ether_vlan_header *) wqe->eth.inline_hdr_start; /* Range checks */ if (unlikely(args.ihs > (sq->max_inline - ETHER_VLAN_ENCAP_LEN))) { if (mb->m_pkthdr.csum_flags & (CSUM_TSO | CSUM_ENCAP_VXLAN)) { err = EINVAL; goto tx_drop; } args.ihs = (sq->max_inline - ETHER_VLAN_ENCAP_LEN); } else if (unlikely(args.ihs < ETHER_HDR_LEN)) { err = EINVAL; goto tx_drop; } m_copydata(mb, 0, ETHER_HDR_LEN, (caddr_t)eh); m_adj(mb, ETHER_HDR_LEN); /* Insert 4 bytes VLAN tag into data stream */ eh->evl_proto = eh->evl_encap_proto; eh->evl_encap_proto = htons(ETHERTYPE_VLAN); eh->evl_tag = htons(mb->m_pkthdr.ether_vtag); /* Copy rest of header data, if any */ m_copydata(mb, 0, args.ihs - ETHER_HDR_LEN, (caddr_t)(eh + 1)); m_adj(mb, args.ihs - ETHER_HDR_LEN); /* Extend header by 4 bytes */ args.ihs += ETHER_VLAN_ENCAP_LEN; wqe->eth.inline_hdr_sz = cpu_to_be16(args.ihs); } else { /* check if inline header size is too big */ if (unlikely(args.ihs > sq->max_inline)) { if (unlikely(mb->m_pkthdr.csum_flags & (CSUM_TSO | CSUM_ENCAP_VXLAN))) { err = EINVAL; goto tx_drop; } args.ihs = sq->max_inline; } m_copydata(mb, 0, args.ihs, wqe->eth.inline_hdr_start); m_adj(mb, args.ihs); wqe->eth.inline_hdr_sz = cpu_to_be16(args.ihs); } ds_cnt = sizeof(*wqe) / MLX5_SEND_WQE_DS; if (args.ihs > sizeof(wqe->eth.inline_hdr_start)) { ds_cnt += DIV_ROUND_UP(args.ihs - sizeof(wqe->eth.inline_hdr_start), MLX5_SEND_WQE_DS); } dseg = ((struct mlx5_wqe_data_seg *)&wqe->ctrl) + ds_cnt; err = bus_dmamap_load_mbuf_sg(sq->dma_tag, sq->mbuf[pi].dma_map, mb, segs, &nsegs, BUS_DMA_NOWAIT); if (err == EFBIG) { /* Update statistics */ sq->stats.defragged++; /* Too many mbuf fragments */ mb = m_defrag(*mbp, M_NOWAIT); if (mb == NULL) { mb = *mbp; goto tx_drop; } /* Try again */ err = bus_dmamap_load_mbuf_sg(sq->dma_tag, sq->mbuf[pi].dma_map, mb, segs, &nsegs, BUS_DMA_NOWAIT); } /* Catch errors */ if (err != 0) goto tx_drop; /* Make sure all mbuf data, if any, is visible to the bus */ if (nsegs != 0) { bus_dmamap_sync(sq->dma_tag, sq->mbuf[pi].dma_map, BUS_DMASYNC_PREWRITE); } else { /* All data was inlined, free the mbuf. */ bus_dmamap_unload(sq->dma_tag, sq->mbuf[pi].dma_map); m_freem(mb); mb = NULL; } for (x = 0; x != nsegs; x++) { if (segs[x].ds_len == 0) continue; dseg->addr = cpu_to_be64((uint64_t)segs[x].ds_addr); dseg->lkey = sq->mkey_be; dseg->byte_count = cpu_to_be32((uint32_t)segs[x].ds_len); dseg++; } ds_cnt = (dseg - ((struct mlx5_wqe_data_seg *)&wqe->ctrl)); wqe->ctrl.opmod_idx_opcode = cpu_to_be32((sq->pc << 8) | opcode); wqe->ctrl.qpn_ds = cpu_to_be32((sq->sqn << 8) | ds_cnt); wqe->ctrl.imm = cpu_to_be32(args.tisn << 8); if (mlx5e_do_send_cqe_inline(sq)) wqe->ctrl.fm_ce_se = MLX5_WQE_CTRL_CQ_UPDATE; else wqe->ctrl.fm_ce_se = 0; /* Copy data for doorbell */ memcpy(sq->doorbell.d32, &wqe->ctrl, sizeof(sq->doorbell.d32)); /* Store pointer to mbuf */ sq->mbuf[pi].mbuf = mb; sq->mbuf[pi].num_wqebbs = DIV_ROUND_UP(ds_cnt, MLX5_SEND_WQEBB_NUM_DS); sq->mbuf[pi].p_refcount = args.pref; if (unlikely(args.pref != NULL)) atomic_add_int(args.pref, 1); sq->pc += sq->mbuf[pi].num_wqebbs; /* Count all traffic going out */ sq->stats.packets++; sq->stats.bytes += sq->mbuf[pi].num_bytes; *mbp = NULL; /* safety clear */ return (0); tx_drop: sq->stats.dropped++; *mbp = NULL; m_freem(mb); return err; } static void mlx5e_poll_tx_cq(struct mlx5e_sq *sq, int budget) { u16 sqcc; /* * sq->cc must be updated only after mlx5_cqwq_update_db_record(), * otherwise a cq overrun may occur */ sqcc = sq->cc; while (budget > 0) { struct mlx5_cqe64 *cqe; struct mbuf *mb; bool match; u16 sqcc_this; u16 delta; u16 x; u16 ci; cqe = mlx5e_get_cqe(&sq->cq); if (!cqe) break; mlx5_cqwq_pop(&sq->cq.wq); /* check if the completion event indicates an error */ if (unlikely(get_cqe_opcode(cqe) != MLX5_CQE_REQ)) sq->stats.cqe_err++; /* setup local variables */ sqcc_this = be16toh(cqe->wqe_counter); match = false; /* update budget according to the event factor */ budget -= sq->cev_factor; for (x = 0;; x++) { if (unlikely(match != false)) { break; } else if (unlikely(x == sq->cev_factor)) { /* WQE counter match not found */ sq->stats.cqe_err++; break; } ci = sqcc & sq->wq.sz_m1; delta = sqcc_this - sqcc; match = (delta < sq->mbuf[ci].num_wqebbs); mb = sq->mbuf[ci].mbuf; sq->mbuf[ci].mbuf = NULL; if (unlikely(sq->mbuf[ci].p_refcount != NULL)) { atomic_add_int(sq->mbuf[ci].p_refcount, -1); sq->mbuf[ci].p_refcount = NULL; } if (mb == NULL) { if (unlikely(sq->mbuf[ci].num_bytes == 0)) sq->stats.nop++; } else { bus_dmamap_sync(sq->dma_tag, sq->mbuf[ci].dma_map, BUS_DMASYNC_POSTWRITE); bus_dmamap_unload(sq->dma_tag, sq->mbuf[ci].dma_map); /* Free transmitted mbuf */ m_freem(mb); } sqcc += sq->mbuf[ci].num_wqebbs; } } mlx5_cqwq_update_db_record(&sq->cq.wq); /* Ensure cq space is freed before enabling more cqes */ atomic_thread_fence_rel(); sq->cc = sqcc; } static int mlx5e_xmit_locked(struct ifnet *ifp, struct mlx5e_sq *sq, struct mbuf *mb) { int err = 0; if (unlikely((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0 || READ_ONCE(sq->running) == 0)) { m_freem(mb); return (ENETDOWN); } /* Do transmit */ if (mlx5e_sq_xmit(sq, &mb) != 0) { /* NOTE: m_freem() is NULL safe */ m_freem(mb); err = ENOBUFS; } /* Check if we need to write the doorbell */ if (likely(sq->doorbell.d64 != 0)) { mlx5e_tx_notify_hw(sq, sq->doorbell.d32); sq->doorbell.d64 = 0; } /* * Check if we need to start the event timer which flushes the * transmit ring on timeout: */ if (unlikely(sq->cev_next_state == MLX5E_CEV_STATE_INITIAL && sq->cev_factor != 1)) { /* start the timer */ mlx5e_sq_cev_timeout(sq); } else { /* don't send NOPs yet */ sq->cev_next_state = MLX5E_CEV_STATE_HOLD_NOPS; } return (err); } int mlx5e_xmit(struct ifnet *ifp, struct mbuf *mb) { struct mlx5e_sq *sq; int ret; if (mb->m_pkthdr.csum_flags & CSUM_SND_TAG) { MPASS(mb->m_pkthdr.snd_tag->ifp == ifp); sq = mlx5e_select_queue_by_send_tag(ifp, mb); if (unlikely(sq == NULL)) { goto select_queue; } } else { select_queue: sq = mlx5e_select_queue(ifp, mb); if (unlikely(sq == NULL)) { /* Free mbuf */ m_freem(mb); /* Invalid send queue */ return (ENXIO); } } mtx_lock(&sq->lock); ret = mlx5e_xmit_locked(ifp, sq, mb); mtx_unlock(&sq->lock); return (ret); } void mlx5e_tx_cq_comp(struct mlx5_core_cq *mcq, struct mlx5_eqe *eqe __unused) { struct mlx5e_sq *sq = container_of(mcq, struct mlx5e_sq, cq.mcq); mtx_lock(&sq->comp_lock); mlx5e_poll_tx_cq(sq, MLX5E_BUDGET_MAX); mlx5e_cq_arm(&sq->cq, MLX5_GET_DOORBELL_LOCK(&sq->priv->doorbell_lock)); mtx_unlock(&sq->comp_lock); } diff --git a/sys/dev/mlx5/mlx5_en/mlx5_en_txrx.c b/sys/dev/mlx5/mlx5_en/mlx5_en_txrx.c index c0f71c3fde86..9f5e17ad864e 100644 --- a/sys/dev/mlx5/mlx5_en/mlx5_en_txrx.c +++ b/sys/dev/mlx5/mlx5_en/mlx5_en_txrx.c @@ -1,56 +1,56 @@ /*- * Copyright (c) 2015 Mellanox Technologies. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS `AS IS' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #include "opt_rss.h" #include "opt_ratelimit.h" -#include "en.h" +#include struct mlx5_cqe64 * mlx5e_get_cqe(struct mlx5e_cq *cq) { struct mlx5_cqe64 *cqe; cqe = mlx5_cqwq_get_wqe(&cq->wq, mlx5_cqwq_get_ci(&cq->wq)); if ((cqe->op_own ^ mlx5_cqwq_get_wrap_cnt(&cq->wq)) & MLX5_CQE_OWNER_MASK) return (NULL); /* ensure cqe content is read after cqe ownership bit */ atomic_thread_fence_acq(); return (cqe); } void mlx5e_cq_error_event(struct mlx5_core_cq *mcq, int event) { struct mlx5e_cq *cq = container_of(mcq, struct mlx5e_cq, mcq); mlx5_en_err(cq->priv->ifp, "cqn=0x%.6x event=0x%.2x\n", mcq->cqn, event); } diff --git a/sys/dev/mlx5/mlx5_en/port_buffer.h b/sys/dev/mlx5/mlx5_en/port_buffer.h index d16347f31a62..d5e2b1c9bd35 100644 --- a/sys/dev/mlx5/mlx5_en/port_buffer.h +++ b/sys/dev/mlx5/mlx5_en/port_buffer.h @@ -1,82 +1,82 @@ /*- * Copyright (c) 2018-2019 Mellanox Technologies. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS `AS IS' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #ifndef __MLX5_EN_PORT_BUFFER_H__ #define __MLX5_EN_PORT_BUFFER_H__ -#include "en.h" +#include #include #define MLX5E_MAX_BUFFER 8 #define MLX5E_BUFFER_CELL_SHIFT 7 #define MLX5E_DEFAULT_CABLE_LEN 7 /* 7 meters */ #define MLX5_BUFFER_SUPPORTED(mdev) (MLX5_CAP_GEN(mdev, pcam_reg) && \ MLX5_CAP_PCAM_REG(mdev, pbmc) && \ MLX5_CAP_PCAM_REG(mdev, pptb)) enum { MLX5E_PORT_BUFFER_CABLE_LEN = BIT(0), MLX5E_PORT_BUFFER_PFC = BIT(1), MLX5E_PORT_BUFFER_PRIO2BUFFER = BIT(2), MLX5E_PORT_BUFFER_SIZE = BIT(3), }; struct mlx5e_bufferx_reg { u8 lossy; u8 epsb; u32 size; u32 xoff; u32 xon; }; struct mlx5e_port_buffer { u32 port_buffer_size; u32 spare_buffer_size; struct mlx5e_bufferx_reg buffer[MLX5E_MAX_BUFFER]; }; #define IEEE_8021QAZ_MAX_TCS 8 struct ieee_pfc { __u8 pfc_cap; __u8 pfc_en; __u8 mbc; __u16 delay; __u64 requests[IEEE_8021QAZ_MAX_TCS]; __u64 indications[IEEE_8021QAZ_MAX_TCS]; }; int mlx5e_port_manual_buffer_config(struct mlx5e_priv *priv, u32 change, unsigned int mtu, struct ieee_pfc *pfc, u32 *buffer_size, u8 *prio2buffer); int mlx5e_port_query_buffer(struct mlx5e_priv *priv, struct mlx5e_port_buffer *port_buffer); #endif