Index: sys/dev/mlx4/mlx4_en/en.h
===================================================================
--- sys/dev/mlx4/mlx4_en/en.h
+++ sys/dev/mlx4/mlx4_en/en.h
@@ -278,6 +278,8 @@
 	unsigned long queue_stopped;
 	unsigned long oversized_packets;
 	unsigned long wake_queue;
+	unsigned long tso_packets;
+	unsigned long defrag_attempts;
 	struct mlx4_bf bf;
 	bool bf_enabled;
 	int hwtstamp_tx_type;
Index: sys/dev/mlx4/mlx4_en/mlx4_en_netdev.c
===================================================================
--- sys/dev/mlx4/mlx4_en/mlx4_en_netdev.c
+++ sys/dev/mlx4/mlx4_en/mlx4_en_netdev.c
@@ -2681,6 +2681,8 @@
 	SYSCTL_ADD_ULONG(ctx, node_list, OID_AUTO, "tx_chksum_offload",
 	    CTLFLAG_RD, &priv->port_stats.tx_chksum_offload,
 	    "TX checksum offloads");
+	SYSCTL_ADD_ULONG(ctx, node_list, OID_AUTO, "defrag_attempts", CTLFLAG_RD,
+	    &priv->port_stats.defrag_attempts, "Oversized chains defragged");
 
 	/* Could strdup the names and add in a loop.  This is simpler. */
 	SYSCTL_ADD_ULONG(ctx, node_list, OID_AUTO, "rx_bytes", CTLFLAG_RD,
Index: sys/dev/mlx4/mlx4_en/mlx4_en_port.c
===================================================================
--- sys/dev/mlx4/mlx4_en/mlx4_en_port.c
+++ sys/dev/mlx4/mlx4_en/mlx4_en_port.c
@@ -191,11 +191,16 @@
 	priv->port_stats.tx_chksum_offload = 0;
 	priv->port_stats.queue_stopped = 0;
 	priv->port_stats.wake_queue = 0;
+	priv->port_stats.oversized_packets = 0;
+	priv->port_stats.tso_packets = 0;
+	priv->port_stats.defrag_attempts = 0;
 	for (i = 0; i < priv->tx_ring_num; i++) {
 		priv->port_stats.tx_chksum_offload += priv->tx_ring[i]->tx_csum;
 		priv->port_stats.queue_stopped += priv->tx_ring[i]->queue_stopped;
 		priv->port_stats.wake_queue += priv->tx_ring[i]->wake_queue;
 		priv->port_stats.oversized_packets += priv->tx_ring[i]->oversized_packets;
+		priv->port_stats.tso_packets += priv->tx_ring[i]->tso_packets;
+		priv->port_stats.defrag_attempts += priv->tx_ring[i]->defrag_attempts;
 	}
 	/* RX Statistics */
 	priv->pkstats.rx_packets = be64_to_cpu(mlx4_en_stats->RTOT_prio_0) +
Index: sys/dev/mlx4/mlx4_en/mlx4_en_tx.c
===================================================================
--- sys/dev/mlx4/mlx4_en/mlx4_en_tx.c
+++ sys/dev/mlx4/mlx4_en/mlx4_en_tx.c
@@ -793,7 +793,7 @@
 		num_pkts = DIV_ROUND_UP(payload_len, mss);
 		ring->bytes += payload_len + (num_pkts * ihs);
 		ring->packets += num_pkts;
-		priv->port_stats.tso_packets++;
+		ring->tso_packets++;
 		/* store pointer to inline header */
 		dseg_inline = dseg;
 		/* copy data inline */
@@ -814,20 +814,11 @@
 	}
 	m_adj(mb, ihs);
 
-	/* trim off empty mbufs */
-	while (mb->m_len == 0) {
-		mb = m_free(mb);
-		/* check if all data has been inlined */
-		if (mb == NULL) {
-			nr_segs = 0;
-			goto skip_dma;
-		}
-	}
-
 	err = bus_dmamap_load_mbuf_sg(ring->dma_tag, tx_info->dma_map,
 	    mb, segs, &nr_segs, BUS_DMA_NOWAIT);
 	if (unlikely(err == EFBIG)) {
 		/* Too many mbuf fragments */
+		ring->defrag_attempts++;
 		m = m_defrag(mb, M_NOWAIT);
 		if (m == NULL) {
 			ring->oversized_packets++;
@@ -843,11 +834,13 @@
 		ring->oversized_packets++;
 		goto tx_drop;
 	}
-	/* make sure all mbuf data is written to RAM */
-	bus_dmamap_sync(ring->dma_tag, tx_info->dma_map,
-	    BUS_DMASYNC_PREWRITE);
+	/* If there were no errors and we didn't load anything, don't sync. */
+	if (nr_segs != 0) {
+		/* make sure all mbuf data is written to RAM */
+		bus_dmamap_sync(ring->dma_tag, tx_info->dma_map,
+		    BUS_DMASYNC_PREWRITE);
+	}
 
-skip_dma:
 	/* compute number of DS needed */
 	ds_cnt = (dseg - ((volatile struct mlx4_wqe_data_seg *)tx_desc)) +
 	    nr_segs;
Index: sys/dev/mlx4/stats.h
===================================================================
--- sys/dev/mlx4/stats.h
+++ sys/dev/mlx4/stats.h
@@ -126,6 +126,7 @@
 	unsigned long rx_chksum_good;
 	unsigned long rx_chksum_none;
 	unsigned long tx_chksum_offload;
+	unsigned long defrag_attempts;
 };
 
 struct mlx4_en_perf_stats {
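For reference, here is a minimal userland sketch (not part of the patch) showing how the new per-port counter exported above could be read with sysctlbyname(3). The OID string is an assumption for illustration only; the actual name depends on where mlx4_en attaches its stats node, so confirm it with `sysctl -a | grep defrag_attempts` on a patched system.

/*
 * Read one of the new mlx4_en counters from userspace.
 * NOTE: the OID path below is hypothetical; adjust it to match
 * the sysctl tree created by the driver on your system.
 */
#include <sys/types.h>
#include <sys/sysctl.h>

#include <stdio.h>
#include <stdlib.h>

int
main(void)
{
	/* Hypothetical OID for the first mlx4en port's stats node. */
	const char *oid = "dev.mlxen.0.stat.defrag_attempts";
	unsigned long val;		/* counter is added via SYSCTL_ADD_ULONG */
	size_t len = sizeof(val);

	if (sysctlbyname(oid, &val, &len, NULL, 0) == -1) {
		perror("sysctlbyname");
		return (EXIT_FAILURE);
	}
	printf("%s = %lu\n", oid, val);
	return (EXIT_SUCCESS);
}

A non-zero value indicates the TX path hit EFBIG in bus_dmamap_load_mbuf_sg() and fell back to m_defrag(); only chains that still fail after defragmentation show up in the existing oversized_packets counter.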