D11560: Remove use of 9k clusters from mlx4_en rx path
D11560.diff (15 KB)
Index: sys/dev/mlx4/mlx4_en/en.h
===================================================================
--- sys/dev/mlx4/mlx4_en/en.h
+++ sys/dev/mlx4/mlx4_en/en.h
@@ -108,6 +108,25 @@
MLX4_EN_ALLOC_REPLACEMENT = 1,
};
+/* Receive fragment sizes; we use at most 3 fragments (for 9600 byte MTU
+ * and 4K allocations) */
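+/*
+ * Example: a 9600-byte MTU (9624 bytes effective, including Ethernet
+ * header, VLAN tag, FCS and IP alignment) is carried in three
+ * fragments: 2048 + 4096 + 4096 = 10240 bytes.
+ */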
+#if MJUMPAGESIZE == 4096
+enum {
+ FRAG_SZ0 = MCLBYTES,
+ FRAG_SZ1 = MJUMPAGESIZE,
+ FRAG_SZ2 = MJUMPAGESIZE,
+};
+#define MLX4_EN_MAX_RX_FRAGS 3
+#elif MJUMPAGESIZE == 8192
+enum {
+ FRAG_SZ0 = MCLBYTES,
+ FRAG_SZ1 = MJUMPAGESIZE,
+};
+#define MLX4_EN_MAX_RX_FRAGS 2
+#else
+#error "Unknown PAGE_SIZE"
+#endif
+
/* Maximum ring sizes */
#define MLX4_EN_DEF_TX_QUEUE_SIZE 4096
@@ -307,6 +326,7 @@
bus_dma_tag_t dma_tag;
struct mlx4_en_rx_spare spare;
u32 size ; /* number of Rx descs*/
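+ /* number of entries in the mbuf array (size * rounded-up max frags) */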
+ u32 num_mbufs;
u32 actual_size;
u32 size_mask;
u16 stride;
@@ -504,6 +524,10 @@
u8 vport_num;
};
+struct mlx4_en_frag_info {
+ u16 frag_size;
+};
+
struct mlx4_en_priv {
struct mlx4_en_dev *mdev;
struct mlx4_en_port_profile *prof;
@@ -554,6 +578,9 @@
u32 tx_ring_num;
u32 rx_ring_num;
u32 rx_mb_size;
+ struct mlx4_en_frag_info frag_info[MLX4_EN_MAX_RX_FRAGS];
+ u16 num_frags;
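+ /* log2 of mbuf slots per RX descriptor (for mbuf array indexing) */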
+ u16 log_mbuf;
struct mlx4_en_tx_ring **tx_ring;
struct mlx4_en_rx_ring *rx_ring[MAX_RX_RINGS];
Index: sys/dev/mlx4/mlx4_en/mlx4_en_rx.c
===================================================================
--- sys/dev/mlx4/mlx4_en/mlx4_en_rx.c
+++ sys/dev/mlx4/mlx4_en/mlx4_en_rx.c
@@ -53,10 +53,18 @@
(ring->buf + (ring->stride * index));
int possible_frags;
int i;
+ int ip_align;
+ ip_align = MLX4_NET_IP_ALIGN;
/* Set size and memtype fields */
- rx_desc->data[0].byte_count = cpu_to_be32(priv->rx_mb_size - MLX4_NET_IP_ALIGN);
- rx_desc->data[0].lkey = cpu_to_be32(priv->mdev->mr.key);
+ for (i = 0; i < priv->num_frags; i++) {
+ rx_desc->data[i].byte_count =
+ cpu_to_be32(priv->frag_info[i].frag_size - ip_align);
+ rx_desc->data[i].lkey = cpu_to_be32(priv->mdev->mr.key);
+
+ /* Adjust only the first fragment for IP header alignment. */
+ ip_align = 0;
+ }
/*
* If the number of used fragments does not fill up the ring
@@ -64,50 +72,23 @@
* null address/size and a special memory key:
*/
possible_frags = (ring->stride - sizeof(struct mlx4_en_rx_desc)) / DS_SIZE;
- for (i = 1; i < possible_frags; i++) {
+ for (i = priv->num_frags; i < possible_frags; i++) {
rx_desc->data[i].byte_count = 0;
rx_desc->data[i].lkey = cpu_to_be32(MLX4_EN_MEMTYPE_PAD);
rx_desc->data[i].addr = 0;
}
}
static int
-mlx4_en_alloc_buf(struct mlx4_en_rx_ring *ring,
- __be64 *pdma, struct mlx4_en_rx_mbuf *mb_list)
+mlx4_en_alloc_buf(struct mlx4_en_priv *priv, struct mlx4_en_rx_ring *ring,
+ __be64 *pdma, struct mlx4_en_rx_mbuf *mb_list, int flags, int frag_size)
{
bus_dma_segment_t segs[1];
- bus_dmamap_t map;
struct mbuf *mb;
int nsegs;
int err;
- /* try to allocate a new spare mbuf */
- if (unlikely(ring->spare.mbuf == NULL)) {
- mb = m_getjcl(M_NOWAIT, MT_DATA, M_PKTHDR, ring->rx_mb_size);
- if (unlikely(mb == NULL))
- return (-ENOMEM);
- /* setup correct length */
- mb->m_pkthdr.len = mb->m_len = ring->rx_mb_size;
-
- /* make sure IP header gets aligned */
- m_adj(mb, MLX4_NET_IP_ALIGN);
-
- /* load spare mbuf into BUSDMA */
- err = -bus_dmamap_load_mbuf_sg(ring->dma_tag, ring->spare.dma_map,
- mb, segs, &nsegs, BUS_DMA_NOWAIT);
- if (unlikely(err != 0)) {
- m_freem(mb);
- return (err);
- }
-
- /* store spare info */
- ring->spare.mbuf = mb;
- ring->spare.paddr_be = cpu_to_be64(segs[0].ds_addr);
-
- bus_dmamap_sync(ring->dma_tag, ring->spare.dma_map,
- BUS_DMASYNC_PREREAD);
- }
-
/* synchronize and unload the current mbuf, if any */
if (likely(mb_list->mbuf != NULL)) {
bus_dmamap_sync(ring->dma_tag, mb_list->dma_map,
@@ -115,21 +96,26 @@
bus_dmamap_unload(ring->dma_tag, mb_list->dma_map);
}
- mb = m_getjcl(M_NOWAIT, MT_DATA, M_PKTHDR, ring->rx_mb_size);
- if (unlikely(mb == NULL))
- goto use_spare;
+ mb = m_getjcl(M_NOWAIT, MT_DATA, flags, frag_size);
+ if (unlikely(mb == NULL)) {
+ priv->port_stats.rx_alloc_failed++;
+ return (-ENOMEM);
+ }
/* setup correct length */
- mb->m_pkthdr.len = mb->m_len = ring->rx_mb_size;
+ mb->m_len = frag_size;
/* make sure IP header gets aligned */
- m_adj(mb, MLX4_NET_IP_ALIGN);
+ if (flags & M_PKTHDR) {
+ mb->m_pkthdr.len = frag_size;
+ m_adj(mb, MLX4_NET_IP_ALIGN);
+ }
err = -bus_dmamap_load_mbuf_sg(ring->dma_tag, mb_list->dma_map,
mb, segs, &nsegs, BUS_DMA_NOWAIT);
if (unlikely(err != 0)) {
m_freem(mb);
- goto use_spare;
+ return (-err);
}
*pdma = cpu_to_be64(segs[0].ds_addr);
@@ -137,30 +123,40 @@
bus_dmamap_sync(ring->dma_tag, mb_list->dma_map, BUS_DMASYNC_PREREAD);
return (0);
+}
+
+static void
+mlx4_en_free_buf(struct mlx4_en_priv *priv, struct mlx4_en_rx_ring *ring,
+ struct mlx4_en_rx_mbuf *mb_list)
+{
+ bus_dmamap_t map;
+ bus_dma_tag_t tag;
+ int nr;
-use_spare:
- /* swap DMA maps */
- map = mb_list->dma_map;
- mb_list->dma_map = ring->spare.dma_map;
- ring->spare.dma_map = map;
+ for (nr = 0; nr < priv->num_frags; nr++) {
+ en_dbg(DRV, priv, "Freeing fragment:%d\n", nr);
- /* swap MBUFs */
- mb_list->mbuf = ring->spare.mbuf;
- ring->spare.mbuf = NULL;
+ if (mb_list->mbuf != NULL) {
+ map = mb_list->dma_map;
+ tag = ring->dma_tag;
- /* store physical address */
- *pdma = ring->spare.paddr_be;
- return (0);
+ bus_dmamap_sync(tag, map, BUS_DMASYNC_POSTREAD);
+ bus_dmamap_unload(tag, map);
+ m_freem(mb_list->mbuf);
+ mb_list->mbuf = NULL; /* safety clearing */
+ }
+ mb_list++;
+ }
}
static void
-mlx4_en_free_buf(struct mlx4_en_rx_ring *ring, struct mlx4_en_rx_mbuf *mb_list)
+mlx4_en_free_rx_desc(struct mlx4_en_priv *priv, struct mlx4_en_rx_ring *ring,
+ int index)
{
- bus_dmamap_t map = mb_list->dma_map;
- bus_dmamap_sync(ring->dma_tag, map, BUS_DMASYNC_POSTREAD);
- bus_dmamap_unload(ring->dma_tag, map);
- m_freem(mb_list->mbuf);
- mb_list->mbuf = NULL; /* safety clearing */
+ struct mlx4_en_rx_mbuf *mb_list;
+
+ mb_list = ring->mbuf + (index << priv->log_mbuf);
+ mlx4_en_free_buf(priv, ring, mb_list);
}
static int
@@ -169,15 +165,24 @@
{
struct mlx4_en_rx_desc *rx_desc = (struct mlx4_en_rx_desc *)
(ring->buf + (index * ring->stride));
- struct mlx4_en_rx_mbuf *mb_list = ring->mbuf + index;
+ struct mlx4_en_rx_mbuf *mb_list = ring->mbuf + (index << priv->log_mbuf);
+ int i;
+ int flags;
- mb_list->mbuf = NULL;
+ mlx4_en_free_buf(priv, ring, mb_list);
- if (mlx4_en_alloc_buf(ring, &rx_desc->data[0].addr, mb_list)) {
- priv->port_stats.rx_alloc_failed++;
- return (-ENOMEM);
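+ /*
+ * Only the first fragment carries M_PKTHDR (and the IP header
+ * alignment adjustment); the remaining fragments are plain data
+ * mbufs chained behind it.
+ */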
+ flags = M_PKTHDR;
+ for (i = 0; i < priv->num_frags; i++) {
+ if (mlx4_en_alloc_buf(priv, ring, &rx_desc->data[i].addr, &mb_list[i],
+ flags, priv->frag_info[i].frag_size))
+ goto err;
+ flags = 0;
}
return (0);
+
+err:
+ mlx4_en_free_buf(priv, ring, mb_list);
+ return (-ENOMEM);
}
static inline void
@@ -226,8 +231,7 @@
while (ring->actual_size > new_size) {
ring->actual_size--;
ring->prod--;
- mlx4_en_free_buf(ring,
- ring->mbuf + ring->actual_size);
+ mlx4_en_free_rx_desc(priv, ring, ring->actual_size);
}
}
@@ -247,33 +251,69 @@
while (ring->cons != ring->prod) {
index = ring->cons & ring->size_mask;
en_dbg(DRV, priv, "Processing descriptor:%d\n", index);
- mlx4_en_free_buf(ring, ring->mbuf + index);
+ mlx4_en_free_rx_desc(priv, ring, index);
++ring->cons;
}
}
+#if MLX4_EN_MAX_RX_FRAGS == 3
+static int frag_sizes[] = {
+ FRAG_SZ0,
+ FRAG_SZ1,
+ FRAG_SZ2,
+};
+#elif MLX4_EN_MAX_RX_FRAGS == 2
+static int frag_sizes[] = {
+ FRAG_SZ0,
+ FRAG_SZ1,
+};
+#else
+#error "Unknown MAX_RX_FRAGS"
+#endif
+
void mlx4_en_calc_rx_buf(struct net_device *dev)
{
struct mlx4_en_priv *priv = netdev_priv(dev);
int eff_mtu = dev->if_mtu + ETH_HLEN + VLAN_HLEN + ETH_FCS_LEN +
MLX4_NET_IP_ALIGN;
+ int buf_size = 0;
+ int i, frag;
- if (eff_mtu > MJUM16BYTES) {
- en_err(priv, "MTU(%d) is too big\n", dev->if_mtu);
- eff_mtu = MJUM16BYTES;
- } else if (eff_mtu > MJUM9BYTES) {
- eff_mtu = MJUM16BYTES;
- } else if (eff_mtu > MJUMPAGESIZE) {
- eff_mtu = MJUM9BYTES;
- } else if (eff_mtu > MCLBYTES) {
- eff_mtu = MJUMPAGESIZE;
- } else {
- eff_mtu = MCLBYTES;
- }
+ /*
+ * Try to fit packets into a single mbuf+cluster, but we have to split
+ * frames across multiple mbufs if the MTU is greater than the page
+ * size so that we don't trigger the (very expensive) contiguous
+ * memory allocator during normal rx operation.
+ */
+ if (eff_mtu <= MCLBYTES) {
+ priv->frag_info[0].frag_size = MCLBYTES;
+ priv->num_frags = 1;
+ } else if (eff_mtu <= MJUMPAGESIZE) {
+ priv->frag_info[0].frag_size = MJUMPAGESIZE;
+ priv->num_frags = 1;
+ } else {
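+ /*
+ * e.g. eff_mtu = 9624: fragment 0 gets 2048 bytes, fragment 1 gets
+ * 4096, and the remaining 3480-byte tail fits in one more 4096-byte
+ * fragment, giving num_frags = 3.
+ */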
+ for (i = 0, frag = 0; buf_size < eff_mtu; frag++, i++) {
+ /*
+ * Allocate small to large but only as much as is needed for
+ * the tail.
+ */
+ while (i > 0 && eff_mtu - buf_size <= frag_sizes[i - 1])
+ i--;
+ priv->frag_info[frag].frag_size = frag_sizes[i];
+ buf_size += priv->frag_info[frag].frag_size;
+ }
+ priv->num_frags = frag;
+ }
priv->rx_mb_size = eff_mtu;
+ priv->log_mbuf = ROUNDUP_LOG2(priv->num_frags);
- en_dbg(DRV, priv, "Effective RX MTU: %d bytes\n", eff_mtu);
+ en_dbg(DRV, priv, "Rx buffer scatter-list (effective-mtu:%d "
+ "num_frags:%d):\n", eff_mtu, priv->num_frags);
+ for (i = 0; i < priv->num_frags; i++) {
+ en_dbg(DRV, priv, " frag:%d - size:%d\n", i,
+ priv->frag_info[i].frag_size);
+ }
}
int mlx4_en_create_rx_ring(struct mlx4_en_priv *priv,
@@ -283,7 +323,7 @@
struct mlx4_en_dev *mdev = priv->mdev;
struct mlx4_en_rx_ring *ring;
int err;
- int tmp;
+ size_t ring_size_bytes;
uint32_t x;
ring = kzalloc(sizeof(struct mlx4_en_rx_ring), GFP_KERNEL);
@@ -300,9 +340,9 @@
BUS_SPACE_MAXADDR, /* lowaddr */
BUS_SPACE_MAXADDR, /* highaddr */
NULL, NULL, /* filter, filterarg */
- MJUM16BYTES, /* maxsize */
+ MJUMPAGESIZE, /* maxsize */
1, /* nsegments */
- MJUM16BYTES, /* maxsegsize */
+ MJUMPAGESIZE, /* maxsegsize */
0, /* flags */
NULL, NULL, /* lockfunc, lockfuncarg */
&ring->dma_tag))) {
@@ -314,24 +354,21 @@
ring->cons = 0;
ring->size = size;
ring->size_mask = size - 1;
- ring->stride = roundup_pow_of_two(
- sizeof(struct mlx4_en_rx_desc) + DS_SIZE);
+ ring->stride = roundup_pow_of_two(sizeof(struct mlx4_en_rx_desc) +
+ DS_SIZE * MLX4_EN_MAX_RX_FRAGS);
ring->log_stride = ffs(ring->stride) - 1;
ring->buf_size = ring->size * ring->stride + TXBB_SIZE;
- tmp = size * sizeof(struct mlx4_en_rx_mbuf);
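+ /*
+ * Round the per-descriptor mbuf count up to a power of two so the
+ * completion path can locate a descriptor's mbuf list with a shift
+ * (index << log_mbuf) instead of a multiply.
+ */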
+ ring->num_mbufs = size * roundup_pow_of_two(MLX4_EN_MAX_RX_FRAGS);
+ ring_size_bytes = ring->num_mbufs * sizeof(struct mlx4_en_rx_mbuf);
- ring->mbuf = kzalloc(tmp, GFP_KERNEL);
+ ring->mbuf = kzalloc(ring_size_bytes, GFP_KERNEL);
if (ring->mbuf == NULL) {
err = -ENOMEM;
goto err_dma_tag;
}
- err = -bus_dmamap_create(ring->dma_tag, 0, &ring->spare.dma_map);
- if (err != 0)
- goto err_info;
-
- for (x = 0; x != size; x++) {
+ for (x = 0; x != ring->num_mbufs; x++) {
err = -bus_dmamap_create(ring->dma_tag, 0,
&ring->mbuf[x].dma_map);
if (err != 0) {
@@ -341,8 +378,8 @@
goto err_info;
}
}
- en_dbg(DRV, priv, "Allocated MBUF ring at addr:%p size:%d\n",
- ring->mbuf, tmp);
+ en_dbg(DRV, priv, "Allocated MBUF ring at addr:%p size:%zu\n",
+ ring->mbuf, ring_size_bytes);
err = mlx4_alloc_hwq_res(mdev->dev, &ring->wqres,
ring->buf_size, 2 * PAGE_SIZE);
@@ -381,8 +418,8 @@
int i;
int ring_ind;
int err;
- int stride = roundup_pow_of_two(
- sizeof(struct mlx4_en_rx_desc) + DS_SIZE);
+ int stride = roundup_pow_of_two(sizeof(struct mlx4_en_rx_desc) +
+ DS_SIZE * priv->num_frags);
for (ring_ind = 0; ring_ind < priv->rx_ring_num; ring_ind++) {
ring = priv->rx_ring[ring_ind];
@@ -465,16 +502,8 @@
mlx4_en_unmap_buffer(&ring->wqres.buf);
mlx4_free_hwq_res(mdev->dev, &ring->wqres, size * stride + TXBB_SIZE);
- for (x = 0; x != size; x++)
+ for (x = 0; x != ring->num_mbufs; x++)
bus_dmamap_destroy(ring->dma_tag, ring->mbuf[x].dma_map);
- /* free spare mbuf, if any */
- if (ring->spare.mbuf != NULL) {
- bus_dmamap_sync(ring->dma_tag, ring->spare.dma_map,
- BUS_DMASYNC_POSTREAD);
- bus_dmamap_unload(ring->dma_tag, ring->spare.dma_map);
- m_freem(ring->spare.mbuf);
- }
- bus_dmamap_destroy(ring->dma_tag, ring->spare.dma_map);
vfree(ring->mbuf);
bus_dma_tag_destroy(ring->dma_tag);
kfree(ring);
@@ -532,6 +561,68 @@
return 0;
}
+/*
+ * Collect up the packet fragments, represented by individual mbufs, into a
+ * single mbuf chain ready to be passed up the stack. As mbufs are removed from
+ * the ring replace them with newly allocated ones; if we fail to allocate an
+ * mbuf then drop the current packet and return an error. This ensures that the
+ * ring is always in a state where it is ready to receive packets.
+ */
+static int
+mlx4_en_complete_rx_desc(struct mlx4_en_priv *priv,
+ struct mlx4_en_rx_ring *ring, struct mlx4_en_rx_desc *rx_desc,
+ struct mlx4_en_rx_mbuf *mb_list, int length)
+{
+ struct mlx4_en_frag_info *frag_info;
+ struct mbuf *mb, *first_mb, *prev_mb;
+ int flags, nr, align_len, mb_len;
+
+ first_mb = mb_list[0].mbuf;
+ prev_mb = NULL;
+ first_mb->m_pkthdr.len = length;
+ flags = M_PKTHDR;
+ align_len = MLX4_NET_IP_ALIGN;
+
+ /* Collect used fragments while replacing them in the HW descriptors */
+ for (nr = 0; nr < priv->num_frags; nr++) {
+ frag_info = &priv->frag_info[nr];
+
+ mb = mb_list[nr].mbuf;
+
+ /* Allocate a replacement mbuf + cluster */
+ if (mlx4_en_alloc_buf(priv, ring, &rx_desc->data[nr].addr,
+ &mb_list[nr], flags, priv->frag_info[nr].frag_size))
+ goto fail;
+
+ if (prev_mb != NULL)
+ prev_mb->m_next = mb;
+ mb_len = frag_info->frag_size - align_len;
+ prev_mb = mb;
+
+ if (length <= mb_len)
+ break;
+
+ mb->m_len = mb_len;
+ length -= mb_len;
+ flags = 0;
+ align_len = 0;
+ }
+ /* Adjust size of last fragment to match actual length */
+ prev_mb->m_len = min(length, prev_mb->m_len);
+ prev_mb->m_next = NULL;
+ return (0);
+
+fail:
+ /*
+ * At this point the fragments have been partially extracted and
+ * replaced. Free the mbufs that are no longer referenced by the ring.
+ */
+ if (first_mb != mb_list[0].mbuf)
+ m_freem(first_mb);
+ return (-ENOMEM);
+}
+
static struct mbuf *
mlx4_en_rx_mb(struct mlx4_en_priv *priv, struct mlx4_en_rx_ring *ring,
struct mlx4_en_rx_desc *rx_desc, struct mlx4_en_rx_mbuf *mb_list,
@@ -539,19 +630,12 @@
{
struct mbuf *mb;
- /* get mbuf */
mb = mb_list->mbuf;
- /* collect used fragment while atomically replacing it */
- if (mlx4_en_alloc_buf(ring, &rx_desc->data[0].addr, mb_list))
- return (NULL);
-
- /* range check hardware computed value */
- if (unlikely(length > mb->m_len))
- length = mb->m_len;
+ /* Move relevant fragments to mb */
+ if (unlikely(mlx4_en_complete_rx_desc(priv, ring, rx_desc, mb_list, length)))
+ return (NULL);
- /* update total packet length in packet header */
- mb->m_len = mb->m_pkthdr.len = length;
return (mb);
}
@@ -591,7 +675,7 @@
/* Process all completed CQEs */
while (XNOR(cqe->owner_sr_opcode & MLX4_CQE_OWNER_MASK,
cons_index & size)) {
- mb_list = ring->mbuf + index;
+ mb_list = ring->mbuf + (index << priv->log_mbuf);
rx_desc = (struct mlx4_en_rx_desc *)
(ring->buf + (index << ring->log_stride));