Page MenuHomeFreeBSD

D50786.diff
No OneTemporary

D50786.diff

diff --git a/share/man/man4/gve.4 b/share/man/man4/gve.4
--- a/share/man/man4/gve.4
+++ b/share/man/man4/gve.4
@@ -230,6 +230,14 @@
The software LRO stack in the kernel is always used.
This sysctl variable needs to be set before loading the driver, using
.Xr loader.conf 5 .
+.It Va hw.gve.allow_4k_rx_buffers
+Setting this boot-time tunable to 1 enables support for 4K RX Buffers.
+The default value is 0, which means 2K RX Buffers will be used.
+4K RX Buffers are only supported on DQO_RDA and DQO_QPL queue formats.
+When enabled, 4K RX Buffers will be used either when HW LRO is enabled
+or the MTU is greater than 2048.
+This sysctl variable needs to be set before loading the driver, using
+.Xr loader.conf 5 .
.It Va dev.gve.X.num_rx_queues and dev.gve.X.num_tx_queues
Run-time tunables that represent the number of currently used RX/TX queues.
The default value is the max number of RX/TX queues the device can support.
diff --git a/sys/dev/gve/gve.h b/sys/dev/gve/gve.h
--- a/sys/dev/gve/gve.h
+++ b/sys/dev/gve/gve.h
@@ -65,6 +65,7 @@
#define ADMINQ_SIZE PAGE_SIZE
#define GVE_DEFAULT_RX_BUFFER_SIZE 2048
+#define GVE_4K_RX_BUFFER_SIZE_DQO 4096
/* Each RX bounce buffer page can fit two packet buffers. */
#define GVE_DEFAULT_RX_BUFFER_OFFSET (PAGE_SIZE / 2)
@@ -84,6 +85,11 @@
static MALLOC_DEFINE(M_GVE, "gve", "gve allocations");
+_Static_assert(MCLBYTES == GVE_DEFAULT_RX_BUFFER_SIZE,
+ "gve: bad MCLBYTES length");
+_Static_assert(MJUMPAGESIZE >= GVE_4K_RX_BUFFER_SIZE_DQO,
+ "gve: bad MJUMPAGESIZE length");
+
struct gve_dma_handle {
bus_addr_t bus_addr;
void *cpu_addr;
@@ -633,6 +639,7 @@
/* The index of tx queue that the timer service will check on its next invocation */
uint16_t check_tx_queue_idx;
+ uint16_t rx_buf_size_dqo;
};
static inline bool
@@ -666,6 +673,18 @@
priv->queue_format == GVE_DQO_QPL_FORMAT);
}
+static inline bool
+gve_is_4k_rx_buf(struct gve_priv *priv)
+{
+ return (priv->rx_buf_size_dqo == GVE_4K_RX_BUFFER_SIZE_DQO);
+}
+
+static inline bus_size_t
+gve_rx_dqo_mbuf_segment_size(struct gve_priv *priv)
+{
+ return (gve_is_4k_rx_buf(priv) ? MJUMPAGESIZE : MCLBYTES);
+}
+
/* Defined in gve_main.c */
void gve_schedule_reset(struct gve_priv *priv);
int gve_adjust_tx_queues(struct gve_priv *priv, uint16_t new_queue_cnt);
@@ -746,6 +765,7 @@
/* Systcl functions defined in gve_sysctl.c */
extern bool gve_disable_hw_lro;
+extern bool gve_allow_4k_rx_buffers;
extern char gve_queue_format[8];
extern char gve_version[8];
void gve_setup_sysctl(struct gve_priv *priv);
diff --git a/sys/dev/gve/gve_adminq.c b/sys/dev/gve/gve_adminq.c
--- a/sys/dev/gve/gve_adminq.c
+++ b/sys/dev/gve/gve_adminq.c
@@ -296,7 +296,6 @@
.ntfy_id = htobe32(rx->com.ntfy_id),
.queue_resources_addr = htobe64(qres_dma->bus_addr),
.rx_ring_size = htobe16(priv->rx_desc_cnt),
- .packet_buffer_size = htobe16(GVE_DEFAULT_RX_BUFFER_SIZE),
};
if (gve_is_gqi(priv)) {
@@ -308,6 +307,8 @@
htobe32(queue_index);
cmd.create_rx_queue.queue_page_list_id =
htobe32((rx->com.qpl)->id);
+ cmd.create_rx_queue.packet_buffer_size =
+ htobe16(GVE_DEFAULT_RX_BUFFER_SIZE);
} else {
cmd.create_rx_queue.queue_page_list_id =
htobe32(GVE_RAW_ADDRESSING_QPL_ID);
@@ -320,6 +321,8 @@
cmd.create_rx_queue.enable_rsc =
!!((if_getcapenable(priv->ifp) & IFCAP_LRO) &&
!gve_disable_hw_lro);
+ cmd.create_rx_queue.packet_buffer_size =
+ htobe16(priv->rx_buf_size_dqo);
}
return (gve_adminq_execute_cmd(priv, &cmd));
diff --git a/sys/dev/gve/gve_dqo.h b/sys/dev/gve/gve_dqo.h
--- a/sys/dev/gve/gve_dqo.h
+++ b/sys/dev/gve/gve_dqo.h
@@ -59,8 +59,6 @@
*/
#define GVE_RX_DQO_MIN_PENDING_BUFS 128
-#define GVE_DQ_NUM_FRAGS_IN_PAGE (PAGE_SIZE / GVE_DEFAULT_RX_BUFFER_SIZE)
-
/*
* gve_rx_qpl_buf_id_dqo's 11 bit wide buf_id field limits the total
* number of pages per QPL to 2048.
@@ -330,4 +328,10 @@
_Static_assert(sizeof(struct gve_rx_compl_desc_dqo) == 32,
"gve: bad dqo desc struct length");
+
+static inline uint8_t
+gve_get_dq_num_frags_in_page(struct gve_priv *priv)
+{
+ return (PAGE_SIZE / priv->rx_buf_size_dqo);
+}
#endif /* _GVE_DESC_DQO_H_ */
diff --git a/sys/dev/gve/gve_main.c b/sys/dev/gve/gve_main.c
--- a/sys/dev/gve/gve_main.c
+++ b/sys/dev/gve/gve_main.c
@@ -35,7 +35,7 @@
#define GVE_DRIVER_VERSION "GVE-FBSD-1.3.4\n"
#define GVE_VERSION_MAJOR 1
#define GVE_VERSION_MINOR 3
-#define GVE_VERSION_SUB 4
+#define GVE_VERSION_SUB 5
#define GVE_DEFAULT_RX_COPYBREAK 256
@@ -382,12 +382,27 @@
return (0);
}
+static int
+gve_get_dqo_rx_buf_size(struct gve_priv *priv, uint16_t mtu)
+{
+ /*
+ * Use 4k buffers only if mode is DQ, 4k buffers flag is on,
+ * and either hw LRO is enabled or mtu is greater than 2048
+ */
+ if (!gve_is_gqi(priv) && gve_allow_4k_rx_buffers &&
+ (!gve_disable_hw_lro || mtu > GVE_DEFAULT_RX_BUFFER_SIZE))
+ return (GVE_4K_RX_BUFFER_SIZE_DQO);
+
+ return (GVE_DEFAULT_RX_BUFFER_SIZE);
+}
+
static int
gve_set_mtu(if_t ifp, uint32_t new_mtu)
{
struct gve_priv *priv = if_getsoftc(ifp);
const uint32_t max_problem_range = 8227;
const uint32_t min_problem_range = 7822;
+ uint16_t new_rx_buf_size = gve_get_dqo_rx_buf_size(priv, new_mtu);
int err;
if ((new_mtu > priv->max_mtu) || (new_mtu < ETHERMIN)) {
@@ -402,9 +417,10 @@
* in throughput.
*/
if (!gve_is_gqi(priv) && !gve_disable_hw_lro &&
- new_mtu >= min_problem_range && new_mtu <= max_problem_range) {
+ new_mtu >= min_problem_range && new_mtu <= max_problem_range &&
+ new_rx_buf_size != GVE_4K_RX_BUFFER_SIZE_DQO) {
device_printf(priv->dev,
- "Cannot set to MTU to %d within the range [%d, %d] while hardware LRO is enabled\n",
+ "Cannot set MTU to %d within the range [%d, %d] while HW LRO is enabled and not using 4k RX Buffers\n",
new_mtu, min_problem_range, max_problem_range);
return (EINVAL);
}
@@ -414,6 +430,13 @@
if (bootverbose)
device_printf(priv->dev, "MTU set to %d\n", new_mtu);
if_setmtu(ifp, new_mtu);
+ /* Need to re-alloc RX queues if RX buffer size changed */
+ if (!gve_is_gqi(priv) &&
+ new_rx_buf_size != priv->rx_buf_size_dqo) {
+ gve_free_rx_rings(priv, 0, priv->rx_cfg.num_queues);
+ priv->rx_buf_size_dqo = new_rx_buf_size;
+ gve_alloc_rx_rings(priv, 0, priv->rx_cfg.num_queues);
+ }
} else {
device_printf(priv->dev, "Failed to set MTU to %d\n", new_mtu);
}
@@ -1064,6 +1087,7 @@
if (err != 0)
goto abort;
+ priv->rx_buf_size_dqo = gve_get_dqo_rx_buf_size(priv, priv->max_mtu);
err = gve_alloc_rings(priv);
if (err != 0)
goto abort;
diff --git a/sys/dev/gve/gve_rx_dqo.c b/sys/dev/gve/gve_rx_dqo.c
--- a/sys/dev/gve/gve_rx_dqo.c
+++ b/sys/dev/gve/gve_rx_dqo.c
@@ -140,15 +140,17 @@
return (0);
}
+ bus_size_t max_seg_size = gve_rx_dqo_mbuf_segment_size(priv);
+
err = bus_dma_tag_create(
bus_get_dma_tag(priv->dev), /* parent */
1, 0, /* alignment, bounds */
BUS_SPACE_MAXADDR, /* lowaddr */
BUS_SPACE_MAXADDR, /* highaddr */
NULL, NULL, /* filter, filterarg */
- MCLBYTES, /* maxsize */
+ max_seg_size, /* maxsize */
1, /* nsegments */
- MCLBYTES, /* maxsegsize */
+ max_seg_size, /* maxsegsize */
0, /* flags */
NULL, /* lockfunc */
NULL, /* lockarg */
@@ -317,7 +319,8 @@
}
SLIST_REMOVE_HEAD(&rx->dqo.free_bufs, slist_entry);
- buf->mbuf = m_getcl(how, MT_DATA, M_PKTHDR);
+ bus_size_t segment_size = gve_rx_dqo_mbuf_segment_size(rx->com.priv);
+ buf->mbuf = m_getjcl(how, MT_DATA, M_PKTHDR, segment_size);
if (__predict_false(!buf->mbuf)) {
err = ENOMEM;
counter_enter();
@@ -325,7 +328,7 @@
counter_exit();
goto abort_with_buf;
}
- buf->mbuf->m_len = MCLBYTES;
+ buf->mbuf->m_len = segment_size;
err = bus_dmamap_load_mbuf_sg(rx->dqo.buf_dmatag, buf->dmamap,
buf->mbuf, segs, &nsegs, BUS_DMA_NOWAIT);
@@ -371,7 +374,7 @@
bus_dmamap_sync(page_dma_handle->tag, page_dma_handle->map,
BUS_DMASYNC_PREREAD);
desc->buf_addr = htole64(page_dma_handle->bus_addr +
- frag_num * GVE_DEFAULT_RX_BUFFER_SIZE);
+ frag_num * rx->com.priv->rx_buf_size_dqo);
buf->num_nic_frags++;
gve_rx_advance_head_dqo(rx);
@@ -430,7 +433,7 @@
}
gve_rx_post_qpl_buf_dqo(rx, buf, buf->next_idx);
- if (buf->next_idx == GVE_DQ_NUM_FRAGS_IN_PAGE - 1)
+ if (buf->next_idx == gve_get_dq_num_frags_in_page(rx->com.priv) - 1)
buf->next_idx = 0;
else
buf->next_idx++;
@@ -742,7 +745,7 @@
int page_idx = buf - rx->dqo.bufs;
void *va = rx->com.qpl->dmas[page_idx].cpu_addr;
- va = (char *)va + (buf_frag_num * GVE_DEFAULT_RX_BUFFER_SIZE);
+ va = (char *)va + (buf_frag_num * rx->com.priv->rx_buf_size_dqo);
return (va);
}
@@ -753,15 +756,16 @@
{
void *va = gve_get_cpu_addr_for_qpl_buf(rx, buf, buf_frag_num);
struct mbuf *mbuf;
+ bus_size_t segment_size = gve_rx_dqo_mbuf_segment_size(rx->com.priv);
if (ctx->mbuf_tail == NULL) {
- mbuf = m_getcl(M_NOWAIT, MT_DATA, M_PKTHDR);
+ mbuf = m_getjcl(M_NOWAIT, MT_DATA, M_PKTHDR, segment_size);
if (mbuf == NULL)
return (ENOMEM);
ctx->mbuf_head = mbuf;
ctx->mbuf_tail = mbuf;
} else {
- mbuf = m_getcl(M_NOWAIT, MT_DATA, 0);
+ mbuf = m_getjcl(M_NOWAIT, MT_DATA, 0, segment_size);
if (mbuf == NULL)
return (ENOMEM);
ctx->mbuf_tail->m_next = mbuf;
@@ -809,7 +813,7 @@
page_idx = buf - rx->dqo.bufs;
page = rx->com.qpl->pages[page_idx];
page_addr = rx->com.qpl->dmas[page_idx].cpu_addr;
- va = (char *)page_addr + (buf_frag_num * GVE_DEFAULT_RX_BUFFER_SIZE);
+ va = (char *)page_addr + (buf_frag_num * rx->com.priv->rx_buf_size_dqo);
/*
* Grab an extra ref to the page so that gve_mextadd_free
@@ -855,7 +859,7 @@
}
buf = &rx->dqo.bufs[buf_id];
if (__predict_false(buf->num_nic_frags == 0 ||
- buf_frag_num > GVE_DQ_NUM_FRAGS_IN_PAGE - 1)) {
+ buf_frag_num > gve_get_dq_num_frags_in_page(priv) - 1)) {
device_printf(priv->dev, "Spurious compl for buf id %d on rxq %d "
"with buf_frag_num %d and num_nic_frags %d, issuing reset\n",
buf_id, rx->com.id, buf_frag_num, buf->num_nic_frags);
diff --git a/sys/dev/gve/gve_sysctl.c b/sys/dev/gve/gve_sysctl.c
--- a/sys/dev/gve/gve_sysctl.c
+++ b/sys/dev/gve/gve_sysctl.c
@@ -37,6 +37,10 @@
SYSCTL_BOOL(_hw_gve, OID_AUTO, disable_hw_lro, CTLFLAG_RDTUN,
&gve_disable_hw_lro, 0, "Controls if hardware LRO is used");
+bool gve_allow_4k_rx_buffers = false;
+SYSCTL_BOOL(_hw_gve, OID_AUTO, allow_4k_rx_buffers, CTLFLAG_RDTUN,
+ &gve_allow_4k_rx_buffers, 0, "Controls if 4K RX Buffers are allowed");
+
char gve_queue_format[8];
SYSCTL_STRING(_hw_gve, OID_AUTO, queue_format, CTLFLAG_RD,
&gve_queue_format, 0, "Queue format being used by the iface");

File Metadata

Mime Type
text/plain
Expires
Tue, Oct 14, 3:34 AM (3 h, 7 m)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
23705017
Default Alt Text
D50786.diff (10 KB)

Event Timeline