diff --git a/share/man/man4/gve.4 b/share/man/man4/gve.4
--- a/share/man/man4/gve.4
+++ b/share/man/man4/gve.4
@@ -230,6 +230,14 @@
 The software LRO stack in the kernel is always used.
 This sysctl variable needs to be set before loading the driver, using
 .Xr loader.conf 5 .
+.It Va hw.gve.allow_4k_rx_buffers
+Setting this boot-time tunable to 1 enables support for 4K RX buffers.
+The default value is 0, which means 2K RX buffers are used.
+4K RX buffers are only supported by the DQO_RDA and DQO_QPL queue formats.
+When enabled, 4K RX buffers are used when either HW LRO is enabled
+or the MTU is greater than 2048.
+This sysctl variable needs to be set before loading the driver, using
+.Xr loader.conf 5 .
 .It Va dev.gve.X.num_rx_queues and dev.gve.X.num_tx_queues
 Run-time tunables that represent the number of currently used RX/TX queues.
 The default value is the max number of RX/TX queues the device can support.
diff --git a/sys/dev/gve/gve.h b/sys/dev/gve/gve.h
--- a/sys/dev/gve/gve.h
+++ b/sys/dev/gve/gve.h
@@ -65,6 +65,7 @@
 #define ADMINQ_SIZE PAGE_SIZE
 
 #define GVE_DEFAULT_RX_BUFFER_SIZE 2048
+#define GVE_4K_RX_BUFFER_SIZE_DQO 4096
 
 /* Each RX bounce buffer page can fit two packet buffers. */
 #define GVE_DEFAULT_RX_BUFFER_OFFSET (PAGE_SIZE / 2)
@@ -84,6 +85,11 @@
 
 static MALLOC_DEFINE(M_GVE, "gve", "gve allocations");
 
+_Static_assert(MCLBYTES == GVE_DEFAULT_RX_BUFFER_SIZE,
+    "gve: bad MCLBYTES length");
+_Static_assert(MJUMPAGESIZE >= GVE_4K_RX_BUFFER_SIZE_DQO,
+    "gve: bad MJUMPAGESIZE length");
+
 struct gve_dma_handle {
 	bus_addr_t bus_addr;
 	void *cpu_addr;
@@ -633,6 +639,7 @@
 
 	/* The index of tx queue that the timer service will check on its next invocation */
 	uint16_t check_tx_queue_idx;
+	uint16_t rx_buf_size_dqo;
 };
 
 static inline bool
@@ -666,6 +673,18 @@
 	    priv->queue_format == GVE_DQO_QPL_FORMAT);
 }
 
+static inline bool
+gve_is_4k_rx_buf(struct gve_priv *priv)
+{
+	return (priv->rx_buf_size_dqo == GVE_4K_RX_BUFFER_SIZE_DQO);
+}
+
+static inline bus_size_t
+gve_rx_dqo_mbuf_segment_size(struct gve_priv *priv)
+{
+	return (gve_is_4k_rx_buf(priv) ? MJUMPAGESIZE : MCLBYTES);
+}
+
 /* Defined in gve_main.c */
 void gve_schedule_reset(struct gve_priv *priv);
 int gve_adjust_tx_queues(struct gve_priv *priv, uint16_t new_queue_cnt);
@@ -746,6 +765,7 @@
 
 /* Systcl functions defined in gve_sysctl.c */
 extern bool gve_disable_hw_lro;
+extern bool gve_allow_4k_rx_buffers;
 extern char gve_queue_format[8];
 extern char gve_version[8];
 void gve_setup_sysctl(struct gve_priv *priv);
diff --git a/sys/dev/gve/gve_adminq.c b/sys/dev/gve/gve_adminq.c
--- a/sys/dev/gve/gve_adminq.c
+++ b/sys/dev/gve/gve_adminq.c
@@ -296,7 +296,6 @@
 		.ntfy_id = htobe32(rx->com.ntfy_id),
 		.queue_resources_addr = htobe64(qres_dma->bus_addr),
 		.rx_ring_size = htobe16(priv->rx_desc_cnt),
-		.packet_buffer_size = htobe16(GVE_DEFAULT_RX_BUFFER_SIZE),
 	};
 
 	if (gve_is_gqi(priv)) {
@@ -308,6 +307,8 @@
 		    htobe32(queue_index);
 		cmd.create_rx_queue.queue_page_list_id =
 		    htobe32((rx->com.qpl)->id);
+		cmd.create_rx_queue.packet_buffer_size =
+		    htobe16(GVE_DEFAULT_RX_BUFFER_SIZE);
 	} else {
 		cmd.create_rx_queue.queue_page_list_id =
 		    htobe32(GVE_RAW_ADDRESSING_QPL_ID);
@@ -320,6 +321,8 @@
 		cmd.create_rx_queue.enable_rsc =
 		    !!((if_getcapenable(priv->ifp) & IFCAP_LRO) &&
 			!gve_disable_hw_lro);
+		cmd.create_rx_queue.packet_buffer_size =
+		    htobe16(priv->rx_buf_size_dqo);
 	}
 
 	return (gve_adminq_execute_cmd(priv, &cmd));
diff --git a/sys/dev/gve/gve_dqo.h b/sys/dev/gve/gve_dqo.h
--- a/sys/dev/gve/gve_dqo.h
+++ b/sys/dev/gve/gve_dqo.h
@@ -59,8 +59,6 @@
  */
 #define GVE_RX_DQO_MIN_PENDING_BUFS 128
 
-#define GVE_DQ_NUM_FRAGS_IN_PAGE (PAGE_SIZE / GVE_DEFAULT_RX_BUFFER_SIZE)
-
 /*
  * gve_rx_qpl_buf_id_dqo's 11 bit wide buf_id field limits the total
  * number of pages per QPL to 2048.
@@ -330,4 +328,10 @@
 
 _Static_assert(sizeof(struct gve_rx_compl_desc_dqo) == 32,
     "gve: bad dqo desc struct length");
+
+static inline uint8_t
+gve_get_dq_num_frags_in_page(struct gve_priv *priv)
+{
+	return (PAGE_SIZE / priv->rx_buf_size_dqo);
+}
 #endif /* _GVE_DESC_DQO_H_ */
diff --git a/sys/dev/gve/gve_main.c b/sys/dev/gve/gve_main.c
--- a/sys/dev/gve/gve_main.c
+++ b/sys/dev/gve/gve_main.c
@@ -35,7 +35,7 @@
 #define GVE_DRIVER_VERSION "GVE-FBSD-1.3.4\n"
 #define GVE_VERSION_MAJOR 1
 #define GVE_VERSION_MINOR 3
-#define GVE_VERSION_SUB 4
+#define GVE_VERSION_SUB 5
 
 #define GVE_DEFAULT_RX_COPYBREAK 256
 
@@ -382,12 +382,27 @@
 	return (0);
 }
 
+static int
+gve_get_dqo_rx_buf_size(struct gve_priv *priv, uint16_t mtu)
+{
+	/*
+	 * Use 4K buffers only if the queue format is DQO, the 4K-buffer
+	 * tunable is on, and either HW LRO is enabled or the MTU is greater than 2048.
+	 */
+	if (!gve_is_gqi(priv) && gve_allow_4k_rx_buffers &&
+	    (!gve_disable_hw_lro || mtu > GVE_DEFAULT_RX_BUFFER_SIZE))
+		return (GVE_4K_RX_BUFFER_SIZE_DQO);
+
+	return (GVE_DEFAULT_RX_BUFFER_SIZE);
+}
+
 static int
 gve_set_mtu(if_t ifp, uint32_t new_mtu)
 {
 	struct gve_priv *priv = if_getsoftc(ifp);
 	const uint32_t max_problem_range = 8227;
 	const uint32_t min_problem_range = 7822;
+	uint16_t new_rx_buf_size = gve_get_dqo_rx_buf_size(priv, new_mtu);
 	int err;
 
 	if ((new_mtu > priv->max_mtu) || (new_mtu < ETHERMIN)) {
@@ -402,9 +417,10 @@
 	 * in throughput.
 	 */
 	if (!gve_is_gqi(priv) && !gve_disable_hw_lro &&
-	    new_mtu >= min_problem_range && new_mtu <= max_problem_range) {
+	    new_mtu >= min_problem_range && new_mtu <= max_problem_range &&
+	    new_rx_buf_size != GVE_4K_RX_BUFFER_SIZE_DQO) {
 		device_printf(priv->dev,
-		    "Cannot set to MTU to %d within the range [%d, %d] while hardware LRO is enabled\n",
+		    "Cannot set MTU to %d within the range [%d, %d] while HW LRO is enabled and 4K RX buffers are not in use\n",
 		    new_mtu, min_problem_range, max_problem_range);
 		return (EINVAL);
 	}
@@ -414,6 +430,13 @@
 		if (bootverbose)
 			device_printf(priv->dev, "MTU set to %d\n", new_mtu);
 		if_setmtu(ifp, new_mtu);
+		/* Need to re-allocate RX queues if the RX buffer size changed */
+		if (!gve_is_gqi(priv) &&
+		    new_rx_buf_size != priv->rx_buf_size_dqo) {
+			gve_free_rx_rings(priv, 0, priv->rx_cfg.num_queues);
+			priv->rx_buf_size_dqo = new_rx_buf_size;
+			gve_alloc_rx_rings(priv, 0, priv->rx_cfg.num_queues);
+		}
 	} else {
 		device_printf(priv->dev, "Failed to set MTU to %d\n", new_mtu);
 	}
@@ -1064,6 +1087,7 @@
 	if (err != 0)
 		goto abort;
 
+	priv->rx_buf_size_dqo = gve_get_dqo_rx_buf_size(priv, priv->max_mtu);
 	err = gve_alloc_rings(priv);
 	if (err != 0)
 		goto abort;
diff --git a/sys/dev/gve/gve_rx_dqo.c b/sys/dev/gve/gve_rx_dqo.c
--- a/sys/dev/gve/gve_rx_dqo.c
+++ b/sys/dev/gve/gve_rx_dqo.c
@@ -140,15 +140,17 @@
 		return (0);
 	}
 
+	bus_size_t max_seg_size = gve_rx_dqo_mbuf_segment_size(priv);
+
 	err = bus_dma_tag_create(
 	    bus_get_dma_tag(priv->dev),	/* parent */
 	    1, 0,			/* alignment, bounds */
 	    BUS_SPACE_MAXADDR,		/* lowaddr */
 	    BUS_SPACE_MAXADDR,		/* highaddr */
 	    NULL, NULL,			/* filter, filterarg */
-	    MCLBYTES,			/* maxsize */
+	    max_seg_size,		/* maxsize */
 	    1,				/* nsegments */
-	    MCLBYTES,			/* maxsegsize */
+	    max_seg_size,		/* maxsegsize */
 	    0,				/* flags */
 	    NULL,			/* lockfunc */
 	    NULL,			/* lockarg */
@@ -317,7 +319,8 @@
 	}
 	SLIST_REMOVE_HEAD(&rx->dqo.free_bufs, slist_entry);
 
-	buf->mbuf = m_getcl(how, MT_DATA, M_PKTHDR);
+	bus_size_t segment_size = gve_rx_dqo_mbuf_segment_size(rx->com.priv);
+	buf->mbuf = m_getjcl(how, MT_DATA, M_PKTHDR, segment_size);
 	if (__predict_false(!buf->mbuf)) {
 		err = ENOMEM;
 		counter_enter();
@@ -325,7 +328,7 @@
 		counter_exit();
 		goto abort_with_buf;
 	}
-	buf->mbuf->m_len = MCLBYTES;
+	buf->mbuf->m_len = segment_size;
 
 	err = bus_dmamap_load_mbuf_sg(rx->dqo.buf_dmatag, buf->dmamap,
 	    buf->mbuf, segs, &nsegs, BUS_DMA_NOWAIT);
@@ -371,7 +374,7 @@
 	bus_dmamap_sync(page_dma_handle->tag, page_dma_handle->map,
 	    BUS_DMASYNC_PREREAD);
 	desc->buf_addr = htole64(page_dma_handle->bus_addr +
-	    frag_num * GVE_DEFAULT_RX_BUFFER_SIZE);
+	    frag_num * rx->com.priv->rx_buf_size_dqo);
 
 	buf->num_nic_frags++;
 	gve_rx_advance_head_dqo(rx);
@@ -430,7 +433,7 @@
 	}
 
 	gve_rx_post_qpl_buf_dqo(rx, buf, buf->next_idx);
-	if (buf->next_idx == GVE_DQ_NUM_FRAGS_IN_PAGE - 1)
+	if (buf->next_idx == gve_get_dq_num_frags_in_page(rx->com.priv) - 1)
 		buf->next_idx = 0;
 	else
 		buf->next_idx++;
@@ -742,7 +745,7 @@
 	int page_idx = buf - rx->dqo.bufs;
 	void *va = rx->com.qpl->dmas[page_idx].cpu_addr;
 
-	va = (char *)va + (buf_frag_num * GVE_DEFAULT_RX_BUFFER_SIZE);
+	va = (char *)va + (buf_frag_num * rx->com.priv->rx_buf_size_dqo);
 	return (va);
 }
 
@@ -753,15 +756,16 @@
 {
 	void *va = gve_get_cpu_addr_for_qpl_buf(rx, buf, buf_frag_num);
 	struct mbuf *mbuf;
+	bus_size_t segment_size = gve_rx_dqo_mbuf_segment_size(rx->com.priv);
 
 	if (ctx->mbuf_tail == NULL) {
-		mbuf = m_getcl(M_NOWAIT, MT_DATA, M_PKTHDR);
+		mbuf = m_getjcl(M_NOWAIT, MT_DATA, M_PKTHDR, segment_size);
 		if (mbuf == NULL)
 			return (ENOMEM);
 		ctx->mbuf_head = mbuf;
 		ctx->mbuf_tail = mbuf;
 	} else {
-		mbuf = m_getcl(M_NOWAIT, MT_DATA, 0);
+		mbuf = m_getjcl(M_NOWAIT, MT_DATA, 0, segment_size);
 		if (mbuf == NULL)
 			return (ENOMEM);
 		ctx->mbuf_tail->m_next = mbuf;
@@ -809,7 +813,7 @@
 	page_idx = buf - rx->dqo.bufs;
 	page = rx->com.qpl->pages[page_idx];
 	page_addr = rx->com.qpl->dmas[page_idx].cpu_addr;
-	va = (char *)page_addr + (buf_frag_num * GVE_DEFAULT_RX_BUFFER_SIZE);
+	va = (char *)page_addr + (buf_frag_num * rx->com.priv->rx_buf_size_dqo);
 
 	/*
 	 * Grab an extra ref to the page so that gve_mextadd_free
@@ -855,7 +859,7 @@
 	}
 	buf = &rx->dqo.bufs[buf_id];
 	if (__predict_false(buf->num_nic_frags == 0 ||
-	    buf_frag_num > GVE_DQ_NUM_FRAGS_IN_PAGE - 1)) {
+	    buf_frag_num > gve_get_dq_num_frags_in_page(priv) - 1)) {
 		device_printf(priv->dev, "Spurious compl for buf id %d on rxq %d "
 		    "with buf_frag_num %d and num_nic_frags %d, issuing reset\n",
 		    buf_id, rx->com.id, buf_frag_num, buf->num_nic_frags);
diff --git a/sys/dev/gve/gve_sysctl.c b/sys/dev/gve/gve_sysctl.c
--- a/sys/dev/gve/gve_sysctl.c
+++ b/sys/dev/gve/gve_sysctl.c
@@ -37,6 +37,10 @@
 SYSCTL_BOOL(_hw_gve, OID_AUTO, disable_hw_lro, CTLFLAG_RDTUN,
     &gve_disable_hw_lro, 0, "Controls if hardware LRO is used");
 
+bool gve_allow_4k_rx_buffers = false;
+SYSCTL_BOOL(_hw_gve, OID_AUTO, allow_4k_rx_buffers, CTLFLAG_RDTUN,
+    &gve_allow_4k_rx_buffers, 0, "Controls if 4K RX Buffers are allowed");
+
 char gve_queue_format[8];
 SYSCTL_STRING(_hw_gve, OID_AUTO, queue_format, CTLFLAG_RD,
     &gve_queue_format, 0, "Queue format being used by the iface");
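
Usage sketch for the new tunable: because allow_4k_rx_buffers is declared CTLFLAG_RDTUN, it only takes effect when set before the gve module loads, e.g. from loader.conf(5). The exact line below is illustrative, not part of the patch:

# /boot/loader.conf
hw.gve.allow_4k_rx_buffers="1"	# opt in to 4K RX buffers (DQO_RDA/DQO_QPL only)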