D46691.id145996.diff

diff --git a/share/man/man4/gve.4 b/share/man/man4/gve.4
--- a/share/man/man4/gve.4
+++ b/share/man/man4/gve.4
@@ -239,6 +239,8 @@
stands for "Queue Out-of-order" referring to the fact that the NIC might
send Tx and Rx completions in an order different from the one in which
the corresponding descriptors were posted by the driver.
+.It
+DQO_QPL: The next-generation descriptor format in the "QPL" mode.
.El
.Sh SUPPORT
Please email gvnic-drivers@google.com with the specifics of the issue encountered.
diff --git a/sys/dev/gve/gve.h b/sys/dev/gve/gve.h
--- a/sys/dev/gve/gve.h
+++ b/sys/dev/gve/gve.h
@@ -105,6 +105,7 @@
GVE_GQI_RDA_FORMAT = 0x1,
GVE_GQI_QPL_FORMAT = 0x2,
GVE_DQO_RDA_FORMAT = 0x3,
+ GVE_DQO_QPL_FORMAT = 0x4,
};
enum gve_state_flags_bit {
@@ -226,6 +227,7 @@
counter_u64_t rx_frag_flip_cnt;
counter_u64_t rx_frag_copy_cnt;
counter_u64_t rx_dropped_pkt_desc_err;
+ counter_u64_t rx_dropped_pkt_buf_post_fail;
counter_u64_t rx_dropped_pkt_mbuf_alloc_fail;
counter_u64_t rx_mbuf_dmamap_err;
counter_u64_t rx_mbuf_mclget_null;
@@ -233,11 +235,34 @@
#define NUM_RX_STATS (sizeof(struct gve_rxq_stats) / sizeof(counter_u64_t))
+union gve_rx_qpl_buf_id_dqo {
+ struct {
+ uint16_t buf_id:11; /* Index into rx->dqo.bufs */
+ uint8_t frag_num:5; /* Which frag in the QPL page */
+ };
+ uint16_t all;
+} __packed;
+_Static_assert(sizeof(union gve_rx_qpl_buf_id_dqo) == 2,
+ "gve: bad dqo qpl rx buf id length");
+
struct gve_rx_buf_dqo {
- struct mbuf *mbuf;
- bus_dmamap_t dmamap;
- uint64_t addr;
- bool mapped;
+ union {
+ /* RDA */
+ struct {
+ struct mbuf *mbuf;
+ bus_dmamap_t dmamap;
+ uint64_t addr;
+ bool mapped;
+ };
+ /* QPL */
+ struct {
+ uint8_t num_nic_frags; /* number of pending completions */
+ uint8_t next_idx; /* index of the next frag to post */
+ /* for chaining rx->dqo.used_bufs */
+ STAILQ_ENTRY(gve_rx_buf_dqo) stailq_entry;
+ };
+ };
+ /* for chaining rx->dqo.free_bufs */
SLIST_ENTRY(gve_rx_buf_dqo) slist_entry;
};
@@ -276,6 +301,13 @@
uint32_t tail; /* The index at which to receive the next compl at */
uint8_t cur_gen_bit; /* Gets flipped on every cycle of the compl ring */
SLIST_HEAD(, gve_rx_buf_dqo) free_bufs;
+
+ /*
+ * Only used in QPL mode. Pages referred to by if_input-ed mbufs
+ * stay parked here until their wire count comes back to 1.
+ * Pages are moved here once they have no pending completions.
+ */
+ STAILQ_HEAD(, gve_rx_buf_dqo) used_bufs;
} dqo;
};
@@ -313,6 +345,7 @@
counter_u64_t tx_dropped_pkt_nospace_bufring;
counter_u64_t tx_delayed_pkt_nospace_descring;
counter_u64_t tx_delayed_pkt_nospace_compring;
+ counter_u64_t tx_delayed_pkt_nospace_qpl_bufs;
counter_u64_t tx_delayed_pkt_tsoerr;
counter_u64_t tx_dropped_pkt_vlan;
counter_u64_t tx_mbuf_collapse;
@@ -326,7 +359,19 @@
struct gve_tx_pending_pkt_dqo {
struct mbuf *mbuf;
- bus_dmamap_t dmamap;
+ union {
+ /* RDA */
+ bus_dmamap_t dmamap;
+ /* QPL */
+ struct {
+ /*
+ * A linked list of entries from qpl_bufs that served
+ * as the bounce buffer for this packet.
+ */
+ int32_t qpl_buf_head;
+ uint32_t num_qpl_bufs;
+ };
+ };
uint8_t state; /* the gve_packet_state enum */
int next; /* To chain the free_pending_pkts lists */
};
@@ -377,7 +422,20 @@
*/
int32_t free_pending_pkts_csm;
- bus_dma_tag_t buf_dmatag; /* DMA params for mapping Tx mbufs */
+ /*
+ * The head index of a singly linked list representing QPL page fragments
+ * to copy mbuf payload into for the NIC to see. Once this list is depleted,
+ * the "_prd" suffixed producer list, grown by the completion taskqueue,
+ * is stolen.
+ *
+ * Only used in QPL mode. int32_t because atomic_swap_16 doesn't exist.
+ */
+ int32_t free_qpl_bufs_csm;
+ uint32_t qpl_bufs_consumed; /* Allows quickly checking for buf availability */
+ uint32_t qpl_bufs_produced_cached; /* Cached value of qpl_bufs_produced */
+
+ /* DMA params for mapping Tx mbufs. Only used in RDA mode. */
+ bus_dma_tag_t buf_dmatag;
} __aligned(CACHE_LINE_SIZE);
/* Accessed when processing completions */
@@ -395,6 +453,18 @@
* its consumer list, with the "_csm" suffix, is depleted.
*/
int32_t free_pending_pkts_prd;
+
+ /*
+ * The completion taskqueue moves the QPL pages corresponding to a
+ * completed packet into this list. It is only used in QPL mode.
+ * The "_prd" denotes that this is a producer list. The trasnmit
+ * taskqueue steals this list once its consumer list, with the "_csm"
+ * suffix, is depleted.
+ *
+ * Only used in QPL mode. int32_t because atomic_swap_16 doesn't exist.
+ */
+ int32_t free_qpl_bufs_prd;
+ uint32_t qpl_bufs_produced;
} __aligned(CACHE_LINE_SIZE);
/* Accessed by both the completion and xmit loops */
@@ -402,6 +472,16 @@
/* completion tags index into this array */
struct gve_tx_pending_pkt_dqo *pending_pkts;
uint16_t num_pending_pkts;
+
+ /*
+ * Represents QPL page fragments. An index into this array
+ * always represents the same QPL page fragment. The value
+ * is also an index into this array and serves as a means
+ * to chain buffers into linked lists whose heads are
+ * either free_qpl_bufs_prd, free_qpl_bufs_csm, or a
+ * pending packet's qpl_buf_head.
+ */
+ int32_t *qpl_bufs;
} __aligned(CACHE_LINE_SIZE);
} dqo;
};
@@ -531,6 +611,13 @@
return (priv->queue_format == GVE_GQI_QPL_FORMAT);
}
+static inline bool
+gve_is_qpl(struct gve_priv *priv)
+{
+ return (priv->queue_format == GVE_GQI_QPL_FORMAT ||
+ priv->queue_format == GVE_DQO_QPL_FORMAT);
+}
+
/* Defined in gve_main.c */
void gve_schedule_reset(struct gve_priv *priv);
@@ -545,6 +632,7 @@
void gve_free_qpls(struct gve_priv *priv);
int gve_register_qpls(struct gve_priv *priv);
int gve_unregister_qpls(struct gve_priv *priv);
+void gve_mextadd_free(struct mbuf *mbuf);
/* TX functions defined in gve_tx.c */
int gve_alloc_tx_rings(struct gve_priv *priv);
@@ -563,6 +651,7 @@
void gve_clear_tx_ring_dqo(struct gve_priv *priv, int i);
int gve_tx_intr_dqo(void *arg);
int gve_xmit_dqo(struct gve_tx_ring *tx, struct mbuf **mbuf_ptr);
+int gve_xmit_dqo_qpl(struct gve_tx_ring *tx, struct mbuf *mbuf);
void gve_tx_cleanup_tq_dqo(void *arg, int pending);
/* RX functions defined in gve_rx.c */
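
The union gve_rx_qpl_buf_id_dqo added above packs an 11-bit page index and a
5-bit fragment index into the single 16-bit buffer id that the NIC echoes back
in RX completions; the 11-bit field is what caps a DQO QPL at 2048 RX pages
(GVE_RX_NUM_QPL_PAGES_DQO in gve_dqo.h below). A minimal userspace sketch of
that packing, not driver code: it uses simplified names and portable uint16_t
bitfields, and assumes the low-bit-first, little-endian bitfield layout the
driver relies on.

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

/* Sketch of the composed RX buffer id: page index plus fragment number. */
union rx_qpl_buf_id {
	struct {
		uint16_t buf_id:11;	/* index into rx->dqo.bufs: 0..2047 */
		uint16_t frag_num:5;	/* 2K fragment within that page: 0..31 */
	};
	uint16_t all;			/* raw value written into the descriptor */
};

int
main(void)
{
	union rx_qpl_buf_id id = { .all = 0 };

	assert(sizeof(id) == 2);	/* mirrors the driver's _Static_assert */

	id.buf_id = 1234;		/* which QPL page */
	id.frag_num = 1;		/* second fragment of that page */
	printf("composed buf_id field: 0x%04x\n", (unsigned)id.all);
	printf("decoded: page %u, frag %u\n",
	    (unsigned)id.buf_id, (unsigned)id.frag_num);
	return (0);
}

On such a layout the composed value is 0x4d2 | (1 << 11) = 0xcd2;
gve_rx_dqo_qpl() performs the inverse split on every completion it processes.
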
diff --git a/sys/dev/gve/gve_adminq.h b/sys/dev/gve/gve_adminq.h
--- a/sys/dev/gve/gve_adminq.h
+++ b/sys/dev/gve/gve_adminq.h
@@ -144,6 +144,15 @@
_Static_assert(sizeof(struct gve_device_option_dqo_rda) == 8,
"gve: bad admin queue struct length");
+struct gve_device_option_dqo_qpl {
+ __be32 supported_features_mask;
+ __be16 tx_comp_ring_entries;
+ __be16 rx_buff_ring_entries;
+};
+
+_Static_assert(sizeof(struct gve_device_option_dqo_qpl) == 8,
+ "gve: bad admin queue struct length");
+
struct gve_device_option_modify_ring {
__be32 supported_features_mask;
__be16 max_rx_ring_size;
@@ -168,6 +177,7 @@
GVE_DEV_OPT_ID_GQI_QPL = 0x3,
GVE_DEV_OPT_ID_DQO_RDA = 0x4,
GVE_DEV_OPT_ID_MODIFY_RING = 0x6,
+ GVE_DEV_OPT_ID_DQO_QPL = 0x7,
GVE_DEV_OPT_ID_JUMBO_FRAMES = 0x8,
};
@@ -182,6 +192,7 @@
GVE_DEV_OPT_REQ_FEAT_MASK_GQI_RDA = 0x0,
GVE_DEV_OPT_REQ_FEAT_MASK_GQI_QPL = 0x0,
GVE_DEV_OPT_REQ_FEAT_MASK_DQO_RDA = 0x0,
+ GVE_DEV_OPT_REQ_FEAT_MASK_DQO_QPL = 0x0,
GVE_DEV_OPT_REQ_FEAT_MASK_MODIFY_RING = 0x0,
GVE_DEV_OPT_REQ_FEAT_MASK_JUMBO_FRAMES = 0x0,
};
@@ -196,7 +207,7 @@
enum gve_driver_capability {
gve_driver_capability_gqi_qpl = 0,
gve_driver_capability_gqi_rda = 1,
- gve_driver_capability_dqo_qpl = 2, /* reserved for future use */
+ gve_driver_capability_dqo_qpl = 2,
gve_driver_capability_dqo_rda = 3,
};
@@ -212,6 +223,7 @@
*/
#define GVE_DRIVER_CAPABILITY_FLAGS1 \
(GVE_CAP1(gve_driver_capability_gqi_qpl) | \
+ GVE_CAP1(gve_driver_capability_dqo_qpl) | \
GVE_CAP1(gve_driver_capability_dqo_rda))
#define GVE_DRIVER_CAPABILITY_FLAGS2 0x0
#define GVE_DRIVER_CAPABILITY_FLAGS3 0x0
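
With gve_driver_capability_dqo_qpl no longer marked "reserved for future use",
GVE_DRIVER_CAPABILITY_FLAGS1 now advertises GQI QPL, DQO QPL and DQO RDA
support to the device. A small standalone sketch of the resulting bitmap,
assuming GVE_CAP1(c) expands to a plain bit shift (its definition is not part
of this hunk):

#include <stdint.h>
#include <stdio.h>

enum gve_driver_capability {
	gve_driver_capability_gqi_qpl = 0,
	gve_driver_capability_gqi_rda = 1,
	gve_driver_capability_dqo_qpl = 2,
	gve_driver_capability_dqo_rda = 3,
};

#define GVE_CAP1(c)	(1ULL << (c))	/* assumed expansion, see above */

int
main(void)
{
	uint64_t flags1 = GVE_CAP1(gve_driver_capability_gqi_qpl) |
	    GVE_CAP1(gve_driver_capability_dqo_qpl) |
	    GVE_CAP1(gve_driver_capability_dqo_rda);

	/* Bits 0, 2 and 3 set: the mask comes out to 0xd. */
	printf("driver_capability_flags1 = 0x%llx\n",
	    (unsigned long long)flags1);
	return (0);
}
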
diff --git a/sys/dev/gve/gve_adminq.c b/sys/dev/gve/gve_adminq.c
--- a/sys/dev/gve/gve_adminq.c
+++ b/sys/dev/gve/gve_adminq.c
@@ -58,6 +58,7 @@
struct gve_device_option *option,
struct gve_device_option_gqi_qpl **dev_op_gqi_qpl,
struct gve_device_option_dqo_rda **dev_op_dqo_rda,
+ struct gve_device_option_dqo_qpl **dev_op_dqo_qpl,
struct gve_device_option_jumbo_frames **dev_op_jumbo_frames)
{
uint32_t req_feat_mask = be32toh(option->required_features_mask);
@@ -103,6 +104,23 @@
*dev_op_dqo_rda = (void *)(option + 1);
break;
+ case GVE_DEV_OPT_ID_DQO_QPL:
+ if (option_length < sizeof(**dev_op_dqo_qpl) ||
+ req_feat_mask != GVE_DEV_OPT_REQ_FEAT_MASK_DQO_QPL) {
+ device_printf(priv->dev, GVE_DEVICE_OPTION_ERROR_FMT,
+ "DQO QPL", (int)sizeof(**dev_op_dqo_qpl),
+ GVE_DEV_OPT_REQ_FEAT_MASK_DQO_QPL,
+ option_length, req_feat_mask);
+ break;
+ }
+
+ if (option_length > sizeof(**dev_op_dqo_qpl)) {
+ device_printf(priv->dev, GVE_DEVICE_OPTION_TOO_BIG_FMT,
+ "DQO QPL");
+ }
+ *dev_op_dqo_qpl = (void *)(option + 1);
+ break;
+
case GVE_DEV_OPT_ID_JUMBO_FRAMES:
if (option_length < sizeof(**dev_op_jumbo_frames) ||
req_feat_mask != GVE_DEV_OPT_REQ_FEAT_MASK_JUMBO_FRAMES) {
@@ -136,6 +154,7 @@
struct gve_device_descriptor *descriptor,
struct gve_device_option_gqi_qpl **dev_op_gqi_qpl,
struct gve_device_option_dqo_rda **dev_op_dqo_rda,
+ struct gve_device_option_dqo_qpl **dev_op_dqo_qpl,
struct gve_device_option_jumbo_frames **dev_op_jumbo_frames)
{
char *desc_end = (char *)descriptor + be16toh(descriptor->total_length);
@@ -154,7 +173,10 @@
}
gve_parse_device_option(priv, descriptor, dev_opt,
- dev_op_gqi_qpl, dev_op_dqo_rda, dev_op_jumbo_frames);
+ dev_op_gqi_qpl,
+ dev_op_dqo_rda,
+ dev_op_dqo_qpl,
+ dev_op_jumbo_frames);
dev_opt = (void *)((char *)(dev_opt + 1) + be16toh(dev_opt->option_length));
}
@@ -387,6 +409,7 @@
struct gve_dma_handle desc_mem;
struct gve_device_option_gqi_qpl *dev_op_gqi_qpl = NULL;
struct gve_device_option_dqo_rda *dev_op_dqo_rda = NULL;
+ struct gve_device_option_dqo_qpl *dev_op_dqo_qpl = NULL;
struct gve_device_option_jumbo_frames *dev_op_jumbo_frames = NULL;
uint32_t supported_features_mask = 0;
int rc;
@@ -416,7 +439,9 @@
bus_dmamap_sync(desc_mem.tag, desc_mem.map, BUS_DMASYNC_POSTREAD);
rc = gve_process_device_options(priv, desc,
- &dev_op_gqi_qpl, &dev_op_dqo_rda,
+ &dev_op_gqi_qpl,
+ &dev_op_dqo_rda,
+ &dev_op_dqo_qpl,
&dev_op_jumbo_frames);
if (rc != 0)
goto free_device_descriptor;
@@ -430,6 +455,15 @@
if (bootverbose)
device_printf(priv->dev,
"Driver is running with DQO RDA queue format.\n");
+ } else if (dev_op_dqo_qpl != NULL) {
+ snprintf(gve_queue_format, sizeof(gve_queue_format),
+ "%s", "DQO QPL");
+ priv->queue_format = GVE_DQO_QPL_FORMAT;
+ supported_features_mask = be32toh(
+ dev_op_dqo_qpl->supported_features_mask);
+ if (bootverbose)
+ device_printf(priv->dev,
+ "Driver is running with DQO QPL queue format.\n");
} else if (dev_op_gqi_qpl != NULL) {
snprintf(gve_queue_format, sizeof(gve_queue_format),
"%s", "GQI QPL");
diff --git a/sys/dev/gve/gve_dqo.h b/sys/dev/gve/gve_dqo.h
--- a/sys/dev/gve/gve_dqo.h
+++ b/sys/dev/gve/gve_dqo.h
@@ -57,7 +57,22 @@
* Start dropping RX fragments if at least these many
* buffers cannot be posted to the NIC.
*/
-#define GVE_RX_DQO_MIN_PENDING_BUFS 32
+#define GVE_RX_DQO_MIN_PENDING_BUFS 128
+
+#define GVE_DQ_NUM_FRAGS_IN_PAGE (PAGE_SIZE / GVE_DEFAULT_RX_BUFFER_SIZE)
+
+/*
+ * gve_rx_qpl_buf_id_dqo's 11 bit wide buf_id field limits the total
+ * number of pages per QPL to 2048.
+ */
+#define GVE_RX_NUM_QPL_PAGES_DQO 2048
+
+/* 2K TX buffers for DQO-QPL */
+#define GVE_TX_BUF_SHIFT_DQO 11
+#define GVE_TX_BUF_SIZE_DQO BIT(GVE_TX_BUF_SHIFT_DQO)
+#define GVE_TX_BUFS_PER_PAGE_DQO (PAGE_SIZE >> GVE_TX_BUF_SHIFT_DQO)
+
+#define GVE_TX_NUM_QPL_PAGES_DQO 512
/* Basic TX descriptor (DTYPE 0x0C) */
struct gve_tx_pkt_desc_dqo {
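
The new constants above carve each QPL page into 2 KiB TX bounce buffers
(GVE_TX_BUF_SHIFT_DQO = 11), and the index-to-address arithmetic that
gve_tx_buf_get_addr_dqo() applies later in this diff follows directly from
them. A standalone sketch of that arithmetic, assuming the common 4 KiB
PAGE_SIZE:

#include <stdint.h>
#include <stdio.h>

#define PAGE_SHIFT		12	/* assumption: 4 KiB pages */
#define PAGE_SIZE		(1u << PAGE_SHIFT)
#define GVE_TX_BUF_SHIFT_DQO	11	/* 2 KiB TX bounce buffers */
#define GVE_TX_BUF_SIZE_DQO	(1u << GVE_TX_BUF_SHIFT_DQO)
#define GVE_TX_BUFS_PER_PAGE_DQO (PAGE_SIZE >> GVE_TX_BUF_SHIFT_DQO)

int
main(void)
{
	uint32_t index;

	printf("%u TX buffers per page, %u bytes each\n",
	    GVE_TX_BUFS_PER_PAGE_DQO, GVE_TX_BUF_SIZE_DQO);

	/* Same math as gve_tx_buf_get_addr_dqo(): buffer index -> page, offset. */
	for (index = 0; index < 5; index++) {
		uint32_t page_id = index >> (PAGE_SHIFT - GVE_TX_BUF_SHIFT_DQO);
		uint32_t offset = (index & (GVE_TX_BUFS_PER_PAGE_DQO - 1)) <<
		    GVE_TX_BUF_SHIFT_DQO;

		printf("buf %u -> page %u, offset %u\n", index, page_id, offset);
	}
	return (0);
}
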
diff --git a/sys/dev/gve/gve_main.c b/sys/dev/gve/gve_main.c
--- a/sys/dev/gve/gve_main.c
+++ b/sys/dev/gve/gve_main.c
@@ -32,9 +32,9 @@
#include "gve_adminq.h"
#include "gve_dqo.h"
-#define GVE_DRIVER_VERSION "GVE-FBSD-1.2.0\n"
+#define GVE_DRIVER_VERSION "GVE-FBSD-1.3.0\n"
#define GVE_VERSION_MAJOR 1
-#define GVE_VERSION_MINOR 2
+#define GVE_VERSION_MINOR 3
#define GVE_VERSION_SUB 0
#define GVE_DEFAULT_RX_COPYBREAK 256
@@ -125,7 +125,7 @@
if (if_getcapenable(ifp) & IFCAP_TSO6)
if_sethwassistbits(ifp, CSUM_IP6_TSO, 0);
- if (gve_is_gqi(priv)) {
+ if (gve_is_qpl(priv)) {
err = gve_register_qpls(priv);
if (err != 0)
goto reset;
@@ -177,7 +177,7 @@
if (gve_destroy_tx_rings(priv) != 0)
goto reset;
- if (gve_is_gqi(priv)) {
+ if (gve_is_qpl(priv)) {
if (gve_unregister_qpls(priv) != 0)
goto reset;
}
@@ -375,13 +375,15 @@
/*
* Set TSO limits, must match the arguments to bus_dma_tag_create
- * when creating tx->dqo.buf_dmatag
+ * when creating tx->dqo.buf_dmatag. Only applies to the RDA mode
+ * because in QPL we copy the entire packet into the bounce buffer
+ * and thus it does not matter how fragmented the mbuf is.
*/
- if (!gve_is_gqi(priv)) {
- if_sethwtsomax(ifp, GVE_TSO_MAXSIZE_DQO);
+ if (!gve_is_gqi(priv) && !gve_is_qpl(priv)) {
if_sethwtsomaxsegcount(ifp, GVE_TX_MAX_DATA_DESCS_DQO);
if_sethwtsomaxsegsize(ifp, GVE_TX_MAX_BUF_SIZE_DQO);
}
+ if_sethwtsomax(ifp, GVE_TSO_MAXSIZE_DQO);
#if __FreeBSD_version >= 1400086
if_setflags(ifp, IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST);
@@ -465,7 +467,7 @@
gve_free_irqs(priv);
gve_free_tx_rings(priv);
gve_free_rx_rings(priv);
- if (gve_is_gqi(priv))
+ if (gve_is_qpl(priv))
gve_free_qpls(priv);
}
@@ -474,7 +476,7 @@
{
int err;
- if (gve_is_gqi(priv)) {
+ if (gve_is_qpl(priv)) {
err = gve_alloc_qpls(priv);
if (err != 0)
goto abort;
diff --git a/sys/dev/gve/gve_qpl.c b/sys/dev/gve/gve_qpl.c
--- a/sys/dev/gve/gve_qpl.c
+++ b/sys/dev/gve/gve_qpl.c
@@ -32,13 +32,14 @@
#include "gve.h"
#include "gve_adminq.h"
+#include "gve_dqo.h"
static MALLOC_DEFINE(M_GVE_QPL, "gve qpl", "gve qpl allocations");
static uint32_t
gve_num_tx_qpls(struct gve_priv *priv)
{
- if (priv->queue_format != GVE_GQI_QPL_FORMAT)
+ if (!gve_is_qpl(priv))
return (0);
return (priv->tx_cfg.max_queues);
@@ -47,7 +48,7 @@
static uint32_t
gve_num_rx_qpls(struct gve_priv *priv)
{
- if (priv->queue_format != GVE_GQI_QPL_FORMAT)
+ if (!gve_is_qpl(priv))
return (0);
return (priv->rx_cfg.max_queues);
@@ -189,6 +190,7 @@
int gve_alloc_qpls(struct gve_priv *priv)
{
int num_qpls = gve_num_tx_qpls(priv) + gve_num_rx_qpls(priv);
+ int num_pages;
int err;
int i;
@@ -198,15 +200,19 @@
priv->qpls = malloc(num_qpls * sizeof(*priv->qpls), M_GVE_QPL,
M_WAITOK | M_ZERO);
+ num_pages = gve_is_gqi(priv) ?
+ priv->tx_desc_cnt / GVE_QPL_DIVISOR :
+ GVE_TX_NUM_QPL_PAGES_DQO;
for (i = 0; i < gve_num_tx_qpls(priv); i++) {
- err = gve_alloc_qpl(priv, i, priv->tx_desc_cnt / GVE_QPL_DIVISOR,
+ err = gve_alloc_qpl(priv, i, num_pages,
/*single_kva=*/true);
if (err != 0)
goto abort;
}
+ num_pages = gve_is_gqi(priv) ? priv->rx_desc_cnt : GVE_RX_NUM_QPL_PAGES_DQO;
for (; i < num_qpls; i++) {
- err = gve_alloc_qpl(priv, i, priv->rx_desc_cnt, /*single_kva=*/false);
+ err = gve_alloc_qpl(priv, i, num_pages, /*single_kva=*/false);
if (err != 0)
goto abort;
}
@@ -283,3 +289,21 @@
gve_clear_state_flag(priv, GVE_STATE_FLAG_QPLREG_OK);
return (0);
}
+
+void
+gve_mextadd_free(struct mbuf *mbuf)
+{
+ vm_page_t page = (vm_page_t)mbuf->m_ext.ext_arg1;
+ vm_offset_t va = (vm_offset_t)mbuf->m_ext.ext_arg2;
+
+ /*
+ * Free the page only if this is the last ref.
+ * The interface might no longer exist by the time
+ * this callback is called, see gve_free_qpl.
+ */
+ if (__predict_false(vm_page_unwire_noq(page))) {
+ pmap_qremove(va, 1);
+ kva_free(va, PAGE_SIZE);
+ vm_page_free(page);
+ }
+}
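
gve_alloc_qpls() now sizes each queue's QPL differently for the two QPL
flavors: GQI keeps the descriptor-count-derived sizes, while DQO QPL uses the
fixed page counts from gve_dqo.h. A short sketch that merely evaluates those
DQO QPL constants, assuming 4 KiB pages and a 2 KiB GVE_DEFAULT_RX_BUFFER_SIZE
(defined elsewhere in gve.h, not shown in this diff):

#include <stdio.h>

#define PAGE_SIZE			4096	/* assumption */
#define GVE_TX_NUM_QPL_PAGES_DQO	512
#define GVE_RX_NUM_QPL_PAGES_DQO	2048
#define GVE_TX_BUF_SIZE_DQO		2048
#define GVE_DEFAULT_RX_BUFFER_SIZE	2048	/* assumption */

int
main(void)
{
	printf("TX QPL per queue: %d pages = %d KiB = %d bounce buffers\n",
	    GVE_TX_NUM_QPL_PAGES_DQO,
	    GVE_TX_NUM_QPL_PAGES_DQO * PAGE_SIZE / 1024,
	    GVE_TX_NUM_QPL_PAGES_DQO * PAGE_SIZE / GVE_TX_BUF_SIZE_DQO);
	printf("RX QPL per queue: %d pages = %d KiB = %d fragments\n",
	    GVE_RX_NUM_QPL_PAGES_DQO,
	    GVE_RX_NUM_QPL_PAGES_DQO * PAGE_SIZE / 1024,
	    GVE_RX_NUM_QPL_PAGES_DQO * PAGE_SIZE / GVE_DEFAULT_RX_BUFFER_SIZE);
	return (0);
}
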
diff --git a/sys/dev/gve/gve_rx.c b/sys/dev/gve/gve_rx.c
--- a/sys/dev/gve/gve_rx.c
+++ b/sys/dev/gve/gve_rx.c
@@ -409,24 +409,6 @@
}
}
-static void
-gve_mextadd_free(struct mbuf *mbuf)
-{
- vm_page_t page = (vm_page_t)mbuf->m_ext.ext_arg1;
- vm_offset_t va = (vm_offset_t)mbuf->m_ext.ext_arg2;
-
- /*
- * Free the page only if this is the last ref.
- * The interface might no longer exist by the time
- * this callback is called, see gve_free_qpl.
- */
- if (__predict_false(vm_page_unwire_noq(page))) {
- pmap_qremove(va, 1);
- kva_free(va, PAGE_SIZE);
- vm_page_free(page);
- }
-}
-
static void
gve_rx_flip_buff(struct gve_rx_slot_page_info *page_info, __be64 *slot_addr)
{
diff --git a/sys/dev/gve/gve_rx_dqo.c b/sys/dev/gve/gve_rx_dqo.c
--- a/sys/dev/gve/gve_rx_dqo.c
+++ b/sys/dev/gve/gve_rx_dqo.c
@@ -38,6 +38,9 @@
struct gve_rx_buf_dqo *buf;
int i;
+ if (gve_is_qpl(rx->com.priv))
+ return;
+
for (i = 0; i < rx->dqo.buf_cnt; i++) {
buf = &rx->dqo.bufs[i];
if (!buf->mbuf)
@@ -70,7 +73,7 @@
if (rx->dqo.bufs != NULL) {
gve_free_rx_mbufs_dqo(rx);
- if (rx->dqo.buf_dmatag) {
+ if (!gve_is_qpl(priv) && rx->dqo.buf_dmatag) {
for (j = 0; j < rx->dqo.buf_cnt; j++)
if (rx->dqo.bufs[j].mapped)
bus_dmamap_destroy(rx->dqo.buf_dmatag,
@@ -81,7 +84,7 @@
rx->dqo.bufs = NULL;
}
- if (rx->dqo.buf_dmatag)
+ if (!gve_is_qpl(priv) && rx->dqo.buf_dmatag)
bus_dma_tag_destroy(rx->dqo.buf_dmatag);
}
@@ -103,6 +106,31 @@
rx->dqo.desc_ring = rx->desc_ring_mem.cpu_addr;
rx->dqo.mask = priv->rx_desc_cnt - 1;
+ err = gve_dma_alloc_coherent(priv,
+ sizeof(struct gve_rx_compl_desc_dqo) * priv->rx_desc_cnt,
+ CACHE_LINE_SIZE, &rx->dqo.compl_ring_mem);
+ if (err != 0) {
+ device_printf(priv->dev,
+ "Failed to alloc compl ring for rx ring %d", i);
+ goto abort;
+ }
+ rx->dqo.compl_ring = rx->dqo.compl_ring_mem.cpu_addr;
+ rx->dqo.mask = priv->rx_desc_cnt - 1;
+
+ rx->dqo.buf_cnt = gve_is_qpl(priv) ? GVE_RX_NUM_QPL_PAGES_DQO :
+ priv->rx_desc_cnt;
+ rx->dqo.bufs = malloc(rx->dqo.buf_cnt * sizeof(struct gve_rx_buf_dqo),
+ M_GVE, M_WAITOK | M_ZERO);
+
+ if (gve_is_qpl(priv)) {
+ rx->com.qpl = &priv->qpls[priv->tx_cfg.max_queues + i];
+ if (rx->com.qpl == NULL) {
+ device_printf(priv->dev, "No QPL left for rx ring %d", i);
+ return (ENOMEM);
+ }
+ return (0);
+ }
+
err = bus_dma_tag_create(
bus_get_dma_tag(priv->dev), /* parent */
1, 0, /* alignment, bounds */
@@ -123,9 +151,6 @@
goto abort;
}
- rx->dqo.buf_cnt = priv->rx_desc_cnt;
- rx->dqo.bufs = malloc(rx->dqo.buf_cnt * sizeof(struct gve_rx_buf_dqo),
- M_GVE, M_WAITOK | M_ZERO);
for (j = 0; j < rx->dqo.buf_cnt; j++) {
err = bus_dmamap_create(rx->dqo.buf_dmatag, 0,
&rx->dqo.bufs[j].dmamap);
@@ -138,17 +163,6 @@
rx->dqo.bufs[j].mapped = true;
}
- err = gve_dma_alloc_coherent(priv,
- sizeof(struct gve_rx_compl_desc_dqo) * priv->rx_desc_cnt,
- CACHE_LINE_SIZE, &rx->dqo.compl_ring_mem);
- if (err != 0) {
- device_printf(priv->dev,
- "Failed to alloc compl ring for rx ring %d", i);
- goto abort;
- }
- rx->dqo.compl_ring = rx->dqo.compl_ring_mem.cpu_addr;
- rx->dqo.mask = priv->rx_desc_cnt - 1;
-
return (0);
abort:
@@ -202,10 +216,36 @@
gve_free_rx_mbufs_dqo(rx);
- SLIST_INIT(&rx->dqo.free_bufs);
- for (j = 0; j < rx->dqo.buf_cnt; j++)
- SLIST_INSERT_HEAD(&rx->dqo.free_bufs,
- &rx->dqo.bufs[j], slist_entry);
+ if (gve_is_qpl(priv)) {
+ SLIST_INIT(&rx->dqo.free_bufs);
+ STAILQ_INIT(&rx->dqo.used_bufs);
+
+ for (j = 0; j < rx->dqo.buf_cnt; j++) {
+ struct gve_rx_buf_dqo *buf = &rx->dqo.bufs[j];
+
+ vm_page_t page = rx->com.qpl->pages[buf - rx->dqo.bufs];
+ u_int ref_count = atomic_load_int(&page->ref_count);
+
+ /*
+ * An ifconfig down+up might see pages still in flight
+ * from before the interface was brought down.
+ */
+ if (VPRC_WIRE_COUNT(ref_count) == 1)
+ SLIST_INSERT_HEAD(&rx->dqo.free_bufs,
+ buf, slist_entry);
+ else
+ STAILQ_INSERT_TAIL(&rx->dqo.used_bufs,
+ buf, stailq_entry);
+
+ buf->num_nic_frags = 0;
+ buf->next_idx = 0;
+ }
+ } else {
+ SLIST_INIT(&rx->dqo.free_bufs);
+ for (j = 0; j < rx->dqo.buf_cnt; j++)
+ SLIST_INSERT_HEAD(&rx->dqo.free_bufs,
+ &rx->dqo.bufs[j], slist_entry);
+ }
}
int
@@ -223,6 +263,20 @@
return (FILTER_HANDLED);
}
+static void
+gve_rx_advance_head_dqo(struct gve_rx_ring *rx)
+{
+ rx->dqo.head = (rx->dqo.head + 1) & rx->dqo.mask;
+ rx->fill_cnt++; /* rx->fill_cnt is just a sysctl counter */
+
+ if ((rx->dqo.head & (GVE_RX_BUF_THRESH_DQO - 1)) == 0) {
+ bus_dmamap_sync(rx->desc_ring_mem.tag, rx->desc_ring_mem.map,
+ BUS_DMASYNC_PREWRITE);
+ gve_db_bar_dqo_write_4(rx->com.priv, rx->com.db_offset,
+ rx->dqo.head);
+ }
+}
+
static void
gve_rx_post_buf_dqo(struct gve_rx_ring *rx, struct gve_rx_buf_dqo *buf)
{
@@ -235,15 +289,7 @@
desc->buf_id = htole16(buf - rx->dqo.bufs);
desc->buf_addr = htole64(buf->addr);
- rx->dqo.head = (rx->dqo.head + 1) & rx->dqo.mask;
- rx->fill_cnt++; /* rx->fill_cnt is just a sysctl counter */
-
- if ((rx->dqo.head & (GVE_RX_BUF_THRESH_DQO - 1)) == 0) {
- bus_dmamap_sync(rx->desc_ring_mem.tag, rx->desc_ring_mem.map,
- BUS_DMASYNC_PREWRITE);
- gve_db_bar_dqo_write_4(rx->com.priv, rx->com.db_offset,
- rx->dqo.head);
- }
+ gve_rx_advance_head_dqo(rx);
}
static int
@@ -294,6 +340,103 @@
return (err);
}
+static struct gve_dma_handle *
+gve_get_page_dma_handle(struct gve_rx_ring *rx, struct gve_rx_buf_dqo *buf)
+{
+ return (&(rx->com.qpl->dmas[buf - rx->dqo.bufs]));
+}
+
+static void
+gve_rx_post_qpl_buf_dqo(struct gve_rx_ring *rx, struct gve_rx_buf_dqo *buf,
+ uint8_t frag_num)
+{
+ struct gve_rx_desc_dqo *desc = &rx->dqo.desc_ring[rx->dqo.head];
+ union gve_rx_qpl_buf_id_dqo composed_id;
+ struct gve_dma_handle *page_dma_handle;
+
+ composed_id.buf_id = buf - rx->dqo.bufs;
+ composed_id.frag_num = frag_num;
+ desc->buf_id = htole16(composed_id.all);
+
+ page_dma_handle = gve_get_page_dma_handle(rx, buf);
+ bus_dmamap_sync(page_dma_handle->tag, page_dma_handle->map,
+ BUS_DMASYNC_PREREAD);
+ desc->buf_addr = htole64(page_dma_handle->bus_addr +
+ frag_num * GVE_DEFAULT_RX_BUFFER_SIZE);
+
+ buf->num_nic_frags++;
+ gve_rx_advance_head_dqo(rx);
+}
+
+static void
+gve_rx_maybe_extract_from_used_bufs(struct gve_rx_ring *rx, bool just_one)
+{
+ struct gve_rx_buf_dqo *hol_blocker = NULL;
+ struct gve_rx_buf_dqo *buf;
+ u_int ref_count;
+ vm_page_t page;
+
+ while (true) {
+ buf = STAILQ_FIRST(&rx->dqo.used_bufs);
+ if (__predict_false(buf == NULL))
+ break;
+
+ page = rx->com.qpl->pages[buf - rx->dqo.bufs];
+ ref_count = atomic_load_int(&page->ref_count);
+
+ if (VPRC_WIRE_COUNT(ref_count) != 1) {
+ /* Account for one head-of-line blocker */
+ if (hol_blocker != NULL)
+ break;
+ hol_blocker = buf;
+ STAILQ_REMOVE_HEAD(&rx->dqo.used_bufs,
+ stailq_entry);
+ continue;
+ }
+
+ STAILQ_REMOVE_HEAD(&rx->dqo.used_bufs,
+ stailq_entry);
+ SLIST_INSERT_HEAD(&rx->dqo.free_bufs,
+ buf, slist_entry);
+ if (just_one)
+ break;
+ }
+
+ if (hol_blocker != NULL)
+ STAILQ_INSERT_HEAD(&rx->dqo.used_bufs,
+ hol_blocker, stailq_entry);
+}
+
+static int
+gve_rx_post_new_dqo_qpl_buf(struct gve_rx_ring *rx)
+{
+ struct gve_rx_buf_dqo *buf;
+
+ buf = SLIST_FIRST(&rx->dqo.free_bufs);
+ if (__predict_false(buf == NULL)) {
+ gve_rx_maybe_extract_from_used_bufs(rx, /*just_one=*/true);
+ buf = SLIST_FIRST(&rx->dqo.free_bufs);
+ if (__predict_false(buf == NULL))
+ return (ENOBUFS);
+ }
+
+ gve_rx_post_qpl_buf_dqo(rx, buf, buf->next_idx);
+ if (buf->next_idx == GVE_DQ_NUM_FRAGS_IN_PAGE - 1)
+ buf->next_idx = 0;
+ else
+ buf->next_idx++;
+
+ /*
+ * We have posted all the frags in this buf to the NIC.
+ * - buf will enter used_bufs once the last completion arrives.
+ * - It will re-enter free_bufs in gve_rx_maybe_extract_from_used_bufs
+ * when its wire count drops back to 1.
+ */
+ if (buf->next_idx == 0)
+ SLIST_REMOVE_HEAD(&rx->dqo.free_bufs, slist_entry);
+ return (0);
+}
+
static void
gve_rx_post_buffers_dqo(struct gve_rx_ring *rx, int how)
{
@@ -306,7 +449,10 @@
num_to_post = rx->dqo.mask - num_pending_bufs;
for (i = 0; i < num_to_post; i++) {
- err = gve_rx_post_new_mbuf_dqo(rx, how);
+ if (gve_is_qpl(rx->com.priv))
+ err = gve_rx_post_new_dqo_qpl_buf(rx);
+ else
+ err = gve_rx_post_new_mbuf_dqo(rx, how);
if (err)
break;
}
@@ -427,7 +573,7 @@
}
static int
-gve_rx_copybreak_dqo(struct gve_rx_ring *rx, struct gve_rx_buf_dqo *buf,
+gve_rx_copybreak_dqo(struct gve_rx_ring *rx, void *va,
struct gve_rx_compl_desc_dqo *compl_desc, uint16_t frag_len)
{
struct mbuf *mbuf;
@@ -440,14 +586,13 @@
counter_u64_add_protected(rx->stats.rx_copybreak_cnt, 1);
counter_exit();
- m_copyback(mbuf, 0, frag_len, mtod(buf->mbuf, char*));
+ m_copyback(mbuf, 0, frag_len, va);
mbuf->m_len = frag_len;
rx->ctx.mbuf_head = mbuf;
rx->ctx.mbuf_tail = mbuf;
rx->ctx.total_size += frag_len;
- gve_rx_post_buf_dqo(rx, buf);
gve_rx_input_mbuf_dqo(rx, compl_desc);
return (0);
}
@@ -495,10 +640,12 @@
frag_len = compl_desc->packet_len;
if (frag_len <= priv->rx_copybreak && !ctx->mbuf_head && is_last_frag) {
- err = gve_rx_copybreak_dqo(rx, buf, compl_desc, frag_len);
+ err = gve_rx_copybreak_dqo(rx, mtod(buf->mbuf, char*),
+ compl_desc, frag_len);
if (__predict_false(err != 0))
goto drop_frag;
(*work_done)++;
+ gve_rx_post_buf_dqo(rx, buf);
return;
}
@@ -579,6 +726,233 @@
rx->ctx = (struct gve_rx_ctx){};
}
+static void *
+gve_get_cpu_addr_for_qpl_buf(struct gve_rx_ring *rx,
+ struct gve_rx_buf_dqo *buf, uint8_t buf_frag_num)
+{
+ int page_idx = buf - rx->dqo.bufs;
+ void *va = rx->com.qpl->dmas[page_idx].cpu_addr;
+
+ va = (char *)va + (buf_frag_num * GVE_DEFAULT_RX_BUFFER_SIZE);
+ return (va);
+}
+
+static int
+gve_rx_add_clmbuf_to_ctx(struct gve_rx_ring *rx,
+ struct gve_rx_ctx *ctx, struct gve_rx_buf_dqo *buf,
+ uint8_t buf_frag_num, uint16_t frag_len)
+{
+ void *va = gve_get_cpu_addr_for_qpl_buf(rx, buf, buf_frag_num);
+ struct mbuf *mbuf;
+
+ if (ctx->mbuf_tail == NULL) {
+ mbuf = m_getcl(M_NOWAIT, MT_DATA, M_PKTHDR);
+ if (mbuf == NULL)
+ return (ENOMEM);
+ ctx->mbuf_head = mbuf;
+ ctx->mbuf_tail = mbuf;
+ } else {
+ mbuf = m_getcl(M_NOWAIT, MT_DATA, 0);
+ if (mbuf == NULL)
+ return (ENOMEM);
+ ctx->mbuf_tail->m_next = mbuf;
+ ctx->mbuf_tail = mbuf;
+ }
+
+ mbuf->m_len = frag_len;
+ ctx->total_size += frag_len;
+
+ m_copyback(mbuf, 0, frag_len, va);
+ counter_enter();
+ counter_u64_add_protected(rx->stats.rx_frag_copy_cnt, 1);
+ counter_exit();
+ return (0);
+}
+
+static int
+gve_rx_add_extmbuf_to_ctx(struct gve_rx_ring *rx,
+ struct gve_rx_ctx *ctx, struct gve_rx_buf_dqo *buf,
+ uint8_t buf_frag_num, uint16_t frag_len)
+{
+ struct mbuf *mbuf;
+ void *page_addr;
+ vm_page_t page;
+ int page_idx;
+ void *va;
+
+ if (ctx->mbuf_tail == NULL) {
+ mbuf = m_gethdr(M_NOWAIT, MT_DATA);
+ if (mbuf == NULL)
+ return (ENOMEM);
+ ctx->mbuf_head = mbuf;
+ ctx->mbuf_tail = mbuf;
+ } else {
+ mbuf = m_get(M_NOWAIT, MT_DATA);
+ if (mbuf == NULL)
+ return (ENOMEM);
+ ctx->mbuf_tail->m_next = mbuf;
+ ctx->mbuf_tail = mbuf;
+ }
+
+ mbuf->m_len = frag_len;
+ ctx->total_size += frag_len;
+
+ page_idx = buf - rx->dqo.bufs;
+ page = rx->com.qpl->pages[page_idx];
+ page_addr = rx->com.qpl->dmas[page_idx].cpu_addr;
+ va = (char *)page_addr + (buf_frag_num * GVE_DEFAULT_RX_BUFFER_SIZE);
+
+ /*
+ * Grab an extra ref to the page so that gve_mextadd_free
+ * does not end up freeing the page while the interface exists.
+ */
+ vm_page_wire(page);
+
+ counter_enter();
+ counter_u64_add_protected(rx->stats.rx_frag_flip_cnt, 1);
+ counter_exit();
+
+ MEXTADD(mbuf, va, frag_len,
+ gve_mextadd_free, page, page_addr,
+ 0, EXT_NET_DRV);
+ return (0);
+}
+
+static void
+gve_rx_dqo_qpl(struct gve_priv *priv, struct gve_rx_ring *rx,
+ struct gve_rx_compl_desc_dqo *compl_desc,
+ int *work_done)
+{
+ bool is_last_frag = compl_desc->end_of_packet != 0;
+ union gve_rx_qpl_buf_id_dqo composed_id;
+ struct gve_dma_handle *page_dma_handle;
+ struct gve_rx_ctx *ctx = &rx->ctx;
+ struct gve_rx_buf_dqo *buf;
+ uint32_t num_pending_bufs;
+ uint8_t buf_frag_num;
+ uint16_t frag_len;
+ uint16_t buf_id;
+ int err;
+
+ composed_id.all = le16toh(compl_desc->buf_id);
+ buf_id = composed_id.buf_id;
+ buf_frag_num = composed_id.frag_num;
+
+ if (__predict_false(buf_id >= rx->dqo.buf_cnt)) {
+ device_printf(priv->dev, "Invalid rx buf id %d on rxq %d, issuing reset\n",
+ buf_id, rx->com.id);
+ gve_schedule_reset(priv);
+ goto drop_frag_clear_ctx;
+ }
+ buf = &rx->dqo.bufs[buf_id];
+ if (__predict_false(buf->num_nic_frags == 0 ||
+ buf_frag_num > GVE_DQ_NUM_FRAGS_IN_PAGE - 1)) {
+ device_printf(priv->dev, "Spurious compl for buf id %d on rxq %d "
+ "with buf_frag_num %d and num_nic_frags %d, issuing reset\n",
+ buf_id, rx->com.id, buf_frag_num, buf->num_nic_frags);
+ gve_schedule_reset(priv);
+ goto drop_frag_clear_ctx;
+ }
+
+ buf->num_nic_frags--;
+
+ if (__predict_false(ctx->drop_pkt))
+ goto drop_frag;
+
+ if (__predict_false(compl_desc->rx_error)) {
+ counter_enter();
+ counter_u64_add_protected(rx->stats.rx_dropped_pkt_desc_err, 1);
+ counter_exit();
+ goto drop_frag;
+ }
+
+ page_dma_handle = gve_get_page_dma_handle(rx, buf);
+ bus_dmamap_sync(page_dma_handle->tag, page_dma_handle->map,
+ BUS_DMASYNC_POSTREAD);
+
+ frag_len = compl_desc->packet_len;
+ if (frag_len <= priv->rx_copybreak && !ctx->mbuf_head && is_last_frag) {
+ void *va = gve_get_cpu_addr_for_qpl_buf(rx, buf, buf_frag_num);
+
+ err = gve_rx_copybreak_dqo(rx, va, compl_desc, frag_len);
+ if (__predict_false(err != 0))
+ goto drop_frag;
+ (*work_done)++;
+ gve_rx_post_qpl_buf_dqo(rx, buf, buf_frag_num);
+ return;
+ }
+
+ num_pending_bufs = (rx->dqo.head - rx->dqo.tail) & rx->dqo.mask;
+ err = gve_rx_post_new_dqo_qpl_buf(rx);
+ if (__predict_false(err != 0 &&
+ num_pending_bufs <= GVE_RX_DQO_MIN_PENDING_BUFS)) {
+ /*
+ * Resort to copying this fragment into a cluster mbuf
+ * when the above threshold is breached and repost the
+ * incoming buffer. If we cannot find cluster mbufs,
+ * just drop the packet (to repost its buffer).
+ */
+ err = gve_rx_add_clmbuf_to_ctx(rx, ctx, buf,
+ buf_frag_num, frag_len);
+ if (err != 0) {
+ counter_enter();
+ counter_u64_add_protected(
+ rx->stats.rx_dropped_pkt_buf_post_fail, 1);
+ counter_exit();
+ goto drop_frag;
+ }
+ gve_rx_post_qpl_buf_dqo(rx, buf, buf_frag_num);
+ } else {
+ err = gve_rx_add_extmbuf_to_ctx(rx, ctx, buf,
+ buf_frag_num, frag_len);
+ if (__predict_false(err != 0)) {
+ counter_enter();
+ counter_u64_add_protected(
+ rx->stats.rx_dropped_pkt_mbuf_alloc_fail, 1);
+ counter_exit();
+ goto drop_frag;
+ }
+ }
+
+ /*
+ * Both counts need to be checked.
+ *
+ * num_nic_frags == 0 implies no pending completions
+ * but not all frags may have yet been posted.
+ *
+ * next_idx == 0 implies all frags have been posted
+ * but there might be pending completions.
+ */
+ if (buf->num_nic_frags == 0 && buf->next_idx == 0)
+ STAILQ_INSERT_TAIL(&rx->dqo.used_bufs, buf, stailq_entry);
+
+ if (is_last_frag) {
+ gve_rx_input_mbuf_dqo(rx, compl_desc);
+ (*work_done)++;
+ }
+ return;
+
+drop_frag:
+ /* Clear the earlier frags if there were any */
+ m_freem(ctx->mbuf_head);
+ rx->ctx = (struct gve_rx_ctx){};
+ /* Drop the rest of the pkt if there are more frags */
+ ctx->drop_pkt = true;
+ /* Reuse the dropped frag's buffer */
+ gve_rx_post_qpl_buf_dqo(rx, buf, buf_frag_num);
+
+ if (is_last_frag)
+ goto drop_frag_clear_ctx;
+ return;
+
+drop_frag_clear_ctx:
+ counter_enter();
+ counter_u64_add_protected(rx->stats.rx_dropped_pkt, 1);
+ counter_exit();
+ m_freem(ctx->mbuf_head);
+ rx->ctx = (struct gve_rx_ctx){};
+}
+
static bool
gve_rx_cleanup_dqo(struct gve_priv *priv, struct gve_rx_ring *rx, int budget)
{
@@ -604,13 +978,18 @@
rx->dqo.tail = (rx->dqo.tail + 1) & rx->dqo.mask;
rx->dqo.cur_gen_bit ^= (rx->dqo.tail == 0);
- gve_rx_dqo(priv, rx, compl_desc, &work_done);
+ if (gve_is_qpl(priv))
+ gve_rx_dqo_qpl(priv, rx, compl_desc, &work_done);
+ else
+ gve_rx_dqo(priv, rx, compl_desc, &work_done);
}
if (work_done != 0)
tcp_lro_flush_all(&rx->lro);
gve_rx_post_buffers_dqo(rx, M_NOWAIT);
+ if (gve_is_qpl(priv))
+ gve_rx_maybe_extract_from_used_bufs(rx, /*just_one=*/false);
return (work_done == budget);
}
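
In the DQO QPL receive path each QPL page is handed to the NIC as
GVE_DQ_NUM_FRAGS_IN_PAGE separate buffers: gve_rx_post_new_dqo_qpl_buf()
advances next_idx as fragments are posted and removes the page from free_bufs
once the last one goes out, while gve_rx_dqo_qpl() decrements num_nic_frags
per completion and parks the page on used_bufs once both counters say it is
idle. The page only returns to free_bufs when
gve_rx_maybe_extract_from_used_bufs() sees its wire count back at 1, i.e. once
every if_input-ed mbuf referencing it has been freed. A simplified userspace
model of that per-page bookkeeping, assuming 4 KiB pages split into two 2 KiB
fragments:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define NUM_FRAGS_IN_PAGE 2	/* PAGE_SIZE / GVE_DEFAULT_RX_BUFFER_SIZE */

struct rx_buf_model {
	uint8_t num_nic_frags;	/* completions still owed by the NIC */
	uint8_t next_idx;	/* next fragment of the page to post */
};

/* Post one fragment; returns true once every fragment has been posted. */
static bool
post_frag(struct rx_buf_model *buf)
{
	printf("posted frag %u\n", buf->next_idx);
	buf->num_nic_frags++;
	buf->next_idx = (buf->next_idx + 1) % NUM_FRAGS_IN_PAGE;
	return (buf->next_idx == 0);	/* page leaves free_bufs when true */
}

/* Process one completion; returns true when the page can move to used_bufs. */
static bool
complete_frag(struct rx_buf_model *buf)
{
	buf->num_nic_frags--;
	return (buf->num_nic_frags == 0 && buf->next_idx == 0);
}

int
main(void)
{
	struct rx_buf_model buf = {0};
	int i;

	for (i = 0; i < NUM_FRAGS_IN_PAGE; i++) {
		if (post_frag(&buf))
			printf("all frags posted: remove from free_bufs\n");
	}
	for (i = 0; i < NUM_FRAGS_IN_PAGE; i++) {
		if (complete_frag(&buf))
			printf("no pending completions: move to used_bufs\n");
	}
	return (0);
}
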
diff --git a/sys/dev/gve/gve_sysctl.c b/sys/dev/gve/gve_sysctl.c
--- a/sys/dev/gve/gve_sysctl.c
+++ b/sys/dev/gve/gve_sysctl.c
@@ -83,6 +83,10 @@
"rx_dropped_pkt_desc_err", CTLFLAG_RD,
&stats->rx_dropped_pkt_desc_err,
"Packets dropped due to descriptor error");
+ SYSCTL_ADD_COUNTER_U64(ctx, list, OID_AUTO,
+ "rx_dropped_pkt_buf_post_fail", CTLFLAG_RD,
+ &stats->rx_dropped_pkt_buf_post_fail,
+ "Packets dropped due to failure to post enough buffers");
SYSCTL_ADD_COUNTER_U64(ctx, list, OID_AUTO,
"rx_dropped_pkt_mbuf_alloc_fail", CTLFLAG_RD,
&stats->rx_dropped_pkt_mbuf_alloc_fail,
@@ -155,6 +159,10 @@
"tx_delayed_pkt_nospace_compring", CTLFLAG_RD,
&stats->tx_delayed_pkt_nospace_compring,
"Packets delayed due to no space in comp ring");
+ SYSCTL_ADD_COUNTER_U64(ctx, tx_list, OID_AUTO,
+ "tx_delayed_pkt_nospace_qpl_bufs", CTLFLAG_RD,
+ &stats->tx_delayed_pkt_nospace_qpl_bufs,
+ "Packets delayed due to not enough qpl bufs");
SYSCTL_ADD_COUNTER_U64(ctx, tx_list, OID_AUTO,
"tx_delayed_pkt_tsoerr", CTLFLAG_RD,
&stats->tx_delayed_pkt_tsoerr,
diff --git a/sys/dev/gve/gve_tx.c b/sys/dev/gve/gve_tx.c
--- a/sys/dev/gve/gve_tx.c
+++ b/sys/dev/gve/gve_tx.c
@@ -752,7 +752,10 @@
* The reference is passed in so that in the case of
* errors, the new mbuf chain is what's put back on the br.
*/
- err = gve_xmit_dqo(tx, &mbuf);
+ if (gve_is_qpl(priv))
+ err = gve_xmit_dqo_qpl(tx, mbuf);
+ else
+ err = gve_xmit_dqo(tx, &mbuf);
}
if (__predict_false(err != 0 && mbuf != NULL)) {
diff --git a/sys/dev/gve/gve_tx_dqo.c b/sys/dev/gve/gve_tx_dqo.c
--- a/sys/dev/gve/gve_tx_dqo.c
+++ b/sys/dev/gve/gve_tx_dqo.c
@@ -51,7 +51,12 @@
if (!pending_pkt->mbuf)
continue;
- gve_unmap_packet(tx, pending_pkt);
+ if (gve_is_qpl(tx->com.priv)) {
+ pending_pkt->qpl_buf_head = -1;
+ pending_pkt->num_qpl_bufs = 0;
+ } else
+ gve_unmap_packet(tx, pending_pkt);
+
m_freem(pending_pkt->mbuf);
pending_pkt->mbuf = NULL;
}
@@ -76,7 +81,7 @@
if (tx->dqo.pending_pkts != NULL) {
gve_free_tx_mbufs_dqo(tx);
- if (tx->dqo.buf_dmatag) {
+ if (!gve_is_qpl(priv) && tx->dqo.buf_dmatag) {
for (j = 0; j < tx->dqo.num_pending_pkts; j++)
if (tx->dqo.pending_pkts[j].state !=
GVE_PACKET_STATE_UNALLOCATED)
@@ -88,8 +93,59 @@
tx->dqo.pending_pkts = NULL;
}
- if (tx->dqo.buf_dmatag)
+ if (!gve_is_qpl(priv) && tx->dqo.buf_dmatag)
bus_dma_tag_destroy(tx->dqo.buf_dmatag);
+
+ if (gve_is_qpl(priv) && tx->dqo.qpl_bufs != NULL) {
+ free(tx->dqo.qpl_bufs, M_GVE);
+ tx->dqo.qpl_bufs = NULL;
+ }
+}
+
+static int
+gve_tx_alloc_rda_fields_dqo(struct gve_tx_ring *tx)
+{
+ struct gve_priv *priv = tx->com.priv;
+ int err;
+ int j;
+
+ /*
+ * DMA tag for mapping Tx mbufs
+ * The maxsize, nsegments, and maxsegsize params should match
+ * the if_sethwtso* arguments in gve_setup_ifnet in gve_main.c.
+ */
+ err = bus_dma_tag_create(
+ bus_get_dma_tag(priv->dev), /* parent */
+ 1, 0, /* alignment, bounds */
+ BUS_SPACE_MAXADDR, /* lowaddr */
+ BUS_SPACE_MAXADDR, /* highaddr */
+ NULL, NULL, /* filter, filterarg */
+ GVE_TSO_MAXSIZE_DQO, /* maxsize */
+ GVE_TX_MAX_DATA_DESCS_DQO, /* nsegments */
+ GVE_TX_MAX_BUF_SIZE_DQO, /* maxsegsize */
+ BUS_DMA_ALLOCNOW, /* flags */
+ NULL, /* lockfunc */
+ NULL, /* lockarg */
+ &tx->dqo.buf_dmatag);
+ if (err != 0) {
+ device_printf(priv->dev, "%s: bus_dma_tag_create failed: %d\n",
+ __func__, err);
+ return (err);
+ }
+
+ for (j = 0; j < tx->dqo.num_pending_pkts; j++) {
+ err = bus_dmamap_create(tx->dqo.buf_dmatag, 0,
+ &tx->dqo.pending_pkts[j].dmamap);
+ if (err != 0) {
+ device_printf(priv->dev,
+ "err in creating pending pkt dmamap %d: %d",
+ j, err);
+ return (err);
+ }
+ tx->dqo.pending_pkts[j].state = GVE_PACKET_STATE_FREE;
+ }
+
+ return (0);
}
int
@@ -98,7 +154,6 @@
struct gve_tx_ring *tx = &priv->tx[i];
uint16_t num_pending_pkts;
int err;
- int j;
/* Descriptor ring */
err = gve_dma_alloc_coherent(priv,
@@ -122,30 +177,6 @@
}
tx->dqo.compl_ring = tx->dqo.compl_ring_mem.cpu_addr;
- /*
- * DMA tag for mapping Tx mbufs
- * The maxsize, nsegments, and maxsegsize params should match
- * the if_sethwtso* arguments in gve_setup_ifnet in gve_main.c.
- */
- err = bus_dma_tag_create(
- bus_get_dma_tag(priv->dev), /* parent */
- 1, 0, /* alignment, bounds */
- BUS_SPACE_MAXADDR, /* lowaddr */
- BUS_SPACE_MAXADDR, /* highaddr */
- NULL, NULL, /* filter, filterarg */
- GVE_TSO_MAXSIZE_DQO, /* maxsize */
- GVE_TX_MAX_DATA_DESCS_DQO, /* nsegments */
- GVE_TX_MAX_BUF_SIZE_DQO, /* maxsegsize */
- BUS_DMA_ALLOCNOW, /* flags */
- NULL, /* lockfunc */
- NULL, /* lockarg */
- &tx->dqo.buf_dmatag);
- if (err != 0) {
- device_printf(priv->dev, "%s: bus_dma_tag_create failed: %d\n",
- __func__, err);
- goto abort;
- }
-
/*
* pending_pkts array
*
@@ -167,18 +198,18 @@
sizeof(struct gve_tx_pending_pkt_dqo) * num_pending_pkts,
M_GVE, M_WAITOK | M_ZERO);
- for (j = 0; j < tx->dqo.num_pending_pkts; j++) {
- err = bus_dmamap_create(tx->dqo.buf_dmatag, 0,
- &tx->dqo.pending_pkts[j].dmamap);
- if (err != 0) {
- device_printf(priv->dev,
- "err in creating pending pkt dmamap %d: %d",
- j, err);
- goto abort;
- }
- tx->dqo.pending_pkts[j].state = GVE_PACKET_STATE_FREE;
- }
+ if (gve_is_qpl(priv)) {
+ int qpl_buf_cnt;
+ tx->com.qpl = &priv->qpls[i];
+ qpl_buf_cnt = GVE_TX_BUFS_PER_PAGE_DQO *
+ tx->com.qpl->num_pages;
+
+ tx->dqo.qpl_bufs = malloc(
+ sizeof(*tx->dqo.qpl_bufs) * qpl_buf_cnt,
+ M_GVE, M_WAITOK | M_ZERO);
+ } else
+ gve_tx_alloc_rda_fields_dqo(tx);
return (0);
abort:
@@ -330,6 +361,44 @@
return (0);
}
+static int
+gve_tx_fill_ctx_descs(struct gve_tx_ring *tx, struct mbuf *mbuf,
+ bool is_tso, uint32_t *desc_idx)
+{
+ struct gve_tx_general_context_desc_dqo *gen_desc;
+ struct gve_tx_tso_context_desc_dqo *tso_desc;
+ struct gve_tx_metadata_dqo metadata;
+ int header_len;
+ int err;
+
+ metadata = (struct gve_tx_metadata_dqo){0};
+ gve_extract_tx_metadata_dqo(mbuf, &metadata);
+
+ if (is_tso) {
+ err = gve_prep_tso(mbuf, &header_len);
+ if (__predict_false(err)) {
+ counter_enter();
+ counter_u64_add_protected(
+ tx->stats.tx_delayed_pkt_tsoerr, 1);
+ counter_exit();
+ return (err);
+ }
+
+ tso_desc = &tx->dqo.desc_ring[*desc_idx].tso_ctx;
+ gve_tx_fill_tso_ctx_desc(tso_desc, mbuf, &metadata, header_len);
+
+ *desc_idx = (*desc_idx + 1) & tx->dqo.desc_mask;
+ counter_enter();
+ counter_u64_add_protected(tx->stats.tso_packet_cnt, 1);
+ counter_exit();
+ }
+
+ gen_desc = &tx->dqo.desc_ring[*desc_idx].general_ctx;
+ gve_tx_fill_general_ctx_desc(gen_desc, &metadata);
+ *desc_idx = (*desc_idx + 1) & tx->dqo.desc_mask;
+ return (0);
+}
+
static int
gve_map_mbuf_dqo(struct gve_tx_ring *tx,
struct mbuf **mbuf, bus_dmamap_t dmamap,
@@ -495,18 +564,197 @@
}
}
+static bool
+gve_tx_have_enough_qpl_bufs(struct gve_tx_ring *tx, int num_bufs)
+{
+ uint32_t available = tx->dqo.qpl_bufs_produced_cached -
+ tx->dqo.qpl_bufs_consumed;
+
+ if (__predict_true(available >= num_bufs))
+ return (true);
+
+ tx->dqo.qpl_bufs_produced_cached = atomic_load_acq_32(
+ &tx->dqo.qpl_bufs_produced);
+ available = tx->dqo.qpl_bufs_produced_cached -
+ tx->dqo.qpl_bufs_consumed;
+
+ if (__predict_true(available >= num_bufs))
+ return (true);
+ return (false);
+}
+
+static int32_t
+gve_tx_alloc_qpl_buf(struct gve_tx_ring *tx)
+{
+ int32_t buf = tx->dqo.free_qpl_bufs_csm;
+
+ if (__predict_false(buf == -1)) {
+ tx->dqo.free_qpl_bufs_csm = atomic_swap_32(
+ &tx->dqo.free_qpl_bufs_prd, -1);
+ buf = tx->dqo.free_qpl_bufs_csm;
+ if (__predict_false(buf == -1))
+ return (-1);
+ }
+
+ tx->dqo.free_qpl_bufs_csm = tx->dqo.qpl_bufs[buf];
+ tx->dqo.qpl_bufs_consumed++;
+ return (buf);
+}
+
+/*
+ * Tx buffer i corresponds to
+ * qpl_page_id = i / GVE_TX_BUFS_PER_PAGE_DQO
+ * qpl_page_offset = (i % GVE_TX_BUFS_PER_PAGE_DQO) * GVE_TX_BUF_SIZE_DQO
+ */
+static void
+gve_tx_buf_get_addr_dqo(struct gve_tx_ring *tx,
+ int32_t index, void **va, bus_addr_t *dma_addr)
+{
+ int page_id = index >> (PAGE_SHIFT - GVE_TX_BUF_SHIFT_DQO);
+ int offset = (index & (GVE_TX_BUFS_PER_PAGE_DQO - 1)) <<
+ GVE_TX_BUF_SHIFT_DQO;
+
+ *va = (char *)tx->com.qpl->dmas[page_id].cpu_addr + offset;
+ *dma_addr = tx->com.qpl->dmas[page_id].bus_addr + offset;
+}
+
+static struct gve_dma_handle *
+gve_get_page_dma_handle(struct gve_tx_ring *tx, int32_t index)
+{
+ int page_id = index >> (PAGE_SHIFT - GVE_TX_BUF_SHIFT_DQO);
+
+ return (&tx->com.qpl->dmas[page_id]);
+}
+
+static void
+gve_tx_copy_mbuf_and_write_pkt_descs(struct gve_tx_ring *tx,
+ struct mbuf *mbuf, struct gve_tx_pending_pkt_dqo *pkt,
+ bool csum_enabled, int16_t completion_tag,
+ uint32_t *desc_idx)
+{
+ int32_t pkt_len = mbuf->m_pkthdr.len;
+ struct gve_dma_handle *dma;
+ uint32_t copy_offset = 0;
+ int32_t prev_buf = -1;
+ uint32_t copy_len;
+ bus_addr_t addr;
+ int32_t buf;
+ void *va;
+
+ MPASS(pkt->num_qpl_bufs == 0);
+ MPASS(pkt->qpl_buf_head == -1);
+
+ while (copy_offset < pkt_len) {
+ buf = gve_tx_alloc_qpl_buf(tx);
+ /* We already checked for availability */
+ MPASS(buf != -1);
+
+ gve_tx_buf_get_addr_dqo(tx, buf, &va, &addr);
+ copy_len = MIN(GVE_TX_BUF_SIZE_DQO, pkt_len - copy_offset);
+ m_copydata(mbuf, copy_offset, copy_len, va);
+ copy_offset += copy_len;
+
+ dma = gve_get_page_dma_handle(tx, buf);
+ bus_dmamap_sync(dma->tag, dma->map, BUS_DMASYNC_PREWRITE);
+
+ gve_tx_fill_pkt_desc_dqo(tx, desc_idx,
+ copy_len, addr, completion_tag,
+ /*eop=*/copy_offset == pkt_len,
+ csum_enabled);
+
+ /* Link all the qpl bufs for a packet */
+ if (prev_buf == -1)
+ pkt->qpl_buf_head = buf;
+ else
+ tx->dqo.qpl_bufs[prev_buf] = buf;
+
+ prev_buf = buf;
+ pkt->num_qpl_bufs++;
+ }
+
+ tx->dqo.qpl_bufs[buf] = -1;
+}
+
+int
+gve_xmit_dqo_qpl(struct gve_tx_ring *tx, struct mbuf *mbuf)
+{
+ uint32_t desc_idx = tx->dqo.desc_tail;
+ struct gve_tx_pending_pkt_dqo *pkt;
+ int total_descs_needed;
+ int16_t completion_tag;
+ bool has_csum_flag;
+ int csum_flags;
+ bool is_tso;
+ int nsegs;
+ int err;
+
+ csum_flags = mbuf->m_pkthdr.csum_flags;
+ has_csum_flag = csum_flags & (CSUM_TCP | CSUM_UDP |
+ CSUM_IP6_TCP | CSUM_IP6_UDP | CSUM_TSO);
+ is_tso = csum_flags & CSUM_TSO;
+
+ nsegs = howmany(mbuf->m_pkthdr.len, GVE_TX_BUF_SIZE_DQO);
+ /* Check if we have enough room in the desc ring */
+ total_descs_needed = 1 + /* general_ctx_desc */
+ nsegs + /* pkt_desc */
+ (is_tso ? 1 : 0); /* tso_ctx_desc */
+ if (__predict_false(!gve_tx_has_desc_room_dqo(tx, total_descs_needed)))
+ return (ENOBUFS);
+
+ if (!gve_tx_have_enough_qpl_bufs(tx, nsegs)) {
+ counter_enter();
+ counter_u64_add_protected(
+ tx->stats.tx_delayed_pkt_nospace_qpl_bufs, 1);
+ counter_exit();
+ return (ENOBUFS);
+ }
+
+ pkt = gve_alloc_pending_packet(tx);
+ if (pkt == NULL) {
+ counter_enter();
+ counter_u64_add_protected(
+ tx->stats.tx_delayed_pkt_nospace_compring, 1);
+ counter_exit();
+ return (ENOBUFS);
+ }
+ completion_tag = pkt - tx->dqo.pending_pkts;
+ pkt->mbuf = mbuf;
+
+ err = gve_tx_fill_ctx_descs(tx, mbuf, is_tso, &desc_idx);
+ if (err)
+ goto abort;
+
+ gve_tx_copy_mbuf_and_write_pkt_descs(tx, mbuf, pkt,
+ has_csum_flag, completion_tag, &desc_idx);
+
+ /* Remember the index of the last desc written */
+ tx->dqo.desc_tail = desc_idx;
+
+ /*
+ * Request a descriptor completion on the last descriptor of the
+ * packet if we are allowed to by the HW enforced interval.
+ */
+ gve_tx_request_desc_compl(tx, desc_idx);
+
+ tx->req += total_descs_needed; /* tx->req is just a sysctl counter */
+ return (0);
+
+abort:
+ pkt->mbuf = NULL;
+ gve_free_pending_packet(tx, pkt);
+ return (err);
+}
+
int
gve_xmit_dqo(struct gve_tx_ring *tx, struct mbuf **mbuf_ptr)
{
bus_dma_segment_t segs[GVE_TX_MAX_DATA_DESCS_DQO];
uint32_t desc_idx = tx->dqo.desc_tail;
- struct gve_tx_metadata_dqo metadata;
struct gve_tx_pending_pkt_dqo *pkt;
struct mbuf *mbuf = *mbuf_ptr;
int total_descs_needed;
int16_t completion_tag;
bool has_csum_flag;
- int header_len;
int csum_flags;
bool is_tso;
int nsegs;
@@ -556,34 +804,11 @@
goto abort_with_dma;
}
- bus_dmamap_sync(tx->dqo.buf_dmatag, pkt->dmamap, BUS_DMASYNC_PREWRITE);
-
- metadata = (struct gve_tx_metadata_dqo){0};
- gve_extract_tx_metadata_dqo(mbuf, &metadata);
-
- if (is_tso) {
- err = gve_prep_tso(mbuf, &header_len);
- if (__predict_false(err)) {
- counter_enter();
- counter_u64_add_protected(
- tx->stats.tx_delayed_pkt_tsoerr, 1);
- counter_exit();
- goto abort_with_dma;
- }
-
- gve_tx_fill_tso_ctx_desc(&tx->dqo.desc_ring[desc_idx].tso_ctx,
- mbuf, &metadata, header_len);
- desc_idx = (desc_idx + 1) & tx->dqo.desc_mask;
-
- counter_enter();
- counter_u64_add_protected(tx->stats.tso_packet_cnt, 1);
- counter_exit();
- }
-
- gve_tx_fill_general_ctx_desc(&tx->dqo.desc_ring[desc_idx].general_ctx,
- &metadata);
- desc_idx = (desc_idx + 1) & tx->dqo.desc_mask;
+ err = gve_tx_fill_ctx_descs(tx, mbuf, is_tso, &desc_idx);
+ if (err)
+ goto abort_with_dma;
+ bus_dmamap_sync(tx->dqo.buf_dmatag, pkt->dmamap, BUS_DMASYNC_PREWRITE);
for (i = 0; i < nsegs; i++) {
gve_tx_fill_pkt_desc_dqo(tx, &desc_idx,
segs[i].ds_len, segs[i].ds_addr,
@@ -611,6 +836,39 @@
return (err);
}
+static void
+gve_reap_qpl_bufs_dqo(struct gve_tx_ring *tx,
+ struct gve_tx_pending_pkt_dqo *pkt)
+{
+ int32_t buf = pkt->qpl_buf_head;
+ struct gve_dma_handle *dma;
+ int32_t qpl_buf_tail;
+ int32_t old_head;
+ int i;
+
+ for (i = 0; i < pkt->num_qpl_bufs; i++) {
+ dma = gve_get_page_dma_handle(tx, buf);
+ bus_dmamap_sync(dma->tag, dma->map, BUS_DMASYNC_POSTWRITE);
+ qpl_buf_tail = buf;
+ buf = tx->dqo.qpl_bufs[buf];
+ }
+ MPASS(buf == -1);
+ buf = qpl_buf_tail;
+
+ while (true) {
+ old_head = atomic_load_32(&tx->dqo.free_qpl_bufs_prd);
+ tx->dqo.qpl_bufs[buf] = old_head;
+
+ if (atomic_cmpset_rel_32(&tx->dqo.free_qpl_bufs_prd,
+ old_head, pkt->qpl_buf_head))
+ break;
+ }
+ atomic_add_rel_32(&tx->dqo.qpl_bufs_produced, pkt->num_qpl_bufs);
+
+ pkt->qpl_buf_head = -1;
+ pkt->num_qpl_bufs = 0;
+}
+
static uint64_t
gve_handle_packet_completion(struct gve_priv *priv,
struct gve_tx_ring *tx, uint16_t compl_tag)
@@ -635,7 +893,12 @@
}
pkt_len = pending_pkt->mbuf->m_pkthdr.len;
- gve_unmap_packet(tx, pending_pkt);
+
+ if (gve_is_qpl(priv))
+ gve_reap_qpl_bufs_dqo(tx, pending_pkt);
+ else
+ gve_unmap_packet(tx, pending_pkt);
+
m_freem(pending_pkt->mbuf);
pending_pkt->mbuf = NULL;
gve_free_pending_packet(tx, pending_pkt);
@@ -711,6 +974,21 @@
tx->dqo.free_pending_pkts_csm = 0;
atomic_store_rel_32(&tx->dqo.free_pending_pkts_prd, -1);
+ if (gve_is_qpl(priv)) {
+ int qpl_buf_cnt = GVE_TX_BUFS_PER_PAGE_DQO *
+ tx->com.qpl->num_pages;
+
+ for (j = 0; j < qpl_buf_cnt - 1; j++)
+ tx->dqo.qpl_bufs[j] = j + 1;
+ tx->dqo.qpl_bufs[j] = -1;
+
+ tx->dqo.free_qpl_bufs_csm = 0;
+ atomic_store_32(&tx->dqo.free_qpl_bufs_prd, -1);
+ atomic_store_32(&tx->dqo.qpl_bufs_produced, qpl_buf_cnt);
+ tx->dqo.qpl_bufs_produced_cached = qpl_buf_cnt;
+ tx->dqo.qpl_bufs_consumed = 0;
+ }
+
gve_tx_clear_desc_ring_dqo(tx);
gve_tx_clear_compl_ring_dqo(tx);
}
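
On the transmit side, qpl_bufs doubles as the link array of a singly linked
free list of 2 KiB bounce buffers: gve_xmit_dqo_qpl() pops from the consumer
head free_qpl_bufs_csm and, when that runs dry, steals the producer list in a
single atomic swap, while gve_reap_qpl_bufs_dqo() pushes each completed
packet's buffer chain back onto free_qpl_bufs_prd with a compare-and-swap
loop. The sketch below models that scheme in userspace, with C11 atomics
standing in for FreeBSD's atomic_swap_32/atomic_cmpset_rel_32; -1 marks both
the list terminator and an empty list.

#include <stdatomic.h>
#include <stdint.h>
#include <stdio.h>

#define NUM_BUFS 2			/* tiny pool, enough to show stealing */

static int32_t qpl_bufs[NUM_BUFS];	/* value = index of the next free buf */
static int32_t free_csm;		/* consumer head, xmit path only */
static _Atomic int32_t free_prd;	/* producer head, shared with reaper */

/* Xmit path: pop one buffer, stealing the producer list if needed. */
static int32_t
alloc_buf(void)
{
	int32_t buf = free_csm;

	if (buf == -1) {
		buf = atomic_exchange(&free_prd, -1);	/* steal whole list */
		if (buf == -1)
			return (-1);			/* truly out of bufs */
	}
	free_csm = qpl_bufs[buf];
	return (buf);
}

/* Completion path: push a chain [head .. tail] back onto the producer list. */
static void
free_chain(int32_t head, int32_t tail)
{
	int32_t old_head;

	do {
		old_head = atomic_load(&free_prd);
		qpl_bufs[tail] = old_head;
	} while (!atomic_compare_exchange_weak(&free_prd, &old_head, head));
}

int
main(void)
{
	int32_t a, b;
	int i;

	for (i = 0; i < NUM_BUFS - 1; i++)	/* chain 0 -> 1 -> ... -> -1 */
		qpl_bufs[i] = i + 1;
	qpl_bufs[NUM_BUFS - 1] = -1;
	free_csm = 0;
	atomic_init(&free_prd, -1);

	a = alloc_buf();
	b = alloc_buf();
	printf("allocated bufs %d and %d\n", (int)a, (int)b);

	qpl_bufs[a] = b;		/* link them as one packet's chain */
	qpl_bufs[b] = -1;
	free_chain(a, b);		/* completion hands the chain back */
	printf("alloc after stealing the producer list: %d\n", (int)alloc_buf());
	return (0);
}

gve_clear_tx_ring_dqo() seeds the same structure at ring (re)initialization:
every buffer is chained into the consumer list and the producer head is set
to -1.
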
