Page MenuHomeFreeBSD

D47138.diff
No OneTemporary

D47138.diff

diff --git a/sys/dev/gve/gve.h b/sys/dev/gve/gve.h
--- a/sys/dev/gve/gve.h
+++ b/sys/dev/gve/gve.h
@@ -341,7 +341,7 @@
counter_u64_t tpackets;
counter_u64_t tso_packet_cnt;
counter_u64_t tx_dropped_pkt;
- counter_u64_t tx_dropped_pkt_nospace_device;
+ counter_u64_t tx_delayed_pkt_nospace_device;
counter_u64_t tx_dropped_pkt_nospace_bufring;
counter_u64_t tx_delayed_pkt_nospace_descring;
counter_u64_t tx_delayed_pkt_nospace_compring;
@@ -383,6 +383,7 @@
struct task xmit_task;
struct taskqueue *xmit_tq;
+ bool stopped;
/* Accessed when writing descriptors */
struct buf_ring *br;
diff --git a/sys/dev/gve/gve_main.c b/sys/dev/gve/gve_main.c
--- a/sys/dev/gve/gve_main.c
+++ b/sys/dev/gve/gve_main.c
@@ -32,10 +32,10 @@
#include "gve_adminq.h"
#include "gve_dqo.h"
-#define GVE_DRIVER_VERSION "GVE-FBSD-1.3.0\n"
+#define GVE_DRIVER_VERSION "GVE-FBSD-1.3.1\n"
#define GVE_VERSION_MAJOR 1
#define GVE_VERSION_MINOR 3
-#define GVE_VERSION_SUB 0
+#define GVE_VERSION_SUB 1
#define GVE_DEFAULT_RX_COPYBREAK 256
diff --git a/sys/dev/gve/gve_sysctl.c b/sys/dev/gve/gve_sysctl.c
--- a/sys/dev/gve/gve_sysctl.c
+++ b/sys/dev/gve/gve_sysctl.c
@@ -140,9 +140,9 @@
"tx_bytes", CTLFLAG_RD,
&stats->tbytes, "Bytes transmitted");
SYSCTL_ADD_COUNTER_U64(ctx, tx_list, OID_AUTO,
- "tx_dropped_pkt_nospace_device", CTLFLAG_RD,
- &stats->tx_dropped_pkt_nospace_device,
- "Packets dropped due to no space in device");
+ "tx_delayed_pkt_nospace_device", CTLFLAG_RD,
+ &stats->tx_delayed_pkt_nospace_device,
+ "Packets delayed due to no space in device");
SYSCTL_ADD_COUNTER_U64(ctx, tx_list, OID_AUTO,
"tx_dropped_pkt_nospace_bufring", CTLFLAG_RD,
&stats->tx_dropped_pkt_nospace_bufring,
diff --git a/sys/dev/gve/gve_tx.c b/sys/dev/gve/gve_tx.c
--- a/sys/dev/gve/gve_tx.c
+++ b/sys/dev/gve/gve_tx.c
@@ -246,6 +246,8 @@
struct gve_tx_ring *tx = &priv->tx[i];
struct gve_ring_com *com = &tx->com;
+ atomic_store_bool(&tx->stopped, false);
+
NET_TASK_INIT(&com->cleanup_task, 0, cleanup, tx);
com->cleanup_tq = taskqueue_create_fast("gve tx", M_WAITOK,
taskqueue_thread_enqueue, &com->cleanup_tq);
@@ -427,6 +429,11 @@
gve_db_bar_write_4(priv, tx->com.irq_db_offset, GVE_IRQ_MASK);
taskqueue_enqueue(tx->com.cleanup_tq, &tx->com.cleanup_task);
}
+
+ if (atomic_load_bool(&tx->stopped) && space_freed) {
+ atomic_store_bool(&tx->stopped, false);
+ taskqueue_enqueue(tx->xmit_tq, &tx->xmit_task);
+ }
}
static void
@@ -671,8 +678,7 @@
bytes_required = gve_fifo_bytes_required(tx, first_seg_len, pkt_len);
if (__predict_false(!gve_can_tx(tx, bytes_required))) {
counter_enter();
- counter_u64_add_protected(tx->stats.tx_dropped_pkt_nospace_device, 1);
- counter_u64_add_protected(tx->stats.tx_dropped_pkt, 1);
+ counter_u64_add_protected(tx->stats.tx_delayed_pkt_nospace_device, 1);
counter_exit();
return (ENOBUFS);
}
@@ -733,6 +739,59 @@
return (0);
}
+static int
+gve_xmit_mbuf(struct gve_tx_ring *tx,
+ struct mbuf **mbuf)
+{
+ if (gve_is_gqi(tx->com.priv))
+ return (gve_xmit(tx, *mbuf));
+
+ if (gve_is_qpl(tx->com.priv))
+ return (gve_xmit_dqo_qpl(tx, *mbuf));
+
+ /*
+ * gve_xmit_dqo might attempt to defrag the mbuf chain.
+ * The reference is passed in so that in the case of
+ * errors, the new mbuf chain is what's put back on the br.
+ */
+ return (gve_xmit_dqo(tx, mbuf));
+}
+
+/*
+ * Has the side-effect of stopping the xmit queue by setting tx->stopped
+ */
+static int
+gve_xmit_retry_enobuf_mbuf(struct gve_tx_ring *tx,
+ struct mbuf **mbuf)
+{
+ int err;
+
+ atomic_store_bool(&tx->stopped, true);
+
+ /*
+ * Room made in the queue BEFORE the barrier will be seen by the
+ * gve_xmit_mbuf retry below.
+ *
+ * If room is made in the queue AFTER the barrier, the cleanup tq
+ * iteration creating the room will either see a tx->stopped value
+ * of 0 or the 1 we just wrote:
+ *
+ * If it sees a 1, then it would enqueue the xmit tq. Enqueue
+ * implies a retry on the waiting pkt.
+ *
+ * If it sees a 0, then that implies a previous iteration overwrote
+ * our 1, and that iteration would enqueue the xmit tq. Enqueue
+ * implies a retry on the waiting pkt.
+ */
+ atomic_thread_fence_seq_cst();
+
+ err = gve_xmit_mbuf(tx, mbuf);
+ if (err == 0)
+ atomic_store_bool(&tx->stopped, false);
+
+ return (err);
+}
+
static void
gve_xmit_br(struct gve_tx_ring *tx)
{
@@ -743,24 +802,23 @@
while ((if_getdrvflags(ifp) & IFF_DRV_RUNNING) != 0 &&
(mbuf = drbr_peek(ifp, tx->br)) != NULL) {
+ err = gve_xmit_mbuf(tx, &mbuf);
- if (gve_is_gqi(priv))
- err = gve_xmit(tx, mbuf);
- else {
- /*
- * gve_xmit_dqo might attempt to defrag the mbuf chain.
- * The reference is passed in so that in the case of
- * errors, the new mbuf chain is what's put back on the br.
- */
- if (gve_is_qpl(priv))
- err = gve_xmit_dqo_qpl(tx, mbuf);
- else
- err = gve_xmit_dqo(tx, &mbuf);
- }
+ /*
+ * We need to stop this taskqueue when we can't xmit the pkt due
+ * to lack of space in the NIC ring (ENOBUFS). The retry exists
+ * to guard against a TOCTTOU bug that could end up freezing the
+ * queue forever.
+ */
+ if (__predict_false(mbuf != NULL && err == ENOBUFS))
+ err = gve_xmit_retry_enobuf_mbuf(tx, &mbuf);
if (__predict_false(err != 0 && mbuf != NULL)) {
- drbr_putback(ifp, tx->br, mbuf);
- taskqueue_enqueue(tx->xmit_tq, &tx->xmit_task);
+ if (err == EINVAL) {
+ drbr_advance(ifp, tx->br);
+ m_freem(mbuf);
+ } else
+ drbr_putback(ifp, tx->br, mbuf);
break;
}
@@ -827,7 +885,8 @@
is_br_empty = drbr_empty(ifp, tx->br);
err = drbr_enqueue(ifp, tx->br, mbuf);
if (__predict_false(err != 0)) {
- taskqueue_enqueue(tx->xmit_tq, &tx->xmit_task);
+ if (!atomic_load_bool(&tx->stopped))
+ taskqueue_enqueue(tx->xmit_tq, &tx->xmit_task);
counter_enter();
counter_u64_add_protected(tx->stats.tx_dropped_pkt_nospace_bufring, 1);
counter_u64_add_protected(tx->stats.tx_dropped_pkt, 1);
@@ -842,9 +901,8 @@
if (is_br_empty && (GVE_RING_TRYLOCK(tx) != 0)) {
gve_xmit_br(tx);
GVE_RING_UNLOCK(tx);
- } else {
+ } else if (!atomic_load_bool(&tx->stopped))
taskqueue_enqueue(tx->xmit_tq, &tx->xmit_task);
- }
return (0);
}
diff --git a/sys/dev/gve/gve_tx_dqo.c b/sys/dev/gve/gve_tx_dqo.c
--- a/sys/dev/gve/gve_tx_dqo.c
+++ b/sys/dev/gve/gve_tx_dqo.c
@@ -1052,6 +1052,16 @@
work_done++;
}
+ /*
+ * Waking the xmit taskqueue has to occur after room has been made in
+ * the queue.
+ */
+ atomic_thread_fence_seq_cst();
+ if (atomic_load_bool(&tx->stopped) && work_done) {
+ atomic_store_bool(&tx->stopped, false);
+ taskqueue_enqueue(tx->xmit_tq, &tx->xmit_task);
+ }
+
tx->done += work_done; /* tx->done is just a sysctl counter */
counter_enter();
counter_u64_add_protected(tx->stats.tbytes, bytes_done);

File Metadata

Mime Type
text/plain
Expires
Sat, Oct 11, 9:51 AM (19 h, 58 m)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
23506612
Default Alt Text
D47138.diff (6 KB)

Event Timeline