Page MenuHomeFreeBSD

D20117.diff
No OneTemporary

D20117.diff

Index: sys/dev/cxgbe/t4_main.c
===================================================================
--- sys/dev/cxgbe/t4_main.c
+++ sys/dev/cxgbe/t4_main.c
@@ -2057,13 +2057,8 @@
return (rc);
}
#ifdef RATELIMIT
- if (m->m_pkthdr.snd_tag != NULL) {
- /* EAGAIN tells the stack we are not the correct interface. */
- if (__predict_false(ifp != m->m_pkthdr.snd_tag->ifp)) {
- m_freem(m);
- return (EAGAIN);
- }
-
+ if (m->m_pkthdr.csum_flags & CSUM_SND_TAG) {
+ MPASS(m->m_pkthdr.snd_tag->ifp == ifp);
return (ethofld_transmit(ifp, m));
}
#endif
Index: sys/dev/cxgbe/t4_sched.c
===================================================================
--- sys/dev/cxgbe/t4_sched.c
+++ sys/dev/cxgbe/t4_sched.c
@@ -789,7 +789,7 @@
mtx_init(&cst->lock, "cst_lock", NULL, MTX_DEF);
mbufq_init(&cst->pending_tx, INT_MAX);
mbufq_init(&cst->pending_fwack, INT_MAX);
- cst->com.ifp = ifp;
+ m_snd_tag_init(&cst->com, ifp);
cst->flags |= EO_FLOWC_PENDING | EO_SND_TAG_REF;
cst->adapter = sc;
cst->port_id = pi->port_id;
Index: sys/dev/cxgbe/t4_sge.c
===================================================================
--- sys/dev/cxgbe/t4_sge.c
+++ sys/dev/cxgbe/t4_sge.c
@@ -2325,7 +2325,7 @@
needs_eo(struct mbuf *m)
{
- return (m->m_pkthdr.snd_tag != NULL);
+ return (m->m_pkthdr.csum_flags & CSUM_SND_TAG);
}
#endif
@@ -2539,8 +2539,11 @@
* checksumming is enabled. needs_l4_csum happens to check for all the
* right things.
*/
- if (__predict_false(needs_eo(m0) && !needs_l4_csum(m0)))
+ if (__predict_false(needs_eo(m0) && !needs_l4_csum(m0))) {
+ m_snd_tag_rele(m0->m_pkthdr.snd_tag);
m0->m_pkthdr.snd_tag = NULL;
+ m0->m_pkthdr.csum_flags &= ~CSUM_SND_TAG;
+ }
#endif
if (!needs_tso(m0) &&
@@ -5922,6 +5925,21 @@
cst->tx_nocompl = 0;
}
(void) mbufq_dequeue(&cst->pending_tx);
+
+ /*
+ * Drop the mbuf's reference on the tag now rather
+ * than waiting until m_freem(). This ensures that
+ * cxgbe_snd_tag_free gets called when the inp drops
+ * its reference on the tag and there are no more
+ * mbufs in the pending_tx queue, so it can flush any
+ * pending requests. Otherwise if the last mbuf
+ * doesn't request a completion the etid will never be
+ * released.
+ */
+ m->m_pkthdr.snd_tag = NULL;
+ m->m_pkthdr.csum_flags &= ~CSUM_SND_TAG;
+ m_snd_tag_rele(&cst->com);
+
mbufq_enqueue(&cst->pending_fwack, m);
}
}
@@ -5933,6 +5951,7 @@
int rc;
MPASS(m0->m_nextpkt == NULL);
+ MPASS(m0->m_pkthdr.csum_flags & CSUM_SND_TAG);
MPASS(m0->m_pkthdr.snd_tag != NULL);
cst = mst_to_cst(m0->m_pkthdr.snd_tag);
@@ -5967,8 +5986,18 @@
mbufq_enqueue(&cst->pending_tx, m0);
cst->plen += m0->m_pkthdr.len;
+ /*
+ * Hold an extra reference on the tag while generating work
+ * requests to ensure that we don't try to free the tag during
+ * ethofld_tx() in case we are sending the final mbuf after
+ * the inp was freed.
+ */
+ m_snd_tag_ref(&cst->com);
ethofld_tx(cst);
- rc = 0;
+ mtx_unlock(&cst->lock);
+ m_snd_tag_rele(&cst->com);
+ return (0);
+
done:
mtx_unlock(&cst->lock);
if (__predict_false(rc != 0))
@@ -6015,7 +6044,6 @@
cst->flags &= ~EO_FLUSH_RPL_PENDING;
cst->tx_credits += cpl->credits;
-freetag:
cxgbe_snd_tag_free_locked(cst);
return (0); /* cst is gone. */
}
@@ -6033,22 +6061,27 @@
cst->tx_credits += cpl->credits;
MPASS(cst->tx_credits <= cst->tx_total);
- m = mbufq_first(&cst->pending_tx);
- if (m != NULL && cst->tx_credits >= mbuf_eo_len16(m))
- ethofld_tx(cst);
-
- if (__predict_false((cst->flags & EO_SND_TAG_REF) == 0) &&
- cst->ncompl == 0) {
- if (cst->tx_credits == cst->tx_total)
- goto freetag;
- else {
- MPASS((cst->flags & EO_FLUSH_RPL_PENDING) == 0);
- send_etid_flush_wr(cst);
- }
+ if (cst->flags & EO_SND_TAG_REF) {
+ /*
+ * As with ethofld_transmit(), hold an extra reference
+ * so that the tag is stable across ethofld_tx().
+ */
+ m_snd_tag_ref(&cst->com);
+ m = mbufq_first(&cst->pending_tx);
+ if (m != NULL && cst->tx_credits >= mbuf_eo_len16(m))
+ ethofld_tx(cst);
+ mtx_unlock(&cst->lock);
+ m_snd_tag_rele(&cst->com);
+ } else {
+ /*
+ * There shouldn't be any pending packets if the tag
+ * was freed by the kernel since any pending packet
+ * should hold a reference to the tag.
+ */
+ MPASS(mbufq_first(&cst->pending_tx) == NULL);
+ mtx_unlock(&cst->lock);
}
- mtx_unlock(&cst->lock);
-
return (0);
}
#endif
Index: sys/dev/mlx5/mlx5_en/mlx5_en_main.c
===================================================================
--- sys/dev/mlx5/mlx5_en/mlx5_en_main.c
+++ sys/dev/mlx5/mlx5_en/mlx5_en_main.c
@@ -1247,7 +1247,7 @@
wq_sz = mlx5_wq_ll_get_size(&rq->wq);
- err = -tcp_lro_init_args(&rq->lro, c->tag.m_snd_tag.ifp, TCP_LRO_ENTRIES, wq_sz);
+ err = -tcp_lro_init_args(&rq->lro, priv->ifp, TCP_LRO_ENTRIES, wq_sz);
if (err)
goto err_rq_wq_destroy;
@@ -1288,7 +1288,7 @@
}
}
- rq->ifp = c->tag.m_snd_tag.ifp;
+ rq->ifp = priv->ifp;
rq->channel = c;
rq->ix = c->ix;
@@ -2145,7 +2145,6 @@
c->priv = priv;
c->ix = ix;
/* setup send tag */
- c->tag.m_snd_tag.ifp = priv->ifp;
c->tag.type = IF_SND_TAG_TYPE_UNLIMITED;
c->mkey_be = cpu_to_be32(priv->mr.key);
c->num_tc = priv->num_tc;
@@ -3987,6 +3986,8 @@
if (unlikely(pch->sq[0].running == 0))
return (ENXIO);
mlx5e_ref_channel(priv);
+ MPASS(pch->tag.m_snd_tag.refcount == 0);
+ m_snd_tag_init(&pch->tag.m_snd_tag, ifp);
*ppmt = &pch->tag.m_snd_tag;
return (0);
}
Index: sys/dev/mlx5/mlx5_en/mlx5_en_rl.c
===================================================================
--- sys/dev/mlx5/mlx5_en/mlx5_en_rl.c
+++ sys/dev/mlx5/mlx5_en/mlx5_en_rl.c
@@ -843,7 +843,6 @@
for (i = 0; i < rl->param.tx_channels_per_worker_def; i++) {
struct mlx5e_rl_channel *channel = rlw->channels + i;
channel->worker = rlw;
- channel->tag.m_snd_tag.ifp = priv->ifp;
channel->tag.type = IF_SND_TAG_TYPE_RATE_LIMIT;
STAILQ_INSERT_TAIL(&rlw->index_list_head, channel, entry);
}
@@ -1127,6 +1126,8 @@
}
/* store pointer to mbuf tag */
+ MPASS(channel->tag.m_snd_tag.refcount == 0);
+ m_snd_tag_init(&channel->tag.m_snd_tag, ifp);
*ppmt = &channel->tag.m_snd_tag;
done:
return (error);
Index: sys/dev/mlx5/mlx5_en/mlx5_en_tx.c
===================================================================
--- sys/dev/mlx5/mlx5_en/mlx5_en_tx.c
+++ sys/dev/mlx5/mlx5_en/mlx5_en_tx.c
@@ -83,10 +83,6 @@
struct mlx5e_snd_tag *ptag;
struct mlx5e_sq *sq;
- /* check for route change */
- if (mb->m_pkthdr.snd_tag->ifp != ifp)
- return (NULL);
-
/* get pointer to sendqueue */
ptag = container_of(mb->m_pkthdr.snd_tag,
struct mlx5e_snd_tag, m_snd_tag);
@@ -609,21 +605,10 @@
struct mlx5e_sq *sq;
int ret;
- if (mb->m_pkthdr.snd_tag != NULL) {
+ if (mb->m_pkthdr.csum_flags & CSUM_SND_TAG) {
+ MPASS(mb->m_pkthdr.snd_tag->ifp == ifp);
sq = mlx5e_select_queue_by_send_tag(ifp, mb);
if (unlikely(sq == NULL)) {
- /* Check for route change */
- if (mb->m_pkthdr.snd_tag->ifp != ifp) {
- /* Free mbuf */
- m_freem(mb);
-
- /*
- * Tell upper layers about route
- * change and to re-transmit this
- * packet:
- */
- return (EAGAIN);
- }
goto select_queue;
}
} else {
Index: sys/kern/kern_mbuf.c
===================================================================
--- sys/kern/kern_mbuf.c
+++ sys/kern/kern_mbuf.c
@@ -46,8 +46,12 @@
#include <sys/mutex.h>
#include <sys/protosw.h>
#include <sys/smp.h>
+#include <sys/socket.h>
#include <sys/sysctl.h>
+#include <net/if.h>
+#include <net/if_var.h>
+
#include <vm/vm.h>
#include <vm/vm_extern.h>
#include <vm/vm_kern.h>
@@ -112,6 +116,10 @@
SYSCTL_QUAD(_kern_ipc, OID_AUTO, maxmbufmem, CTLFLAG_RDTUN | CTLFLAG_NOFETCH, &maxmbufmem, 0,
"Maximum real memory allocatable to various mbuf types");
+static counter_u64_t snd_tag_count;
+SYSCTL_COUNTER_U64(_kern_ipc, OID_AUTO, num_snd_tags, CTLFLAG_RW,
+ &snd_tag_count, "# of active mbuf send tags");
+
/*
* tunable_mbinit() has to be run before any mbuf allocations are done.
*/
@@ -378,6 +386,8 @@
*/
EVENTHANDLER_REGISTER(vm_lowmem, mb_reclaim, NULL,
EVENTHANDLER_PRI_FIRST);
+
+ snd_tag_count = counter_u64_alloc(M_WAITOK);
}
SYSINIT(mbuf, SI_SUB_MBUF, SI_ORDER_FIRST, mbuf_init, NULL);
@@ -1149,3 +1159,24 @@
while (mb != NULL)
mb = m_free(mb);
}
+
+void
+m_snd_tag_init(struct m_snd_tag *mst, struct ifnet *ifp)
+{
+
+ if_ref(ifp);
+ mst->ifp = ifp;
+ refcount_init(&mst->refcount, 1);
+ counter_u64_add(snd_tag_count, 1);
+}
+
+void
+m_snd_tag_destroy(struct m_snd_tag *mst)
+{
+ struct ifnet *ifp;
+
+ ifp = mst->ifp;
+ ifp->if_snd_tag_free(mst);
+ if_rele(ifp);
+ counter_u64_add(snd_tag_count, -1);
+}
Index: sys/kern/uipc_mbuf.c
===================================================================
--- sys/kern/uipc_mbuf.c
+++ sys/kern/uipc_mbuf.c
@@ -382,6 +382,10 @@
to->m_pkthdr = from->m_pkthdr; /* especially tags */
SLIST_INIT(&from->m_pkthdr.tags); /* purge tags from src */
from->m_flags &= ~M_PKTHDR;
+ if (from->m_pkthdr.csum_flags & CSUM_SND_TAG) {
+ from->m_pkthdr.csum_flags &= ~CSUM_SND_TAG;
+ from->m_pkthdr.snd_tag = NULL;
+ }
}
/*
@@ -414,6 +418,8 @@
if ((to->m_flags & M_EXT) == 0)
to->m_data = to->m_pktdat;
to->m_pkthdr = from->m_pkthdr;
+ if (from->m_pkthdr.csum_flags & CSUM_SND_TAG)
+ m_snd_tag_ref(from->m_pkthdr.snd_tag);
SLIST_INIT(&to->m_pkthdr.tags);
return (m_tag_copy_chain(to, from, how));
}
@@ -924,7 +930,12 @@
return (NULL);
n->m_next = m->m_next;
m->m_next = NULL;
- n->m_pkthdr.rcvif = m0->m_pkthdr.rcvif;
+ if (m0->m_pkthdr.csum_flags & CSUM_SND_TAG) {
+ n->m_pkthdr.snd_tag =
+ m_snd_tag_ref(m0->m_pkthdr.snd_tag);
+ n->m_pkthdr.csum_flags |= CSUM_SND_TAG;
+ } else
+ n->m_pkthdr.rcvif = m0->m_pkthdr.rcvif;
n->m_pkthdr.len = m0->m_pkthdr.len - len0;
m0->m_pkthdr.len = len0;
return (n);
@@ -932,7 +943,12 @@
n = m_gethdr(wait, m0->m_type);
if (n == NULL)
return (NULL);
- n->m_pkthdr.rcvif = m0->m_pkthdr.rcvif;
+ if (m0->m_pkthdr.csum_flags & CSUM_SND_TAG) {
+ n->m_pkthdr.snd_tag =
+ m_snd_tag_ref(m0->m_pkthdr.snd_tag);
+ n->m_pkthdr.csum_flags |= CSUM_SND_TAG;
+ } else
+ n->m_pkthdr.rcvif = m0->m_pkthdr.rcvif;
n->m_pkthdr.len = m0->m_pkthdr.len - len0;
m0->m_pkthdr.len = len0;
if (m->m_flags & M_EXT)
Index: sys/net/bpf.c
===================================================================
--- sys/net/bpf.c
+++ sys/net/bpf.c
@@ -2304,7 +2304,7 @@
int gottime;
/* Skip outgoing duplicate packets. */
- if ((m->m_flags & M_PROMISC) != 0 && m->m_pkthdr.rcvif == NULL) {
+ if ((m->m_flags & M_PROMISC) != 0 && m_rcvif(m) == NULL) {
m->m_flags &= ~M_PROMISC;
return;
}
@@ -2314,7 +2314,7 @@
NET_EPOCH_ENTER(et);
CK_LIST_FOREACH(d, &bp->bif_dlist, bd_next) {
- if (BPF_CHECK_DIRECTION(d, m->m_pkthdr.rcvif, bp->bif_ifp))
+ if (BPF_CHECK_DIRECTION(d, m_rcvif(m), bp->bif_ifp))
continue;
counter_u64_add(d->bd_rcount, 1);
#ifdef BPF_JITTER
Index: sys/net/if.c
===================================================================
--- sys/net/if.c
+++ sys/net/if.c
@@ -4295,6 +4295,8 @@
void
if_setrcvif(struct mbuf *m, if_t ifp)
{
+
+ MPASS((m->m_pkthdr.csum_flags & CSUM_SND_TAG) == 0);
m->m_pkthdr.rcvif = (struct ifnet *)ifp;
}
Index: sys/net/if_ethersubr.c
===================================================================
--- sys/net/if_ethersubr.c
+++ sys/net/if_ethersubr.c
@@ -816,6 +816,7 @@
* We will rely on rcvif being set properly in the deferred context,
* so assert it is correct here.
*/
+ MPASS((m->m_pkthdr.csum_flags & CSUM_SND_TAG) == 0);
KASSERT(m->m_pkthdr.rcvif == ifp, ("%s: ifnet mismatch m %p "
"rcvif %p ifp %p", __func__, m, m->m_pkthdr.rcvif, ifp));
CURVNET_SET_QUIET(ifp->if_vnet);
Index: sys/net/if_lagg.c
===================================================================
--- sys/net/if_lagg.c
+++ sys/net/if_lagg.c
@@ -95,6 +95,11 @@
{0, NULL}
};
+struct lagg_snd_tag {
+ struct m_snd_tag com;
+ struct m_snd_tag *tag;
+};
+
VNET_DEFINE(SLIST_HEAD(__trhead, lagg_softc), lagg_list); /* list of laggs */
#define V_lagg_list VNET(lagg_list)
VNET_DEFINE_STATIC(struct mtx, lagg_list_mtx);
@@ -134,6 +139,10 @@
static int lagg_snd_tag_alloc(struct ifnet *,
union if_snd_tag_alloc_params *,
struct m_snd_tag **);
+static int lagg_snd_tag_modify(struct m_snd_tag *,
+ union if_snd_tag_modify_params *);
+static int lagg_snd_tag_query(struct m_snd_tag *,
+ union if_snd_tag_query_params *);
static void lagg_snd_tag_free(struct m_snd_tag *);
#endif
static int lagg_setmulti(struct lagg_port *);
@@ -525,6 +534,8 @@
ifp->if_flags = IFF_SIMPLEX | IFF_BROADCAST | IFF_MULTICAST;
#ifdef RATELIMIT
ifp->if_snd_tag_alloc = lagg_snd_tag_alloc;
+ ifp->if_snd_tag_modify = lagg_snd_tag_modify;
+ ifp->if_snd_tag_query = lagg_snd_tag_query;
ifp->if_snd_tag_free = lagg_snd_tag_free;
#endif
ifp->if_capenable = ifp->if_capabilities = IFCAP_HWSTATS;
@@ -1537,63 +1548,126 @@
}
#ifdef RATELIMIT
-static int
-lagg_snd_tag_alloc(struct ifnet *ifp,
- union if_snd_tag_alloc_params *params,
- struct m_snd_tag **ppmt)
+static inline struct lagg_snd_tag *
+mst_to_lst(struct m_snd_tag *mst)
{
- struct lagg_softc *sc = (struct lagg_softc *)ifp->if_softc;
+
+ return (__containerof(mst, struct lagg_snd_tag, com));
+}
+
+/*
+ * Look up the port used by a specific flow. This only works for lagg
+ * protocols with deterministic port mappings (e.g. not roundrobin).
+ * In addition protocols which use a hash to map flows to ports must
+ * be configured to use the mbuf flowid rather than hashing packet
+ * contents.
+ */
+static struct lagg_port *
+lookup_snd_tag_port(struct ifnet *ifp, uint32_t flowid, uint32_t flowtype)
+{
+ struct lagg_softc *sc;
struct lagg_port *lp;
struct lagg_lb *lb;
uint32_t p;
- LAGG_RLOCK();
+ sc = ifp->if_softc;
+
switch (sc->sc_proto) {
case LAGG_PROTO_FAILOVER:
- lp = lagg_link_active(sc, sc->sc_primary);
- break;
+ return (lagg_link_active(sc, sc->sc_primary));
case LAGG_PROTO_LOADBALANCE:
if ((sc->sc_opts & LAGG_OPT_USE_FLOWID) == 0 ||
- params->hdr.flowtype == M_HASHTYPE_NONE) {
- LAGG_RUNLOCK();
- return (EOPNOTSUPP);
- }
- p = params->hdr.flowid >> sc->flowid_shift;
+ flowtype == M_HASHTYPE_NONE)
+ return (NULL);
+ p = flowid >> sc->flowid_shift;
p %= sc->sc_count;
lb = (struct lagg_lb *)sc->sc_psc;
lp = lb->lb_ports[p];
- lp = lagg_link_active(sc, lp);
- break;
+ return (lagg_link_active(sc, lp));
case LAGG_PROTO_LACP:
if ((sc->sc_opts & LAGG_OPT_USE_FLOWID) == 0 ||
- params->hdr.flowtype == M_HASHTYPE_NONE) {
- LAGG_RUNLOCK();
- return (EOPNOTSUPP);
- }
- lp = lacp_select_tx_port_by_hash(sc, params->hdr.flowid);
- break;
+ flowtype == M_HASHTYPE_NONE)
+ return (NULL);
+ return (lacp_select_tx_port_by_hash(sc, flowid));
default:
- LAGG_RUNLOCK();
- return (EOPNOTSUPP);
+ return (NULL);
}
+}
+
+static int
+lagg_snd_tag_alloc(struct ifnet *ifp,
+ union if_snd_tag_alloc_params *params,
+ struct m_snd_tag **ppmt)
+{
+ struct lagg_snd_tag *lst;
+ struct lagg_softc *sc;
+ struct lagg_port *lp;
+ struct ifnet *lp_ifp;
+ int error;
+
+ sc = ifp->if_softc;
+
+ LAGG_RLOCK();
+ lp = lookup_snd_tag_port(ifp, params->hdr.flowid, params->hdr.flowtype);
if (lp == NULL) {
LAGG_RUNLOCK();
return (EOPNOTSUPP);
}
- ifp = lp->lp_ifp;
+ if (lp->lp_ifp == NULL || lp->lp_ifp->if_snd_tag_alloc == NULL) {
+ LAGG_RUNLOCK();
+ return (EOPNOTSUPP);
+ }
+ lp_ifp = lp->lp_ifp;
+ if_ref(lp_ifp);
LAGG_RUNLOCK();
- if (ifp == NULL || ifp->if_snd_tag_alloc == NULL ||
- (ifp->if_capenable & IFCAP_TXRTLMT) == 0)
- return (EOPNOTSUPP);
- /* forward allocation request */
- return (ifp->if_snd_tag_alloc(ifp, params, ppmt));
+ lst = malloc(sizeof(*lst), M_LAGG, M_NOWAIT);
+ if (lst == NULL) {
+ if_rele(lp_ifp);
+ return (ENOMEM);
+ }
+
+ error = lp_ifp->if_snd_tag_alloc(lp_ifp, params, &lst->tag);
+ if_rele(lp_ifp);
+ if (error) {
+ free(lst, M_LAGG);
+ return (error);
+ }
+
+ m_snd_tag_init(&lst->com, ifp);
+
+ *ppmt = &lst->com;
+ return (0);
+}
+
+static int
+lagg_snd_tag_modify(struct m_snd_tag *mst,
+ union if_snd_tag_modify_params *params)
+{
+ struct lagg_snd_tag *lst;
+
+ lst = mst_to_lst(mst);
+ return (lst->tag->ifp->if_snd_tag_modify(lst->tag, params));
+}
+
+static int
+lagg_snd_tag_query(struct m_snd_tag *mst,
+ union if_snd_tag_query_params *params)
+{
+ struct lagg_snd_tag *lst;
+
+ lst = mst_to_lst(mst);
+ return (lst->tag->ifp->if_snd_tag_query(lst->tag, params));
}
static void
-lagg_snd_tag_free(struct m_snd_tag *tag)
+lagg_snd_tag_free(struct m_snd_tag *mst)
{
- tag->ifp->if_snd_tag_free(tag);
+ struct lagg_snd_tag *lst;
+
+ lst = mst_to_lst(mst);
+ m_snd_tag_rele(lst->tag);
+ free(lst, M_LAGG);
}
#endif
@@ -1720,6 +1794,10 @@
struct lagg_softc *sc = (struct lagg_softc *)ifp->if_softc;
int error;
+#ifdef RATELIMIT
+ if (m->m_pkthdr.csum_flags & CSUM_SND_TAG)
+ MPASS(m->m_pkthdr.snd_tag->ifp == ifp);
+#endif
LAGG_RLOCK();
/* We need a Tx algorithm and at least one port */
if (sc->sc_proto == LAGG_PROTO_NONE || sc->sc_count == 0) {
@@ -1910,6 +1988,21 @@
lagg_enqueue(struct ifnet *ifp, struct mbuf *m)
{
+#ifdef RATELIMIT
+ if (m->m_pkthdr.csum_flags & CSUM_SND_TAG) {
+ struct lagg_snd_tag *lst;
+ struct m_snd_tag *mst;
+
+ mst = m->m_pkthdr.snd_tag;
+ lst = mst_to_lst(mst);
+ if (lst->tag->ifp != ifp) {
+ m_freem(m);
+ return (EAGAIN);
+ }
+ m->m_pkthdr.snd_tag = m_snd_tag_ref(lst->tag);
+ m_snd_tag_rele(mst);
+ }
+#endif
return (ifp->if_transmit)(ifp, m);
}
Index: sys/net/if_vlan.c
===================================================================
--- sys/net/if_vlan.c
+++ sys/net/if_vlan.c
@@ -103,6 +103,20 @@
int refcnt;
};
+#ifdef RATELIMIT
+struct vlan_snd_tag {
+ struct m_snd_tag com;
+ struct m_snd_tag *tag;
+};
+
+static inline struct vlan_snd_tag *
+mst_to_vst(struct m_snd_tag *mst)
+{
+
+ return (__containerof(mst, struct vlan_snd_tag, com));
+}
+#endif
+
/*
* This macro provides a facility to iterate over every vlan on a trunk with
* the assumption that none will be added/removed during iteration.
@@ -267,7 +281,11 @@
#ifdef RATELIMIT
static int vlan_snd_tag_alloc(struct ifnet *,
union if_snd_tag_alloc_params *, struct m_snd_tag **);
-static void vlan_snd_tag_free(struct m_snd_tag *);
+static int vlan_snd_tag_modify(struct m_snd_tag *,
+ union if_snd_tag_modify_params *);
+static int vlan_snd_tag_query(struct m_snd_tag *,
+ union if_snd_tag_query_params *);
+static void vlan_snd_tag_free(struct m_snd_tag *);
#endif
static void vlan_qflush(struct ifnet *ifp);
static int vlan_setflag(struct ifnet *ifp, int flag, int status,
@@ -1048,6 +1066,8 @@
ifp->if_ioctl = vlan_ioctl;
#ifdef RATELIMIT
ifp->if_snd_tag_alloc = vlan_snd_tag_alloc;
+ ifp->if_snd_tag_modify = vlan_snd_tag_modify;
+ ifp->if_snd_tag_query = vlan_snd_tag_query;
ifp->if_snd_tag_free = vlan_snd_tag_free;
#endif
ifp->if_flags = VLAN_IFFLAGS;
@@ -1137,6 +1157,26 @@
BPF_MTAP(ifp, m);
+#ifdef RATELIMIT
+ if (m->m_pkthdr.csum_flags & CSUM_SND_TAG) {
+ struct vlan_snd_tag *vst;
+ struct m_snd_tag *mst;
+
+ MPASS(m->m_pkthdr.snd_tag->ifp == ifp);
+ mst = m->m_pkthdr.snd_tag;
+ vst = mst_to_vst(mst);
+ if (vst->tag->ifp != p) {
+ if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
+ NET_EPOCH_EXIT(et);
+ m_freem(m);
+ return (EAGAIN);
+ }
+
+ m->m_pkthdr.snd_tag = m_snd_tag_ref(vst->tag);
+ m_snd_tag_rele(mst);
+ }
+#endif
+
/*
* Do not run parent's if_transmit() if the parent is not up,
* or parent's driver will cause a system crash.
@@ -1928,18 +1968,71 @@
union if_snd_tag_alloc_params *params,
struct m_snd_tag **ppmt)
{
+ struct epoch_tracker et;
+ struct vlan_snd_tag *vst;
+ struct ifvlan *ifv;
+ struct ifnet *parent;
+ int error;
- /* get trunk device */
- ifp = vlan_trunkdev(ifp);
- if (ifp == NULL || (ifp->if_capenable & IFCAP_TXRTLMT) == 0)
+ NET_EPOCH_ENTER(et);
+ ifv = ifp->if_softc;
+ if (ifv->ifv_trunk != NULL)
+ parent = PARENT(ifv);
+ else
+ parent = NULL;
+ if (parent == NULL || parent->if_snd_tag_alloc == NULL) {
+ NET_EPOCH_EXIT(et);
return (EOPNOTSUPP);
- /* forward allocation request */
- return (ifp->if_snd_tag_alloc(ifp, params, ppmt));
+ }
+ if_ref(parent);
+ NET_EPOCH_EXIT(et);
+
+ vst = malloc(sizeof(*vst), M_VLAN, M_NOWAIT);
+ if (vst == NULL) {
+ if_rele(parent);
+ return (ENOMEM);
+ }
+
+ error = parent->if_snd_tag_alloc(parent, params, &vst->tag);
+ if_rele(parent);
+ if (error) {
+ free(vst, M_VLAN);
+ return (error);
+ }
+
+ m_snd_tag_init(&vst->com, ifp);
+
+ *ppmt = &vst->com;
+ return (0);
+}
+
+static int
+vlan_snd_tag_modify(struct m_snd_tag *mst,
+ union if_snd_tag_modify_params *params)
+{
+ struct vlan_snd_tag *vst;
+
+ vst = mst_to_vst(mst);
+ return (vst->tag->ifp->if_snd_tag_modify(vst->tag, params));
+}
+
+static int
+vlan_snd_tag_query(struct m_snd_tag *mst,
+ union if_snd_tag_query_params *params)
+{
+ struct vlan_snd_tag *vst;
+
+ vst = mst_to_vst(mst);
+ return (vst->tag->ifp->if_snd_tag_query(vst->tag, params));
}
static void
-vlan_snd_tag_free(struct m_snd_tag *tag)
+vlan_snd_tag_free(struct m_snd_tag *mst)
{
- tag->ifp->if_snd_tag_free(tag);
+ struct vlan_snd_tag *vst;
+
+ vst = mst_to_vst(mst);
+ m_snd_tag_rele(vst->tag);
+ free(vst, M_VLAN);
}
#endif
Index: sys/net/netisr.c
===================================================================
--- sys/net/netisr.c
+++ sys/net/netisr.c
@@ -839,6 +839,7 @@
("%s: invalid policy %u for %s", __func__, npp->np_policy,
npp->np_name));
+ MPASS((m->m_pkthdr.csum_flags & CSUM_SND_TAG) == 0);
ifp = m->m_pkthdr.rcvif;
if (ifp != NULL)
*cpuidp = nws_array[(ifp->if_index + source) % nws_count];
Index: sys/net80211/ieee80211_hwmp.c
===================================================================
--- sys/net80211/ieee80211_hwmp.c
+++ sys/net80211/ieee80211_hwmp.c
@@ -2015,6 +2015,7 @@
*/
IEEE80211_NOTE_MAC(vap, IEEE80211_MSG_HWMP, dest,
"%s", "queue frame until path found");
+ MPASS((m->m_pkthdr.csum_flags & CSUM_SND_TAG) == 0);
m->m_pkthdr.rcvif = (void *)(uintptr_t)
ieee80211_mac_hash(ic, dest);
/* XXX age chosen randomly */
Index: sys/net80211/ieee80211_mesh.c
===================================================================
--- sys/net80211/ieee80211_mesh.c
+++ sys/net80211/ieee80211_mesh.c
@@ -1225,6 +1225,7 @@
M_WME_SETAC(mcopy, WME_AC_BE);
/* XXX do we know m_nextpkt is NULL? */
+ MPASS((mcopy->m_pkthdr.csum_flags & CSUM_SND_TAG) == 0);
mcopy->m_pkthdr.rcvif = (void *) ni;
/*
Index: sys/net80211/ieee80211_output.c
===================================================================
--- sys/net80211/ieee80211_output.c
+++ sys/net80211/ieee80211_output.c
@@ -163,6 +163,7 @@
* uses any existing value for rcvif to identify the
* interface it (might have been) received on.
*/
+ MPASS((m->m_pkthdr.csum_flags & CSUM_SND_TAG) == 0);
m->m_pkthdr.rcvif = (void *)ni;
mcast = (m->m_flags & (M_MCAST | M_BCAST)) ? 1: 0;
@@ -528,6 +529,7 @@
* that the mbuf has the same node value that
* it would if it were going via the normal path.
*/
+ MPASS((m->m_pkthdr.csum_flags & CSUM_SND_TAG) == 0);
m->m_pkthdr.rcvif = (void *)ni;
/*
Index: sys/net80211/ieee80211_wds.c
===================================================================
--- sys/net80211/ieee80211_wds.c
+++ sys/net80211/ieee80211_wds.c
@@ -299,6 +299,7 @@
continue;
}
mcopy->m_flags |= M_MCAST;
+ MPASS((mcopy->m_pkthdr.csum_flags & CSUM_SND_TAG) == 0);
mcopy->m_pkthdr.rcvif = (void *) ni;
err = ieee80211_parent_xmitpkt(ic, mcopy);
@@ -332,6 +333,7 @@
* XXX handle overflow?
* XXX per/vap beacon interval?
*/
+ MPASS((m->m_pkthdr.csum_flags & CSUM_SND_TAG) == 0);
m->m_pkthdr.rcvif = (void *)(uintptr_t)
ieee80211_mac_hash(ic, ni->ni_macaddr);
(void) ieee80211_ageq_append(&ic->ic_stageq, m,
Index: sys/netinet/in_pcb.c
===================================================================
--- sys/netinet/in_pcb.c
+++ sys/netinet/in_pcb.c
@@ -3274,13 +3274,6 @@
error = EOPNOTSUPP;
} else {
error = ifp->if_snd_tag_alloc(ifp, &params, &inp->inp_snd_tag);
-
- /*
- * At success increment the refcount on
- * the send tag's network interface:
- */
- if (error == 0)
- if_ref(inp->inp_snd_tag->ifp);
}
return (error);
}
@@ -3293,7 +3286,6 @@
in_pcbdetach_txrtlmt(struct inpcb *inp)
{
struct m_snd_tag *mst;
- struct ifnet *ifp;
INP_WLOCK_ASSERT(inp);
@@ -3303,19 +3295,7 @@
if (mst == NULL)
return;
- ifp = mst->ifp;
- if (ifp == NULL)
- return;
-
- /*
- * If the device was detached while we still had reference(s)
- * on the ifp, we assume if_snd_tag_free() was replaced with
- * stubs.
- */
- ifp->if_snd_tag_free(mst);
-
- /* release reference count on network interface */
- if_rele(ifp);
+ m_snd_tag_rele(mst);
}
/*
@@ -3360,6 +3340,17 @@
*/
max_pacing_rate = socket->so_max_pacing_rate;
+ /*
+ * If the existing send tag is for the wrong interface due to
+ * a route change, first drop the existing tag. Set the
+ * CHANGED flag so that we will keep trying to allocate a new
+ * tag if we fail to allocate one this time.
+ */
+ if (inp->inp_snd_tag != NULL && inp->inp_snd_tag->ifp != ifp) {
+ in_pcbdetach_txrtlmt(inp);
+ inp->inp_flags2 |= INP_RATE_LIMIT_CHANGED;
+ }
+
/*
* NOTE: When attaching to a network interface a reference is
* made to ensure the network interface doesn't go away until
Index: sys/netinet/ip_output.c
===================================================================
--- sys/netinet/ip_output.c
+++ sys/netinet/ip_output.c
@@ -204,6 +204,51 @@
return 0;
}
+static int
+ip_output_send(struct inpcb *inp, struct ifnet *ifp, struct mbuf *m,
+ const struct sockaddr_in *gw, struct route *ro)
+{
+ struct m_snd_tag *mst;
+ int error;
+
+ MPASS((m->m_pkthdr.csum_flags & CSUM_SND_TAG) == 0);
+ mst = NULL;
+
+#ifdef RATELIMIT
+ if (inp != NULL) {
+ if ((inp->inp_flags2 & INP_RATE_LIMIT_CHANGED) != 0 ||
+ (inp->inp_snd_tag != NULL &&
+ inp->inp_snd_tag->ifp != ifp))
+ in_pcboutput_txrtlmt(inp, ifp, m);
+
+ if (inp->inp_snd_tag != NULL)
+ mst = inp->inp_snd_tag;
+ }
+#endif
+ if (mst != NULL) {
+ KASSERT(m->m_pkthdr.rcvif == NULL,
+ ("trying to add a send tag to a forwarded packet"));
+ if (mst->ifp != ifp) {
+ error = EAGAIN;
+ goto done;
+ }
+
+ /* stamp send tag on mbuf */
+ m->m_pkthdr.snd_tag = m_snd_tag_ref(mst);
+ m->m_pkthdr.csum_flags |= CSUM_SND_TAG;
+ }
+
+ error = (*ifp->if_output)(ifp, m, (const struct sockaddr *)gw, ro);
+
+done:
+ /* Check for route change invalidating send tags. */
+#ifdef RATELIMIT
+ if (error == EAGAIN)
+ in_pcboutput_eagain(inp);
+#endif
+ return (error);
+}
+
/*
* IP output. The packet in mbuf chain m contains a skeletal IP
* header (with len, off, ttl, proto, tos, src, dst).
@@ -687,23 +732,7 @@
*/
m_clrprotoflags(m);
IP_PROBE(send, NULL, NULL, ip, ifp, ip, NULL);
-#ifdef RATELIMIT
- if (inp != NULL) {
- if (inp->inp_flags2 & INP_RATE_LIMIT_CHANGED)
- in_pcboutput_txrtlmt(inp, ifp, m);
- /* stamp send tag on mbuf */
- m->m_pkthdr.snd_tag = inp->inp_snd_tag;
- } else {
- m->m_pkthdr.snd_tag = NULL;
- }
-#endif
- error = (*ifp->if_output)(ifp, m,
- (const struct sockaddr *)gw, ro);
-#ifdef RATELIMIT
- /* check for route change */
- if (error == EAGAIN)
- in_pcboutput_eagain(inp);
-#endif
+ error = ip_output_send(inp, ifp, m, gw, ro);
goto done;
}
@@ -739,23 +768,7 @@
IP_PROBE(send, NULL, NULL, mtod(m, struct ip *), ifp,
mtod(m, struct ip *), NULL);
-#ifdef RATELIMIT
- if (inp != NULL) {
- if (inp->inp_flags2 & INP_RATE_LIMIT_CHANGED)
- in_pcboutput_txrtlmt(inp, ifp, m);
- /* stamp send tag on mbuf */
- m->m_pkthdr.snd_tag = inp->inp_snd_tag;
- } else {
- m->m_pkthdr.snd_tag = NULL;
- }
-#endif
- error = (*ifp->if_output)(ifp, m,
- (const struct sockaddr *)gw, ro);
-#ifdef RATELIMIT
- /* check for route change */
- if (error == EAGAIN)
- in_pcboutput_eagain(inp);
-#endif
+ error = ip_output_send(inp, ifp, m, gw, ro);
} else
m_freem(m);
}
Index: sys/netinet6/ip6_output.c
===================================================================
--- sys/netinet6/ip6_output.c
+++ sys/netinet6/ip6_output.c
@@ -276,6 +276,51 @@
return (0);
}
+static int
+ip6_output_send(struct inpcb *inp, struct ifnet *ifp, struct ifnet *origifp,
+ struct mbuf *m, struct sockaddr_in6 *dst, struct route_in6 *ro)
+{
+ struct m_snd_tag *mst;
+ int error;
+
+ MPASS((m->m_pkthdr.csum_flags & CSUM_SND_TAG) == 0);
+ mst = NULL;
+
+#ifdef RATELIMIT
+ if (inp != NULL) {
+ if ((inp->inp_flags2 & INP_RATE_LIMIT_CHANGED) != 0 ||
+ (inp->inp_snd_tag != NULL &&
+ inp->inp_snd_tag->ifp != ifp))
+ in_pcboutput_txrtlmt(inp, ifp, m);
+
+ if (inp->inp_snd_tag != NULL)
+ mst = inp->inp_snd_tag;
+ }
+#endif
+ if (mst != NULL) {
+ KASSERT(m->m_pkthdr.rcvif == NULL,
+ ("trying to add a send tag to a forwarded packet"));
+ if (mst->ifp != ifp) {
+ error = EAGAIN;
+ goto done;
+ }
+
+ /* stamp send tag on mbuf */
+ m->m_pkthdr.snd_tag = m_snd_tag_ref(mst);
+ m->m_pkthdr.csum_flags |= CSUM_SND_TAG;
+ }
+
+ error = nd6_output_ifp(ifp, origifp, m, dst, (struct route *)ro);
+
+done:
+ /* Check for route change invalidating send tags. */
+#ifdef RATELIMIT
+ if (error == EAGAIN)
+ in_pcboutput_eagain(inp);
+#endif
+ return (error);
+}
+
/*
* IP6 output. The packet in mbuf chain m contains a skeletal IP6
* header (with pri, len, nxt, hlim, src, dst).
@@ -968,23 +1013,7 @@
m->m_pkthdr.len);
ifa_free(&ia6->ia_ifa);
}
-#ifdef RATELIMIT
- if (inp != NULL) {
- if (inp->inp_flags2 & INP_RATE_LIMIT_CHANGED)
- in_pcboutput_txrtlmt(inp, ifp, m);
- /* stamp send tag on mbuf */
- m->m_pkthdr.snd_tag = inp->inp_snd_tag;
- } else {
- m->m_pkthdr.snd_tag = NULL;
- }
-#endif
- error = nd6_output_ifp(ifp, origifp, m, dst,
- (struct route *)ro);
-#ifdef RATELIMIT
- /* check for route change */
- if (error == EAGAIN)
- in_pcboutput_eagain(inp);
-#endif
+ error = ip6_output_send(inp, ifp, origifp, m, dst, ro);
goto done;
}
@@ -1083,23 +1112,7 @@
counter_u64_add(ia->ia_ifa.ifa_obytes,
m->m_pkthdr.len);
}
-#ifdef RATELIMIT
- if (inp != NULL) {
- if (inp->inp_flags2 & INP_RATE_LIMIT_CHANGED)
- in_pcboutput_txrtlmt(inp, ifp, m);
- /* stamp send tag on mbuf */
- m->m_pkthdr.snd_tag = inp->inp_snd_tag;
- } else {
- m->m_pkthdr.snd_tag = NULL;
- }
-#endif
- error = nd6_output_ifp(ifp, origifp, m, dst,
- (struct route *)ro);
-#ifdef RATELIMIT
- /* check for route change */
- if (error == EAGAIN)
- in_pcboutput_eagain(inp);
-#endif
+ error = ip6_output_send(inp, ifp, origifp, m, dst, ro);
} else
m_freem(m);
}
Index: sys/netpfil/ipfw/ip_fw2.c
===================================================================
--- sys/netpfil/ipfw/ip_fw2.c
+++ sys/netpfil/ipfw/ip_fw2.c
@@ -1758,7 +1758,7 @@
oif = NULL;
} else {
MPASS(args->flags & IPFW_ARGS_OUT);
- iif = mem ? NULL : m->m_pkthdr.rcvif;
+ iif = mem ? NULL : m_rcvif(m);
oif = args->ifp;
}
Index: sys/sys/mbuf.h
===================================================================
--- sys/sys/mbuf.h
+++ sys/sys/mbuf.h
@@ -40,6 +40,7 @@
#include <sys/queue.h>
#ifdef _KERNEL
#include <sys/systm.h>
+#include <sys/refcount.h>
#include <vm/uma.h>
#ifdef WITNESS
#include <sys/lock.h>
@@ -138,6 +139,7 @@
*/
struct m_snd_tag {
struct ifnet *ifp; /* network interface tag belongs to */
+ volatile u_int refcount;
};
/*
@@ -494,6 +496,8 @@
#define CSUM_L5_VALID 0x20000000 /* checksum is correct */
#define CSUM_COALESCED 0x40000000 /* contains merged segments */
+#define CSUM_SND_TAG 0x80000000 /* Packet header has send tag */
+
/*
* CSUM flag description for use with printf(9) %b identifier.
*/
@@ -503,7 +507,7 @@
"\12CSUM_IP6_UDP\13CSUM_IP6_TCP\14CSUM_IP6_SCTP\15CSUM_IP6_TSO" \
"\16CSUM_IP6_ISCSI" \
"\31CSUM_L3_CALC\32CSUM_L3_VALID\33CSUM_L4_CALC\34CSUM_L4_VALID" \
- "\35CSUM_L5_CALC\36CSUM_L5_VALID\37CSUM_COALESCED"
+ "\35CSUM_L5_CALC\36CSUM_L5_VALID\37CSUM_COALESCED\40CSUM_SND_TAG"
/* CSUM flags compatibility mappings. */
#define CSUM_IP_CHECKED CSUM_L3_CALC
@@ -633,6 +637,8 @@
struct mbuf *m_split(struct mbuf *, int, int);
struct mbuf *m_uiotombuf(struct uio *, int, int, int, int);
struct mbuf *m_unshare(struct mbuf *, int);
+void m_snd_tag_init(struct m_snd_tag *, struct ifnet *);
+void m_snd_tag_destroy(struct m_snd_tag *);
static __inline int
m_gettype(int size)
@@ -995,6 +1001,17 @@
*/
#define MCHTYPE(m, t) m_chtype((m), (t))
+/* Return the rcvif of a packet header. */
+static __inline struct ifnet *
+m_rcvif(struct mbuf *m)
+{
+
+ M_ASSERTPKTHDR(m);
+ if (m->m_pkthdr.csum_flags & CSUM_SND_TAG)
+ return (NULL);
+ return (m->m_pkthdr.rcvif);
+}
+
/* Length to m_copy to copy all. */
#define M_COPYALL 1000000000
@@ -1185,6 +1202,22 @@
m_tag_locate(m, MTAG_ABI_COMPAT, type, start));
}
+static inline struct m_snd_tag *
+m_snd_tag_ref(struct m_snd_tag *mst)
+{
+
+ refcount_acquire(&mst->refcount);
+ return (mst);
+}
+
+static inline void
+m_snd_tag_rele(struct m_snd_tag *mst)
+{
+
+ if (refcount_release(&mst->refcount))
+ m_snd_tag_destroy(mst);
+}
+
static __inline struct mbuf *
m_free(struct mbuf *m)
{
@@ -1193,6 +1226,8 @@
MBUF_PROBE1(m__free, m);
if ((m->m_flags & (M_PKTHDR|M_NOFREE)) == (M_PKTHDR|M_NOFREE))
m_tag_delete_chain(m, NULL);
+ if (m->m_flags & M_PKTHDR && m->m_pkthdr.csum_flags & CSUM_SND_TAG)
+ m_snd_tag_rele(m->m_pkthdr.snd_tag);
if (m->m_flags & M_EXT)
mb_free_ext(m);
else if ((m->m_flags & M_NOFREE) == 0)

File Metadata

Mime Type
text/plain
Expires
Sat, Nov 23, 4:20 AM (17 h, 45 m)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
14789633
Default Alt Text
D20117.diff (33 KB)

Event Timeline