Page Menu
Home
FreeBSD
Search
Configure Global Search
Log In
Files
F103199407
D20117.diff
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Flag For Later
Award Token
Size
33 KB
Referenced Files
None
Subscribers
None
D20117.diff
View Options
Index: sys/dev/cxgbe/t4_main.c
===================================================================
--- sys/dev/cxgbe/t4_main.c
+++ sys/dev/cxgbe/t4_main.c
@@ -2057,13 +2057,8 @@
return (rc);
}
#ifdef RATELIMIT
- if (m->m_pkthdr.snd_tag != NULL) {
- /* EAGAIN tells the stack we are not the correct interface. */
- if (__predict_false(ifp != m->m_pkthdr.snd_tag->ifp)) {
- m_freem(m);
- return (EAGAIN);
- }
-
+ if (m->m_pkthdr.csum_flags & CSUM_SND_TAG) {
+ MPASS(m->m_pkthdr.snd_tag->ifp == ifp);
return (ethofld_transmit(ifp, m));
}
#endif
Index: sys/dev/cxgbe/t4_sched.c
===================================================================
--- sys/dev/cxgbe/t4_sched.c
+++ sys/dev/cxgbe/t4_sched.c
@@ -789,7 +789,7 @@
mtx_init(&cst->lock, "cst_lock", NULL, MTX_DEF);
mbufq_init(&cst->pending_tx, INT_MAX);
mbufq_init(&cst->pending_fwack, INT_MAX);
- cst->com.ifp = ifp;
+ m_snd_tag_init(&cst->com, ifp);
cst->flags |= EO_FLOWC_PENDING | EO_SND_TAG_REF;
cst->adapter = sc;
cst->port_id = pi->port_id;
Index: sys/dev/cxgbe/t4_sge.c
===================================================================
--- sys/dev/cxgbe/t4_sge.c
+++ sys/dev/cxgbe/t4_sge.c
@@ -2325,7 +2325,7 @@
needs_eo(struct mbuf *m)
{
- return (m->m_pkthdr.snd_tag != NULL);
+ return (m->m_pkthdr.csum_flags & CSUM_SND_TAG);
}
#endif
@@ -2539,8 +2539,11 @@
* checksumming is enabled. needs_l4_csum happens to check for all the
* right things.
*/
- if (__predict_false(needs_eo(m0) && !needs_l4_csum(m0)))
+ if (__predict_false(needs_eo(m0) && !needs_l4_csum(m0))) {
+ m_snd_tag_rele(m0->m_pkthdr.snd_tag);
m0->m_pkthdr.snd_tag = NULL;
+ m0->m_pkthdr.csum_flags &= ~CSUM_SND_TAG;
+ }
#endif
if (!needs_tso(m0) &&
@@ -5922,6 +5925,21 @@
cst->tx_nocompl = 0;
}
(void) mbufq_dequeue(&cst->pending_tx);
+
+ /*
+ * Drop the mbuf's reference on the tag now rather
+ * than waiting until m_freem(). This ensures that
+ * cxgbe_snd_tag_free gets called when the inp drops
+ * its reference on the tag and there are no more
+ * mbufs in the pending_tx queue and can flush any
+ * pending requests. Otherwise if the last mbuf
+ * doesn't request a completion the etid will never be
+ * released.
+ */
+ m->m_pkthdr.snd_tag = NULL;
+ m->m_pkthdr.csum_flags &= ~CSUM_SND_TAG;
+ m_snd_tag_rele(&cst->com);
+
mbufq_enqueue(&cst->pending_fwack, m);
}
}
@@ -5933,6 +5951,7 @@
int rc;
MPASS(m0->m_nextpkt == NULL);
+ MPASS(m0->m_pkthdr.csum_flags & CSUM_SND_TAG);
MPASS(m0->m_pkthdr.snd_tag != NULL);
cst = mst_to_cst(m0->m_pkthdr.snd_tag);
@@ -5967,8 +5986,18 @@
mbufq_enqueue(&cst->pending_tx, m0);
cst->plen += m0->m_pkthdr.len;
+ /*
+ * Hold an extra reference on the tag while generating work
+ * requests to ensure that we don't try to free the tag during
+ * ethofld_tx() in case we are sending the final mbuf after
+ * the inp was freed.
+ */
+ m_snd_tag_ref(&cst->com);
ethofld_tx(cst);
- rc = 0;
+ mtx_unlock(&cst->lock);
+ m_snd_tag_rele(&cst->com);
+ return (0);
+
done:
mtx_unlock(&cst->lock);
if (__predict_false(rc != 0))
@@ -6015,7 +6044,6 @@
cst->flags &= ~EO_FLUSH_RPL_PENDING;
cst->tx_credits += cpl->credits;
-freetag:
cxgbe_snd_tag_free_locked(cst);
return (0); /* cst is gone. */
}
@@ -6033,22 +6061,27 @@
cst->tx_credits += cpl->credits;
MPASS(cst->tx_credits <= cst->tx_total);
- m = mbufq_first(&cst->pending_tx);
- if (m != NULL && cst->tx_credits >= mbuf_eo_len16(m))
- ethofld_tx(cst);
-
- if (__predict_false((cst->flags & EO_SND_TAG_REF) == 0) &&
- cst->ncompl == 0) {
- if (cst->tx_credits == cst->tx_total)
- goto freetag;
- else {
- MPASS((cst->flags & EO_FLUSH_RPL_PENDING) == 0);
- send_etid_flush_wr(cst);
- }
+ if (cst->flags & EO_SND_TAG_REF) {
+ /*
+ * As with ethofld_transmit(), hold an extra reference
+ * so that the tag is stable across ethofld_tx().
+ */
+ m_snd_tag_ref(&cst->com);
+ m = mbufq_first(&cst->pending_tx);
+ if (m != NULL && cst->tx_credits >= mbuf_eo_len16(m))
+ ethofld_tx(cst);
+ mtx_unlock(&cst->lock);
+ m_snd_tag_rele(&cst->com);
+ } else {
+ /*
+ * There shouldn't be any pending packets if the tag
+ * was freed by the kernel since any pending packet
+ * should hold a reference to the tag.
+ */
+ MPASS(mbufq_first(&cst->pending_tx) == NULL);
+ mtx_unlock(&cst->lock);
}
- mtx_unlock(&cst->lock);
-
return (0);
}
#endif
Index: sys/dev/mlx5/mlx5_en/mlx5_en_main.c
===================================================================
--- sys/dev/mlx5/mlx5_en/mlx5_en_main.c
+++ sys/dev/mlx5/mlx5_en/mlx5_en_main.c
@@ -1247,7 +1247,7 @@
wq_sz = mlx5_wq_ll_get_size(&rq->wq);
- err = -tcp_lro_init_args(&rq->lro, c->tag.m_snd_tag.ifp, TCP_LRO_ENTRIES, wq_sz);
+ err = -tcp_lro_init_args(&rq->lro, priv->ifp, TCP_LRO_ENTRIES, wq_sz);
if (err)
goto err_rq_wq_destroy;
@@ -1288,7 +1288,7 @@
}
}
- rq->ifp = c->tag.m_snd_tag.ifp;
+ rq->ifp = priv->ifp;
rq->channel = c;
rq->ix = c->ix;
@@ -2145,7 +2145,6 @@
c->priv = priv;
c->ix = ix;
/* setup send tag */
- c->tag.m_snd_tag.ifp = priv->ifp;
c->tag.type = IF_SND_TAG_TYPE_UNLIMITED;
c->mkey_be = cpu_to_be32(priv->mr.key);
c->num_tc = priv->num_tc;
@@ -3987,6 +3986,8 @@
if (unlikely(pch->sq[0].running == 0))
return (ENXIO);
mlx5e_ref_channel(priv);
+ MPASS(pch->tag.m_snd_tag.refcount == 0);
+ m_snd_tag_init(&pch->tag.m_snd_tag, ifp);
*ppmt = &pch->tag.m_snd_tag;
return (0);
}
Index: sys/dev/mlx5/mlx5_en/mlx5_en_rl.c
===================================================================
--- sys/dev/mlx5/mlx5_en/mlx5_en_rl.c
+++ sys/dev/mlx5/mlx5_en/mlx5_en_rl.c
@@ -843,7 +843,6 @@
for (i = 0; i < rl->param.tx_channels_per_worker_def; i++) {
struct mlx5e_rl_channel *channel = rlw->channels + i;
channel->worker = rlw;
- channel->tag.m_snd_tag.ifp = priv->ifp;
channel->tag.type = IF_SND_TAG_TYPE_RATE_LIMIT;
STAILQ_INSERT_TAIL(&rlw->index_list_head, channel, entry);
}
@@ -1127,6 +1126,8 @@
}
/* store pointer to mbuf tag */
+ MPASS(channel->tag.m_snd_tag.refcount == 0);
+ m_snd_tag_init(&channel->tag.m_snd_tag, ifp);
*ppmt = &channel->tag.m_snd_tag;
done:
return (error);
Index: sys/dev/mlx5/mlx5_en/mlx5_en_tx.c
===================================================================
--- sys/dev/mlx5/mlx5_en/mlx5_en_tx.c
+++ sys/dev/mlx5/mlx5_en/mlx5_en_tx.c
@@ -83,10 +83,6 @@
struct mlx5e_snd_tag *ptag;
struct mlx5e_sq *sq;
- /* check for route change */
- if (mb->m_pkthdr.snd_tag->ifp != ifp)
- return (NULL);
-
/* get pointer to sendqueue */
ptag = container_of(mb->m_pkthdr.snd_tag,
struct mlx5e_snd_tag, m_snd_tag);
@@ -609,21 +605,10 @@
struct mlx5e_sq *sq;
int ret;
- if (mb->m_pkthdr.snd_tag != NULL) {
+ if (mb->m_pkthdr.csum_flags & CSUM_SND_TAG) {
+ MPASS(mb->m_pkthdr.snd_tag->ifp == ifp);
sq = mlx5e_select_queue_by_send_tag(ifp, mb);
if (unlikely(sq == NULL)) {
- /* Check for route change */
- if (mb->m_pkthdr.snd_tag->ifp != ifp) {
- /* Free mbuf */
- m_freem(mb);
-
- /*
- * Tell upper layers about route
- * change and to re-transmit this
- * packet:
- */
- return (EAGAIN);
- }
goto select_queue;
}
} else {
Index: sys/kern/kern_mbuf.c
===================================================================
--- sys/kern/kern_mbuf.c
+++ sys/kern/kern_mbuf.c
@@ -46,8 +46,12 @@
#include <sys/mutex.h>
#include <sys/protosw.h>
#include <sys/smp.h>
+#include <sys/socket.h>
#include <sys/sysctl.h>
+#include <net/if.h>
+#include <net/if_var.h>
+
#include <vm/vm.h>
#include <vm/vm_extern.h>
#include <vm/vm_kern.h>
@@ -112,6 +116,10 @@
SYSCTL_QUAD(_kern_ipc, OID_AUTO, maxmbufmem, CTLFLAG_RDTUN | CTLFLAG_NOFETCH, &maxmbufmem, 0,
"Maximum real memory allocatable to various mbuf types");
+static counter_u64_t snd_tag_count;
+SYSCTL_COUNTER_U64(_kern_ipc, OID_AUTO, num_snd_tags, CTLFLAG_RW,
+ &snd_tag_count, "# of active mbuf send tags");
+
/*
* tunable_mbinit() has to be run before any mbuf allocations are done.
*/
@@ -378,6 +386,8 @@
*/
EVENTHANDLER_REGISTER(vm_lowmem, mb_reclaim, NULL,
EVENTHANDLER_PRI_FIRST);
+
+ snd_tag_count = counter_u64_alloc(M_WAITOK);
}
SYSINIT(mbuf, SI_SUB_MBUF, SI_ORDER_FIRST, mbuf_init, NULL);
@@ -1149,3 +1159,24 @@
while (mb != NULL)
mb = m_free(mb);
}
+
+void
+m_snd_tag_init(struct m_snd_tag *mst, struct ifnet *ifp)
+{
+
+ if_ref(ifp);
+ mst->ifp = ifp;
+ refcount_init(&mst->refcount, 1);
+ counter_u64_add(snd_tag_count, 1);
+}
+
+void
+m_snd_tag_destroy(struct m_snd_tag *mst)
+{
+ struct ifnet *ifp;
+
+ ifp = mst->ifp;
+ ifp->if_snd_tag_free(mst);
+ if_rele(ifp);
+ counter_u64_add(snd_tag_count, -1);
+}
Index: sys/kern/uipc_mbuf.c
===================================================================
--- sys/kern/uipc_mbuf.c
+++ sys/kern/uipc_mbuf.c
@@ -382,6 +382,10 @@
to->m_pkthdr = from->m_pkthdr; /* especially tags */
SLIST_INIT(&from->m_pkthdr.tags); /* purge tags from src */
from->m_flags &= ~M_PKTHDR;
+ if (from->m_pkthdr.csum_flags & CSUM_SND_TAG) {
+ from->m_pkthdr.csum_flags &= ~CSUM_SND_TAG;
+ from->m_pkthdr.snd_tag = NULL;
+ }
}
/*
@@ -414,6 +418,8 @@
if ((to->m_flags & M_EXT) == 0)
to->m_data = to->m_pktdat;
to->m_pkthdr = from->m_pkthdr;
+ if (from->m_pkthdr.csum_flags & CSUM_SND_TAG)
+ m_snd_tag_ref(from->m_pkthdr.snd_tag);
SLIST_INIT(&to->m_pkthdr.tags);
return (m_tag_copy_chain(to, from, how));
}
@@ -924,7 +930,12 @@
return (NULL);
n->m_next = m->m_next;
m->m_next = NULL;
- n->m_pkthdr.rcvif = m0->m_pkthdr.rcvif;
+ if (m0->m_pkthdr.csum_flags & CSUM_SND_TAG) {
+ n->m_pkthdr.snd_tag =
+ m_snd_tag_ref(m0->m_pkthdr.snd_tag);
+ n->m_pkthdr.csum_flags |= CSUM_SND_TAG;
+ } else
+ n->m_pkthdr.rcvif = m0->m_pkthdr.rcvif;
n->m_pkthdr.len = m0->m_pkthdr.len - len0;
m0->m_pkthdr.len = len0;
return (n);
@@ -932,7 +943,12 @@
n = m_gethdr(wait, m0->m_type);
if (n == NULL)
return (NULL);
- n->m_pkthdr.rcvif = m0->m_pkthdr.rcvif;
+ if (m0->m_pkthdr.csum_flags & CSUM_SND_TAG) {
+ n->m_pkthdr.snd_tag =
+ m_snd_tag_ref(m0->m_pkthdr.snd_tag);
+ n->m_pkthdr.csum_flags |= CSUM_SND_TAG;
+ } else
+ n->m_pkthdr.rcvif = m0->m_pkthdr.rcvif;
n->m_pkthdr.len = m0->m_pkthdr.len - len0;
m0->m_pkthdr.len = len0;
if (m->m_flags & M_EXT)
Index: sys/net/bpf.c
===================================================================
--- sys/net/bpf.c
+++ sys/net/bpf.c
@@ -2304,7 +2304,7 @@
int gottime;
/* Skip outgoing duplicate packets. */
- if ((m->m_flags & M_PROMISC) != 0 && m->m_pkthdr.rcvif == NULL) {
+ if ((m->m_flags & M_PROMISC) != 0 && m_rcvif(m) == NULL) {
m->m_flags &= ~M_PROMISC;
return;
}
@@ -2314,7 +2314,7 @@
NET_EPOCH_ENTER(et);
CK_LIST_FOREACH(d, &bp->bif_dlist, bd_next) {
- if (BPF_CHECK_DIRECTION(d, m->m_pkthdr.rcvif, bp->bif_ifp))
+ if (BPF_CHECK_DIRECTION(d, m_rcvif(m), bp->bif_ifp))
continue;
counter_u64_add(d->bd_rcount, 1);
#ifdef BPF_JITTER
Index: sys/net/if.c
===================================================================
--- sys/net/if.c
+++ sys/net/if.c
@@ -4295,6 +4295,8 @@
void
if_setrcvif(struct mbuf *m, if_t ifp)
{
+
+ MPASS((m->m_pkthdr.csum_flags & CSUM_SND_TAG) == 0);
m->m_pkthdr.rcvif = (struct ifnet *)ifp;
}
Index: sys/net/if_ethersubr.c
===================================================================
--- sys/net/if_ethersubr.c
+++ sys/net/if_ethersubr.c
@@ -816,6 +816,7 @@
* We will rely on rcvif being set properly in the deferred context,
* so assert it is correct here.
*/
+ MPASS((m->m_pkthdr.csum_flags & CSUM_SND_TAG) == 0);
KASSERT(m->m_pkthdr.rcvif == ifp, ("%s: ifnet mismatch m %p "
"rcvif %p ifp %p", __func__, m, m->m_pkthdr.rcvif, ifp));
CURVNET_SET_QUIET(ifp->if_vnet);
Index: sys/net/if_lagg.c
===================================================================
--- sys/net/if_lagg.c
+++ sys/net/if_lagg.c
@@ -95,6 +95,11 @@
{0, NULL}
};
+struct lagg_snd_tag {
+ struct m_snd_tag com;
+ struct m_snd_tag *tag;
+};
+
VNET_DEFINE(SLIST_HEAD(__trhead, lagg_softc), lagg_list); /* list of laggs */
#define V_lagg_list VNET(lagg_list)
VNET_DEFINE_STATIC(struct mtx, lagg_list_mtx);
@@ -134,6 +139,10 @@
static int lagg_snd_tag_alloc(struct ifnet *,
union if_snd_tag_alloc_params *,
struct m_snd_tag **);
+static int lagg_snd_tag_modify(struct m_snd_tag *,
+ union if_snd_tag_modify_params *);
+static int lagg_snd_tag_query(struct m_snd_tag *,
+ union if_snd_tag_query_params *);
static void lagg_snd_tag_free(struct m_snd_tag *);
#endif
static int lagg_setmulti(struct lagg_port *);
@@ -525,6 +534,8 @@
ifp->if_flags = IFF_SIMPLEX | IFF_BROADCAST | IFF_MULTICAST;
#ifdef RATELIMIT
ifp->if_snd_tag_alloc = lagg_snd_tag_alloc;
+ ifp->if_snd_tag_modify = lagg_snd_tag_modify;
+ ifp->if_snd_tag_query = lagg_snd_tag_query;
ifp->if_snd_tag_free = lagg_snd_tag_free;
#endif
ifp->if_capenable = ifp->if_capabilities = IFCAP_HWSTATS;
@@ -1537,63 +1548,126 @@
}
#ifdef RATELIMIT
-static int
-lagg_snd_tag_alloc(struct ifnet *ifp,
- union if_snd_tag_alloc_params *params,
- struct m_snd_tag **ppmt)
+static inline struct lagg_snd_tag *
+mst_to_lst(struct m_snd_tag *mst)
{
- struct lagg_softc *sc = (struct lagg_softc *)ifp->if_softc;
+
+ return (__containerof(mst, struct lagg_snd_tag, com));
+}
+
+/*
+ * Look up the port used by a specific flow. This only works for lagg
+ * protocols with deterministic port mappings (e.g. not roundrobin).
+ * In addition protocols which use a hash to map flows to ports must
+ * be configured to use the mbuf flowid rather than hashing packet
+ * contents.
+ */
+static struct lagg_port *
+lookup_snd_tag_port(struct ifnet *ifp, uint32_t flowid, uint32_t flowtype)
+{
+ struct lagg_softc *sc;
struct lagg_port *lp;
struct lagg_lb *lb;
uint32_t p;
- LAGG_RLOCK();
+ sc = ifp->if_softc;
+
switch (sc->sc_proto) {
case LAGG_PROTO_FAILOVER:
- lp = lagg_link_active(sc, sc->sc_primary);
- break;
+ return (lagg_link_active(sc, sc->sc_primary));
case LAGG_PROTO_LOADBALANCE:
if ((sc->sc_opts & LAGG_OPT_USE_FLOWID) == 0 ||
- params->hdr.flowtype == M_HASHTYPE_NONE) {
- LAGG_RUNLOCK();
- return (EOPNOTSUPP);
- }
- p = params->hdr.flowid >> sc->flowid_shift;
+ flowtype == M_HASHTYPE_NONE)
+ return (NULL);
+ p = flowid >> sc->flowid_shift;
p %= sc->sc_count;
lb = (struct lagg_lb *)sc->sc_psc;
lp = lb->lb_ports[p];
- lp = lagg_link_active(sc, lp);
- break;
+ return (lagg_link_active(sc, lp));
case LAGG_PROTO_LACP:
if ((sc->sc_opts & LAGG_OPT_USE_FLOWID) == 0 ||
- params->hdr.flowtype == M_HASHTYPE_NONE) {
- LAGG_RUNLOCK();
- return (EOPNOTSUPP);
- }
- lp = lacp_select_tx_port_by_hash(sc, params->hdr.flowid);
- break;
+ flowtype == M_HASHTYPE_NONE)
+ return (NULL);
+ return (lacp_select_tx_port_by_hash(sc, flowid));
default:
- LAGG_RUNLOCK();
- return (EOPNOTSUPP);
+ return (NULL);
}
+}
+
+static int
+lagg_snd_tag_alloc(struct ifnet *ifp,
+ union if_snd_tag_alloc_params *params,
+ struct m_snd_tag **ppmt)
+{
+ struct lagg_snd_tag *lst;
+ struct lagg_softc *sc;
+ struct lagg_port *lp;
+ struct ifnet *lp_ifp;
+ int error;
+
+ sc = ifp->if_softc;
+
+ LAGG_RLOCK();
+ lp = lookup_snd_tag_port(ifp, params->hdr.flowid, params->hdr.flowtype);
if (lp == NULL) {
LAGG_RUNLOCK();
return (EOPNOTSUPP);
}
- ifp = lp->lp_ifp;
+ if (lp->lp_ifp == NULL || lp->lp_ifp->if_snd_tag_alloc == NULL) {
+ LAGG_RUNLOCK();
+ return (EOPNOTSUPP);
+ }
+ lp_ifp = lp->lp_ifp;
+ if_ref(lp_ifp);
LAGG_RUNLOCK();
- if (ifp == NULL || ifp->if_snd_tag_alloc == NULL ||
- (ifp->if_capenable & IFCAP_TXRTLMT) == 0)
- return (EOPNOTSUPP);
- /* forward allocation request */
- return (ifp->if_snd_tag_alloc(ifp, params, ppmt));
+ lst = malloc(sizeof(*lst), M_LAGG, M_NOWAIT);
+ if (lst == NULL) {
+ if_rele(lp_ifp);
+ return (ENOMEM);
+ }
+
+ error = lp_ifp->if_snd_tag_alloc(lp_ifp, params, &lst->tag);
+ if_rele(lp_ifp);
+ if (error) {
+ free(lst, M_LAGG);
+ return (error);
+ }
+
+ m_snd_tag_init(&lst->com, ifp);
+
+ *ppmt = &lst->com;
+ return (0);
+}
+
+static int
+lagg_snd_tag_modify(struct m_snd_tag *mst,
+ union if_snd_tag_modify_params *params)
+{
+ struct lagg_snd_tag *lst;
+
+ lst = mst_to_lst(mst);
+ return (lst->tag->ifp->if_snd_tag_modify(lst->tag, params));
+}
+
+static int
+lagg_snd_tag_query(struct m_snd_tag *mst,
+ union if_snd_tag_query_params *params)
+{
+ struct lagg_snd_tag *lst;
+
+ lst = mst_to_lst(mst);
+ return (lst->tag->ifp->if_snd_tag_query(lst->tag, params));
}
static void
-lagg_snd_tag_free(struct m_snd_tag *tag)
+lagg_snd_tag_free(struct m_snd_tag *mst)
{
- tag->ifp->if_snd_tag_free(tag);
+ struct lagg_snd_tag *lst;
+
+ lst = mst_to_lst(mst);
+ m_snd_tag_rele(lst->tag);
+ free(lst, M_LAGG);
}
#endif
@@ -1720,6 +1794,10 @@
struct lagg_softc *sc = (struct lagg_softc *)ifp->if_softc;
int error;
+#ifdef RATELIMIT
+ if (m->m_pkthdr.csum_flags & CSUM_SND_TAG)
+ MPASS(m->m_pkthdr.snd_tag->ifp == ifp);
+#endif
LAGG_RLOCK();
/* We need a Tx algorithm and at least one port */
if (sc->sc_proto == LAGG_PROTO_NONE || sc->sc_count == 0) {
@@ -1910,6 +1988,21 @@
lagg_enqueue(struct ifnet *ifp, struct mbuf *m)
{
+#ifdef RATELIMIT
+ if (m->m_pkthdr.csum_flags & CSUM_SND_TAG) {
+ struct lagg_snd_tag *lst;
+ struct m_snd_tag *mst;
+
+ mst = m->m_pkthdr.snd_tag;
+ lst = mst_to_lst(mst);
+ if (lst->tag->ifp != ifp) {
+ m_freem(m);
+ return (EAGAIN);
+ }
+ m->m_pkthdr.snd_tag = m_snd_tag_ref(lst->tag);
+ m_snd_tag_rele(mst);
+ }
+#endif
return (ifp->if_transmit)(ifp, m);
}
Index: sys/net/if_vlan.c
===================================================================
--- sys/net/if_vlan.c
+++ sys/net/if_vlan.c
@@ -103,6 +103,20 @@
int refcnt;
};
+#ifdef RATELIMIT
+struct vlan_snd_tag {
+ struct m_snd_tag com;
+ struct m_snd_tag *tag;
+};
+
+static inline struct vlan_snd_tag *
+mst_to_vst(struct m_snd_tag *mst)
+{
+
+ return (__containerof(mst, struct vlan_snd_tag, com));
+}
+#endif
+
/*
* This macro provides a facility to iterate over every vlan on a trunk with
* the assumption that none will be added/removed during iteration.
@@ -267,7 +281,11 @@
#ifdef RATELIMIT
static int vlan_snd_tag_alloc(struct ifnet *,
union if_snd_tag_alloc_params *, struct m_snd_tag **);
-static void vlan_snd_tag_free(struct m_snd_tag *);
+static int vlan_snd_tag_modify(struct m_snd_tag *,
+ union if_snd_tag_modify_params *);
+static int vlan_snd_tag_query(struct m_snd_tag *,
+ union if_snd_tag_query_params *);
+static void vlan_snd_tag_free(struct m_snd_tag *);
#endif
static void vlan_qflush(struct ifnet *ifp);
static int vlan_setflag(struct ifnet *ifp, int flag, int status,
@@ -1048,6 +1066,8 @@
ifp->if_ioctl = vlan_ioctl;
#ifdef RATELIMIT
ifp->if_snd_tag_alloc = vlan_snd_tag_alloc;
+ ifp->if_snd_tag_modify = vlan_snd_tag_modify;
+ ifp->if_snd_tag_query = vlan_snd_tag_query;
ifp->if_snd_tag_free = vlan_snd_tag_free;
#endif
ifp->if_flags = VLAN_IFFLAGS;
@@ -1137,6 +1157,26 @@
BPF_MTAP(ifp, m);
+#ifdef RATELIMIT
+ if (m->m_pkthdr.csum_flags & CSUM_SND_TAG) {
+ struct vlan_snd_tag *vst;
+ struct m_snd_tag *mst;
+
+ MPASS(m->m_pkthdr.snd_tag->ifp == ifp);
+ mst = m->m_pkthdr.snd_tag;
+ vst = mst_to_vst(mst);
+ if (vst->tag->ifp != p) {
+ if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
+ NET_EPOCH_EXIT(et);
+ m_freem(m);
+ return (EAGAIN);
+ }
+
+ m->m_pkthdr.snd_tag = m_snd_tag_ref(vst->tag);
+ m_snd_tag_rele(mst);
+ }
+#endif
+
/*
* Do not run parent's if_transmit() if the parent is not up,
* or parent's driver will cause a system crash.
@@ -1928,18 +1968,71 @@
union if_snd_tag_alloc_params *params,
struct m_snd_tag **ppmt)
{
+ struct epoch_tracker et;
+ struct vlan_snd_tag *vst;
+ struct ifvlan *ifv;
+ struct ifnet *parent;
+ int error;
- /* get trunk device */
- ifp = vlan_trunkdev(ifp);
- if (ifp == NULL || (ifp->if_capenable & IFCAP_TXRTLMT) == 0)
+ NET_EPOCH_ENTER(et);
+ ifv = ifp->if_softc;
+ if (ifv->ifv_trunk != NULL)
+ parent = PARENT(ifv);
+ else
+ parent = NULL;
+ if (parent == NULL || parent->if_snd_tag_alloc == NULL) {
+ NET_EPOCH_EXIT(et);
return (EOPNOTSUPP);
- /* forward allocation request */
- return (ifp->if_snd_tag_alloc(ifp, params, ppmt));
+ }
+ if_ref(parent);
+ NET_EPOCH_EXIT(et);
+
+ vst = malloc(sizeof(*vst), M_VLAN, M_NOWAIT);
+ if (vst == NULL) {
+ if_rele(parent);
+ return (ENOMEM);
+ }
+
+ error = parent->if_snd_tag_alloc(parent, params, &vst->tag);
+ if_rele(parent);
+ if (error) {
+ free(vst, M_VLAN);
+ return (error);
+ }
+
+ m_snd_tag_init(&vst->com, ifp);
+
+ *ppmt = &vst->com;
+ return (0);
+}
+
+static int
+vlan_snd_tag_modify(struct m_snd_tag *mst,
+ union if_snd_tag_modify_params *params)
+{
+ struct vlan_snd_tag *vst;
+
+ vst = mst_to_vst(mst);
+ return (vst->tag->ifp->if_snd_tag_modify(vst->tag, params));
+}
+
+static int
+vlan_snd_tag_query(struct m_snd_tag *mst,
+ union if_snd_tag_query_params *params)
+{
+ struct vlan_snd_tag *vst;
+
+ vst = mst_to_vst(mst);
+ return (vst->tag->ifp->if_snd_tag_query(vst->tag, params));
}
static void
-vlan_snd_tag_free(struct m_snd_tag *tag)
+vlan_snd_tag_free(struct m_snd_tag *mst)
{
- tag->ifp->if_snd_tag_free(tag);
+ struct vlan_snd_tag *vst;
+
+ vst = mst_to_vst(mst);
+ m_snd_tag_rele(vst->tag);
+ free(vst, M_VLAN);
}
#endif
Index: sys/net/netisr.c
===================================================================
--- sys/net/netisr.c
+++ sys/net/netisr.c
@@ -839,6 +839,7 @@
("%s: invalid policy %u for %s", __func__, npp->np_policy,
npp->np_name));
+ MPASS((m->m_pkthdr.csum_flags & CSUM_SND_TAG) == 0);
ifp = m->m_pkthdr.rcvif;
if (ifp != NULL)
*cpuidp = nws_array[(ifp->if_index + source) % nws_count];
Index: sys/net80211/ieee80211_hwmp.c
===================================================================
--- sys/net80211/ieee80211_hwmp.c
+++ sys/net80211/ieee80211_hwmp.c
@@ -2015,6 +2015,7 @@
*/
IEEE80211_NOTE_MAC(vap, IEEE80211_MSG_HWMP, dest,
"%s", "queue frame until path found");
+ MPASS((m->m_pkthdr.csum_flags & CSUM_SND_TAG) == 0);
m->m_pkthdr.rcvif = (void *)(uintptr_t)
ieee80211_mac_hash(ic, dest);
/* XXX age chosen randomly */
Index: sys/net80211/ieee80211_mesh.c
===================================================================
--- sys/net80211/ieee80211_mesh.c
+++ sys/net80211/ieee80211_mesh.c
@@ -1225,6 +1225,7 @@
M_WME_SETAC(mcopy, WME_AC_BE);
/* XXX do we know m_nextpkt is NULL? */
+ MPASS((mcopy->m_pkthdr.csum_flags & CSUM_SND_TAG) == 0);
mcopy->m_pkthdr.rcvif = (void *) ni;
/*
Index: sys/net80211/ieee80211_output.c
===================================================================
--- sys/net80211/ieee80211_output.c
+++ sys/net80211/ieee80211_output.c
@@ -163,6 +163,7 @@
* uses any existing value for rcvif to identify the
* interface it (might have been) received on.
*/
+ MPASS((m->m_pkthdr.csum_flags & CSUM_SND_TAG) == 0);
m->m_pkthdr.rcvif = (void *)ni;
mcast = (m->m_flags & (M_MCAST | M_BCAST)) ? 1: 0;
@@ -528,6 +529,7 @@
* that the mbuf has the same node value that
* it would if it were going via the normal path.
*/
+ MPASS((m->m_pkthdr.csum_flags & CSUM_SND_TAG) == 0);
m->m_pkthdr.rcvif = (void *)ni;
/*
Index: sys/net80211/ieee80211_wds.c
===================================================================
--- sys/net80211/ieee80211_wds.c
+++ sys/net80211/ieee80211_wds.c
@@ -299,6 +299,7 @@
continue;
}
mcopy->m_flags |= M_MCAST;
+ MPASS((mcopy->m_pkthdr.csum_flags & CSUM_SND_TAG) == 0);
mcopy->m_pkthdr.rcvif = (void *) ni;
err = ieee80211_parent_xmitpkt(ic, mcopy);
@@ -332,6 +333,7 @@
* XXX handle overflow?
* XXX per/vap beacon interval?
*/
+ MPASS((m->m_pkthdr.csum_flags & CSUM_SND_TAG) == 0);
m->m_pkthdr.rcvif = (void *)(uintptr_t)
ieee80211_mac_hash(ic, ni->ni_macaddr);
(void) ieee80211_ageq_append(&ic->ic_stageq, m,
Index: sys/netinet/in_pcb.c
===================================================================
--- sys/netinet/in_pcb.c
+++ sys/netinet/in_pcb.c
@@ -3274,13 +3274,6 @@
error = EOPNOTSUPP;
} else {
error = ifp->if_snd_tag_alloc(ifp, &params, &inp->inp_snd_tag);
-
- /*
- * At success increment the refcount on
- * the send tag's network interface:
- */
- if (error == 0)
- if_ref(inp->inp_snd_tag->ifp);
}
return (error);
}
@@ -3293,7 +3286,6 @@
in_pcbdetach_txrtlmt(struct inpcb *inp)
{
struct m_snd_tag *mst;
- struct ifnet *ifp;
INP_WLOCK_ASSERT(inp);
@@ -3303,19 +3295,7 @@
if (mst == NULL)
return;
- ifp = mst->ifp;
- if (ifp == NULL)
- return;
-
- /*
- * If the device was detached while we still had reference(s)
- * on the ifp, we assume if_snd_tag_free() was replaced with
- * stubs.
- */
- ifp->if_snd_tag_free(mst);
-
- /* release reference count on network interface */
- if_rele(ifp);
+ m_snd_tag_rele(mst);
}
/*
@@ -3360,6 +3340,17 @@
*/
max_pacing_rate = socket->so_max_pacing_rate;
+ /*
+ * If the existing send tag is for the wrong interface due to
+ * a route change, first drop the existing tag. Set the
+ * CHANGED flag so that we will keep trying to allocate a new
+ * tag if we fail to allocate one this time.
+ */
+ if (inp->inp_snd_tag != NULL && inp->inp_snd_tag->ifp != ifp) {
+ in_pcbdetach_txrtlmt(inp);
+ inp->inp_flags2 |= INP_RATE_LIMIT_CHANGED;
+ }
+
/*
* NOTE: When attaching to a network interface a reference is
* made to ensure the network interface doesn't go away until
Index: sys/netinet/ip_output.c
===================================================================
--- sys/netinet/ip_output.c
+++ sys/netinet/ip_output.c
@@ -204,6 +204,51 @@
return 0;
}
+static int
+ip_output_send(struct inpcb *inp, struct ifnet *ifp, struct mbuf *m,
+ const struct sockaddr_in *gw, struct route *ro)
+{
+ struct m_snd_tag *mst;
+ int error;
+
+ MPASS((m->m_pkthdr.csum_flags & CSUM_SND_TAG) == 0);
+ mst = NULL;
+
+#ifdef RATELIMIT
+ if (inp != NULL) {
+ if ((inp->inp_flags2 & INP_RATE_LIMIT_CHANGED) != 0 ||
+ (inp->inp_snd_tag != NULL &&
+ inp->inp_snd_tag->ifp != ifp))
+ in_pcboutput_txrtlmt(inp, ifp, m);
+
+ if (inp->inp_snd_tag != NULL)
+ mst = inp->inp_snd_tag;
+ }
+#endif
+ if (mst != NULL) {
+ KASSERT(m->m_pkthdr.rcvif == NULL,
+ ("trying to add a send tag to a forwarded packet"));
+ if (mst->ifp != ifp) {
+ error = EAGAIN;
+ goto done;
+ }
+
+ /* stamp send tag on mbuf */
+ m->m_pkthdr.snd_tag = m_snd_tag_ref(mst);
+ m->m_pkthdr.csum_flags |= CSUM_SND_TAG;
+ }
+
+ error = (*ifp->if_output)(ifp, m, (const struct sockaddr *)gw, ro);
+
+done:
+ /* Check for route change invalidating send tags. */
+#ifdef RATELIMIT
+ if (error == EAGAIN)
+ in_pcboutput_eagain(inp);
+#endif
+ return (error);
+}
+
/*
* IP output. The packet in mbuf chain m contains a skeletal IP
* header (with len, off, ttl, proto, tos, src, dst).
@@ -687,23 +732,7 @@
*/
m_clrprotoflags(m);
IP_PROBE(send, NULL, NULL, ip, ifp, ip, NULL);
-#ifdef RATELIMIT
- if (inp != NULL) {
- if (inp->inp_flags2 & INP_RATE_LIMIT_CHANGED)
- in_pcboutput_txrtlmt(inp, ifp, m);
- /* stamp send tag on mbuf */
- m->m_pkthdr.snd_tag = inp->inp_snd_tag;
- } else {
- m->m_pkthdr.snd_tag = NULL;
- }
-#endif
- error = (*ifp->if_output)(ifp, m,
- (const struct sockaddr *)gw, ro);
-#ifdef RATELIMIT
- /* check for route change */
- if (error == EAGAIN)
- in_pcboutput_eagain(inp);
-#endif
+ error = ip_output_send(inp, ifp, m, gw, ro);
goto done;
}
@@ -739,23 +768,7 @@
IP_PROBE(send, NULL, NULL, mtod(m, struct ip *), ifp,
mtod(m, struct ip *), NULL);
-#ifdef RATELIMIT
- if (inp != NULL) {
- if (inp->inp_flags2 & INP_RATE_LIMIT_CHANGED)
- in_pcboutput_txrtlmt(inp, ifp, m);
- /* stamp send tag on mbuf */
- m->m_pkthdr.snd_tag = inp->inp_snd_tag;
- } else {
- m->m_pkthdr.snd_tag = NULL;
- }
-#endif
- error = (*ifp->if_output)(ifp, m,
- (const struct sockaddr *)gw, ro);
-#ifdef RATELIMIT
- /* check for route change */
- if (error == EAGAIN)
- in_pcboutput_eagain(inp);
-#endif
+ error = ip_output_send(inp, ifp, m, gw, ro);
} else
m_freem(m);
}
Index: sys/netinet6/ip6_output.c
===================================================================
--- sys/netinet6/ip6_output.c
+++ sys/netinet6/ip6_output.c
@@ -276,6 +276,51 @@
return (0);
}
+static int
+ip6_output_send(struct inpcb *inp, struct ifnet *ifp, struct ifnet *origifp,
+ struct mbuf *m, struct sockaddr_in6 *dst, struct route_in6 *ro)
+{
+ struct m_snd_tag *mst;
+ int error;
+
+ MPASS((m->m_pkthdr.csum_flags & CSUM_SND_TAG) == 0);
+ mst = NULL;
+
+#ifdef RATELIMIT
+ if (inp != NULL) {
+ if ((inp->inp_flags2 & INP_RATE_LIMIT_CHANGED) != 0 ||
+ (inp->inp_snd_tag != NULL &&
+ inp->inp_snd_tag->ifp != ifp))
+ in_pcboutput_txrtlmt(inp, ifp, m);
+
+ if (inp->inp_snd_tag != NULL)
+ mst = inp->inp_snd_tag;
+ }
+#endif
+ if (mst != NULL) {
+ KASSERT(m->m_pkthdr.rcvif == NULL,
+ ("trying to add a send tag to a forwarded packet"));
+ if (mst->ifp != ifp) {
+ error = EAGAIN;
+ goto done;
+ }
+
+ /* stamp send tag on mbuf */
+ m->m_pkthdr.snd_tag = m_snd_tag_ref(mst);
+ m->m_pkthdr.csum_flags |= CSUM_SND_TAG;
+ }
+
+ error = nd6_output_ifp(ifp, origifp, m, dst, (struct route *)ro);
+
+done:
+ /* Check for route change invalidating send tags. */
+#ifdef RATELIMIT
+ if (error == EAGAIN)
+ in_pcboutput_eagain(inp);
+#endif
+ return (error);
+}
+
/*
* IP6 output. The packet in mbuf chain m contains a skeletal IP6
* header (with pri, len, nxt, hlim, src, dst).
@@ -968,23 +1013,7 @@
m->m_pkthdr.len);
ifa_free(&ia6->ia_ifa);
}
-#ifdef RATELIMIT
- if (inp != NULL) {
- if (inp->inp_flags2 & INP_RATE_LIMIT_CHANGED)
- in_pcboutput_txrtlmt(inp, ifp, m);
- /* stamp send tag on mbuf */
- m->m_pkthdr.snd_tag = inp->inp_snd_tag;
- } else {
- m->m_pkthdr.snd_tag = NULL;
- }
-#endif
- error = nd6_output_ifp(ifp, origifp, m, dst,
- (struct route *)ro);
-#ifdef RATELIMIT
- /* check for route change */
- if (error == EAGAIN)
- in_pcboutput_eagain(inp);
-#endif
+ error = ip6_output_send(inp, ifp, origifp, m, dst, ro);
goto done;
}
@@ -1083,23 +1112,7 @@
counter_u64_add(ia->ia_ifa.ifa_obytes,
m->m_pkthdr.len);
}
-#ifdef RATELIMIT
- if (inp != NULL) {
- if (inp->inp_flags2 & INP_RATE_LIMIT_CHANGED)
- in_pcboutput_txrtlmt(inp, ifp, m);
- /* stamp send tag on mbuf */
- m->m_pkthdr.snd_tag = inp->inp_snd_tag;
- } else {
- m->m_pkthdr.snd_tag = NULL;
- }
-#endif
- error = nd6_output_ifp(ifp, origifp, m, dst,
- (struct route *)ro);
-#ifdef RATELIMIT
- /* check for route change */
- if (error == EAGAIN)
- in_pcboutput_eagain(inp);
-#endif
+ error = ip6_output_send(inp, ifp, origifp, m, dst, ro);
} else
m_freem(m);
}
Index: sys/netpfil/ipfw/ip_fw2.c
===================================================================
--- sys/netpfil/ipfw/ip_fw2.c
+++ sys/netpfil/ipfw/ip_fw2.c
@@ -1758,7 +1758,7 @@
oif = NULL;
} else {
MPASS(args->flags & IPFW_ARGS_OUT);
- iif = mem ? NULL : m->m_pkthdr.rcvif;
+ iif = mem ? NULL : m_rcvif(m);
oif = args->ifp;
}
Index: sys/sys/mbuf.h
===================================================================
--- sys/sys/mbuf.h
+++ sys/sys/mbuf.h
@@ -40,6 +40,7 @@
#include <sys/queue.h>
#ifdef _KERNEL
#include <sys/systm.h>
+#include <sys/refcount.h>
#include <vm/uma.h>
#ifdef WITNESS
#include <sys/lock.h>
@@ -138,6 +139,7 @@
*/
struct m_snd_tag {
struct ifnet *ifp; /* network interface tag belongs to */
+ volatile u_int refcount;
};
/*
@@ -494,6 +496,8 @@
#define CSUM_L5_VALID 0x20000000 /* checksum is correct */
#define CSUM_COALESCED 0x40000000 /* contains merged segments */
+#define CSUM_SND_TAG 0x80000000 /* Packet header has send tag */
+
/*
* CSUM flag description for use with printf(9) %b identifier.
*/
@@ -503,7 +507,7 @@
"\12CSUM_IP6_UDP\13CSUM_IP6_TCP\14CSUM_IP6_SCTP\15CSUM_IP6_TSO" \
"\16CSUM_IP6_ISCSI" \
"\31CSUM_L3_CALC\32CSUM_L3_VALID\33CSUM_L4_CALC\34CSUM_L4_VALID" \
- "\35CSUM_L5_CALC\36CSUM_L5_VALID\37CSUM_COALESCED"
+ "\35CSUM_L5_CALC\36CSUM_L5_VALID\37CSUM_COALESCED\40CSUM_SND_TAG"
/* CSUM flags compatibility mappings. */
#define CSUM_IP_CHECKED CSUM_L3_CALC
@@ -633,6 +637,8 @@
struct mbuf *m_split(struct mbuf *, int, int);
struct mbuf *m_uiotombuf(struct uio *, int, int, int, int);
struct mbuf *m_unshare(struct mbuf *, int);
+void m_snd_tag_init(struct m_snd_tag *, struct ifnet *);
+void m_snd_tag_destroy(struct m_snd_tag *);
static __inline int
m_gettype(int size)
@@ -995,6 +1001,17 @@
*/
#define MCHTYPE(m, t) m_chtype((m), (t))
+/* Return the rcvif of a packet header. */
+static __inline struct ifnet *
+m_rcvif(struct mbuf *m)
+{
+
+ M_ASSERTPKTHDR(m);
+ if (m->m_pkthdr.csum_flags & CSUM_SND_TAG)
+ return (NULL);
+ return (m->m_pkthdr.rcvif);
+}
+
/* Length to m_copy to copy all. */
#define M_COPYALL 1000000000
@@ -1185,6 +1202,22 @@
m_tag_locate(m, MTAG_ABI_COMPAT, type, start));
}
+static inline struct m_snd_tag *
+m_snd_tag_ref(struct m_snd_tag *mst)
+{
+
+ refcount_acquire(&mst->refcount);
+ return (mst);
+}
+
+static inline void
+m_snd_tag_rele(struct m_snd_tag *mst)
+{
+
+ if (refcount_release(&mst->refcount))
+ m_snd_tag_destroy(mst);
+}
+
static __inline struct mbuf *
m_free(struct mbuf *m)
{
@@ -1193,6 +1226,8 @@
MBUF_PROBE1(m__free, m);
if ((m->m_flags & (M_PKTHDR|M_NOFREE)) == (M_PKTHDR|M_NOFREE))
m_tag_delete_chain(m, NULL);
+ if (m->m_flags & M_PKTHDR && m->m_pkthdr.csum_flags & CSUM_SND_TAG)
+ m_snd_tag_rele(m->m_pkthdr.snd_tag);
if (m->m_flags & M_EXT)
mb_free_ext(m);
else if ((m->m_flags & M_NOFREE) == 0)
File Metadata
Details
Attached
Mime Type
text/plain
Expires
Sat, Nov 23, 4:20 AM (17 h, 45 m)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
14789633
Default Alt Text
D20117.diff (33 KB)
Attached To
Mode
D20117: Restructure mbuf send tags to provide stronger guarantees.
Attached
Detach File
Event Timeline
Log In to Comment