Index: sbin/ifconfig/ifconfig.8 =================================================================== --- sbin/ifconfig/ifconfig.8 +++ sbin/ifconfig/ifconfig.8 @@ -2548,6 +2548,14 @@ Disable selection of egress ports based on the native .Xr NUMA 4 domain for the packets being transmitted. +.It Cm use_rl +Attempt to use a hardware rate limited send tag for lacp metadata. +On some NICs, this will cause the NIC to allocate a queue dedicated +to lacp, and will prevent the loss of lacp metadata and subsequent +link flapping when egress traffic overloads the NIC and most transmit +queues are full. +.It Cm -use_rl +Disable use of hardware rate limited send tag for lacp metadata. .It Cm lacp_fast_timeout Enable lacp fast-timeout on the interface. .It Cm -lacp_fast_timeout Index: sbin/ifconfig/iflagg.c =================================================================== --- sbin/ifconfig/iflagg.c +++ sbin/ifconfig/iflagg.c @@ -135,6 +135,8 @@ case -LAGG_OPT_USE_FLOWID: case LAGG_OPT_USE_NUMA: case -LAGG_OPT_USE_NUMA: + case LAGG_OPT_USE_RL: + case -LAGG_OPT_USE_RL: case LAGG_OPT_LACP_STRICT: case -LAGG_OPT_LACP_STRICT: case LAGG_OPT_LACP_TXTEST: @@ -310,6 +312,8 @@ DEF_CMD("-use_flowid", -LAGG_OPT_USE_FLOWID, setlaggsetopt), DEF_CMD("use_numa", LAGG_OPT_USE_NUMA, setlaggsetopt), DEF_CMD("-use_numa", -LAGG_OPT_USE_NUMA, setlaggsetopt), + DEF_CMD("use_rl", LAGG_OPT_USE_RL, setlaggsetopt), + DEF_CMD("-use_rl", -LAGG_OPT_USE_RL, setlaggsetopt), DEF_CMD("lacp_strict", LAGG_OPT_LACP_STRICT, setlaggsetopt), DEF_CMD("-lacp_strict", -LAGG_OPT_LACP_STRICT, setlaggsetopt), DEF_CMD("lacp_txtest", LAGG_OPT_LACP_TXTEST, setlaggsetopt), Index: sys/net/ieee8023ad_lacp.h =================================================================== --- sys/net/ieee8023ad_lacp.h +++ sys/net/ieee8023ad_lacp.h @@ -229,6 +229,7 @@ u_int lp_media; /* XXX redundant */ int lp_timer[LACP_NTIMER]; struct ifmultiaddr *lp_ifma; + struct lagg_snd_tag *lp_rl_tag; struct lacp_aggregator *lp_aggregator; }; Index: 
sys/net/ieee8023ad_lacp.c =================================================================== --- sys/net/ieee8023ad_lacp.c +++ sys/net/ieee8023ad_lacp.c @@ -362,6 +362,7 @@ lacp_xmit_lacpdu(struct lacp_port *lp) { struct lagg_port *lgp = lp->lp_lagg; + struct lagg_softc *sc = lgp->lp_softc; struct mbuf *m; struct lacpdu *du; int error; @@ -373,7 +374,11 @@ return (ENOMEM); } m->m_len = m->m_pkthdr.len = sizeof(*du); - + if ((sc->sc_opts & LAGG_OPT_USE_RL) && lp->lp_rl_tag != NULL) { + m->m_pkthdr.snd_tag = + m_snd_tag_ref(&lp->lp_rl_tag->com); + m->m_pkthdr.csum_flags |= CSUM_SND_TAG; + } du = mtod(m, struct lacpdu *); memset(du, 0, sizeof(*du)); @@ -416,6 +421,7 @@ lacp_xmit_marker(struct lacp_port *lp) { struct lagg_port *lgp = lp->lp_lagg; + struct lagg_softc *sc = lgp->lp_softc; struct mbuf *m; struct markerdu *mdu; int error; @@ -427,6 +433,11 @@ return (ENOMEM); } m->m_len = m->m_pkthdr.len = sizeof(*mdu); + if ((sc->sc_opts & LAGG_OPT_USE_RL) && lp->lp_rl_tag != NULL) { + m->m_pkthdr.snd_tag = + m_snd_tag_ref(&lp->lp_rl_tag->com); + m->m_pkthdr.csum_flags |= CSUM_SND_TAG; + } mdu = mtod(m, struct markerdu *); memset(mdu, 0, sizeof(*mdu)); @@ -524,6 +535,52 @@ callout_reset(&lsc->lsc_callout, hz, lacp_tick, lsc); } +static void +lacp_rl_alloc(struct lacp_port *lp, struct ifnet *sc_ifp) +{ + union if_snd_tag_alloc_params params = { + .rate_limit.hdr.type = IF_SND_TAG_TYPE_RATE_LIMIT, + .rate_limit.hdr.flowid = 0, + .rate_limit.hdr.flowtype = 0, + .rate_limit.max_rate = -1, + .rate_limit.flags = M_NOWAIT, + }; + struct ifnet *ifp; + struct m_snd_tag *tag; + struct lagg_snd_tag *lst; + + lp->lp_rl_tag = NULL; + ifp = lp->lp_ifp; + if (ifp->if_snd_tag_alloc == NULL) + return; + if (ifp->if_snd_tag_alloc(ifp, &params, &tag) != 0) + return; + lst = malloc(sizeof(*lst), M_TEMP, M_NOWAIT); + if (lst == NULL) { + ifp->if_snd_tag_free(tag); + return; + } + m_snd_tag_init(&lst->com, sc_ifp); + lst->tag = tag; + lp->lp_rl_tag = lst; +} + +static void +lacp_rl_free(struct 
lacp_port *lp) +{ + struct ifnet *ifp; + struct lagg_snd_tag *lst; + + if (lp->lp_rl_tag == NULL) + return; + lst = lp->lp_rl_tag; + lp->lp_rl_tag = NULL; + ifp = lp->lp_ifp; + ifp->if_snd_tag_free(lst->tag); + free(lst, M_TEMP); + lp->lp_rl_tag = NULL; +} + int lacp_port_create(struct lagg_port *lgp) { @@ -552,13 +609,14 @@ if (lp == NULL) return (ENOMEM); - LACP_LOCK(lsc); lgp->lp_psc = lp; lp->lp_ifp = ifp; lp->lp_lagg = lgp; lp->lp_lsc = lsc; lp->lp_ifma = rifma; + lacp_rl_alloc(lp, sc->sc_ifp); + LACP_LOCK(lsc); LIST_INSERT_HEAD(&lsc->lsc_ports, lp, lp_next); lacp_fill_actorinfo(lp, &lp->lp_actor); @@ -579,6 +637,7 @@ struct lacp_softc *lsc = lp->lp_lsc; int i; + lacp_rl_free(lp); LACP_LOCK(lsc); for (i = 0; i < LACP_NTIMER; i++) { LACP_TIMER_DISARM(lp, i); Index: sys/net/if_lagg.h =================================================================== --- sys/net/if_lagg.h +++ sys/net/if_lagg.h @@ -144,6 +144,7 @@ /* Pseudo flags which are used in ro_opts but not stored into sc_opts. */ #define LAGG_OPT_FLOWIDSHIFT 0x02 /* set flowid shift */ #define LAGG_OPT_USE_NUMA 0x04 /* enable use of numa */ +#define LAGG_OPT_USE_RL 0x08 /* enable use of ratelimit */ #define LAGG_OPT_FLOWIDSHIFT_MASK 0x1f /* flowid is uint32_t */ #define LAGG_OPT_LACP_STRICT 0x10 /* LACP strict mode */ #define LAGG_OPT_LACP_TXTEST 0x20 /* LACP debug: txtest */ @@ -161,7 +162,7 @@ #define SIOCSLAGGOPTS _IOW('i', 153, struct lagg_reqopts) #define LAGG_OPT_BITS "\020\001USE_FLOWID\003USE_NUMA" \ - "\005LACP_STRICT\006LACP_TXTEST" \ + "\004USE_RL\005LACP_STRICT\006LACP_TXTEST" \ "\007LACP_RXTEST" #ifdef _KERNEL @@ -257,6 +258,12 @@ struct epoch_context lp_epoch_ctx; }; +struct lagg_snd_tag { + struct m_snd_tag com; + struct m_snd_tag *tag; +}; + + extern struct mbuf *(*lagg_input_p)(struct ifnet *, struct mbuf *); extern void (*lagg_linkstate_p)(struct ifnet *, int ); Index: sys/net/if_lagg.c =================================================================== --- sys/net/if_lagg.c +++ 
sys/net/if_lagg.c @@ -105,11 +105,6 @@ {0, NULL} }; -struct lagg_snd_tag { - struct m_snd_tag com; - struct m_snd_tag *tag; -}; - VNET_DEFINE(SLIST_HEAD(__trhead, lagg_softc), lagg_list); /* list of laggs */ #define V_lagg_list VNET(lagg_list) VNET_DEFINE_STATIC(struct mtx, lagg_list_mtx); @@ -292,6 +287,13 @@ &VNET_NAME(def_use_numa), 0, "Use numa to steer flows"); +/* Default value for using rate limited tags */ +VNET_DEFINE_STATIC(int, def_use_rl) = 0; +#define V_def_use_rl VNET(def_use_rl) +SYSCTL_INT(_net_link_lagg, OID_AUTO, default_use_ratelimit, CTLFLAG_RWTUN, + &VNET_NAME(def_use_rl), 0, + "Use ratelimited queue for LACP"); + /* Default value for flowid shift */ VNET_DEFINE_STATIC(int, def_flowid_shift) = 16; #define V_def_flowid_shift VNET(def_flowid_shift) @@ -521,6 +523,8 @@ sc->sc_opts |= LAGG_OPT_USE_FLOWID; if (V_def_use_numa) sc->sc_opts |= LAGG_OPT_USE_NUMA; + if (V_def_use_rl) + sc->sc_opts |= LAGG_OPT_USE_RL; sc->flowid_shift = V_def_flowid_shift; /* Hash all layers by default */ @@ -1294,6 +1298,8 @@ case -LAGG_OPT_USE_FLOWID: case LAGG_OPT_USE_NUMA: case -LAGG_OPT_USE_NUMA: + case LAGG_OPT_USE_RL: + case -LAGG_OPT_USE_RL: case LAGG_OPT_FLOWIDSHIFT: case LAGG_OPT_RR_LIMIT: valid = 1;