diff --git a/sys/net/ieee8023ad_lacp.c b/sys/net/ieee8023ad_lacp.c index 1e2e638fcdf5..6656ebb2b400 100644 --- a/sys/net/ieee8023ad_lacp.c +++ b/sys/net/ieee8023ad_lacp.c @@ -1,2221 +1,2223 @@ /* $NetBSD: ieee8023ad_lacp.c,v 1.3 2005/12/11 12:24:54 christos Exp $ */ /*- * SPDX-License-Identifier: BSD-2-Clause-NetBSD * * Copyright (c)2005 YAMAMOTO Takashi, * Copyright (c)2008 Andrew Thompson * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include "opt_kern_tls.h" #include "opt_ratelimit.h" #include #include #include #include #include #include #include /* hz */ #include /* for net/if.h */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include /* * actor system priority and port priority. * XXX should be configurable. */ #define LACP_SYSTEM_PRIO 0x8000 #define LACP_PORT_PRIO 0x8000 const uint8_t ethermulticastaddr_slowprotocols[ETHER_ADDR_LEN] = { 0x01, 0x80, 0xc2, 0x00, 0x00, 0x02 }; static const struct tlv_template lacp_info_tlv_template[] = { { LACP_TYPE_ACTORINFO, sizeof(struct tlvhdr) + sizeof(struct lacp_peerinfo) }, { LACP_TYPE_PARTNERINFO, sizeof(struct tlvhdr) + sizeof(struct lacp_peerinfo) }, { LACP_TYPE_COLLECTORINFO, sizeof(struct tlvhdr) + sizeof(struct lacp_collectorinfo) }, { 0, 0 }, }; static const struct tlv_template marker_info_tlv_template[] = { { MARKER_TYPE_INFO, sizeof(struct tlvhdr) + sizeof(struct lacp_markerinfo) }, { 0, 0 }, }; static const struct tlv_template marker_response_tlv_template[] = { { MARKER_TYPE_RESPONSE, sizeof(struct tlvhdr) + sizeof(struct lacp_markerinfo) }, { 0, 0 }, }; typedef void (*lacp_timer_func_t)(struct lacp_port *); static void lacp_fill_actorinfo(struct lacp_port *, struct lacp_peerinfo *); static void lacp_fill_markerinfo(struct lacp_port *, struct lacp_markerinfo *); static uint64_t lacp_aggregator_bandwidth(struct lacp_aggregator *); static void lacp_suppress_distributing(struct lacp_softc *, struct lacp_aggregator *); static void lacp_transit_expire(void *); static void lacp_update_portmap(struct lacp_softc *); static void lacp_select_active_aggregator(struct lacp_softc *); static uint16_t lacp_compose_key(struct lacp_port *); static int tlv_check(const void *, size_t, const struct tlvhdr *, const struct tlv_template *, boolean_t); static void lacp_tick(void *); static void lacp_fill_aggregator_id(struct lacp_aggregator *, const struct lacp_port *); static void lacp_fill_aggregator_id_peer(struct lacp_peerinfo *, const struct lacp_peerinfo *); static int lacp_aggregator_is_compatible(const struct lacp_aggregator *, const struct lacp_port *); static int lacp_peerinfo_is_compatible(const struct lacp_peerinfo *, const struct lacp_peerinfo *); static struct lacp_aggregator *lacp_aggregator_get(struct lacp_softc *, struct lacp_port *); static void lacp_aggregator_addref(struct lacp_softc *, struct lacp_aggregator *); static void lacp_aggregator_delref(struct lacp_softc *, struct lacp_aggregator *); /* receive machine */ static int lacp_pdu_input(struct lacp_port *, struct mbuf *); static int lacp_marker_input(struct lacp_port *, struct mbuf *); static void lacp_sm_rx(struct lacp_port *, const struct lacpdu *); static void lacp_sm_rx_timer(struct lacp_port *); static void lacp_sm_rx_set_expired(struct lacp_port *); static void lacp_sm_rx_update_ntt(struct lacp_port *, const struct lacpdu *); static void lacp_sm_rx_record_pdu(struct lacp_port *, const struct lacpdu *); static void lacp_sm_rx_update_selected(struct lacp_port *, const struct lacpdu *); static void lacp_sm_rx_record_default(struct lacp_port *); static void lacp_sm_rx_update_default_selected(struct lacp_port *); static void lacp_sm_rx_update_selected_from_peerinfo(struct lacp_port *, const struct lacp_peerinfo *); /* mux machine */ static void lacp_sm_mux(struct lacp_port *); static void lacp_set_mux(struct lacp_port *, enum lacp_mux_state); static void lacp_sm_mux_timer(struct lacp_port *); /* periodic transmit machine */ static void lacp_sm_ptx_update_timeout(struct lacp_port *, uint8_t); static void lacp_sm_ptx_tx_schedule(struct lacp_port *); static void lacp_sm_ptx_timer(struct lacp_port *); /* transmit machine */ static void lacp_sm_tx(struct lacp_port *); static void lacp_sm_assert_ntt(struct lacp_port *); static void lacp_run_timers(struct lacp_port *); static int lacp_compare_peerinfo(const struct lacp_peerinfo *, const struct lacp_peerinfo *); static int lacp_compare_systemid(const struct lacp_systemid *, const struct lacp_systemid *); static void lacp_port_enable(struct lacp_port *); static void lacp_port_disable(struct lacp_port *); static void lacp_select(struct lacp_port *); static void lacp_unselect(struct lacp_port *); static void lacp_disable_collecting(struct lacp_port *); static void lacp_enable_collecting(struct lacp_port *); static void lacp_disable_distributing(struct lacp_port *); static void lacp_enable_distributing(struct lacp_port *); static int lacp_xmit_lacpdu(struct lacp_port *); static int lacp_xmit_marker(struct lacp_port *); /* Debugging */ static void lacp_dump_lacpdu(const struct lacpdu *); static const char *lacp_format_partner(const struct lacp_peerinfo *, char *, size_t); static const char *lacp_format_lagid(const struct lacp_peerinfo *, const struct lacp_peerinfo *, char *, size_t); static const char *lacp_format_lagid_aggregator(const struct lacp_aggregator *, char *, size_t); static const char *lacp_format_state(uint8_t, char *, size_t); static const char *lacp_format_mac(const uint8_t *, char *, size_t); static const char *lacp_format_systemid(const struct lacp_systemid *, char *, size_t); static const char *lacp_format_portid(const struct lacp_portid *, char *, size_t); static void lacp_dprintf(const struct lacp_port *, const char *, ...) __attribute__((__format__(__printf__, 2, 3))); VNET_DEFINE_STATIC(int, lacp_debug); #define V_lacp_debug VNET(lacp_debug) SYSCTL_NODE(_net_link_lagg, OID_AUTO, lacp, CTLFLAG_RD | CTLFLAG_MPSAFE, 0, "ieee802.3ad"); SYSCTL_INT(_net_link_lagg_lacp, OID_AUTO, debug, CTLFLAG_RWTUN | CTLFLAG_VNET, &VNET_NAME(lacp_debug), 0, "Enable LACP debug logging (1=debug, 2=trace)"); VNET_DEFINE_STATIC(int, lacp_default_strict_mode) = 1; SYSCTL_INT(_net_link_lagg_lacp, OID_AUTO, default_strict_mode, CTLFLAG_RWTUN | CTLFLAG_VNET, &VNET_NAME(lacp_default_strict_mode), 0, "LACP strict protocol compliance default"); #define LACP_DPRINTF(a) if (V_lacp_debug & 0x01) { lacp_dprintf a ; } #define LACP_TRACE(a) if (V_lacp_debug & 0x02) { lacp_dprintf(a,"%s\n",__func__); } #define LACP_TPRINTF(a) if (V_lacp_debug & 0x04) { lacp_dprintf a ; } /* * partner administration variables. * XXX should be configurable. */ static const struct lacp_peerinfo lacp_partner_admin_optimistic = { .lip_systemid = { .lsi_prio = 0xffff }, .lip_portid = { .lpi_prio = 0xffff }, .lip_state = LACP_STATE_SYNC | LACP_STATE_AGGREGATION | LACP_STATE_COLLECTING | LACP_STATE_DISTRIBUTING, }; static const struct lacp_peerinfo lacp_partner_admin_strict = { .lip_systemid = { .lsi_prio = 0xffff }, .lip_portid = { .lpi_prio = 0xffff }, .lip_state = 0, }; static const lacp_timer_func_t lacp_timer_funcs[LACP_NTIMER] = { [LACP_TIMER_CURRENT_WHILE] = lacp_sm_rx_timer, [LACP_TIMER_PERIODIC] = lacp_sm_ptx_timer, [LACP_TIMER_WAIT_WHILE] = lacp_sm_mux_timer, }; struct mbuf * lacp_input(struct lagg_port *lgp, struct mbuf *m) { struct lacp_port *lp = LACP_PORT(lgp); uint8_t subtype; if (m->m_pkthdr.len < sizeof(struct ether_header) + sizeof(subtype)) { m_freem(m); return (NULL); } m_copydata(m, sizeof(struct ether_header), sizeof(subtype), &subtype); switch (subtype) { case SLOWPROTOCOLS_SUBTYPE_LACP: lacp_pdu_input(lp, m); return (NULL); case SLOWPROTOCOLS_SUBTYPE_MARKER: lacp_marker_input(lp, m); return (NULL); } /* Not a subtype we are interested in */ return (m); } /* * lacp_pdu_input: process lacpdu */ static int lacp_pdu_input(struct lacp_port *lp, struct mbuf *m) { struct lacp_softc *lsc = lp->lp_lsc; struct lacpdu *du; int error = 0; if (m->m_pkthdr.len != sizeof(*du)) { goto bad; } if ((m->m_flags & M_MCAST) == 0) { goto bad; } if (m->m_len < sizeof(*du)) { m = m_pullup(m, sizeof(*du)); if (m == NULL) { return (ENOMEM); } } du = mtod(m, struct lacpdu *); if (memcmp(&du->ldu_eh.ether_dhost, ðermulticastaddr_slowprotocols, ETHER_ADDR_LEN)) { goto bad; } /* * ignore the version for compatibility with * the future protocol revisions. */ #if 0 if (du->ldu_sph.sph_version != 1) { goto bad; } #endif /* * ignore tlv types for compatibility with * the future protocol revisions. */ if (tlv_check(du, sizeof(*du), &du->ldu_tlv_actor, lacp_info_tlv_template, FALSE)) { goto bad; } if (V_lacp_debug > 0) { lacp_dprintf(lp, "lacpdu receive\n"); lacp_dump_lacpdu(du); } if ((1 << lp->lp_ifp->if_dunit) & lp->lp_lsc->lsc_debug.lsc_rx_test) { LACP_TPRINTF((lp, "Dropping RX PDU\n")); goto bad; } LACP_LOCK(lsc); lacp_sm_rx(lp, du); LACP_UNLOCK(lsc); m_freem(m); return (error); bad: m_freem(m); return (EINVAL); } static void lacp_fill_actorinfo(struct lacp_port *lp, struct lacp_peerinfo *info) { struct lagg_port *lgp = lp->lp_lagg; struct lagg_softc *sc = lgp->lp_softc; info->lip_systemid.lsi_prio = htons(LACP_SYSTEM_PRIO); memcpy(&info->lip_systemid.lsi_mac, IF_LLADDR(sc->sc_ifp), ETHER_ADDR_LEN); info->lip_portid.lpi_prio = htons(LACP_PORT_PRIO); info->lip_portid.lpi_portno = htons(lp->lp_ifp->if_index); info->lip_state = lp->lp_state; } static void lacp_fill_markerinfo(struct lacp_port *lp, struct lacp_markerinfo *info) { struct ifnet *ifp = lp->lp_ifp; /* Fill in the port index and system id (encoded as the MAC) */ info->mi_rq_port = htons(ifp->if_index); memcpy(&info->mi_rq_system, lp->lp_systemid.lsi_mac, ETHER_ADDR_LEN); info->mi_rq_xid = htonl(0); } static int lacp_xmit_lacpdu(struct lacp_port *lp) { struct lagg_port *lgp = lp->lp_lagg; struct mbuf *m; struct lacpdu *du; int error; LACP_LOCK_ASSERT(lp->lp_lsc); m = m_gethdr(M_NOWAIT, MT_DATA); if (m == NULL) { return (ENOMEM); } m->m_len = m->m_pkthdr.len = sizeof(*du); du = mtod(m, struct lacpdu *); memset(du, 0, sizeof(*du)); memcpy(&du->ldu_eh.ether_dhost, ethermulticastaddr_slowprotocols, ETHER_ADDR_LEN); memcpy(&du->ldu_eh.ether_shost, lgp->lp_lladdr, ETHER_ADDR_LEN); du->ldu_eh.ether_type = htons(ETHERTYPE_SLOW); du->ldu_sph.sph_subtype = SLOWPROTOCOLS_SUBTYPE_LACP; du->ldu_sph.sph_version = 1; TLV_SET(&du->ldu_tlv_actor, LACP_TYPE_ACTORINFO, sizeof(du->ldu_actor)); du->ldu_actor = lp->lp_actor; TLV_SET(&du->ldu_tlv_partner, LACP_TYPE_PARTNERINFO, sizeof(du->ldu_partner)); du->ldu_partner = lp->lp_partner; TLV_SET(&du->ldu_tlv_collector, LACP_TYPE_COLLECTORINFO, sizeof(du->ldu_collector)); du->ldu_collector.lci_maxdelay = 0; if (V_lacp_debug > 0) { lacp_dprintf(lp, "lacpdu transmit\n"); lacp_dump_lacpdu(du); } m->m_flags |= M_MCAST; /* * XXX should use higher priority queue. * otherwise network congestion can break aggregation. */ error = lagg_enqueue(lp->lp_ifp, m); return (error); } static int lacp_xmit_marker(struct lacp_port *lp) { struct lagg_port *lgp = lp->lp_lagg; struct mbuf *m; struct markerdu *mdu; int error; LACP_LOCK_ASSERT(lp->lp_lsc); m = m_gethdr(M_NOWAIT, MT_DATA); if (m == NULL) { return (ENOMEM); } m->m_len = m->m_pkthdr.len = sizeof(*mdu); mdu = mtod(m, struct markerdu *); memset(mdu, 0, sizeof(*mdu)); memcpy(&mdu->mdu_eh.ether_dhost, ethermulticastaddr_slowprotocols, ETHER_ADDR_LEN); memcpy(&mdu->mdu_eh.ether_shost, lgp->lp_lladdr, ETHER_ADDR_LEN); mdu->mdu_eh.ether_type = htons(ETHERTYPE_SLOW); mdu->mdu_sph.sph_subtype = SLOWPROTOCOLS_SUBTYPE_MARKER; mdu->mdu_sph.sph_version = 1; /* Bump the transaction id and copy over the marker info */ lp->lp_marker.mi_rq_xid = htonl(ntohl(lp->lp_marker.mi_rq_xid) + 1); TLV_SET(&mdu->mdu_tlv, MARKER_TYPE_INFO, sizeof(mdu->mdu_info)); mdu->mdu_info = lp->lp_marker; LACP_DPRINTF((lp, "marker transmit, port=%u, sys=%6D, id=%u\n", ntohs(mdu->mdu_info.mi_rq_port), mdu->mdu_info.mi_rq_system, ":", ntohl(mdu->mdu_info.mi_rq_xid))); m->m_flags |= M_MCAST; error = lagg_enqueue(lp->lp_ifp, m); return (error); } void lacp_linkstate(struct lagg_port *lgp) { struct lacp_port *lp = LACP_PORT(lgp); struct lacp_softc *lsc = lp->lp_lsc; struct ifnet *ifp = lgp->lp_ifp; struct ifmediareq ifmr; int error = 0; u_int media; uint8_t old_state; uint16_t old_key; bzero((char *)&ifmr, sizeof(ifmr)); error = (*ifp->if_ioctl)(ifp, SIOCGIFXMEDIA, (caddr_t)&ifmr); if (error != 0) { bzero((char *)&ifmr, sizeof(ifmr)); error = (*ifp->if_ioctl)(ifp, SIOCGIFMEDIA, (caddr_t)&ifmr); } if (error != 0) return; LACP_LOCK(lsc); media = ifmr.ifm_active; LACP_DPRINTF((lp, "media changed 0x%x -> 0x%x, ether = %d, fdx = %d, " "link = %d\n", lp->lp_media, media, IFM_TYPE(media) == IFM_ETHER, (media & IFM_FDX) != 0, ifp->if_link_state == LINK_STATE_UP)); old_state = lp->lp_state; old_key = lp->lp_key; lp->lp_media = media; /* * If the port is not an active full duplex Ethernet link then it can * not be aggregated. */ if (IFM_TYPE(media) != IFM_ETHER || (media & IFM_FDX) == 0 || ifp->if_link_state != LINK_STATE_UP) { lacp_port_disable(lp); } else { lacp_port_enable(lp); } lp->lp_key = lacp_compose_key(lp); if (old_state != lp->lp_state || old_key != lp->lp_key) { LACP_DPRINTF((lp, "-> UNSELECTED\n")); lp->lp_selected = LACP_UNSELECTED; } LACP_UNLOCK(lsc); } static void lacp_tick(void *arg) { struct lacp_softc *lsc = arg; struct lacp_port *lp; LIST_FOREACH(lp, &lsc->lsc_ports, lp_next) { if ((lp->lp_state & LACP_STATE_AGGREGATION) == 0) continue; CURVNET_SET(lp->lp_ifp->if_vnet); lacp_run_timers(lp); lacp_select(lp); lacp_sm_mux(lp); lacp_sm_tx(lp); lacp_sm_ptx_tx_schedule(lp); CURVNET_RESTORE(); } callout_reset(&lsc->lsc_callout, hz, lacp_tick, lsc); } int lacp_port_create(struct lagg_port *lgp) { struct lagg_softc *sc = lgp->lp_softc; struct lacp_softc *lsc = LACP_SOFTC(sc); struct lacp_port *lp; struct ifnet *ifp = lgp->lp_ifp; struct sockaddr_dl sdl; struct ifmultiaddr *rifma = NULL; int error; link_init_sdl(ifp, (struct sockaddr *)&sdl, IFT_ETHER); sdl.sdl_alen = ETHER_ADDR_LEN; bcopy(ðermulticastaddr_slowprotocols, LLADDR(&sdl), ETHER_ADDR_LEN); error = if_addmulti(ifp, (struct sockaddr *)&sdl, &rifma); if (error) { printf("%s: ADDMULTI failed on %s\n", __func__, lgp->lp_ifp->if_xname); return (error); } lp = malloc(sizeof(struct lacp_port), M_DEVBUF, M_NOWAIT|M_ZERO); if (lp == NULL) return (ENOMEM); LACP_LOCK(lsc); lgp->lp_psc = lp; lp->lp_ifp = ifp; lp->lp_lagg = lgp; lp->lp_lsc = lsc; lp->lp_ifma = rifma; LIST_INSERT_HEAD(&lsc->lsc_ports, lp, lp_next); lacp_fill_actorinfo(lp, &lp->lp_actor); lacp_fill_markerinfo(lp, &lp->lp_marker); lp->lp_state = LACP_STATE_ACTIVITY; lp->lp_aggregator = NULL; lacp_sm_rx_set_expired(lp); LACP_UNLOCK(lsc); lacp_linkstate(lgp); return (0); } void lacp_port_destroy(struct lagg_port *lgp) { struct lacp_port *lp = LACP_PORT(lgp); struct lacp_softc *lsc = lp->lp_lsc; int i; LACP_LOCK(lsc); for (i = 0; i < LACP_NTIMER; i++) { LACP_TIMER_DISARM(lp, i); } lacp_disable_collecting(lp); lacp_disable_distributing(lp); lacp_unselect(lp); LIST_REMOVE(lp, lp_next); LACP_UNLOCK(lsc); /* The address may have already been removed by if_purgemaddrs() */ if (!lgp->lp_detaching) if_delmulti_ifma(lp->lp_ifma); free(lp, M_DEVBUF); } void lacp_req(struct lagg_softc *sc, void *data) { struct lacp_opreq *req = (struct lacp_opreq *)data; struct lacp_softc *lsc = LACP_SOFTC(sc); struct lacp_aggregator *la; bzero(req, sizeof(struct lacp_opreq)); /* * If the LACP softc is NULL, return with the opreq structure full of * zeros. It is normal for the softc to be NULL while the lagg is * being destroyed. */ if (NULL == lsc) return; la = lsc->lsc_active_aggregator; LACP_LOCK(lsc); if (la != NULL) { req->actor_prio = ntohs(la->la_actor.lip_systemid.lsi_prio); memcpy(&req->actor_mac, &la->la_actor.lip_systemid.lsi_mac, ETHER_ADDR_LEN); req->actor_key = ntohs(la->la_actor.lip_key); req->actor_portprio = ntohs(la->la_actor.lip_portid.lpi_prio); req->actor_portno = ntohs(la->la_actor.lip_portid.lpi_portno); req->actor_state = la->la_actor.lip_state; req->partner_prio = ntohs(la->la_partner.lip_systemid.lsi_prio); memcpy(&req->partner_mac, &la->la_partner.lip_systemid.lsi_mac, ETHER_ADDR_LEN); req->partner_key = ntohs(la->la_partner.lip_key); req->partner_portprio = ntohs(la->la_partner.lip_portid.lpi_prio); req->partner_portno = ntohs(la->la_partner.lip_portid.lpi_portno); req->partner_state = la->la_partner.lip_state; } LACP_UNLOCK(lsc); } void lacp_portreq(struct lagg_port *lgp, void *data) { struct lacp_opreq *req = (struct lacp_opreq *)data; struct lacp_port *lp = LACP_PORT(lgp); struct lacp_softc *lsc = lp->lp_lsc; LACP_LOCK(lsc); req->actor_prio = ntohs(lp->lp_actor.lip_systemid.lsi_prio); memcpy(&req->actor_mac, &lp->lp_actor.lip_systemid.lsi_mac, ETHER_ADDR_LEN); req->actor_key = ntohs(lp->lp_actor.lip_key); req->actor_portprio = ntohs(lp->lp_actor.lip_portid.lpi_prio); req->actor_portno = ntohs(lp->lp_actor.lip_portid.lpi_portno); req->actor_state = lp->lp_actor.lip_state; req->partner_prio = ntohs(lp->lp_partner.lip_systemid.lsi_prio); memcpy(&req->partner_mac, &lp->lp_partner.lip_systemid.lsi_mac, ETHER_ADDR_LEN); req->partner_key = ntohs(lp->lp_partner.lip_key); req->partner_portprio = ntohs(lp->lp_partner.lip_portid.lpi_prio); req->partner_portno = ntohs(lp->lp_partner.lip_portid.lpi_portno); req->partner_state = lp->lp_partner.lip_state; LACP_UNLOCK(lsc); } static void lacp_disable_collecting(struct lacp_port *lp) { LACP_DPRINTF((lp, "collecting disabled\n")); lp->lp_state &= ~LACP_STATE_COLLECTING; } static void lacp_enable_collecting(struct lacp_port *lp) { LACP_DPRINTF((lp, "collecting enabled\n")); lp->lp_state |= LACP_STATE_COLLECTING; } static void lacp_disable_distributing(struct lacp_port *lp) { struct lacp_aggregator *la = lp->lp_aggregator; struct lacp_softc *lsc = lp->lp_lsc; struct lagg_softc *sc = lsc->lsc_softc; char buf[LACP_LAGIDSTR_MAX+1]; LACP_LOCK_ASSERT(lsc); if (la == NULL || (lp->lp_state & LACP_STATE_DISTRIBUTING) == 0) { return; } KASSERT(!TAILQ_EMPTY(&la->la_ports), ("no aggregator ports")); KASSERT(la->la_nports > 0, ("nports invalid (%d)", la->la_nports)); KASSERT(la->la_refcnt >= la->la_nports, ("aggregator refcnt invalid")); LACP_DPRINTF((lp, "disable distributing on aggregator %s, " "nports %d -> %d\n", lacp_format_lagid_aggregator(la, buf, sizeof(buf)), la->la_nports, la->la_nports - 1)); TAILQ_REMOVE(&la->la_ports, lp, lp_dist_q); la->la_nports--; sc->sc_active = la->la_nports; if (lsc->lsc_active_aggregator == la) { lacp_suppress_distributing(lsc, la); lacp_select_active_aggregator(lsc); /* regenerate the port map, the active aggregator has changed */ lacp_update_portmap(lsc); } lp->lp_state &= ~LACP_STATE_DISTRIBUTING; if_link_state_change(sc->sc_ifp, sc->sc_active ? LINK_STATE_UP : LINK_STATE_DOWN); } static void lacp_enable_distributing(struct lacp_port *lp) { struct lacp_aggregator *la = lp->lp_aggregator; struct lacp_softc *lsc = lp->lp_lsc; struct lagg_softc *sc = lsc->lsc_softc; char buf[LACP_LAGIDSTR_MAX+1]; LACP_LOCK_ASSERT(lsc); if ((lp->lp_state & LACP_STATE_DISTRIBUTING) != 0) { return; } LACP_DPRINTF((lp, "enable distributing on aggregator %s, " "nports %d -> %d\n", lacp_format_lagid_aggregator(la, buf, sizeof(buf)), la->la_nports, la->la_nports + 1)); KASSERT(la->la_refcnt > la->la_nports, ("aggregator refcnt invalid")); TAILQ_INSERT_HEAD(&la->la_ports, lp, lp_dist_q); la->la_nports++; sc->sc_active = la->la_nports; lp->lp_state |= LACP_STATE_DISTRIBUTING; if (lsc->lsc_active_aggregator == la) { lacp_suppress_distributing(lsc, la); lacp_update_portmap(lsc); } else /* try to become the active aggregator */ lacp_select_active_aggregator(lsc); if_link_state_change(sc->sc_ifp, sc->sc_active ? LINK_STATE_UP : LINK_STATE_DOWN); } static void lacp_transit_expire(void *vp) { struct lacp_softc *lsc = vp; LACP_LOCK_ASSERT(lsc); CURVNET_SET(lsc->lsc_softc->sc_ifp->if_vnet); LACP_TRACE(NULL); CURVNET_RESTORE(); lsc->lsc_suppress_distributing = FALSE; } void lacp_attach(struct lagg_softc *sc) { struct lacp_softc *lsc; lsc = malloc(sizeof(struct lacp_softc), M_DEVBUF, M_WAITOK | M_ZERO); sc->sc_psc = lsc; lsc->lsc_softc = sc; lsc->lsc_hashkey = m_ether_tcpip_hash_init(); lsc->lsc_active_aggregator = NULL; lsc->lsc_strict_mode = VNET(lacp_default_strict_mode); LACP_LOCK_INIT(lsc); TAILQ_INIT(&lsc->lsc_aggregators); LIST_INIT(&lsc->lsc_ports); callout_init_mtx(&lsc->lsc_transit_callout, &lsc->lsc_mtx, 0); callout_init_mtx(&lsc->lsc_callout, &lsc->lsc_mtx, 0); /* if the lagg is already up then do the same */ if (sc->sc_ifp->if_drv_flags & IFF_DRV_RUNNING) lacp_init(sc); } void lacp_detach(void *psc) { struct lacp_softc *lsc = (struct lacp_softc *)psc; KASSERT(TAILQ_EMPTY(&lsc->lsc_aggregators), ("aggregators still active")); KASSERT(lsc->lsc_active_aggregator == NULL, ("aggregator still attached")); callout_drain(&lsc->lsc_transit_callout); callout_drain(&lsc->lsc_callout); LACP_LOCK_DESTROY(lsc); free(lsc, M_DEVBUF); } void lacp_init(struct lagg_softc *sc) { struct lacp_softc *lsc = LACP_SOFTC(sc); LACP_LOCK(lsc); callout_reset(&lsc->lsc_callout, hz, lacp_tick, lsc); LACP_UNLOCK(lsc); } void lacp_stop(struct lagg_softc *sc) { struct lacp_softc *lsc = LACP_SOFTC(sc); LACP_LOCK(lsc); callout_stop(&lsc->lsc_transit_callout); callout_stop(&lsc->lsc_callout); LACP_UNLOCK(lsc); } struct lagg_port * lacp_select_tx_port_by_hash(struct lagg_softc *sc, uint32_t hash, uint8_t numa_domain, int *err) { struct lacp_softc *lsc = LACP_SOFTC(sc); struct lacp_portmap *pm; struct lacp_port *lp; struct lacp_port **map; int count; if (__predict_false(lsc->lsc_suppress_distributing)) { LACP_DPRINTF((NULL, "%s: waiting transit\n", __func__)); *err = ENOBUFS; return (NULL); } pm = &lsc->lsc_pmap[lsc->lsc_activemap]; if (pm->pm_count == 0) { LACP_DPRINTF((NULL, "%s: no active aggregator\n", __func__)); *err = ENETDOWN; return (NULL); } #ifdef NUMA if ((sc->sc_opts & LAGG_OPT_USE_NUMA) && pm->pm_num_dom > 1 && numa_domain < MAXMEMDOM) { count = pm->pm_numa[numa_domain].count; if (count > 0) { map = pm->pm_numa[numa_domain].map; } else { /* No ports on this domain; use global hash. */ map = pm->pm_map; count = pm->pm_count; } } else #endif { map = pm->pm_map; count = pm->pm_count; } hash %= count; lp = map[hash]; KASSERT((lp->lp_state & LACP_STATE_DISTRIBUTING) != 0, ("aggregated port is not distributing")); return (lp->lp_lagg); } struct lagg_port * lacp_select_tx_port(struct lagg_softc *sc, struct mbuf *m, int *err) { struct lacp_softc *lsc = LACP_SOFTC(sc); uint32_t hash; uint8_t numa_domain; if ((sc->sc_opts & LAGG_OPT_USE_FLOWID) && M_HASHTYPE_GET(m) != M_HASHTYPE_NONE) hash = m->m_pkthdr.flowid >> sc->flowid_shift; else hash = m_ether_tcpip_hash(sc->sc_flags, m, lsc->lsc_hashkey); numa_domain = m->m_pkthdr.numa_domain; return (lacp_select_tx_port_by_hash(sc, hash, numa_domain, err)); } /* * lacp_suppress_distributing: drop transmit packets for a while * to preserve packet ordering. */ static void lacp_suppress_distributing(struct lacp_softc *lsc, struct lacp_aggregator *la) { struct lacp_port *lp; if (lsc->lsc_active_aggregator != la) { return; } LACP_TRACE(NULL); lsc->lsc_suppress_distributing = TRUE; /* send a marker frame down each port to verify the queues are empty */ LIST_FOREACH(lp, &lsc->lsc_ports, lp_next) { lp->lp_flags |= LACP_PORT_MARK; if (lacp_xmit_marker(lp) != 0) lp->lp_flags &= ~LACP_PORT_MARK; } /* set a timeout for the marker frames */ callout_reset(&lsc->lsc_transit_callout, LACP_TRANSIT_DELAY * hz / 1000, lacp_transit_expire, lsc); } static int lacp_compare_peerinfo(const struct lacp_peerinfo *a, const struct lacp_peerinfo *b) { return (memcmp(a, b, offsetof(struct lacp_peerinfo, lip_state))); } static int lacp_compare_systemid(const struct lacp_systemid *a, const struct lacp_systemid *b) { return (memcmp(a, b, sizeof(*a))); } #if 0 /* unused */ static int lacp_compare_portid(const struct lacp_portid *a, const struct lacp_portid *b) { return (memcmp(a, b, sizeof(*a))); } #endif static uint64_t lacp_aggregator_bandwidth(struct lacp_aggregator *la) { struct lacp_port *lp; uint64_t speed; lp = TAILQ_FIRST(&la->la_ports); if (lp == NULL) { return (0); } speed = ifmedia_baudrate(lp->lp_media); speed *= la->la_nports; if (speed == 0) { LACP_DPRINTF((lp, "speed 0? media=0x%x nports=%d\n", lp->lp_media, la->la_nports)); } return (speed); } /* * lacp_select_active_aggregator: select an aggregator to be used to transmit * packets from lagg(4) interface. */ static void lacp_select_active_aggregator(struct lacp_softc *lsc) { struct lacp_aggregator *la; struct lacp_aggregator *best_la = NULL; uint64_t best_speed = 0; char buf[LACP_LAGIDSTR_MAX+1]; LACP_TRACE(NULL); TAILQ_FOREACH(la, &lsc->lsc_aggregators, la_q) { uint64_t speed; if (la->la_nports == 0) { continue; } speed = lacp_aggregator_bandwidth(la); LACP_DPRINTF((NULL, "%s, speed=%jd, nports=%d\n", lacp_format_lagid_aggregator(la, buf, sizeof(buf)), speed, la->la_nports)); /* * This aggregator is chosen if the partner has a better * system priority or, the total aggregated speed is higher * or, it is already the chosen aggregator */ if ((best_la != NULL && LACP_SYS_PRI(la->la_partner) < LACP_SYS_PRI(best_la->la_partner)) || speed > best_speed || (speed == best_speed && la == lsc->lsc_active_aggregator)) { best_la = la; best_speed = speed; } } KASSERT(best_la == NULL || best_la->la_nports > 0, ("invalid aggregator refcnt")); KASSERT(best_la == NULL || !TAILQ_EMPTY(&best_la->la_ports), ("invalid aggregator list")); if (lsc->lsc_active_aggregator != best_la) { LACP_DPRINTF((NULL, "active aggregator changed\n")); LACP_DPRINTF((NULL, "old %s\n", lacp_format_lagid_aggregator(lsc->lsc_active_aggregator, buf, sizeof(buf)))); } else { LACP_DPRINTF((NULL, "active aggregator not changed\n")); } LACP_DPRINTF((NULL, "new %s\n", lacp_format_lagid_aggregator(best_la, buf, sizeof(buf)))); if (lsc->lsc_active_aggregator != best_la) { lsc->lsc_active_aggregator = best_la; lacp_update_portmap(lsc); if (best_la) { lacp_suppress_distributing(lsc, best_la); } } } /* * Updated the inactive portmap array with the new list of ports and * make it live. */ static void lacp_update_portmap(struct lacp_softc *lsc) { struct lagg_softc *sc = lsc->lsc_softc; struct lacp_aggregator *la; struct lacp_portmap *p; struct lacp_port *lp; uint64_t speed; u_int newmap; int i; #ifdef NUMA int count; uint8_t domain; #endif newmap = lsc->lsc_activemap == 0 ? 1 : 0; p = &lsc->lsc_pmap[newmap]; la = lsc->lsc_active_aggregator; speed = 0; bzero(p, sizeof(struct lacp_portmap)); if (la != NULL && la->la_nports > 0) { p->pm_count = la->la_nports; i = 0; TAILQ_FOREACH(lp, &la->la_ports, lp_dist_q) { p->pm_map[i++] = lp; #ifdef NUMA domain = lp->lp_ifp->if_numa_domain; if (domain >= MAXMEMDOM) continue; count = p->pm_numa[domain].count; p->pm_numa[domain].map[count] = lp; p->pm_numa[domain].count++; #endif } KASSERT(i == p->pm_count, ("Invalid port count")); #ifdef NUMA for (i = 0; i < MAXMEMDOM; i++) { if (p->pm_numa[i].count != 0) p->pm_num_dom++; } #endif speed = lacp_aggregator_bandwidth(la); } sc->sc_ifp->if_baudrate = speed; + EVENTHANDLER_INVOKE(ifnet_event, sc->sc_ifp, + IFNET_EVENT_UPDATE_BAUDRATE); /* switch the active portmap over */ atomic_store_rel_int(&lsc->lsc_activemap, newmap); LACP_DPRINTF((NULL, "Set table %d with %d ports\n", lsc->lsc_activemap, lsc->lsc_pmap[lsc->lsc_activemap].pm_count)); } static uint16_t lacp_compose_key(struct lacp_port *lp) { struct lagg_port *lgp = lp->lp_lagg; struct lagg_softc *sc = lgp->lp_softc; u_int media = lp->lp_media; uint16_t key; if ((lp->lp_state & LACP_STATE_AGGREGATION) == 0) { /* * non-aggregatable links should have unique keys. * * XXX this isn't really unique as if_index is 16 bit. */ /* bit 0..14: (some bits of) if_index of this port */ key = lp->lp_ifp->if_index; /* bit 15: 1 */ key |= 0x8000; } else { u_int subtype = IFM_SUBTYPE(media); KASSERT(IFM_TYPE(media) == IFM_ETHER, ("invalid media type")); KASSERT((media & IFM_FDX) != 0, ("aggregating HDX interface")); /* bit 0..4: IFM_SUBTYPE modulo speed */ switch (subtype) { case IFM_10_T: case IFM_10_2: case IFM_10_5: case IFM_10_STP: case IFM_10_FL: key = IFM_10_T; break; case IFM_100_TX: case IFM_100_FX: case IFM_100_T4: case IFM_100_VG: case IFM_100_T2: case IFM_100_T: case IFM_100_SGMII: key = IFM_100_TX; break; case IFM_1000_SX: case IFM_1000_LX: case IFM_1000_CX: case IFM_1000_T: case IFM_1000_KX: case IFM_1000_SGMII: case IFM_1000_CX_SGMII: key = IFM_1000_SX; break; case IFM_10G_LR: case IFM_10G_SR: case IFM_10G_CX4: case IFM_10G_TWINAX: case IFM_10G_TWINAX_LONG: case IFM_10G_LRM: case IFM_10G_T: case IFM_10G_KX4: case IFM_10G_KR: case IFM_10G_CR1: case IFM_10G_ER: case IFM_10G_SFI: case IFM_10G_AOC: key = IFM_10G_LR; break; case IFM_20G_KR2: key = IFM_20G_KR2; break; case IFM_2500_KX: case IFM_2500_T: case IFM_2500_X: key = IFM_2500_KX; break; case IFM_5000_T: case IFM_5000_KR: case IFM_5000_KR_S: case IFM_5000_KR1: key = IFM_5000_T; break; case IFM_50G_PCIE: case IFM_50G_CR2: case IFM_50G_KR2: case IFM_50G_KR4: case IFM_50G_SR2: case IFM_50G_LR2: case IFM_50G_LAUI2_AC: case IFM_50G_LAUI2: case IFM_50G_AUI2_AC: case IFM_50G_AUI2: case IFM_50G_CP: case IFM_50G_SR: case IFM_50G_LR: case IFM_50G_FR: case IFM_50G_KR_PAM4: case IFM_50G_AUI1_AC: case IFM_50G_AUI1: key = IFM_50G_PCIE; break; case IFM_56G_R4: key = IFM_56G_R4; break; case IFM_25G_PCIE: case IFM_25G_CR: case IFM_25G_KR: case IFM_25G_SR: case IFM_25G_LR: case IFM_25G_ACC: case IFM_25G_AOC: case IFM_25G_T: case IFM_25G_CR_S: case IFM_25G_CR1: case IFM_25G_KR_S: case IFM_25G_AUI: case IFM_25G_KR1: key = IFM_25G_PCIE; break; case IFM_40G_CR4: case IFM_40G_SR4: case IFM_40G_LR4: case IFM_40G_LM4: case IFM_40G_XLPPI: case IFM_40G_KR4: case IFM_40G_XLAUI: case IFM_40G_XLAUI_AC: case IFM_40G_ER4: key = IFM_40G_CR4; break; case IFM_100G_CR4: case IFM_100G_SR4: case IFM_100G_KR4: case IFM_100G_LR4: case IFM_100G_CAUI4_AC: case IFM_100G_CAUI4: case IFM_100G_AUI4_AC: case IFM_100G_AUI4: case IFM_100G_CR_PAM4: case IFM_100G_KR_PAM4: case IFM_100G_CP2: case IFM_100G_SR2: case IFM_100G_DR: case IFM_100G_KR2_PAM4: case IFM_100G_CAUI2_AC: case IFM_100G_CAUI2: case IFM_100G_AUI2_AC: case IFM_100G_AUI2: key = IFM_100G_CR4; break; case IFM_200G_CR4_PAM4: case IFM_200G_SR4: case IFM_200G_FR4: case IFM_200G_LR4: case IFM_200G_DR4: case IFM_200G_KR4_PAM4: case IFM_200G_AUI4_AC: case IFM_200G_AUI4: case IFM_200G_AUI8_AC: case IFM_200G_AUI8: key = IFM_200G_CR4_PAM4; break; case IFM_400G_FR8: case IFM_400G_LR8: case IFM_400G_DR4: case IFM_400G_AUI8_AC: case IFM_400G_AUI8: key = IFM_400G_FR8; break; default: key = subtype; break; } /* bit 5..14: (some bits of) if_index of lagg device */ key |= 0x7fe0 & ((sc->sc_ifp->if_index) << 5); /* bit 15: 0 */ } return (htons(key)); } static void lacp_aggregator_addref(struct lacp_softc *lsc, struct lacp_aggregator *la) { char buf[LACP_LAGIDSTR_MAX+1]; LACP_DPRINTF((NULL, "%s: lagid=%s, refcnt %d -> %d\n", __func__, lacp_format_lagid(&la->la_actor, &la->la_partner, buf, sizeof(buf)), la->la_refcnt, la->la_refcnt + 1)); KASSERT(la->la_refcnt > 0, ("refcount <= 0")); la->la_refcnt++; KASSERT(la->la_refcnt > la->la_nports, ("invalid refcount")); } static void lacp_aggregator_delref(struct lacp_softc *lsc, struct lacp_aggregator *la) { char buf[LACP_LAGIDSTR_MAX+1]; LACP_DPRINTF((NULL, "%s: lagid=%s, refcnt %d -> %d\n", __func__, lacp_format_lagid(&la->la_actor, &la->la_partner, buf, sizeof(buf)), la->la_refcnt, la->la_refcnt - 1)); KASSERT(la->la_refcnt > la->la_nports, ("invalid refcnt")); la->la_refcnt--; if (la->la_refcnt > 0) { return; } KASSERT(la->la_refcnt == 0, ("refcount not zero")); KASSERT(lsc->lsc_active_aggregator != la, ("aggregator active")); TAILQ_REMOVE(&lsc->lsc_aggregators, la, la_q); free(la, M_DEVBUF); } /* * lacp_aggregator_get: allocate an aggregator. */ static struct lacp_aggregator * lacp_aggregator_get(struct lacp_softc *lsc, struct lacp_port *lp) { struct lacp_aggregator *la; la = malloc(sizeof(*la), M_DEVBUF, M_NOWAIT); if (la) { la->la_refcnt = 1; la->la_nports = 0; TAILQ_INIT(&la->la_ports); la->la_pending = 0; TAILQ_INSERT_TAIL(&lsc->lsc_aggregators, la, la_q); } return (la); } /* * lacp_fill_aggregator_id: setup a newly allocated aggregator from a port. */ static void lacp_fill_aggregator_id(struct lacp_aggregator *la, const struct lacp_port *lp) { lacp_fill_aggregator_id_peer(&la->la_partner, &lp->lp_partner); lacp_fill_aggregator_id_peer(&la->la_actor, &lp->lp_actor); la->la_actor.lip_state = lp->lp_state & LACP_STATE_AGGREGATION; } static void lacp_fill_aggregator_id_peer(struct lacp_peerinfo *lpi_aggr, const struct lacp_peerinfo *lpi_port) { memset(lpi_aggr, 0, sizeof(*lpi_aggr)); lpi_aggr->lip_systemid = lpi_port->lip_systemid; lpi_aggr->lip_key = lpi_port->lip_key; } /* * lacp_aggregator_is_compatible: check if a port can join to an aggregator. */ static int lacp_aggregator_is_compatible(const struct lacp_aggregator *la, const struct lacp_port *lp) { if (!(lp->lp_state & LACP_STATE_AGGREGATION) || !(lp->lp_partner.lip_state & LACP_STATE_AGGREGATION)) { return (0); } if (!(la->la_actor.lip_state & LACP_STATE_AGGREGATION)) { return (0); } if (!lacp_peerinfo_is_compatible(&la->la_partner, &lp->lp_partner)) { return (0); } if (!lacp_peerinfo_is_compatible(&la->la_actor, &lp->lp_actor)) { return (0); } return (1); } static int lacp_peerinfo_is_compatible(const struct lacp_peerinfo *a, const struct lacp_peerinfo *b) { if (memcmp(&a->lip_systemid, &b->lip_systemid, sizeof(a->lip_systemid))) { return (0); } if (memcmp(&a->lip_key, &b->lip_key, sizeof(a->lip_key))) { return (0); } return (1); } static void lacp_port_enable(struct lacp_port *lp) { lp->lp_state |= LACP_STATE_AGGREGATION; } static void lacp_port_disable(struct lacp_port *lp) { lacp_set_mux(lp, LACP_MUX_DETACHED); lp->lp_state &= ~LACP_STATE_AGGREGATION; lp->lp_selected = LACP_UNSELECTED; lacp_sm_rx_record_default(lp); lp->lp_partner.lip_state &= ~LACP_STATE_AGGREGATION; lp->lp_state &= ~LACP_STATE_EXPIRED; } /* * lacp_select: select an aggregator. create one if necessary. */ static void lacp_select(struct lacp_port *lp) { struct lacp_softc *lsc = lp->lp_lsc; struct lacp_aggregator *la; char buf[LACP_LAGIDSTR_MAX+1]; if (lp->lp_aggregator) { return; } /* If we haven't heard from our peer, skip this step. */ if (lp->lp_state & LACP_STATE_DEFAULTED) return; KASSERT(!LACP_TIMER_ISARMED(lp, LACP_TIMER_WAIT_WHILE), ("timer_wait_while still active")); LACP_DPRINTF((lp, "port lagid=%s\n", lacp_format_lagid(&lp->lp_actor, &lp->lp_partner, buf, sizeof(buf)))); TAILQ_FOREACH(la, &lsc->lsc_aggregators, la_q) { if (lacp_aggregator_is_compatible(la, lp)) { break; } } if (la == NULL) { la = lacp_aggregator_get(lsc, lp); if (la == NULL) { LACP_DPRINTF((lp, "aggregator creation failed\n")); /* * will retry on the next tick. */ return; } lacp_fill_aggregator_id(la, lp); LACP_DPRINTF((lp, "aggregator created\n")); } else { LACP_DPRINTF((lp, "compatible aggregator found\n")); if (la->la_refcnt == LACP_MAX_PORTS) return; lacp_aggregator_addref(lsc, la); } LACP_DPRINTF((lp, "aggregator lagid=%s\n", lacp_format_lagid(&la->la_actor, &la->la_partner, buf, sizeof(buf)))); lp->lp_aggregator = la; lp->lp_selected = LACP_SELECTED; } /* * lacp_unselect: finish unselect/detach process. */ static void lacp_unselect(struct lacp_port *lp) { struct lacp_softc *lsc = lp->lp_lsc; struct lacp_aggregator *la = lp->lp_aggregator; KASSERT(!LACP_TIMER_ISARMED(lp, LACP_TIMER_WAIT_WHILE), ("timer_wait_while still active")); if (la == NULL) { return; } lp->lp_aggregator = NULL; lacp_aggregator_delref(lsc, la); } /* mux machine */ static void lacp_sm_mux(struct lacp_port *lp) { struct lagg_port *lgp = lp->lp_lagg; struct lagg_softc *sc = lgp->lp_softc; enum lacp_mux_state new_state; boolean_t p_sync = (lp->lp_partner.lip_state & LACP_STATE_SYNC) != 0; boolean_t p_collecting = (lp->lp_partner.lip_state & LACP_STATE_COLLECTING) != 0; enum lacp_selected selected = lp->lp_selected; struct lacp_aggregator *la; if (V_lacp_debug > 1) lacp_dprintf(lp, "%s: state= 0x%x, selected= 0x%x, " "p_sync= 0x%x, p_collecting= 0x%x\n", __func__, lp->lp_mux_state, selected, p_sync, p_collecting); re_eval: la = lp->lp_aggregator; KASSERT(lp->lp_mux_state == LACP_MUX_DETACHED || la != NULL, ("MUX not detached")); new_state = lp->lp_mux_state; switch (lp->lp_mux_state) { case LACP_MUX_DETACHED: if (selected != LACP_UNSELECTED) { new_state = LACP_MUX_WAITING; } break; case LACP_MUX_WAITING: KASSERT(la->la_pending > 0 || !LACP_TIMER_ISARMED(lp, LACP_TIMER_WAIT_WHILE), ("timer_wait_while still active")); if (selected == LACP_SELECTED && la->la_pending == 0) { new_state = LACP_MUX_ATTACHED; } else if (selected == LACP_UNSELECTED) { new_state = LACP_MUX_DETACHED; } break; case LACP_MUX_ATTACHED: if (selected == LACP_SELECTED && p_sync) { new_state = LACP_MUX_COLLECTING; } else if (selected != LACP_SELECTED) { new_state = LACP_MUX_DETACHED; } break; case LACP_MUX_COLLECTING: if (selected == LACP_SELECTED && p_sync && p_collecting) { new_state = LACP_MUX_DISTRIBUTING; } else if (selected != LACP_SELECTED || !p_sync) { new_state = LACP_MUX_ATTACHED; } break; case LACP_MUX_DISTRIBUTING: if (selected != LACP_SELECTED || !p_sync || !p_collecting) { new_state = LACP_MUX_COLLECTING; lacp_dprintf(lp, "Interface stopped DISTRIBUTING, possible flapping\n"); sc->sc_flapping++; } break; default: panic("%s: unknown state", __func__); } if (lp->lp_mux_state == new_state) { return; } lacp_set_mux(lp, new_state); goto re_eval; } static void lacp_set_mux(struct lacp_port *lp, enum lacp_mux_state new_state) { struct lacp_aggregator *la = lp->lp_aggregator; if (lp->lp_mux_state == new_state) { return; } switch (new_state) { case LACP_MUX_DETACHED: lp->lp_state &= ~LACP_STATE_SYNC; lacp_disable_distributing(lp); lacp_disable_collecting(lp); lacp_sm_assert_ntt(lp); /* cancel timer */ if (LACP_TIMER_ISARMED(lp, LACP_TIMER_WAIT_WHILE)) { KASSERT(la->la_pending > 0, ("timer_wait_while not active")); la->la_pending--; } LACP_TIMER_DISARM(lp, LACP_TIMER_WAIT_WHILE); lacp_unselect(lp); break; case LACP_MUX_WAITING: LACP_TIMER_ARM(lp, LACP_TIMER_WAIT_WHILE, LACP_AGGREGATE_WAIT_TIME); la->la_pending++; break; case LACP_MUX_ATTACHED: lp->lp_state |= LACP_STATE_SYNC; lacp_disable_collecting(lp); lacp_sm_assert_ntt(lp); break; case LACP_MUX_COLLECTING: lacp_enable_collecting(lp); lacp_disable_distributing(lp); lacp_sm_assert_ntt(lp); break; case LACP_MUX_DISTRIBUTING: lacp_enable_distributing(lp); break; default: panic("%s: unknown state", __func__); } LACP_DPRINTF((lp, "mux_state %d -> %d\n", lp->lp_mux_state, new_state)); lp->lp_mux_state = new_state; } static void lacp_sm_mux_timer(struct lacp_port *lp) { struct lacp_aggregator *la = lp->lp_aggregator; char buf[LACP_LAGIDSTR_MAX+1]; KASSERT(la->la_pending > 0, ("no pending event")); LACP_DPRINTF((lp, "%s: aggregator %s, pending %d -> %d\n", __func__, lacp_format_lagid(&la->la_actor, &la->la_partner, buf, sizeof(buf)), la->la_pending, la->la_pending - 1)); la->la_pending--; } /* periodic transmit machine */ static void lacp_sm_ptx_update_timeout(struct lacp_port *lp, uint8_t oldpstate) { if (LACP_STATE_EQ(oldpstate, lp->lp_partner.lip_state, LACP_STATE_TIMEOUT)) { return; } LACP_DPRINTF((lp, "partner timeout changed\n")); /* * FAST_PERIODIC -> SLOW_PERIODIC * or * SLOW_PERIODIC (-> PERIODIC_TX) -> FAST_PERIODIC * * let lacp_sm_ptx_tx_schedule to update timeout. */ LACP_TIMER_DISARM(lp, LACP_TIMER_PERIODIC); /* * if timeout has been shortened, assert NTT. */ if ((lp->lp_partner.lip_state & LACP_STATE_TIMEOUT)) { lacp_sm_assert_ntt(lp); } } static void lacp_sm_ptx_tx_schedule(struct lacp_port *lp) { int timeout; if (!(lp->lp_state & LACP_STATE_ACTIVITY) && !(lp->lp_partner.lip_state & LACP_STATE_ACTIVITY)) { /* * NO_PERIODIC */ LACP_TIMER_DISARM(lp, LACP_TIMER_PERIODIC); return; } if (LACP_TIMER_ISARMED(lp, LACP_TIMER_PERIODIC)) { return; } timeout = (lp->lp_partner.lip_state & LACP_STATE_TIMEOUT) ? LACP_FAST_PERIODIC_TIME : LACP_SLOW_PERIODIC_TIME; LACP_TIMER_ARM(lp, LACP_TIMER_PERIODIC, timeout); } static void lacp_sm_ptx_timer(struct lacp_port *lp) { lacp_sm_assert_ntt(lp); } static void lacp_sm_rx(struct lacp_port *lp, const struct lacpdu *du) { int timeout; /* * check LACP_DISABLED first */ if (!(lp->lp_state & LACP_STATE_AGGREGATION)) { return; } /* * check loopback condition. */ if (!lacp_compare_systemid(&du->ldu_actor.lip_systemid, &lp->lp_actor.lip_systemid)) { return; } /* * EXPIRED, DEFAULTED, CURRENT -> CURRENT */ microuptime(&lp->lp_last_lacpdu_rx); lacp_sm_rx_update_selected(lp, du); lacp_sm_rx_update_ntt(lp, du); lacp_sm_rx_record_pdu(lp, du); timeout = (lp->lp_state & LACP_STATE_TIMEOUT) ? LACP_SHORT_TIMEOUT_TIME : LACP_LONG_TIMEOUT_TIME; LACP_TIMER_ARM(lp, LACP_TIMER_CURRENT_WHILE, timeout); lp->lp_state &= ~LACP_STATE_EXPIRED; /* * kick transmit machine without waiting the next tick. */ lacp_sm_tx(lp); } static void lacp_sm_rx_set_expired(struct lacp_port *lp) { lp->lp_partner.lip_state &= ~LACP_STATE_SYNC; lp->lp_partner.lip_state |= LACP_STATE_TIMEOUT; LACP_TIMER_ARM(lp, LACP_TIMER_CURRENT_WHILE, LACP_SHORT_TIMEOUT_TIME); lp->lp_state |= LACP_STATE_EXPIRED; } static void lacp_sm_rx_timer(struct lacp_port *lp) { if ((lp->lp_state & LACP_STATE_EXPIRED) == 0) { /* CURRENT -> EXPIRED */ LACP_DPRINTF((lp, "%s: CURRENT -> EXPIRED\n", __func__)); lacp_sm_rx_set_expired(lp); } else { /* EXPIRED -> DEFAULTED */ LACP_DPRINTF((lp, "%s: EXPIRED -> DEFAULTED\n", __func__)); lacp_sm_rx_update_default_selected(lp); lacp_sm_rx_record_default(lp); lp->lp_state &= ~LACP_STATE_EXPIRED; } } static void lacp_sm_rx_record_pdu(struct lacp_port *lp, const struct lacpdu *du) { boolean_t active; uint8_t oldpstate; char buf[LACP_STATESTR_MAX+1]; LACP_TRACE(lp); oldpstate = lp->lp_partner.lip_state; active = (du->ldu_actor.lip_state & LACP_STATE_ACTIVITY) || ((lp->lp_state & LACP_STATE_ACTIVITY) && (du->ldu_partner.lip_state & LACP_STATE_ACTIVITY)); lp->lp_partner = du->ldu_actor; if (active && ((LACP_STATE_EQ(lp->lp_state, du->ldu_partner.lip_state, LACP_STATE_AGGREGATION) && !lacp_compare_peerinfo(&lp->lp_actor, &du->ldu_partner)) || (du->ldu_partner.lip_state & LACP_STATE_AGGREGATION) == 0)) { /* * XXX Maintain legacy behavior of leaving the * LACP_STATE_SYNC bit unchanged from the partner's * advertisement if lsc_strict_mode is false. * TODO: We should re-examine the concept of the "strict mode" * to ensure it makes sense to maintain a non-strict mode. */ if (lp->lp_lsc->lsc_strict_mode) lp->lp_partner.lip_state |= LACP_STATE_SYNC; } else { lp->lp_partner.lip_state &= ~LACP_STATE_SYNC; } lp->lp_state &= ~LACP_STATE_DEFAULTED; if (oldpstate != lp->lp_partner.lip_state) { LACP_DPRINTF((lp, "old pstate %s\n", lacp_format_state(oldpstate, buf, sizeof(buf)))); LACP_DPRINTF((lp, "new pstate %s\n", lacp_format_state(lp->lp_partner.lip_state, buf, sizeof(buf)))); } lacp_sm_ptx_update_timeout(lp, oldpstate); } static void lacp_sm_rx_update_ntt(struct lacp_port *lp, const struct lacpdu *du) { LACP_TRACE(lp); if (lacp_compare_peerinfo(&lp->lp_actor, &du->ldu_partner) || !LACP_STATE_EQ(lp->lp_state, du->ldu_partner.lip_state, LACP_STATE_ACTIVITY | LACP_STATE_SYNC | LACP_STATE_AGGREGATION)) { LACP_DPRINTF((lp, "%s: assert ntt\n", __func__)); lacp_sm_assert_ntt(lp); } } static void lacp_sm_rx_record_default(struct lacp_port *lp) { uint8_t oldpstate; LACP_TRACE(lp); oldpstate = lp->lp_partner.lip_state; if (lp->lp_lsc->lsc_strict_mode) lp->lp_partner = lacp_partner_admin_strict; else lp->lp_partner = lacp_partner_admin_optimistic; lp->lp_state |= LACP_STATE_DEFAULTED; lacp_sm_ptx_update_timeout(lp, oldpstate); } static void lacp_sm_rx_update_selected_from_peerinfo(struct lacp_port *lp, const struct lacp_peerinfo *info) { LACP_TRACE(lp); if (lacp_compare_peerinfo(&lp->lp_partner, info) || !LACP_STATE_EQ(lp->lp_partner.lip_state, info->lip_state, LACP_STATE_AGGREGATION)) { lp->lp_selected = LACP_UNSELECTED; /* mux machine will clean up lp->lp_aggregator */ } } static void lacp_sm_rx_update_selected(struct lacp_port *lp, const struct lacpdu *du) { LACP_TRACE(lp); lacp_sm_rx_update_selected_from_peerinfo(lp, &du->ldu_actor); } static void lacp_sm_rx_update_default_selected(struct lacp_port *lp) { LACP_TRACE(lp); if (lp->lp_lsc->lsc_strict_mode) lacp_sm_rx_update_selected_from_peerinfo(lp, &lacp_partner_admin_strict); else lacp_sm_rx_update_selected_from_peerinfo(lp, &lacp_partner_admin_optimistic); } /* transmit machine */ static void lacp_sm_tx(struct lacp_port *lp) { int error = 0; if (!(lp->lp_state & LACP_STATE_AGGREGATION) #if 1 || (!(lp->lp_state & LACP_STATE_ACTIVITY) && !(lp->lp_partner.lip_state & LACP_STATE_ACTIVITY)) #endif ) { lp->lp_flags &= ~LACP_PORT_NTT; } if (!(lp->lp_flags & LACP_PORT_NTT)) { return; } /* Rate limit to 3 PDUs per LACP_FAST_PERIODIC_TIME */ if (ppsratecheck(&lp->lp_last_lacpdu, &lp->lp_lacpdu_sent, (3 / LACP_FAST_PERIODIC_TIME)) == 0) { LACP_DPRINTF((lp, "rate limited pdu\n")); return; } if (((1 << lp->lp_ifp->if_dunit) & lp->lp_lsc->lsc_debug.lsc_tx_test) == 0) { error = lacp_xmit_lacpdu(lp); } else { LACP_TPRINTF((lp, "Dropping TX PDU\n")); } if (error == 0) { lp->lp_flags &= ~LACP_PORT_NTT; } else { LACP_DPRINTF((lp, "lacpdu transmit failure, error %d\n", error)); } } static void lacp_sm_assert_ntt(struct lacp_port *lp) { lp->lp_flags |= LACP_PORT_NTT; } static void lacp_run_timers(struct lacp_port *lp) { int i; struct timeval time_diff; for (i = 0; i < LACP_NTIMER; i++) { KASSERT(lp->lp_timer[i] >= 0, ("invalid timer value %d", lp->lp_timer[i])); if (lp->lp_timer[i] == 0) { continue; } else { if (i == LACP_TIMER_CURRENT_WHILE) { microuptime(&time_diff); timevalsub(&time_diff, &lp->lp_last_lacpdu_rx); if (time_diff.tv_sec) { /* At least one sec has elapsed since last LACP packet. */ --lp->lp_timer[i]; } } else { --lp->lp_timer[i]; } if ((lp->lp_timer[i] <= 0) && (lacp_timer_funcs[i])) { (*lacp_timer_funcs[i])(lp); } } } } int lacp_marker_input(struct lacp_port *lp, struct mbuf *m) { struct lacp_softc *lsc = lp->lp_lsc; struct lagg_port *lgp = lp->lp_lagg; struct lacp_port *lp2; struct markerdu *mdu; int error = 0; int pending = 0; if (m->m_pkthdr.len != sizeof(*mdu)) { goto bad; } if ((m->m_flags & M_MCAST) == 0) { goto bad; } if (m->m_len < sizeof(*mdu)) { m = m_pullup(m, sizeof(*mdu)); if (m == NULL) { return (ENOMEM); } } mdu = mtod(m, struct markerdu *); if (memcmp(&mdu->mdu_eh.ether_dhost, ðermulticastaddr_slowprotocols, ETHER_ADDR_LEN)) { goto bad; } if (mdu->mdu_sph.sph_version != 1) { goto bad; } switch (mdu->mdu_tlv.tlv_type) { case MARKER_TYPE_INFO: if (tlv_check(mdu, sizeof(*mdu), &mdu->mdu_tlv, marker_info_tlv_template, TRUE)) { goto bad; } mdu->mdu_tlv.tlv_type = MARKER_TYPE_RESPONSE; memcpy(&mdu->mdu_eh.ether_dhost, ðermulticastaddr_slowprotocols, ETHER_ADDR_LEN); memcpy(&mdu->mdu_eh.ether_shost, lgp->lp_lladdr, ETHER_ADDR_LEN); error = lagg_enqueue(lp->lp_ifp, m); break; case MARKER_TYPE_RESPONSE: if (tlv_check(mdu, sizeof(*mdu), &mdu->mdu_tlv, marker_response_tlv_template, TRUE)) { goto bad; } LACP_DPRINTF((lp, "marker response, port=%u, sys=%6D, id=%u\n", ntohs(mdu->mdu_info.mi_rq_port), mdu->mdu_info.mi_rq_system, ":", ntohl(mdu->mdu_info.mi_rq_xid))); /* Verify that it is the last marker we sent out */ if (memcmp(&mdu->mdu_info, &lp->lp_marker, sizeof(struct lacp_markerinfo))) goto bad; LACP_LOCK(lsc); lp->lp_flags &= ~LACP_PORT_MARK; if (lsc->lsc_suppress_distributing) { /* Check if any ports are waiting for a response */ LIST_FOREACH(lp2, &lsc->lsc_ports, lp_next) { if (lp2->lp_flags & LACP_PORT_MARK) { pending = 1; break; } } if (pending == 0) { /* All interface queues are clear */ LACP_DPRINTF((NULL, "queue flush complete\n")); lsc->lsc_suppress_distributing = FALSE; } } LACP_UNLOCK(lsc); m_freem(m); break; default: goto bad; } return (error); bad: LACP_DPRINTF((lp, "bad marker frame\n")); m_freem(m); return (EINVAL); } static int tlv_check(const void *p, size_t size, const struct tlvhdr *tlv, const struct tlv_template *tmpl, boolean_t check_type) { while (/* CONSTCOND */ 1) { if ((const char *)tlv - (const char *)p + sizeof(*tlv) > size) { return (EINVAL); } if ((check_type && tlv->tlv_type != tmpl->tmpl_type) || tlv->tlv_length != tmpl->tmpl_length) { return (EINVAL); } if (tmpl->tmpl_type == 0) { break; } tlv = (const struct tlvhdr *) ((const char *)tlv + tlv->tlv_length); tmpl++; } return (0); } /* Debugging */ const char * lacp_format_mac(const uint8_t *mac, char *buf, size_t buflen) { snprintf(buf, buflen, "%02X-%02X-%02X-%02X-%02X-%02X", (int)mac[0], (int)mac[1], (int)mac[2], (int)mac[3], (int)mac[4], (int)mac[5]); return (buf); } const char * lacp_format_systemid(const struct lacp_systemid *sysid, char *buf, size_t buflen) { char macbuf[LACP_MACSTR_MAX+1]; snprintf(buf, buflen, "%04X,%s", ntohs(sysid->lsi_prio), lacp_format_mac(sysid->lsi_mac, macbuf, sizeof(macbuf))); return (buf); } const char * lacp_format_portid(const struct lacp_portid *portid, char *buf, size_t buflen) { snprintf(buf, buflen, "%04X,%04X", ntohs(portid->lpi_prio), ntohs(portid->lpi_portno)); return (buf); } const char * lacp_format_partner(const struct lacp_peerinfo *peer, char *buf, size_t buflen) { char sysid[LACP_SYSTEMIDSTR_MAX+1]; char portid[LACP_PORTIDSTR_MAX+1]; snprintf(buf, buflen, "(%s,%04X,%s)", lacp_format_systemid(&peer->lip_systemid, sysid, sizeof(sysid)), ntohs(peer->lip_key), lacp_format_portid(&peer->lip_portid, portid, sizeof(portid))); return (buf); } const char * lacp_format_lagid(const struct lacp_peerinfo *a, const struct lacp_peerinfo *b, char *buf, size_t buflen) { char astr[LACP_PARTNERSTR_MAX+1]; char bstr[LACP_PARTNERSTR_MAX+1]; #if 0 /* * there's a convention to display small numbered peer * in the left. */ if (lacp_compare_peerinfo(a, b) > 0) { const struct lacp_peerinfo *t; t = a; a = b; b = t; } #endif snprintf(buf, buflen, "[%s,%s]", lacp_format_partner(a, astr, sizeof(astr)), lacp_format_partner(b, bstr, sizeof(bstr))); return (buf); } const char * lacp_format_lagid_aggregator(const struct lacp_aggregator *la, char *buf, size_t buflen) { if (la == NULL) { return ("(none)"); } return (lacp_format_lagid(&la->la_actor, &la->la_partner, buf, buflen)); } const char * lacp_format_state(uint8_t state, char *buf, size_t buflen) { snprintf(buf, buflen, "%b", state, LACP_STATE_BITS); return (buf); } static void lacp_dump_lacpdu(const struct lacpdu *du) { char buf[LACP_PARTNERSTR_MAX+1]; char buf2[LACP_STATESTR_MAX+1]; printf("actor=%s\n", lacp_format_partner(&du->ldu_actor, buf, sizeof(buf))); printf("actor.state=%s\n", lacp_format_state(du->ldu_actor.lip_state, buf2, sizeof(buf2))); printf("partner=%s\n", lacp_format_partner(&du->ldu_partner, buf, sizeof(buf))); printf("partner.state=%s\n", lacp_format_state(du->ldu_partner.lip_state, buf2, sizeof(buf2))); printf("maxdelay=%d\n", ntohs(du->ldu_collector.lci_maxdelay)); } static void lacp_dprintf(const struct lacp_port *lp, const char *fmt, ...) { va_list va; if (lp) { printf("%s: ", lp->lp_ifp->if_xname); } va_start(va, fmt); vprintf(fmt, va); va_end(va); } diff --git a/sys/net/if_var.h b/sys/net/if_var.h index e054c613e9e6..2c09795c6b36 100644 --- a/sys/net/if_var.h +++ b/sys/net/if_var.h @@ -1,823 +1,824 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 1982, 1986, 1989, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * From: @(#)if.h 8.1 (Berkeley) 6/10/93 * $FreeBSD$ */ #ifndef _NET_IF_VAR_H_ #define _NET_IF_VAR_H_ /* * Structures defining a network interface, providing a packet * transport mechanism (ala level 0 of the PUP protocols). * * Each interface accepts output datagrams of a specified maximum * length, and provides higher level routines with input datagrams * received from its medium. * * Output occurs when the routine if_output is called, with three parameters: * (*ifp->if_output)(ifp, m, dst, ro) * Here m is the mbuf chain to be sent and dst is the destination address. * The output routine encapsulates the supplied datagram if necessary, * and then transmits it on its medium. * * On input, each interface unwraps the data received by it, and either * places it on the input queue of an internetwork datagram routine * and posts the associated software interrupt, or passes the datagram to a raw * packet input routine. * * Routines exist for locating interfaces by their addresses * or for locating an interface on a certain network, as well as more general * routing and gateway routines maintaining information used to locate * interfaces. These routines live in the files if.c and route.c */ struct rtentry; /* ifa_rtrequest */ struct socket; struct carp_if; struct carp_softc; struct ifvlantrunk; struct route; /* if_output */ struct vnet; struct ifmedia; struct netmap_adapter; struct debugnet_methods; #ifdef _KERNEL #include #include /* ifqueue only? */ #include #include #endif /* _KERNEL */ #include #include #include #include /* XXX */ #include /* struct ifqueue */ #include /* XXX */ #include /* XXX */ #include /* if_link_task */ #define IF_DUNIT_NONE -1 #include CK_STAILQ_HEAD(ifnethead, ifnet); /* we use TAILQs so that the order of */ CK_STAILQ_HEAD(ifaddrhead, ifaddr); /* instantiation is preserved in the list */ CK_STAILQ_HEAD(ifmultihead, ifmultiaddr); CK_STAILQ_HEAD(ifgrouphead, ifg_group); #ifdef _KERNEL VNET_DECLARE(struct pfil_head *, link_pfil_head); #define V_link_pfil_head VNET(link_pfil_head) #define PFIL_ETHER_NAME "ethernet" #define HHOOK_IPSEC_INET 0 #define HHOOK_IPSEC_INET6 1 #define HHOOK_IPSEC_COUNT 2 VNET_DECLARE(struct hhook_head *, ipsec_hhh_in[HHOOK_IPSEC_COUNT]); VNET_DECLARE(struct hhook_head *, ipsec_hhh_out[HHOOK_IPSEC_COUNT]); #define V_ipsec_hhh_in VNET(ipsec_hhh_in) #define V_ipsec_hhh_out VNET(ipsec_hhh_out) #endif /* _KERNEL */ typedef enum { IFCOUNTER_IPACKETS = 0, IFCOUNTER_IERRORS, IFCOUNTER_OPACKETS, IFCOUNTER_OERRORS, IFCOUNTER_COLLISIONS, IFCOUNTER_IBYTES, IFCOUNTER_OBYTES, IFCOUNTER_IMCASTS, IFCOUNTER_OMCASTS, IFCOUNTER_IQDROPS, IFCOUNTER_OQDROPS, IFCOUNTER_NOPROTO, IFCOUNTERS /* Array size. */ } ift_counter; typedef struct ifnet * if_t; typedef void (*if_start_fn_t)(if_t); typedef int (*if_ioctl_fn_t)(if_t, u_long, caddr_t); typedef void (*if_init_fn_t)(void *); typedef void (*if_qflush_fn_t)(if_t); typedef int (*if_transmit_fn_t)(if_t, struct mbuf *); typedef uint64_t (*if_get_counter_t)(if_t, ift_counter); struct ifnet_hw_tsomax { u_int tsomaxbytes; /* TSO total burst length limit in bytes */ u_int tsomaxsegcount; /* TSO maximum segment count */ u_int tsomaxsegsize; /* TSO maximum segment size in bytes */ }; /* Interface encap request types */ typedef enum { IFENCAP_LL = 1 /* pre-calculate link-layer header */ } ife_type; /* * The structure below allows to request various pre-calculated L2/L3 headers * for different media. Requests varies by type (rtype field). * * IFENCAP_LL type: pre-calculates link header based on address family * and destination lladdr. * * Input data fields: * buf: pointer to destination buffer * bufsize: buffer size * flags: IFENCAP_FLAG_BROADCAST if destination is broadcast * family: address family defined by AF_ constant. * lladdr: pointer to link-layer address * lladdr_len: length of link-layer address * hdata: pointer to L3 header (optional, used for ARP requests). * Output data fields: * buf: encap data is stored here * bufsize: resulting encap length is stored here * lladdr_off: offset of link-layer address from encap hdr start * hdata: L3 header may be altered if necessary */ struct if_encap_req { u_char *buf; /* Destination buffer (w) */ size_t bufsize; /* size of provided buffer (r) */ ife_type rtype; /* request type (r) */ uint32_t flags; /* Request flags (r) */ int family; /* Address family AF_* (r) */ int lladdr_off; /* offset from header start (w) */ int lladdr_len; /* lladdr length (r) */ char *lladdr; /* link-level address pointer (r) */ char *hdata; /* Upper layer header data (rw) */ }; #define IFENCAP_FLAG_BROADCAST 0x02 /* Destination is broadcast */ /* * Network interface send tag support. The storage of "struct * m_snd_tag" comes from the network driver and it is free to allocate * as much additional space as it wants for its own use. */ struct ktls_session; struct m_snd_tag; #define IF_SND_TAG_TYPE_RATE_LIMIT 0 #define IF_SND_TAG_TYPE_UNLIMITED 1 #define IF_SND_TAG_TYPE_TLS 2 #define IF_SND_TAG_TYPE_TLS_RATE_LIMIT 3 #define IF_SND_TAG_TYPE_TLS_RX 4 #define IF_SND_TAG_TYPE_MAX 5 struct if_snd_tag_alloc_header { uint32_t type; /* send tag type, see IF_SND_TAG_XXX */ uint32_t flowid; /* mbuf hash value */ uint32_t flowtype; /* mbuf hash type */ uint8_t numa_domain; /* numa domain of associated inp */ }; struct if_snd_tag_alloc_rate_limit { struct if_snd_tag_alloc_header hdr; uint64_t max_rate; /* in bytes/s */ uint32_t flags; /* M_NOWAIT or M_WAITOK */ uint32_t reserved; /* alignment */ }; struct if_snd_tag_alloc_tls { struct if_snd_tag_alloc_header hdr; struct inpcb *inp; const struct ktls_session *tls; }; struct if_snd_tag_alloc_tls_rx { struct if_snd_tag_alloc_header hdr; struct inpcb *inp; const struct ktls_session *tls; uint16_t vlan_id; /* valid if non-zero */ }; struct if_snd_tag_alloc_tls_rate_limit { struct if_snd_tag_alloc_header hdr; struct inpcb *inp; const struct ktls_session *tls; uint64_t max_rate; /* in bytes/s */ }; struct if_snd_tag_rate_limit_params { uint64_t max_rate; /* in bytes/s */ uint32_t queue_level; /* 0 (empty) .. 65535 (full) */ #define IF_SND_QUEUE_LEVEL_MIN 0 #define IF_SND_QUEUE_LEVEL_MAX 65535 uint32_t flags; /* M_NOWAIT or M_WAITOK */ }; struct if_snd_tag_modify_tls_rx { /* TCP sequence number of TLS header in host endian format */ uint32_t tls_hdr_tcp_sn; /* * TLS record length, including all headers, data and trailers. * If the tls_rec_length is zero, it means HW encryption resumed. */ uint32_t tls_rec_length; /* TLS sequence number in host endian format */ uint64_t tls_seq_number; }; union if_snd_tag_alloc_params { struct if_snd_tag_alloc_header hdr; struct if_snd_tag_alloc_rate_limit rate_limit; struct if_snd_tag_alloc_rate_limit unlimited; struct if_snd_tag_alloc_tls tls; struct if_snd_tag_alloc_tls_rx tls_rx; struct if_snd_tag_alloc_tls_rate_limit tls_rate_limit; }; union if_snd_tag_modify_params { struct if_snd_tag_rate_limit_params rate_limit; struct if_snd_tag_rate_limit_params unlimited; struct if_snd_tag_rate_limit_params tls_rate_limit; struct if_snd_tag_modify_tls_rx tls_rx; }; union if_snd_tag_query_params { struct if_snd_tag_rate_limit_params rate_limit; struct if_snd_tag_rate_limit_params unlimited; struct if_snd_tag_rate_limit_params tls_rate_limit; }; typedef int (if_snd_tag_alloc_t)(struct ifnet *, union if_snd_tag_alloc_params *, struct m_snd_tag **); typedef int (if_snd_tag_modify_t)(struct m_snd_tag *, union if_snd_tag_modify_params *); typedef int (if_snd_tag_query_t)(struct m_snd_tag *, union if_snd_tag_query_params *); typedef void (if_snd_tag_free_t)(struct m_snd_tag *); typedef struct m_snd_tag *(if_next_send_tag_t)(struct m_snd_tag *); struct if_snd_tag_sw { if_snd_tag_modify_t *snd_tag_modify; if_snd_tag_query_t *snd_tag_query; if_snd_tag_free_t *snd_tag_free; if_next_send_tag_t *next_snd_tag; u_int type; /* One of IF_SND_TAG_TYPE_*. */ }; /* Query return flags */ #define RT_NOSUPPORT 0x00000000 /* Not supported */ #define RT_IS_INDIRECT 0x00000001 /* * Interface like a lagg, select * the actual interface for * capabilities. */ #define RT_IS_SELECTABLE 0x00000002 /* * No rate table, you select * rates and the first * number_of_rates are created. */ #define RT_IS_FIXED_TABLE 0x00000004 /* A fixed table is attached */ #define RT_IS_UNUSABLE 0x00000008 /* It is not usable for this */ #define RT_IS_SETUP_REQ 0x00000010 /* The interface setup must be called before use */ struct if_ratelimit_query_results { const uint64_t *rate_table; /* Pointer to table if present */ uint32_t flags; /* Flags indicating results */ uint32_t max_flows; /* Max flows using, 0=unlimited */ uint32_t number_of_rates; /* How many unique rates can be created */ uint32_t min_segment_burst; /* The amount the adapter bursts at each send */ }; typedef void (if_ratelimit_query_t)(struct ifnet *, struct if_ratelimit_query_results *); typedef int (if_ratelimit_setup_t)(struct ifnet *, uint64_t, uint32_t); /* * Structure defining a network interface. */ struct ifnet { /* General book keeping of interface lists. */ CK_STAILQ_ENTRY(ifnet) if_link; /* all struct ifnets are chained (CK_) */ LIST_ENTRY(ifnet) if_clones; /* interfaces of a cloner */ CK_STAILQ_HEAD(, ifg_list) if_groups; /* linked list of groups per if (CK_) */ /* protected by if_addr_lock */ u_char if_alloctype; /* if_type at time of allocation */ uint8_t if_numa_domain; /* NUMA domain of device */ /* Driver and protocol specific information that remains stable. */ void *if_softc; /* pointer to driver state */ void *if_llsoftc; /* link layer softc */ void *if_l2com; /* pointer to protocol bits */ const char *if_dname; /* driver name */ int if_dunit; /* unit or IF_DUNIT_NONE */ u_short if_index; /* numeric abbreviation for this if */ u_short if_idxgen; /* ... and its generation count */ char if_xname[IFNAMSIZ]; /* external name (name + unit) */ char *if_description; /* interface description */ /* Variable fields that are touched by the stack and drivers. */ int if_flags; /* up/down, broadcast, etc. */ int if_drv_flags; /* driver-managed status flags */ int if_capabilities; /* interface features & capabilities */ int if_capenable; /* enabled features & capabilities */ void *if_linkmib; /* link-type-specific MIB data */ size_t if_linkmiblen; /* length of above data */ u_int if_refcount; /* reference count */ /* These fields are shared with struct if_data. */ uint8_t if_type; /* ethernet, tokenring, etc */ uint8_t if_addrlen; /* media address length */ uint8_t if_hdrlen; /* media header length */ uint8_t if_link_state; /* current link state */ uint32_t if_mtu; /* maximum transmission unit */ uint32_t if_metric; /* routing metric (external only) */ uint64_t if_baudrate; /* linespeed */ uint64_t if_hwassist; /* HW offload capabilities, see IFCAP */ time_t if_epoch; /* uptime at attach or stat reset */ struct timeval if_lastchange; /* time of last administrative change */ struct ifaltq if_snd; /* output queue (includes altq) */ struct task if_linktask; /* task for link change events */ struct task if_addmultitask; /* task for SIOCADDMULTI */ /* Addresses of different protocol families assigned to this if. */ struct mtx if_addr_lock; /* lock to protect address lists */ /* * if_addrhead is the list of all addresses associated to * an interface. * Some code in the kernel assumes that first element * of the list has type AF_LINK, and contains sockaddr_dl * addresses which store the link-level address and the name * of the interface. * However, access to the AF_LINK address through this * field is deprecated. Use if_addr instead. */ struct ifaddrhead if_addrhead; /* linked list of addresses per if */ struct ifmultihead if_multiaddrs; /* multicast addresses configured */ int if_amcount; /* number of all-multicast requests */ struct ifaddr *if_addr; /* pointer to link-level address */ void *if_hw_addr; /* hardware link-level address */ const u_int8_t *if_broadcastaddr; /* linklevel broadcast bytestring */ struct mtx if_afdata_lock; void *if_afdata[AF_MAX]; int if_afdata_initialized; /* Additional features hung off the interface. */ u_int if_fib; /* interface FIB */ struct vnet *if_vnet; /* pointer to network stack instance */ struct vnet *if_home_vnet; /* where this ifnet originates from */ struct ifvlantrunk *if_vlantrunk; /* pointer to 802.1q data */ struct bpf_if *if_bpf; /* packet filter structure */ int if_pcount; /* number of promiscuous listeners */ void *if_bridge; /* bridge glue */ void *if_lagg; /* lagg glue */ void *if_pf_kif; /* pf glue */ struct carp_if *if_carp; /* carp interface structure */ struct label *if_label; /* interface MAC label */ struct netmap_adapter *if_netmap; /* netmap(4) softc */ /* Various procedures of the layer2 encapsulation and drivers. */ int (*if_output) /* output routine (enqueue) */ (struct ifnet *, struct mbuf *, const struct sockaddr *, struct route *); void (*if_input) /* input routine (from h/w driver) */ (struct ifnet *, struct mbuf *); struct mbuf *(*if_bridge_input)(struct ifnet *, struct mbuf *); int (*if_bridge_output)(struct ifnet *, struct mbuf *, struct sockaddr *, struct rtentry *); void (*if_bridge_linkstate)(struct ifnet *ifp); if_start_fn_t if_start; /* initiate output routine */ if_ioctl_fn_t if_ioctl; /* ioctl routine */ if_init_fn_t if_init; /* Init routine */ int (*if_resolvemulti) /* validate/resolve multicast */ (struct ifnet *, struct sockaddr **, struct sockaddr *); if_qflush_fn_t if_qflush; /* flush any queue */ if_transmit_fn_t if_transmit; /* initiate output routine */ void (*if_reassign) /* reassign to vnet routine */ (struct ifnet *, struct vnet *, char *); if_get_counter_t if_get_counter; /* get counter values */ int (*if_requestencap) /* make link header from request */ (struct ifnet *, struct if_encap_req *); /* Statistics. */ counter_u64_t if_counters[IFCOUNTERS]; /* Stuff that's only temporary and doesn't belong here. */ /* * Network adapter TSO limits: * =========================== * * If the "if_hw_tsomax" field is zero the maximum segment * length limit does not apply. If the "if_hw_tsomaxsegcount" * or the "if_hw_tsomaxsegsize" field is zero the TSO segment * count limit does not apply. If all three fields are zero, * there is no TSO limit. * * NOTE: The TSO limits should reflect the values used in the * BUSDMA tag a network adapter is using to load a mbuf chain * for transmission. The TCP/IP network stack will subtract * space for all linklevel and protocol level headers and * ensure that the full mbuf chain passed to the network * adapter fits within the given limits. */ u_int if_hw_tsomax; /* TSO maximum size in bytes */ u_int if_hw_tsomaxsegcount; /* TSO maximum segment count */ u_int if_hw_tsomaxsegsize; /* TSO maximum segment size in bytes */ /* * Network adapter send tag support: */ if_snd_tag_alloc_t *if_snd_tag_alloc; /* Ratelimit (packet pacing) */ if_ratelimit_query_t *if_ratelimit_query; if_ratelimit_setup_t *if_ratelimit_setup; /* Ethernet PCP */ uint8_t if_pcp; /* * Debugnet (Netdump) hooks to be called while in db/panic. */ struct debugnet_methods *if_debugnet_methods; struct epoch_context if_epoch_ctx; /* * Spare fields to be added before branching a stable branch, so * that structure can be enhanced without changing the kernel * binary interface. */ int if_ispare[4]; /* general use */ }; /* for compatibility with other BSDs */ #define if_name(ifp) ((ifp)->if_xname) #define IF_NODOM 255 /* * Locks for address lists on the network interface. */ #define IF_ADDR_LOCK_INIT(if) mtx_init(&(if)->if_addr_lock, "if_addr_lock", NULL, MTX_DEF) #define IF_ADDR_LOCK_DESTROY(if) mtx_destroy(&(if)->if_addr_lock) #define IF_ADDR_WLOCK(if) mtx_lock(&(if)->if_addr_lock) #define IF_ADDR_WUNLOCK(if) mtx_unlock(&(if)->if_addr_lock) #define IF_ADDR_LOCK_ASSERT(if) MPASS(in_epoch(net_epoch_preempt) || mtx_owned(&(if)->if_addr_lock)) #define IF_ADDR_WLOCK_ASSERT(if) mtx_assert(&(if)->if_addr_lock, MA_OWNED) #ifdef _KERNEL /* interface link layer address change event */ typedef void (*iflladdr_event_handler_t)(void *, struct ifnet *); EVENTHANDLER_DECLARE(iflladdr_event, iflladdr_event_handler_t); /* interface address change event */ typedef void (*ifaddr_event_handler_t)(void *, struct ifnet *); EVENTHANDLER_DECLARE(ifaddr_event, ifaddr_event_handler_t); typedef void (*ifaddr_event_ext_handler_t)(void *, struct ifnet *, struct ifaddr *, int); EVENTHANDLER_DECLARE(ifaddr_event_ext, ifaddr_event_ext_handler_t); #define IFADDR_EVENT_ADD 0 #define IFADDR_EVENT_DEL 1 /* new interface arrival event */ typedef void (*ifnet_arrival_event_handler_t)(void *, struct ifnet *); EVENTHANDLER_DECLARE(ifnet_arrival_event, ifnet_arrival_event_handler_t); /* interface departure event */ typedef void (*ifnet_departure_event_handler_t)(void *, struct ifnet *); EVENTHANDLER_DECLARE(ifnet_departure_event, ifnet_departure_event_handler_t); /* Interface link state change event */ typedef void (*ifnet_link_event_handler_t)(void *, struct ifnet *, int); EVENTHANDLER_DECLARE(ifnet_link_event, ifnet_link_event_handler_t); /* Interface up/down event */ #define IFNET_EVENT_UP 0 #define IFNET_EVENT_DOWN 1 #define IFNET_EVENT_PCP 2 /* priority code point, PCP */ +#define IFNET_EVENT_UPDATE_BAUDRATE 3 typedef void (*ifnet_event_fn)(void *, struct ifnet *ifp, int event); EVENTHANDLER_DECLARE(ifnet_event, ifnet_event_fn); /* * interface groups */ struct ifg_group { char ifg_group[IFNAMSIZ]; u_int ifg_refcnt; void *ifg_pf_kif; CK_STAILQ_HEAD(, ifg_member) ifg_members; /* (CK_) */ CK_STAILQ_ENTRY(ifg_group) ifg_next; /* (CK_) */ }; struct ifg_member { CK_STAILQ_ENTRY(ifg_member) ifgm_next; /* (CK_) */ struct ifnet *ifgm_ifp; }; struct ifg_list { struct ifg_group *ifgl_group; CK_STAILQ_ENTRY(ifg_list) ifgl_next; /* (CK_) */ }; #ifdef _SYS_EVENTHANDLER_H_ /* group attach event */ typedef void (*group_attach_event_handler_t)(void *, struct ifg_group *); EVENTHANDLER_DECLARE(group_attach_event, group_attach_event_handler_t); /* group detach event */ typedef void (*group_detach_event_handler_t)(void *, struct ifg_group *); EVENTHANDLER_DECLARE(group_detach_event, group_detach_event_handler_t); /* group change event */ typedef void (*group_change_event_handler_t)(void *, const char *); EVENTHANDLER_DECLARE(group_change_event, group_change_event_handler_t); #endif /* _SYS_EVENTHANDLER_H_ */ #define IF_AFDATA_LOCK_INIT(ifp) \ mtx_init(&(ifp)->if_afdata_lock, "if_afdata", NULL, MTX_DEF) #define IF_AFDATA_WLOCK(ifp) mtx_lock(&(ifp)->if_afdata_lock) #define IF_AFDATA_WUNLOCK(ifp) mtx_unlock(&(ifp)->if_afdata_lock) #define IF_AFDATA_LOCK(ifp) IF_AFDATA_WLOCK(ifp) #define IF_AFDATA_UNLOCK(ifp) IF_AFDATA_WUNLOCK(ifp) #define IF_AFDATA_TRYLOCK(ifp) mtx_trylock(&(ifp)->if_afdata_lock) #define IF_AFDATA_DESTROY(ifp) mtx_destroy(&(ifp)->if_afdata_lock) #define IF_AFDATA_LOCK_ASSERT(ifp) MPASS(in_epoch(net_epoch_preempt) || mtx_owned(&(ifp)->if_afdata_lock)) #define IF_AFDATA_WLOCK_ASSERT(ifp) mtx_assert(&(ifp)->if_afdata_lock, MA_OWNED) #define IF_AFDATA_UNLOCK_ASSERT(ifp) mtx_assert(&(ifp)->if_afdata_lock, MA_NOTOWNED) /* * 72 was chosen below because it is the size of a TCP/IP * header (40) + the minimum mss (32). */ #define IF_MINMTU 72 #define IF_MAXMTU 65535 #define TOEDEV(ifp) ((ifp)->if_llsoftc) /* * The ifaddr structure contains information about one address * of an interface. They are maintained by the different address families, * are allocated and attached when an address is set, and are linked * together so all addresses for an interface can be located. * * NOTE: a 'struct ifaddr' is always at the beginning of a larger * chunk of malloc'ed memory, where we store the three addresses * (ifa_addr, ifa_dstaddr and ifa_netmask) referenced here. */ struct ifaddr { struct sockaddr *ifa_addr; /* address of interface */ struct sockaddr *ifa_dstaddr; /* other end of p-to-p link */ #define ifa_broadaddr ifa_dstaddr /* broadcast address interface */ struct sockaddr *ifa_netmask; /* used to determine subnet */ struct ifnet *ifa_ifp; /* back-pointer to interface */ struct carp_softc *ifa_carp; /* pointer to CARP data */ CK_STAILQ_ENTRY(ifaddr) ifa_link; /* queue macro glue */ u_short ifa_flags; /* mostly rt_flags for cloning */ #define IFA_ROUTE RTF_UP /* route installed */ #define IFA_RTSELF RTF_HOST /* loopback route to self installed */ u_int ifa_refcnt; /* references to this structure */ counter_u64_t ifa_ipackets; counter_u64_t ifa_opackets; counter_u64_t ifa_ibytes; counter_u64_t ifa_obytes; struct epoch_context ifa_epoch_ctx; }; struct ifaddr * ifa_alloc(size_t size, int flags); void ifa_free(struct ifaddr *ifa); void ifa_ref(struct ifaddr *ifa); int __result_use_check ifa_try_ref(struct ifaddr *ifa); /* * Multicast address structure. This is analogous to the ifaddr * structure except that it keeps track of multicast addresses. */ #define IFMA_F_ENQUEUED 0x1 struct ifmultiaddr { CK_STAILQ_ENTRY(ifmultiaddr) ifma_link; /* queue macro glue */ struct sockaddr *ifma_addr; /* address this membership is for */ struct sockaddr *ifma_lladdr; /* link-layer translation, if any */ struct ifnet *ifma_ifp; /* back-pointer to interface */ u_int ifma_refcount; /* reference count */ int ifma_flags; void *ifma_protospec; /* protocol-specific state, if any */ struct ifmultiaddr *ifma_llifma; /* pointer to ifma for ifma_lladdr */ struct epoch_context ifma_epoch_ctx; }; extern struct sx ifnet_sxlock; #define IFNET_WLOCK() sx_xlock(&ifnet_sxlock) #define IFNET_WUNLOCK() sx_xunlock(&ifnet_sxlock) #define IFNET_RLOCK_ASSERT() sx_assert(&ifnet_sxlock, SA_SLOCKED) #define IFNET_WLOCK_ASSERT() sx_assert(&ifnet_sxlock, SA_XLOCKED) #define IFNET_RLOCK() sx_slock(&ifnet_sxlock) #define IFNET_RUNLOCK() sx_sunlock(&ifnet_sxlock) /* * Look up an ifnet given its index. The returned value protected from * being freed by the network epoch. The _ref variant also acquires a * reference that must be freed using if_rele(). */ struct ifnet *ifnet_byindex(u_int); struct ifnet *ifnet_byindex_ref(u_int); /* * ifnet_byindexgen() looks up ifnet by index and generation count, * attempting to restore a weak pointer that had been stored across * the epoch. */ struct ifnet *ifnet_byindexgen(uint16_t idx, uint16_t gen); VNET_DECLARE(struct ifnethead, ifnet); VNET_DECLARE(struct ifgrouphead, ifg_head); VNET_DECLARE(struct ifnet *, loif); /* first loopback interface */ #define V_ifnet VNET(ifnet) #define V_ifg_head VNET(ifg_head) #define V_loif VNET(loif) #ifdef MCAST_VERBOSE #define MCDPRINTF printf #else #define MCDPRINTF(...) #endif int if_addgroup(struct ifnet *, const char *); int if_delgroup(struct ifnet *, const char *); int if_addmulti(struct ifnet *, struct sockaddr *, struct ifmultiaddr **); int if_allmulti(struct ifnet *, int); struct ifnet* if_alloc(u_char); struct ifnet* if_alloc_dev(u_char, device_t dev); void if_attach(struct ifnet *); void if_dead(struct ifnet *); int if_delmulti(struct ifnet *, struct sockaddr *); void if_delmulti_ifma(struct ifmultiaddr *); void if_delmulti_ifma_flags(struct ifmultiaddr *, int flags); void if_detach(struct ifnet *); void if_purgeaddrs(struct ifnet *); void if_delallmulti(struct ifnet *); void if_down(struct ifnet *); struct ifmultiaddr * if_findmulti(struct ifnet *, const struct sockaddr *); void if_freemulti(struct ifmultiaddr *ifma); void if_free(struct ifnet *); void if_initname(struct ifnet *, const char *, int); void if_link_state_change(struct ifnet *, int); int if_printf(struct ifnet *, const char *, ...) __printflike(2, 3); int if_log(struct ifnet *, int, const char *, ...) __printflike(3, 4); void if_ref(struct ifnet *); void if_rele(struct ifnet *); bool __result_use_check if_try_ref(struct ifnet *); int if_setlladdr(struct ifnet *, const u_char *, int); int if_tunnel_check_nesting(struct ifnet *, struct mbuf *, uint32_t, int); void if_up(struct ifnet *); int ifioctl(struct socket *, u_long, caddr_t, struct thread *); int ifpromisc(struct ifnet *, int); struct ifnet *ifunit(const char *); struct ifnet *ifunit_ref(const char *); int ifa_add_loopback_route(struct ifaddr *, struct sockaddr *); int ifa_del_loopback_route(struct ifaddr *, struct sockaddr *); int ifa_switch_loopback_route(struct ifaddr *, struct sockaddr *); struct ifaddr *ifa_ifwithaddr(const struct sockaddr *); int ifa_ifwithaddr_check(const struct sockaddr *); struct ifaddr *ifa_ifwithbroadaddr(const struct sockaddr *, int); struct ifaddr *ifa_ifwithdstaddr(const struct sockaddr *, int); struct ifaddr *ifa_ifwithnet(const struct sockaddr *, int, int); struct ifaddr *ifa_ifwithroute(int, const struct sockaddr *, const struct sockaddr *, u_int); struct ifaddr *ifaof_ifpforaddr(const struct sockaddr *, struct ifnet *); int ifa_preferred(struct ifaddr *, struct ifaddr *); int if_simloop(struct ifnet *ifp, struct mbuf *m, int af, int hlen); typedef void *if_com_alloc_t(u_char type, struct ifnet *ifp); typedef void if_com_free_t(void *com, u_char type); void if_register_com_alloc(u_char type, if_com_alloc_t *a, if_com_free_t *f); void if_deregister_com_alloc(u_char type); void if_data_copy(struct ifnet *, struct if_data *); uint64_t if_get_counter_default(struct ifnet *, ift_counter); void if_inc_counter(struct ifnet *, ift_counter, int64_t); #define IF_LLADDR(ifp) \ LLADDR((struct sockaddr_dl *)((ifp)->if_addr->ifa_addr)) uint64_t if_setbaudrate(if_t ifp, uint64_t baudrate); uint64_t if_getbaudrate(if_t ifp); int if_setcapabilities(if_t ifp, int capabilities); int if_setcapabilitiesbit(if_t ifp, int setbit, int clearbit); int if_getcapabilities(if_t ifp); int if_togglecapenable(if_t ifp, int togglecap); int if_setcapenable(if_t ifp, int capenable); int if_setcapenablebit(if_t ifp, int setcap, int clearcap); int if_getcapenable(if_t ifp); const char *if_getdname(if_t ifp); int if_setdev(if_t ifp, void *dev); int if_setdrvflagbits(if_t ifp, int if_setflags, int clear_flags); int if_getdrvflags(if_t ifp); int if_setdrvflags(if_t ifp, int flags); int if_clearhwassist(if_t ifp); int if_sethwassistbits(if_t ifp, int toset, int toclear); int if_sethwassist(if_t ifp, int hwassist_bit); int if_gethwassist(if_t ifp); int if_setsoftc(if_t ifp, void *softc); void *if_getsoftc(if_t ifp); int if_setflags(if_t ifp, int flags); int if_gethwaddr(if_t ifp, struct ifreq *); int if_setmtu(if_t ifp, int mtu); int if_getmtu(if_t ifp); int if_getmtu_family(if_t ifp, int family); int if_setflagbits(if_t ifp, int set, int clear); int if_getflags(if_t ifp); int if_sendq_empty(if_t ifp); int if_setsendqready(if_t ifp); int if_setsendqlen(if_t ifp, int tx_desc_count); int if_sethwtsomax(if_t ifp, u_int if_hw_tsomax); int if_sethwtsomaxsegcount(if_t ifp, u_int if_hw_tsomaxsegcount); int if_sethwtsomaxsegsize(if_t ifp, u_int if_hw_tsomaxsegsize); u_int if_gethwtsomax(if_t ifp); u_int if_gethwtsomaxsegcount(if_t ifp); u_int if_gethwtsomaxsegsize(if_t ifp); int if_input(if_t ifp, struct mbuf* sendmp); int if_sendq_prepend(if_t ifp, struct mbuf *m); struct mbuf *if_dequeue(if_t ifp); int if_setifheaderlen(if_t ifp, int len); void if_setrcvif(struct mbuf *m, if_t ifp); void if_setvtag(struct mbuf *m, u_int16_t tag); u_int16_t if_getvtag(struct mbuf *m); int if_vlantrunkinuse(if_t ifp); caddr_t if_getlladdr(if_t ifp); void *if_gethandle(u_char); void if_bpfmtap(if_t ifp, struct mbuf *m); void if_etherbpfmtap(if_t ifp, struct mbuf *m); void if_vlancap(if_t ifp); /* * Traversing through interface address lists. */ struct sockaddr_dl; typedef u_int iflladdr_cb_t(void *, struct sockaddr_dl *, u_int); u_int if_foreach_lladdr(if_t, iflladdr_cb_t, void *); u_int if_foreach_llmaddr(if_t, iflladdr_cb_t, void *); u_int if_lladdr_count(if_t); u_int if_llmaddr_count(if_t); int if_getamcount(if_t ifp); struct ifaddr * if_getifaddr(if_t ifp); /* Functions */ void if_setinitfn(if_t ifp, void (*)(void *)); void if_setioctlfn(if_t ifp, int (*)(if_t, u_long, caddr_t)); void if_setstartfn(if_t ifp, void (*)(if_t)); void if_settransmitfn(if_t ifp, if_transmit_fn_t); void if_setqflushfn(if_t ifp, if_qflush_fn_t); void if_setgetcounterfn(if_t ifp, if_get_counter_t); /* TSO */ void if_hw_tsomax_common(if_t ifp, struct ifnet_hw_tsomax *); int if_hw_tsomax_update(if_t ifp, struct ifnet_hw_tsomax *); /* accessors for struct ifreq */ void *ifr_data_get_ptr(void *ifrp); void *ifr_buffer_get_buffer(void *data); size_t ifr_buffer_get_length(void *data); int ifhwioctl(u_long, struct ifnet *, caddr_t, struct thread *); #ifdef DEVICE_POLLING enum poll_cmd { POLL_ONLY, POLL_AND_CHECK_STATUS }; typedef int poll_handler_t(if_t ifp, enum poll_cmd cmd, int count); int ether_poll_register(poll_handler_t *h, if_t ifp); int ether_poll_deregister(if_t ifp); #endif /* DEVICE_POLLING */ #endif /* _KERNEL */ #include /* XXXAO: temporary unconditional include */ #endif /* !_NET_IF_VAR_H_ */ diff --git a/sys/net/if_vlan.c b/sys/net/if_vlan.c index 2bb5284c2129..dc7a5fcb0c73 100644 --- a/sys/net/if_vlan.c +++ b/sys/net/if_vlan.c @@ -1,2283 +1,2318 @@ /*- * Copyright 1998 Massachusetts Institute of Technology * Copyright 2012 ADARA Networks, Inc. * Copyright 2017 Dell EMC Isilon * * Portions of this software were developed by Robert N. M. Watson under * contract to ADARA Networks, Inc. * * Permission to use, copy, modify, and distribute this software and * its documentation for any purpose and without fee is hereby * granted, provided that both the above copyright notice and this * permission notice appear in all copies, that both the above * copyright notice and this permission notice appear in all * supporting documentation, and that the name of M.I.T. not be used * in advertising or publicity pertaining to distribution of the * software without specific, written prior permission. M.I.T. makes * no representations about the suitability of this software for any * purpose. It is provided "as is" without express or implied * warranty. * * THIS SOFTWARE IS PROVIDED BY M.I.T. ``AS IS''. M.I.T. DISCLAIMS * ALL EXPRESS OR IMPLIED WARRANTIES WITH REGARD TO THIS SOFTWARE, * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. IN NO EVENT * SHALL M.I.T. BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ /* * if_vlan.c - pseudo-device driver for IEEE 802.1Q virtual LANs. * This is sort of sneaky in the implementation, since * we need to pretend to be enough of an Ethernet implementation * to make arp work. The way we do this is by telling everyone * that we are an Ethernet, and then catch the packets that * ether_output() sends to us via if_transmit(), rewrite them for * use by the real outgoing interface, and ask it to send them. */ #include __FBSDID("$FreeBSD$"); #include "opt_inet.h" #include "opt_inet6.h" #include "opt_kern_tls.h" #include "opt_vlan.h" #include "opt_ratelimit.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef INET #include #include #endif #ifdef INET6 /* * XXX: declare here to avoid to include many inet6 related files.. * should be more generalized? */ extern void nd6_setmtu(struct ifnet *); #endif #define VLAN_DEF_HWIDTH 4 #define VLAN_IFFLAGS (IFF_BROADCAST | IFF_MULTICAST) #define UP_AND_RUNNING(ifp) \ ((ifp)->if_flags & IFF_UP && (ifp)->if_drv_flags & IFF_DRV_RUNNING) CK_SLIST_HEAD(ifvlanhead, ifvlan); struct ifvlantrunk { struct ifnet *parent; /* parent interface of this trunk */ struct mtx lock; #ifdef VLAN_ARRAY #define VLAN_ARRAY_SIZE (EVL_VLID_MASK + 1) struct ifvlan *vlans[VLAN_ARRAY_SIZE]; /* static table */ #else struct ifvlanhead *hash; /* dynamic hash-list table */ uint16_t hmask; uint16_t hwidth; #endif int refcnt; }; #if defined(KERN_TLS) || defined(RATELIMIT) struct vlan_snd_tag { struct m_snd_tag com; struct m_snd_tag *tag; }; static inline struct vlan_snd_tag * mst_to_vst(struct m_snd_tag *mst) { return (__containerof(mst, struct vlan_snd_tag, com)); } #endif /* * This macro provides a facility to iterate over every vlan on a trunk with * the assumption that none will be added/removed during iteration. */ #ifdef VLAN_ARRAY #define VLAN_FOREACH(_ifv, _trunk) \ size_t _i; \ for (_i = 0; _i < VLAN_ARRAY_SIZE; _i++) \ if (((_ifv) = (_trunk)->vlans[_i]) != NULL) #else /* VLAN_ARRAY */ #define VLAN_FOREACH(_ifv, _trunk) \ struct ifvlan *_next; \ size_t _i; \ for (_i = 0; _i < (1 << (_trunk)->hwidth); _i++) \ CK_SLIST_FOREACH_SAFE((_ifv), &(_trunk)->hash[_i], ifv_list, _next) #endif /* VLAN_ARRAY */ /* * This macro provides a facility to iterate over every vlan on a trunk while * also modifying the number of vlans on the trunk. The iteration continues * until some condition is met or there are no more vlans on the trunk. */ #ifdef VLAN_ARRAY /* The VLAN_ARRAY case is simple -- just a for loop using the condition. */ #define VLAN_FOREACH_UNTIL_SAFE(_ifv, _trunk, _cond) \ size_t _i; \ for (_i = 0; !(_cond) && _i < VLAN_ARRAY_SIZE; _i++) \ if (((_ifv) = (_trunk)->vlans[_i])) #else /* VLAN_ARRAY */ /* * The hash table case is more complicated. We allow for the hash table to be * modified (i.e. vlans removed) while we are iterating over it. To allow for * this we must restart the iteration every time we "touch" something during * the iteration, since removal will resize the hash table and invalidate our * current position. If acting on the touched element causes the trunk to be * emptied, then iteration also stops. */ #define VLAN_FOREACH_UNTIL_SAFE(_ifv, _trunk, _cond) \ size_t _i; \ bool _touch = false; \ for (_i = 0; \ !(_cond) && _i < (1 << (_trunk)->hwidth); \ _i = (_touch && ((_trunk) != NULL) ? 0 : _i + 1), _touch = false) \ if (((_ifv) = CK_SLIST_FIRST(&(_trunk)->hash[_i])) != NULL && \ (_touch = true)) #endif /* VLAN_ARRAY */ struct vlan_mc_entry { struct sockaddr_dl mc_addr; CK_SLIST_ENTRY(vlan_mc_entry) mc_entries; struct epoch_context mc_epoch_ctx; }; struct ifvlan { struct ifvlantrunk *ifv_trunk; struct ifnet *ifv_ifp; #define TRUNK(ifv) ((ifv)->ifv_trunk) #define PARENT(ifv) (TRUNK(ifv)->parent) void *ifv_cookie; int ifv_pflags; /* special flags we have set on parent */ int ifv_capenable; int ifv_encaplen; /* encapsulation length */ int ifv_mtufudge; /* MTU fudged by this much */ int ifv_mintu; /* min transmission unit */ struct ether_8021q_tag ifv_qtag; #define ifv_proto ifv_qtag.proto #define ifv_vid ifv_qtag.vid #define ifv_pcp ifv_qtag.pcp struct task lladdr_task; CK_SLIST_HEAD(, vlan_mc_entry) vlan_mc_listhead; #ifndef VLAN_ARRAY CK_SLIST_ENTRY(ifvlan) ifv_list; #endif }; /* Special flags we should propagate to parent. */ static struct { int flag; int (*func)(struct ifnet *, int); } vlan_pflags[] = { {IFF_PROMISC, ifpromisc}, {IFF_ALLMULTI, if_allmulti}, {0, NULL} }; VNET_DECLARE(int, vlan_mtag_pcp); #define V_vlan_mtag_pcp VNET(vlan_mtag_pcp) static const char vlanname[] = "vlan"; static MALLOC_DEFINE(M_VLAN, vlanname, "802.1Q Virtual LAN Interface"); static eventhandler_tag ifdetach_tag; static eventhandler_tag iflladdr_tag; +static eventhandler_tag ifevent_tag; /* * if_vlan uses two module-level synchronizations primitives to allow concurrent * modification of vlan interfaces and (mostly) allow for vlans to be destroyed * while they are being used for tx/rx. To accomplish this in a way that has * acceptable performance and cooperation with other parts of the network stack * there is a non-sleepable epoch(9) and an sx(9). * * The performance-sensitive paths that warrant using the epoch(9) are * vlan_transmit and vlan_input. Both have to check for the vlan interface's * existence using if_vlantrunk, and being in the network tx/rx paths the use * of an epoch(9) gives a measureable improvement in performance. * * The reason for having an sx(9) is mostly because there are still areas that * must be sleepable and also have safe concurrent access to a vlan interface. * Since the sx(9) exists, it is used by default in most paths unless sleeping * is not permitted, or if it is not clear whether sleeping is permitted. * */ #define _VLAN_SX_ID ifv_sx static struct sx _VLAN_SX_ID; #define VLAN_LOCKING_INIT() \ sx_init_flags(&_VLAN_SX_ID, "vlan_sx", SX_RECURSE) #define VLAN_LOCKING_DESTROY() \ sx_destroy(&_VLAN_SX_ID) #define VLAN_SLOCK() sx_slock(&_VLAN_SX_ID) #define VLAN_SUNLOCK() sx_sunlock(&_VLAN_SX_ID) #define VLAN_XLOCK() sx_xlock(&_VLAN_SX_ID) #define VLAN_XUNLOCK() sx_xunlock(&_VLAN_SX_ID) #define VLAN_SLOCK_ASSERT() sx_assert(&_VLAN_SX_ID, SA_SLOCKED) #define VLAN_XLOCK_ASSERT() sx_assert(&_VLAN_SX_ID, SA_XLOCKED) #define VLAN_SXLOCK_ASSERT() sx_assert(&_VLAN_SX_ID, SA_LOCKED) /* * We also have a per-trunk mutex that should be acquired when changing * its state. */ #define TRUNK_LOCK_INIT(trunk) mtx_init(&(trunk)->lock, vlanname, NULL, MTX_DEF) #define TRUNK_LOCK_DESTROY(trunk) mtx_destroy(&(trunk)->lock) #define TRUNK_WLOCK(trunk) mtx_lock(&(trunk)->lock) #define TRUNK_WUNLOCK(trunk) mtx_unlock(&(trunk)->lock) #define TRUNK_WLOCK_ASSERT(trunk) mtx_assert(&(trunk)->lock, MA_OWNED); /* * The VLAN_ARRAY substitutes the dynamic hash with a static array * with 4096 entries. In theory this can give a boost in processing, * however in practice it does not. Probably this is because the array * is too big to fit into CPU cache. */ #ifndef VLAN_ARRAY static void vlan_inithash(struct ifvlantrunk *trunk); static void vlan_freehash(struct ifvlantrunk *trunk); static int vlan_inshash(struct ifvlantrunk *trunk, struct ifvlan *ifv); static int vlan_remhash(struct ifvlantrunk *trunk, struct ifvlan *ifv); static void vlan_growhash(struct ifvlantrunk *trunk, int howmuch); static __inline struct ifvlan * vlan_gethash(struct ifvlantrunk *trunk, uint16_t vid); #endif static void trunk_destroy(struct ifvlantrunk *trunk); static void vlan_init(void *foo); static void vlan_input(struct ifnet *ifp, struct mbuf *m); static int vlan_ioctl(struct ifnet *ifp, u_long cmd, caddr_t addr); #if defined(KERN_TLS) || defined(RATELIMIT) static int vlan_snd_tag_alloc(struct ifnet *, union if_snd_tag_alloc_params *, struct m_snd_tag **); static int vlan_snd_tag_modify(struct m_snd_tag *, union if_snd_tag_modify_params *); static int vlan_snd_tag_query(struct m_snd_tag *, union if_snd_tag_query_params *); static void vlan_snd_tag_free(struct m_snd_tag *); static struct m_snd_tag *vlan_next_snd_tag(struct m_snd_tag *); static void vlan_ratelimit_query(struct ifnet *, struct if_ratelimit_query_results *); #endif static void vlan_qflush(struct ifnet *ifp); static int vlan_setflag(struct ifnet *ifp, int flag, int status, int (*func)(struct ifnet *, int)); static int vlan_setflags(struct ifnet *ifp, int status); static int vlan_setmulti(struct ifnet *ifp); static int vlan_transmit(struct ifnet *ifp, struct mbuf *m); #ifdef ALTQ static void vlan_altq_start(struct ifnet *ifp); static int vlan_altq_transmit(struct ifnet *ifp, struct mbuf *m); #endif static int vlan_output(struct ifnet *ifp, struct mbuf *m, const struct sockaddr *dst, struct route *ro); static void vlan_unconfig(struct ifnet *ifp); static void vlan_unconfig_locked(struct ifnet *ifp, int departing); static int vlan_config(struct ifvlan *ifv, struct ifnet *p, uint16_t tag, uint16_t proto); static void vlan_link_state(struct ifnet *ifp); static void vlan_capabilities(struct ifvlan *ifv); static void vlan_trunk_capabilities(struct ifnet *ifp); static struct ifnet *vlan_clone_match_ethervid(const char *, int *); static int vlan_clone_match(struct if_clone *, const char *); static int vlan_clone_create(struct if_clone *, char *, size_t, caddr_t); static int vlan_clone_destroy(struct if_clone *, struct ifnet *); static void vlan_ifdetach(void *arg, struct ifnet *ifp); static void vlan_iflladdr(void *arg, struct ifnet *ifp); +static void vlan_ifevent(void *arg, struct ifnet *ifp, int event); static void vlan_lladdr_fn(void *arg, int pending); static struct if_clone *vlan_cloner; #ifdef VIMAGE VNET_DEFINE_STATIC(struct if_clone *, vlan_cloner); #define V_vlan_cloner VNET(vlan_cloner) #endif #ifdef RATELIMIT static const struct if_snd_tag_sw vlan_snd_tag_ul_sw = { .snd_tag_modify = vlan_snd_tag_modify, .snd_tag_query = vlan_snd_tag_query, .snd_tag_free = vlan_snd_tag_free, .next_snd_tag = vlan_next_snd_tag, .type = IF_SND_TAG_TYPE_UNLIMITED }; static const struct if_snd_tag_sw vlan_snd_tag_rl_sw = { .snd_tag_modify = vlan_snd_tag_modify, .snd_tag_query = vlan_snd_tag_query, .snd_tag_free = vlan_snd_tag_free, .next_snd_tag = vlan_next_snd_tag, .type = IF_SND_TAG_TYPE_RATE_LIMIT }; #endif #ifdef KERN_TLS static const struct if_snd_tag_sw vlan_snd_tag_tls_sw = { .snd_tag_modify = vlan_snd_tag_modify, .snd_tag_query = vlan_snd_tag_query, .snd_tag_free = vlan_snd_tag_free, .next_snd_tag = vlan_next_snd_tag, .type = IF_SND_TAG_TYPE_TLS }; #ifdef RATELIMIT static const struct if_snd_tag_sw vlan_snd_tag_tls_rl_sw = { .snd_tag_modify = vlan_snd_tag_modify, .snd_tag_query = vlan_snd_tag_query, .snd_tag_free = vlan_snd_tag_free, .next_snd_tag = vlan_next_snd_tag, .type = IF_SND_TAG_TYPE_TLS_RATE_LIMIT }; #endif #endif static void vlan_mc_free(struct epoch_context *ctx) { struct vlan_mc_entry *mc = __containerof(ctx, struct vlan_mc_entry, mc_epoch_ctx); free(mc, M_VLAN); } #ifndef VLAN_ARRAY #define HASH(n, m) ((((n) >> 8) ^ ((n) >> 4) ^ (n)) & (m)) static void vlan_inithash(struct ifvlantrunk *trunk) { int i, n; /* * The trunk must not be locked here since we call malloc(M_WAITOK). * It is OK in case this function is called before the trunk struct * gets hooked up and becomes visible from other threads. */ KASSERT(trunk->hwidth == 0 && trunk->hash == NULL, ("%s: hash already initialized", __func__)); trunk->hwidth = VLAN_DEF_HWIDTH; n = 1 << trunk->hwidth; trunk->hmask = n - 1; trunk->hash = malloc(sizeof(struct ifvlanhead) * n, M_VLAN, M_WAITOK); for (i = 0; i < n; i++) CK_SLIST_INIT(&trunk->hash[i]); } static void vlan_freehash(struct ifvlantrunk *trunk) { #ifdef INVARIANTS int i; KASSERT(trunk->hwidth > 0, ("%s: hwidth not positive", __func__)); for (i = 0; i < (1 << trunk->hwidth); i++) KASSERT(CK_SLIST_EMPTY(&trunk->hash[i]), ("%s: hash table not empty", __func__)); #endif free(trunk->hash, M_VLAN); trunk->hash = NULL; trunk->hwidth = trunk->hmask = 0; } static int vlan_inshash(struct ifvlantrunk *trunk, struct ifvlan *ifv) { int i, b; struct ifvlan *ifv2; VLAN_XLOCK_ASSERT(); KASSERT(trunk->hwidth > 0, ("%s: hwidth not positive", __func__)); b = 1 << trunk->hwidth; i = HASH(ifv->ifv_vid, trunk->hmask); CK_SLIST_FOREACH(ifv2, &trunk->hash[i], ifv_list) if (ifv->ifv_vid == ifv2->ifv_vid) return (EEXIST); /* * Grow the hash when the number of vlans exceeds half of the number of * hash buckets squared. This will make the average linked-list length * buckets/2. */ if (trunk->refcnt > (b * b) / 2) { vlan_growhash(trunk, 1); i = HASH(ifv->ifv_vid, trunk->hmask); } CK_SLIST_INSERT_HEAD(&trunk->hash[i], ifv, ifv_list); trunk->refcnt++; return (0); } static int vlan_remhash(struct ifvlantrunk *trunk, struct ifvlan *ifv) { int i, b; struct ifvlan *ifv2; VLAN_XLOCK_ASSERT(); KASSERT(trunk->hwidth > 0, ("%s: hwidth not positive", __func__)); b = 1 << trunk->hwidth; i = HASH(ifv->ifv_vid, trunk->hmask); CK_SLIST_FOREACH(ifv2, &trunk->hash[i], ifv_list) if (ifv2 == ifv) { trunk->refcnt--; CK_SLIST_REMOVE(&trunk->hash[i], ifv2, ifvlan, ifv_list); if (trunk->refcnt < (b * b) / 2) vlan_growhash(trunk, -1); return (0); } panic("%s: vlan not found\n", __func__); return (ENOENT); /*NOTREACHED*/ } /* * Grow the hash larger or smaller if memory permits. */ static void vlan_growhash(struct ifvlantrunk *trunk, int howmuch) { struct ifvlan *ifv; struct ifvlanhead *hash2; int hwidth2, i, j, n, n2; VLAN_XLOCK_ASSERT(); KASSERT(trunk->hwidth > 0, ("%s: hwidth not positive", __func__)); if (howmuch == 0) { /* Harmless yet obvious coding error */ printf("%s: howmuch is 0\n", __func__); return; } hwidth2 = trunk->hwidth + howmuch; n = 1 << trunk->hwidth; n2 = 1 << hwidth2; /* Do not shrink the table below the default */ if (hwidth2 < VLAN_DEF_HWIDTH) return; hash2 = malloc(sizeof(struct ifvlanhead) * n2, M_VLAN, M_WAITOK); if (hash2 == NULL) { printf("%s: out of memory -- hash size not changed\n", __func__); return; /* We can live with the old hash table */ } for (j = 0; j < n2; j++) CK_SLIST_INIT(&hash2[j]); for (i = 0; i < n; i++) while ((ifv = CK_SLIST_FIRST(&trunk->hash[i])) != NULL) { CK_SLIST_REMOVE(&trunk->hash[i], ifv, ifvlan, ifv_list); j = HASH(ifv->ifv_vid, n2 - 1); CK_SLIST_INSERT_HEAD(&hash2[j], ifv, ifv_list); } NET_EPOCH_WAIT(); free(trunk->hash, M_VLAN); trunk->hash = hash2; trunk->hwidth = hwidth2; trunk->hmask = n2 - 1; if (bootverbose) if_printf(trunk->parent, "VLAN hash table resized from %d to %d buckets\n", n, n2); } static __inline struct ifvlan * vlan_gethash(struct ifvlantrunk *trunk, uint16_t vid) { struct ifvlan *ifv; NET_EPOCH_ASSERT(); CK_SLIST_FOREACH(ifv, &trunk->hash[HASH(vid, trunk->hmask)], ifv_list) if (ifv->ifv_vid == vid) return (ifv); return (NULL); } #if 0 /* Debugging code to view the hashtables. */ static void vlan_dumphash(struct ifvlantrunk *trunk) { int i; struct ifvlan *ifv; for (i = 0; i < (1 << trunk->hwidth); i++) { printf("%d: ", i); CK_SLIST_FOREACH(ifv, &trunk->hash[i], ifv_list) printf("%s ", ifv->ifv_ifp->if_xname); printf("\n"); } } #endif /* 0 */ #else static __inline struct ifvlan * vlan_gethash(struct ifvlantrunk *trunk, uint16_t vid) { return trunk->vlans[vid]; } static __inline int vlan_inshash(struct ifvlantrunk *trunk, struct ifvlan *ifv) { if (trunk->vlans[ifv->ifv_vid] != NULL) return EEXIST; trunk->vlans[ifv->ifv_vid] = ifv; trunk->refcnt++; return (0); } static __inline int vlan_remhash(struct ifvlantrunk *trunk, struct ifvlan *ifv) { trunk->vlans[ifv->ifv_vid] = NULL; trunk->refcnt--; return (0); } static __inline void vlan_freehash(struct ifvlantrunk *trunk) { } static __inline void vlan_inithash(struct ifvlantrunk *trunk) { } #endif /* !VLAN_ARRAY */ static void trunk_destroy(struct ifvlantrunk *trunk) { VLAN_XLOCK_ASSERT(); vlan_freehash(trunk); trunk->parent->if_vlantrunk = NULL; TRUNK_LOCK_DESTROY(trunk); if_rele(trunk->parent); free(trunk, M_VLAN); } /* * Program our multicast filter. What we're actually doing is * programming the multicast filter of the parent. This has the * side effect of causing the parent interface to receive multicast * traffic that it doesn't really want, which ends up being discarded * later by the upper protocol layers. Unfortunately, there's no way * to avoid this: there really is only one physical interface. */ static int vlan_setmulti(struct ifnet *ifp) { struct ifnet *ifp_p; struct ifmultiaddr *ifma; struct ifvlan *sc; struct vlan_mc_entry *mc; int error; VLAN_XLOCK_ASSERT(); /* Find the parent. */ sc = ifp->if_softc; ifp_p = PARENT(sc); CURVNET_SET_QUIET(ifp_p->if_vnet); /* First, remove any existing filter entries. */ while ((mc = CK_SLIST_FIRST(&sc->vlan_mc_listhead)) != NULL) { CK_SLIST_REMOVE_HEAD(&sc->vlan_mc_listhead, mc_entries); (void)if_delmulti(ifp_p, (struct sockaddr *)&mc->mc_addr); NET_EPOCH_CALL(vlan_mc_free, &mc->mc_epoch_ctx); } /* Now program new ones. */ IF_ADDR_WLOCK(ifp); CK_STAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) { if (ifma->ifma_addr->sa_family != AF_LINK) continue; mc = malloc(sizeof(struct vlan_mc_entry), M_VLAN, M_NOWAIT); if (mc == NULL) { IF_ADDR_WUNLOCK(ifp); CURVNET_RESTORE(); return (ENOMEM); } bcopy(ifma->ifma_addr, &mc->mc_addr, ifma->ifma_addr->sa_len); mc->mc_addr.sdl_index = ifp_p->if_index; CK_SLIST_INSERT_HEAD(&sc->vlan_mc_listhead, mc, mc_entries); } IF_ADDR_WUNLOCK(ifp); CK_SLIST_FOREACH (mc, &sc->vlan_mc_listhead, mc_entries) { error = if_addmulti(ifp_p, (struct sockaddr *)&mc->mc_addr, NULL); if (error) { CURVNET_RESTORE(); return (error); } } CURVNET_RESTORE(); return (0); } +/* + * A handler for interface ifnet events. + */ +static void +vlan_ifevent(void *arg __unused, struct ifnet *ifp, int event) +{ + struct epoch_tracker et; + struct ifvlan *ifv; + struct ifvlantrunk *trunk; + + if (event != IFNET_EVENT_UPDATE_BAUDRATE) + return; + + NET_EPOCH_ENTER(et); + trunk = ifp->if_vlantrunk; + if (trunk == NULL) { + NET_EPOCH_EXIT(et); + return; + } + + TRUNK_WLOCK(trunk); + VLAN_FOREACH(ifv, trunk) { + ifv->ifv_ifp->if_baudrate = ifp->if_baudrate; + } + TRUNK_WUNLOCK(trunk); + NET_EPOCH_EXIT(et); +} + /* * A handler for parent interface link layer address changes. * If the parent interface link layer address is changed we * should also change it on all children vlans. */ static void vlan_iflladdr(void *arg __unused, struct ifnet *ifp) { struct epoch_tracker et; struct ifvlan *ifv; struct ifnet *ifv_ifp; struct ifvlantrunk *trunk; struct sockaddr_dl *sdl; /* Need the epoch since this is run on taskqueue_swi. */ NET_EPOCH_ENTER(et); trunk = ifp->if_vlantrunk; if (trunk == NULL) { NET_EPOCH_EXIT(et); return; } /* * OK, it's a trunk. Loop over and change all vlan's lladdrs on it. * We need an exclusive lock here to prevent concurrent SIOCSIFLLADDR * ioctl calls on the parent garbling the lladdr of the child vlan. */ TRUNK_WLOCK(trunk); VLAN_FOREACH(ifv, trunk) { /* * Copy new new lladdr into the ifv_ifp, enqueue a task * to actually call if_setlladdr. if_setlladdr needs to * be deferred to a taskqueue because it will call into * the if_vlan ioctl path and try to acquire the global * lock. */ ifv_ifp = ifv->ifv_ifp; bcopy(IF_LLADDR(ifp), IF_LLADDR(ifv_ifp), ifp->if_addrlen); sdl = (struct sockaddr_dl *)ifv_ifp->if_addr->ifa_addr; sdl->sdl_alen = ifp->if_addrlen; taskqueue_enqueue(taskqueue_thread, &ifv->lladdr_task); } TRUNK_WUNLOCK(trunk); NET_EPOCH_EXIT(et); } /* * A handler for network interface departure events. * Track departure of trunks here so that we don't access invalid * pointers or whatever if a trunk is ripped from under us, e.g., * by ejecting its hot-plug card. However, if an ifnet is simply * being renamed, then there's no need to tear down the state. */ static void vlan_ifdetach(void *arg __unused, struct ifnet *ifp) { struct ifvlan *ifv; struct ifvlantrunk *trunk; /* If the ifnet is just being renamed, don't do anything. */ if (ifp->if_flags & IFF_RENAMING) return; VLAN_XLOCK(); trunk = ifp->if_vlantrunk; if (trunk == NULL) { VLAN_XUNLOCK(); return; } /* * OK, it's a trunk. Loop over and detach all vlan's on it. * Check trunk pointer after each vlan_unconfig() as it will * free it and set to NULL after the last vlan was detached. */ VLAN_FOREACH_UNTIL_SAFE(ifv, ifp->if_vlantrunk, ifp->if_vlantrunk == NULL) vlan_unconfig_locked(ifv->ifv_ifp, 1); /* Trunk should have been destroyed in vlan_unconfig(). */ KASSERT(ifp->if_vlantrunk == NULL, ("%s: purge failed", __func__)); VLAN_XUNLOCK(); } /* * Return the trunk device for a virtual interface. */ static struct ifnet * vlan_trunkdev(struct ifnet *ifp) { struct ifvlan *ifv; NET_EPOCH_ASSERT(); if (ifp->if_type != IFT_L2VLAN) return (NULL); ifv = ifp->if_softc; ifp = NULL; if (ifv->ifv_trunk) ifp = PARENT(ifv); return (ifp); } /* * Return the 12-bit VLAN VID for this interface, for use by external * components such as Infiniband. * * XXXRW: Note that the function name here is historical; it should be named * vlan_vid(). */ static int vlan_tag(struct ifnet *ifp, uint16_t *vidp) { struct ifvlan *ifv; if (ifp->if_type != IFT_L2VLAN) return (EINVAL); ifv = ifp->if_softc; *vidp = ifv->ifv_vid; return (0); } static int vlan_pcp(struct ifnet *ifp, uint16_t *pcpp) { struct ifvlan *ifv; if (ifp->if_type != IFT_L2VLAN) return (EINVAL); ifv = ifp->if_softc; *pcpp = ifv->ifv_pcp; return (0); } /* * Return a driver specific cookie for this interface. Synchronization * with setcookie must be provided by the driver. */ static void * vlan_cookie(struct ifnet *ifp) { struct ifvlan *ifv; if (ifp->if_type != IFT_L2VLAN) return (NULL); ifv = ifp->if_softc; return (ifv->ifv_cookie); } /* * Store a cookie in our softc that drivers can use to store driver * private per-instance data in. */ static int vlan_setcookie(struct ifnet *ifp, void *cookie) { struct ifvlan *ifv; if (ifp->if_type != IFT_L2VLAN) return (EINVAL); ifv = ifp->if_softc; ifv->ifv_cookie = cookie; return (0); } /* * Return the vlan device present at the specific VID. */ static struct ifnet * vlan_devat(struct ifnet *ifp, uint16_t vid) { struct ifvlantrunk *trunk; struct ifvlan *ifv; NET_EPOCH_ASSERT(); trunk = ifp->if_vlantrunk; if (trunk == NULL) return (NULL); ifp = NULL; ifv = vlan_gethash(trunk, vid); if (ifv) ifp = ifv->ifv_ifp; return (ifp); } /* * VLAN support can be loaded as a module. The only place in the * system that's intimately aware of this is ether_input. We hook * into this code through vlan_input_p which is defined there and * set here. No one else in the system should be aware of this so * we use an explicit reference here. */ extern void (*vlan_input_p)(struct ifnet *, struct mbuf *); /* For if_link_state_change() eyes only... */ extern void (*vlan_link_state_p)(struct ifnet *); static int vlan_modevent(module_t mod, int type, void *data) { switch (type) { case MOD_LOAD: ifdetach_tag = EVENTHANDLER_REGISTER(ifnet_departure_event, vlan_ifdetach, NULL, EVENTHANDLER_PRI_ANY); if (ifdetach_tag == NULL) return (ENOMEM); iflladdr_tag = EVENTHANDLER_REGISTER(iflladdr_event, vlan_iflladdr, NULL, EVENTHANDLER_PRI_ANY); if (iflladdr_tag == NULL) return (ENOMEM); + ifevent_tag = EVENTHANDLER_REGISTER(ifnet_event, + vlan_ifevent, NULL, EVENTHANDLER_PRI_ANY); + if (ifevent_tag == NULL) + return (ENOMEM); VLAN_LOCKING_INIT(); vlan_input_p = vlan_input; vlan_link_state_p = vlan_link_state; vlan_trunk_cap_p = vlan_trunk_capabilities; vlan_trunkdev_p = vlan_trunkdev; vlan_cookie_p = vlan_cookie; vlan_setcookie_p = vlan_setcookie; vlan_tag_p = vlan_tag; vlan_pcp_p = vlan_pcp; vlan_devat_p = vlan_devat; #ifndef VIMAGE vlan_cloner = if_clone_advanced(vlanname, 0, vlan_clone_match, vlan_clone_create, vlan_clone_destroy); #endif if (bootverbose) printf("vlan: initialized, using " #ifdef VLAN_ARRAY "full-size arrays" #else "hash tables with chaining" #endif "\n"); break; case MOD_UNLOAD: #ifndef VIMAGE if_clone_detach(vlan_cloner); #endif EVENTHANDLER_DEREGISTER(ifnet_departure_event, ifdetach_tag); EVENTHANDLER_DEREGISTER(iflladdr_event, iflladdr_tag); + EVENTHANDLER_DEREGISTER(ifnet_event, ifevent_tag); vlan_input_p = NULL; vlan_link_state_p = NULL; vlan_trunk_cap_p = NULL; vlan_trunkdev_p = NULL; vlan_tag_p = NULL; vlan_cookie_p = NULL; vlan_setcookie_p = NULL; vlan_devat_p = NULL; VLAN_LOCKING_DESTROY(); if (bootverbose) printf("vlan: unloaded\n"); break; default: return (EOPNOTSUPP); } return (0); } static moduledata_t vlan_mod = { "if_vlan", vlan_modevent, 0 }; DECLARE_MODULE(if_vlan, vlan_mod, SI_SUB_PSEUDO, SI_ORDER_ANY); MODULE_VERSION(if_vlan, 3); #ifdef VIMAGE static void vnet_vlan_init(const void *unused __unused) { vlan_cloner = if_clone_advanced(vlanname, 0, vlan_clone_match, vlan_clone_create, vlan_clone_destroy); V_vlan_cloner = vlan_cloner; } VNET_SYSINIT(vnet_vlan_init, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_ANY, vnet_vlan_init, NULL); static void vnet_vlan_uninit(const void *unused __unused) { if_clone_detach(V_vlan_cloner); } VNET_SYSUNINIT(vnet_vlan_uninit, SI_SUB_INIT_IF, SI_ORDER_ANY, vnet_vlan_uninit, NULL); #endif /* * Check for .[. ...] style interface names. */ static struct ifnet * vlan_clone_match_ethervid(const char *name, int *vidp) { char ifname[IFNAMSIZ]; char *cp; struct ifnet *ifp; int vid; strlcpy(ifname, name, IFNAMSIZ); if ((cp = strrchr(ifname, '.')) == NULL) return (NULL); *cp = '\0'; if ((ifp = ifunit_ref(ifname)) == NULL) return (NULL); /* Parse VID. */ if (*++cp == '\0') { if_rele(ifp); return (NULL); } vid = 0; for(; *cp >= '0' && *cp <= '9'; cp++) vid = (vid * 10) + (*cp - '0'); if (*cp != '\0') { if_rele(ifp); return (NULL); } if (vidp != NULL) *vidp = vid; return (ifp); } static int vlan_clone_match(struct if_clone *ifc, const char *name) { struct ifnet *ifp; const char *cp; ifp = vlan_clone_match_ethervid(name, NULL); if (ifp != NULL) { if_rele(ifp); return (1); } if (strncmp(vlanname, name, strlen(vlanname)) != 0) return (0); for (cp = name + 4; *cp != '\0'; cp++) { if (*cp < '0' || *cp > '9') return (0); } return (1); } static int vlan_clone_create(struct if_clone *ifc, char *name, size_t len, caddr_t params) { char *dp; bool wildcard = false; bool subinterface = false; int unit; int error; int vid = 0; uint16_t proto = ETHERTYPE_VLAN; struct ifvlan *ifv; struct ifnet *ifp; struct ifnet *p = NULL; struct ifaddr *ifa; struct sockaddr_dl *sdl; struct vlanreq vlr; static const u_char eaddr[ETHER_ADDR_LEN]; /* 00:00:00:00:00:00 */ /* * There are three ways to specify the cloned device: * o pass a parameter block with the clone request. * o specify parameters in the text of the clone device name * o specify no parameters and get an unattached device that * must be configured separately. * The first technique is preferred; the latter two are supported * for backwards compatibility. * * XXXRW: Note historic use of the word "tag" here. New ioctls may be * called for. */ if (params) { error = copyin(params, &vlr, sizeof(vlr)); if (error) return error; vid = vlr.vlr_tag; proto = vlr.vlr_proto; #ifdef COMPAT_FREEBSD12 if (proto == 0) proto = ETHERTYPE_VLAN; #endif p = ifunit_ref(vlr.vlr_parent); if (p == NULL) return (ENXIO); } if ((error = ifc_name2unit(name, &unit)) == 0) { /* * vlanX interface. Set wildcard to true if the unit number * is not fixed (-1) */ wildcard = (unit < 0); } else { struct ifnet *p_tmp = vlan_clone_match_ethervid(name, &vid); if (p_tmp != NULL) { error = 0; subinterface = true; unit = IF_DUNIT_NONE; wildcard = false; if (p != NULL) { if_rele(p_tmp); if (p != p_tmp) error = EINVAL; } else p = p_tmp; } else error = ENXIO; } if (error != 0) { if (p != NULL) if_rele(p); return (error); } if (!subinterface) { /* vlanX interface, mark X as busy or allocate new unit # */ error = ifc_alloc_unit(ifc, &unit); if (error != 0) { if (p != NULL) if_rele(p); return (error); } } /* In the wildcard case, we need to update the name. */ if (wildcard) { for (dp = name; *dp != '\0'; dp++); if (snprintf(dp, len - (dp-name), "%d", unit) > len - (dp-name) - 1) { panic("%s: interface name too long", __func__); } } ifv = malloc(sizeof(struct ifvlan), M_VLAN, M_WAITOK | M_ZERO); ifp = ifv->ifv_ifp = if_alloc(IFT_ETHER); if (ifp == NULL) { if (!subinterface) ifc_free_unit(ifc, unit); free(ifv, M_VLAN); if (p != NULL) if_rele(p); return (ENOSPC); } CK_SLIST_INIT(&ifv->vlan_mc_listhead); ifp->if_softc = ifv; /* * Set the name manually rather than using if_initname because * we don't conform to the default naming convention for interfaces. */ strlcpy(ifp->if_xname, name, IFNAMSIZ); ifp->if_dname = vlanname; ifp->if_dunit = unit; ifp->if_init = vlan_init; #ifdef ALTQ ifp->if_start = vlan_altq_start; ifp->if_transmit = vlan_altq_transmit; IFQ_SET_MAXLEN(&ifp->if_snd, ifqmaxlen); ifp->if_snd.ifq_drv_maxlen = 0; IFQ_SET_READY(&ifp->if_snd); #else ifp->if_transmit = vlan_transmit; #endif ifp->if_qflush = vlan_qflush; ifp->if_ioctl = vlan_ioctl; #if defined(KERN_TLS) || defined(RATELIMIT) ifp->if_snd_tag_alloc = vlan_snd_tag_alloc; ifp->if_ratelimit_query = vlan_ratelimit_query; #endif ifp->if_flags = VLAN_IFFLAGS; ether_ifattach(ifp, eaddr); /* Now undo some of the damage... */ ifp->if_baudrate = 0; ifp->if_type = IFT_L2VLAN; ifp->if_hdrlen = ETHER_VLAN_ENCAP_LEN; ifa = ifp->if_addr; sdl = (struct sockaddr_dl *)ifa->ifa_addr; sdl->sdl_type = IFT_L2VLAN; if (p != NULL) { error = vlan_config(ifv, p, vid, proto); if_rele(p); if (error != 0) { /* * Since we've partially failed, we need to back * out all the way, otherwise userland could get * confused. Thus, we destroy the interface. */ ether_ifdetach(ifp); vlan_unconfig(ifp); if_free(ifp); if (!subinterface) ifc_free_unit(ifc, unit); free(ifv, M_VLAN); return (error); } } return (0); } static int vlan_clone_destroy(struct if_clone *ifc, struct ifnet *ifp) { struct ifvlan *ifv = ifp->if_softc; int unit = ifp->if_dunit; if (ifp->if_vlantrunk) return (EBUSY); #ifdef ALTQ IFQ_PURGE(&ifp->if_snd); #endif ether_ifdetach(ifp); /* first, remove it from system-wide lists */ vlan_unconfig(ifp); /* now it can be unconfigured and freed */ /* * We should have the only reference to the ifv now, so we can now * drain any remaining lladdr task before freeing the ifnet and the * ifvlan. */ taskqueue_drain(taskqueue_thread, &ifv->lladdr_task); NET_EPOCH_WAIT(); if_free(ifp); free(ifv, M_VLAN); if (unit != IF_DUNIT_NONE) ifc_free_unit(ifc, unit); return (0); } /* * The ifp->if_init entry point for vlan(4) is a no-op. */ static void vlan_init(void *foo __unused) { } /* * The if_transmit method for vlan(4) interface. */ static int vlan_transmit(struct ifnet *ifp, struct mbuf *m) { struct ifvlan *ifv; struct ifnet *p; int error, len, mcast; NET_EPOCH_ASSERT(); ifv = ifp->if_softc; if (TRUNK(ifv) == NULL) { if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); m_freem(m); return (ENETDOWN); } p = PARENT(ifv); len = m->m_pkthdr.len; mcast = (m->m_flags & (M_MCAST | M_BCAST)) ? 1 : 0; BPF_MTAP(ifp, m); #if defined(KERN_TLS) || defined(RATELIMIT) if (m->m_pkthdr.csum_flags & CSUM_SND_TAG) { struct vlan_snd_tag *vst; struct m_snd_tag *mst; MPASS(m->m_pkthdr.snd_tag->ifp == ifp); mst = m->m_pkthdr.snd_tag; vst = mst_to_vst(mst); if (vst->tag->ifp != p) { if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); m_freem(m); return (EAGAIN); } m->m_pkthdr.snd_tag = m_snd_tag_ref(vst->tag); m_snd_tag_rele(mst); } #endif /* * Do not run parent's if_transmit() if the parent is not up, * or parent's driver will cause a system crash. */ if (!UP_AND_RUNNING(p)) { if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); m_freem(m); return (ENETDOWN); } if (!ether_8021q_frame(&m, ifp, p, &ifv->ifv_qtag)) { if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); return (0); } /* * Send it, precisely as ether_output() would have. */ error = (p->if_transmit)(p, m); if (error == 0) { if_inc_counter(ifp, IFCOUNTER_OPACKETS, 1); if_inc_counter(ifp, IFCOUNTER_OBYTES, len); if_inc_counter(ifp, IFCOUNTER_OMCASTS, mcast); } else if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); return (error); } static int vlan_output(struct ifnet *ifp, struct mbuf *m, const struct sockaddr *dst, struct route *ro) { struct ifvlan *ifv; struct ifnet *p; NET_EPOCH_ASSERT(); /* * Find the first non-VLAN parent interface. */ ifv = ifp->if_softc; do { if (TRUNK(ifv) == NULL) { m_freem(m); return (ENETDOWN); } p = PARENT(ifv); ifv = p->if_softc; } while (p->if_type == IFT_L2VLAN); return p->if_output(ifp, m, dst, ro); } #ifdef ALTQ static void vlan_altq_start(if_t ifp) { struct ifaltq *ifq = &ifp->if_snd; struct mbuf *m; IFQ_LOCK(ifq); IFQ_DEQUEUE_NOLOCK(ifq, m); while (m != NULL) { vlan_transmit(ifp, m); IFQ_DEQUEUE_NOLOCK(ifq, m); } IFQ_UNLOCK(ifq); } static int vlan_altq_transmit(if_t ifp, struct mbuf *m) { int err; if (ALTQ_IS_ENABLED(&ifp->if_snd)) { IFQ_ENQUEUE(&ifp->if_snd, m, err); if (err == 0) vlan_altq_start(ifp); } else err = vlan_transmit(ifp, m); return (err); } #endif /* ALTQ */ /* * The ifp->if_qflush entry point for vlan(4) is a no-op. */ static void vlan_qflush(struct ifnet *ifp __unused) { } static void vlan_input(struct ifnet *ifp, struct mbuf *m) { struct ifvlantrunk *trunk; struct ifvlan *ifv; struct m_tag *mtag; uint16_t vid, tag; NET_EPOCH_ASSERT(); trunk = ifp->if_vlantrunk; if (trunk == NULL) { m_freem(m); return; } if (m->m_flags & M_VLANTAG) { /* * Packet is tagged, but m contains a normal * Ethernet frame; the tag is stored out-of-band. */ tag = m->m_pkthdr.ether_vtag; m->m_flags &= ~M_VLANTAG; } else { struct ether_vlan_header *evl; /* * Packet is tagged in-band as specified by 802.1q. */ switch (ifp->if_type) { case IFT_ETHER: if (m->m_len < sizeof(*evl) && (m = m_pullup(m, sizeof(*evl))) == NULL) { if_printf(ifp, "cannot pullup VLAN header\n"); return; } evl = mtod(m, struct ether_vlan_header *); tag = ntohs(evl->evl_tag); /* * Remove the 802.1q header by copying the Ethernet * addresses over it and adjusting the beginning of * the data in the mbuf. The encapsulated Ethernet * type field is already in place. */ bcopy((char *)evl, (char *)evl + ETHER_VLAN_ENCAP_LEN, ETHER_HDR_LEN - ETHER_TYPE_LEN); m_adj(m, ETHER_VLAN_ENCAP_LEN); break; default: #ifdef INVARIANTS panic("%s: %s has unsupported if_type %u", __func__, ifp->if_xname, ifp->if_type); #endif if_inc_counter(ifp, IFCOUNTER_NOPROTO, 1); m_freem(m); return; } } vid = EVL_VLANOFTAG(tag); ifv = vlan_gethash(trunk, vid); if (ifv == NULL || !UP_AND_RUNNING(ifv->ifv_ifp)) { if_inc_counter(ifp, IFCOUNTER_NOPROTO, 1); m_freem(m); return; } if (V_vlan_mtag_pcp) { /* * While uncommon, it is possible that we will find a 802.1q * packet encapsulated inside another packet that also had an * 802.1q header. For example, ethernet tunneled over IPSEC * arriving over ethernet. In that case, we replace the * existing 802.1q PCP m_tag value. */ mtag = m_tag_locate(m, MTAG_8021Q, MTAG_8021Q_PCP_IN, NULL); if (mtag == NULL) { mtag = m_tag_alloc(MTAG_8021Q, MTAG_8021Q_PCP_IN, sizeof(uint8_t), M_NOWAIT); if (mtag == NULL) { if_inc_counter(ifp, IFCOUNTER_IERRORS, 1); m_freem(m); return; } m_tag_prepend(m, mtag); } *(uint8_t *)(mtag + 1) = EVL_PRIOFTAG(tag); } m->m_pkthdr.rcvif = ifv->ifv_ifp; if_inc_counter(ifv->ifv_ifp, IFCOUNTER_IPACKETS, 1); /* Pass it back through the parent's input routine. */ (*ifv->ifv_ifp->if_input)(ifv->ifv_ifp, m); } static void vlan_lladdr_fn(void *arg, int pending __unused) { struct ifvlan *ifv; struct ifnet *ifp; ifv = (struct ifvlan *)arg; ifp = ifv->ifv_ifp; CURVNET_SET(ifp->if_vnet); /* The ifv_ifp already has the lladdr copied in. */ if_setlladdr(ifp, IF_LLADDR(ifp), ifp->if_addrlen); CURVNET_RESTORE(); } static int vlan_config(struct ifvlan *ifv, struct ifnet *p, uint16_t vid, uint16_t proto) { struct epoch_tracker et; struct ifvlantrunk *trunk; struct ifnet *ifp; int error = 0; /* * We can handle non-ethernet hardware types as long as * they handle the tagging and headers themselves. */ if (p->if_type != IFT_ETHER && p->if_type != IFT_L2VLAN && (p->if_capenable & IFCAP_VLAN_HWTAGGING) == 0) return (EPROTONOSUPPORT); if ((p->if_flags & VLAN_IFFLAGS) != VLAN_IFFLAGS) return (EPROTONOSUPPORT); /* * Don't let the caller set up a VLAN VID with * anything except VLID bits. * VID numbers 0x0 and 0xFFF are reserved. */ if (vid == 0 || vid == 0xFFF || (vid & ~EVL_VLID_MASK)) return (EINVAL); if (ifv->ifv_trunk) return (EBUSY); VLAN_XLOCK(); if (p->if_vlantrunk == NULL) { trunk = malloc(sizeof(struct ifvlantrunk), M_VLAN, M_WAITOK | M_ZERO); vlan_inithash(trunk); TRUNK_LOCK_INIT(trunk); TRUNK_WLOCK(trunk); p->if_vlantrunk = trunk; trunk->parent = p; if_ref(trunk->parent); TRUNK_WUNLOCK(trunk); } else { trunk = p->if_vlantrunk; } ifv->ifv_vid = vid; /* must set this before vlan_inshash() */ ifv->ifv_pcp = 0; /* Default: best effort delivery. */ error = vlan_inshash(trunk, ifv); if (error) goto done; ifv->ifv_proto = proto; ifv->ifv_encaplen = ETHER_VLAN_ENCAP_LEN; ifv->ifv_mintu = ETHERMIN; ifv->ifv_pflags = 0; ifv->ifv_capenable = -1; /* * If the parent supports the VLAN_MTU capability, * i.e. can Tx/Rx larger than ETHER_MAX_LEN frames, * use it. */ if (p->if_capenable & IFCAP_VLAN_MTU) { /* * No need to fudge the MTU since the parent can * handle extended frames. */ ifv->ifv_mtufudge = 0; } else { /* * Fudge the MTU by the encapsulation size. This * makes us incompatible with strictly compliant * 802.1Q implementations, but allows us to use * the feature with other NetBSD implementations, * which might still be useful. */ ifv->ifv_mtufudge = ifv->ifv_encaplen; } ifv->ifv_trunk = trunk; ifp = ifv->ifv_ifp; /* * Initialize fields from our parent. This duplicates some * work with ether_ifattach() but allows for non-ethernet * interfaces to also work. */ ifp->if_mtu = p->if_mtu - ifv->ifv_mtufudge; ifp->if_baudrate = p->if_baudrate; ifp->if_input = p->if_input; ifp->if_resolvemulti = p->if_resolvemulti; ifp->if_addrlen = p->if_addrlen; ifp->if_broadcastaddr = p->if_broadcastaddr; ifp->if_pcp = ifv->ifv_pcp; /* * We wrap the parent's if_output using vlan_output to ensure that it * can't become stale. */ ifp->if_output = vlan_output; /* * Copy only a selected subset of flags from the parent. * Other flags are none of our business. */ #define VLAN_COPY_FLAGS (IFF_SIMPLEX) ifp->if_flags &= ~VLAN_COPY_FLAGS; ifp->if_flags |= p->if_flags & VLAN_COPY_FLAGS; #undef VLAN_COPY_FLAGS ifp->if_link_state = p->if_link_state; NET_EPOCH_ENTER(et); vlan_capabilities(ifv); NET_EPOCH_EXIT(et); /* * Set up our interface address to reflect the underlying * physical interface's. */ TASK_INIT(&ifv->lladdr_task, 0, vlan_lladdr_fn, ifv); ((struct sockaddr_dl *)ifp->if_addr->ifa_addr)->sdl_alen = p->if_addrlen; /* * Do not schedule link address update if it was the same * as previous parent's. This helps avoid updating for each * associated llentry. */ if (memcmp(IF_LLADDR(p), IF_LLADDR(ifp), p->if_addrlen) != 0) { bcopy(IF_LLADDR(p), IF_LLADDR(ifp), p->if_addrlen); taskqueue_enqueue(taskqueue_thread, &ifv->lladdr_task); } /* We are ready for operation now. */ ifp->if_drv_flags |= IFF_DRV_RUNNING; /* Update flags on the parent, if necessary. */ vlan_setflags(ifp, 1); /* * Configure multicast addresses that may already be * joined on the vlan device. */ (void)vlan_setmulti(ifp); done: if (error == 0) EVENTHANDLER_INVOKE(vlan_config, p, ifv->ifv_vid); VLAN_XUNLOCK(); return (error); } static void vlan_unconfig(struct ifnet *ifp) { VLAN_XLOCK(); vlan_unconfig_locked(ifp, 0); VLAN_XUNLOCK(); } static void vlan_unconfig_locked(struct ifnet *ifp, int departing) { struct ifvlantrunk *trunk; struct vlan_mc_entry *mc; struct ifvlan *ifv; struct ifnet *parent; int error; VLAN_XLOCK_ASSERT(); ifv = ifp->if_softc; trunk = ifv->ifv_trunk; parent = NULL; if (trunk != NULL) { parent = trunk->parent; /* * Since the interface is being unconfigured, we need to * empty the list of multicast groups that we may have joined * while we were alive from the parent's list. */ while ((mc = CK_SLIST_FIRST(&ifv->vlan_mc_listhead)) != NULL) { /* * If the parent interface is being detached, * all its multicast addresses have already * been removed. Warn about errors if * if_delmulti() does fail, but don't abort as * all callers expect vlan destruction to * succeed. */ if (!departing) { error = if_delmulti(parent, (struct sockaddr *)&mc->mc_addr); if (error) if_printf(ifp, "Failed to delete multicast address from parent: %d\n", error); } CK_SLIST_REMOVE_HEAD(&ifv->vlan_mc_listhead, mc_entries); NET_EPOCH_CALL(vlan_mc_free, &mc->mc_epoch_ctx); } vlan_setflags(ifp, 0); /* clear special flags on parent */ vlan_remhash(trunk, ifv); ifv->ifv_trunk = NULL; /* * Check if we were the last. */ if (trunk->refcnt == 0) { parent->if_vlantrunk = NULL; NET_EPOCH_WAIT(); trunk_destroy(trunk); } } /* Disconnect from parent. */ if (ifv->ifv_pflags) if_printf(ifp, "%s: ifv_pflags unclean\n", __func__); ifp->if_mtu = ETHERMTU; ifp->if_link_state = LINK_STATE_UNKNOWN; ifp->if_drv_flags &= ~IFF_DRV_RUNNING; /* * Only dispatch an event if vlan was * attached, otherwise there is nothing * to cleanup anyway. */ if (parent != NULL) EVENTHANDLER_INVOKE(vlan_unconfig, parent, ifv->ifv_vid); } /* Handle a reference counted flag that should be set on the parent as well */ static int vlan_setflag(struct ifnet *ifp, int flag, int status, int (*func)(struct ifnet *, int)) { struct ifvlan *ifv; int error; VLAN_SXLOCK_ASSERT(); ifv = ifp->if_softc; status = status ? (ifp->if_flags & flag) : 0; /* Now "status" contains the flag value or 0 */ /* * See if recorded parent's status is different from what * we want it to be. If it is, flip it. We record parent's * status in ifv_pflags so that we won't clear parent's flag * we haven't set. In fact, we don't clear or set parent's * flags directly, but get or release references to them. * That's why we can be sure that recorded flags still are * in accord with actual parent's flags. */ if (status != (ifv->ifv_pflags & flag)) { error = (*func)(PARENT(ifv), status); if (error) return (error); ifv->ifv_pflags &= ~flag; ifv->ifv_pflags |= status; } return (0); } /* * Handle IFF_* flags that require certain changes on the parent: * if "status" is true, update parent's flags respective to our if_flags; * if "status" is false, forcedly clear the flags set on parent. */ static int vlan_setflags(struct ifnet *ifp, int status) { int error, i; for (i = 0; vlan_pflags[i].flag; i++) { error = vlan_setflag(ifp, vlan_pflags[i].flag, status, vlan_pflags[i].func); if (error) return (error); } return (0); } /* Inform all vlans that their parent has changed link state */ static void vlan_link_state(struct ifnet *ifp) { struct epoch_tracker et; struct ifvlantrunk *trunk; struct ifvlan *ifv; NET_EPOCH_ENTER(et); trunk = ifp->if_vlantrunk; if (trunk == NULL) { NET_EPOCH_EXIT(et); return; } TRUNK_WLOCK(trunk); VLAN_FOREACH(ifv, trunk) { ifv->ifv_ifp->if_baudrate = trunk->parent->if_baudrate; if_link_state_change(ifv->ifv_ifp, trunk->parent->if_link_state); } TRUNK_WUNLOCK(trunk); NET_EPOCH_EXIT(et); } static void vlan_capabilities(struct ifvlan *ifv) { struct ifnet *p; struct ifnet *ifp; struct ifnet_hw_tsomax hw_tsomax; int cap = 0, ena = 0, mena; u_long hwa = 0; NET_EPOCH_ASSERT(); VLAN_SXLOCK_ASSERT(); p = PARENT(ifv); ifp = ifv->ifv_ifp; /* Mask parent interface enabled capabilities disabled by user. */ mena = p->if_capenable & ifv->ifv_capenable; /* * If the parent interface can do checksum offloading * on VLANs, then propagate its hardware-assisted * checksumming flags. Also assert that checksum * offloading requires hardware VLAN tagging. */ if (p->if_capabilities & IFCAP_VLAN_HWCSUM) cap |= p->if_capabilities & (IFCAP_HWCSUM | IFCAP_HWCSUM_IPV6); if (p->if_capenable & IFCAP_VLAN_HWCSUM && p->if_capenable & IFCAP_VLAN_HWTAGGING) { ena |= mena & (IFCAP_HWCSUM | IFCAP_HWCSUM_IPV6); if (ena & IFCAP_TXCSUM) hwa |= p->if_hwassist & (CSUM_IP | CSUM_TCP | CSUM_UDP | CSUM_SCTP); if (ena & IFCAP_TXCSUM_IPV6) hwa |= p->if_hwassist & (CSUM_TCP_IPV6 | CSUM_UDP_IPV6 | CSUM_SCTP_IPV6); } /* * If the parent interface can do TSO on VLANs then * propagate the hardware-assisted flag. TSO on VLANs * does not necessarily require hardware VLAN tagging. */ memset(&hw_tsomax, 0, sizeof(hw_tsomax)); if_hw_tsomax_common(p, &hw_tsomax); if_hw_tsomax_update(ifp, &hw_tsomax); if (p->if_capabilities & IFCAP_VLAN_HWTSO) cap |= p->if_capabilities & IFCAP_TSO; if (p->if_capenable & IFCAP_VLAN_HWTSO) { ena |= mena & IFCAP_TSO; if (ena & IFCAP_TSO) hwa |= p->if_hwassist & CSUM_TSO; } /* * If the parent interface can do LRO and checksum offloading on * VLANs, then guess it may do LRO on VLANs. False positive here * cost nothing, while false negative may lead to some confusions. */ if (p->if_capabilities & IFCAP_VLAN_HWCSUM) cap |= p->if_capabilities & IFCAP_LRO; if (p->if_capenable & IFCAP_VLAN_HWCSUM) ena |= p->if_capenable & IFCAP_LRO; /* * If the parent interface can offload TCP connections over VLANs then * propagate its TOE capability to the VLAN interface. * * All TOE drivers in the tree today can deal with VLANs. If this * changes then IFCAP_VLAN_TOE should be promoted to a full capability * with its own bit. */ #define IFCAP_VLAN_TOE IFCAP_TOE if (p->if_capabilities & IFCAP_VLAN_TOE) cap |= p->if_capabilities & IFCAP_TOE; if (p->if_capenable & IFCAP_VLAN_TOE) { TOEDEV(ifp) = TOEDEV(p); ena |= mena & IFCAP_TOE; } /* * If the parent interface supports dynamic link state, so does the * VLAN interface. */ cap |= (p->if_capabilities & IFCAP_LINKSTATE); ena |= (mena & IFCAP_LINKSTATE); #ifdef RATELIMIT /* * If the parent interface supports ratelimiting, so does the * VLAN interface. */ cap |= (p->if_capabilities & IFCAP_TXRTLMT); ena |= (mena & IFCAP_TXRTLMT); #endif /* * If the parent interface supports unmapped mbufs, so does * the VLAN interface. Note that this should be fine even for * interfaces that don't support hardware tagging as headers * are prepended in normal mbufs to unmapped mbufs holding * payload data. */ cap |= (p->if_capabilities & IFCAP_MEXTPG); ena |= (mena & IFCAP_MEXTPG); /* * If the parent interface can offload encryption and segmentation * of TLS records over TCP, propagate it's capability to the VLAN * interface. * * All TLS drivers in the tree today can deal with VLANs. If * this ever changes, then a new IFCAP_VLAN_TXTLS can be * defined. */ if (p->if_capabilities & (IFCAP_TXTLS | IFCAP_TXTLS_RTLMT)) cap |= p->if_capabilities & (IFCAP_TXTLS | IFCAP_TXTLS_RTLMT); if (p->if_capenable & (IFCAP_TXTLS | IFCAP_TXTLS_RTLMT)) ena |= mena & (IFCAP_TXTLS | IFCAP_TXTLS_RTLMT); ifp->if_capabilities = cap; ifp->if_capenable = ena; ifp->if_hwassist = hwa; } static void vlan_trunk_capabilities(struct ifnet *ifp) { struct epoch_tracker et; struct ifvlantrunk *trunk; struct ifvlan *ifv; VLAN_SLOCK(); trunk = ifp->if_vlantrunk; if (trunk == NULL) { VLAN_SUNLOCK(); return; } NET_EPOCH_ENTER(et); VLAN_FOREACH(ifv, trunk) vlan_capabilities(ifv); NET_EPOCH_EXIT(et); VLAN_SUNLOCK(); } static int vlan_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data) { struct ifnet *p; struct ifreq *ifr; #ifdef INET struct ifaddr *ifa; #endif struct ifvlan *ifv; struct ifvlantrunk *trunk; struct vlanreq vlr; int error = 0, oldmtu; ifr = (struct ifreq *)data; #ifdef INET ifa = (struct ifaddr *) data; #endif ifv = ifp->if_softc; switch (cmd) { case SIOCSIFADDR: ifp->if_flags |= IFF_UP; #ifdef INET if (ifa->ifa_addr->sa_family == AF_INET) arp_ifinit(ifp, ifa); #endif break; case SIOCGIFADDR: bcopy(IF_LLADDR(ifp), &ifr->ifr_addr.sa_data[0], ifp->if_addrlen); break; case SIOCGIFMEDIA: VLAN_SLOCK(); if (TRUNK(ifv) != NULL) { p = PARENT(ifv); if_ref(p); error = (*p->if_ioctl)(p, SIOCGIFMEDIA, data); if_rele(p); /* Limit the result to the parent's current config. */ if (error == 0) { struct ifmediareq *ifmr; ifmr = (struct ifmediareq *)data; if (ifmr->ifm_count >= 1 && ifmr->ifm_ulist) { ifmr->ifm_count = 1; error = copyout(&ifmr->ifm_current, ifmr->ifm_ulist, sizeof(int)); } } } else { error = EINVAL; } VLAN_SUNLOCK(); break; case SIOCSIFMEDIA: error = EINVAL; break; case SIOCSIFMTU: /* * Set the interface MTU. */ VLAN_SLOCK(); trunk = TRUNK(ifv); if (trunk != NULL) { TRUNK_WLOCK(trunk); if (ifr->ifr_mtu > (PARENT(ifv)->if_mtu - ifv->ifv_mtufudge) || ifr->ifr_mtu < (ifv->ifv_mintu - ifv->ifv_mtufudge)) error = EINVAL; else ifp->if_mtu = ifr->ifr_mtu; TRUNK_WUNLOCK(trunk); } else error = EINVAL; VLAN_SUNLOCK(); break; case SIOCSETVLAN: #ifdef VIMAGE /* * XXXRW/XXXBZ: The goal in these checks is to allow a VLAN * interface to be delegated to a jail without allowing the * jail to change what underlying interface/VID it is * associated with. We are not entirely convinced that this * is the right way to accomplish that policy goal. */ if (ifp->if_vnet != ifp->if_home_vnet) { error = EPERM; break; } #endif error = copyin(ifr_data_get_ptr(ifr), &vlr, sizeof(vlr)); if (error) break; if (vlr.vlr_parent[0] == '\0') { vlan_unconfig(ifp); break; } p = ifunit_ref(vlr.vlr_parent); if (p == NULL) { error = ENOENT; break; } #ifdef COMPAT_FREEBSD12 if (vlr.vlr_proto == 0) vlr.vlr_proto = ETHERTYPE_VLAN; #endif oldmtu = ifp->if_mtu; error = vlan_config(ifv, p, vlr.vlr_tag, vlr.vlr_proto); if_rele(p); /* * VLAN MTU may change during addition of the vlandev. * If it did, do network layer specific procedure. */ if (ifp->if_mtu != oldmtu) { #ifdef INET6 nd6_setmtu(ifp); #endif rt_updatemtu(ifp); } break; case SIOCGETVLAN: #ifdef VIMAGE if (ifp->if_vnet != ifp->if_home_vnet) { error = EPERM; break; } #endif bzero(&vlr, sizeof(vlr)); VLAN_SLOCK(); if (TRUNK(ifv) != NULL) { strlcpy(vlr.vlr_parent, PARENT(ifv)->if_xname, sizeof(vlr.vlr_parent)); vlr.vlr_tag = ifv->ifv_vid; vlr.vlr_proto = ifv->ifv_proto; } VLAN_SUNLOCK(); error = copyout(&vlr, ifr_data_get_ptr(ifr), sizeof(vlr)); break; case SIOCSIFFLAGS: /* * We should propagate selected flags to the parent, * e.g., promiscuous mode. */ VLAN_XLOCK(); if (TRUNK(ifv) != NULL) error = vlan_setflags(ifp, 1); VLAN_XUNLOCK(); break; case SIOCADDMULTI: case SIOCDELMULTI: /* * If we don't have a parent, just remember the membership for * when we do. * * XXX We need the rmlock here to avoid sleeping while * holding in6_multi_mtx. */ VLAN_XLOCK(); trunk = TRUNK(ifv); if (trunk != NULL) error = vlan_setmulti(ifp); VLAN_XUNLOCK(); break; case SIOCGVLANPCP: #ifdef VIMAGE if (ifp->if_vnet != ifp->if_home_vnet) { error = EPERM; break; } #endif ifr->ifr_vlan_pcp = ifv->ifv_pcp; break; case SIOCSVLANPCP: #ifdef VIMAGE if (ifp->if_vnet != ifp->if_home_vnet) { error = EPERM; break; } #endif error = priv_check(curthread, PRIV_NET_SETVLANPCP); if (error) break; if (ifr->ifr_vlan_pcp > VLAN_PCP_MAX) { error = EINVAL; break; } ifv->ifv_pcp = ifr->ifr_vlan_pcp; ifp->if_pcp = ifv->ifv_pcp; /* broadcast event about PCP change */ EVENTHANDLER_INVOKE(ifnet_event, ifp, IFNET_EVENT_PCP); break; case SIOCSIFCAP: VLAN_SLOCK(); ifv->ifv_capenable = ifr->ifr_reqcap; trunk = TRUNK(ifv); if (trunk != NULL) { struct epoch_tracker et; NET_EPOCH_ENTER(et); vlan_capabilities(ifv); NET_EPOCH_EXIT(et); } VLAN_SUNLOCK(); break; default: error = EINVAL; break; } return (error); } #if defined(KERN_TLS) || defined(RATELIMIT) static int vlan_snd_tag_alloc(struct ifnet *ifp, union if_snd_tag_alloc_params *params, struct m_snd_tag **ppmt) { struct epoch_tracker et; const struct if_snd_tag_sw *sw; struct vlan_snd_tag *vst; struct ifvlan *ifv; struct ifnet *parent; int error; switch (params->hdr.type) { #ifdef RATELIMIT case IF_SND_TAG_TYPE_UNLIMITED: sw = &vlan_snd_tag_ul_sw; break; case IF_SND_TAG_TYPE_RATE_LIMIT: sw = &vlan_snd_tag_rl_sw; break; #endif #ifdef KERN_TLS case IF_SND_TAG_TYPE_TLS: sw = &vlan_snd_tag_tls_sw; break; #ifdef RATELIMIT case IF_SND_TAG_TYPE_TLS_RATE_LIMIT: sw = &vlan_snd_tag_tls_rl_sw; break; #endif #endif default: return (EOPNOTSUPP); } NET_EPOCH_ENTER(et); ifv = ifp->if_softc; if (ifv->ifv_trunk != NULL) parent = PARENT(ifv); else parent = NULL; if (parent == NULL) { NET_EPOCH_EXIT(et); return (EOPNOTSUPP); } if_ref(parent); NET_EPOCH_EXIT(et); vst = malloc(sizeof(*vst), M_VLAN, M_NOWAIT); if (vst == NULL) { if_rele(parent); return (ENOMEM); } error = m_snd_tag_alloc(parent, params, &vst->tag); if_rele(parent); if (error) { free(vst, M_VLAN); return (error); } m_snd_tag_init(&vst->com, ifp, sw); *ppmt = &vst->com; return (0); } static struct m_snd_tag * vlan_next_snd_tag(struct m_snd_tag *mst) { struct vlan_snd_tag *vst; vst = mst_to_vst(mst); return (vst->tag); } static int vlan_snd_tag_modify(struct m_snd_tag *mst, union if_snd_tag_modify_params *params) { struct vlan_snd_tag *vst; vst = mst_to_vst(mst); return (vst->tag->sw->snd_tag_modify(vst->tag, params)); } static int vlan_snd_tag_query(struct m_snd_tag *mst, union if_snd_tag_query_params *params) { struct vlan_snd_tag *vst; vst = mst_to_vst(mst); return (vst->tag->sw->snd_tag_query(vst->tag, params)); } static void vlan_snd_tag_free(struct m_snd_tag *mst) { struct vlan_snd_tag *vst; vst = mst_to_vst(mst); m_snd_tag_rele(vst->tag); free(vst, M_VLAN); } static void vlan_ratelimit_query(struct ifnet *ifp __unused, struct if_ratelimit_query_results *q) { /* * For vlan, we have an indirect * interface. The caller needs to * get a ratelimit tag on the actual * interface the flow will go on. */ q->rate_table = NULL; q->flags = RT_IS_INDIRECT; q->max_flows = 0; q->number_of_rates = 0; } #endif