diff --git a/sys/net/if_bridge.c b/sys/net/if_bridge.c index 64aaecf6f029..0a35fb4095fb 100644 --- a/sys/net/if_bridge.c +++ b/sys/net/if_bridge.c @@ -1,4321 +1,4344 @@ /* $NetBSD: if_bridge.c,v 1.31 2005/06/01 19:45:34 jdc Exp $ */ /*- * SPDX-License-Identifier: BSD-4-Clause * * Copyright 2001 Wasabi Systems, Inc. * All rights reserved. * * Written by Jason R. Thorpe for Wasabi Systems, Inc. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed for the NetBSD Project by * Wasabi Systems, Inc. * 4. The name of Wasabi Systems, Inc. may not be used to endorse * or promote products derived from this software without specific prior * written permission. * * THIS SOFTWARE IS PROVIDED BY WASABI SYSTEMS, INC. ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL WASABI SYSTEMS, INC * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. */ /* * Copyright (c) 1999, 2000 Jason L. Wright (jason@thought.net) * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. * * OpenBSD: if_bridge.c,v 1.60 2001/06/15 03:38:33 itojun Exp */ /* * Network interface bridge support. * * TODO: * * - Currently only supports Ethernet-like interfaces (Ethernet, * 802.11, VLANs on Ethernet, etc.) 
Figure out a nice way * to bridge other types of interfaces (maybe consider * heterogeneous bridges). */ #include "opt_inet.h" #include "opt_inet6.h" #define EXTERR_CATEGORY EXTERR_CAT_BRIDGE #include #include /* string functions */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include /* for net/if.h */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef INET6 #include #include #include #endif #if defined(INET) || defined(INET6) #include #endif #include #include #include #include #include #include #include /* * At various points in the code we need to know if we're hooked into the INET * and/or INET6 pfil. Define some macros to do that based on which IP versions * are enabled in the kernel. This avoids littering the rest of the code with * #ifnet INET6 to avoid referencing V_inet6_pfil_head. */ #ifdef INET6 #define PFIL_HOOKED_IN_INET6 PFIL_HOOKED_IN(V_inet6_pfil_head) #define PFIL_HOOKED_OUT_INET6 PFIL_HOOKED_OUT(V_inet6_pfil_head) #else #define PFIL_HOOKED_IN_INET6 false #define PFIL_HOOKED_OUT_INET6 false #endif #ifdef INET #define PFIL_HOOKED_IN_INET PFIL_HOOKED_IN(V_inet_pfil_head) #define PFIL_HOOKED_OUT_INET PFIL_HOOKED_OUT(V_inet_pfil_head) #else #define PFIL_HOOKED_IN_INET false #define PFIL_HOOKED_OUT_INET false #endif #define PFIL_HOOKED_IN_46 (PFIL_HOOKED_IN_INET6 || PFIL_HOOKED_IN_INET) #define PFIL_HOOKED_OUT_46 (PFIL_HOOKED_OUT_INET6 || PFIL_HOOKED_OUT_INET) /* * Size of the route hash table. Must be a power of two. */ #ifndef BRIDGE_RTHASH_SIZE #define BRIDGE_RTHASH_SIZE 1024 #endif #define BRIDGE_RTHASH_MASK (BRIDGE_RTHASH_SIZE - 1) /* * Default maximum number of addresses to cache. */ #ifndef BRIDGE_RTABLE_MAX #define BRIDGE_RTABLE_MAX 2000 #endif /* * Timeout (in seconds) for entries learned dynamically. */ #ifndef BRIDGE_RTABLE_TIMEOUT #define BRIDGE_RTABLE_TIMEOUT (20 * 60) /* same as ARP */ #endif /* * Number of seconds between walks of the route list. */ #ifndef BRIDGE_RTABLE_PRUNE_PERIOD #define BRIDGE_RTABLE_PRUNE_PERIOD (5 * 60) #endif /* * List of capabilities to possibly mask on the member interface. */ #define BRIDGE_IFCAPS_MASK (IFCAP_TOE|IFCAP_TSO|IFCAP_TXCSUM|\ IFCAP_TXCSUM_IPV6|IFCAP_MEXTPG) /* * List of capabilities to strip */ #define BRIDGE_IFCAPS_STRIP IFCAP_LRO /* * Bridge locking * * The bridge relies heavily on the epoch(9) system to protect its data * structures. This means we can safely use CK_LISTs while in NET_EPOCH, but we * must ensure there is only one writer at a time. * * That is: for read accesses we only need to be in NET_EPOCH, but for write * accesses we must hold: * * - BRIDGE_RT_LOCK, for any change to bridge_rtnodes * - BRIDGE_LOCK, for any other change * * The BRIDGE_LOCK is a sleepable lock, because it is held across ioctl() * calls to bridge member interfaces and these ioctl()s can sleep. * The BRIDGE_RT_LOCK is a non-sleepable mutex, because it is sometimes * required while we're in NET_EPOCH and then we're not allowed to sleep. 
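 *
 * A minimal usage sketch (illustrative only, not part of the driver;
 * inspect() is a hypothetical helper) of how readers and writers are
 * expected to combine these primitives:
 *
 * Reader (net epoch only; no sleeping, no modification):
 *
 *	NET_EPOCH_ENTER(et);
 *	CK_LIST_FOREACH(bif, &sc->sc_iflist, bif_next)
 *		inspect(bif);
 *	NET_EPOCH_EXIT(et);
 *
 * Writer (exclusive, sleepable BRIDGE_LOCK, plus BRIDGE_RT_LOCK for any
 * change to the forwarding table):
 *
 *	BRIDGE_LOCK(sc);
 *	BRIDGE_RT_LOCK(sc);
 *	bridge_rtflush(sc, IFBF_FLUSHALL);
 *	BRIDGE_RT_UNLOCK(sc);
 *	BRIDGE_UNLOCK(sc);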
*/ #define BRIDGE_LOCK_INIT(_sc) do { \ sx_init(&(_sc)->sc_sx, "if_bridge"); \ mtx_init(&(_sc)->sc_rt_mtx, "if_bridge rt", NULL, MTX_DEF); \ } while (0) #define BRIDGE_LOCK_DESTROY(_sc) do { \ sx_destroy(&(_sc)->sc_sx); \ mtx_destroy(&(_sc)->sc_rt_mtx); \ } while (0) #define BRIDGE_LOCK(_sc) sx_xlock(&(_sc)->sc_sx) #define BRIDGE_UNLOCK(_sc) sx_xunlock(&(_sc)->sc_sx) #define BRIDGE_LOCK_ASSERT(_sc) sx_assert(&(_sc)->sc_sx, SX_XLOCKED) #define BRIDGE_LOCK_OR_NET_EPOCH_ASSERT(_sc) \ MPASS(in_epoch(net_epoch_preempt) || sx_xlocked(&(_sc)->sc_sx)) #define BRIDGE_UNLOCK_ASSERT(_sc) sx_assert(&(_sc)->sc_sx, SX_UNLOCKED) #define BRIDGE_RT_LOCK(_sc) mtx_lock(&(_sc)->sc_rt_mtx) #define BRIDGE_RT_UNLOCK(_sc) mtx_unlock(&(_sc)->sc_rt_mtx) #define BRIDGE_RT_LOCK_ASSERT(_sc) mtx_assert(&(_sc)->sc_rt_mtx, MA_OWNED) #define BRIDGE_RT_LOCK_OR_NET_EPOCH_ASSERT(_sc) \ MPASS(in_epoch(net_epoch_preempt) || mtx_owned(&(_sc)->sc_rt_mtx)) struct bridge_softc; /* * Bridge interface list entry. */ struct bridge_iflist { CK_LIST_ENTRY(bridge_iflist) bif_next; struct ifnet *bif_ifp; /* member if */ struct bridge_softc *bif_sc; /* parent bridge */ struct bstp_port bif_stp; /* STP state */ uint32_t bif_flags; /* member if flags */ int bif_savedcaps; /* saved capabilities */ uint32_t bif_addrmax; /* max # of addresses */ uint32_t bif_addrcnt; /* cur. # of addresses */ uint32_t bif_addrexceeded;/* # of address violations */ struct epoch_context bif_epoch_ctx; ether_vlanid_t bif_pvid; /* port vlan id */ ifbvlan_set_t bif_vlan_set; /* if allowed tagged vlans */ }; /* * Bridge route node. */ struct bridge_rtnode { CK_LIST_ENTRY(bridge_rtnode) brt_hash; /* hash table linkage */ CK_LIST_ENTRY(bridge_rtnode) brt_list; /* list linkage */ struct bridge_iflist *brt_dst; /* destination if */ unsigned long brt_expire; /* expiration time */ uint8_t brt_flags; /* address flags */ uint8_t brt_addr[ETHER_ADDR_LEN]; ether_vlanid_t brt_vlan; /* vlan id */ struct vnet *brt_vnet; struct epoch_context brt_epoch_ctx; }; #define brt_ifp brt_dst->bif_ifp /* * Software state for each bridge. */ struct bridge_softc { struct ifnet *sc_ifp; /* make this an interface */ LIST_ENTRY(bridge_softc) sc_list; struct sx sc_sx; struct mtx sc_rt_mtx; uint32_t sc_brtmax; /* max # of addresses */ uint32_t sc_brtcnt; /* cur. 
# of addresses */ uint32_t sc_brttimeout; /* rt timeout in seconds */ struct callout sc_brcallout; /* bridge callout */ CK_LIST_HEAD(, bridge_iflist) sc_iflist; /* member interface list */ CK_LIST_HEAD(, bridge_rtnode) *sc_rthash; /* our forwarding table */ CK_LIST_HEAD(, bridge_rtnode) sc_rtlist; /* list version of above */ uint32_t sc_rthash_key; /* key for hash */ CK_LIST_HEAD(, bridge_iflist) sc_spanlist; /* span ports list */ struct bstp_state sc_stp; /* STP state */ uint32_t sc_brtexceeded; /* # of cache drops */ struct ifnet *sc_ifaddr; /* member mac copied from */ struct ether_addr sc_defaddr; /* Default MAC address */ if_input_fn_t sc_if_input; /* Saved copy of if_input */ struct epoch_context sc_epoch_ctx; }; VNET_DEFINE_STATIC(struct sx, bridge_list_sx); #define V_bridge_list_sx VNET(bridge_list_sx) static eventhandler_tag bridge_detach_cookie; int bridge_rtable_prune_period = BRIDGE_RTABLE_PRUNE_PERIOD; VNET_DEFINE_STATIC(uma_zone_t, bridge_rtnode_zone); #define V_bridge_rtnode_zone VNET(bridge_rtnode_zone) static int bridge_clone_create(struct if_clone *, char *, size_t, struct ifc_data *, struct ifnet **); static int bridge_clone_destroy(struct if_clone *, struct ifnet *, uint32_t); static int bridge_ioctl(struct ifnet *, u_long, caddr_t); static void bridge_mutecaps(struct bridge_softc *); static void bridge_set_ifcap(struct bridge_softc *, struct bridge_iflist *, int); static void bridge_ifdetach(void *arg __unused, struct ifnet *); static void bridge_init(void *); static void bridge_dummynet(struct mbuf *, struct ifnet *); static bool bridge_same(const void *, const void *); static void *bridge_get_softc(struct ifnet *); static void bridge_stop(struct ifnet *, int); static int bridge_transmit(struct ifnet *, struct mbuf *); #ifdef ALTQ static void bridge_altq_start(if_t); static int bridge_altq_transmit(if_t, struct mbuf *); #endif static void bridge_qflush(struct ifnet *); static struct mbuf *bridge_input(struct ifnet *, struct mbuf *); static void bridge_inject(struct ifnet *, struct mbuf *); static int bridge_output(struct ifnet *, struct mbuf *, struct sockaddr *, struct rtentry *); static int bridge_enqueue(struct bridge_softc *, struct ifnet *, struct mbuf *, struct bridge_iflist *); static void bridge_rtdelete(struct bridge_softc *, struct ifnet *ifp, int); static void bridge_forward(struct bridge_softc *, struct bridge_iflist *, struct mbuf *m); static bool bridge_member_ifaddrs(void); static void bridge_timer(void *); static void bridge_broadcast(struct bridge_softc *, struct ifnet *, struct mbuf *, int); static void bridge_span(struct bridge_softc *, struct mbuf *); static int bridge_rtupdate(struct bridge_softc *, const uint8_t *, ether_vlanid_t, struct bridge_iflist *, int, uint8_t); static struct ifnet *bridge_rtlookup(struct bridge_softc *, const uint8_t *, ether_vlanid_t); static void bridge_rttrim(struct bridge_softc *); static void bridge_rtage(struct bridge_softc *); static void bridge_rtflush(struct bridge_softc *, int); static int bridge_rtdaddr(struct bridge_softc *, const uint8_t *, ether_vlanid_t); static bool bridge_vfilter_in(const struct bridge_iflist *, struct mbuf *); static bool bridge_vfilter_out(const struct bridge_iflist *, const struct mbuf *); static void bridge_rtable_init(struct bridge_softc *); static void bridge_rtable_fini(struct bridge_softc *); static int bridge_rtnode_addr_cmp(const uint8_t *, const uint8_t *); static struct bridge_rtnode *bridge_rtnode_lookup(struct bridge_softc *, const uint8_t *, ether_vlanid_t); static int 
bridge_rtnode_insert(struct bridge_softc *, struct bridge_rtnode *); static void bridge_rtnode_destroy(struct bridge_softc *, struct bridge_rtnode *); static void bridge_rtable_expire(struct ifnet *, int); static void bridge_state_change(struct ifnet *, int); static struct bridge_iflist *bridge_lookup_member(struct bridge_softc *, const char *name); static struct bridge_iflist *bridge_lookup_member_if(struct bridge_softc *, struct ifnet *ifp); static void bridge_delete_member(struct bridge_softc *, struct bridge_iflist *, int); static void bridge_delete_span(struct bridge_softc *, struct bridge_iflist *); static int bridge_ioctl_add(struct bridge_softc *, void *); static int bridge_ioctl_del(struct bridge_softc *, void *); static int bridge_ioctl_gifflags(struct bridge_softc *, void *); static int bridge_ioctl_sifflags(struct bridge_softc *, void *); static int bridge_ioctl_scache(struct bridge_softc *, void *); static int bridge_ioctl_gcache(struct bridge_softc *, void *); static int bridge_ioctl_gifs(struct bridge_softc *, void *); static int bridge_ioctl_rts(struct bridge_softc *, void *); static int bridge_ioctl_saddr(struct bridge_softc *, void *); static int bridge_ioctl_sto(struct bridge_softc *, void *); static int bridge_ioctl_gto(struct bridge_softc *, void *); static int bridge_ioctl_daddr(struct bridge_softc *, void *); static int bridge_ioctl_flush(struct bridge_softc *, void *); static int bridge_ioctl_gpri(struct bridge_softc *, void *); static int bridge_ioctl_spri(struct bridge_softc *, void *); static int bridge_ioctl_ght(struct bridge_softc *, void *); static int bridge_ioctl_sht(struct bridge_softc *, void *); static int bridge_ioctl_gfd(struct bridge_softc *, void *); static int bridge_ioctl_sfd(struct bridge_softc *, void *); static int bridge_ioctl_gma(struct bridge_softc *, void *); static int bridge_ioctl_sma(struct bridge_softc *, void *); static int bridge_ioctl_sifprio(struct bridge_softc *, void *); static int bridge_ioctl_sifcost(struct bridge_softc *, void *); static int bridge_ioctl_sifmaxaddr(struct bridge_softc *, void *); static int bridge_ioctl_sifpvid(struct bridge_softc *, void *); static int bridge_ioctl_sifvlanset(struct bridge_softc *, void *); static int bridge_ioctl_gifvlanset(struct bridge_softc *, void *); static int bridge_ioctl_addspan(struct bridge_softc *, void *); static int bridge_ioctl_delspan(struct bridge_softc *, void *); static int bridge_ioctl_gbparam(struct bridge_softc *, void *); static int bridge_ioctl_grte(struct bridge_softc *, void *); static int bridge_ioctl_gifsstp(struct bridge_softc *, void *); static int bridge_ioctl_sproto(struct bridge_softc *, void *); static int bridge_ioctl_stxhc(struct bridge_softc *, void *); static int bridge_pfil(struct mbuf **, struct ifnet *, struct ifnet *, int); #ifdef INET static int bridge_ip_checkbasic(struct mbuf **mp); static int bridge_fragment(struct ifnet *, struct mbuf **mp, struct ether_header *, int, struct llc *); #endif /* INET */ #ifdef INET6 static int bridge_ip6_checkbasic(struct mbuf **mp); #endif /* INET6 */ static void bridge_linkstate(struct ifnet *ifp); static void bridge_linkcheck(struct bridge_softc *sc); /* * Use the "null" value from IEEE 802.1Q-2014 Table 9-2 * to indicate untagged frames. */ #define VLANTAGOF(_m) \ ((_m->m_flags & M_VLANTAG) ? 
EVL_VLANOFTAG(_m->m_pkthdr.ether_vtag) : DOT1Q_VID_NULL) static struct bstp_cb_ops bridge_ops = { .bcb_state = bridge_state_change, .bcb_rtage = bridge_rtable_expire }; SYSCTL_DECL(_net_link); static SYSCTL_NODE(_net_link, IFT_BRIDGE, bridge, CTLFLAG_RW | CTLFLAG_MPSAFE, 0, "Bridge"); /* only pass IP[46] packets when pfil is enabled */ VNET_DEFINE_STATIC(int, pfil_onlyip) = 1; #define V_pfil_onlyip VNET(pfil_onlyip) SYSCTL_INT(_net_link_bridge, OID_AUTO, pfil_onlyip, CTLFLAG_RWTUN | CTLFLAG_VNET, &VNET_NAME(pfil_onlyip), 0, "Only pass IP packets when pfil is enabled"); /* run pfil hooks on the bridge interface */ VNET_DEFINE_STATIC(int, pfil_bridge) = 0; #define V_pfil_bridge VNET(pfil_bridge) SYSCTL_INT(_net_link_bridge, OID_AUTO, pfil_bridge, CTLFLAG_RWTUN | CTLFLAG_VNET, &VNET_NAME(pfil_bridge), 0, "Packet filter on the bridge interface"); /* layer2 filter with ipfw */ VNET_DEFINE_STATIC(int, pfil_ipfw); #define V_pfil_ipfw VNET(pfil_ipfw) /* layer2 ARP filter with ipfw */ VNET_DEFINE_STATIC(int, pfil_ipfw_arp); #define V_pfil_ipfw_arp VNET(pfil_ipfw_arp) SYSCTL_INT(_net_link_bridge, OID_AUTO, ipfw_arp, CTLFLAG_RWTUN | CTLFLAG_VNET, &VNET_NAME(pfil_ipfw_arp), 0, "Filter ARP packets through IPFW layer2"); /* run pfil hooks on the member interface */ VNET_DEFINE_STATIC(int, pfil_member) = 0; #define V_pfil_member VNET(pfil_member) SYSCTL_INT(_net_link_bridge, OID_AUTO, pfil_member, CTLFLAG_RWTUN | CTLFLAG_VNET, &VNET_NAME(pfil_member), 0, "Packet filter on the member interface"); /* run pfil hooks on the physical interface for locally destined packets */ VNET_DEFINE_STATIC(int, pfil_local_phys); #define V_pfil_local_phys VNET(pfil_local_phys) SYSCTL_INT(_net_link_bridge, OID_AUTO, pfil_local_phys, CTLFLAG_RWTUN | CTLFLAG_VNET, &VNET_NAME(pfil_local_phys), 0, "Packet filter on the physical interface for locally destined packets"); /* log STP state changes */ VNET_DEFINE_STATIC(int, log_stp); #define V_log_stp VNET(log_stp) SYSCTL_INT(_net_link_bridge, OID_AUTO, log_stp, CTLFLAG_RWTUN | CTLFLAG_VNET, &VNET_NAME(log_stp), 0, "Log STP state changes"); /* share MAC with first bridge member */ VNET_DEFINE_STATIC(int, bridge_inherit_mac); #define V_bridge_inherit_mac VNET(bridge_inherit_mac) SYSCTL_INT(_net_link_bridge, OID_AUTO, inherit_mac, CTLFLAG_RWTUN | CTLFLAG_VNET, &VNET_NAME(bridge_inherit_mac), 0, "Inherit MAC address from the first bridge member"); VNET_DEFINE_STATIC(int, allow_llz_overlap) = 0; #define V_allow_llz_overlap VNET(allow_llz_overlap) SYSCTL_INT(_net_link_bridge, OID_AUTO, allow_llz_overlap, CTLFLAG_RW | CTLFLAG_VNET, &VNET_NAME(allow_llz_overlap), 0, "Allow overlap of link-local scope " "zones of a bridge interface and the member interfaces"); /* log MAC address port flapping */ VNET_DEFINE_STATIC(bool, log_mac_flap) = true; #define V_log_mac_flap VNET(log_mac_flap) SYSCTL_BOOL(_net_link_bridge, OID_AUTO, log_mac_flap, CTLFLAG_RW | CTLFLAG_VNET, &VNET_NAME(log_mac_flap), true, "Log MAC address port flapping"); /* allow IP addresses on bridge members */ VNET_DEFINE_STATIC(bool, member_ifaddrs) = false; #define V_member_ifaddrs VNET(member_ifaddrs) SYSCTL_BOOL(_net_link_bridge, OID_AUTO, member_ifaddrs, CTLFLAG_RW | CTLFLAG_VNET, &VNET_NAME(member_ifaddrs), false, "Allow layer 3 addresses on bridge members"); static bool bridge_member_ifaddrs(void) { return (V_member_ifaddrs); } VNET_DEFINE_STATIC(int, log_interval) = 5; VNET_DEFINE_STATIC(int, log_count) = 0; VNET_DEFINE_STATIC(struct timeval, log_last) = { 0 }; #define V_log_interval VNET(log_interval) #define V_log_count 
VNET(log_count) #define V_log_last VNET(log_last) struct bridge_control { int (*bc_func)(struct bridge_softc *, void *); int bc_argsize; int bc_flags; }; #define BC_F_COPYIN 0x01 /* copy arguments in */ #define BC_F_COPYOUT 0x02 /* copy arguments out */ #define BC_F_SUSER 0x04 /* do super-user check */ static const struct bridge_control bridge_control_table[] = { { bridge_ioctl_add, sizeof(struct ifbreq), BC_F_COPYIN|BC_F_SUSER }, { bridge_ioctl_del, sizeof(struct ifbreq), BC_F_COPYIN|BC_F_SUSER }, { bridge_ioctl_gifflags, sizeof(struct ifbreq), BC_F_COPYIN|BC_F_COPYOUT }, { bridge_ioctl_sifflags, sizeof(struct ifbreq), BC_F_COPYIN|BC_F_SUSER }, { bridge_ioctl_scache, sizeof(struct ifbrparam), BC_F_COPYIN|BC_F_SUSER }, { bridge_ioctl_gcache, sizeof(struct ifbrparam), BC_F_COPYOUT }, { bridge_ioctl_gifs, sizeof(struct ifbifconf), BC_F_COPYIN|BC_F_COPYOUT }, { bridge_ioctl_rts, sizeof(struct ifbaconf), BC_F_COPYIN|BC_F_COPYOUT }, { bridge_ioctl_saddr, sizeof(struct ifbareq), BC_F_COPYIN|BC_F_SUSER }, { bridge_ioctl_sto, sizeof(struct ifbrparam), BC_F_COPYIN|BC_F_SUSER }, { bridge_ioctl_gto, sizeof(struct ifbrparam), BC_F_COPYOUT }, { bridge_ioctl_daddr, sizeof(struct ifbareq), BC_F_COPYIN|BC_F_SUSER }, { bridge_ioctl_flush, sizeof(struct ifbreq), BC_F_COPYIN|BC_F_SUSER }, { bridge_ioctl_gpri, sizeof(struct ifbrparam), BC_F_COPYOUT }, { bridge_ioctl_spri, sizeof(struct ifbrparam), BC_F_COPYIN|BC_F_SUSER }, { bridge_ioctl_ght, sizeof(struct ifbrparam), BC_F_COPYOUT }, { bridge_ioctl_sht, sizeof(struct ifbrparam), BC_F_COPYIN|BC_F_SUSER }, { bridge_ioctl_gfd, sizeof(struct ifbrparam), BC_F_COPYOUT }, { bridge_ioctl_sfd, sizeof(struct ifbrparam), BC_F_COPYIN|BC_F_SUSER }, { bridge_ioctl_gma, sizeof(struct ifbrparam), BC_F_COPYOUT }, { bridge_ioctl_sma, sizeof(struct ifbrparam), BC_F_COPYIN|BC_F_SUSER }, { bridge_ioctl_sifprio, sizeof(struct ifbreq), BC_F_COPYIN|BC_F_SUSER }, { bridge_ioctl_sifcost, sizeof(struct ifbreq), BC_F_COPYIN|BC_F_SUSER }, { bridge_ioctl_addspan, sizeof(struct ifbreq), BC_F_COPYIN|BC_F_SUSER }, { bridge_ioctl_delspan, sizeof(struct ifbreq), BC_F_COPYIN|BC_F_SUSER }, { bridge_ioctl_gbparam, sizeof(struct ifbropreq), BC_F_COPYOUT }, { bridge_ioctl_grte, sizeof(struct ifbrparam), BC_F_COPYOUT }, { bridge_ioctl_gifsstp, sizeof(struct ifbpstpconf), BC_F_COPYIN|BC_F_COPYOUT }, { bridge_ioctl_sproto, sizeof(struct ifbrparam), BC_F_COPYIN|BC_F_SUSER }, { bridge_ioctl_stxhc, sizeof(struct ifbrparam), BC_F_COPYIN|BC_F_SUSER }, { bridge_ioctl_sifmaxaddr, sizeof(struct ifbreq), BC_F_COPYIN|BC_F_SUSER }, { bridge_ioctl_sifpvid, sizeof(struct ifbreq), BC_F_COPYIN|BC_F_SUSER }, { bridge_ioctl_sifvlanset, sizeof(struct ifbif_vlan_req), BC_F_COPYIN|BC_F_SUSER }, { bridge_ioctl_gifvlanset, sizeof(struct ifbif_vlan_req), BC_F_COPYIN|BC_F_COPYOUT }, }; static const int bridge_control_table_size = nitems(bridge_control_table); VNET_DEFINE_STATIC(LIST_HEAD(, bridge_softc), bridge_list) = LIST_HEAD_INITIALIZER(); #define V_bridge_list VNET(bridge_list) #define BRIDGE_LIST_LOCK_INIT(x) sx_init(&V_bridge_list_sx, \ "if_bridge list") #define BRIDGE_LIST_LOCK_DESTROY(x) sx_destroy(&V_bridge_list_sx) #define BRIDGE_LIST_LOCK(x) sx_xlock(&V_bridge_list_sx) #define BRIDGE_LIST_UNLOCK(x) sx_xunlock(&V_bridge_list_sx) VNET_DEFINE_STATIC(struct if_clone *, bridge_cloner); #define V_bridge_cloner VNET(bridge_cloner) static const char bridge_name[] = "bridge"; static void vnet_bridge_init(const void *unused __unused) { V_bridge_rtnode_zone = uma_zcreate("bridge_rtnode", sizeof(struct bridge_rtnode), NULL, 
NULL, NULL, NULL, UMA_ALIGN_PTR, 0); BRIDGE_LIST_LOCK_INIT(); struct if_clone_addreq req = { .create_f = bridge_clone_create, .destroy_f = bridge_clone_destroy, .flags = IFC_F_AUTOUNIT, }; V_bridge_cloner = ifc_attach_cloner(bridge_name, &req); } VNET_SYSINIT(vnet_bridge_init, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_ANY, vnet_bridge_init, NULL); static void vnet_bridge_uninit(const void *unused __unused) { ifc_detach_cloner(V_bridge_cloner); V_bridge_cloner = NULL; BRIDGE_LIST_LOCK_DESTROY(); /* Callbacks may use the UMA zone. */ NET_EPOCH_DRAIN_CALLBACKS(); uma_zdestroy(V_bridge_rtnode_zone); } VNET_SYSUNINIT(vnet_bridge_uninit, SI_SUB_PSEUDO, SI_ORDER_ANY, vnet_bridge_uninit, NULL); static int bridge_modevent(module_t mod, int type, void *data) { switch (type) { case MOD_LOAD: bridge_dn_p = bridge_dummynet; bridge_same_p = bridge_same; bridge_get_softc_p = bridge_get_softc; bridge_member_ifaddrs_p = bridge_member_ifaddrs; bridge_detach_cookie = EVENTHANDLER_REGISTER( ifnet_departure_event, bridge_ifdetach, NULL, EVENTHANDLER_PRI_ANY); break; case MOD_UNLOAD: EVENTHANDLER_DEREGISTER(ifnet_departure_event, bridge_detach_cookie); bridge_dn_p = NULL; bridge_same_p = NULL; bridge_get_softc_p = NULL; bridge_member_ifaddrs_p = NULL; break; default: return (EOPNOTSUPP); } return (0); } static moduledata_t bridge_mod = { "if_bridge", bridge_modevent, 0 }; DECLARE_MODULE(if_bridge, bridge_mod, SI_SUB_PSEUDO, SI_ORDER_ANY); MODULE_VERSION(if_bridge, 1); MODULE_DEPEND(if_bridge, bridgestp, 1, 1, 1); /* * handler for net.link.bridge.ipfw */ static int sysctl_pfil_ipfw(SYSCTL_HANDLER_ARGS) { int enable = V_pfil_ipfw; int error; error = sysctl_handle_int(oidp, &enable, 0, req); enable &= 1; if (enable != V_pfil_ipfw) { V_pfil_ipfw = enable; /* * Disable pfil so that ipfw doesnt run twice, if the user * really wants both then they can re-enable pfil_bridge and/or * pfil_member. Also allow non-ip packets as ipfw can filter by * layer2 type. */ if (V_pfil_ipfw) { V_pfil_onlyip = 0; V_pfil_bridge = 0; V_pfil_member = 0; } } return (error); } SYSCTL_PROC(_net_link_bridge, OID_AUTO, ipfw, CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_VNET | CTLFLAG_NEEDGIANT, &VNET_NAME(pfil_ipfw), 0, &sysctl_pfil_ipfw, "I", "Layer2 filter with IPFW"); #ifdef VIMAGE static void bridge_reassign(struct ifnet *ifp, struct vnet *newvnet, char *arg) { struct bridge_softc *sc = ifp->if_softc; struct bridge_iflist *bif; BRIDGE_LOCK(sc); while ((bif = CK_LIST_FIRST(&sc->sc_iflist)) != NULL) bridge_delete_member(sc, bif, 0); while ((bif = CK_LIST_FIRST(&sc->sc_spanlist)) != NULL) { bridge_delete_span(sc, bif); } BRIDGE_UNLOCK(sc); ether_reassign(ifp, newvnet, arg); } #endif /* * bridge_get_softc: * * Return the bridge softc for an ifnet. */ static void * bridge_get_softc(struct ifnet *ifp) { struct bridge_iflist *bif; NET_EPOCH_ASSERT(); bif = ifp->if_bridge; if (bif == NULL) return (NULL); return (bif->bif_sc); } /* * bridge_same: * * Return true if two interfaces are in the same bridge. This is only used by * bridgestp via bridge_same_p. */ static bool bridge_same(const void *bifap, const void *bifbp) { const struct bridge_iflist *bifa = bifap, *bifb = bifbp; NET_EPOCH_ASSERT(); if (bifa == NULL || bifb == NULL) return (false); return (bifa->bif_sc == bifb->bif_sc); } /* * bridge_clone_create: * * Create a new bridge instance. 
*/ static int bridge_clone_create(struct if_clone *ifc, char *name, size_t len, struct ifc_data *ifd, struct ifnet **ifpp) { struct bridge_softc *sc; struct ifnet *ifp; sc = malloc(sizeof(*sc), M_DEVBUF, M_WAITOK|M_ZERO); ifp = sc->sc_ifp = if_alloc(IFT_ETHER); BRIDGE_LOCK_INIT(sc); sc->sc_brtmax = BRIDGE_RTABLE_MAX; sc->sc_brttimeout = BRIDGE_RTABLE_TIMEOUT; /* Initialize our routing table. */ bridge_rtable_init(sc); callout_init_mtx(&sc->sc_brcallout, &sc->sc_rt_mtx, 0); CK_LIST_INIT(&sc->sc_iflist); CK_LIST_INIT(&sc->sc_spanlist); ifp->if_softc = sc; if_initname(ifp, bridge_name, ifd->unit); ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST; ifp->if_capabilities = ifp->if_capenable = IFCAP_VLAN_HWTAGGING; ifp->if_ioctl = bridge_ioctl; #ifdef ALTQ ifp->if_start = bridge_altq_start; ifp->if_transmit = bridge_altq_transmit; IFQ_SET_MAXLEN(&ifp->if_snd, ifqmaxlen); ifp->if_snd.ifq_drv_maxlen = 0; IFQ_SET_READY(&ifp->if_snd); #else ifp->if_transmit = bridge_transmit; #endif ifp->if_qflush = bridge_qflush; ifp->if_init = bridge_init; ifp->if_type = IFT_BRIDGE; ether_gen_addr(ifp, &sc->sc_defaddr); bstp_attach(&sc->sc_stp, &bridge_ops); ether_ifattach(ifp, sc->sc_defaddr.octet); /* Now undo some of the damage... */ ifp->if_baudrate = 0; #ifdef VIMAGE ifp->if_reassign = bridge_reassign; #endif sc->sc_if_input = ifp->if_input; /* ether_input */ ifp->if_input = bridge_inject; /* * Allow BRIDGE_INPUT() to pass in packets originating from the bridge * itself via bridge_inject(). This is required for netmap but * otherwise has no effect. */ ifp->if_bridge_input = bridge_input; BRIDGE_LIST_LOCK(); LIST_INSERT_HEAD(&V_bridge_list, sc, sc_list); BRIDGE_LIST_UNLOCK(); *ifpp = ifp; return (0); } static void bridge_clone_destroy_cb(struct epoch_context *ctx) { struct bridge_softc *sc; sc = __containerof(ctx, struct bridge_softc, sc_epoch_ctx); BRIDGE_LOCK_DESTROY(sc); free(sc, M_DEVBUF); } /* * bridge_clone_destroy: * * Destroy a bridge instance. */ static int bridge_clone_destroy(struct if_clone *ifc, struct ifnet *ifp, uint32_t flags) { struct bridge_softc *sc = ifp->if_softc; struct bridge_iflist *bif; struct epoch_tracker et; BRIDGE_LOCK(sc); bridge_stop(ifp, 1); ifp->if_flags &= ~IFF_UP; while ((bif = CK_LIST_FIRST(&sc->sc_iflist)) != NULL) bridge_delete_member(sc, bif, 0); while ((bif = CK_LIST_FIRST(&sc->sc_spanlist)) != NULL) { bridge_delete_span(sc, bif); } /* Tear down the routing table. */ bridge_rtable_fini(sc); BRIDGE_UNLOCK(sc); NET_EPOCH_ENTER(et); callout_drain(&sc->sc_brcallout); BRIDGE_LIST_LOCK(); LIST_REMOVE(sc, sc_list); BRIDGE_LIST_UNLOCK(); bstp_detach(&sc->sc_stp); #ifdef ALTQ IFQ_PURGE(&ifp->if_snd); #endif NET_EPOCH_EXIT(et); ether_ifdetach(ifp); if_free(ifp); NET_EPOCH_CALL(bridge_clone_destroy_cb, &sc->sc_epoch_ctx); return (0); } /* * bridge_ioctl: * * Handle a control request from the operator. 
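 *
 * SIOCGDRVSPEC/SIOCSDRVSPEC requests are demultiplexed through
 * bridge_control_table, indexed by ifd_cmd.  As a rough illustration
 * (not part of this file; BRDGADD, struct ifdrv and struct ifbreq come
 * from net/if.h and net/if_bridgevar.h), userland adds a member roughly
 * like this:
 *
 *	struct ifbreq req = { 0 };
 *	struct ifdrv ifd = { 0 };
 *
 *	strlcpy(req.ifbr_ifsname, "em0", sizeof(req.ifbr_ifsname));
 *	strlcpy(ifd.ifd_name, "bridge0", sizeof(ifd.ifd_name));
 *	ifd.ifd_cmd = BRDGADD;
 *	ifd.ifd_len = sizeof(req);
 *	ifd.ifd_data = &req;
 *	ioctl(sock, SIOCSDRVSPEC, &ifd);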
*/ static int bridge_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data) { struct bridge_softc *sc = ifp->if_softc; struct ifreq *ifr = (struct ifreq *)data; struct bridge_iflist *bif; struct thread *td = curthread; union { struct ifbreq ifbreq; struct ifbifconf ifbifconf; struct ifbareq ifbareq; struct ifbaconf ifbaconf; struct ifbrparam ifbrparam; struct ifbropreq ifbropreq; struct ifbif_vlan_req ifvlanreq; } args; struct ifdrv *ifd = (struct ifdrv *) data; const struct bridge_control *bc; int error = 0, oldmtu; BRIDGE_LOCK(sc); switch (cmd) { case SIOCADDMULTI: case SIOCDELMULTI: break; case SIOCGDRVSPEC: case SIOCSDRVSPEC: if (ifd->ifd_cmd >= bridge_control_table_size) { error = EXTERROR(EINVAL, "Invalid control command"); break; } bc = &bridge_control_table[ifd->ifd_cmd]; if (cmd == SIOCGDRVSPEC && (bc->bc_flags & BC_F_COPYOUT) == 0) { error = EXTERROR(EINVAL, "Inappropriate ioctl for command " "(expected SIOCSDRVSPEC)"); break; } else if (cmd == SIOCSDRVSPEC && (bc->bc_flags & BC_F_COPYOUT) != 0) { error = EXTERROR(EINVAL, "Inappropriate ioctl for command " "(expected SIOCGDRVSPEC)"); break; } if (bc->bc_flags & BC_F_SUSER) { error = priv_check(td, PRIV_NET_BRIDGE); if (error) { EXTERROR(error, "PRIV_NET_BRIDGE required"); break; } } if (ifd->ifd_len != bc->bc_argsize || ifd->ifd_len > sizeof(args)) { error = EXTERROR(EINVAL, "Invalid argument size"); break; } bzero(&args, sizeof(args)); if (bc->bc_flags & BC_F_COPYIN) { error = copyin(ifd->ifd_data, &args, ifd->ifd_len); if (error) break; } oldmtu = ifp->if_mtu; error = (*bc->bc_func)(sc, &args); if (error) break; /* * Bridge MTU may change during addition of the first port. * If it did, do network layer specific procedure. */ if (ifp->if_mtu != oldmtu) if_notifymtu(ifp); if (bc->bc_flags & BC_F_COPYOUT) error = copyout(&args, ifd->ifd_data, ifd->ifd_len); break; case SIOCSIFFLAGS: if (!(ifp->if_flags & IFF_UP) && (ifp->if_drv_flags & IFF_DRV_RUNNING)) { /* * If interface is marked down and it is running, * then stop and disable it. */ bridge_stop(ifp, 1); } else if ((ifp->if_flags & IFF_UP) && !(ifp->if_drv_flags & IFF_DRV_RUNNING)) { /* * If interface is marked up and it is stopped, then * start it. */ BRIDGE_UNLOCK(sc); (*ifp->if_init)(sc); BRIDGE_LOCK(sc); } break; case SIOCSIFMTU: oldmtu = sc->sc_ifp->if_mtu; if (ifr->ifr_mtu < IF_MINMTU) { error = EXTERROR(EINVAL, "Requested MTU is lower than IF_MINMTU"); break; } if (CK_LIST_EMPTY(&sc->sc_iflist)) { sc->sc_ifp->if_mtu = ifr->ifr_mtu; break; } CK_LIST_FOREACH(bif, &sc->sc_iflist, bif_next) { error = (*bif->bif_ifp->if_ioctl)(bif->bif_ifp, SIOCSIFMTU, (caddr_t)ifr); if (error != 0) { log(LOG_NOTICE, "%s: invalid MTU: %u for" " member %s\n", sc->sc_ifp->if_xname, ifr->ifr_mtu, bif->bif_ifp->if_xname); error = EINVAL; break; } } if (error) { /* Restore the previous MTU on all member interfaces. */ ifr->ifr_mtu = oldmtu; CK_LIST_FOREACH(bif, &sc->sc_iflist, bif_next) { (*bif->bif_ifp->if_ioctl)(bif->bif_ifp, SIOCSIFMTU, (caddr_t)ifr); } EXTERROR(error, "Failed to set MTU on member interface"); } else { sc->sc_ifp->if_mtu = ifr->ifr_mtu; } break; default: /* * drop the lock as ether_ioctl() will call bridge_start() and * cause the lock to be recursed. 
*/ BRIDGE_UNLOCK(sc); error = ether_ioctl(ifp, cmd, data); BRIDGE_LOCK(sc); break; } BRIDGE_UNLOCK(sc); return (error); } /* * bridge_mutecaps: * * Clear or restore unwanted capabilities on the member interface */ static void bridge_mutecaps(struct bridge_softc *sc) { struct bridge_iflist *bif; int enabled, mask; BRIDGE_LOCK_ASSERT(sc); /* Initial bitmask of capabilities to test */ mask = BRIDGE_IFCAPS_MASK; CK_LIST_FOREACH(bif, &sc->sc_iflist, bif_next) { /* Every member must support it or it's disabled */ mask &= bif->bif_savedcaps; } CK_LIST_FOREACH(bif, &sc->sc_iflist, bif_next) { enabled = bif->bif_ifp->if_capenable; enabled &= ~BRIDGE_IFCAPS_STRIP; /* Strip off mask bits and enable them again if allowed */ enabled &= ~BRIDGE_IFCAPS_MASK; enabled |= mask; bridge_set_ifcap(sc, bif, enabled); } } static void bridge_set_ifcap(struct bridge_softc *sc, struct bridge_iflist *bif, int set) { struct ifnet *ifp = bif->bif_ifp; struct ifreq ifr; int error, mask, stuck; bzero(&ifr, sizeof(ifr)); ifr.ifr_reqcap = set; if (ifp->if_capenable != set) { error = (*ifp->if_ioctl)(ifp, SIOCSIFCAP, (caddr_t)&ifr); if (error) if_printf(sc->sc_ifp, "error setting capabilities on %s: %d\n", ifp->if_xname, error); mask = BRIDGE_IFCAPS_MASK | BRIDGE_IFCAPS_STRIP; stuck = ifp->if_capenable & mask & ~set; if (stuck != 0) if_printf(sc->sc_ifp, "can't disable some capabilities on %s: 0x%x\n", ifp->if_xname, stuck); } } /* * bridge_lookup_member: * * Lookup a bridge member interface. */ static struct bridge_iflist * bridge_lookup_member(struct bridge_softc *sc, const char *name) { struct bridge_iflist *bif; struct ifnet *ifp; BRIDGE_LOCK_OR_NET_EPOCH_ASSERT(sc); CK_LIST_FOREACH(bif, &sc->sc_iflist, bif_next) { ifp = bif->bif_ifp; if (strcmp(ifp->if_xname, name) == 0) return (bif); } return (NULL); } /* * bridge_lookup_member_if: * * Lookup a bridge member interface by ifnet*. */ static struct bridge_iflist * bridge_lookup_member_if(struct bridge_softc *sc, struct ifnet *member_ifp) { BRIDGE_LOCK_OR_NET_EPOCH_ASSERT(sc); return (member_ifp->if_bridge); } static void bridge_delete_member_cb(struct epoch_context *ctx) { struct bridge_iflist *bif; bif = __containerof(ctx, struct bridge_iflist, bif_epoch_ctx); free(bif, M_DEVBUF); } /* * bridge_delete_member: * * Delete the specified member interface. */ static void bridge_delete_member(struct bridge_softc *sc, struct bridge_iflist *bif, int gone) { struct ifnet *ifs = bif->bif_ifp; struct ifnet *fif = NULL; struct bridge_iflist *bifl; BRIDGE_LOCK_ASSERT(sc); if (bif->bif_flags & IFBIF_STP) bstp_disable(&bif->bif_stp); ifs->if_bridge = NULL; CK_LIST_REMOVE(bif, bif_next); /* * If removing the interface that gave the bridge its mac address, set * the mac address of the bridge to the address of the next member, or * to its default address if no members are left. 
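 *
 * For example, with net.link.bridge.inherit_mac enabled: if bridge0
 * copied its address from em0 and em0 is removed while em1 remains a
 * member, bridge0 takes over em1's address; if em0 was the last member,
 * bridge0 reverts to its randomly generated sc_defaddr.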
*/ if (V_bridge_inherit_mac && sc->sc_ifaddr == ifs) { if (CK_LIST_EMPTY(&sc->sc_iflist)) { bcopy(&sc->sc_defaddr, IF_LLADDR(sc->sc_ifp), ETHER_ADDR_LEN); sc->sc_ifaddr = NULL; } else { bifl = CK_LIST_FIRST(&sc->sc_iflist); fif = bifl->bif_ifp; bcopy(IF_LLADDR(fif), IF_LLADDR(sc->sc_ifp), ETHER_ADDR_LEN); sc->sc_ifaddr = fif; } EVENTHANDLER_INVOKE(iflladdr_event, sc->sc_ifp); } bridge_linkcheck(sc); bridge_mutecaps(sc); /* recalcuate now this interface is removed */ BRIDGE_RT_LOCK(sc); bridge_rtdelete(sc, ifs, IFBF_FLUSHALL); BRIDGE_RT_UNLOCK(sc); KASSERT(bif->bif_addrcnt == 0, ("%s: %d bridge routes referenced", __func__, bif->bif_addrcnt)); ifs->if_bridge_output = NULL; ifs->if_bridge_input = NULL; ifs->if_bridge_linkstate = NULL; if (!gone) { switch (ifs->if_type) { case IFT_ETHER: case IFT_L2VLAN: /* * Take the interface out of promiscuous mode, but only * if it was promiscuous in the first place. It might * not be if we're in the bridge_ioctl_add() error path. */ if (ifs->if_flags & IFF_PROMISC) (void) ifpromisc(ifs, 0); break; case IFT_GIF: break; default: #ifdef DIAGNOSTIC panic("bridge_delete_member: impossible"); #endif break; } /* Re-enable any interface capabilities */ bridge_set_ifcap(sc, bif, bif->bif_savedcaps); } bstp_destroy(&bif->bif_stp); /* prepare to free */ NET_EPOCH_CALL(bridge_delete_member_cb, &bif->bif_epoch_ctx); } /* * bridge_delete_span: * * Delete the specified span interface. */ static void bridge_delete_span(struct bridge_softc *sc, struct bridge_iflist *bif) { BRIDGE_LOCK_ASSERT(sc); KASSERT(bif->bif_ifp->if_bridge == NULL, ("%s: not a span interface", __func__)); CK_LIST_REMOVE(bif, bif_next); NET_EPOCH_CALL(bridge_delete_member_cb, &bif->bif_epoch_ctx); } static int bridge_ioctl_add(struct bridge_softc *sc, void *arg) { struct ifbreq *req = arg; struct bridge_iflist *bif = NULL; struct ifnet *ifs; int error = 0; ifs = ifunit(req->ifbr_ifsname); if (ifs == NULL) return (EXTERROR(ENOENT, "No such interface", req->ifbr_ifsname)); if (ifs->if_ioctl == NULL) /* must be supported */ return (EXTERROR(EINVAL, "Interface must support ioctl(2)")); + /* + * If the new interface is a vlan(4), it could be a bridge SVI. + * Don't allow such things to be added to bridges. + */ + if (ifs->if_type == IFT_L2VLAN) { + struct ifnet *parent; + struct epoch_tracker et; + bool is_bridge; + + /* + * Entering NET_EPOCH with BRIDGE_LOCK held, but this is okay + * since we don't sleep here. + */ + NET_EPOCH_ENTER(et); + parent = VLAN_TRUNKDEV(ifs); + is_bridge = (parent != NULL && parent->if_type == IFT_BRIDGE); + NET_EPOCH_EXIT(et); + + if (is_bridge) + return (EXTERROR(EINVAL, + "Bridge SVI cannot be added to a bridge")); + } + /* If it's in the span list, it can't be a member. */ CK_LIST_FOREACH(bif, &sc->sc_spanlist, bif_next) if (ifs == bif->bif_ifp) return (EXTERROR(EBUSY, "Span interface cannot be a member")); if (ifs->if_bridge) { struct bridge_iflist *sbif = ifs->if_bridge; if (sbif->bif_sc == sc) return (EXTERROR(EEXIST, "Interface is already a member of this bridge")); return (EXTERROR(EBUSY, "Interface is already a member of another bridge")); } switch (ifs->if_type) { case IFT_ETHER: case IFT_L2VLAN: case IFT_GIF: /* permitted interface types */ break; default: return (EXTERROR(EINVAL, "Unsupported interface type")); } #ifdef INET6 /* * Two valid inet6 addresses with link-local scope must not be * on the parent interface and the member interfaces at the * same time. This restriction is needed to prevent violation * of link-local scope zone. 
Attempts to add a member * interface which has inet6 addresses when the parent has * inet6 triggers removal of all inet6 addresses on the member * interface. */ /* Check if the parent interface has a link-local scope addr. */ if (V_allow_llz_overlap == 0 && in6ifa_llaonifp(sc->sc_ifp) != NULL) { /* * If any, remove all inet6 addresses from the member * interfaces. */ CK_LIST_FOREACH(bif, &sc->sc_iflist, bif_next) { if (in6ifa_llaonifp(bif->bif_ifp)) { in6_ifdetach(bif->bif_ifp); if_printf(sc->sc_ifp, "IPv6 addresses on %s have been removed " "before adding it as a member to prevent " "IPv6 address scope violation.\n", bif->bif_ifp->if_xname); } } if (in6ifa_llaonifp(ifs)) { in6_ifdetach(ifs); if_printf(sc->sc_ifp, "IPv6 addresses on %s have been removed " "before adding it as a member to prevent " "IPv6 address scope violation.\n", ifs->if_xname); } } #endif /* * If member_ifaddrs is disabled, do not allow an interface with * assigned IP addresses to be added to a bridge. */ if (!V_member_ifaddrs) { struct ifaddr *ifa; CK_STAILQ_FOREACH(ifa, &ifs->if_addrhead, ifa_link) { #ifdef INET if (ifa->ifa_addr->sa_family == AF_INET) return (EXTERROR(EINVAL, "Member interface may not have " "an IPv4 address configured")); #endif #ifdef INET6 if (ifa->ifa_addr->sa_family == AF_INET6) return (EXTERROR(EINVAL, "Member interface may not have " "an IPv6 address configured")); #endif } } /* Allow the first Ethernet member to define the MTU */ if (CK_LIST_EMPTY(&sc->sc_iflist)) sc->sc_ifp->if_mtu = ifs->if_mtu; else if (sc->sc_ifp->if_mtu != ifs->if_mtu) { struct ifreq ifr; snprintf(ifr.ifr_name, sizeof(ifr.ifr_name), "%s", ifs->if_xname); ifr.ifr_mtu = sc->sc_ifp->if_mtu; error = (*ifs->if_ioctl)(ifs, SIOCSIFMTU, (caddr_t)&ifr); if (error != 0) { log(LOG_NOTICE, "%s: invalid MTU: %u for" " new member %s\n", sc->sc_ifp->if_xname, ifr.ifr_mtu, ifs->if_xname); return (EXTERROR(EINVAL, "Failed to set MTU on new member")); } } bif = malloc(sizeof(*bif), M_DEVBUF, M_NOWAIT|M_ZERO); if (bif == NULL) return (ENOMEM); bif->bif_sc = sc; bif->bif_ifp = ifs; bif->bif_flags = IFBIF_LEARNING | IFBIF_DISCOVER; bif->bif_savedcaps = ifs->if_capenable; /* * Assign the interface's MAC address to the bridge if it's the first * member and the MAC address of the bridge has not been changed from * the default randomly generated one. */ if (V_bridge_inherit_mac && CK_LIST_EMPTY(&sc->sc_iflist) && !memcmp(IF_LLADDR(sc->sc_ifp), sc->sc_defaddr.octet, ETHER_ADDR_LEN)) { bcopy(IF_LLADDR(ifs), IF_LLADDR(sc->sc_ifp), ETHER_ADDR_LEN); sc->sc_ifaddr = ifs; EVENTHANDLER_INVOKE(iflladdr_event, sc->sc_ifp); } ifs->if_bridge = bif; ifs->if_bridge_output = bridge_output; ifs->if_bridge_input = bridge_input; ifs->if_bridge_linkstate = bridge_linkstate; bstp_create(&sc->sc_stp, &bif->bif_stp, bif->bif_ifp); /* * XXX: XLOCK HERE!?! * * NOTE: insert_***HEAD*** should be safe for the traversals. 
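 * Inserting at the head is what makes this safe: a concurrent NET_EPOCH
 * reader either sees the fully initialised new element or starts its
 * traversal just past it, but never observes a half-linked list.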
*/ CK_LIST_INSERT_HEAD(&sc->sc_iflist, bif, bif_next); /* Set interface capabilities to the intersection set of all members */ bridge_mutecaps(sc); bridge_linkcheck(sc); /* Place the interface into promiscuous mode */ switch (ifs->if_type) { case IFT_ETHER: case IFT_L2VLAN: error = ifpromisc(ifs, 1); break; } if (error) bridge_delete_member(sc, bif, 0); return (error); } static int bridge_ioctl_del(struct bridge_softc *sc, void *arg) { struct ifbreq *req = arg; struct bridge_iflist *bif; bif = bridge_lookup_member(sc, req->ifbr_ifsname); if (bif == NULL) return (EXTERROR(ENOENT, "Interface is not a bridge member")); bridge_delete_member(sc, bif, 0); return (0); } static int bridge_ioctl_gifflags(struct bridge_softc *sc, void *arg) { struct ifbreq *req = arg; struct bridge_iflist *bif; struct bstp_port *bp; bif = bridge_lookup_member(sc, req->ifbr_ifsname); if (bif == NULL) return (EXTERROR(ENOENT, "Interface is not a bridge member")); bp = &bif->bif_stp; req->ifbr_ifsflags = bif->bif_flags; req->ifbr_state = bp->bp_state; req->ifbr_priority = bp->bp_priority; req->ifbr_path_cost = bp->bp_path_cost; req->ifbr_portno = bif->bif_ifp->if_index & 0xfff; req->ifbr_proto = bp->bp_protover; req->ifbr_role = bp->bp_role; req->ifbr_stpflags = bp->bp_flags; req->ifbr_addrcnt = bif->bif_addrcnt; req->ifbr_addrmax = bif->bif_addrmax; req->ifbr_addrexceeded = bif->bif_addrexceeded; req->ifbr_pvid = bif->bif_pvid; /* Copy STP state options as flags */ if (bp->bp_operedge) req->ifbr_ifsflags |= IFBIF_BSTP_EDGE; if (bp->bp_flags & BSTP_PORT_AUTOEDGE) req->ifbr_ifsflags |= IFBIF_BSTP_AUTOEDGE; if (bp->bp_ptp_link) req->ifbr_ifsflags |= IFBIF_BSTP_PTP; if (bp->bp_flags & BSTP_PORT_AUTOPTP) req->ifbr_ifsflags |= IFBIF_BSTP_AUTOPTP; if (bp->bp_flags & BSTP_PORT_ADMEDGE) req->ifbr_ifsflags |= IFBIF_BSTP_ADMEDGE; if (bp->bp_flags & BSTP_PORT_ADMCOST) req->ifbr_ifsflags |= IFBIF_BSTP_ADMCOST; return (0); } static int bridge_ioctl_sifflags(struct bridge_softc *sc, void *arg) { struct epoch_tracker et; struct ifbreq *req = arg; struct bridge_iflist *bif; struct bstp_port *bp; int error; bif = bridge_lookup_member(sc, req->ifbr_ifsname); if (bif == NULL) return (EXTERROR(ENOENT, "Interface is not a bridge member")); bp = &bif->bif_stp; if (req->ifbr_ifsflags & IFBIF_SPAN) /* SPAN is readonly */ return (EXTERROR(EINVAL, "Span interface cannot be modified")); NET_EPOCH_ENTER(et); if (req->ifbr_ifsflags & IFBIF_STP) { if ((bif->bif_flags & IFBIF_STP) == 0) { error = bstp_enable(&bif->bif_stp); if (error) { NET_EPOCH_EXIT(et); return (EXTERROR(error, "Failed to enable STP")); } } } else { if ((bif->bif_flags & IFBIF_STP) != 0) bstp_disable(&bif->bif_stp); } /* Pass on STP flags */ bstp_set_edge(bp, req->ifbr_ifsflags & IFBIF_BSTP_EDGE ? 1 : 0); bstp_set_autoedge(bp, req->ifbr_ifsflags & IFBIF_BSTP_AUTOEDGE ? 1 : 0); bstp_set_ptp(bp, req->ifbr_ifsflags & IFBIF_BSTP_PTP ? 1 : 0); bstp_set_autoptp(bp, req->ifbr_ifsflags & IFBIF_BSTP_AUTOPTP ? 
1 : 0); /* Save the bits relating to the bridge */ bif->bif_flags = req->ifbr_ifsflags & IFBIFMASK; NET_EPOCH_EXIT(et); return (0); } static int bridge_ioctl_scache(struct bridge_softc *sc, void *arg) { struct ifbrparam *param = arg; sc->sc_brtmax = param->ifbrp_csize; bridge_rttrim(sc); return (0); } static int bridge_ioctl_gcache(struct bridge_softc *sc, void *arg) { struct ifbrparam *param = arg; param->ifbrp_csize = sc->sc_brtmax; return (0); } static int bridge_ioctl_gifs(struct bridge_softc *sc, void *arg) { struct ifbifconf *bifc = arg; struct bridge_iflist *bif; struct ifbreq breq; char *buf, *outbuf; int count, buflen, len, error = 0; count = 0; CK_LIST_FOREACH(bif, &sc->sc_iflist, bif_next) count++; CK_LIST_FOREACH(bif, &sc->sc_spanlist, bif_next) count++; buflen = sizeof(breq) * count; if (bifc->ifbic_len == 0) { bifc->ifbic_len = buflen; return (0); } outbuf = malloc(buflen, M_TEMP, M_NOWAIT | M_ZERO); if (outbuf == NULL) return (ENOMEM); count = 0; buf = outbuf; len = min(bifc->ifbic_len, buflen); bzero(&breq, sizeof(breq)); CK_LIST_FOREACH(bif, &sc->sc_iflist, bif_next) { if (len < sizeof(breq)) break; strlcpy(breq.ifbr_ifsname, bif->bif_ifp->if_xname, sizeof(breq.ifbr_ifsname)); /* Fill in the ifbreq structure */ error = bridge_ioctl_gifflags(sc, &breq); if (error) break; memcpy(buf, &breq, sizeof(breq)); count++; buf += sizeof(breq); len -= sizeof(breq); } CK_LIST_FOREACH(bif, &sc->sc_spanlist, bif_next) { if (len < sizeof(breq)) break; strlcpy(breq.ifbr_ifsname, bif->bif_ifp->if_xname, sizeof(breq.ifbr_ifsname)); breq.ifbr_ifsflags = bif->bif_flags; breq.ifbr_portno = bif->bif_ifp->if_index & 0xfff; memcpy(buf, &breq, sizeof(breq)); count++; buf += sizeof(breq); len -= sizeof(breq); } bifc->ifbic_len = sizeof(breq) * count; error = copyout(outbuf, bifc->ifbic_req, bifc->ifbic_len); free(outbuf, M_TEMP); return (error); } static int bridge_ioctl_rts(struct bridge_softc *sc, void *arg) { struct ifbaconf *bac = arg; struct bridge_rtnode *brt; struct ifbareq bareq; char *buf, *outbuf; int count, buflen, len, error = 0; if (bac->ifbac_len == 0) return (0); count = 0; CK_LIST_FOREACH(brt, &sc->sc_rtlist, brt_list) count++; buflen = sizeof(bareq) * count; outbuf = malloc(buflen, M_TEMP, M_NOWAIT | M_ZERO); if (outbuf == NULL) return (ENOMEM); count = 0; buf = outbuf; len = min(bac->ifbac_len, buflen); bzero(&bareq, sizeof(bareq)); CK_LIST_FOREACH(brt, &sc->sc_rtlist, brt_list) { if (len < sizeof(bareq)) goto out; strlcpy(bareq.ifba_ifsname, brt->brt_ifp->if_xname, sizeof(bareq.ifba_ifsname)); memcpy(bareq.ifba_dst, brt->brt_addr, sizeof(brt->brt_addr)); bareq.ifba_vlan = brt->brt_vlan; if ((brt->brt_flags & IFBAF_TYPEMASK) == IFBAF_DYNAMIC && time_uptime < brt->brt_expire) bareq.ifba_expire = brt->brt_expire - time_uptime; else bareq.ifba_expire = 0; bareq.ifba_flags = brt->brt_flags; memcpy(buf, &bareq, sizeof(bareq)); count++; buf += sizeof(bareq); len -= sizeof(bareq); } out: bac->ifbac_len = sizeof(bareq) * count; error = copyout(outbuf, bac->ifbac_req, bac->ifbac_len); free(outbuf, M_TEMP); return (error); } static int bridge_ioctl_saddr(struct bridge_softc *sc, void *arg) { struct ifbareq *req = arg; struct bridge_iflist *bif; struct epoch_tracker et; int error; NET_EPOCH_ENTER(et); bif = bridge_lookup_member(sc, req->ifba_ifsname); if (bif == NULL) { NET_EPOCH_EXIT(et); return (EXTERROR(ENOENT, "Interface is not a bridge member")); } /* bridge_rtupdate() may acquire the lock. 
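	 * Specifically the BRIDGE_RT_LOCK, which is needed if a new
	 * forwarding entry has to be inserted; that is why only the net
	 * epoch is entered here.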
*/ error = bridge_rtupdate(sc, req->ifba_dst, req->ifba_vlan, bif, 1, req->ifba_flags); NET_EPOCH_EXIT(et); return (error); } static int bridge_ioctl_sto(struct bridge_softc *sc, void *arg) { struct ifbrparam *param = arg; sc->sc_brttimeout = param->ifbrp_ctime; return (0); } static int bridge_ioctl_gto(struct bridge_softc *sc, void *arg) { struct ifbrparam *param = arg; param->ifbrp_ctime = sc->sc_brttimeout; return (0); } static int bridge_ioctl_daddr(struct bridge_softc *sc, void *arg) { struct ifbareq *req = arg; int vlan = req->ifba_vlan; /* Userspace uses '0' to mean 'any vlan' */ if (vlan == 0) vlan = DOT1Q_VID_RSVD_IMPL; return (bridge_rtdaddr(sc, req->ifba_dst, vlan)); } static int bridge_ioctl_flush(struct bridge_softc *sc, void *arg) { struct ifbreq *req = arg; BRIDGE_RT_LOCK(sc); bridge_rtflush(sc, req->ifbr_ifsflags); BRIDGE_RT_UNLOCK(sc); return (0); } static int bridge_ioctl_gpri(struct bridge_softc *sc, void *arg) { struct ifbrparam *param = arg; struct bstp_state *bs = &sc->sc_stp; param->ifbrp_prio = bs->bs_bridge_priority; return (0); } static int bridge_ioctl_spri(struct bridge_softc *sc, void *arg) { struct ifbrparam *param = arg; return (bstp_set_priority(&sc->sc_stp, param->ifbrp_prio)); } static int bridge_ioctl_ght(struct bridge_softc *sc, void *arg) { struct ifbrparam *param = arg; struct bstp_state *bs = &sc->sc_stp; param->ifbrp_hellotime = bs->bs_bridge_htime >> 8; return (0); } static int bridge_ioctl_sht(struct bridge_softc *sc, void *arg) { struct ifbrparam *param = arg; return (bstp_set_htime(&sc->sc_stp, param->ifbrp_hellotime)); } static int bridge_ioctl_gfd(struct bridge_softc *sc, void *arg) { struct ifbrparam *param = arg; struct bstp_state *bs = &sc->sc_stp; param->ifbrp_fwddelay = bs->bs_bridge_fdelay >> 8; return (0); } static int bridge_ioctl_sfd(struct bridge_softc *sc, void *arg) { struct ifbrparam *param = arg; return (bstp_set_fdelay(&sc->sc_stp, param->ifbrp_fwddelay)); } static int bridge_ioctl_gma(struct bridge_softc *sc, void *arg) { struct ifbrparam *param = arg; struct bstp_state *bs = &sc->sc_stp; param->ifbrp_maxage = bs->bs_bridge_max_age >> 8; return (0); } static int bridge_ioctl_sma(struct bridge_softc *sc, void *arg) { struct ifbrparam *param = arg; return (bstp_set_maxage(&sc->sc_stp, param->ifbrp_maxage)); } static int bridge_ioctl_sifprio(struct bridge_softc *sc, void *arg) { struct ifbreq *req = arg; struct bridge_iflist *bif; bif = bridge_lookup_member(sc, req->ifbr_ifsname); if (bif == NULL) return (EXTERROR(ENOENT, "Interface is not a bridge member")); return (bstp_set_port_priority(&bif->bif_stp, req->ifbr_priority)); } static int bridge_ioctl_sifcost(struct bridge_softc *sc, void *arg) { struct ifbreq *req = arg; struct bridge_iflist *bif; bif = bridge_lookup_member(sc, req->ifbr_ifsname); if (bif == NULL) return (EXTERROR(ENOENT, "Interface is not a bridge member")); return (bstp_set_path_cost(&bif->bif_stp, req->ifbr_path_cost)); } static int bridge_ioctl_sifmaxaddr(struct bridge_softc *sc, void *arg) { struct ifbreq *req = arg; struct bridge_iflist *bif; bif = bridge_lookup_member(sc, req->ifbr_ifsname); if (bif == NULL) return (EXTERROR(ENOENT, "Interface is not a bridge member")); bif->bif_addrmax = req->ifbr_addrmax; return (0); } static int bridge_ioctl_sifpvid(struct bridge_softc *sc, void *arg) { struct ifbreq *req = arg; struct bridge_iflist *bif; bif = bridge_lookup_member(sc, req->ifbr_ifsname); if (bif == NULL) return (EXTERROR(ENOENT, "Interface is not a bridge member")); if (req->ifbr_pvid > DOT1Q_VID_MAX) 
return (EXTERROR(EINVAL, "Invalid VLAN ID")); if (req->ifbr_pvid != DOT1Q_VID_NULL) bif->bif_flags |= IFBIF_VLANFILTER; bif->bif_pvid = req->ifbr_pvid; return (0); } static int bridge_ioctl_sifvlanset(struct bridge_softc *sc, void *arg) { struct ifbif_vlan_req *req = arg; struct bridge_iflist *bif; bif = bridge_lookup_member(sc, req->bv_ifname); if (bif == NULL) return (EXTERROR(ENOENT, "Interface is not a bridge member")); /* Reject invalid VIDs. */ if (BRVLAN_TEST(&req->bv_set, DOT1Q_VID_NULL) || BRVLAN_TEST(&req->bv_set, DOT1Q_VID_RSVD_IMPL)) return (EXTERROR(EINVAL, "Invalid VLAN ID in set")); switch (req->bv_op) { /* Replace the existing vlan set with the new set */ case BRDG_VLAN_OP_SET: BIT_COPY(BRVLAN_SETSIZE, &req->bv_set, &bif->bif_vlan_set); break; /* Modify the existing vlan set to add the given vlans */ case BRDG_VLAN_OP_ADD: BIT_OR(BRVLAN_SETSIZE, &bif->bif_vlan_set, &req->bv_set); break; /* Modify the existing vlan set to remove the given vlans */ case BRDG_VLAN_OP_DEL: BIT_ANDNOT(BRVLAN_SETSIZE, &bif->bif_vlan_set, &req->bv_set); break; /* Invalid or unknown operation */ default: return (EXTERROR(EINVAL, "Unsupported BRDGSIFVLANSET operation")); } /* * The only reason to modify the VLAN access list is to use VLAN * filtering on this interface, so enable it automatically. */ bif->bif_flags |= IFBIF_VLANFILTER; return (0); } static int bridge_ioctl_gifvlanset(struct bridge_softc *sc, void *arg) { struct ifbif_vlan_req *req = arg; struct bridge_iflist *bif; bif = bridge_lookup_member(sc, req->bv_ifname); if (bif == NULL) return (EXTERROR(ENOENT, "Interface is not a bridge member")); BIT_COPY(BRVLAN_SETSIZE, &bif->bif_vlan_set, &req->bv_set); return (0); } static int bridge_ioctl_addspan(struct bridge_softc *sc, void *arg) { struct ifbreq *req = arg; struct bridge_iflist *bif = NULL; struct ifnet *ifs; ifs = ifunit(req->ifbr_ifsname); if (ifs == NULL) return (EXTERROR(ENOENT, "No such interface")); CK_LIST_FOREACH(bif, &sc->sc_spanlist, bif_next) if (ifs == bif->bif_ifp) return (EXTERROR(EBUSY, "Interface is already a span port")); if (ifs->if_bridge != NULL) return (EXTERROR(EEXIST, "Interface is already a bridge member")); switch (ifs->if_type) { case IFT_ETHER: case IFT_GIF: case IFT_L2VLAN: break; default: return (EXTERROR(EINVAL, "Unsupported interface type")); } bif = malloc(sizeof(*bif), M_DEVBUF, M_NOWAIT|M_ZERO); if (bif == NULL) return (ENOMEM); bif->bif_ifp = ifs; bif->bif_flags = IFBIF_SPAN; CK_LIST_INSERT_HEAD(&sc->sc_spanlist, bif, bif_next); return (0); } static int bridge_ioctl_delspan(struct bridge_softc *sc, void *arg) { struct ifbreq *req = arg; struct bridge_iflist *bif; struct ifnet *ifs; ifs = ifunit(req->ifbr_ifsname); if (ifs == NULL) return (EXTERROR(ENOENT, "No such interface")); CK_LIST_FOREACH(bif, &sc->sc_spanlist, bif_next) if (ifs == bif->bif_ifp) break; if (bif == NULL) return (EXTERROR(ENOENT, "Interface is not a span port")); bridge_delete_span(sc, bif); return (0); } static int bridge_ioctl_gbparam(struct bridge_softc *sc, void *arg) { struct ifbropreq *req = arg; struct bstp_state *bs = &sc->sc_stp; struct bstp_port *root_port; req->ifbop_maxage = bs->bs_bridge_max_age >> 8; req->ifbop_hellotime = bs->bs_bridge_htime >> 8; req->ifbop_fwddelay = bs->bs_bridge_fdelay >> 8; root_port = bs->bs_root_port; if (root_port == NULL) req->ifbop_root_port = 0; else req->ifbop_root_port = root_port->bp_ifp->if_index; req->ifbop_holdcount = bs->bs_txholdcount; req->ifbop_priority = bs->bs_bridge_priority; req->ifbop_protocol = bs->bs_protover; 
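	/*
	 * The timer values reported above are kept internally in units of
	 * 1/256 of a second (the BPDU encoding), hence the >> 8 to return
	 * whole seconds to userland.
	 */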
req->ifbop_root_path_cost = bs->bs_root_pv.pv_cost; req->ifbop_bridgeid = bs->bs_bridge_pv.pv_dbridge_id; req->ifbop_designated_root = bs->bs_root_pv.pv_root_id; req->ifbop_designated_bridge = bs->bs_root_pv.pv_dbridge_id; req->ifbop_last_tc_time.tv_sec = bs->bs_last_tc_time.tv_sec; req->ifbop_last_tc_time.tv_usec = bs->bs_last_tc_time.tv_usec; return (0); } static int bridge_ioctl_grte(struct bridge_softc *sc, void *arg) { struct ifbrparam *param = arg; param->ifbrp_cexceeded = sc->sc_brtexceeded; return (0); } static int bridge_ioctl_gifsstp(struct bridge_softc *sc, void *arg) { struct ifbpstpconf *bifstp = arg; struct bridge_iflist *bif; struct bstp_port *bp; struct ifbpstpreq bpreq; char *buf, *outbuf; int count, buflen, len, error = 0; count = 0; CK_LIST_FOREACH(bif, &sc->sc_iflist, bif_next) { if ((bif->bif_flags & IFBIF_STP) != 0) count++; } buflen = sizeof(bpreq) * count; if (bifstp->ifbpstp_len == 0) { bifstp->ifbpstp_len = buflen; return (0); } outbuf = malloc(buflen, M_TEMP, M_NOWAIT | M_ZERO); if (outbuf == NULL) return (ENOMEM); count = 0; buf = outbuf; len = min(bifstp->ifbpstp_len, buflen); bzero(&bpreq, sizeof(bpreq)); CK_LIST_FOREACH(bif, &sc->sc_iflist, bif_next) { if (len < sizeof(bpreq)) break; if ((bif->bif_flags & IFBIF_STP) == 0) continue; bp = &bif->bif_stp; bpreq.ifbp_portno = bif->bif_ifp->if_index & 0xfff; bpreq.ifbp_fwd_trans = bp->bp_forward_transitions; bpreq.ifbp_design_cost = bp->bp_desg_pv.pv_cost; bpreq.ifbp_design_port = bp->bp_desg_pv.pv_port_id; bpreq.ifbp_design_bridge = bp->bp_desg_pv.pv_dbridge_id; bpreq.ifbp_design_root = bp->bp_desg_pv.pv_root_id; memcpy(buf, &bpreq, sizeof(bpreq)); count++; buf += sizeof(bpreq); len -= sizeof(bpreq); } bifstp->ifbpstp_len = sizeof(bpreq) * count; error = copyout(outbuf, bifstp->ifbpstp_req, bifstp->ifbpstp_len); free(outbuf, M_TEMP); return (error); } static int bridge_ioctl_sproto(struct bridge_softc *sc, void *arg) { struct ifbrparam *param = arg; return (bstp_set_protocol(&sc->sc_stp, param->ifbrp_proto)); } static int bridge_ioctl_stxhc(struct bridge_softc *sc, void *arg) { struct ifbrparam *param = arg; return (bstp_set_holdcount(&sc->sc_stp, param->ifbrp_txhc)); } /* * bridge_ifdetach: * * Detach an interface from a bridge. Called when a member * interface is detaching. */ static void bridge_ifdetach(void *arg __unused, struct ifnet *ifp) { struct bridge_iflist *bif = ifp->if_bridge; struct bridge_softc *sc = NULL; if (bif) sc = bif->bif_sc; if (ifp->if_flags & IFF_RENAMING) return; if (V_bridge_cloner == NULL) { /* * This detach handler can be called after * vnet_bridge_uninit(). Just return in that case. */ return; } /* Check if the interface is a bridge member */ if (sc != NULL) { BRIDGE_LOCK(sc); bridge_delete_member(sc, bif, 1); BRIDGE_UNLOCK(sc); return; } /* Check if the interface is a span port */ BRIDGE_LIST_LOCK(); LIST_FOREACH(sc, &V_bridge_list, sc_list) { BRIDGE_LOCK(sc); CK_LIST_FOREACH(bif, &sc->sc_spanlist, bif_next) if (ifp == bif->bif_ifp) { bridge_delete_span(sc, bif); break; } BRIDGE_UNLOCK(sc); } BRIDGE_LIST_UNLOCK(); } /* * bridge_init: * * Initialize a bridge interface. 
*/ static void bridge_init(void *xsc) { struct bridge_softc *sc = (struct bridge_softc *)xsc; struct ifnet *ifp = sc->sc_ifp; if (ifp->if_drv_flags & IFF_DRV_RUNNING) return; BRIDGE_LOCK(sc); callout_reset(&sc->sc_brcallout, bridge_rtable_prune_period * hz, bridge_timer, sc); ifp->if_drv_flags |= IFF_DRV_RUNNING; bstp_init(&sc->sc_stp); /* Initialize Spanning Tree */ BRIDGE_UNLOCK(sc); } /* * bridge_stop: * * Stop the bridge interface. */ static void bridge_stop(struct ifnet *ifp, int disable) { struct bridge_softc *sc = ifp->if_softc; BRIDGE_LOCK_ASSERT(sc); if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) return; BRIDGE_RT_LOCK(sc); callout_stop(&sc->sc_brcallout); bstp_stop(&sc->sc_stp); bridge_rtflush(sc, IFBF_FLUSHDYN); BRIDGE_RT_UNLOCK(sc); ifp->if_drv_flags &= ~IFF_DRV_RUNNING; } /* * bridge_enqueue: * * Enqueue a packet on a bridge member interface. * */ static int bridge_enqueue(struct bridge_softc *sc, struct ifnet *dst_ifp, struct mbuf *m, struct bridge_iflist *bif) { int len, err = 0; short mflags; struct mbuf *m0; /* * Find the bridge member port this packet is being sent on, if the * caller didn't already provide it. */ if (bif == NULL) bif = bridge_lookup_member_if(sc, dst_ifp); if (bif == NULL) { /* Perhaps the interface was removed from the bridge */ m_freem(m); return (EINVAL); } /* We may be sending a fragment so traverse the mbuf */ for (; m; m = m0) { m0 = m->m_nextpkt; m->m_nextpkt = NULL; len = m->m_pkthdr.len; mflags = m->m_flags; /* * If VLAN filtering is enabled, and the native VLAN ID of the * outgoing interface matches the VLAN ID of the frame, remove * the VLAN header. */ if ((bif->bif_flags & IFBIF_VLANFILTER) && bif->bif_pvid != DOT1Q_VID_NULL && VLANTAGOF(m) == bif->bif_pvid) { m->m_flags &= ~M_VLANTAG; m->m_pkthdr.ether_vtag = 0; } /* * If underlying interface can not do VLAN tag insertion itself * then attach a packet tag that holds it. */ if ((m->m_flags & M_VLANTAG) && (dst_ifp->if_capenable & IFCAP_VLAN_HWTAGGING) == 0) { m = ether_vlanencap(m, m->m_pkthdr.ether_vtag); if (m == NULL) { if_printf(dst_ifp, "unable to prepend VLAN header\n"); if_inc_counter(dst_ifp, IFCOUNTER_OERRORS, 1); continue; } m->m_flags &= ~M_VLANTAG; } M_ASSERTPKTHDR(m); /* We shouldn't transmit mbuf without pkthdr */ if ((err = dst_ifp->if_transmit(dst_ifp, m))) { int n; for (m = m0, n = 1; m != NULL; m = m0, n++) { m0 = m->m_nextpkt; m_freem(m); } if_inc_counter(sc->sc_ifp, IFCOUNTER_OERRORS, n); break; } if_inc_counter(sc->sc_ifp, IFCOUNTER_OPACKETS, 1); if_inc_counter(sc->sc_ifp, IFCOUNTER_OBYTES, len); if (mflags & M_MCAST) if_inc_counter(sc->sc_ifp, IFCOUNTER_OMCASTS, 1); } return (err); } /* * bridge_dummynet: * * Receive a queued packet from dummynet and pass it on to the output * interface. * * The mbuf has the Ethernet header already attached. */ static void bridge_dummynet(struct mbuf *m, struct ifnet *ifp) { struct bridge_iflist *bif = ifp->if_bridge; struct bridge_softc *sc = NULL; if (bif) sc = bif->bif_sc; /* * The packet didnt originate from a member interface. This should only * ever happen if a member interface is removed while packets are * queued for it. */ if (sc == NULL) { m_freem(m); return; } if (PFIL_HOOKED_OUT_46) { if (bridge_pfil(&m, sc->sc_ifp, ifp, PFIL_OUT) != 0) return; if (m == NULL) return; } bridge_enqueue(sc, ifp, m, NULL); } /* * bridge_output: * * Send output from a bridge member interface. This * performs the bridging function for locally originated * packets. * * The mbuf has the Ethernet header already attached. 
We must * enqueue or free the mbuf before returning. */ static int bridge_output(struct ifnet *ifp, struct mbuf *m, struct sockaddr *sa, struct rtentry *rt) { struct ether_header *eh; struct bridge_iflist *sbif; struct ifnet *bifp, *dst_if; struct bridge_softc *sc; ether_vlanid_t vlan; NET_EPOCH_ASSERT(); if (m->m_len < ETHER_HDR_LEN) { m = m_pullup(m, ETHER_HDR_LEN); if (m == NULL) return (0); } sbif = ifp->if_bridge; sc = sbif->bif_sc; bifp = sc->sc_ifp; eh = mtod(m, struct ether_header *); vlan = VLANTAGOF(m); /* * If bridge is down, but the original output interface is up, * go ahead and send out that interface. Otherwise, the packet * is dropped below. */ if ((bifp->if_drv_flags & IFF_DRV_RUNNING) == 0) { dst_if = ifp; goto sendunicast; } /* * If the packet is a multicast, or we don't know a better way to * get there, send to all interfaces. */ if (ETHER_IS_MULTICAST(eh->ether_dhost)) dst_if = NULL; else dst_if = bridge_rtlookup(sc, eh->ether_dhost, vlan); /* Tap any traffic not passing back out the originating interface */ if (dst_if != ifp) ETHER_BPF_MTAP(bifp, m); if (dst_if == NULL) { struct bridge_iflist *bif; struct mbuf *mc; int used = 0; bridge_span(sc, m); CK_LIST_FOREACH(bif, &sc->sc_iflist, bif_next) { dst_if = bif->bif_ifp; if (dst_if->if_type == IFT_GIF) continue; if ((dst_if->if_drv_flags & IFF_DRV_RUNNING) == 0) continue; /* * If this is not the original output interface, * and the interface is participating in spanning * tree, make sure the port is in a state that * allows forwarding. */ if (dst_if != ifp && (bif->bif_flags & IFBIF_STP) && bif->bif_stp.bp_state == BSTP_IFSTATE_DISCARDING) continue; if (CK_LIST_NEXT(bif, bif_next) == NULL) { used = 1; mc = m; } else { mc = m_dup(m, M_NOWAIT); if (mc == NULL) { if_inc_counter(bifp, IFCOUNTER_OERRORS, 1); continue; } } bridge_enqueue(sc, dst_if, mc, bif); } if (used == 0) m_freem(m); return (0); } sendunicast: /* * XXX Spanning tree consideration here? */ bridge_span(sc, m); if ((dst_if->if_drv_flags & IFF_DRV_RUNNING) == 0) { m_freem(m); return (0); } bridge_enqueue(sc, dst_if, m, NULL); return (0); } /* * bridge_transmit: * * Do output on a bridge. * */ static int bridge_transmit(struct ifnet *ifp, struct mbuf *m) { struct bridge_softc *sc; struct ether_header *eh; struct ifnet *dst_if; int error = 0; ether_vlanid_t vlan; sc = ifp->if_softc; ETHER_BPF_MTAP(ifp, m); eh = mtod(m, struct ether_header *); vlan = VLANTAGOF(m); if (((m->m_flags & (M_BCAST|M_MCAST)) == 0) && (dst_if = bridge_rtlookup(sc, eh->ether_dhost, vlan)) != NULL) { error = bridge_enqueue(sc, dst_if, m, NULL); } else bridge_broadcast(sc, ifp, m, 0); return (error); } #ifdef ALTQ static void bridge_altq_start(if_t ifp) { struct ifaltq *ifq = &ifp->if_snd; struct mbuf *m; IFQ_LOCK(ifq); IFQ_DEQUEUE_NOLOCK(ifq, m); while (m != NULL) { bridge_transmit(ifp, m); IFQ_DEQUEUE_NOLOCK(ifq, m); } IFQ_UNLOCK(ifq); } static int bridge_altq_transmit(if_t ifp, struct mbuf *m) { int err; if (ALTQ_IS_ENABLED(&ifp->if_snd)) { IFQ_ENQUEUE(&ifp->if_snd, m, err); if (err == 0) bridge_altq_start(ifp); } else err = bridge_transmit(ifp, m); return (err); } #endif /* ALTQ */ /* * The ifp->if_qflush entry point for if_bridge(4) is no-op. */ static void bridge_qflush(struct ifnet *ifp __unused) { } /* * bridge_forward: * * The forwarding function of the bridge. * * NOTE: Releases the lock on return. 
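 *
 *	In outline: drop the frame if the receiving port is discarding
 *	(STP), learn the source address on learning ports (enforcing the
 *	per-port bif_addrmax limit if set), drop reserved 802.1D link-local
 *	multicasts (01:80:C2:00:00:00 .. 01:80:C2:00:00:0F), then either
 *	forward a known unicast out a single port, subject to VLAN,
 *	private-segment, STP and pfil(9) checks, or hand anything else to
 *	bridge_broadcast().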
*/ static void bridge_forward(struct bridge_softc *sc, struct bridge_iflist *sbif, struct mbuf *m) { struct bridge_iflist *dbif; struct ifnet *src_if, *dst_if, *ifp; struct ether_header *eh; uint8_t *dst; int error; ether_vlanid_t vlan; NET_EPOCH_ASSERT(); src_if = m->m_pkthdr.rcvif; ifp = sc->sc_ifp; vlan = VLANTAGOF(m); if_inc_counter(ifp, IFCOUNTER_IPACKETS, 1); if_inc_counter(ifp, IFCOUNTER_IBYTES, m->m_pkthdr.len); if ((sbif->bif_flags & IFBIF_STP) && sbif->bif_stp.bp_state == BSTP_IFSTATE_DISCARDING) goto drop; eh = mtod(m, struct ether_header *); dst = eh->ether_dhost; /* If the interface is learning, record the address. */ if (sbif->bif_flags & IFBIF_LEARNING) { error = bridge_rtupdate(sc, eh->ether_shost, vlan, sbif, 0, IFBAF_DYNAMIC); /* * If the interface has addresses limits then deny any source * that is not in the cache. */ if (error && sbif->bif_addrmax) goto drop; } if ((sbif->bif_flags & IFBIF_STP) != 0 && sbif->bif_stp.bp_state == BSTP_IFSTATE_LEARNING) goto drop; #ifdef DEV_NETMAP /* * Hand the packet to netmap only if it wasn't injected by netmap * itself. */ if ((m->m_flags & M_BRIDGE_INJECT) == 0 && (if_getcapenable(ifp) & IFCAP_NETMAP) != 0) { ifp->if_input(ifp, m); return; } m->m_flags &= ~M_BRIDGE_INJECT; #endif /* * At this point, the port either doesn't participate * in spanning tree or it is in the forwarding state. */ /* * If the packet is unicast, destined for someone on * "this" side of the bridge, drop it. */ if ((m->m_flags & (M_BCAST|M_MCAST)) == 0) { dst_if = bridge_rtlookup(sc, dst, vlan); if (src_if == dst_if) goto drop; } else { /* * Check if its a reserved multicast address, any address * listed in 802.1D section 7.12.6 may not be forwarded by the * bridge. * This is currently 01-80-C2-00-00-00 to 01-80-C2-00-00-0F */ if (dst[0] == 0x01 && dst[1] == 0x80 && dst[2] == 0xc2 && dst[3] == 0x00 && dst[4] == 0x00 && dst[5] <= 0x0f) goto drop; /* ...forward it to all interfaces. */ if_inc_counter(ifp, IFCOUNTER_IMCASTS, 1); dst_if = NULL; } /* * If we have a destination interface which is a member of our bridge, * OR this is a unicast packet, push it through the bpf(4) machinery. * For broadcast or multicast packets, don't bother because it will * be reinjected into ether_input. We do this before we pass the packets * through the pfil(9) framework, as it is possible that pfil(9) will * drop the packet, or possibly modify it, making it difficult to debug * firewall issues on the bridge. */ if (dst_if != NULL || (m->m_flags & (M_BCAST | M_MCAST)) == 0) ETHER_BPF_MTAP(ifp, m); /* run the packet filter */ if (PFIL_HOOKED_IN_46) { if (bridge_pfil(&m, ifp, src_if, PFIL_IN) != 0) return; if (m == NULL) return; } if (dst_if == NULL) { bridge_broadcast(sc, src_if, m, 1); return; } /* * At this point, we're dealing with a unicast frame * going to a different interface. */ if ((dst_if->if_drv_flags & IFF_DRV_RUNNING) == 0) goto drop; dbif = bridge_lookup_member_if(sc, dst_if); if (dbif == NULL) /* Not a member of the bridge (anymore?) */ goto drop; /* Private segments can not talk to each other */ if (sbif->bif_flags & dbif->bif_flags & IFBIF_PRIVATE) goto drop; /* Do VLAN filtering. */ if (!bridge_vfilter_out(dbif, m)) goto drop; if ((dbif->bif_flags & IFBIF_STP) && dbif->bif_stp.bp_state == BSTP_IFSTATE_DISCARDING) goto drop; if (PFIL_HOOKED_OUT_46) { if (bridge_pfil(&m, ifp, dst_if, PFIL_OUT) != 0) return; if (m == NULL) return; } bridge_enqueue(sc, dst_if, m, dbif); return; drop: m_freem(m); } /* * bridge_input: * * Receive input from a member interface. 
Queue the packet for * bridging if it is not for us. */ static struct mbuf * bridge_input(struct ifnet *ifp, struct mbuf *m) { struct bridge_softc *sc = NULL; struct bridge_iflist *bif, *bif2; struct ifnet *bifp; struct ether_header *eh; struct mbuf *mc, *mc2; ether_vlanid_t vlan; int error; NET_EPOCH_ASSERT(); eh = mtod(m, struct ether_header *); vlan = VLANTAGOF(m); bif = ifp->if_bridge; if (bif) sc = bif->bif_sc; if (sc == NULL) { /* * This packet originated from the bridge itself, so it must * have been transmitted by netmap. Derive the "source" * interface from the source address and drop the packet if the * source address isn't known. */ KASSERT((m->m_flags & M_BRIDGE_INJECT) != 0, ("%s: ifnet %p missing a bridge softc", __func__, ifp)); sc = if_getsoftc(ifp); ifp = bridge_rtlookup(sc, eh->ether_shost, vlan); if (ifp == NULL) { if_inc_counter(sc->sc_ifp, IFCOUNTER_IERRORS, 1); m_freem(m); return (NULL); } m->m_pkthdr.rcvif = ifp; } bifp = sc->sc_ifp; if ((bifp->if_drv_flags & IFF_DRV_RUNNING) == 0) return (m); /* * Implement support for bridge monitoring. If this flag has been * set on this interface, discard the packet once we push it through * the bpf(4) machinery, but before we do, increment the byte and * packet counters associated with this interface. */ if ((bifp->if_flags & IFF_MONITOR) != 0) { m->m_pkthdr.rcvif = bifp; ETHER_BPF_MTAP(bifp, m); if_inc_counter(bifp, IFCOUNTER_IPACKETS, 1); if_inc_counter(bifp, IFCOUNTER_IBYTES, m->m_pkthdr.len); m_freem(m); return (NULL); } /* Do VLAN filtering. */ if (!bridge_vfilter_in(bif, m)) { if_inc_counter(sc->sc_ifp, IFCOUNTER_IERRORS, 1); m_freem(m); return (NULL); } /* bridge_vfilter_in() may add a tag */ vlan = VLANTAGOF(m); bridge_span(sc, m); if (m->m_flags & (M_BCAST|M_MCAST)) { /* Tap off 802.1D packets; they do not get forwarded. */ if (memcmp(eh->ether_dhost, bstp_etheraddr, ETHER_ADDR_LEN) == 0) { bstp_input(&bif->bif_stp, ifp, m); /* consumes mbuf */ return (NULL); } if ((bif->bif_flags & IFBIF_STP) && bif->bif_stp.bp_state == BSTP_IFSTATE_DISCARDING) { return (m); } /* * Make a deep copy of the packet and enqueue the copy * for bridge processing; return the original packet for * local processing. */ mc = m_dup(m, M_NOWAIT); if (mc == NULL) { return (m); } /* Perform the bridge forwarding function with the copy. */ bridge_forward(sc, bif, mc); #ifdef DEV_NETMAP /* * If netmap is enabled and has not already seen this packet, * then it will be consumed by bridge_forward(). */ if ((if_getcapenable(bifp) & IFCAP_NETMAP) != 0 && (m->m_flags & M_BRIDGE_INJECT) == 0) { m_freem(m); return (NULL); } #endif /* * Reinject the mbuf as arriving on the bridge so we have a * chance at claiming multicast packets. We can not loop back * here from ether_input as a bridge is never a member of a * bridge. */ KASSERT(bifp->if_bridge == NULL, ("loop created in bridge_input")); mc2 = m_dup(m, M_NOWAIT); if (mc2 != NULL) { /* Keep the layer3 header aligned */ int i = min(mc2->m_pkthdr.len, max_protohdr); mc2 = m_copyup(mc2, i, ETHER_ALIGN); } if (mc2 != NULL) { mc2->m_pkthdr.rcvif = bifp; mc2->m_flags &= ~M_BRIDGE_INJECT; sc->sc_if_input(bifp, mc2); } /* Return the original packet for local processing. 
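 * At this point up to three copies of the frame exist: one was handed
 * to bridge_forward() above, one was reinjected on the bridge
 * interface so the host stack gets a chance to claim multicast
 * traffic, and the original is returned here so that ether_input()
 * continues processing it on the member interface.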
*/ return (m); } if ((bif->bif_flags & IFBIF_STP) && bif->bif_stp.bp_state == BSTP_IFSTATE_DISCARDING) { return (m); } #if defined(INET) || defined(INET6) #define CARP_CHECK_WE_ARE_DST(iface) \ ((iface)->if_carp && (*carp_forus_p)((iface), eh->ether_dhost)) #define CARP_CHECK_WE_ARE_SRC(iface) \ ((iface)->if_carp && (*carp_forus_p)((iface), eh->ether_shost)) #else #define CARP_CHECK_WE_ARE_DST(iface) false #define CARP_CHECK_WE_ARE_SRC(iface) false #endif #ifdef DEV_NETMAP #define GRAB_FOR_NETMAP(ifp, m) do { \ if ((if_getcapenable(ifp) & IFCAP_NETMAP) != 0 && \ ((m)->m_flags & M_BRIDGE_INJECT) == 0) { \ (ifp)->if_input(ifp, m); \ return (NULL); \ } \ } while (0) #else #define GRAB_FOR_NETMAP(ifp, m) #endif #define GRAB_OUR_PACKETS(iface) \ if ((iface)->if_type == IFT_GIF) \ continue; \ /* It is destined for us. */ \ if (memcmp(IF_LLADDR(iface), eh->ether_dhost, ETHER_ADDR_LEN) == 0 || \ CARP_CHECK_WE_ARE_DST(iface)) { \ if (bif->bif_flags & IFBIF_LEARNING) { \ error = bridge_rtupdate(sc, eh->ether_shost, \ vlan, bif, 0, IFBAF_DYNAMIC); \ if (error && bif->bif_addrmax) { \ m_freem(m); \ return (NULL); \ } \ } \ m->m_pkthdr.rcvif = iface; \ if ((iface) == ifp) { \ /* Skip bridge processing... src == dest */ \ return (m); \ } \ /* It's passing over or to the bridge, locally. */ \ ETHER_BPF_MTAP(bifp, m); \ if_inc_counter(bifp, IFCOUNTER_IPACKETS, 1); \ if_inc_counter(bifp, IFCOUNTER_IBYTES, m->m_pkthdr.len);\ /* Hand the packet over to netmap if necessary. */ \ GRAB_FOR_NETMAP(bifp, m); \ /* Filter on the physical interface. */ \ if (V_pfil_local_phys && PFIL_HOOKED_IN_46) { \ if (bridge_pfil(&m, NULL, ifp, \ PFIL_IN) != 0 || m == NULL) { \ return (NULL); \ } \ } \ if ((iface) != bifp) \ ETHER_BPF_MTAP(iface, m); \ /* Pass tagged packets to if_vlan, if it's loaded */ \ if (VLANTAGOF(m) != 0) { \ if (bifp->if_vlantrunk == NULL) { \ m_freem(m); \ return (NULL); \ } \ (*vlan_input_p)(bifp, m); \ return (NULL); \ } \ return (m); \ } \ \ /* We just received a packet that we sent out. */ \ if (memcmp(IF_LLADDR(iface), eh->ether_shost, ETHER_ADDR_LEN) == 0 || \ CARP_CHECK_WE_ARE_SRC(iface)) { \ m_freem(m); \ return (NULL); \ } /* * Unicast. Make sure it's not for the bridge. */ do { GRAB_OUR_PACKETS(bifp) } while (0); /* * Check the interface the packet arrived on. For tagged frames, * we need to do this even if member_ifaddrs is disabled because * vlan(4) might need to handle the traffic. */ if (V_member_ifaddrs || (vlan && ifp->if_vlantrunk)) do { GRAB_OUR_PACKETS(ifp) } while (0); /* * We only need to check other members interface if member_ifaddrs * is enabled; otherwise we should have never traffic destined for * a member's lladdr. */ if (V_member_ifaddrs) { CK_LIST_FOREACH(bif2, &sc->sc_iflist, bif_next) { GRAB_OUR_PACKETS(bif2->bif_ifp) } } #undef CARP_CHECK_WE_ARE_DST #undef CARP_CHECK_WE_ARE_SRC #undef GRAB_FOR_NETMAP #undef GRAB_OUR_PACKETS /* Perform the bridge forwarding function. */ bridge_forward(sc, bif, m); return (NULL); } /* * Inject a packet back into the host ethernet stack. This will generally only * be used by netmap when an application writes to the host TX ring. The * M_BRIDGE_INJECT flag ensures that the packet is re-routed to the bridge * interface after ethernet processing. */ static void bridge_inject(struct ifnet *ifp, struct mbuf *m) { struct bridge_softc *sc; if (ifp->if_type == IFT_L2VLAN) { /* * vlan(4) gives us the vlan ifnet, so we need to get the * bridge softc to get a pointer to ether_input to send the * packet to. 
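 * vlan_trunkdev_p() resolves the vlan interface's parent (trunk)
 * ifnet; in this configuration that is the bridge itself, whose softc
 * carries the saved sc_if_input hook we need.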
*/ struct ifnet *bifp = NULL; if (vlan_trunkdev_p == NULL) { m_freem(m); return; } bifp = vlan_trunkdev_p(ifp); if (bifp == NULL) { m_freem(m); return; } sc = if_getsoftc(bifp); sc->sc_if_input(ifp, m); return; } KASSERT((if_getcapenable(ifp) & IFCAP_NETMAP) != 0, ("%s: iface %s is not running in netmap mode", __func__, if_name(ifp))); KASSERT((m->m_flags & M_BRIDGE_INJECT) == 0, ("%s: mbuf %p has M_BRIDGE_INJECT set", __func__, m)); m->m_flags |= M_BRIDGE_INJECT; sc = if_getsoftc(ifp); sc->sc_if_input(ifp, m); } /* * bridge_broadcast: * * Send a frame to all interfaces that are members of * the bridge, except for the one on which the packet * arrived. * * NOTE: Releases the lock on return. */ static void bridge_broadcast(struct bridge_softc *sc, struct ifnet *src_if, struct mbuf *m, int runfilt) { struct bridge_iflist *dbif, *sbif; struct mbuf *mc; struct ifnet *dst_if; int used = 0, i; NET_EPOCH_ASSERT(); sbif = bridge_lookup_member_if(sc, src_if); /* Filter on the bridge interface before broadcasting */ if (runfilt && PFIL_HOOKED_OUT_46) { if (bridge_pfil(&m, sc->sc_ifp, NULL, PFIL_OUT) != 0) return; if (m == NULL) return; } CK_LIST_FOREACH(dbif, &sc->sc_iflist, bif_next) { dst_if = dbif->bif_ifp; if (dst_if == src_if) continue; /* Private segments can not talk to each other */ if (sbif && (sbif->bif_flags & dbif->bif_flags & IFBIF_PRIVATE)) continue; /* Do VLAN filtering. */ if (!bridge_vfilter_out(dbif, m)) continue; if ((dbif->bif_flags & IFBIF_STP) && dbif->bif_stp.bp_state == BSTP_IFSTATE_DISCARDING) continue; if ((dbif->bif_flags & IFBIF_DISCOVER) == 0 && (m->m_flags & (M_BCAST|M_MCAST)) == 0) continue; if ((dst_if->if_drv_flags & IFF_DRV_RUNNING) == 0) continue; if (CK_LIST_NEXT(dbif, bif_next) == NULL) { mc = m; used = 1; } else { mc = m_dup(m, M_NOWAIT); if (mc == NULL) { if_inc_counter(sc->sc_ifp, IFCOUNTER_OERRORS, 1); continue; } } /* * Filter on the output interface. Pass a NULL bridge interface * pointer so we do not redundantly filter on the bridge for * each interface we broadcast on. */ if (runfilt && PFIL_HOOKED_OUT_46) { if (used == 0) { /* Keep the layer3 header aligned */ i = min(mc->m_pkthdr.len, max_protohdr); mc = m_copyup(mc, i, ETHER_ALIGN); if (mc == NULL) { if_inc_counter(sc->sc_ifp, IFCOUNTER_OERRORS, 1); continue; } } if (bridge_pfil(&mc, NULL, dst_if, PFIL_OUT) != 0) continue; if (mc == NULL) continue; } bridge_enqueue(sc, dst_if, mc, dbif); } if (used == 0) m_freem(m); } /* * bridge_span: * * Duplicate a packet out one or more interfaces that are in span mode, * the original mbuf is unmodified. */ static void bridge_span(struct bridge_softc *sc, struct mbuf *m) { struct bridge_iflist *bif; struct ifnet *dst_if; struct mbuf *mc; NET_EPOCH_ASSERT(); if (CK_LIST_EMPTY(&sc->sc_spanlist)) return; CK_LIST_FOREACH(bif, &sc->sc_spanlist, bif_next) { dst_if = bif->bif_ifp; if ((dst_if->if_drv_flags & IFF_DRV_RUNNING) == 0) continue; mc = m_dup(m, M_NOWAIT); if (mc == NULL) { if_inc_counter(sc->sc_ifp, IFCOUNTER_OERRORS, 1); continue; } bridge_enqueue(sc, dst_if, mc, bif); } } /* * Incoming VLAN filtering. Given a frame and the member interface it was * received on, decide whether the port configuration allows it. */ static bool bridge_vfilter_in(const struct bridge_iflist *sbif, struct mbuf *m) { ether_vlanid_t vlan; vlan = VLANTAGOF(m); /* Make sure the vlan id is reasonable. */ if (vlan > DOT1Q_VID_MAX) return (false); /* If VLAN filtering isn't enabled, pass everything. 
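 *
 * With filtering enabled the ingress decision is, in summary:
 *
 *   untagged frame, no pvid configured    -> drop
 *   untagged frame, pvid configured       -> tag with the pvid, accept
 *   tagged frame, VID in bif_vlan_set     -> accept
 *   tagged frame, VID not in the set      -> drop (this includes the
 *                                            pvid unless it is also in
 *                                            the access list)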
*/ if ((sbif->bif_flags & IFBIF_VLANFILTER) == 0) return (true); if (vlan == DOT1Q_VID_NULL) { /* * The frame doesn't have a tag. If the interface does not * have an untagged vlan configured, drop the frame. */ if (sbif->bif_pvid == DOT1Q_VID_NULL) return (false); /* * Otherwise, insert a new tag based on the interface's * untagged vlan id. */ m->m_pkthdr.ether_vtag = sbif->bif_pvid; m->m_flags |= M_VLANTAG; } else { /* * The frame has a tag, so check it matches the interface's * vlan access list. We explicitly do not accept tagged * frames for the untagged vlan id here (unless it's also * in the access list). */ if (!BRVLAN_TEST(&sbif->bif_vlan_set, vlan)) return (false); } /* Accept the frame. */ return (true); } /* * Outgoing VLAN filtering. Given a frame, its vlan, and the member interface * we intend to send it to, decide whether the port configuration allows it to * be sent. */ static bool bridge_vfilter_out(const struct bridge_iflist *dbif, const struct mbuf *m) { struct ether_header *eh; ether_vlanid_t vlan; NET_EPOCH_ASSERT(); /* If VLAN filtering isn't enabled, pass everything. */ if ((dbif->bif_flags & IFBIF_VLANFILTER) == 0) return (true); vlan = VLANTAGOF(m); /* * Always allow untagged 802.1D STP frames, even if they would * otherwise be dropped. This is required for STP to work on * a filtering bridge. * * Tagged STP (Cisco PVST+) is a non-standard extension, so * handle those frames via the normal filtering path. */ eh = mtod(m, struct ether_header *); if (vlan == DOT1Q_VID_NULL && memcmp(eh->ether_dhost, bstp_etheraddr, ETHER_ADDR_LEN) == 0) return (true); /* * If the frame wasn't assigned to a vlan at ingress, drop it. * We can't forward these frames to filtering ports because we * don't know what VLAN they're supposed to be in. */ if (vlan == DOT1Q_VID_NULL) return (false); /* * If the frame's vlan matches the interfaces's untagged vlan, * allow it. */ if (vlan == dbif->bif_pvid) return (true); /* * If the frame's vlan is on the interface's tagged access list, * allow it. */ if (BRVLAN_TEST(&dbif->bif_vlan_set, vlan)) return (true); /* The frame was not permitted, so drop it. */ return (false); } /* * bridge_rtupdate: * * Add a bridge routing entry. */ static int bridge_rtupdate(struct bridge_softc *sc, const uint8_t *dst, ether_vlanid_t vlan, struct bridge_iflist *bif, int setflags, uint8_t flags) { struct bridge_rtnode *brt; struct bridge_iflist *obif; int error; BRIDGE_LOCK_OR_NET_EPOCH_ASSERT(sc); /* Check the source address is valid and not multicast. */ if (ETHER_IS_MULTICAST(dst)) return (EXTERROR(EINVAL, "Multicast address not permitted")); if (dst[0] == 0 && dst[1] == 0 && dst[2] == 0 && dst[3] == 0 && dst[4] == 0 && dst[5] == 0) return (EXTERROR(EINVAL, "Zero address not permitted")); /* * A route for this destination might already exist. If so, * update it, otherwise create a new one. */ if ((brt = bridge_rtnode_lookup(sc, dst, vlan)) == NULL) { BRIDGE_RT_LOCK(sc); /* Check again, now that we have the lock. There could have * been a race and we only want to insert this once. 
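 * The lookup above ran under the net epoch only, so two CPUs can both
 * miss for the same address; re-checking here, after taking
 * BRIDGE_RT_LOCK, ensures that only one of them allocates and inserts
 * the entry.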
*/ if (bridge_rtnode_lookup(sc, dst, vlan) != NULL) { BRIDGE_RT_UNLOCK(sc); return (0); } if (sc->sc_brtcnt >= sc->sc_brtmax) { sc->sc_brtexceeded++; BRIDGE_RT_UNLOCK(sc); return (EXTERROR(ENOSPC, "Address table is full")); } /* Check per interface address limits (if enabled) */ if (bif->bif_addrmax && bif->bif_addrcnt >= bif->bif_addrmax) { bif->bif_addrexceeded++; BRIDGE_RT_UNLOCK(sc); return (EXTERROR(ENOSPC, "Interface address limit exceeded")); } /* * Allocate a new bridge forwarding node, and * initialize the expiration time and Ethernet * address. */ brt = uma_zalloc(V_bridge_rtnode_zone, M_NOWAIT | M_ZERO); if (brt == NULL) { BRIDGE_RT_UNLOCK(sc); return (EXTERROR(ENOMEM, "Cannot allocate address node")); } brt->brt_vnet = curvnet; if (bif->bif_flags & IFBIF_STICKY) brt->brt_flags = IFBAF_STICKY; else brt->brt_flags = IFBAF_DYNAMIC; memcpy(brt->brt_addr, dst, ETHER_ADDR_LEN); brt->brt_vlan = vlan; brt->brt_dst = bif; if ((error = bridge_rtnode_insert(sc, brt)) != 0) { uma_zfree(V_bridge_rtnode_zone, brt); BRIDGE_RT_UNLOCK(sc); return (error); } bif->bif_addrcnt++; BRIDGE_RT_UNLOCK(sc); } if ((brt->brt_flags & IFBAF_TYPEMASK) == IFBAF_DYNAMIC && (obif = brt->brt_dst) != bif) { MPASS(obif != NULL); BRIDGE_RT_LOCK(sc); brt->brt_dst->bif_addrcnt--; brt->brt_dst = bif; brt->brt_dst->bif_addrcnt++; BRIDGE_RT_UNLOCK(sc); if (V_log_mac_flap && ppsratecheck(&V_log_last, &V_log_count, V_log_interval)) { log(LOG_NOTICE, "%s: mac address %6D vlan %d moved from %s to %s\n", sc->sc_ifp->if_xname, &brt->brt_addr[0], ":", brt->brt_vlan, obif->bif_ifp->if_xname, bif->bif_ifp->if_xname); } } if ((flags & IFBAF_TYPEMASK) == IFBAF_DYNAMIC) brt->brt_expire = time_uptime + sc->sc_brttimeout; if (setflags) brt->brt_flags = flags; return (0); } /* * bridge_rtlookup: * * Lookup the destination interface for an address. */ static struct ifnet * bridge_rtlookup(struct bridge_softc *sc, const uint8_t *addr, ether_vlanid_t vlan) { struct bridge_rtnode *brt; NET_EPOCH_ASSERT(); if ((brt = bridge_rtnode_lookup(sc, addr, vlan)) == NULL) return (NULL); return (brt->brt_ifp); } /* * bridge_rttrim: * * Trim the routine table so that we have a number * of routing entries less than or equal to the * maximum number. */ static void bridge_rttrim(struct bridge_softc *sc) { struct bridge_rtnode *brt, *nbrt; NET_EPOCH_ASSERT(); BRIDGE_RT_LOCK_ASSERT(sc); /* Make sure we actually need to do this. */ if (sc->sc_brtcnt <= sc->sc_brtmax) return; /* Force an aging cycle; this might trim enough addresses. */ bridge_rtage(sc); if (sc->sc_brtcnt <= sc->sc_brtmax) return; CK_LIST_FOREACH_SAFE(brt, &sc->sc_rtlist, brt_list, nbrt) { if ((brt->brt_flags & IFBAF_TYPEMASK) == IFBAF_DYNAMIC) { bridge_rtnode_destroy(sc, brt); if (sc->sc_brtcnt <= sc->sc_brtmax) return; } } } /* * bridge_timer: * * Aging timer for the bridge. */ static void bridge_timer(void *arg) { struct bridge_softc *sc = arg; BRIDGE_RT_LOCK_ASSERT(sc); /* Destruction of rtnodes requires a proper vnet context */ CURVNET_SET(sc->sc_ifp->if_vnet); bridge_rtage(sc); if (sc->sc_ifp->if_drv_flags & IFF_DRV_RUNNING) callout_reset(&sc->sc_brcallout, bridge_rtable_prune_period * hz, bridge_timer, sc); CURVNET_RESTORE(); } /* * bridge_rtage: * * Perform an aging cycle. 
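 *
 *	Walk the route list and destroy any dynamically learned entry
 *	whose brt_expire time has passed; static and sticky entries are
 *	left untouched.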
*/ static void bridge_rtage(struct bridge_softc *sc) { struct bridge_rtnode *brt, *nbrt; BRIDGE_RT_LOCK_ASSERT(sc); CK_LIST_FOREACH_SAFE(brt, &sc->sc_rtlist, brt_list, nbrt) { if ((brt->brt_flags & IFBAF_TYPEMASK) == IFBAF_DYNAMIC) { if (time_uptime >= brt->brt_expire) bridge_rtnode_destroy(sc, brt); } } } /* * bridge_rtflush: * * Remove all dynamic addresses from the bridge. */ static void bridge_rtflush(struct bridge_softc *sc, int full) { struct bridge_rtnode *brt, *nbrt; BRIDGE_RT_LOCK_ASSERT(sc); CK_LIST_FOREACH_SAFE(brt, &sc->sc_rtlist, brt_list, nbrt) { if (full || (brt->brt_flags & IFBAF_TYPEMASK) == IFBAF_DYNAMIC) bridge_rtnode_destroy(sc, brt); } } /* * bridge_rtdaddr: * * Remove an address from the table. */ static int bridge_rtdaddr(struct bridge_softc *sc, const uint8_t *addr, ether_vlanid_t vlan) { struct bridge_rtnode *brt; int found = 0; BRIDGE_RT_LOCK(sc); /* * If vlan is DOT1Q_VID_RSVD_IMPL then we want to delete for all vlans * so the lookup may return more than one. */ while ((brt = bridge_rtnode_lookup(sc, addr, vlan)) != NULL) { bridge_rtnode_destroy(sc, brt); found = 1; } BRIDGE_RT_UNLOCK(sc); return (found ? 0 : ENOENT); } /* * bridge_rtdelete: * * Delete routes to a speicifc member interface. */ static void bridge_rtdelete(struct bridge_softc *sc, struct ifnet *ifp, int full) { struct bridge_rtnode *brt, *nbrt; BRIDGE_RT_LOCK_ASSERT(sc); CK_LIST_FOREACH_SAFE(brt, &sc->sc_rtlist, brt_list, nbrt) { if (brt->brt_ifp == ifp && (full || (brt->brt_flags & IFBAF_TYPEMASK) == IFBAF_DYNAMIC)) bridge_rtnode_destroy(sc, brt); } } /* * bridge_rtable_init: * * Initialize the route table for this bridge. */ static void bridge_rtable_init(struct bridge_softc *sc) { int i; sc->sc_rthash = malloc(sizeof(*sc->sc_rthash) * BRIDGE_RTHASH_SIZE, M_DEVBUF, M_WAITOK); for (i = 0; i < BRIDGE_RTHASH_SIZE; i++) CK_LIST_INIT(&sc->sc_rthash[i]); sc->sc_rthash_key = arc4random(); CK_LIST_INIT(&sc->sc_rtlist); } /* * bridge_rtable_fini: * * Deconstruct the route table for this bridge. */ static void bridge_rtable_fini(struct bridge_softc *sc) { KASSERT(sc->sc_brtcnt == 0, ("%s: %d bridge routes referenced", __func__, sc->sc_brtcnt)); free(sc->sc_rthash, M_DEVBUF); } /* * The following hash function is adapted from "Hash Functions" by Bob Jenkins * ("Algorithm Alley", Dr. Dobbs Journal, September 1997). */ #define mix(a, b, c) \ do { \ a -= b; a -= c; a ^= (c >> 13); \ b -= c; b -= a; b ^= (a << 8); \ c -= a; c -= b; c ^= (b >> 13); \ a -= b; a -= c; a ^= (c >> 12); \ b -= c; b -= a; b ^= (a << 16); \ c -= a; c -= b; c ^= (b >> 5); \ a -= b; a -= c; a ^= (c >> 3); \ b -= c; b -= a; b ^= (a << 10); \ c -= a; c -= b; c ^= (b >> 15); \ } while (/*CONSTCOND*/0) static __inline uint32_t bridge_rthash(struct bridge_softc *sc, const uint8_t *addr) { uint32_t a = 0x9e3779b9, b = 0x9e3779b9, c = sc->sc_rthash_key; b += addr[5] << 8; b += addr[4]; a += addr[3] << 24; a += addr[2] << 16; a += addr[1] << 8; a += addr[0]; mix(a, b, c); return (c & BRIDGE_RTHASH_MASK); } #undef mix static int bridge_rtnode_addr_cmp(const uint8_t *a, const uint8_t *b) { int i, d; for (i = 0, d = 0; i < ETHER_ADDR_LEN && d == 0; i++) { d = ((int)a[i]) - ((int)b[i]); } return (d); } /* * bridge_rtnode_lookup: * * Look up a bridge route node for the specified destination. Compare the * vlan id or if zero then just return the first match. 
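 *
 *	(The wildcard value is actually DOT1Q_VID_RSVD_IMPL rather than
 *	zero; passing it matches an entry in any vlan.)  Hash buckets are
 *	kept sorted by address, so the scan stops as soon as it sees an
 *	entry that compares greater than the address being looked up.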
*/ static struct bridge_rtnode * bridge_rtnode_lookup(struct bridge_softc *sc, const uint8_t *addr, ether_vlanid_t vlan) { struct bridge_rtnode *brt; uint32_t hash; int dir; BRIDGE_RT_LOCK_OR_NET_EPOCH_ASSERT(sc); hash = bridge_rthash(sc, addr); CK_LIST_FOREACH(brt, &sc->sc_rthash[hash], brt_hash) { dir = bridge_rtnode_addr_cmp(addr, brt->brt_addr); if (dir == 0 && (brt->brt_vlan == vlan || vlan == DOT1Q_VID_RSVD_IMPL)) return (brt); if (dir > 0) return (NULL); } return (NULL); } /* * bridge_rtnode_insert: * * Insert the specified bridge node into the route table. We * assume the entry is not already in the table. */ static int bridge_rtnode_insert(struct bridge_softc *sc, struct bridge_rtnode *brt) { struct bridge_rtnode *lbrt; uint32_t hash; int dir; BRIDGE_RT_LOCK_ASSERT(sc); hash = bridge_rthash(sc, brt->brt_addr); lbrt = CK_LIST_FIRST(&sc->sc_rthash[hash]); if (lbrt == NULL) { CK_LIST_INSERT_HEAD(&sc->sc_rthash[hash], brt, brt_hash); goto out; } do { dir = bridge_rtnode_addr_cmp(brt->brt_addr, lbrt->brt_addr); if (dir == 0 && brt->brt_vlan == lbrt->brt_vlan) return (EXTERROR(EEXIST, "Address already exists")); if (dir > 0) { CK_LIST_INSERT_BEFORE(lbrt, brt, brt_hash); goto out; } if (CK_LIST_NEXT(lbrt, brt_hash) == NULL) { CK_LIST_INSERT_AFTER(lbrt, brt, brt_hash); goto out; } lbrt = CK_LIST_NEXT(lbrt, brt_hash); } while (lbrt != NULL); #ifdef DIAGNOSTIC panic("bridge_rtnode_insert: impossible"); #endif out: CK_LIST_INSERT_HEAD(&sc->sc_rtlist, brt, brt_list); sc->sc_brtcnt++; return (0); } static void bridge_rtnode_destroy_cb(struct epoch_context *ctx) { struct bridge_rtnode *brt; brt = __containerof(ctx, struct bridge_rtnode, brt_epoch_ctx); CURVNET_SET(brt->brt_vnet); uma_zfree(V_bridge_rtnode_zone, brt); CURVNET_RESTORE(); } /* * bridge_rtnode_destroy: * * Destroy a bridge rtnode. */ static void bridge_rtnode_destroy(struct bridge_softc *sc, struct bridge_rtnode *brt) { BRIDGE_RT_LOCK_ASSERT(sc); CK_LIST_REMOVE(brt, brt_hash); CK_LIST_REMOVE(brt, brt_list); sc->sc_brtcnt--; brt->brt_dst->bif_addrcnt--; NET_EPOCH_CALL(bridge_rtnode_destroy_cb, &brt->brt_epoch_ctx); } /* * bridge_rtable_expire: * * Set the expiry time for all routes on an interface. */ static void bridge_rtable_expire(struct ifnet *ifp, int age) { struct bridge_iflist *bif = NULL; struct bridge_softc *sc = NULL; struct bridge_rtnode *brt; CURVNET_SET(ifp->if_vnet); bif = ifp->if_bridge; if (bif) sc = bif->bif_sc; MPASS(sc != NULL); BRIDGE_RT_LOCK(sc); /* * If the age is zero then flush, otherwise set all the expiry times to * age for the interface */ if (age == 0) bridge_rtdelete(sc, ifp, IFBF_FLUSHDYN); else { CK_LIST_FOREACH(brt, &sc->sc_rtlist, brt_list) { /* Cap the expiry time to 'age' */ if (brt->brt_ifp == ifp && brt->brt_expire > time_uptime + age && (brt->brt_flags & IFBAF_TYPEMASK) == IFBAF_DYNAMIC) brt->brt_expire = time_uptime + age; } } BRIDGE_RT_UNLOCK(sc); CURVNET_RESTORE(); } /* * bridge_state_change: * * Callback from the bridgestp code when a port changes states. 
*/ static void bridge_state_change(struct ifnet *ifp, int state) { struct bridge_iflist *bif = ifp->if_bridge; struct bridge_softc *sc = bif->bif_sc; static const char *stpstates[] = { "disabled", "listening", "learning", "forwarding", "blocking", "discarding" }; CURVNET_SET(ifp->if_vnet); if (V_log_stp) log(LOG_NOTICE, "%s: state changed to %s on %s\n", sc->sc_ifp->if_xname, stpstates[state], ifp->if_xname); CURVNET_RESTORE(); } /* * Send bridge packets through pfil if they are one of the types pfil can deal * with, or if they are ARP or REVARP. (pfil will pass ARP and REVARP without * question.) If *bifp or *ifp are NULL then packet filtering is skipped for * that interface. */ static int bridge_pfil(struct mbuf **mp, struct ifnet *bifp, struct ifnet *ifp, int dir) { int snap, error, i; struct ether_header *eh1, eh2; struct llc llc1; u_int16_t ether_type; pfil_return_t rv; #ifdef INET struct ip *ip = NULL; int hlen = 0; #endif snap = 0; error = -1; /* Default error if not error == 0 */ #if 0 /* we may return with the IP fields swapped, ensure its not shared */ KASSERT(M_WRITABLE(*mp), ("%s: modifying a shared mbuf", __func__)); #endif if (V_pfil_bridge == 0 && V_pfil_member == 0 && V_pfil_ipfw == 0) return (0); /* filtering is disabled */ i = min((*mp)->m_pkthdr.len, max_protohdr); if ((*mp)->m_len < i) { *mp = m_pullup(*mp, i); if (*mp == NULL) { printf("%s: m_pullup failed\n", __func__); return (-1); } } eh1 = mtod(*mp, struct ether_header *); ether_type = ntohs(eh1->ether_type); /* * Check for SNAP/LLC. */ if (ether_type < ETHERMTU) { struct llc *llc2 = (struct llc *)(eh1 + 1); if ((*mp)->m_len >= ETHER_HDR_LEN + 8 && llc2->llc_dsap == LLC_SNAP_LSAP && llc2->llc_ssap == LLC_SNAP_LSAP && llc2->llc_control == LLC_UI) { ether_type = htons(llc2->llc_un.type_snap.ether_type); snap = 1; } } /* * If we're trying to filter bridge traffic, only look at traffic for * protocols available in the kernel (IPv4 and/or IPv6) to avoid * passing traffic for an unsupported protocol to the filter. This is * lame since if we really wanted, say, an AppleTalk filter, we are * hosed, but of course we don't have an AppleTalk filter to begin * with. (Note that since pfil doesn't understand ARP it will pass * *ALL* ARP traffic.) */ switch (ether_type) { #ifdef INET case ETHERTYPE_ARP: case ETHERTYPE_REVARP: if (V_pfil_ipfw_arp == 0) return (0); /* Automatically pass */ /* FALLTHROUGH */ case ETHERTYPE_IP: #endif #ifdef INET6 case ETHERTYPE_IPV6: #endif /* INET6 */ break; default: /* * We get here if the packet isn't from a supported * protocol. Check to see if the user wants to pass * non-IP packets, these will not be checked by pfil(9) * and passed unconditionally so the default is to * drop. */ if (V_pfil_onlyip) goto bad; } /* Run the packet through pfil before stripping link headers */ if (PFIL_HOOKED_OUT(V_link_pfil_head) && V_pfil_ipfw != 0 && dir == PFIL_OUT && ifp != NULL) { switch (pfil_mbuf_out(V_link_pfil_head, mp, ifp, NULL)) { case PFIL_DROPPED: return (EACCES); case PFIL_CONSUMED: return (0); } } /* Strip off the Ethernet header and keep a copy. 
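 * The IP and IPv6 pfil heads expect the packet to start at the network
 * header, so the Ethernet (and, for SNAP frames, the LLC) header is
 * removed here and re-prepended from the saved copies (eh2, llc1) once
 * filtering is done.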
*/ m_copydata(*mp, 0, ETHER_HDR_LEN, (caddr_t) &eh2); m_adj(*mp, ETHER_HDR_LEN); /* Strip off snap header, if present */ if (snap) { m_copydata(*mp, 0, sizeof(struct llc), (caddr_t) &llc1); m_adj(*mp, sizeof(struct llc)); } /* * Check the IP header for alignment and errors */ if (dir == PFIL_IN) { switch (ether_type) { #ifdef INET case ETHERTYPE_IP: error = bridge_ip_checkbasic(mp); break; #endif #ifdef INET6 case ETHERTYPE_IPV6: error = bridge_ip6_checkbasic(mp); break; #endif /* INET6 */ default: error = 0; } if (error) goto bad; } error = 0; /* * Run the packet through pfil */ rv = PFIL_PASS; switch (ether_type) { #ifdef INET case ETHERTYPE_IP: /* * Run pfil on the member interface and the bridge, both can * be skipped by clearing pfil_member or pfil_bridge. * * Keep the order: * in_if -> bridge_if -> out_if */ if (V_pfil_bridge && dir == PFIL_OUT && bifp != NULL && (rv = pfil_mbuf_out(V_inet_pfil_head, mp, bifp, NULL)) != PFIL_PASS) break; if (V_pfil_member && ifp != NULL) { rv = (dir == PFIL_OUT) ? pfil_mbuf_out(V_inet_pfil_head, mp, ifp, NULL) : pfil_mbuf_in(V_inet_pfil_head, mp, ifp, NULL); if (rv != PFIL_PASS) break; } if (V_pfil_bridge && dir == PFIL_IN && bifp != NULL && (rv = pfil_mbuf_in(V_inet_pfil_head, mp, bifp, NULL)) != PFIL_PASS) break; /* check if we need to fragment the packet */ /* bridge_fragment generates a mbuf chain of packets */ /* that already include eth headers */ if (V_pfil_member && ifp != NULL && dir == PFIL_OUT) { i = (*mp)->m_pkthdr.len; if (i > ifp->if_mtu) { error = bridge_fragment(ifp, mp, &eh2, snap, &llc1); return (error); } } /* Recalculate the ip checksum. */ ip = mtod(*mp, struct ip *); hlen = ip->ip_hl << 2; if (hlen < sizeof(struct ip)) goto bad; if (hlen > (*mp)->m_len) { if ((*mp = m_pullup(*mp, hlen)) == NULL) goto bad; ip = mtod(*mp, struct ip *); if (ip == NULL) goto bad; } ip->ip_sum = 0; if (hlen == sizeof(struct ip)) ip->ip_sum = in_cksum_hdr(ip); else ip->ip_sum = in_cksum(*mp, hlen); break; #endif /* INET */ #ifdef INET6 case ETHERTYPE_IPV6: if (V_pfil_bridge && dir == PFIL_OUT && bifp != NULL && (rv = pfil_mbuf_out(V_inet6_pfil_head, mp, bifp, NULL)) != PFIL_PASS) break; if (V_pfil_member && ifp != NULL) { rv = (dir == PFIL_OUT) ? pfil_mbuf_out(V_inet6_pfil_head, mp, ifp, NULL) : pfil_mbuf_in(V_inet6_pfil_head, mp, ifp, NULL); if (rv != PFIL_PASS) break; } if (V_pfil_bridge && dir == PFIL_IN && bifp != NULL && (rv = pfil_mbuf_in(V_inet6_pfil_head, mp, bifp, NULL)) != PFIL_PASS) break; break; #endif } switch (rv) { case PFIL_CONSUMED: return (0); case PFIL_DROPPED: return (EACCES); default: break; } error = -1; /* * Finally, put everything back the way it was and return */ if (snap) { M_PREPEND(*mp, sizeof(struct llc), M_NOWAIT); if (*mp == NULL) return (error); bcopy(&llc1, mtod(*mp, caddr_t), sizeof(struct llc)); } M_PREPEND(*mp, ETHER_HDR_LEN, M_NOWAIT); if (*mp == NULL) return (error); bcopy(&eh2, mtod(*mp, caddr_t), ETHER_HDR_LEN); return (0); bad: m_freem(*mp); *mp = NULL; return (error); } #ifdef INET /* * Perform basic checks on header size since * pfil assumes ip_input has already processed * it for it. Cut-and-pasted from ip_input.c. * Given how simple the IPv6 version is, * does the IPv4 version really need to be * this complicated? * * XXX Should we update ipstat here, or not? * XXX Right now we update ipstat but not * XXX csum_counter. 
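 *
 *	The checks performed: pull up (or copy up, for alignment) the
 *	header, verify the version is IPVERSION, verify the header length
 *	is sane, verify the header checksum (honouring the CSUM_IP_CHECKED
 *	and CSUM_IP_VALID offload flags when the driver already did it),
 *	and verify the total length does not exceed the data actually
 *	present in the mbuf chain.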
*/ static int bridge_ip_checkbasic(struct mbuf **mp) { struct mbuf *m = *mp; struct ip *ip; int len, hlen; u_short sum; if (*mp == NULL) return (-1); if (IP_HDR_ALIGNED_P(mtod(m, caddr_t)) == 0) { if ((m = m_copyup(m, sizeof(struct ip), (max_linkhdr + 3) & ~3)) == NULL) { /* XXXJRT new stat, please */ KMOD_IPSTAT_INC(ips_toosmall); goto bad; } } else if (__predict_false(m->m_len < sizeof (struct ip))) { if ((m = m_pullup(m, sizeof (struct ip))) == NULL) { KMOD_IPSTAT_INC(ips_toosmall); goto bad; } } ip = mtod(m, struct ip *); if (ip == NULL) goto bad; if (ip->ip_v != IPVERSION) { KMOD_IPSTAT_INC(ips_badvers); goto bad; } hlen = ip->ip_hl << 2; if (hlen < sizeof(struct ip)) { /* minimum header length */ KMOD_IPSTAT_INC(ips_badhlen); goto bad; } if (hlen > m->m_len) { if ((m = m_pullup(m, hlen)) == NULL) { KMOD_IPSTAT_INC(ips_badhlen); goto bad; } ip = mtod(m, struct ip *); if (ip == NULL) goto bad; } if (m->m_pkthdr.csum_flags & CSUM_IP_CHECKED) { sum = !(m->m_pkthdr.csum_flags & CSUM_IP_VALID); } else { if (hlen == sizeof(struct ip)) { sum = in_cksum_hdr(ip); } else { sum = in_cksum(m, hlen); } } if (sum) { KMOD_IPSTAT_INC(ips_badsum); goto bad; } /* Retrieve the packet length. */ len = ntohs(ip->ip_len); /* * Check for additional length bogosity */ if (len < hlen) { KMOD_IPSTAT_INC(ips_badlen); goto bad; } /* * Check that the amount of data in the buffers * is as at least much as the IP header would have us expect. * Drop packet if shorter than we expect. */ if (m->m_pkthdr.len < len) { KMOD_IPSTAT_INC(ips_tooshort); goto bad; } /* Checks out, proceed */ *mp = m; return (0); bad: *mp = m; return (-1); } #endif /* INET */ #ifdef INET6 /* * Same as above, but for IPv6. * Cut-and-pasted from ip6_input.c. * XXX Should we update ip6stat, or not? */ static int bridge_ip6_checkbasic(struct mbuf **mp) { struct mbuf *m = *mp; struct ip6_hdr *ip6; /* * If the IPv6 header is not aligned, slurp it up into a new * mbuf with space for link headers, in the event we forward * it. Otherwise, if it is aligned, make sure the entire base * IPv6 header is in the first mbuf of the chain. */ if (IP6_HDR_ALIGNED_P(mtod(m, caddr_t)) == 0) { struct ifnet *inifp = m->m_pkthdr.rcvif; if ((m = m_copyup(m, sizeof(struct ip6_hdr), (max_linkhdr + 3) & ~3)) == NULL) { /* XXXJRT new stat, please */ IP6STAT_INC(ip6s_toosmall); in6_ifstat_inc(inifp, ifs6_in_hdrerr); goto bad; } } else if (__predict_false(m->m_len < sizeof(struct ip6_hdr))) { struct ifnet *inifp = m->m_pkthdr.rcvif; if ((m = m_pullup(m, sizeof(struct ip6_hdr))) == NULL) { IP6STAT_INC(ip6s_toosmall); in6_ifstat_inc(inifp, ifs6_in_hdrerr); goto bad; } } ip6 = mtod(m, struct ip6_hdr *); if ((ip6->ip6_vfc & IPV6_VERSION_MASK) != IPV6_VERSION) { IP6STAT_INC(ip6s_badvers); in6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_hdrerr); goto bad; } /* Checks out, proceed */ *mp = m; return (0); bad: *mp = m; return (-1); } #endif /* INET6 */ #ifdef INET /* * bridge_fragment: * * Fragment mbuf chain in multiple packets and prepend ethernet header. */ static int bridge_fragment(struct ifnet *ifp, struct mbuf **mp, struct ether_header *eh, int snap, struct llc *llc) { struct mbuf *m = *mp, *nextpkt = NULL, *mprev = NULL, *mcur = NULL; struct ip *ip; int error = -1; if (m->m_len < sizeof(struct ip) && (m = m_pullup(m, sizeof(struct ip))) == NULL) goto dropit; ip = mtod(m, struct ip *); m->m_pkthdr.csum_flags |= CSUM_IP; error = ip_fragment(ip, &m, ifp->if_mtu, ifp->if_hwassist); if (error) goto dropit; /* * Walk the chain and re-add the Ethernet header for * each mbuf packet. 
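 * ip_fragment() links the fragments through m_nextpkt.  Each M_PREPEND
 * below may allocate a new mbuf at the head of a fragment, so the
 * previous fragment's m_nextpkt (or *mp, for the first fragment) is
 * re-pointed at the possibly new head afterwards.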
*/ for (mcur = m; mcur; mcur = mcur->m_nextpkt) { nextpkt = mcur->m_nextpkt; mcur->m_nextpkt = NULL; if (snap) { M_PREPEND(mcur, sizeof(struct llc), M_NOWAIT); if (mcur == NULL) { error = ENOBUFS; if (mprev != NULL) mprev->m_nextpkt = nextpkt; goto dropit; } bcopy(llc, mtod(mcur, caddr_t),sizeof(struct llc)); } M_PREPEND(mcur, ETHER_HDR_LEN, M_NOWAIT); if (mcur == NULL) { error = ENOBUFS; if (mprev != NULL) mprev->m_nextpkt = nextpkt; goto dropit; } bcopy(eh, mtod(mcur, caddr_t), ETHER_HDR_LEN); /* * The previous two M_PREPEND could have inserted one or two * mbufs in front so we have to update the previous packet's * m_nextpkt. */ mcur->m_nextpkt = nextpkt; if (mprev != NULL) mprev->m_nextpkt = mcur; else { /* The first mbuf in the original chain needs to be * updated. */ *mp = mcur; } mprev = mcur; } KMOD_IPSTAT_INC(ips_fragmented); return (error); dropit: for (mcur = *mp; mcur; mcur = m) { /* droping the full packet chain */ m = mcur->m_nextpkt; m_freem(mcur); } return (error); } #endif /* INET */ static void bridge_linkstate(struct ifnet *ifp) { struct bridge_softc *sc = NULL; struct bridge_iflist *bif; struct epoch_tracker et; NET_EPOCH_ENTER(et); bif = ifp->if_bridge; if (bif) sc = bif->bif_sc; if (sc != NULL) { bridge_linkcheck(sc); bstp_linkstate(&bif->bif_stp); } NET_EPOCH_EXIT(et); } static void bridge_linkcheck(struct bridge_softc *sc) { struct bridge_iflist *bif; int new_link, hasls; BRIDGE_LOCK_OR_NET_EPOCH_ASSERT(sc); new_link = LINK_STATE_DOWN; hasls = 0; /* Our link is considered up if at least one of our ports is active */ CK_LIST_FOREACH(bif, &sc->sc_iflist, bif_next) { if (bif->bif_ifp->if_capabilities & IFCAP_LINKSTATE) hasls++; if (bif->bif_ifp->if_link_state == LINK_STATE_UP) { new_link = LINK_STATE_UP; break; } } if (!CK_LIST_EMPTY(&sc->sc_iflist) && !hasls) { /* If no interfaces support link-state then we default to up */ new_link = LINK_STATE_UP; } if_link_state_change(sc->sc_ifp, new_link); } diff --git a/sys/net/if_vlan.c b/sys/net/if_vlan.c index 22fcb7bf7c64..61000018e5a4 100644 --- a/sys/net/if_vlan.c +++ b/sys/net/if_vlan.c @@ -1,2590 +1,2602 @@ /*- * Copyright 1998 Massachusetts Institute of Technology * Copyright 2012 ADARA Networks, Inc. * Copyright 2017 Dell EMC Isilon * * Portions of this software were developed by Robert N. M. Watson under * contract to ADARA Networks, Inc. * * Permission to use, copy, modify, and distribute this software and * its documentation for any purpose and without fee is hereby * granted, provided that both the above copyright notice and this * permission notice appear in all copies, that both the above * copyright notice and this permission notice appear in all * supporting documentation, and that the name of M.I.T. not be used * in advertising or publicity pertaining to distribution of the * software without specific, written prior permission. M.I.T. makes * no representations about the suitability of this software for any * purpose. It is provided "as is" without express or implied * warranty. * * THIS SOFTWARE IS PROVIDED BY M.I.T. ``AS IS''. M.I.T. DISCLAIMS * ALL EXPRESS OR IMPLIED WARRANTIES WITH REGARD TO THIS SOFTWARE, * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. IN NO EVENT * SHALL M.I.T. 
BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ /* * if_vlan.c - pseudo-device driver for IEEE 802.1Q virtual LANs. * This is sort of sneaky in the implementation, since * we need to pretend to be enough of an Ethernet implementation * to make arp work. The way we do this is by telling everyone * that we are an Ethernet, and then catch the packets that * ether_output() sends to us via if_transmit(), rewrite them for * use by the real outgoing interface, and ask it to send them. */ #include "opt_inet.h" #include "opt_inet6.h" #include "opt_ipsec.h" #include "opt_kern_tls.h" #include "opt_vlan.h" #include "opt_ratelimit.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef INET #include #include #endif #include #include #include #include #define VLAN_DEF_HWIDTH 4 #define VLAN_IFFLAGS (IFF_BROADCAST | IFF_MULTICAST) #define UP_AND_RUNNING(ifp) \ ((ifp)->if_flags & IFF_UP && (ifp)->if_drv_flags & IFF_DRV_RUNNING) CK_SLIST_HEAD(ifvlanhead, ifvlan); struct ifvlantrunk { struct ifnet *parent; /* parent interface of this trunk */ struct mtx lock; #ifdef VLAN_ARRAY #define VLAN_ARRAY_SIZE (EVL_VLID_MASK + 1) struct ifvlan *vlans[VLAN_ARRAY_SIZE]; /* static table */ #else struct ifvlanhead *hash; /* dynamic hash-list table */ uint16_t hmask; uint16_t hwidth; #endif int refcnt; }; #if defined(KERN_TLS) || defined(RATELIMIT) struct vlan_snd_tag { struct m_snd_tag com; struct m_snd_tag *tag; }; static inline struct vlan_snd_tag * mst_to_vst(struct m_snd_tag *mst) { return (__containerof(mst, struct vlan_snd_tag, com)); } #endif /* * This macro provides a facility to iterate over every vlan on a trunk with * the assumption that none will be added/removed during iteration. */ #ifdef VLAN_ARRAY #define VLAN_FOREACH(_ifv, _trunk) \ size_t _i; \ for (_i = 0; _i < VLAN_ARRAY_SIZE; _i++) \ if (((_ifv) = (_trunk)->vlans[_i]) != NULL) #else /* VLAN_ARRAY */ #define VLAN_FOREACH(_ifv, _trunk) \ struct ifvlan *_next; \ size_t _i; \ for (_i = 0; _i < (1 << (_trunk)->hwidth); _i++) \ CK_SLIST_FOREACH_SAFE((_ifv), &(_trunk)->hash[_i], ifv_list, _next) #endif /* VLAN_ARRAY */ /* * This macro provides a facility to iterate over every vlan on a trunk while * also modifying the number of vlans on the trunk. The iteration continues * until some condition is met or there are no more vlans on the trunk. */ #ifdef VLAN_ARRAY /* The VLAN_ARRAY case is simple -- just a for loop using the condition. */ #define VLAN_FOREACH_UNTIL_SAFE(_ifv, _trunk, _cond) \ size_t _i; \ for (_i = 0; !(_cond) && _i < VLAN_ARRAY_SIZE; _i++) \ if (((_ifv) = (_trunk)->vlans[_i])) #else /* VLAN_ARRAY */ /* * The hash table case is more complicated. We allow for the hash table to be * modified (i.e. vlans removed) while we are iterating over it. To allow for * this we must restart the iteration every time we "touch" something during * the iteration, since removal will resize the hash table and invalidate our * current position. 
If acting on the touched element causes the trunk to be * emptied, then iteration also stops. */ #define VLAN_FOREACH_UNTIL_SAFE(_ifv, _trunk, _cond) \ size_t _i; \ bool _touch = false; \ for (_i = 0; \ !(_cond) && _i < (1 << (_trunk)->hwidth); \ _i = (_touch && ((_trunk) != NULL) ? 0 : _i + 1), _touch = false) \ if (((_ifv) = CK_SLIST_FIRST(&(_trunk)->hash[_i])) != NULL && \ (_touch = true)) #endif /* VLAN_ARRAY */ struct vlan_mc_entry { struct sockaddr_dl mc_addr; CK_SLIST_ENTRY(vlan_mc_entry) mc_entries; struct epoch_context mc_epoch_ctx; }; struct ifvlan { struct ifvlantrunk *ifv_trunk; struct ifnet *ifv_ifp; #define TRUNK(ifv) ((ifv)->ifv_trunk) #define PARENT(ifv) (TRUNK(ifv)->parent) void *ifv_cookie; int ifv_pflags; /* special flags we have set on parent */ int ifv_capenable; int ifv_capenable2; int ifv_encaplen; /* encapsulation length */ int ifv_mtufudge; /* MTU fudged by this much */ int ifv_mintu; /* min transmission unit */ struct ether_8021q_tag ifv_qtag; #define ifv_proto ifv_qtag.proto #define ifv_vid ifv_qtag.vid #define ifv_pcp ifv_qtag.pcp struct task lladdr_task; CK_SLIST_HEAD(, vlan_mc_entry) vlan_mc_listhead; #ifndef VLAN_ARRAY CK_SLIST_ENTRY(ifvlan) ifv_list; #endif }; /* Special flags we should propagate to parent. */ static struct { int flag; int (*func)(struct ifnet *, int); } vlan_pflags[] = { {IFF_PROMISC, ifpromisc}, {IFF_ALLMULTI, if_allmulti}, {0, NULL} }; VNET_DECLARE(int, vlan_mtag_pcp); #define V_vlan_mtag_pcp VNET(vlan_mtag_pcp) static const char vlanname[] = "vlan"; static MALLOC_DEFINE(M_VLAN, vlanname, "802.1Q Virtual LAN Interface"); static eventhandler_tag ifdetach_tag; static eventhandler_tag iflladdr_tag; static eventhandler_tag ifevent_tag; /* * if_vlan uses two module-level synchronizations primitives to allow concurrent * modification of vlan interfaces and (mostly) allow for vlans to be destroyed * while they are being used for tx/rx. To accomplish this in a way that has * acceptable performance and cooperation with other parts of the network stack * there is a non-sleepable epoch(9) and an sx(9). * * The performance-sensitive paths that warrant using the epoch(9) are * vlan_transmit and vlan_input. Both have to check for the vlan interface's * existence using if_vlantrunk, and being in the network tx/rx paths the use * of an epoch(9) gives a measureable improvement in performance. * * The reason for having an sx(9) is mostly because there are still areas that * must be sleepable and also have safe concurrent access to a vlan interface. * Since the sx(9) exists, it is used by default in most paths unless sleeping * is not permitted, or if it is not clear whether sleeping is permitted. * */ #define _VLAN_SX_ID ifv_sx static struct sx _VLAN_SX_ID; #define VLAN_LOCKING_INIT() \ sx_init_flags(&_VLAN_SX_ID, "vlan_sx", SX_RECURSE) #define VLAN_LOCKING_DESTROY() \ sx_destroy(&_VLAN_SX_ID) #define VLAN_SLOCK() sx_slock(&_VLAN_SX_ID) #define VLAN_SUNLOCK() sx_sunlock(&_VLAN_SX_ID) #define VLAN_XLOCK() sx_xlock(&_VLAN_SX_ID) #define VLAN_XUNLOCK() sx_xunlock(&_VLAN_SX_ID) #define VLAN_SLOCK_ASSERT() sx_assert(&_VLAN_SX_ID, SA_SLOCKED) #define VLAN_XLOCK_ASSERT() sx_assert(&_VLAN_SX_ID, SA_XLOCKED) #define VLAN_SXLOCK_ASSERT() sx_assert(&_VLAN_SX_ID, SA_LOCKED) /* * We also have a per-trunk mutex that should be acquired when changing * its state. 
*/ #define TRUNK_LOCK_INIT(trunk) mtx_init(&(trunk)->lock, vlanname, NULL, MTX_DEF) #define TRUNK_LOCK_DESTROY(trunk) mtx_destroy(&(trunk)->lock) #define TRUNK_WLOCK(trunk) mtx_lock(&(trunk)->lock) #define TRUNK_WUNLOCK(trunk) mtx_unlock(&(trunk)->lock) #define TRUNK_WLOCK_ASSERT(trunk) mtx_assert(&(trunk)->lock, MA_OWNED); /* * The VLAN_ARRAY substitutes the dynamic hash with a static array * with 4096 entries. In theory this can give a boost in processing, * however in practice it does not. Probably this is because the array * is too big to fit into CPU cache. */ #ifndef VLAN_ARRAY static void vlan_inithash(struct ifvlantrunk *trunk); static void vlan_freehash(struct ifvlantrunk *trunk); static int vlan_inshash(struct ifvlantrunk *trunk, struct ifvlan *ifv); static int vlan_remhash(struct ifvlantrunk *trunk, struct ifvlan *ifv); static void vlan_growhash(struct ifvlantrunk *trunk, int howmuch); static __inline struct ifvlan * vlan_gethash(struct ifvlantrunk *trunk, uint16_t vid); #endif static void trunk_destroy(struct ifvlantrunk *trunk); static void vlan_init(void *foo); static void vlan_input(struct ifnet *ifp, struct mbuf *m); static int vlan_ioctl(struct ifnet *ifp, u_long cmd, caddr_t addr); #if defined(KERN_TLS) || defined(RATELIMIT) static int vlan_snd_tag_alloc(struct ifnet *, union if_snd_tag_alloc_params *, struct m_snd_tag **); static int vlan_snd_tag_modify(struct m_snd_tag *, union if_snd_tag_modify_params *); static int vlan_snd_tag_query(struct m_snd_tag *, union if_snd_tag_query_params *); static void vlan_snd_tag_free(struct m_snd_tag *); static struct m_snd_tag *vlan_next_snd_tag(struct m_snd_tag *); static void vlan_ratelimit_query(struct ifnet *, struct if_ratelimit_query_results *); #endif static void vlan_qflush(struct ifnet *ifp); static int vlan_setflag(struct ifnet *ifp, int flag, int status, int (*func)(struct ifnet *, int)); static int vlan_setflags(struct ifnet *ifp, int status); static int vlan_setmulti(struct ifnet *ifp); static int vlan_transmit(struct ifnet *ifp, struct mbuf *m); #ifdef ALTQ static void vlan_altq_start(struct ifnet *ifp); static int vlan_altq_transmit(struct ifnet *ifp, struct mbuf *m); #endif static int vlan_output(struct ifnet *ifp, struct mbuf *m, const struct sockaddr *dst, struct route *ro); static void vlan_unconfig(struct ifnet *ifp); static void vlan_unconfig_locked(struct ifnet *ifp, int departing); static int vlan_config(struct ifvlan *ifv, struct ifnet *p, uint16_t tag, uint16_t proto); static void vlan_link_state(struct ifnet *ifp); static void vlan_capabilities(struct ifvlan *ifv); static void vlan_trunk_capabilities(struct ifnet *ifp); static struct ifnet *vlan_clone_match_ethervid(const char *, int *); static int vlan_clone_match(struct if_clone *, const char *); static int vlan_clone_create(struct if_clone *, char *, size_t, struct ifc_data *, struct ifnet **); static int vlan_clone_destroy(struct if_clone *, struct ifnet *, uint32_t); static int vlan_clone_create_nl(struct if_clone *ifc, char *name, size_t len, struct ifc_data_nl *ifd); static int vlan_clone_modify_nl(struct ifnet *ifp, struct ifc_data_nl *ifd); static void vlan_clone_dump_nl(struct ifnet *ifp, struct nl_writer *nw); static void vlan_ifdetach(void *arg, struct ifnet *ifp); static void vlan_iflladdr(void *arg, struct ifnet *ifp); static void vlan_ifevent(void *arg, struct ifnet *ifp, int event); static void vlan_lladdr_fn(void *arg, int pending); static struct if_clone *vlan_cloner; #ifdef VIMAGE VNET_DEFINE_STATIC(struct if_clone *, vlan_cloner); #define 
V_vlan_cloner VNET(vlan_cloner) #endif #ifdef RATELIMIT static const struct if_snd_tag_sw vlan_snd_tag_ul_sw = { .snd_tag_modify = vlan_snd_tag_modify, .snd_tag_query = vlan_snd_tag_query, .snd_tag_free = vlan_snd_tag_free, .next_snd_tag = vlan_next_snd_tag, .type = IF_SND_TAG_TYPE_UNLIMITED }; static const struct if_snd_tag_sw vlan_snd_tag_rl_sw = { .snd_tag_modify = vlan_snd_tag_modify, .snd_tag_query = vlan_snd_tag_query, .snd_tag_free = vlan_snd_tag_free, .next_snd_tag = vlan_next_snd_tag, .type = IF_SND_TAG_TYPE_RATE_LIMIT }; #endif #ifdef KERN_TLS static const struct if_snd_tag_sw vlan_snd_tag_tls_sw = { .snd_tag_modify = vlan_snd_tag_modify, .snd_tag_query = vlan_snd_tag_query, .snd_tag_free = vlan_snd_tag_free, .next_snd_tag = vlan_next_snd_tag, .type = IF_SND_TAG_TYPE_TLS }; #ifdef RATELIMIT static const struct if_snd_tag_sw vlan_snd_tag_tls_rl_sw = { .snd_tag_modify = vlan_snd_tag_modify, .snd_tag_query = vlan_snd_tag_query, .snd_tag_free = vlan_snd_tag_free, .next_snd_tag = vlan_next_snd_tag, .type = IF_SND_TAG_TYPE_TLS_RATE_LIMIT }; #endif #endif static void vlan_mc_free(struct epoch_context *ctx) { struct vlan_mc_entry *mc = __containerof(ctx, struct vlan_mc_entry, mc_epoch_ctx); free(mc, M_VLAN); } #ifndef VLAN_ARRAY #define HASH(n, m) ((((n) >> 8) ^ ((n) >> 4) ^ (n)) & (m)) static void vlan_inithash(struct ifvlantrunk *trunk) { int i, n; /* * The trunk must not be locked here since we call malloc(M_WAITOK). * It is OK in case this function is called before the trunk struct * gets hooked up and becomes visible from other threads. */ KASSERT(trunk->hwidth == 0 && trunk->hash == NULL, ("%s: hash already initialized", __func__)); trunk->hwidth = VLAN_DEF_HWIDTH; n = 1 << trunk->hwidth; trunk->hmask = n - 1; trunk->hash = malloc(sizeof(struct ifvlanhead) * n, M_VLAN, M_WAITOK); for (i = 0; i < n; i++) CK_SLIST_INIT(&trunk->hash[i]); } static void vlan_freehash(struct ifvlantrunk *trunk) { #ifdef INVARIANTS int i; KASSERT(trunk->hwidth > 0, ("%s: hwidth not positive", __func__)); for (i = 0; i < (1 << trunk->hwidth); i++) KASSERT(CK_SLIST_EMPTY(&trunk->hash[i]), ("%s: hash table not empty", __func__)); #endif free(trunk->hash, M_VLAN); trunk->hash = NULL; trunk->hwidth = trunk->hmask = 0; } static int vlan_inshash(struct ifvlantrunk *trunk, struct ifvlan *ifv) { int i, b; struct ifvlan *ifv2; VLAN_XLOCK_ASSERT(); KASSERT(trunk->hwidth > 0, ("%s: hwidth not positive", __func__)); b = 1 << trunk->hwidth; i = HASH(ifv->ifv_vid, trunk->hmask); CK_SLIST_FOREACH(ifv2, &trunk->hash[i], ifv_list) if (ifv->ifv_vid == ifv2->ifv_vid) return (EEXIST); /* * Grow the hash when the number of vlans exceeds half of the number of * hash buckets squared. This will make the average linked-list length * buckets/2. */ if (trunk->refcnt > (b * b) / 2) { vlan_growhash(trunk, 1); i = HASH(ifv->ifv_vid, trunk->hmask); } CK_SLIST_INSERT_HEAD(&trunk->hash[i], ifv, ifv_list); trunk->refcnt++; return (0); } static int vlan_remhash(struct ifvlantrunk *trunk, struct ifvlan *ifv) { int i, b; struct ifvlan *ifv2; VLAN_XLOCK_ASSERT(); KASSERT(trunk->hwidth > 0, ("%s: hwidth not positive", __func__)); b = 1 << (trunk->hwidth - 1); i = HASH(ifv->ifv_vid, trunk->hmask); CK_SLIST_FOREACH(ifv2, &trunk->hash[i], ifv_list) if (ifv2 == ifv) { trunk->refcnt--; CK_SLIST_REMOVE(&trunk->hash[i], ifv2, ifvlan, ifv_list); if (trunk->refcnt < (b * b) / 2) vlan_growhash(trunk, -1); return (0); } panic("%s: vlan not found\n", __func__); return (ENOENT); /*NOTREACHED*/ } /* * Grow the hash larger or smaller if memory permits. 
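 *
 * Worked example of the resize thresholds used by vlan_inshash() and
 * vlan_remhash() above (numbers are illustrative and assume hwidth == 4,
 * i.e. 16 buckets; the real starting width is VLAN_DEF_HWIDTH):
 *
 *        grow:   b = 1 << hwidth = 16, trigger refcnt > (b * b) / 2 = 128,
 *                and the table doubles to 32 buckets (hwidth 5);
 *        shrink: b = 1 << (hwidth - 1) = 16, trigger refcnt < (b * b) / 2
 *                = 128, and the table halves back to 16 buckets.
 *
 * Either way the average chain length stays around buckets/2, as the
 * comment in vlan_inshash() notes.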
*/ static void vlan_growhash(struct ifvlantrunk *trunk, int howmuch) { struct ifvlan *ifv; struct ifvlanhead *hash2; int hwidth2, i, j, n, n2; VLAN_XLOCK_ASSERT(); KASSERT(trunk->hwidth > 0, ("%s: hwidth not positive", __func__)); if (howmuch == 0) { /* Harmless yet obvious coding error */ printf("%s: howmuch is 0\n", __func__); return; } hwidth2 = trunk->hwidth + howmuch; n = 1 << trunk->hwidth; n2 = 1 << hwidth2; /* Do not shrink the table below the default */ if (hwidth2 < VLAN_DEF_HWIDTH) return; hash2 = malloc(sizeof(struct ifvlanhead) * n2, M_VLAN, M_WAITOK); for (j = 0; j < n2; j++) CK_SLIST_INIT(&hash2[j]); for (i = 0; i < n; i++) while ((ifv = CK_SLIST_FIRST(&trunk->hash[i])) != NULL) { CK_SLIST_REMOVE(&trunk->hash[i], ifv, ifvlan, ifv_list); j = HASH(ifv->ifv_vid, n2 - 1); CK_SLIST_INSERT_HEAD(&hash2[j], ifv, ifv_list); } NET_EPOCH_WAIT(); free(trunk->hash, M_VLAN); trunk->hash = hash2; trunk->hwidth = hwidth2; trunk->hmask = n2 - 1; if (bootverbose) if_printf(trunk->parent, "VLAN hash table resized from %d to %d buckets\n", n, n2); } static __inline struct ifvlan * vlan_gethash(struct ifvlantrunk *trunk, uint16_t vid) { struct ifvlan *ifv; NET_EPOCH_ASSERT(); CK_SLIST_FOREACH(ifv, &trunk->hash[HASH(vid, trunk->hmask)], ifv_list) if (ifv->ifv_vid == vid) return (ifv); return (NULL); } #if 0 /* Debugging code to view the hashtables. */ static void vlan_dumphash(struct ifvlantrunk *trunk) { int i; struct ifvlan *ifv; for (i = 0; i < (1 << trunk->hwidth); i++) { printf("%d: ", i); CK_SLIST_FOREACH(ifv, &trunk->hash[i], ifv_list) printf("%s ", ifv->ifv_ifp->if_xname); printf("\n"); } } #endif /* 0 */ #else static __inline struct ifvlan * vlan_gethash(struct ifvlantrunk *trunk, uint16_t vid) { return trunk->vlans[vid]; } static __inline int vlan_inshash(struct ifvlantrunk *trunk, struct ifvlan *ifv) { if (trunk->vlans[ifv->ifv_vid] != NULL) return EEXIST; trunk->vlans[ifv->ifv_vid] = ifv; trunk->refcnt++; return (0); } static __inline int vlan_remhash(struct ifvlantrunk *trunk, struct ifvlan *ifv) { trunk->vlans[ifv->ifv_vid] = NULL; trunk->refcnt--; return (0); } static __inline void vlan_freehash(struct ifvlantrunk *trunk) { } static __inline void vlan_inithash(struct ifvlantrunk *trunk) { } #endif /* !VLAN_ARRAY */ static void trunk_destroy(struct ifvlantrunk *trunk) { VLAN_XLOCK_ASSERT(); vlan_freehash(trunk); trunk->parent->if_vlantrunk = NULL; TRUNK_LOCK_DESTROY(trunk); if_rele(trunk->parent); free(trunk, M_VLAN); } /* * Program our multicast filter. What we're actually doing is * programming the multicast filter of the parent. This has the * side effect of causing the parent interface to receive multicast * traffic that it doesn't really want, which ends up being discarded * later by the upper protocol layers. Unfortunately, there's no way * to avoid this: there really is only one physical interface. */ static int vlan_setmulti(struct ifnet *ifp) { struct ifnet *ifp_p; struct ifmultiaddr *ifma; struct ifvlan *sc; struct vlan_mc_entry *mc; int error; VLAN_XLOCK_ASSERT(); /* Find the parent. */ sc = ifp->if_softc; ifp_p = PARENT(sc); CURVNET_SET_QUIET(ifp_p->if_vnet); /* First, remove any existing filter entries. */ while ((mc = CK_SLIST_FIRST(&sc->vlan_mc_listhead)) != NULL) { CK_SLIST_REMOVE_HEAD(&sc->vlan_mc_listhead, mc_entries); (void)if_delmulti(ifp_p, (struct sockaddr *)&mc->mc_addr); NET_EPOCH_CALL(vlan_mc_free, &mc->mc_epoch_ctx); } /* Now program new ones. 
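 *
 * Explanatory sketch of the two-phase update below (no additional logic):
 * the vlan's own multicast list is snapshotted into vlan_mc_listhead while
 * IF_ADDR_WLOCK() is held (hence the M_NOWAIT allocation), and only after
 * that lock is dropped is if_addmulti() called on the parent for each
 * snapshotted entry, presumably because the latter may reach into the
 * parent driver's SIOCADDMULTI handling:
 *
 *        IF_ADDR_WLOCK(ifp);      copy each AF_LINK ifma into the mc list,
 *                                 re-pointing sdl_index at the parent
 *        IF_ADDR_WUNLOCK(ifp);
 *        for each mc:  if_addmulti(ifp_p,
 *                          (struct sockaddr *)&mc->mc_addr, NULL);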
*/ IF_ADDR_WLOCK(ifp); CK_STAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) { if (ifma->ifma_addr->sa_family != AF_LINK) continue; mc = malloc(sizeof(struct vlan_mc_entry), M_VLAN, M_NOWAIT); if (mc == NULL) { IF_ADDR_WUNLOCK(ifp); CURVNET_RESTORE(); return (ENOMEM); } bcopy(ifma->ifma_addr, &mc->mc_addr, ifma->ifma_addr->sa_len); mc->mc_addr.sdl_index = ifp_p->if_index; CK_SLIST_INSERT_HEAD(&sc->vlan_mc_listhead, mc, mc_entries); } IF_ADDR_WUNLOCK(ifp); CK_SLIST_FOREACH (mc, &sc->vlan_mc_listhead, mc_entries) { error = if_addmulti(ifp_p, (struct sockaddr *)&mc->mc_addr, NULL); if (error) { CURVNET_RESTORE(); return (error); } } CURVNET_RESTORE(); return (0); } /* * A handler for interface ifnet events. */ static void vlan_ifevent(void *arg __unused, struct ifnet *ifp, int event) { struct epoch_tracker et; struct ifvlan *ifv; struct ifvlantrunk *trunk; if (event != IFNET_EVENT_UPDATE_BAUDRATE) return; NET_EPOCH_ENTER(et); trunk = ifp->if_vlantrunk; if (trunk == NULL) { NET_EPOCH_EXIT(et); return; } TRUNK_WLOCK(trunk); VLAN_FOREACH(ifv, trunk) { ifv->ifv_ifp->if_baudrate = ifp->if_baudrate; } TRUNK_WUNLOCK(trunk); NET_EPOCH_EXIT(et); } /* * A handler for parent interface link layer address changes. * If the parent interface link layer address is changed we * should also change it on all children vlans. */ static void vlan_iflladdr(void *arg __unused, struct ifnet *ifp) { struct epoch_tracker et; struct ifvlan *ifv; struct ifnet *ifv_ifp; struct ifvlantrunk *trunk; struct sockaddr_dl *sdl; /* Need the epoch since this is run on taskqueue_swi. */ NET_EPOCH_ENTER(et); trunk = ifp->if_vlantrunk; if (trunk == NULL) { NET_EPOCH_EXIT(et); return; } /* * OK, it's a trunk. Loop over and change all vlan's lladdrs on it. * We need an exclusive lock here to prevent concurrent SIOCSIFLLADDR * ioctl calls on the parent garbling the lladdr of the child vlan. */ TRUNK_WLOCK(trunk); VLAN_FOREACH(ifv, trunk) { /* * Copy new new lladdr into the ifv_ifp, enqueue a task * to actually call if_setlladdr. if_setlladdr needs to * be deferred to a taskqueue because it will call into * the if_vlan ioctl path and try to acquire the global * lock. */ ifv_ifp = ifv->ifv_ifp; bcopy(IF_LLADDR(ifp), IF_LLADDR(ifv_ifp), ifp->if_addrlen); sdl = (struct sockaddr_dl *)ifv_ifp->if_addr->ifa_addr; sdl->sdl_alen = ifp->if_addrlen; taskqueue_enqueue(taskqueue_thread, &ifv->lladdr_task); } TRUNK_WUNLOCK(trunk); NET_EPOCH_EXIT(et); } /* * A handler for network interface departure events. * Track departure of trunks here so that we don't access invalid * pointers or whatever if a trunk is ripped from under us, e.g., * by ejecting its hot-plug card. However, if an ifnet is simply * being renamed, then there's no need to tear down the state. */ static void vlan_ifdetach(void *arg __unused, struct ifnet *ifp) { struct ifvlan *ifv; struct ifvlantrunk *trunk; /* If the ifnet is just being renamed, don't do anything. */ if (ifp->if_flags & IFF_RENAMING) return; VLAN_XLOCK(); trunk = ifp->if_vlantrunk; if (trunk == NULL) { VLAN_XUNLOCK(); return; } /* * OK, it's a trunk. Loop over and detach all vlan's on it. * Check trunk pointer after each vlan_unconfig() as it will * free it and set to NULL after the last vlan was detached. */ VLAN_FOREACH_UNTIL_SAFE(ifv, ifp->if_vlantrunk, ifp->if_vlantrunk == NULL) vlan_unconfig_locked(ifv->ifv_ifp, 1); /* Trunk should have been destroyed in vlan_unconfig(). */ KASSERT(ifp->if_vlantrunk == NULL, ("%s: purge failed", __func__)); VLAN_XUNLOCK(); } /* * Return the trunk device for a virtual interface. 
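 *
 * A minimal caller sketch (hypothetical; real consumers reach this through
 * the vlan_trunkdev_p hook that vlan_modevent() installs below):
 *
 *        struct epoch_tracker et;
 *        struct ifnet *parent;
 *
 *        NET_EPOCH_ENTER(et);
 *        parent = (*vlan_trunkdev_p)(ifp);
 *        if (parent != NULL) {
 *                use parent only inside this epoch section, or take an
 *                explicit if_ref() before leaving it
 *        }
 *        NET_EPOCH_EXIT(et);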
*/ static struct ifnet * vlan_trunkdev(struct ifnet *ifp) { struct ifvlan *ifv; NET_EPOCH_ASSERT(); if (ifp->if_type != IFT_L2VLAN) return (NULL); ifv = ifp->if_softc; ifp = NULL; if (ifv->ifv_trunk) ifp = PARENT(ifv); return (ifp); } /* * Return the 12-bit VLAN VID for this interface, for use by external * components such as Infiniband. * * XXXRW: Note that the function name here is historical; it should be named * vlan_vid(). */ static int vlan_tag(struct ifnet *ifp, uint16_t *vidp) { struct ifvlan *ifv; if (ifp->if_type != IFT_L2VLAN) return (EINVAL); ifv = ifp->if_softc; *vidp = ifv->ifv_vid; return (0); } static int vlan_pcp(struct ifnet *ifp, uint16_t *pcpp) { struct ifvlan *ifv; if (ifp->if_type != IFT_L2VLAN) return (EINVAL); ifv = ifp->if_softc; *pcpp = ifv->ifv_pcp; return (0); } /* * Return a driver specific cookie for this interface. Synchronization * with setcookie must be provided by the driver. */ static void * vlan_cookie(struct ifnet *ifp) { struct ifvlan *ifv; if (ifp->if_type != IFT_L2VLAN) return (NULL); ifv = ifp->if_softc; return (ifv->ifv_cookie); } /* * Store a cookie in our softc that drivers can use to store driver * private per-instance data in. */ static int vlan_setcookie(struct ifnet *ifp, void *cookie) { struct ifvlan *ifv; if (ifp->if_type != IFT_L2VLAN) return (EINVAL); ifv = ifp->if_softc; ifv->ifv_cookie = cookie; return (0); } /* * Return the vlan device present at the specific VID. */ static struct ifnet * vlan_devat(struct ifnet *ifp, uint16_t vid) { struct ifvlantrunk *trunk; struct ifvlan *ifv; NET_EPOCH_ASSERT(); trunk = ifp->if_vlantrunk; if (trunk == NULL) return (NULL); ifp = NULL; ifv = vlan_gethash(trunk, vid); if (ifv) ifp = ifv->ifv_ifp; return (ifp); } /* For if_link_state_change() eyes only... */ extern void (*vlan_link_state_p)(struct ifnet *); static struct if_clone_addreq_v2 vlan_addreq = { .version = 2, .match_f = vlan_clone_match, .create_f = vlan_clone_create, .destroy_f = vlan_clone_destroy, .create_nl_f = vlan_clone_create_nl, .modify_nl_f = vlan_clone_modify_nl, .dump_nl_f = vlan_clone_dump_nl, }; static int vlan_modevent(module_t mod, int type, void *data) { switch (type) { case MOD_LOAD: ifdetach_tag = EVENTHANDLER_REGISTER(ifnet_departure_event, vlan_ifdetach, NULL, EVENTHANDLER_PRI_ANY); if (ifdetach_tag == NULL) return (ENOMEM); iflladdr_tag = EVENTHANDLER_REGISTER(iflladdr_event, vlan_iflladdr, NULL, EVENTHANDLER_PRI_ANY); if (iflladdr_tag == NULL) return (ENOMEM); ifevent_tag = EVENTHANDLER_REGISTER(ifnet_event, vlan_ifevent, NULL, EVENTHANDLER_PRI_ANY); if (ifevent_tag == NULL) return (ENOMEM); VLAN_LOCKING_INIT(); vlan_input_p = vlan_input; vlan_link_state_p = vlan_link_state; vlan_trunk_cap_p = vlan_trunk_capabilities; vlan_trunkdev_p = vlan_trunkdev; vlan_cookie_p = vlan_cookie; vlan_setcookie_p = vlan_setcookie; vlan_tag_p = vlan_tag; vlan_pcp_p = vlan_pcp; vlan_devat_p = vlan_devat; #ifndef VIMAGE vlan_cloner = ifc_attach_cloner(vlanname, (struct if_clone_addreq *)&vlan_addreq); #endif if (bootverbose) printf("vlan: initialized, using " #ifdef VLAN_ARRAY "full-size arrays" #else "hash tables with chaining" #endif "\n"); break; case MOD_UNLOAD: #ifndef VIMAGE ifc_detach_cloner(vlan_cloner); #endif EVENTHANDLER_DEREGISTER(ifnet_departure_event, ifdetach_tag); EVENTHANDLER_DEREGISTER(iflladdr_event, iflladdr_tag); EVENTHANDLER_DEREGISTER(ifnet_event, ifevent_tag); vlan_input_p = NULL; vlan_link_state_p = NULL; vlan_trunk_cap_p = NULL; vlan_trunkdev_p = NULL; vlan_tag_p = NULL; vlan_cookie_p = NULL; vlan_setcookie_p = 
NULL; vlan_devat_p = NULL; VLAN_LOCKING_DESTROY(); if (bootverbose) printf("vlan: unloaded\n"); break; default: return (EOPNOTSUPP); } return (0); } static moduledata_t vlan_mod = { "if_vlan", vlan_modevent, 0 }; DECLARE_MODULE(if_vlan, vlan_mod, SI_SUB_PSEUDO, SI_ORDER_ANY); MODULE_VERSION(if_vlan, 3); #ifdef VIMAGE static void vnet_vlan_init(const void *unused __unused) { vlan_cloner = ifc_attach_cloner(vlanname, (struct if_clone_addreq *)&vlan_addreq); V_vlan_cloner = vlan_cloner; } VNET_SYSINIT(vnet_vlan_init, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_ANY, vnet_vlan_init, NULL); static void vnet_vlan_uninit(const void *unused __unused) { ifc_detach_cloner(V_vlan_cloner); } VNET_SYSUNINIT(vnet_vlan_uninit, SI_SUB_INIT_IF, SI_ORDER_ANY, vnet_vlan_uninit, NULL); #endif /* * Check for .[. ...] style interface names. */ static struct ifnet * vlan_clone_match_ethervid(const char *name, int *vidp) { char ifname[IFNAMSIZ]; char *cp; struct ifnet *ifp; int vid; strlcpy(ifname, name, IFNAMSIZ); if ((cp = strrchr(ifname, '.')) == NULL) return (NULL); *cp = '\0'; if ((ifp = ifunit_ref(ifname)) == NULL) return (NULL); /* Parse VID. */ if (*++cp == '\0') { if_rele(ifp); return (NULL); } vid = 0; for(; *cp >= '0' && *cp <= '9'; cp++) vid = (vid * 10) + (*cp - '0'); if (*cp != '\0') { if_rele(ifp); return (NULL); } if (vidp != NULL) *vidp = vid; return (ifp); } static int vlan_clone_match(struct if_clone *ifc, const char *name) { struct ifnet *ifp; const char *cp; ifp = vlan_clone_match_ethervid(name, NULL); if (ifp != NULL) { if_rele(ifp); return (1); } if (strncmp(vlanname, name, strlen(vlanname)) != 0) return (0); for (cp = name + 4; *cp != '\0'; cp++) { if (*cp < '0' || *cp > '9') return (0); } return (1); } static int vlan_clone_create(struct if_clone *ifc, char *name, size_t len, struct ifc_data *ifd, struct ifnet **ifpp) { char *dp; bool wildcard = false; bool subinterface = false; int unit; int error; int vid = 0; uint16_t proto = ETHERTYPE_VLAN; struct ifvlan *ifv; struct ifnet *ifp; struct ifnet *p = NULL; struct ifaddr *ifa; struct sockaddr_dl *sdl; struct vlanreq vlr; static const u_char eaddr[ETHER_ADDR_LEN]; /* 00:00:00:00:00:00 */ /* * There are three ways to specify the cloned device: * o pass a parameter block with the clone request. * o specify parameters in the text of the clone device name * o specify no parameters and get an unattached device that * must be configured separately. * The first technique is preferred; the latter two are supported * for backwards compatibility. * * XXXRW: Note historic use of the word "tag" here. New ioctls may be * called for. */ if (ifd->params != NULL) { error = ifc_copyin(ifd, &vlr, sizeof(vlr)); if (error) return error; vid = vlr.vlr_tag; proto = vlr.vlr_proto; if (proto == 0) proto = ETHERTYPE_VLAN; p = ifunit_ref(vlr.vlr_parent); if (p == NULL) return (ENXIO); } if ((error = ifc_name2unit(name, &unit)) == 0) { /* * vlanX interface. 
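 *
 * (For illustration only; the exact spellings are ifconfig(8) conventions,
 * not defined in this file.  The three creation styles described above
 * roughly correspond to:
 *
 *        ifconfig vlan5 create vlan 42 vlandev em0    parameter block
 *        ifconfig em0.42 create                       name carries parent
 *                                                     and VID
 *        ifconfig vlan5 create                        unattached, configured
 *                                                     later via SIOCSETVLAN
 *
 * where "vlan5" and "em0" are made-up names.)
 *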
Set wildcard to true if the unit number * is not fixed (-1) */ wildcard = (unit < 0); } else { struct ifnet *p_tmp = vlan_clone_match_ethervid(name, &vid); if (p_tmp != NULL) { error = 0; subinterface = true; unit = IF_DUNIT_NONE; wildcard = false; if (p != NULL) { if_rele(p_tmp); if (p != p_tmp) error = EINVAL; } else p = p_tmp; } else error = ENXIO; } if (error != 0) { if (p != NULL) if_rele(p); return (error); } if (!subinterface) { /* vlanX interface, mark X as busy or allocate new unit # */ error = ifc_alloc_unit(ifc, &unit); if (error != 0) { if (p != NULL) if_rele(p); return (error); } } /* In the wildcard case, we need to update the name. */ if (wildcard) { for (dp = name; *dp != '\0'; dp++); if (snprintf(dp, len - (dp-name), "%d", unit) > len - (dp-name) - 1) { panic("%s: interface name too long", __func__); } } ifv = malloc(sizeof(struct ifvlan), M_VLAN, M_WAITOK | M_ZERO); ifp = ifv->ifv_ifp = if_alloc(IFT_ETHER); CK_SLIST_INIT(&ifv->vlan_mc_listhead); ifp->if_softc = ifv; /* * Set the name manually rather than using if_initname because * we don't conform to the default naming convention for interfaces. */ strlcpy(ifp->if_xname, name, IFNAMSIZ); ifp->if_dname = vlanname; ifp->if_dunit = unit; ifp->if_init = vlan_init; #ifdef ALTQ ifp->if_start = vlan_altq_start; ifp->if_transmit = vlan_altq_transmit; IFQ_SET_MAXLEN(&ifp->if_snd, ifqmaxlen); ifp->if_snd.ifq_drv_maxlen = 0; IFQ_SET_READY(&ifp->if_snd); #else ifp->if_transmit = vlan_transmit; #endif ifp->if_qflush = vlan_qflush; ifp->if_ioctl = vlan_ioctl; #if defined(KERN_TLS) || defined(RATELIMIT) ifp->if_snd_tag_alloc = vlan_snd_tag_alloc; ifp->if_ratelimit_query = vlan_ratelimit_query; #endif ifp->if_flags = VLAN_IFFLAGS; ifp->if_type = IFT_L2VLAN; ether_ifattach(ifp, eaddr); /* Now undo some of the damage... */ ifp->if_baudrate = 0; ifp->if_hdrlen = ETHER_VLAN_ENCAP_LEN; ifa = ifp->if_addr; sdl = (struct sockaddr_dl *)ifa->ifa_addr; sdl->sdl_type = IFT_L2VLAN; if (p != NULL) { error = vlan_config(ifv, p, vid, proto); if_rele(p); if (error != 0) { /* * Since we've partially failed, we need to back * out all the way, otherwise userland could get * confused. Thus, we destroy the interface. 
*/ ether_ifdetach(ifp); vlan_unconfig(ifp); if_free(ifp); if (!subinterface) ifc_free_unit(ifc, unit); free(ifv, M_VLAN); return (error); } } *ifpp = ifp; return (0); } /* * * Parsers of IFLA_INFO_DATA inside IFLA_LINKINFO of RTM_NEWLINK * {{nla_len=8, nla_type=IFLA_LINK}, 2}, * {{nla_len=12, nla_type=IFLA_IFNAME}, "xvlan22"}, * {{nla_len=24, nla_type=IFLA_LINKINFO}, * [ * {{nla_len=8, nla_type=IFLA_INFO_KIND}, "vlan"...}, * {{nla_len=12, nla_type=IFLA_INFO_DATA}, "\x06\x00\x01\x00\x16\x00\x00\x00"}]} */ struct nl_parsed_vlan { uint16_t vlan_id; uint16_t vlan_proto; struct ifla_vlan_flags vlan_flags; }; #define _OUT(_field) offsetof(struct nl_parsed_vlan, _field) static const struct nlattr_parser nla_p_vlan[] = { { .type = IFLA_VLAN_ID, .off = _OUT(vlan_id), .cb = nlattr_get_uint16 }, { .type = IFLA_VLAN_FLAGS, .off = _OUT(vlan_flags), .cb = nlattr_get_nla }, { .type = IFLA_VLAN_PROTOCOL, .off = _OUT(vlan_proto), .cb = nlattr_get_uint16 }, }; #undef _OUT NL_DECLARE_ATTR_PARSER(vlan_parser, nla_p_vlan); static int vlan_clone_create_nl(struct if_clone *ifc, char *name, size_t len, struct ifc_data_nl *ifd) { struct epoch_tracker et; struct ifnet *ifp_parent; struct nl_pstate *npt = ifd->npt; struct nl_parsed_link *lattrs = ifd->lattrs; int error; /* * lattrs.ifla_ifname is the new interface name * lattrs.ifi_index contains parent interface index * lattrs.ifla_idata contains un-parsed vlan data */ struct nl_parsed_vlan attrs = { .vlan_id = 0xFEFE, .vlan_proto = ETHERTYPE_VLAN }; if (lattrs->ifla_idata == NULL) { nlmsg_report_err_msg(npt, "vlan id is required, guessing not supported"); return (ENOTSUP); } error = nl_parse_nested(lattrs->ifla_idata, &vlan_parser, npt, &attrs); if (error != 0) return (error); if (attrs.vlan_id > DOT1Q_VID_MAX) { nlmsg_report_err_msg(npt, "Invalid VID: %d", attrs.vlan_id); return (EINVAL); } if (attrs.vlan_proto != ETHERTYPE_VLAN && attrs.vlan_proto != ETHERTYPE_QINQ) { nlmsg_report_err_msg(npt, "Unsupported ethertype: 0x%04X", attrs.vlan_proto); return (ENOTSUP); } struct vlanreq params = { .vlr_tag = attrs.vlan_id, .vlr_proto = attrs.vlan_proto, }; struct ifc_data ifd_new = { .flags = IFC_F_SYSSPACE, .unit = ifd->unit, .params = ¶ms }; NET_EPOCH_ENTER(et); ifp_parent = ifnet_byindex(lattrs->ifi_index); if (ifp_parent != NULL) strlcpy(params.vlr_parent, if_name(ifp_parent), sizeof(params.vlr_parent)); NET_EPOCH_EXIT(et); if (ifp_parent == NULL) { nlmsg_report_err_msg(npt, "unable to find parent interface %u", lattrs->ifi_index); return (ENOENT); } error = vlan_clone_create(ifc, name, len, &ifd_new, &ifd->ifp); return (error); } static int vlan_clone_modify_nl(struct ifnet *ifp, struct ifc_data_nl *ifd) { struct nl_parsed_link *lattrs = ifd->lattrs; if ((lattrs->ifla_idata != NULL) && ((ifd->flags & IFC_F_CREATE) == 0)) { struct epoch_tracker et; struct nl_parsed_vlan attrs = { .vlan_proto = ETHERTYPE_VLAN, }; int error; error = nl_parse_nested(lattrs->ifla_idata, &vlan_parser, ifd->npt, &attrs); if (error != 0) return (error); NET_EPOCH_ENTER(et); struct ifnet *ifp_parent = ifnet_byindex_ref(lattrs->ifla_link); NET_EPOCH_EXIT(et); if (ifp_parent == NULL) { nlmsg_report_err_msg(ifd->npt, "unable to find parent interface %u", lattrs->ifla_link); return (ENOENT); } struct ifvlan *ifv = ifp->if_softc; error = vlan_config(ifv, ifp_parent, attrs.vlan_id, attrs.vlan_proto); if_rele(ifp_parent); if (error != 0) return (error); } return (nl_modify_ifp_generic(ifp, ifd->lattrs, ifd->bm, ifd->npt)); } /* * {{nla_len=24, nla_type=IFLA_LINKINFO}, * [ * {{nla_len=8, 
nla_type=IFLA_INFO_KIND}, "vlan"...}, * {{nla_len=12, nla_type=IFLA_INFO_DATA}, "\x06\x00\x01\x00\x16\x00\x00\x00"}]} */ static void vlan_clone_dump_nl(struct ifnet *ifp, struct nl_writer *nw) { uint32_t parent_index = 0; uint16_t vlan_id = 0; uint16_t vlan_proto = 0; VLAN_SLOCK(); struct ifvlan *ifv = ifp->if_softc; if (TRUNK(ifv) != NULL) parent_index = PARENT(ifv)->if_index; vlan_id = ifv->ifv_vid; vlan_proto = ifv->ifv_proto; VLAN_SUNLOCK(); if (parent_index != 0) nlattr_add_u32(nw, IFLA_LINK, parent_index); int off = nlattr_add_nested(nw, IFLA_LINKINFO); if (off != 0) { nlattr_add_string(nw, IFLA_INFO_KIND, "vlan"); int off2 = nlattr_add_nested(nw, IFLA_INFO_DATA); if (off2 != 0) { nlattr_add_u16(nw, IFLA_VLAN_ID, vlan_id); nlattr_add_u16(nw, IFLA_VLAN_PROTOCOL, vlan_proto); nlattr_set_len(nw, off2); } nlattr_set_len(nw, off); } } static int vlan_clone_destroy(struct if_clone *ifc, struct ifnet *ifp, uint32_t flags) { struct ifvlan *ifv = ifp->if_softc; int unit = ifp->if_dunit; if (ifp->if_vlantrunk) return (EBUSY); #ifdef ALTQ IFQ_PURGE(&ifp->if_snd); #endif ether_ifdetach(ifp); /* first, remove it from system-wide lists */ vlan_unconfig(ifp); /* now it can be unconfigured and freed */ /* * We should have the only reference to the ifv now, so we can now * drain any remaining lladdr task before freeing the ifnet and the * ifvlan. */ taskqueue_drain(taskqueue_thread, &ifv->lladdr_task); NET_EPOCH_WAIT(); if_free(ifp); free(ifv, M_VLAN); if (unit != IF_DUNIT_NONE) ifc_free_unit(ifc, unit); return (0); } /* * The ifp->if_init entry point for vlan(4) is a no-op. */ static void vlan_init(void *foo __unused) { } /* * The if_transmit method for vlan(4) interface. */ static int vlan_transmit(struct ifnet *ifp, struct mbuf *m) { struct ifvlan *ifv; struct ifnet *p; int error, len, mcast; NET_EPOCH_ASSERT(); ifv = ifp->if_softc; if (TRUNK(ifv) == NULL) { if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); m_freem(m); return (ENETDOWN); } p = PARENT(ifv); len = m->m_pkthdr.len; mcast = (m->m_flags & (M_MCAST | M_BCAST)) ? 1 : 0; BPF_MTAP(ifp, m); #if defined(KERN_TLS) || defined(RATELIMIT) if (m->m_pkthdr.csum_flags & CSUM_SND_TAG) { struct vlan_snd_tag *vst; struct m_snd_tag *mst; MPASS(m->m_pkthdr.snd_tag->ifp == ifp); mst = m->m_pkthdr.snd_tag; vst = mst_to_vst(mst); if (vst->tag->ifp != p) { if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); m_freem(m); return (EAGAIN); } m->m_pkthdr.snd_tag = m_snd_tag_ref(vst->tag); m_snd_tag_rele(mst); } #endif /* * Do not run parent's if_transmit() if the parent is not up, * or parent's driver will cause a system crash. */ if (!UP_AND_RUNNING(p)) { if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); m_freem(m); return (ENETDOWN); } if (!ether_8021q_frame(&m, ifp, p, &ifv->ifv_qtag)) { if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); return (0); } /* * Send it, precisely as ether_output() would have. */ error = (p->if_transmit)(p, m); if (error == 0) { if_inc_counter(ifp, IFCOUNTER_OPACKETS, 1); if_inc_counter(ifp, IFCOUNTER_OBYTES, len); if_inc_counter(ifp, IFCOUNTER_OMCASTS, mcast); } else if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); return (error); } static int vlan_output(struct ifnet *ifp, struct mbuf *m, const struct sockaddr *dst, struct route *ro) { struct ifvlan *ifv; struct ifnet *p; NET_EPOCH_ASSERT(); /* * Find the first non-VLAN parent interface. 
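 *
 * Example (interface names are made up): with stacked vlans created as
 * em0.100.200, the loop below walks
 *
 *        em0.100.200 (IFT_L2VLAN) -> em0.100 (IFT_L2VLAN) -> em0
 *
 * and hands the mbuf to em0's if_output(), still passing the original vlan
 * ifp as the first argument.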
*/ ifv = ifp->if_softc; do { if (TRUNK(ifv) == NULL) { m_freem(m); return (ENETDOWN); } p = PARENT(ifv); ifv = p->if_softc; } while (p->if_type == IFT_L2VLAN); return p->if_output(ifp, m, dst, ro); } #ifdef ALTQ static void vlan_altq_start(if_t ifp) { struct ifaltq *ifq = &ifp->if_snd; struct mbuf *m; IFQ_LOCK(ifq); IFQ_DEQUEUE_NOLOCK(ifq, m); while (m != NULL) { vlan_transmit(ifp, m); IFQ_DEQUEUE_NOLOCK(ifq, m); } IFQ_UNLOCK(ifq); } static int vlan_altq_transmit(if_t ifp, struct mbuf *m) { int err; if (ALTQ_IS_ENABLED(&ifp->if_snd)) { IFQ_ENQUEUE(&ifp->if_snd, m, err); if (err == 0) vlan_altq_start(ifp); } else err = vlan_transmit(ifp, m); return (err); } #endif /* ALTQ */ /* * The ifp->if_qflush entry point for vlan(4) is a no-op. */ static void vlan_qflush(struct ifnet *ifp __unused) { } static void vlan_input(struct ifnet *ifp, struct mbuf *m) { struct ifvlantrunk *trunk; struct ifvlan *ifv; struct m_tag *mtag; uint16_t vid, tag; NET_EPOCH_ASSERT(); trunk = ifp->if_vlantrunk; if (trunk == NULL) { m_freem(m); return; } if (m->m_flags & M_VLANTAG) { /* * Packet is tagged, but m contains a normal * Ethernet frame; the tag is stored out-of-band. */ tag = m->m_pkthdr.ether_vtag; m->m_flags &= ~M_VLANTAG; } else { struct ether_vlan_header *evl; /* * Packet is tagged in-band as specified by 802.1q. */ switch (ifp->if_type) { case IFT_ETHER: if (m->m_len < sizeof(*evl) && (m = m_pullup(m, sizeof(*evl))) == NULL) { if_printf(ifp, "cannot pullup VLAN header\n"); return; } evl = mtod(m, struct ether_vlan_header *); tag = ntohs(evl->evl_tag); /* * Remove the 802.1q header by copying the Ethernet * addresses over it and adjusting the beginning of * the data in the mbuf. The encapsulated Ethernet * type field is already in place. */ bcopy((char *)evl, (char *)evl + ETHER_VLAN_ENCAP_LEN, ETHER_HDR_LEN - ETHER_TYPE_LEN); m_adj(m, ETHER_VLAN_ENCAP_LEN); break; default: #ifdef INVARIANTS panic("%s: %s has unsupported if_type %u", __func__, ifp->if_xname, ifp->if_type); #endif if_inc_counter(ifp, IFCOUNTER_NOPROTO, 1); m_freem(m); return; } } vid = EVL_VLANOFTAG(tag); ifv = vlan_gethash(trunk, vid); if (ifv == NULL || !UP_AND_RUNNING(ifv->ifv_ifp)) { if_inc_counter(ifp, IFCOUNTER_NOPROTO, 1); m_freem(m); return; } if (V_vlan_mtag_pcp) { /* * While uncommon, it is possible that we will find a 802.1q * packet encapsulated inside another packet that also had an * 802.1q header. For example, ethernet tunneled over IPSEC * arriving over ethernet. In that case, we replace the * existing 802.1q PCP m_tag value. */ mtag = m_tag_locate(m, MTAG_8021Q, MTAG_8021Q_PCP_IN, NULL); if (mtag == NULL) { mtag = m_tag_alloc(MTAG_8021Q, MTAG_8021Q_PCP_IN, sizeof(uint8_t), M_NOWAIT); if (mtag == NULL) { if_inc_counter(ifp, IFCOUNTER_IERRORS, 1); m_freem(m); return; } m_tag_prepend(m, mtag); } *(uint8_t *)(mtag + 1) = EVL_PRIOFTAG(tag); } m->m_pkthdr.rcvif = ifv->ifv_ifp; if_inc_counter(ifv->ifv_ifp, IFCOUNTER_IPACKETS, 1); /* Pass it back through the parent's input routine. */ (*ifv->ifv_ifp->if_input)(ifv->ifv_ifp, m); } static void vlan_lladdr_fn(void *arg, int pending __unused) { struct ifvlan *ifv; struct ifnet *ifp; ifv = (struct ifvlan *)arg; ifp = ifv->ifv_ifp; CURVNET_SET(ifp->if_vnet); /* The ifv_ifp already has the lladdr copied in. 
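 *
 * Call-flow sketch (describing existing code only): a SIOCSIFLLADDR on the
 * parent ends up here roughly as
 *
 *        parent SIOCSIFLLADDR
 *            -> iflladdr_event -> vlan_iflladdr()    epoch + TRUNK_WLOCK;
 *               copies the bytes and calls
 *               taskqueue_enqueue(taskqueue_thread, &ifv->lladdr_task)
 *        ... later, on taskqueue_thread ...
 *            -> vlan_lladdr_fn() -> if_setlladdr()   may re-enter the vlan
 *                                                    ioctl path and take
 *                                                    the global lock
 *
 * which is why the if_setlladdr() call is deferred rather than made from
 * the event handler itself.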
*/ if_setlladdr(ifp, IF_LLADDR(ifp), ifp->if_addrlen); CURVNET_RESTORE(); } static int vlan_config(struct ifvlan *ifv, struct ifnet *p, uint16_t vid, uint16_t proto) { struct epoch_tracker et; struct ifvlantrunk *trunk; struct ifnet *ifp; int error = 0; /* * We can handle non-ethernet hardware types as long as * they handle the tagging and headers themselves. */ if (p->if_type != IFT_ETHER && p->if_type != IFT_L2VLAN && p->if_type != IFT_BRIDGE && (p->if_capenable & IFCAP_VLAN_HWTAGGING) == 0) return (EPROTONOSUPPORT); if ((p->if_flags & VLAN_IFFLAGS) != VLAN_IFFLAGS) return (EPROTONOSUPPORT); /* * Don't let the caller set up a VLAN VID with * anything except VLID bits. * VID numbers 0x0 and 0xFFF are reserved. */ if (vid == 0 || vid == 0xFFF || (vid & ~EVL_VLID_MASK)) return (EINVAL); if (ifv->ifv_trunk) { trunk = ifv->ifv_trunk; if (trunk->parent != p) return (EBUSY); VLAN_XLOCK(); ifv->ifv_proto = proto; if (ifv->ifv_vid != vid) { int oldvid = ifv->ifv_vid; /* Re-hash */ vlan_remhash(trunk, ifv); ifv->ifv_vid = vid; error = vlan_inshash(trunk, ifv); if (error) { int ret __diagused; ifv->ifv_vid = oldvid; /* Re-insert back where we found it. */ ret = vlan_inshash(trunk, ifv); MPASS(ret == 0); } } /* Will unlock */ goto done; } VLAN_XLOCK(); if (p->if_vlantrunk == NULL) { trunk = malloc(sizeof(struct ifvlantrunk), M_VLAN, M_WAITOK | M_ZERO); vlan_inithash(trunk); TRUNK_LOCK_INIT(trunk); TRUNK_WLOCK(trunk); p->if_vlantrunk = trunk; trunk->parent = p; if_ref(trunk->parent); TRUNK_WUNLOCK(trunk); } else { trunk = p->if_vlantrunk; } ifv->ifv_vid = vid; /* must set this before vlan_inshash() */ ifv->ifv_pcp = 0; /* Default: best effort delivery. */ error = vlan_inshash(trunk, ifv); if (error) goto done; ifv->ifv_proto = proto; ifv->ifv_encaplen = ETHER_VLAN_ENCAP_LEN; ifv->ifv_mintu = ETHERMIN; ifv->ifv_pflags = 0; ifv->ifv_capenable = -1; ifv->ifv_capenable2 = -1; /* * If the parent supports the VLAN_MTU capability, * i.e. can Tx/Rx larger than ETHER_MAX_LEN frames, * use it. */ if (p->if_capenable & IFCAP_VLAN_MTU) { /* * No need to fudge the MTU since the parent can * handle extended frames. */ ifv->ifv_mtufudge = 0; } else { /* * Fudge the MTU by the encapsulation size. This * makes us incompatible with strictly compliant * 802.1Q implementations, but allows us to use * the feature with other NetBSD implementations, * which might still be useful. */ ifv->ifv_mtufudge = ifv->ifv_encaplen; } ifv->ifv_trunk = trunk; ifp = ifv->ifv_ifp; /* * Initialize fields from our parent. This duplicates some * work with ether_ifattach() but allows for non-ethernet * interfaces to also work. */ ifp->if_mtu = p->if_mtu - ifv->ifv_mtufudge; ifp->if_baudrate = p->if_baudrate; ifp->if_input = p->if_input; ifp->if_resolvemulti = p->if_resolvemulti; ifp->if_addrlen = p->if_addrlen; ifp->if_broadcastaddr = p->if_broadcastaddr; ifp->if_pcp = ifv->ifv_pcp; /* * We wrap the parent's if_output using vlan_output to ensure that it * can't become stale. */ ifp->if_output = vlan_output; /* * Copy only a selected subset of flags from the parent. * Other flags are none of our business. */ #define VLAN_COPY_FLAGS (IFF_SIMPLEX) ifp->if_flags &= ~VLAN_COPY_FLAGS; ifp->if_flags |= p->if_flags & VLAN_COPY_FLAGS; #undef VLAN_COPY_FLAGS ifp->if_link_state = p->if_link_state; NET_EPOCH_ENTER(et); vlan_capabilities(ifv); NET_EPOCH_EXIT(et); /* * Set up our interface address to reflect the underlying * physical interface's. 
*/ TASK_INIT(&ifv->lladdr_task, 0, vlan_lladdr_fn, ifv); ((struct sockaddr_dl *)ifp->if_addr->ifa_addr)->sdl_alen = p->if_addrlen; /* * Do not schedule link address update if it was the same * as previous parent's. This helps avoid updating for each * associated llentry. */ if (memcmp(IF_LLADDR(p), IF_LLADDR(ifp), p->if_addrlen) != 0) { bcopy(IF_LLADDR(p), IF_LLADDR(ifp), p->if_addrlen); taskqueue_enqueue(taskqueue_thread, &ifv->lladdr_task); } /* We are ready for operation now. */ ifp->if_drv_flags |= IFF_DRV_RUNNING; /* Update flags on the parent, if necessary. */ vlan_setflags(ifp, 1); /* * Configure multicast addresses that may already be * joined on the vlan device. */ (void)vlan_setmulti(ifp); done: if (error == 0) EVENTHANDLER_INVOKE(vlan_config, p, ifv->ifv_vid); VLAN_XUNLOCK(); return (error); } static void vlan_unconfig(struct ifnet *ifp) { VLAN_XLOCK(); vlan_unconfig_locked(ifp, 0); VLAN_XUNLOCK(); } static void vlan_unconfig_locked(struct ifnet *ifp, int departing) { struct ifvlantrunk *trunk; struct vlan_mc_entry *mc; struct ifvlan *ifv; struct ifnet *parent; int error; VLAN_XLOCK_ASSERT(); ifv = ifp->if_softc; trunk = ifv->ifv_trunk; parent = NULL; if (trunk != NULL) { parent = trunk->parent; /* * Since the interface is being unconfigured, we need to * empty the list of multicast groups that we may have joined * while we were alive from the parent's list. */ while ((mc = CK_SLIST_FIRST(&ifv->vlan_mc_listhead)) != NULL) { /* * If the parent interface is being detached, * all its multicast addresses have already * been removed. Warn about errors if * if_delmulti() does fail, but don't abort as * all callers expect vlan destruction to * succeed. */ if (!departing) { error = if_delmulti(parent, (struct sockaddr *)&mc->mc_addr); if (error) if_printf(ifp, "Failed to delete multicast address from parent: %d\n", error); } CK_SLIST_REMOVE_HEAD(&ifv->vlan_mc_listhead, mc_entries); NET_EPOCH_CALL(vlan_mc_free, &mc->mc_epoch_ctx); } vlan_setflags(ifp, 0); /* clear special flags on parent */ vlan_remhash(trunk, ifv); ifv->ifv_trunk = NULL; /* * Check if we were the last. */ if (trunk->refcnt == 0) { parent->if_vlantrunk = NULL; NET_EPOCH_WAIT(); trunk_destroy(trunk); } } /* Disconnect from parent. */ if (ifv->ifv_pflags) if_printf(ifp, "%s: ifv_pflags unclean\n", __func__); ifp->if_mtu = ETHERMTU; ifp->if_link_state = LINK_STATE_UNKNOWN; ifp->if_drv_flags &= ~IFF_DRV_RUNNING; /* * Only dispatch an event if vlan was * attached, otherwise there is nothing * to cleanup anyway. */ if (parent != NULL) EVENTHANDLER_INVOKE(vlan_unconfig, parent, ifv->ifv_vid); } /* Handle a reference counted flag that should be set on the parent as well */ static int vlan_setflag(struct ifnet *ifp, int flag, int status, int (*func)(struct ifnet *, int)) { struct ifvlan *ifv; int error; VLAN_SXLOCK_ASSERT(); ifv = ifp->if_softc; status = status ? (ifp->if_flags & flag) : 0; /* Now "status" contains the flag value or 0 */ /* * See if recorded parent's status is different from what * we want it to be. If it is, flip it. We record parent's * status in ifv_pflags so that we won't clear parent's flag * we haven't set. In fact, we don't clear or set parent's * flags directly, but get or release references to them. * That's why we can be sure that recorded flags still are * in accord with actual parent's flags. 
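 *
 * Hypothetical walk-through, assuming IFF_PROMISC is one of the entries in
 * vlan_pflags[]: when the vlan is switched into promiscuous mode,
 *
 *        vlan_setflag(ifp, IFF_PROMISC, 1, func)
 *            status = ifp->if_flags & IFF_PROMISC    (non-zero)
 *            status differs from ifv_pflags, so (*func)(parent, status)
 *                acquires a reference on the parent's promiscuous state
 *            ifv_pflags |= IFF_PROMISC
 *
 * and the mirror-image call with status forced to 0 releases that
 * reference, so the parent leaves promiscuous mode only once no consumer
 * still needs it.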
*/ if (status != (ifv->ifv_pflags & flag)) { error = (*func)(PARENT(ifv), status); if (error) return (error); ifv->ifv_pflags &= ~flag; ifv->ifv_pflags |= status; } return (0); } /* * Handle IFF_* flags that require certain changes on the parent: * if "status" is true, update parent's flags respective to our if_flags; * if "status" is false, forcedly clear the flags set on parent. */ static int vlan_setflags(struct ifnet *ifp, int status) { int error, i; for (i = 0; vlan_pflags[i].flag; i++) { error = vlan_setflag(ifp, vlan_pflags[i].flag, status, vlan_pflags[i].func); if (error) return (error); } return (0); } /* Inform all vlans that their parent has changed link state */ static void vlan_link_state(struct ifnet *ifp) { struct epoch_tracker et; struct ifvlantrunk *trunk; struct ifvlan *ifv; NET_EPOCH_ENTER(et); trunk = ifp->if_vlantrunk; if (trunk == NULL) { NET_EPOCH_EXIT(et); return; } TRUNK_WLOCK(trunk); VLAN_FOREACH(ifv, trunk) { ifv->ifv_ifp->if_baudrate = trunk->parent->if_baudrate; if_link_state_change(ifv->ifv_ifp, trunk->parent->if_link_state); } TRUNK_WUNLOCK(trunk); NET_EPOCH_EXIT(et); } #ifdef IPSEC_OFFLOAD #define VLAN_IPSEC_METHOD(exp) \ if_t p; \ struct ifvlan *ifv; \ int error; \ \ ifv = ifp->if_softc; \ VLAN_SLOCK(); \ if (TRUNK(ifv) != NULL) { \ p = PARENT(ifv); \ if_ref(p); \ error = p->if_ipsec_accel_m->exp; \ if_rele(p); \ } else { \ error = ENXIO; \ } \ VLAN_SUNLOCK(); \ return (error); static int vlan_if_spdadd(if_t ifp, void *sp, void *inp, void **priv) { VLAN_IPSEC_METHOD(if_spdadd(ifp, sp, inp, priv)); } static int vlan_if_spddel(if_t ifp, void *sp, void *priv) { VLAN_IPSEC_METHOD(if_spddel(ifp, sp, priv)); } static int vlan_if_sa_newkey(if_t ifp, void *sav, u_int drv_spi, void **privp) { VLAN_IPSEC_METHOD(if_sa_newkey(ifp, sav, drv_spi, privp)); } static int vlan_if_sa_deinstall(if_t ifp, u_int drv_spi, void *priv) { VLAN_IPSEC_METHOD(if_sa_deinstall(ifp, drv_spi, priv)); } static int vlan_if_sa_cnt(if_t ifp, void *sa, uint32_t drv_spi, void *priv, struct seclifetime *lt) { VLAN_IPSEC_METHOD(if_sa_cnt(ifp, sa, drv_spi, priv, lt)); } static int vlan_if_ipsec_hwassist(if_t ifp, void *sav, u_int drv_spi,void *priv) { if_t trunk; NET_EPOCH_ASSERT(); trunk = vlan_trunkdev(ifp); if (trunk == NULL) return (0); return (trunk->if_ipsec_accel_m->if_hwassist(trunk, sav, drv_spi, priv)); } static const struct if_ipsec_accel_methods vlan_if_ipsec_accel_methods = { .if_spdadd = vlan_if_spdadd, .if_spddel = vlan_if_spddel, .if_sa_newkey = vlan_if_sa_newkey, .if_sa_deinstall = vlan_if_sa_deinstall, .if_sa_cnt = vlan_if_sa_cnt, .if_hwassist = vlan_if_ipsec_hwassist, }; #undef VLAN_IPSEC_METHOD #endif /* IPSEC_OFFLOAD */ static void vlan_capabilities(struct ifvlan *ifv) { struct ifnet *p; struct ifnet *ifp; struct ifnet_hw_tsomax hw_tsomax; int cap = 0, ena = 0, mena, cap2 = 0, ena2 = 0; int mena2 __unused; u_long hwa = 0; NET_EPOCH_ASSERT(); VLAN_SXLOCK_ASSERT(); p = PARENT(ifv); ifp = ifv->ifv_ifp; /* Mask parent interface enabled capabilities disabled by user. */ mena = p->if_capenable & ifv->ifv_capenable; mena2 = p->if_capenable2 & ifv->ifv_capenable2; /* * If the parent interface can do checksum offloading * on VLANs, then propagate its hardware-assisted * checksumming flags. Also assert that checksum * offloading requires hardware VLAN tagging. 
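 *
 * Illustration with a hypothetical parent: if the parent has both
 * IFCAP_VLAN_HWCSUM and IFCAP_VLAN_HWTAGGING enabled, and IFCAP_TXCSUM is
 * enabled on the parent and not masked off on the vlan, the code below
 * leaves the vlan advertising
 *
 *        ena |= IFCAP_TXCSUM;
 *        hwa |= p->if_hwassist & (CSUM_IP | CSUM_TCP | CSUM_UDP |
 *            CSUM_SCTP);
 *
 * whereas a parent that can only checksum untagged frames (no
 * IFCAP_VLAN_HWTAGGING) contributes nothing to ena or hwa here.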
*/ if (p->if_capabilities & IFCAP_VLAN_HWCSUM) cap |= p->if_capabilities & (IFCAP_HWCSUM | IFCAP_HWCSUM_IPV6); if (p->if_capenable & IFCAP_VLAN_HWCSUM && p->if_capenable & IFCAP_VLAN_HWTAGGING) { ena |= mena & (IFCAP_HWCSUM | IFCAP_HWCSUM_IPV6); if (ena & IFCAP_TXCSUM) hwa |= p->if_hwassist & (CSUM_IP | CSUM_TCP | CSUM_UDP | CSUM_SCTP); if (ena & IFCAP_TXCSUM_IPV6) hwa |= p->if_hwassist & (CSUM_TCP_IPV6 | CSUM_UDP_IPV6 | CSUM_SCTP_IPV6); } /* * If the parent interface can do TSO on VLANs then * propagate the hardware-assisted flag. TSO on VLANs * does not necessarily require hardware VLAN tagging. */ memset(&hw_tsomax, 0, sizeof(hw_tsomax)); if_hw_tsomax_common(p, &hw_tsomax); if_hw_tsomax_update(ifp, &hw_tsomax); if (p->if_capabilities & IFCAP_VLAN_HWTSO) cap |= p->if_capabilities & IFCAP_TSO; if (p->if_capenable & IFCAP_VLAN_HWTSO) { ena |= mena & IFCAP_TSO; if (ena & IFCAP_TSO) hwa |= p->if_hwassist & CSUM_TSO; } /* * If the parent interface can do LRO and checksum offloading on * VLANs, then guess it may do LRO on VLANs. False positive here * cost nothing, while false negative may lead to some confusions. */ if (p->if_capabilities & IFCAP_VLAN_HWCSUM) cap |= p->if_capabilities & IFCAP_LRO; if (p->if_capenable & IFCAP_VLAN_HWCSUM) ena |= mena & IFCAP_LRO; /* * If the parent interface can offload TCP connections over VLANs then * propagate its TOE capability to the VLAN interface. * * All TOE drivers in the tree today can deal with VLANs. If this * changes then IFCAP_VLAN_TOE should be promoted to a full capability * with its own bit. */ #define IFCAP_VLAN_TOE IFCAP_TOE if (p->if_capabilities & IFCAP_VLAN_TOE) cap |= p->if_capabilities & IFCAP_TOE; if (p->if_capenable & IFCAP_VLAN_TOE) { SETTOEDEV(ifp, TOEDEV(p)); ena |= mena & IFCAP_TOE; } /* * If the parent interface supports dynamic link state, so does the * VLAN interface. */ cap |= (p->if_capabilities & IFCAP_LINKSTATE); ena |= (mena & IFCAP_LINKSTATE); #ifdef RATELIMIT /* * If the parent interface supports ratelimiting, so does the * VLAN interface. */ cap |= (p->if_capabilities & IFCAP_TXRTLMT); ena |= (mena & IFCAP_TXRTLMT); #endif /* * If the parent interface supports unmapped mbufs, so does * the VLAN interface. Note that this should be fine even for * interfaces that don't support hardware tagging as headers * are prepended in normal mbufs to unmapped mbufs holding * payload data. */ cap |= (p->if_capabilities & IFCAP_MEXTPG); ena |= (mena & IFCAP_MEXTPG); /* * If the parent interface can offload encryption and segmentation * of TLS records over TCP, propagate it's capability to the VLAN * interface. * * All TLS drivers in the tree today can deal with VLANs. If * this ever changes, then a new IFCAP_VLAN_TXTLS can be * defined. 
*/ if (p->if_capabilities & (IFCAP_TXTLS | IFCAP_TXTLS_RTLMT)) cap |= p->if_capabilities & (IFCAP_TXTLS | IFCAP_TXTLS_RTLMT); if (p->if_capenable & (IFCAP_TXTLS | IFCAP_TXTLS_RTLMT)) ena |= mena & (IFCAP_TXTLS | IFCAP_TXTLS_RTLMT); ifp->if_capabilities = cap; ifp->if_capenable = ena; ifp->if_hwassist = hwa; #ifdef IPSEC_OFFLOAD cap2 |= p->if_capabilities2 & IFCAP2_BIT(IFCAP2_IPSEC_OFFLOAD); ena2 |= mena2 & IFCAP2_BIT(IFCAP2_IPSEC_OFFLOAD); ifp->if_ipsec_accel_m = &vlan_if_ipsec_accel_methods; #endif ifp->if_capabilities2 = cap2; ifp->if_capenable2 = ena2; } static void vlan_trunk_capabilities(struct ifnet *ifp) { struct epoch_tracker et; struct ifvlantrunk *trunk; struct ifvlan *ifv; VLAN_SLOCK(); trunk = ifp->if_vlantrunk; if (trunk == NULL) { VLAN_SUNLOCK(); return; } NET_EPOCH_ENTER(et); VLAN_FOREACH(ifv, trunk) vlan_capabilities(ifv); NET_EPOCH_EXIT(et); VLAN_SUNLOCK(); } static int vlan_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data) { struct ifnet *p; struct ifreq *ifr; #ifdef INET struct ifaddr *ifa; #endif struct ifvlan *ifv; struct ifvlantrunk *trunk; struct vlanreq vlr; int error = 0, oldmtu; ifr = (struct ifreq *)data; #ifdef INET ifa = (struct ifaddr *) data; #endif ifv = ifp->if_softc; switch (cmd) { case SIOCSIFADDR: ifp->if_flags |= IFF_UP; #ifdef INET if (ifa->ifa_addr->sa_family == AF_INET) arp_ifinit(ifp, ifa); #endif break; case SIOCGIFADDR: bcopy(IF_LLADDR(ifp), &ifr->ifr_addr.sa_data[0], ifp->if_addrlen); break; case SIOCGIFMEDIA: VLAN_SLOCK(); if (TRUNK(ifv) != NULL) { p = PARENT(ifv); if_ref(p); error = (*p->if_ioctl)(p, SIOCGIFMEDIA, data); if_rele(p); /* Limit the result to the parent's current config. */ if (error == 0) { struct ifmediareq *ifmr; ifmr = (struct ifmediareq *)data; if (ifmr->ifm_count >= 1 && ifmr->ifm_ulist) { ifmr->ifm_count = 1; error = copyout(&ifmr->ifm_current, ifmr->ifm_ulist, sizeof(int)); } } } else { error = EINVAL; } VLAN_SUNLOCK(); break; case SIOCSIFMEDIA: error = EINVAL; break; case SIOCSIFMTU: /* * Set the interface MTU. */ VLAN_SLOCK(); trunk = TRUNK(ifv); if (trunk != NULL) { TRUNK_WLOCK(trunk); if (ifr->ifr_mtu > (PARENT(ifv)->if_mtu - ifv->ifv_mtufudge) || ifr->ifr_mtu < (ifv->ifv_mintu - ifv->ifv_mtufudge)) error = EINVAL; else ifp->if_mtu = ifr->ifr_mtu; TRUNK_WUNLOCK(trunk); } else error = EINVAL; VLAN_SUNLOCK(); break; case SIOCSETVLAN: #ifdef VIMAGE /* * XXXRW/XXXBZ: The goal in these checks is to allow a VLAN * interface to be delegated to a jail without allowing the * jail to change what underlying interface/VID it is * associated with. We are not entirely convinced that this * is the right way to accomplish that policy goal. */ if (ifp->if_vnet != ifp->if_home_vnet) { error = EPERM; break; } #endif error = copyin(ifr_data_get_ptr(ifr), &vlr, sizeof(vlr)); if (error) break; if (vlr.vlr_parent[0] == '\0') { vlan_unconfig(ifp); break; } p = ifunit_ref(vlr.vlr_parent); if (p == NULL) { error = ENOENT; break; } + + /* + * If the ifp is in a bridge, do not allow setting the device + * to a bridge; this prevents having a bridge SVI as a bridge + * member (which is not permitted). + */ + if (ifp->if_bridge != NULL && p->if_type == IFT_BRIDGE) { + if_rele(p); + error = EINVAL; + break; + } + if (vlr.vlr_proto == 0) vlr.vlr_proto = ETHERTYPE_VLAN; oldmtu = ifp->if_mtu; error = vlan_config(ifv, p, vlr.vlr_tag, vlr.vlr_proto); if_rele(p); /* * VLAN MTU may change during addition of the vlandev. * If it did, do network layer specific procedure. 
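 *
 * Worked example (assuming a standard 1500-byte Ethernet parent): if the
 * parent lacks IFCAP_VLAN_MTU, vlan_config() set
 * ifv_mtufudge = ETHER_VLAN_ENCAP_LEN, so
 *
 *        ifp->if_mtu = 1500 - 4 = 1496;
 *
 * while a parent advertising IFCAP_VLAN_MTU keeps the vlan at 1500.  In
 * either case the if_notifymtu() below only runs when the value actually
 * changed from the oldmtu saved before vlan_config().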
*/ if (ifp->if_mtu != oldmtu) if_notifymtu(ifp); break; case SIOCGETVLAN: #ifdef VIMAGE if (ifp->if_vnet != ifp->if_home_vnet) { error = EPERM; break; } #endif bzero(&vlr, sizeof(vlr)); VLAN_SLOCK(); if (TRUNK(ifv) != NULL) { strlcpy(vlr.vlr_parent, PARENT(ifv)->if_xname, sizeof(vlr.vlr_parent)); vlr.vlr_tag = ifv->ifv_vid; vlr.vlr_proto = ifv->ifv_proto; } VLAN_SUNLOCK(); error = copyout(&vlr, ifr_data_get_ptr(ifr), sizeof(vlr)); break; case SIOCSIFFLAGS: /* * We should propagate selected flags to the parent, * e.g., promiscuous mode. */ VLAN_SLOCK(); if (TRUNK(ifv) != NULL) error = vlan_setflags(ifp, 1); VLAN_SUNLOCK(); break; case SIOCADDMULTI: case SIOCDELMULTI: /* * If we don't have a parent, just remember the membership for * when we do. * * XXX We need the rmlock here to avoid sleeping while * holding in6_multi_mtx. */ VLAN_XLOCK(); trunk = TRUNK(ifv); if (trunk != NULL) error = vlan_setmulti(ifp); VLAN_XUNLOCK(); break; case SIOCGVLANPCP: #ifdef VIMAGE if (ifp->if_vnet != ifp->if_home_vnet) { error = EPERM; break; } #endif ifr->ifr_vlan_pcp = ifv->ifv_pcp; break; case SIOCSVLANPCP: #ifdef VIMAGE if (ifp->if_vnet != ifp->if_home_vnet) { error = EPERM; break; } #endif error = priv_check(curthread, PRIV_NET_SETVLANPCP); if (error) break; if (ifr->ifr_vlan_pcp > VLAN_PCP_MAX) { error = EINVAL; break; } ifv->ifv_pcp = ifr->ifr_vlan_pcp; ifp->if_pcp = ifv->ifv_pcp; /* broadcast event about PCP change */ EVENTHANDLER_INVOKE(ifnet_event, ifp, IFNET_EVENT_PCP); break; case SIOCSIFCAP: VLAN_SLOCK(); ifv->ifv_capenable = ifr->ifr_reqcap; trunk = TRUNK(ifv); if (trunk != NULL) { struct epoch_tracker et; NET_EPOCH_ENTER(et); vlan_capabilities(ifv); NET_EPOCH_EXIT(et); } VLAN_SUNLOCK(); break; default: error = EINVAL; break; } return (error); } #if defined(KERN_TLS) || defined(RATELIMIT) static int vlan_snd_tag_alloc(struct ifnet *ifp, union if_snd_tag_alloc_params *params, struct m_snd_tag **ppmt) { struct epoch_tracker et; const struct if_snd_tag_sw *sw; struct vlan_snd_tag *vst; struct ifvlan *ifv; struct ifnet *parent; struct m_snd_tag *mst; int error; NET_EPOCH_ENTER(et); ifv = ifp->if_softc; switch (params->hdr.type) { #ifdef RATELIMIT case IF_SND_TAG_TYPE_UNLIMITED: sw = &vlan_snd_tag_ul_sw; break; case IF_SND_TAG_TYPE_RATE_LIMIT: sw = &vlan_snd_tag_rl_sw; break; #endif #ifdef KERN_TLS case IF_SND_TAG_TYPE_TLS: sw = &vlan_snd_tag_tls_sw; break; case IF_SND_TAG_TYPE_TLS_RX: sw = NULL; if (params->tls_rx.vlan_id != 0) goto failure; params->tls_rx.vlan_id = ifv->ifv_vid; break; #ifdef RATELIMIT case IF_SND_TAG_TYPE_TLS_RATE_LIMIT: sw = &vlan_snd_tag_tls_rl_sw; break; #endif #endif default: goto failure; } if (ifv->ifv_trunk != NULL) parent = PARENT(ifv); else parent = NULL; if (parent == NULL) goto failure; if_ref(parent); NET_EPOCH_EXIT(et); if (sw != NULL) { vst = malloc(sizeof(*vst), M_VLAN, M_NOWAIT); if (vst == NULL) { if_rele(parent); return (ENOMEM); } } else vst = NULL; error = m_snd_tag_alloc(parent, params, &mst); if_rele(parent); if (error) { free(vst, M_VLAN); return (error); } if (sw != NULL) { m_snd_tag_init(&vst->com, ifp, sw); vst->tag = mst; *ppmt = &vst->com; } else *ppmt = mst; return (0); failure: NET_EPOCH_EXIT(et); return (EOPNOTSUPP); } static struct m_snd_tag * vlan_next_snd_tag(struct m_snd_tag *mst) { struct vlan_snd_tag *vst; vst = mst_to_vst(mst); return (vst->tag); } static int vlan_snd_tag_modify(struct m_snd_tag *mst, union if_snd_tag_modify_params *params) { struct vlan_snd_tag *vst; vst = mst_to_vst(mst); return (vst->tag->sw->snd_tag_modify(vst->tag, 
params)); } static int vlan_snd_tag_query(struct m_snd_tag *mst, union if_snd_tag_query_params *params) { struct vlan_snd_tag *vst; vst = mst_to_vst(mst); return (vst->tag->sw->snd_tag_query(vst->tag, params)); } static void vlan_snd_tag_free(struct m_snd_tag *mst) { struct vlan_snd_tag *vst; vst = mst_to_vst(mst); m_snd_tag_rele(vst->tag); free(vst, M_VLAN); } static void vlan_ratelimit_query(struct ifnet *ifp __unused, struct if_ratelimit_query_results *q) { /* * For vlan, we have an indirect * interface. The caller needs to * get a ratelimit tag on the actual * interface the flow will go on. */ q->rate_table = NULL; q->flags = RT_IS_INDIRECT; q->max_flows = 0; q->number_of_rates = 0; } #endif diff --git a/tests/sys/net/if_bridge_test.sh b/tests/sys/net/if_bridge_test.sh index cd38adea28ad..c0c085f22273 100755 --- a/tests/sys/net/if_bridge_test.sh +++ b/tests/sys/net/if_bridge_test.sh @@ -1,1250 +1,1274 @@ # # SPDX-License-Identifier: BSD-2-Clause # # Copyright (c) 2020 The FreeBSD Foundation # # This software was developed by Kristof Provost under sponsorship # from the FreeBSD Foundation. # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions # are met: # 1. Redistributions of source code must retain the above copyright # notice, this list of conditions and the following disclaimer. # 2. Redistributions in binary form must reproduce the above copyright # notice, this list of conditions and the following disclaimer in the # documentation and/or other materials provided with the distribution. # # THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND # ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE # ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS # OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) # HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT # LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY # OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF # SUCH DAMAGE. . 
$(atf_get_srcdir)/../common/vnet.subr atf_test_case "bridge_transmit_ipv4_unicast" "cleanup" bridge_transmit_ipv4_unicast_head() { atf_set descr 'bridge_transmit_ipv4_unicast bridging test' atf_set require.user root } bridge_transmit_ipv4_unicast_body() { vnet_init vnet_init_bridge epair_alcatraz=$(vnet_mkepair) epair_singsing=$(vnet_mkepair) vnet_mkjail alcatraz ${epair_alcatraz}b vnet_mkjail singsing ${epair_singsing}b jexec alcatraz ifconfig ${epair_alcatraz}b 192.0.2.1/24 up jexec singsing ifconfig ${epair_singsing}b 192.0.2.2/24 up bridge=$(vnet_mkbridge) ifconfig ${bridge} up ifconfig ${epair_alcatraz}a up ifconfig ${epair_singsing}a up ifconfig ${bridge} addm ${epair_alcatraz}a ifconfig ${bridge} addm ${epair_singsing}a atf_check -s exit:0 -o ignore jexec alcatraz ping -c 3 -t 1 192.0.2.2 atf_check -s exit:0 -o ignore jexec singsing ping -c 3 -t 1 192.0.2.1 } bridge_transmit_ipv4_unicast_cleanup() { vnet_cleanup } atf_test_case "stp" "cleanup" stp_head() { atf_set descr 'Spanning tree test' atf_set require.user root } stp_body() { vnet_init vnet_init_bridge epair_one=$(vnet_mkepair) epair_two=$(vnet_mkepair) bridge_a=$(vnet_mkbridge) bridge_b=$(vnet_mkbridge) vnet_mkjail a ${bridge_a} ${epair_one}a ${epair_two}a vnet_mkjail b ${bridge_b} ${epair_one}b ${epair_two}b jexec a ifconfig ${epair_one}a up jexec a ifconfig ${epair_two}a up jexec a ifconfig ${bridge_a} addm ${epair_one}a jexec a ifconfig ${bridge_a} addm ${epair_two}a jexec b ifconfig ${epair_one}b up jexec b ifconfig ${epair_two}b up jexec b ifconfig ${bridge_b} addm ${epair_one}b jexec b ifconfig ${bridge_b} addm ${epair_two}b jexec a ifconfig ${bridge_a} 192.0.2.1/24 # Enable spanning tree jexec a ifconfig ${bridge_a} stp ${epair_one}a jexec a ifconfig ${bridge_a} stp ${epair_two}a jexec b ifconfig ${bridge_b} stp ${epair_one}b jexec b ifconfig ${bridge_b} stp ${epair_two}b jexec b ifconfig ${bridge_b} up jexec a ifconfig ${bridge_a} up # Give STP time to do its thing sleep 5 a_discard=$(jexec a ifconfig ${bridge_a} | grep discarding) b_discard=$(jexec b ifconfig ${bridge_b} | grep discarding) if [ -z "${a_discard}" ] && [ -z "${b_discard}" ] then atf_fail "STP failed to detect bridging loop" fi # We must also have at least some forwarding interfaces a_forwarding=$(jexec a ifconfig ${bridge_a} | grep forwarding) b_forwarding=$(jexec b ifconfig ${bridge_b} | grep forwarding) if [ -z "${a_forwarding}" ] && [ -z "${b_forwarding}" ] then atf_fail "STP failed to detect bridging loop" fi } stp_cleanup() { vnet_cleanup } atf_test_case "stp_vlan" "cleanup" stp_vlan_head() { atf_set descr 'Spanning tree on VLAN test' atf_set require.user root } stp_vlan_body() { vnet_init vnet_init_bridge epair_one=$(vnet_mkepair) epair_two=$(vnet_mkepair) bridge_a=$(vnet_mkbridge) bridge_b=$(vnet_mkbridge) vnet_mkjail a ${bridge_a} ${epair_one}a ${epair_two}a vnet_mkjail b ${bridge_b} ${epair_one}b ${epair_two}b jexec a ifconfig ${epair_one}a up jexec a ifconfig ${epair_two}a up vlan_a_one=$(jexec a ifconfig vlan create vlandev ${epair_one}a vlan 42) vlan_a_two=$(jexec a ifconfig vlan create vlandev ${epair_two}a vlan 42) jexec a ifconfig ${vlan_a_one} up jexec a ifconfig ${vlan_a_two} up jexec a ifconfig ${bridge_a} addm ${vlan_a_one} jexec a ifconfig ${bridge_a} addm ${vlan_a_two} jexec b ifconfig ${epair_one}b up jexec b ifconfig ${epair_two}b up vlan_b_one=$(jexec b ifconfig vlan create vlandev ${epair_one}b vlan 42) vlan_b_two=$(jexec b ifconfig vlan create vlandev ${epair_two}b vlan 42) jexec b ifconfig ${vlan_b_one} up jexec b ifconfig 
${vlan_b_two} up jexec b ifconfig ${bridge_b} addm ${vlan_b_one} jexec b ifconfig ${bridge_b} addm ${vlan_b_two} jexec a ifconfig ${bridge_a} 192.0.2.1/24 # Enable spanning tree jexec a ifconfig ${bridge_a} stp ${vlan_a_one} jexec a ifconfig ${bridge_a} stp ${vlan_a_two} jexec b ifconfig ${bridge_b} stp ${vlan_b_one} jexec b ifconfig ${bridge_b} stp ${vlan_b_two} jexec b ifconfig ${bridge_b} up jexec a ifconfig ${bridge_a} up # Give STP time to do its thing sleep 5 a_discard=$(jexec a ifconfig ${bridge_a} | grep discarding) b_discard=$(jexec b ifconfig ${bridge_b} | grep discarding) if [ -z "${a_discard}" ] && [ -z "${b_discard}" ] then atf_fail "STP failed to detect bridging loop" fi # We must also have at least some forwarding interfaces a_forwarding=$(jexec a ifconfig ${bridge_a} | grep forwarding) b_forwarding=$(jexec b ifconfig ${bridge_b} | grep forwarding) if [ -z "${a_forwarding}" ] && [ -z "${b_forwarding}" ] then atf_fail "STP failed to detect bridging loop" fi } stp_vlan_cleanup() { vnet_cleanup } atf_test_case "static" "cleanup" static_head() { atf_set descr 'Bridge static address test' atf_set require.user root } static_body() { vnet_init vnet_init_bridge epair=$(vnet_mkepair) bridge=$(vnet_mkbridge) vnet_mkjail one ${bridge} ${epair}a ifconfig ${epair}b up jexec one ifconfig ${bridge} up jexec one ifconfig ${epair}a up jexec one ifconfig ${bridge} addm ${epair}a # Wrong interface atf_check -s exit:1 -o ignore -e ignore \ jexec one ifconfig ${bridge} static ${epair}b 00:01:02:03:04:05 # Bad address format atf_check -s exit:1 -o ignore -e ignore \ jexec one ifconfig ${bridge} static ${epair}a 00:01:02:03:04 # Correct add atf_check -s exit:0 -o ignore \ jexec one ifconfig ${bridge} static ${epair}a 00:01:02:03:04:05 # List addresses atf_check -s exit:0 -o ignore \ jexec one ifconfig ${bridge} addr # Delete with bad address format atf_check -s exit:1 -o ignore -e ignore \ jexec one ifconfig ${bridge} deladdr 00:01:02:03:04 # Delete with unlisted address atf_check -s exit:1 -o ignore -e ignore \ jexec one ifconfig ${bridge} deladdr 00:01:02:03:04:06 # Correct delete atf_check -s exit:0 -o ignore \ jexec one ifconfig ${bridge} deladdr 00:01:02:03:04:05 } static_cleanup() { vnet_cleanup } atf_test_case "span" "cleanup" span_head() { atf_set descr 'Bridge span test' atf_set require.user root atf_set require.progs python3 scapy } span_body() { vnet_init vnet_init_bridge epair=$(vnet_mkepair) epair_span=$(vnet_mkepair) bridge=$(vnet_mkbridge) vnet_mkjail one ${bridge} ${epair}a ${epair_span}a ifconfig ${epair}b up ifconfig ${epair_span}b up jexec one ifconfig ${bridge} up jexec one ifconfig ${epair}a up jexec one ifconfig ${epair_span}a up jexec one ifconfig ${bridge} addm ${epair}a jexec one ifconfig ${bridge} span ${epair_span}a jexec one ifconfig ${bridge} 192.0.2.1/24 # Send some traffic through the span jexec one ping -c 1 -t 1 192.0.2.2 # Check that we see the traffic on the span interface atf_check -s exit:0 \ $(atf_get_srcdir)/../netpfil/common/pft_ping.py \ --sendif ${epair}b \ --to 192.0.2.2 \ --recvif ${epair_span}b jexec one ifconfig ${bridge} -span ${epair_span}a # And no more traffic after we remove the span atf_check -s exit:1 \ $(atf_get_srcdir)/../netpfil/common/pft_ping.py \ --sendif ${epair}b \ --to 192.0.2.2 \ --recvif ${epair_span}b } span_cleanup() { vnet_cleanup } atf_test_case "delete_with_members" "cleanup" delete_with_members_head() { atf_set descr 'Delete a bridge which still has member interfaces' atf_set require.user root } delete_with_members_body() { 
	vnet_init
	vnet_init_bridge

	bridge=$(vnet_mkbridge)
	epair=$(vnet_mkepair)

	ifconfig ${bridge} 192.0.2.1/24 up
	ifconfig ${epair}a up
	ifconfig ${bridge} addm ${epair}a

	ifconfig ${bridge} destroy
}

delete_with_members_cleanup()
{
	vnet_cleanup
}

atf_test_case "mac_conflict" "cleanup"
mac_conflict_head()
{
	atf_set descr 'Ensure that bridges in different jails get different mac addresses'
	atf_set require.user root
}

mac_conflict_body()
{
	vnet_init
	vnet_init_bridge

	epair=$(vnet_mkepair)

	# Ensure the bridge module is loaded so jails can use it.
	tmpbridge=$(vnet_mkbridge)

	vnet_mkjail bridge_mac_conflict_one ${epair}a
	vnet_mkjail bridge_mac_conflict_two ${epair}b

	jexec bridge_mac_conflict_one ifconfig bridge create
	jexec bridge_mac_conflict_one ifconfig bridge0 192.0.2.1/24 up \
	    addm ${epair}a
	jexec bridge_mac_conflict_one ifconfig ${epair}a up

	jexec bridge_mac_conflict_two ifconfig bridge create
	jexec bridge_mac_conflict_two ifconfig bridge0 192.0.2.2/24 up \
	    addm ${epair}b
	jexec bridge_mac_conflict_two ifconfig ${epair}b up

	atf_check -s exit:0 -o ignore \
	    jexec bridge_mac_conflict_one ping -c 3 192.0.2.2
}

mac_conflict_cleanup()
{
	vnet_cleanup
}

atf_test_case "inherit_mac" "cleanup"
inherit_mac_head()
{
	atf_set descr 'Bridge inherit_mac test, #216510'
	atf_set require.user root
}

inherit_mac_body()
{
	vnet_init
	vnet_init_bridge

	bridge=$(vnet_mkbridge)
	epair=$(vnet_mkepair)
	vnet_mkjail one ${bridge} ${epair}a

	jexec one sysctl net.link.bridge.inherit_mac=1

	# Attempt to provoke the panic described in #216510
	jexec one ifconfig ${bridge} 192.0.0.1/24 up
	jexec one ifconfig ${bridge} addm ${epair}a
}

inherit_mac_cleanup()
{
	vnet_cleanup
}

atf_test_case "stp_validation" "cleanup"
stp_validation_head()
{
	atf_set descr 'Check STP validation'
	atf_set require.user root
	atf_set require.progs python3 scapy
}

stp_validation_body()
{
	vnet_init
	vnet_init_bridge

	epair_one=$(vnet_mkepair)
	epair_two=$(vnet_mkepair)
	bridge=$(vnet_mkbridge)

	ifconfig ${bridge} up
	ifconfig ${bridge} addm ${epair_one}a addm ${epair_two}a
	ifconfig ${bridge} stp ${epair_one}a stp ${epair_two}a

	ifconfig ${epair_one}a up
	ifconfig ${epair_one}b up
	ifconfig ${epair_two}a up
	ifconfig ${epair_two}b up

	# Wait until the interfaces are no longer discarding
	while ifconfig ${bridge} | grep 'state discarding' >/dev/null
	do
		sleep 1
	done

	# Now inject invalid STP BPDUs on epair_one and see if they're repeated
	# on epair_two
	atf_check -s exit:0 \
	    $(atf_get_srcdir)/stp.py \
	    --sendif ${epair_one}b \
	    --recvif ${epair_two}b
}

stp_validation_cleanup()
{
	vnet_cleanup
}

atf_test_case "gif" "cleanup"
gif_head()
{
	atf_set descr 'gif as a bridge member'
	atf_set require.user root
}

gif_body()
{
	vnet_init
	vnet_init_bridge

	epair=$(vnet_mkepair)

	vnet_mkjail one ${epair}a
	vnet_mkjail two ${epair}b

	jexec one sysctl net.link.gif.max_nesting=2
	jexec two sysctl net.link.gif.max_nesting=2

	jexec one ifconfig ${epair}a 192.0.2.1/24 up
	jexec two ifconfig ${epair}b 192.0.2.2/24 up

	# Tunnel
	gif_one=$(jexec one ifconfig gif create)
	gif_two=$(jexec two ifconfig gif create)

	jexec one ifconfig ${gif_one} tunnel 192.0.2.1 192.0.2.2
	jexec one ifconfig ${gif_one} up
	jexec two ifconfig ${gif_two} tunnel 192.0.2.2 192.0.2.1
	jexec two ifconfig ${gif_two} up

	bridge_one=$(jexec one ifconfig bridge create)
	bridge_two=$(jexec two ifconfig bridge create)
	jexec one ifconfig ${bridge_one} 198.51.100.1/24 up
	jexec one ifconfig ${bridge_one} addm ${gif_one}
	jexec two ifconfig ${bridge_two} 198.51.100.2/24 up
	jexec two ifconfig ${bridge_two} addm ${gif_two}

	# Sanity check
	atf_check -s exit:0 -o ignore \
	    jexec one ping -c 1 192.0.2.2

	# Test tunnel
	atf_check -s exit:0 -o ignore \
	    jexec one ping -c 1 198.51.100.2
	atf_check -s exit:0 -o ignore \
	    jexec one ping -c 1 -s 1200 198.51.100.2
	atf_check -s exit:0 -o ignore \
	    jexec one ping -c 1 -s 2000 198.51.100.2

	# Higher MTU on the tunnel than on the underlying interface
	jexec one ifconfig ${epair}a mtu 1000
	jexec two ifconfig ${epair}b mtu 1000

	atf_check -s exit:0 -o ignore \
	    jexec one ping -c 1 -s 1200 198.51.100.2
	atf_check -s exit:0 -o ignore \
	    jexec one ping -c 1 -s 2000 198.51.100.2
}

gif_cleanup()
{
	vnet_cleanup
}

atf_test_case "mtu" "cleanup"
mtu_head()
{
	atf_set descr 'Bridge MTU changes'
	atf_set require.user root
}

get_mtu()
{
	intf=$1

	ifconfig ${intf} | awk '$5 == "mtu" { print $6 }'
}

check_mtu()
{
	intf=$1
	expected=$2

	mtu=$(get_mtu $intf)
	if [ "$mtu" -ne "$expected" ]; then
		atf_fail "Expected MTU of $expected on $intf but found $mtu"
	fi
}

mtu_body()
{
	vnet_init
	vnet_init_bridge

	epair=$(vnet_mkepair)
	gif=$(ifconfig gif create)
	echo ${gif} >> created_interfaces.lst
	bridge=$(vnet_mkbridge)

	atf_check -s exit:0 \
	    ifconfig ${bridge} addm ${epair}a

	ifconfig ${gif} mtu 1500
	atf_check -s exit:0 \
	    ifconfig ${bridge} addm ${gif}

	# Changing MTU changes it for all member interfaces
	atf_check -s exit:0 \
	    ifconfig ${bridge} mtu 2000

	check_mtu ${bridge} 2000
	check_mtu ${gif} 2000
	check_mtu ${epair}a 2000

	# Rejected MTUs mean none of the MTUs change
	atf_check -s exit:1 -e ignore \
	    ifconfig ${bridge} mtu 9000

	check_mtu ${bridge} 2000
	check_mtu ${gif} 2000
	check_mtu ${epair}a 2000

	# We're not allowed to change the MTU of a member interface
	atf_check -s exit:1 -e ignore \
	    ifconfig ${epair}a mtu 1900
	check_mtu ${epair}a 2000

	# Test adding an interface with a different MTU
	new_epair=$(vnet_mkepair)
	check_mtu ${new_epair}a 1500
	atf_check -s exit:0 -e ignore \
	    ifconfig ${bridge} addm ${new_epair}a

	check_mtu ${bridge} 2000
	check_mtu ${gif} 2000
	check_mtu ${epair}a 2000
	check_mtu ${new_epair}a 2000
}

mtu_cleanup()
{
	vnet_cleanup
}

atf_test_case "vlan" "cleanup"
vlan_head()
{
	atf_set descr 'Ensure the bridge takes vlan ID into account, PR#270559'
	atf_set require.user root
}

vlan_body()
{
	vnet_init
	vnet_init_bridge

	vid=1

	epaira=$(vnet_mkepair)
	epairb=$(vnet_mkepair)

	br=$(vnet_mkbridge)

	vnet_mkjail one ${epaira}b
	vnet_mkjail two ${epairb}b

	ifconfig ${br} up
	ifconfig ${epaira}a up
	ifconfig ${epairb}a up
	ifconfig ${br} addm ${epaira}a addm ${epairb}a

	jexec one ifconfig ${epaira}b up
	jexec one ifconfig ${epaira}b.${vid} create

	jexec two ifconfig ${epairb}b up
	jexec two ifconfig ${epairb}b.${vid} create

	# Create a MAC address conflict between an untagged and tagged interface
	jexec two ifconfig ${epairb}b.${vid} ether 02:05:6e:06:28:1a
	jexec one ifconfig ${epaira}b ether 02:05:6e:06:28:1a
	jexec one ifconfig ${epaira}b.${vid} ether 02:05:6e:06:28:1b

	# Add IP addresses; this also populates $br's forwarding table via
	# ARP announcements
	jexec one ifconfig ${epaira}b.${vid} 192.0.2.1/24 up
	jexec two ifconfig ${epairb}b.${vid} 192.0.2.2/24 up

	sleep 0.5

	ifconfig ${br}
	jexec one ifconfig
	jexec two ifconfig
	ifconfig ${br} addr

	atf_check -s exit:0 -o ignore \
	    jexec one ping -c 1 -t 1 192.0.2.2

	# This will trigger a mac flap (by ARP announcement)
	jexec one ifconfig ${epaira}b 192.0.2.1/24 up

	sleep 0.5

	ifconfig ${br} addr

	atf_check -s exit:0 -o ignore \
	    jexec one ping -c 1 -t 1 192.0.2.2
}

vlan_cleanup()
{
	vnet_cleanup
}

atf_test_case "many_bridge_members" "cleanup"
many_bridge_members_head()
{
	atf_set descr 'many_bridge_members ifconfig test'
	atf_set require.user root
}

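# Add 256 epair members to a single bridge and check that ifconfig
# reports every one of them.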
many_bridge_members_body()
{
	vnet_init
	vnet_init_bridge

	bridge=$(vnet_mkbridge)

	ifcount=256
	for _ in $(seq 1 $ifcount); do
		epair=$(vnet_mkepair)
		ifconfig "${bridge}" addm "${epair}"a
	done

	atf_check -s exit:0 -o inline:"$ifcount\n" \
	    sh -c "ifconfig ${bridge} | grep member: | wc -l | xargs"
}

many_bridge_members_cleanup()
{
	vnet_cleanup
}

atf_test_case "member_ifaddrs_enabled" "cleanup"
member_ifaddrs_enabled_head()
{
	atf_set descr 'bridge with member_ifaddrs=1'
	atf_set require.user root
}

member_ifaddrs_enabled_body()
{
	vnet_init
	vnet_init_bridge

	ep=$(vnet_mkepair)
	ifconfig ${ep}a inet 192.0.2.1/24 up

	vnet_mkjail one ${ep}b
	jexec one sysctl net.link.bridge.member_ifaddrs=1
	jexec one ifconfig ${ep}b inet 192.0.2.2/24 up
	jexec one ifconfig bridge0 create addm ${ep}b

	atf_check -s exit:0 -o ignore ping -c3 -t1 192.0.2.2
}

member_ifaddrs_enabled_cleanup()
{
	vnet_cleanup
}

atf_test_case "member_ifaddrs_disabled" "cleanup"
member_ifaddrs_disabled_head()
{
	atf_set descr 'bridge with member_ifaddrs=0'
	atf_set require.user root
}

member_ifaddrs_disabled_body()
{
	vnet_init
	vnet_init_bridge

	vnet_mkjail one
	jexec one sysctl net.link.bridge.member_ifaddrs=0

	bridge=$(jexec one ifconfig bridge create)

	# adding an interface with an IPv4 address
	ep=$(jexec one ifconfig epair create)
	jexec one ifconfig ${ep} 192.0.2.1/32
	atf_check -s exit:1 -e ignore jexec one ifconfig ${bridge} addm ${ep}

	# adding an interface with an IPv6 address
	ep=$(jexec one ifconfig epair create)
	jexec one ifconfig ${ep} inet6 2001:db8::1/128
	atf_check -s exit:1 -e ignore jexec one ifconfig ${bridge} addm ${ep}

	# adding an interface with an IPv6 link-local address
	ep=$(jexec one ifconfig epair create)
	jexec one ifconfig ${ep} inet6 -ifdisabled auto_linklocal up
	atf_check -s exit:1 -e ignore jexec one ifconfig ${bridge} addm ${ep}

	# adding an IPv4 address to a member
	ep=$(jexec one ifconfig epair create)
	jexec one ifconfig ${bridge} addm ${ep}
	atf_check -s exit:1 -e ignore jexec one ifconfig ${ep} inet 192.0.2.2/32

	# adding an IPv6 address to a member
	ep=$(jexec one ifconfig epair create)
	jexec one ifconfig ${bridge} addm ${ep}
	atf_check -s exit:1 -e ignore jexec one ifconfig ${ep} inet6 2001:db8::1/128
}

member_ifaddrs_disabled_cleanup()
{
	vnet_cleanup
}

#
# Test kern/287150: when member_ifaddrs=0, and a physical interface which is in
# a bridge also has a vlan(4) on it, tagged packets are not correctly passed to
# vlan(4).
atf_test_case "member_ifaddrs_vlan" "cleanup"
member_ifaddrs_vlan_head()
{
	atf_set descr 'kern/287150: vlan and bridge on the same interface'
	atf_set require.user root
}

member_ifaddrs_vlan_body()
{
	vnet_init
	vnet_init_bridge

	epone=$(vnet_mkepair)
	eptwo=$(vnet_mkepair)

	# The first jail has an epair with an IP address on vlan 20.
	vnet_mkjail one ${epone}a
	atf_check -s exit:0 jexec one ifconfig ${epone}a up
	atf_check -s exit:0 jexec one \
	    ifconfig ${epone}a.20 create inet 192.0.2.1/24 up

	# The second jail has an epair with an IP address on vlan 20,
	# which is also in a bridge.
	vnet_mkjail two ${epone}b
	jexec two ifconfig

	atf_check -s exit:0 -o save:bridge jexec two ifconfig bridge create
	bridge=$(cat bridge)
	atf_check -s exit:0 jexec two ifconfig ${bridge} addm ${epone}b up

	atf_check -s exit:0 -o ignore jexec two \
	    sysctl net.link.bridge.member_ifaddrs=0
	atf_check -s exit:0 jexec two ifconfig ${epone}b up
	atf_check -s exit:0 jexec two \
	    ifconfig ${epone}b.20 create inet 192.0.2.2/24 up

	# Make sure the two jails can communicate over the vlan.
	atf_check -s exit:0 -o ignore jexec one ping -c 3 -t 1 192.0.2.2
	atf_check -s exit:0 -o ignore jexec two ping -c 3 -t 1 192.0.2.1
}

member_ifaddrs_vlan_cleanup()
{
	vnet_cleanup
}

atf_test_case "vlan_pvid" "cleanup"
vlan_pvid_head()
{
	atf_set descr 'bridge with two ports with pvid set'
	atf_set require.user root
}

vlan_pvid_body()
{
	vnet_init
	vnet_init_bridge

	epone=$(vnet_mkepair)
	eptwo=$(vnet_mkepair)

	vnet_mkjail one ${epone}b
	vnet_mkjail two ${eptwo}b

	jexec one ifconfig ${epone}b 192.0.2.1/24 up
	jexec two ifconfig ${eptwo}b 192.0.2.2/24 up

	bridge=$(vnet_mkbridge)

	ifconfig ${bridge} up
	ifconfig ${epone}a up
	ifconfig ${eptwo}a up
	ifconfig ${bridge} addm ${epone}a untagged ${epone}a 20
	ifconfig ${bridge} addm ${eptwo}a untagged ${eptwo}a 20

	# With VLAN filtering enabled, traffic should be passed.
	atf_check -s exit:0 -o ignore jexec one ping -c 3 -t 1 192.0.2.2
	atf_check -s exit:0 -o ignore jexec two ping -c 3 -t 1 192.0.2.1

	# Remove the untagged VLAN on one port; traffic should not be passed.
	ifconfig ${bridge} -untagged ${epone}a
	atf_check -s exit:2 -o ignore jexec one ping -c 3 -t 1 192.0.2.2
	atf_check -s exit:2 -o ignore jexec two ping -c 3 -t 1 192.0.2.1
}

vlan_pvid_cleanup()
{
	vnet_cleanup
}

atf_test_case "vlan_pvid_filtered" "cleanup"
vlan_pvid_filtered_head()
{
	atf_set descr 'bridge with two ports with different pvids'
	atf_set require.user root
}

vlan_pvid_filtered_body()
{
	vnet_init
	vnet_init_bridge

	epone=$(vnet_mkepair)
	eptwo=$(vnet_mkepair)

	vnet_mkjail one ${epone}b
	vnet_mkjail two ${eptwo}b

	jexec one ifconfig ${epone}b 192.0.2.1/24 up
	jexec two ifconfig ${eptwo}b 192.0.2.2/24 up

	bridge=$(vnet_mkbridge)

	ifconfig ${bridge} up
	ifconfig ${epone}a up
	ifconfig ${eptwo}a up
	ifconfig ${bridge} addm ${epone}a untagged ${epone}a 20
	ifconfig ${bridge} addm ${eptwo}a untagged ${eptwo}a 30

	atf_check -s exit:2 -o ignore jexec one ping -c 3 -t 1 192.0.2.2
	atf_check -s exit:2 -o ignore jexec two ping -c 3 -t 1 192.0.2.1
}

vlan_pvid_filtered_cleanup()
{
	vnet_cleanup
}

atf_test_case "vlan_pvid_tagged" "cleanup"
vlan_pvid_tagged_head()
{
	atf_set descr 'bridge pvid with tagged frames for pvid'
	atf_set require.user root
}

vlan_pvid_tagged_body()
{
	vnet_init
	vnet_init_bridge

	epone=$(vnet_mkepair)
	eptwo=$(vnet_mkepair)

	vnet_mkjail one ${epone}b
	vnet_mkjail two ${eptwo}b

	# Create two tagged interfaces on the appropriate VLANs
	jexec one ifconfig ${epone}b up
	jexec one ifconfig ${epone}b.20 create 192.0.2.1/24 up
	jexec two ifconfig ${eptwo}b up
	jexec two ifconfig ${eptwo}b.20 create 192.0.2.2/24 up

	bridge=$(vnet_mkbridge)

	ifconfig ${bridge} up
	ifconfig ${epone}a up
	ifconfig ${eptwo}a up
	ifconfig ${bridge} addm ${epone}a untagged ${epone}a 20
	ifconfig ${bridge} addm ${eptwo}a untagged ${eptwo}a 20

	# Tagged frames should not be passed.
	atf_check -s exit:2 -o ignore jexec one ping -c 3 -t 1 192.0.2.2
	atf_check -s exit:2 -o ignore jexec two ping -c 3 -t 1 192.0.2.1
}

vlan_pvid_tagged_cleanup()
{
	vnet_cleanup
}

atf_test_case "vlan_pvid_1q" "cleanup"
vlan_pvid_1q_head()
{
	atf_set descr '802.1q tag addition and removal'
	atf_set require.user root
}

vlan_pvid_1q_body()
{
	vnet_init
	vnet_init_bridge

	epone=$(vnet_mkepair)
	eptwo=$(vnet_mkepair)

	vnet_mkjail one ${epone}b
	vnet_mkjail two ${eptwo}b

	# Set up one jail with an access port, and the other with a trunk port.
	# This forces the bridge to add and remove .1q tags to bridge the
	# traffic.
	jexec one ifconfig ${epone}b 192.0.2.1/24 up
	jexec two ifconfig ${eptwo}b up
	jexec two ifconfig ${eptwo}b.20 create 192.0.2.2/24 up

	bridge=$(vnet_mkbridge)

	ifconfig ${bridge} addm ${epone}a untagged ${epone}a 20
	ifconfig ${bridge} addm ${eptwo}a
	ifconfig ${bridge} up
	ifconfig ${epone}a up
	ifconfig ${eptwo}a up

	atf_check -s exit:0 -o ignore jexec one ping -c 3 -t 1 192.0.2.2
	atf_check -s exit:0 -o ignore jexec two ping -c 3 -t 1 192.0.2.1
}

vlan_pvid_1q_cleanup()
{
	vnet_cleanup
}

#
# Test vlan filtering.
#
atf_test_case "vlan_filtering" "cleanup"
vlan_filtering_head()
{
	atf_set descr 'tagged traffic with filtering'
	atf_set require.user root
}

vlan_filtering_body()
{
	vnet_init
	vnet_init_bridge

	epone=$(vnet_mkepair)
	eptwo=$(vnet_mkepair)

	vnet_mkjail one ${epone}b
	vnet_mkjail two ${eptwo}b

	jexec one ifconfig ${epone}b up
	jexec one ifconfig ${epone}b.20 create 192.0.2.1/24 up
	jexec two ifconfig ${eptwo}b up
	jexec two ifconfig ${eptwo}b.20 create 192.0.2.2/24 up

	bridge=$(vnet_mkbridge)

	ifconfig ${bridge} up
	ifconfig ${epone}a up
	ifconfig ${eptwo}a up
	ifconfig ${bridge} addm ${epone}a vlanfilter ${epone}a
	ifconfig ${bridge} addm ${eptwo}a vlanfilter ${eptwo}a

	# Right now there are no VLANs on the access list, so everything
	# should be blocked.
	atf_check -s exit:2 -o ignore jexec one ping -c 3 -t 1 192.0.2.2
	atf_check -s exit:2 -o ignore jexec two ping -c 3 -t 1 192.0.2.1

	# Set the untagged vlan on both ports to 20 and make sure traffic is
	# still blocked. We intentionally do not pass tagged traffic for the
	# untagged vlan.
	atf_check -s exit:0 ifconfig ${bridge} untagged ${epone}a 20
	atf_check -s exit:0 ifconfig ${bridge} untagged ${eptwo}a 20

	atf_check -s exit:2 -o ignore jexec one ping -c 3 -t 1 192.0.2.2
	atf_check -s exit:2 -o ignore jexec two ping -c 3 -t 1 192.0.2.1

	atf_check -s exit:0 ifconfig ${bridge} -untagged ${epone}a
	atf_check -s exit:0 ifconfig ${bridge} -untagged ${eptwo}a

	# Add VLANs 10-30 to the access list; now access should be allowed.
	ifconfig ${bridge} +tagged ${epone}a 10-30
	ifconfig ${bridge} +tagged ${eptwo}a 10-30
	atf_check -s exit:0 -o ignore jexec one ping -c 3 -t 1 192.0.2.2
	atf_check -s exit:0 -o ignore jexec two ping -c 3 -t 1 192.0.2.1

	# Remove vlan 20 from the access list; now access should be blocked
	# again.
	ifconfig ${bridge} -tagged ${epone}a 20
	ifconfig ${bridge} -tagged ${eptwo}a 20
	atf_check -s exit:2 -o ignore jexec one ping -c 3 -t 1 192.0.2.2
	atf_check -s exit:2 -o ignore jexec two ping -c 3 -t 1 192.0.2.1
}

vlan_filtering_cleanup()
{
	vnet_cleanup
}

#
# Test the ifconfig 'tagged' option.
#
atf_test_case "vlan_ifconfig_tagged" "cleanup"
vlan_ifconfig_tagged_head()
{
	atf_set descr 'test the ifconfig tagged option'
	atf_set require.user root
}

vlan_ifconfig_tagged_body()
{
	vnet_init
	vnet_init_bridge

	ep=$(vnet_mkepair)
	bridge=$(vnet_mkbridge)

	ifconfig ${bridge} addm ${ep}a vlanfilter ${ep}a up
	ifconfig ${ep}a up

	# To start with, no vlans should be configured.
	atf_check -s exit:0 -o not-match:"tagged" ifconfig ${bridge}

	# Add vlans 100-149.
	atf_check -s exit:0 ifconfig ${bridge} tagged ${ep}a 100-149
	atf_check -s exit:0 -o match:"tagged 100-149" ifconfig ${bridge}

	# Replace the vlan list with 139-199.
	atf_check -s exit:0 ifconfig ${bridge} tagged ${ep}a 139-199
	atf_check -s exit:0 -o match:"tagged 139-199" ifconfig ${bridge}

	# Add vlans 100-170.
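	# 100-170 overlaps the existing 139-199 list, so the bridge should
	# end up with the merged range 100-199.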
	atf_check -s exit:0 ifconfig ${bridge} +tagged ${ep}a 100-170
	atf_check -s exit:0 -o match:"tagged 100-199" ifconfig ${bridge}

	# Remove vlans 104, 105, and 150-159
	atf_check -s exit:0 ifconfig ${bridge} -tagged ${ep}a 104,105,150-159
	atf_check -s exit:0 -o match:"tagged 100-103,106-149,160-199" \
	    ifconfig ${bridge}

	# Remove the entire vlan list.
	atf_check -s exit:0 ifconfig ${bridge} tagged ${ep}a none
	atf_check -s exit:0 -o not-match:"tagged" ifconfig ${bridge}

	# Test some invalid vlan sets.
	for bad_vlan in -1 0 4096 4097 foo 0-10 4000-5000 foo-40 40-foo; do
		atf_check -s exit:1 -e ignore \
		    ifconfig ${bridge} tagged "$bad_vlan"
	done
}

vlan_ifconfig_tagged_cleanup()
{
	vnet_cleanup
}

#
# Test a vlan(4) "SVI" interface on top of a bridge.
#
atf_test_case "vlan_svi" "cleanup"
vlan_svi_head()
{
	atf_set descr 'vlan bridge with an SVI'
	atf_set require.user root
}

vlan_svi_body()
{
	vnet_init
	vnet_init_bridge

	epone=$(vnet_mkepair)

	vnet_mkjail one ${epone}b

	jexec one ifconfig ${epone}b up
	jexec one ifconfig ${epone}b.20 create 192.0.2.1/24 up

	bridge=$(vnet_mkbridge)

	ifconfig ${bridge} up
	ifconfig ${epone}a up
	ifconfig ${bridge} addm ${epone}a tagged ${epone}a 20

	svi=$(vnet_mkvlan)
	ifconfig ${svi} vlan 20 vlandev ${bridge}
	ifconfig ${svi} inet 192.0.2.2/24 up

	atf_check -s exit:0 -o ignore ping -c 3 -t 1 192.0.2.1
}

vlan_svi_cleanup()
{
	vnet_cleanup
}

#
# Test QinQ (802.1ad).
#
atf_test_case "vlan_qinq" "cleanup"
vlan_qinq_head()
{
	atf_set descr 'vlan filtering with QinQ traffic'
	atf_set require.user root
}

vlan_qinq_body()
{
	vnet_init
	vnet_init_bridge

	epone=$(vnet_mkepair)
	eptwo=$(vnet_mkepair)

	vnet_mkjail one ${epone}b
	vnet_mkjail two ${eptwo}b

	# Create a QinQ trunk between the two jails. The outer (provider) tag
	# is 5, and the inner tag is 10.
	jexec one ifconfig ${epone}b up
	jexec one ifconfig ${epone}b.5 create vlanproto 802.1ad up
	jexec one ifconfig ${epone}b.5.10 create inet 192.0.2.1/24 up

	jexec two ifconfig ${eptwo}b up
	jexec two ifconfig ${eptwo}b.5 create vlanproto 802.1ad up
	jexec two ifconfig ${eptwo}b.5.10 create inet 192.0.2.2/24 up

	bridge=$(vnet_mkbridge)

	ifconfig ${bridge} up
	ifconfig ${epone}a up
	ifconfig ${eptwo}a up
	ifconfig ${bridge} addm ${epone}a vlanfilter ${epone}a
	ifconfig ${bridge} addm ${eptwo}a vlanfilter ${eptwo}a

	# Right now there are no VLANs on the access list, so everything
	# should be blocked.
	atf_check -s exit:2 -o ignore jexec one ping -c 3 -t 1 192.0.2.2
	atf_check -s exit:2 -o ignore jexec two ping -c 3 -t 1 192.0.2.1

	# Add the provider tag to the access list; now traffic should be passed.
	ifconfig ${bridge} +tagged ${epone}a 5
	ifconfig ${bridge} +tagged ${eptwo}a 5
	atf_check -s exit:0 -o ignore jexec one ping -c 3 -t 1 192.0.2.2
	atf_check -s exit:0 -o ignore jexec two ping -c 3 -t 1 192.0.2.1
}

vlan_qinq_cleanup()
{
	vnet_cleanup
}

+# Adding a bridge SVI to a bridge should not be allowed.
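+# (The converse case, turning an existing bridge member vlan(4) into an SVI,
+# is covered by the bridge_vlandev test added to if_vlan.sh below.)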
+atf_test_case "bridge_svi_in_bridge" "cleanup"
+bridge_svi_in_bridge_head()
+{
+	atf_set descr 'adding a bridge SVI to a bridge is not allowed (1)'
+	atf_set require.user root
+}
+
+bridge_svi_in_bridge_body()
+{
+	vnet_init
+	vnet_init_bridge
+
+	bridge=$(vnet_mkbridge)
+	atf_check -s exit:0 ifconfig ${bridge}.1 create
+	atf_check -s exit:1 -e ignore ifconfig ${bridge} addm ${bridge}.1
+}
+
+bridge_svi_in_bridge_cleanup()
+{
+	vnet_cleanup
+}
+
atf_init_test_cases()
{
	atf_add_test_case "bridge_transmit_ipv4_unicast"
	atf_add_test_case "stp"
	atf_add_test_case "stp_vlan"
	atf_add_test_case "static"
	atf_add_test_case "span"
	atf_add_test_case "inherit_mac"
	atf_add_test_case "delete_with_members"
	atf_add_test_case "mac_conflict"
	atf_add_test_case "stp_validation"
	atf_add_test_case "gif"
	atf_add_test_case "mtu"
	atf_add_test_case "vlan"
	atf_add_test_case "many_bridge_members"
	atf_add_test_case "member_ifaddrs_enabled"
	atf_add_test_case "member_ifaddrs_disabled"
	atf_add_test_case "member_ifaddrs_vlan"
	atf_add_test_case "vlan_pvid"
	atf_add_test_case "vlan_pvid_1q"
	atf_add_test_case "vlan_pvid_filtered"
	atf_add_test_case "vlan_pvid_tagged"
	atf_add_test_case "vlan_filtering"
	atf_add_test_case "vlan_ifconfig_tagged"
	atf_add_test_case "vlan_svi"
	atf_add_test_case "vlan_qinq"
+	atf_add_test_case "bridge_svi_in_bridge"
}

diff --git a/tests/sys/net/if_vlan.sh b/tests/sys/net/if_vlan.sh
index 424eac705b94..8122203337e2 100755
--- a/tests/sys/net/if_vlan.sh
+++ b/tests/sys/net/if_vlan.sh
@@ -1,346 +1,373 @@
. $(atf_get_srcdir)/../common/vnet.subr

atf_test_case "basic" "cleanup"
basic_head()
{
	atf_set descr 'Basic VLAN test'
	atf_set require.user root
}

basic_body()
{
	vnet_init

	epair_vlan=$(vnet_mkepair)

	vnet_mkjail alcatraz ${epair_vlan}a
	vnet_mkjail singsing ${epair_vlan}b

	vlan0=$(jexec alcatraz ifconfig vlan create vlandev ${epair_vlan}a \
	    vlan 42)
	jexec alcatraz ifconfig ${epair_vlan}a up
	jexec alcatraz ifconfig ${vlan0} 10.0.0.1/24 up

	vlan1=$(jexec singsing ifconfig vlan create)

	# Test associating the physical interface
	atf_check -s exit:0 \
	    jexec singsing ifconfig ${vlan1} vlandev ${epair_vlan}b vlan 42

	jexec singsing ifconfig ${epair_vlan}b up
	jexec singsing ifconfig ${vlan1} 10.0.0.2/24 up

	atf_check -s exit:0 -o ignore jexec singsing ping -c 1 10.0.0.1

	# Test changing the vlan ID
	atf_check -s exit:0 \
	    jexec singsing ifconfig ${vlan1} vlandev ${epair_vlan}b vlan 43
	atf_check -s exit:2 -o ignore jexec singsing ping -c 1 10.0.0.1

	# And change back
	atf_check -s exit:0 \
	    jexec singsing ifconfig ${vlan1} vlan 42 vlandev ${epair_vlan}b
	atf_check -s exit:0 -o ignore jexec singsing ping -c 1 10.0.0.1
}

basic_cleanup()
{
	vnet_cleanup
}

# Simple Q-in-Q (802.1Q over 802.1ad)
atf_test_case "qinq_simple" "cleanup"
qinq_simple_head()
{
	atf_set descr 'Simple Q-in-Q test (802.1Q over 802.1ad)'
	atf_set require.user root
}

qinq_simple_body()
{
	vnet_init

	epair_qinq=$(vnet_mkepair)

	vnet_mkjail jqinq0 ${epair_qinq}a
	vnet_mkjail jqinq1 ${epair_qinq}b

	vlan5a=$(jexec jqinq0 ifconfig vlan create \
	    vlandev ${epair_qinq}a vlan 5 vlanproto 802.1ad)
	vlan42a=$(jexec jqinq0 ifconfig vlan create \
	    vlandev ${vlan5a} vlan 42 vlanproto 802.1q)
	jexec jqinq0 ifconfig ${epair_qinq}a up
	jexec jqinq0 ifconfig ${vlan5a} up
	jexec jqinq0 ifconfig ${vlan42a} 10.5.42.1/24 up

	vlan5b=$(jexec jqinq1 ifconfig vlan create \
	    vlandev ${epair_qinq}b vlan 5 vlanproto 802.1ad)
	vlan42b=$(jexec jqinq1 ifconfig vlan create \
	    vlandev ${vlan5b} vlan 42 vlanproto 802.1q)
	jexec jqinq1 ifconfig ${epair_qinq}b up
	jexec jqinq1 ifconfig ${vlan5b} up
	jexec jqinq1 ifconfig ${vlan42b} 10.5.42.2/24 up

	atf_check -s exit:0 -o ignore jexec jqinq1 ping -c 1 10.5.42.1
}

qinq_simple_cleanup()
{
	vnet_cleanup
}

# Deep Q-in-Q (802.1Q over 802.1ad over 802.1ad)
atf_test_case "qinq_deep" "cleanup"
qinq_deep_head()
{
	atf_set descr 'Deep Q-in-Q test (802.1Q over 802.1ad over 802.1ad)'
	atf_set require.user root
}

qinq_deep_body()
{
	vnet_init

	epair_qinq=$(vnet_mkepair)

	vnet_mkjail jqinq2 ${epair_qinq}a
	vnet_mkjail jqinq3 ${epair_qinq}b

	vlan5a=$(jexec jqinq2 ifconfig vlan create \
	    vlandev ${epair_qinq}a vlan 5 vlanproto 802.1ad)
	vlan6a=$(jexec jqinq2 ifconfig vlan create \
	    vlandev ${vlan5a} vlan 6 vlanproto 802.1ad)
	vlan42a=$(jexec jqinq2 ifconfig vlan create \
	    vlandev ${vlan6a} vlan 42 vlanproto 802.1q)
	jexec jqinq2 ifconfig ${epair_qinq}a up
	jexec jqinq2 ifconfig ${vlan5a} up
	jexec jqinq2 ifconfig ${vlan6a} up
	jexec jqinq2 ifconfig ${vlan42a} 10.6.42.1/24 up

	vlan5b=$(jexec jqinq3 ifconfig vlan create \
	    vlandev ${epair_qinq}b vlan 5 vlanproto 802.1ad)
	vlan6b=$(jexec jqinq3 ifconfig vlan create \
	    vlandev ${vlan5b} vlan 6 vlanproto 802.1ad)
	vlan42b=$(jexec jqinq3 ifconfig vlan create \
	    vlandev ${vlan6b} vlan 42 vlanproto 802.1q)
	jexec jqinq3 ifconfig ${epair_qinq}b up
	jexec jqinq3 ifconfig ${vlan5b} up
	jexec jqinq3 ifconfig ${vlan6b} up
	jexec jqinq3 ifconfig ${vlan42b} 10.6.42.2/24 up

	atf_check -s exit:0 -o ignore jexec jqinq3 ping -c 1 10.6.42.1
}

qinq_deep_cleanup()
{
	vnet_cleanup
}

# Legacy Q-in-Q (802.1Q over 802.1Q)
atf_test_case "qinq_legacy" "cleanup"
qinq_legacy_head()
{
	atf_set descr 'Legacy Q-in-Q test (802.1Q over 802.1Q)'
	atf_set require.user root
}

qinq_legacy_body()
{
	vnet_init

	epair_qinq=$(vnet_mkepair)

	vnet_mkjail jqinq4 ${epair_qinq}a
	vnet_mkjail jqinq5 ${epair_qinq}b

	vlan5a=$(jexec jqinq4 ifconfig vlan create \
	    vlandev ${epair_qinq}a vlan 5)
	vlan42a=$(jexec jqinq4 ifconfig vlan create \
	    vlandev ${vlan5a} vlan 42)
	jexec jqinq4 ifconfig ${epair_qinq}a up
	jexec jqinq4 ifconfig ${vlan5a} up
	jexec jqinq4 ifconfig ${vlan42a} 10.5.42.1/24 up

	vlan5b=$(jexec jqinq5 ifconfig vlan create \
	    vlandev ${epair_qinq}b vlan 5)
	vlan42b=$(jexec jqinq5 ifconfig vlan create \
	    vlandev ${vlan5b} vlan 42)
	jexec jqinq5 ifconfig ${epair_qinq}b up
	jexec jqinq5 ifconfig ${vlan5b} up
	jexec jqinq5 ifconfig ${vlan42b} 10.5.42.2/24 up

	atf_check -s exit:0 -o ignore jexec jqinq5 ping -c 1 10.5.42.1
}

qinq_legacy_cleanup()
{
	vnet_cleanup
}

# Simple Q-in-Q with dot notation
atf_test_case "qinq_dot" "cleanup"
qinq_dot_head()
{
	atf_set descr 'Simple Q-in-Q test with dot notation'
	atf_set require.user root
}

qinq_dot_body()
{
	vnet_init

	epair_qinq=$(vnet_mkepair)

	vnet_mkjail jqinq6 ${epair_qinq}a
	vnet_mkjail jqinq7 ${epair_qinq}b

	jexec jqinq6 ifconfig vlan5 create \
	    vlandev ${epair_qinq}a vlan 5 vlanproto 802.1ad
	jexec jqinq6 ifconfig vlan5.42 create \
	    vlanproto 802.1q
	jexec jqinq6 ifconfig ${epair_qinq}a up
	jexec jqinq6 ifconfig vlan5 up
	jexec jqinq6 ifconfig vlan5.42 10.5.42.1/24 up

	vlan5b=$(jexec jqinq7 ifconfig vlan create \
	    vlandev ${epair_qinq}b vlan 5 vlanproto 802.1ad)
	vlan42b=$(jexec jqinq7 ifconfig vlan create \
	    vlandev ${vlan5b} vlan 42 vlanproto 802.1q)
	jexec jqinq7 ifconfig ${epair_qinq}b up
	jexec jqinq7 ifconfig ${vlan5b} up
	jexec jqinq7 ifconfig ${vlan42b} 10.5.42.2/24 up

	atf_check -s exit:0 -o ignore jexec jqinq7 ping -c 1 10.5.42.1
}

qinq_dot_cleanup()
{
	vnet_cleanup
}

atf_test_case "qinq_setflags" "cleanup"
qinq_setflags_head()
{
	atf_set descr 'Test setting flags on a QinQ device'
	atf_set require.user root
}

qinq_setflags_body()
{
	vnet_init
	epair=$(vnet_mkepair)
	ifconfig ${epair}a up

	vlan1=$(ifconfig vlan create)
	ifconfig $vlan1 vlan 1 vlandev ${epair}a

	vlan2=$(ifconfig vlan create)
	ifconfig $vlan2 vlan 2 vlandev $vlan1

	# This panics, incorrect locking
	ifconfig $vlan2 promisc
}

qinq_setflags_cleanup()
{
	vnet_cleanup
}

atf_test_case "bpf_pcp" "cleanup"
bpf_pcp_head()
{
	atf_set descr 'Set VLAN PCP through BPF'
	atf_set require.user root
	atf_set require.progs python3 scapy
}

bpf_pcp_body()
{
	vnet_init

	epair=$(vnet_mkepair)
	ifconfig ${epair}a up

	vnet_mkjail alcatraz ${epair}b
	vlan=$(jexec alcatraz ifconfig vlan create)
	jexec alcatraz ifconfig ${vlan} vlan 42 vlandev ${epair}b
	jexec alcatraz ifconfig ${vlan} up
	jexec alcatraz ifconfig ${epair}b up

	jexec alcatraz sysctl net.link.vlan.mtag_pcp=1

	jexec alcatraz dhclient ${vlan} &
	atf_check -s exit:1 -o ignore -e ignore $(atf_get_srcdir)/pcp.py \
	    --expect-pcp 6 \
	    --recvif ${epair}a

	jexec alcatraz killall dhclient
	sleep 1

	jexec alcatraz dhclient -c $(atf_get_srcdir)/dhclient_pcp.conf ${vlan} &
	atf_check -s exit:0 -o ignore -e ignore $(atf_get_srcdir)/pcp.py \
	    --expect-pcp 6 \
	    --recvif ${epair}a
}

bpf_pcp_cleanup()
{
	sysctl net.link.vlan.mtag_pcp=0
	jexec alcatraz killall dhclient
	vnet_cleanup
}

atf_test_case "conflict_id" "cleanup"
conflict_id_head()
{
	atf_set descr 'Test conflicting VLAN IDs, PR #279195'
	atf_set require.user root
}

conflict_id_body()
{
	vnet_init

	epair=$(vnet_mkepair)

	vnet_mkjail alcatraz ${epair}b

	vlan_a=$(jexec alcatraz ifconfig vlan create)
	vlan_b=$(jexec alcatraz ifconfig vlan create)

	jexec alcatraz ifconfig ${vlan_a} vlan 100 vlandev ${epair}b
	jexec alcatraz ifconfig ${vlan_b} vlan 101 vlandev ${epair}b

	atf_check -s exit:1 -o ignore -e ignore \
	    jexec alcatraz ifconfig ${vlan_a} vlan 101
	atf_check -s exit:0 -o match:"vlan: 100" \
	    jexec alcatraz ifconfig ${vlan_a}

	atf_check -s exit:0 -o ignore -e ignore \
	    jexec alcatraz ifconfig ${vlan_a} vlan 100
}

conflict_id_cleanup()
{
	vnet_cleanup
}

+# If a vlan interface is in a bridge, changing the vlandev to refer to
+# a bridge should not be allowed.
+atf_test_case "bridge_vlandev" "cleanup"
+bridge_vlandev_head()
+{
+	atf_set descr 'transforming a bridge member vlan into an SVI is not allowed'
+	atf_set require.user root
+}
+
+bridge_vlandev_body()
+{
+	vnet_init
+	vnet_init_bridge
+
+	bridge=$(vnet_mkbridge)
+	vlan=$(vnet_mkvlan)
+
+	atf_check -s exit:0 ifconfig ${bridge} addm ${vlan}
+	atf_check -s exit:1 -e ignore ifconfig ${vlan} vlan 1 vlandev ${bridge}
+}
+
+bridge_vlandev_cleanup()
+{
+	vnet_cleanup
+}
+
atf_init_test_cases()
{
	atf_add_test_case "basic"
	atf_add_test_case "qinq_simple"
	atf_add_test_case "qinq_deep"
	atf_add_test_case "qinq_legacy"
	atf_add_test_case "qinq_dot"
	atf_add_test_case "qinq_setflags"
	atf_add_test_case "bpf_pcp"
	atf_add_test_case "conflict_id"
+	atf_add_test_case "bridge_vlandev"
}