Index: head/sys/net/if_bridge.c
===================================================================
--- head/sys/net/if_bridge.c
+++ head/sys/net/if_bridge.c
@@ -186,17 +186,41 @@
 /*
  * Bridge locking
+ *
+ * The bridge relies heavily on the epoch(9) system to protect its data
+ * structures. This means we can safely use CK_LISTs while in NET_EPOCH, but we
+ * must ensure there is only one writer at a time.
+ *
+ * That is: for read accesses we only need to be in NET_EPOCH, but for write
+ * accesses we must hold:
+ *
+ *  - BRIDGE_RT_LOCK, for any change to bridge_rtnodes
+ *  - BRIDGE_LOCK, for any other change
+ *
+ * The BRIDGE_LOCK is a sleepable lock, because it is held across ioctl()
+ * calls to bridge member interfaces and these ioctl()s can sleep.
+ * The BRIDGE_RT_LOCK is a non-sleepable mutex, because it is sometimes
+ * required while we're in NET_EPOCH and then we're not allowed to sleep.
  */
 #define	BRIDGE_LOCK_INIT(_sc)		do {			\
-	mtx_init(&(_sc)->sc_mtx, "if_bridge", NULL, MTX_DEF);	\
+	sx_init(&(_sc)->sc_sx, "if_bridge");			\
+	mtx_init(&(_sc)->sc_rt_mtx, "if_bridge rt", NULL, MTX_DEF);	\
 } while (0)
 #define	BRIDGE_LOCK_DESTROY(_sc)	do {	\
-	mtx_destroy(&(_sc)->sc_mtx);		\
+	sx_destroy(&(_sc)->sc_sx);		\
+	mtx_destroy(&(_sc)->sc_rt_mtx);		\
 } while (0)
-#define	BRIDGE_LOCK(_sc)		mtx_lock(&(_sc)->sc_mtx)
-#define	BRIDGE_UNLOCK(_sc)		mtx_unlock(&(_sc)->sc_mtx)
-#define	BRIDGE_LOCK_ASSERT(_sc)		mtx_assert(&(_sc)->sc_mtx, MA_OWNED)
-#define	BRIDGE_UNLOCK_ASSERT(_sc)	mtx_assert(&(_sc)->sc_mtx, MA_NOTOWNED)
+#define	BRIDGE_LOCK(_sc)		sx_xlock(&(_sc)->sc_sx)
+#define	BRIDGE_UNLOCK(_sc)		sx_xunlock(&(_sc)->sc_sx)
+#define	BRIDGE_LOCK_ASSERT(_sc)		sx_assert(&(_sc)->sc_sx, SX_XLOCKED)
+#define	BRIDGE_LOCK_OR_NET_EPOCH_ASSERT(_sc)	\
+	    MPASS(in_epoch(net_epoch_preempt) || sx_xlocked(&(_sc)->sc_sx))
+#define	BRIDGE_UNLOCK_ASSERT(_sc)	sx_assert(&(_sc)->sc_sx, SX_UNLOCKED)
+#define	BRIDGE_RT_LOCK(_sc)		mtx_lock(&(_sc)->sc_rt_mtx)
+#define	BRIDGE_RT_UNLOCK(_sc)		mtx_unlock(&(_sc)->sc_rt_mtx)
+#define	BRIDGE_RT_LOCK_ASSERT(_sc)	mtx_assert(&(_sc)->sc_rt_mtx, MA_OWNED)
+#define	BRIDGE_RT_LOCK_OR_NET_EPOCH_ASSERT(_sc)	\
+	    MPASS(in_epoch(net_epoch_preempt) || mtx_owned(&(_sc)->sc_rt_mtx))
 
 /*
  * Bridge interface list entry.
@@ -235,7 +259,8 @@
 struct bridge_softc {
 	struct ifnet		*sc_ifp;	/* make this an interface */
 	LIST_ENTRY(bridge_softc) sc_list;
-	struct mtx		sc_mtx;
+	struct sx		sc_sx;
+	struct mtx		sc_rt_mtx;
 	uint32_t		sc_brtmax;	/* max # of addresses */
 	uint32_t		sc_brtcnt;	/* cur. # of addresses */
 	uint32_t		sc_brttimeout;	/* rt timeout in seconds */
@@ -252,8 +277,8 @@
 	struct epoch_context	sc_epoch_ctx;
 };
-VNET_DEFINE_STATIC(struct mtx, bridge_list_mtx);
-#define	V_bridge_list_mtx	VNET(bridge_list_mtx)
+VNET_DEFINE_STATIC(struct sx, bridge_list_sx);
+#define	V_bridge_list_sx	VNET(bridge_list_sx)
 static eventhandler_tag bridge_detach_cookie;
 int	bridge_rtable_prune_period = BRIDGE_RTABLE_PRUNE_PERIOD;
@@ -536,11 +561,11 @@
 VNET_DEFINE_STATIC(LIST_HEAD(, bridge_softc), bridge_list);
 #define	V_bridge_list	VNET(bridge_list)
-#define	BRIDGE_LIST_LOCK_INIT(x)	mtx_init(&V_bridge_list_mtx,	\
-	    "if_bridge list", NULL, MTX_DEF)
-#define	BRIDGE_LIST_LOCK_DESTROY(x)	mtx_destroy(&V_bridge_list_mtx)
-#define	BRIDGE_LIST_LOCK(x)		mtx_lock(&V_bridge_list_mtx)
-#define	BRIDGE_LIST_UNLOCK(x)		mtx_unlock(&V_bridge_list_mtx)
+#define	BRIDGE_LIST_LOCK_INIT(x)	sx_init(&V_bridge_list_sx,	\
+	    "if_bridge list")
+#define	BRIDGE_LIST_LOCK_DESTROY(x)	sx_destroy(&V_bridge_list_sx)
+#define	BRIDGE_LIST_LOCK(x)		sx_xlock(&V_bridge_list_sx)
+#define	BRIDGE_LIST_UNLOCK(x)		sx_xunlock(&V_bridge_list_sx)
 VNET_DEFINE_STATIC(struct if_clone *, bridge_cloner);
 #define	V_bridge_cloner	VNET(bridge_cloner)
@@ -670,7 +695,7 @@
 	/* Initialize our routing table. */
 	bridge_rtable_init(sc);
-	callout_init_mtx(&sc->sc_brcallout, &sc->sc_mtx, 0);
+	callout_init_mtx(&sc->sc_brcallout, &sc->sc_rt_mtx, 0);
 	CK_LIST_INIT(&sc->sc_iflist);
 	CK_LIST_INIT(&sc->sc_spanlist);
@@ -722,7 +747,6 @@
 	struct bridge_iflist *bif;
 	struct epoch_tracker et;
-	NET_EPOCH_ENTER(et);
 	BRIDGE_LOCK(sc);
 	bridge_stop(ifp, 1);
@@ -740,6 +764,8 @@
 	BRIDGE_UNLOCK(sc);
+	NET_EPOCH_ENTER(et);
+
 	callout_drain(&sc->sc_brcallout);
 	BRIDGE_LIST_LOCK();
@@ -778,9 +804,8 @@
 	struct ifdrv *ifd = (struct ifdrv *) data;
 	const struct bridge_control *bc;
 	int error = 0, oldmtu;
-	struct epoch_tracker et;
-	NET_EPOCH_ENTER(et);
+	BRIDGE_LOCK(sc);
 	switch (cmd) {
 	case SIOCADDMULTI:
@@ -826,9 +851,7 @@
 		}
 		oldmtu = ifp->if_mtu;
-		BRIDGE_LOCK(sc);
 		error = (*bc->bc_func)(sc, &args);
-		BRIDGE_UNLOCK(sc);
 		if (error)
 			break;
@@ -855,16 +878,16 @@
 			 * If interface is marked down and it is running,
 			 * then stop and disable it.
 			 */
-			BRIDGE_LOCK(sc);
 			bridge_stop(ifp, 1);
-			BRIDGE_UNLOCK(sc);
 		} else if ((ifp->if_flags & IFF_UP) &&
 		    !(ifp->if_drv_flags & IFF_DRV_RUNNING)) {
 			/*
 			 * If interface is marked up and it is stopped, then
 			 * start it.
 			 */
+			BRIDGE_UNLOCK(sc);
 			(*ifp->if_init)(sc);
+			BRIDGE_LOCK(sc);
 		}
 		break;
@@ -877,7 +900,6 @@
 			sc->sc_ifp->if_mtu = ifr->ifr_mtu;
 			break;
 		}
-		BRIDGE_LOCK(sc);
 		CK_LIST_FOREACH(bif, &sc->sc_iflist, bif_next) {
 			if (bif->bif_ifp->if_mtu != ifr->ifr_mtu) {
 				log(LOG_NOTICE, "%s: invalid MTU: %u(%s)"
@@ -890,18 +912,19 @@
 		}
 		if (!error)
 			sc->sc_ifp->if_mtu = ifr->ifr_mtu;
-		BRIDGE_UNLOCK(sc);
 		break;
 	default:
 		/*
 		 * drop the lock as ether_ioctl() will call bridge_start() and
 		 * cause the lock to be recursed.
		 */
+		BRIDGE_UNLOCK(sc);
 		error = ether_ioctl(ifp, cmd, data);
+		BRIDGE_LOCK(sc);
 		break;
 	}
-	NET_EPOCH_EXIT(et);
+	BRIDGE_UNLOCK(sc);
 	return (error);
 }
@@ -933,9 +956,7 @@
 		/* strip off mask bits and enable them again if allowed */
 		enabled &= ~BRIDGE_IFCAPS_MASK;
 		enabled |= mask;
-		BRIDGE_UNLOCK(sc);
 		bridge_set_ifcap(sc, bif, enabled);
-		BRIDGE_LOCK(sc);
 	}
 }
@@ -946,8 +967,6 @@
 	struct ifreq ifr;
 	int error, mask, stuck;
-	BRIDGE_UNLOCK_ASSERT(sc);
-
 	bzero(&ifr, sizeof(ifr));
 	ifr.ifr_reqcap = set;
@@ -977,7 +996,7 @@
 	struct bridge_iflist *bif;
 	struct ifnet *ifp;
-	NET_EPOCH_ASSERT();
+	BRIDGE_LOCK_OR_NET_EPOCH_ASSERT(sc);
 	CK_LIST_FOREACH(bif, &sc->sc_iflist, bif_next) {
 		ifp = bif->bif_ifp;
@@ -998,7 +1017,7 @@
 {
 	struct bridge_iflist *bif;
-	NET_EPOCH_ASSERT();
+	BRIDGE_LOCK_OR_NET_EPOCH_ASSERT(sc);
 	CK_LIST_FOREACH(bif, &sc->sc_iflist, bif_next) {
 		if (bif->bif_ifp == member_ifp)
@@ -1061,14 +1080,15 @@
 	bridge_linkcheck(sc);
 	bridge_mutecaps(sc);	/* recalcuate now this interface is removed */
+	BRIDGE_RT_LOCK(sc);
 	bridge_rtdelete(sc, ifs, IFBF_FLUSHALL);
+	BRIDGE_RT_UNLOCK(sc);
 	KASSERT(bif->bif_addrcnt == 0,
 	    ("%s: %d bridge routes referenced", __func__, bif->bif_addrcnt));
 	ifs->if_bridge_output = NULL;
 	ifs->if_bridge_input = NULL;
 	ifs->if_bridge_linkstate = NULL;
-	BRIDGE_UNLOCK(sc);
 	if (!gone) {
 		switch (ifs->if_type) {
 		case IFT_ETHER:
@@ -1095,7 +1115,6 @@
 		bridge_set_ifcap(sc, bif, bif->bif_savedcaps);
 	}
 	bstp_destroy(&bif->bif_stp);	/* prepare to free */
-	BRIDGE_LOCK(sc);
 	NET_EPOCH_CALL(bridge_delete_member_cb, &bif->bif_epoch_ctx);
 }
@@ -1173,9 +1192,7 @@
 	 */
 	CK_LIST_FOREACH(bif, &sc->sc_iflist, bif_next) {
 		if (in6ifa_llaonifp(bif->bif_ifp)) {
-			BRIDGE_UNLOCK(sc);
 			in6_ifdetach(bif->bif_ifp);
-			BRIDGE_LOCK(sc);
 			if_printf(sc->sc_ifp,
 			    "IPv6 addresses on %s have been removed "
 			    "before adding it as a member to prevent "
@@ -1184,9 +1201,7 @@
 		}
 	}
 	if (in6ifa_llaonifp(ifs)) {
-		BRIDGE_UNLOCK(sc);
 		in6_ifdetach(ifs);
-		BRIDGE_LOCK(sc);
 		if_printf(sc->sc_ifp,
 		    "IPv6 addresses on %s have been removed "
 		    "before adding it as a member to prevent "
@@ -1244,9 +1259,7 @@
 	switch (ifs->if_type) {
 	case IFT_ETHER:
 	case IFT_L2VLAN:
-		BRIDGE_UNLOCK(sc);
 		error = ifpromisc(ifs, 1);
-		BRIDGE_LOCK(sc);
 		break;
 	}
@@ -1428,10 +1441,8 @@
 		len -= sizeof(breq);
 	}
-	BRIDGE_UNLOCK(sc);
 	bifc->ifbic_len = sizeof(breq) * count;
 	error = copyout(outbuf, bifc->ifbic_req, bifc->ifbic_len);
-	BRIDGE_LOCK(sc);
 	free(outbuf, M_TEMP);
 	return (error);
 }
@@ -1481,10 +1492,8 @@
 		len -= sizeof(bareq);
 	}
 out:
-	BRIDGE_UNLOCK(sc);
 	bac->ifbac_len = sizeof(bareq) * count;
 	error = copyout(outbuf, bac->ifbac_req, bac->ifbac_len);
-	BRIDGE_LOCK(sc);
 	free(outbuf, M_TEMP);
 	return (error);
 }
@@ -1494,19 +1503,20 @@
 {
 	struct ifbareq *req = arg;
 	struct bridge_iflist *bif;
+	struct epoch_tracker et;
 	int error;
-	NET_EPOCH_ASSERT();
-
+	NET_EPOCH_ENTER(et);
 	bif = bridge_lookup_member(sc, req->ifba_ifsname);
-	if (bif == NULL)
+	if (bif == NULL) {
+		NET_EPOCH_EXIT(et);
 		return (ENOENT);
+	}
 	/* bridge_rtupdate() may acquire the lock. */
-	BRIDGE_UNLOCK(sc);
 	error = bridge_rtupdate(sc, req->ifba_dst, req->ifba_vlan, bif, 1,
 	    req->ifba_flags);
-	BRIDGE_LOCK(sc);
+	NET_EPOCH_EXIT(et);
 	return (error);
 }
@@ -1542,7 +1552,10 @@
 {
 	struct ifbreq *req = arg;
+	BRIDGE_RT_LOCK(sc);
 	bridge_rtflush(sc, req->ifbr_ifsflags);
+	BRIDGE_RT_UNLOCK(sc);
+
 	return (0);
 }
@@ -1810,10 +1823,8 @@
 		len -= sizeof(bpreq);
 	}
-	BRIDGE_UNLOCK(sc);
 	bifstp->ifbpstp_len = sizeof(bpreq) * count;
 	error = copyout(outbuf, bifstp->ifbpstp_req, bifstp->ifbpstp_len);
-	BRIDGE_LOCK(sc);
 	free(outbuf, M_TEMP);
 	return (error);
 }
@@ -1845,7 +1856,6 @@
 {
 	struct bridge_softc *sc = ifp->if_bridge;
 	struct bridge_iflist *bif;
-	struct epoch_tracker et;
 	if (ifp->if_flags & IFF_RENAMING)
 		return;
@@ -1856,7 +1866,6 @@
 		 */
 		return;
 	}
-	NET_EPOCH_ENTER(et);
 	/* Check if the interface is a bridge member */
 	if (sc != NULL) {
 		BRIDGE_LOCK(sc);
@@ -1866,7 +1875,6 @@
 			bridge_delete_member(sc, bif, 1);
 		BRIDGE_UNLOCK(sc);
-		NET_EPOCH_EXIT(et);
 		return;
 	}
@@ -1883,7 +1891,6 @@
 		BRIDGE_UNLOCK(sc);
 	}
 	BRIDGE_LIST_UNLOCK();
-	NET_EPOCH_EXIT(et);
 }
 /*
@@ -1920,16 +1927,18 @@
 {
 	struct bridge_softc *sc = ifp->if_softc;
-	NET_EPOCH_ASSERT();
 	BRIDGE_LOCK_ASSERT(sc);
 	if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
 		return;
+	BRIDGE_RT_LOCK(sc);
 	callout_stop(&sc->sc_brcallout);
+
 	bstp_stop(&sc->sc_stp);
 	bridge_rtflush(sc, IFBF_FLUSHDYN);
+	BRIDGE_RT_UNLOCK(sc);
 	ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
 }
@@ -2659,8 +2668,7 @@
 	struct bridge_rtnode *brt;
 	int error;
-	NET_EPOCH_ASSERT();
-	BRIDGE_UNLOCK_ASSERT(sc);
+	BRIDGE_LOCK_OR_NET_EPOCH_ASSERT(sc);
 	/* Check the source address is valid and not multicast. */
 	if (ETHER_IS_MULTICAST(dst) ||
@@ -2677,24 +2685,24 @@
 	 * update it, otherwise create a new one.
 	 */
 	if ((brt = bridge_rtnode_lookup(sc, dst, vlan)) == NULL) {
-		BRIDGE_LOCK(sc);
+		BRIDGE_RT_LOCK(sc);
 		/* Check again, now that we have the lock. There could have
 		 * been a race and we only want to insert this once. */
 		if ((brt = bridge_rtnode_lookup(sc, dst, vlan)) != NULL) {
-			BRIDGE_UNLOCK(sc);
+			BRIDGE_RT_UNLOCK(sc);
 			return (0);
 		}
 		if (sc->sc_brtcnt >= sc->sc_brtmax) {
 			sc->sc_brtexceeded++;
-			BRIDGE_UNLOCK(sc);
+			BRIDGE_RT_UNLOCK(sc);
 			return (ENOSPC);
 		}
 		/* Check per interface address limits (if enabled) */
 		if (bif->bif_addrmax && bif->bif_addrcnt >= bif->bif_addrmax) {
 			bif->bif_addrexceeded++;
-			BRIDGE_UNLOCK(sc);
+			BRIDGE_RT_UNLOCK(sc);
 			return (ENOSPC);
 		}
@@ -2705,7 +2713,7 @@
 		 */
 		brt = uma_zalloc(V_bridge_rtnode_zone, M_NOWAIT | M_ZERO);
 		if (brt == NULL) {
-			BRIDGE_UNLOCK(sc);
+			BRIDGE_RT_UNLOCK(sc);
 			return (ENOMEM);
 		}
 		brt->brt_vnet = curvnet;
@@ -2720,22 +2728,22 @@
 		if ((error = bridge_rtnode_insert(sc, brt)) != 0) {
 			uma_zfree(V_bridge_rtnode_zone, brt);
-			BRIDGE_UNLOCK(sc);
+			BRIDGE_RT_UNLOCK(sc);
 			return (error);
 		}
 		brt->brt_dst = bif;
 		bif->bif_addrcnt++;
-		BRIDGE_UNLOCK(sc);
+		BRIDGE_RT_UNLOCK(sc);
 	}
 	if ((brt->brt_flags & IFBAF_TYPEMASK) == IFBAF_DYNAMIC &&
 	    brt->brt_dst != bif) {
-		BRIDGE_LOCK(sc);
+		BRIDGE_RT_LOCK(sc);
 		brt->brt_dst->bif_addrcnt--;
 		brt->brt_dst = bif;
 		brt->brt_dst->bif_addrcnt++;
-		BRIDGE_UNLOCK(sc);
+		BRIDGE_RT_UNLOCK(sc);
 	}
 	if ((flags & IFBAF_TYPEMASK) == IFBAF_DYNAMIC)
@@ -2777,7 +2785,7 @@
 	struct bridge_rtnode *brt, *nbrt;
 	NET_EPOCH_ASSERT();
-	BRIDGE_LOCK_ASSERT(sc);
+	BRIDGE_RT_LOCK_ASSERT(sc);
 	/* Make sure we actually need to do this. */
 	if (sc->sc_brtcnt <= sc->sc_brtmax)
 		return;
@@ -2806,10 +2814,8 @@
 bridge_timer(void *arg)
 {
 	struct bridge_softc *sc = arg;
-	struct epoch_tracker et;
-	NET_EPOCH_ENTER(et);
-	BRIDGE_LOCK_ASSERT(sc);
+	BRIDGE_RT_LOCK_ASSERT(sc);
 	/* Destruction of rtnodes requires a proper vnet context */
 	CURVNET_SET(sc->sc_ifp->if_vnet);
@@ -2819,7 +2825,6 @@
 		callout_reset(&sc->sc_brcallout,
 		    bridge_rtable_prune_period * hz, bridge_timer, sc);
 	CURVNET_RESTORE();
-	NET_EPOCH_EXIT(et);
 }
 /*
@@ -2832,8 +2837,7 @@
 {
 	struct bridge_rtnode *brt, *nbrt;
-	NET_EPOCH_ASSERT();
-	BRIDGE_LOCK_ASSERT(sc);
+	BRIDGE_RT_LOCK_ASSERT(sc);
 	CK_LIST_FOREACH_SAFE(brt, &sc->sc_rtlist, brt_list, nbrt) {
 		if ((brt->brt_flags & IFBAF_TYPEMASK) == IFBAF_DYNAMIC) {
@@ -2853,8 +2857,7 @@
 {
 	struct bridge_rtnode *brt, *nbrt;
-	NET_EPOCH_ASSERT();
-	BRIDGE_LOCK_ASSERT(sc);
+	BRIDGE_RT_LOCK_ASSERT(sc);
 	CK_LIST_FOREACH_SAFE(brt, &sc->sc_rtlist, brt_list, nbrt) {
 		if (full || (brt->brt_flags & IFBAF_TYPEMASK) == IFBAF_DYNAMIC)
@@ -2873,8 +2876,7 @@
 	struct bridge_rtnode *brt;
 	int found = 0;
-	NET_EPOCH_ASSERT();
-	BRIDGE_LOCK_ASSERT(sc);
+	BRIDGE_RT_LOCK(sc);
 	/*
 	 * If vlan is zero then we want to delete for all vlans so the lookup
@@ -2885,6 +2887,8 @@
 		found = 1;
 	}
+	BRIDGE_RT_UNLOCK(sc);
+
 	return (found ? 0 : ENOENT);
 }
@@ -2898,8 +2902,7 @@
 {
 	struct bridge_rtnode *brt, *nbrt;
-	NET_EPOCH_ASSERT();
-	BRIDGE_LOCK_ASSERT(sc);
+	BRIDGE_RT_LOCK_ASSERT(sc);
 	CK_LIST_FOREACH_SAFE(brt, &sc->sc_rtlist, brt_list, nbrt) {
 		if (brt->brt_ifp == ifp && (full ||
@@ -3003,7 +3006,7 @@
 	uint32_t hash;
 	int dir;
-	NET_EPOCH_ASSERT();
+	BRIDGE_RT_LOCK_OR_NET_EPOCH_ASSERT(sc);
 	hash = bridge_rthash(sc, addr);
 	CK_LIST_FOREACH(brt, &sc->sc_rthash[hash], brt_hash) {
@@ -3030,7 +3033,7 @@
 	uint32_t hash;
 	int dir;
-	BRIDGE_LOCK_ASSERT(sc);
+	BRIDGE_RT_LOCK_ASSERT(sc);
 	hash = bridge_rthash(sc, brt->brt_addr);
@@ -3086,8 +3089,7 @@
 static void
 bridge_rtnode_destroy(struct bridge_softc *sc, struct bridge_rtnode *brt)
 {
-	NET_EPOCH_ASSERT();
-	BRIDGE_LOCK_ASSERT(sc);
+	BRIDGE_RT_LOCK_ASSERT(sc);
 	CK_LIST_REMOVE(brt, brt_hash);
@@ -3108,11 +3110,9 @@
 {
 	struct bridge_softc *sc = ifp->if_bridge;
 	struct bridge_rtnode *brt;
-	struct epoch_tracker et;
-	NET_EPOCH_ENTER(et);
 	CURVNET_SET(ifp->if_vnet);
-	BRIDGE_LOCK(sc);
+	BRIDGE_RT_LOCK(sc);
 	/*
 	 * If the age is zero then flush, otherwise set all the expiry times to
@@ -3129,9 +3129,8 @@
 				brt->brt_expire = time_uptime + age;
 		}
 	}
-	BRIDGE_UNLOCK(sc);
+	BRIDGE_RT_UNLOCK(sc);
 	CURVNET_RESTORE();
-	NET_EPOCH_EXIT(et);
 }
 /*
@@ -3659,7 +3658,7 @@
 	struct bridge_iflist *bif;
 	int new_link, hasls;
-	NET_EPOCH_ASSERT();
+	BRIDGE_LOCK_OR_NET_EPOCH_ASSERT(sc);
 	new_link = LINK_STATE_DOWN;
 	hasls = 0;
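
Reviewer sketch (not part of the patch): the new locking comment describes a split model, where readers only need NET_EPOCH while route-table writers serialize on the new BRIDGE_RT_LOCK and all other writers on the sx-based BRIDGE_LOCK. A minimal, hypothetical illustration of that pattern, reusing the macros and helpers from this file; the example_* function names are invented for illustration only and do not appear in the diff:

/*
 * Hypothetical illustration only.  Readers walk the CK_LISTs under
 * NET_EPOCH; writers to the route table additionally serialize on
 * BRIDGE_RT_LOCK, which must be a non-sleepable mutex because it can
 * be taken while in NET_EPOCH.
 */
static int
example_rtnode_reader(struct bridge_softc *sc, const uint8_t *addr,
    uint16_t vlan)
{
	struct epoch_tracker et;
	struct bridge_rtnode *brt;
	int found;

	NET_EPOCH_ENTER(et);		/* read side: epoch is sufficient */
	brt = bridge_rtnode_lookup(sc, addr, vlan);
	found = (brt != NULL);		/* only use brt inside the epoch section */
	NET_EPOCH_EXIT(et);

	return (found);
}

static void
example_rtnode_writer(struct bridge_softc *sc)
{
	BRIDGE_RT_LOCK(sc);		/* write side: single writer, no sleeping */
	bridge_rtflush(sc, IFBF_FLUSHDYN);
	BRIDGE_RT_UNLOCK(sc);
}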