Index: sys/net/if_lagg.h
===================================================================
--- sys/net/if_lagg.h
+++ sys/net/if_lagg.h
@@ -206,6 +206,7 @@
 	struct ifnet		*sc_ifp;	/* virtual interface */
 	struct rmlock		sc_mtx;
 	struct sx		sc_sx;
+	bool			sc_slowpath;
 	int			sc_proto;	/* lagg protocol */
 	u_int			sc_count;	/* number of ports */
 	u_int			sc_active;	/* active port count */
@@ -257,20 +258,9 @@
 #define	LAGG_LOCK_INIT(_sc)	rm_init(&(_sc)->sc_mtx, "if_lagg rmlock")
 #define	LAGG_LOCK_DESTROY(_sc)	rm_destroy(&(_sc)->sc_mtx)
-#define	LAGG_RLOCK(_sc, _p)	rm_rlock(&(_sc)->sc_mtx, (_p))
-#define	LAGG_WLOCK(_sc)		rm_wlock(&(_sc)->sc_mtx)
-#define	LAGG_RUNLOCK(_sc, _p)	rm_runlock(&(_sc)->sc_mtx, (_p))
-#define	LAGG_WUNLOCK(_sc)	rm_wunlock(&(_sc)->sc_mtx)
-#define	LAGG_RLOCK_ASSERT(_sc)	rm_assert(&(_sc)->sc_mtx, RA_RLOCKED)
-#define	LAGG_WLOCK_ASSERT(_sc)	rm_assert(&(_sc)->sc_mtx, RA_WLOCKED)
-#define	LAGG_UNLOCK_ASSERT(_sc)	rm_assert(&(_sc)->sc_mtx, RA_UNLOCKED)
 #define	LAGG_SX_INIT(_sc)	sx_init(&(_sc)->sc_sx, "if_lagg sx")
 #define	LAGG_SX_DESTROY(_sc)	sx_destroy(&(_sc)->sc_sx)
-#define	LAGG_SLOCK(_sc)		sx_slock(&(_sc)->sc_sx)
-#define	LAGG_XLOCK(_sc)		sx_xlock(&(_sc)->sc_sx)
-#define	LAGG_SUNLOCK(_sc)	sx_sunlock(&(_sc)->sc_sx)
-#define	LAGG_XUNLOCK(_sc)	sx_xunlock(&(_sc)->sc_sx)
 #define	LAGG_SXLOCK_ASSERT(_sc)	sx_assert(&(_sc)->sc_sx, SA_LOCKED)
 #define	LAGG_SLOCK_ASSERT(_sc)	sx_assert(&(_sc)->sc_sx, SA_SLOCKED)
 #define	LAGG_XLOCK_ASSERT(_sc)	sx_assert(&(_sc)->sc_sx, SA_XLOCKED)
Index: sys/net/if_lagg.c
===================================================================
--- sys/net/if_lagg.c
+++ sys/net/if_lagg.c
@@ -312,7 +312,108 @@
 DECLARE_MODULE(if_lagg, lagg_mod, SI_SUB_PSEUDO, SI_ORDER_ANY);
 MODULE_VERSION(if_lagg, 1);
 
+/*
+ * Locking
+ *
+ * There are two locks in play, an rmlock and an sx lock. The rmlock
+ * is used for non-sleepable reads in the fast path, and the sx lock
+ * is used in the control path and protects against changes. The sx
+ * lock is needed because if_addmulti(), if_delmulti(), and other
+ * functions called from lagg_port_ioctl() may sleep.
+ *
+ * The rmlock itself only protects sc->sc_slowpath. If that value is
+ * true, the control path holds or is obtaining the exclusive sx lock
+ * and the fast path must back off. This is handled by lagg_rlock()
+ * and lagg_runlock(), which may only be used where sleeping is not
+ * allowed.
+ *
+ * The sx lock protects the softc and may only be held exclusively
+ * while sc->sc_slowpath is true. A shared sx lock, however, can
+ * coexist with the rmlock. lagg_slock()/lagg_sunlock() acquire and
+ * release the shared lock; lagg_xlock()/lagg_xunlock() handle the
+ * exclusive lock.
+ */
+
+/*
+ * Grab the rmlock, ensuring sc_slowpath is false.
+ */
+static inline void
+lagg_rlock(struct lagg_softc *sc, struct rm_priotracker *tracker)
+{
+	sx_assert(&sc->sc_sx, SA_UNLOCKED);
+	do {
+		rm_rlock(&sc->sc_mtx, tracker);
+		if (sc->sc_slowpath == false)
+			break;
+		rm_runlock(&sc->sc_mtx, tracker);
+		cpu_spinwait();
+	} while (1);
+}
+
+static inline void
+lagg_runlock(struct lagg_softc *sc, struct rm_priotracker *tracker)
+{
+	rm_runlock(&sc->sc_mtx, tracker);
+}
+
+/*
+ * Shared sx lock. Blocks until sc_slowpath is false.
+ */
+static void
+lagg_slock(struct lagg_softc *sc)
+{
+	struct rm_priotracker tracker;
+
+	rm_assert(&sc->sc_mtx, RA_UNLOCKED);
+	do {
+		sx_slock(&sc->sc_sx);
+		rm_rlock(&sc->sc_mtx, &tracker);
+		if (sc->sc_slowpath == false) {
+			rm_runlock(&sc->sc_mtx, &tracker);
+			break;
+		}
+		rm_runlock(&sc->sc_mtx, &tracker);
+		sx_sunlock(&sc->sc_sx);
+		DELAY(1);
+	} while (1);
+}
+
+/*
+ * Exclusive sx lock. Blocks until sc_slowpath is false, then sets
+ * it to true and obtains the exclusive lock.
+ */
+static void
+lagg_xlock(struct lagg_softc *sc)
+{
+	rm_assert(&sc->sc_mtx, RA_UNLOCKED);
+	do {
+		rm_wlock(&sc->sc_mtx);
+		if (sc->sc_slowpath == false) {
+			sc->sc_slowpath = true;
+			rm_wunlock(&sc->sc_mtx);
+			break;
+		}
+		rm_wunlock(&sc->sc_mtx);
+		DELAY(1);
+	} while (1);
+	sx_xlock(&sc->sc_sx);
+}
+
+static inline void
+lagg_sunlock(struct lagg_softc *sc)
+{
+	sx_sunlock(&sc->sc_sx);
+}
+
+static inline void
+lagg_xunlock(struct lagg_softc *sc)
+{
+	sx_xunlock(&sc->sc_sx);
+	rm_wlock(&sc->sc_mtx);
+	sc->sc_slowpath = false;
+	rm_wunlock(&sc->sc_mtx);
+}
+
+static void
 lagg_proto_attach(struct lagg_softc *sc, lagg_proto pr)
 {
@@ -334,14 +435,11 @@
 	lagg_proto pr;
 
 	LAGG_XLOCK_ASSERT(sc);
-	LAGG_WLOCK_ASSERT(sc);
 
 	pr = sc->sc_proto;
 	sc->sc_proto = LAGG_PROTO_NONE;
 	if (lagg_protos[pr].pr_detach != NULL)
 		lagg_protos[pr].pr_detach(sc);
-	else
-		LAGG_WUNLOCK(sc);
 }
 
 static int
@@ -437,10 +535,10 @@
 	if (ifp->if_softc != arg) /* Not our event */
 		return;
 
-	LAGG_SLOCK(sc);
+	lagg_slock(sc);
 	SLIST_FOREACH(lp, &sc->sc_ports, lp_entries)
 		EVENTHANDLER_INVOKE(vlan_config, lp->lp_ifp, vtag);
-	LAGG_SUNLOCK(sc);
+	lagg_sunlock(sc);
 }
 
 /*
@@ -456,10 +554,10 @@
 	if (ifp->if_softc != arg) /* Not our event */
 		return;
 
-	LAGG_SLOCK(sc);
+	lagg_slock(sc);
 	SLIST_FOREACH(lp, &sc->sc_ports, lp_entries)
 		EVENTHANDLER_INVOKE(vlan_unconfig, lp->lp_ifp, vtag);
-	LAGG_SUNLOCK(sc);
+	lagg_sunlock(sc);
 }
 
 static int
@@ -478,7 +576,7 @@
 	LAGG_LOCK_INIT(sc);
 	LAGG_SX_INIT(sc);
 
-	LAGG_XLOCK(sc);
+	lagg_xlock(sc);
 	if (V_def_use_flowid)
 		sc->sc_opts |= LAGG_OPT_USE_FLOWID;
 	sc->flowid_shift = V_def_flowid_shift;
@@ -526,7 +624,7 @@
 	LAGG_LIST_LOCK();
 	SLIST_INSERT_HEAD(&V_lagg_list, sc, sc_entries);
 	LAGG_LIST_UNLOCK();
-	LAGG_XUNLOCK(sc);
+	lagg_xunlock(sc);
 
 	return (0);
 }
@@ -537,7 +635,7 @@
 	struct lagg_softc *sc = (struct lagg_softc *)ifp->if_softc;
 	struct lagg_port *lp;
 
-	LAGG_XLOCK(sc);
+	lagg_xlock(sc);
 	sc->sc_destroying = 1;
 	lagg_stop(sc);
 	ifp->if_flags &= ~IFF_UP;
@@ -550,10 +648,8 @@
 		lagg_port_destroy(lp, 1);
 
 	/* Unhook the aggregation protocol */
-	LAGG_WLOCK(sc);
 	lagg_proto_detach(sc);
-	LAGG_UNLOCK_ASSERT(sc);
-	LAGG_XUNLOCK(sc);
+	lagg_xunlock(sc);
 
 	ifmedia_removeall(&sc->sc_media);
 	ether_ifdetach(ifp);
@@ -564,7 +660,6 @@
 	LAGG_LIST_UNLOCK();
 
 	LAGG_SX_DESTROY(sc);
-	LAGG_LOCK_DESTROY(sc);
 	free(sc, M_DEVBUF);
 }
@@ -689,17 +784,14 @@
 	bcopy(IF_LLADDR(ifp), lp->lp_lladdr, ETHER_ADDR_LEN);
 	lp->lp_ifcapenable = ifp->if_capenable;
 	if (SLIST_EMPTY(&sc->sc_ports)) {
-		LAGG_WLOCK(sc);
 		bcopy(IF_LLADDR(ifp), IF_LLADDR(sc->sc_ifp), ETHER_ADDR_LEN);
 		lagg_proto_lladdr(sc);
-		LAGG_WUNLOCK(sc);
 		EVENTHANDLER_INVOKE(iflladdr_event, sc->sc_ifp);
 	} else {
 		if_setlladdr(ifp, IF_LLADDR(sc->sc_ifp), ETHER_ADDR_LEN);
 	}
 	lagg_setflags(lp, 1);
 
-	LAGG_WLOCK(sc);
 	if (SLIST_EMPTY(&sc->sc_ports))
 		sc->sc_primary = lp;
@@ -738,13 +830,9 @@
 	lagg_setmulti(lp);
 
-	LAGG_WUNLOCK(sc);
 
 	if ((error = lagg_proto_addport(sc, lp)) != 0) {
 		/* Remove the port, without calling pr_delport. */
-		LAGG_WLOCK(sc);
 		lagg_port_destroy(lp, 0);
-		LAGG_UNLOCK_ASSERT(sc);
 		return (error);
 	}
@@ -786,11 +874,8 @@
 	LAGG_XLOCK_ASSERT(sc);
 
-	if (rundelport) {
-		LAGG_WLOCK(sc);
+	if (rundelport)
 		lagg_proto_delport(sc, lp);
-	} else
-		LAGG_WLOCK_ASSERT(sc);
 
 	if (lp->lp_detaching == 0)
 		lagg_clrmulti(lp);
@@ -824,10 +909,8 @@
 		if (sc->sc_destroying == 0) {
 			bcopy(lladdr, IF_LLADDR(sc->sc_ifp), ETHER_ADDR_LEN);
 			lagg_proto_lladdr(sc);
-			LAGG_WUNLOCK(sc);
 			EVENTHANDLER_INVOKE(iflladdr_event, sc->sc_ifp);
-		} else
-			LAGG_WUNLOCK(sc);
+		}
 
 		/*
 		 * Update lladdr for each port (new primary needs update
@@ -835,8 +918,7 @@
 		 */
 		SLIST_FOREACH(lp_ptr, &sc->sc_ports, lp_entries)
 			if_setlladdr(lp_ptr->lp_ifp, lladdr, ETHER_ADDR_LEN);
-	} else
-		LAGG_WUNLOCK(sc);
+	}
 
 	if (lp->lp_ifflags)
 		if_printf(ifp, "%s: lp_ifflags unclean\n", __func__);
@@ -878,15 +960,15 @@
 			break;
 		}
 
-		LAGG_SLOCK(sc);
+		lagg_slock(sc);
 		if ((lp = ifp->if_lagg) == NULL || lp->lp_softc != sc) {
 			error = ENOENT;
-			LAGG_SUNLOCK(sc);
+			lagg_sunlock(sc);
 			break;
 		}
 
 		lagg_port2req(lp, rp);
-		LAGG_SUNLOCK(sc);
+		lagg_sunlock(sc);
 		break;
 
 	case SIOCSIFCAP:
@@ -899,9 +981,9 @@
 			break;
 
 		/* Update lagg interface capabilities */
-		LAGG_XLOCK(sc);
+		lagg_xlock(sc);
 		lagg_capabilities(sc);
-		LAGG_XUNLOCK(sc);
+		lagg_xunlock(sc);
 		VLAN_CAPABILITIES(sc->sc_ifp);
 		break;
@@ -942,14 +1024,14 @@
 	struct lagg_softc *sc;
 	struct lagg_port *lp;
 	struct ifnet *lpifp;
-	struct rm_priotracker tracker;
 	uint64_t newval, oldval, vsum;
+	struct rm_priotracker tracker;
 
 	/* Revise this when we've got non-generic counters. */
 	KASSERT(cnt < IFCOUNTERS, ("%s: invalid cnt %d", __func__, cnt));
 
 	sc = (struct lagg_softc *)ifp->if_softc;
-	LAGG_RLOCK(sc, &tracker);
+	lagg_rlock(sc, &tracker);
 
 	vsum = 0;
 	SLIST_FOREACH(lp, &sc->sc_ports, lp_entries) {
@@ -973,7 +1055,7 @@
 	 */
 	vsum += sc->detached_counters.val[cnt];
 
-	LAGG_RUNLOCK(sc, &tracker);
+	lagg_runlock(sc, &tracker);
 
 	return (vsum);
 }
@@ -1012,10 +1094,10 @@
 
 	sc = lp->lp_softc;
 
-	LAGG_XLOCK(sc);
+	lagg_xlock(sc);
 	lp->lp_detaching = 1;
 	lagg_port_destroy(lp, 1);
-	LAGG_XUNLOCK(sc);
+	lagg_xunlock(sc);
 	VLAN_CAPABILITIES(sc->sc_ifp);
 }
@@ -1066,9 +1148,9 @@
 	struct ifnet *ifp = sc->sc_ifp;
 	struct lagg_port *lp;
 
-	LAGG_XLOCK(sc);
+	lagg_xlock(sc);
 	if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
-		LAGG_XUNLOCK(sc);
+		lagg_xunlock(sc);
 		return;
 	}
@@ -1087,7 +1169,7 @@
 
 	lagg_proto_init(sc);
 
-	LAGG_XUNLOCK(sc);
+	lagg_xunlock(sc);
 }
 
 static void
@@ -1124,7 +1206,7 @@
 
 	switch (cmd) {
 	case SIOCGLAGG:
-		LAGG_SLOCK(sc);
+		lagg_slock(sc);
 		buflen = sc->sc_count * sizeof(struct lagg_reqport);
 		outbuf = malloc(buflen, M_TEMP, M_WAITOK | M_ZERO);
 		ra->ra_proto = sc->sc_proto;
@@ -1142,7 +1224,7 @@
 			buf += sizeof(rpbuf);
 			len -= sizeof(rpbuf);
 		}
-		LAGG_SUNLOCK(sc);
+		lagg_sunlock(sc);
 		ra->ra_ports = count;
 		ra->ra_size = count * sizeof(rpbuf);
 		error = copyout(outbuf, ra->ra_port, ra->ra_size);
@@ -1157,15 +1239,13 @@
 			break;
 		}
 
-		LAGG_XLOCK(sc);
-		LAGG_WLOCK(sc);
+		lagg_xlock(sc);
 		lagg_proto_detach(sc);
-		LAGG_UNLOCK_ASSERT(sc);
 		lagg_proto_attach(sc, ra->ra_proto);
-		LAGG_XUNLOCK(sc);
+		lagg_xunlock(sc);
 		break;
 	case SIOCGLAGGOPTS:
-		LAGG_SLOCK(sc);
+		lagg_slock(sc);
 		ro->ro_opts = sc->sc_opts;
 		if (sc->sc_proto == LAGG_PROTO_LACP) {
 			struct lacp_softc *lsc;
@@ -1189,7 +1269,7 @@
 		ro->ro_bkt = sc->sc_bkt;
 		ro->ro_flapping = sc->sc_flapping;
 		ro->ro_flowid_shift = sc->flowid_shift;
-		LAGG_SUNLOCK(sc);
+		lagg_sunlock(sc);
 		break;
 	case SIOCSLAGGOPTS:
 		if (sc->sc_proto == LAGG_PROTO_ROUNDROBIN) {
@@ -1231,13 +1311,13 @@
 			break;
 		}
 
-		LAGG_XLOCK(sc);
+		lagg_xlock(sc);
 		if (valid == 0 || (lacp == 1 &&
 		    sc->sc_proto != LAGG_PROTO_LACP)) {
 			/* Invalid combination of options specified. */
 			error = EINVAL;
-			LAGG_XUNLOCK(sc);
+			lagg_xunlock(sc);
 			break;	/* Return from SIOCSLAGGOPTS. */
 		}
 		/*
@@ -1292,18 +1372,18 @@
 				break;
 			}
 		}
-		LAGG_XUNLOCK(sc);
+		lagg_xunlock(sc);
 		break;
 	case SIOCGLAGGFLAGS:
 		rf->rf_flags = 0;
-		LAGG_SLOCK(sc);
+		lagg_slock(sc);
 		if (sc->sc_flags & MBUF_HASHFLAG_L2)
 			rf->rf_flags |= LAGG_F_HASHL2;
 		if (sc->sc_flags & MBUF_HASHFLAG_L3)
 			rf->rf_flags |= LAGG_F_HASHL3;
 		if (sc->sc_flags & MBUF_HASHFLAG_L4)
 			rf->rf_flags |= LAGG_F_HASHL4;
-		LAGG_SUNLOCK(sc);
+		lagg_sunlock(sc);
 		break;
 	case SIOCSLAGGHASH:
 		error = priv_check(td, PRIV_NET_LAGG);
@@ -1313,7 +1393,7 @@
 			error = EINVAL;
 			break;
 		}
-		LAGG_XLOCK(sc);
+		lagg_xlock(sc);
 		sc->sc_flags = 0;
 		if (rf->rf_flags & LAGG_F_HASHL2)
 			sc->sc_flags |= MBUF_HASHFLAG_L2;
@@ -1321,7 +1401,7 @@
 			sc->sc_flags |= MBUF_HASHFLAG_L3;
 		if (rf->rf_flags & LAGG_F_HASHL4)
 			sc->sc_flags |= MBUF_HASHFLAG_L4;
-		LAGG_XUNLOCK(sc);
+		lagg_xunlock(sc);
 		break;
 	case SIOCGLAGGPORT:
 		if (rp->rp_portname[0] == '\0' ||
@@ -1330,17 +1410,17 @@
 			break;
 		}
 
-		LAGG_SLOCK(sc);
+		lagg_slock(sc);
 		if ((lp = (struct lagg_port *)tpif->if_lagg) == NULL ||
 		    lp->lp_softc != sc) {
 			error = ENOENT;
-			LAGG_SUNLOCK(sc);
+			lagg_sunlock(sc);
 			if_rele(tpif);
 			break;
 		}
 
 		lagg_port2req(lp, rp);
-		LAGG_SUNLOCK(sc);
+		lagg_sunlock(sc);
 		if_rele(tpif);
 		break;
 	case SIOCSLAGGPORT:
@@ -1372,9 +1452,9 @@
 				    tpif->if_xname);
 		}
 #endif
-		LAGG_XLOCK(sc);
+		lagg_xlock(sc);
 		error = lagg_port_create(sc, tpif);
-		LAGG_XUNLOCK(sc);
+		lagg_xunlock(sc);
 		if_rele(tpif);
 		VLAN_CAPABILITIES(ifp);
 		break;
@@ -1388,23 +1468,23 @@
 			break;
 		}
 
-		LAGG_XLOCK(sc);
+		lagg_xlock(sc);
 		if ((lp = (struct lagg_port *)tpif->if_lagg) == NULL ||
 		    lp->lp_softc != sc) {
 			error = ENOENT;
-			LAGG_XUNLOCK(sc);
+			lagg_xunlock(sc);
 			if_rele(tpif);
 			break;
 		}
 
 		error = lagg_port_destroy(lp, 1);
-		LAGG_XUNLOCK(sc);
+		lagg_xunlock(sc);
 		if_rele(tpif);
 		VLAN_CAPABILITIES(ifp);
 		break;
 	case SIOCSIFFLAGS:
 		/* Set flags on ports too */
-		LAGG_XLOCK(sc);
+		lagg_xlock(sc);
 		SLIST_FOREACH(lp, &sc->sc_ports, lp_entries) {
 			lagg_setflags(lp, 1);
 		}
@@ -1416,26 +1496,26 @@
 			 * then stop and disable it.
 			 */
 			lagg_stop(sc);
-			LAGG_XUNLOCK(sc);
+			lagg_xunlock(sc);
 		} else if ((ifp->if_flags & IFF_UP) &&
 		    !(ifp->if_drv_flags & IFF_DRV_RUNNING)) {
 			/*
 			 * If interface is marked up and it is stopped, then
 			 * start it.
 			 */
-			LAGG_XUNLOCK(sc);
+			lagg_xunlock(sc);
 			(*ifp->if_init)(sc);
 		} else
-			LAGG_XUNLOCK(sc);
+			lagg_xunlock(sc);
 		break;
 	case SIOCADDMULTI:
 	case SIOCDELMULTI:
-		LAGG_WLOCK(sc);
+		lagg_xlock(sc);
 		SLIST_FOREACH(lp, &sc->sc_ports, lp_entries) {
 			lagg_clrmulti(lp);
 			lagg_setmulti(lp);
 		}
-		LAGG_WUNLOCK(sc);
+		lagg_xunlock(sc);
 		error = 0;
 		break;
 	case SIOCSIFMEDIA:
@@ -1444,13 +1524,13 @@
 		break;
 
 	case SIOCSIFCAP:
-		LAGG_XLOCK(sc);
+		lagg_xlock(sc);
 		SLIST_FOREACH(lp, &sc->sc_ports, lp_entries) {
 			if (lp->lp_ioctl != NULL)
 				(*lp->lp_ioctl)(lp->lp_ifp, cmd, data);
 		}
 		lagg_capabilities(sc);
-		LAGG_XUNLOCK(sc);
+		lagg_xunlock(sc);
 		VLAN_CAPABILITIES(ifp);
 		error = 0;
 		break;
@@ -1523,7 +1603,7 @@
 	struct ifmultiaddr *ifma;
 	int error;
 
-	LAGG_WLOCK_ASSERT(sc);
+	LAGG_XLOCK_ASSERT(sc);
 	IF_ADDR_WLOCK(scifp);
 	TAILQ_FOREACH(ifma, &scifp->if_multiaddrs, ifma_link) {
 		if (ifma->ifma_addr->sa_family != AF_LINK)
@@ -1554,7 +1634,7 @@
 {
 	struct lagg_mc *mc;
 
-	LAGG_WLOCK_ASSERT(lp->lp_softc);
+	LAGG_XLOCK_ASSERT(lp->lp_softc);
 	while ((mc = SLIST_FIRST(&lp->lp_mc_head)) != NULL) {
 		SLIST_REMOVE(&lp->lp_mc_head, mc, lagg_mc, mc_entries);
 		if (mc->mc_ifma && lp->lp_detaching == 0)
@@ -1640,10 +1720,10 @@
 	len = m->m_pkthdr.len;
 	mcast = (m->m_flags & (M_MCAST | M_BCAST)) ? 1 : 0;
 
-	LAGG_RLOCK(sc, &tracker);
+	lagg_rlock(sc, &tracker);
 	/* We need a Tx algorithm and at least one port */
 	if (sc->sc_proto == LAGG_PROTO_NONE || sc->sc_count == 0) {
-		LAGG_RUNLOCK(sc, &tracker);
+		lagg_runlock(sc, &tracker);
 		m_freem(m);
 		if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
 		return (ENXIO);
@@ -1652,7 +1732,7 @@
 	ETHER_BPF_MTAP(ifp, m);
 
 	error = lagg_proto_start(sc, m);
-	LAGG_RUNLOCK(sc, &tracker);
+	lagg_runlock(sc, &tracker);
 
 	if (error != 0)
 		if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
@@ -1676,11 +1756,11 @@
 	struct ifnet *scifp = sc->sc_ifp;
 	struct rm_priotracker tracker;
 
-	LAGG_RLOCK(sc, &tracker);
+	lagg_rlock(sc, &tracker);
 	if ((scifp->if_drv_flags & IFF_DRV_RUNNING) == 0 ||
 	    (lp->lp_flags & LAGG_PORT_DISABLED) ||
 	    sc->sc_proto == LAGG_PROTO_NONE) {
-		LAGG_RUNLOCK(sc, &tracker);
+		lagg_runlock(sc, &tracker);
 		m_freem(m);
 		return (NULL);
 	}
@@ -1700,7 +1780,7 @@
 		}
 	}
 
-	LAGG_RUNLOCK(sc, &tracker);
+	lagg_runlock(sc, &tracker);
 	return (m);
 }
@@ -1725,12 +1805,12 @@
 	imr->ifm_status = IFM_AVALID;
 	imr->ifm_active = IFM_ETHER | IFM_AUTO;
 
-	LAGG_SLOCK(sc);
+	lagg_slock(sc);
 	SLIST_FOREACH(lp, &sc->sc_ports, lp_entries) {
 		if (LAGG_PORTACTIVE(lp))
 			imr->ifm_status |= IFM_ACTIVE;
 	}
-	LAGG_SUNLOCK(sc);
+	lagg_sunlock(sc);
 }
 
 static void
@@ -1782,10 +1862,10 @@
 	if (sc == NULL)
 		return;
 
-	LAGG_XLOCK(sc);
+	lagg_xlock(sc);
 	lagg_linkstate(sc);
 	lagg_proto_linkstate(sc, lp);
-	LAGG_XUNLOCK(sc);
+	lagg_xunlock(sc);
 }
 
 struct lagg_port *
@@ -2014,8 +2094,8 @@
 {
 	struct lagg_lb *lb;
 
+	LAGG_XLOCK_ASSERT(sc);
 	lb = (struct lagg_lb *)sc->sc_psc;
-	LAGG_WUNLOCK(sc);
 	if (lb != NULL)
 		free(lb, M_DEVBUF);
 }
@@ -2114,12 +2194,12 @@
 	struct lagg_port *lp;
 	void *psc;
 
+	LAGG_XLOCK_ASSERT(sc);
 	SLIST_FOREACH(lp, &sc->sc_ports, lp_entries)
 		lacp_port_destroy(lp);
 
 	psc = sc->sc_psc;
 	sc->sc_psc = NULL;
-	LAGG_WUNLOCK(sc);
 	lacp_detach(psc);
}
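
As a quick illustration of how the new primitives are meant to pair up, here is a minimal usage sketch. It is not part of the patch: example_fast_path() and example_control_path() are hypothetical names, and the softc fields they touch were chosen only for illustration.

	/*
	 * Fast path: sleeping is not allowed while the rmlock is
	 * read-held. lagg_rlock() spins (cpu_spinwait) until
	 * sc_slowpath is false, so a reader never sees the softc
	 * mid-update.
	 */
	static int
	example_fast_path(struct lagg_softc *sc)
	{
		struct rm_priotracker tracker;
		int active;

		lagg_rlock(sc, &tracker);
		active = sc->sc_active;	/* read-only access to the softc */
		lagg_runlock(sc, &tracker);
		return (active);
	}

	/*
	 * Control path: lagg_xlock() first raises sc_slowpath under
	 * the rm write lock, which also waits out any current readers,
	 * then takes the sx lock exclusively. Sleeping (e.g. in
	 * if_addmulti()) is safe from this point on.
	 */
	static void
	example_control_path(struct lagg_softc *sc)
	{
		lagg_xlock(sc);
		sc->sc_count++;		/* hypothetical softc mutation */
		lagg_xunlock(sc);
	}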