Index: sys/net/if_vxlan.c =================================================================== --- sys/net/if_vxlan.c +++ sys/net/if_vxlan.c @@ -1134,7 +1134,7 @@ * If we really need to, we can of course look in the INP's * membership list: * sotoinpcb(vso->vxlso_sock)->inp_moptions-> - * imo_membership[]->inm_ifp + * imo_head[]->imf_inm->inm_ifp * similarly to imo_match_group(). */ source->in4.sin_addr = local->in4.sin_addr; Index: sys/netinet/in.h =================================================================== --- sys/netinet/in.h +++ sys/netinet/in.h @@ -505,13 +505,9 @@ #define IP_DEFAULT_MULTICAST_LOOP 1 /* normally hear sends if a member */ /* - * The imo_membership vector for each socket is now dynamically allocated at - * run-time, bounded by USHRT_MAX, and is reallocated when needed, sized - * according to a power-of-two increment. + * Limit for IPv4 multicast memberships */ -#define IP_MIN_MEMBERSHIPS 31 #define IP_MAX_MEMBERSHIPS 4095 -#define IP_MAX_SOURCE_FILTER 1024 /* XXX to be unused */ /* * Default resource limits for IPv4 multicast source filtering. Index: sys/netinet/in_mcast.c =================================================================== --- sys/netinet/in_mcast.c +++ sys/netinet/in_mcast.c @@ -94,7 +94,9 @@ /* * Locking: - * - Lock order is: Giant, INP_WLOCK, IN_MULTI_LIST_LOCK, IGMP_LOCK, IF_ADDR_LOCK. + * + * - Lock order is: Giant, IN_MULTI_LOCK, INP_WLOCK, + * IN_MULTI_LIST_LOCK, IGMP_LOCK, IF_ADDR_LOCK. * - The IF_ADDR_LOCK is implicitly taken by inm_lookup() earlier, however * it can be taken by code in net/if.c also. * - ip_moptions and in_mfilter are covered by the INP_WLOCK. 
@@ -144,12 +146,11 @@ static void imf_purge(struct in_mfilter *); static void imf_rollback(struct in_mfilter *); static void imf_reap(struct in_mfilter *); -static int imo_grow(struct ip_moptions *); -static size_t imo_match_group(const struct ip_moptions *, +static struct in_mfilter * + imo_match_group(const struct ip_moptions *, const struct ifnet *, const struct sockaddr *); static struct in_msource * - imo_match_source(const struct ip_moptions *, const size_t, - const struct sockaddr *); + imo_match_source(struct in_mfilter *, const struct sockaddr *); static void ims_merge(struct ip_msource *ims, const struct in_msource *lims, const int rollback); static int in_getmulti(struct ifnet *, const struct in_addr *, @@ -333,6 +334,27 @@ imf->imf_st[1] = st1; } +struct in_mfilter * +ip_alloc_mfilter(const int mflags, const int st0, const int st1) +{ + struct in_mfilter *imf; + + imf = malloc(sizeof(*imf), M_INMFILTER, mflags); + + if (imf != NULL) + imf_init(imf, st0, st1); + + return (imf); +} + +void +ip_free_mfilter(struct in_mfilter *imf) +{ + + imf_purge(imf); + free(imf, M_INMFILTER); +} + /* * Function for looking up an in_multi record for an IPv4 multicast address * on a given interface. ifp must be valid. If no record found, return NULL. @@ -378,90 +400,31 @@ return (inm); } -/* - * Resize the ip_moptions vector to the next power-of-two minus 1. - * May be called with locks held; do not sleep. 
- */ -static int -imo_grow(struct ip_moptions *imo) -{ - struct in_multi **nmships; - struct in_multi **omships; - struct in_mfilter *nmfilters; - struct in_mfilter *omfilters; - size_t idx; - size_t newmax; - size_t oldmax; - - nmships = NULL; - nmfilters = NULL; - omships = imo->imo_membership; - omfilters = imo->imo_mfilters; - oldmax = imo->imo_max_memberships; - newmax = ((oldmax + 1) * 2) - 1; - - if (newmax <= IP_MAX_MEMBERSHIPS) { - nmships = (struct in_multi **)realloc(omships, - sizeof(struct in_multi *) * newmax, M_IPMOPTS, M_NOWAIT); - nmfilters = (struct in_mfilter *)realloc(omfilters, - sizeof(struct in_mfilter) * newmax, M_INMFILTER, M_NOWAIT); - if (nmships != NULL && nmfilters != NULL) { - /* Initialize newly allocated source filter heads. */ - for (idx = oldmax; idx < newmax; idx++) { - imf_init(&nmfilters[idx], MCAST_UNDEFINED, - MCAST_EXCLUDE); - } - imo->imo_max_memberships = newmax; - imo->imo_membership = nmships; - imo->imo_mfilters = nmfilters; - } - } - - if (nmships == NULL || nmfilters == NULL) { - if (nmships != NULL) - free(nmships, M_IPMOPTS); - if (nmfilters != NULL) - free(nmfilters, M_INMFILTER); - return (ETOOMANYREFS); - } - - return (0); -} - /* * Find an IPv4 multicast group entry for this ip_moptions instance * which matches the specified group, and optionally an interface. - * Return its index into the array, or -1 if not found. + * Return a pointer to the matching in_mfilter, or NULL if not found. */ -static size_t +static struct in_mfilter * imo_match_group(const struct ip_moptions *imo, const struct ifnet *ifp, const struct sockaddr *group) { const struct sockaddr_in *gsin; - struct in_multi **pinm; - int idx; - int nmships; + struct in_mfilter *imf; + struct in_multi *inm; gsin = (const struct sockaddr_in *)group; - /* The imo_membership array may be lazy allocated. 
*/ - if (imo->imo_membership == NULL || imo->imo_num_memberships == 0) - return (-1); - - nmships = imo->imo_num_memberships; - pinm = &imo->imo_membership[0]; - for (idx = 0; idx < nmships; idx++, pinm++) { - if (*pinm == NULL) + for (imf = NULL; ip_next_mfilter(&imo->imo_head, &imf); ) { + inm = imf->imf_inm; + if (inm == NULL) continue; - if ((ifp == NULL || ((*pinm)->inm_ifp == ifp)) && - in_hosteq((*pinm)->inm_addr, gsin->sin_addr)) { + if ((ifp == NULL || (inm->inm_ifp == ifp)) && + in_hosteq(inm->inm_addr, gsin->sin_addr)) { break; } } - if (idx >= nmships) - idx = -1; - - return (idx); + return (imf); } /* @@ -472,22 +435,13 @@ * it exists, which may not be the desired behaviour. */ static struct in_msource * -imo_match_source(const struct ip_moptions *imo, const size_t gidx, - const struct sockaddr *src) +imo_match_source(struct in_mfilter *imf, const struct sockaddr *src) { struct ip_msource find; - struct in_mfilter *imf; struct ip_msource *ims; const sockunion_t *psa; KASSERT(src->sa_family == AF_INET, ("%s: !AF_INET", __func__)); - KASSERT(gidx != -1 && gidx < imo->imo_num_memberships, - ("%s: invalid index %d\n", __func__, (int)gidx)); - - /* The imo_mfilters array may be lazy allocated. */ - if (imo->imo_mfilters == NULL) - return (NULL); - imf = &imo->imo_mfilters[gidx]; /* Source trees are keyed in host byte order. */ psa = (const sockunion_t *)src; @@ -507,14 +461,14 @@ imo_multi_filter(const struct ip_moptions *imo, const struct ifnet *ifp, const struct sockaddr *group, const struct sockaddr *src) { - size_t gidx; + struct in_mfilter *imf; struct in_msource *ims; int mode; KASSERT(ifp != NULL, ("%s: null ifp", __func__)); - gidx = imo_match_group(imo, ifp, group); - if (gidx == -1) + imf = imo_match_group(imo, ifp, group); + if (imf == NULL) return (MCAST_NOTGMEMBER); /* @@ -526,8 +480,8 @@ * NOTE: We are comparing group state here at IGMP t1 (now) * with socket-layer t0 (since last downcall). 
*/ - mode = imo->imo_mfilters[gidx].imf_st[1]; - ims = imo_match_source(imo, gidx, src); + mode = imf->imf_st[1]; + ims = imo_match_source(imf, src); if ((ims == NULL && mode == MCAST_INCLUDE) || (ims != NULL && ims->imsl_st[0] != mode)) @@ -1452,7 +1406,6 @@ struct ip_moptions *imo; struct in_msource *ims; struct in_multi *inm; - size_t idx; uint16_t fmode; int error, doblock; @@ -1535,16 +1488,12 @@ * Check if we are actually a member of this group. */ imo = inp_findmoptions(inp); - idx = imo_match_group(imo, ifp, &gsa->sa); - if (idx == -1 || imo->imo_mfilters == NULL) { + imf = imo_match_group(imo, ifp, &gsa->sa); + if (imf == NULL) { error = EADDRNOTAVAIL; goto out_inp_locked; } - - KASSERT(imo->imo_mfilters != NULL, - ("%s: imo_mfilters not allocated", __func__)); - imf = &imo->imo_mfilters[idx]; - inm = imo->imo_membership[idx]; + inm = imf->imf_inm; /* * Attempting to use the delta-based API on an @@ -1562,7 +1511,7 @@ * Asked to unblock, but nothing to unblock. * If adding a new block entry, allocate it. */ - ims = imo_match_source(imo, idx, &ssa->sa); + ims = imo_match_source(imf, &ssa->sa); if ((ims != NULL && doblock) || (ims == NULL && !doblock)) { CTR3(KTR_IGMPV3, "%s: source 0x%08x %spresent", __func__, ntohl(ssa->sin.sin_addr.s_addr), doblock ? 
"" : "not "); @@ -1636,9 +1585,6 @@ inp_findmoptions(struct inpcb *inp) { struct ip_moptions *imo; - struct in_multi **immp; - struct in_mfilter *imfp; - size_t idx; INP_WLOCK(inp); if (inp->inp_moptions != NULL) @@ -1647,29 +1593,16 @@ INP_WUNLOCK(inp); imo = malloc(sizeof(*imo), M_IPMOPTS, M_WAITOK); - immp = malloc(sizeof(*immp) * IP_MIN_MEMBERSHIPS, M_IPMOPTS, - M_WAITOK | M_ZERO); - imfp = malloc(sizeof(struct in_mfilter) * IP_MIN_MEMBERSHIPS, - M_INMFILTER, M_WAITOK); imo->imo_multicast_ifp = NULL; imo->imo_multicast_addr.s_addr = INADDR_ANY; imo->imo_multicast_vif = -1; imo->imo_multicast_ttl = IP_DEFAULT_MULTICAST_TTL; imo->imo_multicast_loop = in_mcast_loop; - imo->imo_num_memberships = 0; - imo->imo_max_memberships = IP_MIN_MEMBERSHIPS; - imo->imo_membership = immp; - - /* Initialize per-group source filters. */ - for (idx = 0; idx < IP_MIN_MEMBERSHIPS; idx++) - imf_init(&imfp[idx], MCAST_UNDEFINED, MCAST_EXCLUDE); - imo->imo_mfilters = imfp; + STAILQ_INIT(&imo->imo_head); INP_WLOCK(inp); if (inp->inp_moptions != NULL) { - free(imfp, M_INMFILTER); - free(immp, M_IPMOPTS); free(imo, M_IPMOPTS); return (inp->inp_moptions); } @@ -1680,32 +1613,25 @@ static void inp_gcmoptions(struct ip_moptions *imo) { - struct in_mfilter *imf; + struct in_mfilter *imf; struct in_multi *inm; struct ifnet *ifp; - size_t idx, nmships; - - nmships = imo->imo_num_memberships; - for (idx = 0; idx < nmships; ++idx) { - imf = imo->imo_mfilters ? 
&imo->imo_mfilters[idx] : NULL; - if (imf) - imf_leave(imf); - inm = imo->imo_membership[idx]; - ifp = inm->inm_ifp; - if (ifp != NULL) { - CURVNET_SET(ifp->if_vnet); - (void)in_leavegroup(inm, imf); - CURVNET_RESTORE(); - } else { - (void)in_leavegroup(inm, imf); + + while ((imf = ip_first_mfilter(&imo->imo_head)) != NULL) { + ip_remove_mfilter(&imo->imo_head, imf); + + imf_leave(imf); + if ((inm = imf->imf_inm) != NULL) { + if ((ifp = inm->inm_ifp) != NULL) { + CURVNET_SET(ifp->if_vnet); + (void)in_leavegroup(inm, imf); + CURVNET_RESTORE(); + } else { + (void)in_leavegroup(inm, imf); + } } - if (imf) - imf_purge(imf); + ip_free_mfilter(imf); } - - if (imo->imo_mfilters) - free(imo->imo_mfilters, M_INMFILTER); - free(imo->imo_membership, M_IPMOPTS); free(imo, M_IPMOPTS); } @@ -1741,7 +1667,7 @@ struct sockaddr_storage *ptss; struct sockaddr_storage *tss; int error; - size_t idx, nsrcs, ncsrcs; + size_t nsrcs, ncsrcs; INP_WLOCK_ASSERT(inp); @@ -1768,12 +1694,11 @@ * Lookup group on the socket. */ gsa = (sockunion_t *)&msfr.msfr_group; - idx = imo_match_group(imo, ifp, &gsa->sa); - if (idx == -1 || imo->imo_mfilters == NULL) { + imf = imo_match_group(imo, ifp, &gsa->sa); + if (imf == NULL) { INP_WUNLOCK(inp); return (EADDRNOTAVAIL); } - imf = &imo->imo_mfilters[idx]; /* * Ignore memberships which are in limbo. 
@@ -2033,14 +1958,11 @@ struct ip_moptions *imo; struct in_multi *inm; struct in_msource *lims; - size_t idx; int error, is_new; ifp = NULL; - imf = NULL; lims = NULL; error = 0; - is_new = 0; memset(&gsr, 0, sizeof(struct group_source_req)); gsa = (sockunion_t *)&gsr.gsr_group; @@ -2148,13 +2070,22 @@ if (ifp == NULL || (ifp->if_flags & IFF_MULTICAST) == 0) return (EADDRNOTAVAIL); + IN_MULTI_LOCK(); + imo = inp_findmoptions(inp); - idx = imo_match_group(imo, ifp, &gsa->sa); - if (idx == -1) { + imf = imo_match_group(imo, ifp, &gsa->sa); + if (imf == NULL) { is_new = 1; + inm = NULL; + + if (ip_count_mfilter(&imo->imo_head) >= IP_MAX_MEMBERSHIPS) { + error = ENOMEM; + goto out_inp_locked; + } } else { - inm = imo->imo_membership[idx]; - imf = &imo->imo_mfilters[idx]; + is_new = 0; + inm = imf->imf_inm; + if (ssa->ss.ss_family != AF_UNSPEC) { /* * MCAST_JOIN_SOURCE_GROUP on an exclusive membership @@ -2181,7 +2112,7 @@ * full-state SSM API with the delta-based API, * which is discouraged in the relevant RFCs. */ - lims = imo_match_source(imo, idx, &ssa->sa); + lims = imo_match_source(imf, &ssa->sa); if (lims != NULL /*&& lims->imsl_st[1] == MCAST_INCLUDE*/) { error = EADDRNOTAVAIL; @@ -2214,27 +2145,6 @@ */ INP_WLOCK_ASSERT(inp); - if (is_new) { - if (imo->imo_num_memberships == imo->imo_max_memberships) { - error = imo_grow(imo); - if (error) - goto out_inp_locked; - } - /* - * Allocate the new slot upfront so we can deal with - * grafting the new source filter in same code path - * as for join-source on existing membership. - */ - idx = imo->imo_num_memberships; - imo->imo_membership[idx] = NULL; - imo->imo_num_memberships++; - KASSERT(imo->imo_mfilters != NULL, - ("%s: imf_mfilters vector was not allocated", __func__)); - imf = &imo->imo_mfilters[idx]; - KASSERT(RB_EMPTY(&imf->imf_sources), - ("%s: imf_sources not empty", __func__)); - } - /* * Graft new source into filter list for this inpcb's * membership of the group. 
The in_multi may not have @@ -2250,7 +2160,11 @@ /* Membership starts in IN mode */ if (is_new) { CTR1(KTR_IGMPV3, "%s: new join w/source", __func__); - imf_init(imf, MCAST_UNDEFINED, MCAST_INCLUDE); + imf = ip_alloc_mfilter(M_NOWAIT, MCAST_UNDEFINED, MCAST_INCLUDE); + if (imf == NULL) { + error = ENOMEM; + goto out_inp_locked; + } } else { CTR2(KTR_IGMPV3, "%s: %s source", __func__, "allow"); } @@ -2259,34 +2173,41 @@ CTR1(KTR_IGMPV3, "%s: merge imf state failed", __func__); error = ENOMEM; - goto out_imo_free; + goto out_inp_locked; } } else { /* No address specified; Membership starts in EX mode */ if (is_new) { CTR1(KTR_IGMPV3, "%s: new join w/o source", __func__); - imf_init(imf, MCAST_UNDEFINED, MCAST_EXCLUDE); + imf = ip_alloc_mfilter(M_NOWAIT, MCAST_UNDEFINED, MCAST_EXCLUDE); + if (imf == NULL) { + error = ENOMEM; + goto out_inp_locked; + } } } /* * Begin state merge transaction at IGMP layer. */ - in_pcbref(inp); - INP_WUNLOCK(inp); - IN_MULTI_LOCK(); - if (is_new) { + in_pcbref(inp); + INP_WUNLOCK(inp); + error = in_joingroup_locked(ifp, &gsa->sin.sin_addr, imf, - &inm); + &imf->imf_inm); + + INP_WLOCK(inp); + if (in_pcbrele_wlocked(inp)) { + error = ENXIO; + goto out_inp_unlocked; + } if (error) { CTR1(KTR_IGMPV3, "%s: in_joingroup_locked failed", __func__); - IN_MULTI_LIST_UNLOCK(); - goto out_imo_free; + goto out_inp_locked; } - inm_acquire(inm); - imo->imo_membership[idx] = inm; + inm_acquire(imf->imf_inm); } else { CTR1(KTR_IGMPV3, "%s: merge inm state", __func__); IN_MULTI_LIST_LOCK(); @@ -2295,7 +2216,9 @@ CTR1(KTR_IGMPV3, "%s: failed to merge inm state", __func__); IN_MULTI_LIST_UNLOCK(); - goto out_in_multi_locked; + imf_rollback(imf); + imf_reap(imf); + goto out_inp_locked; } CTR1(KTR_IGMPV3, "%s: doing igmp downcall", __func__); error = igmp_change_state(inm); @@ -2303,40 +2226,30 @@ if (error) { CTR1(KTR_IGMPV3, "%s: failed igmp downcall", __func__); - goto out_in_multi_locked; + imf_rollback(imf); + imf_reap(imf); + goto out_inp_locked; } } + 
if (is_new) + ip_insert_mfilter(&imo->imo_head, imf); -out_in_multi_locked: + imf_commit(imf); + imf = NULL; +out_inp_locked: + INP_WUNLOCK(inp); +out_inp_unlocked: IN_MULTI_UNLOCK(); - INP_WLOCK(inp); - if (in_pcbrele_wlocked(inp)) - return (ENXIO); - if (error) { - imf_rollback(imf); - if (is_new) - imf_purge(imf); - else - imf_reap(imf); - } else { - imf_commit(imf); - } -out_imo_free: - if (error && is_new) { - inm = imo->imo_membership[idx]; - if (inm != NULL) { + if (is_new && imf) { + if (imf->imf_inm != NULL) { IN_MULTI_LIST_LOCK(); - inm_release_deferred(inm); + inm_release_deferred(imf->imf_inm); IN_MULTI_LIST_UNLOCK(); } - imo->imo_membership[idx] = NULL; - --imo->imo_num_memberships; + ip_free_mfilter(imf); } - -out_inp_locked: - INP_WUNLOCK(inp); return (error); } @@ -2355,7 +2268,6 @@ struct ip_moptions *imo; struct in_msource *ims; struct in_multi *inm; - size_t idx; int error, is_final; ifp = NULL; @@ -2460,17 +2372,18 @@ if (!IN_MULTICAST(ntohl(gsa->sin.sin_addr.s_addr))) return (EINVAL); + IN_MULTI_LOCK(); + /* * Find the membership in the membership array. */ imo = inp_findmoptions(inp); - idx = imo_match_group(imo, ifp, &gsa->sa); - if (idx == -1) { + imf = imo_match_group(imo, ifp, &gsa->sa); + if (imf == NULL) { error = EADDRNOTAVAIL; goto out_inp_locked; } - inm = imo->imo_membership[idx]; - imf = &imo->imo_mfilters[idx]; + inm = imf->imf_inm; if (ssa->ss.ss_family != AF_UNSPEC) is_final = 0; @@ -2485,13 +2398,14 @@ * MCAST_LEAVE_SOURCE_GROUP is only valid for inclusive memberships. */ if (is_final) { + ip_remove_mfilter(&imo->imo_head, imf); imf_leave(imf); } else { if (imf->imf_st[0] == MCAST_EXCLUDE) { error = EADDRNOTAVAIL; goto out_inp_locked; } - ims = imo_match_source(imo, idx, &ssa->sa); + ims = imo_match_source(imf, &ssa->sa); if (ims == NULL) { CTR3(KTR_IGMPV3, "%s: source 0x%08x %spresent", __func__, ntohl(ssa->sin.sin_addr.s_addr), "not "); @@ -2510,17 +2424,7 @@ /* * Begin state merge transaction at IGMP layer. 
*/ - in_pcbref(inp); - INP_WUNLOCK(inp); - IN_MULTI_LOCK(); - - if (is_final) { - /* - * Give up the multicast address record to which - * the membership points. - */ - (void)in_leavegroup_locked(inm, imf); - } else { + if (is_final == 0) { CTR1(KTR_IGMPV3, "%s: merge inm state", __func__); IN_MULTI_LIST_LOCK(); error = inm_merge(inm, imf); @@ -2528,7 +2432,9 @@ CTR1(KTR_IGMPV3, "%s: failed to merge inm state", __func__); IN_MULTI_LIST_UNLOCK(); - goto out_in_multi_locked; + imf_rollback(imf); + imf_reap(imf); + goto out_inp_locked; } CTR1(KTR_IGMPV3, "%s: doing igmp downcall", __func__); @@ -2537,38 +2443,27 @@ if (error) { CTR1(KTR_IGMPV3, "%s: failed igmp downcall", __func__); + imf_rollback(imf); + imf_reap(imf); + goto out_inp_locked; } } - -out_in_multi_locked: - - IN_MULTI_UNLOCK(); - INP_WLOCK(inp); - if (in_pcbrele_wlocked(inp)) - return (ENXIO); - - if (error) - imf_rollback(imf); - else - imf_commit(imf); - + imf_commit(imf); imf_reap(imf); - if (is_final) { - /* Remove the gap in the membership and filter array. */ - KASSERT(RB_EMPTY(&imf->imf_sources), - ("%s: imf_sources not empty", __func__)); - for (++idx; idx < imo->imo_num_memberships; ++idx) { - imo->imo_membership[idx - 1] = imo->imo_membership[idx]; - imo->imo_mfilters[idx - 1] = imo->imo_mfilters[idx]; - } - imf_init(&imo->imo_mfilters[idx - 1], MCAST_UNDEFINED, - MCAST_EXCLUDE); - imo->imo_num_memberships--; - } - out_inp_locked: INP_WUNLOCK(inp); + + if (is_final && imf) { + /* + * Give up the multicast address record to which + * the membership points. + */ + (void) in_leavegroup_locked(imf->imf_inm, imf); + ip_free_mfilter(imf); + } + + IN_MULTI_UNLOCK(); return (error); } @@ -2658,7 +2553,6 @@ struct in_mfilter *imf; struct ip_moptions *imo; struct in_multi *inm; - size_t idx; int error; error = sooptcopyin(sopt, &msfr, sizeof(struct __msfilterreq), @@ -2695,13 +2589,12 @@ * Check if this socket is a member of this group. 
*/ imo = inp_findmoptions(inp); - idx = imo_match_group(imo, ifp, &gsa->sa); - if (idx == -1 || imo->imo_mfilters == NULL) { + imf = imo_match_group(imo, ifp, &gsa->sa); + if (imf == NULL) { error = EADDRNOTAVAIL; goto out_inp_locked; } - inm = imo->imo_membership[idx]; - imf = &imo->imo_mfilters[idx]; + inm = imf->imf_inm; /* * Begin state merge transaction at socket layer. Index: sys/netinet/in_pcb.c =================================================================== --- sys/netinet/in_pcb.c +++ sys/netinet/in_pcb.c @@ -86,6 +86,9 @@ #if defined(INET) || defined(INET6) #include #include +#ifdef INET +#include +#endif #include #include #ifdef TCPHPTS @@ -93,16 +96,13 @@ #endif #include #include -#endif -#ifdef INET -#include -#endif #ifdef INET6 #include #include #include #include #endif /* INET6 */ +#endif #include @@ -1779,8 +1779,9 @@ in_pcbpurgeif0(struct inpcbinfo *pcbinfo, struct ifnet *ifp) { struct inpcb *inp; + struct in_multi *inm; + struct in_mfilter *imf; struct ip_moptions *imo; - int i, gap; INP_INFO_WLOCK(pcbinfo); CK_LIST_FOREACH(inp, pcbinfo->ipi_listhead, inp_list) { @@ -1801,17 +1802,17 @@ * * XXX This can all be deferred to an epoch_call */ - for (i = 0, gap = 0; i < imo->imo_num_memberships; - i++) { - if (imo->imo_membership[i]->inm_ifp == ifp) { + for (imf = NULL; ip_next_mfilter(&imo->imo_head, &imf); ) { + if ((inm = imf->imf_inm) == NULL) + continue; + if (inm->inm_ifp == ifp) { + ip_remove_mfilter(&imo->imo_head, imf); IN_MULTI_LOCK_ASSERT(); - in_leavegroup_locked(imo->imo_membership[i], NULL); - gap++; - } else if (gap != 0) - imo->imo_membership[i - gap] = - imo->imo_membership[i]; + in_leavegroup_locked(inm, NULL); + ip_free_mfilter(imf); + imf = NULL; /* re-start search */ + } } - imo->imo_num_memberships -= gap; } INP_WUNLOCK(inp); } Index: sys/netinet/in_var.h =================================================================== --- sys/netinet/in_var.h +++ sys/netinet/in_var.h @@ -232,8 +232,61 @@ struct ip_msource_tree 
imf_sources; /* source list for (S,G) */ u_long imf_nsrc; /* # of source entries */ uint8_t imf_st[2]; /* state before/at commit */ + struct in_multi *imf_inm; /* associated multicast address */ + STAILQ_ENTRY(in_mfilter) imf_entry; /* list entry */ }; +/* + * Helper types and functions for IPv4 multicast filters. + */ +typedef STAILQ_HEAD(, in_mfilter) ip_mfilter_head_t; + +struct in_mfilter * ip_alloc_mfilter(int mflags, int st0, int st1); +void ip_free_mfilter(struct in_mfilter *); + +static inline struct in_mfilter * +ip_first_mfilter(const ip_mfilter_head_t *head) +{ + + return (STAILQ_FIRST(head)); +} + +static inline void +ip_insert_mfilter(ip_mfilter_head_t *head, struct in_mfilter *imf) +{ + + STAILQ_INSERT_TAIL(head, imf, imf_entry); +} + +static inline void +ip_remove_mfilter(ip_mfilter_head_t *head, struct in_mfilter *imf) +{ + + STAILQ_REMOVE(head, imf, in_mfilter, imf_entry); +} + +static inline bool +ip_next_mfilter(const ip_mfilter_head_t *head, struct in_mfilter **ppimf) +{ + + if (*ppimf == NULL) + *ppimf = STAILQ_FIRST(head); + else + *ppimf = STAILQ_NEXT(*ppimf, imf_entry); + return (*ppimf != NULL); +} + +static inline size_t +ip_count_mfilter(ip_mfilter_head_t *head) +{ + struct in_mfilter *imf; + size_t num = 0; + + STAILQ_FOREACH(imf, head, imf_entry) + num++; + return (num); +} + /* * IPv4 group descriptor. 
* Index: sys/netinet/ip_carp.c =================================================================== --- sys/netinet/ip_carp.c +++ sys/netinet/ip_carp.c @@ -1371,25 +1371,24 @@ case AF_INET: { struct ip_moptions *imo = &cif->cif_imo; + struct in_mfilter *imf; struct in_addr addr; - if (imo->imo_membership) + if (ip_first_mfilter(&imo->imo_head) != NULL) return (0); - imo->imo_membership = (struct in_multi **)malloc( - (sizeof(struct in_multi *) * IP_MIN_MEMBERSHIPS), M_CARP, - M_WAITOK); - imo->imo_mfilters = NULL; - imo->imo_max_memberships = IP_MIN_MEMBERSHIPS; + imf = ip_alloc_mfilter(M_WAITOK, 0, 0); + STAILQ_INIT(&imo->imo_head); imo->imo_multicast_vif = -1; addr.s_addr = htonl(INADDR_CARP_GROUP); if ((error = in_joingroup(ifp, &addr, NULL, - &imo->imo_membership[0])) != 0) { - free(imo->imo_membership, M_CARP); + &imf->imf_inm)) != 0) { + ip_free_mfilter(imf); break; } - imo->imo_num_memberships++; + + ip_insert_mfilter(&imo->imo_head, imf); imo->imo_multicast_ifp = ifp; imo->imo_multicast_ttl = CARP_DFLTTL; imo->imo_multicast_loop = 0; @@ -1400,17 +1399,16 @@ case AF_INET6: { struct ip6_moptions *im6o = &cif->cif_im6o; + struct in6_mfilter *im6f[2]; struct in6_addr in6; - struct in6_multi *in6m; - if (im6o->im6o_membership) + if (ip6_first_mfilter(&im6o->im6o_head)) return (0); - im6o->im6o_membership = (struct in6_multi **)malloc( - (sizeof(struct in6_multi *) * IPV6_MIN_MEMBERSHIPS), M_CARP, - M_ZERO | M_WAITOK); - im6o->im6o_mfilters = NULL; - im6o->im6o_max_memberships = IPV6_MIN_MEMBERSHIPS; + im6f[0] = ip6_alloc_mfilter(M_WAITOK, 0, 0); + im6f[1] = ip6_alloc_mfilter(M_WAITOK, 0, 0); + + STAILQ_INIT(&im6o->im6o_head); im6o->im6o_multicast_hlim = CARP_DFLTTL; im6o->im6o_multicast_ifp = ifp; @@ -1419,17 +1417,15 @@ in6.s6_addr16[0] = htons(0xff02); in6.s6_addr8[15] = 0x12; if ((error = in6_setscope(&in6, ifp, NULL)) != 0) { - free(im6o->im6o_membership, M_CARP); + ip6_free_mfilter(im6f[0]); + ip6_free_mfilter(im6f[1]); break; } - in6m = NULL; - if ((error = 
in6_joingroup(ifp, &in6, NULL, &in6m, 0)) != 0) { - free(im6o->im6o_membership, M_CARP); + if ((error = in6_joingroup(ifp, &in6, NULL, &im6f[0]->im6f_in6m, 0)) != 0) { + ip6_free_mfilter(im6f[0]); + ip6_free_mfilter(im6f[1]); break; } - in6m_acquire(in6m); - im6o->im6o_membership[0] = in6m; - im6o->im6o_num_memberships++; /* Join solicited multicast address. */ bzero(&in6, sizeof(in6)); @@ -1438,20 +1434,23 @@ in6.s6_addr32[2] = htonl(1); in6.s6_addr32[3] = 0; in6.s6_addr8[12] = 0xff; + if ((error = in6_setscope(&in6, ifp, NULL)) != 0) { - in6_leavegroup(im6o->im6o_membership[0], NULL); - free(im6o->im6o_membership, M_CARP); + /* Leave the group joined above; ip6_free_mfilter() only frees the filter. */ + in6_leavegroup(im6f[0]->im6f_in6m, NULL); + ip6_free_mfilter(im6f[0]); + ip6_free_mfilter(im6f[1]); break; } - in6m = NULL; - if ((error = in6_joingroup(ifp, &in6, NULL, &in6m, 0)) != 0) { - in6_leavegroup(im6o->im6o_membership[0], NULL); - free(im6o->im6o_membership, M_CARP); + + if ((error = in6_joingroup(ifp, &in6, NULL, &im6f[1]->im6f_in6m, 0)) != 0) { + in6_leavegroup(im6f[0]->im6f_in6m, NULL); + ip6_free_mfilter(im6f[0]); + ip6_free_mfilter(im6f[1]); break; } - in6m_acquire(in6m); - im6o->im6o_membership[1] = in6m; - im6o->im6o_num_memberships++; + ip6_insert_mfilter(&im6o->im6o_head, im6f[0]); + ip6_insert_mfilter(&im6o->im6o_head, im6f[1]); break; } #endif @@ -1474,13 +1473,13 @@ case AF_INET: if (cif->cif_naddrs == 0) { struct ip_moptions *imo = &cif->cif_imo; + struct in_mfilter *imf; - in_leavegroup(imo->imo_membership[0], NULL); - KASSERT(imo->imo_mfilters == NULL, - ("%s: imo_mfilters != NULL", __func__)); - free(imo->imo_membership, M_CARP); - imo->imo_membership = NULL; - + while ((imf = ip_first_mfilter(&imo->imo_head)) != NULL) { + ip_remove_mfilter(&imo->imo_head, imf); + in_leavegroup(imf->imf_inm, NULL); + ip_free_mfilter(imf); + } } break; #endif @@ -1488,13 +1487,13 @@ case AF_INET6: if (cif->cif_naddrs6 == 0) { struct ip6_moptions *im6o = &cif->cif_im6o; + struct in6_mfilter *im6f; - in6_leavegroup(im6o->im6o_membership[0], NULL); - 
in6_leavegroup(im6o->im6o_membership[1], NULL); - KASSERT(im6o->im6o_mfilters == NULL, - ("%s: im6o_mfilters != NULL", __func__)); - free(im6o->im6o_membership, M_CARP); - im6o->im6o_membership = NULL; + while ((im6f = ip6_first_mfilter(&im6o->im6o_head)) != NULL) { + ip6_remove_mfilter(&im6o->im6o_head, im6f); + in6_leavegroup(im6f->im6f_in6m, NULL); + ip6_free_mfilter(im6f); + } } break; #endif Index: sys/netinet/ip_mroute.c =================================================================== --- sys/netinet/ip_mroute.c +++ sys/netinet/ip_mroute.c @@ -1680,7 +1680,6 @@ send_packet(struct vif *vifp, struct mbuf *m) { struct ip_moptions imo; - struct in_multi *imm[2]; int error __unused; VIF_LOCK_ASSERT(); @@ -1689,9 +1688,7 @@ imo.imo_multicast_ttl = mtod(m, struct ip *)->ip_ttl - 1; imo.imo_multicast_loop = 1; imo.imo_multicast_vif = -1; - imo.imo_num_memberships = 0; - imo.imo_max_memberships = 2; - imo.imo_membership = &imm[0]; + STAILQ_INIT(&imo.imo_head); /* * Re-entrancy should not be a problem here, because Index: sys/netinet/ip_var.h =================================================================== --- sys/netinet/ip_var.h +++ sys/netinet/ip_var.h @@ -82,6 +82,7 @@ char ipopt_list[MAX_IPOPTLEN]; /* options proper */ }; +#ifdef _NETINET_IN_VAR_H_ /* * Structure attached to inpcb.ip_moptions and * passed to ip_output when IP multicast options are in use. @@ -93,12 +94,12 @@ u_long imo_multicast_vif; /* vif num outgoing multicasts */ u_char imo_multicast_ttl; /* TTL for outgoing multicasts */ u_char imo_multicast_loop; /* 1 => hear sends if a member */ - u_short imo_num_memberships; /* no. 
memberships this socket */ - u_short imo_max_memberships; /* max memberships this socket */ - struct in_multi **imo_membership; /* group memberships */ - struct in_mfilter *imo_mfilters; /* source filters */ + ip_mfilter_head_t imo_head; /* group memberships */ struct epoch_context imo_epoch_ctx; }; +#else +struct ip_moptions; +#endif struct ipstat { uint64_t ips_total; /* total packets received */ Index: sys/netinet6/in6.h =================================================================== --- sys/netinet6/in6.h +++ sys/netinet6/in6.h @@ -523,11 +523,8 @@ #define IPV6_DEFAULT_MULTICAST_LOOP 1 /* normally hear sends if a member */ /* - * The im6o_membership vector for each socket is now dynamically allocated at - * run-time, bounded by USHRT_MAX, and is reallocated when needed, sized - * according to a power-of-two increment. + * Limit for IPv6 multicast memberships */ -#define IPV6_MIN_MEMBERSHIPS 31 #define IPV6_MAX_MEMBERSHIPS 4095 /* Index: sys/netinet6/in6_ifattach.c =================================================================== --- sys/netinet6/in6_ifattach.c +++ sys/netinet6/in6_ifattach.c @@ -774,9 +774,11 @@ in6_purgeaddr(ifa); } if (purgeulp) { + IN6_MULTI_LOCK(); in6_pcbpurgeif0(&V_udbinfo, ifp); in6_pcbpurgeif0(&V_ulitecbinfo, ifp); in6_pcbpurgeif0(&V_ripcbinfo, ifp); + IN6_MULTI_UNLOCK(); } /* leave from all multicast groups joined */ in6_purgemaddrs(ifp); Index: sys/netinet6/in6_mcast.c =================================================================== --- sys/netinet6/in6_mcast.c +++ sys/netinet6/in6_mcast.c @@ -102,7 +102,8 @@ /* * Locking: - * - Lock order is: Giant, INP_WLOCK, IN6_MULTI_LOCK, MLD_LOCK, IF_ADDR_LOCK. + * - Lock order is: Giant, IN6_MULTI_LOCK, INP_WLOCK, + * IN6_MULTI_LIST_LOCK, MLD_LOCK, IF_ADDR_LOCK. * - The IF_ADDR_LOCK is implicitly taken by in6m_lookup() earlier, however * it can be taken by code in net/if.c also. * - ip6_moptions and in6_mfilter are covered by the INP_WLOCK. 
@@ -134,12 +135,11 @@ static void im6f_purge(struct in6_mfilter *); static void im6f_rollback(struct in6_mfilter *); static void im6f_reap(struct in6_mfilter *); -static int im6o_grow(struct ip6_moptions *); -static size_t im6o_match_group(const struct ip6_moptions *, +static struct in6_mfilter * + im6o_match_group(const struct ip6_moptions *, const struct ifnet *, const struct sockaddr *); static struct in6_msource * - im6o_match_source(const struct ip6_moptions *, const size_t, - const struct sockaddr *); + im6o_match_source(struct in6_mfilter *, const struct sockaddr *); static void im6s_merge(struct ip6_msource *ims, const struct in6_msource *lims, const int rollback); static int in6_getmulti(struct ifnet *, const struct in6_addr *, @@ -228,55 +228,25 @@ imf->im6f_st[1] = st1; } -/* - * Resize the ip6_moptions vector to the next power-of-two minus 1. - * May be called with locks held; do not sleep. - */ -static int -im6o_grow(struct ip6_moptions *imo) +struct in6_mfilter * +ip6_alloc_mfilter(const int mflags, const int st0, const int st1) { - struct in6_multi **nmships; - struct in6_multi **omships; - struct in6_mfilter *nmfilters; - struct in6_mfilter *omfilters; - size_t idx; - size_t newmax; - size_t oldmax; - - nmships = NULL; - nmfilters = NULL; - omships = imo->im6o_membership; - omfilters = imo->im6o_mfilters; - oldmax = imo->im6o_max_memberships; - newmax = ((oldmax + 1) * 2) - 1; - - if (newmax <= IPV6_MAX_MEMBERSHIPS) { - nmships = (struct in6_multi **)realloc(omships, - sizeof(struct in6_multi *) * newmax, M_IP6MOPTS, M_NOWAIT); - nmfilters = (struct in6_mfilter *)realloc(omfilters, - sizeof(struct in6_mfilter) * newmax, M_IN6MFILTER, - M_NOWAIT); - if (nmships != NULL && nmfilters != NULL) { - /* Initialize newly allocated source filter heads. 
*/ - for (idx = oldmax; idx < newmax; idx++) { - im6f_init(&nmfilters[idx], MCAST_UNDEFINED, - MCAST_EXCLUDE); - } - imo->im6o_max_memberships = newmax; - imo->im6o_membership = nmships; - imo->im6o_mfilters = nmfilters; - } - } + struct in6_mfilter *imf; - if (nmships == NULL || nmfilters == NULL) { - if (nmships != NULL) - free(nmships, M_IP6MOPTS); - if (nmfilters != NULL) - free(nmfilters, M_IN6MFILTER); - return (ETOOMANYREFS); - } + imf = malloc(sizeof(*imf), M_IN6MFILTER, mflags); - return (0); + if (imf != NULL) + im6f_init(imf, st0, st1); + + return (imf); +} + +void +ip6_free_mfilter(struct in6_mfilter *imf) +{ + + im6f_purge(imf); + free(imf, M_IN6MFILTER); } /* @@ -284,36 +254,27 @@ * which matches the specified group, and optionally an interface. * Return its index into the array, or -1 if not found. */ -static size_t +static struct in6_mfilter * im6o_match_group(const struct ip6_moptions *imo, const struct ifnet *ifp, const struct sockaddr *group) { const struct sockaddr_in6 *gsin6; - struct in6_multi **pinm; - int idx; - int nmships; - - gsin6 = (const struct sockaddr_in6 *)group; + struct in6_mfilter *imf; + struct in6_multi *inm; - /* The im6o_membership array may be lazy allocated. */ - if (imo->im6o_membership == NULL || imo->im6o_num_memberships == 0) - return (-1); + gsin6 = (const struct sockaddr_in6 *)group; - nmships = imo->im6o_num_memberships; - pinm = &imo->im6o_membership[0]; - for (idx = 0; idx < nmships; idx++, pinm++) { - if (*pinm == NULL) + for (imf = NULL; ip6_next_mfilter(&imo->im6o_head, &imf); ) { + inm = imf->im6f_in6m; + if (inm == NULL) continue; - if ((ifp == NULL || ((*pinm)->in6m_ifp == ifp)) && - IN6_ARE_ADDR_EQUAL(&(*pinm)->in6m_addr, + if ((ifp == NULL || (inm->in6m_ifp == ifp)) && + IN6_ARE_ADDR_EQUAL(&inm->in6m_addr, &gsin6->sin6_addr)) { break; } } - if (idx >= nmships) - idx = -1; - - return (idx); + return (imf); } /* @@ -328,22 +289,13 @@ * it exists, which may not be the desired behaviour. 
*/ static struct in6_msource * -im6o_match_source(const struct ip6_moptions *imo, const size_t gidx, - const struct sockaddr *src) +im6o_match_source(struct in6_mfilter *imf, const struct sockaddr *src) { struct ip6_msource find; - struct in6_mfilter *imf; struct ip6_msource *ims; const sockunion_t *psa; KASSERT(src->sa_family == AF_INET6, ("%s: !AF_INET6", __func__)); - KASSERT(gidx != -1 && gidx < imo->im6o_num_memberships, - ("%s: invalid index %d\n", __func__, (int)gidx)); - - /* The im6o_mfilters array may be lazy allocated. */ - if (imo->im6o_mfilters == NULL) - return (NULL); - imf = &imo->im6o_mfilters[gidx]; psa = (const sockunion_t *)src; find.im6s_addr = psa->sin6.sin6_addr; @@ -363,14 +315,14 @@ im6o_mc_filter(const struct ip6_moptions *imo, const struct ifnet *ifp, const struct sockaddr *group, const struct sockaddr *src) { - size_t gidx; + struct in6_mfilter *imf; struct in6_msource *ims; int mode; KASSERT(ifp != NULL, ("%s: null ifp", __func__)); - gidx = im6o_match_group(imo, ifp, group); - if (gidx == -1) + imf = im6o_match_group(imo, ifp, group); + if (imf == NULL) return (MCAST_NOTGMEMBER); /* @@ -382,8 +334,8 @@ * NOTE: We are comparing group state here at MLD t1 (now) * with socket-layer t0 (since last downcall). */ - mode = imo->im6o_mfilters[gidx].im6f_st[1]; - ims = im6o_match_source(imo, gidx, src); + mode = imf->im6f_st[1]; + ims = im6o_match_source(imf, src); if ((ims == NULL && mode == MCAST_INCLUDE) || (ims != NULL && ims->im6sl_st[0] != mode)) @@ -1447,7 +1399,6 @@ struct ip6_moptions *imo; struct in6_msource *ims; struct in6_multi *inm; - size_t idx; uint16_t fmode; int error, doblock; #ifdef KTR @@ -1504,16 +1455,12 @@ * Check if we are actually a member of this group. 
*/ imo = in6p_findmoptions(inp); - idx = im6o_match_group(imo, ifp, &gsa->sa); - if (idx == -1 || imo->im6o_mfilters == NULL) { + imf = im6o_match_group(imo, ifp, &gsa->sa); + if (imf == NULL) { error = EADDRNOTAVAIL; goto out_in6p_locked; } - - KASSERT(imo->im6o_mfilters != NULL, - ("%s: im6o_mfilters not allocated", __func__)); - imf = &imo->im6o_mfilters[idx]; - inm = imo->im6o_membership[idx]; + inm = imf->im6f_in6m; /* * Attempting to use the delta-based API on an @@ -1531,7 +1478,7 @@ * Asked to unblock, but nothing to unblock. * If adding a new block entry, allocate it. */ - ims = im6o_match_source(imo, idx, &ssa->sa); + ims = im6o_match_source(imf, &ssa->sa); if ((ims != NULL && doblock) || (ims == NULL && !doblock)) { CTR3(KTR_MLD, "%s: source %s %spresent", __func__, ip6_sprintf(ip6tbuf, &ssa->sin6.sin6_addr), @@ -1601,9 +1548,6 @@ in6p_findmoptions(struct inpcb *inp) { struct ip6_moptions *imo; - struct in6_multi **immp; - struct in6_mfilter *imfp; - size_t idx; INP_WLOCK(inp); if (inp->in6p_moptions != NULL) @@ -1612,27 +1556,14 @@ INP_WUNLOCK(inp); imo = malloc(sizeof(*imo), M_IP6MOPTS, M_WAITOK); - immp = malloc(sizeof(*immp) * IPV6_MIN_MEMBERSHIPS, M_IP6MOPTS, - M_WAITOK | M_ZERO); - imfp = malloc(sizeof(struct in6_mfilter) * IPV6_MIN_MEMBERSHIPS, - M_IN6MFILTER, M_WAITOK); imo->im6o_multicast_ifp = NULL; imo->im6o_multicast_hlim = V_ip6_defmcasthlim; imo->im6o_multicast_loop = in6_mcast_loop; - imo->im6o_num_memberships = 0; - imo->im6o_max_memberships = IPV6_MIN_MEMBERSHIPS; - imo->im6o_membership = immp; - - /* Initialize per-group source filters. 
*/ - for (idx = 0; idx < IPV6_MIN_MEMBERSHIPS; idx++) - im6f_init(&imfp[idx], MCAST_UNDEFINED, MCAST_EXCLUDE); - imo->im6o_mfilters = imfp; + STAILQ_INIT(&imo->im6o_head); INP_WLOCK(inp); if (inp->in6p_moptions != NULL) { - free(imfp, M_IN6MFILTER); - free(immp, M_IP6MOPTS); free(imo, M_IP6MOPTS); return (inp->in6p_moptions); } @@ -1652,33 +1583,26 @@ static void inp_gcmoptions(struct ip6_moptions *imo) { - struct in6_mfilter *imf; + struct in6_mfilter *imf; struct in6_multi *inm; struct ifnet *ifp; - size_t idx, nmships; - - nmships = imo->im6o_num_memberships; - for (idx = 0; idx < nmships; ++idx) { - imf = imo->im6o_mfilters ? &imo->im6o_mfilters[idx] : NULL; - if (imf) - im6f_leave(imf); - inm = imo->im6o_membership[idx]; - ifp = inm->in6m_ifp; - if (ifp != NULL) { - CURVNET_SET(ifp->if_vnet); - (void)in6_leavegroup(inm, imf); - CURVNET_RESTORE(); - } else { - (void)in6_leavegroup(inm, imf); - } - if (imf) - im6f_purge(imf); - } - if (imo->im6o_mfilters) - free(imo->im6o_mfilters, M_IN6MFILTER); - free(imo->im6o_membership, M_IP6MOPTS); - free(imo, M_IP6MOPTS); + while ((imf = ip6_first_mfilter(&imo->im6o_head)) != NULL) { + ip6_remove_mfilter(&imo->im6o_head, imf); + + im6f_leave(imf); + if ((inm = imf->im6f_in6m) != NULL) { + if ((ifp = inm->in6m_ifp) != NULL) { + CURVNET_SET(ifp->if_vnet); + (void)in6_leavegroup(inm, imf); + CURVNET_RESTORE(); + } else { + (void)in6_leavegroup(inm, imf); + } + } + ip6_free_mfilter(imf); + } + free(imo, M_IP6MOPTS); } void @@ -1707,7 +1631,7 @@ struct sockaddr_storage *ptss; struct sockaddr_storage *tss; int error; - size_t idx, nsrcs, ncsrcs; + size_t nsrcs, ncsrcs; INP_WLOCK_ASSERT(inp); @@ -1741,12 +1665,11 @@ /* * Lookup group on the socket. 
*/ - idx = im6o_match_group(imo, ifp, &gsa->sa); - if (idx == -1 || imo->im6o_mfilters == NULL) { + imf = im6o_match_group(imo, ifp, &gsa->sa); + if (imf == NULL) { INP_WUNLOCK(inp); return (EADDRNOTAVAIL); } - imf = &imo->im6o_mfilters[idx]; /* * Ignore memberships which are in limbo. @@ -1943,15 +1866,12 @@ struct ip6_moptions *imo; struct in6_multi *inm; struct in6_msource *lims; - size_t idx; int error, is_new; SLIST_INIT(&inmh); ifp = NULL; - imf = NULL; lims = NULL; error = 0; - is_new = 0; memset(&gsr, 0, sizeof(struct group_source_req)); gsa = (sockunion_t *)&gsr.gsr_group; @@ -2052,13 +1972,22 @@ */ (void)in6_setscope(&gsa->sin6.sin6_addr, ifp, NULL); + IN6_MULTI_LOCK(); + imo = in6p_findmoptions(inp); - idx = im6o_match_group(imo, ifp, &gsa->sa); - if (idx == -1) { + imf = im6o_match_group(imo, ifp, &gsa->sa); + if (imf == NULL) { is_new = 1; + inm = NULL; + + if (ip6_count_mfilter(&imo->im6o_head) >= IPV6_MAX_MEMBERSHIPS) { + error = ENOMEM; + goto out_in6p_locked; + } } else { - inm = imo->im6o_membership[idx]; - imf = &imo->im6o_mfilters[idx]; + is_new = 0; + inm = imf->im6f_in6m; + if (ssa->ss.ss_family != AF_UNSPEC) { /* * MCAST_JOIN_SOURCE_GROUP on an exclusive membership @@ -2085,7 +2014,7 @@ * full-state SSM API with the delta-based API, * which is discouraged in the relevant RFCs. */ - lims = im6o_match_source(imo, idx, &ssa->sa); + lims = im6o_match_source(imf, &ssa->sa); if (lims != NULL /*&& lims->im6sl_st[1] == MCAST_INCLUDE*/) { error = EADDRNOTAVAIL; @@ -2113,27 +2042,6 @@ */ INP_WLOCK_ASSERT(inp); - if (is_new) { - if (imo->im6o_num_memberships == imo->im6o_max_memberships) { - error = im6o_grow(imo); - if (error) - goto out_in6p_locked; - } - /* - * Allocate the new slot upfront so we can deal with - * grafting the new source filter in same code path - * as for join-source on existing membership. 
- */ - idx = imo->im6o_num_memberships; - imo->im6o_membership[idx] = NULL; - imo->im6o_num_memberships++; - KASSERT(imo->im6o_mfilters != NULL, - ("%s: im6f_mfilters vector was not allocated", __func__)); - imf = &imo->im6o_mfilters[idx]; - KASSERT(RB_EMPTY(&imf->im6f_sources), - ("%s: im6f_sources not empty", __func__)); - } - /* * Graft new source into filter list for this inpcb's * membership of the group. The in6_multi may not have @@ -2149,7 +2057,11 @@ /* Membership starts in IN mode */ if (is_new) { CTR1(KTR_MLD, "%s: new join w/source", __func__); - im6f_init(imf, MCAST_UNDEFINED, MCAST_INCLUDE); + imf = ip6_alloc_mfilter(M_NOWAIT, MCAST_UNDEFINED, MCAST_INCLUDE); + if (imf == NULL) { + error = ENOMEM; + goto out_in6p_locked; + } } else { CTR2(KTR_MLD, "%s: %s source", __func__, "allow"); } @@ -2158,81 +2070,88 @@ CTR1(KTR_MLD, "%s: merge imf state failed", __func__); error = ENOMEM; - goto out_im6o_free; + goto out_in6p_locked; } } else { /* No address specified; Membership starts in EX mode */ if (is_new) { CTR1(KTR_MLD, "%s: new join w/o source", __func__); - im6f_init(imf, MCAST_UNDEFINED, MCAST_EXCLUDE); + imf = ip6_alloc_mfilter(M_NOWAIT, MCAST_UNDEFINED, MCAST_EXCLUDE); + if (imf == NULL) { + error = ENOMEM; + goto out_in6p_locked; + } } } /* * Begin state merge transaction at MLD layer. */ - in_pcbref(inp); - INP_WUNLOCK(inp); - IN6_MULTI_LOCK(); - if (is_new) { + in_pcbref(inp); + INP_WUNLOCK(inp); + error = in6_joingroup_locked(ifp, &gsa->sin6.sin6_addr, imf, - &inm, 0); + &imf->im6f_in6m, 0); + + INP_WLOCK(inp); + if (in_pcbrele_wlocked(inp)) { + error = ENXIO; + goto out_in6p_unlocked; + } if (error) { - IN6_MULTI_UNLOCK(); - goto out_im6o_free; + goto out_in6p_locked; } /* * NOTE: Refcount from in6_joingroup_locked() * is protecting membership. 
*/ - imo->im6o_membership[idx] = inm; } else { CTR1(KTR_MLD, "%s: merge inm state", __func__); IN6_MULTI_LIST_LOCK(); error = in6m_merge(inm, imf); - if (error) + if (error) { CTR1(KTR_MLD, "%s: failed to merge inm state", __func__); - else { - CTR1(KTR_MLD, "%s: doing mld downcall", __func__); - error = mld_change_state(inm, 0); - if (error) - CTR1(KTR_MLD, "%s: failed mld downcall", - __func__); + IN6_MULTI_LIST_UNLOCK(); + im6f_rollback(imf); + im6f_reap(imf); + goto out_in6p_locked; } + CTR1(KTR_MLD, "%s: doing mld downcall", __func__); + error = mld_change_state(inm, 0); IN6_MULTI_LIST_UNLOCK(); - } - IN6_MULTI_UNLOCK(); - INP_WLOCK(inp); - if (in_pcbrele_wlocked(inp)) - return (ENXIO); - if (error) { - im6f_rollback(imf); - if (is_new) - im6f_purge(imf); - else + if (error) { + CTR1(KTR_MLD, "%s: failed mld downcall", + __func__); + im6f_rollback(imf); im6f_reap(imf); - } else { - im6f_commit(imf); - } - -out_im6o_free: - if (error && is_new) { - inm = imo->im6o_membership[idx]; - if (inm != NULL) { - IN6_MULTI_LIST_LOCK(); - in6m_rele_locked(&inmh, inm); - IN6_MULTI_LIST_UNLOCK(); + goto out_in6p_locked; } - imo->im6o_membership[idx] = NULL; - --imo->im6o_num_memberships; } + if (is_new) + ip6_insert_mfilter(&imo->im6o_head, imf); + + im6f_commit(imf); + imf = NULL; + out_in6p_locked: INP_WUNLOCK(inp); - in6m_release_list_deferred(&inmh); +out_in6p_unlocked: + IN6_MULTI_UNLOCK(); + + if (is_new && imf) { + if (imf->im6f_in6m != NULL) { + struct in6_multi_head inmh; + + SLIST_INIT(&inmh); + SLIST_INSERT_HEAD(&inmh, imf->im6f_in6m, in6m_defer); + in6m_release_list_deferred(&inmh); + } + ip6_free_mfilter(imf); + } return (error); } @@ -2251,7 +2170,6 @@ struct in6_msource *ims; struct in6_multi *inm; uint32_t ifindex; - size_t idx; int error, is_final; #ifdef KTR char ip6tbuf[INET6_ADDRSTRLEN]; @@ -2378,17 +2296,18 @@ CTR2(KTR_MLD, "%s: ifp = %p", __func__, ifp); KASSERT(ifp != NULL, ("%s: ifp did not resolve", __func__)); + IN_MULTI_LOCK(); + /* * Find the 
membership in the membership array. */ imo = in6p_findmoptions(inp); - idx = im6o_match_group(imo, ifp, &gsa->sa); - if (idx == -1) { + imf = im6o_match_group(imo, ifp, &gsa->sa); + if (imf == NULL) { error = EADDRNOTAVAIL; goto out_in6p_locked; } - inm = imo->im6o_membership[idx]; - imf = &imo->im6o_mfilters[idx]; + inm = imf->im6f_in6m; if (ssa->ss.ss_family != AF_UNSPEC) is_final = 0; @@ -2403,13 +2322,14 @@ * MCAST_LEAVE_SOURCE_GROUP is only valid for inclusive memberships. */ if (is_final) { + ip6_remove_mfilter(&imo->im6o_head, imf); im6f_leave(imf); } else { if (imf->im6f_st[0] == MCAST_EXCLUDE) { error = EADDRNOTAVAIL; goto out_in6p_locked; } - ims = im6o_match_source(imo, idx, &ssa->sa); + ims = im6o_match_source(imf, &ssa->sa); if (ims == NULL) { CTR3(KTR_MLD, "%s: source %p %spresent", __func__, ip6_sprintf(ip6tbuf, &ssa->sin6.sin6_addr), @@ -2433,56 +2353,47 @@ INP_WUNLOCK(inp); IN6_MULTI_LOCK(); - if (is_final) { - /* - * Give up the multicast address record to which - * the membership points. 
- */ - (void)in6_leavegroup_locked(inm, imf); - } else { + if (is_final == 0) { CTR1(KTR_MLD, "%s: merge inm state", __func__); IN6_MULTI_LIST_LOCK(); error = in6m_merge(inm, imf); - if (error) + if (error) { CTR1(KTR_MLD, "%s: failed to merge inm state", __func__); - else { - CTR1(KTR_MLD, "%s: doing mld downcall", __func__); - error = mld_change_state(inm, 0); - if (error) - CTR1(KTR_MLD, "%s: failed mld downcall", - __func__); + IN6_MULTI_LIST_UNLOCK(); + im6f_rollback(imf); + im6f_reap(imf); + goto out_in6p_locked; } + + CTR1(KTR_MLD, "%s: doing mld downcall", __func__); + error = mld_change_state(inm, 0); IN6_MULTI_LIST_UNLOCK(); + if (error) { + CTR1(KTR_MLD, "%s: failed mld downcall", + __func__); + im6f_rollback(imf); + im6f_reap(imf); + goto out_in6p_locked; + } } - IN6_MULTI_UNLOCK(); - INP_WLOCK(inp); - if (in_pcbrele_wlocked(inp)) - return (ENXIO); - - if (error) - im6f_rollback(imf); - else - im6f_commit(imf); - + im6f_commit(imf); im6f_reap(imf); - if (is_final) { - /* Remove the gap in the membership array. */ - KASSERT(RB_EMPTY(&imf->im6f_sources), - ("%s: im6f_sources not empty", __func__)); - for (++idx; idx < imo->im6o_num_memberships; ++idx) { - imo->im6o_membership[idx - 1] = imo->im6o_membership[idx]; - imo->im6o_mfilters[idx - 1] = imo->im6o_mfilters[idx]; - } - im6f_init(&imo->im6o_mfilters[idx - 1], MCAST_UNDEFINED, - MCAST_EXCLUDE); - imo->im6o_num_memberships--; - } - out_in6p_locked: INP_WUNLOCK(inp); + + if (is_final && imf) { + /* + * Give up the multicast address record to which + * the membership points. + */ + (void)in6_leavegroup_locked(inm, imf); + ip6_free_mfilter(imf); + } + + IN6_MULTI_UNLOCK(); return (error); } @@ -2540,7 +2451,6 @@ struct in6_mfilter *imf; struct ip6_moptions *imo; struct in6_multi *inm; - size_t idx; int error; error = sooptcopyin(sopt, &msfr, sizeof(struct __msfilterreq), @@ -2577,13 +2487,12 @@ * Check if this socket is a member of this group. 
*/ imo = in6p_findmoptions(inp); - idx = im6o_match_group(imo, ifp, &gsa->sa); - if (idx == -1 || imo->im6o_mfilters == NULL) { + imf = im6o_match_group(imo, ifp, &gsa->sa); + if (imf == NULL) { error = EADDRNOTAVAIL; goto out_in6p_locked; } - inm = imo->im6o_membership[idx]; - imf = &imo->im6o_mfilters[idx]; + inm = imf->im6f_in6m; /* * Begin state merge transaction at socket layer. Index: sys/netinet6/in6_pcb.c =================================================================== --- sys/netinet6/in6_pcb.c +++ sys/netinet6/in6_pcb.c @@ -802,8 +802,9 @@ in6_pcbpurgeif0(struct inpcbinfo *pcbinfo, struct ifnet *ifp) { struct inpcb *in6p; + struct in6_multi *inm; + struct in6_mfilter *imf; struct ip6_moptions *im6o; - int i, gap; INP_INFO_WLOCK(pcbinfo); CK_LIST_FOREACH(in6p, pcbinfo->ipi_listhead, inp_list) { @@ -824,18 +825,17 @@ * Drop multicast group membership if we joined * through the interface being detached. */ - gap = 0; - for (i = 0; i < im6o->im6o_num_memberships; i++) { - if (im6o->im6o_membership[i]->in6m_ifp == - ifp) { - in6_leavegroup(im6o->im6o_membership[i], NULL); - gap++; - } else if (gap != 0) { - im6o->im6o_membership[i - gap] = - im6o->im6o_membership[i]; + for (imf = NULL; ip6_next_mfilter(&im6o->im6o_head, &imf); ) { + if ((inm = imf->im6f_in6m) == NULL) + continue; + if (inm->in6m_ifp == ifp) { + ip6_remove_mfilter(&im6o->im6o_head, imf); + IN6_MULTI_LOCK_ASSERT(); + in6_leavegroup_locked(inm, NULL); + ip6_free_mfilter(imf); + imf = NULL; /* re-start search */ } } - im6o->im6o_num_memberships -= gap; } INP_WUNLOCK(in6p); } Index: sys/netinet6/in6_var.h =================================================================== --- sys/netinet6/in6_var.h +++ sys/netinet6/in6_var.h @@ -602,8 +602,61 @@ struct ip6_msource_tree im6f_sources; /* source list for (S,G) */ u_long im6f_nsrc; /* # of source entries */ uint8_t im6f_st[2]; /* state before/at commit */ + struct in6_multi *im6f_in6m; /* associated multicast address */ + STAILQ_ENTRY(in6_mfilter) 
im6f_entry; /* list entry */ }; +/* + * Helper types and functions for IPv6 multicast filters. + */ +typedef STAILQ_HEAD(, in6_mfilter) ip6_mfilter_head_t; + +struct in6_mfilter * ip6_alloc_mfilter(int mflags, int st0, int st1); +void ip6_free_mfilter(struct in6_mfilter *); + +static inline struct in6_mfilter * +ip6_first_mfilter(const ip6_mfilter_head_t *head) +{ + + return (STAILQ_FIRST(head)); +} + +static inline void +ip6_insert_mfilter(ip6_mfilter_head_t *head, struct in6_mfilter *imf) +{ + + STAILQ_INSERT_TAIL(head, imf, im6f_entry); +} + +static inline void +ip6_remove_mfilter(ip6_mfilter_head_t *head, struct in6_mfilter *imf) +{ + + STAILQ_REMOVE(head, imf, in6_mfilter, im6f_entry); +} + +static inline bool +ip6_next_mfilter(const ip6_mfilter_head_t *head, struct in6_mfilter **ppimf) +{ + + if (*ppimf == NULL) + *ppimf = STAILQ_FIRST(head); + else + *ppimf = STAILQ_NEXT(*ppimf, im6f_entry); + return (*ppimf != NULL); +} + +static inline size_t +ip6_count_mfilter(ip6_mfilter_head_t *head) +{ + struct in6_mfilter *imf; + size_t num = 0; + + STAILQ_FOREACH(imf, head, im6f_entry) + num++; + return (num); +} + /* * Legacy KAME IPv6 multicast membership descriptor. */ Index: sys/netinet6/ip6_var.h =================================================================== --- sys/netinet6/ip6_var.h +++ sys/netinet6/ip6_var.h @@ -110,6 +110,7 @@ uint32_t ip6dc_off; /* offset to next header */ }; +#ifdef _NETINET6_IN6_VAR_H_ /* * Structure attached to inpcb.in6p_moptions and * passed to ip6_output when IPv6 multicast options are in use. @@ -119,13 +120,12 @@ struct ifnet *im6o_multicast_ifp; /* ifp for outgoing multicasts */ u_char im6o_multicast_hlim; /* hoplimit for outgoing multicasts */ u_char im6o_multicast_loop; /* 1 >= hear sends if a member */ - u_short im6o_num_memberships; /* no. 
memberships this socket */ - u_short im6o_max_memberships; /* max memberships this socket */ - struct in6_multi **im6o_membership; /* group memberships */ - struct in6_mfilter *im6o_mfilters; /* source filters */ + ip6_mfilter_head_t im6o_head; /* group memberships */ struct epoch_context imo6_epoch_ctx; }; - +#else +struct ip6_moptions; +#endif /* * Control options for outgoing packets */ Index: sys/netpfil/pf/if_pfsync.c =================================================================== --- sys/netpfil/pf/if_pfsync.c +++ sys/netpfil/pf/if_pfsync.c @@ -264,7 +264,7 @@ static void pfsync_push_all(struct pfsync_softc *); static void pfsyncintr(void *); static int pfsync_multicast_setup(struct pfsync_softc *, struct ifnet *, - void *); + struct in_mfilter *imf); static void pfsync_multicast_cleanup(struct pfsync_softc *); static void pfsync_pointers_init(void); static void pfsync_pointers_uninit(void); @@ -430,8 +430,7 @@ pfsync_drop(sc); if_free(ifp); - if (sc->sc_imo.imo_membership) - pfsync_multicast_cleanup(sc); + pfsync_multicast_cleanup(sc); mtx_destroy(&sc->sc_mtx); mtx_destroy(&sc->sc_bulk_mtx); @@ -1373,10 +1372,9 @@ case SIOCSETPFSYNC: { - struct ip_moptions *imo = &sc->sc_imo; + struct in_mfilter *imf = NULL; struct ifnet *sifp; struct ip *ip; - void *mship = NULL; if ((error = priv_check(curthread, PRIV_NETINET_PF)) != 0) return (error); @@ -1396,8 +1394,7 @@ pfsyncr.pfsyncr_syncpeer.s_addr == 0 || pfsyncr.pfsyncr_syncpeer.s_addr == htonl(INADDR_PFSYNC_GROUP))) - mship = malloc((sizeof(struct in_multi *) * - IP_MIN_MEMBERSHIPS), M_PFSYNC, M_WAITOK | M_ZERO); + imf = ip_alloc_mfilter(M_WAITOK, 0,0); PFSYNC_LOCK(sc); if (pfsyncr.pfsyncr_syncpeer.s_addr == 0) @@ -1419,8 +1416,7 @@ if (sc->sc_sync_if) if_rele(sc->sc_sync_if); sc->sc_sync_if = NULL; - if (imo->imo_membership) - pfsync_multicast_cleanup(sc); + pfsync_multicast_cleanup(sc); PFSYNC_UNLOCK(sc); break; } @@ -1436,14 +1432,13 @@ PFSYNC_BUCKET_UNLOCK(&sc->sc_buckets[c]); } - if (imo->imo_membership) 
- pfsync_multicast_cleanup(sc); + pfsync_multicast_cleanup(sc); if (sc->sc_sync_peer.s_addr == htonl(INADDR_PFSYNC_GROUP)) { - error = pfsync_multicast_setup(sc, sifp, mship); + error = pfsync_multicast_setup(sc, sifp, imf); if (error) { if_rele(sifp); - free(mship, M_PFSYNC); + ip_free_mfilter(imf); PFSYNC_UNLOCK(sc); return (error); } @@ -2353,7 +2348,7 @@ } static int -pfsync_multicast_setup(struct pfsync_softc *sc, struct ifnet *ifp, void *mship) +pfsync_multicast_setup(struct pfsync_softc *sc, struct ifnet *ifp, struct in_mfilter *imf) { struct ip_moptions *imo = &sc->sc_imo; int error; @@ -2361,16 +2356,14 @@ if (!(ifp->if_flags & IFF_MULTICAST)) return (EADDRNOTAVAIL); - imo->imo_membership = (struct in_multi **)mship; - imo->imo_max_memberships = IP_MIN_MEMBERSHIPS; imo->imo_multicast_vif = -1; if ((error = in_joingroup(ifp, &sc->sc_sync_peer, NULL, - &imo->imo_membership[0])) != 0) { - imo->imo_membership = NULL; + &imf->imf_inm)) != 0) return (error); - } - imo->imo_num_memberships++; + + STAILQ_INIT(&imo->imo_head); + ip_insert_mfilter(&imo->imo_head, imf); imo->imo_multicast_ifp = ifp; imo->imo_multicast_ttl = PFSYNC_DFLTTL; imo->imo_multicast_loop = 0; @@ -2382,10 +2375,14 @@ pfsync_multicast_cleanup(struct pfsync_softc *sc) { struct ip_moptions *imo = &sc->sc_imo; + struct in_mfilter *imf = ip_first_mfilter(&sc->sc_imo.imo_head); + + if (imf == NULL) + return; - in_leavegroup(imo->imo_membership[0], NULL); - free(imo->imo_membership, M_PFSYNC); - imo->imo_membership = NULL; + ip_remove_mfilter(&imo->imo_head, imf); + in_leavegroup(imf->imf_inm, NULL); + ip_free_mfilter(imf); imo->imo_multicast_ifp = NULL; } @@ -2404,7 +2401,7 @@ * is going away. We do need to ensure we don't try to do * cleanup later. */ - sc->sc_imo.imo_membership = NULL; + STAILQ_INIT(&sc->sc_imo.imo_head); sc->sc_imo.imo_multicast_ifp = NULL; sc->sc_sync_if = NULL; }