Index: stable/12/sys/netinet6/in6_mcast.c =================================================================== --- stable/12/sys/netinet6/in6_mcast.c (revision 343650) +++ stable/12/sys/netinet6/in6_mcast.c (revision 343651) @@ -1,2994 +1,2994 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 2009 Bruce Simpson. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. The name of the author may not be used to endorse or promote * products derived from this software without specific prior written * permission. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ /* * IPv6 multicast socket, group, and socket option processing module. * Normative references: RFC 2292, RFC 3493, RFC 3542, RFC 3678, RFC 3810. 
*/ #include __FBSDID("$FreeBSD$"); #include "opt_inet6.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifndef KTR_MLD #define KTR_MLD KTR_INET6 #endif #ifndef __SOCKUNION_DECLARED union sockunion { struct sockaddr_storage ss; struct sockaddr sa; struct sockaddr_dl sdl; struct sockaddr_in6 sin6; }; typedef union sockunion sockunion_t; #define __SOCKUNION_DECLARED #endif /* __SOCKUNION_DECLARED */ static MALLOC_DEFINE(M_IN6MFILTER, "in6_mfilter", "IPv6 multicast PCB-layer source filter"); MALLOC_DEFINE(M_IP6MADDR, "in6_multi", "IPv6 multicast group"); static MALLOC_DEFINE(M_IP6MOPTS, "ip6_moptions", "IPv6 multicast options"); static MALLOC_DEFINE(M_IP6MSOURCE, "ip6_msource", "IPv6 multicast MLD-layer source filter"); RB_GENERATE(ip6_msource_tree, ip6_msource, im6s_link, ip6_msource_cmp); /* * Locking: * - Lock order is: Giant, INP_WLOCK, IN6_MULTI_LOCK, MLD_LOCK, IF_ADDR_LOCK. * - The IF_ADDR_LOCK is implicitly taken by in6m_lookup() earlier, however * it can be taken by code in net/if.c also. * - ip6_moptions and in6_mfilter are covered by the INP_WLOCK. * * struct in6_multi is covered by IN6_MULTI_LOCK. There isn't strictly * any need for in6_multi itself to be virtualized -- it is bound to an ifp * anyway no matter what happens. 
*/
/* Lock objects for this module; each is initialised by its *_SYSINIT line. */
struct mtx in6_multi_list_mtx;
MTX_SYSINIT(in6_multi_mtx, &in6_multi_list_mtx, "in6_multi_list_mtx", MTX_DEF);

/* Taken around updates of in6m_free_list (the deferred-release queue). */
struct mtx in6_multi_free_mtx;
MTX_SYSINIT(in6_multi_free_mtx, &in6_multi_free_mtx, "in6_multi_free_mtx", MTX_DEF);

struct sx in6_multi_sx;
SX_SYSINIT(in6_multi_sx, &in6_multi_sx, "in6_multi_sx");

/*
 * Forward declarations of file-local helpers.
 * Prefixes: im6f_ = socket-layer filter set, im6o_ = ip6_moptions,
 * im6s_ = single source, in6m_ = in6_multi record, in6p_ = inpcb level.
 */
static void	im6f_commit(struct in6_mfilter *);
static int	im6f_get_source(struct in6_mfilter *imf,
		    const struct sockaddr_in6 *psin,
		    struct in6_msource **);
static struct in6_msource *
		im6f_graft(struct in6_mfilter *, const uint8_t,
		    const struct sockaddr_in6 *);
static void	im6f_leave(struct in6_mfilter *);
static int	im6f_prune(struct in6_mfilter *, const struct sockaddr_in6 *);
static void	im6f_purge(struct in6_mfilter *);
static void	im6f_rollback(struct in6_mfilter *);
static void	im6f_reap(struct in6_mfilter *);
static int	im6o_grow(struct ip6_moptions *);
static size_t	im6o_match_group(const struct ip6_moptions *,
		    const struct ifnet *, const struct sockaddr *);
static struct in6_msource *
		im6o_match_source(const struct ip6_moptions *, const size_t,
		    const struct sockaddr *);
static void	im6s_merge(struct ip6_msource *ims,
		    const struct in6_msource *lims, const int rollback);
static int	in6_getmulti(struct ifnet *, const struct in6_addr *,
		    struct in6_multi **);
static int	in6m_get_source(struct in6_multi *inm,
		    const struct in6_addr *addr, const int noalloc,
		    struct ip6_msource **pims);
#ifdef KTR
static int	in6m_is_ifp_detached(const struct in6_multi *);
#endif
static int	in6m_merge(struct in6_multi *, /*const*/ struct in6_mfilter *);
static void	in6m_purge(struct in6_multi *);
static void	in6m_reap(struct in6_multi *);
static struct ip6_moptions *
		in6p_findmoptions(struct inpcb *);
static int	in6p_get_source_filters(struct inpcb *, struct sockopt *);
static int	in6p_join_group(struct inpcb *, struct sockopt *);
static int	in6p_leave_group(struct inpcb *, struct sockopt *);
static struct ifnet *
		in6p_lookup_mcast_ifp(const struct inpcb *,
		    const struct sockaddr_in6 *);
static int	in6p_block_unblock_source(struct inpcb *, struct sockopt *);
static int	in6p_set_multicast_if(struct inpcb *, struct sockopt *);
static int	in6p_set_source_filters(struct inpcb *, struct sockopt *);
static int	sysctl_ip6_mcast_filters(SYSCTL_HANDLER_ARGS);

SYSCTL_DECL(_net_inet6_ip6);	/* XXX Not in any common header. */

/* Tunables published under the net.inet6.ip6.mcast sysctl node. */
static SYSCTL_NODE(_net_inet6_ip6, OID_AUTO, mcast, CTLFLAG_RW, 0,
    "IPv6 multicast");

/* Compared against in6m_nsrc before a new per-group source is allocated. */
static u_long in6_mcast_maxgrpsrc = IPV6_MAX_GROUP_SRC_FILTER;
SYSCTL_ULONG(_net_inet6_ip6_mcast, OID_AUTO, maxgrpsrc,
    CTLFLAG_RWTUN, &in6_mcast_maxgrpsrc, 0,
    "Max source filters per group");

/* Compared against im6f_nsrc before a new per-socket source is allocated. */
static u_long in6_mcast_maxsocksrc = IPV6_MAX_SOCK_SRC_FILTER;
SYSCTL_ULONG(_net_inet6_ip6_mcast, OID_AUTO, maxsocksrc,
    CTLFLAG_RWTUN, &in6_mcast_maxsocksrc, 0,
    "Max source filters per socket");

/* TODO Virtualize this switch. */
int in6_mcast_loop = IPV6_DEFAULT_MULTICAST_LOOP;
SYSCTL_INT(_net_inet6_ip6_mcast, OID_AUTO, loop, CTLFLAG_RWTUN,
    &in6_mcast_loop, 0, "Loopback multicast datagrams by default");

static SYSCTL_NODE(_net_inet6_ip6_mcast, OID_AUTO, filters,
    CTLFLAG_RD | CTLFLAG_MPSAFE, sysctl_ip6_mcast_filters,
    "Per-interface stack-wide source filters");

#ifdef KTR
/*
 * Inline function which wraps assertions for a valid ifp.
 * The ifnet layer will set the ifma's ifp pointer to NULL if the ifp
 * is detached.
 *
 * Returns non-zero when inm->in6m_ifma->ifma_ifp is NULL, zero otherwise.
 * Only compiled when KTR is defined.
 */
static int __inline
in6m_is_ifp_detached(const struct in6_multi *inm)
{
	struct ifnet *ifp;

	KASSERT(inm->in6m_ifma != NULL, ("%s: no ifma", __func__));
	ifp = inm->in6m_ifma->ifma_ifp;
	if (ifp != NULL) {
		/*
		 * Sanity check that network-layer notion of ifp is the
		 * same as that of link-layer.
		 */
		KASSERT(inm->in6m_ifp == ifp, ("%s: bad ifp", __func__));
	}

	return (ifp == NULL);
}
#endif

/*
 * Initialize an in6_mfilter structure to a known state at t0, t1
 * with an empty source filter list.
*/ static __inline void im6f_init(struct in6_mfilter *imf, const int st0, const int st1) { memset(imf, 0, sizeof(struct in6_mfilter)); RB_INIT(&imf->im6f_sources); imf->im6f_st[0] = st0; imf->im6f_st[1] = st1; } /* * Resize the ip6_moptions vector to the next power-of-two minus 1. * May be called with locks held; do not sleep. */ static int im6o_grow(struct ip6_moptions *imo) { struct in6_multi **nmships; struct in6_multi **omships; struct in6_mfilter *nmfilters; struct in6_mfilter *omfilters; size_t idx; size_t newmax; size_t oldmax; nmships = NULL; nmfilters = NULL; omships = imo->im6o_membership; omfilters = imo->im6o_mfilters; oldmax = imo->im6o_max_memberships; newmax = ((oldmax + 1) * 2) - 1; if (newmax <= IPV6_MAX_MEMBERSHIPS) { nmships = (struct in6_multi **)realloc(omships, sizeof(struct in6_multi *) * newmax, M_IP6MOPTS, M_NOWAIT); nmfilters = (struct in6_mfilter *)realloc(omfilters, sizeof(struct in6_mfilter) * newmax, M_IN6MFILTER, M_NOWAIT); if (nmships != NULL && nmfilters != NULL) { /* Initialize newly allocated source filter heads. */ for (idx = oldmax; idx < newmax; idx++) { im6f_init(&nmfilters[idx], MCAST_UNDEFINED, MCAST_EXCLUDE); } imo->im6o_max_memberships = newmax; imo->im6o_membership = nmships; imo->im6o_mfilters = nmfilters; } } if (nmships == NULL || nmfilters == NULL) { if (nmships != NULL) free(nmships, M_IP6MOPTS); if (nmfilters != NULL) free(nmfilters, M_IN6MFILTER); return (ETOOMANYREFS); } return (0); } /* * Find an IPv6 multicast group entry for this ip6_moptions instance * which matches the specified group, and optionally an interface. * Return its index into the array, or -1 if not found. */ static size_t im6o_match_group(const struct ip6_moptions *imo, const struct ifnet *ifp, const struct sockaddr *group) { const struct sockaddr_in6 *gsin6; struct in6_multi **pinm; int idx; int nmships; gsin6 = (const struct sockaddr_in6 *)group; /* The im6o_membership array may be lazy allocated. 
*/ if (imo->im6o_membership == NULL || imo->im6o_num_memberships == 0) return (-1); nmships = imo->im6o_num_memberships; pinm = &imo->im6o_membership[0]; for (idx = 0; idx < nmships; idx++, pinm++) { if (*pinm == NULL) continue; if ((ifp == NULL || ((*pinm)->in6m_ifp == ifp)) && IN6_ARE_ADDR_EQUAL(&(*pinm)->in6m_addr, &gsin6->sin6_addr)) { break; } } if (idx >= nmships) idx = -1; return (idx); } /* * Find an IPv6 multicast source entry for this imo which matches * the given group index for this socket, and source address. * * XXX TODO: The scope ID, if present in src, is stripped before * any comparison. We SHOULD enforce scope/zone checks where the source * filter entry has a link scope. * * NOTE: This does not check if the entry is in-mode, merely if * it exists, which may not be the desired behaviour. */ static struct in6_msource * im6o_match_source(const struct ip6_moptions *imo, const size_t gidx, const struct sockaddr *src) { struct ip6_msource find; struct in6_mfilter *imf; struct ip6_msource *ims; const sockunion_t *psa; KASSERT(src->sa_family == AF_INET6, ("%s: !AF_INET6", __func__)); KASSERT(gidx != -1 && gidx < imo->im6o_num_memberships, ("%s: invalid index %d\n", __func__, (int)gidx)); /* The im6o_mfilters array may be lazy allocated. */ if (imo->im6o_mfilters == NULL) return (NULL); imf = &imo->im6o_mfilters[gidx]; psa = (const sockunion_t *)src; find.im6s_addr = psa->sin6.sin6_addr; in6_clearscope(&find.im6s_addr); /* XXX */ ims = RB_FIND(ip6_msource_tree, &imf->im6f_sources, &find); return ((struct in6_msource *)ims); } /* * Perform filtering for multicast datagrams on a socket by group and source. * * Returns 0 if a datagram should be allowed through, or various error codes * if the socket was not a member of the group, or the source was muted, etc. 
*/ int im6o_mc_filter(const struct ip6_moptions *imo, const struct ifnet *ifp, const struct sockaddr *group, const struct sockaddr *src) { size_t gidx; struct in6_msource *ims; int mode; KASSERT(ifp != NULL, ("%s: null ifp", __func__)); gidx = im6o_match_group(imo, ifp, group); if (gidx == -1) return (MCAST_NOTGMEMBER); /* * Check if the source was included in an (S,G) join. * Allow reception on exclusive memberships by default, * reject reception on inclusive memberships by default. * Exclude source only if an in-mode exclude filter exists. * Include source only if an in-mode include filter exists. * NOTE: We are comparing group state here at MLD t1 (now) * with socket-layer t0 (since last downcall). */ mode = imo->im6o_mfilters[gidx].im6f_st[1]; ims = im6o_match_source(imo, gidx, src); if ((ims == NULL && mode == MCAST_INCLUDE) || (ims != NULL && ims->im6sl_st[0] != mode)) return (MCAST_NOTSMEMBER); return (MCAST_PASS); } /* * Find and return a reference to an in6_multi record for (ifp, group), * and bump its reference count. * If one does not exist, try to allocate it, and update link-layer multicast * filters on ifp to listen for group. * Assumes the IN6_MULTI lock is held across the call. * Return 0 if successful, otherwise return an appropriate error code. */ static int in6_getmulti(struct ifnet *ifp, const struct in6_addr *group, struct in6_multi **pinm) { struct epoch_tracker et; struct sockaddr_in6 gsin6; struct ifmultiaddr *ifma; struct in6_multi *inm; int error; error = 0; /* * XXX: Accesses to ifma_protospec must be covered by IF_ADDR_LOCK; * if_addmulti() takes this mutex itself, so we must drop and * re-acquire around the call. */ IN6_MULTI_LOCK_ASSERT(); IN6_MULTI_LIST_LOCK(); IF_ADDR_WLOCK(ifp); - NET_EPOCH_ENTER(et); + NET_EPOCH_ENTER_ET(et); inm = in6m_lookup_locked(ifp, group); - NET_EPOCH_EXIT(et); + NET_EPOCH_EXIT_ET(et); if (inm != NULL) { /* * If we already joined this group, just bump the * refcount and return it. 
*/ KASSERT(inm->in6m_refcount >= 1, ("%s: bad refcount %d", __func__, inm->in6m_refcount)); in6m_acquire_locked(inm); *pinm = inm; goto out_locked; } memset(&gsin6, 0, sizeof(gsin6)); gsin6.sin6_family = AF_INET6; gsin6.sin6_len = sizeof(struct sockaddr_in6); gsin6.sin6_addr = *group; /* * Check if a link-layer group is already associated * with this network-layer group on the given ifnet. */ IN6_MULTI_LIST_UNLOCK(); IF_ADDR_WUNLOCK(ifp); error = if_addmulti(ifp, (struct sockaddr *)&gsin6, &ifma); if (error != 0) return (error); IN6_MULTI_LIST_LOCK(); IF_ADDR_WLOCK(ifp); /* * If something other than netinet6 is occupying the link-layer * group, print a meaningful error message and back out of * the allocation. * Otherwise, bump the refcount on the existing network-layer * group association and return it. */ if (ifma->ifma_protospec != NULL) { inm = (struct in6_multi *)ifma->ifma_protospec; #ifdef INVARIANTS KASSERT(ifma->ifma_addr != NULL, ("%s: no ifma_addr", __func__)); KASSERT(ifma->ifma_addr->sa_family == AF_INET6, ("%s: ifma not AF_INET6", __func__)); KASSERT(inm != NULL, ("%s: no ifma_protospec", __func__)); if (inm->in6m_ifma != ifma || inm->in6m_ifp != ifp || !IN6_ARE_ADDR_EQUAL(&inm->in6m_addr, group)) panic("%s: ifma %p is inconsistent with %p (%p)", __func__, ifma, inm, group); #endif in6m_acquire_locked(inm); *pinm = inm; goto out_locked; } IF_ADDR_WLOCK_ASSERT(ifp); /* * A new in6_multi record is needed; allocate and initialize it. * We DO NOT perform an MLD join as the in6_ layer may need to * push an initial source list down to MLD to support SSM. * * The initial source filter state is INCLUDE, {} as per the RFC. * Pending state-changes per group are subject to a bounds check. 
*/ inm = malloc(sizeof(*inm), M_IP6MADDR, M_NOWAIT | M_ZERO); if (inm == NULL) { IN6_MULTI_LIST_UNLOCK(); IF_ADDR_WUNLOCK(ifp); if_delmulti_ifma(ifma); return (ENOMEM); } inm->in6m_addr = *group; inm->in6m_ifp = ifp; inm->in6m_mli = MLD_IFINFO(ifp); inm->in6m_ifma = ifma; inm->in6m_refcount = 1; inm->in6m_state = MLD_NOT_MEMBER; mbufq_init(&inm->in6m_scq, MLD_MAX_STATE_CHANGES); inm->in6m_st[0].iss_fmode = MCAST_UNDEFINED; inm->in6m_st[1].iss_fmode = MCAST_UNDEFINED; RB_INIT(&inm->in6m_srcs); ifma->ifma_protospec = inm; *pinm = inm; out_locked: IN6_MULTI_LIST_UNLOCK(); IF_ADDR_WUNLOCK(ifp); return (error); } /* * Drop a reference to an in6_multi record. * * If the refcount drops to 0, free the in6_multi record and * delete the underlying link-layer membership. */ static void in6m_release(struct in6_multi *inm) { struct ifmultiaddr *ifma; struct ifnet *ifp; CTR2(KTR_MLD, "%s: refcount is %d", __func__, inm->in6m_refcount); MPASS(inm->in6m_refcount == 0); CTR2(KTR_MLD, "%s: freeing inm %p", __func__, inm); ifma = inm->in6m_ifma; ifp = inm->in6m_ifp; MPASS(ifma->ifma_llifma == NULL); /* XXX this access is not covered by IF_ADDR_LOCK */ CTR2(KTR_MLD, "%s: purging ifma %p", __func__, ifma); KASSERT(ifma->ifma_protospec == NULL, ("%s: ifma_protospec != NULL", __func__)); if (ifp == NULL) ifp = ifma->ifma_ifp; if (ifp != NULL) { CURVNET_SET(ifp->if_vnet); in6m_purge(inm); free(inm, M_IP6MADDR); if_delmulti_ifma_flags(ifma, 1); CURVNET_RESTORE(); if_rele(ifp); } else { in6m_purge(inm); free(inm, M_IP6MADDR); if_delmulti_ifma_flags(ifma, 1); } } static struct grouptask free_gtask; static struct in6_multi_head in6m_free_list; static void in6m_release_task(void *arg __unused); static void in6m_init(void) { SLIST_INIT(&in6m_free_list); taskqgroup_config_gtask_init(NULL, &free_gtask, in6m_release_task, "in6m release task"); } #ifdef EARLY_AP_STARTUP SYSINIT(in6m_init, SI_SUB_SMP + 1, SI_ORDER_FIRST, in6m_init, NULL); #else SYSINIT(in6m_init, SI_SUB_ROOT_CONF - 1, 
SI_ORDER_SECOND, in6m_init, NULL); #endif void in6m_release_list_deferred(struct in6_multi_head *inmh) { if (SLIST_EMPTY(inmh)) return; mtx_lock(&in6_multi_free_mtx); SLIST_CONCAT(&in6m_free_list, inmh, in6_multi, in6m_nrele); mtx_unlock(&in6_multi_free_mtx); GROUPTASK_ENQUEUE(&free_gtask); } void in6m_release_wait(void) { /* Wait for all jobs to complete. */ gtaskqueue_drain_all(free_gtask.gt_taskqueue); } void in6m_disconnect_locked(struct in6_multi_head *inmh, struct in6_multi *inm) { struct ifnet *ifp; struct ifaddr *ifa; struct in6_ifaddr *ifa6; struct in6_multi_mship *imm, *imm_tmp; struct ifmultiaddr *ifma, *ll_ifma; IN6_MULTI_LIST_LOCK_ASSERT(); ifp = inm->in6m_ifp; if (ifp == NULL) return; /* already called */ inm->in6m_ifp = NULL; IF_ADDR_WLOCK_ASSERT(ifp); ifma = inm->in6m_ifma; if (ifma == NULL) return; if_ref(ifp); if (ifma->ifma_flags & IFMA_F_ENQUEUED) { CK_STAILQ_REMOVE(&ifp->if_multiaddrs, ifma, ifmultiaddr, ifma_link); ifma->ifma_flags &= ~IFMA_F_ENQUEUED; } MCDPRINTF("removed ifma: %p from %s\n", ifma, ifp->if_xname); if ((ll_ifma = ifma->ifma_llifma) != NULL) { MPASS(ifma != ll_ifma); ifma->ifma_llifma = NULL; MPASS(ll_ifma->ifma_llifma == NULL); MPASS(ll_ifma->ifma_ifp == ifp); if (--ll_ifma->ifma_refcount == 0) { if (ll_ifma->ifma_flags & IFMA_F_ENQUEUED) { CK_STAILQ_REMOVE(&ifp->if_multiaddrs, ll_ifma, ifmultiaddr, ifma_link); ll_ifma->ifma_flags &= ~IFMA_F_ENQUEUED; } MCDPRINTF("removed ll_ifma: %p from %s\n", ll_ifma, ifp->if_xname); if_freemulti(ll_ifma); } } CK_STAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) { if (ifa->ifa_addr->sa_family != AF_INET6) continue; ifa6 = (void *)ifa; LIST_FOREACH_SAFE(imm, &ifa6->ia6_memberships, i6mm_chain, imm_tmp) { if (inm == imm->i6mm_maddr) { LIST_REMOVE(imm, i6mm_chain); free(imm, M_IP6MADDR); in6m_rele_locked(inmh, inm); } } } } static void in6m_release_task(void *arg __unused) { struct in6_multi_head in6m_free_tmp; struct in6_multi *inm, *tinm; SLIST_INIT(&in6m_free_tmp); 
mtx_lock(&in6_multi_free_mtx); SLIST_CONCAT(&in6m_free_tmp, &in6m_free_list, in6_multi, in6m_nrele); mtx_unlock(&in6_multi_free_mtx); IN6_MULTI_LOCK(); SLIST_FOREACH_SAFE(inm, &in6m_free_tmp, in6m_nrele, tinm) { SLIST_REMOVE_HEAD(&in6m_free_tmp, in6m_nrele); in6m_release(inm); } IN6_MULTI_UNLOCK(); } /* * Clear recorded source entries for a group. * Used by the MLD code. Caller must hold the IN6_MULTI lock. * FIXME: Should reap. */ void in6m_clear_recorded(struct in6_multi *inm) { struct ip6_msource *ims; IN6_MULTI_LIST_LOCK_ASSERT(); RB_FOREACH(ims, ip6_msource_tree, &inm->in6m_srcs) { if (ims->im6s_stp) { ims->im6s_stp = 0; --inm->in6m_st[1].iss_rec; } } KASSERT(inm->in6m_st[1].iss_rec == 0, ("%s: iss_rec %d not 0", __func__, inm->in6m_st[1].iss_rec)); } /* * Record a source as pending for a Source-Group MLDv2 query. * This lives here as it modifies the shared tree. * * inm is the group descriptor. * naddr is the address of the source to record in network-byte order. * * If the net.inet6.mld.sgalloc sysctl is non-zero, we will * lazy-allocate a source node in response to an SG query. * Otherwise, no allocation is performed. This saves some memory * with the trade-off that the source will not be reported to the * router if joined in the window between the query response and * the group actually being joined on the local host. * * VIMAGE: XXX: Currently the mld_sgalloc feature has been removed. * This turns off the allocation of a recorded source entry if * the group has not been joined. * * Return 0 if the source didn't exist or was already marked as recorded. * Return 1 if the source was marked as recorded by this function. * Return <0 if any error occurred (negated errno code). 
*/ int in6m_record_source(struct in6_multi *inm, const struct in6_addr *addr) { struct ip6_msource find; struct ip6_msource *ims, *nims; IN6_MULTI_LIST_LOCK_ASSERT(); find.im6s_addr = *addr; ims = RB_FIND(ip6_msource_tree, &inm->in6m_srcs, &find); if (ims && ims->im6s_stp) return (0); if (ims == NULL) { if (inm->in6m_nsrc == in6_mcast_maxgrpsrc) return (-ENOSPC); nims = malloc(sizeof(struct ip6_msource), M_IP6MSOURCE, M_NOWAIT | M_ZERO); if (nims == NULL) return (-ENOMEM); nims->im6s_addr = find.im6s_addr; RB_INSERT(ip6_msource_tree, &inm->in6m_srcs, nims); ++inm->in6m_nsrc; ims = nims; } /* * Mark the source as recorded and update the recorded * source count. */ ++ims->im6s_stp; ++inm->in6m_st[1].iss_rec; return (1); } /* * Return a pointer to an in6_msource owned by an in6_mfilter, * given its source address. * Lazy-allocate if needed. If this is a new entry its filter state is * undefined at t0. * * imf is the filter set being modified. * addr is the source address. * * SMPng: May be called with locks held; malloc must not block. */ static int im6f_get_source(struct in6_mfilter *imf, const struct sockaddr_in6 *psin, struct in6_msource **plims) { struct ip6_msource find; struct ip6_msource *ims, *nims; struct in6_msource *lims; int error; error = 0; ims = NULL; lims = NULL; find.im6s_addr = psin->sin6_addr; ims = RB_FIND(ip6_msource_tree, &imf->im6f_sources, &find); lims = (struct in6_msource *)ims; if (lims == NULL) { if (imf->im6f_nsrc == in6_mcast_maxsocksrc) return (ENOSPC); nims = malloc(sizeof(struct in6_msource), M_IN6MFILTER, M_NOWAIT | M_ZERO); if (nims == NULL) return (ENOMEM); lims = (struct in6_msource *)nims; lims->im6s_addr = find.im6s_addr; lims->im6sl_st[0] = MCAST_UNDEFINED; RB_INSERT(ip6_msource_tree, &imf->im6f_sources, nims); ++imf->im6f_nsrc; } *plims = lims; return (error); } /* * Graft a source entry into an existing socket-layer filter set, * maintaining any required invariants and checking allocations. 
* * The source is marked as being in the new filter mode at t1. * * Return the pointer to the new node, otherwise return NULL. */ static struct in6_msource * im6f_graft(struct in6_mfilter *imf, const uint8_t st1, const struct sockaddr_in6 *psin) { struct ip6_msource *nims; struct in6_msource *lims; nims = malloc(sizeof(struct in6_msource), M_IN6MFILTER, M_NOWAIT | M_ZERO); if (nims == NULL) return (NULL); lims = (struct in6_msource *)nims; lims->im6s_addr = psin->sin6_addr; lims->im6sl_st[0] = MCAST_UNDEFINED; lims->im6sl_st[1] = st1; RB_INSERT(ip6_msource_tree, &imf->im6f_sources, nims); ++imf->im6f_nsrc; return (lims); } /* * Prune a source entry from an existing socket-layer filter set, * maintaining any required invariants and checking allocations. * * The source is marked as being left at t1, it is not freed. * * Return 0 if no error occurred, otherwise return an errno value. */ static int im6f_prune(struct in6_mfilter *imf, const struct sockaddr_in6 *psin) { struct ip6_msource find; struct ip6_msource *ims; struct in6_msource *lims; find.im6s_addr = psin->sin6_addr; ims = RB_FIND(ip6_msource_tree, &imf->im6f_sources, &find); if (ims == NULL) return (ENOENT); lims = (struct in6_msource *)ims; lims->im6sl_st[1] = MCAST_UNDEFINED; return (0); } /* * Revert socket-layer filter set deltas at t1 to t0 state. 
*/ static void im6f_rollback(struct in6_mfilter *imf) { struct ip6_msource *ims, *tims; struct in6_msource *lims; RB_FOREACH_SAFE(ims, ip6_msource_tree, &imf->im6f_sources, tims) { lims = (struct in6_msource *)ims; if (lims->im6sl_st[0] == lims->im6sl_st[1]) { /* no change at t1 */ continue; } else if (lims->im6sl_st[0] != MCAST_UNDEFINED) { /* revert change to existing source at t1 */ lims->im6sl_st[1] = lims->im6sl_st[0]; } else { /* revert source added t1 */ CTR2(KTR_MLD, "%s: free ims %p", __func__, ims); RB_REMOVE(ip6_msource_tree, &imf->im6f_sources, ims); free(ims, M_IN6MFILTER); imf->im6f_nsrc--; } } imf->im6f_st[1] = imf->im6f_st[0]; } /* * Mark socket-layer filter set as INCLUDE {} at t1. */ static void im6f_leave(struct in6_mfilter *imf) { struct ip6_msource *ims; struct in6_msource *lims; RB_FOREACH(ims, ip6_msource_tree, &imf->im6f_sources) { lims = (struct in6_msource *)ims; lims->im6sl_st[1] = MCAST_UNDEFINED; } imf->im6f_st[1] = MCAST_INCLUDE; } /* * Mark socket-layer filter set deltas as committed. */ static void im6f_commit(struct in6_mfilter *imf) { struct ip6_msource *ims; struct in6_msource *lims; RB_FOREACH(ims, ip6_msource_tree, &imf->im6f_sources) { lims = (struct in6_msource *)ims; lims->im6sl_st[0] = lims->im6sl_st[1]; } imf->im6f_st[0] = imf->im6f_st[1]; } /* * Reap unreferenced sources from socket-layer filter set. */ static void im6f_reap(struct in6_mfilter *imf) { struct ip6_msource *ims, *tims; struct in6_msource *lims; RB_FOREACH_SAFE(ims, ip6_msource_tree, &imf->im6f_sources, tims) { lims = (struct in6_msource *)ims; if ((lims->im6sl_st[0] == MCAST_UNDEFINED) && (lims->im6sl_st[1] == MCAST_UNDEFINED)) { CTR2(KTR_MLD, "%s: free lims %p", __func__, ims); RB_REMOVE(ip6_msource_tree, &imf->im6f_sources, ims); free(ims, M_IN6MFILTER); imf->im6f_nsrc--; } } } /* * Purge socket-layer filter set. 
*/ static void im6f_purge(struct in6_mfilter *imf) { struct ip6_msource *ims, *tims; RB_FOREACH_SAFE(ims, ip6_msource_tree, &imf->im6f_sources, tims) { CTR2(KTR_MLD, "%s: free ims %p", __func__, ims); RB_REMOVE(ip6_msource_tree, &imf->im6f_sources, ims); free(ims, M_IN6MFILTER); imf->im6f_nsrc--; } imf->im6f_st[0] = imf->im6f_st[1] = MCAST_UNDEFINED; KASSERT(RB_EMPTY(&imf->im6f_sources), ("%s: im6f_sources not empty", __func__)); } /* * Look up a source filter entry for a multicast group. * * inm is the group descriptor to work with. * addr is the IPv6 address to look up. * noalloc may be non-zero to suppress allocation of sources. * *pims will be set to the address of the retrieved or allocated source. * * SMPng: NOTE: may be called with locks held. * Return 0 if successful, otherwise return a non-zero error code. */ static int in6m_get_source(struct in6_multi *inm, const struct in6_addr *addr, const int noalloc, struct ip6_msource **pims) { struct ip6_msource find; struct ip6_msource *ims, *nims; #ifdef KTR char ip6tbuf[INET6_ADDRSTRLEN]; #endif find.im6s_addr = *addr; ims = RB_FIND(ip6_msource_tree, &inm->in6m_srcs, &find); if (ims == NULL && !noalloc) { if (inm->in6m_nsrc == in6_mcast_maxgrpsrc) return (ENOSPC); nims = malloc(sizeof(struct ip6_msource), M_IP6MSOURCE, M_NOWAIT | M_ZERO); if (nims == NULL) return (ENOMEM); nims->im6s_addr = *addr; RB_INSERT(ip6_msource_tree, &inm->in6m_srcs, nims); ++inm->in6m_nsrc; ims = nims; CTR3(KTR_MLD, "%s: allocated %s as %p", __func__, ip6_sprintf(ip6tbuf, addr), ims); } *pims = ims; return (0); } /* * Merge socket-layer source into MLD-layer source. * If rollback is non-zero, perform the inverse of the merge. */ static void im6s_merge(struct ip6_msource *ims, const struct in6_msource *lims, const int rollback) { int n = rollback ? 
-1 : 1; #ifdef KTR char ip6tbuf[INET6_ADDRSTRLEN]; ip6_sprintf(ip6tbuf, &lims->im6s_addr); #endif if (lims->im6sl_st[0] == MCAST_EXCLUDE) { CTR3(KTR_MLD, "%s: t1 ex -= %d on %s", __func__, n, ip6tbuf); ims->im6s_st[1].ex -= n; } else if (lims->im6sl_st[0] == MCAST_INCLUDE) { CTR3(KTR_MLD, "%s: t1 in -= %d on %s", __func__, n, ip6tbuf); ims->im6s_st[1].in -= n; } if (lims->im6sl_st[1] == MCAST_EXCLUDE) { CTR3(KTR_MLD, "%s: t1 ex += %d on %s", __func__, n, ip6tbuf); ims->im6s_st[1].ex += n; } else if (lims->im6sl_st[1] == MCAST_INCLUDE) { CTR3(KTR_MLD, "%s: t1 in += %d on %s", __func__, n, ip6tbuf); ims->im6s_st[1].in += n; } } /* * Atomically update the global in6_multi state, when a membership's * filter list is being updated in any way. * * imf is the per-inpcb-membership group filter pointer. * A fake imf may be passed for in-kernel consumers. * * XXX This is a candidate for a set-symmetric-difference style loop * which would eliminate the repeated lookup from root of ims nodes, * as they share the same key space. * * If any error occurred this function will back out of refcounts * and return a non-zero value. */ static int in6m_merge(struct in6_multi *inm, /*const*/ struct in6_mfilter *imf) { struct ip6_msource *ims, *nims; struct in6_msource *lims; int schanged, error; int nsrc0, nsrc1; schanged = 0; error = 0; nsrc1 = nsrc0 = 0; IN6_MULTI_LIST_LOCK_ASSERT(); /* * Update the source filters first, as this may fail. * Maintain count of in-mode filters at t0, t1. These are * used to work out if we transition into ASM mode or not. * Maintain a count of source filters whose state was * actually modified by this operation. 
*/ RB_FOREACH(ims, ip6_msource_tree, &imf->im6f_sources) { lims = (struct in6_msource *)ims; if (lims->im6sl_st[0] == imf->im6f_st[0]) nsrc0++; if (lims->im6sl_st[1] == imf->im6f_st[1]) nsrc1++; if (lims->im6sl_st[0] == lims->im6sl_st[1]) continue; error = in6m_get_source(inm, &lims->im6s_addr, 0, &nims); ++schanged; if (error) break; im6s_merge(nims, lims, 0); } if (error) { struct ip6_msource *bims; RB_FOREACH_REVERSE_FROM(ims, ip6_msource_tree, nims) { lims = (struct in6_msource *)ims; if (lims->im6sl_st[0] == lims->im6sl_st[1]) continue; (void)in6m_get_source(inm, &lims->im6s_addr, 1, &bims); if (bims == NULL) continue; im6s_merge(bims, lims, 1); } goto out_reap; } CTR3(KTR_MLD, "%s: imf filters in-mode: %d at t0, %d at t1", __func__, nsrc0, nsrc1); /* Handle transition between INCLUDE {n} and INCLUDE {} on socket. */ if (imf->im6f_st[0] == imf->im6f_st[1] && imf->im6f_st[1] == MCAST_INCLUDE) { if (nsrc1 == 0) { CTR1(KTR_MLD, "%s: --in on inm at t1", __func__); --inm->in6m_st[1].iss_in; } } /* Handle filter mode transition on socket. */ if (imf->im6f_st[0] != imf->im6f_st[1]) { CTR3(KTR_MLD, "%s: imf transition %d to %d", __func__, imf->im6f_st[0], imf->im6f_st[1]); if (imf->im6f_st[0] == MCAST_EXCLUDE) { CTR1(KTR_MLD, "%s: --ex on inm at t1", __func__); --inm->in6m_st[1].iss_ex; } else if (imf->im6f_st[0] == MCAST_INCLUDE) { CTR1(KTR_MLD, "%s: --in on inm at t1", __func__); --inm->in6m_st[1].iss_in; } if (imf->im6f_st[1] == MCAST_EXCLUDE) { CTR1(KTR_MLD, "%s: ex++ on inm at t1", __func__); inm->in6m_st[1].iss_ex++; } else if (imf->im6f_st[1] == MCAST_INCLUDE && nsrc1 > 0) { CTR1(KTR_MLD, "%s: in++ on inm at t1", __func__); inm->in6m_st[1].iss_in++; } } /* * Track inm filter state in terms of listener counts. * If there are any exclusive listeners, stack-wide * membership is exclusive. * Otherwise, if only inclusive listeners, stack-wide is inclusive. * If no listeners remain, state is undefined at t1, * and the MLD lifecycle for this group should finish. 
*/ if (inm->in6m_st[1].iss_ex > 0) { CTR1(KTR_MLD, "%s: transition to EX", __func__); inm->in6m_st[1].iss_fmode = MCAST_EXCLUDE; } else if (inm->in6m_st[1].iss_in > 0) { CTR1(KTR_MLD, "%s: transition to IN", __func__); inm->in6m_st[1].iss_fmode = MCAST_INCLUDE; } else { CTR1(KTR_MLD, "%s: transition to UNDEF", __func__); inm->in6m_st[1].iss_fmode = MCAST_UNDEFINED; } /* Decrement ASM listener count on transition out of ASM mode. */ if (imf->im6f_st[0] == MCAST_EXCLUDE && nsrc0 == 0) { if ((imf->im6f_st[1] != MCAST_EXCLUDE) || (imf->im6f_st[1] == MCAST_EXCLUDE && nsrc1 > 0)) { CTR1(KTR_MLD, "%s: --asm on inm at t1", __func__); --inm->in6m_st[1].iss_asm; } } /* Increment ASM listener count on transition to ASM mode. */ if (imf->im6f_st[1] == MCAST_EXCLUDE && nsrc1 == 0) { CTR1(KTR_MLD, "%s: asm++ on inm at t1", __func__); inm->in6m_st[1].iss_asm++; } CTR3(KTR_MLD, "%s: merged imf %p to inm %p", __func__, imf, inm); in6m_print(inm); out_reap: if (schanged > 0) { CTR1(KTR_MLD, "%s: sources changed; reaping", __func__); in6m_reap(inm); } return (error); } /* * Mark an in6_multi's filter set deltas as committed. * Called by MLD after a state change has been enqueued. */ void in6m_commit(struct in6_multi *inm) { struct ip6_msource *ims; CTR2(KTR_MLD, "%s: commit inm %p", __func__, inm); CTR1(KTR_MLD, "%s: pre commit:", __func__); in6m_print(inm); RB_FOREACH(ims, ip6_msource_tree, &inm->in6m_srcs) { ims->im6s_st[0] = ims->im6s_st[1]; } inm->in6m_st[0] = inm->in6m_st[1]; } /* * Reap unreferenced nodes from an in6_multi's filter set. 
*/ static void in6m_reap(struct in6_multi *inm) { struct ip6_msource *ims, *tims; RB_FOREACH_SAFE(ims, ip6_msource_tree, &inm->in6m_srcs, tims) { if (ims->im6s_st[0].ex > 0 || ims->im6s_st[0].in > 0 || ims->im6s_st[1].ex > 0 || ims->im6s_st[1].in > 0 || ims->im6s_stp != 0) continue; CTR2(KTR_MLD, "%s: free ims %p", __func__, ims); RB_REMOVE(ip6_msource_tree, &inm->in6m_srcs, ims); free(ims, M_IP6MSOURCE); inm->in6m_nsrc--; } } /* * Purge all source nodes from an in6_multi's filter set. */ static void in6m_purge(struct in6_multi *inm) { struct ip6_msource *ims, *tims; RB_FOREACH_SAFE(ims, ip6_msource_tree, &inm->in6m_srcs, tims) { CTR2(KTR_MLD, "%s: free ims %p", __func__, ims); RB_REMOVE(ip6_msource_tree, &inm->in6m_srcs, ims); free(ims, M_IP6MSOURCE); inm->in6m_nsrc--; } /* Free state-change requests that might be queued. */ mbufq_drain(&inm->in6m_scq); } /* * Join a multicast address w/o sources. * KAME compatibility entry point. * * SMPng: Assume no mc locks held by caller. */ int in6_joingroup(struct ifnet *ifp, const struct in6_addr *mcaddr, /*const*/ struct in6_mfilter *imf, struct in6_multi **pinm, const int delay) { int error; IN6_MULTI_LOCK(); error = in6_joingroup_locked(ifp, mcaddr, NULL, pinm, delay); IN6_MULTI_UNLOCK(); return (error); } /* * Join a multicast group; real entry point. * * Only preserves atomicity at inm level. * NOTE: imf argument cannot be const due to sys/tree.h limitations. * * If the MLD downcall fails, the group is not joined, and an error * code is returned. */ int in6_joingroup_locked(struct ifnet *ifp, const struct in6_addr *mcaddr, /*const*/ struct in6_mfilter *imf, struct in6_multi **pinm, const int delay) { struct in6_multi_head inmh; struct in6_mfilter timf; struct in6_multi *inm; struct ifmultiaddr *ifma; int error; #ifdef KTR char ip6tbuf[INET6_ADDRSTRLEN]; #endif #ifdef INVARIANTS /* * Sanity: Check scope zone ID was set for ifp, if and * only if group is scoped to an interface. 
*/ KASSERT(IN6_IS_ADDR_MULTICAST(mcaddr), ("%s: not a multicast address", __func__)); if (IN6_IS_ADDR_MC_LINKLOCAL(mcaddr) || IN6_IS_ADDR_MC_INTFACELOCAL(mcaddr)) { KASSERT(mcaddr->s6_addr16[1] != 0, ("%s: scope zone ID not set", __func__)); } #endif IN6_MULTI_LOCK_ASSERT(); IN6_MULTI_LIST_UNLOCK_ASSERT(); CTR4(KTR_MLD, "%s: join %s on %p(%s))", __func__, ip6_sprintf(ip6tbuf, mcaddr), ifp, if_name(ifp)); error = 0; inm = NULL; /* * If no imf was specified (i.e. kernel consumer), * fake one up and assume it is an ASM join. */ if (imf == NULL) { im6f_init(&timf, MCAST_UNDEFINED, MCAST_EXCLUDE); imf = &timf; } error = in6_getmulti(ifp, mcaddr, &inm); if (error) { CTR1(KTR_MLD, "%s: in6_getmulti() failure", __func__); return (error); } IN6_MULTI_LIST_LOCK(); CTR1(KTR_MLD, "%s: merge inm state", __func__); error = in6m_merge(inm, imf); if (error) { CTR1(KTR_MLD, "%s: failed to merge inm state", __func__); goto out_in6m_release; } CTR1(KTR_MLD, "%s: doing mld downcall", __func__); error = mld_change_state(inm, delay); if (error) { CTR1(KTR_MLD, "%s: failed to update source", __func__); goto out_in6m_release; } out_in6m_release: SLIST_INIT(&inmh); if (error) { CTR2(KTR_MLD, "%s: dropping ref on %p", __func__, inm); IF_ADDR_RLOCK(ifp); CK_STAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) { if (ifma->ifma_protospec == inm) { ifma->ifma_protospec = NULL; break; } } in6m_disconnect_locked(&inmh, inm); in6m_rele_locked(&inmh, inm); IF_ADDR_RUNLOCK(ifp); } else { *pinm = inm; } IN6_MULTI_LIST_UNLOCK(); in6m_release_list_deferred(&inmh); return (error); } /* * Leave a multicast group; unlocked entry point. */ int in6_leavegroup(struct in6_multi *inm, /*const*/ struct in6_mfilter *imf) { int error; IN6_MULTI_LOCK(); error = in6_leavegroup_locked(inm, imf); IN6_MULTI_UNLOCK(); return (error); } /* * Leave a multicast group; real entry point. * All source filters will be expunged. * * Only preserves atomicity at inm level. 
* * Holding the write lock for the INP which contains imf * is highly advisable. We can't assert for it as imf does not * contain a back-pointer to the owning inp. * * Note: This is not the same as in6m_release(*) as this function also * makes a state change downcall into MLD. */ int in6_leavegroup_locked(struct in6_multi *inm, /*const*/ struct in6_mfilter *imf) { struct in6_multi_head inmh; struct in6_mfilter timf; struct ifnet *ifp; int error; #ifdef KTR char ip6tbuf[INET6_ADDRSTRLEN]; #endif error = 0; IN6_MULTI_LOCK_ASSERT(); CTR5(KTR_MLD, "%s: leave inm %p, %s/%s, imf %p", __func__, inm, ip6_sprintf(ip6tbuf, &inm->in6m_addr), (in6m_is_ifp_detached(inm) ? "null" : if_name(inm->in6m_ifp)), imf); /* * If no imf was specified (i.e. kernel consumer), * fake one up and assume it is an ASM join. */ if (imf == NULL) { im6f_init(&timf, MCAST_EXCLUDE, MCAST_UNDEFINED); imf = &timf; } /* * Begin state merge transaction at MLD layer. * * As this particular invocation should not cause any memory * to be allocated, and there is no opportunity to roll back * the transaction, it MUST NOT fail. */ ifp = inm->in6m_ifp; IN6_MULTI_LIST_LOCK(); CTR1(KTR_MLD, "%s: merge inm state", __func__); error = in6m_merge(inm, imf); KASSERT(error == 0, ("%s: failed to merge inm state", __func__)); CTR1(KTR_MLD, "%s: doing mld downcall", __func__); error = 0; if (ifp) error = mld_change_state(inm, 0); if (error) CTR1(KTR_MLD, "%s: failed mld downcall", __func__); CTR2(KTR_MLD, "%s: dropping ref on %p", __func__, inm); if (ifp) IF_ADDR_WLOCK(ifp); SLIST_INIT(&inmh); if (inm->in6m_refcount == 1) in6m_disconnect_locked(&inmh, inm); in6m_rele_locked(&inmh, inm); if (ifp) IF_ADDR_WUNLOCK(ifp); IN6_MULTI_LIST_UNLOCK(); in6m_release_list_deferred(&inmh); return (error); } /* * Block or unblock an ASM multicast source on an inpcb. * This implements the delta-based API described in RFC 3678. * * The delta-based API applies only to exclusive-mode memberships. * An MLD downcall will be performed. 
* * SMPng: NOTE: Must take Giant as a join may create a new ifma. * * Return 0 if successful, otherwise return an appropriate error code. */ static int in6p_block_unblock_source(struct inpcb *inp, struct sockopt *sopt) { struct group_source_req gsr; sockunion_t *gsa, *ssa; struct ifnet *ifp; struct in6_mfilter *imf; struct ip6_moptions *imo; struct in6_msource *ims; struct in6_multi *inm; size_t idx; uint16_t fmode; int error, doblock; #ifdef KTR char ip6tbuf[INET6_ADDRSTRLEN]; #endif ifp = NULL; error = 0; doblock = 0; memset(&gsr, 0, sizeof(struct group_source_req)); gsa = (sockunion_t *)&gsr.gsr_group; ssa = (sockunion_t *)&gsr.gsr_source; switch (sopt->sopt_name) { case MCAST_BLOCK_SOURCE: case MCAST_UNBLOCK_SOURCE: error = sooptcopyin(sopt, &gsr, sizeof(struct group_source_req), sizeof(struct group_source_req)); if (error) return (error); if (gsa->sin6.sin6_family != AF_INET6 || gsa->sin6.sin6_len != sizeof(struct sockaddr_in6)) return (EINVAL); if (ssa->sin6.sin6_family != AF_INET6 || ssa->sin6.sin6_len != sizeof(struct sockaddr_in6)) return (EINVAL); if (gsr.gsr_interface == 0 || V_if_index < gsr.gsr_interface) return (EADDRNOTAVAIL); ifp = ifnet_byindex(gsr.gsr_interface); if (sopt->sopt_name == MCAST_BLOCK_SOURCE) doblock = 1; break; default: CTR2(KTR_MLD, "%s: unknown sopt_name %d", __func__, sopt->sopt_name); return (EOPNOTSUPP); break; } if (!IN6_IS_ADDR_MULTICAST(&gsa->sin6.sin6_addr)) return (EINVAL); (void)in6_setscope(&gsa->sin6.sin6_addr, ifp, NULL); /* * Check if we are actually a member of this group. */ imo = in6p_findmoptions(inp); idx = im6o_match_group(imo, ifp, &gsa->sa); if (idx == -1 || imo->im6o_mfilters == NULL) { error = EADDRNOTAVAIL; goto out_in6p_locked; } KASSERT(imo->im6o_mfilters != NULL, ("%s: im6o_mfilters not allocated", __func__)); imf = &imo->im6o_mfilters[idx]; inm = imo->im6o_membership[idx]; /* * Attempting to use the delta-based API on an * non exclusive-mode membership is an error. 
*/ fmode = imf->im6f_st[0]; if (fmode != MCAST_EXCLUDE) { error = EINVAL; goto out_in6p_locked; } /* * Deal with error cases up-front: * Asked to block, but already blocked; or * Asked to unblock, but nothing to unblock. * If adding a new block entry, allocate it. */ ims = im6o_match_source(imo, idx, &ssa->sa); if ((ims != NULL && doblock) || (ims == NULL && !doblock)) { CTR3(KTR_MLD, "%s: source %s %spresent", __func__, ip6_sprintf(ip6tbuf, &ssa->sin6.sin6_addr), doblock ? "" : "not "); error = EADDRNOTAVAIL; goto out_in6p_locked; } INP_WLOCK_ASSERT(inp); /* * Begin state merge transaction at socket layer. */ if (doblock) { CTR2(KTR_MLD, "%s: %s source", __func__, "block"); ims = im6f_graft(imf, fmode, &ssa->sin6); if (ims == NULL) error = ENOMEM; } else { CTR2(KTR_MLD, "%s: %s source", __func__, "allow"); error = im6f_prune(imf, &ssa->sin6); } if (error) { CTR1(KTR_MLD, "%s: merge imf state failed", __func__); goto out_im6f_rollback; } /* * Begin state merge transaction at MLD layer. */ IN6_MULTI_LIST_LOCK(); CTR1(KTR_MLD, "%s: merge inm state", __func__); error = in6m_merge(inm, imf); if (error) CTR1(KTR_MLD, "%s: failed to merge inm state", __func__); else { CTR1(KTR_MLD, "%s: doing mld downcall", __func__); error = mld_change_state(inm, 0); if (error) CTR1(KTR_MLD, "%s: failed mld downcall", __func__); } IN6_MULTI_LIST_UNLOCK(); out_im6f_rollback: if (error) im6f_rollback(imf); else im6f_commit(imf); im6f_reap(imf); out_in6p_locked: INP_WUNLOCK(inp); return (error); } /* * Given an inpcb, return its multicast options structure pointer. Accepts * an unlocked inpcb pointer, but will return it locked. May sleep. * * SMPng: NOTE: Potentially calls malloc(M_WAITOK) with Giant held. * SMPng: NOTE: Returns with the INP write lock held. 
*/ static struct ip6_moptions * in6p_findmoptions(struct inpcb *inp) { struct ip6_moptions *imo; struct in6_multi **immp; struct in6_mfilter *imfp; size_t idx; INP_WLOCK(inp); if (inp->in6p_moptions != NULL) return (inp->in6p_moptions); INP_WUNLOCK(inp); imo = malloc(sizeof(*imo), M_IP6MOPTS, M_WAITOK); immp = malloc(sizeof(*immp) * IPV6_MIN_MEMBERSHIPS, M_IP6MOPTS, M_WAITOK | M_ZERO); imfp = malloc(sizeof(struct in6_mfilter) * IPV6_MIN_MEMBERSHIPS, M_IN6MFILTER, M_WAITOK); imo->im6o_multicast_ifp = NULL; imo->im6o_multicast_hlim = V_ip6_defmcasthlim; imo->im6o_multicast_loop = in6_mcast_loop; imo->im6o_num_memberships = 0; imo->im6o_max_memberships = IPV6_MIN_MEMBERSHIPS; imo->im6o_membership = immp; /* Initialize per-group source filters. */ for (idx = 0; idx < IPV6_MIN_MEMBERSHIPS; idx++) im6f_init(&imfp[idx], MCAST_UNDEFINED, MCAST_EXCLUDE); imo->im6o_mfilters = imfp; INP_WLOCK(inp); if (inp->in6p_moptions != NULL) { free(imfp, M_IN6MFILTER); free(immp, M_IP6MOPTS); free(imo, M_IP6MOPTS); return (inp->in6p_moptions); } inp->in6p_moptions = imo; return (imo); } /* * Discard the IPv6 multicast options (and source filters). * * SMPng: NOTE: assumes INP write lock is held. * * XXX can all be safely deferred to epoch_call * */ static void inp_gcmoptions(struct ip6_moptions *imo) { struct in6_mfilter *imf; struct in6_multi *inm; struct ifnet *ifp; size_t idx, nmships; nmships = imo->im6o_num_memberships; for (idx = 0; idx < nmships; ++idx) { imf = imo->im6o_mfilters ? 
&imo->im6o_mfilters[idx] : NULL; if (imf) im6f_leave(imf); inm = imo->im6o_membership[idx]; ifp = inm->in6m_ifp; if (ifp != NULL) { CURVNET_SET(ifp->if_vnet); (void)in6_leavegroup(inm, imf); CURVNET_RESTORE(); } else { (void)in6_leavegroup(inm, imf); } if (imf) im6f_purge(imf); } if (imo->im6o_mfilters) free(imo->im6o_mfilters, M_IN6MFILTER); free(imo->im6o_membership, M_IP6MOPTS); free(imo, M_IP6MOPTS); } void ip6_freemoptions(struct ip6_moptions *imo) { if (imo == NULL) return; inp_gcmoptions(imo); } /* * Atomically get source filters on a socket for an IPv6 multicast group. * Called with INP lock held; returns with lock released. */ static int in6p_get_source_filters(struct inpcb *inp, struct sockopt *sopt) { struct __msfilterreq msfr; sockunion_t *gsa; struct ifnet *ifp; struct ip6_moptions *imo; struct in6_mfilter *imf; struct ip6_msource *ims; struct in6_msource *lims; struct sockaddr_in6 *psin; struct sockaddr_storage *ptss; struct sockaddr_storage *tss; int error; size_t idx, nsrcs, ncsrcs; INP_WLOCK_ASSERT(inp); imo = inp->in6p_moptions; KASSERT(imo != NULL, ("%s: null ip6_moptions", __func__)); INP_WUNLOCK(inp); error = sooptcopyin(sopt, &msfr, sizeof(struct __msfilterreq), sizeof(struct __msfilterreq)); if (error) return (error); if (msfr.msfr_group.ss_family != AF_INET6 || msfr.msfr_group.ss_len != sizeof(struct sockaddr_in6)) return (EINVAL); gsa = (sockunion_t *)&msfr.msfr_group; if (!IN6_IS_ADDR_MULTICAST(&gsa->sin6.sin6_addr)) return (EINVAL); if (msfr.msfr_ifindex == 0 || V_if_index < msfr.msfr_ifindex) return (EADDRNOTAVAIL); ifp = ifnet_byindex(msfr.msfr_ifindex); if (ifp == NULL) return (EADDRNOTAVAIL); (void)in6_setscope(&gsa->sin6.sin6_addr, ifp, NULL); INP_WLOCK(inp); /* * Lookup group on the socket. */ idx = im6o_match_group(imo, ifp, &gsa->sa); if (idx == -1 || imo->im6o_mfilters == NULL) { INP_WUNLOCK(inp); return (EADDRNOTAVAIL); } imf = &imo->im6o_mfilters[idx]; /* * Ignore memberships which are in limbo. 
*/ if (imf->im6f_st[1] == MCAST_UNDEFINED) { INP_WUNLOCK(inp); return (EAGAIN); } msfr.msfr_fmode = imf->im6f_st[1]; /* * If the user specified a buffer, copy out the source filter * entries to userland gracefully. * We only copy out the number of entries which userland * has asked for, but we always tell userland how big the * buffer really needs to be. */ if (msfr.msfr_nsrcs > in6_mcast_maxsocksrc) msfr.msfr_nsrcs = in6_mcast_maxsocksrc; tss = NULL; if (msfr.msfr_srcs != NULL && msfr.msfr_nsrcs > 0) { tss = malloc(sizeof(struct sockaddr_storage) * msfr.msfr_nsrcs, M_TEMP, M_NOWAIT | M_ZERO); if (tss == NULL) { INP_WUNLOCK(inp); return (ENOBUFS); } } /* * Count number of sources in-mode at t0. * If buffer space exists and remains, copy out source entries. */ nsrcs = msfr.msfr_nsrcs; ncsrcs = 0; ptss = tss; RB_FOREACH(ims, ip6_msource_tree, &imf->im6f_sources) { lims = (struct in6_msource *)ims; if (lims->im6sl_st[0] == MCAST_UNDEFINED || lims->im6sl_st[0] != imf->im6f_st[0]) continue; ++ncsrcs; if (tss != NULL && nsrcs > 0) { psin = (struct sockaddr_in6 *)ptss; psin->sin6_family = AF_INET6; psin->sin6_len = sizeof(struct sockaddr_in6); psin->sin6_addr = lims->im6s_addr; psin->sin6_port = 0; --nsrcs; ++ptss; } } INP_WUNLOCK(inp); if (tss != NULL) { error = copyout(tss, msfr.msfr_srcs, sizeof(struct sockaddr_storage) * msfr.msfr_nsrcs); free(tss, M_TEMP); if (error) return (error); } msfr.msfr_nsrcs = ncsrcs; error = sooptcopyout(sopt, &msfr, sizeof(struct __msfilterreq)); return (error); } /* * Return the IP multicast options in response to user getsockopt(). */ int ip6_getmoptions(struct inpcb *inp, struct sockopt *sopt) { struct ip6_moptions *im6o; int error; u_int optval; INP_WLOCK(inp); im6o = inp->in6p_moptions; /* * If socket is neither of type SOCK_RAW or SOCK_DGRAM, * or is a divert socket, reject it. 
*/ if (inp->inp_socket->so_proto->pr_protocol == IPPROTO_DIVERT || (inp->inp_socket->so_proto->pr_type != SOCK_RAW && inp->inp_socket->so_proto->pr_type != SOCK_DGRAM)) { INP_WUNLOCK(inp); return (EOPNOTSUPP); } error = 0; switch (sopt->sopt_name) { case IPV6_MULTICAST_IF: if (im6o == NULL || im6o->im6o_multicast_ifp == NULL) { optval = 0; } else { optval = im6o->im6o_multicast_ifp->if_index; } INP_WUNLOCK(inp); error = sooptcopyout(sopt, &optval, sizeof(u_int)); break; case IPV6_MULTICAST_HOPS: if (im6o == NULL) optval = V_ip6_defmcasthlim; else optval = im6o->im6o_multicast_hlim; INP_WUNLOCK(inp); error = sooptcopyout(sopt, &optval, sizeof(u_int)); break; case IPV6_MULTICAST_LOOP: if (im6o == NULL) optval = in6_mcast_loop; /* XXX VIMAGE */ else optval = im6o->im6o_multicast_loop; INP_WUNLOCK(inp); error = sooptcopyout(sopt, &optval, sizeof(u_int)); break; case IPV6_MSFILTER: if (im6o == NULL) { error = EADDRNOTAVAIL; INP_WUNLOCK(inp); } else { error = in6p_get_source_filters(inp, sopt); } break; default: INP_WUNLOCK(inp); error = ENOPROTOOPT; break; } INP_UNLOCK_ASSERT(inp); return (error); } /* * Look up the ifnet to use for a multicast group membership, * given the address of an IPv6 group. * * This routine exists to support legacy IPv6 multicast applications. * * If inp is non-NULL, use this socket's current FIB number for any * required FIB lookup. Look up the group address in the unicast FIB, * and use its ifp; usually, this points to the default next-hop. * If the FIB lookup fails, return NULL. * * FUTURE: Support multiple forwarding tables for IPv6. * * Returns NULL if no ifp could be found. 
*/ static struct ifnet * in6p_lookup_mcast_ifp(const struct inpcb *in6p, const struct sockaddr_in6 *gsin6) { struct nhop6_basic nh6; struct in6_addr dst; uint32_t scopeid; uint32_t fibnum; KASSERT(in6p->inp_vflag & INP_IPV6, ("%s: not INP_IPV6 inpcb", __func__)); KASSERT(gsin6->sin6_family == AF_INET6, ("%s: not AF_INET6 group", __func__)); in6_splitscope(&gsin6->sin6_addr, &dst, &scopeid); fibnum = in6p ? in6p->inp_inc.inc_fibnum : RT_DEFAULT_FIB; if (fib6_lookup_nh_basic(fibnum, &dst, scopeid, 0, 0, &nh6) != 0) return (NULL); return (nh6.nh_ifp); } /* * Join an IPv6 multicast group, possibly with a source. * * FIXME: The KAME use of the unspecified address (::) * to join *all* multicast groups is currently unsupported. */ static int in6p_join_group(struct inpcb *inp, struct sockopt *sopt) { struct in6_multi_head inmh; struct group_source_req gsr; sockunion_t *gsa, *ssa; struct ifnet *ifp; struct in6_mfilter *imf; struct ip6_moptions *imo; struct in6_multi *inm; struct in6_msource *lims; size_t idx; int error, is_new; SLIST_INIT(&inmh); ifp = NULL; imf = NULL; lims = NULL; error = 0; is_new = 0; memset(&gsr, 0, sizeof(struct group_source_req)); gsa = (sockunion_t *)&gsr.gsr_group; gsa->ss.ss_family = AF_UNSPEC; ssa = (sockunion_t *)&gsr.gsr_source; ssa->ss.ss_family = AF_UNSPEC; /* * Chew everything into struct group_source_req. * Overwrite the port field if present, as the sockaddr * being copied in may be matched with a binary comparison. * Ignore passed-in scope ID. 
*/ switch (sopt->sopt_name) { case IPV6_JOIN_GROUP: { struct ipv6_mreq mreq; error = sooptcopyin(sopt, &mreq, sizeof(struct ipv6_mreq), sizeof(struct ipv6_mreq)); if (error) return (error); gsa->sin6.sin6_family = AF_INET6; gsa->sin6.sin6_len = sizeof(struct sockaddr_in6); gsa->sin6.sin6_addr = mreq.ipv6mr_multiaddr; if (mreq.ipv6mr_interface == 0) { ifp = in6p_lookup_mcast_ifp(inp, &gsa->sin6); } else { if (V_if_index < mreq.ipv6mr_interface) return (EADDRNOTAVAIL); ifp = ifnet_byindex(mreq.ipv6mr_interface); } CTR3(KTR_MLD, "%s: ipv6mr_interface = %d, ifp = %p", __func__, mreq.ipv6mr_interface, ifp); } break; case MCAST_JOIN_GROUP: case MCAST_JOIN_SOURCE_GROUP: if (sopt->sopt_name == MCAST_JOIN_GROUP) { error = sooptcopyin(sopt, &gsr, sizeof(struct group_req), sizeof(struct group_req)); } else if (sopt->sopt_name == MCAST_JOIN_SOURCE_GROUP) { error = sooptcopyin(sopt, &gsr, sizeof(struct group_source_req), sizeof(struct group_source_req)); } if (error) return (error); if (gsa->sin6.sin6_family != AF_INET6 || gsa->sin6.sin6_len != sizeof(struct sockaddr_in6)) return (EINVAL); if (sopt->sopt_name == MCAST_JOIN_SOURCE_GROUP) { if (ssa->sin6.sin6_family != AF_INET6 || ssa->sin6.sin6_len != sizeof(struct sockaddr_in6)) return (EINVAL); if (IN6_IS_ADDR_MULTICAST(&ssa->sin6.sin6_addr)) return (EINVAL); /* * TODO: Validate embedded scope ID in source * list entry against passed-in ifp, if and only * if source list filter entry is iface or node local. 
*/ in6_clearscope(&ssa->sin6.sin6_addr); ssa->sin6.sin6_port = 0; ssa->sin6.sin6_scope_id = 0; } if (gsr.gsr_interface == 0 || V_if_index < gsr.gsr_interface) return (EADDRNOTAVAIL); ifp = ifnet_byindex(gsr.gsr_interface); break; default: CTR2(KTR_MLD, "%s: unknown sopt_name %d", __func__, sopt->sopt_name); return (EOPNOTSUPP); break; } if (!IN6_IS_ADDR_MULTICAST(&gsa->sin6.sin6_addr)) return (EINVAL); if (ifp == NULL || (ifp->if_flags & IFF_MULTICAST) == 0) return (EADDRNOTAVAIL); gsa->sin6.sin6_port = 0; gsa->sin6.sin6_scope_id = 0; /* * Always set the scope zone ID on memberships created from userland. * Use the passed-in ifp to do this. * XXX The in6_setscope() return value is meaningless. * XXX SCOPE6_LOCK() is taken by in6_setscope(). */ (void)in6_setscope(&gsa->sin6.sin6_addr, ifp, NULL); imo = in6p_findmoptions(inp); idx = im6o_match_group(imo, ifp, &gsa->sa); if (idx == -1) { is_new = 1; } else { inm = imo->im6o_membership[idx]; imf = &imo->im6o_mfilters[idx]; if (ssa->ss.ss_family != AF_UNSPEC) { /* * MCAST_JOIN_SOURCE_GROUP on an exclusive membership * is an error. On an existing inclusive membership, * it just adds the source to the filter list. */ if (imf->im6f_st[1] != MCAST_INCLUDE) { error = EINVAL; goto out_in6p_locked; } /* * Throw out duplicates. * * XXX FIXME: This makes a naive assumption that * even if entries exist for *ssa in this imf, * they will be rejected as dupes, even if they * are not valid in the current mode (in-mode). * * in6_msource is transactioned just as for anything * else in SSM -- but note naive use of in6m_graft() * below for allocating new filter entries. * * This is only an issue if someone mixes the * full-state SSM API with the delta-based API, * which is discouraged in the relevant RFCs. 
*/ lims = im6o_match_source(imo, idx, &ssa->sa); if (lims != NULL /*&& lims->im6sl_st[1] == MCAST_INCLUDE*/) { error = EADDRNOTAVAIL; goto out_in6p_locked; } } else { /* * MCAST_JOIN_GROUP alone, on any existing membership, * is rejected, to stop the same inpcb tying up * multiple refs to the in_multi. * On an existing inclusive membership, this is also * an error; if you want to change filter mode, * you must use the userland API setsourcefilter(). * XXX We don't reject this for imf in UNDEFINED * state at t1, because allocation of a filter * is atomic with allocation of a membership. */ error = EINVAL; goto out_in6p_locked; } } /* * Begin state merge transaction at socket layer. */ INP_WLOCK_ASSERT(inp); if (is_new) { if (imo->im6o_num_memberships == imo->im6o_max_memberships) { error = im6o_grow(imo); if (error) goto out_in6p_locked; } /* * Allocate the new slot upfront so we can deal with * grafting the new source filter in same code path * as for join-source on existing membership. */ idx = imo->im6o_num_memberships; imo->im6o_membership[idx] = NULL; imo->im6o_num_memberships++; KASSERT(imo->im6o_mfilters != NULL, ("%s: im6f_mfilters vector was not allocated", __func__)); imf = &imo->im6o_mfilters[idx]; KASSERT(RB_EMPTY(&imf->im6f_sources), ("%s: im6f_sources not empty", __func__)); } /* * Graft new source into filter list for this inpcb's * membership of the group. The in6_multi may not have * been allocated yet if this is a new membership, however, * the in_mfilter slot will be allocated and must be initialized. * * Note: Grafting of exclusive mode filters doesn't happen * in this path. * XXX: Should check for non-NULL lims (node exists but may * not be in-mode) for interop with full-state API. 
*/ if (ssa->ss.ss_family != AF_UNSPEC) { /* Membership starts in IN mode */ if (is_new) { CTR1(KTR_MLD, "%s: new join w/source", __func__); im6f_init(imf, MCAST_UNDEFINED, MCAST_INCLUDE); } else { CTR2(KTR_MLD, "%s: %s source", __func__, "allow"); } lims = im6f_graft(imf, MCAST_INCLUDE, &ssa->sin6); if (lims == NULL) { CTR1(KTR_MLD, "%s: merge imf state failed", __func__); error = ENOMEM; goto out_im6o_free; } } else { /* No address specified; Membership starts in EX mode */ if (is_new) { CTR1(KTR_MLD, "%s: new join w/o source", __func__); im6f_init(imf, MCAST_UNDEFINED, MCAST_EXCLUDE); } } /* * Begin state merge transaction at MLD layer. */ in_pcbref(inp); INP_WUNLOCK(inp); IN6_MULTI_LOCK(); if (is_new) { error = in6_joingroup_locked(ifp, &gsa->sin6.sin6_addr, imf, &inm, 0); if (error) { IN6_MULTI_UNLOCK(); goto out_im6o_free; } /* * NOTE: Refcount from in6_joingroup_locked() * is protecting membership. */ imo->im6o_membership[idx] = inm; } else { CTR1(KTR_MLD, "%s: merge inm state", __func__); IN6_MULTI_LIST_LOCK(); error = in6m_merge(inm, imf); if (error) CTR1(KTR_MLD, "%s: failed to merge inm state", __func__); else { CTR1(KTR_MLD, "%s: doing mld downcall", __func__); error = mld_change_state(inm, 0); if (error) CTR1(KTR_MLD, "%s: failed mld downcall", __func__); } IN6_MULTI_LIST_UNLOCK(); } IN6_MULTI_UNLOCK(); INP_WLOCK(inp); if (in_pcbrele_wlocked(inp)) return (ENXIO); if (error) { im6f_rollback(imf); if (is_new) im6f_purge(imf); else im6f_reap(imf); } else { im6f_commit(imf); } out_im6o_free: if (error && is_new) { inm = imo->im6o_membership[idx]; if (inm != NULL) { IN6_MULTI_LIST_LOCK(); in6m_rele_locked(&inmh, inm); IN6_MULTI_LIST_UNLOCK(); } imo->im6o_membership[idx] = NULL; --imo->im6o_num_memberships; } out_in6p_locked: INP_WUNLOCK(inp); in6m_release_list_deferred(&inmh); return (error); } /* * Leave an IPv6 multicast group on an inpcb, possibly with a source. 
*/ static int in6p_leave_group(struct inpcb *inp, struct sockopt *sopt) { struct ipv6_mreq mreq; struct group_source_req gsr; sockunion_t *gsa, *ssa; struct ifnet *ifp; struct in6_mfilter *imf; struct ip6_moptions *imo; struct in6_msource *ims; struct in6_multi *inm; uint32_t ifindex; size_t idx; int error, is_final; #ifdef KTR char ip6tbuf[INET6_ADDRSTRLEN]; #endif ifp = NULL; ifindex = 0; error = 0; is_final = 1; memset(&gsr, 0, sizeof(struct group_source_req)); gsa = (sockunion_t *)&gsr.gsr_group; gsa->ss.ss_family = AF_UNSPEC; ssa = (sockunion_t *)&gsr.gsr_source; ssa->ss.ss_family = AF_UNSPEC; /* * Chew everything passed in up into a struct group_source_req * as that is easier to process. * Note: Any embedded scope ID in the multicast group passed * in by userland is ignored, the interface index is the recommended * mechanism to specify an interface; see below. */ switch (sopt->sopt_name) { case IPV6_LEAVE_GROUP: error = sooptcopyin(sopt, &mreq, sizeof(struct ipv6_mreq), sizeof(struct ipv6_mreq)); if (error) return (error); gsa->sin6.sin6_family = AF_INET6; gsa->sin6.sin6_len = sizeof(struct sockaddr_in6); gsa->sin6.sin6_addr = mreq.ipv6mr_multiaddr; gsa->sin6.sin6_port = 0; gsa->sin6.sin6_scope_id = 0; ifindex = mreq.ipv6mr_interface; break; case MCAST_LEAVE_GROUP: case MCAST_LEAVE_SOURCE_GROUP: if (sopt->sopt_name == MCAST_LEAVE_GROUP) { error = sooptcopyin(sopt, &gsr, sizeof(struct group_req), sizeof(struct group_req)); } else if (sopt->sopt_name == MCAST_LEAVE_SOURCE_GROUP) { error = sooptcopyin(sopt, &gsr, sizeof(struct group_source_req), sizeof(struct group_source_req)); } if (error) return (error); if (gsa->sin6.sin6_family != AF_INET6 || gsa->sin6.sin6_len != sizeof(struct sockaddr_in6)) return (EINVAL); if (sopt->sopt_name == MCAST_LEAVE_SOURCE_GROUP) { if (ssa->sin6.sin6_family != AF_INET6 || ssa->sin6.sin6_len != sizeof(struct sockaddr_in6)) return (EINVAL); if (IN6_IS_ADDR_MULTICAST(&ssa->sin6.sin6_addr)) return (EINVAL); /* * TODO: Validate 
embedded scope ID in source * list entry against passed-in ifp, if and only * if source list filter entry is iface or node local. */ in6_clearscope(&ssa->sin6.sin6_addr); } gsa->sin6.sin6_port = 0; gsa->sin6.sin6_scope_id = 0; ifindex = gsr.gsr_interface; break; default: CTR2(KTR_MLD, "%s: unknown sopt_name %d", __func__, sopt->sopt_name); return (EOPNOTSUPP); break; } if (!IN6_IS_ADDR_MULTICAST(&gsa->sin6.sin6_addr)) return (EINVAL); /* * Validate interface index if provided. If no interface index * was provided separately, attempt to look the membership up * from the default scope as a last resort to disambiguate * the membership we are being asked to leave. * XXX SCOPE6 lock potentially taken here. */ if (ifindex != 0) { if (V_if_index < ifindex) return (EADDRNOTAVAIL); ifp = ifnet_byindex(ifindex); if (ifp == NULL) return (EADDRNOTAVAIL); (void)in6_setscope(&gsa->sin6.sin6_addr, ifp, NULL); } else { error = sa6_embedscope(&gsa->sin6, V_ip6_use_defzone); if (error) return (EADDRNOTAVAIL); /* * Some badly behaved applications don't pass an ifindex * or a scope ID, which is an API violation. In this case, * perform a lookup as per a v6 join. * * XXX For now, stomp on zone ID for the corner case. * This is not the 'KAME way', but we need to see the ifp * directly until such time as this implementation is * refactored, assuming the scope IDs are the way to go. */ ifindex = ntohs(gsa->sin6.sin6_addr.s6_addr16[1]); if (ifindex == 0) { CTR2(KTR_MLD, "%s: warning: no ifindex, looking up " "ifp for group %s.", __func__, ip6_sprintf(ip6tbuf, &gsa->sin6.sin6_addr)); ifp = in6p_lookup_mcast_ifp(inp, &gsa->sin6); } else { ifp = ifnet_byindex(ifindex); } if (ifp == NULL) return (EADDRNOTAVAIL); } CTR2(KTR_MLD, "%s: ifp = %p", __func__, ifp); KASSERT(ifp != NULL, ("%s: ifp did not resolve", __func__)); /* * Find the membership in the membership array. 
*/ imo = in6p_findmoptions(inp); idx = im6o_match_group(imo, ifp, &gsa->sa); if (idx == -1) { error = EADDRNOTAVAIL; goto out_in6p_locked; } inm = imo->im6o_membership[idx]; imf = &imo->im6o_mfilters[idx]; if (ssa->ss.ss_family != AF_UNSPEC) is_final = 0; /* * Begin state merge transaction at socket layer. */ INP_WLOCK_ASSERT(inp); /* * If we were instructed only to leave a given source, do so. * MCAST_LEAVE_SOURCE_GROUP is only valid for inclusive memberships. */ if (is_final) { im6f_leave(imf); } else { if (imf->im6f_st[0] == MCAST_EXCLUDE) { error = EADDRNOTAVAIL; goto out_in6p_locked; } ims = im6o_match_source(imo, idx, &ssa->sa); if (ims == NULL) { CTR3(KTR_MLD, "%s: source %p %spresent", __func__, ip6_sprintf(ip6tbuf, &ssa->sin6.sin6_addr), "not "); error = EADDRNOTAVAIL; goto out_in6p_locked; } CTR2(KTR_MLD, "%s: %s source", __func__, "block"); error = im6f_prune(imf, &ssa->sin6); if (error) { CTR1(KTR_MLD, "%s: merge imf state failed", __func__); goto out_in6p_locked; } } /* * Begin state merge transaction at MLD layer. */ in_pcbref(inp); INP_WUNLOCK(inp); IN6_MULTI_LOCK(); if (is_final) { /* * Give up the multicast address record to which * the membership points. */ (void)in6_leavegroup_locked(inm, imf); } else { CTR1(KTR_MLD, "%s: merge inm state", __func__); IN6_MULTI_LIST_LOCK(); error = in6m_merge(inm, imf); if (error) CTR1(KTR_MLD, "%s: failed to merge inm state", __func__); else { CTR1(KTR_MLD, "%s: doing mld downcall", __func__); error = mld_change_state(inm, 0); if (error) CTR1(KTR_MLD, "%s: failed mld downcall", __func__); } IN6_MULTI_LIST_UNLOCK(); } IN6_MULTI_UNLOCK(); INP_WLOCK(inp); if (in_pcbrele_wlocked(inp)) return (ENXIO); if (error) im6f_rollback(imf); else im6f_commit(imf); im6f_reap(imf); if (is_final) { /* Remove the gap in the membership array. 
*/ for (++idx; idx < imo->im6o_num_memberships; ++idx) { imo->im6o_membership[idx-1] = imo->im6o_membership[idx]; imo->im6o_mfilters[idx-1] = imo->im6o_mfilters[idx]; } imo->im6o_num_memberships--; } out_in6p_locked: INP_WUNLOCK(inp); return (error); } /* * Select the interface for transmitting IPv6 multicast datagrams. * * Either an instance of struct in6_addr or an instance of struct ipv6_mreqn * may be passed to this socket option. An address of in6addr_any or an * interface index of 0 is used to remove a previous selection. * When no interface is selected, one is chosen for every send. */ static int in6p_set_multicast_if(struct inpcb *inp, struct sockopt *sopt) { struct ifnet *ifp; struct ip6_moptions *imo; u_int ifindex; int error; if (sopt->sopt_valsize != sizeof(u_int)) return (EINVAL); error = sooptcopyin(sopt, &ifindex, sizeof(u_int), sizeof(u_int)); if (error) return (error); if (V_if_index < ifindex) return (EINVAL); if (ifindex == 0) ifp = NULL; else { ifp = ifnet_byindex(ifindex); if (ifp == NULL) return (EINVAL); if ((ifp->if_flags & IFF_MULTICAST) == 0) return (EADDRNOTAVAIL); } imo = in6p_findmoptions(inp); imo->im6o_multicast_ifp = ifp; INP_WUNLOCK(inp); return (0); } /* * Atomically set source filters on a socket for an IPv6 multicast group. * * SMPng: NOTE: Potentially calls malloc(M_WAITOK) with Giant held. 
*/
/*
 * The request is copied in as a struct __msfilterreq and validated before
 * any lock is taken:
 *   ENOBUFS       - msfr_nsrcs exceeds in6_mcast_maxsocksrc
 *   EINVAL        - filter mode is neither MCAST_EXCLUDE nor MCAST_INCLUDE,
 *                   the group is not an AF_INET6 sockaddr_in6, or the group
 *                   address is not multicast
 *   EADDRNOTAVAIL - msfr_ifindex is 0, out of range, has no ifnet, or the
 *                   socket has no matching membership / filter array
 * Per-source validation errors (EAFNOSUPPORT, EINVAL) and errors from
 * im6f_get_source(), in6m_merge() and mld_change_state() roll the
 * socket-layer filter back before returning.
 */
static int
in6p_set_source_filters(struct inpcb *inp, struct sockopt *sopt)
{
	struct __msfilterreq msfr;
	sockunion_t *gsa;
	struct ifnet *ifp;
	struct in6_mfilter *imf;
	struct ip6_moptions *imo;
	struct in6_multi *inm;
	size_t idx;
	int error;

	error = sooptcopyin(sopt, &msfr, sizeof(struct __msfilterreq),
	    sizeof(struct __msfilterreq));
	if (error)
		return (error);

	/* Bound the source count; it sizes the kernel allocation below. */
	if (msfr.msfr_nsrcs > in6_mcast_maxsocksrc)
		return (ENOBUFS);

	if (msfr.msfr_fmode != MCAST_EXCLUDE &&
	    msfr.msfr_fmode != MCAST_INCLUDE)
		return (EINVAL);

	if (msfr.msfr_group.ss_family != AF_INET6 ||
	    msfr.msfr_group.ss_len != sizeof(struct sockaddr_in6))
		return (EINVAL);

	gsa = (sockunion_t *)&msfr.msfr_group;
	if (!IN6_IS_ADDR_MULTICAST(&gsa->sin6.sin6_addr))
		return (EINVAL);

	gsa->sin6.sin6_port = 0;	/* ignore port */

	if (msfr.msfr_ifindex == 0 || V_if_index < msfr.msfr_ifindex)
		return (EADDRNOTAVAIL);
	ifp = ifnet_byindex(msfr.msfr_ifindex);
	if (ifp == NULL)
		return (EADDRNOTAVAIL);
	(void)in6_setscope(&gsa->sin6.sin6_addr, ifp, NULL);

	/*
	 * Take the INP write lock.
	 * Check if this socket is a member of this group.
	 */
	imo = in6p_findmoptions(inp);
	idx = im6o_match_group(imo, ifp, &gsa->sa);
	/* idx is a size_t, so the -1 below is compared as SIZE_MAX. */
	if (idx == -1 || imo->im6o_mfilters == NULL) {
		error = EADDRNOTAVAIL;
		goto out_in6p_locked;
	}
	inm = imo->im6o_membership[idx];
	imf = &imo->im6o_mfilters[idx];

	/*
	 * Begin state merge transaction at socket layer.
	 */
	INP_WLOCK_ASSERT(inp);

	imf->im6f_st[1] = msfr.msfr_fmode;

	/*
	 * Apply any new source filters, if present.
	 * Make a copy of the user-space source vector so
	 * that we may copy them with a single copyin. This
	 * allows us to deal with page faults up-front.
	 */
	if (msfr.msfr_nsrcs > 0) {
		struct in6_msource *lims;
		struct sockaddr_in6 *psin;
		struct sockaddr_storage *kss, *pkss;
		int i;

		/*
		 * The INP lock is dropped around the sleeping allocation
		 * and the copyin.
		 * NOTE(review): inm and imf were looked up before this
		 * unlock and nothing visible here re-validates them after
		 * the lock is re-taken - confirm the membership arrays
		 * cannot change in between.
		 */
		INP_WUNLOCK(inp);

		CTR2(KTR_MLD, "%s: loading %lu source list entries",
		    __func__, (unsigned long)msfr.msfr_nsrcs);
		kss = malloc(sizeof(struct sockaddr_storage) * msfr.msfr_nsrcs,
		    M_TEMP, M_WAITOK);
		error = copyin(msfr.msfr_srcs, kss,
		    sizeof(struct sockaddr_storage) * msfr.msfr_nsrcs);
		if (error) {
			/*
			 * NOTE(review): this path returns without reaching
			 * out_im6f_rollback, although im6f_st[1] was already
			 * overwritten above - confirm that is intended.
			 */
			free(kss, M_TEMP);
			return (error);
		}

		INP_WLOCK(inp);

		/*
		 * Mark all source filters as UNDEFINED at t1.
		 * Restore new group filter mode, as im6f_leave()
		 * will set it to INCLUDE.
		 */
		im6f_leave(imf);
		imf->im6f_st[1] = msfr.msfr_fmode;

		/*
		 * Update socket layer filters at t1, lazy-allocating
		 * new entries. This saves a bunch of memory at the
		 * cost of one RB_FIND() per source entry; duplicate
		 * entries in the msfr_nsrcs vector are ignored.
		 * If we encounter an error, rollback transaction.
		 *
		 * XXX This too could be replaced with a set-symmetric
		 * difference like loop to avoid walking from root
		 * every time, as the key space is common.
		 */
		for (i = 0, pkss = kss; i < msfr.msfr_nsrcs; i++, pkss++) {
			psin = (struct sockaddr_in6 *)pkss;
			if (psin->sin6_family != AF_INET6) {
				error = EAFNOSUPPORT;
				break;
			}
			if (psin->sin6_len != sizeof(struct sockaddr_in6)) {
				error = EINVAL;
				break;
			}
			/* A source address must not itself be multicast. */
			if (IN6_IS_ADDR_MULTICAST(&psin->sin6_addr)) {
				error = EINVAL;
				break;
			}
			/*
			 * TODO: Validate embedded scope ID in source
			 * list entry against passed-in ifp, if and only
			 * if source list filter entry is iface or node local.
			 */
			in6_clearscope(&psin->sin6_addr);
			error = im6f_get_source(imf, psin, &lims);
			if (error)
				break;
			/* Each listed source takes the group's new t1 mode. */
			lims->im6sl_st[1] = imf->im6f_st[1];
		}
		free(kss, M_TEMP);
	}

	if (error)
		goto out_im6f_rollback;

	INP_WLOCK_ASSERT(inp);
	IN6_MULTI_LIST_LOCK();

	/*
	 * Begin state merge transaction at MLD layer.
	 */
	CTR1(KTR_MLD, "%s: merge inm state", __func__);
	error = in6m_merge(inm, imf);
	if (error)
		CTR1(KTR_MLD, "%s: failed to merge inm state", __func__);
	else {
		CTR1(KTR_MLD, "%s: doing mld downcall", __func__);
		error = mld_change_state(inm, 0);
		if (error)
			CTR1(KTR_MLD, "%s: failed mld downcall", __func__);
	}

	IN6_MULTI_LIST_UNLOCK();

out_im6f_rollback:
	/* Finish the socket-layer transaction: undo on error, else commit. */
	if (error)
		im6f_rollback(imf);
	else
		im6f_commit(imf);

	im6f_reap(imf);

out_in6p_locked:
	INP_WUNLOCK(inp);
	return (error);
}

/*
 * Set the IP multicast options in response to user setsockopt().
 *
 * Many of the socket options handled in this function duplicate the
 * functionality of socket options in the regular unicast API. However,
 * it is not possible to merge the duplicate code, because the idempotence
 * of the IPv6 multicast part of the BSD Sockets API must be preserved;
 * the effects of these options must be treated as separate and distinct.
 *
 * Returns EOPNOTSUPP for divert sockets, for sockets that are neither
 * SOCK_RAW nor SOCK_DGRAM, and for option names not listed in the switch
 * below; otherwise the result of the per-option handling.
 *
 * SMPng: XXX: Unlocked read of inp_socket believed OK.
 */
int
ip6_setmoptions(struct inpcb *inp, struct sockopt *sopt)
{
	struct ip6_moptions *im6o;
	int error;

	error = 0;

	/*
	 * If socket is neither of type SOCK_RAW or SOCK_DGRAM,
	 * or is a divert socket, reject it.
	 */
	if (inp->inp_socket->so_proto->pr_protocol == IPPROTO_DIVERT ||
	    (inp->inp_socket->so_proto->pr_type != SOCK_RAW &&
	     inp->inp_socket->so_proto->pr_type != SOCK_DGRAM))
		return (EOPNOTSUPP);

	switch (sopt->sopt_name) {
	case IPV6_MULTICAST_IF:
		error = in6p_set_multicast_if(inp, sopt);
		break;

	case IPV6_MULTICAST_HOPS: {
		int hlim;

		if (sopt->sopt_valsize != sizeof(int)) {
			error = EINVAL;
			break;
		}
		error = sooptcopyin(sopt, &hlim, sizeof(hlim), sizeof(int));
		if (error)
			break;
		/* Valid range is 0..255; -1 selects V_ip6_defmcasthlim. */
		if (hlim < -1 || hlim > 255) {
			error = EINVAL;
			break;
		} else if (hlim == -1) {
			hlim = V_ip6_defmcasthlim;
		}
		im6o = in6p_findmoptions(inp);
		im6o->im6o_multicast_hlim = hlim;
		INP_WUNLOCK(inp);
		break;
	}

	case IPV6_MULTICAST_LOOP: {
		u_int loop;

		/*
		 * Set the loopback flag for outgoing multicast packets.
		 * Must be zero or one.
		 */
		if (sopt->sopt_valsize != sizeof(u_int)) {
			error = EINVAL;
			break;
		}
		error = sooptcopyin(sopt, &loop, sizeof(u_int), sizeof(u_int));
		if (error)
			break;
		if (loop > 1) {
			error = EINVAL;
			break;
		}
		im6o = in6p_findmoptions(inp);
		im6o->im6o_multicast_loop = loop;
		INP_WUNLOCK(inp);
		break;
	}

	case IPV6_JOIN_GROUP:
	case MCAST_JOIN_GROUP:
	case MCAST_JOIN_SOURCE_GROUP:
		error = in6p_join_group(inp, sopt);
		break;

	case IPV6_LEAVE_GROUP:
	case MCAST_LEAVE_GROUP:
	case MCAST_LEAVE_SOURCE_GROUP:
		error = in6p_leave_group(inp, sopt);
		break;

	case MCAST_BLOCK_SOURCE:
	case MCAST_UNBLOCK_SOURCE:
		error = in6p_block_unblock_source(inp, sopt);
		break;

	case IPV6_MSFILTER:
		error = in6p_set_source_filters(inp, sopt);
		break;

	default:
		error = EOPNOTSUPP;
		break;
	}

	/* Every path above must leave the inpcb unlocked. */
	INP_UNLOCK_ASSERT(inp);

	return (error);
}

/*
 * Expose MLD's multicast filter mode and source list(s) to userland,
 * keyed by (ifindex, group).
 * The filter mode is written out as a uint32_t, followed by
 * 0..n of struct in6_addr.
 * For use by ifmcstat(8).
 * SMPng: NOTE: unlocked read of ifindex space.
*/ static int sysctl_ip6_mcast_filters(SYSCTL_HANDLER_ARGS) { struct in6_addr mcaddr; struct in6_addr src; struct ifnet *ifp; struct ifmultiaddr *ifma; struct in6_multi *inm; struct ip6_msource *ims; int *name; int retval; u_int namelen; uint32_t fmode, ifindex; #ifdef KTR char ip6tbuf[INET6_ADDRSTRLEN]; #endif name = (int *)arg1; namelen = arg2; if (req->newptr != NULL) return (EPERM); /* int: ifindex + 4 * 32 bits of IPv6 address */ if (namelen != 5) return (EINVAL); ifindex = name[0]; if (ifindex <= 0 || ifindex > V_if_index) { CTR2(KTR_MLD, "%s: ifindex %u out of range", __func__, ifindex); return (ENOENT); } memcpy(&mcaddr, &name[1], sizeof(struct in6_addr)); if (!IN6_IS_ADDR_MULTICAST(&mcaddr)) { CTR2(KTR_MLD, "%s: group %s is not multicast", __func__, ip6_sprintf(ip6tbuf, &mcaddr)); return (EINVAL); } ifp = ifnet_byindex(ifindex); if (ifp == NULL) { CTR2(KTR_MLD, "%s: no ifp for ifindex %u", __func__, ifindex); return (ENOENT); } /* * Internal MLD lookups require that scope/zone ID is set. */ (void)in6_setscope(&mcaddr, ifp, NULL); retval = sysctl_wire_old_buffer(req, sizeof(uint32_t) + (in6_mcast_maxgrpsrc * sizeof(struct in6_addr))); if (retval) return (retval); IN6_MULTI_LOCK(); IN6_MULTI_LIST_LOCK(); IF_ADDR_RLOCK(ifp); CK_STAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) { inm = in6m_ifmultiaddr_get_inm(ifma); if (inm == NULL) continue; if (!IN6_ARE_ADDR_EQUAL(&inm->in6m_addr, &mcaddr)) continue; fmode = inm->in6m_st[1].iss_fmode; retval = SYSCTL_OUT(req, &fmode, sizeof(uint32_t)); if (retval != 0) break; RB_FOREACH(ims, ip6_msource_tree, &inm->in6m_srcs) { CTR2(KTR_MLD, "%s: visit node %p", __func__, ims); /* * Only copy-out sources which are in-mode. 
*/ if (fmode != im6s_get_mode(inm, ims, 1)) { CTR1(KTR_MLD, "%s: skip non-in-mode", __func__); continue; } src = ims->im6s_addr; retval = SYSCTL_OUT(req, &src, sizeof(struct in6_addr)); if (retval != 0) break; } } IF_ADDR_RUNLOCK(ifp); IN6_MULTI_LIST_UNLOCK(); IN6_MULTI_UNLOCK(); return (retval); } #ifdef KTR static const char *in6m_modestrs[] = { "un", "in", "ex" }; static const char * in6m_mode_str(const int mode) { if (mode >= MCAST_UNDEFINED && mode <= MCAST_EXCLUDE) return (in6m_modestrs[mode]); return ("??"); } static const char *in6m_statestrs[] = { "not-member", "silent", "idle", "lazy", "sleeping", "awakening", "query-pending", "sg-query-pending", "leaving" }; static const char * in6m_state_str(const int state) { if (state >= MLD_NOT_MEMBER && state <= MLD_LEAVING_MEMBER) return (in6m_statestrs[state]); return ("??"); } /* * Dump an in6_multi structure to the console. */ void in6m_print(const struct in6_multi *inm) { int t; char ip6tbuf[INET6_ADDRSTRLEN]; if ((ktr_mask & KTR_MLD) == 0) return; printf("%s: --- begin in6m %p ---\n", __func__, inm); printf("addr %s ifp %p(%s) ifma %p\n", ip6_sprintf(ip6tbuf, &inm->in6m_addr), inm->in6m_ifp, if_name(inm->in6m_ifp), inm->in6m_ifma); printf("timer %u state %s refcount %u scq.len %u\n", inm->in6m_timer, in6m_state_str(inm->in6m_state), inm->in6m_refcount, mbufq_len(&inm->in6m_scq)); printf("mli %p nsrc %lu sctimer %u scrv %u\n", inm->in6m_mli, inm->in6m_nsrc, inm->in6m_sctimer, inm->in6m_scrv); for (t = 0; t < 2; t++) { printf("t%d: fmode %s asm %u ex %u in %u rec %u\n", t, in6m_mode_str(inm->in6m_st[t].iss_fmode), inm->in6m_st[t].iss_asm, inm->in6m_st[t].iss_ex, inm->in6m_st[t].iss_in, inm->in6m_st[t].iss_rec); } printf("%s: --- end in6m %p ---\n", __func__, inm); } #else /* !KTR */ void in6m_print(const struct in6_multi *inm) { } #endif /* KTR */ Index: stable/12/sys/netinet6/in6_var.h =================================================================== --- stable/12/sys/netinet6/in6_var.h (revision 343650) +++ 
stable/12/sys/netinet6/in6_var.h (revision 343651) @@ -1,875 +1,873 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the project nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $KAME: in6_var.h,v 1.56 2001/03/29 05:34:31 itojun Exp $ */ /*- * Copyright (c) 1985, 1986, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. 
Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)in_var.h 8.1 (Berkeley) 6/10/93 * $FreeBSD$ */ #ifndef _NETINET6_IN6_VAR_H_ #define _NETINET6_IN6_VAR_H_ #include #include #ifdef _KERNEL #include #include #endif /* * Interface address, Internet version. One of these structures * is allocated for each interface with an Internet address. * The ifaddr structure contains the protocol-independent part * of the structure and is assumed to be first. */ /* * pltime/vltime are just for future reference (required to implements 2 * hour rule for hosts). they should never be modified by nd6_timeout or * anywhere else. 
* userland -> kernel: accept pltime/vltime
 * kernel -> userland: throw up everything
 * in kernel: modify preferred/expire only
 */
struct in6_addrlifetime {
	time_t ia6t_expire;	/* valid lifetime expiration time */
	time_t ia6t_preferred;	/* preferred lifetime expiration time */
	u_int32_t ia6t_vltime;	/* valid lifetime */
	/*
	 * NOTE(review): presumably the preferred lifetime, pairing with
	 * ia6t_preferred the way ia6t_vltime pairs with ia6t_expire.
	 */
	u_int32_t ia6t_pltime;	/* prefix lifetime */
};

/* Opaque types that are only referenced through pointers below. */
struct nd_ifinfo;
struct scope6_id;
struct lltable;
struct mld_ifsoftc;
struct in6_multi;

/*
 * Per-interface IPv6 state.  LLTABLE6() below reaches it through
 * ifp->if_afdata[AF_INET6].
 */
struct in6_ifextra {
	counter_u64_t *in6_ifstat;
	counter_u64_t *icmp6_ifstat;
	struct nd_ifinfo *nd_ifinfo;
	struct scope6_id *scope6_id;
	struct lltable *lltable;
	struct mld_ifsoftc *mld_ifinfo;
};

/* Fetch the lltable pointer from an interface's in6_ifextra. */
#define	LLTABLE6(ifp)	(((struct in6_ifextra *)(ifp)->if_afdata[AF_INET6])->lltable)

#ifdef _KERNEL

/* Singly-linked list head type for chains of struct in6_multi. */
SLIST_HEAD(in6_multi_head, in6_multi);
MALLOC_DECLARE(M_IP6MADDR);

/*
 * IPv6 interface address.  The generic struct ifaddr is the first
 * member; ia_ifp and ia_flags are shorthands into it.
 */
struct in6_ifaddr {
	struct ifaddr ia_ifa;		/* protocol-independent info */
#define	ia_ifp		ia_ifa.ifa_ifp
#define	ia_flags	ia_ifa.ifa_flags
	struct sockaddr_in6 ia_addr;	/* interface address */
	struct sockaddr_in6 ia_net;	/* network number of interface */
	struct sockaddr_in6 ia_dstaddr;	/* space for destination addr */
	struct sockaddr_in6 ia_prefixmask;	/* prefix mask */
	u_int32_t ia_plen;		/* prefix length */
	CK_STAILQ_ENTRY(in6_ifaddr) ia_link;	/* list of IPv6 addresses */
	int ia6_flags;

	struct in6_addrlifetime ia6_lifetime;
	time_t ia6_createtime;	/* the creation time of this address, which is
				 * currently used for temporary addresses only.
				 */
	time_t ia6_updatetime;

	/* back pointer to the ND prefix (for autoconfigured addresses only) */
	struct nd_prefix *ia6_ndpr;

	/* multicast addresses joined from the kernel */
	LIST_HEAD(, in6_multi_mship) ia6_memberships;
	/* entry in bucket of inet6 addresses */
	CK_LIST_ENTRY(in6_ifaddr) ia6_hash;
};

/* List of in6_ifaddr's.
*/
CK_STAILQ_HEAD(in6_ifaddrhead, in6_ifaddr);
CK_LIST_HEAD(in6_ifaddrlisthead, in6_ifaddr);
#endif /* _KERNEL */

/* control structure to manage address selection policy */
struct in6_addrpolicy {
	struct sockaddr_in6 addr;	/* prefix address */
	struct sockaddr_in6 addrmask;	/* prefix mask */
	int preced;		/* precedence */
	int label;		/* matching label */
	u_quad_t use;		/* statistics */
};

/*
 * IPv6 interface statistics, as defined in RFC2465 Ipv6IfStatsEntry (p12).
 *
 * Every member is a uint64_t.  NOTE(review): the in6_ifstat_inc() macro
 * later in this header indexes a counter array by
 * offsetof(member) / sizeof(uint64_t), so it appears to rely on that.
 */
struct in6_ifstat {
	uint64_t ifs6_in_receive;	/* # of total input datagram */
	uint64_t ifs6_in_hdrerr;	/* # of datagrams with invalid hdr */
	uint64_t ifs6_in_toobig;	/* # of datagrams exceeded MTU */
	uint64_t ifs6_in_noroute;	/* # of datagrams with no route */
	uint64_t ifs6_in_addrerr;	/* # of datagrams with invalid dst */
	uint64_t ifs6_in_protounknown;	/* # of datagrams with unknown proto */
					/* NOTE: increment on final dst if */
	uint64_t ifs6_in_truncated;	/* # of truncated datagrams */
	uint64_t ifs6_in_discard;	/* # of discarded datagrams */
					/* NOTE: fragment timeout is not here */
	uint64_t ifs6_in_deliver;	/* # of datagrams delivered to ULP */
					/* NOTE: increment on final dst if */
	uint64_t ifs6_out_forward;	/* # of datagrams forwarded */
					/* NOTE: increment on outgoing if */
	uint64_t ifs6_out_request;	/* # of outgoing datagrams from ULP */
					/* NOTE: does not include forwards */
	uint64_t ifs6_out_discard;	/* # of discarded datagrams */
	uint64_t ifs6_out_fragok;	/* # of datagrams fragmented */
	uint64_t ifs6_out_fragfail;	/* # of datagrams failed on fragment */
	uint64_t ifs6_out_fragcreat;	/* # of fragment datagrams */
					/* NOTE: this is # after fragment */
	uint64_t ifs6_reass_reqd;	/* # of incoming fragmented packets */
					/* NOTE: increment on final dst if */
	uint64_t ifs6_reass_ok;		/* # of reassembled packets */
					/* NOTE: this is # after reass */
					/* NOTE: increment on final dst if */
	uint64_t ifs6_reass_fail;	/* # of reass failures */
					/* NOTE: may not be packet count */
					/* NOTE: increment on final dst if */
	uint64_t ifs6_in_mcast;		/* # of inbound multicast datagrams */
	uint64_t ifs6_out_mcast;	/* # of outbound multicast datagrams */
};

/*
 * ICMPv6 interface statistics, as defined in RFC2466 Ipv6IfIcmpEntry.
 * XXX: I'm not sure if this file is the right place for this structure...
 */
struct icmp6_ifstat {
	/*
	 * Input statistics
	 */
	/* ipv6IfIcmpInMsgs, total # of input messages */
	uint64_t ifs6_in_msg;
	/* ipv6IfIcmpInErrors, # of input error messages */
	uint64_t ifs6_in_error;
	/* ipv6IfIcmpInDestUnreachs, # of input dest unreach errors */
	uint64_t ifs6_in_dstunreach;
	/* ipv6IfIcmpInAdminProhibs, # of input administratively prohibited errs */
	uint64_t ifs6_in_adminprohib;
	/* ipv6IfIcmpInTimeExcds, # of input time exceeded errors */
	uint64_t ifs6_in_timeexceed;
	/* ipv6IfIcmpInParmProblems, # of input parameter problem errors */
	uint64_t ifs6_in_paramprob;
	/* ipv6IfIcmpInPktTooBigs, # of input packet too big errors */
	uint64_t ifs6_in_pkttoobig;
	/* ipv6IfIcmpInEchos, # of input echo requests */
	uint64_t ifs6_in_echo;
	/* ipv6IfIcmpInEchoReplies, # of input echo replies */
	uint64_t ifs6_in_echoreply;
	/* ipv6IfIcmpInRouterSolicits, # of input router solicitations */
	uint64_t ifs6_in_routersolicit;
	/* ipv6IfIcmpInRouterAdvertisements, # of input router advertisements */
	uint64_t ifs6_in_routeradvert;
	/* ipv6IfIcmpInNeighborSolicits, # of input neighbor solicitations */
	uint64_t ifs6_in_neighborsolicit;
	/* ipv6IfIcmpInNeighborAdvertisements, # of input neighbor advertisements */
	uint64_t ifs6_in_neighboradvert;
	/* ipv6IfIcmpInRedirects, # of input redirects */
	uint64_t ifs6_in_redirect;
	/* ipv6IfIcmpInGroupMembQueries, # of input MLD queries */
	uint64_t ifs6_in_mldquery;
	/* ipv6IfIcmpInGroupMembResponses, # of input MLD reports */
	uint64_t ifs6_in_mldreport;
	/* ipv6IfIcmpInGroupMembReductions, # of input MLD done */
	uint64_t ifs6_in_mlddone;

	/*
	 * Output statistics. We should solve unresolved routing problem...
	 */
	/* ipv6IfIcmpOutMsgs, total # of output messages */
	uint64_t ifs6_out_msg;
	/* ipv6IfIcmpOutErrors, # of output error messages */
	uint64_t ifs6_out_error;
	/* ipv6IfIcmpOutDestUnreachs, # of output dest unreach errors */
	uint64_t ifs6_out_dstunreach;
	/* ipv6IfIcmpOutAdminProhibs, # of output administratively prohibited errs */
	uint64_t ifs6_out_adminprohib;
	/* ipv6IfIcmpOutTimeExcds, # of output time exceeded errors */
	uint64_t ifs6_out_timeexceed;
	/* ipv6IfIcmpOutParmProblems, # of output parameter problem errors */
	uint64_t ifs6_out_paramprob;
	/* ipv6IfIcmpOutPktTooBigs, # of output packet too big errors */
	uint64_t ifs6_out_pkttoobig;
	/* ipv6IfIcmpOutEchos, # of output echo requests */
	uint64_t ifs6_out_echo;
	/* ipv6IfIcmpOutEchoReplies, # of output echo replies */
	uint64_t ifs6_out_echoreply;
	/* ipv6IfIcmpOutRouterSolicits, # of output router solicitations */
	uint64_t ifs6_out_routersolicit;
	/* ipv6IfIcmpOutRouterAdvertisements, # of output router advertisements */
	uint64_t ifs6_out_routeradvert;
	/* ipv6IfIcmpOutNeighborSolicits, # of output neighbor solicitations */
	uint64_t ifs6_out_neighborsolicit;
	/* ipv6IfIcmpOutNeighborAdvertisements, # of output neighbor advertisements */
	uint64_t ifs6_out_neighboradvert;
	/* ipv6IfIcmpOutRedirects, # of output redirects */
	uint64_t ifs6_out_redirect;
	/* ipv6IfIcmpOutGroupMembQueries, # of output MLD queries */
	uint64_t ifs6_out_mldquery;
	/* ipv6IfIcmpOutGroupMembResponses, # of output MLD reports */
	uint64_t ifs6_out_mldreport;
	/* ipv6IfIcmpOutGroupMembReductions, # of output MLD done */
	uint64_t ifs6_out_mlddone;
};

/*
 * Argument of most of the per-interface IPv6 ioctls defined in this
 * header: an interface name plus a union of possible payloads.
 */
struct in6_ifreq {
	char ifr_name[IFNAMSIZ];
	union {
		struct sockaddr_in6 ifru_addr;
		struct sockaddr_in6 ifru_dstaddr;
		int ifru_flags;
		int ifru_flags6;
		int ifru_metric;
		caddr_t ifru_data;
		struct in6_addrlifetime ifru_lifetime;
		struct in6_ifstat ifru_stat;
		struct icmp6_ifstat ifru_icmp6stat;
		u_int32_t ifru_scope_id[16];
	} ifr_ifru;
};

/* Argument of SIOCAIFADDR_IN6 and SIOCSIFPHYADDR_IN6. */
struct in6_aliasreq {
	char ifra_name[IFNAMSIZ];
	struct sockaddr_in6 ifra_addr;
	struct sockaddr_in6 ifra_dstaddr;
	struct sockaddr_in6 ifra_prefixmask;
	int ifra_flags;
	struct in6_addrlifetime ifra_lifetime;
	int ifra_vhid;
};

/* pre-10.x compat */
/* Same layout as in6_aliasreq minus the trailing ifra_vhid. */
struct oin6_aliasreq {
	char ifra_name[IFNAMSIZ];
	struct sockaddr_in6 ifra_addr;
	struct sockaddr_in6 ifra_dstaddr;
	struct sockaddr_in6 ifra_prefixmask;
	int ifra_flags;
	struct in6_addrlifetime ifra_lifetime;
};

/* prefix type macro */
#define	IN6_PREFIX_ND	1
#define	IN6_PREFIX_RR	2

/*
 * prefix related flags passed between kernel(NDP related part) and
 * user land command(ifconfig) and daemon(rtadvd).
 */
struct in6_prflags {
	struct prf_ra {
		u_char onlink : 1;
		u_char autonomous : 1;
		u_char reserved : 6;
	} prf_ra;
	u_char prf_reserved1;
	u_short prf_reserved2;
	/* want to put this on 4byte offset */
	struct prf_rr {
		u_char decrvalid : 1;
		u_char decrprefd : 1;
		u_char reserved : 6;
	} prf_rr;
	u_char prf_reserved3;
	u_short prf_reserved4;
};

/* Argument of the SIOC{S,G,D}IFPREFIX_IN6 ioctls. */
struct in6_prefixreq {
	char ipr_name[IFNAMSIZ];
	u_char ipr_origin;
	u_char ipr_plen;
	u_int32_t ipr_vltime;
	u_int32_t ipr_pltime;
	struct in6_prflags ipr_flags;
	struct sockaddr_in6 ipr_prefix;
};

#define	PR_ORIG_RA	0
#define	PR_ORIG_RR	1
#define	PR_ORIG_STATIC	2
#define	PR_ORIG_KERNEL	3

/* Shorthands for the flag bits nested inside ipr_flags. */
#define	ipr_raf_onlink		ipr_flags.prf_ra.onlink
#define	ipr_raf_auto		ipr_flags.prf_ra.autonomous

/*
 * NOTE(review): struct in6_prflags as declared above has no prf_state
 * member, so any use of this macro would fail to compile.
 */
#define	ipr_statef_onlink	ipr_flags.prf_state.onlink

#define	ipr_rrf_decrvalid	ipr_flags.prf_rr.decrvalid
#define	ipr_rrf_decrprefd	ipr_flags.prf_rr.decrprefd

/* Argument of the SIOC{A,C,SG}IFPREFIX_IN6 ioctls. */
struct in6_rrenumreq {
	char irr_name[IFNAMSIZ];
	u_char irr_origin;
	u_char irr_m_len;	/* match len for matchprefix */
	u_char irr_m_minlen;	/* minlen for matching prefix */
	u_char irr_m_maxlen;	/* maxlen for matching prefix */
	u_char irr_u_uselen;	/* uselen for adding prefix */
	u_char irr_u_keeplen;	/* keeplen from matching prefix */
	struct irr_raflagmask {
		u_char onlink : 1;
		u_char autonomous : 1;
		u_char reserved : 6;
	} irr_raflagmask;
	u_int32_t irr_vltime;
	u_int32_t irr_pltime;
	struct in6_prflags irr_flags;
	struct sockaddr_in6 irr_matchprefix;
	struct sockaddr_in6 irr_useprefix;
};

/* Shorthands for the flag bits nested inside in6_rrenumreq. */
#define	irr_raf_mask_onlink	irr_raflagmask.onlink
#define	irr_raf_mask_auto	irr_raflagmask.autonomous
#define	irr_raf_mask_reserved	irr_raflagmask.reserved

#define	irr_raf_onlink		irr_flags.prf_ra.onlink
#define	irr_raf_auto		irr_flags.prf_ra.autonomous

/*
 * NOTE(review): as with ipr_statef_onlink, struct in6_prflags has no
 * prf_state member.
 */
#define	irr_statef_onlink	irr_flags.prf_state.onlink

#define	irr_rrf			irr_flags.prf_rr
#define	irr_rrf_decrvalid	irr_flags.prf_rr.decrvalid
#define	irr_rrf_decrprefd	irr_flags.prf_rr.decrprefd

/*
 * Given a pointer to an in6_ifaddr (ifaddr),
 * return a pointer to the addr as a sockaddr_in6
 *
 * The *IN6 variants yield a pointer to the embedded sin6_addr, the
 * *SIN6 variants a pointer to the whole sockaddr_in6.
 */
#define	IA6_IN6(ia)	(&((ia)->ia_addr.sin6_addr))
#define	IA6_DSTIN6(ia)	(&((ia)->ia_dstaddr.sin6_addr))
#define	IA6_MASKIN6(ia)	(&((ia)->ia_prefixmask.sin6_addr))
#define	IA6_SIN6(ia)	(&((ia)->ia_addr))
#define	IA6_DSTSIN6(ia)	(&((ia)->ia_dstaddr))
#define	IFA_IN6(x)	(&((struct sockaddr_in6 *)((x)->ifa_addr))->sin6_addr)
#define	IFA_DSTIN6(x)	(&((struct sockaddr_in6 *)((x)->ifa_dstaddr))->sin6_addr)

#define	IFPR_IN6(x)	(&((struct sockaddr_in6 *)((x)->ifpr_prefix))->sin6_addr)

#ifdef _KERNEL
/* True when d and a agree on every bit that is set in mask m. */
#define	IN6_ARE_MASKED_ADDR_EQUAL(d, a, m)	(	\
	(((d)->s6_addr32[0] ^ (a)->s6_addr32[0]) & (m)->s6_addr32[0]) == 0 && \
	(((d)->s6_addr32[1] ^ (a)->s6_addr32[1]) & (m)->s6_addr32[1]) == 0 && \
	(((d)->s6_addr32[2] ^ (a)->s6_addr32[2]) & (m)->s6_addr32[2]) == 0 && \
	(((d)->s6_addr32[3] ^ (a)->s6_addr32[3]) & (m)->s6_addr32[3]) == 0 )
/* AND address a with mask m in place, one 32-bit word at a time. */
#define	IN6_MASK_ADDR(a, m)	do { \
	(a)->s6_addr32[0] &= (m)->s6_addr32[0]; \
	(a)->s6_addr32[1] &= (m)->s6_addr32[1]; \
	(a)->s6_addr32[2] &= (m)->s6_addr32[2]; \
	(a)->s6_addr32[3] &= (m)->s6_addr32[3]; \
} while (0)
#endif

#define	SIOCSIFADDR_IN6		 _IOW('i', 12, struct in6_ifreq)
#define	SIOCGIFADDR_IN6		_IOWR('i', 33, struct in6_ifreq)

#ifdef _KERNEL
/*
 * SIOCSxxx ioctls should be unused (see comments in in6.c), but
 * we do not shift numbers for binary compatibility.
*/
#define	SIOCSIFDSTADDR_IN6	 _IOW('i', 14, struct in6_ifreq)
#define	SIOCSIFNETMASK_IN6	 _IOW('i', 22, struct in6_ifreq)
#endif

#define	SIOCGIFDSTADDR_IN6	_IOWR('i', 34, struct in6_ifreq)
#define	SIOCGIFNETMASK_IN6	_IOWR('i', 37, struct in6_ifreq)

#define	SIOCDIFADDR_IN6		 _IOW('i', 25, struct in6_ifreq)
#define	OSIOCAIFADDR_IN6	 _IOW('i', 26, struct oin6_aliasreq)
#define	SIOCAIFADDR_IN6		 _IOW('i', 27, struct in6_aliasreq)

#define	SIOCSIFPHYADDR_IN6       _IOW('i', 70, struct in6_aliasreq)
#define	SIOCGIFPSRCADDR_IN6	_IOWR('i', 71, struct in6_ifreq)
#define	SIOCGIFPDSTADDR_IN6	_IOWR('i', 72, struct in6_ifreq)

#define	SIOCGIFAFLAG_IN6	_IOWR('i', 73, struct in6_ifreq)

#ifdef _KERNEL
#define	OSIOCGIFINFO_IN6	_IOWR('i', 76, struct in6_ondireq)
#endif
#define	SIOCGIFINFO_IN6		_IOWR('i', 108, struct in6_ndireq)
#define	SIOCSIFINFO_IN6		_IOWR('i', 109, struct in6_ndireq)
#define	SIOCSNDFLUSH_IN6	_IOWR('i', 77, struct in6_ifreq)
#define	SIOCGNBRINFO_IN6	_IOWR('i', 78, struct in6_nbrinfo)
#define	SIOCSPFXFLUSH_IN6	_IOWR('i', 79, struct in6_ifreq)
#define	SIOCSRTRFLUSH_IN6	_IOWR('i', 80, struct in6_ifreq)

#define	SIOCGIFALIFETIME_IN6	_IOWR('i', 81, struct in6_ifreq)
#define	SIOCGIFSTAT_IN6		_IOWR('i', 83, struct in6_ifreq)
#define	SIOCGIFSTAT_ICMP6	_IOWR('i', 84, struct in6_ifreq)

#define	SIOCSDEFIFACE_IN6	_IOWR('i', 85, struct in6_ndifreq)
#define	SIOCGDEFIFACE_IN6	_IOWR('i', 86, struct in6_ndifreq)

#define	SIOCSIFINFO_FLAGS	_IOWR('i', 87, struct in6_ndireq) /* XXX */

#define	SIOCSSCOPE6		_IOW('i', 88, struct in6_ifreq)
#define	SIOCGSCOPE6		_IOWR('i', 89, struct in6_ifreq)
#define	SIOCGSCOPE6DEF		_IOWR('i', 90, struct in6_ifreq)

#define	SIOCSIFPREFIX_IN6	_IOW('i', 100, struct in6_prefixreq) /* set */
#define	SIOCGIFPREFIX_IN6	_IOWR('i', 101, struct in6_prefixreq) /* get */
#define	SIOCDIFPREFIX_IN6	_IOW('i', 102, struct in6_prefixreq) /* del */
#define	SIOCAIFPREFIX_IN6	_IOW('i', 103, struct in6_rrenumreq) /* add */
#define	SIOCCIFPREFIX_IN6	_IOW('i', 104, \
				     struct in6_rrenumreq) /* change */
#define	SIOCSGIFPREFIX_IN6	_IOW('i', 105, \
				     struct in6_rrenumreq) /* set global */

#define	SIOCGETSGCNT_IN6	_IOWR('u', 106, \
				      struct sioc_sg_req6) /* get s,g pkt cnt */
#define	SIOCGETMIFCNT_IN6	_IOWR('u', 107, \
				      struct sioc_mif_req6) /* get pkt cnt per if */

#define	SIOCAADDRCTL_POLICY	_IOW('u', 108, struct in6_addrpolicy)
#define	SIOCDADDRCTL_POLICY	_IOW('u', 109, struct in6_addrpolicy)

/* Per-address flag bits. */
#define	IN6_IFF_ANYCAST		0x01	/* anycast address */
#define	IN6_IFF_TENTATIVE	0x02	/* tentative address */
#define	IN6_IFF_DUPLICATED	0x04	/* DAD detected duplicate */
#define	IN6_IFF_DETACHED	0x08	/* may be detached from the link */
#define	IN6_IFF_DEPRECATED	0x10	/* deprecated address */
#define	IN6_IFF_NODAD		0x20	/* don't perform DAD on this address
					 * (obsolete)
					 */
#define	IN6_IFF_AUTOCONF	0x40	/* autoconfigurable address. */
#define	IN6_IFF_TEMPORARY	0x80	/* temporary (anonymous) address. */
#define	IN6_IFF_PREFER_SOURCE	0x0100	/* preferred address for SAS */

/* do not input/output */
#define	IN6_IFF_NOTREADY (IN6_IFF_TENTATIVE|IN6_IFF_DUPLICATED)

#ifdef _KERNEL
#define	IN6_ARE_SCOPE_CMP(a,b) ((a)-(b))
#define	IN6_ARE_SCOPE_EQUAL(a,b) ((a)==(b))
#endif

#ifdef _KERNEL
VNET_DECLARE(struct in6_ifaddrhead, in6_ifaddrhead);
VNET_DECLARE(struct in6_ifaddrlisthead *, in6_ifaddrhashtbl);
VNET_DECLARE(u_long, in6_ifaddrhmask);
#define	V_in6_ifaddrhead		VNET(in6_ifaddrhead)
#define	V_in6_ifaddrhashtbl		VNET(in6_ifaddrhashtbl)
#define	V_in6_ifaddrhmask		VNET(in6_ifaddrhmask)

#define	IN6ADDR_NHASH_LOG2		8
#define	IN6ADDR_NHASH			(1 << IN6ADDR_NHASH_LOG2)
#define	IN6ADDR_HASHVAL(x)		(in6_addrhash(x))
/* Bucket for address x: its hash value masked with V_in6_ifaddrhmask. */
#define	IN6ADDR_HASH(x) \
    (&V_in6_ifaddrhashtbl[IN6ADDR_HASHVAL(x) & V_in6_ifaddrhmask])

/*
 * Hash an IPv6 address: XOR its four 32-bit words together and run the
 * resulting word through fnv_32_buf() seeded with FNV1_32_INIT.
 */
static __inline uint32_t
in6_addrhash(const struct in6_addr *in6)
{
	uint32_t x;

	x = in6->s6_addr32[0] ^ in6->s6_addr32[1] ^ in6->s6_addr32[2] ^
	    in6->s6_addr32[3];
	return (fnv_32_buf(&x, sizeof(x), FNV1_32_INIT));
}

/* rmlock wrappers; the read-side macros take the caller's tracker (t). */
extern struct rmlock in6_ifaddr_lock;
#define	IN6_IFADDR_LOCK_ASSERT()	rm_assert(&in6_ifaddr_lock, RA_LOCKED)
#define	IN6_IFADDR_RLOCK(t)		rm_rlock(&in6_ifaddr_lock, (t))
#define	IN6_IFADDR_RLOCK_ASSERT()	rm_assert(&in6_ifaddr_lock, RA_RLOCKED)
#define	IN6_IFADDR_RUNLOCK(t)		rm_runlock(&in6_ifaddr_lock, (t))
#define	IN6_IFADDR_WLOCK()		rm_wlock(&in6_ifaddr_lock)
#define	IN6_IFADDR_WLOCK_ASSERT()	rm_assert(&in6_ifaddr_lock, RA_WLOCKED)
#define	IN6_IFADDR_WUNLOCK()		rm_wunlock(&in6_ifaddr_lock)

/*
 * Add 1 to the per-interface counter named by `tag' (a member of
 * struct in6_ifstat).  The counter index is the member's offset divided
 * by sizeof(uint64_t).  A NULL ifp is silently ignored.
 */
#define	in6_ifstat_inc(ifp, tag) \
do {								\
	if (ifp)						\
		counter_u64_add(((struct in6_ifextra *)		\
		    ((ifp)->if_afdata[AF_INET6]))->in6_ifstat[	\
		    offsetof(struct in6_ifstat, tag) / sizeof(uint64_t)], 1);\
} while (/*CONSTCOND*/ 0)

extern u_char inet6ctlerrmap[];
VNET_DECLARE(unsigned long, in6_maxmtu);
#define	V_in6_maxmtu			VNET(in6_maxmtu)
#endif /* _KERNEL */

/*
 * IPv6 multicast MLD-layer source entry.
 */
struct ip6_msource {
	RB_ENTRY(ip6_msource) im6s_link;	/* RB tree links */
	struct in6_addr im6s_addr;
	struct im6s_st {
		uint16_t ex;			/* # of exclusive members */
		uint16_t in;			/* # of inclusive members */
	} im6s_st[2];				/* state at t0, t1 */
	uint8_t im6s_stp;			/* pending query */
};
RB_HEAD(ip6_msource_tree, ip6_msource);

/*
 * IPv6 multicast PCB-layer source entry.
 *
 * NOTE: overlapping use of struct ip6_msource fields at start.
 */
struct in6_msource {
	RB_ENTRY(ip6_msource) im6s_link;	/* Common field */
	struct in6_addr im6s_addr;		/* Common field */
	uint8_t im6sl_st[2];			/* state before/at commit */
};

#ifdef _KERNEL
/*
 * IPv6 source tree comparison function.
 *
 * An ordered predicate is necessary; bcmp() is not documented to return
 * an indication of order, memcmp() is, and is an ISO C99 requirement.
 *
 * Returns the memcmp() result over the two im6s_addr fields.
 */
static __inline int
ip6_msource_cmp(const struct ip6_msource *a, const struct ip6_msource *b)
{

	return (memcmp(&a->im6s_addr, &b->im6s_addr, sizeof(struct in6_addr)));
}
RB_PROTOTYPE(ip6_msource_tree, ip6_msource, im6s_link, ip6_msource_cmp);

/*
 * IPv6 multicast PCB-layer group filter descriptor.
*/
struct in6_mfilter {
	struct ip6_msource_tree im6f_sources; /* source list for (S,G) */
	u_long im6f_nsrc;		    /* # of source entries */
	uint8_t im6f_st[2];		    /* state before/at commit */
};

/*
 * Legacy KAME IPv6 multicast membership descriptor.
 */
struct in6_multi_mship {
	struct in6_multi *i6mm_maddr;
	LIST_ENTRY(in6_multi_mship) i6mm_chain;
};

/*
 * IPv6 group descriptor.
 *
 * For every entry on an ifnet's if_multiaddrs list which represents
 * an IP multicast group, there is one of these structures.
 *
 * If any source filters are present, then a node will exist in the RB-tree
 * to permit fast lookup by source whenever an operation takes place.
 * This permits pre-order traversal when we issue reports.
 * Source filter trees are kept separately from the socket layer to
 * greatly simplify locking.
 *
 * When MLDv2 is active, in6m_timer is the response to group query timer.
 * The state-change timer in6m_sctimer is separate; whenever state changes
 * for the group the state change record is generated and transmitted,
 * and kept if retransmissions are necessary.
 *
 * FUTURE: in6m_link is now only used when groups are being purged
 * on a detaching ifnet. It could be demoted to a SLIST_ENTRY, but
 * because it is at the very start of the struct, we can't do this
 * w/o breaking the ABI for ifmcstat.
 *
 * NOTE(review): the structure below has no in6m_link member and starts
 * with in6m_addr, so the FUTURE remark above looks stale.
 */
struct in6_multi {
	struct in6_addr in6m_addr;	/* IPv6 multicast address */
	struct ifnet *in6m_ifp;		/* back pointer to ifnet */
	struct ifmultiaddr *in6m_ifma;	/* back pointer to ifmultiaddr */
	u_int in6m_refcount;		/* reference count */
	u_int in6m_state;		/* state of the membership */
	u_int in6m_timer;		/* MLD6 listener report timer */

	/* New fields for MLDv2 follow. */
	struct mld_ifsoftc *in6m_mli;		/* MLD info */
	SLIST_ENTRY(in6_multi) in6m_nrele;	/* to-be-released by MLD */
	SLIST_ENTRY(in6_multi) in6m_defer;	/* deferred MLDv1 */
	struct ip6_msource_tree in6m_srcs;	/* tree of sources */
	u_long in6m_nsrc;			/* # of tree entries */

	struct mbufq in6m_scq;		/* queue of pending
					 * state-change packets */
	struct timeval in6m_lastgsrtv;	/* last G-S-R query */
	uint16_t in6m_sctimer;		/* state-change timer */
	uint16_t in6m_scrv;		/* state-change rexmit count */

	/*
	 * SSM state counters which track state at T0 (the time the last
	 * state-change report's RV timer went to zero) and T1
	 * (time of pending report, i.e. now).
	 * Used for computing MLDv2 state-change reports. Several refcounts
	 * are maintained here to optimize for common use-cases.
	 */
	struct in6m_st {
		uint16_t iss_fmode;	/* MLD filter mode */
		uint16_t iss_asm;	/* # of ASM listeners */
		uint16_t iss_ex;	/* # of exclusive members */
		uint16_t iss_in;	/* # of inclusive members */
		uint16_t iss_rec;	/* # of recorded sources */
	} in6m_st[2];			/* state at t0, t1 */
};

void in6m_disconnect_locked(struct in6_multi_head *inmh, struct in6_multi *inm);

/*
 * Helper function to derive the filter mode on a source entry
 * from its internal counters. Predicates are:
 *  A source is only excluded if all listeners exclude it.
 *  A source is only included if no listeners exclude it,
 *  and at least one listener includes it.
 * May be used by ifmcstat(8).
 *
 * `t' selects the state snapshot; any non-zero value is treated as 1.
 * Returns MCAST_EXCLUDE, MCAST_INCLUDE, or MCAST_UNDEFINED when neither
 * predicate holds.
 */
static __inline uint8_t
im6s_get_mode(const struct in6_multi *inm, const struct ip6_msource *ims,
    uint8_t t)
{

	/* Clamp t to a valid index into the two-element state arrays. */
	t = !!t;
	if (inm->in6m_st[t].iss_ex > 0 &&
	    inm->in6m_st[t].iss_ex == ims->im6s_st[t].ex)
		return (MCAST_EXCLUDE);
	else if (ims->im6s_st[t].in > 0 && ims->im6s_st[t].ex == 0)
		return (MCAST_INCLUDE);
	return (MCAST_UNDEFINED);
}

/*
 * Lock macros for IPv6 layer multicast address lists.  IPv6 lock goes
 * before link layer multicast locks in the lock order.
In most cases, consumers of IN_*_MULTI() macros should acquire the locks
 * before calling them; users of the in_{add,del}multi() functions should not.
 */
extern struct mtx in6_multi_list_mtx;
extern struct sx in6_multi_sx;

/* Mutex wrappers: in6_multi_list_mtx (asserted by the *_locked helpers). */
#define	IN6_MULTI_LIST_LOCK()		mtx_lock(&in6_multi_list_mtx)
#define	IN6_MULTI_LIST_UNLOCK()	mtx_unlock(&in6_multi_list_mtx)
#define	IN6_MULTI_LIST_LOCK_ASSERT()	mtx_assert(&in6_multi_list_mtx, MA_OWNED)
#define	IN6_MULTI_LIST_UNLOCK_ASSERT() mtx_assert(&in6_multi_list_mtx, MA_NOTOWNED)

/* Exclusive sx-lock wrappers: in6_multi_sx. */
#define	IN6_MULTI_LOCK()		sx_xlock(&in6_multi_sx)
#define	IN6_MULTI_UNLOCK()		sx_xunlock(&in6_multi_sx)
#define	IN6_MULTI_LOCK_ASSERT()	sx_assert(&in6_multi_sx, SA_XLOCKED)
#define	IN6_MULTI_UNLOCK_ASSERT()	sx_assert(&in6_multi_sx, SA_XUNLOCKED)

/*
 * Get the in6_multi pointer from a ifmultiaddr.
 * Returns NULL if ifmultiaddr is no longer valid, i.e. when its address
 * family is not AF_INET6 or IFMA_F_ENQUEUED is not set in ifma_flags;
 * otherwise returns ifma_protospec.
 *
 * NOTE(review): the lines prefixed with '-' below are removal markers
 * from the enclosing patch, not C code.
 */
static __inline struct in6_multi *
in6m_ifmultiaddr_get_inm(struct ifmultiaddr *ifma)
{
-	NET_EPOCH_ASSERT();
-
	return ((ifma->ifma_addr->sa_family != AF_INET6 ||
	    (ifma->ifma_flags & IFMA_F_ENQUEUED) == 0) ? NULL :
	    ifma->ifma_protospec);
}

/*
 * Look up an in6_multi record for an IPv6 multicast address
 * on the interface ifp.
 * If no record found, return NULL.
 *
 * Walks ifp->if_multiaddrs without taking any lock itself; see
 * in6m_lookup() for the variant that takes the locks.
 */
static __inline struct in6_multi *
in6m_lookup_locked(struct ifnet *ifp, const struct in6_addr *mcaddr)
{
	struct ifmultiaddr *ifma;
	struct in6_multi *inm;

	CK_STAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
		inm = in6m_ifmultiaddr_get_inm(ifma);
		/* Skip entries that are not (or no longer) IPv6 groups. */
		if (inm == NULL)
			continue;
		if (IN6_ARE_ADDR_EQUAL(&inm->in6m_addr, mcaddr))
			return (inm);
	}
	return (NULL);
}

/*
 * Wrapper for in6m_lookup_locked().
 *
 * SMPng: Assumes that neither the IN6_MULTI_LOCK() nor the IF_ADDR_LOCK()
 * is held.
 * NOTE(review): the body takes IN6_MULTI_LIST_LOCK(), not IN6_MULTI_LOCK();
 * confirm which lock the sentence above is meant to name.
 */
static __inline struct in6_multi *
in6m_lookup(struct ifnet *ifp, const struct in6_addr *mcaddr)
{
	struct in6_multi *inm;

	/* List mutex first, then the interface address lock. */
	IN6_MULTI_LIST_LOCK();
	IF_ADDR_RLOCK(ifp);
	inm = in6m_lookup_locked(ifp, mcaddr);
	IF_ADDR_RUNLOCK(ifp);
	IN6_MULTI_LIST_UNLOCK();

	/* No reference is taken on the returned record. */
	return (inm);
}

/* Acquire an in6_multi record; caller holds the list mutex. */
static __inline void
in6m_acquire_locked(struct in6_multi *inm)
{

	IN6_MULTI_LIST_LOCK_ASSERT();
	++inm->in6m_refcount;
}

/* Acquire an in6_multi record, taking the list mutex around the bump. */
static __inline void
in6m_acquire(struct in6_multi *inm)
{
	IN6_MULTI_LIST_LOCK();
	in6m_acquire_locked(inm);
	IN6_MULTI_LIST_UNLOCK();
}

/*
 * Drop one reference on inm; caller holds the list mutex.
 * When the count reaches zero the record is detached from its
 * ifmultiaddr (ifma_protospec cleared) and pushed onto the caller's
 * inmh list via in6m_nrele; it is not freed here.
 */
static __inline void
in6m_rele_locked(struct in6_multi_head *inmh, struct in6_multi *inm)
{
	KASSERT(inm->in6m_refcount > 0, ("refcount == %d inm: %p", inm->in6m_refcount, inm));
	IN6_MULTI_LIST_LOCK_ASSERT();

	if (--inm->in6m_refcount == 0) {
		/* The record is expected to be disconnected already. */
		MPASS(inm->in6m_ifp == NULL);
		inm->in6m_ifma->ifma_protospec = NULL;
		MPASS(inm->in6m_ifma->ifma_llifma == NULL);
		SLIST_INSERT_HEAD(inmh, inm, in6m_nrele);
	}
}

struct ip6_moptions;
struct sockopt;
struct inpcbinfo;

/* Multicast KPIs.
*/ int im6o_mc_filter(const struct ip6_moptions *, const struct ifnet *, const struct sockaddr *, const struct sockaddr *); int in6_joingroup(struct ifnet *, const struct in6_addr *, struct in6_mfilter *, struct in6_multi **, int); int in6_joingroup_locked(struct ifnet *, const struct in6_addr *, struct in6_mfilter *, struct in6_multi **, int); int in6_leavegroup(struct in6_multi *, struct in6_mfilter *); int in6_leavegroup_locked(struct in6_multi *, struct in6_mfilter *); void in6m_clear_recorded(struct in6_multi *); void in6m_commit(struct in6_multi *); void in6m_print(const struct in6_multi *); int in6m_record_source(struct in6_multi *, const struct in6_addr *); void in6m_release_list_deferred(struct in6_multi_head *); void in6m_release_wait(void); void ip6_freemoptions(struct ip6_moptions *); int ip6_getmoptions(struct inpcb *, struct sockopt *); int ip6_setmoptions(struct inpcb *, struct sockopt *); /* flags to in6_update_ifa */ #define IN6_IFAUPDATE_DADDELAY 0x1 /* first time to configure an address */ int in6_mask2len(struct in6_addr *, u_char *); int in6_control(struct socket *, u_long, caddr_t, struct ifnet *, struct thread *); int in6_update_ifa(struct ifnet *, struct in6_aliasreq *, struct in6_ifaddr *, int); void in6_prepare_ifra(struct in6_aliasreq *, const struct in6_addr *, const struct in6_addr *); void in6_purgeaddr(struct ifaddr *); int in6if_do_dad(struct ifnet *); void in6_savemkludge(struct in6_ifaddr *); void *in6_domifattach(struct ifnet *); void in6_domifdetach(struct ifnet *, void *); int in6_domifmtu(struct ifnet *); void in6_setmaxmtu(void); int in6_if2idlen(struct ifnet *); struct in6_ifaddr *in6ifa_ifpforlinklocal(struct ifnet *, int); struct in6_ifaddr *in6ifa_ifpwithaddr(struct ifnet *, const struct in6_addr *); struct in6_ifaddr *in6ifa_ifwithaddr(const struct in6_addr *, uint32_t); struct in6_ifaddr *in6ifa_llaonifp(struct ifnet *); int in6_addr2zoneid(struct ifnet *, struct in6_addr *, u_int32_t *); int in6_matchlen(struct in6_addr 
*, struct in6_addr *); int in6_are_prefix_equal(struct in6_addr *, struct in6_addr *, int); void in6_prefixlen2mask(struct in6_addr *, int); int in6_prefix_ioctl(struct socket *, u_long, caddr_t, struct ifnet *); int in6_prefix_add_ifid(int, struct in6_ifaddr *); void in6_prefix_remove_ifid(int, struct in6_ifaddr *); void in6_purgeprefix(struct ifnet *); int in6_is_addr_deprecated(struct sockaddr_in6 *); int in6_src_ioctl(u_long, caddr_t); void in6_newaddrmsg(struct in6_ifaddr *, int); /* * Extended API for IPv6 FIB support. */ struct mbuf *ip6_tryforward(struct mbuf *); void in6_rtredirect(struct sockaddr *, struct sockaddr *, struct sockaddr *, int, struct sockaddr *, u_int); int in6_rtrequest(int, struct sockaddr *, struct sockaddr *, struct sockaddr *, int, struct rtentry **, u_int); void in6_rtalloc(struct route_in6 *, u_int); void in6_rtalloc_ign(struct route_in6 *, u_long, u_int); struct rtentry *in6_rtalloc1(struct sockaddr *, int, u_long, u_int); #endif /* _KERNEL */ #endif /* _NETINET6_IN6_VAR_H_ */ Index: stable/12/sys/netinet6/mld6.c =================================================================== --- stable/12/sys/netinet6/mld6.c (revision 343650) +++ stable/12/sys/netinet6/mld6.c (revision 343651) @@ -1,3333 +1,3333 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 2009 Bruce Simpson. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. The name of the author may not be used to endorse or promote * products derived from this software without specific prior written * permission. 
* * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $KAME: mld6.c,v 1.27 2001/04/04 05:17:30 itojun Exp $ */ /*- * Copyright (c) 1988 Stephen Deering. * Copyright (c) 1992, 1993 * The Regents of the University of California. All rights reserved. * * This code is derived from software contributed to Berkeley by * Stephen Deering of Stanford University. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)igmp.c 8.1 (Berkeley) 7/19/93 */ #include __FBSDID("$FreeBSD$"); #include "opt_inet.h" #include "opt_inet6.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifndef KTR_MLD #define KTR_MLD KTR_INET6 #endif static struct mld_ifsoftc * mli_alloc_locked(struct ifnet *); static void mli_delete_locked(const struct ifnet *); static void mld_dispatch_packet(struct mbuf *); static void mld_dispatch_queue(struct mbufq *, int); static void mld_final_leave(struct in6_multi *, struct mld_ifsoftc *); static void mld_fasttimo_vnet(struct in6_multi_head *inmh); static int mld_handle_state_change(struct in6_multi *, struct mld_ifsoftc *); static int mld_initial_join(struct in6_multi *, struct mld_ifsoftc *, const int); #ifdef KTR static char * mld_rec_type_to_str(const int); #endif static void mld_set_version(struct mld_ifsoftc *, const int); static void mld_slowtimo_vnet(void); static int mld_v1_input_query(struct ifnet *, const struct ip6_hdr *, /*const*/ struct mld_hdr *); static int mld_v1_input_report(struct ifnet *, const struct ip6_hdr *, /*const*/ struct mld_hdr *); static void mld_v1_process_group_timer(struct in6_multi_head *, struct in6_multi *); static void mld_v1_process_querier_timers(struct mld_ifsoftc *); static int mld_v1_transmit_report(struct in6_multi *, const int); 
static void mld_v1_update_group(struct in6_multi *, const int); static void mld_v2_cancel_link_timers(struct mld_ifsoftc *); static void mld_v2_dispatch_general_query(struct mld_ifsoftc *); static struct mbuf * mld_v2_encap_report(struct ifnet *, struct mbuf *); static int mld_v2_enqueue_filter_change(struct mbufq *, struct in6_multi *); static int mld_v2_enqueue_group_record(struct mbufq *, struct in6_multi *, const int, const int, const int, const int); static int mld_v2_input_query(struct ifnet *, const struct ip6_hdr *, struct mbuf *, const int, const int); static int mld_v2_merge_state_changes(struct in6_multi *, struct mbufq *); static void mld_v2_process_group_timers(struct in6_multi_head *, struct mbufq *, struct mbufq *, struct in6_multi *, const int); static int mld_v2_process_group_query(struct in6_multi *, struct mld_ifsoftc *mli, int, struct mbuf *, const int); static int sysctl_mld_gsr(SYSCTL_HANDLER_ARGS); static int sysctl_mld_ifinfo(SYSCTL_HANDLER_ARGS); /* * Normative references: RFC 2710, RFC 3590, RFC 3810. * * Locking: * * The MLD subsystem lock ends up being system-wide for the moment, * but could be per-VIMAGE later on. * * The permitted lock order is: IN6_MULTI_LOCK, MLD_LOCK, IF_ADDR_LOCK. * Any may be taken independently; if any are held at the same * time, the above lock order must be followed. * * IN6_MULTI_LOCK covers in_multi. * * MLD_LOCK covers per-link state and any global variables in this file. * * IF_ADDR_LOCK covers if_multiaddrs, which is used for a variety of * per-link state iterators. * * XXX LOR PREVENTION * A special case for IPv6 is the in6_setscope() routine. ip6_output() * will not accept an ifp; it wants an embedded scope ID, unlike * ip_output(), which happily takes the ifp given to it. The embedded * scope ID is only used by MLD to select the outgoing interface. * * During interface attach and detach, MLD will take MLD_LOCK *after* * the IF_AFDATA_LOCK. 
* As in6_setscope() takes IF_AFDATA_LOCK then SCOPE_LOCK, we can't call * it with MLD_LOCK held without triggering an LOR. A netisr with indirect * dispatch could work around this, but we'd rather not do that, as it * can introduce other races. * * As such, we exploit the fact that the scope ID is just the interface * index, and embed it in the IPv6 destination address accordingly. * This is potentially NOT VALID for MLDv1 reports, as they * are always sent to the multicast group itself; as MLDv2 * reports are always sent to ff02::16, this is not an issue * when MLDv2 is in use. * * This does not however eliminate the LOR when ip6_output() itself * calls in6_setscope() internally whilst MLD_LOCK is held. This will * trigger a LOR warning in WITNESS when the ifnet is detached. * * The right answer is probably to make IF_AFDATA_LOCK an rwlock, given * how it's used across the network stack. Here we're simply exploiting * the fact that MLD runs at a similar layer in the stack to scope6.c. * * VIMAGE: * * Each in6_multi corresponds to an ifp, and each ifp corresponds * to a vnet in ifp->if_vnet. */ static struct mtx mld_mtx; static MALLOC_DEFINE(M_MLD, "mld", "mld state"); #define MLD_EMBEDSCOPE(pin6, zoneid) \ if (IN6_IS_SCOPE_LINKLOCAL(pin6) || \ IN6_IS_ADDR_MC_INTFACELOCAL(pin6)) \ (pin6)->s6_addr16[1] = htons((zoneid) & 0xFFFF) \ /* * VIMAGE-wide globals. 
*/
/* Minimum interval for ratecheck() on group-and-source queries (10s). */
VNET_DEFINE_STATIC(struct timeval, mld_gsrdelay) = {10, 0};
/* List of per-interface mld_ifsoftc state, linked through mli_link. */
VNET_DEFINE_STATIC(LIST_HEAD(, mld_ifsoftc), mli_head);
/* Flags set to 1 when a timer of the corresponding class is scheduled. */
VNET_DEFINE_STATIC(int, interface_timers_running6);
VNET_DEFINE_STATIC(int, state_change_timers_running6);
VNET_DEFINE_STATIC(int, current_state_timers_running6);

/* Accessors for the per-vnet instances of the variables above. */
#define	V_mld_gsrdelay			VNET(mld_gsrdelay)
#define	V_mli_head			VNET(mli_head)
#define	V_interface_timers_running6	VNET(interface_timers_running6)
#define	V_state_change_timers_running6	VNET(state_change_timers_running6)
#define	V_current_state_timers_running6	VNET(current_state_timers_running6)

SYSCTL_DECL(_net_inet6);	/* Note: Not in any common header. */

/* net.inet6.mld: parent node for everything declared below. */
SYSCTL_NODE(_net_inet6, OID_AUTO, mld, CTLFLAG_RW, 0,
    "IPv6 Multicast Listener Discovery");

/*
 * Virtualized sysctls.
 */
/* net.inet6.mld.gsrdelay: handled by sysctl_mld_gsr(). */
SYSCTL_PROC(_net_inet6_mld, OID_AUTO, gsrdelay,
    CTLFLAG_VNET | CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE,
    &VNET_NAME(mld_gsrdelay.tv_sec), 0, sysctl_mld_gsr, "I",
    "Rate limit for MLDv2 Group-and-Source queries in seconds");

/*
 * Non-virtualized sysctls.
 */
/* net.inet6.mld.ifinfo: read-only node served by sysctl_mld_ifinfo(). */
static SYSCTL_NODE(_net_inet6_mld, OID_AUTO, ifinfo,
    CTLFLAG_RD | CTLFLAG_MPSAFE, sysctl_mld_ifinfo,
    "Per-interface MLDv2 state");

/* When zero, received MLDv1 queries and reports are ignored. */
static int	mld_v1enable = 1;
SYSCTL_INT(_net_inet6_mld, OID_AUTO, v1enable, CTLFLAG_RWTUN,
    &mld_v1enable, 0, "Enable fallback to MLDv1");

/* When zero, received MLDv2 queries are ignored. */
static int	mld_v2enable = 1;
SYSCTL_INT(_net_inet6_mld, OID_AUTO, v2enable, CTLFLAG_RWTUN,
    &mld_v2enable, 0, "Enable MLDv2");

/* When non-zero, newly attached interfaces get MLIF_USEALLOW set. */
static int	mld_use_allow = 1;
SYSCTL_INT(_net_inet6_mld, OID_AUTO, use_allow, CTLFLAG_RWTUN,
    &mld_use_allow, 0, "Use ALLOW/BLOCK for RFC 4604 SSM joins/leaves");

/*
 * Packed Router Alert option structure declaration:
 * hop-by-hop header, one padding option, then the router alert option.
 */
struct mld_raopt {
	struct ip6_hbh		hbh;
	struct ip6_opt		pad;
	struct ip6_opt_router	ra;
} __packed;

/*
 * Router Alert hop-by-hop option header.
*/ static struct mld_raopt mld_ra = { .hbh = { 0, 0 }, .pad = { .ip6o_type = IP6OPT_PADN, 0 }, .ra = { .ip6or_type = IP6OPT_ROUTER_ALERT, .ip6or_len = IP6OPT_RTALERT_LEN - 2, .ip6or_value[0] = ((IP6OPT_RTALERT_MLD >> 8) & 0xFF), .ip6or_value[1] = (IP6OPT_RTALERT_MLD & 0xFF) } }; static struct ip6_pktopts mld_po; static __inline void mld_save_context(struct mbuf *m, struct ifnet *ifp) { #ifdef VIMAGE m->m_pkthdr.PH_loc.ptr = ifp->if_vnet; #endif /* VIMAGE */ m->m_pkthdr.flowid = ifp->if_index; } static __inline void mld_scrub_context(struct mbuf *m) { m->m_pkthdr.PH_loc.ptr = NULL; m->m_pkthdr.flowid = 0; } /* * Restore context from a queued output chain. * Return saved ifindex. * * VIMAGE: The assertion is there to make sure that we * actually called CURVNET_SET() with what's in the mbuf chain. */ static __inline uint32_t mld_restore_context(struct mbuf *m) { #if defined(VIMAGE) && defined(INVARIANTS) KASSERT(curvnet == m->m_pkthdr.PH_loc.ptr, ("%s: called when curvnet was not restored: cuvnet %p m ptr %p", __func__, curvnet, m->m_pkthdr.PH_loc.ptr)); #endif return (m->m_pkthdr.flowid); } /* * Retrieve or set threshold between group-source queries in seconds. * * VIMAGE: Assume curvnet set by caller. * SMPng: NOTE: Serialized by MLD lock. */ static int sysctl_mld_gsr(SYSCTL_HANDLER_ARGS) { int error; int i; error = sysctl_wire_old_buffer(req, sizeof(int)); if (error) return (error); MLD_LOCK(); i = V_mld_gsrdelay.tv_sec; error = sysctl_handle_int(oidp, &i, 0, req); if (error || !req->newptr) goto out_locked; if (i < -1 || i >= 60) { error = EINVAL; goto out_locked; } CTR2(KTR_MLD, "change mld_gsrdelay from %d to %d", V_mld_gsrdelay.tv_sec, i); V_mld_gsrdelay.tv_sec = i; out_locked: MLD_UNLOCK(); return (error); } /* * Expose struct mld_ifsoftc to userland, keyed by ifindex. * For use by ifmcstat(8). * * SMPng: NOTE: Does an unlocked ifindex space read. * VIMAGE: Assume curvnet set by caller. The node handler itself * is not directly virtualized. 
*/ static int sysctl_mld_ifinfo(SYSCTL_HANDLER_ARGS) { int *name; int error; u_int namelen; struct ifnet *ifp; struct mld_ifsoftc *mli; name = (int *)arg1; namelen = arg2; if (req->newptr != NULL) return (EPERM); if (namelen != 1) return (EINVAL); error = sysctl_wire_old_buffer(req, sizeof(struct mld_ifinfo)); if (error) return (error); IN6_MULTI_LOCK(); IN6_MULTI_LIST_LOCK(); MLD_LOCK(); if (name[0] <= 0 || name[0] > V_if_index) { error = ENOENT; goto out_locked; } error = ENOENT; ifp = ifnet_byindex(name[0]); if (ifp == NULL) goto out_locked; LIST_FOREACH(mli, &V_mli_head, mli_link) { if (ifp == mli->mli_ifp) { struct mld_ifinfo info; info.mli_version = mli->mli_version; info.mli_v1_timer = mli->mli_v1_timer; info.mli_v2_timer = mli->mli_v2_timer; info.mli_flags = mli->mli_flags; info.mli_rv = mli->mli_rv; info.mli_qi = mli->mli_qi; info.mli_qri = mli->mli_qri; info.mli_uri = mli->mli_uri; error = SYSCTL_OUT(req, &info, sizeof(info)); break; } } out_locked: MLD_UNLOCK(); IN6_MULTI_LIST_UNLOCK(); IN6_MULTI_UNLOCK(); return (error); } /* * Dispatch an entire queue of pending packet chains. * VIMAGE: Assumes the vnet pointer has been set. */ static void mld_dispatch_queue(struct mbufq *mq, int limit) { struct mbuf *m; while ((m = mbufq_dequeue(mq)) != NULL) { CTR3(KTR_MLD, "%s: dispatch %p from %p", __func__, mq, m); mld_dispatch_packet(m); if (--limit == 0) break; } } /* * Filter outgoing MLD report state by group. * * Reports are ALWAYS suppressed for ALL-HOSTS (ff02::1) * and node-local addresses. However, kernel and socket consumers * always embed the KAME scope ID in the address provided, so strip it * when performing comparison. * Note: This is not the same as the *multicast* scope. * * Return zero if the given group is one for which MLD reports * should be suppressed, or non-zero if reports should be issued. 
*/ static __inline int mld_is_addr_reported(const struct in6_addr *addr) { KASSERT(IN6_IS_ADDR_MULTICAST(addr), ("%s: not multicast", __func__)); if (IPV6_ADDR_MC_SCOPE(addr) == IPV6_ADDR_SCOPE_NODELOCAL) return (0); if (IPV6_ADDR_MC_SCOPE(addr) == IPV6_ADDR_SCOPE_LINKLOCAL) { struct in6_addr tmp = *addr; in6_clearscope(&tmp); if (IN6_ARE_ADDR_EQUAL(&tmp, &in6addr_linklocal_allnodes)) return (0); } return (1); } /* * Attach MLD when PF_INET6 is attached to an interface. * * SMPng: Normally called with IF_AFDATA_LOCK held. */ struct mld_ifsoftc * mld_domifattach(struct ifnet *ifp) { struct mld_ifsoftc *mli; CTR3(KTR_MLD, "%s: called for ifp %p(%s)", __func__, ifp, if_name(ifp)); MLD_LOCK(); mli = mli_alloc_locked(ifp); if (!(ifp->if_flags & IFF_MULTICAST)) mli->mli_flags |= MLIF_SILENT; if (mld_use_allow) mli->mli_flags |= MLIF_USEALLOW; MLD_UNLOCK(); return (mli); } /* * VIMAGE: assume curvnet set by caller. */ static struct mld_ifsoftc * mli_alloc_locked(/*const*/ struct ifnet *ifp) { struct mld_ifsoftc *mli; MLD_LOCK_ASSERT(); mli = malloc(sizeof(struct mld_ifsoftc), M_MLD, M_NOWAIT|M_ZERO); if (mli == NULL) goto out; mli->mli_ifp = ifp; mli->mli_version = MLD_VERSION_2; mli->mli_flags = 0; mli->mli_rv = MLD_RV_INIT; mli->mli_qi = MLD_QI_INIT; mli->mli_qri = MLD_QRI_INIT; mli->mli_uri = MLD_URI_INIT; mbufq_init(&mli->mli_gq, MLD_MAX_RESPONSE_PACKETS); LIST_INSERT_HEAD(&V_mli_head, mli, mli_link); CTR2(KTR_MLD, "allocate mld_ifsoftc for ifp %p(%s)", ifp, if_name(ifp)); out: return (mli); } /* * Hook for ifdetach. * * NOTE: Some finalization tasks need to run before the protocol domain * is detached, but also before the link layer does its cleanup. * Run before link-layer cleanup; cleanup groups, but do not free MLD state. * * SMPng: Caller must hold IN6_MULTI_LOCK(). * Must take IF_ADDR_LOCK() to cover if_multiaddrs iterator. * XXX This routine is also bitten by unlocked ifma_protospec access. 
*/
/*
 * NOTE(review): the body asserts IN6_MULTI_LIST_LOCK ownership.
 * Lines prefixed with '-' / '+' inside the function are change markers
 * from the enclosing patch (old / new epoch macro), not C code.
 *
 * Every IPv6 group found on ifp->if_multiaddrs is disconnected onto
 * 'inmh'.  Under MLDv2 its recorded sources are cleared, and a group in
 * MLD_LEAVING_MEMBER state additionally becomes MLD_NOT_MEMBER and
 * drops one reference.
 */
void
mld_ifdetach(struct ifnet *ifp, struct in6_multi_head *inmh)
{
	struct epoch_tracker     et;
	struct mld_ifsoftc	*mli;
	struct ifmultiaddr	*ifma;
	struct in6_multi	*inm;

	CTR3(KTR_MLD, "%s: called for ifp %p(%s)", __func__, ifp,
	    if_name(ifp));

	IN6_MULTI_LIST_LOCK_ASSERT();
	MLD_LOCK();

	mli = MLD_IFINFO(ifp);
	IF_ADDR_WLOCK(ifp);
	/*
	 * Extract list of in6_multi associated with the detaching ifp
	 * which the PF_INET6 layer is about to release.
	 */
-	NET_EPOCH_ENTER(et);
+	NET_EPOCH_ENTER_ET(et);
	CK_STAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
		inm = in6m_ifmultiaddr_get_inm(ifma);
		/* Not an IPv6 group entry (or no longer enqueued). */
		if (inm == NULL)
			continue;
		in6m_disconnect_locked(inmh, inm);

		if (mli->mli_version == MLD_VERSION_2) {
			in6m_clear_recorded(inm);

			/*
			 * We need to release the final reference held
			 * for issuing the INCLUDE {}.
			 */
			if (inm->in6m_state == MLD_LEAVING_MEMBER) {
				inm->in6m_state = MLD_NOT_MEMBER;
				in6m_rele_locked(inmh, inm);
			}
		}
	}
-	NET_EPOCH_EXIT(et);
+	NET_EPOCH_EXIT_ET(et);
	IF_ADDR_WUNLOCK(ifp);
	MLD_UNLOCK();
}

/*
 * Hook for domifdetach.
 * Runs after link-layer cleanup; free MLD state.
 *
 * SMPng: Normally called with IF_AFDATA_LOCK held.
 */
void
mld_domifdetach(struct ifnet *ifp)
{

	CTR3(KTR_MLD, "%s: called for ifp %p(%s)",
	    __func__, ifp, if_name(ifp));

	/* mli_delete_locked() asserts the MLD lock. */
	MLD_LOCK();
	mli_delete_locked(ifp);
	MLD_UNLOCK();
}

/*
 * Find the mld_ifsoftc belonging to 'ifp' on V_mli_head, drain its
 * queued General Query responses, unlink it and free it.
 * Does nothing if no entry matches.  Caller must hold the MLD lock.
 */
static void
mli_delete_locked(const struct ifnet *ifp)
{
	struct mld_ifsoftc *mli, *tmli;

	CTR3(KTR_MLD, "%s: freeing mld_ifsoftc for ifp %p(%s)",
	    __func__, ifp, if_name(ifp));

	MLD_LOCK_ASSERT();

	LIST_FOREACH_SAFE(mli, &V_mli_head, mli_link, tmli) {
		if (mli->mli_ifp == ifp) {
			/*
			 * Free deferred General Query responses.
			 */
			mbufq_drain(&mli->mli_gq);

			LIST_REMOVE(mli, mli_link);

			free(mli, M_MLD);
			/* Stop at the first matching entry. */
			return;
		}
	}
}

/*
 * Process a received MLDv1 general or address-specific query.
 * Assumes that the query header has been pulled up to sizeof(mld_hdr).
 *
 * NOTE: Can't be fully const correct as we temporarily embed scope ID in
 * mld_addr. This is OK as we own the mbuf chain.
*/
/*
 * Returns 0 when the query was processed or deliberately ignored, and
 * EINVAL for a general query (unspecified mld_addr) whose destination
 * is not the link-local all-nodes group.  Both early-return paths are
 * taken before any lock is acquired.
 */
static int
mld_v1_input_query(struct ifnet *ifp, const struct ip6_hdr *ip6,
    /*const*/ struct mld_hdr *mld)
{
	struct ifmultiaddr	*ifma;
	struct mld_ifsoftc	*mli;
	struct in6_multi	*inm;
	int			 is_general_query;
	uint16_t		 timer;
#ifdef KTR
	char			 ip6tbuf[INET6_ADDRSTRLEN];
#endif

	is_general_query = 0;

	/* MLDv1 processing disabled via the v1enable tunable. */
	if (!mld_v1enable) {
		CTR3(KTR_MLD, "ignore v1 query %s on ifp %p(%s)",
		    ip6_sprintf(ip6tbuf, &mld->mld_addr),
		    ifp, if_name(ifp));
		return (0);
	}

	/*
	 * RFC3810 Section 6.2: MLD queries must originate from
	 * a router's link-local address.
	 */
	if (!IN6_IS_SCOPE_LINKLOCAL(&ip6->ip6_src)) {
		CTR3(KTR_MLD, "ignore v1 query src %s on ifp %p(%s)",
		    ip6_sprintf(ip6tbuf, &ip6->ip6_src),
		    ifp, if_name(ifp));
		return (0);
	}

	/*
	 * Do address field validation upfront before we accept
	 * the query.
	 */
	if (IN6_IS_ADDR_UNSPECIFIED(&mld->mld_addr)) {
		/*
		 * MLDv1 General Query.
		 * If this was not sent to the all-nodes group, ignore it.
		 */
		struct in6_addr		 dst;

		/* Compare on a scope-stripped copy of the destination. */
		dst = ip6->ip6_dst;
		in6_clearscope(&dst);
		if (!IN6_ARE_ADDR_EQUAL(&dst, &in6addr_linklocal_allnodes))
			return (EINVAL);
		is_general_query = 1;
	} else {
		/*
		 * Embed scope ID of receiving interface in MLD query for
		 * lookup whilst we don't hold other locks.
		 */
		in6_setscope(&mld->mld_addr, ifp, NULL);
	}

	/* Lock order: list mutex, MLD lock, then (below) IF_ADDR lock. */
	IN6_MULTI_LIST_LOCK();
	MLD_LOCK();

	/*
	 * Switch to MLDv1 host compatibility mode.
	 */
	mli = MLD_IFINFO(ifp);
	KASSERT(mli != NULL, ("%s: no mld_ifsoftc for ifp %p", __func__, ifp));
	mld_set_version(mli, MLD_VERSION_1);

	/* Scale the advertised max delay to timer ticks; never zero. */
	timer = (ntohs(mld->mld_maxdelay) * PR_FASTHZ) / MLD_TIMER_SCALE;
	if (timer == 0)
		timer = 1;

	IF_ADDR_RLOCK(ifp);
	if (is_general_query) {
		/*
		 * For each reporting group joined on this
		 * interface, kick the report timer.
		 */
		CTR2(KTR_MLD, "process v1 general query on ifp %p(%s)",
			 ifp, if_name(ifp));
		CK_STAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
			inm = in6m_ifmultiaddr_get_inm(ifma);
			if (inm == NULL)
				continue;
			mld_v1_update_group(inm, timer);
		}
	} else {
		/*
		 * MLDv1 Group-Specific Query.
		 * If this is a group-specific MLDv1 query, we need only
		 * look up the single group to process it.
		 */
		inm = in6m_lookup_locked(ifp, &mld->mld_addr);
		if (inm != NULL) {
			CTR3(KTR_MLD, "process v1 query %s on ifp %p(%s)",
			    ip6_sprintf(ip6tbuf, &mld->mld_addr),
			    ifp, if_name(ifp));
			mld_v1_update_group(inm, timer);
		}
		/* XXX Clear embedded scope ID as userland won't expect it. */
		in6_clearscope(&mld->mld_addr);
	}

	/* Release in the reverse of the acquisition order. */
	IF_ADDR_RUNLOCK(ifp);
	MLD_UNLOCK();
	IN6_MULTI_LIST_UNLOCK();

	return (0);
}

/*
 * Update the report timer on a group in response to an MLDv1 query.
 *
 * If we are becoming the reporting member for this group, start the timer.
 * If we already are the reporting member for this group, and timer is
 * below the threshold, reset it.
 *
 * We may be updating the group for the first time since we switched
 * to MLDv2. If we are, then we must clear any recorded source lists,
 * and transition to REPORTING state; the group timer is overloaded
 * for group and group-source query responses.
 *
 * Unlike MLDv2, the delay per group should be jittered
 * to avoid bursts of MLDv1 reports.
*/ static void mld_v1_update_group(struct in6_multi *inm, const int timer) { #ifdef KTR char ip6tbuf[INET6_ADDRSTRLEN]; #endif CTR4(KTR_MLD, "%s: %s/%s timer=%d", __func__, ip6_sprintf(ip6tbuf, &inm->in6m_addr), if_name(inm->in6m_ifp), timer); IN6_MULTI_LIST_LOCK_ASSERT(); switch (inm->in6m_state) { case MLD_NOT_MEMBER: case MLD_SILENT_MEMBER: break; case MLD_REPORTING_MEMBER: if (inm->in6m_timer != 0 && inm->in6m_timer <= timer) { CTR1(KTR_MLD, "%s: REPORTING and timer running, " "skipping.", __func__); break; } /* FALLTHROUGH */ case MLD_SG_QUERY_PENDING_MEMBER: case MLD_G_QUERY_PENDING_MEMBER: case MLD_IDLE_MEMBER: case MLD_LAZY_MEMBER: case MLD_AWAKENING_MEMBER: CTR1(KTR_MLD, "%s: ->REPORTING", __func__); inm->in6m_state = MLD_REPORTING_MEMBER; inm->in6m_timer = MLD_RANDOM_DELAY(timer); V_current_state_timers_running6 = 1; break; case MLD_SLEEPING_MEMBER: CTR1(KTR_MLD, "%s: ->AWAKENING", __func__); inm->in6m_state = MLD_AWAKENING_MEMBER; break; case MLD_LEAVING_MEMBER: break; } } /* * Process a received MLDv2 general, group-specific or * group-and-source-specific query. * * Assumes that the query header has been pulled up to sizeof(mldv2_query). * * Return 0 if successful, otherwise an appropriate error code is returned. */ static int mld_v2_input_query(struct ifnet *ifp, const struct ip6_hdr *ip6, struct mbuf *m, const int off, const int icmp6len) { struct mld_ifsoftc *mli; struct mldv2_query *mld; struct in6_multi *inm; uint32_t maxdelay, nsrc, qqi; int is_general_query; uint16_t timer; uint8_t qrv; #ifdef KTR char ip6tbuf[INET6_ADDRSTRLEN]; #endif if (!mld_v2enable) { CTR3(KTR_MLD, "ignore v2 query src %s on ifp %p(%s)", ip6_sprintf(ip6tbuf, &ip6->ip6_src), ifp, if_name(ifp)); return (0); } /* * RFC3810 Section 6.2: MLD queries must originate from * a router's link-local address. 
*/ if (!IN6_IS_SCOPE_LINKLOCAL(&ip6->ip6_src)) { CTR3(KTR_MLD, "ignore v1 query src %s on ifp %p(%s)", ip6_sprintf(ip6tbuf, &ip6->ip6_src), ifp, if_name(ifp)); return (0); } is_general_query = 0; CTR2(KTR_MLD, "input v2 query on ifp %p(%s)", ifp, if_name(ifp)); mld = (struct mldv2_query *)(mtod(m, uint8_t *) + off); maxdelay = ntohs(mld->mld_maxdelay); /* in 1/10ths of a second */ if (maxdelay >= 32768) { maxdelay = (MLD_MRC_MANT(maxdelay) | 0x1000) << (MLD_MRC_EXP(maxdelay) + 3); } timer = (maxdelay * PR_FASTHZ) / MLD_TIMER_SCALE; if (timer == 0) timer = 1; qrv = MLD_QRV(mld->mld_misc); if (qrv < 2) { CTR3(KTR_MLD, "%s: clamping qrv %d to %d", __func__, qrv, MLD_RV_INIT); qrv = MLD_RV_INIT; } qqi = mld->mld_qqi; if (qqi >= 128) { qqi = MLD_QQIC_MANT(mld->mld_qqi) << (MLD_QQIC_EXP(mld->mld_qqi) + 3); } nsrc = ntohs(mld->mld_numsrc); if (nsrc > MLD_MAX_GS_SOURCES) return (EMSGSIZE); if (icmp6len < sizeof(struct mldv2_query) + (nsrc * sizeof(struct in6_addr))) return (EMSGSIZE); /* * Do further input validation upfront to avoid resetting timers * should we need to discard this query. */ if (IN6_IS_ADDR_UNSPECIFIED(&mld->mld_addr)) { /* * A general query with a source list has undefined * behaviour; discard it. */ if (nsrc > 0) return (EINVAL); is_general_query = 1; } else { /* * Embed scope ID of receiving interface in MLD query for * lookup whilst we don't hold other locks (due to KAME * locking lameness). We own this mbuf chain just now. */ in6_setscope(&mld->mld_addr, ifp, NULL); } IN6_MULTI_LIST_LOCK(); MLD_LOCK(); mli = MLD_IFINFO(ifp); KASSERT(mli != NULL, ("%s: no mld_ifsoftc for ifp %p", __func__, ifp)); /* * Discard the v2 query if we're in Compatibility Mode. * The RFC is pretty clear that hosts need to stay in MLDv1 mode * until the Old Version Querier Present timer expires. 
*/ if (mli->mli_version != MLD_VERSION_2) goto out_locked; mld_set_version(mli, MLD_VERSION_2); mli->mli_rv = qrv; mli->mli_qi = qqi; mli->mli_qri = maxdelay; CTR4(KTR_MLD, "%s: qrv %d qi %d maxdelay %d", __func__, qrv, qqi, maxdelay); if (is_general_query) { /* * MLDv2 General Query. * * Schedule a current-state report on this ifp for * all groups, possibly containing source lists. * * If there is a pending General Query response * scheduled earlier than the selected delay, do * not schedule any other reports. * Otherwise, reset the interface timer. */ CTR2(KTR_MLD, "process v2 general query on ifp %p(%s)", ifp, if_name(ifp)); if (mli->mli_v2_timer == 0 || mli->mli_v2_timer >= timer) { mli->mli_v2_timer = MLD_RANDOM_DELAY(timer); V_interface_timers_running6 = 1; } } else { /* * MLDv2 Group-specific or Group-and-source-specific Query. * * Group-source-specific queries are throttled on * a per-group basis to defeat denial-of-service attempts. * Queries for groups we are not a member of on this * link are simply ignored. */ IF_ADDR_RLOCK(ifp); inm = in6m_lookup_locked(ifp, &mld->mld_addr); if (inm == NULL) { IF_ADDR_RUNLOCK(ifp); goto out_locked; } if (nsrc > 0) { if (!ratecheck(&inm->in6m_lastgsrtv, &V_mld_gsrdelay)) { CTR1(KTR_MLD, "%s: GS query throttled.", __func__); IF_ADDR_RUNLOCK(ifp); goto out_locked; } } CTR2(KTR_MLD, "process v2 group query on ifp %p(%s)", ifp, if_name(ifp)); /* * If there is a pending General Query response * scheduled sooner than the selected delay, no * further report need be scheduled. * Otherwise, prepare to respond to the * group-specific or group-and-source query. */ if (mli->mli_v2_timer == 0 || mli->mli_v2_timer >= timer) mld_v2_process_group_query(inm, mli, timer, m, off); /* XXX Clear embedded scope ID as userland won't expect it. 
*/ in6_clearscope(&mld->mld_addr); IF_ADDR_RUNLOCK(ifp); } out_locked: MLD_UNLOCK(); IN6_MULTI_LIST_UNLOCK(); return (0); } /* * Process a received MLDv2 group-specific or group-and-source-specific * query. * * inm is the group named by the query, mli the per-link MLD state, timer * the value fed to MLD_RANDOM_DELAY() when a response is scheduled, and * m0/off locate the MLDv2 query header inside the mbuf chain. * * Return <0 if any error occurred. Currently this is ignored. */ static int mld_v2_process_group_query(struct in6_multi *inm, struct mld_ifsoftc *mli, int timer, struct mbuf *m0, const int off) { struct mldv2_query *mld; int retval; uint16_t nsrc; IN6_MULTI_LIST_LOCK_ASSERT(); MLD_LOCK_ASSERT(); retval = 0; /* The query header sits 'off' bytes into the first mbuf's data. */ mld = (struct mldv2_query *)(mtod(m0, uint8_t *) + off); /* Only the three states in the second case group go on to schedule a response; every other listed state returns 0 at once (the break after the return is unreachable). */ switch (inm->in6m_state) { case MLD_NOT_MEMBER: case MLD_SILENT_MEMBER: case MLD_SLEEPING_MEMBER: case MLD_LAZY_MEMBER: case MLD_AWAKENING_MEMBER: case MLD_IDLE_MEMBER: case MLD_LEAVING_MEMBER: return (retval); break; case MLD_REPORTING_MEMBER: case MLD_G_QUERY_PENDING_MEMBER: case MLD_SG_QUERY_PENDING_MEMBER: break; } /* The source count is converted from network byte order. */ nsrc = ntohs(mld->mld_numsrc); /* * Deal with group-specific queries upfront. * If any group query is already pending, purge any recorded * source-list state if it exists, and schedule a query response * for this group-specific query. */ if (nsrc == 0) { if (inm->in6m_state == MLD_G_QUERY_PENDING_MEMBER || inm->in6m_state == MLD_SG_QUERY_PENDING_MEMBER) { in6m_clear_recorded(inm); /* Keep whichever of the pending and the new timer value is smaller. */ timer = min(inm->in6m_timer, timer); } inm->in6m_state = MLD_G_QUERY_PENDING_MEMBER; inm->in6m_timer = MLD_RANDOM_DELAY(timer); V_current_state_timers_running6 = 1; return (retval); } /* * Deal with the case where a group-and-source-specific query has * been received but a group-specific query is already pending. */ if (inm->in6m_state == MLD_G_QUERY_PENDING_MEMBER) { timer = min(inm->in6m_timer, timer); inm->in6m_timer = MLD_RANDOM_DELAY(timer); V_current_state_timers_running6 = 1; return (retval); } /* * Finally, deal with the case where a group-and-source-specific * query has been received, where a response to a previous g-s-r * query exists, or none exists. 
* In this case, we need to parse the source-list which the Querier * has provided us with and check if we have any source list filter * entries at T1 for these sources. If we do not, there is no need * to schedule a report and the query may be dropped. * If we do, we must record them and schedule a current-state * report for those sources. */ if (inm->in6m_nsrc > 0) { struct mbuf *m; uint8_t *sp; int i, nrecorded; int soff; m = m0; /* The source addresses start right after the fixed-size query header. */ soff = off + sizeof(struct mldv2_query); nrecorded = 0; for (i = 0; i < nsrc; i++) { sp = mtod(m, uint8_t *) + soff; retval = in6m_record_source(inm, (const struct in6_addr *)sp); /* A negative result aborts the walk; non-negative results are summed into nrecorded. */ if (retval < 0) break; nrecorded += retval; soff += sizeof(struct in6_addr); /* Once soff reaches the end of this mbuf's data, carry the remainder into the next mbuf; stop if the chain ends. */ if (soff >= m->m_len) { soff = soff - m->m_len; m = m->m_next; if (m == NULL) break; } } if (nrecorded > 0) { CTR1(KTR_MLD, "%s: schedule response to SG query", __func__); inm->in6m_state = MLD_SG_QUERY_PENDING_MEMBER; inm->in6m_timer = MLD_RANDOM_DELAY(timer); V_current_state_timers_running6 = 1; } } /* retval is the last in6m_record_source() result, or 0 if the source list was never walked. */ return (retval); } /* * Process a received MLDv1 host membership report. * Assumes mld points to mld_hdr in pulled up mbuf chain. * * Returns 0 when the report was processed or deliberately ignored, and * EINVAL when the source or destination address check fails. * * NOTE: Can't be fully const correct as we temporarily embed scope ID in * mld_addr. This is OK as we own the mbuf chain. */ static int mld_v1_input_report(struct ifnet *ifp, const struct ip6_hdr *ip6, /*const*/ struct mld_hdr *mld) { struct in6_addr src, dst; struct in6_ifaddr *ia; struct in6_multi *inm; #ifdef KTR char ip6tbuf[INET6_ADDRSTRLEN]; #endif if (!mld_v1enable) { CTR3(KTR_MLD, "ignore v1 report %s on ifp %p(%s)", ip6_sprintf(ip6tbuf, &mld->mld_addr), ifp, if_name(ifp)); return (0); } /* Reports received on a loopback interface are ignored. */ if (ifp->if_flags & IFF_LOOPBACK) return (0); /* * MLDv1 reports must originate from a host's link-local address, * or the unspecified address (when booting). 
*/ src = ip6->ip6_src; in6_clearscope(&src); /* NOTE(review): the two trace strings below say "query" although this is the report handler -- confirm whether that wording is intended. */ if (!IN6_IS_SCOPE_LINKLOCAL(&src) && !IN6_IS_ADDR_UNSPECIFIED(&src)) { CTR3(KTR_MLD, "ignore v1 query src %s on ifp %p(%s)", ip6_sprintf(ip6tbuf, &ip6->ip6_src), ifp, if_name(ifp)); return (EINVAL); } /* * RFC2710 Section 4: MLDv1 reports must pertain to a multicast * group, and must be directed to the group itself. */ dst = ip6->ip6_dst; in6_clearscope(&dst); if (!IN6_IS_ADDR_MULTICAST(&mld->mld_addr) || !IN6_ARE_ADDR_EQUAL(&mld->mld_addr, &dst)) { CTR3(KTR_MLD, "ignore v1 query dst %s on ifp %p(%s)", ip6_sprintf(ip6tbuf, &ip6->ip6_dst), ifp, if_name(ifp)); return (EINVAL); } /* * Make sure we don't hear our own membership report, as fast * leave requires knowing that we are the only member of a * group. Assume we used the link-local address if available, * otherwise look for ::. * * XXX Note that scope ID comparison is needed for the address * returned by in6ifa_ifpforlinklocal(), but SHOULD NOT be * performed for the on-wire address. */ ia = in6ifa_ifpforlinklocal(ifp, IN6_IFF_NOTREADY|IN6_IFF_ANYCAST); /* Whenever ia is non-NULL it is handed back with ifa_free() on both the early-return and the fall-through path. */ if ((ia && IN6_ARE_ADDR_EQUAL(&ip6->ip6_src, IA6_IN6(ia))) || (ia == NULL && IN6_IS_ADDR_UNSPECIFIED(&src))) { if (ia != NULL) ifa_free(&ia->ia_ifa); return (0); } if (ia != NULL) ifa_free(&ia->ia_ifa); CTR3(KTR_MLD, "process v1 report %s on ifp %p(%s)", ip6_sprintf(ip6tbuf, &mld->mld_addr), ifp, if_name(ifp)); /* * Embed scope ID of receiving interface in MLD query for lookup * whilst we don't hold other locks (due to KAME locking lameness). */ if (!IN6_IS_ADDR_UNSPECIFIED(&mld->mld_addr)) in6_setscope(&mld->mld_addr, ifp, NULL); /* Locks are taken here in the order multi-list, MLD, interface address and dropped in the reverse order at out_locked. */ IN6_MULTI_LIST_LOCK(); MLD_LOCK(); IF_ADDR_RLOCK(ifp); /* * MLDv1 report suppression. * If we are a member of this group, and our membership should be * reported, and our group timer is pending or about to be reset, * stop our group timer by transitioning to the 'lazy' state. 
*/ inm = in6m_lookup_locked(ifp, &mld->mld_addr); if (inm != NULL) { struct mld_ifsoftc *mli; mli = inm->in6m_mli; KASSERT(mli != NULL, ("%s: no mli for ifp %p", __func__, ifp)); /* * If we are in MLDv2 host mode, do not allow the * other host's MLDv1 report to suppress our reports. */ if (mli->mli_version == MLD_VERSION_2) goto out_locked; /* The group timer is cleared for every state, including those whose case below does nothing else. */ inm->in6m_timer = 0; switch (inm->in6m_state) { case MLD_NOT_MEMBER: case MLD_SILENT_MEMBER: case MLD_SLEEPING_MEMBER: break; case MLD_REPORTING_MEMBER: case MLD_IDLE_MEMBER: case MLD_AWAKENING_MEMBER: CTR3(KTR_MLD, "report suppressed for %s on ifp %p(%s)", ip6_sprintf(ip6tbuf, &mld->mld_addr), ifp, if_name(ifp)); /* FALLTHROUGH */ case MLD_LAZY_MEMBER: inm->in6m_state = MLD_LAZY_MEMBER; break; case MLD_G_QUERY_PENDING_MEMBER: case MLD_SG_QUERY_PENDING_MEMBER: case MLD_LEAVING_MEMBER: break; } } out_locked: IF_ADDR_RUNLOCK(ifp); MLD_UNLOCK(); IN6_MULTI_LIST_UNLOCK(); /* XXX Clear embedded scope ID as userland won't expect it. */ in6_clearscope(&mld->mld_addr); return (0); } /* * MLD input path. * * m is the received packet, off the offset of the MLD header within it, * and icmp6len the length used to tell MLDv1 queries from MLDv2 queries. * * Assume query messages which fit in a single ICMPv6 message header * have been pulled up. * Assume that userland will want to see the message, even if it * otherwise fails kernel input validation; do not free it. * Pullup may however free the mbuf chain m if it fails. * * Return IPPROTO_DONE if we freed m. Otherwise, return 0. */ int mld_input(struct mbuf *m, int off, int icmp6len) { struct ifnet *ifp; struct ip6_hdr *ip6; struct mld_hdr *mld; int mldlen; CTR3(KTR_MLD, "%s: called w/mbuf (%p,%d)", __func__, m, off); ifp = m->m_pkthdr.rcvif; ip6 = mtod(m, struct ip6_hdr *); /* Pullup to appropriate size. 
*/ /* NOTE(review): mld_type is read through this pointer before IP6_EXTHDR_GET() runs; this relies on the caller having already pulled up at least the basic header -- confirm. */ mld = (struct mld_hdr *)(mtod(m, uint8_t *) + off); /* Queries long enough to be MLDv2 get the larger pullup length; everything else gets the basic MLD header length. */ if (mld->mld_type == MLD_LISTENER_QUERY && icmp6len >= sizeof(struct mldv2_query)) { mldlen = sizeof(struct mldv2_query); } else { mldlen = sizeof(struct mld_hdr); } IP6_EXTHDR_GET(mld, struct mld_hdr *, m, off, mldlen); if (mld == NULL) { ICMP6STAT_INC(icp6s_badlen); return (IPPROTO_DONE); } /* * Userland needs to see all of this traffic for implementing * the endpoint discovery portion of multicast routing. */ /* Handler failures and successes both end in return (0); a query whose length is neither exactly sizeof(struct mld_hdr) nor at least sizeof(struct mldv2_query) is counted but handed to neither query handler. */ switch (mld->mld_type) { case MLD_LISTENER_QUERY: icmp6_ifstat_inc(ifp, ifs6_in_mldquery); if (icmp6len == sizeof(struct mld_hdr)) { if (mld_v1_input_query(ifp, ip6, mld) != 0) return (0); } else if (icmp6len >= sizeof(struct mldv2_query)) { if (mld_v2_input_query(ifp, ip6, m, off, icmp6len) != 0) return (0); } break; case MLD_LISTENER_REPORT: icmp6_ifstat_inc(ifp, ifs6_in_mldreport); if (mld_v1_input_report(ifp, ip6, mld) != 0) return (0); break; case MLDV2_LISTENER_REPORT: icmp6_ifstat_inc(ifp, ifs6_in_mldreport); break; case MLD_LISTENER_DONE: icmp6_ifstat_inc(ifp, ifs6_in_mlddone); break; default: break; } return (0); } /* * Fast timeout handler (global). * VIMAGE: Timeout handlers are expected to service all vimages. */ void mld_fasttimo(void) { struct in6_multi_head inmh; VNET_ITERATOR_DECL(vnet_iter); /* One list is shared by all vnets visited below and is handed to in6m_release_list_deferred() once, after the vnet list lock is dropped. */ SLIST_INIT(&inmh); VNET_LIST_RLOCK_NOSLEEP(); VNET_FOREACH(vnet_iter) { CURVNET_SET(vnet_iter); mld_fasttimo_vnet(&inmh); CURVNET_RESTORE(); } VNET_LIST_RUNLOCK_NOSLEEP(); in6m_release_list_deferred(&inmh); } /* * Fast timeout handler (per-vnet). * * VIMAGE: Assume caller has set up our curvnet. 
*/ /* inmh is the caller's list used to defer work on in6_multi entries: the group-timer helpers are handed it, and in MLDv1 mode this function drains it to transmit reports. */ static void mld_fasttimo_vnet(struct in6_multi_head *inmh) { struct epoch_tracker et; struct mbufq scq; /* State-change packets */ struct mbufq qrq; /* Query response packets */ struct ifnet *ifp; struct mld_ifsoftc *mli; struct ifmultiaddr *ifma; struct in6_multi *inm; int uri_fasthz; uri_fasthz = 0; /* * Quick check to see if any work needs to be done, in order to * minimize the overhead of fasttimo processing. * SMPng: XXX Unlocked reads. */ if (!V_current_state_timers_running6 && !V_interface_timers_running6 && !V_state_change_timers_running6) return; IN6_MULTI_LIST_LOCK(); MLD_LOCK(); /* * MLDv2 General Query response timer processing. */ if (V_interface_timers_running6) { CTR1(KTR_MLD, "%s: interface timers running", __func__); /* Clear the flag, then set it again only if some link still has a non-zero timer after this tick. */ V_interface_timers_running6 = 0; LIST_FOREACH(mli, &V_mli_head, mli_link) { if (mli->mli_v2_timer == 0) { /* Do nothing. */ } else if (--mli->mli_v2_timer == 0) { mld_v2_dispatch_general_query(mli); } else { V_interface_timers_running6 = 1; } } } if (!V_current_state_timers_running6 && !V_state_change_timers_running6) goto out_locked; /* Both flags are cleared here; the per-group helpers called below set them again when a timer is still pending. */ V_current_state_timers_running6 = 0; V_state_change_timers_running6 = 0; CTR1(KTR_MLD, "%s: state change timers running", __func__); /* * MLD host report and state-change timer processing. * Note: Processing a v2 group timer may remove a node. 
*/ LIST_FOREACH(mli, &V_mli_head, mli_link) { ifp = mli->mli_ifp; /* qrq, scq and uri_fasthz are only (re)initialised for links running MLDv2; they are only consumed in the MLD_VERSION_2 cases below. */ if (mli->mli_version == MLD_VERSION_2) { uri_fasthz = MLD_RANDOM_DELAY(mli->mli_uri * PR_FASTHZ); mbufq_init(&qrq, MLD_MAX_G_GS_PACKETS); mbufq_init(&scq, MLD_MAX_STATE_CHANGE_PACKETS); } IF_ADDR_WLOCK(ifp); /* NOTE(review): the "- ... + ..." pair that follows is unified-diff residue (old and new revision of one statement), not C; the "+" form is the newer revision. */ - NET_EPOCH_ENTER(et); + NET_EPOCH_ENTER_ET(et); CK_STAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) { inm = in6m_ifmultiaddr_get_inm(ifma); if (inm == NULL) continue; switch (mli->mli_version) { case MLD_VERSION_1: mld_v1_process_group_timer(inmh, inm); break; case MLD_VERSION_2: mld_v2_process_group_timers(inmh, &qrq, &scq, inm, uri_fasthz); break; } } /* The address lock is dropped before anything is transmitted; the epoch section stays open until after the dispatch below. */ IF_ADDR_WUNLOCK(ifp); switch (mli->mli_version) { case MLD_VERSION_1: /* * Transmit reports for this lifecycle. This * is done while not holding IF_ADDR_LOCK * since this can call * in6ifa_ifpforlinklocal() which locks * IF_ADDR_LOCK internally as well as * ip6_output() to transmit a packet. */ while ((inm = SLIST_FIRST(inmh)) != NULL) { SLIST_REMOVE_HEAD(inmh, in6m_defer); (void)mld_v1_transmit_report(inm, MLD_LISTENER_REPORT); } break; case MLD_VERSION_2: mld_dispatch_queue(&qrq, 0); mld_dispatch_queue(&scq, 0); break; } /* NOTE(review): diff residue again -- see the matching note at the epoch entry above. */ - NET_EPOCH_EXIT(et); + NET_EPOCH_EXIT_ET(et); } out_locked: MLD_UNLOCK(); IN6_MULTI_LIST_UNLOCK(); } /* * Update host report group timer. * Will update the global pending timer flags. 
*/ /* inmh receives groups whose report timer just expired while in the REPORTING state; inm is the group whose timer is ticked. */ static void mld_v1_process_group_timer(struct in6_multi_head *inmh, struct in6_multi *inm) { int report_timer_expired; IN6_MULTI_LIST_LOCK_ASSERT(); MLD_LOCK_ASSERT(); /* Tick the timer: an idle timer and a timer reaching zero both fall through to the state switch; a timer that is still running sets the global flag and returns. */ if (inm->in6m_timer == 0) { report_timer_expired = 0; } else if (--inm->in6m_timer == 0) { report_timer_expired = 1; } else { V_current_state_timers_running6 = 1; return; } switch (inm->in6m_state) { case MLD_NOT_MEMBER: case MLD_SILENT_MEMBER: case MLD_IDLE_MEMBER: case MLD_LAZY_MEMBER: case MLD_SLEEPING_MEMBER: case MLD_AWAKENING_MEMBER: break; case MLD_REPORTING_MEMBER: if (report_timer_expired) { inm->in6m_state = MLD_IDLE_MEMBER; /* Queue the group on inmh rather than transmitting from here. */ SLIST_INSERT_HEAD(inmh, inm, in6m_defer); } break; case MLD_G_QUERY_PENDING_MEMBER: case MLD_SG_QUERY_PENDING_MEMBER: case MLD_LEAVING_MEMBER: break; } } /* * Update a group's timers for MLDv2. * Will update the global pending timer flags. * Note: Unlocked read from mli. * * qrq collects query-response records and scq collects state-change * records; uri_fasthz is the value the state-change timer is re-armed * with when retransmissions remain. */ static void mld_v2_process_group_timers(struct in6_multi_head *inmh, struct mbufq *qrq, struct mbufq *scq, struct in6_multi *inm, const int uri_fasthz) { int query_response_timer_expired; int state_change_retransmit_timer_expired; #ifdef KTR char ip6tbuf[INET6_ADDRSTRLEN]; #endif IN6_MULTI_LIST_LOCK_ASSERT(); MLD_LOCK_ASSERT(); query_response_timer_expired = 0; state_change_retransmit_timer_expired = 0; /* * During a transition from compatibility mode back to MLDv2, * a group record in REPORTING state may still have its group * timer active. This is a no-op in this function; it is easier * to deal with it here than to complicate the slow-timeout path. */ if (inm->in6m_timer == 0) { query_response_timer_expired = 0; } else if (--inm->in6m_timer == 0) { query_response_timer_expired = 1; } else { V_current_state_timers_running6 = 1; } if (inm->in6m_sctimer == 0) { state_change_retransmit_timer_expired = 0; } else if (--inm->in6m_sctimer == 0) { state_change_retransmit_timer_expired = 1; } else { V_state_change_timers_running6 = 1; } /* We are in fasttimo, so be quick about it. 
*/ if (!state_change_retransmit_timer_expired && !query_response_timer_expired) return; switch (inm->in6m_state) { case MLD_NOT_MEMBER: case MLD_SILENT_MEMBER: case MLD_SLEEPING_MEMBER: case MLD_LAZY_MEMBER: case MLD_AWAKENING_MEMBER: case MLD_IDLE_MEMBER: break; case MLD_G_QUERY_PENDING_MEMBER: case MLD_SG_QUERY_PENDING_MEMBER: /* * Respond to a previously pending Group-Specific * or Group-and-Source-Specific query by enqueueing * the appropriate Current-State report for * immediate transmission. */ if (query_response_timer_expired) { int retval; /* The enqueue result is only traced; the group returns to REPORTING and its recorded sources are cleared regardless. */ retval = mld_v2_enqueue_group_record(qrq, inm, 0, 1, (inm->in6m_state == MLD_SG_QUERY_PENDING_MEMBER), 0); CTR2(KTR_MLD, "%s: enqueue record = %d", __func__, retval); inm->in6m_state = MLD_REPORTING_MEMBER; in6m_clear_recorded(inm); } /* FALLTHROUGH */ case MLD_REPORTING_MEMBER: case MLD_LEAVING_MEMBER: if (state_change_retransmit_timer_expired) { /* * State-change retransmission timer fired. * If there are any further pending retransmissions, * set the global pending state-change flag, and * reset the timer. */ if (--inm->in6m_scrv > 0) { inm->in6m_sctimer = uri_fasthz; V_state_change_timers_running6 = 1; } /* * Retransmit the previously computed state-change * report. If there are no further pending * retransmissions, the mbuf queue will be consumed. * Update T0 state to T1 as we have now sent * a state-change. */ (void)mld_v2_merge_state_changes(inm, scq); in6m_commit(inm); CTR3(KTR_MLD, "%s: T1 -> T0 for %s/%s", __func__, ip6_sprintf(ip6tbuf, &inm->in6m_addr), if_name(inm->in6m_ifp)); /* * If we are leaving the group for good, make sure * we release MLD's reference to it. * This release must be deferred using a SLIST, * as we are called from a loop which traverses * the in_ifmultiaddr TAILQ. 
*/ if (inm->in6m_state == MLD_LEAVING_MEMBER && inm->in6m_scrv == 0) { inm->in6m_state = MLD_NOT_MEMBER; in6m_disconnect_locked(inmh, inm); in6m_rele_locked(inmh, inm); } } break; } } /* * Switch to a different version on the given interface, * as per Section 9.12. * * NOTE(review): the only assignment to mli_version made here is to * MLD_VERSION_1, and only while mli_v1_timer is positive; passing any * other version merely skips the timer computation. */ static void mld_set_version(struct mld_ifsoftc *mli, const int version) { int old_version_timer; MLD_LOCK_ASSERT(); CTR4(KTR_MLD, "%s: switching to v%d on ifp %p(%s)", __func__, version, mli->mli_ifp, if_name(mli->mli_ifp)); if (version == MLD_VERSION_1) { /* * Compute the "Older Version Querier Present" timer as per * Section 9.12. */ old_version_timer = (mli->mli_rv * mli->mli_qi) + mli->mli_qri; /* Scale by PR_SLOWHZ; the timer is decremented from the slow-timeout path (mld_v1_process_querier_timers). */ old_version_timer *= PR_SLOWHZ; mli->mli_v1_timer = old_version_timer; } if (mli->mli_v1_timer > 0 && mli->mli_version != MLD_VERSION_1) { mli->mli_version = MLD_VERSION_1; mld_v2_cancel_link_timers(mli); } } /* * Cancel pending MLDv2 timers for the given link and all groups * joined on it; state-change, general-query, and group-query timers. */ static void mld_v2_cancel_link_timers(struct mld_ifsoftc *mli) { struct epoch_tracker et; struct in6_multi_head inmh; struct ifmultiaddr *ifma; struct ifnet *ifp; struct in6_multi *inm; CTR3(KTR_MLD, "%s: cancel v2 timers on ifp %p(%s)", __func__, mli->mli_ifp, if_name(mli->mli_ifp)); /* Local list for releases deferred by in6m_rele_locked(); it is processed after the address lock is dropped. */ SLIST_INIT(&inmh); IN6_MULTI_LIST_LOCK_ASSERT(); MLD_LOCK_ASSERT(); /* * Fast-track this potentially expensive operation * by checking all the global 'timer pending' flags. 
*/ if (!V_interface_timers_running6 && !V_state_change_timers_running6 && !V_current_state_timers_running6) return; mli->mli_v2_timer = 0; ifp = mli->mli_ifp; IF_ADDR_WLOCK(ifp); /* NOTE(review): the "- ... + ..." pair that follows is unified-diff residue (old and new revision of one statement), not C; the "+" form is the newer revision. */ - NET_EPOCH_ENTER(et); + NET_EPOCH_ENTER_ET(et); CK_STAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) { inm = in6m_ifmultiaddr_get_inm(ifma); if (inm == NULL) continue; /* The last three case groups cascade: LEAVING releases a reference, then shares the recorded-source purge with the query-pending states, and all of them end by zeroing both timers, returning to REPORTING and draining the state-change queue. */ switch (inm->in6m_state) { case MLD_NOT_MEMBER: case MLD_SILENT_MEMBER: case MLD_IDLE_MEMBER: case MLD_LAZY_MEMBER: case MLD_SLEEPING_MEMBER: case MLD_AWAKENING_MEMBER: break; case MLD_LEAVING_MEMBER: /* * If we are leaving the group and switching * version, we need to release the final * reference held for issuing the INCLUDE {}. */ in6m_rele_locked(&inmh, inm); /* FALLTHROUGH */ case MLD_G_QUERY_PENDING_MEMBER: case MLD_SG_QUERY_PENDING_MEMBER: in6m_clear_recorded(inm); /* FALLTHROUGH */ case MLD_REPORTING_MEMBER: inm->in6m_sctimer = 0; inm->in6m_timer = 0; inm->in6m_state = MLD_REPORTING_MEMBER; /* * Free any pending MLDv2 state-change records. */ mbufq_drain(&inm->in6m_scq); break; } } /* NOTE(review): diff residue again -- see the matching note at the epoch entry above. */ - NET_EPOCH_EXIT(et); + NET_EPOCH_EXIT_ET(et); IF_ADDR_WUNLOCK(ifp); in6m_release_list_deferred(&inmh); } /* * Global slowtimo handler. * VIMAGE: Timeout handlers are expected to service all vimages. */ void mld_slowtimo(void) { VNET_ITERATOR_DECL(vnet_iter); VNET_LIST_RLOCK_NOSLEEP(); VNET_FOREACH(vnet_iter) { CURVNET_SET(vnet_iter); mld_slowtimo_vnet(); CURVNET_RESTORE(); } VNET_LIST_RUNLOCK_NOSLEEP(); } /* * Per-vnet slowtimo handler. * Runs the querier-present timer of every link on V_mli_head under MLD_LOCK. */ static void mld_slowtimo_vnet(void) { struct mld_ifsoftc *mli; MLD_LOCK(); LIST_FOREACH(mli, &V_mli_head, mli_link) { mld_v1_process_querier_timers(mli); } MLD_UNLOCK(); } /* * Update the Older Version Querier Present timers for a link. * See Section 9.12 of RFC 3810. */ static void mld_v1_process_querier_timers(struct mld_ifsoftc *mli) { MLD_LOCK_ASSERT(); /* The timer is only decremented while the link is not in MLDv2 mode. NOTE(review): if mli_v1_timer were already 0 here the pre-decrement would step past zero -- confirm that cannot happen. */ if (mli->mli_version != MLD_VERSION_2 && --mli->mli_v1_timer == 0) { /* * MLDv1 Querier Present timer expired; revert to MLDv2. 
*/ CTR5(KTR_MLD, "%s: transition from v%d -> v%d on %p(%s)", __func__, mli->mli_version, MLD_VERSION_2, mli->mli_ifp, if_name(mli->mli_ifp)); mli->mli_version = MLD_VERSION_2; } } /* * Transmit an MLDv1 report immediately. * * in6m is the group to report and type is stored as the MLD message type. * Returns 0 on success or when the group has no interface, and ENOMEM * when either mbuf allocation fails. */ static int mld_v1_transmit_report(struct in6_multi *in6m, const int type) { struct ifnet *ifp; struct in6_ifaddr *ia; struct ip6_hdr *ip6; struct mbuf *mh, *md; struct mld_hdr *mld; IN6_MULTI_LIST_LOCK_ASSERT(); MLD_LOCK_ASSERT(); ifp = in6m->in6m_ifp; /* in process of being freed */ if (ifp == NULL) return (0); ia = in6ifa_ifpforlinklocal(ifp, IN6_IFF_NOTREADY|IN6_IFF_ANYCAST); /* ia may be NULL if link-local address is tentative. */ /* Two mbufs are chained: mh carries the IPv6 header and md the MLD header. Every exit after this point frees ia when it is non-NULL. */ mh = m_gethdr(M_NOWAIT, MT_DATA); if (mh == NULL) { if (ia != NULL) ifa_free(&ia->ia_ifa); return (ENOMEM); } md = m_get(M_NOWAIT, MT_DATA); if (md == NULL) { m_free(mh); if (ia != NULL) ifa_free(&ia->ia_ifa); return (ENOMEM); } mh->m_next = md; /* * FUTURE: Consider increasing alignment by ETHER_HDR_LEN, so * that ether_output() does not need to allocate another mbuf * for the header in the most common case. */ M_ALIGN(mh, sizeof(struct ip6_hdr)); mh->m_pkthdr.len = sizeof(struct ip6_hdr) + sizeof(struct mld_hdr); mh->m_len = sizeof(struct ip6_hdr); ip6 = mtod(mh, struct ip6_hdr *); ip6->ip6_flow = 0; ip6->ip6_vfc &= ~IPV6_VERSION_MASK; ip6->ip6_vfc |= IPV6_VERSION; ip6->ip6_nxt = IPPROTO_ICMPV6; /* Source is the link-local address when one was found, otherwise the unspecified address. */ ip6->ip6_src = ia ? 
ia->ia_addr.sin6_addr : in6addr_any; ip6->ip6_dst = in6m->in6m_addr; md->m_len = sizeof(struct mld_hdr); mld = mtod(md, struct mld_hdr *); mld->mld_type = type; mld->mld_code = 0; mld->mld_cksum = 0; mld->mld_maxdelay = 0; mld->mld_reserved = 0; mld->mld_addr = in6m->in6m_addr; in6_clearscope(&mld->mld_addr); /* The checksum is computed after mld_cksum has been zeroed and the rest of the header filled in. */ mld->mld_cksum = in6_cksum(mh, IPPROTO_ICMPV6, sizeof(struct ip6_hdr), sizeof(struct mld_hdr)); mld_save_context(mh, ifp); mh->m_flags |= M_MLDV1; mld_dispatch_packet(mh); if (ia != NULL) ifa_free(&ia->ia_ifa); return (0); } /* * Process a state change from the upper layer for the given IPv6 group. * * Each socket holds a reference on the in_multi in its own ip_moptions. * The socket layer will have made the necessary updates to the group * state, it is now up to MLD to issue a state change report if there * has been any change between T0 (when the last state-change was issued) * and T1 (now). * * We use the MLDv2 state machine at group level. The MLD module * however makes the decision as to which MLD protocol version to speak. * A state change *from* INCLUDE {} always means an initial join. * A state change *to* INCLUDE {} always means a final leave. * * If delay is non-zero, and the state change is an initial multicast * join, the state change report will be delayed by 'delay' ticks * in units of PR_FASTHZ if MLDv1 is active on the link; otherwise * the initial MLDv2 state change report will be delayed by whichever * is sooner, a pending state-change timer or delay itself. * * Returns 0, or the error produced by mld_initial_join() or * mld_handle_state_change(). * * VIMAGE: curvnet should have been set by caller, as this routine * is called from the socket option handlers. */ int mld_change_state(struct in6_multi *inm, const int delay) { struct mld_ifsoftc *mli; struct ifnet *ifp; int error; IN6_MULTI_LIST_LOCK_ASSERT(); error = 0; /* * Check if the in6_multi has already been disconnected. 
*/ if (inm->in6m_ifp == NULL) { CTR1(KTR_MLD, "%s: inm is disconnected", __func__); return (0); } /* * Try to detect if the upper layer just asked us to change state * for an interface which has now gone away. */ KASSERT(inm->in6m_ifma != NULL, ("%s: no ifma", __func__)); ifp = inm->in6m_ifma->ifma_ifp; if (ifp == NULL) return (0); /* * Sanity check that netinet6's notion of ifp is the * same as net's. */ KASSERT(inm->in6m_ifp == ifp, ("%s: bad ifp", __func__)); /* MLD_LOCK is taken here and released at out_locked on every path below. */ MLD_LOCK(); mli = MLD_IFINFO(ifp); KASSERT(mli != NULL, ("%s: no mld_ifsoftc for ifp %p", __func__, ifp)); /* * If we detect a state transition to or from MCAST_UNDEFINED * for this group, then we are starting or finishing an MLD * life cycle for this group. */ /* in6m_st[0] is compared against in6m_st[1]; a mode change that involves MCAST_UNDEFINED on neither side falls through to mld_handle_state_change(). */ if (inm->in6m_st[1].iss_fmode != inm->in6m_st[0].iss_fmode) { CTR3(KTR_MLD, "%s: inm transition %d -> %d", __func__, inm->in6m_st[0].iss_fmode, inm->in6m_st[1].iss_fmode); if (inm->in6m_st[0].iss_fmode == MCAST_UNDEFINED) { CTR1(KTR_MLD, "%s: initial join", __func__); error = mld_initial_join(inm, mli, delay); goto out_locked; } else if (inm->in6m_st[1].iss_fmode == MCAST_UNDEFINED) { CTR1(KTR_MLD, "%s: final leave", __func__); mld_final_leave(inm, mli); goto out_locked; } } else { CTR1(KTR_MLD, "%s: filter set change", __func__); } error = mld_handle_state_change(inm, mli); out_locked: MLD_UNLOCK(); return (error); } /* * Perform the initial join for an MLD group. * * When joining a group: * If the group should have its MLD traffic suppressed, do nothing. * MLDv1 starts sending MLDv1 host membership reports. * MLDv2 will schedule an MLDv2 state-change report containing the * initial state of the membership. * * If the delay argument is non-zero, then we must delay sending the * initial state change for delay ticks (in units of PR_FASTHZ). 
*/
/*
 * inm   - group being joined.
 * mli   - per-link MLD state; must belong to inm's interface.
 * delay - optional start-up delay (0 for none).
 *
 * Returns 0 on success, the error from mld_v1_transmit_report() in MLDv1
 * mode, or the negated result of mld_v2_enqueue_group_record() when
 * nothing could be queued in MLDv2 mode.
 */
static int
mld_initial_join(struct in6_multi *inm, struct mld_ifsoftc *mli,
    const int delay)
{
	struct ifnet		*ifp;
	struct mbufq		*scq;
	int			 error, nqueued, commit_now;
	int			 v1delay;
#ifdef KTR
	char			 ip6tbuf[INET6_ADDRSTRLEN];
#endif

	CTR4(KTR_MLD, "%s: initial join %s on ifp %p(%s)",
	    __func__, ip6_sprintf(ip6tbuf, &inm->in6m_addr),
	    inm->in6m_ifp, if_name(inm->in6m_ifp));

	error = 0;
	commit_now = 1;

	ifp = inm->in6m_ifp;

	IN6_MULTI_LIST_LOCK_ASSERT();
	MLD_LOCK_ASSERT();

	KASSERT(mli && mli->mli_ifp == ifp, ("%s: inconsistent ifp", __func__));

	/*
	 * A group is reported only when the interface is not loopback, the
	 * link is not marked MLIF_SILENT, and the address itself is one
	 * that gets reported.  Such groups enter the state machine of the
	 * MLD version in use on the link; every other group becomes a
	 * silent member and never takes part in any protocol exchange.
	 */
	if (!(ifp->if_flags & IFF_LOOPBACK) &&
	    !(mli->mli_flags & MLIF_SILENT) &&
	    mld_is_addr_reported(&inm->in6m_addr)) {
		/*
		 * Overlapping in_multi lifecycle: a group that was still
		 * LEAVING holds an extra reference taken for the final
		 * INCLUDE {} enqueue; give it back.
		 */
		if (mli->mli_version == MLD_VERSION_2 &&
		    inm->in6m_state == MLD_LEAVING_MEMBER) {
			inm->in6m_refcount--;
			MPASS(inm->in6m_refcount > 0);
		}
		inm->in6m_state = MLD_REPORTING_MEMBER;

		if (mli->mli_version == MLD_VERSION_1) {
			/*
			 * With no caller delay, report right away and go
			 * IDLE.  With one, stay in REPORTING and arm the
			 * group timer with the larger of the caller's delay
			 * and the usual MLDv1 report delay.
			 */
			v1delay = MLD_RANDOM_DELAY(MLD_V1_MAX_RI * PR_FASTHZ);
			if (delay == 0) {
				inm->in6m_state = MLD_IDLE_MEMBER;
				error = mld_v1_transmit_report(inm,
				    MLD_LISTENER_REPORT);
				if (error == 0) {
					inm->in6m_timer = v1delay;
					V_current_state_timers_running6 = 1;
				}
			} else {
				inm->in6m_timer = max(delay, v1delay);
				V_current_state_timers_running6 = 1;
			}
		} else if (mli->mli_version == MLD_VERSION_2) {
			/*
			 * T0 is not synchronised to T1 until the first copy
			 * of the state change has actually gone out.
			 */
			commit_now = 0;

			/*
			 * Replace whatever was queued with a fresh
			 * State-Change Report.  The timers are left alone
			 * when nothing was queued or an error occurred.
			 */
			scq = &inm->in6m_scq;
			mbufq_drain(scq);
			nqueued = mld_v2_enqueue_group_record(scq, inm, 1,
			    0, 0, (mli->mli_flags & MLIF_USEALLOW));
			CTR2(KTR_MLD, "%s: enqueue record = %d",
			    __func__, nqueued);
			if (nqueued <= 0) {
				error = -nqueued;
			} else {
				/*
				 * Send the report up to RV times.  The
				 * timer fires on the next mld_fasttimo
				 * (~200ms), which leaves room to merge
				 * reports.  A caller-supplied delay is used
				 * only when it is sooner than a timer that
				 * is already running.
				 */
				KASSERT(mli->mli_rv > 1,
				    ("%s: invalid robustness %d", __func__,
				    mli->mli_rv));
				inm->in6m_scrv = mli->mli_rv;
				if (delay == 0)
					inm->in6m_sctimer = 1;
				else if (inm->in6m_sctimer > 1)
					inm->in6m_sctimer =
					    min(inm->in6m_sctimer, delay);
				else
					inm->in6m_sctimer = delay;
				V_state_change_timers_running6 = 1;
				error = 0;
			}
		}
	} else {
		CTR1(KTR_MLD,
"%s: not kicking state machine for silent group", __func__);
		inm->in6m_state = MLD_SILENT_MEMBER;
		inm->in6m_timer = 0;
	}

	/*
	 * Commit T1 to T0 only when the change is atomic, i.e. no timer
	 * has to fire before it counts as communicated.
	 */
	if (commit_now) {
		in6m_commit(inm);
		CTR3(KTR_MLD, "%s: T1 -> T0 for %s/%s", __func__,
		    ip6_sprintf(ip6tbuf, &inm->in6m_addr),
		    if_name(inm->in6m_ifp));
	}

	return (error);
}

/*
 * Issue an intermediate state change during the life-cycle.
*/
/*
 * inm - group whose filter state changed.
 * mli - per-link MLD state; must belong to inm's interface.
 *
 * Returns 0 when the change was committed silently or a state-change
 * report was queued, otherwise the negated result of
 * mld_v2_enqueue_group_record().
 */
static int
mld_handle_state_change(struct in6_multi *inm, struct mld_ifsoftc *mli)
{
	struct ifnet		*ifp;
	int			 nqueued;
#ifdef KTR
	char			 ip6tbuf[INET6_ADDRSTRLEN];
#endif

	ifp = inm->in6m_ifp;

	CTR4(KTR_MLD, "%s: state change for %s on ifp %p(%s)",
	    __func__, ip6_sprintf(ip6tbuf, &inm->in6m_addr),
	    inm->in6m_ifp, if_name(inm->in6m_ifp));

	IN6_MULTI_LIST_LOCK_ASSERT();
	MLD_LOCK_ASSERT();

	KASSERT(mli && mli->mli_ifp == ifp, ("%s: inconsistent ifp", __func__));

	/*
	 * A report is generated only for a reported address on a
	 * non-loopback, non-silent link that is running MLDv2.
	 */
	if (!(ifp->if_flags & IFF_LOOPBACK) &&
	    !(mli->mli_flags & MLIF_SILENT) &&
	    mld_is_addr_reported(&inm->in6m_addr) &&
	    mli->mli_version == MLD_VERSION_2) {
		/* Drop anything still queued and build a fresh record. */
		mbufq_drain(&inm->in6m_scq);

		nqueued = mld_v2_enqueue_group_record(&inm->in6m_scq, inm,
		    1, 0, 0, (mli->mli_flags & MLIF_USEALLOW));
		CTR2(KTR_MLD, "%s: enqueue record = %d", __func__, nqueued);
		if (nqueued <= 0)
			return (-nqueued);

		/*
		 * Something was queued: arm the state-change report timer
		 * for this group.
		 */
		inm->in6m_scrv = mli->mli_rv;
		inm->in6m_sctimer = 1;
		V_state_change_timers_running6 = 1;

		return (0);
	}

	/* Nothing goes on the wire; just fold T1 into T0. */
	if (!mld_is_addr_reported(&inm->in6m_addr)) {
		CTR1(KTR_MLD,
"%s: not kicking state machine for silent group", __func__);
	}
	CTR1(KTR_MLD, "%s: nothing to do", __func__);
	in6m_commit(inm);
	CTR3(KTR_MLD, "%s: T1 -> T0 for %s/%s", __func__,
	    ip6_sprintf(ip6tbuf, &inm->in6m_addr),
	    if_name(inm->in6m_ifp));

	return (0);
}

/*
 * Perform the final leave for a multicast address.
 *
 * When leaving a group:
 *  MLDv1 sends a DONE message, if and only if we are the reporter.
 *  MLDv2 enqueues a state-change report containing a transition
 *  to INCLUDE {} for immediate transmission.
*/ static void mld_final_leave(struct in6_multi *inm, struct mld_ifsoftc *mli) { int syncstates; #ifdef KTR char ip6tbuf[INET6_ADDRSTRLEN]; #endif syncstates = 1; CTR4(KTR_MLD, "%s: final leave %s on ifp %p(%s)", __func__, ip6_sprintf(ip6tbuf, &inm->in6m_addr), inm->in6m_ifp, if_name(inm->in6m_ifp)); IN6_MULTI_LIST_LOCK_ASSERT(); MLD_LOCK_ASSERT(); switch (inm->in6m_state) { case MLD_NOT_MEMBER: case MLD_SILENT_MEMBER: case MLD_LEAVING_MEMBER: /* Already leaving or left; do nothing. */ CTR1(KTR_MLD, "%s: not kicking state machine for silent group", __func__); break; case MLD_REPORTING_MEMBER: case MLD_IDLE_MEMBER: case MLD_G_QUERY_PENDING_MEMBER: case MLD_SG_QUERY_PENDING_MEMBER: if (mli->mli_version == MLD_VERSION_1) { #ifdef INVARIANTS if (inm->in6m_state == MLD_G_QUERY_PENDING_MEMBER || inm->in6m_state == MLD_SG_QUERY_PENDING_MEMBER) panic("%s: MLDv2 state reached, not MLDv2 mode", __func__); #endif mld_v1_transmit_report(inm, MLD_LISTENER_DONE); inm->in6m_state = MLD_NOT_MEMBER; V_current_state_timers_running6 = 1; } else if (mli->mli_version == MLD_VERSION_2) { /* * Stop group timer and all pending reports. * Immediately enqueue a state-change report * TO_IN {} to be sent on the next fast timeout, * giving us an opportunity to merge reports. 
*/ mbufq_drain(&inm->in6m_scq); inm->in6m_timer = 0; inm->in6m_scrv = mli->mli_rv; CTR4(KTR_MLD, "%s: Leaving %s/%s with %d " "pending retransmissions.", __func__, ip6_sprintf(ip6tbuf, &inm->in6m_addr), if_name(inm->in6m_ifp), inm->in6m_scrv); if (inm->in6m_scrv == 0) { inm->in6m_state = MLD_NOT_MEMBER; inm->in6m_sctimer = 0; } else { int retval; in6m_acquire_locked(inm); retval = mld_v2_enqueue_group_record( &inm->in6m_scq, inm, 1, 0, 0, (mli->mli_flags & MLIF_USEALLOW)); KASSERT(retval != 0, ("%s: enqueue record = %d", __func__, retval)); inm->in6m_state = MLD_LEAVING_MEMBER; inm->in6m_sctimer = 1; V_state_change_timers_running6 = 1; syncstates = 0; } break; } break; case MLD_LAZY_MEMBER: case MLD_SLEEPING_MEMBER: case MLD_AWAKENING_MEMBER: /* Our reports are suppressed; do nothing. */ break; } if (syncstates) { in6m_commit(inm); CTR3(KTR_MLD, "%s: T1 -> T0 for %s/%s", __func__, ip6_sprintf(ip6tbuf, &inm->in6m_addr), if_name(inm->in6m_ifp)); inm->in6m_st[1].iss_fmode = MCAST_UNDEFINED; CTR3(KTR_MLD, "%s: T1 now MCAST_UNDEFINED for %p/%s", __func__, &inm->in6m_addr, if_name(inm->in6m_ifp)); } } /* * Enqueue an MLDv2 group record to the given output queue. * * If is_state_change is zero, a current-state record is appended. * If is_state_change is non-zero, a state-change report is appended. * * If is_group_query is non-zero, an mbuf packet chain is allocated. * If is_group_query is zero, and if there is a packet with free space * at the tail of the queue, it will be appended to providing there * is enough free space. * Otherwise a new mbuf packet chain is allocated. * * If is_source_query is non-zero, each source is checked to see if * it was recorded for a Group-Source query, and will be omitted if * it is not both in-mode and recorded. * * If use_block_allow is non-zero, state change reports for initial join * and final leave, on an inclusive mode group with a source list, will be * rewritten to use the ALLOW_NEW and BLOCK_OLD record types, respectively. 
* * The function will attempt to allocate leading space in the packet * for the IPv6+ICMP headers to be prepended without fragmenting the chain. * * If successful the size of all data appended to the queue is returned, * otherwise an error code less than zero is returned, or zero if * no record(s) were appended. */ static int mld_v2_enqueue_group_record(struct mbufq *mq, struct in6_multi *inm, const int is_state_change, const int is_group_query, const int is_source_query, const int use_block_allow) { struct mldv2_record mr; struct mldv2_record *pmr; struct ifnet *ifp; struct ip6_msource *ims, *nims; struct mbuf *m0, *m, *md; int is_filter_list_change; int minrec0len, m0srcs, msrcs, nbytes, off; int record_has_sources; int now; int type; uint8_t mode; #ifdef KTR char ip6tbuf[INET6_ADDRSTRLEN]; #endif IN6_MULTI_LIST_LOCK_ASSERT(); ifp = inm->in6m_ifp; is_filter_list_change = 0; m = NULL; m0 = NULL; m0srcs = 0; msrcs = 0; nbytes = 0; nims = NULL; record_has_sources = 1; pmr = NULL; type = MLD_DO_NOTHING; mode = inm->in6m_st[1].iss_fmode; /* * If we did not transition out of ASM mode during t0->t1, * and there are no source nodes to process, we can skip * the generation of source records. */ if (inm->in6m_st[0].iss_asm > 0 && inm->in6m_st[1].iss_asm > 0 && inm->in6m_nsrc == 0) record_has_sources = 0; if (is_state_change) { /* * Queue a state change record. * If the mode did not change, and there are non-ASM * listeners or source filters present, * we potentially need to issue two records for the group. * If there are ASM listeners, and there was no filter * mode transition of any kind, do nothing. * * If we are transitioning to MCAST_UNDEFINED, we need * not send any sources. A transition to/from this state is * considered inclusive with some special treatment. * * If we are rewriting initial joins/leaves to use * ALLOW/BLOCK, and the group's membership is inclusive, * we need to send sources in all cases. 
*/ if (mode != inm->in6m_st[0].iss_fmode) { if (mode == MCAST_EXCLUDE) { CTR1(KTR_MLD, "%s: change to EXCLUDE", __func__); type = MLD_CHANGE_TO_EXCLUDE_MODE; } else { CTR1(KTR_MLD, "%s: change to INCLUDE", __func__); if (use_block_allow) { /* * XXX * Here we're interested in state * edges either direction between * MCAST_UNDEFINED and MCAST_INCLUDE. * Perhaps we should just check * the group state, rather than * the filter mode. */ if (mode == MCAST_UNDEFINED) { type = MLD_BLOCK_OLD_SOURCES; } else { type = MLD_ALLOW_NEW_SOURCES; } } else { type = MLD_CHANGE_TO_INCLUDE_MODE; if (mode == MCAST_UNDEFINED) record_has_sources = 0; } } } else { if (record_has_sources) { is_filter_list_change = 1; } else { type = MLD_DO_NOTHING; } } } else { /* * Queue a current state record. */ if (mode == MCAST_EXCLUDE) { type = MLD_MODE_IS_EXCLUDE; } else if (mode == MCAST_INCLUDE) { type = MLD_MODE_IS_INCLUDE; KASSERT(inm->in6m_st[1].iss_asm == 0, ("%s: inm %p is INCLUDE but ASM count is %d", __func__, inm, inm->in6m_st[1].iss_asm)); } } /* * Generate the filter list changes using a separate function. */ if (is_filter_list_change) return (mld_v2_enqueue_filter_change(mq, inm)); if (type == MLD_DO_NOTHING) { CTR3(KTR_MLD, "%s: nothing to do for %s/%s", __func__, ip6_sprintf(ip6tbuf, &inm->in6m_addr), if_name(inm->in6m_ifp)); return (0); } /* * If any sources are present, we must be able to fit at least * one in the trailing space of the tail packet's mbuf, * ideally more. */ minrec0len = sizeof(struct mldv2_record); if (record_has_sources) minrec0len += sizeof(struct in6_addr); CTR4(KTR_MLD, "%s: queueing %s for %s/%s", __func__, mld_rec_type_to_str(type), ip6_sprintf(ip6tbuf, &inm->in6m_addr), if_name(inm->in6m_ifp)); /* * Check if we have a packet in the tail of the queue for this * group into which the first group record for this group will fit. * Otherwise allocate a new packet. * Always allocate leading space for IP6+RA+ICMPV6+REPORT. 
* Note: Group records for G/GSR query responses MUST be sent * in their own packet. */ m0 = mbufq_last(mq); if (!is_group_query && m0 != NULL && (m0->m_pkthdr.PH_vt.vt_nrecs + 1 <= MLD_V2_REPORT_MAXRECS) && (m0->m_pkthdr.len + minrec0len) < (ifp->if_mtu - MLD_MTUSPACE)) { m0srcs = (ifp->if_mtu - m0->m_pkthdr.len - sizeof(struct mldv2_record)) / sizeof(struct in6_addr); m = m0; CTR1(KTR_MLD, "%s: use existing packet", __func__); } else { if (mbufq_full(mq)) { CTR1(KTR_MLD, "%s: outbound queue full", __func__); return (-ENOMEM); } m = NULL; m0srcs = (ifp->if_mtu - MLD_MTUSPACE - sizeof(struct mldv2_record)) / sizeof(struct in6_addr); if (!is_state_change && !is_group_query) m = m_getcl(M_NOWAIT, MT_DATA, M_PKTHDR); if (m == NULL) m = m_gethdr(M_NOWAIT, MT_DATA); if (m == NULL) return (-ENOMEM); mld_save_context(m, ifp); CTR1(KTR_MLD, "%s: allocated first packet", __func__); } /* * Append group record. * If we have sources, we don't know how many yet. */ mr.mr_type = type; mr.mr_datalen = 0; mr.mr_numsrc = 0; mr.mr_addr = inm->in6m_addr; in6_clearscope(&mr.mr_addr); if (!m_append(m, sizeof(struct mldv2_record), (void *)&mr)) { if (m != m0) m_freem(m); CTR1(KTR_MLD, "%s: m_append() failed.", __func__); return (-ENOMEM); } nbytes += sizeof(struct mldv2_record); /* * Append as many sources as will fit in the first packet. * If we are appending to a new packet, the chain allocation * may potentially use clusters; use m_getptr() in this case. * If we are appending to an existing packet, we need to obtain * a pointer to the group record after m_append(), in case a new * mbuf was allocated. * * Only append sources which are in-mode at t1. If we are * transitioning to MCAST_UNDEFINED state on the group, and * use_block_allow is zero, do not include source entries. * Otherwise, we need to include this source in the report. * * Only report recorded sources in our filter set when responding * to a group-source query. 
*/ if (record_has_sources) { if (m == m0) { md = m_last(m); pmr = (struct mldv2_record *)(mtod(md, uint8_t *) + md->m_len - nbytes); } else { md = m_getptr(m, 0, &off); pmr = (struct mldv2_record *)(mtod(md, uint8_t *) + off); } msrcs = 0; RB_FOREACH_SAFE(ims, ip6_msource_tree, &inm->in6m_srcs, nims) { CTR2(KTR_MLD, "%s: visit node %s", __func__, ip6_sprintf(ip6tbuf, &ims->im6s_addr)); now = im6s_get_mode(inm, ims, 1); CTR2(KTR_MLD, "%s: node is %d", __func__, now); if ((now != mode) || (now == mode && (!use_block_allow && mode == MCAST_UNDEFINED))) { CTR1(KTR_MLD, "%s: skip node", __func__); continue; } if (is_source_query && ims->im6s_stp == 0) { CTR1(KTR_MLD, "%s: skip unrecorded node", __func__); continue; } CTR1(KTR_MLD, "%s: append node", __func__); if (!m_append(m, sizeof(struct in6_addr), (void *)&ims->im6s_addr)) { if (m != m0) m_freem(m); CTR1(KTR_MLD, "%s: m_append() failed.", __func__); return (-ENOMEM); } nbytes += sizeof(struct in6_addr); ++msrcs; if (msrcs == m0srcs) break; } CTR2(KTR_MLD, "%s: msrcs is %d this packet", __func__, msrcs); pmr->mr_numsrc = htons(msrcs); nbytes += (msrcs * sizeof(struct in6_addr)); } if (is_source_query && msrcs == 0) { CTR1(KTR_MLD, "%s: no recorded sources to report", __func__); if (m != m0) m_freem(m); return (0); } /* * We are good to go with first packet. */ if (m != m0) { CTR1(KTR_MLD, "%s: enqueueing first packet", __func__); m->m_pkthdr.PH_vt.vt_nrecs = 1; mbufq_enqueue(mq, m); } else m->m_pkthdr.PH_vt.vt_nrecs++; /* * No further work needed if no source list in packet(s). */ if (!record_has_sources) return (nbytes); /* * Whilst sources remain to be announced, we need to allocate * a new packet and fill out as many sources as will fit. * Always try for a cluster first. 
*/ while (nims != NULL) { if (mbufq_full(mq)) { CTR1(KTR_MLD, "%s: outbound queue full", __func__); return (-ENOMEM); } m = m_getcl(M_NOWAIT, MT_DATA, M_PKTHDR); if (m == NULL) m = m_gethdr(M_NOWAIT, MT_DATA); if (m == NULL) return (-ENOMEM); mld_save_context(m, ifp); md = m_getptr(m, 0, &off); pmr = (struct mldv2_record *)(mtod(md, uint8_t *) + off); CTR1(KTR_MLD, "%s: allocated next packet", __func__); if (!m_append(m, sizeof(struct mldv2_record), (void *)&mr)) { if (m != m0) m_freem(m); CTR1(KTR_MLD, "%s: m_append() failed.", __func__); return (-ENOMEM); } m->m_pkthdr.PH_vt.vt_nrecs = 1; nbytes += sizeof(struct mldv2_record); m0srcs = (ifp->if_mtu - MLD_MTUSPACE - sizeof(struct mldv2_record)) / sizeof(struct in6_addr); msrcs = 0; RB_FOREACH_FROM(ims, ip6_msource_tree, nims) { CTR2(KTR_MLD, "%s: visit node %s", __func__, ip6_sprintf(ip6tbuf, &ims->im6s_addr)); now = im6s_get_mode(inm, ims, 1); if ((now != mode) || (now == mode && (!use_block_allow && mode == MCAST_UNDEFINED))) { CTR1(KTR_MLD, "%s: skip node", __func__); continue; } if (is_source_query && ims->im6s_stp == 0) { CTR1(KTR_MLD, "%s: skip unrecorded node", __func__); continue; } CTR1(KTR_MLD, "%s: append node", __func__); if (!m_append(m, sizeof(struct in6_addr), (void *)&ims->im6s_addr)) { if (m != m0) m_freem(m); CTR1(KTR_MLD, "%s: m_append() failed.", __func__); return (-ENOMEM); } ++msrcs; if (msrcs == m0srcs) break; } pmr->mr_numsrc = htons(msrcs); nbytes += (msrcs * sizeof(struct in6_addr)); CTR1(KTR_MLD, "%s: enqueueing next packet", __func__); mbufq_enqueue(mq, m); } return (nbytes); } /* * Type used to mark record pass completion. * We exploit the fact we can cast to this easily from the * current filter modes on each ip_msource node. */ typedef enum { REC_NONE = 0x00, /* MCAST_UNDEFINED */ REC_ALLOW = 0x01, /* MCAST_INCLUDE */ REC_BLOCK = 0x02, /* MCAST_EXCLUDE */ REC_FULL = REC_ALLOW | REC_BLOCK } rectype_t; /* * Enqueue an MLDv2 filter list change to the given output queue. 
* * Source list filter state is held in an RB-tree. When the filter list * for a group is changed without changing its mode, we need to compute * the deltas between T0 and T1 for each source in the filter set, * and enqueue the appropriate ALLOW_NEW/BLOCK_OLD records. * * As we may potentially queue two record types, and the entire R-B tree * needs to be walked at once, we break this out into its own function * so we can generate a tightly packed queue of packets. * * XXX This could be written to only use one tree walk, although that makes * serializing into the mbuf chains a bit harder. For now we do two walks * which makes things easier on us, and it may or may not be harder on * the L2 cache. * * If successful the size of all data appended to the queue is returned, * otherwise an error code less than zero is returned, or zero if * no record(s) were appended. */ static int mld_v2_enqueue_filter_change(struct mbufq *mq, struct in6_multi *inm) { static const int MINRECLEN = sizeof(struct mldv2_record) + sizeof(struct in6_addr); struct ifnet *ifp; struct mldv2_record mr; struct mldv2_record *pmr; struct ip6_msource *ims, *nims; struct mbuf *m, *m0, *md; int m0srcs, nbytes, npbytes, off, rsrcs, schanged; int nallow, nblock; uint8_t mode, now, then; rectype_t crt, drt, nrt; #ifdef KTR char ip6tbuf[INET6_ADDRSTRLEN]; #endif IN6_MULTI_LIST_LOCK_ASSERT(); if (inm->in6m_nsrc == 0 || (inm->in6m_st[0].iss_asm > 0 && inm->in6m_st[1].iss_asm > 0)) return (0); ifp = inm->in6m_ifp; /* interface */ mode = inm->in6m_st[1].iss_fmode; /* filter mode at t1 */ crt = REC_NONE; /* current group record type */ drt = REC_NONE; /* mask of completed group record types */ nrt = REC_NONE; /* record type for current node */ m0srcs = 0; /* # source which will fit in current mbuf chain */ npbytes = 0; /* # of bytes appended this packet */ nbytes = 0; /* # of bytes appended to group's state-change queue */ rsrcs = 0; /* # sources encoded in current record */ schanged = 0; /* # nodes encoded in 
overall filter change */ nallow = 0; /* # of source entries in ALLOW_NEW */ nblock = 0; /* # of source entries in BLOCK_OLD */ nims = NULL; /* next tree node pointer */ /* * For each possible filter record mode. * The first kind of source we encounter tells us which * is the first kind of record we start appending. * If a node transitioned to UNDEFINED at t1, its mode is treated * as the inverse of the group's filter mode. */ while (drt != REC_FULL) { do { m0 = mbufq_last(mq); if (m0 != NULL && (m0->m_pkthdr.PH_vt.vt_nrecs + 1 <= MLD_V2_REPORT_MAXRECS) && (m0->m_pkthdr.len + MINRECLEN) < (ifp->if_mtu - MLD_MTUSPACE)) { m = m0; m0srcs = (ifp->if_mtu - m0->m_pkthdr.len - sizeof(struct mldv2_record)) / sizeof(struct in6_addr); CTR1(KTR_MLD, "%s: use previous packet", __func__); } else { m = m_getcl(M_NOWAIT, MT_DATA, M_PKTHDR); if (m == NULL) m = m_gethdr(M_NOWAIT, MT_DATA); if (m == NULL) { CTR1(KTR_MLD, "%s: m_get*() failed", __func__); return (-ENOMEM); } m->m_pkthdr.PH_vt.vt_nrecs = 0; mld_save_context(m, ifp); m0srcs = (ifp->if_mtu - MLD_MTUSPACE - sizeof(struct mldv2_record)) / sizeof(struct in6_addr); npbytes = 0; CTR1(KTR_MLD, "%s: allocated new packet", __func__); } /* * Append the MLD group record header to the * current packet's data area. * Recalculate pointer to free space for next * group record, in case m_append() allocated * a new mbuf or cluster. 
*/ memset(&mr, 0, sizeof(mr)); mr.mr_addr = inm->in6m_addr; in6_clearscope(&mr.mr_addr); if (!m_append(m, sizeof(mr), (void *)&mr)) { if (m != m0) m_freem(m); CTR1(KTR_MLD, "%s: m_append() failed", __func__); return (-ENOMEM); } npbytes += sizeof(struct mldv2_record); if (m != m0) { /* new packet; offset in chain */ md = m_getptr(m, npbytes - sizeof(struct mldv2_record), &off); pmr = (struct mldv2_record *)(mtod(md, uint8_t *) + off); } else { /* current packet; offset from last append */ md = m_last(m); pmr = (struct mldv2_record *)(mtod(md, uint8_t *) + md->m_len - sizeof(struct mldv2_record)); } /* * Begin walking the tree for this record type * pass, or continue from where we left off * previously if we had to allocate a new packet. * Only report deltas in-mode at t1. * We need not report included sources as allowed * if we are in inclusive mode on the group, * however the converse is not true. */ rsrcs = 0; if (nims == NULL) { nims = RB_MIN(ip6_msource_tree, &inm->in6m_srcs); } RB_FOREACH_FROM(ims, ip6_msource_tree, nims) { CTR2(KTR_MLD, "%s: visit node %s", __func__, ip6_sprintf(ip6tbuf, &ims->im6s_addr)); now = im6s_get_mode(inm, ims, 1); then = im6s_get_mode(inm, ims, 0); CTR3(KTR_MLD, "%s: mode: t0 %d, t1 %d", __func__, then, now); if (now == then) { CTR1(KTR_MLD, "%s: skip unchanged", __func__); continue; } if (mode == MCAST_EXCLUDE && now == MCAST_INCLUDE) { CTR1(KTR_MLD, "%s: skip IN src on EX group", __func__); continue; } nrt = (rectype_t)now; if (nrt == REC_NONE) nrt = (rectype_t)(~mode & REC_FULL); if (schanged++ == 0) { crt = nrt; } else if (crt != nrt) continue; if (!m_append(m, sizeof(struct in6_addr), (void *)&ims->im6s_addr)) { if (m != m0) m_freem(m); CTR1(KTR_MLD, "%s: m_append() failed", __func__); return (-ENOMEM); } nallow += !!(crt == REC_ALLOW); nblock += !!(crt == REC_BLOCK); if (++rsrcs == m0srcs) break; } /* * If we did not append any tree nodes on this * pass, back out of allocations. 
*/ if (rsrcs == 0) { npbytes -= sizeof(struct mldv2_record); if (m != m0) { CTR1(KTR_MLD, "%s: m_free(m)", __func__); m_freem(m); } else { CTR1(KTR_MLD, "%s: m_adj(m, -mr)", __func__); m_adj(m, -((int)sizeof( struct mldv2_record))); } continue; } npbytes += (rsrcs * sizeof(struct in6_addr)); if (crt == REC_ALLOW) pmr->mr_type = MLD_ALLOW_NEW_SOURCES; else if (crt == REC_BLOCK) pmr->mr_type = MLD_BLOCK_OLD_SOURCES; pmr->mr_numsrc = htons(rsrcs); /* * Count the new group record, and enqueue this * packet if it wasn't already queued. */ m->m_pkthdr.PH_vt.vt_nrecs++; if (m != m0) mbufq_enqueue(mq, m); nbytes += npbytes; } while (nims != NULL); drt |= crt; crt = (~crt & REC_FULL); } CTR3(KTR_MLD, "%s: queued %d ALLOW_NEW, %d BLOCK_OLD", __func__, nallow, nblock); return (nbytes); } static int mld_v2_merge_state_changes(struct in6_multi *inm, struct mbufq *scq) { struct mbufq *gq; struct mbuf *m; /* pending state-change */ struct mbuf *m0; /* copy of pending state-change */ struct mbuf *mt; /* last state-change in packet */ int docopy, domerge; u_int recslen; docopy = 0; domerge = 0; recslen = 0; IN6_MULTI_LIST_LOCK_ASSERT(); MLD_LOCK_ASSERT(); /* * If there are further pending retransmissions, make a writable * copy of each queued state-change message before merging. */ if (inm->in6m_scrv > 0) docopy = 1; gq = &inm->in6m_scq; #ifdef KTR if (mbufq_first(gq) == NULL) { CTR2(KTR_MLD, "%s: WARNING: queue for inm %p is empty", __func__, inm); } #endif m = mbufq_first(gq); while (m != NULL) { /* * Only merge the report into the current packet if * there is sufficient space to do so; an MLDv2 report * packet may only contain 65,535 group records. * Always use a simple mbuf chain concatentation to do this, * as large state changes for single groups may have * allocated clusters. 
*/ domerge = 0; mt = mbufq_last(scq); if (mt != NULL) { recslen = m_length(m, NULL); if ((mt->m_pkthdr.PH_vt.vt_nrecs + m->m_pkthdr.PH_vt.vt_nrecs <= MLD_V2_REPORT_MAXRECS) && (mt->m_pkthdr.len + recslen <= (inm->in6m_ifp->if_mtu - MLD_MTUSPACE))) domerge = 1; } if (!domerge && mbufq_full(gq)) { CTR2(KTR_MLD, "%s: outbound queue full, skipping whole packet %p", __func__, m); mt = m->m_nextpkt; if (!docopy) m_freem(m); m = mt; continue; } if (!docopy) { CTR2(KTR_MLD, "%s: dequeueing %p", __func__, m); m0 = mbufq_dequeue(gq); m = m0->m_nextpkt; } else { CTR2(KTR_MLD, "%s: copying %p", __func__, m); m0 = m_dup(m, M_NOWAIT); if (m0 == NULL) return (ENOMEM); m0->m_nextpkt = NULL; m = m->m_nextpkt; } if (!domerge) { CTR3(KTR_MLD, "%s: queueing %p to scq %p)", __func__, m0, scq); mbufq_enqueue(scq, m0); } else { struct mbuf *mtl; /* last mbuf of packet mt */ CTR3(KTR_MLD, "%s: merging %p with ifscq tail %p)", __func__, m0, mt); mtl = m_last(mt); m0->m_flags &= ~M_PKTHDR; mt->m_pkthdr.len += recslen; mt->m_pkthdr.PH_vt.vt_nrecs += m0->m_pkthdr.PH_vt.vt_nrecs; mtl->m_next = m0; } } return (0); } /* * Respond to a pending MLDv2 General Query. */ static void mld_v2_dispatch_general_query(struct mld_ifsoftc *mli) { struct ifmultiaddr *ifma; struct ifnet *ifp; struct in6_multi *inm; int retval; IN6_MULTI_LIST_LOCK_ASSERT(); MLD_LOCK_ASSERT(); KASSERT(mli->mli_version == MLD_VERSION_2, ("%s: called when version %d", __func__, mli->mli_version)); /* * Check that there are some packets queued. If so, send them first. * For large number of groups the reply to general query can take * many packets, we should finish sending them before starting of * queuing the new reply. 
*/ if (mbufq_len(&mli->mli_gq) != 0) goto send; ifp = mli->mli_ifp; IF_ADDR_RLOCK(ifp); CK_STAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) { inm = in6m_ifmultiaddr_get_inm(ifma); if (inm == NULL) continue; KASSERT(ifp == inm->in6m_ifp, ("%s: inconsistent ifp", __func__)); switch (inm->in6m_state) { case MLD_NOT_MEMBER: case MLD_SILENT_MEMBER: break; case MLD_REPORTING_MEMBER: case MLD_IDLE_MEMBER: case MLD_LAZY_MEMBER: case MLD_SLEEPING_MEMBER: case MLD_AWAKENING_MEMBER: inm->in6m_state = MLD_REPORTING_MEMBER; retval = mld_v2_enqueue_group_record(&mli->mli_gq, inm, 0, 0, 0, 0); CTR2(KTR_MLD, "%s: enqueue record = %d", __func__, retval); break; case MLD_G_QUERY_PENDING_MEMBER: case MLD_SG_QUERY_PENDING_MEMBER: case MLD_LEAVING_MEMBER: break; } } IF_ADDR_RUNLOCK(ifp); send: mld_dispatch_queue(&mli->mli_gq, MLD_MAX_RESPONSE_BURST); /* * Slew transmission of bursts over 500ms intervals. */ if (mbufq_first(&mli->mli_gq) != NULL) { mli->mli_v2_timer = 1 + MLD_RANDOM_DELAY( MLD_RESPONSE_BURST_INTERVAL); V_interface_timers_running6 = 1; } } /* * Transmit the next pending message in the output queue. * * VIMAGE: Needs to store/restore vnet pointer on a per-mbuf-chain basis. * MRT: Nothing needs to be done, as MLD traffic is always local to * a link and uses a link-scope multicast address. */ static void mld_dispatch_packet(struct mbuf *m) { struct ip6_moptions im6o; struct ifnet *ifp; struct ifnet *oifp; struct mbuf *m0; struct mbuf *md; struct ip6_hdr *ip6; struct mld_hdr *mld; int error; int off; int type; uint32_t ifindex; CTR2(KTR_MLD, "%s: transmit %p", __func__, m); /* * Set VNET image pointer from enqueued mbuf chain * before doing anything else. Whilst we use interface * indexes to guard against interface detach, they are * unique to each VIMAGE and must be retrieved. */ ifindex = mld_restore_context(m); /* * Check if the ifnet still exists. This limits the scope of * any race in the absence of a global ifp lock for low cost * (an array lookup). 
*/ ifp = ifnet_byindex(ifindex); if (ifp == NULL) { CTR3(KTR_MLD, "%s: dropped %p as ifindex %u went away.", __func__, m, ifindex); m_freem(m); IP6STAT_INC(ip6s_noroute); goto out; } im6o.im6o_multicast_hlim = 1; im6o.im6o_multicast_loop = (V_ip6_mrouter != NULL); im6o.im6o_multicast_ifp = ifp; if (m->m_flags & M_MLDV1) { m0 = m; } else { m0 = mld_v2_encap_report(ifp, m); if (m0 == NULL) { CTR2(KTR_MLD, "%s: dropped %p", __func__, m); IP6STAT_INC(ip6s_odropped); goto out; } } mld_scrub_context(m0); m_clrprotoflags(m); m0->m_pkthdr.rcvif = V_loif; ip6 = mtod(m0, struct ip6_hdr *); #if 0 (void)in6_setscope(&ip6->ip6_dst, ifp, NULL); /* XXX LOR */ #else /* * XXX XXX Break some KPI rules to prevent an LOR which would * occur if we called in6_setscope() at transmission. * See comments at top of file. */ MLD_EMBEDSCOPE(&ip6->ip6_dst, ifp->if_index); #endif /* * Retrieve the ICMPv6 type before handoff to ip6_output(), * so we can bump the stats. */ md = m_getptr(m0, sizeof(struct ip6_hdr), &off); mld = (struct mld_hdr *)(mtod(md, uint8_t *) + off); type = mld->mld_type; error = ip6_output(m0, &mld_po, NULL, IPV6_UNSPECSRC, &im6o, &oifp, NULL); if (error) { CTR3(KTR_MLD, "%s: ip6_output(%p) = %d", __func__, m0, error); goto out; } ICMP6STAT_INC(icp6s_outhist[type]); if (oifp != NULL) { icmp6_ifstat_inc(oifp, ifs6_out_msg); switch (type) { case MLD_LISTENER_REPORT: case MLDV2_LISTENER_REPORT: icmp6_ifstat_inc(oifp, ifs6_out_mldreport); break; case MLD_LISTENER_DONE: icmp6_ifstat_inc(oifp, ifs6_out_mlddone); break; } } out: return; } /* * Encapsulate an MLDv2 report. * * KAME IPv6 requires that hop-by-hop options be passed separately, * and that the IPv6 header be prepended in a separate mbuf. * * Returns a pointer to the new mbuf chain head, or NULL if the * allocation failed. 
*/
static struct mbuf *
mld_v2_encap_report(struct ifnet *ifp, struct mbuf *m)
{
	struct mbuf		*mh;
	struct mldv2_report	*mld;
	struct ip6_hdr		*ip6;
	struct in6_ifaddr	*ia;
	int			 mldreclen;

	KASSERT(ifp != NULL, ("%s: null ifp", __func__));
	KASSERT((m->m_flags & M_PKTHDR),
	    ("%s: mbuf chain %p is !M_PKTHDR", __func__, m));

	/*
	 * RFC3590: OK to send as :: or tentative during DAD.
	 */
	ia = in6ifa_ifpforlinklocal(ifp, IN6_IFF_NOTREADY|IN6_IFF_ANYCAST);
	if (ia == NULL)
		CTR1(KTR_MLD, "%s: warning: ia is NULL", __func__);

	/* Header mbuf; on failure the record chain 'm' is freed here. */
	mh = m_gethdr(M_NOWAIT, MT_DATA);
	if (mh == NULL) {
		if (ia != NULL)
			ifa_free(&ia->ia_ifa);
		m_freem(m);
		return (NULL);
	}
	M_ALIGN(mh, sizeof(struct ip6_hdr) + sizeof(struct mldv2_report));

	/* Total length of the group records already built in 'm'. */
	mldreclen = m_length(m, NULL);
	CTR2(KTR_MLD, "%s: mldreclen is %d", __func__, mldreclen);

	/*
	 * The header mbuf holds only the IPv6 and MLDv2 report headers;
	 * the packet length also covers the records chained after it.
	 */
	mh->m_len = sizeof(struct ip6_hdr) + sizeof(struct mldv2_report);
	mh->m_pkthdr.len = sizeof(struct ip6_hdr) +
	    sizeof(struct mldv2_report) + mldreclen;

	ip6 = mtod(mh, struct ip6_hdr *);
	ip6->ip6_flow = 0;
	ip6->ip6_vfc &= ~IPV6_VERSION_MASK;
	ip6->ip6_vfc |= IPV6_VERSION;
	ip6->ip6_nxt = IPPROTO_ICMPV6;
	/* Source is the link-local address if one was found, else ::. */
	ip6->ip6_src = ia ? ia->ia_addr.sin6_addr : in6addr_any;
	if (ia != NULL)
		ifa_free(&ia->ia_ifa);
	ip6->ip6_dst = in6addr_linklocal_allv2routers;
	/* scope ID will be set in netisr */

	mld = (struct mldv2_report *)(ip6 + 1);
	mld->mld_type = MLDV2_LISTENER_REPORT;
	mld->mld_code = 0;
	mld->mld_cksum = 0;
	mld->mld_v2_reserved = 0;
	/*
	 * Move the record count kept in the packet header of 'm' into
	 * the report header, then clear the packet header field.
	 */
	mld->mld_v2_numrecs = htons(m->m_pkthdr.PH_vt.vt_nrecs);
	m->m_pkthdr.PH_vt.vt_nrecs = 0;

	/* Link the records behind the headers before checksumming. */
	mh->m_next = m;
	mld->mld_cksum = in6_cksum(mh, IPPROTO_ICMPV6,
	    sizeof(struct ip6_hdr), sizeof(struct mldv2_report) + mldreclen);
	return (mh);
}

#ifdef KTR
/*
 * Map an MLDv2 group record type to a short label for trace output.
 * Types not listed here yield "unknown".
 */
static char *
mld_rec_type_to_str(const int type)
{

	switch (type) {
		case MLD_CHANGE_TO_EXCLUDE_MODE:
			return "TO_EX";
			break;
		case MLD_CHANGE_TO_INCLUDE_MODE:
			return "TO_IN";
			break;
		case MLD_MODE_IS_EXCLUDE:
			return "MODE_EX";
			break;
		case MLD_MODE_IS_INCLUDE:
			return "MODE_IN";
			break;
		case MLD_ALLOW_NEW_SOURCES:
			return "ALLOW_NEW";
			break;
		case MLD_BLOCK_OLD_SOURCES:
			return "BLOCK_OLD";
			break;
		default:
			break;
	}
	return "unknown";
}
#endif

/*
 * Global initialisation: set up the MLD lock and fill in the packet
 * options (mld_po) that mld_dispatch_packet() passes to ip6_output().
 */
static void
mld_init(void *unused __unused)
{

	CTR1(KTR_MLD, "%s: initializing", __func__);
	MLD_LOCK_INIT();

	ip6_initpktopts(&mld_po);
	mld_po.ip6po_hlim = 1;
	mld_po.ip6po_hbh = &mld_ra.hbh;
	mld_po.ip6po_prefer_tempaddr = IP6PO_TEMPADDR_NOTPREFER;
	mld_po.ip6po_flags = IP6PO_DONTFRAG;
}
SYSINIT(mld_init, SI_SUB_PROTO_MC, SI_ORDER_MIDDLE, mld_init, NULL);

/*
 * Global teardown: destroy the MLD lock.
 */
static void
mld_uninit(void *unused __unused)
{

	CTR1(KTR_MLD, "%s: tearing down", __func__);
	MLD_LOCK_DESTROY();
}
SYSUNINIT(mld_uninit, SI_SUB_PROTO_MC, SI_ORDER_MIDDLE, mld_uninit, NULL);

/*
 * Per-vnet initialisation: start with an empty V_mli_head list.
 */
static void
vnet_mld_init(const void *unused __unused)
{

	CTR1(KTR_MLD, "%s: initializing", __func__);
	LIST_INIT(&V_mli_head);
}
VNET_SYSINIT(vnet_mld_init, SI_SUB_PROTO_MC, SI_ORDER_ANY, vnet_mld_init,
    NULL);

/*
 * Per-vnet teardown: only emits a trace record.
 */
static void
vnet_mld_uninit(const void *unused __unused)
{

	/* This can happen if we shutdown the network stack. */
	CTR1(KTR_MLD, "%s: tearing down", __func__);
}
VNET_SYSUNINIT(vnet_mld_uninit, SI_SUB_PROTO_MC, SI_ORDER_ANY, vnet_mld_uninit,
    NULL);

/*
 * Module event handler.  MOD_LOAD and MOD_UNLOAD are accepted without
 * doing any work here; every other event type is rejected with
 * EOPNOTSUPP.
 */
static int
mld_modevent(module_t mod, int type, void *unused __unused)
{

    switch (type) {
    case MOD_LOAD:
    case MOD_UNLOAD:
	break;
    default:
	return (EOPNOTSUPP);
    }
    return (0);
}

static moduledata_t mld_mod = {
    "mld",
    mld_modevent,
    0
};
DECLARE_MODULE(mld, mld_mod, SI_SUB_PROTO_MC, SI_ORDER_ANY);