Index: head/sys/net/if_stf.c
===================================================================
--- head/sys/net/if_stf.c	(revision 362899)
+++ head/sys/net/if_stf.c	(revision 362900)
@@ -1,759 +1,762 @@
 /*	$FreeBSD$	*/
 /*	$KAME: if_stf.c,v 1.73 2001/12/03 11:08:30 keiichi Exp $	*/
 
 /*-
  * SPDX-License-Identifier: BSD-3-Clause
  *
  * Copyright (C) 2000 WIDE Project.
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. Neither the name of the project nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  */
 
 /*
  * 6to4 interface, based on RFC3056.
  *
  * 6to4 interface is NOT capable of link-layer (I mean, IPv4) multicasting.
  * There is no address mapping defined from IPv6 multicast address to IPv4
  * address.  Therefore, we do not have IFF_MULTICAST on the interface.
  *
  * Due to the lack of address mapping for link-local addresses, we cannot
  * throw packets toward link-local addresses (fe80::x).  Also, we cannot throw
  * packets to link-local multicast addresses (ff02::x).
  *
  * Here are interesting symptoms due to the lack of link-local address:
  *
  * Unicast routing exchange:
  * - RIPng: Impossible.  Uses link-local multicast packet toward ff02::9,
  *   and link-local addresses as nexthop.
  * - OSPFv6: Impossible.  OSPFv6 assumes that there's link-local address
  *   assigned to the link, and makes use of them.  Also, HELLO packets use
  *   link-local multicast addresses (ff02::5 and ff02::6).
  * - BGP4+: Maybe.  You can only use global address as nexthop, and global
  *   address as TCP endpoint address.
  *
  * Multicast routing protocols:
  * - PIM: Hello packet cannot be used to discover adjacent PIM routers.
  *   Adjacent PIM routers must be configured manually (is it really spec-wise
  *   correct thing to do?).
  *
  * ICMPv6:
  * - Redirects cannot be used due to the lack of link-local address.
  *
  * stf interface does not have, and will not need, a link-local address.  
  * It seems to have no real benefit and does not help the above symptoms much.
  * Even if we assign link-locals to interface, we cannot really
  * use link-local unicast/multicast on top of 6to4 cloud (since there's no
  * encapsulation defined for link-local address), and the above analysis does
  * not change.  RFC3056 does not mandate the assignment of link-local address
  * either.
  *
  * 6to4 interface has security issues.  Refer to
  * http://playground.iijlab.net/i-d/draft-itojun-ipv6-transition-abuse-00.txt
  * for details.  The code tries to filter out some of malicious packets.
  * Note that there is no way to be 100% secure.
  */
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/socket.h>
 #include <sys/sockio.h>
 #include <sys/mbuf.h>
 #include <sys/errno.h>
 #include <sys/kernel.h>
 #include <sys/lock.h>
 #include <sys/module.h>
 #include <sys/proc.h>
 #include <sys/queue.h>
 #include <sys/rmlock.h>
 #include <sys/sysctl.h>
 #include <machine/cpu.h>
 
 #include <sys/malloc.h>
 
 #include <net/if.h>
 #include <net/if_var.h>
 #include <net/if_clone.h>
 #include <net/route.h>
+#include <net/route/nhop.h>
 #include <net/netisr.h>
 #include <net/if_types.h>
 #include <net/vnet.h>
 
 #include <netinet/in.h>
 #include <netinet/in_fib.h>
 #include <netinet/in_systm.h>
 #include <netinet/ip.h>
 #include <netinet/ip_var.h>
 #include <netinet/in_var.h>
 
 #include <netinet/ip6.h>
 #include <netinet6/ip6_var.h>
 #include <netinet6/in6_var.h>
 #include <netinet/ip_ecn.h>
 
 #include <netinet/ip_encap.h>
 
 #include <machine/stdarg.h>
 
 #include <net/bpf.h>
 
 #include <security/mac/mac_framework.h>
 
 SYSCTL_DECL(_net_link);
 static SYSCTL_NODE(_net_link, IFT_STF, stf, CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
     "6to4 Interface");
 
 static int stf_permit_rfc1918 = 0;
 SYSCTL_INT(_net_link_stf, OID_AUTO, permit_rfc1918, CTLFLAG_RWTUN,
     &stf_permit_rfc1918, 0, "Permit the use of private IPv4 addresses");
 
 #define STFUNIT		0
 
 #define IN6_IS_ADDR_6TO4(x)	(ntohs((x)->s6_addr16[0]) == 0x2002)
 
 /*
- * XXX: Return a pointer with 16-bit aligned.  Don't cast it to
+ * XXX: Returns a pointer that is only 16-bit aligned.  Don't cast it to
  * struct in_addr *; use bcopy() instead.
  */
 #define GET_V4(x)	(&(x)->s6_addr16[1])
 
 struct stf_softc {
 	struct ifnet	*sc_ifp;
 	u_int	sc_fibnum;
 	const struct encaptab *encap_cookie;
 };
 #define STF2IFP(sc)	((sc)->sc_ifp)
 
 static const char stfname[] = "stf";
 
 static MALLOC_DEFINE(M_STF, stfname, "6to4 Tunnel Interface");
 static const int ip_stf_ttl = 40;
 
 static int in_stf_input(struct mbuf *, int, int, void *);
 static char *stfnames[] = {"stf0", "stf", "6to4", NULL};
 
 static int stfmodevent(module_t, int, void *);
 static int stf_encapcheck(const struct mbuf *, int, int, void *);
 static int stf_getsrcifa6(struct ifnet *, struct in6_addr *, struct in6_addr *);
 static int stf_output(struct ifnet *, struct mbuf *, const struct sockaddr *,
 	struct route *);
 static int isrfc1918addr(struct in_addr *);
 static int stf_checkaddr4(struct stf_softc *, struct in_addr *,
 	struct ifnet *);
 static int stf_checkaddr6(struct stf_softc *, struct in6_addr *,
 	struct ifnet *);
 static int stf_ioctl(struct ifnet *, u_long, caddr_t);
 
 static int stf_clone_match(struct if_clone *, const char *);
 static int stf_clone_create(struct if_clone *, char *, size_t, caddr_t);
 static int stf_clone_destroy(struct if_clone *, struct ifnet *);
 static struct if_clone *stf_cloner;
 
 static const struct encap_config ipv4_encap_cfg = {
 	.proto = IPPROTO_IPV6,
 	.min_length = sizeof(struct ip),
 	.exact_match = (sizeof(in_addr_t) << 3) + 8,
 	.check = stf_encapcheck,
 	.input = in_stf_input
 };
 
 static int
 stf_clone_match(struct if_clone *ifc, const char *name)
 {
 	int i;
 
 	for(i = 0; stfnames[i] != NULL; i++) {
 		if (strcmp(stfnames[i], name) == 0)
 			return (1);
 	}
 
 	return (0);
 }
 
 static int
 stf_clone_create(struct if_clone *ifc, char *name, size_t len, caddr_t params)
 {
 	char *dp;
 	int err, unit, wildcard;
 	struct stf_softc *sc;
 	struct ifnet *ifp;
 
 	err = ifc_name2unit(name, &unit);
 	if (err != 0)
 		return (err);
 	wildcard = (unit < 0);
 
 	/*
 	 * We can only have one unit, but since unit allocation is
 	 * already locked, we use it to keep from allocating extra
 	 * interfaces.
 	 */
 	unit = STFUNIT;
 	err = ifc_alloc_unit(ifc, &unit);
 	if (err != 0)
 		return (err);
 
 	sc = malloc(sizeof(struct stf_softc), M_STF, M_WAITOK | M_ZERO);
 	ifp = STF2IFP(sc) = if_alloc(IFT_STF);
 	if (ifp == NULL) {
 		free(sc, M_STF);
 		ifc_free_unit(ifc, unit);
 		return (ENOSPC);
 	}
 	ifp->if_softc = sc;
 	sc->sc_fibnum = curthread->td_proc->p_fibnum;
 
 	/*
-	 * Set the name manually rather then using if_initname because
+	 * Set the name manually rather than using if_initname because
 	 * we don't conform to the default naming convention for interfaces.
 	 * In the wildcard case, we need to update the name.
 	 */
 	if (wildcard) {
 		for (dp = name; *dp != '\0'; dp++);
 		if (snprintf(dp, len - (dp-name), "%d", unit) >
 		    len - (dp-name) - 1) {
 			/*
-			 * This can only be a programmer error and
-			 * there's no straightforward way to recover if
-			 * it happens.
+			 * This can only be a programmer error and there's
+			 * no straightforward way to recover if it happens.
 			 */
 			panic("if_clone_create(): interface name too long");
 		}
 	}
 	strlcpy(ifp->if_xname, name, IFNAMSIZ);
 	ifp->if_dname = stfname;
 	ifp->if_dunit = IF_DUNIT_NONE;
 
 	sc->encap_cookie = ip_encap_attach(&ipv4_encap_cfg, sc, M_WAITOK);
 	if (sc->encap_cookie == NULL) {
 		if_printf(ifp, "attach failed\n");
+		if_free(ifp);	/* allocated above, never attached */
 		free(sc, M_STF);
 		ifc_free_unit(ifc, unit);
 		return (ENOMEM);
 	}
 
 	ifp->if_mtu    = IPV6_MMTU;
 	ifp->if_ioctl  = stf_ioctl;
 	ifp->if_output = stf_output;
 	ifp->if_snd.ifq_maxlen = ifqmaxlen;
 	if_attach(ifp);
 	bpfattach(ifp, DLT_NULL, sizeof(u_int32_t));
 	return (0);
 }
 
 static int
 stf_clone_destroy(struct if_clone *ifc, struct ifnet *ifp)
 {
 	struct stf_softc *sc = ifp->if_softc;
 	int err __unused;
 
 	err = ip_encap_detach(sc->encap_cookie);
 	KASSERT(err == 0, ("Unexpected error detaching encap_cookie"));
 	bpfdetach(ifp);
 	if_detach(ifp);
 	if_free(ifp);
 
 	free(sc, M_STF);
 	ifc_free_unit(ifc, STFUNIT);
 
 	return (0);
 }
 
 static int
 stfmodevent(module_t mod, int type, void *data)
 {
 
 	switch (type) {
 	case MOD_LOAD:
 		stf_cloner = if_clone_advanced(stfname, 0, stf_clone_match,
 		    stf_clone_create, stf_clone_destroy);
 		break;
 	case MOD_UNLOAD:
 		if_clone_detach(stf_cloner);
 		break;
 	default:
 		return (EOPNOTSUPP);
 	}
 
 	return (0);
 }
 
 static moduledata_t stf_mod = {
 	"if_stf",
 	stfmodevent,
 	0
 };
 
 DECLARE_MODULE(if_stf, stf_mod, SI_SUB_PSEUDO, SI_ORDER_ANY);
 
 static int
 stf_encapcheck(const struct mbuf *m, int off, int proto, void *arg)
 {
 	struct ip ip;
 	struct stf_softc *sc;
 	struct in_addr a, b, mask;
 	struct in6_addr addr6, mask6;
 
 	sc = (struct stf_softc *)arg;
 	if (sc == NULL)
 		return 0;
 
 	if ((STF2IFP(sc)->if_flags & IFF_UP) == 0)
 		return 0;
 
 	/* IFF_LINK0 means "no decapsulation" */
 	if ((STF2IFP(sc)->if_flags & IFF_LINK0) != 0)
 		return 0;
 
 	if (proto != IPPROTO_IPV6)
 		return 0;
 
 	m_copydata(m, 0, sizeof(ip), (caddr_t)&ip);
 
 	if (ip.ip_v != 4)
 		return 0;
 
 	if (stf_getsrcifa6(STF2IFP(sc), &addr6, &mask6) != 0)
 		return (0);
 
 	/*
 	 * check if IPv4 dst matches the IPv4 address derived from the
 	 * local 6to4 address.
 	 * success on: dst = 10.1.1.1, ia6->ia_addr = 2002:0a01:0101:...
 	 */
 	if (bcmp(GET_V4(&addr6), &ip.ip_dst, sizeof(ip.ip_dst)) != 0)
 		return 0;
 
 	/*
 	 * check if IPv4 src matches the IPv4 address derived from the
 	 * local 6to4 address masked by prefixmask.
 	 * success on: src = 10.1.1.1, ia6->ia_addr = 2002:0a00:.../24
 	 * fail on: src = 10.1.1.1, ia6->ia_addr = 2002:0b00:.../24
 	 */
 	bzero(&a, sizeof(a));
 	bcopy(GET_V4(&addr6), &a, sizeof(a));
 	bcopy(GET_V4(&mask6), &mask, sizeof(mask));
 	a.s_addr &= mask.s_addr;
 	b = ip.ip_src;
 	b.s_addr &= mask.s_addr;
 	if (a.s_addr != b.s_addr)
 		return 0;
 
 	/* stf interface makes single side match only */
 	return 32;
 }
 
 static int
 stf_getsrcifa6(struct ifnet *ifp, struct in6_addr *addr, struct in6_addr *mask)
 {
 	struct rm_priotracker in_ifa_tracker;
 	struct ifaddr *ia;
 	struct in_ifaddr *ia4;
 	struct in6_ifaddr *ia6;
 	struct sockaddr_in6 *sin6;
 	struct in_addr in;
 
 	NET_EPOCH_ASSERT();
 
 	CK_STAILQ_FOREACH(ia, &ifp->if_addrhead, ifa_link) {
 		if (ia->ifa_addr->sa_family != AF_INET6)
 			continue;
 		sin6 = (struct sockaddr_in6 *)ia->ifa_addr;
 		if (!IN6_IS_ADDR_6TO4(&sin6->sin6_addr))
 			continue;
 
 		bcopy(GET_V4(&sin6->sin6_addr), &in, sizeof(in));
 		IN_IFADDR_RLOCK(&in_ifa_tracker);
 		LIST_FOREACH(ia4, INADDR_HASH(in.s_addr), ia_hash)
 			if (ia4->ia_addr.sin_addr.s_addr == in.s_addr)
 				break;
 		IN_IFADDR_RUNLOCK(&in_ifa_tracker);
 		if (ia4 == NULL)
 			continue;
 
 		ia6 = (struct in6_ifaddr *)ia;
 
 		*addr = sin6->sin6_addr;
 		*mask = ia6->ia_prefixmask.sin6_addr;
 		return (0);
 	}
 
 	return (ENOENT);
 }
 
 static int
 stf_output(struct ifnet *ifp, struct mbuf *m, const struct sockaddr *dst,
     struct route *ro)
 {
 	struct stf_softc *sc;
 	const struct sockaddr_in6 *dst6;
 	struct in_addr in4;
 	const void *ptr;
 	u_int8_t tos;
 	struct ip *ip;
 	struct ip6_hdr *ip6;
 	struct in6_addr addr6, mask6;
 	int error;
 
 #ifdef MAC
 	error = mac_ifnet_check_transmit(ifp, m);
 	if (error) {
 		m_freem(m);
 		return (error);
 	}
 #endif
 
 	sc = ifp->if_softc;
 	dst6 = (const struct sockaddr_in6 *)dst;
 
 	/* just in case */
 	if ((ifp->if_flags & IFF_UP) == 0) {
 		m_freem(m);
 		if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
 		return ENETDOWN;
 	}
 
 	/*
-	 * If we don't have an ip4 address that match my inner ip6 address,
+	 * If we don't have an ip4 address that matches my inner ip6 address,
 	 * we shouldn't generate output.  Without this check, we'll end up
 	 * using wrong IPv4 source.
 	 */
 	if (stf_getsrcifa6(ifp, &addr6, &mask6) != 0) {
 		m_freem(m);
 		if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
 		return ENETDOWN;
 	}
 
 	if (m->m_len < sizeof(*ip6)) {
 		m = m_pullup(m, sizeof(*ip6));
 		if (!m) {
 			if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
 			return ENOBUFS;
 		}
 	}
 	ip6 = mtod(m, struct ip6_hdr *);
 	tos = (ntohl(ip6->ip6_flow) >> 20) & 0xff;
 
 	/*
 	 * Pickup the right outer dst addr from the list of candidates.
 	 * ip6_dst has priority as it may be able to give us shorter IPv4 hops.
 	 */
 	ptr = NULL;
 	if (IN6_IS_ADDR_6TO4(&ip6->ip6_dst))
 		ptr = GET_V4(&ip6->ip6_dst);
 	else if (IN6_IS_ADDR_6TO4(&dst6->sin6_addr))
 		ptr = GET_V4(&dst6->sin6_addr);
 	else {
 		m_freem(m);
 		if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
 		return ENETUNREACH;
 	}
 	bcopy(ptr, &in4, sizeof(in4));
 
 	if (bpf_peers_present(ifp->if_bpf)) {
 		/*
 		 * We need to prepend the address family as
 		 * a four byte field.  Cons up a dummy header
 		 * to pacify bpf.  This is safe because bpf
 		 * will only read from the mbuf (i.e., it won't
-		 * try to free it or keep a pointer a to it).
+		 * try to free it or keep a pointer to it).
 		 */
 		u_int af = AF_INET6;
 		bpf_mtap2(ifp->if_bpf, &af, sizeof(af), m);
 	}
 
 	M_PREPEND(m, sizeof(struct ip), M_NOWAIT);
 	if (m == NULL) {
 		if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
 		return ENOBUFS;
 	}
 	ip = mtod(m, struct ip *);
 
 	bzero(ip, sizeof(*ip));
 
 	bcopy(GET_V4(&addr6), &ip->ip_src, sizeof(ip->ip_src));
 	bcopy(&in4, &ip->ip_dst, sizeof(ip->ip_dst));
 	ip->ip_p = IPPROTO_IPV6;
 	ip->ip_ttl = ip_stf_ttl;
 	ip->ip_len = htons(m->m_pkthdr.len);
 	if (ifp->if_flags & IFF_LINK1)
 		ip_ecn_ingress(ECN_ALLOWED, &ip->ip_tos, &tos);
 	else
 		ip_ecn_ingress(ECN_NOCARE, &ip->ip_tos, &tos);
 
 	M_SETFIB(m, sc->sc_fibnum);
 	if_inc_counter(ifp, IFCOUNTER_OPACKETS, 1);
 	error = ip_output(m, NULL, NULL, 0, NULL, NULL);
 
 	return error;
 }
 
 static int
 isrfc1918addr(struct in_addr *in)
 {
 	/*
 	 * returns 1 if private address range:
 	 * 10.0.0.0/8 172.16.0.0/12 192.168.0.0/16
 	 */
 	if (stf_permit_rfc1918 == 0 && (
 	    (ntohl(in->s_addr) & 0xff000000) >> 24 == 10 ||
 	    (ntohl(in->s_addr) & 0xfff00000) >> 16 == 172 * 256 + 16 ||
 	    (ntohl(in->s_addr) & 0xffff0000) >> 16 == 192 * 256 + 168))
 		return 1;
 
 	return 0;
 }
 
 static int
 stf_checkaddr4(struct stf_softc *sc, struct in_addr *in, struct ifnet *inifp)
 {
 	struct rm_priotracker in_ifa_tracker;
 	struct in_ifaddr *ia4;
 
 	/*
 	 * reject packets with the following address:
 	 * 224.0.0.0/4 0.0.0.0/8 127.0.0.0/8 255.0.0.0/8
 	 */
 	if (IN_MULTICAST(ntohl(in->s_addr)))
 		return -1;
 	switch ((ntohl(in->s_addr) & 0xff000000) >> 24) {
 	case 0: case 127: case 255:
 		return -1;
 	}
 
 	/*
 	 * reject packets with private address range.
 	 * (requirement from RFC3056 section 2 1st paragraph)
 	 */
 	if (isrfc1918addr(in))
 		return -1;
 
 	/*
 	 * reject packets with broadcast
 	 */
 	IN_IFADDR_RLOCK(&in_ifa_tracker);
 	CK_STAILQ_FOREACH(ia4, &V_in_ifaddrhead, ia_link) {
 		if ((ia4->ia_ifa.ifa_ifp->if_flags & IFF_BROADCAST) == 0)
 			continue;
 		if (in->s_addr == ia4->ia_broadaddr.sin_addr.s_addr) {
 			IN_IFADDR_RUNLOCK(&in_ifa_tracker);
 			return -1;
 		}
 	}
 	IN_IFADDR_RUNLOCK(&in_ifa_tracker);
 
 	/*
 	 * perform ingress filter
 	 */
 	if (sc && (STF2IFP(sc)->if_flags & IFF_LINK2) == 0 && inifp) {
-		struct nhop4_basic nh4;
+		struct nhop_object *nh;
 
-		if (fib4_lookup_nh_basic(sc->sc_fibnum, *in, 0, 0, &nh4) != 0)
+		NET_EPOCH_ASSERT();
+		nh = fib4_lookup(sc->sc_fibnum, *in, 0, 0, 0);
+		if (nh == NULL)
 			return (-1);
 
-		if (nh4.nh_ifp != inifp)
+		if (nh->nh_ifp != inifp)
 			return (-1);
 	}
 
 	return 0;
 }
 
 static int
 stf_checkaddr6(struct stf_softc *sc, struct in6_addr *in6, struct ifnet *inifp)
 {
 	/*
 	 * check 6to4 addresses
 	 */
 	if (IN6_IS_ADDR_6TO4(in6)) {
 		struct in_addr in4;
 		bcopy(GET_V4(in6), &in4, sizeof(in4));
 		return stf_checkaddr4(sc, &in4, inifp);
 	}
 
 	/*
-	 * reject anything that look suspicious.  the test is implemented
+	 * reject anything that looks suspicious.  the test is implemented
 	 * in ip6_input too, but we check here as well to
 	 * (1) reject bad packets earlier, and
 	 * (2) to be safe against future ip6_input change.
 	 */
 	if (IN6_IS_ADDR_V4COMPAT(in6) || IN6_IS_ADDR_V4MAPPED(in6))
 		return -1;
 
 	return 0;
 }
 
 static int
 in_stf_input(struct mbuf *m, int off, int proto, void *arg)
 {
 	struct stf_softc *sc = arg;
 	struct ip *ip;
 	struct ip6_hdr *ip6;
 	u_int8_t otos, itos;
 	struct ifnet *ifp;
 
 	NET_EPOCH_ASSERT();
 
 	if (proto != IPPROTO_IPV6) {
 		m_freem(m);
 		return (IPPROTO_DONE);
 	}
 
 	ip = mtod(m, struct ip *);
 	if (sc == NULL || (STF2IFP(sc)->if_flags & IFF_UP) == 0) {
 		m_freem(m);
 		return (IPPROTO_DONE);
 	}
 
 	ifp = STF2IFP(sc);
 
 #ifdef MAC
 	mac_ifnet_create_mbuf(ifp, m);
 #endif
 
 	/*
 	 * perform sanity check against outer src/dst.
 	 * for source, perform ingress filter as well.
 	 */
 	if (stf_checkaddr4(sc, &ip->ip_dst, NULL) < 0 ||
 	    stf_checkaddr4(sc, &ip->ip_src, m->m_pkthdr.rcvif) < 0) {
 		m_freem(m);
 		return (IPPROTO_DONE);
 	}
 
 	otos = ip->ip_tos;
 	m_adj(m, off);
 
 	if (m->m_len < sizeof(*ip6)) {
 		m = m_pullup(m, sizeof(*ip6));
 		if (!m)
 			return (IPPROTO_DONE);
 	}
 	ip6 = mtod(m, struct ip6_hdr *);
 
 	/*
 	 * perform sanity check against inner src/dst.
 	 * for source, perform ingress filter as well.
 	 */
 	if (stf_checkaddr6(sc, &ip6->ip6_dst, NULL) < 0 ||
 	    stf_checkaddr6(sc, &ip6->ip6_src, m->m_pkthdr.rcvif) < 0) {
 		m_freem(m);
 		return (IPPROTO_DONE);
 	}
 
 	itos = (ntohl(ip6->ip6_flow) >> 20) & 0xff;
 	if ((ifp->if_flags & IFF_LINK1) != 0)
 		ip_ecn_egress(ECN_ALLOWED, &otos, &itos);
 	else
 		ip_ecn_egress(ECN_NOCARE, &otos, &itos);
 	ip6->ip6_flow &= ~htonl(0xff << 20);
 	ip6->ip6_flow |= htonl((u_int32_t)itos << 20);
 
 	m->m_pkthdr.rcvif = ifp;
 
 	if (bpf_peers_present(ifp->if_bpf)) {
 		/*
 		 * We need to prepend the address family as
 		 * a four byte field.  Cons up a dummy header
 		 * to pacify bpf.  This is safe because bpf
 		 * will only read from the mbuf (i.e., it won't
-		 * try to free it or keep a pointer a to it).
+		 * try to free it or keep a pointer to it).
 		 */
 		u_int32_t af = AF_INET6;
 		bpf_mtap2(ifp->if_bpf, &af, sizeof(af), m);
 	}
 
 	/*
 	 * Put the packet to the network layer input queue according to the
 	 * specified address family.
 	 * See net/if_gif.c for possible issues with packet processing
 	 * reorder due to extra queueing.
 	 */
 	if_inc_counter(ifp, IFCOUNTER_IPACKETS, 1);
 	if_inc_counter(ifp, IFCOUNTER_IBYTES, m->m_pkthdr.len);
 	M_SETFIB(m, ifp->if_fib);
 	netisr_dispatch(NETISR_IPV6, m);
 	return (IPPROTO_DONE);
 }
 
 static int
 stf_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
 {
 	struct ifaddr *ifa;
 	struct ifreq *ifr;
 	struct sockaddr_in6 *sin6;
 	struct in_addr addr;
 	int error, mtu;
 
 	error = 0;
 	switch (cmd) {
 	case SIOCSIFADDR:
 		ifa = (struct ifaddr *)data;
 		if (ifa == NULL || ifa->ifa_addr->sa_family != AF_INET6) {
 			error = EAFNOSUPPORT;
 			break;
 		}
 		sin6 = (struct sockaddr_in6 *)ifa->ifa_addr;
 		if (!IN6_IS_ADDR_6TO4(&sin6->sin6_addr)) {
 			error = EINVAL;
 			break;
 		}
 		bcopy(GET_V4(&sin6->sin6_addr), &addr, sizeof(addr));
 		if (isrfc1918addr(&addr)) {
 			error = EINVAL;
 			break;
 		}
 
 		ifp->if_flags |= IFF_UP;
 		ifp->if_drv_flags |= IFF_DRV_RUNNING;
 		break;
 
 	case SIOCADDMULTI:
 	case SIOCDELMULTI:
 		ifr = (struct ifreq *)data;
 		if (ifr && ifr->ifr_addr.sa_family == AF_INET6)
 			;
 		else
 			error = EAFNOSUPPORT;
 		break;
 
 	case SIOCGIFMTU:
 		break;
 
 	case SIOCSIFMTU:
 		ifr = (struct ifreq *)data;
 		mtu = ifr->ifr_mtu;
 		/* RFC 4213 3.2 ideal world MTU */
 		if (mtu < IPV6_MINMTU || mtu > IF_MAXMTU - 20)
 			return (EINVAL);
 		ifp->if_mtu = mtu;
 		break;
 
 	default:
 		error = EINVAL;
 		break;
 	}
 
 	return error;
 }
Index: head/sys/netinet/if_ether.c
===================================================================
--- head/sys/netinet/if_ether.c	(revision 362899)
+++ head/sys/netinet/if_ether.c	(revision 362900)
@@ -1,1521 +1,1523 @@
 /*-
  * SPDX-License-Identifier: BSD-3-Clause
  *
  * Copyright (c) 1982, 1986, 1988, 1993
  *	The Regents of the University of California.  All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  *	@(#)if_ether.c	8.1 (Berkeley) 6/10/93
  */
 
 /*
  * Ethernet address resolution protocol.
  * TODO:
  *	add "inuse/lock" bit (or ref. count) along with valid bit
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include "opt_inet.h"
 
 #include <sys/param.h>
 #include <sys/eventhandler.h>
 #include <sys/kernel.h>
 #include <sys/lock.h>
 #include <sys/queue.h>
 #include <sys/sysctl.h>
 #include <sys/systm.h>
 #include <sys/mbuf.h>
 #include <sys/malloc.h>
 #include <sys/proc.h>
 #include <sys/rmlock.h>
 #include <sys/socket.h>
 #include <sys/syslog.h>
 
 #include <net/if.h>
 #include <net/if_var.h>
 #include <net/if_dl.h>
 #include <net/if_types.h>
 #include <net/netisr.h>
 #include <net/ethernet.h>
 #include <net/route.h>
+#include <net/route/nhop.h>
 #include <net/vnet.h>
 
 #include <netinet/in.h>
 #include <netinet/in_fib.h>
 #include <netinet/in_var.h>
 #include <net/if_llatbl.h>
 #include <netinet/if_ether.h>
 #ifdef INET
 #include <netinet/ip_carp.h>
 #endif
 
 #include <security/mac/mac_framework.h>
 
 #define SIN(s) ((const struct sockaddr_in *)(s))
 
 static struct timeval arp_lastlog;
 static int arp_curpps;
 static int arp_maxpps = 1;
 
 /* Simple ARP state machine */
 enum arp_llinfo_state {
 	ARP_LLINFO_INCOMPLETE = 0, /* No LLE data */
 	ARP_LLINFO_REACHABLE,	/* LLE is valid */
 	ARP_LLINFO_VERIFY,	/* LLE is valid, need refresh */
 	ARP_LLINFO_DELETED,	/* LLE is deleted */
 };
 
 SYSCTL_DECL(_net_link_ether);
 static SYSCTL_NODE(_net_link_ether, PF_INET, inet,
     CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
     "");
 static SYSCTL_NODE(_net_link_ether, PF_ARP, arp,
     CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
     "");
 
 /* timer values */
 VNET_DEFINE_STATIC(int, arpt_keep) = (20*60);	/* once resolved, good for 20
 						 * minutes */
 VNET_DEFINE_STATIC(int, arp_maxtries) = 5;
 VNET_DEFINE_STATIC(int, arp_proxyall) = 0;
 VNET_DEFINE_STATIC(int, arpt_down) = 20;	/* keep incomplete entries for
 						 * 20 seconds */
 VNET_DEFINE_STATIC(int, arpt_rexmit) = 1;	/* retransmit arp entries, sec*/
 VNET_PCPUSTAT_DEFINE(struct arpstat, arpstat);  /* ARP statistics, see if_arp.h */
 VNET_PCPUSTAT_SYSINIT(arpstat);
 
 #ifdef VIMAGE
 VNET_PCPUSTAT_SYSUNINIT(arpstat);
 #endif /* VIMAGE */
 
 VNET_DEFINE_STATIC(int, arp_maxhold) = 1;
 
 #define	V_arpt_keep		VNET(arpt_keep)
 #define	V_arpt_down		VNET(arpt_down)
 #define	V_arpt_rexmit		VNET(arpt_rexmit)
 #define	V_arp_maxtries		VNET(arp_maxtries)
 #define	V_arp_proxyall		VNET(arp_proxyall)
 #define	V_arp_maxhold		VNET(arp_maxhold)
 
 SYSCTL_INT(_net_link_ether_inet, OID_AUTO, max_age, CTLFLAG_VNET | CTLFLAG_RW,
 	&VNET_NAME(arpt_keep), 0,
 	"ARP entry lifetime in seconds");
 SYSCTL_INT(_net_link_ether_inet, OID_AUTO, maxtries, CTLFLAG_VNET | CTLFLAG_RW,
 	&VNET_NAME(arp_maxtries), 0,
 	"ARP resolution attempts before returning error");
 SYSCTL_INT(_net_link_ether_inet, OID_AUTO, proxyall, CTLFLAG_VNET | CTLFLAG_RW,
 	&VNET_NAME(arp_proxyall), 0,
 	"Enable proxy ARP for all suitable requests");
 SYSCTL_INT(_net_link_ether_inet, OID_AUTO, wait, CTLFLAG_VNET | CTLFLAG_RW,
 	&VNET_NAME(arpt_down), 0,
 	"Incomplete ARP entry lifetime in seconds");
 SYSCTL_VNET_PCPUSTAT(_net_link_ether_arp, OID_AUTO, stats, struct arpstat,
     arpstat, "ARP statistics (struct arpstat, net/if_arp.h)");
 SYSCTL_INT(_net_link_ether_inet, OID_AUTO, maxhold, CTLFLAG_VNET | CTLFLAG_RW,
 	&VNET_NAME(arp_maxhold), 0,
 	"Number of packets to hold per ARP entry");
 SYSCTL_INT(_net_link_ether_inet, OID_AUTO, max_log_per_second,
 	CTLFLAG_RW, &arp_maxpps, 0,
 	"Maximum number of remotely triggered ARP messages that can be "
 	"logged per second");
 
 /*
  * Due to the exponential backoff algorithm used for the interval between GARP
  * retransmissions, the maximum number of retransmissions is limited for
  * sanity. This limit corresponds to a maximum interval between retransmissions
  * of 2^16 seconds ~= 18 hours.
  *
  * Making this limit more dynamic is more complicated than worthwhile,
  * especially since sending out GARPs spaced days apart would be of little
  * use. A maximum dynamic limit would look something like:
  *
  * const int max = fls(INT_MAX / hz) - 1;
  */
 #define MAX_GARP_RETRANSMITS 16
 static int sysctl_garp_rexmit(SYSCTL_HANDLER_ARGS);
 static int garp_rexmit_count = 0; /* GARP retransmission setting. */
 
 SYSCTL_PROC(_net_link_ether_inet, OID_AUTO, garp_rexmit_count,
     CTLTYPE_INT|CTLFLAG_RW|CTLFLAG_MPSAFE,
     &garp_rexmit_count, 0, sysctl_garp_rexmit, "I",
     "Number of times to retransmit GARP packets;"
     " 0 to disable, maximum of 16");
 
 VNET_DEFINE_STATIC(int, arp_log_level) = LOG_INFO;	/* Min. log(9) level. */
 #define	V_arp_log_level		VNET(arp_log_level)
 SYSCTL_INT(_net_link_ether_arp, OID_AUTO, log_level, CTLFLAG_VNET | CTLFLAG_RW,
 	&VNET_NAME(arp_log_level), 0,
 	"Minimum log(9) level for recording rate limited arp log messages. "
 	"The higher will be log more (emerg=0, info=6 (default), debug=7).");
 #define	ARP_LOG(pri, ...)	do {					\
 	if ((pri) <= V_arp_log_level &&					\
 	    ppsratecheck(&arp_lastlog, &arp_curpps, arp_maxpps))	\
 		log((pri), "arp: " __VA_ARGS__);			\
 } while (0)
 
 
 static void	arpintr(struct mbuf *);
 static void	arptimer(void *);
 #ifdef INET
 static void	in_arpinput(struct mbuf *);
 #endif
 
 static void arp_check_update_lle(struct arphdr *ah, struct in_addr isaddr,
     struct ifnet *ifp, int bridged, struct llentry *la);
 static void arp_mark_lle_reachable(struct llentry *la);
 static void arp_iflladdr(void *arg __unused, struct ifnet *ifp);
 
 static eventhandler_tag iflladdr_tag;
 
 static const struct netisr_handler arp_nh = {
 	.nh_name = "arp",
 	.nh_handler = arpintr,
 	.nh_proto = NETISR_ARP,
 	.nh_policy = NETISR_POLICY_SOURCE,
 };
 
 /*
  * Timeout routine.  Age arp_tab entries periodically.
  */
 static void
 arptimer(void *arg)
 {
 	struct llentry *lle = (struct llentry *)arg;
 	struct ifnet *ifp;
 	int r_skip_req;
 
 	if (lle->la_flags & LLE_STATIC) {
 		return;
 	}
 	LLE_WLOCK(lle);
 	if (callout_pending(&lle->lle_timer)) {
 		/*
-		 * Here we are a bit odd here in the treatment of
+		 * We are a bit odd here in the treatment of
 		 * active/pending. If the pending bit is set, it got
 		 * rescheduled before I ran. The active
 		 * bit we ignore, since if it was stopped
 		 * in ll_tablefree() and was currently running
-		 * it would have return 0 so the code would
+		 * it would have returned 0 so the code would
 		 * not have deleted it since the callout could
 		 * not be stopped so we want to go through
 		 * with the delete here now. If the callout
 		 * was restarted, the pending bit will be back on and
 		 * we just want to bail since the callout_reset would
 		 * return 1 and our reference would have been removed
 		 * by arpresolve() below.
 		 */
 		LLE_WUNLOCK(lle);
  		return;
  	}
 	ifp = lle->lle_tbl->llt_ifp;
 	CURVNET_SET(ifp->if_vnet);
 
 	switch (lle->ln_state) {
 	case ARP_LLINFO_REACHABLE:
 
 		/*
 		 * Expiration time is approaching.
 		 * Let's try to refresh entry if it is still
 		 * in use.
 		 *
 		 * Set r_skip_req to get feedback from
 		 * fast path. Change state and re-schedule
 		 * ourselves.
 		 */
 		LLE_REQ_LOCK(lle);
 		lle->r_skip_req = 1;
 		LLE_REQ_UNLOCK(lle);
 		lle->ln_state = ARP_LLINFO_VERIFY;
 		callout_schedule(&lle->lle_timer, hz * V_arpt_rexmit);
 		LLE_WUNLOCK(lle);
 		CURVNET_RESTORE();
 		return;
 	case ARP_LLINFO_VERIFY:
 		LLE_REQ_LOCK(lle);
 		r_skip_req = lle->r_skip_req;
 		LLE_REQ_UNLOCK(lle);
 
 		if (r_skip_req == 0 && lle->la_preempt > 0) {
 			/* Entry was used, issue refresh request */
 			struct epoch_tracker et;
 			struct in_addr dst;
 
 			dst = lle->r_l3addr.addr4;
 			lle->la_preempt--;
 			callout_schedule(&lle->lle_timer, hz * V_arpt_rexmit);
 			LLE_WUNLOCK(lle);
 			NET_EPOCH_ENTER(et);
 			arprequest(ifp, NULL, &dst, NULL);
 			NET_EPOCH_EXIT(et);
 			CURVNET_RESTORE();
 			return;
 		}
 		/* Nothing happened. Reschedule if not too late */
 		if (lle->la_expire > time_uptime) {
 			callout_schedule(&lle->lle_timer, hz * V_arpt_rexmit);
 			LLE_WUNLOCK(lle);
 			CURVNET_RESTORE();
 			return;
 		}
 		break;
 	case ARP_LLINFO_INCOMPLETE:
 	case ARP_LLINFO_DELETED:
 		break;
 	}
 
 	if ((lle->la_flags & LLE_DELETED) == 0) {
 		int evt;
 
 		if (lle->la_flags & LLE_VALID)
 			evt = LLENTRY_EXPIRED;
 		else
 			evt = LLENTRY_TIMEDOUT;
 		EVENTHANDLER_INVOKE(lle_event, lle, evt);
 	}
 
 	callout_stop(&lle->lle_timer);
 
 	/* XXX: LOR avoidance. We still have ref on lle. */
 	LLE_WUNLOCK(lle);
 	IF_AFDATA_LOCK(ifp);
 	LLE_WLOCK(lle);
 
 	/* Guard against race with other llentry_free(). */
 	if (lle->la_flags & LLE_LINKED) {
 		LLE_REMREF(lle);
 		lltable_unlink_entry(lle->lle_tbl, lle);
 	}
 	IF_AFDATA_UNLOCK(ifp);
 
 	size_t pkts_dropped = llentry_free(lle);
 
 	ARPSTAT_ADD(dropped, pkts_dropped);
 	ARPSTAT_INC(timeouts);
 
 	CURVNET_RESTORE();
 }
 
 /*
  * Build the link-layer header that if_output() should prepend to the ARP
  * frame described by @ah when it is sent through @ifp.
  *
  * @buf receives the header.  On entry *@bufsize is the capacity of @buf;
  * on success it is overwritten with the header length reported by the
  * interface.  A non-zero @bcast requests a broadcast header.
  *
  * Returns 0 on success, otherwise the error from the interface's
  * if_requestencap() method (*@bufsize is then left unchanged).
  */
 static int
 arp_fillheader(struct ifnet *ifp, struct arphdr *ah, int bcast, u_char *buf,
     size_t *bufsize)
 {
 	struct if_encap_req req;
 	int rc;
 
 	/* Start from a zeroed output buffer and a zeroed request. */
 	bzero(buf, *bufsize);
 	bzero(&req, sizeof(req));
 
 	req.rtype = IFENCAP_LL;
 	req.family = AF_ARP;
 	req.flags = (bcast != 0) ? IFENCAP_FLAG_BROADCAST : 0;
 	req.buf = buf;
 	req.bufsize = *bufsize;
 	req.hdata = (u_char *)ah;
 	req.lladdr = ar_tha(ah);
 
 	rc = ifp->if_requestencap(ifp, &req);
 	if (rc != 0)
 		return (rc);
 
 	*bufsize = req.bufsize;
 	return (0);
 }
 
 
 /*
  * Broadcast an ARP request. Caller specifies:
  *	- arp header source ip address
  *	- arp header target ip address
  *	- arp header source ethernet address
  *
  * A NULL @sip makes the function pick a source among the AF_INET addresses
  * of @ifp, stopping at the first one that shares a subnet with @tip.  A
  * NULL @enaddr selects the CARP address (if carp_iamatch_p supplied one),
  * otherwise the interface link-layer address.
  *
  * Must be called inside the network epoch.
  *
  * Returns 0 on success, EADDRNOTAVAIL when no source address could be
  * chosen, ENOMEM when no mbuf is available, or the error reported by
  * arp_fillheader() / if_output().
  */
 static int
 arprequest_internal(struct ifnet *ifp, const struct in_addr *sip,
     const struct in_addr *tip, u_char *enaddr)
 {
 	struct mbuf *m;
 	struct arphdr *ah;
 	struct sockaddr sa;
 	u_char *carpaddr = NULL;
 	uint8_t linkhdr[LLE_MAX_LINKHDR];
 	size_t linkhdrsize;
 	struct route ro;
 	int error;
 
 	NET_EPOCH_ASSERT();
 
 	if (sip == NULL) {
 		/*
 		 * The caller did not supply a source address, try to find
 		 * a compatible one among those assigned to this interface.
 		 */
 		struct ifaddr *ifa;
 
 		CK_STAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
 			if (ifa->ifa_addr->sa_family != AF_INET)
 				continue;
 
 			if (ifa->ifa_carp) {
 				if ((*carp_iamatch_p)(ifa, &carpaddr) == 0)
 					continue;
 				sip = &IA_SIN(ifa)->sin_addr;
 			} else {
 				carpaddr = NULL;
 				sip = &IA_SIN(ifa)->sin_addr;
 			}
 
 			/*
 			 * Stop at the first candidate on the target's
 			 * subnet; otherwise keep the candidate as a
 			 * fallback and continue searching.
 			 */
 			if (0 == ((sip->s_addr ^ tip->s_addr) &
 			    IA_MASKSIN(ifa)->sin_addr.s_addr))
 				break;  /* found it. */
 		}
 		if (sip == NULL) {
 			printf("%s: cannot find matching address\n", __func__);
 			return (EADDRNOTAVAIL);
 		}
 	}
 	if (enaddr == NULL)
 		enaddr = carpaddr ? carpaddr : (u_char *)IF_LLADDR(ifp);
 
 	if ((m = m_gethdr(M_NOWAIT, MT_DATA)) == NULL)
 		return (ENOMEM);
 	/* Fixed ARP header plus two hardware and two protocol addresses. */
 	m->m_len = sizeof(*ah) + 2 * sizeof(struct in_addr) +
 		2 * ifp->if_addrlen;
 	m->m_pkthdr.len = m->m_len;
 	M_ALIGN(m, m->m_len);
 	ah = mtod(m, struct arphdr *);
 	bzero((caddr_t)ah, m->m_len);
 #ifdef MAC
 	mac_netinet_arp_send(ifp, m);
 #endif
 	ah->ar_pro = htons(ETHERTYPE_IP);
 	ah->ar_hln = ifp->if_addrlen;		/* hardware address length */
 	ah->ar_pln = sizeof(struct in_addr);	/* protocol address length */
 	ah->ar_op = htons(ARPOP_REQUEST);
 	bcopy(enaddr, ar_sha(ah), ah->ar_hln);
 	bcopy(sip, ar_spa(ah), ah->ar_pln);
 	bcopy(tip, ar_tpa(ah), ah->ar_pln);
 	sa.sa_family = AF_ARP;
 	sa.sa_len = 2;
 
 	/* Calculate link header for sending frame */
 	bzero(&ro, sizeof(ro));
 	linkhdrsize = sizeof(linkhdr);
 	error = arp_fillheader(ifp, ah, 1, linkhdr, &linkhdrsize);
 	if (error != 0 && error != EAFNOSUPPORT) {
 		/* The frame was never handed to the driver; release it. */
 		m_freem(m);
 		ARP_LOG(LOG_ERR, "Failed to calculate ARP header on %s: %d\n",
 		    if_name(ifp), error);
 		return (error);
 	}
 
 	ro.ro_prepend = linkhdr;
 	ro.ro_plen = linkhdrsize;
 	ro.ro_flags = 0;
 
 	m->m_flags |= M_BCAST;
 	m_clrprotoflags(m);	/* Avoid confusing lower layers. */
 	/* The mbuf is not touched again once passed to if_output(). */
 	error = (*ifp->if_output)(ifp, m, &sa, &ro);
 	ARPSTAT_INC(txrequests);
 	if (error) {
 		ARPSTAT_INC(txerrors);
 		ARP_LOG(LOG_DEBUG, "Failed to send ARP packet on %s: %d\n",
 		    if_name(ifp), error);
 	}
 	return (error);
 }
 
 /*
  * Wrapper around arprequest_internal() for callers that do not need the
  * result: the arguments are passed through unchanged and the returned
  * error code is discarded.
  */
 void
 arprequest(struct ifnet *ifp, const struct in_addr *sip,
     const struct in_addr *tip, u_char *enaddr)
 {
 
 	(void) arprequest_internal(ifp, sip, tip, enaddr);
 }
 
 /*
  * Resolve an IP address into an ethernet address - heavy version.
  * Used internally by arpresolve().
  * We have already checked that we can't use an existing lle without
  * modification so we have to acquire an LLE_EXCLUSIVE lle lock.
  *
  * When LLE_CREATE is set in @flags the initial lookup is skipped and a new
  * entry is allocated (unless the interface is IFF_NOARP/IFF_STATICARP).
  * When LLE_ADDRONLY is set only the link-layer address, not the full
  * prepend header, is copied to @desten.
  *
  * On success, desten and pflags are filled in and the function returns 0;
  * if @plle is not NULL it additionally receives a referenced entry.
  * If the packet must be held pending resolution, we return EWOULDBLOCK
  * On other errors, we return the corresponding error code.
  * Note that m_freem() handles NULL.
  */
 static int
 arpresolve_full(struct ifnet *ifp, int is_gw, int flags, struct mbuf *m,
 	const struct sockaddr *dst, u_char *desten, uint32_t *pflags,
 	struct llentry **plle)
 {
 	struct llentry *la = NULL, *la_tmp;
 	struct mbuf *curr = NULL;
 	struct mbuf *next = NULL;
 	int error, renew;
 	char *lladdr;
 	int ll_len;
 
 	NET_EPOCH_ASSERT();
 
 	if (pflags != NULL)
 		*pflags = 0;
 	if (plle != NULL)
 		*plle = NULL;
 
 	if ((flags & LLE_CREATE) == 0)
 		la = lla_lookup(LLTABLE(ifp), LLE_EXCLUSIVE, dst);
 	if (la == NULL && (ifp->if_flags & (IFF_NOARP | IFF_STATICARP)) == 0) {
 		la = lltable_alloc_entry(LLTABLE(ifp), 0, dst);
 		if (la == NULL) {
 			char addrbuf[INET_ADDRSTRLEN];
 
 			log(LOG_DEBUG,
 			    "arpresolve: can't allocate llinfo for %s on %s\n",
 			    inet_ntoa_r(SIN(dst)->sin_addr, addrbuf),
 			    if_name(ifp));
 			m_freem(m);
 			return (EINVAL);
 		}
 
 		/*
 		 * Re-check under the table write lock: another thread may
 		 * have linked an entry for @dst since the allocation.
 		 */
 		IF_AFDATA_WLOCK(ifp);
 		LLE_WLOCK(la);
 		la_tmp = lla_lookup(LLTABLE(ifp), LLE_EXCLUSIVE, dst);
 		/* Prefer ANY existing lle over newly-created one */
 		if (la_tmp == NULL)
 			lltable_link_entry(LLTABLE(ifp), la);
 		IF_AFDATA_WUNLOCK(ifp);
 		if (la_tmp != NULL) {
 			lltable_free_entry(LLTABLE(ifp), la);
 			la = la_tmp;
 		}
 	}
 	if (la == NULL) {
 		/* No entry and we are not allowed to create one. */
 		m_freem(m);
 		return (EINVAL);
 	}
 
 	/* Usable entry: valid and either static or not yet expired. */
 	if ((la->la_flags & LLE_VALID) &&
 	    ((la->la_flags & LLE_STATIC) || la->la_expire > time_uptime)) {
 		if (flags & LLE_ADDRONLY) {
 			lladdr = la->ll_addr;
 			ll_len = ifp->if_addrlen;
 		} else {
 			lladdr = la->r_linkdata;
 			ll_len = la->r_hdrlen;
 		}
 		bcopy(lladdr, desten, ll_len);
 
 		/* Notify LLE code that the entry was used by datapath */
 		llentry_mark_used(la);
 		if (pflags != NULL)
 			*pflags = la->la_flags & (LLE_VALID|LLE_IFADDR);
 		if (plle) {
 			/* The reference taken here is handed to the caller. */
 			LLE_ADDREF(la);
 			*plle = la;
 		}
 		LLE_WUNLOCK(la);
 		return (0);
 	}
 
 	/*
 	 * Send a new request only if none was sent yet or the last one was
 	 * not sent during the current second (la_expire holds its timestamp).
 	 */
 	renew = (la->la_asked == 0 || la->la_expire != time_uptime);
 	/*
 	 * There is an arptab entry, but no ethernet address
 	 * response yet.  Add the mbuf to the list, dropping
 	 * the oldest packet if we have exceeded the system
 	 * setting.
 	 */
 	if (m != NULL) {
 		if (la->la_numheld >= V_arp_maxhold) {
 			if (la->la_hold != NULL) {
 				next = la->la_hold->m_nextpkt;
 				m_freem(la->la_hold);
 				la->la_hold = next;
 				la->la_numheld--;
 				ARPSTAT_INC(dropped);
 			}
 		}
 		/* Append @m at the tail of the hold queue. */
 		if (la->la_hold != NULL) {
 			curr = la->la_hold;
 			while (curr->m_nextpkt != NULL)
 				curr = curr->m_nextpkt;
 			curr->m_nextpkt = m;
 		} else
 			la->la_hold = m;
 		la->la_numheld++;
 	}
 	/*
 	 * Return EWOULDBLOCK if we have tried less than arp_maxtries. It
 	 * will be masked by ether_output(). Return EHOSTDOWN/EHOSTUNREACH
 	 * if we have already sent arp_maxtries ARP requests. Retransmit the
 	 * ARP request, but not faster than one request per second.
 	 */
 	if (la->la_asked < V_arp_maxtries)
 		error = EWOULDBLOCK;	/* First request. */
 	else
 		error = is_gw != 0 ? EHOSTUNREACH : EHOSTDOWN;
 
 	if (renew) {
 		int canceled, e;
 
 		/*
 		 * Take a reference for the timer being armed; drop it again
 		 * if callout_reset() reports that it replaced a pending
 		 * callout.
 		 */
 		LLE_ADDREF(la);
 		la->la_expire = time_uptime;
 		canceled = callout_reset(&la->lle_timer, hz * V_arpt_down,
 		    arptimer, la);
 		if (canceled)
 			LLE_REMREF(la);
 		la->la_asked++;
 		/* The entry lock is released before transmitting. */
 		LLE_WUNLOCK(la);
 		e = arprequest_internal(ifp, NULL, &SIN(dst)->sin_addr, NULL);
 		/*
 		 * Only overwrite 'error' in case of error; in case of success
 		 * the proper return value was already set above.
 		 */
 		if (e != 0)
 			return (e);
 		return (error);
 	}
 
 	LLE_WUNLOCK(la);
 	return (error);
 }
 
 /*
  * Looks up the link header based on an IP address.
  * On input:
  *    ifp is the interface we use
  *    is_gw != 0 if @dst represents gateway to some destination
  *    m is the mbuf. May be NULL if we don't have a packet.
  *    dst is the next hop,
  *    desten is the storage to put LL header.
  *    pflags returns subset of lle flags: LLE_VALID | LLE_IFADDR
  *    plle, if not NULL, receives a referenced lle on success
  *
  * On success, full/partial link header and flags are filled in and
  * the function returns 0.
  * If the packet must be held pending resolution, we return EWOULDBLOCK
  * On other errors, we return the corresponding error code.
  * Note that m_freem() handles NULL.
  */
 int
 arpresolve(struct ifnet *ifp, int is_gw, struct mbuf *m,
 	const struct sockaddr *dst, u_char *desten, uint32_t *pflags,
 	struct llentry **plle)
 {
 	struct llentry *la = NULL;
 
 	NET_EPOCH_ASSERT();
 
 	if (pflags != NULL)
 		*pflags = 0;
 	if (plle != NULL)
 		*plle = NULL;
 
 	/* Broadcast and multicast frames need no table lookup. */
 	if (m != NULL) {
 		if (m->m_flags & M_BCAST) {
 			/* broadcast */
 			(void)memcpy(desten,
 			    ifp->if_broadcastaddr, ifp->if_addrlen);
 			return (0);
 		}
 		if (m->m_flags & M_MCAST) {
 			/* multicast */
 			ETHER_MAP_IP_MULTICAST(&SIN(dst)->sin_addr, desten);
 			return (0);
 		}
 	}
 
 	/*
 	 * Fast path.  The entry is locked exclusively only when the caller
 	 * wants it returned via @plle; otherwise the lookup is unlocked.
 	 */
 	la = lla_lookup(LLTABLE(ifp), plle ? LLE_EXCLUSIVE : LLE_UNLOCKED, dst);
 	if (la != NULL && (la->r_flags & RLLE_VALID) != 0) {
 		/* Entry found, let's copy lle info */
 		bcopy(la->r_linkdata, desten, la->r_hdrlen);
 		if (pflags != NULL)
 			*pflags = LLE_VALID | (la->r_flags & RLLE_IFADDR);
 		/* Notify the LLE handling code that the entry was used. */
 		llentry_mark_used(la);
 		if (plle) {
 			LLE_ADDREF(la);
 			*plle = la;
 			LLE_WUNLOCK(la);
 		}
 		return (0);
 	}
 	/* Drop the lock taken above before entering the slow path. */
 	if (plle && la)
 		LLE_WUNLOCK(la);
 
 	/*
 	 * Slow path.  LLE_CREATE is passed only when the lookup above found
 	 * no entry at all.
 	 */
 	return (arpresolve_full(ifp, is_gw, la == NULL ? LLE_CREATE : 0, m, dst,
 	    desten, pflags, plle));
 }
 
 /*
  * Common length and type checks are done here,
  * then the protocol-specific routine is called.
  *
  * The packet is made contiguous up to the full ARP message length, the
  * hardware type and hardware address length are validated, and IPv4
  * packets are passed to in_arpinput().  Everything else is freed here.
  */
 static void
 arpintr(struct mbuf *m)
 {
 	struct arphdr *ar;
 	struct ifnet *ifp;
 	char *layer;
 	int hlen;
 
 	/* Remember the interface now: @m may be gone after m_pullup(). */
 	ifp = m->m_pkthdr.rcvif;
 
 	if (m->m_len < sizeof(struct arphdr) &&
 	    ((m = m_pullup(m, sizeof(struct arphdr))) == NULL)) {
 		ARP_LOG(LOG_NOTICE, "packet with short header received on %s\n",
 		    if_name(ifp));
 		return;
 	}
 	ar = mtod(m, struct arphdr *);
 
 	/* Check if length is sufficient */
 	if (m->m_len <  arphdr_len(ar)) {
 		m = m_pullup(m, arphdr_len(ar));
 		if (m == NULL) {
 			ARP_LOG(LOG_NOTICE, "short packet received on %s\n",
 			    if_name(ifp));
 			return;
 		}
 		ar = mtod(m, struct arphdr *);
 	}
 
 	/* hlen == 0 means "do not enforce a hardware address length". */
 	hlen = 0;
 	layer = "";
 	switch (ntohs(ar->ar_hrd)) {
 	case ARPHRD_ETHER:
 		hlen = ETHER_ADDR_LEN; /* RFC 826 */
 		layer = "ethernet";
 		break;
 	case ARPHRD_INFINIBAND:
 		hlen = 20;	/* RFC 4391, INFINIBAND_ALEN */
 		layer = "infiniband";
 		break;
 	case ARPHRD_IEEE1394:
 		hlen = 0; /* SHALL be 16 */ /* RFC 2734 */
 		layer = "firewire";
 
 		/*
 		 * Restrict too long hardware addresses.
 		 * Currently we are capable of handling 20-byte
 		 * addresses ( sizeof(lle->ll_addr) )
 		 */
 		if (ar->ar_hln >= 20)
 			hlen = 16;
 		break;
 	default:
 		/* The "0x" prefix requires a hexadecimal conversion. */
 		ARP_LOG(LOG_NOTICE,
 		    "packet with unknown hardware format 0x%02x received on "
 		    "%s\n", ntohs(ar->ar_hrd), if_name(ifp));
 		m_freem(m);
 		return;
 	}
 
 	if (hlen != 0 && hlen != ar->ar_hln) {
 		ARP_LOG(LOG_NOTICE,
 		    "packet with invalid %s address length %d received on %s\n",
 		    layer, ar->ar_hln, if_name(ifp));
 		m_freem(m);
 		return;
 	}
 
 	ARPSTAT_INC(received);
 	switch (ntohs(ar->ar_pro)) {
 #ifdef INET
 	case ETHERTYPE_IP:
 		/* in_arpinput() takes over the mbuf. */
 		in_arpinput(m);
 		return;
 #endif
 	}
 	/* Unsupported protocol type. */
 	m_freem(m);
 }
 
 #ifdef INET
 /*
  * ARP for Internet protocols on 10 Mb/s Ethernet.
  * Algorithm is that given in RFC 826.
  * In addition, a sanity check is performed on the sender
  * protocol address, to catch impersonators.
  * We no longer handle negotiations for use of trailer protocol:
  * Formerly, ARP replied for protocol type ETHERTYPE_TRAIL sent
  * along with IP replies if we wanted trailers sent to us,
  * and also sent them in response to IP replies.
  * This allowed either end to announce the desire to receive
  * trailer packets.
  * We no longer reply to requests for ETHERTYPE_TRAIL protocol either,
  * but formerly didn't normally send requests.
  */
 /*
  * Knobs consulted while processing received ARP packets.  Each one is
  * exported read/write under the _net_link_ether_inet sysctl node; the
  * three log_* knobs default to on, allow_multicast defaults to off.
  */
 static int log_arp_wrong_iface = 1;
 static int log_arp_movements = 1;
 static int log_arp_permanent_modify = 1;
 static int allow_multicast = 0;
 
 SYSCTL_INT(_net_link_ether_inet, OID_AUTO, log_arp_wrong_iface, CTLFLAG_RW,
 	&log_arp_wrong_iface, 0,
 	"log arp packets arriving on the wrong interface");
 SYSCTL_INT(_net_link_ether_inet, OID_AUTO, log_arp_movements, CTLFLAG_RW,
 	&log_arp_movements, 0,
 	"log arp replies from MACs different than the one in the cache");
 SYSCTL_INT(_net_link_ether_inet, OID_AUTO, log_arp_permanent_modify, CTLFLAG_RW,
 	&log_arp_permanent_modify, 0,
 	"log arp replies from MACs different than the one in the permanent arp entry");
 SYSCTL_INT(_net_link_ether_inet, OID_AUTO, allow_multicast, CTLFLAG_RW,
 	&allow_multicast, 0, "accept multicast addresses");
 
 /*
  * Process one received IPv4 ARP packet.
  *
  * The function (1) picks the local address the packet relates to, (2)
  * rejects packets sent by ourselves, with a broadcast sender hardware
  * address or with a mismatching hardware address length, (3) updates or
  * creates the link-layer entry for the sender and (4) for requests,
  * rewrites the packet in place into a reply and transmits it.
  *
  * The mbuf is owned by this function: it is either passed to if_output()
  * or released at the "drop" label.  Must be called inside the network
  * epoch.
  */
 static void
 in_arpinput(struct mbuf *m)
 {
 	struct rm_priotracker in_ifa_tracker;
 	struct arphdr *ah;
 	struct ifnet *ifp = m->m_pkthdr.rcvif;
 	struct llentry *la = NULL, *la_tmp;
 	struct ifaddr *ifa;
 	struct in_ifaddr *ia;
 	struct sockaddr sa;
 	struct in_addr isaddr, itaddr, myaddr;
 	u_int8_t *enaddr = NULL;
 	int op;
 	int bridged = 0, is_bridge = 0;
 	int carped;
 	struct sockaddr_in sin;
 	struct sockaddr *dst;
-	struct nhop4_basic nh4;
+	struct nhop_object *nh;
 	uint8_t linkhdr[LLE_MAX_LINKHDR];
 	struct route ro;
 	size_t linkhdrsize;
 	int lladdr_off;
 	int error;
 	char addrbuf[INET_ADDRSTRLEN];
 
 	NET_EPOCH_ASSERT();
 
 	sin.sin_len = sizeof(struct sockaddr_in);
 	sin.sin_family = AF_INET;
 	sin.sin_addr.s_addr = 0;
 
 	if (ifp->if_bridge)
 		bridged = 1;
 	if (ifp->if_type == IFT_BRIDGE)
 		is_bridge = 1;
 
 	/*
 	 * We already have checked that mbuf contains enough contiguous data
 	 * to hold entire arp message according to the arp header.
 	 */
 	ah = mtod(m, struct arphdr *);
 
 	/*
 	 * ARP is only for IPv4 so we can reject packets with
 	 * a protocol length not equal to an IPv4 address.
 	 */
 	if (ah->ar_pln != sizeof(struct in_addr)) {
 		ARP_LOG(LOG_NOTICE, "requested protocol length != %zu\n",
 		    sizeof(struct in_addr));
 		goto drop;
 	}
 
 	if (allow_multicast == 0 && ETHER_IS_MULTICAST(ar_sha(ah))) {
 		ARP_LOG(LOG_NOTICE, "%*D is multicast\n",
 		    ifp->if_addrlen, (u_char *)ar_sha(ah), ":");
 		goto drop;
 	}
 
 	op = ntohs(ah->ar_op);
 	(void)memcpy(&isaddr, ar_spa(ah), sizeof (isaddr));
 	(void)memcpy(&itaddr, ar_tpa(ah), sizeof (itaddr));
 
 	if (op == ARPOP_REPLY)
 		ARPSTAT_INC(rxreplies);
 
 	/*
 	 * For a bridge, we want to check the address irrespective
 	 * of the receive interface. (This will change slightly
 	 * when we have clusters of interfaces).
 	 */
 	IN_IFADDR_RLOCK(&in_ifa_tracker);
 	/* First choice: a local address equal to the target address. */
 	LIST_FOREACH(ia, INADDR_HASH(itaddr.s_addr), ia_hash) {
 		if (((bridged && ia->ia_ifp->if_bridge == ifp->if_bridge) ||
 		    ia->ia_ifp == ifp) &&
 		    itaddr.s_addr == ia->ia_addr.sin_addr.s_addr &&
 		    (ia->ia_ifa.ifa_carp == NULL ||
 		    (*carp_iamatch_p)(&ia->ia_ifa, &enaddr))) {
 			ifa_ref(&ia->ia_ifa);
 			IN_IFADDR_RUNLOCK(&in_ifa_tracker);
 			goto match;
 		}
 	}
 	/* Second choice: a local address equal to the sender address. */
 	LIST_FOREACH(ia, INADDR_HASH(isaddr.s_addr), ia_hash)
 		if (((bridged && ia->ia_ifp->if_bridge == ifp->if_bridge) ||
 		    ia->ia_ifp == ifp) &&
 		    isaddr.s_addr == ia->ia_addr.sin_addr.s_addr) {
 			ifa_ref(&ia->ia_ifa);
 			IN_IFADDR_RUNLOCK(&in_ifa_tracker);
 			goto match;
 		}
 
 #define BDG_MEMBER_MATCHES_ARP(addr, ifp, ia)				\
   (ia->ia_ifp->if_bridge == ifp->if_softc &&				\
   !bcmp(IF_LLADDR(ia->ia_ifp), IF_LLADDR(ifp), ifp->if_addrlen) &&	\
   addr == ia->ia_addr.sin_addr.s_addr)
 	/*
 	 * Check the case when bridge shares its MAC address with
 	 * some of its children, so packets are claimed by bridge
 	 * itself (bridge_input() does it first), but they are really
 	 * meant to be destined to the bridge member.
 	 */
 	if (is_bridge) {
 		LIST_FOREACH(ia, INADDR_HASH(itaddr.s_addr), ia_hash) {
 			if (BDG_MEMBER_MATCHES_ARP(itaddr.s_addr, ifp, ia)) {
 				ifa_ref(&ia->ia_ifa);
 				ifp = ia->ia_ifp;
 				IN_IFADDR_RUNLOCK(&in_ifa_tracker);
 				goto match;
 			}
 		}
 	}
 #undef BDG_MEMBER_MATCHES_ARP
 	IN_IFADDR_RUNLOCK(&in_ifa_tracker);
 
 	/*
 	 * No match, use the first inet address on the receive interface
 	 * as a dummy address for the rest of the function.
 	 */
 	CK_STAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link)
 		if (ifa->ifa_addr->sa_family == AF_INET &&
 		    (ifa->ifa_carp == NULL ||
 		    (*carp_iamatch_p)(ifa, &enaddr))) {
 			ia = ifatoia(ifa);
 			ifa_ref(ifa);
 			goto match;
 		}
 
 	/*
 	 * If bridging, fall back to using any inet address.
 	 */
 	IN_IFADDR_RLOCK(&in_ifa_tracker);
 	if (!bridged || (ia = CK_STAILQ_FIRST(&V_in_ifaddrhead)) == NULL) {
 		IN_IFADDR_RUNLOCK(&in_ifa_tracker);
 		goto drop;
 	}
 	ifa_ref(&ia->ia_ifa);
 	IN_IFADDR_RUNLOCK(&in_ifa_tracker);
 match:
 	if (!enaddr)
 		enaddr = (u_int8_t *)IF_LLADDR(ifp);
 	/* Copy what we need from @ia, then drop the reference taken above. */
 	carped = (ia->ia_ifa.ifa_carp != NULL);
 	myaddr = ia->ia_addr.sin_addr;
 	ifa_free(&ia->ia_ifa);
 	if (!bcmp(ar_sha(ah), enaddr, ifp->if_addrlen))
 		goto drop;	/* it's from me, ignore it. */
 	if (!bcmp(ar_sha(ah), ifp->if_broadcastaddr, ifp->if_addrlen)) {
 		ARP_LOG(LOG_NOTICE, "link address is broadcast for IP address "
 		    "%s!\n", inet_ntoa_r(isaddr, addrbuf));
 		goto drop;
 	}
 
 	if (ifp->if_addrlen != ah->ar_hln) {
 		ARP_LOG(LOG_WARNING, "from %*D: addr len: new %d, "
 		    "i/f %d (ignored)\n", ifp->if_addrlen,
 		    (u_char *) ar_sha(ah), ":", ah->ar_hln,
 		    ifp->if_addrlen);
 		goto drop;
 	}
 
 	/*
 	 * Warn if another host is using the same IP address, but only if the
 	 * IP address isn't 0.0.0.0, which is used for DHCP only, in which
 	 * case we suppress the warning to avoid false positive complaints of
 	 * potential misconfiguration.
 	 */
 	if (!bridged && !carped && isaddr.s_addr == myaddr.s_addr &&
 	    myaddr.s_addr != 0) {
 		ARP_LOG(LOG_ERR, "%*D is using my IP address %s on %s!\n",
 		   ifp->if_addrlen, (u_char *)ar_sha(ah), ":",
 		   inet_ntoa_r(isaddr, addrbuf), ifp->if_xname);
 		itaddr = myaddr;
 		ARPSTAT_INC(dupips);
 		goto reply;
 	}
 	/* With IFF_STATICARP the table is never updated from the wire. */
 	if (ifp->if_flags & IFF_STATICARP)
 		goto reply;
 
 	bzero(&sin, sizeof(sin));
 	sin.sin_len = sizeof(struct sockaddr_in);
 	sin.sin_family = AF_INET;
 	sin.sin_addr = isaddr;
 	dst = (struct sockaddr *)&sin;
 	la = lla_lookup(LLTABLE(ifp), LLE_EXCLUSIVE, dst);
 	if (la != NULL)
 		arp_check_update_lle(ah, isaddr, ifp, bridged, la);
 	else if (itaddr.s_addr == myaddr.s_addr) {
 		/*
 		 * Request/reply to our address, but no lle exists yet.
 		 * Calculate full link prepend to use in lle.
 		 */
 		linkhdrsize = sizeof(linkhdr);
 		if (lltable_calc_llheader(ifp, AF_INET, ar_sha(ah), linkhdr,
 		    &linkhdrsize, &lladdr_off) != 0)
 			goto reply;
 
 		/* Allocate new entry */
 		la = lltable_alloc_entry(LLTABLE(ifp), 0, dst);
 		if (la == NULL) {
 
 			/*
 			 * lle creation may fail if source address belongs
 			 * to non-directly connected subnet. However, we
 			 * will try to answer the request instead of dropping
 			 * frame.
 			 */
 			goto reply;
 		}
 		lltable_set_entry_addr(ifp, la, linkhdr, linkhdrsize,
 		    lladdr_off);
 
 		IF_AFDATA_WLOCK(ifp);
 		LLE_WLOCK(la);
 		la_tmp = lla_lookup(LLTABLE(ifp), LLE_EXCLUSIVE, dst);
 
 		/*
 		 * Check if lle still does not exist.
 		 * If it does, that means that we either
 		 * 1) have configured it explicitly, via
 		 * 1a) 'arp -s' static entry or
 		 * 1b) interface address static record
 		 * or
 		 * 2) it was the result of sending first packet to-host
 		 * or
 		 * 3) it was another arp reply packet we handled in
 		 * different thread.
 		 *
 		 * In all cases except 3) we definitely need to prefer
 		 * existing lle. For the sake of simplicity, prefer any
 		 * existing lle over newly-created one.
 		 */
 		if (la_tmp == NULL)
 			lltable_link_entry(LLTABLE(ifp), la);
 		IF_AFDATA_WUNLOCK(ifp);
 
 		if (la_tmp == NULL) {
 			arp_mark_lle_reachable(la);
 			LLE_WUNLOCK(la);
 		} else {
 			/* Free newly-created entry and handle packet */
 			lltable_free_entry(LLTABLE(ifp), la);
 			la = la_tmp;
 			la_tmp = NULL;
 			arp_check_update_lle(ah, isaddr, ifp, bridged, la);
 			/* arp_check_update_lle() returns @la unlocked */
 		}
 		la = NULL;
 	}
 reply:
 	if (op != ARPOP_REQUEST)
 		goto drop;
 	ARPSTAT_INC(rxrequests);
 
 	if (itaddr.s_addr == myaddr.s_addr) {
 		/* Shortcut.. the receiving interface is the target. */
 		(void)memcpy(ar_tha(ah), ar_sha(ah), ah->ar_hln);
 		(void)memcpy(ar_sha(ah), enaddr, ah->ar_hln);
 	} else {
 		/*
 		 * Destination address is not ours. Check if
 		 * proxyarp entry exists or proxyarp is turned on globally.
 		 */
 		struct llentry *lle;
 
 		sin.sin_addr = itaddr;
 		lle = lla_lookup(LLTABLE(ifp), 0, (struct sockaddr *)&sin);
 
 		if ((lle != NULL) && (lle->la_flags & LLE_PUB)) {
 			(void)memcpy(ar_tha(ah), ar_sha(ah), ah->ar_hln);
 			(void)memcpy(ar_sha(ah), lle->ll_addr, ah->ar_hln);
 			LLE_RUNLOCK(lle);
 		} else {
 
 			if (lle != NULL)
 				LLE_RUNLOCK(lle);
 
 			if (!V_arp_proxyall)
 				goto drop;
 
-			if (fib4_lookup_nh_basic(ifp->if_fib, itaddr, 0, 0,
-			    &nh4) != 0)
+			NET_EPOCH_ASSERT();
+			nh = fib4_lookup(ifp->if_fib, itaddr, 0, 0, 0);
+			if (nh == NULL)
 				goto drop;
 
 			/*
 			 * Don't send proxies for nodes on the same interface
 			 * as this one came out of, or we'll get into a fight
 			 * over who claims what Ether address.
 			 */
-			if (nh4.nh_ifp == ifp)
+			if (nh->nh_ifp == ifp)
 				goto drop;
 
 			(void)memcpy(ar_tha(ah), ar_sha(ah), ah->ar_hln);
 			(void)memcpy(ar_sha(ah), enaddr, ah->ar_hln);
 
 			/*
 			 * Also check that the node which sent the ARP packet
 			 * is on the interface we expect it to be on. This
 			 * avoids ARP chaos if an interface is connected to the
 			 * wrong network.
 			 */
 
-			if (fib4_lookup_nh_basic(ifp->if_fib, isaddr, 0, 0,
-			    &nh4) != 0)
+			nh = fib4_lookup(ifp->if_fib, isaddr, 0, 0, 0);
+			if (nh == NULL)
 				goto drop;
-			if (nh4.nh_ifp != ifp) {
+			if (nh->nh_ifp != ifp) {
 				ARP_LOG(LOG_INFO, "proxy: ignoring request"
 				    " from %s via %s\n",
 				    inet_ntoa_r(isaddr, addrbuf),
 				    ifp->if_xname);
 				goto drop;
 			}
 
 #ifdef DEBUG_PROXY
 			printf("arp: proxying for %s\n",
 			    inet_ntoa_r(itaddr, addrbuf));
 #endif
 		}
 	}
 
 	if (itaddr.s_addr == myaddr.s_addr &&
 	    IN_LINKLOCAL(ntohl(itaddr.s_addr))) {
 		/* RFC 3927 link-local IPv4; always reply by broadcast. */
 #ifdef DEBUG_LINKLOCAL
 		printf("arp: sending reply for link-local addr %s\n",
 		    inet_ntoa_r(itaddr, addrbuf));
 #endif
 		m->m_flags |= M_BCAST;
 		m->m_flags &= ~M_MCAST;
 	} else {
 		/* default behaviour; never reply by broadcast. */
 		m->m_flags &= ~(M_BCAST|M_MCAST);
 	}
 	/* Turn the received request into the reply, reusing the mbuf. */
 	(void)memcpy(ar_tpa(ah), ar_spa(ah), ah->ar_pln);
 	(void)memcpy(ar_spa(ah), &itaddr, ah->ar_pln);
 	ah->ar_op = htons(ARPOP_REPLY);
 	ah->ar_pro = htons(ETHERTYPE_IP); /* let's be sure! */
 	m->m_len = sizeof(*ah) + (2 * ah->ar_pln) + (2 * ah->ar_hln);
 	m->m_pkthdr.len = m->m_len;
 	m->m_pkthdr.rcvif = NULL;
 	sa.sa_family = AF_ARP;
 	sa.sa_len = 2;
 
 	/* Calculate link header for sending frame */
 	bzero(&ro, sizeof(ro));
 	linkhdrsize = sizeof(linkhdr);
 	error = arp_fillheader(ifp, ah, 0, linkhdr, &linkhdrsize);
 
 	/*
 	 * arp_fillheader() may fail due to lack of support inside encap request
 	 * routing. This is not necessarily an error, AF_ARP can/should be
 	 * handled by if_output().
 	 */
 	if (error != 0 && error != EAFNOSUPPORT) {
 		ARP_LOG(LOG_ERR, "Failed to calculate ARP header on %s: %d\n",
 		    if_name(ifp), error);
 		/* The mbuf is still ours here; free it instead of leaking. */
 		goto drop;
 	}
 
 	ro.ro_prepend = linkhdr;
 	ro.ro_plen = linkhdrsize;
 	ro.ro_flags = 0;
 
 	m_clrprotoflags(m);	/* Avoid confusing lower layers. */
 	(*ifp->if_output)(ifp, m, &sa, &ro);
 	ARPSTAT_INC(txreplies);
 	return;
 
 drop:
 	m_freem(m);
 }
 #endif
 
 /*
  * Checks received arp data against existing @la.
  * Updates lle state/performs notification if necessary.
  *
  * @la must be write-locked by the caller.  It is unlocked again by this
  * function on its return paths; on the lltable_try_set_entry_addr()
  * failure path the release is presumably done by that helper (TODO
  * confirm against its implementation).
  *
  * Packets queued on the entry while resolution was pending are
  * transmitted once the entry has been marked reachable.
  */
 static void
 arp_check_update_lle(struct arphdr *ah, struct in_addr isaddr, struct ifnet *ifp,
     int bridged, struct llentry *la)
 {
 	struct sockaddr sa;
 	struct mbuf *m_hold, *m_hold_next;
 	uint8_t linkhdr[LLE_MAX_LINKHDR];
 	size_t linkhdrsize;
 	int lladdr_off;
 	char addrbuf[INET_ADDRSTRLEN];
 
 	LLE_WLOCK_ASSERT(la);
 
 	/* the following is not an error when doing bridging */
 	if (!bridged && la->lle_tbl->llt_ifp != ifp) {
 		if (log_arp_wrong_iface)
 			ARP_LOG(LOG_WARNING, "%s is on %s "
 			    "but got reply from %*D on %s\n",
 			    inet_ntoa_r(isaddr, addrbuf),
 			    la->lle_tbl->llt_ifp->if_xname,
 			    ifp->if_addrlen, (u_char *)ar_sha(ah), ":",
 			    ifp->if_xname);
 		LLE_WUNLOCK(la);
 		return;
 	}
 	/* The sender's hardware address differs from the cached one. */
 	if ((la->la_flags & LLE_VALID) &&
 	    bcmp(ar_sha(ah), la->ll_addr, ifp->if_addrlen)) {
 		if (la->la_flags & LLE_STATIC) {
 			/* Static entries are never overwritten. */
 			LLE_WUNLOCK(la);
 			if (log_arp_permanent_modify)
 				ARP_LOG(LOG_ERR,
 				    "%*D attempts to modify "
 				    "permanent entry for %s on %s\n",
 				    ifp->if_addrlen,
 				    (u_char *)ar_sha(ah), ":",
 				    inet_ntoa_r(isaddr, addrbuf),
 				    ifp->if_xname);
 			return;
 		}
 		if (log_arp_movements) {
 			ARP_LOG(LOG_INFO, "%s moved from %*D "
 			    "to %*D on %s\n",
 			    inet_ntoa_r(isaddr, addrbuf),
 			    ifp->if_addrlen,
 			    (u_char *)la->ll_addr, ":",
 			    ifp->if_addrlen, (u_char *)ar_sha(ah), ":",
 			    ifp->if_xname);
 		}
 	}
 
 	/* Calculate full link prepend to use in lle */
 	linkhdrsize = sizeof(linkhdr);
 	if (lltable_calc_llheader(ifp, AF_INET, ar_sha(ah), linkhdr,
 	    &linkhdrsize, &lladdr_off) != 0) {
 		/*
 		 * The helper is not given @la, so the lock is still held
 		 * here and has to be released like on the other exits.
 		 */
 		LLE_WUNLOCK(la);
 		return;
 	}
 
 	/* Check if something has changed */
 	if (memcmp(la->r_linkdata, linkhdr, linkhdrsize) != 0 ||
 	    (la->la_flags & LLE_VALID) == 0) {
 		/* Try to perform LLE update */
 		if (lltable_try_set_entry_addr(ifp, la, linkhdr, linkhdrsize,
 		    lladdr_off) == 0)
 			return;
 
 		/* Clear fast path feedback request if set */
 		la->r_skip_req = 0;
 	}
 
 	arp_mark_lle_reachable(la);
 
 	/*
 	 * The packets are all freed within the call to the output
 	 * routine.
 	 *
 	 * NB: The lock MUST be released before the call to the
 	 * output routine.
 	 */
 	if (la->la_hold != NULL) {
 		/* Detach the whole hold queue while still locked. */
 		m_hold = la->la_hold;
 		la->la_hold = NULL;
 		la->la_numheld = 0;
 		lltable_fill_sa_entry(la, &sa);
 		LLE_WUNLOCK(la);
 		for (; m_hold != NULL; m_hold = m_hold_next) {
 			m_hold_next = m_hold->m_nextpkt;
 			m_hold->m_nextpkt = NULL;
 			/* Avoid confusing lower layers. */
 			m_clrprotoflags(m_hold);
 			(*ifp->if_output)(ifp, m_hold, &sa, NULL);
 		}
 	} else
 		LLE_WUNLOCK(la);
 }
 
 /*
  * Put a write-locked entry into the REACHABLE state, notify lle_event
  * listeners that it is resolved and, unless the entry is static, arm its
  * timer.  The retry and pre-emption counters are reset in every case.
  */
 static void
 arp_mark_lle_reachable(struct llentry *la)
 {
 
 	LLE_WLOCK_ASSERT(la);
 
 	la->ln_state = ARP_LLINFO_REACHABLE;
 	EVENTHANDLER_INVOKE(lle_event, la, LLENTRY_RESOLVED);
 
 	if ((la->la_flags & LLE_STATIC) == 0) {
 		int delay, was_pending;
 
 		/* Reference held on behalf of the timer armed below. */
 		LLE_ADDREF(la);
 		la->la_expire = time_uptime + V_arpt_keep;
 
 		/*
 		 * Fire early enough to leave room for the refresh
 		 * retransmissions; use the full keep time when that
 		 * would come out negative.
 		 */
 		delay = V_arpt_keep - V_arp_maxtries * V_arpt_rexmit;
 		if (delay < 0)
 			delay = V_arpt_keep;
 
 		was_pending = callout_reset(&la->lle_timer, hz * delay,
 		    arptimer, la);
 		if (was_pending != 0)
 			LLE_REMREF(la);
 	}
 
 	la->la_asked = 0;
 	la->la_preempt = V_arp_maxtries;
 }
 
 /*
  * Add permanent link-layer record for given interface address.
  *
  * Any entry already present for @dst is unlinked, reported to lle_event
  * listeners as expired and freed; the new entry is reported as resolved.
  * An allocation failure is logged and otherwise ignored.
  */
 static __noinline void
 arp_add_ifa_lle(struct ifnet *ifp, const struct sockaddr *dst)
 {
 	struct llentry *lle, *lle_tmp;
 
 	/*
 	 * Interface address LLE record is considered static
 	 * because kernel code relies on LLE_STATIC flag to check
 	 * if these entries can be rewritten by arp updates.
 	 */
 	lle = lltable_alloc_entry(LLTABLE(ifp), LLE_IFADDR | LLE_STATIC, dst);
 	if (lle == NULL) {
 		log(LOG_INFO, "arp_ifinit: cannot create arp "
 		    "entry for interface address\n");
 		return;
 	}
 
 	/* Swap the entries under the table write lock. */
 	IF_AFDATA_WLOCK(ifp);
 	LLE_WLOCK(lle);
 	/* Unlink any entry if exists */
 	lle_tmp = lla_lookup(LLTABLE(ifp), LLE_EXCLUSIVE, dst);
 	if (lle_tmp != NULL)
 		lltable_unlink_entry(LLTABLE(ifp), lle_tmp);
 
 	lltable_link_entry(LLTABLE(ifp), lle);
 	IF_AFDATA_WUNLOCK(ifp);
 
 	/* Notifications are sent after the table lock has been dropped. */
 	if (lle_tmp != NULL)
 		EVENTHANDLER_INVOKE(lle_event, lle_tmp, LLENTRY_EXPIRED);
 
 	EVENTHANDLER_INVOKE(lle_event, lle, LLENTRY_RESOLVED);
 	LLE_WUNLOCK(lle);
 	if (lle_tmp != NULL)
 		lltable_free_entry(LLTABLE(ifp), lle_tmp);
 }
 
 /*
  * Handle the garp_rexmit_count. Like sysctl_handle_int(), but limits the range
  * of valid values.
  */
 static int
 sysctl_garp_rexmit(SYSCTL_HANDLER_ARGS)
 {
 	int error;
 	int rexmit_count = *(int *)arg1;
 
 	error = sysctl_handle_int(oidp, &rexmit_count, 0, req);
 
 	/* Enforce limits on any new value that may have been set. */
 	if (!error && req->newptr) {
 		/* A new value was set. */
 		if (rexmit_count < 0) {
 			rexmit_count = 0;
 		} else if (rexmit_count > MAX_GARP_RETRANSMITS) {
 			rexmit_count = MAX_GARP_RETRANSMITS;
 		}
 		*(int *)arg1 = rexmit_count;
 	}
 
 	return (error);
 }
 
 /*
  * Retransmit a Gratuitous ARP (GARP) and, if necessary, schedule a callout to
  * retransmit it again. A pending callout owns a reference to the ifa.
  */
 static void
 garp_rexmit(void *arg)
 {
 	struct in_ifaddr *ia = arg;
 
 	if (callout_pending(&ia->ia_garp_timer) ||
 	    !callout_active(&ia->ia_garp_timer)) {
 		IF_ADDR_WUNLOCK(ia->ia_ifa.ifa_ifp);
 		ifa_free(&ia->ia_ifa);
 		return;
 	}
 
 	CURVNET_SET(ia->ia_ifa.ifa_ifp->if_vnet);
 
 	/*
 	 * Drop lock while the ARP request is generated.
 	 */
 	IF_ADDR_WUNLOCK(ia->ia_ifa.ifa_ifp);
 
 	arprequest(ia->ia_ifa.ifa_ifp, &IA_SIN(ia)->sin_addr,
 	    &IA_SIN(ia)->sin_addr, IF_LLADDR(ia->ia_ifa.ifa_ifp));
 
 	/*
 	 * Increment the count of retransmissions. If the count has reached the
 	 * maximum value, stop sending the GARP packets. Otherwise, schedule
 	 * the callout to retransmit another GARP packet.
 	 */
 	++ia->ia_garp_count;
 	if (ia->ia_garp_count >= garp_rexmit_count) {
 		ifa_free(&ia->ia_ifa);
 	} else {
 		int rescheduled;
 		IF_ADDR_WLOCK(ia->ia_ifa.ifa_ifp);
 		rescheduled = callout_reset(&ia->ia_garp_timer,
 		    (1 << ia->ia_garp_count) * hz,
 		    garp_rexmit, ia);
 		IF_ADDR_WUNLOCK(ia->ia_ifa.ifa_ifp);
 		if (rescheduled) {
 			ifa_free(&ia->ia_ifa);
 		}
 	}
 
 	CURVNET_RESTORE();
 }
 
 /*
  * Start the GARP retransmit timer.
  *
  * A single GARP is always transmitted when an IPv4 address is added
  * to an interface and that is usually sufficient. However, in some
  * circumstances, such as when a shared address is passed between
  * cluster nodes, this single GARP may occasionally be dropped or
  * lost. This can lead to neighbors on the network link working with a
  * stale ARP cache and sending packets destined for that address to
  * the node that previously owned the address, which may not respond.
  *
  * To avoid this situation, GARP retransmits can be enabled by setting
  * the net.link.ether.inet.garp_rexmit_count sysctl to a value greater
  * than zero. The setting represents the maximum number of
  * retransmissions. The interval between retransmissions is calculated
  * using an exponential backoff algorithm, doubling each time, so the
  * retransmission intervals are: {1, 2, 4, 8, 16, ...} (seconds).
  */
 static void
 garp_timer_start(struct ifaddr *ifa)
 {
 	struct in_ifaddr *ia = (struct in_ifaddr *) ifa;
 	int was_pending;
 
 	IF_ADDR_WLOCK(ia->ia_ifa.ifa_ifp);
 
 	/* Restart the backoff sequence from its first interval. */
 	ia->ia_garp_count = 0;
 	was_pending = callout_reset(&ia->ia_garp_timer,
 	    (1 << ia->ia_garp_count) * hz, garp_rexmit, ia);
 
 	/* Take a reference only when no callout was already pending. */
 	if (was_pending == 0)
 		ifa_ref(ifa);
 
 	IF_ADDR_WUNLOCK(ia->ia_ifa.ifa_ifp);
 }
 
 /*
  * ARP set-up for an IPv4 address @ifa on @ifp: announce the address with
  * a gratuitous ARP, start the GARP retransmit timer when retransmits are
  * enabled and install the permanent link-layer entry for the address.
  * Addresses with ifa_carp set and the INADDR_ANY address are ignored.
  */
 void
 arp_ifinit(struct ifnet *ifp, struct ifaddr *ifa)
 {
 	struct epoch_tracker et;
 	const struct sockaddr *sa;
 	const struct sockaddr_in *sin;
 
 	if (ifa->ifa_carp != NULL)
 		return;
 
 	sa = ifa->ifa_addr;
 	sin = (const struct sockaddr_in *)sa;
 	if (ntohl(sin->sin_addr.s_addr) == INADDR_ANY)
 		return;
 
 	/* The announcement is sent from within the network epoch. */
 	NET_EPOCH_ENTER(et);
 	arp_announce_ifaddr(ifp, sin->sin_addr, IF_LLADDR(ifp));
 	NET_EPOCH_EXIT(et);
 
 	if (garp_rexmit_count > 0)
 		garp_timer_start(ifa);
 
 	arp_add_ifa_lle(ifp, sa);
 }
 
 /*
  * Send a gratuitous ARP for @addr (source and target protocol address
  * are both @addr) using @enaddr as the hardware address.  Nothing is
  * sent for INADDR_ANY.
  */
 void
 arp_announce_ifaddr(struct ifnet *ifp, struct in_addr addr, u_char *enaddr)
 {
 
 	if (ntohl(addr.s_addr) == INADDR_ANY)
 		return;
 
 	arprequest(ifp, &addr, &addr, enaddr);
 }
 
 /*
  * Re-run arp_ifinit() for every AF_INET address of @ifp so that other
  * nodes are notified about the address change through gratuitous ARPs.
  */
 static __noinline void
 arp_handle_ifllchange(struct ifnet *ifp)
 {
 	struct ifaddr *cur;
 
 	CK_STAILQ_FOREACH(cur, &ifp->if_addrhead, ifa_link) {
 		/* Only IPv4 addresses take part in ARP. */
 		if (cur->ifa_addr->sa_family != AF_INET)
 			continue;
 		arp_ifinit(ifp, cur);
 	}
 }
 
 /*
  * Event handler for a change of an interface's link-layer address:
  * refresh the interface-address entries of the ARP table and, if the
  * interface is up, announce its IPv4 addresses again.
  */
 static void
 arp_iflladdr(void *arg __unused, struct ifnet *ifp)
 {
 
 	lltable_update_ifaddr(LLTABLE(ifp));
 
 	/* No announcements while the interface is down. */
 	if ((ifp->if_flags & IFF_UP) == 0)
 		return;
 
 	arp_handle_ifllchange(ifp);
 }
 
 /*
  * Per-vnet ARP initialisation.  The default vnet registers the ARP netisr
  * handler and the link-layer address change event handler; any other
  * vnet (VIMAGE kernels only) registers just its per-vnet netisr handler.
  */
 static void
 vnet_arp_init(void)
 {
 
 	if (!IS_DEFAULT_VNET(curvnet)) {
 #ifdef VIMAGE
 		netisr_register_vnet(&arp_nh);
 #endif
 		return;
 	}
 
 	netisr_register(&arp_nh);
 	iflladdr_tag = EVENTHANDLER_REGISTER(iflladdr_event, arp_iflladdr,
 	    NULL, EVENTHANDLER_PRI_ANY);
 }
 VNET_SYSINIT(vnet_arp_init, SI_SUB_PROTO_DOMAIN, SI_ORDER_SECOND,
     vnet_arp_init, 0);
 
 #ifdef VIMAGE
 /*
  * We have to unregister ARP along with IP otherwise we risk doing INADDR_HASH
  * lookups after destroying the hash.  Ideally this would go on SI_ORDER_3.5.
  *
  * Per-vnet teardown: unregister this vnet's ARP netisr handler.  It is
  * hooked in at SI_SUB_PROTO_DOMAIN / SI_ORDER_THIRD.
  */
 static void
 vnet_arp_destroy(__unused void *arg)
 {
 
 	netisr_unregister_vnet(&arp_nh);
 }
 VNET_SYSUNINIT(vnet_arp_uninit, SI_SUB_PROTO_DOMAIN, SI_ORDER_THIRD,
     vnet_arp_destroy, NULL);
 #endif
Index: head/sys/netinet/in_fib.c
===================================================================
--- head/sys/netinet/in_fib.c	(revision 362899)
+++ head/sys/netinet/in_fib.c	(revision 362900)
@@ -1,405 +1,246 @@
 /*-
  * Copyright (c) 2015
  * 	Alexander V. Chernikov <melifaro@FreeBSD.org>
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include "opt_inet.h"
 #include "opt_route.h"
 #include "opt_mpath.h"
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/lock.h>
 #include <sys/rmlock.h>
 #include <sys/malloc.h>
 #include <sys/mbuf.h>
 #include <sys/socket.h>
 #include <sys/sysctl.h>
 #include <sys/kernel.h>
 
 #include <net/if.h>
 #include <net/if_var.h>
 #include <net/if_dl.h>
 #include <net/route.h>
 #include <net/route/route_var.h>
 #include <net/route/nhop.h>
 #include <net/route/shared.h>
 #include <net/vnet.h>
 
 #ifdef RADIX_MPATH
 #include <net/radix_mpath.h>
 #endif
 
 #include <netinet/in.h>
 #include <netinet/in_var.h>
 #include <netinet/in_fib.h>
 
 #ifdef INET
 
 /* Verify struct route compatibility */
 /* Assert 'struct route_in' is compatible with 'struct route' */
 CHK_STRUCT_ROUTE_COMPAT(struct route_in, ro_dst4);
-static void fib4_rte_to_nh_basic(struct nhop_object *nh, struct in_addr dst,
-    uint32_t flags, struct nhop4_basic *pnh4);
-static void fib4_rte_to_nh_extended(struct nhop_object *nh, struct in_addr dst,
-    uint32_t flags, struct nhop4_extended *pnh4);
-
-
-static void
-fib4_rte_to_nh_basic(struct nhop_object *nh, struct in_addr dst,
-    uint32_t flags, struct nhop4_basic *pnh4)
-{
-
-	if ((flags & NHR_IFAIF) != 0)
-		pnh4->nh_ifp = nh->nh_ifa->ifa_ifp;
-	else
-		pnh4->nh_ifp = nh->nh_ifp;
-	pnh4->nh_mtu = nh->nh_mtu;
-	if (nh->nh_flags & NHF_GATEWAY)
-		pnh4->nh_addr = nh->gw4_sa.sin_addr;
-	else
-		pnh4->nh_addr = dst;
-	/* Set flags */
-	pnh4->nh_flags = nh->nh_flags;
-	/* TODO: Handle RTF_BROADCAST here */
-}
-
-static void
-fib4_rte_to_nh_extended(struct nhop_object *nh, struct in_addr dst,
-    uint32_t flags, struct nhop4_extended *pnh4)
-{
-
-	if ((flags & NHR_IFAIF) != 0)
-		pnh4->nh_ifp = nh->nh_ifa->ifa_ifp;
-	else
-		pnh4->nh_ifp = nh->nh_ifp;
-	pnh4->nh_mtu = nh->nh_mtu;
-	if (nh->nh_flags & NHF_GATEWAY)
-		pnh4->nh_addr = nh->gw4_sa.sin_addr;
-	else
-		pnh4->nh_addr = dst;
-	/* Set flags */
-	pnh4->nh_flags = nh->nh_flags;
-	pnh4->nh_ia = ifatoia(nh->nh_ifa);
-	pnh4->nh_src = IA_SIN(pnh4->nh_ia)->sin_addr;
-}
-
-/*
- * Performs IPv4 route table lookup on @dst. Returns 0 on success.
- * Stores nexthop info provided @pnh4 structure.
- * Note that
- * - nh_ifp cannot be safely dereferenced
- * - nh_ifp represents logical transmit interface (rt_ifp) (e.g. if
- *   looking up address on interface "ix0" pointer to "lo0" interface
- *   will be returned instead of "ix0")
- * - nh_ifp represents "address" interface if NHR_IFAIF flag is passed
- * - howewer mtu from "transmit" interface will be returned.
- */
-int
-fib4_lookup_nh_basic(uint32_t fibnum, struct in_addr dst, uint32_t flags,
-    uint32_t flowid, struct nhop4_basic *pnh4)
-{
-	RIB_RLOCK_TRACKER;
-	struct rib_head *rh;
-	struct radix_node *rn;
-	struct sockaddr_in sin;
-	struct nhop_object *nh;
-
-	KASSERT((fibnum < rt_numfibs), ("fib4_lookup_nh_basic: bad fibnum"));
-	rh = rt_tables_get_rnh(fibnum, AF_INET);
-	if (rh == NULL)
-		return (ENOENT);
-
-	/* Prepare lookup key */
-	memset(&sin, 0, sizeof(sin));
-	sin.sin_len = sizeof(struct sockaddr_in);
-	sin.sin_addr = dst;
-
-	RIB_RLOCK(rh);
-	rn = rh->rnh_matchaddr((void *)&sin, &rh->head);
-	if (rn != NULL && ((rn->rn_flags & RNF_ROOT) == 0)) {
-		nh = RNTORT(rn)->rt_nhop;
-		/* Ensure route & ifp is UP */
-		if (RT_LINK_IS_UP(nh->nh_ifp)) {
-			fib4_rte_to_nh_basic(nh, dst, flags, pnh4);
-			RIB_RUNLOCK(rh);
-
-			return (0);
-		}
-	}
-	RIB_RUNLOCK(rh);
-
-	return (ENOENT);
-}
-
-/*
- * Performs IPv4 route table lookup on @dst. Returns 0 on success.
- * Stores extende nexthop info provided @pnh4 structure.
- * Note that
- * - nh_ifp cannot be safely dereferenced unless NHR_REF is specified.
- * - in that case you need to call fib4_free_nh_ext()
- * - nh_ifp represents logical transmit interface (rt_ifp) (e.g. if
- *   looking up address of interface "ix0" pointer to "lo0" interface
- *   will be returned instead of "ix0")
- * - nh_ifp represents "address" interface if NHR_IFAIF flag is passed
- * - howewer mtu from "transmit" interface will be returned.
- */
-int
-fib4_lookup_nh_ext(uint32_t fibnum, struct in_addr dst, uint32_t flags,
-    uint32_t flowid, struct nhop4_extended *pnh4)
-{
-	RIB_RLOCK_TRACKER;
-	struct rib_head *rh;
-	struct radix_node *rn;
-	struct sockaddr_in sin;
-	struct rtentry *rte;
-	struct nhop_object *nh;
-
-	KASSERT((fibnum < rt_numfibs), ("fib4_lookup_nh_ext: bad fibnum"));
-	rh = rt_tables_get_rnh(fibnum, AF_INET);
-	if (rh == NULL)
-		return (ENOENT);
-
-	/* Prepare lookup key */
-	memset(&sin, 0, sizeof(sin));
-	sin.sin_len = sizeof(struct sockaddr_in);
-	sin.sin_addr = dst;
-
-	RIB_RLOCK(rh);
-	rn = rh->rnh_matchaddr((void *)&sin, &rh->head);
-	if (rn != NULL && ((rn->rn_flags & RNF_ROOT) == 0)) {
-		rte = RNTORT(rn);
-#ifdef RADIX_MPATH
-		rte = rt_mpath_select(rte, flowid);
-		if (rte == NULL) {
-			RIB_RUNLOCK(rh);
-			return (ENOENT);
-		}
-#endif
-		nh = rte->rt_nhop;
-		/* Ensure route & ifp is UP */
-		if (RT_LINK_IS_UP(nh->nh_ifp)) {
-			fib4_rte_to_nh_extended(nh, dst, flags, pnh4);
-			if ((flags & NHR_REF) != 0) {
-				/* TODO: lwref on egress ifp's ? */
-			}
-			RIB_RUNLOCK(rh);
-
-			return (0);
-		}
-	}
-	RIB_RUNLOCK(rh);
-
-	return (ENOENT);
-}
-
-void
-fib4_free_nh_ext(uint32_t fibnum, struct nhop4_extended *pnh4)
-{
-
-}
 
 /*
  * Looks up the path to @dst in the IPv4 table of fib @fibnum.
  *
  * Returns the nexthop of the matching route, or NULL when the fib has no
  * AF_INET table, no non-root entry matches, or the nexthop's interface
  * fails the RT_LINK_IS_UP() check.  A failed lookup performed against an
  * existing table bumps the rts_unreach counter.
  *
  * The returned nexthop is safe to use within the current network epoch.
  * If a longer lifetime is required, pass NHR_REF in @flags to obtain a
  * referenced nexthop.  With RADIX_MPATH, @flowid is used to pick one of
  * several paths.  @scopeid is not consulted by this function.
  */
 struct nhop_object *
 fib4_lookup(uint32_t fibnum, struct in_addr dst, uint32_t scopeid,
     uint32_t flags, uint32_t flowid)
 {
 	RIB_RLOCK_TRACKER;
 	struct sockaddr_in key;
 	struct rib_head *rh;
 	struct radix_node *rn;
 	struct rtentry *rt;
 	struct nhop_object *cand, *result;
 
 	KASSERT((fibnum < rt_numfibs), ("fib4_lookup: bad fibnum"));
 	rh = rt_tables_get_rnh(fibnum, AF_INET);
 	if (rh == NULL)
 		return (NULL);
 
 	/* Build the sockaddr key handed to the table's match routine. */
 	memset(&key, 0, sizeof(key));
 	key.sin_len = sizeof(struct sockaddr_in);
 	key.sin_family = AF_INET;
 	key.sin_addr = dst;
 
 	result = NULL;
 	RIB_RLOCK(rh);
 	rn = rh->rnh_matchaddr((void *)&key, &rh->head);
 	if (rn != NULL && (rn->rn_flags & RNF_ROOT) == 0) {
 		rt = RNTORT(rn);
 #ifdef RADIX_MPATH
 		if (rt_mpath_next(rt) != NULL)
 			rt = rt_mpath_selectrte(rt, flowid);
 #endif
 		cand = rt->rt_nhop;
 		/* Only hand out the nexthop if route & ifp are UP. */
 		if (RT_LINK_IS_UP(cand->nh_ifp)) {
 			if (flags & NHR_REF)
 				nhop_ref_object(cand);
 			result = cand;
 		}
 	}
 	RIB_RUNLOCK(rh);
 
 	if (result == NULL)
 		RTSTAT_INC(rts_unreach);
 	return (result);
 }
 
 /*
  * uRPF predicate for a single nexthop.
  *
  * With a non-NULL @src_if the nexthop passes only if its nh_aifp is that
  * interface.  With a NULL @src_if any nexthop passes, except that a
  * nexthop flagged NHF_DEFAULT is rejected when @flags has NHR_NODEFAULT.
  *
  * Returns 1 if the nexthop passes, 0 otherwise.
  */
 inline static int
 check_urpf(const struct nhop_object *nh, uint32_t flags,
     const struct ifnet *src_if)
 {
 
 	/* Interface-constrained check. */
 	if (src_if != NULL)
 		return (nh->nh_aifp == src_if);
 
 	/* Existence check, optionally ignoring the default route. */
 	if ((flags & NHR_NODEFAULT) != 0 && (nh->nh_flags & NHF_DEFAULT) != 0)
 		return (0);
 
 	return (1);
 }
 
 #ifdef RADIX_MPATH
 /*
  * Multipath variant of check_urpf(): follows the rt_mpath_next() chain
  * starting at @rt and returns 1 as soon as one nexthop passes, 0 when the
  * chain is exhausted.
  */
 inline static int
 check_urpf_mpath(struct rtentry *rt, uint32_t flags,
     const struct ifnet *src_if)
 {
 
 	for (; rt != NULL; rt = rt_mpath_next(rt)) {
 		if (check_urpf(rt->rt_nhop, flags, src_if) != 0)
 			return (1);
 	}
 
 	return (0);
 }
 #endif
 
 /*
  * Performs reverse path forwarding lookup for @dst in fib @fibnum.
  * If @src_if is non-NULL, verifies that at least 1 path goes via
  *   this interface.
  * If @src_if is NULL, verifies that a route exists.
  * If @flags contains NHR_NODEFAULT, do not consider the default route.
  * @scopeid is not consulted by this function.
  *
  * Returns 1 if route matching conditions is found, 0 otherwise
  * (including when the fib has no AF_INET table).
  */
 int
 fib4_check_urpf(uint32_t fibnum, struct in_addr dst, uint32_t scopeid,
   uint32_t flags, const struct ifnet *src_if)
 {
 	RIB_RLOCK_TRACKER;
 	struct sockaddr_in sin4;
 	struct rib_head *rh;
 	struct radix_node *rn;
 	struct rtentry *rt;
 	int ret;
 
 	KASSERT((fibnum < rt_numfibs), ("fib4_check_urpf: bad fibnum"));
 	rh = rt_tables_get_rnh(fibnum, AF_INET);
 	if (rh == NULL)
 		return (0);
 
 	/*
 	 * Prepare lookup key.  The family is filled in so the key is a
 	 * fully-formed sockaddr_in, matching the other lookups in this file.
 	 */
 	memset(&sin4, 0, sizeof(sin4));
 	sin4.sin_family = AF_INET;
 	sin4.sin_len = sizeof(struct sockaddr_in);
 	sin4.sin_addr = dst;
 
 	ret = 0;
 	RIB_RLOCK(rh);
 	rn = rh->rnh_matchaddr((void *)&sin4, &rh->head);
 	if (rn != NULL && ((rn->rn_flags & RNF_ROOT) == 0)) {
 		rt = RNTORT(rn);
 #ifdef	RADIX_MPATH
 		/* Any one of the multipath nexthops may satisfy the check. */
 		ret = check_urpf_mpath(rt, flags, src_if);
 #else
 		ret = check_urpf(rt->rt_nhop, flags, src_if);
 #endif
 	}
 	RIB_RUNLOCK(rh);
 
 	return (ret);
 }
 
 /*
  * Variant of fib4_lookup() that performs the table lookup without taking
  * the rib read lock and always passes flow id 0 to the multipath selector.
  *
  * Returns the nexthop for @dst in fib @fibnum (referenced if @flags has
  * NHR_REF), or NULL when there is no table, no non-root match, or the
  * nexthop's interface fails RT_LINK_IS_UP().  @scopeid is not consulted.
  */
 struct nhop_object *
 fib4_lookup_debugnet(uint32_t fibnum, struct in_addr dst, uint32_t scopeid,
     uint32_t flags)
 {
 	struct sockaddr_in key;
 	struct rib_head *rh;
 	struct radix_node *rn;
 	struct rtentry *rt;
 	struct nhop_object *nh;
 
 	KASSERT((fibnum < rt_numfibs), ("fib4_lookup_debugnet: bad fibnum"));
 	rh = rt_tables_get_rnh(fibnum, AF_INET);
 	if (rh == NULL)
 		return (NULL);
 
 	/* Build the sockaddr key handed to the table's match routine. */
 	memset(&key, 0, sizeof(key));
 	key.sin_len = sizeof(struct sockaddr_in);
 	key.sin_family = AF_INET;
 	key.sin_addr = dst;
 
 	/* unlocked lookup */
 	rn = rh->rnh_matchaddr((void *)&key, &rh->head);
 	if (rn == NULL || (rn->rn_flags & RNF_ROOT) != 0)
 		return (NULL);
 
 	rt = RNTORT(rn);
 #ifdef RADIX_MPATH
 	if (rt_mpath_next(rt) != NULL)
 		rt = rt_mpath_selectrte(rt, 0);
 #endif
 	nh = rt->rt_nhop;
 	/* Only hand out the nexthop if route & ifp are UP. */
 	if (RT_LINK_IS_UP(nh->nh_ifp)) {
 		if (flags & NHR_REF)
 			nhop_ref_object(nh);
 		return (nh);
 	}
 
 	return (NULL);
 }
 
 #endif
Index: head/sys/netinet/in_fib.h
===================================================================
--- head/sys/netinet/in_fib.h	(revision 362899)
+++ head/sys/netinet/in_fib.h	(revision 362900)
@@ -1,81 +1,55 @@
 /*-
  * Copyright (c) 2015
  * 	Alexander V. Chernikov <melifaro@FreeBSD.org>
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  * $FreeBSD$
  */
 
 #ifndef _NETINET_IN_FIB_H_
 #define	_NETINET_IN_FIB_H_
 
 /*
  * IPv4 flavour of 'struct route': the leading members are the ones shared
  * by every 'struct route' variant, followed by an IPv4 destination
  * sockaddr.
  * NOTE(review): the layout presumably has to stay in sync with
  * 'struct route' (in_fib.c checks this via CHK_STRUCT_ROUTE_COMPAT) --
  * confirm before reordering members.
  */
 struct route_in {
 	/* common fields shared among all 'struct route' */
 	struct nhop_object *ro_nh;
 	struct	llentry *ro_lle;
 	char		*ro_prepend;
 	uint16_t	ro_plen;
 	uint16_t	ro_flags;
 	uint16_t	ro_mtu;	/* saved ro_rt mtu */
 	uint16_t	spare;
 	/* custom sockaddr */
 	struct sockaddr_in ro_dst4;
 };
 
-/* Basic nexthop info used for uRPF/mtu checks */
-struct nhop4_basic {
-	struct ifnet	*nh_ifp;	/* Logical egress interface */
-	uint16_t	nh_mtu;		/* nexthop mtu */
-	uint16_t	nh_flags;	/* nhop flags */
-	struct in_addr	nh_addr;	/* GW/DST IPv4 address */
-};
-
-/* Extended nexthop info used for control protocols */
-struct nhop4_extended {
-	struct ifnet	*nh_ifp;	/* Logical egress interface */
-	struct in_ifaddr *nh_ia;	/* Associated address */
-	uint16_t	nh_mtu;		/* nexthop mtu */
-	uint16_t	nh_flags;	/* nhop flags */
-	uint8_t		spare[4];
-	struct in_addr	nh_addr;	/* GW/DST IPv4 address */
-	struct in_addr	nh_src;		/* default source IPv4 address */
-	uint64_t	spare2;
-};
-
-int fib4_lookup_nh_basic(uint32_t fibnum, struct in_addr dst, uint32_t flags,
-    uint32_t flowid, struct nhop4_basic *pnh4);
-int fib4_lookup_nh_ext(uint32_t fibnum, struct in_addr dst, uint32_t flags,
-    uint32_t flowid, struct nhop4_extended *pnh4);
-void fib4_free_nh_ext(uint32_t fibnum, struct nhop4_extended *pnh4);
-
 /*
  * Nexthop lookup for @dst in fib @fibnum; returns NULL when nothing
  * usable is found.  Pass NHR_REF in @flags for a referenced nexthop.
  */
 struct nhop_object *fib4_lookup(uint32_t fibnum, struct in_addr dst,
     uint32_t scopeid, uint32_t flags, uint32_t flowid);
 /*
  * Reverse path forwarding check; returns 1 if a matching route is found,
  * 0 otherwise.
  */
 int fib4_check_urpf(uint32_t fibnum, struct in_addr dst, uint32_t scopeid,
     uint32_t flags, const struct ifnet *src_if);
 /* Lookup variant that does not take the rib lock. */
 struct nhop_object *fib4_lookup_debugnet(uint32_t fibnum, struct in_addr dst,
     uint32_t scopeid, uint32_t flags);
 #endif
 
Index: head/sys/netinet/in_mcast.c
===================================================================
--- head/sys/netinet/in_mcast.c	(revision 362899)
+++ head/sys/netinet/in_mcast.c	(revision 362900)
@@ -1,3058 +1,3063 @@
 /*-
  * SPDX-License-Identifier: BSD-3-Clause
  *
  * Copyright (c) 2007-2009 Bruce Simpson.
  * Copyright (c) 2005 Robert N. M. Watson.
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. The name of the author may not be used to endorse or promote
  *    products derived from this software without specific prior written
  *    permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  */
 
 /*
  * IPv4 multicast socket, group, and socket option processing module.
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/kernel.h>
 #include <sys/lock.h>
 #include <sys/malloc.h>
 #include <sys/mbuf.h>
 #include <sys/protosw.h>
 #include <sys/rmlock.h>
 #include <sys/socket.h>
 #include <sys/socketvar.h>
 #include <sys/protosw.h>
 #include <sys/sysctl.h>
 #include <sys/ktr.h>
 #include <sys/taskqueue.h>
 #include <sys/tree.h>
 
 #include <net/if.h>
 #include <net/if_var.h>
 #include <net/if_dl.h>
 #include <net/route.h>
+#include <net/route/nhop.h>
 #include <net/vnet.h>
 
 #include <net/ethernet.h>
 
 #include <netinet/in.h>
 #include <netinet/in_systm.h>
 #include <netinet/in_fib.h>
 #include <netinet/in_pcb.h>
 #include <netinet/in_var.h>
 #include <netinet/ip_var.h>
 #include <netinet/igmp_var.h>
 
 #ifndef KTR_IGMPV3
 #define KTR_IGMPV3 KTR_INET
 #endif
 
 /*
  * Convenience union overlaying the sockaddr flavours used in this file,
  * so one object can be viewed as generic, link-layer or IPv4 sockaddr.
  * Guarded by __SOCKUNION_DECLARED so it is declared at most once.
  */
 #ifndef __SOCKUNION_DECLARED
 union sockunion {
 	struct sockaddr_storage	ss;
 	struct sockaddr		sa;
 	struct sockaddr_dl	sdl;
 	struct sockaddr_in	sin;
 };
 typedef union sockunion sockunion_t;
 #define __SOCKUNION_DECLARED
 #endif /* __SOCKUNION_DECLARED */
 
 static MALLOC_DEFINE(M_INMFILTER, "in_mfilter",
     "IPv4 multicast PCB-layer source filter");
 static MALLOC_DEFINE(M_IPMADDR, "in_multi", "IPv4 multicast group");
 static MALLOC_DEFINE(M_IPMOPTS, "ip_moptions", "IPv4 multicast options");
 static MALLOC_DEFINE(M_IPMSOURCE, "ip_msource",
     "IPv4 multicast IGMP-layer source filter");
 
 /*
  * Locking:
  *
  * - Lock order is: Giant, IN_MULTI_LOCK, INP_WLOCK,
  *   IN_MULTI_LIST_LOCK, IGMP_LOCK, IF_ADDR_LOCK.
  * - The IF_ADDR_LOCK is implicitly taken by inm_lookup() earlier, however
  *   it can be taken by code in net/if.c also.
  * - ip_moptions and in_mfilter are covered by the INP_WLOCK.
  *
  * struct in_multi is covered by IN_MULTI_LIST_LOCK. There isn't strictly
  * any need for in_multi itself to be virtualized -- it is bound to an ifp
  * anyway no matter what happens.
  */
 /*
  * Mutex initialised at boot via MTX_SYSINIT.
  * NOTE(review): presumably the lock behind IN_MULTI_LIST_LOCK -- confirm.
  */
 struct mtx in_multi_list_mtx;
 MTX_SYSINIT(in_multi_mtx, &in_multi_list_mtx, "in_multi_list_mtx", MTX_DEF);
 
 /* Protects inm_free_list, the queue of in_multi records awaiting release. */
 struct mtx in_multi_free_mtx;
 MTX_SYSINIT(in_multi_free_mtx, &in_multi_free_mtx, "in_multi_free_mtx", MTX_DEF);
 
 /*
  * Sleepable lock initialised at boot via SX_SYSINIT.
  * NOTE(review): presumably the lock behind IN_MULTI_LOCK -- confirm.
  */
 struct sx in_multi_sx;
 SX_SYSINIT(in_multi_sx, &in_multi_sx, "in_multi_sx");
 
 /* Set by inm_disconnect() when it frees a link-layer ifma. */
 int ifma_restart;
 
 /*
  * Functions with non-static linkage defined in this file should be
  * declared in in_var.h:
  *  imo_multi_filter()
  *  in_addmulti()
  *  in_delmulti()
  *  in_joingroup()
  *  in_joingroup_locked()
  *  in_leavegroup()
  *  in_leavegroup_locked()
  * and ip_var.h:
  *  inp_freemoptions()
  *  inp_getmoptions()
  *  inp_setmoptions()
  *
  * XXX: Both carp and pf need to use the legacy (*,G) KPIs in_addmulti()
  * and in_delmulti().
  */
 static void	imf_commit(struct in_mfilter *);
 static int	imf_get_source(struct in_mfilter *imf,
 		    const struct sockaddr_in *psin,
 		    struct in_msource **);
 static struct in_msource *
 		imf_graft(struct in_mfilter *, const uint8_t,
 		    const struct sockaddr_in *);
 static void	imf_leave(struct in_mfilter *);
 static int	imf_prune(struct in_mfilter *, const struct sockaddr_in *);
 static void	imf_purge(struct in_mfilter *);
 static void	imf_rollback(struct in_mfilter *);
 static void	imf_reap(struct in_mfilter *);
 static struct in_mfilter *
 		imo_match_group(const struct ip_moptions *,
 		    const struct ifnet *, const struct sockaddr *);
 static struct in_msource *
 		imo_match_source(struct in_mfilter *, const struct sockaddr *);
 static void	ims_merge(struct ip_msource *ims,
 		    const struct in_msource *lims, const int rollback);
 static int	in_getmulti(struct ifnet *, const struct in_addr *,
 		    struct in_multi **);
 static int	inm_get_source(struct in_multi *inm, const in_addr_t haddr,
 		    const int noalloc, struct ip_msource **pims);
 #ifdef KTR
 static int	inm_is_ifp_detached(const struct in_multi *);
 #endif
 static int	inm_merge(struct in_multi *, /*const*/ struct in_mfilter *);
 static void	inm_purge(struct in_multi *);
 static void	inm_reap(struct in_multi *);
 static void inm_release(struct in_multi *);
 static struct ip_moptions *
 		inp_findmoptions(struct inpcb *);
 static int	inp_get_source_filters(struct inpcb *, struct sockopt *);
 static int	inp_join_group(struct inpcb *, struct sockopt *);
 static int	inp_leave_group(struct inpcb *, struct sockopt *);
 static struct ifnet *
 		inp_lookup_mcast_ifp(const struct inpcb *,
 		    const struct sockaddr_in *, const struct in_addr);
 static int	inp_block_unblock_source(struct inpcb *, struct sockopt *);
 static int	inp_set_multicast_if(struct inpcb *, struct sockopt *);
 static int	inp_set_source_filters(struct inpcb *, struct sockopt *);
 static int	sysctl_ip_mcast_filters(SYSCTL_HANDLER_ARGS);
 
 /* Parent sysctl node for the IPv4 multicast knobs defined below. */
 static SYSCTL_NODE(_net_inet_ip, OID_AUTO, mcast,
     CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
     "IPv4 multicast");
 
 /* Tunable: maximum number of source filters per group. */
 static u_long in_mcast_maxgrpsrc = IP_MAX_GROUP_SRC_FILTER;
 SYSCTL_ULONG(_net_inet_ip_mcast, OID_AUTO, maxgrpsrc,
     CTLFLAG_RWTUN, &in_mcast_maxgrpsrc, 0,
     "Max source filters per group");
 
 /* Tunable: maximum number of source filters per socket. */
 static u_long in_mcast_maxsocksrc = IP_MAX_SOCK_SRC_FILTER;
 SYSCTL_ULONG(_net_inet_ip_mcast, OID_AUTO, maxsocksrc,
     CTLFLAG_RWTUN, &in_mcast_maxsocksrc, 0,
     "Max source filters per socket");
 
 /* Tunable: whether multicast datagrams are looped back by default. */
 int in_mcast_loop = IP_DEFAULT_MULTICAST_LOOP;
 SYSCTL_INT(_net_inet_ip_mcast, OID_AUTO, loop, CTLFLAG_RWTUN,
     &in_mcast_loop, 0, "Loopback multicast datagrams by default");
 
 /* Read-only node served by the sysctl_ip_mcast_filters() handler. */
 static SYSCTL_NODE(_net_inet_ip_mcast, OID_AUTO, filters,
     CTLFLAG_RD | CTLFLAG_MPSAFE, sysctl_ip_mcast_filters,
     "Per-interface stack-wide source filters");
 
 #ifdef KTR
 /*
  * Reports whether the interface behind @inm has been detached.
  * The ifnet layer sets the ifma's ifp pointer to NULL when the ifp is
  * detached, so a NULL ifma_ifp yields 1; otherwise the result is 0,
  * after asserting that netinet's and net's notion of the ifp agree.
  */
 static int __inline
 inm_is_ifp_detached(const struct in_multi *inm)
 {
 	struct ifnet *ifma_ifp;
 
 	KASSERT(inm->inm_ifma != NULL, ("%s: no ifma", __func__));
 	ifma_ifp = inm->inm_ifma->ifma_ifp;
 	if (ifma_ifp == NULL)
 		return (1);
 
 	/* Still attached: both layers must point at the same ifnet. */
 	KASSERT(inm->inm_ifp == ifma_ifp, ("%s: bad ifp", __func__));
 
 	return (0);
 }
 #endif
 
 /*
  * Deferred-release machinery: records queued on inm_free_list are
  * released by free_task, which runs inm_release_task().
  */
 static struct task free_task;
 static struct in_multi_head inm_free_list = SLIST_HEAD_INITIALIZER();
 static void inm_release_task(void *arg __unused, int pending __unused);
 
 /* Boot-time hook (see SYSINIT below): binds free_task to its handler. */
 static void
 inm_init(void)
 {
 	TASK_INIT(&free_task, 0, inm_release_task, NULL);
 }
 SYSINIT(inm_init, SI_SUB_TASKQ, SI_ORDER_ANY, inm_init, NULL);
 
 /*
  * Hands every record on @inmh over to the global free list (under
  * in_multi_free_mtx) and schedules free_task on taskqueue_thread to
  * release them.  An empty list is a no-op.
  */
 void
 inm_release_list_deferred(struct in_multi_head *inmh)
 {
 
 	if (!SLIST_EMPTY(inmh)) {
 		mtx_lock(&in_multi_free_mtx);
 		SLIST_CONCAT(&inm_free_list, inmh, in_multi, inm_nrele);
 		mtx_unlock(&in_multi_free_mtx);
 		taskqueue_enqueue(taskqueue_thread, &free_task);
 	}
 }
 
 /*
  * Unlinks @inm's interface multicast address (ifma) from its interface's
  * if_multiaddrs list and drops one reference on the associated link-layer
  * ifma, unlinking and freeing that one too when the count reaches zero.
  *
  * The IF_ADDR write lock on inm->inm_ifp must be held (asserted).
  */
 void
 inm_disconnect(struct in_multi *inm)
 {
 	struct ifnet *ifp;
 	struct ifmultiaddr *ifma, *ll_ifma;
 
 	ifp = inm->inm_ifp;
 	IF_ADDR_WLOCK_ASSERT(ifp);
 	ifma = inm->inm_ifma;
 
 	/*
 	 * NOTE(review): no matching if_rele() is visible in this function;
 	 * presumably the final release path drops it -- confirm.
 	 */
 	if_ref(ifp);
 	/* Only unlink the ifma if it is still on the interface list. */
 	if (ifma->ifma_flags & IFMA_F_ENQUEUED) {
 		CK_STAILQ_REMOVE(&ifp->if_multiaddrs, ifma, ifmultiaddr, ifma_link);
 		ifma->ifma_flags &= ~IFMA_F_ENQUEUED;
 	}
 	MCDPRINTF("removed ifma: %p from %s\n", ifma, ifp->if_xname);
 	if ((ll_ifma = ifma->ifma_llifma) != NULL) {
 		MPASS(ifma != ll_ifma);
 		/* Sever the link before dropping the link-layer reference. */
 		ifma->ifma_llifma = NULL;
 		MPASS(ll_ifma->ifma_llifma == NULL);
 		MPASS(ll_ifma->ifma_ifp == ifp);
 		if (--ll_ifma->ifma_refcount == 0) {
 			if (ll_ifma->ifma_flags & IFMA_F_ENQUEUED) {
 				CK_STAILQ_REMOVE(&ifp->if_multiaddrs, ll_ifma, ifmultiaddr, ifma_link);
 				ll_ifma->ifma_flags &= ~IFMA_F_ENQUEUED;
 			}
 			MCDPRINTF("removed ll_ifma: %p from %s\n", ll_ifma, ifp->if_xname);
 			if_freemulti(ll_ifma);
 			/*
 			 * NOTE(review): presumably tells list walkers to
 			 * restart after an entry was freed -- confirm.
 			 */
 			ifma_restart = true;
 		}
 	}
 }
 
 void
 inm_release_deferred(struct in_multi *inm)
 {
 	struct in_multi_head tmp;
 
 	IN_MULTI_LIST_LOCK_ASSERT();
 	MPASS(inm->inm_refcount > 0);
 	if (--inm->inm_refcount == 0) {
 		SLIST_INIT(&tmp);
 		inm_disconnect(inm);
 		inm->inm_ifma->ifma_protospec = NULL;
 		SLIST_INSERT_HEAD(&tmp, inm, inm_nrele);
 		inm_release_list_deferred(&tmp);
 	}
 }
 
 /*
  * Task handler for free_task: takes over the whole global free list under
  * in_multi_free_mtx, then releases each record with IN_MULTI_LOCK held.
  */
 static void
 inm_release_task(void *arg __unused, int pending __unused)
 {
 	struct in_multi_head batch;
 	struct in_multi *inm;
 
 	/* Move the queued records to a private list. */
 	SLIST_INIT(&batch);
 	mtx_lock(&in_multi_free_mtx);
 	SLIST_CONCAT(&batch, &inm_free_list, in_multi, inm_nrele);
 	mtx_unlock(&in_multi_free_mtx);
 
 	IN_MULTI_LOCK();
 	/* Pop and release records front to back until the list is empty. */
 	while ((inm = SLIST_FIRST(&batch)) != NULL) {
 		SLIST_REMOVE_HEAD(&batch, inm_nrele);
 		MPASS(inm);
 		inm_release(inm);
 	}
 	IN_MULTI_UNLOCK();
 }
 
 /*
  * Resets @imf to a known state: the whole structure is zeroed, the source
  * tree is initialised empty, and the filter modes at t0 and t1 are set to
  * @st0 and @st1 respectively.
  */
 static __inline void
 imf_init(struct in_mfilter *imf, const int st0, const int st1)
 {
 	/* Zero first so every field not set below starts out as 0. */
 	memset(imf, 0, sizeof(*imf));
 	imf->imf_st[0] = st0;
 	imf->imf_st[1] = st1;
 	RB_INIT(&imf->imf_sources);
 }
 
 /*
  * Allocates an in_mfilter from M_INMFILTER using malloc flags @mflags and
  * initialises it with filter modes @st0 (t0) and @st1 (t1).
  * Returns the new filter, or NULL if the allocation returned NULL.
  */
 struct in_mfilter *
 ip_mfilter_alloc(const int mflags, const int st0, const int st1)
 {
 	struct in_mfilter *filter;
 
 	filter = malloc(sizeof(*filter), M_INMFILTER, mflags);
 	if (filter == NULL)
 		return (NULL);
 
 	imf_init(filter, st0, st1);
 	return (filter);
 }
 
 /*
  * Counterpart of ip_mfilter_alloc(): purges @imf via imf_purge() and then
  * returns its memory to M_INMFILTER.
  */
 void
 ip_mfilter_free(struct in_mfilter *imf)
 {
 
 	imf_purge(imf);
 	free(imf, M_INMFILTER);
 }
 
 /*
  * Looks up the in_multi record for IPv4 multicast address @ina on @ifp.
  * ifp must be valid.  Returns the record, or NULL if none is found.
  * The IN_MULTI_LIST_LOCK and IF_ADDR_LOCK on ifp must be held (both are
  * asserted).
  */
 struct in_multi *
 inm_lookup_locked(struct ifnet *ifp, const struct in_addr ina)
 {
 	struct ifmultiaddr *ifma;
 	struct in_multi *cand;
 
 	IN_MULTI_LIST_LOCK_ASSERT();
 	IF_ADDR_LOCK_ASSERT(ifp);
 
 	CK_STAILQ_FOREACH(ifma, &((ifp)->if_multiaddrs), ifma_link) {
 		/* Skip non-IPv4 entries and ones with no in_multi attached. */
 		if (ifma->ifma_addr->sa_family != AF_INET)
 			continue;
 		if (ifma->ifma_protospec == NULL)
 			continue;
 		cand = (struct in_multi *)ifma->ifma_protospec;
 		if (cand->inm_addr.s_addr == ina.s_addr)
 			return (cand);
 	}
 
 	return (NULL);
 }
 
 /*
  * Wrapper for inm_lookup_locked().
  * The lookup runs inside a network epoch section that is entered here and
  * exited before returning; the IN_MULTI_LIST lock must already be held
  * (asserted).  Returns the matching in_multi record, or NULL.
  */
 struct in_multi *
 inm_lookup(struct ifnet *ifp, const struct in_addr ina)
 {
 	struct epoch_tracker et;
 	struct in_multi *inm;
 
 	IN_MULTI_LIST_LOCK_ASSERT();
 	NET_EPOCH_ENTER(et);
 
 	inm = inm_lookup_locked(ifp, ina);
 	NET_EPOCH_EXIT(et);
 
 	return (inm);
 }
 
 /*
  * Find the multicast filter entry of this ip_moptions instance whose
  * group matches @group and, when @ifp is non-NULL, whose interface is
  * @ifp.  Entries with no in_multi attached are skipped.
  * Returns the matching in_mfilter; when nothing matches, the result is
  * the iterator value left by IP_MFILTER_FOREACH at the end of the list
  * (callers in this file test it against NULL).
  */
 static struct in_mfilter *
 imo_match_group(const struct ip_moptions *imo, const struct ifnet *ifp,
     const struct sockaddr *group)
 {
 	const struct sockaddr_in *want;
 	struct in_mfilter *imf;
 	struct in_multi	*memb;
 
 	want = (const struct sockaddr_in *)group;
 
 	IP_MFILTER_FOREACH(imf, &imo->imo_head) {
 		memb = imf->imf_inm;
 		if (memb == NULL)
 			continue;
 		/* An interface was requested and this entry is on another. */
 		if (ifp != NULL && memb->inm_ifp != ifp)
 			continue;
 		if (in_hosteq(memb->inm_addr, want->sin_addr))
 			break;
 	}
 	return (imf);
 }
 
 /*
  * Find an IPv4 multicast source entry in filter @imf which matches
  * the source address @src (must be AF_INET; asserted).
  * Returns the entry, or whatever RB_FIND yields when there is no match.
  *
  * NOTE: This does not check if the entry is in-mode, merely if
  * it exists, which may not be the desired behaviour.
  */
 static struct in_msource *
 imo_match_source(struct in_mfilter *imf, const struct sockaddr *src)
 {
 	struct ip_msource	 find;
 	struct ip_msource	*ims;
 	const sockunion_t	*psa;
 
 	KASSERT(src->sa_family == AF_INET, ("%s: !AF_INET", __func__));
 
 	/* Source trees are keyed in host byte order. */
 	psa = (const sockunion_t *)src;
 	/*
 	 * Only the key field of the on-stack search node is filled in.
 	 * NOTE(review): assumes the tree comparator reads ims_haddr only --
 	 * confirm.
 	 */
 	find.ims_haddr = ntohl(psa->sin.sin_addr.s_addr);
 	ims = RB_FIND(ip_msource_tree, &imf->imf_sources, &find);
 
 	/* The tree node is handed back viewed as a struct in_msource. */
 	return ((struct in_msource *)ims);
 }
 
 /*
  * Decide whether a multicast datagram for (@group, @src) received on @ifp
  * should be delivered to the socket owning @imo.
  *
  * Returns MCAST_PASS if the datagram should be allowed through,
  * MCAST_NOTGMEMBER if the socket has no membership for the group on @ifp,
  * or MCAST_NOTSMEMBER if the source is filtered out.
  */
 int
 imo_multi_filter(const struct ip_moptions *imo, const struct ifnet *ifp,
     const struct sockaddr *group, const struct sockaddr *src)
 {
 	struct in_mfilter *filter;
 	struct in_msource *lims;
 	int fmode;
 
 	KASSERT(ifp != NULL, ("%s: null ifp", __func__));
 
 	filter = imo_match_group(imo, ifp, group);
 	if (filter == NULL)
 		return (MCAST_NOTGMEMBER);
 
 	/*
 	 * Check if the source was included in an (S,G) join.
 	 * Allow reception on exclusive memberships by default,
 	 * reject reception on inclusive memberships by default.
 	 * Exclude source only if an in-mode exclude filter exists.
 	 * Include source only if an in-mode include filter exists.
 	 * NOTE: We are comparing group state here at IGMP t1 (now)
 	 * with socket-layer t0 (since last downcall).
 	 */
 	fmode = filter->imf_st[1];
 	lims = imo_match_source(filter, src);
 
 	if (lims == NULL) {
 		/* Unlisted source: rejected only by an include-mode filter. */
 		if (fmode == MCAST_INCLUDE)
 			return (MCAST_NOTSMEMBER);
 	} else if (lims->imsl_st[0] != fmode) {
 		/* Listed source whose entry is not in the filter's mode. */
 		return (MCAST_NOTSMEMBER);
 	}
 
 	return (MCAST_PASS);
 }
 
 /*
  * Find and return a reference to an in_multi record for (ifp, group),
  * and bump its reference count.
  * If one does not exist, try to allocate it, and update link-layer multicast
  * filters on ifp to listen for group.
  * Assumes the IN_MULTI lock is held across the call.
  * Return 0 if successful, otherwise return an appropriate error code.
  *
  * On success the record is stored through @pinm.  Errors are whatever
  * if_addmulti() returns, or ENOMEM if the record cannot be allocated.
  */
 static int
 in_getmulti(struct ifnet *ifp, const struct in_addr *group,
     struct in_multi **pinm)
 {
 	struct sockaddr_in	 gsin;
 	struct ifmultiaddr	*ifma;
 	struct in_ifinfo	*ii;
 	struct in_multi		*inm;
 	int error;
 
 	IN_MULTI_LOCK_ASSERT();
 
 	/* Per-interface IPv4 data; its ii_igmp is stored in a new record. */
 	ii = (struct in_ifinfo *)ifp->if_afdata[AF_INET];
 	/* Fast path: the group is already known on this interface. */
 	IN_MULTI_LIST_LOCK();
 	inm = inm_lookup(ifp, *group);
 	if (inm != NULL) {
 		/*
 		 * If we already joined this group, just bump the
 		 * refcount and return it.
 		 */
 		KASSERT(inm->inm_refcount >= 1,
 		    ("%s: bad refcount %d", __func__, inm->inm_refcount));
 		inm_acquire_locked(inm);
 		*pinm = inm;
 	}
 	IN_MULTI_LIST_UNLOCK();
 	if (inm != NULL)
 		return (0);
 
 	/* Slow path: build a sockaddr for the group address. */
 	memset(&gsin, 0, sizeof(gsin));
 	gsin.sin_family = AF_INET;
 	gsin.sin_len = sizeof(struct sockaddr_in);
 	gsin.sin_addr = *group;
 
 	/*
 	 * Check if a link-layer group is already associated
 	 * with this network-layer group on the given ifnet.
 	 */
 	error = if_addmulti(ifp, (struct sockaddr *)&gsin, &ifma);
 	if (error != 0)
 		return (error);
 
 	/* XXX ifma_protospec must be covered by IF_ADDR_LOCK */
 	IN_MULTI_LIST_LOCK();
 	IF_ADDR_WLOCK(ifp);
 
 	/*
 	 * If something other than netinet is occupying the link-layer
 	 * group, print a meaningful error message and back out of
 	 * the allocation.
 	 * Otherwise, bump the refcount on the existing network-layer
 	 * group association and return it.
 	 */
 	if (ifma->ifma_protospec != NULL) {
 		inm = (struct in_multi *)ifma->ifma_protospec;
 #ifdef INVARIANTS
 		KASSERT(ifma->ifma_addr != NULL, ("%s: no ifma_addr",
 		    __func__));
 		KASSERT(ifma->ifma_addr->sa_family == AF_INET,
 		    ("%s: ifma not AF_INET", __func__));
 		KASSERT(inm != NULL, ("%s: no ifma_protospec", __func__));
 		if (inm->inm_ifma != ifma || inm->inm_ifp != ifp ||
 		    !in_hosteq(inm->inm_addr, *group)) {
 			char addrbuf[INET_ADDRSTRLEN];
 
 			panic("%s: ifma %p is inconsistent with %p (%s)",
 			    __func__, ifma, inm, inet_ntoa_r(*group, addrbuf));
 		}
 #endif
 		inm_acquire_locked(inm);
 		*pinm = inm;
 		goto out_locked;
 	}
 
 	IF_ADDR_WLOCK_ASSERT(ifp);
 
 	/*
 	 * A new in_multi record is needed; allocate and initialize it.
 	 * We DO NOT perform an IGMP join as the in_ layer may need to
 	 * push an initial source list down to IGMP to support SSM.
 	 *
 	 * The initial source filter state is INCLUDE, {} as per the RFC.
 	 */
 	/* M_NOWAIT: both locks taken above are still held here. */
 	inm = malloc(sizeof(*inm), M_IPMADDR, M_NOWAIT | M_ZERO);
 	if (inm == NULL) {
 		IF_ADDR_WUNLOCK(ifp);
 		IN_MULTI_LIST_UNLOCK();
 		/* Back out the if_addmulti() done above. */
 		if_delmulti_ifma(ifma);
 		return (ENOMEM);
 	}
 	inm->inm_addr = *group;
 	inm->inm_ifp = ifp;
 	inm->inm_igi = ii->ii_igmp;
 	inm->inm_ifma = ifma;
 	inm->inm_refcount = 1;
 	inm->inm_state = IGMP_NOT_MEMBER;
 	mbufq_init(&inm->inm_scq, IGMP_MAX_STATE_CHANGES);
 	inm->inm_st[0].iss_fmode = MCAST_UNDEFINED;
 	inm->inm_st[1].iss_fmode = MCAST_UNDEFINED;
 	RB_INIT(&inm->inm_srcs);
 
 	/* Publish the record through the ifma. */
 	ifma->ifma_protospec = inm;
 
 	*pinm = inm;
  out_locked:
 	IF_ADDR_WUNLOCK(ifp);
 	IN_MULTI_LIST_UNLOCK();
 	return (0);
 }
 
 /*
  * Free an in_multi record whose reference count has already dropped to 0.
  *
  * The record's source nodes are purged, the record itself is freed, and
  * the underlying link-layer membership is deleted.
  */
 static void
 inm_release(struct in_multi *inm)
 {
 	struct ifnet *owner;
 	struct ifmultiaddr *llma;
 
 	CTR2(KTR_IGMPV3, "%s: refcount is %d", __func__, inm->inm_refcount);
 	MPASS(inm->inm_refcount == 0);
 	CTR2(KTR_IGMPV3, "%s: freeing inm %p", __func__, inm);
 
 	/*
 	 * Snapshot both pointers up front: inm is freed before the
 	 * link-layer membership is deleted.
 	 * XXX this access is not covered by IF_ADDR_LOCK
 	 */
 	owner = inm->inm_ifp;
 	llma = inm->inm_ifma;
 
 	CTR2(KTR_IGMPV3, "%s: purging ifma %p", __func__, llma);
 	if (owner == NULL) {
 		/* No interface recorded: tear down without a vnet context. */
 		inm_purge(inm);
 		free(inm, M_IPMADDR);
 		if_delmulti_ifma_flags(llma, 1);
 	} else {
 		/* Tear down inside the interface's vnet, then drop the ifnet ref. */
 		CURVNET_SET(owner->if_vnet);
 		inm_purge(inm);
 		free(inm, M_IPMADDR);
 		if_delmulti_ifma_flags(llma, 1);
 		CURVNET_RESTORE();
 		if_rele(owner);
 	}
 }
 
 /*
  * Clear recorded source entries for a group.
  * Used by the IGMP code. Caller must hold the IN_MULTI lock.
  * FIXME: Should reap.
  */
 void
 inm_clear_recorded(struct in_multi *inm)
 {
 	struct ip_msource	*src;
 
 	IN_MULTI_LIST_LOCK_ASSERT();
 
 	/* Un-mark every recorded source and keep the t1 record count in step. */
 	RB_FOREACH(src, ip_msource_tree, &inm->inm_srcs) {
 		if (src->ims_stp == 0)
 			continue;
 		src->ims_stp = 0;
 		--inm->inm_st[1].iss_rec;
 	}
 	KASSERT(inm->inm_st[1].iss_rec == 0,
 	    ("%s: iss_rec %d not 0", __func__, inm->inm_st[1].iss_rec));
 }
 
 /*
  * Record a source as pending for a Source-Group IGMPv3 query.
  * This lives here as it modifies the shared tree.
  *
  * inm is the group descriptor.
  * naddr is the address of the source to record in network-byte order.
  *
  * If the net.inet.igmp.sgalloc sysctl is non-zero, we will
  * lazy-allocate a source node in response to an SG query.
  * Otherwise, no allocation is performed. This saves some memory
  * with the trade-off that the source will not be reported to the
  * router if joined in the window between the query response and
  * the group actually being joined on the local host.
  *
  * VIMAGE: XXX: Currently the igmp_sgalloc feature has been removed.
  * This turns off the allocation of a recorded source entry if
  * the group has not been joined.
  *
  * Return 0 if the source didn't exist or was already marked as recorded.
  * Return 1 if the source was marked as recorded by this function.
  * Return <0 if any error occurred (negated errno code).
  */
 int
 inm_record_source(struct in_multi *inm, const in_addr_t naddr)
 {
 	struct ip_msource	 find;
 	struct ip_msource	*ims, *nims;
 
 	IN_MULTI_LIST_LOCK_ASSERT();
 
 	find.ims_haddr = ntohl(naddr);
 	ims = RB_FIND(ip_msource_tree, &inm->inm_srcs, &find);
 	if (ims && ims->ims_stp)
 		return (0);
 	if (ims == NULL) {
 		if (inm->inm_nsrc == in_mcast_maxgrpsrc)
 			return (-ENOSPC);
 		nims = malloc(sizeof(struct ip_msource), M_IPMSOURCE,
 		    M_NOWAIT | M_ZERO);
 		if (nims == NULL)
 			return (-ENOMEM);
 		nims->ims_haddr = find.ims_haddr;
 		RB_INSERT(ip_msource_tree, &inm->inm_srcs, nims);
 		++inm->inm_nsrc;
 		ims = nims;
 	}
 
 	/*
 	 * Mark the source as recorded and update the recorded
 	 * source count.
 	 */
 	++ims->ims_stp;
 	++inm->inm_st[1].iss_rec;
 
 	return (1);
 }
 
 /*
  * Return a pointer to an in_msource owned by an in_mfilter,
  * given its source address.
  * Lazy-allocate if needed. If this is a new entry its filter state is
  * undefined at t0.
  *
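  * imf is the filter set being modified.
  * psin holds the source address in network byte-order; it is converted
  * to *host* byte-order to form the lookup key.
  * *plims is set to the entry that was found or allocated.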
  *
  * SMPng: May be called with locks held; malloc must not block.
  */
 static int
 imf_get_source(struct in_mfilter *imf, const struct sockaddr_in *psin,
     struct in_msource **plims)
 {
 	struct ip_msource	 find;
 	struct ip_msource	*ims, *nims;
 	struct in_msource	*lims;
 	int			 error;
 
 	error = 0;
 	ims = NULL;
 	lims = NULL;
 
 	/* key is host byte order */
 	find.ims_haddr = ntohl(psin->sin_addr.s_addr);
 	ims = RB_FIND(ip_msource_tree, &imf->imf_sources, &find);
 	lims = (struct in_msource *)ims;
 	if (lims == NULL) {
 		if (imf->imf_nsrc == in_mcast_maxsocksrc)
 			return (ENOSPC);
 		nims = malloc(sizeof(struct in_msource), M_INMFILTER,
 		    M_NOWAIT | M_ZERO);
 		if (nims == NULL)
 			return (ENOMEM);
 		lims = (struct in_msource *)nims;
 		lims->ims_haddr = find.ims_haddr;
 		lims->imsl_st[0] = MCAST_UNDEFINED;
 		RB_INSERT(ip_msource_tree, &imf->imf_sources, nims);
 		++imf->imf_nsrc;
 	}
 
 	*plims = lims;
 
 	return (error);
 }
 
 /*
  * Graft a source entry into an existing socket-layer filter set,
  * maintaining any required invariants and checking allocations.
  *
  * The source is marked as being in the new filter mode at t1.
  *
  * Return the pointer to the new node, otherwise return NULL.
  */
 static struct in_msource *
 imf_graft(struct in_mfilter *imf, const uint8_t st1,
     const struct sockaddr_in *psin)
 {
 	struct ip_msource	*nims;
 	struct in_msource	*lims;
 
 	nims = malloc(sizeof(struct in_msource), M_INMFILTER,
 	    M_NOWAIT | M_ZERO);
 	if (nims == NULL)
 		return (NULL);
 	lims = (struct in_msource *)nims;
 	lims->ims_haddr = ntohl(psin->sin_addr.s_addr);
 	lims->imsl_st[0] = MCAST_UNDEFINED;
 	lims->imsl_st[1] = st1;
 	RB_INSERT(ip_msource_tree, &imf->imf_sources, nims);
 	++imf->imf_nsrc;
 
 	return (lims);
 }
 
 /*
  * Prune a source entry from an existing socket-layer filter set,
  * maintaining any required invariants and checking allocations.
  *
  * The source is marked as being left at t1, it is not freed.
  *
  * Return 0 if no error occurred, otherwise return an errno value.
  */
 static int
 imf_prune(struct in_mfilter *imf, const struct sockaddr_in *psin)
 {
 	struct ip_msource	 find;
 	struct ip_msource	*ims;
 	struct in_msource	*lims;
 
 	/* key is host byte order */
 	find.ims_haddr = ntohl(psin->sin_addr.s_addr);
 	ims = RB_FIND(ip_msource_tree, &imf->imf_sources, &find);
 	if (ims == NULL)
 		return (ENOENT);
 	lims = (struct in_msource *)ims;
 	lims->imsl_st[1] = MCAST_UNDEFINED;
 	return (0);
 }
 
 /*
  * Revert socket-layer filter set deltas at t1 to t0 state.
  */
 static void
 imf_rollback(struct in_mfilter *imf)
 {
 	struct ip_msource	*node, *next;
 	struct in_msource	*entry;
 
 	RB_FOREACH_SAFE(node, ip_msource_tree, &imf->imf_sources, next) {
 		entry = (struct in_msource *)node;
 
 		/* no change at t1 */
 		if (entry->imsl_st[0] == entry->imsl_st[1])
 			continue;
 
 		if (entry->imsl_st[0] == MCAST_UNDEFINED) {
 			/* revert source added t1 */
 			CTR2(KTR_IGMPV3, "%s: free ims %p", __func__, node);
 			RB_REMOVE(ip_msource_tree, &imf->imf_sources, node);
 			free(node, M_INMFILTER);
 			imf->imf_nsrc--;
 			continue;
 		}
 
 		/* revert change to existing source at t1 */
 		entry->imsl_st[1] = entry->imsl_st[0];
 	}
 
 	/* The filter mode itself also reverts to its t0 value. */
 	imf->imf_st[1] = imf->imf_st[0];
 }
 
 /*
  * Mark socket-layer filter set as INCLUDE {} at t1.
  */
 static void
 imf_leave(struct in_mfilter *imf)
 {
 	struct ip_msource	*ims;
 	struct in_msource	*lims;
 
 	RB_FOREACH(ims, ip_msource_tree, &imf->imf_sources) {
 		lims = (struct in_msource *)ims;
 		lims->imsl_st[1] = MCAST_UNDEFINED;
 	}
 	imf->imf_st[1] = MCAST_INCLUDE;
 }
 
 /*
  * Mark socket-layer filter set deltas as committed.
  */
 static void
 imf_commit(struct in_mfilter *imf)
 {
 	struct ip_msource	*node;
 	struct in_msource	*entry;
 
 	/* Copy each source's t1 state over its t0 state. */
 	RB_FOREACH(node, ip_msource_tree, &imf->imf_sources) {
 		entry = (struct in_msource *)node;
 		entry->imsl_st[0] = entry->imsl_st[1];
 	}
 
 	/* Likewise for the filter mode. */
 	imf->imf_st[0] = imf->imf_st[1];
 }
 
 /*
  * Reap unreferenced sources from socket-layer filter set.
  */
 static void
 imf_reap(struct in_mfilter *imf)
 {
 	struct ip_msource	*node, *next;
 	struct in_msource	*entry;
 
 	RB_FOREACH_SAFE(node, ip_msource_tree, &imf->imf_sources, next) {
 		entry = (struct in_msource *)node;
 
 		/* Keep anything that is still defined at t0 or at t1. */
 		if (entry->imsl_st[0] != MCAST_UNDEFINED ||
 		    entry->imsl_st[1] != MCAST_UNDEFINED)
 			continue;
 
 		CTR2(KTR_IGMPV3, "%s: free lims %p", __func__, node);
 		RB_REMOVE(ip_msource_tree, &imf->imf_sources, node);
 		free(node, M_INMFILTER);
 		imf->imf_nsrc--;
 	}
 }
 
 /*
  * Purge socket-layer filter set.
  */
 static void
 imf_purge(struct in_mfilter *imf)
 {
 	struct ip_msource	*ims, *tims;
 
 	RB_FOREACH_SAFE(ims, ip_msource_tree, &imf->imf_sources, tims) {
 		CTR2(KTR_IGMPV3, "%s: free ims %p", __func__, ims);
 		RB_REMOVE(ip_msource_tree, &imf->imf_sources, ims);
 		free(ims, M_INMFILTER);
 		imf->imf_nsrc--;
 	}
 	imf->imf_st[0] = imf->imf_st[1] = MCAST_UNDEFINED;
 	KASSERT(RB_EMPTY(&imf->imf_sources),
 	    ("%s: imf_sources not empty", __func__));
 }
 
 /*
  * Look up a source filter entry for a multicast group.
  *
  * inm is the group descriptor to work with.
  * haddr is the host-byte-order IPv4 address to look up.
  * noalloc may be non-zero to suppress allocation of sources.
  * *pims will be set to the address of the retrieved or allocated source.
  *
  * SMPng: NOTE: may be called with locks held.
  * Return 0 if successful, otherwise return a non-zero error code.
  */
 static int
 inm_get_source(struct in_multi *inm, const in_addr_t haddr,
     const int noalloc, struct ip_msource **pims)
 {
 	struct ip_msource	 find;
 	struct ip_msource	*ims, *nims;
 
 	find.ims_haddr = haddr;
 	ims = RB_FIND(ip_msource_tree, &inm->inm_srcs, &find);
 	if (ims == NULL && !noalloc) {
 		if (inm->inm_nsrc == in_mcast_maxgrpsrc)
 			return (ENOSPC);
 		nims = malloc(sizeof(struct ip_msource), M_IPMSOURCE,
 		    M_NOWAIT | M_ZERO);
 		if (nims == NULL)
 			return (ENOMEM);
 		nims->ims_haddr = haddr;
 		RB_INSERT(ip_msource_tree, &inm->inm_srcs, nims);
 		++inm->inm_nsrc;
 		ims = nims;
 #ifdef KTR
 		CTR3(KTR_IGMPV3, "%s: allocated 0x%08x as %p", __func__,
 		    haddr, ims);
 #endif
 	}
 
 	*pims = ims;
 	return (0);
 }
 
 /*
  * Merge socket-layer source into IGMP-layer source.
  * If rollback is non-zero, perform the inverse of the merge.
  */
 static void
 ims_merge(struct ip_msource *ims, const struct in_msource *lims,
     const int rollback)
 {
 	int delta;
 
 	/* A rollback applies the same adjustments with the sign flipped. */
 	if (rollback)
 		delta = -1;
 	else
 		delta = 1;
 
 	/* Remove the socket's t0 contribution from the t1 counters. */
 	switch (lims->imsl_st[0]) {
 	case MCAST_EXCLUDE:
 		CTR3(KTR_IGMPV3, "%s: t1 ex -= %d on 0x%08x",
 		    __func__, delta, ims->ims_haddr);
 		ims->ims_st[1].ex -= delta;
 		break;
 	case MCAST_INCLUDE:
 		CTR3(KTR_IGMPV3, "%s: t1 in -= %d on 0x%08x",
 		    __func__, delta, ims->ims_haddr);
 		ims->ims_st[1].in -= delta;
 		break;
 	default:
 		break;
 	}
 
 	/* Add the socket's t1 contribution to the t1 counters. */
 	switch (lims->imsl_st[1]) {
 	case MCAST_EXCLUDE:
 		CTR3(KTR_IGMPV3, "%s: t1 ex += %d on 0x%08x",
 		    __func__, delta, ims->ims_haddr);
 		ims->ims_st[1].ex += delta;
 		break;
 	case MCAST_INCLUDE:
 		CTR3(KTR_IGMPV3, "%s: t1 in += %d on 0x%08x",
 		    __func__, delta, ims->ims_haddr);
 		ims->ims_st[1].in += delta;
 		break;
 	default:
 		break;
 	}
 }
 
 /*
  * Atomically update the global in_multi state, when a membership's
  * filter list is being updated in any way.
  *
  * imf is the per-inpcb-membership group filter pointer.
  * A fake imf may be passed for in-kernel consumers.
  *
  * XXX This is a candidate for a set-symmetric-difference style loop
  * which would eliminate the repeated lookup from root of ims nodes,
  * as they share the same key space.
  *
  * If any error occurred this function will back out of refcounts
  * and return a non-zero value.
  */
 static int
 inm_merge(struct in_multi *inm, /*const*/ struct in_mfilter *imf)
 {
 	struct ip_msource	*ims, *nims;
 	struct in_msource	*lims;
 	int			 schanged, error;
 	int			 nsrc0, nsrc1;
 
 	schanged = 0;
 	error = 0;
 	nsrc1 = nsrc0 = 0;
 	IN_MULTI_LIST_LOCK_ASSERT();
 
 	/*
 	 * Update the source filters first, as this may fail.
 	 * Maintain count of in-mode filters at t0, t1. These are
 	 * used to work out if we transition into ASM mode or not.
 	 * Maintain a count of source filters whose state was
 	 * actually modified by this operation.
 	 */
 	RB_FOREACH(ims, ip_msource_tree, &imf->imf_sources) {
 		lims = (struct in_msource *)ims;
 		if (lims->imsl_st[0] == imf->imf_st[0])
 			nsrc0++;
 		if (lims->imsl_st[1] == imf->imf_st[1])
 			nsrc1++;
 		if (lims->imsl_st[0] == lims->imsl_st[1])
 			continue;
 		error = inm_get_source(inm, lims->ims_haddr, 0, &nims);
 		++schanged;
 		if (error)
 			break;
 		ims_merge(nims, lims, 0);
 	}
 	if (error) {
 		struct ip_msource *bims;
 
 		/*
 		 * Back out the merges already applied.  ims is the
 		 * socket-layer entry whose merge failed; everything
 		 * before it in imf's tree may have been merged.
 		 *
 		 * RB_FOREACH_REVERSE_FROM() starts at its third argument,
 		 * so seed it with the socket-layer predecessor of the
 		 * failing entry.  nims previously held an IGMP-layer node
 		 * (or was uninitialized if the first lookup failed) and
 		 * must not be used as the starting point.
 		 */
 		nims = RB_PREV(ip_msource_tree, &imf->imf_sources, ims);
 		RB_FOREACH_REVERSE_FROM(ims, ip_msource_tree, nims) {
 			lims = (struct in_msource *)ims;
 			if (lims->imsl_st[0] == lims->imsl_st[1])
 				continue;
 			/* noalloc lookup: never fails, may yield NULL. */
 			(void)inm_get_source(inm, lims->ims_haddr, 1, &bims);
 			if (bims == NULL)
 				continue;
 			ims_merge(bims, lims, 1);
 		}
 		goto out_reap;
 	}
 
 	CTR3(KTR_IGMPV3, "%s: imf filters in-mode: %d at t0, %d at t1",
 	    __func__, nsrc0, nsrc1);
 
 	/* Handle transition between INCLUDE {n} and INCLUDE {} on socket. */
 	if (imf->imf_st[0] == imf->imf_st[1] &&
 	    imf->imf_st[1] == MCAST_INCLUDE) {
 		if (nsrc1 == 0) {
 			CTR1(KTR_IGMPV3, "%s: --in on inm at t1", __func__);
 			--inm->inm_st[1].iss_in;
 		}
 	}
 
 	/* Handle filter mode transition on socket. */
 	if (imf->imf_st[0] != imf->imf_st[1]) {
 		CTR3(KTR_IGMPV3, "%s: imf transition %d to %d",
 		    __func__, imf->imf_st[0], imf->imf_st[1]);
 
 		if (imf->imf_st[0] == MCAST_EXCLUDE) {
 			CTR1(KTR_IGMPV3, "%s: --ex on inm at t1", __func__);
 			--inm->inm_st[1].iss_ex;
 		} else if (imf->imf_st[0] == MCAST_INCLUDE) {
 			CTR1(KTR_IGMPV3, "%s: --in on inm at t1", __func__);
 			--inm->inm_st[1].iss_in;
 		}
 
 		if (imf->imf_st[1] == MCAST_EXCLUDE) {
 			CTR1(KTR_IGMPV3, "%s: ex++ on inm at t1", __func__);
 			inm->inm_st[1].iss_ex++;
 		} else if (imf->imf_st[1] == MCAST_INCLUDE && nsrc1 > 0) {
 			CTR1(KTR_IGMPV3, "%s: in++ on inm at t1", __func__);
 			inm->inm_st[1].iss_in++;
 		}
 	}
 
 	/*
 	 * Track inm filter state in terms of listener counts.
 	 * If there are any exclusive listeners, stack-wide
 	 * membership is exclusive.
 	 * Otherwise, if only inclusive listeners, stack-wide is inclusive.
 	 * If no listeners remain, state is undefined at t1,
 	 * and the IGMP lifecycle for this group should finish.
 	 */
 	if (inm->inm_st[1].iss_ex > 0) {
 		CTR1(KTR_IGMPV3, "%s: transition to EX", __func__);
 		inm->inm_st[1].iss_fmode = MCAST_EXCLUDE;
 	} else if (inm->inm_st[1].iss_in > 0) {
 		CTR1(KTR_IGMPV3, "%s: transition to IN", __func__);
 		inm->inm_st[1].iss_fmode = MCAST_INCLUDE;
 	} else {
 		CTR1(KTR_IGMPV3, "%s: transition to UNDEF", __func__);
 		inm->inm_st[1].iss_fmode = MCAST_UNDEFINED;
 	}
 
 	/* Decrement ASM listener count on transition out of ASM mode. */
 	if (imf->imf_st[0] == MCAST_EXCLUDE && nsrc0 == 0) {
 		if ((imf->imf_st[1] != MCAST_EXCLUDE) ||
 		    (imf->imf_st[1] == MCAST_EXCLUDE && nsrc1 > 0)) {
 			CTR1(KTR_IGMPV3, "%s: --asm on inm at t1", __func__);
 			--inm->inm_st[1].iss_asm;
 		}
 	}
 
 	/* Increment ASM listener count on transition to ASM mode. */
 	if (imf->imf_st[1] == MCAST_EXCLUDE && nsrc1 == 0) {
 		CTR1(KTR_IGMPV3, "%s: asm++ on inm at t1", __func__);
 		inm->inm_st[1].iss_asm++;
 	}
 
 	CTR3(KTR_IGMPV3, "%s: merged imf %p to inm %p", __func__, imf, inm);
 	inm_print(inm);
 
 out_reap:
 	/*
 	 * Reached on both success and failure: if any source was touched,
 	 * drop nodes that ended up unreferenced (including any allocated
 	 * above and then rolled back).
 	 */
 	if (schanged > 0) {
 		CTR1(KTR_IGMPV3, "%s: sources changed; reaping", __func__);
 		inm_reap(inm);
 	}
 	return (error);
 }
 
 /*
  * Mark an in_multi's filter set deltas as committed.
  * Called by IGMP after a state change has been enqueued.
  */
 void
 inm_commit(struct in_multi *inm)
 {
 	struct ip_msource	*ims;
 
 	CTR2(KTR_IGMPV3, "%s: commit inm %p", __func__, inm);
 	CTR1(KTR_IGMPV3, "%s: pre commit:", __func__);
 	inm_print(inm);
 
 	RB_FOREACH(ims, ip_msource_tree, &inm->inm_srcs) {
 		ims->ims_st[0] = ims->ims_st[1];
 	}
 	inm->inm_st[0] = inm->inm_st[1];
 }
 
 /*
  * Reap unreferenced nodes from an in_multi's filter set.
  */
 static void
 inm_reap(struct in_multi *inm)
 {
 	struct ip_msource	*src, *next;
 	int			 inuse;
 
 	RB_FOREACH_SAFE(src, ip_msource_tree, &inm->inm_srcs, next) {
 		/*
 		 * A node stays if any listener counts it at t0 or t1,
 		 * or if it is marked as recorded.
 		 */
 		inuse = src->ims_st[0].ex > 0 || src->ims_st[0].in > 0 ||
 		    src->ims_st[1].ex > 0 || src->ims_st[1].in > 0 ||
 		    src->ims_stp != 0;
 		if (inuse)
 			continue;
 
 		CTR2(KTR_IGMPV3, "%s: free ims %p", __func__, src);
 		RB_REMOVE(ip_msource_tree, &inm->inm_srcs, src);
 		free(src, M_IPMSOURCE);
 		inm->inm_nsrc--;
 	}
 }
 
 /*
  * Purge all source nodes from an in_multi's filter set.
  */
 static void
 inm_purge(struct in_multi *inm)
 {
 	struct ip_msource	*src, *next;
 
 	/* Unconditionally unlink and free every source node. */
 	RB_FOREACH_SAFE(src, ip_msource_tree, &inm->inm_srcs, next) {
 		CTR2(KTR_IGMPV3, "%s: free ims %p", __func__, src);
 		RB_REMOVE(ip_msource_tree, &inm->inm_srcs, src);
 		free(src, M_IPMSOURCE);
 		--inm->inm_nsrc;
 	}
 }
 
 /*
  * Join a multicast group; unlocked entry point.
  *
  * SMPng: XXX: in_joingroup() is called from in_control() when Giant
  * is not held. Fortunately, ifp is unlikely to have been detached
  * at this point, so we assume it's OK to recurse.
  */
 int
 in_joingroup(struct ifnet *ifp, const struct in_addr *gina,
     /*const*/ struct in_mfilter *imf, struct in_multi **pinm)
 {
 	int rc;
 
 	/* Wrap the locked implementation in the IN_MULTI lock. */
 	IN_MULTI_LOCK();
 	rc = in_joingroup_locked(ifp, gina, imf, pinm);
 	IN_MULTI_UNLOCK();
 
 	return (rc);
 }
 
 /*
  * Join a multicast group; real entry point.
  *
  * Only preserves atomicity at inm level.
  * NOTE: imf argument cannot be const due to sys/tree.h limitations.
  *
  * If the IGMP downcall fails, the group is not joined, and an error
  * code is returned.
  */
 int
 in_joingroup_locked(struct ifnet *ifp, const struct in_addr *gina,
     /*const*/ struct in_mfilter *imf, struct in_multi **pinm)
 {
 	struct in_mfilter	 timf;
 	struct in_multi		*inm;
 	int			 error;
 
 	/* Caller holds IN_MULTI but must not hold the list lock. */
 	IN_MULTI_LOCK_ASSERT();
 	IN_MULTI_LIST_UNLOCK_ASSERT();
 
 	CTR4(KTR_IGMPV3, "%s: join 0x%08x on %p(%s))", __func__,
 	    ntohl(gina->s_addr), ifp, ifp->if_xname);
 
 	error = 0;
 	inm = NULL;
 
 	/*
 	 * If no imf was specified (i.e. kernel consumer),
 	 * fake one up and assume it is an ASM join.
 	 */
 	if (imf == NULL) {
 		imf_init(&timf, MCAST_UNDEFINED, MCAST_EXCLUDE);
 		imf = &timf;
 	}
 
 	/*
 	 * Find or create the in_multi; on success a reference is held.
 	 * in_getmulti() takes and drops the list lock internally, so it
 	 * is called before the list lock is acquired here.
 	 */
 	error = in_getmulti(ifp, gina, &inm);
 	if (error) {
 		CTR1(KTR_IGMPV3, "%s: in_getmulti() failure", __func__);
 		return (error);
 	}
 	IN_MULTI_LIST_LOCK();
 	CTR1(KTR_IGMPV3, "%s: merge inm state", __func__);
 	error = inm_merge(inm, imf);
 	if (error) {
 		CTR1(KTR_IGMPV3, "%s: failed to merge inm state", __func__);
 		goto out_inm_release;
 	}
 
 	CTR1(KTR_IGMPV3, "%s: doing igmp downcall", __func__);
 	error = igmp_change_state(inm);
 	if (error) {
 		CTR1(KTR_IGMPV3, "%s: failed to update source", __func__);
 		goto out_inm_release;
 	}
 
  out_inm_release:
 	/*
 	 * Single exit with the list lock held.  On failure the reference
 	 * obtained above is dropped and *pinm is left untouched; on
 	 * success the in_multi is handed to the caller.
 	 */
 	if (error) {
 
 		CTR2(KTR_IGMPV3, "%s: dropping ref on %p", __func__, inm);
 		IF_ADDR_WLOCK(ifp);
 		inm_release_deferred(inm);
 		IF_ADDR_WUNLOCK(ifp);
 	} else {
 		*pinm = inm;
 	}
 	IN_MULTI_LIST_UNLOCK();
 
 	return (error);
 }
 
 /*
  * Leave a multicast group; unlocked entry point.
  */
 int
 in_leavegroup(struct in_multi *inm, /*const*/ struct in_mfilter *imf)
 {
 	int rc;
 
 	/* Wrap the locked implementation in the IN_MULTI lock. */
 	IN_MULTI_LOCK();
 	rc = in_leavegroup_locked(inm, imf);
 	IN_MULTI_UNLOCK();
 
 	return (rc);
 }
 
 /*
  * Leave a multicast group; real entry point.
  * All source filters will be expunged.
  *
  * Only preserves atomicity at inm level.
  *
  * Holding the write lock for the INP which contains imf
  * is highly advisable. We can't assert for it as imf does not
  * contain a back-pointer to the owning inp.
  *
  * Note: This is not the same as inm_release(*) as this function also
  * makes a state change downcall into IGMP.
  */
 int
 in_leavegroup_locked(struct in_multi *inm, /*const*/ struct in_mfilter *imf)
 {
 	struct in_mfilter	 timf;
 	int			 error;
 
 	/* Caller holds IN_MULTI but must not hold the list lock. */
 	IN_MULTI_LOCK_ASSERT();
 	IN_MULTI_LIST_UNLOCK_ASSERT();
 
 	error = 0;
 
 	CTR5(KTR_IGMPV3, "%s: leave inm %p, 0x%08x/%s, imf %p", __func__,
 	    inm, ntohl(inm->inm_addr.s_addr),
 	    (inm_is_ifp_detached(inm) ? "null" : inm->inm_ifp->if_xname),
 	    imf);
 
 	/*
 	 * If no imf was specified (i.e. kernel consumer),
 	 * fake one up.  The two state arguments are the reverse of those
 	 * used for the faked ASM join in in_joingroup_locked(), i.e. this
 	 * describes leaving an ASM membership.
 	 */
 	if (imf == NULL) {
 		imf_init(&timf, MCAST_EXCLUDE, MCAST_UNDEFINED);
 		imf = &timf;
 	}
 
 	/*
 	 * Begin state merge transaction at IGMP layer.
 	 *
 	 * As this particular invocation should not cause any memory
 	 * to be allocated, and there is no opportunity to roll back
 	 * the transaction, it MUST NOT fail.
 	 */
 	CTR1(KTR_IGMPV3, "%s: merge inm state", __func__);
 	IN_MULTI_LIST_LOCK();
 	error = inm_merge(inm, imf);
 	KASSERT(error == 0, ("%s: failed to merge inm state", __func__));
 
 	/*
 	 * The downcall and the reference drop both run in the interface's
 	 * vnet and under the list lock; the reference is dropped whether
 	 * or not the downcall succeeded.
 	 * NOTE(review): inm->inm_ifp is dereferenced unconditionally here,
 	 * although the trace above allows for a detached ifp -- confirm
 	 * callers never reach this point with a NULL inm_ifp.
 	 */
 	CTR1(KTR_IGMPV3, "%s: doing igmp downcall", __func__);
 	CURVNET_SET(inm->inm_ifp->if_vnet);
 	error = igmp_change_state(inm);
 	IF_ADDR_WLOCK(inm->inm_ifp);
 	inm_release_deferred(inm);
 	IF_ADDR_WUNLOCK(inm->inm_ifp);
 	IN_MULTI_LIST_UNLOCK();
 	CURVNET_RESTORE();
 	if (error)
 		CTR1(KTR_IGMPV3, "%s: failed igmp downcall", __func__);
 
 	/* Trace only: the reference was already dropped above. */
 	CTR2(KTR_IGMPV3, "%s: dropping ref on %p", __func__, inm);
 
 	return (error);
 }
 
 /*#ifndef BURN_BRIDGES*/
 /*
  * Join an IPv4 multicast group in (*,G) exclusive mode.
  * The group must be a 224.0.0.0/24 link-scope group.
  * This KPI is for legacy kernel consumers only.
  */
 struct in_multi *
 in_addmulti(struct in_addr *ap, struct ifnet *ifp)
 {
 	struct in_multi *joined;
 #ifdef INVARIANTS
 	char addrbuf[INET_ADDRSTRLEN];
 #endif
 
 	KASSERT(IN_LOCAL_GROUP(ntohl(ap->s_addr)),
 	    ("%s: %s not in 224.0.0.0/24", __func__,
 	    inet_ntoa_r(*ap, addrbuf)));
 
 	/* A NULL filter makes in_joingroup() fake up an ASM join. */
 	if (in_joingroup(ifp, ap, NULL, &joined) != 0)
 		return (NULL);
 
 	return (joined);
 }
 
 /*
  * Block or unblock an ASM multicast source on an inpcb.
  * This implements the delta-based API described in RFC 3678.
  *
  * The delta-based API applies only to exclusive-mode memberships.
  * An IGMP downcall will be performed.
  *
  * SMPng: NOTE: Must take Giant as a join may create a new ifma.
  *
  * Return 0 if successful, otherwise return an appropriate error code.
  */
 static int
 inp_block_unblock_source(struct inpcb *inp, struct sockopt *sopt)
 {
 	struct group_source_req		 gsr;
 	struct rm_priotracker		 in_ifa_tracker;
 	sockunion_t			*gsa, *ssa;
 	struct ifnet			*ifp;
 	struct in_mfilter		*imf;
 	struct ip_moptions		*imo;
 	struct in_msource		*ims;
 	struct in_multi			*inm;
 	uint16_t			 fmode;
 	int				 error, doblock;
 
 	ifp = NULL;
 	error = 0;
 	doblock = 0;
 
 	/*
 	 * Both option families are normalized into gsr; gsa and ssa
 	 * alias its group and source members respectively.
 	 */
 	memset(&gsr, 0, sizeof(struct group_source_req));
 	gsa = (sockunion_t *)&gsr.gsr_group;
 	ssa = (sockunion_t *)&gsr.gsr_source;
 
 	/*
 	 * Parse the request.  No locks are held yet, so every failure in
 	 * this switch returns directly.
 	 */
 	switch (sopt->sopt_name) {
 	case IP_BLOCK_SOURCE:
 	case IP_UNBLOCK_SOURCE: {
 		struct ip_mreq_source	 mreqs;
 
 		error = sooptcopyin(sopt, &mreqs,
 		    sizeof(struct ip_mreq_source),
 		    sizeof(struct ip_mreq_source));
 		if (error)
 			return (error);
 
 		gsa->sin.sin_family = AF_INET;
 		gsa->sin.sin_len = sizeof(struct sockaddr_in);
 		gsa->sin.sin_addr = mreqs.imr_multiaddr;
 
 		ssa->sin.sin_family = AF_INET;
 		ssa->sin.sin_len = sizeof(struct sockaddr_in);
 		ssa->sin.sin_addr = mreqs.imr_sourceaddr;
 
 		/*
 		 * An interface address, if given, selects the ifp;
 		 * otherwise ifp stays NULL.
 		 */
 		if (!in_nullhost(mreqs.imr_interface)) {
 			IN_IFADDR_RLOCK(&in_ifa_tracker);
 			INADDR_TO_IFP(mreqs.imr_interface, ifp);
 			IN_IFADDR_RUNLOCK(&in_ifa_tracker);
 		}
 		if (sopt->sopt_name == IP_BLOCK_SOURCE)
 			doblock = 1;
 
 		CTR3(KTR_IGMPV3, "%s: imr_interface = 0x%08x, ifp = %p",
 		    __func__, ntohl(mreqs.imr_interface.s_addr), ifp);
 		break;
 	    }
 
 	case MCAST_BLOCK_SOURCE:
 	case MCAST_UNBLOCK_SOURCE:
 		error = sooptcopyin(sopt, &gsr,
 		    sizeof(struct group_source_req),
 		    sizeof(struct group_source_req));
 		if (error)
 			return (error);
 
 		/* Both addresses must be well-formed AF_INET sockaddrs. */
 		if (gsa->sin.sin_family != AF_INET ||
 		    gsa->sin.sin_len != sizeof(struct sockaddr_in))
 			return (EINVAL);
 
 		if (ssa->sin.sin_family != AF_INET ||
 		    ssa->sin.sin_len != sizeof(struct sockaddr_in))
 			return (EINVAL);
 
 		/* The interface index must be non-zero and within range. */
 		if (gsr.gsr_interface == 0 || V_if_index < gsr.gsr_interface)
 			return (EADDRNOTAVAIL);
 
 		ifp = ifnet_byindex(gsr.gsr_interface);
 
 		if (sopt->sopt_name == MCAST_BLOCK_SOURCE)
 			doblock = 1;
 		break;
 
 	default:
 		CTR2(KTR_IGMPV3, "%s: unknown sopt_name %d",
 		    __func__, sopt->sopt_name);
 		return (EOPNOTSUPP);
 		break;
 	}
 
 	if (!IN_MULTICAST(ntohl(gsa->sin.sin_addr.s_addr)))
 		return (EINVAL);
 
 	IN_MULTI_LOCK();
 
 	/*
 	 * Check if we are actually a member of this group.
 	 * inp_findmoptions() returns with the INP write lock held, so
 	 * from here on every exit goes through out_inp_locked.
 	 */
 	imo = inp_findmoptions(inp);
 	imf = imo_match_group(imo, ifp, &gsa->sa);
 	if (imf == NULL) {
 		error = EADDRNOTAVAIL;
 		goto out_inp_locked;
 	}
 	inm = imf->imf_inm;
 
 	/*
 	 * Attempting to use the delta-based API on an
 	 * non exclusive-mode membership is an error.
 	 */
 	fmode = imf->imf_st[0];
 	if (fmode != MCAST_EXCLUDE) {
 		error = EINVAL;
 		goto out_inp_locked;
 	}
 
 	/*
 	 * Deal with error cases up-front:
 	 *  Asked to block, but already blocked; or
 	 *  Asked to unblock, but nothing to unblock.
 	 * If adding a new block entry, allocate it.
 	 */
 	ims = imo_match_source(imf, &ssa->sa);
 	if ((ims != NULL && doblock) || (ims == NULL && !doblock)) {
 		CTR3(KTR_IGMPV3, "%s: source 0x%08x %spresent", __func__,
 		    ntohl(ssa->sin.sin_addr.s_addr), doblock ? "" : "not ");
 		error = EADDRNOTAVAIL;
 		goto out_inp_locked;
 	}
 
 	INP_WLOCK_ASSERT(inp);
 
 	/*
 	 * Begin state merge transaction at socket layer.
 	 * Blocking grafts a new entry carrying the membership's filter
 	 * mode at t1; unblocking marks the existing entry undefined at t1.
 	 */
 	if (doblock) {
 		CTR2(KTR_IGMPV3, "%s: %s source", __func__, "block");
 		ims = imf_graft(imf, fmode, &ssa->sin);
 		if (ims == NULL)
 			error = ENOMEM;
 	} else {
 		CTR2(KTR_IGMPV3, "%s: %s source", __func__, "allow");
 		error = imf_prune(imf, &ssa->sin);
 	}
 
 	if (error) {
 		CTR1(KTR_IGMPV3, "%s: merge imf state failed", __func__);
 		goto out_imf_rollback;
 	}
 
 	/*
 	 * Begin state merge transaction at IGMP layer.
 	 */
 	CTR1(KTR_IGMPV3, "%s: merge inm state", __func__);
 	IN_MULTI_LIST_LOCK();
 	error = inm_merge(inm, imf);
 	if (error) {
 		CTR1(KTR_IGMPV3, "%s: failed to merge inm state", __func__);
 		IN_MULTI_LIST_UNLOCK();
 		goto out_imf_rollback;
 	}
 
 	CTR1(KTR_IGMPV3, "%s: doing igmp downcall", __func__);
 	error = igmp_change_state(inm);
 	IN_MULTI_LIST_UNLOCK();
 	if (error)
 		CTR1(KTR_IGMPV3, "%s: failed igmp downcall", __func__);
 
 out_imf_rollback:
 	/*
 	 * End of the socket-layer transaction: revert the t1 deltas on
 	 * failure, commit them on success, then reap entries left
 	 * undefined at both t0 and t1.
 	 */
 	if (error)
 		imf_rollback(imf);
 	else
 		imf_commit(imf);
 
 	imf_reap(imf);
 
 out_inp_locked:
 	/* Drop the INP lock taken by inp_findmoptions(), then IN_MULTI. */
 	INP_WUNLOCK(inp);
 	IN_MULTI_UNLOCK();
 	return (error);
 }
 
 /*
  * Given an inpcb, return its multicast options structure pointer.  Accepts
  * an unlocked inpcb pointer, but will return it locked.  May sleep.
  *
  * SMPng: NOTE: Potentially calls malloc(M_WAITOK) with Giant held.
  * SMPng: NOTE: Returns with the INP write lock held.
  */
 static struct ip_moptions *
 inp_findmoptions(struct inpcb *inp)
 {
 	struct ip_moptions	 *fresh;
 
 	INP_WLOCK(inp);
 	if (inp->inp_moptions == NULL) {
 		/*
 		 * Nothing attached yet.  The M_WAITOK allocation is done
 		 * with the INP lock dropped.
 		 */
 		INP_WUNLOCK(inp);
 
 		fresh = malloc(sizeof(*fresh), M_IPMOPTS, M_WAITOK);
 
 		fresh->imo_multicast_ifp = NULL;
 		fresh->imo_multicast_addr.s_addr = INADDR_ANY;
 		fresh->imo_multicast_vif = -1;
 		fresh->imo_multicast_ttl = IP_DEFAULT_MULTICAST_TTL;
 		fresh->imo_multicast_loop = in_mcast_loop;
 		STAILQ_INIT(&fresh->imo_head);
 
 		/*
 		 * Re-take the lock and re-check: options may have been
 		 * attached while it was dropped, in which case ours is
 		 * discarded.
 		 */
 		INP_WLOCK(inp);
 		if (inp->inp_moptions == NULL)
 			inp->inp_moptions = fresh;
 		else
 			free(fresh, M_IPMOPTS);
 	}
 
 	/* Returned with the INP write lock still held. */
 	return (inp->inp_moptions);
 }
 
 static void
 inp_gcmoptions(struct ip_moptions *imo)
 {
 	struct in_mfilter *filt;
 	struct in_multi *grp;
 	struct ifnet *netif;
 
 	/* Drain the membership list one filter at a time. */
 	for (;;) {
 		filt = ip_mfilter_first(&imo->imo_head);
 		if (filt == NULL)
 			break;
 		ip_mfilter_remove(&imo->imo_head, filt);
 
 		/* Mark the filter as left, then leave the group if joined. */
 		imf_leave(filt);
 		grp = filt->imf_inm;
 		if (grp != NULL) {
 			netif = grp->inm_ifp;
 			if (netif == NULL) {
 				(void)in_leavegroup(grp, filt);
 			} else {
 				/* Leave inside the interface's vnet. */
 				CURVNET_SET(netif->if_vnet);
 				(void)in_leavegroup(grp, filt);
 				CURVNET_RESTORE();
 			}
 		}
 		ip_mfilter_free(filt);
 	}
 
 	/* Finally release the options structure itself. */
 	free(imo, M_IPMOPTS);
 }
 
 /*
  * Discard the IP multicast options (and source filters).  A NULL imo is
  * ignored; otherwise the work is done synchronously by inp_gcmoptions(),
  * which leaves each joined group, frees the filters and then frees the
  * options structure itself.
  */
 void
 inp_freemoptions(struct ip_moptions *imo)
 {
 	/* Nothing to discard for a socket that never had options. */
 	if (imo != NULL)
 		inp_gcmoptions(imo);
 }
 
 /*
  * Atomically get source filters on a socket for an IPv4 multicast group.
  * Called with INP lock held; returns with lock released.
  */
 static int
 inp_get_source_filters(struct inpcb *inp, struct sockopt *sopt)
 {
 	struct __msfilterreq	 msfr;
 	sockunion_t		*gsa;
 	struct ifnet		*ifp;
 	struct ip_moptions	*imo;
 	struct in_mfilter	*imf;
 	struct ip_msource	*ims;
 	struct in_msource	*lims;
 	struct sockaddr_in	*psin;
 	struct sockaddr_storage	*ptss;
 	struct sockaddr_storage	*tss;
 	int			 error;
 	size_t			 nsrcs, ncsrcs;
 
 	INP_WLOCK_ASSERT(inp);
 
 	imo = inp->inp_moptions;
 	KASSERT(imo != NULL, ("%s: null ip_moptions", __func__));
 
 	/*
 	 * Drop the INP lock around the copyin of the request.
 	 * NOTE(review): imo was read before the unlock and is used again
 	 * after the relock below -- confirm it cannot be freed in between.
 	 */
 	INP_WUNLOCK(inp);
 
 	error = sooptcopyin(sopt, &msfr, sizeof(struct __msfilterreq),
 	    sizeof(struct __msfilterreq));
 	if (error)
 		return (error);
 
 	/* The interface index must be non-zero, in range, and resolvable. */
 	if (msfr.msfr_ifindex == 0 || V_if_index < msfr.msfr_ifindex)
 		return (EINVAL);
 
 	ifp = ifnet_byindex(msfr.msfr_ifindex);
 	if (ifp == NULL)
 		return (EINVAL);
 
 	INP_WLOCK(inp);
 
 	/*
 	 * Lookup group on the socket.
 	 */
 	gsa = (sockunion_t *)&msfr.msfr_group;
 	imf = imo_match_group(imo, ifp, &gsa->sa);
 	if (imf == NULL) {
 		INP_WUNLOCK(inp);
 		return (EADDRNOTAVAIL);
 	}
 
 	/*
 	 * Ignore memberships which are in limbo.
 	 */
 	if (imf->imf_st[1] == MCAST_UNDEFINED) {
 		INP_WUNLOCK(inp);
 		return (EAGAIN);
 	}
 	msfr.msfr_fmode = imf->imf_st[1];
 
 	/*
 	 * If the user specified a buffer, copy out the source filter
 	 * entries to userland gracefully.
 	 * We only copy out the number of entries which userland
 	 * has asked for, but we always tell userland how big the
 	 * buffer really needs to be.
 	 *
 	 * The requested count is clamped to in_mcast_maxsocksrc, and the
 	 * staging buffer is allocated M_NOWAIT (the INP lock is held) and
 	 * zeroed, so slots that are not filled in below stay zero.
 	 */
 	if (msfr.msfr_nsrcs > in_mcast_maxsocksrc)
 		msfr.msfr_nsrcs = in_mcast_maxsocksrc;
 	tss = NULL;
 	if (msfr.msfr_srcs != NULL && msfr.msfr_nsrcs > 0) {
 		tss = malloc(sizeof(struct sockaddr_storage) * msfr.msfr_nsrcs,
 		    M_TEMP, M_NOWAIT | M_ZERO);
 		if (tss == NULL) {
 			INP_WUNLOCK(inp);
 			return (ENOBUFS);
 		}
 	}
 
 	/*
 	 * Count number of sources in-mode at t0.
 	 * If buffer space exists and remains, copy out source entries.
 	 */
 	nsrcs = msfr.msfr_nsrcs;	/* slots still free in tss */
 	ncsrcs = 0;			/* total in-mode sources seen */
 	ptss = tss;
 	RB_FOREACH(ims, ip_msource_tree, &imf->imf_sources) {
 		lims = (struct in_msource *)ims;
 		if (lims->imsl_st[0] == MCAST_UNDEFINED ||
 		    lims->imsl_st[0] != imf->imf_st[0])
 			continue;
 		++ncsrcs;
 		if (tss != NULL && nsrcs > 0) {
 			/* Tree keys are host order; userland gets network order. */
 			psin = (struct sockaddr_in *)ptss;
 			psin->sin_family = AF_INET;
 			psin->sin_len = sizeof(struct sockaddr_in);
 			psin->sin_addr.s_addr = htonl(lims->ims_haddr);
 			psin->sin_port = 0;
 			++ptss;
 			--nsrcs;
 		}
 	}
 
 	INP_WUNLOCK(inp);
 
 	/*
 	 * Copy the whole staging buffer (the clamped requested count of
 	 * entries) to userland with the INP lock dropped; it is freed
 	 * whether or not the copy succeeds.
 	 */
 	if (tss != NULL) {
 		error = copyout(tss, msfr.msfr_srcs,
 		    sizeof(struct sockaddr_storage) * msfr.msfr_nsrcs);
 		free(tss, M_TEMP);
 		if (error)
 			return (error);
 	}
 
 	/* Report the true number of in-mode sources, not the number copied. */
 	msfr.msfr_nsrcs = ncsrcs;
 	error = sooptcopyout(sopt, &msfr, sizeof(struct __msfilterreq));
 
 	return (error);
 }
 
 /*
  * Return the IP multicast options in response to user getsockopt().
  *
  * Handles IP_MULTICAST_VIF, IP_MULTICAST_IF, IP_MULTICAST_TTL,
  * IP_MULTICAST_LOOP and IP_MSFILTER; any other option name yields
  * ENOPROTOOPT.  Divert sockets, and sockets that are neither SOCK_RAW
  * nor SOCK_DGRAM, are rejected with EOPNOTSUPP.
  *
  * The inpcb write lock is taken on entry and is released on every path
  * before data is copied out, so the function returns with the inpcb
  * unlocked (see the INP_UNLOCK_ASSERT at the end).
  *
  * If the socket has no multicast options (inp_moptions is NULL), the
  * scalar options report defaults and IP_MSFILTER fails with
  * EADDRNOTAVAIL.
  *
  * Returns 0 on success, otherwise an errno value.
  */
 int
 inp_getmoptions(struct inpcb *inp, struct sockopt *sopt)
 {
 	struct rm_priotracker	 in_ifa_tracker;
 	struct ip_mreqn		 mreqn;
 	struct ip_moptions	*imo;
 	struct ifnet		*ifp;
 	struct in_ifaddr	*ia;
 	int			 error, optval;
 	u_char			 coptval;
 
 	INP_WLOCK(inp);
 	imo = inp->inp_moptions;
 	/*
 	 * If socket is neither of type SOCK_RAW or SOCK_DGRAM,
 	 * or is a divert socket, reject it.
 	 */
 	if (inp->inp_socket->so_proto->pr_protocol == IPPROTO_DIVERT ||
 	    (inp->inp_socket->so_proto->pr_type != SOCK_RAW &&
 	    inp->inp_socket->so_proto->pr_type != SOCK_DGRAM)) {
 		INP_WUNLOCK(inp);
 		return (EOPNOTSUPP);
 	}
 
 	error = 0;
 	switch (sopt->sopt_name) {
 	case IP_MULTICAST_VIF:
 		/* -1 is reported when no multicast options exist. */
 		if (imo != NULL)
 			optval = imo->imo_multicast_vif;
 		else
 			optval = -1;
 		INP_WUNLOCK(inp);
 		error = sooptcopyout(sopt, &optval, sizeof(int));
 		break;
 
 	case IP_MULTICAST_IF:
 		/* Start from all-zeroes so unset fields read back as 0. */
 		memset(&mreqn, 0, sizeof(struct ip_mreqn));
 		if (imo != NULL) {
 			ifp = imo->imo_multicast_ifp;
 			/*
 			 * Prefer an explicitly recorded address; otherwise
 			 * report the interface index and whatever address
 			 * IFP_TO_IA() finds for that interface.
 			 */
 			if (!in_nullhost(imo->imo_multicast_addr)) {
 				mreqn.imr_address = imo->imo_multicast_addr;
 			} else if (ifp != NULL) {
 				struct epoch_tracker et;
 
 				mreqn.imr_ifindex = ifp->if_index;
 				NET_EPOCH_ENTER(et);
 				IFP_TO_IA(ifp, ia, &in_ifa_tracker);
 				if (ia != NULL)
 					mreqn.imr_address =
 					    IA_SIN(ia)->sin_addr;
 				NET_EPOCH_EXIT(et);
 			}
 		}
 		INP_WUNLOCK(inp);
 		/*
 		 * The caller's buffer size selects the reply format:
 		 * a full struct ip_mreqn, or just the struct in_addr.
 		 */
 		if (sopt->sopt_valsize == sizeof(struct ip_mreqn)) {
 			error = sooptcopyout(sopt, &mreqn,
 			    sizeof(struct ip_mreqn));
 		} else {
 			error = sooptcopyout(sopt, &mreqn.imr_address,
 			    sizeof(struct in_addr));
 		}
 		break;
 
 	case IP_MULTICAST_TTL:
 		/* The value is kept as both u_char and int; see below. */
 		if (imo == NULL)
 			optval = coptval = IP_DEFAULT_MULTICAST_TTL;
 		else
 			optval = coptval = imo->imo_multicast_ttl;
 		INP_WUNLOCK(inp);
 		/* Reply as u_char or int depending on the caller's buffer. */
 		if (sopt->sopt_valsize == sizeof(u_char))
 			error = sooptcopyout(sopt, &coptval, sizeof(u_char));
 		else
 			error = sooptcopyout(sopt, &optval, sizeof(int));
 		break;
 
 	case IP_MULTICAST_LOOP:
 		if (imo == NULL)
 			optval = coptval = IP_DEFAULT_MULTICAST_LOOP;
 		else
 			optval = coptval = imo->imo_multicast_loop;
 		INP_WUNLOCK(inp);
 		/* Reply as u_char or int depending on the caller's buffer. */
 		if (sopt->sopt_valsize == sizeof(u_char))
 			error = sooptcopyout(sopt, &coptval, sizeof(u_char));
 		else
 			error = sooptcopyout(sopt, &optval, sizeof(int));
 		break;
 
 	case IP_MSFILTER:
 		if (imo == NULL) {
 			error = EADDRNOTAVAIL;
 			INP_WUNLOCK(inp);
 		} else {
 			/*
 			 * The inpcb is still write-locked here; the callee
 			 * is expected to drop the lock itself, as the
 			 * INP_UNLOCK_ASSERT below relies on it.
 			 */
 			error = inp_get_source_filters(inp, sopt);
 		}
 		break;
 
 	default:
 		INP_WUNLOCK(inp);
 		error = ENOPROTOOPT;
 		break;
 	}
 
 	INP_UNLOCK_ASSERT(inp);
 
 	return (error);
 }
 
 /*
  * Look up the ifnet to use for a multicast group membership,
  * given the IPv4 address of an interface, and the IPv4 group address.
  *
  * This routine exists to support legacy multicast applications
  * which do not understand that multicast memberships are scoped to
  * specific physical links in the networking stack, or which need
  * to join link-scope groups before IPv4 addresses are configured.
  *
  * If inp is non-NULL, use this socket's current FIB number for any
  * required FIB lookup.
  * If ina is INADDR_ANY, look up the group address in the unicast FIB,
  * and use its ifp; usually, this points to the default next-hop.
  *
  * If the FIB lookup fails, attempt to use the first non-loopback
  * interface with multicast capability in the system as a
  * last resort. The legacy IPv4 ASM API requires that we do
  * this in order to allow groups to be joined when the routing
  * table has not yet been populated during boot.
  *
  * Returns NULL if no ifp could be found.
  *
  * FUTURE: Implement IPv4 source-address selection.
  */
 static struct ifnet *
 inp_lookup_mcast_ifp(const struct inpcb *inp,
     const struct sockaddr_in *gsin, const struct in_addr ina)
 {
 	struct rm_priotracker in_ifa_tracker;
 	struct ifnet *ifp;
-	struct nhop4_basic nh4;
+	struct nhop_object *nh;
 	uint32_t fibnum;
 
 	/* Callers must pass an AF_INET multicast group address. */
 	KASSERT(gsin->sin_family == AF_INET, ("%s: not AF_INET", __func__));
 	KASSERT(IN_MULTICAST(ntohl(gsin->sin_addr.s_addr)),
 	    ("%s: not multicast", __func__));
 
 	ifp = NULL;
 	if (!in_nullhost(ina)) {
 		/* An interface address was given: map it to its ifnet. */
 		IN_IFADDR_RLOCK(&in_ifa_tracker);
 		INADDR_TO_IFP(ina, ifp);
 		IN_IFADDR_RUNLOCK(&in_ifa_tracker);
 	} else {
 		/*
 		 * No interface address: look the group up in the socket's
 		 * FIB (FIB 0 when no inpcb was supplied) and use the
 		 * interface of the result.
 		 * NOTE(review): the nexthop pointer is presumably only
 		 * valid inside the network epoch; inp_setmoptions() enters
 		 * it around inp_join_group() -- confirm for other callers.
 		 */
 		fibnum = inp ? inp->inp_inc.inc_fibnum : 0;
-		if (fib4_lookup_nh_basic(fibnum, gsin->sin_addr, 0, 0, &nh4)==0)
-			ifp = nh4.nh_ifp;
+		nh = fib4_lookup(fibnum, gsin->sin_addr, 0, 0, 0);
+		if (nh != NULL)
+			ifp = nh->nh_ifp;
 		else {
 			struct in_ifaddr *ia;
 			struct ifnet *mifp;
 
 			/*
 			 * Last resort: walk the IPv4 address list and take
 			 * the first interface that is multicast-capable and
 			 * not a loopback.  ifp stays NULL if none matches.
 			 */
 			mifp = NULL;
 			IN_IFADDR_RLOCK(&in_ifa_tracker);
 			CK_STAILQ_FOREACH(ia, &V_in_ifaddrhead, ia_link) {
 				mifp = ia->ia_ifp;
 				if (!(mifp->if_flags & IFF_LOOPBACK) &&
 				     (mifp->if_flags & IFF_MULTICAST)) {
 					ifp = mifp;
 					break;
 				}
 			}
 			IN_IFADDR_RUNLOCK(&in_ifa_tracker);
 		}
 	}
 
 	return (ifp);
 }
 
 /*
  * Join an IPv4 multicast group, possibly with a source.
  *
  * Handles IP_ADD_MEMBERSHIP, IP_ADD_SOURCE_MEMBERSHIP, MCAST_JOIN_GROUP
  * and MCAST_JOIN_SOURCE_GROUP.  The request is normalised into a
  * struct group_source_req: gsa is the group, and ssa is the source,
  * which stays AF_UNSPEC when no source was supplied.
  *
  * Locking: IN_MULTI_LOCK is taken first; inp_findmoptions() is relied
  * upon to return with the inpcb write-locked (see INP_WLOCK_ASSERT
  * below).  For a new membership the inpcb lock is dropped around
  * in_joingroup_locked() while a reference on the inpcb is held.
  * Both locks are released before returning.
  *
  * Returns 0 on success, otherwise an errno value (EINVAL,
  * EADDRNOTAVAIL, EADDRINUSE, ENOMEM, ENXIO, EOPNOTSUPP, or an error
  * from the copyin/merge/IGMP layers).
  */
 static int
 inp_join_group(struct inpcb *inp, struct sockopt *sopt)
 {
 	struct group_source_req		 gsr;
 	sockunion_t			*gsa, *ssa;
 	struct ifnet			*ifp;
 	struct in_mfilter		*imf;
 	struct ip_moptions		*imo;
 	struct in_multi			*inm;
 	struct in_msource		*lims;
 	int				 error, is_new;
 
 	ifp = NULL;
 	lims = NULL;
 	error = 0;
 
 	/* gsa and ssa alias the group and source slots inside gsr. */
 	memset(&gsr, 0, sizeof(struct group_source_req));
 	gsa = (sockunion_t *)&gsr.gsr_group;
 	gsa->ss.ss_family = AF_UNSPEC;
 	ssa = (sockunion_t *)&gsr.gsr_source;
 	ssa->ss.ss_family = AF_UNSPEC;
 
 	switch (sopt->sopt_name) {
 	case IP_ADD_MEMBERSHIP: {
 		struct ip_mreqn mreqn;
 
 		/*
 		 * Accept either a struct ip_mreqn or the shorter legacy
 		 * struct ip_mreq; the latter is copied into the head of
 		 * mreqn, so imr_ifindex is only consulted for ip_mreqn.
 		 */
 		if (sopt->sopt_valsize == sizeof(struct ip_mreqn))
 			error = sooptcopyin(sopt, &mreqn,
 			    sizeof(struct ip_mreqn), sizeof(struct ip_mreqn));
 		else
 			error = sooptcopyin(sopt, &mreqn,
 			    sizeof(struct ip_mreq), sizeof(struct ip_mreq));
 		if (error)
 			return (error);
 
 		gsa->sin.sin_family = AF_INET;
 		gsa->sin.sin_len = sizeof(struct sockaddr_in);
 		gsa->sin.sin_addr = mreqn.imr_multiaddr;
 		if (!IN_MULTICAST(ntohl(gsa->sin.sin_addr.s_addr)))
 			return (EINVAL);
 
 		/* An explicit interface index wins over the address hint. */
 		if (sopt->sopt_valsize == sizeof(struct ip_mreqn) &&
 		    mreqn.imr_ifindex != 0)
 			ifp = ifnet_byindex(mreqn.imr_ifindex);
 		else
 			ifp = inp_lookup_mcast_ifp(inp, &gsa->sin,
 			    mreqn.imr_address);
 		break;
 	}
 	case IP_ADD_SOURCE_MEMBERSHIP: {
 		struct ip_mreq_source	 mreqs;
 
 		error = sooptcopyin(sopt, &mreqs, sizeof(struct ip_mreq_source),
 			    sizeof(struct ip_mreq_source));
 		if (error)
 			return (error);
 
 		gsa->sin.sin_family = ssa->sin.sin_family = AF_INET;
 		gsa->sin.sin_len = ssa->sin.sin_len =
 		    sizeof(struct sockaddr_in);
 
 		gsa->sin.sin_addr = mreqs.imr_multiaddr;
 		if (!IN_MULTICAST(ntohl(gsa->sin.sin_addr.s_addr)))
 			return (EINVAL);
 
 		ssa->sin.sin_addr = mreqs.imr_sourceaddr;
 
 		ifp = inp_lookup_mcast_ifp(inp, &gsa->sin,
 		    mreqs.imr_interface);
 		CTR3(KTR_IGMPV3, "%s: imr_interface = 0x%08x, ifp = %p",
 		    __func__, ntohl(mreqs.imr_interface.s_addr), ifp);
 		break;
 	}
 
 	case MCAST_JOIN_GROUP:
 	case MCAST_JOIN_SOURCE_GROUP:
 		/*
 		 * Both requests are copied into gsr; for MCAST_JOIN_GROUP
 		 * only the group_req-sized prefix is filled, so the source
 		 * slot keeps its AF_UNSPEC marker.
 		 */
 		if (sopt->sopt_name == MCAST_JOIN_GROUP) {
 			error = sooptcopyin(sopt, &gsr,
 			    sizeof(struct group_req),
 			    sizeof(struct group_req));
 		} else if (sopt->sopt_name == MCAST_JOIN_SOURCE_GROUP) {
 			error = sooptcopyin(sopt, &gsr,
 			    sizeof(struct group_source_req),
 			    sizeof(struct group_source_req));
 		}
 		if (error)
 			return (error);
 
 		if (gsa->sin.sin_family != AF_INET ||
 		    gsa->sin.sin_len != sizeof(struct sockaddr_in))
 			return (EINVAL);
 
 		/*
 		 * Overwrite the port field if present, as the sockaddr
 		 * being copied in may be matched with a binary comparison.
 		 */
 		gsa->sin.sin_port = 0;
 		if (sopt->sopt_name == MCAST_JOIN_SOURCE_GROUP) {
 			if (ssa->sin.sin_family != AF_INET ||
 			    ssa->sin.sin_len != sizeof(struct sockaddr_in))
 				return (EINVAL);
 			ssa->sin.sin_port = 0;
 		}
 
 		if (!IN_MULTICAST(ntohl(gsa->sin.sin_addr.s_addr)))
 			return (EINVAL);
 
 		if (gsr.gsr_interface == 0 || V_if_index < gsr.gsr_interface)
 			return (EADDRNOTAVAIL);
 		ifp = ifnet_byindex(gsr.gsr_interface);
 		break;
 
 	default:
 		CTR2(KTR_IGMPV3, "%s: unknown sopt_name %d",
 		    __func__, sopt->sopt_name);
 		return (EOPNOTSUPP);
 		break;
 	}
 
 	/* The chosen interface must exist and be multicast-capable. */
 	if (ifp == NULL || (ifp->if_flags & IFF_MULTICAST) == 0)
 		return (EADDRNOTAVAIL);
 
 	IN_MULTI_LOCK();
 
 	/*
 	 * Find the membership in the membership list.
 	 */
 	imo = inp_findmoptions(inp);
 	imf = imo_match_group(imo, ifp, &gsa->sa);
 	if (imf == NULL) {
 		is_new = 1;
 		inm = NULL;
 
 		/* Enforce the per-socket membership limit. */
 		if (ip_mfilter_count(&imo->imo_head) >= IP_MAX_MEMBERSHIPS) {
 			error = ENOMEM;
 			goto out_inp_locked;
 		}
 	} else {
 		is_new = 0;
 		inm = imf->imf_inm;
 
 		if (ssa->ss.ss_family != AF_UNSPEC) {
 			/*
 			 * MCAST_JOIN_SOURCE_GROUP on an exclusive membership
 			 * is an error. On an existing inclusive membership,
 			 * it just adds the source to the filter list.
 			 */
 			if (imf->imf_st[1] != MCAST_INCLUDE) {
 				error = EINVAL;
 				goto out_inp_locked;
 			}
 			/*
 			 * Throw out duplicates.
 			 *
 			 * XXX FIXME: This makes a naive assumption that
 			 * even if entries exist for *ssa in this imf,
 			 * they will be rejected as dupes, even if they
 			 * are not valid in the current mode (in-mode).
 			 *
 			 * in_msource is transactioned just as for anything
 			 * else in SSM -- but note naive use of inm_graft()
 			 * below for allocating new filter entries.
 			 *
 			 * This is only an issue if someone mixes the
 			 * full-state SSM API with the delta-based API,
 			 * which is discouraged in the relevant RFCs.
 			 */
 			lims = imo_match_source(imf, &ssa->sa);
 			if (lims != NULL /*&&
 			    lims->imsl_st[1] == MCAST_INCLUDE*/) {
 				error = EADDRNOTAVAIL;
 				goto out_inp_locked;
 			}
 		} else {
 			/*
 			 * MCAST_JOIN_GROUP on an existing exclusive
 			 * membership is an error; return EADDRINUSE
 			 * to preserve 4.4BSD API idempotence, and
 			 * avoid tedious detour to code below.
 			 * NOTE: This is bending RFC 3678 a bit.
 			 *
 			 * On an existing inclusive membership, this is also
 			 * an error; if you want to change filter mode,
 			 * you must use the userland API setsourcefilter().
 			 * XXX We don't reject this for imf in UNDEFINED
 			 * state at t1, because allocation of a filter
 			 * is atomic with allocation of a membership.
 			 */
 			error = EINVAL;
 			if (imf->imf_st[1] == MCAST_EXCLUDE)
 				error = EADDRINUSE;
 			goto out_inp_locked;
 		}
 	}
 
 	/*
 	 * Begin state merge transaction at socket layer.
 	 */
 	INP_WLOCK_ASSERT(inp);
 
 	/*
 	 * Graft new source into filter list for this inpcb's
 	 * membership of the group. The in_multi may not have
 	 * been allocated yet if this is a new membership, however,
 	 * the in_mfilter slot will be allocated and must be initialized.
 	 *
 	 * Note: Grafting of exclusive mode filters doesn't happen
 	 * in this path.
 	 * XXX: Should check for non-NULL lims (node exists but may
 	 * not be in-mode) for interop with full-state API.
 	 */
 	if (ssa->ss.ss_family != AF_UNSPEC) {
 		/* Membership starts in IN mode */
 		if (is_new) {
 			CTR1(KTR_IGMPV3, "%s: new join w/source", __func__);
 			imf = ip_mfilter_alloc(M_NOWAIT, MCAST_UNDEFINED, MCAST_INCLUDE);
 			if (imf == NULL) {
 				error = ENOMEM;
 				goto out_inp_locked;
 			}
 		} else {
 			CTR2(KTR_IGMPV3, "%s: %s source", __func__, "allow");
 		}
 		lims = imf_graft(imf, MCAST_INCLUDE, &ssa->sin);
 		if (lims == NULL) {
 			CTR1(KTR_IGMPV3, "%s: merge imf state failed",
 			    __func__);
 			error = ENOMEM;
 			goto out_inp_locked;
 		}
 	} else {
 		/* No address specified; Membership starts in EX mode */
 		if (is_new) {
 			CTR1(KTR_IGMPV3, "%s: new join w/o source", __func__);
 			imf = ip_mfilter_alloc(M_NOWAIT, MCAST_UNDEFINED, MCAST_EXCLUDE);
 			if (imf == NULL) {
 				error = ENOMEM;
 				goto out_inp_locked;
 			}
 		}
 	}
 
 	/*
 	 * Begin state merge transaction at IGMP layer.
 	 */
 	if (is_new) {
 		/*
 		 * Hold a reference on the inpcb while its lock is dropped
 		 * around the join.  If in_pcbrele_wlocked() reports a
 		 * non-zero result afterwards, bail out with ENXIO via the
 		 * exit label that does not unlock the inpcb.
 		 */
 		in_pcbref(inp);
 		INP_WUNLOCK(inp);
 
 		error = in_joingroup_locked(ifp, &gsa->sin.sin_addr, imf,
 		    &imf->imf_inm);
 
 		INP_WLOCK(inp);
 		if (in_pcbrele_wlocked(inp)) {
 			error = ENXIO;
 			goto out_inp_unlocked;
 		}
 		if (error) {
                         CTR1(KTR_IGMPV3, "%s: in_joingroup_locked failed",
                             __func__);
 			goto out_inp_locked;
 		}
 		/*
 		 * NOTE: Refcount from in_joingroup_locked()
 		 * is protecting membership.
 		 */
 		ip_mfilter_insert(&imo->imo_head, imf);
 	} else {
 		/* Existing membership: merge, then notify IGMP. */
 		CTR1(KTR_IGMPV3, "%s: merge inm state", __func__);
 		IN_MULTI_LIST_LOCK();
 		error = inm_merge(inm, imf);
 		if (error) {
 			CTR1(KTR_IGMPV3, "%s: failed to merge inm state",
 				 __func__);
 			IN_MULTI_LIST_UNLOCK();
 			imf_rollback(imf);
 			imf_reap(imf);
 			goto out_inp_locked;
 		}
 		CTR1(KTR_IGMPV3, "%s: doing igmp downcall", __func__);
 		error = igmp_change_state(inm);
 		IN_MULTI_LIST_UNLOCK();
 		if (error) {
 			CTR1(KTR_IGMPV3, "%s: failed igmp downcall",
 			    __func__);
 			imf_rollback(imf);
 			imf_reap(imf);
 			goto out_inp_locked;
 		}
 	}
 
 	/*
 	 * Success: commit the filter state.  Clearing imf keeps the
 	 * cleanup below from freeing a filter that is now in use.
 	 */
 	imf_commit(imf);
 	imf = NULL;
 
 out_inp_locked:
 	INP_WUNLOCK(inp);
 out_inp_unlocked:
 	IN_MULTI_UNLOCK();
 
 	/*
 	 * Failure on a new membership: release the group reference, if
 	 * one was obtained, and free the filter allocated above.
 	 */
 	if (is_new && imf) {
 		if (imf->imf_inm != NULL) {
 			IN_MULTI_LIST_LOCK();
 			IF_ADDR_WLOCK(ifp);
 			inm_release_deferred(imf->imf_inm);
 			IF_ADDR_WUNLOCK(ifp);
 			IN_MULTI_LIST_UNLOCK();
 		}
 		ip_mfilter_free(imf);
 	}
 	return (error);
 }
 
 /*
  * Leave an IPv4 multicast group on an inpcb, possibly with a source.
  *
  * Handles IP_DROP_MEMBERSHIP, IP_DROP_SOURCE_MEMBERSHIP,
  * MCAST_LEAVE_GROUP and MCAST_LEAVE_SOURCE_GROUP.  When no source is
  * given the whole membership is dropped (is_final); when a source is
  * given only that source is pruned from the membership's filter.
  *
  * Locking: IN_MULTI_LOCK is taken first; inp_findmoptions() is relied
  * upon to return with the inpcb write-locked (see INP_WLOCK_ASSERT
  * below).  Both locks are released before returning.
  *
  * Returns 0 on success, otherwise an errno value (EINVAL,
  * EADDRNOTAVAIL, EOPNOTSUPP, or an error from the copyin/prune/merge/
  * IGMP layers).
  */
 static int
 inp_leave_group(struct inpcb *inp, struct sockopt *sopt)
 {
 	struct group_source_req		 gsr;
 	struct ip_mreq_source		 mreqs;
 	struct rm_priotracker		 in_ifa_tracker;
 	sockunion_t			*gsa, *ssa;
 	struct ifnet			*ifp;
 	struct in_mfilter		*imf;
 	struct ip_moptions		*imo;
 	struct in_msource		*ims;
 	struct in_multi			*inm;
 	int				 error;
 	bool				 is_final;
 
 	ifp = NULL;
 	error = 0;
 	is_final = true;
 
 	/* gsa and ssa alias the group and source slots inside gsr. */
 	memset(&gsr, 0, sizeof(struct group_source_req));
 	gsa = (sockunion_t *)&gsr.gsr_group;
 	gsa->ss.ss_family = AF_UNSPEC;
 	ssa = (sockunion_t *)&gsr.gsr_source;
 	ssa->ss.ss_family = AF_UNSPEC;
 
 	switch (sopt->sopt_name) {
 	case IP_DROP_MEMBERSHIP:
 	case IP_DROP_SOURCE_MEMBERSHIP:
 		if (sopt->sopt_name == IP_DROP_MEMBERSHIP) {
 			error = sooptcopyin(sopt, &mreqs,
 			    sizeof(struct ip_mreq),
 			    sizeof(struct ip_mreq));
 			/*
 			 * Swap interface and sourceaddr arguments,
 			 * as ip_mreq and ip_mreq_source are laid
 			 * out differently.
 			 */
 			/*
 			 * NOTE(review): the swap runs before the error
 			 * check below; on a failed copyin the swapped
 			 * values are never used because we return.
 			 */
 			mreqs.imr_interface = mreqs.imr_sourceaddr;
 			mreqs.imr_sourceaddr.s_addr = INADDR_ANY;
 		} else if (sopt->sopt_name == IP_DROP_SOURCE_MEMBERSHIP) {
 			error = sooptcopyin(sopt, &mreqs,
 			    sizeof(struct ip_mreq_source),
 			    sizeof(struct ip_mreq_source));
 		}
 		if (error)
 			return (error);
 
 		gsa->sin.sin_family = AF_INET;
 		gsa->sin.sin_len = sizeof(struct sockaddr_in);
 		gsa->sin.sin_addr = mreqs.imr_multiaddr;
 
 		if (sopt->sopt_name == IP_DROP_SOURCE_MEMBERSHIP) {
 			ssa->sin.sin_family = AF_INET;
 			ssa->sin.sin_len = sizeof(struct sockaddr_in);
 			ssa->sin.sin_addr = mreqs.imr_sourceaddr;
 		}
 
 		/*
 		 * Attempt to look up hinted ifp from interface address.
 		 * Fallthrough with null ifp iff lookup fails, to
 		 * preserve 4.4BSD mcast API idempotence.
 		 * XXX NOTE WELL: The RFC 3678 API is preferred because
 		 * using an IPv4 address as a key is racy.
 		 */
 		if (!in_nullhost(mreqs.imr_interface)) {
 			IN_IFADDR_RLOCK(&in_ifa_tracker);
 			INADDR_TO_IFP(mreqs.imr_interface, ifp);
 			IN_IFADDR_RUNLOCK(&in_ifa_tracker);
 		}
 		CTR3(KTR_IGMPV3, "%s: imr_interface = 0x%08x, ifp = %p",
 		    __func__, ntohl(mreqs.imr_interface.s_addr), ifp);
 
 		break;
 
 	case MCAST_LEAVE_GROUP:
 	case MCAST_LEAVE_SOURCE_GROUP:
 		/*
 		 * For MCAST_LEAVE_GROUP only the group_req-sized prefix of
 		 * gsr is filled, so the source slot stays AF_UNSPEC.
 		 */
 		if (sopt->sopt_name == MCAST_LEAVE_GROUP) {
 			error = sooptcopyin(sopt, &gsr,
 			    sizeof(struct group_req),
 			    sizeof(struct group_req));
 		} else if (sopt->sopt_name == MCAST_LEAVE_SOURCE_GROUP) {
 			error = sooptcopyin(sopt, &gsr,
 			    sizeof(struct group_source_req),
 			    sizeof(struct group_source_req));
 		}
 		if (error)
 			return (error);
 
 		if (gsa->sin.sin_family != AF_INET ||
 		    gsa->sin.sin_len != sizeof(struct sockaddr_in))
 			return (EINVAL);
 
 		if (sopt->sopt_name == MCAST_LEAVE_SOURCE_GROUP) {
 			if (ssa->sin.sin_family != AF_INET ||
 			    ssa->sin.sin_len != sizeof(struct sockaddr_in))
 				return (EINVAL);
 		}
 
 		if (gsr.gsr_interface == 0 || V_if_index < gsr.gsr_interface)
 			return (EADDRNOTAVAIL);
 
 		ifp = ifnet_byindex(gsr.gsr_interface);
 
 		/* Unlike the legacy API above, an interface is mandatory. */
 		if (ifp == NULL)
 			return (EADDRNOTAVAIL);
 		break;
 
 	default:
 		CTR2(KTR_IGMPV3, "%s: unknown sopt_name %d",
 		    __func__, sopt->sopt_name);
 		return (EOPNOTSUPP);
 		break;
 	}
 
 	if (!IN_MULTICAST(ntohl(gsa->sin.sin_addr.s_addr)))
 		return (EINVAL);
 
 	IN_MULTI_LOCK();
 
 	/*
 	 * Find the membership in the membership list.
 	 */
 	imo = inp_findmoptions(inp);
 	imf = imo_match_group(imo, ifp, &gsa->sa);
 	if (imf == NULL) {
 		error = EADDRNOTAVAIL;
 		goto out_inp_locked;
 	}
 	inm = imf->imf_inm;
 
 	/* A supplied source means only that source is being left. */
 	if (ssa->ss.ss_family != AF_UNSPEC)
 		is_final = false;
 
 	/*
 	 * Begin state merge transaction at socket layer.
 	 */
 	INP_WLOCK_ASSERT(inp);
 
 	/*
 	 * If we were instructed only to leave a given source, do so.
 	 * MCAST_LEAVE_SOURCE_GROUP is only valid for inclusive memberships.
 	 */
 	if (is_final) {
 		/* Full leave: unlink the filter from the socket first. */
 		ip_mfilter_remove(&imo->imo_head, imf);
 		imf_leave(imf);
 
 		/*
 		 * Give up the multicast address record to which
 		 * the membership points.
 		 */
 		(void) in_leavegroup_locked(imf->imf_inm, imf);
 	} else {
 		/* Source leave: rejected on an exclusive-mode membership. */
 		if (imf->imf_st[0] == MCAST_EXCLUDE) {
 			error = EADDRNOTAVAIL;
 			goto out_inp_locked;
 		}
 		ims = imo_match_source(imf, &ssa->sa);
 		if (ims == NULL) {
 			CTR3(KTR_IGMPV3, "%s: source 0x%08x %spresent",
 			    __func__, ntohl(ssa->sin.sin_addr.s_addr), "not ");
 			error = EADDRNOTAVAIL;
 			goto out_inp_locked;
 		}
 		CTR2(KTR_IGMPV3, "%s: %s source", __func__, "block");
 		error = imf_prune(imf, &ssa->sin);
 		if (error) {
 			CTR1(KTR_IGMPV3, "%s: merge imf state failed",
 			    __func__);
 			goto out_inp_locked;
 		}
 	}
 
 	/*
 	 * Begin state merge transaction at IGMP layer.
 	 */
 	if (!is_final) {
 		CTR1(KTR_IGMPV3, "%s: merge inm state", __func__);
 		IN_MULTI_LIST_LOCK();
 		error = inm_merge(inm, imf);
 		if (error) {
 			CTR1(KTR_IGMPV3, "%s: failed to merge inm state",
 			    __func__);
 			IN_MULTI_LIST_UNLOCK();
 			imf_rollback(imf);
 			imf_reap(imf);
 			goto out_inp_locked;
 		}
 
 		CTR1(KTR_IGMPV3, "%s: doing igmp downcall", __func__);
 		error = igmp_change_state(inm);
 		IN_MULTI_LIST_UNLOCK();
 		if (error) {
 			CTR1(KTR_IGMPV3, "%s: failed igmp downcall",
 			    __func__);
 			imf_rollback(imf);
 			imf_reap(imf);
 			goto out_inp_locked;
 		}
 	}
 	imf_commit(imf);
 	imf_reap(imf);
 
 out_inp_locked:
 	INP_WUNLOCK(inp);
 
 	/*
 	 * On a full leave the filter was unlinked from the socket above,
 	 * so it is freed here.  imf is NULL when no membership matched.
 	 */
 	if (is_final && imf)
 		ip_mfilter_free(imf);
 
 	IN_MULTI_UNLOCK();
 	return (error);
 }
 
 /*
  * Select the interface for transmitting IPv4 multicast datagrams.
  *
  * Either an instance of struct in_addr or an instance of struct ip_mreqn
  * may be passed to this socket option. An address of INADDR_ANY or an
  * interface index of 0 is used to remove a previous selection.
  * When no interface is selected, one is chosen for every send.
  */
 static int
 inp_set_multicast_if(struct inpcb *inp, struct sockopt *sopt)
 {
 	struct rm_priotracker	 in_ifa_tracker;
 	struct ip_mreqn		 mreqn;
 	struct in_addr		 addr;
 	struct ip_moptions	*imo;
 	struct ifnet		*ifp;
 	int			 error;
 
 	/* NULL means "no interface selected" (index 0 or INADDR_ANY). */
 	ifp = NULL;
 
 	if (sopt->sopt_valsize != sizeof(struct ip_mreqn)) {
 		/*
 		 * Traditional BSD usage: the interface is named by
 		 * one of its IPv4 addresses.
 		 */
 		error = sooptcopyin(sopt, &addr, sizeof(struct in_addr),
 		    sizeof(struct in_addr));
 		if (error != 0)
 			return (error);
 		if (!in_nullhost(addr)) {
 			IN_IFADDR_RLOCK(&in_ifa_tracker);
 			INADDR_TO_IFP(addr, ifp);
 			IN_IFADDR_RUNLOCK(&in_ifa_tracker);
 			if (ifp == NULL)
 				return (EADDRNOTAVAIL);
 		}
 		CTR3(KTR_IGMPV3, "%s: ifp = %p, addr = 0x%08x", __func__, ifp,
 		    ntohl(addr.s_addr));
 	} else {
 		/*
 		 * Linux-derived usage: the interface is named by index
 		 * inside a struct ip_mreqn.
 		 */
 		error = sooptcopyin(sopt, &mreqn, sizeof(struct ip_mreqn),
 		    sizeof(struct ip_mreqn));
 		if (error != 0)
 			return (error);
 
 		if (mreqn.imr_ifindex < 0 || V_if_index < mreqn.imr_ifindex)
 			return (EINVAL);
 
 		if (mreqn.imr_ifindex != 0) {
 			ifp = ifnet_byindex(mreqn.imr_ifindex);
 			if (ifp == NULL)
 				return (EADDRNOTAVAIL);
 		}
 	}
 
 	/* A selected interface has to be multicast-capable. */
 	if (ifp != NULL && (ifp->if_flags & IFF_MULTICAST) == 0)
 		return (EOPNOTSUPP);
 
 	/*
 	 * Record the selection; the inpcb lock is released here, after
 	 * the multicast options have been updated.
 	 */
 	imo = inp_findmoptions(inp);
 	imo->imo_multicast_ifp = ifp;
 	imo->imo_multicast_addr.s_addr = INADDR_ANY;
 	INP_WUNLOCK(inp);
 
 	return (0);
 }
 
 /*
  * Atomically set source filters on a socket for an IPv4 multicast group.
  *
  * SMPng: NOTE: Potentially calls malloc(M_WAITOK) with Giant held.
  */
 static int
 inp_set_source_filters(struct inpcb *inp, struct sockopt *sopt)
 {
 	struct __msfilterreq	 msfr;
 	sockunion_t		*gsa;
 	struct ifnet		*ifp;
 	struct in_mfilter	*imf;
 	struct ip_moptions	*imo;
 	struct in_multi		*inm;
 	int			 error;
 
 	error = sooptcopyin(sopt, &msfr, sizeof(struct __msfilterreq),
 	    sizeof(struct __msfilterreq));
 	if (error)
 		return (error);
 
 	if (msfr.msfr_nsrcs > in_mcast_maxsocksrc)
 		return (ENOBUFS);
 
 	if ((msfr.msfr_fmode != MCAST_EXCLUDE &&
 	     msfr.msfr_fmode != MCAST_INCLUDE))
 		return (EINVAL);
 
 	if (msfr.msfr_group.ss_family != AF_INET ||
 	    msfr.msfr_group.ss_len != sizeof(struct sockaddr_in))
 		return (EINVAL);
 
 	gsa = (sockunion_t *)&msfr.msfr_group;
 	if (!IN_MULTICAST(ntohl(gsa->sin.sin_addr.s_addr)))
 		return (EINVAL);
 
 	gsa->sin.sin_port = 0;	/* ignore port */
 
 	if (msfr.msfr_ifindex == 0 || V_if_index < msfr.msfr_ifindex)
 		return (EADDRNOTAVAIL);
 
 	ifp = ifnet_byindex(msfr.msfr_ifindex);
 	if (ifp == NULL)
 		return (EADDRNOTAVAIL);
 
 	IN_MULTI_LOCK();
 
 	/*
 	 * Take the INP write lock.
 	 * Check if this socket is a member of this group.
 	 */
 	imo = inp_findmoptions(inp);
 	imf = imo_match_group(imo, ifp, &gsa->sa);
 	if (imf == NULL) {
 		error = EADDRNOTAVAIL;
 		goto out_inp_locked;
 	}
 	inm = imf->imf_inm;
 
 	/*
 	 * Begin state merge transaction at socket layer.
 	 */
 	INP_WLOCK_ASSERT(inp);
 
 	imf->imf_st[1] = msfr.msfr_fmode;
 
 	/*
 	 * Apply any new source filters, if present.
 	 * Make a copy of the user-space source vector so
 	 * that we may copy them with a single copyin. This
 	 * allows us to deal with page faults up-front.
 	 */
 	if (msfr.msfr_nsrcs > 0) {
 		struct in_msource	*lims;
 		struct sockaddr_in	*psin;
 		struct sockaddr_storage	*kss, *pkss;
 		int			 i;
 
 		INP_WUNLOCK(inp);
 
 		CTR2(KTR_IGMPV3, "%s: loading %lu source list entries",
 		    __func__, (unsigned long)msfr.msfr_nsrcs);
 		kss = malloc(sizeof(struct sockaddr_storage) * msfr.msfr_nsrcs,
 		    M_TEMP, M_WAITOK);
 		error = copyin(msfr.msfr_srcs, kss,
 		    sizeof(struct sockaddr_storage) * msfr.msfr_nsrcs);
 		if (error) {
 			free(kss, M_TEMP);
 			return (error);
 		}
 
 		INP_WLOCK(inp);
 
 		/*
 		 * Mark all source filters as UNDEFINED at t1.
 		 * Restore new group filter mode, as imf_leave()
 		 * will set it to INCLUDE.
 		 */
 		imf_leave(imf);
 		imf->imf_st[1] = msfr.msfr_fmode;
 
 		/*
 		 * Update socket layer filters at t1, lazy-allocating
 		 * new entries. This saves a bunch of memory at the
 		 * cost of one RB_FIND() per source entry; duplicate
 		 * entries in the msfr_nsrcs vector are ignored.
 		 * If we encounter an error, rollback transaction.
 		 *
 		 * XXX This too could be replaced with a set-symmetric
 		 * difference like loop to avoid walking from root
 		 * every time, as the key space is common.
 		 */
 		for (i = 0, pkss = kss; i < msfr.msfr_nsrcs; i++, pkss++) {
 			psin = (struct sockaddr_in *)pkss;
 			if (psin->sin_family != AF_INET) {
 				error = EAFNOSUPPORT;
 				break;
 			}
 			if (psin->sin_len != sizeof(struct sockaddr_in)) {
 				error = EINVAL;
 				break;
 			}
 			error = imf_get_source(imf, psin, &lims);
 			if (error)
 				break;
 			lims->imsl_st[1] = imf->imf_st[1];
 		}
 		free(kss, M_TEMP);
 	}
 
 	if (error)
 		goto out_imf_rollback;
 
 	INP_WLOCK_ASSERT(inp);
 
 	/*
 	 * Begin state merge transaction at IGMP layer.
 	 */
 	CTR1(KTR_IGMPV3, "%s: merge inm state", __func__);
 	IN_MULTI_LIST_LOCK();
 	error = inm_merge(inm, imf);
 	if (error) {
 		CTR1(KTR_IGMPV3, "%s: failed to merge inm state", __func__);
 		IN_MULTI_LIST_UNLOCK();
 		goto out_imf_rollback;
 	}
 
 	CTR1(KTR_IGMPV3, "%s: doing igmp downcall", __func__);
 	error = igmp_change_state(inm);
 	IN_MULTI_LIST_UNLOCK();
 	if (error)
 		CTR1(KTR_IGMPV3, "%s: failed igmp downcall", __func__);
 
 out_imf_rollback:
 	if (error)
 		imf_rollback(imf);
 	else
 		imf_commit(imf);
 
 	imf_reap(imf);
 
 out_inp_locked:
 	INP_WUNLOCK(inp);
 	IN_MULTI_UNLOCK();
 	return (error);
 }
 
 /*
  * Set the IP multicast options in response to user setsockopt().
  *
  * Many of the socket options handled in this function duplicate the
  * functionality of socket options in the regular unicast API. However,
  * it is not possible to merge the duplicate code, because the idempotence
  * of the IPv4 multicast part of the BSD Sockets API must be preserved;
  * the effects of these options must be treated as separate and distinct.
  *
  * SMPng: XXX: Unlocked read of inp_socket believed OK.
  * FUTURE: The IP_MULTICAST_VIF option may be eliminated if MROUTING
  * is refactored to no longer use vifs.
  */
 int
 inp_setmoptions(struct inpcb *inp, struct sockopt *sopt)
 {
 	struct ip_moptions	*imo;
 	int			 error;
+	struct epoch_tracker	et;
 
 	error = 0;
 
 	/*
 	 * If socket is neither of type SOCK_RAW or SOCK_DGRAM,
 	 * or is a divert socket, reject it.
 	 */
 	if (inp->inp_socket->so_proto->pr_protocol == IPPROTO_DIVERT ||
 	    (inp->inp_socket->so_proto->pr_type != SOCK_RAW &&
 	     inp->inp_socket->so_proto->pr_type != SOCK_DGRAM))
 		return (EOPNOTSUPP);
 
 	/*
 	 * The inpcb is not locked on entry.  The cases below that modify
 	 * the multicast options call inp_findmoptions() and then drop the
 	 * inpcb write lock themselves; the INP_UNLOCK_ASSERT at the end
 	 * checks that every path returns unlocked.
 	 */
 	switch (sopt->sopt_name) {
 	case IP_MULTICAST_VIF: {
 		int vifi;
 		/*
 		 * Select a multicast VIF for transmission.
 		 * Only useful if multicast forwarding is active.
 		 */
 		if (legal_vif_num == NULL) {
 			error = EOPNOTSUPP;
 			break;
 		}
 		error = sooptcopyin(sopt, &vifi, sizeof(int), sizeof(int));
 		if (error)
 			break;
 		/* -1 is accepted in addition to legal VIF numbers. */
 		if (!legal_vif_num(vifi) && (vifi != -1)) {
 			error = EINVAL;
 			break;
 		}
 		imo = inp_findmoptions(inp);
 		imo->imo_multicast_vif = vifi;
 		INP_WUNLOCK(inp);
 		break;
 	}
 
 	case IP_MULTICAST_IF:
 		error = inp_set_multicast_if(inp, sopt);
 		break;
 
 	case IP_MULTICAST_TTL: {
 		u_char ttl;
 
 		/*
 		 * Set the IP time-to-live for outgoing multicast packets.
 		 * The original multicast API required a char argument,
 		 * which is inconsistent with the rest of the socket API.
 		 * We allow either a char or an int.
 		 */
 		if (sopt->sopt_valsize == sizeof(u_char)) {
 			error = sooptcopyin(sopt, &ttl, sizeof(u_char),
 			    sizeof(u_char));
 			if (error)
 				break;
 		} else {
 			u_int ittl;
 
 			error = sooptcopyin(sopt, &ittl, sizeof(u_int),
 			    sizeof(u_int));
 			if (error)
 				break;
 			/* An int-sized TTL must still fit in one octet. */
 			if (ittl > 255) {
 				error = EINVAL;
 				break;
 			}
 			ttl = (u_char)ittl;
 		}
 		imo = inp_findmoptions(inp);
 		imo->imo_multicast_ttl = ttl;
 		INP_WUNLOCK(inp);
 		break;
 	}
 
 	case IP_MULTICAST_LOOP: {
 		u_char loop;
 
 		/*
 		 * Set the loopback flag for outgoing multicast packets.
 		 * Must be zero or one.  The original multicast API required a
 		 * char argument, which is inconsistent with the rest
 		 * of the socket API.  We allow either a char or an int.
 		 */
 		if (sopt->sopt_valsize == sizeof(u_char)) {
 			error = sooptcopyin(sopt, &loop, sizeof(u_char),
 			    sizeof(u_char));
 			if (error)
 				break;
 		} else {
 			u_int iloop;
 
 			error = sooptcopyin(sopt, &iloop, sizeof(u_int),
 					    sizeof(u_int));
 			if (error)
 				break;
 			/*
 			 * NOTE(review): unlike the TTL case there is no
 			 * range check; the value is truncated to u_char
 			 * before being normalised below.
 			 */
 			loop = (u_char)iloop;
 		}
 		imo = inp_findmoptions(inp);
 		/* Normalise any non-zero value to 1. */
 		imo->imo_multicast_loop = !!loop;
 		INP_WUNLOCK(inp);
 		break;
 	}
 
 	/*
 	 * Joins run inside the network epoch: the join path may call
 	 * inp_lookup_mcast_ifp(), which dereferences the nexthop object
 	 * returned by fib4_lookup().
 	 */
 	case IP_ADD_MEMBERSHIP:
 	case IP_ADD_SOURCE_MEMBERSHIP:
 	case MCAST_JOIN_GROUP:
 	case MCAST_JOIN_SOURCE_GROUP:
+		NET_EPOCH_ENTER(et);
 		error = inp_join_group(inp, sopt);
+		NET_EPOCH_EXIT(et);
 		break;
 
 	case IP_DROP_MEMBERSHIP:
 	case IP_DROP_SOURCE_MEMBERSHIP:
 	case MCAST_LEAVE_GROUP:
 	case MCAST_LEAVE_SOURCE_GROUP:
 		error = inp_leave_group(inp, sopt);
 		break;
 
 	case IP_BLOCK_SOURCE:
 	case IP_UNBLOCK_SOURCE:
 	case MCAST_BLOCK_SOURCE:
 	case MCAST_UNBLOCK_SOURCE:
 		error = inp_block_unblock_source(inp, sopt);
 		break;
 
 	case IP_MSFILTER:
 		error = inp_set_source_filters(inp, sopt);
 		break;
 
 	default:
 		error = EOPNOTSUPP;
 		break;
 	}
 
 	INP_UNLOCK_ASSERT(inp);
 
 	return (error);
 }
 
 /*
  * Expose IGMP's multicast filter mode and source list(s) to userland,
  * keyed by (ifindex, group).
  * The filter mode is written out as a uint32_t, followed by
  * 0..n of struct in_addr.
  * For use by ifmcstat(8).
  * SMPng: NOTE: unlocked read of ifindex space.
  */
 static int
 sysctl_ip_mcast_filters(SYSCTL_HANDLER_ARGS)
 {
 	struct in_addr			 src, group;
 	struct epoch_tracker		 et;
 	struct ifnet			*ifp;
 	struct ifmultiaddr		*ifma;
 	struct in_multi			*inm;
 	struct ip_msource		*ims;
 	int				*name;
 	int				 retval;
 	u_int				 namelen;
 	uint32_t			 fmode, ifindex;
 
 	name = (int *)arg1;
 	namelen = arg2;
 
 	/* Read-only node: any attempt to write is refused. */
 	if (req->newptr != NULL)
 		return (EPERM);
 
 	/* The OID suffix must be exactly (ifindex, group). */
 	if (namelen != 2)
 		return (EINVAL);
 
 	ifindex = name[0];
 	if (ifindex <= 0 || ifindex > V_if_index) {
 		CTR2(KTR_IGMPV3, "%s: ifindex %u out of range",
 		    __func__, ifindex);
 		return (ENOENT);
 	}
 
 	/* The group is used as-is, i.e. passed in network byte order. */
 	group.s_addr = name[1];
 	if (!IN_MULTICAST(ntohl(group.s_addr))) {
 		CTR2(KTR_IGMPV3, "%s: group 0x%08x is not multicast",
 		    __func__, ntohl(group.s_addr));
 		return (EINVAL);
 	}
 
 	NET_EPOCH_ENTER(et);
 	ifp = ifnet_byindex(ifindex);
 	if (ifp == NULL) {
 		NET_EPOCH_EXIT(et);
 		CTR2(KTR_IGMPV3, "%s: no ifp for ifindex %u",
 		    __func__, ifindex);
 		return (ENOENT);
 	}
 
 	/*
 	 * Wire the output buffer for the largest possible reply:
 	 * the mode word plus in_mcast_maxgrpsrc source addresses.
 	 */
 	retval = sysctl_wire_old_buffer(req,
 	    sizeof(uint32_t) + (in_mcast_maxgrpsrc * sizeof(struct in_addr)));
 	if (retval) {
 		NET_EPOCH_EXIT(et);
 		return (retval);
 	}
 
 	IN_MULTI_LIST_LOCK();
 
 	/* Walk the interface's multicast addresses looking for the group. */
 	CK_STAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
 		if (ifma->ifma_addr->sa_family != AF_INET ||
 		    ifma->ifma_protospec == NULL)
 			continue;
 		inm = (struct in_multi *)ifma->ifma_protospec;
 		if (!in_hosteq(inm->inm_addr, group))
 			continue;
 		/* Emit the filter mode first, then the in-mode sources. */
 		fmode = inm->inm_st[1].iss_fmode;
 		retval = SYSCTL_OUT(req, &fmode, sizeof(uint32_t));
 		if (retval != 0)
 			break;
 		RB_FOREACH(ims, ip_msource_tree, &inm->inm_srcs) {
 			CTR2(KTR_IGMPV3, "%s: visit node 0x%08x", __func__,
 			    ims->ims_haddr);
 			/*
 			 * Only copy-out sources which are in-mode.
 			 */
 			if (fmode != ims_get_mode(inm, ims, 1)) {
 				CTR1(KTR_IGMPV3, "%s: skip non-in-mode",
 				    __func__);
 				continue;
 			}
 			/* Sources are written out in network byte order. */
 			src.s_addr = htonl(ims->ims_haddr);
 			retval = SYSCTL_OUT(req, &src, sizeof(struct in_addr));
 			/*
 			 * NOTE(review): this break only leaves the source
 			 * walk; the outer address walk keeps going.
 			 */
 			if (retval != 0)
 				break;
 		}
 	}
 
 	IN_MULTI_LIST_UNLOCK();
 	NET_EPOCH_EXIT(et);
 
 	return (retval);
 }
 
 #if defined(KTR) && (KTR_COMPILE & KTR_IGMPV3)
 
 /*
  * Short printable names for the multicast filter modes, indexed by
  * the MCAST_* mode value.  The static assertion below fails the build
  * if the table and the mode constants stop lining up.
  */
 static const char *inm_modestrs[] = {
 	[MCAST_UNDEFINED] = "un",
 	[MCAST_INCLUDE] = "in",
 	[MCAST_EXCLUDE] = "ex",
 };
 _Static_assert(MCAST_UNDEFINED == 0 &&
 	       MCAST_EXCLUDE + 1 == nitems(inm_modestrs),
 	       "inm_modestrs: no longer matches #defines");
 
 /*
  * Translate a multicast filter mode into its short printable name.
  * Any value outside MCAST_UNDEFINED..MCAST_EXCLUDE yields "??".
  */
 static const char *
 inm_mode_str(const int mode)
 {
 	const char *label;
 
 	label = "??";
 	if (!(mode < MCAST_UNDEFINED || mode > MCAST_EXCLUDE))
 		label = inm_modestrs[mode];
 	return (label);
 }
 
 /*
  * Printable names for the IGMP per-group membership states, indexed
  * by the IGMP_*_MEMBER state value.  The static assertion below fails
  * the build if the table and the state constants stop lining up.
  */
 static const char *inm_statestrs[] = {
 	[IGMP_NOT_MEMBER] = "not-member",
 	[IGMP_SILENT_MEMBER] = "silent",
 	[IGMP_REPORTING_MEMBER] = "reporting",
 	[IGMP_IDLE_MEMBER] = "idle",
 	[IGMP_LAZY_MEMBER] = "lazy",
 	[IGMP_SLEEPING_MEMBER] = "sleeping",
 	[IGMP_AWAKENING_MEMBER] = "awakening",
 	[IGMP_G_QUERY_PENDING_MEMBER] = "query-pending",
 	[IGMP_SG_QUERY_PENDING_MEMBER] = "sg-query-pending",
 	[IGMP_LEAVING_MEMBER] = "leaving",
 };
 _Static_assert(IGMP_NOT_MEMBER == 0 &&
 	       IGMP_LEAVING_MEMBER + 1 == nitems(inm_statestrs),
 	       "inm_statetrs: no longer matches #defines");
 
 /*
  * Translate an IGMP membership state into its printable name.
  * Any value outside IGMP_NOT_MEMBER..IGMP_LEAVING_MEMBER yields "??".
  */
 static const char *
 inm_state_str(const int state)
 {
 	const char *label;
 
 	label = "??";
 	if (!(state < IGMP_NOT_MEMBER || state > IGMP_LEAVING_MEMBER))
 		label = inm_statestrs[state];
 	return (label);
 }
 
 /*
  * Dump an in_multi structure to the console.
  *
  * Does nothing unless KTR_IGMPV3 is set in ktr_mask.  Prints the group
  * address, interface, timers, state and reference count, followed by
  * the filter-mode counters for both entries of inm_st[].
  */
 void
 inm_print(const struct in_multi *inm)
 {
 	int t;
 	char addrbuf[INET_ADDRSTRLEN];
 
 	/* Stay silent when IGMPv3 tracing is not enabled at run time. */
 	if ((ktr_mask & KTR_IGMPV3) == 0)
 		return;
 
 	printf("%s: --- begin inm %p ---\n", __func__, inm);
 	printf("addr %s ifp %p(%s) ifma %p\n",
 	    inet_ntoa_r(inm->inm_addr, addrbuf),
 	    inm->inm_ifp,
 	    inm->inm_ifp->if_xname,
 	    inm->inm_ifma);
 	printf("timer %u state %s refcount %u scq.len %u\n",
 	    inm->inm_timer,
 	    inm_state_str(inm->inm_state),
 	    inm->inm_refcount,
 	    inm->inm_scq.mq_len);
 	printf("igi %p nsrc %lu sctimer %u scrv %u\n",
 	    inm->inm_igi,
 	    inm->inm_nsrc,
 	    inm->inm_sctimer,
 	    inm->inm_scrv);
 	/* One line per entry of inm_st[] (t0, then t1). */
 	for (t = 0; t < 2; t++) {
 		printf("t%d: fmode %s asm %u ex %u in %u rec %u\n", t,
 		    inm_mode_str(inm->inm_st[t].iss_fmode),
 		    inm->inm_st[t].iss_asm,
 		    inm->inm_st[t].iss_ex,
 		    inm->inm_st[t].iss_in,
 		    inm->inm_st[t].iss_rec);
 	}
 	printf("%s: --- end inm %p ---\n", __func__, inm);
 }
 
 #else /* !KTR || !(KTR_COMPILE & KTR_IGMPV3) */
 
 /*
  * Stub used when KTR support for KTR_IGMPV3 is not compiled in:
  * keeps the symbol available to callers but prints nothing.
  */
 void
 inm_print(const struct in_multi *inm)
 {
 
 }
 
 #endif /* KTR && (KTR_COMPILE & KTR_IGMPV3) */
 
 /*
  * Emit the red-black tree functions for ip_msource_tree: nodes are
  * struct ip_msource linked through ims_link and ordered by
  * ip_msource_cmp.
  */
 RB_GENERATE(ip_msource_tree, ip_msource, ims_link, ip_msource_cmp);
Index: head/sys/netinet/ip_options.c
===================================================================
--- head/sys/netinet/ip_options.c	(revision 362899)
+++ head/sys/netinet/ip_options.c	(revision 362900)
@@ -1,761 +1,763 @@
 /*
  * SPDX-License-Identifier: BSD-3-Clause
  *
  * Copyright (c) 1982, 1986, 1988, 1993
  *      The Regents of the University of California.
  * Copyright (c) 2005 Andre Oppermann, Internet Business Solutions AG.
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include "opt_ipstealth.h"
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/mbuf.h>
 #include <sys/domain.h>
 #include <sys/protosw.h>
 #include <sys/socket.h>
 #include <sys/time.h>
 #include <sys/kernel.h>
 #include <sys/syslog.h>
 #include <sys/sysctl.h>
 
 #include <net/if.h>
 #include <net/if_types.h>
 #include <net/if_var.h>
 #include <net/if_dl.h>
 #include <net/route.h>
+#include <net/route/nhop.h>
 #include <net/netisr.h>
 #include <net/vnet.h>
 
 #include <netinet/in.h>
 #include <netinet/in_fib.h>
 #include <netinet/in_systm.h>
 #include <netinet/in_var.h>
 #include <netinet/ip.h>
 #include <netinet/in_pcb.h>
 #include <netinet/ip_var.h>
 #include <netinet/ip_options.h>
 #include <netinet/ip_icmp.h>
 #include <machine/in_cksum.h>
 
 #include <sys/socketvar.h>
 
 VNET_DEFINE_STATIC(int, ip_dosourceroute);
 SYSCTL_INT(_net_inet_ip, IPCTL_SOURCEROUTE, sourceroute,
     CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(ip_dosourceroute), 0,
     "Enable forwarding source routed IP packets");
 #define	V_ip_dosourceroute	VNET(ip_dosourceroute)
 
 VNET_DEFINE_STATIC(int,	ip_acceptsourceroute);
 SYSCTL_INT(_net_inet_ip, IPCTL_ACCEPTSOURCEROUTE, accept_sourceroute,
     CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(ip_acceptsourceroute), 0,
     "Enable accepting source routed IP packets");
 #define	V_ip_acceptsourceroute	VNET(ip_acceptsourceroute)
 
 VNET_DEFINE(int, ip_doopts) = 1; /* 0 = ignore, 1 = process, 2 = reject */
 SYSCTL_INT(_net_inet_ip, OID_AUTO, process_options, CTLFLAG_VNET | CTLFLAG_RW,
     &VNET_NAME(ip_doopts), 0, "Enable IP options processing ([LS]SRR, RR, TS)");
 
 static void	save_rte(struct mbuf *m, u_char *, struct in_addr);
 
 /*
  * Do option processing on a datagram, possibly discarding it if bad options
  * are encountered, or forwarding it if source-routed.
  *
  * The pass argument is used when operating in the IPSTEALTH mode to tell
  * what options to process: [LS]SRR (pass 0) or the others (pass 1).  The
  * reason for as many as two passes is that when doing IPSTEALTH, non-routing
  * options should be processed only if the packet is for us.
  *
  * Returns 1 if packet has been forwarded/freed, 0 if the packet should be
  * processed further.
  */
 int
 ip_dooptions(struct mbuf *m, int pass)
 {
 	struct ip *ip = mtod(m, struct ip *);
 	u_char *cp;
 	struct in_ifaddr *ia;
 	int opt, optlen, cnt, off, code, type = ICMP_PARAMPROB, forward = 0;
 	struct in_addr *sin, dst;
 	uint32_t ntime;
-	struct nhop4_extended nh_ext;
+	struct nhop_object *nh;
 	struct	sockaddr_in ipaddr = { sizeof(ipaddr), AF_INET };
 
 	NET_EPOCH_ASSERT();
 
 	/* Ignore or reject packets with IP options. */
 	if (V_ip_doopts == 0)
 		return 0;
 	else if (V_ip_doopts == 2) {
 		type = ICMP_UNREACH;
 		code = ICMP_UNREACH_FILTER_PROHIB;
 		goto bad;
 	}
 
 	dst = ip->ip_dst;
 	cp = (u_char *)(ip + 1);
 	cnt = (ip->ip_hl << 2) - sizeof (struct ip);
 	for (; cnt > 0; cnt -= optlen, cp += optlen) {
 		opt = cp[IPOPT_OPTVAL];
 		if (opt == IPOPT_EOL)
 			break;
 		if (opt == IPOPT_NOP)
 			optlen = 1;
 		else {
 			if (cnt < IPOPT_OLEN + sizeof(*cp)) {
 				code = &cp[IPOPT_OLEN] - (u_char *)ip;
 				goto bad;
 			}
 			optlen = cp[IPOPT_OLEN];
 			if (optlen < IPOPT_OLEN + sizeof(*cp) || optlen > cnt) {
 				code = &cp[IPOPT_OLEN] - (u_char *)ip;
 				goto bad;
 			}
 		}
 		switch (opt) {
 
 		default:
 			break;
 
 		/*
 		 * Source routing with record.  Find interface with current
 		 * destination address.  If none on this machine then drop if
 		 * strictly routed, or do nothing if loosely routed.  Record
 		 * interface address and bring up next address component.  If
 		 * strictly routed make sure next address is on directly
 		 * accessible net.
 		 */
 		case IPOPT_LSRR:
 		case IPOPT_SSRR:
 #ifdef IPSTEALTH
 			if (V_ipstealth && pass > 0)
 				break;
 #endif
 			if (optlen < IPOPT_OFFSET + sizeof(*cp)) {
 				code = &cp[IPOPT_OLEN] - (u_char *)ip;
 				goto bad;
 			}
 			if ((off = cp[IPOPT_OFFSET]) < IPOPT_MINOFF) {
 				code = &cp[IPOPT_OFFSET] - (u_char *)ip;
 				goto bad;
 			}
 			ipaddr.sin_addr = ip->ip_dst;
 			if (ifa_ifwithaddr_check((struct sockaddr *)&ipaddr)
 			    == 0) {
 				if (opt == IPOPT_SSRR) {
 					type = ICMP_UNREACH;
 					code = ICMP_UNREACH_SRCFAIL;
 					goto bad;
 				}
 				if (!V_ip_dosourceroute)
 					goto nosourcerouting;
 				/*
 				 * Loose routing, and not at next destination
 				 * yet; nothing to do except forward.
 				 */
 				break;
 			}
 			off--;			/* 0 origin */
 			if (off > optlen - (int)sizeof(struct in_addr)) {
 				/*
 				 * End of source route.  Should be for us.
 				 */
 				if (!V_ip_acceptsourceroute)
 					goto nosourcerouting;
 				save_rte(m, cp, ip->ip_src);
 				break;
 			}
 #ifdef IPSTEALTH
 			if (V_ipstealth)
 				goto dropit;
 #endif
 			if (!V_ip_dosourceroute) {
 				if (V_ipforwarding) {
 					char srcbuf[INET_ADDRSTRLEN];
 					char dstbuf[INET_ADDRSTRLEN];
 
 					/*
 					 * Acting as a router, so generate
 					 * ICMP
 					 */
 nosourcerouting:
 					log(LOG_WARNING,
 					    "attempted source route from %s "
 					    "to %s\n",
 					    inet_ntoa_r(ip->ip_src, srcbuf),
 					    inet_ntoa_r(ip->ip_dst, dstbuf));
 					type = ICMP_UNREACH;
 					code = ICMP_UNREACH_SRCFAIL;
 					goto bad;
 				} else {
 					/*
 					 * Not acting as a router, so
 					 * silently drop.
 					 */
 #ifdef IPSTEALTH
 dropit:
 #endif
 					IPSTAT_INC(ips_cantforward);
 					m_freem(m);
 					return (1);
 				}
 			}
 
 			/*
 			 * locate outgoing interface
 			 */
 			(void)memcpy(&ipaddr.sin_addr, cp + off,
 			    sizeof(ipaddr.sin_addr));
 
 			type = ICMP_UNREACH;
 			code = ICMP_UNREACH_SRCFAIL;
 
 			if (opt == IPOPT_SSRR) {
 #define	INA	struct in_ifaddr *
 #define	SA	struct sockaddr *
 			    ia = (INA)ifa_ifwithdstaddr((SA)&ipaddr,
 					    RT_ALL_FIBS);
 			    if (ia == NULL)
 				    ia = (INA)ifa_ifwithnet((SA)&ipaddr, 0,
 						    RT_ALL_FIBS);
 				if (ia == NULL)
 					goto bad;
 
 				memcpy(cp + off, &(IA_SIN(ia)->sin_addr),
 				    sizeof(struct in_addr));
 			} else {
 				/* XXX MRT 0 for routing */
-				if (fib4_lookup_nh_ext(M_GETFIB(m),
-				    ipaddr.sin_addr, 0, 0, &nh_ext) != 0)
+				nh = fib4_lookup(M_GETFIB(m), ipaddr.sin_addr,
+				     0, NHR_NONE, 0);
+				if (nh == NULL)
 					goto bad;
 
-				memcpy(cp + off, &nh_ext.nh_src,
+				memcpy(cp + off, &(IA_SIN(nh->nh_ifa)->sin_addr),
 				    sizeof(struct in_addr));
 			}
 
 			ip->ip_dst = ipaddr.sin_addr;
 			cp[IPOPT_OFFSET] += sizeof(struct in_addr);
 			/*
 			 * Let ip_intr's mcast routing check handle mcast pkts
 			 */
 			forward = !IN_MULTICAST(ntohl(ip->ip_dst.s_addr));
 			break;
 
 		case IPOPT_RR:
 #ifdef IPSTEALTH
 			if (V_ipstealth && pass == 0)
 				break;
 #endif
 			if (optlen < IPOPT_OFFSET + sizeof(*cp)) {
 				code = &cp[IPOPT_OFFSET] - (u_char *)ip;
 				goto bad;
 			}
 			if ((off = cp[IPOPT_OFFSET]) < IPOPT_MINOFF) {
 				code = &cp[IPOPT_OFFSET] - (u_char *)ip;
 				goto bad;
 			}
 			/*
 			 * If no space remains, ignore.
 			 */
 			off--;			/* 0 origin */
 			if (off > optlen - (int)sizeof(struct in_addr))
 				break;
 			(void)memcpy(&ipaddr.sin_addr, &ip->ip_dst,
 			    sizeof(ipaddr.sin_addr));
 			/*
 			 * Locate outgoing interface; if we're the
 			 * destination, use the incoming interface (should be
 			 * same).
 			 */
 			if ((ia = (INA)ifa_ifwithaddr((SA)&ipaddr)) != NULL) {
 				memcpy(cp + off, &(IA_SIN(ia)->sin_addr),
 				    sizeof(struct in_addr));
-			} else if (fib4_lookup_nh_ext(M_GETFIB(m),
-			    ipaddr.sin_addr, 0, 0, &nh_ext) == 0) {
-				memcpy(cp + off, &nh_ext.nh_src,
+			} else if ((nh = fib4_lookup(M_GETFIB(m),
+			    ipaddr.sin_addr, 0, NHR_NONE, 0)) != NULL) {
+				memcpy(cp + off, &(IA_SIN(nh->nh_ifa)->sin_addr),
 				    sizeof(struct in_addr));
 			} else {
 				type = ICMP_UNREACH;
 				code = ICMP_UNREACH_HOST;
 				goto bad;
 			}
 			cp[IPOPT_OFFSET] += sizeof(struct in_addr);
 			break;
 
 		case IPOPT_TS:
 #ifdef IPSTEALTH
 			if (V_ipstealth && pass == 0)
 				break;
 #endif
 			code = cp - (u_char *)ip;
 			if (optlen < 4 || optlen > 40) {
 				code = &cp[IPOPT_OLEN] - (u_char *)ip;
 				goto bad;
 			}
 			if ((off = cp[IPOPT_OFFSET]) < 5) {
 				code = &cp[IPOPT_OLEN] - (u_char *)ip;
 				goto bad;
 			}
 			if (off > optlen - (int)sizeof(int32_t)) {
 				cp[IPOPT_OFFSET + 1] += (1 << 4);
 				if ((cp[IPOPT_OFFSET + 1] & 0xf0) == 0) {
 					code = &cp[IPOPT_OFFSET] - (u_char *)ip;
 					goto bad;
 				}
 				break;
 			}
 			off--;				/* 0 origin */
 			sin = (struct in_addr *)(cp + off);
 			switch (cp[IPOPT_OFFSET + 1] & 0x0f) {
 
 			case IPOPT_TS_TSONLY:
 				break;
 
 			case IPOPT_TS_TSANDADDR:
 				if (off + sizeof(uint32_t) +
 				    sizeof(struct in_addr) > optlen) {
 					code = &cp[IPOPT_OFFSET] - (u_char *)ip;
 					goto bad;
 				}
 				ipaddr.sin_addr = dst;
 				ia = (INA)ifaof_ifpforaddr((SA)&ipaddr,
 							    m->m_pkthdr.rcvif);
 				if (ia == NULL)
 					continue;
 				(void)memcpy(sin, &IA_SIN(ia)->sin_addr,
 				    sizeof(struct in_addr));
 				cp[IPOPT_OFFSET] += sizeof(struct in_addr);
 				off += sizeof(struct in_addr);
 				break;
 
 			case IPOPT_TS_PRESPEC:
 				if (off + sizeof(uint32_t) +
 				    sizeof(struct in_addr) > optlen) {
 					code = &cp[IPOPT_OFFSET] - (u_char *)ip;
 					goto bad;
 				}
 				(void)memcpy(&ipaddr.sin_addr, sin,
 				    sizeof(struct in_addr));
 				if (ifa_ifwithaddr_check((SA)&ipaddr) == 0)
 					continue;
 				cp[IPOPT_OFFSET] += sizeof(struct in_addr);
 				off += sizeof(struct in_addr);
 				break;
 
 			default:
 				code = &cp[IPOPT_OFFSET + 1] - (u_char *)ip;
 				goto bad;
 			}
 			ntime = iptime();
 			(void)memcpy(cp + off, &ntime, sizeof(uint32_t));
 			cp[IPOPT_OFFSET] += sizeof(uint32_t);
 		}
 	}
 	if (forward && V_ipforwarding) {
 		ip_forward(m, 1);
 		return (1);
 	}
 	return (0);
 bad:
 	icmp_error(m, type, code, 0, 0);
 	IPSTAT_INC(ips_badoptions);
 	return (1);
 }
 
 /*
  * Save incoming source route for use in replies, to be picked up later by
  * ip_srcroute if the receiver is interested.
  */
 static void
 save_rte(struct mbuf *m, u_char *option, struct in_addr dst)
 {
 	unsigned olen;
 	struct ipopt_tag *opts;
 
 	opts = (struct ipopt_tag *)m_tag_get(PACKET_TAG_IPOPTIONS,
 	    sizeof(struct ipopt_tag), M_NOWAIT);
 	if (opts == NULL)
 		return;
 
 	olen = option[IPOPT_OLEN];
 	if (olen > sizeof(opts->ip_srcrt) - (1 + sizeof(dst))) {
 		m_tag_free((struct m_tag *)opts);
 		return;
 	}
 	bcopy(option, opts->ip_srcrt.srcopt, olen);
 	opts->ip_nhops = (olen - IPOPT_OFFSET - 1) / sizeof(struct in_addr);
 	opts->ip_srcrt.dst = dst;
 	m_tag_prepend(m, (struct m_tag *)opts);
 }
 
 /*
  * Retrieve incoming source route for use in replies, in the same form used
  * by setsockopt.  The first hop is placed before the options, will be
  * removed later.
  */
 struct mbuf *
 ip_srcroute(struct mbuf *m0)
 {
 	struct in_addr *p, *q;
 	struct mbuf *m;
 	struct ipopt_tag *opts;
 
 	opts = (struct ipopt_tag *)m_tag_find(m0, PACKET_TAG_IPOPTIONS, NULL);
 	if (opts == NULL)
 		return (NULL);
 
 	if (opts->ip_nhops == 0)
 		return (NULL);
 	m = m_get(M_NOWAIT, MT_DATA);
 	if (m == NULL)
 		return (NULL);
 
 #define OPTSIZ	(sizeof(opts->ip_srcrt.nop) + sizeof(opts->ip_srcrt.srcopt))
 
 	/* length is (nhops+1)*sizeof(addr) + sizeof(nop + srcrt header) */
 	m->m_len = opts->ip_nhops * sizeof(struct in_addr) +
 	    sizeof(struct in_addr) + OPTSIZ;
 
 	/*
 	 * First, save first hop for return route.
 	 */
 	p = &(opts->ip_srcrt.route[opts->ip_nhops - 1]);
 	*(mtod(m, struct in_addr *)) = *p--;
 
 	/*
 	 * Copy option fields and padding (nop) to mbuf.
 	 */
 	opts->ip_srcrt.nop = IPOPT_NOP;
 	opts->ip_srcrt.srcopt[IPOPT_OFFSET] = IPOPT_MINOFF;
 	(void)memcpy(mtod(m, caddr_t) + sizeof(struct in_addr),
 	    &(opts->ip_srcrt.nop), OPTSIZ);
 	q = (struct in_addr *)(mtod(m, caddr_t) +
 	    sizeof(struct in_addr) + OPTSIZ);
 #undef OPTSIZ
 	/*
 	 * Record return path as an IP source route, reversing the path
 	 * (pointers are now aligned).
 	 */
 	while (p >= opts->ip_srcrt.route) {
 		*q++ = *p--;
 	}
 	/*
 	 * Last hop goes to final destination.
 	 */
 	*q = opts->ip_srcrt.dst;
 	m_tag_delete(m0, (struct m_tag *)opts);
 	return (m);
 }
 
 /*
  * Strip out IP options, at higher level protocol in the kernel.
  */
 void
 ip_stripoptions(struct mbuf *m)
 {
 	struct ip *ip = mtod(m, struct ip *);
 	int olen;
 
 	olen = (ip->ip_hl << 2) - sizeof(struct ip);
 	m->m_len -= olen;
 	if (m->m_flags & M_PKTHDR)
 		m->m_pkthdr.len -= olen;
 	ip->ip_len = htons(ntohs(ip->ip_len) - olen);
 	ip->ip_hl = sizeof(struct ip) >> 2;
 
 	bcopy((char *)ip + sizeof(struct ip) + olen, (ip + 1),
 	    (size_t )(m->m_len - sizeof(struct ip)));
 }
 
 /*
  * Insert IP options into preformed packet.  Adjust IP destination as
  * required for IP source routing, as indicated by a non-zero in_addr at the
  * start of the options.
  *
  * XXX This routine assumes that the packet has no options in place.
  */
 struct mbuf *
 ip_insertoptions(struct mbuf *m, struct mbuf *opt, int *phlen)
 {
 	struct ipoption *p = mtod(opt, struct ipoption *);
 	struct mbuf *n;
 	struct ip *ip = mtod(m, struct ip *);
 	unsigned optlen;
 
 	optlen = opt->m_len - sizeof(p->ipopt_dst);
 	if (optlen + ntohs(ip->ip_len) > IP_MAXPACKET) {
 		*phlen = 0;
 		return (m);		/* XXX should fail */
 	}
 	if (p->ipopt_dst.s_addr)
 		ip->ip_dst = p->ipopt_dst;
 	if (!M_WRITABLE(m) || M_LEADINGSPACE(m) < optlen) {
 		n = m_gethdr(M_NOWAIT, MT_DATA);
 		if (n == NULL) {
 			*phlen = 0;
 			return (m);
 		}
 		m_move_pkthdr(n, m);
 		n->m_pkthdr.rcvif = NULL;
 		n->m_pkthdr.len += optlen;
 		m->m_len -= sizeof(struct ip);
 		m->m_data += sizeof(struct ip);
 		n->m_next = m;
 		m = n;
 		m->m_len = optlen + sizeof(struct ip);
 		m->m_data += max_linkhdr;
 		bcopy(ip, mtod(m, void *), sizeof(struct ip));
 	} else {
 		m->m_data -= optlen;
 		m->m_len += optlen;
 		m->m_pkthdr.len += optlen;
 		bcopy(ip, mtod(m, void *), sizeof(struct ip));
 	}
 	ip = mtod(m, struct ip *);
 	bcopy(p->ipopt_list, ip + 1, optlen);
 	*phlen = sizeof(struct ip) + optlen;
 	ip->ip_v = IPVERSION;
 	ip->ip_hl = *phlen >> 2;
 	ip->ip_len = htons(ntohs(ip->ip_len) + optlen);
 	return (m);
 }
 
 /*
  * Copy options from ip to jp, omitting those not copied during
  * fragmentation.
  */
 int
 ip_optcopy(struct ip *ip, struct ip *jp)
 {
 	u_char *cp, *dp;
 	int opt, optlen, cnt;
 
 	cp = (u_char *)(ip + 1);
 	dp = (u_char *)(jp + 1);
 	cnt = (ip->ip_hl << 2) - sizeof (struct ip);
 	for (; cnt > 0; cnt -= optlen, cp += optlen) {
 		opt = cp[0];
 		if (opt == IPOPT_EOL)
 			break;
 		if (opt == IPOPT_NOP) {
 			/* Preserve for IP mcast tunnel's LSRR alignment. */
 			*dp++ = IPOPT_NOP;
 			optlen = 1;
 			continue;
 		}
 
 		KASSERT(cnt >= IPOPT_OLEN + sizeof(*cp),
 		    ("ip_optcopy: malformed ipv4 option"));
 		optlen = cp[IPOPT_OLEN];
 		KASSERT(optlen >= IPOPT_OLEN + sizeof(*cp) && optlen <= cnt,
 		    ("ip_optcopy: malformed ipv4 option"));
 
 		/* Bogus lengths should have been caught by ip_dooptions. */
 		if (optlen > cnt)
 			optlen = cnt;
 		if (IPOPT_COPIED(opt)) {
 			bcopy(cp, dp, optlen);
 			dp += optlen;
 		}
 	}
 	for (optlen = dp - (u_char *)(jp+1); optlen & 0x3; optlen++)
 		*dp++ = IPOPT_EOL;
 	return (optlen);
 }
 
 /*
  * Set up IP options in pcb for insertion in output packets.  Store in mbuf
  * with pointer in pcbopt, adding pseudo-option with destination address if
  * source routed.
  */
 int
 ip_pcbopts(struct inpcb *inp, int optname, struct mbuf *m)
 {
 	int cnt, optlen;
 	u_char *cp;
 	struct mbuf **pcbopt;
 	u_char opt;
 
 	INP_WLOCK_ASSERT(inp);
 
 	pcbopt = &inp->inp_options;
 
 	/* turn off any old options */
 	if (*pcbopt)
 		(void)m_free(*pcbopt);
 	*pcbopt = NULL;
 	if (m == NULL || m->m_len == 0) {
 		/*
 		 * Only turning off any previous options.
 		 */
 		if (m != NULL)
 			(void)m_free(m);
 		return (0);
 	}
 
 	if (m->m_len % sizeof(int32_t))
 		goto bad;
 	/*
 	 * IP first-hop destination address will be stored before actual
 	 * options; move other options back and clear it when none present.
 	 */
 	if (m->m_data + m->m_len + sizeof(struct in_addr) >= &m->m_dat[MLEN])
 		goto bad;
 	cnt = m->m_len;
 	m->m_len += sizeof(struct in_addr);
 	cp = mtod(m, u_char *) + sizeof(struct in_addr);
 	bcopy(mtod(m, void *), cp, (unsigned)cnt);
 	bzero(mtod(m, void *), sizeof(struct in_addr));
 
 	for (; cnt > 0; cnt -= optlen, cp += optlen) {
 		opt = cp[IPOPT_OPTVAL];
 		if (opt == IPOPT_EOL)
 			break;
 		if (opt == IPOPT_NOP)
 			optlen = 1;
 		else {
 			if (cnt < IPOPT_OLEN + sizeof(*cp))
 				goto bad;
 			optlen = cp[IPOPT_OLEN];
 			if (optlen < IPOPT_OLEN + sizeof(*cp) || optlen > cnt)
 				goto bad;
 		}
 		switch (opt) {
 
 		default:
 			break;
 
 		case IPOPT_LSRR:
 		case IPOPT_SSRR:
 			/*
 			 * User process specifies route as:
 			 *
 			 *	->A->B->C->D
 			 *
 			 * D must be our final destination (but we can't
 			 * check that since we may not have connected yet).
 			 * A is first hop destination, which doesn't appear
 			 * in actual IP option, but is stored before the
 			 * options.
 			 */
 			/* XXX-BZ PRIV_NETINET_SETHDROPTS? */
 			if (optlen < IPOPT_MINOFF - 1 + sizeof(struct in_addr))
 				goto bad;
 			m->m_len -= sizeof(struct in_addr);
 			cnt -= sizeof(struct in_addr);
 			optlen -= sizeof(struct in_addr);
 			cp[IPOPT_OLEN] = optlen;
 			/*
 			 * Move first hop before start of options.
 			 */
 			bcopy((caddr_t)&cp[IPOPT_OFFSET+1], mtod(m, caddr_t),
 			    sizeof(struct in_addr));
 			/*
 			 * Then copy rest of options back
 			 * to close up the deleted entry.
 			 */
 			bcopy((&cp[IPOPT_OFFSET+1] + sizeof(struct in_addr)),
 			    &cp[IPOPT_OFFSET+1],
 			    (unsigned)cnt - (IPOPT_MINOFF - 1));
 			break;
 		}
 	}
 	if (m->m_len > MAX_IPOPTLEN + sizeof(struct in_addr))
 		goto bad;
 	*pcbopt = m;
 	return (0);
 
 bad:
 	(void)m_free(m);
 	return (EINVAL);
 }
 
 /*
  * Check for the presence of the IP Router Alert option [RFC2113]
  * in the header of an IPv4 datagram.
  *
  * This call is not intended for use from the forwarding path; it is here
  * so that protocol domains may check for the presence of the option.
  * Given how FreeBSD's IPv4 stack is currently structured, the Router Alert
  * option does not have much relevance to the implementation, though this
  * may change in future.
  * Router alert options SHOULD be passed if running in IPSTEALTH mode and
  * we are not the endpoint.
  * Length checks on individual options should already have been performed
  * by ip_dooptions() therefore they are folded under INVARIANTS here.
  *
  * Return zero if not present or options are invalid, non-zero if present.
  */
 int
 ip_checkrouteralert(struct mbuf *m)
 {
 	struct ip *ip = mtod(m, struct ip *);
 	u_char *cp;
 	int opt, optlen, cnt, found_ra;
 
 	found_ra = 0;
 	cp = (u_char *)(ip + 1);
 	cnt = (ip->ip_hl << 2) - sizeof (struct ip);
 	for (; cnt > 0; cnt -= optlen, cp += optlen) {
 		opt = cp[IPOPT_OPTVAL];
 		if (opt == IPOPT_EOL)
 			break;
 		if (opt == IPOPT_NOP)
 			optlen = 1;
 		else {
 #ifdef INVARIANTS
 			if (cnt < IPOPT_OLEN + sizeof(*cp))
 				break;
 #endif
 			optlen = cp[IPOPT_OLEN];
 #ifdef INVARIANTS
 			if (optlen < IPOPT_OLEN + sizeof(*cp) || optlen > cnt)
 				break;
 #endif
 		}
 		switch (opt) {
 		case IPOPT_RA:
 #ifdef INVARIANTS
 			if (optlen != IPOPT_OFFSET + sizeof(uint16_t) ||
 			    (*((uint16_t *)&cp[IPOPT_OFFSET]) != 0))
 			    break;
 			else
 #endif
 			found_ra = 1;
 			break;
 		default:
 			break;
 		}
 	}
 
 	return (found_ra);
 }
Index: head/sys/netinet6/icmp6.c
===================================================================
--- head/sys/netinet6/icmp6.c	(revision 362899)
+++ head/sys/netinet6/icmp6.c	(revision 362900)
@@ -1,2807 +1,2808 @@
 /*-
  * SPDX-License-Identifier: BSD-3-Clause
  *
  * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. Neither the name of the project nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  *	$KAME: icmp6.c,v 1.211 2001/04/04 05:56:20 itojun Exp $
  */
 
 /*-
  * Copyright (c) 1982, 1986, 1988, 1993
  *	The Regents of the University of California.  All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  *	@(#)ip_icmp.c	8.2 (Berkeley) 1/4/94
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #define	MBUF_PRIVATE	/* XXXRW: Optimisation tries to avoid M_EXT mbufs */
 
 #include "opt_inet.h"
 #include "opt_inet6.h"
 
 #include <sys/param.h>
 #include <sys/domain.h>
 #include <sys/jail.h>
 #include <sys/kernel.h>
 #include <sys/lock.h>
 #include <sys/malloc.h>
 #include <sys/mbuf.h>
 #include <sys/proc.h>
 #include <sys/protosw.h>
 #include <sys/signalvar.h>
 #include <sys/socket.h>
 #include <sys/socketvar.h>
 #include <sys/sx.h>
 #include <sys/syslog.h>
 #include <sys/systm.h>
 #include <sys/time.h>
 
 #include <net/if.h>
 #include <net/if_var.h>
 #include <net/if_dl.h>
 #include <net/if_llatbl.h>
 #include <net/if_types.h>
 #include <net/route.h>
 #include <net/route/route_ctl.h>
 #include <net/route/nhop.h>
 #include <net/vnet.h>
 
 #include <netinet/in.h>
 #include <netinet/in_pcb.h>
 #include <netinet/in_var.h>
 #include <netinet/ip6.h>
 #include <netinet/icmp6.h>
 #include <netinet/tcp_var.h>
 
 #include <netinet6/in6_fib.h>
 #include <netinet6/in6_ifattach.h>
 #include <netinet6/in6_pcb.h>
 #include <netinet6/ip6protosw.h>
 #include <netinet6/ip6_var.h>
 #include <netinet6/scope6_var.h>
 #include <netinet6/mld6_var.h>
 #include <netinet6/nd6.h>
 #include <netinet6/send.h>
 
 extern struct domain inet6domain;
 
 VNET_PCPUSTAT_DEFINE(struct icmp6stat, icmp6stat);
 VNET_PCPUSTAT_SYSINIT(icmp6stat);
 
 #ifdef VIMAGE
 VNET_PCPUSTAT_SYSUNINIT(icmp6stat);
 #endif /* VIMAGE */
 
 VNET_DECLARE(struct inpcbinfo, ripcbinfo);
 VNET_DECLARE(struct inpcbhead, ripcb);
 VNET_DECLARE(int, icmp6errppslim);
 VNET_DEFINE_STATIC(int, icmp6errpps_count) = 0;
 VNET_DEFINE_STATIC(struct timeval, icmp6errppslim_last);
 VNET_DECLARE(int, icmp6_nodeinfo);
 
 #define	V_ripcbinfo			VNET(ripcbinfo)
 #define	V_ripcb				VNET(ripcb)
 #define	V_icmp6errppslim		VNET(icmp6errppslim)
 #define	V_icmp6errpps_count		VNET(icmp6errpps_count)
 #define	V_icmp6errppslim_last		VNET(icmp6errppslim_last)
 #define	V_icmp6_nodeinfo		VNET(icmp6_nodeinfo)
 
 static void icmp6_errcount(int, int);
 static int icmp6_rip6_input(struct mbuf **, int);
 static void icmp6_reflect(struct mbuf *, size_t);
 static int icmp6_ratelimit(const struct in6_addr *, const int, const int);
 static const char *icmp6_redirect_diag(struct in6_addr *,
 	struct in6_addr *, struct in6_addr *);
 static struct mbuf *ni6_input(struct mbuf *, int, struct prison *);
 static struct mbuf *ni6_nametodns(const char *, int, int);
 static int ni6_dnsmatch(const char *, int, const char *, int);
 static int ni6_addrs(struct icmp6_nodeinfo *, struct mbuf *,
 			  struct ifnet **, struct in6_addr *);
 static int ni6_store_addrs(struct icmp6_nodeinfo *, struct icmp6_nodeinfo *,
 				struct ifnet *, int);
 static int icmp6_notify_error(struct mbuf **, int, int, int);
 
 /*
  * Kernel module interface for updating icmp6stat.  The argument is an index
  * into icmp6stat treated as an array of u_quad_t.  While this encodes the
  * general layout of icmp6stat into the caller, it doesn't encode its
  * location, so that future changes to add, for example, per-CPU stats
  * support won't cause binary compatibility problems for kernel modules.
  */
 void
 kmod_icmp6stat_inc(int statnum)
 {
 
 	counter_u64_add(VNET(icmp6stat)[statnum], 1);
 }
 
 static void
 icmp6_errcount(int type, int code)
 {
 	switch (type) {
 	case ICMP6_DST_UNREACH:
 		switch (code) {
 		case ICMP6_DST_UNREACH_NOROUTE:
 			ICMP6STAT_INC(icp6s_odst_unreach_noroute);
 			return;
 		case ICMP6_DST_UNREACH_ADMIN:
 			ICMP6STAT_INC(icp6s_odst_unreach_admin);
 			return;
 		case ICMP6_DST_UNREACH_BEYONDSCOPE:
 			ICMP6STAT_INC(icp6s_odst_unreach_beyondscope);
 			return;
 		case ICMP6_DST_UNREACH_ADDR:
 			ICMP6STAT_INC(icp6s_odst_unreach_addr);
 			return;
 		case ICMP6_DST_UNREACH_NOPORT:
 			ICMP6STAT_INC(icp6s_odst_unreach_noport);
 			return;
 		}
 		break;
 	case ICMP6_PACKET_TOO_BIG:
 		ICMP6STAT_INC(icp6s_opacket_too_big);
 		return;
 	case ICMP6_TIME_EXCEEDED:
 		switch (code) {
 		case ICMP6_TIME_EXCEED_TRANSIT:
 			ICMP6STAT_INC(icp6s_otime_exceed_transit);
 			return;
 		case ICMP6_TIME_EXCEED_REASSEMBLY:
 			ICMP6STAT_INC(icp6s_otime_exceed_reassembly);
 			return;
 		}
 		break;
 	case ICMP6_PARAM_PROB:
 		switch (code) {
 		case ICMP6_PARAMPROB_HEADER:
 			ICMP6STAT_INC(icp6s_oparamprob_header);
 			return;
 		case ICMP6_PARAMPROB_NEXTHEADER:
 			ICMP6STAT_INC(icp6s_oparamprob_nextheader);
 			return;
 		case ICMP6_PARAMPROB_OPTION:
 			ICMP6STAT_INC(icp6s_oparamprob_option);
 			return;
 		}
 		break;
 	case ND_REDIRECT:
 		ICMP6STAT_INC(icp6s_oredirect);
 		return;
 	}
 	ICMP6STAT_INC(icp6s_ounknown);
 }
 
 /*
  * A wrapper function for icmp6_error() necessary when the erroneous packet
  * may not contain enough scope zone information.
  */
 void
 icmp6_error2(struct mbuf *m, int type, int code, int param,
     struct ifnet *ifp)
 {
 	struct ip6_hdr *ip6;
 
 	if (ifp == NULL)
 		return;
 
 	if (m->m_len < sizeof(struct ip6_hdr)) {
 		m = m_pullup(m, sizeof(struct ip6_hdr));
 		if (m == NULL) {
 			IP6STAT_INC(ip6s_exthdrtoolong);
 			return;
 		}
 	}
 	ip6 = mtod(m, struct ip6_hdr *);
 
 	if (in6_setscope(&ip6->ip6_src, ifp, NULL) != 0)
 		return;
 	if (in6_setscope(&ip6->ip6_dst, ifp, NULL) != 0)
 		return;
 
 	icmp6_error(m, type, code, param);
 }
 
 /*
  * Answer a bad IPv6 packet with an ICMPv6 error message.
  *
  * m      the offending packet.  It must not be used by the caller
  *        afterwards: it is either freed here, passed on to
  *        icmp6_reflect(), or already gone after a failed m_pullup() or
  *        M_PREPEND().
  * type   ICMPv6 type of the error to generate.
  * code   ICMPv6 code of the error to generate.
  * param  value stored (in network byte order) in the icmp6_pptr field.
  *
  * No error is produced for decrypted packets (where M_DECRYPTED exists),
  * for most multicast/broadcast traffic, for packets with an unspecified or
  * multicast source, in response to another ICMPv6 error or redirect, or
  * when the rate limiter says no.
  */
 void
 icmp6_error(struct mbuf *m, int type, int code, int param)
 {
 	struct ip6_hdr *orig_hdr, *err_hdr;
 	struct icmp6_hdr *err_icmp6;
 	int last_off, last_proto;
 
 	ICMP6STAT_INC(icp6s_error);
 
 	/* Account for this error in the per-type/per-code counters. */
 	icmp6_errcount(type, code);
 
 #ifdef M_DECRYPTED	/*not openbsd*/
 	if ((m->m_flags & M_DECRYPTED) != 0) {
 		ICMP6STAT_INC(icp6s_canterror);
 		goto drop;
 	}
 #endif
 
 	/* The fixed IPv6 header has to be contiguous before we look at it. */
 	if (m->m_len < sizeof(*orig_hdr)) {
 		m = m_pullup(m, sizeof(*orig_hdr));
 		if (m == NULL) {
 			IP6STAT_INC(ip6s_exthdrtoolong);
 			return;
 		}
 	}
 	orig_hdr = mtod(m, struct ip6_hdr *);
 
 	/*
 	 * RFC 2463, Section 2.4: stay silent for packets that were sent to a
 	 * multicast address or arrived as link-layer multicast/broadcast.
 	 * Item e.2 of that section allows two exceptions:
 	 * - Packet Too Big, which path MTU discovery depends on;
 	 * - Parameter Problem reporting an unrecognized option.  Whether the
 	 *   option type permits this was already decided in
 	 *   ip6_unknown_opt(), so looking at type and code is enough here.
 	 */
 	if ((m->m_flags & (M_BCAST | M_MCAST)) != 0 ||
 	    IN6_IS_ADDR_MULTICAST(&orig_hdr->ip6_dst)) {
 		if (type != ICMP6_PACKET_TOO_BIG &&
 		    !(type == ICMP6_PARAM_PROB &&
 		    code == ICMP6_PARAMPROB_OPTION))
 			goto drop;
 	}
 
 	/*
 	 * RFC 2463, 2.4 (e.5): the source must identify a single node.
 	 * XXX: the case of anycast source?
 	 */
 	if (IN6_IS_ADDR_UNSPECIFIED(&orig_hdr->ip6_src))
 		goto drop;
 	if (IN6_IS_ADDR_MULTICAST(&orig_hdr->ip6_src))
 		goto drop;
 
 	/*
 	 * Never answer an ICMPv6 error with another error.  Redirects are
 	 * informational by number but get the same treatment.  Anything
 	 * else (other ICMPv6 informational messages, non-ICMPv6 payloads)
 	 * may trigger an error.
 	 */
 	last_proto = -1;
 	last_off = ip6_lasthdr(m, 0, IPPROTO_IPV6, &last_proto);
 	if (last_off >= 0 && last_proto == IPPROTO_ICMPV6) {
 		struct icmp6_hdr *inner;
 
 		if (m->m_len < last_off + sizeof(struct icmp6_hdr)) {
 			m = m_pullup(m, last_off + sizeof(struct icmp6_hdr));
 			if (m == NULL) {
 				IP6STAT_INC(ip6s_exthdrtoolong);
 				return;
 			}
 		}
 		/* The data may have moved; derive both pointers again. */
 		orig_hdr = mtod(m, struct ip6_hdr *);
 		inner = (struct icmp6_hdr *)(mtod(m, caddr_t) + last_off);
 
 		if (inner->icmp6_type < ICMP6_ECHO_REQUEST ||
 		    inner->icmp6_type == ND_REDIRECT) {
 			ICMP6STAT_INC(icp6s_canterror);
 			goto drop;
 		}
 	}
 
 	/* Last gate: the rate limiter. */
 	if (icmp6_ratelimit(&orig_hdr->ip6_src, type, code)) {
 		ICMP6STAT_INC(icp6s_toofreq);
 		goto drop;
 	}
 
 	/*
 	 * All checks passed.  Trim the quoted packet from the tail so that
 	 * it is no longer than ICMPV6_PLD_MAXLEN, then make room in front
 	 * of it for the new IPv6 and ICMPv6 headers.
 	 */
 	if (m->m_pkthdr.len >= ICMPV6_PLD_MAXLEN)
 		m_adj(m, ICMPV6_PLD_MAXLEN - m->m_pkthdr.len);
 
 	/* FIB is also copied over. */
 	M_PREPEND(m, sizeof(struct ip6_hdr) + sizeof(struct icmp6_hdr),
 	    M_NOWAIT);
 	if (m == NULL) {
 		nd6log((LOG_DEBUG, "ENOBUFS in icmp6_error %d\n", __LINE__));
 		return;
 	}
 
 	/*
 	 * Seed the new header with the original addresses, then strip the
 	 * embedded scope from the quoted header's addresses.
 	 */
 	err_hdr = mtod(m, struct ip6_hdr *);
 	err_hdr->ip6_src = orig_hdr->ip6_src;
 	err_hdr->ip6_dst = orig_hdr->ip6_dst;
 
 	in6_clearscope(&orig_hdr->ip6_src);
 	in6_clearscope(&orig_hdr->ip6_dst);
 
 	err_icmp6 = (struct icmp6_hdr *)(err_hdr + 1);
 	err_icmp6->icmp6_type = type;
 	err_icmp6->icmp6_code = code;
 	err_icmp6->icmp6_pptr = htonl((u_int32_t)param);
 
 	ICMP6STAT_INC(icp6s_outhist[type]);
 	/* header order: IPv6 - ICMPv6 */
 	icmp6_reflect(m, sizeof(struct ip6_hdr));
 	return;
 
   drop:
 	/* No error can or should be generated; just release the packet. */
 	m_freem(m);
 }
 
 /*
  * Process a received ICMP6 message.
  *
  * mp     in/out pointer to the packet.  On return *mp is NULL when the
  *        packet was freed or handed off, otherwise it is whatever mbuf
  *        pointer is left after processing.
  * offp   *offp is the offset of the ICMPv6 header from the start of the
  *        packet.
  * proto  not referenced by this function.
  *
  * The message length, multicast group membership and checksum are
  * verified first; the message is then dispatched on its type.  Unless a
  * case frees or gives away the packet, the remaining mbuf is passed to
  * icmp6_rip6_input() for delivery to sockets.
  *
  * Every return path yields IPPROTO_DONE.
  */
 int
 icmp6_input(struct mbuf **mp, int *offp, int proto)
 {
 	struct mbuf *m, *n;
 	struct ifnet *ifp;
 	struct ip6_hdr *ip6, *nip6;
 	struct icmp6_hdr *icmp6, *nicmp6;
 	char ip6bufs[INET6_ADDRSTRLEN], ip6bufd[INET6_ADDRSTRLEN];
 	int code, error, icmp6len, ip6len, noff, off, sum;
 
 	NET_EPOCH_ASSERT();
 
 	m = *mp;
 	off = *offp;
 
 	/* Make the ICMPv6 header contiguous with the IPv6 header. */
 	if (m->m_len < off + sizeof(struct icmp6_hdr)) {
 		m = m_pullup(m, off + sizeof(struct icmp6_hdr));
 		if (m == NULL) {
 			IP6STAT_INC(ip6s_exthdrtoolong);
 			*mp = m;
 			return (IPPROTO_DONE);
 		}
 	}
 
 	/*
 	 * Locate icmp6 structure in mbuf, and check
 	 * that not corrupted and of at least minimum length
 	 */
 
 	icmp6len = m->m_pkthdr.len - off;
 	if (icmp6len < sizeof(struct icmp6_hdr)) {
 		ICMP6STAT_INC(icp6s_tooshort);
 		goto freeit;
 	}
 
 	ip6 = mtod(m, struct ip6_hdr *);
 	ifp = m->m_pkthdr.rcvif;
 	/*
 	 * Check multicast group membership.
 	 * Note: SSM filters are not applied for ICMPv6 traffic.
 	 */
 	if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst)) {
 		struct in6_multi	*inm;
 
 		inm = in6m_lookup(ifp, &ip6->ip6_dst);
 		if (inm == NULL) {
 			IP6STAT_INC(ip6s_notmember);
 			in6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_discard);
 			goto freeit;
 		}
 	}
 
 	/* Calculate the checksum. */
 	icmp6 = (struct icmp6_hdr *)((caddr_t)ip6 + off);
 	code = icmp6->icmp6_code;
 	if ((sum = in6_cksum(m, IPPROTO_ICMPV6, off, icmp6len)) != 0) {
 		nd6log((LOG_ERR,
 		    "ICMP6 checksum error(%d|%x) %s\n",
 		    icmp6->icmp6_type, sum,
 		    ip6_sprintf(ip6bufs, &ip6->ip6_src)));
 		ICMP6STAT_INC(icp6s_checksum);
 		goto freeit;
 	}
 
 	ICMP6STAT_INC(icp6s_inhist[icmp6->icmp6_type]);
 	icmp6_ifstat_inc(ifp, ifs6_in_msg);
 	if (icmp6->icmp6_type < ICMP6_INFOMSG_MASK)
 		icmp6_ifstat_inc(ifp, ifs6_in_error);
 
 	/* Total packet length as claimed by the IPv6 header; used by SeND. */
 	ip6len = sizeof(struct ip6_hdr) + ntohs(ip6->ip6_plen);
 	/*
 	 * For the error types below, "code" is rewritten from the ICMPv6
 	 * code to the PRC_* value that is handed to icmp6_notify_error()
 	 * at the "deliver" label.
 	 */
 	switch (icmp6->icmp6_type) {
 	case ICMP6_DST_UNREACH:
 		icmp6_ifstat_inc(ifp, ifs6_in_dstunreach);
 		switch (code) {
 		case ICMP6_DST_UNREACH_NOROUTE:
 		case ICMP6_DST_UNREACH_ADDR:	/* PRC_HOSTDEAD is a DOS */
 			code = PRC_UNREACH_NET;
 			break;
 		case ICMP6_DST_UNREACH_ADMIN:
 			icmp6_ifstat_inc(ifp, ifs6_in_adminprohib);
 			code = PRC_UNREACH_ADMIN_PROHIB;
 			break;
 		case ICMP6_DST_UNREACH_BEYONDSCOPE:
 			/* I mean "source address was incorrect." */
 			code = PRC_PARAMPROB;
 			break;
 		case ICMP6_DST_UNREACH_NOPORT:
 			code = PRC_UNREACH_PORT;
 			break;
 		default:
 			goto badcode;
 		}
 		goto deliver;
 		break;
 
 	case ICMP6_PACKET_TOO_BIG:
 		icmp6_ifstat_inc(ifp, ifs6_in_pkttoobig);
 
 		/* validation is made in icmp6_mtudisc_update */
 
 		code = PRC_MSGSIZE;
 
 		/*
 		 * Updating the path MTU will be done after examining
 		 * intermediate extension headers.
 		 */
 		goto deliver;
 		break;
 
 	case ICMP6_TIME_EXCEEDED:
 		icmp6_ifstat_inc(ifp, ifs6_in_timeexceed);
 		switch (code) {
 		case ICMP6_TIME_EXCEED_TRANSIT:
 			code = PRC_TIMXCEED_INTRANS;
 			break;
 		case ICMP6_TIME_EXCEED_REASSEMBLY:
 			code = PRC_TIMXCEED_REASS;
 			break;
 		default:
 			goto badcode;
 		}
 		goto deliver;
 		break;
 
 	case ICMP6_PARAM_PROB:
 		icmp6_ifstat_inc(ifp, ifs6_in_paramprob);
 		switch (code) {
 		case ICMP6_PARAMPROB_NEXTHEADER:
 			code = PRC_UNREACH_PROTOCOL;
 			break;
 		case ICMP6_PARAMPROB_HEADER:
 		case ICMP6_PARAMPROB_OPTION:
 			code = PRC_PARAMPROB;
 			break;
 		default:
 			goto badcode;
 		}
 		goto deliver;
 		break;
 
 	case ICMP6_ECHO_REQUEST:
 		icmp6_ifstat_inc(ifp, ifs6_in_echo);
 		if (code != 0)
 			goto badcode;
 		/*
 		 * The reply is built in a copy (n); the original m still
 		 * goes on to raw-socket delivery below.
 		 */
 		if ((n = m_copym(m, 0, M_COPYALL, M_NOWAIT)) == NULL) {
 			/* Give up remote */
 			break;
 		}
 		/*
 		 * If the copy cannot be written in place, or its first mbuf
 		 * does not hold the headers up to the ICMPv6 header, build a
 		 * fresh header mbuf holding just IPv6 + ICMPv6 and chain the
 		 * rest of the copy (with its old headers trimmed) behind it.
 		 * Any headers between the two are left out, so the ICMPv6
 		 * offset in the reply becomes sizeof(struct ip6_hdr).
 		 */
 		if (!M_WRITABLE(n)
 		 || n->m_len < off + sizeof(struct icmp6_hdr)) {
 			struct mbuf *n0 = n;
 			int n0len;
 
 			CTASSERT(sizeof(*nip6) + sizeof(*nicmp6) <= MHLEN);
 			n = m_gethdr(M_NOWAIT, n0->m_type);
 			if (n == NULL) {
 				/* Give up remote */
 				m_freem(n0);
 				break;
 			}
 
 			m_move_pkthdr(n, n0);	/* FIB copied. */
 			n0len = n0->m_pkthdr.len;	/* save for use below */
 			/*
 			 * Copy IPv6 and ICMPv6 only.
 			 */
 			nip6 = mtod(n, struct ip6_hdr *);
 			bcopy(ip6, nip6, sizeof(struct ip6_hdr));
 			nicmp6 = (struct icmp6_hdr *)(nip6 + 1);
 			bcopy(icmp6, nicmp6, sizeof(struct icmp6_hdr));
 			noff = sizeof(struct ip6_hdr);
 			/* new mbuf contains only ipv6+icmpv6 headers */
 			n->m_len = noff + sizeof(struct icmp6_hdr);
 			/*
 			 * Adjust mbuf.  ip6_plen will be adjusted in
 			 * ip6_output().
 			 */
 			m_adj(n0, off + sizeof(struct icmp6_hdr));
 			/* recalculate complete packet size */
 			n->m_pkthdr.len = n0len + (noff - off);
 			n->m_next = n0;
 		} else {
 			if (n->m_len < off + sizeof(*nicmp6)) {
 				n = m_pullup(n, off + sizeof(*nicmp6));
 				if (n == NULL) {
 					IP6STAT_INC(ip6s_exthdrtoolong);
 					break;
 				}
 			}
 			nicmp6 = (struct icmp6_hdr *)(mtod(n, caddr_t) + off);
 			noff = off;
 		}
 		if (n) {
 			/* Turn the copy into an Echo Reply and send it back. */
 			nicmp6->icmp6_type = ICMP6_ECHO_REPLY;
 			nicmp6->icmp6_code = 0;
 			ICMP6STAT_INC(icp6s_reflect);
 			ICMP6STAT_INC(icp6s_outhist[ICMP6_ECHO_REPLY]);
 			icmp6_reflect(n, noff);
 		}
 		break;
 
 	case ICMP6_ECHO_REPLY:
 		icmp6_ifstat_inc(ifp, ifs6_in_echoreply);
 		if (code != 0)
 			goto badcode;
 		break;
 
 	case MLD_LISTENER_QUERY:
 	case MLD_LISTENER_REPORT:
 	case MLD_LISTENER_DONE:
 	case MLDV2_LISTENER_REPORT:
 		/*
 		 * Drop MLD traffic which is not link-local, has a hop limit
 		 * of greater than 1 hop, or which does not have the
 		 * IPv6 HBH Router Alert option.
 		 * As IPv6 HBH options are stripped in ip6_input() we must
 		 * check an mbuf header flag.
 		 * XXX Should we also sanity check that these messages
 		 * were directed to a link-local multicast prefix?
 		 */
 		if ((ip6->ip6_hlim != 1) || (m->m_flags & M_RTALERT_MLD) == 0)
 			goto freeit;
 		/*
 		 * NOTE(review): a non-zero return is treated as "mld_input()
 		 * has disposed of the packet" -- confirm against mld_input().
 		 */
 		if (mld_input(&m, off, icmp6len) != 0) {
 			*mp = NULL;
 			return (IPPROTO_DONE);
 		}
 		/* m stays. */
 		break;
 
 	case ICMP6_WRUREQUEST:	/* ICMP6_FQDN_QUERY */
 	    {
 		enum { WRU, FQDN } mode;
 		struct prison *pr;
 
 		if (!V_icmp6_nodeinfo)
 			break;
 
 		/*
 		 * The message length selects the format: a header plus
 		 * exactly 4 bytes is handled as WRU, anything at least as
 		 * large as struct icmp6_nodeinfo as a Node Information
 		 * (FQDN) query.
 		 */
 		if (icmp6len == sizeof(struct icmp6_hdr) + 4)
 			mode = WRU;
 		else if (icmp6len >= sizeof(struct icmp6_nodeinfo))
 			mode = FQDN;
 		else
 			goto badlen;
 
 		/*
 		 * Pick the first prison attached to the receiving
 		 * interface's vnet, falling back to the current thread's
 		 * prison; its hostname is used in the reply.
 		 */
 		pr = NULL;
 		sx_slock(&allprison_lock);
 		TAILQ_FOREACH(pr, &allprison, pr_list)
 			if (pr->pr_vnet == ifp->if_vnet)
 				break; 
 		sx_sunlock(&allprison_lock);
 		if (pr == NULL)
 			pr = curthread->td_ucred->cr_prison;
 		if (mode == FQDN) {
 			if (m->m_len < off + sizeof(struct icmp6_nodeinfo)) {
 				m = m_pullup(m, off +
 				    sizeof(struct icmp6_nodeinfo));
 				if (m == NULL) {
 					IP6STAT_INC(ip6s_exthdrtoolong);
 					*mp = m;
 					return (IPPROTO_DONE);
 				}
 			}
 			/* ni6_input() works on a copy and returns the reply. */
 			n = m_copym(m, 0, M_COPYALL, M_NOWAIT);
 			if (n)
 				n = ni6_input(n, off, pr);
 			/* XXX meaningless if n == NULL */
 			noff = sizeof(struct ip6_hdr);
 		} else {
 			u_char *p;
 			int maxhlen, hlen;
 
 			/*
 			 * XXX: this combination of flags is pointless,
 			 * but should we keep this for compatibility?
 			 */
 			if ((V_icmp6_nodeinfo & (ICMP6_NODEINFO_FQDNOK |
 			    ICMP6_NODEINFO_TMPADDROK)) !=
 			    (ICMP6_NODEINFO_FQDNOK | ICMP6_NODEINFO_TMPADDROK))
 				break;
 
 			if (code != 0)
 				goto badcode;
 
 			CTASSERT(sizeof(*nip6) + sizeof(*nicmp6) + 4 <= MHLEN);
 			n = m_gethdr(M_NOWAIT, m->m_type);
 			if (n == NULL) {
 				/* Give up remote */
 				break;
 			}
 			if (!m_dup_pkthdr(n, m, M_NOWAIT)) {
 				/*
 				 * Previous code did a blind M_COPY_PKTHDR
 				 * and said "just for rcvif".  If true, then
 				 * we could tolerate the dup failing (due to
 				 * the deep copy of the tag chain).  For now
 				 * be conservative and just fail.
 				 */
 				m_free(n);
 				n = NULL;
 				break;
 			}
 			/*
 			 * Copy IPv6 and ICMPv6 only.
 			 */
 			nip6 = mtod(n, struct ip6_hdr *);
 			bcopy(ip6, nip6, sizeof(struct ip6_hdr));
 			nicmp6 = (struct icmp6_hdr *)(nip6 + 1);
 			bcopy(icmp6, nicmp6, sizeof(struct icmp6_hdr));
 			p = (u_char *)(nicmp6 + 1);
 			bzero(p, 4);
 
 			/*
 			 * Reply layout: IPv6 header, ICMPv6 header, 4 zero
 			 * bytes, then as much of the hostname as fits in
 			 * this single mbuf.
 			 */
 			maxhlen = M_TRAILINGSPACE(n) -
 			    (sizeof(*nip6) + sizeof(*nicmp6) + 4);
 			mtx_lock(&pr->pr_mtx);
 			hlen = strlen(pr->pr_hostname);
 			if (maxhlen > hlen)
 				maxhlen = hlen;
 			/* meaningless TTL */
 			bcopy(pr->pr_hostname, p + 4, maxhlen);
 			mtx_unlock(&pr->pr_mtx);
 			noff = sizeof(struct ip6_hdr);
 			n->m_pkthdr.len = n->m_len = sizeof(struct ip6_hdr) +
 				sizeof(struct icmp6_hdr) + 4 + maxhlen;
 			nicmp6->icmp6_type = ICMP6_WRUREPLY;
 			nicmp6->icmp6_code = 0;
 		}
 		if (n) {
 			ICMP6STAT_INC(icp6s_reflect);
 			ICMP6STAT_INC(icp6s_outhist[ICMP6_WRUREPLY]);
 			icmp6_reflect(n, noff);
 		}
 		break;
 	    }
 
 	case ICMP6_WRUREPLY:
 		if (code != 0)
 			goto badcode;
 		break;
 
 	/*
 	 * The five neighbor discovery cases below share one pattern:
 	 * validate code and minimum length, offer the packet to the SeND
 	 * hook if one is registered, then give the original mbuf to the
 	 * ND handler while a copy (n) continues to raw-socket delivery.
 	 * NOTE(review): this relies on the ND handlers consuming the mbuf
 	 * they are given, and on a zero return from the hook meaning it
 	 * took ownership of the packet (m is cleared so that "freeit" does
 	 * not free it) -- confirm against those functions.
 	 */
 	case ND_ROUTER_SOLICIT:
 		icmp6_ifstat_inc(ifp, ifs6_in_routersolicit);
 		if (code != 0)
 			goto badcode;
 		if (icmp6len < sizeof(struct nd_router_solicit))
 			goto badlen;
 		if (send_sendso_input_hook != NULL) {
 			if (m->m_len < off + icmp6len) {
 				m = m_pullup(m, off + icmp6len);
 				if (m == NULL) {
 					IP6STAT_INC(ip6s_exthdrtoolong);
 					*mp = NULL;
 					return (IPPROTO_DONE);
 				}
 			}
 			error = send_sendso_input_hook(m, ifp, SND_IN, ip6len);
 			if (error == 0) {
 				m = NULL;
 				goto freeit;
 			}
 		}
 		n = m_copym(m, 0, M_COPYALL, M_NOWAIT);
 		nd6_rs_input(m, off, icmp6len);
 		m = n;
 		if (m == NULL)
 			goto freeit;
 		break;
 
 	case ND_ROUTER_ADVERT:
 		icmp6_ifstat_inc(ifp, ifs6_in_routeradvert);
 		if (code != 0)
 			goto badcode;
 		if (icmp6len < sizeof(struct nd_router_advert))
 			goto badlen;
 		if (send_sendso_input_hook != NULL) {
 			error = send_sendso_input_hook(m, ifp, SND_IN, ip6len);
 			if (error == 0) {
 				m = NULL;
 				goto freeit;
 			}
 		}
 		n = m_copym(m, 0, M_COPYALL, M_NOWAIT);
 		nd6_ra_input(m, off, icmp6len);
 		m = n;
 		if (m == NULL)
 			goto freeit;
 		break;
 
 	case ND_NEIGHBOR_SOLICIT:
 		icmp6_ifstat_inc(ifp, ifs6_in_neighborsolicit);
 		if (code != 0)
 			goto badcode;
 		if (icmp6len < sizeof(struct nd_neighbor_solicit))
 			goto badlen;
 		if (send_sendso_input_hook != NULL) {
 			error = send_sendso_input_hook(m, ifp, SND_IN, ip6len);
 			if (error == 0) {
 				m = NULL;
 				goto freeit;
 			}
 		}
 		n = m_copym(m, 0, M_COPYALL, M_NOWAIT);
 		nd6_ns_input(m, off, icmp6len);
 		m = n;
 		if (m == NULL)
 			goto freeit;
 		break;
 
 	case ND_NEIGHBOR_ADVERT:
 		icmp6_ifstat_inc(ifp, ifs6_in_neighboradvert);
 		if (code != 0)
 			goto badcode;
 		if (icmp6len < sizeof(struct nd_neighbor_advert))
 			goto badlen;
 		if (send_sendso_input_hook != NULL) {
 			error = send_sendso_input_hook(m, ifp, SND_IN, ip6len);
 			if (error == 0) {
 				m = NULL;
 				goto freeit;
 			}
 		}
 		n = m_copym(m, 0, M_COPYALL, M_NOWAIT);
 		nd6_na_input(m, off, icmp6len);
 		m = n;
 		if (m == NULL)
 			goto freeit;
 		break;
 
 	case ND_REDIRECT:
 		icmp6_ifstat_inc(ifp, ifs6_in_redirect);
 		if (code != 0)
 			goto badcode;
 		if (icmp6len < sizeof(struct nd_redirect))
 			goto badlen;
 		if (send_sendso_input_hook != NULL) {
 			error = send_sendso_input_hook(m, ifp, SND_IN, ip6len);
 			if (error == 0) {
 				m = NULL;
 				goto freeit;
 			}
 		}
 		n = m_copym(m, 0, M_COPYALL, M_NOWAIT);
 		icmp6_redirect_input(m, off);
 		m = n;
 		if (m == NULL)
 			goto freeit;
 		break;
 
 	case ICMP6_ROUTER_RENUMBERING:
 		if (code != ICMP6_ROUTER_RENUMBERING_COMMAND &&
 		    code != ICMP6_ROUTER_RENUMBERING_RESULT)
 			goto badcode;
 		if (icmp6len < sizeof(struct icmp6_router_renum))
 			goto badlen;
 		break;
 
 	default:
 		nd6log((LOG_DEBUG,
 		    "icmp6_input: unknown type %d(src=%s, dst=%s, ifid=%d)\n",
 		    icmp6->icmp6_type, ip6_sprintf(ip6bufs, &ip6->ip6_src),
 		    ip6_sprintf(ip6bufd, &ip6->ip6_dst),
 		    ifp ? ifp->if_index : 0));
 		if (icmp6->icmp6_type < ICMP6_ECHO_REQUEST) {
 			/* ICMPv6 error: MUST deliver it by spec... */
 			code = PRC_NCMDS;
 			/* deliver */
 		} else {
 			/* ICMPv6 informational: MUST not deliver */
 			break;
 		}
 	/*
 	 * The three labels below sit inside the switch on purpose: the
 	 * cases above jump to them, and each one leaves through "break",
 	 * so the packet still reaches the raw-socket delivery that follows
 	 * the switch.
 	 */
 	deliver:
 		if (icmp6_notify_error(&m, off, icmp6len, code) != 0) {
 			/* In this case, m should've been freed. */
 			*mp = NULL;
 			return (IPPROTO_DONE);
 		}
 		break;
 
 	badcode:
 		ICMP6STAT_INC(icp6s_badcode);
 		break;
 
 	badlen:
 		ICMP6STAT_INC(icp6s_badlen);
 		break;
 	}
 
 	/* deliver the packet to appropriate sockets */
 	icmp6_rip6_input(&m, *offp);
 
 	*mp = m;
 	return (IPPROTO_DONE);
 
  freeit:
 	/* m may be NULL here when the packet was already handed off. */
 	m_freem(m);
 	*mp = NULL;
 	return (IPPROTO_DONE);
 }
 
 /*
  * Hand a received ICMPv6 error to the upper-layer protocol of the packet
  * that triggered it.
  *
  * The payload of the error starts with the offending IPv6 header.  Its
  * extension header chain is walked to find the upper-layer protocol number
  * and, when a type 0 routing header with segments left is present, the
  * final destination.  For Packet Too Big the path MTU is updated through
  * icmp6_mtudisc_update(); then the protocol's pr_ctlinput handler, if
  * there is one, is called.
  *
  * mp        in/out packet pointer; written back because m_pullup() may
  *           replace the mbuf.
  * off       offset of the ICMPv6 header within the packet.
  * icmp6len  length of the ICMPv6 message, header included.
  * code      value passed as the first argument of the pr_ctlinput handler.
  *
  * Returns 0 with *mp set to the current mbuf, or -1 with *mp set to NULL
  * when the packet was too short, a pullup failed, or a scope could not be
  * set.
  */
 static int
 icmp6_notify_error(struct mbuf **mp, int off, int icmp6len, int code)
 {
 	struct mbuf *m;
 	struct icmp6_hdr *icmp6;
 	struct ip6_hdr *eip6;
 	u_int32_t notifymtu;
 	struct sockaddr_in6 icmp6src, icmp6dst;
 
 	m = *mp;
 
 	/* The error must at least quote a complete IPv6 header. */
 	if (icmp6len < sizeof(struct icmp6_hdr) + sizeof(struct ip6_hdr)) {
 		ICMP6STAT_INC(icp6s_tooshort);
 		goto freeit;
 	}
 
 	if (m->m_len < off + sizeof(*icmp6) + sizeof(struct ip6_hdr)) {
 		m = m_pullup(m, off + sizeof(*icmp6) + sizeof(struct ip6_hdr));
 		if (m == NULL) {
 			IP6STAT_INC(ip6s_exthdrtoolong);
 			*mp = m;
 			return (-1);
 		}
 	}
 	icmp6 = (struct icmp6_hdr *)(mtod(m, caddr_t) + off);
 	eip6 = (struct ip6_hdr *)(icmp6 + 1);
 
 	/* Detect the upper level protocol */
 	{
 		void (*ctlfunc)(int, struct sockaddr *, void *);
 		u_int8_t nxt = eip6->ip6_nxt;
 		/* eoff: offset of the header currently being examined. */
 		int eoff = off + sizeof(struct icmp6_hdr) +
 		    sizeof(struct ip6_hdr);
 		struct ip6ctlparam ip6cp;
 		struct in6_addr *finaldst = NULL;
 		int icmp6type = icmp6->icmp6_type;
 		struct ip6_frag *fh;
 		struct ip6_rthdr *rth;
 		struct ip6_rthdr0 *rth0;
 		int rthlen;
 
 		/*
 		 * Each iteration pulls up just enough of the header at eoff
 		 * to read its length and next-header fields, then advances
 		 * eoff and nxt.  The loop ends via "goto notify".
 		 */
 		while (1) { /* XXX: should avoid infinite loop explicitly? */
 			struct ip6_ext *eh;
 
 			switch (nxt) {
 			case IPPROTO_HOPOPTS:
 			case IPPROTO_DSTOPTS:
 			case IPPROTO_AH:
 				if (m->m_len < eoff + sizeof(struct ip6_ext)) {
 					m = m_pullup(m, eoff +
 					    sizeof(struct ip6_ext));
 					if (m == NULL) {
 						IP6STAT_INC(ip6s_exthdrtoolong);
 						*mp = m;
 						return (-1);
 					}
 				}
 				eh = (struct ip6_ext *)
 				    (mtod(m, caddr_t) + eoff);
 				/*
 				 * AH counts its length in 4-byte units (plus
 				 * 2), the option headers in 8-byte units
 				 * (plus 1).
 				 */
 				if (nxt == IPPROTO_AH)
 					eoff += (eh->ip6e_len + 2) << 2;
 				else
 					eoff += (eh->ip6e_len + 1) << 3;
 				nxt = eh->ip6e_nxt;
 				break;
 			case IPPROTO_ROUTING:
 				/*
 				 * When the erroneous packet contains a
 				 * routing header, we should examine the
 				 * header to determine the final destination.
 				 * Otherwise, we can't properly update
 				 * information that depends on the final
 				 * destination (e.g. path MTU).
 				 */
 				if (m->m_len < eoff + sizeof(*rth)) {
 					m = m_pullup(m, eoff + sizeof(*rth));
 					if (m == NULL) {
 						IP6STAT_INC(ip6s_exthdrtoolong);
 						*mp = m;
 						return (-1);
 					}
 				}
 				rth = (struct ip6_rthdr *)
 				    (mtod(m, caddr_t) + eoff);
 				rthlen = (rth->ip6r_len + 1) << 3;
 				/*
 				 * XXX: currently there is no
 				 * officially defined type other
 				 * than type-0.
 				 * Note that if the segment left field
 				 * is 0, all intermediate hops must
 				 * have been passed.
 				 */
 				if (rth->ip6r_segleft &&
 				    rth->ip6r_type == IPV6_RTHDR_TYPE_0) {
 					int hops;
 
 					if (m->m_len < eoff + rthlen) {
 						m = m_pullup(m, eoff + rthlen);
 						if (m == NULL) {
 							IP6STAT_INC(
 							    ip6s_exthdrtoolong);
 							*mp = m;
 							return (-1);
 						}
 					}
 					rth0 = (struct ip6_rthdr0 *)
 					    (mtod(m, caddr_t) + eoff);
 					/* just ignore a bogus header */
 					/*
 					 * The final destination is the last
 					 * address in the list that follows
 					 * the fixed part of the header.
 					 * NOTE(review): finaldst points into
 					 * the mbuf data and rth is not
 					 * re-derived after the pullup above;
 					 * both rely on the data not having
 					 * moved -- confirm.
 					 */
 					if ((rth0->ip6r0_len % 2) == 0 &&
 					    (hops = rth0->ip6r0_len/2))
 						finaldst = (struct in6_addr *)(rth0 + 1) + (hops - 1);
 				}
 				eoff += rthlen;
 				nxt = rth->ip6r_nxt;
 				break;
 			case IPPROTO_FRAGMENT:
 				if (m->m_len < eoff + sizeof(struct ip6_frag)) {
 					m = m_pullup(m, eoff +
 					    sizeof(struct ip6_frag));
 					if (m == NULL) {
 						IP6STAT_INC(ip6s_exthdrtoolong);
 						*mp = m;
 						return (-1);
 					}
 				}
 				fh = (struct ip6_frag *)(mtod(m, caddr_t) +
 				    eoff);
 				/*
 				 * Data after a fragment header is meaningless
 				 * unless it is the first fragment, but
 				 * we'll go to the notify label for path MTU
 				 * discovery.
 				 */
 				if (fh->ip6f_offlg & IP6F_OFF_MASK)
 					goto notify;
 
 				eoff += sizeof(struct ip6_frag);
 				nxt = fh->ip6f_nxt;
 				break;
 			default:
 				/*
 				 * This case includes ESP and the No Next
 				 * Header.  In such cases going to the notify
 				 * label does not have any meaning
 				 * (i.e. ctlfunc will be NULL), but we go
 				 * anyway since we might have to update
 				 * path MTU information.
 				 */
 				goto notify;
 			}
 		}
 	  notify:
 		/* Pullups in the loop may have moved the data; start over. */
 		icmp6 = (struct icmp6_hdr *)(mtod(m, caddr_t) + off);
 
 		/*
 		 * retrieve parameters from the inner IPv6 header, and convert
 		 * them into sockaddr structures.
 		 * XXX: there is no guarantee that the source or destination
 		 * addresses of the inner packet are in the same scope as
 		 * the addresses of the icmp packet.  But there is no other
 		 * way to determine the zone.
 		 */
 		eip6 = (struct ip6_hdr *)(icmp6 + 1);
 
 		bzero(&icmp6dst, sizeof(icmp6dst));
 		icmp6dst.sin6_len = sizeof(struct sockaddr_in6);
 		icmp6dst.sin6_family = AF_INET6;
 		if (finaldst == NULL)
 			icmp6dst.sin6_addr = eip6->ip6_dst;
 		else
 			icmp6dst.sin6_addr = *finaldst;
 		if (in6_setscope(&icmp6dst.sin6_addr, m->m_pkthdr.rcvif, NULL))
 			goto freeit;
 		bzero(&icmp6src, sizeof(icmp6src));
 		icmp6src.sin6_len = sizeof(struct sockaddr_in6);
 		icmp6src.sin6_family = AF_INET6;
 		icmp6src.sin6_addr = eip6->ip6_src;
 		if (in6_setscope(&icmp6src.sin6_addr, m->m_pkthdr.rcvif, NULL))
 			goto freeit;
 		icmp6src.sin6_flowinfo =
 		    (eip6->ip6_flow & IPV6_FLOWLABEL_MASK);
 
 		/* Collect everything the control-input handlers receive. */
 		if (finaldst == NULL)
 			finaldst = &eip6->ip6_dst;
 		ip6cp.ip6c_m = m;
 		ip6cp.ip6c_icmp6 = icmp6;
 		ip6cp.ip6c_ip6 = (struct ip6_hdr *)(icmp6 + 1);
 		ip6cp.ip6c_off = eoff;
 		ip6cp.ip6c_finaldst = finaldst;
 		ip6cp.ip6c_src = &icmp6src;
 		ip6cp.ip6c_nxt = nxt;
 
 		if (icmp6type == ICMP6_PACKET_TOO_BIG) {
 			notifymtu = ntohl(icmp6->icmp6_mtu);
 			ip6cp.ip6c_cmdarg = (void *)&notifymtu;
 			icmp6_mtudisc_update(&ip6cp, 1);	/*XXX*/
 		}
 
 		/* Look up the handler of the upper-layer protocol found. */
 		ctlfunc = (void (*)(int, struct sockaddr *, void *))
 		    (inet6sw[ip6_protox[nxt]].pr_ctlinput);
 		if (ctlfunc) {
 			(void) (*ctlfunc)(code, (struct sockaddr *)&icmp6dst,
 			    &ip6cp);
 		}
 	}
 	*mp = m;
 	return (0);
 
   freeit:
 	m_freem(m);
 	*mp = NULL;
 	return (-1);
 }
 
 /*
  * Record the MTU announced by an ICMPv6 Packet Too Big message in the TCP
  * host cache.
  *
  * ip6cp      control parameters of the error: ip6c_icmp6 carries the
  *            announced MTU, ip6c_finaldst the destination it applies to,
  *            and ip6c_m the packet that supplies the FIB and the receiving
  *            interface.
  * validated  when zero, nothing is updated.
  */
 void
 icmp6_mtudisc_update(struct ip6ctlparam *ip6cp, int validated)
 {
 	struct in_conninfo inc;
 	struct mbuf *pkt;
 	u_int new_mtu;
 
 	pkt = ip6cp->ip6c_m;	/* will be necessary for scope issue */
 	new_mtu = ntohl(ip6cp->ip6c_icmp6->icmp6_mtu);
 
 	/*
 	 * Values below IPV6_MMTU are deliberately not rejected outright:
 	 * RFC2460 section 5 (last paragraph) allows for a packet translator
 	 * on the path announcing one; see the "alwaysfrag" handling in
 	 * ip6_output() and ip6_getpmtu().
 	 *
 	 * What is rejected is an abnormally small value, and any message
 	 * the caller has not validated.
 	 * XXX what is the good definition of "abnormally small"?
 	 */
 	if (new_mtu < sizeof(struct ip6_hdr) + sizeof(struct ip6_frag) + 8 ||
 	    !validated)
 		return;
 
 	/*
 	 * For a suggested MTU under IPV6_MMTU all that has to be remembered
 	 * is that the "alwaysfrag" case applies, so a fixed value slightly
 	 * below the minimum is stored.
 	 * Try to be as close to the spec as possible.
 	 */
 	if (new_mtu < IPV6_MMTU)
 		new_mtu = IPV6_MMTU - 8;
 
 	/* Describe the destination for the host cache lookup. */
 	bzero(&inc, sizeof(inc));
 	inc.inc_fibnum = M_GETFIB(pkt);
 	inc.inc_flags |= INC_ISIPV6;
 	inc.inc6_faddr = *ip6cp->ip6c_finaldst;
 	if (in6_setscope(&inc.inc6_faddr, pkt->m_pkthdr.rcvif, NULL) != 0)
 		return;
 
 	/* Only ever lower the value tcp_maxmtu6() currently reports. */
 	if (new_mtu >= tcp_maxmtu6(&inc, NULL))
 		return;
 
 	tcp_hc_updatemtu(&inc, new_mtu);
 	ICMP6STAT_INC(icp6s_pmtuchg);
 }
 
 /*
  * Process a Node Information Query packet, based on
  * draft-ietf-ipngwg-icmp-name-lookups-07.
  *
  * Spec incompatibilities:
  * - IPv6 Subject address handling
  * - IPv4 Subject address handling support missing
  * - Proxy reply (answer even if it's not for me)
  * - joins NI group address at in6_ifattach() time only, does not cope
  *   with hostname changes by sethostname(3)
  *
  * m    the query packet.  It is released on every path through this
  *      function, so the caller must not use it afterwards.
  * off  offset of the Node Information header within m.
  * pr   prison whose pr_hostname is used to match an FQDN Subject and to
  *      build an FQDN reply.
  *
  * Returns a newly allocated reply packet (IPv6 header, Node Information
  * header with ni_type set to ICMP6_NI_REPLY, plus qtype-specific data),
  * or NULL when the query is rejected or an allocation fails.
  */
 static struct mbuf *
 ni6_input(struct mbuf *m, int off, struct prison *pr)
 {
 	struct icmp6_nodeinfo *ni6, *nni6;
 	struct mbuf *n = NULL;
 	u_int16_t qtype;
 	int subjlen;
 	int replylen = sizeof(struct ip6_hdr) + sizeof(struct icmp6_nodeinfo);
 	struct ni_reply_fqdn *fqdn;
 	int addrs;		/* for NI_QTYPE_NODEADDR */
 	struct ifnet *ifp = NULL; /* for NI_QTYPE_NODEADDR */
 	struct in6_addr in6_subj; /* subject address */
 	struct ip6_hdr *ip6;
 	int oldfqdn = 0;	/* if 1, return pascal string (03 draft) */
 	char *subj = NULL;
 	struct in6_ifaddr *ia6 = NULL;
 
 	ip6 = mtod(m, struct ip6_hdr *);
 	ni6 = (struct icmp6_nodeinfo *)(mtod(m, caddr_t) + off);
 
 	/*
 	 * Validate IPv6 source address.
 	 * The default configuration MUST be to refuse answering queries from
 	 * global-scope addresses according to RFC4602.
 	 * Notes:
 	 *  - it's not very clear what "refuse" means; this implementation
 	 *    simply drops it.
 	 *  - it's not very easy to identify global-scope (unicast) addresses
 	 *    since there are many prefixes for them.  It should be safer
 	 *    and in practice sufficient to check "all" but loopback and
 	 *    link-local (note that site-local unicast was deprecated and
 	 *    ULA is defined as global scope-wise)
 	 */
 	if ((V_icmp6_nodeinfo & ICMP6_NODEINFO_GLOBALOK) == 0 &&
 	    !IN6_IS_ADDR_LOOPBACK(&ip6->ip6_src) &&
 	    !IN6_IS_ADDR_LINKLOCAL(&ip6->ip6_src))
 		goto bad;
 
 	/*
 	 * Validate IPv6 destination address.
 	 *
 	 * The Responder must discard the Query without further processing
 	 * unless it is one of the Responder's unicast or anycast addresses, or
 	 * a link-local scope multicast address which the Responder has joined.
 	 * [RFC4602, Section 5.]
 	 */
 	if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst)) {
 		if (!IN6_IS_ADDR_MC_LINKLOCAL(&ip6->ip6_dst))
 			goto bad;
 		/* else it's a link-local multicast, fine */
 	} else {		/* unicast or anycast */
 		ia6 = in6ifa_ifwithaddr(&ip6->ip6_dst, 0 /* XXX */);
 		if (ia6 == NULL)
 			goto bad; /* XXX impossible */
 
 		/* The reference on ia6 is dropped on both branches below. */
 		if ((ia6->ia6_flags & IN6_IFF_TEMPORARY) &&
 		    !(V_icmp6_nodeinfo & ICMP6_NODEINFO_TMPADDROK)) {
 			ifa_free(&ia6->ia_ifa);
 			nd6log((LOG_DEBUG, "ni6_input: ignore node info to "
 				"a temporary address in %s:%d",
 			       __FILE__, __LINE__));
 			goto bad;
 		}
 		ifa_free(&ia6->ia_ifa);
 	}
 
 	/* validate query Subject field. */
 	qtype = ntohs(ni6->ni_qtype);
 	/* Subject: whatever follows the fixed Node Information header. */
 	subjlen = m->m_pkthdr.len - off - sizeof(struct icmp6_nodeinfo);
 	switch (qtype) {
 	case NI_QTYPE_NOOP:
 	case NI_QTYPE_SUPTYPES:
 		/* 07 draft */
 		if (ni6->ni_code == ICMP6_NI_SUBJ_FQDN && subjlen == 0)
 			break;
 		/* FALLTHROUGH */
 	case NI_QTYPE_FQDN:
 	case NI_QTYPE_NODEADDR:
 	case NI_QTYPE_IPV4ADDR:
 		switch (ni6->ni_code) {
 		case ICMP6_NI_SUBJ_IPV6:
 #if ICMP6_NI_SUBJ_IPV6 != 0
 		case 0:
 #endif
 			/*
 			 * backward compatibility - try to accept 03 draft
 			 * format, where no Subject is present.
 			 */
 			if (qtype == NI_QTYPE_FQDN && ni6->ni_code == 0 &&
 			    subjlen == 0) {
 				oldfqdn++;
 				break;
 			}
 #if ICMP6_NI_SUBJ_IPV6 != 0
 			if (ni6->ni_code != ICMP6_NI_SUBJ_IPV6)
 				goto bad;
 #endif
 
 			if (subjlen != sizeof(struct in6_addr))
 				goto bad;
 
 			/*
 			 * Validate Subject address.
 			 *
 			 * Not sure what exactly "address belongs to the node"
 			 * means in the spec, is it just unicast, or what?
 			 *
 			 * At this moment we consider Subject address as
 			 * "belong to the node" if the Subject address equals
 			 * to the IPv6 destination address; validation for
 			 * IPv6 destination address should have done enough
 			 * check for us.
 			 *
 			 * We do not do proxy at this moment.
 			 */
 			m_copydata(m, off + sizeof(struct icmp6_nodeinfo),
 			    subjlen, (caddr_t)&in6_subj);
 			if (in6_setscope(&in6_subj, m->m_pkthdr.rcvif, NULL))
 				goto bad;
 
 			subj = (char *)&in6_subj;
 			if (IN6_ARE_ADDR_EQUAL(&ip6->ip6_dst, &in6_subj))
 				break;
 
 			/*
 			 * XXX if we are to allow other cases, we should really
 			 * be careful about scope here.
 			 * basically, we should disallow queries toward IPv6
 			 * destination X with subject Y,
 			 * if scope(X) > scope(Y).
 			 * if we allow scope(X) > scope(Y), it will result in
 			 * information leakage across scope boundary.
 			 */
 			goto bad;
 
 		case ICMP6_NI_SUBJ_FQDN:
 			/*
 			 * Validate Subject name with gethostname(3).
 			 *
 			 * The behavior may need some debate, since:
 			 * - we are not sure if the node has FQDN as
 			 *   hostname (returned by gethostname(3)).
 			 * - the code does wildcard match for truncated names.
 			 *   however, we are not sure if we want to perform
 			 *   wildcard match, if gethostname(3) side has
 			 *   truncated hostname.
 			 */
 			/*
 			 * n temporarily holds the DNS-encoded hostname to
 			 * compare against; it is freed again before leaving
 			 * this case (or at "bad").
 			 */
 			mtx_lock(&pr->pr_mtx);
 			n = ni6_nametodns(pr->pr_hostname,
 			    strlen(pr->pr_hostname), 0);
 			mtx_unlock(&pr->pr_mtx);
 			if (!n || n->m_next || n->m_len == 0)
 				goto bad;
 			/* The Subject must be contiguous for ni6_dnsmatch(). */
 			if (m->m_len < off + sizeof(struct icmp6_nodeinfo) +
 			    subjlen) {
 				m = m_pullup(m, off +
 				    sizeof(struct icmp6_nodeinfo) + subjlen);
 				if (m == NULL) {
 					IP6STAT_INC(ip6s_exthdrtoolong);
 					goto bad;
 				}
 			}
 			/* ip6 possibly invalid but not used after. */
 			ni6 = (struct icmp6_nodeinfo *)(mtod(m, caddr_t) + off);
 			subj = (char *)(mtod(m, caddr_t) + off +
 			    sizeof(struct icmp6_nodeinfo));
 			if (!ni6_dnsmatch(subj, subjlen, mtod(n, const char *),
 			    n->m_len)) {
 				goto bad;
 			}
 			m_freem(n);
 			n = NULL;
 			break;
 
 		case ICMP6_NI_SUBJ_IPV4:	/* XXX: to be implemented? */
 		default:
 			goto bad;
 		}
 		break;
 	}
 
 	/* refuse based on configuration.  XXX ICMP6_NI_REFUSED? */
 	switch (qtype) {
 	case NI_QTYPE_FQDN:
 		if ((V_icmp6_nodeinfo & ICMP6_NODEINFO_FQDNOK) == 0)
 			goto bad;
 		break;
 	case NI_QTYPE_NODEADDR:
 	case NI_QTYPE_IPV4ADDR:
 		if ((V_icmp6_nodeinfo & ICMP6_NODEINFO_NODEADDROK) == 0)
 			goto bad;
 		break;
 	}
 
 	/* guess reply length */
 	switch (qtype) {
 	case NI_QTYPE_NOOP:
 		break;		/* no reply data */
 	case NI_QTYPE_SUPTYPES:
 		replylen += sizeof(u_int32_t);
 		break;
 	case NI_QTYPE_FQDN:
 		/* XXX will append an mbuf */
 		replylen += offsetof(struct ni_reply_fqdn, ni_fqdn_namelen);
 		break;
 	case NI_QTYPE_NODEADDR:
 		/* One address plus a 32-bit word per entry, capped. */
 		addrs = ni6_addrs(ni6, m, &ifp, (struct in6_addr *)subj);
 		if ((replylen += addrs * (sizeof(struct in6_addr) +
 		    sizeof(u_int32_t))) > MCLBYTES)
 			replylen = MCLBYTES; /* XXX: will truncate pkt later */
 		break;
 	case NI_QTYPE_IPV4ADDR:
 		/* unsupported - should respond with unknown Qtype? */
 		break;
 	default:
 		/*
 		 * XXX: We must return a reply with the ICMP6 code
 		 * `unknown Qtype' in this case.  However we regard the case
 		 * as an FQDN query for backward compatibility.
 		 * Older versions set a random value to this field,
 		 * so it rarely varies in the defined qtypes.
 		 * But the mechanism is not reliable...
 		 * maybe we should obsolete older versions.
 		 */
 		qtype = NI_QTYPE_FQDN;
 		/* XXX will append an mbuf */
 		replylen += offsetof(struct ni_reply_fqdn, ni_fqdn_namelen);
 		oldfqdn++;
 		break;
 	}
 
 	/* Allocate an mbuf to reply. */
 	if (replylen > MCLBYTES) {
 		/*
 		 * XXX: should we try to allocate more? But MCLBYTES
 		 * is probably much larger than IPV6_MMTU...
 		 */
 		goto bad;
 	}
 	/* A plain header mbuf when the reply fits, otherwise a cluster. */
 	if (replylen > MHLEN)
 		n = m_getcl(M_NOWAIT, m->m_type, M_PKTHDR);
 	else
 		n = m_gethdr(M_NOWAIT, m->m_type);
 	if (n == NULL) {
 		m_freem(m);
 		return (NULL);
 	}
 	m_move_pkthdr(n, m); /* just for recvif and FIB */
 	n->m_pkthdr.len = n->m_len = replylen;
 
 	/* copy mbuf header and IPv6 + Node Information base headers */
 	bcopy(mtod(m, caddr_t), mtod(n, caddr_t), sizeof(struct ip6_hdr));
 	nni6 = (struct icmp6_nodeinfo *)(mtod(n, struct ip6_hdr *) + 1);
 	bcopy((caddr_t)ni6, (caddr_t)nni6, sizeof(struct icmp6_nodeinfo));
 
 	/* qtype dependent procedure */
 	switch (qtype) {
 	case NI_QTYPE_NOOP:
 		nni6->ni_code = ICMP6_NI_SUCCESS;
 		nni6->ni_flags = 0;
 		break;
 	case NI_QTYPE_SUPTYPES:
 	{
 		u_int32_t v;
 		nni6->ni_code = ICMP6_NI_SUCCESS;
 		nni6->ni_flags = htons(0x0000);	/* raw bitmap */
 		/* supports NOOP, SUPTYPES, FQDN, and NODEADDR */
 		v = (u_int32_t)htonl(0x0000000f);
 		bcopy(&v, nni6 + 1, sizeof(u_int32_t));
 		break;
 	}
 	case NI_QTYPE_FQDN:
 		nni6->ni_code = ICMP6_NI_SUCCESS;
 		fqdn = (struct ni_reply_fqdn *)(mtod(n, caddr_t) +
 		    sizeof(struct ip6_hdr) + sizeof(struct icmp6_nodeinfo));
 		nni6->ni_flags = 0; /* XXX: meaningless TTL */
 		fqdn->ni_fqdn_ttl = 0;	/* ditto. */
 		/*
 		 * XXX do we really have FQDN in hostname?
 		 */
 		/* The encoded name travels in a second mbuf chained to n. */
 		mtx_lock(&pr->pr_mtx);
 		n->m_next = ni6_nametodns(pr->pr_hostname,
 		    strlen(pr->pr_hostname), oldfqdn);
 		mtx_unlock(&pr->pr_mtx);
 		if (n->m_next == NULL)
 			goto bad;
 		/* XXX we assume that n->m_next is not a chain */
 		if (n->m_next->m_next != NULL)
 			goto bad;
 		n->m_pkthdr.len += n->m_next->m_len;
 		break;
 	case NI_QTYPE_NODEADDR:
 	{
 		int lenlim, copied;
 
 		nni6->ni_code = ICMP6_NI_SUCCESS;
 		/*
 		 * Shrink the length to the headers only so that
 		 * M_TRAILINGSPACE() reports the room left for addresses.
 		 */
 		n->m_pkthdr.len = n->m_len =
 		    sizeof(struct ip6_hdr) + sizeof(struct icmp6_nodeinfo);
 		lenlim = M_TRAILINGSPACE(n);
 		copied = ni6_store_addrs(ni6, nni6, ifp, lenlim);
 		/* XXX: reset mbuf length */
 		n->m_pkthdr.len = n->m_len = sizeof(struct ip6_hdr) +
 		    sizeof(struct icmp6_nodeinfo) + copied;
 		break;
 	}
 	default:
 		break;		/* XXX impossible! */
 	}
 
 	nni6->ni_type = ICMP6_NI_REPLY;
 	m_freem(m);
 	return (n);
 
   bad:
 	/* m may be NULL here after a failed m_pullup(). */
 	m_freem(m);
 	if (n)
 		m_freem(n);
 	return (NULL);
 }
 
 /*
  * make a mbuf with DNS-encoded string.  no compression support.
  *
  * XXX names with less than 2 dots (like "foo" or "foo.section") will be
  * treated as truncated name (two \0 at the end).  this is a wild guess.
  *
  * old - return pascal string if non-zero
  */
 static struct mbuf *
 ni6_nametodns(const char *name, int namelen, int old)
 {
 	struct mbuf *m;
 	char *cp, *ep;
 	const char *p, *q;
 	int i, len, nterm;
 
 	if (old)
 		len = namelen + 1;
 	else
 		len = MCLBYTES;
 
 	/* Because MAXHOSTNAMELEN is usually 256, we use cluster mbuf. */
 	if (len > MLEN)
 		m = m_getcl(M_NOWAIT, MT_DATA, 0);
 	else
 		m = m_get(M_NOWAIT, MT_DATA);
 	if (m == NULL)
 		goto fail;
 
 	if (old) {
 		m->m_len = len;
 		*mtod(m, char *) = namelen;
 		bcopy(name, mtod(m, char *) + 1, namelen);
 		return m;
 	} else {
 		m->m_len = 0;
 		cp = mtod(m, char *);
 		ep = mtod(m, char *) + M_TRAILINGSPACE(m);
 
 		/* if not certain about my name, return empty buffer */
 		if (namelen == 0)
 			return m;
 
 		/*
 		 * guess if it looks like shortened hostname, or FQDN.
 		 * shortened hostname needs two trailing "\0".
 		 */
 		i = 0;
 		for (p = name; p < name + namelen; p++) {
 			if (*p && *p == '.')
 				i++;
 		}
 		if (i < 2)
 			nterm = 2;
 		else
 			nterm = 1;
 
 		p = name;
 		while (cp < ep && p < name + namelen) {
 			i = 0;
 			for (q = p; q < name + namelen && *q && *q != '.'; q++)
 				i++;
 			/* result does not fit into mbuf */
 			if (cp + i + 1 >= ep)
 				goto fail;
 			/*
 			 * DNS label length restriction, RFC1035 page 8.
 			 * "i == 0" case is included here to avoid returning
 			 * 0-length label on "foo..bar".
 			 */
 			if (i <= 0 || i >= 64)
 				goto fail;
 			*cp++ = i;
 			bcopy(p, cp, i);
 			cp += i;
 			p = q;
 			if (p < name + namelen && *p == '.')
 				p++;
 		}
 		/* termination */
 		if (cp + nterm >= ep)
 			goto fail;
 		while (nterm-- > 0)
 			*cp++ = '\0';
 		m->m_len = cp - mtod(m, char *);
 		return m;
 	}
 
 	panic("should not reach here");
 	/* NOTREACHED */
 
  fail:
 	if (m)
 		m_freem(m);
 	return NULL;
 }
 
 /*
  * check if two DNS-encoded string matches.  takes care of truncated
  * form (with \0\0 at the end).  no compression support.
  * XXX upper/lowercase match (see RFC2065)
  */
 static int
 ni6_dnsmatch(const char *a, int alen, const char *b, int blen)
 {
 	const char *a0, *b0;
 	int l;
 
 	/* simplest case - need validation? */
 	if (alen == blen && bcmp(a, b, alen) == 0)
 		return 1;
 
 	a0 = a;
 	b0 = b;
 
 	/* termination is mandatory */
 	if (alen < 2 || blen < 2)
 		return 0;
 	if (a0[alen - 1] != '\0' || b0[blen - 1] != '\0')
 		return 0;
 	alen--;
 	blen--;
 
 	while (a - a0 < alen && b - b0 < blen) {
 		if (a - a0 + 1 > alen || b - b0 + 1 > blen)
 			return 0;
 
 		if ((signed char)a[0] < 0 || (signed char)b[0] < 0)
 			return 0;
 		/* we don't support compression yet */
 		if (a[0] >= 64 || b[0] >= 64)
 			return 0;
 
 		/* truncated case */
 		if (a[0] == 0 && a - a0 == alen - 1)
 			return 1;
 		if (b[0] == 0 && b - b0 == blen - 1)
 			return 1;
 		if (a[0] == 0 || b[0] == 0)
 			return 0;
 
 		if (a[0] != b[0])
 			return 0;
 		l = a[0];
 		if (a - a0 + 1 + l > alen || b - b0 + 1 + l > blen)
 			return 0;
 		if (bcmp(a + 1, b + 1, l) != 0)
 			return 0;
 
 		a += 1 + l;
 		b += 1 + l;
 	}
 
 	if (a - a0 == alen && b - b0 == blen)
 		return 1;
 	else
 		return 0;
 }
 
 /*
  * calculate the number of addresses to be returned in the node info reply.
  */
 static int
 ni6_addrs(struct icmp6_nodeinfo *ni6, struct mbuf *m, struct ifnet **ifpp,
     struct in6_addr *subj)
 {
 	struct ifnet *ifp;
 	struct in6_ifaddr *ifa6;
 	struct ifaddr *ifa;
 	int addrs = 0, addrsofif, iffound = 0;
 	int niflags = ni6->ni_flags;
 
 	NET_EPOCH_ASSERT();
 
 	if ((niflags & NI_NODEADDR_FLAG_ALL) == 0) {
 		switch (ni6->ni_code) {
 		case ICMP6_NI_SUBJ_IPV6:
 			if (subj == NULL) /* must be impossible... */
 				return (0);
 			break;
 		default:
 			/*
 			 * XXX: we only support IPv6 subject address for
 			 * this Qtype.
 			 */
 			return (0);
 		}
 	}
 
 	CK_STAILQ_FOREACH(ifp, &V_ifnet, if_link) {
 		addrsofif = 0;
 		CK_STAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
 			if (ifa->ifa_addr->sa_family != AF_INET6)
 				continue;
 			ifa6 = (struct in6_ifaddr *)ifa;
 
 			if ((niflags & NI_NODEADDR_FLAG_ALL) == 0 &&
 			    IN6_ARE_ADDR_EQUAL(subj, &ifa6->ia_addr.sin6_addr))
 				iffound = 1;
 
 			/*
 			 * IPv4-mapped addresses can only be returned by a
 			 * Node Information proxy, since they represent
 			 * addresses of IPv4-only nodes, which perforce do
 			 * not implement this protocol.
 			 * [icmp-name-lookups-07, Section 5.4]
 			 * So we don't support NI_NODEADDR_FLAG_COMPAT in
 			 * this function at this moment.
 			 */
 
 			/* What do we have to do about ::1? */
 			switch (in6_addrscope(&ifa6->ia_addr.sin6_addr)) {
 			case IPV6_ADDR_SCOPE_LINKLOCAL:
 				if ((niflags & NI_NODEADDR_FLAG_LINKLOCAL) == 0)
 					continue;
 				break;
 			case IPV6_ADDR_SCOPE_SITELOCAL:
 				if ((niflags & NI_NODEADDR_FLAG_SITELOCAL) == 0)
 					continue;
 				break;
 			case IPV6_ADDR_SCOPE_GLOBAL:
 				if ((niflags & NI_NODEADDR_FLAG_GLOBAL) == 0)
 					continue;
 				break;
 			default:
 				continue;
 			}
 
 			/*
 			 * check if anycast is okay.
 			 * XXX: just experimental.  not in the spec.
 			 */
 			if ((ifa6->ia6_flags & IN6_IFF_ANYCAST) != 0 &&
 			    (niflags & NI_NODEADDR_FLAG_ANYCAST) == 0)
 				continue; /* we need only unicast addresses */
 			if ((ifa6->ia6_flags & IN6_IFF_TEMPORARY) != 0 &&
 			    (V_icmp6_nodeinfo & ICMP6_NODEINFO_TMPADDROK) == 0) {
 				continue;
 			}
 			addrsofif++; /* count the address */
 		}
 		if (iffound) {
 			*ifpp = ifp;
 			return (addrsofif);
 		}
 
 		addrs += addrsofif;
 	}
 
 	return (addrs);
 }
 
 static int
 ni6_store_addrs(struct icmp6_nodeinfo *ni6, struct icmp6_nodeinfo *nni6,
     struct ifnet *ifp0, int resid)
 {
 	struct ifnet *ifp;
 	struct in6_ifaddr *ifa6;
 	struct ifaddr *ifa;
 	struct ifnet *ifp_dep = NULL;
 	int copied = 0, allow_deprecated = 0;
 	u_char *cp = (u_char *)(nni6 + 1);
 	int niflags = ni6->ni_flags;
 	u_int32_t ltime;
 
 	NET_EPOCH_ASSERT();
 
 	if (ifp0 == NULL && !(niflags & NI_NODEADDR_FLAG_ALL))
 		return (0);	/* needless to copy */
 
 	ifp = ifp0 ? ifp0 : CK_STAILQ_FIRST(&V_ifnet);
   again:
 
 	for (; ifp; ifp = CK_STAILQ_NEXT(ifp, if_link)) {
 		CK_STAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
 			if (ifa->ifa_addr->sa_family != AF_INET6)
 				continue;
 			ifa6 = (struct in6_ifaddr *)ifa;
 
 			if ((ifa6->ia6_flags & IN6_IFF_DEPRECATED) != 0 &&
 			    allow_deprecated == 0) {
 				/*
 				 * prefererred address should be put before
 				 * deprecated addresses.
 				 */
 
 				/* record the interface for later search */
 				if (ifp_dep == NULL)
 					ifp_dep = ifp;
 
 				continue;
 			} else if ((ifa6->ia6_flags & IN6_IFF_DEPRECATED) == 0 &&
 			    allow_deprecated != 0)
 				continue; /* we now collect deprecated addrs */
 
 			/* What do we have to do about ::1? */
 			switch (in6_addrscope(&ifa6->ia_addr.sin6_addr)) {
 			case IPV6_ADDR_SCOPE_LINKLOCAL:
 				if ((niflags & NI_NODEADDR_FLAG_LINKLOCAL) == 0)
 					continue;
 				break;
 			case IPV6_ADDR_SCOPE_SITELOCAL:
 				if ((niflags & NI_NODEADDR_FLAG_SITELOCAL) == 0)
 					continue;
 				break;
 			case IPV6_ADDR_SCOPE_GLOBAL:
 				if ((niflags & NI_NODEADDR_FLAG_GLOBAL) == 0)
 					continue;
 				break;
 			default:
 				continue;
 			}
 
 			/*
 			 * check if anycast is okay.
 			 * XXX: just experimental.  not in the spec.
 			 */
 			if ((ifa6->ia6_flags & IN6_IFF_ANYCAST) != 0 &&
 			    (niflags & NI_NODEADDR_FLAG_ANYCAST) == 0)
 				continue;
 			if ((ifa6->ia6_flags & IN6_IFF_TEMPORARY) != 0 &&
 			    (V_icmp6_nodeinfo & ICMP6_NODEINFO_TMPADDROK) == 0) {
 				continue;
 			}
 
 			/* now we can copy the address */
 			if (resid < sizeof(struct in6_addr) +
 			    sizeof(u_int32_t)) {
 				/*
 				 * We give up much more copy.
 				 * Set the truncate flag and return.
 				 */
 				nni6->ni_flags |= NI_NODEADDR_FLAG_TRUNCATE;
 				return (copied);
 			}
 
 			/*
 			 * Set the TTL of the address.
 			 * The TTL value should be one of the following
 			 * according to the specification:
 			 *
 			 * 1. The remaining lifetime of a DHCP lease on the
 			 *    address, or
 			 * 2. The remaining Valid Lifetime of a prefix from
 			 *    which the address was derived through Stateless
 			 *    Autoconfiguration.
 			 *
 			 * Note that we currently do not support stateful
 			 * address configuration by DHCPv6, so the former
 			 * case can't happen.
 			 */
 			if (ifa6->ia6_lifetime.ia6t_expire == 0)
 				ltime = ND6_INFINITE_LIFETIME;
 			else {
 				if (ifa6->ia6_lifetime.ia6t_expire >
 				    time_uptime)
 					ltime = htonl(ifa6->ia6_lifetime.ia6t_expire - time_uptime);
 				else
 					ltime = 0;
 			}
 
 			bcopy(&ltime, cp, sizeof(u_int32_t));
 			cp += sizeof(u_int32_t);
 
 			/* copy the address itself */
 			bcopy(&ifa6->ia_addr.sin6_addr, cp,
 			    sizeof(struct in6_addr));
 			in6_clearscope((struct in6_addr *)cp); /* XXX */
 			cp += sizeof(struct in6_addr);
 
 			resid -= (sizeof(struct in6_addr) + sizeof(u_int32_t));
 			copied += (sizeof(struct in6_addr) + sizeof(u_int32_t));
 		}
 		if (ifp0)	/* we need search only on the specified IF */
 			break;
 	}
 
 	if (allow_deprecated == 0 && ifp_dep != NULL) {
 		ifp = ifp_dep;
 		allow_deprecated = 1;
 
 		goto again;
 	}
 
 	return (copied);
 }
 
 /*
  * XXX almost dup'ed code with rip6_input.
  */
 static int
 icmp6_rip6_input(struct mbuf **mp, int off)
 {
 	struct mbuf *m = *mp;
 	struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *);
 	struct inpcb *inp;
 	struct inpcb *last = NULL;
 	struct sockaddr_in6 fromsa;
 	struct icmp6_hdr *icmp6;
 	struct mbuf *opts = NULL;
 
 	NET_EPOCH_ASSERT();
 
 	/* This is assumed to be safe; icmp6_input() does a pullup. */
 	icmp6 = (struct icmp6_hdr *)((caddr_t)ip6 + off);
 
 	/*
 	 * XXX: the address may have embedded scope zone ID, which should be
 	 * hidden from applications.
 	 */
 	bzero(&fromsa, sizeof(fromsa));
 	fromsa.sin6_family = AF_INET6;
 	fromsa.sin6_len = sizeof(struct sockaddr_in6);
 	fromsa.sin6_addr = ip6->ip6_src;
 	if (sa6_recoverscope(&fromsa)) {
 		m_freem(m);
 		*mp = NULL;
 		return (IPPROTO_DONE);
 	}
 
 	CK_LIST_FOREACH(inp, &V_ripcb, inp_list) {
 		if ((inp->inp_vflag & INP_IPV6) == 0)
 			continue;
 		if (inp->inp_ip_p != IPPROTO_ICMPV6)
 			continue;
 		if (!IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr) &&
 		   !IN6_ARE_ADDR_EQUAL(&inp->in6p_laddr, &ip6->ip6_dst))
 			continue;
 		if (!IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_faddr) &&
 		   !IN6_ARE_ADDR_EQUAL(&inp->in6p_faddr, &ip6->ip6_src))
 			continue;
 		INP_RLOCK(inp);
 		if (__predict_false(inp->inp_flags2 & INP_FREED)) {
 			INP_RUNLOCK(inp);
 			continue;
 		}
 		if (ICMP6_FILTER_WILLBLOCK(icmp6->icmp6_type,
 		    inp->in6p_icmp6filt)) {
 			INP_RUNLOCK(inp);
 			continue;
 		}
 		if (last != NULL) {
 			struct	mbuf *n = NULL;
 
 			/*
 			 * Recent network drivers tend to allocate a single
 			 * mbuf cluster, rather than to make a couple of
 			 * mbufs without clusters.  Also, since the IPv6 code
 			 * path tries to avoid m_pullup(), it is highly
 			 * probable that we still have an mbuf cluster here
 			 * even though the necessary length can be stored in an
 			 * mbuf's internal buffer.
 			 * Meanwhile, the default size of the receive socket
 			 * buffer for raw sockets is not so large.  This means
 			 * the possibility of packet loss is relatively higher
 			 * than before.  To avoid this scenario, we copy the
 			 * received data to a separate mbuf that does not use
 			 * a cluster, if possible.
 			 * XXX: it is better to copy the data after stripping
 			 * intermediate headers.
 			 */
 			if ((m->m_flags & M_EXT) && m->m_next == NULL &&
 			    m->m_len <= MHLEN) {
 				n = m_get(M_NOWAIT, m->m_type);
 				if (n != NULL) {
 					if (m_dup_pkthdr(n, m, M_NOWAIT)) {
 						bcopy(m->m_data, n->m_data,
 						      m->m_len);
 						n->m_len = m->m_len;
 					} else {
 						m_free(n);
 						n = NULL;
 					}
 				}
 			}
 			if (n != NULL ||
 			    (n = m_copym(m, 0, M_COPYALL, M_NOWAIT)) != NULL) {
 				if (last->inp_flags & INP_CONTROLOPTS)
 					ip6_savecontrol(last, n, &opts);
 				/* strip intermediate headers */
 				m_adj(n, off);
 				SOCKBUF_LOCK(&last->inp_socket->so_rcv);
 				if (sbappendaddr_locked(
 				    &last->inp_socket->so_rcv,
 				    (struct sockaddr *)&fromsa, n, opts)
 				    == 0) {
 					/* should notify about lost packet */
 					m_freem(n);
 					if (opts) {
 						m_freem(opts);
 					}
 					SOCKBUF_UNLOCK(
 					    &last->inp_socket->so_rcv);
 				} else
 					sorwakeup_locked(last->inp_socket);
 				opts = NULL;
 			}
 			INP_RUNLOCK(last);
 		}
 		last = inp;
 	}
 	if (last != NULL) {
 		if (last->inp_flags & INP_CONTROLOPTS)
 			ip6_savecontrol(last, m, &opts);
 		/* strip intermediate headers */
 		m_adj(m, off);
 
 		/* avoid using mbuf clusters if possible (see above) */
 		if ((m->m_flags & M_EXT) && m->m_next == NULL &&
 		    m->m_len <= MHLEN) {
 			struct mbuf *n;
 
 			n = m_get(M_NOWAIT, m->m_type);
 			if (n != NULL) {
 				if (m_dup_pkthdr(n, m, M_NOWAIT)) {
 					bcopy(m->m_data, n->m_data, m->m_len);
 					n->m_len = m->m_len;
 
 					m_freem(m);
 					m = n;
 				} else {
 					m_freem(n);
 					n = NULL;
 				}
 			}
 		}
 		SOCKBUF_LOCK(&last->inp_socket->so_rcv);
 		if (sbappendaddr_locked(&last->inp_socket->so_rcv,
 		    (struct sockaddr *)&fromsa, m, opts) == 0) {
 			m_freem(m);
 			if (opts)
 				m_freem(opts);
 			SOCKBUF_UNLOCK(&last->inp_socket->so_rcv);
 		} else
 			sorwakeup_locked(last->inp_socket);
 		INP_RUNLOCK(last);
 	} else {
 		m_freem(m);
 		IP6STAT_DEC(ip6s_delivered);
 	}
 	*mp = NULL;
 	return (IPPROTO_DONE);
 }
 
 /*
  * Reflect the ip6 packet back to the source.
  * OFF points to the icmp6 header, counted from the top of the mbuf.
  */
 static void
 icmp6_reflect(struct mbuf *m, size_t off)
 {
 	struct in6_addr src6, *srcp;
 	struct ip6_hdr *ip6;
 	struct icmp6_hdr *icmp6;
 	struct in6_ifaddr *ia = NULL;
 	struct ifnet *outif = NULL;
 	int plen;
 	int type, code, hlim;
 
 	/* too short to reflect */
 	if (off < sizeof(struct ip6_hdr)) {
 		nd6log((LOG_DEBUG,
 		    "sanity fail: off=%lx, sizeof(ip6)=%lx in %s:%d\n",
 		    (u_long)off, (u_long)sizeof(struct ip6_hdr),
 		    __FILE__, __LINE__));
 		goto bad;
 	}
 
 	/*
 	 * If there are extra headers between IPv6 and ICMPv6, strip
 	 * off that header first.
 	 */
 #ifdef DIAGNOSTIC
 	if (sizeof(struct ip6_hdr) + sizeof(struct icmp6_hdr) > MHLEN)
 		panic("assumption failed in icmp6_reflect");
 #endif
 	if (off > sizeof(struct ip6_hdr)) {
 		size_t l;
 		struct ip6_hdr nip6;
 
 		l = off - sizeof(struct ip6_hdr);
 		m_copydata(m, 0, sizeof(nip6), (caddr_t)&nip6);
 		m_adj(m, l);
 		l = sizeof(struct ip6_hdr) + sizeof(struct icmp6_hdr);
 		if (m->m_len < l) {
 			if ((m = m_pullup(m, l)) == NULL)
 				return;
 		}
 		bcopy((caddr_t)&nip6, mtod(m, caddr_t), sizeof(nip6));
 	} else /* off == sizeof(struct ip6_hdr) */ {
 		size_t l;
 		l = sizeof(struct ip6_hdr) + sizeof(struct icmp6_hdr);
 		if (m->m_len < l) {
 			if ((m = m_pullup(m, l)) == NULL)
 				return;
 		}
 	}
 	plen = m->m_pkthdr.len - sizeof(struct ip6_hdr);
 	ip6 = mtod(m, struct ip6_hdr *);
 	ip6->ip6_nxt = IPPROTO_ICMPV6;
 	icmp6 = (struct icmp6_hdr *)(ip6 + 1);
 	type = icmp6->icmp6_type; /* keep type for statistics */
 	code = icmp6->icmp6_code; /* ditto. */
 	hlim = 0;
 	srcp = NULL;
 
 	/*
 	 * If the incoming packet was addressed directly to us (i.e. unicast),
 	 * use dst as the src for the reply.
 	 * The IN6_IFF_NOTREADY case should be VERY rare, but is possible
 	 * (for example) when we encounter an error while forwarding procedure
 	 * destined to a duplicated address of ours.
 	 */
 	if (!IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst)) {
 		ia = in6ifa_ifwithaddr(&ip6->ip6_dst, 0 /* XXX */);
 		if (ia != NULL && !(ia->ia6_flags &
 		    (IN6_IFF_ANYCAST|IN6_IFF_NOTREADY))) {
 			src6 = ia->ia_addr.sin6_addr;
 			srcp = &src6;
 
 			if (m->m_pkthdr.rcvif != NULL) {
 				/* XXX: This may not be the outgoing interface */
 				hlim = ND_IFINFO(m->m_pkthdr.rcvif)->chlim;
 			} else
 				hlim = V_ip6_defhlim;
 		}
 		if (ia != NULL)
 			ifa_free(&ia->ia_ifa);
 	}
 
 	if (srcp == NULL) {
 		int error;
 		struct in6_addr dst6;
 		uint32_t scopeid;
 
 		/*
 		 * This case matches to multicasts, our anycast, or unicasts
 		 * that we do not own.  Select a source address based on the
 		 * source address of the erroneous packet.
 		 */
 		in6_splitscope(&ip6->ip6_src, &dst6, &scopeid);
 		error = in6_selectsrc_addr(M_GETFIB(m), &dst6,
 		    scopeid, NULL, &src6, &hlim);
 
 		if (error) {
 			char ip6buf[INET6_ADDRSTRLEN];
 			nd6log((LOG_DEBUG,
 			    "icmp6_reflect: source can't be determined: "
 			    "dst=%s, error=%d\n",
 			    ip6_sprintf(ip6buf, &ip6->ip6_dst), error));
 			goto bad;
 		}
 		srcp = &src6;
 	}
 	/*
 	 * ip6_input() drops a packet if its src is multicast.
 	 * So, the src is never multicast.
 	 */
 	ip6->ip6_dst = ip6->ip6_src;
 	ip6->ip6_src = *srcp;
 	ip6->ip6_flow = 0;
 	ip6->ip6_vfc &= ~IPV6_VERSION_MASK;
 	ip6->ip6_vfc |= IPV6_VERSION;
 	ip6->ip6_nxt = IPPROTO_ICMPV6;
 	ip6->ip6_hlim = hlim;
 
 	icmp6->icmp6_cksum = 0;
 	icmp6->icmp6_cksum = in6_cksum(m, IPPROTO_ICMPV6,
 	    sizeof(struct ip6_hdr), plen);
 
 	/*
 	 * XXX option handling
 	 */
 
 	m->m_flags &= ~(M_BCAST|M_MCAST);
 	m->m_pkthdr.rcvif = NULL;
 	ip6_output(m, NULL, NULL, 0, NULL, &outif, NULL);
 	if (outif)
 		icmp6_ifoutstat_inc(outif, type, code);
 
 	return;
 
  bad:
 	m_freem(m);
 	return;
 }
 
 void
 icmp6_fasttimo(void)
 {
 
 	mld_fasttimo();
 }
 
 void
 icmp6_slowtimo(void)
 {
 
 	mld_slowtimo();
 }
 
 static const char *
 icmp6_redirect_diag(struct in6_addr *src6, struct in6_addr *dst6,
     struct in6_addr *tgt6)
 {
 	static char buf[1024];
 	char ip6bufs[INET6_ADDRSTRLEN];
 	char ip6bufd[INET6_ADDRSTRLEN];
 	char ip6buft[INET6_ADDRSTRLEN];
 	snprintf(buf, sizeof(buf), "(src=%s dst=%s tgt=%s)",
 	    ip6_sprintf(ip6bufs, src6), ip6_sprintf(ip6bufd, dst6),
 	    ip6_sprintf(ip6buft, tgt6));
 	return buf;
 }
 
 void
 icmp6_redirect_input(struct mbuf *m, int off)
 {
 	struct ifnet *ifp;
 	struct ip6_hdr *ip6;
 	struct nd_redirect *nd_rd;
 	struct in6_addr src6, redtgt6, reddst6;
 	union nd_opts ndopts;
 	char ip6buf[INET6_ADDRSTRLEN];
 	char *lladdr;
 	int icmp6len, is_onlink, is_router, lladdrlen;
 
 	M_ASSERTPKTHDR(m);
 	KASSERT(m->m_pkthdr.rcvif != NULL, ("%s: no rcvif", __func__));
 
 	/* XXX if we are router, we don't update route by icmp6 redirect */
 	if (V_ip6_forwarding)
 		goto freeit;
 	if (!V_icmp6_rediraccept)
 		goto freeit;
 
 	/* RFC 6980: Nodes MUST silently ignore fragments */
 	if(m->m_flags & M_FRAGMENTED)
 		goto freeit;
 
 	ip6 = mtod(m, struct ip6_hdr *);
 	icmp6len = ntohs(ip6->ip6_plen);
 	if (m->m_len < off + icmp6len) {
 		m = m_pullup(m, off + icmp6len);
 		if (m == NULL) {
 			IP6STAT_INC(ip6s_exthdrtoolong);
 			return;
 		}
 	}
 	ip6 = mtod(m, struct ip6_hdr *);
 	nd_rd = (struct nd_redirect *)((caddr_t)ip6 + off);
 
 	ifp = m->m_pkthdr.rcvif;
 	redtgt6 = nd_rd->nd_rd_target;
 	reddst6 = nd_rd->nd_rd_dst;
 
 	if (in6_setscope(&redtgt6, ifp, NULL) ||
 	    in6_setscope(&reddst6, ifp, NULL)) {
 		goto freeit;
 	}
 
 	/* validation */
 	src6 = ip6->ip6_src;
 	if (!IN6_IS_ADDR_LINKLOCAL(&src6)) {
 		nd6log((LOG_ERR,
 		    "ICMP6 redirect sent from %s rejected; "
 		    "must be from linklocal\n",
 		    ip6_sprintf(ip6buf, &src6)));
 		goto bad;
 	}
 	if (ip6->ip6_hlim != 255) {
 		nd6log((LOG_ERR,
 		    "ICMP6 redirect sent from %s rejected; "
 		    "hlim=%d (must be 255)\n",
 		    ip6_sprintf(ip6buf, &src6), ip6->ip6_hlim));
 		goto bad;
 	}
     {
 	/* ip6->ip6_src must be equal to gw for icmp6->icmp6_reddst */
-	struct nhop6_basic nh6;
+	struct nhop_object *nh;
 	struct in6_addr kdst;
 	uint32_t scopeid;
 
 	in6_splitscope(&reddst6, &kdst, &scopeid);
-	if (fib6_lookup_nh_basic(ifp->if_fib, &kdst, scopeid, 0, 0,&nh6)==0){
-		if ((nh6.nh_flags & NHF_GATEWAY) == 0) {
+	NET_EPOCH_ASSERT();
+	nh = fib6_lookup(ifp->if_fib, &kdst, scopeid, 0, 0);
+	if (nh != NULL) {
+		struct in6_addr nh_addr;
+		nh_addr = ifatoia6(nh->nh_ifa)->ia_addr.sin6_addr;
+		if ((nh->nh_flags & NHF_GATEWAY) == 0) {
 			nd6log((LOG_ERR,
 			    "ICMP6 redirect rejected; no route "
 			    "with inet6 gateway found for redirect dst: %s\n",
 			    icmp6_redirect_diag(&src6, &reddst6, &redtgt6)));
 			goto bad;
 		}
 
 		/*
-		 * Embed scope zone id into next hop address, since
-		 * fib6_lookup_nh_basic() returns address without embedded
-		 * scope zone id.
+		 * Compare the sender against the nexthop's gateway address.
 		 */
-		if (in6_setscope(&nh6.nh_addr, m->m_pkthdr.rcvif, NULL))
-			goto freeit;
+		nh_addr = nh->gw6_sa.sin6_addr;
 
-		if (IN6_ARE_ADDR_EQUAL(&src6, &nh6.nh_addr) == 0) {
+		if (IN6_ARE_ADDR_EQUAL(&src6, &nh_addr) == 0) {
 			nd6log((LOG_ERR,
 			    "ICMP6 redirect rejected; "
 			    "not equal to gw-for-src=%s (must be same): "
 			    "%s\n",
-			    ip6_sprintf(ip6buf, &nh6.nh_addr),
+			    ip6_sprintf(ip6buf, &nh_addr),
 			    icmp6_redirect_diag(&src6, &reddst6, &redtgt6)));
 			goto bad;
 		}
 	} else {
 		nd6log((LOG_ERR,
 		    "ICMP6 redirect rejected; "
 		    "no route found for redirect dst: %s\n",
 		    icmp6_redirect_diag(&src6, &reddst6, &redtgt6)));
 		goto bad;
 	}
     }
 	if (IN6_IS_ADDR_MULTICAST(&reddst6)) {
 		nd6log((LOG_ERR,
 		    "ICMP6 redirect rejected; "
 		    "redirect dst must be unicast: %s\n",
 		    icmp6_redirect_diag(&src6, &reddst6, &redtgt6)));
 		goto bad;
 	}
 
 	is_router = is_onlink = 0;
 	if (IN6_IS_ADDR_LINKLOCAL(&redtgt6))
 		is_router = 1;	/* router case */
 	if (bcmp(&redtgt6, &reddst6, sizeof(redtgt6)) == 0)
 		is_onlink = 1;	/* on-link destination case */
 	if (!is_router && !is_onlink) {
 		nd6log((LOG_ERR,
 		    "ICMP6 redirect rejected; "
 		    "neither router case nor onlink case: %s\n",
 		    icmp6_redirect_diag(&src6, &reddst6, &redtgt6)));
 		goto bad;
 	}
 
 	icmp6len -= sizeof(*nd_rd);
 	nd6_option_init(nd_rd + 1, icmp6len, &ndopts);
 	if (nd6_options(&ndopts) < 0) {
 		nd6log((LOG_INFO, "%s: invalid ND option, rejected: %s\n",
 		    __func__, icmp6_redirect_diag(&src6, &reddst6, &redtgt6)));
 		/* nd6_options have incremented stats */
 		goto freeit;
 	}
 
 	lladdr = NULL;
 	lladdrlen = 0;
 	if (ndopts.nd_opts_tgt_lladdr) {
 		lladdr = (char *)(ndopts.nd_opts_tgt_lladdr + 1);
 		lladdrlen = ndopts.nd_opts_tgt_lladdr->nd_opt_len << 3;
 	}
 
 	if (lladdr && ((ifp->if_addrlen + 2 + 7) & ~7) != lladdrlen) {
 		nd6log((LOG_INFO, "%s: lladdrlen mismatch for %s "
 		    "(if %d, icmp6 packet %d): %s\n",
 		    __func__, ip6_sprintf(ip6buf, &redtgt6),
 		    ifp->if_addrlen, lladdrlen - 2,
 		    icmp6_redirect_diag(&src6, &reddst6, &redtgt6)));
 		goto bad;
 	}
 
 	/* Validation passed. */
 
 	/* RFC 2461 8.3 */
 	nd6_cache_lladdr(ifp, &redtgt6, lladdr, lladdrlen, ND_REDIRECT,
 	    is_onlink ? ND_REDIRECT_ONLINK : ND_REDIRECT_ROUTER);
 
 	/*
 	 * Install a gateway route in the better-router case or an interface
 	 * route in the on-link-destination case.
 	 */
 	{
 		struct sockaddr_in6 sdst;
 		struct sockaddr_in6 sgw;
 		struct sockaddr_in6 ssrc;
 		struct sockaddr *gw;
 		int rt_flags;
 		u_int fibnum;
 
 		bzero(&sdst, sizeof(sdst));
 		bzero(&ssrc, sizeof(ssrc));
 		sdst.sin6_family = ssrc.sin6_family = AF_INET6;
 		sdst.sin6_len = ssrc.sin6_len = sizeof(struct sockaddr_in6);
 		bcopy(&reddst6, &sdst.sin6_addr, sizeof(struct in6_addr));
 		bcopy(&src6, &ssrc.sin6_addr, sizeof(struct in6_addr));
 		rt_flags = 0;
 		if (is_router) {
 			bzero(&sgw, sizeof(sgw));
 			sgw.sin6_family = AF_INET6;
 			sgw.sin6_len = sizeof(struct sockaddr_in6);
 			bcopy(&redtgt6, &sgw.sin6_addr,
 				sizeof(struct in6_addr));
 			gw = (struct sockaddr *)&sgw;
 			rt_flags |= RTF_GATEWAY;
 		} else
 			gw = ifp->if_addr->ifa_addr;
 		for (fibnum = 0; fibnum < rt_numfibs; fibnum++)
 			rib_add_redirect(fibnum, (struct sockaddr *)&sdst, gw,
 			    (struct sockaddr *)&ssrc, ifp, rt_flags,
 			    V_icmp6_redirtimeout);
 	}
 	/* finally update cached route in each socket via pfctlinput */
     {
 	struct sockaddr_in6 sdst;
 
 	bzero(&sdst, sizeof(sdst));
 	sdst.sin6_family = AF_INET6;
 	sdst.sin6_len = sizeof(struct sockaddr_in6);
 	bcopy(&reddst6, &sdst.sin6_addr, sizeof(struct in6_addr));
 	pfctlinput(PRC_REDIRECT_HOST, (struct sockaddr *)&sdst);
     }
 
  freeit:
 	m_freem(m);
 	return;
 
  bad:
 	ICMP6STAT_INC(icp6s_badredirect);
 	m_freem(m);
 }
 
 void
 icmp6_redirect_output(struct mbuf *m0, struct nhop_object *nh)
 {
 	struct ifnet *ifp;	/* my outgoing interface */
 	struct in6_addr *ifp_ll6;
 	struct in6_addr *router_ll6;
 	struct ip6_hdr *sip6;	/* m0 as struct ip6_hdr */
 	struct mbuf *m = NULL;	/* newly allocated one */
 	struct m_tag *mtag;
 	struct ip6_hdr *ip6;	/* m as struct ip6_hdr */
 	struct nd_redirect *nd_rd;
 	struct llentry *ln = NULL;
 	size_t maxlen;
 	u_char *p;
 	struct ifnet *outif = NULL;
 	struct sockaddr_in6 src_sa;
 
 	icmp6_errcount(ND_REDIRECT, 0);
 
 	/* if we are not router, we don't send icmp6 redirect */
 	if (!V_ip6_forwarding)
 		goto fail;
 
 	/* sanity check */
 	if (!m0 || !nh || !(NH_IS_VALID(nh)) || !(ifp = nh->nh_ifp))
 		goto fail;
 
 	/*
 	 * Address check:
 	 *  the source address must identify a neighbor, and
 	 *  the destination address must not be a multicast address
 	 *  [RFC 2461, sec 8.2]
 	 */
 	sip6 = mtod(m0, struct ip6_hdr *);
 	bzero(&src_sa, sizeof(src_sa));
 	src_sa.sin6_family = AF_INET6;
 	src_sa.sin6_len = sizeof(src_sa);
 	src_sa.sin6_addr = sip6->ip6_src;
 	if (nd6_is_addr_neighbor(&src_sa, ifp) == 0)
 		goto fail;
 	if (IN6_IS_ADDR_MULTICAST(&sip6->ip6_dst))
 		goto fail;	/* what should we do here? */
 
 	/* rate limit */
 	if (icmp6_ratelimit(&sip6->ip6_src, ND_REDIRECT, 0))
 		goto fail;
 
 	/*
 	 * Since we are going to append up to 1280 bytes (= IPV6_MMTU),
 	 * we almost always ask for an mbuf cluster for simplicity.
 	 * (MHLEN < IPV6_MMTU is almost always true)
 	 */
 #if IPV6_MMTU >= MCLBYTES
 # error assumption failed about IPV6_MMTU and MCLBYTES
 #endif
 	m = m_getcl(M_NOWAIT, MT_DATA, M_PKTHDR);
 	if (m == NULL)
 		goto fail;
 	M_SETFIB(m, M_GETFIB(m0));
 	maxlen = M_TRAILINGSPACE(m);
 	maxlen = min(IPV6_MMTU, maxlen);
 	/* just for safety */
 	if (maxlen < sizeof(struct ip6_hdr) + sizeof(struct icmp6_hdr) +
 	    ((sizeof(struct nd_opt_hdr) + ifp->if_addrlen + 7) & ~7)) {
 		goto fail;
 	}
 
 	{
 		/* get ip6 linklocal address for ifp(my outgoing interface). */
 		struct in6_ifaddr *ia;
 		if ((ia = in6ifa_ifpforlinklocal(ifp,
 						 IN6_IFF_NOTREADY|
 						 IN6_IFF_ANYCAST)) == NULL)
 			goto fail;
 		ifp_ll6 = &ia->ia_addr.sin6_addr;
 		/* XXXRW: reference released prematurely. */
 		ifa_free(&ia->ia_ifa);
 	}
 
 	/* get ip6 linklocal address for the router. */
 	if (nh->nh_flags & NHF_GATEWAY) {
 		struct sockaddr_in6 *sin6;
 		sin6 = &nh->gw6_sa;
 		router_ll6 = &sin6->sin6_addr;
 		if (!IN6_IS_ADDR_LINKLOCAL(router_ll6))
 			router_ll6 = (struct in6_addr *)NULL;
 	} else
 		router_ll6 = (struct in6_addr *)NULL;
 
 	/* ip6 */
 	ip6 = mtod(m, struct ip6_hdr *);
 	ip6->ip6_flow = 0;
 	ip6->ip6_vfc &= ~IPV6_VERSION_MASK;
 	ip6->ip6_vfc |= IPV6_VERSION;
 	/* ip6->ip6_plen will be set later */
 	ip6->ip6_nxt = IPPROTO_ICMPV6;
 	ip6->ip6_hlim = 255;
 	/* ip6->ip6_src must be linklocal addr for my outgoing if. */
 	bcopy(ifp_ll6, &ip6->ip6_src, sizeof(struct in6_addr));
 	bcopy(&sip6->ip6_src, &ip6->ip6_dst, sizeof(struct in6_addr));
 
 	/* ND Redirect */
 	nd_rd = (struct nd_redirect *)(ip6 + 1);
 	nd_rd->nd_rd_type = ND_REDIRECT;
 	nd_rd->nd_rd_code = 0;
 	nd_rd->nd_rd_reserved = 0;
 	if (nh->nh_flags & NHF_GATEWAY) {
 		/*
 		 * nd_rd->nd_rd_target must be a link-local address in
 		 * better router cases.
 		 */
 		if (!router_ll6)
 			goto fail;
 		bcopy(router_ll6, &nd_rd->nd_rd_target,
 		    sizeof(nd_rd->nd_rd_target));
 		bcopy(&sip6->ip6_dst, &nd_rd->nd_rd_dst,
 		    sizeof(nd_rd->nd_rd_dst));
 	} else {
 		/* make sure redtgt == reddst */
 		bcopy(&sip6->ip6_dst, &nd_rd->nd_rd_target,
 		    sizeof(nd_rd->nd_rd_target));
 		bcopy(&sip6->ip6_dst, &nd_rd->nd_rd_dst,
 		    sizeof(nd_rd->nd_rd_dst));
 	}
 
 	p = (u_char *)(nd_rd + 1);
 
 	if (!router_ll6)
 		goto nolladdropt;
 
 	{
 		/* target lladdr option */
 		int len;
 		struct nd_opt_hdr *nd_opt;
 		char *lladdr;
 
 		ln = nd6_lookup(router_ll6, 0, ifp);
 		if (ln == NULL)
 			goto nolladdropt;
 
 		len = sizeof(*nd_opt) + ifp->if_addrlen;
 		len = (len + 7) & ~7;	/* round by 8 */
 		/* safety check */
 		if (len + (p - (u_char *)ip6) > maxlen) 			
 			goto nolladdropt;
 
 		if (ln->la_flags & LLE_VALID) {
 			nd_opt = (struct nd_opt_hdr *)p;
 			nd_opt->nd_opt_type = ND_OPT_TARGET_LINKADDR;
 			nd_opt->nd_opt_len = len >> 3;
 			lladdr = (char *)(nd_opt + 1);
 			bcopy(ln->ll_addr, lladdr, ifp->if_addrlen);
 			p += len;
 		}
 	}
 nolladdropt:
 	if (ln != NULL)
 		LLE_RUNLOCK(ln);
 		
 	m->m_pkthdr.len = m->m_len = p - (u_char *)ip6;
 
 	/* just to be safe */
 #ifdef M_DECRYPTED	/*not openbsd*/
 	if (m0->m_flags & M_DECRYPTED)
 		goto noredhdropt;
 #endif
 	if (p - (u_char *)ip6 > maxlen)
 		goto noredhdropt;
 
 	{
 		/* redirected header option */
 		int len;
 		struct nd_opt_rd_hdr *nd_opt_rh;
 
 		/*
 		 * compute the maximum size for icmp6 redirect header option.
 		 * XXX room for auth header?
 		 */
 		len = maxlen - (p - (u_char *)ip6);
 		len &= ~7;
 
 		/* This is just for simplicity. */
 		if (m0->m_pkthdr.len != m0->m_len) {
 			if (m0->m_next) {
 				m_freem(m0->m_next);
 				m0->m_next = NULL;
 			}
 			m0->m_pkthdr.len = m0->m_len;
 		}
 
 		/*
 		 * Redirected header option spec (RFC2461 4.6.3) talks nothing
 		 * about padding/truncate rule for the original IP packet.
 		 * From the discussion on IPv6imp in Feb 1999,
 		 * the consensus was:
 		 * - "attach as much as possible" is the goal
 		 * - pad if not aligned (original size can be guessed by
 		 *   original ip6 header)
 		 * Following code adds the padding if it is simple enough,
 		 * and truncates if not.
 		 */
 		if (m0->m_next || m0->m_pkthdr.len != m0->m_len)
 			panic("assumption failed in %s:%d", __FILE__,
 			    __LINE__);
 
 		if (len - sizeof(*nd_opt_rh) < m0->m_pkthdr.len) {
 			/* not enough room, truncate */
 			m0->m_pkthdr.len = m0->m_len = len -
 			    sizeof(*nd_opt_rh);
 		} else {
 			/* enough room, pad or truncate */
 			size_t extra;
 
 			extra = m0->m_pkthdr.len % 8;
 			if (extra) {
 				/* pad if easy enough, truncate if not */
 				if (8 - extra <= M_TRAILINGSPACE(m0)) {
 					/* pad */
 					m0->m_len += (8 - extra);
 					m0->m_pkthdr.len += (8 - extra);
 				} else {
 					/* truncate */
 					m0->m_pkthdr.len -= extra;
 					m0->m_len -= extra;
 				}
 			}
 			len = m0->m_pkthdr.len + sizeof(*nd_opt_rh);
 			m0->m_pkthdr.len = m0->m_len = len -
 			    sizeof(*nd_opt_rh);
 		}
 
 		nd_opt_rh = (struct nd_opt_rd_hdr *)p;
 		bzero(nd_opt_rh, sizeof(*nd_opt_rh));
 		nd_opt_rh->nd_opt_rh_type = ND_OPT_REDIRECTED_HEADER;
 		nd_opt_rh->nd_opt_rh_len = len >> 3;
 		p += sizeof(*nd_opt_rh);
 		m->m_pkthdr.len = m->m_len = p - (u_char *)ip6;
 
 		/* connect m0 to m */
 		m_tag_delete_chain(m0, NULL);
 		m0->m_flags &= ~M_PKTHDR;
 		m->m_next = m0;
 		m->m_pkthdr.len = m->m_len + m0->m_len;
 		m0 = NULL;
 	}
 noredhdropt:;
 	if (m0) {
 		m_freem(m0);
 		m0 = NULL;
 	}
 
 	/* XXX: clear embedded link IDs in the inner header */
 	in6_clearscope(&sip6->ip6_src);
 	in6_clearscope(&sip6->ip6_dst);
 	in6_clearscope(&nd_rd->nd_rd_target);
 	in6_clearscope(&nd_rd->nd_rd_dst);
 
 	ip6->ip6_plen = htons(m->m_pkthdr.len - sizeof(struct ip6_hdr));
 
 	nd_rd->nd_rd_cksum = 0;
 	nd_rd->nd_rd_cksum = in6_cksum(m, IPPROTO_ICMPV6,
 	    sizeof(*ip6), ntohs(ip6->ip6_plen));
 
         if (send_sendso_input_hook != NULL) {
 		mtag = m_tag_get(PACKET_TAG_ND_OUTGOING, sizeof(unsigned short),
 			M_NOWAIT);
 		if (mtag == NULL)
 			goto fail;
 		*(unsigned short *)(mtag + 1) = nd_rd->nd_rd_type;
 		m_tag_prepend(m, mtag);
 	}
 
 	/* send the packet to outside... */
 	ip6_output(m, NULL, NULL, 0, NULL, &outif, NULL);
 	if (outif) {
 		icmp6_ifstat_inc(outif, ifs6_out_msg);
 		icmp6_ifstat_inc(outif, ifs6_out_redirect);
 	}
 	ICMP6STAT_INC(icp6s_outhist[ND_REDIRECT]);
 
 	return;
 
 fail:
 	if (m)
 		m_freem(m);
 	if (m0)
 		m_freem(m0);
 }
 
 /*
  * ICMPv6 socket option processing.
  */
 int
 icmp6_ctloutput(struct socket *so, struct sockopt *sopt)
 {
 	int error = 0;
 	int optlen;
 	struct inpcb *inp = sotoinpcb(so);
 	int level, op, optname;
 
 	if (sopt) {
 		level = sopt->sopt_level;
 		op = sopt->sopt_dir;
 		optname = sopt->sopt_name;
 		optlen = sopt->sopt_valsize;
 	} else
 		level = op = optname = optlen = 0;
 
 	if (level != IPPROTO_ICMPV6) {
 		return EINVAL;
 	}
 
 	switch (op) {
 	case PRCO_SETOPT:
 		switch (optname) {
 		case ICMP6_FILTER:
 		    {
 			struct icmp6_filter ic6f;
 
 			if (optlen != sizeof(ic6f)) {
 				error = EMSGSIZE;
 				break;
 			}
 			error = sooptcopyin(sopt, &ic6f, optlen, optlen);
 			if (error == 0) {
 				INP_WLOCK(inp);
 				*inp->in6p_icmp6filt = ic6f;
 				INP_WUNLOCK(inp);
 			}
 			break;
 		    }
 
 		default:
 			error = ENOPROTOOPT;
 			break;
 		}
 		break;
 
 	case PRCO_GETOPT:
 		switch (optname) {
 		case ICMP6_FILTER:
 		    {
 			struct icmp6_filter ic6f;
 
 			INP_RLOCK(inp);
 			ic6f = *inp->in6p_icmp6filt;
 			INP_RUNLOCK(inp);
 			error = sooptcopyout(sopt, &ic6f, sizeof(ic6f));
 			break;
 		    }
 
 		default:
 			error = ENOPROTOOPT;
 			break;
 		}
 		break;
 	}
 
 	return (error);
 }
 
 /*
  * Perform rate limit check.
  * Returns 0 if it is okay to send the icmp6 packet.
  * Returns 1 if the router SHOULD NOT send this icmp6 packet due to rate
  * limitation.
  *
  * XXX per-destination/type check necessary?
  *
  * dst - not used at this moment
  * type - not used at this moment
  * code - not used at this moment
  */
 static int
 icmp6_ratelimit(const struct in6_addr *dst, const int type,
     const int code)
 {
 	int ret;
 
 	ret = 0;	/* okay to send */
 
 	/* PPS limit */
 	if (!ppsratecheck(&V_icmp6errppslim_last, &V_icmp6errpps_count,
 	    V_icmp6errppslim)) {
 		/* The packet is subject to rate limit */
 		ret++;
 	}
 
 	return ret;
 }
Index: head/sys/netinet6/in6.c
===================================================================
--- head/sys/netinet6/in6.c	(revision 362899)
+++ head/sys/netinet6/in6.c	(revision 362900)
@@ -1,2557 +1,2557 @@
 /*-
  * SPDX-License-Identifier: BSD-3-Clause
  *
  * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. Neither the name of the project nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  *	$KAME: in6.c,v 1.259 2002/01/21 11:37:50 keiichi Exp $
  */
 
 /*-
  * Copyright (c) 1982, 1986, 1991, 1993
  *	The Regents of the University of California.  All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  *	@(#)in.c	8.2 (Berkeley) 11/15/93
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include "opt_inet.h"
 #include "opt_inet6.h"
 
 #include <sys/param.h>
 #include <sys/eventhandler.h>
 #include <sys/errno.h>
 #include <sys/jail.h>
 #include <sys/malloc.h>
 #include <sys/socket.h>
 #include <sys/socketvar.h>
 #include <sys/sockio.h>
 #include <sys/systm.h>
 #include <sys/priv.h>
 #include <sys/proc.h>
 #include <sys/protosw.h>
 #include <sys/time.h>
 #include <sys/kernel.h>
 #include <sys/lock.h>
 #include <sys/rmlock.h>
 #include <sys/sysctl.h>
 #include <sys/syslog.h>
 
 #include <net/if.h>
 #include <net/if_var.h>
 #include <net/if_types.h>
 #include <net/route.h>
+#include <net/route/nhop.h>
 #include <net/if_dl.h>
 #include <net/vnet.h>
 
 #include <netinet/in.h>
 #include <netinet/in_var.h>
 #include <net/if_llatbl.h>
 #include <netinet/if_ether.h>
 #include <netinet/in_systm.h>
 #include <netinet/ip.h>
 #include <netinet/in_pcb.h>
 #include <netinet/ip_carp.h>
 
 #include <netinet/ip6.h>
 #include <netinet6/ip6_var.h>
 #include <netinet6/nd6.h>
 #include <netinet6/mld6_var.h>
 #include <netinet6/ip6_mroute.h>
 #include <netinet6/in6_ifattach.h>
 #include <netinet6/scope6_var.h>
 #include <netinet6/in6_fib.h>
 #include <netinet6/in6_pcb.h>
 
 
 /*
  * struct in6_ifreq and struct ifreq must be type punnable for common members
  * of ifr_ifru to allow accessors to be shared.
  */
 _Static_assert(offsetof(struct in6_ifreq, ifr_ifru) ==
     offsetof(struct ifreq, ifr_ifru),
     "struct in6_ifreq and struct ifreq are not type punnable");
 
 /*
  * Variable declared here and reached through the V_ accessor macro below.
  * In this file it is tested in in6_update_ifa_join_mc() to decide whether
  * the group produced by in6_nigroup_oldmcprefix() is joined as well.
  */
 VNET_DECLARE(int, icmp6_nodeinfo_oldmcprefix);
 #define V_icmp6_nodeinfo_oldmcprefix	VNET(icmp6_nodeinfo_oldmcprefix)
 
 /*
  * Definitions of some constant IP6 addresses.
  * Each object is initialized from the correspondingly named *_INIT macro.
  */
 const struct in6_addr in6addr_any = IN6ADDR_ANY_INIT;
 const struct in6_addr in6addr_loopback = IN6ADDR_LOOPBACK_INIT;
 const struct in6_addr in6addr_nodelocal_allnodes =
 	IN6ADDR_NODELOCAL_ALLNODES_INIT;
 const struct in6_addr in6addr_linklocal_allnodes =
 	IN6ADDR_LINKLOCAL_ALLNODES_INIT;
 const struct in6_addr in6addr_linklocal_allrouters =
 	IN6ADDR_LINKLOCAL_ALLROUTERS_INIT;
 const struct in6_addr in6addr_linklocal_allv2routers =
 	IN6ADDR_LINKLOCAL_ALLV2ROUTERS_INIT;
 
 /*
  * Constant masks built from the IN6MASK<n> macros; presumably <n> is the
  * prefix length in bits (0, 32, 64, 96, 128) -- confirm against the macro
  * definitions.
  */
 const struct in6_addr in6mask0 = IN6MASK0;
 const struct in6_addr in6mask32 = IN6MASK32;
 const struct in6_addr in6mask64 = IN6MASK64;
 const struct in6_addr in6mask96 = IN6MASK96;
 const struct in6_addr in6mask128 = IN6MASK128;
 
 /*
  * Wildcard AF_INET6 socket address: positional initializer giving the
  * structure's own size, the AF_INET6 family, IN6ADDR_ANY_INIT as the
  * address, and zero for every other member.
  */
 const struct sockaddr_in6 sa6_any =
 	{ sizeof(sa6_any), AF_INET6, 0, 0, IN6ADDR_ANY_INIT, 0 };
 
 /* Forward declarations of file-local helpers. */
 static int in6_notify_ifa(struct ifnet *, struct in6_ifaddr *,
 	struct in6_aliasreq *, int);
 static void in6_unlink_ifa(struct in6_ifaddr *, struct ifnet *);
 
 /* Helpers that in6_update_ifa() is split into. */
 static int in6_validate_ifra(struct ifnet *, struct in6_aliasreq *,
     struct in6_ifaddr *, int);
 static struct in6_ifaddr *in6_alloc_ifa(struct ifnet *,
     struct in6_aliasreq *, int flags);
 static int in6_update_ifa_internal(struct ifnet *, struct in6_aliasreq *,
     struct in6_ifaddr *, int, int);
 static int in6_broadcast_ifa(struct ifnet *, struct in6_aliasreq *,
     struct in6_ifaddr *, int);
 
 /*
  * Conversions between the generic and the IPv6 address structures:
  * ifa2ia6() casts a struct ifaddr pointer to struct in6_ifaddr *,
  * ia62ifa() takes the address of the ia_ifa member of an in6_ifaddr.
  */
 #define ifa2ia6(ifa)	((struct in6_ifaddr *)(ifa))
 #define ia62ifa(ia6)	(&((ia6)->ia_ifa))
 
 
 void
 in6_newaddrmsg(struct in6_ifaddr *ia, int cmd)
 {
 	struct rt_addrinfo info;
 	struct ifaddr *ifa;
 	struct sockaddr_dl gateway;
 	int fibnum;
 
 	ifa = &ia->ia_ifa;
 
 	/*
 	 * Prepare info data for the host route.
 	 * This code mimics one from ifa_maintain_loopback_route().
 	 */
 	bzero(&info, sizeof(struct rt_addrinfo));
 	info.rti_flags = ifa->ifa_flags | RTF_HOST | RTF_STATIC | RTF_PINNED;
 	info.rti_info[RTAX_DST] = ifa->ifa_addr;
 	info.rti_info[RTAX_GATEWAY] = (struct sockaddr *)&gateway;
 	link_init_sdl(ifa->ifa_ifp, (struct sockaddr *)&gateway, ifa->ifa_ifp->if_type);
 	if (cmd != RTM_DELETE)
 		info.rti_ifp = V_loif;
 
 
 	fibnum = V_rt_add_addr_allfibs ? RT_ALL_FIBS : ia62ifa(ia)->ifa_ifp->if_fib;
 
 	if (cmd == RTM_ADD) {
 		rt_addrmsg(cmd, &ia->ia_ifa, fibnum);
 		rt_routemsg_info(cmd, &info, fibnum);
 	} else if (cmd == RTM_DELETE) {
 		rt_routemsg_info(cmd, &info, fibnum);
 		rt_addrmsg(cmd, &ia->ia_ifa, fibnum);
 	}
 }
 
 /*
  * Convert an IPv6 netmask into a prefix length.
  *
  * mask - the mask to examine.
  * lim0 - one past the last mask byte to examine.  When NULL, or when it
  *        does not point into [mask, mask + sizeof(*mask)], all
  *        sizeof(*mask) bytes are examined.
  *
  * Returns the number of leading one bits, or -1 if any bit is set after
  * the first zero bit within the examined range (non-contiguous mask).
  */
 int
 in6_mask2len(struct in6_addr *mask, u_char *lim0)
 {
 	u_char *start = (u_char *)mask;
 	u_char *lim = lim0, *p;
 	int x = 0, y;
 
 	/* ignore the scope_id part */
 	if (lim0 == NULL || lim0 < start ||
 	    (size_t)(lim0 - start) > sizeof(*mask))
 		lim = start + sizeof(*mask);
 
 	/* Count the leading all-ones bytes. */
 	for (p = start; p < lim; x++, p++) {
 		if (*p != 0xff)
 			break;
 	}
 	if (p >= lim)
 		return (x * 8);
 
 	/* Count the leading one bits of the first byte that is not 0xff. */
 	for (y = 0; y < 8; y++) {
 		if ((*p & (0x80 >> y)) == 0)
 			break;
 	}
 
 	/*
 	 * Everything after the first zero bit must be zero.  The partial
 	 * byte is checked even when it contributes no bits (y == 0), so a
 	 * byte such as 0x7f is rejected rather than silently treated as 0.
 	 */
 	if ((*p & (0x00ff >> y)) != 0)
 		return (-1);
 	for (p = p + 1; p < lim; p++)
 		if (*p != 0)
 			return (-1);
 
 	return (x * 8 + y);
 }
 
 #ifdef COMPAT_FREEBSD32
 /*
  * Request layout accepted for SIOCGDEFIFACE32_IN6, with ifindex as a
  * fixed 32-bit member.  in6_control() services that command by running
  * SIOCGDEFIFACE_IN6 on a struct in6_ndifreq and copying its ifindex here.
  */
 struct in6_ndifreq32 {
 	char ifname[IFNAMSIZ];
 	uint32_t ifindex;
 };
 #define	SIOCGDEFIFACE32_IN6	_IOWR('i', 86, struct in6_ndifreq32)
 #endif
 
 /*
  * Handle IPv6 ioctl requests.
  *
  * so   - socket the request was issued on (not referenced in this function).
  * cmd  - ioctl command; OSIOCAIFADDR_IN6 is rewritten to SIOCAIFADDR_IN6.
  * data - request argument, viewed as struct in6_ifreq or struct
  *        in6_aliasreq depending on the command.
  * ifp  - target interface; may be NULL only for the multicast-routing and
  *        address-policy commands handled before the NULL check.
  * td   - calling thread; privilege and jail checks are applied only when
  *        it is not NULL.
  *
  * Dispatch happens in stages, each a separate switch on cmd:
  *  1. multicast routing counters     -> mrt6_ioctl (EOPNOTSUPP if unset)
  *  2. address selection policy       -> in6_src_ioctl
  *  3. neighbor discovery commands    -> nd6_ioctl
  *  4. obsolete prefix commands       -> logged, EOPNOTSUPP
  *  5. scope commands                 -> scope6_ioctl
  *  6. interface address commands     -> handled in this function
  *  7. everything else                -> ifp->if_ioctl
  *
  * Returns 0 on success or an errno value.
  */
 int
 in6_control(struct socket *so, u_long cmd, caddr_t data,
     struct ifnet *ifp, struct thread *td)
 {
 	struct	in6_ifreq *ifr = (struct in6_ifreq *)data;
 	struct	in6_ifaddr *ia = NULL;
 	struct	in6_aliasreq *ifra = (struct in6_aliasreq *)data;
 	struct sockaddr_in6 *sa6;
 	int carp_attached = 0;
 	int error;
 	/* Command as issued; compared with cmd below to detect the compat case. */
 	u_long ocmd = cmd;
 
 	/*
 	 * Compat to make pre-10.x ifconfig(8) operable.
 	 */
 	if (cmd == OSIOCAIFADDR_IN6)
 		cmd = SIOCAIFADDR_IN6;
 
 	switch (cmd) {
 	case SIOCGETSGCNT_IN6:
 	case SIOCGETMIFCNT_IN6:
 		/*
 		 * XXX mrt_ioctl has a 3rd, unused, FIB argument in route.c.
 		 * We cannot see how that would be needed, so do not adjust the
 		 * KPI blindly; more likely should clean up the IPv4 variant.
 		 */
 		return (mrt6_ioctl ? mrt6_ioctl(cmd, data) : EOPNOTSUPP);
 	}
 
 	switch (cmd) {
 	case SIOCAADDRCTL_POLICY:
 	case SIOCDADDRCTL_POLICY:
 		if (td != NULL) {
 			error = priv_check(td, PRIV_NETINET_ADDRCTRL6);
 			if (error)
 				return (error);
 		}
 		return (in6_src_ioctl(cmd, data));
 	}
 
 	/* Every command from here on operates on an interface. */
 	if (ifp == NULL)
 		return (EOPNOTSUPP);
 
 	switch (cmd) {
 	case SIOCSNDFLUSH_IN6:
 	case SIOCSPFXFLUSH_IN6:
 	case SIOCSRTRFLUSH_IN6:
 	case SIOCSDEFIFACE_IN6:
 	case SIOCSIFINFO_FLAGS:
 	case SIOCSIFINFO_IN6:
 		/* The modifying ND commands require PRIV_NETINET_ND6. */
 		if (td != NULL) {
 			error = priv_check(td, PRIV_NETINET_ND6);
 			if (error)
 				return (error);
 		}
 		/* FALLTHROUGH */
 	case OSIOCGIFINFO_IN6:
 	case SIOCGIFINFO_IN6:
 	case SIOCGNBRINFO_IN6:
 	case SIOCGDEFIFACE_IN6:
 		return (nd6_ioctl(cmd, data, ifp));
 
 #ifdef COMPAT_FREEBSD32
 	case SIOCGDEFIFACE32_IN6:
 		{
 			struct in6_ndifreq ndif;
 			struct in6_ndifreq32 *ndif32;
 
 			/*
 			 * Run the native request on a local structure, then
 			 * copy only ifindex into the caller's 32-bit layout.
 			 */
 			error = nd6_ioctl(SIOCGDEFIFACE_IN6, (caddr_t)&ndif,
 			    ifp);
 			if (error)
 				return (error);
 			ndif32 = (struct in6_ndifreq32 *)data;
 			ndif32->ifindex = ndif.ifindex;
 			return (0);
 		}
 #endif
 	}
 
 	switch (cmd) {
 	case SIOCSIFPREFIX_IN6:
 	case SIOCDIFPREFIX_IN6:
 	case SIOCAIFPREFIX_IN6:
 	case SIOCCIFPREFIX_IN6:
 	case SIOCSGIFPREFIX_IN6:
 	case SIOCGIFPREFIX_IN6:
 		log(LOG_NOTICE,
 		    "prefix ioctls are now invalidated. "
 		    "please use ifconfig.\n");
 		return (EOPNOTSUPP);
 	}
 
 	switch (cmd) {
 	case SIOCSSCOPE6:
 		if (td != NULL) {
 			error = priv_check(td, PRIV_NETINET_SCOPE6);
 			if (error)
 				return (error);
 		}
 		/* FALLTHROUGH */
 	case SIOCGSCOPE6:
 	case SIOCGSCOPE6DEF:
 		return (scope6_ioctl(cmd, data, ifp));
 	}
 
 	/*
 	 * Find address for this interface, if it exists.
 	 *
 	 * In netinet code, we have checked ifra_addr in SIOCSIF*ADDR operation
 	 * only, and used the first interface address as the target of other
 	 * operations (without checking ifra_addr).  This was because netinet
 	 * code/API assumed at most 1 interface address per interface.
 	 * Since IPv6 allows a node to assign multiple addresses
 	 * on a single interface, we almost always look and check the
 	 * presence of ifra_addr, and reject invalid ones here.
 	 * It also decreases duplicated code among SIOC*_IN6 operations.
 	 */
 	switch (cmd) {
 	case SIOCAIFADDR_IN6:
 	case SIOCSIFPHYADDR_IN6:
 		sa6 = &ifra->ifra_addr;
 		break;
 	case SIOCSIFADDR_IN6:
 	case SIOCGIFADDR_IN6:
 	case SIOCSIFDSTADDR_IN6:
 	case SIOCSIFNETMASK_IN6:
 	case SIOCGIFDSTADDR_IN6:
 	case SIOCGIFNETMASK_IN6:
 	case SIOCDIFADDR_IN6:
 	case SIOCGIFPSRCADDR_IN6:
 	case SIOCGIFPDSTADDR_IN6:
 	case SIOCGIFAFLAG_IN6:
 	case SIOCSNDFLUSH_IN6:
 	case SIOCSPFXFLUSH_IN6:
 	case SIOCSRTRFLUSH_IN6:
 	case SIOCGIFALIFETIME_IN6:
 	case SIOCGIFSTAT_IN6:
 	case SIOCGIFSTAT_ICMP6:
 		sa6 = &ifr->ifr_addr;
 		break;
 	case SIOCSIFADDR:
 	case SIOCSIFBRDADDR:
 	case SIOCSIFDSTADDR:
 	case SIOCSIFNETMASK:
 		/*
 		 * Although we should pass any non-INET6 ioctl requests
 		 * down to driver, we filter some legacy INET requests.
 		 * Drivers trust SIOCSIFADDR et al to come from an already
 		 * privileged layer, and do not perform any credentials
 		 * checks or input validation.
 		 */
 		return (EINVAL);
 	default:
 		sa6 = NULL;
 		break;
 	}
 	/*
 	 * When the request carries an AF_INET6 address: put the scope into
 	 * its embedded form, apply the jail check, and look the address up
 	 * on ifp.  A non-NULL ia is released with ifa_free() at "out".
 	 */
 	if (sa6 && sa6->sin6_family == AF_INET6) {
 		if (sa6->sin6_scope_id != 0)
 			error = sa6_embedscope(sa6, 0);
 		else
 			error = in6_setscope(&sa6->sin6_addr, ifp, NULL);
 		if (error != 0)
 			return (error);
 		if (td != NULL && (error = prison_check_ip6(td->td_ucred,
 		    &sa6->sin6_addr)) != 0)
 			return (error);
 		ia = in6ifa_ifpwithaddr(ifp, &sa6->sin6_addr);
 	} else
 		ia = NULL;
 
 	/* Per-command precondition and privilege checks. */
 	switch (cmd) {
 	case SIOCSIFADDR_IN6:
 	case SIOCSIFDSTADDR_IN6:
 	case SIOCSIFNETMASK_IN6:
 		/*
 		 * Since IPv6 allows a node to assign multiple addresses
 		 * on a single interface, SIOCSIFxxx ioctls are deprecated.
 		 */
 		/* we decided to obsolete this command (20000704) */
 		error = EINVAL;
 		goto out;
 
 	case SIOCDIFADDR_IN6:
 		/*
 		 * for IPv4, we look for existing in_ifaddr here to allow
 		 * "ifconfig if0 delete" to remove the first IPv4 address on
 		 * the interface.  For IPv6, as the spec allows multiple
 		 * interface address from the day one, we consider "remove the
 		 * first one" semantics to be not preferable.
 		 */
 		if (ia == NULL) {
 			error = EADDRNOTAVAIL;
 			goto out;
 		}
 		/* FALLTHROUGH */
 	case SIOCAIFADDR_IN6:
 		/*
 		 * We always require users to specify a valid IPv6 address for
 		 * the corresponding operation.
 		 */
 		if (ifra->ifra_addr.sin6_family != AF_INET6 ||
 		    ifra->ifra_addr.sin6_len != sizeof(struct sockaddr_in6)) {
 			error = EAFNOSUPPORT;
 			goto out;
 		}
 
 		if (td != NULL) {
 			error = priv_check(td, (cmd == SIOCDIFADDR_IN6) ?
 			    PRIV_NET_DELIFADDR : PRIV_NET_ADDIFADDR);
 			if (error)
 				goto out;
 		}
 		/* FALLTHROUGH */
 	case SIOCGIFSTAT_IN6:
 	case SIOCGIFSTAT_ICMP6:
 		/* These commands dereference the interface's AF_INET6 data. */
 		if (ifp->if_afdata[AF_INET6] == NULL) {
 			error = EPFNOSUPPORT;
 			goto out;
 		}
 		break;
 
 	case SIOCGIFADDR_IN6:
 		/* This interface is basically deprecated. use SIOCGIFCONF. */
 		/* FALLTHROUGH */
 	case SIOCGIFAFLAG_IN6:
 	case SIOCGIFNETMASK_IN6:
 	case SIOCGIFDSTADDR_IN6:
 	case SIOCGIFALIFETIME_IN6:
 		/* must think again about its semantics */
 		if (ia == NULL) {
 			error = EADDRNOTAVAIL;
 			goto out;
 		}
 		break;
 	}
 
 	/* Perform the operation. */
 	switch (cmd) {
 	case SIOCGIFADDR_IN6:
 		ifr->ifr_addr = ia->ia_addr;
 		if ((error = sa6_recoverscope(&ifr->ifr_addr)) != 0)
 			goto out;
 		break;
 
 	case SIOCGIFDSTADDR_IN6:
 		if ((ifp->if_flags & IFF_POINTOPOINT) == 0) {
 			error = EINVAL;
 			goto out;
 		}
 		ifr->ifr_dstaddr = ia->ia_dstaddr;
 		if ((error = sa6_recoverscope(&ifr->ifr_dstaddr)) != 0)
 			goto out;
 		break;
 
 	case SIOCGIFNETMASK_IN6:
 		ifr->ifr_addr = ia->ia_prefixmask;
 		break;
 
 	case SIOCGIFAFLAG_IN6:
 		ifr->ifr_ifru.ifru_flags6 = ia->ia6_flags;
 		break;
 
 	case SIOCGIFSTAT_IN6:
 		COUNTER_ARRAY_COPY(((struct in6_ifextra *)
 		    ifp->if_afdata[AF_INET6])->in6_ifstat,
 		    &ifr->ifr_ifru.ifru_stat,
 		    sizeof(struct in6_ifstat) / sizeof(uint64_t));
 		break;
 
 	case SIOCGIFSTAT_ICMP6:
 		COUNTER_ARRAY_COPY(((struct in6_ifextra *)
 		    ifp->if_afdata[AF_INET6])->icmp6_ifstat,
 		    &ifr->ifr_ifru.ifru_icmp6stat,
 		    sizeof(struct icmp6_ifstat) / sizeof(uint64_t));
 		break;
 
 	case SIOCGIFALIFETIME_IN6:
 		/*
 		 * Return the stored lifetimes.  For each finite lifetime the
 		 * absolute time (update time + lifetime) is also filled in,
 		 * clamped to the largest positive time_t value.
 		 */
 		ifr->ifr_ifru.ifru_lifetime = ia->ia6_lifetime;
 		if (ia->ia6_lifetime.ia6t_vltime != ND6_INFINITE_LIFETIME) {
 			time_t maxexpire;
 			struct in6_addrlifetime *retlt =
 			    &ifr->ifr_ifru.ifru_lifetime;
 
 			/*
 			 * XXX: adjust expiration time assuming time_t is
 			 * signed.
 			 */
 			maxexpire = (-1) &
 			    ~((time_t)1 << ((sizeof(maxexpire) * 8) - 1));
 			if (ia->ia6_lifetime.ia6t_vltime <
 			    maxexpire - ia->ia6_updatetime) {
 				retlt->ia6t_expire = ia->ia6_updatetime +
 				    ia->ia6_lifetime.ia6t_vltime;
 			} else
 				retlt->ia6t_expire = maxexpire;
 		}
 		if (ia->ia6_lifetime.ia6t_pltime != ND6_INFINITE_LIFETIME) {
 			time_t maxexpire;
 			struct in6_addrlifetime *retlt =
 			    &ifr->ifr_ifru.ifru_lifetime;
 
 			/*
 			 * XXX: adjust expiration time assuming time_t is
 			 * signed.
 			 */
 			maxexpire = (-1) &
 			    ~((time_t)1 << ((sizeof(maxexpire) * 8) - 1));
 			if (ia->ia6_lifetime.ia6t_pltime <
 			    maxexpire - ia->ia6_updatetime) {
 				retlt->ia6t_preferred = ia->ia6_updatetime +
 				    ia->ia6_lifetime.ia6t_pltime;
 			} else
 				retlt->ia6t_preferred = maxexpire;
 		}
 		break;
 
 	case SIOCAIFADDR_IN6:
 	{
 		struct nd_prefixctl pr0;
 		struct nd_prefix *pr;
 
 		/*
 		 * first, make or update the interface address structure,
 		 * and link it to the list.
 		 */
 		if ((error = in6_update_ifa(ifp, ifra, ia, 0)) != 0)
 			goto out;
 		/*
 		 * Release the pre-update lookup result (detaching CARP from
 		 * it first if attached) and look the address up again.
 		 */
 		if (ia != NULL) {
 			if (ia->ia_ifa.ifa_carp)
 				(*carp_detach_p)(&ia->ia_ifa, true);
 			ifa_free(&ia->ia_ifa);
 		}
 		if ((ia = in6ifa_ifpwithaddr(ifp, &ifra->ifra_addr.sin6_addr))
 		    == NULL) {
 			/*
 			 * this can happen when the user specify the 0 valid
 			 * lifetime.
 			 */
 			break;
 		}
 
 		/*
 		 * CARP attach is attempted only when the command was not
 		 * rewritten from OSIOCAIFADDR_IN6 and a vhid was supplied.
 		 */
 		if (cmd == ocmd && ifra->ifra_vhid > 0) {
 			if (carp_attach_p != NULL)
 				error = (*carp_attach_p)(&ia->ia_ifa,
 				    ifra->ifra_vhid);
 			else
 				error = EPROTONOSUPPORT;
 			if (error)
 				goto out;
 			else
 				carp_attached = 1;
 		}
 
 		/*
 		 * then, make the prefix on-link on the interface.
 		 * XXX: we'd rather create the prefix before the address, but
 		 * we need at least one address to install the corresponding
 		 * interface route, so we configure the address first.
 		 */
 
 		/*
 		 * convert mask to prefix length (prefixmask has already
 		 * been validated in in6_update_ifa()).
 		 */
 		bzero(&pr0, sizeof(pr0));
 		pr0.ndpr_ifp = ifp;
 		pr0.ndpr_plen = in6_mask2len(&ifra->ifra_prefixmask.sin6_addr,
 		    NULL);
 		if (pr0.ndpr_plen == 128) {
 			/* we don't need to install a host route. */
 			goto aifaddr_out;
 		}
 		pr0.ndpr_prefix = ifra->ifra_addr;
 		/* apply the mask for safety. */
 		IN6_MASK_ADDR(&pr0.ndpr_prefix.sin6_addr,
 		    &ifra->ifra_prefixmask.sin6_addr);
 
 		/*
 		 * XXX: since we don't have an API to set prefix (not address)
 		 * lifetimes, we just use the same lifetimes as addresses.
 		 * The (temporarily) installed lifetimes can be overridden by
 		 * later advertised RAs (when accept_rtadv is non 0), which is
 		 * an intended behavior.
 		 */
 		pr0.ndpr_raf_onlink = 1; /* should be configurable? */
 		pr0.ndpr_raf_auto =
 		    ((ifra->ifra_flags & IN6_IFF_AUTOCONF) != 0);
 		pr0.ndpr_vltime = ifra->ifra_lifetime.ia6t_vltime;
 		pr0.ndpr_pltime = ifra->ifra_lifetime.ia6t_pltime;
 
 		/* add the prefix if not yet. */
 		if ((pr = nd6_prefix_lookup(&pr0)) == NULL) {
 			/*
 			 * nd6_prelist_add will install the corresponding
 			 * interface route.
 			 */
 			if ((error = nd6_prelist_add(&pr0, NULL, &pr)) != 0) {
 				/* Undo the CARP attach done above. */
 				if (carp_attached)
 					(*carp_detach_p)(&ia->ia_ifa, false);
 				goto out;
 			}
 		}
 
 		/* relate the address to the prefix */
 		if (ia->ia6_ndpr == NULL) {
 			ia->ia6_ndpr = pr;
 			pr->ndpr_addrcnt++;
 
 			/*
 			 * If this is the first autoconf address from the
 			 * prefix, create a temporary address as well
 			 * (when required).
 			 */
 			if ((ia->ia6_flags & IN6_IFF_AUTOCONF) &&
 			    V_ip6_use_tempaddr && pr->ndpr_addrcnt == 1) {
 				int e;
 				if ((e = in6_tmpifadd(ia, 1, 0)) != 0) {
 					log(LOG_NOTICE, "in6_control: failed "
 					    "to create a temporary address, "
 					    "errno=%d\n", e);
 				}
 			}
 		}
 		nd6_prefix_rele(pr);
 
 		/*
 		 * this might affect the status of autoconfigured addresses,
 		 * that is, this address might make other addresses detached.
 		 */
 		pfxlist_onlink_check();
 
 aifaddr_out:
 		/*
 		 * Try to clear the flag when a new IPv6 address is added
 		 * onto an IFDISABLED interface and it succeeds.
 		 */
 		if (ND_IFINFO(ifp)->flags & ND6_IFF_IFDISABLED) {
 			struct in6_ndireq nd;
 
 			memset(&nd, 0, sizeof(nd));
 			nd.ndi.flags = ND_IFINFO(ifp)->flags;
 			nd.ndi.flags &= ~ND6_IFF_IFDISABLED;
 			/*
 			 * NOTE(review): elsewhere in this function the
 			 * nd6_ioctl() result is returned to the caller as an
 			 * errno value; confirm whether "< 0" can ever be true
 			 * here, otherwise this log is never emitted.
 			 */
 			if (nd6_ioctl(SIOCSIFINFO_FLAGS, (caddr_t)&nd, ifp) < 0)
 				log(LOG_NOTICE, "SIOCAIFADDR_IN6: "
 				    "SIOCSIFINFO_FLAGS for -ifdisabled "
 				    "failed.");
 			/*
 			 * Ignore failure of clearing the flag intentionally.
 			 * The failure means address duplication was detected.
 			 */
 		}
 		break;
 	}
 
 	case SIOCDIFADDR_IN6:
 	{
 		struct nd_prefix *pr;
 
 		/*
 		 * If the address being deleted is the only one that owns
 		 * the corresponding prefix, expire the prefix as well.
 		 * XXX: theoretically, we don't have to worry about such
 		 * relationship, since we separate the address management
 		 * and the prefix management.  We do this, however, to provide
 		 * as much backward compatibility as possible in terms of
 		 * the ioctl operation.
 		 * Note that in6_purgeaddr() will decrement ndpr_addrcnt.
 		 */
 		pr = ia->ia6_ndpr;
 		in6_purgeaddr(&ia->ia_ifa);
 		if (pr != NULL && pr->ndpr_addrcnt == 0) {
 			ND6_WLOCK();
 			nd6_prefix_unlink(pr, NULL);
 			ND6_WUNLOCK();
 			nd6_prefix_del(pr);
 		}
 		EVENTHANDLER_INVOKE(ifaddr_event_ext, ifp, &ia->ia_ifa,
 		    IFADDR_EVENT_DEL);
 		break;
 	}
 
 	default:
 		/* Not handled here: hand the request to the interface. */
 		if (ifp->if_ioctl == NULL) {
 			error = EOPNOTSUPP;
 			goto out;
 		}
 		error = (*ifp->if_ioctl)(ifp, cmd, data);
 		goto out;
 	}
 
 	error = 0;
 out:
 	/* Release the address found by in6ifa_ifpwithaddr(), if any. */
 	if (ia != NULL)
 		ifa_free(&ia->ia_ifa);
 	return (error);
 }
 
 
 /*
  * Allocate a membership record and join mcaddr on ifp through
  * in6_joingroup().  On success the record is returned and *errorp is left
  * untouched; on failure NULL is returned and *errorp holds the reason
  * (ENOBUFS when the record cannot be allocated).
  */
 static struct in6_multi_mship *
 in6_joingroup_legacy(struct ifnet *ifp, const struct in6_addr *mcaddr,
     int *errorp, int delay)
 {
 	struct in6_multi_mship *mship;
 	int rc, scaled;
 
 	mship = malloc(sizeof(*mship), M_IP6MADDR, M_NOWAIT);
 	if (mship == NULL) {
 		*errorp = ENOBUFS;
 		return (NULL);
 	}
 
 	/* Rescale the caller's delay by PR_FASTHZ / hz before joining. */
 	scaled = (delay * PR_FASTHZ) / hz;
 
 	rc = in6_joingroup(ifp, mcaddr, NULL, &mship->i6mm_maddr, scaled);
 	if (rc == 0)
 		return (mship);
 
 	*errorp = rc;
 	free(mship, M_IP6MADDR);
 	return (NULL);
 }
 /*
  * Join necessary multicast groups.  Factored out from in6_update_ifa().
  * This entire work should only be done once, for the default FIB.
  */
 static int
 in6_update_ifa_join_mc(struct ifnet *ifp, struct in6_aliasreq *ifra,
     struct in6_ifaddr *ia, int flags, struct in6_multi **in6m_sol)
 {
 	char ip6buf[INET6_ADDRSTRLEN];
 	struct in6_addr mltaddr;
 	struct in6_multi_mship *imm;
 	int delay, error;
 
 	KASSERT(in6m_sol != NULL, ("%s: in6m_sol is NULL", __func__));
 
 	/* Join solicited multicast addr for new host id. */
 	bzero(&mltaddr, sizeof(struct in6_addr));
 	mltaddr.s6_addr32[0] = IPV6_ADDR_INT32_MLL;
 	mltaddr.s6_addr32[2] = htonl(1);
 	mltaddr.s6_addr32[3] = ifra->ifra_addr.sin6_addr.s6_addr32[3];
 	mltaddr.s6_addr8[12] = 0xff;
 	if ((error = in6_setscope(&mltaddr, ifp, NULL)) != 0) {
 		/* XXX: should not happen */
 		log(LOG_ERR, "%s: in6_setscope failed\n", __func__);
 		goto cleanup;
 	}
 	delay = error = 0;
 	if ((flags & IN6_IFAUPDATE_DADDELAY)) {
 		/*
 		 * We need a random delay for DAD on the address being
 		 * configured.  It also means delaying transmission of the
 		 * corresponding MLD report to avoid report collision.
 		 * [RFC 4861, Section 6.3.7]
 		 */
 		delay = arc4random() % (MAX_RTR_SOLICITATION_DELAY * hz);
 	}
 	imm = in6_joingroup_legacy(ifp, &mltaddr, &error, delay);
 	if (imm == NULL) {
 		nd6log((LOG_WARNING, "%s: in6_joingroup failed for %s on %s "
 		    "(errno=%d)\n", __func__, ip6_sprintf(ip6buf, &mltaddr),
 		    if_name(ifp), error));
 		goto cleanup;
 	}
 	LIST_INSERT_HEAD(&ia->ia6_memberships, imm, i6mm_chain);
 	*in6m_sol = imm->i6mm_maddr;
 
 	/*
 	 * Join link-local all-nodes address.
 	 */
 	mltaddr = in6addr_linklocal_allnodes;
 	if ((error = in6_setscope(&mltaddr, ifp, NULL)) != 0)
 		goto cleanup; /* XXX: should not fail */
 
 	imm = in6_joingroup_legacy(ifp, &mltaddr, &error, 0);
 	if (imm == NULL) {
 		nd6log((LOG_WARNING, "%s: in6_joingroup failed for %s on %s "
 		    "(errno=%d)\n", __func__, ip6_sprintf(ip6buf, &mltaddr),
 		    if_name(ifp), error));
 		goto cleanup;
 	}
 	LIST_INSERT_HEAD(&ia->ia6_memberships, imm, i6mm_chain);
 
 	/*
 	 * Join node information group address.
 	 */
 	delay = 0;
 	if ((flags & IN6_IFAUPDATE_DADDELAY)) {
 		/*
 		 * The spec does not say anything about delay for this group,
 		 * but the same logic should apply.
 		 */
 		delay = arc4random() % (MAX_RTR_SOLICITATION_DELAY * hz);
 	}
 	if (in6_nigroup(ifp, NULL, -1, &mltaddr) == 0) {
 		/* XXX jinmei */
 		imm = in6_joingroup_legacy(ifp, &mltaddr, &error, delay);
 		if (imm == NULL)
 			nd6log((LOG_WARNING,
 			    "%s: in6_joingroup failed for %s on %s "
 			    "(errno=%d)\n", __func__, ip6_sprintf(ip6buf,
 			    &mltaddr), if_name(ifp), error));
 			/* XXX not very fatal, go on... */
 		else
 			LIST_INSERT_HEAD(&ia->ia6_memberships, imm, i6mm_chain);
 	}
 	if (V_icmp6_nodeinfo_oldmcprefix &&
 	    in6_nigroup_oldmcprefix(ifp, NULL, -1, &mltaddr) == 0) {
 		imm = in6_joingroup_legacy(ifp, &mltaddr, &error, delay);
 		if (imm == NULL)
 			nd6log((LOG_WARNING,
 			    "%s: in6_joingroup failed for %s on %s "
 			    "(errno=%d)\n", __func__, ip6_sprintf(ip6buf,
 			    &mltaddr), if_name(ifp), error));
 			/* XXX not very fatal, go on... */
 		else
 			LIST_INSERT_HEAD(&ia->ia6_memberships, imm, i6mm_chain);
 	}
 
 	/*
 	 * Join interface-local all-nodes address.
 	 * (ff01::1%ifN, and ff01::%ifN/32)
 	 */
 	mltaddr = in6addr_nodelocal_allnodes;
 	if ((error = in6_setscope(&mltaddr, ifp, NULL)) != 0)
 		goto cleanup; /* XXX: should not fail */
 
 	imm = in6_joingroup_legacy(ifp, &mltaddr, &error, 0);
 	if (imm == NULL) {
 		nd6log((LOG_WARNING, "%s: in6_joingroup failed for %s on %s "
 		    "(errno=%d)\n", __func__, ip6_sprintf(ip6buf,
 		    &mltaddr), if_name(ifp), error));
 		goto cleanup;
 	}
 	LIST_INSERT_HEAD(&ia->ia6_memberships, imm, i6mm_chain);
 
 cleanup:
 	return (error);
 }
 
 /*
  * Update parameters of an IPv6 interface address.
  * If necessary, a new entry is created and linked into address chains.
  * This function is separated from in6_control().
  */
 int
 in6_update_ifa(struct ifnet *ifp, struct in6_aliasreq *ifra,
     struct in6_ifaddr *ia, int flags)
 {
 	int error, hostIsNew = 0;
 
 	if ((error = in6_validate_ifra(ifp, ifra, ia, flags)) != 0)
 		return (error);
 
 	if (ia == NULL) {
 		hostIsNew = 1;
 		if ((ia = in6_alloc_ifa(ifp, ifra, flags)) == NULL)
 			return (ENOBUFS);
 	}
 
 	error = in6_update_ifa_internal(ifp, ifra, ia, hostIsNew, flags);
 	if (error != 0) {
 		if (hostIsNew != 0) {
 			in6_unlink_ifa(ia, ifp);
 			ifa_free(&ia->ia_ifa);
 		}
 		return (error);
 	}
 
 	if (hostIsNew)
 		error = in6_broadcast_ifa(ifp, ifra, ia, flags);
 
 	return (error);
 }
 
 /*
  * Fill in basic IPv6 address request info.
  */
 void
 in6_prepare_ifra(struct in6_aliasreq *ifra, const struct in6_addr *addr,
     const struct in6_addr *mask)
 {
 
 	memset(ifra, 0, sizeof(struct in6_aliasreq));
 
 	ifra->ifra_addr.sin6_family = AF_INET6;
 	ifra->ifra_addr.sin6_len = sizeof(struct sockaddr_in6);
 	if (addr != NULL)
 		ifra->ifra_addr.sin6_addr = *addr;
 
 	ifra->ifra_prefixmask.sin6_family = AF_INET6;
 	ifra->ifra_prefixmask.sin6_len = sizeof(struct sockaddr_in6);
 	if (mask != NULL)
 		ifra->ifra_prefixmask.sin6_addr = *mask;
 }
 
 /*
  * Validate an address request before in6_update_ifa() acts on it.
  *
  * ifp   - interface the address is being configured on.
  * ifra  - the request; ifra_dstaddr is rewritten in place with the scope
  *         zone embedded when a destination is given on a p2p or loopback
  *         interface.
  * ia    - existing address entry being updated, or NULL for a new one.
  * flags - not consulted in this function.
  *
  * Returns 0 if the request is acceptable, EAFNOSUPPORT for a bad
  * destination family on a p2p interface, and EINVAL for every other
  * rejected request.
  */
 static int
 in6_validate_ifra(struct ifnet *ifp, struct in6_aliasreq *ifra,
     struct in6_ifaddr *ia, int flags)
 {
 	int plen = -1;
 	struct sockaddr_in6 dst6;
 	struct in6_addrlifetime *lt;
 	char ip6buf[INET6_ADDRSTRLEN];
 
 	/* Validate parameters */
 	if (ifp == NULL || ifra == NULL) /* this maybe redundant */
 		return (EINVAL);
 
 	/*
 	 * The destination address for a p2p link must have a family
 	 * of AF_UNSPEC or AF_INET6.
 	 */
 	if ((ifp->if_flags & IFF_POINTOPOINT) != 0 &&
 	    ifra->ifra_dstaddr.sin6_family != AF_INET6 &&
 	    ifra->ifra_dstaddr.sin6_family != AF_UNSPEC)
 		return (EAFNOSUPPORT);
 
 	/*
 	 * Validate address
 	 */
 	if (ifra->ifra_addr.sin6_len != sizeof(struct sockaddr_in6) ||
 	    ifra->ifra_addr.sin6_family != AF_INET6)
 		return (EINVAL);
 
 	/*
 	 * validate ifra_prefixmask.  don't check sin6_family, netmask
 	 * does not carry fields other than sin6_len.
 	 */
 	if (ifra->ifra_prefixmask.sin6_len > sizeof(struct sockaddr_in6))
 		return (EINVAL);
 	/*
 	 * Because the IPv6 address architecture is classless, we require
 	 * users to specify a (non 0) prefix length (mask) for a new address.
 	 * We also require the prefix (when specified) mask is valid, and thus
 	 * reject a non-consecutive mask.
 	 */
 	if (ia == NULL && ifra->ifra_prefixmask.sin6_len == 0)
 		return (EINVAL);
 	if (ifra->ifra_prefixmask.sin6_len != 0) {
 		/*
 		 * The limit pointer restricts the scan to the sin6_len bytes
 		 * the caller supplied; plen <= 0 covers both an invalid mask
 		 * (-1) and a zero-length prefix.
 		 */
 		plen = in6_mask2len(&ifra->ifra_prefixmask.sin6_addr,
 		    (u_char *)&ifra->ifra_prefixmask +
 		    ifra->ifra_prefixmask.sin6_len);
 		if (plen <= 0)
 			return (EINVAL);
 	} else {
 		/*
 		 * In this case, ia must not be NULL.  We just use its prefix
 		 * length.
 		 */
 		plen = in6_mask2len(&ia->ia_prefixmask.sin6_addr, NULL);
 	}
 	/*
 	 * If the destination address on a p2p interface is specified,
 	 * and the address is a scoped one, validate/set the scope
 	 * zone identifier.
 	 */
 	dst6 = ifra->ifra_dstaddr;
 	if ((ifp->if_flags & (IFF_POINTOPOINT|IFF_LOOPBACK)) != 0 &&
 	    (dst6.sin6_family == AF_INET6)) {
 		struct in6_addr in6_tmp;
 		u_int32_t zoneid;
 
 		/* Work on a copy; only the zone id is wanted here. */
 		in6_tmp = dst6.sin6_addr;
 		if (in6_setscope(&in6_tmp, ifp, &zoneid))
 			return (EINVAL); /* XXX: should be impossible */
 
 		if (dst6.sin6_scope_id != 0) {
 			if (dst6.sin6_scope_id != zoneid)
 				return (EINVAL);
 		} else		/* user omit to specify the ID. */
 			dst6.sin6_scope_id = zoneid;
 
 		/* convert into the internal form */
 		if (sa6_embedscope(&dst6, 0))
 			return (EINVAL); /* XXX: should be impossible */
 	}
 	/* Modify original ifra_dstaddr to reflect changes */
 	ifra->ifra_dstaddr = dst6;
 
 	/*
 	 * The destination address can be specified only for a p2p or a
 	 * loopback interface.  If specified, the corresponding prefix length
 	 * must be 128.
 	 */
 	if (ifra->ifra_dstaddr.sin6_family == AF_INET6) {
 		if ((ifp->if_flags & (IFF_POINTOPOINT|IFF_LOOPBACK)) == 0) {
 			/* XXX: noisy message */
 			nd6log((LOG_INFO, "in6_update_ifa: a destination can "
 			    "be specified for a p2p or a loopback IF only\n"));
 			return (EINVAL);
 		}
 		if (plen != 128) {
 			nd6log((LOG_INFO, "in6_update_ifa: prefixlen should "
 			    "be 128 when dstaddr is specified\n"));
 			return (EINVAL);
 		}
 	}
 	/* lifetime consistency check */
 	lt = &ifra->ifra_lifetime;
 	if (lt->ia6t_pltime > lt->ia6t_vltime)
 		return (EINVAL);
 	if (lt->ia6t_vltime == 0) {
 		/*
 		 * the following log might be noisy, but this is a typical
 		 * configuration mistake or a tool's bug.
 		 */
 		nd6log((LOG_INFO,
 		    "in6_update_ifa: valid lifetime is 0 for %s\n",
 		    ip6_sprintf(ip6buf, &ifra->ifra_addr.sin6_addr)));
 
 		/*
 		 * NOTE(review): this returns success, and in6_update_ifa()
 		 * goes on to allocate a new address after a 0 return with
 		 * ia == NULL; confirm that matches "nothing to do".
 		 */
 		if (ia == NULL)
 			return (0); /* there's nothing to do */
 	}
 
 	/* Check prefix mask */
 	if (ia != NULL && ifra->ifra_prefixmask.sin6_len != 0) {
 		/*
 		 * We prohibit changing the prefix length of an existing
 		 * address, because
 		 * + such an operation should be rare in IPv6, and
 		 * + the operation would confuse prefix management.
 		 */
 		if (ia->ia_prefixmask.sin6_len != 0 &&
 		    in6_mask2len(&ia->ia_prefixmask.sin6_addr, NULL) != plen) {
 			nd6log((LOG_INFO, "in6_validate_ifa: the prefix length "
 			    "of an existing %s address should not be changed\n",
 			    ip6_sprintf(ip6buf, &ia->ia_addr.sin6_addr)));
 
 			return (EINVAL);
 		}
 	}
 
 	return (0);
 }
 
 
 /*
  * Allocate a new in6_ifaddr for ifp, initialize it from ifra and link it
  * into the address chains.
  *
  * The new entry is inserted into the interface's if_addrhead list, the
  * global V_in6_ifaddrhead list and the IN6ADDR_HASH bucket for its
  * address; one reference is taken for if_addrhead and one for
  * in6_ifaddrhead.  Returns the new entry, or NULL when ifa_alloc() fails.
  * The flags argument is not referenced in this function.
  */
 static struct in6_ifaddr *
 in6_alloc_ifa(struct ifnet *ifp, struct in6_aliasreq *ifra, int flags)
 {
 	struct in6_ifaddr *ia;
 
 	/*
 	 * When in6_alloc_ifa() is called in a process of a received
 	 * RA, it is called under an interrupt context.  So, we should
 	 * call malloc with M_NOWAIT.
 	 */
 	ia = (struct in6_ifaddr *)ifa_alloc(sizeof(*ia), M_NOWAIT);
 	if (ia == NULL)
 		return (NULL);
 	LIST_INIT(&ia->ia6_memberships);
 	/* Initialize the address and masks, and put time stamp */
 	ia->ia_ifa.ifa_addr = (struct sockaddr *)&ia->ia_addr;
 	ia->ia_addr.sin6_family = AF_INET6;
 	ia->ia_addr.sin6_len = sizeof(ia->ia_addr);
 	/*
 	 * XXX: Can we assign only .sin6_addr and skip the rest?  The
 	 * structure copy below overwrites the family and length set above.
 	 */
 	ia->ia_addr = ifra->ifra_addr;
 	ia->ia6_createtime = time_uptime;
 	if ((ifp->if_flags & (IFF_POINTOPOINT | IFF_LOOPBACK)) != 0) {
 		/*
 		 * Some functions expect that ifa_dstaddr is not
 		 * NULL for p2p interfaces.
 		 */
 		ia->ia_ifa.ifa_dstaddr =
 		    (struct sockaddr *)&ia->ia_dstaddr;
 	} else {
 		ia->ia_ifa.ifa_dstaddr = NULL;
 	}
 
 	/*
 	 * Set prefix mask if any.  ifa_netmask always points at the
 	 * embedded ia_prefixmask; its contents are only filled in when the
 	 * request carries a mask (sin6_len != 0).
 	 */
 	ia->ia_ifa.ifa_netmask = (struct sockaddr *)&ia->ia_prefixmask;
 	if (ifra->ifra_prefixmask.sin6_len != 0) {
 		ia->ia_prefixmask.sin6_family = AF_INET6;
 		ia->ia_prefixmask.sin6_len = ifra->ifra_prefixmask.sin6_len;
 		ia->ia_prefixmask.sin6_addr = ifra->ifra_prefixmask.sin6_addr;
 	}
 
 	/* Link into the per-interface list under the interface address lock. */
 	ia->ia_ifp = ifp;
 	ifa_ref(&ia->ia_ifa);			/* if_addrhead */
 	IF_ADDR_WLOCK(ifp);
 	CK_STAILQ_INSERT_TAIL(&ifp->if_addrhead, &ia->ia_ifa, ifa_link);
 	IF_ADDR_WUNLOCK(ifp);
 
 	/* Link into the global list and the address hash under the global lock. */
 	ifa_ref(&ia->ia_ifa);			/* in6_ifaddrhead */
 	IN6_IFADDR_WLOCK();
 	CK_STAILQ_INSERT_TAIL(&V_in6_ifaddrhead, ia, ia_link);
 	CK_LIST_INSERT_HEAD(IN6ADDR_HASH(&ia->ia_addr.sin6_addr), ia, ia6_hash);
 	IN6_IFADDR_WUNLOCK();
 
 	return (ia);
 }
 
 /*
  * Update/configure interface address parameters:
  *
  * 1) Update lifetime
  * 2) Update interface metric and flags
  * 3) Notify other subsystems
  *
  * hostIsNew is passed through to in6_notify_ifa() and also decides whether
  * the address is marked tentative.  Returns the result of
  * in6_notify_ifa().  The flags argument is not referenced in this
  * function.
  */
 static int
 in6_update_ifa_internal(struct ifnet *ifp, struct in6_aliasreq *ifra,
     struct in6_ifaddr *ia, int hostIsNew, int flags)
 {
 	int error;
 
 	/* update timestamp */
 	ia->ia6_updatetime = time_uptime;
 
 	/*
 	 * Set lifetimes.  We do not refer to ia6t_expire and ia6t_preferred
 	 * to see if the address is deprecated or invalidated, but initialize
 	 * these members for applications.
 	 * An infinite lifetime is recorded as an absolute time of 0.
 	 */
 	ia->ia6_lifetime = ifra->ifra_lifetime;
 	if (ia->ia6_lifetime.ia6t_vltime != ND6_INFINITE_LIFETIME) {
 		ia->ia6_lifetime.ia6t_expire =
 		    time_uptime + ia->ia6_lifetime.ia6t_vltime;
 	} else
 		ia->ia6_lifetime.ia6t_expire = 0;
 	if (ia->ia6_lifetime.ia6t_pltime != ND6_INFINITE_LIFETIME) {
 		ia->ia6_lifetime.ia6t_preferred =
 		    time_uptime + ia->ia6_lifetime.ia6t_pltime;
 	} else
 		ia->ia6_lifetime.ia6t_preferred = 0;
 
 	/*
 	 * backward compatibility - if IN6_IFF_DEPRECATED is set from the
 	 * userland, make it deprecated.
 	 */
 	if ((ifra->ifra_flags & IN6_IFF_DEPRECATED) != 0) {
 		ia->ia6_lifetime.ia6t_pltime = 0;
 		ia->ia6_lifetime.ia6t_preferred = time_uptime;
 	}
 
 	/*
 	 * configure address flags.
 	 */
 	ia->ia6_flags = ifra->ifra_flags;
 
 	/*
 	 * Make the address tentative before joining multicast addresses,
 	 * so that corresponding MLD responses would not have a tentative
 	 * source address.
 	 */
 	ia->ia6_flags &= ~IN6_IFF_DUPLICATED;	/* safety */
 
 	/*
 	 * DAD should be performed for a new address or addresses on
 	 * an interface with ND6_IFF_IFDISABLED.
 	 */
 	if (in6if_do_dad(ifp) &&
 	    (hostIsNew || (ND_IFINFO(ifp)->flags & ND6_IFF_IFDISABLED)))
 		ia->ia6_flags |= IN6_IFF_TENTATIVE;
 
 	/* notify other subsystems */
 	error = in6_notify_ifa(ifp, ia, ifra, hostIsNew);
 
 	return (error);
 }
 
 /*
  * Do link-level ifa job:
  * 1) Add lle entry for added address
  * 2) Notifies routing socket users about new address
  * 3) join appropriate multicast group
  * 4) start DAD if enabled
  *
  * Every return path calls ifa_free() on ia, so one reference is dropped
  * here (presumably the caller's -- confirm against the call site).  If
  * adding the lle or joining the multicast groups fails, the address is
  * purged with in6_purgeaddr() and the error is returned.
  */
 static int
 in6_broadcast_ifa(struct ifnet *ifp, struct in6_aliasreq *ifra,
     struct in6_ifaddr *ia, int flags)
 {
 	struct in6_multi *in6m_sol;
 	int error = 0;
 
 	/* Add local address to lltable, if necessary (ex. on p2p link). */
 	if ((error = nd6_add_ifa_lle(ia)) != 0) {
 		in6_purgeaddr(&ia->ia_ifa);
 		ifa_free(&ia->ia_ifa);
 		return (error);
 	}
 
 	/* Join necessary multicast groups. */
 	in6m_sol = NULL;
 	if ((ifp->if_flags & IFF_MULTICAST) != 0) {
 		error = in6_update_ifa_join_mc(ifp, ifra, ia, flags, &in6m_sol);
 		if (error != 0) {
 			in6_purgeaddr(&ia->ia_ifa);
 			ifa_free(&ia->ia_ifa);
 			return (error);
 		}
 	}
 
 	/* Perform DAD, if the address is TENTATIVE. */
 	if ((ia->ia6_flags & IN6_IFF_TENTATIVE)) {
 		int delay, mindelay, maxdelay;
 
 		delay = 0;
 		if ((flags & IN6_IFAUPDATE_DADDELAY)) {
 			/*
 			 * We need to impose a delay before sending an NS
 			 * for DAD.  Check if we also needed a delay for the
 			 * corresponding MLD message.  If we did, the delay
 			 * should be larger than the MLD delay (this could be
 			 * relaxed a bit, but this simple logic is at least
 			 * safe).
 			 * XXX: Break data hiding guidelines and look at
 			 * state for the solicited multicast group.
 			 */
 			mindelay = 0;
 			if (in6m_sol != NULL &&
 			    in6m_sol->in6m_state == MLD_REPORTING_MEMBER) {
 				mindelay = in6m_sol->in6m_timer;
 			}
 			maxdelay = MAX_RTR_SOLICITATION_DELAY * hz;
 			/* Avoid a modulo by zero when the window is empty. */
 			if (maxdelay - mindelay == 0)
 				delay = 0;
 			else {
 				delay =
 				    (arc4random() % (maxdelay - mindelay)) +
 				    mindelay;
 			}
 		}
 		nd6_dad_start((struct ifaddr *)ia, delay);
 	}
 
 	in6_newaddrmsg(ia, RTM_ADD);
 	ifa_free(&ia->ia_ifa);
 	return (error);
 }
 
 /*
  * Tear down an IPv6 interface address: detach CARP if attached, remove
  * the loopback route, stop DAD, leave the multicast groups recorded in
  * ia6_memberships, delete the destination host route for /128 addresses,
  * announce the deletion with in6_newaddrmsg() and finally unlink the
  * address via in6_unlink_ifa().
  */
 void
 in6_purgeaddr(struct ifaddr *ifa)
 {
 	struct ifnet *ifp = ifa->ifa_ifp;
 	struct in6_ifaddr *ia = (struct in6_ifaddr *) ifa;
 	struct in6_multi_mship *imm;
 	int plen, error;
 
 	if (ifa->ifa_carp)
 		(*carp_detach_p)(ifa, false);
 
 	/*
 	 * Remove the loopback route to the interface address.
 	 * The check for the current setting of "nd6_useloopback"
 	 * is not needed.
 	 * IFA_RTSELF is only cleared when the removal succeeds.
 	 */
 	if (ia->ia_flags & IFA_RTSELF) {
 		error = ifa_del_loopback_route((struct ifaddr *)ia,
 		    (struct sockaddr *)&ia->ia_addr);
 		if (error == 0)
 			ia->ia_flags &= ~IFA_RTSELF;
 	}
 
 	/* stop DAD processing */
 	nd6_dad_stop(ifa);
 
 	/* Leave multicast groups. */
 	while ((imm = LIST_FIRST(&ia->ia6_memberships)) != NULL) {
 		LIST_REMOVE(imm, i6mm_chain);
 		if (imm->i6mm_maddr != NULL)
 			in6_leavegroup(imm->i6mm_maddr, NULL);
 		free(imm, M_IP6MADDR);
 	}
 	plen = in6_mask2len(&ia->ia_prefixmask.sin6_addr, NULL); /* XXX */
 	if ((ia->ia_flags & IFA_ROUTE) && plen == 128) {
 		error = rtinit(&(ia->ia_ifa), RTM_DELETE, ia->ia_flags |
 		    (ia->ia_dstaddr.sin6_family == AF_INET6 ? RTF_HOST : 0));
 		if (error != 0)
 			log(LOG_INFO, "%s: err=%d, destination address delete "
 			    "failed\n", __func__, error);
 		/* The flag is cleared even when rtinit() reported an error. */
 		ia->ia_flags &= ~IFA_ROUTE;
 	}
 
 	in6_newaddrmsg(ia, RTM_DELETE);
 	in6_unlink_ifa(ia, ifp);
 }
 
 /*
  * Remove ia from the interface's if_addrhead list, from the global
  * V_in6_ifaddrhead list and from the address hash, releasing the reference
  * associated with each list.  Also drops the address's count on its base
  * prefix, removes the matching lle state via nd6_rem_ifa_lle() and, for
  * autoconfigured addresses, re-runs pfxlist_onlink_check().
  */
 static void
 in6_unlink_ifa(struct in6_ifaddr *ia, struct ifnet *ifp)
 {
 	char ip6buf[INET6_ADDRSTRLEN];
 	int remove_lle;
 
 	IF_ADDR_WLOCK(ifp);
 	CK_STAILQ_REMOVE(&ifp->if_addrhead, &ia->ia_ifa, ifaddr, ifa_link);
 	IF_ADDR_WUNLOCK(ifp);
 	ifa_free(&ia->ia_ifa);			/* if_addrhead */
 
 	/*
 	 * Defer the release of what might be the last reference to the
 	 * in6_ifaddr so that it can't be freed before the remainder of the
 	 * cleanup.
 	 */
 	IN6_IFADDR_WLOCK();
 	CK_STAILQ_REMOVE(&V_in6_ifaddrhead, ia, in6_ifaddr, ia_link);
 	CK_LIST_REMOVE(ia, ia6_hash);
 	IN6_IFADDR_WUNLOCK();
 
 	/*
 	 * Release the reference to the base prefix.  There should be a
 	 * positive reference.
 	 */
 	remove_lle = 0;
 	if (ia->ia6_ndpr == NULL) {
 		nd6log((LOG_NOTICE,
 		    "in6_unlink_ifa: autoconf'ed address "
 		    "%s has no prefix\n", ip6_sprintf(ip6buf, IA6_IN6(ia))));
 	} else {
 		ia->ia6_ndpr->ndpr_addrcnt--;
 		/* Do not delete lles within prefix if refcount != 0 */
 		if (ia->ia6_ndpr->ndpr_addrcnt == 0)
 			remove_lle = 1;
 		ia->ia6_ndpr = NULL;
 	}
 
 	nd6_rem_ifa_lle(ia, remove_lle);
 
 	/*
 	 * Also, if the address being removed is autoconf'ed, call
 	 * pfxlist_onlink_check() since the release might affect the status of
 	 * other (detached) addresses.
 	 */
 	if ((ia->ia6_flags & IN6_IFF_AUTOCONF)) {
 		pfxlist_onlink_check();
 	}
 	ifa_free(&ia->ia_ifa);			/* in6_ifaddrhead */
 }
 
 /*
  * Notifies other subsystems about address change/arrival:
  * 1) Notifies device handler on the first IPv6 address assignment
  * 2) Handle routing table changes for P2P links and route
  * 3) Handle routing table changes for address host route
  *
  * Returns 0 or the first error from the interface ioctl or from installing
  * the destination route.  The ifaddr_event_ext handlers are invoked on
  * every path, including the error paths that jump to "done".
  */
 static int
 in6_notify_ifa(struct ifnet *ifp, struct in6_ifaddr *ia,
     struct in6_aliasreq *ifra, int hostIsNew)
 {
 	int	error = 0, plen, ifacount = 0;
 	struct ifaddr *ifa;
 	struct sockaddr_in6 *pdst;
 	char ip6buf[INET6_ADDRSTRLEN];
 
 	/*
 	 * Give the interface a chance to initialize
 	 * if this is its first address,
 	 */
 	if (hostIsNew != 0) {
 		struct epoch_tracker et;
 
 		NET_EPOCH_ENTER(et);
 		CK_STAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
 			if (ifa->ifa_addr->sa_family != AF_INET6)
 				continue;
 			ifacount++;
 		}
 		NET_EPOCH_EXIT(et);
 	}
 
 	/*
 	 * ifacount is only computed for a new address; for an existing one
 	 * it stays 0, so the ioctl below is issued in that case as well.
 	 */
 	if (ifacount <= 1 && ifp->if_ioctl) {
 		error = (*ifp->if_ioctl)(ifp, SIOCSIFADDR, (caddr_t)ia);
 		if (error)
 			goto done;
 	}
 
 	/*
 	 * If a new destination address is specified, scrub the old one and
 	 * install the new destination.  Note that the interface must be
 	 * p2p or loopback.
 	 */
 	pdst = &ifra->ifra_dstaddr;
 	if (pdst->sin6_family == AF_INET6 &&
 	    !IN6_ARE_ADDR_EQUAL(&pdst->sin6_addr, &ia->ia_dstaddr.sin6_addr)) {
 		/*
 		 * NOTE(review): the log text below names
 		 * in6_update_ifa_internal although it is emitted here.
 		 */
 		if ((ia->ia_flags & IFA_ROUTE) != 0 &&
 		    (rtinit(&(ia->ia_ifa), (int)RTM_DELETE, RTF_HOST) != 0)) {
 			nd6log((LOG_ERR, "in6_update_ifa_internal: failed to "
 			    "remove a route to the old destination: %s\n",
 			    ip6_sprintf(ip6buf, &ia->ia_addr.sin6_addr)));
 			/* proceed anyway... */
 		} else
 			ia->ia_flags &= ~IFA_ROUTE;
 		ia->ia_dstaddr = *pdst;
 	}
 
 	/*
 	 * If a new destination address is specified for a point-to-point
 	 * interface, install a route to the destination as an interface
 	 * direct route.
 	 * XXX: the logic below rejects assigning multiple addresses on a p2p
 	 * interface that share the same destination.
 	 */
 	plen = in6_mask2len(&ia->ia_prefixmask.sin6_addr, NULL); /* XXX */
 	if (!(ia->ia_flags & IFA_ROUTE) && plen == 128 &&
 	    ia->ia_dstaddr.sin6_family == AF_INET6) {
 		int rtflags = RTF_UP | RTF_HOST;
 		/*
 		 * Handle the case for ::1 .
 		 */
 		if (ifp->if_flags & IFF_LOOPBACK)
 			ia->ia_flags |= IFA_RTSELF;
 		error = rtinit(&ia->ia_ifa, RTM_ADD, ia->ia_flags | rtflags);
 		if (error)
 			goto done;
 		ia->ia_flags |= IFA_ROUTE;
 	}
 
 	/*
 	 * add a loopback route to self if not exists
 	 * (a failure here is recorded in error but IFA_RTSELF stays clear)
 	 */
 	if (!(ia->ia_flags & IFA_RTSELF) && V_nd6_useloopback) {
 		error = ifa_add_loopback_route((struct ifaddr *)ia,
 		    (struct sockaddr *)&ia->ia_addr);
 		if (error == 0)
 			ia->ia_flags |= IFA_RTSELF;
 	}
 done:
 	WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, NULL,
 	    "Invoking IPv6 network device address event may sleep");
 
 	/* Hold a reference across the event handlers. */
 	ifa_ref(&ia->ia_ifa);
 	EVENTHANDLER_INVOKE(ifaddr_event_ext, ifp, &ia->ia_ifa,
 	    IFADDR_EVENT_ADD);
 	ifa_free(&ia->ia_ifa);
 
 	return (error);
 }
 
 /*
  * Return the first IPv6 link-local address configured on ifp whose
  * ia6_flags share no bit with ignoreflags, or NULL if there is none.
  * The returned ifaddr is referenced.  Must be called inside the network
  * epoch.
  */
 struct in6_ifaddr *
 in6ifa_ifpforlinklocal(struct ifnet *ifp, int ignoreflags)
 {
 	struct in6_ifaddr *ia;
 	struct ifaddr *ifa;
 
 	NET_EPOCH_ASSERT();
 
 	CK_STAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
 		if (ifa->ifa_addr->sa_family != AF_INET6 ||
 		    !IN6_IS_ADDR_LINKLOCAL(IFA_IN6(ifa)))
 			continue;
 		ia = (struct in6_ifaddr *)ifa;
 		if ((ia->ia6_flags & ignoreflags) != 0)
 			continue;
 		ifa_ref(ifa);
 		return (ia);
 	}
 
 	return (NULL);
 }
 
 
 /*
  * Look up the interface address that equals addr in the global address
  * hash.  A non-zero zoneid additionally has to match the entry's
  * sin6_scope_id.  The ifaddr is returned referenced; NULL is returned when
  * nothing matches.
  */
 struct in6_ifaddr *
 in6ifa_ifwithaddr(const struct in6_addr *addr, uint32_t zoneid)
 {
 	struct rm_priotracker in6_ifa_tracker;
 	struct in6_ifaddr *ia;
 
 	IN6_IFADDR_RLOCK(&in6_ifa_tracker);
 	CK_LIST_FOREACH(ia, IN6ADDR_HASH(addr), ia6_hash) {
 		if (!IN6_ARE_ADDR_EQUAL(IA6_IN6(ia), addr))
 			continue;
 		if (zoneid == 0 || zoneid == ia->ia_addr.sin6_scope_id) {
 			ifa_ref(&ia->ia_ifa);
 			break;
 		}
 	}
 	IN6_IFADDR_RUNLOCK(&in6_ifa_tracker);
 	return (ia);
 }
 
 /*
  * find the internet address corresponding to a given interface and address.
  * ifaddr is returned referenced.
  */
 struct in6_ifaddr *
 in6ifa_ifpwithaddr(struct ifnet *ifp, const struct in6_addr *addr)
 {
 	struct epoch_tracker et;
 	struct ifaddr *ifa;
 
 	NET_EPOCH_ENTER(et);
 	CK_STAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
 		if (ifa->ifa_addr->sa_family != AF_INET6)
 			continue;
 		if (IN6_ARE_ADDR_EQUAL(addr, IFA_IN6(ifa))) {
 			ifa_ref(ifa);
 			break;
 		}
 	}
 	NET_EPOCH_EXIT(et);
 
 	return ((struct in6_ifaddr *)ifa);
 }
 
 /*
  * Return the first address on ifp that is link-local scoped (or an
  * interface-local / node-local multicast address), or NULL when there is
  * none or when the interface has ND6_IFF_IFDISABLED set.  No reference is
  * taken on the returned address.
  */
 struct in6_ifaddr *
 in6ifa_llaonifp(struct ifnet *ifp)
 {
 	struct epoch_tracker et;
 	struct in6_ifaddr *found = NULL;
 	struct in6_addr *a6;
 	struct ifaddr *ifa;
 
 	if (ND_IFINFO(ifp)->flags & ND6_IFF_IFDISABLED)
 		return (NULL);
 
 	NET_EPOCH_ENTER(et);
 	CK_STAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
 		if (ifa->ifa_addr->sa_family != AF_INET6)
 			continue;
 		a6 = &((struct sockaddr_in6 *)ifa->ifa_addr)->sin6_addr;
 		if (IN6_IS_SCOPE_LINKLOCAL(a6) ||
 		    IN6_IS_ADDR_MC_INTFACELOCAL(a6) ||
 		    IN6_IS_ADDR_MC_NODELOCAL(a6)) {
 			found = (struct in6_ifaddr *)ifa;
 			break;
 		}
 	}
 	NET_EPOCH_EXIT(et);
 
 	return (found);
 }
 
 /*
  * Convert IP6 address to printable (loggable) representation. Caller
  * has to make sure that ip6buf is at least INET6_ADDRSTRLEN long.
  *
  * The address is printed as eight hexadecimal 16-bit groups without
  * leading zeros.  The first longest run of two or more all-zero groups is
  * replaced by "::"; a lone zero group is printed as "0".
  * Returns ip6buf.
  */
 static char digits[] = "0123456789abcdef";
 char *
 ip6_sprintf(char *ip6buf, const struct in6_addr *addr)
 {
 	int i, cnt = 0, maxcnt = 0, idx = 0, index = 0;
 	char *cp;
 	const uint16_t *a = (const uint16_t *)addr;
 	const uint8_t *d;
 	int dcolon = 0, zero = 0;
 
 	cp = ip6buf;
 
 	/*
 	 * Pass 1: find the first longest run of zero groups.  The running
 	 * count is reset at every non-zero group so that separate runs are
 	 * never added together.
 	 */
 	for (i = 0; i < 8; i++) {
 		if (a[i] == 0) {
 			if (cnt++ == 0)
 				idx = i;
 			continue;
 		}
 		if (maxcnt < cnt) {
 			maxcnt = cnt;
 			index = idx;
 		}
 		cnt = 0;
 	}
 	if (maxcnt < cnt) {
 		maxcnt = cnt;
 		index = idx;
 	}
 
 	/*
 	 * Pass 2: emit the groups.  dcolon is 0 before "::" has been
 	 * written, 1 while the compressed run is being skipped and 2 once
 	 * it has ended.
 	 */
 	for (i = 0; i < 8; i++, a++) {
 		if (dcolon == 1) {
 			if (*a == 0) {
 				/*
 				 * The run reaches the end of the address:
 				 * add a colon for the final trim to remove.
 				 */
 				if (i == 7)
 					*cp++ = ':';
 				continue;
 			}
 			dcolon = 2;
 		}
 		if (*a == 0) {
 			/*
 			 * Compress only a run of at least two groups.  Using
 			 * maxcnt instead of peeking at the next group avoids
 			 * reading past the end of the address when the last
 			 * group is zero.
 			 */
 			if (dcolon == 0 && maxcnt > 1 && i == index) {
 				if (i == 0)
 					*cp++ = ':';
 				*cp++ = ':';
 				dcolon = 1;
 			} else {
 				*cp++ = '0';
 				*cp++ = ':';
 			}
 			continue;
 		}
 		d = (const uint8_t *)a;
 		/* Try to eliminate leading zeros in printout like in :0001. */
 		zero = 1;
 		*cp = digits[*d >> 4];
 		if (*cp != '0') {
 			zero = 0;
 			cp++;
 		}
 		*cp = digits[*d++ & 0xf];
 		if (zero == 0 || (*cp != '0')) {
 			zero = 0;
 			cp++;
 		}
 		*cp = digits[*d >> 4];
 		if (zero == 0 || (*cp != '0')) {
 			zero = 0;
 			cp++;
 		}
 		*cp++ = digits[*d & 0xf];
 		*cp++ = ':';
 	}
 	/* Replace the trailing separator with the terminator. */
 	*--cp = '\0';
 	return (ip6buf);
 }
 
 /*
  * Return 1 if in6 is the loopback address, a link-local address, or falls
  * inside the prefix (address masked by prefix mask) of any configured
  * IPv6 interface address; return 0 otherwise.
  */
 int
 in6_localaddr(struct in6_addr *in6)
 {
 	struct rm_priotracker in6_ifa_tracker;
 	struct in6_ifaddr *ia;
 	int local = 0;
 
 	if (IN6_IS_ADDR_LOOPBACK(in6) || IN6_IS_ADDR_LINKLOCAL(in6))
 		return (1);
 
 	IN6_IFADDR_RLOCK(&in6_ifa_tracker);
 	CK_STAILQ_FOREACH(ia, &V_in6_ifaddrhead, ia_link) {
 		if (IN6_ARE_MASKED_ADDR_EQUAL(in6, &ia->ia_addr.sin6_addr,
 		    &ia->ia_prefixmask.sin6_addr)) {
 			local = 1;
 			break;
 		}
 	}
 	IN6_IFADDR_RUNLOCK(&in6_ifa_tracker);
 
 	return (local);
 }
 
 /*
  * Return 1 if in6 equals an address found in the global IPv6 address
  * hash, i.e. it is configured on one of the host's interfaces; return 0
  * otherwise.
  */
 int
 in6_localip(struct in6_addr *in6)
 {
 	struct rm_priotracker in6_ifa_tracker;
 	struct in6_ifaddr *ia;
 	int configured = 0;
 
 	IN6_IFADDR_RLOCK(&in6_ifa_tracker);
 	CK_LIST_FOREACH(ia, IN6ADDR_HASH(in6), ia6_hash) {
 		if (IN6_ARE_ADDR_EQUAL(in6, &ia->ia_addr.sin6_addr)) {
 			configured = 1;
 			break;
 		}
 	}
 	IN6_IFADDR_RUNLOCK(&in6_ifa_tracker);
 	return (configured);
 }
  
 /*
  * Return 1 if an internet address is configured on an interface.
  */
 int
 in6_ifhasaddr(struct ifnet *ifp, struct in6_addr *addr)
 {
 	struct in6_addr in6;
 	struct ifaddr *ifa;
 	struct in6_ifaddr *ia6;
 
 	NET_EPOCH_ASSERT();
 
 	in6 = *addr;
 	if (in6_clearscope(&in6))
 		return (0);
 	in6_setscope(&in6, ifp, NULL);
 
 	CK_STAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
 		if (ifa->ifa_addr->sa_family != AF_INET6)
 			continue;
 		ia6 = (struct in6_ifaddr *)ifa;
 		if (IN6_ARE_ADDR_EQUAL(&ia6->ia_addr.sin6_addr, &in6))
 			return (1);
 	}
 
 	return (0);
 }
 
 /*
  * Return 1 if the first entry in the global address hash that equals
  * sa6's address has IN6_IFF_DEPRECATED set; return 0 if it does not or if
  * no entry matches.
  */
 int
 in6_is_addr_deprecated(struct sockaddr_in6 *sa6)
 {
 	struct rm_priotracker in6_ifa_tracker;
 	struct in6_ifaddr *ia;
 	int deprecated = 0;
 
 	IN6_IFADDR_RLOCK(&in6_ifa_tracker);
 	CK_LIST_FOREACH(ia, IN6ADDR_HASH(&sa6->sin6_addr), ia6_hash) {
 		if (!IN6_ARE_ADDR_EQUAL(IA6_IN6(ia), &sa6->sin6_addr))
 			continue;
 		/* Only the first matching entry is consulted. */
 		if ((ia->ia6_flags & IN6_IFF_DEPRECATED) != 0)
 			deprecated = 1;
 		break;
 	}
 	IN6_IFADDR_RUNLOCK(&in6_ifa_tracker);
 
 	return (deprecated);
 }
 
 /*
  * Return the number of leading bits (0..128) that src and dst have in
  * common.
  */
 int
 in6_matchlen(struct in6_addr *src, struct in6_addr *dst)
 {
 	u_char *sp = (u_char *)src, *dp = (u_char *)dst;
 	u_char diff;
 	int i, nbits = 0;
 
 	for (i = 0; i < 16; i++) {
 		diff = sp[i] ^ dp[i];
 		if (diff == 0) {
 			/* The whole byte matches. */
 			nbits += 8;
 			continue;
 		}
 		/* Count the high-order bits of this byte that still agree. */
 		for (; (diff & 0x80) == 0; diff <<= 1)
 			nbits++;
 		break;
 	}
 	return (nbits);
 }
 
 /*
  * Return 1 if the first len bits of p1 and p2 are identical, 0 otherwise.
  * A len outside 0..128 is logged and treated as "not equal".
  * XXX: to be scope conscious
  */
 int
 in6_are_prefix_equal(struct in6_addr *p1, struct in6_addr *p2, int len)
 {
 	int nbytes, nbits, shift;
 
 	/* sanity check */
 	if (len < 0 || len > 128) {
 		log(LOG_ERR, "in6_are_prefix_equal: invalid prefix length(%d)\n",
 		    len);
 		return (0);
 	}
 
 	nbytes = len / 8;
 	nbits = len % 8;
 
 	/* Whole bytes first ... */
 	if (bcmp(&p1->s6_addr, &p2->s6_addr, nbytes) != 0)
 		return (0);
 	if (nbits == 0)
 		return (1);
 
 	/* ... then the remaining high-order bits of the next byte. */
 	shift = 8 - nbits;
 	if ((p1->s6_addr[nbytes] >> shift) != (p2->s6_addr[nbytes] >> shift))
 		return (0);
 
 	return (1);
 }
 
 /*
  * Fill *maskp with the netmask that has the len high-order bits set.
  * A len outside 0..128 is logged and *maskp is left untouched.
  */
 void
 in6_prefixlen2mask(struct in6_addr *maskp, int len)
 {
 	int nbytes, nbits, pos;
 
 	/* sanity check */
 	if (len < 0 || len > 128) {
 		log(LOG_ERR, "in6_prefixlen2mask: invalid prefix length(%d)\n",
 		    len);
 		return;
 	}
 
 	nbytes = len / 8;
 	nbits = len % 8;
 
 	bzero(maskp, sizeof(*maskp));
 	for (pos = 0; pos < nbytes; pos++)
 		maskp->s6_addr[pos] = 0xff;
 	/* Partial byte: keep only its nbits high-order bits. */
 	if (nbits != 0)
 		maskp->s6_addr[nbytes] = (0xff << (8 - nbits)) & 0xff;
 }
 
 /*
  * Pick a source-candidate address on ifp for destination dst.
  *
  * Anycast, not-ready and detached addresses are never used.  Preference
  * order:
  *  1) an address whose scope equals dst's scope; among several, the one
  *     sharing the longest prefix with dst,
  *  2) the first remaining usable, non-deprecated address,
  *  3) a deprecated address, if V_ip6_use_deprecated allows it.
  * The result is returned referenced; NULL means nothing was usable.  Must
  * be called inside the network epoch.
  */
 struct in6_ifaddr *
 in6_ifawithifp(struct ifnet *ifp, struct in6_addr *dst)
 {
 	int dst_scope =	in6_addrscope(dst), blen = -1, tlen;
 	struct ifaddr *ifa;
 	struct in6_ifaddr *ia;
 	struct in6_ifaddr *besta = NULL;
 	struct in6_ifaddr *dep[2];	/* last-resort: deprecated */
 
 	NET_EPOCH_ASSERT();
 
 	dep[0] = dep[1] = NULL;
 
 	/* First pass: best address within the destination's scope. */
 	CK_STAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
 		if (ifa->ifa_addr->sa_family != AF_INET6)
 			continue;
 		ia = (struct in6_ifaddr *)ifa;
 		/* XXX: is there any case to allow anycast? */
 		if ((ia->ia6_flags & (IN6_IFF_ANYCAST | IN6_IFF_NOTREADY |
 		    IN6_IFF_DETACHED)) != 0)
 			continue;
 		if ((ia->ia6_flags & IN6_IFF_DEPRECATED) != 0) {
 			if (V_ip6_use_deprecated)
 				dep[0] = ia;
 			continue;
 		}
 
 		if (dst_scope != in6_addrscope(IFA_IN6(ifa)))
 			continue;
 		if (besta == NULL) {
 			besta = ia;
 			continue;
 		}
 		/*
 		 * call in6_matchlen() as few as possible: the length for
 		 * the current best is computed lazily, once.
 		 */
 		if (blen == -1)
 			blen = in6_matchlen(&besta->ia_addr.sin6_addr, dst);
 		tlen = in6_matchlen(IFA_IN6(ifa), dst);
 		if (tlen > blen) {
 			blen = tlen;
 			besta = ia;
 		}
 	}
 	if (besta != NULL) {
 		ifa_ref(&besta->ia_ifa);
 		return (besta);
 	}
 
 	/* Second pass: the first usable address regardless of scope. */
 	CK_STAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
 		if (ifa->ifa_addr->sa_family != AF_INET6)
 			continue;
 		ia = (struct in6_ifaddr *)ifa;
 		/* XXX: is there any case to allow anycast? */
 		if ((ia->ia6_flags & (IN6_IFF_ANYCAST | IN6_IFF_NOTREADY |
 		    IN6_IFF_DETACHED)) != 0)
 			continue;
 		if ((ia->ia6_flags & IN6_IFF_DEPRECATED) != 0) {
 			if (V_ip6_use_deprecated)
 				dep[1] = ia;
 			continue;
 		}
 
 		ifa_ref(ifa);
 		return (ia);
 	}
 
 	/* use the last-resort values, that are, deprecated addresses */
 	if (dep[0] != NULL) {
 		ifa_ref((struct ifaddr *)dep[0]);
 		return (dep[0]);
 	}
 	if (dep[1] != NULL) {
 		ifa_ref((struct ifaddr *)dep[1]);
 		return (dep[1]);
 	}
 
 	return (NULL);
 }
 
 /*
  * Called when an interface becomes IFF_UP: start DAD, after a random
  * delay, for every IPv6 address on ifp that is marked tentative, then run
  * in6_ifattach() for the interface.
  */
 void
 in6_if_up(struct ifnet *ifp)
 {
 	struct epoch_tracker et;
 	struct in6_ifaddr *ia;
 	struct ifaddr *ifa;
 
 	NET_EPOCH_ENTER(et);
 	CK_STAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
 		if (ifa->ifa_addr->sa_family != AF_INET6)
 			continue;
 		ia = (struct in6_ifaddr *)ifa;
 		if ((ia->ia6_flags & IN6_IFF_TENTATIVE) == 0)
 			continue;
 		/*
 		 * The TENTATIVE flag was likely set by hand
 		 * beforehand, implicitly indicating the need for DAD.
 		 * We may be able to skip the random delay in this
 		 * case, but we impose delays just in case.
 		 */
 		nd6_dad_start(ifa,
 		    arc4random() % (MAX_RTR_SOLICITATION_DELAY * hz));
 	}
 	NET_EPOCH_EXIT(et);
 
 	/*
 	 * special cases, like 6to4, are handled in in6_ifattach
 	 */
 	in6_ifattach(ifp, NULL);
 }
 
 /*
  * Return 1 if duplicate address detection should be performed on ifp,
  * 0 otherwise.  DAD is skipped on loopback interfaces, on interfaces
  * without IFF_MULTICAST, and when ND6_IFF_IFDISABLED or ND6_IFF_NO_DAD is
  * set.
  */
 int
 in6if_do_dad(struct ifnet *ifp)
 {
 
 	if ((ifp->if_flags & IFF_LOOPBACK) != 0 ||
 	    (ifp->if_flags & IFF_MULTICAST) == 0)
 		return (0);
 
 	return ((ND_IFINFO(ifp)->flags &
 	    (ND6_IFF_IFDISABLED | ND6_IFF_NO_DAD)) == 0);
 }
 
 /*
  * Compute the largest IPv6 link MTU over all non-loopback interfaces and
  * store it in V_in6_maxmtu.  V_in6_maxmtu is left unchanged when no
  * interface yields a positive value.
  */
 void
 in6_setmaxmtu(void)
 {
 	struct epoch_tracker et;
 	struct ifnet *ifp;
 	unsigned long maxmtu = 0;
 
 	NET_EPOCH_ENTER(et);
 	CK_STAILQ_FOREACH(ifp, &V_ifnet, if_link) {
 		/* this function can be called during ifnet initialization */
 		if (ifp->if_afdata[AF_INET6] == NULL)
 			continue;
 		if ((ifp->if_flags & IFF_LOOPBACK) != 0)
 			continue;
 		if (IN6_LINKMTU(ifp) > maxmtu)
 			maxmtu = IN6_LINKMTU(ifp);
 	}
 	NET_EPOCH_EXIT(et);
 
 	/* update only when maxmtu is positive */
 	if (maxmtu != 0)
 		V_in6_maxmtu = maxmtu;
 }
 
 /*
  * Provide the length of interface identifiers to be used for the link
  * attached to the given interface.  The length should be defined in the
  * "IPv6 over xxx-link" document.  Note that address architecture might
  * also define the length for a particular set of address prefixes,
  * regardless of the link type.  As clarified in rfc2462bis, those two
  * definitions should be consistent, and those really are as of August
  * 2004.
  *
  * Every link type currently yields 64; unknown types additionally print
  * a notice.
  */
 int
 in6_if2idlen(struct ifnet *ifp)
 {
 	switch (ifp->if_type) {
 	case IFT_ETHER:		/* RFC2464 */
 	case IFT_PROPVIRTUAL:	/* XXX: no RFC. treat it as ether */
 	case IFT_L2VLAN:	/* ditto */
 	case IFT_BRIDGE:	/* bridge(4) only does Ethernet-like links */
 	case IFT_INFINIBAND:
 	case IFT_PPP:		/* RFC2472 */
 	case IFT_FRELAY:	/* RFC2590 */
 	case IFT_IEEE1394:	/* RFC3146 */
 	case IFT_GIF:		/* draft-ietf-v6ops-mech-v2-07 */
 	case IFT_LOOP:		/* XXX: is this really correct? */
 		break;
 	default:
 		/*
 		 * Unknown link type:
 		 * It might be controversial to use the today's common constant
 		 * of 64 for these cases unconditionally.  For full compliance,
 		 * we should return an error in this case.  On the other hand,
 		 * if we simply miss the standard for the link type or a new
 		 * standard is defined for a new link type, the IFID length
 		 * is very likely to be the common constant.  As a compromise,
 		 * we always use the constant, but make an explicit notice
 		 * indicating the "unknown" case.
 		 */
 		printf("in6_if2idlen: unknown link type (%d)\n", ifp->if_type);
 		break;
 	}
 
 	return (64);
 }
 
 /*
  * IPv6 link-layer table entry.  It only wraps the generic llentry, which
  * is its first and sole member.
  */
 struct in6_llentry {
 	struct llentry		base;
 };
 
 /* Hash table size constant for the IPv6 lltable. */
 #define	IN6_LLTBL_DEFAULT_HSIZE	32
 /*
  * Fold the 32-bit key k onto itself in 8-bit steps and reduce it to a
  * bucket index with the mask (h - 1); the mask only yields a proper
  * index range when h is a power of two.  Both arguments are fully
  * parenthesized so that expression arguments expand correctly.  Note
  * that k is evaluated more than once.
  */
 #define	IN6_LLTBL_HASH(k, h) \
 	((((((((k) >> 8) ^ (k)) >> 8) ^ (k)) >> 8) ^ (k)) & ((h) - 1))
 
 /*
  * Epoch callback that performs the actual deallocation: recover the
  * llentry from its embedded epoch context, destroy its locks and free it.
  */
 static void
 in6_lltable_destroy_lle_unlocked(epoch_context_t ctx)
 {
 	struct llentry *lle = __containerof(ctx, struct llentry,
 	    lle_epoch_ctx);
 
 	LLE_LOCK_DESTROY(lle);
 	LLE_REQ_DESTROY(lle);
 	free(lle, M_LLTABLE);
 }
 
 /*
  * Called by LLE_FREE_LOCKED when number of references
  * drops to zero.
  *
  * Releases the write lock on lle and schedules the deallocation through
  * NET_EPOCH_CALL() instead of freeing the entry directly.
  */
 static void
 in6_lltable_destroy_lle(struct llentry *lle)
 {
 
 	LLE_WUNLOCK(lle);
 	NET_EPOCH_CALL(in6_lltable_destroy_lle_unlocked, &lle->lle_epoch_ctx);
 }
 
 /*
  * Allocate a zeroed in6_llentry for addr6 and initialize its reference
  * count (1), destructor, locks and timer.  Returns the embedded llentry,
  * or NULL if the M_NOWAIT allocation fails.  The flags argument is not
  * referenced in this function.
  */
 static struct llentry *
 in6_lltable_new(const struct in6_addr *addr6, u_int flags)
 {
 	struct in6_llentry *entry;
 	struct llentry *lle;
 
 	entry = malloc(sizeof(struct in6_llentry), M_LLTABLE,
 	    M_NOWAIT | M_ZERO);
 	if (entry == NULL)		/* NB: caller generates msg */
 		return (NULL);
 
 	lle = &entry->base;
 	lle->r_l3addr.addr6 = *addr6;
 	lle->lle_refcnt = 1;
 	lle->lle_free = in6_lltable_destroy_lle;
 	LLE_LOCK_INIT(lle);
 	LLE_REQ_INIT(lle);
 	callout_init(&lle->lle_timer, 1);
 
 	return (lle);
 }
 
 /*
  * Decide whether lle is matched by the prefix saddr/smask.
  *
  * Returns 0 when the entry's address is outside the prefix.  Inside the
  * prefix, an LLE_IFADDR entry matches only if its address equals saddr
  * and LLE_STATIC is requested in flags; any other entry matches if
  * LLE_STATIC is requested or the entry itself is not static.
  */
 static int
 in6_lltable_match_prefix(const struct sockaddr *saddr,
     const struct sockaddr *smask, u_int flags, struct llentry *lle)
 {
 	const struct in6_addr *addr, *mask, *lle_addr;
 	int want_static;
 
 	addr = &((const struct sockaddr_in6 *)saddr)->sin6_addr;
 	mask = &((const struct sockaddr_in6 *)smask)->sin6_addr;
 	lle_addr = &lle->r_l3addr.addr6;
 
 	if (!IN6_ARE_MASKED_ADDR_EQUAL(lle_addr, addr, mask))
 		return (0);
 
 	/* flags & LLE_STATIC means deleting both dynamic and static entries */
 	want_static = (flags & LLE_STATIC) != 0;
 
 	if ((lle->la_flags & LLE_IFADDR) != 0) {
 		/*
 		 * Delete LLE_IFADDR records IFF address & flag matches.
 		 * Note that addr is the interface address within prefix
 		 * being matched.
 		 */
 		if (want_static && IN6_ARE_ADDR_EQUAL(addr, lle_addr))
 			return (1);
 		return (0);
 	}
 
 	if (want_static || (lle->la_flags & LLE_STATIC) == 0)
 		return (1);
 
 	return (0);
 }
 
 /*
  * Unlink lle from llt if it is still linked, then release it with
  * llentry_free().  The caller must hold the entry's write lock, and the
  * interface's afdata write lock when the entry is linked.
  */
 static void
 in6_lltable_free_entry(struct lltable *llt, struct llentry *lle)
 {
 
 	LLE_WLOCK_ASSERT(lle);
 	KASSERT(llt != NULL, ("lltable is NULL"));
 
 	/* Unlink entry from table */
 	if ((lle->la_flags & LLE_LINKED) != 0) {
 		IF_AFDATA_WLOCK_ASSERT(llt->llt_ifp);
 		lltable_unlink_entry(llt, lle);
 	}
 
 	llentry_free(lle);
 }
 
 /*
  * Check that l3addr may get a neighbor cache entry on ifp.
  *
  * The address is looked up in the routing table; if the route goes
  * through a gateway or points at another interface, the address is still
  * accepted when ifaof_ifpforaddr() finds a matching address on ifp.
  * Returns 0 when acceptable, EINVAL otherwise.  Must be called inside the
  * network epoch.  The flags argument is not referenced.
  *
  * NOTE(review): with the fib6_lookup() variant a failed lookup (NULL
  * nexthop) skips the check and returns 0, whereas the removed
  * fib6_lookup_nh_basic() variant entered the check on lookup error --
  * confirm this change is intended.
  */
 static int
 in6_lltable_rtcheck(struct ifnet *ifp,
 		    u_int flags,
 		    const struct sockaddr *l3addr)
 {
 	const struct sockaddr_in6 *sin6;
-	struct nhop6_basic nh6;
+	struct nhop_object *nh;
 	struct in6_addr dst;
 	uint32_t scopeid;
-	int error;
 	char ip6buf[INET6_ADDRSTRLEN];
 	int fibnum;
 
 	NET_EPOCH_ASSERT();
 	KASSERT(l3addr->sa_family == AF_INET6,
 	    ("sin_family %d", l3addr->sa_family));
 
 	sin6 = (const struct sockaddr_in6 *)l3addr;
 	in6_splitscope(&sin6->sin6_addr, &dst, &scopeid);
 	/* Use the default FIB when V_rt_add_addr_allfibs is set. */
 	fibnum = V_rt_add_addr_allfibs ? RT_DEFAULT_FIB : ifp->if_fib;
-	error = fib6_lookup_nh_basic(fibnum, &dst, scopeid, 0, 0, &nh6);
-	if (error != 0 || (nh6.nh_flags & NHF_GATEWAY) || nh6.nh_ifp != ifp) {
+	nh = fib6_lookup(fibnum, &dst, scopeid, NHR_NONE, 0);
+	if (nh && ((nh->nh_flags & NHF_GATEWAY) || nh->nh_ifp != ifp)) {
 		struct ifaddr *ifa;
 		/*
 		 * Create an ND6 cache for an IPv6 neighbor
 		 * that is not covered by our own prefix.
 		 */
 		ifa = ifaof_ifpforaddr(l3addr, ifp);
 		if (ifa != NULL) {
 			return 0;
 		}
 		log(LOG_INFO, "IPv6 address: \"%s\" is not on the network\n",
 		    ip6_sprintf(ip6buf, &sin6->sin6_addr));
 		return EINVAL;
 	}
 	return 0;
 }
 
 /*
  * Called by the datapath to indicate that the entry was used.
  *
  * Under the entry's request lock, clears r_skip_req and records the
  * current uptime as the hit time.
  */
 static void
 in6_lltable_mark_used(struct llentry *lle)
 {
 
 	LLE_REQ_LOCK(lle);
 	lle->r_skip_req = 0;
 
 	/*
 	 * Set the hit time so the callback function
 	 * can determine the remaining time before
 	 * transiting to the DELAY state.
 	 */
 	lle->lle_hittime = time_uptime;
 	LLE_REQ_UNLOCK(lle);
 }
 
 /*
  * Compute the bucket index for dst in a table of hsize buckets.  Only the
  * last 32-bit word of the address feeds the hash.
  */
 static inline uint32_t
 in6_lltable_hash_dst(const struct in6_addr *dst, uint32_t hsize)
 {
 
 	return (IN6_LLTBL_HASH(dst->s6_addr32[3], hsize));
 }
 
 /*
  * Compute the bucket index for an existing entry from its IPv6 address.
  */
 static uint32_t
 in6_lltable_hash(const struct llentry *lle, uint32_t hsize)
 {
 
 	return (in6_lltable_hash_dst(&lle->r_l3addr.addr6, hsize));
 }
 
 /*
  * Fill sa, treated as a sockaddr_in6, with the IPv6 address of lle.  All
  * other fields of the sockaddr are zeroed.
  */
 static void
 in6_lltable_fill_sa_entry(const struct llentry *lle, struct sockaddr *sa)
 {
 	struct sockaddr_in6 *dst6 = (struct sockaddr_in6 *)sa;
 
 	bzero(dst6, sizeof(struct sockaddr_in6));
 	dst6->sin6_len = sizeof(struct sockaddr_in6);
 	dst6->sin6_family = AF_INET6;
 	dst6->sin6_addr = lle->r_l3addr.addr6;
 }
 
 /*
  * Find the entry for dst in llt's hash bucket, ignoring entries flagged
  * LLE_DELETED.  Returns NULL when there is no such entry.  No lock is
  * taken on the entry returned.
  */
 static inline struct llentry *
 in6_lltable_find_dst(struct lltable *llt, const struct in6_addr *dst)
 {
 	struct llentries *bucket;
 	struct llentry *lle;
 
 	bucket = &llt->lle_head[in6_lltable_hash_dst(dst, llt->llt_hsize)];
 	CK_LIST_FOREACH(lle, bucket, lle_next) {
 		if ((lle->la_flags & LLE_DELETED) == 0 &&
 		    IN6_ARE_ADDR_EQUAL(&lle->r_l3addr.addr6, dst))
 			return (lle);
 	}
 
 	return (NULL);
 }
 
 /*
  * Mark lle as deleted, notify lle_event handlers with LLENTRY_DELETED and
  * release the entry via llentry_free().  The llt argument is not
  * referenced.
  */
 static void
 in6_lltable_delete_entry(struct lltable *llt, struct llentry *lle)
 {
 
 	/* Flag first so the handlers invoked below see the deleted state. */
 	lle->la_flags |= LLE_DELETED;
 	EVENTHANDLER_INVOKE(lle_event, lle, LLENTRY_DELETED);
 #ifdef DIAGNOSTIC
 	log(LOG_INFO, "ifaddr cache = %p is deleted\n", lle);
 #endif
 	llentry_free(lle);
 }
 
 /*
  * Allocate and initialize a new llentry for l3addr on llt's interface.
  *
  * Unless LLE_IFADDR is requested, the address must first pass
  * in6_lltable_rtcheck().  For LLE_IFADDR entries the link-layer header is
  * precomputed from the interface's own link-layer address and the entry
  * is made static.  Static entries start in the ND6_LLINFO_REACHABLE
  * state.  Returns NULL on any failure.  The entry is not linked into the
  * table here.
  */
 static struct llentry *
 in6_lltable_alloc(struct lltable *llt, u_int flags,
 	const struct sockaddr *l3addr)
 {
 	const struct sockaddr_in6 *sin6 = (const struct sockaddr_in6 *)l3addr;
 	struct ifnet *ifp = llt->llt_ifp;
 	struct llentry *lle;
 	char linkhdr[LLE_MAX_LINKHDR];
 	size_t linkhdrsize;
 	int lladdr_off;
 
 	KASSERT(l3addr->sa_family == AF_INET6,
 	    ("sin_family %d", l3addr->sa_family));
 
 	/*
 	 * A route that covers the given address must have
 	 * been installed 1st because we are doing a resolution,
 	 * verify this.
 	 */
 	if (!(flags & LLE_IFADDR) &&
 	    in6_lltable_rtcheck(ifp, flags, l3addr) != 0)
 		return (NULL);
 
 	lle = in6_lltable_new(&sin6->sin6_addr, flags);
 	if (lle == NULL) {
 		log(LOG_INFO, "lla_lookup: new lle malloc failed\n");
 		return (NULL);
 	}
 	lle->la_flags = flags;
 	if ((flags & LLE_IFADDR) == LLE_IFADDR) {
 		linkhdrsize = LLE_MAX_LINKHDR;
 		if (lltable_calc_llheader(ifp, AF_INET6, IF_LLADDR(ifp),
 		    linkhdr, &linkhdrsize, &lladdr_off) != 0) {
 			/* Dispose of the fresh entry through the epoch callback. */
 			NET_EPOCH_CALL(in6_lltable_destroy_lle_unlocked, &lle->lle_epoch_ctx);
 			return (NULL);
 		}
 		lltable_set_entry_addr(ifp, lle, linkhdr, linkhdrsize,
 		    lladdr_off);
 		lle->la_flags |= LLE_STATIC;
 	}
 
 	if ((lle->la_flags & LLE_STATIC) != 0)
 		lle->ln_state = ND6_LLINFO_REACHABLE;
 
 	return (lle);
 }
 
 /*
  * Look up the entry for l3addr in llt.
  *
  * flags selects how the entry is returned: with LLE_UNLOCKED it is
  * returned without taking its lock, with LLE_EXCLUSIVE it is returned
  * write-locked, otherwise read-locked.  LLE_UNLOCKED and LLE_EXCLUSIVE
  * must not be combined.  Returns NULL if no entry exists or if the entry
  * was found unlinked once its lock had been acquired.
  */
 static struct llentry *
 in6_lltable_lookup(struct lltable *llt, u_int flags,
 	const struct sockaddr *l3addr)
 {
 	const struct sockaddr_in6 *sin6 = (const struct sockaddr_in6 *)l3addr;
 	struct llentry *lle;
 
 	IF_AFDATA_LOCK_ASSERT(llt->llt_ifp);
 	KASSERT(l3addr->sa_family == AF_INET6,
 	    ("sin_family %d", l3addr->sa_family));
 	KASSERT((flags & (LLE_UNLOCKED | LLE_EXCLUSIVE)) !=
 	    (LLE_UNLOCKED | LLE_EXCLUSIVE),
 	    ("wrong lle request flags: %#x", flags));
 
 	lle = in6_lltable_find_dst(llt, &sin6->sin6_addr);
 	if (lle == NULL)
 		return (NULL);
 	if (flags & LLE_UNLOCKED)
 		return (lle);
 
 	if (flags & LLE_EXCLUSIVE)
 		LLE_WLOCK(lle);
 	else
 		LLE_RLOCK(lle);
 
 	/*
 	 * If the afdata lock is not held, the LLE may have been unlinked while
 	 * we were blocked on the LLE lock.  Check for this case.
 	 */
 	if (__predict_false((lle->la_flags & LLE_LINKED) == 0)) {
 		/* Drop the lock taken above before reporting "not found". */
 		if (flags & LLE_EXCLUSIVE)
 			LLE_WUNLOCK(lle);
 		else
 			LLE_RUNLOCK(lle);
 		return (NULL);
 	}
 	return (lle);
 }
 
 static int
 in6_lltable_dump_entry(struct lltable *llt, struct llentry *lle,
     struct sysctl_req *wr)
 {
 	struct ifnet *ifp = llt->llt_ifp;
 	/* XXX stack use */
 	struct {
 		struct rt_msghdr	rtm;
 		struct sockaddr_in6	sin6;
 		/*
 		 * ndp.c assumes that sdl is word aligned
 		 */
 #ifdef __LP64__
 		uint32_t		pad;
 #endif
 		struct sockaddr_dl	sdl;
 	} ndpc;
 	struct sockaddr_dl *sdl;
 	int error;
 
 	bzero(&ndpc, sizeof(ndpc));
 	/* skip deleted entries */
 	if ((lle->la_flags & LLE_DELETED) == LLE_DELETED)
 		return (0);
 	/* Skip if jailed and not a valid IP of the prison. */
 	lltable_fill_sa_entry(lle, (struct sockaddr *)&ndpc.sin6);
 	if (prison_if(wr->td->td_ucred, (struct sockaddr *)&ndpc.sin6) != 0)
 		return (0);
 	/*
 	 * produce a msg made of:
 	 *  struct rt_msghdr;
 	 *  struct sockaddr_in6 (IPv6)
 	 *  struct sockaddr_dl;
 	 */
 	ndpc.rtm.rtm_msglen = sizeof(ndpc);
 	ndpc.rtm.rtm_version = RTM_VERSION;
 	ndpc.rtm.rtm_type = RTM_GET;
 	ndpc.rtm.rtm_flags = RTF_UP;
 	ndpc.rtm.rtm_addrs = RTA_DST | RTA_GATEWAY;
 	if (V_deembed_scopeid)
 		sa6_recoverscope(&ndpc.sin6);
 
 	/* publish */
 	if (lle->la_flags & LLE_PUB)
 		ndpc.rtm.rtm_flags |= RTF_ANNOUNCE;
 
 	sdl = &ndpc.sdl;
 	sdl->sdl_family = AF_LINK;
 	sdl->sdl_len = sizeof(*sdl);
 	sdl->sdl_index = ifp->if_index;
 	sdl->sdl_type = ifp->if_type;
 	if ((lle->la_flags & LLE_VALID) == LLE_VALID) {
 		sdl->sdl_alen = ifp->if_addrlen;
 		bcopy(lle->ll_addr, LLADDR(sdl), ifp->if_addrlen);
 	} else {
 		sdl->sdl_alen = 0;
 		bzero(LLADDR(sdl), ifp->if_addrlen);
 	}
 	if (lle->la_expire != 0)
 		ndpc.rtm.rtm_rmx.rmx_expire = lle->la_expire +
 		    lle->lle_remtime / hz + time_second - time_uptime;
 	ndpc.rtm.rtm_flags |= (RTF_HOST | RTF_LLDATA);
 	if (lle->la_flags & LLE_STATIC)
 		ndpc.rtm.rtm_flags |= RTF_STATIC;
 	if (lle->la_flags & LLE_IFADDR)
 		ndpc.rtm.rtm_flags |= RTF_PINNED;
 	if (lle->ln_router != 0)
 		ndpc.rtm.rtm_flags |= RTF_GATEWAY;
 	ndpc.rtm.rtm_rmx.rmx_pksent = lle->la_asked;
 	/* Store state in rmx_weight value */
 	ndpc.rtm.rtm_rmx.rmx_state = lle->ln_state;
 	ndpc.rtm.rtm_index = ifp->if_index;
 	error = SYSCTL_OUT(wr, &ndpc, sizeof(ndpc));
 
 	return (error);
 }
 
 static struct lltable *
 in6_lltattach(struct ifnet *ifp)
 {
 	struct lltable *llt;
 
 	llt = lltable_allocate_htbl(IN6_LLTBL_DEFAULT_HSIZE);
 	llt->llt_af = AF_INET6;
 	llt->llt_ifp = ifp;
 
 	llt->llt_lookup = in6_lltable_lookup;
 	llt->llt_alloc_entry = in6_lltable_alloc;
 	llt->llt_delete_entry = in6_lltable_delete_entry;
 	llt->llt_dump_entry = in6_lltable_dump_entry;
 	llt->llt_hash = in6_lltable_hash;
 	llt->llt_fill_sa_entry = in6_lltable_fill_sa_entry;
 	llt->llt_free_entry = in6_lltable_free_entry;
 	llt->llt_match_prefix = in6_lltable_match_prefix;
 	llt->llt_mark_used = in6_lltable_mark_used;
  	lltable_link(llt);
 
 	return (llt);
 }
 
 void *
 in6_domifattach(struct ifnet *ifp)
 {
 	struct in6_ifextra *ext;
 
 	/* There are not IPv6-capable interfaces. */
 	switch (ifp->if_type) {
 	case IFT_PFLOG:
 	case IFT_PFSYNC:
 	case IFT_USB:
 		return (NULL);
 	}
 	ext = (struct in6_ifextra *)malloc(sizeof(*ext), M_IFADDR, M_WAITOK);
 	bzero(ext, sizeof(*ext));
 
 	ext->in6_ifstat = malloc(sizeof(counter_u64_t) *
 	    sizeof(struct in6_ifstat) / sizeof(uint64_t), M_IFADDR, M_WAITOK);
 	COUNTER_ARRAY_ALLOC(ext->in6_ifstat,
 	    sizeof(struct in6_ifstat) / sizeof(uint64_t), M_WAITOK);
 
 	ext->icmp6_ifstat = malloc(sizeof(counter_u64_t) *
 	    sizeof(struct icmp6_ifstat) / sizeof(uint64_t), M_IFADDR,
 	    M_WAITOK);
 	COUNTER_ARRAY_ALLOC(ext->icmp6_ifstat,
 	    sizeof(struct icmp6_ifstat) / sizeof(uint64_t), M_WAITOK);
 
 	ext->nd_ifinfo = nd6_ifattach(ifp);
 	ext->scope6_id = scope6_ifattach(ifp);
 	ext->lltable = in6_lltattach(ifp);
 
 	ext->mld_ifinfo = mld_domifattach(ifp);
 
 	return ext;
 }
 
 int
 in6_domifmtu(struct ifnet *ifp)
 {
 	if (ifp->if_afdata[AF_INET6] == NULL)
 		return ifp->if_mtu;
 
 	return (IN6_LINKMTU(ifp));
 }
 
 void
 in6_domifdetach(struct ifnet *ifp, void *aux)
 {
 	struct in6_ifextra *ext = (struct in6_ifextra *)aux;
 
 	mld_domifdetach(ifp);
 	scope6_ifdetach(ext->scope6_id);
 	nd6_ifdetach(ifp, ext->nd_ifinfo);
 	lltable_free(ext->lltable);
 	COUNTER_ARRAY_FREE(ext->in6_ifstat,
 	    sizeof(struct in6_ifstat) / sizeof(uint64_t));
 	free(ext->in6_ifstat, M_IFADDR);
 	COUNTER_ARRAY_FREE(ext->icmp6_ifstat,
 	    sizeof(struct icmp6_ifstat) / sizeof(uint64_t));
 	free(ext->icmp6_ifstat, M_IFADDR);
 	free(ext, M_IFADDR);
 }
 
 /*
  * Convert sockaddr_in6 to sockaddr_in.  Original sockaddr_in6 must be
  * v4 mapped addr or v4 compat addr
  */
 void
 in6_sin6_2_sin(struct sockaddr_in *sin, struct sockaddr_in6 *sin6)
 {
 
 	bzero(sin, sizeof(*sin));
 	sin->sin_len = sizeof(struct sockaddr_in);
 	sin->sin_family = AF_INET;
 	sin->sin_port = sin6->sin6_port;
 	sin->sin_addr.s_addr = sin6->sin6_addr.s6_addr32[3];
 }
 
 /* Convert sockaddr_in to sockaddr_in6 in v4 mapped addr format. */
 void
 in6_sin_2_v4mapsin6(struct sockaddr_in *sin, struct sockaddr_in6 *sin6)
 {
 	bzero(sin6, sizeof(*sin6));
 	sin6->sin6_len = sizeof(struct sockaddr_in6);
 	sin6->sin6_family = AF_INET6;
 	sin6->sin6_port = sin->sin_port;
 	sin6->sin6_addr.s6_addr32[0] = 0;
 	sin6->sin6_addr.s6_addr32[1] = 0;
 	sin6->sin6_addr.s6_addr32[2] = IPV6_ADDR_INT32_SMP;
 	sin6->sin6_addr.s6_addr32[3] = sin->sin_addr.s_addr;
 }
 
 /* Convert sockaddr_in6 into sockaddr_in. */
 void
 in6_sin6_2_sin_in_sock(struct sockaddr *nam)
 {
 	struct sockaddr_in *sin_p;
 	struct sockaddr_in6 sin6;
 
 	/*
 	 * Save original sockaddr_in6 addr and convert it
 	 * to sockaddr_in.
 	 */
 	sin6 = *(struct sockaddr_in6 *)nam;
 	sin_p = (struct sockaddr_in *)nam;
 	in6_sin6_2_sin(sin_p, &sin6);
 }
 
 /* Convert sockaddr_in into sockaddr_in6 in v4 mapped addr format. */
 void
 in6_sin_2_v4mapsin6_in_sock(struct sockaddr **nam)
 {
 	struct sockaddr_in *sin_p;
 	struct sockaddr_in6 *sin6_p;
 
 	sin6_p = malloc(sizeof *sin6_p, M_SONAME, M_WAITOK);
 	sin_p = (struct sockaddr_in *)*nam;
 	in6_sin_2_v4mapsin6(sin_p, sin6_p);
 	free(*nam, M_SONAME);
 	*nam = (struct sockaddr *)sin6_p;
 }
Index: head/sys/netinet6/in6_fib.c
===================================================================
--- head/sys/netinet6/in6_fib.c	(revision 362899)
+++ head/sys/netinet6/in6_fib.c	(revision 362900)
@@ -1,441 +1,263 @@
 /*-
  * Copyright (c) 2015
  * 	Alexander V. Chernikov <melifaro@FreeBSD.org>
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include "opt_inet.h"
 #include "opt_inet6.h"
 #include "opt_route.h"
 #include "opt_mpath.h"
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/lock.h>
 #include <sys/rmlock.h>
 #include <sys/malloc.h>
 #include <sys/mbuf.h>
 #include <sys/socket.h>
 #include <sys/sysctl.h>
 #include <sys/kernel.h>
 
 #include <net/if.h>
 #include <net/if_var.h>
 #include <net/if_dl.h>
 #include <net/route.h>
 #include <net/route/route_var.h>
 #include <net/route/nhop.h>
 #include <net/route/shared.h>
 #include <net/vnet.h>
 
 #ifdef RADIX_MPATH
 #include <net/radix_mpath.h>
 #endif
 
 #include <netinet/in.h>
 #include <netinet/in_var.h>
 #include <netinet/ip_mroute.h>
 #include <netinet/ip6.h>
 #include <netinet6/in6_fib.h>
 #include <netinet6/in6_var.h>
 #include <netinet6/nd6.h>
 #include <netinet6/scope6_var.h>
 
 #include <net/if_types.h>
 
 #ifdef INET6
-static void fib6_rte_to_nh_extended(const struct nhop_object *nh,
-    const struct in6_addr *dst, uint32_t flags, struct nhop6_extended *pnh6);
-static void fib6_rte_to_nh_basic(const struct nhop_object *nh, const struct in6_addr *dst,
-    uint32_t flags, struct nhop6_basic *pnh6);
 
-#define	ifatoia6(ifa)	((struct in6_ifaddr *)(ifa))
-
 CHK_STRUCT_ROUTE_COMPAT(struct route_in6, ro_dst);
-
-
-
-static void
-fib6_rte_to_nh_basic(const struct nhop_object *nh, const struct in6_addr *dst,
-    uint32_t flags, struct nhop6_basic *pnh6)
-{
-
-	/* Do explicit nexthop zero unless we're copying it */
-	memset(pnh6, 0, sizeof(*pnh6));
-
-	if ((flags & NHR_IFAIF) != 0)
-		pnh6->nh_ifp = nh->nh_aifp;
-	else
-		pnh6->nh_ifp = nh->nh_ifp;
-
-	pnh6->nh_mtu = nh->nh_mtu;
-	if (nh->nh_flags & NHF_GATEWAY) {
-		/* Return address with embedded scope. */
-		pnh6->nh_addr = nh->gw6_sa.sin6_addr;
-	} else
-		pnh6->nh_addr = *dst;
-	/* Set flags */
-	pnh6->nh_flags = nh->nh_flags;
-}
-
-static void
-fib6_rte_to_nh_extended(const struct nhop_object *nh, const struct in6_addr *dst,
-    uint32_t flags, struct nhop6_extended *pnh6)
-{
-
-	/* Do explicit nexthop zero unless we're copying it */
-	memset(pnh6, 0, sizeof(*pnh6));
-
-	if ((flags & NHR_IFAIF) != 0)
-		pnh6->nh_ifp = nh->nh_aifp;
-	else
-		pnh6->nh_ifp = nh->nh_ifp;
-
-	pnh6->nh_mtu = nh->nh_mtu;
-	if (nh->nh_flags & NHF_GATEWAY) {
-		/* Return address with embedded scope. */
-		pnh6->nh_addr = nh->gw6_sa.sin6_addr;
-	} else
-		pnh6->nh_addr = *dst;
-	/* Set flags */
-	pnh6->nh_flags = nh->nh_flags;
-	pnh6->nh_ia = ifatoia6(nh->nh_ifa);
-}
-
-/*
- * Performs IPv6 route table lookup on @dst. Returns 0 on success.
- * Stores basic nexthop info into provided @pnh6 structure.
- * Note that
- * - nh_ifp represents logical transmit interface (rt_ifp) by default
- * - nh_ifp represents "address" interface if NHR_IFAIF flag is passed
- * - mtu from logical transmit interface will be returned.
- * - nh_ifp cannot be safely dereferenced
- * - nh_ifp represents rt_ifp (e.g. if looking up address on
- *   interface "ix0" pointer to "ix0" interface will be returned instead
- *   of "lo0")
- * - howewer mtu from "transmit" interface will be returned.
- * - scope will be embedded in nh_addr
- */
-int
-fib6_lookup_nh_basic(uint32_t fibnum, const struct in6_addr *dst, uint32_t scopeid,
-    uint32_t flags, uint32_t flowid, struct nhop6_basic *pnh6)
-{
-	RIB_RLOCK_TRACKER;
-	struct rib_head *rh;
-	struct radix_node *rn;
-	struct sockaddr_in6 sin6;
-	struct nhop_object *nh;
-
-	KASSERT((fibnum < rt_numfibs), ("fib6_lookup_nh_basic: bad fibnum"));
-	rh = rt_tables_get_rnh(fibnum, AF_INET6);
-	if (rh == NULL)
-		return (ENOENT);
-
-	/* Prepare lookup key */
-	memset(&sin6, 0, sizeof(sin6));
-	sin6.sin6_addr = *dst;
-	sin6.sin6_len = sizeof(struct sockaddr_in6);
-	/* Assume scopeid is valid and embed it directly */
-	if (IN6_IS_SCOPE_LINKLOCAL(dst))
-		sin6.sin6_addr.s6_addr16[1] = htons(scopeid & 0xffff);
-
-	RIB_RLOCK(rh);
-	rn = rh->rnh_matchaddr((void *)&sin6, &rh->head);
-	if (rn != NULL && ((rn->rn_flags & RNF_ROOT) == 0)) {
-		nh = RNTORT(rn)->rt_nhop;
-		/* Ensure route & ifp is UP */
-		if (RT_LINK_IS_UP(nh->nh_ifp)) {
-			fib6_rte_to_nh_basic(nh, &sin6.sin6_addr, flags, pnh6);
-			RIB_RUNLOCK(rh);
-			return (0);
-		}
-	}
-	RIB_RUNLOCK(rh);
-
-	return (ENOENT);
-}
-
-/*
- * Performs IPv6 route table lookup on @dst. Returns 0 on success.
- * Stores extended nexthop info into provided @pnh6 structure.
- * Note that
- * - nh_ifp cannot be safely dereferenced unless NHR_REF is specified.
- * - in that case you need to call fib6_free_nh_ext()
- * - nh_ifp represents logical transmit interface (rt_ifp) by default
- * - nh_ifp represents "address" interface if NHR_IFAIF flag is passed
- * - mtu from logical transmit interface will be returned.
- * - scope will be embedded in nh_addr
- */
-int
-fib6_lookup_nh_ext(uint32_t fibnum, const struct in6_addr *dst,uint32_t scopeid,
-    uint32_t flags, uint32_t flowid, struct nhop6_extended *pnh6)
-{
-	RIB_RLOCK_TRACKER;
-	struct rib_head *rh;
-	struct radix_node *rn;
-	struct sockaddr_in6 sin6;
-	struct rtentry *rte;
-	struct nhop_object *nh;
-
-	KASSERT((fibnum < rt_numfibs), ("fib6_lookup_nh_ext: bad fibnum"));
-	rh = rt_tables_get_rnh(fibnum, AF_INET6);
-	if (rh == NULL)
-		return (ENOENT);
-
-	/* Prepare lookup key */
-	memset(&sin6, 0, sizeof(sin6));
-	sin6.sin6_len = sizeof(struct sockaddr_in6);
-	sin6.sin6_addr = *dst;
-	/* Assume scopeid is valid and embed it directly */
-	if (IN6_IS_SCOPE_LINKLOCAL(dst))
-		sin6.sin6_addr.s6_addr16[1] = htons(scopeid & 0xffff);
-
-	RIB_RLOCK(rh);
-	rn = rh->rnh_matchaddr((void *)&sin6, &rh->head);
-	if (rn != NULL && ((rn->rn_flags & RNF_ROOT) == 0)) {
-		rte = RNTORT(rn);
-#ifdef RADIX_MPATH
-		rte = rt_mpath_select(rte, flowid);
-		if (rte == NULL) {
-			RIB_RUNLOCK(rh);
-			return (ENOENT);
-		}
-#endif
-		nh = rte->rt_nhop;
-		/* Ensure route & ifp is UP */
-		if (RT_LINK_IS_UP(nh->nh_ifp)) {
-			fib6_rte_to_nh_extended(nh, &sin6.sin6_addr, flags,
-			    pnh6);
-			if ((flags & NHR_REF) != 0) {
-				/* TODO: Do lwref on egress ifp's */
-			}
-			RIB_RUNLOCK(rh);
-
-			return (0);
-		}
-	}
-	RIB_RUNLOCK(rh);
-
-	return (ENOENT);
-}
-
-void
-fib6_free_nh_ext(uint32_t fibnum, struct nhop6_extended *pnh6)
-{
-
-}
 
 /*
  * Looks up path in fib @fibnum specified by @dst.
  * Assumes scope is deembedded and provided in @scopeid.
  *
  * Returns path nexthop on success. Nexthop is safe to use
  *  within the current network epoch. If longer lifetime is required,
  *  one needs to pass NHR_REF as a flag. This will return referenced
  *  nexthop.
  */
 struct nhop_object *
 fib6_lookup(uint32_t fibnum, const struct in6_addr *dst6,
     uint32_t scopeid, uint32_t flags, uint32_t flowid)
 {
 	RIB_RLOCK_TRACKER;
 	struct rib_head *rh;
 	struct radix_node *rn;
 	struct rtentry *rt;
 	struct nhop_object *nh;
 	struct sockaddr_in6 sin6;
 
 	KASSERT((fibnum < rt_numfibs), ("fib6_lookup: bad fibnum"));
 	rh = rt_tables_get_rnh(fibnum, AF_INET6);
 	if (rh == NULL)
 		return (NULL);
 
 	/* TODO: radix changes */
 	//addr = *dst6;
 	/* Prepare lookup key */
 	memset(&sin6, 0, sizeof(sin6));
 	sin6.sin6_len = sizeof(struct sockaddr_in6);
 	sin6.sin6_addr = *dst6;
 
 	/* Assume scopeid is valid and embed it directly */
 	if (IN6_IS_SCOPE_LINKLOCAL(dst6))
 		sin6.sin6_addr.s6_addr16[1] = htons(scopeid & 0xffff);
 
 	RIB_RLOCK(rh);
 	rn = rh->rnh_matchaddr((void *)&sin6, &rh->head);
 	if (rn != NULL && ((rn->rn_flags & RNF_ROOT) == 0)) {
 		rt = RNTORT(rn);
 #ifdef RADIX_MPATH
 		if (rt_mpath_next(rt) != NULL)
 			rt = rt_mpath_selectrte(rt, flowid);
 #endif
 		nh = rt->rt_nhop;
 		/* Ensure route & ifp is UP */
 		if (RT_LINK_IS_UP(nh->nh_ifp)) {
 			if (flags & NHR_REF)
 				nhop_ref_object(nh);
 			RIB_RUNLOCK(rh);
 			return (nh);
 		}
 	}
 	RIB_RUNLOCK(rh);
 
 	RTSTAT_INC(rts_unreach);
 	return (NULL);
 }
 
 inline static int
 check_urpf(const struct nhop_object *nh, uint32_t flags,
     const struct ifnet *src_if)
 {
 
 	if (src_if != NULL && nh->nh_aifp == src_if) {
 		return (1);
 	}
 	if (src_if == NULL) {
 		if ((flags & NHR_NODEFAULT) == 0)
 			return (1);
 		else if ((nh->nh_flags & NHF_DEFAULT) == 0)
 			return (1);
 	}
 
 	return (0);
 }
 
 #ifdef RADIX_MPATH
 inline static int
 check_urpf_mpath(struct rtentry *rt, uint32_t flags,
     const struct ifnet *src_if)
 {
 	
 	while (rt != NULL) {
 		if (check_urpf(rt->rt_nhop, flags, src_if) != 0)
 			return (1);
 		rt = rt_mpath_next(rt);
 	}
 
 	return (0);
 }
 #endif
 
 /*
  * Performs reverse path forwarding lookup.
  * If @src_if is non-zero, verifies that at least 1 path goes via
  *   this interface.
  * If @src_if is zero, verifies that route exist.
  * if @flags contains NHR_NOTDEFAULT, do not consider default route.
  *
  * Returns 1 if route matching conditions is found, 0 otherwise.
  */
 int
 fib6_check_urpf(uint32_t fibnum, const struct in6_addr *dst6,
     uint32_t scopeid, uint32_t flags, const struct ifnet *src_if)
 {
 	RIB_RLOCK_TRACKER;
 	struct rib_head *rh;
 	struct radix_node *rn;
 	struct rtentry *rt;
 	struct sockaddr_in6 sin6;
 	int ret;
 
 	KASSERT((fibnum < rt_numfibs), ("fib6_check_urpf: bad fibnum"));
 	rh = rt_tables_get_rnh(fibnum, AF_INET6);
 	if (rh == NULL)
 		return (0);
 
 	/* TODO: radix changes */
 	/* Prepare lookup key */
 	memset(&sin6, 0, sizeof(sin6));
 	sin6.sin6_len = sizeof(struct sockaddr_in6);
 	sin6.sin6_addr = *dst6;
 
 	/* Assume scopeid is valid and embed it directly */
 	if (IN6_IS_SCOPE_LINKLOCAL(dst6))
 		sin6.sin6_addr.s6_addr16[1] = htons(scopeid & 0xffff);
 
 	RIB_RLOCK(rh);
 	rn = rh->rnh_matchaddr((void *)&sin6, &rh->head);
 	if (rn != NULL && ((rn->rn_flags & RNF_ROOT) == 0)) {
 		rt = RNTORT(rn);
 #ifdef	RADIX_MPATH
 		ret = check_urpf_mpath(rt, flags, src_if);
 #else
 		ret = check_urpf(rt->rt_nhop, flags, src_if);
 #endif
 		RIB_RUNLOCK(rh);
 		return (ret);
 	}
 	RIB_RUNLOCK(rh);
 
 	return (0);
 }
 
 struct nhop_object *
 fib6_lookup_debugnet(uint32_t fibnum, const struct in6_addr *dst6,
     uint32_t scopeid, uint32_t flags)
 {
 	struct rib_head *rh;
 	struct radix_node *rn;
 	struct rtentry *rt;
 	struct nhop_object *nh;
 	struct sockaddr_in6 sin6;
 
 	KASSERT((fibnum < rt_numfibs), ("fib6_lookup: bad fibnum"));
 	rh = rt_tables_get_rnh(fibnum, AF_INET6);
 	if (rh == NULL)
 		return (NULL);
 
 	/* TODO: radix changes */
 	//addr = *dst6;
 	/* Prepare lookup key */
 	memset(&sin6, 0, sizeof(sin6));
 	sin6.sin6_len = sizeof(struct sockaddr_in6);
 	sin6.sin6_addr = *dst6;
 
 	/* Assume scopeid is valid and embed it directly */
 	if (IN6_IS_SCOPE_LINKLOCAL(dst6))
 		sin6.sin6_addr.s6_addr16[1] = htons(scopeid & 0xffff);
 
 	rn = rh->rnh_matchaddr((void *)&sin6, &rh->head);
 	if (rn != NULL && ((rn->rn_flags & RNF_ROOT) == 0)) {
 		rt = RNTORT(rn);
 		nh = rt->rt_nhop;
 		/* Ensure route & ifp is UP */
 		if (RT_LINK_IS_UP(nh->nh_ifp)) {
 			if (flags & NHR_REF)
 				nhop_ref_object(nh);
 			return (nh);
 		}
 	}
 
 	return (NULL);
 }
 
 #endif
 
Index: head/sys/netinet6/in6_fib.h
===================================================================
--- head/sys/netinet6/in6_fib.h	(revision 362899)
+++ head/sys/netinet6/in6_fib.h	(revision 362900)
@@ -1,70 +1,43 @@
 /*-
  * Copyright (c) 2015
  * 	Alexander V. Chernikov <melifaro@FreeBSD.org>
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  * $FreeBSD$
  */
 
 #ifndef _NETINET6_IN6_FIB_H_
 #define	_NETINET6_IN6_FIB_H_
 
-/* Basic nexthop info used for uRPF/mtu checks */
-struct nhop6_basic {
-	struct ifnet	*nh_ifp;	/* Logical egress interface */
-	uint16_t	nh_mtu;		/* nexthop mtu */
-	uint16_t	nh_flags;	/* nhop flags */
-	uint8_t		spare[4];
-	struct in6_addr	nh_addr;	/* GW/DST IPv4 address */
-};
-
-/* Extended nexthop info used for control protocols. */
-struct nhop6_extended {
-	struct ifnet	*nh_ifp;	/* Logical egress interface */
-	struct in6_ifaddr *nh_ia;	/* Associated address. */
-	uint16_t	nh_mtu;		/* nexthop mtu */
-	uint16_t	nh_flags;	/* nhop flags */
-	uint8_t		spare[4];
-	struct in6_addr	nh_addr;	/* GW/DST IPv6 address */
-	uint64_t	spare2[1];
-};
-
-int fib6_lookup_nh_basic(uint32_t fibnum, const struct in6_addr *dst,
-    uint32_t scopeid, uint32_t flags, uint32_t flowid,struct nhop6_basic *pnh6);
-int fib6_lookup_nh_ext(uint32_t fibnum, const struct in6_addr *dst,
-    uint32_t scopeid, uint32_t flags, uint32_t flowid,
-    struct nhop6_extended *pnh6);
-void fib6_free_nh_ext(uint32_t fibnum, struct nhop6_extended *pnh6);
-
 struct nhop_object *fib6_lookup(uint32_t fibnum,
     const struct in6_addr *dst6, uint32_t scopeid, uint32_t flags,
     uint32_t flowid);
 int fib6_check_urpf(uint32_t fibnum, const struct in6_addr *dst6,
     uint32_t scopeid, uint32_t flags, const struct ifnet *src_if);
 struct nhop_object *fib6_lookup_debugnet(uint32_t fibnum,
     const struct in6_addr *dst6, uint32_t scopeid, uint32_t flags);
 #endif
 
Index: head/sys/netinet6/in6_mcast.c
===================================================================
--- head/sys/netinet6/in6_mcast.c	(revision 362899)
+++ head/sys/netinet6/in6_mcast.c	(revision 362900)
@@ -1,2906 +1,2906 @@
 /*-
  * SPDX-License-Identifier: BSD-3-Clause
  *
  * Copyright (c) 2009 Bruce Simpson.
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. The name of the author may not be used to endorse or promote
  *    products derived from this software without specific prior written
  *    permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  */
 
 /*
  * IPv6 multicast socket, group, and socket option processing module.
  * Normative references: RFC 2292, RFC 3492, RFC 3542, RFC 3678, RFC 3810.
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include "opt_inet6.h"
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/kernel.h>
 #include <sys/ktr.h>
 #include <sys/malloc.h>
 #include <sys/mbuf.h>
 #include <sys/protosw.h>
 #include <sys/socket.h>
 #include <sys/socketvar.h>
 #include <sys/sysctl.h>
 #include <sys/priv.h>
 #include <sys/taskqueue.h>
 #include <sys/tree.h>
 
 #include <net/if.h>
 #include <net/if_var.h>
 #include <net/if_dl.h>
 #include <net/route.h>
+#include <net/route/nhop.h>
 #include <net/vnet.h>
 
 #include <netinet/in.h>
 #include <netinet/udp.h>
 #include <netinet/in_var.h>
 #include <netinet/ip_var.h>
 #include <netinet/udp_var.h>
 #include <netinet6/in6_fib.h>
 #include <netinet6/in6_var.h>
 #include <netinet/ip6.h>
 #include <netinet/icmp6.h>
 #include <netinet6/ip6_var.h>
 #include <netinet/in_pcb.h>
 #include <netinet/tcp_var.h>
 #include <netinet6/nd6.h>
 #include <netinet6/mld6_var.h>
 #include <netinet6/scope6_var.h>
 
 #ifndef KTR_MLD
 #define KTR_MLD KTR_INET6
 #endif
 
 #ifndef __SOCKUNION_DECLARED
 union sockunion {
 	struct sockaddr_storage	ss;
 	struct sockaddr		sa;
 	struct sockaddr_dl	sdl;
 	struct sockaddr_in6	sin6;
 };
 typedef union sockunion sockunion_t;
 #define __SOCKUNION_DECLARED
 #endif /* __SOCKUNION_DECLARED */
 
 static MALLOC_DEFINE(M_IN6MFILTER, "in6_mfilter",
     "IPv6 multicast PCB-layer source filter");
 MALLOC_DEFINE(M_IP6MADDR, "in6_multi", "IPv6 multicast group");
 static MALLOC_DEFINE(M_IP6MOPTS, "ip6_moptions", "IPv6 multicast options");
 static MALLOC_DEFINE(M_IP6MSOURCE, "ip6_msource",
     "IPv6 multicast MLD-layer source filter");
 
 RB_GENERATE(ip6_msource_tree, ip6_msource, im6s_link, ip6_msource_cmp);
 
 /*
  * Locking:
  * - Lock order is: Giant, IN6_MULTI_LOCK, INP_WLOCK,
  *   IN6_MULTI_LIST_LOCK, MLD_LOCK, IF_ADDR_LOCK.
  * - The IF_ADDR_LOCK is implicitly taken by in6m_lookup() earlier, however
  *   it can be taken by code in net/if.c also.
  * - ip6_moptions and in6_mfilter are covered by the INP_WLOCK.
  *
  * struct in6_multi is covered by IN6_MULTI_LOCK. There isn't strictly
  * any need for in6_multi itself to be virtualized -- it is bound to an ifp
  * anyway no matter what happens.
  */
 struct mtx in6_multi_list_mtx;
 MTX_SYSINIT(in6_multi_mtx, &in6_multi_list_mtx, "in6_multi_list_mtx", MTX_DEF);
 
 struct mtx in6_multi_free_mtx;
 MTX_SYSINIT(in6_multi_free_mtx, &in6_multi_free_mtx, "in6_multi_free_mtx", MTX_DEF);
 
 struct sx in6_multi_sx;
 SX_SYSINIT(in6_multi_sx, &in6_multi_sx, "in6_multi_sx");
 
 static void	im6f_commit(struct in6_mfilter *);
 static int	im6f_get_source(struct in6_mfilter *imf,
 		    const struct sockaddr_in6 *psin,
 		    struct in6_msource **);
 static struct in6_msource *
 		im6f_graft(struct in6_mfilter *, const uint8_t,
 		    const struct sockaddr_in6 *);
 static void	im6f_leave(struct in6_mfilter *);
 static int	im6f_prune(struct in6_mfilter *, const struct sockaddr_in6 *);
 static void	im6f_purge(struct in6_mfilter *);
 static void	im6f_rollback(struct in6_mfilter *);
 static void	im6f_reap(struct in6_mfilter *);
 static struct in6_mfilter *
 		im6o_match_group(const struct ip6_moptions *,
 		    const struct ifnet *, const struct sockaddr *);
 static struct in6_msource *
 		im6o_match_source(struct in6_mfilter *, const struct sockaddr *);
 static void	im6s_merge(struct ip6_msource *ims,
 		    const struct in6_msource *lims, const int rollback);
 static int	in6_getmulti(struct ifnet *, const struct in6_addr *,
 		    struct in6_multi **);
 static int	in6_joingroup_locked(struct ifnet *, const struct in6_addr *,
 		    struct in6_mfilter *, struct in6_multi **, int);
 static int	in6m_get_source(struct in6_multi *inm,
 		    const struct in6_addr *addr, const int noalloc,
 		    struct ip6_msource **pims);
 #ifdef KTR
 static int	in6m_is_ifp_detached(const struct in6_multi *);
 #endif
 static int	in6m_merge(struct in6_multi *, /*const*/ struct in6_mfilter *);
 static void	in6m_purge(struct in6_multi *);
 static void	in6m_reap(struct in6_multi *);
 static struct ip6_moptions *
 		in6p_findmoptions(struct inpcb *);
 static int	in6p_get_source_filters(struct inpcb *, struct sockopt *);
 static int	in6p_join_group(struct inpcb *, struct sockopt *);
 static int	in6p_leave_group(struct inpcb *, struct sockopt *);
 static struct ifnet *
 		in6p_lookup_mcast_ifp(const struct inpcb *,
 		    const struct sockaddr_in6 *);
 static int	in6p_block_unblock_source(struct inpcb *, struct sockopt *);
 static int	in6p_set_multicast_if(struct inpcb *, struct sockopt *);
 static int	in6p_set_source_filters(struct inpcb *, struct sockopt *);
 static int	sysctl_ip6_mcast_filters(SYSCTL_HANDLER_ARGS);
 
 SYSCTL_DECL(_net_inet6_ip6);	/* XXX Not in any common header. */
 
 static SYSCTL_NODE(_net_inet6_ip6, OID_AUTO, mcast,
     CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
     "IPv6 multicast");
 
 static u_long in6_mcast_maxgrpsrc = IPV6_MAX_GROUP_SRC_FILTER;
 SYSCTL_ULONG(_net_inet6_ip6_mcast, OID_AUTO, maxgrpsrc,
     CTLFLAG_RWTUN, &in6_mcast_maxgrpsrc, 0,
     "Max source filters per group");
 
 static u_long in6_mcast_maxsocksrc = IPV6_MAX_SOCK_SRC_FILTER;
 SYSCTL_ULONG(_net_inet6_ip6_mcast, OID_AUTO, maxsocksrc,
     CTLFLAG_RWTUN, &in6_mcast_maxsocksrc, 0,
     "Max source filters per socket");
 
 /* TODO Virtualize this switch. */
 int in6_mcast_loop = IPV6_DEFAULT_MULTICAST_LOOP;
 SYSCTL_INT(_net_inet6_ip6_mcast, OID_AUTO, loop, CTLFLAG_RWTUN,
     &in6_mcast_loop, 0, "Loopback multicast datagrams by default");
 
 static SYSCTL_NODE(_net_inet6_ip6_mcast, OID_AUTO, filters,
     CTLFLAG_RD | CTLFLAG_MPSAFE, sysctl_ip6_mcast_filters,
     "Per-interface stack-wide source filters");
 
 #ifdef KTR
 /*
  * Inline function which wraps assertions for a valid ifp.
  * The ifnet layer will set the ifma's ifp pointer to NULL if the ifp
  * is detached.
  */
 static int __inline
 in6m_is_ifp_detached(const struct in6_multi *inm)
 {
 	struct ifnet *ifp;
 
 	KASSERT(inm->in6m_ifma != NULL, ("%s: no ifma", __func__));
 	ifp = inm->in6m_ifma->ifma_ifp;
 	if (ifp != NULL) {
 		/*
 		 * Sanity check that network-layer notion of ifp is the
 		 * same as that of link-layer.
 		 */
 		KASSERT(inm->in6m_ifp == ifp, ("%s: bad ifp", __func__));
 	}
 
 	return (ifp == NULL);
 }
 #endif
 
 /*
  * Initialize an in6_mfilter structure to a known state at t0, t1
  * with an empty source filter list.
  */
 static __inline void
 im6f_init(struct in6_mfilter *imf, const int st0, const int st1)
 {
 	memset(imf, 0, sizeof(struct in6_mfilter));
 	RB_INIT(&imf->im6f_sources);
 	imf->im6f_st[0] = st0;
 	imf->im6f_st[1] = st1;
 }
 
 struct in6_mfilter *
 ip6_mfilter_alloc(const int mflags, const int st0, const int st1)
 {
 	struct in6_mfilter *imf;
 
 	imf = malloc(sizeof(*imf), M_IN6MFILTER, mflags);
 
 	if (imf != NULL)
 		im6f_init(imf, st0, st1);
 
 	return (imf);
 }
 
 void
 ip6_mfilter_free(struct in6_mfilter *imf)
 {
 
 	im6f_purge(imf);
 	free(imf, M_IN6MFILTER);
 }
 
 /*
  * Find an IPv6 multicast group entry for this ip6_moptions instance
  * which matches the specified group, and optionally an interface.
  * Return its index into the array, or -1 if not found.
  */
 static struct in6_mfilter *
 im6o_match_group(const struct ip6_moptions *imo, const struct ifnet *ifp,
     const struct sockaddr *group)
 {
 	const struct sockaddr_in6 *gsin6;
         struct in6_mfilter *imf;
         struct in6_multi *inm;
 
         gsin6 = (const struct sockaddr_in6 *)group;
 
 	IP6_MFILTER_FOREACH(imf, &imo->im6o_head) {
 		inm = imf->im6f_in6m;
 		if (inm == NULL)
 			continue;
 		if ((ifp == NULL || (inm->in6m_ifp == ifp)) &&
 		    IN6_ARE_ADDR_EQUAL(&inm->in6m_addr,
 		    &gsin6->sin6_addr)) {
 			break;
 		}
 	}
 	return (imf);
 }
 
 /*
  * Find an IPv6 multicast source entry for this imo which matches
  * the given group index for this socket, and source address.
  *
  * XXX TODO: The scope ID, if present in src, is stripped before
  * any comparison. We SHOULD enforce scope/zone checks where the source
  * filter entry has a link scope.
  *
  * NOTE: This does not check if the entry is in-mode, merely if
  * it exists, which may not be the desired behaviour.
  */
 static struct in6_msource *
 im6o_match_source(struct in6_mfilter *imf, const struct sockaddr *src)
 {
 	struct ip6_msource	 find;
 	struct ip6_msource	*ims;
 	const sockunion_t	*psa;
 
 	KASSERT(src->sa_family == AF_INET6, ("%s: !AF_INET6", __func__));
 
 	psa = (const sockunion_t *)src;
 	find.im6s_addr = psa->sin6.sin6_addr;
 	in6_clearscope(&find.im6s_addr);		/* XXX */
 	ims = RB_FIND(ip6_msource_tree, &imf->im6f_sources, &find);
 
 	return ((struct in6_msource *)ims);
 }
 
 /*
  * Decide whether a multicast datagram from src to group, received on
  * ifp, should be delivered to the socket owning imo.
  *
  * Returns MCAST_PASS to deliver, MCAST_NOTGMEMBER if the socket has no
  * membership for (ifp, group), or MCAST_NOTSMEMBER if the source is
  * filtered out.
  */
 int
 im6o_mc_filter(const struct ip6_moptions *imo, const struct ifnet *ifp,
     const struct sockaddr *group, const struct sockaddr *src)
 {
 	struct in6_mfilter *gfilter;
 	struct in6_msource *sent;
 	int fmode;
 
 	KASSERT(ifp != NULL, ("%s: null ifp", __func__));
 
 	gfilter = im6o_match_group(imo, ifp, group);
 	if (gfilter == NULL)
 		return (MCAST_NOTGMEMBER);
 
 	/*
 	 * Exclusive memberships accept by default and inclusive ones
 	 * reject by default; a listed source only changes the outcome
 	 * if its entry is in-mode.
 	 * NOTE: The filter mode is read at t1 (now) while the source
 	 * entry is read at socket-layer t0 (since last downcall).
 	 */
 	fmode = gfilter->im6f_st[1];
 	sent = im6o_match_source(gfilter, src);
 
 	if (sent == NULL) {
 		/* Unlisted source: only an include filter rejects it. */
 		if (fmode == MCAST_INCLUDE)
 			return (MCAST_NOTSMEMBER);
 	} else if (sent->im6sl_st[0] != fmode) {
 		/* Listed, but not in the filter's current mode. */
 		return (MCAST_NOTSMEMBER);
 	}
 
 	return (MCAST_PASS);
 }
 
 /*
  * Find and return a reference to an in6_multi record for (ifp, group),
  * and bump its reference count.
  * If one does not exist, try to allocate it, and update link-layer multicast
  * filters on ifp to listen for group.
  * Assumes the IN6_MULTI lock is held across the call.
  *
  * The IN6_MULTI list lock and the interface address write lock are taken
  * and released inside this function; both are dropped on every return.
  *
  * Return 0 if successful, otherwise return an appropriate error code.
  * *pinm is written only on success; the if_addmulti() failure and ENOMEM
  * paths leave it untouched.
  */
 static int
 in6_getmulti(struct ifnet *ifp, const struct in6_addr *group,
     struct in6_multi **pinm)
 {
 	struct epoch_tracker	 et;
 	struct sockaddr_in6	 gsin6;
 	struct ifmultiaddr	*ifma;
 	struct in6_multi	*inm;
 	int			 error;
 
 	error = 0;
 
 	/*
 	 * XXX: Accesses to ifma_protospec must be covered by IF_ADDR_LOCK;
 	 * if_addmulti() takes this mutex itself, so we must drop and
 	 * re-acquire around the call.
 	 */
 	IN6_MULTI_LOCK_ASSERT();
 	IN6_MULTI_LIST_LOCK();
 	IF_ADDR_WLOCK(ifp);
 	NET_EPOCH_ENTER(et);
 	inm = in6m_lookup_locked(ifp, group);
 	NET_EPOCH_EXIT(et);
 
 	if (inm != NULL) {
 		/*
 		 * If we already joined this group, just bump the
 		 * refcount and return it.
 		 */
 		KASSERT(inm->in6m_refcount >= 1,
 		    ("%s: bad refcount %d", __func__, inm->in6m_refcount));
 		in6m_acquire_locked(inm);
 		*pinm = inm;
 		goto out_locked;
 	}
 
 	/* Build the sockaddr form of the group for the link layer. */
 	memset(&gsin6, 0, sizeof(gsin6));
 	gsin6.sin6_family = AF_INET6;
 	gsin6.sin6_len = sizeof(struct sockaddr_in6);
 	gsin6.sin6_addr = *group;
 
 	/*
 	 * Check if a link-layer group is already associated
 	 * with this network-layer group on the given ifnet.
 	 * Both locks are released first; on failure we return with
 	 * them still released.
 	 */
 	IN6_MULTI_LIST_UNLOCK();
 	IF_ADDR_WUNLOCK(ifp);
 	error = if_addmulti(ifp, (struct sockaddr *)&gsin6, &ifma);
 	if (error != 0)
 		return (error);
 	IN6_MULTI_LIST_LOCK();
 	IF_ADDR_WLOCK(ifp);
 
 	/*
 	 * If the ifma already carries a protocol-specific pointer, treat
 	 * it as the existing network-layer group association: bump its
 	 * refcount and return it.  Under INVARIANTS, panic if that record
 	 * is inconsistent with (ifma, ifp, group).
 	 */
 	if (ifma->ifma_protospec != NULL) {
 		inm = (struct in6_multi *)ifma->ifma_protospec;
 #ifdef INVARIANTS
 		KASSERT(ifma->ifma_addr != NULL, ("%s: no ifma_addr",
 		    __func__));
 		KASSERT(ifma->ifma_addr->sa_family == AF_INET6,
 		    ("%s: ifma not AF_INET6", __func__));
 		KASSERT(inm != NULL, ("%s: no ifma_protospec", __func__));
 		if (inm->in6m_ifma != ifma || inm->in6m_ifp != ifp ||
 		    !IN6_ARE_ADDR_EQUAL(&inm->in6m_addr, group))
 			panic("%s: ifma %p is inconsistent with %p (%p)",
 			    __func__, ifma, inm, group);
 #endif
 		in6m_acquire_locked(inm);
 		*pinm = inm;
 		goto out_locked;
 	}
 
 	IF_ADDR_WLOCK_ASSERT(ifp);
 
 	/*
 	 * A new in6_multi record is needed; allocate and initialize it.
 	 * We DO NOT perform an MLD join as the in6_ layer may need to
 	 * push an initial source list down to MLD to support SSM.
 	 *
 	 * The initial source filter state is INCLUDE, {} as per the RFC.
 	 * Pending state-changes per group are subject to a bounds check.
 	 */
 	inm = malloc(sizeof(*inm), M_IP6MADDR, M_NOWAIT | M_ZERO);
 	if (inm == NULL) {
 		/* Undo the link-layer join made by if_addmulti() above. */
 		IN6_MULTI_LIST_UNLOCK();
 		IF_ADDR_WUNLOCK(ifp);
 		if_delmulti_ifma(ifma);
 		return (ENOMEM);
 	}
 	inm->in6m_addr = *group;
 	inm->in6m_ifp = ifp;
 	inm->in6m_mli = MLD_IFINFO(ifp);
 	inm->in6m_ifma = ifma;
 	inm->in6m_refcount = 1;
 	inm->in6m_state = MLD_NOT_MEMBER;
 	mbufq_init(&inm->in6m_scq, MLD_MAX_STATE_CHANGES);
 
 	inm->in6m_st[0].iss_fmode = MCAST_UNDEFINED;
 	inm->in6m_st[1].iss_fmode = MCAST_UNDEFINED;
 	RB_INIT(&inm->in6m_srcs);
 
 	/* Publish the new record through the link-layer membership. */
 	ifma->ifma_protospec = inm;
 	*pinm = inm;
 
  out_locked:
 	IN6_MULTI_LIST_UNLOCK();
 	IF_ADDR_WUNLOCK(ifp);
 	return (error);
 }
 
 /*
  * Final teardown of an in6_multi record.
  *
  * The reference count must already be zero on entry (asserted), the
  * ifma must have no link-layer companion attached, and its
  * ifma_protospec must already have been cleared.
  *
  * Purges the record's source nodes and queued state changes, frees the
  * record, and deletes the underlying link-layer membership.
  */
 static void
 in6m_release(struct in6_multi *inm)
 {
 	struct ifmultiaddr *ifma;
 	struct ifnet *ifp;
 
 	CTR2(KTR_MLD, "%s: refcount is %d", __func__, inm->in6m_refcount);
 
 	MPASS(inm->in6m_refcount == 0);
 	CTR2(KTR_MLD, "%s: freeing inm %p", __func__, inm);
 
 	ifma = inm->in6m_ifma;
 	ifp = inm->in6m_ifp;
 	MPASS(ifma->ifma_llifma == NULL);
 
 	/* XXX this access is not covered by IF_ADDR_LOCK */
 	CTR2(KTR_MLD, "%s: purging ifma %p", __func__, ifma);
 	KASSERT(ifma->ifma_protospec == NULL,
 	    ("%s: ifma_protospec != NULL", __func__));
 	/* Fall back to the link-layer notion of the interface. */
 	if (ifp == NULL)
 		ifp = ifma->ifma_ifp;
 
 	if (ifp != NULL) {
 		/* Run the teardown in the interface's vnet context. */
 		CURVNET_SET(ifp->if_vnet);
 		in6m_purge(inm);
 		free(inm, M_IP6MADDR);
 		if_delmulti_ifma_flags(ifma, 1);
 		CURVNET_RESTORE();
 		/*
 		 * NOTE(review): presumably balances the if_ref() taken in
 		 * in6m_disconnect_locked() -- confirm.
 		 */
 		if_rele(ifp);
 	} else {
 		/* No interface known: same teardown without a vnet switch. */
 		in6m_purge(inm);
 		free(inm, M_IP6MADDR);
 		if_delmulti_ifma_flags(ifma, 1);
 	}
 }
 
 /*
  * Interface detach can happen in a taskqueue thread context, so we must use a
  * dedicated thread to avoid deadlocks when draining in6m_release tasks.
  *
  * in6m_free_list collects records handed to in6m_release_list_deferred();
  * in6m_free_task runs in6m_release_task() on the in6m_free taskqueue to
  * drain that list.
  */
 TASKQUEUE_DEFINE_THREAD(in6m_free);
 static struct task in6m_free_task;
 static struct in6_multi_head in6m_free_list = SLIST_HEAD_INITIALIZER();
 static void in6m_release_task(void *arg __unused, int pending __unused);
 
 /*
  * Bind in6m_free_task to its handler, in6m_release_task().
  * Registered through SYSINIT at SI_SUB_TASKQ.
  */
 static void
 in6m_init(void)
 {
 	TASK_INIT(&in6m_free_task, 0, in6m_release_task, NULL);
 }
 SYSINIT(in6m_init, SI_SUB_TASKQ, SI_ORDER_ANY, in6m_init, NULL);
 
 /*
  * Hand a list of in6_multi records over for deferred release.
  *
  * The caller's list is concatenated onto the global in6m_free_list under
  * in6_multi_free_mtx and the release task is enqueued on the in6m_free
  * taskqueue.  An empty list is a no-op.
  */
 void
 in6m_release_list_deferred(struct in6_multi_head *inmh)
 {
 	if (SLIST_EMPTY(inmh))
 		return;
 	mtx_lock(&in6_multi_free_mtx);
 	SLIST_CONCAT(&in6m_free_list, inmh, in6_multi, in6m_nrele);
 	mtx_unlock(&in6_multi_free_mtx);
 	taskqueue_enqueue(taskqueue_in6m_free, &in6m_free_task);
 }
 
 /*
  * Wait for the in6m_free taskqueue to drain, i.e. for all deferred
  * release work queued so far to finish.
  */
 void
 in6m_release_wait(void)
 {
 	taskqueue_drain_all(taskqueue_in6m_free);
 }
 
 /*
  * Detach an in6_multi record from its interface.
  *
  * inmh is the list that collects records whose last reference is dropped
  * here (passed through to in6m_rele_locked()); inm is the record to
  * disconnect.
  *
  * Requires the IN6_MULTI list lock and the interface address write lock
  * (both asserted).  Calling this again for the same record is harmless:
  * in6m_ifp is cleared on the first call and checked on entry.
  */
 void
 in6m_disconnect_locked(struct in6_multi_head *inmh, struct in6_multi *inm)
 {
 	struct ifnet *ifp;
 	struct ifaddr *ifa;
 	struct in6_ifaddr *ifa6;
 	struct in6_multi_mship *imm, *imm_tmp;
 	struct ifmultiaddr *ifma, *ll_ifma;
 
 	IN6_MULTI_LIST_LOCK_ASSERT();
 
 	ifp = inm->in6m_ifp;
 	if (ifp == NULL)
 		return;		/* already called */
 
 	inm->in6m_ifp = NULL;
 	IF_ADDR_WLOCK_ASSERT(ifp);
 	ifma = inm->in6m_ifma;
 	if (ifma == NULL)
 		return;
 
 	/* Take an interface reference before unhooking the membership. */
 	if_ref(ifp);
 	/* Unlink the network-layer ifma from the interface, if linked. */
 	if (ifma->ifma_flags & IFMA_F_ENQUEUED) {
 		CK_STAILQ_REMOVE(&ifp->if_multiaddrs, ifma, ifmultiaddr, ifma_link);
 		ifma->ifma_flags &= ~IFMA_F_ENQUEUED;
 	}
 	MCDPRINTF("removed ifma: %p from %s\n", ifma, ifp->if_xname);
 	/*
 	 * Drop our reference on the companion link-layer ifma; unlink and
 	 * free it when that was the last one.
 	 */
 	if ((ll_ifma = ifma->ifma_llifma) != NULL) {
 		MPASS(ifma != ll_ifma);
 		ifma->ifma_llifma = NULL;
 		MPASS(ll_ifma->ifma_llifma == NULL);
 		MPASS(ll_ifma->ifma_ifp == ifp);
 		if (--ll_ifma->ifma_refcount == 0) {
 			if (ll_ifma->ifma_flags & IFMA_F_ENQUEUED) {
 				CK_STAILQ_REMOVE(&ifp->if_multiaddrs, ll_ifma, ifmultiaddr, ifma_link);
 				ll_ifma->ifma_flags &= ~IFMA_F_ENQUEUED;
 			}
 			MCDPRINTF("removed ll_ifma: %p from %s\n", ll_ifma, ifp->if_xname);
 			if_freemulti(ll_ifma);
 		}
 	}
 	/*
 	 * Remove every per-address membership record on this interface
 	 * that points at inm, releasing one reference for each.
 	 */
 	CK_STAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
 		if (ifa->ifa_addr->sa_family != AF_INET6)
 			continue;
 		ifa6 = (void *)ifa;
 		LIST_FOREACH_SAFE(imm, &ifa6->ia6_memberships,
 		    i6mm_chain, imm_tmp) {
 			if (inm == imm->i6mm_maddr) {
 				LIST_REMOVE(imm, i6mm_chain);
 				free(imm, M_IP6MADDR);
 				in6m_rele_locked(inmh, inm);
 			}
 		}
 	}
 }
 
 /*
  * Taskqueue handler that releases deferred in6_multi records.
  *
  * The global free list is moved onto a private list while holding
  * in6_multi_free_mtx; each record is then popped and passed to
  * in6m_release() with the IN6_MULTI lock held.  Both arguments are unused.
  */
 static void
 in6m_release_task(void *arg __unused, int pending __unused)
 {
 	struct in6_multi_head in6m_free_tmp;
 	struct in6_multi *inm, *tinm;
 
 	SLIST_INIT(&in6m_free_tmp);
 	mtx_lock(&in6_multi_free_mtx);
 	SLIST_CONCAT(&in6m_free_tmp, &in6m_free_list, in6_multi, in6m_nrele);
 	mtx_unlock(&in6_multi_free_mtx);
 	IN6_MULTI_LOCK();
 	SLIST_FOREACH_SAFE(inm, &in6m_free_tmp, in6m_nrele, tinm) {
 		/* inm is the current head, so removing the head removes it. */
 		SLIST_REMOVE_HEAD(&in6m_free_tmp, in6m_nrele);
 		in6m_release(inm);
 	}
 	IN6_MULTI_UNLOCK();
 }
 
 /*
  * Un-mark every recorded source of a group and bring the group's t1
  * recorded-source counter back down with it; the counter is asserted to
  * be zero afterwards.
  * Used by the MLD code. The IN6_MULTI list lock must be held (asserted).
  * FIXME: Should reap.
  */
 void
 in6m_clear_recorded(struct in6_multi *inm)
 {
 	struct ip6_msource	*src;
 
 	IN6_MULTI_LIST_LOCK_ASSERT();
 
 	RB_FOREACH(src, ip6_msource_tree, &inm->in6m_srcs) {
 		if (!src->im6s_stp)
 			continue;
 		src->im6s_stp = 0;
 		--inm->in6m_st[1].iss_rec;
 	}
 	KASSERT(inm->in6m_st[1].iss_rec == 0,
 	    ("%s: iss_rec %d not 0", __func__, inm->in6m_st[1].iss_rec));
 }
 
 /*
  * Record a source as pending for a Source-Group MLDv2 query.
  * This lives here as it modifies the shared tree.
  *
  * inm is the group descriptor.
  * naddr is the address of the source to record in network-byte order.
  *
  * If the net.inet6.mld.sgalloc sysctl is non-zero, we will
  * lazy-allocate a source node in response to an SG query.
  * Otherwise, no allocation is performed. This saves some memory
  * with the trade-off that the source will not be reported to the
  * router if joined in the window between the query response and
  * the group actually being joined on the local host.
  *
  * VIMAGE: XXX: Currently the mld_sgalloc feature has been removed.
  * This turns off the allocation of a recorded source entry if
  * the group has not been joined.
  *
  * Return 0 if the source didn't exist or was already marked as recorded.
  * Return 1 if the source was marked as recorded by this function.
  * Return <0 if any error occurred (negated errno code).
  */
 int
 in6m_record_source(struct in6_multi *inm, const struct in6_addr *addr)
 {
 	struct ip6_msource	 find;
 	struct ip6_msource	*ims, *nims;
 
 	IN6_MULTI_LIST_LOCK_ASSERT();
 
 	find.im6s_addr = *addr;
 	ims = RB_FIND(ip6_msource_tree, &inm->in6m_srcs, &find);
 	if (ims && ims->im6s_stp)
 		return (0);
 	if (ims == NULL) {
 		if (inm->in6m_nsrc == in6_mcast_maxgrpsrc)
 			return (-ENOSPC);
 		nims = malloc(sizeof(struct ip6_msource), M_IP6MSOURCE,
 		    M_NOWAIT | M_ZERO);
 		if (nims == NULL)
 			return (-ENOMEM);
 		nims->im6s_addr = find.im6s_addr;
 		RB_INSERT(ip6_msource_tree, &inm->in6m_srcs, nims);
 		++inm->in6m_nsrc;
 		ims = nims;
 	}
 
 	/*
 	 * Mark the source as recorded and update the recorded
 	 * source count.
 	 */
 	++ims->im6s_stp;
 	++inm->in6m_st[1].iss_rec;
 
 	return (1);
 }
 
 /*
  * Return a pointer to an in6_msource owned by an in6_mfilter,
  * given its source address.
  * Lazy-allocate if needed. If this is a new entry its filter state is
  * undefined at t0.
  *
  * imf is the filter set being modified.
  * addr is the source address.
  *
  * SMPng: May be called with locks held; malloc must not block.
  */
 static int
 im6f_get_source(struct in6_mfilter *imf, const struct sockaddr_in6 *psin,
     struct in6_msource **plims)
 {
 	struct ip6_msource	 find;
 	struct ip6_msource	*ims, *nims;
 	struct in6_msource	*lims;
 	int			 error;
 
 	error = 0;
 	ims = NULL;
 	lims = NULL;
 
 	find.im6s_addr = psin->sin6_addr;
 	ims = RB_FIND(ip6_msource_tree, &imf->im6f_sources, &find);
 	lims = (struct in6_msource *)ims;
 	if (lims == NULL) {
 		if (imf->im6f_nsrc == in6_mcast_maxsocksrc)
 			return (ENOSPC);
 		nims = malloc(sizeof(struct in6_msource), M_IN6MFILTER,
 		    M_NOWAIT | M_ZERO);
 		if (nims == NULL)
 			return (ENOMEM);
 		lims = (struct in6_msource *)nims;
 		lims->im6s_addr = find.im6s_addr;
 		lims->im6sl_st[0] = MCAST_UNDEFINED;
 		RB_INSERT(ip6_msource_tree, &imf->im6f_sources, nims);
 		++imf->im6f_nsrc;
 	}
 
 	*plims = lims;
 
 	return (error);
 }
 
 /*
  * Graft a source entry into an existing socket-layer filter set,
  * maintaining any required invariants and checking allocations.
  *
  * The source is marked as being in the new filter mode at t1.
  *
  * Return the pointer to the new node, otherwise return NULL.
  */
 static struct in6_msource *
 im6f_graft(struct in6_mfilter *imf, const uint8_t st1,
     const struct sockaddr_in6 *psin)
 {
 	struct ip6_msource	*nims;
 	struct in6_msource	*lims;
 
 	nims = malloc(sizeof(struct in6_msource), M_IN6MFILTER,
 	    M_NOWAIT | M_ZERO);
 	if (nims == NULL)
 		return (NULL);
 	lims = (struct in6_msource *)nims;
 	lims->im6s_addr = psin->sin6_addr;
 	lims->im6sl_st[0] = MCAST_UNDEFINED;
 	lims->im6sl_st[1] = st1;
 	RB_INSERT(ip6_msource_tree, &imf->im6f_sources, nims);
 	++imf->im6f_nsrc;
 
 	return (lims);
 }
 
 /*
  * Prune a source entry from an existing socket-layer filter set,
  * maintaining any required invariants and checking allocations.
  *
  * The source is marked as being left at t1, it is not freed.
  *
  * Return 0 if no error occurred, otherwise return an errno value.
  */
 static int
 im6f_prune(struct in6_mfilter *imf, const struct sockaddr_in6 *psin)
 {
 	struct ip6_msource	 find;
 	struct ip6_msource	*ims;
 	struct in6_msource	*lims;
 
 	find.im6s_addr = psin->sin6_addr;
 	ims = RB_FIND(ip6_msource_tree, &imf->im6f_sources, &find);
 	if (ims == NULL)
 		return (ENOENT);
 	lims = (struct in6_msource *)ims;
 	lims->im6sl_st[1] = MCAST_UNDEFINED;
 	return (0);
 }
 
 /*
  * Undo pending (t1) changes to a socket-layer filter set so that it
  * matches its t0 state again: sources that only exist at t1 are freed,
  * modified sources get their t0 mode back, and the filter mode itself is
  * restored.
  */
 static void
 im6f_rollback(struct in6_mfilter *imf)
 {
 	struct ip6_msource	*node, *next;
 	struct in6_msource	*lims;
 
 	RB_FOREACH_SAFE(node, ip6_msource_tree, &imf->im6f_sources, next) {
 		lims = (struct in6_msource *)node;
 
 		/* no change at t1 */
 		if (lims->im6sl_st[0] == lims->im6sl_st[1])
 			continue;
 
 		if (lims->im6sl_st[0] == MCAST_UNDEFINED) {
 			/* revert source added t1 */
 			CTR2(KTR_MLD, "%s: free ims %p", __func__, node);
 			RB_REMOVE(ip6_msource_tree, &imf->im6f_sources, node);
 			free(node, M_IN6MFILTER);
 			imf->im6f_nsrc--;
 			continue;
 		}
 
 		/* revert change to existing source at t1 */
 		lims->im6sl_st[1] = lims->im6sl_st[0];
 	}
 	imf->im6f_st[1] = imf->im6f_st[0];
 }
 
 /*
  * Mark socket-layer filter set as INCLUDE {} at t1.
  */
 static void
 im6f_leave(struct in6_mfilter *imf)
 {
 	struct ip6_msource	*ims;
 	struct in6_msource	*lims;
 
 	RB_FOREACH(ims, ip6_msource_tree, &imf->im6f_sources) {
 		lims = (struct in6_msource *)ims;
 		lims->im6sl_st[1] = MCAST_UNDEFINED;
 	}
 	imf->im6f_st[1] = MCAST_INCLUDE;
 }
 
 /*
  * Commit pending changes to a socket-layer filter set by copying the t1
  * state over the t0 state, for every source and for the filter mode.
  */
 static void
 im6f_commit(struct in6_mfilter *imf)
 {
 	struct ip6_msource	*node;
 	struct in6_msource	*entry;
 
 	RB_FOREACH(node, ip6_msource_tree, &imf->im6f_sources) {
 		entry = (struct in6_msource *)node;
 		entry->im6sl_st[0] = entry->im6sl_st[1];
 	}
 
 	imf->im6f_st[0] = imf->im6f_st[1];
 }
 
 /*
  * Free the sources of a socket-layer filter set that are undefined at
  * both t0 and t1, adjusting the filter's source count.
  */
 static void
 im6f_reap(struct in6_mfilter *imf)
 {
 	struct ip6_msource	*node, *next;
 	struct in6_msource	*entry;
 
 	RB_FOREACH_SAFE(node, ip6_msource_tree, &imf->im6f_sources, next) {
 		entry = (struct in6_msource *)node;
 
 		/* Still defined at t0 or t1: keep it. */
 		if (entry->im6sl_st[0] != MCAST_UNDEFINED ||
 		    entry->im6sl_st[1] != MCAST_UNDEFINED)
 			continue;
 
 		CTR2(KTR_MLD, "%s: free lims %p", __func__, node);
 		RB_REMOVE(ip6_msource_tree, &imf->im6f_sources, node);
 		free(node, M_IN6MFILTER);
 		imf->im6f_nsrc--;
 	}
 }
 
 /*
  * Purge socket-layer filter set.
  */
 static void
 im6f_purge(struct in6_mfilter *imf)
 {
 	struct ip6_msource	*ims, *tims;
 
 	RB_FOREACH_SAFE(ims, ip6_msource_tree, &imf->im6f_sources, tims) {
 		CTR2(KTR_MLD, "%s: free ims %p", __func__, ims);
 		RB_REMOVE(ip6_msource_tree, &imf->im6f_sources, ims);
 		free(ims, M_IN6MFILTER);
 		imf->im6f_nsrc--;
 	}
 	imf->im6f_st[0] = imf->im6f_st[1] = MCAST_UNDEFINED;
 	KASSERT(RB_EMPTY(&imf->im6f_sources),
 	    ("%s: im6f_sources not empty", __func__));
 }
 
 /*
  * Look up a source filter entry for a multicast group.
  *
  * inm is the group descriptor to work with.
  * addr is the IPv6 address to look up.
  * noalloc may be non-zero to suppress allocation of sources.
  * *pims will be set to the address of the retrieved or allocated source.
  *
  * SMPng: NOTE: may be called with locks held.
  * Return 0 if successful, otherwise return a non-zero error code.
  */
 static int
 in6m_get_source(struct in6_multi *inm, const struct in6_addr *addr,
     const int noalloc, struct ip6_msource **pims)
 {
 	struct ip6_msource	 find;
 	struct ip6_msource	*ims, *nims;
 #ifdef KTR
 	char			 ip6tbuf[INET6_ADDRSTRLEN];
 #endif
 
 	find.im6s_addr = *addr;
 	ims = RB_FIND(ip6_msource_tree, &inm->in6m_srcs, &find);
 	if (ims == NULL && !noalloc) {
 		if (inm->in6m_nsrc == in6_mcast_maxgrpsrc)
 			return (ENOSPC);
 		nims = malloc(sizeof(struct ip6_msource), M_IP6MSOURCE,
 		    M_NOWAIT | M_ZERO);
 		if (nims == NULL)
 			return (ENOMEM);
 		nims->im6s_addr = *addr;
 		RB_INSERT(ip6_msource_tree, &inm->in6m_srcs, nims);
 		++inm->in6m_nsrc;
 		ims = nims;
 		CTR3(KTR_MLD, "%s: allocated %s as %p", __func__,
 		    ip6_sprintf(ip6tbuf, addr), ims);
 	}
 
 	*pims = ims;
 	return (0);
 }
 
 /*
  * Fold one socket-layer source (lims) into the matching MLD-layer source
  * (ims) by adjusting the t1 include/exclude counters: the socket's t0
  * mode is subtracted and its t1 mode is added.
  * If rollback is non-zero the sign is flipped, undoing an earlier merge.
  */
 static void
 im6s_merge(struct ip6_msource *ims, const struct in6_msource *lims,
     const int rollback)
 {
 	const int delta = (rollback != 0) ? -1 : 1;
 #ifdef KTR
 	char ip6tbuf[INET6_ADDRSTRLEN];
 
 	ip6_sprintf(ip6tbuf, &lims->im6s_addr);
 #endif
 
 	/* Take out whatever the socket contributed at t0. */
 	if (lims->im6sl_st[0] == MCAST_INCLUDE) {
 		CTR3(KTR_MLD, "%s: t1 in -= %d on %s", __func__, delta,
 		    ip6tbuf);
 		ims->im6s_st[1].in -= delta;
 	} else if (lims->im6sl_st[0] == MCAST_EXCLUDE) {
 		CTR3(KTR_MLD, "%s: t1 ex -= %d on %s", __func__, delta,
 		    ip6tbuf);
 		ims->im6s_st[1].ex -= delta;
 	}
 
 	/* Put in what the socket contributes at t1. */
 	if (lims->im6sl_st[1] == MCAST_INCLUDE) {
 		CTR3(KTR_MLD, "%s: t1 in += %d on %s", __func__, delta,
 		    ip6tbuf);
 		ims->im6s_st[1].in += delta;
 	} else if (lims->im6sl_st[1] == MCAST_EXCLUDE) {
 		CTR3(KTR_MLD, "%s: t1 ex += %d on %s", __func__, delta,
 		    ip6tbuf);
 		ims->im6s_st[1].ex += delta;
 	}
 }
 
 /*
  * Atomically update the global in6_multi state, when a membership's
  * filter list is being updated in any way.
  *
  * inm is the group record whose t1 state is updated.
  * imf is the per-inpcb-membership group filter pointer.
  * A fake imf may be passed for in-kernel consumers.
  *
  * The IN6_MULTI list lock must be held (asserted).
  *
  * XXX This is a candidate for a set-symmetric-difference style loop
  * which would eliminate the repeated lookup from root of ims nodes,
  * as they share the same key space.
  *
  * If any error occurred this function will back out of refcounts
  * and return a non-zero value.
  */
 static int
 in6m_merge(struct in6_multi *inm, /*const*/ struct in6_mfilter *imf)
 {
 	struct ip6_msource	*ims, *nims;
 	struct in6_msource	*lims;
 	int			 schanged, error;
 	int			 nsrc0, nsrc1;
 
 	schanged = 0;
 	error = 0;
 	nsrc1 = nsrc0 = 0;
 	IN6_MULTI_LIST_LOCK_ASSERT();
 
 	/*
 	 * Update the source filters first, as this may fail.
 	 * Maintain count of in-mode filters at t0, t1. These are
 	 * used to work out if we transition into ASM mode or not.
 	 * Maintain a count of source filters whose state was
 	 * actually modified by this operation.
 	 */
 	RB_FOREACH(ims, ip6_msource_tree, &imf->im6f_sources) {
 		lims = (struct in6_msource *)ims;
 		if (lims->im6sl_st[0] == imf->im6f_st[0]) nsrc0++;
 		if (lims->im6sl_st[1] == imf->im6f_st[1]) nsrc1++;
 		if (lims->im6sl_st[0] == lims->im6sl_st[1]) continue;
 		error = in6m_get_source(inm, &lims->im6s_addr, 0, &nims);
 		/* Counted even when the lookup fails, so the reap still runs. */
 		++schanged;
 		if (error)
 			break;
 		im6s_merge(nims, lims, 0);
 	}
 	if (error) {
 		struct ip6_msource *bims;
 
 		/*
 		 * Back out: walk backwards from the socket source where
 		 * the forward pass stopped and undo each merge.  Group
 		 * sources are looked up with allocation suppressed; a
 		 * miss is skipped.
 		 * NOTE(review): nims appears to serve only as the
 		 * iterator's scratch variable here -- confirm against
 		 * sys/tree.h.
 		 */
 		RB_FOREACH_REVERSE_FROM(ims, ip6_msource_tree, nims) {
 			lims = (struct in6_msource *)ims;
 			if (lims->im6sl_st[0] == lims->im6sl_st[1])
 				continue;
 			(void)in6m_get_source(inm, &lims->im6s_addr, 1, &bims);
 			if (bims == NULL)
 				continue;
 			im6s_merge(bims, lims, 1);
 		}
 		goto out_reap;
 	}
 
 	CTR3(KTR_MLD, "%s: imf filters in-mode: %d at t0, %d at t1",
 	    __func__, nsrc0, nsrc1);
 
 	/* Handle transition between INCLUDE {n} and INCLUDE {} on socket. */
 	if (imf->im6f_st[0] == imf->im6f_st[1] &&
 	    imf->im6f_st[1] == MCAST_INCLUDE) {
 		if (nsrc1 == 0) {
 			CTR1(KTR_MLD, "%s: --in on inm at t1", __func__);
 			--inm->in6m_st[1].iss_in;
 		}
 	}
 
 	/* Handle filter mode transition on socket. */
 	if (imf->im6f_st[0] != imf->im6f_st[1]) {
 		CTR3(KTR_MLD, "%s: imf transition %d to %d",
 		    __func__, imf->im6f_st[0], imf->im6f_st[1]);
 
 		/* Remove the listener from its old (t0) mode count. */
 		if (imf->im6f_st[0] == MCAST_EXCLUDE) {
 			CTR1(KTR_MLD, "%s: --ex on inm at t1", __func__);
 			--inm->in6m_st[1].iss_ex;
 		} else if (imf->im6f_st[0] == MCAST_INCLUDE) {
 			CTR1(KTR_MLD, "%s: --in on inm at t1", __func__);
 			--inm->in6m_st[1].iss_in;
 		}
 
 		/*
 		 * Add it to its new (t1) mode count; an include listener
 		 * only counts if it has at least one in-mode source.
 		 */
 		if (imf->im6f_st[1] == MCAST_EXCLUDE) {
 			CTR1(KTR_MLD, "%s: ex++ on inm at t1", __func__);
 			inm->in6m_st[1].iss_ex++;
 		} else if (imf->im6f_st[1] == MCAST_INCLUDE && nsrc1 > 0) {
 			CTR1(KTR_MLD, "%s: in++ on inm at t1", __func__);
 			inm->in6m_st[1].iss_in++;
 		}
 	}
 
 	/*
 	 * Track inm filter state in terms of listener counts.
 	 * If there are any exclusive listeners, stack-wide
 	 * membership is exclusive.
 	 * Otherwise, if only inclusive listeners, stack-wide is inclusive.
 	 * If no listeners remain, state is undefined at t1,
 	 * and the MLD lifecycle for this group should finish.
 	 */
 	if (inm->in6m_st[1].iss_ex > 0) {
 		CTR1(KTR_MLD, "%s: transition to EX", __func__);
 		inm->in6m_st[1].iss_fmode = MCAST_EXCLUDE;
 	} else if (inm->in6m_st[1].iss_in > 0) {
 		CTR1(KTR_MLD, "%s: transition to IN", __func__);
 		inm->in6m_st[1].iss_fmode = MCAST_INCLUDE;
 	} else {
 		CTR1(KTR_MLD, "%s: transition to UNDEF", __func__);
 		inm->in6m_st[1].iss_fmode = MCAST_UNDEFINED;
 	}
 
 	/* Decrement ASM listener count on transition out of ASM mode. */
 	if (imf->im6f_st[0] == MCAST_EXCLUDE && nsrc0 == 0) {
 		if ((imf->im6f_st[1] != MCAST_EXCLUDE) ||
 		    (imf->im6f_st[1] == MCAST_EXCLUDE && nsrc1 > 0)) {
 			CTR1(KTR_MLD, "%s: --asm on inm at t1", __func__);
 			--inm->in6m_st[1].iss_asm;
 		}
 	}
 
 	/* Increment ASM listener count on transition to ASM mode. */
 	if (imf->im6f_st[1] == MCAST_EXCLUDE && nsrc1 == 0) {
 		CTR1(KTR_MLD, "%s: asm++ on inm at t1", __func__);
 		inm->in6m_st[1].iss_asm++;
 	}
 
 	CTR3(KTR_MLD, "%s: merged imf %p to inm %p", __func__, imf, inm);
 	in6m_print(inm);
 
 out_reap:
 	/* Reached on both success and failure. */
 	if (schanged > 0) {
 		CTR1(KTR_MLD, "%s: sources changed; reaping", __func__);
 		in6m_reap(inm);
 	}
 	return (error);
 }
 
 /*
  * Mark an in6_multi's filter set deltas as committed.
  * Called by MLD after a state change has been enqueued.
  */
 void
 in6m_commit(struct in6_multi *inm)
 {
 	struct ip6_msource	*ims;
 
 	CTR2(KTR_MLD, "%s: commit inm %p", __func__, inm);
 	CTR1(KTR_MLD, "%s: pre commit:", __func__);
 	in6m_print(inm);
 
 	RB_FOREACH(ims, ip6_msource_tree, &inm->in6m_srcs) {
 		ims->im6s_st[0] = ims->im6s_st[1];
 	}
 	inm->in6m_st[0] = inm->in6m_st[1];
 }
 
 /*
  * Free the source nodes of an in6_multi that nothing refers to any more:
  * no include or exclude count at t0 or t1, and not marked as recorded.
  */
 static void
 in6m_reap(struct in6_multi *inm)
 {
 	struct ip6_msource	*src, *next;
 	int			 inuse;
 
 	RB_FOREACH_SAFE(src, ip6_msource_tree, &inm->in6m_srcs, next) {
 		inuse = (src->im6s_st[0].ex > 0 || src->im6s_st[0].in > 0 ||
 		    src->im6s_st[1].ex > 0 || src->im6s_st[1].in > 0 ||
 		    src->im6s_stp != 0);
 		if (!inuse) {
 			CTR2(KTR_MLD, "%s: free ims %p", __func__, src);
 			RB_REMOVE(ip6_msource_tree, &inm->in6m_srcs, src);
 			free(src, M_IP6MSOURCE);
 			inm->in6m_nsrc--;
 		}
 	}
 }
 
 /*
  * Discard every source node of an in6_multi, then drain any
  * state-change requests still sitting on its queue.
  */
 static void
 in6m_purge(struct in6_multi *inm)
 {
 	struct ip6_msource	*src, *next;
 
 	RB_FOREACH_SAFE(src, ip6_msource_tree, &inm->in6m_srcs, next) {
 		CTR2(KTR_MLD, "%s: free ims %p", __func__, src);
 		RB_REMOVE(ip6_msource_tree, &inm->in6m_srcs, src);
 		free(src, M_IP6MSOURCE);
 		inm->in6m_nsrc--;
 	}
 
 	/* Free state-change requests that might be queued. */
 	mbufq_drain(&inm->in6m_scq);
 }
 
 /*
  * Join a multicast address w/o sources.
  * KAME compatibility entry point.
  *
  * Takes the IN6_MULTI lock around in6_joingroup_locked().
  *
  * NOTE: The imf argument is not forwarded; in6_joingroup_locked() is
  * always called with a NULL filter, so it fakes one up itself.
  *
  * SMPng: Assume no mc locks held by caller.
  *
  * Return the value returned by in6_joingroup_locked().
  */
 int
 in6_joingroup(struct ifnet *ifp, const struct in6_addr *mcaddr,
     /*const*/ struct in6_mfilter *imf, struct in6_multi **pinm,
     const int delay)
 {
 	int error;
 
 	IN6_MULTI_LOCK();
 	error = in6_joingroup_locked(ifp, mcaddr, NULL, pinm, delay);
 	IN6_MULTI_UNLOCK();
 	return (error);
 }
 
 /*
  * Join a multicast group; real entry point.
  *
  * ifp and mcaddr identify the group; imf is the socket-layer filter to
  * merge (NULL for in-kernel consumers); delay is handed to
  * mld_change_state(); *pinm receives the group record on success.
  *
  * The IN6_MULTI lock must be held and the IN6_MULTI list lock must not
  * be held on entry (both asserted).
  *
  * Only preserves atomicity at inm level.
  * NOTE: imf argument cannot be const due to sys/tree.h limitations.
  *
  * If the MLD downcall fails, the group is not joined, and an error
  * code is returned.
  */
 static int
 in6_joingroup_locked(struct ifnet *ifp, const struct in6_addr *mcaddr,
     /*const*/ struct in6_mfilter *imf, struct in6_multi **pinm,
     const int delay)
 {
 	struct in6_multi_head    inmh;
 	struct in6_mfilter	 timf;
 	struct in6_multi	*inm;
 	struct ifmultiaddr *ifma;
 	int			 error;
 #ifdef KTR
 	char			 ip6tbuf[INET6_ADDRSTRLEN];
 #endif
 
 	/*
 	 * Sanity: Check scope zone ID was set for ifp, if and
 	 * only if group is scoped to an interface.
 	 */
 	KASSERT(IN6_IS_ADDR_MULTICAST(mcaddr),
 	    ("%s: not a multicast address", __func__));
 	if (IN6_IS_ADDR_MC_LINKLOCAL(mcaddr) ||
 	    IN6_IS_ADDR_MC_INTFACELOCAL(mcaddr)) {
 		KASSERT(mcaddr->s6_addr16[1] != 0,
 		    ("%s: scope zone ID not set", __func__));
 	}
 
 	IN6_MULTI_LOCK_ASSERT();
 	IN6_MULTI_LIST_UNLOCK_ASSERT();
 
 	CTR4(KTR_MLD, "%s: join %s on %p(%s))", __func__,
 	    ip6_sprintf(ip6tbuf, mcaddr), ifp, if_name(ifp));
 
 	error = 0;
 	inm = NULL;
 
 	/*
 	 * If no imf was specified (i.e. kernel consumer),
 	 * fake one up and assume it is an ASM join.
 	 */
 	if (imf == NULL) {
 		im6f_init(&timf, MCAST_UNDEFINED, MCAST_EXCLUDE);
 		imf = &timf;
 	}
 	/* Find or create the group record; this takes a reference. */
 	error = in6_getmulti(ifp, mcaddr, &inm);
 	if (error) {
 		CTR1(KTR_MLD, "%s: in6_getmulti() failure", __func__);
 		return (error);
 	}
 
 	IN6_MULTI_LIST_LOCK();
 	CTR1(KTR_MLD, "%s: merge inm state", __func__);
 	error = in6m_merge(inm, imf);
 	if (error) {
 		CTR1(KTR_MLD, "%s: failed to merge inm state", __func__);
 		goto out_in6m_release;
 	}
 
 	CTR1(KTR_MLD, "%s: doing mld downcall", __func__);
 	error = mld_change_state(inm, delay);
 	if (error) {
 		CTR1(KTR_MLD, "%s: failed to update source", __func__);
 		goto out_in6m_release;
 	}
 
 out_in6m_release:
 	/* Common exit: reached on success as well as on failure. */
 	SLIST_INIT(&inmh);
 	if (error) {
 		struct epoch_tracker et;
 
 		CTR2(KTR_MLD, "%s: dropping ref on %p", __func__, inm);
 		IF_ADDR_WLOCK(ifp);
 		NET_EPOCH_ENTER(et);
 		/* Detach inm from whichever ifma still points at it. */
 		CK_STAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
 			if (ifma->ifma_protospec == inm) {
 				ifma->ifma_protospec = NULL;
 				break;
 			}
 		}
 		in6m_disconnect_locked(&inmh, inm);
 		in6m_rele_locked(&inmh, inm);
 		NET_EPOCH_EXIT(et);
 		IF_ADDR_WUNLOCK(ifp);
 	} else {
 		*pinm = inm;
 	}
 	IN6_MULTI_LIST_UNLOCK();
 	/* Records collected on inmh are freed later by the release task. */
 	in6m_release_list_deferred(&inmh);
 	return (error);
 }
 
 /*
  * Leave a multicast group; unlocked entry point.
  *
  * Takes the IN6_MULTI lock around in6_leavegroup_locked() and returns
  * its result.
  */
 int
 in6_leavegroup(struct in6_multi *inm, /*const*/ struct in6_mfilter *imf)
 {
 	int error;
 
 	IN6_MULTI_LOCK();
 	error = in6_leavegroup_locked(inm, imf);
 	IN6_MULTI_UNLOCK();
 	return (error);
 }
 
 /*
  * Leave a multicast group; real entry point.
  * All source filters will be expunged.
  *
  * inm is the group record to leave; imf is the socket-layer filter to
  * merge (NULL for in-kernel consumers).  The IN6_MULTI lock must be held
  * (asserted).
  *
  * Only preserves atomicity at inm level.
  *
  * Holding the write lock for the INP which contains imf
  * is highly advisable. We can't assert for it as imf does not
  * contain a back-pointer to the owning inp.
  *
  * Note: This is not the same as in6m_release(*) as this function also
  * makes a state change downcall into MLD.
  *
  * Return the result of the MLD downcall, or 0 if the record has no
  * interface and no downcall is made.
  */
 int
 in6_leavegroup_locked(struct in6_multi *inm, /*const*/ struct in6_mfilter *imf)
 {
 	struct in6_multi_head	 inmh;
 	struct in6_mfilter	 timf;
 	struct ifnet *ifp;
 	int			 error;
 #ifdef KTR
 	char			 ip6tbuf[INET6_ADDRSTRLEN];
 #endif
 
 	error = 0;
 
 	IN6_MULTI_LOCK_ASSERT();
 
 	CTR5(KTR_MLD, "%s: leave inm %p, %s/%s, imf %p", __func__,
 	    inm, ip6_sprintf(ip6tbuf, &inm->in6m_addr),
 	    (in6m_is_ifp_detached(inm) ? "null" : if_name(inm->in6m_ifp)),
 	    imf);
 
 	/*
 	 * If no imf was specified (i.e. kernel consumer),
 	 * fake one up describing an ASM membership being dropped
 	 * (EXCLUDE at t0, UNDEFINED at t1).
 	 */
 	if (imf == NULL) {
 		im6f_init(&timf, MCAST_EXCLUDE, MCAST_UNDEFINED);
 		imf = &timf;
 	}
 
 	/*
 	 * Begin state merge transaction at MLD layer.
 	 *
 	 * As this particular invocation should not cause any memory
 	 * to be allocated, and there is no opportunity to roll back
 	 * the transaction, it MUST NOT fail.
 	 */
 
 	/* Sampled before disconnecting, which clears inm->in6m_ifp. */
 	ifp = inm->in6m_ifp;
 	IN6_MULTI_LIST_LOCK();
 	CTR1(KTR_MLD, "%s: merge inm state", __func__);
 	error = in6m_merge(inm, imf);
 	KASSERT(error == 0, ("%s: failed to merge inm state", __func__));
 
 	CTR1(KTR_MLD, "%s: doing mld downcall", __func__);
 	/* The merge result is discarded; only the downcall is reported. */
 	error = 0;
 	if (ifp)
 		error = mld_change_state(inm, 0);
 	if (error)
 		CTR1(KTR_MLD, "%s: failed mld downcall", __func__);
 
 	CTR2(KTR_MLD, "%s: dropping ref on %p", __func__, inm);
 	if (ifp)
 		IF_ADDR_WLOCK(ifp);
 
 	SLIST_INIT(&inmh);
 	/* Last reference: unhook the record from the interface first. */
 	if (inm->in6m_refcount == 1)
 		in6m_disconnect_locked(&inmh, inm);
 	in6m_rele_locked(&inmh, inm);
 	if (ifp)
 		IF_ADDR_WUNLOCK(ifp);
 	IN6_MULTI_LIST_UNLOCK();
 	/* Records collected on inmh are freed later by the release task. */
 	in6m_release_list_deferred(&inmh);
 	return (error);
 }
 
 
 /*
  * Block or unblock an ASM multicast source on an inpcb.
  * This implements the delta-based API described in RFC 3678.
  *
  * The delta-based API applies only to exclusive-mode memberships.
  * An MLD downcall will be performed.
  *
  * Handles MCAST_BLOCK_SOURCE and MCAST_UNBLOCK_SOURCE.  The option value
  * is a struct group_source_req naming the interface index, the group and
  * the source to (un)block.
  *
  * Locking: the inpcb is passed in unlocked.  in6p_findmoptions() takes
  * the INP write lock, which is released again before returning.  All
  * returns made during argument validation happen before it is taken.
  *
  * SMPng: NOTE: Must take Giant as a join may create a new ifma.
  *
  * Return 0 if successful, otherwise return an appropriate error code:
  *  EOPNOTSUPP    - sopt_name is not one of the two options above.
  *  EINVAL        - malformed group or source sockaddr, group is not
  *                  multicast, or the membership is not exclusive-mode.
  *  EADDRNOTAVAIL - bad or unattached interface index, socket is not a
  *                  member of the group, source already blocked (block)
  *                  or source not currently blocked (unblock).
  *  ENOMEM        - no memory for a new source filter entry.
  * Errors from sooptcopyin(), im6f_prune(), in6m_merge() and
  * mld_change_state() are passed through unchanged.
  */
 static int
 in6p_block_unblock_source(struct inpcb *inp, struct sockopt *sopt)
 {
 	struct group_source_req		 gsr;
 	sockunion_t			*gsa, *ssa;
 	struct ifnet			*ifp;
 	struct in6_mfilter		*imf;
 	struct ip6_moptions		*imo;
 	struct in6_msource		*ims;
 	struct in6_multi			*inm;
 	uint16_t			 fmode;
 	int				 error, doblock;
 #ifdef KTR
 	char				 ip6tbuf[INET6_ADDRSTRLEN];
 #endif
 
 	ifp = NULL;
 	error = 0;
 	doblock = 0;
 
 	memset(&gsr, 0, sizeof(struct group_source_req));
 	gsa = (sockunion_t *)&gsr.gsr_group;
 	ssa = (sockunion_t *)&gsr.gsr_source;
 
 	switch (sopt->sopt_name) {
 	case MCAST_BLOCK_SOURCE:
 	case MCAST_UNBLOCK_SOURCE:
 		error = sooptcopyin(sopt, &gsr,
 		    sizeof(struct group_source_req),
 		    sizeof(struct group_source_req));
 		if (error)
 			return (error);
 
 		if (gsa->sin6.sin6_family != AF_INET6 ||
 		    gsa->sin6.sin6_len != sizeof(struct sockaddr_in6))
 			return (EINVAL);
 
 		if (ssa->sin6.sin6_family != AF_INET6 ||
 		    ssa->sin6.sin6_len != sizeof(struct sockaddr_in6))
 			return (EINVAL);
 
 		if (gsr.gsr_interface == 0 || V_if_index < gsr.gsr_interface)
 			return (EADDRNOTAVAIL);
 
 		ifp = ifnet_byindex(gsr.gsr_interface);
 		/*
 		 * An index inside the valid range may still have no
 		 * interface attached.  Reject that case here, as
 		 * in6p_leave_group() and the source filter handlers do,
 		 * rather than handing a NULL ifp to in6_setscope() and
 		 * im6o_match_group() below.
 		 */
 		if (ifp == NULL)
 			return (EADDRNOTAVAIL);
 
 		if (sopt->sopt_name == MCAST_BLOCK_SOURCE)
 			doblock = 1;
 		break;
 
 	default:
 		CTR2(KTR_MLD, "%s: unknown sopt_name %d",
 		    __func__, sopt->sopt_name);
 		return (EOPNOTSUPP);
 		break;
 	}
 
 	if (!IN6_IS_ADDR_MULTICAST(&gsa->sin6.sin6_addr))
 		return (EINVAL);
 
 	(void)in6_setscope(&gsa->sin6.sin6_addr, ifp, NULL);
 
 	/*
 	 * Check if we are actually a member of this group.
 	 * in6p_findmoptions() returns with the INP write lock held, so
 	 * every exit from here on goes through out_in6p_locked.
 	 */
 	imo = in6p_findmoptions(inp);
 	imf = im6o_match_group(imo, ifp, &gsa->sa);
 	if (imf == NULL) {
 		error = EADDRNOTAVAIL;
 		goto out_in6p_locked;
 	}
 	inm = imf->im6f_in6m;
 
 	/*
 	 * Attempting to use the delta-based API on an
 	 * non exclusive-mode membership is an error.
 	 */
 	fmode = imf->im6f_st[0];
 	if (fmode != MCAST_EXCLUDE) {
 		error = EINVAL;
 		goto out_in6p_locked;
 	}
 
 	/*
 	 * Deal with error cases up-front:
 	 *  Asked to block, but already blocked; or
 	 *  Asked to unblock, but nothing to unblock.
 	 * If adding a new block entry, allocate it.
 	 */
 	ims = im6o_match_source(imf, &ssa->sa);
 	if ((ims != NULL && doblock) || (ims == NULL && !doblock)) {
 		CTR3(KTR_MLD, "%s: source %s %spresent", __func__,
 		    ip6_sprintf(ip6tbuf, &ssa->sin6.sin6_addr),
 		    doblock ? "" : "not ");
 		error = EADDRNOTAVAIL;
 		goto out_in6p_locked;
 	}
 
 	INP_WLOCK_ASSERT(inp);
 
 	/*
 	 * Begin state merge transaction at socket layer.
 	 */
 	if (doblock) {
 		CTR2(KTR_MLD, "%s: %s source", __func__, "block");
 		ims = im6f_graft(imf, fmode, &ssa->sin6);
 		if (ims == NULL)
 			error = ENOMEM;
 	} else {
 		CTR2(KTR_MLD, "%s: %s source", __func__, "allow");
 		error = im6f_prune(imf, &ssa->sin6);
 	}
 
 	if (error) {
 		CTR1(KTR_MLD, "%s: merge imf state failed", __func__);
 		goto out_im6f_rollback;
 	}
 
 	/*
 	 * Begin state merge transaction at MLD layer.
 	 */
 	IN6_MULTI_LIST_LOCK();
 	CTR1(KTR_MLD, "%s: merge inm state", __func__);
 	error = in6m_merge(inm, imf);
 	if (error)
 		CTR1(KTR_MLD, "%s: failed to merge inm state", __func__);
 	else {
 		CTR1(KTR_MLD, "%s: doing mld downcall", __func__);
 		error = mld_change_state(inm, 0);
 		if (error)
 			CTR1(KTR_MLD, "%s: failed mld downcall", __func__);
 	}
 
 	IN6_MULTI_LIST_UNLOCK();
 
 out_im6f_rollback:
 	/* Finish the socket-layer transaction one way or the other. */
 	if (error)
 		im6f_rollback(imf);
 	else
 		im6f_commit(imf);
 
 	im6f_reap(imf);
 
 out_in6p_locked:
 	INP_WUNLOCK(inp);
 	return (error);
 }
 
 /*
  * Return the multicast options structure attached to an inpcb, creating
  * and attaching an empty one first if the inpcb has none.
  *
  * The inpcb is passed in unlocked and is returned with the INP write
  * lock held.  The allocation uses M_WAITOK and is done with the lock
  * dropped, so this may sleep.
  *
  * SMPng: NOTE: Potentially calls malloc(M_WAITOK) with Giant held.
  * SMPng: NOTE: Returns with the INP write lock held.
  */
 static struct ip6_moptions *
 in6p_findmoptions(struct inpcb *inp)
 {
 	struct ip6_moptions	*fresh;
 
 	INP_WLOCK(inp);
 	if (inp->in6p_moptions == NULL) {
 		/* Nothing attached yet; allocate without holding the lock. */
 		INP_WUNLOCK(inp);
 
 		fresh = malloc(sizeof(*fresh), M_IP6MOPTS, M_WAITOK);
 
 		STAILQ_INIT(&fresh->im6o_head);
 		fresh->im6o_multicast_loop = in6_mcast_loop;
 		fresh->im6o_multicast_hlim = V_ip6_defmcasthlim;
 		fresh->im6o_multicast_ifp = NULL;
 
 		/*
 		 * Somebody else may have attached options while the lock
 		 * was dropped; if so keep theirs and discard ours.
 		 */
 		INP_WLOCK(inp);
 		if (inp->in6p_moptions == NULL)
 			inp->in6p_moptions = fresh;
 		else
 			free(fresh, M_IP6MOPTS);
 	}
 	return (inp->in6p_moptions);
 }
 
 /*
  * Discard the IPv6 multicast options (and source filters).
  *
  * Every membership filter on the options' list is unlinked, left,
  * has its group (if any) given up via in6_leavegroup() and is then
  * freed; finally the options structure itself is freed.
  *
  * SMPng: NOTE: assumes INP write lock is held.
  *
  * XXX can all be safely deferred to epoch_call
  *
  */
 
 static void
 inp_gcmoptions(struct ip6_moptions *imo)
 {
 	struct in6_mfilter *filt;
 	struct in6_multi *grp;
 	struct ifnet *ifp;
 
 	for (;;) {
 		filt = ip6_mfilter_first(&imo->im6o_head);
 		if (filt == NULL)
 			break;
 		ip6_mfilter_remove(&imo->im6o_head, filt);
 
 		im6f_leave(filt);
 		grp = filt->im6f_in6m;
 		if (grp != NULL) {
 			ifp = grp->in6m_ifp;
 			if (ifp == NULL) {
 				/* No interface to take a vnet context from. */
 				(void)in6_leavegroup(grp, filt);
 			} else {
 				CURVNET_SET(ifp->if_vnet);
 				(void)in6_leavegroup(grp, filt);
 				CURVNET_RESTORE();
 			}
 		}
 		ip6_mfilter_free(filt);
 	}
 	free(imo, M_IP6MOPTS);
 }
 
 /*
  * Release a multicast options structure and everything hanging off it.
  * A NULL pointer is accepted and ignored.
  */
 void
 ip6_freemoptions(struct ip6_moptions *imo)
 {
 	if (imo != NULL)
 		inp_gcmoptions(imo);
 }
 
 /*
  * Atomically get source filters on a socket for an IPv6 multicast group.
  * Called with INP lock held; returns with lock released.
  *
  * The request is a struct __msfilterreq copied in from sopt.  It names
  * the interface index and group to look up and, optionally, a user-space
  * buffer (msfr_srcs) with room for msfr_nsrcs sockaddr_storage entries.
  * On success the structure is copied back out with msfr_fmode set to the
  * filter mode and msfr_nsrcs set to the number of matching sources, which
  * may be larger than the number of entries actually written to the buffer.
  *
  * Returns 0 on success; EINVAL for a malformed or non-multicast group;
  * EADDRNOTAVAIL for a bad interface index or if the socket is not a
  * member of the group; EAGAIN if the membership's filter mode is
  * MCAST_UNDEFINED; ENOBUFS if the temporary buffer cannot be allocated.
  * Errors from sooptcopyin(), copyout() and sooptcopyout() are passed
  * through.
  */
 static int
 in6p_get_source_filters(struct inpcb *inp, struct sockopt *sopt)
 {
 	struct __msfilterreq	 msfr;
 	sockunion_t		*gsa;
 	struct ifnet		*ifp;
 	struct ip6_moptions	*imo;
 	struct in6_mfilter	*imf;
 	struct ip6_msource	*ims;
 	struct in6_msource	*lims;
 	struct sockaddr_in6	*psin;
 	struct sockaddr_storage	*ptss;
 	struct sockaddr_storage	*tss;
 	int			 error;
 	size_t			 nsrcs, ncsrcs;
 
 	INP_WLOCK_ASSERT(inp);
 
 	imo = inp->in6p_moptions;
 	KASSERT(imo != NULL, ("%s: null ip6_moptions", __func__));
 
 	/*
 	 * The lock is dropped while the request is copied in and validated;
 	 * it is taken again before the membership list is searched.
 	 */
 	INP_WUNLOCK(inp);
 
 	error = sooptcopyin(sopt, &msfr, sizeof(struct __msfilterreq),
 	    sizeof(struct __msfilterreq));
 	if (error)
 		return (error);
 
 	if (msfr.msfr_group.ss_family != AF_INET6 ||
 	    msfr.msfr_group.ss_len != sizeof(struct sockaddr_in6))
 		return (EINVAL);
 
 	gsa = (sockunion_t *)&msfr.msfr_group;
 	if (!IN6_IS_ADDR_MULTICAST(&gsa->sin6.sin6_addr))
 		return (EINVAL);
 
 	if (msfr.msfr_ifindex == 0 || V_if_index < msfr.msfr_ifindex)
 		return (EADDRNOTAVAIL);
 	ifp = ifnet_byindex(msfr.msfr_ifindex);
 	if (ifp == NULL)
 		return (EADDRNOTAVAIL);
 	(void)in6_setscope(&gsa->sin6.sin6_addr, ifp, NULL);
 
 	INP_WLOCK(inp);
 
 	/*
 	 * Lookup group on the socket.
 	 */
 	imf = im6o_match_group(imo, ifp, &gsa->sa);
 	if (imf == NULL) {
 		INP_WUNLOCK(inp);
 		return (EADDRNOTAVAIL);
 	}
 
 	/*
 	 * Ignore memberships which are in limbo.
 	 */
 	if (imf->im6f_st[1] == MCAST_UNDEFINED) {
 		INP_WUNLOCK(inp);
 		return (EAGAIN);
 	}
 	msfr.msfr_fmode = imf->im6f_st[1];
 
 	/*
 	 * If the user specified a buffer, copy out the source filter
 	 * entries to userland gracefully.
 	 * We only copy out the number of entries which userland
 	 * has asked for, but we always tell userland how big the
 	 * buffer really needs to be.
 	 */
 	if (msfr.msfr_nsrcs > in6_mcast_maxsocksrc)
 		msfr.msfr_nsrcs = in6_mcast_maxsocksrc;
 	tss = NULL;
 	if (msfr.msfr_srcs != NULL && msfr.msfr_nsrcs > 0) {
 		/*
 		 * M_NOWAIT because the INP write lock is held here.
 		 * M_ZERO so that slots not filled in below are copied
 		 * out as zeroes.
 		 */
 		tss = malloc(sizeof(struct sockaddr_storage) * msfr.msfr_nsrcs,
 		    M_TEMP, M_NOWAIT | M_ZERO);
 		if (tss == NULL) {
 			INP_WUNLOCK(inp);
 			return (ENOBUFS);
 		}
 	}
 
 	/*
 	 * Count number of sources in-mode at t0.
 	 * If buffer space exists and remains, copy out source entries.
 	 *
 	 * NOTE(review): the mode reported above comes from im6f_st[1],
 	 * while the selection below compares against im6f_st[0] --
 	 * confirm that this difference is intended.
 	 */
 	nsrcs = msfr.msfr_nsrcs;
 	ncsrcs = 0;
 	ptss = tss;
 	RB_FOREACH(ims, ip6_msource_tree, &imf->im6f_sources) {
 		lims = (struct in6_msource *)ims;
 		if (lims->im6sl_st[0] == MCAST_UNDEFINED ||
 		    lims->im6sl_st[0] != imf->im6f_st[0])
 			continue;
 		/* Counted even when there is no room left to store it. */
 		++ncsrcs;
 		if (tss != NULL && nsrcs > 0) {
 			psin = (struct sockaddr_in6 *)ptss;
 			psin->sin6_family = AF_INET6;
 			psin->sin6_len = sizeof(struct sockaddr_in6);
 			psin->sin6_addr = lims->im6s_addr;
 			psin->sin6_port = 0;
 			--nsrcs;
 			++ptss;
 		}
 	}
 
 	INP_WUNLOCK(inp);
 
 	if (tss != NULL) {
 		/* The whole requested vector is written, used or not. */
 		error = copyout(tss, msfr.msfr_srcs,
 		    sizeof(struct sockaddr_storage) * msfr.msfr_nsrcs);
 		free(tss, M_TEMP);
 		if (error)
 			return (error);
 	}
 
 	/* Report the total number of matching sources, not the number stored. */
 	msfr.msfr_nsrcs = ncsrcs;
 	error = sooptcopyout(sopt, &msfr, sizeof(struct __msfilterreq));
 
 	return (error);
 }
 
 /*
  * getsockopt() handler for the IPv6 multicast socket options.
  *
  * Takes the INP write lock itself and always returns with it released.
  * Only SOCK_RAW and SOCK_DGRAM sockets which are not divert sockets are
  * served; anything else gets EOPNOTSUPP.  Option names not handled here
  * yield ENOPROTOOPT.  When the inpcb has no multicast options attached,
  * the scalar options report their defaults and IPV6_MSFILTER fails with
  * EADDRNOTAVAIL.
  */
 int
 ip6_getmoptions(struct inpcb *inp, struct sockopt *sopt)
 {
 	struct ip6_moptions	*mopts;
 	u_int			 val;
 	int			 rc;
 
 	INP_WLOCK(inp);
 	mopts = inp->in6p_moptions;
 
 	/* Reject divert sockets and anything that is not raw or datagram. */
 	if (inp->inp_socket->so_proto->pr_protocol == IPPROTO_DIVERT ||
 	    (inp->inp_socket->so_proto->pr_type != SOCK_RAW &&
 	    inp->inp_socket->so_proto->pr_type != SOCK_DGRAM)) {
 		INP_WUNLOCK(inp);
 		return (EOPNOTSUPP);
 	}
 
 	rc = 0;
 	switch (sopt->sopt_name) {
 	case IPV6_MULTICAST_IF:
 		/* Index of the selected interface, or 0 if none is set. */
 		if (mopts != NULL && mopts->im6o_multicast_ifp != NULL)
 			val = mopts->im6o_multicast_ifp->if_index;
 		else
 			val = 0;
 		INP_WUNLOCK(inp);
 		rc = sooptcopyout(sopt, &val, sizeof(u_int));
 		break;
 
 	case IPV6_MULTICAST_HOPS:
 		if (mopts != NULL)
 			val = mopts->im6o_multicast_hlim;
 		else
 			val = V_ip6_defmcasthlim;
 		INP_WUNLOCK(inp);
 		rc = sooptcopyout(sopt, &val, sizeof(u_int));
 		break;
 
 	case IPV6_MULTICAST_LOOP:
 		if (mopts != NULL)
 			val = mopts->im6o_multicast_loop;
 		else
 			val = in6_mcast_loop; /* XXX VIMAGE */
 		INP_WUNLOCK(inp);
 		rc = sooptcopyout(sopt, &val, sizeof(u_int));
 		break;
 
 	case IPV6_MSFILTER:
 		if (mopts != NULL) {
 			/* Callee releases the INP lock. */
 			rc = in6p_get_source_filters(inp, sopt);
 		} else {
 			rc = EADDRNOTAVAIL;
 			INP_WUNLOCK(inp);
 		}
 		break;
 
 	default:
 		INP_WUNLOCK(inp);
 		rc = ENOPROTOOPT;
 		break;
 	}
 
 	INP_UNLOCK_ASSERT(inp);
 
 	return (rc);
 }
 
 /*
  * Look up the ifnet to use for a multicast group membership,
  * given the address of an IPv6 group.
  *
  * This routine exists to support legacy IPv6 multicast applications.
  *
  * If inp is non-NULL, use this socket's current FIB number for any
  * required FIB lookup; otherwise RT_DEFAULT_FIB is used. Look up the
  * group address in the unicast FIB,
  * and use its ifp; usually, this points to the default next-hop.
  * If the FIB lookup fails, return NULL.
  *
  * The group address is first split into a plain address and a scope
  * zone ID with in6_splitscope(); both are handed to the FIB lookup.
  *
  * NOTE(review): the first KASSERT below dereferences inp before the
  * NULL test made when choosing the FIB number -- confirm whether a
  * NULL inp is really a supported argument.
  *
  * FUTURE: Support multiple forwarding tables for IPv6.
  *
  * Returns NULL if no ifp could be found.
  */
 static struct ifnet *
 in6p_lookup_mcast_ifp(const struct inpcb *inp,
     const struct sockaddr_in6 *gsin6)
 {
-	struct nhop6_basic	nh6;
+	struct nhop_object	*nh;
 	struct in6_addr		dst;
 	uint32_t		scopeid;
 	uint32_t		fibnum;
 
 	/* Sanity checks on the arguments. */
 	KASSERT(inp->inp_vflag & INP_IPV6,
 	    ("%s: not INP_IPV6 inpcb", __func__));
 	KASSERT(gsin6->sin6_family == AF_INET6,
 	    ("%s: not AF_INET6 group", __func__));
 
 	in6_splitscope(&gsin6->sin6_addr, &dst, &scopeid);
 	fibnum = inp ? inp->inp_inc.inc_fibnum : RT_DEFAULT_FIB;
-	if (fib6_lookup_nh_basic(fibnum, &dst, scopeid, 0, 0, &nh6) != 0)
-		return (NULL);
+	nh = fib6_lookup(fibnum, &dst, scopeid, 0, 0);
 
-	return (nh6.nh_ifp);
+	return (nh ? nh->nh_ifp : NULL);
 }
 
 /*
  * Join an IPv6 multicast group, possibly with a source.
  *
  * FIXME: The KAME use of the unspecified address (::)
  * to join *all* multicast groups is currently unsupported.
  */
 static int
 in6p_join_group(struct inpcb *inp, struct sockopt *sopt)
 {
 	struct in6_multi_head		 inmh;
 	struct group_source_req		 gsr;
 	sockunion_t			*gsa, *ssa;
 	struct ifnet			*ifp;
 	struct in6_mfilter		*imf;
 	struct ip6_moptions		*imo;
 	struct in6_multi		*inm;
 	struct in6_msource		*lims;
 	int				 error, is_new;
 
 	SLIST_INIT(&inmh);
 	ifp = NULL;
 	lims = NULL;
 	error = 0;
 
 	memset(&gsr, 0, sizeof(struct group_source_req));
 	gsa = (sockunion_t *)&gsr.gsr_group;
 	gsa->ss.ss_family = AF_UNSPEC;
 	ssa = (sockunion_t *)&gsr.gsr_source;
 	ssa->ss.ss_family = AF_UNSPEC;
 
 	/*
 	 * Chew everything into struct group_source_req.
 	 * Overwrite the port field if present, as the sockaddr
 	 * being copied in may be matched with a binary comparison.
 	 * Ignore passed-in scope ID.
 	 */
 	switch (sopt->sopt_name) {
 	case IPV6_JOIN_GROUP: {
 		struct ipv6_mreq mreq;
 
 		error = sooptcopyin(sopt, &mreq, sizeof(struct ipv6_mreq),
 		    sizeof(struct ipv6_mreq));
 		if (error)
 			return (error);
 
 		gsa->sin6.sin6_family = AF_INET6;
 		gsa->sin6.sin6_len = sizeof(struct sockaddr_in6);
 		gsa->sin6.sin6_addr = mreq.ipv6mr_multiaddr;
 
 		if (mreq.ipv6mr_interface == 0) {
 			ifp = in6p_lookup_mcast_ifp(inp, &gsa->sin6);
 		} else {
 			if (V_if_index < mreq.ipv6mr_interface)
 				return (EADDRNOTAVAIL);
 			ifp = ifnet_byindex(mreq.ipv6mr_interface);
 		}
 		CTR3(KTR_MLD, "%s: ipv6mr_interface = %d, ifp = %p",
 		    __func__, mreq.ipv6mr_interface, ifp);
 	} break;
 
 	case MCAST_JOIN_GROUP:
 	case MCAST_JOIN_SOURCE_GROUP:
 		if (sopt->sopt_name == MCAST_JOIN_GROUP) {
 			error = sooptcopyin(sopt, &gsr,
 			    sizeof(struct group_req),
 			    sizeof(struct group_req));
 		} else if (sopt->sopt_name == MCAST_JOIN_SOURCE_GROUP) {
 			error = sooptcopyin(sopt, &gsr,
 			    sizeof(struct group_source_req),
 			    sizeof(struct group_source_req));
 		}
 		if (error)
 			return (error);
 
 		if (gsa->sin6.sin6_family != AF_INET6 ||
 		    gsa->sin6.sin6_len != sizeof(struct sockaddr_in6))
 			return (EINVAL);
 
 		if (sopt->sopt_name == MCAST_JOIN_SOURCE_GROUP) {
 			if (ssa->sin6.sin6_family != AF_INET6 ||
 			    ssa->sin6.sin6_len != sizeof(struct sockaddr_in6))
 				return (EINVAL);
 			if (IN6_IS_ADDR_MULTICAST(&ssa->sin6.sin6_addr))
 				return (EINVAL);
 			/*
 			 * TODO: Validate embedded scope ID in source
 			 * list entry against passed-in ifp, if and only
 			 * if source list filter entry is iface or node local.
 			 */
 			in6_clearscope(&ssa->sin6.sin6_addr);
 			ssa->sin6.sin6_port = 0;
 			ssa->sin6.sin6_scope_id = 0;
 		}
 
 		if (gsr.gsr_interface == 0 || V_if_index < gsr.gsr_interface)
 			return (EADDRNOTAVAIL);
 		ifp = ifnet_byindex(gsr.gsr_interface);
 		break;
 
 	default:
 		CTR2(KTR_MLD, "%s: unknown sopt_name %d",
 		    __func__, sopt->sopt_name);
 		return (EOPNOTSUPP);
 		break;
 	}
 
 	if (!IN6_IS_ADDR_MULTICAST(&gsa->sin6.sin6_addr))
 		return (EINVAL);
 
 	if (ifp == NULL || (ifp->if_flags & IFF_MULTICAST) == 0)
 		return (EADDRNOTAVAIL);
 
 	gsa->sin6.sin6_port = 0;
 	gsa->sin6.sin6_scope_id = 0;
 
 	/*
 	 * Always set the scope zone ID on memberships created from userland.
 	 * Use the passed-in ifp to do this.
 	 * XXX The in6_setscope() return value is meaningless.
 	 * XXX SCOPE6_LOCK() is taken by in6_setscope().
 	 */
 	(void)in6_setscope(&gsa->sin6.sin6_addr, ifp, NULL);
 
 	IN6_MULTI_LOCK();
 
 	/*
 	 * Find the membership in the membership list.
 	 */
 	imo = in6p_findmoptions(inp);
 	imf = im6o_match_group(imo, ifp, &gsa->sa);
 	if (imf == NULL) {
 		is_new = 1;
 		inm = NULL;
 
 		if (ip6_mfilter_count(&imo->im6o_head) >= IPV6_MAX_MEMBERSHIPS) {
 			error = ENOMEM;
 			goto out_in6p_locked;
 		}
 	} else {
 		is_new = 0;
 		inm = imf->im6f_in6m;
 
 		if (ssa->ss.ss_family != AF_UNSPEC) {
 			/*
 			 * MCAST_JOIN_SOURCE_GROUP on an exclusive membership
 			 * is an error. On an existing inclusive membership,
 			 * it just adds the source to the filter list.
 			 */
 			if (imf->im6f_st[1] != MCAST_INCLUDE) {
 				error = EINVAL;
 				goto out_in6p_locked;
 			}
 			/*
 			 * Throw out duplicates.
 			 *
 			 * XXX FIXME: This makes a naive assumption that
 			 * even if entries exist for *ssa in this imf,
 			 * they will be rejected as dupes, even if they
 			 * are not valid in the current mode (in-mode).
 			 *
 			 * in6_msource is transactioned just as for anything
 			 * else in SSM -- but note naive use of in6m_graft()
 			 * below for allocating new filter entries.
 			 *
 			 * This is only an issue if someone mixes the
 			 * full-state SSM API with the delta-based API,
 			 * which is discouraged in the relevant RFCs.
 			 */
 			lims = im6o_match_source(imf, &ssa->sa);
 			if (lims != NULL /*&&
 			    lims->im6sl_st[1] == MCAST_INCLUDE*/) {
 				error = EADDRNOTAVAIL;
 				goto out_in6p_locked;
 			}
 		} else {
 			/*
 			 * MCAST_JOIN_GROUP alone, on any existing membership,
 			 * is rejected, to stop the same inpcb tying up
 			 * multiple refs to the in_multi.
 			 * On an existing inclusive membership, this is also
 			 * an error; if you want to change filter mode,
 			 * you must use the userland API setsourcefilter().
 			 * XXX We don't reject this for imf in UNDEFINED
 			 * state at t1, because allocation of a filter
 			 * is atomic with allocation of a membership.
 			 */
 			error = EINVAL;
 			goto out_in6p_locked;
 		}
 	}
 
 	/*
 	 * Begin state merge transaction at socket layer.
 	 */
 	INP_WLOCK_ASSERT(inp);
 
 	/*
 	 * Graft new source into filter list for this inpcb's
 	 * membership of the group. The in6_multi may not have
 	 * been allocated yet if this is a new membership, however,
 	 * the in_mfilter slot will be allocated and must be initialized.
 	 *
 	 * Note: Grafting of exclusive mode filters doesn't happen
 	 * in this path.
 	 * XXX: Should check for non-NULL lims (node exists but may
 	 * not be in-mode) for interop with full-state API.
 	 */
 	if (ssa->ss.ss_family != AF_UNSPEC) {
 		/* Membership starts in IN mode */
 		if (is_new) {
 			CTR1(KTR_MLD, "%s: new join w/source", __func__);
 			imf = ip6_mfilter_alloc(M_NOWAIT, MCAST_UNDEFINED, MCAST_INCLUDE);
 			if (imf == NULL) {
 				error = ENOMEM;
 				goto out_in6p_locked;
 			}
 		} else {
 			CTR2(KTR_MLD, "%s: %s source", __func__, "allow");
 		}
 		lims = im6f_graft(imf, MCAST_INCLUDE, &ssa->sin6);
 		if (lims == NULL) {
 			CTR1(KTR_MLD, "%s: merge imf state failed",
 			    __func__);
 			error = ENOMEM;
 			goto out_in6p_locked;
 		}
 	} else {
 		/* No address specified; Membership starts in EX mode */
 		if (is_new) {
 			CTR1(KTR_MLD, "%s: new join w/o source", __func__);
 			imf = ip6_mfilter_alloc(M_NOWAIT, MCAST_UNDEFINED, MCAST_EXCLUDE);
 			if (imf == NULL) {
 				error = ENOMEM;
 				goto out_in6p_locked;
 			}
 		}
 	}
 
 	/*
 	 * Begin state merge transaction at MLD layer.
 	 */
 	if (is_new) {
 		in_pcbref(inp);
 		INP_WUNLOCK(inp);
 
 		error = in6_joingroup_locked(ifp, &gsa->sin6.sin6_addr, imf,
 		    &imf->im6f_in6m, 0);
 
 		INP_WLOCK(inp);
 		if (in_pcbrele_wlocked(inp)) {
 			error = ENXIO;
 			goto out_in6p_unlocked;
 		}
 		if (error) {
 			goto out_in6p_locked;
 		}
 		/*
 		 * NOTE: Refcount from in6_joingroup_locked()
 		 * is protecting membership.
 		 */
 		ip6_mfilter_insert(&imo->im6o_head, imf);
 	} else {
 		CTR1(KTR_MLD, "%s: merge inm state", __func__);
 		IN6_MULTI_LIST_LOCK();
 		error = in6m_merge(inm, imf);
 		if (error) {
 			CTR1(KTR_MLD, "%s: failed to merge inm state",
 			    __func__);
 			IN6_MULTI_LIST_UNLOCK();
 			im6f_rollback(imf);
 			im6f_reap(imf);
 			goto out_in6p_locked;
 		}
 		CTR1(KTR_MLD, "%s: doing mld downcall", __func__);
 		error = mld_change_state(inm, 0);
 		IN6_MULTI_LIST_UNLOCK();
 
 		if (error) {
 			CTR1(KTR_MLD, "%s: failed mld downcall",
 			     __func__);
 			im6f_rollback(imf);
 			im6f_reap(imf);
 			goto out_in6p_locked;
 		}
 	}
 
 	im6f_commit(imf);
 	imf = NULL;
 
 out_in6p_locked:
 	INP_WUNLOCK(inp);
 out_in6p_unlocked:
 	IN6_MULTI_UNLOCK();
 
 	if (is_new && imf) {
 		if (imf->im6f_in6m != NULL) {
 			struct in6_multi_head inmh;
 
 			SLIST_INIT(&inmh);
 			SLIST_INSERT_HEAD(&inmh, imf->im6f_in6m, in6m_defer);
 			in6m_release_list_deferred(&inmh);
 		}
 		ip6_mfilter_free(imf);
 	}
 	return (error);
 }
 
 /*
  * Leave an IPv6 multicast group on an inpcb, possibly with a source.
  *
  * Handles IPV6_LEAVE_GROUP (struct ipv6_mreq), MCAST_LEAVE_GROUP
  * (struct group_req) and MCAST_LEAVE_SOURCE_GROUP
  * (struct group_source_req).  When no source is given the whole
  * membership is dropped ("final" leave); otherwise only that source is
  * pruned from the membership's filter list.
  *
  * Locking: the inpcb is passed in unlocked.  IN6_MULTI_LOCK is taken
  * first, then in6p_findmoptions() takes the INP write lock.  Both are
  * released before returning.
  *
  * Returns 0 on success, otherwise an error:
  *  EOPNOTSUPP    - sopt_name is not one of the options above.
  *  EINVAL        - malformed group/source, group not multicast, or
  *                  source is multicast.
  *  EADDRNOTAVAIL - interface could not be resolved, socket is not a
  *                  member of the group, source leave on a membership
  *                  in MCAST_EXCLUDE mode, or source not present.
  * Errors from sooptcopyin(), im6f_prune(), in6m_merge() and
  * mld_change_state() are passed through.
  */
 static int
 in6p_leave_group(struct inpcb *inp, struct sockopt *sopt)
 {
 	struct ipv6_mreq		 mreq;
 	struct group_source_req		 gsr;
 	sockunion_t			*gsa, *ssa;
 	struct ifnet			*ifp;
 	struct in6_mfilter		*imf;
 	struct ip6_moptions		*imo;
 	struct in6_msource		*ims;
 	struct in6_multi		*inm;
 	uint32_t			 ifindex;
 	int				 error;
 	bool				 is_final;
 #ifdef KTR
 	char				 ip6tbuf[INET6_ADDRSTRLEN];
 #endif
 
 	ifp = NULL;
 	ifindex = 0;
 	error = 0;
 	is_final = true;
 
 	memset(&gsr, 0, sizeof(struct group_source_req));
 	gsa = (sockunion_t *)&gsr.gsr_group;
 	gsa->ss.ss_family = AF_UNSPEC;
 	ssa = (sockunion_t *)&gsr.gsr_source;
 	ssa->ss.ss_family = AF_UNSPEC;
 
 	/*
 	 * Chew everything passed in up into a struct group_source_req
 	 * as that is easier to process.
 	 * Note: Any embedded scope ID in the multicast group passed
 	 * in by userland is ignored, the interface index is the recommended
 	 * mechanism to specify an interface; see below.
 	 */
 	switch (sopt->sopt_name) {
 	case IPV6_LEAVE_GROUP:
 		error = sooptcopyin(sopt, &mreq, sizeof(struct ipv6_mreq),
 		    sizeof(struct ipv6_mreq));
 		if (error)
 			return (error);
 		gsa->sin6.sin6_family = AF_INET6;
 		gsa->sin6.sin6_len = sizeof(struct sockaddr_in6);
 		gsa->sin6.sin6_addr = mreq.ipv6mr_multiaddr;
 		gsa->sin6.sin6_port = 0;
 		gsa->sin6.sin6_scope_id = 0;
 		ifindex = mreq.ipv6mr_interface;
 		break;
 
 	case MCAST_LEAVE_GROUP:
 	case MCAST_LEAVE_SOURCE_GROUP:
 		if (sopt->sopt_name == MCAST_LEAVE_GROUP) {
 			error = sooptcopyin(sopt, &gsr,
 			    sizeof(struct group_req),
 			    sizeof(struct group_req));
 		} else if (sopt->sopt_name == MCAST_LEAVE_SOURCE_GROUP) {
 			error = sooptcopyin(sopt, &gsr,
 			    sizeof(struct group_source_req),
 			    sizeof(struct group_source_req));
 		}
 		if (error)
 			return (error);
 
 		if (gsa->sin6.sin6_family != AF_INET6 ||
 		    gsa->sin6.sin6_len != sizeof(struct sockaddr_in6))
 			return (EINVAL);
 		if (sopt->sopt_name == MCAST_LEAVE_SOURCE_GROUP) {
 			if (ssa->sin6.sin6_family != AF_INET6 ||
 			    ssa->sin6.sin6_len != sizeof(struct sockaddr_in6))
 				return (EINVAL);
 			if (IN6_IS_ADDR_MULTICAST(&ssa->sin6.sin6_addr))
 				return (EINVAL);
 			/*
 			 * TODO: Validate embedded scope ID in source
 			 * list entry against passed-in ifp, if and only
 			 * if source list filter entry is iface or node local.
 			 */
 			in6_clearscope(&ssa->sin6.sin6_addr);
 		}
 		gsa->sin6.sin6_port = 0;
 		gsa->sin6.sin6_scope_id = 0;
 		ifindex = gsr.gsr_interface;
 		break;
 
 	default:
 		CTR2(KTR_MLD, "%s: unknown sopt_name %d",
 		    __func__, sopt->sopt_name);
 		return (EOPNOTSUPP);
 		break;
 	}
 
 	if (!IN6_IS_ADDR_MULTICAST(&gsa->sin6.sin6_addr))
 		return (EINVAL);
 
 	/*
 	 * Validate interface index if provided. If no interface index
 	 * was provided separately, attempt to look the membership up
 	 * from the default scope as a last resort to disambiguate
 	 * the membership we are being asked to leave.
 	 * XXX SCOPE6 lock potentially taken here.
 	 */
 	if (ifindex != 0) {
 		if (V_if_index < ifindex)
 			return (EADDRNOTAVAIL);
 		ifp = ifnet_byindex(ifindex);
 		if (ifp == NULL)
 			return (EADDRNOTAVAIL);
 		(void)in6_setscope(&gsa->sin6.sin6_addr, ifp, NULL);
 	} else {
 		error = sa6_embedscope(&gsa->sin6, V_ip6_use_defzone);
 		if (error)
 			return (EADDRNOTAVAIL);
 		/*
 		 * Some badly behaved applications don't pass an ifindex
 		 * or a scope ID, which is an API violation. In this case,
 		 * perform a lookup as per a v6 join.
 		 *
 		 * XXX For now, stomp on zone ID for the corner case.
 		 * This is not the 'KAME way', but we need to see the ifp
 		 * directly until such time as this implementation is
 		 * refactored, assuming the scope IDs are the way to go.
 		 */
 		ifindex = ntohs(gsa->sin6.sin6_addr.s6_addr16[1]);
 		if (ifindex == 0) {
 			CTR2(KTR_MLD, "%s: warning: no ifindex, looking up "
 			    "ifp for group %s.", __func__,
 			    ip6_sprintf(ip6tbuf, &gsa->sin6.sin6_addr));
 			ifp = in6p_lookup_mcast_ifp(inp, &gsa->sin6);
 		} else {
 			ifp = ifnet_byindex(ifindex);
 		}
 		if (ifp == NULL)
 			return (EADDRNOTAVAIL);
 	}
 
 	CTR2(KTR_MLD, "%s: ifp = %p", __func__, ifp);
 	KASSERT(ifp != NULL, ("%s: ifp did not resolve", __func__));
 
 	IN6_MULTI_LOCK();
 
 	/*
 	 * Find the membership in the membership list.
 	 */
 	imo = in6p_findmoptions(inp);
 	imf = im6o_match_group(imo, ifp, &gsa->sa);
 	if (imf == NULL) {
 		error = EADDRNOTAVAIL;
 		goto out_in6p_locked;
 	}
 	inm = imf->im6f_in6m;
 
 	/* A source was supplied, so only that source is being left. */
 	if (ssa->ss.ss_family != AF_UNSPEC)
 		is_final = false;
 
 	/*
 	 * Begin state merge transaction at socket layer.
 	 */
 	INP_WLOCK_ASSERT(inp);
 
 	/*
 	 * If we were instructed only to leave a given source, do so.
 	 * MCAST_LEAVE_SOURCE_GROUP is only valid for inclusive memberships.
 	 */
 	if (is_final) {
 		ip6_mfilter_remove(&imo->im6o_head, imf);
 		im6f_leave(imf);
 
 		/*
 		 * Give up the multicast address record to which
 		 * the membership points.
 		 */
 		(void)in6_leavegroup_locked(inm, imf);
 	} else {
 		if (imf->im6f_st[0] == MCAST_EXCLUDE) {
 			error = EADDRNOTAVAIL;
 			goto out_in6p_locked;
 		}
 		ims = im6o_match_source(imf, &ssa->sa);
 		if (ims == NULL) {
 			/* ip6_sprintf() yields a string, so format with %s. */
 			CTR3(KTR_MLD, "%s: source %s %spresent", __func__,
 			    ip6_sprintf(ip6tbuf, &ssa->sin6.sin6_addr),
 			    "not ");
 			error = EADDRNOTAVAIL;
 			goto out_in6p_locked;
 		}
 		CTR2(KTR_MLD, "%s: %s source", __func__, "block");
 		error = im6f_prune(imf, &ssa->sin6);
 		if (error) {
 			CTR1(KTR_MLD, "%s: merge imf state failed",
 			    __func__);
 			goto out_in6p_locked;
 		}
 	}
 
 	/*
 	 * Begin state merge transaction at MLD layer.
 	 */
 	if (!is_final) {
 		CTR1(KTR_MLD, "%s: merge inm state", __func__);
 		IN6_MULTI_LIST_LOCK();
 		error = in6m_merge(inm, imf);
 		if (error) {
 			CTR1(KTR_MLD, "%s: failed to merge inm state",
 			    __func__);
 			IN6_MULTI_LIST_UNLOCK();
 			im6f_rollback(imf);
 			im6f_reap(imf);
 			goto out_in6p_locked;
 		}
 
 		CTR1(KTR_MLD, "%s: doing mld downcall", __func__);
 		error = mld_change_state(inm, 0);
 		IN6_MULTI_LIST_UNLOCK();
 		if (error) {
 			CTR1(KTR_MLD, "%s: failed mld downcall",
 			     __func__);
 			im6f_rollback(imf);
 			im6f_reap(imf);
 			goto out_in6p_locked;
 		}
 	}
 
 	im6f_commit(imf);
 	im6f_reap(imf);
 
 out_in6p_locked:
 	INP_WUNLOCK(inp);
 
 	/* On a final leave the filter was unlinked above; free it now. */
 	if (is_final && imf)
 		ip6_mfilter_free(imf);
 
 	IN6_MULTI_UNLOCK();
 	return (error);
 }
 
 /*
  * Select the interface for transmitting IPv6 multicast datagrams.
  *
  * Either an instance of struct in6_addr or an instance of struct ipv6_mreqn
  * may be passed to this socket option. An address of in6addr_any or an
  * interface index of 0 is used to remove a previous selection.
  * When no interface is selected, one is chosen for every send.
  */
 static int
 in6p_set_multicast_if(struct inpcb *inp, struct sockopt *sopt)
 {
 	struct ifnet		*ifp;
 	struct ip6_moptions	*imo;
 	u_int			 ifindex;
 	int			 error;
 
 	if (sopt->sopt_valsize != sizeof(u_int))
 		return (EINVAL);
 
 	error = sooptcopyin(sopt, &ifindex, sizeof(u_int), sizeof(u_int));
 	if (error)
 		return (error);
 	if (V_if_index < ifindex)
 		return (EINVAL);
 	if (ifindex == 0)
 		ifp = NULL;
 	else {
 		ifp = ifnet_byindex(ifindex);
 		if (ifp == NULL)
 			return (EINVAL);
 		if ((ifp->if_flags & IFF_MULTICAST) == 0)
 			return (EADDRNOTAVAIL);
 	}
 	imo = in6p_findmoptions(inp);
 	imo->im6o_multicast_ifp = ifp;
 	INP_WUNLOCK(inp);
 
 	return (0);
 }
 
 /*
  * Atomically set source filters on a socket for an IPv6 multicast group.
  *
  * The request is a struct __msfilterreq copied in from sopt.  It names
  * the interface index, the group, the new filter mode and an optional
  * user-space vector of msfr_nsrcs source addresses.
  *
  * The inpcb is passed in unlocked.  in6p_findmoptions() takes the INP
  * write lock; it is released before returning.
  *
  * Returns 0 on success.  ENOBUFS if msfr_nsrcs exceeds
  * in6_mcast_maxsocksrc; EINVAL for a bad filter mode, a malformed or
  * non-multicast group, or an invalid source entry; EAFNOSUPPORT for a
  * source entry that is not AF_INET6; EADDRNOTAVAIL for a bad interface
  * index or if the socket is not a member of the group.  Errors from
  * sooptcopyin(), copyin(), im6f_get_source(), in6m_merge() and
  * mld_change_state() are passed through.
  *
  * SMPng: NOTE: Potentially calls malloc(M_WAITOK) with Giant held.
  */
 static int
 in6p_set_source_filters(struct inpcb *inp, struct sockopt *sopt)
 {
 	struct __msfilterreq	 msfr;
 	sockunion_t		*gsa;
 	struct ifnet		*ifp;
 	struct in6_mfilter	*imf;
 	struct ip6_moptions	*imo;
 	struct in6_multi		*inm;
 	int			 error;
 
 	error = sooptcopyin(sopt, &msfr, sizeof(struct __msfilterreq),
 	    sizeof(struct __msfilterreq));
 	if (error)
 		return (error);
 
 	if (msfr.msfr_nsrcs > in6_mcast_maxsocksrc)
 		return (ENOBUFS);
 
 	if (msfr.msfr_fmode != MCAST_EXCLUDE &&
 	    msfr.msfr_fmode != MCAST_INCLUDE)
 		return (EINVAL);
 
 	if (msfr.msfr_group.ss_family != AF_INET6 ||
 	    msfr.msfr_group.ss_len != sizeof(struct sockaddr_in6))
 		return (EINVAL);
 
 	gsa = (sockunion_t *)&msfr.msfr_group;
 	if (!IN6_IS_ADDR_MULTICAST(&gsa->sin6.sin6_addr))
 		return (EINVAL);
 
 	gsa->sin6.sin6_port = 0;	/* ignore port */
 
 	if (msfr.msfr_ifindex == 0 || V_if_index < msfr.msfr_ifindex)
 		return (EADDRNOTAVAIL);
 	ifp = ifnet_byindex(msfr.msfr_ifindex);
 	if (ifp == NULL)
 		return (EADDRNOTAVAIL);
 	(void)in6_setscope(&gsa->sin6.sin6_addr, ifp, NULL);
 
 	/*
 	 * Take the INP write lock.
 	 * Check if this socket is a member of this group.
 	 */
 	imo = in6p_findmoptions(inp);
 	imf = im6o_match_group(imo, ifp, &gsa->sa);
 	if (imf == NULL) {
 		error = EADDRNOTAVAIL;
 		goto out_in6p_locked;
 	}
 	inm = imf->im6f_in6m;
 
 	/*
 	 * Begin state merge transaction at socket layer.
 	 */
 	INP_WLOCK_ASSERT(inp);
 
 	imf->im6f_st[1] = msfr.msfr_fmode;
 
 	/*
 	 * Apply any new source filters, if present.
 	 * Make a copy of the user-space source vector so
 	 * that we may copy them with a single copyin. This
 	 * allows us to deal with page faults up-front.
 	 */
 	if (msfr.msfr_nsrcs > 0) {
 		struct in6_msource	*lims;
 		struct sockaddr_in6	*psin;
 		struct sockaddr_storage	*kss, *pkss;
 		int			 i;
 
 		/* The lock is dropped across the M_WAITOK malloc and copyin. */
 		INP_WUNLOCK(inp);
  
 		CTR2(KTR_MLD, "%s: loading %lu source list entries",
 		    __func__, (unsigned long)msfr.msfr_nsrcs);
 		kss = malloc(sizeof(struct sockaddr_storage) * msfr.msfr_nsrcs,
 		    M_TEMP, M_WAITOK);
 		error = copyin(msfr.msfr_srcs, kss,
 		    sizeof(struct sockaddr_storage) * msfr.msfr_nsrcs);
 		/*
 		 * NOTE(review): this return happens with the lock already
 		 * released, after im6f_st[1] was changed above, and without
 		 * going through im6f_rollback() -- confirm that leaving the
 		 * new mode in place here is intended.
 		 */
 		if (error) {
 			free(kss, M_TEMP);
 			return (error);
 		}
 
 		INP_WLOCK(inp);
 
 		/*
 		 * Mark all source filters as UNDEFINED at t1.
 		 * Restore new group filter mode, as im6f_leave()
 		 * will set it to INCLUDE.
 		 */
 		im6f_leave(imf);
 		imf->im6f_st[1] = msfr.msfr_fmode;
 
 		/*
 		 * Update socket layer filters at t1, lazy-allocating
 		 * new entries. This saves a bunch of memory at the
 		 * cost of one RB_FIND() per source entry; duplicate
 		 * entries in the msfr_nsrcs vector are ignored.
 		 * If we encounter an error, rollback transaction.
 		 *
 		 * XXX This too could be replaced with a set-symmetric
 		 * difference like loop to avoid walking from root
 		 * every time, as the key space is common.
 		 */
 		for (i = 0, pkss = kss; i < msfr.msfr_nsrcs; i++, pkss++) {
 			psin = (struct sockaddr_in6 *)pkss;
 			if (psin->sin6_family != AF_INET6) {
 				error = EAFNOSUPPORT;
 				break;
 			}
 			if (psin->sin6_len != sizeof(struct sockaddr_in6)) {
 				error = EINVAL;
 				break;
 			}
 			if (IN6_IS_ADDR_MULTICAST(&psin->sin6_addr)) {
 				error = EINVAL;
 				break;
 			}
 			/*
 			 * TODO: Validate embedded scope ID in source
 			 * list entry against passed-in ifp, if and only
 			 * if source list filter entry is iface or node local.
 			 */
 			in6_clearscope(&psin->sin6_addr);
 			error = im6f_get_source(imf, psin, &lims);
 			if (error)
 				break;
 			/* Put the entry into the new filter mode at t1. */
 			lims->im6sl_st[1] = imf->im6f_st[1];
 		}
 		free(kss, M_TEMP);
 	}
 
 	/* Any per-source failure above aborts the whole transaction. */
 	if (error)
 		goto out_im6f_rollback;
 
 	INP_WLOCK_ASSERT(inp);
 	IN6_MULTI_LIST_LOCK();
 
 	/*
 	 * Begin state merge transaction at MLD layer.
 	 */
 	CTR1(KTR_MLD, "%s: merge inm state", __func__);
 	error = in6m_merge(inm, imf);
 	if (error)
 		CTR1(KTR_MLD, "%s: failed to merge inm state", __func__);
 	else {
 		CTR1(KTR_MLD, "%s: doing mld downcall", __func__);
 		error = mld_change_state(inm, 0);
 		if (error)
 			CTR1(KTR_MLD, "%s: failed mld downcall", __func__);
 	}
 
 	IN6_MULTI_LIST_UNLOCK();
 
 out_im6f_rollback:
 	/* Finish the socket-layer transaction one way or the other. */
 	if (error)
 		im6f_rollback(imf);
 	else
 		im6f_commit(imf);
 
 	im6f_reap(imf);
 
 out_in6p_locked:
 	INP_WUNLOCK(inp);
 	return (error);
 }
 
 /*
  * Handle the IPv6 multicast socket options passed to setsockopt().
  *
  * Several options handled here overlap with options of the regular
  * unicast API.  The duplicated code cannot be merged, because the
  * idempotence of the IPv6 multicast part of the BSD Sockets API must be
  * preserved; the effects of these options must be treated as separate
  * and distinct.
  *
  * Returns 0 on success or an errno value.  EOPNOTSUPP is returned for
  * divert sockets, for sockets that are neither SOCK_RAW nor SOCK_DGRAM,
  * and for option names that have no case in the switch below.  The inpcb
  * is asserted to be unlocked once the switch has been left.
  *
  * SMPng: XXX: Unlocked read of inp_socket believed OK.
  */
 int
 ip6_setmoptions(struct inpcb *inp, struct sockopt *sopt)
 {
 	struct ip6_moptions	*im6o;
 	u_int			 loop;
 	int			 error, hlim;
 
 	/* Divert sockets never take multicast options. */
 	if (inp->inp_socket->so_proto->pr_protocol == IPPROTO_DIVERT)
 		return (EOPNOTSUPP);
 	/* Neither do sockets other than raw and datagram ones. */
 	if (inp->inp_socket->so_proto->pr_type != SOCK_RAW &&
 	    inp->inp_socket->so_proto->pr_type != SOCK_DGRAM)
 		return (EOPNOTSUPP);
 
 	error = 0;
 
 	switch (sopt->sopt_name) {
 	case IPV6_MULTICAST_IF:
 		error = in6p_set_multicast_if(inp, sopt);
 		break;
 
 	case IPV6_MULTICAST_HOPS:
 		/*
 		 * Hop limit for outgoing multicast packets: 0..255, or -1
 		 * to select the default, V_ip6_defmcasthlim.
 		 */
 		if (sopt->sopt_valsize != sizeof(int)) {
 			error = EINVAL;
 			break;
 		}
 		error = sooptcopyin(sopt, &hlim, sizeof(hlim), sizeof(int));
 		if (error != 0)
 			break;
 		if (hlim < -1 || hlim > 255) {
 			error = EINVAL;
 			break;
 		}
 		if (hlim == -1)
 			hlim = V_ip6_defmcasthlim;
 		/*
 		 * The unlock below pairs with in6p_findmoptions();
 		 * presumably it returns with the inpcb write-locked.
 		 */
 		im6o = in6p_findmoptions(inp);
 		im6o->im6o_multicast_hlim = hlim;
 		INP_WUNLOCK(inp);
 		break;
 
 	case IPV6_MULTICAST_LOOP:
 		/*
 		 * Loopback flag for outgoing multicast packets.
 		 * Only zero and one are accepted.
 		 */
 		if (sopt->sopt_valsize != sizeof(u_int)) {
 			error = EINVAL;
 			break;
 		}
 		error = sooptcopyin(sopt, &loop, sizeof(u_int), sizeof(u_int));
 		if (error != 0)
 			break;
 		if (loop > 1) {
 			error = EINVAL;
 			break;
 		}
 		im6o = in6p_findmoptions(inp);
 		im6o->im6o_multicast_loop = loop;
 		INP_WUNLOCK(inp);
 		break;
 
 	case IPV6_JOIN_GROUP:
 	case MCAST_JOIN_GROUP:
 	case MCAST_JOIN_SOURCE_GROUP:
 		error = in6p_join_group(inp, sopt);
 		break;
 
 	case IPV6_LEAVE_GROUP:
 	case MCAST_LEAVE_GROUP:
 	case MCAST_LEAVE_SOURCE_GROUP:
 		error = in6p_leave_group(inp, sopt);
 		break;
 
 	case MCAST_BLOCK_SOURCE:
 	case MCAST_UNBLOCK_SOURCE:
 		error = in6p_block_unblock_source(inp, sopt);
 		break;
 
 	case IPV6_MSFILTER:
 		error = in6p_set_source_filters(inp, sopt);
 		break;
 
 	default:
 		error = EOPNOTSUPP;
 		break;
 	}
 
 	INP_UNLOCK_ASSERT(inp);
 
 	return (error);
 }
 
 /*
  * Expose MLD's multicast filter mode and source list(s) to userland,
  * keyed by (ifindex, group).
  * The filter mode is written out as a uint32_t, followed by
  * 0..n of struct in6_addr.
  * For use by ifmcstat(8).
  *
  * The sysctl name must carry five integers: the interface index followed
  * by the four 32-bit words of the group address.  The node is read-only.
  *
  * Returns 0 on success, EPERM on a write attempt, EINVAL for a malformed
  * name or a non-multicast group, ENOENT when the interface index does
  * not resolve, or the error reported by sysctl_wire_old_buffer() or
  * SYSCTL_OUT().
  *
  * SMPng: NOTE: unlocked read of ifindex space.
  */
 static int
 sysctl_ip6_mcast_filters(SYSCTL_HANDLER_ARGS)
 {
 	struct in6_addr			 mcaddr;
 	struct in6_addr			 src;
 	struct epoch_tracker		 et;
 	struct ifnet			*ifp;
 	struct ifmultiaddr		*ifma;
 	struct in6_multi		*inm;
 	struct ip6_msource		*ims;
 	int				*name;
 	int				 retval;
 	u_int				 namelen;
 	uint32_t			 fmode, ifindex;
 #ifdef KTR
 	char				 ip6tbuf[INET6_ADDRSTRLEN];
 #endif
 
 	name = (int *)arg1;
 	namelen = arg2;
 
 	if (req->newptr != NULL)
 		return (EPERM);
 
 	/* int: ifindex + 4 * 32 bits of IPv6 address */
 	if (namelen != 5)
 		return (EINVAL);
 
 	/*
 	 * ifindex is unsigned, so a negative name[0] wraps to a large
 	 * value and is rejected by the upper bound; only zero needs an
 	 * explicit test.
 	 */
 	ifindex = name[0];
 	if (ifindex == 0 || ifindex > V_if_index) {
 		CTR2(KTR_MLD, "%s: ifindex %u out of range",
 		    __func__, ifindex);
 		return (ENOENT);
 	}
 
 	memcpy(&mcaddr, &name[1], sizeof(struct in6_addr));
 	if (!IN6_IS_ADDR_MULTICAST(&mcaddr)) {
 		CTR2(KTR_MLD, "%s: group %s is not multicast",
 		    __func__, ip6_sprintf(ip6tbuf, &mcaddr));
 		return (EINVAL);
 	}
 
 	NET_EPOCH_ENTER(et);
 	ifp = ifnet_byindex(ifindex);
 	if (ifp == NULL) {
 		NET_EPOCH_EXIT(et);
 		CTR2(KTR_MLD, "%s: no ifp for ifindex %u",
 		    __func__, ifindex);
 		return (ENOENT);
 	}
 	/*
 	 * Internal MLD lookups require that scope/zone ID is set.
 	 */
 	(void)in6_setscope(&mcaddr, ifp, NULL);
 
 	/* Room for the filter mode plus the maximum number of sources. */
 	retval = sysctl_wire_old_buffer(req,
 	    sizeof(uint32_t) + (in6_mcast_maxgrpsrc * sizeof(struct in6_addr)));
 	if (retval) {
 		NET_EPOCH_EXIT(et);
 		return (retval);
 	}
 
 	IN6_MULTI_LOCK();
 	IN6_MULTI_LIST_LOCK();
 	CK_STAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
 		inm = in6m_ifmultiaddr_get_inm(ifma);
 		if (inm == NULL)
 			continue;
 		if (!IN6_ARE_ADDR_EQUAL(&inm->in6m_addr, &mcaddr))
 			continue;
 		/* The filter mode is taken from in6m_st[1]. */
 		fmode = inm->in6m_st[1].iss_fmode;
 		retval = SYSCTL_OUT(req, &fmode, sizeof(uint32_t));
 		if (retval != 0)
 			break;
 		RB_FOREACH(ims, ip6_msource_tree, &inm->in6m_srcs) {
 			CTR2(KTR_MLD, "%s: visit node %p", __func__, ims);
 			/*
 			 * Only copy-out sources which are in-mode.
 			 */
 			if (fmode != im6s_get_mode(inm, ims, 1)) {
 				CTR1(KTR_MLD, "%s: skip non-in-mode",
 				    __func__);
 				continue;
 			}
 			src = ims->im6s_addr;
 			retval = SYSCTL_OUT(req, &src,
 			    sizeof(struct in6_addr));
 			if (retval != 0)
 				break;
 		}
 		/*
 		 * The break above only leaves the source loop.  Stop the
 		 * interface walk as well, so that a copy-out failure is
 		 * returned as-is and cannot be overwritten by a later
 		 * matching membership.
 		 */
 		if (retval != 0)
 			break;
 	}
 	IN6_MULTI_LIST_UNLOCK();
 	IN6_MULTI_UNLOCK();
 	NET_EPOCH_EXIT(et);
 
 	return (retval);
 }
 
 #ifdef KTR
 
 /* Short names for the filter modes, indexed by the MCAST_* mode value. */
 static const char *in6m_modestrs[] = {
 	"un",
 	"in",
 	"ex"
 };
 
 /*
  * Map a filter mode to its short name; modes outside the range
  * MCAST_UNDEFINED..MCAST_EXCLUDE yield "??".
  */
 static const char *
 in6m_mode_str(const int mode)
 {
 
 	if (mode < MCAST_UNDEFINED || mode > MCAST_EXCLUDE)
 		return ("??");
 	return (in6m_modestrs[mode]);
 }
 
 /* Printable names for the MLD membership states, indexed by state value. */
 static const char *in6m_statestrs[] = {
 	"not-member", "silent", "idle", "lazy", "sleeping", "awakening",
 	"query-pending", "sg-query-pending", "leaving"
 };
 
 /*
  * Map an MLD membership state to its printable name; states outside the
  * range MLD_NOT_MEMBER..MLD_LEAVING_MEMBER yield "??".
  */
 static const char *
 in6m_state_str(const int state)
 {
 
 	if (state < MLD_NOT_MEMBER || state > MLD_LEAVING_MEMBER)
 		return ("??");
 	return (in6m_statestrs[state]);
 }
 
 /*
  * Print the contents of an in6_multi record on the console.
  * Nothing is printed unless KTR_MLD is set in ktr_mask.
  */
 void
 in6m_print(const struct in6_multi *inm)
 {
 	char addrbuf[INET6_ADDRSTRLEN];
 	int snap;
 
 	if (!(ktr_mask & KTR_MLD))
 		return;
 
 	printf("%s: --- begin in6m %p ---\n", __func__, inm);
 	printf("addr %s ifp %p(%s) ifma %p\n",
 	    ip6_sprintf(addrbuf, &inm->in6m_addr), inm->in6m_ifp,
 	    if_name(inm->in6m_ifp), inm->in6m_ifma);
 	printf("timer %u state %s refcount %u scq.len %u\n",
 	    inm->in6m_timer, in6m_state_str(inm->in6m_state),
 	    inm->in6m_refcount, mbufq_len(&inm->in6m_scq));
 	printf("mli %p nsrc %lu sctimer %u scrv %u\n",
 	    inm->in6m_mli, inm->in6m_nsrc, inm->in6m_sctimer,
 	    inm->in6m_scrv);
 
 	/* Both entries of in6m_st are printed, labelled t0 and t1. */
 	snap = 0;
 	while (snap < 2) {
 		printf("t%d: fmode %s asm %u ex %u in %u rec %u\n", snap,
 		    in6m_mode_str(inm->in6m_st[snap].iss_fmode),
 		    inm->in6m_st[snap].iss_asm,
 		    inm->in6m_st[snap].iss_ex,
 		    inm->in6m_st[snap].iss_in,
 		    inm->in6m_st[snap].iss_rec);
 		snap++;
 	}
 	printf("%s: --- end in6m %p ---\n", __func__, inm);
 }
 
 #else /* !KTR */
 
 /*
  * Variant of in6m_print() compiled when KTR is not defined: it has an
  * empty body, so callers need no conditional compilation of their own.
  */
 void
 in6m_print(const struct in6_multi *inm)
 {
 
 }
 
 #endif /* KTR */
Index: head/sys/netinet6/in6_src.c
===================================================================
--- head/sys/netinet6/in6_src.c	(revision 362899)
+++ head/sys/netinet6/in6_src.c	(revision 362900)
@@ -1,1225 +1,1226 @@
 /*-
  * SPDX-License-Identifier: BSD-3-Clause
  *
  * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. Neither the name of the project nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  *	$KAME: in6_src.c,v 1.132 2003/08/26 04:42:27 keiichi Exp $
  */
 
 /*-
  * Copyright (c) 1982, 1986, 1991, 1993
  *	The Regents of the University of California.  All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  *	@(#)in_pcb.c	8.2 (Berkeley) 1/4/94
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include "opt_inet.h"
 #include "opt_inet6.h"
 #include "opt_mpath.h"
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/lock.h>
 #include <sys/malloc.h>
 #include <sys/mbuf.h>
 #include <sys/priv.h>
 #include <sys/protosw.h>
 #include <sys/socket.h>
 #include <sys/socketvar.h>
 #include <sys/sockio.h>
 #include <sys/sysctl.h>
 #include <sys/errno.h>
 #include <sys/time.h>
 #include <sys/jail.h>
 #include <sys/kernel.h>
 #include <sys/rmlock.h>
 #include <sys/sx.h>
 
 #include <net/if.h>
 #include <net/if_var.h>
 #include <net/if_dl.h>
 #include <net/route.h>
 #include <net/route/nhop.h>
 #include <net/if_llatbl.h>
 
 #include <netinet/in.h>
 #include <netinet/in_var.h>
 #include <netinet/in_systm.h>
 #include <netinet/ip.h>
 #include <netinet/in_pcb.h>
 #include <netinet/ip_var.h>
 #include <netinet/udp.h>
 #include <netinet/udp_var.h>
 
 #include <netinet6/in6_var.h>
 #include <netinet/ip6.h>
 #include <netinet6/in6_fib.h>
 #include <netinet6/in6_pcb.h>
 #include <netinet6/ip6_var.h>
 #include <netinet6/scope6_var.h>
 #include <netinet6/nd6.h>
 
 /*
  * Mutex and its wrapper macros.  It is initialised as a default mutex
  * (MTX_DEF).  NOTE(review): what it protects is not visible in this
  * part of the file -- presumably the address selection policy state;
  * confirm at the use sites.
  */
 static struct mtx addrsel_lock;
 #define	ADDRSEL_LOCK_INIT()	mtx_init(&addrsel_lock, "addrsel_lock", NULL, MTX_DEF)
 #define	ADDRSEL_LOCK()		mtx_lock(&addrsel_lock)
 #define	ADDRSEL_UNLOCK()	mtx_unlock(&addrsel_lock)
 #define	ADDRSEL_LOCK_ASSERT()	mtx_assert(&addrsel_lock, MA_OWNED)
 
 /*
  * Shared/exclusive lock and its wrapper macros: the S variants take and
  * drop it shared, the X variants exclusive.
  */
 static struct sx addrsel_sxlock;
 #define	ADDRSEL_SXLOCK_INIT()	sx_init(&addrsel_sxlock, "addrsel_sxlock")
 #define	ADDRSEL_SLOCK()		sx_slock(&addrsel_sxlock)
 #define	ADDRSEL_SUNLOCK()	sx_sunlock(&addrsel_sxlock)
 #define	ADDRSEL_XLOCK()		sx_xlock(&addrsel_sxlock)
 #define	ADDRSEL_XUNLOCK()	sx_xunlock(&addrsel_sxlock)
 
 /*
  * Policy label value tested by Rule 6 of in6_selectsrc(): when the
  * destination's policy carries this label, the label comparison is
  * skipped.
  */
 #define ADDR_LABEL_NOTAPP (-1)
 /* Per-VNET default address selection policy entry. */
 VNET_DEFINE_STATIC(struct in6_addrpolicy, defaultaddrpolicy);
 #define	V_defaultaddrpolicy		VNET(defaultaddrpolicy)
 
 /*
  * Per-VNET knob, 0 by default.  Presumably the variable behind
  * V_ip6_prefer_tempaddr, which Rule 7 of in6_selectsrc() uses when the
  * packet options do not state a preference.
  */
 VNET_DEFINE(int, ip6_prefer_tempaddr) = 0;
 
 /* Outgoing interface, route and source address selection. */
 static int	selectroute(struct sockaddr_in6 *dstsock,
 		    struct ip6_pktopts *opts, struct ip6_moptions *mopts,
 		    struct route_in6 *ro, struct ifnet **retifp,
 		    struct nhop_object **retnh, int norouteok, u_int fibnum,
 		    uint32_t flowid);
 static int	in6_selectif(struct sockaddr_in6 *dstsock,
 		    struct ip6_pktopts *opts, struct ip6_moptions *mopts,
 		    struct ifnet **retifp, struct ifnet *oifp, u_int fibnum);
 static int	in6_selectsrc(uint32_t fibnum, struct sockaddr_in6 *dstsock,
 		    struct ip6_pktopts *opts, struct inpcb *inp,
 		    struct ucred *cred, struct ifnet **ifpp,
 		    struct in6_addr *srcp);
 
 /* Address selection policy helpers. */
 static struct in6_addrpolicy	*lookup_addrsel_policy(
 		    struct sockaddr_in6 *sa6);
 static struct in6_addrpolicy	*match_addrsel_policy(
 		    struct sockaddr_in6 *sa6);
 static void	init_policy_queue(void);
 static int	add_addrsel_policyent(struct in6_addrpolicy *policy);
 static int	delete_addrsel_policyent(struct in6_addrpolicy *policy);
 static int	walk_addrsel_policy(
 		    int (*callback)(struct in6_addrpolicy *, void *),
 		    void *arg);
 static int	dump_addrsel_policyent(struct in6_addrpolicy *policy,
 		    void *arg);
 
 /*
  * Return an IPv6 address, which is the most appropriate for a given
  * destination and user specified options.
  * If necessary, this function lookups the routing table and returns
  * an entry to the caller for later use.
  */
 /*
  * Control-flow helpers for the candidate loop of in6_selectsrc().  Each
  * one jumps to a label at the bottom of the loop body: REPLACE() to
  * `replace', NEXT() to `next' and BREAK() to `out'.  REPLACE() and
  * BREAK() also count the deciding rule in ip6s_sources_rule[r]; NEXT()
  * takes the rule number only for symmetry.
  */
 #define	REPLACE(r) do {							\
 	IP6STAT_INC(ip6s_sources_rule[(r)]);				\
 	goto replace;							\
 } while (0)
 #define	NEXT(r) do {							\
 	goto next;		/* XXX: we can't use 'continue' here */	\
 } while (0)
 #define	BREAK(r) do {							\
 	IP6STAT_INC(ip6s_sources_rule[(r)]);				\
 	goto out;		/* XXX: we can't use 'break' here */	\
 } while (0)
 
 /*
  * Choose the source address for a packet to @dstsock and store it in
  * @srcp (which must not be NULL).
  *
  * The address is taken, in order of precedence, from:
  *  - the IPV6_PKTINFO address in @opts, when one is specified;
  *  - the local address already bound to @inp;
  *  - prison_saddrsel_ip6(), when @cred is set and that call succeeds;
  *  - the best candidate among the configured addresses, picked by the
  *    numbered rules in the loop below.
  *
  * @fibnum selects the FIB for the interface lookup; in the last case
  * the FIB of @inp is used instead when @inp is not NULL.  @opts, @inp
  * and @cred may each be NULL.  @ifpp is optional: a non-NULL *ifpp on
  * entry is handed to in6_selectif() as a hint, *ifpp is then reset to
  * NULL and, in the first and last case, set to the outgoing interface
  * on success.
  *
  * Returns 0 on success or an errno value: EADDRNOTAVAIL when no usable
  * address is found, otherwise the error reported by in6_selectif(),
  * in6_setscope() or prison_local_ip6().
  */
 static int
 in6_selectsrc(uint32_t fibnum, struct sockaddr_in6 *dstsock,
     struct ip6_pktopts *opts, struct inpcb *inp, struct ucred *cred,
     struct ifnet **ifpp, struct in6_addr *srcp)
 {
 	struct rm_priotracker in6_ifa_tracker;
 	struct in6_addr dst, tmp;
 	struct ifnet *ifp = NULL, *oifp = NULL;
 	struct in6_ifaddr *ia = NULL, *ia_best = NULL;
 	struct in6_pktinfo *pi = NULL;
 	int dst_scope = -1, best_scope = -1, best_matchlen = -1;
 	struct in6_addrpolicy *dst_policy = NULL, *best_policy = NULL;
 	u_int32_t odstzone;
 	int prefer_tempaddr;
 	int error;
 	struct ip6_moptions *mopts;
 
 	KASSERT(srcp != NULL, ("%s: srcp is NULL", __func__));
 
 	dst = dstsock->sin6_addr; /* make a copy for local operation */
 	if (ifpp) {
 		/*
 		 * Save a possibly passed in ifp for in6_selectsrc. Only
 		 * neighbor discovery code should use this feature, where
 		 * we may know the interface but not the FIB number holding
 		 * the connected subnet in case someone deleted it from the
 		 * default FIB and we need to check the interface.
 		 */
 		if (*ifpp != NULL)
 			oifp = *ifpp;
 		*ifpp = NULL;
 	}
 
 	if (inp != NULL) {
 		INP_LOCK_ASSERT(inp);
 		mopts = inp->in6p_moptions;
 	} else {
 		mopts = NULL;
 	}
 
 	/*
 	 * If the source address is explicitly specified by the caller,
 	 * check if the requested source address is indeed a unicast address
 	 * assigned to the node, and can be used as the packet's source
 	 * address.  If everything is okay, use the address as source.
 	 */
 	if (opts && (pi = opts->ip6po_pktinfo) &&
 	    !IN6_IS_ADDR_UNSPECIFIED(&pi->ipi6_addr)) {
 		/* get the outgoing interface */
 		if ((error = in6_selectif(dstsock, opts, mopts, &ifp, oifp,
 		    fibnum))
 		    != 0)
 			return (error);
 
 		/*
 		 * determine the appropriate zone id of the source based on
 		 * the zone of the destination and the outgoing interface.
 		 * If the specified address is ambiguous wrt the scope zone,
 		 * the interface must be specified; otherwise, ifa_ifwithaddr()
 		 * will fail matching the address.
 		 */
 		tmp = pi->ipi6_addr;
 		if (ifp) {
 			error = in6_setscope(&tmp, ifp, &odstzone);
 			if (error)
 				return (error);
 		}
 		/*
 		 * inp may be NULL (see above), so test it before looking
 		 * at its flags, as the final prison check below does.
 		 */
 		if (cred != NULL && (error = prison_local_ip6(cred,
 		    &tmp, (inp != NULL &&
 		    (inp->inp_flags & IN6P_IPV6_V6ONLY) != 0))) != 0)
 			return (error);
 
 		/*
 		 * If IPV6_BINDANY socket option is set, we allow to specify
 		 * non local addresses as source address in IPV6_PKTINFO
 		 * ancillary data.  Without a socket the option cannot be
 		 * set, so the address has to be a local one.
 		 */
 		if (inp == NULL || (inp->inp_flags & INP_BINDANY) == 0) {
 			ia = in6ifa_ifwithaddr(&tmp, 0 /* XXX */);
 			if (ia == NULL || (ia->ia6_flags & (IN6_IFF_ANYCAST |
 			    IN6_IFF_NOTREADY))) {
 				if (ia != NULL)
 					ifa_free(&ia->ia_ifa);
 				return (EADDRNOTAVAIL);
 			}
 			bcopy(&ia->ia_addr.sin6_addr, srcp, sizeof(*srcp));
 			ifa_free(&ia->ia_ifa);
 		} else
 			bcopy(&tmp, srcp, sizeof(*srcp));
 		pi->ipi6_addr = tmp; /* XXX: this overrides pi */
 		if (ifpp)
 			*ifpp = ifp;
 		return (0);
 	}
 
 	/*
 	 * Otherwise, if the socket has already bound the source, just use it.
 	 */
 	if (inp != NULL && !IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr)) {
 		if (cred != NULL &&
 		    (error = prison_local_ip6(cred, &inp->in6p_laddr,
 		    ((inp->inp_flags & IN6P_IPV6_V6ONLY) != 0))) != 0)
 			return (error);
 		bcopy(&inp->in6p_laddr, srcp, sizeof(*srcp));
 		return (0);
 	}
 
 	/*
 	 * Bypass source address selection and use the primary jail IP
 	 * if requested.
 	 */
 	if (cred != NULL && !prison_saddrsel_ip6(cred, srcp))
 		return (0);
 
 	/*
 	 * If the address is not specified, choose the best one based on
 	 * the outgoing interface and the destination address.
 	 */
 	/* get the outgoing interface */
 	if ((error = in6_selectif(dstsock, opts, mopts, &ifp, oifp,
 	    (inp != NULL) ? inp->inp_inc.inc_fibnum : fibnum)) != 0)
 		return (error);
 
 #ifdef DIAGNOSTIC
 	if (ifp == NULL)	/* this should not happen */
 		panic("in6_selectsrc: NULL ifp");
 #endif
 	error = in6_setscope(&dst, ifp, &odstzone);
 	if (error)
 		return (error);
 
 	IN6_IFADDR_RLOCK(&in6_ifa_tracker);
 	CK_STAILQ_FOREACH(ia, &V_in6_ifaddrhead, ia_link) {
 		int new_scope = -1, new_matchlen = -1;
 		struct in6_addrpolicy *new_policy = NULL;
 		u_int32_t srczone, osrczone, dstzone;
 		struct in6_addr src;
 		struct ifnet *ifp1 = ia->ia_ifp;
 
 		/*
 		 * We'll never take an address that breaks the scope zone
 		 * of the destination.  We also skip an address if its zone
 		 * does not contain the outgoing interface.
 		 * XXX: we should probably use sin6_scope_id here.
 		 */
 		if (in6_setscope(&dst, ifp1, &dstzone) ||
 		    odstzone != dstzone) {
 			continue;
 		}
 		src = ia->ia_addr.sin6_addr;
 		if (in6_setscope(&src, ifp, &osrczone) ||
 		    in6_setscope(&src, ifp1, &srczone) ||
 		    osrczone != srczone) {
 			continue;
 		}
 
 		/* avoid unusable addresses */
 		if ((ia->ia6_flags &
 		     (IN6_IFF_NOTREADY | IN6_IFF_ANYCAST | IN6_IFF_DETACHED))) {
 				continue;
 		}
 		if (!V_ip6_use_deprecated && IFA6_IS_DEPRECATED(ia))
 			continue;
 
 		/* If jailed only take addresses of the jail into account. */
 		if (cred != NULL &&
 		    prison_check_ip6(cred, &ia->ia_addr.sin6_addr) != 0)
 			continue;
 
 		/* Rule 1: Prefer same address */
 		if (IN6_ARE_ADDR_EQUAL(&dst, &ia->ia_addr.sin6_addr)) {
 			ia_best = ia;
 			/*
 			 * BREAK() skips the bookkeeping done at `replace',
 			 * so record the scopes here.  They index the
 			 * per-scope statistics after the loop and would
 			 * otherwise still be -1, or describe an earlier
 			 * candidate.
 			 */
 			if (dst_scope < 0)
 				dst_scope = in6_addrscope(&dst);
 			best_scope = in6_addrscope(&ia->ia_addr.sin6_addr);
 			BREAK(1); /* there should be no better candidate */
 		}
 
 		if (ia_best == NULL)
 			REPLACE(0);
 
 		/* Rule 2: Prefer appropriate scope */
 		if (dst_scope < 0)
 			dst_scope = in6_addrscope(&dst);
 		new_scope = in6_addrscope(&ia->ia_addr.sin6_addr);
 		if (IN6_ARE_SCOPE_CMP(best_scope, new_scope) < 0) {
 			if (IN6_ARE_SCOPE_CMP(best_scope, dst_scope) < 0)
 				REPLACE(2);
 			NEXT(2);
 		} else if (IN6_ARE_SCOPE_CMP(new_scope, best_scope) < 0) {
 			if (IN6_ARE_SCOPE_CMP(new_scope, dst_scope) < 0)
 				NEXT(2);
 			REPLACE(2);
 		}
 
 		/*
 		 * Rule 3: Avoid deprecated addresses.  Note that the case of
 		 * !ip6_use_deprecated is already rejected above.
 		 */
 		if (!IFA6_IS_DEPRECATED(ia_best) && IFA6_IS_DEPRECATED(ia))
 			NEXT(3);
 		if (IFA6_IS_DEPRECATED(ia_best) && !IFA6_IS_DEPRECATED(ia))
 			REPLACE(3);
 
 		/* Rule 4: Prefer home addresses */
 		/*
 		 * XXX: This is a TODO.  We should probably merge the MIP6
 		 * case above.
 		 */
 
 		/* Rule 5: Prefer outgoing interface */
 		if (!(ND_IFINFO(ifp)->flags & ND6_IFF_NO_PREFER_IFACE)) {
 			if (ia_best->ia_ifp == ifp && ia->ia_ifp != ifp)
 				NEXT(5);
 			if (ia_best->ia_ifp != ifp && ia->ia_ifp == ifp)
 				REPLACE(5);
 		}
 
 		/*
 		 * Rule 6: Prefer matching label
 		 * Note that best_policy should be non-NULL here.
 		 */
 		if (dst_policy == NULL)
 			dst_policy = lookup_addrsel_policy(dstsock);
 		if (dst_policy->label != ADDR_LABEL_NOTAPP) {
 			new_policy = lookup_addrsel_policy(&ia->ia_addr);
 			if (dst_policy->label == best_policy->label &&
 			    dst_policy->label != new_policy->label)
 				NEXT(6);
 			if (dst_policy->label != best_policy->label &&
 			    dst_policy->label == new_policy->label)
 				REPLACE(6);
 		}
 
 		/*
 		 * Rule 7: Prefer public addresses.
 		 * We allow users to reverse the logic by configuring
 		 * a sysctl variable, so that privacy conscious users can
 		 * always prefer temporary addresses.
 		 */
 		if (opts == NULL ||
 		    opts->ip6po_prefer_tempaddr == IP6PO_TEMPADDR_SYSTEM) {
 			prefer_tempaddr = V_ip6_prefer_tempaddr;
 		} else if (opts->ip6po_prefer_tempaddr ==
 		    IP6PO_TEMPADDR_NOTPREFER) {
 			prefer_tempaddr = 0;
 		} else
 			prefer_tempaddr = 1;
 		if (!(ia_best->ia6_flags & IN6_IFF_TEMPORARY) &&
 		    (ia->ia6_flags & IN6_IFF_TEMPORARY)) {
 			if (prefer_tempaddr)
 				REPLACE(7);
 			else
 				NEXT(7);
 		}
 		if ((ia_best->ia6_flags & IN6_IFF_TEMPORARY) &&
 		    !(ia->ia6_flags & IN6_IFF_TEMPORARY)) {
 			if (prefer_tempaddr)
 				NEXT(7);
 			else
 				REPLACE(7);
 		}
 
 		/*
 		 * Rule 8: prefer addresses on alive interfaces.
 		 * This is a KAME specific rule.
 		 */
 		if ((ia_best->ia_ifp->if_flags & IFF_UP) &&
 		    !(ia->ia_ifp->if_flags & IFF_UP))
 			NEXT(8);
 		if (!(ia_best->ia_ifp->if_flags & IFF_UP) &&
 		    (ia->ia_ifp->if_flags & IFF_UP))
 			REPLACE(8);
 
 		/*
 		 * Rule 9: prefer address with better virtual status.
 		 */
 		if (ifa_preferred(&ia_best->ia_ifa, &ia->ia_ifa))
 			REPLACE(9);
 		if (ifa_preferred(&ia->ia_ifa, &ia_best->ia_ifa))
 			NEXT(9);
 
 		/*
 		 * Rule 10: prefer address with `prefer_source' flag.
 		 */
 		if ((ia_best->ia6_flags & IN6_IFF_PREFER_SOURCE) == 0 &&
 		    (ia->ia6_flags & IN6_IFF_PREFER_SOURCE) != 0)
 			REPLACE(10);
 		if ((ia_best->ia6_flags & IN6_IFF_PREFER_SOURCE) != 0 &&
 		    (ia->ia6_flags & IN6_IFF_PREFER_SOURCE) == 0)
 			NEXT(10);
 
 		/*
 		 * Rule 14: Use longest matching prefix.
 		 * Note: in the address selection draft, this rule is
 		 * documented as "Rule 8".  However, since it is also
 		 * documented that this rule can be overridden, we assign
 		 * a large number so that it is easy to assign smaller numbers
 		 * to more preferred rules.
 		 */
 		new_matchlen = in6_matchlen(&ia->ia_addr.sin6_addr, &dst);
 		if (best_matchlen < new_matchlen)
 			REPLACE(14);
 		if (new_matchlen < best_matchlen)
 			NEXT(14);
 
 		/* Rule 15 is reserved. */
 
 		/*
 		 * Last resort: just keep the current candidate.
 		 * Or, do we need more rules?
 		 */
 		continue;
 
 	  replace:
 		/*
 		 * Adopt the candidate; reuse whatever the rules above have
 		 * already computed for it and compute the rest now.
 		 */
 		ia_best = ia;
 		best_scope = (new_scope >= 0 ? new_scope :
 			      in6_addrscope(&ia_best->ia_addr.sin6_addr));
 		best_policy = (new_policy ? new_policy :
 			       lookup_addrsel_policy(&ia_best->ia_addr));
 		best_matchlen = (new_matchlen >= 0 ? new_matchlen :
 				 in6_matchlen(&ia_best->ia_addr.sin6_addr,
 					      &dst));
 
 	  next:
 		continue;
 
 	  out:
 		break;
 	}
 
 	if ((ia = ia_best) == NULL) {
 		IN6_IFADDR_RUNLOCK(&in6_ifa_tracker);
 		IP6STAT_INC(ip6s_sources_none);
 		return (EADDRNOTAVAIL);
 	}
 
 	/*
 	 * At this point at least one of the addresses belonged to the jail
 	 * but it could still be, that we want to further restrict it, e.g.
 	 * theoretically IN6_IS_ADDR_LOOPBACK.
 	 * It must not be IN6_IS_ADDR_UNSPECIFIED anymore.
 	 * prison_local_ip6() will fix an IN6_IS_ADDR_LOOPBACK but should
 	 * let all others previously selected pass.
 	 * Use tmp to not change ::1 on lo0 to the primary jail address.
 	 */
 	tmp = ia->ia_addr.sin6_addr;
 	if (cred != NULL && prison_local_ip6(cred, &tmp, (inp != NULL &&
 	    (inp->inp_flags & IN6P_IPV6_V6ONLY) != 0)) != 0) {
 		IN6_IFADDR_RUNLOCK(&in6_ifa_tracker);
 		IP6STAT_INC(ip6s_sources_none);
 		return (EADDRNOTAVAIL);
 	}
 
 	if (ifpp)
 		*ifpp = ifp;
 
 	bcopy(&tmp, srcp, sizeof(*srcp));
 	/* Per-scope statistics, indexed by the scope of the chosen address. */
 	if (ia->ia_ifp == ifp)
 		IP6STAT_INC(ip6s_sources_sameif[best_scope]);
 	else
 		IP6STAT_INC(ip6s_sources_otherif[best_scope]);
 	if (dst_scope == best_scope)
 		IP6STAT_INC(ip6s_sources_samescope[best_scope]);
 	else
 		IP6STAT_INC(ip6s_sources_otherscope[best_scope]);
 	if (IFA6_IS_DEPRECATED(ia))
 		IP6STAT_INC(ip6s_sources_deprecated[best_scope]);
 	IN6_IFADDR_RUNLOCK(&in6_ifa_tracker);
 	return (0);
 }
 
 /*
  * Socket front end of in6_selectsrc(): pick the source address for
  * @dstsock using @inp, @opts and @cred, and store it in @srcp.  The FIB
  * is the one recorded in @inp, which therefore must not be NULL.
  *
  * If @hlim is not NULL, it receives the hop limit computed by
  * in6_selecthlim() for the selected outgoing interface.  If
  * @scope_ambiguous is non-zero and an outgoing interface was selected,
  * the zone of that interface is embedded into dstsock->sin6_addr.
  *
  * Returns 0 on success, otherwise the error of in6_selectsrc() or
  * in6_setscope().
  */
 int
 in6_selectsrc_socket(struct sockaddr_in6 *dstsock, struct ip6_pktopts *opts,
     struct inpcb *inp, struct ucred *cred, int scope_ambiguous,
     struct in6_addr *srcp, int *hlim)
 {
 	struct ifnet *outifp = NULL;
 	int rc;
 
 	rc = in6_selectsrc(inp->inp_inc.inc_fibnum, dstsock, opts, inp, cred,
 	    &outifp, srcp);
 	if (rc != 0)
 		return (rc);
 
 	if (hlim != NULL)
 		*hlim = in6_selecthlim(inp, outifp);
 
 	/*
 	 * Application should provide a proper zone ID or the use of
 	 * default zone IDs should be enabled.  Unfortunately, some
 	 * applications do not behave as they should, so a workaround is
 	 * needed: even if an appropriate ID was not supplied, the zone ID
 	 * is derived from the outgoing interface whenever one is known.
 	 */
 	if (outifp != NULL && scope_ambiguous != 0)
 		return (in6_setscope(&dstsock->sin6_addr, outifp, NULL));
 
 	return (0);
 }
 
 /*
  * Socket-less front end of in6_selectsrc(): pick the source address for
  * destination @dst with zone @scopeid in FIB @fibnum and store it in
  * @srcp.  @ifp is handed to in6_selectsrc() as the interface hint.
  *
  * If @hlim is not NULL, it receives the hop limit computed by
  * in6_selecthlim() for the interface reported back by in6_selectsrc();
  * this is done whether or not the selection succeeded.
  *
  * Returns the result of in6_selectsrc(), 0 on success.
  *
  * Used by non-socket based consumers (ND code mostly)
  */
 int
 in6_selectsrc_addr(uint32_t fibnum, const struct in6_addr *dst,
     uint32_t scopeid, struct ifnet *ifp, struct in6_addr *srcp,
     int *hlim)
 {
 	struct sockaddr_in6 dst_sa;
 	struct ifnet *outifp = ifp;
 	int rc;
 
 	/* Build a sockaddr for the destination and embed its zone. */
 	bzero(&dst_sa, sizeof(dst_sa));
 	dst_sa.sin6_len = sizeof(dst_sa);
 	dst_sa.sin6_family = AF_INET6;
 	dst_sa.sin6_scope_id = scopeid;
 	dst_sa.sin6_addr = *dst;
 	sa6_embedscope(&dst_sa, 0);
 
 	rc = in6_selectsrc(fibnum, &dst_sa, NULL, NULL, NULL, &outifp, srcp);
 
 	if (hlim != NULL)
 		*hlim = in6_selecthlim(NULL, outifp);
 
 	return (rc);
 }
 
 /*
  * Determine the outgoing interface and, where a route lookup is made,
  * the nexthop for a packet to @dstsock.
  *
  * The following sources are consulted in order:
  *  1. the interface index in the IPV6_PKTINFO data of @opts, if non-zero;
  *     no route lookup follows when the interface exists and @norouteok
  *     is set, @retnh is NULL or the destination is multicast;
  *  2. for a multicast destination, the interface recorded in @mopts;
  *  3. the zone ID embedded in a link-local or node-local multicast
  *     destination;
  *  4. the nexthop given in @opts (ip6po_nexthop), resolved through its
  *     zone ID or through the route cached in ip6po_nextroute;
  *  5. the route cached in @ro, looked up again with fib6_lookup() in
  *     @fibnum when it is missing, invalid or for another destination.
  *
  * @retifp, if not NULL, receives nh->nh_aifp when a nexthop was found
  * and the selected interface otherwise.  @retnh, if not NULL, receives
  * the nexthop, which may be NULL.  @flowid is passed to fib6_lookup().
  *
  * Returns 0 on success or EHOSTUNREACH, in which case ip6s_noroute is
  * incremented as well.
  */
 static int
 selectroute(struct sockaddr_in6 *dstsock, struct ip6_pktopts *opts,
     struct ip6_moptions *mopts, struct route_in6 *ro,
     struct ifnet **retifp, struct nhop_object **retnh, int norouteok,
     u_int fibnum, uint32_t flowid)
 {
 	int error = 0;
 	struct ifnet *ifp = NULL;
 	struct nhop_object *nh = NULL;
 	struct sockaddr_in6 *sin6_next;
 	struct in6_pktinfo *pi = NULL;
 	struct in6_addr *dst = &dstsock->sin6_addr;
 	uint32_t zoneid;
 #if 0
 	char ip6buf[INET6_ADDRSTRLEN];
 
 	if (dstsock->sin6_addr.s6_addr32[0] == 0 &&
 	    dstsock->sin6_addr.s6_addr32[1] == 0 &&
 	    !IN6_IS_ADDR_LOOPBACK(&dstsock->sin6_addr)) {
 		printf("%s: strange destination %s\n", __func__,
 		       ip6_sprintf(ip6buf, &dstsock->sin6_addr));
 	} else {
 		printf("%s: destination = %s%%%d\n", __func__,
 		       ip6_sprintf(ip6buf, &dstsock->sin6_addr),
 		       dstsock->sin6_scope_id); /* for debug */
 	}
 #endif
 
 	/* If the caller specify the outgoing interface explicitly, use it. */
 	if (opts && (pi = opts->ip6po_pktinfo) != NULL && pi->ipi6_ifindex) {
 		/* XXX boundary check is assumed to be already done. */
 		ifp = ifnet_byindex(pi->ipi6_ifindex);
 		if (ifp != NULL &&
 		    (norouteok || retnh == NULL ||
 		    IN6_IS_ADDR_MULTICAST(dst))) {
 			/*
 			 * we do not have to check or get the route for
 			 * multicast.
 			 */
 			goto done;
 		} else
 			goto getroute;
 	}
 	/*
 	 * If the destination address is a multicast address and the outgoing
 	 * interface for the address is specified by the caller, use it.
 	 */
 	if (IN6_IS_ADDR_MULTICAST(dst) &&
 	    mopts != NULL && (ifp = mopts->im6o_multicast_ifp) != NULL) {
 		goto done; /* we do not need a route for multicast. */
 	}
 	/*
 	 * If destination address is link- or node-local multicast,
 	 * use its embedded scope zone id to determine outgoing interface.
 	 * NOTE(review): the earlier wording also named unicast link-local
 	 * (LLA) destinations, but only the two multicast scopes are tested
 	 * here -- confirm which of the two is intended.
 	 */
 	if (IN6_IS_ADDR_MC_LINKLOCAL(dst) ||
 	    IN6_IS_ADDR_MC_NODELOCAL(dst)) {
 		zoneid = ntohs(in6_getscope(dst));
 		if (zoneid > 0) {
 			ifp = in6_getlinkifnet(zoneid);
 			goto done;
 		}
 	}
 
   getroute:
 	/*
 	 * If the next hop address for the packet is specified by the caller,
 	 * use it as the gateway.
 	 */
 	if (opts && opts->ip6po_nexthop) {
 		struct route_in6 *ron;
 
 		sin6_next = satosin6(opts->ip6po_nexthop);
 		if (IN6_IS_ADDR_LINKLOCAL(&sin6_next->sin6_addr)) {
 			/*
 			 * Next hop is LLA, thus it should be neighbor.
 			 * Determine outgoing interface by zone index.
 			 */
 			zoneid = ntohs(in6_getscope(&sin6_next->sin6_addr));
 			if (zoneid > 0) {
 				ifp = in6_getlinkifnet(zoneid);
 				goto done;
 			}
 		}
 		ron = &opts->ip6po_nextroute;
 		/*
 		 * Use a cached route if it exists and is valid; drop it if
 		 * it is invalid or was cached for another nexthop address.
 		 */
 		if (ron->ro_nh != NULL && (
 		    !NH_IS_VALID(ron->ro_nh) ||
 		    ron->ro_dst.sin6_family != AF_INET6 ||
 		    !IN6_ARE_ADDR_EQUAL(&ron->ro_dst.sin6_addr,
 			&sin6_next->sin6_addr)))
 			RO_NHFREE(ron);
 		if (ron->ro_nh == NULL) {
 			ron->ro_dst = *sin6_next;
 			/*
 			 * sin6_next is not link-local OR scopeid is 0,
 			 * no need to clear scope
 			 */
 			ron->ro_nh = fib6_lookup(fibnum,
 			    &sin6_next->sin6_addr, 0, NHR_REF, flowid);
 		}
 		/*
 		 * The node identified by that address must be a
 		 * neighbor of the sending host.
 		 */
 		if (ron->ro_nh == NULL ||
 		    (ron->ro_nh->nh_flags & NHF_GATEWAY) != 0)
 			error = EHOSTUNREACH;
 		else {
 			nh = ron->ro_nh;
 			ifp = nh->nh_ifp;
 		}
 		goto done;
 	}
 
 	/*
 	 * Use a cached route if it exists and is valid, else try to allocate
 	 * a new one.  Note that we should check the address family of the
 	 * cached destination, in case of sharing the cache with IPv4.
 	 */
 	if (ro) {
 		if (ro->ro_nh &&
 		    (!NH_IS_VALID(ro->ro_nh) ||
 		     ((struct sockaddr *)(&ro->ro_dst))->sa_family != AF_INET6 ||
 		     !IN6_ARE_ADDR_EQUAL(&satosin6(&ro->ro_dst)->sin6_addr,
 		     dst))) {
 			RO_NHFREE(ro);
 		}
 		if (ro->ro_nh == (struct nhop_object *)NULL) {
 			struct sockaddr_in6 *sa6;
 
 			/* No route yet, so try to acquire one */
 			bzero(&ro->ro_dst, sizeof(struct sockaddr_in6));
 			sa6 = (struct sockaddr_in6 *)&ro->ro_dst;
 			*sa6 = *dstsock;
 			sa6->sin6_scope_id = 0;
 
 			/*
 			 * Currently dst has scopeid embedded iff it is LL.
 			 * New routing API accepts scopeid as a separate argument.
 			 * Convert dst before/after doing lookup
 			 */
 			uint32_t scopeid = 0;
 			if (IN6_IS_SCOPE_LINKLOCAL(&sa6->sin6_addr)) {
 				/* Unwrap in6_getscope() and in6_clearscope() */
 				scopeid = ntohs(sa6->sin6_addr.s6_addr16[1]);
 				sa6->sin6_addr.s6_addr16[1] = 0;
 
 			}
 
 			ro->ro_nh = fib6_lookup(fibnum,
 			    &sa6->sin6_addr, scopeid, NHR_REF, flowid);
 
 			/* Put the zone back into the cached destination. */
 			if (IN6_IS_SCOPE_LINKLOCAL(&sa6->sin6_addr))
 				sa6->sin6_addr.s6_addr16[1] = htons(scopeid);
 		}
 				
 		/*
 		 * do not care about the result if we have the nexthop
 		 * explicitly specified.
 		 * NOTE(review): this looks unreachable -- the nexthop branch
 		 * under `getroute' above always ends in `goto done'.
 		 */
 		if (opts && opts->ip6po_nexthop)
 			goto done;
 
 		if (ro->ro_nh)
 			ifp = ro->ro_nh->nh_ifp;
 		else
 			error = EHOSTUNREACH;
 		nh = ro->ro_nh;
 
 		/*
 		 * Check if the outgoing interface conflicts with
 		 * the interface specified by ipi6_ifindex (if specified).
 		 * Note that loopback interface is always okay.
 		 * (this may happen when we are sending a packet to one of
 		 *  our own addresses.)
 		 */
 		if (ifp && opts && opts->ip6po_pktinfo &&
 		    opts->ip6po_pktinfo->ipi6_ifindex) {
 			if (!(ifp->if_flags & IFF_LOOPBACK) &&
 			    ifp->if_index !=
 			    opts->ip6po_pktinfo->ipi6_ifindex) {
 				error = EHOSTUNREACH;
 				goto done;
 			}
 		}
 	}
 
   done:
 	if (ifp == NULL && nh == NULL) {
 		/*
 		 * This can happen if the caller did not pass a cached route
 		 * nor any other hints.  We treat this case an error.
 		 */
 		error = EHOSTUNREACH;
 	}
 	if (error == EHOSTUNREACH)
 		IP6STAT_INC(ip6s_noroute);
 
 	/* With a nexthop, report its nh_aifp rather than the local ifp. */
 	if (retifp != NULL) {
 		if (nh != NULL)
 			*retifp = nh->nh_aifp;
 		else
 			*retifp = ifp;
 	}
 
 	if (retnh != NULL)
 		*retnh = nh;	/* nh may be NULL */
 
 	return (error);
 }
 
 /*
  * Choose the outgoing interface for dstsock and return it through *retifp.
  *
  * The lookup itself is done by selectroute() on a scratch route_in6 that
  * lives on this function's stack.  If selectroute() fails but the caller
  * supplied oifp and is using RT_DEFAULT_FIB, oifp is returned instead and
  * the error is cleared.  Routes flagged reject/blackhole are refused.
  *
  * Returns 0 on success, otherwise an errno value.
  */
 static int
 in6_selectif(struct sockaddr_in6 *dstsock, struct ip6_pktopts *opts,
     struct ip6_moptions *mopts, struct ifnet **retifp,
     struct ifnet *oifp, u_int fibnum)
 {
 	struct route_in6 sro;
 	struct nhop_object *nh;
 	uint16_t nh_flags;
 	int error;
 
 	KASSERT(retifp != NULL, ("%s: retifp is NULL", __func__));
 
 	nh = NULL;
 	nh_flags = 0;
 	bzero(&sro, sizeof(sro));
 
 	error = selectroute(dstsock, opts, mopts, &sro, retifp, &nh, 1,
 	    fibnum, 0);
 
 	if (nh != NULL) {
 		/* Remember the flags: nh may be released just below. */
 		nh_flags = nh->nh_flags;
 		/* Release nh only when it is the one held by the scratch route. */
 		if (nh == sro.ro_nh) {
 			NH_FREE(nh);
 		}
 	}
 
 	if (error != 0) {
 		/* Help ND. See oifp comment in in6_selectsrc(). */
 		if (oifp == NULL || fibnum != RT_DEFAULT_FIB)
 			return (error);
 		*retifp = oifp;
 		return (0);
 	}
 
 	/*
 	 * Refuse reject and blackhole routes.
 	 * XXX: strictly this belongs in the L2 output routine.  Without the
 	 * check here the following could happen:
 	 * - a reject route is installed for a scoped prefix (e.g. fe80::/10);
 	 * - a packet is sent to a destination inside that prefix whose scope
 	 *   zone is ambiguous;
 	 * - the outgoing interface is taken from the route and then used to
 	 *   disambiguate the zone;
 	 * - ip6_output() looks up a route again for the "new" destination,
 	 *   which may well be valid;
 	 * - no error is reported on output.
 	 * That is not very harmful but it is confusing, so reject it here.
 	 */
 	if ((nh_flags & (NHF_REJECT | NHF_BLACKHOLE)) == 0)
 		return (0);
 
 	return ((nh_flags & NHF_HOST) != 0 ? EHOSTUNREACH : ENETUNREACH);
 }
 
 /*
  * Public wrapper function to selectroute().
  *
  * All arguments are forwarded unchanged; the one argument this wrapper
  * fixes is the seventh, which is passed as 0 (in6_selectif() passes 1).
  * The return value is whatever selectroute() returns.
  */
 int
 in6_selectroute(struct sockaddr_in6 *dstsock, struct ip6_pktopts *opts,
     struct ip6_moptions *mopts, struct route_in6 *ro,
     struct ifnet **retifp, struct nhop_object **retnh, u_int fibnum, uint32_t flowid)
 {
 
 	return (selectroute(dstsock, opts, mopts, ro, retifp,
 	    retnh, 0, fibnum, flowid));
 }
 
 /*
  * Default hop limit selection. The precedence is as follows:
  * 1. Hoplimit value specified via ioctl.
  * 2. (If the outgoing interface is detected) the current
  *     hop limit of the interface specified by router advertisement.
  * 3. The system default hoplimit.
  *
  * When no interface is passed in but the PCB has a foreign address, the
  * interface is taken from a FIB lookup on that address before falling
  * back to the system default.
  */
 int
 in6_selecthlim(struct inpcb *inp, struct ifnet *ifp)
 {
 
 	/* A non-negative in6p_hops on the PCB takes precedence. */
 	if (inp && inp->in6p_hops >= 0)
 		return (inp->in6p_hops);
 	else if (ifp)
 		return (ND_IFINFO(ifp)->chlim);
 	else if (inp && !IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_faddr)) {
-		struct nhop6_basic nh6;
+		struct nhop_object *nh;
 		struct in6_addr dst;
 		uint32_t fibnum, scopeid;
 		int hlim;
 
 		/* Look the foreign address up in the PCB's own FIB. */
 		fibnum = inp->inp_inc.inc_fibnum;
 		/* Split the address into plain address and scope id for the lookup. */
 		in6_splitscope(&inp->in6p_faddr, &dst, &scopeid);
-		if (fib6_lookup_nh_basic(fibnum, &dst, scopeid, 0, 0, &nh6)==0){
-			hlim = ND_IFINFO(nh6.nh_ifp)->chlim;
+		nh = fib6_lookup(fibnum, &dst, scopeid, 0, 0);
+		if (nh != NULL) {
+			hlim = ND_IFINFO(nh->nh_ifp)->chlim;
 			return (hlim);
 		}
 	}
 	/* Nothing more specific was found. */
 	return (V_ip6_defhlim);
 }
 
 /*
  * XXX: this is borrowed from in6_pcbbind(). If possible, we should
  * share this function by all *bsd*...
  *
  * Assign a local port to inp: check laddr with prison_local_ip6(), obtain
  * a port from in_pcb_lport(), store it in the PCB and insert the PCB into
  * the hash.  Returns 0 on success, the error from either helper, or EAGAIN
  * when the hash insertion fails (in which case the local address and port
  * in the PCB are reset).
  */
 int
 in6_pcbsetport(struct in6_addr *laddr, struct inpcb *inp, struct ucred *cred)
 {
 #ifdef INVARIANTS
 	struct inpcbinfo *pcbinfo = inp->inp_pcbinfo;
 #endif
 	struct socket *so;
 	int error, lookupflags;
 	u_int16_t lport;
 
 	INP_WLOCK_ASSERT(inp);
 	INP_HASH_WLOCK_ASSERT(pcbinfo);
 
 	so = inp->inp_socket;
 	lport = 0;
 	lookupflags = 0;
 
 	error = prison_local_ip6(cred, laddr,
 	    ((inp->inp_flags & IN6P_IPV6_V6ONLY) != 0));
 	if (error != 0)
 		return (error);
 
 	/* XXX: this is redundant when called from in6_pcbbind */
 	if ((so->so_options &
 	    (SO_REUSEADDR | SO_REUSEPORT | SO_REUSEPORT_LB)) == 0)
 		lookupflags = INPLOOKUP_WILDCARD;
 
 	inp->inp_flags |= INP_ANONPORT;
 
 	error = in_pcb_lport(inp, NULL, &lport, cred, lookupflags);
 	if (error != 0)
 		return (error);
 
 	inp->inp_lport = lport;
 	if (in_pcbinshash(inp) == 0)
 		return (0);
 
 	/* Hash insertion failed: undo the address and port assignment. */
 	inp->in6p_laddr = in6addr_any;
 	inp->inp_lport = 0;
 	return (EAGAIN);
 }
 
 /*
  * Set up the address selection policy state: the policy queue, the
  * "last resort" default policy and, for the default vnet only, the locks.
  */
 void
 addrsel_policy_init(void)
 {
 
 	init_policy_queue();
 
 	/* initialize the "last resort" policy */
 	bzero(&V_defaultaddrpolicy, sizeof(V_defaultaddrpolicy));
 	V_defaultaddrpolicy.label = ADDR_LABEL_NOTAPP;
 
 	/* The locks are initialized once, from the default vnet. */
 	if (IS_DEFAULT_VNET(curvnet)) {
 		ADDRSEL_LOCK_INIT();
 		ADDRSEL_SXLOCK_INIT();
 	}
 }
 
 /*
  * Return the policy that matches key, or the default policy when
  * match_addrsel_policy() finds none.  The use counter is bumped only for
  * a policy found in the table.  The lookup runs under ADDRSEL_LOCK.
  */
 static struct in6_addrpolicy *
 lookup_addrsel_policy(struct sockaddr_in6 *key)
 {
 	struct in6_addrpolicy *pol;
 
 	ADDRSEL_LOCK();
 	pol = match_addrsel_policy(key);
 	if (pol != NULL)
 		pol->use++;
 	else
 		pol = &V_defaultaddrpolicy;
 	ADDRSEL_UNLOCK();
 
 	return (pol);
 }
 
 /*
  * Subroutines to manage the address selection policy table via sysctl.
  */
 /* Argument passed to the table-walk callback: the sysctl request in use. */
 struct walkarg {
 	struct sysctl_req *w_req;
 };
 
 static int in6_src_sysctl(SYSCTL_HANDLER_ARGS);
 SYSCTL_DECL(_net_inet6_ip6);
 /* Read-only node (CTLFLAG_RD) served by in6_src_sysctl(). */
 static SYSCTL_NODE(_net_inet6_ip6, IPV6CTL_ADDRCTLPOLICY, addrctlpolicy,
     CTLFLAG_RD | CTLFLAG_MPSAFE, in6_src_sysctl,
     "");
 
 static int
 in6_src_sysctl(SYSCTL_HANDLER_ARGS)
 {
 	struct walkarg w;
 
 	if (req->newptr)
 		return EPERM;
 
 	bzero(&w, sizeof(w));
 	w.w_req = req;
 
 	return (walk_addrsel_policy(dump_addrsel_policyent, &w));
 }
 
 /*
  * Add or delete one address selection policy entry.
  *
  * cmd must be SIOCAADDRCTL_POLICY or SIOCDADDRCTL_POLICY, anything else
  * yields EOPNOTSUPP.  data is read as a struct in6_addrpolicy; the copy is
  * validated and normalized before being handed to the add/delete routine,
  * whose result is returned.
  */
 int
 in6_src_ioctl(u_long cmd, caddr_t data)
 {
 	struct in6_addrpolicy ent0;
 
 	/* check for safety */
 	switch (cmd) {
 	case SIOCAADDRCTL_POLICY:
 	case SIOCDADDRCTL_POLICY:
 		break;
 	default:
 		return (EOPNOTSUPP);
 	}
 
 	ent0 = *(struct in6_addrpolicy *)data;
 
 	/* ADDR_LABEL_NOTAPP is the label of the built-in default policy. */
 	if (ent0.label == ADDR_LABEL_NOTAPP)
 		return (EINVAL);
 	/* check if the prefix mask is consecutive. */
 	if (in6_mask2len(&ent0.addrmask.sin6_addr, NULL) < 0)
 		return (EINVAL);
 	/* clear any trailing garbage in the prefix address. */
 	IN6_MASK_ADDR(&ent0.addr.sin6_addr, &ent0.addrmask.sin6_addr);
 	ent0.use = 0;
 
 	if (cmd == SIOCAADDRCTL_POLICY)
 		return (add_addrsel_policyent(&ent0));
 
 	return (delete_addrsel_policyent(&ent0));
 }
 
 /*
  * The following is an implementation of the policy table using a
  * simple tail queue.
  * XXX such details should be hidden.
  * XXX implementation using binary tree should be more efficient.
  */
 /* One table entry: the queue linkage followed by the policy itself. */
 struct addrsel_policyent {
 	TAILQ_ENTRY(addrsel_policyent) ape_entry;
 	struct in6_addrpolicy ape_policy;
 };
 
 TAILQ_HEAD(addrsel_policyhead, addrsel_policyent);
 
 /* Head of the policy queue; accessed through V_addrsel_policytab. */
 VNET_DEFINE_STATIC(struct addrsel_policyhead, addrsel_policytab);
 #define	V_addrsel_policytab		VNET(addrsel_policytab)
 
 /* Initialize the policy queue head to an empty tail queue. */
 static void
 init_policy_queue(void)
 {
 
 	TAILQ_INIT(&V_addrsel_policytab);
 }
 
 static int
 add_addrsel_policyent(struct in6_addrpolicy *newpolicy)
 {
 	struct addrsel_policyent *new, *pol;
 
 	new = malloc(sizeof(*new), M_IFADDR,
 	       M_WAITOK);
 	ADDRSEL_XLOCK();
 	ADDRSEL_LOCK();
 
 	/* duplication check */
 	TAILQ_FOREACH(pol, &V_addrsel_policytab, ape_entry) {
 		if (IN6_ARE_ADDR_EQUAL(&newpolicy->addr.sin6_addr,
 				       &pol->ape_policy.addr.sin6_addr) &&
 		    IN6_ARE_ADDR_EQUAL(&newpolicy->addrmask.sin6_addr,
 				       &pol->ape_policy.addrmask.sin6_addr)) {
 			ADDRSEL_UNLOCK();
 			ADDRSEL_XUNLOCK();
 			free(new, M_IFADDR);
 			return (EEXIST);	/* or override it? */
 		}
 	}
 
 	bzero(new, sizeof(*new));
 
 	/* XXX: should validate entry */
 	new->ape_policy = *newpolicy;
 
 	TAILQ_INSERT_TAIL(&V_addrsel_policytab, new, ape_entry);
 	ADDRSEL_UNLOCK();
 	ADDRSEL_XUNLOCK();
 
 	return (0);
 }
 
 static int
 delete_addrsel_policyent(struct in6_addrpolicy *key)
 {
 	struct addrsel_policyent *pol;
 
 	ADDRSEL_XLOCK();
 	ADDRSEL_LOCK();
 
 	/* search for the entry in the table */
 	TAILQ_FOREACH(pol, &V_addrsel_policytab, ape_entry) {
 		if (IN6_ARE_ADDR_EQUAL(&key->addr.sin6_addr,
 		    &pol->ape_policy.addr.sin6_addr) &&
 		    IN6_ARE_ADDR_EQUAL(&key->addrmask.sin6_addr,
 		    &pol->ape_policy.addrmask.sin6_addr)) {
 			break;
 		}
 	}
 	if (pol == NULL) {
 		ADDRSEL_UNLOCK();
 		ADDRSEL_XUNLOCK();
 		return (ESRCH);
 	}
 
 	TAILQ_REMOVE(&V_addrsel_policytab, pol, ape_entry);
 	ADDRSEL_UNLOCK();
 	ADDRSEL_XUNLOCK();
 	free(pol, M_IFADDR);
 
 	return (0);
 }
 
 /*
  * Call callback(policy, w) for every entry of the policy table while
  * holding the shared lock.  The walk stops at the first non-zero return
  * from the callback, and that value is returned; 0 means every callback
  * succeeded (or the table is empty).
  */
 static int
 walk_addrsel_policy(int (*callback)(struct in6_addrpolicy *, void *), void *w)
 {
 	struct addrsel_policyent *ent;
 	int error;
 
 	error = 0;
 
 	ADDRSEL_SLOCK();
 	TAILQ_FOREACH(ent, &V_addrsel_policytab, ape_entry) {
 		error = (*callback)(&ent->ape_policy, w);
 		if (error != 0)
 			break;
 	}
 	ADDRSEL_SUNLOCK();
 
 	return (error);
 }
 
 /*
  * Table-walk callback: write one policy structure to the sysctl request
  * carried in arg (a struct walkarg) and return SYSCTL_OUT()'s result.
  */
 static int
 dump_addrsel_policyent(struct in6_addrpolicy *pol, void *arg)
 {
 	struct walkarg *warg;
 
 	warg = arg;
 
 	return (SYSCTL_OUT(warg->w_req, pol, sizeof(*pol)));
 }
 
 /*
  * Find the table entry that matches key with the longest prefix.
  *
  * For each entry the address bytes are compared under the entry's mask,
  * stopping at the first zero mask byte; the number of mask bits seen is
  * the match length.  The first entry with the greatest match length wins.
  * Returns NULL when no entry matches.
  */
 static struct in6_addrpolicy *
 match_addrsel_policy(struct sockaddr_in6 *key)
 {
 	struct addrsel_policyent *ent;
 	struct in6_addrpolicy *best, *cand;
 	u_char *maskp, *maskend, *keyp, *addrp, mbyte;
 	int len, bestlen, mismatch;
 
 	best = NULL;
 	bestlen = -1;
 
 	TAILQ_FOREACH(ent, &V_addrsel_policytab, ape_entry) {
 		cand = &ent->ape_policy;
 		maskp = (u_char *)&cand->addrmask.sin6_addr;
 		maskend = maskp + 16;	/* XXX: scope field? */
 		keyp = (u_char *)&key->sin6_addr;
 		addrp = (u_char *)&cand->addr.sin6_addr;
 
 		len = 0;
 		mismatch = 0;
 		while (maskp < maskend && *maskp) {
 			mbyte = *maskp;
 			if ((*keyp & mbyte) != *addrp) {
 				mismatch = 1;	/* not match */
 				break;
 			}
 			if (mbyte == 0xff) {
 				/* short cut for a typical case */
 				len += 8;
 			} else {
 				/* count the leading one bits of a partial byte */
 				for (; mbyte >= 0x80; mbyte <<= 1)
 					len++;
 			}
 			maskp++;
 			keyp++;
 			addrp++;
 		}
 		if (mismatch)
 			continue;
 
 		/* matched.  check if this is better than the current best. */
 		if (best == NULL || len > bestlen) {
 			best = cand;
 			bestlen = len;
 		}
 	}
 
 	return (best);
 }
Index: head/sys/netpfil/ipfw/ip_fw2.c
===================================================================
--- head/sys/netpfil/ipfw/ip_fw2.c	(revision 362899)
+++ head/sys/netpfil/ipfw/ip_fw2.c	(revision 362900)
@@ -1,3557 +1,3560 @@
 /*-
  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
  *
  * Copyright (c) 2002-2009 Luigi Rizzo, Universita` di Pisa
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 /*
  * The FreeBSD IP packet firewall, main file
  */
 
 #include "opt_ipfw.h"
 #include "opt_ipdivert.h"
 #include "opt_inet.h"
 #ifndef INET
 #error "IPFIREWALL requires INET"
 #endif /* INET */
 #include "opt_inet6.h"
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/condvar.h>
 #include <sys/counter.h>
 #include <sys/eventhandler.h>
 #include <sys/malloc.h>
 #include <sys/mbuf.h>
 #include <sys/kernel.h>
 #include <sys/lock.h>
 #include <sys/jail.h>
 #include <sys/module.h>
 #include <sys/priv.h>
 #include <sys/proc.h>
 #include <sys/rwlock.h>
 #include <sys/rmlock.h>
 #include <sys/socket.h>
 #include <sys/socketvar.h>
 #include <sys/sysctl.h>
 #include <sys/syslog.h>
 #include <sys/ucred.h>
 #include <net/ethernet.h> /* for ETHERTYPE_IP */
 #include <net/if.h>
 #include <net/if_var.h>
 #include <net/route.h>
+#include <net/route/nhop.h>
 #include <net/pfil.h>
 #include <net/vnet.h>
 
 #include <netpfil/pf/pf_mtag.h>
 
 #include <netinet/in.h>
 #include <netinet/in_var.h>
 #include <netinet/in_pcb.h>
 #include <netinet/ip.h>
 #include <netinet/ip_var.h>
 #include <netinet/ip_icmp.h>
 #include <netinet/ip_fw.h>
 #include <netinet/ip_carp.h>
 #include <netinet/pim.h>
 #include <netinet/tcp_var.h>
 #include <netinet/udp.h>
 #include <netinet/udp_var.h>
 #include <netinet/sctp.h>
 #include <netinet/sctp_crc32.h>
 #include <netinet/sctp_header.h>
 
 #include <netinet/ip6.h>
 #include <netinet/icmp6.h>
 #include <netinet/in_fib.h>
 #ifdef INET6
 #include <netinet6/in6_fib.h>
 #include <netinet6/in6_pcb.h>
 #include <netinet6/scope6_var.h>
 #include <netinet6/ip6_var.h>
 #endif
 
 #include <net/if_gre.h> /* for struct grehdr */
 
 #include <netpfil/ipfw/ip_fw_private.h>
 
 #include <machine/in_cksum.h>	/* XXX for in_cksum */
 
 #ifdef MAC
 #include <security/mac/mac_framework.h>
 #endif
 
 /*
  * static variables followed by global ones.
  * All ipfw global variables are here.
  */
 
 /* Exported as the net.inet6.ip6.fw.deny_unknown_exthdrs sysctl. */
 VNET_DEFINE_STATIC(int, fw_deny_unknown_exthdrs);
 #define	V_fw_deny_unknown_exthdrs	VNET(fw_deny_unknown_exthdrs)
 
 /* Exported as the net.inet6.ip6.fw.permit_single_frag6 sysctl; on by default. */
 VNET_DEFINE_STATIC(int, fw_permit_single_frag6) = 1;
 #define	V_fw_permit_single_frag6	VNET(fw_permit_single_frag6)
 
 /* Compile-time default for the read-only default_to_accept tunable. */
 #ifdef IPFIREWALL_DEFAULT_TO_ACCEPT
 static int default_to_accept = 1;
 #else
 static int default_to_accept;
 #endif
 
 /* Exported as the autoinc_step and one_pass sysctls respectively. */
 VNET_DEFINE(int, autoinc_step);
 VNET_DEFINE(int, fw_one_pass) = 1;
 
 VNET_DEFINE(unsigned int, fw_tables_max);
 VNET_DEFINE(unsigned int, fw_tables_sets) = 0;	/* Don't use set-aware tables */
 /* Use 128 tables by default */
 static unsigned int default_fw_tables = IPFW_TABLES_DEFAULT;
 
 /*
  * JUMP() resolves to jump_fast() unless LINEAR_SKIPTO is defined, in which
  * case it resolves to jump_linear().  Both share the same signature.
  */
 #ifndef LINEAR_SKIPTO
 static int jump_fast(struct ip_fw_chain *chain, struct ip_fw *f, int num,
     int tablearg, int jump_backwards);
 #define	JUMP(ch, f, num, targ, back)	jump_fast(ch, f, num, targ, back)
 #else
 static int jump_linear(struct ip_fw_chain *chain, struct ip_fw *f, int num,
     int tablearg, int jump_backwards);
 #define	JUMP(ch, f, num, targ, back)	jump_linear(ch, f, num, targ, back)
 #endif
 
 /*
  * Each rule belongs to one of 32 different sets (0..31).
  * The variable set_disable contains one bit per set.
  * If the bit is set, all rules in the corresponding set
  * are disabled. Set RESVD_SET(31) is reserved for the default rule
  * and rules that are not deleted by the flush command,
  * and CANNOT be disabled.
  * Rules in set RESVD_SET can only be deleted individually.
  */
 VNET_DEFINE(u_int32_t, set_disable);
 #define	V_set_disable			VNET(set_disable)
 
 /* Exported as the net.inet.ip.fw.verbose sysctl. */
 VNET_DEFINE(int, fw_verbose);
 /* counter for ipfw_log(NULL...) */
 VNET_DEFINE(u_int64_t, norule_counter);
 /* Exported as the net.inet.ip.fw.verbose_limit sysctl. */
 VNET_DEFINE(int, verbose_limit);
 
 /* layer3_chain contains the list of rules for layer 3 */
 VNET_DEFINE(struct ip_fw_chain, layer3_chain);
 
 /* ipfw_vnet_ready controls when we are open for business */
 VNET_DEFINE(int, ipfw_vnet_ready) = 0;
 
 VNET_DEFINE(int, ipfw_nat_ready) = 0;
 
 /* NAT entry points; function pointers, all initially NULL. */
 ipfw_nat_t *ipfw_nat_ptr = NULL;
 struct cfg_nat *(*lookup_nat_ptr)(struct nat_list *, int);
 ipfw_nat_cfg_t *ipfw_nat_cfg_ptr;
 ipfw_nat_cfg_t *ipfw_nat_del_ptr;
 ipfw_nat_cfg_t *ipfw_nat_get_cfg_ptr;
 ipfw_nat_cfg_t *ipfw_nat_get_log_ptr;
 
 /*
  * Sysctl tree for ipfw: net.inet.ip.fw.* and, when INET6 is defined,
  * net.inet6.ip6.fw.*.  Compiled only when SYSCTL_NODE is available.
  */
 #ifdef SYSCTL_NODE
 /* Value reported by the read-only default_rule sysctl. */
 uint32_t dummy_def = IPFW_DEFAULT_RULE;
 static int sysctl_ipfw_table_num(SYSCTL_HANDLER_ARGS);
 static int sysctl_ipfw_tables_sets(SYSCTL_HANDLER_ARGS);
 
 SYSBEGIN(f3)
 
 SYSCTL_NODE(_net_inet_ip, OID_AUTO, fw, CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
     "Firewall");
 SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, one_pass,
     CTLFLAG_VNET | CTLFLAG_RW | CTLFLAG_SECURE3, &VNET_NAME(fw_one_pass), 0,
     "Only do a single pass through ipfw when using dummynet(4)");
 SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, autoinc_step,
     CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(autoinc_step), 0,
     "Rule number auto-increment step");
 SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, verbose,
     CTLFLAG_VNET | CTLFLAG_RW | CTLFLAG_SECURE3, &VNET_NAME(fw_verbose), 0,
     "Log matches to ipfw rules");
 SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, verbose_limit,
     CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(verbose_limit), 0,
     "Set upper limit of matches of ipfw rules logged");
 SYSCTL_UINT(_net_inet_ip_fw, OID_AUTO, default_rule, CTLFLAG_RD,
     &dummy_def, 0,
     "The default/max possible rule number.");
 /* tables_max and tables_sets go through handler functions, not plain ints. */
 SYSCTL_PROC(_net_inet_ip_fw, OID_AUTO, tables_max,
     CTLFLAG_VNET | CTLTYPE_UINT | CTLFLAG_RW | CTLFLAG_MPSAFE,
     0, 0, sysctl_ipfw_table_num, "IU",
     "Maximum number of concurrently used tables");
 SYSCTL_PROC(_net_inet_ip_fw, OID_AUTO, tables_sets,
     CTLFLAG_VNET | CTLTYPE_UINT | CTLFLAG_RW | CTLFLAG_MPSAFE,
     0, 0, sysctl_ipfw_tables_sets, "IU",
     "Use per-set namespace for tables");
 SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, default_to_accept, CTLFLAG_RDTUN,
     &default_to_accept, 0,
     "Make the default rule accept all packets.");
 TUNABLE_INT("net.inet.ip.fw.tables_max", (int *)&default_fw_tables);
 SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, static_count,
     CTLFLAG_VNET | CTLFLAG_RD, &VNET_NAME(layer3_chain.n_rules), 0,
     "Number of static rules");
 
 #ifdef INET6
 SYSCTL_DECL(_net_inet6_ip6);
 SYSCTL_NODE(_net_inet6_ip6, OID_AUTO, fw, CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
     "Firewall");
 SYSCTL_INT(_net_inet6_ip6_fw, OID_AUTO, deny_unknown_exthdrs,
     CTLFLAG_VNET | CTLFLAG_RW | CTLFLAG_SECURE,
     &VNET_NAME(fw_deny_unknown_exthdrs), 0,
     "Deny packets with unknown IPv6 Extension Headers");
 SYSCTL_INT(_net_inet6_ip6_fw, OID_AUTO, permit_single_frag6,
     CTLFLAG_VNET | CTLFLAG_RW | CTLFLAG_SECURE,
     &VNET_NAME(fw_permit_single_frag6), 0,
     "Permit single packet IPv6 fragments");
 #endif /* INET6 */
 
 SYSEND
 
 #endif /* SYSCTL_NODE */
 
 
 /*
  * Some macros used in the various matching options.
  * L3HDR maps an ipv4 pointer into a layer3 header pointer of type T:
  * it advances from the start of the IPv4 header by ip_hl 32-bit words
  * and casts the result to T *.
  * Other macros just cast void * into the appropriate type
  */
 #define	L3HDR(T, ip)	((T *)((u_int32_t *)(ip) + (ip)->ip_hl))
 #define	TCP(p)		((struct tcphdr *)(p))
 #define	SCTP(p)		((struct sctphdr *)(p))
 #define	UDP(p)		((struct udphdr *)(p))
 #define	ICMP(p)		((struct icmphdr *)(p))
 #define	ICMP6(p)	((struct icmp6_hdr *)(p))
 
 /*
  * Return 1 when the packet's ICMP type is selected in the bitmap held in
  * cmd->d[0] (bit N set means "match type N"), otherwise 0.
  *
  * Only cmd->d[0] is consulted, so a type of 32 or more can never match.
  * Such types are rejected before the shift: a shift count that is not
  * smaller than the width of the operand is undefined behavior in C.  The
  * shifted constant is unsigned so that bit 31 can be tested without
  * overflowing a signed int.
  */
 static __inline int
 icmptype_match(struct icmphdr *icmp, ipfw_insn_u32 *cmd)
 {
 	int type = icmp->icmp_type;
 
 	if (type > ICMP_MAXTYPE || type >= 32)
 		return (0);
 
 	return ((cmd->d[0] & (1U << type)) != 0);
 }
 
 /* Bitmap of the ICMP types that is_icmp_query() treats as queries. */
 #define TT	( (1U << ICMP_ECHO) | (1U << ICMP_ROUTERSOLICIT) | \
     (1U << ICMP_TSTAMP) | (1U << ICMP_IREQ) | (1U << ICMP_MASKREQ) )
 
 /*
  * Return 1 when the ICMP type of the packet is one of the query types
  * collected in TT, otherwise 0.
  *
  * Types of 32 or more are rejected before the shift, because shifting by
  * the operand width or more is undefined behavior in C; unsigned constants
  * keep the bit tests free of signed overflow.
  */
 static int
 is_icmp_query(struct icmphdr *icmp)
 {
 	int type = icmp->icmp_type;
 
 	if (type > ICMP_MAXTYPE || type >= 32)
 		return (0);
 
 	return ((TT & (1U << type)) != 0);
 }
 #undef TT
 
 /*
  * The following checks use two arrays of 8 or 16 bits to store the
  * bits that we want set or clear, respectively. They are in the
  * low and high half of cmd->arg1 or cmd->d[0].
  *
  * We scan options and store the bits we find set. We succeed if
  *
  *	(want_set & ~bits) == 0 && (want_clear & ~bits) == want_clear
  *
  * The code is sometimes optimized not to store additional variables.
  */
 
 /*
  * Check the bits found in a packet against the instruction's wishes.
  * The low byte of cmd->arg1 lists bits that must be present in 'bits',
  * the high byte lists bits that must be absent.  Returns 1 when both
  * conditions hold, 0 otherwise.
  */
 static int
 flags_match(ipfw_insn *cmd, u_int8_t bits)
 {
 	u_char must_be_set, must_be_clear, absent;
 
 	must_be_set = cmd->arg1 & 0xff;
 	must_be_clear = (cmd->arg1 >> 8) & 0xff;
 	absent = ~bits;		/* bits that are NOT present */
 
 	/* some bits we want set were clear */
 	if ((must_be_set & absent) != 0)
 		return (0);
 
 	/* some bits we want clear were set */
 	if ((must_be_clear & absent) != must_be_clear)
 		return (0);
 
 	return (1);
 }
 
 /*
  * Walk the IPv4 options that follow the fixed header, collect an
  * IP_FW_IPOPT_* bit for each LSRR, SSRR, RR and TS option seen, and hand
  * the result to flags_match().  Scanning stops at an end-of-list option;
  * an option whose length is not positive or exceeds the remaining option
  * space makes the match fail (returns 0).
  */
 static int
 ipopts_match(struct ip *ip, ipfw_insn *cmd)
 {
 	u_char *optp;
 	int left, kind, olen, found;
 
 	found = 0;
 	optp = (u_char *)(ip + 1);
 	left = (ip->ip_hl << 2) - sizeof (struct ip);
 
 	while (left > 0) {
 		kind = optp[IPOPT_OPTVAL];
 
 		if (kind == IPOPT_EOL)
 			break;
 
 		if (kind == IPOPT_NOP) {
 			olen = 1;
 		} else {
 			olen = optp[IPOPT_OLEN];
 			if (olen <= 0 || olen > left)
 				return (0);	/* invalid or truncated */
 		}
 
 		if (kind == IPOPT_LSRR)
 			found |= IP_FW_IPOPT_LSRR;
 		else if (kind == IPOPT_SSRR)
 			found |= IP_FW_IPOPT_SSRR;
 		else if (kind == IPOPT_RR)
 			found |= IP_FW_IPOPT_RR;
 		else if (kind == IPOPT_TS)
 			found |= IP_FW_IPOPT_TS;
 
 		left -= olen;
 		optp += olen;
 	}
 
 	return (flags_match(cmd, found));
 }
 
 /*
  * Parse TCP options. The logic copied from tcp_dooptions().
  *
  * Returns a bitmask of IP_FW_TCPOPT_* values, one for each recognized
  * option that also has the expected length.  When an MSS option is found
  * and mss is not NULL, the advertised value is stored in *mss.  Parsing
  * stops silently at an end-of-list option or at the first option whose
  * length field is missing, shorter than 2 or larger than what is left.
  */
 static int
 tcpopts_parse(const struct tcphdr *tcp, uint16_t *mss)
 {
 	const u_char *optp;
 	int left, kind, olen, found;
 
 	found = 0;
 	optp = (const u_char *)(tcp + 1);
 	left = (tcp->th_off << 2) - sizeof(struct tcphdr);
 
 	while (left > 0) {
 		kind = optp[0];
 
 		if (kind == TCPOPT_EOL)
 			break;
 
 		if (kind == TCPOPT_NOP) {
 			olen = 1;
 		} else {
 			if (left < 2)
 				break;
 			olen = optp[1];
 			if (olen < 2 || olen > left)
 				break;
 		}
 
 		if (kind == TCPOPT_MAXSEG) {
 			if (olen == TCPOLEN_MAXSEG) {
 				found |= IP_FW_TCPOPT_MSS;
 				if (mss != NULL)
 					*mss = be16dec(optp + 2);
 			}
 		} else if (kind == TCPOPT_WINDOW) {
 			if (olen == TCPOLEN_WINDOW)
 				found |= IP_FW_TCPOPT_WINDOW;
 		} else if (kind == TCPOPT_SACK_PERMITTED) {
 			if (olen == TCPOLEN_SACK_PERMITTED)
 				found |= IP_FW_TCPOPT_SACK;
 		} else if (kind == TCPOPT_SACK) {
 			/* the payload must be a whole number of SACK blocks */
 			if (olen > 2 && (olen - 2) % TCPOLEN_SACK == 0)
 				found |= IP_FW_TCPOPT_SACK;
 		} else if (kind == TCPOPT_TIMESTAMP) {
 			if (olen == TCPOLEN_TIMESTAMP)
 				found |= IP_FW_TCPOPT_TS;
 		}
 
 		left -= olen;
 		optp += olen;
 	}
 
 	return (found);
 }
 
 /*
  * Match the TCP options present in the segment against the instruction:
  * the bitmask produced by tcpopts_parse() is checked by flags_match().
  * The MSS value is not needed here, hence the NULL argument.
  */
 static int
 tcpopts_match(struct tcphdr *tcp, ipfw_insn *cmd)
 {
 
 	return (flags_match(cmd, tcpopts_parse(tcp, NULL)));
 }
 
 /*
  * Decide whether interface ifp satisfies the interface instruction cmd.
  *
  * - No interface: never matches.
  * - cmd->name starts with '\1': the result of a table lookup keyed by the
  *   interface index is returned (tablearg is passed through to the lookup).
  * - cmd->name otherwise non-empty: compare the interface name, as a glob
  *   pattern when cmd->p.glob is set, else with strncmp() over IFNAMSIZ.
  * - cmd->name empty: match when one of the interface's AF_INET addresses
  *   equals cmd->p.ip (kernel FreeBSD builds only).
  *
  * Returns non-zero on match, 0 otherwise.
  */
 static int
 iface_match(struct ifnet *ifp, ipfw_insn_if *cmd, struct ip_fw_chain *chain,
     uint32_t *tablearg)
 {
 
 	/* no iface with this packet, match fails */
 	if (ifp == NULL)
 		return (0);
 
 	/* use tablearg to match */
 	if (cmd->name[0] == '\1')
 		return (ipfw_lookup_table(chain, cmd->p.kidx, 0,
 		    &ifp->if_index, tablearg));
 
 	/* match by name */
 	if (cmd->name[0] != '\0') {
 		if (cmd->p.glob)
 			return (fnmatch(cmd->name, ifp->if_xname, 0) == 0);
 		return (strncmp(ifp->if_xname, cmd->name, IFNAMSIZ) == 0);
 	}
 
 #if !defined(USERSPACE) && defined(__FreeBSD__)	/* and OSX too ? */
 	{
 		struct ifaddr *ifa;
 		struct sockaddr_in *sin;
 
 		NET_EPOCH_ASSERT();
 
 		/* match by IPv4 address configured on the interface */
 		CK_STAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
 			if (ifa->ifa_addr->sa_family != AF_INET)
 				continue;
 			sin = (struct sockaddr_in *)(ifa->ifa_addr);
 			if (cmd->p.ip.s_addr == sin->sin_addr.s_addr)
 				return (1);	/* match */
 		}
 	}
 #endif /* __FreeBSD__ */
 
 	return (0);	/* no match, fail ... */
 }
 
 /*
  * The verify_path function checks if a route to the src exists and
  * if it is reachable via ifp (when provided).
  * 
  * The 'verrevpath' option checks that the interface that an IP packet
  * arrives on is the same interface that traffic destined for the
  * packet's source address would be routed out of.
  * The 'versrcreach' option just checks that the source address is
  * reachable via any route (except default) in the routing table.
  * These two are a measure to block forged packets. This is also
  * commonly known as "anti-spoofing" or Unicast Reverse Path
  * Forwarding (Unicast RPF) in Cisco-ese. The name of the knobs
  * is purposely reminiscent of the Cisco IOS command,
  *
  *   ip verify unicast reverse-path
  *   ip verify unicast source reachable-via any
  *
  * which implements the same functionality. But note that the syntax
  * is misleading, and the check may be performed on all IP packets
  * whether unicast, multicast, or broadcast.
  *
  * Returns 1 when a valid route was found, 0 otherwise.  In userspace or
  * non-FreeBSD builds the function always returns 0.
  */
 static int
 verify_path(struct in_addr src, struct ifnet *ifp, u_int fib)
 {
 #if defined(USERSPACE) || !defined(__FreeBSD__)
 	return 0;
 #else
-	struct nhop4_basic nh4;
+	struct nhop_object *nh;
 
-	if (fib4_lookup_nh_basic(fib, src, NHR_IFAIF, 0, &nh4) != 0)
+	nh = fib4_lookup(fib, src, 0, NHR_NONE, 0);
+	if (nh == NULL)
 		return (0);
 
 	/*
 	 * If ifp is provided, check for equality with rtentry.
 	 * We should use rt->rt_ifa->ifa_ifp, instead of rt->rt_ifp,
 	 * in order to pass packets injected back by if_simloop():
 	 * routing entry (via lo0) for our own address
 	 * may exist, so we need to handle routing asymmetry.
 	 */
-	if (ifp != NULL && ifp != nh4.nh_ifp)
+	if (ifp != NULL && ifp != nh->nh_aifp)
 		return (0);
 
 	/* if no ifp provided, check if rtentry is not default route */
-	if (ifp == NULL && (nh4.nh_flags & NHF_DEFAULT) != 0)
+	if (ifp == NULL && (nh->nh_flags & NHF_DEFAULT) != 0)
 		return (0);
 
 	/* or if this is a blackhole/reject route */
-	if (ifp == NULL && (nh4.nh_flags & (NHF_REJECT|NHF_BLACKHOLE)) != 0)
+	if (ifp == NULL && (nh->nh_flags & (NHF_REJECT|NHF_BLACKHOLE)) != 0)
 		return (0);
 
 	/* found valid route */
 	return 1;
 #endif /* __FreeBSD__ */
 }
 
 /*
  * Generate an SCTP packet containing an ABORT chunk. The verification tag
  * is given by vtag. The T-bit is set in the ABORT chunk if and only if
  * reflected is not 0.
  *
  * The packet is addressed back at the sender of the flow described by id
  * (source and destination addresses and ports are swapped).  replyto is
  * used only for MAC labelling.  Returns the new mbuf, or NULL when no mbuf
  * could be allocated or id->addr_type is neither 4 nor (with INET6) 6.
  */
 
 static struct mbuf *
 ipfw_send_abort(struct mbuf *replyto, struct ipfw_flow_id *id, u_int32_t vtag,
     int reflected)
 {
 	struct mbuf *m;
 	struct ip *ip;
 #ifdef INET6
 	struct ip6_hdr *ip6;
 #endif
 	struct sctphdr *sctp;
 	struct sctp_chunkhdr *chunk;
 	u_int16_t hlen, plen, tlen;
 
 	MGETHDR(m, M_NOWAIT, MT_DATA);
 	if (m == NULL)
 		return (NULL);
 
 	M_SETFIB(m, id->fib);
 #ifdef MAC
 	if (replyto != NULL)
 		mac_netinet_firewall_reply(replyto, m);
 	else
 		mac_netinet_firewall_send(m);
 #else
 	(void)replyto;		/* don't warn about unused arg */
 #endif
 
 	/* The network header length depends on the address family. */
 	switch (id->addr_type) {
 	case 4:
 		hlen = sizeof(struct ip);
 		break;
 #ifdef INET6
 	case 6:
 		hlen = sizeof(struct ip6_hdr);
 		break;
 #endif
 	default:
 		/* XXX: log me?!? */
 		FREE_PKT(m);
 		return (NULL);
 	}
 	/* Payload is the SCTP common header plus one bare chunk header. */
 	plen = sizeof(struct sctphdr) + sizeof(struct sctp_chunkhdr);
 	tlen = hlen + plen;
 	/* Leave room in front of the packet for a link-layer header. */
 	m->m_data += max_linkhdr;
 	m->m_flags |= M_SKIP_FIREWALL;
 	m->m_pkthdr.len = m->m_len = tlen;
 	m->m_pkthdr.rcvif = NULL;
 	/* Start from an all-zero packet; only non-zero fields are set below. */
 	bzero(m->m_data, tlen);
 
 	switch (id->addr_type) {
 	case 4:
 		ip = mtod(m, struct ip *);
 
 		ip->ip_v = 4;
 		ip->ip_hl = sizeof(struct ip) >> 2;
 		ip->ip_tos = IPTOS_LOWDELAY;
 		ip->ip_len = htons(tlen);
 		ip->ip_id = htons(0);
 		ip->ip_off = htons(0);
 		ip->ip_ttl = V_ip_defttl;
 		ip->ip_p = IPPROTO_SCTP;
 		ip->ip_sum = 0;
 		/* Swap the flow's addresses: the reply goes back to its source. */
 		ip->ip_src.s_addr = htonl(id->dst_ip);
 		ip->ip_dst.s_addr = htonl(id->src_ip);
 
 		sctp = (struct sctphdr *)(ip + 1);
 		break;
 #ifdef INET6
 	case 6:
 		ip6 = mtod(m, struct ip6_hdr *);
 
 		ip6->ip6_vfc = IPV6_VERSION;
 		ip6->ip6_plen = htons(plen);
 		ip6->ip6_nxt = IPPROTO_SCTP;
 		ip6->ip6_hlim = IPV6_DEFHLIM;
 		ip6->ip6_src = id->dst_ip6;
 		ip6->ip6_dst = id->src_ip6;
 
 		sctp = (struct sctphdr *)(ip6 + 1);
 		break;
 #endif
 	}
 
 	/* Ports are swapped as well. */
 	sctp->src_port = htons(id->dst_port);
 	sctp->dest_port = htons(id->src_port);
 	sctp->v_tag = htonl(vtag);
 	/* The checksum field is zero while the checksum is computed below. */
 	sctp->checksum = htonl(0);
 
 	chunk = (struct sctp_chunkhdr *)(sctp + 1);
 	chunk->chunk_type = SCTP_ABORT_ASSOCIATION;
 	chunk->chunk_flags = 0;
 	if (reflected != 0) {
 		chunk->chunk_flags |= SCTP_HAD_NO_TCB;
 	}
 	chunk->chunk_length = htons(sizeof(struct sctp_chunkhdr));
 
 	/* Checksum starts at offset hlen, i.e. it skips the network header. */
 	sctp->checksum = sctp_calculate_cksum(m, hlen);
 
 	return (m);
 }
 
 /*
  * Generate a TCP packet, containing either a RST or a keepalive.
  * When flags & TH_RST, we are sending a RST packet, because of a
  * "reset" action matched the packet.
  * Otherwise we are sending a keepalive: the segment carries the
  * caller-provided seq and ack numbers and has only TH_ACK set.
  * The 'replyto' mbuf is the mbuf being replied to, if any, and is required
  * so that MAC can label the reply appropriately.
  *
  * Returns the new mbuf, or NULL when no mbuf could be allocated or
  * id->addr_type is neither 4 nor (with INET6) 6.
  */
 struct mbuf *
 ipfw_send_pkt(struct mbuf *replyto, struct ipfw_flow_id *id, u_int32_t seq,
     u_int32_t ack, int flags)
 {
 	struct mbuf *m = NULL;		/* stupid compiler */
 	struct ip *h = NULL;		/* stupid compiler */
 #ifdef INET6
 	struct ip6_hdr *h6 = NULL;
 #endif
 	struct tcphdr *th = NULL;
 	int len, dir;
 
 	MGETHDR(m, M_NOWAIT, MT_DATA);
 	if (m == NULL)
 		return (NULL);
 
 	M_SETFIB(m, id->fib);
 #ifdef MAC
 	if (replyto != NULL)
 		mac_netinet_firewall_reply(replyto, m);
 	else
 		mac_netinet_firewall_send(m);
 #else
 	(void)replyto;		/* don't warn about unused arg */
 #endif
 
 	switch (id->addr_type) {
 	case 4:
 		len = sizeof(struct ip) + sizeof(struct tcphdr);
 		break;
 #ifdef INET6
 	case 6:
 		len = sizeof(struct ip6_hdr) + sizeof(struct tcphdr);
 		break;
 #endif
 	default:
 		/* XXX: log me?!? */
 		FREE_PKT(m);
 		return (NULL);
 	}
 	/*
 	 * dir is 1 only when TH_SYN is set and TH_RST is not: the packet then
 	 * travels in the direction described by id (src -> dst).  In every
 	 * other case the addresses and ports of id are swapped.
 	 */
 	dir = ((flags & (TH_SYN | TH_RST)) == TH_SYN);
 
 	/* Leave room in front of the packet for a link-layer header. */
 	m->m_data += max_linkhdr;
 	m->m_flags |= M_SKIP_FIREWALL;
 	m->m_pkthdr.len = m->m_len = len;
 	m->m_pkthdr.rcvif = NULL;
 	bzero(m->m_data, len);
 
 	switch (id->addr_type) {
 	case 4:
 		h = mtod(m, struct ip *);
 
 		/* prepare for checksum */
 		h->ip_p = IPPROTO_TCP;
 		h->ip_len = htons(sizeof(struct tcphdr));
 		if (dir) {
 			h->ip_src.s_addr = htonl(id->src_ip);
 			h->ip_dst.s_addr = htonl(id->dst_ip);
 		} else {
 			h->ip_src.s_addr = htonl(id->dst_ip);
 			h->ip_dst.s_addr = htonl(id->src_ip);
 		}
 
 		th = (struct tcphdr *)(h + 1);
 		break;
 #ifdef INET6
 	case 6:
 		h6 = mtod(m, struct ip6_hdr *);
 
 		/* prepare for checksum */
 		h6->ip6_nxt = IPPROTO_TCP;
 		h6->ip6_plen = htons(sizeof(struct tcphdr));
 		if (dir) {
 			h6->ip6_src = id->src_ip6;
 			h6->ip6_dst = id->dst_ip6;
 		} else {
 			h6->ip6_src = id->dst_ip6;
 			h6->ip6_dst = id->src_ip6;
 		}
 
 		th = (struct tcphdr *)(h6 + 1);
 		break;
 #endif
 	}
 
 	if (dir) {
 		th->th_sport = htons(id->src_port);
 		th->th_dport = htons(id->dst_port);
 	} else {
 		th->th_sport = htons(id->dst_port);
 		th->th_dport = htons(id->src_port);
 	}
 	/* Header without options: data offset in 32-bit words. */
 	th->th_off = sizeof(struct tcphdr) >> 2;
 
 	if (flags & TH_RST) {
 		if (flags & TH_ACK) {
 			/* Reset sequenced with the ack number we were given. */
 			th->th_seq = htonl(ack);
 			th->th_flags = TH_RST;
 		} else {
 			/* A SYN occupies one sequence number. */
 			if (flags & TH_SYN)
 				seq++;
 			th->th_ack = htonl(seq);
 			th->th_flags = TH_RST | TH_ACK;
 		}
 	} else {
 		/*
 		 * Keepalive - use caller provided sequence numbers
 		 */
 		th->th_seq = htonl(seq);
 		th->th_ack = htonl(ack);
 		th->th_flags = TH_ACK;
 	}
 
 	switch (id->addr_type) {
 	case 4:
 		/*
 		 * The IP header still holds only the fields set above under
 		 * "prepare for checksum"; the rest is zero.
 		 */
 		th->th_sum = in_cksum(m, len);
 
 		/* finish the ip header */
 		h->ip_v = 4;
 		h->ip_hl = sizeof(*h) >> 2;
 		h->ip_tos = IPTOS_LOWDELAY;
 		h->ip_off = htons(0);
 		h->ip_len = htons(len);
 		h->ip_ttl = V_ip_defttl;
 		h->ip_sum = 0;
 		break;
 #ifdef INET6
 	case 6:
 		th->th_sum = in6_cksum(m, IPPROTO_TCP, sizeof(*h6),
 		    sizeof(struct tcphdr));
 
 		/* finish the ip6 header */
 		h6->ip6_vfc |= IPV6_VERSION;
 		h6->ip6_hlim = IPV6_DEFHLIM;
 		break;
 #endif
 	}
 
 	return (m);
 }
 
 #ifdef INET6
 /*
  * ipv6 specific rules here...
  */
 /*
  * Test whether an ICMPv6 type is selected in the instruction's bitmap.
  *
  * cmd->d[] is read as a bit vector: bit (type % 32) of word (type / 32)
  * stands for ICMPv6 type 'type'.  Types outside 0..ICMP6_MAXTYPE never
  * match.  Returns 1 on match, 0 otherwise.
  */
 static __inline int
 icmp6type_match (int type, ipfw_insn_u32 *cmd)
 {
 	/*
 	 * Use an unsigned constant for the mask: shifting a signed 1 by 31
 	 * is undefined behaviour.  Rejecting negative types keeps both the
 	 * array index and the shift count non-negative.
 	 */
 	return (type >= 0 && type <= ICMP6_MAXTYPE &&
 	    (cmd->d[type / 32] & (1U << (type % 32))) != 0);
 }
 
 /*
  * Compare the packet's flow label with the values stored in the
  * instruction.  Entries d[0] through d[cmd->o.arg1] (inclusive) are
  * examined; returns 1 on the first equal entry and 0 if none is equal.
  *
  * NOTE(review): the upper bound is inclusive; confirm that d[arg1] is a
  * slot the rule compiler really fills in.
  */
 static int
 flow6id_match(int curr_flow, ipfw_insn_u32 *cmd)
 {
 	int idx = 0;
 
 	while (idx <= cmd->o.arg1) {
 		if (cmd->d[idx] == curr_flow)
 			return (1);
 		idx++;
 	}
 	return (0);
 }
 
 /* support for IP6_*_ME opcodes */
 /*
  * Mask applied by ipfw_localip6() when it compares link-local addresses:
  * bytes 2 and 3 are cleared, every other byte takes part in the comparison.
  * NOTE(review): presumably bytes 2-3 are where the kernel embeds the scope
  * (interface) id in link-local addresses -- confirm.
  */
 static const struct in6_addr lla_mask = {{{
 	0xff, 0xff, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff,
 	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
 }}};
 
 /*
  * Check whether an IPv6 address belongs to this host.
  *
  * Multicast addresses never match.  Addresses that are not link-local are
  * handed to in6_localip() and its result is returned.  A link-local address
  * is compared, under lla_mask, with every link-local entry on
  * V_in6_ifaddrhead while the address list read lock is held; the result is
  * 1 if one of them is equal and 0 otherwise.
  */
 static int
 ipfw_localip6(struct in6_addr *in6)
 {
 	struct rm_priotracker in6_ifa_tracker;
 	struct in6_ifaddr *ifa6;
 	int found;
 
 	if (IN6_IS_ADDR_MULTICAST(in6))
 		return (0);
 	if (!IN6_IS_ADDR_LINKLOCAL(in6))
 		return (in6_localip(in6));
 
 	found = 0;
 	IN6_IFADDR_RLOCK(&in6_ifa_tracker);
 	CK_STAILQ_FOREACH(ifa6, &V_in6_ifaddrhead, ia_link) {
 		/* Only link-local entries are candidates. */
 		if (IN6_IS_ADDR_LINKLOCAL(&ifa6->ia_addr.sin6_addr) &&
 		    IN6_ARE_MASKED_ADDR_EQUAL(&ifa6->ia_addr.sin6_addr,
 		    in6, &lla_mask)) {
 			found = 1;
 			break;
 		}
 	}
 	IN6_IFADDR_RUNLOCK(&in6_ifa_tracker);
 
 	return (found);
 }
 
 /*
  * Reverse-path check for an IPv6 source address.
  *
  * Looks src up in the routing table selected by fib and decides whether
  * the result is acceptable:
  *  - a link-local scope source is always accepted;
  *  - no route at all: rejected;
  *  - ifp given: the interface recorded for the route must be ifp;
  *  - ifp NULL: a default route, or a reject/blackhole route, is rejected.
  *
  * Returns 1 when the path is considered valid, 0 otherwise.
  */
 static int
 verify_path6(struct in6_addr *src, struct ifnet *ifp, u_int fib)
 {
-	struct nhop6_basic nh6;
+	struct nhop_object *nh;
 
 	/* Link-local scope sources are accepted without a route lookup. */
 	if (IN6_IS_SCOPE_LINKLOCAL(src))
 		return (1);
 
-	if (fib6_lookup_nh_basic(fib, src, 0, NHR_IFAIF, 0, &nh6) != 0)
+	nh = fib6_lookup(fib, src, 0, NHR_NONE, 0);
+	if (nh == NULL)
 		return (0);
 
 	/* If ifp is provided, check for equality with route table. */
-	if (ifp != NULL && ifp != nh6.nh_ifp)
+	if (ifp != NULL && ifp != nh->nh_aifp)
 		return (0);
 
 	/* if no ifp provided, check if rtentry is not default route */
-	if (ifp == NULL && (nh6.nh_flags & NHF_DEFAULT) != 0)
+	if (ifp == NULL && (nh->nh_flags & NHF_DEFAULT) != 0)
 		return (0);
 
 	/* or if this is a blackhole/reject route */
-	if (ifp == NULL && (nh6.nh_flags & (NHF_REJECT|NHF_BLACKHOLE)) != 0)
+	if (ifp == NULL && (nh->nh_flags & (NHF_REJECT|NHF_BLACKHOLE)) != 0)
 		return (0);
 
 	/* found valid route */
 	return 1;
 }
 
 /*
  * Tell whether an ICMPv6 type is one of the query messages handled here:
  * echo request, membership query, who-are-you request, FQDN query or node
  * information query.  Returns 1 for such a type, 0 for everything else.
  */
 static int
 is_icmp6_query(int icmp6_type)
 {
 	int is_query;
 
 	/* Types above the known maximum are never treated as queries. */
 	if (icmp6_type > ICMP6_MAXTYPE)
 		return (0);
 
 	/*
 	 * NOTE(review): kept as plain comparisons; a switch would require
 	 * the constants to be pairwise distinct, which is not established
 	 * here.
 	 */
 	is_query = (icmp6_type == ICMP6_ECHO_REQUEST ||
 	    icmp6_type == ICMP6_MEMBERSHIP_QUERY ||
 	    icmp6_type == ICMP6_WRUREQUEST ||
 	    icmp6_type == ICMP6_FQDN_QUERY ||
 	    icmp6_type == ICMP6_NI_QUERY);
 
 	return (is_query);
 }
 
 /*
  * Translate an ICMPv4 destination-unreachable code into the ICMPv6
  * destination-unreachable code to use instead (RFC 7915, section 4.2).
  */
 static int
 map_icmp_unreach(int code)
 {
 	int code6;
 
 	switch (code) {
 	case ICMP_UNREACH_NET:
 	case ICMP_UNREACH_HOST:
 	case ICMP_UNREACH_SRCFAIL:
 	case ICMP_UNREACH_NET_UNKNOWN:
 	case ICMP_UNREACH_HOST_UNKNOWN:
 	case ICMP_UNREACH_TOSNET:
 	case ICMP_UNREACH_TOSHOST:
 		/* Network and host level failures become "no route". */
 		code6 = ICMP6_DST_UNREACH_NOROUTE;
 		break;
 	case ICMP_UNREACH_PORT:
 		code6 = ICMP6_DST_UNREACH_NOPORT;
 		break;
 	default:
 		/*
 		 * Every other code is reported as administratively
 		 * prohibited.
 		 * XXX: "protocol unreachable" ought to become an ICMPv6
 		 * parameter problem, but only the unreach type is used here.
 		 */
 		code6 = ICMP6_DST_UNREACH_ADMIN;
 		break;
 	}
 
 	return (code6);
 }
 
 /*
  * Reject an IPv6 packet.  The packet in args->m is always consumed and
  * args->m is NULL on return.
  *
  * code selects the kind of answer:
  *  - ICMP6_UNREACH_RST on a TCP packet: reply with a TCP RST built by
  *    ipfw_send_pkt(), unless the offending segment itself carries RST;
  *  - ICMP6_UNREACH_ABORT on an SCTP packet: reply with an ABORT built by
  *    ipfw_send_abort(), unless no usable verification tag is found;
  *  - any other code: pass the packet to icmp6_error() as a destination
  *    unreachable error carrying that code;
  *  - RST or ABORT requested for a different protocol: drop silently.
  *
  * hlen is the byte offset from ip6 to the upper layer (TCP/SCTP) header.
  */
 static void
 send_reject6(struct ip_fw_args *args, int code, u_int hlen, struct ip6_hdr *ip6)
 {
 	struct mbuf *m;
 
 	m = args->m;
 	if (code == ICMP6_UNREACH_RST && args->f_id.proto == IPPROTO_TCP) {
 		struct tcphdr *tcp;
 		tcp = (struct tcphdr *)((char *)ip6 + hlen);
 
 		/* Never answer a RST with another RST. */
 		if ((tcp->th_flags & TH_RST) == 0) {
 			struct mbuf *m0;
 			m0 = ipfw_send_pkt(args->m, &(args->f_id),
 			    ntohl(tcp->th_seq), ntohl(tcp->th_ack),
 			    tcp->th_flags | TH_RST);
 			if (m0 != NULL)
 				ip6_output(m0, NULL, NULL, 0, NULL, NULL,
 				    NULL);
 		}
 		FREE_PKT(m);
 	} else if (code == ICMP6_UNREACH_ABORT &&
 	    args->f_id.proto == IPPROTO_SCTP) {
 		struct mbuf *m0;
 		struct sctphdr *sctp;
 		u_int32_t v_tag;
 		int reflected;
 
 		sctp = (struct sctphdr *)((char *)ip6 + hlen);
 		/*
 		 * Default: answer with the packet's own verification tag and
 		 * mark it as reflected.  A v_tag of 0 at the end of the
 		 * inspection below means "send nothing".
 		 */
 		reflected = 1;
 		v_tag = ntohl(sctp->v_tag);
 		/* Investigate the first chunk header if available */
 		if (m->m_len >= hlen + sizeof(struct sctphdr) +
 		    sizeof(struct sctp_chunkhdr)) {
 			struct sctp_chunkhdr *chunk;
 
 			chunk = (struct sctp_chunkhdr *)(sctp + 1);
 			switch (chunk->chunk_type) {
 			case SCTP_INITIATION:
 				/*
 				 * Packets containing an INIT chunk MUST have
 				 * a zero v-tag.
 				 */
 				if (v_tag != 0) {
 					v_tag = 0;
 					break;
 				}
 				/*
 				 * INIT chunk MUST NOT be bundled.  v_tag is 0
 				 * here, so leaving the switch suppresses the
 				 * reply.  NOTE(review): the "+ 3" presumably
 				 * allows for chunk padding -- confirm.
 				 */
 				if (m->m_pkthdr.len >
 				    hlen + sizeof(struct sctphdr) +
 				    ntohs(chunk->chunk_length) + 3) {
 					break;
 				}
 				/* Use the initiate tag if available */
 				if ((m->m_len >= hlen + sizeof(struct sctphdr) +
 				    sizeof(struct sctp_chunkhdr) +
 				    offsetof(struct sctp_init, a_rwnd))) {
 					struct sctp_init *init;
 
 					init = (struct sctp_init *)(chunk + 1);
 					v_tag = ntohl(init->initiate_tag);
 					reflected = 0;
 				}
 				break;
 			case SCTP_ABORT_ASSOCIATION:
 				/*
 				 * If the packet contains an ABORT chunk, don't
 				 * reply.
 				 * XXX: We should search through all chunks,
 				 * but do not do that to avoid attacks.
 				 */
 				v_tag = 0;
 				break;
 			}
 		}
 		if (v_tag == 0) {
 			m0 = NULL;
 		} else {
 			m0 = ipfw_send_abort(args->m, &(args->f_id), v_tag,
 			    reflected);
 		}
 		if (m0 != NULL)
 			ip6_output(m0, NULL, NULL, 0, NULL, NULL, NULL);
 		FREE_PKT(m);
 	} else if (code != ICMP6_UNREACH_RST && code != ICMP6_UNREACH_ABORT) {
 		/* Send an ICMPv6 unreach. */
 #if 0
 		/*
 		 * Unlike above, the mbufs need to line up with the ip6 hdr,
 		 * as the contents are read. We need to m_adj() the
 		 * needed amount.
 		 * The mbuf will however be thrown away so we can adjust it.
 		 * Remember we did an m_pullup on it already so we
 		 * can make some assumptions about contiguousness.
 		 */
 		if (args->L3offset)
 			m_adj(m, args->L3offset);
 #endif
 		/*
 		 * m is not freed on this path.
 		 * NOTE(review): presumably icmp6_error() takes ownership of
 		 * the mbuf -- confirm.
 		 */
 		icmp6_error(m, ICMP6_DST_UNREACH, code, 0);
 	} else
 		FREE_PKT(m);
 
 	/* Whatever happened, the caller no longer owns the packet. */
 	args->m = NULL;
 }
 
 #endif /* INET6 */
 
 
 /*
  * Sends a reject message for an IPv4 packet, consuming the mbuf passed in
  * args->m; args->m is NULL on return.
  *
  * code selects the kind of answer:
  *  - anything but ICMP_REJECT_RST / ICMP_REJECT_ABORT: pass the packet to
  *    icmp_error() as an ICMP unreachable carrying that code;
  *  - ICMP_REJECT_RST on a TCP packet: reply with a TCP RST built by
  *    ipfw_send_pkt(), unless the offending segment itself carries RST;
  *  - ICMP_REJECT_ABORT on an SCTP packet: reply with an ABORT built by
  *    ipfw_send_abort(), unless no usable verification tag is found;
  *  - RST or ABORT requested for a different protocol: drop silently.
  *
  * iplen bounds how much of the SCTP payload may be inspected, and ip is
  * used for the IPv4 header length.
  * NOTE(review): iplen is presumably the total length of the IPv4 packet;
  * confirm against the caller.
  */
 static void
 send_reject(struct ip_fw_args *args, int code, int iplen, struct ip *ip)
 {
 
 #if 0
 	/* XXX When ip is not guaranteed to be at mtod() we will
 	 * need to account for this */
 	 * The mbuf will however be thrown away so we can adjust it.
 	 * Remember we did an m_pullup on it already so we
 	 * can make some assumptions about contiguousness.
 	 */
 	if (args->L3offset)
 		m_adj(m, args->L3offset);
 #endif
 	if (code != ICMP_REJECT_RST && code != ICMP_REJECT_ABORT) {
 		/*
 		 * Send an ICMP unreach.  args->m is not freed on this path.
 		 * NOTE(review): presumably icmp_error() takes ownership of
 		 * the mbuf -- confirm.
 		 */
 		icmp_error(args->m, ICMP_UNREACH, code, 0L, 0);
 	} else if (code == ICMP_REJECT_RST && args->f_id.proto == IPPROTO_TCP) {
 		struct tcphdr *const tcp =
 		    L3HDR(struct tcphdr, mtod(args->m, struct ip *));
 		/* Never answer a RST with another RST. */
 		if ( (tcp->th_flags & TH_RST) == 0) {
 			struct mbuf *m;
 			m = ipfw_send_pkt(args->m, &(args->f_id),
 				ntohl(tcp->th_seq), ntohl(tcp->th_ack),
 				tcp->th_flags | TH_RST);
 			if (m != NULL)
 				ip_output(m, NULL, NULL, 0, NULL, NULL);
 		}
 		FREE_PKT(args->m);
 	} else if (code == ICMP_REJECT_ABORT &&
 	    args->f_id.proto == IPPROTO_SCTP) {
 		struct mbuf *m;
 		struct sctphdr *sctp;
 		struct sctp_chunkhdr *chunk;
 		struct sctp_init *init;
 		u_int32_t v_tag;
 		int reflected;
 
 		sctp = L3HDR(struct sctphdr, mtod(args->m, struct ip *));
 		/*
 		 * Default: answer with the packet's own verification tag and
 		 * mark it as reflected.  A v_tag of 0 at the end of the
 		 * inspection below means "send nothing".
 		 */
 		reflected = 1;
 		v_tag = ntohl(sctp->v_tag);
 		if (iplen >= (ip->ip_hl << 2) + sizeof(struct sctphdr) +
 		    sizeof(struct sctp_chunkhdr)) {
 			/* Look at the first chunk header if available */
 			chunk = (struct sctp_chunkhdr *)(sctp + 1);
 			switch (chunk->chunk_type) {
 			case SCTP_INITIATION:
 				/*
 				 * Packets containing an INIT chunk MUST have
 				 * a zero v-tag.
 				 */
 				if (v_tag != 0) {
 					v_tag = 0;
 					break;
 				}
 				/*
 				 * INIT chunk MUST NOT be bundled.  v_tag is 0
 				 * here, so leaving the switch suppresses the
 				 * reply.  NOTE(review): the "+ 3" presumably
 				 * allows for chunk padding -- confirm.
 				 */
 				if (iplen >
 				    (ip->ip_hl << 2) + sizeof(struct sctphdr) +
 				    ntohs(chunk->chunk_length) + 3) {
 					break;
 				}
 				/* Use the initiate tag if available */
 				if ((iplen >= (ip->ip_hl << 2) +
 				    sizeof(struct sctphdr) +
 				    sizeof(struct sctp_chunkhdr) +
 				    offsetof(struct sctp_init, a_rwnd))) {
 					init = (struct sctp_init *)(chunk + 1);
 					v_tag = ntohl(init->initiate_tag);
 					reflected = 0;
 				}
 				break;
 			case SCTP_ABORT_ASSOCIATION:
 				/*
 				 * If the packet contains an ABORT chunk, don't
 				 * reply.
 				 * XXX: We should search through all chunks,
 				 * but do not do that to avoid attacks.
 				 */
 				v_tag = 0;
 				break;
 			}
 		}
 		if (v_tag == 0) {
 			m = NULL;
 		} else {
 			m = ipfw_send_abort(args->m, &(args->f_id), v_tag,
 			    reflected);
 		}
 		if (m != NULL)
 			ip_output(m, NULL, NULL, 0, NULL, NULL);
 		FREE_PKT(args->m);
 	} else
 		FREE_PKT(args->m);
 	/* Whatever happened, the caller no longer owns the packet. */
 	args->m = NULL;
 }
 
 /*
  * Support for uid/gid/jail lookup. These tests are expensive
  * (because we may need to look into the list of active sockets)
  * so we cache the results. ugid_lookupp is 0 if we have not
  * yet done a lookup, 1 if we succeeded, and -1 if we tried
  * and failed. The function always returns the match value.
  * We could actually spare the variable and use *uc, setting
  * it to '(void *)check_uidgid if we have no info, NULL if
  * we tried and failed, or any other value if successful.
  *
  * Parameters:
  *	insn		the O_UID, O_GID or O_JAIL instruction; insn->d[0]
  *			holds the id to compare against.
  *	args		packet state; f_id, inp, flags, ifp and m are read.
  *	ugid_lookupp	(in/out) lookup state as described above.
  *	uc		(in/out) cached credential, filled in on the first
  *			successful lookup.
  *
  * Returns non-zero if the credential matches the instruction, 0 if it
  * does not, if no credential could be found, or if the protocol is not
  * TCP, UDP or UDP-Lite.
  */
 static int
 check_uidgid(ipfw_insn_u32 *insn, struct ip_fw_args *args, int *ugid_lookupp,
     struct ucred **uc)
 {
 #if defined(USERSPACE)
 	return 0;	// not supported in userspace
 #else
 #ifndef __FreeBSD__
 	/* XXX */
 	return cred_check(insn, proto, oif,
 	    dst_ip, dst_port, src_ip, src_port,
 	    (struct bsd_ucred *)uc, ugid_lookupp, ((struct mbuf *)inp)->m_skb);
 #else  /* FreeBSD */
 	struct in_addr src_ip, dst_ip;
 	struct inpcbinfo *pi;
 	struct ipfw_flow_id *id;
 	struct inpcb *pcb, *inp;
 	int lookupflags;
 	int match;
 
 	id = &args->f_id;
 	inp = args->inp;
 
 	/*
 	 * Check to see if the UDP or TCP stack supplied us with
 	 * the PCB. If so, rather than holding a lock and looking
 	 * up the PCB, we can use the one that was supplied.
 	 */
 	if (inp && *ugid_lookupp == 0) {
 		INP_LOCK_ASSERT(inp);
 		if (inp->inp_socket != NULL) {
 			/*
 			 * NOTE(review): crhold() presumably takes a reference
 			 * that the owner of *uc must drop later -- confirm.
 			 */
 			*uc = crhold(inp->inp_cred);
 			*ugid_lookupp = 1;
 		} else
 			*ugid_lookupp = -1;
 	}
 	/*
 	 * If we have already been here and the packet has no
 	 * PCB entry associated with it, then we can safely
 	 * assume that this is a no match.
 	 */
 	if (*ugid_lookupp == -1)
 		return (0);
 	/* Select the PCB table to search; other protocols never match. */
 	if (id->proto == IPPROTO_TCP) {
 		lookupflags = 0;
 		pi = &V_tcbinfo;
 	} else if (id->proto == IPPROTO_UDP) {
 		lookupflags = INPLOOKUP_WILDCARD;
 		pi = &V_udbinfo;
 	} else if (id->proto == IPPROTO_UDPLITE) {
 		lookupflags = INPLOOKUP_WILDCARD;
 		pi = &V_ulitecbinfo;
 	} else
 		return 0;
 	/* The PCB returned by the lookup is read-locked; see below. */
 	lookupflags |= INPLOOKUP_RLOCKPCB;
 	match = 0;
 	if (*ugid_lookupp == 0) {
 		/*
 		 * No cached credential yet: search for the PCB.  For inbound
 		 * packets the packet's source address/port is passed first;
 		 * for outbound packets the two endpoints are swapped and the
 		 * interface is supplied as well.
 		 */
 		if (id->addr_type == 6) {
 #ifdef INET6
 			if (args->flags & IPFW_ARGS_IN)
 				pcb = in6_pcblookup_mbuf(pi,
 				    &id->src_ip6, htons(id->src_port),
 				    &id->dst_ip6, htons(id->dst_port),
 				    lookupflags, NULL, args->m);
 			else
 				pcb = in6_pcblookup_mbuf(pi,
 				    &id->dst_ip6, htons(id->dst_port),
 				    &id->src_ip6, htons(id->src_port),
 				    lookupflags, args->ifp, args->m);
 #else
 			*ugid_lookupp = -1;
 			return (0);
 #endif
 		} else {
 			src_ip.s_addr = htonl(id->src_ip);
 			dst_ip.s_addr = htonl(id->dst_ip);
 			if (args->flags & IPFW_ARGS_IN)
 				pcb = in_pcblookup_mbuf(pi,
 				    src_ip, htons(id->src_port),
 				    dst_ip, htons(id->dst_port),
 				    lookupflags, NULL, args->m);
 			else
 				pcb = in_pcblookup_mbuf(pi,
 				    dst_ip, htons(id->dst_port),
 				    src_ip, htons(id->src_port),
 				    lookupflags, args->ifp, args->m);
 		}
 		if (pcb != NULL) {
 			/* Grab the credential, then drop the PCB read lock. */
 			INP_RLOCK_ASSERT(pcb);
 			*uc = crhold(pcb->inp_cred);
 			*ugid_lookupp = 1;
 			INP_RUNLOCK(pcb);
 		}
 		if (*ugid_lookupp == 0) {
 			/*
 			 * We tried and failed, set the variable to -1
 			 * so we will not try again on this packet.
 			 */
 			*ugid_lookupp = -1;
 			return (0);
 		}
 	}
 	/* A credential is available in *uc: evaluate the instruction. */
 	if (insn->o.opcode == O_UID)
 		match = ((*uc)->cr_uid == (uid_t)insn->d[0]);
 	else if (insn->o.opcode == O_GID)
 		match = groupmember((gid_t)insn->d[0], *uc);
 	else if (insn->o.opcode == O_JAIL)
 		match = ((*uc)->cr_prison->pr_id == (int)insn->d[0]);
 	return (match);
 #endif /* __FreeBSD__ */
 #endif /* not supported in userspace */
 }
 
 /*
  * Helper function to set args with info on the rule after the matching
  * one. slot is precise, whereas we guess rule_id as they are
  * assigned sequentially.
  */
 static inline void
 set_match(struct ip_fw_args *args, int slot,
 	struct ip_fw_chain *chain)
 {
 	args->rule.chain_id = chain->id;
 	args->rule.slot = slot + 1; /* we use 0 as a marker */
 	args->rule.rule_id = 1 + chain->map[slot]->id;
 	args->rule.rulenum = chain->map[slot]->rulenum;
 	args->flags |= IPFW_ARGS_REF;
 }
 
 #ifndef LINEAR_SKIPTO
 /*
  * Resolve the rule position a jump to 'num' leads to, using the per-rule
  * cache kept in f->cached_id / f->cached_pos when possible.
  *
  * The cached position is used only while f->cached_id equals chain->id.
  * Targets given as IP_FW_TARG are resolved on every call and neither read
  * nor update the cache.  Unless jump_backwards is set, a target at or
  * before the current rule is replaced by the rule number right after it.
  *
  * NOTE(review): tablearg is not referenced directly here; presumably
  * IP_FW_ARG_TABLEARG() picks it up by name -- confirm.
  */
 static int
 jump_fast(struct ip_fw_chain *chain, struct ip_fw *f, int num,
     int tablearg, int jump_backwards)
 {
 	int cacheable, target, pos;
 
 	cacheable = (num != IP_FW_TARG);
 
 	/*
 	 * Cache hit: the position is stored in f->cached_pos and its
 	 * version in f->cached_id (horrible hacks to avoid changing the
 	 * ABI).
 	 */
 	if (cacheable && f->cached_id == chain->id)
 		return (f->cached_pos);
 
 	target = IP_FW_ARG_TABLEARG(chain, num, skipto);
 	/* make sure we do not jump backward */
 	if (jump_backwards == 0 && target <= f->rulenum)
 		target = f->rulenum + 1;
 
 	/* Prefer the index map; fall back to searching the chain. */
 	if (chain->idxmap != NULL)
 		pos = chain->idxmap[target];
 	else
 		pos = ipfw_find_rule(chain, target, 0);
 
 	/* update the cache */
 	if (cacheable) {
 		f->cached_id = chain->id;
 		f->cached_pos = pos;
 	}
 
 	return (pos);
 }
 #else
 /*
  * Resolve the rule position a jump to 'num' leads to, straight from
  * chain->idxmap and without any per-rule caching.  Unless jump_backwards
  * is set, a target at or before the current rule is replaced by the rule
  * number right after it.
  */
 static int
 jump_linear(struct ip_fw_chain *chain, struct ip_fw *f, int num,
     int tablearg, int jump_backwards)
 {
 	int target;
 
 	target = IP_FW_ARG_TABLEARG(chain, num, skipto);
 	/* make sure we do not jump backward */
 	if (jump_backwards == 0 && target <= f->rulenum)
 		target = f->rulenum + 1;
 
 	return (chain->idxmap[target]);
 }
 #endif
 
 #define	TARG(k, f)	IP_FW_ARG_TABLEARG(chain, k, f)
 /*
  * The main check routine for the firewall.
  *
  * All arguments are in args so we can modify them and return them
  * back to the caller.
  *
  * Parameters:
  *
  *	args->m	(in/out) The packet; we set to NULL when/if we nuke it.
  *		Starts with the IP header.
  *	args->L3offset	Number of bytes bypassed if we came from L2.
  *			e.g. often sizeof(eh)  ** NOTYET **
  *	args->ifp	Incoming or outgoing interface.
  *	args->divert_rule (in/out)
  *		Skip up to the first rule past this rule number;
  *		upon return, non-zero port number for divert or tee.
  *
  *	args->rule	Pointer to the last matching rule (in/out)
  *	args->next_hop	Socket we are forwarding to (out).
  *	args->next_hop6	IPv6 next hop we are forwarding to (out).
  *	args->f_id	Addresses grabbed from the packet (out)
  * 	args->rule.info	a cookie depending on rule action
  *
  * Return value:
  *
  *	IP_FW_PASS	the packet must be accepted
  *	IP_FW_DENY	the packet must be dropped
  *	IP_FW_DIVERT	divert packet, port in m_tag
  *	IP_FW_TEE	tee packet, port in m_tag
  *	IP_FW_DUMMYNET	to dummynet, pipe in args->cookie
  *	IP_FW_NETGRAPH	into netgraph, cookie args->cookie
  *		args->rule contains the matching rule,
  *		args->rule.info has additional information.
  *
  */
 int
 ipfw_chk(struct ip_fw_args *args)
 {
 
 	/*
 	 * Local variables holding state while processing a packet:
 	 *
 	 * IMPORTANT NOTE: to speed up the processing of rules, there
 	 * are some assumption on the values of the variables, which
 	 * are documented here. Should you change them, please check
 	 * the implementation of the various instructions to make sure
 	 * that they still work.
 	 *
 	 * m | args->m	Pointer to the mbuf, as received from the caller.
 	 *	It may change if ipfw_chk() does an m_pullup, or if it
 	 *	consumes the packet because it calls send_reject().
 	 *	XXX This has to change, so that ipfw_chk() never modifies
 	 *	or consumes the buffer.
 	 *	OR
 	 * args->mem	Pointer to contiguous memory chunk.
 	 * ip	Is the beginning of the ip(4 or 6) header.
 	 * eh	Ethernet header in case if input is Layer2.
 	 */
 	struct mbuf *m;
 	struct ip *ip;
 	struct ether_header *eh;
 
 	/*
 	 * For rules which contain uid/gid or jail constraints, cache
 	 * a copy of the users credentials after the pcb lookup has been
 	 * executed. This will speed up the processing of rules with
 	 * these types of constraints, as well as decrease contention
 	 * on pcb related locks.
 	 */
 #ifndef __FreeBSD__
 	struct bsd_ucred ucred_cache;
 #else
 	struct ucred *ucred_cache = NULL;
 #endif
 	int ucred_lookup = 0;
 	int f_pos = 0;		/* index of current rule in the array */
 	int retval = 0;
 	struct ifnet *oif, *iif;
 
 	/*
 	 * hlen	The length of the IP header.
 	 */
 	u_int hlen = 0;		/* hlen >0 means we have an IP pkt */
 
 	/*
 	 * offset	The offset of a fragment. offset != 0 means that
 	 *	we have a fragment at this offset of an IPv4 packet.
 	 *	offset == 0 means that (if this is an IPv4 packet)
 	 *	this is the first or only fragment.
 	 *	For IPv6 offset|ip6f_mf == 0 means there is no Fragment Header
 	 *	or there is a single packet fragment (fragment header added
 	 *	without needed).  We will treat a single packet fragment as if
 	 *	there was no fragment header (or log/block depending on the
 	 *	V_fw_permit_single_frag6 sysctl setting).
 	 */
 	u_short offset = 0;
 	u_short ip6f_mf = 0;
 
 	/*
 	 * Local copies of addresses. They are only valid if we have
 	 * an IP packet.
 	 *
 	 * proto	The protocol. Set to 0 for non-ip packets,
 	 *	or to the protocol read from the packet otherwise.
 	 *	proto != 0 means that we have an IPv4 packet.
 	 *
 	 * src_port, dst_port	port numbers, in HOST format. Only
 	 *	valid for TCP and UDP packets.
 	 *
 	 * src_ip, dst_ip	ip addresses, in NETWORK format.
 	 *	Only valid for IPv4 packets.
 	 */
 	uint8_t proto;
 	uint16_t src_port, dst_port;		/* NOTE: host format	*/
 	struct in_addr src_ip, dst_ip;		/* NOTE: network format	*/
 	int iplen = 0;
 	int pktlen;
 
 	struct ipfw_dyn_info dyn_info;
 	struct ip_fw *q = NULL;
 	struct ip_fw_chain *chain = &V_layer3_chain;
 
 	/*
 	 * We store in ulp a pointer to the upper layer protocol header.
 	 * In the ipv4 case this is easy to determine from the header,
 	 * but for ipv6 we might have some additional headers in the middle.
 	 * ulp is NULL if not found.
 	 */
 	void *ulp = NULL;		/* upper layer protocol pointer. */
 
 	/* XXX ipv6 variables */
 	int is_ipv6 = 0;
 	uint8_t	icmp6_type = 0;
 	uint16_t ext_hd = 0;	/* bits vector for extension header filtering */
 	/* end of ipv6 variables */
 
 	int is_ipv4 = 0;
 
 	int done = 0;		/* flag to exit the outer loop */
 	IPFW_RLOCK_TRACKER;
 	bool mem;
 
 	if ((mem = (args->flags & IPFW_ARGS_LENMASK))) {
 		if (args->flags & IPFW_ARGS_ETHER) {
 			eh = (struct ether_header *)args->mem;
 			if (eh->ether_type == htons(ETHERTYPE_VLAN))
 				ip = (struct ip *)
 				    ((struct ether_vlan_header *)eh + 1);
 			else
 				ip = (struct ip *)(eh + 1);
 		} else {
 			eh = NULL;
 			ip = (struct ip *)args->mem;
 		}
 		pktlen = IPFW_ARGS_LENGTH(args->flags);
 		args->f_id.fib = args->ifp->if_fib;	/* best guess */
 	} else {
 		m = args->m;
 		if (m->m_flags & M_SKIP_FIREWALL || (! V_ipfw_vnet_ready))
 			return (IP_FW_PASS);	/* accept */
 		if (args->flags & IPFW_ARGS_ETHER) {
 	                /* We need some amount of data to be contiguous. */
 			if (m->m_len < min(m->m_pkthdr.len, max_protohdr) &&
 			    (args->m = m = m_pullup(m, min(m->m_pkthdr.len,
 			    max_protohdr))) == NULL)
 				goto pullup_failed;
 			eh = mtod(m, struct ether_header *);
 			ip = (struct ip *)(eh + 1);
 		} else {
 			eh = NULL;
 			ip = mtod(m, struct ip *);
 		}
 		pktlen = m->m_pkthdr.len;
 		args->f_id.fib = M_GETFIB(m); /* mbuf not altered */
 	}
 
 	dst_ip.s_addr = 0;		/* make sure it is initialized */
 	src_ip.s_addr = 0;		/* make sure it is initialized */
 	src_port = dst_port = 0;
 
 	DYN_INFO_INIT(&dyn_info);
 /*
  * PULLUP_TO(len, p, T) makes sure that len + sizeof(T) is contiguous,
  * then it sets p to point at the offset "len" in the mbuf. WARNING: the
  * pointer might become stale after other pullups (but we never use it
  * this way).
  */
 #define	PULLUP_TO(_len, p, T)	PULLUP_LEN(_len, p, sizeof(T))
 #define	EHLEN	(eh != NULL ? ((char *)ip - (char *)eh) : 0)
 #define	_PULLUP_LOCKED(_len, p, T, unlock)			\
 do {								\
 	int x = (_len) + T + EHLEN;				\
 	if (mem) {						\
 		if (__predict_false(pktlen < x)) {		\
 			unlock;					\
 			goto pullup_failed;			\
 		}						\
 		p = (char *)args->mem + (_len) + EHLEN;		\
 	} else {						\
 		if (__predict_false((m)->m_len < x)) {		\
 			args->m = m = m_pullup(m, x);		\
 			if (m == NULL) {			\
 				unlock;				\
 				goto pullup_failed;		\
 			}					\
 		}						\
 		p = mtod(m, char *) + (_len) + EHLEN;		\
 	}							\
 } while (0)
 
 #define	PULLUP_LEN(_len, p, T)	_PULLUP_LOCKED(_len, p, T, )
 #define	PULLUP_LEN_LOCKED(_len, p, T)	\
     _PULLUP_LOCKED(_len, p, T, IPFW_PF_RUNLOCK(chain));	\
     UPDATE_POINTERS()
 /*
  * In case pointers got stale after pullups, update them.
  */
 #define	UPDATE_POINTERS()					\
 do {								\
 	if (!mem) {						\
 		if (eh != NULL) {				\
 			eh = mtod(m, struct ether_header *);	\
 			ip = (struct ip *)(eh + 1);		\
 		} else						\
 			ip = mtod(m, struct ip *);		\
 		args->m = m;					\
 	}							\
 } while (0)
 
 	/* Identify IP packets and fill up variables. */
 	if (pktlen >= sizeof(struct ip6_hdr) &&
 	    (eh == NULL || eh->ether_type == htons(ETHERTYPE_IPV6)) &&
 	    ip->ip_v == 6) {
 		struct ip6_hdr *ip6 = (struct ip6_hdr *)ip;
 
 		is_ipv6 = 1;
 		args->flags |= IPFW_ARGS_IP6;
 		hlen = sizeof(struct ip6_hdr);
 		proto = ip6->ip6_nxt;
 		/* Search extension headers to find upper layer protocols */
 		while (ulp == NULL && offset == 0) {
 			switch (proto) {
 			case IPPROTO_ICMPV6:
 				PULLUP_TO(hlen, ulp, struct icmp6_hdr);
 				icmp6_type = ICMP6(ulp)->icmp6_type;
 				break;
 
 			case IPPROTO_TCP:
 				PULLUP_TO(hlen, ulp, struct tcphdr);
 				dst_port = TCP(ulp)->th_dport;
 				src_port = TCP(ulp)->th_sport;
 				/* save flags for dynamic rules */
 				args->f_id._flags = TCP(ulp)->th_flags;
 				break;
 
 			case IPPROTO_SCTP:
 				if (pktlen >= hlen + sizeof(struct sctphdr) +
 				    sizeof(struct sctp_chunkhdr) +
 				    offsetof(struct sctp_init, a_rwnd))
 					PULLUP_LEN(hlen, ulp,
 					    sizeof(struct sctphdr) +
 					    sizeof(struct sctp_chunkhdr) +
 					    offsetof(struct sctp_init, a_rwnd));
 				else if (pktlen >= hlen + sizeof(struct sctphdr))
 					PULLUP_LEN(hlen, ulp, pktlen - hlen);
 				else
 					PULLUP_LEN(hlen, ulp,
 					    sizeof(struct sctphdr));
 				src_port = SCTP(ulp)->src_port;
 				dst_port = SCTP(ulp)->dest_port;
 				break;
 
 			case IPPROTO_UDP:
 			case IPPROTO_UDPLITE:
 				PULLUP_TO(hlen, ulp, struct udphdr);
 				dst_port = UDP(ulp)->uh_dport;
 				src_port = UDP(ulp)->uh_sport;
 				break;
 
 			case IPPROTO_HOPOPTS:	/* RFC 2460 */
 				PULLUP_TO(hlen, ulp, struct ip6_hbh);
 				ext_hd |= EXT_HOPOPTS;
 				hlen += (((struct ip6_hbh *)ulp)->ip6h_len + 1) << 3;
 				proto = ((struct ip6_hbh *)ulp)->ip6h_nxt;
 				ulp = NULL;
 				break;
 
 			case IPPROTO_ROUTING:	/* RFC 2460 */
 				PULLUP_TO(hlen, ulp, struct ip6_rthdr);
 				switch (((struct ip6_rthdr *)ulp)->ip6r_type) {
 				case 0:
 					ext_hd |= EXT_RTHDR0;
 					break;
 				case 2:
 					ext_hd |= EXT_RTHDR2;
 					break;
 				default:
 					if (V_fw_verbose)
 						printf("IPFW2: IPV6 - Unknown "
 						    "Routing Header type(%d)\n",
 						    ((struct ip6_rthdr *)
 						    ulp)->ip6r_type);
 					if (V_fw_deny_unknown_exthdrs)
 					    return (IP_FW_DENY);
 					break;
 				}
 				ext_hd |= EXT_ROUTING;
 				hlen += (((struct ip6_rthdr *)ulp)->ip6r_len + 1) << 3;
 				proto = ((struct ip6_rthdr *)ulp)->ip6r_nxt;
 				ulp = NULL;
 				break;
 
 			case IPPROTO_FRAGMENT:	/* RFC 2460 */
 				PULLUP_TO(hlen, ulp, struct ip6_frag);
 				ext_hd |= EXT_FRAGMENT;
 				hlen += sizeof (struct ip6_frag);
 				proto = ((struct ip6_frag *)ulp)->ip6f_nxt;
 				offset = ((struct ip6_frag *)ulp)->ip6f_offlg &
 					IP6F_OFF_MASK;
 				ip6f_mf = ((struct ip6_frag *)ulp)->ip6f_offlg &
 					IP6F_MORE_FRAG;
 				if (V_fw_permit_single_frag6 == 0 &&
 				    offset == 0 && ip6f_mf == 0) {
 					if (V_fw_verbose)
 						printf("IPFW2: IPV6 - Invalid "
 						    "Fragment Header\n");
 					if (V_fw_deny_unknown_exthdrs)
 					    return (IP_FW_DENY);
 					break;
 				}
 				args->f_id.extra =
 				    ntohl(((struct ip6_frag *)ulp)->ip6f_ident);
 				ulp = NULL;
 				break;
 
 			case IPPROTO_DSTOPTS:	/* RFC 2460 */
 				PULLUP_TO(hlen, ulp, struct ip6_hbh);
 				ext_hd |= EXT_DSTOPTS;
 				hlen += (((struct ip6_hbh *)ulp)->ip6h_len + 1) << 3;
 				proto = ((struct ip6_hbh *)ulp)->ip6h_nxt;
 				ulp = NULL;
 				break;
 
 			case IPPROTO_AH:	/* RFC 2402 */
 				PULLUP_TO(hlen, ulp, struct ip6_ext);
 				ext_hd |= EXT_AH;
 				hlen += (((struct ip6_ext *)ulp)->ip6e_len + 2) << 2;
 				proto = ((struct ip6_ext *)ulp)->ip6e_nxt;
 				ulp = NULL;
 				break;
 
 			case IPPROTO_ESP:	/* RFC 2406 */
 				PULLUP_TO(hlen, ulp, uint32_t);	/* SPI, Seq# */
 				/* Anything past Seq# is variable length and
 				 * data past this ext. header is encrypted. */
 				ext_hd |= EXT_ESP;
 				break;
 
 			case IPPROTO_NONE:	/* RFC 2460 */
 				/*
 				 * Packet ends here, and IPv6 header has
 				 * already been pulled up. If ip6e_len!=0
 				 * then octets must be ignored.
 				 */
 				ulp = ip; /* non-NULL to get out of loop. */
 				break;
 
 			case IPPROTO_OSPFIGP:
 				/* XXX OSPF header check? */
 				PULLUP_TO(hlen, ulp, struct ip6_ext);
 				break;
 
 			case IPPROTO_PIM:
 				/* XXX PIM header check? */
 				PULLUP_TO(hlen, ulp, struct pim);
 				break;
 
 			case IPPROTO_GRE:	/* RFC 1701 */
 				/* XXX GRE header check? */
 				PULLUP_TO(hlen, ulp, struct grehdr);
 				break;
 
 			case IPPROTO_CARP:
 				PULLUP_TO(hlen, ulp, offsetof(
 				    struct carp_header, carp_counter));
 				if (CARP_ADVERTISEMENT !=
 				    ((struct carp_header *)ulp)->carp_type)
 					return (IP_FW_DENY);
 				break;
 
 			case IPPROTO_IPV6:	/* RFC 2893 */
 				PULLUP_TO(hlen, ulp, struct ip6_hdr);
 				break;
 
 			case IPPROTO_IPV4:	/* RFC 2893 */
 				PULLUP_TO(hlen, ulp, struct ip);
 				break;
 
 			default:
 				if (V_fw_verbose)
 					printf("IPFW2: IPV6 - Unknown "
 					    "Extension Header(%d), ext_hd=%x\n",
 					     proto, ext_hd);
 				if (V_fw_deny_unknown_exthdrs)
 				    return (IP_FW_DENY);
 				PULLUP_TO(hlen, ulp, struct ip6_ext);
 				break;
 			} /*switch */
 		}
 		UPDATE_POINTERS();
 		ip6 = (struct ip6_hdr *)ip;
 		args->f_id.addr_type = 6;
 		args->f_id.src_ip6 = ip6->ip6_src;
 		args->f_id.dst_ip6 = ip6->ip6_dst;
 		args->f_id.flow_id6 = ntohl(ip6->ip6_flow);
 		iplen = ntohs(ip6->ip6_plen) + sizeof(*ip6);
 	} else if (pktlen >= sizeof(struct ip) &&
 	    (eh == NULL || eh->ether_type == htons(ETHERTYPE_IP)) &&
 	    ip->ip_v == 4) {
 		is_ipv4 = 1;
 		args->flags |= IPFW_ARGS_IP4;
 		hlen = ip->ip_hl << 2;
 		/*
 		 * Collect parameters into local variables for faster
 		 * matching.
 		 */
 		proto = ip->ip_p;
 		src_ip = ip->ip_src;
 		dst_ip = ip->ip_dst;
 		offset = ntohs(ip->ip_off) & IP_OFFMASK;
 		iplen = ntohs(ip->ip_len);
 
 		if (offset == 0) {
 			switch (proto) {
 			case IPPROTO_TCP:
 				PULLUP_TO(hlen, ulp, struct tcphdr);
 				dst_port = TCP(ulp)->th_dport;
 				src_port = TCP(ulp)->th_sport;
 				/* save flags for dynamic rules */
 				args->f_id._flags = TCP(ulp)->th_flags;
 				break;
 
 			case IPPROTO_SCTP:
 				if (pktlen >= hlen + sizeof(struct sctphdr) +
 				    sizeof(struct sctp_chunkhdr) +
 				    offsetof(struct sctp_init, a_rwnd))
 					PULLUP_LEN(hlen, ulp,
 					    sizeof(struct sctphdr) +
 					    sizeof(struct sctp_chunkhdr) +
 					    offsetof(struct sctp_init, a_rwnd));
 				else if (pktlen >= hlen + sizeof(struct sctphdr))
 					PULLUP_LEN(hlen, ulp, pktlen - hlen);
 				else
 					PULLUP_LEN(hlen, ulp,
 					    sizeof(struct sctphdr));
 				src_port = SCTP(ulp)->src_port;
 				dst_port = SCTP(ulp)->dest_port;
 				break;
 
 			case IPPROTO_UDP:
 			case IPPROTO_UDPLITE:
 				PULLUP_TO(hlen, ulp, struct udphdr);
 				dst_port = UDP(ulp)->uh_dport;
 				src_port = UDP(ulp)->uh_sport;
 				break;
 
 			case IPPROTO_ICMP:
 				PULLUP_TO(hlen, ulp, struct icmphdr);
 				//args->f_id.flags = ICMP(ulp)->icmp_type;
 				break;
 
 			default:
 				break;
 			}
 		} else {
 			if (offset == 1 && proto == IPPROTO_TCP) {
 				/* RFC 3128 */
 				goto pullup_failed;
 			}
 		}
 
 		UPDATE_POINTERS();
 		args->f_id.addr_type = 4;
 		args->f_id.src_ip = ntohl(src_ip.s_addr);
 		args->f_id.dst_ip = ntohl(dst_ip.s_addr);
 	} else {
 		proto = 0;
 		dst_ip.s_addr = src_ip.s_addr = 0;
 
 		args->f_id.addr_type = 1; /* XXX */
 	}
 #undef PULLUP_TO
 	pktlen = iplen < pktlen ? iplen: pktlen;
 
 	/* Properly initialize the rest of f_id */
 	args->f_id.proto = proto;
 	args->f_id.src_port = src_port = ntohs(src_port);
 	args->f_id.dst_port = dst_port = ntohs(dst_port);
 
 	IPFW_PF_RLOCK(chain);
 	if (! V_ipfw_vnet_ready) { /* shutting down, leave NOW. */
 		IPFW_PF_RUNLOCK(chain);
 		return (IP_FW_PASS);	/* accept */
 	}
 	if (args->flags & IPFW_ARGS_REF) {
 		/*
 		 * Packet has already been tagged as a result of a previous
 		 * match on rule args->rule aka args->rule_id (PIPE, QUEUE,
 		 * REASS, NETGRAPH, DIVERT/TEE...)
 		 * Validate the slot and continue from the next one
 		 * if still present, otherwise do a lookup.
 		 */
 		f_pos = (args->rule.chain_id == chain->id) ?
 		    args->rule.slot :
 		    ipfw_find_rule(chain, args->rule.rulenum,
 			args->rule.rule_id);
 	} else {
 		f_pos = 0;
 	}
 
 	if (args->flags & IPFW_ARGS_IN) {
 		iif = args->ifp;
 		oif = NULL;
 	} else {
 		MPASS(args->flags & IPFW_ARGS_OUT);
 		iif = mem ? NULL : m_rcvif(m);
 		oif = args->ifp;
 	}
 
 	/*
 	 * Now scan the rules, and parse microinstructions for each rule.
 	 * We have two nested loops and an inner switch. Sometimes we
 	 * need to break out of one or both loops, or re-enter one of
 	 * the loops with updated variables. Loop variables are:
 	 *
 	 *	f_pos (outer loop) points to the current rule.
 	 *		On output it points to the matching rule.
 	 *	done (outer loop) is used as a flag to break the loop.
 	 *	l (inner loop)	residual length of current rule.
 	 *		cmd points to the current microinstruction.
 	 *
 	 * We break the inner loop by setting l=0 and possibly
 	 * cmdlen=0 if we don't want to advance cmd.
 	 * We break the outer loop by setting done=1
 	 * We can restart the inner loop by setting l>0 and f_pos, f, cmd
 	 * as needed.
 	 */
 	for (; f_pos < chain->n_rules; f_pos++) {
 		ipfw_insn *cmd;
 		uint32_t tablearg = 0;
 		int l, cmdlen, skip_or; /* skip rest of OR block */
 		struct ip_fw *f;
 
 		f = chain->map[f_pos];
 		if (V_set_disable & (1 << f->set) )
 			continue;
 
 		skip_or = 0;
 		for (l = f->cmd_len, cmd = f->cmd ; l > 0 ;
 		    l -= cmdlen, cmd += cmdlen) {
 			int match;
 
 			/*
 			 * check_body is a jump target used when we find a
 			 * CHECK_STATE, and need to jump to the body of
 			 * the target rule.
 			 */
 
 /* check_body: */
 			cmdlen = F_LEN(cmd);
 			/*
 			 * An OR block (insn_1 || .. || insn_n) has the
 			 * F_OR bit set in all but the last instruction.
 			 * The first match will set "skip_or", and cause
 			 * the following instructions to be skipped until
 			 * past the one with the F_OR bit clear.
 			 */
 			if (skip_or) {		/* skip this instruction */
 				if ((cmd->len & F_OR) == 0)
 					skip_or = 0;	/* next one is good */
 				continue;
 			}
 			match = 0; /* set to 1 if we succeed */
 
 			switch (cmd->opcode) {
 			/*
 			 * The first set of opcodes compares the packet's
 			 * fields with some pattern, setting 'match' if a
 			 * match is found. At the end of the loop there is
 			 * logic to deal with F_NOT and F_OR flags associated
 			 * with the opcode.
 			 */
 			case O_NOP:
 				match = 1;
 				break;
 
 			case O_FORWARD_MAC:
 				printf("ipfw: opcode %d unimplemented\n",
 				    cmd->opcode);
 				break;
 
 			case O_GID:
 			case O_UID:
 			case O_JAIL:
 				/*
 				 * We only check offset == 0 && proto != 0,
 				 * as this ensures that we have a
 				 * packet with the ports info.
 				 */
 				if (offset != 0)
 					break;
 				if (proto == IPPROTO_TCP ||
 				    proto == IPPROTO_UDP ||
 				    proto == IPPROTO_UDPLITE)
 					match = check_uidgid(
 						    (ipfw_insn_u32 *)cmd,
 						    args, &ucred_lookup,
 #ifdef __FreeBSD__
 						    &ucred_cache);
 #else
 						    (void *)&ucred_cache);
 #endif
 				break;
 
 			case O_RECV:
 				match = iface_match(iif, (ipfw_insn_if *)cmd,
 				    chain, &tablearg);
 				break;
 
 			case O_XMIT:
 				match = iface_match(oif, (ipfw_insn_if *)cmd,
 				    chain, &tablearg);
 				break;
 
 			case O_VIA:
 				match = iface_match(args->ifp,
 				    (ipfw_insn_if *)cmd, chain, &tablearg);
 				break;
 
 			case O_MACADDR2:
 				if (args->flags & IPFW_ARGS_ETHER) {
 					u_int32_t *want = (u_int32_t *)
 						((ipfw_insn_mac *)cmd)->addr;
 					u_int32_t *mask = (u_int32_t *)
 						((ipfw_insn_mac *)cmd)->mask;
 					u_int32_t *hdr = (u_int32_t *)eh;
 
 					match =
 					    ( want[0] == (hdr[0] & mask[0]) &&
 					      want[1] == (hdr[1] & mask[1]) &&
 					      want[2] == (hdr[2] & mask[2]) );
 				}
 				break;
 
 			case O_MAC_TYPE:
 				if (args->flags & IPFW_ARGS_ETHER) {
 					u_int16_t *p =
 					    ((ipfw_insn_u16 *)cmd)->ports;
 					int i;
 
 					for (i = cmdlen - 1; !match && i>0;
 					    i--, p += 2)
 						match =
 						    (ntohs(eh->ether_type) >=
 						    p[0] &&
 						    ntohs(eh->ether_type) <=
 						    p[1]);
 				}
 				break;
 
 			case O_FRAG:
 				match = (offset != 0);
 				break;
 
 			case O_IN:	/* "out" is "not in" */
 				match = (oif == NULL);
 				break;
 
 			case O_LAYER2:
 				match = (args->flags & IPFW_ARGS_ETHER);
 				break;
 
 			case O_DIVERTED:
 				if ((args->flags & IPFW_ARGS_REF) == 0)
 					break;
 				/*
 				 * For diverted packets, args->rule.info
 				 * contains the divert port (in host format)
 				 * reason and direction.
 				 */
 				match = ((args->rule.info & IPFW_IS_MASK) ==
 				    IPFW_IS_DIVERT) && (
 				    ((args->rule.info & IPFW_INFO_IN) ?
 					1: 2) & cmd->arg1);
 				break;
 
 			case O_PROTO:
 				/*
 				 * We do not allow an arg of 0 so the
 				 * check of "proto" only suffices.
 				 */
 				match = (proto == cmd->arg1);
 				break;
 
 			case O_IP_SRC:
 				match = is_ipv4 &&
 				    (((ipfw_insn_ip *)cmd)->addr.s_addr ==
 				    src_ip.s_addr);
 				break;
 
 			case O_IP_DST_LOOKUP:
 			{
 				void *pkey;
 				uint32_t vidx, key;
 				uint16_t keylen;
 
 				if (cmdlen > F_INSN_SIZE(ipfw_insn_u32)) {
 					/* Determine lookup key type */
 					vidx = ((ipfw_insn_u32 *)cmd)->d[1];
 					if (vidx != 4 /* uid */ &&
 					    vidx != 5 /* jail */ &&
 					    is_ipv6 == 0 && is_ipv4 == 0)
 						break;
 					/* Determine key length */
 					if (vidx == 0 /* dst-ip */ ||
 					    vidx == 1 /* src-ip */)
 						keylen = is_ipv6 ?
 						    sizeof(struct in6_addr):
 						    sizeof(in_addr_t);
 					else {
 						keylen = sizeof(key);
 						pkey = &key;
 					}
 					if (vidx == 0 /* dst-ip */)
 						pkey = is_ipv4 ? (void *)&dst_ip:
 						    (void *)&args->f_id.dst_ip6;
 					else if (vidx == 1 /* src-ip */)
 						pkey = is_ipv4 ? (void *)&src_ip:
 						    (void *)&args->f_id.src_ip6;
 					else if (vidx == 6 /* dscp */) {
 						if (is_ipv4)
 							key = ip->ip_tos >> 2;
 						else {
 							key = args->f_id.flow_id6;
 							key = (key & 0x0f) << 2 |
 							    (key & 0xf000) >> 14;
 						}
 						key &= 0x3f;
 					} else if (vidx == 2 /* dst-port */ ||
 					    vidx == 3 /* src-port */) {
 						/* Skip fragments */
 						if (offset != 0)
 							break;
 						/* Skip proto without ports */
 						if (proto != IPPROTO_TCP &&
 						    proto != IPPROTO_UDP &&
 						    proto != IPPROTO_UDPLITE &&
 						    proto != IPPROTO_SCTP)
 							break;
 						if (vidx == 2 /* dst-port */)
 							key = dst_port;
 						else
 							key = src_port;
 					}
 #ifndef USERSPACE
 					else if (vidx == 4 /* uid */ ||
 					    vidx == 5 /* jail */) {
 						check_uidgid(
 						    (ipfw_insn_u32 *)cmd,
 						    args, &ucred_lookup,
 #ifdef __FreeBSD__
 						    &ucred_cache);
 						if (vidx == 4 /* uid */)
 							key = ucred_cache->cr_uid;
 						else if (vidx == 5 /* jail */)
 							key = ucred_cache->cr_prison->pr_id;
 #else /* !__FreeBSD__ */
 						    (void *)&ucred_cache);
 						if (vidx == 4 /* uid */)
 							key = ucred_cache.uid;
 						else if (vidx == 5 /* jail */)
 							key = ucred_cache.xid;
 #endif /* !__FreeBSD__ */
 					}
 #endif /* !USERSPACE */
 					else
 						break;
 					match = ipfw_lookup_table(chain,
 					    cmd->arg1, keylen, pkey, &vidx);
 					if (!match)
 						break;
 					tablearg = vidx;
 					break;
 				}
 				/* cmdlen =< F_INSN_SIZE(ipfw_insn_u32) */
 				/* FALLTHROUGH */
 			}
 			case O_IP_SRC_LOOKUP:
 			{
 				void *pkey;
 				uint32_t vidx;
 				uint16_t keylen;
 
 				if (is_ipv4) {
 					keylen = sizeof(in_addr_t);
 					if (cmd->opcode == O_IP_DST_LOOKUP)
 						pkey = &dst_ip;
 					else
 						pkey = &src_ip;
 				} else if (is_ipv6) {
 					keylen = sizeof(struct in6_addr);
 					if (cmd->opcode == O_IP_DST_LOOKUP)
 						pkey = &args->f_id.dst_ip6;
 					else
 						pkey = &args->f_id.src_ip6;
 				} else
 					break;
 				match = ipfw_lookup_table(chain, cmd->arg1,
 				    keylen, pkey, &vidx);
 				if (!match)
 					break;
 				if (cmdlen == F_INSN_SIZE(ipfw_insn_u32)) {
 					match = ((ipfw_insn_u32 *)cmd)->d[0] ==
 					    TARG_VAL(chain, vidx, tag);
 					if (!match)
 						break;
 				}
 				tablearg = vidx;
 				break;
 			}
 
 			case O_IP_FLOW_LOOKUP:
 				{
 					uint32_t v = 0;
 					match = ipfw_lookup_table(chain,
 					    cmd->arg1, 0, &args->f_id, &v);
 					if (!match)
 						break;
 					if (cmdlen == F_INSN_SIZE(ipfw_insn_u32))
 						match = ((ipfw_insn_u32 *)cmd)->d[0] ==
 						    TARG_VAL(chain, v, tag);
 					if (match)
 						tablearg = v;
 				}
 				break;
 			case O_IP_SRC_MASK:
 			case O_IP_DST_MASK:
 				if (is_ipv4) {
 				    uint32_t a =
 					(cmd->opcode == O_IP_DST_MASK) ?
 					    dst_ip.s_addr : src_ip.s_addr;
 				    uint32_t *p = ((ipfw_insn_u32 *)cmd)->d;
 				    int i = cmdlen-1;
 
 				    for (; !match && i>0; i-= 2, p+= 2)
 					match = (p[0] == (a & p[1]));
 				}
 				break;
 
 			case O_IP_SRC_ME:
 				if (is_ipv4) {
 					match = in_localip(src_ip);
 					break;
 				}
 #ifdef INET6
 				/* FALLTHROUGH */
 			case O_IP6_SRC_ME:
 				match = is_ipv6 &&
 				    ipfw_localip6(&args->f_id.src_ip6);
 #endif
 				break;
 
 			case O_IP_DST_SET:
 			case O_IP_SRC_SET:
 				if (is_ipv4) {
 					u_int32_t *d = (u_int32_t *)(cmd+1);
 					u_int32_t addr =
 					    cmd->opcode == O_IP_DST_SET ?
 						args->f_id.dst_ip :
 						args->f_id.src_ip;
 
 					    if (addr < d[0])
 						    break;
 					    addr -= d[0]; /* subtract base */
 					    match = (addr < cmd->arg1) &&
 						( d[ 1 + (addr>>5)] &
 						  (1<<(addr & 0x1f)) );
 				}
 				break;
 
 			case O_IP_DST:
 				match = is_ipv4 &&
 				    (((ipfw_insn_ip *)cmd)->addr.s_addr ==
 				    dst_ip.s_addr);
 				break;
 
 			case O_IP_DST_ME:
 				if (is_ipv4) {
 					match = in_localip(dst_ip);
 					break;
 				}
 #ifdef INET6
 				/* FALLTHROUGH */
 			case O_IP6_DST_ME:
 				match = is_ipv6 &&
 				    ipfw_localip6(&args->f_id.dst_ip6);
 #endif
 				break;
 
 
 			case O_IP_SRCPORT:
 			case O_IP_DSTPORT:
 				/*
 				 * offset == 0 && proto != 0 is enough
 				 * to guarantee that we have a
 				 * packet with port info.
 				 */
 				if ((proto == IPPROTO_UDP ||
 				    proto == IPPROTO_UDPLITE ||
 				    proto == IPPROTO_TCP ||
 				    proto == IPPROTO_SCTP) && offset == 0) {
 					u_int16_t x =
 					    (cmd->opcode == O_IP_SRCPORT) ?
 						src_port : dst_port ;
 					u_int16_t *p =
 					    ((ipfw_insn_u16 *)cmd)->ports;
 					int i;
 
 					for (i = cmdlen - 1; !match && i>0;
 					    i--, p += 2)
 						match = (x>=p[0] && x<=p[1]);
 				}
 				break;
 
 			case O_ICMPTYPE:
 				match = (offset == 0 && proto==IPPROTO_ICMP &&
 				    icmptype_match(ICMP(ulp), (ipfw_insn_u32 *)cmd) );
 				break;
 
 #ifdef INET6
 			case O_ICMP6TYPE:
 				match = is_ipv6 && offset == 0 &&
 				    proto==IPPROTO_ICMPV6 &&
 				    icmp6type_match(
 					ICMP6(ulp)->icmp6_type,
 					(ipfw_insn_u32 *)cmd);
 				break;
 #endif /* INET6 */
 
 			case O_IPOPT:
 				match = (is_ipv4 &&
 				    ipopts_match(ip, cmd) );
 				break;
 
 			case O_IPVER:
 				match = ((is_ipv4 || is_ipv6) &&
 				    cmd->arg1 == ip->ip_v);
 				break;
 
 			case O_IPID:
 			case O_IPTTL:
 				if (!is_ipv4)
 					break;
 			case O_IPLEN:
 				{	/* only for IP packets */
 				    uint16_t x;
 				    uint16_t *p;
 				    int i;
 
 				    if (cmd->opcode == O_IPLEN)
 					x = iplen;
 				    else if (cmd->opcode == O_IPTTL)
 					x = ip->ip_ttl;
 				    else /* must be IPID */
 					x = ntohs(ip->ip_id);
 				    if (cmdlen == 1) {
 					match = (cmd->arg1 == x);
 					break;
 				    }
 				    /* otherwise we have ranges */
 				    p = ((ipfw_insn_u16 *)cmd)->ports;
 				    i = cmdlen - 1;
 				    for (; !match && i>0; i--, p += 2)
 					match = (x >= p[0] && x <= p[1]);
 				}
 				break;
 
 			case O_IPPRECEDENCE:
 				match = (is_ipv4 &&
 				    (cmd->arg1 == (ip->ip_tos & 0xe0)) );
 				break;
 
 			case O_IPTOS:
 				match = (is_ipv4 &&
 				    flags_match(cmd, ip->ip_tos));
 				break;
 
 			case O_DSCP:
 			    {
 				uint32_t *p;
 				uint16_t x;
 
 				p = ((ipfw_insn_u32 *)cmd)->d;
 
 				if (is_ipv4)
 					x = ip->ip_tos >> 2;
 				else if (is_ipv6) {
 					uint8_t *v;
 					v = &((struct ip6_hdr *)ip)->ip6_vfc;
 					x = (*v & 0x0F) << 2;
 					v++;
 					x |= *v >> 6;
 				} else
 					break;
 
 				/* DSCP bitmask is stored as low_u32 high_u32 */
 				if (x >= 32)
 					match = *(p + 1) & (1 << (x - 32));
 				else
 					match = *p & (1 << x);
 			    }
 				break;
 
 			case O_TCPDATALEN:
 				if (proto == IPPROTO_TCP && offset == 0) {
 				    struct tcphdr *tcp;
 				    uint16_t x;
 				    uint16_t *p;
 				    int i;
 #ifdef INET6
 				    if (is_ipv6) {
 					    struct ip6_hdr *ip6;
 
 					    ip6 = (struct ip6_hdr *)ip;
 					    if (ip6->ip6_plen == 0) {
 						    /*
 						     * Jumbo payload is not
 						     * supported by this
 						     * opcode.
 						     */
 						    break;
 					    }
 					    x = iplen - hlen;
 				    } else
 #endif /* INET6 */
 					    x = iplen - (ip->ip_hl << 2);
 				    tcp = TCP(ulp);
 				    x -= tcp->th_off << 2;
 				    if (cmdlen == 1) {
 					match = (cmd->arg1 == x);
 					break;
 				    }
 				    /* otherwise we have ranges */
 				    p = ((ipfw_insn_u16 *)cmd)->ports;
 				    i = cmdlen - 1;
 				    for (; !match && i>0; i--, p += 2)
 					match = (x >= p[0] && x <= p[1]);
 				}
 				break;
 
 			case O_TCPFLAGS:
 				match = (proto == IPPROTO_TCP && offset == 0 &&
 				    flags_match(cmd, TCP(ulp)->th_flags));
 				break;
 
 			case O_TCPOPTS:
 				if (proto == IPPROTO_TCP && offset == 0 && ulp){
 					PULLUP_LEN_LOCKED(hlen, ulp,
 					    (TCP(ulp)->th_off << 2));
 					match = tcpopts_match(TCP(ulp), cmd);
 				}
 				break;
 
 			case O_TCPSEQ:
 				match = (proto == IPPROTO_TCP && offset == 0 &&
 				    ((ipfw_insn_u32 *)cmd)->d[0] ==
 					TCP(ulp)->th_seq);
 				break;
 
 			case O_TCPACK:
 				match = (proto == IPPROTO_TCP && offset == 0 &&
 				    ((ipfw_insn_u32 *)cmd)->d[0] ==
 					TCP(ulp)->th_ack);
 				break;
 
 			case O_TCPMSS:
 				if (proto == IPPROTO_TCP &&
 				    (args->f_id._flags & TH_SYN) != 0 &&
 				    ulp != NULL) {
 					uint16_t mss, *p;
 					int i;
 
 					PULLUP_LEN_LOCKED(hlen, ulp,
 					    (TCP(ulp)->th_off << 2));
 					if ((tcpopts_parse(TCP(ulp), &mss) &
 					    IP_FW_TCPOPT_MSS) == 0)
 						break;
 					if (cmdlen == 1) {
 						match = (cmd->arg1 == mss);
 						break;
 					}
 					/* Otherwise we have ranges. */
 					p = ((ipfw_insn_u16 *)cmd)->ports;
 					i = cmdlen - 1;
 					for (; !match && i > 0; i--, p += 2)
 						match = (mss >= p[0] &&
 						    mss <= p[1]);
 				}
 				break;
 
 			case O_TCPWIN:
 				if (proto == IPPROTO_TCP && offset == 0) {
 				    uint16_t x;
 				    uint16_t *p;
 				    int i;
 
 				    x = ntohs(TCP(ulp)->th_win);
 				    if (cmdlen == 1) {
 					match = (cmd->arg1 == x);
 					break;
 				    }
 				    /* Otherwise we have ranges. */
 				    p = ((ipfw_insn_u16 *)cmd)->ports;
 				    i = cmdlen - 1;
 				    for (; !match && i > 0; i--, p += 2)
 					match = (x >= p[0] && x <= p[1]);
 				}
 				break;
 
 			case O_ESTAB:
 				/* reject packets which have SYN only */
 				/* XXX should i also check for TH_ACK ? */
 				match = (proto == IPPROTO_TCP && offset == 0 &&
 				    (TCP(ulp)->th_flags &
 				     (TH_RST | TH_ACK | TH_SYN)) != TH_SYN);
 				break;
 
 			case O_ALTQ: {
 				struct pf_mtag *at;
 				struct m_tag *mtag;
 				ipfw_insn_altq *altq = (ipfw_insn_altq *)cmd;
 
 				/*
 				 * ALTQ uses mbuf tags from another
 				 * packet filtering system - pf(4).
 				 * We allocate a tag in its format
 				 * and fill it in, pretending to be pf(4).
 				 */
 				match = 1;
 				at = pf_find_mtag(m);
 				if (at != NULL && at->qid != 0)
 					break;
 				mtag = m_tag_get(PACKET_TAG_PF,
 				    sizeof(struct pf_mtag), M_NOWAIT | M_ZERO);
 				if (mtag == NULL) {
 					/*
 					 * Let the packet fall back to the
 					 * default ALTQ.
 					 */
 					break;
 				}
 				m_tag_prepend(m, mtag);
 				at = (struct pf_mtag *)(mtag + 1);
 				at->qid = altq->qid;
 				at->hdr = ip;
 				break;
 			}
 
 			case O_LOG:
 				ipfw_log(chain, f, hlen, args,
 				    offset | ip6f_mf, tablearg, ip);
 				match = 1;
 				break;
 
 			case O_PROB:
 				match = (random()<((ipfw_insn_u32 *)cmd)->d[0]);
 				break;
 
 			case O_VERREVPATH:
 				/* Outgoing packets automatically pass/match */
 				match = (args->flags & IPFW_ARGS_OUT ||
 				    (
 #ifdef INET6
 				    is_ipv6 ?
 					verify_path6(&(args->f_id.src_ip6),
 					    iif, args->f_id.fib) :
 #endif
 				    verify_path(src_ip, iif, args->f_id.fib)));
 				break;
 
 			case O_VERSRCREACH:
 				/* Outgoing packets automatically pass/match */
 				match = (hlen > 0 && ((oif != NULL) || (
 #ifdef INET6
 				    is_ipv6 ?
 				        verify_path6(&(args->f_id.src_ip6),
 				            NULL, args->f_id.fib) :
 #endif
 				    verify_path(src_ip, NULL, args->f_id.fib))));
 				break;
 
 			case O_ANTISPOOF:
 				/* Outgoing packets automatically pass/match */
 				if (oif == NULL && hlen > 0 &&
 				    (  (is_ipv4 && in_localaddr(src_ip))
 #ifdef INET6
 				    || (is_ipv6 &&
 				        in6_localaddr(&(args->f_id.src_ip6)))
 #endif
 				    ))
 					match =
 #ifdef INET6
 					    is_ipv6 ? verify_path6(
 					        &(args->f_id.src_ip6), iif,
 						args->f_id.fib) :
 #endif
 					    verify_path(src_ip, iif,
 					        args->f_id.fib);
 				else
 					match = 1;
 				break;
 
 			case O_IPSEC:
 				match = (m_tag_find(m,
 				    PACKET_TAG_IPSEC_IN_DONE, NULL) != NULL);
 				/* otherwise no match */
 				break;
 
 #ifdef INET6
 			case O_IP6_SRC:
 				match = is_ipv6 &&
 				    IN6_ARE_ADDR_EQUAL(&args->f_id.src_ip6,
 				    &((ipfw_insn_ip6 *)cmd)->addr6);
 				break;
 
 			case O_IP6_DST:
 				match = is_ipv6 &&
 				IN6_ARE_ADDR_EQUAL(&args->f_id.dst_ip6,
 				    &((ipfw_insn_ip6 *)cmd)->addr6);
 				break;
 			case O_IP6_SRC_MASK:
 			case O_IP6_DST_MASK:
 				if (is_ipv6) {
 					int i = cmdlen - 1;
 					struct in6_addr p;
 					struct in6_addr *d =
 					    &((ipfw_insn_ip6 *)cmd)->addr6;
 
 					for (; !match && i > 0; d += 2,
 					    i -= F_INSN_SIZE(struct in6_addr)
 					    * 2) {
 						p = (cmd->opcode ==
 						    O_IP6_SRC_MASK) ?
 						    args->f_id.src_ip6:
 						    args->f_id.dst_ip6;
 						APPLY_MASK(&p, &d[1]);
 						match =
 						    IN6_ARE_ADDR_EQUAL(&d[0],
 						    &p);
 					}
 				}
 				break;
 
 			case O_FLOW6ID:
 				match = is_ipv6 &&
 				    flow6id_match(args->f_id.flow_id6,
 				    (ipfw_insn_u32 *) cmd);
 				break;
 
 			case O_EXT_HDR:
 				match = is_ipv6 &&
 				    (ext_hd & ((ipfw_insn *) cmd)->arg1);
 				break;
 
 			case O_IP6:
 				match = is_ipv6;
 				break;
 #endif
 
 			case O_IP4:
 				match = is_ipv4;
 				break;
 
 			case O_TAG: {
 				struct m_tag *mtag;
 				uint32_t tag = TARG(cmd->arg1, tag);
 
 				/* Packet is already tagged with this tag? */
 				mtag = m_tag_locate(m, MTAG_IPFW, tag, NULL);
 
 				/* We have `untag' action when F_NOT flag is
 				 * present. And we must remove this mtag from
 				 * mbuf and reset `match' to zero (`match' will
 				 * be inversed later).
 				 * Otherwise we should allocate new mtag and
 				 * push it into mbuf.
 				 */
 				if (cmd->len & F_NOT) { /* `untag' action */
 					if (mtag != NULL)
 						m_tag_delete(m, mtag);
 					match = 0;
 				} else {
 					if (mtag == NULL) {
 						mtag = m_tag_alloc( MTAG_IPFW,
 						    tag, 0, M_NOWAIT);
 						if (mtag != NULL)
 							m_tag_prepend(m, mtag);
 					}
 					match = 1;
 				}
 				break;
 			}
 
 			case O_FIB: /* try match the specified fib */
 				if (args->f_id.fib == cmd->arg1)
 					match = 1;
 				break;
 
 			case O_SOCKARG:	{
 #ifndef USERSPACE	/* not supported in userspace */
 				struct inpcb *inp = args->inp;
 				struct inpcbinfo *pi;
 				
 				if (is_ipv6) /* XXX can we remove this ? */
 					break;
 
 				if (proto == IPPROTO_TCP)
 					pi = &V_tcbinfo;
 				else if (proto == IPPROTO_UDP)
 					pi = &V_udbinfo;
 				else if (proto == IPPROTO_UDPLITE)
 					pi = &V_ulitecbinfo;
 				else
 					break;
 
 				/*
 				 * XXXRW: so_user_cookie should almost
 				 * certainly be inp_user_cookie?
 				 */
 
 				/* For incoming packet, lookup up the 
 				inpcb using the src/dest ip/port tuple */
 				if (inp == NULL) {
 					inp = in_pcblookup(pi, 
 						src_ip, htons(src_port),
 						dst_ip, htons(dst_port),
 						INPLOOKUP_RLOCKPCB, NULL);
 					if (inp != NULL) {
 						tablearg =
 						    inp->inp_socket->so_user_cookie;
 						if (tablearg)
 							match = 1;
 						INP_RUNLOCK(inp);
 					}
 				} else {
 					if (inp->inp_socket) {
 						tablearg =
 						    inp->inp_socket->so_user_cookie;
 						if (tablearg)
 							match = 1;
 					}
 				}
 #endif /* !USERSPACE */
 				break;
 			}
 
 			case O_TAGGED: {
 				struct m_tag *mtag;
 				uint32_t tag = TARG(cmd->arg1, tag);
 
 				if (cmdlen == 1) {
 					match = m_tag_locate(m, MTAG_IPFW,
 					    tag, NULL) != NULL;
 					break;
 				}
 
 				/* we have ranges */
 				for (mtag = m_tag_first(m);
 				    mtag != NULL && !match;
 				    mtag = m_tag_next(m, mtag)) {
 					uint16_t *p;
 					int i;
 
 					if (mtag->m_tag_cookie != MTAG_IPFW)
 						continue;
 
 					p = ((ipfw_insn_u16 *)cmd)->ports;
 					i = cmdlen - 1;
 					for(; !match && i > 0; i--, p += 2)
 						match =
 						    mtag->m_tag_id >= p[0] &&
 						    mtag->m_tag_id <= p[1];
 				}
 				break;
 			}
 				
 			/*
 			 * The second set of opcodes represents 'actions',
 			 * i.e. the terminal part of a rule once the packet
 			 * matches all previous patterns.
 			 * Typically there is only one action for each rule,
 			 * and the opcode is stored at the end of the rule
 			 * (but there are exceptions -- see below).
 			 *
 			 * In general, here we set retval and terminate the
 			 * outer loop (would be a 'break 3' in some language,
 			 * but we need to set l=0, done=1)
 			 *
 			 * Exceptions:
 			 * O_COUNT and O_SKIPTO actions:
 			 *   instead of terminating, we jump to the next rule
 			 *   (setting l=0), or to the SKIPTO target (setting
 			 *   f/f_len, cmd and l as needed), respectively.
 			 *
 			 * O_TAG, O_LOG and O_ALTQ action parameters:
 			 *   perform some action and set match = 1;
 			 *
 			 * O_LIMIT and O_KEEP_STATE: these opcodes are
 			 *   not real 'actions', and are stored right
 			 *   before the 'action' part of the rule (one
 			 *   exception is O_SKIP_ACTION which could be
 			 *   between these opcodes and 'action' one).
 			 *   These opcodes try to install an entry in the
 			 *   state tables; if successful, we continue with
 			 *   the next opcode (match=1; break;), otherwise
 			 *   the packet must be dropped (set retval,
 			 *   break loops with l=0, done=1)
 			 *
 			 * O_PROBE_STATE and O_CHECK_STATE: these opcodes
 			 *   cause a lookup of the state table, and a jump
 			 *   to the 'action' part of the parent rule
 			 *   if an entry is found, or
 			 *   (CHECK_STATE only) a jump to the next rule if
 			 *   the entry is not found.
 			 *   The result of the lookup is cached so that
 			 *   further instances of these opcodes become NOPs.
 			 *   The jump to the next rule is done by setting
 			 *   l=0, cmdlen=0.
 			 *
 			 * O_SKIP_ACTION: this opcode is not a real 'action'
 			 *  either, and is stored right before the 'action'
 			 *  part of the rule, right after the O_KEEP_STATE
 			 *  opcode. It causes match failure so the real
 			 *  'action' could be executed only if the rule
 			 *  is checked via dynamic rule from the state
 			 *  table, as in such case execution starts
 			 *  from the true 'action' opcode directly.
 			 *   
 			 */
 			case O_LIMIT:
 			case O_KEEP_STATE:
 				if (ipfw_dyn_install_state(chain, f,
 				    (ipfw_insn_limit *)cmd, args, ulp,
 				    pktlen, &dyn_info, tablearg)) {
 					/* error or limit violation */
 					retval = IP_FW_DENY;
 					l = 0;	/* exit inner loop */
 					done = 1; /* exit outer loop */
 				}
 				match = 1;
 				break;
 
 			case O_PROBE_STATE:
 			case O_CHECK_STATE:
 				/*
 				 * dynamic rules are checked at the first
 				 * keep-state or check-state occurrence,
 				 * with the result being stored in dyn_info.
 				 * The compiler introduces a PROBE_STATE
 				 * instruction for us when we have a
 				 * KEEP_STATE (because PROBE_STATE needs
 				 * to be run first).
 				 */
 				if (DYN_LOOKUP_NEEDED(&dyn_info, cmd) &&
 				    (q = ipfw_dyn_lookup_state(args, ulp,
 				    pktlen, cmd, &dyn_info)) != NULL) {
 					/*
 					 * Found dynamic entry, jump to the
 					 * 'action' part of the parent rule
 					 * by setting f, cmd, l and clearing
 					 * cmdlen.
 					 */
 					f = q;
 					f_pos = dyn_info.f_pos;
 					cmd = ACTION_PTR(f);
 					l = f->cmd_len - f->act_ofs;
 					cmdlen = 0;
 					match = 1;
 					break;
 				}
 				/*
 				 * Dynamic entry not found. If CHECK_STATE,
 				 * skip to next rule, if PROBE_STATE just
 				 * ignore and continue with next opcode.
 				 */
 				if (cmd->opcode == O_CHECK_STATE)
 					l = 0;	/* exit inner loop */
 				match = 1;
 				break;
 
 			case O_SKIP_ACTION:
 				match = 0;	/* skip to the next rule */
 				l = 0;		/* exit inner loop */
 				break;
 
 			case O_ACCEPT:
 				retval = 0;	/* accept */
 				l = 0;		/* exit inner loop */
 				done = 1;	/* exit outer loop */
 				break;
 
 			case O_PIPE:
 			case O_QUEUE:
 				set_match(args, f_pos, chain);
 				args->rule.info = TARG(cmd->arg1, pipe);
 				if (cmd->opcode == O_PIPE)
 					args->rule.info |= IPFW_IS_PIPE;
 				if (V_fw_one_pass)
 					args->rule.info |= IPFW_ONEPASS;
 				retval = IP_FW_DUMMYNET;
 				l = 0;          /* exit inner loop */
 				done = 1;       /* exit outer loop */
 				break;
 
 			case O_DIVERT:
 			case O_TEE:
 				if (args->flags & IPFW_ARGS_ETHER)
 					break;	/* not on layer 2 */
 				/* otherwise this is terminal */
 				l = 0;		/* exit inner loop */
 				done = 1;	/* exit outer loop */
 				retval = (cmd->opcode == O_DIVERT) ?
 					IP_FW_DIVERT : IP_FW_TEE;
 				set_match(args, f_pos, chain);
 				args->rule.info = TARG(cmd->arg1, divert);
 				break;
 
 			case O_COUNT:
 				IPFW_INC_RULE_COUNTER(f, pktlen);
 				l = 0;		/* exit inner loop */
 				break;
 
 			case O_SKIPTO:
 			    IPFW_INC_RULE_COUNTER(f, pktlen);
 			    f_pos = JUMP(chain, f, cmd->arg1, tablearg, 0);
 			    /*
 			     * Skip disabled rules, and re-enter
 			     * the inner loop with the correct
 			     * f_pos, f, l and cmd.
 			     * Also clear cmdlen and skip_or
 			     */
 			    for (; f_pos < chain->n_rules - 1 &&
 				    (V_set_disable &
 				     (1 << chain->map[f_pos]->set));
 				    f_pos++)
 				;
 			    /* Re-enter the inner loop at the skipto rule. */
 			    f = chain->map[f_pos];
 			    l = f->cmd_len;
 			    cmd = f->cmd;
 			    match = 1;
 			    cmdlen = 0;
 			    skip_or = 0;
 			    continue;
 			    break;	/* not reached */
 
 			case O_CALLRETURN: {
 				/*
 				 * Implementation of `subroutine' call/return,
 				 * in the stack carried in an mbuf tag. This
 				 * is different from `skipto' in that any call
 				 * address is possible (`skipto' must prevent
 				 * backward jumps to avoid endless loops).
 				 * We have `return' action when F_NOT flag is
 				 * present. The `m_tag_id' field is used as
 				 * stack pointer.
 				 */
 				struct m_tag *mtag;
 				uint16_t jmpto, *stack;
 
 #define	IS_CALL		((cmd->len & F_NOT) == 0)
 #define	IS_RETURN	((cmd->len & F_NOT) != 0)
 				/*
 				 * Hand-rolled version of m_tag_locate() with
 				 * wildcard `type'.
 				 * If not already tagged, allocate new tag.
 				 */
 				mtag = m_tag_first(m);
 				while (mtag != NULL) {
 					if (mtag->m_tag_cookie ==
 					    MTAG_IPFW_CALL)
 						break;
 					mtag = m_tag_next(m, mtag);
 				}
 				if (mtag == NULL && IS_CALL) {
 					mtag = m_tag_alloc(MTAG_IPFW_CALL, 0,
 					    IPFW_CALLSTACK_SIZE *
 					    sizeof(uint16_t), M_NOWAIT);
 					if (mtag != NULL)
 						m_tag_prepend(m, mtag);
 				}
 
 				/*
 				 * On error both `call' and `return' just
 				 * continue with next rule.
 				 */
 				if (IS_RETURN && (mtag == NULL ||
 				    mtag->m_tag_id == 0)) {
 					l = 0;		/* exit inner loop */
 					break;
 				}
 				if (IS_CALL && (mtag == NULL ||
 				    mtag->m_tag_id >= IPFW_CALLSTACK_SIZE)) {
 					printf("ipfw: call stack error, "
 					    "go to next rule\n");
 					l = 0;		/* exit inner loop */
 					break;
 				}
 
 				IPFW_INC_RULE_COUNTER(f, pktlen);
 				stack = (uint16_t *)(mtag + 1);
 
 				/*
 				 * The `call' action may use cached f_pos
 				 * (in f->next_rule), whose version is written
 				 * in f->next_rule.
 				 * The `return' action, however, doesn't have
 				 * fixed jump address in cmd->arg1 and can't use
 				 * cache.
 				 */
 				if (IS_CALL) {
 					stack[mtag->m_tag_id] = f->rulenum;
 					mtag->m_tag_id++;
 			    		f_pos = JUMP(chain, f, cmd->arg1,
 					    tablearg, 1);
 				} else {	/* `return' action */
 					mtag->m_tag_id--;
 					jmpto = stack[mtag->m_tag_id] + 1;
 					f_pos = ipfw_find_rule(chain, jmpto, 0);
 				}
 
 				/*
 				 * Skip disabled rules, and re-enter
 				 * the inner loop with the correct
 				 * f_pos, f, l and cmd.
 				 * Also clear cmdlen and skip_or
 				 */
 				for (; f_pos < chain->n_rules - 1 &&
 				    (V_set_disable &
 				    (1 << chain->map[f_pos]->set)); f_pos++)
 					;
 				/* Re-enter the inner loop at the dest rule. */
 				f = chain->map[f_pos];
 				l = f->cmd_len;
 				cmd = f->cmd;
 				cmdlen = 0;
 				skip_or = 0;
 				continue;
 				break;	/* NOTREACHED */
 			}
 #undef IS_CALL
 #undef IS_RETURN
 
 			case O_REJECT:
 				/*
 				 * Drop the packet and send a reject notice
 				 * if the packet is not ICMP (or is an ICMP
 				 * query), and it is not multicast/broadcast.
 				 */
 				if (hlen > 0 && is_ipv4 && offset == 0 &&
 				    (proto != IPPROTO_ICMP ||
 				     is_icmp_query(ICMP(ulp))) &&
 				    !(m->m_flags & (M_BCAST|M_MCAST)) &&
 				    !IN_MULTICAST(ntohl(dst_ip.s_addr))) {
 					send_reject(args, cmd->arg1, iplen, ip);
 					m = args->m;
 				}
 				/* FALLTHROUGH */
 #ifdef INET6
 			case O_UNREACH6:
 				if (hlen > 0 && is_ipv6 &&
 				    ((offset & IP6F_OFF_MASK) == 0) &&
 				    (proto != IPPROTO_ICMPV6 ||
 				     (is_icmp6_query(icmp6_type) == 1)) &&
 				    !(m->m_flags & (M_BCAST|M_MCAST)) &&
 				    !IN6_IS_ADDR_MULTICAST(
 					&args->f_id.dst_ip6)) {
 					send_reject6(args,
 					    cmd->opcode == O_REJECT ?
 					    map_icmp_unreach(cmd->arg1):
 					    cmd->arg1, hlen,
 					    (struct ip6_hdr *)ip);
 					m = args->m;
 				}
 				/* FALLTHROUGH */
 #endif
 			case O_DENY:
 				retval = IP_FW_DENY;
 				l = 0;		/* exit inner loop */
 				done = 1;	/* exit outer loop */
 				break;
 
 			case O_FORWARD_IP:
 				if (args->flags & IPFW_ARGS_ETHER)
 					break;	/* not valid on layer2 pkts */
 				if (q != f ||
 				    dyn_info.direction == MATCH_FORWARD) {
 				    struct sockaddr_in *sa;
 
 				    sa = &(((ipfw_insn_sa *)cmd)->sa);
 				    if (sa->sin_addr.s_addr == INADDR_ANY) {
 #ifdef INET6
 					/*
 					 * We use O_FORWARD_IP opcode for
 					 * fwd rule with tablearg, but tables
 					 * now support IPv6 addresses. And
 					 * when we are inspecting IPv6 packet,
 					 * we can use nh6 field from
 					 * table_value as next_hop6 address.
 					 */
 					if (is_ipv6) {
 						struct ip_fw_nh6 *nh6;
 
 						args->flags |= IPFW_ARGS_NH6;
 						nh6 = &args->hopstore6;
 						nh6->sin6_addr = TARG_VAL(
 						    chain, tablearg, nh6);
 						nh6->sin6_port = sa->sin_port;
 						nh6->sin6_scope_id = TARG_VAL(
 						    chain, tablearg, zoneid);
 					} else
 #endif
 					{
 						args->flags |= IPFW_ARGS_NH4;
 						args->hopstore.sin_port =
 						    sa->sin_port;
 						sa = &args->hopstore;
 						sa->sin_family = AF_INET;
 						sa->sin_len = sizeof(*sa);
 						sa->sin_addr.s_addr = htonl(
 						    TARG_VAL(chain, tablearg,
 						    nh4));
 					}
 				    } else {
 					    args->flags |= IPFW_ARGS_NH4PTR;
 					    args->next_hop = sa;
 				    }
 				}
 				retval = IP_FW_PASS;
 				l = 0;          /* exit inner loop */
 				done = 1;       /* exit outer loop */
 				break;
 
 #ifdef INET6
 			case O_FORWARD_IP6:
 				if (args->flags & IPFW_ARGS_ETHER)
 					break;	/* not valid on layer2 pkts */
 				if (q != f ||
 				    dyn_info.direction == MATCH_FORWARD) {
 					struct sockaddr_in6 *sin6;
 
 					sin6 = &(((ipfw_insn_sa6 *)cmd)->sa);
 					args->flags |= IPFW_ARGS_NH6PTR;
 					args->next_hop6 = sin6;
 				}
 				retval = IP_FW_PASS;
 				l = 0;		/* exit inner loop */
 				done = 1;	/* exit outer loop */
 				break;
 #endif
 
 			case O_NETGRAPH:
 			case O_NGTEE:
 				set_match(args, f_pos, chain);
 				args->rule.info = TARG(cmd->arg1, netgraph);
 				if (V_fw_one_pass)
 					args->rule.info |= IPFW_ONEPASS;
 				retval = (cmd->opcode == O_NETGRAPH) ?
 				    IP_FW_NETGRAPH : IP_FW_NGTEE;
 				l = 0;          /* exit inner loop */
 				done = 1;       /* exit outer loop */
 				break;
 
 			case O_SETFIB: {
 				uint32_t fib;
 
 				IPFW_INC_RULE_COUNTER(f, pktlen);
 				fib = TARG(cmd->arg1, fib) & 0x7FFF;
 				if (fib >= rt_numfibs)
 					fib = 0;
 				M_SETFIB(m, fib);
 				args->f_id.fib = fib; /* XXX */
 				l = 0;		/* exit inner loop */
 				break;
 		        }
 
 			case O_SETDSCP: {
 				uint16_t code;
 
 				code = TARG(cmd->arg1, dscp) & 0x3F;
 				l = 0;		/* exit inner loop */
 				if (is_ipv4) {
 					uint16_t old;
 
 					old = *(uint16_t *)ip;
 					ip->ip_tos = (code << 2) |
 					    (ip->ip_tos & 0x03);
 					ip->ip_sum = cksum_adjust(ip->ip_sum,
 					    old, *(uint16_t *)ip);
 				} else if (is_ipv6) {
 					uint8_t *v;
 
 					v = &((struct ip6_hdr *)ip)->ip6_vfc;
 					*v = (*v & 0xF0) | (code >> 2);
 					v++;
 					*v = (*v & 0x3F) | ((code & 0x03) << 6);
 				} else
 					break;
 
 				IPFW_INC_RULE_COUNTER(f, pktlen);
 				break;
 			}
 
 			case O_NAT:
 				l = 0;          /* exit inner loop */
 				done = 1;       /* exit outer loop */
 				/*
 				 * Ensure that we do not invoke NAT handler for
 				 * non IPv4 packets. Libalias expects only IPv4.
 				 */
 				if (!is_ipv4 || !IPFW_NAT_LOADED) {
 				    retval = IP_FW_DENY;
 				    break;
 				}
 
 				struct cfg_nat *t;
 				int nat_id;
 
 				args->rule.info = 0;
 				set_match(args, f_pos, chain);
 				/* Check if this is 'global' nat rule */
 				if (cmd->arg1 == IP_FW_NAT44_GLOBAL) {
 					retval = ipfw_nat_ptr(args, NULL, m);
 					break;
 				}
 				t = ((ipfw_insn_nat *)cmd)->nat;
 				if (t == NULL) {
 					nat_id = TARG(cmd->arg1, nat);
 					t = (*lookup_nat_ptr)(&chain->nat, nat_id);
 
 					if (t == NULL) {
 					    retval = IP_FW_DENY;
 					    break;
 					}
 					if (cmd->arg1 != IP_FW_TARG)
 					    ((ipfw_insn_nat *)cmd)->nat = t;
 				}
 				retval = ipfw_nat_ptr(args, t, m);
 				break;
 
 			case O_REASS: {
 				int ip_off;
 
 				l = 0;	/* in any case exit inner loop */
 				if (is_ipv6) /* IPv6 is not supported yet */
 					break;
 				IPFW_INC_RULE_COUNTER(f, pktlen);
 				ip_off = ntohs(ip->ip_off);
 
 				/* if not fragmented, go to next rule */
 				if ((ip_off & (IP_MF | IP_OFFMASK)) == 0)
 				    break;
 
 				args->m = m = ip_reass(m);
 
 				/*
 				 * do IP header checksum fixup.
 				 */
 				if (m == NULL) { /* fragment got swallowed */
 				    retval = IP_FW_DENY;
 				} else { /* good, packet complete */
 				    int hlen;
 
 				    ip = mtod(m, struct ip *);
 				    hlen = ip->ip_hl << 2;
 				    ip->ip_sum = 0;
 				    if (hlen == sizeof(struct ip))
 					ip->ip_sum = in_cksum_hdr(ip);
 				    else
 					ip->ip_sum = in_cksum(m, hlen);
 				    retval = IP_FW_REASS;
 				    args->rule.info = 0;
 				    set_match(args, f_pos, chain);
 				}
 				done = 1;	/* exit outer loop */
 				break;
 			}
 			case O_EXTERNAL_ACTION:
 				l = 0; /* in any case exit inner loop */
 				retval = ipfw_run_eaction(chain, args,
 				    cmd, &done);
 				/*
 				 * If both @retval and @done are zero,
 				 * consider this as rule matching and
 				 * update counters.
 				 */
 				if (retval == 0 && done == 0) {
 					IPFW_INC_RULE_COUNTER(f, pktlen);
 					/*
 					 * Reset the result of the last
 					 * dynamic state lookup.
 					 * External action can change
 					 * @args content, and it may be
 					 * used for new state lookup later.
 					 */
 					DYN_INFO_INIT(&dyn_info);
 				}
 				break;
 
 			default:
 				panic("-- unknown opcode %d\n", cmd->opcode);
 			} /* end of switch() on opcodes */
 			/*
 			 * if we get here with l=0, then match is irrelevant.
 			 */
 
 			if (cmd->len & F_NOT)
 				match = !match;
 
 			if (match) {
 				if (cmd->len & F_OR)
 					skip_or = 1;
 			} else {
 				if (!(cmd->len & F_OR)) /* not an OR block, */
 					break;		/* try next rule    */
 			}
 
 		}	/* end of inner loop, scan opcodes */
 #undef PULLUP_LEN
 #undef PULLUP_LEN_LOCKED
 
 		if (done)
 			break;
 
 /* next_rule:; */	/* try next rule		*/
 
 	}		/* end of outer for, scan rules */
 
 	if (done) {
 		struct ip_fw *rule = chain->map[f_pos];
 		/* Update statistics */
 		IPFW_INC_RULE_COUNTER(rule, pktlen);
 	} else {
 		retval = IP_FW_DENY;
 		printf("ipfw: ouch!, skip past end of rules, denying packet\n");
 	}
 	IPFW_PF_RUNLOCK(chain);
 #ifdef __FreeBSD__
 	if (ucred_cache != NULL)
 		crfree(ucred_cache);
 #endif
 	return (retval);
 
 pullup_failed:
 	if (V_fw_verbose)
 		printf("ipfw: pullup failed\n");
 	return (IP_FW_DENY);
 }
 
 /*
  * Set maximum number of tables that can be used in given VNET ipfw instance.
  */
 #ifdef SYSCTL_NODE
 static int
 sysctl_ipfw_table_num(SYSCTL_HANDLER_ARGS)
 {
 	unsigned int requested;
 	int rc;
 
 	/* Seed the handler with the current per-vnet limit. */
 	requested = V_fw_tables_max;
 	rc = sysctl_handle_int(oidp, &requested, 0, req);
 	if (rc != 0)
 		return (rc);		/* handler reported an error */
 	if (req->newptr == NULL)
 		return (rc);		/* read-only request, nothing to apply */
 
 	/* A new value was supplied: hand it to the table resize code. */
 	return (ipfw_resize_tables(&V_layer3_chain, requested));
 }
 
 /*
  * Sysctl handler that switches the table namespace between global
  * and per-set.  Reads report V_fw_tables_sets; writes are forwarded
  * to ipfw_switch_tables_namespace().
  */
 static int
 sysctl_ipfw_tables_sets(SYSCTL_HANDLER_ARGS)
 {
 	unsigned int requested;
 	int rc;
 
 	/* Seed the handler with the current setting. */
 	requested = V_fw_tables_sets;
 	rc = sysctl_handle_int(oidp, &requested, 0, req);
 	if (rc != 0)
 		return (rc);		/* handler reported an error */
 	if (req->newptr == NULL)
 		return (rc);		/* read-only request, nothing to apply */
 
 	return (ipfw_switch_tables_namespace(&V_layer3_chain, requested));
 }
 #endif
 
 /*
  * Module and VNET glue
  */
 
 /*
  * One-time initialisation, run only on boot or module load.
  * Prints the configuration banner, clamps the requested table count
  * and initialises the sockopt handler, object rewriter and interface
  * tracking subsystems.  Always returns 0.
  */
 static int
 ipfw_init(void)
 {
 
 	/*
 	 * The banner is printed only here, i.e. the first time around
 	 * when called from the sysinit code.
 	 */
 	printf("ipfw2 "
 #ifdef INET6
 		"(+ipv6) "
 #endif
 		"initialized, divert %s, nat %s, "
 		"default to %s, logging ",
 #ifdef IPDIVERT
 		"enabled",
 #else
 		"loadable",
 #endif
 #ifdef IPFIREWALL_NAT
 		"enabled",
 #else
 		"loadable",
 #endif
 		default_to_accept ? "accept" : "deny");
 
 	/*
 	 * Finish the "logging ..." part of the banner.
 	 *
 	 * Note: V_xxx variables can be accessed here but the vnet specific
 	 * initializer may not have been called yet for the VIMAGE case.
 	 * Tuneables will have been processed, so the values printed are
 	 * those of the default vnet.
 	 * XXX This should all be rationalized AFTER 8.0
 	 */
 	if (V_fw_verbose != 0 && V_verbose_limit != 0)
 		printf("limited to %d packets/entry by default\n",
 		    V_verbose_limit);
 	else if (V_fw_verbose != 0)
 		printf("unlimited\n");
 	else
 		printf("disabled\n");
 
 	/* Clamp a user-supplied table count to the supported maximum. */
 	if (default_fw_tables > IPFW_TABLES_MAX)
 		default_fw_tables = IPFW_TABLES_MAX;
 
 	ipfw_init_sopt_handler();
 	ipfw_init_obj_rewriter();
 	ipfw_iface_init();
 
 	return (0);
 }
 
 /*
  * Called for the removal of the last instance only on module unload.
  * Tears down the subsystems set up by ipfw_init() (interface tracking,
  * sockopt handler, object rewriter) and logs the unload.
  */
 static void
 ipfw_destroy(void)
 {
 
 	ipfw_iface_destroy();
 	ipfw_destroy_sopt_handler();
 	ipfw_destroy_obj_rewriter();
 	printf("IP firewall unloaded\n");
 }
 
 /*
  * Stuff that must be initialized for every instance
  * (including the first of course).
  *
  * Sets per-vnet defaults, initialises tables, installs the default
  * rule, brings up the dynamic-state, external-action and bpf
  * subsystems, and finally attaches the packet hooks.
  *
  * Returns ENOSPC if table setup fails, otherwise the result of
  * ipfw_attach_hooks().
  */
 static int
 vnet_ipfw_init(const void *unused)
 {
 	int error, first;
 	struct ip_fw *rule = NULL;
 	struct ip_fw_chain *chain;
 
 	chain = &V_layer3_chain;
 
 	/* 1 only in the default vnet; passed on to the sub-initialisers. */
 	first = IS_DEFAULT_VNET(curvnet) ? 1 : 0;
 
 	/* First set up some values that are compile time options */
 	V_autoinc_step = 100;	/* bounded to 1..1000 in add_rule() */
 	V_fw_deny_unknown_exthdrs = 1;
 #ifdef IPFIREWALL_VERBOSE
 	V_fw_verbose = 1;
 #endif
 #ifdef IPFIREWALL_VERBOSE_LIMIT
 	V_verbose_limit = IPFIREWALL_VERBOSE_LIMIT;
 #endif
 #ifdef IPFIREWALL_NAT
 	LIST_INIT(&chain->nat);
 #endif
 
 	/* Init shared services hash table */
 	ipfw_init_srv(chain);
 
 	ipfw_init_counters();
 	/* Set initial number of tables */
 	V_fw_tables_max = default_fw_tables;
 	error = ipfw_init_tables(chain, first);
 	if (error) {
 		printf("ipfw2: setting up tables failed\n");
 		/*
 		 * 'rule' is still NULL here, so the second free() has
 		 * nothing to release.
 		 * NOTE(review): chain->map is not assigned anywhere in this
 		 * function before this point; presumably it is still NULL -
 		 * confirm.  Also note the original error code is replaced
 		 * by ENOSPC.
 		 */
 		free(chain->map, M_IPFW);
 		free(rule, M_IPFW);
 		return (ENOSPC);
 	}
 
 	IPFW_LOCK_INIT(chain);
 
 	/*
 	 * fill and insert the default rule
 	 * NOTE(review): the result of ipfw_alloc_rule() is used without a
 	 * NULL check; presumably the allocation cannot fail - confirm.
 	 */
 	rule = ipfw_alloc_rule(chain, sizeof(struct ip_fw));
 	rule->flags |= IPFW_RULE_NOOPT;
 	rule->cmd_len = 1;
 	rule->cmd[0].len = 1;
 	/* Single-opcode rule: accept or deny depending on the default policy. */
 	rule->cmd[0].opcode = default_to_accept ? O_ACCEPT : O_DENY;
 	chain->default_rule = rule;
 	ipfw_add_protected_rule(chain, rule, 0);
 
 	ipfw_dyn_init(chain);
 	ipfw_eaction_init(chain, first);
 #ifdef LINEAR_SKIPTO
 	ipfw_init_skipto_cache(chain);
 #endif
 	ipfw_bpf_init(first);
 
 	/* All per-vnet state is set up; mark this instance as usable. */
 	V_ipfw_vnet_ready = 1;		/* Open for business */
 
 	/*
 	 * Hook the sockopt handler and pfil hooks for ipv4 and ipv6.
 	 * Even if the latter two fail we still keep the module alive
 	 * because the sockopt and layer2 paths are still useful.
 	 * ipfw[6]_hook return 0 on success, ENOENT on failure,
 	 * so we can ignore the exact return value and just set a flag.
 	 *
 	 * Note that V_fw[6]_enable are manipulated by a SYSCTL_PROC so
 	 * changes in the underlying (per-vnet) variables trigger
 	 * immediate hook()/unhook() calls.
 	 * In layer2 we have the same behaviour, except that V_ether_ipfw
 	 * is checked on each packet because there are no pfil hooks.
 	 */
 	V_ip_fw_ctl_ptr = ipfw_ctl3;
 	error = ipfw_attach_hooks();
 	return (error);
 }
 
 /*
  * Called for the removal of each instance.
  *
  * Marks the vnet as not ready, detaches the hooks, then collects all
  * rules under the UH and runtime write locks and releases the
  * remaining per-vnet subsystems.  Always returns 0.
  */
 static int
 vnet_ipfw_uninit(const void *unused)
 {
 	struct ip_fw *reap;
 	struct ip_fw_chain *chain = &V_layer3_chain;
 	int i, last;
 
 	V_ipfw_vnet_ready = 0; /* tell new callers to go away */
 	/*
 	 * disconnect from ipv4, ipv6, layer2 and sockopt.
 	 * Then grab, release and grab again the WLOCK so we make
 	 * sure the update is propagated and nobody will be in.
 	 */
 	ipfw_detach_hooks();
 	V_ip_fw_ctl_ptr = NULL;
 
 	/* 1 only in the default vnet; passed on to the sub-destructors. */
 	last = IS_DEFAULT_VNET(curvnet) ? 1 : 0;
 
 	/* Lock/unlock pair used purely as a barrier (see comment above). */
 	IPFW_UH_WLOCK(chain);
 	IPFW_UH_WUNLOCK(chain);
 
 	ipfw_dyn_uninit(0);	/* run the callout_drain */
 
 	IPFW_UH_WLOCK(chain);
 
 	/* Queue every rule on the reap list, then drop the rule map. */
 	reap = NULL;
 	IPFW_WLOCK(chain);
 	for (i = 0; i < chain->n_rules; i++)
 		ipfw_reap_add(chain, &reap, chain->map[i]);
 	free(chain->map, M_IPFW);
 #ifdef LINEAR_SKIPTO
 	ipfw_destroy_skipto_cache(chain);
 #endif
 	IPFW_WUNLOCK(chain);
 	IPFW_UH_WUNLOCK(chain);
 	ipfw_destroy_tables(chain, last);
 	ipfw_eaction_uninit(chain, last);
 	/* The queued rules are released only after both locks are dropped. */
 	if (reap != NULL)
 		ipfw_reap_rules(reap);
 	vnet_ipfw_iface_destroy(chain);
 	ipfw_destroy_srv(chain);
 	IPFW_LOCK_DESTROY(chain);
 	ipfw_dyn_uninit(1);	/* free the remaining parts */
 	ipfw_destroy_counters();
 	ipfw_bpf_uninit(last);
 	return (0);
 }
 
 /*
  * Module event handler.
  *
  * Most of the real work could be done either here or in the
  * (VNET_)SYS(UN)INIT handlers.  The SYSINIT handlers were chosen
  * because they express the flow of control during module and vnet
  * operations better, so this handler is only a skeleton: it accepts
  * the known events without doing anything and rejects everything else
  * with EOPNOTSUPP.  There is no SYSINIT equivalent of the module
  * SHUTDOWN event, but nothing needs to be done for it anyhow.
  */
 static int
 ipfw_modevent(module_t mod, int type, void *unused)
 {
 
 	switch (type) {
 	case MOD_LOAD:		/* module load, or boot if compiled in */
 	case MOD_QUIESCE:	/* before unload; may veto, we never do */
 	case MOD_UNLOAD:	/* during unload */
 	case MOD_SHUTDOWN:	/* during system shutdown */
 		return (0);
 	default:
 		return (EOPNOTSUPP);
 	}
 }
 
 /*
  * Module description handed to DECLARE_MODULE() below: the module
  * name, its event handler, and a third member left as 0.
  */
 static moduledata_t ipfwmod = {
 	"ipfw",
 	ipfw_modevent,
 	0
 };
 
 /*
  * Define startup order.  All three stages run in the same subsystem
  * (IPFW_SI_SUB_FIREWALL) and are ordered by consecutive order values:
  * module event handler, then ipfw_init(), then vnet_ipfw_init().
  */
 #define	IPFW_SI_SUB_FIREWALL	SI_SUB_PROTO_FIREWALL
 #define	IPFW_MODEVENT_ORDER	(SI_ORDER_ANY - 255) /* On boot slot in here. */
 #define	IPFW_MODULE_ORDER	(IPFW_MODEVENT_ORDER + 1) /* A little later. */
 #define	IPFW_VNET_ORDER		(IPFW_MODEVENT_ORDER + 2) /* Later still. */
 
 DECLARE_MODULE(ipfw, ipfwmod, IPFW_SI_SUB_FIREWALL, IPFW_MODEVENT_ORDER);
 FEATURE(ipfw_ctl3, "ipfw new sockopt calls");
 MODULE_VERSION(ipfw, 3);
 /* should declare some dependencies here */
 
 /*
  * Starting up. Done in order after the module event handler
  * (ipfw_modevent(), registered through ipfwmod) has been called.
  * VNET_SYSINIT is also called for each existing vnet and each new vnet.
  */
 SYSINIT(ipfw_init, IPFW_SI_SUB_FIREWALL, IPFW_MODULE_ORDER,
 	    ipfw_init, NULL);
 VNET_SYSINIT(vnet_ipfw_init, IPFW_SI_SUB_FIREWALL, IPFW_VNET_ORDER,
 	    vnet_ipfw_init, NULL);
  
 /*
  * Closing up shop. These are done in REVERSE ORDER, but still
  * after the module event handler has been called. Not called on reboot.
  * VNET_SYSUNINIT is also called for each exiting vnet as it exits,
  * or when the module is unloaded.
  */
 SYSUNINIT(ipfw_destroy, IPFW_SI_SUB_FIREWALL, IPFW_MODULE_ORDER,
 	    ipfw_destroy, NULL);
 VNET_SYSUNINIT(vnet_ipfw_uninit, IPFW_SI_SUB_FIREWALL, IPFW_VNET_ORDER,
 	    vnet_ipfw_uninit, NULL);
 /* end of file */
Index: head/sys/netpfil/ipfw/ip_fw_table_algo.c
===================================================================
--- head/sys/netpfil/ipfw/ip_fw_table_algo.c	(revision 362899)
+++ head/sys/netpfil/ipfw/ip_fw_table_algo.c	(revision 362900)
@@ -1,4113 +1,4110 @@
 /*-
  * Copyright (c) 2014 Yandex LLC
  * Copyright (c) 2014 Alexander V. Chernikov
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 /*
  * Lookup table algorithms.
  *
  */
 
 #include "opt_ipfw.h"
 #include "opt_inet.h"
 #ifndef INET
 #error IPFIREWALL requires INET.
 #endif /* INET */
 #include "opt_inet6.h"
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/malloc.h>
 #include <sys/kernel.h>
 #include <sys/lock.h>
 #include <sys/rwlock.h>
 #include <sys/rmlock.h>
 #include <sys/socket.h>
 #include <sys/queue.h>
 #include <net/if.h>	/* ip_fw.h requires IFNAMSIZ */
 #include <net/radix.h>
 #include <net/route.h>
+#include <net/route/nhop.h>
 #include <net/route/route_var.h>
 
 #include <netinet/in.h>
 #include <netinet/in_fib.h>
 #include <netinet/ip_var.h>	/* struct ipfw_rule_ref */
 #include <netinet/ip_fw.h>
 #include <netinet6/in6_fib.h>
 
 #include <netpfil/ipfw/ip_fw_private.h>
 #include <netpfil/ipfw/ip_fw_table.h>
 
 
 /*
  * IPFW table lookup algorithms.
  *
  * What is needed to add another table algo?
  *
  * Algo init:
  * * struct table_algo has to be filled with:
  *   name: "type:algoname" format, e.g. "addr:radix". Currently
  *     there are the following types: "addr", "iface", "number" and "flow".
  *   type: one of IPFW_TABLE_* types
  *   flags: one or more TA_FLAGS_*
  *   ta_buf_size: size of structure used to store add/del item state.
  *     Needs to be less than TA_BUF_SZ.
  *   callbacks: see below for description.
  * * ipfw_add_table_algo / ipfw_del_table_algo has to be called
  *
  * Callbacks description:
  *
  * -init: request to initialize new table instance.
  * typedef int (ta_init)(struct ip_fw_chain *ch, void **ta_state,
  *     struct table_info *ti, char *data, uint8_t tflags);
  * MANDATORY, unlocked. (M_WAITOK). Returns 0 on success.
  *
  *  Allocate all structures needed for normal operations.
  *  * Caller may want to parse @data for some algo-specific
  *    options provided by userland.
  *  * Caller may want to save configuration state pointer to @ta_state
  *  * Caller needs to save desired runtime structure pointer(s)
  *    inside @ti fields. Note that it is not correct to save
  *    @ti pointer at this moment. Use -change_ti hook for that.
  *  * Caller has to fill in ti->lookup to appropriate function
  *    pointer.
  *
  *
  *
  * -destroy: request to destroy table instance.
  * typedef void (ta_destroy)(void *ta_state, struct table_info *ti);
  * MANDATORY, unlocked. (M_WAITOK).
  *
  * Frees all table entries and all tables structures allocated by -init.
  *
  *
  *
  * -prepare_add: request to allocate state for adding new entry.
  * typedef int (ta_prepare_add)(struct ip_fw_chain *ch, struct tentry_info *tei,
  *     void *ta_buf);
  * MANDATORY, unlocked. (M_WAITOK). Returns 0 on success.
  *
  * Allocates state and fills it in with all necessary data (EXCEPT value)
  * from @tei to minimize operations needed to be done under WLOCK.
  * "value" field has to be copied to new entry in @add callback.
  * Buffer ta_buf of size ta->ta_buf_sz may be used to store
  * allocated state.
  *
  *
  *
  * -prepare_del: request to set state for deleting existing entry.
  * typedef int (ta_prepare_del)(struct ip_fw_chain *ch, struct tentry_info *tei,
  *     void *ta_buf);
  * MANDATORY, locked, UH. (M_NOWAIT). Returns 0 on success.
  *
  * Buffer ta_buf of size ta->ta_buf_sz may be used to store
  * allocated state. Caller should use on-stack ta_buf allocation
  * instead of doing malloc().
  *
  *
  *
  * -add: request to insert new entry into runtime/config structures.
  *  typedef int (ta_add)(void *ta_state, struct table_info *ti,
  *     struct tentry_info *tei, void *ta_buf, uint32_t *pnum);
  * MANDATORY, UH+WLOCK. (M_NOWAIT). Returns 0 on success.
  *
  * Insert new entry using previously-allocated state in @ta_buf.
  * * @tei may have the following flags:
  *   TEI_FLAGS_UPDATE: request to add or update entry.
  *   TEI_FLAGS_DONTADD: request to update (but not add) entry.
  * * Caller is required to do the following:
  *   copy real entry value from @tei
  *   entry added: return 0, set 1 to @pnum
  *   entry updated: return 0, store 0 to @pnum, store old value in @tei,
  *     add TEI_FLAGS_UPDATED flag to @tei.
  *   entry exists: return EEXIST
  *   entry not found: return ENOENT
  *   other error: return non-zero error code.
  *
  *
  *
  * -del: request to delete existing entry from runtime/config structures.
  *  typedef int (ta_del)(void *ta_state, struct table_info *ti,
  *     struct tentry_info *tei, void *ta_buf, uint32_t *pnum);
  *  MANDATORY, UH+WLOCK. (M_NOWAIT). Returns 0 on success.
  *
  *  Delete entry using previously set up in @ta_buf.
  * * Caller is required to do the following:
  *   entry deleted: return 0, set 1 to @pnum, store old value in @tei.
  *   entry not found: return ENOENT
  *   other error: return non-zero error code.
  *
  *
  *
  * -flush_entry: flush entry state created by -prepare_add / -del / others
  *  typedef void (ta_flush_entry)(struct ip_fw_chain *ch,
  *      struct tentry_info *tei, void *ta_buf);
  *  MANDATORY, may be locked. (M_NOWAIT).
  *
  *  Delete state allocated by:
  *  -prepare_add (-add returned EEXIST|UPDATED)
  *  -prepare_del (if any)
  *  -del
  *  * Caller is required to handle empty @ta_buf correctly.
  *
  *
  * -find_tentry: finds entry specified by key @tent
  *  typedef int ta_find_tentry(void *ta_state, struct table_info *ti,
  *      ipfw_obj_tentry *tent);
  *  OPTIONAL, locked (UH). (M_NOWAIT). Returns 0 on success.
  *
  *  Finds entry specified by given key.
  *  * Caller is required to do the following:
  *    entry found: returns 0, export entry to @tent
  *    entry not found: returns ENOENT
  *
  *
  * -need_modify: checks if @ti has enough space to hold another @count items.
  *  typedef int (ta_need_modify)(void *ta_state, struct table_info *ti,
  *      uint32_t count, uint64_t *pflags);
  *  OPTIONAL, locked (UH). (M_NOWAIT). Returns 0 if has.
  *
  *  Checks if given table has enough space to add @count items without
  *  resize. Caller may use @pflags to store desired modification data.
  *
  *
  *
  * -prepare_mod: allocate structures for table modification.
  *  typedef int (ta_prepare_mod)(void *ta_buf, uint64_t *pflags);
  * OPTIONAL(need_modify), unlocked. (M_WAITOK). Returns 0 on success.
  *
  * Allocate all needed state for table modification. Caller
  * should use `struct mod_item` to store new state in @ta_buf.
  * Up to TA_BUF_SZ (128 bytes) can be stored in @ta_buf.
  * 
  *
  *
  * -fill_mod: copy some data to new state.
  *  typedef int (ta_fill_mod)(void *ta_state, struct table_info *ti,
  *      void *ta_buf, uint64_t *pflags);
  * OPTIONAL(need_modify), locked (UH). (M_NOWAIT). Returns 0 on success.
  *
  * Copy as much data as we can to minimize changes under WLOCK.
  * For example, array can be merged inside this callback.
  *
  *
  *
  * -modify: perform final modification.
  *  typedef void (ta_modify)(void *ta_state, struct table_info *ti,
  *      void *ta_buf, uint64_t pflags);
  * OPTIONAL(need_modify), locked (UH+WLOCK). (M_NOWAIT). 
  *
  * Performs all changes necessary to switch to new structures.
  * * Caller should save old pointers to @ta_buf storage.
  *
  *
  *
  * -flush_mod: flush table modification state.
  *  typedef void (ta_flush_mod)(void *ta_buf);
  * OPTIONAL(need_modify), unlocked. (M_WAITOK).
  *
  * Performs flush for the following:
  *   - prepare_mod (modification was not necessary)
  *   - modify (for the old state)
  *
  *
  *
  * -change_ti: monitor table info pointer changes
  * typedef void (ta_change_ti)(void *ta_state, struct table_info *ti);
  * OPTIONAL, locked (UH). (M_NOWAIT).
  *
  * Called on @ti pointer changed. Called immediately after -init
  * to set initial state.
  *
  *
  *
  * -foreach: calls @f for each table entry
  *  typedef void ta_foreach(void *ta_state, struct table_info *ti,
  *      ta_foreach_f *f, void *arg);
  * MANDATORY, locked(UH). (M_NOWAIT).
  *
  * Runs callback with specified argument for each table entry,
  * Typically used for dumping table entries.
  *
  *
  *
  * -dump_tentry: dump table entry in current @tentry format.
  *  typedef int ta_dump_tentry(void *ta_state, struct table_info *ti, void *e,
  *      ipfw_obj_tentry *tent);
  * MANDATORY, locked(UH). (M_NOWAIT). Returns 0 on success.
  *
  * Dumps entry @e to @tent.
  *
  *
  * -print_config: prints custom algorithm options into buffer.
  *  typedef void (ta_print_config)(void *ta_state, struct table_info *ti,
  *      char *buf, size_t bufsize);
  * OPTIONAL. locked(UH). (M_NOWAIT).
  *
  * Prints custom algorithm options in the format suitable to pass
  * back to -init callback.
  *
  *
  *
  * -dump_tinfo: dumps algo-specific info.
  *  typedef void ta_dump_tinfo(void *ta_state, struct table_info *ti,
  *      ipfw_ta_tinfo *tinfo);
  * OPTIONAL. locked(UH). (M_NOWAIT).
  *
  * Dumps options like items size/hash size, etc.
  */
 
 MALLOC_DEFINE(M_IPFW_TBL, "ipfw_tbl", "IpFw tables");
 
 /*
  * Utility structures/functions common to more than one algo
  */
 
 /*
  * State carried in @ta_buf during a table modification; the
  * -prepare_mod description above names this structure for that role.
  * NOTE(review): the users of these fields are outside this view.
  * Presumably main_ptr/size describe the new primary storage and
  * main_ptr6/size6 the IPv6 counterpart - confirm against the algos
  * that implement -prepare_mod.
  */
 struct mod_item {
 	void	*main_ptr;
 	size_t	size;
 	void	*main_ptr6;
 	size_t	size6;
 };
 
 static int badd(const void *key, void *item, void *base, size_t nmemb,
     size_t size, int (*compar) (const void *, const void *));
 static int bdel(const void *key, void *base, size_t nmemb, size_t size,
     int (*compar) (const void *, const void *));
 
 
 /*
  * ADDR implementation using radix
  *
  */
 
 /*
  * The radix code expects addr and mask to be array of bytes,
  * with the first byte being the length of the array. rn_inithead
  * is called with the offset in bits of the lookup key within the
  * array. If we use a sockaddr_in as the underlying type,
  * sin_len is conveniently located at offset 0, sin_addr is at
  * offset 4 and normally aligned.
  * But for portability, let's avoid assumption and make the code explicit
  */
 /*
  * Access the first byte of @v, which the radix code treats as the key
  * length.  The expansion is a modifiable lvalue and is fully
  * parenthesised so it composes safely with surrounding operators.
  */
 #define KEY_LEN(v)	(*((uint8_t *)&(v)))
 /*
  * Do not require radix to compare more than actual IPv4/IPv6 address:
  * the key length covers the structure only up to the end of the address.
  */
 #define KEY_LEN_INET	(offsetof(struct sockaddr_in, sin_addr) + sizeof(in_addr_t))
 #define KEY_LEN_INET6	(offsetof(struct sa_in6, sin6_addr) + sizeof(struct in6_addr))
 
 /* Offset, in bits, of the address within the key (passed to rn_inithead). */
 #define OFF_LEN_INET	(8 * offsetof(struct sockaddr_in, sin_addr))
 #define OFF_LEN_INET6	(8 * offsetof(struct sa_in6, sin6_addr))
 
 /*
  * IPv4 table entry stored in the radix tree.  The radix nodes come
  * first, so a pointer returned by the radix code is cast directly to
  * this structure.
  */
 struct radix_addr_entry {
 	struct radix_node	rn[2];
 	struct sockaddr_in	addr;		/* lookup key */
 	uint32_t		value;		/* returned by the lookup */
 	uint8_t			masklen;	/* prefix length of the entry */
 };
 
 /*
  * Compact IPv6 key used instead of a full sockaddr_in6: a length byte,
  * a family byte, two bytes of padding and the address itself.
  */
 struct sa_in6 {
 	uint8_t			sin6_len;	/* key length, set through KEY_LEN() */
 	uint8_t			sin6_family;
 	uint8_t			pad[2];
 	struct in6_addr		sin6_addr;
 };
 
 /*
  * IPv6 table entry stored in the radix tree; the IPv6 counterpart of
  * struct radix_addr_entry, with the same member order.
  */
 struct radix_addr_xentry {
 	struct radix_node	rn[2];
 	struct sa_in6		addr6;		/* lookup key */
 	uint32_t		value;		/* returned by the lookup */
 	uint8_t			masklen;	/* prefix length of the entry */
 };
 
 /*
  * Per-table configuration state of the radix algo, allocated zeroed in
  * ta_init_radix() and handed back to the callbacks as @ta_state.
  */
 struct radix_cfg {
 	/* NOTE(review): head4/head6 are not referenced in the visible code. */
 	struct radix_node_head	*head4;
 	struct radix_node_head	*head6;
 	size_t			count4;	/* reported as tinfo->count4 */
 	size_t			count6;	/* reported as tinfo->count6 */
 };
 
 /*
  * Layout of the @ta_buf scratch area used by the radix add/del
  * callbacks.  The union holds address/mask storage for one family.
  */
 struct ta_buf_radix
 {
 	void *ent_ptr;			/* entry allocated by -prepare_add */
 	/* NOTE(review): presumably point at the key/mask in use - confirm. */
 	struct sockaddr	*addr_ptr;
 	struct sockaddr	*mask_ptr;
 	union {
 		struct {
 			struct sockaddr_in sa;
 			struct sockaddr_in ma;
 		} a4;
 		struct {
 			struct sa_in6 sa;
 			struct sa_in6 ma;
 		} a6;
 	} addr;
 };
 
 static int ta_lookup_radix(struct table_info *ti, void *key, uint32_t keylen,
     uint32_t *val);
 static int ta_init_radix(struct ip_fw_chain *ch, void **ta_state,
     struct table_info *ti, char *data, uint8_t tflags);
 static int flush_radix_entry(struct radix_node *rn, void *arg);
 static void ta_destroy_radix(void *ta_state, struct table_info *ti);
 static void ta_dump_radix_tinfo(void *ta_state, struct table_info *ti,
     ipfw_ta_tinfo *tinfo);
 static int ta_dump_radix_tentry(void *ta_state, struct table_info *ti,
     void *e, ipfw_obj_tentry *tent);
 static int ta_find_radix_tentry(void *ta_state, struct table_info *ti,
     ipfw_obj_tentry *tent);
 static void ta_foreach_radix(void *ta_state, struct table_info *ti,
     ta_foreach_f *f, void *arg);
 static void tei_to_sockaddr_ent(struct tentry_info *tei, struct sockaddr *sa,
     struct sockaddr *ma, int *set_mask);
 static int ta_prepare_add_radix(struct ip_fw_chain *ch, struct tentry_info *tei,
     void *ta_buf);
 static int ta_add_radix(void *ta_state, struct table_info *ti,
     struct tentry_info *tei, void *ta_buf, uint32_t *pnum);
 static int ta_prepare_del_radix(struct ip_fw_chain *ch, struct tentry_info *tei,
     void *ta_buf);
 static int ta_del_radix(void *ta_state, struct table_info *ti,
     struct tentry_info *tei, void *ta_buf, uint32_t *pnum);
 static void ta_flush_radix_entry(struct ip_fw_chain *ch, struct tentry_info *tei,
     void *ta_buf);
 static int ta_need_modify_radix(void *ta_state, struct table_info *ti,
     uint32_t count, uint64_t *pflags);
 
 /*
  * Runtime lookup callback for the radix algo.
  *
  * A key of sizeof(in_addr_t) bytes is looked up in the IPv4 tree
  * (ti->state); any other length is treated as an IPv6 address and
  * looked up in ti->xstate.  On a match the entry value is stored in
  * @val and 1 is returned; otherwise 0 is returned.
  */
 static int
 ta_lookup_radix(struct table_info *ti, void *key, uint32_t keylen,
     uint32_t *val)
 {
 	struct radix_node_head *head;
 	struct radix_addr_entry *e4;
 	struct radix_addr_xentry *e6;
 	struct sockaddr_in key4;
 	struct sa_in6 key6;
 
 	if (keylen != sizeof(in_addr_t)) {
 		/* IPv6: only length and address of the key are filled in. */
 		KEY_LEN(key6) = KEY_LEN_INET6;
 		memcpy(&key6.sin6_addr, key, sizeof(struct in6_addr));
 		head = (struct radix_node_head *)ti->xstate;
 		e6 = (struct radix_addr_xentry *)
 		    (head->rnh_matchaddr(&key6, &head->rh));
 		if (e6 == NULL)
 			return (0);
 		*val = e6->value;
 		return (1);
 	}
 
 	/* IPv4: only length and address of the key are filled in. */
 	KEY_LEN(key4) = KEY_LEN_INET;
 	key4.sin_addr.s_addr = *((in_addr_t *)key);
 	head = (struct radix_node_head *)ti->state;
 	e4 = (struct radix_addr_entry *)
 	    (head->rnh_matchaddr(&key4, &head->rh));
 	if (e4 == NULL)
 		return (0);
 	*val = e4->value;
 	return (1);
 }
 
 /*
  * New table: create the IPv4 and IPv6 radix heads in @ti, allocate the
  * algo configuration and install the lookup function.
  * Returns 0 on success or ENOMEM if a radix head cannot be created.
  */
 static int
 ta_init_radix(struct ip_fw_chain *ch, void **ta_state, struct table_info *ti,
     char *data, uint8_t tflags)
 {
 	struct radix_cfg *new_cfg;
 
 	/* IPv4 tree first; nothing to undo if this fails. */
 	if (rn_inithead(&ti->state, OFF_LEN_INET) == 0)
 		return (ENOMEM);
 
 	/* IPv6 tree; release the IPv4 head again on failure. */
 	if (rn_inithead(&ti->xstate, OFF_LEN_INET6) == 0) {
 		rn_detachhead(&ti->state);
 		return (ENOMEM);
 	}
 
 	new_cfg = malloc(sizeof(struct radix_cfg), M_IPFW, M_WAITOK | M_ZERO);
 
 	ti->lookup = ta_lookup_radix;
 	*ta_state = new_cfg;
 
 	return (0);
 }
 
 /*
  * Tree-walk callback: unlink node @rn from the radix head passed in
  * @arg and free the entry that was removed.  Always returns 0.
  */
 static int
 flush_radix_entry(struct radix_node *rn, void *arg)
 {
 	struct radix_node_head * const head = arg;
 	struct radix_addr_entry *removed;
 
 	removed = (struct radix_addr_entry *)
 	    head->rnh_deladdr(rn->rn_key, rn->rn_mask, &head->rh);
 	if (removed == NULL)
 		return (0);
 
 	free(removed, M_IPFW_TBL);
 	return (0);
 }
 
 /*
  * Destroy a radix table instance: flush and detach the IPv4 tree,
  * then the IPv6 tree, and finally free the configuration in @ta_state.
  */
 static void
 ta_destroy_radix(void *ta_state, struct table_info *ti)
 {
 	struct radix_node_head *head4, *head6;
 
 	head4 = (struct radix_node_head *)(ti->state);
 	head4->rnh_walktree(&head4->rh, flush_radix_entry, head4);
 	rn_detachhead(&ti->state);
 
 	head6 = (struct radix_node_head *)(ti->xstate);
 	head6->rnh_walktree(&head6->rh, flush_radix_entry, head6);
 	rn_detachhead(&ti->xstate);
 
 	/* @ta_state is the struct radix_cfg allocated by ta_init_radix(). */
 	free((struct radix_cfg *)ta_state, M_IPFW);
 }
 
 /*
  * Provide algo-specific table info: both address families are radix
  * trees, with per-family item counts taken from the configuration and
  * item sizes taken from the entry structures.
  */
 static void
 ta_dump_radix_tinfo(void *ta_state, struct table_info *ti, ipfw_ta_tinfo *tinfo)
 {
 	struct radix_cfg *rcfg = (struct radix_cfg *)ta_state;
 
 	tinfo->flags = IPFW_TATFLAGS_AFDATA | IPFW_TATFLAGS_AFITEM;
 
 	/* IPv4 part */
 	tinfo->taclass4 = IPFW_TACLASS_RADIX;
 	tinfo->itemsize4 = sizeof(struct radix_addr_entry);
 	tinfo->count4 = rcfg->count4;
 
 	/* IPv6 part */
 	tinfo->taclass6 = IPFW_TACLASS_RADIX;
 	tinfo->itemsize6 = sizeof(struct radix_addr_xentry);
 	tinfo->count6 = rcfg->count6;
 }
 
 /*
  * Export radix entry @e into @tent.  The entry may be either an IPv4
  * or an IPv6 one; the address family stored in the key decides which.
  * Always returns 0.
  */
 static int
 ta_dump_radix_tentry(void *ta_state, struct table_info *ti, void *e,
     ipfw_obj_tentry *tent)
 {
 	struct radix_addr_entry *ent4;
 #ifdef INET6
 	struct radix_addr_xentry *ent6;
 #endif
 
 	/*
 	 * Guess IPv4/IPv6 radix by sockaddr family.  This reads the
 	 * family through the IPv4 layout even for IPv6 entries.
 	 */
 	ent4 = (struct radix_addr_entry *)e;
 	if (ent4->addr.sin_family == AF_INET) {
 		tent->subtype = AF_INET;
 		tent->masklen = ent4->masklen;
 		tent->k.addr.s_addr = ent4->addr.sin_addr.s_addr;
 		tent->v.kidx = ent4->value;
 		return (0);
 	}
 
 #ifdef INET6
 	ent6 = (struct radix_addr_xentry *)e;
 	tent->subtype = AF_INET6;
 	tent->masklen = ent6->masklen;
 	memcpy(&tent->k.addr6, &ent6->addr6.sin6_addr,
 	    sizeof(struct in6_addr));
 	tent->v.kidx = ent6->value;
 #endif
 
 	return (0);
 }
 
 /*
  * Find the entry matching the key in @tent and export it back into
  * @tent.  AF_INET keys are looked up in the IPv4 tree, every other
  * subtype in the IPv6 tree.
  * Returns 0 if an entry was found, ENOENT otherwise.
  */
 static int
 ta_find_radix_tentry(void *ta_state, struct table_info *ti,
     ipfw_obj_tentry *tent)
 {
 	struct radix_node_head *head;
 	struct sockaddr_in key4;
 	struct sa_in6 key6;
 	void *found;
 
 	if (tent->subtype != AF_INET) {
 		KEY_LEN(key6) = KEY_LEN_INET6;
 		memcpy(&key6.sin6_addr, &tent->k.addr6,
 		    sizeof(struct in6_addr));
 		head = (struct radix_node_head *)ti->xstate;
 		found = head->rnh_matchaddr(&key6, &head->rh);
 	} else {
 		KEY_LEN(key4) = KEY_LEN_INET;
 		key4.sin_addr.s_addr = tent->k.addr.s_addr;
 		head = (struct radix_node_head *)ti->state;
 		found = head->rnh_matchaddr(&key4, &head->rh);
 	}
 
 	if (found == NULL)
 		return (ENOENT);
 
 	ta_dump_radix_tentry(ta_state, ti, found, tent);
 	return (0);
 }
 
 /*
  * Run callback @f with argument @arg over every entry of the table:
  * first the IPv4 tree, then the IPv6 tree.
  */
 static void
 ta_foreach_radix(void *ta_state, struct table_info *ti, ta_foreach_f *f,
     void *arg)
 {
 	struct radix_node_head *head;
 	walktree_f_t *walker;
 
 	/* The table callback is handed to the radix walker as-is. */
 	walker = (walktree_f_t *)f;
 
 	head = (struct radix_node_head *)(ti->state);
 	head->rnh_walktree(&head->rh, walker, arg);
 
 	head = (struct radix_node_head *)(ti->xstate);
 	head->rnh_walktree(&head->rh, walker, arg);
 }
 
 
 #ifdef INET6
 static inline void ipv6_writemask(struct in6_addr *addr6, uint8_t mask);
 
 /*
  * Write an IPv6 netmask of @mask leading one-bits into @addr6.
  *
  * Only the 32-bit words covered by the prefix are written (in network
  * byte order); words past the prefix are left untouched, so the caller
  * has to supply a zeroed address.  @mask is not range-checked here:
  * values above 128 would write past the end of @addr6.
  */
 static inline void
 ipv6_writemask(struct in6_addr *addr6, uint8_t mask)
 {
 	uint32_t *cp;
 
 	/* Whole words first. */
 	for (cp = (uint32_t *)addr6; mask >= 32; mask -= 32)
 		*cp++ = 0xFFFFFFFF;
 	/*
 	 * Remaining 1..31 bits.  The shift is done on an unsigned value:
 	 * with a signed 1 a one-bit remainder would shift into the sign
 	 * bit and overflow.
 	 */
 	if (mask > 0)
 		*cp = htonl(~((1U << (32 - mask)) - 1));
 }
 #endif
 
 /*
  * Build the radix key and mask for the table entry described by @tei.
  *
  * @sa receives the key: length, family and the address with the mask
  * applied.  @ma receives the mask: length and mask bits only.
  * *@set_mask is set to 0 for a host entry (/32 or /128) and to 1
  * otherwise.  For any subtype other than AF_INET/AF_INET6 nothing is
  * written, including *@set_mask.
  *
  * tei->masklen is not range-checked here.
  */
 static void
 tei_to_sockaddr_ent(struct tentry_info *tei, struct sockaddr *sa,
     struct sockaddr *ma, int *set_mask)
 {
 	int mlen;
 #ifdef INET
 	struct sockaddr_in *addr, *mask;
 #endif
 #ifdef INET6
 	struct sa_in6 *addr6, *mask6;
 #endif
 	in_addr_t a4;
 
 	mlen = tei->masklen;
 
 	if (tei->subtype == AF_INET) {
 #ifdef INET
 		addr = (struct sockaddr_in *)sa;
 		mask = (struct sockaddr_in *)ma;
 		/* Set 'total' structure length */
 		KEY_LEN(*addr) = KEY_LEN_INET;
 		KEY_LEN(*mask) = KEY_LEN_INET;
 		addr->sin_family = AF_INET;
 		/*
 		 * Unsigned shift: with a signed 1 a /1 prefix would
 		 * shift into the sign bit and overflow.
 		 */
 		mask->sin_addr.s_addr =
 		    htonl(mlen ? ~((1U << (32 - mlen)) - 1) : 0);
 		a4 = *((in_addr_t *)tei->paddr);
 		addr->sin_addr.s_addr = a4 & mask->sin_addr.s_addr;
 		if (mlen != 32)
 			*set_mask = 1;
 		else
 			*set_mask = 0;
 #endif
 #ifdef INET6
 	} else if (tei->subtype == AF_INET6) {
 		/* IPv6 case */
 		addr6 = (struct sa_in6 *)sa;
 		mask6 = (struct sa_in6 *)ma;
 		/* Set 'total' structure length */
 		KEY_LEN(*addr6) = KEY_LEN_INET6;
 		KEY_LEN(*mask6) = KEY_LEN_INET6;
 		addr6->sin6_family = AF_INET6;
 		ipv6_writemask(&mask6->sin6_addr, mlen);
 		memcpy(&addr6->sin6_addr, tei->paddr, sizeof(struct in6_addr));
 		APPLY_MASK(&addr6->sin6_addr, &mask6->sin6_addr);
 		if (mlen != 128)
 			*set_mask = 1;
 		else
 			*set_mask = 0;
 #endif
 	}
 }
 
 /*
  * Prepare an insertion: allocate a zeroed radix entry for @tei and
  * store it in @ta_buf together with the key and mask pointers.
  *
  * The mask pointer is recorded only when tei_to_sockaddr_ent() reports
  * that a mask is needed.
  *
  * Returns 0 on success, or EINVAL when the mask length exceeds the
  * family's maximum or the address family is not handled.
  */
 static int
 ta_prepare_add_radix(struct ip_fw_chain *ch, struct tentry_info *tei,
     void *ta_buf)
 {
 	struct ta_buf_radix *tb;
 	struct radix_addr_entry *e4;
 #ifdef INET6
 	struct radix_addr_xentry *e6;
 #endif
 	struct sockaddr *key, *keymask;
 	int mlen, set_mask;
 
 	tb = (struct ta_buf_radix *)ta_buf;
 	mlen = tei->masklen;
 	set_mask = 0;
 
 	switch (tei->subtype) {
 	case AF_INET:
 #ifdef INET
 		if (mlen > 32)
 			return (EINVAL);
 		e4 = malloc(sizeof(*e4), M_IPFW_TBL, M_WAITOK | M_ZERO);
 		e4->masklen = mlen;
 		tb->ent_ptr = e4;
 
 		key = (struct sockaddr *)&e4->addr;
 		keymask = (struct sockaddr *)&tb->addr.a4.ma;
 #endif
 		break;
 #ifdef INET6
 	case AF_INET6:
 		if (mlen > 128)
 			return (EINVAL);
 		e6 = malloc(sizeof(*e6), M_IPFW_TBL, M_WAITOK | M_ZERO);
 		e6->masklen = mlen;
 		tb->ent_ptr = e6;
 
 		key = (struct sockaddr *)&e6->addr6;
 		keymask = (struct sockaddr *)&tb->addr.a6.ma;
 		break;
 #endif
 	default:
 		/* Unknown CIDR type */
 		return (EINVAL);
 	}
 
 	tei_to_sockaddr_ent(tei, key, keymask, &set_mask);
 
 	/* Remember what the add step has to pass to the radix code */
 	tb->addr_ptr = key;
 	if (set_mask != 0)
 		tb->mask_ptr = keymask;
 
 	return (0);
 }
 
 /*
  * Insert the entry prepared in @ta_buf, or update an existing record.
  *
  * If a record with the same key/mask exists it is updated only when
  * TEI_FLAGS_UPDATE is set: the old value is handed back in @tei->value,
  * TEI_FLAGS_UPDATED is raised and *@pnum is set to 0.  Otherwise the
  * prepared entry is linked into the tree, the per-family counter is
  * bumped and *@pnum is set to 1.
  *
  * Returns 0 on success, EEXIST when the record exists and no update
  * was requested, EFBIG when TEI_FLAGS_DONTADD forbids insertion, and
  * EINVAL when the radix code refuses the new node.
  */
 static int
 ta_add_radix(void *ta_state, struct table_info *ti, struct tentry_info *tei,
     void *ta_buf, uint32_t *pnum)
 {
 	struct radix_cfg *cfg;
 	struct radix_node_head *rnh;
 	struct radix_node *rn;
 	struct ta_buf_radix *tb;
 	uint32_t *slot, prev;
 	int is_v4;
 
 	cfg = (struct radix_cfg *)ta_state;
 	tb = (struct ta_buf_radix *)ta_buf;
 	is_v4 = (tei->subtype == AF_INET);
 
 	/* Store the requested value in the pre-allocated entry */
 	if (is_v4) {
 		rnh = ti->state;
 		((struct radix_addr_entry *)tb->ent_ptr)->value = tei->value;
 	} else {
 		rnh = ti->xstate;
 		((struct radix_addr_xentry *)tb->ent_ptr)->value = tei->value;
 	}
 
 	rn = rnh->rnh_lookup(tb->addr_ptr, tb->mask_ptr, &rnh->rh);
 	if (rn == NULL) {
 		/* No such record yet: insert unless told not to */
 		if ((tei->flags & TEI_FLAGS_DONTADD) != 0)
 			return (EFBIG);
 
 		rn = rnh->rnh_addaddr(tb->addr_ptr, tb->mask_ptr, &rnh->rh,
 		    tb->ent_ptr);
 		if (rn == NULL) {
 			/* Unknown error */
 			return (EINVAL);
 		}
 
 		if (is_v4)
 			cfg->count4++;
 		else
 			cfg->count6++;
 		/* The tree owns the entry now; keep flush from freeing it */
 		tb->ent_ptr = NULL;
 		*pnum = 1;
 
 		return (0);
 	}
 
 	/* Record already exists: touch it only on explicit request */
 	if ((tei->flags & TEI_FLAGS_UPDATE) == 0)
 		return (EEXIST);
 
 	if (is_v4)
 		slot = &((struct radix_addr_entry *)rn)->value;
 	else
 		slot = &((struct radix_addr_xentry *)rn)->value;
 
 	/* Swap: table gets the new value, caller gets the old one */
 	prev = *slot;
 	*slot = tei->value;
 	tei->value = prev;
 
 	tei->flags |= TEI_FLAGS_UPDATED;
 	*pnum = 0;
 
 	return (0);
 }
 
 /*
  * Prepare a deletion: build the key and mask for @tei inside @ta_buf.
  *
  * Unlike the add path nothing is allocated; the key lives in the
  * buffer itself.  The mask pointer is recorded only when
  * tei_to_sockaddr_ent() reports that a mask is needed.
  *
  * Returns 0 on success, or EINVAL for an over-long mask or an address
  * family that is not handled.
  */
 static int
 ta_prepare_del_radix(struct ip_fw_chain *ch, struct tentry_info *tei,
     void *ta_buf)
 {
 	struct ta_buf_radix *tb;
 	struct sockaddr *key, *keymask;
 	int mlen, set_mask;
 
 	tb = (struct ta_buf_radix *)ta_buf;
 	mlen = tei->masklen;
 	set_mask = 0;
 
 	switch (tei->subtype) {
 	case AF_INET:
 		if (mlen > 32)
 			return (EINVAL);
 
 		key = (struct sockaddr *)&tb->addr.a4.sa;
 		keymask = (struct sockaddr *)&tb->addr.a4.ma;
 		break;
 #ifdef INET6
 	case AF_INET6:
 		if (mlen > 128)
 			return (EINVAL);
 
 		key = (struct sockaddr *)&tb->addr.a6.sa;
 		keymask = (struct sockaddr *)&tb->addr.a6.ma;
 		break;
 #endif
 	default:
 		return (EINVAL);
 	}
 
 	tei_to_sockaddr_ent(tei, key, keymask, &set_mask);
 
 	tb->addr_ptr = key;
 	if (set_mask != 0)
 		tb->mask_ptr = keymask;
 
 	return (0);
 }
 
 /*
  * Remove the record whose key/mask were prepared in @ta_buf.
  *
  * On success the removed node is parked in tb->ent_ptr, its value is
  * returned through @tei->value, the per-family counter is decremented
  * and *@pnum is set to 1.
  *
  * Returns 0 on success and ENOENT when no such record exists.
  */
 static int
 ta_del_radix(void *ta_state, struct table_info *ti, struct tentry_info *tei,
     void *ta_buf, uint32_t *pnum)
 {
 	struct radix_cfg *cfg;
 	struct radix_node_head *rnh;
 	struct radix_node *rn;
 	struct ta_buf_radix *tb;
 	int is_v4;
 
 	cfg = (struct radix_cfg *)ta_state;
 	tb = (struct ta_buf_radix *)ta_buf;
 	is_v4 = (tei->subtype == AF_INET);
 
 	rnh = is_v4 ? (struct radix_node_head *)ti->state :
 	    (struct radix_node_head *)ti->xstate;
 
 	rn = rnh->rnh_deladdr(tb->addr_ptr, tb->mask_ptr, &rnh->rh);
 	if (rn == NULL)
 		return (ENOENT);
 
 	/* Hand the stored value back and account for the removal */
 	if (is_v4) {
 		tei->value = ((struct radix_addr_entry *)rn)->value;
 		cfg->count4--;
 	} else {
 		tei->value = ((struct radix_addr_xentry *)rn)->value;
 		cfg->count6--;
 	}
 
 	tb->ent_ptr = rn;
 	*pnum = 1;
 
 	return (0);
 }
 
 /*
  * Release whatever entry is still referenced by @ta_buf, if any.
  */
 static void
 ta_flush_radix_entry(struct ip_fw_chain *ch, struct tentry_info *tei,
     void *ta_buf)
 {
 	struct ta_buf_radix *tb = (struct ta_buf_radix *)ta_buf;
 
 	if (tb->ent_ptr == NULL)
 		return;
 
 	free(tb->ent_ptr, M_IPFW_TBL);
 }
 
 /*
  * Resize hook for the radix algorithm.  Always returns 0; @pflags is
  * never written and the remaining arguments are unused.
  */
 static int
 ta_need_modify_radix(void *ta_state, struct table_info *ti, uint32_t count,
     uint64_t *pflags)
 {
 
 	/*
 	 * The radix tree does not require additional memory allocations
 	 * other than the nodes themselves.  Adding new masks to the tree
 	 * does, but we don't have any API to call (and we don't know
 	 * which sizes we would need).
 	 */
 	return (0);
 }
 
 /*
  * Algorithm descriptor for "addr:radix": binds the radix callbacks
  * above to an IPFW_TABLE_ADDR table.  It is flagged TA_FLAG_DEFAULT and
  * provides no prepare_mod/fill_mod/modify/flush_mod callbacks.
  */
 struct table_algo addr_radix = {
 	.name		= "addr:radix",
 	.type		= IPFW_TABLE_ADDR,
 	.flags		= TA_FLAG_DEFAULT,
 	.ta_buf_size	= sizeof(struct ta_buf_radix),
 	.init		= ta_init_radix,
 	.destroy	= ta_destroy_radix,
 	.prepare_add	= ta_prepare_add_radix,
 	.prepare_del	= ta_prepare_del_radix,
 	.add		= ta_add_radix,
 	.del		= ta_del_radix,
 	.flush_entry	= ta_flush_radix_entry,
 	.foreach	= ta_foreach_radix,
 	.dump_tentry	= ta_dump_radix_tentry,
 	.find_tentry	= ta_find_radix_tentry,
 	.dump_tinfo	= ta_dump_radix_tinfo,
 	.need_modify	= ta_need_modify_radix,
 };
 
 
 /*
  * addr:hash cmds
  *
  *
  * ti->data:
  * [inv.mask4][inv.mask6][log2hsize4][log2hsize6]
  * [        8][        8][         8][         8]
  *
  * inv.mask4: 32 - mask
  * inv.mask6:
  * 1) _slow lookup: mask
  * 2) _aligned: mask / 8
  * 3) _64: 128 - mask (i.e. 64)
  *
  *
  * pflags:
  * [v4=1/v6=0][hsize]
  * [       32][   32]
  */
 
 struct chashentry;
 
 /* Bucket head type: a singly-linked list of struct chashentry */
 SLIST_HEAD(chashbhead, chashentry);
 
 /*
  * Per-table state of the addr:hash algorithm (stored as ta_state).
  * IPv4 and IPv6 records are kept in two separate bucket arrays.
  */
 struct chash_cfg {
 	struct chashbhead *head4;	/* IPv4 bucket array, size4 heads */
 	struct chashbhead *head6;	/* IPv6 bucket array, size6 heads */
 	size_t	size4;			/* Number of IPv4 buckets */
 	size_t	size6;			/* Number of IPv6 buckets */
 	size_t	items4;			/* Number of IPv4 records stored */
 	size_t	items6;			/* Number of IPv6 records stored */
 	uint8_t	mask4;			/* IPv4 prefix length of all keys */
 	uint8_t	mask6;			/* IPv6 prefix length of all keys */
 };
 
 /*
  * One addr:hash record.  @type holds AF_INET or AF_INET6 and selects
  * which member of @a is valid.
  */
 struct chashentry {
 	SLIST_ENTRY(chashentry)	next;	/* Bucket list linkage */
 	uint32_t	value;		/* Value associated with the key */
 	uint32_t	type;		/* Address family of the key */
 	union {
 		uint32_t	a4;	/* Host format */
 		struct in6_addr	a6;	/* Network format */
 	} a;
 };
 
 /*
  * Scratch buffer shared by the prepare/add/del/flush callbacks.
  * @ent_ptr is an entry to be freed by the flush callback unless it was
  * linked into the table; @ent is the in-place key used for deletion.
  */
 struct ta_buf_chash
 {
 	void *ent_ptr;
 	struct chashentry ent;
 };
 
 /*
  * Forward declarations for the addr:hash implementation: hashing
  * helpers (per address family), the three lookup variants, option
  * parsing and the table_algo callbacks.
  */
 #ifdef INET
 static __inline uint32_t hash_ip(uint32_t addr, int hsize);
 #endif
 #ifdef INET6
 static __inline uint32_t hash_ip6(struct in6_addr *addr6, int hsize);
 static __inline uint16_t hash_ip64(struct in6_addr *addr6, int hsize);
 static __inline uint32_t hash_ip6_slow(struct in6_addr *addr6, void *key,
     int mask, int hsize);
 static __inline uint32_t hash_ip6_al(struct in6_addr *addr6, void *key, int mask,
     int hsize);
 #endif
 static int ta_lookup_chash_slow(struct table_info *ti, void *key, uint32_t keylen,
     uint32_t *val);
 static int ta_lookup_chash_aligned(struct table_info *ti, void *key,
     uint32_t keylen, uint32_t *val);
 static int ta_lookup_chash_64(struct table_info *ti, void *key, uint32_t keylen,
     uint32_t *val);
 static int chash_parse_opts(struct chash_cfg *cfg, char *data);
 static void ta_print_chash_config(void *ta_state, struct table_info *ti,
     char *buf, size_t bufsize);
 static int ta_log2(uint32_t v);
 static int ta_init_chash(struct ip_fw_chain *ch, void **ta_state,
     struct table_info *ti, char *data, uint8_t tflags);
 static void ta_destroy_chash(void *ta_state, struct table_info *ti);
 static void ta_dump_chash_tinfo(void *ta_state, struct table_info *ti,
     ipfw_ta_tinfo *tinfo);
 static int ta_dump_chash_tentry(void *ta_state, struct table_info *ti,
     void *e, ipfw_obj_tentry *tent);
 static uint32_t hash_ent(struct chashentry *ent, int af, int mlen,
     uint32_t size);
 static int tei_to_chash_ent(struct tentry_info *tei, struct chashentry *ent);
 static int ta_find_chash_tentry(void *ta_state, struct table_info *ti,
     ipfw_obj_tentry *tent);
 static void ta_foreach_chash(void *ta_state, struct table_info *ti,
     ta_foreach_f *f, void *arg);
 static int ta_prepare_add_chash(struct ip_fw_chain *ch, struct tentry_info *tei,
     void *ta_buf);
 static int ta_add_chash(void *ta_state, struct table_info *ti,
     struct tentry_info *tei, void *ta_buf, uint32_t *pnum);
 static int ta_prepare_del_chash(struct ip_fw_chain *ch, struct tentry_info *tei,
     void *ta_buf);
 static int ta_del_chash(void *ta_state, struct table_info *ti,
     struct tentry_info *tei, void *ta_buf, uint32_t *pnum);
 static void ta_flush_chash_entry(struct ip_fw_chain *ch, struct tentry_info *tei,
     void *ta_buf);
 static int ta_need_modify_chash(void *ta_state, struct table_info *ti,
     uint32_t count, uint64_t *pflags);
 static int ta_prepare_mod_chash(void *ta_buf, uint64_t *pflags);
 static int ta_fill_mod_chash(void *ta_state, struct table_info *ti, void *ta_buf,
     uint64_t *pflags);
 static void ta_modify_chash(void *ta_state, struct table_info *ti, void *ta_buf,
     uint64_t pflags);
 static void ta_flush_mod_chash(void *ta_buf);
 
 
 #ifdef INET
 /*
  * Map an IPv4 key to a bucket index: @addr modulo (@hsize - 1).
  */
 static __inline uint32_t
 hash_ip(uint32_t addr, int hsize)
 {
 	uint32_t modulus;
 
 	modulus = hsize - 1;
 
 	return (addr % modulus);
 }
 #endif
 
 #ifdef INET6
 /*
  * Map an IPv6 key to a bucket index: XOR of the four 32-bit words of
  * @addr6, modulo (@hsize - 1).
  */
 static __inline uint32_t
 hash_ip6(struct in6_addr *addr6, int hsize)
 {
 	uint32_t folded;
 	int word;
 
 	folded = 0;
 	for (word = 0; word < 4; word++)
 		folded ^= addr6->s6_addr32[word];
 
 	return (folded % (hsize - 1));
 }
 
 
 /*
  * Bucket index for /64 keys: XOR of the first two 32-bit words of
  * @addr6, modulo (@hsize - 1).  The result is returned as uint16_t.
  */
 static __inline uint16_t
 hash_ip64(struct in6_addr *addr6, int hsize)
 {
 	uint32_t folded;
 
 	folded = addr6->s6_addr32[0];
 	folded ^= addr6->s6_addr32[1];
 
 	return (folded % (hsize - 1));
 }
 
 
 /*
  * Copy the IPv6 address at @key into @addr6, mask it down to a prefix
  * of @mask bits and return the bucket index of the masked address.
  */
 static __inline uint32_t
 hash_ip6_slow(struct in6_addr *addr6, void *key, int mask, int hsize)
 {
 	struct in6_addr netmask;
 
 	memcpy(addr6, key, sizeof(*addr6));
 	ipv6_writemask(&netmask, mask);
 	APPLY_MASK(addr6, &netmask);
 
 	return (hash_ip6(addr6, hsize));
 }
 
 /*
  * Byte-aligned variant: zero @addr6, copy the first @mask bytes of
  * @key into it and return the bucket index of the result.
  * Note that @mask is a byte count here, not a bit count.
  */
 static __inline uint32_t
 hash_ip6_al(struct in6_addr *addr6, void *key, int mask, int hsize)
 {
 
 	/* Clear all 16 bytes so the part past the prefix is zero */
 	memset(addr6, 0, 2 * sizeof(uint64_t));
 	memcpy(addr6, key, mask);
 
 	return (hash_ip6(addr6, hsize));
 }
 #endif
 
 /*
  * Lookup used when the IPv6 mask is not a multiple of 8 bits.
  *
  * A @keylen equal to sizeof(in_addr_t) selects the IPv4 hash in
  * ti->state, anything else the IPv6 hash in ti->xstate.  Shift, mask
  * and hash sizes are decoded from ti->data.
  *
  * Returns 1 and stores the record's value in *@val on a match,
  * 0 otherwise.
  */
 static int
 ta_lookup_chash_slow(struct table_info *ti, void *key, uint32_t keylen,
     uint32_t *val)
 {
 	struct chashbhead *head;
 	struct chashentry *ent;
 	uint16_t hash, hsize;
 	uint8_t imask;
 
 	if (keylen == sizeof(in_addr_t)) {
 #ifdef INET
 		uint32_t a4;
 
 		head = (struct chashbhead *)ti->state;
 		imask = ti->data >> 24;
 		hsize = 1 << ((ti->data & 0xFFFF) >> 8);
 		/* Keys are stored in host order, shifted down by imask */
 		a4 = ntohl(*((in_addr_t *)key)) >> imask;
 		hash = hash_ip(a4, hsize);
 		SLIST_FOREACH(ent, &head[hash], next) {
 			if (ent->a.a4 != a4)
 				continue;
 			*val = ent->value;
 			return (1);
 		}
 #endif
 	} else {
 #ifdef INET6
 		/* IPv6: worst scenario: non-round mask */
 		struct in6_addr masked;
 
 		head = (struct chashbhead *)ti->xstate;
 		imask = (ti->data & 0xFF0000) >> 16;
 		hsize = 1 << (ti->data & 0xFF);
 		hash = hash_ip6_slow(&masked, key, imask, hsize);
 		SLIST_FOREACH(ent, &head[hash], next) {
 			if (memcmp(&ent->a.a6, &masked, 16) != 0)
 				continue;
 			*val = ent->value;
 			return (1);
 		}
 #endif
 	}
 
 	return (0);
 }
 
 /*
  * Lookup used when the IPv6 mask is a multiple of 8 bits.
  *
  * A @keylen equal to sizeof(in_addr_t) selects the IPv4 hash in
  * ti->state, anything else the IPv6 hash in ti->xstate.  For IPv6 the
  * mask field of ti->data is passed to hash_ip6_al() as a byte count.
  *
  * Returns 1 and stores the record's value in *@val on a match,
  * 0 otherwise.
  */
 static int
 ta_lookup_chash_aligned(struct table_info *ti, void *key, uint32_t keylen,
     uint32_t *val)
 {
 	struct chashbhead *head;
 	struct chashentry *ent;
 	uint16_t hash, hsize;
 	uint8_t imask;
 
 	if (keylen == sizeof(in_addr_t)) {
 #ifdef INET
 		uint32_t a4;
 
 		head = (struct chashbhead *)ti->state;
 		imask = ti->data >> 24;
 		hsize = 1 << ((ti->data & 0xFFFF) >> 8);
 		/* Keys are stored in host order, shifted down by imask */
 		a4 = ntohl(*((in_addr_t *)key)) >> imask;
 		hash = hash_ip(a4, hsize);
 		SLIST_FOREACH(ent, &head[hash], next) {
 			if (ent->a.a4 != a4)
 				continue;
 			*val = ent->value;
 			return (1);
 		}
 #endif
 	} else {
 #ifdef INET6
 		/* IPv6: aligned to 8bit mask */
 		struct in6_addr prefix;
 		uint64_t *want, *have;
 
 		head = (struct chashbhead *)ti->xstate;
 		imask = (ti->data & 0xFF0000) >> 16;
 		hsize = 1 << (ti->data & 0xFF);
 
 		hash = hash_ip6_al(&prefix, key, imask, hsize);
 		want = (uint64_t *)&prefix;
 		SLIST_FOREACH(ent, &head[hash], next) {
 			have = (uint64_t *)&ent->a.a6;
 			/* Compare all 128 bits as two 64-bit halves */
 			if (want[0] != have[0] || want[1] != have[1])
 				continue;
 			*val = ent->value;
 			return (1);
 		}
 #endif
 	}
 
 	return (0);
 }
 
 /*
  * Lookup used when the IPv6 mask is exactly /64.
  *
  * A @keylen equal to sizeof(in_addr_t) selects the IPv4 hash in
  * ti->state, anything else the IPv6 hash in ti->xstate.  For IPv6 only
  * the first 64 bits of the key are hashed and compared.
  *
  * Returns 1 and stores the record's value in *@val on a match,
  * 0 otherwise.
  */
 static int
 ta_lookup_chash_64(struct table_info *ti, void *key, uint32_t keylen,
     uint32_t *val)
 {
 	struct chashbhead *head;
 	struct chashentry *ent;
 	uint16_t hash, hsize;
 	uint8_t imask;
 
 	if (keylen == sizeof(in_addr_t)) {
 #ifdef INET
 		uint32_t a4;
 
 		head = (struct chashbhead *)ti->state;
 		imask = ti->data >> 24;
 		hsize = 1 << ((ti->data & 0xFFFF) >> 8);
 		/* Keys are stored in host order, shifted down by imask */
 		a4 = ntohl(*((in_addr_t *)key)) >> imask;
 		hash = hash_ip(a4, hsize);
 		SLIST_FOREACH(ent, &head[hash], next) {
 			if (ent->a.a4 != a4)
 				continue;
 			*val = ent->value;
 			return (1);
 		}
 #endif
 	} else {
 #ifdef INET6
 		/* IPv6: /64 */
 		uint64_t prefix;
 
 		head = (struct chashbhead *)ti->xstate;
 		hsize = 1 << (ti->data & 0xFF);
 		prefix = *(uint64_t *)key;
 		hash = hash_ip64((struct in6_addr *)key, hsize);
 		SLIST_FOREACH(ent, &head[hash], next) {
 			if (*(uint64_t *)&ent->a.a6 != prefix)
 				continue;
 			*val = ent->value;
 			return (1);
 		}
 #endif
 	}
 
 	return (0);
 }
 
 /*
  * Parse the optional "masks=/XX[,/YY]" argument that may follow the
  * algorithm name in @data and store the result in @cfg.
  *
  * A NULL @data, or one without a space, leaves @cfg untouched.  The
  * option text is terminated in place at the next space, so @data may
  * be modified.
  *
  * Returns 0 on success and EINVAL on a malformed option or a mask
  * outside 0..32 (IPv4) / 0..128 (IPv6).
  */
 static int
 chash_parse_opts(struct chash_cfg *cfg, char *data)
 {
 	char *cur, *end, *sp;
 	int m4, m6;
 
 	/* Start from the current values; /YY is optional */
 	m4 = cfg->mask4;
 	m6 = cfg->mask6;
 
 	if (data == NULL)
 		return (0);
 	cur = strchr(data, ' ');
 	if (cur == NULL)
 		return (0);
 	for (; *cur == ' '; cur++)
 		;
 	if (strncmp(cur, "masks=", 6) != 0)
 		return (EINVAL);
 	/* Cut off anything following the option */
 	sp = strchr(cur, ' ');
 	if (sp != NULL)
 		*sp = '\0';
 
 	cur += 6;
 	/* Need /XX[,/YY] */
 	if (*cur != '/')
 		return (EINVAL);
 	cur++;
 	m4 = strtol(cur, &end, 10);
 	if (*end == ',') {
 		/* ,/YY */
 		cur = end + 1;
 		if (*cur != '/')
 			return (EINVAL);
 		cur++;
 		m6 = strtol(cur, &end, 10);
 	}
 	/* Nothing may follow the last number */
 	if (*end != '\0')
 		return (EINVAL);
 
 	if (m4 < 0 || m4 > 32 || m6 < 0 || m6 > 128)
 		return (EINVAL);
 
 	cfg->mask4 = m4;
 	cfg->mask6 = m6;
 
 	return (0);
 }
 
 /*
  * Print the algorithm name into @buf, followed by the "masks=" option
  * when either mask differs from the /32,/128 defaults.
  */
 static void
 ta_print_chash_config(void *ta_state, struct table_info *ti, char *buf,
     size_t bufsize)
 {
 	struct chash_cfg *cfg = (struct chash_cfg *)ta_state;
 
 	if (cfg->mask4 == 32 && cfg->mask6 == 128) {
 		snprintf(buf, bufsize, "%s", "addr:hash");
 		return;
 	}
 
 	snprintf(buf, bufsize, "%s masks=/%d,/%d", "addr:hash",
 	    cfg->mask4, cfg->mask6);
 }
 
 /*
  * Integer base-2 logarithm, rounded down.  Returns 0 for both 0 and 1.
  */
 static int
 ta_log2(uint32_t v)
 {
 	uint32_t bits;
 
 	for (bits = 0; v > 1; v >>= 1)
 		bits++;
 
 	return (bits);
 }
 
 /*
  * New table.
  * We assume 'data' to be either NULL or the following format:
  * 'addr:hash [masks=/32[,/128]]'
  */
 static int
 ta_init_chash(struct ip_fw_chain *ch, void **ta_state, struct table_info *ti,
     char *data, uint8_t tflags)
 {
 	int error, i;
 	uint32_t hsize;
 	struct chash_cfg *cfg;
 
 	cfg = malloc(sizeof(struct chash_cfg), M_IPFW, M_WAITOK | M_ZERO);
 
 	cfg->mask4 = 32;
 	cfg->mask6 = 128;
 
 	if ((error = chash_parse_opts(cfg, data)) != 0) {
 		free(cfg, M_IPFW);
 		return (error);
 	}
 
 	cfg->size4 = 128;
 	cfg->size6 = 128;
 
 	cfg->head4 = malloc(sizeof(struct chashbhead) * cfg->size4, M_IPFW,
 	    M_WAITOK | M_ZERO);
 	cfg->head6 = malloc(sizeof(struct chashbhead) * cfg->size6, M_IPFW,
 	    M_WAITOK | M_ZERO);
 	for (i = 0; i < cfg->size4; i++)
 		SLIST_INIT(&cfg->head4[i]);
 	for (i = 0; i < cfg->size6; i++)
 		SLIST_INIT(&cfg->head6[i]);
 
 
 	*ta_state = cfg;
 	ti->state = cfg->head4;
 	ti->xstate = cfg->head6;
 
 	/* Store data depending on v6 mask length */
 	hsize = ta_log2(cfg->size4) << 8 | ta_log2(cfg->size6);
 	if (cfg->mask6 == 64) {
 		ti->data = (32 - cfg->mask4) << 24 | (128 - cfg->mask6) << 16|
 		    hsize;
 		ti->lookup = ta_lookup_chash_64;
 	} else if ((cfg->mask6  % 8) == 0) {
 		ti->data = (32 - cfg->mask4) << 24 |
 		    cfg->mask6 << 13 | hsize;
 		ti->lookup = ta_lookup_chash_aligned;
 	} else {
 		/* don't do that! */
 		ti->data = (32 - cfg->mask4) << 24 |
 		    cfg->mask6 << 16 | hsize;
 		ti->lookup = ta_lookup_chash_slow;
 	}
 
 	return (0);
 }
 
 static void
 ta_destroy_chash(void *ta_state, struct table_info *ti)
 {
 	struct chash_cfg *cfg;
 	struct chashentry *ent, *ent_next;
 	int i;
 
 	cfg = (struct chash_cfg *)ta_state;
 
 	for (i = 0; i < cfg->size4; i++)
 		SLIST_FOREACH_SAFE(ent, &cfg->head4[i], next, ent_next)
 			free(ent, M_IPFW_TBL);
 
 	for (i = 0; i < cfg->size6; i++)
 		SLIST_FOREACH_SAFE(ent, &cfg->head6[i], next, ent_next)
 			free(ent, M_IPFW_TBL);
 
 	free(cfg->head4, M_IPFW);
 	free(cfg->head6, M_IPFW);
 
 	free(cfg, M_IPFW);
 }
 
 /*
  * Fill @tinfo with per-family statistics for this instance: hash
  * class, bucket count, record count and per-record size.
  */
 static void
 ta_dump_chash_tinfo(void *ta_state, struct table_info *ti, ipfw_ta_tinfo *tinfo)
 {
 	struct chash_cfg *cfg = (struct chash_cfg *)ta_state;
 
 	tinfo->flags = IPFW_TATFLAGS_AFDATA | IPFW_TATFLAGS_AFITEM;
 
 	/* IPv4 side */
 	tinfo->taclass4 = IPFW_TACLASS_HASH;
 	tinfo->size4 = cfg->size4;
 	tinfo->count4 = cfg->items4;
 	tinfo->itemsize4 = sizeof(struct chashentry);
 
 	/* IPv6 side */
 	tinfo->taclass6 = IPFW_TACLASS_HASH;
 	tinfo->size6 = cfg->size6;
 	tinfo->count6 = cfg->items6;
 	tinfo->itemsize6 = sizeof(struct chashentry);
 }
 
 /*
  * Export the hash record @e into @tent.
  *
  * IPv4 keys are shifted back up by (32 - mask4) and converted to
  * network order; IPv6 keys are copied as stored.  The mask length is
  * taken from the table configuration.  Always returns 0.
  */
 static int
 ta_dump_chash_tentry(void *ta_state, struct table_info *ti, void *e,
     ipfw_obj_tentry *tent)
 {
 	struct chash_cfg *cfg;
 	struct chashentry *rec;
 
 	cfg = (struct chash_cfg *)ta_state;
 	rec = (struct chashentry *)e;
 
 	if (rec->type == AF_INET) {
 		tent->k.addr.s_addr = htonl(rec->a.a4 << (32 - cfg->mask4));
 		tent->masklen = cfg->mask4;
 		tent->subtype = AF_INET;
 		tent->v.kidx = rec->value;
 		return (0);
 	}
 
 #ifdef INET6
 	memcpy(&tent->k.addr6, &rec->a.a6, sizeof(struct in6_addr));
 	tent->masklen = cfg->mask6;
 	tent->subtype = AF_INET6;
 	tent->v.kidx = rec->value;
 #endif
 
 	return (0);
 }
 
 /*
  * Compute the bucket index of @ent in a hash of @size buckets.
  *
  * AF_INET keys use hash_ip(); everything else is hashed as IPv6, with
  * the /64 hash when @mlen is 64 and the full-address hash otherwise.
  * Returns 0 when the relevant address family is compiled out.
  */
 static uint32_t
 hash_ent(struct chashentry *ent, int af, int mlen, uint32_t size)
 {
 	uint32_t bucket;
 
 	bucket = 0;
 
 	if (af != AF_INET) {
 #ifdef INET6
 		bucket = (mlen == 64) ? hash_ip64(&ent->a.a6, size) :
 		    hash_ip6(&ent->a.a6, size);
 #endif
 	} else {
 #ifdef INET
 		bucket = hash_ip(ent->a.a4, size);
 #endif
 	}
 
 	return (bucket);
 }
 
 /*
  * Convert the key described by @tei into hash-entry form in @ent.
  *
  * IPv4 keys are stored in host order shifted down by (32 - masklen);
  * IPv6 keys are stored in network order with the mask applied.
  *
  * Returns 0 on success, or EINVAL for an over-long mask or an address
  * family that is not handled.
  */
 static int
 tei_to_chash_ent(struct tentry_info *tei, struct chashentry *ent)
 {
 	int mlen;
 #ifdef INET6
 	struct in6_addr netmask;
 #endif
 
 	mlen = tei->masklen;
 
 	switch (tei->subtype) {
 	case AF_INET:
 #ifdef INET
 		if (mlen > 32)
 			return (EINVAL);
 		ent->type = AF_INET;
 
 		/* Calculate masked address */
 		ent->a.a4 = ntohl(*((in_addr_t *)tei->paddr)) >> (32 - mlen);
 #endif
 		break;
 #ifdef INET6
 	case AF_INET6:
 		if (mlen > 128)
 			return (EINVAL);
 		ent->type = AF_INET6;
 
 		memcpy(&ent->a.a6, tei->paddr, sizeof(struct in6_addr));
 		ipv6_writemask(&netmask, mlen);
 		APPLY_MASK(&ent->a.a6, &netmask);
 		break;
 #endif
 	default:
 		/* Unknown CIDR type */
 		return (EINVAL);
 	}
 
 	return (0);
 }
 
 /*
  * Find the record matching the address in @tent and, when one exists,
  * overwrite @tent with that record's data.
  *
  * The address is masked with the table's configured mask for its
  * family before the search.  AF_INET selects the IPv4 hash; any other
  * subtype is treated as IPv6.
  *
  * Returns 0 on a match, ENOENT when nothing matched, or the error
  * from tei_to_chash_ent().
  */
 static int
 ta_find_chash_tentry(void *ta_state, struct table_info *ti,
     ipfw_obj_tentry *tent)
 {
 	struct chash_cfg *cfg;
 	struct chashbhead *head;
 	struct chashentry probe, *hit;
 	struct tentry_info tei;
 	int error;
 	uint32_t hash;
 
 	cfg = (struct chash_cfg *)ta_state;
 
 	memset(&probe, 0, sizeof(probe));
 	memset(&tei, 0, sizeof(tei));
 
 	if (tent->subtype == AF_INET) {
 		tei.paddr = &tent->k.addr;
 		tei.masklen = cfg->mask4;
 		tei.subtype = AF_INET;
 
 		error = tei_to_chash_ent(&tei, &probe);
 		if (error != 0)
 			return (error);
 
 		head = cfg->head4;
 		hash = hash_ent(&probe, AF_INET, cfg->mask4, cfg->size4);
 		/* Stop at the first entry with the same key */
 		SLIST_FOREACH(hit, &head[hash], next) {
 			if (hit->a.a4 == probe.a.a4)
 				break;
 		}
 	} else {
 		tei.paddr = &tent->k.addr6;
 		tei.masklen = cfg->mask6;
 		tei.subtype = AF_INET6;
 
 		error = tei_to_chash_ent(&tei, &probe);
 		if (error != 0)
 			return (error);
 
 		head = cfg->head6;
 		hash = hash_ent(&probe, AF_INET6, cfg->mask6, cfg->size6);
 		/* Stop at the first entry with the same key */
 		SLIST_FOREACH(hit, &head[hash], next) {
 			if (memcmp(&hit->a.a6, &probe.a.a6, 16) == 0)
 				break;
 		}
 	}
 
 	/* The iterator is NULL when the bucket was exhausted */
 	if (hit == NULL)
 		return (ENOENT);
 
 	ta_dump_chash_tentry(ta_state, ti, hit, tent);
 	return (0);
 }
 
 /*
  * Call @f with @arg for every record, IPv4 hash first and IPv6 hash
  * second.  Iteration tolerates @f unlinking the current record.
  */
 static void
 ta_foreach_chash(void *ta_state, struct table_info *ti, ta_foreach_f *f,
     void *arg)
 {
 	struct chash_cfg *cfg;
 	struct chashentry *cur, *nxt;
 	int bucket;
 
 	cfg = (struct chash_cfg *)ta_state;
 
 	for (bucket = 0; bucket < cfg->size4; bucket++) {
 		SLIST_FOREACH_SAFE(cur, &cfg->head4[bucket], next, nxt) {
 			f(cur, arg);
 		}
 	}
 
 	for (bucket = 0; bucket < cfg->size6; bucket++) {
 		SLIST_FOREACH_SAFE(cur, &cfg->head6[bucket], next, nxt) {
 			f(cur, arg);
 		}
 	}
 }
 
 static int
 ta_prepare_add_chash(struct ip_fw_chain *ch, struct tentry_info *tei,
     void *ta_buf)
 {
 	struct ta_buf_chash *tb;
 	struct chashentry *ent;
 	int error;
 
 	tb = (struct ta_buf_chash *)ta_buf;
 
 	ent = malloc(sizeof(*ent), M_IPFW_TBL, M_WAITOK | M_ZERO);
 
 	error = tei_to_chash_ent(tei, ent);
 	if (error != 0) {
 		free(ent, M_IPFW_TBL);
 		return (error);
 	}
 	tb->ent_ptr = ent;
 
 	return (0);
 }
 
 /*
  * Insert the entry prepared in @ta_buf, or update an existing record.
  *
  * The mask length in @tei must equal the table's configured mask for
  * that family.  If a record with the same key exists it is updated
  * only when TEI_FLAGS_UPDATE is set: the old value is handed back in
  * @tei->value, TEI_FLAGS_UPDATED is raised and *@pnum is set to 0.
  * Otherwise the prepared entry is linked at the head of its bucket,
  * the per-family counter is bumped and *@pnum is set to 1.
  *
  * Returns 0 on success, EINVAL on a mask mismatch, EEXIST when the
  * record exists and no update was requested, and EFBIG when
  * TEI_FLAGS_DONTADD forbids insertion.
  */
 static int
 ta_add_chash(void *ta_state, struct table_info *ti, struct tentry_info *tei,
     void *ta_buf, uint32_t *pnum)
 {
 	struct chash_cfg *cfg;
 	struct chashbhead *head;
 	struct chashentry *ent, *dup;
 	struct ta_buf_chash *tb;
 	uint32_t hash, prev;
 
 	cfg = (struct chash_cfg *)ta_state;
 	tb = (struct ta_buf_chash *)ta_buf;
 	ent = (struct chashentry *)tb->ent_ptr;
 
 	/* Store the requested value in the prepared entry */
 	ent->value = tei->value;
 
 	/* Look for a record with the same key in the right hash */
 	if (tei->subtype == AF_INET) {
 		if (tei->masklen != cfg->mask4)
 			return (EINVAL);
 		head = cfg->head4;
 		hash = hash_ent(ent, AF_INET, cfg->mask4, cfg->size4);
 
 		SLIST_FOREACH(dup, &head[hash], next) {
 			if (dup->a.a4 == ent->a.a4)
 				break;
 		}
 	} else {
 		if (tei->masklen != cfg->mask6)
 			return (EINVAL);
 		head = cfg->head6;
 		hash = hash_ent(ent, AF_INET6, cfg->mask6, cfg->size6);
 
 		SLIST_FOREACH(dup, &head[hash], next) {
 			if (memcmp(&dup->a.a6, &ent->a.a6, 16) == 0)
 				break;
 		}
 	}
 
 	if (dup != NULL) {
 		/* Record already exists: touch it only on explicit request */
 		if ((tei->flags & TEI_FLAGS_UPDATE) == 0)
 			return (EEXIST);
 
 		/* Swap: table gets the new value, caller gets the old one */
 		prev = dup->value;
 		dup->value = tei->value;
 		tei->value = prev;
 
 		/* Indicate that update has happened instead of addition */
 		tei->flags |= TEI_FLAGS_UPDATED;
 		*pnum = 0;
 
 		return (0);
 	}
 
 	if ((tei->flags & TEI_FLAGS_DONTADD) != 0)
 		return (EFBIG);
 
 	SLIST_INSERT_HEAD(&head[hash], ent, next);
 	/* The table owns the entry now; keep flush from freeing it */
 	tb->ent_ptr = NULL;
 	*pnum = 1;
 
 	/* Update counters */
 	if (tei->subtype == AF_INET)
 		cfg->items4++;
 	else
 		cfg->items6++;
 
 	return (0);
 }
 
 /*
  * Prepare a deletion: build the lookup key for @tei in the entry
  * embedded in @ta_buf.  Returns the result of tei_to_chash_ent().
  */
 static int
 ta_prepare_del_chash(struct ip_fw_chain *ch, struct tentry_info *tei,
     void *ta_buf)
 {
 	struct chashentry *probe;
 
 	probe = &((struct ta_buf_chash *)ta_buf)->ent;
 
 	return (tei_to_chash_ent(tei, probe));
 }
 
 static int
 ta_del_chash(void *ta_state, struct table_info *ti, struct tentry_info *tei,
     void *ta_buf, uint32_t *pnum)
 {
 	struct chash_cfg *cfg;
 	struct chashbhead *head;
 	struct chashentry *tmp, *tmp_next, *ent;
 	struct ta_buf_chash *tb;
 	uint32_t hash;
 
 	cfg = (struct chash_cfg *)ta_state;
 	tb = (struct ta_buf_chash *)ta_buf;
 	ent = &tb->ent;
 
 	if (tei->subtype == AF_INET) {
 		if (tei->masklen != cfg->mask4)
 			return (EINVAL);
 		head = cfg->head4;
 		hash = hash_ent(ent, AF_INET, cfg->mask4, cfg->size4);
 
 		SLIST_FOREACH_SAFE(tmp, &head[hash], next, tmp_next) {
 			if (tmp->a.a4 != ent->a.a4)
 				continue;
 
 			SLIST_REMOVE(&head[hash], tmp, chashentry, next);
 			cfg->items4--;
 			tb->ent_ptr = tmp;
 			tei->value = tmp->value;
 			*pnum = 1;
 			return (0);
 		}
 	} else {
 		if (tei->masklen != cfg->mask6)
 			return (EINVAL);
 		head = cfg->head6;
 		hash = hash_ent(ent, AF_INET6, cfg->mask6, cfg->size6);
 		SLIST_FOREACH_SAFE(tmp, &head[hash], next, tmp_next) {
 			if (memcmp(&tmp->a.a6, &ent->a.a6, 16) != 0)
 				continue;
 
 			SLIST_REMOVE(&head[hash], tmp, chashentry, next);
 			cfg->items6--;
 			tb->ent_ptr = tmp;
 			tei->value = tmp->value;
 			*pnum = 1;
 			return (0);
 		}
 	}
 
 	return (ENOENT);
 }
 
 /*
  * Release whatever entry is still referenced by @ta_buf, if any.
  */
 static void
 ta_flush_chash_entry(struct ip_fw_chain *ch, struct tentry_info *tei,
     void *ta_buf)
 {
 	struct ta_buf_chash *tb = (struct ta_buf_chash *)ta_buf;
 
 	if (tb->ent_ptr == NULL)
 		return;
 
 	free(tb->ent_ptr, M_IPFW_TBL);
 }
 
 /*
  * Hash growing callbacks.
  */
 
 /*
  * Decide whether either hash should grow.
  *
  * A hash is a growth candidate when it holds more records than it has
  * buckets and has fewer than 65536 buckets; the requested size is
  * twice the current one.  The IPv4 request is placed at bit 16 of
  * *@pflags and the IPv6 request in the low bits.
  *
  * Returns 1 with *@pflags set when growth is requested, 0 otherwise
  * (*@pflags is then left untouched).
  */
 static int
 ta_need_modify_chash(void *ta_state, struct table_info *ti, uint32_t count,
     uint64_t *pflags)
 {
 	struct chash_cfg *cfg;
 	uint64_t request;
 
 	/*
 	 * @count does not tell IPv4 records from IPv6 ones, so it is
 	 * ignored entirely; the decision is based on the current hash
 	 * sizes and record counts only.
 	 */
 
 	cfg = (struct chash_cfg *)ta_state;
 
 	request = 0;
 	if (cfg->size4 < 65536 && cfg->items4 > cfg->size4)
 		request |= (cfg->size4 * 2) << 16;
 	if (cfg->size6 < 65536 && cfg->items6 > cfg->size6)
 		request |= cfg->size6 * 2;
 
 	if (request == 0)
 		return (0);
 
 	*pflags = request;
 	return (1);
 }
 
 /*
  * Allocate the new, larger bucket arrays requested in *@pflags.
  *
  * @ta_buf is used as a struct mod_item: it is zeroed, the requested
  * IPv4 size is taken from bits 16-31 of *@pflags and the IPv6 size
  * from bits 0-15.  An array is allocated only for a non-zero size.
  * Always returns 0.
  */
 static int
 ta_prepare_mod_chash(void *ta_buf, uint64_t *pflags)
 {
 	struct mod_item *mi;
 	struct chashbhead *buckets;
 	int i;
 
 	mi = (struct mod_item *)ta_buf;
 	memset(mi, 0, sizeof(struct mod_item));
 
 	mi->size = (*pflags >> 16) & 0xFFFF;
 	mi->size6 = *pflags & 0xFFFF;
 
 	/* IPv4 array */
 	if (mi->size > 0) {
 		buckets = malloc(sizeof(struct chashbhead) * mi->size,
 		    M_IPFW, M_WAITOK | M_ZERO);
 		for (i = 0; i < mi->size; i++)
 			SLIST_INIT(&buckets[i]);
 		mi->main_ptr = buckets;
 	}
 
 	/* IPv6 array */
 	if (mi->size6 > 0) {
 		buckets = malloc(sizeof(struct chashbhead) * mi->size6,
 		    M_IPFW, M_WAITOK | M_ZERO);
 		for (i = 0; i < mi->size6; i++)
 			SLIST_INIT(&buckets[i]);
 		mi->main_ptr6 = buckets;
 	}
 
 	return (0);
 }
 
 /*
  * Fill stage of the hash resize ("copy data from old runtime array to
  * new one").  For addr:hash this stage copies nothing and always
  * returns 0; none of the arguments are used.
  */
 static int
 ta_fill_mod_chash(void *ta_state, struct table_info *ti, void *ta_buf,
     uint64_t *pflags)
 {
 
 	/* It is not possible to rehash if we're not holding WLOCK. */
 	return (0);
 }
 
 /*
  * Switch old & new arrays.
  *
  * For each family whose new array in @ta_buf (a struct mod_item) is
  * larger than the current one, every record is moved from the old
  * buckets into the new ones, the new array is published through @ti
  * and the config, and the old array is left in the mod_item for the
  * flush step.  A family whose hash is already at least as large as the
  * request is left alone.
  *
  * Finally the two log2(size) bytes of ti->data are refreshed.  The
  * mask fields in bits 16-31 of ti->data are preserved: they are set
  * once by ta_init_chash() and read by every lookup function.
  */
 static void
 ta_modify_chash(void *ta_state, struct table_info *ti, void *ta_buf,
     uint64_t pflags)
 {
 	struct mod_item *mi;
 	struct chash_cfg *cfg;
 	struct chashbhead *old_head, *new_head;
 	struct chashentry *ent, *ent_next;
 	int af, i, mlen;
 	uint32_t nhash;
 	size_t old_size, new_size;
 
 	mi = (struct mod_item *)ta_buf;
 	cfg = (struct chash_cfg *)ta_state;
 
 	/* Check which hash we need to grow and do we still need that */
 	if (mi->size > 0 && cfg->size4 < mi->size) {
 		new_head = (struct chashbhead *)mi->main_ptr;
 		new_size = mi->size;
 		old_size = cfg->size4;
 		old_head = ti->state;
 		mlen = cfg->mask4;
 		af = AF_INET;
 
 		/* Rehash every IPv4 record into the new array */
 		for (i = 0; i < old_size; i++) {
 			SLIST_FOREACH_SAFE(ent, &old_head[i], next, ent_next) {
 				nhash = hash_ent(ent, af, mlen, new_size);
 				SLIST_INSERT_HEAD(&new_head[nhash], ent, next);
 			}
 		}
 
 		ti->state = new_head;
 		cfg->head4 = new_head;
 		cfg->size4 = mi->size;
 		/* Hand the old array back so the flush step frees it */
 		mi->main_ptr = old_head;
 	}
 
 	if (mi->size6 > 0 && cfg->size6 < mi->size6) {
 		new_head = (struct chashbhead *)mi->main_ptr6;
 		new_size = mi->size6;
 		old_size = cfg->size6;
 		old_head = ti->xstate;
 		mlen = cfg->mask6;
 		af = AF_INET6;
 
 		/* Rehash every IPv6 record into the new array */
 		for (i = 0; i < old_size; i++) {
 			SLIST_FOREACH_SAFE(ent, &old_head[i], next, ent_next) {
 				nhash = hash_ent(ent, af, mlen, new_size);
 				SLIST_INSERT_HEAD(&new_head[nhash], ent, next);
 			}
 		}
 
 		ti->xstate = new_head;
 		cfg->head6 = new_head;
 		cfg->size6 = mi->size6;
 		/* Hand the old array back so the flush step frees it */
 		mi->main_ptr6 = old_head;
 	}
 
 	/*
 	 * Update only the low 16 bits (log2 of both hash sizes).
 	 * Clearing more than that would wipe the inverse-mask fields in
 	 * bits 16-31 and make lookups use a zero mask after a resize.
 	 */
 	ti->data &= ~0xFFFFULL;
 	ti->data |= ta_log2(cfg->size4) << 8 | ta_log2(cfg->size6);
 }
 
 /*
  * Free the bucket arrays left in the mod_item at @ta_buf: either the
  * old arrays after a switch or the unused new ones.
  */
 static void
 ta_flush_mod_chash(void *ta_buf)
 {
 	struct mod_item *mod = (struct mod_item *)ta_buf;
 
 	if (mod->main_ptr != NULL) {
 		free(mod->main_ptr, M_IPFW);
 	}
 	if (mod->main_ptr6 != NULL) {
 		free(mod->main_ptr6, M_IPFW);
 	}
 }
 
 /*
  * Algorithm descriptor for "addr:hash": binds the hash callbacks above
  * to an IPFW_TABLE_ADDR table, including the resize callbacks
  * (need_modify, prepare_mod, fill_mod, modify, flush_mod).
  */
 struct table_algo addr_hash = {
 	.name		= "addr:hash",
 	.type		= IPFW_TABLE_ADDR,
 	.ta_buf_size	= sizeof(struct ta_buf_chash),
 	.init		= ta_init_chash,
 	.destroy	= ta_destroy_chash,
 	.prepare_add	= ta_prepare_add_chash,
 	.prepare_del	= ta_prepare_del_chash,
 	.add		= ta_add_chash,
 	.del		= ta_del_chash,
 	.flush_entry	= ta_flush_chash_entry,
 	.foreach	= ta_foreach_chash,
 	.dump_tentry	= ta_dump_chash_tentry,
 	.find_tentry	= ta_find_chash_tentry,
 	.print_config	= ta_print_chash_config,
 	.dump_tinfo	= ta_dump_chash_tinfo,
 	.need_modify	= ta_need_modify_chash,
 	.prepare_mod	= ta_prepare_mod_chash,
 	.fill_mod	= ta_fill_mod_chash,
 	.modify		= ta_modify_chash,
 	.flush_mod	= ta_flush_mod_chash,
 };
 
 
 /*
  * Iface table cmds.
  *
  * Implementation:
  *
  * Runtime part:
  * - sorted array of "struct ifidx" pointed by ti->state.
  *   Array is allocated with rounding up to IFIDX_CHUNK. Only existing
  *   interfaces are stored in array, however its allocated size is
  *   sufficient to hold all table records if needed.
  * - current array size is stored in ti->data
  *
  * Table data:
  * - "struct iftable_cfg" is allocated to store table state (ta_state).
  * - All table records are stored inside namedobj instance.
  *
  */
 
 /*
  * Element of the sorted runtime array pointed to by ti->state.
  * compare_ifidx() orders and searches the array by @kidx.
  */
 struct ifidx {
 	uint16_t	kidx;	/* Search key; presumably interface index */
 	uint16_t	spare;
 	uint32_t	value;	/* Value associated with the key */
 };
 #define	DEFAULT_IFIDX_SIZE	64
 
 struct iftable_cfg;
 
 /*
  * One interface table record, kept as a named object.
  * NOTE(review): field meanings below are inferred from their names and
  * types; confirm against the code that uses them.
  */
 struct ifentry {
 	struct named_object	no;	/* Named object linkage */
 	struct ipfw_ifc		ic;
 	struct iftable_cfg	*icfg;	/* Presumably the owning table */
 	uint32_t		value;	/* Value associated with the record */
 	int			linked;
 };
 
 /*
  * Per-table state of the iface algorithm (stored as ta_state).
  */
 struct iftable_cfg {
 	struct namedobj_instance	*ii;	/* Holds all table records */
 	struct ip_fw_chain	*ch;
 	struct table_info	*ti;
 	void	*main_ptr;
 	size_t	size;	/* Number of items allocated in array */
 	size_t	count;	/* Number of all items */
 	size_t	used;	/* Number of items _active_ now */
 };
 
 /*
  * Scratch buffer for the iface add/del callbacks.
  * NOTE(review): presumably used like the other ta_buf_* structures in
  * this file; confirm against the callbacks that use it.
  */
 struct ta_buf_ifidx
 {
 	struct ifentry *ife;
 	uint32_t value;
 };
 
 /*
  * Forward declarations for the iface table implementation.
  * compare_ifidx() is the only one declared without static linkage.
  */
 int compare_ifidx(const void *k, const void *v);
 static struct ifidx * ifidx_find(struct table_info *ti, void *key);
 static int ta_lookup_ifidx(struct table_info *ti, void *key, uint32_t keylen,
     uint32_t *val);
 static int ta_init_ifidx(struct ip_fw_chain *ch, void **ta_state,
     struct table_info *ti, char *data, uint8_t tflags);
 static void ta_change_ti_ifidx(void *ta_state, struct table_info *ti);
 static int destroy_ifidx_locked(struct namedobj_instance *ii,
     struct named_object *no, void *arg);
 static void ta_destroy_ifidx(void *ta_state, struct table_info *ti);
 static void ta_dump_ifidx_tinfo(void *ta_state, struct table_info *ti,
     ipfw_ta_tinfo *tinfo);
 static int ta_prepare_add_ifidx(struct ip_fw_chain *ch, struct tentry_info *tei,
     void *ta_buf);
 static int ta_add_ifidx(void *ta_state, struct table_info *ti,
     struct tentry_info *tei, void *ta_buf, uint32_t *pnum);
 static int ta_prepare_del_ifidx(struct ip_fw_chain *ch, struct tentry_info *tei,
     void *ta_buf);
 static int ta_del_ifidx(void *ta_state, struct table_info *ti,
     struct tentry_info *tei, void *ta_buf, uint32_t *pnum);
 static void ta_flush_ifidx_entry(struct ip_fw_chain *ch,
     struct tentry_info *tei, void *ta_buf);
 static void if_notifier(struct ip_fw_chain *ch, void *cbdata, uint16_t ifindex);
 static int ta_need_modify_ifidx(void *ta_state, struct table_info *ti,
     uint32_t count, uint64_t *pflags);
 static int ta_prepare_mod_ifidx(void *ta_buf, uint64_t *pflags);
 static int ta_fill_mod_ifidx(void *ta_state, struct table_info *ti,
     void *ta_buf, uint64_t *pflags);
 static void ta_modify_ifidx(void *ta_state, struct table_info *ti, void *ta_buf,
     uint64_t pflags);
 static void ta_flush_mod_ifidx(void *ta_buf);
 static int ta_dump_ifidx_tentry(void *ta_state, struct table_info *ti, void *e,
     ipfw_obj_tentry *tent);
 static int ta_find_ifidx_tentry(void *ta_state, struct table_info *ti,
     ipfw_obj_tentry *tent);
 static int foreach_ifidx(struct namedobj_instance *ii, struct named_object *no,
     void *arg);
 static void ta_foreach_ifidx(void *ta_state, struct table_info *ti,
     ta_foreach_f *f, void *arg);
 
 /*
  * Comparator for the sorted ifidx array (bsearch()/badd()/bdel()).
  * @k points to a uint16_t interface index, @v to a struct ifidx element.
  *
  * Returns -1, 0 or 1 if the key is lower than, equal to or greater than
  * the element's kidx.
  */
 int
 compare_ifidx(const void *k, const void *v)
 {
 	uint16_t wanted, stored;
 
 	wanted = *((const uint16_t *)k);
 	stored = ((const struct ifidx *)v)->kidx;
 
 	if (wanted == stored)
 		return (0);
 
 	return ((wanted < stored) ? -1 : 1);
 }
 
 /*
  * Inserts @item (of @size bytes), identified by @key, into array @base
  * holding @nmemb elements sorted in ascending order according to @compar.
  * The caller guarantees that @base has room for one more element.
  *
  * Returns 1 if the item was inserted, 0 if @key is already present.
  */
 static int
 badd(const void *key, void *item, void *base, size_t nmemb,
     size_t size, int (*compar) (const void *, const void *))
 {
 	caddr_t slot;
 	int lo, hi, probe, pos, cmp;
 
 	/* Nothing to search: the item becomes the first element. */
 	if (nmemb == 0) {
 		memcpy(base, item, size);
 		return (1);
 	}
 
 	/* Bisect the array looking for @key. */
 	probe = 0;
 	for (lo = 0, hi = nmemb - 1; lo <= hi; ) {
 		probe = (lo + hi) / 2;
 		cmp = compar(key, (const void *)((caddr_t)base + probe * size));
 		if (cmp == 0)
 			return (0);
 
 		if (cmp > 0)
 			lo = probe + 1;
 		else
 			hi = probe - 1;
 	}
 
 	/*
 	 * No match. The new item goes right after the last probed element
 	 * when the key is greater than it, or takes its place otherwise.
 	 */
 	cmp = compar(key, (const void *)((caddr_t)base + probe * size));
 	pos = (cmp > 0) ? probe + 1 : probe;
 
 	/* Open a gap by shifting the tail one element up. */
 	slot = (caddr_t)base + pos * size;
 	if (nmemb > pos)
 		memmove(slot + size, slot, (nmemb - pos) * size);
 
 	memcpy(slot, item, size);
 
 	return (1);
 }
 
 /*
  * Deletes item with key @key from ascending-sorted array @base of
  * @nmemb elements, each @size bytes long, ordered according to @compar.
  * Elements following the deleted one are shifted one slot down; the
  * last used slot is left with its previous contents.
  *
  * Returns 1 on success, 0 for non-existent key.
  */
 static int
 bdel(const void *key, void *base, size_t nmemb, size_t size,
     int (*compar) (const void *, const void *))
 {
 	caddr_t item, tail;
 	size_t sz;
 
 	item = (caddr_t)bsearch(key, base, nmemb, size, compar);
 
 	if (item == NULL)
 		return (0);
 
 	/*
 	 * Move only the elements that follow the deleted one. Counting
 	 * the deleted element as well would make memmove() read one
 	 * element past the used part of the array, which lies outside of
 	 * the allocation when the array is full.
 	 */
 	tail = item + size;
 	sz = (caddr_t)base + nmemb * size - tail;
 
 	if (sz > 0)
 		memmove(item, tail, sz);
 
 	return (1);
 }
 
 /*
  * Binary-searches the runtime array (ti->state, ti->data elements)
  * for the interface index pointed to by @key.
  *
  * Returns matching element or NULL.
  */
 static struct ifidx *
 ifidx_find(struct table_info *ti, void *key)
 {
 
 	return (bsearch(key, ti->state, ti->data, sizeof(struct ifidx),
 	    compare_ifidx));
 }
 
 /*
  * Runtime lookup: searches for the interface index pointed to by @key.
  * On match stores the entry value in @val and returns 1.
  * Returns 0 if the index is not in the table. @keylen is not used.
  */
 static int
 ta_lookup_ifidx(struct table_info *ti, void *key, uint32_t keylen,
     uint32_t *val)
 {
 	struct ifidx *match;
 
 	match = ifidx_find(ti, key);
 	if (match == NULL)
 		return (0);
 
 	*val = match->value;
 	return (1);
 }
 
 /*
  * New table: allocates zeroed table state, the namedobj instance for
  * configuration records and a runtime array of DEFAULT_IFIDX_SIZE items.
  * Stores the state in @ta_state and sets up @ti for runtime lookups.
  * @data and @tflags are not used.
  *
  * Always returns 0.
  */
 static int
 ta_init_ifidx(struct ip_fw_chain *ch, void **ta_state, struct table_info *ti,
     char *data, uint8_t tflags)
 {
 	struct iftable_cfg *cfg;
 
 	cfg = malloc(sizeof(*cfg), M_IPFW, M_WAITOK | M_ZERO);
 
 	cfg->ii = ipfw_objhash_create(DEFAULT_IFIDX_SIZE);
 	cfg->size = DEFAULT_IFIDX_SIZE;
 	cfg->main_ptr = malloc(cfg->size * sizeof(struct ifidx), M_IPFW,
 	    M_WAITOK | M_ZERO);
 	cfg->ch = ch;
 
 	/* Hand the state over to the caller and hook up the lookup */
 	*ta_state = cfg;
 	ti->lookup = ta_lookup_ifidx;
 	ti->state = cfg->main_ptr;
 
 	return (0);
 }
 
 /*
  * Remembers new tableinfo pointer @ti in the table state
  * (called when tableinfo pointer changes, e.g. on table array resize).
  */
 static void
 ta_change_ti_ifidx(void *ta_state, struct table_info *ti)
 {
 
 	((struct iftable_cfg *)ta_state)->ti = ti;
 }
 
 /*
  * ipfw_objhash_foreach() callback used by ta_destroy_ifidx(), which
  * invokes it with the UH write lock held.
  * Unlinks the notifier of entry @no, drops its interface reference and
  * frees the entry. @arg is the struct ip_fw_chain pointer.
  *
  * Always returns 0.
  */
 static int
 destroy_ifidx_locked(struct namedobj_instance *ii, struct named_object *no,
     void *arg)
 {
 	struct ip_fw_chain *chain;
 	struct ifentry *entry;
 
 	entry = (struct ifentry *)no;
 	chain = (struct ip_fw_chain *)arg;
 
 	ipfw_iface_del_notify(chain, &entry->ic);
 	ipfw_iface_unref(chain, &entry->ic);
 	free(entry, M_IPFW_TBL);
 
 	return (0);
 }
 
 
 /*
  * Destroys table state @ta_state: frees the runtime array, releases
  * every configuration record under the UH write lock, then destroys
  * the namedobj instance and the state itself. @ti is not used.
  */
 static void
 ta_destroy_ifidx(void *ta_state, struct table_info *ti)
 {
 	struct iftable_cfg *cfg;
 	struct ip_fw_chain *chain;
 
 	cfg = (struct iftable_cfg *)ta_state;
 	chain = cfg->ch;
 
 	if (cfg->main_ptr != NULL)
 		free(cfg->main_ptr, M_IPFW);
 
 	/* Per-entry teardown is done by destroy_ifidx_locked() */
 	IPFW_UH_WLOCK(chain);
 	ipfw_objhash_foreach(cfg->ii, destroy_ifidx_locked, chain);
 	IPFW_UH_WUNLOCK(chain);
 
 	ipfw_objhash_destroy(cfg->ii);
 	free(cfg, M_IPFW);
 }
 
 /*
  * Fills @tinfo with algo-specific info: array class, allocated size,
  * number of currently active items and the size of a single item.
  * @ti is not used.
  */
 static void
 ta_dump_ifidx_tinfo(void *ta_state, struct table_info *ti, ipfw_ta_tinfo *tinfo)
 {
 	struct iftable_cfg *icfg;
 
 	icfg = (struct iftable_cfg *)ta_state;
 
 	tinfo->itemsize4 = sizeof(struct ifidx);
 	tinfo->count4 = icfg->used;
 	tinfo->size4 = icfg->size;
 	tinfo->taclass4 = IPFW_TACLASS_ARRAY;
 }
 
 /*
  * Prepares addition of an interface name (tei->paddr) to the table:
  * validates the name, allocates a new ifentry and references the
  * interface. The entry is left in @ta_buf for ta_add_ifidx().
  *
  * Returns 0 on success, EINVAL if the name is not terminated within
  * IF_NAMESIZE bytes or the interface reference cannot be obtained.
  */
 static int
 ta_prepare_add_ifidx(struct ip_fw_chain *ch, struct tentry_info *tei,
     void *ta_buf)
 {
 	struct ta_buf_ifidx *buf;
 	struct ifentry *entry;
 	char *name;
 
 	buf = (struct ta_buf_ifidx *)ta_buf;
 	name = (char *)tei->paddr;
 
 	/* Check if string is terminated */
 	if (strnlen(name, IF_NAMESIZE) == IF_NAMESIZE)
 		return (EINVAL);
 
 	/* The entry itself is handed to the notifier as callback data */
 	entry = malloc(sizeof(*entry), M_IPFW_TBL, M_WAITOK | M_ZERO);
 	entry->ic.cb = if_notifier;
 	entry->ic.cbdata = entry;
 
 	if (ipfw_iface_ref(ch, name, &entry->ic) != 0) {
 		free(entry, M_IPFW_TBL);
 		return (EINVAL);
 	}
 
 	/* Use ipfw_iface 'ifname' field as stable storage */
 	entry->no.name = entry->ic.iface->ifname;
 	buf->ife = entry;
 
 	return (0);
 }
 
 /*
  * Adds the entry prepared by ta_prepare_add_ifidx() to the table or
  * updates the value of an already existing record.
  *
  * On update the old value is returned in tei->value, TEI_FLAGS_UPDATED
  * is set and @pnum is 0; the prepared entry stays in @ta_buf.
  * On addition the entry is linked, @pnum is 1 and @ta_buf no longer
  * points to it.
  *
  * Returns 0 on success, EEXIST if the record exists and update was not
  * requested, EFBIG if a new record is needed but TEI_FLAGS_DONTADD is set.
  */
 static int
 ta_add_ifidx(void *ta_state, struct table_info *ti, struct tentry_info *tei,
     void *ta_buf, uint32_t *pnum)
 {
 	struct iftable_cfg *cfg;
 	struct ta_buf_ifidx *buf;
 	struct ifentry *fresh, *existing;
 	struct ipfw_iface *iface;
 	struct ifidx *slot;
 	uint32_t oldval;
 
 	cfg = (struct iftable_cfg *)ta_state;
 	buf = (struct ta_buf_ifidx *)ta_buf;
 	fresh = buf->ife;
 
 	fresh->icfg = cfg;
 	fresh->value = tei->value;
 
 	existing = (struct ifentry *)ipfw_objhash_lookup_name(cfg->ii, 0,
 	    (char *)tei->paddr);
 
 	if (existing == NULL) {
 		if ((tei->flags & TEI_FLAGS_DONTADD) != 0)
 			return (EFBIG);
 
 		/* Link to internal list */
 		ipfw_objhash_add(cfg->ii, &fresh->no);
 
 		/* Link notifier (possible running its callback) */
 		ipfw_iface_add_notify(cfg->ch, &fresh->ic);
 		cfg->count++;
 
 		/* Entry is owned by the table from now on */
 		buf->ife = NULL;
 		*pnum = 1;
 
 		return (0);
 	}
 
 	if ((tei->flags & TEI_FLAGS_UPDATE) == 0)
 		return (EEXIST);
 
 	/* Exchange values in @existing and @tei */
 	oldval = existing->value;
 	existing->value = tei->value;
 	tei->value = oldval;
 
 	iface = existing->ic.iface;
 	if (iface->resolved != 0) {
 		/* We have to update runtime value, too */
 		slot = ifidx_find(ti, &iface->ifindex);
 		slot->value = fresh->value;
 	}
 
 	/* Indicate that update has happened instead of addition */
 	tei->flags |= TEI_FLAGS_UPDATED;
 	*pnum = 0;
 
 	return (0);
 }
 
 /*
  * Prepare to delete key from table.
  * Do basic interface name checks: the name in tei->paddr has to be
  * terminated within IF_NAMESIZE bytes. @ch and @ta_buf are not used.
  *
  * Returns 0 if the name is valid, EINVAL otherwise.
  */
 static int
 ta_prepare_del_ifidx(struct ip_fw_chain *ch, struct tentry_info *tei,
     void *ta_buf)
 {
 	char *ifname;
 
 	/* Check if string is terminated */
 	ifname = (char *)tei->paddr;
 	if (strnlen(ifname, IF_NAMESIZE) == IF_NAMESIZE)
 		return (EINVAL);
 
 	return (0);
 }
 
 /*
  * Removes the interface named by tei->paddr from both the configuration
  * list and, if it is currently linked, the runtime array. Removes the
  * interface notification and drops the interface reference.
  *
  * The value of the removed record is returned in tei->value, the
  * unlinked entry is left in @ta_buf and @pnum is set to 1.
  *
  * Returns 0 on success, ENOENT if there is no such record.
  */
 static int
 ta_del_ifidx(void *ta_state, struct table_info *ti, struct tentry_info *tei,
     void *ta_buf, uint32_t *pnum)
 {
 	struct iftable_cfg *cfg;
 	struct ta_buf_ifidx *buf;
 	struct ifentry *victim;
 	uint16_t ifindex;
 	int res;
 
 	cfg = (struct iftable_cfg *)ta_state;
 	buf = (struct ta_buf_ifidx *)ta_buf;
 
 	victim = (struct ifentry *)ipfw_objhash_lookup_name(cfg->ii, 0,
 	    (char *)tei->paddr);
 	if (victim == NULL)
 		return (ENOENT);
 
 	if (victim->linked != 0) {
 		/* We have to remove item from runtime */
 		ifindex = victim->ic.iface->ifindex;
 
 		res = bdel(&ifindex, cfg->main_ptr, cfg->used,
 		    sizeof(struct ifidx), compare_ifidx);
 
 		KASSERT(res == 1, ("index %d does not exist", ifindex));
 		cfg->used--;
 		ti->data = cfg->used;
 		victim->linked = 0;
 	}
 
 	/* Unlink from local list */
 	ipfw_objhash_del(cfg->ii, &victim->no);
 	/* Unlink notifier and deref */
 	ipfw_iface_del_notify(cfg->ch, &victim->ic);
 	ipfw_iface_unref(cfg->ch, &victim->ic);
 
 	cfg->count--;
 
 	/* Report removed value and pass the entry on via @ta_buf */
 	tei->value = victim->value;
 	buf->ife = victim;
 	*pnum = 1;
 
 	return (0);
 }
 
 /*
  * Flush request state.
  * Frees the entry left in @ta_buf, if any. @ch and @tei are not used.
  */
 static void
 ta_flush_ifidx_entry(struct ip_fw_chain *ch, struct tentry_info *tei,
     void *ta_buf)
 {
 	struct ifentry *leftover;
 
 	leftover = ((struct ta_buf_ifidx *)ta_buf)->ife;
 	if (leftover == NULL)
 		return;
 
 	free(leftover, M_IPFW_TBL);
 }
 
 
 /*
  * Handle interface announce/withdrawal for particular table.
  * Every real runtime array modification happens here.
  */
 static void
 if_notifier(struct ip_fw_chain *ch, void *cbdata, uint16_t ifindex)
 {
 	struct ifentry *ife;
 	struct ifidx ifi;
 	struct iftable_cfg *icfg;
 	struct table_info *ti;
 	int res;
 
 	ife = (struct ifentry *)cbdata;
 	icfg = ife->icfg;
 	ti = icfg->ti;
 
 	KASSERT(ti != NULL, ("ti=NULL, check change_ti handler"));
 
 	if (ife->linked == 0 && ifindex != 0) {
 		/* Interface announce */
 		ifi.kidx = ifindex;
 		ifi.spare = 0;
 		ifi.value = ife->value;
 		res = badd(&ifindex, &ifi, icfg->main_ptr, icfg->used,
 		    sizeof(struct ifidx), compare_ifidx);
 		KASSERT(res == 1, ("index %d already exists", ifindex));
 		icfg->used++;
 		ti->data = icfg->used;
 		ife->linked = 1;
 	} else if (ife->linked != 0 && ifindex == 0) {
 		/* Interface withdrawal */
 		ifindex = ife->ic.iface->ifindex;
 
 		res = bdel(&ifindex, icfg->main_ptr, icfg->used,
 		    sizeof(struct ifidx), compare_ifidx);
 
 		KASSERT(res == 1, ("index %d does not exist", ifindex));
 		icfg->used--;
 		ti->data = icfg->used;
 		ife->linked = 0;
 	}
 }
 
 
 /*
  * Table growing callbacks.
  */
 
 /*
  * Checks if the runtime array has to grow to hold @count more records.
  * The needed size is found by doubling the current size until all
  * configured records plus @count fit.
  *
  * Returns 1 and stores the new size in @pflags if growing is needed,
  * returns 0 otherwise. @ti is not used.
  */
 static int
 ta_need_modify_ifidx(void *ta_state, struct table_info *ti, uint32_t count,
     uint64_t *pflags)
 {
 	struct iftable_cfg *cfg;
 	size_t size;
 
 	cfg = (struct iftable_cfg *)ta_state;
 
 	/*
 	 * Use the type of cfg->size/cfg->count so that no value is
 	 * truncated while searching for the new size.
 	 */
 	size = cfg->size;
 	while (size < cfg->count + count)
 		size *= 2;
 
 	if (size != cfg->size) {
 		*pflags = size;
 		return (1);
 	}
 
 	return (0);
 }
 
 /*
  * Allocate new, larger runtime ifidx array.
  * The number of items is taken from @pflags; the zeroed array and its
  * size are stored in the struct mod_item kept in @ta_buf.
  *
  * Always returns 0.
  */
 static int
 ta_prepare_mod_ifidx(void *ta_buf, uint64_t *pflags)
 {
 	struct mod_item *mod;
 
 	mod = (struct mod_item *)ta_buf;
 	memset(mod, 0, sizeof(*mod));
 
 	mod->size = *pflags;
 	mod->main_ptr = malloc(mod->size * sizeof(struct ifidx), M_IPFW,
 	    M_WAITOK | M_ZERO);
 
 	return (0);
 }
 
 /*
  * Copy active items from the old runtime array to the new one held in
  * @ta_buf. If the current array is already at least as large as the new
  * one, nothing is copied and @pflags is cleared. @ti is not used.
  *
  * Always returns 0.
  */
 static int
 ta_fill_mod_ifidx(void *ta_state, struct table_info *ti, void *ta_buf,
     uint64_t *pflags)
 {
 	struct iftable_cfg *cfg;
 	struct mod_item *mod;
 
 	cfg = (struct iftable_cfg *)ta_state;
 	mod = (struct mod_item *)ta_buf;
 
 	if (cfg->size < mod->size) {
 		/* Still need to grow: carry over the active items */
 		memcpy(mod->main_ptr, cfg->main_ptr,
 		    cfg->used * sizeof(struct ifidx));
 		return (0);
 	}
 
 	/* Growing is not needed anymore */
 	*pflags = 0;
 	return (0);
 }
 
 /*
  * Switch old & new arrays: the array from @ta_buf becomes the runtime
  * array (table state and ti->state), the previous array is left in
  * @ta_buf. @pflags is not used.
  */
 static void
 ta_modify_ifidx(void *ta_state, struct table_info *ti, void *ta_buf,
     uint64_t pflags)
 {
 	struct iftable_cfg *cfg;
 	struct mod_item *mod;
 	void *retired;
 
 	cfg = (struct iftable_cfg *)ta_state;
 	mod = (struct mod_item *)ta_buf;
 
 	retired = cfg->main_ptr;
 
 	cfg->main_ptr = mod->main_ptr;
 	cfg->size = mod->size;
 	ti->state = cfg->main_ptr;
 
 	/* Old array stays in @ta_buf */
 	mod->main_ptr = retired;
 }
 
 /*
  * Free unneeded array: releases whatever array is left in @ta_buf.
  */
 static void
 ta_flush_mod_ifidx(void *ta_buf)
 {
 	struct mod_item *mod;
 
 	mod = (struct mod_item *)ta_buf;
 	if (mod->main_ptr == NULL)
 		return;
 
 	free(mod->main_ptr, M_IPFW);
 }
 
 /*
  * Exports configuration record @e (struct ifentry) into @tent:
  * IF_NAMESIZE bytes of the interface name as key, mask length of
  * 8 * IF_NAMESIZE and the record value. @ta_state and @ti are not used.
  *
  * Always returns 0.
  */
 static int
 ta_dump_ifidx_tentry(void *ta_state, struct table_info *ti, void *e,
     ipfw_obj_tentry *tent)
 {
 	struct ifentry *entry;
 
 	entry = (struct ifentry *)e;
 
 	memcpy(&tent->k, entry->no.name, IF_NAMESIZE);
 	tent->masklen = 8 * IF_NAMESIZE;
 	tent->v.kidx = entry->value;
 
 	return (0);
 }
 
 /*
  * Looks up configuration record for interface name tent->k.iface and
  * exports it into @tent.
  *
  * Returns 0 on success, EINVAL if the name is not terminated within
  * IF_NAMESIZE bytes, ENOENT if there is no such record.
  */
 static int
 ta_find_ifidx_tentry(void *ta_state, struct table_info *ti,
     ipfw_obj_tentry *tent)
 {
 	struct iftable_cfg *cfg;
 	struct ifentry *entry;
 	char *name;
 
 	cfg = (struct iftable_cfg *)ta_state;
 	name = tent->k.iface;
 
 	if (strnlen(name, IF_NAMESIZE) == IF_NAMESIZE)
 		return (EINVAL);
 
 	entry = (struct ifentry *)ipfw_objhash_lookup_name(cfg->ii, 0, name);
 	if (entry == NULL)
 		return (ENOENT);
 
 	ta_dump_ifidx_tentry(ta_state, ti, entry, tent);
 	return (0);
 }
 
 /*
  * Walker arguments passed from ta_foreach_ifidx() to foreach_ifidx().
  */
 struct wa_ifidx {
 	ta_foreach_f	*f;	/* callback to run for every record */
 	void		*arg;	/* opaque argument passed to @f */
 };
 
 /*
  * ipfw_objhash_foreach() callback: runs the user callback stored in
  * @arg (struct wa_ifidx) for record @no.
  *
  * Always returns 0.
  */
 static int
 foreach_ifidx(struct namedobj_instance *ii, struct named_object *no,
     void *arg)
 {
 	struct wa_ifidx *walk;
 
 	walk = (struct wa_ifidx *)arg;
 	walk->f((struct ifentry *)no, walk->arg);
 
 	return (0);
 }
 
 /*
  * Calls @f with @arg for every configuration record of the table.
  * @ti is not used.
  */
 static void
 ta_foreach_ifidx(void *ta_state, struct table_info *ti, ta_foreach_f *f,
     void *arg)
 {
 	struct iftable_cfg *cfg;
 	struct wa_ifidx walk;
 
 	walk.arg = arg;
 	walk.f = f;
 
 	cfg = (struct iftable_cfg *)ta_state;
 	ipfw_objhash_foreach(cfg->ii, foreach_ifidx, &walk);
 }
 
 /*
  * "iface:array" algorithm descriptor for IPFW_TABLE_INTERFACE tables,
  * flagged TA_FLAG_DEFAULT. Binds the ifidx callbacks defined above.
  */
 struct table_algo iface_idx = {
 	.name		= "iface:array",
 	.type		= IPFW_TABLE_INTERFACE,
 	.flags		= TA_FLAG_DEFAULT,
 	.ta_buf_size	= sizeof(struct ta_buf_ifidx),
 	.init		= ta_init_ifidx,
 	.destroy	= ta_destroy_ifidx,
 	.prepare_add	= ta_prepare_add_ifidx,
 	.prepare_del	= ta_prepare_del_ifidx,
 	.add		= ta_add_ifidx,
 	.del		= ta_del_ifidx,
 	.flush_entry	= ta_flush_ifidx_entry,
 	.foreach	= ta_foreach_ifidx,
 	.dump_tentry	= ta_dump_ifidx_tentry,
 	.find_tentry	= ta_find_ifidx_tentry,
 	.dump_tinfo	= ta_dump_ifidx_tinfo,
 	.need_modify	= ta_need_modify_ifidx,
 	.prepare_mod	= ta_prepare_mod_ifidx,
 	.fill_mod	= ta_fill_mod_ifidx,
 	.modify		= ta_modify_ifidx,
 	.flush_mod	= ta_flush_mod_ifidx,
 	.change_ti	= ta_change_ti_ifidx,
 };
 
 /*
  * Number array cmds.
  *
  * Implementation:
  *
  * Runtime part:
  * - sorted array of "struct numarray" pointed by ti->state.
  *   Array is allocated with rounding up to NUMARRAY_CHUNK.
  * - current array size is stored in ti->data
  *
  */
 
 /*
  * Element of the sorted runtime number array.
  */
 struct numarray {
 	uint32_t	number;	/* key the array is sorted by */
 	uint32_t	value;	/* value returned by the lookup */
 };
 
 /*
  * Per-table state of the number:array algorithm (returned via ta_state).
  */
 struct numarray_cfg {
 	void	*main_ptr;	/* runtime array of struct numarray */
 	size_t	size;	/* Number of items allocated in array */
 	size_t	used;	/* Number of items _active_ now */
 };
 
 /*
  * Per-request scratch buffer passed as @ta_buf to the add/del callbacks.
  */
 struct ta_buf_numarray
 {
 	/* number is set by ta_prepare_add_numarray(), value by ta_add_numarray() */
 	struct numarray na;
 };
 
 /*
  * Forward declarations for the number:array helpers and table callbacks.
  */
 int compare_numarray(const void *k, const void *v);
 static struct numarray *numarray_find(struct table_info *ti, void *key);
 static int ta_lookup_numarray(struct table_info *ti, void *key,
     uint32_t keylen, uint32_t *val);
 static int ta_init_numarray(struct ip_fw_chain *ch, void **ta_state,
     struct table_info *ti, char *data, uint8_t tflags);
 static void ta_destroy_numarray(void *ta_state, struct table_info *ti);
 static void ta_dump_numarray_tinfo(void *ta_state, struct table_info *ti,
     ipfw_ta_tinfo *tinfo);
 static int ta_prepare_add_numarray(struct ip_fw_chain *ch,
     struct tentry_info *tei, void *ta_buf);
 static int ta_add_numarray(void *ta_state, struct table_info *ti,
     struct tentry_info *tei, void *ta_buf, uint32_t *pnum);
 static int ta_del_numarray(void *ta_state, struct table_info *ti,
     struct tentry_info *tei, void *ta_buf, uint32_t *pnum);
 static void ta_flush_numarray_entry(struct ip_fw_chain *ch,
     struct tentry_info *tei, void *ta_buf);
 static int ta_need_modify_numarray(void *ta_state, struct table_info *ti,
     uint32_t count, uint64_t *pflags);
 static int ta_prepare_mod_numarray(void *ta_buf, uint64_t *pflags);
 static int ta_fill_mod_numarray(void *ta_state, struct table_info *ti,
     void *ta_buf, uint64_t *pflags);
 static void ta_modify_numarray(void *ta_state, struct table_info *ti,
     void *ta_buf, uint64_t pflags);
 static void ta_flush_mod_numarray(void *ta_buf);
 static int ta_dump_numarray_tentry(void *ta_state, struct table_info *ti,
     void *e, ipfw_obj_tentry *tent);
 static int ta_find_numarray_tentry(void *ta_state, struct table_info *ti,
     ipfw_obj_tentry *tent);
 static void ta_foreach_numarray(void *ta_state, struct table_info *ti,
     ta_foreach_f *f, void *arg);
 
 /*
  * Comparator for the sorted number array (bsearch()/badd()/bdel()).
  * @k points to a uint32_t number, @v to a struct numarray element.
  *
  * Returns -1, 0 or 1 if the key is lower than, equal to or greater than
  * the element's number.
  */
 int
 compare_numarray(const void *k, const void *v)
 {
 	uint32_t wanted, stored;
 
 	wanted = *((const uint32_t *)k);
 	stored = ((const struct numarray *)v)->number;
 
 	if (wanted == stored)
 		return (0);
 
 	return ((wanted < stored) ? -1 : 1);
 }
 
 static struct numarray *
 numarray_find(struct table_info *ti, void *key)
 {
 	struct numarray *ri;
 
 	ri = bsearch(key, ti->state, ti->data, sizeof(struct numarray),
 	    compare_ifidx);
 
 	return (ri);
 }
 
 /*
  * Runtime lookup: searches for the number pointed to by @key.
  * On match stores the entry value in @val and returns 1.
  * Returns 0 if the number is not in the table. @keylen is not used.
  */
 static int
 ta_lookup_numarray(struct table_info *ti, void *key, uint32_t keylen,
     uint32_t *val)
 {
 	struct numarray *match;
 
 	match = numarray_find(ti, key);
 	if (match == NULL)
 		return (0);
 
 	*val = match->value;
 	return (1);
 }
 
 /*
  * New table: allocates zeroed table state and a runtime array of 16
  * items. Stores the state in @ta_state and sets up @ti for runtime
  * lookups. @ch, @data and @tflags are not used.
  *
  * Always returns 0.
  */
 static int
 ta_init_numarray(struct ip_fw_chain *ch, void **ta_state, struct table_info *ti,
     char *data, uint8_t tflags)
 {
 	struct numarray_cfg *ncfg;
 
 	ncfg = malloc(sizeof(struct numarray_cfg), M_IPFW, M_WAITOK | M_ZERO);
 
 	ncfg->size = 16;
 	ncfg->main_ptr = malloc(ncfg->size * sizeof(struct numarray), M_IPFW,
 	    M_WAITOK | M_ZERO);
 
 	/* Hand the state over to the caller and hook up the lookup */
 	*ta_state = ncfg;
 	ti->lookup = ta_lookup_numarray;
 	ti->state = ncfg->main_ptr;
 
 	return (0);
 }
 
 /*
  * Destroys table state @ta_state: frees the runtime array (if any)
  * and the state itself. @ti is not used.
  */
 static void
 ta_destroy_numarray(void *ta_state, struct table_info *ti)
 {
 	struct numarray_cfg *ncfg;
 
 	ncfg = (struct numarray_cfg *)ta_state;
 
 	if (ncfg->main_ptr != NULL)
 		free(ncfg->main_ptr, M_IPFW);
 	free(ncfg, M_IPFW);
 }
 
 /*
  * Fills @tinfo with algo-specific info: array class, allocated size,
  * number of currently active items and the size of a single item.
  * @ti is not used.
  */
 static void
 ta_dump_numarray_tinfo(void *ta_state, struct table_info *ti, ipfw_ta_tinfo *tinfo)
 {
 	struct numarray_cfg *ncfg;
 
 	ncfg = (struct numarray_cfg *)ta_state;
 
 	tinfo->itemsize4 = sizeof(struct numarray);
 	tinfo->count4 = ncfg->used;
 	tinfo->size4 = ncfg->size;
 	tinfo->taclass4 = IPFW_TACLASS_ARRAY;
 }
 
 /*
  * Prepare for addition/deletion to an array: copies the uint32_t key
  * from tei->paddr into @ta_buf. @ch is not used.
  *
  * Always returns 0.
  */
 static int
 ta_prepare_add_numarray(struct ip_fw_chain *ch, struct tentry_info *tei,
     void *ta_buf)
 {
 	struct ta_buf_numarray *buf;
 	uint32_t number;
 
 	number = *((uint32_t *)tei->paddr);
 
 	buf = (struct ta_buf_numarray *)ta_buf;
 	buf->na.number = number;
 
 	return (0);
 }
 
 /*
  * Adds the number prepared in @ta_buf with value tei->value to the
  * runtime array or updates the value of an already existing record.
  *
  * On update the old value is returned in tei->value, TEI_FLAGS_UPDATED
  * is set and @pnum is 0. On addition @pnum is 1.
  *
  * Returns 0 on success, EEXIST if the record exists and update was not
  * requested, EFBIG if a new record is needed but TEI_FLAGS_DONTADD is set.
  */
 static int
 ta_add_numarray(void *ta_state, struct table_info *ti, struct tentry_info *tei,
     void *ta_buf, uint32_t *pnum)
 {
 	struct numarray_cfg *cfg;
 	struct ta_buf_numarray *tb;
 	struct numarray *ri;
 	int res;
 	uint32_t value;
 
 	tb = (struct ta_buf_numarray *)ta_buf;
 	cfg = (struct numarray_cfg *)ta_state;
 
 	/* Read current value from @tei */
 	tb->na.value = tei->value;
 
 	ri = numarray_find(ti, &tb->na.number);
 
 	if (ri != NULL) {
 		if ((tei->flags & TEI_FLAGS_UPDATE) == 0)
 			return (EEXIST);
 
 		/* Exchange values between ri and @tei */
 		value = ri->value;
 		ri->value = tei->value;
 		tei->value = value;
 		/* Indicate that update has happened instead of addition */
 		tei->flags |= TEI_FLAGS_UPDATED;
 		*pnum = 0;
 		return (0);
 	}
 
 	if ((tei->flags & TEI_FLAGS_DONTADD) != 0)
 		return (EFBIG);
 
 	res = badd(&tb->na.number, &tb->na, cfg->main_ptr, cfg->used,
 	    sizeof(struct numarray), compare_numarray);
 
 	/* The number is unsigned: print it the way ta_del_numarray() does */
 	KASSERT(res == 1, ("number %u already exists", tb->na.number));
 	cfg->used++;
 	ti->data = cfg->used;
 	*pnum = 1;
 
 	return (0);
 }
 
 /*
  * Removes the number prepared in @ta_buf from the runtime array.
  * The value of the removed record is returned in tei->value and @pnum
  * is set to 1.
  *
  * Returns 0 on success, ENOENT if there is no such record.
  */
 static int
 ta_del_numarray(void *ta_state, struct table_info *ti, struct tentry_info *tei,
     void *ta_buf, uint32_t *pnum)
 {
 	struct numarray_cfg *ncfg;
 	struct ta_buf_numarray *buf;
 	struct numarray *victim;
 	int res;
 
 	ncfg = (struct numarray_cfg *)ta_state;
 	buf = (struct ta_buf_numarray *)ta_buf;
 
 	victim = numarray_find(ti, &buf->na.number);
 	if (victim == NULL)
 		return (ENOENT);
 
 	/* Save the value before the element is overwritten */
 	tei->value = victim->value;
 
 	res = bdel(&buf->na.number, ncfg->main_ptr, ncfg->used,
 	    sizeof(struct numarray), compare_numarray);
 	KASSERT(res == 1, ("number %u does not exist", buf->na.number));
 
 	ncfg->used--;
 	ti->data = ncfg->used;
 	*pnum = 1;
 
 	return (0);
 }
 
 /*
  * Flush request state. Nothing is kept in @ta_buf that needs releasing,
  * so this callback is empty.
  */
 static void
 ta_flush_numarray_entry(struct ip_fw_chain *ch, struct tentry_info *tei,
     void *ta_buf)
 {
 
 	/* No per-request state to release */
 }
 
 
 /*
  * Table growing callbacks.
  */
 
 /*
  * Checks if the runtime array has to grow to hold @count more items.
  * The needed size is found by doubling the current size until the
  * active items plus @count fit.
  *
  * Returns 1 and stores the new size in @pflags if growing is needed,
  * returns 0 otherwise. @ti is not used.
  */
 static int
 ta_need_modify_numarray(void *ta_state, struct table_info *ti, uint32_t count,
     uint64_t *pflags)
 {
 	struct numarray_cfg *ncfg;
 	size_t wanted;
 
 	ncfg = (struct numarray_cfg *)ta_state;
 
 	for (wanted = ncfg->size; wanted < ncfg->used + count; wanted *= 2)
 		;
 
 	if (wanted == ncfg->size)
 		return (0);
 
 	*pflags = wanted;
 	return (1);
 }
 
 /*
  * Allocate new, larger runtime array.
  * The number of items is taken from @pflags; the zeroed array and its
  * size are stored in the struct mod_item kept in @ta_buf.
  *
  * Always returns 0.
  */
 static int
 ta_prepare_mod_numarray(void *ta_buf, uint64_t *pflags)
 {
 	struct mod_item *mod;
 
 	mod = (struct mod_item *)ta_buf;
 	memset(mod, 0, sizeof(*mod));
 
 	mod->size = *pflags;
 	mod->main_ptr = malloc(mod->size * sizeof(struct numarray), M_IPFW,
 	    M_WAITOK | M_ZERO);
 
 	return (0);
 }
 
 /*
  * Copy active items from the old runtime array to the new one held in
  * @ta_buf. If the current array is already at least as large as the new
  * one, nothing is copied and @pflags is cleared. @ti is not used.
  *
  * Always returns 0.
  */
 static int
 ta_fill_mod_numarray(void *ta_state, struct table_info *ti, void *ta_buf,
     uint64_t *pflags)
 {
 	struct numarray_cfg *ncfg;
 	struct mod_item *mod;
 
 	ncfg = (struct numarray_cfg *)ta_state;
 	mod = (struct mod_item *)ta_buf;
 
 	if (ncfg->size < mod->size) {
 		/* Still need to grow: carry over the active items */
 		memcpy(mod->main_ptr, ncfg->main_ptr,
 		    ncfg->used * sizeof(struct numarray));
 		return (0);
 	}
 
 	/* Growing is not needed anymore */
 	*pflags = 0;
 	return (0);
 }
 
 /*
  * Switch old & new arrays: the array from @ta_buf becomes the runtime
  * array (table state and ti->state), the previous array is left in
  * @ta_buf. @pflags is not used.
  */
 static void
 ta_modify_numarray(void *ta_state, struct table_info *ti, void *ta_buf,
     uint64_t pflags)
 {
 	struct numarray_cfg *ncfg;
 	struct mod_item *mod;
 	void *retired;
 
 	ncfg = (struct numarray_cfg *)ta_state;
 	mod = (struct mod_item *)ta_buf;
 
 	retired = ncfg->main_ptr;
 
 	ncfg->main_ptr = mod->main_ptr;
 	ncfg->size = mod->size;
 	ti->state = ncfg->main_ptr;
 
 	/* Old array stays in @ta_buf */
 	mod->main_ptr = retired;
 }
 
 /*
  * Free unneeded array: releases whatever array is left in @ta_buf.
  */
 static void
 ta_flush_mod_numarray(void *ta_buf)
 {
 	struct mod_item *mod;
 
 	mod = (struct mod_item *)ta_buf;
 	if (mod->main_ptr == NULL)
 		return;
 
 	free(mod->main_ptr, M_IPFW);
 }
 
 /*
  * Exports array element @e (struct numarray) into @tent: the number
  * becomes the key, the element value becomes the entry value.
  * @ta_state and @ti are not used.
  *
  * Always returns 0.
  */
 static int
 ta_dump_numarray_tentry(void *ta_state, struct table_info *ti, void *e,
     ipfw_obj_tentry *tent)
 {
 	struct numarray *item;
 
 	item = (struct numarray *)e;
 
 	tent->v.kidx = item->value;
 	tent->k.key = item->number;
 
 	return (0);
 }
 
 /*
  * Looks up the number stored in tent->k.key in the runtime array and
  * exports the matching element into @tent.
  *
  * Returns 0 on success, ENOENT if there is no such record.
  */
 static int
 ta_find_numarray_tentry(void *ta_state, struct table_info *ti,
     ipfw_obj_tentry *tent)
 {
 	struct numarray *ri;
 
 	ri = numarray_find(ti, &tent->k.key);
 
 	if (ri != NULL) {
 		ta_dump_numarray_tentry(ta_state, ti, ri, tent);
 		return (0);
 	}
 
 	return (ENOENT);
 }
 
 /*
  * Calls @f with @arg for every active element of the runtime array,
  * in array order. @ti is not used.
  */
 static void
 ta_foreach_numarray(void *ta_state, struct table_info *ti, ta_foreach_f *f,
     void *arg)
 {
 	struct numarray_cfg *ncfg;
 	struct numarray *items;
 	int idx;
 
 	ncfg = (struct numarray_cfg *)ta_state;
 	items = ncfg->main_ptr;
 
 	idx = 0;
 	while (idx < ncfg->used) {
 		f(&items[idx], arg);
 		idx++;
 	}
 }
 
 /*
  * "number:array" algorithm descriptor for IPFW_TABLE_NUMBER tables.
  * Deletion reuses ta_prepare_add_numarray() as its prepare step, since
  * both only need the key copied into the request buffer.
  */
 struct table_algo number_array = {
 	.name		= "number:array",
 	.type		= IPFW_TABLE_NUMBER,
 	.ta_buf_size	= sizeof(struct ta_buf_numarray),
 	.init		= ta_init_numarray,
 	.destroy	= ta_destroy_numarray,
 	.prepare_add	= ta_prepare_add_numarray,
 	.prepare_del	= ta_prepare_add_numarray,
 	.add		= ta_add_numarray,
 	.del		= ta_del_numarray,
 	.flush_entry	= ta_flush_numarray_entry,
 	.foreach	= ta_foreach_numarray,
 	.dump_tentry	= ta_dump_numarray_tentry,
 	.find_tentry	= ta_find_numarray_tentry,
 	.dump_tinfo	= ta_dump_numarray_tinfo,
 	.need_modify	= ta_need_modify_numarray,
 	.prepare_mod	= ta_prepare_mod_numarray,
 	.fill_mod	= ta_fill_mod_numarray,
 	.modify		= ta_modify_numarray,
 	.flush_mod	= ta_flush_mod_numarray,
 };
 
 /*
  * flow:hash cmds
  *
  *
  * ti->data:
  * [inv.mask4][inv.mask6][log2hsize4][log2hsize6]
  * [        8][        8[          8][         8]
  *
  * inv.mask4: 32 - mask
  * inv.mask6:
  * 1) _slow lookup: mask
  * 2) _aligned: (128 - mask) / 8
  * 3) _64: 8
  *
  *
  * pflags:
  * [hsize4][hsize6]
  * [    16][    16]
  */
 
 struct fhashentry;
 
 /* Hash bucket: singly-linked list of flow entries */
 SLIST_HEAD(fhashbhead, fhashentry);
 
 /*
  * Address-family independent part of a flow entry.
  * cmp_flow_ent() compares the 8 bytes that follow @next (af through
  * sport) as a single uint64_t, so the order and size of these fields
  * matter.
  */
 struct fhashentry {
 	SLIST_ENTRY(fhashentry)	next;	/* bucket linkage */
 	uint8_t		af;		/* AF_INET or AF_INET6 */
 	uint8_t		proto;
 	uint16_t	spare0;
 	uint16_t	dport;
 	uint16_t	sport;
 	uint32_t	value;		/* value returned by the lookup */
 	uint32_t	spare1;
 };
 
 /*
  * IPv4 flow entry. The addresses directly follow the common part and
  * are compared by cmp_flow_ent() as 2 * 4 bytes.
  */
 struct fhashentry4 {
 	struct fhashentry	e;
 	struct in_addr		dip;
 	struct in_addr		sip;
 };
 
 /*
  * IPv6 flow entry. The addresses directly follow the common part and
  * are compared by cmp_flow_ent() as 2 * 16 bytes.
  */
 struct fhashentry6 {
 	struct fhashentry	e;
 	struct in6_addr		dip6;
 	struct in6_addr		sip6;
 };
 
 /*
  * Per-table state of the flow hash algorithm (returned via ta_state).
  */
 struct fhash_cfg {
 	struct fhashbhead	*head;	/* array of @size buckets */
 	size_t			size;	/* number of buckets */
 	size_t			items;	/* reported as item count in tinfo */
 	/*
 	 * Lookup masks built by ta_init_fhash(). ta_lookup_fhash() gets a
 	 * pointer to fe4 and expects fe6 to follow it immediately.
 	 */
 	struct fhashentry4	fe4;
 	struct fhashentry6	fe6;
 };
 
 /*
  * Per-request scratch buffer of the flow hash add/del callbacks.
  * NOTE(review): the callbacks using it are outside of this part of the
  * file; field roles below are inferred from the names only.
  */
 struct ta_buf_fhash {
 	void	*ent_ptr;	/* presumably the entry being added/removed */
 	struct fhashentry6 fe6;	/* presumably key storage, large enough for both AFs */
 };
 
 /*
  * Forward declarations for the flow hash helpers and table callbacks.
  */
 static __inline int cmp_flow_ent(struct fhashentry *a,
     struct fhashentry *b, size_t sz);
 static __inline uint32_t hash_flow4(struct fhashentry4 *f, int hsize);
 static __inline uint32_t hash_flow6(struct fhashentry6 *f, int hsize);
 static uint32_t hash_flow_ent(struct fhashentry *ent, uint32_t size);
 static int ta_lookup_fhash(struct table_info *ti, void *key, uint32_t keylen,
     uint32_t *val);
 static int ta_init_fhash(struct ip_fw_chain *ch, void **ta_state,
 struct table_info *ti, char *data, uint8_t tflags);
 static void ta_destroy_fhash(void *ta_state, struct table_info *ti);
 static void ta_dump_fhash_tinfo(void *ta_state, struct table_info *ti,
     ipfw_ta_tinfo *tinfo);
 static int ta_dump_fhash_tentry(void *ta_state, struct table_info *ti,
     void *e, ipfw_obj_tentry *tent);
 static int tei_to_fhash_ent(struct tentry_info *tei, struct fhashentry *ent);
 static int ta_find_fhash_tentry(void *ta_state, struct table_info *ti,
     ipfw_obj_tentry *tent);
 static void ta_foreach_fhash(void *ta_state, struct table_info *ti,
     ta_foreach_f *f, void *arg);
 static int ta_prepare_add_fhash(struct ip_fw_chain *ch,
     struct tentry_info *tei, void *ta_buf);
 static int ta_add_fhash(void *ta_state, struct table_info *ti,
     struct tentry_info *tei, void *ta_buf, uint32_t *pnum);
 static int ta_prepare_del_fhash(struct ip_fw_chain *ch, struct tentry_info *tei,
     void *ta_buf);
 static int ta_del_fhash(void *ta_state, struct table_info *ti,
     struct tentry_info *tei, void *ta_buf, uint32_t *pnum);
 static void ta_flush_fhash_entry(struct ip_fw_chain *ch, struct tentry_info *tei,
     void *ta_buf);
 static int ta_need_modify_fhash(void *ta_state, struct table_info *ti,
     uint32_t count, uint64_t *pflags);
 static int ta_prepare_mod_fhash(void *ta_buf, uint64_t *pflags);
 static int ta_fill_mod_fhash(void *ta_state, struct table_info *ti,
     void *ta_buf, uint64_t *pflags);
 static void ta_modify_fhash(void *ta_state, struct table_info *ti, void *ta_buf,
     uint64_t pflags);
 static void ta_flush_mod_fhash(void *ta_buf);
 
 /*
  * Compares two flow entries.
  * The 8 bytes following the list linkage are compared as one uint64_t,
  * then @sz bytes following the common part of each entry are compared
  * with memcmp().
  *
  * Returns 1 if the entries match, 0 otherwise.
  */
 static __inline int
 cmp_flow_ent(struct fhashentry *a, struct fhashentry *b, size_t sz)
 {
 	uint64_t *lhs, *rhs;
 
 	lhs = (uint64_t *)(&a->next + 1);
 	rhs = (uint64_t *)(&b->next + 1);
 
 	if (*lhs != *rhs)
 		return (0);
 
 	return ((memcmp(a + 1, b + 1, sz) == 0) ? 1 : 0);
 }
 
 /*
  * Hashes IPv4 flow entry @f: XOR of both addresses and both ports,
  * reduced modulo (@hsize - 1).
  */
 static __inline uint32_t
 hash_flow4(struct fhashentry4 *f, int hsize)
 {
 	uint32_t mix;
 
 	mix = f->dip.s_addr;
 	mix ^= f->sip.s_addr;
 	mix ^= f->e.dport;
 	mix ^= f->e.sport;
 
 	return (mix % (hsize - 1));
 }
 
 /*
  * Hashes IPv6 flow entry @f: XOR of the last two 32-bit words of both
  * addresses and of both ports, reduced modulo (@hsize - 1).
  */
 static __inline uint32_t
 hash_flow6(struct fhashentry6 *f, int hsize)
 {
 	uint32_t mix;
 
 	mix = f->dip6.__u6_addr.__u6_addr32[2];
 	mix ^= f->dip6.__u6_addr.__u6_addr32[3];
 	mix ^= f->sip6.__u6_addr.__u6_addr32[2];
 	mix ^= f->sip6.__u6_addr.__u6_addr32[3];
 	mix ^= f->e.dport;
 	mix ^= f->e.sport;
 
 	return (mix % (hsize - 1));
 }
 
 /*
  * Hashes flow entry @ent for a table of @size buckets.
  * AF_INET entries use hash_flow4(), everything else uses hash_flow6().
  */
 static uint32_t
 hash_flow_ent(struct fhashentry *ent, uint32_t size)
 {
 
 	if (ent->af == AF_INET)
 		return (hash_flow4((struct fhashentry4 *)ent, size));
 
 	return (hash_flow6((struct fhashentry6 *)ent, size));
 }
 
 /*
  * Runtime lookup: @key points to a struct ipfw_flow_id.
  * The flow is masked with the per-table mask (ti->xstate), hashed, and
  * the matching bucket of ti->state is searched. ti->data holds the
  * number of buckets. @keylen is not used.
  *
  * On match stores the entry value in @val and returns 1.
  * Returns 0 if there is no match or the address type is neither 4 nor 6.
  */
 static int
 ta_lookup_fhash(struct table_info *ti, void *key, uint32_t keylen,
     uint32_t *val)
 {
 	struct fhashbhead *head;
 	struct fhashentry *ent;
 	struct fhashentry4 *m4;
 	struct ipfw_flow_id *id;
 	uint32_t hsize;
 	/*
 	 * Same width as the hash_flow4()/hash_flow6() result, so that the
 	 * bucket index is never truncated.
 	 */
 	uint32_t hash;
 
 	id = (struct ipfw_flow_id *)key;
 	head = (struct fhashbhead *)ti->state;
 	hsize = ti->data;
 	m4 = (struct fhashentry4 *)ti->xstate;
 
 	if (id->addr_type == 4) {
 		struct fhashentry4 f;
 
 		/* Copy hash mask */
 		f = *m4;
 
 		f.dip.s_addr &= id->dst_ip;
 		f.sip.s_addr &= id->src_ip;
 		f.e.dport &= id->dst_port;
 		f.e.sport &= id->src_port;
 		f.e.proto &= id->proto;
 		hash = hash_flow4(&f, hsize);
 		SLIST_FOREACH(ent, &head[hash], next) {
 			if (cmp_flow_ent(ent, &f.e, 2 * 4) != 0) {
 				*val = ent->value;
 				return (1);
 			}
 		}
 	} else if (id->addr_type == 6) {
 		struct fhashentry6 f;
 		uint64_t *fp, *idp;
 
 		/* Copy hash mask: the IPv6 mask is stored right after IPv4 one */
 		f = *((struct fhashentry6 *)(m4 + 1));
 
 		/* Handle lack of __u6_addr.__u6_addr64 */
 		fp = (uint64_t *)&f.dip6;
 		idp = (uint64_t *)&id->dst_ip6;
 		/* src IPv6 is stored after dst IPv6 */
 		*fp++ &= *idp++;
 		*fp++ &= *idp++;
 		*fp++ &= *idp++;
 		*fp &= *idp;
 		f.e.dport &= id->dst_port;
 		f.e.sport &= id->src_port;
 		f.e.proto &= id->proto;
 		hash = hash_flow6(&f, hsize);
 		SLIST_FOREACH(ent, &head[hash], next) {
 			if (cmp_flow_ent(ent, &f.e, 2 * 16) != 0) {
 				*val = ent->value;
 				return (1);
 			}
 		}
 	}
 
 	return (0);
 }
 
 /*
  * New table: allocates zeroed table state with 512 empty buckets and
  * builds the IPv4/IPv6 lookup masks from @tflags: every flow field
  * selected by an IPFW_TFFLAG_* bit gets an all-ones mask.
  * Stores the state in @ta_state and sets up @ti for runtime lookups.
  * @ch and @data are not used.
  *
  * Always returns 0.
  */
 static int
 ta_init_fhash(struct ip_fw_chain *ch, void **ta_state, struct table_info *ti,
     char *data, uint8_t tflags)
 {
 	struct fhash_cfg *fcfg;
 	struct fhashentry4 *mask4;
 	struct fhashentry6 *mask6;
 	u_int bucket;
 
 	fcfg = malloc(sizeof(*fcfg), M_IPFW, M_WAITOK | M_ZERO);
 
 	fcfg->size = 512;
 	fcfg->head = malloc(fcfg->size * sizeof(struct fhashbhead), M_IPFW,
 	    M_WAITOK | M_ZERO);
 	for (bucket = 0; bucket < fcfg->size; bucket++)
 		SLIST_INIT(&fcfg->head[bucket]);
 
 	/* Fill in fe masks based on @tflags */
 	mask4 = &fcfg->fe4;
 	mask6 = &fcfg->fe6;
 	if ((tflags & IPFW_TFFLAG_SRCIP) != 0) {
 		memset(&mask4->sip, 0xFF, sizeof(mask4->sip));
 		memset(&mask6->sip6, 0xFF, sizeof(mask6->sip6));
 	}
 	if ((tflags & IPFW_TFFLAG_DSTIP) != 0) {
 		memset(&mask4->dip, 0xFF, sizeof(mask4->dip));
 		memset(&mask6->dip6, 0xFF, sizeof(mask6->dip6));
 	}
 	if ((tflags & IPFW_TFFLAG_SRCPORT) != 0) {
 		memset(&mask4->e.sport, 0xFF, sizeof(mask4->e.sport));
 		memset(&mask6->e.sport, 0xFF, sizeof(mask6->e.sport));
 	}
 	if ((tflags & IPFW_TFFLAG_DSTPORT) != 0) {
 		memset(&mask4->e.dport, 0xFF, sizeof(mask4->e.dport));
 		memset(&mask6->e.dport, 0xFF, sizeof(mask6->e.dport));
 	}
 	if ((tflags & IPFW_TFFLAG_PROTO) != 0) {
 		memset(&mask4->e.proto, 0xFF, sizeof(mask4->e.proto));
 		memset(&mask6->e.proto, 0xFF, sizeof(mask6->e.proto));
 	}
 
 	/* Address family is always part of the key */
 	mask4->e.af = AF_INET;
 	mask6->e.af = AF_INET6;
 
 	/* Hand the state over to the caller and hook up the lookup */
 	*ta_state = fcfg;
 	ti->lookup = ta_lookup_fhash;
 	ti->state = fcfg->head;
 	ti->xstate = &fcfg->fe4;
 	ti->data = fcfg->size;
 
 	return (0);
 }
 
 static void
 ta_destroy_fhash(void *ta_state, struct table_info *ti)
 {
 	struct fhash_cfg *cfg;
 	struct fhashentry *ent, *ent_next;
 	int i;
 
 	cfg = (struct fhash_cfg *)ta_state;
 
 	for (i = 0; i < cfg->size; i++)
 		SLIST_FOREACH_SAFE(ent, &cfg->head[i], next, ent_next)
 			free(ent, M_IPFW_TBL);
 
 	free(cfg->head, M_IPFW);
 	free(cfg, M_IPFW);
 }
 
 /*
  * Report flow:hash specific table info: hash class, bucket count,
  * item count and the per-family entry sizes.
  */
 static void
 ta_dump_fhash_tinfo(void *ta_state, struct table_info *ti, ipfw_ta_tinfo *tinfo)
 {
 	struct fhash_cfg *fcfg = ta_state;
 
 	tinfo->flags = IPFW_TATFLAGS_AFITEM;
 	tinfo->taclass4 = IPFW_TACLASS_HASH;
 
 	/* Bucket count and number of stored entries */
 	tinfo->size4 = fcfg->size;
 	tinfo->count4 = fcfg->items;
 
 	/* Entry size differs per address family */
 	tinfo->itemsize4 = sizeof(struct fhashentry4);
 	tinfo->itemsize6 = sizeof(struct fhashentry6);
 }
 
 /*
  * Export flow hash entry @e into @tent.
  *
  * Ports are converted with htons() and IPv4 addresses with htonl();
  * IPv6 addresses are copied unchanged.  Always returns 0.
  */
 static int
 ta_dump_fhash_tentry(void *ta_state, struct table_info *ti, void *e,
     ipfw_obj_tentry *tent)
 {
 	struct fhash_cfg *cfg;
 	struct fhashentry *ent;
 	struct fhashentry4 *fe4;
 #ifdef INET6
 	struct fhashentry6 *fe6;
 #endif
 	struct tflow_entry *tfe;
 
 	/* NOTE(review): cfg is assigned but not read in this function. */
 	cfg = (struct fhash_cfg *)ta_state;
 	ent = (struct fhashentry *)e;
 	tfe = &tent->k.flow;
 
 	/* Family-independent part of the key plus the stored value */
 	tfe->af = ent->af;
 	tfe->proto = ent->proto;
 	tfe->dport = htons(ent->dport);
 	tfe->sport = htons(ent->sport);
 	tent->v.kidx = ent->value;
 	tent->subtype = ent->af;
 
 	if (ent->af == AF_INET) {
 		fe4 = (struct fhashentry4 *)ent;
 		tfe->a.a4.sip.s_addr = htonl(fe4->sip.s_addr);
 		tfe->a.a4.dip.s_addr = htonl(fe4->dip.s_addr);
 		tent->masklen = 32;
 #ifdef INET6
 	} else {
 		/* Any non-AF_INET entry is treated as IPv6 here */
 		fe6 = (struct fhashentry6 *)ent;
 		tfe->a.a6.sip6 = fe6->sip6;
 		tfe->a.a6.dip6 = fe6->dip6;
 		tent->masklen = 128;
 #endif
 	}
 
 	return (0);
 }
 
 /*
  * Fill flow hash entry @ent from the flow key pointed to by tei->paddr.
  *
  * Ports are converted with ntohs() and IPv4 addresses with ntohl();
  * IPv6 addresses are copied unchanged.  @ent must be large enough for
  * the family selected by tei->subtype, since it is cast to
  * fhashentry4/fhashentry6 below.
  *
  * Returns 0 on success, EINVAL if tei->subtype is not handled.
  */
 static int
 tei_to_fhash_ent(struct tentry_info *tei, struct fhashentry *ent)
 {
 #ifdef INET
 	struct fhashentry4 *fe4;
 #endif
 #ifdef INET6
 	struct fhashentry6 *fe6;
 #endif
 	struct tflow_entry *tfe;
 
 	tfe = (struct tflow_entry *)tei->paddr;
 
 	/* Family-independent fields */
 	ent->af = tei->subtype;
 	ent->proto = tfe->proto;
 	ent->dport = ntohs(tfe->dport);
 	ent->sport = ntohs(tfe->sport);
 
 	if (tei->subtype == AF_INET) {
 		/* Without INET this branch leaves the addresses untouched */
 #ifdef INET
 		fe4 = (struct fhashentry4 *)ent;
 		fe4->sip.s_addr = ntohl(tfe->a.a4.sip.s_addr);
 		fe4->dip.s_addr = ntohl(tfe->a.a4.dip.s_addr);
 #endif
 #ifdef INET6
 	} else if (tei->subtype == AF_INET6) {
 		fe6 = (struct fhashentry6 *)ent;
 		fe6->sip6 = tfe->a.a6.sip6;
 		fe6->dip6 = tfe->a.a6.dip6;
 #endif
 	} else {
 		/* Unknown CIDR type */
 		return (EINVAL);
 	}
 
 	return (0);
 }
 
 
 /*
  * Look up the flow described by @tent in the hash.
  *
  * On a match @tent is refilled from the stored entry and 0 is returned.
  * Returns ENOENT if no entry matches, or the error from
  * tei_to_fhash_ent() if the key cannot be converted.
  */
 static int
 ta_find_fhash_tentry(void *ta_state, struct table_info *ti,
     ipfw_obj_tentry *tent)
 {
 	struct fhash_cfg *fcfg = ta_state;
 	struct fhashentry6 keybuf;
 	struct fhashentry *key, *cur;
 	struct tentry_info tei;
 	size_t addrlen;
 	uint32_t bucket;
 	int error;
 
 	/* The fhashentry6 buffer holds the search key for either family */
 	memset(&keybuf, 0, sizeof(keybuf));
 	key = &keybuf.e;
 
 	memset(&tei, 0, sizeof(tei));
 	tei.subtype = tent->subtype;
 	tei.paddr = &tent->k.flow;
 
 	error = tei_to_fhash_ent(&tei, key);
 	if (error != 0)
 		return (error);
 
 	bucket = hash_flow_ent(key, fcfg->size);
 	addrlen = (tei.subtype == AF_INET) ?
 	    2 * sizeof(struct in_addr) : 2 * sizeof(struct in6_addr);
 
 	/* Walk the bucket until an entry compares as matching */
 	SLIST_FOREACH(cur, &fcfg->head[bucket], next) {
 		if (cmp_flow_ent(cur, key, addrlen) == 0)
 			continue;
 		ta_dump_fhash_tentry(ta_state, ti, cur, tent);
 		return (0);
 	}
 
 	return (ENOENT);
 }
 
 /*
  * Invoke callback @f with (entry, @arg) for every entry in every bucket.
  */
 static void
 ta_foreach_fhash(void *ta_state, struct table_info *ti, ta_foreach_f *f,
     void *arg)
 {
 	struct fhash_cfg *fcfg = ta_state;
 	struct fhashentry *cur, *nxt;
 	int bucket;
 
 	for (bucket = 0; bucket < fcfg->size; bucket++) {
 		SLIST_FOREACH_SAFE(cur, &fcfg->head[bucket], next, nxt) {
 			f(cur, arg);
 		}
 	}
 }
 
 static int
 ta_prepare_add_fhash(struct ip_fw_chain *ch, struct tentry_info *tei,
     void *ta_buf)
 {
 	struct ta_buf_fhash *tb;
 	struct fhashentry *ent;
 	size_t sz;
 	int error;
 
 	tb = (struct ta_buf_fhash *)ta_buf;
 
 	if (tei->subtype == AF_INET)
 		sz = sizeof(struct fhashentry4);
 	else if (tei->subtype == AF_INET6)
 		sz = sizeof(struct fhashentry6);
 	else
 		return (EINVAL);
 
 	ent = malloc(sz, M_IPFW_TBL, M_WAITOK | M_ZERO);
 
 	error = tei_to_fhash_ent(tei, ent);
 	if (error != 0) {
 		free(ent, M_IPFW_TBL);
 		return (error);
 	}
 	tb->ent_ptr = ent;
 
 	return (0);
 }
 
 /*
  * Insert the entry prepared in @ta_buf, or update an existing one.
  *
  * - Key already present: requires TEI_FLAGS_UPDATE (else EEXIST); the
  *   stored value and tei->value are exchanged, TEI_FLAGS_UPDATED is set
  *   and *pnum is 0.
  * - Key absent: fails with EFBIG if TEI_FLAGS_DONTADD is set; otherwise
  *   the prepared entry is linked in, ownership leaves the ta_buf
  *   (ent_ptr is cleared) and *pnum is 1.
  */
 static int
 ta_add_fhash(void *ta_state, struct table_info *ti, struct tentry_info *tei,
     void *ta_buf, uint32_t *pnum)
 {
 	struct fhash_cfg *fcfg = ta_state;
 	struct ta_buf_fhash *tb = ta_buf;
 	struct fhashbhead *slot;
 	struct fhashentry *newent, *cur;
 	uint32_t oldval;
 	size_t addrlen;
 
 	newent = (struct fhashentry *)tb->ent_ptr;
 
 	/* Read current value from @tei */
 	newent->value = tei->value;
 
 	slot = &fcfg->head[hash_flow_ent(newent, fcfg->size)];
 	addrlen = (tei->subtype == AF_INET) ?
 	    2 * sizeof(struct in_addr) : 2 * sizeof(struct in6_addr);
 
 	/* SLIST_FOREACH leaves @cur NULL when nothing matched */
 	SLIST_FOREACH(cur, slot, next) {
 		if (cmp_flow_ent(cur, newent, addrlen) != 0)
 			break;
 	}
 
 	if (cur == NULL) {
 		if ((tei->flags & TEI_FLAGS_DONTADD) != 0)
 			return (EFBIG);
 
 		SLIST_INSERT_HEAD(slot, newent, next);
 		tb->ent_ptr = NULL;
 		*pnum = 1;
 
 		/* Update counters and check if we need to grow hash */
 		fcfg->items++;
 		return (0);
 	}
 
 	if ((tei->flags & TEI_FLAGS_UPDATE) == 0)
 		return (EEXIST);
 
 	/* Record already exists: swap values between it and @tei */
 	oldval = cur->value;
 	cur->value = tei->value;
 	tei->value = oldval;
 	/* Indicate that update has happened instead of addition */
 	tei->flags |= TEI_FLAGS_UPDATED;
 	*pnum = 0;
 
 	return (0);
 }
 
 static int
 ta_prepare_del_fhash(struct ip_fw_chain *ch, struct tentry_info *tei,
     void *ta_buf)
 {
 	struct ta_buf_fhash *tb;
 
 	tb = (struct ta_buf_fhash *)ta_buf;
 
 	return (tei_to_fhash_ent(tei, &tb->fe6.e));
 }
 
 /*
  * Unlink the entry matching the key prepared in @ta_buf.
  *
  * On success the removed entry is handed to the ta_buf (ent_ptr), its
  * value is returned in tei->value, *pnum is set to 1 and 0 is returned.
  * Returns ENOENT when no entry matches.
  */
 static int
 ta_del_fhash(void *ta_state, struct table_info *ti, struct tentry_info *tei,
     void *ta_buf, uint32_t *pnum)
 {
 	struct fhash_cfg *fcfg = ta_state;
 	struct ta_buf_fhash *tb = ta_buf;
 	struct fhashbhead *slot;
 	struct fhashentry *key, *cur;
 	size_t addrlen;
 
 	key = &tb->fe6.e;
 	slot = &fcfg->head[hash_flow_ent(key, fcfg->size)];
 	addrlen = (tei->subtype == AF_INET) ?
 	    2 * sizeof(struct in_addr) : 2 * sizeof(struct in6_addr);
 
 	/* SLIST_FOREACH leaves @cur NULL when nothing matched */
 	SLIST_FOREACH(cur, slot, next) {
 		if (cmp_flow_ent(cur, key, addrlen) != 0)
 			break;
 	}
 	if (cur == NULL)
 		return (ENOENT);
 
 	SLIST_REMOVE(slot, cur, fhashentry, next);
 	tei->value = cur->value;
 	*pnum = 1;
 	fcfg->items--;
 	tb->ent_ptr = cur;
 
 	return (0);
 }
 
 /*
  * Free the entry still referenced by the ta_buf, if there is one.
  */
 static void
 ta_flush_fhash_entry(struct ip_fw_chain *ch, struct tentry_info *tei,
     void *ta_buf)
 {
 	struct ta_buf_fhash *tb = ta_buf;
 
 	if (tb->ent_ptr == NULL)
 		return;
 
 	free(tb->ent_ptr, M_IPFW_TBL);
 }
 
 /*
  * Hash growing callbacks.
  */
 
 /*
  * Decide whether the hash should grow: returns 1 and stores the doubled
  * bucket count in *pflags when there are more items than buckets and the
  * bucket count is still below 65536; returns 0 otherwise.
  */
 static int
 ta_need_modify_fhash(void *ta_state, struct table_info *ti, uint32_t count,
     uint64_t *pflags)
 {
 	struct fhash_cfg *fcfg = ta_state;
 
 	if (fcfg->items <= fcfg->size || fcfg->size >= 65536)
 		return (0);
 
 	*pflags = fcfg->size * 2;
 	return (1);
 }
 
 /*
  * Allocate a new bucket array of *pflags heads and record it, together
  * with its size, in the mod_item kept in @ta_buf.  Always returns 0.
  */
 static int
 ta_prepare_mod_fhash(void *ta_buf, uint64_t *pflags)
 {
 	struct mod_item *mi = ta_buf;
 	struct fhashbhead *buckets;
 	u_int idx;
 
 	memset(mi, 0, sizeof(struct mod_item));
 	mi->size = *pflags;
 
 	buckets = malloc(sizeof(struct fhashbhead) * mi->size, M_IPFW,
 	    M_WAITOK | M_ZERO);
 	idx = 0;
 	while (idx < mi->size) {
 		SLIST_INIT(&buckets[idx]);
 		idx++;
 	}
 	mi->main_ptr = buckets;
 
 	return (0);
 }
 
 /*
  * Copy data from old runtime array to new one.
  *
  * Intentionally a no-op for flow:hash; always returns 0.
  */
 static int
 ta_fill_mod_fhash(void *ta_state, struct table_info *ti, void *ta_buf,
     uint64_t *pflags)
 {
 
 	/* It is not possible to do rehash if we're not holding WLOCK. */
 	return (0);
 }
 
 /*
  * Switch old & new arrays.
  *
  * Every entry is rehashed from the current bucket array into the one
  * prepared in @ta_buf, the new array is published in @ti and the config,
  * and the old array is left in mi->main_ptr.
  */
 static void
 ta_modify_fhash(void *ta_state, struct table_info *ti, void *ta_buf,
     uint64_t pflags)
 {
 	struct mod_item *mi = ta_buf;
 	struct fhash_cfg *fcfg = ta_state;
 	struct fhashbhead *prev_tbl, *next_tbl;
 	struct fhashentry *cur, *nxt;
 	size_t prev_size;
 	uint32_t slot;
 	int b;
 
 	prev_tbl = ti->state;
 	prev_size = fcfg->size;
 	next_tbl = (struct fhashbhead *)mi->main_ptr;
 
 	/* Move each entry to its bucket in the larger table */
 	for (b = 0; b < prev_size; b++) {
 		SLIST_FOREACH_SAFE(cur, &prev_tbl[b], next, nxt) {
 			slot = hash_flow_ent(cur, mi->size);
 			SLIST_INSERT_HEAD(&next_tbl[slot], cur, next);
 		}
 	}
 
 	/* Publish the new table */
 	ti->state = next_tbl;
 	ti->data = mi->size;
 	fcfg->head = next_tbl;
 	fcfg->size = mi->size;
 
 	/* Leave the old array in the mod_item */
 	mi->main_ptr = prev_tbl;
 }
 
 /*
  * Free the unneeded bucket array referenced by the mod_item, if any.
  */
 static void
 ta_flush_mod_fhash(void *ta_buf)
 {
 	struct mod_item *mi = ta_buf;
 
 	if (mi->main_ptr == NULL)
 		return;
 
 	free(mi->main_ptr, M_IPFW);
 }
 
 /*
  * Method table for the "flow:hash" algorithm: table type
  * IPFW_TABLE_FLOW, flagged TA_FLAG_DEFAULT, wired to the ta_*_fhash
  * callbacks defined above.
  */
 struct table_algo flow_hash = {
 	.name		= "flow:hash",
 	.type		= IPFW_TABLE_FLOW,
 	.flags		= TA_FLAG_DEFAULT,
 	.ta_buf_size	= sizeof(struct ta_buf_fhash),
 	.init		= ta_init_fhash,
 	.destroy	= ta_destroy_fhash,
 	.prepare_add	= ta_prepare_add_fhash,
 	.prepare_del	= ta_prepare_del_fhash,
 	.add		= ta_add_fhash,
 	.del		= ta_del_fhash,
 	.flush_entry	= ta_flush_fhash_entry,
 	.foreach	= ta_foreach_fhash,
 	.dump_tentry	= ta_dump_fhash_tentry,
 	.find_tentry	= ta_find_fhash_tentry,
 	.dump_tinfo	= ta_dump_fhash_tinfo,
 	.need_modify	= ta_need_modify_fhash,
 	.prepare_mod	= ta_prepare_mod_fhash,
 	.fill_mod	= ta_fill_mod_fhash,
 	.modify		= ta_modify_fhash,
 	.flush_mod	= ta_flush_mod_fhash,
 };
 
 /*
  * Kernel fibs bindings.
  *
  * Implementation:
  *
  * Runtime part:
  * - fully relies on route API
  * - fib number is stored in ti->data
  *
  */
 
 static int ta_lookup_kfib(struct table_info *ti, void *key, uint32_t keylen,
     uint32_t *val);
 static int kfib_parse_opts(int *pfib, char *data);
 static void ta_print_kfib_config(void *ta_state, struct table_info *ti,
     char *buf, size_t bufsize);
 static int ta_init_kfib(struct ip_fw_chain *ch, void **ta_state,
     struct table_info *ti, char *data, uint8_t tflags);
 static void ta_destroy_kfib(void *ta_state, struct table_info *ti);
 static void ta_dump_kfib_tinfo(void *ta_state, struct table_info *ti,
     ipfw_ta_tinfo *tinfo);
 static int contigmask(uint8_t *p, int len);
 static int ta_dump_kfib_tentry(void *ta_state, struct table_info *ti, void *e,
     ipfw_obj_tentry *tent);
 static int ta_dump_kfib_tentry_int(struct sockaddr *paddr,
     struct sockaddr *pmask, ipfw_obj_tentry *tent);
 static int ta_find_kfib_tentry(void *ta_state, struct table_info *ti,
     ipfw_obj_tentry *tent);
 static void ta_foreach_kfib(void *ta_state, struct table_info *ti,
     ta_foreach_f *f, void *arg);
 
 
 /*
  * Runtime lookup for addr:kfib: ask the kernel FIB numbered ti->data
  * whether it has a route for @key.
  *
  * Returns 1 (and sets *val to 0) when a nexthop is found, 0 otherwise,
  * including when @keylen selects neither family.
  */
 static int
 ta_lookup_kfib(struct table_info *ti, void *key, uint32_t keylen,
     uint32_t *val)
 {
 #ifdef INET
-	struct nhop4_basic nh4;
 	struct in_addr in;
 #endif
-#ifdef INET6
-	struct nhop6_basic nh6;
-#endif
 	int error;
 
 	error = ENOENT;
 #ifdef INET
 	if (keylen == 4) {
 		in.s_addr = *(in_addr_t *)key;
-		error = fib4_lookup_nh_basic(ti->data,
-		    in, 0, 0, &nh4);
+		NET_EPOCH_ASSERT();
+		/* A NULL nexthop is the miss case: keep error non-zero. */
+		error = fib4_lookup(ti->data, in, 0, NHR_NONE, 0) == NULL;
 	}
 #endif
 #ifdef INET6
 	if (keylen == 6)
-		error = fib6_lookup_nh_basic(ti->data,
-		    (struct in6_addr *)key, 0, 0, 0, &nh6);
+		error = fib6_lookup(ti->data, (struct in6_addr *)key,
+		    0, NHR_NONE, 0) == NULL;
 #endif
 
 	if (error != 0)
 		return (0);
 
 	*val = 0;
 
 	return (1);
 }
 
 /*
  * Parse the optional 'fib=%d' argument that follows the algorithm name
  * in @data (e.g. "addr:kfib fib=2").
  *
  * @data is modified in place: the first space after the "fib=" token, if
  * any, is overwritten with a NUL.  *pfib is written only on success and
  * is left untouched when no option is present.
  *
  * Returns 0 when @data is NULL, has no option, or the option parsed;
  * EINVAL when the option is not "fib=" followed by one or more decimal
  * digits, or when the number does not fit in an int.
  */
 static int
 kfib_parse_opts(int *pfib, char *data)
 {
 	char *pdel, *pend, *s;
 	long fibnum;
 
 	if (data == NULL)
 		return (0);
 	if ((pdel = strchr(data, ' ')) == NULL)
 		return (0);
 	while (*pdel == ' ')
 		pdel++;
 	if (strncmp(pdel, "fib=", 4) != 0)
 		return (EINVAL);
 	if ((s = strchr(pdel, ' ')) != NULL)
 		*s = '\0';
 
 	pdel += 4;
 	/*
 	 * Need \d+: strtol() alone would accept an empty string (yielding
 	 * fib 0) as well as a sign, so insist on a leading digit.
 	 */
 	if (*pdel < '0' || *pdel > '9')
 		return (EINVAL);
 	fibnum = strtol(pdel, &pend, 10);
 	if (*pend != '\0')
 		return (EINVAL);
 	/* Reject values that would be truncated when stored in an int */
 	if (fibnum != (int)fibnum)
 		return (EINVAL);
 
 	*pfib = (int)fibnum;
 
 	return (0);
 }
 
 /*
  * Format the algorithm configuration string into @buf: the bare
  * algorithm name for fib 0, or the name followed by " fib=N" otherwise.
  */
 static void
 ta_print_kfib_config(void *ta_state, struct table_info *ti, char *buf,
     size_t bufsize)
 {
 
 	if (ti->data == 0) {
 		snprintf(buf, bufsize, "%s", "addr:kfib");
 		return;
 	}
 
 	snprintf(buf, bufsize, "%s fib=%lu", "addr:kfib", ti->data);
 }
 
 /*
  * Set up an addr:kfib table: parse the optional fib number from @data
  * (default 0), store it in ti->data and install the lookup handler.
  *
  * Returns 0 on success, the parse error from kfib_parse_opts(), or
  * E2BIG when the fib number is not below rt_numfibs.
  */
 static int
 ta_init_kfib(struct ip_fw_chain *ch, void **ta_state, struct table_info *ti,
     char *data, uint8_t tflags)
 {
 	int fibnum = 0;
 	int error;
 
 	error = kfib_parse_opts(&fibnum, data);
 	if (error != 0)
 		return (error);
 	if (fibnum >= rt_numfibs)
 		return (E2BIG);
 
 	ti->lookup = ta_lookup_kfib;
 	ti->data = fibnum;
 
 	return (0);
 }
 
 /*
  * Destroys table @ti
  *
  * Intentionally empty: this callback releases nothing.
  */
 static void
 ta_destroy_kfib(void *ta_state, struct table_info *ti)
 {
 
 }
 
 /*
  * Provide algo-specific table info: both families are reported as radix
  * class with a zero item count and an item size of struct rtentry.
  */
 static void
 ta_dump_kfib_tinfo(void *ta_state, struct table_info *ti, ipfw_ta_tinfo *tinfo)
 {
 
 	tinfo->flags = IPFW_TATFLAGS_AFDATA;
 
 	/* IPv4 */
 	tinfo->taclass4 = IPFW_TACLASS_RADIX;
 	tinfo->itemsize4 = sizeof(struct rtentry);
 	tinfo->count4 = 0;
 
 	/* IPv6 */
 	tinfo->taclass6 = IPFW_TACLASS_RADIX;
 	tinfo->itemsize6 = sizeof(struct rtentry);
 	tinfo->count6 = 0;
 }
 
 /*
  * Return the number of leading set bits in the first @len bits of mask
  * @p (most significant bit of each byte first), or -1 if a set bit
  * follows the first clear one, i.e. the mask is not contiguous.
  */
 static int
 contigmask(uint8_t *p, int len)
 {
 	int ones, pos;
 
 	/* Length of the leading run of ones */
 	ones = 0;
 	while (ones < len && (p[ones / 8] & (0x80 >> (ones % 8))) != 0)
 		ones++;
 
 	/* Everything past the first clear bit must be clear as well */
 	for (pos = ones + 1; pos < len; pos++) {
 		if ((p[pos / 8] & (0x80 >> (pos % 8))) != 0)
 			return (-1); /* mask not contiguous */
 	}
 
 	return (ones);
 }
 
 
 /*
  * Export route entry @e into @tent by passing its key and mask
  * sockaddrs to ta_dump_kfib_tentry_int().
  */
 static int
 ta_dump_kfib_tentry(void *ta_state, struct table_info *ti, void *e,
     ipfw_obj_tentry *tent)
 {
 	struct rtentry *rt = e;
 
 	return (ta_dump_kfib_tentry_int(rt_key(rt), rt_mask(rt), tent));
 }
 
 /*
  * Fill @tent from a prefix given as sockaddrs @paddr / @pmask.
  *
  * The family is taken from paddr->sa_family.  A NULL @pmask yields the
  * full host length (32 or 128); a non-contiguous mask is reported with
  * a mask length of 0.  The value index is always set to 0.  Always
  * returns 0; @tent is left untouched for any other family.
  */
 static int
 ta_dump_kfib_tentry_int(struct sockaddr *paddr, struct sockaddr *pmask,
     ipfw_obj_tentry *tent)
 {
 #ifdef INET
 	struct sockaddr_in *addr, *mask;
 #endif
 #ifdef INET6
 	struct sockaddr_in6 *addr6, *mask6;
 #endif
 	int len;
 
 	len = 0;
 
 	/* Guess IPv4/IPv6 radix by sockaddr family */
 #ifdef INET
 	if (paddr->sa_family == AF_INET) {
 		addr = (struct sockaddr_in *)paddr;
 		mask = (struct sockaddr_in *)pmask;
 		tent->k.addr.s_addr = addr->sin_addr.s_addr;
 		len = 32;
 		if (mask != NULL)
 			len = contigmask((uint8_t *)&mask->sin_addr, 32);
 		/* contigmask() returns -1 for a non-contiguous mask */
 		if (len == -1)
 			len = 0;
 		tent->masklen = len;
 		tent->subtype = AF_INET;
 		tent->v.kidx = 0; /* Do we need to put GW here? */
 	}
 #endif
 #ifdef INET6
 	if (paddr->sa_family == AF_INET6) {
 		addr6 = (struct sockaddr_in6 *)paddr;
 		mask6 = (struct sockaddr_in6 *)pmask;
 		memcpy(&tent->k.addr6, &addr6->sin6_addr,
 		    sizeof(struct in6_addr));
 		len = 128;
 		if (mask6 != NULL)
 			len = contigmask((uint8_t *)&mask6->sin6_addr, 128);
 		/* contigmask() returns -1 for a non-contiguous mask */
 		if (len == -1)
 			len = 0;
 		tent->masklen = len;
 		tent->subtype = AF_INET6;
 		tent->v.kidx = 0;
 	}
 #endif
 
 	return (0);
 }
 
 /*
  * Find the route covering the address in @tent in fib ti->data and
  * rewrite @tent with the matching prefix.
  *
  * Returns 0 on success, ENOENT when rib_lookup_info() fails.
  */
 static int
 ta_find_kfib_tentry(void *ta_state, struct table_info *ti,
     ipfw_obj_tentry *tent)
 {
 	struct rt_addrinfo info;
 	struct sockaddr_in6 lookup6, pfx6, msk6;
 	struct sockaddr *pfx, *msk;
 
 	/* Storage the lookup fills with the route's prefix and mask */
 	bzero(&pfx6, sizeof(pfx6));
 	pfx6.sin6_len = sizeof(pfx6);
 	pfx = (struct sockaddr *)&pfx6;
 	bzero(&msk6, sizeof(msk6));
 	msk6.sin6_len = sizeof(msk6);
 	msk = (struct sockaddr *)&msk6;
 
 	bzero(&info, sizeof(info));
 	info.rti_info[RTAX_DST] = pfx;
 	info.rti_info[RTAX_NETMASK] = msk;
 
 	/* Lookup key: a sockaddr_in6 buffer reused for IPv4 as well */
 	bzero(&lookup6, sizeof(lookup6));
 	lookup6.sin6_family = tent->subtype;
 	if (tent->subtype != AF_INET) {
 		lookup6.sin6_addr = tent->k.addr6;
 		lookup6.sin6_len = sizeof(struct sockaddr_in6);
 	} else {
 		((struct sockaddr_in *)&lookup6)->sin_addr = tent->k.addr;
 		lookup6.sin6_len = sizeof(struct sockaddr_in);
 	}
 
 	if (rib_lookup_info(ti->data, (struct sockaddr *)&lookup6, 0, 0,
 	    &info) != 0)
 		return (ENOENT);
 
 	/* No netmask reported: dump the entry without a mask */
 	if ((info.rti_addrs & RTA_NETMASK) == 0)
 		msk = NULL;
 
 	ta_dump_kfib_tentry_int(pfx, msk, tent);
 
 	return (0);
 }
 
 /*
  * Walk the IPv4 and then the IPv6 routing tree of fib ti->data, calling
  * @f on every node with the rib read lock held.  The walk result is
  * ignored.
  */
 static void
 ta_foreach_kfib(void *ta_state, struct table_info *ti, ta_foreach_f *f,
     void *arg)
 {
 	RIB_RLOCK_TRACKER;
 	static const int families[] = { AF_INET, AF_INET6 };
 	struct rib_head *rh;
 	int error;
 	size_t k;
 
 	for (k = 0; k < sizeof(families) / sizeof(families[0]); k++) {
 		rh = rt_tables_get_rnh(ti->data, families[k]);
 		if (rh == NULL)
 			continue;
 		RIB_RLOCK(rh);
 		error = rh->rnh_walktree(&rh->head, (walktree_f_t *)f, arg);
 		RIB_RUNLOCK(rh);
 	}
 }
 
 /*
  * Method table for the "addr:kfib" algorithm: table type
  * IPFW_TABLE_ADDR, flagged TA_FLAG_READONLY, with no per-request buffer
  * (ta_buf_size is 0) and no add/del callbacks.
  */
 struct table_algo addr_kfib = {
 	.name		= "addr:kfib",
 	.type		= IPFW_TABLE_ADDR,
 	.flags		= TA_FLAG_READONLY,
 	.ta_buf_size	= 0,
 	.init		= ta_init_kfib,
 	.destroy	= ta_destroy_kfib,
 	.foreach	= ta_foreach_kfib,
 	.dump_tentry	= ta_dump_kfib_tentry,
 	.find_tentry	= ta_find_kfib_tentry,
 	.dump_tinfo	= ta_dump_kfib_tinfo,
 	.print_config	= ta_print_kfib_config,
 };
 
 /*
  * Register every table algorithm defined in this file with chain @ch;
  * each algorithm's index is stored back into its own .idx field.
  */
 void
 ipfw_table_algo_init(struct ip_fw_chain *ch)
 {
 	const size_t algo_sz = sizeof(struct table_algo);
 
 	/* Address tables */
 	ipfw_add_table_algo(ch, &addr_radix, algo_sz, &addr_radix.idx);
 	ipfw_add_table_algo(ch, &addr_hash, algo_sz, &addr_hash.idx);
 	/* Interface, number and flow tables */
 	ipfw_add_table_algo(ch, &iface_idx, algo_sz, &iface_idx.idx);
 	ipfw_add_table_algo(ch, &number_array, algo_sz, &number_array.idx);
 	ipfw_add_table_algo(ch, &flow_hash, algo_sz, &flow_hash.idx);
 	/* Kernel fib binding */
 	ipfw_add_table_algo(ch, &addr_kfib, algo_sz, &addr_kfib.idx);
 }
 
 /*
  * Unregister from chain @ch every algorithm registered by
  * ipfw_table_algo_init(), using the index saved in each .idx field.
  */
 void
 ipfw_table_algo_destroy(struct ip_fw_chain *ch)
 {
 
 	ipfw_del_table_algo(ch, addr_radix.idx);
 	ipfw_del_table_algo(ch, addr_hash.idx);
 	ipfw_del_table_algo(ch, iface_idx.idx);
 	ipfw_del_table_algo(ch, number_array.idx);
 	ipfw_del_table_algo(ch, flow_hash.idx);
 	ipfw_del_table_algo(ch, addr_kfib.idx);
 }
 
 
Index: head/sys/netpfil/ipfw/nat64/nat64_translate.c
===================================================================
--- head/sys/netpfil/ipfw/nat64/nat64_translate.c	(revision 362899)
+++ head/sys/netpfil/ipfw/nat64/nat64_translate.c	(revision 362900)
@@ -1,1707 +1,1716 @@
 /*-
  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
  *
  * Copyright (c) 2015-2019 Yandex LLC
  * Copyright (c) 2015-2019 Andrey V. Elsukov <ae@FreeBSD.org>
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  *
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include "opt_ipstealth.h"
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/counter.h>
 #include <sys/errno.h>
 #include <sys/kernel.h>
 #include <sys/lock.h>
 #include <sys/mbuf.h>
 #include <sys/module.h>
 #include <sys/rmlock.h>
 #include <sys/rwlock.h>
 #include <sys/socket.h>
 #include <sys/queue.h>
 
 #include <net/if.h>
 #include <net/if_var.h>
 #include <net/if_pflog.h>
 #include <net/pfil.h>
 #include <net/netisr.h>
 #include <net/route.h>
+#include <net/route/nhop.h>
 
 #include <netinet/in.h>
 #include <netinet/in_fib.h>
+#include <netinet/in_var.h>
 #include <netinet/ip.h>
 #include <netinet/ip_var.h>
 #include <netinet/ip_fw.h>
 #include <netinet/ip6.h>
 #include <netinet/icmp6.h>
 #include <netinet/ip_icmp.h>
 #include <netinet/tcp.h>
 #include <netinet/udp.h>
 #include <netinet6/in6_var.h>
 #include <netinet6/in6_fib.h>
 #include <netinet6/ip6_var.h>
 #include <netinet6/ip_fw_nat64.h>
 
 #include <netpfil/pf/pf.h>
 #include <netpfil/ipfw/ip_fw_private.h>
 #include <machine/in_cksum.h>
 
 #include "ip_fw_nat64.h"
 #include "nat64_translate.h"
 
 
 typedef int (*nat64_output_t)(struct ifnet *, struct mbuf *,
     struct sockaddr *, struct nat64_counters *, void *);
 typedef int (*nat64_output_one_t)(struct mbuf *, struct nat64_counters *,
     void *);
 
-static int nat64_find_route4(struct nhop4_basic *, struct sockaddr_in *,
+static struct nhop_object *nat64_find_route4(struct sockaddr_in *,
     struct mbuf *);
-static int nat64_find_route6(struct nhop6_basic *, struct sockaddr_in6 *,
+static struct nhop_object *nat64_find_route6(struct sockaddr_in6 *,
     struct mbuf *);
 static int nat64_output_one(struct mbuf *, struct nat64_counters *, void *);
 static int nat64_output(struct ifnet *, struct mbuf *, struct sockaddr *,
     struct nat64_counters *, void *);
 static int nat64_direct_output_one(struct mbuf *, struct nat64_counters *,
     void *);
 static int nat64_direct_output(struct ifnet *, struct mbuf *,
     struct sockaddr *, struct nat64_counters *, void *);
 
 /*
  * Pair of output callbacks used to emit translated packets.
  * Two fixed instances exist: nat64_netisr (nat64_output*) and
  * nat64_direct (nat64_direct_output*).
  */
 struct nat64_methods {
 	nat64_output_t		output;
 	nat64_output_one_t	output_one;
 };
 static const struct nat64_methods nat64_netisr = {
 	.output = nat64_output,
 	.output_one = nat64_output_one
 };
 static const struct nat64_methods nat64_direct = {
 	.output = nat64_direct_output,
 	.output_one = nat64_direct_output_one
 };
 
 /* These variables should be initialized explicitly on module loading */
 VNET_DEFINE_STATIC(const struct nat64_methods *, nat64out);
 VNET_DEFINE_STATIC(const int *, nat64ipstealth);
 VNET_DEFINE_STATIC(const int *, nat64ip6stealth);
 #define	V_nat64out		VNET(nat64out)
 #define	V_nat64ipstealth	VNET(nat64ipstealth)
 #define	V_nat64ip6stealth	VNET(nat64ip6stealth)
 
 static const int stealth_on = 1;
 #ifndef IPSTEALTH
 static const int stealth_off = 0;
 #endif
 
 /*
  * Select how translated packets are emitted.
  *
  * A non-zero @direct picks the nat64_direct methods, zero picks
  * nat64_netisr.  The choice also decides which flags the
  * V_nat64ipstealth / V_nat64ip6stealth pointers refer to.
  */
 void
 nat64_set_output_method(int direct)
 {
 
 	if (direct != 0) {
 		V_nat64out = &nat64_direct;
 #ifdef IPSTEALTH
 		/* Honor corresponding variables, if IPSTEALTH is defined */
 		V_nat64ipstealth = &V_ipstealth;
 		V_nat64ip6stealth = &V_ip6stealth;
 #else
 		/* otherwise we need to decrement HLIM/TTL for direct case */
 		V_nat64ipstealth = V_nat64ip6stealth = &stealth_off;
 #endif
 	} else {
 		V_nat64out = &nat64_netisr;
 		/* Leave TTL/HLIM decrementing to forwarding code */
 		V_nat64ipstealth = V_nat64ip6stealth = &stealth_on;
 	}
 }
 
 /*
  * Return 1 when the direct output methods are selected, 0 otherwise.
  */
 int
 nat64_get_output_method(void)
 {
 
 	if (V_nat64out == &nat64_direct)
 		return (1);
 	return (0);
 }
 
 /*
  * Tag @logdata as an outbound packet of address family @family and tap
  * it together with @m via ipfw_bpf_mtap2().
  */
 static void
 nat64_log(struct pfloghdr *logdata, struct mbuf *m, sa_family_t family)
 {
 
 	logdata->af = family;
 	logdata->dir = PF_OUT;
 
 	ipfw_bpf_mtap2(logdata, PFLOG_HDRLEN, m);
 }
 
 static int
 nat64_direct_output(struct ifnet *ifp, struct mbuf *m, struct sockaddr *dst,
     struct nat64_counters *stats, void *logdata)
 {
 	int error;
 
 	if (logdata != NULL)
 		nat64_log(logdata, m, dst->sa_family);
 	error = (*ifp->if_output)(ifp, m, dst, NULL);
 	if (error != 0)
 		NAT64STAT_INC(stats, oerrors);
 	return (error);
 }
 
 /*
  * Route and transmit a single packet @m without going through netisr.
  *
  * The IP version is read from the first header byte; the destination is
  * resolved with nat64_find_route4()/nat64_find_route6() and the packet
  * is passed to the nexthop interface's if_output routine.
  *
  * Returns EAFNOSUPPORT for an unknown IP version, EHOSTUNREACH when no
  * route is found (the mbuf is freed in both cases), otherwise the
  * if_output result.
  */
 static int
 nat64_direct_output_one(struct mbuf *m, struct nat64_counters *stats,
     void *logdata)
 {
-	struct nhop6_basic nh6;
-	struct nhop4_basic nh4;
+	struct nhop_object *nh4 = NULL;
+	struct nhop_object *nh6 = NULL;
 	struct sockaddr_in6 dst6;
 	struct sockaddr_in dst4;
 	struct sockaddr *dst;
 	struct ip6_hdr *ip6;
 	struct ip *ip4;
 	struct ifnet *ifp;
 	int error;
 
 	ip4 = mtod(m, struct ip *);
+	error = 0;
 	switch (ip4->ip_v) {
 	case IPVERSION:
 		dst4.sin_addr = ip4->ip_dst;
-		error = nat64_find_route4(&nh4, &dst4, m);
-		if (error != 0)
+		nh4 = nat64_find_route4(&dst4, m);
+		if (nh4 == NULL) {
 			NAT64STAT_INC(stats, noroute4);
-		else {
-			ifp = nh4.nh_ifp;
+			error = EHOSTUNREACH;
+		} else {
+			ifp = nh4->nh_ifp;
 			dst = (struct sockaddr *)&dst4;
 		}
 		break;
 	case (IPV6_VERSION >> 4):
 		ip6 = mtod(m, struct ip6_hdr *);
 		dst6.sin6_addr = ip6->ip6_dst;
-		error = nat64_find_route6(&nh6, &dst6, m);
-		if (error != 0)
+		nh6 = nat64_find_route6(&dst6, m);
+		if (nh6 == NULL) {
 			NAT64STAT_INC(stats, noroute6);
-		else {
-			ifp = nh6.nh_ifp;
+			error = EHOSTUNREACH;
+		} else {
+			ifp = nh6->nh_ifp;
 			dst = (struct sockaddr *)&dst6;
 		}
 		break;
 	default:
 		m_freem(m);
 		NAT64STAT_INC(stats, dropped);
 		DPRINTF(DP_DROPS, "dropped due to unknown IP version");
 		return (EAFNOSUPPORT);
 	}
 	/* ifp and dst are only set when a route was found */
 	if (error != 0) {
 		m_freem(m);
 		return (EHOSTUNREACH);
 	}
 	if (logdata != NULL)
 		nat64_log(logdata, m, dst->sa_family);
 	error = (*ifp->if_output)(ifp, m, dst, NULL);
 	if (error != 0)
 		NAT64STAT_INC(stats, oerrors);
 	return (error);
 }
 
 /*
  * Queue @m to the IPv4 or IPv6 netisr, chosen by the IP version found
  * in the first header byte.  @ifp and @dst are not used on this path.
  *
  * Returns EAFNOSUPPORT (freeing the mbuf) for an unknown IP version,
  * otherwise the netisr_queue() result; a failure bumps oerrors.
  */
 static int
 nat64_output(struct ifnet *ifp, struct mbuf *m, struct sockaddr *dst,
     struct nat64_counters *stats, void *logdata)
 {
 	struct ip *iph;
 	int af, isr, rc;
 
 	iph = mtod(m, struct ip *);
 	if (iph->ip_v == IPVERSION) {
 		af = AF_INET;
 		isr = NETISR_IP;
 	} else if (iph->ip_v == (IPV6_VERSION >> 4)) {
 		af = AF_INET6;
 		isr = NETISR_IPV6;
 	} else {
 		m_freem(m);
 		NAT64STAT_INC(stats, dropped);
 		DPRINTF(DP_DROPS, "unknown IP version");
 		return (EAFNOSUPPORT);
 	}
 
 	if (logdata != NULL)
 		nat64_log(logdata, m, af);
 	/* Fall back to the loopback interface when rcvif is not set */
 	if (m->m_pkthdr.rcvif == NULL)
 		m->m_pkthdr.rcvif = V_loif;
 
 	rc = netisr_queue(isr, m);
 	if (rc != 0)
 		NAT64STAT_INC(stats, oerrors);
 	return (rc);
 }
 
 /*
  * Single-packet variant of nat64_output(): calls it with no interface
  * and no destination.
  */
 static int
 nat64_output_one(struct mbuf *m, struct nat64_counters *stats, void *logdata)
 {
 	int rc;
 
 	rc = nat64_output(NULL, m, NULL, stats, logdata);
 
 	return (rc);
 }
 
 /*
  * Check the given IPv6 prefix length according to RFC6052:
  *   The prefixes can only have one of the following lengths:
  *   32, 40, 48, 56, 64, or 96 (The Well-Known Prefix is 96 bits long).
  * Returns zero on success, otherwise EINVAL.
  */
 int
 nat64_check_prefixlen(int length)
 {
 
 	if (length == 96)
 		return (0);
 	/* The remaining valid lengths are the multiples of 8 in 32..64 */
 	if (length >= 32 && length <= 64 && length % 8 == 0)
 		return (0);
 	return (EINVAL);
 }
 
 /*
  * Validate an IPv6 translation prefix and its length.
  *
  * Returns 0 when acceptable; EINVAL when the length is not allowed, the
  * well-known prefix is used with a length other than 96, byte 8 of the
  * prefix is non-zero, or the prefix is multicast, unspecified or
  * loopback.
  */
 int
 nat64_check_prefix6(const struct in6_addr *prefix, int length)
 {
 	int rc;
 
 	rc = nat64_check_prefixlen(length);
 	if (rc != 0)
 		return (EINVAL);
 
 	/* Well-known prefix has 96 prefix length */
 	if (length != 96 && IN6_IS_ADDR_WKPFX(prefix))
 		return (EINVAL);
 
 	/* Bits 64 to 71 must be set to zero */
 	if (prefix->__u6_addr.__u6_addr8[8] != 0)
 		return (EINVAL);
 
 	/* Some extra checks */
 	if (IN6_IS_ADDR_MULTICAST(prefix))
 		return (EINVAL);
 	if (IN6_IS_ADDR_UNSPECIFIED(prefix))
 		return (EINVAL);
 	if (IN6_IS_ADDR_LOOPBACK(prefix))
 		return (EINVAL);
 
 	return (0);
 }
 
 /*
  * Check whether IPv4 address @ia may be represented under config @cfg.
  *
  * @ia is compared against htonl()-converted constants, i.e. it is
  * expected in network byte order.
  *
  * Returns 1 when the address must be rejected: the config uses the
  * well-known prefix (NAT64_WKPFX) and @ia falls into one of the
  * non-global ranges listed below.  Returns 0 otherwise, and always 0
  * when NAT64_ALLOW_PRIVATE is set.
  */
 int
 nat64_check_private_ip4(const struct nat64_config *cfg, in_addr_t ia)
 {
 
 	if (cfg->flags & NAT64_ALLOW_PRIVATE)
 		return (0);
 
 	/* WKPFX must not be used to represent non-global IPv4 addresses */
 	if (cfg->flags & NAT64_WKPFX) {
 		/* IN_PRIVATE: 10/8, 172.16/12, 192.168/16 */
 		if ((ia & htonl(0xff000000)) == htonl(0x0a000000) ||
 		    (ia & htonl(0xfff00000)) == htonl(0xac100000) ||
 		    (ia & htonl(0xffff0000)) == htonl(0xc0a80000))
 			return (1);
 		/*
 		 * RFC 5735:
 		 *  192.0.0.0/24 - reserved for IETF protocol assignments
 		 *  192.88.99.0/24 - for use as 6to4 relay anycast addresses
 		 *  198.18.0.0/15 - for use in benchmark tests
 		 *  192.0.2.0/24, 198.51.100.0/24, 203.0.113.0/24 - for use
 		 *   in documentation and example code
 		 *
 		 * Each mask below matches the prefix length listed above:
 		 * /15 is 0xfffe0000 and /24 is 0xffffff00.
 		 */
 		if ((ia & htonl(0xffffff00)) == htonl(0xc0000000) ||
 		    (ia & htonl(0xffffff00)) == htonl(0xc0586300) ||
 		    (ia & htonl(0xfffe0000)) == htonl(0xc6120000) ||
 		    (ia & htonl(0xffffff00)) == htonl(0xc0000200) ||
 		    (ia & htonl(0xffffff00)) == htonl(0xc6336400) ||
 		    (ia & htonl(0xffffff00)) == htonl(0xcb007100))
 			return (1);
 	}
 	return (0);
 }
 
 /*
  * Embed @ia IPv4 address into @ip6 IPv6 address.
  * The embedding position is determined by the prefix length @plen,
  * which must be 32, 40, 48, 56, 64 or 96; any other value panics.
  *
  * @ia is stored without byte swapping, so it has to be in the same
  * (network) byte order as the address words of @ip6.  For lengths 40,
  * 48 and 56 the prefix bits already present in @ip6 are preserved.
  */
 void
 nat64_embed_ip4(struct in6_addr *ip6, int plen, in_addr_t ia)
 {
 
 	switch (plen) {
 	case 32:
 	case 96:
 		ip6->s6_addr32[plen / 32] = ia;
 		break;
 	case 40:
 	case 48:
 	case 56:
 		/*
 		 * Preserve prefix bits.
 		 * Since suffix bits should be zero and reserved for future
 		 * use, we just overwrite the whole word, where they are.
 		 *
 		 * The mask is built as a host-order value and converted
 		 * with htonl() so that it covers the leading (prefix) bytes
 		 * of the word on little-endian hosts too; applied as is it
 		 * would keep the trailing bytes there and wipe the prefix.
 		 */
 		ip6->s6_addr32[1] &= htonl(0xffffffff << (32 - plen % 32));
 #if BYTE_ORDER == BIG_ENDIAN
 		ip6->s6_addr32[1] |= ia >> (plen % 32);
 		ip6->s6_addr32[2] = ia << (24 - plen % 32);
 #elif BYTE_ORDER == LITTLE_ENDIAN
 		ip6->s6_addr32[1] |= ia << (plen % 32);
 		ip6->s6_addr32[2] = ia >> (24 - plen % 32);
 #endif
 		break;
 	case 64:
 #if BYTE_ORDER == BIG_ENDIAN
 		ip6->s6_addr32[2] = ia >> 8;
 		ip6->s6_addr32[3] = ia << 24;
 #elif BYTE_ORDER == LITTLE_ENDIAN
 		ip6->s6_addr32[2] = ia << 8;
 		ip6->s6_addr32[3] = ia >> 24;
 #endif
 		break;
 	default:
 		panic("Wrong plen: %d", plen);
 	}
 	/*
 	 * Bits 64 to 71 of the address are reserved for compatibility
 	 * with the host identifier format defined in the IPv6 addressing
 	 * architecture [RFC4291]. These bits MUST be set to zero.
 	 */
 	ip6->s6_addr8[8] = 0;
 }
 
 /*
  * Extract the IPv4 address embedded in @ip6 at the position implied by
  * prefix length @plen.
  *
  * Returns the address without byte swapping, or 0 when the reserved or
  * suffix bits checked below are non-zero, @plen is not one of
  * 32/40/48/56/64/96, or nat64_check_ip4() rejects the extracted
  * address.
  */
 in_addr_t
 nat64_extract_ip4(const struct in6_addr *ip6, int plen)
 {
 	in_addr_t ia;
 
 	/*
 	 * According to RFC 6052 p2.2:
 	 * IPv4-embedded IPv6 addresses are composed of a variable-length
 	 * prefix, the embedded IPv4 address, and a variable length suffix.
 	 * The suffix bits are reserved for future extensions and SHOULD
 	 * be set to zero.
 	 */
 	/* First pass: reject addresses with non-zero reserved/suffix bits */
 	switch (plen) {
 	case 32:
 		if (ip6->s6_addr32[3] != 0 || ip6->s6_addr32[2] != 0)
 			goto badip6;
 		break;
 	case 40:
 		if (ip6->s6_addr32[3] != 0 ||
 		    (ip6->s6_addr32[2] & htonl(0xff00ffff)) != 0)
 			goto badip6;
 		break;
 	case 48:
 		if (ip6->s6_addr32[3] != 0 ||
 		    (ip6->s6_addr32[2] & htonl(0xff0000ff)) != 0)
 			goto badip6;
 		break;
 	case 56:
 		if (ip6->s6_addr32[3] != 0 || ip6->s6_addr8[8] != 0)
 			goto badip6;
 		break;
 	case 64:
 		if (ip6->s6_addr8[8] != 0 ||
 		    (ip6->s6_addr32[3] & htonl(0x00ffffff)) != 0)
 			goto badip6;
 	};
 	/* Second pass: reassemble the IPv4 address from its position */
 	switch (plen) {
 	case 32:
 	case 96:
 		ia = ip6->s6_addr32[plen / 32];
 		break;
 	case 40:
 	case 48:
 	case 56:
 #if BYTE_ORDER == BIG_ENDIAN
 		ia = (ip6->s6_addr32[1] << (plen % 32)) |
 		    (ip6->s6_addr32[2] >> (24 - plen % 32));
 #elif BYTE_ORDER == LITTLE_ENDIAN
 		ia = (ip6->s6_addr32[1] >> (plen % 32)) |
 		    (ip6->s6_addr32[2] << (24 - plen % 32));
 #endif
 		break;
 	case 64:
 #if BYTE_ORDER == BIG_ENDIAN
 		ia = (ip6->s6_addr32[2] << 8) | (ip6->s6_addr32[3] >> 24);
 #elif BYTE_ORDER == LITTLE_ENDIAN
 		ia = (ip6->s6_addr32[2] >> 8) | (ip6->s6_addr32[3] << 24);
 #endif
 		break;
 	default:
 		return (0);
 	};
 	if (nat64_check_ip4(ia) == 0)
 		return (ia);
 
 	DPRINTF(DP_GENERIC | DP_DROPS,
 	    "invalid destination address: %08x", ia);
 	return (0);
 badip6:
 	DPRINTF(DP_GENERIC | DP_DROPS, "invalid IPv4-embedded IPv6 address");
 	return (0);
 }
 
 /*
  * According to RFC 1624 the equation for incremental checksum update is:
  *	HC' = ~(~HC + ~m + m')	--	[Eqn. 3]
  *	HC' = HC - ~m - m'	--	[Eqn. 4]
  * So, when we are replacing IPv4 addresses to IPv6, we
  * can assume, that new bytes previously were zeros, and vise versa -
  * when we replacing IPv6 addresses to IPv4, now unused bytes become
  * zeros. The payload length in pseudo header has bigger size, but one
  * half of it should be zero. Using the equation 4 we get:
  *	HC' = HC - (~m0 + m0')	-- m0 is first changed word
  *	HC' = (HC - (~m0 + m0')) - (~m1 + m1')	-- m1 is second changed word
  *	HC' = HC - ~m0 - m0' - ~m1 - m1' - ... =
  *	  = HC - sum(~m[i] + m'[i])
  *
  * The function result should be used as follows:
  *	IPv6 to IPv4:	HC' = cksum_add(HC, result)
  *	IPv4 to IPv6:	HC' = cksum_add(HC, ~result)
  */
 static uint16_t
 nat64_cksum_convert(struct ip6_hdr *ip6, struct ip *ip)
 {
 	uint32_t sum;
 	uint16_t *p;
 
 	sum = ~ip->ip_src.s_addr >> 16;
 	sum += ~ip->ip_src.s_addr & 0xffff;
 	sum += ~ip->ip_dst.s_addr >> 16;
 	sum += ~ip->ip_dst.s_addr & 0xffff;
 
 	for (p = (uint16_t *)&ip6->ip6_src;
 	    p < (uint16_t *)(&ip6->ip6_src + 2); p++)
 		sum += *p;
 
 	while (sum >> 16)
 		sum = (sum & 0xffff) + (sum >> 16);
 	return (sum);
 }
 
 static void
 nat64_init_ip4hdr(const struct ip6_hdr *ip6, const struct ip6_frag *frag,
     uint16_t plen, uint8_t proto, struct ip *ip)
 {
 
 	/* assume addresses are already initialized */
 	ip->ip_v = IPVERSION;
 	ip->ip_hl = sizeof(*ip) >> 2;
 	ip->ip_tos = (ntohl(ip6->ip6_flow) >> 20) & 0xff;
 	ip->ip_len = htons(sizeof(*ip) + plen);
 	ip->ip_ttl = ip6->ip6_hlim;
 	if (*V_nat64ip6stealth == 0)
 		ip->ip_ttl -= IPV6_HLIMDEC;
 	ip->ip_sum = 0;
 	ip->ip_p = (proto == IPPROTO_ICMPV6) ? IPPROTO_ICMP: proto;
 	ip_fillid(ip);
 	if (frag != NULL) {
 		ip->ip_off = htons(ntohs(frag->ip6f_offlg) >> 3);
 		if (frag->ip6f_offlg & IP6F_MORE_FRAG)
 			ip->ip_off |= htons(IP_MF);
 	} else {
 		ip->ip_off = htons(IP_DF);
 	}
 	ip->ip_sum = in_cksum_hdr(ip);
 }
 
 #define	FRAGSZ(mtu) ((mtu) - sizeof(struct ip6_hdr) - sizeof(struct ip6_frag))
 static NAT64NOINLINE int
 nat64_fragment6(struct nat64_counters *stats, struct ip6_hdr *ip6,
     struct mbufq *mq, struct mbuf *m, uint32_t mtu, uint16_t ip_id,
     uint16_t ip_off)
 {
 	struct ip6_frag ip6f;
 	struct mbuf *n;
 	uint16_t hlen, len, offset;
 	int plen;
 
 	plen = ntohs(ip6->ip6_plen);
 	hlen = sizeof(struct ip6_hdr);
 
 	/* Fragmentation isn't needed */
 	if (ip_off == 0 && plen <= mtu - hlen) {
 		M_PREPEND(m, hlen, M_NOWAIT);
 		if (m == NULL) {
 			NAT64STAT_INC(stats, nomem);
 			return (ENOMEM);
 		}
 		bcopy(ip6, mtod(m, void *), hlen);
 		if (mbufq_enqueue(mq, m) != 0) {
 			m_freem(m);
 			NAT64STAT_INC(stats, dropped);
 			DPRINTF(DP_DROPS, "dropped due to mbufq overflow");
 			return (ENOBUFS);
 		}
 		return (0);
 	}
 
 	hlen += sizeof(struct ip6_frag);
 	ip6f.ip6f_reserved = 0;
 	ip6f.ip6f_nxt = ip6->ip6_nxt;
 	ip6->ip6_nxt = IPPROTO_FRAGMENT;
 	if (ip_off != 0) {
 		/*
 		 * We have got an IPv4 fragment.
 		 * Use offset value and ip_id from original fragment.
 		 */
 		ip6f.ip6f_ident = htonl(ntohs(ip_id));
 		offset = (ntohs(ip_off) & IP_OFFMASK) << 3;
 		NAT64STAT_INC(stats, ifrags);
 	} else {
 		/* The packet size exceeds interface MTU */
 		ip6f.ip6f_ident = htonl(ip6_randomid());
 		offset = 0; /* First fragment*/
 	}
 	while (plen > 0 && m != NULL) {
 		n = NULL;
 		len = FRAGSZ(mtu) & ~7;
 		if (len > plen)
 			len = plen;
 		ip6->ip6_plen = htons(len + sizeof(ip6f));
 		ip6f.ip6f_offlg = ntohs(offset);
 		if (len < plen || (ip_off & htons(IP_MF)) != 0)
 			ip6f.ip6f_offlg |= IP6F_MORE_FRAG;
 		offset += len;
 		plen -= len;
 		if (plen > 0) {
 			n = m_split(m, len, M_NOWAIT);
 			if (n == NULL)
 				goto fail;
 		}
 		M_PREPEND(m, hlen, M_NOWAIT);
 		if (m == NULL)
 			goto fail;
 		bcopy(ip6, mtod(m, void *), sizeof(struct ip6_hdr));
 		bcopy(&ip6f, mtodo(m, sizeof(struct ip6_hdr)),
 		    sizeof(struct ip6_frag));
 		if (mbufq_enqueue(mq, m) != 0)
 			goto fail;
 		m = n;
 	}
 	NAT64STAT_ADD(stats, ofrags, mbufq_len(mq));
 	return (0);
 fail:
 	if (m != NULL)
 		m_freem(m);
 	if (n != NULL)
 		m_freem(n);
 	mbufq_drain(mq);
 	NAT64STAT_INC(stats, nomem);
 	return (ENOMEM);
 }
 
-static NAT64NOINLINE int
-nat64_find_route6(struct nhop6_basic *pnh, struct sockaddr_in6 *dst,
-    struct mbuf *m)
+static struct nhop_object *
+nat64_find_route6(struct sockaddr_in6 *dst, struct mbuf *m)
 {
-
-	if (fib6_lookup_nh_basic(M_GETFIB(m), &dst->sin6_addr, 0, 0, 0,
-	    pnh) != 0)
-		return (EHOSTUNREACH);
-	if (pnh->nh_flags & (NHF_BLACKHOLE | NHF_REJECT))
-		return (EHOSTUNREACH);
+	struct nhop_object *nh;
+
+	NET_EPOCH_ASSERT();
+	nh = fib6_lookup(M_GETFIB(m), &dst->sin6_addr, 0, 0, 0);
+	if (nh == NULL || (nh->nh_flags & (NHF_BLACKHOLE | NHF_REJECT)) != 0)
+		return (NULL);
 	/*
 	 * XXX: we need to use destination address with embedded scope
 	 * zone id, because LLTABLE uses such form of addresses for lookup.
 	 */
 	dst->sin6_family = AF_INET6;
 	dst->sin6_len = sizeof(*dst);
-	dst->sin6_addr = pnh->nh_addr;
+	if ((nh->nh_flags & NHF_GATEWAY) != 0)	/* else dst is on-link */
+		dst->sin6_addr = nh->gw6_sa.sin6_addr;
 	if (IN6_IS_SCOPE_LINKLOCAL(&dst->sin6_addr))
 		dst->sin6_addr.s6_addr16[1] =
-		    htons(pnh->nh_ifp->if_index & 0xffff);
+		    htons(nh->nh_ifp->if_index & 0xffff);
 	dst->sin6_port = 0;
 	dst->sin6_scope_id = 0;
 	dst->sin6_flowinfo = 0;
 
-	return (0);
+	return (nh);
 }
 
 #define	NAT64_ICMP6_PLEN	64
 static NAT64NOINLINE void
 nat64_icmp6_reflect(struct mbuf *m, uint8_t type, uint8_t code, uint32_t mtu,
     struct nat64_counters *stats, void *logdata)
 {
 	struct icmp6_hdr *icmp6;
 	struct ip6_hdr *ip6, *oip6;
 	struct mbuf *n;
 	int len, plen, proto;
 
 	len = 0;
 	proto = nat64_getlasthdr(m, &len);
 	if (proto < 0) {
 		DPRINTF(DP_DROPS, "mbuf isn't contigious");
 		goto freeit;
 	}
 	/*
 	 * Do not send ICMPv6 in reply to ICMPv6 errors.
 	 */
 	if (proto == IPPROTO_ICMPV6) {
 		if (m->m_len < len + sizeof(*icmp6)) {
 			DPRINTF(DP_DROPS, "mbuf isn't contigious");
 			goto freeit;
 		}
 		icmp6 = mtodo(m, len);
 		if (icmp6->icmp6_type < ICMP6_ECHO_REQUEST ||
 		    icmp6->icmp6_type == ND_REDIRECT) {
 			DPRINTF(DP_DROPS, "do not send ICMPv6 in reply to "
 			    "ICMPv6 errors");
 			goto freeit;
 		}
 		/*
 		 * If there are extra headers between IPv6 and ICMPv6,
 		 * strip off them.
 		 */
 		if (len > sizeof(struct ip6_hdr)) {
 			/*
 			 * NOTE: ipfw_chk already did m_pullup() and it is
 			 * expected that data is contigious from the start
 			 * of IPv6 header up to the end of ICMPv6 header.
 			 */
 			bcopy(mtod(m, caddr_t),
 			    mtodo(m, len - sizeof(struct ip6_hdr)),
 			    sizeof(struct ip6_hdr));
 			m_adj(m, len - sizeof(struct ip6_hdr));
 		}
 	}
 	/*
 	if (icmp6_ratelimit(&ip6->ip6_src, type, code))
 		goto freeit;
 		*/
 	ip6 = mtod(m, struct ip6_hdr *);
 	switch (type) {
 	case ICMP6_DST_UNREACH:
 	case ICMP6_PACKET_TOO_BIG:
 	case ICMP6_TIME_EXCEEDED:
 	case ICMP6_PARAM_PROB:
 		break;
 	default:
 		goto freeit;
 	}
 	/* Calculate length of ICMPv6 payload */
 	len = (m->m_pkthdr.len > NAT64_ICMP6_PLEN) ? NAT64_ICMP6_PLEN:
 	    m->m_pkthdr.len;
 
 	/* Create new ICMPv6 datagram */
 	plen = len + sizeof(struct icmp6_hdr);
 	n = m_get2(sizeof(struct ip6_hdr) + plen + max_hdr, M_NOWAIT,
 	    MT_HEADER, M_PKTHDR);
 	if (n == NULL) {
 		NAT64STAT_INC(stats, nomem);
 		m_freem(m);
 		return;
 	}
 	/*
 	 * Move pkthdr from original mbuf. We should have initialized some
 	 * fields, because we can reinject this mbuf to netisr and it will
 	 * go trough input path (it requires at least rcvif should be set).
 	 * Also do M_ALIGN() to reduce chances of need to allocate new mbuf
 	 * in the chain, when we will do M_PREPEND() or make some type of
 	 * tunneling.
 	 */
 	m_move_pkthdr(n, m);
 	M_ALIGN(n, sizeof(struct ip6_hdr) + plen + max_hdr);
 
 	n->m_len = n->m_pkthdr.len = sizeof(struct ip6_hdr) + plen;
 	oip6 = mtod(n, struct ip6_hdr *);
 	/*
 	 * Make IPv6 source address selection for reflected datagram.
 	 * nat64_check_ip6() doesn't allow scoped addresses, therefore
 	 * we use zero scopeid.
 	 */
 	if (in6_selectsrc_addr(M_GETFIB(n), &ip6->ip6_src, 0,
 	    n->m_pkthdr.rcvif, &oip6->ip6_src, NULL) != 0) {
 		/*
 		 * Failed to find proper source address, drop the packet.
 		 */
 		m_freem(n);
 		goto freeit;
 	}
 	oip6->ip6_dst = ip6->ip6_src;
 	oip6->ip6_nxt = IPPROTO_ICMPV6;
 	oip6->ip6_flow = 0;
 	oip6->ip6_vfc |= IPV6_VERSION;
 	oip6->ip6_hlim = V_ip6_defhlim;
 	oip6->ip6_plen = htons(plen);
 
 	icmp6 = mtodo(n, sizeof(struct ip6_hdr));
 	icmp6->icmp6_cksum = 0;
 	icmp6->icmp6_type = type;
 	icmp6->icmp6_code = code;
 	icmp6->icmp6_mtu = htonl(mtu);
 
 	m_copydata(m, 0, len, mtodo(n, sizeof(struct ip6_hdr) +
 	    sizeof(struct icmp6_hdr)));
 	icmp6->icmp6_cksum = in6_cksum(n, IPPROTO_ICMPV6,
 	    sizeof(struct ip6_hdr), plen);
 	m_freem(m);
 	V_nat64out->output_one(n, stats, logdata);
 	return;
 freeit:
 	NAT64STAT_INC(stats, dropped);
 	m_freem(m);
 }
 
-static NAT64NOINLINE int
-nat64_find_route4(struct nhop4_basic *pnh, struct sockaddr_in *dst,
-    struct mbuf *m)
+static struct nhop_object *
+nat64_find_route4(struct sockaddr_in *dst, struct mbuf *m)
 {
+	struct nhop_object *nh;
 
-	if (fib4_lookup_nh_basic(M_GETFIB(m), dst->sin_addr, 0, 0, pnh) != 0)
-		return (EHOSTUNREACH);
-	if (pnh->nh_flags & (NHF_BLACKHOLE | NHF_BROADCAST | NHF_REJECT))
-		return (EHOSTUNREACH);
+	NET_EPOCH_ASSERT();
+	nh = fib4_lookup(M_GETFIB(m), dst->sin_addr, 0, 0, 0);
+	if (nh == NULL || (nh->nh_flags &
+	    (NHF_BLACKHOLE | NHF_BROADCAST | NHF_REJECT)) != 0)
+		return (NULL);
 
 	dst->sin_family = AF_INET;
 	dst->sin_len = sizeof(*dst);
-	dst->sin_addr = pnh->nh_addr;
+	if ((nh->nh_flags & NHF_GATEWAY) != 0)	/* else dst is on-link */
+		dst->sin_addr = nh->gw4_sa.sin_addr;
 	dst->sin_port = 0;
-	return (0);
+	return (nh);
 }
 
 #define	NAT64_ICMP_PLEN	64
 static NAT64NOINLINE void
 nat64_icmp_reflect(struct mbuf *m, uint8_t type,
     uint8_t code, uint16_t mtu, struct nat64_counters *stats, void *logdata)
 {
 	struct icmp *icmp;
 	struct ip *ip, *oip;
 	struct mbuf *n;
 	int len, plen;
 
 	ip = mtod(m, struct ip *);
 	/* Do not send ICMP error if packet is not the first fragment */
 	if (ip->ip_off & ~ntohs(IP_MF|IP_DF)) {
 		DPRINTF(DP_DROPS, "not first fragment");
 		goto freeit;
 	}
 	/* Do not send ICMP in reply to ICMP errors */
 	if (ip->ip_p == IPPROTO_ICMP) {
 		if (m->m_len < (ip->ip_hl << 2)) {
 			DPRINTF(DP_DROPS, "mbuf isn't contigious");
 			goto freeit;
 		}
 		icmp = mtodo(m, ip->ip_hl << 2);
 		if (!ICMP_INFOTYPE(icmp->icmp_type)) {
 			DPRINTF(DP_DROPS, "do not send ICMP in reply to "
 			    "ICMP errors");
 			goto freeit;
 		}
 	}
 	switch (type) {
 	case ICMP_UNREACH:
 	case ICMP_TIMXCEED:
 	case ICMP_PARAMPROB:
 		break;
 	default:
 		goto freeit;
 	}
 	/* Calculate length of ICMP payload */
 	len = (m->m_pkthdr.len > NAT64_ICMP_PLEN) ? (ip->ip_hl << 2) + 8:
 	    m->m_pkthdr.len;
 
 	/* Create new ICMPv4 datagram */
 	plen = len + sizeof(struct icmphdr) + sizeof(uint32_t);
 	n = m_get2(sizeof(struct ip) + plen + max_hdr, M_NOWAIT,
 	    MT_HEADER, M_PKTHDR);
 	if (n == NULL) {
 		NAT64STAT_INC(stats, nomem);
 		m_freem(m);
 		return;
 	}
 	m_move_pkthdr(n, m);
 	M_ALIGN(n, sizeof(struct ip) + plen + max_hdr);
 
 	n->m_len = n->m_pkthdr.len = sizeof(struct ip) + plen;
 	oip = mtod(n, struct ip *);
 	oip->ip_v = IPVERSION;
 	oip->ip_hl = sizeof(struct ip) >> 2;
 	oip->ip_tos = 0;
 	oip->ip_len = htons(n->m_pkthdr.len);
 	oip->ip_ttl = V_ip_defttl;
 	oip->ip_p = IPPROTO_ICMP;
 	ip_fillid(oip);
 	oip->ip_off = htons(IP_DF);
 	oip->ip_src = ip->ip_dst;
 	oip->ip_dst = ip->ip_src;
 	oip->ip_sum = 0;
 	oip->ip_sum = in_cksum_hdr(oip);
 
 	icmp = mtodo(n, sizeof(struct ip));
 	icmp->icmp_type = type;
 	icmp->icmp_code = code;
 	icmp->icmp_cksum = 0;
 	icmp->icmp_pmvoid = 0;
 	icmp->icmp_nextmtu = htons(mtu);
 	m_copydata(m, 0, len, mtodo(n, sizeof(struct ip) +
 	    sizeof(struct icmphdr) + sizeof(uint32_t)));
 	icmp->icmp_cksum = in_cksum_skip(n, sizeof(struct ip) + plen,
 	    sizeof(struct ip));
 	m_freem(m);
 	V_nat64out->output_one(n, stats, logdata);
 	return;
 freeit:
 	NAT64STAT_INC(stats, dropped);
 	m_freem(m);
 }
 
 /* Translate ICMP echo request/reply into ICMPv6 */
 static void
 nat64_icmp_handle_echo(struct ip6_hdr *ip6, struct icmp6_hdr *icmp6,
     uint16_t id, uint8_t type)
 {
 	uint16_t old;
 
 	old = *(uint16_t *)icmp6;	/* save type+code in one word */
 	icmp6->icmp6_type = type;
 	/* Reflect ICMPv6 -> ICMPv4 type translation in the cksum */
 	icmp6->icmp6_cksum = cksum_adjust(icmp6->icmp6_cksum,
 	    old, *(uint16_t *)icmp6);
 	if (id != 0) {
 		old = icmp6->icmp6_id;
 		icmp6->icmp6_id = id;
 		/* Reflect ICMP id translation in the cksum */
 		icmp6->icmp6_cksum = cksum_adjust(icmp6->icmp6_cksum,
 		    old, id);
 	}
 	/* Reflect IPv6 pseudo header in the cksum */
 	icmp6->icmp6_cksum = ~in6_cksum_pseudo(ip6, ntohs(ip6->ip6_plen),
 	    IPPROTO_ICMPV6, ~icmp6->icmp6_cksum);
 }
 
 static NAT64NOINLINE struct mbuf *
 nat64_icmp_translate(struct mbuf *m, struct ip6_hdr *ip6, uint16_t icmpid,
     int offset, struct nat64_config *cfg)
 {
 	struct ip ip;
 	struct icmp *icmp;
 	struct tcphdr *tcp;
 	struct udphdr *udp;
 	struct ip6_hdr *eip6;
 	struct mbuf *n;
 	uint32_t mtu;
 	int len, hlen, plen;
 	uint8_t type, code;
 
 	if (m->m_len < offset + ICMP_MINLEN)
 		m = m_pullup(m, offset + ICMP_MINLEN);
 	if (m == NULL) {
 		NAT64STAT_INC(&cfg->stats, nomem);
 		return (m);
 	}
 	mtu = 0;
 	icmp = mtodo(m, offset);
 	/* RFC 7915 p4.2 */
 	switch (icmp->icmp_type) {
 	case ICMP_ECHOREPLY:
 		type = ICMP6_ECHO_REPLY;
 		code = 0;
 		break;
 	case ICMP_UNREACH:
 		type = ICMP6_DST_UNREACH;
 		switch (icmp->icmp_code) {
 		case ICMP_UNREACH_NET:
 		case ICMP_UNREACH_HOST:
 		case ICMP_UNREACH_SRCFAIL:
 		case ICMP_UNREACH_NET_UNKNOWN:
 		case ICMP_UNREACH_HOST_UNKNOWN:
 		case ICMP_UNREACH_TOSNET:
 		case ICMP_UNREACH_TOSHOST:
 			code = ICMP6_DST_UNREACH_NOROUTE;
 			break;
 		case ICMP_UNREACH_PROTOCOL:
 			type = ICMP6_PARAM_PROB;
 			code = ICMP6_PARAMPROB_NEXTHEADER;
 			break;
 		case ICMP_UNREACH_PORT:
 			code = ICMP6_DST_UNREACH_NOPORT;
 			break;
 		case ICMP_UNREACH_NEEDFRAG:
 			type = ICMP6_PACKET_TOO_BIG;
 			code = 0;
 			/* XXX: needs an additional look */
 			mtu = max(IPV6_MMTU, ntohs(icmp->icmp_nextmtu) + 20);
 			break;
 		case ICMP_UNREACH_NET_PROHIB:
 		case ICMP_UNREACH_HOST_PROHIB:
 		case ICMP_UNREACH_FILTER_PROHIB:
 		case ICMP_UNREACH_PRECEDENCE_CUTOFF:
 			code = ICMP6_DST_UNREACH_ADMIN;
 			break;
 		default:
 			DPRINTF(DP_DROPS, "Unsupported ICMP type %d, code %d",
 			    icmp->icmp_type, icmp->icmp_code);
 			goto freeit;
 		}
 		break;
 	case ICMP_TIMXCEED:
 		type = ICMP6_TIME_EXCEEDED;
 		code = icmp->icmp_code;
 		break;
 	case ICMP_ECHO:
 		type = ICMP6_ECHO_REQUEST;
 		code = 0;
 		break;
 	case ICMP_PARAMPROB:
 		type = ICMP6_PARAM_PROB;
 		switch (icmp->icmp_code) {
 		case ICMP_PARAMPROB_ERRATPTR:
 		case ICMP_PARAMPROB_LENGTH:
 			code = ICMP6_PARAMPROB_HEADER;
 			switch (icmp->icmp_pptr) {
 			case 0: /* Version/IHL */
 			case 1: /* Type Of Service */
 				mtu = icmp->icmp_pptr;
 				break;
 			case 2: /* Total Length */
 			case 3: mtu = 4; /* Payload Length */
 				break;
 			case 8: /* Time to Live */
 				mtu = 7; /* Hop Limit */
 				break;
 			case 9: /* Protocol */
 				mtu = 6; /* Next Header */
 				break;
 			case 12: /* Source address */
 			case 13:
 			case 14:
 			case 15:
 				mtu = 8;
 				break;
 			case 16: /* Destination address */
 			case 17:
 			case 18:
 			case 19:
 				mtu = 24;
 				break;
 			default: /* Silently drop */
 				DPRINTF(DP_DROPS, "Unsupported ICMP type %d,"
 				    " code %d, pptr %d", icmp->icmp_type,
 				    icmp->icmp_code, icmp->icmp_pptr);
 				goto freeit;
 			}
 			break;
 		default:
 			DPRINTF(DP_DROPS, "Unsupported ICMP type %d,"
 			    " code %d, pptr %d", icmp->icmp_type,
 			    icmp->icmp_code, icmp->icmp_pptr);
 			goto freeit;
 		}
 		break;
 	default:
 		DPRINTF(DP_DROPS, "Unsupported ICMP type %d, code %d",
 		    icmp->icmp_type, icmp->icmp_code);
 		goto freeit;
 	}
 	/*
 	 * For echo request/reply we can use original payload,
 	 * but we need adjust icmp_cksum, because ICMPv6 cksum covers
 	 * IPv6 pseudo header and ICMPv6 types differs from ICMPv4.
 	 */
 	if (type == ICMP6_ECHO_REQUEST || type == ICMP6_ECHO_REPLY) {
 		nat64_icmp_handle_echo(ip6, ICMP6(icmp), icmpid, type);
 		return (m);
 	}
 	/*
 	 * For other types of ICMP messages we need to translate inner
 	 * IPv4 header to IPv6 header.
 	 * Assume ICMP src is the same as payload dst
 	 * E.g. we have ( GWsrc1 , NATIP1 ) in outer header
 	 * and          ( NATIP1, Hostdst1 ) in ICMP copy header.
 	 * In that case, we already have map for NATIP1 and GWsrc1.
 	 * The only thing we need is to copy IPv6 map prefix to
 	 * Hostdst1.
 	 */
 	hlen = offset + ICMP_MINLEN;
 	if (m->m_pkthdr.len < hlen + sizeof(struct ip) + ICMP_MINLEN) {
 		DPRINTF(DP_DROPS, "Message is too short %d",
 		    m->m_pkthdr.len);
 		goto freeit;
 	}
 	m_copydata(m, hlen, sizeof(struct ip), (char *)&ip);
 	if (ip.ip_v != IPVERSION) {
 		DPRINTF(DP_DROPS, "Wrong IP version %d", ip.ip_v);
 		goto freeit;
 	}
 	hlen += ip.ip_hl << 2; /* Skip inner IP header */
 	if (nat64_check_ip4(ip.ip_src.s_addr) != 0 ||
 	    nat64_check_ip4(ip.ip_dst.s_addr) != 0 ||
 	    nat64_check_private_ip4(cfg, ip.ip_src.s_addr) != 0 ||
 	    nat64_check_private_ip4(cfg, ip.ip_dst.s_addr) != 0) {
 		DPRINTF(DP_DROPS, "IP addresses checks failed %04x -> %04x",
 		    ntohl(ip.ip_src.s_addr), ntohl(ip.ip_dst.s_addr));
 		goto freeit;
 	}
 	if (m->m_pkthdr.len < hlen + ICMP_MINLEN) {
 		DPRINTF(DP_DROPS, "Message is too short %d",
 		    m->m_pkthdr.len);
 		goto freeit;
 	}
 #if 0
 	/*
 	 * Check that inner source matches the outer destination.
 	 * XXX: We need some method to convert IPv4 into IPv6 address here,
 	 *	and compare IPv6 addresses.
 	 */
 	if (ip.ip_src.s_addr != nat64_get_ip4(&ip6->ip6_dst)) {
 		DPRINTF(DP_GENERIC, "Inner source doesn't match destination ",
 		    "%04x vs %04x", ip.ip_src.s_addr,
 		    nat64_get_ip4(&ip6->ip6_dst));
 		goto freeit;
 	}
 #endif
 	/*
 	 * Create new mbuf for ICMPv6 datagram.
 	 * NOTE: len is data length just after inner IP header.
 	 */
 	len = m->m_pkthdr.len - hlen;
 	if (sizeof(struct ip6_hdr) +
 	    sizeof(struct icmp6_hdr) + len > NAT64_ICMP6_PLEN)
 		len = NAT64_ICMP6_PLEN - sizeof(struct icmp6_hdr) -
 		    sizeof(struct ip6_hdr);
 	plen = sizeof(struct icmp6_hdr) + sizeof(struct ip6_hdr) + len;
 	n = m_get2(offset + plen + max_hdr, M_NOWAIT, MT_HEADER, M_PKTHDR);
 	if (n == NULL) {
 		NAT64STAT_INC(&cfg->stats, nomem);
 		m_freem(m);
 		return (NULL);
 	}
 	m_move_pkthdr(n, m);
 	M_ALIGN(n, offset + plen + max_hdr);
 	n->m_len = n->m_pkthdr.len = offset + plen;
 	/* Adjust ip6_plen in outer header */
 	ip6->ip6_plen = htons(plen);
 	/* Construct new inner IPv6 header */
 	eip6 = mtodo(n, offset + sizeof(struct icmp6_hdr));
 	eip6->ip6_src = ip6->ip6_dst;
 
 	/* Use the same prefix that we have in outer header */
 	eip6->ip6_dst = ip6->ip6_src;
 	MPASS(cfg->flags & NAT64_PLATPFX);
 	nat64_embed_ip4(&eip6->ip6_dst, cfg->plat_plen, ip.ip_dst.s_addr);
 
 	eip6->ip6_flow = htonl(ip.ip_tos << 20);
 	eip6->ip6_vfc |= IPV6_VERSION;
 	eip6->ip6_hlim = ip.ip_ttl;
 	eip6->ip6_plen = htons(ntohs(ip.ip_len) - (ip.ip_hl << 2));
 	eip6->ip6_nxt = (ip.ip_p == IPPROTO_ICMP) ? IPPROTO_ICMPV6: ip.ip_p;
 	m_copydata(m, hlen, len, (char *)(eip6 + 1));
 	/*
 	 * We need to translate source port in the inner ULP header,
 	 * and adjust ULP checksum.
 	 */
 	switch (ip.ip_p) {
 	case IPPROTO_TCP:
 		if (len < offsetof(struct tcphdr, th_sum))
 			break;
 		tcp = TCP(eip6 + 1);
 		if (icmpid != 0) {
 			tcp->th_sum = cksum_adjust(tcp->th_sum,
 			    tcp->th_sport, icmpid);
 			tcp->th_sport = icmpid;
 		}
 		tcp->th_sum = cksum_add(tcp->th_sum,
 		    ~nat64_cksum_convert(eip6, &ip));
 		break;
 	case IPPROTO_UDP:
 		if (len < offsetof(struct udphdr, uh_sum))
 			break;
 		udp = UDP(eip6 + 1);
 		if (icmpid != 0) {
 			udp->uh_sum = cksum_adjust(udp->uh_sum,
 			    udp->uh_sport, icmpid);
 			udp->uh_sport = icmpid;
 		}
 		udp->uh_sum = cksum_add(udp->uh_sum,
 		    ~nat64_cksum_convert(eip6, &ip));
 		break;
 	case IPPROTO_ICMP:
 		/*
 		 * Check if this is an ICMP error message for echo request
 		 * that we sent. I.e. ULP in the data containing invoking
 		 * packet is IPPROTO_ICMP and its type is ICMP_ECHO.
 		 */
 		icmp = (struct icmp *)(eip6 + 1);
 		if (icmp->icmp_type != ICMP_ECHO) {
 			m_freem(n);
 			goto freeit;
 		}
 		/*
 		 * For our client this original datagram should looks
 		 * like it was ICMPv6 datagram with type ICMP6_ECHO_REQUEST.
 		 * Thus we need adjust icmp_cksum and convert type from
 		 * ICMP_ECHO to ICMP6_ECHO_REQUEST.
 		 */
 		nat64_icmp_handle_echo(eip6, ICMP6(icmp), icmpid,
 		    ICMP6_ECHO_REQUEST);
 	}
 	m_freem(m);
 	/* Convert ICMPv4 into ICMPv6 header */
 	icmp = mtodo(n, offset);
 	ICMP6(icmp)->icmp6_type = type;
 	ICMP6(icmp)->icmp6_code = code;
 	ICMP6(icmp)->icmp6_mtu = htonl(mtu);
 	ICMP6(icmp)->icmp6_cksum = 0;
 	ICMP6(icmp)->icmp6_cksum = cksum_add(
 	    ~in6_cksum_pseudo(ip6, plen, IPPROTO_ICMPV6, 0),
 	    in_cksum_skip(n, n->m_pkthdr.len, offset));
 	return (n);
 freeit:
 	m_freem(m);
 	NAT64STAT_INC(&cfg->stats, dropped);
 	return (NULL);
 }
 
 int
 nat64_getlasthdr(struct mbuf *m, int *offset)
 {
 	struct ip6_hdr *ip6;
 	struct ip6_hbh *hbh;
 	int proto, hlen;
 
 	if (offset != NULL)
 		hlen = *offset;
 	else
 		hlen = 0;
 
 	if (m->m_len < hlen + sizeof(*ip6))
 		return (-1);
 
 	ip6 = mtodo(m, hlen);
 	hlen += sizeof(*ip6);
 	proto = ip6->ip6_nxt;
 	/* Skip extension headers */
 	while (proto == IPPROTO_HOPOPTS || proto == IPPROTO_ROUTING ||
 	    proto == IPPROTO_DSTOPTS) {
 		hbh = mtodo(m, hlen);
 		/*
 		 * We expect mbuf has contigious data up to
 		 * upper level header.
 		 */
 		if (m->m_len < hlen)
 			return (-1);
 		/*
 		 * We doesn't support Jumbo payload option,
 		 * so return error.
 		 */
 		if (proto == IPPROTO_HOPOPTS && ip6->ip6_plen == 0)
 			return (-1);
 		proto = hbh->ip6h_nxt;
 		hlen += (hbh->ip6h_len + 1) << 3;
 	}
 	if (offset != NULL)
 		*offset = hlen;
 	return (proto);
 }
 
 int
 nat64_do_handle_ip4(struct mbuf *m, struct in6_addr *saddr,
     struct in6_addr *daddr, uint16_t lport, struct nat64_config *cfg,
     void *logdata)
 {
-	struct nhop6_basic nh;
+	struct nhop_object *nh;
 	struct ip6_hdr ip6;
 	struct sockaddr_in6 dst;
 	struct ip *ip;
 	struct mbufq mq;
 	uint16_t ip_id, ip_off;
 	uint16_t *csum;
 	int plen, hlen;
 	uint8_t proto;
 
 	ip = mtod(m, struct ip*);
 
 	if (*V_nat64ipstealth == 0 && ip->ip_ttl <= IPTTLDEC) {
 		nat64_icmp_reflect(m, ICMP_TIMXCEED,
 		    ICMP_TIMXCEED_INTRANS, 0, &cfg->stats, logdata);
 		return (NAT64RETURN);
 	}
 
 	ip6.ip6_dst = *daddr;
 	ip6.ip6_src = *saddr;
 
 	hlen = ip->ip_hl << 2;
 	plen = ntohs(ip->ip_len) - hlen;
 	proto = ip->ip_p;
 
 	/* Save ip_id and ip_off, both are in network byte order */
 	ip_id = ip->ip_id;
 	ip_off = ip->ip_off & htons(IP_OFFMASK | IP_MF);
 
 	/* Fragment length must be multiple of 8 octets */
 	if ((ip->ip_off & htons(IP_MF)) != 0 && (plen & 0x7) != 0) {
 		nat64_icmp_reflect(m, ICMP_PARAMPROB,
 		    ICMP_PARAMPROB_LENGTH, 0, &cfg->stats, logdata);
 		return (NAT64RETURN);
 	}
 	/* Fragmented ICMP is unsupported */
 	if (proto == IPPROTO_ICMP && ip_off != 0) {
 		DPRINTF(DP_DROPS, "dropped due to fragmented ICMP");
 		NAT64STAT_INC(&cfg->stats, dropped);
 		return (NAT64MFREE);
 	}
 
 	dst.sin6_addr = ip6.ip6_dst;
-	if (nat64_find_route6(&nh, &dst, m) != 0) {
+	nh = nat64_find_route6(&dst, m);
+	if (nh == NULL) {
 		NAT64STAT_INC(&cfg->stats, noroute6);
 		nat64_icmp_reflect(m, ICMP_UNREACH, ICMP_UNREACH_HOST, 0,
 		    &cfg->stats, logdata);
 		return (NAT64RETURN);
 	}
-	if (nh.nh_mtu < plen + sizeof(ip6) &&
+	if (nh->nh_mtu < plen + sizeof(ip6) &&
 	    (ip->ip_off & htons(IP_DF)) != 0) {
 		nat64_icmp_reflect(m, ICMP_UNREACH, ICMP_UNREACH_NEEDFRAG,
-		    FRAGSZ(nh.nh_mtu) + sizeof(struct ip), &cfg->stats, logdata);
+		    FRAGSZ(nh->nh_mtu) + sizeof(struct ip), &cfg->stats, logdata);
 		return (NAT64RETURN);
 	}
 
 	ip6.ip6_flow = htonl(ip->ip_tos << 20);
 	ip6.ip6_vfc |= IPV6_VERSION;
 	ip6.ip6_hlim = ip->ip_ttl;
 	if (*V_nat64ipstealth == 0)
 		ip6.ip6_hlim -= IPTTLDEC;
 	ip6.ip6_plen = htons(plen);
 	ip6.ip6_nxt = (proto == IPPROTO_ICMP) ? IPPROTO_ICMPV6: proto;
 	/* Convert checksums. */
 	switch (proto) {
 	case IPPROTO_TCP:
 		csum = &TCP(mtodo(m, hlen))->th_sum;
 		if (lport != 0) {
 			struct tcphdr *tcp = TCP(mtodo(m, hlen));
 			*csum = cksum_adjust(*csum, tcp->th_dport, lport);
 			tcp->th_dport = lport;
 		}
 		*csum = cksum_add(*csum, ~nat64_cksum_convert(&ip6, ip));
 		break;
 	case IPPROTO_UDP:
 		csum = &UDP(mtodo(m, hlen))->uh_sum;
 		if (lport != 0) {
 			struct udphdr *udp = UDP(mtodo(m, hlen));
 			*csum = cksum_adjust(*csum, udp->uh_dport, lport);
 			udp->uh_dport = lport;
 		}
 		*csum = cksum_add(*csum, ~nat64_cksum_convert(&ip6, ip));
 		break;
 	case IPPROTO_ICMP:
 		m = nat64_icmp_translate(m, &ip6, lport, hlen, cfg);
 		if (m == NULL)	/* stats already accounted */
 			return (NAT64RETURN);
 	}
 
 	m_adj(m, hlen);
 	mbufq_init(&mq, 255);
-	nat64_fragment6(&cfg->stats, &ip6, &mq, m, nh.nh_mtu, ip_id, ip_off);
+	nat64_fragment6(&cfg->stats, &ip6, &mq, m, nh->nh_mtu, ip_id, ip_off);
 	while ((m = mbufq_dequeue(&mq)) != NULL) {
-		if (V_nat64out->output(nh.nh_ifp, m, (struct sockaddr *)&dst,
+		if (V_nat64out->output(nh->nh_ifp, m, (struct sockaddr *)&dst,
 		    &cfg->stats, logdata) != 0)
 			break;
 		NAT64STAT_INC(&cfg->stats, opcnt46);
 	}
 	mbufq_drain(&mq);
 	return (NAT64RETURN);
 }
 
 int
 nat64_handle_icmp6(struct mbuf *m, int hlen, uint32_t aaddr, uint16_t aport,
     struct nat64_config *cfg, void *logdata)
 {
 	struct ip ip;
 	struct icmp6_hdr *icmp6;
 	struct ip6_frag *ip6f;
 	struct ip6_hdr *ip6, *ip6i;
 	uint32_t mtu;
 	int plen, proto;
 	uint8_t type, code;
 
 	if (hlen == 0) {
 		ip6 = mtod(m, struct ip6_hdr *);
 		if (nat64_check_ip6(&ip6->ip6_src) != 0 ||
 		    nat64_check_ip6(&ip6->ip6_dst) != 0)
 			return (NAT64SKIP);
 
 		proto = nat64_getlasthdr(m, &hlen);
 		if (proto != IPPROTO_ICMPV6) {
 			DPRINTF(DP_DROPS,
 			    "dropped due to mbuf isn't contigious");
 			NAT64STAT_INC(&cfg->stats, dropped);
 			return (NAT64MFREE);
 		}
 	}
 
 	/*
 	 * Translate ICMPv6 type and code to ICMPv4 (RFC7915).
 	 * NOTE: ICMPv6 echo handled by nat64_do_handle_ip6().
 	 */
 	icmp6 = mtodo(m, hlen);
 	mtu = 0;
 	switch (icmp6->icmp6_type) {
 	case ICMP6_DST_UNREACH:
 		type = ICMP_UNREACH;
 		switch (icmp6->icmp6_code) {
 		case ICMP6_DST_UNREACH_NOROUTE:
 		case ICMP6_DST_UNREACH_BEYONDSCOPE:
 		case ICMP6_DST_UNREACH_ADDR:
 			code = ICMP_UNREACH_HOST;
 			break;
 		case ICMP6_DST_UNREACH_ADMIN:
 			code = ICMP_UNREACH_HOST_PROHIB;
 			break;
 		case ICMP6_DST_UNREACH_NOPORT:
 			code = ICMP_UNREACH_PORT;
 			break;
 		default:
 			DPRINTF(DP_DROPS, "Unsupported ICMPv6 type %d,"
 			    " code %d", icmp6->icmp6_type,
 			    icmp6->icmp6_code);
 			NAT64STAT_INC(&cfg->stats, dropped);
 			return (NAT64MFREE);
 		}
 		break;
 	case ICMP6_PACKET_TOO_BIG:
 		type = ICMP_UNREACH;
 		code = ICMP_UNREACH_NEEDFRAG;
 		mtu = ntohl(icmp6->icmp6_mtu);
 		if (mtu < IPV6_MMTU) {
 			DPRINTF(DP_DROPS, "Wrong MTU %d in ICMPv6 type %d,"
 			    " code %d", mtu, icmp6->icmp6_type,
 			    icmp6->icmp6_code);
 			NAT64STAT_INC(&cfg->stats, dropped);
 			return (NAT64MFREE);
 		}
 		/*
 		 * Adjust MTU to reflect difference between
 		 * IPv6 an IPv4 headers.
 		 */
 		mtu -= sizeof(struct ip6_hdr) - sizeof(struct ip);
 		break;
 	case ICMP6_TIME_EXCEEDED:
 		type = ICMP_TIMXCEED;
 		code = icmp6->icmp6_code;
 		break;
 	case ICMP6_PARAM_PROB:
 		switch (icmp6->icmp6_code) {
 		case ICMP6_PARAMPROB_HEADER:
 			type = ICMP_PARAMPROB;
 			code = ICMP_PARAMPROB_ERRATPTR;
 			mtu = ntohl(icmp6->icmp6_pptr);
 			switch (mtu) {
 			case 0: /* Version/Traffic Class */
 			case 1: /* Traffic Class/Flow Label */
 				break;
 			case 4: /* Payload Length */
 			case 5:
 				mtu = 2;
 				break;
 			case 6: /* Next Header */
 				mtu = 9;
 				break;
 			case 7: /* Hop Limit */
 				mtu = 8;
 				break;
 			default:
 				if (mtu >= 8 && mtu <= 23) {
 					mtu = 12; /* Source address */
 					break;
 				}
 				if (mtu >= 24 && mtu <= 39) {
 					mtu = 16; /* Destination address */
 					break;
 				}
 				DPRINTF(DP_DROPS, "Unsupported ICMPv6 type %d,"
 				    " code %d, pptr %d", icmp6->icmp6_type,
 				    icmp6->icmp6_code, mtu);
 				NAT64STAT_INC(&cfg->stats, dropped);
 				return (NAT64MFREE);
 			}
 		case ICMP6_PARAMPROB_NEXTHEADER:
 			type = ICMP_UNREACH;
 			code = ICMP_UNREACH_PROTOCOL;
 			break;
 		default:
 			DPRINTF(DP_DROPS, "Unsupported ICMPv6 type %d,"
 			    " code %d, pptr %d", icmp6->icmp6_type,
 			    icmp6->icmp6_code, ntohl(icmp6->icmp6_pptr));
 			NAT64STAT_INC(&cfg->stats, dropped);
 			return (NAT64MFREE);
 		}
 		break;
 	default:
 		DPRINTF(DP_DROPS, "Unsupported ICMPv6 type %d, code %d",
 		    icmp6->icmp6_type, icmp6->icmp6_code);
 		NAT64STAT_INC(&cfg->stats, dropped);
 		return (NAT64MFREE);
 	}
 
 	hlen += sizeof(struct icmp6_hdr);
 	if (m->m_pkthdr.len < hlen + sizeof(struct ip6_hdr) + ICMP_MINLEN) {
 		NAT64STAT_INC(&cfg->stats, dropped);
 		DPRINTF(DP_DROPS, "Message is too short %d",
 		    m->m_pkthdr.len);
 		return (NAT64MFREE);
 	}
 	/*
 	 * We need at least ICMP_MINLEN bytes of original datagram payload
 	 * to generate ICMP message. It is nice that ICMP_MINLEN is equal
 	 * to sizeof(struct ip6_frag). So, if embedded datagram had a fragment
 	 * header we will not have to do m_pullup() again.
 	 *
 	 * What we have here:
 	 * Outer header: (IPv6iGW, v4mapPRefix+v4exthost)
 	 * Inner header: (v4mapPRefix+v4host, IPv6iHost) [sport, dport]
 	 * We need to translate it to:
 	 *
 	 * Outer header: (alias_host, v4exthost)
 	 * Inner header: (v4exthost, alias_host) [sport, alias_port]
 	 *
 	 * Assume caller function has checked if v4mapPRefix+v4host
 	 * matches configured prefix.
 	 * The only two things we should be provided with are mapping between
 	 * IPv6iHost <> alias_host and between dport and alias_port.
 	 */
 	if (m->m_len < hlen + sizeof(struct ip6_hdr) + ICMP_MINLEN)
 		m = m_pullup(m, hlen + sizeof(struct ip6_hdr) + ICMP_MINLEN);
 	if (m == NULL) {
 		NAT64STAT_INC(&cfg->stats, nomem);
 		return (NAT64RETURN);
 	}
 	ip6 = mtod(m, struct ip6_hdr *);
 	ip6i = mtodo(m, hlen);
 	ip6f = NULL;
 	proto = ip6i->ip6_nxt;
 	plen = ntohs(ip6i->ip6_plen);
 	hlen += sizeof(struct ip6_hdr);
 	if (proto == IPPROTO_FRAGMENT) {
 		if (m->m_pkthdr.len < hlen + sizeof(struct ip6_frag) +
 		    ICMP_MINLEN)
 			goto fail;
 		ip6f = mtodo(m, hlen);
 		proto = ip6f->ip6f_nxt;
 		plen -= sizeof(struct ip6_frag);
 		hlen += sizeof(struct ip6_frag);
 		/* Ajust MTU to reflect frag header size */
 		if (type == ICMP_UNREACH && code == ICMP_UNREACH_NEEDFRAG)
 			mtu -= sizeof(struct ip6_frag);
 	}
 	if (proto != IPPROTO_TCP && proto != IPPROTO_UDP) {
 		DPRINTF(DP_DROPS, "Unsupported proto %d in the inner header",
 		    proto);
 		goto fail;
 	}
 	if (nat64_check_ip6(&ip6i->ip6_src) != 0 ||
 	    nat64_check_ip6(&ip6i->ip6_dst) != 0) {
 		DPRINTF(DP_DROPS, "Inner addresses do not passes the check");
 		goto fail;
 	}
 	/* Check if outer dst is the same as inner src */
 	if (!IN6_ARE_ADDR_EQUAL(&ip6->ip6_dst, &ip6i->ip6_src)) {
 		DPRINTF(DP_DROPS, "Inner src doesn't match outer dst");
 		goto fail;
 	}
 
 	/* Now we need to make a fake IPv4 packet to generate ICMP message */
 	ip.ip_dst.s_addr = aaddr;
 	ip.ip_src.s_addr = nat64_extract_ip4(&ip6i->ip6_src, cfg->plat_plen);
 	if (ip.ip_src.s_addr == 0)
 		goto fail;
 	/* XXX: Make fake ulp header */
 	if (V_nat64out == &nat64_direct) /* init_ip4hdr will decrement it */
 		ip6i->ip6_hlim += IPV6_HLIMDEC;
 	nat64_init_ip4hdr(ip6i, ip6f, plen, proto, &ip);
 	m_adj(m, hlen - sizeof(struct ip));
 	bcopy(&ip, mtod(m, void *), sizeof(ip));
 	nat64_icmp_reflect(m, type, code, (uint16_t)mtu, &cfg->stats,
 	    logdata);
 	return (NAT64RETURN);
 fail:
 	/*
 	 * We must call m_freem() because mbuf pointer could be
 	 * changed with m_pullup().
 	 */
 	m_freem(m);
 	NAT64STAT_INC(&cfg->stats, dropped);
 	return (NAT64RETURN);
 }
 
 /*
  * Translate the IPv6 packet held in mbuf 'm' into an IPv4 packet and pass
  * it to the configured output method (V_nat64out->output).
  *
  * m       - mbuf chain that starts with the IPv6 header.
  * aaddr   - IPv4 address used as the source of the translated packet.
  * aport   - when non-zero, replaces the TCP/UDP source port or the
  *           ICMP echo identifier; the checksum is adjusted accordingly.
  * cfg     - NAT64 configuration; supplies plat_plen for extracting the
  *           embedded IPv4 destination and the statistics counters.
  * logdata - opaque pointer handed through to the reflect/output helpers.
  *
  * Returns NAT64SKIP when the IPv6 source or destination fails
  * nat64_check_ip6() (nothing has been modified at that point),
  * NAT64MFREE on the drop paths where this function has not disposed of
  * the mbuf (presumably the caller is expected to free it -- confirm
  * against the callers), and NAT64RETURN once the mbuf has been handed to
  * an ICMPv6 reflect helper or to the output method.  For ICMPv6 messages
  * other than echo request/reply the result of nat64_handle_icmp6() is
  * returned as is.
  */
 int
 nat64_do_handle_ip6(struct mbuf *m, uint32_t aaddr, uint16_t aport,
     struct nat64_config *cfg, void *logdata)
 {
 	struct ip ip;
-	struct nhop4_basic nh;
+	struct nhop_object *nh;
 	struct sockaddr_in dst;
 	struct ip6_frag *frag;
 	struct ip6_hdr *ip6;
 	struct icmp6_hdr *icmp6;
 	uint16_t *csum;
 	int plen, hlen, proto;
 
 	/*
 	 * XXX: we expect ipfw_chk() did m_pullup() up to upper level
 	 * protocol's headers. Also we skip some checks, that ip6_input(),
 	 * ip6_forward(), ip6_fastfwd() and ipfw_chk() already did.
 	 */
 	ip6 = mtod(m, struct ip6_hdr *);
 	if (nat64_check_ip6(&ip6->ip6_src) != 0 ||
 	    nat64_check_ip6(&ip6->ip6_dst) != 0) {
 		return (NAT64SKIP);
 	}
 
 	/* Starting from this point we must not return zero */
 	ip.ip_src.s_addr = aaddr;
 	if (nat64_check_ip4(ip.ip_src.s_addr) != 0) {
 		DPRINTF(DP_GENERIC | DP_DROPS, "invalid source address: %08x",
 		    ip.ip_src.s_addr);
 		NAT64STAT_INC(&cfg->stats, dropped);
 		return (NAT64MFREE);
 	}
 
 	/*
 	 * The IPv4 destination is taken from the IPv6 destination address
 	 * using the configured prefix length; a zero result is treated as
 	 * a failure and the packet is dropped.
 	 */
 	ip.ip_dst.s_addr = nat64_extract_ip4(&ip6->ip6_dst, cfg->plat_plen);
 	if (ip.ip_dst.s_addr == 0) {
 		NAT64STAT_INC(&cfg->stats, dropped);
 		return (NAT64MFREE);
 	}
 
 	/*
 	 * Unless the stealth knob is set, answer with ICMPv6 Time Exceeded
 	 * when the hop limit is not greater than IPV6_HLIMDEC.
 	 */
 	if (*V_nat64ip6stealth == 0 && ip6->ip6_hlim <= IPV6_HLIMDEC) {
 		nat64_icmp6_reflect(m, ICMP6_TIME_EXCEEDED,
 		    ICMP6_TIME_EXCEED_TRANSIT, 0, &cfg->stats, logdata);
 		return (NAT64RETURN);
 	}
 
 	/*
 	 * NOTE(review): nat64_getlasthdr() is defined outside this block;
 	 * presumably it returns the protocol of the last header it could
 	 * reach and stores that header's offset in hlen -- confirm.  A
 	 * negative result is treated as a failure here.
 	 */
 	hlen = 0;
 	plen = ntohs(ip6->ip6_plen);
 	proto = nat64_getlasthdr(m, &hlen);
 	if (proto < 0) {
 		DPRINTF(DP_DROPS, "dropped due to mbuf isn't contigious");
 		NAT64STAT_INC(&cfg->stats, dropped);
 		return (NAT64MFREE);
 	}
 	frag = NULL;
 	if (proto == IPPROTO_FRAGMENT) {
 		/* ipfw_chk should m_pullup up to frag header */
 		if (m->m_len < hlen + sizeof(*frag)) {
 			DPRINTF(DP_DROPS,
 			    "dropped due to mbuf isn't contigious");
 			NAT64STAT_INC(&cfg->stats, dropped);
 			return (NAT64MFREE);
 		}
 		/* Step over the fragment header to the real upper protocol. */
 		frag = mtodo(m, hlen);
 		proto = frag->ip6f_nxt;
 		hlen += sizeof(*frag);
 		/* Fragmented ICMPv6 is unsupported */
 		if (proto == IPPROTO_ICMPV6) {
 			DPRINTF(DP_DROPS, "dropped due to fragmented ICMPv6");
 			NAT64STAT_INC(&cfg->stats, dropped);
 			return (NAT64MFREE);
 		}
 		/* Fragment length must be multiple of 8 octets */
 		if ((frag->ip6f_offlg & IP6F_MORE_FRAG) != 0 &&
 		    ((plen + sizeof(struct ip6_hdr) - hlen) & 0x7) != 0) {
 			nat64_icmp6_reflect(m, ICMP6_PARAM_PROB,
 			    ICMP6_PARAMPROB_HEADER,
 			    offsetof(struct ip6_hdr, ip6_plen), &cfg->stats,
 			    logdata);
 			return (NAT64RETURN);
 		}
 	}
 	/*
 	 * ip6_plen counts everything after the fixed IPv6 header; remove
 	 * the headers skipped so far so that plen is the number of bytes
 	 * starting at offset hlen, then make sure the mbuf chain really
 	 * holds that much data.
 	 */
 	plen -= hlen - sizeof(struct ip6_hdr);
 	if (plen < 0 || m->m_pkthdr.len < plen + hlen) {
 		DPRINTF(DP_DROPS, "plen %d, pkthdr.len %d, hlen %d",
 		    plen, m->m_pkthdr.len, hlen);
 		NAT64STAT_INC(&cfg->stats, dropped);
 		return (NAT64MFREE);
 	}
 
 	icmp6 = NULL;	/* Make gcc happy */
 	if (proto == IPPROTO_ICMPV6) {
 		/*
 		 * Only echo request/reply are translated in this function;
 		 * every other ICMPv6 type is delegated to
 		 * nat64_handle_icmp6().
 		 */
 		icmp6 = mtodo(m, hlen);
 		if (icmp6->icmp6_type != ICMP6_ECHO_REQUEST &&
 		    icmp6->icmp6_type != ICMP6_ECHO_REPLY)
 			return (nat64_handle_icmp6(m, hlen, aaddr, aport,
 			    cfg, logdata));
 	}
 	/*
 	 * Look up the IPv4 route for the translated destination.  Only
 	 * sin_addr is filled in here; NOTE(review): the remaining fields of
 	 * 'dst' are presumably set by nat64_find_route4() -- confirm.
 	 */
 	dst.sin_addr.s_addr = ip.ip_dst.s_addr;
-	if (nat64_find_route4(&nh, &dst, m) != 0) {
+	nh = nat64_find_route4(&dst, m);
+	if (nh == NULL) {
 		NAT64STAT_INC(&cfg->stats, noroute4);
 		nat64_icmp6_reflect(m, ICMP6_DST_UNREACH,
 		    ICMP6_DST_UNREACH_NOROUTE, 0, &cfg->stats, logdata);
 		return (NAT64RETURN);
 	}
 	/*
 	 * The translated packet is plen bytes plus one IPv4 header; when
 	 * that exceeds the nexthop MTU report Packet Too Big with that MTU.
 	 */
-	if (nh.nh_mtu < plen + sizeof(ip)) {
-		nat64_icmp6_reflect(m, ICMP6_PACKET_TOO_BIG, 0, nh.nh_mtu,
+	if (nh->nh_mtu < plen + sizeof(ip)) {
+		nat64_icmp6_reflect(m, ICMP6_PACKET_TOO_BIG, 0, nh->nh_mtu,
 		    &cfg->stats, logdata);
 		return (NAT64RETURN);
 	}
 	/* Fill the on-stack IPv4 header; it is copied into the mbuf below. */
 	nat64_init_ip4hdr(ip6, frag, plen, proto, &ip);
 	/* Convert checksums. */
 	switch (proto) {
 	case IPPROTO_TCP:
 		csum = &TCP(mtodo(m, hlen))->th_sum;
 		/* Optionally replace the source port with the alias port. */
 		if (aport != 0) {
 			struct tcphdr *tcp = TCP(mtodo(m, hlen));
 			*csum = cksum_adjust(*csum, tcp->th_sport, aport);
 			tcp->th_sport = aport;
 		}
 		*csum = cksum_add(*csum, nat64_cksum_convert(ip6, &ip));
 		break;
 	case IPPROTO_UDP:
 		csum = &UDP(mtodo(m, hlen))->uh_sum;
 		/* Optionally replace the source port with the alias port. */
 		if (aport != 0) {
 			struct udphdr *udp = UDP(mtodo(m, hlen));
 			*csum = cksum_adjust(*csum, udp->uh_sport, aport);
 			udp->uh_sport = aport;
 		}
 		*csum = cksum_add(*csum, nat64_cksum_convert(ip6, &ip));
 		break;
 	case IPPROTO_ICMPV6:
 		/* Checksum in ICMPv6 covers pseudo header */
 		csum = &icmp6->icmp6_cksum;
 		*csum = cksum_add(*csum, in6_cksum_pseudo(ip6, plen,
 		    IPPROTO_ICMPV6, 0));
 		/* Convert ICMPv6 types to ICMP */
 		/*
 		 * 'proto' is reused as scratch storage from here on: it holds
 		 * the first 16-bit word of the ICMPv6 header, not a protocol
 		 * number.  The IPv4 header has already been initialized above,
 		 * so the protocol value is no longer needed.
 		 */
 		proto = *(uint16_t *)icmp6; /* save old word for cksum_adjust */
 		if (icmp6->icmp6_type == ICMP6_ECHO_REQUEST)
 			icmp6->icmp6_type = ICMP_ECHO;
 		else /* ICMP6_ECHO_REPLY */
 			icmp6->icmp6_type = ICMP_ECHOREPLY;
 		*csum = cksum_adjust(*csum, (uint16_t)proto,
 		    *(uint16_t *)icmp6);
 		/* Optionally replace the echo identifier with the alias. */
 		if (aport != 0) {
 			uint16_t old_id = icmp6->icmp6_id;
 			icmp6->icmp6_id = aport;
 			*csum = cksum_adjust(*csum, old_id, aport);
 		}
 		break;
 	};
 
 	/*
 	 * Trim the front of the mbuf so that exactly sizeof(ip) bytes remain
 	 * ahead of the upper-layer header, then overwrite those bytes with
 	 * the prepared IPv4 header.
 	 */
 	m_adj(m, hlen - sizeof(ip));
 	bcopy(&ip, mtod(m, void *), sizeof(ip));
 	/* Count the packet as translated only when output reports success. */
-	if (V_nat64out->output(nh.nh_ifp, m, (struct sockaddr *)&dst,
+	if (V_nat64out->output(nh->nh_ifp, m, (struct sockaddr *)&dst,
 	    &cfg->stats, logdata) == 0)
 		NAT64STAT_INC(&cfg->stats, opcnt64);
 	return (NAT64RETURN);
 }