diff --git a/sys/contrib/ipfilter/netinet/ip_fil_freebsd.c b/sys/contrib/ipfilter/netinet/ip_fil_freebsd.c
index 072ab8bcd4e5..7e0ac4bc2927 100644
--- a/sys/contrib/ipfilter/netinet/ip_fil_freebsd.c
+++ b/sys/contrib/ipfilter/netinet/ip_fil_freebsd.c
@@ -1,1496 +1,1497 @@
 /*	$FreeBSD$	*/
 
 /*
  * Copyright (C) 2012 by Darren Reed.
  *
  * See the IPFILTER.LICENCE file for details on licencing.
  */
 #if !defined(lint)
 static const char sccsid[] = "@(#)ip_fil.c	2.41 6/5/96 (C) 1993-2000 Darren Reed";
 static const char rcsid[] = "@(#)$Id$";
 #endif
 
 #if defined(KERNEL) || defined(_KERNEL)
 # undef KERNEL
 # undef _KERNEL
 # define	KERNEL	1
 # define	_KERNEL	1
 #endif
 #if defined(__FreeBSD__) && \
     !defined(KLD_MODULE) && !defined(IPFILTER_LKM)
 # include "opt_inet6.h"
 #endif
 #include <sys/param.h>
 #include <sys/eventhandler.h>
 #include <sys/conf.h>
 #include <sys/errno.h>
 #include <sys/types.h>
 #include <sys/file.h>
 #include <sys/fcntl.h>
 #include <sys/filio.h>
 #include <sys/time.h>
 #include <sys/systm.h>
 #include <sys/dirent.h>
 #if defined(__FreeBSD__)
 # include <sys/jail.h>
 #endif
 #include <sys/malloc.h>
 #include <sys/mbuf.h>
 #include <sys/sockopt.h>
 #include <sys/socket.h>
 #include <sys/selinfo.h>
 #include <netinet/tcp_var.h>
 #include <net/if.h>
 #include <net/if_var.h>
 #include <net/netisr.h>
 #include <net/route.h>
 #include <net/route/nhop.h>
 #include <netinet/in.h>
 #include <netinet/in_fib.h>
 #include <netinet/in_var.h>
 #include <netinet/in_systm.h>
 #include <netinet/ip.h>
 #include <netinet/ip_var.h>
 #include <netinet/tcp.h>
 #include <net/vnet.h>
 #include <netinet/udp.h>
 #include <netinet/tcpip.h>
 #include <netinet/ip_icmp.h>
 #include "netinet/ip_compat.h"
 #ifdef USE_INET6
 # include <netinet/icmp6.h>
 #endif
 #include "netinet/ip_fil.h"
 #include "netinet/ip_nat.h"
 #include "netinet/ip_frag.h"
 #include "netinet/ip_state.h"
 #include "netinet/ip_proxy.h"
 #include "netinet/ip_auth.h"
 #include "netinet/ip_sync.h"
 #include "netinet/ip_lookup.h"
 #include "netinet/ip_dstlist.h"
 #ifdef	IPFILTER_SCAN
 # include "netinet/ip_scan.h"
 #endif
 #include "netinet/ip_pool.h"
 #include <sys/malloc.h>
 #include <sys/kernel.h>
 #ifdef CSUM_DATA_VALID
 # include <machine/in_cksum.h>
 #endif
 extern	int	ip_optcopy(struct ip *, struct ip *);
 
 #ifdef IPFILTER_M_IPFILTER
 MALLOC_DEFINE(M_IPFILTER, "ipfilter", "IP Filter packet filter data structures");
 #endif
 
 
 static	int	ipf_send_ip(fr_info_t *, mb_t *);
 static void	ipf_timer_func(void *arg);
 
 VNET_DEFINE(ipf_main_softc_t, ipfmain) = {
 	.ipf_running		= -2,
 };
 #define	V_ipfmain		VNET(ipfmain)
 
 #include <sys/conf.h>
 #include <net/pfil.h>
 
 VNET_DEFINE_STATIC(eventhandler_tag, ipf_arrivetag);
 VNET_DEFINE_STATIC(eventhandler_tag, ipf_departtag);
 #define	V_ipf_arrivetag		VNET(ipf_arrivetag)
 #define	V_ipf_departtag		VNET(ipf_departtag)
 #if 0
 /*
  * Disable the "cloner" event handler;  we are getting interface
  * events before the firewall is fully initiallized and also no vnet
  * information thus leading to uninitialised memory accesses.
  * In addition it is unclear why we need it in first place.
  * If it turns out to be needed, well need a dedicated event handler
  * for it to deal with the ifc and the correct vnet.
  */
 VNET_DEFINE_STATIC(eventhandler_tag, ipf_clonetag);
 #define	V_ipf_clonetag		VNET(ipf_clonetag)
 #endif
 
 static void ipf_ifevent(void *arg, struct ifnet *ifp);
 
 static void ipf_ifevent(arg, ifp)
 	void *arg;
 	struct ifnet *ifp;
 {
 
 	CURVNET_SET(ifp->if_vnet);
 	if (V_ipfmain.ipf_running > 0)
 		ipf_sync(&V_ipfmain, NULL);
 	CURVNET_RESTORE();
 }
 
 
 
 static pfil_return_t
 ipf_check_wrapper(struct mbuf **mp, struct ifnet *ifp, int flags,
     void *ruleset __unused, struct inpcb *inp)
 {
 	struct ip *ip = mtod(*mp, struct ip *);
 	pfil_return_t rv;
 
 	CURVNET_SET(ifp->if_vnet);
 	rv = ipf_check(&V_ipfmain, ip, ip->ip_hl << 2, ifp,
 	    !!(flags & PFIL_OUT), mp);
 	CURVNET_RESTORE();
 	return (rv == 0 ? PFIL_PASS : PFIL_DROPPED);
 }
 
 #ifdef USE_INET6
 static pfil_return_t
 ipf_check_wrapper6(struct mbuf **mp, struct ifnet *ifp, int flags,
     void *ruleset __unused, struct inpcb *inp)
 {
 	pfil_return_t rv;
 
 	CURVNET_SET(ifp->if_vnet);
 	rv = ipf_check(&V_ipfmain, mtod(*mp, struct ip *),
 	    sizeof(struct ip6_hdr), ifp, !!(flags & PFIL_OUT), mp);
 	CURVNET_RESTORE();
 
 	return (rv == 0 ? PFIL_PASS : PFIL_DROPPED);
 }
 # endif
 #if	defined(IPFILTER_LKM)
 int ipf_identify(s)
 	char *s;
 {
 	if (strcmp(s, "ipl") == 0)
 		return 1;
 	return 0;
 }
 #endif /* IPFILTER_LKM */
 
 
 static void
 ipf_timer_func(arg)
 	void *arg;
 {
 	ipf_main_softc_t *softc = arg;
 	SPL_INT(s);
 
 	SPL_NET(s);
 	READ_ENTER(&softc->ipf_global);
 
         if (softc->ipf_running > 0)
 		ipf_slowtimer(softc);
 
 	if (softc->ipf_running == -1 || softc->ipf_running == 1) {
 #if 0
 		softc->ipf_slow_ch = timeout(ipf_timer_func, softc, hz/2);
 #endif
 		callout_init(&softc->ipf_slow_ch, 1);
 		callout_reset(&softc->ipf_slow_ch,
 			(hz / IPF_HZ_DIVIDE) * IPF_HZ_MULT,
 			ipf_timer_func, softc);
 	}
 	RWLOCK_EXIT(&softc->ipf_global);
 	SPL_X(s);
 }
 
 
 int
 ipfattach(softc)
 	ipf_main_softc_t *softc;
 {
 #ifdef USE_SPL
 	int s;
 #endif
 
 	SPL_NET(s);
 	if (softc->ipf_running > 0) {
 		SPL_X(s);
 		return EBUSY;
 	}
 
 	if (ipf_init_all(softc) < 0) {
 		SPL_X(s);
 		return EIO;
 	}
 
 
 	bzero((char *)V_ipfmain.ipf_selwait, sizeof(V_ipfmain.ipf_selwait));
 	softc->ipf_running = 1;
 
 	if (softc->ipf_control_forwarding & 1)
 		V_ipforwarding = 1;
 
 	SPL_X(s);
 #if 0
 	softc->ipf_slow_ch = timeout(ipf_timer_func, softc,
 				     (hz / IPF_HZ_DIVIDE) * IPF_HZ_MULT);
 #endif
 	callout_init(&softc->ipf_slow_ch, 1);
 	callout_reset(&softc->ipf_slow_ch, (hz / IPF_HZ_DIVIDE) * IPF_HZ_MULT,
 		ipf_timer_func, softc);
 	return 0;
 }
 
 
 /*
  * Disable the filter by removing the hooks from the IP input/output
  * stream.
  */
 int
 ipfdetach(softc)
 	ipf_main_softc_t *softc;
 {
 #ifdef USE_SPL
 	int s;
 #endif
 
 	if (softc->ipf_control_forwarding & 2)
 		V_ipforwarding = 0;
 
 	SPL_NET(s);
 
 #if 0
 	if (softc->ipf_slow_ch.callout != NULL)
 		untimeout(ipf_timer_func, softc, softc->ipf_slow_ch);
 	bzero(&softc->ipf_slow, sizeof(softc->ipf_slow));
 #endif
 	callout_drain(&softc->ipf_slow_ch);
 
 	ipf_fini_all(softc);
 
 	softc->ipf_running = -2;
 
 	SPL_X(s);
 
 	return 0;
 }
 
 
 /*
  * Filter ioctl interface.
  */
 int
 ipfioctl(dev, cmd, data, mode, p)
 	struct thread *p;
 #define	p_cred	td_ucred
 #define	p_uid	td_ucred->cr_ruid
 	struct cdev *dev;
 	ioctlcmd_t cmd;
 	caddr_t data;
 	int mode;
 {
 	int error = 0, unit = 0;
 	SPL_INT(s);
 
 	CURVNET_SET(TD_TO_VNET(p));
         if (securelevel_ge(p->p_cred, 3) && (mode & FWRITE))
 	{
 		V_ipfmain.ipf_interror = 130001;
 		CURVNET_RESTORE();
 		return EPERM;
 	}
 
 	unit = GET_MINOR(dev);
 	if ((IPL_LOGMAX < unit) || (unit < 0)) {
 		V_ipfmain.ipf_interror = 130002;
 		CURVNET_RESTORE();
 		return ENXIO;
 	}
 
 	if (V_ipfmain.ipf_running <= 0) {
 		if (unit != IPL_LOGIPF && cmd != SIOCIPFINTERROR) {
 			V_ipfmain.ipf_interror = 130003;
 			CURVNET_RESTORE();
 			return EIO;
 		}
 		if (cmd != SIOCIPFGETNEXT && cmd != SIOCIPFGET &&
 		    cmd != SIOCIPFSET && cmd != SIOCFRENB &&
 		    cmd != SIOCGETFS && cmd != SIOCGETFF &&
 		    cmd != SIOCIPFINTERROR) {
 			V_ipfmain.ipf_interror = 130004;
 			CURVNET_RESTORE();
 			return EIO;
 		}
 	}
 
 	SPL_NET(s);
 
 	error = ipf_ioctlswitch(&V_ipfmain, unit, data, cmd, mode, p->p_uid, p);
 	CURVNET_RESTORE();
 	if (error != -1) {
 		SPL_X(s);
 		return error;
 	}
 
 	SPL_X(s);
 
 	return error;
 }
 
 
 /*
  * ipf_send_reset - this could conceivably be a call to tcp_respond(), but that
  * requires a large amount of setting up and isn't any more efficient.
  */
 int
 ipf_send_reset(fin)
 	fr_info_t *fin;
 {
 	struct tcphdr *tcp, *tcp2;
 	int tlen = 0, hlen;
 	struct mbuf *m;
 #ifdef USE_INET6
 	ip6_t *ip6;
 #endif
 	ip_t *ip;
 
 	tcp = fin->fin_dp;
 	if (tcp->th_flags & TH_RST)
 		return -1;		/* feedback loop */
 
 	if (ipf_checkl4sum(fin) == -1)
 		return -1;
 
 	tlen = fin->fin_dlen - (TCP_OFF(tcp) << 2) +
 			((tcp->th_flags & TH_SYN) ? 1 : 0) +
 			((tcp->th_flags & TH_FIN) ? 1 : 0);
 
 #ifdef USE_INET6
 	hlen = (fin->fin_v == 6) ? sizeof(ip6_t) : sizeof(ip_t);
 #else
 	hlen = sizeof(ip_t);
 #endif
 #ifdef MGETHDR
 	MGETHDR(m, M_NOWAIT, MT_HEADER);
 #else
 	MGET(m, M_NOWAIT, MT_HEADER);
 #endif
 	if (m == NULL)
 		return -1;
 	if (sizeof(*tcp2) + hlen > MLEN) {
 		if (!(MCLGET(m, M_NOWAIT))) {
 			FREE_MB_T(m);
 			return -1;
 		}
 	}
 
 	m->m_len = sizeof(*tcp2) + hlen;
 	m->m_data += max_linkhdr;
 	m->m_pkthdr.len = m->m_len;
 	m->m_pkthdr.rcvif = (struct ifnet *)0;
 	ip = mtod(m, struct ip *);
 	bzero((char *)ip, hlen);
 #ifdef USE_INET6
 	ip6 = (ip6_t *)ip;
 #endif
 	tcp2 = (struct tcphdr *)((char *)ip + hlen);
 	tcp2->th_sport = tcp->th_dport;
 	tcp2->th_dport = tcp->th_sport;
 
 	if (tcp->th_flags & TH_ACK) {
 		tcp2->th_seq = tcp->th_ack;
 		tcp2->th_flags = TH_RST;
 		tcp2->th_ack = 0;
 	} else {
 		tcp2->th_seq = 0;
 		tcp2->th_ack = ntohl(tcp->th_seq);
 		tcp2->th_ack += tlen;
 		tcp2->th_ack = htonl(tcp2->th_ack);
 		tcp2->th_flags = TH_RST|TH_ACK;
 	}
 	TCP_X2_A(tcp2, 0);
 	TCP_OFF_A(tcp2, sizeof(*tcp2) >> 2);
 	tcp2->th_win = tcp->th_win;
 	tcp2->th_sum = 0;
 	tcp2->th_urp = 0;
 
 #ifdef USE_INET6
 	if (fin->fin_v == 6) {
 		ip6->ip6_flow = ((ip6_t *)fin->fin_ip)->ip6_flow;
 		ip6->ip6_plen = htons(sizeof(struct tcphdr));
 		ip6->ip6_nxt = IPPROTO_TCP;
 		ip6->ip6_hlim = 0;
 		ip6->ip6_src = fin->fin_dst6.in6;
 		ip6->ip6_dst = fin->fin_src6.in6;
 		tcp2->th_sum = in6_cksum(m, IPPROTO_TCP,
 					 sizeof(*ip6), sizeof(*tcp2));
 		return ipf_send_ip(fin, m);
 	}
 #endif
 	ip->ip_p = IPPROTO_TCP;
 	ip->ip_len = htons(sizeof(struct tcphdr));
 	ip->ip_src.s_addr = fin->fin_daddr;
 	ip->ip_dst.s_addr = fin->fin_saddr;
 	tcp2->th_sum = in_cksum(m, hlen + sizeof(*tcp2));
 	ip->ip_len = htons(hlen + sizeof(*tcp2));
 	return ipf_send_ip(fin, m);
 }
 
 
 /*
  * ip_len must be in network byte order when called.
  */
 static int
 ipf_send_ip(fin, m)
 	fr_info_t *fin;
 	mb_t *m;
 {
 	fr_info_t fnew;
 	ip_t *ip, *oip;
 	int hlen;
 
 	ip = mtod(m, ip_t *);
 	bzero((char *)&fnew, sizeof(fnew));
 	fnew.fin_main_soft = fin->fin_main_soft;
 
 	IP_V_A(ip, fin->fin_v);
 	switch (fin->fin_v)
 	{
 	case 4 :
 		oip = fin->fin_ip;
 		hlen = sizeof(*oip);
 		fnew.fin_v = 4;
 		fnew.fin_p = ip->ip_p;
 		fnew.fin_plen = ntohs(ip->ip_len);
 		IP_HL_A(ip, sizeof(*oip) >> 2);
 		ip->ip_tos = oip->ip_tos;
 		ip->ip_id = fin->fin_ip->ip_id;
 		ip->ip_off = htons(V_path_mtu_discovery ? IP_DF : 0);
 		ip->ip_ttl = V_ip_defttl;
 		ip->ip_sum = 0;
 		break;
 #ifdef USE_INET6
 	case 6 :
 	{
 		ip6_t *ip6 = (ip6_t *)ip;
 
 		ip6->ip6_vfc = 0x60;
 		ip6->ip6_hlim = IPDEFTTL;
 
 		hlen = sizeof(*ip6);
 		fnew.fin_p = ip6->ip6_nxt;
 		fnew.fin_v = 6;
 		fnew.fin_plen = ntohs(ip6->ip6_plen) + hlen;
 		break;
 	}
 #endif
 	default :
 		return EINVAL;
 	}
 #ifdef IPSEC_SUPPORT
 	m->m_pkthdr.rcvif = NULL;
 #endif
 
 	fnew.fin_ifp = fin->fin_ifp;
 	fnew.fin_flx = FI_NOCKSUM;
 	fnew.fin_m = m;
 	fnew.fin_ip = ip;
 	fnew.fin_mp = &m;
 	fnew.fin_hlen = hlen;
 	fnew.fin_dp = (char *)ip + hlen;
 	(void) ipf_makefrip(hlen, ip, &fnew);
 
 	return ipf_fastroute(m, &m, &fnew, NULL);
 }
 
 
 int
 ipf_send_icmp_err(type, fin, dst)
 	int type;
 	fr_info_t *fin;
 	int dst;
 {
 	int err, hlen, xtra, iclen, ohlen, avail, code;
 	struct in_addr dst4;
 	struct icmp *icmp;
 	struct mbuf *m;
 	i6addr_t dst6;
 	void *ifp;
 #ifdef USE_INET6
 	ip6_t *ip6;
 #endif
 	ip_t *ip, *ip2;
 
 	if ((type < 0) || (type >= ICMP_MAXTYPE))
 		return -1;
 
 	code = fin->fin_icode;
 #ifdef USE_INET6
 	/* See NetBSD ip_fil_netbsd.c r1.4: */
 	if ((code < 0) || (code >= sizeof(icmptoicmp6unreach)/sizeof(int)))
 		return -1;
 #endif
 
 	if (ipf_checkl4sum(fin) == -1)
 		return -1;
 #ifdef MGETHDR
 	MGETHDR(m, M_NOWAIT, MT_HEADER);
 #else
 	MGET(m, M_NOWAIT, MT_HEADER);
 #endif
 	if (m == NULL)
 		return -1;
 	avail = MHLEN;
 
 	xtra = 0;
 	hlen = 0;
 	ohlen = 0;
 	dst4.s_addr = 0;
 	ifp = fin->fin_ifp;
 	if (fin->fin_v == 4) {
 		if ((fin->fin_p == IPPROTO_ICMP) && !(fin->fin_flx & FI_SHORT))
 			switch (ntohs(fin->fin_data[0]) >> 8)
 			{
 			case ICMP_ECHO :
 			case ICMP_TSTAMP :
 			case ICMP_IREQ :
 			case ICMP_MASKREQ :
 				break;
 			default :
 				FREE_MB_T(m);
 				return 0;
 			}
 
 		if (dst == 0) {
 			if (ipf_ifpaddr(&V_ipfmain, 4, FRI_NORMAL, ifp,
 					&dst6, NULL) == -1) {
 				FREE_MB_T(m);
 				return -1;
 			}
 			dst4 = dst6.in4;
 		} else
 			dst4.s_addr = fin->fin_daddr;
 
 		hlen = sizeof(ip_t);
 		ohlen = fin->fin_hlen;
 		iclen = hlen + offsetof(struct icmp, icmp_ip) + ohlen;
 		if (fin->fin_hlen < fin->fin_plen)
 			xtra = MIN(fin->fin_dlen, 8);
 		else
 			xtra = 0;
 	}
 
 #ifdef USE_INET6
 	else if (fin->fin_v == 6) {
 		hlen = sizeof(ip6_t);
 		ohlen = sizeof(ip6_t);
 		iclen = hlen + offsetof(struct icmp, icmp_ip) + ohlen;
 		type = icmptoicmp6types[type];
 		if (type == ICMP6_DST_UNREACH)
 			code = icmptoicmp6unreach[code];
 
 		if (iclen + max_linkhdr + fin->fin_plen > avail) {
 			if (!(MCLGET(m, M_NOWAIT))) {
 				FREE_MB_T(m);
 				return -1;
 			}
 			avail = MCLBYTES;
 		}
 		xtra = MIN(fin->fin_plen, avail - iclen - max_linkhdr);
 		xtra = MIN(xtra, IPV6_MMTU - iclen);
 		if (dst == 0) {
 			if (ipf_ifpaddr(&V_ipfmain, 6, FRI_NORMAL, ifp,
 					&dst6, NULL) == -1) {
 				FREE_MB_T(m);
 				return -1;
 			}
 		} else
 			dst6 = fin->fin_dst6;
 	}
 #endif
 	else {
 		FREE_MB_T(m);
 		return -1;
 	}
 
 	avail -= (max_linkhdr + iclen);
 	if (avail < 0) {
 		FREE_MB_T(m);
 		return -1;
 	}
 	if (xtra > avail)
 		xtra = avail;
 	iclen += xtra;
 	m->m_data += max_linkhdr;
 	m->m_pkthdr.rcvif = (struct ifnet *)0;
 	m->m_pkthdr.len = iclen;
 	m->m_len = iclen;
 	ip = mtod(m, ip_t *);
 	icmp = (struct icmp *)((char *)ip + hlen);
 	ip2 = (ip_t *)&icmp->icmp_ip;
 
 	icmp->icmp_type = type;
 	icmp->icmp_code = fin->fin_icode;
 	icmp->icmp_cksum = 0;
 #ifdef icmp_nextmtu
 	if (type == ICMP_UNREACH && fin->fin_icode == ICMP_UNREACH_NEEDFRAG) {
 		if (fin->fin_mtu != 0) {
 			icmp->icmp_nextmtu = htons(fin->fin_mtu);
 
 		} else if (ifp != NULL) {
 			icmp->icmp_nextmtu = htons(GETIFMTU_4(ifp));
 
 		} else {	/* make up a number... */
 			icmp->icmp_nextmtu = htons(fin->fin_plen - 20);
 		}
 	}
 #endif
 
 	bcopy((char *)fin->fin_ip, (char *)ip2, ohlen);
 
 #ifdef USE_INET6
 	ip6 = (ip6_t *)ip;
 	if (fin->fin_v == 6) {
 		ip6->ip6_flow = ((ip6_t *)fin->fin_ip)->ip6_flow;
 		ip6->ip6_plen = htons(iclen - hlen);
 		ip6->ip6_nxt = IPPROTO_ICMPV6;
 		ip6->ip6_hlim = 0;
 		ip6->ip6_src = dst6.in6;
 		ip6->ip6_dst = fin->fin_src6.in6;
 		if (xtra > 0)
 			bcopy((char *)fin->fin_ip + ohlen,
 			      (char *)&icmp->icmp_ip + ohlen, xtra);
 		icmp->icmp_cksum = in6_cksum(m, IPPROTO_ICMPV6,
 					     sizeof(*ip6), iclen - hlen);
 	} else
 #endif
 	{
 		ip->ip_p = IPPROTO_ICMP;
 		ip->ip_src.s_addr = dst4.s_addr;
 		ip->ip_dst.s_addr = fin->fin_saddr;
 
 		if (xtra > 0)
 			bcopy((char *)fin->fin_ip + ohlen,
 			      (char *)&icmp->icmp_ip + ohlen, xtra);
 		icmp->icmp_cksum = ipf_cksum((u_short *)icmp,
 					     sizeof(*icmp) + 8);
 		ip->ip_len = htons(iclen);
 		ip->ip_p = IPPROTO_ICMP;
 	}
 	err = ipf_send_ip(fin, m);
 	return err;
 }
 
 
 
 
 /*
  * m0 - pointer to mbuf where the IP packet starts
  * mpp - pointer to the mbuf pointer that is the start of the mbuf chain
  */
 int
 ipf_fastroute(m0, mpp, fin, fdp)
 	mb_t *m0, **mpp;
 	fr_info_t *fin;
 	frdest_t *fdp;
 {
 	register struct ip *ip, *mhip;
 	register struct mbuf *m = *mpp;
 	int len, off, error = 0, hlen, code;
 	struct ifnet *ifp, *sifp;
-	struct sockaddr_in dst;
+	struct route ro;		/* ro_dst/ro_flags handed to if_output */
+	struct sockaddr_in *dst;
+	const struct sockaddr *gw;
 	struct nhop_object *nh;
 	u_long fibnum = 0;
 	u_short ip_off;
 	frdest_t node;
 	frentry_t *fr;
 
 #ifdef M_WRITABLE
 	/*
 	* HOT FIX/KLUDGE:
 	*
 	* If the mbuf we're about to send is not writable (because of
 	* a cluster reference, for example) we'll need to make a copy
 	* of it since this routine modifies the contents.
 	*
 	* If you have non-crappy network hardware that can transmit data
 	* from the mbuf, rather than making a copy, this is gonna be a
 	* problem.
 	*/
 	if (M_WRITABLE(m) == 0) {
 		m0 = m_dup(m, M_NOWAIT);
 		if (m0 != NULL) {
 			FREE_MB_T(m);
 			m = m0;
 			*mpp = m;
 		} else {
 			error = ENOBUFS;
 			FREE_MB_T(m);
 			goto done;
 		}
 	}
 #endif
 
 #ifdef USE_INET6
 	if (fin->fin_v == 6) {
 		/*
 		 * currently "to <if>" and "to <if>:ip#" are not supported
 		 * for IPv6
 		 */
 		return ip6_output(m, NULL, NULL, 0, NULL, NULL, NULL);
 	}
 #endif
 
 	hlen = fin->fin_hlen;
 	ip = mtod(m0, struct ip *);
 	ifp = NULL;
 
 	/*
 	 * Route packet.
 	 */
-	bzero(&dst, sizeof (dst));
-	dst.sin_family = AF_INET;
-	dst.sin_addr = ip->ip_dst;
-	dst.sin_len = sizeof(dst);
+	bzero(&ro, sizeof (ro));
+	dst = (struct sockaddr_in *)&ro.ro_dst;
+	dst->sin_family = AF_INET;
+	dst->sin_addr = ip->ip_dst;
+	dst->sin_len = sizeof(*dst);	/* sockaddr size, not pointer size */
+	gw = (const struct sockaddr *)dst;	/* next hop defaults to dst */
 
 	fr = fin->fin_fr;
 	if ((fr != NULL) && !(fr->fr_flags & FR_KEEPSTATE) && (fdp != NULL) &&
 	    (fdp->fd_type == FRD_DSTLIST)) {
 		if (ipf_dstlist_select_node(fin, fdp->fd_ptr, NULL, &node) == 0)
 			fdp = &node;
 	}
 
 	if (fdp != NULL)
 		ifp = fdp->fd_ptr;
 	else
 		ifp = fin->fin_ifp;
 
 	if ((ifp == NULL) && ((fr == NULL) || !(fr->fr_flags & FR_FASTROUTE))) {
 		error = -2;
 		goto bad;
 	}
 
 	if ((fdp != NULL) && (fdp->fd_ip.s_addr != 0))
-		dst.sin_addr = fdp->fd_ip;
+		dst->sin_addr = fdp->fd_ip;
 
 	fibnum = M_GETFIB(m0);
 	NET_EPOCH_ASSERT();
-	nh = fib4_lookup(fibnum, dst.sin_addr, 0, NHR_NONE, 0);
+	nh = fib4_lookup(fibnum, dst->sin_addr, 0, NHR_NONE, 0);
 	if (nh == NULL) {
 		if (in_localaddr(ip->ip_dst))
 			error = EHOSTUNREACH;
 		else
 			error = ENETUNREACH;
 		goto bad;
 	}
 
 	if (ifp == NULL)
 		ifp = nh->nh_ifp;
-	if (nh->nh_flags & NHF_GATEWAY)
-		dst.sin_addr = nh->gw4_sa.sin_addr;
+	if (nh->nh_flags & NHF_GATEWAY) {
+		gw = &nh->gw_sa;
+		ro.ro_flags |= RT_HAS_GW;
+	}
 
 	/*
 	 * For input packets which are being "fastrouted", they won't
 	 * go back through output filtering and miss their chance to get
 	 * NAT'd and counted.  Duplicated packets aren't considered to be
 	 * part of the normal packet stream, so do not NAT them or pass
 	 * them through stateful checking, etc.
 	 */
 	if ((fdp != &fr->fr_dif) && (fin->fin_out == 0)) {
 		sifp = fin->fin_ifp;
 		fin->fin_ifp = ifp;
 		fin->fin_out = 1;
 		(void) ipf_acctpkt(fin, NULL);
 		fin->fin_fr = NULL;
 		if (!fr || !(fr->fr_flags & FR_RETMASK)) {
 			u_32_t pass;
 
 			(void) ipf_state_check(fin, &pass);
 		}
 
 		switch (ipf_nat_checkout(fin, NULL))
 		{
 		case 0 :
 			break;
 		case 1 :
 			ip->ip_sum = 0;
 			break;
 		case -1 :
 			error = -1;
 			goto bad;
 			break;
 		}
 
 		fin->fin_ifp = sifp;
 		fin->fin_out = 0;
 	} else
 		ip->ip_sum = 0;
 	/*
 	 * If small enough for interface, can just send directly.
 	 */
 	if (ntohs(ip->ip_len) <= ifp->if_mtu) {
 		if (!ip->ip_sum)
 			ip->ip_sum = in_cksum(m, hlen);
-		error = (*ifp->if_output)(ifp, m, (struct sockaddr *)&dst,
-			    NULL
-			);
+		error = (*ifp->if_output)(ifp, m, gw, &ro);
 		goto done;
 	}
 	/*
 	 * Too large for interface; fragment if possible.
 	 * Must be able to put at least 8 bytes per fragment.
 	 */
 	ip_off = ntohs(ip->ip_off);
 	if (ip_off & IP_DF) {
 		error = EMSGSIZE;
 		goto bad;
 	}
 	len = (ifp->if_mtu - hlen) &~ 7;
 	if (len < 8) {
 		error = EMSGSIZE;
 		goto bad;
 	}
 
     {
 	int mhlen, firstlen = len;
 	struct mbuf **mnext = &m->m_act;
 
 	/*
 	 * Loop through length of segment after first fragment,
 	 * make new header and copy data of each part and link onto chain.
 	 */
 	m0 = m;
 	mhlen = sizeof (struct ip);
 	for (off = hlen + len; off < ntohs(ip->ip_len); off += len) {
 #ifdef MGETHDR
 		MGETHDR(m, M_NOWAIT, MT_HEADER);
 #else
 		MGET(m, M_NOWAIT, MT_HEADER);
 #endif
 		if (m == NULL) {
 			m = m0;
 			error = ENOBUFS;
 			goto bad;
 		}
 		m->m_data += max_linkhdr;
 		mhip = mtod(m, struct ip *);
 		bcopy((char *)ip, (char *)mhip, sizeof(*ip));
 		if (hlen > sizeof (struct ip)) {
 			mhlen = ip_optcopy(ip, mhip) + sizeof (struct ip);
 			IP_HL_A(mhip, mhlen >> 2);
 		}
 		m->m_len = mhlen;
 		mhip->ip_off = ((off - hlen) >> 3) + ip_off;
 		if (off + len >= ntohs(ip->ip_len))
 			len = ntohs(ip->ip_len) - off;
 		else
 			mhip->ip_off |= IP_MF;
 		mhip->ip_len = htons((u_short)(len + mhlen));
 		*mnext = m;
 		m->m_next = m_copym(m0, off, len, M_NOWAIT);
 		if (m->m_next == 0) {
 			error = ENOBUFS;	/* ??? */
 			goto sendorfree;
 		}
 		m->m_pkthdr.len = mhlen + len;
 		m->m_pkthdr.rcvif = NULL;
 		mhip->ip_off = htons((u_short)mhip->ip_off);
 		mhip->ip_sum = 0;
 		mhip->ip_sum = in_cksum(m, mhlen);
 		mnext = &m->m_act;
 	}
 	/*
 	 * Update first fragment by trimming what's been copied out
 	 * and updating header, then send each fragment (in order).
 	 */
 	m_adj(m0, hlen + firstlen - ip->ip_len);
 	ip->ip_len = htons((u_short)(hlen + firstlen));
 	ip->ip_off = htons((u_short)IP_MF);
 	ip->ip_sum = 0;
 	ip->ip_sum = in_cksum(m0, hlen);
 sendorfree:
 	for (m = m0; m; m = m0) {
 		m0 = m->m_act;
 		m->m_act = 0;
 		if (error == 0)
-			error = (*ifp->if_output)(ifp, m,
-			    (struct sockaddr *)&dst,
-			    NULL
-			    );
+			error = (*ifp->if_output)(ifp, m, gw, &ro);
 		else
 			FREE_MB_T(m);
 	}
     }
 done:
 	if (!error)
 		V_ipfmain.ipf_frouteok[0]++;
 	else
 		V_ipfmain.ipf_frouteok[1]++;
 
 	return 0;
 bad:
 	if (error == EMSGSIZE) {
 		sifp = fin->fin_ifp;
 		code = fin->fin_icode;
 		fin->fin_icode = ICMP_UNREACH_NEEDFRAG;
 		fin->fin_ifp = ifp;
 		(void) ipf_send_icmp_err(ICMP_UNREACH, fin, 1);
 		fin->fin_ifp = sifp;
 		fin->fin_icode = code;
 	}
 	FREE_MB_T(m);
 	goto done;
 }
 
 
 int
 ipf_verifysrc(fin)
 	fr_info_t *fin;
 {
 	struct nhop_object *nh;
 
 	NET_EPOCH_ASSERT();
 	nh = fib4_lookup(RT_DEFAULT_FIB, fin->fin_src, 0, NHR_NONE, 0);
 	if (nh == NULL)
 		return (0);
 	return (fin->fin_ifp == nh->nh_ifp);
 }
 
 
 /*
  * return the first IP Address associated with an interface
  */
 int
 ipf_ifpaddr(softc, v, atype, ifptr, inp, inpmask)
 	ipf_main_softc_t *softc;
 	int v, atype;
 	void *ifptr;
 	i6addr_t *inp, *inpmask;
 {
 #ifdef USE_INET6
 	struct in6_addr *ia6 = NULL;
 #endif
 	struct sockaddr *sock, *mask;
 	struct sockaddr_in *sin;
 	struct ifaddr *ifa;
 	struct ifnet *ifp;
 
 	if ((ifptr == NULL) || (ifptr == (void *)-1))
 		return -1;
 
 	sin = NULL;
 	ifp = ifptr;
 
 	if (v == 4)
 		inp->in4.s_addr = 0;
 #ifdef USE_INET6
 	else if (v == 6)
 		bzero((char *)inp, sizeof(*inp));
 #endif
 	ifa = CK_STAILQ_FIRST(&ifp->if_addrhead);
 
 	sock = ifa->ifa_addr;
 	while (sock != NULL && ifa != NULL) {
 		sin = (struct sockaddr_in *)sock;
 		if ((v == 4) && (sin->sin_family == AF_INET))
 			break;
 #ifdef USE_INET6
 		if ((v == 6) && (sin->sin_family == AF_INET6)) {
 			ia6 = &((struct sockaddr_in6 *)sin)->sin6_addr;
 			if (!IN6_IS_ADDR_LINKLOCAL(ia6) &&
 			    !IN6_IS_ADDR_LOOPBACK(ia6))
 				break;
 		}
 #endif
 		ifa = CK_STAILQ_NEXT(ifa, ifa_link);
 		if (ifa != NULL)
 			sock = ifa->ifa_addr;
 	}
 
 	if (ifa == NULL || sin == NULL)
 		return -1;
 
 	mask = ifa->ifa_netmask;
 	if (atype == FRI_BROADCAST)
 		sock = ifa->ifa_broadaddr;
 	else if (atype == FRI_PEERADDR)
 		sock = ifa->ifa_dstaddr;
 
 	if (sock == NULL)
 		return -1;
 
 #ifdef USE_INET6
 	if (v == 6) {
 		return ipf_ifpfillv6addr(atype, (struct sockaddr_in6 *)sock,
 					 (struct sockaddr_in6 *)mask,
 					 inp, inpmask);
 	}
 #endif
 	return ipf_ifpfillv4addr(atype, (struct sockaddr_in *)sock,
 				 (struct sockaddr_in *)mask,
 				 &inp->in4, &inpmask->in4);
 }
 
 
 u_32_t
 ipf_newisn(fin)
 	fr_info_t *fin;
 {
 	u_32_t newiss;
 	newiss = arc4random();
 	return newiss;
 }
 
 
 INLINE int
 ipf_checkv4sum(fin)
 	fr_info_t *fin;
 {
 #ifdef CSUM_DATA_VALID
 	int manual = 0;
 	u_short sum;
 	ip_t *ip;
 	mb_t *m;
 
 	if ((fin->fin_flx & FI_NOCKSUM) != 0)
 		return 0;
 
 	if ((fin->fin_flx & FI_SHORT) != 0)
 		return 1;
 
 	if (fin->fin_cksum != FI_CK_NEEDED)
 		return (fin->fin_cksum > FI_CK_NEEDED) ? 0 : -1;
 
 	m = fin->fin_m;
 	if (m == NULL) {
 		manual = 1;
 		goto skipauto;
 	}
 	ip = fin->fin_ip;
 
 	if ((m->m_pkthdr.csum_flags & (CSUM_IP_CHECKED|CSUM_IP_VALID)) ==
 	    CSUM_IP_CHECKED) {
 		fin->fin_cksum = FI_CK_BAD;
 		fin->fin_flx |= FI_BAD;
 		DT2(ipf_fi_bad_checkv4sum_csum_ip_checked, fr_info_t *, fin, u_int, m->m_pkthdr.csum_flags & (CSUM_IP_CHECKED|CSUM_IP_VALID));
 		return -1;
 	}
 	if (m->m_pkthdr.csum_flags & CSUM_DATA_VALID) {
 		/* Depending on the driver, UDP may have zero checksum */
 		if (fin->fin_p == IPPROTO_UDP && (fin->fin_flx &
 		    (FI_FRAG|FI_SHORT|FI_BAD)) == 0) {
 			udphdr_t *udp = fin->fin_dp;
 			if (udp->uh_sum == 0) {
 				/*
 				 * we're good no matter what the hardware
 				 * checksum flags and csum_data say (handling
 				 * of csum_data for zero UDP checksum is not
 				 * consistent across all drivers)
 				 */
 				fin->fin_cksum = 1;
 				return 0;
 			}
 		}
 
 		if (m->m_pkthdr.csum_flags & CSUM_PSEUDO_HDR)
 			sum = m->m_pkthdr.csum_data;
 		else
 			sum = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr,
 					htonl(m->m_pkthdr.csum_data +
 					fin->fin_dlen + fin->fin_p));
 		sum ^= 0xffff;
 		if (sum != 0) {
 			fin->fin_cksum = FI_CK_BAD;
 			fin->fin_flx |= FI_BAD;
 			DT2(ipf_fi_bad_checkv4sum_sum, fr_info_t *, fin, u_int, sum);
 		} else {
 			fin->fin_cksum = FI_CK_SUMOK;
 			return 0;
 		}
 	} else {
 		if (m->m_pkthdr.csum_flags == CSUM_DELAY_DATA) {
 			fin->fin_cksum = FI_CK_L4FULL;
 			return 0;
 		} else if (m->m_pkthdr.csum_flags == CSUM_TCP ||
 			   m->m_pkthdr.csum_flags == CSUM_UDP) {
 			fin->fin_cksum = FI_CK_L4PART;
 			return 0;
 		} else if (m->m_pkthdr.csum_flags == CSUM_IP) {
 			fin->fin_cksum = FI_CK_L4PART;
 			return 0;
 		} else {
 			manual = 1;
 		}
 	}
 skipauto:
 	if (manual != 0) {
 		if (ipf_checkl4sum(fin) == -1) {
 			fin->fin_flx |= FI_BAD;
 			DT2(ipf_fi_bad_checkv4sum_manual, fr_info_t *, fin, u_int, manual);
 			return -1;
 		}
 	}
 #else
 	if (ipf_checkl4sum(fin) == -1) {
 		fin->fin_flx |= FI_BAD;
 		DT2(ipf_fi_bad_checkv4sum_checkl4sum, fr_info_t *, fin, u_int, -1);
 		return -1;
 	}
 #endif
 	return 0;
 }
 
 
 #ifdef USE_INET6
 INLINE int
 ipf_checkv6sum(fin)
 	fr_info_t *fin;
 {
 	if ((fin->fin_flx & FI_NOCKSUM) != 0) {
 		DT(ipf_checkv6sum_fi_nocksum);
 		return 0;
 	}
 
 	if ((fin->fin_flx & FI_SHORT) != 0) {
 		DT(ipf_checkv6sum_fi_short);
 		return 1;
 	}
 
 	if (fin->fin_cksum != FI_CK_NEEDED) {
 		DT(ipf_checkv6sum_fi_ck_needed);
 		return (fin->fin_cksum > FI_CK_NEEDED) ? 0 : -1;
 	}
 
 	if (ipf_checkl4sum(fin) == -1) {
 		fin->fin_flx |= FI_BAD;
 		DT2(ipf_fi_bad_checkv6sum_checkl4sum, fr_info_t *, fin, u_int, -1);
 		return -1;
 	}
 	return 0;
 }
 #endif /* USE_INET6 */
 
 
 size_t
 mbufchainlen(m0)
 	struct mbuf *m0;
 {
 	size_t len;
 
 	if ((m0->m_flags & M_PKTHDR) != 0) {
 		len = m0->m_pkthdr.len;
 	} else {
 		struct mbuf *m;
 
 		for (m = m0, len = 0; m != NULL; m = m->m_next)
 			len += m->m_len;
 	}
 	return len;
 }
 
 
 /* ------------------------------------------------------------------------ */
 /* Function:    ipf_pullup                                                  */
 /* Returns:     NULL == pullup failed, else pointer to protocol header      */
 /* Parameters:  xmin(I)- pointer to buffer where data packet starts         */
 /*              fin(I) - pointer to packet information                      */
 /*              len(I) - number of bytes to pullup                          */
 /*                                                                          */
 /* Attempt to move at least len bytes (from the start of the buffer) into a */
 /* single buffer for ease of access.  Operating system native functions are */
 /* used to manage buffers - if necessary.  If the entire packet ends up in  */
 /* a single buffer, set the FI_COALESCE flag even though ipf_coalesce() has */
 /* not been called.  Both fin_ip and fin_dp are updated before exiting _IF_ */
 /* and ONLY if the pullup succeeds.                                         */
 /*                                                                          */
 /* We assume that 'xmin' is a pointer to a buffer that is part of the chain */
 /* of buffers that starts at *fin->fin_mp.                                  */
 /* ------------------------------------------------------------------------ */
 void *
 ipf_pullup(xmin, fin, len)
 	mb_t *xmin;
 	fr_info_t *fin;
 	int len;
 {
 	int dpoff, ipoff;
 	mb_t *m = xmin;
 	char *ip;
 
 	if (m == NULL)
 		return NULL;
 
 	ip = (char *)fin->fin_ip;
 	if ((fin->fin_flx & FI_COALESCE) != 0)
 		return ip;
 
 	ipoff = fin->fin_ipoff;
 	if (fin->fin_dp != NULL)
 		dpoff = (char *)fin->fin_dp - (char *)ip;
 	else
 		dpoff = 0;
 
 	if (M_LEN(m) < len) {
 		mb_t *n = *fin->fin_mp;
 		/*
 		 * Assume that M_PKTHDR is set and just work with what is left
 		 * rather than check..
 		 * Should not make any real difference, anyway.
 		 */
 		if (m != n) {
 			/*
 			 * Record the mbuf that points to the mbuf that we're
 			 * about to go to work on so that we can update the
 			 * m_next appropriately later.
 			 */
 			for (; n->m_next != m; n = n->m_next)
 				;
 		} else {
 			n = NULL;
 		}
 
 #ifdef MHLEN
 		if (len > MHLEN)
 #else
 		if (len > MLEN)
 #endif
 		{
 #ifdef HAVE_M_PULLDOWN
 			if (m_pulldown(m, 0, len, NULL) == NULL)
 				m = NULL;
 #else
 			FREE_MB_T(*fin->fin_mp);
 			m = NULL;
 			n = NULL;
 #endif
 		} else
 		{
 			m = m_pullup(m, len);
 		}
 		if (n != NULL)
 			n->m_next = m;
 		if (m == NULL) {
 			/*
 			 * When n is non-NULL, it indicates that m pointed to
 			 * a sub-chain (tail) of the mbuf and that the head
 			 * of this chain has not yet been free'd.
 			 */
 			if (n != NULL) {
 				FREE_MB_T(*fin->fin_mp);
 			}
 
 			*fin->fin_mp = NULL;
 			fin->fin_m = NULL;
 			return NULL;
 		}
 
 		if (n == NULL)
 			*fin->fin_mp = m;
 
 		while (M_LEN(m) == 0) {
 			m = m->m_next;
 		}
 		fin->fin_m = m;
 		ip = MTOD(m, char *) + ipoff;
 
 		fin->fin_ip = (ip_t *)ip;
 		if (fin->fin_dp != NULL)
 			fin->fin_dp = (char *)fin->fin_ip + dpoff;
 		if (fin->fin_fraghdr != NULL)
 			fin->fin_fraghdr = (char *)ip +
 					   ((char *)fin->fin_fraghdr -
 					    (char *)fin->fin_ip);
 	}
 
 	if (len == fin->fin_plen)
 		fin->fin_flx |= FI_COALESCE;
 	return ip;
 }
 
 
 int
 ipf_inject(fin, m)
 	fr_info_t *fin;
 	mb_t *m;
 {
 	struct epoch_tracker et;
 	int error = 0;
 
 	NET_EPOCH_ENTER(et);
 	if (fin->fin_out == 0) {
 		netisr_dispatch(NETISR_IP, m);
 	} else {
 		fin->fin_ip->ip_len = ntohs(fin->fin_ip->ip_len);
 		fin->fin_ip->ip_off = ntohs(fin->fin_ip->ip_off);
 		error = ip_output(m, NULL, NULL, IP_FORWARDING, NULL, NULL);
 	}
 	NET_EPOCH_EXIT(et);
 
 	return error;
 }
 
 VNET_DEFINE_STATIC(pfil_hook_t, ipf_inet_hook);
 VNET_DEFINE_STATIC(pfil_hook_t, ipf_inet6_hook);
 #define	V_ipf_inet_hook		VNET(ipf_inet_hook)
 #define	V_ipf_inet6_hook	VNET(ipf_inet6_hook)
 
 int ipf_pfil_unhook(void) {
 
 	pfil_remove_hook(V_ipf_inet_hook);
 
 #ifdef USE_INET6
 	pfil_remove_hook(V_ipf_inet6_hook);
 #endif
 
 	return (0);
 }
 
 int ipf_pfil_hook(void) {
 	struct pfil_hook_args pha;
 	struct pfil_link_args pla;
 	int error, error6;
 
 	pha.pa_version = PFIL_VERSION;
 	pha.pa_flags = PFIL_IN | PFIL_OUT;
 	pha.pa_modname = "ipfilter";
 	pha.pa_rulname = "default-ip4";
 	pha.pa_func = ipf_check_wrapper;
 	pha.pa_ruleset = NULL;
 	pha.pa_type = PFIL_TYPE_IP4;
 	V_ipf_inet_hook = pfil_add_hook(&pha);
 
 #ifdef USE_INET6
 	pha.pa_rulname = "default-ip6";
 	pha.pa_func = ipf_check_wrapper6;
 	pha.pa_type = PFIL_TYPE_IP6;
 	V_ipf_inet6_hook = pfil_add_hook(&pha);
 #endif
 
 	pla.pa_version = PFIL_VERSION;
 	pla.pa_flags = PFIL_IN | PFIL_OUT |
 	    PFIL_HEADPTR | PFIL_HOOKPTR;
 	pla.pa_head = V_inet_pfil_head;
 	pla.pa_hook = V_ipf_inet_hook;
 	error = pfil_link(&pla);
 
 	error6 = 0;
 #ifdef USE_INET6
 	pla.pa_head = V_inet6_pfil_head;
 	pla.pa_hook = V_ipf_inet6_hook;
 	error6 = pfil_link(&pla);
 #endif
 
 	if (error || error6)
 		error = ENODEV;
 	else
 		error = 0;
 
 	return (error);
 }
 
 void
 ipf_event_reg(void)
 {
 	V_ipf_arrivetag = EVENTHANDLER_REGISTER(ifnet_arrival_event, \
 					       ipf_ifevent, NULL, \
 					       EVENTHANDLER_PRI_ANY);
 	V_ipf_departtag = EVENTHANDLER_REGISTER(ifnet_departure_event, \
 					       ipf_ifevent, NULL, \
 					       EVENTHANDLER_PRI_ANY);
 #if 0
 	V_ipf_clonetag  = EVENTHANDLER_REGISTER(if_clone_event, ipf_ifevent, \
 					       NULL, EVENTHANDLER_PRI_ANY);
 #endif
 }
 
 void
 ipf_event_dereg(void)
 {
 	if (V_ipf_arrivetag != NULL) {
 		EVENTHANDLER_DEREGISTER(ifnet_arrival_event, V_ipf_arrivetag);
 	}
 	if (V_ipf_departtag != NULL) {
 		EVENTHANDLER_DEREGISTER(ifnet_departure_event, V_ipf_departtag);
 	}
 #if 0
 	if (V_ipf_clonetag != NULL) {
 		EVENTHANDLER_DEREGISTER(if_clone_event, V_ipf_clonetag);
 	}
 #endif
 }
 
 
 u_32_t
 ipf_random()
 {
 	return arc4random();
 }
 
 
 u_int
 ipf_pcksum(fin, hlen, sum)
 	fr_info_t *fin;
 	int hlen;
 	u_int sum;
 {
 	struct mbuf *m;
 	u_int sum2;
 	int off;
 
 	m = fin->fin_m;
 	off = (char *)fin->fin_dp - (char *)fin->fin_ip;
 	m->m_data += hlen;
 	m->m_len -= hlen;
 	sum2 = in_cksum(fin->fin_m, fin->fin_plen - off);
 	m->m_len += hlen;
 	m->m_data -= hlen;
 
 	/*
 	 * Both sum and sum2 are partial sums, so combine them together.
 	 */
 	sum += ~sum2 & 0xffff;
 	while (sum > 0xffff)
 		sum = (sum & 0xffff) + (sum >> 16);
 	sum2 = ~sum & 0xffff;
 	return sum2;
 }
 
 #ifdef	USE_INET6
 u_int
 ipf_pcksum6(m, ip6, off, len)
 	struct mbuf *m;
 	ip6_t *ip6;
 	u_int32_t off;
 	u_int32_t len;
 {
 #ifdef	_KERNEL
 	int sum;
 
 	if (m->m_len < sizeof(struct ip6_hdr)) {
 		return 0xffff;
 	}
 
 	sum = in6_cksum(m, ip6->ip6_nxt, off, len);
 	return(sum);
 #else
 	u_short *sp;
 	u_int sum;
 
 	sp = (u_short *)&ip6->ip6_src;
 	sum = *sp++;   /* ip6_src */
 	sum += *sp++;
 	sum += *sp++;
 	sum += *sp++;
 	sum += *sp++;
 	sum += *sp++;
 	sum += *sp++;
 	sum += *sp++;
 	sum += *sp++;   /* ip6_dst */
 	sum += *sp++;
 	sum += *sp++;
 	sum += *sp++;
 	sum += *sp++;
 	sum += *sp++;
 	sum += *sp++;
 	sum += *sp++;
 	return(ipf_pcksum(fin, off, sum));
 #endif
 }
 #endif
 
 void
 ipf_fbsd_kenv_get(ipf_main_softc_t *softc)
 {
 	TUNABLE_INT_FETCH("net.inet.ipf.large_nat",
 		&softc->ipf_large_nat);
 }
diff --git a/sys/dev/cxgbe/tom/t4_listen.c b/sys/dev/cxgbe/tom/t4_listen.c
index 8623079fe429..4d98597409d6 100644
--- a/sys/dev/cxgbe/tom/t4_listen.c
+++ b/sys/dev/cxgbe/tom/t4_listen.c
@@ -1,1601 +1,1604 @@
 /*-
  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
  *
  * Copyright (c) 2012 Chelsio Communications, Inc.
  * All rights reserved.
  * Written by: Navdeep Parhar <np@FreeBSD.org>
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include "opt_inet.h"
 #include "opt_inet6.h"
 
 #ifdef TCP_OFFLOAD
 #include <sys/param.h>
 #include <sys/types.h>
 #include <sys/kernel.h>
 #include <sys/ktr.h>
 #include <sys/module.h>
 #include <sys/protosw.h>
 #include <sys/refcount.h>
 #include <sys/domain.h>
 #include <sys/fnv_hash.h>
 #include <sys/socket.h>
 #include <sys/socketvar.h>
 #include <sys/sysctl.h>
 #include <net/ethernet.h>
 #include <net/if.h>
 #include <net/if_types.h>
 #include <net/if_vlan_var.h>
 #include <net/route.h>
 #include <net/route/nhop.h>
 #include <netinet/in.h>
 #include <netinet/in_fib.h>
 #include <netinet/in_pcb.h>
 #include <netinet/ip.h>
 #include <netinet/ip6.h>
 #include <netinet6/in6_fib.h>
 #include <netinet6/scope6_var.h>
 #include <netinet/tcp_timer.h>
 #define TCPSTATES
 #include <netinet/tcp_fsm.h>
 #include <netinet/tcp_var.h>
 #include <netinet/toecore.h>
 #include <netinet/cc/cc.h>
 
 #include "common/common.h"
 #include "common/t4_msg.h"
 #include "common/t4_regs.h"
 #include "t4_clip.h"
 #include "tom/t4_tom_l2t.h"
 #include "tom/t4_tom.h"
 
 /* stid services */
 static int alloc_stid(struct adapter *, struct listen_ctx *, int);
 static struct listen_ctx *lookup_stid(struct adapter *, int);
 static void free_stid(struct adapter *, struct listen_ctx *);
 
 /* lctx services */
 static struct listen_ctx *alloc_lctx(struct adapter *, struct inpcb *,
     struct vi_info *);
 static int free_lctx(struct adapter *, struct listen_ctx *);
 static void hold_lctx(struct listen_ctx *);
 static void listen_hash_add(struct adapter *, struct listen_ctx *);
 static struct listen_ctx *listen_hash_find(struct adapter *, struct inpcb *);
 static struct listen_ctx *listen_hash_del(struct adapter *, struct inpcb *);
 static struct inpcb *release_lctx(struct adapter *, struct listen_ctx *);
 
 static void send_abort_rpl_synqe(struct toedev *, struct synq_entry *, int);
 
 /*
  * Reserve a range of server tids (stids) for lctx: 1 stid for IPv4, 2
  * naturally aligned stids when isipv6 is non-zero.
  *
  * The range is taken either from the free space tracked by
  * t->nstids_free_head or from the free space recorded after an existing
  * region in the t->stids queue.  On success lctx->stid_region is linked into
  * t->stids, t->stid_tab[] points at lctx, and the return value is the table
  * index plus t->stid_base.  Returns -1 if no suitable range is available.
  * All bookkeeping is done under t->stid_lock.
  */
 static int
 alloc_stid(struct adapter *sc, struct listen_ctx *lctx, int isipv6)
 {
 	struct tid_info *t = &sc->tids;
 	u_int stid, n, f, mask;
 	struct stid_region *sr = &lctx->stid_region;
 
 	/*
 	 * An IPv6 server needs 2 naturally aligned stids (1 stid = 4 cells) in
 	 * the TCAM.  The start of the stid region is properly aligned (the chip
 	 * requires each region to be 128-cell aligned).
 	 */
 	n = isipv6 ? 2 : 1;
 	mask = n - 1;
 	KASSERT((t->stid_base & mask) == 0 && (t->nstids & mask) == 0,
 	    ("%s: stid region (%u, %u) not properly aligned.  n = %u",
 	    __func__, t->stid_base, t->nstids, n));
 
 	mtx_lock(&t->stid_lock);
 	/* Quick rejection: fewer than n stids are unused in total. */
 	if (n > t->nstids - t->stids_in_use) {
 		mtx_unlock(&t->stid_lock);
 		return (-1);
 	}
 
 	if (t->nstids_free_head >= n) {
 		/*
 		 * This allocation will definitely succeed because the region
 		 * starts at a good alignment and we just checked we have enough
 		 * stids free.
 		 */
 		/* f = stids skipped (left free after the new range) for alignment. */
 		f = t->nstids_free_head & mask;
 		t->nstids_free_head -= n + f;
 		stid = t->nstids_free_head;
 		TAILQ_INSERT_HEAD(&t->stids, sr, link);
 	} else {
 		struct stid_region *s;
 
 		/*
 		 * Walk the regions in order; stid tracks the index just past
 		 * each region's used + free span.
 		 */
 		stid = t->nstids_free_head;
 		TAILQ_FOREACH(s, &t->stids, link) {
 			stid += s->used + s->free;
 			f = stid & mask;
 			if (s->free >= n + f) {
 				/* Carve the new range out of the end of s's free space. */
 				stid -= n + f;
 				s->free -= n + f;
 				TAILQ_INSERT_AFTER(&t->stids, s, sr, link);
 				goto allocated;
 			}
 		}
 
 		/* The walk must end exactly at the end of the stid table. */
 		if (__predict_false(stid != t->nstids)) {
 			panic("%s: stids TAILQ (%p) corrupt."
 			    "  At %d instead of %d at the end of the queue.",
 			    __func__, &t->stids, stid, t->nstids);
 		}
 
 		mtx_unlock(&t->stid_lock);
 		return (-1);
 	}
 
 allocated:
 	sr->used = n;
 	sr->free = f;
 	t->stids_in_use += n;
 	t->stid_tab[stid] = lctx;
 	mtx_unlock(&t->stid_lock);
 
 	KASSERT(((stid + t->stid_base) & mask) == 0,
 	    ("%s: EDOOFUS.", __func__));
 	return (stid + t->stid_base);
 }
 
 /*
  * Return the stid_tab entry for a hardware stid; the table is indexed by
  * the stid's offset from stid_base.
  */
 static struct listen_ctx *
 lookup_stid(struct adapter *sc, int stid)
 {
 	return (sc->tids.stid_tab[stid - sc->tids.stid_base]);
 }
 
 /*
  * Give back the stid range held by lctx->stid_region.  The range (used plus
  * trailing free stids) is merged into the free space of the preceding
  * region in the queue or, if there is none, into nstids_free_head.  The
  * region is then unlinked from t->stids.
  */
 static void
 free_stid(struct adapter *sc, struct listen_ctx *lctx)
 {
 	struct tid_info *t = &sc->tids;
 	struct stid_region *sr = &lctx->stid_region;
 	struct stid_region *prev;
 
 	KASSERT(sr->used > 0, ("%s: nonsense free (%d)", __func__, sr->used));
 
 	mtx_lock(&t->stid_lock);
 	prev = TAILQ_PREV(sr, stid_head, link);
 	if (prev == NULL)
 		t->nstids_free_head += sr->used + sr->free;
 	else
 		prev->free += sr->used + sr->free;
 	KASSERT(t->stids_in_use >= sr->used,
 	    ("%s: stids_in_use (%u) < stids being freed (%u)", __func__,
 	    t->stids_in_use, sr->used));
 	t->stids_in_use -= sr->used;
 	TAILQ_REMOVE(&t->stids, sr, link);
 	mtx_unlock(&t->stid_lock);
 }
 
 /*
  * Allocate and initialize a listen context for the listening socket's inp.
  *
  * Reserves an stid, obtains a CLIP entry when the inp is bound to a specific
  * (non-wildcard) IPv6 address, picks the control queue and offload rx queue
  * from vi, and takes a reference on the inp.  The lctx starts with a
  * refcount of 1.  Must be called with the inp write-locked.
  *
  * Returns NULL on any failure, with every partially acquired resource
  * released.
  */
 static struct listen_ctx *
 alloc_lctx(struct adapter *sc, struct inpcb *inp, struct vi_info *vi)
 {
 	struct listen_ctx *lctx;
 
 	INP_WLOCK_ASSERT(inp);
 
 	lctx = malloc(sizeof(*lctx), M_CXGBE, M_NOWAIT | M_ZERO);
 	if (lctx == NULL)
 		return (NULL);
 
 	lctx->stid = alloc_stid(sc, lctx, inp->inp_vflag & INP_IPV6);
 	if (lctx->stid < 0) {
 		free(lctx, M_CXGBE);
 		return (NULL);
 	}
 
 	if (inp->inp_vflag & INP_IPV6 &&
 	    !IN6_ARE_ADDR_EQUAL(&in6addr_any, &inp->in6p_laddr)) {
 		lctx->ce = t4_get_clip_entry(sc, &inp->in6p_laddr, true);
 		if (lctx->ce == NULL) {
 			/*
 			 * alloc_stid() linked lctx->stid_region into the
 			 * adapter's stid queue and charged stids_in_use.  Undo
 			 * that before freeing lctx, otherwise the stid leaks
 			 * and the queue keeps a pointer into freed memory.
 			 */
 			free_stid(sc, lctx);
 			free(lctx, M_CXGBE);
 			return (NULL);
 		}
 	}
 
 	lctx->ctrlq = &sc->sge.ctrlq[vi->pi->port_id];
 	lctx->ofld_rxq = &sc->sge.ofld_rxq[vi->first_ofld_rxq];
 	refcount_init(&lctx->refcount, 1);
 
 	lctx->inp = inp;
 	lctx->vnet = inp->inp_socket->so_vnet;
 	in_pcbref(inp);
 
 	return (lctx);
 }
 
 /* Don't call this directly, use release_lctx instead */
 static int
 free_lctx(struct adapter *sc, struct listen_ctx *lctx)
 {
 	struct inpcb *inp = lctx->inp;
 
 	INP_WLOCK_ASSERT(inp);
 	KASSERT(lctx->refcount == 0,
 	    ("%s: refcount %d", __func__, lctx->refcount));
 	KASSERT(lctx->stid >= 0, ("%s: bad stid %d.", __func__, lctx->stid));
 
 	CTR4(KTR_CXGBE, "%s: stid %u, lctx %p, inp %p",
 	    __func__, lctx->stid, lctx, lctx->inp);
 
 	if (lctx->ce)
 		t4_release_clip_entry(sc, lctx->ce);
 	free_stid(sc, lctx);
 	free(lctx, M_CXGBE);
 
 	return (in_pcbrele_wlocked(inp));
 }
 
 /*
  * Take an additional reference on the lctx.
  */
 static void
 hold_lctx(struct listen_ctx *lctx)
 {
 	refcount_acquire(&lctx->refcount);
 }
 
 /*
  * Hash the pointer value itself (the bytes of "key", not what it points to)
  * with FNV-1 and reduce it with mask.
  */
 static inline uint32_t
 listen_hashfn(void *key, u_long mask)
 {
 	uint32_t hash;
 
 	hash = fnv_32_buf(&key, sizeof(key), FNV1_32_INIT);
 	return (hash & mask);
 }
 
 /*
  * Insert lctx at the head of its listen hash bucket (keyed by lctx->inp) and
  * bump the lctx count, all under lctx_hash_lock.
  */
 static void
 listen_hash_add(struct adapter *sc, struct listen_ctx *lctx)
 {
 	struct tom_data *td;
 	int idx;
 
 	td = sc->tom_softc;
 	idx = listen_hashfn(lctx->inp, td->listen_mask);
 
 	mtx_lock(&td->lctx_hash_lock);
 	LIST_INSERT_HEAD(&td->listen_hash[idx], lctx, link);
 	td->lctx_count++;
 	mtx_unlock(&td->lctx_hash_lock);
 }
 
 /*
  * Look for the listening socket's context entry in the hash and return it.
  */
 static struct listen_ctx *
 listen_hash_find(struct adapter *sc, struct inpcb *inp)
 {
 	struct tom_data *td = sc->tom_softc;
 	int bucket = listen_hashfn(inp, td->listen_mask);
 	struct listen_ctx *lctx;
 
 	mtx_lock(&td->lctx_hash_lock);
 	LIST_FOREACH(lctx, &td->listen_hash[bucket], link) {
 		if (lctx->inp == inp)
 			break;
 	}
 	mtx_unlock(&td->lctx_hash_lock);
 
 	return (lctx);
 }
 
 /*
  * Unlink the listen context that belongs to inp from the listen hash and
  * return it.  Returns NULL (and changes nothing) if inp has no entry.
  */
 static struct listen_ctx *
 listen_hash_del(struct adapter *sc, struct inpcb *inp)
 {
 	struct tom_data *td = sc->tom_softc;
 	int idx = listen_hashfn(inp, td->listen_mask);
 	struct listen_ctx *cur;
 
 	mtx_lock(&td->lctx_hash_lock);
 	/* The walk stops right after the removal, so a plain FOREACH is safe. */
 	LIST_FOREACH(cur, &td->listen_hash[idx], link) {
 		if (cur->inp != inp)
 			continue;
 		LIST_REMOVE(cur, link);
 		td->lctx_count--;
 		break;
 	}
 	mtx_unlock(&td->lctx_hash_lock);
 
 	return (cur);
 }
 
 /*
  * Drop one reference on the lctx; the listening socket's inp must be write
  * locked by the caller.  If this was the last reference the lctx is freed,
  * which may free the inp as well.  Returns NULL when the inp was freed and
  * the (still locked) inp otherwise.
  */
 static struct inpcb *
 release_lctx(struct adapter *sc, struct listen_ctx *lctx)
 {
 	struct inpcb *inp = lctx->inp;
 
 	INP_WLOCK_ASSERT(inp);
 	if (!refcount_release(&lctx->refcount))
 		return (inp);
 
 	return (free_lctx(sc, lctx) ? NULL : inp);
 }
 
 /*
  * Build and transmit a FW_FLOWC_WR work request for the synqe's tid on the
  * synqe's offload tx queue, then mark the synqe with TPF_FLOWC_WR_SENT.
  * The listening inp must be write locked and the flag must not already be
  * set.  Panics if the work request cannot be allocated.
  */
 static void
 send_flowc_wr_synqe(struct adapter *sc, struct synq_entry *synqe)
 {
 	struct mbuf *m = synqe->syn;
 	struct ifnet *ifp = m->m_pkthdr.rcvif;
 	struct vi_info *vi = ifp->if_softc;
 	struct port_info *pi = vi->pi;
 	struct wrqe *wr;
 	struct fw_flowc_wr *flowc;
 	struct sge_ofld_txq *ofld_txq;
 	struct sge_ofld_rxq *ofld_rxq;
 	/* Number of mnemonic/value pairs filled in below. */
 	const int nparams = 6;
 	const int flowclen = sizeof(*flowc) + nparams * sizeof(struct fw_flowc_mnemval);
 	const u_int pfvf = sc->pf << S_FW_VIID_PFN;
 
 	INP_WLOCK_ASSERT(synqe->lctx->inp);
 	MPASS((synqe->flags & TPF_FLOWC_WR_SENT) == 0);
 
 	ofld_txq = &sc->sge.ofld_txq[synqe->params.txq_idx];
 	ofld_rxq = &sc->sge.ofld_rxq[synqe->params.rxq_idx];
 
 	wr = alloc_wrqe(roundup2(flowclen, 16), &ofld_txq->wrq);
 	if (wr == NULL) {
 		/* XXX */
 		panic("%s: allocation failure.", __func__);
 	}
 	flowc = wrtod(wr);
 	memset(flowc, 0, wr->wr_len);
 	flowc->op_to_nparams = htobe32(V_FW_WR_OP(FW_FLOWC_WR) |
 	    V_FW_FLOWC_WR_NPARAMS(nparams));
 	flowc->flowid_len16 = htonl(V_FW_WR_LEN16(howmany(flowclen, 16)) |
 	    V_FW_WR_FLOWID(synqe->tid));
 	flowc->mnemval[0].mnemonic = FW_FLOWC_MNEM_PFNVFN;
 	flowc->mnemval[0].val = htobe32(pfvf);
 	/* Both the channel and the port parameter carry pi->tx_chan. */
 	flowc->mnemval[1].mnemonic = FW_FLOWC_MNEM_CH;
 	flowc->mnemval[1].val = htobe32(pi->tx_chan);
 	flowc->mnemval[2].mnemonic = FW_FLOWC_MNEM_PORT;
 	flowc->mnemval[2].val = htobe32(pi->tx_chan);
 	flowc->mnemval[3].mnemonic = FW_FLOWC_MNEM_IQID;
 	flowc->mnemval[3].val = htobe32(ofld_rxq->iq.abs_id);
 	/* NOTE(review): 512 looks like a placeholder for both values -- confirm. */
 	flowc->mnemval[4].mnemonic = FW_FLOWC_MNEM_SNDBUF;
 	flowc->mnemval[4].val = htobe32(512);
 	flowc->mnemval[5].mnemonic = FW_FLOWC_MNEM_MSS;
 	flowc->mnemval[5].val = htobe32(512);
 
 	synqe->flags |= TPF_FLOWC_WR_SENT;
 	t4_wrq_tx(sc, wr);
 }
 
 static void
 send_abort_rpl_synqe(struct toedev *tod, struct synq_entry *synqe,
     int rst_status)
 {
 	struct adapter *sc = tod->tod_softc;
 	struct wrqe *wr;
 	struct cpl_abort_req *req;
 
 	INP_WLOCK_ASSERT(synqe->lctx->inp);
 
 	CTR5(KTR_CXGBE, "%s: synqe %p (0x%x), tid %d%s",
 	    __func__, synqe, synqe->flags, synqe->tid,
 	    synqe->flags & TPF_ABORT_SHUTDOWN ?
 	    " (abort already in progress)" : "");
 	if (synqe->flags & TPF_ABORT_SHUTDOWN)
 		return;	/* abort already in progress */
 	synqe->flags |= TPF_ABORT_SHUTDOWN;
 
 	if (!(synqe->flags & TPF_FLOWC_WR_SENT))
 		send_flowc_wr_synqe(sc, synqe);
 
 	wr = alloc_wrqe(sizeof(*req),
 	    &sc->sge.ofld_txq[synqe->params.txq_idx].wrq);
 	if (wr == NULL) {
 		/* XXX */
 		panic("%s: allocation failure.", __func__);
 	}
 	req = wrtod(wr);
 	INIT_TP_WR_MIT_CPL(req, CPL_ABORT_REQ, synqe->tid);
 	req->rsvd0 = 0;	/* don't have a snd_nxt */
 	req->rsvd1 = 1;	/* no data sent yet */
 	req->cmd = rst_status;
 
 	t4_l2t_send(sc, wr, &sc->l2t->l2tab[synqe->params.l2t_idx]);
 }
 
 /*
  * Send a CPL_PASS_OPEN_REQ for the lctx's stid on its control queue, using
  * the local port and IPv4 address of the lctx's inp and a wildcard peer.
  * Returns ENOMEM if the work request cannot be allocated, 0 otherwise.
  */
 static int
 create_server(struct adapter *sc, struct listen_ctx *lctx)
 {
 	struct cpl_pass_open_req *req;
 	struct wrqe *wr;
 
 	wr = alloc_wrqe(sizeof(*req), lctx->ctrlq);
 	if (wr == NULL) {
 		log(LOG_ERR, "%s: allocation failure", __func__);
 		return (ENOMEM);
 	}
 
 	req = wrtod(wr);
 	INIT_TP_WR(req, 0);
 	OPCODE_TID(req) = htobe32(MK_OPCODE_TID(CPL_PASS_OPEN_REQ, lctx->stid));
 
 	/* Local endpoint comes from the listening inp; the peer is a wildcard. */
 	req->local_port = lctx->inp->inp_lport;
 	req->local_ip = lctx->inp->inp_laddr.s_addr;
 	req->peer_port = 0;
 	req->peer_ip = 0;
 
 	req->opt0 = htobe64(V_TX_CHAN(lctx->ctrlq->eq.tx_chan));
 	req->opt1 = htobe64(V_CONN_POLICY(CPL_CONN_POLICY_ASK) |
 	    F_SYN_RSS_ENABLE | V_SYN_RSS_QUEUE(lctx->ofld_rxq->iq.abs_id));
 
 	t4_wrq_tx(sc, wr);
 	return (0);
 }
 
 /*
  * IPv6 counterpart of create_server(): send a CPL_PASS_OPEN_REQ6 for the
  * lctx's stid with the inp's local port and IPv6 address and a wildcard
  * peer.  Returns ENOMEM if the work request cannot be allocated, 0
  * otherwise.
  */
 static int
 create_server6(struct adapter *sc, struct listen_ctx *lctx)
 {
 	struct cpl_pass_open_req6 *req;
 	struct wrqe *wr;
 
 	wr = alloc_wrqe(sizeof(*req), lctx->ctrlq);
 	if (wr == NULL) {
 		log(LOG_ERR, "%s: allocation failure", __func__);
 		return (ENOMEM);
 	}
 
 	req = wrtod(wr);
 	INIT_TP_WR(req, 0);
 	OPCODE_TID(req) = htobe32(MK_OPCODE_TID(CPL_PASS_OPEN_REQ6, lctx->stid));
 
 	/* The 128-bit local address is copied as two 64-bit halves. */
 	req->local_port = lctx->inp->inp_lport;
 	req->local_ip_hi = *(uint64_t *)&lctx->inp->in6p_laddr.s6_addr[0];
 	req->local_ip_lo = *(uint64_t *)&lctx->inp->in6p_laddr.s6_addr[8];
 	req->peer_port = 0;
 	req->peer_ip_hi = 0;
 	req->peer_ip_lo = 0;
 
 	req->opt0 = htobe64(V_TX_CHAN(lctx->ctrlq->eq.tx_chan));
 	req->opt1 = htobe64(V_CONN_POLICY(CPL_CONN_POLICY_ASK) |
 	    F_SYN_RSS_ENABLE | V_SYN_RSS_QUEUE(lctx->ofld_rxq->iq.abs_id));
 
 	t4_wrq_tx(sc, wr);
 	return (0);
 }
 
 /*
  * Send a CPL_CLOSE_LISTSRV_REQ for the lctx's stid on its control queue,
  * with the reply directed at the lctx's offload rx queue.  Panics if the
  * work request cannot be allocated; always returns 0 otherwise.
  */
 static int
 destroy_server(struct adapter *sc, struct listen_ctx *lctx)
 {
 	struct cpl_close_listsvr_req *req;
 	struct wrqe *wr;
 
 	wr = alloc_wrqe(sizeof(*req), lctx->ctrlq);
 	if (wr == NULL) {
 		/* XXX */
 		panic("%s: allocation failure.", __func__);
 	}
 
 	req = wrtod(wr);
 	INIT_TP_WR(req, 0);
 	OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_CLOSE_LISTSRV_REQ,
 	    lctx->stid));
 	req->rsvd = htobe16(0);
 	req->reply_ctrl = htobe16(lctx->ofld_rxq->iq.abs_id);
 
 	t4_wrq_tx(sc, wr);
 	return (0);
 }
 
 /*
  * Start a listening server by sending a passive open request to HW.
  *
  * Can't take adapter lock here and access to sc->flags,
  * sc->offload_map, if_capenable are all race prone.
  *
  * Called with the listening inp write locked.  Always returns 0: every
  * reason for not starting a hardware listener (offload policy says no,
  * loopback address, KERN_TLS_ON set, no usable VI, listener already set up,
  * allocation or request failure) is treated as "not offloaded" rather than
  * as an error for the caller.
  */
 int
 t4_listen_start(struct toedev *tod, struct tcpcb *tp)
 {
 	struct adapter *sc = tod->tod_softc;
 	struct vi_info *vi;
 	struct port_info *pi;
 	struct inpcb *inp = tp->t_inpcb;
 	struct listen_ctx *lctx;
 	int i, rc, v;
 	struct offload_settings settings;
 
 	INP_WLOCK_ASSERT(inp);
 
 	/* Copy the policy result so it can be used after the lock is dropped. */
 	rw_rlock(&sc->policy_lock);
 	settings = *lookup_offload_policy(sc, OPEN_TYPE_LISTEN, NULL,
 	    EVL_MAKETAG(0xfff, 0, 0), inp);
 	rw_runlock(&sc->policy_lock);
 	if (!settings.offload)
 		return (0);
 
 	/* Don't start a hardware listener for any loopback address. */
 	if (inp->inp_vflag & INP_IPV6 && IN6_IS_ADDR_LOOPBACK(&inp->in6p_laddr))
 		return (0);
 	if (!(inp->inp_vflag & INP_IPV6) &&
 	    IN_LOOPBACK(ntohl(inp->inp_laddr.s_addr)))
 		return (0);
 	if (sc->flags & KERN_TLS_ON)
 		return (0);
 #if 0
 	ADAPTER_LOCK(sc);
 	if (IS_BUSY(sc)) {
 		log(LOG_ERR, "%s: listen request ignored, %s is busy",
 		    __func__, device_get_nameunit(sc->dev));
 		goto done;
 	}
 
 	KASSERT(uld_active(sc, ULD_TOM),
 	    ("%s: TOM not initialized", __func__));
 #endif
 
 	/*
 	 * Find an initialized VI with IFCAP_TOE (4 or 6).  We'll use the first
 	 * such VI's queues to send the passive open and receive the reply to
 	 * it.
 	 *
 	 * XXX: need a way to mark a port in use by offload.  if_cxgbe should
 	 * then reject any attempt to bring down such a port (and maybe reject
 	 * attempts to disable IFCAP_TOE on that port too?).
 	 */
 	for_each_port(sc, i) {
 		pi = sc->port[i];
 		for_each_vi(pi, v, vi) {
 			if (vi->flags & VI_INIT_DONE &&
 			    vi->ifp->if_capenable & IFCAP_TOE)
 				goto found;
 		}
 	}
 	goto done;	/* no port that's UP with IFCAP_TOE enabled */
 found:
 
 	if (listen_hash_find(sc, inp) != NULL)
 		goto done;	/* already setup */
 
 	lctx = alloc_lctx(sc, inp, vi);
 	if (lctx == NULL) {
 		log(LOG_ERR,
 		    "%s: listen request ignored, %s couldn't allocate lctx\n",
 		    __func__, device_get_nameunit(sc->dev));
 		goto done;
 	}
 	listen_hash_add(sc, lctx);
 
 	CTR6(KTR_CXGBE, "%s: stid %u (%s), lctx %p, inp %p vflag 0x%x",
 	    __func__, lctx->stid, tcpstates[tp->t_state], lctx, inp,
 	    inp->inp_vflag);
 
 	if (inp->inp_vflag & INP_IPV6)
 		rc = create_server6(sc, lctx);
 	else
 		rc = create_server(sc, lctx);
 	if (rc != 0) {
 		log(LOG_ERR, "%s: %s failed to create hw listener: %d.\n",
 		    __func__, device_get_nameunit(sc->dev), rc);
 		/* Undo the hash insertion and drop the only lctx reference. */
 		(void) listen_hash_del(sc, inp);
 		inp = release_lctx(sc, lctx);
 		/* can't be freed, host stack has a reference */
 		KASSERT(inp != NULL, ("%s: inp freed", __func__));
 		goto done;
 	}
 	/* Cleared by do_pass_open_rpl when the chip's reply arrives. */
 	lctx->flags |= LCTX_RPL_PENDING;
 done:
 #if 0
 	ADAPTER_UNLOCK(sc);
 #endif
 	return (0);
 }
 
 /*
  * Stop the hardware listener associated with the tcpcb's inp.  The lctx is
  * removed from the listen hash first.  Returns ENOENT if the inp had no
  * hardware listener, EINPROGRESS if the reply to the passive open is still
  * outstanding, and 0 after a close request has been sent to the chip.
  */
 int
 t4_listen_stop(struct toedev *tod, struct tcpcb *tp)
 {
 	struct adapter *sc = tod->tod_softc;
 	struct inpcb *inp = tp->t_inpcb;
 	struct listen_ctx *lctx;
 
 	INP_WLOCK_ASSERT(inp);
 
 	lctx = listen_hash_del(sc, inp);
 	if (lctx == NULL)
 		return (ENOENT);	/* no hardware listener for this inp */
 
 	CTR4(KTR_CXGBE, "%s: stid %u, lctx %p, flags %x", __func__, lctx->stid,
 	    lctx, lctx->flags);
 
 	if ((lctx->flags & LCTX_RPL_PENDING) == 0) {
 		destroy_server(sc, lctx);
 		return (0);
 	}
 
 	/*
 	 * The reply to the PASS_OPEN is still pending; we'll wait for it to
 	 * arrive and clean up when it does.
 	 */
 	return (EINPROGRESS);
 }
 
 /*
  * Allocate a synq entry for lctx.  flags must be M_WAITOK or M_NOWAIT and is
  * passed to malloc.  A new entry starts with one reference, holds a
  * reference on its lctx, and has no SYN attached.  Returns NULL if the
  * allocation fails.
  */
 static inline struct synq_entry *
 alloc_synqe(struct adapter *sc __unused, struct listen_ctx *lctx, int flags)
 {
 	struct synq_entry *synqe;
 
 	INP_RLOCK_ASSERT(lctx->inp);
 	MPASS(flags == M_WAITOK || flags == M_NOWAIT);
 
 	synqe = malloc(sizeof(*synqe), M_CXGBE, flags);
 	if (__predict_false(synqe == NULL))
 		return (NULL);
 
 	synqe->flags = TPF_SYNQE;
 	refcount_init(&synqe->refcnt, 1);
 	synqe->lctx = lctx;
 	hold_lctx(lctx);	/* Every synqe has a ref on its lctx. */
 	synqe->syn = NULL;
 
 	return (synqe);
 }
 
 /*
  * Take an additional reference on the synq entry.
  */
 static inline void
 hold_synqe(struct synq_entry *synqe)
 {
 	refcount_acquire(&synqe->refcnt);
 }
 
 /*
  * Drop one reference on the synq entry; the listening inp must be write
  * locked.  When the last reference goes away the entry's lctx reference is
  * released, its saved SYN is freed and the entry itself is freed.  Returns
  * the listening inp, or whatever release_lctx() returned (NULL if the inp
  * was freed) when the entry was destroyed.
  */
 static inline struct inpcb *
 release_synqe(struct adapter *sc, struct synq_entry *synqe)
 {
 	struct inpcb *inp;
 
 	MPASS(synqe->flags & TPF_SYNQE);
 	MPASS(synqe->lctx != NULL);
 
 	inp = synqe->lctx->inp;
 	MPASS(inp != NULL);
 	INP_WLOCK_ASSERT(inp);
 
 	if (!refcount_release(&synqe->refcnt))
 		return (inp);
 
 	inp = release_lctx(sc, synqe->lctx);
 	m_freem(synqe->syn);
 	free(synqe, M_CXGBE);
 
 	return (inp);
 }
 
 /*
  * Take a reference on the synq entry passed as arg.
  */
 void
 t4_syncache_added(struct toedev *tod __unused, void *arg)
 {
 	hold_synqe((struct synq_entry *)arg);
 }
 
 /*
  * Drop a reference on the synq entry passed as arg.  The listening inp is
  * write locked around the release and unlocked afterwards unless the
  * release reported that it was freed.
  */
 void
 t4_syncache_removed(struct toedev *tod, void *arg)
 {
 	struct synq_entry *synqe = arg;
 	struct adapter *sc = tod->tod_softc;
 	struct inpcb *inp;
 
 	inp = synqe->lctx->inp;
 
 	/*
 	 * XXX: this is a LOR but harmless when running from the softclock.
 	 */
 	INP_WLOCK(inp);
 	if ((inp = release_synqe(sc, synqe)) != NULL)
 		INP_WUNLOCK(inp);
 }
 
 /*
  * Record the sequence numbers and timestamp from the SYN|ACK in m.  Only
  * the first call for a given synq entry (ok_to_respond going from 0 to 1)
  * parses the packet and saves iss, irs and the timestamp value in the
  * synqe; later calls just discard the mbuf.  Always frees m and returns 0.
  */
 int
 t4_syncache_respond(struct toedev *tod, void *arg, struct mbuf *m)
 {
 	struct synq_entry *synqe = arg;
 	struct tcpopt to;
 	struct tcphdr *th;
 	struct ip *ip;
 
 	if (atomic_fetchadd_int(&synqe->ok_to_respond, 1) != 0) {
 		m_freem(m);	/* don't need this any more */
 		return (0);
 	}
 
 	/* The TCP header is taken to follow a fixed-size IPv4 or IPv6 header. */
 	ip = mtod(m, struct ip *);
 	if (ip->ip_v != IPVERSION)
 		th = (void *)((struct ip6_hdr *)ip + 1);
 	else
 		th = (void *)(ip + 1);
 
 	bzero(&to, sizeof(to));
 	tcp_dooptions(&to, (void *)(th + 1),
 	    (th->th_off << 2) - sizeof(*th), TO_SYN);
 
 	/* save these for later */
 	synqe->iss = be32toh(th->th_seq);
 	synqe->irs = be32toh(th->th_ack) - 1;
 	synqe->ts = to.to_tsval;
 
 	m_freem(m);	/* don't need this any more */
 	return (0);
 }
 
 /*
  * Handler for CPL_PASS_OPEN_RPL, the chip's reply to a passive open request.
  *
  * Looks up the lctx by stid, clears LCTX_RPL_PENDING and then, depending on
  * whether the inp has been dropped and whether the open succeeded, either
  * leaves the listener running, asks the chip to close it, or releases the
  * lctx.  Returns the status from the CPL.
  */
 static int
 do_pass_open_rpl(struct sge_iq *iq, const struct rss_header *rss,
     struct mbuf *m)
 {
 	struct adapter *sc = iq->adapter;
 	const struct cpl_pass_open_rpl *cpl = (const void *)(rss + 1);
 	int stid = GET_TID(cpl);
 	unsigned int status = cpl->status;
 	struct listen_ctx *lctx = lookup_stid(sc, stid);
 	struct inpcb *inp = lctx->inp;
 #ifdef INVARIANTS
 	unsigned int opcode = G_CPL_OPCODE(be32toh(OPCODE_TID(cpl)));
 #endif
 
 	KASSERT(opcode == CPL_PASS_OPEN_RPL,
 	    ("%s: unexpected opcode 0x%x", __func__, opcode));
 	KASSERT(m == NULL, ("%s: wasn't expecting payload", __func__));
 	KASSERT(lctx->stid == stid, ("%s: lctx stid mismatch", __func__));
 
 	INP_WLOCK(inp);
 
 	CTR4(KTR_CXGBE, "%s: stid %d, status %u, flags 0x%x",
 	    __func__, stid, status, lctx->flags);
 
 	lctx->flags &= ~LCTX_RPL_PENDING;
 
 	if (status != CPL_ERR_NONE)
 		log(LOG_ERR, "listener (stid %u) failed: %d\n", stid, status);
 
 #ifdef INVARIANTS
 	/*
 	 * If the inp has been dropped (listening socket closed) then
 	 * listen_stop must have run and taken the inp out of the hash.
 	 */
 	if (inp->inp_flags & INP_DROPPED) {
 		KASSERT(listen_hash_del(sc, inp) == NULL,
 		    ("%s: inp %p still in listen hash", __func__, inp));
 	}
 #endif
 
 	/*
 	 * Socket already closed and the hardware listener never started:
 	 * nothing to stop, just drop the lctx.  The inp is only unlocked if
 	 * release_lctx reports that it still exists.
 	 */
 	if (inp->inp_flags & INP_DROPPED && status != CPL_ERR_NONE) {
 		if (release_lctx(sc, lctx) != NULL)
 			INP_WUNLOCK(inp);
 		return (status);
 	}
 
 	/*
 	 * Listening socket stopped listening earlier and now the chip tells us
 	 * it has started the hardware listener.  Stop it; the lctx will be
 	 * released in do_close_server_rpl.
 	 */
 	if (inp->inp_flags & INP_DROPPED) {
 		destroy_server(sc, lctx);
 		INP_WUNLOCK(inp);
 		return (status);
 	}
 
 	/*
 	 * Failed to start hardware listener.  Take inp out of the hash and
 	 * release our reference on it.  An error message has been logged
 	 * already.
 	 */
 	if (status != CPL_ERR_NONE) {
 		listen_hash_del(sc, inp);
 		if (release_lctx(sc, lctx) != NULL)
 			INP_WUNLOCK(inp);
 		return (status);
 	}
 
 	/* hardware listener open for business */
 
 	INP_WUNLOCK(inp);
 	return (status);
 }
 
 /*
  * Handler for CPL_CLOSE_LISTSRV_RPL.  On success the lctx found via the stid
  * is released (under the inp write lock); on failure the error is logged and
  * the lctx is left alone.  Returns the status from the CPL.
  */
 static int
 do_close_server_rpl(struct sge_iq *iq, const struct rss_header *rss,
     struct mbuf *m)
 {
 	const struct cpl_close_listsvr_rpl *cpl = (const void *)(rss + 1);
 	struct adapter *sc = iq->adapter;
 	int stid = GET_TID(cpl);
 	unsigned int status = cpl->status;
 	struct listen_ctx *lctx = lookup_stid(sc, stid);
 	struct inpcb *inp = lctx->inp;
 #ifdef INVARIANTS
 	unsigned int opcode = G_CPL_OPCODE(be32toh(OPCODE_TID(cpl)));
 #endif
 
 	KASSERT(opcode == CPL_CLOSE_LISTSRV_RPL,
 	    ("%s: unexpected opcode 0x%x", __func__, opcode));
 	KASSERT(m == NULL, ("%s: wasn't expecting payload", __func__));
 	KASSERT(lctx->stid == stid, ("%s: lctx stid mismatch", __func__));
 
 	CTR3(KTR_CXGBE, "%s: stid %u, status %u", __func__, stid, status);
 
 	if (status == CPL_ERR_NONE) {
 		INP_WLOCK(inp);
 		/* Unlock only if the inp survived the release. */
 		if (release_lctx(sc, lctx) != NULL)
 			INP_WUNLOCK(inp);
 	} else {
 		log(LOG_ERR, "%s: failed (%u) to close listener for stid %u\n",
 		    __func__, status, stid);
 	}
 
 	return (status);
 }
 
 static void
 done_with_synqe(struct adapter *sc, struct synq_entry *synqe)
 {
 	struct listen_ctx *lctx = synqe->lctx;
 	struct inpcb *inp = lctx->inp;
 	struct l2t_entry *e = &sc->l2t->l2tab[synqe->params.l2t_idx];
 	int ntids;
 
 	INP_WLOCK_ASSERT(inp);
 	ntids = inp->inp_vflag & INP_IPV6 ? 2 : 1;
 
 	remove_tid(sc, synqe->tid, ntids);
 	release_tid(sc, synqe->tid, lctx->ctrlq);
 	t4_l2t_release(e);
 	inp = release_synqe(sc, synqe);
 	if (inp)
 		INP_WUNLOCK(inp);
 }
 
 /*
  * Look up the synq entry for tid, write lock its listening inp and run
  * done_with_synqe() on it (which takes care of the unlock).
  */
 void
 synack_failure_cleanup(struct adapter *sc, int tid)
 {
 	struct synq_entry *synqe;
 
 	synqe = lookup_tid(sc, tid);
 	INP_WLOCK(synqe->lctx->inp);
 	done_with_synqe(sc, synqe);
 }
 
 /*
  * Handler for CPL_ABORT_REQ_RSS on a tid that still belongs to a synq
  * entry.  Negative advice is ignored.  Otherwise a flowc work request is
  * sent if none has been sent yet, the synqe is torn down unless an abort we
  * started is already in flight, and a CPL_ABORT_RPL is sent in either case.
  * Always returns 0.
  */
 int
 do_abort_req_synqe(struct sge_iq *iq, const struct rss_header *rss,
     struct mbuf *m)
 {
 	const struct cpl_abort_req_rss *cpl = (const void *)(rss + 1);
 	struct adapter *sc = iq->adapter;
 	unsigned int tid = GET_TID(cpl);
 	struct synq_entry *synqe = lookup_tid(sc, tid);
 	struct inpcb *inp = synqe->lctx->inp;
 	struct sge_ofld_txq *ofld_txq;
 #ifdef INVARIANTS
 	unsigned int opcode = G_CPL_OPCODE(be32toh(OPCODE_TID(cpl)));
 #endif
 
 	KASSERT(opcode == CPL_ABORT_REQ_RSS,
 	    ("%s: unexpected opcode 0x%x", __func__, opcode));
 	KASSERT(m == NULL, ("%s: wasn't expecting payload", __func__));
 	KASSERT(synqe->tid == tid, ("%s: toep tid mismatch", __func__));
 
 	CTR6(KTR_CXGBE, "%s: tid %u, synqe %p (0x%x), lctx %p, status %d",
 	    __func__, tid, synqe, synqe->flags, synqe->lctx, cpl->status);
 
 	if (negative_advice(cpl->status))
 		return (0);	/* Ignore negative advice */
 
 	INP_WLOCK(inp);
 
 	/* Remember the tx queue now; the synqe may be gone by the time we reply. */
 	ofld_txq = &sc->sge.ofld_txq[synqe->params.txq_idx];
 
 	if ((synqe->flags & TPF_FLOWC_WR_SENT) == 0)
 		send_flowc_wr_synqe(sc, synqe);
 
 	/*
 	 * If we'd initiated an abort earlier the reply to it is responsible for
 	 * cleaning up resources.  Otherwise we tear everything down right here
 	 * right now.  We owe the T4 a CPL_ABORT_RPL no matter what.
 	 */
 	if (synqe->flags & TPF_ABORT_SHUTDOWN) {
 		INP_WUNLOCK(inp);
 	} else {
 		/* inp lock released by done_with_synqe */
 		done_with_synqe(sc, synqe);
 	}
 
 	send_abort_rpl(sc, ofld_txq, tid, CPL_ABORT_NO_RST);
 	return (0);
 }
 
 /*
  * Handler for CPL_ABORT_RPL_RSS on a tid that still belongs to a synq
  * entry: the reply to an abort started earlier (TPF_ABORT_SHUTDOWN is
  * expected to be set).  Tears down the synqe and returns 0.
  */
 int
 do_abort_rpl_synqe(struct sge_iq *iq, const struct rss_header *rss,
     struct mbuf *m)
 {
 	const struct cpl_abort_rpl_rss *cpl = (const void *)(rss + 1);
 	struct adapter *sc = iq->adapter;
 	unsigned int tid = GET_TID(cpl);
 	struct synq_entry *synqe = lookup_tid(sc, tid);
 	struct inpcb *inp = synqe->lctx->inp;
 #ifdef INVARIANTS
 	unsigned int opcode = G_CPL_OPCODE(be32toh(OPCODE_TID(cpl)));
 #endif
 
 	KASSERT(opcode == CPL_ABORT_RPL_RSS,
 	    ("%s: unexpected opcode 0x%x", __func__, opcode));
 	KASSERT(m == NULL, ("%s: wasn't expecting payload", __func__));
 	KASSERT(synqe->tid == tid, ("%s: toep tid mismatch", __func__));
 
 	CTR6(KTR_CXGBE, "%s: tid %u, synqe %p (0x%x), lctx %p, status %d",
 	    __func__, tid, synqe, synqe->flags, synqe->lctx, cpl->status);
 
 	INP_WLOCK(inp);
 	KASSERT(synqe->flags & TPF_ABORT_SHUTDOWN,
 	    ("%s: wasn't expecting abort reply for synqe %p (0x%x)",
 	    __func__, synqe, synqe->flags));
 
 	/* done_with_synqe drops the inp lock taken above. */
 	done_with_synqe(sc, synqe);
 
 	return (0);
 }
 
 /*
  * Hand the connection tracked by the synq entry (arg) over to the new
  * socket so: attach the synqe's toepcb to the socket, mark the connection
  * established with the saved iss/irs/options, repoint the tid at the
  * toepcb, flag the synqe as expanded and set the inp's flow type and flow
  * id.  Runs in the network epoch with the new socket's inp write locked.
  */
 void
 t4_offload_socket(struct toedev *tod, void *arg, struct socket *so)
 {
 	struct adapter *sc = tod->tod_softc;
 	struct synq_entry *synqe = arg;
 	struct inpcb *inp = sotoinpcb(so);
 	struct toepcb *toep = synqe->toep;
 
 	NET_EPOCH_ASSERT();	/* prevents bad race with accept() */
 	INP_WLOCK_ASSERT(inp);
 	KASSERT(synqe->flags & TPF_SYNQE,
 	    ("%s: %p not a synq_entry?", __func__, arg));
 	MPASS(toep->tid == synqe->tid);
 
 	offload_socket(so, toep);
 	make_established(toep, synqe->iss, synqe->irs, synqe->tcp_opt);
 	toep->flags |= TPF_CPL_PENDING;
 	/* From here on the tid's context is the toepcb, not the synqe. */
 	update_tid(sc, synqe->tid, toep);
 	synqe->flags |= TPF_SYNQE_EXPANDED;
 	inp->inp_flowtype = (inp->inp_vflag & INP_IPV6) ?
 	    M_HASHTYPE_RSS_TCP_IPV6 : M_HASHTYPE_RSS_TCP_IPV4;
 	inp->inp_flowid = synqe->rss_hash;
 }
 
 /*
  * Translate the TCP options reported by the chip (t4opt) into a struct
  * tcpopt.  *to is zeroed first; MSS, window scale (only for 0 < wsf < 15),
  * timestamp and SACK-permitted are then flagged as present when set in
  * t4opt.
  */
 static void
 t4opt_to_tcpopt(const struct tcp_options *t4opt, struct tcpopt *to)
 {
 	bzero(to, sizeof(*to));
 
 	if (t4opt->mss != 0) {
 		to->to_mss = be16toh(t4opt->mss);
 		to->to_flags |= TOF_MSS;
 	}
 
 	if (t4opt->wsf > 0 && t4opt->wsf < 15) {
 		to->to_wscale = t4opt->wsf;
 		to->to_flags |= TOF_SCALE;
 	}
 
 	if (t4opt->tstamp != 0)
 		to->to_flags |= TOF_TS;
 
 	if (t4opt->sack != 0)
 		to->to_flags |= TOF_SACKPERM;
 }
 
 /*
  * Report whether the Ethernet header length recorded in the CPL is larger
  * than a VLAN-tagged Ethernet header.  T6 and later chips use a different
  * field layout in hdr_len than earlier chips.
  */
 static bool
 encapsulated_syn(struct adapter *sc, const struct cpl_pass_accept_req *cpl)
 {
 	u_int hlen = be32toh(cpl->hdr_len);
 
 	return ((chip_id(sc) >= CHELSIO_T6 ?
 	    G_T6_ETH_HDR_LEN(hlen) : G_ETH_HDR_LEN(hlen)) >
 	    sizeof(struct ether_vlan_header));
 }
 
 /*
  * Extract protocol header information from the packet headers that follow
  * the CPL_PASS_ACCEPT_REQ in m.  Each output pointer is optional (may be
  * NULL):
  *   inc   - zeroed, then filled with the ports and addresses of the SYN
  *           (INC_ISIPV6 is set when the IP version is not 4).
  *   th    - a copy of the TCP header with fields converted to host order.
  *   iptos - the IPv4 TOS byte or, with INET6, the IPv6 traffic class.
  * The header offsets come from the hdr_len field of the CPL, whose layout
  * differs between T6+ and earlier chips.
  */
 static void
 pass_accept_req_to_protohdrs(struct adapter *sc, const struct mbuf *m,
     struct in_conninfo *inc, struct tcphdr *th, uint8_t *iptos)
 {
 	const struct cpl_pass_accept_req *cpl = mtod(m, const void *);
 	const struct ether_header *eh;
 	unsigned int hlen = be32toh(cpl->hdr_len);
 	uintptr_t l3hdr;
 	const struct tcphdr *tcp;
 
 	/* The Ethernet header starts right after the CPL. */
 	eh = (const void *)(cpl + 1);
 	if (chip_id(sc) >= CHELSIO_T6) {
 		l3hdr = ((uintptr_t)eh + G_T6_ETH_HDR_LEN(hlen));
 		tcp = (const void *)(l3hdr + G_T6_IP_HDR_LEN(hlen));
 	} else {
 		l3hdr = ((uintptr_t)eh + G_ETH_HDR_LEN(hlen));
 		tcp = (const void *)(l3hdr + G_IP_HDR_LEN(hlen));
 	}
 
 	/* extract TOS (DiffServ + ECN) byte for AccECN */
 	if (iptos) {
 		if (((struct ip *)l3hdr)->ip_v == IPVERSION) {
 			const struct ip *ip = (const void *)l3hdr;
 			*iptos = ip->ip_tos;
 		}
 #ifdef INET6
 		else
 		if (((struct ip *)l3hdr)->ip_v == (IPV6_VERSION >> 4)) {
 			const struct ip6_hdr *ip6 = (const void *)l3hdr;
 			*iptos = (ntohl(ip6->ip6_flow) >> 20) & 0xff;
 		}
 #endif /* INET6 */
 		/* NOTE: *iptos is left untouched if neither version matched. */
 	}
 
 	if (inc) {
 		bzero(inc, sizeof(*inc));
 		inc->inc_fport = tcp->th_sport;
 		inc->inc_lport = tcp->th_dport;
 		if (((struct ip *)l3hdr)->ip_v == IPVERSION) {
 			const struct ip *ip = (const void *)l3hdr;
 
 			inc->inc_faddr = ip->ip_src;
 			inc->inc_laddr = ip->ip_dst;
 		} else {
 			const struct ip6_hdr *ip6 = (const void *)l3hdr;
 
 			inc->inc_flags |= INC_ISIPV6;
 			inc->inc6_faddr = ip6->ip6_src;
 			inc->inc6_laddr = ip6->ip6_dst;
 		}
 	}
 
 	if (th) {
 		bcopy(tcp, th, sizeof(*th));
 		tcp_fields_to_host(th);		/* just like tcp_input */
 	}
 }
 
 /*
  * Get the L2 table entry to use for reaching the peer described by inc via
  * ifp.  A route lookup in the default FIB decides whether the L2 destination
  * is the peer itself or a gateway; NULL is returned if there is no route or
  * the route does not go out of ifp.  For IPv6 with a link-local local
  * address the lookup is skipped.
  * NOTE(review): in that link-local case dst has only sa_len and sa_family
  * set (the address is still all zeroes from the bzero) when it is passed to
  * t4_l2t_get -- confirm that this is intended.
  * NOTE(review): the IPv4 gateway case relies on an unbraced nested if/else;
  * the final "else" pairs with the NHF_GATEWAY test.
  */
 static struct l2t_entry *
 get_l2te_for_nexthop(struct port_info *pi, struct ifnet *ifp,
     struct in_conninfo *inc)
 {
 	struct l2t_entry *e;
 	struct sockaddr_in6 sin6;
 	struct sockaddr *dst = (void *)&sin6;
 	struct nhop_object *nh;
 
 	if (inc->inc_flags & INC_ISIPV6) {
 		bzero(dst, sizeof(struct sockaddr_in6));
 		dst->sa_len = sizeof(struct sockaddr_in6);
 		dst->sa_family = AF_INET6;
 
 		if (IN6_IS_ADDR_LINKLOCAL(&inc->inc6_laddr)) {
 			/* no need for route lookup */
 			e = t4_l2t_get(pi, ifp, dst);
 			return (e);
 		}
 
 		nh = fib6_lookup(RT_DEFAULT_FIB, &inc->inc6_faddr, 0, NHR_NONE, 0);
 		if (nh == NULL)
 			return (NULL);
 		if (nh->nh_ifp != ifp)
 			return (NULL);
 		if (nh->nh_flags & NHF_GATEWAY)
 			((struct sockaddr_in6 *)dst)->sin6_addr = nh->gw6_sa.sin6_addr;
 		else
 			((struct sockaddr_in6 *)dst)->sin6_addr = inc->inc6_faddr;
 	} else {
 		dst->sa_len = sizeof(struct sockaddr_in);
 		dst->sa_family = AF_INET;
 
 		nh = fib4_lookup(RT_DEFAULT_FIB, inc->inc_faddr, 0, NHR_NONE, 0);
 		if (nh == NULL)
 			return (NULL);
 		if (nh->nh_ifp != ifp)
 			return (NULL);
 		if (nh->nh_flags & NHF_GATEWAY)
-			((struct sockaddr_in *)dst)->sin_addr = nh->gw4_sa.sin_addr;
+			if (nh->gw_sa.sa_family == AF_INET)
+				((struct sockaddr_in *)dst)->sin_addr = nh->gw4_sa.sin_addr;
+			else
+				*((struct sockaddr_in6 *)dst) = nh->gw6_sa;
 		else
 			((struct sockaddr_in *)dst)->sin_addr = inc->inc_faddr;
 	}
 
 	e = t4_l2t_get(pi, ifp, dst);
 	return (e);
 }
 
 /*
  * Send a CPL_PASS_ACCEPT_RPL for tid carrying opt0 and opt2.  Chips other
  * than T4 use the larger T5 layout, which also carries the synqe's iss.
  * The request is sent through the synqe's L2 table entry.  Returns ENOMEM
  * if the work request cannot be allocated, otherwise the result of
  * t4_l2t_send().
  */
 static int
 send_synack(struct adapter *sc, struct synq_entry *synqe, uint64_t opt0,
     uint32_t opt2, int tid)
 {
 	struct l2t_entry *e = &sc->l2t->l2tab[synqe->params.l2t_idx];
 	struct cpl_pass_accept_rpl *rpl;
 	struct wrqe *wr;
 	int wr_len;
 
 	if (is_t4(sc))
 		wr_len = sizeof(struct cpl_pass_accept_rpl);
 	else
 		wr_len = sizeof(struct cpl_t5_pass_accept_rpl);
 
 	wr = alloc_wrqe(wr_len, &sc->sge.ctrlq[0]);
 	if (wr == NULL)
 		return (ENOMEM);
 	rpl = wrtod(wr);
 
 	if (!is_t4(sc)) {
 		struct cpl_t5_pass_accept_rpl *rpl5 = (void *)rpl;
 
 		INIT_TP_WR_MIT_CPL(rpl5, CPL_PASS_ACCEPT_RPL, tid);
 		rpl5->iss = htobe32(synqe->iss);
 	} else {
 		INIT_TP_WR_MIT_CPL(rpl, CPL_PASS_ACCEPT_RPL, tid);
 	}
 	rpl->opt0 = opt0;
 	rpl->opt2 = opt2;
 
 	return (t4_l2t_send(sc, wr, e));
 }
 
 /*
  * Abandon processing of a pass-accept request.  The macro expects three names
  * in the scope where it is expanded: an mbuf pointer 'm', an int
  * 'reject_reason', and a label 'reject'.
  *
  * When 'tunnel' is false the mbuf is freed and m is set to NULL; when it is
  * true m is left untouched.  In both cases the source line of the expansion
  * is stored in reject_reason before jumping to the reject label.
  */
 #define REJECT_PASS_ACCEPT_REQ(tunnel)	do { \
 	if (!tunnel) { \
 		m_freem(m); \
 		m = NULL; \
 	} \
 	reject_reason = __LINE__; \
 	goto reject; \
 } while (0)
 
 /*
  * The context associated with a tid entry via insert_tid could be a synq_entry
  * or a toepcb.  The only way CPL handlers can tell is via a bit in these flags.
  */
 CTASSERT(offsetof(struct toepcb, flags) == offsetof(struct synq_entry, flags));
 
 /*
  * Incoming SYN on a listening socket.
  *
  * Decides whether the connection request can be offloaded.  On success a
  * synq entry is created, the tid is inserted, a SYN|ACK reply is sent and 0
  * is returned.  Every refusal goes through REJECT_PASS_ACCEPT_REQ(), which
  * records the source line as the return value and jumps to the reject label;
  * if the mbuf is still held there (tunnel == true) the original SYN is passed
  * to hw_ifp->if_input() so the software stack can handle it.
  *
  * XXX: Every use of ifp in this routine has a bad race with up/down, toe/-toe,
  * etc.
  */
 static int
 do_pass_accept_req(struct sge_iq *iq, const struct rss_header *rss,
     struct mbuf *m)
 {
 	struct adapter *sc = iq->adapter;
 	struct toedev *tod;
 	const struct cpl_pass_accept_req *cpl = mtod(m, const void *);
 	unsigned int stid = G_PASS_OPEN_TID(be32toh(cpl->tos_stid));
 	unsigned int tid = GET_TID(cpl);
 	struct listen_ctx *lctx = lookup_stid(sc, stid);
 	struct inpcb *inp;
 	struct socket *so;
 	struct in_conninfo inc;
 	struct tcphdr th;
 	struct tcpopt to;
 	struct port_info *pi;
 	struct vi_info *vi;
 	struct ifnet *hw_ifp, *ifp;
 	struct l2t_entry *e = NULL;
 	struct synq_entry *synqe = NULL;
 	int reject_reason, v, ntids;
 	uint16_t vid, l2info;
 	struct epoch_tracker et;
 #ifdef INVARIANTS
 	unsigned int opcode = G_CPL_OPCODE(be32toh(OPCODE_TID(cpl)));
 #endif
 	struct offload_settings settings;
 	uint8_t iptos;
 
 	KASSERT(opcode == CPL_PASS_ACCEPT_REQ,
 	    ("%s: unexpected opcode 0x%x", __func__, opcode));
 	KASSERT(lctx->stid == stid, ("%s: lctx stid mismatch", __func__));
 
 	CTR4(KTR_CXGBE, "%s: stid %u, tid %u, lctx %p", __func__, stid, tid,
 	    lctx);
 
 	/*
 	 * Figure out the port the SYN arrived on.  We'll look for an exact VI
 	 * match in a bit but in case we don't find any we'll use the main VI as
 	 * the incoming ifnet.
 	 */
 	l2info = be16toh(cpl->l2info);
 	pi = sc->port[G_SYN_INTF(l2info)];
 	hw_ifp = pi->vi[0].ifp;
 	m->m_pkthdr.rcvif = hw_ifp;
 
 	CURVNET_SET(lctx->vnet);	/* before any potential REJECT */
 
 	/*
 	 * If VXLAN/NVGRE parsing is enabled then SYNs in the inner traffic will
 	 * also hit the listener.  We don't want to offload those.
 	 */
 	if (encapsulated_syn(sc, cpl)) {
 		REJECT_PASS_ACCEPT_REQ(true);
 	}
 
 	/*
 	 * Use the MAC index to lookup the associated VI.  If this SYN didn't
 	 * match a perfect MAC filter, punt.
 	 */
 	if (!(l2info & F_SYN_XACT_MATCH)) {
 		REJECT_PASS_ACCEPT_REQ(true);
 	}
 	for_each_vi(pi, v, vi) {
 		if (vi->xact_addr_filt == G_SYN_MAC_IDX(l2info))
 			goto found;
 	}
 	REJECT_PASS_ACCEPT_REQ(true);
 found:
 	hw_ifp = vi->ifp;	/* the cxgbe ifnet */
 	m->m_pkthdr.rcvif = hw_ifp;
 	tod = TOEDEV(hw_ifp);
 
 	/*
 	 * Don't offload if the peer requested a TCP option that's not known to
 	 * the silicon.  Send the SYN to the kernel instead.
 	 */
 	if (__predict_false(cpl->tcpopt.unknown))
 		REJECT_PASS_ACCEPT_REQ(true);
 
 	/*
 	 * Figure out if there is a pseudo interface (vlan, lagg, etc.)
 	 * involved.  Don't offload if the SYN had a VLAN tag and the vid
 	 * doesn't match anything on this interface.
 	 *
 	 * XXX: lagg support, lagg + vlan support.
 	 */
 	vid = EVL_VLANOFTAG(be16toh(cpl->vlan));
 	if (vid != 0xfff && vid != 0) {
 		ifp = VLAN_DEVAT(hw_ifp, vid);
 		if (ifp == NULL)
 			REJECT_PASS_ACCEPT_REQ(true);
 	} else
 		ifp = hw_ifp;
 
 	/*
 	 * Don't offload if the ifnet that the SYN came in on is not in the same
 	 * vnet as the listening socket.
 	 */
 	if (lctx->vnet != ifp->if_vnet)
 		REJECT_PASS_ACCEPT_REQ(true);
 
 	pass_accept_req_to_protohdrs(sc, m, &inc, &th, &iptos);
 	if (inc.inc_flags & INC_ISIPV6) {
 
 		/* Don't offload if the ifcap isn't enabled */
 		if ((ifp->if_capenable & IFCAP_TOE6) == 0)
 			REJECT_PASS_ACCEPT_REQ(true);
 
 		/*
 		 * SYN must be directed to an IP6 address on this ifnet.  This
 		 * is more restrictive than in6_localip.
 		 */
 		NET_EPOCH_ENTER(et);
 		if (!in6_ifhasaddr(ifp, &inc.inc6_laddr)) {
 			NET_EPOCH_EXIT(et);
 			REJECT_PASS_ACCEPT_REQ(true);
 		}
 
 		ntids = 2;	/* count given to insert_tid/remove_tid below */
 	} else {
 
 		/* Don't offload if the ifcap isn't enabled */
 		if ((ifp->if_capenable & IFCAP_TOE4) == 0)
 			REJECT_PASS_ACCEPT_REQ(true);
 
 		/*
 		 * SYN must be directed to an IP address on this ifnet.  This
 		 * is more restrictive than in_localip.
 		 */
 		NET_EPOCH_ENTER(et);
 		if (!in_ifhasaddr(ifp, inc.inc_laddr)) {
 			NET_EPOCH_EXIT(et);
 			REJECT_PASS_ACCEPT_REQ(true);
 		}
 
 		ntids = 1;	/* count given to insert_tid/remove_tid below */
 	}
 
 	/*
 	 * Both branches above fall through with the net epoch still entered.
 	 * Every exit from here on, success or reject, leaves it explicitly.
 	 */
 	e = get_l2te_for_nexthop(pi, ifp, &inc);
 	if (e == NULL) {
 		NET_EPOCH_EXIT(et);
 		REJECT_PASS_ACCEPT_REQ(true);
 	}
 
 	/* Don't offload if the 4-tuple is already in use */
 	if (toe_4tuple_check(&inc, &th, ifp) != 0) {
 		NET_EPOCH_EXIT(et);
 		REJECT_PASS_ACCEPT_REQ(false);
 	}
 
 	inp = lctx->inp;		/* listening socket, not owned by TOE */
 	INP_RLOCK(inp);
 
 	/* Don't offload if the listening socket has closed */
 	if (__predict_false(inp->inp_flags & INP_DROPPED)) {
 		INP_RUNLOCK(inp);
 		NET_EPOCH_EXIT(et);
 		REJECT_PASS_ACCEPT_REQ(false);
 	}
 	so = inp->inp_socket;
 	/* Copy the matching policy out while the policy lock is held. */
 	rw_rlock(&sc->policy_lock);
 	settings = *lookup_offload_policy(sc, OPEN_TYPE_PASSIVE, m,
 	    EVL_MAKETAG(0xfff, 0, 0), inp);
 	rw_runlock(&sc->policy_lock);
 	if (!settings.offload) {
 		INP_RUNLOCK(inp);
 		NET_EPOCH_EXIT(et);
 		REJECT_PASS_ACCEPT_REQ(true);	/* Rejected by COP. */
 	}
 
 	synqe = alloc_synqe(sc, lctx, M_NOWAIT);
 	if (synqe == NULL) {
 		INP_RUNLOCK(inp);
 		NET_EPOCH_EXIT(et);
 		REJECT_PASS_ACCEPT_REQ(true);
 	}
 	MPASS(rss->hash_type == RSS_HASH_TCP);
 	synqe->rss_hash = be32toh(rss->hash_val);
 	/* Cleared here; checked after toe_syncache_add() returns. */
 	atomic_store_int(&synqe->ok_to_respond, 0);
 
 	init_conn_params(vi, &settings, &inc, so, &cpl->tcpopt, e->idx,
 	    &synqe->params);
 
 	/*
 	 * If all goes well t4_syncache_respond will get called during
 	 * syncache_add.  Note that syncache_add releases the pcb lock.
 	 */
 	t4opt_to_tcpopt(&cpl->tcpopt, &to);
 	toe_syncache_add(&inc, &to, &th, inp, tod, synqe, iptos);
 
 	if (atomic_load_int(&synqe->ok_to_respond) > 0) {
 		uint64_t opt0;
 		uint32_t opt2;
 
 		opt0 = calc_options0(vi, &synqe->params);
 		opt2 = calc_options2(vi, &synqe->params);
 
 		/* The synq entry takes over the SYN mbuf from here. */
 		insert_tid(sc, tid, synqe, ntids);
 		synqe->tid = tid;
 		synqe->syn = m;
 		m = NULL;
 
 		if (send_synack(sc, synqe, opt0, opt2, tid) != 0) {
 			/* Take the mbuf back so the reject path can tunnel it. */
 			remove_tid(sc, tid, ntids);
 			m = synqe->syn;
 			synqe->syn = NULL;
 			NET_EPOCH_EXIT(et);
 			REJECT_PASS_ACCEPT_REQ(true);
 		}
 
 		CTR6(KTR_CXGBE,
 		    "%s: stid %u, tid %u, synqe %p, opt0 %#016lx, opt2 %#08x",
 		    __func__, stid, tid, synqe, be64toh(opt0), be32toh(opt2));
 	} else {
 		NET_EPOCH_EXIT(et);
 		REJECT_PASS_ACCEPT_REQ(false);
 	}
 
 	NET_EPOCH_EXIT(et);
 	CURVNET_RESTORE();
 	return (0);
 reject:
 	/*
 	 * Common failure path: drop the L2 entry and tid, release the synq
 	 * entry if one was allocated, and hand the SYN to the interface's
 	 * input routine when the mbuf was not freed by the REJECT macro.
 	 */
 	CURVNET_RESTORE();
 	CTR4(KTR_CXGBE, "%s: stid %u, tid %u, REJECT (%d)", __func__, stid, tid,
 	    reject_reason);
 
 	if (e)
 		t4_l2t_release(e);
 	release_tid(sc, tid, lctx->ctrlq);
 	if (synqe) {
 		inp = synqe->lctx->inp;
 		INP_WLOCK(inp);
 		/* Unlock only if release_synqe() hands the inpcb back. */
 		inp = release_synqe(sc, synqe);
 		if (inp)
 			INP_WUNLOCK(inp);
 	}
 
 	if (m) {
 		/*
 		 * The connection request hit a TOE listener but is being passed
 		 * on to the kernel sw stack instead of getting offloaded.
 		 */
 		m_adj(m, sizeof(*cpl));
 		m->m_pkthdr.csum_flags |= (CSUM_IP_CHECKED | CSUM_IP_VALID |
 		    CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
 		m->m_pkthdr.csum_data = 0xffff;
 		hw_ifp->if_input(hw_ifp, m);
 	}
 
 	return (reject_reason);
 }
 
 /*
  * Fabricate the headers of the ACK that completes the handshake for a synq
  * entry: the connection info and TCP header are regenerated from the saved
  * SYN and then patched with the sequence numbers from the establish CPL.
  * The TCP options are cleared, and a timestamp echo is filled in only when
  * the CPL reports that timestamps were negotiated.
  */
 static void
 synqe_to_protohdrs(struct adapter *sc, struct synq_entry *synqe,
     const struct cpl_pass_establish *cpl, struct in_conninfo *inc,
     struct tcphdr *th, struct tcpopt *to)
 {
 	uint8_t tos_unused;
 	uint16_t negotiated;
 
 	negotiated = be16toh(cpl->tcp_opt);
 
 	/* Begin with what the original SYN looked like. */
 	pass_accept_req_to_protohdrs(sc, synqe->syn, inc, th, &tos_unused);
 
 	/* Then rewrite it into the ACK of our SYN|ACK. */
 	th->th_seq = be32toh(cpl->rcv_isn);
 	th->th_ack = synqe->iss + 1;
 	th->th_flags = TH_ACK;
 
 	bzero(to, sizeof(*to));
 	if (!G_TCPOPT_TSTAMP(negotiated))
 		return;
 	to->to_flags |= TOF_TS;
 	to->to_tsecr = synqe->ts;
 }
 
 /*
  * Handler for CPL_PASS_ESTABLISH.  The tid is looked up as a synq entry, a
  * toepcb is allocated for the connection, and toe_syncache_expand() is asked
  * to produce the new socket.  On any failure the connection is aborted via
  * send_abort_rpl_synqe().  Always returns 0.
  */
 static int
 do_pass_establish(struct sge_iq *iq, const struct rss_header *rss,
     struct mbuf *m)
 {
 	struct adapter *sc = iq->adapter;
 	struct vi_info *vi;
 	struct ifnet *ifp;
 	const struct cpl_pass_establish *cpl = (const void *)(rss + 1);
 #if defined(KTR) || defined(INVARIANTS)
 	unsigned int stid = G_PASS_OPEN_TID(be32toh(cpl->tos_stid));
 #endif
 	unsigned int tid = GET_TID(cpl);
 	struct synq_entry *synqe = lookup_tid(sc, tid);
 	struct listen_ctx *lctx = synqe->lctx;
 	struct inpcb *inp = lctx->inp, *new_inp;
 	struct socket *so;
 	struct tcphdr th;
 	struct tcpopt to;
 	struct in_conninfo inc;
 	struct toepcb *toep;
 	struct epoch_tracker et;
 	int rstreason;
 #ifdef INVARIANTS
 	unsigned int opcode = G_CPL_OPCODE(be32toh(OPCODE_TID(cpl)));
 #endif
 
 	KASSERT(opcode == CPL_PASS_ESTABLISH,
 	    ("%s: unexpected opcode 0x%x", __func__, opcode));
 	KASSERT(m == NULL, ("%s: wasn't expecting payload", __func__));
 	KASSERT(lctx->stid == stid, ("%s: lctx stid mismatch", __func__));
 	KASSERT(synqe->flags & TPF_SYNQE,
 	    ("%s: tid %u (ctx %p) not a synqe", __func__, tid, synqe));
 
 	CURVNET_SET(lctx->vnet);
 	NET_EPOCH_ENTER(et);	/* for syncache_expand */
 	INP_WLOCK(inp);
 
 	CTR6(KTR_CXGBE,
 	    "%s: stid %u, tid %u, synqe %p (0x%x), inp_flags 0x%x",
 	    __func__, stid, tid, synqe, synqe->flags, inp->inp_flags);
 
 	/* The ifnet was stored in the saved SYN's pkthdr. */
 	ifp = synqe->syn->m_pkthdr.rcvif;
 	vi = ifp->if_softc;
 	KASSERT(vi->adapter == sc,
 	    ("%s: vi %p, sc %p mismatch", __func__, vi, sc));
 
 	if (__predict_false(inp->inp_flags & INP_DROPPED)) {
 		/*
 		 * Shared abort-with-RST exit, also reached by goto from the
 		 * failure cases further down.  Releases the listener lock,
 		 * the epoch and the vnet context.
 		 */
 reset:
 		send_abort_rpl_synqe(TOEDEV(ifp), synqe, CPL_ABORT_SEND_RST);
 		INP_WUNLOCK(inp);
 		NET_EPOCH_EXIT(et);
 		CURVNET_RESTORE();
 		return (0);
 	}
 
 	KASSERT(synqe->params.rxq_idx == iq_to_ofld_rxq(iq) - &sc->sge.ofld_rxq[0],
 	    ("%s: CPL arrived on unexpected rxq.  %d %d", __func__,
 	    synqe->params.rxq_idx,
 	    (int)(iq_to_ofld_rxq(iq) - &sc->sge.ofld_rxq[0])));
 
 	toep = alloc_toepcb(vi, M_NOWAIT);
 	if (toep == NULL)
 		goto reset;
 	/* The toepcb inherits tid, L2 entry, vnet and params from the synqe. */
 	toep->tid = tid;
 	toep->l2te = &sc->l2t->l2tab[synqe->params.l2t_idx];
 	toep->vnet = lctx->vnet;
 	bcopy(&synqe->params, &toep->params, sizeof(toep->params));
 	init_toepcb(vi, toep);
 
 	MPASS(be32toh(cpl->snd_isn) - 1 == synqe->iss);
 	MPASS(be32toh(cpl->rcv_isn) - 1 == synqe->irs);
 	synqe->tcp_opt = cpl->tcp_opt;
 	synqe->toep = toep;
 
 	/* Come up with something that syncache_expand should be ok with. */
 	synqe_to_protohdrs(sc, synqe, cpl, &inc, &th, &to);
 	if (inc.inc_flags & INC_ISIPV6) {
 		/* Reuse the listener's CLIP entry if it has one. */
 		if (lctx->ce == NULL) {
 			toep->ce = t4_get_clip_entry(sc, &inc.inc6_laddr, true);
 			if (toep->ce == NULL) {
 				free_toepcb(toep);
 				goto reset;	/* RST without a CLIP entry? */
 			}
 		} else {
 			t4_hold_clip_entry(sc, lctx->ce);
 			toep->ce = lctx->ce;
 		}
 	}
 	so = inp->inp_socket;
 	KASSERT(so != NULL, ("%s: socket is NULL", __func__));
 
 	/*
 	 * Negative result: abort without sending a RST.  Zero, or no socket
 	 * returned: abort with a RST via the reset label.
 	 */
 	rstreason = toe_syncache_expand(&inc, &to, &th, &so);
 	if (rstreason < 0) {
 		free_toepcb(toep);
 		send_abort_rpl_synqe(TOEDEV(ifp), synqe, CPL_ABORT_NO_RST);
 		INP_WUNLOCK(inp);
 		NET_EPOCH_EXIT(et);
 		CURVNET_RESTORE();
 		return (0);
 	} else if (rstreason == 0 || so == NULL) {
 		free_toepcb(toep);
 		goto reset;
 	}
 
 	/* New connection inpcb is already locked by syncache_expand(). */
 	new_inp = sotoinpcb(so);
 	INP_WLOCK_ASSERT(new_inp);
 	MPASS(so->so_vnet == lctx->vnet);
 
 	/*
 	 * This is for expansion from syncookies.
 	 *
 	 * XXX: we've held the tcbinfo lock throughout so there's no risk of
 	 * anyone accept'ing a connection before we've installed our hooks, but
 	 * this somewhat defeats the purpose of having a tod_offload_socket :-(
 	 */
 	if (__predict_false(!(synqe->flags & TPF_SYNQE_EXPANDED))) {
 		tcp_timer_activate(intotcpcb(new_inp), TT_KEEP, 0);
 		t4_offload_socket(TOEDEV(ifp), synqe, so);
 	}
 
 	INP_WUNLOCK(new_inp);
 
 	/* Done with the synqe */
 	/* Unlock the listener only if release_synqe() hands it back. */
 	inp = release_synqe(sc, synqe);
 	if (inp != NULL)
 		INP_WUNLOCK(inp);
 	NET_EPOCH_EXIT(et);
 	CURVNET_RESTORE();
 
 	return (0);
 }
 
 /*
  * Install this file's handlers for the four listen-related CPL opcodes
  * (pass open reply, close listen server reply, pass accept request and
  * pass establish).
  */
 void
 t4_init_listen_cpl_handlers(void)
 {
 
 	t4_register_cpl_handler(CPL_PASS_OPEN_RPL, do_pass_open_rpl);
 	t4_register_cpl_handler(CPL_CLOSE_LISTSRV_RPL, do_close_server_rpl);
 	t4_register_cpl_handler(CPL_PASS_ACCEPT_REQ, do_pass_accept_req);
 	t4_register_cpl_handler(CPL_PASS_ESTABLISH, do_pass_establish);
 }
 
 /*
  * Counterpart of t4_init_listen_cpl_handlers(): registers NULL for each of
  * the same four CPL opcodes.
  */
 void
 t4_uninit_listen_cpl_handlers(void)
 {
 
 	t4_register_cpl_handler(CPL_PASS_OPEN_RPL, NULL);
 	t4_register_cpl_handler(CPL_CLOSE_LISTSRV_RPL, NULL);
 	t4_register_cpl_handler(CPL_PASS_ACCEPT_REQ, NULL);
 	t4_register_cpl_handler(CPL_PASS_ESTABLISH, NULL);
 }
 #endif
diff --git a/sys/dev/iicbus/if_ic.c b/sys/dev/iicbus/if_ic.c
index 4dac86141230..603265a52b13 100644
--- a/sys/dev/iicbus/if_ic.c
+++ b/sys/dev/iicbus/if_ic.c
@@ -1,440 +1,440 @@
 /*-
  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
  *
  * Copyright (c) 1998, 2001 Nicolas Souchu
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 /*
  * I2C bus IP driver
  */
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/mbuf.h>
 #include <sys/socket.h>
 #include <sys/filio.h>
 #include <sys/sockio.h>
 #include <sys/kernel.h>
 #include <sys/lock.h>
 #include <sys/module.h>
 #include <sys/mutex.h>
 #include <sys/bus.h>
 #include <sys/time.h>
 #include <sys/malloc.h>
 
 #include <net/if.h>
 #include <net/if_var.h>
 #include <net/if_types.h>
 #include <net/netisr.h>
 
 #include <net/route.h>
 #include <netinet/in.h>
 #include <netinet/in_systm.h>
 #include <netinet/in_var.h>
 #include <netinet/ip.h>
 #include <netinet/if_ether.h>
 
 #include <net/bpf.h>
 
 #include <dev/iicbus/iiconf.h>
 #include <dev/iicbus/iicbus.h>
 
 #include "iicbus_if.h"
 
 #define PCF_MASTER_ADDRESS 0xaa
 
 #define ICHDRLEN	sizeof(u_int32_t)
 #define ICMTU		1500		/* default mtu */
 
 /* Per-device state of the I2C IP interface. */
 struct ic_softc {
 	struct ifnet *ic_ifp;		/* network interface of this device */
 	device_t ic_dev;		/* the ic device itself */
 
 	u_char ic_addr;			/* peer I2C address */
 
 	int ic_flags;			/* IC_* flag bits */
 
 	char *ic_obuf;			/* output buffer: header + packet */
 	char *ic_ifbuf;			/* input buffer filled by icintr() */
 	char *ic_cp;			/* next write position in ic_ifbuf */
 
 	int ic_xfercnt;			/* bytes received in this transfer */
 
 	int ic_iferrs;			/* errors seen in this transfer */
 
 	struct mtx ic_lock;		/* held around flag/buffer updates */
 };
 
 /*
  * Bits kept in ic_softc.ic_flags.
  *
  * IC_BUFFER_WAITER must not share a bit with either busy flag: it is set and
  * cleared independently while a thread waits for IC_BUFFERS_BUSY to drain,
  * and the wakeup test compares the masked flags against IC_BUFFER_WAITER
  * alone.  It previously aliased IC_IFBUF_BUSY (0x0004).
  */
 #define	IC_SENDING		0x0001
 #define	IC_OBUF_BUSY		0x0002
 #define	IC_IFBUF_BUSY		0x0004
 #define	IC_BUFFERS_BUSY		(IC_OBUF_BUSY | IC_IFBUF_BUSY)
 #define	IC_BUFFER_WAITER	0x0008
 
 static devclass_t ic_devclass;
 
 /* Forward declarations for the entry points referenced in the tables below. */
 static int icprobe(device_t);
 static int icattach(device_t);
 
 static int icioctl(struct ifnet *, u_long, caddr_t);
 static int icoutput(struct ifnet *, struct mbuf *, const struct sockaddr *,
                struct route *);
 
 static int icintr(device_t, int, char *);
 
 /* Method table: device probe/attach plus the iicbus interrupt callback. */
 static device_method_t ic_methods[] = {
 	/* device interface */
 	DEVMETHOD(device_probe,		icprobe),
 	DEVMETHOD(device_attach,	icattach),
 
 	/* iicbus interface */
 	DEVMETHOD(iicbus_intr,		icintr),
 
 	{ 0, 0 }
 };
 
 /* Driver description: name, methods and the size of struct ic_softc. */
 static driver_t ic_driver = {
 	"ic",
 	ic_methods,
 	sizeof(struct ic_softc),
 };
 
 /*
  * Replace the output and input buffers with fresh ones sized for 'mtu' plus
  * the IC header, and record the new MTU on the interface.  The allocations
  * are done up front with M_WAITOK; the swap itself happens under ic_lock
  * once neither buffer is marked busy.
  */
 static void
 ic_alloc_buffers(struct ic_softc *sc, int mtu)
 {
 	char *new_out, *new_in;
 
 	new_out = malloc(mtu + ICHDRLEN, M_DEVBUF, M_WAITOK);
 	new_in = malloc(mtu + ICHDRLEN, M_DEVBUF, M_WAITOK);
 
 	mtx_lock(&sc->ic_lock);
 
 	/* Sleep on the softc until both buffers are idle. */
 	for (;;) {
 		if ((sc->ic_flags & IC_BUFFERS_BUSY) == 0)
 			break;
 		sc->ic_flags |= IC_BUFFER_WAITER;
 		mtx_sleep(sc, &sc->ic_lock, 0, "icalloc", 0);
 		sc->ic_flags &= ~IC_BUFFER_WAITER;
 	}
 
 	/* Out with the old pair, in with the new one. */
 	free(sc->ic_obuf, M_DEVBUF);
 	sc->ic_obuf = new_out;
 	free(sc->ic_ifbuf, M_DEVBUF);
 	sc->ic_ifbuf = new_in;
 	sc->ic_ifp->if_mtu = mtu;
 
 	mtx_unlock(&sc->ic_lock);
 }
 
 /*
  * icprobe()
  *
  * Probe method.  Performs no hardware checks; it unconditionally returns
  * BUS_PROBE_NOWILDCARD.
  */
 static int
 icprobe(device_t dev)
 {
 	return (BUS_PROBE_NOWILDCARD);
 }
 
 /*
  * icattach()
  *
  * Attach method.  Allocates the ifnet, initialises the softc (lock, peer
  * address, device back-pointer), fills in the interface callbacks, allocates
  * the I/O buffers for the default MTU and finally attaches the interface and
  * its BPF tap.  Returns ENOSPC when no ifnet can be allocated, 0 otherwise.
  */
 static int
 icattach(device_t dev)
 {
 	struct ic_softc *sc = device_get_softc(dev);
 	struct ifnet *ifp;
 
 	sc->ic_ifp = if_alloc(IFT_PARA);
 	ifp = sc->ic_ifp;
 	if (ifp == NULL)
 		return (ENOSPC);
 
 	/* Softc state. */
 	mtx_init(&sc->ic_lock, device_get_nameunit(dev), MTX_NETWORK_LOCK,
 	    MTX_DEF);
 	sc->ic_dev = dev;
 	sc->ic_addr = PCF_MASTER_ADDRESS;	/* XXX only PCF masters */
 
 	/* Interface identity and callbacks. */
 	if_initname(ifp, device_get_name(dev), device_get_unit(dev));
 	ifp->if_softc = sc;
 	ifp->if_flags = IFF_SIMPLEX | IFF_POINTOPOINT | IFF_MULTICAST;
 	ifp->if_output = icoutput;
 	ifp->if_ioctl = icioctl;
 	ifp->if_addrlen = 0;
 	ifp->if_hdrlen = 0;
 	ifp->if_snd.ifq_maxlen = ifqmaxlen;
 
 	/* Needs both the lock and sc->ic_ifp, so it comes after them. */
 	ic_alloc_buffers(sc, ICMTU);
 
 	if_attach(ifp);
 	bpfattach(ifp, DLT_NULL, ICHDRLEN);
 
 	return (0);
 }
 
 /*
  * icioctl()
  *
  * Interface ioctl handler.  Handles address assignment (AF_INET only),
  * interface flag changes, MTU get/set and multicast add/delete (AF_INET
  * only).  Returns 0 on success, EAFNOSUPPORT for other address families,
  * EINVAL for unknown commands, or the error from iicbus_request_bus().
  */
 static int
 icioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
 {
 	struct ic_softc *sc = ifp->if_softc;
 	device_t icdev = sc->ic_dev;
 	device_t parent = device_get_parent(icdev);
 	/* The same argument is viewed as an ifaddr or an ifreq per command. */
 	struct ifaddr *ifa = (struct ifaddr *)data;
 	struct ifreq *ifr = (struct ifreq *)data;
 	int error;
 
 	switch (cmd) {
 
 	case SIOCAIFADDR:
 	case SIOCSIFADDR:
 		if (ifa->ifa_addr->sa_family != AF_INET)
 			return (EAFNOSUPPORT);
 		mtx_lock(&sc->ic_lock);
 		ifp->if_flags |= IFF_UP;
 		/* Join the flag handling below with the lock already held. */
 		goto locked;
 	case SIOCSIFFLAGS:
 		mtx_lock(&sc->ic_lock);
 	locked:
 		/* Marked down while running: stop and give the bus back. */
 		if ((!(ifp->if_flags & IFF_UP)) &&
 		    (ifp->if_drv_flags & IFF_DRV_RUNNING)) {
 
 			/* XXX disable PCF */
 			ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
 			mtx_unlock(&sc->ic_lock);
 
 			/* IFF_UP is not set, try to release the bus anyway */
 			iicbus_release_bus(parent, icdev);
 			break;
 		}
 		/* Marked up but not running: acquire the bus and start. */
 		if (((ifp->if_flags & IFF_UP)) &&
 		    (!(ifp->if_drv_flags & IFF_DRV_RUNNING))) {
 			/* The bus request is made with ic_lock dropped. */
 			mtx_unlock(&sc->ic_lock);
 			if ((error = iicbus_request_bus(parent, icdev,
 			    IIC_WAIT | IIC_INTR)))
 				return (error);
 			mtx_lock(&sc->ic_lock);
 			iicbus_reset(parent, IIC_FASTEST, 0, NULL);
 			ifp->if_drv_flags |= IFF_DRV_RUNNING;
 		}
 		mtx_unlock(&sc->ic_lock);
 		break;
 
 	case SIOCSIFMTU:
 		/* Reallocates both buffers; ifr_mtu is not range-checked here. */
 		ic_alloc_buffers(sc, ifr->ifr_mtu);
 		break;
 
 	case SIOCGIFMTU:
 		mtx_lock(&sc->ic_lock);
 		ifr->ifr_mtu = sc->ic_ifp->if_mtu;
 		mtx_unlock(&sc->ic_lock);
 		break;
 
 	case SIOCADDMULTI:
 	case SIOCDELMULTI:
 		if (ifr == NULL)
 			return (EAFNOSUPPORT);		/* XXX */
 		switch (ifr->ifr_addr.sa_family) {
 		case AF_INET:
 			break;
 		default:
 			return (EAFNOSUPPORT);
 		}
 		break;
 	default:
 		return (EINVAL);
 	}
 	return (0);
 }
 
 /*
  * icintr()
  *
  * iicbus event callback.  Runs with ic_lock held except while an assembled
  * packet is being dispatched to the IP input queue.
  *
  *	INTR_GENERAL/INTR_START	reset the receive cursor and mark the input
  *				buffer busy
  *	INTR_RECEIVE		store one byte from *ptr, or count an error
  *				once MTU + header bytes have been received
  *	INTR_STOP		release the input buffer, wake a buffer waiter
  *				and deliver the packet (or report errors)
  *	INTR_TRANSMIT		supply 0xff in *ptr
  *	INTR_ERROR		count an error
  *	INTR_NOACK		nothing to do
  *
  * Any other event panics.  Always returns 0.
  */
 static int
 icintr(device_t dev, int event, char *ptr)
 {
 	struct ic_softc *sc = (struct ic_softc *)device_get_softc(dev);
 	struct mbuf *top;
 	int len;
 
 	mtx_lock(&sc->ic_lock);
 
 	switch (event) {
 
 	case INTR_GENERAL:
 	case INTR_START:
 		sc->ic_cp = sc->ic_ifbuf;
 		sc->ic_xfercnt = 0;
 		sc->ic_flags |= IC_IFBUF_BUSY;
 		break;
 
 	case INTR_STOP:
 
 		/* if any error occurred during transfer,
 		 * drop the packet */
 		sc->ic_flags &= ~IC_IFBUF_BUSY;
 		/*
 		 * ic_alloc_buffers() sleeps on the softc pointer itself, so
 		 * that is the channel to wake -- not the address of this
 		 * function's local 'sc' variable.
 		 */
 		if ((sc->ic_flags & (IC_BUFFERS_BUSY | IC_BUFFER_WAITER)) ==
 		    IC_BUFFER_WAITER)
 			wakeup(sc);
 		if (sc->ic_iferrs)
 			goto err;
 		if ((len = sc->ic_xfercnt) == 0)
 			break;					/* ignore */
 		/* A transfer no longer than the header carries no packet. */
 		if (len <= ICHDRLEN)
 			goto err;
 		len -= ICHDRLEN;
 		if_inc_counter(sc->ic_ifp, IFCOUNTER_IPACKETS, 1);
 		if_inc_counter(sc->ic_ifp, IFCOUNTER_IBYTES, len);
 		BPF_TAP(sc->ic_ifp, sc->ic_ifbuf, len + ICHDRLEN);
 		top = m_devget(sc->ic_ifbuf + ICHDRLEN, len, 0, sc->ic_ifp, 0);
 		if (top) {
 			struct epoch_tracker et;
 
 			/* Dispatch without ic_lock, inside the net epoch. */
 			mtx_unlock(&sc->ic_lock);
 			M_SETFIB(top, sc->ic_ifp->if_fib);
 			NET_EPOCH_ENTER(et);
 			netisr_dispatch(NETISR_IP, top);
 			NET_EPOCH_EXIT(et);
 			mtx_lock(&sc->ic_lock);
 		}
 		break;
 	err:
 		if_printf(sc->ic_ifp, "errors (%d)!\n", sc->ic_iferrs);
 		sc->ic_iferrs = 0;			/* reset error count */
 		if_inc_counter(sc->ic_ifp, IFCOUNTER_IERRORS, 1);
 		break;
 
 	case INTR_RECEIVE:
 		/* Never write past the MTU + header sized input buffer. */
 		if (sc->ic_xfercnt >= sc->ic_ifp->if_mtu + ICHDRLEN) {
 			sc->ic_iferrs++;
 		} else {
 			*sc->ic_cp++ = *ptr;
 			sc->ic_xfercnt++;
 		}
 		break;
 
 	case INTR_NOACK:			/* xfer terminated by master */
 		break;
 
 	case INTR_TRANSMIT:
 		*ptr = 0xff;					/* XXX */
 	  	break;
 
 	case INTR_ERROR:
 		sc->ic_iferrs++;
 		break;
 
 	default:
 		panic("%s: unknown event (%d)!", __func__, event);
 	}
 
 	mtx_unlock(&sc->ic_lock);
 	return (0);
 }
 
 /*
  * icoutput()
  *
  * Interface output routine.  Copies a 32-bit header followed by the mbuf
  * chain into the output buffer and writes it to the peer with
  * iicbus_block_write().  The header is taken from dst->sa_data for AF_UNSPEC
  * (BPF) writes and is the address family otherwise.
  *
  * The mbuf is always consumed.  The function returns 0 on every path; a
  * transfer already in progress, an oversized packet or a failed bus write
  * is only reported through IFCOUNTER_OERRORS.
  */
 static int
 icoutput(struct ifnet *ifp, struct mbuf *m, const struct sockaddr *dst,
     struct route *ro)
 {
 	struct ic_softc *sc = ifp->if_softc;
 	device_t icdev = sc->ic_dev;
 	device_t parent = device_get_parent(icdev);
 	int len, sent;
 	struct mbuf *mm;
 	u_char *cp;
 	u_int32_t hdr;
 
 	/* BPF writes need to be handled specially. */ 
 	if (dst->sa_family == AF_UNSPEC)
 		bcopy(dst->sa_data, &hdr, sizeof(hdr));
 	else 
-		hdr = dst->sa_family;
+		hdr = RO_GET_FAMILY(ro, dst);
 
 	mtx_lock(&sc->ic_lock);
 	ifp->if_drv_flags |= IFF_DRV_RUNNING;
 
 	/* already sending? */
 	if (sc->ic_flags & IC_SENDING) {
 		if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
 		goto error;
 	}
 		
 	/* insert header */
 	bcopy ((char *)&hdr, sc->ic_obuf, ICHDRLEN);
 
 	/* Flatten the chain behind the header, refusing anything over MTU. */
 	cp = sc->ic_obuf + ICHDRLEN;
 	len = 0;
 	mm = m;
 	do {
 		if (len + mm->m_len > sc->ic_ifp->if_mtu) {
 			/* packet too large */
 			if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
 			goto error;
 		}
 			
 		bcopy(mtod(mm,char *), cp, mm->m_len);
 		cp += mm->m_len;
 		len += mm->m_len;
 
 	} while ((mm = mm->m_next));
 
 	BPF_MTAP2(ifp, &hdr, sizeof(hdr), m);
 
 	sc->ic_flags |= (IC_SENDING | IC_OBUF_BUSY);
 
 	/* The bus write is done from ic_obuf with ic_lock dropped. */
 	m_freem(m);
 	mtx_unlock(&sc->ic_lock);
 
 	/* send the packet */
 	if (iicbus_block_write(parent, sc->ic_addr, sc->ic_obuf,
 				len + ICHDRLEN, &sent))
 
 		if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
 	else {
 		if_inc_counter(ifp, IFCOUNTER_OPACKETS, 1);
 		if_inc_counter(ifp, IFCOUNTER_OBYTES, len);
 	}	
 
 	mtx_lock(&sc->ic_lock);
 	sc->ic_flags &= ~(IC_SENDING | IC_OBUF_BUSY);
 	/*
 	 * ic_alloc_buffers() sleeps on the softc pointer itself, so that is
 	 * the channel to wake -- not the address of the local 'sc' variable.
 	 */
 	if ((sc->ic_flags & (IC_BUFFERS_BUSY | IC_BUFFER_WAITER)) ==
 	    IC_BUFFER_WAITER)
 		wakeup(sc);
 	mtx_unlock(&sc->ic_lock);
 
 	return (0);
 
 error:
 	m_freem(m);
 	mtx_unlock(&sc->ic_lock);
 
 	return(0);
 }
 
 /* Module glue: the ic driver attaches under iicbus and depends on it. */
 DRIVER_MODULE(ic, iicbus, ic_driver, ic_devclass, 0, 0);
 MODULE_DEPEND(ic, iicbus, IICBUS_MINVER, IICBUS_PREFVER, IICBUS_MAXVER);
 MODULE_VERSION(ic, 1);
diff --git a/sys/net/debugnet.c b/sys/net/debugnet.c
index bb59ff33a93f..8652597c55db 100644
--- a/sys/net/debugnet.c
+++ b/sys/net/debugnet.c
@@ -1,1076 +1,1077 @@
 /*-
  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
  *
  * Copyright (c) 2019 Isilon Systems, LLC.
  * Copyright (c) 2005-2014 Sandvine Incorporated. All rights reserved.
  * Copyright (c) 2000 Darrell Anderson
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include "opt_ddb.h"
 #include "opt_inet.h"
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/endian.h>
 #include <sys/errno.h>
 #include <sys/eventhandler.h>
 #include <sys/socket.h>
 #include <sys/sysctl.h>
 
 #ifdef DDB
 #include <ddb/ddb.h>
 #include <ddb/db_lex.h>
 #endif
 
 #include <net/ethernet.h>
 #include <net/if.h>
 #include <net/if_arp.h>
 #include <net/if_dl.h>
 #include <net/if_types.h>
 #include <net/if_var.h>
 #include <net/route.h>
 #include <net/route/nhop.h>
 
 #include <netinet/in.h>
 #include <netinet/in_fib.h>
 #include <netinet/in_systm.h>
 #include <netinet/in_var.h>
 #include <netinet/ip.h>
 #include <netinet/ip_var.h>
 #include <netinet/ip_options.h>
 #include <netinet/udp.h>
 #include <netinet/udp_var.h>
 
 #include <machine/in_cksum.h>
 #include <machine/pcb.h>
 
 #include <net/debugnet.h>
 #define	DEBUGNET_INTERNAL
 #include <net/debugnet_int.h>
 
 FEATURE(debugnet, "Debugnet support");
 
 SYSCTL_NODE(_net, OID_AUTO, debugnet, CTLFLAG_RD | CTLFLAG_MPSAFE, NULL,
     "debugnet parameters");
 
 unsigned debugnet_debug;
 SYSCTL_UINT(_net_debugnet, OID_AUTO, debug, CTLFLAG_RWTUN,
     &debugnet_debug, 0,
     "Debug message verbosity (0: off; 1: on; 2: verbose)");
 
 int debugnet_npolls = 2000;
 SYSCTL_INT(_net_debugnet, OID_AUTO, npolls, CTLFLAG_RWTUN,
     &debugnet_npolls, 0,
     "Number of times to poll before assuming packet loss (0.5ms per poll)");
 int debugnet_nretries = 10;
 SYSCTL_INT(_net_debugnet, OID_AUTO, nretries, CTLFLAG_RWTUN,
     &debugnet_nretries, 0,
     "Number of retransmit attempts before giving up");
 int debugnet_fib = RT_DEFAULT_FIB;
 SYSCTL_INT(_net_debugnet, OID_AUTO, fib, CTLFLAG_RWTUN,
     &debugnet_fib, 0,
     "Fib to use when sending dump");
 
 /*
  * The one debugnet PCB and a flag recording whether it is in use.
  * debugnet_get_gw_mac() asserts that callers pass this PCB while the flag
  * is set.
  */
 static bool g_debugnet_pcb_inuse;
 static struct debugnet_pcb g_dnet_pcb;
 
 /*
  * Simple accessors for opaque PCB.
  */
 /*
  * Return the gateway MAC address bytes stored in the PCB.  Asserts that the
  * global PCB is in use, that 'pcb' is that PCB, and that its state has
  * reached at least DN_STATE_HAVE_GW_MAC.
  */
 const unsigned char *
 debugnet_get_gw_mac(const struct debugnet_pcb *pcb)
 {
 	MPASS(g_debugnet_pcb_inuse && pcb == &g_dnet_pcb &&
 	    pcb->dp_state >= DN_STATE_HAVE_GW_MAC);
 	return (pcb->dp_gw_mac.octet);
 }
 
 /*
  * Start of network primitives, beginning with output primitives.
  */
 
 /*
  * Prepend an ethernet header to a packet and hand it to the interface's
  * debugnet transmit method.
  *
  * Parameters:
  *	m	Packet to send; consumed here or by the NIC driver
  *	ifp	Interface to transmit on; also supplies the source address
  *	dst	Destination ethernet address
  *	etype	ETHERTYPE_* value of the payload protocol
  *
  * Returns:
  *	int	ENETDOWN if the interface is not up and running (or is in
  *		monitor mode), ENOBUFS if no mbuf is available for the
  *		header, otherwise the result of the transmit method
  */
 int
 debugnet_ether_output(struct mbuf *m, struct ifnet *ifp, struct ether_addr dst,
     u_short etype)
 {
 	struct ether_header *hdr;
 	bool admin_up, running;
 
 	/* Up without monitor mode, and the driver marked running. */
 	admin_up = (ifp->if_flags & (IFF_MONITOR | IFF_UP)) == IFF_UP;
 	running = (ifp->if_drv_flags & IFF_DRV_RUNNING) == IFF_DRV_RUNNING;
 	if (!admin_up || !running) {
 		if_printf(ifp, "%s: interface isn't up\n", __func__);
 		m_freem(m);
 		return (ENETDOWN);
 	}
 
 	/* Make room for the ethernet header, then fill it in. */
 	M_PREPEND(m, ETHER_HDR_LEN, M_NOWAIT);
 	if (m == NULL) {
 		printf("%s: out of mbufs\n", __func__);
 		return (ENOBUFS);
 	}
 
 	hdr = mtod(m, struct ether_header *);
 	hdr->ether_type = htons(etype);
 	memcpy(hdr->ether_dhost, dst.octet, ETHER_ADDR_LEN);
 	memcpy(hdr->ether_shost, IF_LLADDR(ifp), ETHER_ADDR_LEN);
 
 	return (ifp->if_debugnet_methods->dn_transmit(ifp, m));
 }
 
 /*
  * Best-effort send of an mbuf chain to the debugnet server over UDP.
  * There is no fragmentation support: the packet must still fit in
  * ifp->if_mtu once the UDP/IP headers have been added.
  *
  * Parameters:
  *	pcb	The debugnet context block
  *	m	mbuf chain holding the payload
  *
  * Returns:
  *	int	see errno.h, 0 for success
  */
 static int
 debugnet_udp_output(struct debugnet_pcb *pcb, struct mbuf *m)
 {
 	struct udphdr *uh;
 
 	MPASS(pcb->dp_state >= DN_STATE_HAVE_GW_MAC);
 
 	M_PREPEND(m, sizeof(*uh), M_NOWAIT);
 	if (m == NULL) {
 		printf("%s: out of mbufs\n", __func__);
 		return (ENOBUFS);
 	}
 
 	uh = mtod(m, void *);
 	/* The client port is fixed so that the server can connect() to it. */
 	uh->uh_sport = htons(pcb->dp_client_port);
 	uh->uh_dport = htons(pcb->dp_server_port);
 	uh->uh_ulen = htons(m->m_pkthdr.len);
 	/* Left at zero here; filled in later, depending on the protocol. */
 	uh->uh_sum = 0;
 
 	return (debugnet_ip_output(pcb, m));
 }
 
 /*
  * Send an acknowledgement carrying 'seqno' (already in network byte order)
  * to the peer.  Returns ENOBUFS when no mbuf is available, otherwise the
  * result of debugnet_udp_output().
  */
 int
 debugnet_ack_output(struct debugnet_pcb *pcb, uint32_t seqno /* net endian */)
 {
 	struct debugnet_ack *ack;
 	struct mbuf *m;
 
 	DNETDEBUG("Acking with seqno %u\n", ntohl(seqno));
 
 	m = m_gethdr(M_NOWAIT, MT_DATA);
 	if (m == NULL) {
 		printf("%s: Out of mbufs\n", __func__);
 		return (ENOBUFS);
 	}
 
 	/* The whole payload is one debugnet_ack in the header mbuf. */
 	m->m_pkthdr.len = sizeof(*ack);
 	m->m_len = sizeof(*ack);
 	MH_ALIGN(m, sizeof(*ack));
 
 	ack = mtod(m, void *);
 	ack->da_seqno = seqno;
 
 	return (debugnet_udp_output(pcb, m));
 }
 
 /*
  * Dummy free function for debugnet clusters.  debugnet_send() attaches the
  * caller's data buffer to an mbuf as external storage with this as the free
  * routine, so there is nothing to release here.
  */
 static void
 debugnet_mbuf_free(struct mbuf *m __unused)
 {
 }
 
 /*
  * Construct and reliably send a debugnet packet.  May fail from a resource
  * shortage or extreme number of unacknowledged retransmissions.  Wait for
  * an acknowledgement before returning.  Splits packets into chunks small
  * enough to be sent without fragmentation (looks up the interface MTU)
  *
  * Parameters:
  *	pcb	The debugnet context block
  *	type	debugnet packet type (HERALD, FINISHED, ...)
  *	data	data
  *	datalen	data size (bytes)
  *	auxdata	optional auxiliary information
  *
  * Returns:
  *	int see errno.h, 0 for success.  ECONNRESET if the remote side has
  *	closed, ENOBUFS when out of mbufs, ETIMEDOUT once the retry budget
  *	is exhausted, or the error from the UDP output routine.
  */
 int
 debugnet_send(struct debugnet_pcb *pcb, uint32_t type, const void *data,
     uint32_t datalen, const struct debugnet_proto_aux *auxdata)
 {
 	struct debugnet_msg_hdr *dn_msg_hdr;
 	struct mbuf *m, *m2;
 	uint64_t want_acks;
 	uint32_t i, pktlen, sent_so_far;
 	int retries, polls, error;
 
 	if (pcb->dp_state == DN_STATE_REMOTE_CLOSED)
 		return (ECONNRESET);
 
 	want_acks = 0;
 	pcb->dp_rcvd_acks = 0;
 	retries = 0;
 
 retransmit:
 	/* Chunks can be too big to fit in packets. */
 	for (i = sent_so_far = 0; sent_so_far < datalen ||
 	    (i == 0 && datalen == 0); i++) {
 		pktlen = datalen - sent_so_far;
 
 		/* Bound: the interface MTU (assume no IP options). */
 		pktlen = min(pktlen, pcb->dp_ifp->if_mtu -
 		    sizeof(struct udpiphdr) - sizeof(struct debugnet_msg_hdr));
 
 		/*
 		 * Check if it is retransmitting and this has been ACKed
 		 * already.
 		 *
 		 * The bit is formed in 64 bits to match the width of the
 		 * want_acks bitmap; a plain int shift is undefined for
 		 * i >= 31 and cannot reach the upper half of the bitmap.
 		 */
 		if ((pcb->dp_rcvd_acks & ((uint64_t)1 << i)) != 0) {
 			sent_so_far += pktlen;
 			continue;
 		}
 
 		/*
 		 * Get and fill a header mbuf, then chain data as an extended
 		 * mbuf.
 		 */
 		m = m_gethdr(M_NOWAIT, MT_DATA);
 		if (m == NULL) {
 			printf("%s: Out of mbufs\n", __func__);
 			return (ENOBUFS);
 		}
 		m->m_len = sizeof(struct debugnet_msg_hdr);
 		m->m_pkthdr.len = sizeof(struct debugnet_msg_hdr);
 		MH_ALIGN(m, sizeof(struct debugnet_msg_hdr));
 		dn_msg_hdr = mtod(m, struct debugnet_msg_hdr *);
 		dn_msg_hdr->mh_seqno = htonl(pcb->dp_seqno + i);
 		dn_msg_hdr->mh_type = htonl(type);
 		dn_msg_hdr->mh_len = htonl(pktlen);
 
 		if (auxdata != NULL) {
 			dn_msg_hdr->mh_offset =
 			    htobe64(auxdata->dp_offset_start + sent_so_far);
 			dn_msg_hdr->mh_aux2 = htobe32(auxdata->dp_aux2);
 		} else {
 			dn_msg_hdr->mh_offset = htobe64(sent_so_far);
 			dn_msg_hdr->mh_aux2 = 0;
 		}
 
 		if (pktlen != 0) {
 			m2 = m_get(M_NOWAIT, MT_DATA);
 			if (m2 == NULL) {
 				m_freem(m);
 				printf("%s: Out of mbufs\n", __func__);
 				return (ENOBUFS);
 			}
 			/*
 			 * Reference the caller's buffer in place; the no-op
 			 * free routine leaves ownership with the caller.
 			 */
 			MEXTADD(m2, __DECONST(char *, data) + sent_so_far,
 			    pktlen, debugnet_mbuf_free, NULL, NULL, 0,
 			    EXT_DISPOSABLE);
 			m2->m_len = pktlen;
 
 			m_cat(m, m2);
 			m->m_pkthdr.len += pktlen;
 		}
 		error = debugnet_udp_output(pcb, m);
 		if (error != 0)
 			return (error);
 
 		/* Note that we're waiting for this packet in the bitfield. */
 		want_acks |= ((uint64_t)1 << i);
 		sent_so_far += pktlen;
 	}
 	if (i >= DEBUGNET_MAX_IN_FLIGHT)
 		printf("Warning: Sent more than %d packets (%u). "
 		    "Acknowledgements will fail unless the size of "
 		    "rcvd_acks/want_acks is increased.\n",
 		    DEBUGNET_MAX_IN_FLIGHT, i);
 
 	/*
 	 * Wait for acks.  A *real* window would speed things up considerably.
 	 */
 	polls = 0;
 	while (pcb->dp_rcvd_acks != want_acks) {
 		if (polls++ > debugnet_npolls) {
 			if (retries++ > debugnet_nretries)
 				return (ETIMEDOUT);
 			printf(". ");
 			goto retransmit;
 		}
 		debugnet_network_poll(pcb);
 		DELAY(500);
 		if (pcb->dp_state == DN_STATE_REMOTE_CLOSED)
 			return (ECONNRESET);
 	}
 	/* Everything was acknowledged; advance past the chunks just sent. */
 	pcb->dp_seqno += i;
 	return (0);
 }
 
 /*
  * Network input primitives.
  */
 
 /*
  * Just introspect the header enough to fire off a seqno ack and validate
  * length fits.
  *
  * Parameters:
  *	pcb	connection the message arrived on
  *	mb	in/out pointer to the received mbuf; rewritten when m_pullup()
  *		replaces the chain and set to NULL if the pullup fails
  *
  * After a successful ACK the message is passed to pcb->dp_rx_handler.
  */
 static void
 debugnet_handle_rx_msg(struct debugnet_pcb *pcb, struct mbuf **mb)
 {
 	const struct debugnet_msg_hdr *dnh;
 	struct mbuf *m;
 	int error;
 
 	m = *mb;
 
 	if (m->m_pkthdr.len < sizeof(*dnh)) {
 		DNETDEBUG("ignoring small debugnet_msg packet\n");
 		return;
 	}
 
 	/* Get ND header. */
 	if (m->m_len < sizeof(*dnh)) {
 		m = m_pullup(m, sizeof(*dnh));
 		/* Publish the result so the caller sees the new chain (or NULL). */
 		*mb = m;
 		if (m == NULL) {
 			DNETDEBUG("m_pullup failed\n");
 			return;
 		}
 	}
 	dnh = mtod(m, const void *);
 
 	/* The advertised payload length must fit inside what was received. */
 	if (ntohl(dnh->mh_len) + sizeof(*dnh) > m->m_pkthdr.len) {
 		DNETDEBUG("Dropping short packet.\n");
 		return;
 	}
 
 	/*
 	 * If the issue is transient (ENOBUFS), sender should resend.  If
 	 * non-transient (like driver objecting to rx -> tx from the same
 	 * thread), not much else we can do.
 	 *
 	 * Note mh_seqno is handed over as received, without byte-swapping.
 	 */
 	error = debugnet_ack_output(pcb, dnh->mh_seqno);
 	if (error != 0)
 		return;
 
 	if (ntohl(dnh->mh_type) == DEBUGNET_FINISHED) {
 		printf("Remote shut down the connection on us!\n");
 		pcb->dp_state = DN_STATE_REMOTE_CLOSED;
 
 		/*
 		 * Continue through to the user handler so they are signalled
 		 * not to wait for further rx.
 		 */
 	}
 
 	pcb->dp_rx_handler(pcb, mb);
 }
 
 /*
  * Process a received debugnet ACK.
  *
  * Records the acknowledged sequence number as a bit in pcb->dp_rcvd_acks,
  * indexed by its distance from pcb->dp_seqno.  While the connection has not
  * yet reached DN_STATE_GOT_HERALD_PORT, the sender's UDP source port is also
  * adopted as the server port.
  *
  * Parameters:
  *	pcb	connection the ACK arrived on
  *	mb	in/out pointer to the received mbuf; rewritten when m_pullup()
  *		replaces the chain and set to NULL if the pullup fails
  *	sport	UDP source port of the ACK, host byte order
  */
 static void
 debugnet_handle_ack(struct debugnet_pcb *pcb, struct mbuf **mb, uint16_t sport)
 {
 	const struct debugnet_ack *dn_ack;
 	struct mbuf *m;
 	uint32_t rcv_ackno;
 
 	m = *mb;
 
 	/* Get Ack. */
 	if (m->m_len < sizeof(*dn_ack)) {
 		m = m_pullup(m, sizeof(*dn_ack));
 		*mb = m;
 		if (m == NULL) {
 			DNETDEBUG("m_pullup failed\n");
 			return;
 		}
 	}
 	dn_ack = mtod(m, const void *);
 
 	/* Debugnet processing. */
 	/*
 	 * Packet is meant for us.  Extract the ack sequence number and the
 	 * port number if necessary.
 	 */
 	rcv_ackno = ntohl(dn_ack->da_seqno);
 	if (pcb->dp_state < DN_STATE_GOT_HERALD_PORT) {
 		pcb->dp_server_port = sport;
 		pcb->dp_state = DN_STATE_GOT_HERALD_PORT;
 	}
 	if (rcv_ackno >= pcb->dp_seqno + DEBUGNET_MAX_IN_FLIGHT)
 		printf("%s: ACK %u too far in future!\n", __func__, rcv_ackno);
 	else if (rcv_ackno >= pcb->dp_seqno) {
 		/*
 		 * We're interested in this ack. Record it.  Shift a 64-bit
 		 * one: the offset may be 31 or more, which would overflow a
 		 * plain int shift.
 		 */
 		pcb->dp_rcvd_acks |=
 		    (uint64_t)1 << (rcv_ackno - pcb->dp_seqno);
 	}
 }
 
 /*
  * UDP input for debugnet.
  *
  * Validates the UDP header, strips it, and dispatches on payload size: a
  * payload exactly the size of struct debugnet_ack is treated as an ACK,
  * anything else goes to debugnet_handle_rx_msg() when the connection has an
  * rx handler and is otherwise ignored.
  *
  * Parameters:
  *	pcb	connection the packet is for
  *	mb	in/out pointer to the received mbuf (starting at the UDP
  *		header); rewritten when m_pullup() replaces the chain and set
  *		to NULL if the pullup fails
  */
 void
 debugnet_handle_udp(struct debugnet_pcb *pcb, struct mbuf **mb)
 {
 	const struct udphdr *udp;
 	struct mbuf *m;
 	uint16_t sport, ulen;
 
 	/* UDP processing. */
 
 	m = *mb;
 	if (m->m_pkthdr.len < sizeof(*udp)) {
 		DNETDEBUG("ignoring small UDP packet\n");
 		return;
 	}
 
 	/* Get UDP headers. */
 	if (m->m_len < sizeof(*udp)) {
 		m = m_pullup(m, sizeof(*udp));
 		*mb = m;
 		if (m == NULL) {
 			DNETDEBUG("m_pullup failed\n");
 			return;
 		}
 	}
 	udp = mtod(m, const void *);
 
 	/* We expect to receive UDP packets on the configured client port. */
 	if (ntohs(udp->uh_dport) != pcb->dp_client_port) {
 		DNETDEBUG("not on the expected port.\n");
 		return;
 	}
 
 	/*
 	 * Check that ulen covers at least the UDP header itself and does not
 	 * exceed actual size of data.  Without the lower bound the
 	 * subtraction below would wrap the 16-bit length.
 	 */
 	ulen = ntohs(udp->uh_ulen);
 	if (ulen < sizeof(*udp) || m->m_pkthdr.len < ulen) {
 		DNETDEBUG("ignoring runt UDP packet\n");
 		return;
 	}
 
 	sport = ntohs(udp->uh_sport);
 
 	/* Strip the UDP header; ulen becomes the payload length. */
 	m_adj(m, sizeof(*udp));
 	ulen -= sizeof(*udp);
 
 	if (ulen == sizeof(struct debugnet_ack)) {
 		debugnet_handle_ack(pcb, mb, sport);
 		return;
 	}
 
 	if (pcb->dp_rx_handler == NULL) {
 		if (ulen < sizeof(struct debugnet_ack))
 			DNETDEBUG("ignoring small ACK packet\n");
 		else
 			DNETDEBUG("ignoring unexpected non-ACK packet on "
 			    "half-duplex connection.\n");
 		return;
 	}
 
 	debugnet_handle_rx_msg(pcb, mb);
 }
 
 /*
  * Handler for incoming packets directly from the network adapter
  * Identifies the packet type (IP or ARP) and passes it along to one of the
  * helper functions debugnet_handle_ip or debugnet_handle_arp.
  *
  * It needs to partially replicate the behaviour of ether_input() and
  * ether_demux().
  *
  * Whatever mbuf is still held when processing ends is freed here; the
  * handlers receive &m, and a NULL m on return is not freed.
  *
  * Parameters:
  *	ifp	the interface the packet came from
  *	m	an mbuf containing the packet received
  */
 static void
 debugnet_pkt_in(struct ifnet *ifp, struct mbuf *m)
 {
 	struct ifreq ifr;
 	struct ether_header *eh;
 	u_short etype;
 
 	/* Ethernet processing. */
 	if ((m->m_flags & M_PKTHDR) == 0) {
 		DNETDEBUG_IF(ifp, "discard frame without packet header\n");
 		goto done;
 	}
 	if (m->m_len < ETHER_HDR_LEN) {
 		DNETDEBUG_IF(ifp,
 	    "discard frame without leading eth header (len %u pktlen %u)\n",
 		    m->m_len, m->m_pkthdr.len);
 		goto done;
 	}
 	/* Trim a trailing frame check sequence if the driver left it on. */
 	if ((m->m_flags & M_HASFCS) != 0) {
 		m_adj(m, -ETHER_CRC_LEN);
 		m->m_flags &= ~M_HASFCS;
 	}
 	eh = mtod(m, struct ether_header *);
 	etype = ntohs(eh->ether_type);
 	if ((m->m_flags & M_VLANTAG) != 0 || etype == ETHERTYPE_VLAN) {
 		DNETDEBUG_IF(ifp, "ignoring vlan packets\n");
 		goto done;
 	}
 	if (if_gethwaddr(ifp, &ifr) != 0) {
 		DNETDEBUG_IF(ifp, "failed to get hw addr for interface\n");
 		goto done;
 	}
 	/*
 	 * Accept frames addressed to our own hardware address; the only
 	 * other frames let through are broadcast ARP.
 	 */
 	if (memcmp(ifr.ifr_addr.sa_data, eh->ether_dhost,
 	    ETHER_ADDR_LEN) != 0 &&
 	    (etype != ETHERTYPE_ARP || !ETHER_IS_BROADCAST(eh->ether_dhost))) {
 		DNETDEBUG_IF(ifp,
 		    "discard frame with incorrect destination addr\n");
 		goto done;
 	}
 
 	MPASS(g_debugnet_pcb_inuse);
 
 	/* Done ethernet processing. Strip off the ethernet header. */
 	m_adj(m, ETHER_HDR_LEN);
 	switch (etype) {
 	case ETHERTYPE_ARP:
 		debugnet_handle_arp(&g_dnet_pcb, &m);
 		break;
 	case ETHERTYPE_IP:
 		debugnet_handle_ip(&g_dnet_pcb, &m);
 		break;
 	default:
 		DNETDEBUG_IF(ifp, "dropping unknown ethertype %hu\n", etype);
 		break;
 	}
 done:
 	if (m != NULL)
 		m_freem(m);
 }
 
 /*
  * Network polling primitive.
  *
  * Rather than trusting the regular network stack to deliver packets, ask the
  * connection's interface driver for them directly through its debugnet poll
  * method.
  */
 void
 debugnet_network_poll(struct debugnet_pcb *pcb)
 {
 	struct ifnet *poll_ifp = pcb->dp_ifp;
 
 	poll_ifp->if_debugnet_methods->dn_poll(poll_ifp, 1000);
 }
 
 /*
  * Start of consumer API surface.
  */
 
 /*
  * Tear down the (single, global) debugnet connection.
  *
  * Restores the interface's original if_input if it was overridden, sends
  * DEBUGNET_END to the driver if DEBUGNET_START was delivered, leaves the
  * debugnet mbuf zones, and poisons the pcb.
  *
  * This is also the error-unwind path of debugnet_connect(), which can bail
  * out before it marks the pcb in use.  The in-use flag is therefore only
  * required once if_input has actually been taken over.
  */
 void
 debugnet_free(struct debugnet_pcb *pcb)
 {
 	struct ifnet *ifp;
 
 	MPASS(pcb == &g_dnet_pcb);
 	MPASS(pcb->dp_drv_input == NULL || g_debugnet_pcb_inuse);
 
 	ifp = pcb->dp_ifp;
 	if (ifp != NULL) {
 		if (pcb->dp_drv_input != NULL)
 			ifp->if_input = pcb->dp_drv_input;
 		if (pcb->dp_event_started)
 			ifp->if_debugnet_methods->dn_event(ifp, DEBUGNET_END);
 	}
 	debugnet_mbuf_finish();
 
 	g_debugnet_pcb_inuse = false;
 	/* Fill with a recognisable pattern to expose use-after-free. */
 	memset(&g_dnet_pcb, 0xfd, sizeof(g_dnet_pcb));
 }
 
 /*
  * Establish the (single, global) debugnet connection described by dcp.
  *
  * Fills in the global pcb from dcp, infers any missing client address,
  * gateway or interface from an IPv4 route lookup towards the server, takes
  * over the interface's if_input, resolves the next hop's MAC address and
  * finally sends the HERALD message.
  *
  * Parameters:
  *	dcp	connection parameters
  *	pcb_out	on success, receives the connected pcb
  *
  * Returns:
  *	0 on success; EBUSY if a connection already exists; ENOENT, ENODEV,
  *	ENXIO or an error from debugnet_arp_gw()/debugnet_send() otherwise.
  *	On failure the partially set up state is released via debugnet_free().
  */
 int
 debugnet_connect(const struct debugnet_conn_params *dcp,
     struct debugnet_pcb **pcb_out)
 {
 	struct debugnet_proto_aux herald_auxdata;
 	struct debugnet_pcb *pcb;
 	struct ifnet *ifp;
 	int error;
 
 	if (g_debugnet_pcb_inuse) {
 		printf("%s: Only one connection at a time.\n", __func__);
 		return (EBUSY);
 	}
 
 	/* Members not named here are zero-initialised by the literal. */
 	pcb = &g_dnet_pcb;
 	*pcb = (struct debugnet_pcb) {
 		.dp_state = DN_STATE_INIT,
 		.dp_client = dcp->dc_client,
 		.dp_server = dcp->dc_server,
 		.dp_gateway = dcp->dc_gateway,
 		.dp_server_port = dcp->dc_herald_port,	/* Initially */
 		.dp_client_port = dcp->dc_client_port,
 		.dp_seqno = 1,
 		.dp_ifp = dcp->dc_ifp,
 		.dp_rx_handler = dcp->dc_rx_handler,
 	};
 
 	/* Switch to the debugnet mbuf zones. */
 	debugnet_mbuf_start();
 
 	/* At least one needed parameter is missing; infer it. */
 	if (pcb->dp_client == INADDR_ANY || pcb->dp_gateway == INADDR_ANY ||
 	    pcb->dp_ifp == NULL) {
 		struct sockaddr_in dest_sin, *gw_sin, *local_sin;
 		struct ifnet *rt_ifp;
 		struct nhop_object *nh;
 
 		memset(&dest_sin, 0, sizeof(dest_sin));
 		dest_sin = (struct sockaddr_in) {
 			.sin_len = sizeof(dest_sin),
 			.sin_family = AF_INET,
 			.sin_addr.s_addr = pcb->dp_server,
 		};
 
 		CURVNET_SET(vnet0);
 		nh = fib4_lookup_debugnet(debugnet_fib, dest_sin.sin_addr, 0,
 		    NHR_NONE);
 		CURVNET_RESTORE();
 
 		if (nh == NULL) {
 			printf("%s: Could not get route for that server.\n",
 			    __func__);
 			error = ENOENT;
 			goto cleanup;
 		}
 
+		/* TODO support AF_INET6 */
 		if (nh->gw_sa.sa_family == AF_INET)
 			gw_sin = &nh->gw4_sa;
 		else {
 			if (nh->gw_sa.sa_family == AF_LINK)
 				DNETDEBUG("Destination address is on link.\n");
 			gw_sin = NULL;
 		}
 
 		MPASS(nh->nh_ifa->ifa_addr->sa_family == AF_INET);
 		local_sin = (struct sockaddr_in *)nh->nh_ifa->ifa_addr;
 
 		rt_ifp = nh->nh_ifp;
 
 		/* Only fill in what the caller left unspecified. */
 		if (pcb->dp_client == INADDR_ANY)
 			pcb->dp_client = local_sin->sin_addr.s_addr;
 		if (pcb->dp_gateway == INADDR_ANY && gw_sin != NULL)
 			pcb->dp_gateway = gw_sin->sin_addr.s_addr;
 		if (pcb->dp_ifp == NULL)
 			pcb->dp_ifp = rt_ifp;
 	}
 
 	ifp = pcb->dp_ifp;
 
 	if (debugnet_debug > 0) {
 		char serbuf[INET_ADDRSTRLEN], clibuf[INET_ADDRSTRLEN],
 		    gwbuf[INET_ADDRSTRLEN];
 		inet_ntop(AF_INET, &pcb->dp_server, serbuf, sizeof(serbuf));
 		inet_ntop(AF_INET, &pcb->dp_client, clibuf, sizeof(clibuf));
 		/* gwbuf is only formatted, and only printed, with a gateway. */
 		if (pcb->dp_gateway != INADDR_ANY)
 			inet_ntop(AF_INET, &pcb->dp_gateway, gwbuf, sizeof(gwbuf));
 		DNETDEBUG("Connecting to %s:%d%s%s from %s:%d on %s\n",
 		    serbuf, pcb->dp_server_port,
 		    (pcb->dp_gateway == INADDR_ANY) ? "" : " via ",
 		    (pcb->dp_gateway == INADDR_ANY) ? "" : gwbuf,
 		    clibuf, pcb->dp_client_port, if_name(ifp));
 	}
 
 	/* Validate iface is online and supported. */
 	if (!DEBUGNET_SUPPORTED_NIC(ifp)) {
 		printf("%s: interface '%s' does not support debugnet\n",
 		    __func__, if_name(ifp));
 		error = ENODEV;
 		goto cleanup;
 	}
 	if ((if_getflags(ifp) & IFF_UP) == 0) {
 		printf("%s: interface '%s' link is down\n", __func__,
 		    if_name(ifp));
 		error = ENXIO;
 		goto cleanup;
 	}
 
 	/* Remember that START was sent so teardown can send the matching END. */
 	ifp->if_debugnet_methods->dn_event(ifp, DEBUGNET_START);
 	pcb->dp_event_started = true;
 
 	/*
 	 * We maintain the invariant that g_debugnet_pcb_inuse is always true
 	 * while the debugnet ifp's if_input is overridden with
 	 * debugnet_pkt_in.
 	 */
 	g_debugnet_pcb_inuse = true;
 
 	/* Make the card use *our* receive callback. */
 	pcb->dp_drv_input = ifp->if_input;
 	ifp->if_input = debugnet_pkt_in;
 
 	printf("%s: searching for %s MAC...\n", __func__,
 	    (dcp->dc_gateway == INADDR_ANY) ? "server" : "gateway");
 
 	error = debugnet_arp_gw(pcb);
 	if (error != 0) {
 		printf("%s: failed to locate MAC address\n", __func__);
 		goto cleanup;
 	}
 	MPASS(pcb->dp_state == DN_STATE_HAVE_GW_MAC);
 
 	herald_auxdata = (struct debugnet_proto_aux) {
 		.dp_offset_start = dcp->dc_herald_offset,
 		.dp_aux2 = dcp->dc_herald_aux2,
 	};
 	error = debugnet_send(pcb, DEBUGNET_HERALD, dcp->dc_herald_data,
 	    dcp->dc_herald_datalen, &herald_auxdata);
 	if (error != 0) {
 		printf("%s: failed to herald debugnet server\n", __func__);
 		goto cleanup;
 	}
 
 	*pcb_out = pcb;
 	return (0);
 
 cleanup:
 	debugnet_free(pcb);
 	return (error);
 }
 
 /*
  * Pre-allocated dump-time mbuf tracking.
  *
  * We just track the high water mark we've ever seen and allocate appropriately
  * for that iface/mtu combo.
  */
 static struct {
 	int nmbuf;	/* largest mbuf count requested so far */
 	int ncl;	/* largest cluster count requested so far */
 	int clsize;	/* largest cluster size requested so far */
 } dn_hwm;
 /* Protects dn_hwm. */
 static struct mtx dn_hwm_lk;
 MTX_SYSINIT(debugnet_hwm_lock, &dn_hwm_lk, "Debugnet HWM lock", MTX_DEF);
 
 /*
  * Fold a new (nmbuf, ncl, clsize) request into the recorded high water
  * marks.  If any of the three marks was raised, re-initialise the debugnet
  * mbuf pools with the resulting per-field maxima.
  */
 static void
 dn_maybe_reinit_mbufs(int nmbuf, int ncl, int clsize)
 {
 	bool grew;
 
 	mtx_lock(&dn_hwm_lk);
 
 	/* Decide first, against the marks as they stood on entry. */
 	grew = (nmbuf > dn_hwm.nmbuf || ncl > dn_hwm.ncl ||
 	    clsize > dn_hwm.clsize);
 
 	/* Raise each mark that was exceeded, then adopt the stored maximum. */
 	if (nmbuf > dn_hwm.nmbuf)
 		dn_hwm.nmbuf = nmbuf;
 	nmbuf = dn_hwm.nmbuf;
 
 	if (ncl > dn_hwm.ncl)
 		dn_hwm.ncl = ncl;
 	ncl = dn_hwm.ncl;
 
 	if (clsize > dn_hwm.clsize)
 		dn_hwm.clsize = clsize;
 	clsize = dn_hwm.clsize;
 
 	mtx_unlock(&dn_hwm_lk);
 
 	if (grew)
 		debugnet_mbuf_reinit(nmbuf, ncl, clsize);
 }
 
 /*
  * Query a debugnet-capable interface for its receive ring count, cluster
  * count and cluster size, and grow the pre-allocated debugnet mbuf pools if
  * this interface needs more than any seen before.  Interfaces that do not
  * support debugnet are ignored.
  */
 void
 debugnet_any_ifnet_update(struct ifnet *ifp)
 {
 	int clsize, nmbuf, ncl, nrxr;
 
 	if (!DEBUGNET_SUPPORTED_NIC(ifp))
 		return;
 
 	ifp->if_debugnet_methods->dn_init(ifp, &nrxr, &ncl, &clsize);
 	KASSERT(nrxr > 0, ("invalid receive ring count %d", nrxr));
 
 	/*
 	 * We need two headers per message on the transmit side. Multiply by
 	 * four to give us some breathing room.
 	 */
 	nmbuf = ncl * (4 + nrxr);
 	/* Scale the driver's cluster count by the number of rings. */
 	ncl *= nrxr;
 
 	/*
 	 * Bandaid for drivers that (incorrectly) advertise LinkUp before their
 	 * dn_init method is available.
 	 */
 	if (nmbuf == 0 || ncl == 0 || clsize == 0) {
 		printf("%s: Bad dn_init result from %s (ifp %p), ignoring.\n",
 		    __func__, if_name(ifp), ifp);
 		return;
 	}
 	dn_maybe_reinit_mbufs(nmbuf, ncl, clsize);
 }
 
 /*
  * Unfortunately, the ifnet_arrival_event eventhandler hook is mostly useless
  * for us because drivers tend to if_attach before invoking DEBUGNET_SET().
  *
  * On the other hand, hooking DEBUGNET_SET() itself may still be too early,
  * because the driver is still in attach.  Since we cannot use down interfaces,
  * maybe hooking ifnet_event:IFNET_EVENT_UP is sufficient?  ... Nope, at least
  * with vtnet and dhcpclient that event just never occurs.
  *
  * So that's how I've landed on the lower level ifnet_link_event.
  */
 
 /*
  * ifnet_link_event handler: refresh the debugnet mbuf sizing whenever an
  * interface reports link up; every other link state is ignored.
  */
 static void
 dn_ifnet_event(void *arg __unused, struct ifnet *ifp, int link_state)
 {
 	if (link_state != LINK_STATE_UP)
 		return;
 
 	debugnet_any_ifnet_update(ifp);
 }
 
 /* Tag returned when registering dn_ifnet_event. */
 static eventhandler_tag dn_attach_cookie;
 
 /*
  * Boot-time hook: subscribe dn_ifnet_event to ifnet_link_event notifications.
  */
 static void
 dn_evh_init(void *ctx __unused)
 {
 	dn_attach_cookie = EVENTHANDLER_REGISTER(ifnet_link_event,
 	    dn_ifnet_event, NULL, EVENTHANDLER_PRI_ANY);
 }
 SYSINIT(dn_evh_init, SI_SUB_EVENTHANDLER + 1, SI_ORDER_ANY, dn_evh_init, NULL);
 
 /*
  * DDB parsing helpers for debugnet(4) consumers.
  */
 #ifdef DDB
 /* Parse state for one IPv4-address-valued DDB command option. */
 struct my_inet_opt {
 	bool has_opt;		/* set once the option was parsed successfully */
 	const char *printname;	/* option name used in diagnostics */
 	in_addr_t *result;	/* where the parsed address is stored */
 };
 
 /*
  * Read a dotted-quad IPv4 address from the DDB lexer.
  *
  * Expects four decimal numbers in the range 0-255 separated by '.' tokens.
  * On success the address is stored through opt->result in network byte order
  * and opt->has_opt is set.
  *
  * Returns:
  *	0 on success, EINVAL on a syntax error, EDOM if an octet is out of
  *	range.
  */
 static int
 dn_parse_optarg_ipv4(struct my_inet_opt *opt)
 {
 	in_addr_t tmp;
 	unsigned octet;
 	int t;
 
 	tmp = 0;
 	for (octet = 0; octet < 4; octet++) {
 		t = db_read_token_flags(DRT_WSPACE | DRT_DECIMAL);
 		if (t != tNUMBER) {
 			db_printf("%s:%s: octet %u expected number; found %d\n",
 			    __func__, opt->printname, octet, t);
 			return (EINVAL);
 		}
 		/*
 		 * db_lex lexes '-' distinctly from the number itself, but
 		 * let's document that invariant.
 		 */
 		MPASS(db_tok_number >= 0);
 
 		if (db_tok_number > UINT8_MAX) {
 			db_printf("%s:%s: octet %u out of range: %jd\n", __func__,
 			    opt->printname, octet, (intmax_t)db_tok_number);
 			return (EDOM);
 		}
 
 		/* Constructed host-endian and converted to network later. */
 		tmp = (tmp << 8) | db_tok_number;
 
 		/* A '.' must follow every octet except the last. */
 		if (octet < 3) {
 			t = db_read_token_flags(DRT_WSPACE);
 			if (t != tDOT) {
 				db_printf("%s:%s: octet %u expected '.'; found"
 				    " %d\n", __func__, opt->printname, octet,
 				    t);
 				return (EINVAL);
 			}
 		}
 	}
 
 	*opt->result = htonl(tmp);
 	opt->has_opt = true;
 	return (0);
 }
 
 /*
  * Parse the arguments of a debugnet-based DDB command.
  *
  * Accepted syntax: -s <server> [-g <gateway>] [-c <localip>]
  * [-i <interface>]; the server address is mandatory.  The DDB lexer is
  * consumed up to end of line in every case.
  *
  * Parameters:
  *	cmd	command name, used in diagnostics and the usage message
  *	result	zeroed, then filled with the parsed configuration
  *
  * Returns:
  *	0 on success; EINVAL on a syntax error or missing server; ENOENT if
  *	the named interface does not exist; otherwise the error from parsing
  *	an address argument.
  */
 int
 debugnet_parse_ddb_cmd(const char *cmd, struct debugnet_ddb_config *result)
 {
 	struct ifnet *ifp;
 	int t, error;
 	bool want_ifp;
 	char ch;
 
 	struct my_inet_opt opt_client = {
 		.printname = "client",
 		.result = &result->dd_client,
 	},
 	opt_server = {
 		.printname = "server",
 		.result = &result->dd_server,
 	},
 	opt_gateway = {
 		.printname = "gateway",
 		.result = &result->dd_gateway,
 	},
 	*cur_inet_opt;
 
 	ifp = NULL;
 	memset(result, 0, sizeof(*result));
 
 	/*
 	 * command [space] [-] [opt] [[space] [optarg]] ...
 	 *
 	 * db_command has already lexed 'command' for us.
 	 */
 	t = db_read_token_flags(DRT_WSPACE);
 	if (t == tWSPACE)
 		t = db_read_token_flags(DRT_WSPACE);
 
 	while (t != tEOL) {
 		if (t != tMINUS) {
 			db_printf("%s: Bad syntax; expected '-', got %d\n",
 			    cmd, t);
 			goto usage;
 		}
 
 		t = db_read_token_flags(DRT_WSPACE);
 		if (t != tIDENT) {
 			db_printf("%s: Bad syntax; expected tIDENT, got %d\n",
 			    cmd, t);
 			goto usage;
 		}
 
 		if (strlen(db_tok_string) > 1) {
 			db_printf("%s: Bad syntax; expected single option "
 			    "flag, got '%s'\n", cmd, db_tok_string);
 			goto usage;
 		}
 
 		want_ifp = false;
 		cur_inet_opt = NULL;
 		switch ((ch = db_tok_string[0])) {
 		default:
 			DNETDEBUG("Unexpected: '%c'\n", ch);
 			/* FALLTHROUGH */
 		case 'h':
 			goto usage;
 		case 'c':
 			cur_inet_opt = &opt_client;
 			break;
 		case 'g':
 			cur_inet_opt = &opt_gateway;
 			break;
 		case 's':
 			cur_inet_opt = &opt_server;
 			break;
 		case 'i':
 			want_ifp = true;
 			break;
 		}
 
 		t = db_read_token_flags(DRT_WSPACE);
 		if (t != tWSPACE) {
 			db_printf("%s: Bad syntax; expected space after "
 			    "flag %c, got %d\n", cmd, ch, t);
 			goto usage;
 		}
 
 		if (want_ifp) {
 			t = db_read_token_flags(DRT_WSPACE);
 			if (t != tIDENT) {
 				db_printf("%s: Expected interface but got %d\n",
 				    cmd, t);
 				goto usage;
 			}
 
 			CURVNET_SET(vnet0);
 			/*
 			 * We *don't* take a ref here because the only current
 			 * consumer, db_netdump_cmd, does not need it.  It
 			 * (somewhat redundantly) extracts the if_name(),
 			 * re-lookups the ifp, and takes its own reference.
 			 */
 			ifp = ifunit(db_tok_string);
 			CURVNET_RESTORE();
 			if (ifp == NULL) {
 				db_printf("Could not locate interface %s\n",
 				    db_tok_string);
 				/*
 				 * Set an explicit error: 'error' is otherwise
 				 * uninitialized here, or still 0 from a
 				 * previously parsed option.
 				 */
 				error = ENOENT;
 				goto cleanup;
 			}
 		} else {
 			MPASS(cur_inet_opt != NULL);
 			/* Assume IPv4 for now. */
 			error = dn_parse_optarg_ipv4(cur_inet_opt);
 			if (error != 0)
 				goto cleanup;
 		}
 
 		/* Skip (mandatory) whitespace after option, if not EOL. */
 		t = db_read_token_flags(DRT_WSPACE);
 		if (t == tEOL)
 			break;
 		if (t != tWSPACE) {
 			db_printf("%s: Bad syntax; expected space after "
 			    "flag %c option; got %d\n", cmd, ch, t);
 			goto usage;
 		}
 		t = db_read_token_flags(DRT_WSPACE);
 	}
 
 	if (!opt_server.has_opt) {
 		db_printf("%s: need a destination server address\n", cmd);
 		goto usage;
 	}
 
 	result->dd_has_client = opt_client.has_opt;
 	result->dd_has_gateway = opt_gateway.has_opt;
 	result->dd_ifp = ifp;
 
 	/* We parsed the full line to tEOL already, or bailed with an error. */
 	return (0);
 
 usage:
 	db_printf("Usage: %s -s <server> [-g <gateway> -c <localip> "
 	    "-i <interface>]\n", cmd);
 	error = EINVAL;
 	/* FALLTHROUGH */
 cleanup:
 	/* Discard the rest of the input line so DDB starts clean. */
 	db_skip_to_eol();
 	return (error);
 }
 #endif /* DDB */
diff --git a/sys/net/if_disc.c b/sys/net/if_disc.c
index ac0028c42f70..14d544dfd86a 100644
--- a/sys/net/if_disc.c
+++ b/sys/net/if_disc.c
@@ -1,246 +1,246 @@
 /*-
  * SPDX-License-Identifier: BSD-3-Clause
  *
  * Copyright (c) 1982, 1986, 1993
  *	The Regents of the University of California.  All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  *	From: @(#)if_loop.c	8.1 (Berkeley) 6/10/93
  * $FreeBSD$
  */
 
 /*
  * Discard interface driver for protocol testing and timing.
  * (Based on the loopback.)
  */
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/kernel.h>
 #include <sys/malloc.h>
 #include <sys/module.h>
 #include <sys/mbuf.h>
 #include <sys/socket.h>
 #include <sys/sockio.h>
 
 #include <net/if.h>
 #include <net/if_var.h>
 #include <net/if_clone.h>
 #include <net/if_types.h>
 #include <net/route.h>
 #include <net/bpf.h>
 #include <net/vnet.h>
 
 #include "opt_inet.h"
 #include "opt_inet6.h"
 
 /* MTU assigned to newly created disc interfaces. */
 #ifdef TINY_DSMTU
 #define	DSMTU	(1024+512)
 #else
 #define DSMTU	65532
 #endif
 
 /* Per-interface driver state. */
 struct disc_softc {
 	struct ifnet *sc_ifp;	/* the ifnet owned by this instance */
 };
 
 static int	discoutput(struct ifnet *, struct mbuf *,
 		    const struct sockaddr *, struct route *);
 static int	discioctl(struct ifnet *, u_long, caddr_t);
 static int	disc_clone_create(struct if_clone *, int, caddr_t);
 static void	disc_clone_destroy(struct ifnet *);
 
 /* Interface name prefix; also used as the malloc type's short name. */
 static const char discname[] = "disc";
 static MALLOC_DEFINE(M_DISC, discname, "Discard interface");
 
 /* Per-vnet cloner handle, set by vnet_disc_init(). */
 VNET_DEFINE_STATIC(struct if_clone *, disc_cloner);
 #define	V_disc_cloner	VNET(disc_cloner)
 
 /*
  * Cloner create callback: allocate a softc and an ifnet for unit 'unit',
  * configure it, and attach it to the interface list and to BPF.
  *
  * Returns 0 on success, or ENOSPC if the ifnet cannot be allocated.
  */
 static int
 disc_clone_create(struct if_clone *ifc, int unit, caddr_t params)
 {
 	struct ifnet		*ifp;
 	struct disc_softc	*sc;
 
 	sc = malloc(sizeof(struct disc_softc), M_DISC, M_WAITOK | M_ZERO);
 	ifp = sc->sc_ifp = if_alloc(IFT_LOOP);
 	if (ifp == NULL) {
 		free(sc, M_DISC);
 		return (ENOSPC);
 	}
 
 	ifp->if_softc = sc;
 	if_initname(ifp, discname, unit);
 	ifp->if_mtu = DSMTU;
 	/*
 	 * IFF_LOOPBACK should not be removed from disc's flags because
 	 * it controls what PF-specific routes are magically added when
 	 * a network address is assigned to the interface.  Things just
 	 * won't work as intended w/o such routes because the output
 	 * interface selection for a packet is totally route-driven.
 	 * A valid alternative to IFF_LOOPBACK can be IFF_BROADCAST or
 	 * IFF_POINTOPOINT, but it would result in different properties
 	 * of the interface.
 	 */
 	ifp->if_flags = IFF_LOOPBACK | IFF_MULTICAST;
 	ifp->if_drv_flags = IFF_DRV_RUNNING;
 	ifp->if_ioctl = discioctl;
 	ifp->if_output = discoutput;
 	/* No link-layer header and no link-layer address. */
 	ifp->if_hdrlen = 0;
 	ifp->if_addrlen = 0;
 	ifp->if_snd.ifq_maxlen = 20;
 	if_attach(ifp);
 	bpfattach(ifp, DLT_NULL, sizeof(u_int32_t));
 
 	return (0);
 }
 
 /*
  * Cloner destroy callback: undo disc_clone_create() in reverse order, i.e.
  * detach from BPF, detach and free the ifnet, then free the softc.
  */
 static void
 disc_clone_destroy(struct ifnet *ifp)
 {
 	struct disc_softc	*sc;
 
 	/* Fetch the softc before the ifnet is freed. */
 	sc = ifp->if_softc;
 
 	bpfdetach(ifp);
 	if_detach(ifp);
 	if_free(ifp);
 
 	free(sc, M_DISC);
 }
 
 /*
  * Per-vnet initialisation: register the "disc" interface cloner and remember
  * its handle in V_disc_cloner.
  */
 static void
 vnet_disc_init(const void *unused __unused)
 {
 
 	V_disc_cloner = if_clone_simple(discname, disc_clone_create,
 	    disc_clone_destroy, 0);
 }
 VNET_SYSINIT(vnet_disc_init, SI_SUB_PSEUDO, SI_ORDER_ANY,
     vnet_disc_init, NULL);
 
 /*
  * Per-vnet teardown: detach the cloner registered by vnet_disc_init().
  */
 static void
 vnet_disc_uninit(const void *unused __unused)
 {
 
 	if_clone_detach(V_disc_cloner);
 }
 VNET_SYSUNINIT(vnet_disc_uninit, SI_SUB_INIT_IF, SI_ORDER_ANY,
     vnet_disc_uninit, NULL);
 
 /*
  * Module event handler.  Load and unload need no work here and succeed;
  * every other event type is rejected with EOPNOTSUPP.
  */
 static int
 disc_modevent(module_t mod, int type, void *data)
 {
 
 	if (type == MOD_LOAD || type == MOD_UNLOAD)
 		return (0);
 
 	return (EOPNOTSUPP);
 }
 
 /* Module description: name, event handler, and no private data. */
 static moduledata_t disc_mod = {
 	"if_disc",
 	disc_modevent,
 	NULL
 };
 
 DECLARE_MODULE(if_disc, disc_mod, SI_SUB_PSEUDO, SI_ORDER_ANY);
 
 /*
  * Output routine: show the packet to any BPF listeners, account for it in
  * the interface's output counters, and then free it.  Always returns 0.
  */
 static int
 discoutput(struct ifnet *ifp, struct mbuf *m, const struct sockaddr *dst,
     struct route *ro)
 {
 	u_int32_t af;
 
 	M_ASSERTPKTHDR(m);
 
 	/* BPF writes need to be handled specially. */
 	if (dst->sa_family == AF_UNSPEC)
 		bcopy(dst->sa_data, &af, sizeof(af));
 	else
-		af = dst->sa_family;
+		af = RO_GET_FAMILY(ro, dst);
 
 	/* The address family is passed to the tap ahead of the packet. */
 	if (bpf_peers_present(ifp->if_bpf))
 		bpf_mtap2(ifp->if_bpf, &af, sizeof(af), m);
 
 	m->m_pkthdr.rcvif = ifp;
 
 	if_inc_counter(ifp, IFCOUNTER_OPACKETS, 1);
 	if_inc_counter(ifp, IFCOUNTER_OBYTES, m->m_pkthdr.len);
 
 	/* Discard: the packet goes nowhere. */
 	m_freem(m);
 	return (0);
 }
 
 /*
  * Process an ioctl request.
  *
  * Handles SIOCSIFADDR (marks the interface up), SIOCADDMULTI/SIOCDELMULTI
  * (accepted for the address families compiled in), and SIOCSIFMTU.
  *
  * Returns 0 on success, EAFNOSUPPORT for an unsupported or missing multicast
  * address, and EINVAL for any other command.
  */
 static int
 discioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
 {
 	struct ifreq *ifr = (struct ifreq *)data;
 	int error = 0;
 
 	switch (cmd) {
 	case SIOCSIFADDR:
 		ifp->if_flags |= IFF_UP;
 
 		/*
 		 * Everything else is done at a higher level.
 		 */
 		break;
 	case SIOCADDMULTI:
 	case SIOCDELMULTI:
 		if (ifr == NULL) {
 			error = EAFNOSUPPORT;		/* XXX */
 			break;
 		}
 		/* Nothing to program; just validate the address family. */
 		switch (ifr->ifr_addr.sa_family) {
 #ifdef INET
 		case AF_INET:
 			break;
 #endif
 #ifdef INET6
 		case AF_INET6:
 			break;
 #endif
 		default:
 			error = EAFNOSUPPORT;
 			break;
 		}
 		break;
 	case SIOCSIFMTU:
 		/* NOTE(review): the requested MTU is stored without a range check. */
 		ifp->if_mtu = ifr->ifr_mtu;
 		break;
 	default:
 		error = EINVAL;
 	}
 	return (error);
 }
diff --git a/sys/net/if_ethersubr.c b/sys/net/if_ethersubr.c
index 6d8b79d4dd12..3209e8a82978 100644
--- a/sys/net/if_ethersubr.c
+++ b/sys/net/if_ethersubr.c
@@ -1,1489 +1,1490 @@
 /*-
  * SPDX-License-Identifier: BSD-3-Clause
  *
  * Copyright (c) 1982, 1989, 1993
  *	The Regents of the University of California.  All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  *	@(#)if_ethersubr.c	8.1 (Berkeley) 6/10/93
  * $FreeBSD$
  */
 
 #include "opt_inet.h"
 #include "opt_inet6.h"
 #include "opt_netgraph.h"
 #include "opt_mbuf_profiling.h"
 #include "opt_rss.h"
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/devctl.h>
 #include <sys/eventhandler.h>
 #include <sys/jail.h>
 #include <sys/kernel.h>
 #include <sys/lock.h>
 #include <sys/malloc.h>
 #include <sys/mbuf.h>
 #include <sys/module.h>
 #include <sys/msan.h>
 #include <sys/proc.h>
 #include <sys/priv.h>
 #include <sys/random.h>
 #include <sys/socket.h>
 #include <sys/sockio.h>
 #include <sys/sysctl.h>
 #include <sys/uuid.h>
 
 #include <net/ieee_oui.h>
 #include <net/if.h>
 #include <net/if_var.h>
 #include <net/if_arp.h>
 #include <net/netisr.h>
 #include <net/route.h>
 #include <net/if_llc.h>
 #include <net/if_dl.h>
 #include <net/if_types.h>
 #include <net/bpf.h>
 #include <net/ethernet.h>
 #include <net/if_bridgevar.h>
 #include <net/if_vlan_var.h>
 #include <net/if_llatbl.h>
 #include <net/pfil.h>
 #include <net/rss_config.h>
 #include <net/vnet.h>
 
 #include <netpfil/pf/pf_mtag.h>
 
 #if defined(INET) || defined(INET6)
 #include <netinet/in.h>
 #include <netinet/in_var.h>
 #include <netinet/if_ether.h>
 #include <netinet/ip_carp.h>
 #include <netinet/ip_var.h>
 #endif
 #ifdef INET6
 #include <netinet6/nd6.h>
 #endif
 #include <security/mac/mac_framework.h>
 
 #include <crypto/sha1.h>
 
 /*
  * Compile-time layout checks: the Ethernet header must be exactly two
  * addresses plus a 2-byte type, and an ether_addr exactly one address.
  */
 #ifdef CTASSERT
 CTASSERT(sizeof (struct ether_header) == ETHER_ADDR_LEN * 2 + 2);
 CTASSERT(sizeof (struct ether_addr) == ETHER_ADDR_LEN);
 #endif
 
 VNET_DEFINE(pfil_head_t, link_pfil_head);	/* Packet filter hooks */
 
 /* netgraph node hooks for ng_ether(4) */
 void	(*ng_ether_input_p)(struct ifnet *ifp, struct mbuf **mp);
 void	(*ng_ether_input_orphan_p)(struct ifnet *ifp, struct mbuf *m);
 int	(*ng_ether_output_p)(struct ifnet *ifp, struct mbuf **mp);
 void	(*ng_ether_attach_p)(struct ifnet *ifp);
 void	(*ng_ether_detach_p)(struct ifnet *ifp);
 
 /* VLAN input hook; ether_demux() calls it for frames carrying a VLAN tag. */
 void	(*vlan_input_p)(struct ifnet *, struct mbuf *);
 
 /* if_bridge(4) support */
 void	(*bridge_dn_p)(struct mbuf *, struct ifnet *);
 
 /* if_lagg(4) support */
 struct mbuf *(*lagg_input_ethernet_p)(struct ifnet *, struct mbuf *); 
 
 /* All-ones link-layer address installed as if_broadcastaddr on attach. */
 static const u_char etherbroadcastaddr[ETHER_ADDR_LEN] =
 			{ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff };
 
 static	int ether_resolvemulti(struct ifnet *, struct sockaddr **,
 		struct sockaddr *);
 static	int ether_requestencap(struct ifnet *, struct if_encap_req *);
 
 /*
  * Store an error code in the caller's local `error' and jump to its
  * `bad' label; usable only in functions that provide both.
  */
 #define senderr(e) do { error = (e); goto bad;} while (0)
 
 /*
  * For every checksum that src still asks to have computed (CSUM_IP,
  * CSUM_DELAY_DATA, CSUM_SCTP), mark the matching checksum on dst as
  * already checked and valid.  src and dst may be the same mbuf.
  */
 static void
 update_mbuf_csumflags(struct mbuf *src, struct mbuf *dst)
 {
 	int verified;
 
 	verified = 0;
 	if ((src->m_pkthdr.csum_flags & CSUM_IP) != 0)
 		verified |= CSUM_IP_CHECKED | CSUM_IP_VALID;
 	if ((src->m_pkthdr.csum_flags & CSUM_DELAY_DATA) != 0)
 		verified |= CSUM_DATA_VALID | CSUM_PSEUDO_HDR;
 	if ((src->m_pkthdr.csum_flags & CSUM_SCTP) != 0)
 		verified |= CSUM_SCTP_VALID;
 
 	dst->m_pkthdr.csum_flags |= verified;
 	if ((verified & CSUM_DATA_VALID) != 0)
 		dst->m_pkthdr.csum_data = 0xffff;
 }
 
 /*
  * Build an Ethernet header for a link-layer encapsulation request.
  *
  * Only IFENCAP_LL requests are served (EOPNOTSUPP otherwise) and the
  * caller's buffer must hold at least ETHER_HDR_LEN bytes (ENOMEM
  * otherwise).  Supported families are AF_INET, AF_INET6 and AF_ARP;
  * anything else yields EAFNOSUPPORT.  On success the header is written
  * to req->buf and req->bufsize is set to its size.
  */
 static int
 ether_requestencap(struct ifnet *ifp, struct if_encap_req *req)
 {
 	struct ether_header *hdr;
 	struct arphdr *arp;
 	const u_char *dhost;
 	uint16_t proto, arpop;
 
 	if (req->rtype != IFENCAP_LL)
 		return (EOPNOTSUPP);
 	if (req->bufsize < ETHER_HDR_LEN)
 		return (ENOMEM);
 
 	hdr = (struct ether_header *)req->buf;
 	dhost = req->lladdr;
 	req->lladdr_off = 0;
 
 	if (req->family == AF_INET) {
 		proto = htons(ETHERTYPE_IP);
 	} else if (req->family == AF_INET6) {
 		proto = htons(ETHERTYPE_IPV6);
 	} else if (req->family == AF_ARP) {
 		arp = (struct arphdr *)req->hdata;
 		arp->ar_hrd = htons(ARPHRD_ETHER);
 
 		/* Reverse-ARP opcodes get their own ethertype. */
 		arpop = ntohs(arp->ar_op);
 		if (arpop == ARPOP_REVREQUEST || arpop == ARPOP_REVREPLY)
 			proto = htons(ETHERTYPE_REVARP);
 		else
 			proto = htons(ETHERTYPE_ARP);
 
 		if (req->flags & IFENCAP_FLAG_BROADCAST)
 			dhost = ifp->if_broadcastaddr;
 	} else {
 		return (EAFNOSUPPORT);
 	}
 
 	memcpy(&hdr->ether_type, &proto, sizeof(hdr->ether_type));
 	memcpy(hdr->ether_dhost, dhost, ETHER_ADDR_LEN);
 	memcpy(hdr->ether_shost, IF_LLADDR(ifp), ETHER_ADDR_LEN);
 	req->bufsize = sizeof(struct ether_header);
 
 	return (0);
 }
 
 /*
  * Fill phdr with an Ethernet header for sending m to dst on ifp.
  *
  * Unicast destinations are resolved through arpresolve()/nd6_resolve();
  * for broadcast and multicast the header is built here directly.
  * On success *pflags receives RT_MAY_LOOP, plus RT_L2_ME when the
  * resolver reported LLE_IFADDR.  If plle is non-NULL it is cleared first
  * and passed on to the resolver.
  *
  * Returns 0 on success or an errno value.  For an unsupported address
  * family the mbuf is freed here and EAFNOSUPPORT is returned.
  */
 static int
 ether_resolve_addr(struct ifnet *ifp, struct mbuf *m,
 	const struct sockaddr *dst, struct route *ro, u_char *phdr,
 	uint32_t *pflags, struct llentry **plle)
 {
 	struct ether_header *eh;
 	uint32_t lleflags = 0;
 	int error = 0;
 #if defined(INET) || defined(INET6)
 	uint16_t etype;
 #endif
 
 	if (plle)
 		*plle = NULL;
 	eh = (struct ether_header *)phdr;
 
 	switch (dst->sa_family) {
 #ifdef INET
 	case AF_INET:
 		if ((m->m_flags & (M_BCAST | M_MCAST)) == 0)
 			error = arpresolve(ifp, 0, m, dst, phdr, &lleflags,
 			    plle);
 		else {
 			if (m->m_flags & M_BCAST)
 				memcpy(eh->ether_dhost, ifp->if_broadcastaddr,
 				    ETHER_ADDR_LEN);
 			else {
 				const struct in_addr *a;
 				a = &(((const struct sockaddr_in *)dst)->sin_addr);
 				ETHER_MAP_IP_MULTICAST(a, eh->ether_dhost);
 			}
 			etype = htons(ETHERTYPE_IP);
 			memcpy(&eh->ether_type, &etype, sizeof(etype));
 			memcpy(eh->ether_shost, IF_LLADDR(ifp), ETHER_ADDR_LEN);
 		}
 		break;
 #endif
 #ifdef INET6
 	case AF_INET6:
-		if ((m->m_flags & M_MCAST) == 0)
-			error = nd6_resolve(ifp, LLE_SF(AF_INET6, 0), m, dst, phdr,
+		if ((m->m_flags & M_MCAST) == 0) {
+			int af = RO_GET_FAMILY(ro, dst);
+			error = nd6_resolve(ifp, LLE_SF(af, 0), m, dst, phdr,
 			    &lleflags, plle);
-		else {
+		} else {
 			const struct in6_addr *a6;
 			a6 = &(((const struct sockaddr_in6 *)dst)->sin6_addr);
 			ETHER_MAP_IPV6_MULTICAST(a6, eh->ether_dhost);
 			etype = htons(ETHERTYPE_IPV6);
 			memcpy(&eh->ether_type, &etype, sizeof(etype));
 			memcpy(eh->ether_shost, IF_LLADDR(ifp), ETHER_ADDR_LEN);
 		}
 		break;
 #endif
 	default:
 		if_printf(ifp, "can't handle af%d\n", dst->sa_family);
 		if (m != NULL)
 			m_freem(m);
 		return (EAFNOSUPPORT);
 	}
 
 	/* A down host behind a gateway is reported as unreachable instead. */
 	if (error == EHOSTDOWN) {
 		if (ro != NULL && (ro->ro_flags & RT_HAS_GW) != 0)
 			error = EHOSTUNREACH;
 	}
 
 	if (error != 0)
 		return (error);
 
 	*pflags = RT_MAY_LOOP;
 	if (lleflags & LLE_IFADDR)
 		*pflags |= RT_L2_ME;
 
 	return (0);
 }
 
 /*
  * Ethernet output routine.
  * Encapsulate a packet of type family for the local net.
  * Use trailer local net encapsulation if enough data in first
  * packet leaves a multiple of 512 bytes of data in remainder.
  *
  * NOTE(review): no trailer encapsulation is visible in the body below;
  * the sentence above looks historical -- confirm before relying on it.
  *
  * The link header comes from, in order of preference: ro->ro_prepend,
  * a cached link-layer entry in ro->ro_lle, or ether_resolve_addr().
  * Returns 0 or an errno value; on the `bad' path the mbuf is freed here.
  */
 int
 ether_output(struct ifnet *ifp, struct mbuf *m,
 	const struct sockaddr *dst, struct route *ro)
 {
 	int error = 0;
 	char linkhdr[ETHER_HDR_LEN], *phdr;
 	struct ether_header *eh;
 	struct pf_mtag *t;
 	bool loop_copy;
 	int hlen;	/* link layer header length */
 	uint32_t pflags;
 	struct llentry *lle = NULL;
 	int addref = 0;
 
 	phdr = NULL;
 	pflags = 0;
 	if (ro != NULL) {
 		/* XXX BPF uses ro_prepend */
 		if (ro->ro_prepend != NULL) {
 			phdr = ro->ro_prepend;
 			hlen = ro->ro_plen;
 		} else if (!(m->m_flags & (M_BCAST | M_MCAST))) {
 			if ((ro->ro_flags & RT_LLE_CACHE) != 0) {
 				lle = ro->ro_lle;
 				/* Drop a cached entry that is no longer valid. */
 				if (lle != NULL &&
 				    (lle->la_flags & LLE_VALID) == 0) {
 					LLE_FREE(lle);
 					lle = NULL;	/* redundant */
 					ro->ro_lle = NULL;
 				}
 				if (lle == NULL) {
 					/* if we lookup, keep cache */
 					addref = 1;
 				} else
 					/*
 					 * Notify LLE code that
 					 * the entry was used
 					 * by datapath.
 					 */
 					llentry_provide_feedback(lle);
 			}
 			if (lle != NULL) {
 				phdr = lle->r_linkdata;
 				hlen = lle->r_hdrlen;
 				pflags = lle->r_flags;
 			}
 		}
 	}
 
 #ifdef MAC
 	error = mac_ifnet_check_transmit(ifp, m);
 	if (error)
 		senderr(error);
 #endif
 
 	M_PROFILE(m);
 	if (ifp->if_flags & IFF_MONITOR)
 		senderr(ENETDOWN);
 	if (!((ifp->if_flags & IFF_UP) &&
 	    (ifp->if_drv_flags & IFF_DRV_RUNNING)))
 		senderr(ENETDOWN);
 
 	if (phdr == NULL) {
 		/* No prepend data supplied. Try to calculate ourselves. */
 		phdr = linkhdr;
 		hlen = ETHER_HDR_LEN;
 		error = ether_resolve_addr(ifp, m, dst, ro, phdr, &pflags,
 		    addref ? &lle : NULL);
 		if (addref && lle != NULL)
 			ro->ro_lle = lle;
 		/* EWOULDBLOCK from the resolver is not reported as an error. */
 		if (error != 0)
 			return (error == EWOULDBLOCK ? 0 : error);
 	}
 
 	/* Destination is one of our own addresses: loop the packet back. */
 	if ((pflags & RT_L2_ME) != 0) {
 		update_mbuf_csumflags(m, m);
-		return (if_simloop(ifp, m, dst->sa_family, 0));
+		return (if_simloop(ifp, m, RO_GET_FAMILY(ro, dst), 0));
 	}
 	loop_copy = (pflags & RT_MAY_LOOP) != 0;
 
 	/*
 	 * Add local net header.  If no space in first mbuf,
 	 * allocate another.
 	 *
 	 * Note that we do prepend regardless of RT_HAS_HEADER flag.
 	 * This is done because BPF code shifts m_data pointer
 	 * to the end of ethernet header prior to calling if_output().
 	 */
 	M_PREPEND(m, hlen, M_NOWAIT);
 	if (m == NULL)
 		senderr(ENOBUFS);
 	if ((pflags & RT_HAS_HEADER) == 0) {
 		eh = mtod(m, struct ether_header *);
 		memcpy(eh, phdr, hlen);
 	}
 
 	/*
 	 * If a simplex interface, and the packet is being sent to our
 	 * Ethernet address or a broadcast address, loopback a copy.
 	 * XXX To make a simplex device behave exactly like a duplex
 	 * device, we should copy in the case of sending to our own
 	 * ethernet address (thus letting the original actually appear
 	 * on the wire). However, we don't do that here for security
 	 * reasons and compatibility with the original behavior.
 	 */
 	if ((m->m_flags & M_BCAST) && loop_copy && (ifp->if_flags & IFF_SIMPLEX) &&
 	    ((t = pf_find_mtag(m)) == NULL || !t->routed)) {
 		struct mbuf *n;
 
 		/*
 		 * Because if_simloop() modifies the packet, we need a
 		 * writable copy through m_dup() instead of a readonly
 		 * one as m_copy[m] would give us. The alternative would
 		 * be to modify if_simloop() to handle the readonly mbuf,
 		 * but performancewise it is mostly equivalent (trading
 		 * extra data copying vs. extra locking).
 		 *
 		 * XXX This is a local workaround.  A number of less
 		 * often used kernel parts suffer from the same bug.
 		 * See PR kern/105943 for a proposed general solution.
 		 */
 		if ((n = m_dup(m, M_NOWAIT)) != NULL) {
 			update_mbuf_csumflags(m, n);
-			(void)if_simloop(ifp, n, dst->sa_family, hlen);
+			(void)if_simloop(ifp, n, RO_GET_FAMILY(ro, dst), hlen);
 		} else
 			if_inc_counter(ifp, IFCOUNTER_IQDROPS, 1);
 	}
 
        /*
 	* Bridges require special output handling.
 	*/
 	if (ifp->if_bridge) {
 		BRIDGE_OUTPUT(ifp, m, error);
 		return (error);
 	}
 
 #if defined(INET) || defined(INET6)
 	if (ifp->if_carp &&
 	    (error = (*carp_output_p)(ifp, m, dst)))
 		goto bad;
 #endif
 
 	/* Handle ng_ether(4) processing, if any */
 	if (ifp->if_l2com != NULL) {
 		KASSERT(ng_ether_output_p != NULL,
 		    ("ng_ether_output_p is NULL"));
 		if ((error = (*ng_ether_output_p)(ifp, &m)) != 0) {
 			/* Shared error exit: senderr() and `goto bad' land here. */
 bad:			if (m != NULL)
 				m_freem(m);
 			return (error);
 		}
 		if (m == NULL)
 			return (0);
 	}
 
 	/* Continue with link-layer output */
 	return ether_output_frame(ifp, m);
 }
 
 /*
  * Tag an outgoing frame with the priority code point pcp.
  *
  * Frames whose ethertype is already ETHERTYPE_VLAN or ETHERTYPE_QINQ
  * are left alone.  Otherwise a tag with VLAN id 0 is requested through
  * ether_8021q_frame().  Returns true when the frame may be sent on;
  * returns false, after counting an output error, when tagging failed.
  */
 static bool
 ether_set_pcp(struct mbuf **mp, struct ifnet *ifp, uint8_t pcp)
 {
 	struct ether_8021q_tag tag;
 	struct ether_header *hdr;
 	uint16_t proto;
 
 	hdr = mtod(*mp, struct ether_header *);
 	proto = ntohs(hdr->ether_type);
 	if (proto == ETHERTYPE_VLAN || proto == ETHERTYPE_QINQ)
 		return (true);
 
 	tag.vid = 0;
 	tag.pcp = pcp;
 	tag.proto = ETHERTYPE_VLAN;
 	if (!ether_8021q_frame(mp, ifp, ifp, &tag)) {
 		if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
 		return (false);
 	}
 	return (true);
 }
 
 /*
  * Send a raw, fully formed Ethernet frame to the device.
  *
  * The 14 byte Ethernet header is assumed to be present and contiguous
  * in the first mbuf (if BRIDGE'ing).
  *
  * Returns the result of the interface's if_transmit method, 0 when the
  * frame was consumed by a link-layer filter or could not be PCP-tagged,
  * and EACCES when a filter dropped it.
  */
 int
 ether_output_frame(struct ifnet *ifp, struct mbuf *m)
 {
 	uint8_t prio;
 
 	/* Apply the interface PCP, except on IFT_L2VLAN interfaces. */
 	prio = ifp->if_pcp;
 	if (prio != IFNET_PCP_NONE && ifp->if_type != IFT_L2VLAN) {
 		if (!ether_set_pcp(&m, ifp, prio))
 			return (0);
 	}
 
 	/* Give the outbound link-layer packet filter hooks a look. */
 	if (PFIL_HOOKED_OUT(V_link_pfil_head)) {
 		switch (pfil_run_hooks(V_link_pfil_head, &m, ifp, PFIL_OUT,
 		    NULL)) {
 		case PFIL_DROPPED:
 			return (EACCES);
 		case PFIL_CONSUMED:
 			return (0);
 		default:
 			break;
 		}
 	}
 
 #ifdef EXPERIMENTAL
 #if defined(INET6) && defined(INET)
 	/* draft-ietf-6man-ipv6only-flag */
 	/* Catch ETHERTYPE_IP, and ETHERTYPE_[REV]ARP if we are v6-only. */
 	if ((ND_IFINFO(ifp)->flags & ND6_IFF_IPV6_ONLY_MASK) != 0) {
 		struct ether_header *hdr;
 		uint16_t proto;
 
 		hdr = mtod(m, struct ether_header *);
 		proto = ntohs(hdr->ether_type);
 		if (proto == ETHERTYPE_IP || proto == ETHERTYPE_ARP ||
 		    proto == ETHERTYPE_REVARP) {
 			m_freem(m);
 			return (EAFNOSUPPORT);
 		}
 	}
 #endif
 #endif
 
 	/*
 	 * Queue message on interface, update output statistics if successful,
 	 * and start output if interface not yet active.
 	 *
 	 * If KMSAN is enabled, use it to verify that the data does not contain
 	 * any uninitialized bytes.
 	 */
 	kmsan_check_mbuf(m, "ether_output");
 	return ((ifp->if_transmit)(ifp, m));
 }
 
 /*
  * Process a received Ethernet packet; the packet is in the
  * mbuf chain m with the ethernet header at the front.
  *
  * The frame is validated, classified as broadcast/multicast, shown to
  * BPF, stripped of a trailing FCS and of an in-band 802.1Q tag, offered
  * to lagg(4), ng_ether(4) and if_bridge(4), marked M_PROMISC when it is
  * not addressed to this interface, and finally passed to ether_demux().
  * The mbuf is consumed on every path.
  */
 static void
 ether_input_internal(struct ifnet *ifp, struct mbuf *m)
 {
 	struct ether_header *eh;
 	u_short etype;
 
 	if ((ifp->if_flags & IFF_UP) == 0) {
 		m_freem(m);
 		return;
 	}
 #ifdef DIAGNOSTIC
 	if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) {
 		if_printf(ifp, "discard frame at !IFF_DRV_RUNNING\n");
 		m_freem(m);
 		return;
 	}
 #endif
 	if (m->m_len < ETHER_HDR_LEN) {
 		/* XXX maybe should pullup? */
 		if_printf(ifp, "discard frame w/o leading ethernet "
 				"header (len %u pkt len %u)\n",
 				m->m_len, m->m_pkthdr.len);
 		if_inc_counter(ifp, IFCOUNTER_IERRORS, 1);
 		m_freem(m);
 		return;
 	}
 	eh = mtod(m, struct ether_header *);
 	etype = ntohs(eh->ether_type);
 	random_harvest_queue_ether(m, sizeof(*m));
 
 #ifdef EXPERIMENTAL
 #if defined(INET6) && defined(INET)
 	/* draft-ietf-6man-ipv6only-flag */
 	/* Catch ETHERTYPE_IP, and ETHERTYPE_[REV]ARP if we are v6-only. */
 	if ((ND_IFINFO(ifp)->flags & ND6_IFF_IPV6_ONLY_MASK) != 0) {
 		switch (etype) {
 		case ETHERTYPE_IP:
 		case ETHERTYPE_ARP:
 		case ETHERTYPE_REVARP:
 			m_freem(m);
 			return;
 			/* NOTREACHED */
 			break;
 		};
 	}
 #endif
 #endif
 
 	/* From here on every return path must go through CURVNET_RESTORE(). */
 	CURVNET_SET_QUIET(ifp->if_vnet);
 
 	if (ETHER_IS_MULTICAST(eh->ether_dhost)) {
 		if (ETHER_IS_BROADCAST(eh->ether_dhost))
 			m->m_flags |= M_BCAST;
 		else
 			m->m_flags |= M_MCAST;
 		if_inc_counter(ifp, IFCOUNTER_IMCASTS, 1);
 	}
 
 #ifdef MAC
 	/*
 	 * Tag the mbuf with an appropriate MAC label before any other
 	 * consumers can get to it.
 	 */
 	mac_ifnet_create_mbuf(ifp, m);
 #endif
 
 	/*
 	 * Give bpf a chance at the packet.
 	 */
 	ETHER_BPF_MTAP(ifp, m);
 
 	/*
 	 * If the CRC is still on the packet, trim it off. We do this once
 	 * and once only in case we are re-entered. Nothing else on the
 	 * Ethernet receive path expects to see the FCS.
 	 */
 	if (m->m_flags & M_HASFCS) {
 		m_adj(m, -ETHER_CRC_LEN);
 		m->m_flags &= ~M_HASFCS;
 	}
 
 	/* Count input bytes here unless IFCAP_HWSTATS is enabled. */
 	if (!(ifp->if_capenable & IFCAP_HWSTATS))
 		if_inc_counter(ifp, IFCOUNTER_IBYTES, m->m_pkthdr.len);
 
 	/* Allow monitor mode to claim this frame, after stats are updated. */
 	if (ifp->if_flags & IFF_MONITOR) {
 		m_freem(m);
 		CURVNET_RESTORE();
 		return;
 	}
 
 	/* Handle input from a lagg(4) port */
 	if (ifp->if_type == IFT_IEEE8023ADLAG) {
 		KASSERT(lagg_input_ethernet_p != NULL,
 		    ("%s: if_lagg not loaded!", __func__));
 		m = (*lagg_input_ethernet_p)(ifp, m);
 		/* A non-NULL return continues on the mbuf's receive interface. */
 		if (m != NULL)
 			ifp = m->m_pkthdr.rcvif;
 		else {
 			CURVNET_RESTORE();
 			return;
 		}
 	}
 
 	/*
 	 * If the hardware did not process an 802.1Q tag, do this now,
 	 * to allow 802.1P priority frames to be passed to the main input
 	 * path correctly.
 	 */
 	if ((m->m_flags & M_VLANTAG) == 0 &&
 	    ((etype == ETHERTYPE_VLAN) || (etype == ETHERTYPE_QINQ))) {
 		struct ether_vlan_header *evl;
 
 		if (m->m_len < sizeof(*evl) &&
 		    (m = m_pullup(m, sizeof(*evl))) == NULL) {
 #ifdef DIAGNOSTIC
 			if_printf(ifp, "cannot pullup VLAN header\n");
 #endif
 			if_inc_counter(ifp, IFCOUNTER_IERRORS, 1);
 			CURVNET_RESTORE();
 			return;
 		}
 
 		evl = mtod(m, struct ether_vlan_header *);
 		m->m_pkthdr.ether_vtag = ntohs(evl->evl_tag);
 		m->m_flags |= M_VLANTAG;
 
 		/*
 		 * Slide the two MAC addresses forward over the tag, then
 		 * trim the now-unused bytes from the front of the mbuf.
 		 */
 		bcopy((char *)evl, (char *)evl + ETHER_VLAN_ENCAP_LEN,
 		    ETHER_HDR_LEN - ETHER_TYPE_LEN);
 		m_adj(m, ETHER_VLAN_ENCAP_LEN);
 		eh = mtod(m, struct ether_header *);
 	}
 
 	M_SETFIB(m, ifp->if_fib);
 
 	/* Allow ng_ether(4) to claim this frame. */
 	if (ifp->if_l2com != NULL) {
 		KASSERT(ng_ether_input_p != NULL,
 		    ("%s: ng_ether_input_p is NULL", __func__));
 		m->m_flags &= ~M_PROMISC;
 		(*ng_ether_input_p)(ifp, &m);
 		if (m == NULL) {
 			CURVNET_RESTORE();
 			return;
 		}
 		eh = mtod(m, struct ether_header *);
 	}
 
 	/*
 	 * Allow if_bridge(4) to claim this frame.
 	 * The BRIDGE_INPUT() macro will update ifp if the bridge changed it
 	 * and the frame should be delivered locally.
 	 */
 	if (ifp->if_bridge != NULL) {
 		m->m_flags &= ~M_PROMISC;
 		BRIDGE_INPUT(ifp, m);
 		if (m == NULL) {
 			CURVNET_RESTORE();
 			return;
 		}
 		eh = mtod(m, struct ether_header *);
 	}
 
 #if defined(INET) || defined(INET6)
 	/*
 	 * Clear M_PROMISC on frame so that carp(4) will see it when the
 	 * mbuf flows up to Layer 3.
 	 * FreeBSD's implementation of carp(4) uses the inprotosw
 	 * to dispatch IPPROTO_CARP. carp(4) also allocates its own
 	 * Ethernet addresses of the form 00:00:5e:00:01:xx, which
 	 * is outside the scope of the M_PROMISC test below.
 	 * TODO: Maintain a hash table of ethernet addresses other than
 	 * ether_dhost which may be active on this ifp.
 	 */
 	if (ifp->if_carp && (*carp_forus_p)(ifp, eh->ether_dhost)) {
 		m->m_flags &= ~M_PROMISC;
 	} else
 #endif
 	{
 		/*
 		 * If the frame received was not for our MAC address, set the
 		 * M_PROMISC flag on the mbuf chain. The frame may need to
 		 * be seen by the rest of the Ethernet input path in case of
 		 * re-entry (e.g. bridge, vlan, netgraph) but should not be
 		 * seen by upper protocol layers.
 		 */
 		if (!ETHER_IS_MULTICAST(eh->ether_dhost) &&
 		    bcmp(IF_LLADDR(ifp), eh->ether_dhost, ETHER_ADDR_LEN) != 0)
 			m->m_flags |= M_PROMISC;
 	}
 
 	ether_demux(ifp, m);
 	CURVNET_RESTORE();
 }
 
 /*
  * Ethernet input dispatch; by default, direct dispatch here regardless of
  * global configuration.  However, if RSS is enabled, hook up RSS affinity
  * so that when deferred or hybrid dispatch is enabled, we can redistribute
  * load based on RSS.
  *
  * XXXRW: Would be nice if the ifnet passed up a flag indicating whether or
  * not it had already done work distribution via multi-queue.  Then we could
  * direct dispatch in the event load balancing was already complete and
  * handle the case of interfaces with different capabilities better.
  *
  * XXXRW: Sort of want an M_DISTRIBUTED flag to avoid multiple distributions
  * at multiple layers?
  *
  * XXXRW: For now, enable all this only if RSS is compiled in, although it
  * works fine without RSS.  Need to characterise the performance overhead
  * of the detour through the netisr code in the event the result is always
  * direct dispatch.
  */
 static void
 ether_nh_input(struct mbuf *m)
 {
 	struct ifnet *rcvif;
 
 	/* The netisr handler only gets the mbuf; recover the ifnet from it. */
 	M_ASSERTPKTHDR(m);
 	rcvif = m->m_pkthdr.rcvif;
 	KASSERT(rcvif != NULL, ("%s: NULL interface pointer", __func__));
 	ether_input_internal(rcvif, m);
 }
 
 /*
  * netisr registration for NETISR_ETHER.  Dispatch is direct in both
  * configurations; only the policy (and the CPU mapping) depends on RSS.
  */
 static struct netisr_handler	ether_nh = {
 	.nh_name = "ether",
 	.nh_handler = ether_nh_input,
 	.nh_proto = NETISR_ETHER,
 	.nh_dispatch = NETISR_DISPATCH_DIRECT,
 #ifdef RSS
 	.nh_policy = NETISR_POLICY_CPU,
 	.nh_m2cpuid = rss_m2cpuid,
 #else
 	.nh_policy = NETISR_POLICY_SOURCE,
 #endif
 };
 
 static void
 ether_init(__unused void *arg)
 {
 
 	netisr_register(&ether_nh);
 }
 SYSINIT(ether, SI_SUB_INIT_IF, SI_ORDER_ANY, ether_init, NULL);
 
 static void
 vnet_ether_init(__unused void *arg)
 {
 	struct pfil_head_args args;
 
 	args.pa_version = PFIL_VERSION;
 	args.pa_flags = PFIL_IN | PFIL_OUT;
 	args.pa_type = PFIL_TYPE_ETHERNET;
 	args.pa_headname = PFIL_ETHER_NAME;
 	V_link_pfil_head = pfil_head_register(&args);
 
 #ifdef VIMAGE
 	netisr_register_vnet(&ether_nh);
 #endif
 }
 VNET_SYSINIT(vnet_ether_init, SI_SUB_PROTO_IF, SI_ORDER_ANY,
     vnet_ether_init, NULL);
 
 #ifdef VIMAGE
 /* Per-vnet teardown: unregister the link-layer pfil head. */
 static void
 vnet_ether_pfil_destroy(void *arg __unused)
 {
 
 	pfil_head_unregister(V_link_pfil_head);
 }
 VNET_SYSUNINIT(vnet_ether_pfil_uninit, SI_SUB_PROTO_PFIL, SI_ORDER_ANY,
     vnet_ether_pfil_destroy, NULL);
 
 /* Per-vnet teardown: unregister the Ethernet netisr handler. */
 static void
 vnet_ether_destroy(void *arg __unused)
 {
 
 	netisr_unregister_vnet(&ether_nh);
 }
 VNET_SYSUNINIT(vnet_ether_uninit, SI_SUB_PROTO_IF, SI_ORDER_ANY,
     vnet_ether_destroy, NULL);
 #endif
 
 /*
  * if_input method for Ethernet interfaces.
  *
  * Drivers may hand in several packets linked through m_nextpkt, which
  * lets them amortize the receive lock.  Each packet is unlinked and
  * dispatched to the NETISR_ETHER handler on its own.  The network epoch
  * is entered here unless the interface sets IFF_KNOWSEPOCH.
  */
 static void
 ether_input(struct ifnet *ifp, struct mbuf *m)
 {
 	struct epoch_tracker et;
 	struct mbuf *next;
 	bool enter_epoch;
 
 	enter_epoch = (ifp->if_flags & IFF_KNOWSEPOCH) == 0;
 
 	CURVNET_SET_QUIET(ifp->if_vnet);
 	if (__predict_false(enter_epoch))
 		NET_EPOCH_ENTER(et);
 	for (; m != NULL; m = next) {
 		next = m->m_nextpkt;
 		m->m_nextpkt = NULL;
 
 		/*
 		 * The deferred context relies on rcvif being set properly,
 		 * so assert that it is correct before dispatching.
 		 */
 		MPASS((m->m_pkthdr.csum_flags & CSUM_SND_TAG) == 0);
 		KASSERT(m->m_pkthdr.rcvif == ifp, ("%s: ifnet mismatch m %p "
 		    "rcvif %p ifp %p", __func__, m, m->m_pkthdr.rcvif, ifp));
 		netisr_dispatch(NETISR_ETHER, m);
 	}
 	if (__predict_false(enter_epoch))
 		NET_EPOCH_EXIT(et);
 	CURVNET_RESTORE();
 }
 
 /*
  * Upper layer processing for a received Ethernet packet.
  */
 void
 ether_demux(struct ifnet *ifp, struct mbuf *m)
 {
 	struct ether_header *eh;
 	int i, isr;
 	u_short ether_type;
 
 	NET_EPOCH_ASSERT();
 	KASSERT(ifp != NULL, ("%s: NULL interface pointer", __func__));
 
 	/* Do not grab PROMISC frames in case we are re-entered. */
 	if (PFIL_HOOKED_IN(V_link_pfil_head) && !(m->m_flags & M_PROMISC)) {
 		i = pfil_run_hooks(V_link_pfil_head, &m, ifp, PFIL_IN, NULL);
 		if (i != 0 || m == NULL)
 			return;
 	}
 
 	eh = mtod(m, struct ether_header *);
 	ether_type = ntohs(eh->ether_type);
 
 	/*
 	 * If this frame has a VLAN tag other than 0, call vlan_input()
 	 * if its module is loaded. Otherwise, drop.
 	 */
 	if ((m->m_flags & M_VLANTAG) &&
 	    EVL_VLANOFTAG(m->m_pkthdr.ether_vtag) != 0) {
 		if (ifp->if_vlantrunk == NULL) {
 			if_inc_counter(ifp, IFCOUNTER_NOPROTO, 1);
 			m_freem(m);
 			return;
 		}
 		KASSERT(vlan_input_p != NULL,("%s: VLAN not loaded!",
 		    __func__));
 		/* Clear before possibly re-entering ether_input(). */
 		m->m_flags &= ~M_PROMISC;
 		(*vlan_input_p)(ifp, m);
 		return;
 	}
 
 	/*
 	 * Pass promiscuously received frames to the upper layer if the user
 	 * requested this by setting IFF_PPROMISC. Otherwise, drop them.
 	 */
 	if ((ifp->if_flags & IFF_PPROMISC) == 0 && (m->m_flags & M_PROMISC)) {
 		m_freem(m);
 		return;
 	}
 
 	/*
 	 * Reset layer specific mbuf flags to avoid confusing upper layers.
 	 * Strip off Ethernet header.
 	 */
 	m->m_flags &= ~M_VLANTAG;
 	m_clrprotoflags(m);
 	m_adj(m, ETHER_HDR_LEN);
 
 	/*
 	 * Dispatch frame to upper layer.
 	 */
 	switch (ether_type) {
 #ifdef INET
 	case ETHERTYPE_IP:
 		isr = NETISR_IP;
 		break;
 
 	case ETHERTYPE_ARP:
 		if (ifp->if_flags & IFF_NOARP) {
 			/* Discard packet if ARP is disabled on interface */
 			m_freem(m);
 			return;
 		}
 		isr = NETISR_ARP;
 		break;
 #endif
 #ifdef INET6
 	case ETHERTYPE_IPV6:
 		isr = NETISR_IPV6;
 		break;
 #endif
 	default:
 		goto discard;
 	}
 	netisr_dispatch(isr, m);
 	return;
 
 discard:
 	/*
 	 * Packet is to be discarded.  If netgraph is present,
 	 * hand the packet to it for last chance processing;
 	 * otherwise dispose of it.
 	 */
 	if (ifp->if_l2com != NULL) {
 		KASSERT(ng_ether_input_orphan_p != NULL,
 		    ("ng_ether_input_orphan_p is NULL"));
 		/*
 		 * Put back the ethernet header so netgraph has a
 		 * consistent view of inbound packets.
 		 */
 		M_PREPEND(m, ETHER_HDR_LEN, M_NOWAIT);
 		(*ng_ether_input_orphan_p)(ifp, m);
 		return;
 	}
 	m_freem(m);
 }
 
 /*
  * Convert Ethernet address to printable (loggable) representation.
  * This routine is for compatibility; it's better to just use
  *
  *	printf("%6D", <pointer to address>, ":");
  *
  * since there's no static buffer involved.
  */
 char *
 ether_sprintf(const u_char *ap)
 {
 	static char etherbuf[18];
 	snprintf(etherbuf, sizeof (etherbuf), "%6D", ap, ":");
 	return (etherbuf);
 }
 
 /*
  * Perform common duties while attaching to interface list
  *
  * Sets the Ethernet defaults on ifp (address and header length, MTU,
  * output/input/resolvemulti/requestencap methods, broadcast address),
  * attaches the interface, stores lla as its link-level address, attaches
  * BPF and ng_ether(4) if present, and announces the new interface.
  */
 void
 ether_ifattach(struct ifnet *ifp, const u_int8_t *lla)
 {
 	int i;
 	struct ifaddr *ifa;
 	struct sockaddr_dl *sdl;
 
 	ifp->if_addrlen = ETHER_ADDR_LEN;
 	ifp->if_hdrlen = ETHER_HDR_LEN;
 	ifp->if_mtu = ETHERMTU;
 	if_attach(ifp);
 	/* The method pointers below are installed after if_attach(). */
 	ifp->if_output = ether_output;
 	ifp->if_input = ether_input;
 	ifp->if_resolvemulti = ether_resolvemulti;
 	ifp->if_requestencap = ether_requestencap;
 #ifdef VIMAGE
 	ifp->if_reassign = ether_reassign;
 #endif
 	if (ifp->if_baudrate == 0)
 		ifp->if_baudrate = IF_Mbps(10);		/* just a default */
 	ifp->if_broadcastaddr = etherbroadcastaddr;
 
 	/* Record the MAC address in the interface's link-level sockaddr. */
 	ifa = ifp->if_addr;
 	KASSERT(ifa != NULL, ("%s: no lladdr!\n", __func__));
 	sdl = (struct sockaddr_dl *)ifa->ifa_addr;
 	sdl->sdl_type = IFT_ETHER;
 	sdl->sdl_alen = ifp->if_addrlen;
 	bcopy(lla, LLADDR(sdl), ifp->if_addrlen);
 
 	if (ifp->if_hw_addr != NULL)
 		bcopy(lla, ifp->if_hw_addr, ifp->if_addrlen);
 
 	bpfattach(ifp, DLT_EN10MB, ETHER_HDR_LEN);
 	if (ng_ether_attach_p != NULL)
 		(*ng_ether_attach_p)(ifp);
 
 	/* Announce Ethernet MAC address if non-zero. */
 	for (i = 0; i < ifp->if_addrlen; i++)
 		if (lla[i] != 0)
 			break; 
 	if (i != ifp->if_addrlen)
 		if_printf(ifp, "Ethernet address: %6D\n", lla, ":");
 
 	uuid_ether_add(LLADDR(sdl));
 
 	/* All necessary bits are set up; announce it now. */
 	EVENTHANDLER_INVOKE(ether_ifattach_event, ifp);
 	if (IS_DEFAULT_VNET(curvnet))
 		devctl_notify("ETHERNET", ifp->if_xname, "IFATTACH", NULL);
 }
 
 /*
  * Common teardown for an Ethernet interface: withdraw its address from
  * the UUID code, detach ng_ether(4) if it is attached, then detach BPF
  * and the interface itself.
  */
 void
 ether_ifdetach(struct ifnet *ifp)
 {
 	struct sockaddr_dl *lladdr;
 
 	lladdr = (struct sockaddr_dl *)ifp->if_addr->ifa_addr;
 	uuid_ether_del(LLADDR(lladdr));
 
 	if (ifp->if_l2com != NULL) {
 		KASSERT(ng_ether_detach_p != NULL,
 		    ("ng_ether_detach_p is NULL"));
 		(*ng_ether_detach_p)(ifp);
 	}
 
 	bpfdetach(ifp);
 	if_detach(ifp);
 }
 
 #ifdef VIMAGE
 /*
  * if_reassign method (installed by ether_ifattach()): detach ng_ether(4)
  * from the interface if it is attached, then re-attach it with new_vnet
  * set as the current vnet.  The third argument is unused.
  */
 void
 ether_reassign(struct ifnet *ifp, struct vnet *new_vnet, char *unused __unused)
 {
 
 	if (ifp->if_l2com != NULL) {
 		KASSERT(ng_ether_detach_p != NULL,
 		    ("ng_ether_detach_p is NULL"));
 		(*ng_ether_detach_p)(ifp);
 	}
 
 	if (ng_ether_attach_p != NULL) {
 		/* The attach hook runs in the context of the new vnet. */
 		CURVNET_SET_QUIET(new_vnet);
 		(*ng_ether_attach_p)(ifp);
 		CURVNET_RESTORE();
 	}
 }
 #endif
 
 /* sysctl node "ether" under _net_link, numbered IFT_ETHER. */
 SYSCTL_DECL(_net_link);
 SYSCTL_NODE(_net_link, IFT_ETHER, ether, CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
     "Ethernet");
 
 #if 0
 /*
  * This is for reference.  We have a table-driven version
  * of the little-endian crc32 generator, which is faster
  * than the double-loop.
  */
 uint32_t
 ether_crc32_le(const uint8_t *buf, size_t len)
 {
 	size_t i;
 	uint32_t crc, carry;
 	int bit;
 	uint8_t data;
 
 	crc = 0xffffffff;	/* initial value */
 
 	for (i = 0; i < len; i++) {
 		for (data = *buf++, bit = 0; bit < 8; bit++, data >>= 1) {
 			carry = (crc ^ data) & 1;
 			crc >>= 1;
 			if (carry)
 				crc = (crc ^ ETHER_CRC_POLY_LE);
 		}
 	}
 
 	return (crc);
 }
 #else
 /*
  * Little-endian (bit-reflected) CRC-32 over len bytes of buf.
  *
  * The register starts at 0xffffffff and the result is returned without
  * a final inversion.  Each byte is folded in as two 4-bit steps through
  * a 16-entry lookup table.  An empty buffer yields 0xffffffff.
  */
 uint32_t
 ether_crc32_le(const uint8_t *buf, size_t len)
 {
 	static const uint32_t crctab[] = {
 		0x00000000, 0x1db71064, 0x3b6e20c8, 0x26d930ac,
 		0x76dc4190, 0x6b6b51f4, 0x4db26158, 0x5005713c,
 		0xedb88320, 0xf00f9344, 0xd6d6a3e8, 0xcb61b38c,
 		0x9b64c2b0, 0x86d3d2d4, 0xa00ae278, 0xbdbdf21c
 	};
 	size_t i;
 	uint32_t crc;
 
 	crc = 0xffffffff;	/* initial value */
 
 	for (i = 0; i < len; i++) {
 		crc ^= buf[i];
 		/* Low nibble first, then the high nibble of the same byte. */
 		crc = (crc >> 4) ^ crctab[crc & 0xf];
 		crc = (crc >> 4) ^ crctab[crc & 0xf];
 	}
 
 	return (crc);
 }
 #endif
 
 /*
  * Big-endian CRC-32 over len bytes of buf, computed bit by bit.
  *
  * The register starts at 0xffffffff and is shifted left; the bits of
  * each input byte are consumed least-significant first.  The result is
  * returned without a final inversion.
  */
 uint32_t
 ether_crc32_be(const uint8_t *buf, size_t len)
 {
 	uint32_t reg, feedback;
 	size_t pos;
 	uint8_t octet;
 	int nbits;
 
 	reg = 0xffffffff;	/* initial value */
 
 	for (pos = 0; pos < len; pos++) {
 		octet = buf[pos];
 		for (nbits = 0; nbits < 8; nbits++) {
 			feedback = ((reg & 0x80000000) ? 1 : 0) ^ (octet & 0x01);
 			reg <<= 1;
 			if (feedback)
 				reg = (reg ^ ETHER_CRC_POLY_BE) | feedback;
 			octet >>= 1;
 		}
 	}
 
 	return (reg);
 }
 
 /*
  * Common ioctl handling for Ethernet interfaces.
  *
  * Handles setting an address (brings the interface up and runs its
  * init method), reading the link-level address, setting the MTU (at
  * most ETHERMTU) and getting/setting the interface PCP.  Returns 0 on
  * success, EINVAL for an out-of-range value or an unhandled command,
  * or the error from priv_check() for SIOCSLANPCP.
  */
 int
 ether_ioctl(struct ifnet *ifp, u_long command, caddr_t data)
 {
 	struct ifaddr *addr = (struct ifaddr *)data;
 	struct ifreq *req = (struct ifreq *)data;
 	int rc = 0;
 
 	switch (command) {
 	case SIOCSIFADDR:
 		ifp->if_flags |= IFF_UP;
 #ifdef INET
 		if (addr->ifa_addr->sa_family == AF_INET) {
 			ifp->if_init(ifp->if_softc);	/* before arpwhohas */
 			arp_ifinit(ifp, addr);
 			break;
 		}
 #endif
 		ifp->if_init(ifp->if_softc);
 		break;
 
 	case SIOCGIFADDR:
 		bcopy(IF_LLADDR(ifp), &req->ifr_addr.sa_data[0],
 		    ETHER_ADDR_LEN);
 		break;
 
 	case SIOCSIFMTU:
 		/* Accept any MTU up to the Ethernet maximum. */
 		if (req->ifr_mtu <= ETHERMTU)
 			ifp->if_mtu = req->ifr_mtu;
 		else
 			rc = EINVAL;
 		break;
 
 	case SIOCSLANPCP:
 		rc = priv_check(curthread, PRIV_NET_SETLANPCP);
 		if (rc != 0)
 			break;
 		/* Valid values are 0..7 or the "no PCP" marker. */
 		if (req->ifr_lan_pcp <= 7 ||
 		    req->ifr_lan_pcp == IFNET_PCP_NONE) {
 			ifp->if_pcp = req->ifr_lan_pcp;
 			/* broadcast event about PCP change */
 			EVENTHANDLER_INVOKE(ifnet_event, ifp, IFNET_EVENT_PCP);
 		} else {
 			rc = EINVAL;
 		}
 		break;
 
 	case SIOCGLANPCP:
 		req->ifr_lan_pcp = ifp->if_pcp;
 		break;
 
 	default:
 		rc = EINVAL;			/* XXX netbsd has ENOTTY??? */
 		break;
 	}
 	return (rc);
 }
 
 /*
  * if_resolvemulti method: map a multicast address in sa to the
  * link-layer address to be programmed.
  *
  * For AF_LINK the address is only validated and *llsa is set to NULL.
  * For AF_INET/AF_INET6 the sockaddr_dl handed in through *llsa is
  * initialized with the mapped Ethernet multicast address.  An
  * unspecified IPv6 address sets IFF_ALLMULTI on the interface instead.
  * Returns 0, EADDRNOTAVAIL for a non-multicast address, or
  * EAFNOSUPPORT for any other family.
  */
 static int
 ether_resolvemulti(struct ifnet *ifp, struct sockaddr **llsa,
 	struct sockaddr *sa)
 {
 	struct sockaddr_dl *lladdr;
 	u_char *mac;
 
 	switch (sa->sa_family) {
 	case AF_LINK:
 		/*
 		 * No mapping needed. Just check that it's a valid MC address.
 		 */
 		lladdr = (struct sockaddr_dl *)sa;
 		mac = LLADDR(lladdr);
 		if (!ETHER_IS_MULTICAST(mac))
 			return (EADDRNOTAVAIL);
 		*llsa = NULL;
 		return (0);
 
 #ifdef INET
 	case AF_INET: {
 		struct sockaddr_in *in4 = (struct sockaddr_in *)sa;
 
 		if (!IN_MULTICAST(ntohl(in4->sin_addr.s_addr)))
 			return (EADDRNOTAVAIL);
 		lladdr = link_init_sdl(ifp, *llsa, IFT_ETHER);
 		lladdr->sdl_alen = ETHER_ADDR_LEN;
 		mac = LLADDR(lladdr);
 		ETHER_MAP_IP_MULTICAST(&in4->sin_addr, mac);
 		*llsa = (struct sockaddr *)lladdr;
 		return (0);
 	}
 #endif
 #ifdef INET6
 	case AF_INET6: {
 		struct sockaddr_in6 *in6 = (struct sockaddr_in6 *)sa;
 
 		if (IN6_IS_ADDR_UNSPECIFIED(&in6->sin6_addr)) {
 			/*
 			 * An IP6 address of 0 means listen to all
 			 * of the Ethernet multicast address used for IP6.
 			 * (This is used for multicast routers.)
 			 */
 			ifp->if_flags |= IFF_ALLMULTI;
 			*llsa = NULL;
 			return (0);
 		}
 		if (!IN6_IS_ADDR_MULTICAST(&in6->sin6_addr))
 			return (EADDRNOTAVAIL);
 		lladdr = link_init_sdl(ifp, *llsa, IFT_ETHER);
 		lladdr->sdl_alen = ETHER_ADDR_LEN;
 		mac = LLADDR(lladdr);
 		ETHER_MAP_IPV6_MULTICAST(&in6->sin6_addr, mac);
 		*llsa = (struct sockaddr *)lladdr;
 		return (0);
 	}
 #endif
 
 	default:
 		/*
 		 * Well, the text isn't quite right, but it's the name
 		 * that counts...
 		 */
 		return (EAFNOSUPPORT);
 	}
 }
 
 /*
  * Module description for "ether".  Only the name is given; the remaining
  * members are left zero-initialised.
  */
 static moduledata_t ether_mod = {
 	.name = "ether",
 };
 
 /*
  * Hand a VLAN-tagged frame to BPF with its 802.1Q header rebuilt in-line
  * from the tag carried in the mbuf packet header (m_pkthdr.ether_vtag).
  *
  * bp	- BPF attachment to tap into.
  * m	- frame starting with an Ethernet header; M_VLANTAG must be set.
  * data	- optional extra link-level header placed in front of the frame,
  *	  or NULL.
  * dlen	- length of data in bytes.
  *
  * The mbuf is adjusted temporarily to skip its Ethernet header and is put
  * back to its original m_len/m_data before returning.
  */
 void
 ether_vlan_mtap(struct bpf_if *bp, struct mbuf *m, void *data, u_int dlen)
 {
 	struct ether_vlan_header vlan;
 	struct mbuf mv, mb;
 
 	KASSERT((m->m_flags & M_VLANTAG) != 0,
 	    ("%s: vlan information not present", __func__));
 	KASSERT(m->m_len >= sizeof(struct ether_header),
 	    ("%s: mbuf not large enough for header", __func__));
 	/*
 	 * Copy the untagged header into the on-stack VLAN header, then move
 	 * the original ethertype behind the tag and fill in the 802.1Q
 	 * fields.
 	 */
 	bcopy(mtod(m, char *), &vlan, sizeof(struct ether_header));
 	vlan.evl_proto = vlan.evl_encap_proto;
 	vlan.evl_encap_proto = htons(ETHERTYPE_VLAN);
 	vlan.evl_tag = htons(m->m_pkthdr.ether_vtag);
 	/* Skip the original header; the rebuilt one is supplied separately. */
 	m->m_len -= sizeof(struct ether_header);
 	m->m_data += sizeof(struct ether_header);
 	/*
 	 * If a data link has been supplied by the caller, then we will need to
 	 * re-create a stack allocated mbuf chain with the following structure:
 	 *
 	 * (1) mbuf #1 will contain the supplied data link
 	 * (2) mbuf #2 will contain the vlan header
 	 * (3) mbuf #3 will contain the original mbuf's packet data
 	 *
 	 * Otherwise, submit the packet and vlan header via bpf_mtap2().
 	 */
 	if (data != NULL) {
 		/* Only m_next, m_data and m_len of the stack mbufs are set. */
 		mv.m_next = m;
 		mv.m_data = (caddr_t)&vlan;
 		mv.m_len = sizeof(vlan);
 		mb.m_next = &mv;
 		mb.m_data = data;
 		mb.m_len = dlen;
 		bpf_mtap(bp, &mb);
 	} else
 		bpf_mtap2(bp, &vlan, sizeof(vlan), m);
 	/* Undo the header skip so the caller sees m unchanged. */
 	m->m_len += sizeof(struct ether_header);
 	m->m_data -= sizeof(struct ether_header);
 }
 
 /*
  * Insert an 802.1Q-style tag into the Ethernet header at the front of m.
  *
  * m	 - frame beginning with an untagged Ethernet header.
  * tag	 - tag control information, host byte order.
  * proto - encapsulation ethertype to write, host byte order.
  *
  * Returns the (possibly different) head of the chain, or NULL when the
  * prepend or the pull-up failed.
  */
 struct mbuf *
 ether_vlanencap_proto(struct mbuf *m, uint16_t tag, uint16_t proto)
 {
 	struct ether_vlan_header *evl;
 	char *hdr;
 
 	/* M_PREPEND adjusts m_len and m_pkthdr.len for us. */
 	M_PREPEND(m, ETHER_VLAN_ENCAP_LEN, M_NOWAIT);
 	if (m != NULL && m->m_len < sizeof(struct ether_vlan_header))
 		m = m_pullup(m, sizeof(struct ether_vlan_header));
 	if (m == NULL)
 		return (NULL);
 
 	/*
 	 * Slide the two MAC addresses to the new start of the frame, which
 	 * opens a gap for the encapsulation ethertype and the tag.
 	 */
 	hdr = mtod(m, char *);
 	bcopy(hdr + ETHER_VLAN_ENCAP_LEN, hdr, ETHER_HDR_LEN - ETHER_TYPE_LEN);
 
 	evl = (struct ether_vlan_header *)hdr;
 	evl->evl_encap_proto = htons(proto);
 	evl->evl_tag = htons(tag);
 
 	return (m);
 }
 
 /* Sysctl nodes under which the VLAN tunables below are registered. */
 static SYSCTL_NODE(_net_link, IFT_L2VLAN, vlan, CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
     "IEEE 802.1Q VLAN");
 static SYSCTL_NODE(_net_link_vlan, PF_LINK, link,
     CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
     "for consistency");
 
 /*
  * Per-VNET knob exported read/write as "soft_pad".  When non-zero,
  * ether_8021q_frame() pads short frames on IFT_ETHER parents before tagging.
  */
 VNET_DEFINE_STATIC(int, soft_pad);
 #define	V_soft_pad	VNET(soft_pad)
 SYSCTL_INT(_net_link_vlan, OID_AUTO, soft_pad, CTLFLAG_RW | CTLFLAG_VNET,
     &VNET_NAME(soft_pad), 0,
     "pad short frames before tagging");
 
 /*
  * For now, make preserving PCP via an mbuf tag optional, as it increases
  * per-packet memory allocations and frees.  In the future, it would be
  * preferable to reuse ether_vtag for this, or similar.
  */
 int vlan_mtag_pcp = 0;
 SYSCTL_INT(_net_link_vlan, OID_AUTO, mtag_pcp, CTLFLAG_RW,
     &vlan_mtag_pcp, 0,
     "Retain VLAN PCP information as packets are passed up the stack");
 
 bool
 ether_8021q_frame(struct mbuf **mp, struct ifnet *ife, struct ifnet *p,
     struct ether_8021q_tag *qtag)
 {
 	struct m_tag *mtag;
 	int n;
 	uint16_t tag;
 	static const char pad[8];	/* just zeros */
 
 	/*
 	 * Pad the frame to the minimum size allowed if told to.
 	 * This option is in accord with IEEE Std 802.1Q, 2003 Ed.,
 	 * paragraph C.4.4.3.b.  It can help to work around buggy
 	 * bridges that violate paragraph C.4.4.3.a from the same
 	 * document, i.e., fail to pad short frames after untagging.
 	 * E.g., a tagged frame 66 bytes long (incl. FCS) is OK, but
 	 * untagging it will produce a 62-byte frame, which is a runt
 	 * and requires padding.  There are VLAN-enabled network
 	 * devices that just discard such runts instead or mishandle
 	 * them somehow.
 	 */
 	if (V_soft_pad && p->if_type == IFT_ETHER) {
 		for (n = ETHERMIN + ETHER_HDR_LEN - (*mp)->m_pkthdr.len;
 		     n > 0; n -= sizeof(pad)) {
 			if (!m_append(*mp, min(n, sizeof(pad)), pad))
 				break;
 		}
 		if (n > 0) {
 			m_freem(*mp);
 			*mp = NULL;
 			if_printf(ife, "cannot pad short frame");
 			return (false);
 		}
 	}
 
 	/*
 	 * If PCP is set in mbuf, use it
 	 */
 	if ((*mp)->m_flags & M_VLANTAG) {
 		qtag->pcp = EVL_PRIOFTAG((*mp)->m_pkthdr.ether_vtag);
 	}
 
 	/*
 	 * If underlying interface can do VLAN tag insertion itself,
 	 * just pass the packet along. However, we need some way to
 	 * tell the interface where the packet came from so that it
 	 * knows how to find the VLAN tag to use, so we attach a
 	 * packet tag that holds it.
 	 */
 	if (vlan_mtag_pcp && (mtag = m_tag_locate(*mp, MTAG_8021Q,
 	    MTAG_8021Q_PCP_OUT, NULL)) != NULL)
 		tag = EVL_MAKETAG(qtag->vid, *(uint8_t *)(mtag + 1), 0);
 	else
 		tag = EVL_MAKETAG(qtag->vid, qtag->pcp, 0);
 	if ((p->if_capenable & IFCAP_VLAN_HWTAGGING) &&
 	    (qtag->proto == ETHERTYPE_VLAN)) {
 		(*mp)->m_pkthdr.ether_vtag = tag;
 		(*mp)->m_flags |= M_VLANTAG;
 	} else {
 		*mp = ether_vlanencap_proto(*mp, tag, qtag->proto);
 		if (*mp == NULL) {
 			if_printf(ife, "unable to prepend 802.1Q header");
 			return (false);
 		}
 	}
 	return (true);
 }
 
 /*
  * Allocate an address from the FreeBSD Foundation OUI.  This uses a
  * cryptographic hash function on the containing jail's name, UUID and the
  * interface name to attempt to provide a unique but stable address.
  * Pseudo-interfaces which require a MAC address should use this function to
  * allocate non-locally-administered addresses.
  */
 void
 ether_gen_addr(struct ifnet *ifp, struct ether_addr *hwaddr)
 {
 	SHA1_CTX ctx;
 	char *buf;
 	char uuid[HOSTUUIDLEN + 1];
 	uint64_t addr;
 	int i, sz;
 	char digest[SHA1_RESULTLEN];
 	char jailname[MAXHOSTNAMELEN];
 
 	getcredhostuuid(curthread->td_ucred, uuid, sizeof(uuid));
 	if (strncmp(uuid, DEFAULT_HOSTUUID, sizeof(uuid)) == 0) {
 		/* Fall back to a random mac address. */
 		goto rando;
 	}
 
 	/* If each (vnet) jail would also have a unique hostuuid this would not
 	 * be necessary. */
 	getjailname(curthread->td_ucred, jailname, sizeof(jailname));
 	sz = asprintf(&buf, M_TEMP, "%s-%s-%s", uuid, if_name(ifp),
 	    jailname);
 	if (sz < 0) {
 		/* Fall back to a random mac address. */
 		goto rando;
 	}
 
 	SHA1Init(&ctx);
 	SHA1Update(&ctx, buf, sz);
 	SHA1Final(digest, &ctx);
 	free(buf, M_TEMP);
 
 	addr = ((digest[0] << 16) | (digest[1] << 8) | digest[2]) &
 	    OUI_FREEBSD_GENERATED_MASK;
 	addr = OUI_FREEBSD(addr);
 	for (i = 0; i < ETHER_ADDR_LEN; ++i) {
 		hwaddr->octet[i] = addr >> ((ETHER_ADDR_LEN - i - 1) * 8) &
 		    0xFF;
 	}
 
 	return;
 rando:
 	arc4rand(hwaddr, sizeof(*hwaddr), 0);
 	/* Unicast */
 	hwaddr->octet[0] &= 0xFE;
 	/* Locally administered. */
 	hwaddr->octet[0] |= 0x02;
 }
 
 DECLARE_MODULE(ether, ether_mod, SI_SUB_INIT_IF, SI_ORDER_ANY);
 MODULE_VERSION(ether, 1);
diff --git a/sys/net/if_fwsubr.c b/sys/net/if_fwsubr.c
index a6c43d4d05a4..321721737d36 100644
--- a/sys/net/if_fwsubr.c
+++ b/sys/net/if_fwsubr.c
@@ -1,855 +1,873 @@
 /*-
  * SPDX-License-Identifier: BSD-3-Clause
  *
  * Copyright (c) 2004 Doug Rabson
  * Copyright (c) 1982, 1989, 1993
  *	The Regents of the University of California.  All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  * $FreeBSD$
  */
 
 #include "opt_inet.h"
 #include "opt_inet6.h"
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/kernel.h>
 #include <sys/malloc.h>
 #include <sys/mbuf.h>
 #include <sys/module.h>
 #include <sys/socket.h>
 #include <sys/sockio.h>
 
 #include <net/if.h>
 #include <net/if_var.h>
 #include <net/netisr.h>
 #include <net/route.h>
 #include <net/if_llc.h>
 #include <net/if_dl.h>
 #include <net/if_types.h>
 #include <net/bpf.h>
 #include <net/firewire.h>
 #include <net/if_llatbl.h>
 
 #if defined(INET) || defined(INET6)
 #include <netinet/in.h>
 #include <netinet/in_var.h>
 #include <netinet/if_ether.h>
 #endif
 #ifdef INET6
 #include <netinet6/nd6.h>
 #endif
 
 #include <security/mac/mac_framework.h>
 
 static MALLOC_DEFINE(M_FWCOM, "fw_com", "firewire interface internals");
 
 /*
  * Link-level broadcast address: every member of struct fw_hwaddr is set to
  * all ones.  Installed as if_broadcastaddr by firewire_ifattach() and used
  * as the destination in BPF headers for non-unicast output.
  */
 struct fw_hwaddr firewire_broadcastaddr = {
 	0xffffffff,
 	0xffffffff,
 	0xff,
 	0xff,
 	0xffff,
 	0xffffffff
 };
 
 /*
  * Output routine installed as if_output for firewire interfaces.
  *
  * Resolves the link-level destination for unicast packets (stored in an
  * mbuf tag), taps BPF, then prepends the encapsulation header and passes the
  * packet to if_transmit, splitting it into link-level fragments when it
  * does not fit into a single packet for the chosen speed.
  *
  * Returns 0 on success (including when address resolution queued the packet
  * and reported EWOULDBLOCK) or an errno value.  On the "bad" path the mbuf
  * is freed here.
  */
 static int
 firewire_output(struct ifnet *ifp, struct mbuf *m, const struct sockaddr *dst,
     struct route *ro)
 {
 	struct fw_com *fc = IFP2FWC(ifp);
 	int error, type;
 	struct m_tag *mtag;
 	union fw_encap *enc;
 	struct fw_hwaddr *destfw;
 	uint8_t speed;
 	uint16_t psize, fsize, dsize;
 	struct mbuf *mtail;
 	int unicast, dgl, foff;
 	/* Datagram label counter shared by all calls; NOTE(review): updated
 	 * without any visible locking -- confirm this is acceptable. */
 	static int next_dgl;
 #if defined(INET) || defined(INET6)
 	int is_gw = 0;
 #endif
+	int af = RO_GET_FAMILY(ro, dst);
 
 #ifdef MAC
 	error = mac_ifnet_check_transmit(ifp, m);
 	if (error)
 		goto bad;
 #endif
 
 	if (!((ifp->if_flags & IFF_UP) &&
 	   (ifp->if_drv_flags & IFF_DRV_RUNNING))) {
 		error = ENETDOWN;
 		goto bad;
 	}
 
 #if defined(INET) || defined(INET6)
 	if (ro != NULL)
 		is_gw = (ro->ro_flags & RT_HAS_GW) != 0;
 #endif
 	/*
 	 * For unicast, we make a tag to store the lladdr of the
 	 * destination. This might not be the first time we have seen
 	 * the packet (for instance, the arp code might be trying to
 	 * re-send it after receiving an arp reply) so we only
 	 * allocate a tag if there isn't one there already. For
 	 * multicast, we will eventually use a different tag to store
 	 * the channel number.
 	 */
 	unicast = !(m->m_flags & (M_BCAST | M_MCAST));
 	if (unicast) {
 		mtag = m_tag_locate(m, MTAG_FIREWIRE, MTAG_FIREWIRE_HWADDR, NULL);
 		if (!mtag) {
 			mtag = m_tag_alloc(MTAG_FIREWIRE, MTAG_FIREWIRE_HWADDR,
 			    sizeof (struct fw_hwaddr), M_NOWAIT);
 			if (!mtag) {
 				error = ENOMEM;
 				goto bad;
 			}
 			m_tag_prepend(m, mtag);
 		}
 		/* The hardware address lives directly behind the tag header. */
 		destfw = (struct fw_hwaddr *)(mtag + 1);
 	} else {
 		destfw = NULL;
 	}
 
+	switch (af) {
+#ifdef INET
+	case AF_INET:
+		type = ETHERTYPE_IP;
+		break;
+	case AF_ARP:
+		type = ETHERTYPE_ARP;
+		break;
+#endif
+#ifdef INET6
+	case AF_INET6:
+		type = ETHERTYPE_IPV6;
+		break;
+#endif
+	default:
+		if_printf(ifp, "can't handle af%d\n", af);
+		error = EAFNOSUPPORT;
+		goto bad;
+	}
+
 	switch (dst->sa_family) {
 #ifdef INET
 	case AF_INET:
 		/*
 		 * Only bother with arp for unicast. Allocation of
 		 * channels etc. for firewire is quite different and
 		 * doesn't fit into the arp model.
 		 */
 		if (unicast) {
 			error = arpresolve(ifp, is_gw, m, dst,
 			    (u_char *) destfw, NULL, NULL);
 			if (error)
 				return (error == EWOULDBLOCK ? 0 : error);
 		}
-		type = ETHERTYPE_IP;
 		break;
 
 	case AF_ARP:
 	{
 		struct arphdr *ah;
 		ah = mtod(m, struct arphdr *);
 		ah->ar_hrd = htons(ARPHRD_IEEE1394);
-		type = ETHERTYPE_ARP;
 		if (unicast)
 			*destfw = *(struct fw_hwaddr *) ar_tha(ah);
 
 		/*
 		 * The standard arp code leaves a hole for the target
 		 * hardware address which we need to close up.
 		 */
 		bcopy(ar_tpa(ah), ar_tha(ah), ah->ar_pln);
 		m_adj(m, -ah->ar_hln);
 		break;
 	}
 #endif
 
 #ifdef INET6
 	case AF_INET6:
 		if (unicast) {
-			error = nd6_resolve(fc->fc_ifp, LLE_SF(AF_INET6, is_gw),
-			    m, dst, (u_char *) destfw, NULL, NULL);
+			error = nd6_resolve(fc->fc_ifp, LLE_SF(af, is_gw), m,
+			    dst, (u_char *) destfw, NULL, NULL);
 			if (error)
 				return (error == EWOULDBLOCK ? 0 : error);
 		}
-		type = ETHERTYPE_IPV6;
 		break;
 #endif
 
 	default:
 		if_printf(ifp, "can't handle af%d\n", dst->sa_family);
 		error = EAFNOSUPPORT;
 		goto bad;
 	}
 
 	/*
 	 * Let BPF tap off a copy before we encapsulate.
 	 */
 	if (bpf_peers_present(ifp->if_bpf)) {
 		struct fw_bpfhdr h;
 		if (unicast)
 			bcopy(destfw, h.firewire_dhost, 8);
 		else
 			bcopy(&firewire_broadcastaddr, h.firewire_dhost, 8);
 		bcopy(&fc->fc_hwaddr, h.firewire_shost, 8);
 		h.firewire_type = htons(type);
 		bpf_mtap2(ifp->if_bpf, &h, sizeof(h), m);
 	}
 
 	/*
 	 * Punt on MCAP for now and send all multicast packets on the
 	 * broadcast channel.
 	 */
 	if (m->m_flags & M_MCAST)
 		m->m_flags |= M_BCAST;
 
 	/*
 	 * Figure out what speed to use and what the largest supported
 	 * packet size is. For unicast, this is the minimum of what we
 	 * can speak and what they can hear. For broadcast, lets be
 	 * conservative and use S100. We could possibly improve that
 	 * by examining the bus manager's speed map or similar. We
 	 * also reduce the packet size for broadcast to account for
 	 * the GASP header.
 	 */
 	if (unicast) {
 		speed = min(fc->fc_speed, destfw->sspd);
 		psize = min(512 << speed, 2 << destfw->sender_max_rec);
 	} else {
 		speed = 0;
 		psize = 512 - 2*sizeof(uint32_t);
 	}
 
 	/*
 	 * Next, we encapsulate, possibly fragmenting the original
 	 * datagram if it won't fit into a single packet.
 	 */
 	if (m->m_pkthdr.len <= psize - sizeof(uint32_t)) {
 		/*
 		 * No fragmentation is necessary.
 		 */
 		M_PREPEND(m, sizeof(uint32_t), M_NOWAIT);
 		if (!m) {
 			error = ENOBUFS;
 			goto bad;
 		}
 		enc = mtod(m, union fw_encap *);
 		enc->unfrag.ether_type = type;
 		enc->unfrag.lf = FW_ENCAP_UNFRAG;
 		enc->unfrag.reserved = 0;
 
 		/*
 		 * Byte swap the encapsulation header manually.
 		 */
 		enc->ul[0] = htonl(enc->ul[0]);
 
 		error = (ifp->if_transmit)(ifp, m);
 		return (error);
 	} else {
 		/*
 		 * Fragment the datagram, making sure to leave enough
 		 * space for the encapsulation header in each packet.
 		 */
 		fsize = psize - 2*sizeof(uint32_t);
 		dgl = next_dgl++;
 		dsize = m->m_pkthdr.len;
 		foff = 0;
 		while (m) {
 			if (m->m_pkthdr.len > fsize) {
 				/*
 				 * Split off the tail segment from the
 				 * datagram, copying our tags over.
 				 */
 				/*
 				 * NOTE(review): the m_split() result is not
 				 * checked before being passed on; presumably
 				 * it can be NULL with M_NOWAIT -- confirm.
 				 */
 				mtail = m_split(m, fsize, M_NOWAIT);
 				m_tag_copy_chain(mtail, m, M_NOWAIT);
 			} else {
 				mtail = NULL;
 			}
 
 			/*
 			 * Add our encapsulation header to this
 			 * fragment and hand it off to the link.
 			 */
 			M_PREPEND(m, 2*sizeof(uint32_t), M_NOWAIT);
 			if (!m) {
 				/* NOTE(review): mtail is not freed on this
 				 * path -- confirm whether that leaks. */
 				error = ENOBUFS;
 				goto bad;
 			}
 			enc = mtod(m, union fw_encap *);
 			if (foff == 0) {
 				enc->firstfrag.lf = FW_ENCAP_FIRST;
 				enc->firstfrag.reserved1 = 0;
 				enc->firstfrag.reserved2 = 0;
 				/* The size field holds the length minus one. */
 				enc->firstfrag.datagram_size = dsize - 1;
 				enc->firstfrag.ether_type = type;
 				enc->firstfrag.dgl = dgl;
 			} else {
 				if (mtail)
 					enc->nextfrag.lf = FW_ENCAP_NEXT;
 				else
 					enc->nextfrag.lf = FW_ENCAP_LAST;
 				enc->nextfrag.reserved1 = 0;
 				enc->nextfrag.reserved2 = 0;
 				enc->nextfrag.reserved3 = 0;
 				enc->nextfrag.datagram_size = dsize - 1;
 				enc->nextfrag.fragment_offset = foff;
 				enc->nextfrag.dgl = dgl;
 			}
 			/* Advance by the payload carried in this fragment. */
 			foff += m->m_pkthdr.len - 2*sizeof(uint32_t);
 
 			/*
 			 * Byte swap the encapsulation header manually.
 			 */
 			enc->ul[0] = htonl(enc->ul[0]);
 			enc->ul[1] = htonl(enc->ul[1]);
 
 			error = (ifp->if_transmit)(ifp, m);
 			if (error) {
 				/* The remaining tail is dropped; the driver's
 				 * error code is replaced by ENOBUFS. */
 				if (mtail)
 					m_freem(mtail);
 				return (ENOBUFS);
 			}
 
 			m = mtail;
 		}
 
 		return (0);
 	}
 
 bad:
 	if (m)
 		m_freem(m);
 	return (error);
 }
 
 static struct mbuf *
 firewire_input_fragment(struct fw_com *fc, struct mbuf *m, int src)
 {
 	union fw_encap *enc;
 	struct fw_reass *r;
 	struct mbuf *mf, *mprev;
 	int dsize;
 	int fstart, fend, start, end, islast;
 	uint32_t id;
 
 	/*
 	 * Find an existing reassembly buffer or create a new one.
 	 */
 	enc = mtod(m, union fw_encap *);
 	id = enc->firstfrag.dgl | (src << 16);
 	STAILQ_FOREACH(r, &fc->fc_frags, fr_link)
 		if (r->fr_id == id)
 			break;
 	if (!r) {
 		r = malloc(sizeof(struct fw_reass), M_TEMP, M_NOWAIT);
 		if (!r) {
 			m_freem(m);
 			return 0;
 		}
 		r->fr_id = id;
 		r->fr_frags = 0;
 		STAILQ_INSERT_HEAD(&fc->fc_frags, r, fr_link);
 	}
 
 	/*
 	 * If this fragment overlaps any other fragment, we must discard
 	 * the partial reassembly and start again.
 	 */
 	if (enc->firstfrag.lf == FW_ENCAP_FIRST)
 		fstart = 0;
 	else
 		fstart = enc->nextfrag.fragment_offset;
 	fend = fstart + m->m_pkthdr.len - 2*sizeof(uint32_t);
 	dsize = enc->nextfrag.datagram_size;
 	islast = (enc->nextfrag.lf == FW_ENCAP_LAST);
 
 	for (mf = r->fr_frags; mf; mf = mf->m_nextpkt) {
 		enc = mtod(mf, union fw_encap *);
 		if (enc->nextfrag.datagram_size != dsize) {
 			/*
 			 * This fragment must be from a different
 			 * packet.
 			 */
 			goto bad;
 		}
 		if (enc->firstfrag.lf == FW_ENCAP_FIRST)
 			start = 0;
 		else
 			start = enc->nextfrag.fragment_offset;
 		end = start + mf->m_pkthdr.len - 2*sizeof(uint32_t);
 		if ((fstart < end && fend > start) ||
 		    (islast && enc->nextfrag.lf == FW_ENCAP_LAST)) {
 			/*
 			 * Overlap - discard reassembly buffer and start
 			 * again with this fragment.
 			 */
 			goto bad;
 		}
 	}
 
 	/*
 	 * Find where to put this fragment in the list.
 	 */
 	for (mf = r->fr_frags, mprev = NULL; mf;
 	    mprev = mf, mf = mf->m_nextpkt) {
 		enc = mtod(mf, union fw_encap *);
 		if (enc->firstfrag.lf == FW_ENCAP_FIRST)
 			start = 0;
 		else
 			start = enc->nextfrag.fragment_offset;
 		if (start >= fend)
 			break;
 	}
 
 	/*
 	 * If this is a last fragment and we are not adding at the end
 	 * of the list, discard the buffer.
 	 */
 	if (islast && mprev && mprev->m_nextpkt)
 		goto bad;
 
 	if (mprev) {
 		m->m_nextpkt = mprev->m_nextpkt;
 		mprev->m_nextpkt = m;
 
 		/*
 		 * Coalesce forwards and see if we can make a whole
 		 * datagram.
 		 */
 		enc = mtod(mprev, union fw_encap *);
 		if (enc->firstfrag.lf == FW_ENCAP_FIRST)
 			start = 0;
 		else
 			start = enc->nextfrag.fragment_offset;
 		end = start + mprev->m_pkthdr.len - 2*sizeof(uint32_t);
 		while (end == fstart) {
 			/*
 			 * Strip off the encap header from m and
 			 * append it to mprev, freeing m.
 			 */
 			m_adj(m, 2*sizeof(uint32_t));
 			mprev->m_nextpkt = m->m_nextpkt;
 			mprev->m_pkthdr.len += m->m_pkthdr.len;
 			m_cat(mprev, m);
 
 			if (mprev->m_pkthdr.len == dsize + 1 + 2*sizeof(uint32_t)) {
 				/*
 				 * We have assembled a complete packet
 				 * we must be finished. Make sure we have
 				 * merged the whole chain.
 				 */
 				STAILQ_REMOVE(&fc->fc_frags, r, fw_reass, fr_link);
 				free(r, M_TEMP);
 				m = mprev->m_nextpkt;
 				while (m) {
 					mf = m->m_nextpkt;
 					m_freem(m);
 					m = mf;
 				}
 				mprev->m_nextpkt = NULL;
 
 				return (mprev);
 			}
 
 			/*
 			 * See if we can continue merging forwards.
 			 */
 			end = fend;
 			m = mprev->m_nextpkt;
 			if (m) {
 				enc = mtod(m, union fw_encap *);
 				if (enc->firstfrag.lf == FW_ENCAP_FIRST)
 					fstart = 0;
 				else
 					fstart = enc->nextfrag.fragment_offset;
 				fend = fstart + m->m_pkthdr.len
 				    - 2*sizeof(uint32_t);
 			} else {
 				break;
 			}
 		}
 	} else {
 		m->m_nextpkt = 0;
 		r->fr_frags = m;
 	}
 
 	return (0);
 
 bad:
 	while (r->fr_frags) {
 		mf = r->fr_frags;
 		r->fr_frags = mf->m_nextpkt;
 		m_freem(mf);
 	}
 	m->m_nextpkt = 0;
 	r->fr_frags = m;
 
 	return (0);
 }
 
 /*
  * Input routine for firewire interfaces.
  *
  * ifp	- receiving interface.
  * m	- received packet, beginning with the encapsulation header.
  * src	- identifier of the sender, used to key fragment reassembly.
  *
  * Removes the encapsulation header (reassembling link-level fragments when
  * necessary), taps BPF and dispatches the payload to the matching netisr.
  * The mbuf is always consumed.
  */
 void
 firewire_input(struct ifnet *ifp, struct mbuf *m, uint16_t src)
 {
 	struct fw_com *fc = IFP2FWC(ifp);
 	union fw_encap *enc;
 	int type, isr;
 
 	/*
 	 * The caller has already stripped off the packet header
 	 * (stream or wreqb) and marked the mbuf's M_BCAST flag
 	 * appropriately. We de-encapsulate the IP packet and pass it
 	 * up the line after handling link-level fragmentation.
 	 */
 	if (m->m_pkthdr.len < sizeof(uint32_t)) {
 		if_printf(ifp, "discarding frame without "
 		    "encapsulation header (len %u pkt len %u)\n",
 		    m->m_len, m->m_pkthdr.len);
 		/* Discard explicitly rather than relying on m_pullup(). */
 		m_freem(m);
 		return;
 	}
 
 	m = m_pullup(m, sizeof(uint32_t));
 	if (m == NULL)
 		return;
 	enc = mtod(m, union fw_encap *);
 
 	/*
 	 * Byte swap the encapsulation header manually.
 	 */
 	enc->ul[0] = ntohl(enc->ul[0]);
 
 	if (enc->unfrag.lf != 0) {
 		/* Fragment: its header is two words long. */
 		m = m_pullup(m, 2*sizeof(uint32_t));
 		if (!m)
 			return;
 		enc = mtod(m, union fw_encap *);
 		enc->ul[1] = ntohl(enc->ul[1]);
 		m = firewire_input_fragment(fc, m, src);
 		if (!m)
 			return;
 		enc = mtod(m, union fw_encap *);
 		type = enc->firstfrag.ether_type;
 		m_adj(m, 2*sizeof(uint32_t));
 	} else {
 		type = enc->unfrag.ether_type;
 		m_adj(m, sizeof(uint32_t));
 	}
 
 	if (m->m_pkthdr.rcvif == NULL) {
 		if_printf(ifp, "discard frame w/o interface pointer\n");
 		if_inc_counter(ifp, IFCOUNTER_IERRORS, 1);
 		m_freem(m);
 		return;
 	}
 #ifdef DIAGNOSTIC
 	if (m->m_pkthdr.rcvif != ifp) {
 		if_printf(ifp, "Warning, frame marked as received on %s\n",
 			m->m_pkthdr.rcvif->if_xname);
 	}
 #endif
 
 #ifdef MAC
 	/*
 	 * Tag the mbuf with an appropriate MAC label before any other
 	 * consumers can get to it.
 	 */
 	mac_ifnet_create_mbuf(ifp, m);
 #endif
 
 	/*
 	 * Give bpf a chance at the packet. The link-level driver
 	 * should have left us a tag with the EUID of the sender.
 	 */
 	if (bpf_peers_present(ifp->if_bpf)) {
 		struct fw_bpfhdr h;
 		struct m_tag *mtag;
 
 		/*
 		 * Always fill in the source: without a sender tag fall back
 		 * to the broadcast address so that no uninitialised stack
 		 * bytes are handed to BPF.
 		 */
 		mtag = m_tag_locate(m, MTAG_FIREWIRE, MTAG_FIREWIRE_SENDER_EUID, 0);
 		if (mtag)
 			bcopy(mtag + 1, h.firewire_shost, 8);
 		else
 			bcopy(&firewire_broadcastaddr, h.firewire_shost, 8);
 		bcopy(&fc->fc_hwaddr, h.firewire_dhost, 8);
 		h.firewire_type = htons(type);
 		bpf_mtap2(ifp->if_bpf, &h, sizeof(h), m);
 	}
 
 	if (ifp->if_flags & IFF_MONITOR) {
 		/*
 		 * Interface marked for monitoring; discard packet.
 		 */
 		m_freem(m);
 		return;
 	}
 
 	if_inc_counter(ifp, IFCOUNTER_IBYTES, m->m_pkthdr.len);
 
 	/* Discard packet if interface is not up */
 	if ((ifp->if_flags & IFF_UP) == 0) {
 		m_freem(m);
 		return;
 	}
 
 	if (m->m_flags & (M_BCAST|M_MCAST))
 		if_inc_counter(ifp, IFCOUNTER_IMCASTS, 1);
 
 	switch (type) {
 #ifdef INET
 	case ETHERTYPE_IP:
 		isr = NETISR_IP;
 		break;
 
 	case ETHERTYPE_ARP:
 	{
 		struct arphdr *ah;
 		ah = mtod(m, struct arphdr *);
 
 		/*
 		 * Adjust the arp packet to insert an empty tha slot.
 		 */
 		/*
 		 * NOTE(review): the mbuf is grown by ar_hln bytes without a
 		 * visible check that the header is contiguous or that
 		 * trailing space exists -- confirm against the drivers.
 		 */
 		m->m_len += ah->ar_hln;
 		m->m_pkthdr.len += ah->ar_hln;
 		bcopy(ar_tha(ah), ar_tpa(ah), ah->ar_pln);
 		isr = NETISR_ARP;
 		break;
 	}
 #endif
 
 #ifdef INET6
 	case ETHERTYPE_IPV6:
 		isr = NETISR_IPV6;
 		break;
 #endif
 
 	default:
 		m_freem(m);
 		return;
 	}
 
 	M_SETFIB(m, ifp->if_fib);
 	CURVNET_SET_QUIET(ifp->if_vnet);
 	netisr_dispatch(isr, m);
 	CURVNET_RESTORE();
 }
 
 /*
  * Common ioctl handler for firewire interfaces.
  *
  * Handles address assignment (SIOCSIFADDR), hardware address retrieval
  * (SIOCGIFADDR) and MTU changes (SIOCSIFMTU).  Returns 0 on success or an
  * errno value; any other command yields EINVAL.
  */
 int
 firewire_ioctl(struct ifnet *ifp, u_long command, caddr_t data)
 {
 	struct ifreq *ifr;
 	struct ifaddr *ifa;
 	int error;
 
 	ifa = (struct ifaddr *)data;
 	ifr = (struct ifreq *)data;
 	error = 0;
 
 	switch (command) {
 	case SIOCSIFADDR:
 		ifp->if_flags |= IFF_UP;
 
 		/* The driver is initialised for every address family. */
 		switch (ifa->ifa_addr->sa_family) {
 #ifdef INET
 		case AF_INET:
 			/* Initialise first so that ARP can be sent. */
 			ifp->if_init(ifp->if_softc);
 			arp_ifinit(ifp, ifa);
 			break;
 #endif
 		default:
 			ifp->if_init(ifp->if_softc);
 			break;
 		}
 		break;
 
 	case SIOCGIFADDR:
 		/*
 		 * NOTE(review): copies sizeof(struct fw_hwaddr) bytes into
 		 * sa_data -- confirm the destination is large enough.
 		 */
 		bcopy(&IFP2FWC(ifp)->fc_hwaddr, &ifr->ifr_addr.sa_data[0],
 		    sizeof(struct fw_hwaddr));
 		break;
 
 	case SIOCSIFMTU:
 		/* Accept any MTU up to 1500 bytes. */
 		if (ifr->ifr_mtu <= 1500)
 			ifp->if_mtu = ifr->ifr_mtu;
 		else
 			error = EINVAL;
 		break;
 
 	default:
 		error = EINVAL;			/* XXX netbsd has ENOTTY??? */
 		break;
 	}
 
 	return (error);
 }
 
 /*
  * Validate a multicast address for a firewire interface.
  *
  * No link-level mapping is produced: on success *llsa is always set to
  * NULL.  An unspecified IPv6 address additionally switches the interface to
  * IFF_ALLMULTI.
  *
  * Returns 0 on success, EADDRNOTAVAIL for a non-multicast IPv4/IPv6 address
  * and EAFNOSUPPORT for any other address family.
  */
 static int
 firewire_resolvemulti(struct ifnet *ifp, struct sockaddr **llsa,
     struct sockaddr *sa)
 {
 #ifdef INET
 	struct sockaddr_in *sin;
 #endif
 #ifdef INET6
 	struct sockaddr_in6 *sin6;
 #endif
 
 	switch (sa->sa_family) {
 	case AF_LINK:
 		/* Link-level addresses are accepted as they are. */
 		break;
 
 #ifdef INET
 	case AF_INET:
 		sin = (struct sockaddr_in *)sa;
 		if (!IN_MULTICAST(ntohl(sin->sin_addr.s_addr)))
 			return (EADDRNOTAVAIL);
 		break;
 #endif
 #ifdef INET6
 	case AF_INET6:
 		sin6 = (struct sockaddr_in6 *)sa;
 		if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) {
 			/*
 			 * The all-zero IPv6 address asks for every
 			 * multicast address used for IPv6 (multicast
 			 * routers rely on this).
 			 */
 			ifp->if_flags |= IFF_ALLMULTI;
 		} else if (!IN6_IS_ADDR_MULTICAST(&sin6->sin6_addr)) {
 			return (EADDRNOTAVAIL);
 		}
 		break;
 #endif
 
 	default:
 		/*
 		 * The error text does not describe this case exactly, but
 		 * the error name is what matters.
 		 */
 		return (EAFNOSUPPORT);
 	}
 
 	/* Every accepted address resolves to "no link-level address". */
 	*llsa = NULL;
 	return (0);
 }
 
 /*
  * Attach a firewire interface to the network stack.
  *
  * ifp	- interface to attach.
  * llc	- hardware address of the interface; its speed code is recorded in
  *	  the per-interface state and the whole structure becomes the
  *	  link-level address.
  *
  * Initialises the fragment reassembly list, installs the output and
  * multicast-resolution handlers, attaches BPF and announces the address.
  */
 void
 firewire_ifattach(struct ifnet *ifp, struct fw_hwaddr *llc)
 {
 	struct fw_com *fc = IFP2FWC(ifp);
 	struct ifaddr *ifa;
 	struct sockaddr_dl *sdl;
 	const char *speedname;
 	static const char* speeds[] = {
 		"S100", "S200", "S400", "S800",
 		"S1600", "S3200"
 	};
 
 	fc->fc_speed = llc->sspd;
 	STAILQ_INIT(&fc->fc_frags);
 
 	ifp->if_addrlen = sizeof(struct fw_hwaddr);
 	ifp->if_hdrlen = 0;
 	if_attach(ifp);
 	ifp->if_mtu = 1500;	/* XXX */
 	ifp->if_output = firewire_output;
 	ifp->if_resolvemulti = firewire_resolvemulti;
 	ifp->if_broadcastaddr = (u_char *) &firewire_broadcastaddr;
 
 	ifa = ifp->if_addr;
 	KASSERT(ifa != NULL, ("%s: no lladdr!\n", __func__));
 	sdl = (struct sockaddr_dl *)ifa->ifa_addr;
 	sdl->sdl_type = IFT_IEEE1394;
 	sdl->sdl_alen = ifp->if_addrlen;
 	bcopy(llc, LLADDR(sdl), ifp->if_addrlen);
 
 	bpfattach(ifp, DLT_APPLE_IP_OVER_IEEE1394,
 	    sizeof(struct fw_hwaddr));
 
 	/* Never index the name table with an out-of-range speed code. */
 	speedname = "unknown";
 	if (llc->sspd < nitems(speeds))
 		speedname = speeds[llc->sspd];
 
 	if_printf(ifp, "Firewire address: %8D @ 0x%04x%08x, %s, maxrec %d\n",
 	    (uint8_t *) &llc->sender_unique_ID_hi, ":",
 	    ntohs(llc->sender_unicast_FIFO_hi),
 	    ntohl(llc->sender_unicast_FIFO_lo),
 	    speedname,
 	    (2 << llc->sender_max_rec));
 }
 
 /*
  * Detach a firewire interface: BPF is detached first, then the interface
  * itself.
  */
 void
 firewire_ifdetach(struct ifnet *ifp)
 {
 	bpfdetach(ifp);
 	if_detach(ifp);
 }
 
 void
 firewire_busreset(struct ifnet *ifp)
 {
 	struct fw_com *fc = IFP2FWC(ifp);
 	struct fw_reass *r;
 	struct mbuf *m;
 
 	/*
 	 * Discard any partial datagrams since the host ids may have changed.
 	 */
 	while ((r = STAILQ_FIRST(&fc->fc_frags))) {
 		STAILQ_REMOVE_HEAD(&fc->fc_frags, fr_link);
 		while (r->fr_frags) {
 			m = r->fr_frags;
 			r->fr_frags = m->m_nextpkt;
 			m_freem(m);
 		}
 		free(r, M_TEMP);
 	}
 }
 
 /*
  * Allocate the zero-filled per-interface state and remember the owning
  * ifnet in it.  Registered through if_register_com_alloc() by the module
  * event handler; the type argument is not used.
  */
 static void *
 firewire_alloc(u_char type, struct ifnet *ifp)
 {
 	struct fw_com *com;
 
 	com = malloc(sizeof(*com), M_FWCOM, M_WAITOK | M_ZERO);
 	com->fc_ifp = ifp;
 	return (com);
 }
 
 /*
  * Release per-interface state obtained from firewire_alloc(); the type
  * argument is not used.
  */
 static void
 firewire_free(void *com, u_char type)
 {
 
 	free(com, M_FWCOM);
 }
 
 /*
  * Module event handler: registers the per-interface state allocator for
  * IFT_IEEE1394 on load and removes it on unload.  Any other event is
  * rejected with EOPNOTSUPP.
  */
 static int
 firewire_modevent(module_t mod, int type, void *data)
 {
 
 	if (type == MOD_LOAD) {
 		if_register_com_alloc(IFT_IEEE1394,
 		    firewire_alloc, firewire_free);
 	} else if (type == MOD_UNLOAD) {
 		if_deregister_com_alloc(IFT_IEEE1394);
 	} else {
 		return (EOPNOTSUPP);
 	}
 
 	return (0);
 }
 
 static moduledata_t firewire_mod = {
 	"if_firewire",
 	firewire_modevent,
 	0
 };
 
 DECLARE_MODULE(if_firewire, firewire_mod, SI_SUB_INIT_IF, SI_ORDER_ANY);
 MODULE_VERSION(if_firewire, 1);
diff --git a/sys/net/if_gif.c b/sys/net/if_gif.c
index 113bcb5c916e..796f427e356b 100644
--- a/sys/net/if_gif.c
+++ b/sys/net/if_gif.c
@@ -1,723 +1,723 @@
 /*-
  * SPDX-License-Identifier: BSD-3-Clause
  *
  * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
  * Copyright (c) 2018 Andrey V. Elsukov <ae@FreeBSD.org>
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. Neither the name of the project nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  *	$KAME: if_gif.c,v 1.87 2001/10/19 08:50:27 itojun Exp $
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include "opt_inet.h"
 #include "opt_inet6.h"
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/kernel.h>
 #include <sys/lock.h>
 #include <sys/malloc.h>
 #include <sys/mbuf.h>
 #include <sys/module.h>
 #include <sys/rmlock.h>
 #include <sys/socket.h>
 #include <sys/sockio.h>
 #include <sys/sx.h>
 #include <sys/errno.h>
 #include <sys/time.h>
 #include <sys/sysctl.h>
 #include <sys/syslog.h>
 #include <sys/priv.h>
 #include <sys/proc.h>
 #include <sys/conf.h>
 #include <machine/cpu.h>
 
 #include <net/if.h>
 #include <net/if_var.h>
 #include <net/if_clone.h>
 #include <net/if_types.h>
 #include <net/netisr.h>
 #include <net/route.h>
 #include <net/bpf.h>
 #include <net/vnet.h>
 
 #include <netinet/in.h>
 #include <netinet/in_systm.h>
 #include <netinet/ip.h>
 #include <netinet/ip_ecn.h>
 #ifdef	INET
 #include <netinet/in_var.h>
 #include <netinet/ip_var.h>
 #endif	/* INET */
 
 #ifdef INET6
 #ifndef INET
 #include <netinet/in.h>
 #endif
 #include <netinet6/in6_var.h>
 #include <netinet/ip6.h>
 #include <netinet6/ip6_ecn.h>
 #include <netinet6/ip6_var.h>
 #endif /* INET6 */
 
 #include <netinet/ip_encap.h>
 #include <net/ethernet.h>
 #include <net/if_bridgevar.h>
 #include <net/if_gif.h>
 
 #include <security/mac/mac_framework.h>
 
 /* Interface name prefix used for the cloner and if_initname(). */
 static const char gifname[] = "gif";
 
 MALLOC_DEFINE(M_GIF, "gif", "Generic Tunnel Interface");
 /*
  * Taken exclusively by gif_ioctl(), gif_reassign() and
  * gif_clone_destroy() around tunnel (re)configuration and teardown.
  */
 static struct sx gif_ioctl_sx;
 SX_SYSINIT(gif_ioctl_sx, &gif_ioctl_sx, "gif_ioctl");
 
 /*
  * Optional hooks; this file only calls a hook when its pointer is
  * non-NULL.  NOTE(review): presumably filled in by the ng_gif netgraph
  * node -- confirm against that module.
  */
 void	(*ng_gif_input_p)(struct ifnet *ifp, struct mbuf **mp, int af);
 void	(*ng_gif_input_orphan_p)(struct ifnet *ifp, struct mbuf *m, int af);
 void	(*ng_gif_attach_p)(struct ifnet *ifp);
 void	(*ng_gif_detach_p)(struct ifnet *ifp);
 
 #ifdef VIMAGE
 static void	gif_reassign(struct ifnet *, struct vnet *, char *);
 #endif
 static void	gif_delete_tunnel(struct gif_softc *);
 static int	gif_ioctl(struct ifnet *, u_long, caddr_t);
 static int	gif_transmit(struct ifnet *, struct mbuf *);
 static void	gif_qflush(struct ifnet *);
 static int	gif_clone_create(struct if_clone *, int, caddr_t);
 static void	gif_clone_destroy(struct ifnet *);
 /* Per-vnet cloner handle, set up in vnet_gif_init(). */
 VNET_DEFINE_STATIC(struct if_clone *, gif_cloner);
 #define	V_gif_cloner	VNET(gif_cloner)
 
 SYSCTL_DECL(_net_link);
 static SYSCTL_NODE(_net_link, IFT_GIF, gif, CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
     "Generic Tunnel Interface");
 #ifndef MAX_GIF_NEST
 /*
  * This macro sets the default upper limit on the nesting of gif tunnels.
  * Because a large value combined with a careless configuration may crash
  * the system, nesting is not allowed by default.
  * If you need nested gif tunnels, you can define this macro in your
  * kernel configuration file.  If you do so, be careful to configure the
  * tunnels so that they do not form a loop.
  */
 #define MAX_GIF_NEST 1
 #endif
 /* Per-vnet nesting limit handed to if_tunnel_check_nesting(). */
 VNET_DEFINE_STATIC(int, max_gif_nesting) = MAX_GIF_NEST;
 #define	V_max_gif_nesting	VNET(max_gif_nesting)
 SYSCTL_INT(_net_link_gif, OID_AUTO, max_nesting, CTLFLAG_VNET | CTLFLAG_RW,
     &VNET_NAME(max_gif_nesting), 0, "Max nested tunnels");
 
 /*
  * Cloner create callback: build a new, unconfigured gif interface.
  *
  * Allocates a zeroed softc, records the creating process's FIB,
  * allocates an IFT_GIF ifnet, fills in its methods and defaults,
  * attaches it to the stack and to BPF (DLT_NULL with a 32-bit
  * pseudo header), and finally calls the ng_gif attach hook if one is
  * set.  Always returns 0; ifc and params are not used.
  */
 static int
 gif_clone_create(struct if_clone *ifc, int unit, caddr_t params)
 {
 	struct gif_softc *sc;
 	struct ifnet *ifp;
 
 	sc = malloc(sizeof(*sc), M_GIF, M_WAITOK | M_ZERO);
 	sc->gif_fibnum = curthread->td_proc->p_fibnum;
 
 	GIF2IFP(sc) = if_alloc(IFT_GIF);
 	ifp = GIF2IFP(sc);
 	ifp->if_softc = sc;
 	if_initname(ifp, gifname, unit);
 
 	/* Link-level defaults. */
 	ifp->if_addrlen = 0;
 	ifp->if_mtu = GIF_MTU;
 	ifp->if_flags = IFF_POINTOPOINT | IFF_MULTICAST;
 
 	/* Interface methods. */
 	ifp->if_ioctl = gif_ioctl;
 	ifp->if_transmit = gif_transmit;
 	ifp->if_qflush = gif_qflush;
 	ifp->if_output = gif_output;
 #ifdef VIMAGE
 	ifp->if_reassign = gif_reassign;
 #endif
 
 	ifp->if_capabilities |= IFCAP_LINKSTATE;
 	ifp->if_capenable |= IFCAP_LINKSTATE;
 
 	if_attach(ifp);
 	bpfattach(ifp, DLT_NULL, sizeof(u_int32_t));
 	if (ng_gif_attach_p != NULL)
 		(*ng_gif_attach_p)(ifp);
 
 	return (0);
 }
 
 #ifdef VIMAGE
 /*
  * if_reassign method (installed by gif_clone_create()).  Drops the
  * tunnel configuration under gif_ioctl_sx.  A NULL if_softc, as left
  * behind by gif_clone_destroy(), is tolerated.  The new vnet and the
  * string argument are not used.
  */
 static void
 gif_reassign(struct ifnet *ifp, struct vnet *new_vnet __unused,
     char *unused __unused)
 {
 	struct gif_softc *sc;
 
 	sx_xlock(&gif_ioctl_sx);
 	sc = ifp->if_softc;
 	if (sc != NULL)
 		gif_delete_tunnel(sc);
 	sx_xunlock(&gif_ioctl_sx);
 }
 #endif /* VIMAGE */
 
 /*
  * Cloner destroy callback: tear down a gif interface and free its softc.
  *
  * Under gif_ioctl_sx the tunnel is unconfigured, the ng_gif detach hook
  * (if set) is called, and the interface is detached from BPF and the
  * stack.  if_softc is cleared before the lock is dropped so that
  * gif_ioctl() and gif_reassign(), which check it under the same lock,
  * see NULL afterwards.
  */
 static void
 gif_clone_destroy(struct ifnet *ifp)
 {
 	struct gif_softc *sc;
 
 	sx_xlock(&gif_ioctl_sx);
 	sc = ifp->if_softc;
 	gif_delete_tunnel(sc);
 	if (ng_gif_detach_p != NULL)
 		(*ng_gif_detach_p)(ifp);
 	bpfdetach(ifp);
 	if_detach(ifp);
 	ifp->if_softc = NULL;
 	sx_xunlock(&gif_ioctl_sx);
 
 	/*
 	 * NOTE(review): GIF_WAIT() is defined outside this file; presumably
 	 * it waits for concurrent readers before the memory is released --
 	 * confirm against if_gif.h.
 	 */
 	GIF_WAIT();
 	if_free(ifp);
 	free(sc, M_GIF);
 }
 
 /*
  * Per-vnet initialisation, registered through VNET_SYSINIT below:
  * create the "gif" cloner and call the INET/INET6 gif init routines
  * for whichever address families are compiled in.
  */
 static void
 vnet_gif_init(const void *unused __unused)
 {
 
 	V_gif_cloner = if_clone_simple(gifname, gif_clone_create,
 	    gif_clone_destroy, 0);
 #ifdef INET
 	in_gif_init();
 #endif
 #ifdef INET6
 	in6_gif_init();
 #endif
 }
 VNET_SYSINIT(vnet_gif_init, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_ANY,
     vnet_gif_init, NULL);
 
 /*
  * Per-vnet teardown, registered through VNET_SYSUNINIT below: detach
  * the cloner first, then call the INET/INET6 gif uninit routines for
  * whichever address families are compiled in.
  */
 static void
 vnet_gif_uninit(const void *unused __unused)
 {
 
 	if_clone_detach(V_gif_cloner);
 #ifdef INET
 	in_gif_uninit();
 #endif
 #ifdef INET6
 	in6_gif_uninit();
 #endif
 }
 VNET_SYSUNINIT(vnet_gif_uninit, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_ANY,
     vnet_gif_uninit, NULL);
 
 /*
  * Module event handler.  Load and unload need no work here (setup is
  * done by the per-vnet init/uninit routines), so they simply succeed;
  * every other event type is rejected with EOPNOTSUPP.
  */
 static int
 gifmodevent(module_t mod, int type, void *data)
 {
 
 	if (type != MOD_LOAD && type != MOD_UNLOAD)
 		return (EOPNOTSUPP);
 	return (0);
 }
 
 /*
  * Module glue: declares the "if_gif" module with gifmodevent() as its
  * event handler, version 1.
  */
 static moduledata_t gif_mod = {
 	"if_gif",
 	gifmodevent,
 	0
 };
 
 DECLARE_MODULE(if_gif, gif_mod, SI_SUB_PSEUDO, SI_ORDER_ANY);
 MODULE_VERSION(if_gif, 1);
 
 struct gif_list *
 gif_hashinit(void)
 {
 	struct gif_list *hash;
 	int i;
 
 	hash = malloc(sizeof(struct gif_list) * GIF_HASH_SIZE,
 	    M_GIF, M_WAITOK);
 	for (i = 0; i < GIF_HASH_SIZE; i++)
 		CK_LIST_INIT(&hash[i]);
 
 	return (hash);
 }
 
 /*
  * Free a table obtained from gif_hashinit().  Only the table itself is
  * released; the list entries are not touched.
  */
 void
 gif_hashdestroy(struct gif_list *hash)
 {
 
 	free(hash, M_GIF);
 }
 
 #define	MTAG_GIF	1080679712
 static int
 gif_transmit(struct ifnet *ifp, struct mbuf *m)
 {
 	struct gif_softc *sc;
 	struct etherip_header *eth;
 #ifdef INET
 	struct ip *ip;
 #endif
 #ifdef INET6
 	struct ip6_hdr *ip6;
 	uint32_t t;
 #endif
 	uint32_t af;
 	uint8_t proto, ecn;
 	int error;
 
 	NET_EPOCH_ASSERT();
 #ifdef MAC
 	error = mac_ifnet_check_transmit(ifp, m);
 	if (error) {
 		m_freem(m);
 		goto err;
 	}
 #endif
 	error = ENETDOWN;
 	sc = ifp->if_softc;
 	if ((ifp->if_flags & IFF_MONITOR) != 0 ||
 	    (ifp->if_flags & IFF_UP) == 0 ||
 	    (ifp->if_drv_flags & IFF_DRV_RUNNING) == 0 ||
 	    sc->gif_family == 0 ||
 	    (error = if_tunnel_check_nesting(ifp, m, MTAG_GIF,
 		V_max_gif_nesting)) != 0) {
 		m_freem(m);
 		goto err;
 	}
 	/* Now pull back the af that we stashed in the csum_data. */
 	if (ifp->if_bridge)
 		af = AF_LINK;
 	else
 		af = m->m_pkthdr.csum_data;
 	m->m_flags &= ~(M_BCAST|M_MCAST);
 	M_SETFIB(m, sc->gif_fibnum);
 	BPF_MTAP2(ifp, &af, sizeof(af), m);
 	if_inc_counter(ifp, IFCOUNTER_OPACKETS, 1);
 	if_inc_counter(ifp, IFCOUNTER_OBYTES, m->m_pkthdr.len);
 	/* inner AF-specific encapsulation */
 	ecn = 0;
 	switch (af) {
 #ifdef INET
 	case AF_INET:
 		proto = IPPROTO_IPV4;
 		if (m->m_len < sizeof(struct ip))
 			m = m_pullup(m, sizeof(struct ip));
 		if (m == NULL) {
 			error = ENOBUFS;
 			goto err;
 		}
 		ip = mtod(m, struct ip *);
 		ip_ecn_ingress((ifp->if_flags & IFF_LINK1) ? ECN_ALLOWED:
 		    ECN_NOCARE, &ecn, &ip->ip_tos);
 		break;
 #endif
 #ifdef INET6
 	case AF_INET6:
 		proto = IPPROTO_IPV6;
 		if (m->m_len < sizeof(struct ip6_hdr))
 			m = m_pullup(m, sizeof(struct ip6_hdr));
 		if (m == NULL) {
 			error = ENOBUFS;
 			goto err;
 		}
 		t = 0;
 		ip6 = mtod(m, struct ip6_hdr *);
 		ip6_ecn_ingress((ifp->if_flags & IFF_LINK1) ? ECN_ALLOWED:
 		    ECN_NOCARE, &t, &ip6->ip6_flow);
 		ecn = (ntohl(t) >> 20) & 0xff;
 		break;
 #endif
 	case AF_LINK:
 		proto = IPPROTO_ETHERIP;
 		M_PREPEND(m, sizeof(struct etherip_header), M_NOWAIT);
 		if (m == NULL) {
 			error = ENOBUFS;
 			goto err;
 		}
 		eth = mtod(m, struct etherip_header *);
 		eth->eip_resvh = 0;
 		eth->eip_ver = ETHERIP_VERSION;
 		eth->eip_resvl = 0;
 		break;
 	default:
 		error = EAFNOSUPPORT;
 		m_freem(m);
 		goto err;
 	}
 	/* XXX should we check if our outer source is legal? */
 	/* dispatch to output logic based on outer AF */
 	switch (sc->gif_family) {
 #ifdef INET
 	case AF_INET:
 		error = in_gif_output(ifp, m, proto, ecn);
 		break;
 #endif
 #ifdef INET6
 	case AF_INET6:
 		error = in6_gif_output(ifp, m, proto, ecn);
 		break;
 #endif
 	default:
 		m_freem(m);
 	}
 err:
 	if (error)
 		if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
 	return (error);
 }
 
 /*
  * if_qflush method (installed by gif_clone_create()).  Intentionally
  * empty: this file keeps no transmit queue of its own to flush.
  */
 static void
 gif_qflush(struct ifnet *ifp __unused)
 {
 
 }
 
 /*
  * if_output method (installed by gif_clone_create()).
  *
  * Determines the address family of the packet being sent -- read from
  * dst->sa_data when dst is AF_UNSPEC, otherwise derived from dst (and
  * ro) -- stores it in m->m_pkthdr.csum_data, and passes the packet to
  * the interface's if_transmit method.  Returns whatever if_transmit
  * returns.
  */
 int
 gif_output(struct ifnet *ifp, struct mbuf *m, const struct sockaddr *dst,
 	struct route *ro)
 {
 	uint32_t af;
 
 	if (dst->sa_family == AF_UNSPEC)
 		bcopy(dst->sa_data, &af, sizeof(af));
 	else
-		af = dst->sa_family;
+		af = RO_GET_FAMILY(ro, dst);
 	/*
 	 * Now save the af in the inbound pkt csum data, this is a cheat since
 	 * we are using the inbound csum_data field to carry the af over to
 	 * the gif_transmit() routine, avoiding using yet another mtag.
 	 */
 	m->m_pkthdr.csum_data = af;
 	return (ifp->if_transmit(ifp, m));
 }
 
 /*
  * Receive path for a packet that arrived through gif interface ifp.
  *
  * proto selects the payload type (IPPROTO_IPV4, IPPROTO_IPV6 or
  * IPPROTO_ETHERIP).  ecn is fed to ip_ecn_egress()/ip6_ecn_egress()
  * together with the payload header's TOS/flow field; a zero return
  * from those helpers drops the packet.  NOTE(review): ecn presumably
  * carries the outer header's ECN bits -- confirm against the callers.
  *
  * Must be called inside the network epoch.  The mbuf is always
  * consumed.  Paths that reach the drop label bump IFCOUNTER_IERRORS.
  */
 void
 gif_input(struct mbuf *m, struct ifnet *ifp, int proto, uint8_t ecn)
 {
 	struct etherip_header *eip;
 #ifdef INET
 	struct ip *ip;
 #endif
 #ifdef INET6
 	struct ip6_hdr *ip6;
 	uint32_t t;
 #endif
 	struct ether_header *eh;
 	struct ifnet *oldifp;
 	int isr, n, af;
 
 	NET_EPOCH_ASSERT();
 
 	if (ifp == NULL) {
 		/* just in case */
 		m_freem(m);
 		return;
 	}
 	m->m_pkthdr.rcvif = ifp;
 	m_clrprotoflags(m);
 	/* Map the payload protocol to an address family and apply ECN. */
 	switch (proto) {
 #ifdef INET
 	case IPPROTO_IPV4:
 		af = AF_INET;
 		if (m->m_len < sizeof(struct ip))
 			m = m_pullup(m, sizeof(struct ip));
 		if (m == NULL)
 			goto drop;
 		ip = mtod(m, struct ip *);
 		if (ip_ecn_egress((ifp->if_flags & IFF_LINK1) ? ECN_ALLOWED:
 		    ECN_NOCARE, &ecn, &ip->ip_tos) == 0) {
 			m_freem(m);
 			goto drop;
 		}
 		break;
 #endif
 #ifdef INET6
 	case IPPROTO_IPV6:
 		af = AF_INET6;
 		if (m->m_len < sizeof(struct ip6_hdr))
 			m = m_pullup(m, sizeof(struct ip6_hdr));
 		if (m == NULL)
 			goto drop;
 		/* Place the ecn byte at bit 20 of a network-order flow word. */
 		t = htonl((uint32_t)ecn << 20);
 		ip6 = mtod(m, struct ip6_hdr *);
 		if (ip6_ecn_egress((ifp->if_flags & IFF_LINK1) ? ECN_ALLOWED:
 		    ECN_NOCARE, &t, &ip6->ip6_flow) == 0) {
 			m_freem(m);
 			goto drop;
 		}
 		break;
 #endif
 	case IPPROTO_ETHERIP:
 		af = AF_LINK;
 		break;
 	default:
 		m_freem(m);
 		goto drop;
 	}
 
 #ifdef MAC
 	mac_ifnet_create_mbuf(ifp, m);
 #endif
 
 	if (bpf_peers_present(ifp->if_bpf)) {
 		/* The tap header is the address family as a 32-bit word. */
 		uint32_t af1 = af;
 		bpf_mtap2(ifp->if_bpf, &af1, sizeof(af1), m);
 	}
 
 	/* Monitor mode: count the packet, then discard it. */
 	if ((ifp->if_flags & IFF_MONITOR) != 0) {
 		if_inc_counter(ifp, IFCOUNTER_IPACKETS, 1);
 		if_inc_counter(ifp, IFCOUNTER_IBYTES, m->m_pkthdr.len);
 		m_freem(m);
 		return;
 	}
 
 	/* The hook receives &m; a NULL m afterwards ends processing. */
 	if (ng_gif_input_p != NULL) {
 		(*ng_gif_input_p)(ifp, &m, af);
 		if (m == NULL)
 			goto drop;
 	}
 
 	/*
 	 * Put the packet to the network layer input queue according to the
 	 * specified address family.
 	 * Note: older versions of gif_input directly called network layer
 	 * input functions, e.g. ip6_input, here.  We changed the policy to
 	 * prevent too many recursive calls of such input functions, which
 	 * might cause kernel panic.  But the change may introduce another
 	 * problem; if the input queue is full, packets are discarded.
 	 * The kernel stack overflow really happened, and we believed
 	 * queue-full rarely occurs, so we changed the policy.
 	 */
 	switch (af) {
 #ifdef INET
 	case AF_INET:
 		isr = NETISR_IP;
 		break;
 #endif
 #ifdef INET6
 	case AF_INET6:
 		isr = NETISR_IPV6;
 		break;
 #endif
 	case AF_LINK:
 		/* Need the EtherIP and Ethernet headers contiguous. */
 		n = sizeof(struct etherip_header) +
 		    sizeof(struct ether_header);
 		if (n > m->m_len)
 			m = m_pullup(m, n);
 		if (m == NULL)
 			goto drop;
 		eip = mtod(m, struct etherip_header *);
 		if (eip->eip_ver != ETHERIP_VERSION) {
 			/* discard unknown versions */
 			m_freem(m);
 			goto drop;
 		}
 
 		/* Strip the EtherIP header, leaving the Ethernet frame. */
 		m_adj_decap(m, sizeof(struct etherip_header));
 
 		m->m_flags &= ~(M_BCAST|M_MCAST);
 		m->m_pkthdr.rcvif = ifp;
 
 		if (ifp->if_bridge) {
 			/* Remember ifp so a change can be detected below. */
 			oldifp = ifp;
 			eh = mtod(m, struct ether_header *);
 			if (ETHER_IS_MULTICAST(eh->ether_dhost)) {
 				if (ETHER_IS_BROADCAST(eh->ether_dhost))
 					m->m_flags |= M_BCAST;
 				else
 					m->m_flags |= M_MCAST;
 				if_inc_counter(ifp, IFCOUNTER_IMCASTS, 1);
 			}
 			BRIDGE_INPUT(ifp, m);
 
 			if (m != NULL && ifp != oldifp) {
 				/*
 				 * The bridge gave us back itself or one of the
 				 * members for which the frame is addressed.
 				 */
 				ether_demux(ifp, m);
 				return;
 			}
 		}
 		/*
 		 * Either the interface is not a bridge member, or the frame
 		 * came back still on this interface: discard what is left.
 		 */
 		if (m != NULL)
 			m_freem(m);
 		return;
 
 	default:
 		/* No handler for this family: offer it to the orphan hook. */
 		if (ng_gif_input_orphan_p != NULL)
 			(*ng_gif_input_orphan_p)(ifp, m, af);
 		else
 			m_freem(m);
 		return;
 	}
 
 	if_inc_counter(ifp, IFCOUNTER_IPACKETS, 1);
 	if_inc_counter(ifp, IFCOUNTER_IBYTES, m->m_pkthdr.len);
 	M_SETFIB(m, ifp->if_fib);
 	netisr_dispatch(isr, m);
 	return;
 drop:
 	if_inc_counter(ifp, IFCOUNTER_IERRORS, 1);
 }
 
 /*
  * if_ioctl method (installed by gif_clone_create()).
  *
  * The first switch handles requests that need neither the softc nor
  * the lock and returns directly.  Everything else runs with
  * gif_ioctl_sx held exclusively; a NULL softc (the interface is being
  * destroyed) yields ENXIO.  Address requests are forwarded to
  * in_gif_ioctl()/in6_gif_ioctl().  After a successful
  * SIOCSIFPHYADDR/SIOCSIFPHYADDR_IN6 on a configured tunnel the link
  * state is set to UP.
  *
  * Returns 0 or an errno value; unknown commands return EINVAL.
  */
 static int
 gif_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
 {
 	struct ifreq *ifr = (struct ifreq*)data;
 	struct gif_softc *sc;
 	u_int options;
 	int error;
 
 	switch (cmd) {
 	case SIOCSIFADDR:
 		ifp->if_flags |= IFF_UP;
 		/* FALLTHROUGH */
 	case SIOCADDMULTI:
 	case SIOCDELMULTI:
 	case SIOCGIFMTU:
 	case SIOCSIFFLAGS:
 		return (0);
 	case SIOCSIFMTU:
 		if (ifr->ifr_mtu < GIF_MTU_MIN ||
 		    ifr->ifr_mtu > GIF_MTU_MAX)
 			return (EINVAL);
 		else
 			ifp->if_mtu = ifr->ifr_mtu;
 		return (0);
 	}
 	sx_xlock(&gif_ioctl_sx);
 	sc = ifp->if_softc;
 	if (sc == NULL) {
 		error = ENXIO;
 		goto bad;
 	}
 	error = 0;
 	switch (cmd) {
 	case SIOCDIFPHYADDR:
 		/* Nothing to delete if no tunnel is configured. */
 		if (sc->gif_family == 0)
 			break;
 		gif_delete_tunnel(sc);
 		break;
 #ifdef INET
 	case SIOCSIFPHYADDR:
 	case SIOCGIFPSRCADDR:
 	case SIOCGIFPDSTADDR:
 		error = in_gif_ioctl(sc, cmd, data);
 		break;
 #endif
 #ifdef INET6
 	case SIOCSIFPHYADDR_IN6:
 	case SIOCGIFPSRCADDR_IN6:
 	case SIOCGIFPDSTADDR_IN6:
 		error = in6_gif_ioctl(sc, cmd, data);
 		break;
 #endif
 	case SIOCGTUNFIB:
 		ifr->ifr_fib = sc->gif_fibnum;
 		break;
 	case SIOCSTUNFIB:
 		if ((error = priv_check(curthread, PRIV_NET_GIF)) != 0)
 			break;
 		if (ifr->ifr_fib >= rt_numfibs)
 			error = EINVAL;
 		else
 			sc->gif_fibnum = ifr->ifr_fib;
 		break;
 	case GIFGOPTS:
 		options = sc->gif_options;
 		error = copyout(&options, ifr_data_get_ptr(ifr),
 		    sizeof(options));
 		break;
 	case GIFSOPTS:
 		if ((error = priv_check(curthread, PRIV_NET_GIF)) != 0)
 			break;
 		error = copyin(ifr_data_get_ptr(ifr), &options,
 		    sizeof(options));
 		if (error)
 			break;
 		/* Reject any bit outside the supported option mask. */
 		if (options & ~GIF_OPTMASK) {
 			error = EINVAL;
 			break;
 		}
 		if (sc->gif_options != options) {
 			switch (sc->gif_family) {
 #ifdef INET
 			case AF_INET:
 				error = in_gif_setopts(sc, options);
 				break;
 #endif
 #ifdef INET6
 			case AF_INET6:
 				error = in6_gif_setopts(sc, options);
 				break;
 #endif
 			default:
 				/* No need to invoke AF-handler */
 				sc->gif_options = options;
 			}
 		}
 		break;
 	default:
 		error = EINVAL;
 		break;
 	}
 	/*
 	 * The trailing "0" keeps the condition well-formed when neither
 	 * INET nor INET6 is compiled in.
 	 */
 	if (error == 0 && sc->gif_family != 0) {
 		if (
 #ifdef INET
 		    cmd == SIOCSIFPHYADDR ||
 #endif
 #ifdef INET6
 		    cmd == SIOCSIFPHYADDR_IN6 ||
 #endif
 		    0) {
 			if_link_state_change(ifp, LINK_STATE_UP);
 		}
 	}
 bad:
 	sx_xunlock(&gif_ioctl_sx);
 	return (error);
 }
 
 /*
  * Unconfigure the tunnel of sc.  The caller must hold gif_ioctl_sx
  * exclusively (asserted).
  *
  * If a tunnel is configured, the softc is unlinked from its srchash
  * and chain lists and sc->gif_hdr is freed after GIF_WAIT().  In all
  * cases the family is reset to 0, IFF_DRV_RUNNING is cleared and the
  * link state is reported as DOWN.
  */
 static void
 gif_delete_tunnel(struct gif_softc *sc)
 {
 
 	sx_assert(&gif_ioctl_sx, SA_XLOCKED);
 	if (sc->gif_family != 0) {
 		CK_LIST_REMOVE(sc, srchash);
 		CK_LIST_REMOVE(sc, chain);
 		/* Wait until it becomes safe to free gif_hdr */
 		GIF_WAIT();
 		free(sc->gif_hdr, M_GIF);
 	}
 	sc->gif_family = 0;
 	GIF2IFP(sc)->if_drv_flags &= ~IFF_DRV_RUNNING;
 	if_link_state_change(GIF2IFP(sc), LINK_STATE_DOWN);
 }
diff --git a/sys/net/if_gre.c b/sys/net/if_gre.c
index 19014f9fd3de..5ad452ac38e0 100644
--- a/sys/net/if_gre.c
+++ b/sys/net/if_gre.c
@@ -1,832 +1,832 @@
 /*-
  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
  *
  * Copyright (c) 1998 The NetBSD Foundation, Inc.
  * Copyright (c) 2014, 2018 Andrey V. Elsukov <ae@FreeBSD.org>
  * All rights reserved.
  *
  * This code is derived from software contributed to The NetBSD Foundation
  * by Heiko W.Rupp <hwr@pilhuhn.de>
  *
  * IPv6-over-GRE contributed by Gert Doering <gert@greenie.muc.de>
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  * POSSIBILITY OF SUCH DAMAGE.
  *
  * $NetBSD: if_gre.c,v 1.49 2003/12/11 00:22:29 itojun Exp $
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include "opt_inet.h"
 #include "opt_inet6.h"
 #include "opt_rss.h"
 
 #include <sys/param.h>
 #include <sys/kernel.h>
 #include <sys/lock.h>
 #include <sys/malloc.h>
 #include <sys/module.h>
 #include <sys/mbuf.h>
 #include <sys/priv.h>
 #include <sys/proc.h>
 #include <sys/socket.h>
 #include <sys/socketvar.h>
 #include <sys/sockio.h>
 #include <sys/sx.h>
 #include <sys/sysctl.h>
 #include <sys/syslog.h>
 #include <sys/systm.h>
 
 #include <net/ethernet.h>
 #include <net/if.h>
 #include <net/if_var.h>
 #include <net/if_clone.h>
 #include <net/if_types.h>
 #include <net/netisr.h>
 #include <net/vnet.h>
 #include <net/route.h>
 
 #include <netinet/in.h>
 #include <netinet/in_pcb.h>
 #ifdef INET
 #include <netinet/in_var.h>
 #include <netinet/ip.h>
 #include <netinet/ip_var.h>
 #ifdef RSS
 #include <netinet/in_rss.h>
 #endif
 #endif
 
 #ifdef INET6
 #include <netinet/ip6.h>
 #include <netinet6/in6_var.h>
 #include <netinet6/ip6_var.h>
 #ifdef RSS
 #include <netinet6/in6_rss.h>
 #endif
 #endif
 
 #include <netinet/ip_encap.h>
 #include <netinet/udp.h>
 #include <net/bpf.h>
 #include <net/if_gre.h>
 
 #include <machine/in_cksum.h>
 #include <security/mac/mac_framework.h>
 
 /* MTU assigned to a new interface by gre_clone_create(). */
 #define	GREMTU			1476
 
 /* Interface name prefix; also used as the malloc type's short name. */
 static const char grename[] = "gre";
 MALLOC_DEFINE(M_GRE, grename, "Generic Routing Encapsulation");
 
 /*
  * Taken exclusively by gre_ioctl(), gre_reassign() and
  * gre_clone_destroy(); gre_delete_tunnel(), gre_update_hdr() and
  * gre_update_udphdr() assert that it is held.
  */
 static struct sx gre_ioctl_sx;
 SX_SYSINIT(gre_ioctl_sx, &gre_ioctl_sx, "gre_ioctl");
 
 static int	gre_clone_create(struct if_clone *, int, caddr_t);
 static void	gre_clone_destroy(struct ifnet *);
 /* Per-vnet cloner handle, set up in vnet_gre_init(). */
 VNET_DEFINE_STATIC(struct if_clone *, gre_cloner);
 #define	V_gre_cloner	VNET(gre_cloner)
 
 #ifdef VIMAGE
 static void	gre_reassign(struct ifnet *, struct vnet *, char *);
 #endif
 static void	gre_qflush(struct ifnet *);
 static int	gre_transmit(struct ifnet *, struct mbuf *);
 static int	gre_ioctl(struct ifnet *, u_long, caddr_t);
 static int	gre_output(struct ifnet *, struct mbuf *,
 		    const struct sockaddr *, struct route *);
 static void	gre_delete_tunnel(struct gre_softc *);
 
 SYSCTL_DECL(_net_link);
 static SYSCTL_NODE(_net_link, IFT_TUNNEL, gre, CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
     "Generic Routing Encapsulation");
 #ifndef MAX_GRE_NEST
 /*
  * This macro sets the default upper limit on the nesting of gre tunnels.
  * Because a large value combined with a careless configuration may crash
  * the system, nesting is not allowed by default.
  * If you need nested gre tunnels, you can define this macro in your
  * kernel configuration file.  If you do so, be careful to configure the
  * tunnels so that they do not form a loop.
  */
 #define MAX_GRE_NEST 1
 #endif
 
 /* Per-vnet nesting limit, exported as net.link.gre.max_nesting. */
 VNET_DEFINE_STATIC(int, max_gre_nesting) = MAX_GRE_NEST;
 #define	V_max_gre_nesting	VNET(max_gre_nesting)
 SYSCTL_INT(_net_link_gre, OID_AUTO, max_nesting, CTLFLAG_RW | CTLFLAG_VNET,
     &VNET_NAME(max_gre_nesting), 0, "Max nested tunnels");
 
 /*
  * Per-vnet initialisation, registered through VNET_SYSINIT below:
  * create the "gre" cloner and call the INET/INET6 gre init routines
  * for whichever address families are compiled in.
  */
 static void
 vnet_gre_init(const void *unused __unused)
 {
 
 	V_gre_cloner = if_clone_simple(grename, gre_clone_create,
 	    gre_clone_destroy, 0);
 #ifdef INET
 	in_gre_init();
 #endif
 #ifdef INET6
 	in6_gre_init();
 #endif
 }
 VNET_SYSINIT(vnet_gre_init, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_ANY,
     vnet_gre_init, NULL);
 
 /*
  * Per-vnet teardown, registered through VNET_SYSUNINIT below: detach
  * the cloner first, then call the INET/INET6 gre uninit routines for
  * whichever address families are compiled in.
  */
 static void
 vnet_gre_uninit(const void *unused __unused)
 {
 
 	if_clone_detach(V_gre_cloner);
 #ifdef INET
 	in_gre_uninit();
 #endif
 #ifdef INET6
 	in6_gre_uninit();
 #endif
 	/* XXX: epoch_call drain */
 }
 VNET_SYSUNINIT(vnet_gre_uninit, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_ANY,
     vnet_gre_uninit, NULL);
 
 /*
  * Cloner create callback: build a new, unconfigured gre interface.
  *
  * Allocates a zeroed softc, records the creating process's FIB,
  * allocates an IFT_TUNNEL ifnet, fills in its methods and defaults,
  * and attaches it to the stack and to BPF (DLT_NULL with a 32-bit
  * pseudo header).  Always returns 0; ifc and params are not used.
  */
 static int
 gre_clone_create(struct if_clone *ifc, int unit, caddr_t params)
 {
 	struct gre_softc *sc;
 	struct ifnet *ifp;
 
 	sc = malloc(sizeof(*sc), M_GRE, M_WAITOK | M_ZERO);
 	sc->gre_fibnum = curthread->td_proc->p_fibnum;
 
 	GRE2IFP(sc) = if_alloc(IFT_TUNNEL);
 	ifp = GRE2IFP(sc);
 	ifp->if_softc = sc;
 	if_initname(ifp, grename, unit);
 
 	/* Link-level defaults. */
 	ifp->if_mtu = GREMTU;
 	ifp->if_flags = IFF_POINTOPOINT | IFF_MULTICAST;
 
 	/* Interface methods. */
 	ifp->if_output = gre_output;
 	ifp->if_ioctl = gre_ioctl;
 	ifp->if_transmit = gre_transmit;
 	ifp->if_qflush = gre_qflush;
 #ifdef VIMAGE
 	ifp->if_reassign = gre_reassign;
 #endif
 
 	ifp->if_capabilities |= IFCAP_LINKSTATE;
 	ifp->if_capenable |= IFCAP_LINKSTATE;
 
 	if_attach(ifp);
 	bpfattach(ifp, DLT_NULL, sizeof(u_int32_t));
 
 	return (0);
 }
 
 #ifdef VIMAGE
 /*
  * if_reassign method (installed by gre_clone_create()).  Drops the
  * tunnel configuration under gre_ioctl_sx.  A NULL if_softc, as left
  * behind by gre_clone_destroy(), is tolerated.  The new vnet and the
  * string argument are not used.
  */
 static void
 gre_reassign(struct ifnet *ifp, struct vnet *new_vnet __unused,
     char *unused __unused)
 {
 	struct gre_softc *sc;
 
 	sx_xlock(&gre_ioctl_sx);
 	sc = ifp->if_softc;
 	if (sc != NULL)
 		gre_delete_tunnel(sc);
 	sx_xunlock(&gre_ioctl_sx);
 }
 #endif /* VIMAGE */
 
 /*
  * Cloner destroy callback: tear down a gre interface and free its softc.
  *
  * Under gre_ioctl_sx the tunnel is unconfigured and the interface is
  * detached from BPF and the stack.  if_softc is cleared before the
  * lock is dropped so that gre_ioctl() and gre_reassign(), which check
  * it under the same lock, see NULL afterwards.
  */
 static void
 gre_clone_destroy(struct ifnet *ifp)
 {
 	struct gre_softc *sc;
 
 	sx_xlock(&gre_ioctl_sx);
 	sc = ifp->if_softc;
 	gre_delete_tunnel(sc);
 	bpfdetach(ifp);
 	if_detach(ifp);
 	ifp->if_softc = NULL;
 	sx_xunlock(&gre_ioctl_sx);
 
 	/*
 	 * NOTE(review): GRE_WAIT() is defined outside this file; presumably
 	 * it waits for concurrent readers before the memory is released --
 	 * confirm against if_gre.h.
 	 */
 	GRE_WAIT();
 	if_free(ifp);
 	free(sc, M_GRE);
 }
 
 /*
  * if_ioctl method (installed by gre_clone_create()).
  *
  * The first switch handles requests that need neither the softc nor
  * the lock and returns directly; the GRE[SG]ADDR[SD] and GRE[SG]PROTO
  * requests are answered with EOPNOTSUPP.  Everything else runs with
  * gre_ioctl_sx held exclusively; a NULL softc (the interface is being
  * destroyed) yields ENXIO.  After a successful
  * SIOCSIFPHYADDR/SIOCSIFPHYADDR_IN6 on a configured tunnel the link
  * state is set to UP.
  *
  * Returns 0 or an errno value; unknown commands return EINVAL.
  */
 static int
 gre_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
 {
 	struct ifreq *ifr = (struct ifreq *)data;
 	struct gre_softc *sc;
 	uint32_t opt;
 	int error;
 
 	switch (cmd) {
 	case SIOCSIFMTU:
 		 /* XXX: only a lower bound is enforced here. */
 		if (ifr->ifr_mtu < 576)
 			return (EINVAL);
 		ifp->if_mtu = ifr->ifr_mtu;
 		return (0);
 	case SIOCSIFADDR:
 		ifp->if_flags |= IFF_UP;
 		/* FALLTHROUGH */
 	case SIOCSIFFLAGS:
 	case SIOCADDMULTI:
 	case SIOCDELMULTI:
 		return (0);
 	case GRESADDRS:
 	case GRESADDRD:
 	case GREGADDRS:
 	case GREGADDRD:
 	case GRESPROTO:
 	case GREGPROTO:
 		return (EOPNOTSUPP);
 	}
 	sx_xlock(&gre_ioctl_sx);
 	sc = ifp->if_softc;
 	if (sc == NULL) {
 		error = ENXIO;
 		goto end;
 	}
 	error = 0;
 	switch (cmd) {
 	case SIOCDIFPHYADDR:
 		/* Nothing to delete if no tunnel is configured. */
 		if (sc->gre_family == 0)
 			break;
 		gre_delete_tunnel(sc);
 		break;
 #ifdef INET
 	case SIOCSIFPHYADDR:
 	case SIOCGIFPSRCADDR:
 	case SIOCGIFPDSTADDR:
 		error = in_gre_ioctl(sc, cmd, data);
 		break;
 #endif
 #ifdef INET6
 	case SIOCSIFPHYADDR_IN6:
 	case SIOCGIFPSRCADDR_IN6:
 	case SIOCGIFPDSTADDR_IN6:
 		error = in6_gre_ioctl(sc, cmd, data);
 		break;
 #endif
 	case SIOCGTUNFIB:
 		ifr->ifr_fib = sc->gre_fibnum;
 		break;
 	case SIOCSTUNFIB:
 		if ((error = priv_check(curthread, PRIV_NET_GRE)) != 0)
 			break;
 		if (ifr->ifr_fib >= rt_numfibs)
 			error = EINVAL;
 		else
 			sc->gre_fibnum = ifr->ifr_fib;
 		break;
 	case GRESKEY:
 	case GRESOPTS:
 	case GRESPORT:
 		if ((error = priv_check(curthread, PRIV_NET_GRE)) != 0)
 			break;
 		if ((error = copyin(ifr_data_get_ptr(ifr), &opt,
 		    sizeof(opt))) != 0)
 			break;
 		/*
 		 * Validate the new value and stop early when it equals the
 		 * current setting, so nothing is reconfigured needlessly.
 		 */
 		if (cmd == GRESKEY) {
 			if (sc->gre_key == opt)
 				break;
 		} else if (cmd == GRESOPTS) {
 			if (opt & ~GRE_OPTMASK) {
 				error = EINVAL;
 				break;
 			}
 			if (sc->gre_options == opt)
 				break;
 		} else if (cmd == GRESPORT) {
 			/* 0 is accepted; otherwise stay in the hi-auto range. */
 			if (opt != 0 && (opt < V_ipport_hifirstauto ||
 			    opt > V_ipport_hilastauto)) {
 				error = EINVAL;
 				break;
 			}
 			if (sc->gre_port == opt)
 				break;
 			if ((sc->gre_options & GRE_UDPENCAP) == 0) {
 				/*
 				 * UDP encapsulation is not enabled, thus
 				 * there is no need to reattach softc.
 				 */
 				sc->gre_port = opt;
 				break;
 			}
 		}
 		/* Let the outer family's handler apply the change. */
 		switch (sc->gre_family) {
 #ifdef INET
 		case AF_INET:
 			error = in_gre_setopts(sc, cmd, opt);
 			break;
 #endif
 #ifdef INET6
 		case AF_INET6:
 			error = in6_gre_setopts(sc, cmd, opt);
 			break;
 #endif
 		default:
 			/*
 			 * Tunnel is not yet configured.
 			 * We can just change any parameters.
 			 */
 			if (cmd == GRESKEY)
 				sc->gre_key = opt;
 			if (cmd == GRESOPTS)
 				sc->gre_options = opt;
 			if (cmd == GRESPORT)
 				sc->gre_port = opt;
 			break;
 		}
 		/*
 		 * XXX: Do we need to initiate change of interface
 		 * state here?
 		 */
 		break;
 	case GREGKEY:
 		error = copyout(&sc->gre_key, ifr_data_get_ptr(ifr),
 		    sizeof(sc->gre_key));
 		break;
 	case GREGOPTS:
 		error = copyout(&sc->gre_options, ifr_data_get_ptr(ifr),
 		    sizeof(sc->gre_options));
 		break;
 	case GREGPORT:
 		error = copyout(&sc->gre_port, ifr_data_get_ptr(ifr),
 		    sizeof(sc->gre_port));
 		break;
 	default:
 		error = EINVAL;
 		break;
 	}
 	/*
 	 * The trailing "0" keeps the condition well-formed when neither
 	 * INET nor INET6 is compiled in.
 	 */
 	if (error == 0 && sc->gre_family != 0) {
 		if (
 #ifdef INET
 		    cmd == SIOCSIFPHYADDR ||
 #endif
 #ifdef INET6
 		    cmd == SIOCSIFPHYADDR_IN6 ||
 #endif
 		    0) {
 			if_link_state_change(ifp, LINK_STATE_UP);
 		}
 	}
 end:
 	sx_xunlock(&gre_ioctl_sx);
 	return (error);
 }
 
 /*
  * Unconfigure the tunnel of sc.  The caller must hold gre_ioctl_sx
  * exclusively (asserted).
  *
  * If a tunnel is configured, the softc is unlinked from its chain and
  * srchash lists, sc->gre_hdr is freed after GRE_WAIT(), and the family
  * is reset to 0.  A UDP socket record whose list has become empty is
  * unlinked and closed, and its memory is released later through
  * gre_sofree().  Finally IFF_DRV_RUNNING is cleared and the link state
  * is reported as DOWN.
  */
 static void
 gre_delete_tunnel(struct gre_softc *sc)
 {
 	struct gre_socket *gs;
 
 	sx_assert(&gre_ioctl_sx, SA_XLOCKED);
 	if (sc->gre_family != 0) {
 		CK_LIST_REMOVE(sc, chain);
 		CK_LIST_REMOVE(sc, srchash);
 		GRE_WAIT();
 		free(sc->gre_hdr, M_GRE);
 		sc->gre_family = 0;
 	}
 	/*
 	 * If this Tunnel was the last one that could use UDP socket,
 	 * we should unlink socket from hash table and close it.
 	 */
 	if ((gs = sc->gre_so) != NULL && CK_LIST_EMPTY(&gs->list)) {
 		CK_LIST_REMOVE(gs, chain);
 		soclose(gs->so);
 		/* Defer the free of gs itself to gre_sofree(). */
 		NET_EPOCH_CALL(gre_sofree, &gs->epoch_ctx);
 		sc->gre_so = NULL;
 	}
 	GRE2IFP(sc)->if_drv_flags &= ~IFF_DRV_RUNNING;
 	if_link_state_change(GRE2IFP(sc), LINK_STATE_DOWN);
 }
 
 struct gre_list *
 gre_hashinit(void)
 {
 	struct gre_list *hash;
 	int i;
 
 	hash = malloc(sizeof(struct gre_list) * GRE_HASH_SIZE,
 	    M_GRE, M_WAITOK);
 	for (i = 0; i < GRE_HASH_SIZE; i++)
 		CK_LIST_INIT(&hash[i]);
 
 	return (hash);
 }
 
 /*
  * Free a table obtained from gre_hashinit().  Only the table itself is
  * released; the list entries are not touched.
  */
 void
 gre_hashdestroy(struct gre_list *hash)
 {
 
 	free(hash, M_GRE);
 }
 
 /*
  * Deferred-free callback scheduled by gre_delete_tunnel() through
  * NET_EPOCH_CALL(): recover the struct gre_socket that embeds ctx as
  * its epoch_ctx member and release it.
  */
 void
 gre_sofree(epoch_context_t ctx)
 {
 
 	free(__containerof(ctx, struct gre_socket, epoch_ctx), M_GRE);
 }
 
 /*
  * Add two 16-bit values with end-around carry: a carry out of bit 15
  * is added back into the low 16 bits of the result.
  */
 static __inline uint16_t
 gre_cksum_add(uint16_t sum, uint16_t a)
 {
 	uint32_t wide;
 
 	wide = (uint32_t)sum + a;
 	/* The carry is at most 1, so a single fold is sufficient. */
 	return ((uint16_t)((wide & 0xffff) + (wide >> 16)));
 }
 
 /*
  * Fill in the UDP header template used for GRE-in-UDP encapsulation.
  *
  * The source port comes from sc->gre_port, the destination port is
  * GRE_UDPPORT (both stored in network byte order), the checksum field
  * is set to the caller-supplied csum and the length field is zeroed.
  * The caller must hold gre_ioctl_sx exclusively and GRE_UDPENCAP must
  * be enabled on the softc; both are asserted.
  */
 void
 gre_update_udphdr(struct gre_softc *sc, struct udphdr *udp, uint16_t csum)
 {
 
 	sx_assert(&gre_ioctl_sx, SA_XLOCKED);
 	MPASS(sc->gre_options & GRE_UDPENCAP);
 
 	udp->uh_sport = htons(sc->gre_port);
 	udp->uh_dport = htons(GRE_UDPPORT);
 	udp->uh_ulen = 0;
 	udp->uh_sum = csum;
 }
 
 /*
  * Build the flags word and optional fields of the GRE header template
  * gh from the softc's settings.
  *
  * For each enabled feature -- checksum, key (when non-zero), sequence
  * number -- the matching flag bit is set, one 32-bit option slot is
  * written and sc->gre_hlen grows by four bytes.  The checksum and
  * sequence slots are written as 0; the key slot holds the key in
  * network byte order.  When sequence numbers are disabled,
  * sc->gre_oseq is reset to 0.
  *
  * The caller must hold gre_ioctl_sx exclusively (asserted).
  */
 void
 gre_update_hdr(struct gre_softc *sc, struct grehdr *gh)
 {
 	uint32_t *slot;
 	uint16_t hflags;
 
 	sx_assert(&gre_ioctl_sx, SA_XLOCKED);
 
 	slot = gh->gre_opts;
 	hflags = 0;
 
 	if ((sc->gre_options & GRE_ENABLE_CSUM) != 0) {
 		hflags |= GRE_FLAGS_CP;
 		sc->gre_hlen += 2 * sizeof(uint16_t);
 		*slot = 0;
 		slot++;
 	}
 
 	if (sc->gre_key != 0) {
 		hflags |= GRE_FLAGS_KP;
 		sc->gre_hlen += sizeof(uint32_t);
 		*slot = htonl(sc->gre_key);
 		slot++;
 	}
 
 	if ((sc->gre_options & GRE_ENABLE_SEQ) == 0) {
 		sc->gre_oseq = 0;
 	} else {
 		hflags |= GRE_FLAGS_SP;
 		sc->gre_hlen += sizeof(uint32_t);
 		*slot = 0;
 		slot++;
 	}
 
 	gh->gre_flags = htons(hflags);
 }
 
 /*
  * Input handler for a GRE packet.  arg is the gre_softc of the
  * receiving tunnel and off is the offset of the GRE header within m.
  *
  * The header flags are validated, the optional checksum is verified,
  * and the optional key and sequence fields are skipped.  The payload
  * is then stripped of everything up to and including the GRE header,
  * tapped to BPF, counted, and dispatched to the IPv4 or IPv6 netisr
  * (or freed if the interface is in monitor mode).
  *
  * Always consumes m and always returns IPPROTO_DONE.  Dropped packets
  * bump IFCOUNTER_IERRORS.
  */
 int
 gre_input(struct mbuf *m, int off, int proto, void *arg)
 {
 	struct gre_softc *sc = arg;
 	struct grehdr *gh;
 	struct ifnet *ifp;
 	uint32_t *opts;
 #ifdef notyet
 	uint32_t key;
 #endif
 	uint16_t flags;
 	int hlen, isr, af;
 
 	ifp = GRE2IFP(sc);
 	/*
 	 * Require, and make contiguous, the base header plus four 32-bit
 	 * words following it -- enough for every option read below.
 	 */
 	hlen = off + sizeof(struct grehdr) + 4 * sizeof(uint32_t);
 	if (m->m_pkthdr.len < hlen)
 		goto drop;
 	if (m->m_len < hlen) {
 		m = m_pullup(m, hlen);
 		if (m == NULL)
 			goto drop;
 	}
 	gh = (struct grehdr *)mtodo(m, off);
 	flags = ntohs(gh->gre_flags);
 	if (flags & ~GRE_FLAGS_MASK)
 		goto drop;
 	opts = gh->gre_opts;
 	/* From here on hlen is the GRE header length: 4 bytes + options. */
 	hlen = 2 * sizeof(uint16_t);
 	if (flags & GRE_FLAGS_CP) {
 		/* reserved1 field must be zero */
 		if (((uint16_t *)opts)[1] != 0)
 			goto drop;
 		if (in_cksum_skip(m, m->m_pkthdr.len, off) != 0)
 			goto drop;
 		hlen += 2 * sizeof(uint16_t);
 		opts++;
 	}
 	if (flags & GRE_FLAGS_KP) {
 #ifdef notyet
 		/*
 		 * XXX: The current implementation uses the key only for
 		 * outgoing packets. But we can check the key value here, or
 		 * even in the encapcheck function.
 		 */
 		key = ntohl(*opts);
 #endif
 		hlen += sizeof(uint32_t);
 		opts++;
     }
 	/*
 	 * NOTE(review): the "notyet" code below is dead and would not
 	 * compile if enabled -- its leading "}" has no matching "{", and
 	 * "seq" further down is never declared.
 	 */
 #ifdef notyet
 	} else
 		key = 0;
 
 	if (sc->gre_key != 0 && (key != sc->gre_key || key != 0))
 		goto drop;
 #endif
 	if (flags & GRE_FLAGS_SP) {
 #ifdef notyet
 		seq = ntohl(*opts);
 #endif
 		hlen += sizeof(uint32_t);
 	}
 	switch (ntohs(gh->gre_proto)) {
 	case ETHERTYPE_WCCP:
 		/*
 		 * For WCCP skip an additional 4 bytes if after GRE header
 		 * doesn't follow an IP header.
 		 */
 		if (flags == 0 && (*(uint8_t *)gh->gre_opts & 0xF0) != 0x40)
 			hlen += sizeof(uint32_t);
 		/* FALLTHROUGH */
 	case ETHERTYPE_IP:
 		isr = NETISR_IP;
 		af = AF_INET;
 		break;
 	case ETHERTYPE_IPV6:
 		isr = NETISR_IPV6;
 		af = AF_INET6;
 		break;
 	default:
 		goto drop;
 	}
 	/* Strip everything up to and including the GRE header. */
 	m_adj(m, off + hlen);
 	m_clrprotoflags(m);
 	m->m_pkthdr.rcvif = ifp;
 	M_SETFIB(m, ifp->if_fib);
 #ifdef MAC
 	mac_ifnet_create_mbuf(ifp, m);
 #endif
 	BPF_MTAP2(ifp, &af, sizeof(af), m);
 	if_inc_counter(ifp, IFCOUNTER_IPACKETS, 1);
 	if_inc_counter(ifp, IFCOUNTER_IBYTES, m->m_pkthdr.len);
 	/* Monitor mode: the packet was tapped and counted; discard it. */
 	if ((ifp->if_flags & IFF_MONITOR) != 0)
 		m_freem(m);
 	else
 		netisr_dispatch(isr, m);
 	return (IPPROTO_DONE);
 drop:
 	if_inc_counter(ifp, IFCOUNTER_IERRORS, 1);
 	m_freem(m);
 	return (IPPROTO_DONE);
 }
 
 static int
 gre_output(struct ifnet *ifp, struct mbuf *m, const struct sockaddr *dst,
    struct route *ro)
 {
 	uint32_t af;
 
 	if (dst->sa_family == AF_UNSPEC)
 		bcopy(dst->sa_data, &af, sizeof(af));
 	else
-		af = dst->sa_family;
+		af = RO_GET_FAMILY(ro, dst);
 	/*
 	 * Now save the af in the inbound pkt csum data, this is a cheat since
 	 * we are using the inbound csum_data field to carry the af over to
 	 * the gre_transmit() routine, avoiding using yet another mtag.
 	 */
 	m->m_pkthdr.csum_data = af;
 	return (ifp->if_transmit(ifp, m));
 }
 
 static void
 gre_setseqn(struct grehdr *gh, uint32_t seq)
 {
 	uint32_t *opts;
 	uint16_t flags;
 
 	opts = gh->gre_opts;
 	flags = ntohs(gh->gre_flags);
 	KASSERT((flags & GRE_FLAGS_SP) != 0,
 	    ("gre_setseqn called, but GRE_FLAGS_SP isn't set "));
 	if (flags & GRE_FLAGS_CP)
 		opts++;
 	if (flags & GRE_FLAGS_KP)
 		opts++;
 	*opts = htonl(seq);
 }
 
 static uint32_t
 gre_flowid(struct gre_softc *sc, struct mbuf *m, uint32_t af)
 {
 	uint32_t flowid = 0;
 
 	if ((sc->gre_options & GRE_UDPENCAP) == 0 || sc->gre_port != 0)
 		return (flowid);
 	switch (af) {
 #ifdef INET
 	case AF_INET:
 #ifdef RSS
 		flowid = rss_hash_ip4_2tuple(mtod(m, struct ip *)->ip_src,
 		    mtod(m, struct ip *)->ip_dst);
 		break;
 #endif
 		flowid = mtod(m, struct ip *)->ip_src.s_addr ^
 		    mtod(m, struct ip *)->ip_dst.s_addr;
 		break;
 #endif
 #ifdef INET6
 	case AF_INET6:
 #ifdef RSS
 		flowid = rss_hash_ip6_2tuple(
 		    &mtod(m, struct ip6_hdr *)->ip6_src,
 		    &mtod(m, struct ip6_hdr *)->ip6_dst);
 		break;
 #endif
 		flowid = mtod(m, struct ip6_hdr *)->ip6_src.s6_addr32[3] ^
 		    mtod(m, struct ip6_hdr *)->ip6_dst.s6_addr32[3];
 		break;
 #endif
 	default:
 		break;
 	}
 	return (flowid);
 }
 
 #define	MTAG_GRE	1307983903
 static int
 gre_transmit(struct ifnet *ifp, struct mbuf *m)
 {
 	GRE_RLOCK_TRACKER;
 	struct gre_softc *sc;
 	struct grehdr *gh;
 	struct udphdr *uh;
 	uint32_t af, flowid;
 	int error, len;
 	uint16_t proto;
 
 	len = 0;
 	GRE_RLOCK();
 #ifdef MAC
 	error = mac_ifnet_check_transmit(ifp, m);
 	if (error) {
 		m_freem(m);
 		goto drop;
 	}
 #endif
 	error = ENETDOWN;
 	sc = ifp->if_softc;
 	if ((ifp->if_flags & IFF_MONITOR) != 0 ||
 	    (ifp->if_flags & IFF_UP) == 0 ||
 	    (ifp->if_drv_flags & IFF_DRV_RUNNING) == 0 ||
 	    sc->gre_family == 0 ||
 	    (error = if_tunnel_check_nesting(ifp, m, MTAG_GRE,
 		V_max_gre_nesting)) != 0) {
 		m_freem(m);
 		goto drop;
 	}
 	af = m->m_pkthdr.csum_data;
 	BPF_MTAP2(ifp, &af, sizeof(af), m);
 	m->m_flags &= ~(M_BCAST|M_MCAST);
 	flowid = gre_flowid(sc, m, af);
 	M_SETFIB(m, sc->gre_fibnum);
 	M_PREPEND(m, sc->gre_hlen, M_NOWAIT);
 	if (m == NULL) {
 		error = ENOBUFS;
 		goto drop;
 	}
 	bcopy(sc->gre_hdr, mtod(m, void *), sc->gre_hlen);
 	/* Determine GRE proto */
 	switch (af) {
 #ifdef INET
 	case AF_INET:
 		proto = htons(ETHERTYPE_IP);
 		break;
 #endif
 #ifdef INET6
 	case AF_INET6:
 		proto = htons(ETHERTYPE_IPV6);
 		break;
 #endif
 	default:
 		m_freem(m);
 		error = ENETDOWN;
 		goto drop;
 	}
 	/* Determine offset of GRE header */
 	switch (sc->gre_family) {
 #ifdef INET
 	case AF_INET:
 		len = sizeof(struct ip);
 		break;
 #endif
 #ifdef INET6
 	case AF_INET6:
 		len = sizeof(struct ip6_hdr);
 		break;
 #endif
 	default:
 		m_freem(m);
 		error = ENETDOWN;
 		goto drop;
 	}
 	if (sc->gre_options & GRE_UDPENCAP) {
 		uh = (struct udphdr *)mtodo(m, len);
 		uh->uh_sport |= htons(V_ipport_hifirstauto) |
 		    (flowid >> 16) | (flowid & 0xFFFF);
 		uh->uh_sport = htons(ntohs(uh->uh_sport) %
 		    V_ipport_hilastauto);
 		uh->uh_ulen = htons(m->m_pkthdr.len - len);
 		uh->uh_sum = gre_cksum_add(uh->uh_sum,
 		    htons(m->m_pkthdr.len - len + IPPROTO_UDP));
 		m->m_pkthdr.csum_flags = sc->gre_csumflags;
 		m->m_pkthdr.csum_data = offsetof(struct udphdr, uh_sum);
 		len += sizeof(struct udphdr);
 	}
 	gh = (struct grehdr *)mtodo(m, len);
 	gh->gre_proto = proto;
 	if (sc->gre_options & GRE_ENABLE_SEQ)
 		gre_setseqn(gh, sc->gre_oseq++);
 	if (sc->gre_options & GRE_ENABLE_CSUM) {
 		*(uint16_t *)gh->gre_opts = in_cksum_skip(m,
 		    m->m_pkthdr.len, len);
 	}
 	len = m->m_pkthdr.len - len;
 	switch (sc->gre_family) {
 #ifdef INET
 	case AF_INET:
 		error = in_gre_output(m, af, sc->gre_hlen);
 		break;
 #endif
 #ifdef INET6
 	case AF_INET6:
 		error = in6_gre_output(m, af, sc->gre_hlen, flowid);
 		break;
 #endif
 	default:
 		m_freem(m);
 		error = ENETDOWN;
 	}
 drop:
 	if (error)
 		if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
 	else {
 		if_inc_counter(ifp, IFCOUNTER_OPACKETS, 1);
 		if_inc_counter(ifp, IFCOUNTER_OBYTES, len);
 	}
 	GRE_RUNLOCK();
 	return (error);
 }
 
 static void
 gre_qflush(struct ifnet *ifp __unused)
 {
 
 }
 
 static int
 gremodevent(module_t mod, int type, void *data)
 {
 
 	switch (type) {
 	case MOD_LOAD:
 	case MOD_UNLOAD:
 		break;
 	default:
 		return (EOPNOTSUPP);
 	}
 	return (0);
 }
 
 static moduledata_t gre_mod = {
 	"if_gre",
 	gremodevent,
 	0
 };
 
 DECLARE_MODULE(if_gre, gre_mod, SI_SUB_PSEUDO, SI_ORDER_ANY);
 MODULE_VERSION(if_gre, 1);
diff --git a/sys/net/if_infiniband.c b/sys/net/if_infiniband.c
index 244b2a5ba117..4dfbd5272d15 100644
--- a/sys/net/if_infiniband.c
+++ b/sys/net/if_infiniband.c
@@ -1,651 +1,652 @@
 /*-
  * Copyright (c) 2020 Mellanox Technologies. All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS `AS IS' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  */
 
 #include "opt_inet.h"
 #include "opt_inet6.h"
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/devctl.h>
 #include <sys/eventhandler.h>
 #include <sys/kernel.h>
 #include <sys/mbuf.h>
 #include <sys/module.h>
 #include <sys/socket.h>
 #include <sys/sysctl.h>
 
 #include <net/bpf.h>
 #include <net/ethernet.h>
 #include <net/infiniband.h>
 #include <net/if.h>
 #include <net/if_var.h>
 #include <net/if_dl.h>
 #include <net/if_media.h>
 #include <net/if_lagg.h>
 #include <net/if_llatbl.h>
 #include <net/if_types.h>
 #include <net/netisr.h>
 #include <net/route.h>
 #include <netinet/if_ether.h>
 #include <netinet/in.h>
 #include <netinet/ip6.h>
 #include <netinet6/in6_var.h>
 #include <netinet6/nd6.h>
 
 #include <security/mac/mac_framework.h>
 
 /* if_lagg(4) support */
 struct mbuf *(*lagg_input_infiniband_p)(struct ifnet *, struct mbuf *);
 
 #ifdef INET
 static inline void
 infiniband_ipv4_multicast_map(uint32_t addr,
     const uint8_t *broadcast, uint8_t *buf)
 {
 	uint8_t scope;
 
 	addr = ntohl(addr);
 	scope = broadcast[5] & 0xF;
 
 	buf[0] = 0;
 	buf[1] = 0xff;
 	buf[2] = 0xff;
 	buf[3] = 0xff;
 	buf[4] = 0xff;
 	buf[5] = 0x10 | scope;
 	buf[6] = 0x40;
 	buf[7] = 0x1b;
 	buf[8] = broadcast[8];
 	buf[9] = broadcast[9];
 	buf[10] = 0;
 	buf[11] = 0;
 	buf[12] = 0;
 	buf[13] = 0;
 	buf[14] = 0;
 	buf[15] = 0;
 	buf[16] = (addr >> 24) & 0xff;
 	buf[17] = (addr >> 16) & 0xff;
 	buf[18] = (addr >> 8) & 0xff;
 	buf[19] = addr & 0xff;
 }
 #endif
 
 #ifdef INET6
 static inline void
 infiniband_ipv6_multicast_map(const struct in6_addr *addr,
     const uint8_t *broadcast, uint8_t *buf)
 {
 	uint8_t scope;
 
 	scope = broadcast[5] & 0xF;
 
 	buf[0] = 0;
 	buf[1] = 0xff;
 	buf[2] = 0xff;
 	buf[3] = 0xff;
 	buf[4] = 0xff;
 	buf[5] = 0x10 | scope;
 	buf[6] = 0x60;
 	buf[7] = 0x1b;
 	buf[8] = broadcast[8];
 	buf[9] = broadcast[9];
 	memcpy(&buf[10], &addr->s6_addr[6], 10);
 }
 #endif
 
 /*
  * This is for clients that have an infiniband_header in the mbuf.
  */
 void
 infiniband_bpf_mtap(struct ifnet *ifp, struct mbuf *mb)
 {
 	struct infiniband_header *ibh;
 	struct ether_header eh;
 
 	if (mb->m_len < sizeof(*ibh))
 		return;
 
 	ibh = mtod(mb, struct infiniband_header *);
 	eh.ether_type = ibh->ib_protocol;
 	memset(eh.ether_shost, 0, ETHER_ADDR_LEN);
 	memcpy(eh.ether_dhost, ibh->ib_hwaddr + 4, ETHER_ADDR_LEN);
 	mb->m_data += sizeof(*ibh);
 	mb->m_len -= sizeof(*ibh);
 	mb->m_pkthdr.len -= sizeof(*ibh);
 	bpf_mtap2(ifp->if_bpf, &eh, sizeof(eh), mb);
 	mb->m_data -= sizeof(*ibh);
 	mb->m_len += sizeof(*ibh);
 	mb->m_pkthdr.len += sizeof(*ibh);
 }
 
 static void
 update_mbuf_csumflags(struct mbuf *src, struct mbuf *dst)
 {
 	int csum_flags = 0;
 
 	if (src->m_pkthdr.csum_flags & CSUM_IP)
 		csum_flags |= (CSUM_IP_CHECKED|CSUM_IP_VALID);
 	if (src->m_pkthdr.csum_flags & CSUM_DELAY_DATA)
 		csum_flags |= (CSUM_DATA_VALID|CSUM_PSEUDO_HDR);
 	if (src->m_pkthdr.csum_flags & CSUM_SCTP)
 		csum_flags |= CSUM_SCTP_VALID;
 	dst->m_pkthdr.csum_flags |= csum_flags;
 	if (csum_flags & CSUM_DATA_VALID)
 		dst->m_pkthdr.csum_data = 0xffff;
 }
 
 /*
  * Handle link-layer encapsulation requests.
  */
 static int
 infiniband_requestencap(struct ifnet *ifp, struct if_encap_req *req)
 {
 	struct infiniband_header *ih;
 	struct arphdr *ah;
 	uint16_t etype;
 	const uint8_t *lladdr;
 
 	if (req->rtype != IFENCAP_LL)
 		return (EOPNOTSUPP);
 
 	if (req->bufsize < INFINIBAND_HDR_LEN)
 		return (ENOMEM);
 
 	ih = (struct infiniband_header *)req->buf;
 	lladdr = req->lladdr;
 	req->lladdr_off = 0;
 
 	switch (req->family) {
 	case AF_INET:
 		etype = htons(ETHERTYPE_IP);
 		break;
 	case AF_INET6:
 		etype = htons(ETHERTYPE_IPV6);
 		break;
 	case AF_ARP:
 		ah = (struct arphdr *)req->hdata;
 		ah->ar_hrd = htons(ARPHRD_INFINIBAND);
 
 		switch (ntohs(ah->ar_op)) {
 		case ARPOP_REVREQUEST:
 		case ARPOP_REVREPLY:
 			etype = htons(ETHERTYPE_REVARP);
 			break;
 		case ARPOP_REQUEST:
 		case ARPOP_REPLY:
 		default:
 			etype = htons(ETHERTYPE_ARP);
 			break;
 		}
 
 		if (req->flags & IFENCAP_FLAG_BROADCAST)
 			lladdr = ifp->if_broadcastaddr;
 		break;
 	default:
 		return (EAFNOSUPPORT);
 	}
 
 	ih->ib_protocol = etype;
 	ih->ib_reserved = 0;
 	memcpy(ih->ib_hwaddr, lladdr, INFINIBAND_ADDR_LEN);
 	req->bufsize = sizeof(struct infiniband_header);
 
 	return (0);
 }
 
 static int
 infiniband_resolve_addr(struct ifnet *ifp, struct mbuf *m,
     const struct sockaddr *dst, struct route *ro, uint8_t *phdr,
     uint32_t *pflags, struct llentry **plle)
 {
 	struct infiniband_header *ih;
 	uint32_t lleflags = 0;
 	int error = 0;
 
 	if (plle)
 		*plle = NULL;
 	ih = (struct infiniband_header *)phdr;
 
 	switch (dst->sa_family) {
 #ifdef INET
 	case AF_INET:
 		if ((m->m_flags & (M_BCAST | M_MCAST)) == 0) {
 			error = arpresolve(ifp, 0, m, dst, phdr, &lleflags, plle);
 		} else {
 			if (m->m_flags & M_BCAST) {
 				memcpy(ih->ib_hwaddr, ifp->if_broadcastaddr,
 				    INFINIBAND_ADDR_LEN);
 			} else {
 				infiniband_ipv4_multicast_map(
 				    ((const struct sockaddr_in *)dst)->sin_addr.s_addr,
 				    ifp->if_broadcastaddr, ih->ib_hwaddr);
 			}
 			ih->ib_protocol = htons(ETHERTYPE_IP);
 			ih->ib_reserved = 0;
 		}
 		break;
 #endif
 #ifdef INET6
 	case AF_INET6:
 		if ((m->m_flags & M_MCAST) == 0) {
-			error = nd6_resolve(ifp, LLE_SF(AF_INET6, 0), m, dst,
-			    phdr, &lleflags, plle);
+			int af = RO_GET_FAMILY(ro, dst);
+			error = nd6_resolve(ifp, LLE_SF(af, 0), m, dst, phdr,
+			    &lleflags, plle);
 		} else {
 			infiniband_ipv6_multicast_map(
 			    &((const struct sockaddr_in6 *)dst)->sin6_addr,
 			    ifp->if_broadcastaddr, ih->ib_hwaddr);
 			ih->ib_protocol = htons(ETHERTYPE_IPV6);
 			ih->ib_reserved = 0;
 		}
 		break;
 #endif
 	default:
 		if_printf(ifp, "can't handle af%d\n", dst->sa_family);
 		if (m != NULL)
 			m_freem(m);
 		return (EAFNOSUPPORT);
 	}
 
 	if (error == EHOSTDOWN) {
 		if (ro != NULL && (ro->ro_flags & RT_HAS_GW) != 0)
 			error = EHOSTUNREACH;
 	}
 
 	if (error != 0)
 		return (error);
 
 	*pflags = RT_MAY_LOOP;
 	if (lleflags & LLE_IFADDR)
 		*pflags |= RT_L2_ME;
 
 	return (0);
 }
 
 /*
  * Infiniband output routine.
  */
 static int
 infiniband_output(struct ifnet *ifp, struct mbuf *m,
     const struct sockaddr *dst, struct route *ro)
 {
 	uint8_t linkhdr[INFINIBAND_HDR_LEN];
 	uint8_t *phdr;
 	struct llentry *lle = NULL;
 	struct infiniband_header *ih;
 	int error = 0;
 	int hlen;	/* link layer header length */
 	uint32_t pflags;
 	bool addref;
 
 	NET_EPOCH_ASSERT();
 
 	addref = false;
 	phdr = NULL;
 	pflags = 0;
 	if (ro != NULL) {
 		/* XXX BPF uses ro_prepend */
 		if (ro->ro_prepend != NULL) {
 			phdr = ro->ro_prepend;
 			hlen = ro->ro_plen;
 		} else if (!(m->m_flags & (M_BCAST | M_MCAST))) {
 			if ((ro->ro_flags & RT_LLE_CACHE) != 0) {
 				lle = ro->ro_lle;
 				if (lle != NULL &&
 				    (lle->la_flags & LLE_VALID) == 0) {
 					LLE_FREE(lle);
 					lle = NULL;	/* redundant */
 					ro->ro_lle = NULL;
 				}
 				if (lle == NULL) {
 					/* if we lookup, keep cache */
 					addref = 1;
 				} else
 					/*
 					 * Notify LLE code that
 					 * the entry was used
 					 * by datapath.
 					 */
 					llentry_provide_feedback(lle);
 			}
 			if (lle != NULL) {
 				phdr = lle->r_linkdata;
 				hlen = lle->r_hdrlen;
 				pflags = lle->r_flags;
 			}
 		}
 	}
 
 #ifdef MAC
 	error = mac_ifnet_check_transmit(ifp, m);
 	if (error)
 		goto bad;
 #endif
 
 	M_PROFILE(m);
 	if (ifp->if_flags & IFF_MONITOR) {
 		error = ENETDOWN;
 		goto bad;
 	}
 	if (!((ifp->if_flags & IFF_UP) &&
 	    (ifp->if_drv_flags & IFF_DRV_RUNNING))) {
 		error = ENETDOWN;
 		goto bad;
 	}
 
 	if (phdr == NULL) {
 		/* No prepend data supplied. Try to calculate ourselves. */
 		phdr = linkhdr;
 		hlen = INFINIBAND_HDR_LEN;
 		error = infiniband_resolve_addr(ifp, m, dst, ro, phdr, &pflags,
 		    addref ? &lle : NULL);
 		if (addref && lle != NULL)
 			ro->ro_lle = lle;
 		if (error != 0)
 			return (error == EWOULDBLOCK ? 0 : error);
 	}
 
 	if ((pflags & RT_L2_ME) != 0) {
 		update_mbuf_csumflags(m, m);
-		return (if_simloop(ifp, m, dst->sa_family, 0));
+		return (if_simloop(ifp, m, RO_GET_FAMILY(ro, dst), 0));
 	}
 
 	/*
 	 * Add local infiniband header. If no space in first mbuf,
 	 * allocate another.
 	 */
 	M_PREPEND(m, INFINIBAND_HDR_LEN, M_NOWAIT);
 	if (m == NULL) {
 		error = ENOBUFS;
 		goto bad;
 	}
 	if ((pflags & RT_HAS_HEADER) == 0) {
 		ih = mtod(m, struct infiniband_header *);
 		memcpy(ih, phdr, hlen);
 	}
 
 	/*
 	 * Queue message on interface, update output statistics if
 	 * successful, and start output if interface not yet active.
 	 */
 	return (ifp->if_transmit(ifp, m));
 bad:
 	if (m != NULL)
 		m_freem(m);
 	return (error);
 }
 
 /*
  * Process a received Infiniband packet.
  */
 static void
 infiniband_input(struct ifnet *ifp, struct mbuf *m)
 {
 	struct infiniband_header *ibh;
 	struct epoch_tracker et;
 	int isr;
 
 	CURVNET_SET_QUIET(ifp->if_vnet);
 
 	if ((ifp->if_flags & IFF_UP) == 0) {
 		if_inc_counter(ifp, IFCOUNTER_IERRORS, 1);
 		m_freem(m);
 		goto done;
 	}
 
 	ibh = mtod(m, struct infiniband_header *);
 
 	/*
 	 * Reset layer specific mbuf flags to avoid confusing upper
 	 * layers:
 	 */
 	m->m_flags &= ~M_VLANTAG;
 	m_clrprotoflags(m);
 
 	if (INFINIBAND_IS_MULTICAST(ibh->ib_hwaddr)) {
 		if (memcmp(ibh->ib_hwaddr, ifp->if_broadcastaddr,
 		    ifp->if_addrlen) == 0)
 			m->m_flags |= M_BCAST;
 		else
 			m->m_flags |= M_MCAST;
 		if_inc_counter(ifp, IFCOUNTER_IMCASTS, 1);
 	}
 
 	/* Let BPF have it before we strip the header. */
 	INFINIBAND_BPF_MTAP(ifp, m);
 
 	/* Allow monitor mode to claim this frame, after stats are updated. */
 	if (ifp->if_flags & IFF_MONITOR) {
 		m_freem(m);
 		goto done;
 	}
 
 	/* Direct packet to correct FIB based on interface config. */
 	M_SETFIB(m, ifp->if_fib);
 
 	/* Handle input from a lagg<N> port */
 	if (ifp->if_type == IFT_INFINIBANDLAG) {
 		KASSERT(lagg_input_infiniband_p != NULL,
 		    ("%s: if_lagg not loaded!", __func__));
 		m = (*lagg_input_infiniband_p)(ifp, m);
 		if (__predict_false(m == NULL))
 			goto done;
 		ifp = m->m_pkthdr.rcvif;
 	}
 
 	/*
 	 * Dispatch frame to upper layer.
 	 */
 	switch (ibh->ib_protocol) {
 #ifdef INET
 	case htons(ETHERTYPE_IP):
 		isr = NETISR_IP;
 		break;
 
 	case htons(ETHERTYPE_ARP):
 		if (ifp->if_flags & IFF_NOARP) {
 			/* Discard packet if ARP is disabled on interface */
 			m_freem(m);
 			goto done;
 		}
 		isr = NETISR_ARP;
 		break;
 #endif
 #ifdef INET6
 	case htons(ETHERTYPE_IPV6):
 		isr = NETISR_IPV6;
 		break;
 #endif
 	default:
 		if_inc_counter(ifp, IFCOUNTER_IERRORS, 1);
 		m_freem(m);
 		goto done;
 	}
 
 	/* Strip off the Infiniband header. */
 	m_adj(m, INFINIBAND_HDR_LEN);
 
 #ifdef MAC
 	/*
 	 * Tag the mbuf with an appropriate MAC label before any other
 	 * consumers can get to it.
 	 */
 	mac_ifnet_create_mbuf(ifp, m);
 #endif
 	/* Allow monitor mode to claim this frame, after stats are updated. */
 	NET_EPOCH_ENTER(et);
 	netisr_dispatch(isr, m);
 	NET_EPOCH_EXIT(et);
 done:
 	CURVNET_RESTORE();
 }
 
 static int
 infiniband_resolvemulti(struct ifnet *ifp, struct sockaddr **llsa,
     struct sockaddr *sa)
 {
 	struct sockaddr_dl *sdl;
 #ifdef INET
 	struct sockaddr_in *sin;
 #endif
 #ifdef INET6
 	struct sockaddr_in6 *sin6;
 #endif
 	uint8_t *e_addr;
 
 	switch (sa->sa_family) {
 	case AF_LINK:
 		/*
 		 * No mapping needed. Just check that it's a valid MC address.
 		 */
 		sdl = (struct sockaddr_dl *)sa;
 		e_addr = LLADDR(sdl);
 		if (!INFINIBAND_IS_MULTICAST(e_addr))
 			return (EADDRNOTAVAIL);
 		*llsa = NULL;
 		return 0;
 
 #ifdef INET
 	case AF_INET:
 		sin = (struct sockaddr_in *)sa;
 		if (!IN_MULTICAST(ntohl(sin->sin_addr.s_addr)))
 			return (EADDRNOTAVAIL);
 		sdl = link_init_sdl(ifp, *llsa, IFT_INFINIBAND);
 		sdl->sdl_alen = INFINIBAND_ADDR_LEN;
 		e_addr = LLADDR(sdl);
 		infiniband_ipv4_multicast_map(
 		    sin->sin_addr.s_addr, ifp->if_broadcastaddr, e_addr);
 		*llsa = (struct sockaddr *)sdl;
 		return (0);
 #endif
 #ifdef INET6
 	case AF_INET6:
 		sin6 = (struct sockaddr_in6 *)sa;
 		/*
 		 * An IP6 address of 0 means listen to all of the
 		 * multicast address used for IP6. This has no meaning
 		 * in infiniband.
 		 */
 		if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr))
 			return (EADDRNOTAVAIL);
 		if (!IN6_IS_ADDR_MULTICAST(&sin6->sin6_addr))
 			return (EADDRNOTAVAIL);
 		sdl = link_init_sdl(ifp, *llsa, IFT_INFINIBAND);
 		sdl->sdl_alen = INFINIBAND_ADDR_LEN;
 		e_addr = LLADDR(sdl);
 		infiniband_ipv6_multicast_map(
 		    &sin6->sin6_addr, ifp->if_broadcastaddr, e_addr);
 		*llsa = (struct sockaddr *)sdl;
 		return (0);
 #endif
 	default:
 		return (EAFNOSUPPORT);
 	}
 }
 
 void
 infiniband_ifattach(struct ifnet *ifp, const uint8_t *lla, const uint8_t *llb)
 {
 	struct sockaddr_dl *sdl;
 	struct ifaddr *ifa;
 	int i;
 
 	ifp->if_addrlen = INFINIBAND_ADDR_LEN;
 	ifp->if_hdrlen = INFINIBAND_HDR_LEN;
 	ifp->if_mtu = INFINIBAND_MTU;
 	if_attach(ifp);
 	ifp->if_output = infiniband_output;
 	ifp->if_input = infiniband_input;
 	ifp->if_resolvemulti = infiniband_resolvemulti;
 	ifp->if_requestencap = infiniband_requestencap;
 
 	if (ifp->if_baudrate == 0)
 		ifp->if_baudrate = IF_Gbps(10); /* default value */
 	if (llb != NULL)
 		ifp->if_broadcastaddr = llb;
 
 	ifa = ifp->if_addr;
 	KASSERT(ifa != NULL, ("%s: no lladdr!\n", __func__));
 	sdl = (struct sockaddr_dl *)ifa->ifa_addr;
 	sdl->sdl_type = IFT_INFINIBAND;
 	sdl->sdl_alen = ifp->if_addrlen;
 
 	if (lla != NULL) {
 		memcpy(LLADDR(sdl), lla, ifp->if_addrlen);
 
 		if (ifp->if_hw_addr != NULL)
 			memcpy(ifp->if_hw_addr, lla, ifp->if_addrlen);
 	} else {
 		lla = LLADDR(sdl);
 	}
 
 	/* Attach ethernet compatible network device */
 	bpfattach(ifp, DLT_EN10MB, ETHER_HDR_LEN);
 
 	/* Announce Infiniband MAC address if non-zero. */
 	for (i = 0; i < ifp->if_addrlen; i++)
 		if (lla[i] != 0)
 			break;
 	if (i != ifp->if_addrlen)
 		if_printf(ifp, "Infiniband address: %20D\n", lla, ":");
 
 	/* Add necessary bits are setup; announce it now. */
 	EVENTHANDLER_INVOKE(infiniband_ifattach_event, ifp);
 
 	if (IS_DEFAULT_VNET(curvnet))
 		devctl_notify("INFINIBAND", ifp->if_xname, "IFATTACH", NULL);
 }
 
 /*
  * Perform common duties while detaching an Infiniband interface
  */
 void
 infiniband_ifdetach(struct ifnet *ifp)
 {
 	bpfdetach(ifp);
 	if_detach(ifp);
 }
 
 static int
 infiniband_modevent(module_t mod, int type, void *data)
 {
 	switch (type) {
 	case MOD_LOAD:
 	case MOD_UNLOAD:
 		return (0);
 	default:
 		return (EOPNOTSUPP);
 	}
 }
 
 static moduledata_t infiniband_mod = {
 	.name = "if_infiniband",
 	.evhand = &infiniband_modevent,
 };
 
 DECLARE_MODULE(if_infiniband, infiniband_mod, SI_SUB_INIT_IF, SI_ORDER_ANY);
 MODULE_VERSION(if_infiniband, 1);
diff --git a/sys/net/if_loop.c b/sys/net/if_loop.c
index cbff8200806a..643ef2240fe1 100644
--- a/sys/net/if_loop.c
+++ b/sys/net/if_loop.c
@@ -1,455 +1,455 @@
 /*-
  * SPDX-License-Identifier: BSD-3-Clause
  *
  * Copyright (c) 1982, 1986, 1993
  *	The Regents of the University of California.  All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  *	@(#)if_loop.c	8.2 (Berkeley) 1/9/95
  * $FreeBSD$
  */
 
 /*
  * Loopback interface driver for protocol testing and timing.
  */
 
 #include "opt_inet.h"
 #include "opt_inet6.h"
 #include "opt_rss.h"
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/kernel.h>
 #include <sys/mbuf.h>
 #include <sys/module.h>
 #include <machine/bus.h>
 #include <sys/rman.h>
 #include <sys/socket.h>
 #include <sys/sockio.h>
 #include <sys/sysctl.h>
 
 #include <net/if.h>
 #include <net/if_var.h>
 #include <net/if_clone.h>
 #include <net/if_types.h>
 #include <net/netisr.h>
 #include <net/route.h>
 #include <net/bpf.h>
 #include <net/vnet.h>
 
 #ifdef	INET
 #include <netinet/in.h>
 #include <netinet/in_var.h>
 #endif
 
 #ifdef INET6
 #ifndef INET
 #include <netinet/in.h>
 #endif
 #include <netinet6/in6_var.h>
 #include <netinet/ip6.h>
 #endif
 
 #include <security/mac/mac_framework.h>
 
 #ifdef TINY_LOMTU
 #define	LOMTU	(1024+512)
 #elif defined(LARGE_LOMTU)
 #define LOMTU	131072
 #else
 #define LOMTU	16384
 #endif
 
 #define	LO_CSUM_FEATURES	(CSUM_IP | CSUM_TCP | CSUM_UDP | CSUM_SCTP)
 #define	LO_CSUM_FEATURES6	(CSUM_TCP_IPV6 | CSUM_UDP_IPV6 | CSUM_SCTP_IPV6)
 #define	LO_CSUM_SET		(CSUM_DATA_VALID | CSUM_DATA_VALID_IPV6 | \
 				    CSUM_PSEUDO_HDR | \
 				    CSUM_IP_CHECKED | CSUM_IP_VALID | \
 				    CSUM_SCTP_VALID)
 
 int		loioctl(struct ifnet *, u_long, caddr_t);
 int		looutput(struct ifnet *ifp, struct mbuf *m,
 		    const struct sockaddr *dst, struct route *ro);
 static int	lo_clone_create(struct if_clone *, int, caddr_t);
 static void	lo_clone_destroy(struct ifnet *);
 
 VNET_DEFINE(struct ifnet *, loif);	/* Used externally */
 
 #ifdef VIMAGE
 VNET_DEFINE_STATIC(struct if_clone *, lo_cloner);
 #define	V_lo_cloner		VNET(lo_cloner)
 #endif
 
 static struct if_clone *lo_cloner;
 static const char loname[] = "lo";
 
 static void
 lo_clone_destroy(struct ifnet *ifp)
 {
 
 #ifndef VIMAGE
 	/* XXX: destroying lo0 will lead to panics. */
 	KASSERT(V_loif != ifp, ("%s: destroying lo0", __func__));
 #endif
 
 	bpfdetach(ifp);
 	if_detach(ifp);
 	if_free(ifp);
 }
 
 static int
 lo_clone_create(struct if_clone *ifc, int unit, caddr_t params)
 {
 	struct ifnet *ifp;
 
 	ifp = if_alloc(IFT_LOOP);
 	if (ifp == NULL)
 		return (ENOSPC);
 
 	if_initname(ifp, loname, unit);
 	ifp->if_mtu = LOMTU;
 	ifp->if_flags = IFF_LOOPBACK | IFF_MULTICAST;
 	ifp->if_ioctl = loioctl;
 	ifp->if_output = looutput;
 	ifp->if_snd.ifq_maxlen = ifqmaxlen;
 	ifp->if_capabilities = ifp->if_capenable =
 	    IFCAP_HWCSUM | IFCAP_HWCSUM_IPV6 | IFCAP_LINKSTATE;
 	ifp->if_hwassist = LO_CSUM_FEATURES | LO_CSUM_FEATURES6;
 	if_attach(ifp);
 	bpfattach(ifp, DLT_NULL, sizeof(u_int32_t));
 	if (V_loif == NULL)
 		V_loif = ifp;
 
 	return (0);
 }
 
 static void
 vnet_loif_init(const void *unused __unused)
 {
 
 #ifdef VIMAGE
 	lo_cloner = if_clone_simple(loname, lo_clone_create, lo_clone_destroy,
 	    1);
 	V_lo_cloner = lo_cloner;
 #else
 	lo_cloner = if_clone_simple(loname, lo_clone_create, lo_clone_destroy,
 	    1);
 #endif
 }
 VNET_SYSINIT(vnet_loif_init, SI_SUB_PSEUDO, SI_ORDER_ANY,
     vnet_loif_init, NULL);
 
 #ifdef VIMAGE
 static void
 vnet_loif_uninit(const void *unused __unused)
 {
 
 	if_clone_detach(V_lo_cloner);
 	V_loif = NULL;
 }
 VNET_SYSUNINIT(vnet_loif_uninit, SI_SUB_INIT_IF, SI_ORDER_SECOND,
     vnet_loif_uninit, NULL);
 #endif
 
 static int
 loop_modevent(module_t mod, int type, void *data)
 {
 
 	switch (type) {
 	case MOD_LOAD:
 		break;
 
 	case MOD_UNLOAD:
 		printf("loop module unload - not possible for this module type\n");
 		return (EINVAL);
 
 	default:
 		return (EOPNOTSUPP);
 	}
 	return (0);
 }
 
 static moduledata_t loop_mod = {
 	"if_lo",
 	loop_modevent,
 	0
 };
 
 DECLARE_MODULE(if_lo, loop_mod, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_ANY);
 
 int
 looutput(struct ifnet *ifp, struct mbuf *m, const struct sockaddr *dst,
     struct route *ro)
 {
 	u_int32_t af;
 #ifdef MAC
 	int error;
 #endif
 
 	M_ASSERTPKTHDR(m); /* check if we have the packet header */
 
 #ifdef MAC
 	error = mac_ifnet_check_transmit(ifp, m);
 	if (error) {
 		m_freem(m);
 		return (error);
 	}
 #endif
 
 	if (ro != NULL && ro->ro_flags & (RT_REJECT|RT_BLACKHOLE)) {
 		m_freem(m);
 		return (ro->ro_flags & RT_BLACKHOLE ? 0 : EHOSTUNREACH);
 	}
 
 	if_inc_counter(ifp, IFCOUNTER_OPACKETS, 1);
 	if_inc_counter(ifp, IFCOUNTER_OBYTES, m->m_pkthdr.len);
 
 #ifdef RSS
 	M_HASHTYPE_CLEAR(m);
 #endif
 
 	/* BPF writes need to be handled specially. */
 	if (dst->sa_family == AF_UNSPEC || dst->sa_family == pseudo_AF_HDRCMPLT)
 		bcopy(dst->sa_data, &af, sizeof(af));
 	else
-		af = dst->sa_family;
+		af = RO_GET_FAMILY(ro, dst);
 
 #if 1	/* XXX */
 	switch (af) {
 	case AF_INET:
 		if (ifp->if_capenable & IFCAP_RXCSUM) {
 			m->m_pkthdr.csum_data = 0xffff;
 			m->m_pkthdr.csum_flags = LO_CSUM_SET;
 		}
 		m->m_pkthdr.csum_flags &= ~LO_CSUM_FEATURES;
 		break;
 	case AF_INET6:
 #if 0
 		/*
 		 * XXX-BZ for now always claim the checksum is good despite
 		 * any interface flags.   This is a workaround for 9.1-R and
 		 * a proper solution ought to be sought later.
 		 */
 		if (ifp->if_capenable & IFCAP_RXCSUM_IPV6) {
 			m->m_pkthdr.csum_data = 0xffff;
 			m->m_pkthdr.csum_flags = LO_CSUM_SET;
 		}
 #else
 		m->m_pkthdr.csum_data = 0xffff;
 		m->m_pkthdr.csum_flags = LO_CSUM_SET;
 #endif
 		m->m_pkthdr.csum_flags &= ~LO_CSUM_FEATURES6;
 		break;
 	default:
 		printf("looutput: af=%d unexpected\n", af);
 		m_freem(m);
 		return (EAFNOSUPPORT);
 	}
 #endif
 	return (if_simloop(ifp, m, af, 0));
 }
 
 /*
  * if_simloop()
  *
  * This function is to support software emulation of hardware loopback,
  * i.e., for interfaces with the IFF_SIMPLEX attribute. Since they can't
  * hear their own broadcasts, we create a copy of the packet that we
  * would normally receive via a hardware loopback.
  *
  * This function expects the packet to include the media header of length hlen.
  */
 int
 if_simloop(struct ifnet *ifp, struct mbuf *m, int af, int hlen)
 {
 	int isr;
 
 	M_ASSERTPKTHDR(m);
 	m_tag_delete_nonpersistent(m);
 	m->m_pkthdr.rcvif = ifp;
 
 #ifdef MAC
 	mac_ifnet_create_mbuf(ifp, m);
 #endif
 
 	/*
 	 * Let BPF see incoming packet in the following manner:
 	 *  - Emulated packet loopback for a simplex interface
 	 *    (net/if_ethersubr.c)
 	 *	-> passes it to ifp's BPF
 	 *  - IPv4/v6 multicast packet loopback (netinet(6)/ip(6)_output.c)
 	 *	-> not passes it to any BPF
 	 *  - Normal packet loopback from myself to myself (net/if_loop.c)
 	 *	-> passes to lo0's BPF (even in case of IPv6, where ifp!=lo0)
 	 */
 	if (hlen > 0) {
 		if (bpf_peers_present(ifp->if_bpf)) {
 			bpf_mtap(ifp->if_bpf, m);
 		}
 	} else {
 		if (bpf_peers_present(V_loif->if_bpf)) {
 			if ((m->m_flags & M_MCAST) == 0 || V_loif == ifp) {
 				/* XXX beware sizeof(af) != 4 */
 				u_int32_t af1 = af;
 
 				/*
 				 * We need to prepend the address family.
 				 */
 				bpf_mtap2(V_loif->if_bpf, &af1, sizeof(af1), m);
 			}
 		}
 	}
 
 	/* Strip away media header */
 	if (hlen > 0) {
 		m_adj(m, hlen);
 #ifndef __NO_STRICT_ALIGNMENT
 		/*
 		 * Some archs do not like unaligned data, so
 		 * we move data down in the first mbuf.
 		 */
 		if (mtod(m, vm_offset_t) & 3) {
 			KASSERT(hlen >= 3, ("if_simloop: hlen too small"));
 			bcopy(m->m_data,
 			    (char *)(mtod(m, vm_offset_t)
 				- (mtod(m, vm_offset_t) & 3)),
 			    m->m_len);
 			m->m_data -= (mtod(m,vm_offset_t) & 3);
 		}
 #endif
 	}
 
 	/* Deliver to upper layer protocol */
 	switch (af) {
 #ifdef INET
 	case AF_INET:
 		isr = NETISR_IP;
 		break;
 #endif
 #ifdef INET6
 	case AF_INET6:
 		m->m_flags |= M_LOOP;
 		isr = NETISR_IPV6;
 		break;
 #endif
 	default:
 		printf("if_simloop: can't handle af=%d\n", af);
 		m_freem(m);
 		return (EAFNOSUPPORT);
 	}
 	if_inc_counter(ifp, IFCOUNTER_IPACKETS, 1);
 	if_inc_counter(ifp, IFCOUNTER_IBYTES, m->m_pkthdr.len);
 	netisr_queue(isr, m);	/* mbuf is free'd on failure. */
 	return (0);
 }
 
 /*
  * Process an ioctl request.
  */
 /* ARGSUSED */
 int
 loioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
 {
 	struct ifreq *ifr = (struct ifreq *)data;
 	int error = 0, mask;
 
 	switch (cmd) {
 	case SIOCSIFADDR:
 		ifp->if_flags |= IFF_UP;
 		ifp->if_drv_flags |= IFF_DRV_RUNNING;
 		if_link_state_change(ifp, LINK_STATE_UP);
 		/*
 		 * Everything else is done at a higher level.
 		 */
 		break;
 
 	case SIOCADDMULTI:
 	case SIOCDELMULTI:
 		if (ifr == NULL) {
 			error = EAFNOSUPPORT;		/* XXX */
 			break;
 		}
 		switch (ifr->ifr_addr.sa_family) {
 #ifdef INET
 		case AF_INET:
 			break;
 #endif
 #ifdef INET6
 		case AF_INET6:
 			break;
 #endif
 
 		default:
 			error = EAFNOSUPPORT;
 			break;
 		}
 		break;
 
 	case SIOCSIFMTU:
 		ifp->if_mtu = ifr->ifr_mtu;
 		break;
 
 	case SIOCSIFFLAGS:
 		if_link_state_change(ifp, (ifp->if_flags & IFF_UP) ?
 		    LINK_STATE_UP: LINK_STATE_DOWN);
 		break;
 
 	case SIOCSIFCAP:
 		mask = ifp->if_capenable ^ ifr->ifr_reqcap;
 		if ((mask & IFCAP_RXCSUM) != 0)
 			ifp->if_capenable ^= IFCAP_RXCSUM;
 		if ((mask & IFCAP_TXCSUM) != 0)
 			ifp->if_capenable ^= IFCAP_TXCSUM;
 		if ((mask & IFCAP_RXCSUM_IPV6) != 0) {
 #if 0
 			ifp->if_capenable ^= IFCAP_RXCSUM_IPV6;
 #else
 			error = EOPNOTSUPP;
 			break;
 #endif
 		}
 		if ((mask & IFCAP_TXCSUM_IPV6) != 0) {
 #if 0
 			ifp->if_capenable ^= IFCAP_TXCSUM_IPV6;
 #else
 			error = EOPNOTSUPP;
 			break;
 #endif
 		}
 		ifp->if_hwassist = 0;
 		if (ifp->if_capenable & IFCAP_TXCSUM)
 			ifp->if_hwassist = LO_CSUM_FEATURES;
 #if 0
 		if (ifp->if_capenable & IFCAP_TXCSUM_IPV6)
 			ifp->if_hwassist |= LO_CSUM_FEATURES6;
 #endif
 		break;
 
 	default:
 		error = EINVAL;
 	}
 	return (error);
 }
diff --git a/sys/net/if_me.c b/sys/net/if_me.c
index aafc07c2b203..067ab22cd84d 100644
--- a/sys/net/if_me.c
+++ b/sys/net/if_me.c
@@ -1,686 +1,686 @@
 /*-
  * Copyright (c) 2014, 2018 Andrey V. Elsukov <ae@FreeBSD.org>
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  *
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/jail.h>
 #include <sys/kernel.h>
 #include <sys/lock.h>
 #include <sys/malloc.h>
 #include <sys/module.h>
 #include <sys/mbuf.h>
 #include <sys/priv.h>
 #include <sys/proc.h>
 #include <sys/socket.h>
 #include <sys/sockio.h>
 #include <sys/sx.h>
 #include <sys/sysctl.h>
 #include <sys/syslog.h>
 
 #include <net/bpf.h>
 #include <net/ethernet.h>
 #include <net/if.h>
 #include <net/if_var.h>
 #include <net/if_clone.h>
 #include <net/if_types.h>
 #include <net/netisr.h>
 #include <net/vnet.h>
 #include <net/route.h>
 
 #include <netinet/in.h>
 #include <netinet/in_systm.h>
 #include <netinet/in_var.h>
 #include <netinet/ip.h>
 #include <netinet/ip_var.h>
 #include <netinet/ip_encap.h>
 
 #include <machine/in_cksum.h>
 #include <security/mac/mac_framework.h>
 
 #define	MEMTU			(1500 - sizeof(struct mobhdr))
 static const char mename[] = "me";
 static MALLOC_DEFINE(M_IFME, mename, "Minimal Encapsulation for IP");
 /*
  * Minimal forwarding header RFC 2004.
  *
  * The header is placed directly after the outer IP header.  mob_src is
  * only present when MOB_FLAGS_SP is set in mob_flags; me_transmit() and
  * me_input() use sizeof(struct mobhdr) - sizeof(struct in_addr) as the
  * header length otherwise.
  */
 struct mobhdr {
 	uint8_t		mob_proto;	/* protocol */
 	uint8_t		mob_flags;	/* flags */
 #define	MOB_FLAGS_SP	0x80		/* source present */
 	uint16_t	mob_csum;	/* header checksum */
 	struct in_addr	mob_dst;	/* original destination address */
 	struct in_addr	mob_src;	/* original source addr (optional) */
 } __packed;
 
 /*
  * Per-interface state of one "me" tunnel.  A tunnel counts as configured
  * (ME_READY) once me_src is non-zero.
  */
 struct me_softc {
 	struct ifnet		*me_ifp;	/* owning interface */
 	u_int			me_fibnum;	/* FIB set on encapsulated output */
 	struct in_addr		me_src;		/* outer source address */
 	struct in_addr		me_dst;		/* outer destination address */
 
 	CK_LIST_ENTRY(me_softc) chain;		/* linkage in V_me_hashtbl */
 	CK_LIST_ENTRY(me_softc) srchash;	/* linkage in V_me_srchashtbl */
 };
 CK_LIST_HEAD(me_list, me_softc);
 #define	ME2IFP(sc)		((sc)->me_ifp)
 #define	ME_READY(sc)		((sc)->me_src.s_addr != 0)
 #define	ME_RLOCK_TRACKER	struct epoch_tracker me_et
 #define	ME_RLOCK()		epoch_enter_preempt(net_epoch_preempt, &me_et)
 #define	ME_RUNLOCK()		epoch_exit_preempt(net_epoch_preempt, &me_et)
 #define	ME_WAIT()		epoch_wait_preempt(net_epoch_preempt)
 
 #ifndef ME_HASH_SIZE
 #define	ME_HASH_SIZE	(1 << 4)
 #endif
 VNET_DEFINE_STATIC(struct me_list *, me_hashtbl) = NULL;
 VNET_DEFINE_STATIC(struct me_list *, me_srchashtbl) = NULL;
 #define	V_me_hashtbl		VNET(me_hashtbl)
 #define	V_me_srchashtbl		VNET(me_srchashtbl)
 #define	ME_HASH(src, dst)	(V_me_hashtbl[\
     me_hashval((src), (dst)) & (ME_HASH_SIZE - 1)])
 #define	ME_SRCHASH(src)		(V_me_srchashtbl[\
     fnv_32_buf(&(src), sizeof(src), FNV1_32_INIT) & (ME_HASH_SIZE - 1)])
 
 static struct sx me_ioctl_sx;
 SX_SYSINIT(me_ioctl_sx, &me_ioctl_sx, "me_ioctl");
 
 static int	me_clone_create(struct if_clone *, int, caddr_t);
 static void	me_clone_destroy(struct ifnet *);
 VNET_DEFINE_STATIC(struct if_clone *, me_cloner);
 #define	V_me_cloner	VNET(me_cloner)
 
 #ifdef VIMAGE
 static void	me_reassign(struct ifnet *, struct vnet *, char *);
 #endif
 static void	me_qflush(struct ifnet *);
 static int	me_transmit(struct ifnet *, struct mbuf *);
 static int	me_ioctl(struct ifnet *, u_long, caddr_t);
 static int	me_output(struct ifnet *, struct mbuf *,
 		    const struct sockaddr *, struct route *);
 static int	me_input(struct mbuf *, int, int, void *);
 
 static int	me_set_tunnel(struct me_softc *, in_addr_t, in_addr_t);
 static void	me_delete_tunnel(struct me_softc *);
 
 SYSCTL_DECL(_net_link);
 static SYSCTL_NODE(_net_link, IFT_TUNNEL, me, CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
     "Minimal Encapsulation for IP (RFC 2004)");
 #ifndef MAX_ME_NEST
 #define MAX_ME_NEST 1
 #endif
 
 VNET_DEFINE_STATIC(int, max_me_nesting) = MAX_ME_NEST;
 #define	V_max_me_nesting	VNET(max_me_nesting)
 SYSCTL_INT(_net_link_me, OID_AUTO, max_nesting, CTLFLAG_RW | CTLFLAG_VNET,
     &VNET_NAME(max_me_nesting), 0, "Max nested tunnels");
 
 /*
  * Hash a (src, dst) address pair: the FNV hash of src seeds the FNV hash
  * of dst.  The caller masks the result down to a bucket index.
  */
 static uint32_t
 me_hashval(in_addr_t src, in_addr_t dst)
 {
 
 	return (fnv_32_buf(&dst, sizeof(dst),
 	    fnv_32_buf(&src, sizeof(src), FNV1_32_INIT)));
 }
 
 static struct me_list *
 me_hashinit(void)
 {
 	struct me_list *hash;
 	int i;
 
 	hash = malloc(sizeof(struct me_list) * ME_HASH_SIZE,
 	    M_IFME, M_WAITOK);
 	for (i = 0; i < ME_HASH_SIZE; i++)
 		CK_LIST_INIT(&hash[i]);
 
 	return (hash);
 }
 
 /*
  * Per-VNET initialization: register the "me" interface cloner with
  * me_clone_create()/me_clone_destroy() as its handlers.  The hash
  * tables are not allocated here; me_set_tunnel() creates them on first
  * use.
  */
 static void
 vnet_me_init(const void *unused __unused)
 {
 
 	V_me_cloner = if_clone_simple(mename, me_clone_create,
 	    me_clone_destroy, 0);
 }
 VNET_SYSINIT(vnet_me_init, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_ANY,
     vnet_me_init, NULL);
 
 /*
  * Per-VNET teardown: release the hash tables (if they were ever
  * allocated) and detach the interface cloner.
  *
  * V_me_hashtbl doubles as the "tables exist" flag checked by
  * me_lookup() and me_srcaddr(), so it is cleared before ME_WAIT() and
  * V_me_srchashtbl is freed only afterwards.
  *
  * NOTE(review): V_me_hashtbl itself is freed before ME_WAIT(), and
  * if_clone_detach() runs after both tables are gone -- confirm that no
  * epoch reader or remaining tunnel can still reference the tables at
  * this point.
  */
 static void
 vnet_me_uninit(const void *unused __unused)
 {
 
 	if (V_me_hashtbl != NULL) {
 		free(V_me_hashtbl, M_IFME);
 		V_me_hashtbl = NULL;
 		ME_WAIT();
 		free(V_me_srchashtbl, M_IFME);
 	}
 	if_clone_detach(V_me_cloner);
 }
 VNET_SYSUNINIT(vnet_me_uninit, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_ANY,
     vnet_me_uninit, NULL);
 
 /*
  * Cloner create handler: allocate a zeroed softc and its ifnet, wire up
  * the driver methods and attach the interface together with a DLT_NULL
  * bpf tap.  The tunnel FIB defaults to the FIB of the creating process.
  * Always returns 0.
  */
 static int
 me_clone_create(struct if_clone *ifc, int unit, caddr_t params)
 {
 	struct me_softc *sc;
 	struct ifnet *ifp;
 
 	sc = malloc(sizeof(*sc), M_IFME, M_WAITOK | M_ZERO);
 	sc->me_fibnum = curthread->td_proc->p_fibnum;
 	ifp = ME2IFP(sc) = if_alloc(IFT_TUNNEL);
 	ifp->if_softc = sc;
 	if_initname(ifp, mename, unit);
 
 	/* Interface parameters and driver entry points. */
 	ifp->if_mtu = MEMTU;
 	ifp->if_flags = IFF_POINTOPOINT|IFF_MULTICAST;
 	ifp->if_output = me_output;
 	ifp->if_ioctl = me_ioctl;
 	ifp->if_transmit = me_transmit;
 	ifp->if_qflush = me_qflush;
 #ifdef VIMAGE
 	ifp->if_reassign = me_reassign;
 #endif
 	ifp->if_capabilities |= IFCAP_LINKSTATE;
 	ifp->if_capenable |= IFCAP_LINKSTATE;
 
 	if_attach(ifp);
 	bpfattach(ifp, DLT_NULL, sizeof(u_int32_t));
 	return (0);
 }
 
 #ifdef VIMAGE
 /*
  * if_reassign handler: tear down the tunnel configuration (if the softc
  * is still attached) under the ioctl lock.
  */
 static void
 me_reassign(struct ifnet *ifp, struct vnet *new_vnet __unused,
     char *unused __unused)
 {
 	struct me_softc *sc;
 
 	sx_xlock(&me_ioctl_sx);
 	if ((sc = ifp->if_softc) != NULL)
 		me_delete_tunnel(sc);
 	sx_xunlock(&me_ioctl_sx);
 }
 #endif /* VIMAGE */
 
 /*
  * Cloner destroy handler.
  *
  * Under the ioctl lock the tunnel is unconfigured, bpf and the
  * interface are detached, and if_softc is cleared so that me_ioctl()
  * and me_transmit() see a NULL softc.  The ifnet and softc are freed
  * only after ME_WAIT() has returned.
  */
 static void
 me_clone_destroy(struct ifnet *ifp)
 {
 	struct me_softc *sc;
 
 	sx_xlock(&me_ioctl_sx);
 	sc = ifp->if_softc;
 	me_delete_tunnel(sc);
 	bpfdetach(ifp);
 	if_detach(ifp);
 	ifp->if_softc = NULL;
 	sx_xunlock(&me_ioctl_sx);
 
 	/* Wait for epoch readers before releasing the memory. */
 	ME_WAIT();
 	if_free(ifp);
 	free(sc, M_IFME);
 }
 
 /*
  * Interface ioctl handler.
  *
  * The first switch serves requests that need neither the softc nor the
  * ioctl lock and returns directly.  All remaining requests run with
  * me_ioctl_sx held exclusively and fail with ENXIO once the softc has
  * been detached (if_softc == NULL).
  *
  * Returns 0 on success or an errno value; unknown commands yield EINVAL.
  */
 static int
 me_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
 {
 	struct ifreq *ifr = (struct ifreq *)data;
 	struct sockaddr_in *src, *dst;
 	struct me_softc *sc;
 	int error;
 
 	switch (cmd) {
 	case SIOCSIFMTU:
 		/* 576 is the smallest MTU accepted. */
 		if (ifr->ifr_mtu < 576)
 			return (EINVAL);
 		ifp->if_mtu = ifr->ifr_mtu;
 		return (0);
 	case SIOCSIFADDR:
 		ifp->if_flags |= IFF_UP;
 		/* FALLTHROUGH */
 	case SIOCSIFFLAGS:
 	case SIOCADDMULTI:
 	case SIOCDELMULTI:
 		return (0);
 	}
 	sx_xlock(&me_ioctl_sx);
 	sc = ifp->if_softc;
 	if (sc == NULL) {
 		error = ENXIO;
 		goto end;
 	}
 	error = 0;
 	switch (cmd) {
 	case SIOCSIFPHYADDR:
 		/* Set the outer endpoints; both must be non-zero AF_INET. */
 		src = &((struct in_aliasreq *)data)->ifra_addr;
 		dst = &((struct in_aliasreq *)data)->ifra_dstaddr;
 		if (src->sin_family != dst->sin_family ||
 		    src->sin_family != AF_INET ||
 		    src->sin_len != dst->sin_len ||
 		    src->sin_len != sizeof(struct sockaddr_in)) {
 			error = EINVAL;
 			break;
 		}
 		if (src->sin_addr.s_addr == INADDR_ANY ||
 		    dst->sin_addr.s_addr == INADDR_ANY) {
 			error = EADDRNOTAVAIL;
 			break;
 		}
 		error = me_set_tunnel(sc, src->sin_addr.s_addr,
 		    dst->sin_addr.s_addr);
 		break;
 	case SIOCDIFPHYADDR:
 		me_delete_tunnel(sc);
 		break;
 	case SIOCGIFPSRCADDR:
 	case SIOCGIFPDSTADDR:
 		if (!ME_READY(sc)) {
 			error = EADDRNOTAVAIL;
 			break;
 		}
 		/* "src" is reused as the output sockaddr for both requests. */
 		src = (struct sockaddr_in *)&ifr->ifr_addr;
 		memset(src, 0, sizeof(*src));
 		src->sin_family = AF_INET;
 		src->sin_len = sizeof(*src);
 		switch (cmd) {
 		case SIOCGIFPSRCADDR:
 			src->sin_addr = sc->me_src;
 			break;
 		case SIOCGIFPDSTADDR:
 			src->sin_addr = sc->me_dst;
 			break;
 		}
 		/* Wipe the answer again if prison_if() rejects the address. */
 		error = prison_if(curthread->td_ucred, sintosa(src));
 		if (error != 0)
 			memset(src, 0, sizeof(*src));
 		break;
 	case SIOCGTUNFIB:
 		ifr->ifr_fib = sc->me_fibnum;
 		break;
 	case SIOCSTUNFIB:
 		/* Privileged; the FIB number must be below rt_numfibs. */
 		if ((error = priv_check(curthread, PRIV_NET_GRE)) != 0)
 			break;
 		if (ifr->ifr_fib >= rt_numfibs)
 			error = EINVAL;
 		else
 			sc->me_fibnum = ifr->ifr_fib;
 		break;
 	default:
 		error = EINVAL;
 		break;
 	}
 end:
 	sx_xunlock(&me_ioctl_sx);
 	return (error);
 }
 
 /*
  * Encap lookup callback: find the tunnel whose (me_src, me_dst) equals
  * the (destination, source) of the outer IP header in m.
  *
  * On a match with the interface up, stores the softc in *arg and
  * returns ENCAP_DRV_LOOKUP.  Returns 0 when the tables do not exist,
  * nothing matches, or the matching interface is not IFF_UP.
  */
 static int
 me_lookup(const struct mbuf *m, int off, int proto, void **arg)
 {
 	const struct ip *ip;
 	struct me_softc *sc;
 
 	if (V_me_hashtbl == NULL)
 		return (0);
 
 	NET_EPOCH_ASSERT();
 	ip = mtod(m, const struct ip *);
 	CK_LIST_FOREACH(sc, &ME_HASH(ip->ip_dst.s_addr,
 	    ip->ip_src.s_addr), chain) {
 		if (sc->me_src.s_addr != ip->ip_dst.s_addr ||
 		    sc->me_dst.s_addr != ip->ip_src.s_addr)
 			continue;
 		/* First match decides; a down interface rejects the packet. */
 		if ((ME2IFP(sc)->if_flags & IFF_UP) == 0)
 			break;
 		*arg = sc;
 		return (ENCAP_DRV_LOOKUP);
 	}
 	return (0);
 }
 
 /*
  * Check that ingress address belongs to local host.
  */
 static void
 me_set_running(struct me_softc *sc)
 {
 
 	if (in_localip(sc->me_src))
 		ME2IFP(sc)->if_drv_flags |= IFF_DRV_RUNNING;
 	else
 		ME2IFP(sc)->if_drv_flags &= ~IFF_DRV_RUNNING;
 }
 
 /*
  * ifaddr_event handler.
  * Re-evaluate IFF_DRV_RUNNING on every tunnel whose source address
  * equals the address in sa, so that the flag is dropped when the
  * ingress address disappears (prevents source address spoofing).
  */
 static void
 me_srcaddr(void *arg __unused, const struct sockaddr *sa,
     int event __unused)
 {
 	struct me_softc *sc;
 	in_addr_t addr;
 
 	/* Nothing to do until the hash tables exist in this VNET. */
 	if (V_me_hashtbl == NULL)
 		return;
 
 	NET_EPOCH_ASSERT();
 	addr = ((const struct sockaddr_in *)sa)->sin_addr.s_addr;
 	CK_LIST_FOREACH(sc, &ME_SRCHASH(addr), srchash) {
 		if (sc->me_src.s_addr == addr)
 			me_set_running(sc);
 	}
 }
 
 /*
  * Configure the outer endpoints of a tunnel.
  *
  * Must be called with me_ioctl_sx held exclusively.  The per-VNET hash
  * tables are allocated on first use.  Returns 0 on success (including
  * the case where sc already uses this pair) and EADDRNOTAVAIL when
  * another tunnel already uses the same (src, dst) pair.
  */
 static int
 me_set_tunnel(struct me_softc *sc, in_addr_t src, in_addr_t dst)
 {
 	struct me_softc *tmp;
 
 	sx_assert(&me_ioctl_sx, SA_XLOCKED);
 
 	if (V_me_hashtbl == NULL) {
 		V_me_hashtbl = me_hashinit();
 		V_me_srchashtbl = me_hashinit();
 	}
 
 	/* Nothing changes: keep the existing configuration. */
 	if (sc->me_src.s_addr == src && sc->me_dst.s_addr == dst)
 		return (0);
 
 	/* Refuse a pair that is already claimed by a different tunnel. */
 	CK_LIST_FOREACH(tmp, &ME_HASH(src, dst), chain) {
 		if (tmp == sc)
 			continue;
 		if (tmp->me_src.s_addr == src &&
 		    tmp->me_dst.s_addr == dst)
 			return (EADDRNOTAVAIL);
 	}
 
 	/* Drop the old configuration, then publish the new one. */
 	me_delete_tunnel(sc);
 	sc->me_dst.s_addr = dst;
 	sc->me_src.s_addr = src;
 	CK_LIST_INSERT_HEAD(&ME_HASH(src, dst), sc, chain);
 	CK_LIST_INSERT_HEAD(&ME_SRCHASH(src), sc, srchash);
 
 	me_set_running(sc);
 	if_link_state_change(ME2IFP(sc), LINK_STATE_UP);
 	return (0);
 }
 
 static void
 me_delete_tunnel(struct me_softc *sc)
 {
 
 	sx_assert(&me_ioctl_sx, SA_XLOCKED);
 	if (ME_READY(sc)) {
 		CK_LIST_REMOVE(sc, chain);
 		CK_LIST_REMOVE(sc, srchash);
 		ME_WAIT();
 
 		sc->me_src.s_addr = 0;
 		sc->me_dst.s_addr = 0;
 		ME2IFP(sc)->if_drv_flags &= ~IFF_DRV_RUNNING;
 		if_link_state_change(ME2IFP(sc), LINK_STATE_DOWN);
 	}
 }
 
 /*
  * Compute the one's-complement checksum over nwords 16-bit words
  * starting at p: the words are summed in 32 bits, the carries are
  * folded back into the low 16 bits and the complement is returned.
  * A non-positive nwords sums nothing and yields 0xffff.
  */
 static uint16_t
 me_in_cksum(uint16_t *p, int nwords)
 {
 	uint32_t acc;
 	int i;
 
 	acc = 0;
 	for (i = 0; i < nwords; i++)
 		acc += p[i];
 	/* Fold the carries back in; the second step catches a new carry. */
 	acc = (acc & 0xffff) + (acc >> 16);
 	acc += acc >> 16;
 	return ((uint16_t)~acc);
 }
 
 /*
  * Encap input callback: decapsulate one packet for the tunnel passed
  * in arg (the softc stored by me_lookup()).
  *
  * The outer IP header is rewritten in place into the inner header
  * using the fields saved in the minimal forwarding header, the
  * forwarding header is removed, and the packet is handed to
  * NETISR_IP.  Always returns IPPROTO_DONE; malformed packets are freed
  * and counted as input errors.
  *
  * NOTE(review): the forwarding header is located at
  * sizeof(struct ip) and the "off" argument is unused, so the outer
  * header is presumably expected to carry no IP options -- confirm.
  */
 static int
 me_input(struct mbuf *m, int off, int proto, void *arg)
 {
 	struct me_softc *sc = arg;
 	struct mobhdr *mh;
 	struct ifnet *ifp;
 	struct ip *ip;
 	int hlen;
 
 	NET_EPOCH_ASSERT();
 
 	ifp = ME2IFP(sc);
 	/* checks for short packets */
 	/*
 	 * Start with the full header length; if the packet is too short
 	 * for that, only the form without mob_src can be present.
 	 */
 	hlen = sizeof(struct mobhdr);
 	if (m->m_pkthdr.len < sizeof(struct ip) + hlen)
 		hlen -= sizeof(struct in_addr);
 	if (m->m_len < sizeof(struct ip) + hlen)
 		m = m_pullup(m, sizeof(struct ip) + hlen);
 	if (m == NULL)
 		goto drop;
 	mh = (struct mobhdr *)mtodo(m, sizeof(struct ip));
 	/* check for wrong flags */
 	if (mh->mob_flags & (~MOB_FLAGS_SP)) {
 		m_freem(m);
 		goto drop;
 	}
 	/*
 	 * Source present requires the full header; otherwise the header
 	 * length is the short form regardless of the packet size.
 	 */
 	if (mh->mob_flags) {
 	       if (hlen != sizeof(struct mobhdr)) {
 			m_freem(m);
 			goto drop;
 	       }
 	} else
 		hlen = sizeof(struct mobhdr) - sizeof(struct in_addr);
 	/* check mobile header checksum */
 	if (me_in_cksum((uint16_t *)mh, hlen / sizeof(uint16_t)) != 0) {
 		m_freem(m);
 		goto drop;
 	}
 #ifdef MAC
 	mac_ifnet_create_mbuf(ifp, m);
 #endif
 	/* Restore the original header fields into the outer IP header. */
 	ip = mtod(m, struct ip *);
 	ip->ip_dst = mh->mob_dst;
 	ip->ip_p = mh->mob_proto;
 	ip->ip_sum = 0;
 	ip->ip_len = htons(m->m_pkthdr.len - hlen);
 	if (mh->mob_flags)
 		ip->ip_src = mh->mob_src;
 	/*
 	 * Slide the IP header forward by hlen bytes (regions overlap,
 	 * hence memmove) and trim the now unused bytes from the front.
 	 */
 	memmove(mtodo(m, hlen), ip, sizeof(struct ip));
 	m_adj(m, hlen);
 	m_clrprotoflags(m);
 	m->m_pkthdr.rcvif = ifp;
 	m->m_pkthdr.csum_flags |= (CSUM_IP_CHECKED | CSUM_IP_VALID);
 	M_SETFIB(m, ifp->if_fib);
 	/* hlen is reused as the address family word prepended for bpf. */
 	hlen = AF_INET;
 	BPF_MTAP2(ifp, &hlen, sizeof(hlen), m);
 	if_inc_counter(ifp, IFCOUNTER_IPACKETS, 1);
 	if_inc_counter(ifp, IFCOUNTER_IBYTES, m->m_pkthdr.len);
 	if ((ifp->if_flags & IFF_MONITOR) != 0)
 		m_freem(m);
 	else
 		netisr_dispatch(NETISR_IP, m);
 	return (IPPROTO_DONE);
 drop:
 	/* The mbuf has already been freed (or is NULL) on this path. */
 	if_inc_counter(ifp, IFCOUNTER_IERRORS, 1);
 	return (IPPROTO_DONE);
 }
 
 /*
  * if_output handler: determine the address family of the packet, stash
  * it in m_pkthdr.csum_data (me_transmit() reads it back from there) and
  * pass the packet to if_transmit.
  *
  * For AF_UNSPEC destinations the family is copied out of sa_data.
  */
 static int
 me_output(struct ifnet *ifp, struct mbuf *m, const struct sockaddr *dst,
-   struct route *ro __unused)
+   struct route *ro)
 {
 	uint32_t af;
 
 	if (dst->sa_family == AF_UNSPEC)
 		bcopy(dst->sa_data, &af, sizeof(af));
 	else
-		af = dst->sa_family;
+		af = RO_GET_FAMILY(ro, dst);
 	m->m_pkthdr.csum_data = af;
 	return (ifp->if_transmit(ifp, m));
 }
 
 /* Tag passed to if_tunnel_check_nesting() for this driver. */
 #define	MTAG_ME	1414491977
 /*
  * if_transmit handler: encapsulate one IPv4 packet and send it with
  * ip_output().
  *
  * The packet's own IP header becomes the outer header (source and
  * destination replaced by the tunnel endpoints, protocol set to
  * IPPROTO_MOBILE); the original protocol, destination and, when it
  * differs from the tunnel source, the original source are saved in a
  * minimal forwarding header inserted right after it.
  *
  * Runs inside the net epoch (ME_RLOCK/ME_RUNLOCK).  Returns 0 or an
  * errno value; the mbuf is consumed in every case.
  */
 static int
 me_transmit(struct ifnet *ifp, struct mbuf *m)
 {
 	ME_RLOCK_TRACKER;
 	struct mobhdr mh;
 	struct me_softc *sc;
 	struct ip *ip;
 	uint32_t af;
 	int error, hlen, plen;
 
 	ME_RLOCK();
 #ifdef MAC
 	error = mac_ifnet_check_transmit(ifp, m);
 	if (error != 0)
 		goto drop;
 #endif
 	/*
 	 * ENETDOWN is the default for the readiness checks below;
 	 * if_tunnel_check_nesting() supplies its own error code.
 	 */
 	error = ENETDOWN;
 	sc = ifp->if_softc;
 	if (sc == NULL || !ME_READY(sc) ||
 	    (ifp->if_flags & IFF_MONITOR) != 0 ||
 	    (ifp->if_flags & IFF_UP) == 0 ||
 	    (ifp->if_drv_flags & IFF_DRV_RUNNING) == 0 ||
 	    (error = if_tunnel_check_nesting(ifp, m, MTAG_ME,
 		V_max_me_nesting)) != 0) {
 		m_freem(m);
 		goto drop;
 	}
 	/* Address family as stored by me_output(). */
 	af = m->m_pkthdr.csum_data;
 	if (af != AF_INET) {
 		error = EAFNOSUPPORT;
 		m_freem(m);
 		goto drop;
 	}
 	if (m->m_len < sizeof(struct ip))
 		m = m_pullup(m, sizeof(struct ip));
 	if (m == NULL) {
 		error = ENOBUFS;
 		goto drop;
 	}
 	ip = mtod(m, struct ip *);
 	/* Fragmented datagrams shouldn't be encapsulated */
 	if (ip->ip_off & htons(IP_MF | IP_OFFMASK)) {
 		error = EINVAL;
 		m_freem(m);
 		goto drop;
 	}
 	/* Save the fields of the inner header that are overwritten below. */
 	mh.mob_proto = ip->ip_p;
 	mh.mob_src = ip->ip_src;
 	mh.mob_dst = ip->ip_dst;
 	/*
 	 * The original source is only carried when it differs from the
 	 * tunnel source; otherwise the short header form is used.
 	 */
 	if (in_hosteq(sc->me_src, ip->ip_src)) {
 		hlen = sizeof(struct mobhdr) - sizeof(struct in_addr);
 		mh.mob_flags = 0;
 	} else {
 		hlen = sizeof(struct mobhdr);
 		mh.mob_flags = MOB_FLAGS_SP;
 	}
 	BPF_MTAP2(ifp, &af, sizeof(af), m);
 	/* Payload length for the output byte counter (pre-encapsulation). */
 	plen = m->m_pkthdr.len;
 	ip->ip_src = sc->me_src;
 	ip->ip_dst = sc->me_dst;
 	m->m_flags &= ~(M_BCAST|M_MCAST);
 	M_SETFIB(m, sc->me_fibnum);
 	M_PREPEND(m, hlen, M_NOWAIT);
 	if (m == NULL) {
 		error = ENOBUFS;
 		goto drop;
 	}
 	if (m->m_len < sizeof(struct ip) + hlen)
 		m = m_pullup(m, sizeof(struct ip) + hlen);
 	if (m == NULL) {
 		error = ENOBUFS;
 		goto drop;
 	}
 	/*
 	 * Move the IP header to the new front of the mbuf, leaving hlen
 	 * bytes behind it for the forwarding header.
 	 */
 	memmove(mtod(m, void *), mtodo(m, hlen), sizeof(struct ip));
 	ip = mtod(m, struct ip *);
 	ip->ip_len = htons(m->m_pkthdr.len);
 	ip->ip_p = IPPROTO_MOBILE;
 	ip->ip_sum = 0;
 	/* Checksum is computed with the checksum field set to zero. */
 	mh.mob_csum = 0;
 	mh.mob_csum = me_in_cksum((uint16_t *)&mh, hlen / sizeof(uint16_t));
 	bcopy(&mh, mtodo(m, sizeof(struct ip)), hlen);
 	error = ip_output(m, NULL, NULL, IP_FORWARDING, NULL, NULL);
 drop:
 	/* plen is only read on success, where it has been assigned. */
 	if (error)
 		if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
 	else {
 		if_inc_counter(ifp, IFCOUNTER_OPACKETS, 1);
 		if_inc_counter(ifp, IFCOUNTER_OBYTES, plen);
 	}
 	ME_RUNLOCK();
 	return (error);
 }
 
 /*
  * if_qflush handler (installed by me_clone_create()).  Intentionally
  * empty: me_transmit() passes packets straight to ip_output() and keeps
  * no queue of its own.
  */
 static void
 me_qflush(struct ifnet *ifp __unused)
 {
 
 }
 
 /* Registration cookies obtained in memodevent() at MOD_LOAD. */
 static const struct srcaddrtab *me_srcaddrtab = NULL;
 static const struct encaptab *ecookie = NULL;
 /*
  * Encap registration for IPPROTO_MOBILE.  min_length is an IP header
  * plus the short forwarding header (struct mobhdr without mob_src).
  */
 static const struct encap_config me_encap_cfg = {
 	.proto = IPPROTO_MOBILE,
 	.min_length = sizeof(struct ip) + sizeof(struct mobhdr) -
 	    sizeof(in_addr_t),
 	.exact_match = ENCAP_DRV_LOOKUP,
 	.lookup = me_lookup,
 	.input = me_input
 };
 
 /*
  * Module event handler.  MOD_LOAD registers the source address
  * callback and the encap handler; MOD_UNLOAD removes them in reverse
  * order.  Any other event type returns EOPNOTSUPP.
  */
 static int
 memodevent(module_t mod, int type, void *data)
 {
 
 	if (type == MOD_LOAD) {
 		me_srcaddrtab = ip_encap_register_srcaddr(me_srcaddr,
 		    NULL, M_WAITOK);
 		ecookie = ip_encap_attach(&me_encap_cfg, NULL, M_WAITOK);
 		return (0);
 	}
 	if (type == MOD_UNLOAD) {
 		ip_encap_detach(ecookie);
 		ip_encap_unregister_srcaddr(me_srcaddrtab);
 		return (0);
 	}
 	return (EOPNOTSUPP);
 }
 
 /* Module glue: "if_me" is driven by memodevent(), no private data. */
 static moduledata_t me_mod = {
 	"if_me",
 	memodevent,
 	0
 };
 
 DECLARE_MODULE(if_me, me_mod, SI_SUB_PSEUDO, SI_ORDER_ANY);
 MODULE_VERSION(if_me, 1);
diff --git a/sys/net/if_spppsubr.c b/sys/net/if_spppsubr.c
index fbf7b0ea8f4c..804367025532 100644
--- a/sys/net/if_spppsubr.c
+++ b/sys/net/if_spppsubr.c
@@ -1,5417 +1,5418 @@
 /*
  * Synchronous PPP/Cisco/Frame Relay link level subroutines.
  * Keepalive protocol implemented in both Cisco and PPP modes.
  */
 /*-
  * Copyright (C) 1994-2000 Cronyx Engineering.
  * Author: Serge Vakulenko, <vak@cronyx.ru>
  *
  * Heavily revamped to conform to RFC 1661.
  * Copyright (C) 1997, 2001 Joerg Wunsch.
  *
  * This software is distributed with NO WARRANTIES, not even the implied
  * warranties for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
  *
  * Authors grant any other persons or organisations permission to use
  * or modify this software as long as this message is kept with the software,
  * all derivative works or modified versions.
  *
  * From: Version 2.4, Thu Apr 30 17:17:21 MSD 1997
  *
  * $FreeBSD$
  */
 
 #include <sys/param.h>
 
 #include "opt_inet.h"
 #include "opt_inet6.h"
 
 #include <sys/systm.h>
 #include <sys/kernel.h>
 #include <sys/lock.h>
 #include <sys/module.h>
 #include <sys/rmlock.h>
 #include <sys/sockio.h>
 #include <sys/socket.h>
 #include <sys/syslog.h>
 #include <sys/random.h>
 #include <sys/malloc.h>
 #include <sys/mbuf.h>
 
 #include <sys/md5.h>
 
 #include <net/if.h>
 #include <net/if_var.h>
 #include <net/netisr.h>
 #include <net/if_types.h>
 #include <net/route.h>
 #include <net/vnet.h>
 #include <netinet/in.h>
 #include <netinet/in_var.h>
 #include <netinet/in_systm.h>
 #include <netinet/ip.h>
 #include <net/slcompress.h>
 
 #include <machine/stdarg.h>
 
 #include <netinet/in_var.h>
 
 #ifdef INET
 #include <netinet/ip.h>
 #include <netinet/tcp.h>
 #endif
 
 #ifdef INET6
 #include <netinet6/scope6_var.h>
 #endif
 
 #include <netinet/if_ether.h>
 
 #include <net/if_sppp.h>
 
 #define IOCTL_CMD_T	u_long
 #define MAXALIVECNT     3               /* max. alive packets */
 
 /*
  * Interface flags that can be set in an ifconfig command.
  *
  * Setting link0 will make the link passive, i.e. it will be marked
  * as being administrative openable, but won't be opened to begin
  * with.  Incoming calls will be answered, or subsequent calls with
  * -link1 will cause the administrative open of the LCP layer.
  *
  * Setting link1 will cause the link to auto-dial only as packets
  * arrive to be sent.
  *
  * Setting IFF_DEBUG will syslog the option negotiation and state
  * transitions at level kern.debug.  Note: all logs consistently look
  * like
  *
  *   <if-name><unit>: <proto-name> <additional info...>
  *
  * with <if-name><unit> being something like "bppp0", and <proto-name>
  * being one of "lcp", "ipcp", "cisco", "chap", "pap", etc.
  */
 
 #define IFF_PASSIVE	IFF_LINK0	/* wait passively for connection */
 #define IFF_AUTO	IFF_LINK1	/* auto-dial on output */
 #define IFF_CISCO	IFF_LINK2	/* run the link in Cisco mode, not PPP */
 
 #define PPP_ALLSTATIONS 0xff		/* All-Stations broadcast address */
 #define PPP_UI		0x03		/* Unnumbered Information */
 #define PPP_IP		0x0021		/* Internet Protocol */
 #define PPP_ISO		0x0023		/* ISO OSI Protocol */
 #define PPP_XNS		0x0025		/* Xerox NS Protocol */
 #define PPP_IPX		0x002b		/* Novell IPX Protocol */
 #define PPP_VJ_COMP	0x002d		/* VJ compressed TCP/IP */
 #define PPP_VJ_UCOMP	0x002f		/* VJ uncompressed TCP/IP */
 #define PPP_IPV6	0x0057		/* Internet Protocol Version 6 */
 #define PPP_LCP		0xc021		/* Link Control Protocol */
 #define PPP_PAP		0xc023		/* Password Authentication Protocol */
 #define PPP_CHAP	0xc223		/* Challenge-Handshake Auth Protocol */
 #define PPP_IPCP	0x8021		/* Internet Protocol Control Protocol */
 #define PPP_IPV6CP	0x8057		/* IPv6 Control Protocol */
 
 #define CONF_REQ	1		/* PPP configure request */
 #define CONF_ACK	2		/* PPP configure acknowledge */
 #define CONF_NAK	3		/* PPP configure negative ack */
 #define CONF_REJ	4		/* PPP configure reject */
 #define TERM_REQ	5		/* PPP terminate request */
 #define TERM_ACK	6		/* PPP terminate acknowledge */
 #define CODE_REJ	7		/* PPP code reject */
 #define PROTO_REJ	8		/* PPP protocol reject */
 #define ECHO_REQ	9		/* PPP echo request */
 #define ECHO_REPLY	10		/* PPP echo reply */
 #define DISC_REQ	11		/* PPP discard request */
 
 #define LCP_OPT_MRU		1	/* maximum receive unit */
 #define LCP_OPT_ASYNC_MAP	2	/* async control character map */
 #define LCP_OPT_AUTH_PROTO	3	/* authentication protocol */
 #define LCP_OPT_QUAL_PROTO	4	/* quality protocol */
 #define LCP_OPT_MAGIC		5	/* magic number */
 #define LCP_OPT_RESERVED	6	/* reserved */
 #define LCP_OPT_PROTO_COMP	7	/* protocol field compression */
 #define LCP_OPT_ADDR_COMP	8	/* address/control field compression */
 
 #define IPCP_OPT_ADDRESSES	1	/* both IP addresses; deprecated */
 #define IPCP_OPT_COMPRESSION	2	/* IP compression protocol (VJ) */
 #define IPCP_OPT_ADDRESS	3	/* local IP address */
 
 #define IPV6CP_OPT_IFID	1	/* interface identifier */
 #define IPV6CP_OPT_COMPRESSION	2	/* IPv6 compression protocol */
 
 #define IPCP_COMP_VJ		0x2d	/* Code for VJ compression */
 
 #define PAP_REQ			1	/* PAP name/password request */
 #define PAP_ACK			2	/* PAP acknowledge */
 #define PAP_NAK			3	/* PAP fail */
 
 #define CHAP_CHALLENGE		1	/* CHAP challenge request */
 #define CHAP_RESPONSE		2	/* CHAP challenge response */
 #define CHAP_SUCCESS		3	/* CHAP response ok */
 #define CHAP_FAILURE		4	/* CHAP response failed */
 
 #define CHAP_MD5		5	/* hash algorithm - MD5 */
 
 #define CISCO_MULTICAST		0x8f	/* Cisco multicast address */
 #define CISCO_UNICAST		0x0f	/* Cisco unicast address */
 #define CISCO_KEEPALIVE		0x8035	/* Cisco keepalive protocol */
 #define CISCO_ADDR_REQ		0	/* Cisco address request */
 #define CISCO_ADDR_REPLY	1	/* Cisco address reply */
 #define CISCO_KEEPALIVE_REQ	2	/* Cisco keepalive request */
 
 /* states are named and numbered according to RFC 1661 */
 #define STATE_INITIAL	0
 #define STATE_STARTING	1
 #define STATE_CLOSED	2
 #define STATE_STOPPED	3
 #define STATE_CLOSING	4
 #define STATE_STOPPING	5
 #define STATE_REQ_SENT	6
 #define STATE_ACK_RCVD	7
 #define STATE_ACK_SENT	8
 #define STATE_OPENED	9
 
 static MALLOC_DEFINE(M_SPPP, "sppp", "synchronous PPP interface internals");
 
 /*
  * Link-level header in front of every frame.  The PPP_ALLSTATIONS and
  * PPP_UI constants above name values for the address and control
  * fields; the PPP_* protocol numbers go into the protocol field.
  */
 struct ppp_header {
 	u_char address;		/* e.g. PPP_ALLSTATIONS */
 	u_char control;		/* e.g. PPP_UI */
 	u_short protocol;	/* PPP_IP, PPP_LCP, ... */
 } __packed;
 #define PPP_HEADER_LEN          sizeof (struct ppp_header)
 
 /*
  * Header of a control protocol packet.  It is passed to the RCR/RCN
  * handlers of every struct cp, not only to LCP.
  */
 struct lcp_header {
 	u_char type;		/* packet code, e.g. CONF_REQ */
 	u_char ident;		/* identifier */
 	u_short len;		/* length field */
 } __packed;
 #define LCP_HEADER_LEN          sizeof (struct lcp_header)
 
 /*
  * Payload of a Cisco keepalive protocol packet (see the CISCO_*
  * constants above for the type values).
  * NOTE(review): the fields are declared u_long, so the packed size
  * depends on the platform's long width -- confirm this matches the
  * intended wire layout.
  */
 struct cisco_packet {
 	u_long type;		/* CISCO_ADDR_REQ, CISCO_ADDR_REPLY, ... */
 	u_long par1;
 	u_long par2;
 	u_short rel;
 	u_short time0;
 	u_short time1;
 } __packed;
 #define CISCO_PACKET_LEN	sizeof (struct cisco_packet)
 
 /*
  * Descriptor of one control protocol: its protocol number, its slot in
  * the per-interface state table, its kind (flags), and the callbacks
  * for its events and actions.
  *
  * We follow the spelling and capitalization of RFC 1661 here, to make
  * it easier comparing with the standard.  Please refer to this RFC in
  * case you can't make sense out of these abbreviations; it will also
  * explain the semantics related to the various events and actions.
  */
 struct cp {
 	u_short	proto;		/* PPP control protocol number */
 	u_char protoidx;	/* index into state table in struct sppp */
 	u_char flags;
 #define CP_LCP		0x01	/* this is the LCP */
 #define CP_AUTH		0x02	/* this is an authentication protocol */
 #define CP_NCP		0x04	/* this is a NCP */
 #define CP_QUAL		0x08	/* this is a quality reporting protocol */
 	const char *name;	/* name of this control protocol */
 	/* event handlers */
 	void	(*Up)(struct sppp *sp);
 	void	(*Down)(struct sppp *sp);
 	void	(*Open)(struct sppp *sp);
 	void	(*Close)(struct sppp *sp);
 	void	(*TO)(void *sp);	/* takes void *, unlike the others */
 	int	(*RCR)(struct sppp *sp, struct lcp_header *h, int len);
 	void	(*RCN_rej)(struct sppp *sp, struct lcp_header *h, int len);
 	void	(*RCN_nak)(struct sppp *sp, struct lcp_header *h, int len);
 	/* actions */
 	void	(*tlu)(struct sppp *sp);
 	void	(*tld)(struct sppp *sp);
 	void	(*tls)(struct sppp *sp);
 	void	(*tlf)(struct sppp *sp);
 	void	(*scr)(struct sppp *sp);
 };
 
 #define	SPP_FMT		"%s: "
 #define	SPP_ARGS(ifp)	(ifp)->if_xname
 
 #define SPPP_LOCK(sp)	mtx_lock (&(sp)->mtx)
 #define SPPP_UNLOCK(sp)	mtx_unlock (&(sp)->mtx)
 #define SPPP_LOCK_ASSERT(sp)	mtx_assert (&(sp)->mtx, MA_OWNED)
 #define SPPP_LOCK_OWNED(sp)	mtx_owned (&(sp)->mtx)
 
 #ifdef INET
 /*
  * The following disgusting hack gets around the problem that IP TOS
  * can't be set yet.  We want to put "interactive" traffic on a high
  * priority queue.  To decide if traffic is interactive, we check that
  * a) it is TCP and b) one of its ports is telnet, rlogin or ftp control.
  *
  * XXX is this really still necessary?  - joerg -
  *
  * The table is indexed by the low three bits of the port number:
  * 513 & 7 == 1, 21 & 7 == 5 and 23 & 7 == 7, so INTERACTIVE(p) is one
  * table load plus one compare.  Slot 0 holds 0, which means that port 0
  * also satisfies the test.
  */
 static const u_short interactive_ports[8] = {
 	0,	513,	0,	0,
 	0,	21,	0,	23,
 };
 #define INTERACTIVE(p) (interactive_ports[(p) & 7] == (p))
 #endif
 
 /* almost every function needs these */
 #define STDDCL							\
 	struct ifnet *ifp = SP2IFP(sp);				\
 	int debug = ifp->if_flags & IFF_DEBUG
 
 static int sppp_output(struct ifnet *ifp, struct mbuf *m,
 	const struct sockaddr *dst, struct route *ro);
 
 static void sppp_cisco_send(struct sppp *sp, int type, long par1, long par2);
 static void sppp_cisco_input(struct sppp *sp, struct mbuf *m);
 
 static void sppp_cp_input(const struct cp *cp, struct sppp *sp,
 			  struct mbuf *m);
 static void sppp_cp_send(struct sppp *sp, u_short proto, u_char type,
 			 u_char ident, u_short len, void *data);
 /* static void sppp_cp_timeout(void *arg); */
 static void sppp_cp_change_state(const struct cp *cp, struct sppp *sp,
 				 int newstate);
 static void sppp_auth_send(const struct cp *cp,
 			   struct sppp *sp, unsigned int type, unsigned int id,
 			   ...);
 
 static void sppp_up_event(const struct cp *cp, struct sppp *sp);
 static void sppp_down_event(const struct cp *cp, struct sppp *sp);
 static void sppp_open_event(const struct cp *cp, struct sppp *sp);
 static void sppp_close_event(const struct cp *cp, struct sppp *sp);
 static void sppp_to_event(const struct cp *cp, struct sppp *sp);
 
 static void sppp_null(struct sppp *sp);
 
 static void sppp_pp_up(struct sppp *sp);
 static void sppp_pp_down(struct sppp *sp);
 
 static void sppp_lcp_init(struct sppp *sp);
 static void sppp_lcp_up(struct sppp *sp);
 static void sppp_lcp_down(struct sppp *sp);
 static void sppp_lcp_open(struct sppp *sp);
 static void sppp_lcp_close(struct sppp *sp);
 static void sppp_lcp_TO(void *sp);
 static int sppp_lcp_RCR(struct sppp *sp, struct lcp_header *h, int len);
 static void sppp_lcp_RCN_rej(struct sppp *sp, struct lcp_header *h, int len);
 static void sppp_lcp_RCN_nak(struct sppp *sp, struct lcp_header *h, int len);
 static void sppp_lcp_tlu(struct sppp *sp);
 static void sppp_lcp_tld(struct sppp *sp);
 static void sppp_lcp_tls(struct sppp *sp);
 static void sppp_lcp_tlf(struct sppp *sp);
 static void sppp_lcp_scr(struct sppp *sp);
 static void sppp_lcp_check_and_close(struct sppp *sp);
 static int sppp_ncp_check(struct sppp *sp);
 
 static void sppp_ipcp_init(struct sppp *sp);
 static void sppp_ipcp_up(struct sppp *sp);
 static void sppp_ipcp_down(struct sppp *sp);
 static void sppp_ipcp_open(struct sppp *sp);
 static void sppp_ipcp_close(struct sppp *sp);
 static void sppp_ipcp_TO(void *sp);
 static int sppp_ipcp_RCR(struct sppp *sp, struct lcp_header *h, int len);
 static void sppp_ipcp_RCN_rej(struct sppp *sp, struct lcp_header *h, int len);
 static void sppp_ipcp_RCN_nak(struct sppp *sp, struct lcp_header *h, int len);
 static void sppp_ipcp_tlu(struct sppp *sp);
 static void sppp_ipcp_tld(struct sppp *sp);
 static void sppp_ipcp_tls(struct sppp *sp);
 static void sppp_ipcp_tlf(struct sppp *sp);
 static void sppp_ipcp_scr(struct sppp *sp);
 
 static void sppp_ipv6cp_init(struct sppp *sp);
 static void sppp_ipv6cp_up(struct sppp *sp);
 static void sppp_ipv6cp_down(struct sppp *sp);
 static void sppp_ipv6cp_open(struct sppp *sp);
 static void sppp_ipv6cp_close(struct sppp *sp);
 static void sppp_ipv6cp_TO(void *sp);
 static int sppp_ipv6cp_RCR(struct sppp *sp, struct lcp_header *h, int len);
 static void sppp_ipv6cp_RCN_rej(struct sppp *sp, struct lcp_header *h, int len);
 static void sppp_ipv6cp_RCN_nak(struct sppp *sp, struct lcp_header *h, int len);
 static void sppp_ipv6cp_tlu(struct sppp *sp);
 static void sppp_ipv6cp_tld(struct sppp *sp);
 static void sppp_ipv6cp_tls(struct sppp *sp);
 static void sppp_ipv6cp_tlf(struct sppp *sp);
 static void sppp_ipv6cp_scr(struct sppp *sp);
 
 static void sppp_pap_input(struct sppp *sp, struct mbuf *m);
 static void sppp_pap_init(struct sppp *sp);
 static void sppp_pap_open(struct sppp *sp);
 static void sppp_pap_close(struct sppp *sp);
 static void sppp_pap_TO(void *sp);
 static void sppp_pap_my_TO(void *sp);
 static void sppp_pap_tlu(struct sppp *sp);
 static void sppp_pap_tld(struct sppp *sp);
 static void sppp_pap_scr(struct sppp *sp);
 
 static void sppp_chap_input(struct sppp *sp, struct mbuf *m);
 static void sppp_chap_init(struct sppp *sp);
 static void sppp_chap_open(struct sppp *sp);
 static void sppp_chap_close(struct sppp *sp);
 static void sppp_chap_TO(void *sp);
 static void sppp_chap_tlu(struct sppp *sp);
 static void sppp_chap_tld(struct sppp *sp);
 static void sppp_chap_scr(struct sppp *sp);
 
 static const char *sppp_auth_type_name(u_short proto, u_char type);
 static const char *sppp_cp_type_name(u_char type);
 #ifdef INET
 static const char *sppp_dotted_quad(u_long addr);
 static const char *sppp_ipcp_opt_name(u_char opt);
 #endif
 #ifdef INET6
 static const char *sppp_ipv6cp_opt_name(u_char opt);
 #endif
 static const char *sppp_lcp_opt_name(u_char opt);
 static const char *sppp_phase_name(enum ppp_phase phase);
 static const char *sppp_proto_name(u_short proto);
 static const char *sppp_state_name(int state);
 static int sppp_params(struct sppp *sp, u_long cmd, void *data);
 static int sppp_strnlen(u_char *p, int max);
 static void sppp_keepalive(void *dummy);
 static void sppp_phase_network(struct sppp *sp);
 static void sppp_print_bytes(const u_char *p, u_short len);
 static void sppp_print_string(const char *p, u_short len);
 static void sppp_qflush(struct ifqueue *ifq);
 #ifdef INET
 static void sppp_set_ip_addr(struct sppp *sp, u_long src);
 #endif
 #ifdef INET6
 static void sppp_get_ip6_addrs(struct sppp *sp, struct in6_addr *src,
 			       struct in6_addr *dst, struct in6_addr *srcmask);
 #ifdef IPV6CP_MYIFID_DYN
 static void sppp_set_ip6_addr(struct sppp *sp, const struct in6_addr *src);
 static void sppp_gen_ip6_addr(struct sppp *sp, const struct in6_addr *src);
 #endif
 static void sppp_suggest_ip6_addr(struct sppp *sp, struct in6_addr *src);
 #endif
 
 /* if_start () wrapper */
 static void sppp_ifstart (struct ifnet *ifp);
 
 /*
  * Our control protocol descriptors.
  *
  * Each descriptor is a positional initializer for struct cp, whose
  * declaration is not part of this chunk.  Judging from how the members
  * are used elsewhere in this file (cp->proto, cp->protoidx, cp->name,
  * lcp.Open(), cp->RCR(), cp->tlu(), ...) the slots appear to be:
  * protocol number, IDX_* index, flags, name, followed by the
  * Up/Down/Open/Close/TO/RCR/RCN_rej/RCN_nak/tlu/tld/tls/tlf/scr
  * handlers -- TODO confirm against the struct cp declaration.
  */
 static const struct cp lcp = {
 	PPP_LCP, IDX_LCP, CP_LCP, "lcp",
 	sppp_lcp_up, sppp_lcp_down, sppp_lcp_open, sppp_lcp_close,
 	sppp_lcp_TO, sppp_lcp_RCR, sppp_lcp_RCN_rej, sppp_lcp_RCN_nak,
 	sppp_lcp_tlu, sppp_lcp_tld, sppp_lcp_tls, sppp_lcp_tlf,
 	sppp_lcp_scr
 };
 
 /* IPCP: the third slot is CP_NCP only when the kernel has IPv4 support. */
 static const struct cp ipcp = {
 	PPP_IPCP, IDX_IPCP,
 #ifdef INET	/* don't run IPCP if there's no IPv4 support */
 	CP_NCP,
 #else
 	0,
 #endif
 	"ipcp",
 	sppp_ipcp_up, sppp_ipcp_down, sppp_ipcp_open, sppp_ipcp_close,
 	sppp_ipcp_TO, sppp_ipcp_RCR, sppp_ipcp_RCN_rej, sppp_ipcp_RCN_nak,
 	sppp_ipcp_tlu, sppp_ipcp_tld, sppp_ipcp_tls, sppp_ipcp_tlf,
 	sppp_ipcp_scr
 };
 
 /* IPv6CP: the third slot is CP_NCP only when the kernel has IPv6 support. */
 static const struct cp ipv6cp = {
 	PPP_IPV6CP, IDX_IPV6CP,
 #ifdef INET6	/*don't run IPv6CP if there's no IPv6 support*/
 	CP_NCP,
 #else
 	0,
 #endif
 	"ipv6cp",
 	sppp_ipv6cp_up, sppp_ipv6cp_down, sppp_ipv6cp_open, sppp_ipv6cp_close,
 	sppp_ipv6cp_TO, sppp_ipv6cp_RCR, sppp_ipv6cp_RCN_rej, sppp_ipv6cp_RCN_nak,
 	sppp_ipv6cp_tlu, sppp_ipv6cp_tld, sppp_ipv6cp_tls, sppp_ipv6cp_tlf,
 	sppp_ipv6cp_scr
 };
 
 /*
  * PAP: the three 0 entries occupy the positions where lcp/ipcp/ipv6cp
  * list their RCR/RCN_rej/RCN_nak handlers, and sppp_null fills the
  * positions of the up/down/tls/tlf handlers.
  */
 static const struct cp pap = {
 	PPP_PAP, IDX_PAP, CP_AUTH, "pap",
 	sppp_null, sppp_null, sppp_pap_open, sppp_pap_close,
 	sppp_pap_TO, 0, 0, 0,
 	sppp_pap_tlu, sppp_pap_tld, sppp_null, sppp_null,
 	sppp_pap_scr
 };
 
 /* CHAP: same layout as the PAP descriptor above. */
 static const struct cp chap = {
 	PPP_CHAP, IDX_CHAP, CP_AUTH, "chap",
 	sppp_null, sppp_null, sppp_chap_open, sppp_chap_close,
 	sppp_chap_TO, 0, 0, 0,
 	sppp_chap_tlu, sppp_chap_tld, sppp_null, sppp_null,
 	sppp_chap_scr
 };
 
 /* Descriptor table; entries are listed in IDX_* order (see the tags). */
 static const struct cp *cps[IDX_COUNT] = {
 	&lcp,			/* IDX_LCP */
 	&ipcp,			/* IDX_IPCP */
 	&ipv6cp,		/* IDX_IPV6CP */
 	&pap,			/* IDX_PAP */
 	&chap,			/* IDX_CHAP */
 };
 
 /*
  * Allocate a zero-filled struct sppp for an interface and record the
  * owning ifnet in it.  Registered with if_register_com_alloc() by
  * sppp_modevent(); the type argument is not used.
  */
 static void*
 sppp_alloc(u_char type, struct ifnet *ifp)
 {
 	struct sppp *softc;
 
 	softc = malloc(sizeof(*softc), M_SPPP, M_WAITOK | M_ZERO);
 	softc->pp_ifp = ifp;
 	return (softc);
 }
 
 /*
  * Release a struct sppp obtained from sppp_alloc().  The type argument
  * is not used.
  */
 static void
 sppp_free(void *com, u_char type)
 {
 	free(com, M_SPPP);
 }
 
 /*
  * Module event handler.
  *
  * MOD_LOAD registers the sppp softc allocator for IFT_PPP interfaces.
  * MOD_UNLOAD is refused with EACCES (nothing is deregistered), and any
  * other event yields EOPNOTSUPP.
  */
 static int
 sppp_modevent(module_t mod, int type, void *unused)
 {
 	if (type == MOD_LOAD) {
 		/*
 		 * XXX: should probably be IFT_SPPP, but it's fairly
 		 * harmless to allocate struct sppp's for non-sppp
 		 * interfaces.
 		 */
 		if_register_com_alloc(IFT_PPP, sppp_alloc, sppp_free);
 		return 0;
 	}
 
 	if (type == MOD_UNLOAD) {
 		/* if_deregister_com_alloc(IFT_PPP); */
 		return EACCES;
 	}
 
 	return EOPNOTSUPP;
 }
 /*
  * Module glue: the module is named "sppp" and dispatches its events to
  * sppp_modevent(); the third initializer slot is left 0.
  */
 static moduledata_t spppmod = {
 	"sppp",
 	sppp_modevent,
 	0
 };
 MODULE_VERSION(sppp, 1);
 DECLARE_MODULE(sppp, spppmod, SI_SUB_DRIVERS, SI_ORDER_ANY);
 
 /*
  * Exported functions, comprising our interface to the lower layer.
  */
 
 /*
  * Process the received packet.
  *
  * ifp is the receiving interface and m the received frame, starting
  * with the PPP header.  The packet is classified by the address field
  * and protocol number: control-protocol packets (LCP, PAP, CHAP, IPCP,
  * IPv6CP, Cisco keepalive) are handed to their input routine and then
  * freed here; network-layer packets get a netisr number in `isr' and
  * are queued with netisr_queue() once the sppp lock has been dropped.
  * Frame relay mode hands the whole mbuf to sppp_fr_input().
  *
  * Exit labels:
  *   drop    - frees m and releases the sppp lock, then falls into drop2.
  *   drop2   - bumps the input error/drop counters; reached with the
  *             lock already released.
  *   invalid - logs the malformed header (if debugging) and goes to drop.
  *
  * NOTE(review): only m->m_pkthdr.len is checked before mtod(); this
  * presumably relies on the caller delivering the PPP header contiguous
  * in the first mbuf -- confirm.
  * NOTE(review): sp->pp_last_recv is written after SPPP_UNLOCK() --
  * confirm that this unlocked update is intended.
  */
 void
 sppp_input(struct ifnet *ifp, struct mbuf *m)
 {
 	struct ppp_header *h;
 	/* netisr to deliver to; -1 means "nothing to deliver". */
 	int isr = -1;
 	struct sppp *sp = IFP2SP(ifp);
 	/* do_account is set for network-layer (not control) packets. */
 	int debug, do_account = 0;
 #ifdef INET
 	int hlen, vjlen;
 	u_char *iphdr;
 #endif
 
 	SPPP_LOCK(sp);
 	debug = ifp->if_flags & IFF_DEBUG;
 
 	if (ifp->if_flags & IFF_UP)
 		/* Count received bytes, add FCS and one flag */
 		if_inc_counter(ifp, IFCOUNTER_IBYTES, m->m_pkthdr.len + 3);
 
 	if (m->m_pkthdr.len <= PPP_HEADER_LEN) {
 		/* Too small packet, drop it. */
 		if (debug)
 			log(LOG_DEBUG,
 			    SPP_FMT "input packet is too small, %d bytes\n",
 			    SPP_ARGS(ifp), m->m_pkthdr.len);
 		/* The shared exit labels live inside this block. */
 	  drop:
 		m_freem (m);
 		SPPP_UNLOCK(sp);
 	  drop2:
 		if_inc_counter(ifp, IFCOUNTER_IERRORS, 1);
 		if_inc_counter(ifp, IFCOUNTER_IQDROPS, 1);
 		return;
 	}
 
 	/* Frame relay: sppp_fr_input() takes over the mbuf (not freed here). */
 	if (sp->pp_mode == PP_FR) {
 		sppp_fr_input (sp, m);
 		SPPP_UNLOCK(sp);
 		return;
 	}
 
 	/* Get PPP header. */
 	h = mtod (m, struct ppp_header*);
 	/* h is still dereferenced below, after the header has been trimmed. */
 	m_adj (m, PPP_HEADER_LEN);
 
 	switch (h->address) {
 	case PPP_ALLSTATIONS:
 		if (h->control != PPP_UI)
 			goto invalid;
 		if (sp->pp_mode == IFF_CISCO) {
 			if (debug)
 				log(LOG_DEBUG,
 				    SPP_FMT "PPP packet in Cisco mode "
 				    "<addr=0x%x ctrl=0x%x proto=0x%x>\n",
 				    SPP_ARGS(ifp),
 				    h->address, h->control, ntohs(h->protocol));
 			goto drop;
 		}
 		switch (ntohs (h->protocol)) {
 		default:
 			if (debug)
 				log(LOG_DEBUG,
 				    SPP_FMT "rejecting protocol "
 				    "<addr=0x%x ctrl=0x%x proto=0x%x>\n",
 				    SPP_ARGS(ifp),
 				    h->address, h->control, ntohs(h->protocol));
 			/*
 			 * With LCP open, answer with a protocol reject whose
 			 * payload starts at the protocol field (hence the
 			 * extra 2 bytes on top of the remaining length).
 			 */
 			if (sp->state[IDX_LCP] == STATE_OPENED)
 				sppp_cp_send (sp, PPP_LCP, PROTO_REJ,
 					++sp->pp_seq[IDX_LCP], m->m_pkthdr.len + 2,
 					&h->protocol);
 			if_inc_counter(ifp, IFCOUNTER_NOPROTO, 1);
 			goto drop;
 		case PPP_LCP:
 			/* The cp input routines do not consume m; free it here. */
 			sppp_cp_input(&lcp, sp, m);
 			m_freem (m);
 			SPPP_UNLOCK(sp);
 			return;
 		case PPP_PAP:
 			/* Authentication packets are ignored before that phase. */
 			if (sp->pp_phase >= PHASE_AUTHENTICATE)
 				sppp_pap_input(sp, m);
 			m_freem (m);
 			SPPP_UNLOCK(sp);
 			return;
 		case PPP_CHAP:
 			if (sp->pp_phase >= PHASE_AUTHENTICATE)
 				sppp_chap_input(sp, m);
 			m_freem (m);
 			SPPP_UNLOCK(sp);
 			return;
 #ifdef INET
 		case PPP_IPCP:
 			/* NCP packets are only processed in the network phase. */
 			if (sp->pp_phase == PHASE_NETWORK)
 				sppp_cp_input(&ipcp, sp, m);
 			m_freem (m);
 			SPPP_UNLOCK(sp);
 			return;
 		case PPP_IP:
 			/* If IPCP is not open, isr stays -1 and the packet is dropped. */
 			if (sp->state[IDX_IPCP] == STATE_OPENED) {
 				isr = NETISR_IP;
 			}
 			do_account++;
 			break;
 		case PPP_VJ_COMP:
 			if (sp->state[IDX_IPCP] == STATE_OPENED) {
 				if ((vjlen =
 				     sl_uncompress_tcp_core(mtod(m, u_char *),
 							    m->m_len, m->m_len,
 							    TYPE_COMPRESSED_TCP,
 							    sp->pp_comp,
 							    &iphdr, &hlen)) <= 0) {
 					if (debug)
 						log(LOG_INFO,
 			    SPP_FMT "VJ uncompress failed on compressed packet\n",
 						    SPP_ARGS(ifp));
 					goto drop;
 				}
 
 				/*
 				 * Trim the VJ header off the packet, and prepend
 				 * the uncompressed IP header (which will usually
 				 * end up in two chained mbufs since there's not
 				 * enough leading space in the existing mbuf).
 				 */
 				m_adj(m, vjlen);
 				M_PREPEND(m, hlen, M_NOWAIT);
 				if (m == NULL) {
 					/* No mbuf left to free: unlock, then count the drop. */
 					SPPP_UNLOCK(sp);
 					goto drop2;
 				}
 				bcopy(iphdr, mtod(m, u_char *), hlen);
 				isr = NETISR_IP;
 			}
 			do_account++;
 			break;
 		case PPP_VJ_UCOMP:
 			if (sp->state[IDX_IPCP] == STATE_OPENED) {
 				if (sl_uncompress_tcp_core(mtod(m, u_char *),
 							   m->m_len, m->m_len,
 							   TYPE_UNCOMPRESSED_TCP,
 							   sp->pp_comp,
 							   &iphdr, &hlen) != 0) {
 					if (debug)
 						log(LOG_INFO,
 			    SPP_FMT "VJ uncompress failed on uncompressed packet\n",
 						    SPP_ARGS(ifp));
 					goto drop;
 				}
 				isr = NETISR_IP;
 			}
 			do_account++;
 			break;
 #endif
 #ifdef INET6
 		case PPP_IPV6CP:
 			if (sp->pp_phase == PHASE_NETWORK)
 			    sppp_cp_input(&ipv6cp, sp, m);
 			m_freem (m);
 			SPPP_UNLOCK(sp);
 			return;
 
 		case PPP_IPV6:
 			if (sp->state[IDX_IPV6CP] == STATE_OPENED)
 				isr = NETISR_IPV6;
 			do_account++;
 			break;
 #endif
 		}
 		break;
 	case CISCO_MULTICAST:
 	case CISCO_UNICAST:
 		/* Don't check the control field here (RFC 1547). */
 		if (sp->pp_mode != IFF_CISCO) {
 			if (debug)
 				log(LOG_DEBUG,
 				    SPP_FMT "Cisco packet in PPP mode "
 				    "<addr=0x%x ctrl=0x%x proto=0x%x>\n",
 				    SPP_ARGS(ifp),
 				    h->address, h->control, ntohs(h->protocol));
 			goto drop;
 		}
 		switch (ntohs (h->protocol)) {
 		default:
 			if_inc_counter(ifp, IFCOUNTER_NOPROTO, 1);
 			goto invalid;
 		case CISCO_KEEPALIVE:
 			sppp_cisco_input (sp, m);
 			m_freem (m);
 			SPPP_UNLOCK(sp);
 			return;
 #ifdef INET
 		case ETHERTYPE_IP:
 			isr = NETISR_IP;
 			do_account++;
 			break;
 #endif
 #ifdef INET6
 		case ETHERTYPE_IPV6:
 			isr = NETISR_IPV6;
 			do_account++;
 			break;
 #endif
 		}
 		break;
 	default:        /* Invalid PPP packet. */
 	  invalid:
 		if (debug)
 			log(LOG_DEBUG,
 			    SPP_FMT "invalid input packet "
 			    "<addr=0x%x ctrl=0x%x proto=0x%x>\n",
 			    SPP_ARGS(ifp),
 			    h->address, h->control, ntohs(h->protocol));
 		goto drop;
 	}
 
 	/* Nothing is delivered on a down interface or without a netisr. */
 	if (! (ifp->if_flags & IFF_UP) || isr == -1)
 		goto drop;
 
 	/* The lock is released before the packet is handed to the netisr. */
 	SPPP_UNLOCK(sp);
 	M_SETFIB(m, ifp->if_fib);
 	/* Check queue. */
 	if (netisr_queue(isr, m)) {	/* (0) on success. */
 		if (debug)
 			log(LOG_DEBUG, SPP_FMT "protocol queue overflow\n",
 				SPP_ARGS(ifp));
 		/*
 		 * m is not freed on this path; presumably netisr_queue()
 		 * disposes of it on failure -- confirm.
 		 */
 		goto drop2;
 	}
 
 	if (do_account)
 		/*
 		 * Do only account for network packets, not for control
 		 * packets.  This is used by some subsystems to detect
 		 * idle lines.
 		 */
 		sp->pp_last_recv = time_uptime;
 }
 
 /*
  * Callout handler armed by sppp_ifstart(): invoke the start routine
  * saved in the softc.  The argument is the struct sppp pointer.
  */
 static void
 sppp_ifstart_sched(void *dummy)
 {
 	struct sppp *softc = (struct sppp *)dummy;
 
 	(*softc->if_start)(SP2IFP(softc));
 }
 
 /*
  * if_start () wrapper function.
  *
  * When the caller does not own the sppp lock, the saved start routine
  * is called right away.  Otherwise it is not called directly: it is
  * deferred by one tick through ifstart_callout, unless that callout is
  * already pending.
  */
 static void
 sppp_ifstart(struct ifnet *ifp)
 {
 	struct sppp *softc = IFP2SP(ifp);
 
 	if (!SPPP_LOCK_OWNED(softc)) {
 		softc->if_start(ifp);
 		return;
 	}
 
 	if (!callout_pending(&softc->ifstart_callout))
 		callout_reset(&softc->ifstart_callout, 1, sppp_ifstart_sched,
 		    (void *)softc);
 }
 
 /*
  * Enqueue transmit packet.
  *
  * Installed as ifp->if_output by sppp_attach().  Prepends the link
  * header that matches the current mode (PPP, Cisco HDLC or frame relay)
  * and hands the packet to the fast queue (interactive / low-delay IPv4
  * traffic) or to the regular interface send queue.
  *
  * Returns 0 on success, otherwise:
  *   ENETDOWN      - interface is down, or the queue was full while the
  *                   matching NCP was not yet open;
  *   EADDRNOTAVAIL - TCP packet with source address 0.0.0.0;
  *   EINVAL        - VJ compression returned an unknown packet type;
  *   ENOBUFS       - no mbuf for the header, or the queue was full;
  *   EAFNOSUPPORT  - address family is neither AF_INET nor AF_INET6.
  * The mbuf is consumed on every path.
  */
 static int
 sppp_output(struct ifnet *ifp, struct mbuf *m, const struct sockaddr *dst,
 	struct route *ro)
 {
 	struct sppp *sp = IFP2SP(ifp);
 	struct ppp_header *h;
 	struct ifqueue *ifq = NULL;
 	int error, rv = 0;
 #ifdef INET
 	int ipproto = PPP_IP;
 #endif
 	int debug = ifp->if_flags & IFF_DEBUG;
+	int af = RO_GET_FAMILY(ro, dst);
 
 	SPPP_LOCK(sp);
 
 	if (!(ifp->if_flags & IFF_UP) ||
 	    (!(ifp->if_flags & IFF_AUTO) &&
 	    !(ifp->if_drv_flags & IFF_DRV_RUNNING))) {
 #ifdef INET6
 	  drop:
 #endif
 		m_freem (m);
 		SPPP_UNLOCK(sp);
 		return (ENETDOWN);
 	}
 
 	if ((ifp->if_flags & IFF_AUTO) &&
 	    !(ifp->if_drv_flags & IFF_DRV_RUNNING)) {
 #ifdef INET6
 		/*
 		 * XXX
 		 *
 		 * Hack to prevent the initialization-time generated
 		 * IPv6 multicast packet to erroneously cause a
 		 * dialout event in case IPv6 has been
 		 * administratively disabled on that interface.
 		 */
-		if (dst->sa_family == AF_INET6 &&
+		if (af == AF_INET6 &&
 		    !(sp->confflags & CONF_ENABLE_IPV6))
 			goto drop;
 #endif
 		/*
 		 * Interface is not yet running, but auto-dial.  Need
 		 * to start LCP for it.
 		 */
 		ifp->if_drv_flags |= IFF_DRV_RUNNING;
 		lcp.Open(sp);
 	}
 
 #ifdef INET
-	if (dst->sa_family == AF_INET) {
+	if (af == AF_INET) {
 		/* XXX Check mbuf length here? */
 		struct ip *ip = mtod (m, struct ip*);
 		/*
 		 * ip_hl counts 32-bit words, so locate the TCP header by
 		 * bytes; stepping in units of long would be wrong wherever
 		 * long is wider than 32 bits.
 		 */
 		struct tcphdr *tcp =
 		    (struct tcphdr*) ((char*)ip + (ip->ip_hl << 2));
 
 		/*
 		 * When using dynamic local IP address assignment by using
 		 * 0.0.0.0 as a local address, the first TCP session will
 		 * not connect because the local TCP checksum is computed
 		 * using 0.0.0.0 which will later become our real IP address
 		 * so the TCP checksum computed at the remote end will
 		 * become invalid. So we
 		 * - don't let packets with src ip addr 0 thru
 		 * - we flag TCP packets with src ip 0 as an error
 		 */
 
 		if(ip->ip_src.s_addr == INADDR_ANY)	/* -hm */
 		{
 			/*
 			 * Fetch the protocol before freeing the mbuf: ip
 			 * points into the mbuf data and must not be
 			 * dereferenced after m_freem().
 			 */
 			u_char ipp = ip->ip_p;
 
 			m_freem(m);
 			SPPP_UNLOCK(sp);
 			if(ipp == IPPROTO_TCP)
 				return(EADDRNOTAVAIL);
 			else
 				return(0);
 		}
 
 		/*
 		 * Put low delay, telnet, rlogin and ftp control packets
 		 * in front of the queue or let ALTQ take care.
 		 */
 		if (ALTQ_IS_ENABLED(&ifp->if_snd))
 			;
 		else if (_IF_QFULL(&sp->pp_fastq))
 			;
 		else if (ip->ip_tos & IPTOS_LOWDELAY)
 			ifq = &sp->pp_fastq;
 		/* The ports are only read if the TCP header is in this mbuf. */
 		else if (m->m_len < (ip->ip_hl << 2) + sizeof *tcp)
 			;
 		else if (ip->ip_p != IPPROTO_TCP)
 			;
 		else if (INTERACTIVE (ntohs (tcp->th_sport)))
 			ifq = &sp->pp_fastq;
 		else if (INTERACTIVE (ntohs (tcp->th_dport)))
 			ifq = &sp->pp_fastq;
 
 		/*
 		 * Do IP Header compression
 		 */
 		if (sp->pp_mode != IFF_CISCO && sp->pp_mode != PP_FR &&
 		    (sp->ipcp.flags & IPCP_VJ) && ip->ip_p == IPPROTO_TCP)
 			switch (sl_compress_tcp(m, ip, sp->pp_comp,
 						sp->ipcp.compress_cid)) {
 			case TYPE_COMPRESSED_TCP:
 				ipproto = PPP_VJ_COMP;
 				break;
 			case TYPE_UNCOMPRESSED_TCP:
 				ipproto = PPP_VJ_UCOMP;
 				break;
 			case TYPE_IP:
 				ipproto = PPP_IP;
 				break;
 			default:
 				m_freem(m);
 				SPPP_UNLOCK(sp);
 				return (EINVAL);
 			}
 	}
 #endif
 
 #ifdef INET6
-	if (dst->sa_family == AF_INET6) {
+	if (af == AF_INET6) {
 		/* XXX do something tricky here? */
 	}
 #endif
 
 	if (sp->pp_mode == PP_FR) {
 		/* Add frame relay header. */
-		m = sppp_fr_header (sp, m, dst->sa_family);
+		m = sppp_fr_header (sp, m, af);
 		if (! m)
 			goto nobufs;
 		goto out;
 	}
 
 	/*
 	 * Prepend general data packet PPP header. For now, IP only.
 	 */
 	M_PREPEND (m, PPP_HEADER_LEN, M_NOWAIT);
 	if (! m) {
 nobufs:		if (debug)
 			log(LOG_DEBUG, SPP_FMT "no memory for transmit header\n",
 				SPP_ARGS(ifp));
 		if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
 		SPPP_UNLOCK(sp);
 		return (ENOBUFS);
 	}
 	/*
 	 * May want to check size of packet
 	 * (albeit due to the implementation it's always enough)
 	 */
 	h = mtod (m, struct ppp_header*);
 	if (sp->pp_mode == IFF_CISCO) {
 		h->address = CISCO_UNICAST;        /* unicast address */
 		h->control = 0;
 	} else {
 		h->address = PPP_ALLSTATIONS;        /* broadcast address */
 		h->control = PPP_UI;                 /* Unnumbered Info */
 	}
 
-	switch (dst->sa_family) {
+	switch (af) {
 #ifdef INET
 	case AF_INET:   /* Internet Protocol */
 		if (sp->pp_mode == IFF_CISCO)
 			h->protocol = htons (ETHERTYPE_IP);
 		else {
 			/*
 			 * Don't choke with an ENETDOWN early.  It's
 			 * possible that we just started dialing out,
 			 * so don't drop the packet immediately.  If
 			 * we notice that we run out of buffer space
 			 * below, we will however remember that we are
 			 * not ready to carry IP packets, and return
 			 * ENETDOWN, as opposed to ENOBUFS.
 			 */
 			h->protocol = htons(ipproto);
 			if (sp->state[IDX_IPCP] != STATE_OPENED)
 				rv = ENETDOWN;
 		}
 		break;
 #endif
 #ifdef INET6
 	case AF_INET6:   /* Internet Protocol */
 		if (sp->pp_mode == IFF_CISCO)
 			h->protocol = htons (ETHERTYPE_IPV6);
 		else {
 			/*
 			 * Don't choke with an ENETDOWN early.  It's
 			 * possible that we just started dialing out,
 			 * so don't drop the packet immediately.  If
 			 * we notice that we run out of buffer space
 			 * below, we will however remember that we are
 			 * not ready to carry IP packets, and return
 			 * ENETDOWN, as opposed to ENOBUFS.
 			 */
 			h->protocol = htons(PPP_IPV6);
 			if (sp->state[IDX_IPV6CP] != STATE_OPENED)
 				rv = ENETDOWN;
 		}
 		break;
 #endif
 	default:
 		m_freem (m);
 		if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
 		SPPP_UNLOCK(sp);
 		return (EAFNOSUPPORT);
 	}
 
 	/*
 	 * Queue message on interface, and start output if interface
 	 * not yet active.
 	 */
 out:
 	if (ifq != NULL)
 		error = !(IF_HANDOFF_ADJ(ifq, m, ifp, 3));
 	else
 		IFQ_HANDOFF_ADJ(ifp, m, 3, error);
 	if (error) {
 		if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
 		SPPP_UNLOCK(sp);
 		return (rv? rv: ENOBUFS);
 	}
 	SPPP_UNLOCK(sp);
 	/*
 	 * Unlike in sppp_input(), we can always bump the timestamp
 	 * here since sppp_output() is only called on behalf of
 	 * network-layer traffic; control-layer traffic is handled
 	 * by sppp_cp_send().
 	 */
 	sp->pp_last_sent = time_uptime;
 	return (0);
 }
 
 /*
  * Set up the sppp state of a freshly created interface: lock, callouts,
  * interface defaults, queue limits, if_output/if_start hooks, VJ
  * compression state and the per-protocol state machines.
  */
 void
 sppp_attach(struct ifnet *ifp)
 {
 	struct sppp *sp = IFP2SP(ifp);
 
 	/* Initialize mtx lock */
 	mtx_init(&sp->mtx, "sppp", MTX_NETWORK_LOCK, MTX_DEF | MTX_RECURSE);
 
 	/* Initialize keepalive handler; first run after ten seconds. */
 	callout_init(&sp->keepalive_callout, 1);
 	callout_reset(&sp->keepalive_callout, hz * 10, sppp_keepalive,
 	    (void *)sp);
 
 	/* Interface defaults. */
 	ifp->if_mtu = PP_MTU;
 	ifp->if_flags = IFF_POINTOPOINT | IFF_MULTICAST;
 	ifp->if_output = sppp_output;
 #if 0
 	sp->pp_flags = PP_KEEPALIVE;
 #endif
 
 	/* Queue limits. */
 	ifp->if_snd.ifq_maxlen = 32;
 	sp->pp_fastq.ifq_maxlen = 32;
 	sp->pp_cpq.ifq_maxlen = 20;
 
 	/* Counters, sequence numbers and phase start out cleared. */
 	sp->pp_loopcnt = 0;
 	sp->pp_alivecnt = 0;
 	bzero(&sp->pp_seq[0], sizeof(sp->pp_seq));
 	bzero(&sp->pp_rseq[0], sizeof(sp->pp_rseq));
 	sp->pp_phase = PHASE_DEAD;
 	sp->pp_up = sppp_pp_up;
 	sp->pp_down = sppp_pp_down;
 
 	/* The private queue mutexes are only created if not yet set up. */
 	if (!mtx_initialized(&sp->pp_cpq.ifq_mtx))
 		mtx_init(&sp->pp_cpq.ifq_mtx, "sppp_cpq", NULL, MTX_DEF);
 	if (!mtx_initialized(&sp->pp_fastq.ifq_mtx))
 		mtx_init(&sp->pp_fastq.ifq_mtx, "sppp_fastq", NULL, MTX_DEF);
 
 	sp->pp_last_recv = sp->pp_last_sent = time_uptime;
 
 	/* Default configuration flags depend on the compiled-in families. */
 	sp->confflags = 0;
 #ifdef INET
 	sp->confflags |= CONF_ENABLE_VJ;
 #endif
 #ifdef INET6
 	sp->confflags |= CONF_ENABLE_IPV6;
 #endif
 
 	/* Interpose sppp_ifstart() in front of the driver's if_start. */
 	callout_init(&sp->ifstart_callout, 1);
 	sp->if_start = ifp->if_start;
 	ifp->if_start = sppp_ifstart;
 
 	/* VJ header compression state. */
 	sp->pp_comp = malloc(sizeof(struct slcompress), M_TEMP, M_WAITOK);
 	sl_compress_init(sp->pp_comp, -1);
 
 	/* Per-protocol initialisation. */
 	sppp_lcp_init(sp);
 	sppp_ipcp_init(sp);
 	sppp_ipv6cp_init(sp);
 	sppp_pap_init(sp);
 	sppp_chap_init(sp);
 }
 
 void
 sppp_detach(struct ifnet *ifp)
 {
 	struct sppp *sp = IFP2SP(ifp);
 	int i;
 
 	KASSERT(mtx_initialized(&sp->mtx), ("sppp mutex is not initialized"));
 
 	/* Stop keepalive handler. */
  	callout_drain(&sp->keepalive_callout);
 
 	for (i = 0; i < IDX_COUNT; i++) {
 		callout_drain(&sp->ch[i]);
 	}
 	callout_drain(&sp->pap_my_to_ch);
 
 	mtx_destroy(&sp->pp_cpq.ifq_mtx);
 	mtx_destroy(&sp->pp_fastq.ifq_mtx);
 	mtx_destroy(&sp->mtx);
 }
 
 /*
  * Flush the interface output queue, the fast queue and the control
  * protocol queue.  Takes no lock itself; sppp_flush() and sppp_ioctl()
  * call it with the sppp lock held.
  */
 static void
 sppp_flush_unlocked(struct ifnet *ifp)
 {
 	struct sppp *softc = IFP2SP(ifp);
 	struct ifqueue *sendq = (struct ifqueue *)&SP2IFP(softc)->if_snd;
 
 	sppp_qflush(sendq);
 	sppp_qflush(&softc->pp_fastq);
 	sppp_qflush(&softc->pp_cpq);
 }
 
 /*
  * Locked wrapper: flush all output queues of the interface while
  * holding the sppp lock.
  */
 void
 sppp_flush(struct ifnet *ifp)
 {
 	struct sppp *softc = IFP2SP(ifp);
 
 	SPPP_LOCK(softc);
 	sppp_flush_unlocked(ifp);
 	SPPP_UNLOCK(softc);
 }
 
 /*
  * Check if the output queue is empty.
  */
 int
 sppp_isempty(struct ifnet *ifp)
 {
 	struct sppp *sp = IFP2SP(ifp);
 	int empty;
 
 	SPPP_LOCK(sp);
 	empty = !sp->pp_fastq.ifq_head && !sp->pp_cpq.ifq_head &&
 		!SP2IFP(sp)->if_snd.ifq_head;
 	SPPP_UNLOCK(sp);
 	return (empty);
 }
 
 /*
  * Get next packet to send.
  *
  * The control protocol queue always has priority.  The fast queue and
  * the interface send queue are only served once sppp_ncp_check()
  * succeeds, or unconditionally in Cisco and frame relay mode.
  * Returns NULL when there is nothing to send.
  */
 struct mbuf *
 sppp_dequeue(struct ifnet *ifp)
 {
 	struct sppp *softc = IFP2SP(ifp);
 	struct mbuf *pkt;
 
 	SPPP_LOCK(softc);
 	IF_DEQUEUE(&softc->pp_cpq, pkt);
 	if (pkt == NULL) {
 		if (sppp_ncp_check(softc) ||
 		    softc->pp_mode == IFF_CISCO ||
 		    softc->pp_mode == PP_FR) {
 			IF_DEQUEUE(&softc->pp_fastq, pkt);
 			if (pkt == NULL)
 				IF_DEQUEUE(&SP2IFP(softc)->if_snd, pkt);
 		}
 	}
 	SPPP_UNLOCK(softc);
 
 	return pkt;
 }
 
 /*
  * Pick the next packet, do not remove it from the queue.
  *
  * Looks at the control protocol queue first; the fast queue and the
  * interface send queue are only considered in the network phase or in
  * Cisco / frame relay mode.  Returns NULL when nothing is queued.
  */
 struct mbuf *
 sppp_pick(struct ifnet *ifp)
 {
 	struct sppp *softc = IFP2SP(ifp);
 	struct mbuf *head;
 
 	SPPP_LOCK(softc);
 
 	head = softc->pp_cpq.ifq_head;
 	if (head == NULL) {
 		if (softc->pp_phase == PHASE_NETWORK ||
 		    softc->pp_mode == IFF_CISCO ||
 		    softc->pp_mode == PP_FR) {
 			head = softc->pp_fastq.ifq_head;
 			if (head == NULL)
 				head = SP2IFP(softc)->if_snd.ifq_head;
 		}
 	}
 
 	SPPP_UNLOCK(softc);
 	return (head);
 }
 
 /*
  * Process an ioctl request.  Called on low priority level.
  *
  * Runs with the sppp lock held for the whole request and releases it
  * on every exit path.  Returns 0 on success, EINVAL for an MTU outside
  * [128, sp->lcp.their_mru], ENOTTY for unknown commands, or whatever
  * sppp_params() returns for SIOC[GS]IFGENERIC.
  */
 int
 sppp_ioctl(struct ifnet *ifp, IOCTL_CMD_T cmd, void *data)
 {
 	struct ifreq *ifr = (struct ifreq*) data;
 	struct sppp *sp = IFP2SP(ifp);
 	int rv, going_up, going_down, newmode;
 
 	SPPP_LOCK(sp);
 	rv = 0;
 	switch (cmd) {
 	case SIOCAIFADDR:
 		break;
 
 	case SIOCSIFADDR:
 		/* set the interface "up" when assigning an IP address */
 		ifp->if_flags |= IFF_UP;
 		/* FALLTHROUGH */
 
 	case SIOCSIFFLAGS:
 		going_up = ifp->if_flags & IFF_UP &&
 			(ifp->if_drv_flags & IFF_DRV_RUNNING) == 0;
 		going_down = (ifp->if_flags & IFF_UP) == 0 &&
 			ifp->if_drv_flags & IFF_DRV_RUNNING;
 
 		/*
 		 * At most one of the mode flags survives, in the priority
 		 * order PASSIVE, AUTO, CISCO.
 		 */
 		newmode = ifp->if_flags & IFF_PASSIVE;
 		if (!newmode)
 			newmode = ifp->if_flags & IFF_AUTO;
 		if (!newmode)
 			newmode = ifp->if_flags & IFF_CISCO;
 		ifp->if_flags &= ~(IFF_PASSIVE | IFF_AUTO | IFF_CISCO);
 		ifp->if_flags |= newmode;
 
 		/* Without any of those flags, frame relay may be selected. */
 		if (!newmode)
 			newmode = sp->pp_flags & PP_FR;
 
 		/* A mode change restarts a running interface. */
 		if (newmode != sp->pp_mode) {
 			going_down = 1;
 			if (!going_up)
 				going_up = ifp->if_drv_flags & IFF_DRV_RUNNING;
 		}
 
 		if (going_down) {
 			if (sp->pp_mode != IFF_CISCO &&
 			    sp->pp_mode != PP_FR)
 				lcp.Close(sp);
 			else if (sp->pp_tlf)
 				(sp->pp_tlf)(sp);
 			sppp_flush_unlocked(ifp);
 			ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
 			sp->pp_mode = newmode;
 		}
 
 		if (going_up) {
 			if (sp->pp_mode != IFF_CISCO &&
 			    sp->pp_mode != PP_FR)
 				lcp.Close(sp);
 			sp->pp_mode = newmode;
 			if (sp->pp_mode == 0) {
 				ifp->if_drv_flags |= IFF_DRV_RUNNING;
 				lcp.Open(sp);
 			}
 			if ((sp->pp_mode == IFF_CISCO) ||
 			    (sp->pp_mode == PP_FR)) {
 				if (sp->pp_tls)
 					(sp->pp_tls)(sp);
 				ifp->if_drv_flags |= IFF_DRV_RUNNING;
 			}
 		}
 
 		break;
 
 #ifdef SIOCSIFMTU
 #ifndef ifr_mtu
 #define ifr_mtu ifr_metric
 #endif
 	case SIOCSIFMTU:
 		if (ifr->ifr_mtu < 128 || ifr->ifr_mtu > sp->lcp.their_mru) {
 			/* Leave through the common exit so the lock is dropped. */
 			rv = EINVAL;
 			break;
 		}
 		ifp->if_mtu = ifr->ifr_mtu;
 		break;
 #endif
 #ifdef SLIOCSETMTU
 	case SLIOCSETMTU:
 		if (*(short*)data < 128 || *(short*)data > sp->lcp.their_mru) {
 			/* Leave through the common exit so the lock is dropped. */
 			rv = EINVAL;
 			break;
 		}
 		ifp->if_mtu = *(short*)data;
 		break;
 #endif
 #ifdef SIOCGIFMTU
 	case SIOCGIFMTU:
 		ifr->ifr_mtu = ifp->if_mtu;
 		break;
 #endif
 #ifdef SLIOCGETMTU
 	case SLIOCGETMTU:
 		*(short*)data = ifp->if_mtu;
 		break;
 #endif
 	case SIOCADDMULTI:
 	case SIOCDELMULTI:
 		break;
 
 	case SIOCGIFGENERIC:
 	case SIOCSIFGENERIC:
 		rv = sppp_params(sp, cmd, data);
 		break;
 
 	default:
 		rv = ENOTTY;
 	}
 	SPPP_UNLOCK(sp);
 	return rv;
 }
 
 /*
  * Cisco framing implementation.
  */
 
 /*
  * Handle incoming Cisco keepalive protocol packets.
  */
 static void
 sppp_cisco_input(struct sppp *sp, struct mbuf *m)
 {
 	STDDCL;
 	struct cisco_packet *h;
 	u_long me, mymask;
 
 	if (m->m_pkthdr.len < CISCO_PACKET_LEN) {
 		if (debug)
 			log(LOG_DEBUG,
 			    SPP_FMT "cisco invalid packet length: %d bytes\n",
 			    SPP_ARGS(ifp), m->m_pkthdr.len);
 		return;
 	}
 	h = mtod (m, struct cisco_packet*);
 	if (debug)
 		log(LOG_DEBUG,
 		    SPP_FMT "cisco input: %d bytes "
 		    "<0x%lx 0x%lx 0x%lx 0x%x 0x%x-0x%x>\n",
 		    SPP_ARGS(ifp), m->m_pkthdr.len,
 		    (u_long)ntohl (h->type), (u_long)h->par1, (u_long)h->par2, (u_int)h->rel,
 		    (u_int)h->time0, (u_int)h->time1);
 	switch (ntohl (h->type)) {
 	default:
 		if (debug)
 			log(-1, SPP_FMT "cisco unknown packet type: 0x%lx\n",
 			       SPP_ARGS(ifp), (u_long)ntohl (h->type));
 		break;
 	case CISCO_ADDR_REPLY:
 		/* Reply on address request, ignore */
 		break;
 	case CISCO_KEEPALIVE_REQ:
 		sp->pp_alivecnt = 0;
 		sp->pp_rseq[IDX_LCP] = ntohl (h->par1);
 		if (sp->pp_seq[IDX_LCP] == sp->pp_rseq[IDX_LCP]) {
 			/* Local and remote sequence numbers are equal.
 			 * Probably, the line is in loopback mode. */
 			if (sp->pp_loopcnt >= MAXALIVECNT) {
 				printf (SPP_FMT "loopback\n",
 					SPP_ARGS(ifp));
 				sp->pp_loopcnt = 0;
 				if (ifp->if_flags & IFF_UP) {
 					if_down (ifp);
 					sppp_qflush (&sp->pp_cpq);
 				}
 			}
 			++sp->pp_loopcnt;
 
 			/* Generate new local sequence number */
 			sp->pp_seq[IDX_LCP] = random();
 			break;
 		}
 		sp->pp_loopcnt = 0;
 		if (! (ifp->if_flags & IFF_UP) &&
 		    (ifp->if_drv_flags & IFF_DRV_RUNNING)) {
 			if_up(ifp);
 			printf (SPP_FMT "up\n", SPP_ARGS(ifp));
 		}
 		break;
 	case CISCO_ADDR_REQ:
 		sppp_get_ip_addrs(sp, &me, 0, &mymask);
 		if (me != 0L)
 			sppp_cisco_send(sp, CISCO_ADDR_REPLY, me, mymask);
 		break;
 	}
 }
 
 /*
  * Send Cisco keepalive packet.
  */
 static void
 sppp_cisco_send(struct sppp *sp, int type, long par1, long par2)
 {
 	STDDCL;
 	struct ppp_header *h;
 	struct cisco_packet *ch;
 	struct mbuf *m;
 	struct timeval tv;
 
 	getmicrouptime(&tv);
 
 	MGETHDR (m, M_NOWAIT, MT_DATA);
 	if (! m)
 		return;
 	m->m_pkthdr.len = m->m_len = PPP_HEADER_LEN + CISCO_PACKET_LEN;
 	m->m_pkthdr.rcvif = 0;
 
 	h = mtod (m, struct ppp_header*);
 	h->address = CISCO_MULTICAST;
 	h->control = 0;
 	h->protocol = htons (CISCO_KEEPALIVE);
 
 	ch = (struct cisco_packet*) (h + 1);
 	ch->type = htonl (type);
 	ch->par1 = htonl (par1);
 	ch->par2 = htonl (par2);
 	ch->rel = -1;
 
 	ch->time0 = htons ((u_short) (tv.tv_sec >> 16));
 	ch->time1 = htons ((u_short) tv.tv_sec);
 
 	if (debug)
 		log(LOG_DEBUG,
 		    SPP_FMT "cisco output: <0x%lx 0x%lx 0x%lx 0x%x 0x%x-0x%x>\n",
 			SPP_ARGS(ifp), (u_long)ntohl (ch->type), (u_long)ch->par1,
 			(u_long)ch->par2, (u_int)ch->rel, (u_int)ch->time0, (u_int)ch->time1);
 
 	if (! IF_HANDOFF_ADJ(&sp->pp_cpq, m, ifp, 3))
 		if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
 }
 
 /*
  * PPP protocol implementation.
  */
 
 /*
  * Send PPP control protocol packet.
  *
  * proto is the PPP protocol number, type/ident go into the control
  * header, and len bytes from data form the payload.  The payload is
  * truncated so that both headers plus data stay within MHLEN.  Nothing
  * is sent when no mbuf is available; a failed handoff to the control
  * protocol queue is counted as an output error.
  */
 static void
 sppp_cp_send(struct sppp *sp, u_short proto, u_char type,
 	     u_char ident, u_short len, void *data)
 {
 	STDDCL;
 	struct ppp_header *ppph;
 	struct lcp_header *cph;
 	struct mbuf *pkt;
 
 	if (len > MHLEN - PPP_HEADER_LEN - LCP_HEADER_LEN)
 		len = MHLEN - PPP_HEADER_LEN - LCP_HEADER_LEN;
 
 	MGETHDR(pkt, M_NOWAIT, MT_DATA);
 	if (pkt == NULL)
 		return;
 	pkt->m_pkthdr.len = pkt->m_len = PPP_HEADER_LEN + LCP_HEADER_LEN + len;
 	pkt->m_pkthdr.rcvif = 0;
 
 	/* PPP header. */
 	ppph = mtod(pkt, struct ppp_header *);
 	ppph->address = PPP_ALLSTATIONS;	/* broadcast address */
 	ppph->control = PPP_UI;			/* Unnumbered Info */
 	ppph->protocol = htons(proto);
 
 	/* Control protocol header, followed by the payload (if any). */
 	cph = (struct lcp_header *)(ppph + 1);
 	cph->type = type;
 	cph->ident = ident;
 	cph->len = htons(LCP_HEADER_LEN + len);
 	if (len)
 		bcopy(data, cph + 1, len);
 
 	if (debug) {
 		log(LOG_DEBUG, SPP_FMT "%s output <%s id=0x%x len=%d",
 		    SPP_ARGS(ifp),
 		    sppp_proto_name(proto),
 		    sppp_cp_type_name(cph->type), cph->ident,
 		    ntohs(cph->len));
 		sppp_print_bytes((u_char *)(cph + 1), len);
 		log(-1, ">\n");
 	}
 
 	if (!IF_HANDOFF_ADJ(&sp->pp_cpq, pkt, ifp, 3))
 		if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
 }
 
 /*
  * Handle incoming PPP control protocol packets.
  */
 static void
 sppp_cp_input(const struct cp *cp, struct sppp *sp, struct mbuf *m)
 {
 	STDDCL;
 	struct lcp_header *h;
 	int len = m->m_pkthdr.len;
 	int rv;
 	u_char *p;
 
 	if (len < 4) {
 		if (debug)
 			log(LOG_DEBUG,
 			    SPP_FMT "%s invalid packet length: %d bytes\n",
 			    SPP_ARGS(ifp), cp->name, len);
 		return;
 	}
 	h = mtod (m, struct lcp_header*);
 	if (debug) {
 		log(LOG_DEBUG,
 		    SPP_FMT "%s input(%s): <%s id=0x%x len=%d",
 		    SPP_ARGS(ifp), cp->name,
 		    sppp_state_name(sp->state[cp->protoidx]),
 		    sppp_cp_type_name (h->type), h->ident, ntohs (h->len));
 		sppp_print_bytes ((u_char*) (h+1), len-4);
 		log(-1, ">\n");
 	}
 	if (len > ntohs (h->len))
 		len = ntohs (h->len);
 	p = (u_char *)(h + 1);
 	switch (h->type) {
 	case CONF_REQ:
 		if (len < 4) {
 			if (debug)
 				log(-1, SPP_FMT "%s invalid conf-req length %d\n",
 				       SPP_ARGS(ifp), cp->name,
 				       len);
 			if_inc_counter(ifp, IFCOUNTER_IERRORS, 1);
 			break;
 		}
 		/* handle states where RCR doesn't get a SCA/SCN */
 		switch (sp->state[cp->protoidx]) {
 		case STATE_CLOSING:
 		case STATE_STOPPING:
 			return;
 		case STATE_CLOSED:
 			sppp_cp_send(sp, cp->proto, TERM_ACK, h->ident,
 				     0, 0);
 			return;
 		}
 		rv = (cp->RCR)(sp, h, len);
 		switch (sp->state[cp->protoidx]) {
 		case STATE_OPENED:
 			(cp->tld)(sp);
 			(cp->scr)(sp);
 			/* FALLTHROUGH */
 		case STATE_ACK_SENT:
 		case STATE_REQ_SENT:
 			/*
 			 * sppp_cp_change_state() has the side effect of
 			 * restarting the timeouts. We want to avoid that
 			 * if the state doesn't change, otherwise we would
 			 * never time out and resend a configuration request
 			 * that got lost.
 			 */
 			if (sp->state[cp->protoidx] == (rv ? STATE_ACK_SENT:
 			    STATE_REQ_SENT))
 				break;
 			sppp_cp_change_state(cp, sp, rv?
 					     STATE_ACK_SENT: STATE_REQ_SENT);
 			break;
 		case STATE_STOPPED:
 			sp->rst_counter[cp->protoidx] = sp->lcp.max_configure;
 			(cp->scr)(sp);
 			sppp_cp_change_state(cp, sp, rv?
 					     STATE_ACK_SENT: STATE_REQ_SENT);
 			break;
 		case STATE_ACK_RCVD:
 			if (rv) {
 				sppp_cp_change_state(cp, sp, STATE_OPENED);
 				if (debug)
 					log(LOG_DEBUG, SPP_FMT "%s tlu\n",
 					    SPP_ARGS(ifp),
 					    cp->name);
 				(cp->tlu)(sp);
 			} else
 				sppp_cp_change_state(cp, sp, STATE_ACK_RCVD);
 			break;
 		default:
 			printf(SPP_FMT "%s illegal %s in state %s\n",
 			       SPP_ARGS(ifp), cp->name,
 			       sppp_cp_type_name(h->type),
 			       sppp_state_name(sp->state[cp->protoidx]));
 			if_inc_counter(ifp, IFCOUNTER_IERRORS, 1);
 		}
 		break;
 	case CONF_ACK:
 		if (h->ident != sp->confid[cp->protoidx]) {
 			if (debug)
 				log(-1, SPP_FMT "%s id mismatch 0x%x != 0x%x\n",
 				       SPP_ARGS(ifp), cp->name,
 				       h->ident, sp->confid[cp->protoidx]);
 			if_inc_counter(ifp, IFCOUNTER_IERRORS, 1);
 			break;
 		}
 		switch (sp->state[cp->protoidx]) {
 		case STATE_CLOSED:
 		case STATE_STOPPED:
 			sppp_cp_send(sp, cp->proto, TERM_ACK, h->ident, 0, 0);
 			break;
 		case STATE_CLOSING:
 		case STATE_STOPPING:
 			break;
 		case STATE_REQ_SENT:
 			sp->rst_counter[cp->protoidx] = sp->lcp.max_configure;
 			sppp_cp_change_state(cp, sp, STATE_ACK_RCVD);
 			break;
 		case STATE_OPENED:
 			(cp->tld)(sp);
 			/* FALLTHROUGH */
 		case STATE_ACK_RCVD:
 			(cp->scr)(sp);
 			sppp_cp_change_state(cp, sp, STATE_REQ_SENT);
 			break;
 		case STATE_ACK_SENT:
 			sp->rst_counter[cp->protoidx] = sp->lcp.max_configure;
 			sppp_cp_change_state(cp, sp, STATE_OPENED);
 			if (debug)
 				log(LOG_DEBUG, SPP_FMT "%s tlu\n",
 				       SPP_ARGS(ifp), cp->name);
 			(cp->tlu)(sp);
 			break;
 		default:
 			printf(SPP_FMT "%s illegal %s in state %s\n",
 			       SPP_ARGS(ifp), cp->name,
 			       sppp_cp_type_name(h->type),
 			       sppp_state_name(sp->state[cp->protoidx]));
 			if_inc_counter(ifp, IFCOUNTER_IERRORS, 1);
 		}
 		break;
 	case CONF_NAK:
 	case CONF_REJ:
 		if (h->ident != sp->confid[cp->protoidx]) {
 			if (debug)
 				log(-1, SPP_FMT "%s id mismatch 0x%x != 0x%x\n",
 				       SPP_ARGS(ifp), cp->name,
 				       h->ident, sp->confid[cp->protoidx]);
 			if_inc_counter(ifp, IFCOUNTER_IERRORS, 1);
 			break;
 		}
 		if (h->type == CONF_NAK)
 			(cp->RCN_nak)(sp, h, len);
 		else /* CONF_REJ */
 			(cp->RCN_rej)(sp, h, len);
 
 		switch (sp->state[cp->protoidx]) {
 		case STATE_CLOSED:
 		case STATE_STOPPED:
 			sppp_cp_send(sp, cp->proto, TERM_ACK, h->ident, 0, 0);
 			break;
 		case STATE_REQ_SENT:
 		case STATE_ACK_SENT:
 			sp->rst_counter[cp->protoidx] = sp->lcp.max_configure;
 			/*
 			 * Slow things down a bit if we think we might be
 			 * in loopback. Depend on the timeout to send the
 			 * next configuration request.
 			 */
 			if (sp->pp_loopcnt)
 				break;
 			(cp->scr)(sp);
 			break;
 		case STATE_OPENED:
 			(cp->tld)(sp);
 			/* FALLTHROUGH */
 		case STATE_ACK_RCVD:
 			sppp_cp_change_state(cp, sp, STATE_REQ_SENT);
 			(cp->scr)(sp);
 			break;
 		case STATE_CLOSING:
 		case STATE_STOPPING:
 			break;
 		default:
 			printf(SPP_FMT "%s illegal %s in state %s\n",
 			       SPP_ARGS(ifp), cp->name,
 			       sppp_cp_type_name(h->type),
 			       sppp_state_name(sp->state[cp->protoidx]));
 			if_inc_counter(ifp, IFCOUNTER_IERRORS, 1);
 		}
 		break;
 
 	case TERM_REQ:
 		switch (sp->state[cp->protoidx]) {
 		case STATE_ACK_RCVD:
 		case STATE_ACK_SENT:
 			sppp_cp_change_state(cp, sp, STATE_REQ_SENT);
 			/* FALLTHROUGH */
 		case STATE_CLOSED:
 		case STATE_STOPPED:
 		case STATE_CLOSING:
 		case STATE_STOPPING:
 		case STATE_REQ_SENT:
 		  sta:
 			/* Send Terminate-Ack packet. */
 			if (debug)
 				log(LOG_DEBUG, SPP_FMT "%s send terminate-ack\n",
 				    SPP_ARGS(ifp), cp->name);
 			sppp_cp_send(sp, cp->proto, TERM_ACK, h->ident, 0, 0);
 			break;
 		case STATE_OPENED:
 			(cp->tld)(sp);
 			sp->rst_counter[cp->protoidx] = 0;
 			sppp_cp_change_state(cp, sp, STATE_STOPPING);
 			goto sta;
 			break;
 		default:
 			printf(SPP_FMT "%s illegal %s in state %s\n",
 			       SPP_ARGS(ifp), cp->name,
 			       sppp_cp_type_name(h->type),
 			       sppp_state_name(sp->state[cp->protoidx]));
 			if_inc_counter(ifp, IFCOUNTER_IERRORS, 1);
 		}
 		break;
 	case TERM_ACK:
 		switch (sp->state[cp->protoidx]) {
 		case STATE_CLOSED:
 		case STATE_STOPPED:
 		case STATE_REQ_SENT:
 		case STATE_ACK_SENT:
 			break;
 		case STATE_CLOSING:
 			sppp_cp_change_state(cp, sp, STATE_CLOSED);
 			(cp->tlf)(sp);
 			break;
 		case STATE_STOPPING:
 			sppp_cp_change_state(cp, sp, STATE_STOPPED);
 			(cp->tlf)(sp);
 			break;
 		case STATE_ACK_RCVD:
 			sppp_cp_change_state(cp, sp, STATE_REQ_SENT);
 			break;
 		case STATE_OPENED:
 			(cp->tld)(sp);
 			(cp->scr)(sp);
 			sppp_cp_change_state(cp, sp, STATE_ACK_RCVD);
 			break;
 		default:
 			printf(SPP_FMT "%s illegal %s in state %s\n",
 			       SPP_ARGS(ifp), cp->name,
 			       sppp_cp_type_name(h->type),
 			       sppp_state_name(sp->state[cp->protoidx]));
 			if_inc_counter(ifp, IFCOUNTER_IERRORS, 1);
 		}
 		break;
 	case CODE_REJ:
 		/* XXX catastrophic rejects (RXJ-) aren't handled yet. */
 		log(LOG_INFO,
 		    SPP_FMT "%s: ignoring RXJ (%s) for proto 0x%x, "
 		    "danger will robinson\n",
 		    SPP_ARGS(ifp), cp->name,
 		    sppp_cp_type_name(h->type), ntohs(*((u_short *)p)));
 		switch (sp->state[cp->protoidx]) {
 		case STATE_CLOSED:
 		case STATE_STOPPED:
 		case STATE_REQ_SENT:
 		case STATE_ACK_SENT:
 		case STATE_CLOSING:
 		case STATE_STOPPING:
 		case STATE_OPENED:
 			break;
 		case STATE_ACK_RCVD:
 			sppp_cp_change_state(cp, sp, STATE_REQ_SENT);
 			break;
 		default:
 			printf(SPP_FMT "%s illegal %s in state %s\n",
 			       SPP_ARGS(ifp), cp->name,
 			       sppp_cp_type_name(h->type),
 			       sppp_state_name(sp->state[cp->protoidx]));
 			if_inc_counter(ifp, IFCOUNTER_IERRORS, 1);
 		}
 		break;
 	case PROTO_REJ:
 	    {
 		int catastrophic;
 		const struct cp *upper;
 		int i;
 		u_int16_t proto;
 
 		catastrophic = 0;
 		upper = NULL;
 		proto = ntohs(*((u_int16_t *)p));
 		for (i = 0; i < IDX_COUNT; i++) {
 			if (cps[i]->proto == proto) {
 				upper = cps[i];
 				break;
 			}
 		}
 		if (upper == NULL)
 			catastrophic++;
 
 		if (catastrophic || debug)
 			log(catastrophic? LOG_INFO: LOG_DEBUG,
 			    SPP_FMT "%s: RXJ%c (%s) for proto 0x%x (%s/%s)\n",
 			    SPP_ARGS(ifp), cp->name, catastrophic ? '-' : '+',
 			    sppp_cp_type_name(h->type), proto,
 			    upper ? upper->name : "unknown",
 			    upper ? sppp_state_name(sp->state[upper->protoidx]) : "?");
 
 		/*
 		 * if we got RXJ+ against conf-req, the peer does not implement
 		 * this particular protocol type.  terminate the protocol.
 		 */
 		if (upper && !catastrophic) {
 			if (sp->state[upper->protoidx] == STATE_REQ_SENT) {
 				upper->Close(sp);
 				break;
 			}
 		}
 
 		/* XXX catastrophic rejects (RXJ-) aren't handled yet. */
 		switch (sp->state[cp->protoidx]) {
 		case STATE_CLOSED:
 		case STATE_STOPPED:
 		case STATE_REQ_SENT:
 		case STATE_ACK_SENT:
 		case STATE_CLOSING:
 		case STATE_STOPPING:
 		case STATE_OPENED:
 			break;
 		case STATE_ACK_RCVD:
 			sppp_cp_change_state(cp, sp, STATE_REQ_SENT);
 			break;
 		default:
 			printf(SPP_FMT "%s illegal %s in state %s\n",
 			       SPP_ARGS(ifp), cp->name,
 			       sppp_cp_type_name(h->type),
 			       sppp_state_name(sp->state[cp->protoidx]));
 			if_inc_counter(ifp, IFCOUNTER_IERRORS, 1);
 		}
 		break;
 	    }
 	case DISC_REQ:
 		if (cp->proto != PPP_LCP)
 			goto illegal;
 		/* Discard the packet. */
 		break;
 	case ECHO_REQ:
 		if (cp->proto != PPP_LCP)
 			goto illegal;
 		if (sp->state[cp->protoidx] != STATE_OPENED) {
 			if (debug)
 				log(-1, SPP_FMT "lcp echo req but lcp closed\n",
 				       SPP_ARGS(ifp));
 			if_inc_counter(ifp, IFCOUNTER_IERRORS, 1);
 			break;
 		}
 		if (len < 8) {
 			if (debug)
 				log(-1, SPP_FMT "invalid lcp echo request "
 				       "packet length: %d bytes\n",
 				       SPP_ARGS(ifp), len);
 			break;
 		}
 		if ((sp->lcp.opts & (1 << LCP_OPT_MAGIC)) &&
 		    ntohl (*(long*)(h+1)) == sp->lcp.magic) {
 			/* Line loopback mode detected. */
 			printf(SPP_FMT "loopback\n", SPP_ARGS(ifp));
 			sp->pp_loopcnt = MAXALIVECNT * 5;
 			if_down (ifp);
 			sppp_qflush (&sp->pp_cpq);
 
 			/* Shut down the PPP link. */
 			/* XXX */
 			lcp.Down(sp);
 			lcp.Up(sp);
 			break;
 		}
 		*(long*)(h+1) = htonl (sp->lcp.magic);
 		if (debug)
 			log(-1, SPP_FMT "got lcp echo req, sending echo rep\n",
 			       SPP_ARGS(ifp));
 		sppp_cp_send (sp, PPP_LCP, ECHO_REPLY, h->ident, len-4, h+1);
 		break;
 	case ECHO_REPLY:
 		if (cp->proto != PPP_LCP)
 			goto illegal;
 		if (h->ident != sp->lcp.echoid) {
 			if_inc_counter(ifp, IFCOUNTER_IERRORS, 1);
 			break;
 		}
 		if (len < 8) {
 			if (debug)
 				log(-1, SPP_FMT "lcp invalid echo reply "
 				       "packet length: %d bytes\n",
 				       SPP_ARGS(ifp), len);
 			break;
 		}
 		if (debug)
 			log(-1, SPP_FMT "lcp got echo rep\n",
 			       SPP_ARGS(ifp));
 		if (!(sp->lcp.opts & (1 << LCP_OPT_MAGIC)) ||
 		    ntohl (*(long*)(h+1)) != sp->lcp.magic)
 			sp->pp_alivecnt = 0;
 		break;
 	default:
 		/* Unknown packet type -- send Code-Reject packet. */
 	  illegal:
 		if (debug)
 			log(-1, SPP_FMT "%s send code-rej for 0x%x\n",
 			       SPP_ARGS(ifp), cp->name, h->type);
 		sppp_cp_send(sp, cp->proto, CODE_REJ,
 			     ++sp->pp_seq[cp->protoidx], m->m_pkthdr.len, h);
 		if_inc_counter(ifp, IFCOUNTER_IERRORS, 1);
 	}
 }
 
 /*
  * The generic part of all Up/Down/Open/Close/TO event handlers.
  * Basically, the state transition handling in the automaton.
  */
 
 /*
  * Deliver an Up event to control protocol `cp' on link `sp'.
  *
  *   Initial  -> Closed
  *   Starting -> Req-Sent, after reloading the restart counter from
  *               lcp.max_configure and running the protocol's scr action
  *
  * Every other state is reported with printf() and left untouched.
  */
 static void
 sppp_up_event(const struct cp *cp, struct sppp *sp)
 {
 	STDDCL;
 	const int idx = cp->protoidx;
 	const int cur = sp->state[idx];
 
 	if (debug)
 		log(LOG_DEBUG, SPP_FMT "%s up(%s)\n",
 		    SPP_ARGS(ifp), cp->name, sppp_state_name(cur));
 
 	if (cur == STATE_INITIAL) {
 		sppp_cp_change_state(cp, sp, STATE_CLOSED);
 	} else if (cur == STATE_STARTING) {
 		sp->rst_counter[idx] = sp->lcp.max_configure;
 		(cp->scr)(sp);
 		sppp_cp_change_state(cp, sp, STATE_REQ_SENT);
 	} else {
 		printf(SPP_FMT "%s illegal up in state %s\n",
 		       SPP_ARGS(ifp), cp->name, sppp_state_name(cur));
 	}
 }
 
 /*
  * Deliver a Down event to control protocol `cp' on link `sp'.
  *
  *   Closed, Closing                          -> Initial
  *   Stopped                                  -> Starting, then tls
  *   Stopping, Req-Sent, Ack-Rcvd, Ack-Sent   -> Starting
  *   Opened                                   -> tld, then Starting
  *
  * Every other state is reported with printf() and left untouched.
  */
 static void
 sppp_down_event(const struct cp *cp, struct sppp *sp)
 {
 	STDDCL;
 	const int cur = sp->state[cp->protoidx];
 
 	if (debug)
 		log(LOG_DEBUG, SPP_FMT "%s down(%s)\n",
 		    SPP_ARGS(ifp), cp->name, sppp_state_name(cur));
 
 	switch (cur) {
 	case STATE_CLOSED:
 	case STATE_CLOSING:
 		sppp_cp_change_state(cp, sp, STATE_INITIAL);
 		return;
 	case STATE_OPENED:
 		/* The layer was up: tell it so before leaving the state. */
 		(cp->tld)(sp);
 		break;
 	case STATE_STOPPED:
 	case STATE_STOPPING:
 	case STATE_REQ_SENT:
 	case STATE_ACK_RCVD:
 	case STATE_ACK_SENT:
 		break;
 	default:
 		printf(SPP_FMT "%s illegal down in state %s\n",
 		       SPP_ARGS(ifp), cp->name, sppp_state_name(cur));
 		return;
 	}
 
 	/* All remaining legal states end up in Starting. */
 	sppp_cp_change_state(cp, sp, STATE_STARTING);
 
 	/* Only the Stopped state runs the tls action after the transition. */
 	if (cur == STATE_STOPPED)
 		(cp->tls)(sp);
 }
 
 /*
  * Deliver an Open event to control protocol `cp' on link `sp'.
  *
  *   Initial, Stopped -> Starting, then tls
  *   Closed           -> Req-Sent, after reloading the restart counter
  *                       from lcp.max_configure and running scr
  *   Closing          -> Stopping
  *
  * All other states are left as they are; nothing is logged for them.
  */
 static void
 sppp_open_event(const struct cp *cp, struct sppp *sp)
 {
 	STDDCL;
 	const int idx = cp->protoidx;
 	const int cur = sp->state[idx];
 
 	if (debug)
 		log(LOG_DEBUG, SPP_FMT "%s open(%s)\n",
 		    SPP_ARGS(ifp), cp->name, sppp_state_name(cur));
 
 	switch (cur) {
 	case STATE_STOPPED:
 		/*
 		 * Try escaping stopped state.  This seems to bite
 		 * people occasionally, in particular for IPCP,
 		 * presumably following previous IPCP negotiation
 		 * aborts.  Somehow, we must have missed a Down event
 		 * which would have caused a transition into starting
 		 * state, so as a bandaid we force the Down event now.
 		 * This effectively implements (something like the)
 		 * `restart' option mentioned in the state transition
 		 * table of RFC 1661.
 		 */
 		/* FALLTHROUGH */
 	case STATE_INITIAL:
 		sppp_cp_change_state(cp, sp, STATE_STARTING);
 		(cp->tls)(sp);
 		break;
 	case STATE_CLOSED:
 		sp->rst_counter[idx] = sp->lcp.max_configure;
 		(cp->scr)(sp);
 		sppp_cp_change_state(cp, sp, STATE_REQ_SENT);
 		break;
 	case STATE_CLOSING:
 		sppp_cp_change_state(cp, sp, STATE_STOPPING);
 		break;
 	default:
 		/*
 		 * Starting, Stopping, Req-Sent, Ack-Rcvd, Ack-Sent and
 		 * Opened need no action on Open.
 		 */
 		break;
 	}
 }
 
 /*
  * Deliver a Close event to control protocol `cp' on link `sp'.
  *
  *   Starting                       -> Initial, then tlf
  *   Stopped                        -> Closed
  *   Stopping                       -> Closing
  *   Opened                         -> tld, then as for Req-Sent
  *   Req-Sent, Ack-Rcvd, Ack-Sent   -> reload the restart counter from
  *                                     lcp.max_terminate, send a
  *                                     terminate request, go to Closing
  *
  * Initial, Closed and Closing are left as they are.
  */
 static void
 sppp_close_event(const struct cp *cp, struct sppp *sp)
 {
 	STDDCL;
 	const int idx = cp->protoidx;
 	const int cur = sp->state[idx];
 
 	if (debug)
 		log(LOG_DEBUG, SPP_FMT "%s close(%s)\n",
 		    SPP_ARGS(ifp), cp->name, sppp_state_name(cur));
 
 	switch (cur) {
 	case STATE_STARTING:
 		sppp_cp_change_state(cp, sp, STATE_INITIAL);
 		(cp->tlf)(sp);
 		break;
 	case STATE_STOPPED:
 		sppp_cp_change_state(cp, sp, STATE_CLOSED);
 		break;
 	case STATE_STOPPING:
 		sppp_cp_change_state(cp, sp, STATE_CLOSING);
 		break;
 	case STATE_OPENED:
 	case STATE_REQ_SENT:
 	case STATE_ACK_RCVD:
 	case STATE_ACK_SENT:
 		/* Only an opened layer has to be told it is going down. */
 		if (cur == STATE_OPENED)
 			(cp->tld)(sp);
 		sp->rst_counter[idx] = sp->lcp.max_terminate;
 		sp->pp_seq[idx]++;
 		sppp_cp_send(sp, cp->proto, TERM_REQ, sp->pp_seq[idx], 0, 0);
 		sppp_cp_change_state(cp, sp, STATE_CLOSING);
 		break;
 	default:
 		/* Initial, Closed, Closing: nothing to do. */
 		break;
 	}
 }
 
 /*
  * Restart-timer expiry for control protocol `cp' on link `sp'.
  *
  * Runs with the sppp lock held.  The restart counter is decremented
  * first; once it has dropped below zero the event is treated as TO-
  * (give up), otherwise as TO+ (retransmit):
  *
  *   TO-  Closing                                -> Closed, tlf
  *        Stopping, Req-Sent, Ack-Rcvd, Ack-Sent -> Stopped, tlf
  *   TO+  Closing, Stopping   resend the terminate request, re-arm timer
  *        Req-Sent, Ack-Rcvd  scr, then (re-)enter Req-Sent
  *        Ack-Sent            scr, re-arm timer
  *
  * States not listed are ignored.
  */
 static void
 sppp_to_event(const struct cp *cp, struct sppp *sp)
 {
 	STDDCL;
 	const int idx = cp->protoidx;
 	int cur;
 
 	SPPP_LOCK(sp);
 	cur = sp->state[idx];
 	if (debug)
 		log(LOG_DEBUG, SPP_FMT "%s TO(%s) rst_counter = %d\n",
 		    SPP_ARGS(ifp), cp->name, sppp_state_name(cur),
 		    sp->rst_counter[idx]);
 
 	sp->rst_counter[idx]--;
 	if (sp->rst_counter[idx] < 0) {
 		/* TO- event */
 		switch (cur) {
 		case STATE_CLOSING:
 			sppp_cp_change_state(cp, sp, STATE_CLOSED);
 			(cp->tlf)(sp);
 			break;
 		case STATE_STOPPING:
 		case STATE_REQ_SENT:
 		case STATE_ACK_RCVD:
 		case STATE_ACK_SENT:
 			sppp_cp_change_state(cp, sp, STATE_STOPPED);
 			(cp->tlf)(sp);
 			break;
 		default:
 			break;
 		}
 	} else {
 		/* TO+ event */
 		switch (cur) {
 		case STATE_CLOSING:
 		case STATE_STOPPING:
 			sp->pp_seq[idx]++;
 			sppp_cp_send(sp, cp->proto, TERM_REQ,
 				     sp->pp_seq[idx], 0, 0);
 			callout_reset(&sp->ch[idx], sp->lcp.timeout,
 				      cp->TO, (void *)sp);
 			break;
 		case STATE_REQ_SENT:
 		case STATE_ACK_RCVD:
 			(cp->scr)(sp);
 			/* sppp_cp_change_state() will restart the timer */
 			sppp_cp_change_state(cp, sp, STATE_REQ_SENT);
 			break;
 		case STATE_ACK_SENT:
 			(cp->scr)(sp);
 			callout_reset(&sp->ch[idx], sp->lcp.timeout,
 				      cp->TO, (void *)sp);
 			break;
 		default:
 			break;
 		}
 	}
 
 	SPPP_UNLOCK(sp);
 }
 
 /*
  * Record `newstate' as the automaton state of control protocol `cp'
  * and bring the protocol's restart timer in line with it: the timer
  * is always stopped first, and is started again (lcp.timeout ticks,
  * handler cp->TO, argument sp) only for Closing, Stopping, Req-Sent,
  * Ack-Rcvd and Ack-Sent.
  */
 static void
 sppp_cp_change_state(const struct cp *cp, struct sppp *sp, int newstate)
 {
 	const int idx = cp->protoidx;
 
 	sp->state[idx] = newstate;
 	callout_stop(&sp->ch[idx]);
 
 	/*
 	 * Initial, Starting, Closed, Stopped and Opened (and anything
 	 * unknown) run without a restart timer.
 	 */
 	if (newstate == STATE_CLOSING || newstate == STATE_STOPPING ||
 	    newstate == STATE_REQ_SENT || newstate == STATE_ACK_RCVD ||
 	    newstate == STATE_ACK_SENT) {
 		callout_reset(&sp->ch[idx], sp->lcp.timeout,
 			      cp->TO, (void *)sp);
 	}
 }
 
 /*
  *--------------------------------------------------------------------------*
  *                                                                          *
  *                         The LCP implementation.                          *
  *                                                                          *
  *--------------------------------------------------------------------------*
  */
 /*
  * Deliver an Up event to LCP (lcp.Up) with the sppp lock held for the
  * duration of the call.
  */
 static void
 sppp_pp_up(struct sppp *sp)
 {
 	SPPP_LOCK(sp);
 	lcp.Up(sp);
 	SPPP_UNLOCK(sp);
 }
 
 /*
  * Deliver a Down event to LCP (lcp.Down) with the sppp lock held for
  * the duration of the call.
  */
 static void
 sppp_pp_down(struct sppp *sp)
 {
 	SPPP_LOCK(sp);
 	lcp.Down(sp);
 	SPPP_UNLOCK(sp);
 }
 
 /*
  * Put the LCP part of `sp' into its initial condition: automaton in
  * Initial state, counters and sequence numbers cleared, only the
  * magic-number option selected, default MRUs, default timer
  * parameters, and the LCP callout initialised.
  */
 static void
 sppp_lcp_init(struct sppp *sp)
 {
 	/* Automaton bookkeeping for the LCP slot. */
 	sp->state[IDX_LCP] = STATE_INITIAL;
 	sp->fail_counter[IDX_LCP] = 0;
 	sp->pp_seq[IDX_LCP] = 0;
 	sp->pp_rseq[IDX_LCP] = 0;
 
 	/* Negotiation defaults. */
 	sp->lcp.opts = (1 << LCP_OPT_MAGIC);
 	sp->lcp.magic = 0;
 	sp->lcp.protos = 0;
 	sp->lcp.their_mru = PP_MTU;
 	sp->lcp.mru = PP_MTU;
 
 	/* Note that these values are relevant for all control protocols */
 	sp->lcp.timeout = 3 * hz;
 	sp->lcp.max_configure = 10;
 	sp->lcp.max_failure = 10;
 	sp->lcp.max_terminate = 2;
 
 	callout_init(&sp->ch[IDX_LCP], 1);
 }
 
 /*
  * LCP Up handler.
  *
  * Resets the per-connection LCP negotiation state, selects the
  * authentication-protocol option if a peer authentication protocol is
  * configured (hisauth.proto), marks the interface running and opens
  * LCP when the automaton is still in Initial state, and finally feeds
  * the Up event into the generic automaton.
  */
 static void
 sppp_lcp_up(struct sppp *sp)
 {
 	STDDCL;
 	int on_demand, initial;
 
 	sp->pp_alivecnt = 0;
 	sp->lcp.opts = (1 << LCP_OPT_MAGIC);
 	sp->lcp.magic = 0;
 	sp->lcp.protos = 0;
 	sp->lcp.their_mru = PP_MTU;
 	sp->lcp.mru = PP_MTU;
 
 	/*
 	 * If we are authenticator, negotiate LCP_AUTH
 	 */
 	if (sp->hisauth.proto == 0)
 		sp->lcp.opts &= ~(1 << LCP_OPT_AUTH_PROTO);
 	else
 		sp->lcp.opts |= (1 << LCP_OPT_AUTH_PROTO);
 	sp->pp_flags &= ~PP_NEEDAUTH;
 
 	on_demand = (ifp->if_flags & (IFF_AUTO | IFF_PASSIVE)) != 0;
 	initial = (sp->state[IDX_LCP] == STATE_INITIAL);
 
 	if (on_demand) {
 		/*
 		 * If this interface is passive or dial-on-demand, and we
 		 * are still in Initial state, it means we've got an
 		 * incoming call.  Activate the interface.
 		 */
 		if (debug)
 			log(LOG_DEBUG,
 			    SPP_FMT "Up event", SPP_ARGS(ifp));
 		ifp->if_drv_flags |= IFF_DRV_RUNNING;
 		if (initial) {
 			if (debug)
 				log(-1, "(incoming call)\n");
 			sp->pp_flags |= PP_CALLIN;
 			lcp.Open(sp);
 		} else if (debug) {
 			/* Terminate the debug line started above. */
 			log(-1, "\n");
 		}
 	} else if (initial) {
 		ifp->if_drv_flags |= IFF_DRV_RUNNING;
 		lcp.Open(sp);
 	}
 
 	sppp_up_event(&lcp, sp);
 }
 
 /*
  * LCP Down handler.
  *
  * Feeds the Down event into the generic automaton, then either takes
  * the interface down (neither IFF_AUTO nor IFF_PASSIVE set) or, for
  * dial-on-demand/passive interfaces, forgets the call-in flag, closes
  * LCP unless it is already in Initial state, and clears
  * IFF_DRV_RUNNING.
  */
 static void
 sppp_lcp_down(struct sppp *sp)
 {
 	STDDCL;
 
 	sppp_down_event(&lcp, sp);
 
 	if ((ifp->if_flags & (IFF_AUTO | IFF_PASSIVE)) != 0) {
 		if (debug)
 			log(LOG_DEBUG,
 			    SPP_FMT "Down event (carrier loss)\n",
 			    SPP_ARGS(ifp));
 		sp->pp_flags &= ~PP_CALLIN;
 		if (sp->state[IDX_LCP] != STATE_INITIAL)
 			lcp.Close(sp);
 		ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
 		return;
 	}
 
 	/*
 	 * If this is neither a dial-on-demand nor a passive
 	 * interface, simulate an ``ifconfig down'' action, so the
 	 * administrator can force a redial by another ``ifconfig
 	 * up''.  XXX For leased line operation, should we immediately
 	 * try to reopen the connection here?
 	 */
 	log(LOG_INFO,
 	    SPP_FMT "Down event, taking interface down.\n",
 	    SPP_ARGS(ifp));
 	if_down(ifp);
 }
 
 /*
  * LCP Open handler: forwards the Open event to the generic automaton
  * for the lcp control protocol.
  */
 static void
 sppp_lcp_open(struct sppp *sp)
 {
 	sppp_open_event(&lcp, sp);
 }
 
 /*
  * LCP Close handler: forwards the Close event to the generic automaton
  * for the lcp control protocol.
  */
 static void
 sppp_lcp_close(struct sppp *sp)
 {
 	sppp_close_event(&lcp, sp);
 }
 
 /*
  * LCP restart-timer handler.  `cookie' is the struct sppp the timer
  * was armed for; the timeout is handed to the generic automaton.
  */
 static void
 sppp_lcp_TO(void *cookie)
 {
 	struct sppp *sp = (struct sppp *)cookie;
 
 	sppp_to_event(&lcp, sp);
 }
 
 /*
  * Analyze a configure request.  Return true if it was agreeable, and
  * caused action sca, false if it has been rejected or nak'ed, and
  * caused action scn.  (The return value is used to make the state
  * transition decision in the state automaton.)
  */
 static int
 sppp_lcp_RCR(struct sppp *sp, struct lcp_header *h, int len)
 {
 	STDDCL;
 	u_char *buf, *r, *p;
 	int origlen, rlen;
 	u_long nmagic;
 	u_short authproto;
 
 	len -= 4;
 	origlen = len;
 	buf = r = malloc (len, M_TEMP, M_NOWAIT);
 	if (! buf)
 		return (0);
 
 	if (debug)
 		log(LOG_DEBUG, SPP_FMT "lcp parse opts: ",
 		    SPP_ARGS(ifp));
 
 	/* pass 1: check for things that need to be rejected */
 	p = (void*) (h+1);
 	for (rlen=0; len >= 2 && p[1] >= 2 && len >= p[1];
 	    len-=p[1], p+=p[1]) {
 		if (debug)
 			log(-1, " %s ", sppp_lcp_opt_name(*p));
 		switch (*p) {
 		case LCP_OPT_MAGIC:
 			/* Magic number. */
 			if (len >= 6 && p[1] == 6)
 				continue;
 			if (debug)
 				log(-1, "[invalid] ");
 			break;
 		case LCP_OPT_ASYNC_MAP:
 			/* Async control character map. */
 			if (len >= 6 && p[1] == 6)
 				continue;
 			if (debug)
 				log(-1, "[invalid] ");
 			break;
 		case LCP_OPT_MRU:
 			/* Maximum receive unit. */
 			if (len >= 4 && p[1] == 4)
 				continue;
 			if (debug)
 				log(-1, "[invalid] ");
 			break;
 		case LCP_OPT_AUTH_PROTO:
 			if (len < 4) {
 				if (debug)
 					log(-1, "[invalid] ");
 				break;
 			}
 			authproto = (p[2] << 8) + p[3];
 			if (authproto == PPP_CHAP && p[1] != 5) {
 				if (debug)
 					log(-1, "[invalid chap len] ");
 				break;
 			}
 			if (sp->myauth.proto == 0) {
 				/* we are not configured to do auth */
 				if (debug)
 					log(-1, "[not configured] ");
 				break;
 			}
 			/*
 			 * Remote want us to authenticate, remember this,
 			 * so we stay in PHASE_AUTHENTICATE after LCP got
 			 * up.
 			 */
 			sp->pp_flags |= PP_NEEDAUTH;
 			continue;
 		default:
 			/* Others not supported. */
 			if (debug)
 				log(-1, "[rej] ");
 			break;
 		}
 		/* Add the option to rejected list. */
 		bcopy (p, r, p[1]);
 		r += p[1];
 		rlen += p[1];
 	}
 	if (rlen) {
 		if (debug)
 			log(-1, " send conf-rej\n");
 		sppp_cp_send (sp, PPP_LCP, CONF_REJ, h->ident, rlen, buf);
 		return 0;
 	} else if (debug)
 		log(-1, "\n");
 
 	/*
 	 * pass 2: check for option values that are unacceptable and
 	 * thus require to be nak'ed.
 	 */
 	if (debug)
 		log(LOG_DEBUG, SPP_FMT "lcp parse opt values: ",
 		    SPP_ARGS(ifp));
 
 	p = (void*) (h+1);
 	len = origlen;
 	for (rlen=0; len >= 2 && p[1] >= 2 && len >= p[1];
 	    len-=p[1], p+=p[1]) {
 		if (debug)
 			log(-1, " %s ", sppp_lcp_opt_name(*p));
 		switch (*p) {
 		case LCP_OPT_MAGIC:
 			/* Magic number -- extract. */
 			nmagic = (u_long)p[2] << 24 |
 				(u_long)p[3] << 16 | p[4] << 8 | p[5];
 			if (nmagic != sp->lcp.magic) {
 				sp->pp_loopcnt = 0;
 				if (debug)
 					log(-1, "0x%lx ", nmagic);
 				continue;
 			}
 			if (debug && sp->pp_loopcnt < MAXALIVECNT*5)
 				log(-1, "[glitch] ");
 			++sp->pp_loopcnt;
 			/*
 			 * We negate our magic here, and NAK it.  If
 			 * we see it later in an NAK packet, we
 			 * suggest a new one.
 			 */
 			nmagic = ~sp->lcp.magic;
 			/* Gonna NAK it. */
 			p[2] = nmagic >> 24;
 			p[3] = nmagic >> 16;
 			p[4] = nmagic >> 8;
 			p[5] = nmagic;
 			break;
 
 		case LCP_OPT_ASYNC_MAP:
 			/*
 			 * Async control character map -- just ignore it.
 			 *
 			 * Quote from RFC 1662, chapter 6:
 			 * To enable this functionality, synchronous PPP
 			 * implementations MUST always respond to the
 			 * Async-Control-Character-Map Configuration
 			 * Option with the LCP Configure-Ack.  However,
 			 * acceptance of the Configuration Option does
 			 * not imply that the synchronous implementation
 			 * will do any ACCM mapping.  Instead, all such
 			 * octet mapping will be performed by the
 			 * asynchronous-to-synchronous converter.
 			 */
 			continue;
 
 		case LCP_OPT_MRU:
 			/*
 			 * Maximum receive unit.  Always agreeable,
 			 * but ignored by now.
 			 */
 			sp->lcp.their_mru = p[2] * 256 + p[3];
 			if (debug)
 				log(-1, "%lu ", sp->lcp.their_mru);
 			continue;
 
 		case LCP_OPT_AUTH_PROTO:
 			authproto = (p[2] << 8) + p[3];
 			if (sp->myauth.proto != authproto) {
 				/* not agreed, nak */
 				if (debug)
 					log(-1, "[mine %s != his %s] ",
 					       sppp_proto_name(sp->hisauth.proto),
 					       sppp_proto_name(authproto));
 				p[2] = sp->myauth.proto >> 8;
 				p[3] = sp->myauth.proto;
 				break;
 			}
 			if (authproto == PPP_CHAP && p[4] != CHAP_MD5) {
 				if (debug)
 					log(-1, "[chap not MD5] ");
 				p[4] = CHAP_MD5;
 				break;
 			}
 			continue;
 		}
 		/* Add the option to nak'ed list. */
 		bcopy (p, r, p[1]);
 		r += p[1];
 		rlen += p[1];
 	}
 	if (rlen) {
 		/*
 		 * Local and remote magics equal -- loopback?
 		 */
 		if (sp->pp_loopcnt >= MAXALIVECNT*5) {
 			if (sp->pp_loopcnt == MAXALIVECNT*5)
 				printf (SPP_FMT "loopback\n",
 					SPP_ARGS(ifp));
 			if (ifp->if_flags & IFF_UP) {
 				if_down(ifp);
 				sppp_qflush(&sp->pp_cpq);
 				/* XXX ? */
 				lcp.Down(sp);
 				lcp.Up(sp);
 			}
 		} else if (!sp->pp_loopcnt &&
 			   ++sp->fail_counter[IDX_LCP] >= sp->lcp.max_failure) {
 			if (debug)
 				log(-1, " max_failure (%d) exceeded, "
 				       "send conf-rej\n",
 				       sp->lcp.max_failure);
 			sppp_cp_send(sp, PPP_LCP, CONF_REJ, h->ident, rlen, buf);
 		} else {
 			if (debug)
 				log(-1, " send conf-nak\n");
 			sppp_cp_send (sp, PPP_LCP, CONF_NAK, h->ident, rlen, buf);
 		}
 	} else {
 		if (debug)
 			log(-1, " send conf-ack\n");
 		sp->fail_counter[IDX_LCP] = 0;
 		sp->pp_loopcnt = 0;
 		sppp_cp_send (sp, PPP_LCP, CONF_ACK,
 			      h->ident, origlen, h+1);
 	}
 
 	free (buf, M_TEMP);
 	return (rlen == 0);
 }
 
 /*
  * Analyze the LCP Configure-Reject option list, and adjust our
  * negotiation.
  *
  * `h' is the received header; the option list is taken to start right
  * behind it.  `len' is reduced by 4 before the options are walked
  * (presumably the header size).  Each rejected option is removed from
  * the set we request (lcp.opts); a rejected authentication option
  * closes LCP unless this is a call-out with AUTHFLAG_NOCALLOUT set.
  *
  * No scratch memory is needed here, so the function cannot fail for
  * lack of memory and always processes the whole list.
  */
 static void
 sppp_lcp_RCN_rej(struct sppp *sp, struct lcp_header *h, int len)
 {
 	STDDCL;
 	u_char *p;
 
 	len -= 4;
 
 	if (debug)
 		log(LOG_DEBUG, SPP_FMT "lcp rej opts: ",
 		    SPP_ARGS(ifp));
 
 	p = (void*) (h+1);
 	for (; len >= 2 && p[1] >= 2 && len >= p[1];
 	    len -= p[1], p += p[1]) {
 		if (debug)
 			log(-1, " %s ", sppp_lcp_opt_name(*p));
 		switch (*p) {
 		case LCP_OPT_MAGIC:
 			/* Magic number -- can't use it, use 0 */
 			sp->lcp.opts &= ~(1 << LCP_OPT_MAGIC);
 			sp->lcp.magic = 0;
 			break;
 		case LCP_OPT_MRU:
 			/*
 			 * Should not be rejected anyway, since we only
 			 * negotiate a MRU if explicitly requested by
 			 * peer.
 			 */
 			sp->lcp.opts &= ~(1 << LCP_OPT_MRU);
 			break;
 		case LCP_OPT_AUTH_PROTO:
 			/*
 			 * Peer doesn't want to authenticate himself,
 			 * deny unless this is a dialout call, and
 			 * AUTHFLAG_NOCALLOUT is set.
 			 */
 			if ((sp->pp_flags & PP_CALLIN) == 0 &&
 			    (sp->hisauth.flags & AUTHFLAG_NOCALLOUT) != 0) {
 				if (debug)
 					log(-1, "[don't insist on auth "
 					       "for callout]");
 				sp->lcp.opts &= ~(1 << LCP_OPT_AUTH_PROTO);
 				break;
 			}
 			if (debug)
 				log(-1, "[access denied]\n");
 			lcp.Close(sp);
 			break;
 		}
 	}
 	if (debug)
 		log(-1, "\n");
 }
 
 /*
  * Analyze the LCP Configure-NAK option list, and adjust our
  * negotiation.
  */
 static void
 sppp_lcp_RCN_nak(struct sppp *sp, struct lcp_header *h, int len)
 {
 	STDDCL;
 	u_char *buf, *p;
 	u_long magic;
 
 	len -= 4;
 	buf = malloc (len, M_TEMP, M_NOWAIT);
 	if (!buf)
 		return;
 
 	if (debug)
 		log(LOG_DEBUG, SPP_FMT "lcp nak opts: ",
 		    SPP_ARGS(ifp));
 
 	p = (void*) (h+1);
 	for (; len >= 2 && p[1] >= 2 && len >= p[1];
 	    len -= p[1], p += p[1]) {
 		if (debug)
 			log(-1, " %s ", sppp_lcp_opt_name(*p));
 		switch (*p) {
 		case LCP_OPT_MAGIC:
 			/* Magic number -- renegotiate */
 			if ((sp->lcp.opts & (1 << LCP_OPT_MAGIC)) &&
 			    len >= 6 && p[1] == 6) {
 				magic = (u_long)p[2] << 24 |
 					(u_long)p[3] << 16 | p[4] << 8 | p[5];
 				/*
 				 * If the remote magic is our negated one,
 				 * this looks like a loopback problem.
 				 * Suggest a new magic to make sure.
 				 */
 				if (magic == ~sp->lcp.magic) {
 					if (debug)
 						log(-1, "magic glitch ");
 					sp->lcp.magic = random();
 				} else {
 					sp->lcp.magic = magic;
 					if (debug)
 						log(-1, "%lu ", magic);
 				}
 			}
 			break;
 		case LCP_OPT_MRU:
 			/*
 			 * Peer wants to advise us to negotiate an MRU.
 			 * Agree on it if it's reasonable, or use
 			 * default otherwise.
 			 */
 			if (len >= 4 && p[1] == 4) {
 				u_int mru = p[2] * 256 + p[3];
 				if (debug)
 					log(-1, "%d ", mru);
 				if (mru < PP_MTU || mru > PP_MAX_MRU)
 					mru = PP_MTU;
 				sp->lcp.mru = mru;
 				sp->lcp.opts |= (1 << LCP_OPT_MRU);
 			}
 			break;
 		case LCP_OPT_AUTH_PROTO:
 			/*
 			 * Peer doesn't like our authentication method,
 			 * deny.
 			 */
 			if (debug)
 				log(-1, "[access denied]\n");
 			lcp.Close(sp);
 			break;
 		}
 	}
 	if (debug)
 		log(-1, "\n");
 	free (buf, M_TEMP);
 	return;
 }
 
 /*
  * LCP tlu action (invoked by the automaton through cp->tlu).
  *
  * In order: brings the interface up again if it is running but not
  * IFF_UP, opens every CP_QUAL protocol, selects the next phase
  * (authenticate or network), opens every CP_AUTH protocol, opens the
  * NCPs when in network phase, sends Up to every non-LCP protocol
  * recorded in lcp.protos, notifies the driver through pp_chg if set,
  * and finally closes LCP again if in network phase with no NCP
  * recorded in lcp.protos.
  */
 static void
 sppp_lcp_tlu(struct sppp *sp)
 {
 	STDDCL;
 	int idx;
 
 	/* XXX ? */
 	if ((ifp->if_flags & IFF_UP) == 0 &&
 	    (ifp->if_drv_flags & IFF_DRV_RUNNING) != 0) {
 		/* Coming out of loopback mode. */
 		if_up(ifp);
 		printf (SPP_FMT "up\n", SPP_ARGS(ifp));
 	}
 
 	for (idx = 0; idx < IDX_COUNT; idx++) {
 		if (cps[idx]->flags & CP_QUAL)
 			cps[idx]->Open(sp);
 	}
 
 	/* Authentication is needed if either side asked for it. */
 	if ((sp->lcp.opts & (1 << LCP_OPT_AUTH_PROTO)) == 0 &&
 	    (sp->pp_flags & PP_NEEDAUTH) == 0)
 		sp->pp_phase = PHASE_NETWORK;
 	else
 		sp->pp_phase = PHASE_AUTHENTICATE;
 
 	if (debug)
 		log(LOG_DEBUG, SPP_FMT "phase %s\n", SPP_ARGS(ifp),
 		    sppp_phase_name(sp->pp_phase));
 
 	/*
 	 * Open all authentication protocols.  This is even required
 	 * if we already proceeded to network phase, since it might be
 	 * that remote wants us to authenticate, so we might have to
 	 * send a PAP request.  Undesired authentication protocols
 	 * don't do anything when they get an Open event.
 	 */
 	for (idx = 0; idx < IDX_COUNT; idx++) {
 		if (cps[idx]->flags & CP_AUTH)
 			cps[idx]->Open(sp);
 	}
 
 	if (sp->pp_phase == PHASE_NETWORK) {
 		/* Notify all NCPs. */
 		for (idx = 0; idx < IDX_COUNT; idx++) {
 			if ((cps[idx]->flags & CP_NCP) == 0)
 				continue;
 			/*
 			 * XXX
 			 * Hack to administratively disable IPv6 if
 			 * not desired.  Perhaps we should have another
 			 * flag for this, but right now, we can make
 			 * all struct cp's read/only.
 			 */
 			if (cps[idx] == &ipv6cp &&
 			    (sp->confflags & CONF_ENABLE_IPV6) == 0)
 				continue;
 			cps[idx]->Open(sp);
 		}
 	}
 
 	/* Send Up events to all started protos. */
 	for (idx = 0; idx < IDX_COUNT; idx++) {
 		if ((sp->lcp.protos & (1UL << idx)) == 0)
 			continue;
 		if ((cps[idx]->flags & CP_LCP) == 0)
 			cps[idx]->Up(sp);
 	}
 
 	/* notify low-level driver of state change */
 	if (sp->pp_chg)
 		sp->pp_chg(sp, (int)sp->pp_phase);
 
 	/* if no NCP is starting, close down */
 	if (sp->pp_phase == PHASE_NETWORK)
 		sppp_lcp_check_and_close(sp);
 }
 
 /*
  * LCP tld action (invoked by the automaton through cp->tld).
  *
  * Enters the terminate phase and takes down every non-LCP protocol
  * that is recorded in lcp.protos.
  */
 static void
 sppp_lcp_tld(struct sppp *sp)
 {
 	STDDCL;
 	int idx;
 
 	sp->pp_phase = PHASE_TERMINATE;
 
 	if (debug)
 		log(LOG_DEBUG, SPP_FMT "phase %s\n", SPP_ARGS(ifp),
 		    sppp_phase_name(sp->pp_phase));
 
 	/*
 	 * Take upper layers down.  We send the Down event first and
 	 * the Close second to prevent the upper layers from sending
 	 * ``a flurry of terminate-request packets'', as the RFC
 	 * describes it.
 	 */
 	for (idx = 0; idx < IDX_COUNT; idx++) {
 		if ((sp->lcp.protos & (1UL << idx)) == 0)
 			continue;
 		if ((cps[idx]->flags & CP_LCP) != 0)
 			continue;
 		cps[idx]->Down(sp);
 		cps[idx]->Close(sp);
 	}
 }
 
 /*
  * LCP tls action (invoked by the automaton through cp->tls).
  *
  * Enters the establish phase and notifies the lower layer: through
  * the pp_tls hook when one is set, through pp_up otherwise.
  */
 static void
 sppp_lcp_tls(struct sppp *sp)
 {
 	STDDCL;
 
 	sp->pp_phase = PHASE_ESTABLISH;
 
 	if (debug)
 		log(LOG_DEBUG, SPP_FMT "phase %s\n", SPP_ARGS(ifp),
 		    sppp_phase_name(sp->pp_phase));
 
 	/* Notify lower layer if desired. */
 	if (!sp->pp_tls) {
 		(sp->pp_up)(sp);
 		return;
 	}
 	(sp->pp_tls)(sp);
 }
 
 /*
  * LCP tlf action (invoked by the automaton through cp->tlf).
  *
  * Enters the dead phase and notifies the lower layer: through the
  * pp_tlf hook when one is set, through pp_down otherwise.
  */
 static void
 sppp_lcp_tlf(struct sppp *sp)
 {
 	STDDCL;
 
 	sp->pp_phase = PHASE_DEAD;
 
 	if (debug)
 		log(LOG_DEBUG, SPP_FMT "phase %s\n", SPP_ARGS(ifp),
 		    sppp_phase_name(sp->pp_phase));
 
 	/* Notify lower layer if desired. */
 	if (!sp->pp_tlf) {
 		(sp->pp_down)(sp);
 		return;
 	}
 	(sp->pp_tlf)(sp);
 }
 
 /*
  * LCP scr action: build and send a configure request.
  *
  * The request carries, in this order and only if selected in
  * lcp.opts: the magic number (a random one is chosen if none is set
  * yet), the MRU, and the authentication protocol we demand from the
  * peer (hisauth.proto, with the MD5 algorithm byte appended for
  * CHAP).  The identifier used is a freshly incremented sequence
  * number, remembered in confid[IDX_LCP].
  */
 static void
 sppp_lcp_scr(struct sppp *sp)
 {
 	char opt[6 /* magicnum */ + 4 /* mru */ + 5 /* chap */];
 	char *w = opt;		/* next free byte in opt[] */
 	u_short authproto;
 
 	if (sp->lcp.opts & (1 << LCP_OPT_MAGIC)) {
 		if (sp->lcp.magic == 0)
 			sp->lcp.magic = random();
 		*w++ = LCP_OPT_MAGIC;
 		*w++ = 6;
 		*w++ = sp->lcp.magic >> 24;
 		*w++ = sp->lcp.magic >> 16;
 		*w++ = sp->lcp.magic >> 8;
 		*w++ = sp->lcp.magic;
 	}
 
 	if (sp->lcp.opts & (1 << LCP_OPT_MRU)) {
 		*w++ = LCP_OPT_MRU;
 		*w++ = 4;
 		*w++ = sp->lcp.mru >> 8;
 		*w++ = sp->lcp.mru;
 	}
 
 	if (sp->lcp.opts & (1 << LCP_OPT_AUTH_PROTO)) {
 		authproto = sp->hisauth.proto;
 		*w++ = LCP_OPT_AUTH_PROTO;
 		*w++ = (authproto == PPP_CHAP) ? 5 : 4;
 		*w++ = authproto >> 8;
 		*w++ = authproto;
 		if (authproto == PPP_CHAP)
 			*w++ = CHAP_MD5;
 	}
 
 	sp->pp_seq[IDX_LCP]++;
 	sp->confid[IDX_LCP] = sp->pp_seq[IDX_LCP];
 	sppp_cp_send (sp, PPP_LCP, CONF_REQ, sp->confid[IDX_LCP],
 		      (int)(w - opt), opt);
 }
 
 /*
  * Check the open NCPs, return true if at least one NCP is open.
  *
  * "Open" here means: the protocol's bit (1 << index) is set in
  * lcp.protos and the protocol carries the CP_NCP flag.
  */
 static int
 sppp_ncp_check(struct sppp *sp)
 {
 	int idx;
 
 	for (idx = 0; idx < IDX_COUNT; idx++) {
 		if ((sp->lcp.protos & (1 << idx)) == 0)
 			continue;
 		if (cps[idx]->flags & CP_NCP)
 			return 1;
 	}
 	return 0;
 }
 
 /*
  * Re-check the open NCPs and see if we should terminate the link.
  * Called by the NCPs during their tlf action handling.
  *
  * LCP is closed only when the link has reached at least the network
  * phase (below that we are already going down) and sppp_ncp_check()
  * reports that no NCP is left.
  */
 static void
 sppp_lcp_check_and_close(struct sppp *sp)
 {
 
 	if (sp->pp_phase >= PHASE_NETWORK && !sppp_ncp_check(sp))
 		lcp.Close(sp);
 }
 
 /*
  *--------------------------------------------------------------------------*
  *                                                                          *
  *                        The IPCP implementation.                          *
  *                                                                          *
  *--------------------------------------------------------------------------*
  */
 
 #ifdef INET
 /*
  * Put the IPCP part of `sp' into its initial condition: automaton in
  * Initial state, counters and sequence numbers cleared, no options or
  * flags set, and the IPCP callout initialised.
  */
 static void
 sppp_ipcp_init(struct sppp *sp)
 {
 	/* Automaton bookkeeping for the IPCP slot. */
 	sp->state[IDX_IPCP] = STATE_INITIAL;
 	sp->fail_counter[IDX_IPCP] = 0;
 	sp->pp_seq[IDX_IPCP] = 0;
 	sp->pp_rseq[IDX_IPCP] = 0;
 
 	/* Nothing negotiated yet. */
 	sp->ipcp.flags = 0;
 	sp->ipcp.opts = 0;
 
 	callout_init(&sp->ch[IDX_IPCP], 1);
 }
 
 /*
  * IPCP Up handler: forwards the Up event to the generic automaton for
  * the ipcp control protocol.
  */
 static void
 sppp_ipcp_up(struct sppp *sp)
 {
 	sppp_up_event(&ipcp, sp);
 }
 
 /*
  * IPCP Down handler: forwards the Down event to the generic automaton
  * for the ipcp control protocol.
  */
 static void
 sppp_ipcp_down(struct sppp *sp)
 {
 	sppp_down_event(&ipcp, sp);
 }
 
 /*
  * IPCP Open handler.
  *
  * Clears the per-connection IPCP flags and option set, looks up the
  * interface addresses, and decides what to negotiate: our own address
  * if none is assigned, and VJ compression if CONF_ENABLE_VJ is set.
  * The Open event is passed to the generic automaton only if a peer
  * address is configured; otherwise the function returns after the
  * reset without opening IPCP.
  */
 static void
 sppp_ipcp_open(struct sppp *sp)
 {
 	STDDCL;
 	u_long myaddr, hisaddr;
 
 	sp->ipcp.opts = 0;
 	sp->ipcp.flags &= ~(IPCP_HISADDR_SEEN | IPCP_MYADDR_SEEN |
 			    IPCP_MYADDR_DYN | IPCP_VJ);
 
 	sppp_get_ip_addrs(sp, &myaddr, &hisaddr, 0);
 
 	/*
 	 * If we don't have his address, this probably means our
 	 * interface doesn't want to talk IP at all.  (This could
 	 * be the case if somebody wants to speak only IPX, for
 	 * example.)  Don't open IPCP in this case.
 	 */
 	if (hisaddr == 0L) {
 		/* XXX this message should go away */
 		if (debug)
 			log(LOG_DEBUG, SPP_FMT "ipcp_open(): no IP interface\n",
 			    SPP_ARGS(ifp));
 		return;
 	}
 
 	if (myaddr != 0L) {
 		sp->ipcp.flags |= IPCP_MYADDR_SEEN;
 	} else {
 		/*
 		 * I don't have an assigned address, so i need to
 		 * negotiate my address.
 		 */
 		sp->ipcp.flags |= IPCP_MYADDR_DYN;
 		sp->ipcp.opts |= (1 << IPCP_OPT_ADDRESS);
 	}
 
 	if (sp->confflags & CONF_ENABLE_VJ) {
 		sp->ipcp.opts |= (1 << IPCP_OPT_COMPRESSION);
 		sp->ipcp.max_state = MAX_STATES - 1;
 		sp->ipcp.compress_cid = 1;
 	}
 
 	sppp_open_event(&ipcp, sp);
 }
 
 /*
  * IPCP Close handler: forwards the Close event to the generic
  * automaton and afterwards resets our interface address to 0 if it
  * was marked as dynamic (IPCP_MYADDR_DYN).
  */
 static void
 sppp_ipcp_close(struct sppp *sp)
 {
 	sppp_close_event(&ipcp, sp);
 
 	/* My address was dynamic, clear it again. */
 	if ((sp->ipcp.flags & IPCP_MYADDR_DYN) != 0) {
 		sppp_set_ip_addr(sp, 0L);
 	}
 }
 
 /*
  * IPCP restart-timer handler.  `cookie' is the struct sppp the timer
  * was armed for; the timeout is handed to the generic automaton.
  */
 static void
 sppp_ipcp_TO(void *cookie)
 {
 	struct sppp *sp = (struct sppp *)cookie;
 
 	sppp_to_event(&ipcp, sp);
 }
 
 /*
  * Analyze an IPCP configure request.  Return true if it was agreeable,
  * and caused action sca, false if it has been rejected or nak'ed, and
  * caused action scn.  (The return value is used to make the state
  * transition decision in the state automaton.)
  *
  * sp:  the link the request arrived on.
  * h:   header of the received packet; the options follow it directly.
  *      Options that get nak'ed are rewritten in place with our values.
  * len: length of the packet including the 4 byte header.
  *
  * The request is parsed twice.  Pass 1 collects every option we do not
  * recognize (or that is malformed) and answers with a conf-rej if there
  * is any.  Pass 2 looks at the option values and builds a conf-nak for
  * everything we disagree with; if nothing is left over the request is
  * conf-ack'ed.  Returns 0 as well if the scratch buffer cannot be
  * allocated; nothing is sent in that case.
  */
 static int
 sppp_ipcp_RCR(struct sppp *sp, struct lcp_header *h, int len)
 {
 	u_char *buf, *r, *p;
 	struct ifnet *ifp = SP2IFP(sp);
 	int rlen, origlen, debug = ifp->if_flags & IFF_DEBUG;
 	u_long hisaddr, desiredaddr;
 	int desiredcomp;
 
 	len -= 4;
 	origlen = len;
 	/*
 	 * Make sure to allocate a buf that can at least hold a
 	 * conf-nak with an `address' option.  We might need it below.
 	 */
 	buf = r = malloc ((len < 6? 6: len), M_TEMP, M_NOWAIT);
 	if (! buf)
 		return (0);
 
 	/* pass 1: see if we can recognize them */
 	if (debug)
 		log(LOG_DEBUG, SPP_FMT "ipcp parse opts: ",
 		    SPP_ARGS(ifp));
 	p = (void*) (h+1);
 	for (rlen=0; len >= 2 && p[1] >= 2 && len >= p[1];
 	    len-=p[1], p+=p[1]) {
 		if (debug)
 			log(-1, " %s ", sppp_ipcp_opt_name(*p));
 		switch (*p) {
 		case IPCP_OPT_COMPRESSION:
 			if (!(sp->confflags & CONF_ENABLE_VJ)) {
 				/* VJ compression administratively disabled */
 				if (debug)
 					log(-1, "[locally disabled] ");
 				break;
 			}
 			/*
 			 * In theory, we should only conf-rej an
 			 * option that is shorter than RFC 1618
 			 * requires (i.e. < 4), and should conf-nak
 			 * anything else that is not VJ.  However,
 			 * since our algorithm always uses the
 			 * original option to NAK it with new values,
 			 * things would become more complicated.  In
 			 * practice, the only commonly implemented IP
 			 * compression option is VJ anyway, so the
 			 * difference is negligible.
 			 */
 			if (len >= 6 && p[1] == 6) {
 				/*
 				 * correctly formed compression option
 				 * that could be VJ compression
 				 */
 				continue;
 			}
 			if (debug)
 				log(-1,
 				    "optlen %d [invalid/unsupported] ",
 				    p[1]);
 			break;
 		case IPCP_OPT_ADDRESS:
 			if (len >= 6 && p[1] == 6) {
 				/* correctly formed address option */
 				continue;
 			}
 			if (debug)
 				log(-1, "[invalid] ");
 			break;
 		default:
 			/* Others not supported. */
 			if (debug)
 				log(-1, "[rej] ");
 			break;
 		}
 		/* Add the option to rejected list. */
 		bcopy (p, r, p[1]);
 		r += p[1];
 		rlen += p[1];
 	}
 	if (rlen) {
 		if (debug)
 			log(-1, " send conf-rej\n");
 		sppp_cp_send (sp, PPP_IPCP, CONF_REJ, h->ident, rlen, buf);
 		/*
 		 * Leave through the common exit so that buf is released;
 		 * rlen is non-zero here, hence we report "not agreeable".
 		 */
 		goto end;
 	} else if (debug)
 		log(-1, "\n");
 
 	/* pass 2: parse option values */
 	sppp_get_ip_addrs(sp, 0, &hisaddr, 0);
 	if (debug)
 		log(LOG_DEBUG, SPP_FMT "ipcp parse opt values: ",
 		       SPP_ARGS(ifp));
 	p = (void*) (h+1);
 	len = origlen;
 	for (rlen=0; len >= 2 && p[1] >= 2 && len >= p[1];
 	    len-=p[1], p+=p[1]) {
 		if (debug)
 			log(-1, " %s ", sppp_ipcp_opt_name(*p));
 		switch (*p) {
 		case IPCP_OPT_COMPRESSION:
 			desiredcomp = p[2] << 8 | p[3];
 			/* We only support VJ */
 			if (desiredcomp == IPCP_COMP_VJ) {
 				if (debug)
 					log(-1, "VJ [ack] ");
 				sp->ipcp.flags |= IPCP_VJ;
 				sl_compress_init(sp->pp_comp, p[4]);
 				sp->ipcp.max_state = p[4];
 				sp->ipcp.compress_cid = p[5];
 				continue;
 			}
 			if (debug)
 				log(-1,
 				    "compproto %#04x [not supported] ",
 				    desiredcomp);
 			p[2] = IPCP_COMP_VJ >> 8;
 			p[3] = IPCP_COMP_VJ;
 			p[4] = sp->ipcp.max_state;
 			p[5] = sp->ipcp.compress_cid;
 			break;
 		case IPCP_OPT_ADDRESS:
 			/*
 			 * This is the address he wants in his end.
 			 * Widen each byte before shifting: shifting the
 			 * int-promoted byte would sign-extend addresses
 			 * with the top bit set when u_long is 64 bits
 			 * wide, and they would never match hisaddr.
 			 */
 			desiredaddr = (u_long)p[2] << 24 |
 				(u_long)p[3] << 16 |
 				(u_long)p[4] << 8 | (u_long)p[5];
 			if (desiredaddr == hisaddr ||
 			    (hisaddr >= 1 && hisaddr <= 254 && desiredaddr != 0)) {
 				/*
 				 * Peer's address is same as our value,
 				 * or we have set it to 0.0.0.* to
 				 * indicate that we do not really care,
 				 * this is agreeable.  Gonna conf-ack
 				 * it.
 				 */
 				if (debug)
 					log(-1, "%s [ack] ",
 						sppp_dotted_quad(hisaddr));
 				/* record that we've seen it already */
 				sp->ipcp.flags |= IPCP_HISADDR_SEEN;
 				continue;
 			}
 			/*
 			 * The address wasn't agreeable.  This is either
 			 * he sent us 0.0.0.0, asking to assign him an
 			 * address, or he send us another address not
 			 * matching our value.  Either case, we gonna
 			 * conf-nak it with our value.
 			 * XXX: we should "rej" if hisaddr == 0
 			 */
 			if (debug) {
 				if (desiredaddr == 0)
 					log(-1, "[addr requested] ");
 				else
 					log(-1, "%s [not agreed] ",
 						sppp_dotted_quad(desiredaddr));
 			}
 			p[2] = hisaddr >> 24;
 			p[3] = hisaddr >> 16;
 			p[4] = hisaddr >> 8;
 			p[5] = hisaddr;
 			break;
 		}
 		/* Add the option to nak'ed list. */
 		bcopy (p, r, p[1]);
 		r += p[1];
 		rlen += p[1];
 	}
 
 	/*
 	 * If we are about to conf-ack the request, but haven't seen
 	 * his address so far, gonna conf-nak it instead, with the
 	 * `address' option present and our idea of his address being
 	 * filled in there, to request negotiation of both addresses.
 	 *
 	 * XXX This can result in an endless req - nak loop if peer
 	 * doesn't want to send us his address.  Q: What should we do
 	 * about it?  XXX  A: implement the max-failure counter.
 	 */
 	if (rlen == 0 && !(sp->ipcp.flags & IPCP_HISADDR_SEEN)) {
 		buf[0] = IPCP_OPT_ADDRESS;
 		buf[1] = 6;
 		buf[2] = hisaddr >> 24;
 		buf[3] = hisaddr >> 16;
 		buf[4] = hisaddr >> 8;
 		buf[5] = hisaddr;
 		rlen = 6;
 		if (debug)
 			log(-1, "still need hisaddr ");
 	}
 
 	if (rlen) {
 		if (debug)
 			log(-1, " send conf-nak\n");
 		sppp_cp_send (sp, PPP_IPCP, CONF_NAK, h->ident, rlen, buf);
 	} else {
 		if (debug)
 			log(-1, " send conf-ack\n");
 		sppp_cp_send (sp, PPP_IPCP, CONF_ACK,
 			      h->ident, origlen, h+1);
 	}
 
  end:
 	free (buf, M_TEMP);
 	return (rlen == 0);
 }
 
 /*
  * Analyze the IPCP Configure-Reject option list, and adjust our
  * negotiation.
  *
  * sp:  the link the reject arrived on.
  * h:   header of the received packet; the rejected options follow it.
  * len: length of the packet including the 4 byte header.
  *
  * Every option the peer rejected is removed from the set of options we
  * are going to request.  The option list is only read, so no scratch
  * buffer is needed and the reject is processed even when memory is
  * tight.
  */
 static void
 sppp_ipcp_RCN_rej(struct sppp *sp, struct lcp_header *h, int len)
 {
 	u_char *p;
 	struct ifnet *ifp = SP2IFP(sp);
 	int debug = ifp->if_flags & IFF_DEBUG;
 
 	len -= 4;
 
 	if (debug)
 		log(LOG_DEBUG, SPP_FMT "ipcp rej opts: ",
 		    SPP_ARGS(ifp));
 
 	p = (void*) (h+1);
 	for (; len >= 2 && p[1] >= 2 && len >= p[1];
 	    len -= p[1], p += p[1]) {
 		if (debug)
 			log(-1, " %s ", sppp_ipcp_opt_name(*p));
 		switch (*p) {
 		case IPCP_OPT_COMPRESSION:
 			sp->ipcp.opts &= ~(1 << IPCP_OPT_COMPRESSION);
 			break;
 		case IPCP_OPT_ADDRESS:
 			/*
 			 * Peer doesn't grok address option.  This is
 			 * bad.  XXX  Should we better give up here?
 			 * XXX We could try old "addresses" option...
 			 */
 			sp->ipcp.opts &= ~(1 << IPCP_OPT_ADDRESS);
 			break;
 		default:
 			/* Nothing we requested; ignore. */
 			break;
 		}
 	}
 	if (debug)
 		log(-1, "\n");
 }
 
 /*
  * Analyze the IPCP Configure-NAK option list, and adjust our
  * negotiation.
  */
 static void
 sppp_ipcp_RCN_nak(struct sppp *sp, struct lcp_header *h, int len)
 {
 	u_char *buf, *p;
 	struct ifnet *ifp = SP2IFP(sp);
 	int debug = ifp->if_flags & IFF_DEBUG;
 	int desiredcomp;
 	u_long wantaddr;
 
 	len -= 4;
 	buf = malloc (len, M_TEMP, M_NOWAIT);
 	if (!buf)
 		return;
 
 	if (debug)
 		log(LOG_DEBUG, SPP_FMT "ipcp nak opts: ",
 		    SPP_ARGS(ifp));
 
 	p = (void*) (h+1);
 	for (; len >= 2 && p[1] >= 2 && len >= p[1];
 	    len -= p[1], p += p[1]) {
 		if (debug)
 			log(-1, " %s ", sppp_ipcp_opt_name(*p));
 		switch (*p) {
 		case IPCP_OPT_COMPRESSION:
 			if (len >= 6 && p[1] == 6) {
 				desiredcomp = p[2] << 8 | p[3];
 				if (debug)
 					log(-1, "[wantcomp %#04x] ",
 						desiredcomp);
 				if (desiredcomp == IPCP_COMP_VJ) {
 					sl_compress_init(sp->pp_comp, p[4]);
 					sp->ipcp.max_state = p[4];
 					sp->ipcp.compress_cid = p[5];
 					if (debug)
 						log(-1, "[agree] ");
 				} else
 					sp->ipcp.opts &=
 						~(1 << IPCP_OPT_COMPRESSION);
 			}
 			break;
 		case IPCP_OPT_ADDRESS:
 			/*
 			 * Peer doesn't like our local IP address.  See
 			 * if we can do something for him.  We'll drop
 			 * him our address then.
 			 */
 			if (len >= 6 && p[1] == 6) {
 				wantaddr = p[2] << 24 | p[3] << 16 |
 					p[4] << 8 | p[5];
 				sp->ipcp.opts |= (1 << IPCP_OPT_ADDRESS);
 				if (debug)
 					log(-1, "[wantaddr %s] ",
 					       sppp_dotted_quad(wantaddr));
 				/*
 				 * When doing dynamic address assignment,
 				 * we accept his offer.  Otherwise, we
 				 * ignore it and thus continue to negotiate
 				 * our already existing value.
 			 	 * XXX: Bogus, if he said no once, he'll
 				 * just say no again, might as well die.
 				 */
 				if (sp->ipcp.flags & IPCP_MYADDR_DYN) {
 					sppp_set_ip_addr(sp, wantaddr);
 					if (debug)
 						log(-1, "[agree] ");
 					sp->ipcp.flags |= IPCP_MYADDR_SEEN;
 				}
 			}
 			break;
 		}
 	}
 	if (debug)
 		log(-1, "\n");
 	free (buf, M_TEMP);
 	return;
 }
 
 /*
  * This-layer-up action for IPCP: invoke the connect callback, if the
  * driver registered one.
  */
 static void
 sppp_ipcp_tlu(struct sppp *sp)
 {
 	/* No callback registered, nobody to tell. */
 	if (!sp->pp_con)
 		return;
 
 	/* we are up - notify isdn daemon */
 	sp->pp_con(sp);
 }
 
 /*
  * This-layer-down action for IPCP.  Intentionally empty: nothing is
  * done here when IPCP leaves the opened state.
  */
 static void
 sppp_ipcp_tld(struct sppp *sp)
 {
 }
 
 /*
  * This-layer-started action for IPCP: register IPCP in the set of
  * protocols that need LCP.
  */
 static void
 sppp_ipcp_tls(struct sppp *sp)
 {
 	const int ipcp_bit = 1 << IDX_IPCP;
 
 	/* indicate to LCP that it must stay alive */
 	sp->lcp.protos |= ipcp_bit;
 }
 
 /*
  * This-layer-finished action for IPCP: withdraw IPCP from the set of
  * protocols that need LCP, then let LCP decide whether to close.
  */
 static void
 sppp_ipcp_tlf(struct sppp *sp)
 {
 	const int ipcp_bit = 1 << IDX_IPCP;
 
 	/* we no longer need LCP */
 	sp->lcp.protos &= ~ipcp_bit;
 
 	sppp_lcp_check_and_close(sp);
 }
 
 /*
  * Send-configure-request action for IPCP.  Builds the option list from
  * the options currently enabled in sp->ipcp.opts (VJ compression and/or
  * our IP address) and sends it with a fresh identifier.
  */
 static void
 sppp_ipcp_scr(struct sppp *sp)
 {
 	char opt[6 /* compression */ + 6 /* address */];
 	u_long ouraddr;
 	int optlen = 0;
 
 	if (sp->ipcp.opts & (1 << IPCP_OPT_COMPRESSION)) {
 		/* type, length, 16 bit protocol, max slot, comp slot id */
 		opt[optlen + 0] = IPCP_OPT_COMPRESSION;
 		opt[optlen + 1] = 6;
 		opt[optlen + 2] = IPCP_COMP_VJ >> 8;
 		opt[optlen + 3] = IPCP_COMP_VJ;
 		opt[optlen + 4] = sp->ipcp.max_state;
 		opt[optlen + 5] = sp->ipcp.compress_cid;
 		optlen += 6;
 	}
 
 	if (sp->ipcp.opts & (1 << IPCP_OPT_ADDRESS)) {
 		sppp_get_ip_addrs(sp, &ouraddr, 0, 0);
 		/* type, length, address with the most significant byte first */
 		opt[optlen + 0] = IPCP_OPT_ADDRESS;
 		opt[optlen + 1] = 6;
 		opt[optlen + 2] = ouraddr >> 24;
 		opt[optlen + 3] = ouraddr >> 16;
 		opt[optlen + 4] = ouraddr >> 8;
 		opt[optlen + 5] = ouraddr;
 		optlen += 6;
 	}
 
 	/* Remember the identifier so the reply can be matched later. */
 	sp->confid[IDX_IPCP] = ++sp->pp_seq[IDX_IPCP];
 	sppp_cp_send(sp, PPP_IPCP, CONF_REQ, sp->confid[IDX_IPCP], optlen,
 	    &opt);
 }
 #else /* !INET */
 /*
  * Empty stand-ins for the IPCP handlers, compiled in the !INET branch
  * of the surrounding conditional.  They keep the same signatures as
  * the real handlers but do nothing.
  */
 
 /* IPCP init stub: does nothing. */
 static void
 sppp_ipcp_init(struct sppp *sp)
 {
 }
 
 /* IPCP up-event stub: does nothing. */
 static void
 sppp_ipcp_up(struct sppp *sp)
 {
 }
 
 /* IPCP down-event stub: does nothing. */
 static void
 sppp_ipcp_down(struct sppp *sp)
 {
 }
 
 /* IPCP open-event stub: does nothing. */
 static void
 sppp_ipcp_open(struct sppp *sp)
 {
 }
 
 /* IPCP close-event stub: does nothing. */
 static void
 sppp_ipcp_close(struct sppp *sp)
 {
 }
 
 /* IPCP timeout stub: does nothing. */
 static void
 sppp_ipcp_TO(void *cookie)
 {
 }
 
 /* IPCP configure-request stub: always returns 0. */
 static int
 sppp_ipcp_RCR(struct sppp *sp, struct lcp_header *h, int len)
 {
 	return (0);
 }
 
 /* IPCP configure-reject stub: does nothing. */
 static void
 sppp_ipcp_RCN_rej(struct sppp *sp, struct lcp_header *h, int len)
 {
 }
 
 /* IPCP configure-nak stub: does nothing. */
 static void
 sppp_ipcp_RCN_nak(struct sppp *sp, struct lcp_header *h, int len)
 {
 }
 
 /* IPCP this-layer-up stub: does nothing. */
 static void
 sppp_ipcp_tlu(struct sppp *sp)
 {
 }
 
 /* IPCP this-layer-down stub: does nothing. */
 static void
 sppp_ipcp_tld(struct sppp *sp)
 {
 }
 
 /* IPCP this-layer-started stub: does nothing. */
 static void
 sppp_ipcp_tls(struct sppp *sp)
 {
 }
 
 /* IPCP this-layer-finished stub: does nothing. */
 static void
 sppp_ipcp_tlf(struct sppp *sp)
 {
 }
 
 /* IPCP send-configure-request stub: does nothing. */
 static void
 sppp_ipcp_scr(struct sppp *sp)
 {
 }
 #endif
 
 /*
  *--------------------------------------------------------------------------*
  *                                                                          *
  *                      The IPv6CP implementation.                          *
  *                                                                          *
  *--------------------------------------------------------------------------*
  */
 
 #ifdef INET6
 /*
  * Reset the per-link IPv6CP bookkeeping: automaton state, failure
  * counter, sequence numbers, negotiated options and flags, and set up
  * the callout used for the IPv6CP timer.
  */
 static void
 sppp_ipv6cp_init(struct sppp *sp)
 {
 	/* Automaton state and counters. */
 	sp->state[IDX_IPV6CP] = STATE_INITIAL;
 	sp->fail_counter[IDX_IPV6CP] = 0;
 	sp->pp_rseq[IDX_IPV6CP] = 0;
 	sp->pp_seq[IDX_IPV6CP] = 0;
 
 	/* Nothing negotiated yet. */
 	sp->ipv6cp.flags = 0;
 	sp->ipv6cp.opts = 0;
 
 	callout_init(&sp->ch[IDX_IPV6CP], 1);
 }
 
 /* Up event for IPv6CP: forwarded unchanged to the generic up handler. */
 static void
 sppp_ipv6cp_up(struct sppp *sp)
 {
 	sppp_up_event(&ipv6cp, sp);
 }
 
 /* Down event for IPv6CP: forwarded unchanged to the generic down handler. */
 static void
 sppp_ipv6cp_down(struct sppp *sp)
 {
 	sppp_down_event(&ipv6cp, sp);
 }
 
 /*
  * Open event for IPv6CP.  Clears the "seen" state left over from a
  * previous negotiation, looks up our IPv6 address and, if we have one,
  * enables the interface-identifier option and feeds the open event to
  * the state automaton.
  */
 static void
 sppp_ipv6cp_open(struct sppp *sp)
 {
 	STDDCL;
 	struct in6_addr myaddr, hisaddr;
 
 #ifdef IPV6CP_MYIFID_DYN
 	sp->ipv6cp.flags &= ~(IPV6CP_MYIFID_SEEN|IPV6CP_MYIFID_DYN);
 #else
 	sp->ipv6cp.flags &= ~IPV6CP_MYIFID_SEEN;
 #endif
 
 	sppp_get_ip6_addrs(sp, &myaddr, &hisaddr, 0);
 
 	if (!IN6_IS_ADDR_UNSPECIFIED(&myaddr)) {
 		/* We do have an address: negotiate our interface id. */
 		sp->ipv6cp.flags |= IPV6CP_MYIFID_SEEN;
 		sp->ipv6cp.opts |= (1 << IPV6CP_OPT_IFID);
 		sppp_open_event(&ipv6cp, sp);
 		return;
 	}
 
 	/*
 	 * If we don't have our address, this probably means our
 	 * interface doesn't want to talk IPv6 at all.  (This could
 	 * be the case if somebody wants to speak only IPX, for
 	 * example.)  Don't open IPv6CP in this case.
 	 */
 	/* XXX this message should go away */
 	if (debug)
 		log(LOG_DEBUG, SPP_FMT "ipv6cp_open(): no IPv6 interface\n",
 		    SPP_ARGS(ifp));
 }
 
 /* Close event for IPv6CP: forwarded unchanged to the generic close handler. */
 static void
 sppp_ipv6cp_close(struct sppp *sp)
 {
 	sppp_close_event(&ipv6cp, sp);
 }
 
 /*
  * Timeout callback for IPv6CP.  The opaque cookie is the struct sppp
  * the timer belongs to; the event is handed to the generic timeout
  * handler.
  */
 static void
 sppp_ipv6cp_TO(void *cookie)
 {
 	struct sppp *sp = (struct sppp *)cookie;
 
 	sppp_to_event(&ipv6cp, sp);
 }
 
 /*
  * Analyze an IPv6CP configure request.  Return true if it was
  * agreeable, and caused action sca, false if it has been rejected or
  * nak'ed, and caused action scn.  (The return value is used to make
  * the state transition decision in the state automaton.)
  *
  * sp:  the link the request arrived on.
  * h:   header of the received packet; the options follow it directly.
  *      An interface-identifier option that is nak'ed or rejected is
  *      rewritten in place before being copied into the reply.
  * len: length of the packet including the 4 byte header.
  *
  * Pass 1 answers with a conf-rej if any option is unknown, malformed
  * or a duplicate interface identifier.  Pass 2 compares the peer's
  * interface identifier with ours and sends conf-ack, conf-nak (with a
  * suggested identifier) or conf-rej accordingly.  Returns 0 as well if
  * the scratch buffer cannot be allocated; nothing is sent in that case.
  */
 static int
 sppp_ipv6cp_RCR(struct sppp *sp, struct lcp_header *h, int len)
 {
 	u_char *buf, *r, *p;
 	struct ifnet *ifp = SP2IFP(sp);
 	int rlen, origlen, debug = ifp->if_flags & IFF_DEBUG;
 	struct in6_addr myaddr, desiredaddr, suggestaddr;
 	int ifidcount;
 	int type;
 	int collision, nohisaddr;
 	char ip6buf[INET6_ADDRSTRLEN];
 
 	len -= 4;
 	origlen = len;
 	/*
 	 * The reply buffer only ever receives options copied from the
 	 * request, so the request length is enough; 6 bytes is merely a
 	 * lower bound on the allocation size.
 	 */
 	buf = r = malloc ((len < 6? 6: len), M_TEMP, M_NOWAIT);
 	if (! buf)
 		return (0);
 
 	/* pass 1: see if we can recognize them */
 	if (debug)
 		log(LOG_DEBUG, SPP_FMT "ipv6cp parse opts:",
 		    SPP_ARGS(ifp));
 	p = (void*) (h+1);
 	ifidcount = 0;
 	for (rlen=0; len >= 2 && p[1] >= 2 && len >= p[1];
 	    len-=p[1], p+=p[1]) {
 		if (debug)
 			log(-1, " %s", sppp_ipv6cp_opt_name(*p));
 		switch (*p) {
 		case IPV6CP_OPT_IFID:
 			/* Only the first interface-id option is accepted. */
 			if (len >= 10 && p[1] == 10 && ifidcount == 0) {
 				/* correctly formed interface-id option */
 				ifidcount++;
 				continue;
 			}
 			if (debug)
 				log(-1, " [invalid]");
 			break;
 #ifdef notyet
 		case IPV6CP_OPT_COMPRESSION:
 			if (len >= 4 && p[1] >= 4) {
 				/* correctly formed compress option */
 				continue;
 			}
 			if (debug)
 				log(-1, " [invalid]");
 			break;
 #endif
 		default:
 			/* Others not supported. */
 			if (debug)
 				log(-1, " [rej]");
 			break;
 		}
 		/* Add the option to rejected list. */
 		bcopy (p, r, p[1]);
 		r += p[1];
 		rlen += p[1];
 	}
 	if (rlen) {
 		if (debug)
 			log(-1, " send conf-rej\n");
 		sppp_cp_send (sp, PPP_IPV6CP, CONF_REJ, h->ident, rlen, buf);
 		/* rlen is non-zero, so the common exit returns false. */
 		goto end;
 	} else if (debug)
 		log(-1, "\n");
 
 	/* pass 2: parse option values */
 	sppp_get_ip6_addrs(sp, &myaddr, 0, 0);
 	if (debug)
 		log(LOG_DEBUG, SPP_FMT "ipv6cp parse opt values: ",
 		    SPP_ARGS(ifp));
 	p = (void*) (h+1);
 	len = origlen;
 	type = CONF_ACK;
 	for (rlen=0; len >= 2 && p[1] >= 2 && len >= p[1];
 	    len-=p[1], p+=p[1]) {
 		if (debug)
 			log(-1, " %s", sppp_ipv6cp_opt_name(*p));
 		switch (*p) {
 #ifdef notyet
 		case IPV6CP_OPT_COMPRESSION:
 			continue;
 #endif
 		case IPV6CP_OPT_IFID:
 			/*
 			 * Put the peer's 8 byte interface id into the low
 			 * half of an otherwise zero address, so it can be
 			 * compared with the low half of our own address
 			 * and tested for being all zero.
 			 */
 			bzero(&desiredaddr, sizeof(desiredaddr));
 			bcopy(&p[2], &desiredaddr.s6_addr[8], 8);
 			collision = (bcmp(&desiredaddr.s6_addr[8],
 					  &myaddr.s6_addr[8], 8) == 0);
 			nohisaddr = IN6_IS_ADDR_UNSPECIFIED(&desiredaddr);
 
 			/* Turn it into a scoped fe80:: address (logged below). */
 			desiredaddr.s6_addr16[0] = htons(0xfe80);
 			(void)in6_setscope(&desiredaddr, SP2IFP(sp), NULL);
 
 			if (!collision && !nohisaddr) {
 				/* no collision, hisaddr known - Conf-Ack */
 				type = CONF_ACK;
 
 				if (debug) {
 					log(-1, " %s [%s]",
 					    ip6_sprintf(ip6buf, &desiredaddr),
 					    sppp_cp_type_name(type));
 				}
 				continue;
 			}
 
 			bzero(&suggestaddr, sizeof(suggestaddr));
 			if (collision && nohisaddr) {
 				/* collision, hisaddr unknown - Conf-Rej */
 				type = CONF_REJ;
 				bzero(&p[2], 8);
 			} else {
 				/*
 				 * - no collision, hisaddr unknown, or
 				 * - collision, hisaddr known
 				 * Conf-Nak, suggest hisaddr
 				 */
 				type = CONF_NAK;
 				sppp_suggest_ip6_addr(sp, &suggestaddr);
 				bcopy(&suggestaddr.s6_addr[8], &p[2], 8);
 			}
 			if (debug)
 				log(-1, " %s [%s]",
 				    ip6_sprintf(ip6buf, &desiredaddr),
 				    sppp_cp_type_name(type));
 			break;
 		}
 		/* Add the option to nak'ed list. */
 		bcopy (p, r, p[1]);
 		r += p[1];
 		rlen += p[1];
 	}
 
 	if (rlen == 0 && type == CONF_ACK) {
 		/* Everything agreeable: echo the request back as the ack. */
 		if (debug)
 			log(-1, " send %s\n", sppp_cp_type_name(type));
 		sppp_cp_send (sp, PPP_IPV6CP, type, h->ident, origlen, h+1);
 	} else {
 #ifdef DIAGNOSTIC
 		if (type == CONF_ACK)
 			panic("IPv6CP RCR: CONF_ACK with non-zero rlen");
 #endif
 
 		if (debug) {
 			log(-1, " send %s suggest %s\n",
 			    sppp_cp_type_name(type),
 			    ip6_sprintf(ip6buf, &suggestaddr));
 		}
 		sppp_cp_send (sp, PPP_IPV6CP, type, h->ident, rlen, buf);
 	}
 
  end:
 	free (buf, M_TEMP);
 	return (rlen == 0);
 }
 
 /*
  * Analyze the IPv6CP Configure-Reject option list, and adjust our
  * negotiation.
  *
  * sp:  the link the reject arrived on.
  * h:   header of the received packet; the rejected options follow it.
  * len: length of the packet including the 4 byte header.
  *
  * Every option the peer rejected is removed from the set of options we
  * are going to request.  The option list is only read, so no scratch
  * buffer is needed and the reject is processed even when memory is
  * tight.
  */
 static void
 sppp_ipv6cp_RCN_rej(struct sppp *sp, struct lcp_header *h, int len)
 {
 	u_char *p;
 	struct ifnet *ifp = SP2IFP(sp);
 	int debug = ifp->if_flags & IFF_DEBUG;
 
 	len -= 4;
 
 	if (debug)
 		log(LOG_DEBUG, SPP_FMT "ipv6cp rej opts:",
 		    SPP_ARGS(ifp));
 
 	p = (void*) (h+1);
 	for (; len >= 2 && p[1] >= 2 && len >= p[1];
 	    len -= p[1], p += p[1]) {
 		if (debug)
 			log(-1, " %s", sppp_ipv6cp_opt_name(*p));
 		switch (*p) {
 		case IPV6CP_OPT_IFID:
 			/*
 			 * Peer doesn't grok the interface-id option.  This
 			 * is bad.  XXX  Should we better give up here?
 			 */
 			sp->ipv6cp.opts &= ~(1 << IPV6CP_OPT_IFID);
 			break;
 #ifdef notyet
 		case IPV6CP_OPT_COMPRESS:
 			sp->ipv6cp.opts &= ~(1 << IPV6CP_OPT_COMPRESS);
 			break;
 #endif
 		default:
 			/* Nothing we requested; ignore. */
 			break;
 		}
 	}
 	if (debug)
 		log(-1, "\n");
 }
 
 /*
  * Analyze the IPv6CP Configure-NAK option list, and adjust our
  * negotiation.
  *
  * sp:  the link the nak arrived on.
  * h:   header of the received packet; the nak'ed options follow it.
  * len: length of the packet including the 4 byte header.
  *
  * The option list is only read, so no scratch buffer is needed and the
  * nak is processed even when memory is tight.
  */
 static void
 sppp_ipv6cp_RCN_nak(struct sppp *sp, struct lcp_header *h, int len)
 {
 	u_char *p;
 	struct ifnet *ifp = SP2IFP(sp);
 	int debug = ifp->if_flags & IFF_DEBUG;
 	struct in6_addr suggestaddr;
 	char ip6buf[INET6_ADDRSTRLEN];
 
 	len -= 4;
 
 	if (debug)
 		log(LOG_DEBUG, SPP_FMT "ipv6cp nak opts:",
 		    SPP_ARGS(ifp));
 
 	p = (void*) (h+1);
 	for (; len >= 2 && p[1] >= 2 && len >= p[1];
 	    len -= p[1], p += p[1]) {
 		if (debug)
 			log(-1, " %s", sppp_ipv6cp_opt_name(*p));
 		switch (*p) {
 		case IPV6CP_OPT_IFID:
 			/*
 			 * Peer doesn't like our local ifid.  See
 			 * if we can do something for him.  We'll drop
 			 * him our address then.
 			 */
 			if (len < 10 || p[1] != 10)
 				break;
 			/*
 			 * Build a scoped fe80:: address whose low half is
 			 * the interface id the peer suggested.
 			 */
 			bzero(&suggestaddr, sizeof(suggestaddr));
 			suggestaddr.s6_addr16[0] = htons(0xfe80);
 			(void)in6_setscope(&suggestaddr, SP2IFP(sp), NULL);
 			bcopy(&p[2], &suggestaddr.s6_addr[8], 8);
 
 			sp->ipv6cp.opts |= (1 << IPV6CP_OPT_IFID);
 			if (debug)
 				log(-1, " [suggestaddr %s]",
 				       ip6_sprintf(ip6buf, &suggestaddr));
 #ifdef IPV6CP_MYIFID_DYN
 			/*
 			 * When doing dynamic address assignment,
 			 * we accept his offer.
 			 */
 			if (sp->ipv6cp.flags & IPV6CP_MYIFID_DYN) {
 				struct in6_addr lastsuggest;
 				/*
 				 * If <suggested myaddr from peer> equals to
 				 * <hisaddr we have suggested last time>,
 				 * we have a collision.  generate new random
 				 * ifid.
 				 */
 				sppp_suggest_ip6_addr(sp, &lastsuggest);
 				if (IN6_ARE_ADDR_EQUAL(&suggestaddr,
 						       &lastsuggest)) {
 					if (debug)
 						log(-1, " [random]");
 					sppp_gen_ip6_addr(sp, &suggestaddr);
 				}
 				sppp_set_ip6_addr(sp, &suggestaddr, 0);
 				if (debug)
 					log(-1, " [agree]");
 				sp->ipv6cp.flags |= IPV6CP_MYIFID_SEEN;
 			}
 #else
 			/*
 			 * Since we do not do dynamic address assignment,
 			 * we ignore it and thus continue to negotiate
 			 * our already existing value.  This can possibly
 			 * go into an infinite request/nak loop.
 			 *
 			 * This is not likely because we normally use
 			 * ifid based on MAC-address.
 			 * If you have no ethernet card on the node, too bad.
 			 * XXX should we use fail_counter?
 			 */
 #endif
 			break;
 #ifdef notyet
 		case IPV6CP_OPT_COMPRESS:
 			/*
 			 * Peer wants different compression parameters.
 			 */
 			break;
 #endif
 		default:
 			/* Nothing we know how to adjust; ignore. */
 			break;
 		}
 	}
 	if (debug)
 		log(-1, "\n");
 }
 /*
  * This-layer-up action for IPv6CP: invoke the connect callback, if the
  * driver registered one.
  */
 static void
 sppp_ipv6cp_tlu(struct sppp *sp)
 {
 	/* No callback registered, nobody to tell. */
 	if (!sp->pp_con)
 		return;
 
 	/* we are up - notify isdn daemon */
 	sp->pp_con(sp);
 }
 
 /*
  * This-layer-down action for IPv6CP.  Intentionally empty: nothing is
  * done here when IPv6CP leaves the opened state.
  */
 static void
 sppp_ipv6cp_tld(struct sppp *sp)
 {
 }
 
 /*
  * This-layer-started action for IPv6CP: register IPv6CP in the set of
  * protocols that need LCP.
  */
 static void
 sppp_ipv6cp_tls(struct sppp *sp)
 {
 	const int ipv6cp_bit = 1 << IDX_IPV6CP;
 
 	/* indicate to LCP that it must stay alive */
 	sp->lcp.protos |= ipv6cp_bit;
 }
 
 /*
  * This-layer-finished action for IPv6CP.  Currently a no-op: the code
  * that would withdraw IPv6CP from the protocols needing LCP and ask
  * LCP to close is compiled out.
  * NOTE(review): the original remark on the #if 0 suggests the code is
  * disabled on purpose so that IPv6CP closes properly -- confirm before
  * re-enabling it.
  */
 static void
 sppp_ipv6cp_tlf(struct sppp *sp)
 {
 
 #if 0	/* need #if 0 to close IPv6CP properly */
 	/* we no longer need LCP */
 	sp->lcp.protos &= ~(1 << IDX_IPV6CP);
 	sppp_lcp_check_and_close(sp);
 #endif
 }
 
 /*
  * Send-configure-request action for IPv6CP.  Builds the option list
  * from the options currently enabled in sp->ipv6cp.opts and sends it
  * with a fresh identifier.
  */
 static void
 sppp_ipv6cp_scr(struct sppp *sp)
 {
 	char opt[10 /* ifid */ + 4 /* compression, minimum */];
 	struct in6_addr ouraddr;
 	int optlen = 0;
 
 	if (sp->ipv6cp.opts & (1 << IPV6CP_OPT_IFID)) {
 		sppp_get_ip6_addrs(sp, &ouraddr, 0, 0);
 		/* type, length, then the low 8 bytes of our address */
 		opt[optlen + 0] = IPV6CP_OPT_IFID;
 		opt[optlen + 1] = 10;
 		bcopy(&ouraddr.s6_addr[8], &opt[optlen + 2], 8);
 		optlen += 10;
 	}
 
 #ifdef notyet
 	if (sp->ipv6cp.opts & (1 << IPV6CP_OPT_COMPRESSION)) {
 		opt[optlen + 0] = IPV6CP_OPT_COMPRESSION;
 		opt[optlen + 1] = 4;
 		opt[optlen + 2] = 0;   /* TBD */
 		opt[optlen + 3] = 0;   /* TBD */
 		optlen += 4;
 		/* variable length data may follow */
 	}
 #endif
 
 	/* Remember the identifier so the reply can be matched later. */
 	sp->confid[IDX_IPV6CP] = ++sp->pp_seq[IDX_IPV6CP];
 	sppp_cp_send(sp, PPP_IPV6CP, CONF_REQ, sp->confid[IDX_IPV6CP], optlen,
 	    &opt);
 }
 #else /*INET6*/
 /*
  * Empty stand-ins for the IPv6CP handlers, compiled in the branch of
  * the surrounding conditional taken when INET6 is not defined.  They
  * keep the same signatures as the real handlers but do nothing.
  */
 
 /* IPv6CP init stub: does nothing. */
 static void sppp_ipv6cp_init(struct sppp *sp)
 {
 }
 
 /* IPv6CP up-event stub: does nothing. */
 static void sppp_ipv6cp_up(struct sppp *sp)
 {
 }
 
 /* IPv6CP down-event stub: does nothing. */
 static void sppp_ipv6cp_down(struct sppp *sp)
 {
 }
 
 /* IPv6CP open-event stub: does nothing. */
 static void sppp_ipv6cp_open(struct sppp *sp)
 {
 }
 
 /* IPv6CP close-event stub: does nothing. */
 static void sppp_ipv6cp_close(struct sppp *sp)
 {
 }
 
 /* IPv6CP timeout stub: does nothing. */
 static void sppp_ipv6cp_TO(void *sp)
 {
 }
 
 /* IPv6CP configure-request stub: always returns 0. */
 static int sppp_ipv6cp_RCR(struct sppp *sp, struct lcp_header *h, int len)
 {
 	return 0;
 }
 
 /* IPv6CP configure-reject stub: does nothing. */
 static void sppp_ipv6cp_RCN_rej(struct sppp *sp, struct lcp_header *h, int len)
 {
 }
 
 /* IPv6CP configure-nak stub: does nothing. */
 static void sppp_ipv6cp_RCN_nak(struct sppp *sp, struct lcp_header *h, int len)
 {
 }
 
 /* IPv6CP this-layer-up stub: does nothing. */
 static void sppp_ipv6cp_tlu(struct sppp *sp)
 {
 }
 
 /* IPv6CP this-layer-down stub: does nothing. */
 static void sppp_ipv6cp_tld(struct sppp *sp)
 {
 }
 
 /* IPv6CP this-layer-started stub: does nothing. */
 static void sppp_ipv6cp_tls(struct sppp *sp)
 {
 }
 
 /* IPv6CP this-layer-finished stub: does nothing. */
 static void sppp_ipv6cp_tlf(struct sppp *sp)
 {
 }
 
 /* IPv6CP send-configure-request stub: does nothing. */
 static void sppp_ipv6cp_scr(struct sppp *sp)
 {
 }
 #endif /*INET6*/
 
 /*
  *--------------------------------------------------------------------------*
  *                                                                          *
  *                        The CHAP implementation.                          *
  *                                                                          *
  *--------------------------------------------------------------------------*
  */
 
 /*
  * The authentication protocols don't employ a full-fledged state machine as
  * the control protocols do, since they do have Open and Close events, but
  * not Up and Down, nor are they explicitly terminated.  Also, use of the
  * authentication protocols may be different in both directions (this makes
  * sense, think of a machine that never accepts incoming calls but only
  * calls out, it doesn't require the called party to authenticate itself).
  *
  * Our state machine for the local authentication protocol (we are requesting
  * the peer to authenticate) looks like:
  *
  *						    RCA-
  *	      +--------------------------------------------+
  *	      V					    scn,tld|
  *	  +--------+			       Close   +---------+ RCA+
  *	  |	   |<----------------------------------|	 |------+
  *   +--->| Closed |				TO*    | Opened	 | sca	|
  *   |	  |	   |-----+		       +-------|	 |<-----+
  *   |	  +--------+ irc |		       |       +---------+
  *   |	    ^		 |		       |	   ^
  *   |	    |		 |		       |	   |
  *   |	    |		 |		       |	   |
  *   |	 TO-|		 |		       |	   |
  *   |	    |tld  TO+	 V		       |	   |
  *   |	    |	+------->+		       |	   |
  *   |	    |	|	 |		       |	   |
  *   |	  +--------+	 V		       |	   |
  *   |	  |	   |<----+<--------------------+	   |
  *   |	  | Req-   | scr				   |
  *   |	  | Sent   |					   |
  *   |	  |	   |					   |
  *   |	  +--------+					   |
  *   | RCA- |	| RCA+					   |
  *   +------+	+------------------------------------------+
  *   scn,tld	  sca,irc,ict,tlu
  *
  *
  *   with:
  *
  *	Open:	LCP reached authentication phase
  *	Close:	LCP reached terminate phase
  *
  *	RCA+:	received reply (pap-req, chap-response), acceptable
  *	RCA-:	received reply (pap-req, chap-response), not acceptable
  *	TO+:	timeout with restart counter >= 0
  *	TO-:	timeout with restart counter < 0
  *	TO*:	reschedule timeout for CHAP
  *
  *	scr:	send request packet (none for PAP, chap-challenge)
  *	sca:	send ack packet (pap-ack, chap-success)
  *	scn:	send nak packet (pap-nak, chap-failure)
  *	ict:	initialize re-challenge timer (CHAP only)
  *
  *	tlu:	this-layer-up, LCP reaches network phase
  *	tld:	this-layer-down, LCP enters terminate phase
  *
  * Note that in CHAP mode, after sending a new challenge, while the state
  * automaton falls back into Req-Sent state, it doesn't signal a tld
  * event to LCP, so LCP remains in network phase.  Only after not getting
  * any response (or after getting an unacceptable response), CHAP closes,
  * causing LCP to enter terminate phase.
  *
  * With PAP, there is no initial request that can be sent.  The peer is
  * expected to send one based on the successful negotiation of PAP as
  * the authentication protocol during the LCP option negotiation.
  *
  * Incoming authentication protocol requests (remote requests
  * authentication, we are peer) don't employ a state machine at all,
  * they are simply answered.  Some peers [Ascend P50 firmware rev
  * 4.50] react allergically when sending IPCP requests while they are
  * still in authentication phase (thereby violating the standard that
  * demands that these NCP packets are to be discarded), so we keep
  * track of the peer demanding us to authenticate, and only proceed to
  * phase network once we've seen a positive acknowledge for the
  * authentication.
  */
 
 /*
  * Handle incoming CHAP packets.
  */
 static void
 sppp_chap_input(struct sppp *sp, struct mbuf *m)
 {
 	STDDCL;
 	struct lcp_header *h;
 	int len;
 	u_char *value, *name, digest[AUTHKEYLEN], dsize;
 	int value_len, name_len;
 	MD5_CTX ctx;
 
 	len = m->m_pkthdr.len;
 	if (len < 4) {
 		if (debug)
 			log(LOG_DEBUG,
 			    SPP_FMT "chap invalid packet length: %d bytes\n",
 			    SPP_ARGS(ifp), len);
 		return;
 	}
 	h = mtod (m, struct lcp_header*);
 	if (len > ntohs (h->len))
 		len = ntohs (h->len);
 
 	switch (h->type) {
 	/* challenge, failure and success are his authproto */
 	case CHAP_CHALLENGE:
 		value = 1 + (u_char*)(h+1);
 		value_len = value[-1];
 		name = value + value_len;
 		name_len = len - value_len - 5;
 		if (name_len < 0) {
 			if (debug) {
 				log(LOG_DEBUG,
 				    SPP_FMT "chap corrupted challenge "
 				    "<%s id=0x%x len=%d",
 				    SPP_ARGS(ifp),
 				    sppp_auth_type_name(PPP_CHAP, h->type),
 				    h->ident, ntohs(h->len));
 				sppp_print_bytes((u_char*) (h+1), len-4);
 				log(-1, ">\n");
 			}
 			break;
 		}
 
 		if (debug) {
 			log(LOG_DEBUG,
 			    SPP_FMT "chap input <%s id=0x%x len=%d name=",
 			    SPP_ARGS(ifp),
 			    sppp_auth_type_name(PPP_CHAP, h->type), h->ident,
 			    ntohs(h->len));
 			sppp_print_string((char*) name, name_len);
 			log(-1, " value-size=%d value=", value_len);
 			sppp_print_bytes(value, value_len);
 			log(-1, ">\n");
 		}
 
 		/* Compute reply value. */
 		MD5Init(&ctx);
 		MD5Update(&ctx, &h->ident, 1);
 		MD5Update(&ctx, sp->myauth.secret,
 			  sppp_strnlen(sp->myauth.secret, AUTHKEYLEN));
 		MD5Update(&ctx, value, value_len);
 		MD5Final(digest, &ctx);
 		dsize = sizeof digest;
 
 		sppp_auth_send(&chap, sp, CHAP_RESPONSE, h->ident,
 			       sizeof dsize, (const char *)&dsize,
 			       sizeof digest, digest,
 			       (size_t)sppp_strnlen(sp->myauth.name, AUTHNAMELEN),
 			       sp->myauth.name,
 			       0);
 		break;
 
 	case CHAP_SUCCESS:
 		if (debug) {
 			log(LOG_DEBUG, SPP_FMT "chap success",
 			    SPP_ARGS(ifp));
 			if (len > 4) {
 				log(-1, ": ");
 				sppp_print_string((char*)(h + 1), len - 4);
 			}
 			log(-1, "\n");
 		}
 		SPPP_LOCK(sp);
 		sp->pp_flags &= ~PP_NEEDAUTH;
 		if (sp->myauth.proto == PPP_CHAP &&
 		    (sp->lcp.opts & (1 << LCP_OPT_AUTH_PROTO)) &&
 		    (sp->lcp.protos & (1 << IDX_CHAP)) == 0) {
 			/*
 			 * We are authenticator for CHAP but didn't
 			 * complete yet.  Leave it to tlu to proceed
 			 * to network phase.
 			 */
 			SPPP_UNLOCK(sp);
 			break;
 		}
 		SPPP_UNLOCK(sp);
 		sppp_phase_network(sp);
 		break;
 
 	case CHAP_FAILURE:
 		if (debug) {
 			log(LOG_INFO, SPP_FMT "chap failure",
 			    SPP_ARGS(ifp));
 			if (len > 4) {
 				log(-1, ": ");
 				sppp_print_string((char*)(h + 1), len - 4);
 			}
 			log(-1, "\n");
 		} else
 			log(LOG_INFO, SPP_FMT "chap failure\n",
 			    SPP_ARGS(ifp));
 		/* await LCP shutdown by authenticator */
 		break;
 
 	/* response is my authproto */
 	case CHAP_RESPONSE:
 		value = 1 + (u_char*)(h+1);
 		value_len = value[-1];
 		name = value + value_len;
 		name_len = len - value_len - 5;
 		if (name_len < 0) {
 			if (debug) {
 				log(LOG_DEBUG,
 				    SPP_FMT "chap corrupted response "
 				    "<%s id=0x%x len=%d",
 				    SPP_ARGS(ifp),
 				    sppp_auth_type_name(PPP_CHAP, h->type),
 				    h->ident, ntohs(h->len));
 				sppp_print_bytes((u_char*)(h+1), len-4);
 				log(-1, ">\n");
 			}
 			break;
 		}
 		if (h->ident != sp->confid[IDX_CHAP]) {
 			if (debug)
 				log(LOG_DEBUG,
 				    SPP_FMT "chap dropping response for old ID "
 				    "(got %d, expected %d)\n",
 				    SPP_ARGS(ifp),
 				    h->ident, sp->confid[IDX_CHAP]);
 			break;
 		}
 		if (name_len != sppp_strnlen(sp->hisauth.name, AUTHNAMELEN)
 		    || bcmp(name, sp->hisauth.name, name_len) != 0) {
 			log(LOG_INFO, SPP_FMT "chap response, his name ",
 			    SPP_ARGS(ifp));
 			sppp_print_string(name, name_len);
 			log(-1, " != expected ");
 			sppp_print_string(sp->hisauth.name,
 					  sppp_strnlen(sp->hisauth.name, AUTHNAMELEN));
 			log(-1, "\n");
 		}
 		if (debug) {
 			log(LOG_DEBUG, SPP_FMT "chap input(%s) "
 			    "<%s id=0x%x len=%d name=",
 			    SPP_ARGS(ifp),
 			    sppp_state_name(sp->state[IDX_CHAP]),
 			    sppp_auth_type_name(PPP_CHAP, h->type),
 			    h->ident, ntohs (h->len));
 			sppp_print_string((char*)name, name_len);
 			log(-1, " value-size=%d value=", value_len);
 			sppp_print_bytes(value, value_len);
 			log(-1, ">\n");
 		}
 		if (value_len != AUTHKEYLEN) {
 			if (debug)
 				log(LOG_DEBUG,
 				    SPP_FMT "chap bad hash value length: "
 				    "%d bytes, should be %d\n",
 				    SPP_ARGS(ifp), value_len,
 				    AUTHKEYLEN);
 			break;
 		}
 
 		MD5Init(&ctx);
 		MD5Update(&ctx, &h->ident, 1);
 		MD5Update(&ctx, sp->hisauth.secret,
 			  sppp_strnlen(sp->hisauth.secret, AUTHKEYLEN));
 		MD5Update(&ctx, sp->myauth.challenge, AUTHKEYLEN);
 		MD5Final(digest, &ctx);
 
 #define FAILMSG "Failed..."
 #define SUCCMSG "Welcome!"
 
 		if (value_len != sizeof digest ||
 		    bcmp(digest, value, value_len) != 0) {
 			/* action scn, tld */
 			sppp_auth_send(&chap, sp, CHAP_FAILURE, h->ident,
 				       sizeof(FAILMSG) - 1, (u_char *)FAILMSG,
 				       0);
 			chap.tld(sp);
 			break;
 		}
 		/* action sca, perhaps tlu */
 		if (sp->state[IDX_CHAP] == STATE_REQ_SENT ||
 		    sp->state[IDX_CHAP] == STATE_OPENED)
 			sppp_auth_send(&chap, sp, CHAP_SUCCESS, h->ident,
 				       sizeof(SUCCMSG) - 1, (u_char *)SUCCMSG,
 				       0);
 		if (sp->state[IDX_CHAP] == STATE_REQ_SENT) {
 			sppp_cp_change_state(&chap, sp, STATE_OPENED);
 			chap.tlu(sp);
 		}
 		break;
 
 	default:
 		/* Unknown CHAP packet type -- ignore. */
 		if (debug) {
 			log(LOG_DEBUG, SPP_FMT "chap unknown input(%s) "
 			    "<0x%x id=0x%xh len=%d",
 			    SPP_ARGS(ifp),
 			    sppp_state_name(sp->state[IDX_CHAP]),
 			    h->type, h->ident, ntohs(h->len));
 			sppp_print_bytes((u_char*)(h+1), len-4);
 			log(-1, ">\n");
 		}
 		break;
 	}
 }
 
 static void
 sppp_chap_init(struct sppp *sp)
 {
 	/*
 	 * CHAP has no STATE_INITIAL; the automaton starts out closed
 	 * with its failure counter and sequence numbers cleared.
 	 */
 	sp->pp_rseq[IDX_CHAP] = 0;
 	sp->pp_seq[IDX_CHAP] = 0;
 	sp->fail_counter[IDX_CHAP] = 0;
 	sp->state[IDX_CHAP] = STATE_CLOSED;
 
 	/* Prepare the callout later armed by the CHAP timeout code. */
 	callout_init(&sp->ch[IDX_CHAP], 1);
 }
 
 static void
 sppp_chap_open(struct sppp *sp)
 {
 	/*
 	 * Only the authenticator side starts the exchange.  If we are
 	 * the peer there is nothing to be done; we await a challenge.
 	 */
 	if (sp->myauth.proto != PPP_CHAP ||
 	    (sp->lcp.opts & (1 << LCP_OPT_AUTH_PROTO)) == 0)
 		return;
 
 	/* We are authenticator for CHAP: send the first challenge. */
 	chap.scr(sp);
 	sp->rst_counter[IDX_CHAP] = sp->lcp.max_configure;
 	sppp_cp_change_state(&chap, sp, STATE_REQ_SENT);
 }
 
 static void
 sppp_chap_close(struct sppp *sp)
 {
 	/* Already closed: no transition to perform. */
 	if (sp->state[IDX_CHAP] == STATE_CLOSED)
 		return;
 
 	sppp_cp_change_state(&chap, sp, STATE_CLOSED);
 }
 
 /*
  * CHAP timeout handler; `cookie' is the struct sppp the callout was
  * armed with.  The restart counter is decremented on every expiry:
  * once it drops below zero the exchange is abandoned (TO-), otherwise
  * a new challenge is sent (TO+, or TO* when re-challenging from the
  * opened state).
  */
 static void
 sppp_chap_TO(void *cookie)
 {
 	struct sppp *sp = (struct sppp *)cookie;
 	STDDCL;
 	int state;
 
 	SPPP_LOCK(sp);
 	if (debug) {
 		log(LOG_DEBUG, SPP_FMT "chap TO(%s) rst_counter = %d\n",
 		    SPP_ARGS(ifp),
 		    sppp_state_name(sp->state[IDX_CHAP]),
 		    sp->rst_counter[IDX_CHAP]);
 	}
 
 	sp->rst_counter[IDX_CHAP] -= 1;
 	state = sp->state[IDX_CHAP];
 
 	if (sp->rst_counter[IDX_CHAP] < 0) {
 		/* TO- event: only meaningful while a request is pending. */
 		if (state == STATE_REQ_SENT) {
 			chap.tld(sp);
 			sppp_cp_change_state(&chap, sp, STATE_CLOSED);
 		}
 	} else if (state == STATE_OPENED || state == STATE_REQ_SENT) {
 		/* TO* event: a re-challenge starts with a fresh counter. */
 		if (state == STATE_OPENED)
 			sp->rst_counter[IDX_CHAP] = sp->lcp.max_configure;
 
 		/* TO+ event: (re)send the challenge. */
 		chap.scr(sp);
 		/* sppp_cp_change_state() will restart the timer */
 		sppp_cp_change_state(&chap, sp, STATE_REQ_SENT);
 	}
 
 	SPPP_UNLOCK(sp);
 }
 
 /*
  * CHAP "this layer up" action: optionally schedule the next
  * re-challenge, record in the LCP protocol mask that CHAP is up, and
  * move on to the network phase unless the remote's own authentication
  * is still outstanding.
  */
 static void
 sppp_chap_tlu(struct sppp *sp)
 {
 	STDDCL;
 	int rechallenge;
 	int secs = 0;
 	int needauth;
 
 	sp->rst_counter[IDX_CHAP] = sp->lcp.max_configure;
 
 	/*
 	 * Some broken CHAP implementations (Conware CoNet, firmware
 	 * 4.0.?) don't want to re-authenticate their CHAP once the
 	 * initial challenge-response exchange has taken place, so
 	 * re-challenging can be switched off per peer.
 	 */
 	rechallenge = (sp->hisauth.flags & AUTHFLAG_NORECHALLENGE) == 0;
 	if (rechallenge) {
 		/*
 		 * Pick a re-challenge timeout somewhere between 300 and
 		 * 810 seconds.
 		 */
 		secs = 300 + ((unsigned)(random() & 0xff00) >> 7);
 		callout_reset(&sp->ch[IDX_CHAP], secs * hz, chap.TO,
 		    (void *)sp);
 	}
 
 	if (debug) {
 		log(LOG_DEBUG,
 		    SPP_FMT "chap %s, ",
 		    SPP_ARGS(ifp),
 		    sp->pp_phase == PHASE_NETWORK? "reconfirmed": "tlu");
 		if (rechallenge)
 			log(-1, "next re-challenge in %d seconds\n", secs);
 		else
 			log(-1, "re-challenging suppressed\n");
 	}
 
 	SPPP_LOCK(sp);
 	/* indicate to LCP that we need to be closed down */
 	sp->lcp.protos |= (1 << IDX_CHAP);
 	needauth = (sp->pp_flags & PP_NEEDAUTH) != 0;
 	SPPP_UNLOCK(sp);
 
 	/*
 	 * Remote is authenticator, but his auth proto didn't complete
 	 * yet.  Defer the transition to network phase.
 	 */
 	if (needauth)
 		return;
 
 	/*
 	 * If we are already in phase network, this was only a dummy
 	 * tlu event after a re-challenge and there is nothing left to do.
 	 */
 	if (sp->pp_phase == PHASE_NETWORK)
 		return;
 	sppp_phase_network(sp);
 }
 
 /*
  * CHAP "this layer down" action: stop the CHAP timer, clear CHAP from
  * the LCP protocol mask and ask LCP to close.
  */
 static void
 sppp_chap_tld(struct sppp *sp)
 {
 	STDDCL;
 
 	if (debug) {
 		log(LOG_DEBUG, SPP_FMT "chap tld\n", SPP_ARGS(ifp));
 	}
 
 	callout_stop(&sp->ch[IDX_CHAP]);
 	sp->lcp.protos &= ~(1 << IDX_CHAP);
 
 	lcp.Close(sp);
 }
 
 /*
  * CHAP "send configure request" action: generate a fresh random
  * challenge, bump the sequence number and send a CHAP challenge packet
  * consisting of the value size, the challenge itself and our name.
  */
 static void
 sppp_chap_scr(struct sppp *sp)
 {
 	u_char clen;
 
 	/*
 	 * Compute random challenge.  Fill exactly AUTHKEYLEN bytes, the
 	 * length that is sent below and hashed by the response check.
 	 * The previous "4 * sizeof(u_long)" only equals that on ILP32;
 	 * on LP64 it wrote 32 bytes and overran the challenge buffer.
 	 */
 	arc4random_buf(sp->myauth.challenge, AUTHKEYLEN);
 	clen = AUTHKEYLEN;
 
 	sp->confid[IDX_CHAP] = ++sp->pp_seq[IDX_CHAP];
 
 	sppp_auth_send(&chap, sp, CHAP_CHALLENGE, sp->confid[IDX_CHAP],
 		       sizeof clen, (const char *)&clen,
 		       (size_t)AUTHKEYLEN, sp->myauth.challenge,
 		       (size_t)sppp_strnlen(sp->myauth.name, AUTHNAMELEN),
 		       sp->myauth.name,
 		       0);
 }
 
 /*
  *--------------------------------------------------------------------------*
  *                                                                          *
  *                        The PAP implementation.                           *
  *                                                                          *
  *--------------------------------------------------------------------------*
  */
 /*
  * For PAP, we need to keep a little state even if we are the peer, not the
  * authenticator.  This is because we don't get a request to authenticate,
  * but have to repeatedly authenticate ourselves until we get a response (or
  * the retry counter has expired).
  */
 
 /*
  * Handle incoming PAP packets.
  *
  * Packets shorter than five bytes are dropped.  A PAP_REQ is checked
  * against the configured name and secret of the remote and answered
  * with PAP_ACK or PAP_NAK; PAP_ACK and PAP_NAK are the replies to our
  * own request.  Any other code is only logged when debugging.
  */
 static void
 sppp_pap_input(struct sppp *sp, struct mbuf *m)
 {
 	STDDCL;
 	struct lcp_header *h;
 	int len;
 	u_char *name, *passwd, mlen;
 	int name_len, passwd_len;
 
 	len = m->m_pkthdr.len;
 	if (len < 5) {
 		if (debug)
 			log(LOG_DEBUG,
 			    SPP_FMT "pap invalid packet length: %d bytes\n",
 			    SPP_ARGS(ifp), len);
 		return;
 	}
 	h = mtod (m, struct lcp_header*);
 	/* Never look beyond the length announced in the header. */
 	if (len > ntohs (h->len))
 		len = ntohs (h->len);
 	switch (h->type) {
 	/* PAP request is my authproto */
 	case PAP_REQ:
 		/*
 		 * Payload layout: one length byte followed by the name,
 		 * then one length byte followed by the password.
 		 */
 		name = 1 + (u_char*)(h+1);
 		name_len = name[-1];
 		passwd = name + name_len + 1;
 		/*
 		 * passwd[-1] is only read once name_len has been found to
 		 * fit into the packet (short-circuit evaluation).
 		 */
 		if (name_len > len - 6 ||
 		    (passwd_len = passwd[-1]) > len - 6 - name_len) {
 			if (debug) {
 				log(LOG_DEBUG, SPP_FMT "pap corrupted input "
 				    "<%s id=0x%x len=%d",
 				    SPP_ARGS(ifp),
 				    sppp_auth_type_name(PPP_PAP, h->type),
 				    h->ident, ntohs(h->len));
 				sppp_print_bytes((u_char*)(h+1), len-4);
 				log(-1, ">\n");
 			}
 			break;
 		}
 		if (debug) {
 			log(LOG_DEBUG, SPP_FMT "pap input(%s) "
 			    "<%s id=0x%x len=%d name=",
 			    SPP_ARGS(ifp),
 			    sppp_state_name(sp->state[IDX_PAP]),
 			    sppp_auth_type_name(PPP_PAP, h->type),
 			    h->ident, ntohs(h->len));
 			sppp_print_string((char*)name, name_len);
 			log(-1, " passwd=");
 			sppp_print_string((char*)passwd, passwd_len);
 			log(-1, ">\n");
 		}
 		/* Both lengths and both contents must match exactly. */
 		if (name_len != sppp_strnlen(sp->hisauth.name, AUTHNAMELEN) ||
 		    passwd_len != sppp_strnlen(sp->hisauth.secret, AUTHKEYLEN) ||
 		    bcmp(name, sp->hisauth.name, name_len) != 0 ||
 		    bcmp(passwd, sp->hisauth.secret, passwd_len) != 0) {
 			/* action scn, tld */
 			mlen = sizeof(FAILMSG) - 1;
 			sppp_auth_send(&pap, sp, PAP_NAK, h->ident,
 				       sizeof mlen, (const char *)&mlen,
 				       sizeof(FAILMSG) - 1, (u_char *)FAILMSG,
 				       0);
 			pap.tld(sp);
 			break;
 		}
 		/* action sca, perhaps tlu */
 		if (sp->state[IDX_PAP] == STATE_REQ_SENT ||
 		    sp->state[IDX_PAP] == STATE_OPENED) {
 			mlen = sizeof(SUCCMSG) - 1;
 			sppp_auth_send(&pap, sp, PAP_ACK, h->ident,
 				       sizeof mlen, (const char *)&mlen,
 				       sizeof(SUCCMSG) - 1, (u_char *)SUCCMSG,
 				       0);
 		}
 		if (sp->state[IDX_PAP] == STATE_REQ_SENT) {
 			sppp_cp_change_state(&pap, sp, STATE_OPENED);
 			pap.tlu(sp);
 		}
 		break;
 
 	/* ack and nak are his authproto */
 	case PAP_ACK:
 		callout_stop(&sp->pap_my_to_ch);
 		if (debug) {
 			log(LOG_DEBUG, SPP_FMT "pap success",
 			    SPP_ARGS(ifp));
 			/*
 			 * The header is followed by a one-byte message
 			 * length and then the message text.  Take the
 			 * length from that byte (not from the packet code)
 			 * and never print more than the packet holds.
 			 */
 			name = 1 + (u_char*)(h+1);
 			name_len = name[-1];
 			if (name_len > len - 5)
 				name_len = len - 5;
 			if (len > 5 && name_len > 0) {
 				log(-1, ": ");
 				sppp_print_string((char*)name, name_len);
 			}
 			log(-1, "\n");
 		}
 		SPPP_LOCK(sp);
 		sp->pp_flags &= ~PP_NEEDAUTH;
 		if (sp->myauth.proto == PPP_PAP &&
 		    (sp->lcp.opts & (1 << LCP_OPT_AUTH_PROTO)) &&
 		    (sp->lcp.protos & (1 << IDX_PAP)) == 0) {
 			/*
 			 * We are authenticator for PAP but didn't
 			 * complete yet.  Leave it to tlu to proceed
 			 * to network phase.
 			 */
 			SPPP_UNLOCK(sp);
 			break;
 		}
 		SPPP_UNLOCK(sp);
 		sppp_phase_network(sp);
 		break;
 
 	case PAP_NAK:
 		callout_stop (&sp->pap_my_to_ch);
 		if (debug) {
 			log(LOG_INFO, SPP_FMT "pap failure",
 			    SPP_ARGS(ifp));
 			/* Same message layout and clamping as for PAP_ACK. */
 			name = 1 + (u_char*)(h+1);
 			name_len = name[-1];
 			if (name_len > len - 5)
 				name_len = len - 5;
 			if (len > 5 && name_len > 0) {
 				log(-1, ": ");
 				sppp_print_string((char*)name, name_len);
 			}
 			log(-1, "\n");
 		} else
 			log(LOG_INFO, SPP_FMT "pap failure\n",
 			    SPP_ARGS(ifp));
 		/* await LCP shutdown by authenticator */
 		break;
 
 	default:
 		/* Unknown PAP packet type -- ignore. */
 		if (debug) {
 			log(LOG_DEBUG, SPP_FMT "pap corrupted input "
 			    "<0x%x id=0x%x len=%d",
 			    SPP_ARGS(ifp),
 			    h->type, h->ident, ntohs(h->len));
 			sppp_print_bytes((u_char*)(h+1), len-4);
 			log(-1, ">\n");
 		}
 		break;
 	}
 }
 
 static void
 sppp_pap_init(struct sppp *sp)
 {
 	/*
 	 * PAP has no STATE_INITIAL; the automaton starts out closed
 	 * with its failure counter and sequence numbers cleared.
 	 */
 	sp->pp_rseq[IDX_PAP] = 0;
 	sp->pp_seq[IDX_PAP] = 0;
 	sp->fail_counter[IDX_PAP] = 0;
 	sp->state[IDX_PAP] = STATE_CLOSED;
 
 	/* One callout per role: sp->ch[IDX_PAP] and sp->pap_my_to_ch. */
 	callout_init(&sp->ch[IDX_PAP], 1);
 	callout_init(&sp->pap_my_to_ch, 1);
 }
 
 /*
  * Open PAP.  Both roles may be active at once, so the two checks
  * below are independent of each other.
  */
 static void
 sppp_pap_open(struct sppp *sp)
 {
 	int authenticator;
 
 	authenticator = sp->hisauth.proto == PPP_PAP &&
 	    (sp->lcp.opts & (1 << LCP_OPT_AUTH_PROTO)) != 0;
 	if (authenticator) {
 		/* we are authenticator for PAP, start our timer */
 		sp->rst_counter[IDX_PAP] = sp->lcp.max_configure;
 		sppp_cp_change_state(&pap, sp, STATE_REQ_SENT);
 	}
 
 	if (sp->myauth.proto != PPP_PAP)
 		return;
 
 	/* we are peer, send a request, and start a timer */
 	pap.scr(sp);
 	callout_reset(&sp->pap_my_to_ch, sp->lcp.timeout,
 	    sppp_pap_my_TO, (void *)sp);
 }
 
 static void
 sppp_pap_close(struct sppp *sp)
 {
 	/* Already closed: no transition to perform. */
 	if (sp->state[IDX_PAP] == STATE_CLOSED)
 		return;
 
 	sppp_cp_change_state(&pap, sp, STATE_CLOSED);
 }
 
 /*
  * Timeout routine used while we are the PAP authenticator.  The
  * authenticator is basically passive in PAP, so all that can be done
  * is to keep waiting until the restart counter runs out and then give
  * up.
  */
 static void
 sppp_pap_TO(void *cookie)
 {
 	struct sppp *sp = (struct sppp *)cookie;
 	STDDCL;
 
 	SPPP_LOCK(sp);
 	if (debug) {
 		log(LOG_DEBUG, SPP_FMT "pap TO(%s) rst_counter = %d\n",
 		    SPP_ARGS(ifp),
 		    sppp_state_name(sp->state[IDX_PAP]),
 		    sp->rst_counter[IDX_PAP]);
 	}
 
 	sp->rst_counter[IDX_PAP] -= 1;
 
 	/* Timeouts only matter while a request is still awaited. */
 	if (sp->state[IDX_PAP] == STATE_REQ_SENT) {
 		if (sp->rst_counter[IDX_PAP] < 0) {
 			/* TO- event */
 			pap.tld(sp);
 			sppp_cp_change_state(&pap, sp, STATE_CLOSED);
 		} else {
 			/*
 			 * TO+ event, not very much we could do;
 			 * sppp_cp_change_state() will restart the timer
 			 */
 			sppp_cp_change_state(&pap, sp, STATE_REQ_SENT);
 		}
 	}
 
 	SPPP_UNLOCK(sp);
 }
 
 /*
  * Timeout handler used while we are the PAP peer.  The peer is the
  * active side, so an expired timer means our request apparently got
  * lost and has to be sent again.
  * XXX We should impose a max counter.
  */
 static void
 sppp_pap_my_TO(void *cookie)
 {
 	struct sppp *sp = (struct sppp *)cookie;
 	STDDCL;
 
 	if (debug) {
 		log(LOG_DEBUG, SPP_FMT "pap peer TO\n",
 		    SPP_ARGS(ifp));
 	}
 
 	/* Retransmit the request under the sppp lock. */
 	SPPP_LOCK(sp);
 	pap.scr(sp);
 	SPPP_UNLOCK(sp);
 }
 
 /*
  * PAP "this layer up" action: record in the LCP protocol mask that PAP
  * is up and enter the network phase, unless the remote's own
  * authentication has not completed yet.
  */
 static void
 sppp_pap_tlu(struct sppp *sp)
 {
 	STDDCL;
 	int needauth;
 
 	sp->rst_counter[IDX_PAP] = sp->lcp.max_configure;
 
 	if (debug) {
 		log(LOG_DEBUG, SPP_FMT "%s tlu\n",
 		    SPP_ARGS(ifp), pap.name);
 	}
 
 	SPPP_LOCK(sp);
 	/* indicate to LCP that we need to be closed down */
 	sp->lcp.protos |= (1 << IDX_PAP);
 	needauth = (sp->pp_flags & PP_NEEDAUTH) != 0;
 	SPPP_UNLOCK(sp);
 
 	/*
 	 * Remote is authenticator, but his auth proto didn't complete
 	 * yet.  Defer the transition to network phase.
 	 */
 	if (needauth)
 		return;
 
 	sppp_phase_network(sp);
 }
 
 /*
  * PAP "this layer down" action: stop both PAP timers, clear PAP from
  * the LCP protocol mask and ask LCP to close.
  */
 static void
 sppp_pap_tld(struct sppp *sp)
 {
 	STDDCL;
 
 	if (debug) {
 		log(LOG_DEBUG, SPP_FMT "pap tld\n", SPP_ARGS(ifp));
 	}
 
 	callout_stop(&sp->ch[IDX_PAP]);
 	callout_stop(&sp->pap_my_to_ch);
 	sp->lcp.protos &= ~(1 << IDX_PAP);
 
 	lcp.Close(sp);
 }
 
 static void
 sppp_pap_scr(struct sppp *sp)
 {
 	u_char idlen, pwdlen;
 
 	sp->confid[IDX_PAP] = ++sp->pp_seq[IDX_PAP];
 	pwdlen = sppp_strnlen(sp->myauth.secret, AUTHKEYLEN);
 	idlen = sppp_strnlen(sp->myauth.name, AUTHNAMELEN);
 
 	sppp_auth_send(&pap, sp, PAP_REQ, sp->confid[IDX_PAP],
 		       sizeof idlen, (const char *)&idlen,
 		       (size_t)idlen, sp->myauth.name,
 		       sizeof pwdlen, (const char *)&pwdlen,
 		       (size_t)pwdlen, sp->myauth.secret,
 		       0);
 }
 
 /*
  * Random miscellaneous functions.
  */
 
 /*
  * Send a PAP or CHAP proto packet.
  *
  * Varadic function, each of the elements for the ellipsis is of type
  * ``size_t mlen, const u_char *msg''.  Processing will stop iff
  * mlen == 0.
  * NOTE: never declare variadic functions with types subject to type
  * promotion (i.e. u_char). This is asking for big trouble depending
  * on the architecture you are on...
  */
 
 static void
 sppp_auth_send(const struct cp *cp, struct sppp *sp,
                unsigned int type, unsigned int id,
 	       ...)
 {
 	STDDCL;
 	struct ppp_header *h;
 	struct lcp_header *lh;
 	struct mbuf *m;
 	u_char *p;
 	int len;
 	unsigned int mlen;
 	const char *msg;
 	va_list ap;
 
 	MGETHDR (m, M_NOWAIT, MT_DATA);
 	if (! m)
 		return;
 	m->m_pkthdr.rcvif = 0;
 
 	h = mtod (m, struct ppp_header*);
 	h->address = PPP_ALLSTATIONS;		/* broadcast address */
 	h->control = PPP_UI;			/* Unnumbered Info */
 	h->protocol = htons(cp->proto);
 
 	lh = (struct lcp_header*)(h + 1);
 	lh->type = type;
 	lh->ident = id;
 	p = (u_char*) (lh+1);
 
 	va_start(ap, id);
 	len = 0;
 
 	while ((mlen = (unsigned int)va_arg(ap, size_t)) != 0) {
 		msg = va_arg(ap, const char *);
 		len += mlen;
 		if (len > MHLEN - PPP_HEADER_LEN - LCP_HEADER_LEN) {
 			va_end(ap);
 			m_freem(m);
 			return;
 		}
 
 		bcopy(msg, p, mlen);
 		p += mlen;
 	}
 	va_end(ap);
 
 	m->m_pkthdr.len = m->m_len = PPP_HEADER_LEN + LCP_HEADER_LEN + len;
 	lh->len = htons (LCP_HEADER_LEN + len);
 
 	if (debug) {
 		log(LOG_DEBUG, SPP_FMT "%s output <%s id=0x%x len=%d",
 		    SPP_ARGS(ifp), cp->name,
 		    sppp_auth_type_name(cp->proto, lh->type),
 		    lh->ident, ntohs(lh->len));
 		sppp_print_bytes((u_char*) (lh+1), len);
 		log(-1, ">\n");
 	}
 	if (! IF_HANDOFF_ADJ(&sp->pp_cpq, m, ifp, 3))
 		if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
 }
 
 /*
  * Flush interface queue.
  */
 static void
 sppp_qflush(struct ifqueue *ifq)
 {
 	struct mbuf *m, *n;
 
 	n = ifq->ifq_head;
 	while ((m = n)) {
 		n = m->m_nextpkt;
 		m_freem (m);
 	}
 	ifq->ifq_head = 0;
 	ifq->ifq_tail = 0;
 	ifq->ifq_len = 0;
 }
 
 /*
  * Send keepalive packets, every 10 seconds.
  *
  * Callout handler; `dummy' is the struct sppp.  The callout re-arms
  * itself on every invocation, whether or not a keepalive was sent.
  */
 static void
 sppp_keepalive(void *dummy)
 {
 	struct sppp *sp = (struct sppp*)dummy;
 	struct ifnet *ifp = SP2IFP(sp);
 
 	SPPP_LOCK(sp);
 
 	/* Keepalive mode disabled or channel down? */
 	if ((sp->pp_flags & PP_KEEPALIVE) == 0)
 		goto out;
 	if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
 		goto out;
 
 	/* Frame relay has its own keepalive routine. */
 	if (sp->pp_mode == PP_FR) {
 		sppp_fr_keepalive (sp);
 		goto out;
 	}
 
 	/* No keepalive in PPP mode if LCP not opened yet. */
 	if (sp->pp_mode != IFF_CISCO && sp->pp_phase < PHASE_AUTHENTICATE)
 		goto out;
 
 	if (sp->pp_alivecnt == MAXALIVECNT) {
 		/* No keepalive packets got.  Stop the interface. */
 		printf (SPP_FMT "down\n", SPP_ARGS(ifp));
 		if_down (ifp);
 		sppp_qflush (&sp->pp_cpq);
 		if (sp->pp_mode != IFF_CISCO) {
 			/* XXX */
 			/* Shut down the PPP link. */
 			lcp.Down(sp);
 			/* Initiate negotiation. XXX */
 			lcp.Up(sp);
 		}
 	}
 
 	/* Count this keepalive as outstanding, saturating past the limit. */
 	if (sp->pp_alivecnt <= MAXALIVECNT)
 		sp->pp_alivecnt++;
 
 	if (sp->pp_mode == IFF_CISCO) {
 		sppp_cisco_send (sp, CISCO_KEEPALIVE_REQ,
 		    ++sp->pp_seq[IDX_LCP], sp->pp_rseq[IDX_LCP]);
 	} else if (sp->pp_phase >= PHASE_AUTHENTICATE) {
 		/* PPP: LCP echo request carrying our magic number. */
 		uint32_t nmagic = htonl(sp->lcp.magic);
 
 		sp->lcp.echoid = ++sp->pp_seq[IDX_LCP];
 		sppp_cp_send (sp, PPP_LCP, ECHO_REQ,
 		    sp->lcp.echoid, 4, &nmagic);
 	}
 
 out:
 	SPPP_UNLOCK(sp);
 	callout_reset(&sp->keepalive_callout, hz * 10, sppp_keepalive,
 	    (void *)sp);
 }
 
 /*
  * Get both IP addresses.
  *
  * Looks up the first AF_INET address of the interface and returns,
  * in host byte order, the local address in *src and the destination
  * address in *dst; both are 0 when no address is set.  *srcmask is
  * written only when a non-zero local address was found.  Each output
  * pointer may be NULL.
  */
 void
 sppp_get_ip_addrs(struct sppp *sp, u_long *src, u_long *dst, u_long *srcmask)
 {
 	struct epoch_tracker et;
 	struct ifnet *ifp = SP2IFP(sp);
 	struct ifaddr *ifa;
 	struct sockaddr_in *addr = NULL, *mask = NULL;
 	u_long local = 0L, remote = 0L;
 
 	NET_EPOCH_ENTER(et);
 	/*
 	 * Pick the first AF_INET address from the list,
 	 * aliases don't make any sense on a p2p link anyway.
 	 */
 	CK_STAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
 		if (ifa->ifa_addr->sa_family != AF_INET)
 			continue;
 		addr = (struct sockaddr_in *)ifa->ifa_addr;
 		mask = (struct sockaddr_in *)ifa->ifa_netmask;
 		if (addr != NULL)
 			break;
 	}
 	if (ifa != NULL) {
 		if (addr != NULL && addr->sin_addr.s_addr != 0) {
 			local = addr->sin_addr.s_addr;
 			if (srcmask != NULL)
 				*srcmask = ntohl(mask->sin_addr.s_addr);
 		}
 
 		addr = (struct sockaddr_in *)ifa->ifa_dstaddr;
 		if (addr != NULL && addr->sin_addr.s_addr != 0)
 			remote = addr->sin_addr.s_addr;
 	}
 	NET_EPOCH_EXIT(et);
 
 	if (dst != NULL)
 		*dst = ntohl(remote);
 	if (src != NULL)
 		*src = ntohl(local);
 }
 
 #ifdef INET
 /*
  * Set my IP address.
  *
  * Replaces the address of the interface's first AF_INET ifaddr with
  * `src' (host byte order): the old interface route is deleted, the
  * address is rewritten and rehashed, and a new route is added.
  * Routing socket messages are sent for both steps.  Nothing happens
  * if the interface has no AF_INET address.
  */
 static void
 sppp_set_ip_addr(struct sppp *sp, u_long src)
 {
 	STDDCL;
 	struct epoch_tracker et;
 	struct ifaddr *ifa;
 	struct sockaddr_in *si;
 	struct in_ifaddr *ia;
 
 	/*
 	 * Pick the first AF_INET address from the list,
 	 * aliases don't make any sense on a p2p link anyway.
 	 */
 	si = NULL;
 	NET_EPOCH_ENTER(et);
 	CK_STAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
 		if (ifa->ifa_addr->sa_family == AF_INET) {
 			si = (struct sockaddr_in *)ifa->ifa_addr;
 			if (si != NULL) {
 				/* Keep the ifaddr alive past the epoch. */
 				ifa_ref(ifa);
 				break;
 			}
 		}
 	}
 	NET_EPOCH_EXIT(et);
 
 	if (ifa != NULL) {
 		int error;
 		int fibnum = ifp->if_fib;
 
 		rt_addrmsg(RTM_DELETE, ifa, fibnum);
 		/* delete old route */
 		ia = ifatoia(ifa);
 		error = in_handle_ifaddr_route(RTM_DELETE, ia);
 		if (debug && error) {
 			log(LOG_DEBUG, SPP_FMT "sppp_set_ip_addr: rtinit DEL failed, error=%d\n",
 			    SPP_ARGS(ifp), error);
 		}
 
 		/* set new address */
 		si->sin_addr.s_addr = htonl(src);
 		/* The hash bucket depends on the address, so rehash. */
 		IN_IFADDR_WLOCK();
 		LIST_REMOVE(ia, ia_hash);
 		LIST_INSERT_HEAD(INADDR_HASH(si->sin_addr.s_addr), ia, ia_hash);
 		IN_IFADDR_WUNLOCK();
 
 		rt_addrmsg(RTM_ADD, ifa, fibnum);
 		/* add new route */
 		error = in_handle_ifaddr_route(RTM_ADD, ia);
 		if (debug && error) {
 			/*
 			 * Terminate the message with a newline like the
 			 * DEL case above, so that the next log entry does
 			 * not get glued onto this line.
 			 */
 			log(LOG_DEBUG, SPP_FMT "sppp_set_ip_addr: rtinit ADD failed, error=%d\n",
 			    SPP_ARGS(ifp), error);
 		}
 		/* Drop the reference taken in the lookup loop. */
 		ifa_free(ifa);
 	}
 }
 #endif
 
 #ifdef INET6
 /*
  * Get both IPv6 addresses.
  */
 static void
 sppp_get_ip6_addrs(struct sppp *sp, struct in6_addr *src, struct in6_addr *dst,
 		   struct in6_addr *srcmask)
 {
 	struct epoch_tracker et;
 	struct ifnet *ifp = SP2IFP(sp);
 	struct ifaddr *ifa;
 	struct sockaddr_in6 *si, *sm;
 	struct in6_addr ssrc, ddst;
 
 	sm = NULL;
 	bzero(&ssrc, sizeof(ssrc));
 	bzero(&ddst, sizeof(ddst));
 	/*
 	 * Pick the first link-local AF_INET6 address from the list,
 	 * aliases don't make any sense on a p2p link anyway.
 	 */
 	si = NULL;
 	NET_EPOCH_ENTER(et);
 	CK_STAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link)
 		if (ifa->ifa_addr->sa_family == AF_INET6) {
 			si = (struct sockaddr_in6 *)ifa->ifa_addr;
 			sm = (struct sockaddr_in6 *)ifa->ifa_netmask;
 			if (si && IN6_IS_ADDR_LINKLOCAL(&si->sin6_addr))
 				break;
 		}
 	if (ifa) {
 		if (si && !IN6_IS_ADDR_UNSPECIFIED(&si->sin6_addr)) {
 			bcopy(&si->sin6_addr, &ssrc, sizeof(ssrc));
 			if (srcmask) {
 				bcopy(&sm->sin6_addr, srcmask,
 				      sizeof(*srcmask));
 			}
 		}
 
 		si = (struct sockaddr_in6 *)ifa->ifa_dstaddr;
 		if (si && !IN6_IS_ADDR_UNSPECIFIED(&si->sin6_addr))
 			bcopy(&si->sin6_addr, &ddst, sizeof(ddst));
 	}
 
 	if (dst)
 		bcopy(&ddst, dst, sizeof(*dst));
 	if (src)
 		bcopy(&ssrc, src, sizeof(*src));
 	NET_EPOCH_EXIT(et);
 }
 
 #ifdef IPV6CP_MYIFID_DYN
 /*
  * Generate random ifid.
  *
  * Not implemented yet: the body is empty, so neither `sp' nor `addr'
  * is read or modified.
  */
 static void
 sppp_gen_ip6_addr(struct sppp *sp, struct in6_addr *addr)
 {
 	/* TBD */
 }
 
 /*
  * Set my IPv6 address.
  *
  * Re-initializes the interface's first link-local AF_INET6 address
  * with `src' through in6_ifinit().  Nothing happens if the interface
  * has no link-local address.
  */
 static void
 sppp_set_ip6_addr(struct sppp *sp, const struct in6_addr *src)
 {
 	STDDCL;
 	struct epoch_tracker et;
 	struct ifaddr *ifa;
 	struct sockaddr_in6 *sin6 = NULL;
 	struct sockaddr_in6 new_sin6;
 	int error;
 
 	/*
 	 * Pick the first link-local AF_INET6 address from the list,
 	 * aliases don't make any sense on a p2p link anyway.
 	 */
 	NET_EPOCH_ENTER(et);
 	CK_STAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
 		if (ifa->ifa_addr->sa_family != AF_INET6)
 			continue;
 		sin6 = (struct sockaddr_in6 *)ifa->ifa_addr;
 		if (sin6 != NULL && IN6_IS_ADDR_LINKLOCAL(&sin6->sin6_addr)) {
 			/* Keep the ifaddr alive past the epoch. */
 			ifa_ref(ifa);
 			break;
 		}
 	}
 	NET_EPOCH_EXIT(et);
 
 	if (ifa == NULL)
 		return;
 
 	/* Start from the current sockaddr and swap in the new address. */
 	new_sin6 = *sin6;
 	bcopy(src, &new_sin6.sin6_addr, sizeof(new_sin6.sin6_addr));
 	error = in6_ifinit(ifp, ifatoia6(ifa), &new_sin6, 1);
 	if (debug && error) {
 		log(LOG_DEBUG, SPP_FMT "sppp_set_ip6_addr: in6_ifinit "
 		    " failed, error=%d\n", SPP_ARGS(ifp), error);
 	}
 	/* Drop the reference taken in the lookup loop. */
 	ifa_free(ifa);
 }
 #endif
 
 /*
  * Suggest a candidate address to be used by peer.
  */
 static void
 sppp_suggest_ip6_addr(struct sppp *sp, struct in6_addr *suggest)
 {
 	struct in6_addr myaddr;
 	struct timeval tv;
 
 	sppp_get_ip6_addrs(sp, &myaddr, 0, 0);
 
 	myaddr.s6_addr[8] &= ~0x02;	/* u bit to "local" */
 	microtime(&tv);
 	if ((tv.tv_usec & 0xff) == 0 && (tv.tv_sec & 0xff) == 0) {
 		myaddr.s6_addr[14] ^= 0xff;
 		myaddr.s6_addr[15] ^= 0xff;
 	} else {
 		myaddr.s6_addr[14] ^= (tv.tv_usec & 0xff);
 		myaddr.s6_addr[15] ^= (tv.tv_sec & 0xff);
 	}
 	if (suggest)
 		bcopy(&myaddr, suggest, sizeof(myaddr));
 }
 #endif /*INET6*/
 
 /*
  * Handle the SPPPIOGDEFS / SPPPIOSDEFS sub-commands of the generic
  * interface ioctls.
  *
  * `cmd' is the outer ioctl (SIOCGIFGENERIC or SIOCSIFGENERIC) and
  * `data' the struct ifreq whose data pointer refers to a userland
  * struct spppreq.  Returns 0 on success or an errno value: EAGAIN if
  * the temporary buffer cannot be allocated, EFAULT if userland memory
  * cannot be read, EINVAL for an unknown or mismatched sub-command or
  * an unsupported auth protocol, EBUSY if settings are changed outside
  * the dead/establish phases, or the result of copyout().
  */
 static int
 sppp_params(struct sppp *sp, u_long cmd, void *data)
 {
 	u_long subcmd;
 	struct ifreq *ifr = (struct ifreq *)data;
 	struct spppreq *spr;
 	int rv = 0;
 
 	/* Kernel-side scratch copy of the request; freed at "quit". */
 	if ((spr = malloc(sizeof(struct spppreq), M_TEMP, M_NOWAIT)) == NULL)
 		return (EAGAIN);
 	/*
 	 * ifr_data_get_ptr(ifr) is supposed to point to a struct spppreq.
 	 * Check the cmd word first before attempting to fetch all the
 	 * data.
 	 */
 	rv = fueword(ifr_data_get_ptr(ifr), &subcmd);
 	if (rv == -1) {
 		rv = EFAULT;
 		goto quit;
 	}
 
 	if (copyin(ifr_data_get_ptr(ifr), spr, sizeof(struct spppreq)) != 0) {
 		rv = EFAULT;
 		goto quit;
 	}
 
 	switch (subcmd) {
 	case (u_long)SPPPIOGDEFS:
 		/* Reading the parameters is only valid via the "get" ioctl. */
 		if (cmd != SIOCGIFGENERIC) {
 			rv = EINVAL;
 			break;
 		}
 		/*
 		 * We copy over the entire current state, but clean
 		 * out some of the stuff we don't wanna pass up.
 		 * Remember, SIOCGIFGENERIC is unprotected, and can be
 		 * called by any user.  No need to ever get PAP or
 		 * CHAP secrets back to userland anyway.
 		 */
 		spr->defs.pp_phase = sp->pp_phase;
 		spr->defs.enable_vj = (sp->confflags & CONF_ENABLE_VJ) != 0;
 		spr->defs.enable_ipv6 = (sp->confflags & CONF_ENABLE_IPV6) != 0;
 		spr->defs.lcp = sp->lcp;
 		spr->defs.ipcp = sp->ipcp;
 		spr->defs.ipv6cp = sp->ipv6cp;
 		spr->defs.myauth = sp->myauth;
 		spr->defs.hisauth = sp->hisauth;
 		/* Scrub secrets and challenges from the outgoing copy. */
 		bzero(spr->defs.myauth.secret, AUTHKEYLEN);
 		bzero(spr->defs.myauth.challenge, AUTHKEYLEN);
 		bzero(spr->defs.hisauth.secret, AUTHKEYLEN);
 		bzero(spr->defs.hisauth.challenge, AUTHKEYLEN);
 		/*
 		 * Fixup the LCP timeout value to milliseconds so
 		 * spppcontrol doesn't need to bother about the value
 		 * of "hz".  We do the reverse calculation below when
 		 * setting it.
 		 */
 		spr->defs.lcp.timeout = sp->lcp.timeout * 1000 / hz;
 		rv = copyout(spr, ifr_data_get_ptr(ifr),
 		    sizeof(struct spppreq));
 		break;
 
 	case (u_long)SPPPIOSDEFS:
 		/* Changing the parameters is only valid via the "set" ioctl. */
 		if (cmd != SIOCSIFGENERIC) {
 			rv = EINVAL;
 			break;
 		}
 		/*
 		 * We have a very specific idea of which fields we
 		 * allow being passed back from userland, so to not
 		 * clobber our current state.  For one, we only allow
 		 * setting anything if LCP is in dead or establish
 		 * phase.  Once the authentication negotiations
 		 * started, the authentication settings must not be
 		 * changed again.  (The administrator can force an
 		 * ifconfig down in order to get LCP back into dead
 		 * phase.)
 		 *
 		 * Also, we only allow for authentication parameters to be
 		 * specified.
 		 *
 		 * XXX Should allow to set or clear pp_flags.
 		 *
 		 * Finally, if the respective authentication protocol to
 		 * be used is set differently than 0, but the secret is
 		 * passed as all zeros, we don't trash the existing secret.
 		 * This allows an administrator to change the system name
 		 * only without clobbering the secret (which he didn't get
 		 * back in a previous SPPPIOGDEFS call).  However, the
 		 * secrets are cleared if the authentication protocol is
 		 * reset to 0.  */
 		if (sp->pp_phase != PHASE_DEAD &&
 		    sp->pp_phase != PHASE_ESTABLISH) {
 			rv = EBUSY;
 			break;
 		}
 
 		/* Only "none" (0), PAP and CHAP are accepted as protocols. */
 		if ((spr->defs.myauth.proto != 0 && spr->defs.myauth.proto != PPP_PAP &&
 		     spr->defs.myauth.proto != PPP_CHAP) ||
 		    (spr->defs.hisauth.proto != 0 && spr->defs.hisauth.proto != PPP_PAP &&
 		     spr->defs.hisauth.proto != PPP_CHAP)) {
 			rv = EINVAL;
 			break;
 		}
 
 		if (spr->defs.myauth.proto == 0)
 			/* resetting myauth */
 			bzero(&sp->myauth, sizeof sp->myauth);
 		else {
 			/* setting/changing myauth */
 			sp->myauth.proto = spr->defs.myauth.proto;
 			bcopy(spr->defs.myauth.name, sp->myauth.name, AUTHNAMELEN);
 			/* Only the first byte decides whether a secret was given. */
 			if (spr->defs.myauth.secret[0] != '\0')
 				bcopy(spr->defs.myauth.secret, sp->myauth.secret,
 				      AUTHKEYLEN);
 		}
 		if (spr->defs.hisauth.proto == 0)
 			/* resetting hisauth */
 			bzero(&sp->hisauth, sizeof sp->hisauth);
 		else {
 			/* setting/changing hisauth */
 			sp->hisauth.proto = spr->defs.hisauth.proto;
 			sp->hisauth.flags = spr->defs.hisauth.flags;
 			bcopy(spr->defs.hisauth.name, sp->hisauth.name, AUTHNAMELEN);
 			/* Only the first byte decides whether a secret was given. */
 			if (spr->defs.hisauth.secret[0] != '\0')
 				bcopy(spr->defs.hisauth.secret, sp->hisauth.secret,
 				      AUTHKEYLEN);
 		}
 		/* set LCP restart timer timeout */
 		/* (given in milliseconds, stored in ticks; 0 keeps the old value) */
 		if (spr->defs.lcp.timeout != 0)
 			sp->lcp.timeout = spr->defs.lcp.timeout * hz / 1000;
 		/* set VJ enable and IPv6 disable flags */
 #ifdef INET
 		if (spr->defs.enable_vj)
 			sp->confflags |= CONF_ENABLE_VJ;
 		else
 			sp->confflags &= ~CONF_ENABLE_VJ;
 #endif
 #ifdef INET6
 		if (spr->defs.enable_ipv6)
 			sp->confflags |= CONF_ENABLE_IPV6;
 		else
 			sp->confflags &= ~CONF_ENABLE_IPV6;
 #endif
 		break;
 
 	default:
 		rv = EINVAL;
 	}
 
 	/* Common exit: release the scratch copy on every path. */
  quit:
 	free(spr, M_TEMP);
 
 	return (rv);
 }
 
 /*
  * Enter the network phase: open every network control protocol, send
  * an Up event to each NCP whose bit is set in the LCP protocol mask,
  * and finally let LCP check whether anything is actually starting.
  */
 static void
 sppp_phase_network(struct sppp *sp)
 {
 	STDDCL;
 	int idx;
 	u_long bit;
 
 	sp->pp_phase = PHASE_NETWORK;
 
 	if (debug) {
 		log(LOG_DEBUG, SPP_FMT "phase %s\n", SPP_ARGS(ifp),
 		    sppp_phase_name(sp->pp_phase));
 	}
 
 	/* Notify NCPs now. */
 	for (idx = 0; idx < IDX_COUNT; idx++) {
 		if (((cps[idx])->flags & CP_NCP) == 0)
 			continue;
 		(cps[idx])->Open(sp);
 	}
 
 	/* Send Up events to all NCPs. */
 	bit = 1;
 	for (idx = 0; idx < IDX_COUNT; idx++) {
 		if ((sp->lcp.protos & bit) && ((cps[idx])->flags & CP_NCP))
 			(cps[idx])->Up(sp);
 		bit <<= 1;
 	}
 
 	/* if no NCP is starting, all this was in vain, close down */
 	sppp_lcp_check_and_close(sp);
 }
 
 /*
  * Return a printable name for a control protocol packet code.
  * Unknown codes are formatted into a static buffer, so that result is
  * only valid until the next call.
  */
 static const char *
 sppp_cp_type_name(u_char type)
 {
 	static char buf[12];
 	const char *name;
 
 	switch (type) {
 	case CONF_REQ:
 		name = "conf-req";
 		break;
 	case CONF_ACK:
 		name = "conf-ack";
 		break;
 	case CONF_NAK:
 		name = "conf-nak";
 		break;
 	case CONF_REJ:
 		name = "conf-rej";
 		break;
 	case TERM_REQ:
 		name = "term-req";
 		break;
 	case TERM_ACK:
 		name = "term-ack";
 		break;
 	case CODE_REJ:
 		name = "code-rej";
 		break;
 	case PROTO_REJ:
 		name = "proto-rej";
 		break;
 	case ECHO_REQ:
 		name = "echo-req";
 		break;
 	case ECHO_REPLY:
 		name = "echo-reply";
 		break;
 	case DISC_REQ:
 		name = "discard-req";
 		break;
 	default:
 		snprintf (buf, sizeof(buf), "cp/0x%x", type);
 		name = buf;
 		break;
 	}
 	return name;
 }
 
 /*
  * Return a printable name for a PAP or CHAP packet code.
  *
  * `proto' selects the table (PPP_CHAP or PPP_PAP) and `type' is the
  * packet code.  Anything unknown is formatted as "auth/0x.." into a
  * static buffer, so that result is only valid until the next call.
  */
 static const char *
 sppp_auth_type_name(u_short proto, u_char type)
 {
 	static char buf[12];
 	switch (proto) {
 	case PPP_CHAP:
 		switch (type) {
 		case CHAP_CHALLENGE:	return "challenge";
 		case CHAP_RESPONSE:	return "response";
 		case CHAP_SUCCESS:	return "success";
 		case CHAP_FAILURE:	return "failure";
 		}
 		/*
 		 * Unknown CHAP code.  Do not fall through into the PAP
 		 * table, where it could be reported under a PAP name.
 		 */
 		break;
 	case PPP_PAP:
 		switch (type) {
 		case PAP_REQ:		return "req";
 		case PAP_ACK:		return "ack";
 		case PAP_NAK:		return "nak";
 		}
 		break;
 	}
 	snprintf (buf, sizeof(buf), "auth/0x%x", type);
 	return buf;
 }
 
 /*
  * Return a printable name for an LCP option number.  Unknown options
  * are formatted into a static buffer, so that result is only valid
  * until the next call.
  */
 static const char *
 sppp_lcp_opt_name(u_char opt)
 {
 	static char buf[12];
 	const char *name;
 
 	switch (opt) {
 	case LCP_OPT_MRU:
 		name = "mru";
 		break;
 	case LCP_OPT_ASYNC_MAP:
 		name = "async-map";
 		break;
 	case LCP_OPT_AUTH_PROTO:
 		name = "auth-proto";
 		break;
 	case LCP_OPT_QUAL_PROTO:
 		name = "qual-proto";
 		break;
 	case LCP_OPT_MAGIC:
 		name = "magic";
 		break;
 	case LCP_OPT_PROTO_COMP:
 		name = "proto-comp";
 		break;
 	case LCP_OPT_ADDR_COMP:
 		name = "addr-comp";
 		break;
 	default:
 		snprintf (buf, sizeof(buf), "lcp/0x%x", opt);
 		name = buf;
 		break;
 	}
 	return name;
 }
 
 #ifdef INET
 /*
  * Return a printable name for an IPCP option number.  Unknown options
  * are formatted into a static buffer, so that result is only valid
  * until the next call.
  */
 static const char *
 sppp_ipcp_opt_name(u_char opt)
 {
 	static char buf[12];
 	const char *name;
 
 	switch (opt) {
 	case IPCP_OPT_ADDRESSES:
 		name = "addresses";
 		break;
 	case IPCP_OPT_COMPRESSION:
 		name = "compression";
 		break;
 	case IPCP_OPT_ADDRESS:
 		name = "address";
 		break;
 	default:
 		snprintf (buf, sizeof(buf), "ipcp/0x%x", opt);
 		name = buf;
 		break;
 	}
 	return name;
 }
 #endif
 
 #ifdef INET6
 /*
  * Return a printable name for an IPV6CP option number.  Unknown
  * options are formatted as "0x.." into a static buffer, so that result
  * is only valid until the next call.
  */
 static const char *
 sppp_ipv6cp_opt_name(u_char opt)
 {
 	static char buf[12];
 	switch (opt) {
 	case IPV6CP_OPT_IFID:		return "ifid";
 	case IPV6CP_OPT_COMPRESSION:	return "compression";
 	}
 	/* Bounded formatting, as in the sibling *_name() helpers. */
 	snprintf (buf, sizeof(buf), "0x%x", opt);
 	return buf;
 }
 #endif
 
 /*
  * Return a printable name for an automaton state, or "illegal" for a
  * value that is not one of the STATE_* constants.
  */
 static const char *
 sppp_state_name(int state)
 {
 	const char *name = "illegal";
 
 	switch (state) {
 	case STATE_INITIAL:
 		name = "initial";
 		break;
 	case STATE_STARTING:
 		name = "starting";
 		break;
 	case STATE_CLOSED:
 		name = "closed";
 		break;
 	case STATE_STOPPED:
 		name = "stopped";
 		break;
 	case STATE_CLOSING:
 		name = "closing";
 		break;
 	case STATE_STOPPING:
 		name = "stopping";
 		break;
 	case STATE_REQ_SENT:
 		name = "req-sent";
 		break;
 	case STATE_ACK_RCVD:
 		name = "ack-rcvd";
 		break;
 	case STATE_ACK_SENT:
 		name = "ack-sent";
 		break;
 	case STATE_OPENED:
 		name = "opened";
 		break;
 	}
 	return name;
 }
 
 /*
  * Return a printable name for a PPP phase, or "illegal" for a value
  * that is not one of the PHASE_* constants.
  */
 static const char *
 sppp_phase_name(enum ppp_phase phase)
 {
 	const char *name = "illegal";
 
 	switch (phase) {
 	case PHASE_DEAD:
 		name = "dead";
 		break;
 	case PHASE_ESTABLISH:
 		name = "establish";
 		break;
 	case PHASE_TERMINATE:
 		name = "terminate";
 		break;
 	case PHASE_AUTHENTICATE:
 		name = "authenticate";
 		break;
 	case PHASE_NETWORK:
 		name = "network";
 		break;
 	}
 	return name;
 }
 
 /*
  * Return a printable name for a PPP protocol number.  Unknown
  * protocols are formatted as "proto/0x...." into a static buffer, so
  * that result is only valid until the next call.
  */
 static const char *
 sppp_proto_name(u_short proto)
 {
 	/*
 	 * Sized for the longest possible result.  The former 12-byte
 	 * buffer was one byte short of "proto/0xffff" plus its NUL, so
 	 * four-digit protocol numbers lost their last digit.
 	 */
 	static char buf[sizeof("proto/0xffff")];
 	switch (proto) {
 	case PPP_LCP:	return "lcp";
 	case PPP_IPCP:	return "ipcp";
 	case PPP_PAP:	return "pap";
 	case PPP_CHAP:	return "chap";
 	case PPP_IPV6CP: return "ipv6cp";
 	}
 	snprintf(buf, sizeof(buf), "proto/0x%x", (unsigned)proto);
 	return buf;
 }
 
 /*
  * Append `len' bytes starting at `p' to the current log line using
  * the "%*D" format with "-" as separator.  Logs nothing for len == 0.
  */
 static void
 sppp_print_bytes(const u_char *p, u_short len)
 {
 	if (len == 0)
 		return;
 
 	log(-1, " %*D", len, p, "-");
 }
 
 /*
  * Append the `len' characters at `p' to the current log line.
  * Characters outside the range ' ' .. '~' are written as \x escapes.
  */
 static void
 sppp_print_string(const char *p, u_short len)
 {
 	u_short i;
 	u_char ch;
 
 	for (i = 0; i < len; i++) {
 		ch = p[i];
 		/*
 		 * Print only ASCII chars directly.  RFC 1994 recommends
 		 * using only them, but we don't rely on it.
 		 */
 		if (ch >= ' ' && ch <= '~')
 			log(-1, "%c", ch);
 		else
 			log(-1, "\\x%x", ch);
 	}
 }
 
 #ifdef INET
 /*
  * Format the low 32 bits of `addr' (most significant byte first) as a
  * dotted quad.  The result lives in a static buffer and is only valid
  * until the next call.
  */
 static const char *
 sppp_dotted_quad(u_long addr)
 {
 	static char s[16];
 
 	/* Bounded formatting, as in the *_name() helpers. */
 	snprintf(s, sizeof(s), "%d.%d.%d.%d",
 		(int)((addr >> 24) & 0xff),
 		(int)((addr >> 16) & 0xff),
 		(int)((addr >> 8) & 0xff),
 		(int)(addr & 0xff));
 	return s;
 }
 #endif
 
 /*
  * Return the length of the string at `p', looking at no more than
  * `max' bytes; the result is `max' if no NUL is found within them.
  */
 static int
 sppp_strnlen(u_char *p, int max)
 {
 	int n = 0;
 
 	while (n < max && p[n] != '\0')
 		n++;
 	return n;
 }
 
 /* No-op placeholder handler, used to drop uninteresting events. */
 static void
 sppp_null(struct sppp *unused)
 {
 	/* Intentionally empty. */
 	(void)unused;
 }
diff --git a/sys/net/if_tuntap.c b/sys/net/if_tuntap.c
index b4b1b77ddc7c..bd9fc811d19f 100644
--- a/sys/net/if_tuntap.c
+++ b/sys/net/if_tuntap.c
@@ -1,2012 +1,2012 @@
 /*	$NetBSD: if_tun.c,v 1.14 1994/06/29 06:36:25 cgd Exp $	*/
 /*-
  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
  *
  * Copyright (C) 1999-2000 by Maksim Yevmenkin <m_evmenkin@yahoo.com>
  * All rights reserved.
  * Copyright (c) 2019 Kyle Evans <kevans@FreeBSD.org>
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  *
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  * BASED ON:
  * -------------------------------------------------------------------------
  *
  * Copyright (c) 1988, Julian Onions <jpo@cs.nott.ac.uk>
  * Nottingham University 1987.
  *
  * This source may be freely distributed, however I would be interested
  * in any changes that are made.
  *
  * This driver takes packets off the IP i/f and hands them up to a
  * user process to have its wicked way with. This driver has its
  * roots in a similar driver written by Phil Cockcroft (formerly) at
  * UCL. This driver is based much more on read/write/poll mode of
  * operation though.
  *
  * $FreeBSD$
  */
 
 #include "opt_inet.h"
 #include "opt_inet6.h"
 
 #include <sys/param.h>
 #include <sys/lock.h>
 #include <sys/priv.h>
 #include <sys/proc.h>
 #include <sys/systm.h>
 #include <sys/jail.h>
 #include <sys/mbuf.h>
 #include <sys/module.h>
 #include <sys/socket.h>
 #include <sys/eventhandler.h>
 #include <sys/fcntl.h>
 #include <sys/filio.h>
 #include <sys/sockio.h>
 #include <sys/sx.h>
 #include <sys/syslog.h>
 #include <sys/ttycom.h>
 #include <sys/poll.h>
 #include <sys/selinfo.h>
 #include <sys/signalvar.h>
 #include <sys/filedesc.h>
 #include <sys/kernel.h>
 #include <sys/sysctl.h>
 #include <sys/conf.h>
 #include <sys/uio.h>
 #include <sys/malloc.h>
 #include <sys/random.h>
 #include <sys/ctype.h>
 
 #include <net/ethernet.h>
 #include <net/if.h>
 #include <net/if_var.h>
 #include <net/if_clone.h>
 #include <net/if_dl.h>
 #include <net/if_media.h>
 #include <net/if_types.h>
 #include <net/if_vlan_var.h>
 #include <net/netisr.h>
 #include <net/route.h>
 #include <net/vnet.h>
 #include <netinet/in.h>
 #ifdef INET
 #include <netinet/ip.h>
 #endif
 #ifdef INET6
 #include <netinet/ip6.h>
 #include <netinet6/ip6_var.h>
 #endif
 #include <netinet/udp.h>
 #include <netinet/tcp.h>
 #include <net/bpf.h>
 #include <net/if_tap.h>
 #include <net/if_tun.h>
 
 #include <dev/virtio/network/virtio_net.h>
 
 #include <sys/queue.h>
 #include <sys/condvar.h>
 #include <security/mac/mac_framework.h>
 
 struct tuntap_driver;
 
 /*
  * tun_list is protected by global tunmtx.  Other mutable fields are
  * protected by tun->tun_mtx, or by their owning subsystem.  tun_dev is
  * static for the duration of a tunnel interface.
  */
 struct tuntap_softc {
 	/* Linkage on the global tunhead list (protected by tunmtx). */
 	TAILQ_ENTRY(tuntap_softc)	 tun_list;
 	/* Optional alias cdev created by tunrename(); NULL when absent. */
 	struct cdev			*tun_alias;
 	/* Backing character device; set once in tun_create_device(). */
 	struct cdev			*tun_dev;
 	u_short				 tun_flags;	/* misc flags */
 /*
  * tun_flags bits, as used in this file:
  *   TUN_OPEN    set by tunopen(), cleared by tundtor()
  *   TUN_INITED  set by tuncreate() once the ifnet has been attached
  *   TUN_RWAIT   cleared, with a wakeup(), by the start routines
  *   TUN_ASYNC   start routines post SIGIO via tun_sigio when set
  *   TUN_DYING   set by tun_destroy(); blocks new busy references and opens
  *   TUN_L2      Ethernet-style (tap/vmnet) device
  *   TUN_VMNET   vmnet flavour of an L2 device
  */
 #define	TUN_OPEN	0x0001
 #define	TUN_INITED	0x0002
 #define	TUN_UNUSED1	0x0008
 #define	TUN_UNUSED2	0x0010
 #define	TUN_LMODE	0x0020
 #define	TUN_RWAIT	0x0040
 #define	TUN_ASYNC	0x0080
 #define	TUN_IFHEAD	0x0100
 #define	TUN_DYING	0x0200
 #define	TUN_L2		0x0400
 #define	TUN_VMNET	0x0800
 
 /* Bits that select which tuntap_drivers[] entry a device belongs to. */
 #define	TUN_DRIVER_IDENT_MASK	(TUN_L2 | TUN_VMNET)
 /* Device is both attached as an interface and opened by a process. */
 #define	TUN_READY		(TUN_OPEN | TUN_INITED)
 
 	pid_t			 tun_pid;	/* owning pid */
 	struct ifnet		*tun_ifp;	/* the interface */
 	struct sigio		*tun_sigio;	/* async I/O info */
 	struct tuntap_driver	*tun_drv;	/* appropriate driver */
 	struct selinfo		 tun_rsel;	/* read select */
 	struct mtx		 tun_mtx;	/* softc field mutex */
 	struct cv		 tun_cv;	/* for ref'd dev destroy */
 	struct ether_addr	 tun_ether;	/* remote address */
 	int			 tun_busy;	/* busy count */
 	int			 tun_vhdrlen;	/* virtio-net header length */
 };
 #define	TUN2IFP(sc)	((sc)->tun_ifp)
 
 /*
  * Debug printf gated on the tundebug knob.  The expansion is a bare
  * "if (...) if_printf", so never follow a use with an "else".
  */
 #define	TUNDEBUG	if (tundebug) if_printf
 
 /* Per-softc mutex helpers. */
 #define	TUN_LOCK(tp)		mtx_lock(&(tp)->tun_mtx)
 #define	TUN_UNLOCK(tp)		mtx_unlock(&(tp)->tun_mtx)
 /*
  * NOTE(review): this expansion already ends in ';', so it is only usable
  * as a full statement.
  */
 #define	TUN_LOCK_ASSERT(tp)	mtx_assert(&(tp)->tun_mtx, MA_OWNED);
 
 #define	TUN_VMIO_FLAG_MASK	0x0fff
 
 /*
  * Interface capabilities of a tap device that supports the virtio-net
  * header.
  */
 #define TAP_VNET_HDR_CAPS	(IFCAP_HWCSUM | IFCAP_HWCSUM_IPV6	\
 				| IFCAP_VLAN_HWCSUM			\
 				| IFCAP_TSO | IFCAP_LRO			\
 				| IFCAP_VLAN_HWTSO)
 
 /* Union of the checksum/TSO offload (CSUM_*) flags listed below. */
 #define TAP_ALL_OFFLOAD		(CSUM_TSO | CSUM_TCP | CSUM_UDP |\
 				    CSUM_TCP_IPV6 | CSUM_UDP_IPV6)
 
 /*
  * All mutable global variables in if_tun are locked using tunmtx, with
  * the exception of tundebug, which is used unlocked, and the drivers' *clones,
  * which are static after setup.
  */
 static struct mtx tunmtx;
 /* Event handler registrations made in tuntapmodevent(). */
 static eventhandler_tag arrival_tag;
 static eventhandler_tag clone_tag;
 /* Driver name stems; also used as the cdevsw d_name of each driver. */
 static const char tunname[] = "tun";
 static const char tapname[] = "tap";
 static const char vmnetname[] = "vmnet";
 static MALLOC_DEFINE(M_TUN, tunname, "Tunnel Interface");
 static int tundebug = 0;
 static int tundclone = 1;
 static int tap_allow_uopen = 0;	/* allow user devfs cloning */
 static int tapuponopen = 0;	/* IFF_UP on open() */
 static int tapdclone = 1;	/* enable devfs cloning */
 
 /* Every softc created by tun_create_device(), of any driver flavour. */
 static TAILQ_HEAD(,tuntap_softc)	tunhead = TAILQ_HEAD_INITIALIZER(tunhead);
 SYSCTL_INT(_debug, OID_AUTO, if_tun_debug, CTLFLAG_RW, &tundebug, 0, "");
 
 /*
  * Serializes clearing ifp->if_softc in tun_destroy() against readers such
  * as tunrename() that need to know whether the softc is still attached.
  */
 static struct sx tun_ioctl_sx;
 SX_SYSINIT(tun_ioctl_sx, &tun_ioctl_sx, "tun_ioctl");
 
 SYSCTL_DECL(_net_link);
 /* tun */
 static SYSCTL_NODE(_net_link, OID_AUTO, tun, CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
     "IP tunnel software network interface");
 SYSCTL_INT(_net_link_tun, OID_AUTO, devfs_cloning, CTLFLAG_RWTUN, &tundclone, 0,
     "Enable legacy devfs interface creation");
 
 /* tap */
 static SYSCTL_NODE(_net_link, OID_AUTO, tap, CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
     "Ethernet tunnel software network interface");
 SYSCTL_INT(_net_link_tap, OID_AUTO, user_open, CTLFLAG_RW, &tap_allow_uopen, 0,
     "Enable legacy devfs interface creation for all users");
 SYSCTL_INT(_net_link_tap, OID_AUTO, up_on_open, CTLFLAG_RW, &tapuponopen, 0,
     "Bring interface up when /dev/tap is opened");
 SYSCTL_INT(_net_link_tap, OID_AUTO, devfs_cloning, CTLFLAG_RWTUN, &tapdclone, 0,
     "Enable legacy devfs interface creation");
 /* Same variable as debug.if_tun_debug above: one knob, two names. */
 SYSCTL_INT(_net_link_tap, OID_AUTO, debug, CTLFLAG_RW, &tundebug, 0, "");
 
 static int	tun_create_device(struct tuntap_driver *drv, int unit,
     struct ucred *cr, struct cdev **dev, const char *name);
 static int	tun_busy_locked(struct tuntap_softc *tp);
 static void	tun_unbusy_locked(struct tuntap_softc *tp);
 static int	tun_busy(struct tuntap_softc *tp);
 static void	tun_unbusy(struct tuntap_softc *tp);
 
 static int	tuntap_name2info(const char *name, int *unit, int *flags);
 static void	tunclone(void *arg, struct ucred *cred, char *name,
 		    int namelen, struct cdev **dev);
 static void	tuncreate(struct cdev *dev);
 static void	tundtor(void *data);
 static void	tunrename(void *arg, struct ifnet *ifp);
 static int	tunifioctl(struct ifnet *, u_long, caddr_t);
 static void	tuninit(struct ifnet *);
 static void	tunifinit(void *xtp);
 static int	tuntapmodevent(module_t, int, void *);
 static int	tunoutput(struct ifnet *, struct mbuf *,
 		    const struct sockaddr *, struct route *ro);
 static void	tunstart(struct ifnet *);
 static void	tunstart_l2(struct ifnet *);
 
 static int	tun_clone_match(struct if_clone *ifc, const char *name);
 static int	tap_clone_match(struct if_clone *ifc, const char *name);
 static int	vmnet_clone_match(struct if_clone *ifc, const char *name);
 static int	tun_clone_create(struct if_clone *, char *, size_t, caddr_t);
 static int	tun_clone_destroy(struct if_clone *, struct ifnet *);
 static void	tun_vnethdr_set(struct ifnet *ifp, int vhdrlen);
 
 static d_open_t		tunopen;
 static d_read_t		tunread;
 static d_write_t	tunwrite;
 static d_ioctl_t	tunioctl;
 static d_poll_t		tunpoll;
 static d_kqfilter_t	tunkqfilter;
 
 static int		tunkqread(struct knote *, long);
 static int		tunkqwrite(struct knote *, long);
 static void		tunkqdetach(struct knote *);
 
 /* kqueue filter ops for the read side: events come from tunkqread(). */
 static struct filterops tun_read_filterops = {
 	.f_isfd =	1,
 	.f_attach =	NULL,
 	.f_detach =	tunkqdetach,
 	.f_event =	tunkqread,
 };
 
 /* kqueue filter ops for the write side: events come from tunkqwrite(). */
 static struct filterops tun_write_filterops = {
 	.f_isfd =	1,
 	.f_attach =	NULL,
 	.f_detach =	tunkqdetach,
 	.f_event =	tunkqwrite,
 };
 
 /*
  * Per-flavour driver description.  The three entries (tun, tap, vmnet)
  * share every cdev entry point and the clone create/destroy handlers; they
  * differ only in name, identifying flags and clone match function.
  */
 static struct tuntap_driver {
 	struct cdevsw		 cdevsw;	/* cdev entry points and name */
 	int			 ident_flags;	/* TUN_DRIVER_IDENT_MASK bits */
 	struct unrhdr		*unrhdr;	/* unit number allocator */
 	struct clonedevs	*clones;	/* devfs clone bookkeeping */
 	ifc_match_t		*clone_match_fn;
 	ifc_create_t		*clone_create_fn;
 	ifc_destroy_t		*clone_destroy_fn;
 } tuntap_drivers[] = {
 	/* tun: no identifying flags */
 	{
 		.ident_flags =	0,
 		.cdevsw =	{
 		    .d_version =	D_VERSION,
 		    .d_flags =		D_NEEDMINOR,
 		    .d_open =		tunopen,
 		    .d_read =		tunread,
 		    .d_write =		tunwrite,
 		    .d_ioctl =		tunioctl,
 		    .d_poll =		tunpoll,
 		    .d_kqfilter =	tunkqfilter,
 		    .d_name =		tunname,
 		},
 		.clone_match_fn =	tun_clone_match,
 		.clone_create_fn =	tun_clone_create,
 		.clone_destroy_fn =	tun_clone_destroy,
 	},
 	/* tap: TUN_L2 */
 	{
 		.ident_flags =	TUN_L2,
 		.cdevsw =	{
 		    .d_version =	D_VERSION,
 		    .d_flags =		D_NEEDMINOR,
 		    .d_open =		tunopen,
 		    .d_read =		tunread,
 		    .d_write =		tunwrite,
 		    .d_ioctl =		tunioctl,
 		    .d_poll =		tunpoll,
 		    .d_kqfilter =	tunkqfilter,
 		    .d_name =		tapname,
 		},
 		.clone_match_fn =	tap_clone_match,
 		.clone_create_fn =	tun_clone_create,
 		.clone_destroy_fn =	tun_clone_destroy,
 	},
 	/* vmnet: TUN_L2 | TUN_VMNET */
 	{
 		.ident_flags =	TUN_L2 | TUN_VMNET,
 		.cdevsw =	{
 		    .d_version =	D_VERSION,
 		    .d_flags =		D_NEEDMINOR,
 		    .d_open =		tunopen,
 		    .d_read =		tunread,
 		    .d_write =		tunwrite,
 		    .d_ioctl =		tunioctl,
 		    .d_poll =		tunpoll,
 		    .d_kqfilter =	tunkqfilter,
 		    .d_name =		vmnetname,
 		},
 		.clone_match_fn =	vmnet_clone_match,
 		.clone_create_fn =	tun_clone_create,
 		.clone_destroy_fn =	tun_clone_destroy,
 	},
 };
 
 /*
  * Per-vnet pairing of a driver with the if_clone instance registered for
  * it in vnet_tun_init().
  */
 struct tuntap_driver_cloner {
 	SLIST_ENTRY(tuntap_driver_cloner)	 link;
 	struct tuntap_driver			*drv;	/* tuntap_drivers[] entry */
 	struct if_clone				*cloner;	/* from if_clone_advanced() */
 };
 
 VNET_DEFINE_STATIC(SLIST_HEAD(, tuntap_driver_cloner), tuntap_driver_cloners) =
     SLIST_HEAD_INITIALIZER(tuntap_driver_cloners);
 
 #define	V_tuntap_driver_cloners	VNET(tuntap_driver_cloners)
 
 /*
  * Busy references let other code paths (for example operations on the
  * devices) work on a tunnel without racing tun_destroy.  tun_destroy waits
  * on the condvar while the tunnel is busy or open and is woken once the
  * last reference is dropped.
  *
  * Takes one busy reference with tun_mtx held.  Returns 0 on success or
  * EBUSY when the tunnel is already being destroyed.
  */
 static int
 tun_busy_locked(struct tuntap_softc *tp)
 {
 
 	TUN_LOCK_ASSERT(tp);
 	if ((tp->tun_flags & TUN_DYING) == 0) {
 		tp->tun_busy++;
 		return (0);
 	}
 
 	/*
 	 * EBUSY here means "busy going away".  Reporting a tunnel as too
 	 * busy to be made busier would make little sense, so that is the
 	 * only interpretation callers need to consider.
 	 */
 	return (EBUSY);
 }
 
 /*
  * Drop one busy reference with tun_mtx held.  Releasing the last one
  * broadcasts on tun_cv so that a waiter in tun_destroy can proceed.
  */
 static void
 tun_unbusy_locked(struct tuntap_softc *tp)
 {
 
 	TUN_LOCK_ASSERT(tp);
 	KASSERT(tp->tun_busy != 0, ("tun_unbusy: called for non-busy tunnel"));
 
 	if (--tp->tun_busy == 0)
 		cv_broadcast(&tp->tun_cv);
 }
 
 /*
  * Locking wrapper around tun_busy_locked(): takes tun_mtx, tries to busy
  * the tunnel and passes the result (0 or EBUSY) back to the caller.
  */
 static int
 tun_busy(struct tuntap_softc *tp)
 {
 	int error;
 
 	TUN_LOCK(tp);
 	error = tun_busy_locked(tp);
 	TUN_UNLOCK(tp);
 
 	return (error);
 }
 
 /*
  * Locking wrapper around tun_unbusy_locked(): drops one busy reference
  * while holding tun_mtx.
  */
 static void
 tun_unbusy(struct tuntap_softc *tp)
 {
 
 	TUN_LOCK(tp);
 	tun_unbusy_locked(tp);
 	TUN_UNLOCK(tp);
 }
 
 /*
  * Sets unit and/or flags given the device name.  Must be called with correct
  * vnet context.
  */
 static int
 tuntap_name2info(const char *name, int *outunit, int *outflags)
 {
 	struct tuntap_driver *drv;
 	struct tuntap_driver_cloner *drvc;
 	char *dname;
 	int flags, unit;
 	bool found;
 
 	if (name == NULL)
 		return (EINVAL);
 
 	/*
 	 * Needed for dev_stdclone, but dev_stdclone will not modify, it just
 	 * wants to be able to pass back a char * through the second param. We
 	 * will always set that as NULL here, so we'll fake it.
 	 */
 	dname = __DECONST(char *, name);
 	found = false;
 
 	KASSERT(!SLIST_EMPTY(&V_tuntap_driver_cloners),
 	    ("tuntap_driver_cloners failed to initialize"));
 	SLIST_FOREACH(drvc, &V_tuntap_driver_cloners, link) {
 		KASSERT(drvc->drv != NULL,
 		    ("tuntap_driver_cloners entry not properly initialized"));
 		drv = drvc->drv;
 
 		if (strcmp(name, drv->cdevsw.d_name) == 0) {
 			found = true;
 			unit = -1;
 			flags = drv->ident_flags;
 			break;
 		}
 
 		if (dev_stdclone(dname, NULL, drv->cdevsw.d_name, &unit) == 1) {
 			found = true;
 			flags = drv->ident_flags;
 			break;
 		}
 	}
 
 	if (!found)
 		return (ENXIO);
 
 	if (outunit != NULL)
 		*outunit = unit;
 	if (outflags != NULL)
 		*outflags = flags;
 	return (0);
 }
 
 /*
  * Get driver information from a set of flags specified.  Masks the identifying
  * part of the flags and compares it against all of the available
  * tuntap_drivers. Must be called with correct vnet context.
  */
 static struct tuntap_driver *
 tuntap_driver_from_flags(int tun_flags)
 {
 	struct tuntap_driver *drv;
 	struct tuntap_driver_cloner *drvc;
 
 	KASSERT(!SLIST_EMPTY(&V_tuntap_driver_cloners),
 	    ("tuntap_driver_cloners failed to initialize"));
 	SLIST_FOREACH(drvc, &V_tuntap_driver_cloners, link) {
 		KASSERT(drvc->drv != NULL,
 		    ("tuntap_driver_cloners entry not properly initialized"));
 		drv = drvc->drv;
 		if ((tun_flags & TUN_DRIVER_IDENT_MASK) == drv->ident_flags)
 			return (drv);
 	}
 
 	return (NULL);
 }
 
 /*
  * if_clone match handler for tun: accept names that resolve to a driver
  * without the TUN_L2 flag.  Returns 1 on a match, 0 otherwise.
  */
 static int
 tun_clone_match(struct if_clone *ifc, const char *name)
 {
 	int tunflags;
 
 	if (tuntap_name2info(name, NULL, &tunflags) != 0)
 		return (0);
 
 	return ((tunflags & TUN_L2) == 0 ? 1 : 0);
 }
 
 /*
  * if_clone match handler for tap: accept names that resolve to a driver
  * with TUN_L2 set and TUN_VMNET clear.  Returns 1 on a match, 0 otherwise.
  */
 static int
 tap_clone_match(struct if_clone *ifc, const char *name)
 {
 	int tunflags;
 
 	if (tuntap_name2info(name, NULL, &tunflags) != 0)
 		return (0);
 
 	return ((tunflags & (TUN_L2 | TUN_VMNET)) == TUN_L2 ? 1 : 0);
 }
 
 /*
  * if_clone match handler for vmnet: accept names that resolve to a driver
  * with TUN_VMNET set.  Returns 1 on a match, 0 otherwise.
  */
 static int
 vmnet_clone_match(struct if_clone *ifc, const char *name)
 {
 	int tunflags;
 
 	if (tuntap_name2info(name, NULL, &tunflags) != 0)
 		return (0);
 
 	return ((tunflags & TUN_VMNET) != 0 ? 1 : 0);
 }
 
 /*
  * if_clone create handler shared by tun, tap and vmnet.
  *
  * The requested name selects the driver and, optionally, a specific unit.
  * A unit number is reserved from the driver's unrhdr, the final interface
  * name is written back into name, and the backing cdev is looked up or
  * created before the ifnet is attached by tuncreate().
  *
  * Returns 0 on success; EEXIST when the requested unit is taken, ENOSPC
  * when no unit is free, ENXIO for an unknown driver, or the error from
  * tuntap_name2info() / tun_create_device().
  */
 static int
 tun_clone_create(struct if_clone *ifc, char *name, size_t len, caddr_t params)
 {
 	struct tuntap_driver *drv;
 	struct cdev *dev;
 	int err, tunflags, unit;
 
 	tunflags = 0;
 	/* The name here tells us exactly what we're creating */
 	err = tuntap_name2info(name, &unit, &tunflags);
 	if (err != 0)
 		return (err);
 
 	drv = tuntap_driver_from_flags(tunflags);
 	if (drv == NULL)
 		return (ENXIO);
 
 	if (unit != -1) {
 		/* If this unit number is still available that's okay. */
 		if (alloc_unr_specific(drv->unrhdr, unit) == -1)
 			return (EEXIST);
 	} else {
 		/*
 		 * alloc_unr() reports exhaustion as -1.  Never let that
 		 * leak into the interface name or into clone_create(),
 		 * which treats -1 as "pick any unit".
 		 */
 		unit = alloc_unr(drv->unrhdr);
 		if (unit == -1)
 			return (ENOSPC);
 	}
 
 	snprintf(name, IFNAMSIZ, "%s%d", drv->cdevsw.d_name, unit);
 
 	/* find any existing device, or allocate new unit number */
 	dev = NULL;
 	err = clone_create(&drv->clones, &drv->cdevsw, &unit, &dev, 0);
 	/* No preexisting struct cdev *, create one */
 	if (err != 0) {
 		err = tun_create_device(drv, unit, NULL, &dev, name);
 		if (err != 0) {
 			/*
 			 * No softc exists, so tun_destroy() will never
 			 * return this unit; release it here.
 			 */
 			free_unr(drv->unrhdr, unit);
 			return (err);
 		}
 	}
 
 	tuncreate(dev);
 	return (0);
 }
 
 static void
 tunclone(void *arg, struct ucred *cred, char *name, int namelen,
     struct cdev **dev)
 {
 	char devname[SPECNAMELEN + 1];
 	struct tuntap_driver *drv;
 	int append_unit, i, u, tunflags;
 	bool mayclone;
 
 	if (*dev != NULL)
 		return;
 
 	tunflags = 0;
 	CURVNET_SET(CRED_TO_VNET(cred));
 	if (tuntap_name2info(name, &u, &tunflags) != 0)
 		goto out;	/* Not recognized */
 
 	if (u != -1 && u > IF_MAXUNIT)
 		goto out;	/* Unit number too high */
 
 	mayclone = priv_check_cred(cred, PRIV_NET_IFCREATE) == 0;
 	if ((tunflags & TUN_L2) != 0) {
 		/* tap/vmnet allow user open with a sysctl */
 		mayclone = (mayclone || tap_allow_uopen) && tapdclone;
 	} else {
 		mayclone = mayclone && tundclone;
 	}
 
 	/*
 	 * If tun cloning is enabled, only the superuser can create an
 	 * interface.
 	 */
 	if (!mayclone)
 		goto out;
 
 	if (u == -1)
 		append_unit = 1;
 	else
 		append_unit = 0;
 
 	drv = tuntap_driver_from_flags(tunflags);
 	if (drv == NULL)
 		goto out;
 
 	/* find any existing device, or allocate new unit number */
 	i = clone_create(&drv->clones, &drv->cdevsw, &u, dev, 0);
 	if (i) {
 		if (append_unit) {
 			namelen = snprintf(devname, sizeof(devname), "%s%d",
 			    name, u);
 			name = devname;
 		}
 
 		i = tun_create_device(drv, u, cred, dev, name);
 	}
 	if (i == 0)
 		if_clone_create(name, namelen, NULL);
 out:
 	CURVNET_RESTORE();
 }
 
 /*
  * Tear down a tunnel and free its softc.  The caller must already have
  * removed tp from tunhead.
  *
  * The tunnel is marked TUN_DYING so that no new busy references can be
  * taken, outstanding references are waited for, and then the cdev, the
  * select/kqueue state, the ifnet, the unit number and the softc itself are
  * released in that order.
  */
 static void
 tun_destroy(struct tuntap_softc *tp)
 {
 
 	TUN_LOCK(tp);
 	/* From here on tun_busy_locked() fails with EBUSY. */
 	tp->tun_flags |= TUN_DYING;
 	/*
 	 * Wait for tun_unbusy_locked() to broadcast once the count hits
 	 * zero.  Both branches leave tun_mtx released.
 	 * NOTE(review): single wait rather than a loop; this relies on the
 	 * only wakeup being the final unbusy - confirm.
 	 */
 	if (tp->tun_busy != 0)
 		cv_wait_unlock(&tp->tun_cv, &tp->tun_mtx);
 	else
 		TUN_UNLOCK(tp);
 
 	CURVNET_SET(TUN2IFP(tp)->if_vnet);
 
 	/* destroy_dev will take care of any alias. */
 	destroy_dev(tp->tun_dev);
 	seldrain(&tp->tun_rsel);
 	knlist_clear(&tp->tun_rsel.si_note, 0);
 	knlist_destroy(&tp->tun_rsel.si_note);
 	/* Undo the attach done in tuncreate() for the matching flavour. */
 	if ((tp->tun_flags & TUN_L2) != 0) {
 		ether_ifdetach(TUN2IFP(tp));
 	} else {
 		bpfdetach(TUN2IFP(tp));
 		if_detach(TUN2IFP(tp));
 	}
 	/* Clear if_softc under the sx so tunrename() sees a consistent value. */
 	sx_xlock(&tun_ioctl_sx);
 	TUN2IFP(tp)->if_softc = NULL;
 	sx_xunlock(&tun_ioctl_sx);
 	free_unr(tp->tun_drv->unrhdr, TUN2IFP(tp)->if_dunit);
 	if_free(TUN2IFP(tp));
 	mtx_destroy(&tp->tun_mtx);
 	cv_destroy(&tp->tun_cv);
 	free(tp, M_TUN);
 	CURVNET_RESTORE();
 }
 
 /*
  * if_clone destroy handler: unlink the softc from the global list and tear
  * the tunnel down.  Always returns 0.
  */
 static int
 tun_clone_destroy(struct if_clone *ifc __unused, struct ifnet *ifp)
 {
 	struct tuntap_softc *tp;
 
 	tp = ifp->if_softc;
 
 	/* tunhead is protected by tunmtx; tun_destroy() runs without it. */
 	mtx_lock(&tunmtx);
 	TAILQ_REMOVE(&tunhead, tp, tun_list);
 	mtx_unlock(&tunmtx);
 
 	tun_destroy(tp);
 	return (0);
 }
 
 /*
  * Per-vnet setup: register one if_clone instance for every entry of
  * tuntap_drivers[] and record the pairing on this vnet's cloner list.
  */
 static void
 vnet_tun_init(const void *unused __unused)
 {
 	struct tuntap_driver_cloner *drvc;
 	struct tuntap_driver *drv, *end;
 
 	end = &tuntap_drivers[nitems(tuntap_drivers)];
 	for (drv = tuntap_drivers; drv < end; drv++) {
 		drvc = malloc(sizeof(*drvc), M_TUN, M_WAITOK | M_ZERO);
 		drvc->drv = drv;
 		drvc->cloner = if_clone_advanced(drv->cdevsw.d_name, 0,
 		    drv->clone_match_fn, drv->clone_create_fn,
 		    drv->clone_destroy_fn);
 		SLIST_INSERT_HEAD(&V_tuntap_driver_cloners, drvc, link);
 	}
 }
 VNET_SYSINIT(vnet_tun_init, SI_SUB_PROTO_IF, SI_ORDER_ANY,
 		vnet_tun_init, NULL);
 
 /*
  * Per-vnet teardown: detach and free every cloner registered by
  * vnet_tun_init().
  */
 static void
 vnet_tun_uninit(const void *unused __unused)
 {
 	struct tuntap_driver_cloner *drvc;
 
 	while ((drvc = SLIST_FIRST(&V_tuntap_driver_cloners)) != NULL) {
 		SLIST_REMOVE_HEAD(&V_tuntap_driver_cloners, link);
 		if_clone_detach(drvc->cloner);
 		free(drvc, M_TUN);
 	}
 }
 VNET_SYSUNINIT(vnet_tun_uninit, SI_SUB_PROTO_IF, SI_ORDER_ANY,
     vnet_tun_uninit, NULL);
 
 /*
  * Global teardown: deregister the event handlers, destroy every remaining
  * tunnel, then release the per-driver unit allocators and clone lists and
  * finally the global mutex.
  */
 static void
 tun_uninit(const void *unused __unused)
 {
 	struct tuntap_driver *drv, *end;
 	struct tuntap_softc *tp;
 
 	EVENTHANDLER_DEREGISTER(ifnet_arrival_event, arrival_tag);
 	EVENTHANDLER_DEREGISTER(dev_clone, clone_tag);
 	drain_dev_clone_events();
 
 	/*
 	 * Pop tunnels one at a time; tunmtx is dropped around each
 	 * tun_destroy() call.
 	 */
 	for (;;) {
 		mtx_lock(&tunmtx);
 		tp = TAILQ_FIRST(&tunhead);
 		if (tp != NULL)
 			TAILQ_REMOVE(&tunhead, tp, tun_list);
 		mtx_unlock(&tunmtx);
 		if (tp == NULL)
 			break;
 		tun_destroy(tp);
 	}
 
 	end = &tuntap_drivers[nitems(tuntap_drivers)];
 	for (drv = tuntap_drivers; drv < end; drv++) {
 		delete_unrhdr(drv->unrhdr);
 		clone_cleanup(&drv->clones);
 	}
 	mtx_destroy(&tunmtx);
 }
 SYSUNINIT(tun_uninit, SI_SUB_PROTO_IF, SI_ORDER_ANY, tun_uninit, NULL);
 
 static struct tuntap_driver *
 tuntap_driver_from_ifnet(const struct ifnet *ifp)
 {
 	struct tuntap_driver *drv;
 	int i;
 
 	if (ifp == NULL)
 		return (NULL);
 
 	for (i = 0; i < nitems(tuntap_drivers); ++i) {
 		drv = &tuntap_drivers[i];
 		if (strcmp(ifp->if_dname, drv->cdevsw.d_name) == 0)
 			return (drv);
 	}
 
 	return (NULL);
 }
 
 /*
  * Module event handler.  MOD_LOAD sets up the global mutex, the per-driver
  * clone lists and unit allocators, and registers the interface arrival and
  * dev_clone event handlers.  MOD_UNLOAD does nothing here.  Any other
  * event yields EOPNOTSUPP.
  */
 static int
 tuntapmodevent(module_t mod, int type, void *data)
 {
 	struct tuntap_driver *drv;
 	int i;
 
 	switch (type) {
 	case MOD_LOAD:
 		break;
 	case MOD_UNLOAD:
 		/* See tun_uninit, so it's done after the vnet_sysuninit() */
 		return 0;
 	default:
 		return EOPNOTSUPP;
 	}
 
 	mtx_init(&tunmtx, "tunmtx", NULL, MTX_DEF);
 	for (i = 0; i < nitems(tuntap_drivers); ++i) {
 		drv = &tuntap_drivers[i];
 		clone_setup(&drv->clones);
 		drv->unrhdr = new_unrhdr(0, IF_MAXUNIT, &tunmtx);
 	}
 
 	arrival_tag = EVENTHANDLER_REGISTER(ifnet_arrival_event,
 	   tunrename, 0, 1000);
 	if (arrival_tag == NULL)
 		return (ENOMEM);
 
 	clone_tag = EVENTHANDLER_REGISTER(dev_clone, tunclone, 0, 1000);
 	if (clone_tag == NULL)
 		return (ENOMEM);
 
 	return 0;
 }
 
 /* Module description for if_tuntap; the only one with an event handler. */
 static moduledata_t tuntap_mod = {
 	"if_tuntap",
 	tuntapmodevent,
 	0
 };
 
 /* We'll only ever have these two, so no need for a macro. */
 static moduledata_t tun_mod = { "if_tun", NULL, 0 };
 static moduledata_t tap_mod = { "if_tap", NULL, 0 };
 
 DECLARE_MODULE(if_tuntap, tuntap_mod, SI_SUB_PSEUDO, SI_ORDER_ANY);
 MODULE_VERSION(if_tuntap, 1);
 DECLARE_MODULE(if_tun, tun_mod, SI_SUB_PSEUDO, SI_ORDER_ANY);
 MODULE_VERSION(if_tun, 1);
 DECLARE_MODULE(if_tap, tap_mod, SI_SUB_PSEUDO, SI_ORDER_ANY);
 MODULE_VERSION(if_tap, 1);
 
 /*
  * Allocate a softc for the given driver and unit, and create its cdev.
  *
  * drv   driver flavour; supplies the cdevsw and the identifying flags
  * unit  unit number for the new cdev
  * cr    credential for the cdev, or NULL; a non-NULL credential also
  *       requests MAKEDEV_REF
  * dev   out: the newly created cdev
  * name  device name
  *
  * On success the softc is reachable through (*dev)->si_drv1 and has been
  * appended to tunhead; no ifnet is attached yet (see tuncreate()).
  * Returns 0, or the error from make_dev_s() with nothing left allocated.
  */
 static int
 tun_create_device(struct tuntap_driver *drv, int unit, struct ucred *cr,
     struct cdev **dev, const char *name)
 {
 	struct make_dev_args args;
 	struct tuntap_softc *tp;
 	int error;
 
 	tp = malloc(sizeof(*tp), M_TUN, M_WAITOK | M_ZERO);
 	mtx_init(&tp->tun_mtx, "tun_mtx", NULL, MTX_DEF);
 	cv_init(&tp->tun_cv, "tun_condvar");
 	tp->tun_flags = drv->ident_flags;
 	tp->tun_drv = drv;
 
 	make_dev_args_init(&args);
 	if (cr != NULL)
 		args.mda_flags = MAKEDEV_REF;
 	args.mda_devsw = &drv->cdevsw;
 	args.mda_cr = cr;
 	args.mda_uid = UID_UUCP;
 	args.mda_gid = GID_DIALER;
 	args.mda_mode = 0600;
 	args.mda_unit = unit;
 	args.mda_si_drv1 = tp;
 	error = make_dev_s(&args, dev, "%s", name);
 	if (error != 0) {
 		/*
 		 * Undo mtx_init()/cv_init() before the memory that holds
 		 * the lock and condvar is released.
 		 */
 		cv_destroy(&tp->tun_cv);
 		mtx_destroy(&tp->tun_mtx);
 		free(tp, M_TUN);
 		return (error);
 	}
 
 	KASSERT((*dev)->si_drv1 != NULL,
 	    ("Failed to set si_drv1 at %s creation", name));
 	tp->tun_dev = *dev;
 	knlist_init_mtx(&tp->tun_rsel.si_note, &tp->tun_mtx);
 	mtx_lock(&tunmtx);
 	TAILQ_INSERT_TAIL(&tunhead, tp, tun_list);
 	mtx_unlock(&tunmtx);
 	return (0);
 }
 
 static void
 tunstart(struct ifnet *ifp)
 {
 	struct tuntap_softc *tp = ifp->if_softc;
 	struct mbuf *m;
 
 	TUNDEBUG(ifp, "starting\n");
 	if (ALTQ_IS_ENABLED(&ifp->if_snd)) {
 		IFQ_LOCK(&ifp->if_snd);
 		IFQ_POLL_NOLOCK(&ifp->if_snd, m);
 		if (m == NULL) {
 			IFQ_UNLOCK(&ifp->if_snd);
 			return;
 		}
 		IFQ_UNLOCK(&ifp->if_snd);
 	}
 
 	TUN_LOCK(tp);
 	if (tp->tun_flags & TUN_RWAIT) {
 		tp->tun_flags &= ~TUN_RWAIT;
 		wakeup(tp);
 	}
 	selwakeuppri(&tp->tun_rsel, PZERO + 1);
 	KNOTE_LOCKED(&tp->tun_rsel.si_note, 0);
 	if (tp->tun_flags & TUN_ASYNC && tp->tun_sigio) {
 		TUN_UNLOCK(tp);
 		pgsigio(&tp->tun_sigio, SIGIO, 0);
 	} else
 		TUN_UNLOCK(tp);
 }
 
 /*
  * tunstart_l2
  *
  * if_start handler for the L2 (tap/vmnet) flavours: queue packets from
  * higher level ready to put out.
  *
  * If the device is not both open and initialized (and is not a vmnet
  * device) the send queue is flushed and each dropped packet is counted as
  * an output error.  Otherwise anyone waiting for data is notified.
  */
 static void
 tunstart_l2(struct ifnet *ifp)
 {
 	struct tuntap_softc	*tp = ifp->if_softc;
 
 	TUNDEBUG(ifp, "starting\n");
 
 	/*
 	 * do not junk pending output if we are in VMnet mode.
 	 * XXX: can this do any harm because of queue overflow?
 	 */
 
 	TUN_LOCK(tp);
 	if (((tp->tun_flags & TUN_VMNET) == 0) &&
 	    ((tp->tun_flags & TUN_READY) != TUN_READY)) {
 		struct mbuf *m;
 
 		/* tun_mtx is held here, so tun_flags is read consistently. */
 		TUNDEBUG(ifp, "not ready, tun_flags = 0x%x\n", tp->tun_flags);
 
 		/* Drain and free everything queued for output. */
 		for (;;) {
 			IF_DEQUEUE(&ifp->if_snd, m);
 			if (m != NULL) {
 				m_freem(m);
 				if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
 			} else
 				break;
 		}
 		TUN_UNLOCK(tp);
 
 		return;
 	}
 
 	/* Mark output active for the duration of the notification. */
 	ifp->if_drv_flags |= IFF_DRV_OACTIVE;
 
 	if (!IFQ_IS_EMPTY(&ifp->if_snd)) {
 		if (tp->tun_flags & TUN_RWAIT) {
 			tp->tun_flags &= ~TUN_RWAIT;
 			wakeup(tp);
 		}
 
 		/* pgsigio() is called with tun_mtx dropped. */
 		if ((tp->tun_flags & TUN_ASYNC) && (tp->tun_sigio != NULL)) {
 			TUN_UNLOCK(tp);
 			pgsigio(&tp->tun_sigio, SIGIO, 0);
 			TUN_LOCK(tp);
 		}
 
 		selwakeuppri(&tp->tun_rsel, PZERO+1);
 		KNOTE_LOCKED(&tp->tun_rsel.si_note, 0);
 		if_inc_counter(ifp, IFCOUNTER_OPACKETS, 1); /* obytes are counted in ether_output */
 	}
 
 	ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
 	TUN_UNLOCK(tp);
 } /* tunstart_l2 */
 
 /*
  * Allocate and attach the ifnet for an existing tunnel cdev.
  *
  * L2 devices become IFT_ETHER broadcast interfaces with a generated MAC
  * address; the others become IFT_PPP point-to-point interfaces with a
  * DLT_NULL bpf attachment.  TUN_INITED is set once the interface is
  * attached.  Panics if if_alloc() fails.
  *
  * XXX: should return an error code so it can fail.
  */
 static void
 tuncreate(struct cdev *dev)
 {
 	struct tuntap_driver *drv;
 	struct tuntap_softc *tp;
 	struct ifnet *ifp;
 	struct ether_addr eaddr;
 	int iflags;
 	u_char type;
 
 	tp = dev->si_drv1;
 	KASSERT(tp != NULL,
 	    ("si_drv1 should have been initialized at creation"));
 
 	drv = tp->tun_drv;
 	/* Pick interface type and flags from the driver flavour. */
 	iflags = IFF_MULTICAST;
 	if ((tp->tun_flags & TUN_L2) != 0) {
 		type = IFT_ETHER;
 		iflags |= IFF_BROADCAST | IFF_SIMPLEX;
 	} else {
 		type = IFT_PPP;
 		iflags |= IFF_POINTOPOINT;
 	}
 	ifp = tp->tun_ifp = if_alloc(type);
 	if (ifp == NULL)
 		panic("%s%d: failed to if_alloc() interface.\n",
 		    drv->cdevsw.d_name, dev2unit(dev));
 	ifp->if_softc = tp;
 	if_initname(ifp, drv->cdevsw.d_name, dev2unit(dev));
 	ifp->if_ioctl = tunifioctl;
 	ifp->if_flags = iflags;
 	IFQ_SET_MAXLEN(&ifp->if_snd, ifqmaxlen);
 	ifp->if_capabilities |= IFCAP_LINKSTATE;
 	ifp->if_capenable |= IFCAP_LINKSTATE;
 
 	if ((tp->tun_flags & TUN_L2) != 0) {
 		ifp->if_init = tunifinit;
 		ifp->if_start = tunstart_l2;
 
 		ether_gen_addr(ifp, &eaddr);
 		ether_ifattach(ifp, eaddr.octet);
 	} else {
 		ifp->if_mtu = TUNMTU;
 		ifp->if_start = tunstart;
 		ifp->if_output = tunoutput;
 
 		ifp->if_snd.ifq_drv_maxlen = 0;
 		IFQ_SET_READY(&ifp->if_snd);
 
 		if_attach(ifp);
 		bpfattach(ifp, DLT_NULL, sizeof(u_int32_t));
 	}
 
 	/* tunopen() refuses the device until this flag is set. */
 	TUN_LOCK(tp);
 	tp->tun_flags |= TUN_INITED;
 	TUN_UNLOCK(tp);
 
 	TUNDEBUG(ifp, "interface %s is created, minor = %#x\n",
 	    ifp->if_xname, dev2unit(dev));
 }
 
 /*
  * ifnet_arrival_event handler: keep a devfs alias in step with a renamed
  * tuntap interface.
  *
  * Only acts on interfaces flagged IFF_RENAMING that belong to one of the
  * tuntap drivers.  Any previous alias is destroyed; a new one is created
  * unless the interface name equals the name of the underlying cdev.
  */
 static void
 tunrename(void *arg __unused, struct ifnet *ifp)
 {
 	struct tuntap_softc *tp;
 	int error;
 
 	if ((ifp->if_flags & IFF_RENAMING) == 0)
 		return;
 
 	if (tuntap_driver_from_ifnet(ifp) == NULL)
 		return;
 
 	/*
 	 * We need to grab the ioctl sx long enough to make sure the softc is
 	 * still there.  If it is, we can safely try to busy the tun device.
 	 * The busy may fail if the device is currently dying, in which case
 	 * we do nothing.  If it doesn't fail, the busy count stops the device
 	 * from dying until we've created the alias (that will then be
 	 * subsequently destroyed).
 	 */
 	sx_xlock(&tun_ioctl_sx);
 	tp = ifp->if_softc;
 	if (tp == NULL) {
 		sx_xunlock(&tun_ioctl_sx);
 		return;
 	}
 	error = tun_busy(tp);
 	sx_xunlock(&tun_ioctl_sx);
 	if (error != 0)
 		return;
 	/* Drop the alias left over from an earlier rename, if any. */
 	if (tp->tun_alias != NULL) {
 		destroy_dev(tp->tun_alias);
 		tp->tun_alias = NULL;
 	}
 
 	/* Renamed back to the cdev's own name: no alias is needed. */
 	if (strcmp(ifp->if_xname, tp->tun_dev->si_name) == 0)
 		goto out;
 
 	/*
 	 * Failure's ok, aliases are created on a best effort basis.  If a
 	 * tun user/consumer decides to rename the interface to conflict with
 	 * another device (non-ifnet) on the system, we will assume they know
 	 * what they are doing.  make_dev_alias_p won't touch tun_alias on
 	 * failure, so we use it but ignore the return value.
 	 */
 	make_dev_alias_p(MAKEDEV_CHECKNAME, &tp->tun_alias, tp->tun_dev, "%s",
 	    ifp->if_xname);
 out:
 	tun_unbusy(tp);
 }
 
 /*
  * d_open handler shared by tun, tap and vmnet.
  *
  * Fails with ENXIO if the interface has not been attached yet
  * (TUN_INITED clear) and with EBUSY if the device is already open or is
  * being destroyed.  On success a busy reference is taken, the opener's pid
  * is recorded, TUN_OPEN is set, the link is reported up and tundtor() is
  * registered as the cdevpriv destructor, which later undoes all of this.
  */
 static int
 tunopen(struct cdev *dev, int flag, int mode, struct thread *td)
 {
 	struct ifnet	*ifp;
 	struct tuntap_softc *tp;
 	int error __diagused, tunflags;
 
 	tunflags = 0;
 	CURVNET_SET(TD_TO_VNET(td));
 	error = tuntap_name2info(dev->si_name, NULL, &tunflags);
 	if (error != 0) {
 		CURVNET_RESTORE();
 		return (error);	/* Shouldn't happen */
 	}
 
 	tp = dev->si_drv1;
 	KASSERT(tp != NULL,
 	    ("si_drv1 should have been initialized at creation"));
 
 	TUN_LOCK(tp);
 	if ((tp->tun_flags & TUN_INITED) == 0) {
 		TUN_UNLOCK(tp);
 		CURVNET_RESTORE();
 		return (ENXIO);
 	}
 	/* Exclusive open: one opener at a time, and never while dying. */
 	if ((tp->tun_flags & (TUN_OPEN | TUN_DYING)) != 0) {
 		TUN_UNLOCK(tp);
 		CURVNET_RESTORE();
 		return (EBUSY);
 	}
 
 	/* Cannot fail: TUN_DYING was just checked under the same lock. */
 	error = tun_busy_locked(tp);
 	KASSERT(error == 0, ("Must be able to busy an unopen tunnel"));
 	ifp = TUN2IFP(tp);
 
 	if ((tp->tun_flags & TUN_L2) != 0) {
 		/* Snapshot the interface's link-level address. */
 		bcopy(IF_LLADDR(ifp), tp->tun_ether.octet,
 		    sizeof(tp->tun_ether.octet));
 
 		ifp->if_drv_flags |= IFF_DRV_RUNNING;
 		ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
 
 		if (tapuponopen)
 			ifp->if_flags |= IFF_UP;
 	}
 
 	tp->tun_pid = td->td_proc->p_pid;
 	tp->tun_flags |= TUN_OPEN;
 
 	if_link_state_change(ifp, LINK_STATE_UP);
 	TUNDEBUG(ifp, "open\n");
 	TUN_UNLOCK(tp);
 
 	/*
 	 * This can fail with either ENOENT or EBUSY.  This is in the middle of
 	 * d_open, so ENOENT should not be possible.  EBUSY is possible, but
 	 * the only cdevpriv dtor being set will be tundtor and the softc being
 	 * passed is constant for a given cdev.  We ignore the possible error
 	 * because of this as either "unlikely" or "not actually a problem."
 	 */
 	(void)devfs_set_cdevpriv(tp, tundtor);
 	CURVNET_RESTORE();
 	return (0);
 }
 
 /*
  * tundtor - tear down the device - mark i/f down & delete
  * routing info
  *
  * cdevpriv destructor registered by tunopen(); data is the softc.  Pending
  * output is discarded, the interface is brought down and its addresses
  * purged (skipped for vmnet, and for L2 devices with IFF_LINK0 set), the
  * link is reported down, waiters are woken, the open state is cleared and
  * the busy reference taken at open time is dropped.
  */
 static void
 tundtor(void *data)
 {
 	struct proc *p;
 	struct tuntap_softc *tp;
 	struct ifnet *ifp;
 	bool l2tun;
 
 	tp = data;
 	p = curproc;
 	ifp = TUN2IFP(tp);
 
 	TUN_LOCK(tp);
 
 	/*
 	 * Realistically, we can't be obstinate here.  This only means that the
 	 * tuntap device was closed out of order, and the last closer wasn't the
 	 * controller.  These are still good to know about, though, as software
 	 * should avoid multiple processes with a tuntap device open and
 	 * ill-defined transfer of control (e.g., handoff, TUNSIFPID, close in
 	 * parent).
 	 */
 	if (p->p_pid != tp->tun_pid) {
 		log(LOG_INFO,
 		    "pid %d (%s), %s: tun/tap protocol violation, non-controlling process closed last.\n",
 		    p->p_pid, p->p_comm, tp->tun_dev->si_name);
 	}
 
 	/*
 	 * junk all pending output
 	 */
 	CURVNET_SET(ifp->if_vnet);
 
 	l2tun = false;
 	if ((tp->tun_flags & TUN_L2) != 0) {
 		l2tun = true;
 		IF_DRAIN(&ifp->if_snd);
 	} else {
 		IFQ_PURGE(&ifp->if_snd);
 	}
 
 	/* For vmnet, we won't do most of the address/route bits */
 	if ((tp->tun_flags & TUN_VMNET) != 0 ||
 	    (l2tun && (ifp->if_flags & IFF_LINK0) != 0))
 		goto out;
 
 	/* if_down() is called with tun_mtx dropped. */
 	if (ifp->if_flags & IFF_UP) {
 		TUN_UNLOCK(tp);
 		if_down(ifp);
 		TUN_LOCK(tp);
 	}
 
 	/* Delete all addresses and routes which reference this interface. */
 	if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
 		ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
 		TUN_UNLOCK(tp);
 		if_purgeaddrs(ifp);
 		TUN_LOCK(tp);
 	}
 
 out:
 	if_link_state_change(ifp, LINK_STATE_DOWN);
 	CURVNET_RESTORE();
 
 	funsetown(&tp->tun_sigio);
 	selwakeuppri(&tp->tun_rsel, PZERO + 1);
 	KNOTE_LOCKED(&tp->tun_rsel.si_note, 0);
 	TUNDEBUG (ifp, "closed\n");
 	tp->tun_flags &= ~TUN_OPEN;
 	tp->tun_pid = 0;
 	/* Reset the virtio-net header length and its capabilities. */
 	tun_vnethdr_set(ifp, 0);
 
 	/* Pairs with the tun_busy_locked() in tunopen(). */
 	tun_unbusy_locked(tp);
 	TUN_UNLOCK(tp);
 }
 
 static void
 tuninit(struct ifnet *ifp)
 {
 	struct tuntap_softc *tp = ifp->if_softc;
 
 	TUNDEBUG(ifp, "tuninit\n");
 
 	TUN_LOCK(tp);
 	ifp->if_drv_flags |= IFF_DRV_RUNNING;
 	if ((tp->tun_flags & TUN_L2) == 0) {
 		ifp->if_flags |= IFF_UP;
 		getmicrotime(&ifp->if_lastchange);
 		TUN_UNLOCK(tp);
 	} else {
 		ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
 		TUN_UNLOCK(tp);
 		/* attempt to start output */
 		tunstart_l2(ifp);
 	}
 
 }
 
 /*
  * if_init-style entry point taking the softc as an opaque pointer; it
  * simply forwards to tuninit() for the softc's interface.
  * Used only for l2 tunnel.
  */
 static void
 tunifinit(void *xtp)
 {
 	struct tuntap_softc *sc = xtp;
 
 	tuninit(sc->tun_ifp);
 }
 
 /*
  * Recompute ifp->if_hwassist from the capabilities currently enabled in
  * ifp->if_capenable.  Must be called with TUN_LOCK held.
  */
 static void
 tun_caps_changed(struct ifnet *ifp)
 {
 	uint64_t assist;
 	int enabled;
 
 	TUN_LOCK_ASSERT((struct tuntap_softc *)ifp->if_softc);
 
 	enabled = ifp->if_capenable;
 	assist = 0;
 
 	/* Transmit checksum offload, IPv4 and IPv6. */
 	if ((enabled & IFCAP_TXCSUM) != 0)
 		assist |= (CSUM_TCP | CSUM_UDP);
 	if ((enabled & IFCAP_TXCSUM_IPV6) != 0)
 		assist |= (CSUM_TCP_IPV6 | CSUM_UDP_IPV6);
 
 	/* TCP segmentation offload, IPv4 and IPv6. */
 	if ((enabled & IFCAP_TSO4) != 0)
 		assist |= CSUM_IP_TSO;
 	if ((enabled & IFCAP_TSO6) != 0)
 		assist |= CSUM_IP6_TSO;
 
 	ifp->if_hwassist = assist;
 }
 
 /*
  * Set the virtio-net header length used on this interface and bring
  * if_capabilities, if_capenable and if_hwassist in line with it.
  * A no-op when the length is unchanged.  Must be called with TUN_LOCK held.
  */
 static void
 tun_vnethdr_set(struct ifnet *ifp, int vhdrlen)
 {
 	struct tuntap_softc *sc;
 
 	sc = ifp->if_softc;
 	TUN_LOCK_ASSERT(sc);
 
 	if (vhdrlen == sc->tun_vhdrlen)
 		return;
 
 	/*
 	 * The offload capabilities carried by the virtio-net header are
 	 * advertised only while a header is in use.
 	 */
 	if (vhdrlen == 0)
 		ifp->if_capabilities &= ~TAP_VNET_HDR_CAPS;
 	else
 		ifp->if_capabilities |= TAP_VNET_HDR_CAPS;
 
 	/* Anything no longer advertised can no longer be enabled either. */
 	ifp->if_capenable &= ifp->if_capabilities;
 	tun_caps_changed(ifp);
 	sc->tun_vhdrlen = vhdrlen;
 
 	TUNDEBUG(ifp, "vnet_hdr_len=%d, if_capabilities=%x\n",
 	    vhdrlen, ifp->if_capabilities);
 }
 
 /*
  * Process an ioctl request issued against the network interface.
  *
  * The whole handler runs under the exclusive tun_ioctl_sx lock.  Returns
  * 0 on success, ENXIO when the interface no longer has a softc, and
  * EINVAL for requests that are not supported on this kind of interface;
  * errors from ether_ioctl() and copyout() are passed through.
  */
 static int
 tunifioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
 {
 	struct ifreq *ifr = (struct ifreq *)data;
 	struct tuntap_softc *tp;
 	struct ifstat *ifs;
 	struct ifmediareq	*ifmr;
 	int		dummy, error = 0;
 	bool		l2tun;
 
 	ifmr = NULL;
 	sx_xlock(&tun_ioctl_sx);
 	tp = ifp->if_softc;
 	if (tp == NULL) {
 		error = ENXIO;
 		goto bad;
 	}
 	l2tun = (tp->tun_flags & TUN_L2) != 0;
 	switch(cmd) {
 	case SIOCGIFSTATUS:
 		/* Report the controlling pid, or an empty string if none. */
 		ifs = (struct ifstat *)data;
 		TUN_LOCK(tp);
 		if (tp->tun_pid)
 			snprintf(ifs->ascii, sizeof(ifs->ascii),
 			    "\tOpened by PID %d\n", tp->tun_pid);
 		else
 			ifs->ascii[0] = '\0';
 		TUN_UNLOCK(tp);
 		break;
 	case SIOCSIFADDR:
 		/* L2 defers to the ethernet layer; L3 just (re)initializes. */
 		if (l2tun)
 			error = ether_ioctl(ifp, cmd, data);
 		else
 			tuninit(ifp);
 		if (error == 0)
 		    TUNDEBUG(ifp, "address set\n");
 		break;
 	case SIOCSIFMTU:
 		/* The requested MTU is stored as-is; no range check here. */
 		ifp->if_mtu = ifr->ifr_mtu;
 		TUNDEBUG(ifp, "mtu set\n");
 		break;
 	case SIOCSIFFLAGS:
 	case SIOCADDMULTI:
 	case SIOCDELMULTI:
 		/* Accepted, nothing to do. */
 		break;
 	case SIOCGIFMEDIA:
 		if (!l2tun) {
 			error = EINVAL;
 			break;
 		}
 
 		/*
 		 * Report a single ethernet media entry, active while the
 		 * control device is open.  'dummy' holds the caller-supplied
 		 * ifm_count, i.e. how many entries its list can take.
 		 */
 		ifmr = (struct ifmediareq *)data;
 		dummy = ifmr->ifm_count;
 		ifmr->ifm_count = 1;
 		ifmr->ifm_status = IFM_AVALID;
 		ifmr->ifm_active = IFM_ETHER;
 		if (tp->tun_flags & TUN_OPEN)
 			ifmr->ifm_status |= IFM_ACTIVE;
 		ifmr->ifm_current = ifmr->ifm_active;
 		if (dummy >= 1) {
 			int media = IFM_ETHER;
 			error = copyout(&media, ifmr->ifm_ulist, sizeof(int));
 		}
 		break;
 	case SIOCSIFCAP:
 		/* Take the requested set verbatim, then refresh if_hwassist. */
 		TUN_LOCK(tp);
 		ifp->if_capenable = ifr->ifr_reqcap;
 		tun_caps_changed(ifp);
 		TUN_UNLOCK(tp);
 		VLAN_CAPABILITIES(ifp);
 		break;
 	default:
 		if (l2tun) {
 			error = ether_ioctl(ifp, cmd, data);
 		} else {
 			error = EINVAL;
 		}
 	}
 bad:
 	sx_xunlock(&tun_ioctl_sx);
 	return (error);
 }
 
 /*
  * tunoutput - queue packets from higher level ready to put out.
  *
  * The mbuf chain m0 is always consumed.  Returns 0 on success,
  * EHOSTDOWN when the device is not ready or the interface is not up,
  * EAFNOSUPPORT for a non-IPv4 packet when no address-family header is
  * being prepended, ENOBUFS when an mbuf prepend or the transmit itself
  * fails, or the MAC framework's error when the MAC check denies the send.
  */
 static int
 tunoutput(struct ifnet *ifp, struct mbuf *m0, const struct sockaddr *dst,
     struct route *ro)
 {
 	struct tuntap_softc *tp = ifp->if_softc;
 	u_short cached_tun_flags;
 	int error;
 	u_int32_t af;
 
 	TUNDEBUG (ifp, "tunoutput\n");
 
 #ifdef MAC
 	error = mac_ifnet_check_transmit(ifp, m0);
 	if (error) {
 		m_freem(m0);
 		return (error);
 	}
 #endif
 
 	/* Snapshot the flags once; every later test uses the snapshot. */
 	/* Could be unlocked read? */
 	TUN_LOCK(tp);
 	cached_tun_flags = tp->tun_flags;
 	TUN_UNLOCK(tp);
 	if ((cached_tun_flags & TUN_READY) != TUN_READY) {
 		TUNDEBUG (ifp, "not ready 0%o\n", tp->tun_flags);
 		m_freem (m0);
 		return (EHOSTDOWN);
 	}
 
 	if ((ifp->if_flags & IFF_UP) != IFF_UP) {
 		m_freem (m0);
 		return (EHOSTDOWN);
 	}
 
 	/*
 	 * For AF_UNSPEC the real address family is carried in the first
 	 * bytes of sa_data.
 	 */
 	/* BPF writes need to be handled specially. */
 	if (dst->sa_family == AF_UNSPEC)
 		bcopy(dst->sa_data, &af, sizeof(af));
 	else
-		af = dst->sa_family;
+		af = RO_GET_FAMILY(ro, dst);
 
 	if (bpf_peers_present(ifp->if_bpf))
 		bpf_mtap2(ifp->if_bpf, &af, sizeof(af), m0);
 
 	/* prepend sockaddr? this may abort if the mbuf allocation fails */
 	if (cached_tun_flags & TUN_LMODE) {
 		/* allocate space for sockaddr */
 		M_PREPEND(m0, dst->sa_len, M_NOWAIT);
 
 		/* if allocation failed drop packet */
 		if (m0 == NULL) {
 			if_inc_counter(ifp, IFCOUNTER_IQDROPS, 1);
 			if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
 			return (ENOBUFS);
 		} else {
 			bcopy(dst, m0->m_data, dst->sa_len);
 		}
 	}
 
 	if (cached_tun_flags & TUN_IFHEAD) {
 		/* Prepend the address family */
 		M_PREPEND(m0, 4, M_NOWAIT);
 
 		/* if allocation failed drop packet */
 		if (m0 == NULL) {
 			if_inc_counter(ifp, IFCOUNTER_IQDROPS, 1);
 			if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
 			return (ENOBUFS);
 		} else
 			*(u_int32_t *)m0->m_data = htonl(af);
 	} else {
 		/*
 		 * Without the family header only IPv4 can be sent; when
 		 * INET is not compiled in, every packet is rejected here.
 		 */
 #ifdef INET
 		if (af != AF_INET)
 #endif
 		{
 			m_freem(m0);
 			return (EAFNOSUPPORT);
 		}
 	}
 
 	/* Any transmit failure is reported to the caller as ENOBUFS. */
 	error = (ifp->if_transmit)(ifp, m0);
 	if (error)
 		return (ENOBUFS);
 	if_inc_counter(ifp, IFCOUNTER_OPACKETS, 1);
 	return (0);
 }
 
 /*
  * the cdevsw interface is now pretty minimal.
  *
  * tunioctl - ioctl handler for the control device.
  *
  * Requests are tried in two stages: first the tap-specific (L2) or
  * tun-specific (L3) set, depending on the device's TUN_L2 flag, then a
  * set common to both.  Returns 0 on success, ENOTTY for a request that
  * no stage recognizes, or a request-specific error.
  */
 static	int
 tunioctl(struct cdev *dev, u_long cmd, caddr_t data, int flag,
     struct thread *td)
 {
 	struct ifreq ifr, *ifrp;
 	struct tuntap_softc *tp = dev->si_drv1;
 	struct ifnet *ifp = TUN2IFP(tp);
 	struct tuninfo *tunp;
 	int error, iflags, ival;
 	bool	l2tun;
 
 	l2tun = (tp->tun_flags & TUN_L2) != 0;
 	if (l2tun) {
 		/* tap specific ioctls */
 		switch(cmd) {
 		/* VMware/VMnet port ioctl's */
 #if defined(COMPAT_FREEBSD6) || defined(COMPAT_FREEBSD5) || \
     defined(COMPAT_FREEBSD4)
 		case _IO('V', 0):
 			/* Old ABI passes the value itself; redirect 'data'. */
 			ival = IOCPARM_IVAL(data);
 			data = (caddr_t)&ival;
 			/* FALLTHROUGH */
 #endif
 		case VMIO_SIOCSIFFLAGS: /* VMware/VMnet SIOCSIFFLAGS */
 			/*
 			 * Keep only the maskable, changeable bits from the
 			 * caller, force IFF_UP, and preserve the interface's
 			 * existing unchangeable bits.
 			 */
 			iflags = *(int *)data;
 			iflags &= TUN_VMIO_FLAG_MASK;
 			iflags &= ~IFF_CANTCHANGE;
 			iflags |= IFF_UP;
 
 			TUN_LOCK(tp);
 			ifp->if_flags = iflags |
 			    (ifp->if_flags & IFF_CANTCHANGE);
 			TUN_UNLOCK(tp);
 
 			return (0);
 		case SIOCGIFADDR:	/* get MAC address of the remote side */
 			TUN_LOCK(tp);
 			bcopy(&tp->tun_ether.octet, data,
 			    sizeof(tp->tun_ether.octet));
 			TUN_UNLOCK(tp);
 
 			return (0);
 		case SIOCSIFADDR:	/* set MAC address of the remote side */
 			TUN_LOCK(tp);
 			bcopy(data, &tp->tun_ether.octet,
 			    sizeof(tp->tun_ether.octet));
 			TUN_UNLOCK(tp);
 
 			return (0);
 		case TAPSVNETHDR:
 			/* Only 0 or one of the two virtio header sizes. */
 			ival = *(int *)data;
 			if (ival != 0 &&
 			    ival != sizeof(struct virtio_net_hdr) &&
 			    ival != sizeof(struct virtio_net_hdr_mrg_rxbuf)) {
 				return (EINVAL);
 			}
 			TUN_LOCK(tp);
 			tun_vnethdr_set(ifp, ival);
 			TUN_UNLOCK(tp);
 
 			return (0);
 		case TAPGVNETHDR:
 			TUN_LOCK(tp);
 			*(int *)data = tp->tun_vhdrlen;
 			TUN_UNLOCK(tp);
 
 			return (0);
 		}
 
 		/* Fall through to the common ioctls if unhandled */
 	} else {
 		switch (cmd) {
 		case TUNSLMODE:
 			/* TUN_LMODE and TUN_IFHEAD are mutually exclusive. */
 			TUN_LOCK(tp);
 			if (*(int *)data) {
 				tp->tun_flags |= TUN_LMODE;
 				tp->tun_flags &= ~TUN_IFHEAD;
 			} else
 				tp->tun_flags &= ~TUN_LMODE;
 			TUN_UNLOCK(tp);
 
 			return (0);
 		case TUNSIFHEAD:
 			TUN_LOCK(tp);
 			if (*(int *)data) {
 				tp->tun_flags |= TUN_IFHEAD;
 				tp->tun_flags &= ~TUN_LMODE;
 			} else
 				tp->tun_flags &= ~TUN_IFHEAD;
 			TUN_UNLOCK(tp);
 
 			return (0);
 		case TUNGIFHEAD:
 			TUN_LOCK(tp);
 			*(int *)data = (tp->tun_flags & TUN_IFHEAD) ? 1 : 0;
 			TUN_UNLOCK(tp);
 
 			return (0);
 		case TUNSIFMODE:
 			/* deny this if UP */
 			if (TUN2IFP(tp)->if_flags & IFF_UP)
 				return (EBUSY);
 
 			/*
 			 * Exactly one of point-to-point or broadcast must be
 			 * requested; IFF_MULTICAST may accompany either.
 			 */
 			switch (*(int *)data & ~IFF_MULTICAST) {
 			case IFF_POINTOPOINT:
 			case IFF_BROADCAST:
 				TUN_LOCK(tp);
 				TUN2IFP(tp)->if_flags &=
 				    ~(IFF_BROADCAST|IFF_POINTOPOINT|IFF_MULTICAST);
 				TUN2IFP(tp)->if_flags |= *(int *)data;
 				TUN_UNLOCK(tp);
 
 				break;
 			default:
 				return (EINVAL);
 			}
 
 			return (0);
 		case TUNSIFPID:
 			/* The calling process becomes the controller. */
 			TUN_LOCK(tp);
 			tp->tun_pid = curthread->td_proc->p_pid;
 			TUN_UNLOCK(tp);
 
 			return (0);
 		}
 		/* Fall through to the common ioctls if unhandled */
 	}
 
 	switch (cmd) {
 	case TUNGIFNAME:
 		ifrp = (struct ifreq *)data;
 		strlcpy(ifrp->ifr_name, TUN2IFP(tp)->if_xname, IFNAMSIZ);
 
 		return (0);
 	case TUNSIFINFO:
 		/*
 		 * The interface type cannot be changed.  An MTU change is
 		 * routed through ifhwioctl(SIOCSIFMTU); the baud rate is
 		 * stored directly.
 		 */
 		tunp = (struct tuninfo *)data;
 		if (TUN2IFP(tp)->if_type != tunp->type)
 			return (EPROTOTYPE);
 		TUN_LOCK(tp);
 		if (TUN2IFP(tp)->if_mtu != tunp->mtu) {
 			strlcpy(ifr.ifr_name, if_name(TUN2IFP(tp)), IFNAMSIZ);
 			ifr.ifr_mtu = tunp->mtu;
 			CURVNET_SET(TUN2IFP(tp)->if_vnet);
 			error = ifhwioctl(SIOCSIFMTU, TUN2IFP(tp),
 			    (caddr_t)&ifr, td);
 			CURVNET_RESTORE();
 			if (error) {
 				TUN_UNLOCK(tp);
 				return (error);
 			}
 		}
 		TUN2IFP(tp)->if_baudrate = tunp->baudrate;
 		TUN_UNLOCK(tp);
 		break;
 	case TUNGIFINFO:
 		tunp = (struct tuninfo *)data;
 		TUN_LOCK(tp);
 		tunp->mtu = TUN2IFP(tp)->if_mtu;
 		tunp->type = TUN2IFP(tp)->if_type;
 		tunp->baudrate = TUN2IFP(tp)->if_baudrate;
 		TUN_UNLOCK(tp);
 		break;
 	case TUNSDEBUG:
 		/* tundebug is a single variable shared by all devices. */
 		tundebug = *(int *)data;
 		break;
 	case TUNGDEBUG:
 		*(int *)data = tundebug;
 		break;
 	case FIONBIO:
 		/* Accepted, nothing to do. */
 		break;
 	case FIOASYNC:
 		TUN_LOCK(tp);
 		if (*(int *)data)
 			tp->tun_flags |= TUN_ASYNC;
 		else
 			tp->tun_flags &= ~TUN_ASYNC;
 		TUN_UNLOCK(tp);
 		break;
 	case FIONREAD:
 		/*
 		 * Report the byte count of the packet at the head of the
 		 * send queue only (sum of m_len along its m_next chain),
 		 * or 0 when the queue is empty.
 		 */
 		if (!IFQ_IS_EMPTY(&TUN2IFP(tp)->if_snd)) {
 			struct mbuf *mb;
 			IFQ_LOCK(&TUN2IFP(tp)->if_snd);
 			IFQ_POLL_NOLOCK(&TUN2IFP(tp)->if_snd, mb);
 			for (*(int *)data = 0; mb != NULL; mb = mb->m_next)
 				*(int *)data += mb->m_len;
 			IFQ_UNLOCK(&TUN2IFP(tp)->if_snd);
 		} else
 			*(int *)data = 0;
 		break;
 	case FIOSETOWN:
 		return (fsetown(*(int *)data, &tp->tun_sigio));
 
 	case FIOGETOWN:
 		*(int *)data = fgetown(&tp->tun_sigio);
 		return (0);
 
 	/* This is deprecated, FIOSETOWN should be used instead. */
 	case TIOCSPGRP:
 		/* Same as FIOSETOWN with the value's sign flipped. */
 		return (fsetown(-(*(int *)data), &tp->tun_sigio));
 
 	/* This is deprecated, FIOGETOWN should be used instead. */
 	case TIOCGPGRP:
 		*(int *)data = -fgetown(&tp->tun_sigio);
 		return (0);
 
 	default:
 		return (ENOTTY);
 	}
 	return (0);
 }
 
 /*
  * The cdevsw read interface - reads a packet at a time, or at
  * least as much of a packet as can be read.
  *
  * Returns 0 on success, EHOSTDOWN when the device is not ready,
  * EWOULDBLOCK when the queue is empty and O_NONBLOCK is set, the error
  * from an interrupted sleep, or the error from uiomove().  Whatever part
  * of the packet does not fit in the caller's buffer is discarded.
  */
 static	int
 tunread(struct cdev *dev, struct uio *uio, int flag)
 {
 	struct tuntap_softc *tp = dev->si_drv1;
 	struct ifnet	*ifp = TUN2IFP(tp);
 	struct mbuf	*m;
 	size_t		len;
 	int		error = 0;
 
 	TUNDEBUG (ifp, "read\n");
 	TUN_LOCK(tp);
 	if ((tp->tun_flags & TUN_READY) != TUN_READY) {
 		TUN_UNLOCK(tp);
 		TUNDEBUG (ifp, "not ready 0%o\n", tp->tun_flags);
 		return (EHOSTDOWN);
 	}
 
 	tp->tun_flags &= ~TUN_RWAIT;
 
 	/*
 	 * Wait for a packet.  TUN_RWAIT is set before sleeping on the softc;
 	 * the sleep is interruptible (PCATCH) and releases tun_mtx while
 	 * asleep.
 	 */
 	for (;;) {
 		IFQ_DEQUEUE(&ifp->if_snd, m);
 		if (m != NULL)
 			break;
 		if (flag & O_NONBLOCK) {
 			TUN_UNLOCK(tp);
 			return (EWOULDBLOCK);
 		}
 		tp->tun_flags |= TUN_RWAIT;
 		error = mtx_sleep(tp, &tp->tun_mtx, PCATCH | (PZERO + 1),
 		    "tunread", 0);
 		if (error != 0) {
 			TUN_UNLOCK(tp);
 			return (error);
 		}
 	}
 	TUN_UNLOCK(tp);
 
 	/* From here on the dequeued packet is private to this thread. */
 	if ((tp->tun_flags & TUN_L2) != 0)
 		BPF_MTAP(ifp, m);
 
 	/*
 	 * When a virtio-net header is configured, emit it first (truncated
 	 * to the caller's buffer).  It is all zeroes unless the packet
 	 * carries offload requests, in which case virtio_net_tx_offload()
 	 * fills it in.
 	 * NOTE(review): the 'while (m && ...)' below suggests that call may
 	 * return NULL -- confirm.
 	 */
 	len = min(tp->tun_vhdrlen, uio->uio_resid);
 	if (len > 0) {
 		struct virtio_net_hdr_mrg_rxbuf vhdr;
 
 		bzero(&vhdr, sizeof(vhdr));
 		if (m->m_pkthdr.csum_flags & TAP_ALL_OFFLOAD) {
 			m = virtio_net_tx_offload(ifp, m, false, &vhdr.hdr);
 		}
 
 		TUNDEBUG(ifp, "txvhdr: f %u, gt %u, hl %u, "
 		    "gs %u, cs %u, co %u\n", vhdr.hdr.flags,
 		    vhdr.hdr.gso_type, vhdr.hdr.hdr_len,
 		    vhdr.hdr.gso_size, vhdr.hdr.csum_start,
 		    vhdr.hdr.csum_offset);
 		error = uiomove(&vhdr, len, uio);
 	}
 
 	/* Copy out mbuf by mbuf, freeing each one as it is consumed. */
 	while (m && uio->uio_resid > 0 && error == 0) {
 		len = min(uio->uio_resid, m->m_len);
 		if (len != 0)
 			error = uiomove(mtod(m, void *), len, uio);
 		m = m_free(m);
 	}
 
 	/* Buffer exhausted or copy failed: drop the rest of the packet. */
 	if (m) {
 		TUNDEBUG(ifp, "Dropping mbuf\n");
 		m_freem(m);
 	}
 	return (error);
 }
 
 /*
  * Inject a frame written to a tap (L2) device into the interface's input
  * path.  The mbuf is always consumed and the function always returns 0;
  * frames that fail any of the checks below are silently discarded.
  */
 static int
 tunwrite_l2(struct tuntap_softc *tp, struct mbuf *m,
 	    struct virtio_net_hdr_mrg_rxbuf *vhdr)
 {
 	struct epoch_tracker et;
 	struct ether_header *hdr;
 	struct ifnet *ifp = TUN2IFP(tp);
 	bool accept;
 
 	/* The first mbuf must hold a complete ethernet header. */
 	accept = !(m->m_len < sizeof(struct ether_header));
 
 	/*
 	 * Only pass a unicast frame to ether_input(), if it would
 	 * actually have been received by non-virtual hardware: unless
 	 * promiscuous, the destination must be multicast or our own
 	 * link-level address.
 	 */
 	if (accept) {
 		hdr = mtod(m, struct ether_header *);
 		if (hdr != NULL && (ifp->if_flags & IFF_PROMISC) == 0 &&
 		    !ETHER_IS_MULTICAST(hdr->ether_dhost) &&
 		    bcmp(hdr->ether_dhost, IF_LLADDR(ifp),
 		    ETHER_ADDR_LEN) != 0)
 			accept = false;
 	}
 
 	/* A supplied virtio-net header must pass checksum processing. */
 	if (accept && vhdr != NULL && virtio_net_rx_csum(m, &vhdr->hdr))
 		accept = false;
 
 	if (!accept) {
 		m_freem(m);
 		return (0);
 	}
 
 	/* Pass packet up to parent. */
 	CURVNET_SET(ifp->if_vnet);
 	NET_EPOCH_ENTER(et);
 	(*ifp->if_input)(ifp, m);
 	NET_EPOCH_EXIT(et);
 	CURVNET_RESTORE();
 
 	/* ibytes are counted in parent */
 	if_inc_counter(ifp, IFCOUNTER_IPACKETS, 1);
 	return (0);
 }
 
 /*
  * Inject a packet written to a tun (L3) device into the protocol input
  * queue.  With TUN_IFHEAD set the packet starts with a 4-byte
  * network-order address family, which is stripped; otherwise the packet
  * is taken to be IPv4.  Returns 0 on success, ENOBUFS if the family word
  * cannot be pulled up, or EAFNOSUPPORT for a family with no handler.
  */
 static int
 tunwrite_l3(struct tuntap_softc *tp, struct mbuf *m)
 {
 	struct epoch_tracker et;
 	struct ifnet *ifp;
 	int family, isr;
 	bool has_ifhead;
 
 	ifp = TUN2IFP(tp);
 
 	/* Could be unlocked read? */
 	TUN_LOCK(tp);
 	has_ifhead = (tp->tun_flags & TUN_IFHEAD) != 0;
 	TUN_UNLOCK(tp);
 
 	if (!has_ifhead) {
 		family = AF_INET;
 	} else {
 		if (m->m_len < sizeof(family)) {
 			m = m_pullup(m, sizeof(family));
 			if (m == NULL)
 				return (ENOBUFS);
 		}
 		family = ntohl(*mtod(m, u_int32_t *));
 		m_adj(m, sizeof(family));
 	}
 
 	BPF_MTAP2(ifp, &family, sizeof(family), m);
 
 	/* Map the address family onto the matching netisr queue. */
 	switch (family) {
 #ifdef INET
 	case AF_INET:
 		isr = NETISR_IP;
 		break;
 #endif
 #ifdef INET6
 	case AF_INET6:
 		isr = NETISR_IPV6;
 		break;
 #endif
 	default:
 		m_freem(m);
 		return (EAFNOSUPPORT);
 	}
 
 	random_harvest_queue(m, sizeof(*m), RANDOM_NET_TUN);
 	if_inc_counter(ifp, IFCOUNTER_IBYTES, m->m_pkthdr.len);
 	if_inc_counter(ifp, IFCOUNTER_IPACKETS, 1);
 
 	CURVNET_SET(ifp->if_vnet);
 	M_SETFIB(m, ifp->if_fib);
 	NET_EPOCH_ENTER(et);
 	netisr_dispatch(isr, m);
 	NET_EPOCH_EXIT(et);
 	CURVNET_RESTORE();
 
 	return (0);
 }
 
 /*
  * the cdevsw write interface - an atomic write is a packet - or else!
  *
  * Each write(2) supplies exactly one packet, optionally preceded by a
  * virtio-net header when one is configured.  Writes to an interface that
  * is not up, and zero-length writes, are accepted and ignored.  Returns
  * EIO when the write exceeds the maximum receive unit, ENOBUFS when no
  * mbuf can be allocated, the uiomove() error when copying the header
  * fails, or the result of the L2/L3 injection helper.
  */
 static	int
 tunwrite(struct cdev *dev, struct uio *uio, int flag)
 {
 	struct virtio_net_hdr_mrg_rxbuf vhdr;
 	struct tuntap_softc *tp;
 	struct ifnet	*ifp;
 	struct mbuf	*m;
 	uint32_t	mru;
 	int		align, vhdrlen, error;
 	bool		l2tun;
 
 	tp = dev->si_drv1;
 	ifp = TUN2IFP(tp);
 	TUNDEBUG(ifp, "tunwrite\n");
 	if ((ifp->if_flags & IFF_UP) != IFF_UP)
 		/* ignore silently */
 		return (0);
 
 	if (uio->uio_resid == 0)
 		return (0);
 
 	/*
 	 * Work out the largest acceptable write: the base MRU for the
 	 * device type, plus the virtio-net header (L2) or the 4-byte
 	 * address-family word (L3 with TUN_IFHEAD).
 	 */
 	l2tun = (tp->tun_flags & TUN_L2) != 0;
 	mru = l2tun ? TAPMRU : TUNMRU;
 	vhdrlen = tp->tun_vhdrlen;
 	align = 0;
 	if (l2tun) {
 		align = ETHER_ALIGN;
 		mru += vhdrlen;
 	} else if ((tp->tun_flags & TUN_IFHEAD) != 0)
 		mru += sizeof(uint32_t);	/* family */
 	if (uio->uio_resid < 0 || uio->uio_resid > mru) {
 		TUNDEBUG(ifp, "len=%zd!\n", uio->uio_resid);
 		return (EIO);
 	}
 
 	/*
 	 * Peel the virtio-net header off the front of the write.
 	 * NOTE(review): no check that the write is at least vhdrlen bytes
 	 * long -- confirm how a shorter write leaves 'vhdr'.
 	 */
 	if (vhdrlen > 0) {
 		error = uiomove(&vhdr, vhdrlen, uio);
 		if (error != 0)
 			return (error);
 		TUNDEBUG(ifp, "txvhdr: f %u, gt %u, hl %u, "
 		    "gs %u, cs %u, co %u\n", vhdr.hdr.flags,
 		    vhdr.hdr.gso_type, vhdr.hdr.hdr_len,
 		    vhdr.hdr.gso_size, vhdr.hdr.csum_start,
 		    vhdr.hdr.csum_offset);
 	}
 
 	/* Copy the remaining payload into a fresh packet-header mbuf chain. */
 	if ((m = m_uiotombuf(uio, M_NOWAIT, 0, align, M_PKTHDR)) == NULL) {
 		if_inc_counter(ifp, IFCOUNTER_IERRORS, 1);
 		return (ENOBUFS);
 	}
 
 	m->m_pkthdr.rcvif = ifp;
 #ifdef MAC
 	mac_ifnet_create_mbuf(ifp, m);
 #endif
 
 	/* The helpers take ownership of the mbuf from here. */
 	if (l2tun)
 		return (tunwrite_l2(tp, m, vhdrlen > 0 ? &vhdr : NULL));
 
 	return (tunwrite_l3(tp, m));
 }
 
 /*
  * tunpoll - the poll interface, this is only useful on reads
  * really. The write detect always returns true, write never blocks
  * anyway, it either accepts the packet or drops it.
  *
  * Returns the subset of 'events' that is ready now.  If read readiness
  * was requested and the send queue is empty, the thread is recorded on
  * the read selinfo for a later wakeup.
  */
 static	int
 tunpoll(struct cdev *dev, int events, struct thread *td)
 {
 	struct tuntap_softc *sc = dev->si_drv1;
 	struct ifnet	*ifp = TUN2IFP(sc);
 	const int	rdwant = events & (POLLIN | POLLRDNORM);
 	int		ready;
 
 	TUNDEBUG(ifp, "tunpoll\n");
 
 	/* Write events are unconditionally ready. */
 	ready = events & (POLLOUT | POLLWRNORM);
 
 	if (rdwant != 0) {
 		IFQ_LOCK(&ifp->if_snd);
 		if (IFQ_IS_EMPTY(&ifp->if_snd)) {
 			TUNDEBUG(ifp, "tunpoll waiting\n");
 			selrecord(td, &sc->tun_rsel);
 		} else {
 			TUNDEBUG(ifp, "tunpoll q=%d\n", ifp->if_snd.ifq_len);
 			ready |= rdwant;
 		}
 		IFQ_UNLOCK(&ifp->if_snd);
 	}
 
 	return (ready);
 }
 
 /*
  * tunkqfilter - support for the kevent() system call.
  *
  * Selects the filter operations for EVFILT_READ or EVFILT_WRITE, stores
  * the softc in kn_hook and adds the knote to the read selinfo's note
  * list.  Any other filter is rejected with EINVAL.
  */
 static int
 tunkqfilter(struct cdev *dev, struct knote *kn)
 {
 	struct tuntap_softc	*sc;
 	struct ifnet	*ifp;
 
 	sc = dev->si_drv1;
 	ifp = TUN2IFP(sc);
 
 	if (kn->kn_filter == EVFILT_READ) {
 		TUNDEBUG(ifp, "%s kqfilter: EVFILT_READ, minor = %#x\n",
 		    ifp->if_xname, dev2unit(dev));
 		kn->kn_fop = &tun_read_filterops;
 	} else if (kn->kn_filter == EVFILT_WRITE) {
 		TUNDEBUG(ifp, "%s kqfilter: EVFILT_WRITE, minor = %#x\n",
 		    ifp->if_xname, dev2unit(dev));
 		kn->kn_fop = &tun_write_filterops;
 	} else {
 		TUNDEBUG(ifp, "%s kqfilter: invalid filter, minor = %#x\n",
 		    ifp->if_xname, dev2unit(dev));
 		return(EINVAL);
 	}
 
 	/* Read and write knotes share the one note list. */
 	kn->kn_hook = sc;
 	knlist_add(&sc->tun_rsel.si_note, kn, 0);
 
 	return (0);
 }
 
 /*
  * Read filter for kevent: return true if there is data in the interface
  * queue.  kn_data is set to the current queue length.
  */
 static int
 tunkqread(struct knote *kn, long hint)
 {
 	struct tuntap_softc	*sc = kn->kn_hook;
 	struct cdev		*dev = sc->tun_dev;
 	struct ifnet	*ifp = TUN2IFP(sc);
 
 	kn->kn_data = ifp->if_snd.ifq_len;
 	if (kn->kn_data > 0) {
 		TUNDEBUG(ifp,
 		    "%s have data in the queue.  Len = %d, minor = %#x\n",
 		    ifp->if_xname, ifp->if_snd.ifq_len, dev2unit(dev));
 		return (1);
 	}
 
 	TUNDEBUG(ifp,
 	    "%s waiting for data, minor = %#x\n", ifp->if_xname,
 	    dev2unit(dev));
 	return (0);
 }
 
 /*
  * Always can write, always return MTU in kn->data.
  */
 static int
 tunkqwrite(struct knote *kn, long hint)
 {
 	struct tuntap_softc	*tp = kn->kn_hook;
 	struct ifnet	*ifp = TUN2IFP(tp);
 
 	kn->kn_data = ifp->if_mtu;
 
 	return (1);
 }
 
 /*
  * Detach a knote: remove it from the note list it was added to by
  * tunkqfilter().
  */
 static void
 tunkqdetach(struct knote *kn)
 {
 	struct tuntap_softc	*sc;
 
 	sc = kn->kn_hook;
 	knlist_remove(&sc->tun_rsel.si_note, kn, 0);
 }
diff --git a/sys/net/route.h b/sys/net/route.h
index 67217f237e0b..ec77d39b9649 100644
--- a/sys/net/route.h
+++ b/sys/net/route.h
@@ -1,449 +1,453 @@
 /*-
  * SPDX-License-Identifier: BSD-3-Clause
  *
  * Copyright (c) 1980, 1986, 1993
  *	The Regents of the University of California.  All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  *	@(#)route.h	8.4 (Berkeley) 1/9/95
  * $FreeBSD$
  */
 
 #ifndef _NET_ROUTE_H_
 #define _NET_ROUTE_H_
 
 #include <net/vnet.h>
 
 /*
  * Kernel resident routing tables.
  *
  * The routing tables are initialized when interface addresses
  * are set by making entries for all directly connected interfaces.
  */
 
 /*
  * Struct route consists of a destination address,
  * a route entry pointer, link-layer prepend data pointer along
  * with its length.
  */
 struct route {
 	struct	nhop_object *ro_nh;	/* nexthop object pointer */
 	struct	llentry *ro_lle;	/* link-layer entry pointer */
 	/*
 	 * ro_prepend and ro_plen are only used for bpf to pass in a
 	 * preformed header.  They are not cacheable.
 	 */
 	char		*ro_prepend;
 	uint16_t	ro_plen;
 	/* NOTE(review): presumably holds the RT_* bits defined below -- confirm */
 	uint16_t	ro_flags;
 	uint16_t	ro_mtu;	/* saved ro_rt mtu */
 	uint16_t	spare;
 	struct	sockaddr ro_dst;	/* destination address */
 };
 
 #define	RT_L2_ME_BIT		2	/* dst L2 addr is our address */
 #define	RT_MAY_LOOP_BIT		3	/* dst may require loop copy */
 #define	RT_HAS_HEADER_BIT	4	/* mbuf already have its header prepended */
 
 #define	RT_L2_ME		(1 << RT_L2_ME_BIT)		/* 0x0004 */
 #define	RT_MAY_LOOP		(1 << RT_MAY_LOOP_BIT)		/* 0x0008 */
 #define	RT_HAS_HEADER		(1 << RT_HAS_HEADER_BIT)	/* 0x0010 */
 
 #define	RT_REJECT		0x0020		/* Destination is reject */
 #define	RT_BLACKHOLE		0x0040		/* Destination is blackhole */
 #define	RT_HAS_GW		0x0080		/* Destination has GW  */
 #define	RT_LLE_CACHE		0x0100		/* Cache link layer  */
 
 /*
  * Per-route metrics, embedded in struct rt_msghdr as rtm_rmx.
  */
 struct rt_metrics {
 	u_long	rmx_locks;	/* Kernel must leave these values alone */
 	u_long	rmx_mtu;	/* MTU for this path */
 	u_long	rmx_hopcount;	/* max hops expected */
 	u_long	rmx_expire;	/* lifetime for route, e.g. redirect */
 	u_long	rmx_recvpipe;	/* inbound delay-bandwidth product */
 	u_long	rmx_sendpipe;	/* outbound delay-bandwidth product */
 	u_long	rmx_ssthresh;	/* outbound gateway buffer limit */
 	u_long	rmx_rtt;	/* estimated round trip time */
 	u_long	rmx_rttvar;	/* estimated rtt variance */
 	u_long	rmx_pksent;	/* packets sent using this route */
 	u_long	rmx_weight;	/* route weight */
 	u_long	rmx_nhidx;	/* route nexthop index */
 	u_long	rmx_filler[2];	/* will be used for T/TCP later */
 };
 
 /*
  * rmx_rtt and rmx_rttvar are stored as microseconds;
  * RTTTOPRHZ(rtt) converts to a value suitable for use
  * by a protocol slowtimo counter.
  */
 #define	RTM_RTTUNIT	1000000	/* units for rtt, rttvar, as units per sec */
 #define	RTTTOPRHZ(r)	((r) / (RTM_RTTUNIT / PR_SLOWHZ))
 
 /* lle state is exported in rmx_state rt_metrics field */
 #define	rmx_state	rmx_weight
 
 /* default route weight */
 #define	RT_DEFAULT_WEIGHT	1
 #define	RT_MAX_WEIGHT		16777215	/* 3 bytes */
 
 /*
  * Keep a generation count of routing table, incremented on route addition,
  * so we can invalidate caches.  This is accessed without a lock, as precision
  * is not required.
  */
 typedef volatile u_int rt_gen_t;	/* tree generation (for adds) */
 #define RT_GEN(fibnum, af)	rt_tables_get_gen(fibnum, af)
 
 #define	RT_DEFAULT_FIB	0	/* Explicitly mark fib=0 restricted cases */
 #define	RT_ALL_FIBS	-1	/* Announce event for every fib */
 #ifdef _KERNEL
 VNET_DECLARE(uint32_t, _rt_numfibs);	/* number of existing route tables */
 #define	V_rt_numfibs		VNET(_rt_numfibs)
 /* temporary compat arg */
 #define	rt_numfibs		V_rt_numfibs
 VNET_DECLARE(u_int, rt_add_addr_allfibs); /* Announce interfaces to all fibs */
 #define	V_rt_add_addr_allfibs	VNET(rt_add_addr_allfibs)
 
 /* Calculate flowid for locally-originated packets */
 #define	V_fib_hash_outbound	VNET(fib_hash_outbound)
 VNET_DECLARE(u_int, fib_hash_outbound);
 
 /* Outbound flowid generation rules */
 #ifdef RSS
 
 #define fib4_calc_packet_hash		xps_proto_software_hash_v4
 #define fib6_calc_packet_hash		xps_proto_software_hash_v6
 #define	CALC_FLOWID_OUTBOUND_SENDTO	true
 
 #ifdef ROUTE_MPATH
 #define	CALC_FLOWID_OUTBOUND		V_fib_hash_outbound
 #else
 #define	CALC_FLOWID_OUTBOUND		false
 #endif
 
 #else /* !RSS */
 
 #define fib4_calc_packet_hash		fib4_calc_software_hash
 #define fib6_calc_packet_hash		fib6_calc_software_hash
 
 #ifdef ROUTE_MPATH
 #define	CALC_FLOWID_OUTBOUND_SENDTO	V_fib_hash_outbound
 #define	CALC_FLOWID_OUTBOUND		V_fib_hash_outbound
 #else
 #define	CALC_FLOWID_OUTBOUND_SENDTO	false
 #define	CALC_FLOWID_OUTBOUND		false
 #endif
 
 #endif /* RSS */
 
 
 #endif /* _KERNEL */
 
 /*
  * We distinguish between routes to hosts and routes to networks,
  * preferring the former if available.  For each route we infer
  * the interface to use from the gateway address supplied when
  * the route was entered.  Routes that forward packets through
  * gateways are marked so that the output routines know to address the
  * gateway rather than the ultimate destination.
  */
 #define	RTF_UP		0x1		/* route usable */
 #define	RTF_GATEWAY	0x2		/* destination is a gateway */
 #define	RTF_HOST	0x4		/* host entry (net otherwise) */
 #define	RTF_REJECT	0x8		/* host or net unreachable */
 #define	RTF_DYNAMIC	0x10		/* created dynamically (by redirect) */
 #define	RTF_MODIFIED	0x20		/* modified dynamically (by redirect) */
 #define RTF_DONE	0x40		/* message confirmed */
 /*			0x80		   unused, was RTF_DELCLONE */
 /*			0x100		   unused, was RTF_CLONING */
 #define RTF_XRESOLVE	0x200		/* external daemon resolves name */
 #define RTF_LLINFO	0x400		/* DEPRECATED - exists ONLY for backward 
 					   compatibility */
 #define RTF_LLDATA	0x400		/* used by apps to add/del L2 entries */
 #define RTF_STATIC	0x800		/* manually added */
 #define RTF_BLACKHOLE	0x1000		/* just discard pkts (during updates) */
 #define RTF_PROTO2	0x4000		/* protocol specific routing flag */
 #define RTF_PROTO1	0x8000		/* protocol specific routing flag */
 /*			0x10000		   unused, was RTF_PRCLONING */
 /*			0x20000		   unused, was RTF_WASCLONED */
 #define RTF_PROTO3	0x40000		/* protocol specific routing flag */
 #define	RTF_FIXEDMTU	0x80000		/* MTU was explicitly specified */
 #define RTF_PINNED	0x100000	/* route is immutable */
 #define	RTF_LOCAL	0x200000 	/* route represents a local address */
 #define	RTF_BROADCAST	0x400000	/* route represents a bcast address */
 #define	RTF_MULTICAST	0x800000	/* route represents a mcast address */
 					/* 0x8000000 and up unassigned */
 #define	RTF_STICKY	 0x10000000	/* always route dst->src */
 
 /*			0x40000000	   unused, was RTF_RNH_LOCKED */
 
 #define	RTF_GWFLAG_COMPAT 0x80000000	/* a compatibility bit for interacting
 					   with existing routing apps */
 
 /* Mask of RTF flags that are allowed to be modified by RTM_CHANGE. */
 #define RTF_FMASK	\
 	(RTF_PROTO1 | RTF_PROTO2 | RTF_PROTO3 | RTF_BLACKHOLE | \
 	 RTF_REJECT | RTF_STATIC | RTF_STICKY)
 
 /*
  * fib_ nexthop API flags.
  */
 
 /* Consumer-visible nexthop info flags */
 #define	NHF_MULTIPATH		0x0008	/* Nexthop is a nexthop group */
 #define	NHF_REJECT		0x0010	/* RTF_REJECT */
 #define	NHF_BLACKHOLE		0x0020	/* RTF_BLACKHOLE */
 #define	NHF_REDIRECT		0x0040	/* RTF_DYNAMIC|RTF_MODIFIED */
 #define	NHF_DEFAULT		0x0080	/* Default route */
 #define	NHF_BROADCAST		0x0100	/* RTF_BROADCAST */
 #define	NHF_GATEWAY		0x0200	/* RTF_GATEWAY */
 #define	NHF_HOST		0x0400	/* RTF_HOST */
 
 /* Nexthop request flags */
 #define	NHR_NONE		0x00	/* empty flags field */
 #define	NHR_REF			0x01	/* reference nexthop */
 #define	NHR_NODEFAULT		0x02	/* uRPF: do not consider default route */
 
 /* Control plane route request flags */
 #define	NHR_COPY		0x100	/* Copy rte data */
 #define	NHR_UNLOCKED		0x200	/* Do not lock table */
 
 /*
  * Routing statistics.
  * Every field is a 64-bit event counter.
  */
 struct rtstat {
 	uint64_t rts_badredirect;	/* bogus redirect calls */
 	uint64_t rts_dynamic;		/* routes created by redirects */
 	uint64_t rts_newgateway;	/* routes modified by redirects */
 	uint64_t rts_unreach;		/* lookups which failed */
 	uint64_t rts_wildcard;		/* lookups satisfied by a wildcard */
 	uint64_t rts_nh_idx_alloc_failure;	/* nexthop index alloc failure */
 	uint64_t rts_nh_alloc_failure;	/* nexthop allocation failure */
 	uint64_t rts_add_failure;	/* # of route addition failures */
 	uint64_t rts_add_retry;		/* # of route addition retries */
 	uint64_t rts_del_failure;	/* # of route deletion failures */
 	uint64_t rts_del_retry;		/* # of route deletion retries */
 };
 
 /*
  * Structures for routing messages.
  *
  * struct rt_msghdr is the header used by the message types annotated
  * "(1)" in the message type list below.
  */
 struct rt_msghdr {
 	u_short	rtm_msglen;	/* to skip over non-understood messages */
 	u_char	rtm_version;	/* future binary compatibility */
 	u_char	rtm_type;	/* message type */
 	u_short	rtm_index;	/* index for associated ifp */
 	u_short _rtm_spare1;
 	int	rtm_flags;	/* flags, incl. kern & message, e.g. DONE */
 	int	rtm_addrs;	/* bitmask identifying sockaddrs in msg */
 	pid_t	rtm_pid;	/* identify sender */
 	int	rtm_seq;	/* for sender to identify action */
 	int	rtm_errno;	/* why failed */
 	int	rtm_fmask;	/* bitmask used in RTM_CHANGE message */
 	u_long	rtm_inits;	/* which metrics we are initializing */
 	struct	rt_metrics rtm_rmx; /* metrics themselves */
 };
 
 #define RTM_VERSION	5	/* Up the ante and ignore older versions */
 
 /*
  * Message types.
  *
  * The format for each message is annotated below using the following
  * identifiers:
  *
  * (1) struct rt_msghdr
  * (2) struct ifa_msghdr
  * (3) struct if_msghdr
  * (4) struct ifma_msghdr
  * (5) struct if_announcemsghdr
  *
  */
 #define	RTM_ADD		0x1	/* (1) Add Route */
 #define	RTM_DELETE	0x2	/* (1) Delete Route */
 #define	RTM_CHANGE	0x3	/* (1) Change Metrics or flags */
 #define	RTM_GET		0x4	/* (1) Report Metrics */
 #define	RTM_LOSING	0x5	/* (1) Kernel Suspects Partitioning */
 #define	RTM_REDIRECT	0x6	/* (1) Told to use different route */
 #define	RTM_MISS	0x7	/* (1) Lookup failed on this address */
 #define	RTM_LOCK	0x8	/* (1) fix specified metrics */
 		    /*	0x9  */
 		    /*	0xa  */
 #define	RTM_RESOLVE	0xb	/* (1) req to resolve dst to LL addr */
 #define	RTM_NEWADDR	0xc	/* (2) address being added to iface */
 #define	RTM_DELADDR	0xd	/* (2) address being removed from iface */
 #define	RTM_IFINFO	0xe	/* (3) iface going up/down etc. */
 #define	RTM_NEWMADDR	0xf	/* (4) mcast group membership being added to if */
 #define	RTM_DELMADDR	0x10	/* (4) mcast group membership being deleted */
 #define	RTM_IFANNOUNCE	0x11	/* (5) iface arrival/departure */
 #define	RTM_IEEE80211	0x12	/* (5) IEEE80211 wireless event */
 
 /*
  * Bitmask values for rtm_inits and rmx_locks.
  */
 #define RTV_MTU		0x1	/* init or lock _mtu */
 #define RTV_HOPCOUNT	0x2	/* init or lock _hopcount */
 #define RTV_EXPIRE	0x4	/* init or lock _expire */
 #define RTV_RPIPE	0x8	/* init or lock _recvpipe */
 #define RTV_SPIPE	0x10	/* init or lock _sendpipe */
 #define RTV_SSTHRESH	0x20	/* init or lock _ssthresh */
 #define RTV_RTT		0x40	/* init or lock _rtt */
 #define RTV_RTTVAR	0x80	/* init or lock _rttvar */
 #define RTV_WEIGHT	0x100	/* init or lock _weight */
 
 /*
  * Bitmask values for rtm_addrs.
  */
 #define RTA_DST		0x1	/* destination sockaddr present */
 #define RTA_GATEWAY	0x2	/* gateway sockaddr present */
 #define RTA_NETMASK	0x4	/* netmask sockaddr present */
 #define RTA_GENMASK	0x8	/* cloning mask sockaddr present */
 #define RTA_IFP		0x10	/* interface name sockaddr present */
 #define RTA_IFA		0x20	/* interface addr sockaddr present */
 #define RTA_AUTHOR	0x40	/* sockaddr for author of redirect */
 #define RTA_BRD		0x80	/* for NEWADDR, broadcast or p-p dest addr */
 
 /*
  * Index offsets for sockaddr array for alternate internal encoding.
  */
 #define RTAX_DST	0	/* destination sockaddr present */
 #define RTAX_GATEWAY	1	/* gateway sockaddr present */
 #define RTAX_NETMASK	2	/* netmask sockaddr present */
 #define RTAX_GENMASK	3	/* cloning mask sockaddr present */
 #define RTAX_IFP	4	/* interface name sockaddr present */
 #define RTAX_IFA	5	/* interface addr sockaddr present */
 #define RTAX_AUTHOR	6	/* sockaddr for author of redirect */
 #define RTAX_BRD	7	/* for NEWADDR, broadcast or p-p dest addr */
 #define RTAX_MAX	8	/* size of array to allocate */
 
 struct rtentry;
 struct nhop_object;
 typedef int rib_filter_f_t(const struct rtentry *, const struct nhop_object *,
     void *);
 
 /*
  * Description of a route (addresses, flags, metrics) handed to the
  * routing control functions such as rib_lookup_info().
  */
 struct rt_addrinfo {
 	int	rti_addrs;			/* presumably RTA_ bitmask of rti_info[] slots in use -- confirm */
 	int	rti_flags;			/* Route RTF_ flags */
 	struct	sockaddr *rti_info[RTAX_MAX];	/* Sockaddr data, indexed by RTAX_ */
 	struct	ifaddr *rti_ifa;		/* value of rt_ifa addr */
 	struct	ifnet *rti_ifp;			/* route interface */
 	rib_filter_f_t	*rti_filter;		/* filter function */
 	void	*rti_filterdata;		/* filter parameters */
 	u_long	rti_mflags;			/* metrics RTV_ flags */
 	u_long	rti_spare;			/* Will be used for fib */
 	struct	rt_metrics *rti_rmx;		/* Pointer to route metrics */
 };
 
 /*
  * This macro returns the size of a struct sockaddr when passed
  * through a routing socket. Basically we round up sa_len to
  * a multiple of sizeof(long), with a minimum of sizeof(long).
  * The case sa_len == 0 should only apply to empty structures.
  */
 #define SA_SIZE(sa)						\
     (  (((struct sockaddr *)(sa))->sa_len == 0) ?		\
 	sizeof(long)		:				\
 	1 + ( (((struct sockaddr *)(sa))->sa_len - 1) | (sizeof(long) - 1) ) )
 
 #define	sa_equal(a, b) (	\
     (((const struct sockaddr *)(a))->sa_len == ((const struct sockaddr *)(b))->sa_len) && \
     (bcmp((a), (b), ((const struct sockaddr *)(b))->sa_len) == 0))
 
 #ifdef _KERNEL
 
 #define RT_LINK_IS_UP(ifp)	(!((ifp)->if_capabilities & IFCAP_LINKSTATE) \
 				 || (ifp)->if_link_state == LINK_STATE_UP)
 
 /*
  * If route @_ro has a cached nexthop, pass it to NH_FREE() and clear
  * the pointer.
  */
 #define	RO_NHFREE(_ro) do {					\
 	if ((_ro)->ro_nh) {					\
 		NH_FREE((_ro)->ro_nh);				\
 		(_ro)->ro_nh = NULL;				\
 	}							\
 } while (0)
 
 /*
  * Drop everything cached in route @ro: the link-layer entry (via
  * LLE_FREE()) and the nexthop (via NH_FREE()), clearing both pointers.
  */
 #define	RO_INVALIDATE_CACHE(ro) do {					\
 		if ((ro)->ro_lle != NULL) {				\
 			LLE_FREE((ro)->ro_lle);				\
 			(ro)->ro_lle = NULL;				\
 		}							\
 		if ((ro)->ro_nh != NULL) {				\
 			NH_FREE((ro)->ro_nh);				\
 			(ro)->ro_nh = NULL;				\
 		}							\
 	} while (0)
 
+/*
+ * Yields the address family of (ro)->ro_dst when @ro is non-NULL and has
+ * RT_HAS_GW set in ro_flags; otherwise yields the family of @dst.
+ */
+#define RO_GET_FAMILY(ro, dst)						\
+	((((ro) != NULL) && (((ro)->ro_flags & RT_HAS_GW) != 0))	\
+	? (ro)->ro_dst.sa_family : (dst)->sa_family)
+
 /*
  * Validate a cached route based on a supplied cookie.  If there is an
  * out-of-date cache, simply free it.  Update the generation number
  * for the new allocation
  */
 #define NH_VALIDATE(ro, cookiep, fibnum) do {				\
 	rt_gen_t cookie = RT_GEN(fibnum, (ro)->ro_dst.sa_family);	\
 	if (*(cookiep) != cookie) {					\
 		RO_INVALIDATE_CACHE(ro);				\
 		*(cookiep) = cookie;					\
 	}								\
 } while (0)
 
 struct ifmultiaddr;
 struct rib_head;
 
 void	 rt_ieee80211msg(struct ifnet *, int, void *, size_t);
 void	 rt_ifannouncemsg(struct ifnet *, int);
 void	 rt_ifmsg(struct ifnet *);
 void	 rt_missmsg(int, struct rt_addrinfo *, int, int);
 void	 rt_missmsg_fib(int, struct rt_addrinfo *, int, int, int);
 int	 rt_addrmsg(int, struct ifaddr *, int);
 int	 rt_routemsg(int, struct rtentry *, struct nhop_object *, int);
 int	 rt_routemsg_info(int, struct rt_addrinfo *, int);
 void	 rt_newmaddrmsg(int, struct ifmultiaddr *);
 void 	 rt_maskedcopy(struct sockaddr *, struct sockaddr *, struct sockaddr *);
 struct rib_head *rt_table_init(int, int, u_int);
 void	rt_table_destroy(struct rib_head *);
 u_int	rt_tables_get_gen(uint32_t table, sa_family_t family);
 
 struct sockaddr *rtsock_fix_netmask(const struct sockaddr *dst,
 	    const struct sockaddr *smask, struct sockaddr_storage *dmask);
 
 void	rt_updatemtu(struct ifnet *);
 
 void	rt_flushifroutes(struct ifnet *ifp);
 
 /* XXX MRT NEW VERSIONS THAT USE FIBs
  * For now the protocol independent versions are the same as the AF_INET ones
  * but this will change.. 
  */
 int	rtioctl_fib(u_long, caddr_t, u_int);
 int	rib_lookup_info(uint32_t, const struct sockaddr *, uint32_t, uint32_t,
 	    struct rt_addrinfo *);
 void	rib_free_info(struct rt_addrinfo *info);
 
 /* New API */
 void rib_flush_routes_family(int family);
 struct nhop_object *rib_lookup(uint32_t fibnum, const struct sockaddr *dst,
 	    uint32_t flags, uint32_t flowid);
 #endif
 
 #endif
diff --git a/sys/net/route/route_ctl.c b/sys/net/route/route_ctl.c
index a686d1623053..6db088102cd3 100644
--- a/sys/net/route/route_ctl.c
+++ b/sys/net/route/route_ctl.c
@@ -1,1560 +1,1588 @@
 /*-
  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
  *
  * Copyright (c) 2020 Alexander V. Chernikov
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 #include "opt_inet.h"
 #include "opt_inet6.h"
 #include "opt_route.h"
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/malloc.h>
 #include <sys/mbuf.h>
 #include <sys/socket.h>
 #include <sys/sysctl.h>
 #include <sys/syslog.h>
 #include <sys/kernel.h>
 #include <sys/lock.h>
 #include <sys/rmlock.h>
 
 #include <net/if.h>
 #include <net/if_var.h>
 #include <net/if_dl.h>
 #include <net/vnet.h>
 #include <net/route.h>
 #include <net/route/route_ctl.h>
 #include <net/route/route_var.h>
 #include <net/route/nhop_utils.h>
 #include <net/route/nhop.h>
 #include <net/route/nhop_var.h>
 #include <netinet/in.h>
 #include <netinet6/scope6_var.h>
 
 #include <vm/uma.h>
 
 /*
  * This file contains control plane routing tables functions.
  *
  * All functions assumes they are called in net epoch.
  */
 
 /*
  * One registered RIB change subscription.
  */
 struct rib_subscription {
 	CK_STAILQ_ENTRY(rib_subscription)	next;	/* list linkage */
 	rib_subscription_cb_t			*func;	/* subscriber callback */
 	void					*arg;	/* presumably opaque argument for func -- confirm */
 	struct rib_head				*rnh;	/* routing table head */
 	enum rib_subscription_type		type;	/* subscription type */
 	struct epoch_context			epoch_ctx; /* presumably used by destroy_subscription_epoch() -- confirm */
 };
 
 static int add_route(struct rib_head *rnh, struct rt_addrinfo *info,
     struct rib_cmd_info *rc);
 static int add_route_nhop(struct rib_head *rnh, struct rtentry *rt,
     struct rt_addrinfo *info, struct route_nhop_data *rnd,
     struct rib_cmd_info *rc);
 static int del_route(struct rib_head *rnh, struct rt_addrinfo *info,
     struct rib_cmd_info *rc);
 static int change_route(struct rib_head *rnh, struct rt_addrinfo *info,
     struct route_nhop_data *nhd_orig, struct rib_cmd_info *rc);
 
 static int rt_unlinkrte(struct rib_head *rnh, struct rt_addrinfo *info,
     struct rib_cmd_info *rc);
 
 static void rib_notify(struct rib_head *rnh, enum rib_subscription_type type,
     struct rib_cmd_info *rc);
 
 static void destroy_subscription_epoch(epoch_context_t ctx);
 #ifdef ROUTE_MPATH
 static bool rib_can_multipath(struct rib_head *rh);
 #endif
 
 /* Per-vnet multipath routing configuration */
 SYSCTL_DECL(_net_route);
 #define	V_rib_route_multipath	VNET(rib_route_multipath)
 #ifdef ROUTE_MPATH
 #define _MP_FLAGS	CTLFLAG_RW
 #else
 #define _MP_FLAGS	CTLFLAG_RD
 #endif
 VNET_DEFINE(u_int, rib_route_multipath) = 1;
 SYSCTL_UINT(_net_route, OID_AUTO, multipath, _MP_FLAGS | CTLFLAG_VNET,
     &VNET_NAME(rib_route_multipath), 0, "Enable route multipath");
 #undef _MP_FLAGS
 
+#if defined(INET) && defined(INET6)
+FEATURE(ipv4_rfc5549_support, "Route IPv4 packets via IPv6 nexthops");
+#define V_rib_route_ipv6_nexthop VNET(rib_route_ipv6_nexthop)
+VNET_DEFINE(u_int, rib_route_ipv6_nexthop) = 1;
+SYSCTL_UINT(_net_route, OID_AUTO, ipv6_nexthop, CTLFLAG_RW | CTLFLAG_VNET,
+    &VNET_NAME(rib_route_ipv6_nexthop), 0, "Enable IPv4 route via IPv6 Next Hop address");
+#endif
+
 /* Routing table UMA zone */
 VNET_DEFINE_STATIC(uma_zone_t, rtzone);
 #define	V_rtzone	VNET(rtzone)
 
 /*
  * Creates the "rtentry" UMA zone, sized for struct rtentry, and stores
  * it in V_rtzone.
  */
 void
 vnet_rtzone_init(void)
 {
 
 	V_rtzone = uma_zcreate("rtentry", sizeof(struct rtentry),
 		NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
 }
 
 #ifdef VIMAGE
 /*
  * Destroys the rtentry UMA zone stored in V_rtzone.
  */
 void
 vnet_rtzone_destroy(void)
 {
 
 	uma_zdestroy(V_rtzone);
 }
 #endif
 
 /*
  * Releases the nexthop (or nexthop group) referenced by @rt and returns
  * @rt to the rtentry UMA zone.
  */
 static void
 destroy_rtentry(struct rtentry *rt)
 {
 #ifdef VIMAGE
 	struct nhop_object *nh = rt->rt_nhop;
 
 	/*
 	 * At this moment rnh, nh_control may be already freed.
 	 * nhop interface may have been migrated to a different vnet.
 	 * Use vnet stored in the nexthop to delete the entry.
 	 */
 #ifdef ROUTE_MPATH
 	/* For a nexthop group, take the vnet from its first member. */
 	if (NH_IS_NHGRP(nh)) {
 		struct weightened_nhop *wn;
 		uint32_t num_nhops;
 		wn = nhgrp_get_nhops((struct nhgrp_object *)nh, &num_nhops);
 		nh = wn[0].nh;
 	}
 #endif
 	CURVNET_SET(nhop_get_vnet(nh));
 #endif
 
 	/* Unreference nexthop */
 	nhop_free_any(rt->rt_nhop);
 
 	uma_zfree(V_rtzone, rt);
 
 	CURVNET_RESTORE();
 }
 
 /*
  * Epoch callback: recovers the rtentry that embeds @ctx as its
  * rt_epoch_ctx member and destroys it.
  */
 static void
 destroy_rtentry_epoch(epoch_context_t ctx)
 {
 
 	destroy_rtentry(__containerof(ctx, struct rtentry, rt_epoch_ctx));
 }
 
 /*
  * Schedule rtentry deletion
  */
 static void
 rtfree(struct rtentry *rt)
 {
 
 	KASSERT(rt != NULL, ("%s: NULL rt", __func__));
 
 	epoch_call(net_epoch_preempt, destroy_rtentry_epoch,
 	    &rt->rt_epoch_ctx);
 }
 
 static struct rib_head *
 get_rnh(uint32_t fibnum, const struct rt_addrinfo *info)
 {
 	struct rib_head *rnh;
 	struct sockaddr *dst;
 
 	KASSERT((fibnum < rt_numfibs), ("rib_add_route: bad fibnum"));
 
 	dst = info->rti_info[RTAX_DST];
 	rnh = rt_tables_get_rnh(fibnum, dst->sa_family);
 
 	return (rnh);
 }
 
+#if defined(INET) && defined(INET6)
+/*
+ * Reports whether the ipv6_nexthop knob is non-zero in the vnet that
+ * owns @rh.
+ */
+static bool
+rib_can_ipv6_nexthop_address(struct rib_head *rh)
+{
+	bool enabled;
+
+	CURVNET_SET(rh->rib_vnet);
+	enabled = (V_rib_route_ipv6_nexthop != 0);
+	CURVNET_RESTORE();
+
+	return (enabled);
+}
+#endif
+
 #ifdef ROUTE_MPATH
 /*
  * Reports whether the multipath knob is non-zero in the vnet that
  * owns @rh.
  */
 static bool
 rib_can_multipath(struct rib_head *rh)
 {
 	bool enabled;
 
 	CURVNET_SET(rh->rib_vnet);
 	enabled = (V_rib_route_multipath != 0);
 	CURVNET_RESTORE();
 
 	return (enabled);
 }
 
 /*
  * Checks if nhop is multipath-eligible.
  *
  * A nexthop already flagged NHF_MULTIPATH is always eligible.
  * Otherwise it must have a gateway (NHF_GATEWAY) and must not be
  * a redirect (NHF_REDIRECT).
  *
  * Returns true for a multipath-eligible nexthop, false otherwise.
  */
 bool
 nhop_can_multipath(const struct nhop_object *nh)
 {
 
 	if ((nh->nh_flags & NHF_MULTIPATH) != 0)
 		return (true);
 
 	/* Gateway bit set and redirect bit clear. */
 	return ((nh->nh_flags & (NHF_GATEWAY | NHF_REDIRECT)) == NHF_GATEWAY);
 }
 #endif
 
 /*
  * Picks the route weight: rmx_weight from @info when RTV_WEIGHT is set
  * in rti_mflags, @default_weight otherwise.  The result is clamped to
  * RT_MAX_WEIGHT.
  */
 static int
 get_info_weight(const struct rt_addrinfo *info, uint32_t default_weight)
 {
 	uint32_t w;
 
 	w = (info->rti_mflags & RTV_WEIGHT) ?
 	    info->rti_rmx->rmx_weight : default_weight;
 
 	/* Keep upper 1 byte for adm distance purposes */
 	return ((w > RT_MAX_WEIGHT) ? RT_MAX_WEIGHT : w);
 }
 
 /*
  * Returns true if @rt has RTF_HOST set in its flags.
  */
 bool
 rt_is_host(const struct rtentry *rt)
 {
 
 	return ((rt->rte_flags & RTF_HOST) != 0);
 }
 
 /*
  * Returns the address family stored in the key sockaddr of @rt.
  */
 sa_family_t
 rt_get_family(const struct rtentry *rt)
 {
 
 	return (((const struct sockaddr *)rt_key_const(rt))->sa_family);
 }
 
 /*
  * Returns the rt_nhop pointer of @rt unchanged; it refers to either
  * a nexthop or a nexthop group.
  */
 struct nhop_object *
 rt_get_raw_nhop(const struct rtentry *rt)
 {
 	struct nhop_object *nh_any;
 
 	nh_any = rt->rt_nhop;
 
 	return (nh_any);
 }
 
 #ifdef INET
 /*
  * Copies the IPv4 prefix address of @rt into @paddr and its prefix
  * length into @plen (32 when the route has no mask).
  * @pscopeid is always set to 0.
  */
 void
 rt_get_inet_prefix_plen(const struct rtentry *rt, struct in_addr *paddr,
     int *plen, uint32_t *pscopeid)
 {
 	const struct sockaddr_in *key, *mask;
 
 	key = (const struct sockaddr_in *)rt_key_const(rt);
 	KASSERT((key->sin_family == AF_INET),
 	    ("rt family is %d, not inet", key->sin_family));
 	*paddr = key->sin_addr;
 
 	mask = (const struct sockaddr_in *)rt_mask_const(rt);
 	*plen = (mask != NULL) ? bitcount32(mask->sin_addr.s_addr) : 32;
 	*pscopeid = 0;
 }
 
 /*
  * Copies the IPv4 prefix address of @rt into @paddr and its mask into
  * @pmask.  A route without a mask yields INADDR_BROADCAST (all ones).
  * @pscopeid is always set to 0.
  */
 void
 rt_get_inet_prefix_pmask(const struct rtentry *rt, struct in_addr *paddr,
     struct in_addr *pmask, uint32_t *pscopeid)
 {
 	const struct sockaddr_in *key, *mask;
 
 	key = (const struct sockaddr_in *)rt_key_const(rt);
 	KASSERT((key->sin_family == AF_INET),
 	    ("rt family is %d, not inet", key->sin_family));
 	*paddr = key->sin_addr;
 
 	mask = (const struct sockaddr_in *)rt_mask_const(rt);
 	if (mask != NULL)
 		*pmask = mask->sin_addr;
 	else
 		pmask->s_addr = INADDR_BROADCAST;
 	*pscopeid = 0;
 }
 #endif
 
 #ifdef INET6
 /*
  * Returns the number of set bits in the IPv6 mask @addr, summed over
  * its four 32-bit words.
  */
 static int
 inet6_get_plen(const struct in6_addr *addr)
 {
 	int plen = 0;
 
 	for (int i = 0; i < 4; i++)
 		plen += bitcount32(addr->s6_addr32[i]);
 
 	return (plen);
 }
 
 /*
  * Stores IPv6 address and prefix length of @rt inside
  *  @paddr and @plen. Addresses are returned in de-embedded form.
  * @plen is 128 when the route has no mask.
  * Scopeid is set to 0 for non-LL addresses.
  */
 void
 rt_get_inet6_prefix_plen(const struct rtentry *rt, struct in6_addr *paddr,
     int *plen, uint32_t *pscopeid)
 {
 	const struct sockaddr_in6 *dst;
 
 	dst = (const struct sockaddr_in6 *)rt_key_const(rt);
 	KASSERT((dst->sin6_family == AF_INET6),
 	    ("rt family is %d, not inet6", dst->sin6_family));
 	if (IN6_IS_SCOPE_LINKLOCAL(&dst->sin6_addr))
 		in6_splitscope(&dst->sin6_addr, paddr, pscopeid);
 	else {
 		*paddr = dst->sin6_addr;
 		/* Do not leave the caller's scope id uninitialized. */
 		*pscopeid = 0;
 	}
 	dst = (const struct sockaddr_in6 *)rt_mask_const(rt);
 	if (dst == NULL)
 		*plen = 128;
 	else
 		*plen = inet6_get_plen(&dst->sin6_addr);
 }
 
 /*
  * Stores IPv6 address and prefix mask of @rt inside
  *  @paddr and @pmask. Addresses are returned in de-embedded form.
  * @pmask is all ones when the route has no mask.
  * Scopeid is set to 0 for non-LL addresses.
  */
 void
 rt_get_inet6_prefix_pmask(const struct rtentry *rt, struct in6_addr *paddr,
     struct in6_addr *pmask, uint32_t *pscopeid)
 {
 	const struct sockaddr_in6 *dst;
 
 	dst = (const struct sockaddr_in6 *)rt_key_const(rt);
 	KASSERT((dst->sin6_family == AF_INET6),
 	    ("rt family is %d, not inet6", dst->sin6_family));
 	if (IN6_IS_SCOPE_LINKLOCAL(&dst->sin6_addr))
 		in6_splitscope(&dst->sin6_addr, paddr, pscopeid);
 	else {
 		*paddr = dst->sin6_addr;
 		/* Do not leave the caller's scope id uninitialized. */
 		*pscopeid = 0;
 	}
 	dst = (const struct sockaddr_in6 *)rt_mask_const(rt);
 	if (dst == NULL)
 		memset(pmask, 0xFF, sizeof(struct in6_addr));
 	else
 		*pmask = dst->sin6_addr;
 }
 #endif
 
 /*
  * When @info carries RTV_EXPIRE, sets rt_expire of @rt from rmx_expire:
  * a zero value is stored as 0, any other value is rebased from
  * time_second to time_uptime.  Without RTV_EXPIRE, @rt is left untouched.
  */
 static void
 rt_set_expire_info(struct rtentry *rt, const struct rt_addrinfo *info)
 {
 
 	if ((info->rti_mflags & RTV_EXPIRE) == 0)
 		return;
 
 	if (info->rti_rmx->rmx_expire == 0)
 		rt->rt_expire = 0;
 	else
 		rt->rt_expire =
 		    info->rti_rmx->rmx_expire - time_second + time_uptime;
 }
 
 /*
  * Compares gateway @gw against the gateway stored in nexthop @nh.
  *
  * Families must be equal.  AF_INET and AF_INET6 compare addresses,
  * AF_LINK compares interface indexes, and any other family compares
  * raw bytes over the nexthop's sa_len.
  *
  * Returns true if matches, false otherwise.
  */
 bool
 match_nhop_gw(const struct nhop_object *nh, const struct sockaddr *gw)
 {
 	const struct sockaddr_in *gw4;
 	const struct sockaddr_in6 *gw6;
 	const struct sockaddr_dl *gwl;
 
 	if (nh->gw_sa.sa_family != gw->sa_family)
 		return (false);
 
 	switch (gw->sa_family) {
 	case AF_INET:
 		gw4 = (const struct sockaddr_in *)gw;
 		return (nh->gw4_sa.sin_addr.s_addr == gw4->sin_addr.s_addr);
 	case AF_INET6:
 		/*
 		 * Currently (2020-09) IPv6 gws in kernel have their
 		 * scope embedded. Once this becomes false, this code
 		 * has to be revisited.
 		 */
 		gw6 = (const struct sockaddr_in6 *)gw;
 		return (IN6_ARE_ADDR_EQUAL(&nh->gw6_sa.sin6_addr,
 		    &gw6->sin6_addr) ? true : false);
 	case AF_LINK:
 		gwl = (const struct sockaddr_dl *)gw;
 		return (nh->gwl_sa.sdl_index == gwl->sdl_index);
 	default:
 		break;
 	}
 
 	/* Any other family: byte-wise comparison. */
 	return (memcmp(&nh->gw_sa, gw, nh->gw_sa.sa_len) == 0);
 }
 
 /*
  * Checks if data in @info matches nexthop @nh.
  *
  * When @info carries a filter function, its verdict alone decides the
  * result; otherwise the gateway from @info (if any) is compared with @nh.
  *
  * Returns 0 on success,
  * ESRCH if the gateway did not match,
  * ENOENT if the filter function returned 0.
  */
 int
 check_info_match_nhop(const struct rt_addrinfo *info, const struct rtentry *rt,
     const struct nhop_object *nh)
 {
 	const struct sockaddr *gw;
 
 	if (info->rti_filter != NULL) {
 		if (info->rti_filter(rt, nh, info->rti_filterdata) != 0)
 			return (0);
 		return (ENOENT);
 	}
 
 	gw = info->rti_info[RTAX_GATEWAY];
 	if (gw == NULL || match_nhop_gw(nh, gw))
 		return (0);
 
 	return (ESRCH);
 }
 
 /*
  * Compares the "priority" of the route described by @info with that of
  * nexthop @nh.  The only criterion is pinning: RTF_PINNED in @info
  * versus NH_IS_PINNED() on @nh.
  *
  * Returns:
  * 1 if @info data has higher priority (pinned vs. not pinned)
  * 0 if priority is the same
  * -1 if priority is lower (not pinned vs. pinned)
  */
 int
 can_override_nhop(const struct rt_addrinfo *info, const struct nhop_object *nh)
 {
 	const bool info_pinned = (info->rti_flags & RTF_PINNED) != 0;
 	const bool nh_pinned = (NH_IS_PINNED(nh)) ? true : false;
 
 	if (info_pinned == nh_pinned)
 		return (0);
 
 	return (info_pinned ? 1 : -1);
 }
 
 /*
  * Exact-match prefix lookup of @dst/@netmask in @rnh via rnh_lookup().
  * The RIB lock must be held.
  *
  * Returns the matching rtentry or NULL.  @rnd receives the entry's
  * nexthop and weight on a hit and NULL/0 on a miss.
  */
 static struct rtentry *
 lookup_prefix_bysa(struct rib_head *rnh, const struct sockaddr *dst,
     const struct sockaddr *netmask, struct route_nhop_data *rnd)
 {
 	struct rtentry *found;
 
 	RIB_LOCK_ASSERT(rnh);
 
 	found = (struct rtentry *)rnh->rnh_lookup(__DECONST(void *, dst),
 	    __DECONST(void *, netmask), &rnh->head);
 
 	rnd->rnd_nhop = (found != NULL) ? found->rt_nhop : NULL;
 	rnd->rnd_weight = (found != NULL) ? found->rt_weight : 0;
 
 	return (found);
 }
 
 /*
  * Exact-match prefix lookup using the destination and netmask carried
  * in @info.  Assumes RIB lock is held.
  *
  * Returns the matching rtentry or NULL; nexthop / weight of a found
  * entry are stored into @rnd.
  */
 struct rtentry *
 lookup_prefix(struct rib_head *rnh, const struct rt_addrinfo *info,
     struct route_nhop_data *rnd)
 {
 
 	return (lookup_prefix_bysa(rnh, info->rti_info[RTAX_DST],
 	    info->rti_info[RTAX_NETMASK], rnd));
 }
 
 /*
  * Adds the route described by @info to the table selected by @fibnum and
  * the address family of @info->rti_info[RTAX_DST].
  *
  * Returns 0 on success and fills in operation metadata into @rc.
  * Returns EAFNOSUPPORT if no table exists for the family and EINVAL if
  * a non-host route comes without a netmask.
  */
 int
 rib_add_route(uint32_t fibnum, struct rt_addrinfo *info,
     struct rib_cmd_info *rc)
 {
 	struct rib_head *rnh;
 	int error;
 
 	NET_EPOCH_ASSERT();
 
 	if ((rnh = get_rnh(fibnum, info)) == NULL)
 		return (EAFNOSUPPORT);
 
 	/*
 	 * Host routes never carry a netmask; every other route
 	 * must have one.
 	 */
 	if ((info->rti_flags & RTF_HOST) != 0) {
 		info->rti_info[RTAX_NETMASK] = NULL;
 	} else if (info->rti_info[RTAX_NETMASK] == NULL) {
 		return (EINVAL);
 	}
 
 	bzero(rc, sizeof(*rc));
 	rc->rc_cmd = RTM_ADD;
 
 	error = add_route(rnh, info, rc);
 	if (error != 0)
 		return (error);
 
 	rib_notify(rnh, RIB_NOTIFY_DELAYED, rc);
 
 	return (0);
 }
 
 /*
  * Checks if @dst and @gateway are a valid combination.
  *
  * Returns true if is valid, false otherwise.
  */
 static bool
 check_gateway(struct rib_head *rnh, struct sockaddr *dst,
     struct sockaddr *gateway)
 {
 	if (dst->sa_family == gateway->sa_family)
 		return (true);
 	else if (gateway->sa_family == AF_UNSPEC)
 		return (true);
 	else if (gateway->sa_family == AF_LINK)
 		return (true);
-	return (false);
+#if defined(INET) && defined(INET6)
+	else if (dst->sa_family == AF_INET && gateway->sa_family == AF_INET6 &&
+		rib_can_ipv6_nexthop_address(rnh))
+		return (true);
+#endif
+	else
+		return (false);
 }
 
 /*
  * Creates rtentry and nexthop based on @info data.
  *
  * The destination key stored in the new entry is masked with the
  * netmask from @info when one is present.
  *
  * Return 0 and fills in rtentry into @prt on success,
  * return errno otherwise:
  * EINVAL - RTF_GATEWAY without a gateway, dst/gateway combination
  *  rejected by check_gateway(), or dst too large for the entry,
  * ENOBUFS - rtentry allocation failed,
  * or the error returned by rt_getifa_fib() / nhop_create_from_info().
  */
 static int
 create_rtentry(struct rib_head *rnh, struct rt_addrinfo *info,
     struct rtentry **prt)
 {
 	struct sockaddr *dst, *ndst, *gateway, *netmask;
 	struct rtentry *rt;
 	struct nhop_object *nh;
 	int error, flags;
 
 	dst = info->rti_info[RTAX_DST];
 	gateway = info->rti_info[RTAX_GATEWAY];
 	netmask = info->rti_info[RTAX_NETMASK];
 	flags = info->rti_flags;
 
 	if ((flags & RTF_GATEWAY) && !gateway)
 		return (EINVAL);
 	if (dst && gateway && !check_gateway(rnh, dst, gateway))
 		return (EINVAL);
 
 	/* The key is stored inline in the rtentry; it has to fit. */
 	if (dst->sa_len > sizeof(((struct rtentry *)NULL)->rt_dstb))
 		return (EINVAL);
 
 	/* Resolve the interface address unless the caller supplied one. */
 	if (info->rti_ifa == NULL) {
 		error = rt_getifa_fib(info, rnh->rib_fibnum);
 		if (error)
 			return (error);
 	}
 
 	error = nhop_create_from_info(rnh, info, &nh);
 	if (error != 0)
 		return (error);
 
 	rt = uma_zalloc(V_rtzone, M_NOWAIT | M_ZERO);
 	if (rt == NULL) {
 		nhop_free(nh);
 		return (ENOBUFS);
 	}
 	rt->rte_flags = (RTF_UP | flags) & RTE_RT_FLAG_MASK;
 	rt->rt_nhop = nh;
 
 	/* Fill in dst */
 	memcpy(&rt->rt_dst, dst, dst->sa_len);
 	rt_key(rt) = &rt->rt_dst;
 
 	/*
 	 * point to the (possibly newly malloc'd) dest address.
 	 */
 	ndst = (struct sockaddr *)rt_key(rt);
 
 	/*
 	 * make sure it contains the value we want (masked if needed).
 	 */
 	if (netmask) {
 		rt_maskedcopy(dst, ndst, netmask);
 	} else
 		bcopy(dst, ndst, dst->sa_len);
 
 	rt->rt_weight = get_info_weight(info, RT_DEFAULT_WEIGHT);
 	rt_set_expire_info(rt, info);
 
 	*prt = rt;
 	return (0);
 }
 
 /*
  * Creates an rtentry from @info and tries to insert it into @rnh.
  *
  * If the prefix already exists, the existing route is either overridden
  * (when can_override_nhop() reports higher priority for @info), extended
  * into a multipath group (ROUTE_MPATH, when allowed), or the request
  * fails with EEXIST.
  *
  * Returns 0 on success with results in @rc, errno otherwise.
  */
 static int
 add_route(struct rib_head *rnh, struct rt_addrinfo *info,
     struct rib_cmd_info *rc)
 {
 	struct nhop_object *nh_orig;
 	struct route_nhop_data rnd_orig, rnd_add;
 	struct nhop_object *nh;
 	struct rtentry *rt, *rt_orig;
 	int error;
 
 	error = create_rtentry(rnh, info, &rt);
 	if (error != 0)
 		return (error);
 
 	/* Remember the new nexthop; rt may be freed on the paths below. */
 	rnd_add.rnd_nhop = rt->rt_nhop;
 	rnd_add.rnd_weight = rt->rt_weight;
 	nh = rt->rt_nhop;
 
 	RIB_WLOCK(rnh);
 	error = add_route_nhop(rnh, rt, info, &rnd_add, rc);
 	if (error == 0) {
 		RIB_WUNLOCK(rnh);
 		return (0);
 	}
 
 	/* addition failed. Lookup prefix in the rib to determine the cause */
 	rt_orig = lookup_prefix(rnh, info, &rnd_orig);
 	if (rt_orig == NULL) {
 		/* No prefix -> rnh_addaddr() failed to allocate memory */
 		RIB_WUNLOCK(rnh);
 		nhop_free(nh);
 		uma_zfree(V_rtzone, rt);
 		return (ENOMEM);
 	}
 
 	/* We have existing route in the RIB. */
 	nh_orig = rnd_orig.rnd_nhop;
 	/* Check if new route has higher preference */
 	if (can_override_nhop(info, nh_orig) > 0) {
 		/* Update nexthop to the new route */
 		change_route_nhop(rnh, rt_orig, info, &rnd_add, rc);
 		RIB_WUNLOCK(rnh);
 		/* The existing rtentry is reused; drop the new one. */
 		uma_zfree(V_rtzone, rt);
 		nhop_free(nh_orig);
 		return (0);
 	}
 
 	RIB_WUNLOCK(rnh);
 
 #ifdef ROUTE_MPATH
 	if (rib_can_multipath(rnh) && nhop_can_multipath(rnd_add.rnd_nhop) &&
 	    nhop_can_multipath(rnd_orig.rnd_nhop))
 		error = add_route_mpath(rnh, info, rt, &rnd_add, &rnd_orig, rc);
 	else
 #endif
 	/* Unable to add - another route with the same preference exists */
 	error = EEXIST;
 
 	/*
 	 * ROUTE_MPATH disabled: failed to add route, free both nhop and rt.
 	 * ROUTE_MPATH enabled: original nhop reference is unused in any case,
 	 *  free rt only if not _adding_ new route to rib (e.g. the case
 	 *  when initial lookup returned existing route, but then it got
 	 *  deleted prior to multipath group insertion, leading to a simple
 	 *  non-multipath add as a result).
 	 */
 	nhop_free(nh);
 	if ((error != 0) || rc->rc_cmd != RTM_ADD)
 		uma_zfree(V_rtzone, rt);
 
 	return (error);
 }
 
 /*
  * Removes route defined by @info from the kernel table specified by @fibnum and
  * sa_family in @info->rti_info[RTAX_DST].
  *
  * Returns 0 on success and fills in operation metadata into @rc.
  */
 int
 rib_del_route(uint32_t fibnum, struct rt_addrinfo *info, struct rib_cmd_info *rc)
 {
 	struct rib_head *rnh;
 	struct sockaddr *dst_orig, *netmask;
 	struct sockaddr_storage mdst;
 	int error;
 
 	NET_EPOCH_ASSERT();
 
 	rnh = get_rnh(fibnum, info);
 	if (rnh == NULL)
 		return (EAFNOSUPPORT);
 
 	bzero(rc, sizeof(struct rib_cmd_info));
 	rc->rc_cmd = RTM_DELETE;
 
 	dst_orig = info->rti_info[RTAX_DST];
 	netmask = info->rti_info[RTAX_NETMASK];
 
 	if (netmask != NULL) {
 		/* Ensure @dst is always properly masked */
 		if (dst_orig->sa_len > sizeof(mdst))
 			return (EINVAL);
 		rt_maskedcopy(dst_orig, (struct sockaddr *)&mdst, netmask);
 		info->rti_info[RTAX_DST] = (struct sockaddr *)&mdst;
 	}
 	error = del_route(rnh, info, rc);
 	info->rti_info[RTAX_DST] = dst_orig;
 
 	return (error);
 }
 
 /*
  * Conditionally unlinks rtentry matching data inside @info from @rnh.
  * Multipath routes (ROUTE_MPATH) are delegated to del_route_mpath().
  * Returns 0 on success with operation result stored in @rc.
  * On error, returns:
  * ESRCH - if prefix was not found,
  * EADDRINUSE - if trying to delete higher priority route.
  * ENOENT - if supplied filter function returned 0 (not matched).
  */
 static int
 rt_unlinkrte(struct rib_head *rnh, struct rt_addrinfo *info, struct rib_cmd_info *rc)
 {
 	struct rtentry *rt;
 	struct nhop_object *nh;
 	struct radix_node *rn;
 	struct route_nhop_data rnd;
 	int error;
 
 	rt = lookup_prefix(rnh, info, &rnd);
 	if (rt == NULL)
 		return (ESRCH);
 
 	nh = rt->rt_nhop;
 #ifdef ROUTE_MPATH
 	if (NH_IS_NHGRP(nh)) {
 		error = del_route_mpath(rnh, info, rt,
 		    (struct nhgrp_object *)nh, rc);
 		return (error);
 	}
 #endif
 	/* Gateway / filter from @info must match the installed nexthop. */
 	error = check_info_match_nhop(info, rt, nh);
 	if (error != 0)
 		return (error);
 
 	/* A negative result means the installed route has higher priority. */
 	if (can_override_nhop(info, nh) < 0)
 		return (EADDRINUSE);
 
 	/*
 	 * Remove the item from the tree and return it.
 	 * Complain if it is not there and do no more processing.
 	 */
 	rn = rnh->rnh_deladdr(info->rti_info[RTAX_DST],
 	    info->rti_info[RTAX_NETMASK], &rnh->head);
 	if (rn == NULL)
 		return (ESRCH);
 
 	if (rn->rn_flags & (RNF_ACTIVE | RNF_ROOT))
 		panic ("rtrequest delete");
 
 	rt = RNTORT(rn);
 	rt->rte_flags &= ~RTF_UP;
 
 	/* Finalize notification */
 	rib_bump_gen(rnh);
 	rnh->rnh_prefixes--;
 
 	rc->rc_cmd = RTM_DELETE;
 	rc->rc_rt = rt;
 	rc->rc_nh_old = rt->rt_nhop;
 	rc->rc_nh_weight = rt->rt_weight;
 	rib_notify(rnh, RIB_NOTIFY_IMMEDIATE, rc);
 
 	return (0);
 }
 
 /*
  * Unlinks the route described by @info from @rnh under the RIB write
  * lock, sends the delayed notification and schedules cleanup of what
  * was removed.
  *
  * Returns 0 on success or the error from rt_unlinkrte().
  */
 static int
 del_route(struct rib_head *rnh, struct rt_addrinfo *info,
     struct rib_cmd_info *rc)
 {
 	int error;
 
 	RIB_WLOCK(rnh);
 	error = rt_unlinkrte(rnh, info, rc);
 	RIB_WUNLOCK(rnh);
 	if (error != 0)
 		return (error);
 
 	rib_notify(rnh, RIB_NOTIFY_DELAYED, rc);
 
 	/*
 	 * If the caller wants it, then it can have it,
 	 * the entry will be deleted after the end of the current epoch.
 	 */
 	if (rc->rc_cmd == RTM_DELETE)
 		rtfree(rc->rc_rt);
 #ifdef ROUTE_MPATH
 	else {
 		/*
 		 * Deleting 1 path may result in RTM_CHANGE to
 		 * a different mpath group/nhop.
 		 * Free old mpath group.
 		 */
 		nhop_free_any(rc->rc_nh_old);
 	}
 #endif
 
 	return (0);
 }
 
 /*
  * Changes the route matching dst/netmask from @info in the table selected
  * by @fibnum and the address family of @info->rti_info[RTAX_DST].
  *
  * Returns 0 on success with operation metadata in @rc,
  * EAFNOSUPPORT if no table exists for the family,
  * ESRCH if the prefix is not found,
  * or the error returned by change_route().
  */
 int
 rib_change_route(uint32_t fibnum, struct rt_addrinfo *info,
     struct rib_cmd_info *rc)
 {
 	RIB_RLOCK_TRACKER;
 	struct route_nhop_data rnd_orig;
 	struct rib_head *rnh;
 	struct rtentry *rt;
 	int error;
 
 	NET_EPOCH_ASSERT();
 
 	rnh = get_rnh(fibnum, info);
 	if (rnh == NULL)
 		return (EAFNOSUPPORT);
 
 	bzero(rc, sizeof(struct rib_cmd_info));
 	rc->rc_cmd = RTM_CHANGE;
 
 	/* Check if updated gateway exists */
 	if ((info->rti_flags & RTF_GATEWAY) &&
 	    (info->rti_info[RTAX_GATEWAY] == NULL)) {
 
 		/*
 		 * route(8) adds RTF_GATEWAY flag if -interface is not set.
 		 * Remove RTF_GATEWAY to enforce consistency and maintain
 		 * compatibility.
 		 */
 		info->rti_flags &= ~RTF_GATEWAY;
 	}
 
 	/*
 	 * A route change is done in multiple steps, dropping and
 	 * reacquiring the lock in between. When multiple processes
 	 * change the same route, it may be modified between the steps.
 	 * Address it by retrying the operation multiple times before
 	 * failing.
 	 */
 
 	RIB_RLOCK(rnh);
 	rt = (struct rtentry *)rnh->rnh_lookup(info->rti_info[RTAX_DST],
 	    info->rti_info[RTAX_NETMASK], &rnh->head);
 
 	if (rt == NULL) {
 		RIB_RUNLOCK(rnh);
 		return (ESRCH);
 	}
 
 	/* Snapshot the current nexthop/weight as the baseline. */
 	rnd_orig.rnd_nhop = rt->rt_nhop;
 	rnd_orig.rnd_weight = rt->rt_weight;
 
 	RIB_RUNLOCK(rnh);
 
 	/* Retry only while change_route() reports EAGAIN. */
 	for (int i = 0; i < RIB_MAX_RETRIES; i++) {
 		error = change_route(rnh, info, &rnd_orig, rc);
 		if (error != EAGAIN)
 			break;
 	}
 
 	return (error);
 }
 
 /*
  * Creates nexthop @nh_new derived from @nh_orig with the changes
  * requested in @info.  The interface address is re-resolved with
  * rt_getifa_fib() first when the request may affect it.
  *
  * info->rti_ifa is reset to NULL before returning.
  * Returns 0 on success, errno otherwise.
  */
 static int
 change_nhop(struct rib_head *rnh, struct rt_addrinfo *info,
     struct nhop_object *nh_orig, struct nhop_object **nh_new)
 {
 	int error;
 
 	/*
 	 * New gateway could require new ifaddr, ifp;
 	 * flags may also be different; ifp may be specified
 	 * by ll sockaddr when protocol address is ambiguous
 	 */
 	if (((nh_orig->nh_flags & NHF_GATEWAY) &&
 	    info->rti_info[RTAX_GATEWAY] != NULL) ||
 	    info->rti_info[RTAX_IFP] != NULL ||
 	    (info->rti_info[RTAX_IFA] != NULL &&
 	     !sa_equal(info->rti_info[RTAX_IFA], nh_orig->nh_ifa->ifa_addr))) {
 		error = rt_getifa_fib(info, rnh->rib_fibnum);
 
 		if (error != 0) {
 			info->rti_ifa = NULL;
 			return (error);
 		}
 	}
 
 	error = nhop_create_from_nhop(rnh, nh_orig, info, nh_new);
 	info->rti_ifa = NULL;
 
 	return (error);
 }
 
 #ifdef ROUTE_MPATH
 /*
  * Changes one member of the multipath group referenced by @rnd_orig.
  *
  * The member is selected by matching @info against each nexthop of the
  * group.  A replacement nexthop is created from it, a new group is built
  * from a private copy of the member array with the replacement swapped
  * in, and the route is switched to the new group if it still points to
  * @rnd_orig.
  *
  * Returns 0 on success,
  * ESRCH if no group member matches @info,
  * EAGAIN if the temporary member array cannot be allocated,
  * or the error from change_nhop() / nhgrp_get_group() /
  * change_route_conditional().
  */
 static int
 change_mpath_route(struct rib_head *rnh, struct rt_addrinfo *info,
     struct route_nhop_data *rnd_orig, struct rib_cmd_info *rc)
 {
 	struct weightened_nhop *wn, *wn_new;
 	struct nhop_object *nh_orig, *nh_new;
 	struct route_nhop_data rnd_new;
 	uint32_t num_nhops;
 	int error;
 
 	wn = nhgrp_get_nhops((struct nhgrp_object *)rnd_orig->rnd_nhop,
 	    &num_nhops);
 
 	/* check_info_match_nhop() returns 0 when the nexthop matches. */
 	nh_orig = NULL;
 	for (uint32_t i = 0; i < num_nhops; i++) {
 		if (check_info_match_nhop(info, NULL, wn[i].nh) == 0) {
 			nh_orig = wn[i].nh;
 			break;
 		}
 	}
 
 	if (nh_orig == NULL)
 		return (ESRCH);
 
 	error = change_nhop(rnh, info, nh_orig, &nh_new);
 	if (error != 0)
 		return (error);
 
 	wn_new = mallocarray(num_nhops, sizeof(struct weightened_nhop),
 	    M_TEMP, M_NOWAIT | M_ZERO);
 	if (wn_new == NULL) {
 		nhop_free(nh_new);
 		return (EAGAIN);
 	}
 
 	/*
 	 * Apply the replacement to the private copy only; the array
 	 * returned by nhgrp_get_nhops() belongs to the existing group.
 	 */
 	memcpy(wn_new, wn, num_nhops * sizeof(struct weightened_nhop));
 	for (uint32_t i = 0; i < num_nhops; i++) {
 		if (wn_new[i].nh == nh_orig) {
 			wn_new[i].nh = nh_new;
 			wn_new[i].weight = get_info_weight(info,
 			    rnd_orig->rnd_weight);
 			break;
 		}
 	}
 
 	error = nhgrp_get_group(rnh, wn_new, num_nhops, &rnd_new);
 	nhop_free(nh_new);
 	free(wn_new, M_TEMP);
 
 	if (error != 0)
 		return (error);
 
 	error = change_route_conditional(rnh, NULL, info, rnd_orig, &rnd_new, rc);
 
 	return (error);
 }
 #endif
 
 /*
  * Performs one attempt at changing the route whose current nexthop and
  * weight are given in @rnd_orig, using the data from @info.
  *
  * Multipath groups (ROUTE_MPATH) are handled by change_mpath_route().
  * Otherwise a new nexthop is derived from the original one and installed
  * with change_route_conditional().
  *
  * Returns 0 on success with results in @rc,
  * ESRCH if @rnd_orig carries no nexthop,
  * or the error of the helper that failed; the caller retries on EAGAIN.
  */
 static int
 change_route(struct rib_head *rnh, struct rt_addrinfo *info,
     struct route_nhop_data *rnd_orig, struct rib_cmd_info *rc)
 {
 	struct route_nhop_data rnd_new;
 	struct nhop_object *nh_orig;
 	int error;
 
 	nh_orig = rnd_orig->rnd_nhop;
 	if (nh_orig == NULL)
 		return (ESRCH);
 
 #ifdef ROUTE_MPATH
 	if (NH_IS_NHGRP(nh_orig))
 		return (change_mpath_route(rnh, info, rnd_orig, rc));
 #endif
 
 	rnd_new.rnd_weight = get_info_weight(info, rnd_orig->rnd_weight);
 	error = change_nhop(rnh, info, nh_orig, &rnd_new.rnd_nhop);
 	if (error != 0)
 		return (error);
 	error = change_route_conditional(rnh, NULL, info, rnd_orig, &rnd_new, rc);
 
 	return (error);
 }
 
 /*
  * Links @rt into the radix tree of @rnh using the nexthop and weight
  * carried in @rnd.  The RIB write lock must be held.
  * Returns 0 on success and stores operation results in @rc;
  * returns EEXIST if the tree insertion fails.
  */
 static int
 add_route_nhop(struct rib_head *rnh, struct rtentry *rt,
     struct rt_addrinfo *info, struct route_nhop_data *rnd,
     struct rib_cmd_info *rc)
 {
 	struct radix_node *node;
 
 	RIB_WLOCK_ASSERT(rnh);
 
 	rt->rt_nhop = rnd->rnd_nhop;
 	rt->rt_weight = rnd->rnd_weight;
 	node = rnh->rnh_addaddr((struct sockaddr *)rt_key(rt),
 	    info->rti_info[RTAX_NETMASK], &rnh->head, rt->rt_nodes);
 	if (node == NULL) {
 		/* Existing route or memory allocation failure */
 		return (EEXIST);
 	}
 
 	if (rt->rt_expire > 0)
 		tmproutes_update(rnh, rt);
 
 	/* Account for the new prefix and describe the operation in @rc. */
 	rib_bump_gen(rnh);
 	rnh->rnh_prefixes++;
 
 	rc->rc_cmd = RTM_ADD;
 	rc->rc_rt = rt;
 	rc->rc_nh_old = NULL;
 	rc->rc_nh_new = rnd->rnd_nhop;
 	rc->rc_nh_weight = rnd->rnd_weight;
 
 	rib_notify(rnh, RIB_NOTIFY_IMMEDIATE, rc);
 
 	return (0);
 }
 
 /*
  * Switch @rt nexthop/weight to the ones specified in @rnd, or unlink the
  * route from @rnh when @rnd carries no nexthop.
  * The expiration time is updated via rt_set_expire_info() on change.
  * The RIB write lock must be held.
  * Returns 0 on success, ESRCH if the route to delete is not in the tree.
  */
 int
 change_route_nhop(struct rib_head *rnh, struct rtentry *rt,
     struct rt_addrinfo *info, struct route_nhop_data *rnd,
     struct rib_cmd_info *rc)
 {
 	struct nhop_object *nh_prev;
 
 	RIB_WLOCK_ASSERT(rnh);
 
 	nh_prev = rt->rt_nhop;
 
 	if (rnd->rnd_nhop == NULL) {
 		/* Route deletion requested. */
 		struct radix_node *removed;
 
 		removed = rnh->rnh_deladdr((struct sockaddr *)rt_key(rt),
 		    info->rti_info[RTAX_NETMASK], &rnh->head);
 		if (removed == NULL)
 			return (ESRCH);
 		rt = RNTORT(removed);
 		rt->rte_flags &= ~RTF_UP;
 
 		rib_bump_gen(rnh);
 		rnh->rnh_prefixes--;
 		rc->rc_cmd = RTM_DELETE;
 	} else {
 		/* Changing expiration & nexthop & weight to a new one */
 		rt_set_expire_info(rt, info);
 		rt->rt_nhop = rnd->rnd_nhop;
 		rt->rt_weight = rnd->rnd_weight;
 		if (rt->rt_expire > 0)
 			tmproutes_update(rnh, rt);
 
 		rib_bump_gen(rnh);
 		rc->rc_cmd = RTM_CHANGE;
 	}
 
 	/* Describe the operation and tell immediate subscribers. */
 	rc->rc_rt = rt;
 	rc->rc_nh_old = nh_prev;
 	rc->rc_nh_new = rnd->rnd_nhop;
 	rc->rc_nh_weight = rnd->rnd_weight;
 
 	rib_notify(rnh, RIB_NOTIFY_IMMEDIATE, rc);
 
 	return (0);
 }
 
 /*
  * Conditionally update route nhop/weight IFF data in @rnd_orig is
  *  consistent with the current route data.
  *
  * The prefix is looked up by @info's RTAX_DST/RTAX_NETMASK under the RIB
  *  write lock, which this function takes and releases itself.
  * - Prefix absent and @rnd_orig has no nexthop: @rt is inserted with
  *   @rnd_new via add_route_nhop().
  * - Prefix present and its nexthop equals @rnd_orig's: the route is
  *   switched to @rnd_new via change_route_nhop().
  * - Otherwise @rnd_orig is refreshed with the current state and EAGAIN
  *   is returned so that the caller may retry.
  *
  * Nexthop in @rnd_new is consumed: it is released here on failure.
  * On success the nexthop in @rnd_orig (if any) is released instead.
  */
 int
 change_route_conditional(struct rib_head *rnh, struct rtentry *rt,
     struct rt_addrinfo *info, struct route_nhop_data *rnd_orig,
     struct route_nhop_data *rnd_new, struct rib_cmd_info *rc)
 {
 	struct rtentry *rt_new;
 	int error = 0;
 
 	RIB_WLOCK(rnh);
 
 	rt_new = (struct rtentry *)rnh->rnh_lookup(info->rti_info[RTAX_DST],
 	    info->rti_info[RTAX_NETMASK], &rnh->head);
 
 	if (rt_new == NULL) {
 		if (rnd_orig->rnd_nhop == NULL)
 			error = add_route_nhop(rnh, rt, info, rnd_new, rc);
 		else {
 			/*
 			 * Prefix does not exist, which was not our assumption.
 			 * Update @rnd_orig with the new data and return
 			 */
 			rnd_orig->rnd_nhop = NULL;
 			rnd_orig->rnd_weight = 0;
 			error = EAGAIN;
 		}
 	} else {
 		/* Prefix exists, try to update */
 		if (rnd_orig->rnd_nhop == rt_new->rt_nhop) {
 			/*
 			 * Nhop/mpath group hasn't changed. Flip
 			 * to the new precalculated one and return
 			 */
 			error = change_route_nhop(rnh, rt_new, info, rnd_new, rc);
 		} else {
 			/* Update and retry */
 			rnd_orig->rnd_nhop = rt_new->rt_nhop;
 			rnd_orig->rnd_weight = rt_new->rt_weight;
 			error = EAGAIN;
 		}
 	}
 
 	RIB_WUNLOCK(rnh);
 
 	if (error == 0) {
 		/* Delayed subscribers are notified after the lock is dropped. */
 		rib_notify(rnh, RIB_NOTIFY_DELAYED, rc);
 
 		/* The previous nexthop is no longer referenced by the route. */
 		if (rnd_orig->rnd_nhop != NULL)
 			nhop_free_any(rnd_orig->rnd_nhop);
 
 	} else {
 		/* The new nexthop was not installed; drop it. */
 		if (rnd_new->rnd_nhop != NULL)
 			nhop_free_any(rnd_new->rnd_nhop);
 	}
 
 	return (error);
 }
 
 /*
  * Performs the routing table modification specified by @action.
  * Table is specified by @fibnum and sa_family in @info->rti_info[RTAX_DST].
  * Needs to be run in network epoch.
  *
  * Returns 0 on success and fills in @rc with action result.
  * Returns ENOTSUP for any action other than RTM_ADD, RTM_DELETE or
  * RTM_CHANGE.
  */
 int
 rib_action(uint32_t fibnum, int action, struct rt_addrinfo *info,
     struct rib_cmd_info *rc)
 {
 
 	if (action == RTM_ADD)
 		return (rib_add_route(fibnum, info, rc));
 	if (action == RTM_DELETE)
 		return (rib_del_route(fibnum, info, rc));
 	if (action == RTM_CHANGE)
 		return (rib_change_route(fibnum, info, rc));
 
 	return (ENOTSUP);
 }
 
 /*
  * State shared between rib_walk_del() and its per-node callback
  * rt_checkdelroute().
  */
 struct rt_delinfo
 {
 	struct rt_addrinfo info;	/* filter data; DST/NETMASK set per node */
 	struct rib_head *rnh;		/* table being walked */
 	struct rtentry *head;		/* unlinked entries, chained via rt_chain */
 	struct rib_cmd_info rc;		/* result of the last unlink attempt */
 };
 
 /*
  * Conditionally unlinks @rn from radix tree based
  * on info data passed in @arg.
  *
  * Tree-walk callback; @arg is a struct rt_delinfo.  The node's key and
  * mask are stored into the info block before rt_unlinkrte() is called.
  * Always returns 0, whether or not the node was unlinked.
  */
 static int
 rt_checkdelroute(struct radix_node *rn, void *arg)
 {
 	struct rt_delinfo *di;
 	struct rt_addrinfo *info;
 	struct rtentry *rt;
 
 	di = (struct rt_delinfo *)arg;
 	rt = (struct rtentry *)rn;
 	info = &di->info;
 
 	info->rti_info[RTAX_DST] = rt_key(rt);
 	info->rti_info[RTAX_NETMASK] = rt_mask(rt);
 
 	/* Nothing was unlinked for this node; move on to the next one. */
 	if (rt_unlinkrte(di->rnh, info, &di->rc) != 0)
 		return (0);
 
 	/*
 	 * Add deleted rtentries to the list to GC them
 	 *  after dropping the lock.
 	 *
 	 * XXX: Delayed notifications not implemented
 	 *  for nexthop updates.
 	 */
 	if (di->rc.rc_cmd == RTM_DELETE) {
 		/* Add to the list and return */
 		rt->rt_chain = di->head;
 		di->head = rt;
 #ifdef ROUTE_MPATH
 	} else {
 		/*
 		 * RTM_CHANGE to a different nexthop or nexthop group.
 		 * Free old multipath group.
 		 */
 		nhop_free_any(di->rc.rc_nh_old);
 #endif
 	}
 
 	return (0);
 }
 
 /*
  * Iterates over a routing table specified by @fibnum and @family and
  *  deletes elements marked by @filter_f.
  * @fibnum: rtable id
  * @family: AF_ address family
  * @filter_f: function returning non-zero value for items to delete
  * @arg: data to pass to the @filter_f function
  * @report: true if rtsock notification is needed.
  *
  * Returns silently if no table exists for @fibnum/@family.
  * Enters the network epoch itself.  Entries are unlinked under the RIB
  *  write lock and then notified, reported and freed after it is dropped.
  */
 void
 rib_walk_del(u_int fibnum, int family, rib_filter_f_t *filter_f, void *arg, bool report)
 {
 	struct rib_head *rnh;
 	struct rt_delinfo di;
 	struct rtentry *rt;
 	struct nhop_object *nh;
 	struct epoch_tracker et;
 
 	rnh = rt_tables_get_rnh(fibnum, family);
 	if (rnh == NULL)
 		return;
 
 	bzero(&di, sizeof(di));
 	di.info.rti_filter = filter_f;
 	di.info.rti_filterdata = arg;
 	di.rnh = rnh;
 	di.rc.rc_cmd = RTM_DELETE;
 
 	NET_EPOCH_ENTER(et);
 
 	/* Phase 1: unlink matching entries, collecting them on di.head. */
 	RIB_WLOCK(rnh);
 	rnh->rnh_walktree(&rnh->head, rt_checkdelroute, &di);
 	RIB_WUNLOCK(rnh);
 
 	/* We might have something to reclaim. */
 	bzero(&di.rc, sizeof(di.rc));
 	di.rc.rc_cmd = RTM_DELETE;
 	/* Phase 2: notify, optionally report, and free each collected entry. */
 	while (di.head != NULL) {
 		rt = di.head;
 		di.head = rt->rt_chain;
 		rt->rt_chain = NULL;
 		nh = rt->rt_nhop;
 
 		di.rc.rc_rt = rt;
 		di.rc.rc_nh_old = nh;
 		rib_notify(rnh, RIB_NOTIFY_DELAYED, &di.rc);
 
 		/* TODO std rt -> rt_addrinfo export */
 		di.info.rti_info[RTAX_DST] = rt_key(rt);
 		di.info.rti_info[RTAX_NETMASK] = rt_mask(rt);
 
 		if (report) {
 #ifdef ROUTE_MPATH
 			struct nhgrp_object *nhg;
 			struct weightened_nhop *wn;
 			uint32_t num_nhops;
 			/* For a nexthop group, send one message per member. */
 			if (NH_IS_NHGRP(nh)) {
 				nhg = (struct nhgrp_object *)nh;
 				wn = nhgrp_get_nhops(nhg, &num_nhops);
 				for (int i = 0; i < num_nhops; i++)
 					rt_routemsg(RTM_DELETE, rt, wn[i].nh, fibnum);
 			} else
 #endif
 			rt_routemsg(RTM_DELETE, rt, nh, fibnum);
 		}
 		rtfree(rt);
 	}
 
 	NET_EPOCH_EXIT(et);
 }
 
 /*
  * Tree-walk callback: removes the entry behind @rn from the table passed
  * in @arg and frees it if the removed node is that same entry.
  * Always returns 0.
  */
 static int
 rt_delete_unconditional(struct radix_node *rn, void *arg)
 {
 	struct rib_head *table = (struct rib_head *)arg;
 	struct rtentry *entry = RNTORT(rn);
 	struct radix_node *removed;
 
 	removed = table->rnh_deladdr(rt_key(entry), rt_mask(entry),
 	    &table->head);
 	if (RNTORT(removed) == entry)
 		rtfree(entry);
 
 	return (0);
 }
 
 /*
  * Removes all routes from the routing table without executing notifications.
  * rtentries will be removed after the end of a current epoch.
  *
  * Takes the RIB write lock for the duration of the tree walk.
  */
 static void
 rib_flush_routes(struct rib_head *rnh)
 {
 	RIB_WLOCK(rnh);
 	rnh->rnh_walktree(&rnh->head, rt_delete_unconditional, rnh);
 	RIB_WUNLOCK(rnh);
 }
 
 /*
  * Flushes the routes of address family @family in every fib, skipping
  * fibs that have no table for that family.
  */
 void
 rib_flush_routes_family(int family)
 {
 	struct rib_head *table;
 	uint32_t fib;
 
 	for (fib = 0; fib < rt_numfibs; fib++) {
 		table = rt_tables_get_rnh(fib, family);
 		if (table == NULL)
 			continue;
 		rib_flush_routes(table);
 	}
 }
 
 /*
  * Invokes the callback of every subscriber of @rnh whose subscription
  * type equals @type, passing it @rc and the subscriber's own argument.
  */
 static void
 rib_notify(struct rib_head *rnh, enum rib_subscription_type type,
     struct rib_cmd_info *rc)
 {
 	struct rib_subscription *sub;
 
 	CK_STAILQ_FOREACH(sub, &rnh->rnh_subscribers, next) {
 		if (sub->type != type)
 			continue;
 		sub->func(rnh, rc, sub->arg);
 	}
 }
 
 /*
  * Allocates a zeroed subscription record and fills in its callback,
  * callback argument and type.  Uses M_WAITOK when @waitok is true and
  * M_NOWAIT otherwise.  Returns NULL if the allocation fails.
  */
 static struct rib_subscription *
 allocate_subscription(rib_subscription_cb_t *f, void *arg,
     enum rib_subscription_type type, bool waitok)
 {
 	struct rib_subscription *sub;
 	int mflags;
 
 	mflags = M_ZERO;
 	mflags |= waitok ? M_WAITOK : M_NOWAIT;
 
 	sub = malloc(sizeof(struct rib_subscription), M_RTABLE, mflags);
 	if (sub != NULL) {
 		sub->func = f;
 		sub->arg = arg;
 		sub->type = type;
 	}
 
 	return (sub);
 }
 
 /*
  * Subscribe for the changes in the routing table specified by @fibnum and
  *  @family.
  *
  * Returns pointer to the subscription structure on success.
  * Returns NULL if there is no table for @fibnum/@family or if the
  *  subscription cannot be allocated.
  */
 struct rib_subscription *
 rib_subscribe(uint32_t fibnum, int family, rib_subscription_cb_t *f, void *arg,
     enum rib_subscription_type type, bool waitok)
 {
 	struct rib_head *rnh;
 	struct epoch_tracker et;
 
 	NET_EPOCH_ENTER(et);
 	KASSERT((fibnum < rt_numfibs), ("%s: bad fibnum", __func__));
 	rnh = rt_tables_get_rnh(fibnum, family);
 	NET_EPOCH_EXIT(et);
 
 	/*
 	 * rib_subscribe_internal() dereferences the table head, so a missing
 	 * table must be rejected here.
 	 */
 	if (rnh == NULL)
 		return (NULL);
 
 	return (rib_subscribe_internal(rnh, f, arg, type, waitok));
 }
 
 /*
  * Allocates a subscription and links it at the head of @rnh's subscriber
  * list.  Enters the network epoch and takes the RIB write lock itself.
  * Returns the new subscription, or NULL if the allocation fails.
  */
 struct rib_subscription *
 rib_subscribe_internal(struct rib_head *rnh, rib_subscription_cb_t *f, void *arg,
     enum rib_subscription_type type, bool waitok)
 {
 	struct epoch_tracker et;
 	struct rib_subscription *sub;
 
 	sub = allocate_subscription(f, arg, type, waitok);
 	if (sub == NULL)
 		return (NULL);
 	sub->rnh = rnh;
 
 	NET_EPOCH_ENTER(et);
 	RIB_WLOCK(rnh);
 	CK_STAILQ_INSERT_HEAD(&rnh->rnh_subscribers, sub, next);
 	RIB_WUNLOCK(rnh);
 	NET_EPOCH_EXIT(et);
 
 	return (sub);
 }
 
 /*
  * Variant of rib_subscribe_internal() for callers that are already in the
  * network epoch and hold the RIB write lock (both are asserted).
  * The allocation never sleeps.  Returns NULL if it fails.
  */
 struct rib_subscription *
 rib_subscribe_locked(struct rib_head *rnh, rib_subscription_cb_t *f, void *arg,
     enum rib_subscription_type type)
 {
 	struct rib_subscription *sub;
 
 	NET_EPOCH_ASSERT();
 	RIB_WLOCK_ASSERT(rnh);
 
 	sub = allocate_subscription(f, arg, type, false);
 	if (sub == NULL)
 		return (NULL);
 
 	sub->rnh = rnh;
 	CK_STAILQ_INSERT_HEAD(&rnh->rnh_subscribers, sub, next);
 
 	return (sub);
 }
 
 /*
  * Remove rtable subscription @rs from the routing table.
  * Needs to be run in network epoch.
  *
  * Takes the RIB write lock to unlink @rs; the memory is released later
  * through an epoch callback (destroy_subscription_epoch()).
  */
 void
 rib_unsubscribe(struct rib_subscription *rs)
 {
 	struct rib_head *rnh = rs->rnh;
 
 	NET_EPOCH_ASSERT();
 
 	RIB_WLOCK(rnh);
 	CK_STAILQ_REMOVE(&rnh->rnh_subscribers, rs, rib_subscription, next);
 	RIB_WUNLOCK(rnh);
 
 	/* Defer the free to an epoch callback. */
 	epoch_call(net_epoch_preempt, destroy_subscription_epoch,
 	    &rs->epoch_ctx);
 }
 
 /*
  * Same as rib_unsubscribe(), for callers that already hold the RIB write
  * lock of the table @rs belongs to.  Needs to be run in network epoch.
  */
 void
 rib_unsubscribe_locked(struct rib_subscription *rs)
 {
 	struct rib_head *rnh = rs->rnh;
 
 	NET_EPOCH_ASSERT();
 	RIB_WLOCK_ASSERT(rnh);
 
 	CK_STAILQ_REMOVE(&rnh->rnh_subscribers, rs, rib_subscription, next);
 
 	/* Defer the free to an epoch callback. */
 	epoch_call(net_epoch_preempt, destroy_subscription_epoch,
 	    &rs->epoch_ctx);
 }
 
 /*
  * Epoch callback: frees the subscription that embeds @ctx.
  */
 static void
 destroy_subscription_epoch(epoch_context_t ctx)
 {
 
 	free(__containerof(ctx, struct rib_subscription, epoch_ctx),
 	    M_RTABLE);
 }
 
 /*
  * Initializes the (empty) subscriber list of @rnh.
  */
 void
 rib_init_subscriptions(struct rib_head *rnh)
 {
 
 	CK_STAILQ_INIT(&rnh->rnh_subscribers);
 }
 
 /*
  * Unlinks every subscription of @rnh and schedules each one for release
  * through an epoch callback.  Enters the network epoch and takes the RIB
  * write lock itself.
  */
 void
 rib_destroy_subscriptions(struct rib_head *rnh)
 {
 	struct epoch_tracker et;
 	struct rib_subscription *sub;
 
 	NET_EPOCH_ENTER(et);
 	RIB_WLOCK(rnh);
 	for (;;) {
 		sub = CK_STAILQ_FIRST(&rnh->rnh_subscribers);
 		if (sub == NULL)
 			break;
 		CK_STAILQ_REMOVE_HEAD(&rnh->rnh_subscribers, next);
 		epoch_call(net_epoch_preempt, destroy_subscription_epoch,
 		    &sub->epoch_ctx);
 	}
 	RIB_WUNLOCK(rnh);
 	NET_EPOCH_EXIT(et);
 }
diff --git a/sys/netgraph/netflow/netflow.c b/sys/netgraph/netflow/netflow.c
index 7d4108ee59a7..5c0358d9b67a 100644
--- a/sys/netgraph/netflow/netflow.c
+++ b/sys/netgraph/netflow/netflow.c
@@ -1,1178 +1,1182 @@
 /*-
  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
  *
  * Copyright (c) 2010-2011 Alexander V. Chernikov <melifaro@ipfw.ru>
  * Copyright (c) 2004-2005 Gleb Smirnoff <glebius@FreeBSD.org>
  * Copyright (c) 2001-2003 Roman V. Palagin <romanp@unshadow.net>
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  * $SourceForge: netflow.c,v 1.41 2004/09/05 11:41:10 glebius Exp $
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include "opt_inet.h"
 #include "opt_inet6.h"
 #include "opt_route.h"
 #include <sys/param.h>
 #include <sys/bitstring.h>
 #include <sys/systm.h>
 #include <sys/counter.h>
 #include <sys/kernel.h>
 #include <sys/ktr.h>
 #include <sys/limits.h>
 #include <sys/mbuf.h>
 #include <sys/syslog.h>
 #include <sys/socket.h>
 #include <vm/uma.h>
 
 #include <net/if.h>
 #include <net/if_dl.h>
 #include <net/if_var.h>
 #include <net/route.h>
 #include <net/route/nhop.h>
 #include <net/route/route_ctl.h>
 #include <net/ethernet.h>
 #include <netinet/in.h>
 #include <netinet/in_fib.h>
 #include <netinet/in_systm.h>
 #include <netinet/ip.h>
 #include <netinet/ip6.h>
 #include <netinet/tcp.h>
 #include <netinet/udp.h>
 
 #include <netinet6/in6_fib.h>
 
 #include <netgraph/ng_message.h>
 #include <netgraph/netgraph.h>
 
 #include <netgraph/netflow/netflow.h>
 #include <netgraph/netflow/netflow_v9.h>
 #include <netgraph/netflow/ng_netflow.h>
 
 #define	NBUCKETS	(65536)		/* must be power of 2 */
 
 /* This hash is for TCP or UDP packets. */
 #define FULL_HASH(addr1, addr2, port1, port2)	\
 	(((addr1 ^ (addr1 >> 16) ^ 		\
 	htons(addr2 ^ (addr2 >> 16))) ^ 	\
 	port1 ^ htons(port2)) &			\
 	(NBUCKETS - 1))
 
 /* This hash is for all other IP packets. */
 #define ADDR_HASH(addr1, addr2)			\
 	((addr1 ^ (addr1 >> 16) ^ 		\
 	htons(addr2 ^ (addr2 >> 16))) &		\
 	(NBUCKETS - 1))
 
 /* Macros to shorten logical constructions */
 /* XXX: priv must exist in namespace */
 #define	INACTIVE(fle)	(time_uptime - fle->f.last > priv->nfinfo_inact_t)
 #define	AGED(fle)	(time_uptime - fle->f.first > priv->nfinfo_act_t)
 #define	ISFREE(fle)	(fle->f.packets == 0)
 
 /*
  * 4 is a magical number: statistically number of 4-packet flows is
  * bigger than 5,6,7...-packet flows by an order of magnitude. Most UDP/ICMP
  * scans are 1 packet (~ 90% of flow cache). TCP scans are 2-packet in case
  * of reachable host and 4-packet otherwise.
  */
 #define	SMALL(fle)	(fle->f.packets <= 4)
 
 MALLOC_DEFINE(M_NETFLOW_HASH, "netflow_hash", "NetFlow hash");
 
 static int export_add(item_p, struct flow_entry *);
 static int export_send(priv_p, fib_export_p, item_p, int);
 
 #ifdef INET
 static int hash_insert(priv_p, struct flow_hash_entry *, struct flow_rec *,
     int, uint8_t, uint8_t);
 #endif
 #ifdef INET6
 static int hash6_insert(priv_p, struct flow_hash_entry *, struct flow6_rec *,
     int, uint8_t, uint8_t);
 #endif
 
 static void expire_flow(priv_p, fib_export_p, struct flow_entry *, int);
 
 #ifdef INET
 /*
  * Generate hash for a given flow record.
  *
  * TCP and UDP records are hashed over addresses and ports; all other
  * protocols are hashed over addresses only.
  *
  * FIB is not used here, because most VRFs will carry public IPv4
  * addresses, which are unique even without FIB.  Private addresses can
  * overlap, but this is worked out via flow_rec bcmp() containing fib id.
  * In IPv6 world addresses are all globally unique (it's not fully true,
  * there is FC00::/7 for example, but chances of address overlap are MUCH
  * smaller).
  */
 static inline uint32_t
 ip_hash(struct flow_rec *r)
 {
 
 	if (r->r_ip_p == IPPROTO_TCP || r->r_ip_p == IPPROTO_UDP)
 		return FULL_HASH(r->r_src.s_addr, r->r_dst.s_addr,
 		    r->r_sport, r->r_dport);
 
 	return ADDR_HASH(r->r_src.s_addr, r->r_dst.s_addr);
 }
 #endif
 
 #ifdef INET6
 /*
  * Generate hash for a given flow6 record, using only the last 32-bit word
  * of each IPv6 address.  TCP and UDP records also mix in the ports.
  */
 static inline uint32_t
 ip6_hash(struct flow6_rec *r)
 {
 
 	if (r->r_ip_p == IPPROTO_TCP || r->r_ip_p == IPPROTO_UDP)
 		return FULL_HASH(r->src.r_src6.__u6_addr.__u6_addr32[3],
 		    r->dst.r_dst6.__u6_addr.__u6_addr32[3], r->r_sport,
 		    r->r_dport);
 
 	return ADDR_HASH(r->src.r_src6.__u6_addr.__u6_addr32[3],
 	    r->dst.r_dst6.__u6_addr.__u6_addr32[3]);
 }
 
 #endif
 
 /*
  * Detach the pending export datagram from @fe, if there is one.
  * Otherwise allocate a new one with an initialized NetFlow v5 header
  * (zero record count).  Returns NULL if the mbuf or the netgraph item
  * cannot be obtained.
  */
 static item_p
 get_export_dgram(priv_p priv, fib_export_p fe)
 {
 	struct netflow_v5_export_dgram *dgram;
 	struct mbuf *m;
 	item_p	pending;
 
 	/* Take over whatever is stashed, leaving the slot empty. */
 	mtx_lock(&fe->export_mtx);
 	pending = fe->exp.item;
 	fe->exp.item = NULL;
 	mtx_unlock(&fe->export_mtx);
 
 	if (pending != NULL)
 		return (pending);
 
 	m = m_getcl(M_NOWAIT, MT_DATA, M_PKTHDR);
 	if (m == NULL)
 		return (NULL);
 
 	pending = ng_package_data(m, NG_NOFLAGS);
 	if (pending == NULL)
 		return (NULL);
 
 	dgram = mtod(m, struct netflow_v5_export_dgram *);
 	dgram->header.count = 0;
 	dgram->header.version = htons(NETFLOW_V5);
 	dgram->header.pad = 0;
 
 	return (pending);
 }
 
 /*
  * Re-attach an incomplete datagram back to @fe.
  * If another one is already attached, send this one out instead.
  */
 static void
 return_export_dgram(priv_p priv, fib_export_p fe, item_p item, int flags)
 {
 	int stashed = 0;
 
 	/*
 	 * On SMP another thread may have put its item there already;
 	 * in that case ours goes to the collector as is.
 	 */
 	mtx_lock(&fe->export_mtx);
 	if (fe->exp.item == NULL) {
 		fe->exp.item = item;
 		stashed = 1;
 	}
 	mtx_unlock(&fe->export_mtx);
 
 	if (!stashed)
 		export_send(priv, fe, item, flags);
 }
 
 /*
  * The flow is over. Call export_add() and free it. If datagram is
  * full, then call export_send().
  *
  * The v5 export is attempted only for IPv4 flows when priv->export is
  * set; the v9 export is attempted whenever priv->export9 is set.
  * @fle is returned to its UMA zone on every path that recognizes its
  * version, so the caller must not touch it afterwards.
  */
 static void
 expire_flow(priv_p priv, fib_export_p fe, struct flow_entry *fle, int flags)
 {
 	struct netflow_export_item exp;
 	uint16_t version = fle->f.version;
 
 	if ((priv->export != NULL) && (version == IPVERSION)) {
 		exp.item = get_export_dgram(priv, fe);
 		if (exp.item == NULL) {
 			/* No datagram: count the failure and drop the flow. */
 			priv->nfinfo_export_failed++;
 			if (priv->export9 != NULL)
 				priv->nfinfo_export9_failed++;
 			/* fle definitely contains IPv4 flow. */
 			uma_zfree_arg(priv->zone, fle, priv);
 			return;
 		}
 
 		/* A positive result from export_add() triggers the send. */
 		if (export_add(exp.item, fle) > 0)
 			export_send(priv, fe, exp.item, flags);
 		else
 			return_export_dgram(priv, fe, exp.item, NG_QUEUE);
 	}
 
 	if (priv->export9 != NULL) {
 		exp.item9 = get_export9_dgram(priv, fe, &exp.item9_opt);
 		if (exp.item9 == NULL) {
 			/* No datagram: count the failure and drop the flow. */
 			priv->nfinfo_export9_failed++;
 			if (version == IPVERSION)
 				uma_zfree_arg(priv->zone, fle, priv);
 #ifdef INET6
 			else if (version == IP6VERSION)
 				uma_zfree_arg(priv->zone6, fle, priv);
 #endif
 			else
 				panic("ng_netflow: Unknown IP proto: %d",
 				    version);
 			return;
 		}
 
 		if (export9_add(exp.item9, exp.item9_opt, fle) > 0)
 			export9_send(priv, fe, exp.item9, exp.item9_opt, flags);
 		else
 			return_export9_dgram(priv, fe, exp.item9,
 			    exp.item9_opt, NG_QUEUE);
 	}
 
 	/* Release the flow entry to the zone matching its IP version. */
 	if (version == IPVERSION)
 		uma_zfree_arg(priv->zone, fle, priv);
 #ifdef INET6
 	else if (version == IP6VERSION)
 		uma_zfree_arg(priv->zone6, fle, priv);
 #endif
 }
 
 /*
  * Get a snapshot of node statistics.
  *
  * Copies the counters, zone usage and timeout settings of @priv into @i.
  * No lock is taken here, so the individual values are read one by one.
  */
 void
 ng_netflow_copyinfo(priv_p priv, struct ng_netflow_info *i)
 {
 
 	/* Values kept as counter(9) counters. */
 	i->nfinfo_bytes = counter_u64_fetch(priv->nfinfo_bytes);
 	i->nfinfo_packets = counter_u64_fetch(priv->nfinfo_packets);
 	i->nfinfo_bytes6 = counter_u64_fetch(priv->nfinfo_bytes6);
 	i->nfinfo_packets6 = counter_u64_fetch(priv->nfinfo_packets6);
 	i->nfinfo_sbytes = counter_u64_fetch(priv->nfinfo_sbytes);
 	i->nfinfo_spackets = counter_u64_fetch(priv->nfinfo_spackets);
 	i->nfinfo_sbytes6 = counter_u64_fetch(priv->nfinfo_sbytes6);
 	i->nfinfo_spackets6 = counter_u64_fetch(priv->nfinfo_spackets6);
 	i->nfinfo_act_exp = counter_u64_fetch(priv->nfinfo_act_exp);
 	i->nfinfo_inact_exp = counter_u64_fetch(priv->nfinfo_inact_exp);
 
 	/* Current item counts of the flow cache zones. */
 	i->nfinfo_used = uma_zone_get_cur(priv->zone);
 #ifdef INET6
 	i->nfinfo_used6 = uma_zone_get_cur(priv->zone6);
 #endif
 
 	/* Plain fields copied as is. */
 	i->nfinfo_alloc_failed = priv->nfinfo_alloc_failed;
 	i->nfinfo_export_failed = priv->nfinfo_export_failed;
 	i->nfinfo_export9_failed = priv->nfinfo_export9_failed;
 	i->nfinfo_realloc_mbuf = priv->nfinfo_realloc_mbuf;
 	i->nfinfo_alloc_fibs = priv->nfinfo_alloc_fibs;
 	i->nfinfo_inact_t = priv->nfinfo_inact_t;
 	i->nfinfo_act_t = priv->nfinfo_act_t;
 }
 
 /*
  * Insert a record into defined slot.
  *
  * First we obtain a free flow entry, then fill in all
  * possible fields in it.
  *
  * TODO: consider dropping hash mutex while filling in datagram,
  * as this was done in previous version. Need to test & profile
  * to be sure.
  */
 #ifdef INET
 /*
  * Allocates a new IPv4 flow entry for record @r, fills it in and appends
  * it to the hash slot @hsh, whose mutex must be held by the caller.
  *
  * @plen is stored as the initial byte count and @tcp_flags as the initial
  * TCP flags.  @flags may contain NG_NETFLOW_CONF_NODSTLOOKUP and/or
  * NG_NETFLOW_CONF_NOSRCLOOKUP to skip the respective route lookups.
  *
  * Returns 0 on success, ENOMEM if no flow entry can be allocated.
  */
 static int
 hash_insert(priv_p priv, struct flow_hash_entry *hsh, struct flow_rec *r,
 	int plen, uint8_t flags, uint8_t tcp_flags)
 {
 	struct flow_entry *fle;
 
 	mtx_assert(&hsh->mtx, MA_OWNED);
 
 	fle = uma_zalloc_arg(priv->zone, priv, M_NOWAIT);
 	if (fle == NULL) {
 		priv->nfinfo_alloc_failed++;
 		return (ENOMEM);
 	}
 
 	/*
 	 * Now fle is totally ours. It is detached from all lists,
 	 * we can safely edit it.
 	 */
 	fle->f.version = IPVERSION;
 	bcopy(r, &fle->f.r, sizeof(struct flow_rec));
 	fle->f.bytes = plen;
 	fle->f.packets = 1;
 	fle->f.tcp_flags = tcp_flags;
 
 	fle->f.first = fle->f.last = time_uptime;
 
 	/*
 	 * First we do route table lookup on destination address. So we can
 	 * fill in out_ifx, dst_mask, nexthop, and dst_as in future releases.
 	 */
 	if ((flags & NG_NETFLOW_CONF_NODSTLOOKUP) == 0) {
 		struct rtentry *rt;
 		struct route_nhop_data rnd;
 
 		rt = fib4_lookup_rt(r->fib, fle->f.r.r_dst, 0, NHR_NONE, &rnd);
 		if (rt != NULL) {
 			struct in_addr addr;
 			uint32_t scopeid;
 			struct nhop_object *nh = nhop_select_func(rnd.rnd_nhop, 0);
 			/* Shadows the plen parameter: route prefix length. */
 			int plen;
 
 			rt_get_inet_prefix_plen(rt, &addr, &plen, &scopeid);
 			fle->f.fle_o_ifx = nh->nh_ifp->if_index;
 			if (nh->gw_sa.sa_family == AF_INET)
 				fle->f.next_hop = nh->gw4_sa.sin_addr;
+			/*
+			 * XXX we're leaving an empty gateway here for
+			 * IPv6 nexthops.
+			 */
 			fle->f.dst_mask = plen;
 		}
 	}
 
 	/* Do route lookup on source address, to fill in src_mask. */
 	if ((flags & NG_NETFLOW_CONF_NOSRCLOOKUP) == 0) {
 		struct rtentry *rt;
 		struct route_nhop_data rnd;
 
 		rt = fib4_lookup_rt(r->fib, fle->f.r.r_src, 0, NHR_NONE, &rnd);
 		if (rt != NULL) {
 			struct in_addr addr;
 			uint32_t scopeid;
 			/* Shadows the plen parameter: route prefix length. */
 			int plen;
 
 			rt_get_inet_prefix_plen(rt, &addr, &plen, &scopeid);
 			fle->f.src_mask = plen;
 		}
 	}
 
 	/* Push new flow at the end of hash. */
 	TAILQ_INSERT_TAIL(&hsh->head, fle, fle_hash);
 
 	return (0);
 }
 #endif
 
 #ifdef INET6
 /*
  * Allocates a new IPv6 flow entry for record @r, fills it in and appends
  * it to the hash slot @hsh6, whose mutex must be held by the caller.
  *
  * @plen is stored as the initial byte count and @tcp_flags as the initial
  * TCP flags.  @flags may contain NG_NETFLOW_CONF_NODSTLOOKUP and/or
  * NG_NETFLOW_CONF_NOSRCLOOKUP to skip the respective route lookups.
  *
  * Returns 0 on success, ENOMEM if no flow entry can be allocated.
  */
 static int
 hash6_insert(priv_p priv, struct flow_hash_entry *hsh6, struct flow6_rec *r,
 	int plen, uint8_t flags, uint8_t tcp_flags)
 {
 	struct flow6_entry *entry;
 
 	mtx_assert(&hsh6->mtx, MA_OWNED);
 
 	entry = uma_zalloc_arg(priv->zone6, priv, M_NOWAIT);
 	if (entry == NULL) {
 		priv->nfinfo_alloc_failed++;
 		return (ENOMEM);
 	}
 
 	/*
 	 * The entry is not on any list yet, so it can be edited freely.
 	 */
 	entry->f.version = IP6VERSION;
 	bcopy(r, &entry->f.r, sizeof(struct flow6_rec));
 	entry->f.bytes = plen;
 	entry->f.packets = 1;
 	entry->f.tcp_flags = tcp_flags;
 
 	entry->f.first = entry->f.last = time_uptime;
 
 	/*
 	 * Route lookup on the destination address fills in the outgoing
 	 * interface index, the IPv6 gateway (if any) and the prefix length.
 	 */
 	if ((flags & NG_NETFLOW_CONF_NODSTLOOKUP) == 0) {
 		struct route_nhop_data rnd;
 		struct rtentry *rt;
 
 		rt = fib6_lookup_rt(r->fib, &entry->f.r.dst.r_dst6, 0, NHR_NONE, &rnd);
 		if (rt != NULL) {
 			struct nhop_object *nh;
 			struct in6_addr prefix;
 			uint32_t scopeid;
 			int prefix_len;
 
 			nh = nhop_select_func(rnd.rnd_nhop, 0);
 			rt_get_inet6_prefix_plen(rt, &prefix, &prefix_len,
 			    &scopeid);
 			entry->f.fle_o_ifx = nh->nh_ifp->if_index;
 			if (nh->gw_sa.sa_family == AF_INET6)
 				entry->f.n.next_hop6 = nh->gw6_sa.sin6_addr;
 			entry->f.dst_mask = prefix_len;
 		}
 	}
 
 	/* Route lookup on the source address fills in src_mask. */
 	if ((flags & NG_NETFLOW_CONF_NOSRCLOOKUP) == 0) {
 		struct route_nhop_data rnd;
 		struct rtentry *rt;
 
 		rt = fib6_lookup_rt(r->fib, &entry->f.r.src.r_src6, 0, NHR_NONE, &rnd);
 		if (rt != NULL) {
 			struct in6_addr prefix;
 			uint32_t scopeid;
 			int prefix_len;
 
 			rt_get_inet6_prefix_plen(rt, &prefix, &prefix_len,
 			    &scopeid);
 			entry->f.src_mask = prefix_len;
 		}
 	}
 
 	/* Append the new flow at the tail of the hash slot. */
 	TAILQ_INSERT_TAIL(&hsh6->head, (struct flow_entry *)entry, fle_hash);
 
 	return (0);
 }
 
 /*
  * Non-static functions called from ng_netflow.c
  */
 
 /*
  * Allocate memory and set up flow cache.
  *
  * Creates the UMA zone(s) for flow entries (capped at CACHESIZE items),
  * allocates and initializes the NBUCKETS-slot hash table(s), allocates
  * the statistics counters and finally initializes the v9 cache.
  * All allocations here use M_WAITOK.
  */
 void
 ng_netflow_cache_init(priv_p priv)
 {
 	struct flow_hash_entry *hsh;
 	int i;
 
 	/* Initialize cache UMA zone. */
 	priv->zone = uma_zcreate("NetFlow IPv4 cache",
 	    sizeof(struct flow_entry), NULL, NULL, NULL, NULL,
 	    UMA_ALIGN_CACHE, 0);
 	uma_zone_set_max(priv->zone, CACHESIZE);
 #ifdef INET6	
 	priv->zone6 = uma_zcreate("NetFlow IPv6 cache",
 	    sizeof(struct flow6_entry), NULL, NULL, NULL, NULL,
 	    UMA_ALIGN_CACHE, 0);
 	uma_zone_set_max(priv->zone6, CACHESIZE);
 #endif	
 
 	/* Allocate hash. */
 	priv->hash = malloc(NBUCKETS * sizeof(struct flow_hash_entry),
 	    M_NETFLOW_HASH, M_WAITOK | M_ZERO);
 
 	/* Initialize hash: one mutex and one empty flow list per slot. */
 	for (i = 0, hsh = priv->hash; i < NBUCKETS; i++, hsh++) {
 		mtx_init(&hsh->mtx, "hash mutex", NULL, MTX_DEF);
 		TAILQ_INIT(&hsh->head);
 	}
 
 #ifdef INET6
 	/* Allocate hash. */
 	priv->hash6 = malloc(NBUCKETS * sizeof(struct flow_hash_entry),
 	    M_NETFLOW_HASH, M_WAITOK | M_ZERO);
 
 	/* Initialize hash: one mutex and one empty flow list per slot. */
 	for (i = 0, hsh = priv->hash6; i < NBUCKETS; i++, hsh++) {
 		mtx_init(&hsh->mtx, "hash mutex", NULL, MTX_DEF);
 		TAILQ_INIT(&hsh->head);
 	}
 #endif
 
 	/* Statistics counters. */
 	priv->nfinfo_bytes = counter_u64_alloc(M_WAITOK);
 	priv->nfinfo_packets = counter_u64_alloc(M_WAITOK);
 	priv->nfinfo_bytes6 = counter_u64_alloc(M_WAITOK);
 	priv->nfinfo_packets6 = counter_u64_alloc(M_WAITOK);
 	priv->nfinfo_sbytes = counter_u64_alloc(M_WAITOK);
 	priv->nfinfo_spackets = counter_u64_alloc(M_WAITOK);
 	priv->nfinfo_sbytes6 = counter_u64_alloc(M_WAITOK);
 	priv->nfinfo_spackets6 = counter_u64_alloc(M_WAITOK);
 	priv->nfinfo_act_exp = counter_u64_alloc(M_WAITOK);
 	priv->nfinfo_inact_exp = counter_u64_alloc(M_WAITOK);
 
 	ng_netflow_v9_cache_init(priv);
 	CTR0(KTR_NET, "ng_netflow startup()");
 }
 
 /*
  * Initialize new FIB table for v5 and v9.
  *
  * Does nothing if export state for @fib already exists.  Otherwise a new
  * fib_export is allocated and published with an atomic compare-and-set;
  * if another context published one first, the local copy is discarded.
  * Returns 0 on success (including the lost race), ENOMEM if the
  * allocation fails.
  */
 int
 ng_netflow_fib_init(priv_p priv, int fib)
 {
 	fib_export_p	fe;
 
 	fe = priv_to_fib(priv, fib);
 
 	CTR1(KTR_NET, "ng_netflow(): fib init: %d", fib);
 
 	if (fe != NULL)
 		return (0);
 
 	fe = malloc(sizeof(struct fib_export), M_NETGRAPH, M_NOWAIT | M_ZERO);
 	if (fe == NULL)
 		return (ENOMEM);
 
 	mtx_init(&fe->export_mtx, "export dgram lock", NULL, MTX_DEF);
 	mtx_init(&fe->export9_mtx, "export9 dgram lock", NULL, MTX_DEF);
 	fe->fib = fib;
 	fe->domain_id = fib;
 
 	if (atomic_cmpset_ptr((volatile uintptr_t *)&priv->fib_data[fib],
 	    (uintptr_t)NULL, (uintptr_t)fe) != 0) {
 		/* Increase counter for statistics */
 		CTR3(KTR_NET, "ng_netflow(): fib %d setup to %p (%p)",
 		    fib, fe, priv_to_fib(priv, fib));
 		priv->nfinfo_alloc_fibs++;
 		return (0);
 	}
 
 	/* FIB already set up by other ISR */
 	CTR3(KTR_NET, "ng_netflow(): fib init: %d setup %p but got %p",
 	    fib, fe, priv_to_fib(priv, fib));
 	mtx_destroy(&fe->export_mtx);
 	mtx_destroy(&fe->export9_mtx);
 	free(fe, M_NETGRAPH);
 
 	return (0);
 }
 
 /*
  * Free all flow cache memory. Called from node close method.
  *
  * Every cached flow is expired (and thereby exported) first; then the
  * zones, hash mutexes and hash tables are destroyed, pending export
  * datagrams of each FIB are sent, the per-FIB state is freed and the
  * statistics counters are released.
  */
 void
 ng_netflow_cache_flush(priv_p priv)
 {
 	struct flow_entry	*fle, *fle1;
 	struct flow_hash_entry	*hsh;
 	struct netflow_export_item exp;
 	fib_export_p fe;
 	int i;
 
 	/* NOTE(review): exp is zeroed here but not used again in this function. */
 	bzero(&exp, sizeof(exp));
 
 	/*
 	 * We are going to free probably billable data.
 	 * Expire everything before freeing it.
 	 * No locking is required since callout is already drained.
 	 */
 	for (hsh = priv->hash, i = 0; i < NBUCKETS; hsh++, i++)
 		TAILQ_FOREACH_SAFE(fle, &hsh->head, fle_hash, fle1) {
 			TAILQ_REMOVE(&hsh->head, fle, fle_hash);
 			fe = priv_to_fib(priv, fle->f.r.fib);
 			expire_flow(priv, fe, fle, NG_QUEUE);
 		}
 #ifdef INET6
 	for (hsh = priv->hash6, i = 0; i < NBUCKETS; hsh++, i++)
 		TAILQ_FOREACH_SAFE(fle, &hsh->head, fle_hash, fle1) {
 			TAILQ_REMOVE(&hsh->head, fle, fle_hash);
 			fe = priv_to_fib(priv, fle->f.r.fib);
 			expire_flow(priv, fe, fle, NG_QUEUE);
 		}
 #endif
 
 	uma_zdestroy(priv->zone);
 	/* Destroy hash mutexes. */
 	for (i = 0, hsh = priv->hash; i < NBUCKETS; i++, hsh++)
 		mtx_destroy(&hsh->mtx);
 
 	/*
 	 * Free hash memory.
 	 * NOTE(review): priv->hash was already dereferenced by the loops
 	 * above, so this NULL check cannot protect anything.
 	 */
 	if (priv->hash != NULL)
 		free(priv->hash, M_NETFLOW_HASH);
 #ifdef INET6
 	uma_zdestroy(priv->zone6);
 	/* Destroy hash mutexes. */
 	for (i = 0, hsh = priv->hash6; i < NBUCKETS; i++, hsh++)
 		mtx_destroy(&hsh->mtx);
 
 	/* Free hash memory (same remark as for priv->hash applies). */
 	if (priv->hash6 != NULL)
 		free(priv->hash6, M_NETFLOW_HASH);
 #endif
 
 	/* Push out pending datagrams and release the per-FIB export state. */
 	for (i = 0; i < priv->maxfibs; i++) {
 		if ((fe = priv_to_fib(priv, i)) == NULL)
 			continue;
 
 		if (fe->exp.item != NULL)
 			export_send(priv, fe, fe->exp.item, NG_QUEUE);
 
 		if (fe->exp.item9 != NULL)
 			export9_send(priv, fe, fe->exp.item9,
 			    fe->exp.item9_opt, NG_QUEUE);
 
 		mtx_destroy(&fe->export_mtx);
 		mtx_destroy(&fe->export9_mtx);
 		free(fe, M_NETGRAPH);
 	}
 
 	counter_u64_free(priv->nfinfo_bytes);
 	counter_u64_free(priv->nfinfo_packets);
 	counter_u64_free(priv->nfinfo_bytes6);
 	counter_u64_free(priv->nfinfo_packets6);
 	counter_u64_free(priv->nfinfo_sbytes);
 	counter_u64_free(priv->nfinfo_spackets);
 	counter_u64_free(priv->nfinfo_sbytes6);
 	counter_u64_free(priv->nfinfo_spackets6);
 	counter_u64_free(priv->nfinfo_act_exp);
 	counter_u64_free(priv->nfinfo_inact_exp);
 
 	ng_netflow_v9_cache_flush(priv);
 }
 
 #ifdef INET
 /*
  * Account one IPv4 packet in the flow cache.
  *
  * A lookup key (struct flow_rec) is built from the IP header, the FIB of
  * the export context 'fe' and the incoming interface index. The matching
  * hash bucket is then searched under its mutex: a matching flow gets its
  * counters updated (and may be expired actively), otherwise a new flow is
  * created through hash_insert(). Stale entries met during the search are
  * expired on the way.
  *
  * Returns EINVAL when the header is not IPv4 or its header length is
  * smaller than struct ip; otherwise 0 or whatever hash_insert() returned.
  *
  * NOTE(review): upper_ptr and upper_proto are not referenced in this
  * function, and eproto is assigned but never read.
  * NOTE(review): the TCP/UDP header at ip + hlen is read without a length
  * check here; presumably the caller guarantees it is present and
  * contiguous - confirm.
  */
 int
 ng_netflow_flow_add(priv_p priv, fib_export_p fe, struct ip *ip,
     caddr_t upper_ptr, uint8_t upper_proto, uint8_t flags,
     unsigned int src_if_index)
 {
 	struct flow_entry	*fle, *fle1;
 	struct flow_hash_entry	*hsh;
 	struct flow_rec		r;
 	int			hlen, plen;
 	int			error = 0;
 	uint16_t		eproto;
 	uint8_t			tcp_flags = 0;
 
 	/* The key is compared with bcmp() below, so start from all zeroes. */
 	bzero(&r, sizeof(r));
 
 	if (ip->ip_v != IPVERSION)
 		return (EINVAL);
 
 	/* ip_hl counts 32-bit words. */
 	hlen = ip->ip_hl << 2;
 	if (hlen < sizeof(struct ip))
 		return (EINVAL);
 
 	eproto = ETHERTYPE_IP;
 	/* Assume L4 template by default */
 	r.flow_type = NETFLOW_V9_FLOW_V4_L4;
 
 	r.r_src = ip->ip_src;
 	r.r_dst = ip->ip_dst;
 	r.fib = fe->fib;
 
 	plen = ntohs(ip->ip_len);
 
 	r.r_ip_p = ip->ip_p;
 	r.r_tos = ip->ip_tos;
 
 	r.r_i_ifx = src_if_index;
 
 	/*
 	 * XXX NOTE: only the first fragment of a fragmented TCP, UDP or
 	 * ICMP packet will be recorded with proper s_port and d_port.
 	 * Following fragments will be recorded simply as IP packets with
 	 * ip_proto = ip->ip_p and s_port, d_port set to zero.
 	 * I know, it looks like a bug. But I don't want to re-implement
 	 * IP packet reassembly here. Anyway, (in)famous trafd works this
 	 * way - and nobody complains yet :)
 	 */
 	if ((ip->ip_off & htons(IP_OFFMASK)) == 0)
 		switch(r.r_ip_p) {
 		case IPPROTO_TCP:
 		    {
 			struct tcphdr *tcp;
 
 			tcp = (struct tcphdr *)((caddr_t )ip + hlen);
 			r.r_sport = tcp->th_sport;
 			r.r_dport = tcp->th_dport;
 			tcp_flags = tcp->th_flags;
 			break;
 		    }
 		case IPPROTO_UDP:
 			/* Both 16-bit ports are copied with one 32-bit load. */
 			r.r_ports = *(uint32_t *)((caddr_t )ip + hlen);
 			break;
 		}
 
 	counter_u64_add(priv->nfinfo_packets, 1);
 	counter_u64_add(priv->nfinfo_bytes, plen);
 
 	/* Find hash slot. */
 	hsh = &priv->hash[ip_hash(&r)];
 
 	mtx_lock(&hsh->mtx);
 
 	/*
 	 * Go through the bucket and find our entry. If we encounter an
 	 * entry that should be expired, purge it. The search runs from the
 	 * tail because a flow that has just been hit is moved to the tail
 	 * below, so recently active flows are found first.
 	 */
 	TAILQ_FOREACH_REVERSE_SAFE(fle, &hsh->head, fhead, fle_hash, fle1) {
 		if (bcmp(&r, &fle->f.r, sizeof(struct flow_rec)) == 0)
 			break;
 		if ((INACTIVE(fle) && SMALL(fle)) || AGED(fle)) {
 			TAILQ_REMOVE(&hsh->head, fle, fle_hash);
 			expire_flow(priv, priv_to_fib(priv, fle->f.r.fib),
 			    fle, NG_QUEUE);
 			counter_u64_add(priv->nfinfo_act_exp, 1);
 		}
 	}
 
 	/* fle is NULL here when the loop ran to completion without a match. */
 	if (fle) {			/* An existent entry. */
 
 		fle->f.bytes += plen;
 		fle->f.packets ++;
 		fle->f.tcp_flags |= tcp_flags;
 		fle->f.last = time_uptime;
 
 		/*
 		 * We have the following reasons to expire flow in active way:
 		 * - it hit active timeout
 		 * - a TCP connection closed
 		 * - it is going to overflow counter
 		 */
 		if (tcp_flags & TH_FIN || tcp_flags & TH_RST || AGED(fle) ||
 		    (fle->f.bytes >= (CNTR_MAX - IF_MAXMTU)) ) {
 			TAILQ_REMOVE(&hsh->head, fle, fle_hash);
 			expire_flow(priv, priv_to_fib(priv, fle->f.r.fib),
 			    fle, NG_QUEUE);
 			counter_u64_add(priv->nfinfo_act_exp, 1);
 		} else {
 			/*
 			 * It is the newest, move it to the tail,
 			 * if it isn't there already. Next search will
 			 * locate it quicker.
 			 */
 			if (fle != TAILQ_LAST(&hsh->head, fhead)) {
 				TAILQ_REMOVE(&hsh->head, fle, fle_hash);
 				TAILQ_INSERT_TAIL(&hsh->head, fle, fle_hash);
 			}
 		}
 	} else				/* A new flow entry. */
 		error = hash_insert(priv, hsh, &r, plen, flags, tcp_flags);
 
 	mtx_unlock(&hsh->mtx);
 
 	return (error);
 }
 #endif
 
 #ifdef INET6
 /*
  * Account one IPv6 packet in the flow cache.
  *
  * The lookup key (struct flow6_rec) is built from the IPv6 addresses, the
  * FIB of the export context 'fe', the upper-layer protocol and the incoming
  * interface index. For non-fragments carrying TCP, UDP or SCTP the first
  * 32 bits at upper_ptr (the port pair) are added to the key. The matching
  * bucket in priv->hash6 is searched under its mutex; a hit updates the
  * flow, a miss creates one through hash6_insert().
  *
  * Returns EINVAL when the version field is not IPv6; otherwise 0 or the
  * result of hash6_insert().
  *
  * NOTE(review): upper_ptr is dereferenced without a length check here;
  * presumably the caller has validated it - confirm.
  */
 int
 ng_netflow_flow6_add(priv_p priv, fib_export_p fe, struct ip6_hdr *ip6,
     caddr_t upper_ptr, uint8_t upper_proto, uint8_t flags,
     unsigned int src_if_index)
 {
 	struct flow_entry	*fle = NULL, *fle1;
 	struct flow6_entry	*fle6;
 	struct flow_hash_entry	*hsh;
 	struct flow6_rec	r;
 	int			plen;
 	int			error = 0;
 	uint8_t			tcp_flags = 0;
 
 	/* check version */
 	if ((ip6->ip6_vfc & IPV6_VERSION_MASK) != IPV6_VERSION)
 		return (EINVAL);
 
 	/* The key is compared with bcmp() below, so start from all zeroes. */
 	bzero(&r, sizeof(r));
 
 	r.src.r_src6 = ip6->ip6_src;
 	r.dst.r_dst6 = ip6->ip6_dst;
 	r.fib = fe->fib;
 
 	/* Assume L4 template by default */
 	r.flow_type = NETFLOW_V9_FLOW_V6_L4;
 
 	/* ip6_plen does not include the fixed IPv6 header; add it back. */
 	plen = ntohs(ip6->ip6_plen) + sizeof(struct ip6_hdr);
 
 #if 0
 	/* XXX: set DSCP/CoS value */
 	r.r_tos = ip->ip_tos;
 #endif
 	if ((flags & NG_NETFLOW_IS_FRAG) == 0) {
 		switch(upper_proto) {
 		case IPPROTO_TCP:
 		    {
 			struct tcphdr *tcp;
 
 			tcp = (struct tcphdr *)upper_ptr;
 			r.r_ports = *(uint32_t *)upper_ptr;
 			tcp_flags = tcp->th_flags;
 			break;
 		    }
  		case IPPROTO_UDP:
 		case IPPROTO_SCTP:
 			r.r_ports = *(uint32_t *)upper_ptr;
 			break;
 		}
 	}	
 
 	r.r_ip_p = upper_proto;
 	r.r_i_ifx = src_if_index;
 
 	counter_u64_add(priv->nfinfo_packets6, 1);
 	counter_u64_add(priv->nfinfo_bytes6, plen);
 
 	/* Find hash slot. */
 	hsh = &priv->hash6[ip6_hash(&r)];
 
 	mtx_lock(&hsh->mtx);
 
 	/*
 	 * Go through the bucket and find our entry. If we encounter an
 	 * entry that should be expired, purge it. The search runs from the
 	 * tail because a flow that has just been hit is moved to the tail
 	 * below, so recently active flows are found first.
 	 */
 	TAILQ_FOREACH_REVERSE_SAFE(fle, &hsh->head, fhead, fle_hash, fle1) {
 		/* Entries that are not marked as IPv6 are ignored. */
 		if (fle->f.version != IP6VERSION)
 			continue;
 		fle6 = (struct flow6_entry *)fle;
 		if (bcmp(&r, &fle6->f.r, sizeof(struct flow6_rec)) == 0)
 			break;
 		if ((INACTIVE(fle6) && SMALL(fle6)) || AGED(fle6)) {
 			TAILQ_REMOVE(&hsh->head, fle, fle_hash);
 			expire_flow(priv, priv_to_fib(priv, fle->f.r.fib), fle,
 			    NG_QUEUE);
 			counter_u64_add(priv->nfinfo_act_exp, 1);
 		}
 	}
 
 	/* fle is NULL here when the loop ran to completion without a match. */
 	if (fle != NULL) {			/* An existent entry. */
 		fle6 = (struct flow6_entry *)fle;
 
 		fle6->f.bytes += plen;
 		fle6->f.packets ++;
 		fle6->f.tcp_flags |= tcp_flags;
 		fle6->f.last = time_uptime;
 
 		/*
 		 * We have the following reasons to expire flow in active way:
 		 * - it hit active timeout
 		 * - a TCP connection closed
 		 * - it is going to overflow counter
 		 */
 		if (tcp_flags & TH_FIN || tcp_flags & TH_RST || AGED(fle6) ||
 		    (fle6->f.bytes >= (CNTR_MAX - IF_MAXMTU)) ) {
 			TAILQ_REMOVE(&hsh->head, fle, fle_hash);
 			expire_flow(priv, priv_to_fib(priv, fle->f.r.fib), fle,
 			    NG_QUEUE);
 			counter_u64_add(priv->nfinfo_act_exp, 1);
 		} else {
 			/*
 			 * It is the newest, move it to the tail,
 			 * if it isn't there already. Next search will
 			 * locate it quicker.
 			 */
 			if (fle != TAILQ_LAST(&hsh->head, fhead)) {
 				TAILQ_REMOVE(&hsh->head, fle, fle_hash);
 				TAILQ_INSERT_TAIL(&hsh->head, fle, fle_hash);
 			}
 		}
 	} else				/* A new flow entry. */
 		error = hash6_insert(priv, hsh, &r, plen, flags, tcp_flags);
 
 	mtx_unlock(&hsh->mtx);
 
 	return (error);
 }
 #endif
 
 /*
  * Return records from cache to userland.
  *
  * 'req' selects the address family (version 4 or, with INET6, 6) and the
  * position to resume from: hash_id is the bucket index and list_id the
  * 1-based position inside that bucket (0 means "from the start").
  * 'resp' receives the header and, directly behind it, up to NREC_AT_ONCE
  * (NREC6_AT_ONCE for IPv6) flow records; presumably the caller sized the
  * buffer accordingly - confirm. resp->hash_id/list_id tell userland where
  * to continue; both are zero once the whole cache has been walked.
  *
  * Returns EINVAL for a bucket index outside [0, NBUCKETS) or an
  * unsupported version, 0 otherwise. Note that req->list_id is modified.
  *
  * TODO: matching particular IP should be done in kernel, here.
  */
 int
 ng_netflow_flow_show(priv_p priv, struct ngnf_show_header *req,
     struct ngnf_show_header *resp)
 {
 	struct flow_hash_entry	*hsh;
 	struct flow_entry	*fle;
 	struct flow_entry_data	*data = (struct flow_entry_data *)(resp + 1);
 #ifdef INET6
 	struct flow6_entry_data	*data6 = (struct flow6_entry_data *)(resp + 1);
 #endif
 	int	i, max;
 
 	/*
 	 * The index comes from userland and lands in a signed int, so both
 	 * ends of the range have to be checked: a negative value would
 	 * otherwise pass the upper-bound test and index before the array.
 	 */
 	i = req->hash_id;
 	if (i < 0 || i > NBUCKETS-1)
 		return (EINVAL);
 
 #ifdef INET6
 	if (req->version == 6) {
 		resp->version = 6;
 		hsh = priv->hash6 + i;
 		max = NREC6_AT_ONCE;
 	} else
 #endif
 	if (req->version == 4) {
 		resp->version = 4;
 		hsh = priv->hash + i;
 		max = NREC_AT_ONCE;
 	} else
 		return (EINVAL);
 
 	/*
 	 * We will transfer not more than NREC_AT_ONCE. More data
 	 * will come in next message.
 	 * We send current hash index and current record number in list
 	 * to userland, and userland should return it back to us.
 	 * Then, we will restart with new entry.
 	 *
 	 * The resulting cache snapshot can be inaccurate if flow expiration
 	 * is taking place on hash item between userland data requests for
 	 * this hash item id.
 	 */
 	resp->nentries = 0;
 	for (; i < NBUCKETS; hsh++, i++) {
 		int list_id;
 
 		if (mtx_trylock(&hsh->mtx) == 0) {
 			/*
 			 * Requested hash index is not available,
 			 * relay decision to skip or re-request data
 			 * to userland.
 			 */
 			resp->hash_id = i;
 			resp->list_id = 0;
 			return (0);
 		}
 
 		list_id = 0;
 		TAILQ_FOREACH(fle, &hsh->head, fle_hash) {
 			/*
 			 * Somebody is waiting for this bucket: stop here and
 			 * let userland come back later.
 			 * NOTE(review): resuming at 'list_id' may repeat
 			 * records that were already returned - confirm that
 			 * userland tolerates duplicates.
 			 */
 			if (hsh->mtx.mtx_lock & MTX_CONTESTED) {
 				resp->hash_id = i;
 				resp->list_id = list_id;
 				mtx_unlock(&hsh->mtx);
 				return (0);
 			}
 
 			list_id++;
 			/* Search for particular record in list. */
 			if (req->list_id > 0) {
 				if (list_id < req->list_id)
 					continue;
 
 				/* Requested list position found. */
 				req->list_id = 0;
 			}
 #ifdef INET6
 			if (req->version == 6) {
 				struct flow6_entry *fle6;
 
 				fle6 = (struct flow6_entry *)fle;
 				bcopy(&fle6->f, data6 + resp->nentries,
 				    sizeof(fle6->f));
 			} else
 #endif
 				bcopy(&fle->f, data + resp->nentries,
 				    sizeof(fle->f));
 			resp->nentries++;
 			if (resp->nentries == max) {
 				resp->hash_id = i;
 				/*
 				 * If it was the last item in list
 				 * we simply skip to next hash_id.
 				 */
 				resp->list_id = list_id + 1;
 				mtx_unlock(&hsh->mtx);
 				return (0);
 			}
 		}
 		mtx_unlock(&hsh->mtx);
 
 		/*
 		 * The resume position only applies to the bucket it was
 		 * issued for. If that bucket turned out to be shorter than
 		 * the requested position (entries expired, or the previous
 		 * reply ended exactly on its last item), drop the position
 		 * so that the following buckets are dumped from their start
 		 * instead of having their leading entries skipped.
 		 */
 		req->list_id = 0;
 	}
 
 	resp->hash_id = resp->list_id = 0;
 
 	return (0);
 }
 
 /*
  * Finalize a NetFlow v5 export datagram and pass it to the export hook.
  *
  * The mbuf carried by 'item' already holds the records. This routine sets
  * the mbuf lengths from the record count, completes the v5 header and then
  * forwards the item on priv->export, or frees it when no export hook is
  * set. Returns the error reported by the forward, 0 otherwise.
  */
 static int
 export_send(priv_p priv, fib_export_p fe, item_p item, int flags)
 {
 	struct netflow_v5_export_dgram *dgram;
 	struct netflow_v5_header *hdr;
 	struct timespec now;
 	struct mbuf *m;
 	int error;
 
 	error = 0;
 	m = NGI_M(item);
 	dgram = mtod(m, struct netflow_v5_export_dgram *);
 	hdr = &dgram->header;
 
 	/* Datagram size: the fixed header plus hdr->count records. */
 	m->m_pkthdr.len = sizeof(struct netflow_v5_record) * hdr->count +
 	    sizeof(struct netflow_v5_header);
 	m->m_len = m->m_pkthdr.len;
 
 	/* Time stamps. */
 	hdr->sys_uptime = htonl(MILLIUPTIME(time_uptime));
 	getnanotime(&now);
 	hdr->unix_secs = htonl(now.tv_sec);
 	hdr->unix_nsecs = htonl(now.tv_nsec);
 
 	/* Exporter identification. */
 	hdr->engine_type = 0;
 	hdr->engine_id = fe->domain_id;
 	hdr->pad = 0;
 
 	/*
 	 * hdr->count is still in host order here: it advances the per-FIB
 	 * sequence counter first and is byte-swapped only afterwards.
 	 */
 	hdr->flow_seq = htonl(atomic_fetchadd_32(&fe->flow_seq, hdr->count));
 	hdr->count = htons(hdr->count);
 
 	if (priv->export == NULL) {
 		NG_FREE_ITEM(item);
 		return (error);
 	}
 
 	NG_FWD_ITEM_HOOK_FLAGS(error, item, priv->export, flags);
 	return (error);
 }
 
 /*
  * Append one flow to the NetFlow v5 datagram carried by 'item'.
  *
  * The record goes into the slot indexed by the current header count, which
  * is then incremented. Returns 1 when the datagram now holds
  * NETFLOW_V5_MAX_RECORDS records (end of datagram), 0 otherwise.
  */
 static int
 export_add(item_p item, struct flow_entry *fle)
 {
 	struct netflow_v5_export_dgram *dgram;
 	struct netflow_v5_header *hdr;
 	struct netflow_v5_record *slot;
 
 	dgram = mtod(NGI_M(item), struct netflow_v5_export_dgram *);
 	hdr = &dgram->header;
 
 	/* Claim the next free slot. */
 	slot = &dgram->r[hdr->count];
 	hdr->count ++;
 
 	KASSERT(hdr->count <= NETFLOW_V5_MAX_RECORDS,
 	    ("ng_netflow: export too big"));
 
 	/* Addresses and ports are stored as they are kept in the flow key. */
 	slot->src_addr = fle->f.r.r_src.s_addr;
 	slot->dst_addr = fle->f.r.r_dst.s_addr;
 	slot->next_hop = fle->f.next_hop.s_addr;
 	slot->s_port = fle->f.r.r_sport;
 	slot->d_port = fle->f.r.r_dport;
 
 	/* Interface indexes, counters and time stamps are byte-swapped. */
 	slot->i_ifx = htons(fle->f.fle_i_ifx);
 	slot->o_ifx = htons(fle->f.fle_o_ifx);
 	slot->packets = htonl(fle->f.packets);
 	slot->octets = htonl(fle->f.bytes);
 	slot->first = htonl(MILLIUPTIME(fle->f.first));
 	slot->last = htonl(MILLIUPTIME(fle->f.last));
 
 	/* Single-byte fields. */
 	slot->flags = fle->f.tcp_flags;
 	slot->prot = fle->f.r.r_ip_p;
 	slot->tos = fle->f.r.r_tos;
 	slot->dst_mask = fle->f.dst_mask;
 	slot->src_mask = fle->f.src_mask;
 	slot->pad1 = 0;
 	slot->pad2 = 0;
 
 	/* Not supported fields. */
 	slot->dst_as = 0;
 	slot->src_as = 0;
 
 	/* Tell the caller whether the datagram is full. */
 	return (hdr->count == NETFLOW_V5_MAX_RECORDS ? 1 : 0);
 }
 
 /*
  * Periodic flow expiry run.
  *
  * 'arg' is the node private data. Every bucket of the IPv4 hash (and, with
  * INET6, of the IPv6 hash) is visited; buckets whose mutex cannot be taken
  * immediately are skipped, and the walk of a bucket is abandoned as soon as
  * its mutex is marked contested. Flows that qualify are unlinked and handed
  * to expire_flow() with NG_NOFLAGS. Finally the callout is re-armed to fire
  * again after one second (1*hz ticks).
  */
 void
 ng_netflow_expire(void *arg)
 {
 	struct flow_entry	*fle, *fle1;
 	struct flow_hash_entry	*hsh;
 	priv_p			priv = (priv_p )arg;
 	int			used, i;
 
 	/*
 	 * Going through all the cache.
 	 */
 	/* 'used' tracks the zone's item count and is decremented locally. */
 	used = uma_zone_get_cur(priv->zone);
 	for (hsh = priv->hash, i = 0; i < NBUCKETS; hsh++, i++) {
 		/*
 		 * Skip entries, that are already being worked on.
 		 */
 		if (mtx_trylock(&hsh->mtx) == 0)
 			continue;
 
 		TAILQ_FOREACH_SAFE(fle, &hsh->head, fle_hash, fle1) {
 			/*
 			 * Interrupt thread wants this entry!
 			 * Quick! Quick! Bail out!
 			 */
 			if (hsh->mtx.mtx_lock & MTX_CONTESTED)
 				break;
 
 			/*
 			 * Don't expire aggressively while hash collision
 			 * ratio is predicted small.
 			 */
 			if (used <= (NBUCKETS*2) && !INACTIVE(fle))
 				break;
 
 			/*
 			 * Expire inactive flows that are small or, when the
 			 * cache is crowded, of any size; aged flows always.
 			 */
 			if ((INACTIVE(fle) && (SMALL(fle) ||
 			    (used > (NBUCKETS*2)))) || AGED(fle)) {
 				TAILQ_REMOVE(&hsh->head, fle, fle_hash);
 				expire_flow(priv, priv_to_fib(priv,
 				    fle->f.r.fib), fle, NG_NOFLAGS);
 				used--;
 				counter_u64_add(priv->nfinfo_inact_exp, 1);
 			}
 		}
 		mtx_unlock(&hsh->mtx);
 	}
 
 #ifdef INET6
 	/* Same pass over the IPv6 hash, accounted against zone6. */
 	used = uma_zone_get_cur(priv->zone6);
 	for (hsh = priv->hash6, i = 0; i < NBUCKETS; hsh++, i++) {
 		struct flow6_entry	*fle6;
 
 		/*
 		 * Skip entries, that are already being worked on.
 		 */
 		if (mtx_trylock(&hsh->mtx) == 0)
 			continue;
 
 		TAILQ_FOREACH_SAFE(fle, &hsh->head, fle_hash, fle1) {
 			fle6 = (struct flow6_entry *)fle;
 			/*
 			 * Interrupt thread wants this entry!
 			 * Quick! Quick! Bail out!
 			 */
 			if (hsh->mtx.mtx_lock & MTX_CONTESTED)
 				break;
 
 			/*
 			 * Don't expire aggressively while hash collision
 			 * ratio is predicted small.
 			 */
 			if (used <= (NBUCKETS*2) && !INACTIVE(fle6))
 				break;
 
 			if ((INACTIVE(fle6) && (SMALL(fle6) ||
 			    (used > (NBUCKETS*2)))) || AGED(fle6)) {
 				TAILQ_REMOVE(&hsh->head, fle, fle_hash);
 				expire_flow(priv, priv_to_fib(priv,
 				    fle->f.r.fib), fle, NG_NOFLAGS);
 				used--;
 				counter_u64_add(priv->nfinfo_inact_exp, 1);
 			}
 		}
 		mtx_unlock(&hsh->mtx);
 	}
 #endif
 
 	/* Schedule next expire. */
 	callout_reset(&priv->exp_callout, (1*hz), &ng_netflow_expire,
 	    (void *)priv);
 }
diff --git a/sys/netgraph/ng_iface.c b/sys/netgraph/ng_iface.c
index 1e586d687244..e6871435fa88 100644
--- a/sys/netgraph/ng_iface.c
+++ b/sys/netgraph/ng_iface.c
@@ -1,818 +1,818 @@
 /*
  * ng_iface.c
  */
 
 /*-
  * Copyright (c) 1996-1999 Whistle Communications, Inc.
  * All rights reserved.
  * 
  * Subject to the following obligations and disclaimer of warranty, use and
  * redistribution of this software, in source or object code forms, with or
  * without modifications are expressly permitted by Whistle Communications;
  * provided, however, that:
  * 1. Any and all reproductions of the source or object code must include the
  *    copyright notice above and the following disclaimer of warranties; and
  * 2. No rights are granted, in any manner or form, to use Whistle
  *    Communications, Inc. trademarks, including the mark "WHISTLE
  *    COMMUNICATIONS" on advertising, endorsements, or otherwise except as
  *    such appears in the above copyright notice or in the software.
  * 
  * THIS SOFTWARE IS BEING PROVIDED BY WHISTLE COMMUNICATIONS "AS IS", AND
  * TO THE MAXIMUM EXTENT PERMITTED BY LAW, WHISTLE COMMUNICATIONS MAKES NO
  * REPRESENTATIONS OR WARRANTIES, EXPRESS OR IMPLIED, REGARDING THIS SOFTWARE,
  * INCLUDING WITHOUT LIMITATION, ANY AND ALL IMPLIED WARRANTIES OF
  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, OR NON-INFRINGEMENT.
  * WHISTLE COMMUNICATIONS DOES NOT WARRANT, GUARANTEE, OR MAKE ANY
  * REPRESENTATIONS REGARDING THE USE OF, OR THE RESULTS OF THE USE OF THIS
  * SOFTWARE IN TERMS OF ITS CORRECTNESS, ACCURACY, RELIABILITY OR OTHERWISE.
  * IN NO EVENT SHALL WHISTLE COMMUNICATIONS BE LIABLE FOR ANY DAMAGES
  * RESULTING FROM OR ARISING OUT OF ANY USE OF THIS SOFTWARE, INCLUDING
  * WITHOUT LIMITATION, ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY,
  * PUNITIVE, OR CONSEQUENTIAL DAMAGES, PROCUREMENT OF SUBSTITUTE GOODS OR
  * SERVICES, LOSS OF USE, DATA OR PROFITS, HOWEVER CAUSED AND UNDER ANY
  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
  * THIS SOFTWARE, EVEN IF WHISTLE COMMUNICATIONS IS ADVISED OF THE POSSIBILITY
  * OF SUCH DAMAGE.
  *
  * Author: Archie Cobbs <archie@freebsd.org>
  *
  * $FreeBSD$
  * $Whistle: ng_iface.c,v 1.33 1999/11/01 09:24:51 julian Exp $
  */
 
 /*
  * This node is also a system networking interface. It has
  * a hook for each protocol (IP, AppleTalk, etc). Packets
  * are simply relayed between the interface and the hooks.
  *
  * Interfaces are named ng0, ng1, etc.  New nodes take the
  * first available interface name.
  *
  * This node also includes Berkeley packet filter support.
  */
 
 #include "opt_inet.h"
 #include "opt_inet6.h"
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/errno.h>
 #include <sys/kernel.h>
 #include <sys/lock.h>
 #include <sys/malloc.h>
 #include <sys/mbuf.h>
 #include <sys/errno.h>
 #include <sys/proc.h>
 #include <sys/random.h>
 #include <sys/rmlock.h>
 #include <sys/sockio.h>
 #include <sys/socket.h>
 #include <sys/sysctl.h>
 #include <sys/syslog.h>
 #include <sys/libkern.h>
 
 #include <net/if.h>
 #include <net/if_var.h>
 #include <net/if_types.h>
 #include <net/bpf.h>
 #include <net/netisr.h>
 #include <net/route.h>
 #include <net/vnet.h>
 
 #include <netinet/in.h>
 
 #include <netgraph/ng_message.h>
 #include <netgraph/netgraph.h>
 #include <netgraph/ng_parse.h>
 #include <netgraph/ng_iface.h>
 
 #ifdef NG_SEPARATE_MALLOC
 static MALLOC_DEFINE(M_NETGRAPH_IFACE, "netgraph_iface", "netgraph iface node");
 #else
 #define M_NETGRAPH_IFACE M_NETGRAPH
 #endif
 
 static SYSCTL_NODE(_net_graph, OID_AUTO, iface, CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
     "Point to point netgraph interface");
 VNET_DEFINE_STATIC(int, ng_iface_max_nest) = 2;
 #define	V_ng_iface_max_nest	VNET(ng_iface_max_nest)
 SYSCTL_INT(_net_graph_iface, OID_AUTO, max_nesting, CTLFLAG_VNET | CTLFLAG_RW,
     &VNET_NAME(ng_iface_max_nest), 0, "Max nested tunnels");
 
 /*
  * This struct describes one address family: it pairs the family with the
  * name of the netgraph hook that carries its packets.
  */
 struct iffam {
 	sa_family_t	family;		/* Address family */
 	const char	*hookname;	/* Name for hook */
 };
 /* Descriptors are only ever handed around as pointers to const. */
 typedef const struct iffam *iffam_p;
 
 /*
  * List of address families supported by our interface.
  * The position of an entry in this table is also the index of its hook in
  * ng_iface_private.hooks[] (see get_hook_from_iffam()).
  */
 const static struct iffam gFamilies[] = {
 	{ AF_INET,	NG_IFACE_HOOK_INET	},
 	{ AF_INET6,	NG_IFACE_HOOK_INET6	},
 };
 /* Number of entries in gFamilies[]. */
 #define	NUM_FAMILIES		nitems(gFamilies)
 
 /*
  * Node private data. One instance is allocated per node by the
  * constructor and is reachable both from the node (NG_NODE_PRIVATE) and
  * from the interface (if_softc).
  */
 struct ng_iface_private {
 	struct	ifnet *ifp;		/* Our interface */
 	int	unit;			/* Interface unit number */
 	node_p	node;			/* Our netgraph node */
 	hook_p	hooks[NUM_FAMILIES];	/* Hook for each address family */
 	struct rmlock	lock;		/* Protect private data changes */
 };
 typedef struct ng_iface_private *priv_p;
 
 #define	PRIV_RLOCK(priv, t)	rm_rlock(&priv->lock, t)
 #define	PRIV_RUNLOCK(priv, t)	rm_runlock(&priv->lock, t)
 #define	PRIV_WLOCK(priv)	rm_wlock(&priv->lock)
 #define	PRIV_WUNLOCK(priv)	rm_wunlock(&priv->lock)
 
 /* Interface methods */
 static void	ng_iface_start(struct ifnet *ifp);
 static int	ng_iface_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data);
 static int	ng_iface_output(struct ifnet *ifp, struct mbuf *m0,
     			const struct sockaddr *dst, struct route *ro);
 static void	ng_iface_bpftap(struct ifnet *ifp,
 			struct mbuf *m, sa_family_t family);
 static int	ng_iface_send(struct ifnet *ifp, struct mbuf *m,
 			sa_family_t sa);
 #ifdef DEBUG
 static void	ng_iface_print_ioctl(struct ifnet *ifp, int cmd, caddr_t data);
 #endif
 
 /* Netgraph methods */
 static int		ng_iface_mod_event(module_t, int, void *);
 static ng_constructor_t	ng_iface_constructor;
 static ng_rcvmsg_t	ng_iface_rcvmsg;
 static ng_shutdown_t	ng_iface_shutdown;
 static ng_newhook_t	ng_iface_newhook;
 static ng_rcvdata_t	ng_iface_rcvdata;
 static ng_disconnect_t	ng_iface_disconnect;
 
 /* Helper stuff */
 static iffam_p	get_iffam_from_af(sa_family_t family);
 static iffam_p	get_iffam_from_hook(priv_p priv, hook_p hook);
 static iffam_p	get_iffam_from_name(const char *name);
 static hook_p  *get_hook_from_iffam(priv_p priv, iffam_p iffam);
 
 /*
  * List of commands and how to convert arguments to/from ASCII.
  * The commands themselves are handled in ng_iface_rcvmsg().
  */
 static const struct ng_cmdlist ng_iface_cmds[] = {
 	{
 	  /* Reply carries the interface name as a string. */
 	  NGM_IFACE_COOKIE,
 	  NGM_IFACE_GET_IFNAME,
 	  "getifname",
 	  NULL,
 	  &ng_parse_string_type
 	},
 	{
 	  /* Switch the interface to point-to-point mode; no arguments. */
 	  NGM_IFACE_COOKIE,
 	  NGM_IFACE_POINT2POINT,
 	  "point2point",
 	  NULL,
 	  NULL
 	},
 	{
 	  /* Switch the interface to broadcast mode; no arguments. */
 	  NGM_IFACE_COOKIE,
 	  NGM_IFACE_BROADCAST,
 	  "broadcast",
 	  NULL,
 	  NULL
 	},
 	{
 	  /* Reply carries the interface index as a uint32. */
 	  NGM_IFACE_COOKIE,
 	  NGM_IFACE_GET_IFINDEX,
 	  "getifindex",
 	  NULL,
 	  &ng_parse_uint32_type
 	},
 	{ 0 }	/* Zeroed entry ends the list. */
 };
 
 /*
  * Node type descriptor: binds the node type name to the methods
  * implemented in this file and to its command list, and registers the
  * type with netgraph through NETGRAPH_INIT().
  */
 static struct ng_type typestruct = {
 	.version =	NG_ABI_VERSION,
 	.name =		NG_IFACE_NODE_TYPE,
 	.mod_event =	ng_iface_mod_event,
 	.constructor =	ng_iface_constructor,
 	.rcvmsg =	ng_iface_rcvmsg,
 	.shutdown =	ng_iface_shutdown,
 	.newhook =	ng_iface_newhook,
 	.rcvdata =	ng_iface_rcvdata,
 	.disconnect =	ng_iface_disconnect,
 	.cmdlist =	ng_iface_cmds,
 };
 NETGRAPH_INIT(iface, &typestruct);
 
 VNET_DEFINE_STATIC(struct unrhdr *, ng_iface_unit);
 #define	V_ng_iface_unit			VNET(ng_iface_unit)
 
 /************************************************************************
 			HELPER STUFF
  ************************************************************************/
 
 /*
  * Look up the family descriptor for an address family ID.
  * Returns NULL when the family is not listed in gFamilies[].
  */
 static __inline iffam_p
 get_iffam_from_af(sa_family_t family)
 {
 	int idx;
 
 	for (idx = 0; idx < NUM_FAMILIES; idx++)
 		if (gFamilies[idx].family == family)
 			return (&gFamilies[idx]);
 
 	return (NULL);
 }
 
 /*
  * Look up the family descriptor that a hook is registered for.
  * Returns NULL when the hook is not stored in priv->hooks[].
  */
 static __inline iffam_p
 get_iffam_from_hook(priv_p priv, hook_p hook)
 {
 	int idx = 0;
 
 	while (idx < NUM_FAMILIES) {
 		if (priv->hooks[idx] == hook)
 			return (&gFamilies[idx]);
 		idx++;
 	}
 	return (NULL);
 }
 
 /*
  * Return the address of the hook slot that belongs to a family
  * descriptor. The descriptor's position inside gFamilies[] is used as the
  * index into priv->hooks[].
  */
 
 static __inline hook_p *
 get_hook_from_iffam(priv_p priv, iffam_p iffam)
 {
 	return (priv->hooks + (iffam - gFamilies));
 }
 
 /*
  * Look up the family descriptor by hook name.
  * Returns NULL when no entry of gFamilies[] carries that name.
  */
 static __inline iffam_p
 get_iffam_from_name(const char *name)
 {
 	int idx;
 
 	for (idx = 0; idx < NUM_FAMILIES; idx++)
 		if (strcmp(gFamilies[idx].hookname, name) == 0)
 			return (&gFamilies[idx]);
 
 	return (NULL);
 }
 
 /************************************************************************
 			INTERFACE STUFF
  ************************************************************************/
 
 /*
  * Process an ioctl for the virtual interface.
  * Returns 0 on success, EINVAL for an out-of-range MTU or an unknown
  * command, and EOPNOTSUPP for SIOCSIFPHYS.
  */
 static int
 ng_iface_ioctl(struct ifnet *ifp, u_long command, caddr_t data)
 {
 	struct ifreq *const ifr = (struct ifreq *) data;
 	int error = 0;
 
 #ifdef DEBUG
 	ng_iface_print_ioctl(ifp, command, data);
 #endif
 	switch (command) {
 	/* Setting an address also brings the interface up and running */
 	case SIOCSIFADDR:
 		ifp->if_flags |= IFF_UP;
 		ifp->if_drv_flags |= IFF_DRV_RUNNING;
 		ifp->if_drv_flags &= ~(IFF_DRV_OACTIVE);
 		break;
 
 	/* Nothing to do here; these simply succeed */
 	case SIOCGIFADDR:
 	case SIOCADDMULTI:
 	case SIOCDELMULTI:
 		break;
 
 	/* Set flags */
 	case SIOCSIFFLAGS:
 		/*
 		 * Marked up but stopped: start it.
 		 * Marked down but running: stop it.
 		 */
 		if ((ifr->ifr_flags & IFF_UP) != 0) {
 			if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) {
 				ifp->if_drv_flags &= ~(IFF_DRV_OACTIVE);
 				ifp->if_drv_flags |= IFF_DRV_RUNNING;
 			}
 		} else if ((ifp->if_drv_flags & IFF_DRV_RUNNING) != 0) {
 			ifp->if_drv_flags &= ~(IFF_DRV_RUNNING |
 			    IFF_DRV_OACTIVE);
 		}
 		break;
 
 	/* Set the interface MTU */
 	case SIOCSIFMTU:
 		if (ifr->ifr_mtu >= NG_IFACE_MTU_MIN &&
 		    ifr->ifr_mtu <= NG_IFACE_MTU_MAX)
 			ifp->if_mtu = ifr->ifr_mtu;
 		else
 			error = EINVAL;
 		break;
 
 	/* Stuff that's not supported */
 	case SIOCSIFPHYS:
 		error = EOPNOTSUPP;
 		break;
 
 	default:
 		error = EINVAL;
 		break;
 	}
 	return (error);
 }
 
 /*
  * This routine is called to deliver a packet out the interface.
  * We simply look at the address family and relay the packet to
  * the corresponding hook, if it exists and is connected.
  *
  * The mbuf is consumed on every path. Returns ENETDOWN when the interface
  * is not both up and running, the error from the nesting check, ENOBUFS
  * when the ALTQ family header cannot be prepended, or the result of the
  * transmit/send routine.
  */
 
 static int
 ng_iface_output(struct ifnet *ifp, struct mbuf *m,
 	const struct sockaddr *dst, struct route *ro)
 {
 	uint32_t af;
 	int error;
 
 	/* Check interface flags */
 	if (!((ifp->if_flags & IFF_UP) &&
 	    (ifp->if_drv_flags & IFF_DRV_RUNNING))) {
 		m_freem(m);
 		return (ENETDOWN);
 	}
 
 	/* Protect from deadly infinite recursion. */
 	error = if_tunnel_check_nesting(ifp, m, NGM_IFACE_COOKIE,
 	    V_ng_iface_max_nest);
 	if (error) {
 		m_freem(m);
 		return (error);
 	}
 
 	/*
 	 * BPF writes need to be handled specially: with AF_UNSPEC the real
 	 * family is taken from the first bytes of sa_data.
 	 */
 	if (dst->sa_family == AF_UNSPEC)
 		bcopy(dst->sa_data, &af, sizeof(af));
 	else
-		af = dst->sa_family;
+		af = RO_GET_FAMILY(ro, dst);
 
 	/* Berkeley packet filter */
 	ng_iface_bpftap(ifp, m, af);
 
 	if (ALTQ_IS_ENABLED(&ifp->if_snd)) {
 		/*
 		 * Queue through ALTQ. The family is stashed in front of the
 		 * packet so that ng_iface_start() can recover it after the
 		 * dequeue.
 		 */
 		M_PREPEND(m, sizeof(sa_family_t), M_NOWAIT);
 		if (m == NULL) {
 			if_inc_counter(ifp, IFCOUNTER_OQDROPS, 1);
 			return (ENOBUFS);
 		}
 		*(sa_family_t *)m->m_data = af;
 		error = (ifp->if_transmit)(ifp, m);
 	} else
 		error = ng_iface_send(ifp, m, af);
 
 	return (error);
 }
 
 /*
  * Start method is used only when ALTQ is enabled.
  * Drains the send queue: each packet has its address family stored in
  * front of the payload, which is read back, stripped, and then the packet
  * is passed to ng_iface_send().
  */
 static void
 ng_iface_start(struct ifnet *ifp)
 {
 	struct mbuf *pkt;
 	sa_family_t family;
 
 	KASSERT(ALTQ_IS_ENABLED(&ifp->if_snd), ("%s without ALTQ", __func__));
 
 	IFQ_DRV_DEQUEUE(&ifp->if_snd, pkt);
 	while (pkt != NULL) {
 		/* Recover and remove the prepended family. */
 		family = *mtod(pkt, sa_family_t *);
 		m_adj(pkt, sizeof(sa_family_t));
 		ng_iface_send(ifp, pkt, family);
 
 		IFQ_DRV_DEQUEUE(&ifp->if_snd, pkt);
 	}
 }
 
 /*
  * Show a packet to BPF, prefixed by a 4 byte address family header.
  * The header lives on the stack and is passed alongside the mbuf; this is
  * OK because BPF treats it read-only.
  */
 static void
 ng_iface_bpftap(struct ifnet *ifp, struct mbuf *m, sa_family_t family)
 {
 	int32_t af_hdr;
 
 	KASSERT(family != AF_UNSPEC, ("%s: family=AF_UNSPEC", __func__));
 
 	/* Nobody is listening: nothing to do. */
 	if (!bpf_peers_present(ifp->if_bpf))
 		return;
 
 	af_hdr = (int32_t)family;
 	bpf_mtap2(ifp->if_bpf, &af_hdr, sizeof(af_hdr), m);
 }
 
 /*
  * This routine does actual delivery of the packet into the
  * netgraph(4). It is called from ng_iface_start() and
  * ng_iface_output().
  *
  * Returns EAFNOSUPPORT for a family without a descriptor, ENETDOWN when
  * no hook is connected for the family, otherwise the result of the send.
  * The mbuf is consumed in all cases.
  */
 static int
 ng_iface_send(struct ifnet *ifp, struct mbuf *m, sa_family_t sa)
 {
 	struct rm_priotracker tracker;
 	priv_p priv;
 	iffam_p fam;
 	hook_p hook;
 	int error, pktlen;
 
 	priv = (priv_p) ifp->if_softc;
 
 	/* Check address family to determine hook (if known) */
 	fam = get_iffam_from_af(sa);
 	if (fam == NULL) {
 		m_freem(m);
 		log(LOG_WARNING, "%s: can't handle af%d\n", ifp->if_xname, sa);
 		return (EAFNOSUPPORT);
 	}
 
 	/* Remember the length now; the mbuf is gone after the send. */
 	pktlen = m->m_pkthdr.len;
 
 	/* Pick up the hook and take a reference while holding the lock. */
 	PRIV_RLOCK(priv, &tracker);
 	hook = *get_hook_from_iffam(priv, fam);
 	if (hook != NULL)
 		NG_HOOK_REF(hook);
 	PRIV_RUNLOCK(priv, &tracker);
 
 	if (hook == NULL) {
 		NG_FREE_M(m);
 		return ENETDOWN;
 	}
 
 	NG_OUTBOUND_THREAD_REF();
 	NG_SEND_DATA_ONLY(error, hook, m);
 	NG_OUTBOUND_THREAD_UNREF();
 	NG_HOOK_UNREF(hook);
 
 	/* Update stats. */
 	if (error == 0) {
 		if_inc_counter(ifp, IFCOUNTER_OBYTES, pktlen);
 		if_inc_counter(ifp, IFCOUNTER_OPACKETS, 1);
 	}
 
 	return (error);
 }
 
 #ifdef DEBUG
 /*
  * Log an ioctl issued to the virtual interface: its direction, group
  * character, command number and parameter length.
  */
 
 static void
 ng_iface_print_ioctl(struct ifnet *ifp, int command, caddr_t data)
 {
 	char   *dirname;
 
 	/* Fallback used when the direction bits match none of the cases. */
 	dirname = "IO??";
 	switch (command & IOC_DIRMASK) {
 	case IOC_VOID:
 		dirname = "IO";
 		break;
 	case IOC_OUT:
 		dirname = "IOR";
 		break;
 	case IOC_IN:
 		dirname = "IOW";
 		break;
 	case IOC_INOUT:
 		dirname = "IORW";
 		break;
 	}
 
 	log(LOG_DEBUG, "%s: %s('%c', %d, char[%d])\n",
 	    ifp->if_xname, dirname, IOCGROUP(command), command & 0xff,
 	    IOCPARM_LEN(command));
 }
 #endif /* DEBUG */
 
 /************************************************************************
 			NETGRAPH NODE STUFF
  ************************************************************************/
 
 /*
  * Constructor for a node.
  *
  * Allocates the private data and an ifnet, links node, private data and
  * interface together, names the interface NG_IFACE_IFACE_NAME<unit>,
  * tries to give the node the same name, and attaches the interface and
  * BPF. Returns ENOMEM when the ifnet cannot be allocated, 0 otherwise.
  */
 static int
 ng_iface_constructor(node_p node)
 {
 	struct ifnet *ifp;
 	priv_p priv;
 
 	/* Allocate node and interface private structures */
 	priv = malloc(sizeof(*priv), M_NETGRAPH_IFACE, M_WAITOK | M_ZERO);
 	ifp = if_alloc(IFT_PROPVIRTUAL);
 	if (ifp == NULL) {
 		free(priv, M_NETGRAPH_IFACE);
 		return (ENOMEM);
 	}
 
 	rm_init(&priv->lock, "ng_iface private rmlock");
 
 	/* Link them together */
 	ifp->if_softc = priv;
 	priv->ifp = ifp;
 
 	/*
 	 * Get an interface unit number.
 	 * NOTE(review): the result is used unchecked; presumably the
 	 * allocator can fail once all units are taken - confirm.
 	 */
 	priv->unit = alloc_unr(V_ng_iface_unit);
 
 	/* Link together node and private info */
 	NG_NODE_SET_PRIVATE(node, priv);
 	priv->node = node;
 
 	/* Initialize interface structure */
 	if_initname(ifp, NG_IFACE_IFACE_NAME, priv->unit);
 	ifp->if_output = ng_iface_output;
 	ifp->if_start = ng_iface_start;
 	ifp->if_ioctl = ng_iface_ioctl;
 	ifp->if_mtu = NG_IFACE_MTU_DEFAULT;
 	ifp->if_flags = (IFF_SIMPLEX|IFF_POINTOPOINT|IFF_NOARP|IFF_MULTICAST);
 	ifp->if_type = IFT_PROPVIRTUAL;		/* XXX */
 	ifp->if_addrlen = 0;			/* XXX */
 	ifp->if_hdrlen = 0;			/* XXX */
 	ifp->if_baudrate = 64000;		/* XXX */
 	IFQ_SET_MAXLEN(&ifp->if_snd, ifqmaxlen);
 	ifp->if_snd.ifq_drv_maxlen = ifqmaxlen;
 	IFQ_SET_READY(&ifp->if_snd);
 
 	/* Give this node the same name as the interface (if possible) */
 	if (ng_name_node(node, ifp->if_xname) != 0)
 		log(LOG_WARNING, "%s: can't acquire netgraph name\n",
 		    ifp->if_xname);
 
 	/* Attach the interface; BPF sees a 4 byte family header (DLT_NULL) */
 	if_attach(ifp);
 	bpfattach(ifp, DLT_NULL, sizeof(u_int32_t));
 
 	/* Done */
 	return (0);
 }
 
 /*
  * Give our ok for a hook to be added.
  * The hook name must match one of the supported families (EPFNOSUPPORT
  * otherwise) and that family must not have a hook yet (EISCONN).
  */
 static int
 ng_iface_newhook(node_p node, hook_p hook, const char *name)
 {
 	const priv_p priv = NG_NODE_PRIVATE(node);
 	iffam_p fam;
 	hook_p *slot;
 	int error;
 
 	fam = get_iffam_from_name(name);
 	if (fam == NULL)
 		return (EPFNOSUPPORT);
 
 	error = 0;
 	PRIV_WLOCK(priv);
 	slot = get_hook_from_iffam(priv, fam);
 	if (*slot == NULL) {
 		/* Slot is free: claim it and configure the hook. */
 		*slot = hook;
 		NG_HOOK_HI_STACK(hook);
 		NG_HOOK_SET_TO_INBOUND(hook);
 	} else
 		error = EISCONN;
 	PRIV_WUNLOCK(priv);
 
 	return (error);
 }
 
 /*
  * Receive a control message.
  *
  * Handles the NGM_IFACE_COOKIE commands (interface name and index queries,
  * switching between point-to-point and broadcast mode) and the
  * NGM_FLOW_COOKIE link state notifications. Every path leaves through the
  * common tail, which sends the response (if any) and releases the item
  * and the message.
  *
  * Returns 0 on success, ENOMEM when a response cannot be allocated, EBUSY
  * when a mode change is requested while the interface is up, and EINVAL
  * for an unknown cookie or command.
  */
 static int
 ng_iface_rcvmsg(node_p node, item_p item, hook_p lasthook)
 {
 	const priv_p priv = NG_NODE_PRIVATE(node);
 	struct ifnet *const ifp = priv->ifp;
 	struct ng_mesg *resp = NULL;
 	int error = 0;
 	struct ng_mesg *msg;
 
 	NGI_GET_MSG(item, msg);
 	switch (msg->header.typecookie) {
 	case NGM_IFACE_COOKIE:
 		switch (msg->header.cmd) {
 		case NGM_IFACE_GET_IFNAME:
 			NG_MKRESPONSE(resp, msg, IFNAMSIZ, M_NOWAIT);
 			if (resp == NULL) {
 				error = ENOMEM;
 				break;
 			}
 			strlcpy(resp->data, ifp->if_xname, IFNAMSIZ);
 			break;
 
 		case NGM_IFACE_POINT2POINT:
 		case NGM_IFACE_BROADCAST:
 		    {
 			/*
 			 * Deny request if interface is UP. Leave through the
 			 * common tail instead of returning directly, so that
 			 * the item and the message are released.
 			 */
 			if ((ifp->if_flags & IFF_UP) != 0) {
 				error = EBUSY;
 				break;
 			}
 
 			/* Change flags */
 			switch (msg->header.cmd) {
 			case NGM_IFACE_POINT2POINT:
 				ifp->if_flags |= IFF_POINTOPOINT;
 				ifp->if_flags &= ~IFF_BROADCAST;
 				break;
 			case NGM_IFACE_BROADCAST:
 				ifp->if_flags &= ~IFF_POINTOPOINT;
 				ifp->if_flags |= IFF_BROADCAST;
 				break;
 			}
 			break;
 		    }
 
 		case NGM_IFACE_GET_IFINDEX:
 			NG_MKRESPONSE(resp, msg, sizeof(uint32_t), M_NOWAIT);
 			if (resp == NULL) {
 				error = ENOMEM;
 				break;
 			}
 			*((uint32_t *)resp->data) = priv->ifp->if_index;
 			break;
 
 		default:
 			error = EINVAL;
 			break;
 		}
 		break;
 	case NGM_FLOW_COOKIE:
 		/* Link state notifications; other commands are ignored. */
 		switch (msg->header.cmd) {
 		case NGM_LINK_IS_UP:
 			if_link_state_change(ifp, LINK_STATE_UP);
 			break;
 		case NGM_LINK_IS_DOWN:
 			if_link_state_change(ifp, LINK_STATE_DOWN);
 			break;
 		default:
 			break;
 		}
 		break;
 	default:
 		error = EINVAL;
 		break;
 	}
 	/* Common tail: respond (or drop the item) and free the message. */
 	NG_RESPOND_MSG(error, node, item, resp);
 	NG_FREE_MSG(msg);
 	return (error);
 }
 
 /*
  * Receive data from a hook. Pass the packet to the correct input routine.
  *
  * The address family is derived from the hook the packet arrived on.
  * Returns ENETDOWN when the interface is not up, EAFNOSUPPORT when the
  * family has no input queue compiled in, 0 once the packet has been
  * dispatched. The mbuf is consumed on every path.
  */
 static int
 ng_iface_rcvdata(hook_p hook, item_p item)
 {
 	const priv_p priv = NG_NODE_PRIVATE(NG_HOOK_NODE(hook));
 	const iffam_p iffam = get_iffam_from_hook(priv, hook);
 	struct ifnet *const ifp = priv->ifp;
 	struct epoch_tracker et;
 	struct mbuf *m;
 	int isr;
 
 	/* Take the mbuf out of the item; the item itself is not needed. */
 	NGI_GET_M(item, m);
 	NG_FREE_ITEM(item);
 	/* Sanity checks */
 	KASSERT(iffam != NULL, ("%s: iffam", __func__));
 	M_ASSERTPKTHDR(m);
 	if ((ifp->if_flags & IFF_UP) == 0) {
 		NG_FREE_M(m);
 		return (ENETDOWN);
 	}
 
 	/* Update interface stats */
 	if_inc_counter(ifp, IFCOUNTER_IPACKETS, 1);
 	if_inc_counter(ifp, IFCOUNTER_IBYTES, m->m_pkthdr.len);
 
 	/* Note receiving interface */
 	m->m_pkthdr.rcvif = ifp;
 
 	/* Berkeley packet filter */
 	ng_iface_bpftap(ifp, m, iffam->family);
 
 	/* Send packet: pick the netisr queue matching the family */
 	switch (iffam->family) {
 #ifdef INET
 	case AF_INET:
 		isr = NETISR_IP;
 		break;
 #endif
 #ifdef INET6
 	case AF_INET6:
 		isr = NETISR_IPV6;
 		break;
 #endif
 	default:
 		m_freem(m);
 		return (EAFNOSUPPORT);
 	}
 	random_harvest_queue(m, sizeof(*m), RANDOM_NET_NG);
 	M_SETFIB(m, ifp->if_fib);
 	/* Dispatch in the interface's vnet and inside the network epoch. */
 	CURVNET_SET(ifp->if_vnet);
 	NET_EPOCH_ENTER(et);
 	netisr_dispatch(isr, m);
 	NET_EPOCH_EXIT(et);
 	CURVNET_RESTORE();
 	return (0);
 }
 
 /*
  * Shutdown and remove the node and its associated interface.
  * Detaches BPF and the interface, frees the ifnet, gives the unit number
  * back, destroys the lock and releases the private data and the node
  * reference. Always returns 0.
  */
 static int
 ng_iface_shutdown(node_p node)
 {
 	const priv_p priv = NG_NODE_PRIVATE(node);
 	struct ifnet *const ifp = priv->ifp;
 
 	/*
 	 * The ifnet may live in another vnet than the netgraph node, so the
 	 * interface teardown runs with the ifnet's vnet as current vnet.
 	 */
 	CURVNET_SET_QUIET(ifp->if_vnet);
 	bpfdetach(ifp);
 	if_detach(ifp);
 	if_free(ifp);
 	CURVNET_RESTORE();
 	priv->ifp = NULL;
 
 	/* Release what the constructor set up besides the ifnet. */
 	free_unr(V_ng_iface_unit, priv->unit);
 	rm_destroy(&priv->lock);
 	free(priv, M_NETGRAPH_IFACE);
 
 	NG_NODE_SET_PRIVATE(node, NULL);
 	NG_NODE_UNREF(node);
 	return (0);
 }
 
 /*
  * Hook disconnection: clear the hook slot of the family the hook was
  * registered for. Note that we do *not* shutdown when all hooks have been
  * disconnected. Panics when the hook is not one of ours.
  */
 static int
 ng_iface_disconnect(hook_p hook)
 {
 	const priv_p priv = NG_NODE_PRIVATE(NG_HOOK_NODE(hook));
 	const iffam_p fam = get_iffam_from_hook(priv, hook);
 	hook_p *slot;
 
 	if (fam == NULL)
 		panic("%s", __func__);
 
 	PRIV_WLOCK(priv);
 	slot = get_hook_from_iffam(priv, fam);
 	*slot = NULL;
 	PRIV_WUNLOCK(priv);
 
 	return (0);
 }
 
 /*
  * Handle loading and unloading for this node type.
  * Load and unload need no work and succeed; any other event is answered
  * with EOPNOTSUPP.
  */
 static int
 ng_iface_mod_event(module_t mod, int event, void *data)
 {
 	if (event == MOD_LOAD || event == MOD_UNLOAD)
 		return (0);
 
 	return (EOPNOTSUPP);
 }
 
 /*
  * Per-vnet initialisation: create the unit number allocator used by the
  * constructor, with bounds 0 and 0xffff. Registered to run at
  * SI_SUB_PSEUDO.
  */
 static void
 vnet_ng_iface_init(const void *unused)
 {
 
 	V_ng_iface_unit = new_unrhdr(0, 0xffff, NULL);
 }
 VNET_SYSINIT(vnet_ng_iface_init, SI_SUB_PSEUDO, SI_ORDER_ANY,
     vnet_ng_iface_init, NULL);
 
 /*
  * Per-vnet teardown: destroy the unit number allocator created by
  * vnet_ng_iface_init(). Registered at SI_SUB_INIT_IF.
  */
 static void
 vnet_ng_iface_uninit(const void *unused)
 {
 
 	delete_unrhdr(V_ng_iface_unit);
 }
 VNET_SYSUNINIT(vnet_ng_iface_uninit, SI_SUB_INIT_IF, SI_ORDER_ANY,
     vnet_ng_iface_uninit, NULL);
diff --git a/sys/netinet/ip_fastfwd.c b/sys/netinet/ip_fastfwd.c
index 44da6b73e41c..facf876f18cc 100644
--- a/sys/netinet/ip_fastfwd.c
+++ b/sys/netinet/ip_fastfwd.c
@@ -1,523 +1,528 @@
 /*-
  * SPDX-License-Identifier: BSD-3-Clause
  *
  * Copyright (c) 2003 Andre Oppermann, Internet Business Solutions AG
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. The name of the author may not be used to endorse or promote
  *    products derived from this software without specific prior written
  *    permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  */
 
 /*
  * ip_fastforward gets its speed from processing the forwarded packet to
  * completion (if_output on the other side) without any queues or netisr's.
  * The receiving interface DMAs the packet into memory, the upper half of
  * driver calls ip_fastforward, we do our routing table lookup and directly
  * send it off to the outgoing interface, which DMAs the packet to the
  * network card. The only part of the packet we touch with the CPU is the
  * IP header (unless there are complex firewall rules touching other parts
  * of the packet, but that is up to you). We are essentially limited by bus
  * bandwidth and how fast the network card/driver can set up receives and
  * transmits.
  *
  * We handle basic errors, IP header errors, checksum errors,
  * destination unreachable, fragmentation and fragmentation needed and
  * report them via ICMP to the sender.
  *
  * Else if something is not pure IPv4 unicast forwarding we fall back to
  * the normal ip_input processing path. We should only be called from
  * interfaces connected to the outside world.
  *
  * Firewalling is fully supported including divert, ipfw fwd and ipfilter
  * ipnat and address rewrite.
  *
  * IPSEC is not supported if this host is a tunnel broker. IPSEC is
  * supported for connections to/from local host.
  *
  * We try to do the least expensive (in CPU ops) checks and operations
  * first to catch junk with as little overhead as possible.
  *
  * We take full advantage of hardware support for IP checksum and
  * fragmentation offloading.
  *
  * We don't do ICMP redirect in the fast forwarding path. I have had my own
  * cases where two core routers with the Zebra routing suite would send
  * millions of ICMP redirects to connected hosts if the destination router was not the
  * default gateway. In one case it was filling the routing table of a host
  * with approximately 300.000 cloned redirect entries until it ran out of
  * kernel memory. However the networking code proved very robust and it didn't
  * crash or fail in other ways.
  */
 
 /*
  * Many thanks to Matt Thomas of NetBSD for basic structure of ip_flow.c which
  * is being followed here.
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include "opt_ipstealth.h"
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/kernel.h>
 #include <sys/malloc.h>
 #include <sys/mbuf.h>
 #include <sys/protosw.h>
 #include <sys/sdt.h>
 #include <sys/socket.h>
 #include <sys/sysctl.h>
 
 #include <net/if.h>
 #include <net/if_types.h>
 #include <net/if_var.h>
 #include <net/if_dl.h>
 #include <net/pfil.h>
 #include <net/route.h>
 #include <net/route/nhop.h>
 #include <net/vnet.h>
 
 #include <netinet/in.h>
 #include <netinet/in_fib.h>
 #include <netinet/in_kdtrace.h>
 #include <netinet/in_systm.h>
 #include <netinet/in_var.h>
 #include <netinet/ip.h>
 #include <netinet/ip_var.h>
 #include <netinet/ip_icmp.h>
 #include <netinet/ip_options.h>
 
 #include <machine/in_cksum.h>
 
 #define	V_ipsendredirects	VNET(ipsendredirects)
 
 /*
  * Prepare a copy of the packet to be used for an ICMP redirect.
  *
  * A fresh header mbuf is allocated, the packet header of 'm' is
  * duplicated into it and as much of the packet as fits (at most
  * ip_len bytes) is copied.  Returns NULL if either allocation fails.
  *
  * '*addr' is written only when the next hop is neither a redirect nor
  * a default route and the packet's source lies in the subnet of the
  * next hop's interface address: it receives the gateway address for
  * gatewayed next hops and the packet's destination otherwise.
  * NOTE(review): in every other case '*addr' is left untouched while the
  * copy is still returned -- callers must preset it.
  */
 static struct mbuf *
 ip_redir_alloc(struct mbuf *m, struct nhop_object *nh,
     struct ip *ip, in_addr_t *addr)
 {
 	struct in_ifaddr *ifa_in;
 	struct mbuf *copy;
 	u_long srcaddr;
 
 	copy = m_gethdr(M_NOWAIT, m->m_type);
 	if (copy == NULL)
 		return (NULL);
 
 	/*
 	 * A failed pkthdr dup (deep copy of the tag chain) is probably
 	 * harmless, but stay conservative and give up on the copy in
 	 * case later code ever wants the tags.
 	 */
 	if (!m_dup_pkthdr(copy, m, M_NOWAIT)) {
 		m_free(copy);
 		return (NULL);
 	}
 
 	copy->m_len = min(ntohs(ip->ip_len), M_TRAILINGSPACE(copy));
 	copy->m_pkthdr.len = copy->m_len;
 	m_copydata(m, 0, copy->m_len, mtod(copy, caddr_t));
 
 	/* No next hop, or one we never redirect towards. */
 	if (nh == NULL || (nh->nh_flags & (NHF_REDIRECT|NHF_DEFAULT)) != 0)
 		return (copy);
 
 	ifa_in = (struct in_ifaddr *)(nh->nh_ifa);
 	if (ifa_in == NULL)
 		return (copy);
 
 	/* The sender must sit on the next hop interface's subnet. */
 	srcaddr = ntohl(ip->ip_src.s_addr);
 	if ((srcaddr & ifa_in->ia_subnetmask) != ifa_in->ia_subnet)
 		return (copy);
 
 	*addr = (nh->nh_flags & NHF_GATEWAY) ?
 	    nh->gw4_sa.sin_addr.s_addr : ip->ip_dst.s_addr;
 	return (copy);
 }
 
 
 /*
  * Look up the next hop for 'dest' in the packet's FIB.
  *
  * On success the next hop is stored in '*pnh' and 0 is returned.
  * On failure the mbuf has been disposed of -- either passed to
  * icmp_error() (no route, reject route) or freed (blackhole and
  * broadcast next hops) -- and EHOSTUNREACH is returned.
  */
 static int
 ip_findroute(struct nhop_object **pnh, struct in_addr dest, struct mbuf *m)
 {
 	struct nhop_object *hop;
 
 	hop = fib4_lookup(M_GETFIB(m), dest, 0, NHR_NONE,
 	    m->m_pkthdr.flowid);
 	if (hop == NULL) {
 		IPSTAT_INC(ips_noroute);
 		goto reject;
 	}
 
 	/* Blackholed traffic and directed broadcasts are dropped silently. */
 	if (hop->nh_flags & (NHF_BLACKHOLE | NHF_BROADCAST)) {
 		IPSTAT_INC(ips_cantforward);
 		m_freem(m);
 		return (EHOSTUNREACH);
 	}
 
 	if ((hop->nh_flags & NHF_REJECT) == 0) {
 		*pnh = hop;
 		return (0);
 	}
 
 reject:
 	/* No usable route: count it and tell the sender. */
 	IPSTAT_INC(ips_cantforward);
 	icmp_error(m, ICMP_UNREACH, ICMP_UNREACH_HOST, 0, 0);
 	return (EHOSTUNREACH);
 }
 
 /*
  * Try to forward a packet based on the destination address.
  * This is a fast path optimized for the plain forwarding case.
  * If the packet is handled (and consumed) here then we return NULL;
  * otherwise mbuf is returned and the packet should be delivered
  * to ip_input for full processing.
  *
  * The mbuf is returned with M_FASTFWD_OURS set when a pfil(9) hook
  * redirected the packet to this host (see the forwardlocal label).
  * Dropped packets and packets answered with an ICMP error also
  * yield NULL.
  */
 struct mbuf *
 ip_tryforward(struct mbuf *m)
 {
 	struct ip *ip;
 	struct mbuf *m0 = NULL;
 	struct nhop_object *nh = NULL;
-	struct sockaddr_in dst;
+	struct route ro;
+	struct sockaddr_in *dst;
+	const struct sockaddr *gw;
 	struct in_addr dest, odest, rtdest;
 	uint16_t ip_len, ip_off;
 	int error = 0;
 	struct m_tag *fwd_tag = NULL;
 	struct mbuf *mcopy = NULL;
 	struct in_addr redest;
 	/*
 	 * Are we active and forwarding packets?
 	 */
 
 	M_ASSERTVALID(m);
 	M_ASSERTPKTHDR(m);
 
 #ifdef ALTQ
 	/*
 	 * Is packet dropped by traffic conditioner?
 	 */
 	if (altq_input != NULL && (*altq_input)(m, AF_INET) == 0)
 		goto drop;
 #endif
 
 	/*
 	 * Only IP packets without options
 	 */
 	ip = mtod(m, struct ip *);
 
 	if (ip->ip_hl != (sizeof(struct ip) >> 2)) {
 		if (V_ip_doopts == 1)
 			return m;
 		else if (V_ip_doopts == 2) {
 			icmp_error(m, ICMP_UNREACH, ICMP_UNREACH_FILTER_PROHIB,
 				0, 0);
 			return NULL;	/* mbuf already free'd */
 		}
 		/* else ignore IP options and continue */
 	}
 
 	/*
 	 * Only unicast IP, not from loopback, no L2 or IP broadcast,
 	 * no multicast, no INADDR_ANY
 	 *
 	 * XXX: Probably some of these checks could be direct drop
 	 * conditions.  However it is not clear whether there are some
 	 * hacks or obscure behaviours which make it necessary to
 	 * let ip_input handle it.  We play safe here and let ip_input
 	 * deal with it until it is proven that we can directly drop it.
 	 */
 	if ((m->m_flags & (M_BCAST|M_MCAST)) ||
 	    (m->m_pkthdr.rcvif->if_flags & IFF_LOOPBACK) ||
 	    ntohl(ip->ip_src.s_addr) == (u_long)INADDR_BROADCAST ||
 	    ntohl(ip->ip_dst.s_addr) == (u_long)INADDR_BROADCAST ||
 	    IN_MULTICAST(ntohl(ip->ip_src.s_addr)) ||
 	    IN_MULTICAST(ntohl(ip->ip_dst.s_addr)) ||
 	    IN_LINKLOCAL(ntohl(ip->ip_src.s_addr)) ||
 	    IN_LINKLOCAL(ntohl(ip->ip_dst.s_addr)) ||
 	    ip->ip_src.s_addr == INADDR_ANY ||
 	    ip->ip_dst.s_addr == INADDR_ANY )
 		return m;
 
 	/*
 	 * Is it for a local address on this host?
 	 */
 	if (in_localip(ip->ip_dst))
 		return m;
 
 	IPSTAT_INC(ips_total);
 
 	/*
 	 * Step 3: incoming packet firewall processing
 	 */
 
 	/* Remember the original destination to detect rewrites by hooks. */
 	odest.s_addr = dest.s_addr = ip->ip_dst.s_addr;
 
 	/*
 	 * Run through list of ipfilter hooks for input packets
 	 */
 	if (!PFIL_HOOKED_IN(V_inet_pfil_head))
 		goto passin;
 
 	if (pfil_run_hooks(V_inet_pfil_head, &m, m->m_pkthdr.rcvif, PFIL_IN,
 	    NULL) != PFIL_PASS)
 		goto drop;
 
 	M_ASSERTVALID(m);
 	M_ASSERTPKTHDR(m);
 
 	ip = mtod(m, struct ip *);	/* m may have changed by pfil hook */
 	dest.s_addr = ip->ip_dst.s_addr;
 
 	/*
 	 * Destination address changed?
 	 */
 	if (odest.s_addr != dest.s_addr) {
 		/*
 		 * Is it now for a local address on this host?
 		 */
 		if (in_localip(dest))
 			goto forwardlocal;
 		/*
 		 * Go on with new destination address
 		 */
 	}
 
 	if (m->m_flags & M_FASTFWD_OURS) {
 		/*
 		 * ipfw changed it for a local address on this host.
 		 */
 		goto forwardlocal;
 	}
 
 passin:
 	/*
 	 * Step 4: decrement TTL and look up route
 	 */
 
 	/*
 	 * Check TTL
 	 */
 #ifdef IPSTEALTH
 	if (!V_ipstealth) {
 #endif
 	if (ip->ip_ttl <= IPTTLDEC) {
 		icmp_error(m, ICMP_TIMXCEED, ICMP_TIMXCEED_INTRANS, 0, 0);
 		return NULL;	/* mbuf already free'd */
 	}
 
 	/*
 	 * Decrement the TTL and incrementally change the IP header checksum.
 	 * Don't bother doing this with hw checksum offloading, it's faster
 	 * doing it right here.
 	 */
 	ip->ip_ttl -= IPTTLDEC;
 	if (ip->ip_sum >= (u_int16_t) ~htons(IPTTLDEC << 8))
 		ip->ip_sum -= ~htons(IPTTLDEC << 8);
 	else
 		ip->ip_sum += htons(IPTTLDEC << 8);
 #ifdef IPSTEALTH
 	}
 #endif
 
 	/*
 	 * Next hop forced by pfil(9) hook?
 	 */
 	if ((m->m_flags & M_IP_NEXTHOP) &&
 	    ((fwd_tag = m_tag_find(m, PACKET_TAG_IPFORWARD, NULL)) != NULL)) {
 		/*
 		 * Now we will find route to forced destination.
 		 */
 		dest.s_addr = ((struct sockaddr_in *)
 			    (fwd_tag + 1))->sin_addr.s_addr;
 		m_tag_delete(m, fwd_tag);
 		m->m_flags &= ~M_IP_NEXTHOP;
 	}
 
 	/*
 	 * Find route to destination.
 	 */
 	if (ip_findroute(&nh, dest, m) != 0)
 		return (NULL);	/* icmp unreach already sent */
 
 	/*
 	 * Avoid second route lookup by caching destination.
 	 */
 	rtdest.s_addr = dest.s_addr;
 
 	/*
 	 * Step 5: outgoing firewall packet processing
 	 */
 	if (!PFIL_HOOKED_OUT(V_inet_pfil_head))
 		goto passout;
 
 	if (pfil_run_hooks(V_inet_pfil_head, &m, nh->nh_ifp,
 	    PFIL_OUT | PFIL_FWD, NULL) != PFIL_PASS)
 		goto drop;
 
 	M_ASSERTVALID(m);
 	M_ASSERTPKTHDR(m);
 
 	ip = mtod(m, struct ip *);
 	dest.s_addr = ip->ip_dst.s_addr;
 
 	/*
 	 * Destination address changed?
 	 */
 	if (m->m_flags & M_IP_NEXTHOP)
 		fwd_tag = m_tag_find(m, PACKET_TAG_IPFORWARD, NULL);
 	else
 		fwd_tag = NULL;
 	if (odest.s_addr != dest.s_addr || fwd_tag != NULL) {
 		/*
 		 * Is it now for a local address on this host?
 		 */
 		if (m->m_flags & M_FASTFWD_OURS || in_localip(dest)) {
 forwardlocal:
 			/*
 			 * Return packet for processing by ip_input().
 			 */
 			m->m_flags |= M_FASTFWD_OURS;
 			return (m);
 		}
 		/*
 		 * Redo route lookup with new destination address
 		 */
 		if (fwd_tag) {
 			dest.s_addr = ((struct sockaddr_in *)
 				    (fwd_tag + 1))->sin_addr.s_addr;
 			m_tag_delete(m, fwd_tag);
 			m->m_flags &= ~M_IP_NEXTHOP;
 		}
 		/* Skip the lookup when the destination matches the cached one. */
 		if (dest.s_addr != rtdest.s_addr &&
 		    ip_findroute(&nh, dest, m) != 0)
 			return (NULL);	/* icmp unreach already sent */
 	}
 
 passout:
 	/*
 	 * Step 6: send off the packet
 	 */
 	ip_len = ntohs(ip->ip_len);
 	ip_off = ntohs(ip->ip_off);
 
 	/*
 	 * Build the route handed to if_output: ro_dst always carries the
 	 * final IPv4 destination, while gw is the next hop's gateway
 	 * sockaddr (with RT_HAS_GW set in ro_flags) or, when the next hop
 	 * has no NHF_GATEWAY flag, the destination itself.
 	 */
-	bzero(&dst, sizeof(dst));
-	dst.sin_family = AF_INET;
-	dst.sin_len = sizeof(dst);
-	if (nh->nh_flags & NHF_GATEWAY)
-		dst.sin_addr = nh->gw4_sa.sin_addr;
-	else
-		dst.sin_addr = dest;
+	bzero(&ro, sizeof(ro));
+	dst = (struct sockaddr_in *)&ro.ro_dst;
+	dst->sin_family = AF_INET;
+	dst->sin_len = sizeof(*dst);
+	dst->sin_addr = dest;
+	if (nh->nh_flags & NHF_GATEWAY) {
+		gw = &nh->gw_sa;
+		ro.ro_flags |= RT_HAS_GW;
+	} else
+		gw = (const struct sockaddr *)dst;
 
 	/*
 	 * Handle redirect case.
 	 */
 	redest.s_addr = 0;
-	if (V_ipsendredirects && (nh->nh_ifp == m->m_pkthdr.rcvif))
+	if (V_ipsendredirects && (nh->nh_ifp == m->m_pkthdr.rcvif) &&
+	    gw->sa_family == AF_INET)
 		mcopy = ip_redir_alloc(m, nh, ip, &redest.s_addr);
 
 	/*
 	 * Check if packet fits MTU or if hardware will fragment for us
 	 */
 	if (ip_len <= nh->nh_mtu) {
 		/*
 		 * Avoid confusing lower layers.
 		 */
 		m_clrprotoflags(m);
 		/*
 		 * Send off the packet via outgoing interface
 		 */
 		IP_PROBE(send, NULL, NULL, ip, nh->nh_ifp, ip, NULL);
-		error = (*nh->nh_ifp->if_output)(nh->nh_ifp, m,
-		    (struct sockaddr *)&dst, NULL);
+		error = (*nh->nh_ifp->if_output)(nh->nh_ifp, m, gw, &ro);
 	} else {
 		/*
 		 * Handle EMSGSIZE with icmp reply needfrag for TCP MTU discovery
 		 */
 		if (ip_off & IP_DF) {
 			IPSTAT_INC(ips_cantfrag);
 			icmp_error(m, ICMP_UNREACH, ICMP_UNREACH_NEEDFRAG,
 				0, nh->nh_mtu);
 			goto consumed;
 		} else {
 			/*
 			 * We have to fragment the packet
 			 */
 			m->m_pkthdr.csum_flags |= CSUM_IP;
 			if (ip_fragment(ip, &m, nh->nh_mtu,
 			    nh->nh_ifp->if_hwassist) != 0)
 				goto drop;
 			KASSERT(m != NULL, ("null mbuf and no error"));
 			/*
 			 * Send off the fragments via outgoing interface
 			 */
 			error = 0;
 			do {
 				/* Unlink the fragment before handing it out. */
 				m0 = m->m_nextpkt;
 				m->m_nextpkt = NULL;
 				/*
 				 * Avoid confusing lower layers.
 				 */
 				m_clrprotoflags(m);
 
 				IP_PROBE(send, NULL, NULL,
 				    mtod(m, struct ip *), nh->nh_ifp,
 				    mtod(m, struct ip *), NULL);
 				error = (*nh->nh_ifp->if_output)(nh->nh_ifp, m,
-				    (struct sockaddr *)&dst, NULL);
+				    gw, &ro);
 				if (error)
 					break;
 			} while ((m = m0) != NULL);
 			if (error) {
 				/* Reclaim remaining fragments */
 				for (m = m0; m; m = m0) {
 					m0 = m->m_nextpkt;
 					m_freem(m);
 				}
 			} else
 				IPSTAT_INC(ips_fragmented);
 		}
 	}
 
 	if (error != 0)
 		IPSTAT_INC(ips_odropped);
 	else {
 		IPSTAT_INC(ips_forward);
 		IPSTAT_INC(ips_fastforward);
 	}
 
 	/* Send required redirect */
 	if (mcopy != NULL) {
 		icmp_error(mcopy, ICMP_REDIRECT, ICMP_REDIRECT_HOST, redest.s_addr, 0);
 		mcopy = NULL; /* Handed off to icmp_error(); do not free below */
 	}
 
 consumed:
 	/* An unused redirect copy (e.g. the needfrag path) is released here. */
 	if (mcopy != NULL)
 		m_freem(mcopy);
 	return NULL;
 drop:
 	if (m)
 		m_freem(m);
 	return NULL;
 }
diff --git a/sys/netinet/ip_input.c b/sys/netinet/ip_input.c
index 733cc2901879..465c00e4dac7 100644
--- a/sys/netinet/ip_input.c
+++ b/sys/netinet/ip_input.c
@@ -1,1436 +1,1439 @@
 /*-
  * SPDX-License-Identifier: BSD-3-Clause
  *
  * Copyright (c) 1982, 1986, 1988, 1993
  *	The Regents of the University of California.  All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  *	@(#)ip_input.c	8.2 (Berkeley) 1/4/94
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include "opt_bootp.h"
 #include "opt_ipstealth.h"
 #include "opt_ipsec.h"
 #include "opt_route.h"
 #include "opt_rss.h"
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/hhook.h>
 #include <sys/mbuf.h>
 #include <sys/malloc.h>
 #include <sys/domain.h>
 #include <sys/protosw.h>
 #include <sys/socket.h>
 #include <sys/time.h>
 #include <sys/kernel.h>
 #include <sys/lock.h>
 #include <sys/rmlock.h>
 #include <sys/rwlock.h>
 #include <sys/sdt.h>
 #include <sys/syslog.h>
 #include <sys/sysctl.h>
 
 #include <net/if.h>
 #include <net/if_types.h>
 #include <net/if_var.h>
 #include <net/if_dl.h>
 #include <net/pfil.h>
 #include <net/route.h>
 #include <net/route/nhop.h>
 #include <net/netisr.h>
 #include <net/rss_config.h>
 #include <net/vnet.h>
 
 #include <netinet/in.h>
 #include <netinet/in_kdtrace.h>
 #include <netinet/in_systm.h>
 #include <netinet/in_var.h>
 #include <netinet/ip.h>
 #include <netinet/in_fib.h>
 #include <netinet/in_pcb.h>
 #include <netinet/ip_var.h>
 #include <netinet/ip_fw.h>
 #include <netinet/ip_icmp.h>
 #include <netinet/ip_options.h>
 #include <machine/in_cksum.h>
 #include <netinet/ip_carp.h>
 #include <netinet/in_rss.h>
 #include <netinet/ip_mroute.h>
 
 #include <netipsec/ipsec_support.h>
 
 #include <sys/socketvar.h>
 
 #include <security/mac/mac_framework.h>
 
 #ifdef CTASSERT
 CTASSERT(sizeof(struct ip) == 20);
 #endif
 
 /* IP reassembly functions are defined in ip_reass.c. */
 extern void ipreass_init(void);
 extern void ipreass_drain(void);
 extern void ipreass_slowtimo(void);
 #ifdef VIMAGE
 extern void ipreass_destroy(void);
 #endif
 
 struct rmlock in_ifaddr_lock;
 RM_SYSINIT(in_ifaddr_lock, &in_ifaddr_lock, "in_ifaddr_lock");
 
 VNET_DEFINE(int, rsvp_on);
 
 VNET_DEFINE(int, ipforwarding);
 SYSCTL_INT(_net_inet_ip, IPCTL_FORWARDING, forwarding, CTLFLAG_VNET | CTLFLAG_RW,
     &VNET_NAME(ipforwarding), 0,
     "Enable IP forwarding between interfaces");
 
 /*
  * Respond with an ICMP host redirect when we forward a packet out of
  * the same interface on which it was received.  See RFC 792.
  */
 VNET_DEFINE(int, ipsendredirects) = 1;
 SYSCTL_INT(_net_inet_ip, IPCTL_SENDREDIRECTS, redirect, CTLFLAG_VNET | CTLFLAG_RW,
     &VNET_NAME(ipsendredirects), 0,
     "Enable sending IP redirects");
 
 /*
  * XXX - Setting ip_checkinterface mostly implements the receive side of
  * the Strong ES model described in RFC 1122, but since the routing table
  * and transmit implementation do not implement the Strong ES model,
  * setting this to 1 results in an odd hybrid.
  *
  * XXX - ip_checkinterface currently must be disabled if you use ipnat
  * to translate the destination address to another local interface.
  *
  * XXX - ip_checkinterface must be disabled if you add IP aliases
  * to the loopback interface instead of the interface where the
  * packets for those addresses are received.
  */
 VNET_DEFINE_STATIC(int, ip_checkinterface);
 #define	V_ip_checkinterface	VNET(ip_checkinterface)
 SYSCTL_INT(_net_inet_ip, OID_AUTO, check_interface, CTLFLAG_VNET | CTLFLAG_RW,
     &VNET_NAME(ip_checkinterface), 0,
     "Verify packet arrives on correct interface");
 
 VNET_DEFINE(pfil_head_t, inet_pfil_head);	/* Packet filter hooks */
 
 /*
  * netisr handler for NETISR_IP: queued packets are handed to ip_input().
  * With RSS the CPU is chosen by rss_soft_m2cpuid_v4() under the CPU
  * policy with hybrid dispatch; otherwise the flow policy is used.
  */
 static struct netisr_handler ip_nh = {
 	.nh_name = "ip",
 	.nh_handler = ip_input,
 	.nh_proto = NETISR_IP,
 #ifdef	RSS
 	.nh_m2cpuid = rss_soft_m2cpuid_v4,
 	.nh_policy = NETISR_POLICY_CPU,
 	.nh_dispatch = NETISR_DISPATCH_HYBRID,
 #else
 	.nh_policy = NETISR_POLICY_FLOW,
 #endif
 };
 
 #ifdef	RSS
 /*
  * netisr handler for NETISR_IP_DIRECT: packets are handed to
  * ip_direct_input().
  *
  * Directly dispatched frames are currently assumed
  * to have a flowid already calculated.
  *
  * There should probably be something here that asserts the
  * frame actually has valid flow details.
  */
 static struct netisr_handler ip_direct_nh = {
 	.nh_name = "ip_direct",
 	.nh_handler = ip_direct_input,
 	.nh_proto = NETISR_IP_DIRECT,
 	.nh_m2cpuid = rss_soft_m2cpuid_v4,
 	.nh_policy = NETISR_POLICY_CPU,
 	.nh_dispatch = NETISR_DISPATCH_HYBRID,
 };
 #endif
 
 extern	struct domain inetdomain;
 extern	struct protosw inetsw[];
 u_char	ip_protox[IPPROTO_MAX];
 VNET_DEFINE(struct in_ifaddrhead, in_ifaddrhead);  /* first inet address */
 VNET_DEFINE(struct in_ifaddrhashhead *, in_ifaddrhashtbl); /* inet addr hash table  */
 VNET_DEFINE(u_long, in_ifaddrhmask);		/* mask for hash table */
 
 #ifdef IPCTL_DEFMTU
 SYSCTL_INT(_net_inet_ip, IPCTL_DEFMTU, mtu, CTLFLAG_RW,
     &ip_mtu, 0, "Default MTU");
 #endif
 
 #ifdef IPSTEALTH
 VNET_DEFINE(int, ipstealth);
 SYSCTL_INT(_net_inet_ip, OID_AUTO, stealth, CTLFLAG_VNET | CTLFLAG_RW,
     &VNET_NAME(ipstealth), 0,
     "IP stealth mode, no TTL decrementation on forwarding");
 #endif
 
 /*
  * IP statistics are stored in the "array" of counter(9)s.
  */
 VNET_PCPUSTAT_DEFINE(struct ipstat, ipstat);
 VNET_PCPUSTAT_SYSINIT(ipstat);
 SYSCTL_VNET_PCPUSTAT(_net_inet_ip, IPCTL_STATS, stats, struct ipstat, ipstat,
     "IP statistics (struct ipstat, netinet/ip_var.h)");
 
 #ifdef VIMAGE
 VNET_PCPUSTAT_SYSUNINIT(ipstat);
 #endif /* VIMAGE */
 
 /*
  * Kernel module interface for updating ipstat.  The argument is an index
  * into ipstat treated as an array.
  *
  * Adds 1 to the counter at that index in the current vnet's ipstat.
  * The index is not range-checked here.
  */
 void
 kmod_ipstat_inc(int statnum)
 {
 
 	counter_u64_add(VNET(ipstat)[statnum], 1);
 }
 
 /*
  * Counterpart of kmod_ipstat_inc(): adds -1 to the ipstat counter at
  * index 'statnum'.  The index is not range-checked here.
  */
 void
 kmod_ipstat_dec(int statnum)
 {
 
 	counter_u64_add(VNET(ipstat)[statnum], -1);
 }
 
 static int
 sysctl_netinet_intr_queue_maxlen(SYSCTL_HANDLER_ARGS)
 {
 	int error, qlimit;
 
 	netisr_getqlimit(&ip_nh, &qlimit);
 	error = sysctl_handle_int(oidp, &qlimit, 0, req);
 	if (error || !req->newptr)
 		return (error);
 	if (qlimit < 1)
 		return (EINVAL);
 	return (netisr_setqlimit(&ip_nh, qlimit));
 }
 SYSCTL_PROC(_net_inet_ip, IPCTL_INTRQMAXLEN, intr_queue_maxlen,
     CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, 0, 0,
     sysctl_netinet_intr_queue_maxlen, "I",
     "Maximum size of the IP input queue");
 
 static int
 sysctl_netinet_intr_queue_drops(SYSCTL_HANDLER_ARGS)
 {
 	u_int64_t qdrops_long;
 	int error, qdrops;
 
 	netisr_getqdrops(&ip_nh, &qdrops_long);
 	qdrops = qdrops_long;
 	error = sysctl_handle_int(oidp, &qdrops, 0, req);
 	if (error || !req->newptr)
 		return (error);
 	if (qdrops != 0)
 		return (EINVAL);
 	netisr_clearqdrops(&ip_nh);
 	return (0);
 }
 
 SYSCTL_PROC(_net_inet_ip, IPCTL_INTRQDROPS, intr_queue_drops,
     CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE,
     0, 0, sysctl_netinet_intr_queue_drops, "I",
     "Number of packets dropped from the IP input queue");
 
 #ifdef	RSS
 static int
 sysctl_netinet_intr_direct_queue_maxlen(SYSCTL_HANDLER_ARGS)
 {
 	int error, qlimit;
 
 	netisr_getqlimit(&ip_direct_nh, &qlimit);
 	error = sysctl_handle_int(oidp, &qlimit, 0, req);
 	if (error || !req->newptr)
 		return (error);
 	if (qlimit < 1)
 		return (EINVAL);
 	return (netisr_setqlimit(&ip_direct_nh, qlimit));
 }
 SYSCTL_PROC(_net_inet_ip, IPCTL_INTRDQMAXLEN, intr_direct_queue_maxlen,
     CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE,
     0, 0, sysctl_netinet_intr_direct_queue_maxlen,
     "I", "Maximum size of the IP direct input queue");
 
 static int
 sysctl_netinet_intr_direct_queue_drops(SYSCTL_HANDLER_ARGS)
 {
 	u_int64_t qdrops_long;
 	int error, qdrops;
 
 	netisr_getqdrops(&ip_direct_nh, &qdrops_long);
 	qdrops = qdrops_long;
 	error = sysctl_handle_int(oidp, &qdrops, 0, req);
 	if (error || !req->newptr)
 		return (error);
 	if (qdrops != 0)
 		return (EINVAL);
 	netisr_clearqdrops(&ip_direct_nh);
 	return (0);
 }
 
 SYSCTL_PROC(_net_inet_ip, IPCTL_INTRDQDROPS, intr_direct_queue_drops,
     CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE, 0, 0,
     sysctl_netinet_intr_direct_queue_drops, "I",
     "Number of packets dropped from the IP direct input queue");
 #endif	/* RSS */
 
 /*
  * IP initialization: fill in IP protocol switch table.
  * All protocols not implemented in kernel go to raw IP protocol handler.
  *
  * Per-vnet state (address list and hash, reassembly queues, pfil head,
  * IPsec helper hook heads) is set up first.  Under VIMAGE, non-default
  * vnets then only register their netisr handlers and return; the
  * global ip_protox[] table and the global netisr registration are done
  * for the default vnet only.
  */
 void
 ip_init(void)
 {
 	struct pfil_head_args args;
 	struct protosw *pr;
 	int i;
 
 	CK_STAILQ_INIT(&V_in_ifaddrhead);
 	V_in_ifaddrhashtbl = hashinit(INADDR_NHASH, M_IFADDR, &V_in_ifaddrhmask);
 
 	/* Initialize IP reassembly queue. */
 	ipreass_init();
 
 	/* Initialize packet filter hooks. */
 	args.pa_version = PFIL_VERSION;
 	args.pa_flags = PFIL_IN | PFIL_OUT;
 	args.pa_type = PFIL_TYPE_IP4;
 	args.pa_headname = PFIL_INET_NAME;
 	V_inet_pfil_head = pfil_head_register(&args);
 
 	/* Failure to register an IPsec helper hook head is only warned about. */
 	if (hhook_head_register(HHOOK_TYPE_IPSEC_IN, AF_INET,
 	    &V_ipsec_hhh_in[HHOOK_IPSEC_INET],
 	    HHOOK_WAITOK | HHOOK_HEADISINVNET) != 0)
 		printf("%s: WARNING: unable to register input helper hook\n",
 		    __func__);
 	if (hhook_head_register(HHOOK_TYPE_IPSEC_OUT, AF_INET,
 	    &V_ipsec_hhh_out[HHOOK_IPSEC_INET],
 	    HHOOK_WAITOK | HHOOK_HEADISINVNET) != 0)
 		printf("%s: WARNING: unable to register output helper hook\n",
 		    __func__);
 
 	/* Skip initialization of globals for non-default instances. */
 #ifdef VIMAGE
 	if (!IS_DEFAULT_VNET(curvnet)) {
 		netisr_register_vnet(&ip_nh);
 #ifdef	RSS
 		netisr_register_vnet(&ip_direct_nh);
 #endif
 		return;
 	}
 #endif
 
 	/* The raw IP entry is the fallback for every protocol number. */
 	pr = pffindproto(PF_INET, IPPROTO_RAW, SOCK_RAW);
 	if (pr == NULL)
 		panic("ip_init: PF_INET not found");
 
 	/* Initialize the entire ip_protox[] array to IPPROTO_RAW. */
 	for (i = 0; i < IPPROTO_MAX; i++)
 		ip_protox[i] = pr - inetsw;
 	/*
 	 * Cycle through IP protocols and put them into the appropriate place
 	 * in ip_protox[].
 	 */
 	for (pr = inetdomain.dom_protosw;
 	    pr < inetdomain.dom_protoswNPROTOSW; pr++)
 		if (pr->pr_domain->dom_family == PF_INET &&
 		    pr->pr_protocol && pr->pr_protocol != IPPROTO_RAW) {
 			/* Be careful to only index valid IP protocols. */
 			if (pr->pr_protocol < IPPROTO_MAX)
 				ip_protox[pr->pr_protocol] = pr - inetsw;
 		}
 
 	netisr_register(&ip_nh);
 #ifdef	RSS
 	netisr_register(&ip_direct_nh);
 #endif
 }
 
 #ifdef VIMAGE
 /*
  * Per-vnet teardown of the IPv4 stack, registered below via
  * VNET_SYSUNINIT.  Unregisters the netisr handlers, the pfil head and
  * the IPsec helper hook heads, then removes IPv4 addresses and routes
  * and destroys the reassembly queue and the address hash table.
  * Helper hook deregistration failures are only reported.
  */
 static void
 ip_destroy(void *unused __unused)
 {
 	int error;
 
 	/* Unregister the netisr handlers for this vnet first. */
 #ifdef	RSS
 	netisr_unregister_vnet(&ip_direct_nh);
 #endif
 	netisr_unregister_vnet(&ip_nh);
 
 	pfil_head_unregister(V_inet_pfil_head);
 	error = hhook_head_deregister(V_ipsec_hhh_in[HHOOK_IPSEC_INET]);
 	if (error != 0) {
 		printf("%s: WARNING: unable to deregister input helper hook "
 		    "type HHOOK_TYPE_IPSEC_IN, id HHOOK_IPSEC_INET: "
 		    "error %d returned\n", __func__, error);
 	}
 	error = hhook_head_deregister(V_ipsec_hhh_out[HHOOK_IPSEC_INET]);
 	if (error != 0) {
 		printf("%s: WARNING: unable to deregister output helper hook "
 		    "type HHOOK_TYPE_IPSEC_OUT, id HHOOK_IPSEC_INET: "
 		    "error %d returned\n", __func__, error);
 	}
 
 	/* Remove the IPv4 addresses from all interfaces. */
 	in_ifscrub_all();
 
 	/* Make sure the IPv4 routes are gone as well. */
 	rib_flush_routes_family(AF_INET);
 
 	/* Destroy IP reassembly queue. */
 	ipreass_destroy();
 
 	/* Cleanup in_ifaddr hash table; should be empty. */
 	hashdestroy(V_in_ifaddrhashtbl, M_IFADDR, V_in_ifaddrhmask);
 }
 
 VNET_SYSUNINIT(ip, SI_SUB_PROTO_DOMAIN, SI_ORDER_THIRD, ip_destroy, NULL);
 #endif
 
 #ifdef	RSS
 /*
  * IP direct input routine.
  *
  * Used when completed fragments are reinjected, i.e. all earlier
  * checking and book-keeping has already happened.  The packet is
  * offered to IPsec (when built in and enabled); if IPSEC_INPUT()
  * returns non-zero nothing more is done here.  Otherwise the packet is
  * counted as delivered and handed to the input routine of its
  * protocol.
  */
 void
 ip_direct_input(struct mbuf *m)
 {
 	struct ip *iph = mtod(m, struct ip *);
 	int hdrlen = iph->ip_hl << 2;
 
 #if defined(IPSEC) || defined(IPSEC_SUPPORT)
 	if (IPSEC_ENABLED(ipv4) &&
 	    IPSEC_INPUT(ipv4, m, hdrlen, iph->ip_p) != 0)
 		return;
 #endif /* IPSEC */
 	IPSTAT_INC(ips_delivered);
 	(*inetsw[ip_protox[iph->ip_p]].pr_input)(&m, &hdrlen, iph->ip_p);
 }
 #endif
 
 /*
  * Ip input routine.  Checksum and byte swap header.  If fragmented
  * try to reassemble.  Process options.  Pass to next level.
  */
 void
 ip_input(struct mbuf *m)
 {
 	MROUTER_RLOCK_TRACKER;
 	struct rm_priotracker in_ifa_tracker;
 	struct ip *ip = NULL;
 	struct in_ifaddr *ia = NULL;
 	struct ifaddr *ifa;
 	struct ifnet *ifp;
 	int    checkif, hlen = 0;
 	uint16_t sum, ip_len;
 	int dchg = 0;				/* dest changed after fw */
 	struct in_addr odst;			/* original dst address */
 
 	M_ASSERTPKTHDR(m);
 	NET_EPOCH_ASSERT();
 
 	if (m->m_flags & M_FASTFWD_OURS) {
 		m->m_flags &= ~M_FASTFWD_OURS;
 		/* Set up some basics that will be used later. */
 		ip = mtod(m, struct ip *);
 		hlen = ip->ip_hl << 2;
 		ip_len = ntohs(ip->ip_len);
 		goto ours;
 	}
 
 	IPSTAT_INC(ips_total);
 
 	if (m->m_pkthdr.len < sizeof(struct ip))
 		goto tooshort;
 
 	if (m->m_len < sizeof (struct ip) &&
 	    (m = m_pullup(m, sizeof (struct ip))) == NULL) {
 		IPSTAT_INC(ips_toosmall);
 		return;
 	}
 	ip = mtod(m, struct ip *);
 
 	if (ip->ip_v != IPVERSION) {
 		IPSTAT_INC(ips_badvers);
 		goto bad;
 	}
 
 	hlen = ip->ip_hl << 2;
 	if (hlen < sizeof(struct ip)) {	/* minimum header length */
 		IPSTAT_INC(ips_badhlen);
 		goto bad;
 	}
 	if (hlen > m->m_len) {
 		if ((m = m_pullup(m, hlen)) == NULL) {
 			IPSTAT_INC(ips_badhlen);
 			return;
 		}
 		ip = mtod(m, struct ip *);
 	}
 
 	IP_PROBE(receive, NULL, NULL, ip, m->m_pkthdr.rcvif, ip, NULL);
 
 	/* IN_LOOPBACK must not appear on the wire - RFC1122 */
 	ifp = m->m_pkthdr.rcvif;
 	if (IN_LOOPBACK(ntohl(ip->ip_dst.s_addr)) ||
 	    IN_LOOPBACK(ntohl(ip->ip_src.s_addr))) {
 		if ((ifp->if_flags & IFF_LOOPBACK) == 0) {
 			IPSTAT_INC(ips_badaddr);
 			goto bad;
 		}
 	}
 
 	if (m->m_pkthdr.csum_flags & CSUM_IP_CHECKED) {
 		sum = !(m->m_pkthdr.csum_flags & CSUM_IP_VALID);
 	} else {
 		if (hlen == sizeof(struct ip)) {
 			sum = in_cksum_hdr(ip);
 		} else {
 			sum = in_cksum(m, hlen);
 		}
 	}
 	if (sum) {
 		IPSTAT_INC(ips_badsum);
 		goto bad;
 	}
 
 #ifdef ALTQ
 	if (altq_input != NULL && (*altq_input)(m, AF_INET) == 0)
 		/* packet is dropped by traffic conditioner */
 		return;
 #endif
 
 	ip_len = ntohs(ip->ip_len);
 	if (ip_len < hlen) {
 		IPSTAT_INC(ips_badlen);
 		goto bad;
 	}
 
 	/*
 	 * Check that the amount of data in the buffers
 	 * is at least as much as the IP header would have us expect.
 	 * Trim mbufs if longer than we expect.
 	 * Drop packet if shorter than we expect.
 	 */
 	if (m->m_pkthdr.len < ip_len) {
 tooshort:
 		IPSTAT_INC(ips_tooshort);
 		goto bad;
 	}
 	if (m->m_pkthdr.len > ip_len) {
 		if (m->m_len == m->m_pkthdr.len) {
 			m->m_len = ip_len;
 			m->m_pkthdr.len = ip_len;
 		} else
 			m_adj(m, ip_len - m->m_pkthdr.len);
 	}
 
 	/*
 	 * Try to forward the packet, but if we fail continue.
 	 * ip_tryforward() does not generate redirects, so fall
 	 * through to normal processing if redirects are required.
 	 * ip_tryforward() does inbound and outbound packet firewall
 	 * processing. If firewall has decided that destination becomes
 	 * our local address, it sets M_FASTFWD_OURS flag. In this
 	 * case skip another inbound firewall processing and update
 	 * ip pointer.
 	 */
 	if (V_ipforwarding != 0
 #if defined(IPSEC) || defined(IPSEC_SUPPORT)
 	    && (!IPSEC_ENABLED(ipv4) ||
 	    IPSEC_CAPS(ipv4, m, IPSEC_CAP_OPERABLE) == 0)
 #endif
 	    ) {
 		if ((m = ip_tryforward(m)) == NULL)
 			return;
 		if (m->m_flags & M_FASTFWD_OURS) {
 			m->m_flags &= ~M_FASTFWD_OURS;
 			ip = mtod(m, struct ip *);
 			goto ours;
 		}
 	}
 
 #if defined(IPSEC) || defined(IPSEC_SUPPORT)
 	/*
 	 * Bypass packet filtering for packets previously handled by IPsec.
 	 */
 	if (IPSEC_ENABLED(ipv4) &&
 	    IPSEC_CAPS(ipv4, m, IPSEC_CAP_BYPASS_FILTER) != 0)
 			goto passin;
 #endif
 
 	/*
 	 * Run through list of hooks for input packets.
 	 *
 	 * NB: Beware of the destination address changing (e.g.
 	 *     by NAT rewriting).  When this happens, tell
 	 *     ip_forward to do the right thing.
 	 */
 
 	/* Jump over all PFIL processing if hooks are not active. */
 	if (!PFIL_HOOKED_IN(V_inet_pfil_head))
 		goto passin;
 
 	odst = ip->ip_dst;
 	if (pfil_run_hooks(V_inet_pfil_head, &m, ifp, PFIL_IN, NULL) !=
 	    PFIL_PASS)
 		return;
 	if (m == NULL)			/* consumed by filter */
 		return;
 
 	ip = mtod(m, struct ip *);
 	dchg = (odst.s_addr != ip->ip_dst.s_addr);
 	ifp = m->m_pkthdr.rcvif;
 
 	if (m->m_flags & M_FASTFWD_OURS) {
 		m->m_flags &= ~M_FASTFWD_OURS;
 		goto ours;
 	}
 	if (m->m_flags & M_IP_NEXTHOP) {
 		if (m_tag_find(m, PACKET_TAG_IPFORWARD, NULL) != NULL) {
 			/*
 			 * Directly ship the packet on.  This allows
 			 * forwarding packets originally destined to us
 			 * to some other directly connected host.
 			 */
 			ip_forward(m, 1);
 			return;
 		}
 	}
 passin:
 
 	/*
 	 * Process options and, if not destined for us,
 	 * ship it on.  ip_dooptions returns 1 when an
 	 * error was detected (causing an icmp message
 	 * to be sent and the original packet to be freed).
 	 */
 	if (hlen > sizeof (struct ip) && ip_dooptions(m, 0))
 		return;
 
         /* greedy RSVP, snatches any PATH packet of the RSVP protocol and no
          * matter if it is destined to another node, or whether it is
          * a multicast one, RSVP wants it! and prevents it from being forwarded
          * anywhere else. Also checks if the rsvp daemon is running before
 	 * grabbing the packet.
          */
 	if (V_rsvp_on && ip->ip_p==IPPROTO_RSVP)
 		goto ours;
 
 	/*
 	 * Check our list of addresses, to see if the packet is for us.
 	 * If we don't have any addresses, assume any unicast packet
 	 * we receive might be for us (and let the upper layers deal
 	 * with it).
 	 */
 	if (CK_STAILQ_EMPTY(&V_in_ifaddrhead) &&
 	    (m->m_flags & (M_MCAST|M_BCAST)) == 0)
 		goto ours;
 
 	/*
 	 * Enable a consistency check between the destination address
 	 * and the arrival interface for a unicast packet (the RFC 1122
 	 * strong ES model) if IP forwarding is disabled and the packet
 	 * is not locally generated and the packet is not subject to
 	 * 'ipfw fwd'.
 	 *
 	 * XXX - Checking also should be disabled if the destination
 	 * address is ipnat'ed to a different interface.
 	 *
 	 * XXX - Checking is incompatible with IP aliases added
 	 * to the loopback interface instead of the interface where
 	 * the packets are received.
 	 *
 	 * XXX - This is the case for carp vhost IPs as well so we
 	 * insert a workaround. If the packet got here, we already
 	 * checked with carp_iamatch() and carp_forus().
 	 */
 	checkif = V_ip_checkinterface && (V_ipforwarding == 0) &&
 	    ifp != NULL && ((ifp->if_flags & IFF_LOOPBACK) == 0) &&
 	    ifp->if_carp == NULL && (dchg == 0);
 
 	/*
 	 * Check for exact addresses in the hash bucket.
 	 */
 	IN_IFADDR_RLOCK(&in_ifa_tracker);
 	LIST_FOREACH(ia, INADDR_HASH(ip->ip_dst.s_addr), ia_hash) {
 		/*
 		 * If the address matches, verify that the packet
 		 * arrived via the correct interface if checking is
 		 * enabled.
 		 */
 		if (IA_SIN(ia)->sin_addr.s_addr == ip->ip_dst.s_addr &&
 		    (!checkif || ia->ia_ifp == ifp)) {
 			counter_u64_add(ia->ia_ifa.ifa_ipackets, 1);
 			counter_u64_add(ia->ia_ifa.ifa_ibytes,
 			    m->m_pkthdr.len);
 			IN_IFADDR_RUNLOCK(&in_ifa_tracker);
 			goto ours;
 		}
 	}
 	IN_IFADDR_RUNLOCK(&in_ifa_tracker);
 
 	/*
 	 * Check for broadcast addresses.
 	 *
 	 * Only accept broadcast packets that arrive via the matching
 	 * interface.  Reception of forwarded directed broadcasts would
 	 * be handled via ip_forward() and ether_output() with the loopback
 	 * into the stack for SIMPLEX interfaces handled by ether_output().
 	 */
 	if (ifp != NULL && ifp->if_flags & IFF_BROADCAST) {
 		CK_STAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
 			if (ifa->ifa_addr->sa_family != AF_INET)
 				continue;
 			ia = ifatoia(ifa);
 			if (satosin(&ia->ia_broadaddr)->sin_addr.s_addr ==
 			    ip->ip_dst.s_addr) {
 				counter_u64_add(ia->ia_ifa.ifa_ipackets, 1);
 				counter_u64_add(ia->ia_ifa.ifa_ibytes,
 				    m->m_pkthdr.len);
 				goto ours;
 			}
 #ifdef BOOTP_COMPAT
 			if (IA_SIN(ia)->sin_addr.s_addr == INADDR_ANY) {
 				counter_u64_add(ia->ia_ifa.ifa_ipackets, 1);
 				counter_u64_add(ia->ia_ifa.ifa_ibytes,
 				    m->m_pkthdr.len);
 				goto ours;
 			}
 #endif
 		}
 		ia = NULL;
 	}
 	if (IN_MULTICAST(ntohl(ip->ip_dst.s_addr))) {
 		MROUTER_RLOCK();
 		/*
 		 * RFC 3927 2.7: Do not forward multicast packets from
 		 * IN_LINKLOCAL.
 		 */
 		if (V_ip_mrouter && !IN_LINKLOCAL(ntohl(ip->ip_src.s_addr))) {
 			/*
 			 * If we are acting as a multicast router, all
 			 * incoming multicast packets are passed to the
 			 * kernel-level multicast forwarding function.
 			 * The packet is returned (relatively) intact; if
 			 * ip_mforward() returns a non-zero value, the packet
 			 * must be discarded, else it may be accepted below.
 			 */
 			if (ip_mforward && ip_mforward(ip, ifp, m, 0) != 0) {
 				MROUTER_RUNLOCK();
 				IPSTAT_INC(ips_cantforward);
 				m_freem(m);
 				return;
 			}
 
 			/*
 			 * The process-level routing daemon needs to receive
 			 * all multicast IGMP packets, whether or not this
 			 * host belongs to their destination groups.
 			 */
 			if (ip->ip_p == IPPROTO_IGMP) {
 				MROUTER_RUNLOCK();
 				goto ours;
 			}
 			IPSTAT_INC(ips_forward);
 		}
 		MROUTER_RUNLOCK();
 		/*
 		 * Assume the packet is for us, to avoid prematurely taking
 		 * a lock on the in_multi hash. Protocols must perform
 		 * their own filtering and update statistics accordingly.
 		 */
 		goto ours;
 	}
 	if (ip->ip_dst.s_addr == (u_long)INADDR_BROADCAST)
 		goto ours;
 	if (ip->ip_dst.s_addr == INADDR_ANY)
 		goto ours;
 	/* RFC 3927 2.7: Do not forward packets to or from IN_LINKLOCAL. */
 	if (IN_LINKLOCAL(ntohl(ip->ip_dst.s_addr)) ||
 	    IN_LINKLOCAL(ntohl(ip->ip_src.s_addr))) {
 		IPSTAT_INC(ips_cantforward);
 		m_freem(m);
 		return;
 	}
 
 	/*
 	 * Not for us; forward if possible and desirable.
 	 */
 	if (V_ipforwarding == 0) {
 		IPSTAT_INC(ips_cantforward);
 		m_freem(m);
 	} else {
 		ip_forward(m, dchg);
 	}
 	return;
 
 ours:
 #ifdef IPSTEALTH
 	/*
 	 * IPSTEALTH: Process non-routing options only
 	 * if the packet is destined for us.
 	 */
 	if (V_ipstealth && hlen > sizeof (struct ip) && ip_dooptions(m, 1))
 		return;
 #endif /* IPSTEALTH */
 
 	/*
 	 * Attempt reassembly; if it succeeds, proceed.
 	 * ip_reass() will return a different mbuf.
 	 */
 	if (ip->ip_off & htons(IP_MF | IP_OFFMASK)) {
 		/* XXXGL: shouldn't we save & set m_flags? */
 		m = ip_reass(m);
 		if (m == NULL)
 			return;
 		ip = mtod(m, struct ip *);
 		/* Get the header length of the reassembled packet */
 		hlen = ip->ip_hl << 2;
 	}
 
 #if defined(IPSEC) || defined(IPSEC_SUPPORT)
 	if (IPSEC_ENABLED(ipv4)) {
 		if (IPSEC_INPUT(ipv4, m, hlen, ip->ip_p) != 0)
 			return;
 	}
 #endif /* IPSEC */
 
 	/*
 	 * Switch out to protocol's input routine.
 	 */
 	IPSTAT_INC(ips_delivered);
 
 	(*inetsw[ip_protox[ip->ip_p]].pr_input)(&m, &hlen, ip->ip_p);
 	return;
 bad:
 	m_freem(m);
 }
 
 /*
  * IP timer processing;
  * if a timer expires on a reassembly
  * queue, discard it.
  */
 void
 ip_slowtimo(void)
 {
 	VNET_ITERATOR_DECL(vnet_iter);
 
 	VNET_LIST_RLOCK_NOSLEEP();
 	VNET_FOREACH(vnet_iter) {
 		CURVNET_SET(vnet_iter);
 		ipreass_slowtimo();
 		CURVNET_RESTORE();
 	}
 	VNET_LIST_RUNLOCK_NOSLEEP();
 }
 
 void
 ip_drain(void)
 {
 	VNET_ITERATOR_DECL(vnet_iter);
 
 	VNET_LIST_RLOCK_NOSLEEP();
 	VNET_FOREACH(vnet_iter) {
 		CURVNET_SET(vnet_iter);
 		ipreass_drain();
 		CURVNET_RESTORE();
 	}
 	VNET_LIST_RUNLOCK_NOSLEEP();
 }
 
 /*
  * Point ip_protox[ipproto] at the inetsw[] entry for that protocol.
  * The protocol must already be present in inetsw[], either statically
  * or through pf_proto_register().
  *
  * Returns 0 on success, EPROTONOSUPPORT if ipproto is out of range or
  * has no inetsw[] entry, EPFNOSUPPORT if the raw IP entry cannot be
  * found, and EEXIST if the slot is already claimed.
  */
 int
 ipproto_register(short ipproto)
 {
 	struct protosw *rawpr, *cur;
 
 	/* Only protocol numbers strictly between 0 and IPPROTO_MAX. */
 	if (!(ipproto > 0 && ipproto < IPPROTO_MAX))
 		return (EPROTONOSUPPORT);
 
 	/*
 	 * A free slot is one that still indexes the IPPROTO_RAW entry;
 	 * anything else means another protocol owns it.
 	 */
 	rawpr = pffindproto(PF_INET, IPPROTO_RAW, SOCK_RAW);
 	if (rawpr == NULL)
 		return (EPFNOSUPPORT);
 	if (ip_protox[ipproto] != rawpr - inetsw)
 		return (EEXIST);
 
 	/* Locate the matching inetsw[] entry and record its index. */
 	for (cur = inetdomain.dom_protosw;
 	     cur < inetdomain.dom_protoswNPROTOSW; cur++) {
 		if (cur->pr_domain->dom_family != PF_INET)
 			continue;
 		if (cur->pr_protocol == 0 || cur->pr_protocol != ipproto)
 			continue;
 		ip_protox[cur->pr_protocol] = cur - inetsw;
 		return (0);
 	}
 	return (EPROTONOSUPPORT);
 }
 
 /*
  * Undo ipproto_register(): make ip_protox[ipproto] index the
  * IPPROTO_RAW entry of inetsw[] again.
  *
  * Returns 0 on success, EPROTONOSUPPORT if ipproto is out of range,
  * EPFNOSUPPORT if the raw IP entry cannot be found, and ENOENT if the
  * slot was not registered in the first place.
  */
 int
 ipproto_unregister(short ipproto)
 {
 	struct protosw *rawpr;
 
 	/* Only protocol numbers strictly between 0 and IPPROTO_MAX. */
 	if (!(ipproto > 0 && ipproto < IPPROTO_MAX))
 		return (EPROTONOSUPPORT);
 
 	rawpr = pffindproto(PF_INET, IPPROTO_RAW, SOCK_RAW);
 	if (rawpr == NULL)
 		return (EPFNOSUPPORT);
 
 	/* A slot that already indexes the raw entry was never claimed. */
 	if (ip_protox[ipproto] == rawpr - inetsw)
 		return (ENOENT);
 
 	/* Hand the slot back to IPPROTO_RAW. */
 	ip_protox[ipproto] = rawpr - inetsw;
 	return (0);
 }
 
 /*
  * Table of errno values with PRC_NCMDS slots.  The initializer is
  * positional, so the order of the entries below is significant.
  * NOTE(review): presumably indexed by PRC_* command code, with 0
  * meaning "no errno for this command" -- confirm against the users
  * of this table.
  */
 u_char inetctlerrmap[PRC_NCMDS] = {
 	0,		0,		0,		0,
 	0,		EMSGSIZE,	EHOSTDOWN,	EHOSTUNREACH,
 	EHOSTUNREACH,	EHOSTUNREACH,	ECONNREFUSED,	ECONNREFUSED,
 	EMSGSIZE,	EHOSTUNREACH,	0,		0,
 	0,		0,		EHOSTUNREACH,	0,
 	ENOPROTOOPT,	ECONNREFUSED
 };
 
 /*
  * Forward a packet.  If some error occurs return the sender
  * an icmp packet.  Note we can't always generate a meaningful
  * icmp message because icmp doesn't have a large enough repertoire
  * of codes and types.
  *
  * If not forwarding, just drop the packet.  This could be confusing
  * if ipforwarding was zero but some routing protocol was advancing
  * us as a gateway to somewhere.  However, we must let the routing
  * protocol deal with that.
  *
  * The srcrt parameter indicates whether the packet is being forwarded
  * via a source route.  When it is non-zero no ICMP redirect is
  * considered for the packet.
  *
  * Must be called inside the network epoch (NET_EPOCH_ASSERT()).
  */
 void
 ip_forward(struct mbuf *m, int srcrt)
 {
 	struct ip *ip = mtod(m, struct ip *);
 	struct in_ifaddr *ia;
 	struct mbuf *mcopy;
 	struct sockaddr_in *sin;
 	struct in_addr dest;
 	struct route ro;
 	uint32_t flowid;
 	int error, type = 0, code = 0, mtu = 0;
 
 	NET_EPOCH_ASSERT();
 
 	/* Link-level broadcast/multicast and unforwardable destinations. */
 	if (m->m_flags & (M_BCAST|M_MCAST) || in_canforward(ip->ip_dst) == 0) {
 		IPSTAT_INC(ips_cantforward);
 		m_freem(m);
 		return;
 	}
 	/* TTL would expire here; skipped when IPSTEALTH is enabled. */
 	if (
 #ifdef IPSTEALTH
 	    V_ipstealth == 0 &&
 #endif
 	    ip->ip_ttl <= IPTTLDEC) {
 		icmp_error(m, ICMP_TIMXCEED, ICMP_TIMXCEED_INTRANS, 0, 0);
 		return;
 	}
 
 	/*
 	 * Look up a referenced nexthop (NHR_REF) for the destination;
 	 * the reference is dropped with RO_NHFREE() below.
 	 */
 	bzero(&ro, sizeof(ro));
 	sin = (struct sockaddr_in *)&ro.ro_dst;
 	sin->sin_family = AF_INET;
 	sin->sin_len = sizeof(*sin);
 	sin->sin_addr = ip->ip_dst;
 	flowid = m->m_pkthdr.flowid;
 	ro.ro_nh = fib4_lookup(M_GETFIB(m), ip->ip_dst, 0, NHR_REF, flowid);
 	if (ro.ro_nh != NULL) {
 		ia = ifatoia(ro.ro_nh->nh_ifa);
 	} else
 		ia = NULL;
 	/*
 	 * Save the IP header and at most 8 bytes of the payload,
 	 * in case we need to generate an ICMP message to the src.
 	 *
 	 * XXX this can be optimized a lot by saving the data in a local
 	 * buffer on the stack (72 bytes at most), and only allocating the
 	 * mbuf if really necessary. The vast majority of the packets
 	 * are forwarded without having to send an ICMP back (either
 	 * because unnecessary, or because rate limited), so we are
 	 * really wasting a lot of work here.
 	 *
 	 * We don't use m_copym() because it might return a reference
 	 * to a shared cluster. Both this function and ip_output()
 	 * assume exclusive access to the IP header in `m', so any
 	 * data in a cluster may change before we reach icmp_error().
 	 */
 	mcopy = m_gethdr(M_NOWAIT, m->m_type);
 	if (mcopy != NULL && !m_dup_pkthdr(mcopy, m, M_NOWAIT)) {
 		/*
 		 * It's probably ok if the pkthdr dup fails (because
 		 * the deep copy of the tag chain failed), but for now
 		 * be conservative and just discard the copy since
 		 * code below may some day want the tags.
 		 */
 		m_free(mcopy);
 		mcopy = NULL;
 	}
 	if (mcopy != NULL) {
 		/*
 		 * NOTE(review): the length copied here is bounded by the
 		 * packet length and M_TRAILINGSPACE(mcopy), not by
 		 * "header plus 8 bytes" as the comment above states.
 		 */
 		mcopy->m_len = min(ntohs(ip->ip_len), M_TRAILINGSPACE(mcopy));
 		mcopy->m_pkthdr.len = mcopy->m_len;
 		m_copydata(m, 0, mcopy->m_len, mtod(mcopy, caddr_t));
 	}
 #ifdef IPSTEALTH
 	if (V_ipstealth == 0)
 #endif
 		ip->ip_ttl -= IPTTLDEC;
 #if defined(IPSEC) || defined(IPSEC_SUPPORT)
 	if (IPSEC_ENABLED(ipv4)) {
 		if ((error = IPSEC_FORWARD(ipv4, m)) != 0) {
 			/* mbuf consumed by IPsec */
 			RO_NHFREE(&ro);
 			m_freem(mcopy);
 			if (error != EINPROGRESS)
 				IPSTAT_INC(ips_cantforward);
 			return;
 		}
 		/* No IPsec processing required */
 	}
 #endif /* IPSEC */
 	/*
 	 * If forwarding packet using same interface that it came in on,
 	 * perhaps should send a redirect to sender to shortcut a hop.
 	 * Only send redirect if source is sending directly to us,
 	 * and if packet was not source routed (or has any options).
 	 * Also, don't send redirect if forwarding using a default route
 	 * or a route modified by a redirect.
 	 */
 	dest.s_addr = 0;
 	if (!srcrt && V_ipsendredirects &&
 	    ia != NULL && ia->ia_ifp == m->m_pkthdr.rcvif) {
 		struct nhop_object *nh;
 
 		nh = ro.ro_nh;
 
 		if (nh != NULL && ((nh->nh_flags & (NHF_REDIRECT|NHF_DEFAULT)) == 0)) {
 			struct in_ifaddr *nh_ia = (struct in_ifaddr *)(nh->nh_ifa);
 			u_long src = ntohl(ip->ip_src.s_addr);
 
 			/* Source must be on the subnet of the outgoing address. */
 			if (nh_ia != NULL &&
 			    (src & nh_ia->ia_subnetmask) == nh_ia->ia_subnet) {
-				if (nh->nh_flags & NHF_GATEWAY)
-					dest.s_addr = nh->gw4_sa.sin_addr.s_addr;
-				else
-					dest.s_addr = ip->ip_dst.s_addr;
 				/* Router requirements says to only send host redirects */
 				type = ICMP_REDIRECT;
 				code = ICMP_REDIRECT_HOST;
+				if (nh->nh_flags & NHF_GATEWAY) {
+				    if (nh->gw_sa.sa_family == AF_INET)
+					dest.s_addr = nh->gw4_sa.sin_addr.s_addr;
+				    else /* Do not redirect in case gw is AF_INET6 */
+					type = 0;
+				} else
+					dest.s_addr = ip->ip_dst.s_addr;
 			}
 		}
 	}
 
 	error = ip_output(m, NULL, &ro, IP_FORWARDING, NULL, NULL);
 
 	/* Remember the nexthop MTU before the nexthop reference is dropped. */
 	if (error == EMSGSIZE && ro.ro_nh)
 		mtu = ro.ro_nh->nh_mtu;
 	RO_NHFREE(&ro);
 
 	if (error)
 		IPSTAT_INC(ips_cantforward);
 	else {
 		IPSTAT_INC(ips_forward);
 		if (type)
 			IPSTAT_INC(ips_redirectsent);
 		else {
 			/* Forwarded with nothing to report: drop the saved copy. */
 			if (mcopy)
 				m_freem(mcopy);
 			return;
 		}
 	}
 	/* Without a saved copy there is nothing to build an ICMP message from. */
 	if (mcopy == NULL)
 		return;
 
 	/* Translate the ip_output() result into an ICMP type and code. */
 	switch (error) {
 	case 0:				/* forwarded, but need redirect */
 		/* type, code set above */
 		break;
 
 	case ENETUNREACH:
 	case EHOSTUNREACH:
 	case ENETDOWN:
 	case EHOSTDOWN:
 	default:
 		type = ICMP_UNREACH;
 		code = ICMP_UNREACH_HOST;
 		break;
 
 	case EMSGSIZE:
 		type = ICMP_UNREACH;
 		code = ICMP_UNREACH_NEEDFRAG;
 		/*
 		 * If the MTU was set before make sure we are below the
 		 * interface MTU.
 		 * If the MTU wasn't set before use the interface mtu or
 		 * fall back to the next smaller mtu step compared to the
 		 * current packet size.
 		 */
 		if (mtu != 0) {
 			if (ia != NULL)
 				mtu = min(mtu, ia->ia_ifp->if_mtu);
 		} else {
 			if (ia != NULL)
 				mtu = ia->ia_ifp->if_mtu;
 			else
 				mtu = ip_next_mtu(ntohs(ip->ip_len), 0);
 		}
 		IPSTAT_INC(ips_cantfrag);
 		break;
 
 	case ENOBUFS:
 	case EACCES:			/* ipfw denied packet */
 		/* No ICMP message is sent for these errors. */
 		m_freem(mcopy);
 		return;
 	}
 	icmp_error(mcopy, type, code, dest.s_addr, mtu);
 }
 
 /*
  * Evaluates to 1 when socket sp has SO_TIMESTAMP set in so_options and
  * its so_ts_clock equals ct, and to 0 otherwise.  Both arguments are
  * parenthesized so that arbitrary expressions (e.g. "&so" or a
  * conditional expression) expand correctly; sp is evaluated up to twice.
  */
 #define	CHECK_SO_CT(sp, ct) \
     (((((sp)->so_options & SO_TIMESTAMP) != 0) && \
     ((sp)->so_ts_clock == (ct))) ? 1 : 0)
 
 /*
  * Build the control messages requested by the socket options of inp
  * for the received packet m, whose IP header is ip.
  *
  * Each message is created with sbcreatecontrol() and stored through
  * mp; after a successful allocation mp is advanced to the m_next of
  * the new mbuf so that the messages form a chain.  If an allocation
  * fails that message is simply left out.
  */
 void
 ip_savecontrol(struct inpcb *inp, struct mbuf **mp, struct ip *ip,
     struct mbuf *m)
 {
 	bool stamped;
 
 	/* Set once any timestamp message has been appended. */
 	stamped = false;
 	if ((inp->inp_socket->so_options & SO_BINTIME) ||
 	    CHECK_SO_CT(inp->inp_socket, SO_TS_BINTIME)) {
 		struct bintime boottimebin, bt;
 		struct timespec ts1;
 
 		/* Use the timestamp carried by the mbuf when it has one. */
 		if ((m->m_flags & (M_PKTHDR | M_TSTMP)) == (M_PKTHDR |
 		    M_TSTMP)) {
 			mbuf_tstmp2timespec(m, &ts1);
 			timespec2bintime(&ts1, &bt);
 			getboottimebin(&boottimebin);
 			bintime_add(&bt, &boottimebin);
 		} else {
 			bintime(&bt);
 		}
 		*mp = sbcreatecontrol((caddr_t)&bt, sizeof(bt),
 		    SCM_BINTIME, SOL_SOCKET);
 		if (*mp != NULL) {
 			mp = &(*mp)->m_next;
 			stamped = true;
 		}
 	}
 	/* At most one of the three SO_TIMESTAMP clock flavours is emitted. */
 	if (CHECK_SO_CT(inp->inp_socket, SO_TS_REALTIME_MICRO)) {
 		struct bintime boottimebin, bt1;
 		struct timespec ts1;
 		struct timeval tv;
 
 		if ((m->m_flags & (M_PKTHDR | M_TSTMP)) == (M_PKTHDR |
 		    M_TSTMP)) {
 			mbuf_tstmp2timespec(m, &ts1);
 			timespec2bintime(&ts1, &bt1);
 			getboottimebin(&boottimebin);
 			bintime_add(&bt1, &boottimebin);
 			bintime2timeval(&bt1, &tv);
 		} else {
 			microtime(&tv);
 		}
 		*mp = sbcreatecontrol((caddr_t)&tv, sizeof(tv),
 		    SCM_TIMESTAMP, SOL_SOCKET);
 		if (*mp != NULL) {
 			mp = &(*mp)->m_next;
 			stamped = true;
 		}
 	} else if (CHECK_SO_CT(inp->inp_socket, SO_TS_REALTIME)) {
 		struct bintime boottimebin;
 		struct timespec ts, ts1;
 
 		if ((m->m_flags & (M_PKTHDR | M_TSTMP)) == (M_PKTHDR |
 		    M_TSTMP)) {
 			mbuf_tstmp2timespec(m, &ts);
 			getboottimebin(&boottimebin);
 			bintime2timespec(&boottimebin, &ts1);
 			timespecadd(&ts, &ts1, &ts);
 		} else {
 			nanotime(&ts);
 		}
 		*mp = sbcreatecontrol((caddr_t)&ts, sizeof(ts),
 		    SCM_REALTIME, SOL_SOCKET);
 		if (*mp != NULL) {
 			mp = &(*mp)->m_next;
 			stamped = true;
 		}
 	} else if (CHECK_SO_CT(inp->inp_socket, SO_TS_MONOTONIC)) {
 		struct timespec ts;
 
 		if ((m->m_flags & (M_PKTHDR | M_TSTMP)) == (M_PKTHDR |
 		    M_TSTMP))
 			mbuf_tstmp2timespec(m, &ts);
 		else
 			nanouptime(&ts);
 		*mp = sbcreatecontrol((caddr_t)&ts, sizeof(ts),
 		    SCM_MONOTONIC, SOL_SOCKET);
 		if (*mp != NULL) {
 			mp = &(*mp)->m_next;
 			stamped = true;
 		}
 	}
 	/*
 	 * When a timestamp was delivered and it came from the mbuf,
 	 * also describe it with an SCM_TIME_INFO message.
 	 */
 	if (stamped && (m->m_flags & (M_PKTHDR | M_TSTMP)) == (M_PKTHDR |
 	    M_TSTMP)) {
 		struct sock_timestamp_info sti;
 
 		bzero(&sti, sizeof(sti));
 		sti.st_info_flags = ST_INFO_HW;
 		if ((m->m_flags & M_TSTMP_HPREC) != 0)
 			sti.st_info_flags |= ST_INFO_HW_HPREC;
 		*mp = sbcreatecontrol((caddr_t)&sti, sizeof(sti), SCM_TIME_INFO,
 		    SOL_SOCKET);
 		if (*mp != NULL)
 			mp = &(*mp)->m_next;
 	}
 	if (inp->inp_flags & INP_RECVDSTADDR) {
 		*mp = sbcreatecontrol((caddr_t)&ip->ip_dst,
 		    sizeof(struct in_addr), IP_RECVDSTADDR, IPPROTO_IP);
 		if (*mp)
 			mp = &(*mp)->m_next;
 	}
 	if (inp->inp_flags & INP_RECVTTL) {
 		*mp = sbcreatecontrol((caddr_t)&ip->ip_ttl,
 		    sizeof(u_char), IP_RECVTTL, IPPROTO_IP);
 		if (*mp)
 			mp = &(*mp)->m_next;
 	}
 #ifdef notyet
 	/* XXX
 	 * Moving these out of udp_input() made them even more broken
 	 * than they already were.
 	 */
 	/* options were tossed already */
 	if (inp->inp_flags & INP_RECVOPTS) {
 		*mp = sbcreatecontrol((caddr_t)opts_deleted_above,
 		    sizeof(struct in_addr), IP_RECVOPTS, IPPROTO_IP);
 		if (*mp)
 			mp = &(*mp)->m_next;
 	}
 	/* ip_srcroute doesn't do what we want here, need to fix */
 	if (inp->inp_flags & INP_RECVRETOPTS) {
 		*mp = sbcreatecontrol((caddr_t)ip_srcroute(m),
 		    sizeof(struct in_addr), IP_RECVRETOPTS, IPPROTO_IP);
 		if (*mp)
 			mp = &(*mp)->m_next;
 	}
 #endif
 	if (inp->inp_flags & INP_RECVIF) {
 		struct ifnet *ifp;
 		struct sdlbuf {
 			struct sockaddr_dl sdl;
 			u_char	pad[32];
 		} sdlbuf;
 		struct sockaddr_dl *sdp;
 		struct sockaddr_dl *sdl2 = &sdlbuf.sdl;
 
 		if ((ifp = m->m_pkthdr.rcvif) &&
 		    ifp->if_index && ifp->if_index <= V_if_index) {
 			sdp = (struct sockaddr_dl *)ifp->if_addr->ifa_addr;
 			/*
 			 * Change our mind and don't try copy.
 			 */
 			if (sdp->sdl_family != AF_LINK ||
 			    sdp->sdl_len > sizeof(sdlbuf)) {
 				goto makedummy;
 			}
 			bcopy(sdp, sdl2, sdp->sdl_len);
 		} else {
 makedummy:
 			/*
 			 * Synthesize an empty AF_LINK address.  sdlbuf lives
 			 * on the stack and is not zeroed, so every header
 			 * field that falls inside sdl_len must be set here:
 			 * sdl_type included, otherwise an uninitialized
 			 * stack byte is copied into the control message.
 			 */
 			sdl2->sdl_len =
 			    offsetof(struct sockaddr_dl, sdl_data[0]);
 			sdl2->sdl_family = AF_LINK;
 			sdl2->sdl_index = 0;
 			sdl2->sdl_type = 0;
 			sdl2->sdl_nlen = sdl2->sdl_alen = sdl2->sdl_slen = 0;
 		}
 		*mp = sbcreatecontrol((caddr_t)sdl2, sdl2->sdl_len,
 		    IP_RECVIF, IPPROTO_IP);
 		if (*mp)
 			mp = &(*mp)->m_next;
 	}
 	if (inp->inp_flags & INP_RECVTOS) {
 		*mp = sbcreatecontrol((caddr_t)&ip->ip_tos,
 		    sizeof(u_char), IP_RECVTOS, IPPROTO_IP);
 		if (*mp)
 			mp = &(*mp)->m_next;
 	}
 
 	if (inp->inp_flags2 & INP_RECVFLOWID) {
 		uint32_t flowid, flow_type;
 
 		flowid = m->m_pkthdr.flowid;
 		flow_type = M_HASHTYPE_GET(m);
 
 		/*
 		 * XXX should handle the failure of one or the
 		 * other - don't populate both?
 		 */
 		*mp = sbcreatecontrol((caddr_t) &flowid,
 		    sizeof(uint32_t), IP_FLOWID, IPPROTO_IP);
 		if (*mp)
 			mp = &(*mp)->m_next;
 		*mp = sbcreatecontrol((caddr_t) &flow_type,
 		    sizeof(uint32_t), IP_FLOWTYPE, IPPROTO_IP);
 		if (*mp)
 			mp = &(*mp)->m_next;
 	}
 
 #ifdef	RSS
 	if (inp->inp_flags2 & INP_RECVRSSBUCKETID) {
 		uint32_t flowid, flow_type;
 		uint32_t rss_bucketid;
 
 		flowid = m->m_pkthdr.flowid;
 		flow_type = M_HASHTYPE_GET(m);
 
 		/* Only report a bucket when the hash maps to one. */
 		if (rss_hash2bucket(flowid, flow_type, &rss_bucketid) == 0) {
 			*mp = sbcreatecontrol((caddr_t) &rss_bucketid,
 			   sizeof(uint32_t), IP_RSSBUCKETID, IPPROTO_IP);
 			if (*mp)
 				mp = &(*mp)->m_next;
 		}
 	}
 #endif
 }
 
 /*
  * XXXRW: Multicast routing code in ip_mroute.c is generally MPSAFE, but the
  * ip_rsvpd and ip_rsvp_on variables need to be interlocked with rsvp_on
  * locking.  This code remains in ip_input.c as ip_mroute.c is optionally
  * compiled.
  */
 /* Non-zero while ip_rsvp_init() has an outstanding increment of V_rsvp_on. */
 VNET_DEFINE_STATIC(int, ip_rsvp_on);
 /* Socket recorded by ip_rsvp_init(); reset to NULL by ip_rsvp_done(). */
 VNET_DEFINE(struct socket *, ip_rsvpd);
 
 #define	V_ip_rsvp_on		VNET(ip_rsvp_on)
 
 /*
  * Record so as the RSVP daemon socket and switch RSVP processing on.
  *
  * Returns EOPNOTSUPP unless so is a raw socket for IPPROTO_RSVP,
  * EADDRINUSE if a daemon socket is already recorded, 0 otherwise.
  */
 int
 ip_rsvp_init(struct socket *so)
 {
 
 	if (!(so->so_type == SOCK_RAW &&
 	    so->so_proto->pr_protocol == IPPROTO_RSVP))
 		return (EOPNOTSUPP);
 
 	if (V_ip_rsvpd != NULL)
 		return (EADDRINUSE);
 	V_ip_rsvpd = so;
 
 	/*
 	 * V_ip_rsvp_on remembers whether this code already bumped
 	 * V_rsvp_on, so the counter is incremented at most once.
 	 */
 	if (V_ip_rsvp_on == 0) {
 		V_ip_rsvp_on = 1;
 		V_rsvp_on++;
 	}
 
 	return (0);
 }
 
 /*
  * Forget the RSVP daemon socket and switch RSVP processing off.
  * Always returns 0.
  */
 int
 ip_rsvp_done(void)
 {
 
 	V_ip_rsvpd = NULL;
 
 	/*
 	 * Only undo the V_rsvp_on increment if ip_rsvp_init() actually
 	 * made one, so the counter is never decremented twice.
 	 */
 	if (V_ip_rsvp_on != 0) {
 		V_ip_rsvp_on = 0;
 		V_rsvp_on--;
 	}
 	return (0);
 }
 
 /*
  * Protocol input routine for RSVP packets.  The packet is taken out
  * of *mp and then either handed to the loadable handler rsvp_input_p,
  * handed to rip_input() for the RSVP daemon socket, or freed.
  * Always returns IPPROTO_DONE.
  */
 int
 rsvp_input(struct mbuf **mp, int *offp, int proto)
 {
 	struct mbuf *pkt;
 
 	pkt = *mp;
 	*mp = NULL;
 
 	if (rsvp_input_p != NULL) {
 		/* Call the real one if loaded. */
 		*mp = pkt;
 		rsvp_input_p(mp, offp, proto);
 	} else if (V_rsvp_on && V_ip_rsvpd != NULL) {
 		*mp = pkt;
 		rip_input(mp, offp, proto);
 	} else {
 		/*
 		 * Packets can still arrive with rsvp_on = 0 if there is a
 		 * local member of the group the RSVP packet is addressed
 		 * to; those, and packets with no daemon socket to deliver
 		 * to, are thrown away.
 		 */
 		m_freem(pkt);
 	}
 	return (IPPROTO_DONE);
 }
diff --git a/sys/netinet/ip_output.c b/sys/netinet/ip_output.c
index 3041232b7223..ad41c9df0b8c 100644
--- a/sys/netinet/ip_output.c
+++ b/sys/netinet/ip_output.c
@@ -1,1632 +1,1627 @@
 /*-
  * SPDX-License-Identifier: BSD-3-Clause
  *
  * Copyright (c) 1982, 1986, 1988, 1990, 1993
  *	The Regents of the University of California.  All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  *	@(#)ip_output.c	8.3 (Berkeley) 1/21/94
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include "opt_inet.h"
 #include "opt_ipsec.h"
 #include "opt_kern_tls.h"
 #include "opt_mbuf_stress_test.h"
 #include "opt_ratelimit.h"
 #include "opt_route.h"
 #include "opt_rss.h"
 #include "opt_sctp.h"
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/kernel.h>
 #include <sys/ktls.h>
 #include <sys/lock.h>
 #include <sys/malloc.h>
 #include <sys/mbuf.h>
 #include <sys/priv.h>
 #include <sys/proc.h>
 #include <sys/protosw.h>
 #include <sys/rmlock.h>
 #include <sys/sdt.h>
 #include <sys/socket.h>
 #include <sys/socketvar.h>
 #include <sys/sysctl.h>
 #include <sys/ucred.h>
 
 #include <net/if.h>
 #include <net/if_var.h>
 #include <net/if_vlan_var.h>
 #include <net/if_llatbl.h>
 #include <net/ethernet.h>
 #include <net/netisr.h>
 #include <net/pfil.h>
 #include <net/route.h>
 #include <net/route/nhop.h>
 #include <net/rss_config.h>
 #include <net/vnet.h>
 
 #include <netinet/in.h>
 #include <netinet/in_fib.h>
 #include <netinet/in_kdtrace.h>
 #include <netinet/in_systm.h>
 #include <netinet/ip.h>
 #include <netinet/in_fib.h>
 #include <netinet/in_pcb.h>
 #include <netinet/in_rss.h>
 #include <netinet/in_var.h>
 #include <netinet/ip_var.h>
 #include <netinet/ip_options.h>
 #include <netinet/ip_mroute.h>
 
 #include <netinet/udp.h>
 #include <netinet/udp_var.h>
 
 #if defined(SCTP) || defined(SCTP_SUPPORT)
 #include <netinet/sctp.h>
 #include <netinet/sctp_crc32.h>
 #endif
 
 #include <netipsec/ipsec_support.h>
 
 #include <machine/in_cksum.h>
 
 #include <security/mac/mac_framework.h>
 
 #ifdef MBUF_STRESS_TEST
 static int mbuf_frag_size = 0;
 SYSCTL_INT(_net_inet_ip, OID_AUTO, mbuf_frag_size, CTLFLAG_RW,
 	&mbuf_frag_size, 0, "Fragment outgoing mbufs to this size");
 #endif
 
 static void	ip_mloopback(struct ifnet *, const struct mbuf *, int);
 
 extern int in_mcast_loop;
 extern	struct protosw inetsw[];
 
 /*
  * Run the outbound pfil hooks on *mp and act on what they did to the
  * packet.
  *
  * Return value:
  *   1   finished: the packet was dropped, consumed, or queued to
  *       netisr; *error holds EACCES for a drop or the netisr_queue()
  *       result, and is left untouched for PFIL_CONSUMED.
  *  -1   reloop: the destination (*dst), the FIB (*fibnum) or the
  *       next hop was changed and the caller must redo its lookup.
  *   0   continue with normal output; *error is 0.
  */
 static inline int
 ip_output_pfil(struct mbuf **mp, struct ifnet *ifp, int flags,
     struct inpcb *inp, struct sockaddr_in *dst, int *fibnum, int *error)
 {
 	struct m_tag *fwd_tag = NULL;
 	struct mbuf *m;
 	struct in_addr odst;
 	struct ip *ip;
 	int pflags = PFIL_OUT;
 
 	if (flags & IP_FORWARDING)
 		pflags |= PFIL_FWD;
 
 	m = *mp;
 	ip = mtod(m, struct ip *);
 
 	/* Run through list of hooks for output packets. */
 	odst.s_addr = ip->ip_dst.s_addr;
 	switch (pfil_run_hooks(V_inet_pfil_head, mp, ifp, pflags, inp)) {
 	case PFIL_DROPPED:
 		*error = EACCES;
 		/* FALLTHROUGH */
 	case PFIL_CONSUMED:
 		return 1; /* Finished */
 	case PFIL_PASS:
 		*error = 0;
 	}
 	/* The hooks may have replaced the mbuf; reload both pointers. */
 	m = *mp;
 	ip = mtod(m, struct ip *);
 
 	/* See if destination IP address was changed by packet filter. */
 	if (odst.s_addr != ip->ip_dst.s_addr) {
 		m->m_flags |= M_SKIP_FIREWALL;
 		/* If destination is now ourself drop to ip_input(). */
 		if (in_localip(ip->ip_dst)) {
 			m->m_flags |= M_FASTFWD_OURS;
 			if (m->m_pkthdr.rcvif == NULL)
 				m->m_pkthdr.rcvif = V_loif;
 			/* Mark delayed checksums as already valid. */
 			if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA) {
 				m->m_pkthdr.csum_flags |=
 					CSUM_DATA_VALID | CSUM_PSEUDO_HDR;
 				m->m_pkthdr.csum_data = 0xffff;
 			}
 			m->m_pkthdr.csum_flags |=
 				CSUM_IP_CHECKED | CSUM_IP_VALID;
 #if defined(SCTP) || defined(SCTP_SUPPORT)
 			if (m->m_pkthdr.csum_flags & CSUM_SCTP)
 				m->m_pkthdr.csum_flags |= CSUM_SCTP_VALID;
 #endif
 			*error = netisr_queue(NETISR_IP, m);
 			return 1; /* Finished */
 		}
 
 		/* Hand the rewritten destination back to the caller. */
 		bzero(dst, sizeof(*dst));
 		dst->sin_family = AF_INET;
 		dst->sin_len = sizeof(*dst);
 		dst->sin_addr = ip->ip_dst;
 
 		return -1; /* Reloop */
 	}
 	/* See if fib was changed by packet filter. */
 	if ((*fibnum) != M_GETFIB(m)) {
 		m->m_flags |= M_SKIP_FIREWALL;
 		*fibnum = M_GETFIB(m);
 		return -1; /* Reloop for FIB change */
 	}
 
 	/* See if local, if yes, send it to netisr with M_FASTFWD_OURS. */
 	if (m->m_flags & M_FASTFWD_OURS) {
 		if (m->m_pkthdr.rcvif == NULL)
 			m->m_pkthdr.rcvif = V_loif;
 		if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA) {
 			m->m_pkthdr.csum_flags |=
 				CSUM_DATA_VALID | CSUM_PSEUDO_HDR;
 			m->m_pkthdr.csum_data = 0xffff;
 		}
 #if defined(SCTP) || defined(SCTP_SUPPORT)
 		if (m->m_pkthdr.csum_flags & CSUM_SCTP)
 			m->m_pkthdr.csum_flags |= CSUM_SCTP_VALID;
 #endif
 		m->m_pkthdr.csum_flags |=
 			CSUM_IP_CHECKED | CSUM_IP_VALID;
 
 		*error = netisr_queue(NETISR_IP, m);
 		return 1; /* Finished */
 	}
 	/* Or forward to some other address? */
 	if ((m->m_flags & M_IP_NEXTHOP) &&
 	    ((fwd_tag = m_tag_find(m, PACKET_TAG_IPFORWARD, NULL)) != NULL)) {
 		/* The new next hop is stored right behind the tag header. */
 		bcopy((fwd_tag+1), dst, sizeof(struct sockaddr_in));
 		m->m_flags |= M_SKIP_FIREWALL;
 		m->m_flags &= ~M_IP_NEXTHOP;
 		m_tag_delete(m, fwd_tag);
 
 		return -1; /* Reloop for CHANGE of dst */
 	}
 
 	return 0;
 }
 
 /*
  * Hand packet m to ifp->if_output() for gateway address gw, first
  * attaching a send tag (taken from a kernel TLS session or from the
  * inpcb's rate-limit state) when stamp_tag is set and a tag exists.
  *
  * Returns the if_output() result, or EAGAIN when the TLS session has
  * no tag or the tag belongs to a different interface; with KERN_TLS an
  * EAGAIN is replaced by the result of ktls_output_eagain().
  */
 static int
 ip_output_send(struct inpcb *inp, struct ifnet *ifp, struct mbuf *m,
-    const struct sockaddr_in *gw, struct route *ro, bool stamp_tag)
+    const struct sockaddr *gw, struct route *ro, bool stamp_tag)
 {
 #ifdef KERN_TLS
 	struct ktls_session *tls = NULL;
 #endif
 	struct m_snd_tag *mst;
 	int error;
 
 	/* The packet must not already carry a send tag. */
 	MPASS((m->m_pkthdr.csum_flags & CSUM_SND_TAG) == 0);
 	mst = NULL;
 
 #ifdef KERN_TLS
 	/*
 	 * If this is an unencrypted TLS record, save a reference to
 	 * the record.  This local reference is used to call
 	 * ktls_output_eagain after the mbuf has been freed (thus
 	 * dropping the mbuf's reference) in if_output.
 	 */
 	if (m->m_next != NULL && mbuf_has_tls_session(m->m_next)) {
 		tls = ktls_hold(m->m_next->m_epg_tls);
 		mst = tls->snd_tag;
 
 		/*
 		 * If a TLS session doesn't have a valid tag, it must
 		 * have had an earlier ifp mismatch, so drop this
 		 * packet.
 		 */
 		if (mst == NULL) {
 			error = EAGAIN;
 			goto done;
 		}
 		/*
 		 * Always stamp tags that include NIC ktls.
 		 */
 		stamp_tag = true;
 	}
 #endif
 #ifdef RATELIMIT
 	/* No TLS tag: fall back to the inpcb's own send tag, if any. */
 	if (inp != NULL && mst == NULL) {
 		if ((inp->inp_flags2 & INP_RATE_LIMIT_CHANGED) != 0 ||
 		    (inp->inp_snd_tag != NULL &&
 		    inp->inp_snd_tag->ifp != ifp))
 			in_pcboutput_txrtlmt(inp, ifp, m);
 
 		if (inp->inp_snd_tag != NULL)
 			mst = inp->inp_snd_tag;
 	}
 #endif
 	if (stamp_tag && mst != NULL) {
 		KASSERT(m->m_pkthdr.rcvif == NULL,
 		    ("trying to add a send tag to a forwarded packet"));
 		/* A tag for another interface cannot be used on this one. */
 		if (mst->ifp != ifp) {
 			error = EAGAIN;
 			goto done;
 		}
 
 		/* stamp send tag on mbuf */
 		m->m_pkthdr.snd_tag = m_snd_tag_ref(mst);
 		m->m_pkthdr.csum_flags |= CSUM_SND_TAG;
 	}
 
-	error = (*ifp->if_output)(ifp, m, (const struct sockaddr *)gw, ro);
+	error = (*ifp->if_output)(ifp, m, gw, ro);
 
 done:
 	/* Check for route change invalidating send tags. */
 #ifdef KERN_TLS
 	if (tls != NULL) {
 		if (error == EAGAIN)
 			error = ktls_output_eagain(inp, tls);
 		/* Drop the reference taken by ktls_hold() above. */
 		ktls_free(tls);
 	}
 #endif
 #ifdef RATELIMIT
 	if (error == EAGAIN)
 		in_pcboutput_eagain(inp);
 #endif
 	return (error);
 }
 
 /* rte<>ro_flags translation */
 static inline void
 rt_update_ro_flags(struct route *ro, const struct nhop_object *nh)
 {
 	int nh_flags = nh->nh_flags;
 
 	ro->ro_flags &= ~ (RT_REJECT|RT_BLACKHOLE|RT_HAS_GW);
 
 	ro->ro_flags |= (nh_flags & NHF_REJECT) ? RT_REJECT : 0;
 	ro->ro_flags |= (nh_flags & NHF_BLACKHOLE) ? RT_BLACKHOLE : 0;
 	ro->ro_flags |= (nh_flags & NHF_GATEWAY) ? RT_HAS_GW : 0;
 }
 
 /*
  * IP output.  The packet in mbuf chain m contains a skeletal IP
  * header (with len, off, ttl, proto, tos, src, dst).
  * The mbuf chain containing the packet will be freed.
  * The mbuf opt, if present, will not be freed.
  * If route ro is present and has ro_rt initialized, route lookup would be
  * skipped and ro->ro_rt would be used. If ro is present but ro->ro_rt is NULL,
  * then result of route lookup is stored in ro->ro_rt.
  *
  * In the IP forwarding case, the packet will arrive with options already
  * inserted, so must have a NULL opt pointer.
  */
 int
 ip_output(struct mbuf *m, struct mbuf *opt, struct route *ro, int flags,
     struct ip_moptions *imo, struct inpcb *inp)
 {
 	MROUTER_RLOCK_TRACKER;
 	struct rm_priotracker in_ifa_tracker;
 	struct ip *ip;
 	struct ifnet *ifp = NULL;	/* keep compiler happy */
 	struct mbuf *m0;
 	int hlen = sizeof (struct ip);
 	int mtu = 0;
 	int error = 0;
 	int vlan_pcp = -1;
-	struct sockaddr_in *dst, sin;
-	const struct sockaddr_in *gw;
+	struct sockaddr_in *dst;
+	const struct sockaddr *gw;
 	struct in_ifaddr *ia = NULL;
 	struct in_addr src;
 	int isbroadcast;
 	uint16_t ip_len, ip_off;
+	struct route iproute;
 	uint32_t fibnum;
 #if defined(IPSEC) || defined(IPSEC_SUPPORT)
 	int no_route_but_check_spd = 0;
 #endif
 
 	M_ASSERTPKTHDR(m);
 	NET_EPOCH_ASSERT();
 
 	if (inp != NULL) {
 		INP_LOCK_ASSERT(inp);
 		M_SETFIB(m, inp->inp_inc.inc_fibnum);
 		if ((flags & IP_NODEFAULTFLOWID) == 0) {
 			m->m_pkthdr.flowid = inp->inp_flowid;
 			M_HASHTYPE_SET(m, inp->inp_flowtype);
 		}
 		if ((inp->inp_flags2 & INP_2PCP_SET) != 0)
 			vlan_pcp = (inp->inp_flags2 & INP_2PCP_MASK) >>
 			    INP_2PCP_SHIFT;
 #ifdef NUMA
 		m->m_pkthdr.numa_domain = inp->inp_numa_domain;
 #endif
 	}
 
 	if (opt) {
 		int len = 0;
 		m = ip_insertoptions(m, opt, &len);
 		if (len != 0)
 			hlen = len; /* ip->ip_hl is updated above */
 	}
 	ip = mtod(m, struct ip *);
 	ip_len = ntohs(ip->ip_len);
 	ip_off = ntohs(ip->ip_off);
 
 	if ((flags & (IP_FORWARDING|IP_RAWOUTPUT)) == 0) {
 		ip->ip_v = IPVERSION;
 		ip->ip_hl = hlen >> 2;
 		ip_fillid(ip);
 	} else {
 		/* Header already set, fetch hlen from there */
 		hlen = ip->ip_hl << 2;
 	}
 	if ((flags & IP_FORWARDING) == 0)
 		IPSTAT_INC(ips_localout);
 
 	/*
 	 * dst/gw handling:
 	 *
 	 * gw is readonly but can point either to dst OR rt_gateway,
 	 * therefore we need restore gw if we're redoing lookup.
 	 */
 	fibnum = (inp != NULL) ? inp->inp_inc.inc_fibnum : M_GETFIB(m);
-	if (ro != NULL)
-		dst = (struct sockaddr_in *)&ro->ro_dst;
-	else
-		dst = &sin;
-	if (ro == NULL || ro->ro_nh == NULL) {
-		bzero(dst, sizeof(*dst));
+	if (ro == NULL) {
+		ro = &iproute;
+		bzero(ro, sizeof (*ro));
+	}
+	dst = (struct sockaddr_in *)&ro->ro_dst;
+	if (ro->ro_nh == NULL) {
 		dst->sin_family = AF_INET;
 		dst->sin_len = sizeof(*dst);
 		dst->sin_addr = ip->ip_dst;
 	}
-	gw = dst;
+	gw = (const struct sockaddr *)dst;
 again:
 	/*
 	 * Validate route against routing table additions;
 	 * a better/more specific route might have been added.
 	 */
-	if (inp != NULL && ro != NULL && ro->ro_nh != NULL)
+	if (inp != NULL && ro->ro_nh != NULL)
 		NH_VALIDATE(ro, &inp->inp_rt_cookie, fibnum);
 	/*
 	 * If there is a cached route,
 	 * check that it is to the same destination
 	 * and is still up.  If not, free it and try again.
 	 * The address family should also be checked in case of sharing the
 	 * cache with IPv6.
 	 * Also check whether routing cache needs invalidation.
 	 */
-	if (ro != NULL && ro->ro_nh != NULL &&
+	if (ro->ro_nh != NULL &&
 	    ((!NH_IS_VALID(ro->ro_nh)) || dst->sin_family != AF_INET ||
 	    dst->sin_addr.s_addr != ip->ip_dst.s_addr))
 		RO_INVALIDATE_CACHE(ro);
 	ia = NULL;
 	/*
 	 * If routing to interface only, short circuit routing lookup.
 	 * The use of an all-ones broadcast address implies this; an
 	 * interface is specified by the broadcast address of an interface,
 	 * or the destination address of a ptp interface.
 	 */
 	if (flags & IP_SENDONES) {
 		if ((ia = ifatoia(ifa_ifwithbroadaddr(sintosa(dst),
 						      M_GETFIB(m)))) == NULL &&
 		    (ia = ifatoia(ifa_ifwithdstaddr(sintosa(dst),
 						    M_GETFIB(m)))) == NULL) {
 			IPSTAT_INC(ips_noroute);
 			error = ENETUNREACH;
 			goto bad;
 		}
 		ip->ip_dst.s_addr = INADDR_BROADCAST;
 		dst->sin_addr = ip->ip_dst;
 		ifp = ia->ia_ifp;
 		mtu = ifp->if_mtu;
 		ip->ip_ttl = 1;
 		isbroadcast = 1;
 		src = IA_SIN(ia)->sin_addr;
 	} else if (flags & IP_ROUTETOIF) {
 		if ((ia = ifatoia(ifa_ifwithdstaddr(sintosa(dst),
 						    M_GETFIB(m)))) == NULL &&
 		    (ia = ifatoia(ifa_ifwithnet(sintosa(dst), 0,
 						M_GETFIB(m)))) == NULL) {
 			IPSTAT_INC(ips_noroute);
 			error = ENETUNREACH;
 			goto bad;
 		}
 		ifp = ia->ia_ifp;
 		mtu = ifp->if_mtu;
 		ip->ip_ttl = 1;
 		isbroadcast = ifp->if_flags & IFF_BROADCAST ?
 		    in_ifaddr_broadcast(dst->sin_addr, ia) : 0;
 		src = IA_SIN(ia)->sin_addr;
 	} else if (IN_MULTICAST(ntohl(ip->ip_dst.s_addr)) &&
 	    imo != NULL && imo->imo_multicast_ifp != NULL) {
 		/*
 		 * Bypass the normal routing lookup for multicast
 		 * packets if the interface is specified.
 		 */
 		ifp = imo->imo_multicast_ifp;
 		mtu = ifp->if_mtu;
 		IFP_TO_IA(ifp, ia, &in_ifa_tracker);
 		isbroadcast = 0;	/* fool gcc */
 		/* Interface may have no addresses. */
 		if (ia != NULL)
 			src = IA_SIN(ia)->sin_addr;
 		else
 			src.s_addr = INADDR_ANY;
-	} else if (ro != NULL) {
+	} else if (ro != &iproute) {
 		if (ro->ro_nh == NULL) {
 			/*
 			 * We want to do any cloning requested by the link
 			 * layer, as this is probably required in all cases
 			 * for correct operation (as it is for ARP).
 			 */
 			uint32_t flowid;
 			flowid = m->m_pkthdr.flowid;
 			ro->ro_nh = fib4_lookup(fibnum, dst->sin_addr, 0,
 			    NHR_REF, flowid);
 
 			if (ro->ro_nh == NULL || (!NH_IS_VALID(ro->ro_nh))) {
 #if defined(IPSEC) || defined(IPSEC_SUPPORT)
 				/*
 				 * There is no route for this packet, but it is
 				 * possible that a matching SPD entry exists.
 				 */
 				no_route_but_check_spd = 1;
 				goto sendit;
 #endif
 				IPSTAT_INC(ips_noroute);
 				error = EHOSTUNREACH;
 				goto bad;
 			}
 		}
 		struct nhop_object *nh = ro->ro_nh;
 
 		ia = ifatoia(nh->nh_ifa);
 		ifp = nh->nh_ifp;
 		counter_u64_add(nh->nh_pksent, 1);
 		rt_update_ro_flags(ro, nh);
 		if (nh->nh_flags & NHF_GATEWAY)
-			gw = &nh->gw4_sa;
+			gw = &nh->gw_sa;
 		if (nh->nh_flags & NHF_HOST)
 			isbroadcast = (nh->nh_flags & NHF_BROADCAST);
-		else if (ifp->if_flags & IFF_BROADCAST)
-			isbroadcast = in_ifaddr_broadcast(gw->sin_addr, ia);
+		else if ((ifp->if_flags & IFF_BROADCAST) && (gw->sa_family == AF_INET))
+			isbroadcast = in_ifaddr_broadcast(((const struct sockaddr_in *)gw)->sin_addr, ia);
 		else
 			isbroadcast = 0;
 		mtu = nh->nh_mtu;
 		src = IA_SIN(ia)->sin_addr;
 	} else {
 		struct nhop_object *nh;
 
 		nh = fib4_lookup(M_GETFIB(m), ip->ip_dst, 0, NHR_NONE,
 		    m->m_pkthdr.flowid);
 		if (nh == NULL) {
 #if defined(IPSEC) || defined(IPSEC_SUPPORT)
 			/*
 			 * There is no route for this packet, but it is
 			 * possible that a matching SPD entry exists.
 			 */
 			no_route_but_check_spd = 1;
 			goto sendit;
 #endif
 			IPSTAT_INC(ips_noroute);
 			error = EHOSTUNREACH;
 			goto bad;
 		}
 		ifp = nh->nh_ifp;
 		mtu = nh->nh_mtu;
-		/*
-		 * We are rewriting here dst to be gw actually, contradicting
-		 * comment at the beginning of the function. However, in this
-		 * case we are always dealing with on stack dst.
-		 * In case if pfil(9) sends us back to beginning of the
-		 * function, the dst would be rewritten by ip_output_pfil().
-		 */
-		MPASS(dst == &sin);
+		rt_update_ro_flags(ro, nh);
 		if (nh->nh_flags & NHF_GATEWAY)
-			dst->sin_addr = nh->gw4_sa.sin_addr;
+			gw = &nh->gw_sa;
 		ia = ifatoia(nh->nh_ifa);
 		src = IA_SIN(ia)->sin_addr;
 		isbroadcast = (((nh->nh_flags & (NHF_HOST | NHF_BROADCAST)) ==
 		    (NHF_HOST | NHF_BROADCAST)) ||
 		    ((ifp->if_flags & IFF_BROADCAST) &&
-		    in_ifaddr_broadcast(dst->sin_addr, ia)));
+		    (gw->sa_family == AF_INET) &&
+		    in_ifaddr_broadcast(((const struct sockaddr_in *)gw)->sin_addr, ia)));
 	}
 
 	/* Catch a possible divide by zero later. */
 	KASSERT(mtu > 0, ("%s: mtu %d <= 0, ro=%p (nh_flags=0x%08x) ifp=%p",
 	    __func__, mtu, ro,
 	    (ro != NULL && ro->ro_nh != NULL) ? ro->ro_nh->nh_flags : 0, ifp));
 
 	if (IN_MULTICAST(ntohl(ip->ip_dst.s_addr))) {
 		m->m_flags |= M_MCAST;
 		/*
 		 * IP destination address is multicast.  Make sure "gw"
 		 * still points to the address in "ro".  (It may have been
 		 * changed to point to a gateway address, above.)
 		 */
-		gw = dst;
+		gw = (const struct sockaddr *)dst;
 		/*
 		 * See if the caller provided any multicast options
 		 */
 		if (imo != NULL) {
 			ip->ip_ttl = imo->imo_multicast_ttl;
 			if (imo->imo_multicast_vif != -1)
 				ip->ip_src.s_addr =
 				    ip_mcast_src ?
 				    ip_mcast_src(imo->imo_multicast_vif) :
 				    INADDR_ANY;
 		} else
 			ip->ip_ttl = IP_DEFAULT_MULTICAST_TTL;
 		/*
 		 * Confirm that the outgoing interface supports multicast.
 		 */
 		if ((imo == NULL) || (imo->imo_multicast_vif == -1)) {
 			if ((ifp->if_flags & IFF_MULTICAST) == 0) {
 				IPSTAT_INC(ips_noroute);
 				error = ENETUNREACH;
 				goto bad;
 			}
 		}
 		/*
 		 * If source address not specified yet, use address
 		 * of outgoing interface.
 		 */
 		if (ip->ip_src.s_addr == INADDR_ANY)
 			ip->ip_src = src;
 
 		if ((imo == NULL && in_mcast_loop) ||
 		    (imo && imo->imo_multicast_loop)) {
 			/*
 			 * Loop back multicast datagram if not expressly
 			 * forbidden to do so, even if we are not a member
 			 * of the group; ip_input() will filter it later,
 			 * thus deferring a hash lookup and mutex acquisition
 			 * at the expense of a cheap copy using m_copym().
 			 */
 			ip_mloopback(ifp, m, hlen);
 		} else {
 			/*
 			 * If we are acting as a multicast router, perform
 			 * multicast forwarding as if the packet had just
 			 * arrived on the interface to which we are about
 			 * to send.  The multicast forwarding function
 			 * recursively calls this function, using the
 			 * IP_FORWARDING flag to prevent infinite recursion.
 			 *
 			 * Multicasts that are looped back by ip_mloopback(),
 			 * above, will be forwarded by the ip_input() routine,
 			 * if necessary.
 			 */
 			MROUTER_RLOCK();
 			if (V_ip_mrouter && (flags & IP_FORWARDING) == 0) {
 				/*
 				 * If rsvp daemon is not running, do not
 				 * set ip_moptions. This ensures that the packet
 				 * is multicast and not just sent down one link
 				 * as prescribed by rsvpd.
 				 */
 				if (!V_rsvp_on)
 					imo = NULL;
 				if (ip_mforward &&
 				    ip_mforward(ip, ifp, m, imo) != 0) {
 					MROUTER_RUNLOCK();
 					m_freem(m);
 					goto done;
 				}
 			}
 			MROUTER_RUNLOCK();
 		}
 
 		/*
 		 * Multicasts with a time-to-live of zero may be looped-
 		 * back, above, but must not be transmitted on a network.
 		 * Also, multicasts addressed to the loopback interface
 		 * are not sent -- the above call to ip_mloopback() will
 		 * loop back a copy. ip_input() will drop the copy if
 		 * this host does not belong to the destination group on
 		 * the loopback interface.
 		 */
 		if (ip->ip_ttl == 0 || ifp->if_flags & IFF_LOOPBACK) {
 			m_freem(m);
 			goto done;
 		}
 
 		goto sendit;
 	}
 
 	/*
 	 * If the source address is not specified yet, use the address
 	 * of the outoing interface.
 	 */
 	if (ip->ip_src.s_addr == INADDR_ANY)
 		ip->ip_src = src;
 
 	/*
 	 * Look for broadcast address and
 	 * verify user is allowed to send
 	 * such a packet.
 	 */
 	if (isbroadcast) {
 		if ((ifp->if_flags & IFF_BROADCAST) == 0) {
 			error = EADDRNOTAVAIL;
 			goto bad;
 		}
 		if ((flags & IP_ALLOWBROADCAST) == 0) {
 			error = EACCES;
 			goto bad;
 		}
 		/* don't allow broadcast messages to be fragmented */
 		if (ip_len > mtu) {
 			error = EMSGSIZE;
 			goto bad;
 		}
 		m->m_flags |= M_BCAST;
 	} else {
 		m->m_flags &= ~M_BCAST;
 	}
 
 sendit:
 #if defined(IPSEC) || defined(IPSEC_SUPPORT)
 	if (IPSEC_ENABLED(ipv4)) {
 		if ((error = IPSEC_OUTPUT(ipv4, m, inp)) != 0) {
 			if (error == EINPROGRESS)
 				error = 0;
 			goto done;
 		}
 	}
 	/*
 	 * Check if there was a route for this packet; return error if not.
 	 */
 	if (no_route_but_check_spd) {
 		IPSTAT_INC(ips_noroute);
 		error = EHOSTUNREACH;
 		goto bad;
 	}
 	/* Update variables that are affected by ipsec4_output(). */
 	ip = mtod(m, struct ip *);
 	hlen = ip->ip_hl << 2;
 #endif /* IPSEC */
 
 	/* Jump over all PFIL processing if hooks are not active. */
 	if (PFIL_HOOKED_OUT(V_inet_pfil_head)) {
 		switch (ip_output_pfil(&m, ifp, flags, inp, dst, &fibnum,
 		    &error)) {
 		case 1: /* Finished */
 			goto done;
 
 		case 0: /* Continue normally */
 			ip = mtod(m, struct ip *);
 			break;
 
 		case -1: /* Need to try again */
 			/* Reset everything for a new round */
 			if (ro != NULL) {
 				RO_NHFREE(ro);
 				ro->ro_prepend = NULL;
 			}
-			gw = dst;
+			gw = (const struct sockaddr *)dst;
 			ip = mtod(m, struct ip *);
 			goto again;
 		}
 	}
 
 	if (vlan_pcp > -1)
 		EVL_APPLY_PRI(m, vlan_pcp);
 
 	/* IN_LOOPBACK must not appear on the wire - RFC1122. */
 	if (IN_LOOPBACK(ntohl(ip->ip_dst.s_addr)) ||
 	    IN_LOOPBACK(ntohl(ip->ip_src.s_addr))) {
 		if ((ifp->if_flags & IFF_LOOPBACK) == 0) {
 			IPSTAT_INC(ips_badaddr);
 			error = EADDRNOTAVAIL;
 			goto bad;
 		}
 	}
 
 	m->m_pkthdr.csum_flags |= CSUM_IP;
 	if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA & ~ifp->if_hwassist) {
 		m = mb_unmapped_to_ext(m);
 		if (m == NULL) {
 			IPSTAT_INC(ips_odropped);
 			error = ENOBUFS;
 			goto bad;
 		}
 		in_delayed_cksum(m);
 		m->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA;
 	} else if ((ifp->if_capenable & IFCAP_MEXTPG) == 0) {
 		m = mb_unmapped_to_ext(m);
 		if (m == NULL) {
 			IPSTAT_INC(ips_odropped);
 			error = ENOBUFS;
 			goto bad;
 		}
 	}
 #if defined(SCTP) || defined(SCTP_SUPPORT)
 	if (m->m_pkthdr.csum_flags & CSUM_SCTP & ~ifp->if_hwassist) {
 		m = mb_unmapped_to_ext(m);
 		if (m == NULL) {
 			IPSTAT_INC(ips_odropped);
 			error = ENOBUFS;
 			goto bad;
 		}
 		sctp_delayed_cksum(m, (uint32_t)(ip->ip_hl << 2));
 		m->m_pkthdr.csum_flags &= ~CSUM_SCTP;
 	}
 #endif
 
 	/*
 	 * If small enough for interface, or the interface will take
 	 * care of the fragmentation for us, we can just send directly.
 	 * Note that if_vxlan could have requested TSO even though the outer
 	 * frame is UDP.  It is correct to not fragment such datagrams and
 	 * instead just pass them on to the driver.
 	 */
 	if (ip_len <= mtu ||
 	    (m->m_pkthdr.csum_flags & ifp->if_hwassist &
 	    (CSUM_TSO | CSUM_INNER_TSO)) != 0) {
 		ip->ip_sum = 0;
 		if (m->m_pkthdr.csum_flags & CSUM_IP & ~ifp->if_hwassist) {
 			ip->ip_sum = in_cksum(m, hlen);
 			m->m_pkthdr.csum_flags &= ~CSUM_IP;
 		}
 
 		/*
 		 * Record statistics for this interface address.
 		 * With CSUM_TSO the byte/packet count will be slightly
 		 * incorrect because we count the IP+TCP headers only
 		 * once instead of for every generated packet.
 		 */
 		if (!(flags & IP_FORWARDING) && ia) {
 			if (m->m_pkthdr.csum_flags &
 			    (CSUM_TSO | CSUM_INNER_TSO))
 				counter_u64_add(ia->ia_ifa.ifa_opackets,
 				    m->m_pkthdr.len / m->m_pkthdr.tso_segsz);
 			else
 				counter_u64_add(ia->ia_ifa.ifa_opackets, 1);
 
 			counter_u64_add(ia->ia_ifa.ifa_obytes, m->m_pkthdr.len);
 		}
 #ifdef MBUF_STRESS_TEST
 		if (mbuf_frag_size && m->m_pkthdr.len > mbuf_frag_size)
 			m = m_fragment(m, M_NOWAIT, mbuf_frag_size);
 #endif
 		/*
 		 * Reset layer specific mbuf flags
 		 * to avoid confusing lower layers.
 		 */
 		m_clrprotoflags(m);
 		IP_PROBE(send, NULL, NULL, ip, ifp, ip, NULL);
 		error = ip_output_send(inp, ifp, m, gw, ro,
 		    (flags & IP_NO_SND_TAG_RL) ? false : true);
 		goto done;
 	}
 
 	/* Balk when DF bit is set or the interface didn't support TSO. */
 	if ((ip_off & IP_DF) ||
 	    (m->m_pkthdr.csum_flags & (CSUM_TSO | CSUM_INNER_TSO))) {
 		error = EMSGSIZE;
 		IPSTAT_INC(ips_cantfrag);
 		goto bad;
 	}
 
 	/*
 	 * Too large for interface; fragment if possible. If successful,
 	 * on return, m will point to a list of packets to be sent.
 	 */
 	error = ip_fragment(ip, &m, mtu, ifp->if_hwassist);
 	if (error)
 		goto bad;
 	for (; m; m = m0) {
 		m0 = m->m_nextpkt;
 		m->m_nextpkt = 0;
 		if (error == 0) {
 			/* Record statistics for this interface address. */
 			if (ia != NULL) {
 				counter_u64_add(ia->ia_ifa.ifa_opackets, 1);
 				counter_u64_add(ia->ia_ifa.ifa_obytes,
 				    m->m_pkthdr.len);
 			}
 			/*
 			 * Reset layer specific mbuf flags
 			 * to avoid confusing upper layers.
 			 */
 			m_clrprotoflags(m);
 
 			IP_PROBE(send, NULL, NULL, mtod(m, struct ip *), ifp,
 			    mtod(m, struct ip *), NULL);
 			error = ip_output_send(inp, ifp, m, gw, ro, true);
 		} else
 			m_freem(m);
 	}
 
 	if (error == 0)
 		IPSTAT_INC(ips_fragmented);
 
 done:
 	return (error);
  bad:
 	m_freem(m);
 	goto done;
 }
 
 /*
  * Create a chain of fragments which fit the given mtu. m_frag points to the
  * mbuf to be fragmented; on return it points to the chain with the fragments.
  * Return 0 if no error. If error, m_frag may contain a partially built
  * chain of fragments that should be freed by the caller.
  *
  * if_hwassist_flags is the hw offload capabilities (see if_data.ifi_hwassist)
  */
 int
 ip_fragment(struct ip *ip, struct mbuf **m_frag, int mtu,
     u_long if_hwassist_flags)
 {
 	int error = 0;
 	int hlen = ip->ip_hl << 2;
 	int len = (mtu - hlen) & ~7;	/* size of payload in each fragment */
 	int off;
 	struct mbuf *m0 = *m_frag;	/* the original packet		*/
 	int firstlen;
 	struct mbuf **mnext;
 	int nfrags;
 	uint16_t ip_len, ip_off;
 
 	ip_len = ntohs(ip->ip_len);
 	ip_off = ntohs(ip->ip_off);
 
 	if (ip_off & IP_DF) {	/* Fragmentation not allowed */
 		IPSTAT_INC(ips_cantfrag);
 		return EMSGSIZE;
 	}
 
 	/*
 	 * Must be able to put at least 8 bytes per fragment.
 	 */
 	if (len < 8)
 		return EMSGSIZE;
 
 	/*
 	 * If the interface will not calculate checksums on
 	 * fragmented packets, then do it here.
 	 */
 	if (m0->m_pkthdr.csum_flags & CSUM_DELAY_DATA) {
 		m0 = mb_unmapped_to_ext(m0);
 		if (m0 == NULL) {
 			error = ENOBUFS;
 			IPSTAT_INC(ips_odropped);
 			goto done;
 		}
 		in_delayed_cksum(m0);
 		m0->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA;
 	}
 #if defined(SCTP) || defined(SCTP_SUPPORT)
 	if (m0->m_pkthdr.csum_flags & CSUM_SCTP) {
 		m0 = mb_unmapped_to_ext(m0);
 		if (m0 == NULL) {
 			error = ENOBUFS;
 			IPSTAT_INC(ips_odropped);
 			goto done;
 		}
 		sctp_delayed_cksum(m0, hlen);
 		m0->m_pkthdr.csum_flags &= ~CSUM_SCTP;
 	}
 #endif
 	if (len > PAGE_SIZE) {
 		/*
 		 * Fragment large datagrams such that each segment
 		 * contains a multiple of PAGE_SIZE amount of data,
 		 * plus headers. This enables a receiver to perform
 		 * page-flipping zero-copy optimizations.
 		 *
 		 * XXX When does this help given that sender and receiver
 		 * could have different page sizes, and also mtu could
 		 * be less than the receiver's page size ?
 		 */
 		int newlen;
 
 		off = MIN(mtu, m0->m_pkthdr.len);
 
 		/*
 		 * firstlen (off - hlen) must be aligned on an
 		 * 8-byte boundary
 		 */
 		if (off < hlen)
 			goto smart_frag_failure;
 		off = ((off - hlen) & ~7) + hlen;
 		newlen = (~PAGE_MASK) & mtu;
 		if ((newlen + sizeof (struct ip)) > mtu) {
 			/* we failed, go back the default */
 smart_frag_failure:
 			newlen = len;
 			off = hlen + len;
 		}
 		len = newlen;
 
 	} else {
 		off = hlen + len;
 	}
 
 	firstlen = off - hlen;
 	mnext = &m0->m_nextpkt;		/* pointer to next packet */
 
 	/*
 	 * Loop through length of segment after first fragment,
 	 * make new header and copy data of each part and link onto chain.
 	 * Here, m0 is the original packet, m is the fragment being created.
 	 * The fragments are linked off the m_nextpkt of the original
 	 * packet, which after processing serves as the first fragment.
 	 */
 	for (nfrags = 1; off < ip_len; off += len, nfrags++) {
 		struct ip *mhip;	/* ip header on the fragment */
 		struct mbuf *m;
 		int mhlen = sizeof (struct ip);
 
 		m = m_gethdr(M_NOWAIT, MT_DATA);
 		if (m == NULL) {
 			error = ENOBUFS;
 			IPSTAT_INC(ips_odropped);
 			goto done;
 		}
 		/*
 		 * Make sure the complete packet header gets copied
 		 * from the originating mbuf to the newly created
 		 * mbuf. This also ensures that existing firewall
 		 * classification(s), VLAN tags and so on get copied
 		 * to the resulting fragmented packet(s):
 		 */
 		if (m_dup_pkthdr(m, m0, M_NOWAIT) == 0) {
 			m_free(m);
 			error = ENOBUFS;
 			IPSTAT_INC(ips_odropped);
 			goto done;
 		}
 		/*
 		 * In the first mbuf, leave room for the link header, then
 		 * copy the original IP header including options. The payload
 		 * goes into an additional mbuf chain returned by m_copym().
 		 */
 		m->m_data += max_linkhdr;
 		mhip = mtod(m, struct ip *);
 		*mhip = *ip;
 		if (hlen > sizeof (struct ip)) {
 			mhlen = ip_optcopy(ip, mhip) + sizeof (struct ip);
 			mhip->ip_v = IPVERSION;
 			mhip->ip_hl = mhlen >> 2;
 		}
 		m->m_len = mhlen;
 		/* XXX do we need to add ip_off below ? */
 		mhip->ip_off = ((off - hlen) >> 3) + ip_off;
 		if (off + len >= ip_len)
 			len = ip_len - off;
 		else
 			mhip->ip_off |= IP_MF;
 		mhip->ip_len = htons((u_short)(len + mhlen));
 		m->m_next = m_copym(m0, off, len, M_NOWAIT);
 		if (m->m_next == NULL) {	/* copy failed */
 			m_free(m);
 			error = ENOBUFS;	/* ??? */
 			IPSTAT_INC(ips_odropped);
 			goto done;
 		}
 		m->m_pkthdr.len = mhlen + len;
 #ifdef MAC
 		mac_netinet_fragment(m0, m);
 #endif
 		mhip->ip_off = htons(mhip->ip_off);
 		mhip->ip_sum = 0;
 		if (m->m_pkthdr.csum_flags & CSUM_IP & ~if_hwassist_flags) {
 			mhip->ip_sum = in_cksum(m, mhlen);
 			m->m_pkthdr.csum_flags &= ~CSUM_IP;
 		}
 		*mnext = m;
 		mnext = &m->m_nextpkt;
 	}
 	IPSTAT_ADD(ips_ofragments, nfrags);
 
 	/*
 	 * Update first fragment by trimming what's been copied out
 	 * and updating header.
 	 */
 	m_adj(m0, hlen + firstlen - ip_len);
 	m0->m_pkthdr.len = hlen + firstlen;
 	ip->ip_len = htons((u_short)m0->m_pkthdr.len);
 	ip->ip_off = htons(ip_off | IP_MF);
 	ip->ip_sum = 0;
 	if (m0->m_pkthdr.csum_flags & CSUM_IP & ~if_hwassist_flags) {
 		ip->ip_sum = in_cksum(m0, hlen);
 		m0->m_pkthdr.csum_flags &= ~CSUM_IP;
 	}
 
 done:
 	*m_frag = m0;
 	return error;
 }
 
 void
 in_delayed_cksum(struct mbuf *m)
 {
 	struct ip *ip;
 	struct udphdr *uh;
 	uint16_t cklen, csum, offset;
 
 	ip = mtod(m, struct ip *);
 	offset = ip->ip_hl << 2 ;
 
 	if (m->m_pkthdr.csum_flags & CSUM_UDP) {
 		/* if udp header is not in the first mbuf copy udplen */
 		if (offset + sizeof(struct udphdr) > m->m_len) {
 			m_copydata(m, offset + offsetof(struct udphdr,
 			    uh_ulen), sizeof(cklen), (caddr_t)&cklen);
 			cklen = ntohs(cklen);
 		} else {
 			uh = (struct udphdr *)mtodo(m, offset);
 			cklen = ntohs(uh->uh_ulen);
 		}
 		csum = in_cksum_skip(m, cklen + offset, offset);
 		if (csum == 0)
 			csum = 0xffff;
 	} else {
 		cklen = ntohs(ip->ip_len);
 		csum = in_cksum_skip(m, cklen, offset);
 	}
 	offset += m->m_pkthdr.csum_data;	/* checksum offset */
 
 	if (offset + sizeof(csum) > m->m_len)
 		m_copyback(m, offset, sizeof(csum), (caddr_t)&csum);
 	else
 		*(u_short *)mtodo(m, offset) = csum;
 }
 
 /*
  * IP socket option processing.
  */
 int
 ip_ctloutput(struct socket *so, struct sockopt *sopt)
 {
 	struct inpcb *inp = sotoinpcb(so);
 	int	error, optval;
 #ifdef	RSS
 	uint32_t rss_bucket;
 	int retval;
 #endif
 
 	error = optval = 0;
 	if (sopt->sopt_level != IPPROTO_IP) {
 		error = EINVAL;
 
 		if (sopt->sopt_level == SOL_SOCKET &&
 		    sopt->sopt_dir == SOPT_SET) {
 			switch (sopt->sopt_name) {
 			case SO_REUSEADDR:
 				INP_WLOCK(inp);
 				if ((so->so_options & SO_REUSEADDR) != 0)
 					inp->inp_flags2 |= INP_REUSEADDR;
 				else
 					inp->inp_flags2 &= ~INP_REUSEADDR;
 				INP_WUNLOCK(inp);
 				error = 0;
 				break;
 			case SO_REUSEPORT:
 				INP_WLOCK(inp);
 				if ((so->so_options & SO_REUSEPORT) != 0)
 					inp->inp_flags2 |= INP_REUSEPORT;
 				else
 					inp->inp_flags2 &= ~INP_REUSEPORT;
 				INP_WUNLOCK(inp);
 				error = 0;
 				break;
 			case SO_REUSEPORT_LB:
 				INP_WLOCK(inp);
 				if ((so->so_options & SO_REUSEPORT_LB) != 0)
 					inp->inp_flags2 |= INP_REUSEPORT_LB;
 				else
 					inp->inp_flags2 &= ~INP_REUSEPORT_LB;
 				INP_WUNLOCK(inp);
 				error = 0;
 				break;
 			case SO_SETFIB:
 				INP_WLOCK(inp);
 				inp->inp_inc.inc_fibnum = so->so_fibnum;
 				INP_WUNLOCK(inp);
 				error = 0;
 				break;
 			case SO_MAX_PACING_RATE:
 #ifdef RATELIMIT
 				INP_WLOCK(inp);
 				inp->inp_flags2 |= INP_RATE_LIMIT_CHANGED;
 				INP_WUNLOCK(inp);
 				error = 0;
 #else
 				error = EOPNOTSUPP;
 #endif
 				break;
 			default:
 				break;
 			}
 		}
 		return (error);
 	}
 
 	switch (sopt->sopt_dir) {
 	case SOPT_SET:
 		switch (sopt->sopt_name) {
 		case IP_OPTIONS:
 #ifdef notyet
 		case IP_RETOPTS:
 #endif
 		{
 			struct mbuf *m;
 			if (sopt->sopt_valsize > MLEN) {
 				error = EMSGSIZE;
 				break;
 			}
 			m = m_get(sopt->sopt_td ? M_WAITOK : M_NOWAIT, MT_DATA);
 			if (m == NULL) {
 				error = ENOBUFS;
 				break;
 			}
 			m->m_len = sopt->sopt_valsize;
 			error = sooptcopyin(sopt, mtod(m, char *), m->m_len,
 					    m->m_len);
 			if (error) {
 				m_free(m);
 				break;
 			}
 			INP_WLOCK(inp);
 			error = ip_pcbopts(inp, sopt->sopt_name, m);
 			INP_WUNLOCK(inp);
 			return (error);
 		}
 
 		case IP_BINDANY:
 			if (sopt->sopt_td != NULL) {
 				error = priv_check(sopt->sopt_td,
 				    PRIV_NETINET_BINDANY);
 				if (error)
 					break;
 			}
 			/* FALLTHROUGH */
 		case IP_BINDMULTI:
 #ifdef	RSS
 		case IP_RSS_LISTEN_BUCKET:
 #endif
 		case IP_TOS:
 		case IP_TTL:
 		case IP_MINTTL:
 		case IP_RECVOPTS:
 		case IP_RECVRETOPTS:
 		case IP_ORIGDSTADDR:
 		case IP_RECVDSTADDR:
 		case IP_RECVTTL:
 		case IP_RECVIF:
 		case IP_ONESBCAST:
 		case IP_DONTFRAG:
 		case IP_RECVTOS:
 		case IP_RECVFLOWID:
 #ifdef	RSS
 		case IP_RECVRSSBUCKETID:
 #endif
 		case IP_VLAN_PCP:
 			error = sooptcopyin(sopt, &optval, sizeof optval,
 					    sizeof optval);
 			if (error)
 				break;
 
 			switch (sopt->sopt_name) {
 			case IP_TOS:
 				inp->inp_ip_tos = optval;
 				break;
 
 			case IP_TTL:
 				inp->inp_ip_ttl = optval;
 				break;
 
 			case IP_MINTTL:
 				if (optval >= 0 && optval <= MAXTTL)
 					inp->inp_ip_minttl = optval;
 				else
 					error = EINVAL;
 				break;
 
 #define	OPTSET(bit) do {						\
 	INP_WLOCK(inp);							\
 	if (optval)							\
 		inp->inp_flags |= bit;					\
 	else								\
 		inp->inp_flags &= ~bit;					\
 	INP_WUNLOCK(inp);						\
 } while (0)
 
 #define	OPTSET2(bit, val) do {						\
 	INP_WLOCK(inp);							\
 	if (val)							\
 		inp->inp_flags2 |= bit;					\
 	else								\
 		inp->inp_flags2 &= ~bit;				\
 	INP_WUNLOCK(inp);						\
 } while (0)
 
 			case IP_RECVOPTS:
 				OPTSET(INP_RECVOPTS);
 				break;
 
 			case IP_RECVRETOPTS:
 				OPTSET(INP_RECVRETOPTS);
 				break;
 
 			case IP_RECVDSTADDR:
 				OPTSET(INP_RECVDSTADDR);
 				break;
 
 			case IP_ORIGDSTADDR:
 				OPTSET2(INP_ORIGDSTADDR, optval);
 				break;
 
 			case IP_RECVTTL:
 				OPTSET(INP_RECVTTL);
 				break;
 
 			case IP_RECVIF:
 				OPTSET(INP_RECVIF);
 				break;
 
 			case IP_ONESBCAST:
 				OPTSET(INP_ONESBCAST);
 				break;
 			case IP_DONTFRAG:
 				OPTSET(INP_DONTFRAG);
 				break;
 			case IP_BINDANY:
 				OPTSET(INP_BINDANY);
 				break;
 			case IP_RECVTOS:
 				OPTSET(INP_RECVTOS);
 				break;
 			case IP_BINDMULTI:
 				OPTSET2(INP_BINDMULTI, optval);
 				break;
 			case IP_RECVFLOWID:
 				OPTSET2(INP_RECVFLOWID, optval);
 				break;
 #ifdef	RSS
 			case IP_RSS_LISTEN_BUCKET:
 				if ((optval >= 0) &&
 				    (optval < rss_getnumbuckets())) {
 					inp->inp_rss_listen_bucket = optval;
 					OPTSET2(INP_RSS_BUCKET_SET, 1);
 				} else {
 					error = EINVAL;
 				}
 				break;
 			case IP_RECVRSSBUCKETID:
 				OPTSET2(INP_RECVRSSBUCKETID, optval);
 				break;
 #endif
 			case IP_VLAN_PCP:
 				if ((optval >= -1) && (optval <=
 				    (INP_2PCP_MASK >> INP_2PCP_SHIFT))) {
 					if (optval == -1) {
 						INP_WLOCK(inp);
 						inp->inp_flags2 &=
 						    ~(INP_2PCP_SET |
 						      INP_2PCP_MASK);
 						INP_WUNLOCK(inp);
 					} else {
 						INP_WLOCK(inp);
 						inp->inp_flags2 |=
 						    INP_2PCP_SET;
 						inp->inp_flags2 &=
 						    ~INP_2PCP_MASK;
 						inp->inp_flags2 |=
 						    optval << INP_2PCP_SHIFT;
 						INP_WUNLOCK(inp);
 					}
 				} else
 					error = EINVAL;
 				break;
 			}
 			break;
 #undef OPTSET
 #undef OPTSET2
 
 		/*
 		 * Multicast socket options are processed by the in_mcast
 		 * module.
 		 */
 		case IP_MULTICAST_IF:
 		case IP_MULTICAST_VIF:
 		case IP_MULTICAST_TTL:
 		case IP_MULTICAST_LOOP:
 		case IP_ADD_MEMBERSHIP:
 		case IP_DROP_MEMBERSHIP:
 		case IP_ADD_SOURCE_MEMBERSHIP:
 		case IP_DROP_SOURCE_MEMBERSHIP:
 		case IP_BLOCK_SOURCE:
 		case IP_UNBLOCK_SOURCE:
 		case IP_MSFILTER:
 		case MCAST_JOIN_GROUP:
 		case MCAST_LEAVE_GROUP:
 		case MCAST_JOIN_SOURCE_GROUP:
 		case MCAST_LEAVE_SOURCE_GROUP:
 		case MCAST_BLOCK_SOURCE:
 		case MCAST_UNBLOCK_SOURCE:
 			error = inp_setmoptions(inp, sopt);
 			break;
 
 		case IP_PORTRANGE:
 			error = sooptcopyin(sopt, &optval, sizeof optval,
 					    sizeof optval);
 			if (error)
 				break;
 
 			INP_WLOCK(inp);
 			switch (optval) {
 			case IP_PORTRANGE_DEFAULT:
 				inp->inp_flags &= ~(INP_LOWPORT);
 				inp->inp_flags &= ~(INP_HIGHPORT);
 				break;
 
 			case IP_PORTRANGE_HIGH:
 				inp->inp_flags &= ~(INP_LOWPORT);
 				inp->inp_flags |= INP_HIGHPORT;
 				break;
 
 			case IP_PORTRANGE_LOW:
 				inp->inp_flags &= ~(INP_HIGHPORT);
 				inp->inp_flags |= INP_LOWPORT;
 				break;
 
 			default:
 				error = EINVAL;
 				break;
 			}
 			INP_WUNLOCK(inp);
 			break;
 
 #if defined(IPSEC) || defined(IPSEC_SUPPORT)
 		case IP_IPSEC_POLICY:
 			if (IPSEC_ENABLED(ipv4)) {
 				error = IPSEC_PCBCTL(ipv4, inp, sopt);
 				break;
 			}
 			/* FALLTHROUGH */
 #endif /* IPSEC */
 
 		default:
 			error = ENOPROTOOPT;
 			break;
 		}
 		break;
 
 	case SOPT_GET:
 		switch (sopt->sopt_name) {
 		case IP_OPTIONS:
 		case IP_RETOPTS:
 			INP_RLOCK(inp);
 			if (inp->inp_options) {
 				struct mbuf *options;
 
 				options = m_copym(inp->inp_options, 0,
 				    M_COPYALL, M_NOWAIT);
 				INP_RUNLOCK(inp);
 				if (options != NULL) {
 					error = sooptcopyout(sopt,
 							     mtod(options, char *),
 							     options->m_len);
 					m_freem(options);
 				} else
 					error = ENOMEM;
 			} else {
 				INP_RUNLOCK(inp);
 				sopt->sopt_valsize = 0;
 			}
 			break;
 
 		case IP_TOS:
 		case IP_TTL:
 		case IP_MINTTL:
 		case IP_RECVOPTS:
 		case IP_RECVRETOPTS:
 		case IP_ORIGDSTADDR:
 		case IP_RECVDSTADDR:
 		case IP_RECVTTL:
 		case IP_RECVIF:
 		case IP_PORTRANGE:
 		case IP_ONESBCAST:
 		case IP_DONTFRAG:
 		case IP_BINDANY:
 		case IP_RECVTOS:
 		case IP_BINDMULTI:
 		case IP_FLOWID:
 		case IP_FLOWTYPE:
 		case IP_RECVFLOWID:
 #ifdef	RSS
 		case IP_RSSBUCKETID:
 		case IP_RECVRSSBUCKETID:
 #endif
 		case IP_VLAN_PCP:
 			switch (sopt->sopt_name) {
 			case IP_TOS:
 				optval = inp->inp_ip_tos;
 				break;
 
 			case IP_TTL:
 				optval = inp->inp_ip_ttl;
 				break;
 
 			case IP_MINTTL:
 				optval = inp->inp_ip_minttl;
 				break;
 
 #define	OPTBIT(bit)	(inp->inp_flags & bit ? 1 : 0)
 #define	OPTBIT2(bit)	(inp->inp_flags2 & bit ? 1 : 0)
 
 			case IP_RECVOPTS:
 				optval = OPTBIT(INP_RECVOPTS);
 				break;
 
 			case IP_RECVRETOPTS:
 				optval = OPTBIT(INP_RECVRETOPTS);
 				break;
 
 			case IP_RECVDSTADDR:
 				optval = OPTBIT(INP_RECVDSTADDR);
 				break;
 
 			case IP_ORIGDSTADDR:
 				optval = OPTBIT2(INP_ORIGDSTADDR);
 				break;
 
 			case IP_RECVTTL:
 				optval = OPTBIT(INP_RECVTTL);
 				break;
 
 			case IP_RECVIF:
 				optval = OPTBIT(INP_RECVIF);
 				break;
 
 			case IP_PORTRANGE:
 				if (inp->inp_flags & INP_HIGHPORT)
 					optval = IP_PORTRANGE_HIGH;
 				else if (inp->inp_flags & INP_LOWPORT)
 					optval = IP_PORTRANGE_LOW;
 				else
 					optval = 0;
 				break;
 
 			case IP_ONESBCAST:
 				optval = OPTBIT(INP_ONESBCAST);
 				break;
 			case IP_DONTFRAG:
 				optval = OPTBIT(INP_DONTFRAG);
 				break;
 			case IP_BINDANY:
 				optval = OPTBIT(INP_BINDANY);
 				break;
 			case IP_RECVTOS:
 				optval = OPTBIT(INP_RECVTOS);
 				break;
 			case IP_FLOWID:
 				optval = inp->inp_flowid;
 				break;
 			case IP_FLOWTYPE:
 				optval = inp->inp_flowtype;
 				break;
 			case IP_RECVFLOWID:
 				optval = OPTBIT2(INP_RECVFLOWID);
 				break;
 #ifdef	RSS
 			case IP_RSSBUCKETID:
 				retval = rss_hash2bucket(inp->inp_flowid,
 				    inp->inp_flowtype,
 				    &rss_bucket);
 				if (retval == 0)
 					optval = rss_bucket;
 				else
 					error = EINVAL;
 				break;
 			case IP_RECVRSSBUCKETID:
 				optval = OPTBIT2(INP_RECVRSSBUCKETID);
 				break;
 #endif
 			case IP_BINDMULTI:
 				optval = OPTBIT2(INP_BINDMULTI);
 				break;
 			case IP_VLAN_PCP:
 				if (OPTBIT2(INP_2PCP_SET)) {
 					optval = (inp->inp_flags2 &
 					    INP_2PCP_MASK) >> INP_2PCP_SHIFT;
 				} else {
 					optval = -1;
 				}
 				break;
 			}
 			error = sooptcopyout(sopt, &optval, sizeof optval);
 			break;
 
 		/*
 		 * Multicast socket options are processed by the in_mcast
 		 * module.
 		 */
 		case IP_MULTICAST_IF:
 		case IP_MULTICAST_VIF:
 		case IP_MULTICAST_TTL:
 		case IP_MULTICAST_LOOP:
 		case IP_MSFILTER:
 			error = inp_getmoptions(inp, sopt);
 			break;
 
 #if defined(IPSEC) || defined(IPSEC_SUPPORT)
 		case IP_IPSEC_POLICY:
 			if (IPSEC_ENABLED(ipv4)) {
 				error = IPSEC_PCBCTL(ipv4, inp, sopt);
 				break;
 			}
 			/* FALLTHROUGH */
 #endif /* IPSEC */
 
 		default:
 			error = ENOPROTOOPT;
 			break;
 		}
 		break;
 	}
 	return (error);
 }
 
 /*
  * Routine called from ip_output() to loop back a copy of an IP multicast
  * packet to the input queue of a specified interface.  Note that this
  * calls the output routine of the loopback "driver", but with an interface
  * pointer that might NOT be a loopback interface -- evil, but easier than
  * replicating that code here.
  *
  * ifp is the interface handed to if_simloop(), m is the packet to copy (it
  * is only duplicated, never modified) and hlen is the header length used
  * for the pullup and for the IP header checksum.  If the copy cannot be
  * allocated or pulled up, nothing is looped back.
  */
 static void
 ip_mloopback(struct ifnet *ifp, const struct mbuf *m, int hlen)
 {
 	struct ip *ip;
 	struct mbuf *copym;
 
 	/*
 	 * Make a deep copy of the packet because we're going to
 	 * modify the packet in order to generate checksums.
 	 */
 	copym = m_dup(m, M_NOWAIT);
 	/* Make sure the first hlen bytes are writable and contiguous. */
 	if (copym != NULL && (!M_WRITABLE(copym) || copym->m_len < hlen))
 		copym = m_pullup(copym, hlen);
 	if (copym != NULL) {
 		/* If needed, compute the checksum and mark it as valid. */
 		if (copym->m_pkthdr.csum_flags & CSUM_DELAY_DATA) {
 			in_delayed_cksum(copym);
 			copym->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA;
 			copym->m_pkthdr.csum_flags |=
 			    CSUM_DATA_VALID | CSUM_PSEUDO_HDR;
 			copym->m_pkthdr.csum_data = 0xffff;
 		}
 		/*
 		 * We don't bother to fragment if the IP length is greater
 		 * than the interface's MTU.  Can this possibly matter?
 		 */
 		ip = mtod(copym, struct ip *);
 		/* Recompute the IP header checksum over hlen bytes. */
 		ip->ip_sum = 0;
 		ip->ip_sum = in_cksum(copym, hlen);
 		if_simloop(ifp, copym, AF_INET, 0);
 	}
 }
diff --git a/sys/netinet/toecore.c b/sys/netinet/toecore.c
index 1c0be6011253..0bf55958c618 100644
--- a/sys/netinet/toecore.c
+++ b/sys/netinet/toecore.c
@@ -1,601 +1,602 @@
 /*-
  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
  *
  * Copyright (c) 2012 Chelsio Communications, Inc.
  * All rights reserved.
  * Written by: Navdeep Parhar <np@FreeBSD.org>
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include "opt_inet.h"
 #include "opt_inet6.h"
 
 #include <sys/param.h>
 #include <sys/eventhandler.h>
 #include <sys/kernel.h>
 #include <sys/systm.h>
 #include <sys/malloc.h>
 #include <sys/mbuf.h>
 #include <sys/module.h>
 #include <sys/types.h>
 #include <sys/sockopt.h>
 #include <sys/sysctl.h>
 #include <sys/socket.h>
 
 #include <net/ethernet.h>
 #include <net/if.h>
 #include <net/if_var.h>
 #include <net/if_types.h>
 #include <net/if_vlan_var.h>
 #include <net/if_llatbl.h>
 #include <net/route.h>
 
 #include <netinet/if_ether.h>
 #include <netinet/in.h>
 #include <netinet/in_pcb.h>
 #include <netinet/in_var.h>
 #include <netinet6/in6_var.h>
 #include <netinet6/in6_pcb.h>
 #include <netinet6/nd6.h>
 #define TCPSTATES
 #include <netinet/tcp.h>
 #include <netinet/tcp_fsm.h>
 #include <netinet/tcp_timer.h>
 #include <netinet/tcp_var.h>
 #include <netinet/tcp_syncache.h>
 #include <netinet/tcp_offload.h>
 #include <netinet/toecore.h>
 
 static struct mtx toedev_lock;
 static TAILQ_HEAD(, toedev) toedev_list;
 static eventhandler_tag listen_start_eh;
 static eventhandler_tag listen_stop_eh;
 static eventhandler_tag lle_event_eh;
 
 /*
  * Default tod_connect handler installed by init_toedev(): ignores all of
  * its arguments and always fails with ENOTSUP.
  */
 static int
 toedev_connect(struct toedev *tod __unused, struct socket *so __unused,
     struct nhop_object *nh __unused, struct sockaddr *nam __unused)
 {
 
 	return (ENOTSUP);
 }
 
 /*
  * Default tod_listen_start handler installed by init_toedev(): always
  * returns ENOTSUP.
  */
 static int
 toedev_listen_start(struct toedev *tod __unused, struct tcpcb *tp __unused)
 {
 
 	return (ENOTSUP);
 }
 
 /*
  * Default tod_listen_stop handler installed by init_toedev(): always
  * returns ENOTSUP.
  */
 static int
 toedev_listen_stop(struct toedev *tod __unused, struct tcpcb *tp __unused)
 {
 
 	return (ENOTSUP);
 }
 
 /*
  * Default tod_input handler installed by init_toedev(): frees the mbuf
  * chain m and does nothing else.
  */
 static void
 toedev_input(struct toedev *tod __unused, struct tcpcb *tp __unused,
     struct mbuf *m)
 {
 
 	m_freem(m);
 	return;
 }
 
 /* Default tod_rcvd handler installed by init_toedev(): does nothing. */
 static void
 toedev_rcvd(struct toedev *tod __unused, struct tcpcb *tp __unused)
 {
 
 	return;
 }
 
 /*
  * Default handler that init_toedev() installs for tod_output, tod_send_rst
  * and tod_send_fin: always returns ENOTSUP.
  */
 static int
 toedev_output(struct toedev *tod __unused, struct tcpcb *tp __unused)
 {
 
 	return (ENOTSUP);
 }
 
 /* Default tod_pcb_detach handler installed by init_toedev(): does nothing. */
 static void
 toedev_pcb_detach(struct toedev *tod __unused, struct tcpcb *tp __unused)
 {
 
 	return;
 }
 
 /* Default tod_l2_update handler installed by init_toedev(): does nothing. */
 static void
 toedev_l2_update(struct toedev *tod __unused, struct ifnet *ifp __unused,
     struct sockaddr *sa __unused, uint8_t *lladdr __unused,
     uint16_t vtag __unused)
 {
 
 	return;
 }
 
 /*
  * Default tod_route_redirect handler installed by init_toedev(): does
  * nothing.
  */
 static void
 toedev_route_redirect(struct toedev *tod __unused, struct ifnet *ifp __unused,
     struct nhop_object *nh0 __unused, struct nhop_object *nh1 __unused)
 {
 
 	return;
 }
 
 /*
  * Default tod_syncache_added handler installed by init_toedev(): does
  * nothing.
  */
 static void
 toedev_syncache_added(struct toedev *tod __unused, void *ctx __unused)
 {
 
 	return;
 }
 
 /*
  * Default tod_syncache_removed handler installed by init_toedev(): does
  * nothing.
  */
 static void
 toedev_syncache_removed(struct toedev *tod __unused, void *ctx __unused)
 {
 
 	return;
 }
 
 /*
  * Default tod_syncache_respond handler installed by init_toedev(): frees
  * the mbuf chain m and returns 0.
  */
 static int
 toedev_syncache_respond(struct toedev *tod __unused, void *ctx __unused,
     struct mbuf *m)
 {
 
 	m_freem(m);
 	return (0);
 }
 
 /*
  * Default tod_offload_socket handler installed by init_toedev(): does
  * nothing.
  */
 static void
 toedev_offload_socket(struct toedev *tod __unused, void *ctx __unused,
     struct socket *so __unused)
 {
 
 	return;
 }
 
 /* Default tod_ctloutput handler installed by init_toedev(): does nothing. */
 static void
 toedev_ctloutput(struct toedev *tod __unused, struct tcpcb *tp __unused,
     int sopt_dir __unused, int sopt_name __unused)
 {
 
 	return;
 }
 
 /*
  * Default tod_tcp_info handler installed by init_toedev(): does nothing,
  * in particular *ti is left untouched.
  */
 static void
 toedev_tcp_info(struct toedev *tod __unused, struct tcpcb *tp __unused,
     struct tcp_info *ti __unused)
 {
 
 	return;
 }
 
 /*
  * Default tod_alloc_tls_session handler installed by init_toedev(): always
  * returns EINVAL.
  */
 static int
 toedev_alloc_tls_session(struct toedev *tod __unused, struct tcpcb *tp __unused,
     struct ktls_session *tls __unused, int direction __unused)
 {
 
 	return (EINVAL);
 }
 
 /* Default tod_pmtu_update handler installed by init_toedev(): does nothing. */
 static void
 toedev_pmtu_update(struct toedev *tod __unused, struct tcpcb *tp __unused,
     tcp_seq seq __unused, int mtu __unused)
 {
 
 	return;
 }
 
 /*
  * Inform one or more TOE devices about a listening socket.
  *
  * inp must be a write-locked TCP inpcb (both are asserted).  arg is either
  * NULL, in which case every registered TOE device is notified, or a
  * struct toedev pointer, in which case only that device is notified.
  * Nothing happens if the inpcb is dropped or in timewait, or if its tcpcb
  * is not in the LISTEN state.
  */
 static void
 toe_listen_start(struct inpcb *inp, void *arg)
 {
 	struct toedev *t, *tod;
 	struct tcpcb *tp;
 
 	INP_WLOCK_ASSERT(inp);
 	KASSERT(inp->inp_pcbinfo == &V_tcbinfo,
 	    ("%s: inp is not a TCP inp", __func__));
 
 	if (inp->inp_flags & (INP_DROPPED | INP_TIMEWAIT))
 		return;
 
 	tp = intotcpcb(inp);
 	if (tp->t_state != TCPS_LISTEN)
 		return;
 
 	t = arg;
 	/* The device list is walked with toedev_lock held. */
 	mtx_lock(&toedev_lock);
 	TAILQ_FOREACH(tod, &toedev_list, link) {
 		if (t == NULL || t == tod)
 			tod->tod_listen_start(tod, tp);
 	}
 	mtx_unlock(&toedev_lock);
 }
 
 /*
  * Handler registered for the tcp_offload_listen_start event in
  * toecore_load().  tp must be in the LISTEN state with its inpcb write
  * locked (both asserted); every registered TOE device is notified.
  */
 static void
 toe_listen_start_event(void *arg __unused, struct tcpcb *tp)
 {
 	struct inpcb *inp = tp->t_inpcb;
 
 	INP_WLOCK_ASSERT(inp);
 	KASSERT(tp->t_state == TCPS_LISTEN,
 	    ("%s: t_state %s", __func__, tcpstates[tp->t_state]));
 
 	/* A NULL argument means "all devices". */
 	toe_listen_start(inp, NULL);
 }
 
 /*
  * Handler registered for the tcp_offload_listen_stop event in
  * toecore_load().  Calls tod_listen_stop on every registered TOE device
  * while holding toedev_lock.
  */
 static void
 toe_listen_stop_event(void *arg __unused, struct tcpcb *tp)
 {
 	struct toedev *tod;
 	/* inp is only referenced by the lock assertion below. */
 #ifdef INVARIANTS
 	struct inpcb *inp = tp->t_inpcb;
 #endif
 
 	INP_WLOCK_ASSERT(inp);
 	KASSERT(tp->t_state == TCPS_LISTEN,
 	    ("%s: t_state %s", __func__, tcpstates[tp->t_state]));
 
 	mtx_lock(&toedev_lock);
 	TAILQ_FOREACH(tod, &toedev_list, link)
 	    tod->tod_listen_stop(tod, tp);
 	mtx_unlock(&toedev_lock);
 }
 
 /*
  * Fill up a freshly allocated toedev struct with reasonable defaults.
  *
  * tod_softc is cleared and every handler assigned below is pointed at
  * the matching toedev_* default defined earlier in this file.
  */
 void
 init_toedev(struct toedev *tod)
 {
 
 	tod->tod_softc = NULL;
 
 	/*
 	 * Provide no-op defaults so that the kernel can call any toedev
 	 * function without having to check whether the TOE driver supplied one
 	 * or not.
 	 */
 	tod->tod_connect = toedev_connect;
 	tod->tod_listen_start = toedev_listen_start;
 	tod->tod_listen_stop = toedev_listen_stop;
 	tod->tod_input = toedev_input;
 	tod->tod_rcvd = toedev_rcvd;
 	/* The three output-style handlers share one default. */
 	tod->tod_output = toedev_output;
 	tod->tod_send_rst = toedev_output;
 	tod->tod_send_fin = toedev_output;
 	tod->tod_pcb_detach = toedev_pcb_detach;
 	tod->tod_l2_update = toedev_l2_update;
 	tod->tod_route_redirect = toedev_route_redirect;
 	tod->tod_syncache_added = toedev_syncache_added;
 	tod->tod_syncache_removed = toedev_syncache_removed;
 	tod->tod_syncache_respond = toedev_syncache_respond;
 	tod->tod_offload_socket = toedev_offload_socket;
 	tod->tod_ctloutput = toedev_ctloutput;
 	tod->tod_tcp_info = toedev_tcp_info;
 	tod->tod_alloc_tls_session = toedev_alloc_tls_session;
 	tod->tod_pmtu_update = toedev_pmtu_update;
 }
 
 /*
  * Register an active TOE device with the system.  This allows it to receive
  * notifications from the kernel.
  *
  * Returns EEXIST if tod is already on the list of registered devices and 0
  * otherwise.  After a successful insertion toe_listen_start() is applied
  * to all inpcbs with tod as its argument, so only the new device is told
  * about sockets that are already listening.
  */
 int
 register_toedev(struct toedev *tod)
 {
 	struct toedev *t;
 
 	mtx_lock(&toedev_lock);
 	TAILQ_FOREACH(t, &toedev_list, link) {
 		if (t == tod) {
 			mtx_unlock(&toedev_lock);
 			return (EEXIST);
 		}
 	}
 
 	TAILQ_INSERT_TAIL(&toedev_list, tod, link);
 	registered_toedevs++;
 	mtx_unlock(&toedev_lock);
 
 	/* Done after dropping toedev_lock; toe_listen_start() takes it. */
 	inp_apply_all(toe_listen_start, tod);
 
 	return (0);
 }
 
 /*
  * Remove the TOE device from the global list of active TOE devices.  It is the
  * caller's responsibility to ensure that the TOE device is quiesced prior to
  * this call.
  *
  * Returns 0 if tod was found and removed, ENODEV if it was not on the list.
  */
 int
 unregister_toedev(struct toedev *tod)
 {
 	struct toedev *t, *t2;
 	int rc = ENODEV;
 
 	mtx_lock(&toedev_lock);
 	TAILQ_FOREACH_SAFE(t, &toedev_list, link, t2) {
 		if (t == tod) {
 			TAILQ_REMOVE(&toedev_list, tod, link);
 			registered_toedevs--;
 			rc = 0;
 			break;
 		}
 	}
 	KASSERT(registered_toedevs >= 0,
 	    ("%s: registered_toedevs (%d) < 0", __func__, registered_toedevs));
 	mtx_unlock(&toedev_lock);
 	return (rc);
 }
 
 /*
  * Wrapper around syncache_add() for TOE drivers.  The inpcb must be read
  * locked (asserted).  The socket is taken from inp->inp_socket, NULL and
  * htons(0) are passed for the arguments the caller does not supply, and
  * the return value of syncache_add() is discarded.
  */
 void
 toe_syncache_add(struct in_conninfo *inc, struct tcpopt *to, struct tcphdr *th,
     struct inpcb *inp, void *tod, void *todctx, uint8_t iptos)
 {
 
 	INP_RLOCK_ASSERT(inp);
 
 	(void )syncache_add(inc, to, th, inp, inp->inp_socket, NULL, tod,
 	    todctx, iptos, htons(0));
 }
 
 /*
  * Wrapper around syncache_expand() for TOE drivers.  Must be called inside
  * the network epoch (asserted).  NULL and htons(0) are passed for the
  * arguments the caller does not supply; the result of syncache_expand()
  * is returned unchanged.
  */
 int
 toe_syncache_expand(struct in_conninfo *inc, struct tcpopt *to,
     struct tcphdr *th, struct socket **lsop)
 {
 
 	NET_EPOCH_ASSERT();
 
 	return (syncache_expand(inc, to, th, lsop, NULL, htons(0)));
 }
 
 /*
  * General purpose check to see if a 4-tuple is in use by the kernel.  If a TCP
  * header (presumably for an incoming SYN) is also provided, an existing 4-tuple
  * in TIME_WAIT may be assassinated freeing it up for re-use.
  *
  * Note that the TCP header must have been run through tcp_fields_to_host() or
  * equivalent.
  *
  * Returns 0 if the 4-tuple may be used and EADDRINUSE if it is taken.
  */
 int
 toe_4tuple_check(struct in_conninfo *inc, struct tcphdr *th, struct ifnet *ifp)
 {
 	struct inpcb *inp;
 
 	/* Look up a matching TCP inpcb; it is returned read locked. */
 	if (inc->inc_flags & INC_ISIPV6) {
 		inp = in6_pcblookup(&V_tcbinfo, &inc->inc6_faddr,
 		    inc->inc_fport, &inc->inc6_laddr, inc->inc_lport,
 		    INPLOOKUP_RLOCKPCB, ifp);
 	} else {
 		inp = in_pcblookup(&V_tcbinfo, inc->inc_faddr, inc->inc_fport,
 		    inc->inc_laddr, inc->inc_lport, INPLOOKUP_RLOCKPCB, ifp);
 	}
 	if (inp != NULL) {
 		INP_RLOCK_ASSERT(inp);
 
 		if ((inp->inp_flags & INP_TIMEWAIT) && th != NULL) {
 			/*
 			 * NOTE(review): there is no INP_RUNLOCK() on this
 			 * path, so tcp_twcheck() presumably releases the
 			 * inpcb lock itself -- confirm.
 			 */
 			if (!tcp_twcheck(inp, NULL, th, NULL, 0))
 				return (EADDRINUSE);
 		} else {
 			INP_RUNLOCK(inp);
 			return (EADDRINUSE);
 		}
 	}
 
 	return (0);
 }
 
 /*
  * Handler registered for lle_event in toecore_load().  Forwards a change
  * of the link-layer entry lle to the tod_l2_update method of the TOE
  * device of the entry's interface.  Entries that are neither AF_INET nor
  * AF_INET6, interfaces without the matching IFCAP_TOE4/IFCAP_TOE6
  * capability enabled and interfaces without a TOE device are ignored.
  * The entry must be write locked (asserted).
  */
 static void
 toe_lle_event(void *arg __unused, struct llentry *lle, int evt)
 {
 	struct toedev *tod;
 	struct ifnet *ifp;
 	struct sockaddr *sa;
 	uint8_t *lladdr;
 	uint16_t vid, pcp;
 	int family;
 	struct sockaddr_in6 sin6;
 
 	LLE_WLOCK_ASSERT(lle);
 
 	ifp = lltable_get_ifp(lle->lle_tbl);
 	family = lltable_get_af(lle->lle_tbl);
 
 	if (family != AF_INET && family != AF_INET6)
 		return;
 	/*
 	 * Not interested if the interface's TOE capability is not enabled.
 	 */
 	if ((family == AF_INET && !(ifp->if_capenable & IFCAP_TOE4)) ||
 	    (family == AF_INET6 && !(ifp->if_capenable & IFCAP_TOE6)))
 		return;
 
 	tod = TOEDEV(ifp);
 	if (tod == NULL)
 		return;
 
 	/* sin6 serves as the address buffer for either family. */
 	sa = (struct sockaddr *)&sin6;
 	lltable_fill_sa_entry(lle, sa);
 
 	/* Tag values reported unless the entry has just been resolved. */
 	vid = 0xfff;
 	pcp = 0;
 	if (evt != LLENTRY_RESOLVED) {
 		/*
 		 * LLENTRY_TIMEDOUT, LLENTRY_DELETED, LLENTRY_EXPIRED all mean
 		 * this entry is going to be deleted.
 		 */
 
 		lladdr = NULL;
 	} else {
 		KASSERT(lle->la_flags & LLE_VALID,
 		    ("%s: %p resolved but not valid?", __func__, lle));
 
 		lladdr = (uint8_t *)lle->ll_addr;
 		VLAN_TAG(ifp, &vid);
 		VLAN_PCP(ifp, &pcp);
 	}
 
 	tod->tod_l2_update(tod, ifp, sa, lladdr, EVL_MAKETAG(vid, pcp, 0));
 }
 
 /*
  * Returns 0 or EWOULDBLOCK on success (any other value is an error).  0 means
  * lladdr and vtag are valid on return, EWOULDBLOCK means the TOE driver's
  * tod_l2_update will be called later, when the entry is resolved or times out.
  *
  * The address family of sa selects arpresolve() (AF_INET) or nd6_resolve()
  * (AF_INET6), each only when compiled in; any other family fails with
  * EPROTONOSUPPORT.  tod is not referenced by this function.
  */
 int
 toe_l2_resolve(struct toedev *tod, struct ifnet *ifp, struct sockaddr *sa,
     uint8_t *lladdr, uint16_t *vtag)
 {
 	int rc;
 	uint16_t vid, pcp;
 
 	switch (sa->sa_family) {
 #ifdef INET
 	case AF_INET:
 		rc = arpresolve(ifp, 0, NULL, sa, lladdr, NULL, NULL);
 		break;
 #endif
 #ifdef INET6
 	case AF_INET6:
-		rc = nd6_resolve(ifp, LLE_SF(AF_INET6, 0), NULL, sa, lladdr, NULL, NULL);
+		rc = nd6_resolve(ifp, LLE_SF(AF_INET6, 0), NULL, sa, lladdr,
+		    NULL, NULL);
 		break;
 #endif
 	default:
 		return (EPROTONOSUPPORT);
 	}
 
 	/* *vtag is only written when resolution completed immediately. */
 	if (rc == 0) {
 		vid = 0xfff;
 		pcp = 0;
 		if (ifp->if_type == IFT_L2VLAN) {
 			VLAN_TAG(ifp, &vid);
 			VLAN_PCP(ifp, &pcp);
 		} else if (ifp->if_pcp != IFNET_PCP_NONE) {
 			vid = 0;
 			pcp = ifp->if_pcp;
 		}
 		*vtag = EVL_MAKETAG(vid, pcp, 0);
 	}
 
 	return (rc);
 }
 
 /*
  * Handle a failed connection attempt on an offloaded tcpcb.  The caller
  * must be inside the network epoch and hold the inpcb write lock (both
  * asserted); the lock is held again on return.  Nothing is done if the
  * inpcb has already been dropped.  With err == EAGAIN the connection is
  * detached from the TOE device and continued through the regular TCP
  * output path, any other err drops the connection with that error.
  */
 void
 toe_connect_failed(struct toedev *tod, struct inpcb *inp, int err)
 {
 
 	NET_EPOCH_ASSERT();
 	INP_WLOCK_ASSERT(inp);
 
 	if (!(inp->inp_flags & INP_DROPPED)) {
 		struct tcpcb *tp = intotcpcb(inp);
 
 		KASSERT(tp->t_flags & TF_TOE,
 		    ("%s: tp %p not offloaded.", __func__, tp));
 
 		if (err == EAGAIN) {
 			/*
 			 * Temporary failure during offload, take this PCB back.
 			 * Detach from the TOE driver and do the rest of what
 			 * TCP's pru_connect would have done if the connection
 			 * wasn't offloaded.
 			 */
 
 			tod->tod_pcb_detach(tod, tp);
 			KASSERT(!(tp->t_flags & TF_TOE),
 			    ("%s: tp %p still offloaded.", __func__, tp));
 			tcp_timer_activate(tp, TT_KEEP, TP_KEEPINIT(tp));
 			(void) tp->t_fb->tfb_tcp_output(tp);
 		} else {
 			/*
 			 * NOTE(review): a NULL return presumably means
 			 * tcp_drop() released the inpcb lock -- confirm.
 			 */
 			tp = tcp_drop(tp, err);
 			if (tp == NULL)
 				INP_WLOCK(inp);	/* re-acquire */
 		}
 	}
 	INP_WLOCK_ASSERT(inp);
 }
 
 /*
  * MOD_LOAD handler: initializes the device list and its lock and registers
  * the listen start/stop and lle_event handlers.  Always returns 0.
  */
 static int
 toecore_load(void)
 {
 
 	mtx_init(&toedev_lock, "toedev lock", NULL, MTX_DEF);
 	TAILQ_INIT(&toedev_list);
 
 	/* The tags are kept so that toecore_unload() can deregister. */
 	listen_start_eh = EVENTHANDLER_REGISTER(tcp_offload_listen_start,
 	    toe_listen_start_event, NULL, EVENTHANDLER_PRI_ANY);
 	listen_stop_eh = EVENTHANDLER_REGISTER(tcp_offload_listen_stop,
 	    toe_listen_stop_event, NULL, EVENTHANDLER_PRI_ANY);
 	lle_event_eh = EVENTHANDLER_REGISTER(lle_event, toe_lle_event, NULL,
 	    EVENTHANDLER_PRI_ANY);
 
 	return (0);
 }
 
 /*
  * MOD_UNLOAD handler: refuses with EBUSY while any TOE device is still
  * registered; otherwise deregisters the event handlers set up by
  * toecore_load(), destroys toedev_lock and returns 0.
  */
 static int
 toecore_unload(void)
 {
 
 	mtx_lock(&toedev_lock);
 	if (!TAILQ_EMPTY(&toedev_list)) {
 		mtx_unlock(&toedev_lock);
 		return (EBUSY);
 	}
 
 	EVENTHANDLER_DEREGISTER(tcp_offload_listen_start, listen_start_eh);
 	EVENTHANDLER_DEREGISTER(tcp_offload_listen_stop, listen_stop_eh);
 	EVENTHANDLER_DEREGISTER(lle_event, lle_event_eh);
 
 	mtx_unlock(&toedev_lock);
 	mtx_destroy(&toedev_lock);
 
 	return (0);
 }
 
 /*
  * Module event handler: dispatches MOD_LOAD and MOD_UNLOAD and returns
  * EOPNOTSUPP for every other command.  mod and arg are not used.
  */
 static int
 toecore_mod_handler(module_t mod, int cmd, void *arg)
 {
 
 	if (cmd == MOD_LOAD)
 		return (toecore_load());
 
 	if (cmd == MOD_UNLOAD)
 		return (toecore_unload());
 
 	return (EOPNOTSUPP);
 }
 
 /*
  * Module glue: the module is named "toecore" and its events are handled by
  * toecore_mod_handler().
  */
 static moduledata_t mod_data= {
 	"toecore",
 	toecore_mod_handler,
 	0
 };
 
 MODULE_VERSION(toecore, 1);
 DECLARE_MODULE(toecore, mod_data, SI_SUB_EXEC, SI_ORDER_ANY);
diff --git a/sys/ofed/drivers/infiniband/core/ib_addr.c b/sys/ofed/drivers/infiniband/core/ib_addr.c
index 297469bd4d87..2ac79ca64664 100644
--- a/sys/ofed/drivers/infiniband/core/ib_addr.c
+++ b/sys/ofed/drivers/infiniband/core/ib_addr.c
@@ -1,908 +1,916 @@
 /*-
  * SPDX-License-Identifier: BSD-2-Clause OR GPL-2.0
  *
  * Copyright (c) 2005 Voltaire Inc.  All rights reserved.
  * Copyright (c) 2002-2005, Network Appliance, Inc. All rights reserved.
  * Copyright (c) 1999-2019, Mellanox Technologies, Inc. All rights reserved.
  * Copyright (c) 2005 Intel Corporation.  All rights reserved.
  *
  * This software is available to you under a choice of one of two
  * licenses.  You may choose to be licensed under the terms of the GNU
  * General Public License (GPL) Version 2, available from the file
  * COPYING in the main directory of this source tree, or the
  * OpenIB.org BSD license below:
  *
  *     Redistribution and use in source and binary forms, with or
  *     without modification, are permitted provided that the following
  *     conditions are met:
  *
  *      - Redistributions of source code must retain the above
  *        copyright notice, this list of conditions and the following
  *        disclaimer.
  *
  *      - Redistributions in binary form must reproduce the above
  *        copyright notice, this list of conditions and the following
  *        disclaimer in the documentation and/or other materials
  *        provided with the distribution.
  *
  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
  * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  * SOFTWARE.
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include <linux/mutex.h>
 #include <linux/slab.h>
 #include <linux/workqueue.h>
 #include <linux/module.h>
 #include <net/if_llatbl.h>
 #include <net/route.h>
 #include <net/route/nhop.h>
 #include <net/netevent.h>
+#include <net/if_llatbl.h>
 #include <rdma/ib_addr.h>
 #include <rdma/ib.h>
 
 #include <netinet/in_fib.h>
 #include <netinet/if_ether.h>
 #include <netinet/ip_var.h>
 #include <netinet6/scope6_var.h>
 #include <netinet6/in6_pcb.h>
 #include <netinet6/in6_fib.h>
 
 #include "core_priv.h"
 
 /*
  * One queued address request.  Requests are linked on req_list through
  * 'list' and kept in ascending 'timeout' order by queue_req().
  */
 struct addr_req {
 	struct list_head list;		/* linkage on req_list */
 	struct sockaddr_storage src_addr;
 	struct sockaddr_storage dst_addr;
 	struct rdma_dev_addr *addr;
 	struct rdma_addr_client *client;
 	void *context;
 	void (*callback)(int status, struct sockaddr *src_addr,
 			 struct rdma_dev_addr *addr, void *context);
 	int timeout;	/* compared against jiffies by set_timeout() */
 	int status;
 };
 
 static void process_req(struct work_struct *work);
 
 /*
  * File-scope state: 'lock' is held by queue_req() while it modifies
  * req_list, and 'work' is the delayed work item (running process_req)
  * that set_timeout() schedules on addr_wq.
  */
 static DEFINE_MUTEX(lock);
 static LIST_HEAD(req_list);
 static DECLARE_DELAYED_WORK(work, process_req);
 static struct workqueue_struct *addr_wq;
 
 /*
  * Return the size of the socket address structure that matches the
  * address family of addr (AF_INET, AF_INET6 or AF_IB), or 0 for any other
  * family.
  */
 int rdma_addr_size(struct sockaddr *addr)
 {
 	switch (addr->sa_family) {
 	case AF_INET:
 		return sizeof(struct sockaddr_in);
 	case AF_INET6:
 		return sizeof(struct sockaddr_in6);
 	case AF_IB:
 		return sizeof(struct sockaddr_ib);
 	default:
 		return 0;
 	}
 }
 EXPORT_SYMBOL(rdma_addr_size);
 
 /*
  * Like rdma_addr_size(), but returns 0 when the size for the address
  * family would not fit into a struct sockaddr_in6.
  */
 int rdma_addr_size_in6(struct sockaddr_in6 *addr)
 {
 	int ret = rdma_addr_size((struct sockaddr *) addr);
 
 	return ret <= sizeof(*addr) ? ret : 0;
 }
 EXPORT_SYMBOL(rdma_addr_size_in6);
 
 /*
  * Like rdma_addr_size(), but returns 0 when the size for the address
  * family would not fit into a struct sockaddr_storage.
  */
 int rdma_addr_size_kss(struct sockaddr_storage *addr)
 {
 	int ret = rdma_addr_size((struct sockaddr *) addr);
 
 	return ret <= sizeof(*addr) ? ret : 0;
 }
 EXPORT_SYMBOL(rdma_addr_size_kss);
 
 static struct rdma_addr_client self;
 
 /*
  * Prepare client for use: its reference count starts at 1 and its
  * completion is initialized.
  */
 void rdma_addr_register_client(struct rdma_addr_client *client)
 {
 	atomic_set(&client->refcount, 1);
 	init_completion(&client->comp);
 }
 EXPORT_SYMBOL(rdma_addr_register_client);
 
 /*
  * Drop one reference on client; when the count reaches zero the client's
  * completion is signalled.
  */
 static inline void put_client(struct rdma_addr_client *client)
 {
 	if (atomic_dec_and_test(&client->refcount))
 		complete(&client->comp);
 }
 
 /*
  * Drop the reference taken by rdma_addr_register_client() and sleep until
  * the client's completion has been signalled, i.e. until put_client() has
  * dropped the last reference.
  */
 void rdma_addr_unregister_client(struct rdma_addr_client *client)
 {
 	put_client(client);
 	wait_for_completion(&client->comp);
 }
 EXPORT_SYMBOL(rdma_addr_unregister_client);
 
 /*
  * Copy 'min' bytes from src to dst and zero the remainder of dst up to
  * 'max' bytes.  'min' is clamped to 'max', so exactly 'max' bytes of dst
  * are written.
  */
 static inline void
 rdma_copy_addr_sub(u8 *dst, const u8 *src, unsigned min, unsigned max)
 {
 	if (min > max)
 		min = max;
 	memcpy(dst, src, min);
 	memset(dst + min, 0, max - min);
 }
 
 /*
  * Fill dev_addr from the network interface dev.
  *
  * For a loopback interface the device type is set to ARPHRD_ETHER and the
  * source, broadcast and destination link-layer addresses are zeroed.
  * Otherwise the device type is derived from dev->if_type (0 if it is
  * neither InfiniBand nor Ethernet), the source and broadcast addresses
  * are copied from the interface and the destination address is copied
  * from dst_dev_addr unless that is NULL, in which case it is left
  * untouched.  bound_dev_if is set to the interface index in all cases.
  * Always returns 0.
  */
 int rdma_copy_addr(struct rdma_dev_addr *dev_addr, struct ifnet *dev,
 		     const unsigned char *dst_dev_addr)
 {
 	/* check for loopback device */
 	if (dev->if_flags & IFF_LOOPBACK) {
 		dev_addr->dev_type = ARPHRD_ETHER;
 		memset(dev_addr->src_dev_addr, 0, MAX_ADDR_LEN);
 		memset(dev_addr->broadcast, 0, MAX_ADDR_LEN);
 		memset(dev_addr->dst_dev_addr, 0, MAX_ADDR_LEN);
 		dev_addr->bound_dev_if = dev->if_index;
 		return (0);
 	} else if (dev->if_type == IFT_INFINIBAND)
 		dev_addr->dev_type = ARPHRD_INFINIBAND;
 	else if (dev->if_type == IFT_ETHER)
 		dev_addr->dev_type = ARPHRD_ETHER;
 	else
 		dev_addr->dev_type = 0;
 	/* Addresses are zero padded from if_addrlen up to MAX_ADDR_LEN. */
 	rdma_copy_addr_sub(dev_addr->src_dev_addr, IF_LLADDR(dev),
 			   dev->if_addrlen, MAX_ADDR_LEN);
 	rdma_copy_addr_sub(dev_addr->broadcast, dev->if_broadcastaddr,
 			   dev->if_addrlen, MAX_ADDR_LEN);
 	if (dst_dev_addr != NULL) {
 		rdma_copy_addr_sub(dev_addr->dst_dev_addr, dst_dev_addr,
 				   dev->if_addrlen, MAX_ADDR_LEN);
 	}
 	dev_addr->bound_dev_if = dev->if_index;
 	return 0;
 }
 EXPORT_SYMBOL(rdma_copy_addr);
 
 /*
  * Find the network interface for addr and fill dev_addr from it.
  *
  * If dev_addr->bound_dev_if is set the interface is looked up by that
  * index; otherwise it is looked up by the IPv4 or IPv6 address in addr
  * (each family only when compiled in).  Returns the result of
  * rdma_copy_addr() on success, -EINVAL if the interface is a loopback
  * interface and -ENODEV if no interface was found.
  */
 int rdma_translate_ip(const struct sockaddr *addr,
 		      struct rdma_dev_addr *dev_addr)
 {
 	struct ifnet *dev;
 	int ret;
 
 	if (dev_addr->bound_dev_if) {
 		dev = dev_get_by_index(dev_addr->net, dev_addr->bound_dev_if);
 	} else switch (addr->sa_family) {
 #ifdef INET
 	case AF_INET:
 		dev = ip_ifp_find(dev_addr->net,
 			((const struct sockaddr_in *)addr)->sin_addr.s_addr);
 		break;
 #endif
 #ifdef INET6
 	case AF_INET6:
 		dev = ip6_ifp_find(dev_addr->net,
 			((const struct sockaddr_in6 *)addr)->sin6_addr, 0);
 		break;
 #endif
 	default:
 		dev = NULL;
 		break;
 	}
 
 	if (dev != NULL) {
 		/* disallow connections through 127.0.0.1 itself */
 		if (dev->if_flags & IFF_LOOPBACK)
 			ret = -EINVAL;
 		else
 			ret = rdma_copy_addr(dev_addr, dev, NULL);
 		/* Release the interface obtained from the lookup above. */
 		dev_put(dev);
 	} else {
 		ret = -ENODEV;
 	}
 	return ret;
 }
 EXPORT_SYMBOL(rdma_translate_ip);
 
 /*
  * (Re)schedule the delayed work item on addr_wq.  'time' is an absolute
  * time on the jiffies scale; the resulting delay is clamped to the range
  * of 1 to hz ticks.
  */
 static void set_timeout(int time)
 {
 	int delay;	/* under FreeBSD ticks are 32-bit */
 
 	delay = time - jiffies;
 	if (delay <= 0)
 		delay = 1;
 	else if (delay > hz)
 		delay = hz;
 
 	mod_delayed_work(addr_wq, &work, delay);
 }
 
 /*
  * Insert req into req_list, which is kept sorted by ascending timeout, and
  * re-arm the timer if req became the first entry.  The list is modified
  * with 'lock' held.
  */
 static void queue_req(struct addr_req *req)
 {
 	struct addr_req *temp_req;
 
 	mutex_lock(&lock);
 	/*
 	 * Walk backwards to the last entry that does not expire after req.
 	 * If there is none the cursor presumably ends on the list head, so
 	 * that req is added as the first element.
 	 */
 	list_for_each_entry_reverse(temp_req, &req_list, list) {
 		if (time_after_eq(req->timeout, temp_req->timeout))
 			break;
 	}
 
 	list_add(&req->list, &temp_req->list);
 
 	if (req_list.next == &req->list)
 		set_timeout(req->timeout);
 	mutex_unlock(&lock);
 }
 
 #if defined(INET) || defined(INET6)
 /*
  * Map the multicast address dst_in to a link-layer address using the
  * interface's if_resolvemulti method and store it, zero padded to
  * MAX_ADDR_LEN, in edst.  Returns 0 on success, EOPNOTSUPP if the
  * interface has no if_resolvemulti method, or the error returned by that
  * method; edst is only written on success.
  */
 static int addr_resolve_multi(u8 *edst, struct ifnet *ifp, struct sockaddr *dst_in)
 {
 	struct sockaddr *llsa;
 	struct sockaddr_dl sdl;
 	int error;
 
 	/* Only sdl_len is set up before sdl is handed to the method. */
 	sdl.sdl_len = sizeof(sdl);
 	llsa = (struct sockaddr *)&sdl;
 
 	if (ifp->if_resolvemulti == NULL) {
 		error = EOPNOTSUPP;
 	} else {
 		error = ifp->if_resolvemulti(ifp, &llsa, dst_in);
 		if (error == 0) {
 			rdma_copy_addr_sub(edst, LLADDR((struct sockaddr_dl *)llsa),
 			    ifp->if_addrlen, MAX_ADDR_LEN);
 		}
 	}
 	return (error);
 }
 #endif
 
 #ifdef INET
 /*
  * Resolve the outgoing interface and the destination link-layer address
  * for an IPv4 destination.
  *
  * src_in: source address; if it is INADDR_ANY it is overwritten in step 4
  *	with the address chosen in step 2, keeping the original port.
  * dst_in: destination address; INADDR_ANY is rejected with ENETUNREACH.
  * addr: supplies the VNET (addr->net) and the optional bound_dev_if;
  *	hoplimit is always set and network is set to RDMA_NETWORK_IPV4
  *	when the route goes through a gateway.
  * edst: buffer of MAX_ADDR_LEN bytes receiving the link-layer address.
  * ifpp: on success receives the interface; the reference obtained in
  *	step 2 is not dropped on that path (the error paths dev_put() it).
  *
  * In step 3 a gateway whose address family is AF_INET6 is resolved with
  * nd6_resolve() when INET6 is compiled in; everything else goes through
  * arpresolve().
  *
  * Returns 0 or a negated errno value; EWOULDBLOCK and EAGAIN are reported
  * as -ENODATA.  Must be called inside the network epoch (asserted).
  */
 static int addr4_resolve(struct sockaddr_in *src_in,
 			 const struct sockaddr_in *dst_in,
 			 struct rdma_dev_addr *addr,
 			 u8 *edst,
 			 struct ifnet **ifpp)
 {
 	/* Bit flags describing which of the two addresses are wildcards. */
 	enum {
 		ADDR_VALID = 0,
 		ADDR_SRC_ANY = 1,
 		ADDR_DST_ANY = 2,
 	};
 	struct sockaddr_in dst_tmp = *dst_in;
 	in_port_t src_port;
 	struct sockaddr *saddr = NULL;
 	struct nhop_object *nh;
 	struct ifnet *ifp;
 	int error;
 	int type;
 
 	NET_EPOCH_ASSERT();
 
 	/* set VNET, if any */
 	CURVNET_SET(addr->net);
 
 	/* set default TTL limit */
 	addr->hoplimit = V_ip_defttl;
 
 	type = ADDR_VALID;
 	if (src_in->sin_addr.s_addr == INADDR_ANY)
 		type |= ADDR_SRC_ANY;
 	if (dst_tmp.sin_addr.s_addr == INADDR_ANY)
 		type |= ADDR_DST_ANY;
 
 	/*
 	 * Make sure the socket address length field is set.
 	 */
 	dst_tmp.sin_len = sizeof(dst_tmp);
 
 	/* Step 1 - lookup destination route if any */
 	switch (type) {
 	case ADDR_VALID:
 	case ADDR_SRC_ANY:
 		/* regular destination route lookup */
 		nh = fib4_lookup(RT_DEFAULT_FIB, dst_tmp.sin_addr,0,NHR_NONE,0);
 		if (nh == NULL) {
 			error = EHOSTUNREACH;
 			goto done;
 		}
 		break;
 	default:
 		/* Any combination with a wildcard destination is rejected. */
 		error = ENETUNREACH;
 		goto done;
 	}
 
 	/* Step 2 - find outgoing network interface */
 	switch (type) {
 	case ADDR_VALID:
 		/* get source interface */
 		if (addr->bound_dev_if != 0) {
 			ifp = dev_get_by_index(addr->net, addr->bound_dev_if);
 		} else {
 			ifp = ip_ifp_find(addr->net, src_in->sin_addr.s_addr);
 		}
 
 		/* check source interface */
 		if (ifp == NULL) {
 			error = ENETUNREACH;
 			goto done;
 		} else if (ifp->if_flags & IFF_LOOPBACK) {
 			/*
 			 * Source address cannot be a loopback device.
 			 */
 			error = EHOSTUNREACH;
 			goto error_put_ifp;
 		} else if (nh->nh_ifp->if_flags & IFF_LOOPBACK) {
 			if (memcmp(&src_in->sin_addr, &dst_in->sin_addr,
 			    sizeof(src_in->sin_addr))) {
 				/*
 				 * Destination is loopback, but source
 				 * and destination address is not the
 				 * same.
 				 */
 				error = EHOSTUNREACH;
 				goto error_put_ifp;
 			}
 			/* get destination network interface from route */
 			dev_put(ifp);
 			ifp = nh->nh_ifp;
 			dev_hold(ifp);
 		} else if (ifp != nh->nh_ifp) {
 			/*
 			 * Source and destination interfaces are
 			 * different.
 			 */
 			error = ENETUNREACH;
 			goto error_put_ifp;
 		}
 		break;
 	case ADDR_SRC_ANY:
 		/* check for loopback device */
 		if (nh->nh_ifp->if_flags & IFF_LOOPBACK)
 			saddr = (struct sockaddr *)&dst_tmp;
 		else
 			saddr = nh->nh_ifa->ifa_addr;
 
 		/* get destination network interface from route */
 		ifp = nh->nh_ifp;
 		dev_hold(ifp);
 		break;
 	default:
 		break;
 	}
 
 	/*
 	 * Step 3 - resolve destination MAC address
 	 */
 	if (dst_tmp.sin_addr.s_addr == INADDR_BROADCAST) {
 		rdma_copy_addr_sub(edst, ifp->if_broadcastaddr,
 		    ifp->if_addrlen, MAX_ADDR_LEN);
 		error = 0;
 	} else if (IN_MULTICAST(ntohl(dst_tmp.sin_addr.s_addr))) {
 		bool is_gw = (nh->nh_flags & NHF_GATEWAY) != 0;
 		error = addr_resolve_multi(edst, ifp, (struct sockaddr *)&dst_tmp);
 		if (error != 0)
 			goto error_put_ifp;
 		else if (is_gw)
 			addr->network = RDMA_NETWORK_IPV4;
 	} else if (ifp->if_flags & IFF_LOOPBACK) {
 		memset(edst, 0, MAX_ADDR_LEN);
 		error = 0;
 	} else {
 		bool is_gw = (nh->nh_flags & NHF_GATEWAY) != 0;
 		memset(edst, 0, MAX_ADDR_LEN);
-		error = arpresolve(ifp, is_gw, NULL, is_gw ?
-		    &nh->gw_sa : (const struct sockaddr *)&dst_tmp,
-		    edst, NULL, NULL);
+#ifdef INET6
+		if (is_gw && nh->gw_sa.sa_family == AF_INET6)
+			error = nd6_resolve(ifp, LLE_SF(AF_INET, is_gw), NULL,
+			    &nh->gw_sa, edst, NULL, NULL);
+		else
+#endif
+			error = arpresolve(ifp, is_gw, NULL, is_gw ?
+			    &nh->gw_sa : (const struct sockaddr *)&dst_tmp,
+			    edst, NULL, NULL);
+
 		if (error != 0)
 			goto error_put_ifp;
 		else if (is_gw)
 			addr->network = RDMA_NETWORK_IPV4;
 	}
 
 	/*
 	 * Step 4 - update source address, if any
 	 */
 	if (saddr != NULL) {
 		src_port = src_in->sin_port;
 		memcpy(src_in, saddr, rdma_addr_size(saddr));
 		src_in->sin_port = src_port;	/* preserve port number */
 	}
 
 	*ifpp = ifp;
 
 	goto done;
 
 error_put_ifp:
 	dev_put(ifp);
 done:
 	CURVNET_RESTORE();
 
 	/* "Not resolved yet" is reported to the caller as ENODATA. */
 	if (error == EWOULDBLOCK || error == EAGAIN)
 		error = ENODATA;
 	return (-error);
 }
 #else
 /* Without INET every IPv4 resolution fails with -EADDRNOTAVAIL. */
 static int addr4_resolve(struct sockaddr_in *src_in,
 			 const struct sockaddr_in *dst_in,
 			 struct rdma_dev_addr *addr,
 			 u8 *edst,
 			 struct ifnet **ifpp)
 {
 	return -EADDRNOTAVAIL;
 }
 #endif
 
 #ifdef INET6
 /*
  * Resolve an IPv6 destination to an outgoing network interface and a
  * destination link-layer address.
  *
  * src_in: source address; when it is the unspecified address it is
  *         overwritten in step 4 with an address chosen from the route
  *         (the port number is preserved).
  * dst_in: destination address; only a local copy (dst_tmp) is modified.
  * addr:   supplies the VNET (addr->net) and the bound interface index
  *         (addr->bound_dev_if); receives hoplimit and, when the next
  *         hop is a gateway, network = RDMA_NETWORK_IPV6.
  * edst:   buffer receiving the destination link-layer address; the
  *         unicast paths clear MAX_ADDR_LEN bytes of it first.
  * ifpp:   on success receives the selected interface. The interface is
  *         not released here on success; only the error path calls
  *         dev_put() on it.
  *
  * Must be called inside the network epoch (see NET_EPOCH_ASSERT()).
  *
  * Returns 0 on success or a negated errno value. EWOULDBLOCK and EAGAIN
  * are reported as -ENODATA.
  */
 static int addr6_resolve(struct sockaddr_in6 *src_in,
 			 const struct sockaddr_in6 *dst_in,
 			 struct rdma_dev_addr *addr,
 			 u8 *edst,
 			 struct ifnet **ifpp)
 {
 	/*
 	 * Bit flags describing which of the two addresses is unspecified.
 	 * Both flags set (value 3) matches no case label below and is
 	 * rejected by the default branch of step 1.
 	 */
 	enum {
 		ADDR_VALID = 0,
 		ADDR_SRC_ANY = 1,
 		ADDR_DST_ANY = 2,
 	};
 	struct sockaddr_in6 dst_tmp = *dst_in;
 	in_port_t src_port;
 	struct sockaddr *saddr = NULL;
 	struct nhop_object *nh;
 	struct ifnet *ifp;
 	int error;
 	int type;
 
 	NET_EPOCH_ASSERT();
 
 	/* set VNET, if any */
 	CURVNET_SET(addr->net);
 
 	/*
 	 * set default TTL limit
 	 *
 	 * NOTE(review): judging by its name V_ip_defttl is the IPv4
 	 * default TTL; confirm whether an IPv6 hop-limit default was
 	 * intended here.
 	 */
 	addr->hoplimit = V_ip_defttl;
 
 	type = ADDR_VALID;
 	if (ipv6_addr_any(&src_in->sin6_addr))
 		type |= ADDR_SRC_ANY;
 	if (ipv6_addr_any(&dst_tmp.sin6_addr))
 		type |= ADDR_DST_ANY;
 
 	/*
 	 * Make sure the socket address length field is set.
 	 */
 	dst_tmp.sin6_len = sizeof(dst_tmp);
 
 	/*
 	 * Make sure the scope ID gets embedded, else nd6_resolve() will
 	 * not find the record.
 	 */
 	dst_tmp.sin6_scope_id = addr->bound_dev_if;
 	sa6_embedscope(&dst_tmp, 0);
 
 	/* Step 1 - lookup destination route if any */
 	switch (type) {
 	case ADDR_VALID:
 		/*
 		 * sanity check for IPv4 addresses: source and destination
 		 * must either both be IPv4-mapped or both not be
 		 */
 		if (ipv6_addr_v4mapped(&src_in->sin6_addr) !=
 		    ipv6_addr_v4mapped(&dst_tmp.sin6_addr)) {
 			error = EAFNOSUPPORT;
 			goto done;
 		}
 		/* FALLTHROUGH */
 	case ADDR_SRC_ANY:
 		/*
 		 * regular destination route lookup; uses the caller's
 		 * dst_in address together with the bound interface index
 		 * as scope, not the scope-embedded copy
 		 */
 		nh = fib6_lookup(RT_DEFAULT_FIB, &dst_in->sin6_addr,
 		    addr->bound_dev_if, NHR_NONE, 0);
 		if (nh == NULL) {
 			error = EHOSTUNREACH;
 			goto done;
 		}
 		break;
 	default:
 		/* unspecified destination, with or without a source */
 		error = ENETUNREACH;
 		goto done;
 	}
 
 	/* Step 2 - find outgoing network interface */
 	switch (type) {
 	case ADDR_VALID:
 		/* get source interface */
 		if (addr->bound_dev_if != 0) {
 			ifp = dev_get_by_index(addr->net, addr->bound_dev_if);
 		} else {
 			ifp = ip6_ifp_find(addr->net, src_in->sin6_addr, 0);
 		}
 
 		/* check source interface */
 		if (ifp == NULL) {
 			error = ENETUNREACH;
 			goto done;
 		} else if (ifp->if_flags & IFF_LOOPBACK) {
 			/*
 			 * Source address cannot be a loopback device.
 			 */
 			error = EHOSTUNREACH;
 			goto error_put_ifp;
 		} else if (nh->nh_ifp->if_flags & IFF_LOOPBACK) {
 			if (memcmp(&src_in->sin6_addr, &dst_in->sin6_addr,
 			    sizeof(src_in->sin6_addr))) {
 				/*
 				 * Destination is loopback, but the source
 				 * and destination addresses are not the
 				 * same.
 				 */
 				error = EHOSTUNREACH;
 				goto error_put_ifp;
 			}
 			/*
 			 * get destination network interface from route:
 			 * swap the reference held on the source interface
 			 * for one on the route's interface
 			 */
 			dev_put(ifp);
 			ifp = nh->nh_ifp;
 			dev_hold(ifp);
 		} else if (ifp != nh->nh_ifp) {
 			/*
 			 * Source and destination interfaces are
 			 * different.
 			 */
 			error = ENETUNREACH;
 			goto error_put_ifp;
 		}
 		break;
 	case ADDR_SRC_ANY:
 		/*
 		 * check for loopback device; saddr is the address that
 		 * step 4 copies into src_in
 		 */
 		if (nh->nh_ifp->if_flags & IFF_LOOPBACK)
 			saddr = (struct sockaddr *)&dst_tmp;
 		else
 			saddr = nh->nh_ifa->ifa_addr;
 
 		/* get destination network interface from route */
 		ifp = nh->nh_ifp;
 		dev_hold(ifp);
 		break;
 	default:
 		/* other type values already left the function in step 1 */
 		break;
 	}
 
 	/*
 	 * Step 3 - resolve destination MAC address
 	 */
 	if (IN6_IS_ADDR_MULTICAST(&dst_tmp.sin6_addr)) {
 		bool is_gw = (nh->nh_flags & NHF_GATEWAY) != 0;
 		error = addr_resolve_multi(edst, ifp,
 		    (struct sockaddr *)&dst_tmp);
 		if (error != 0)
 			goto error_put_ifp;
 		else if (is_gw)
 			addr->network = RDMA_NETWORK_IPV6;
 	} else if (nh->nh_ifp->if_flags & IFF_LOOPBACK) {
 		/* loopback route: report an all-zero link-layer address */
 		memset(edst, 0, MAX_ADDR_LEN);
 		error = 0;
 	} else {
 		bool is_gw = (nh->nh_flags & NHF_GATEWAY) != 0;
 		memset(edst, 0, MAX_ADDR_LEN);
 		/* resolve the gateway address when the route has one */
 		error = nd6_resolve(ifp, LLE_SF(AF_INET6, is_gw), NULL,
 		    is_gw ? &nh->gw_sa : (const struct sockaddr *)&dst_tmp,
 		    edst, NULL, NULL);
 		if (error != 0)
 			goto error_put_ifp;
 		else if (is_gw)
 			addr->network = RDMA_NETWORK_IPV6;
 	}
 
 	/*
 	 * Step 4 - update source address, if any
 	 */
 	if (saddr != NULL) {
 		src_port = src_in->sin6_port;
 		memcpy(src_in, saddr, rdma_addr_size(saddr));
 		src_in->sin6_port = src_port;	/* preserve port number */
 	}
 
 	*ifpp = ifp;
 
 	/* success: skip the dev_put() below, ifp is handed to the caller */
 	goto done;
 
 error_put_ifp:
 	dev_put(ifp);
 done:
 	CURVNET_RESTORE();
 
 	/* report "try again" conditions as ENODATA */
 	if (error == EWOULDBLOCK || error == EAGAIN)
 		error = ENODATA;
 	return (-error);
 }
 #else
 /*
  * Stub variant of addr6_resolve(), compiled when INET6 is not defined.
  *
  * None of the arguments are read or written; every request is
  * rejected with -EADDRNOTAVAIL.
  */
 static int
 addr6_resolve(struct sockaddr_in6 *src_in, const struct sockaddr_in6 *dst_in,
     struct rdma_dev_addr *addr, u8 *edst, struct ifnet **ifpp)
 {
 
 	return (-EADDRNOTAVAIL);
 }
 #endif
 
 /*
  * Store the link-layer addresses for a resolved destination in "addr".
  *
  * dev:    outgoing interface selected by the resolver.
  * dst_in: destination socket address; only used for loopback devices.
  * edst:   destination link-layer address produced by the resolver.
  * addr:   RDMA device address being filled in.
  *
  * Returns the result of rdma_copy_addr() for regular devices, or the
  * result of rdma_translate_ip() for loopback devices.
  */
 static int
 addr_resolve_neigh(struct ifnet *dev, const struct sockaddr *dst_in,
     u8 *edst, struct rdma_dev_addr *addr)
 {
 	u8 *dst_lladdr;
 	int error;
 
 	if ((dev->if_flags & IFF_LOOPBACK) == 0) {
 		/*
 		 * Regular device: hand over the resolved destination
 		 * address, unless the interface is flagged IFF_NOARP,
 		 * in which case no destination address is passed on.
 		 */
 		dst_lladdr = (dev->if_flags & IFF_NOARP) ? NULL : edst;
 		return (rdma_copy_addr(addr, dev, dst_lladdr));
 	}
 
 	/*
 	 * Loopback device. Binding to a loopback device is not
 	 * allowed, so drop the binding when it refers to this device
 	 * before translating the destination address.
 	 */
 	if (addr->bound_dev_if == dev->if_index)
 		addr->bound_dev_if = 0;
 
 	error = rdma_translate_ip(dst_in, addr);
 	if (error == 0) {
 		/* source and destination share one link-layer address */
 		memcpy(addr->dst_dev_addr, addr->src_dev_addr, MAX_ADDR_LEN);
 	}
 	return (error);
 }
 
 static int addr_resolve(struct sockaddr *src_in,
 			const struct sockaddr *dst_in,
 			struct rdma_dev_addr *addr)
 {
 	struct epoch_tracker et;
 	struct ifnet *ndev = NULL;
 	u8 edst[MAX_ADDR_LEN];
 	int ret;
 
 	if (dst_in->sa_family != src_in->sa_family)
 		return -EINVAL;
 
 	NET_EPOCH_ENTER(et);
 	switch (src_in->sa_family) {
 	case AF_INET:
 		ret = addr4_resolve((struct sockaddr_in *)src_in,
 				    (const struct sockaddr_in *)dst_in,
 				    addr, edst, &ndev);
 		break;
 	case AF_INET6:
 		ret = addr6_resolve((struct sockaddr_in6 *)src_in,
 				    (const struct sockaddr_in6 *)dst_in, addr,
 				    edst, &ndev);
 		break;
 	default:
 		ret = -EADDRNOTAVAIL;
 		break;
 	}
 	NET_EPOCH_EXIT(et);
 
 	/* check for error */
 	if (ret != 0)
 		return ret;
 
 	/* store MAC addresses and check for loopback */
 	ret = addr_resolve_neigh(ndev, dst_in, edst, addr);
 
 	/* set belonging VNET, if any */
 	addr->net = dev_net(ndev);
 	dev_put(ndev);
 
 	return ret;
 }
 
 static void process_req(struct work_struct *work)
 {
 	struct addr_req *req, *temp_req;
 	struct sockaddr *src_in, *dst_in;
 	struct list_head done_list;
 
 	INIT_LIST_HEAD(&done_list);
 
 	mutex_lock(&lock);
 	list_for_each_entry_safe(req, temp_req, &req_list, list) {
 		if (req->status == -ENODATA) {
 			src_in = (struct sockaddr *) &req->src_addr;
 			dst_in = (struct sockaddr *) &req->dst_addr;
 			req->status = addr_resolve(src_in, dst_in, req->addr);
 			if (req->status && time_after_eq(jiffies, req->timeout))
 				req->status = -ETIMEDOUT;
 			else if (req->status == -ENODATA)
 				continue;
 		}
 		list_move_tail(&req->list, &done_list);
 	}
 
 	if (!list_empty(&req_list)) {
 		req = list_entry(req_list.next, struct addr_req, list);
 		set_timeout(req->timeout);
 	}
 	mutex_unlock(&lock);
 
 	list_for_each_entry_safe(req, temp_req, &done_list, list) {
 		list_del(&req->list);
 		req->callback(req->status, (struct sockaddr *) &req->src_addr,
 			req->addr, req->context);
 		put_client(req->client);
 		kfree(req);
 	}
 }
 
 /*
  * Start resolving "dst_addr" and report the outcome through
  * "callback".
  *
  * A request is allocated, the addresses are copied into it and one
  * resolve attempt is made immediately. If that attempt succeeds, or
  * ends with -ENODATA, the request is queued and 0 is returned: a
  * successful request is due immediately, an unresolved one gets a
  * deadline of "timeout_ms" from now. Any other status is returned to
  * the caller directly, after dropping the client reference and
  * freeing the request.
  *
  * src_addr may be NULL, in which case only the address family of the
  * destination is recorded as source.
  *
  * Returns 0 when the request was queued, -ENOMEM when the request
  * could not be allocated, -EINVAL when the address families differ,
  * or the negative status of the failed resolve attempt.
  */
 int rdma_resolve_ip(struct rdma_addr_client *client,
 		    struct sockaddr *src_addr, struct sockaddr *dst_addr,
 		    struct rdma_dev_addr *addr, int timeout_ms,
 		    void (*callback)(int status, struct sockaddr *src_addr,
 				     struct rdma_dev_addr *addr, void *context),
 		    void *context)
 {
 	struct sockaddr *req_src, *req_dst;
 	struct addr_req *req;
 	int status;
 
 	req = kzalloc(sizeof(*req), GFP_KERNEL);
 	if (req == NULL)
 		return (-ENOMEM);
 
 	req_src = (struct sockaddr *)&req->src_addr;
 	req_dst = (struct sockaddr *)&req->dst_addr;
 
 	if (src_addr == NULL) {
 		req_src->sa_family = dst_addr->sa_family;
 	} else if (src_addr->sa_family == dst_addr->sa_family) {
 		memcpy(req_src, src_addr, rdma_addr_size(src_addr));
 	} else {
 		kfree(req);
 		return (-EINVAL);
 	}
 	memcpy(req_dst, dst_addr, rdma_addr_size(dst_addr));
 
 	req->addr = addr;
 	req->callback = callback;
 	req->context = context;
 	req->client = client;
 	atomic_inc(&client->refcount);
 
 	status = addr_resolve(req_src, req_dst, addr);
 	req->status = status;
 
 	if (status != 0 && status != -ENODATA) {
 		/* hard failure: undo the client reference and bail out */
 		atomic_dec(&client->refcount);
 		kfree(req);
 		return (status);
 	}
 
 	if (status == 0)
 		req->timeout = jiffies;
 	else
 		req->timeout = msecs_to_jiffies(timeout_ms) + jiffies;
 	queue_req(req);
 
 	return (0);
 }
 EXPORT_SYMBOL(rdma_resolve_ip);
 
 int rdma_resolve_ip_route(struct sockaddr *src_addr,
 			  const struct sockaddr *dst_addr,
 			  struct rdma_dev_addr *addr)
 {
 	struct sockaddr_storage ssrc_addr = {};
 	struct sockaddr *src_in = (struct sockaddr *)&ssrc_addr;
 
 	if (src_addr) {
 		if (src_addr->sa_family != dst_addr->sa_family)
 			return -EINVAL;
 
 		memcpy(src_in, src_addr, rdma_addr_size(src_addr));
 	} else {
 		src_in->sa_family = dst_addr->sa_family;
 	}
 
 	return addr_resolve(src_in, dst_addr, addr);
 }
 EXPORT_SYMBOL(rdma_resolve_ip_route);
 
 /*
  * Cancel the pending request that resolves into "addr", if any.
  *
  * The first matching request is marked -ECANCELED, made due
  * immediately and moved to the front of the request list, and the
  * timer is re-armed for it. The request itself is not completed
  * here. The request list is protected by "lock".
  */
 void rdma_addr_cancel(struct rdma_dev_addr *addr)
 {
 	struct addr_req *cur, *nxt;
 
 	mutex_lock(&lock);
 	list_for_each_entry_safe(cur, nxt, &req_list, list) {
 		if (cur->addr != addr)
 			continue;
 
 		cur->status = -ECANCELED;
 		cur->timeout = jiffies;
 		list_move(&cur->list, &req_list);
 		set_timeout(cur->timeout);
 		break;
 	}
 	mutex_unlock(&lock);
 }
 EXPORT_SYMBOL(rdma_addr_cancel);
 
 /*
  * Context shared between rdma_addr_find_l2_eth_by_grh() and its
  * completion callback resolve_cb().
  */
 struct resolve_cb_context {
 	struct rdma_dev_addr *addr;	/* where resolve_cb() copies the result */
 	struct completion comp;		/* completed by resolve_cb() */
 	int status;			/* status passed to resolve_cb() */
 };
 
 /*
  * Completion callback used by rdma_addr_find_l2_eth_by_grh().
  *
  * status:   result of the resolve request, 0 on success.
  * src_addr: source address of the request (not used).
  * addr:     resolved RDMA device address.
  * context:  pointer to the waiter's struct resolve_cb_context.
  *
  * On success the resolved address is copied to the context's "addr".
  * The status is then recorded and the waiter is woken up.
  */
 static void resolve_cb(int status, struct sockaddr *src_addr,
 	     struct rdma_dev_addr *addr, void *context)
 {
 	struct resolve_cb_context *ctx = context;
 
 	/*
 	 * rdma_addr_find_l2_eth_by_grh() passes the same object as the
 	 * request address and as ctx->addr. memcpy() on overlapping
 	 * objects is undefined, so skip the copy when source and
 	 * destination are identical.
 	 */
 	if (status == 0 && ctx->addr != addr)
 		memcpy(ctx->addr, addr, sizeof(*addr));
 	ctx->status = status;
 	complete(&ctx->comp);
 }
 
 /*
  * Look up the destination MAC address for a pair of GIDs.
  *
  * sgid, dgid: source and destination GID, converted to socket
  *             addresses with rdma_gid2ip().
  * dmac:       receives ETH_ALEN bytes of destination MAC address.
  * dev:        interface the lookup is bound to; also supplies the
  *             VNET.
  * hoplimit:   optional; when non-NULL receives the hop limit.
  *
  * The lookup is submitted through rdma_resolve_ip() with a timeout
  * argument of 1000 milliseconds and this function then blocks until
  * resolve_cb() signals completion.
  *
  * Returns 0 on success, or the error from rdma_resolve_ip() or from
  * the completed request.
  */
 int rdma_addr_find_l2_eth_by_grh(const union ib_gid *sgid,
 				 const union ib_gid *dgid,
 				 u8 *dmac, struct ifnet *dev,
 				 int *hoplimit)
 {
 	union rdma_sockaddr src_sa, dst_sa;
 	struct resolve_cb_context waiter;
 	struct rdma_dev_addr resolved;
 	int error;
 
 	rdma_gid2ip(&src_sa._sockaddr, sgid);
 	rdma_gid2ip(&dst_sa._sockaddr, dgid);
 
 	/* bind the lookup to the given interface and its VNET */
 	memset(&resolved, 0, sizeof(resolved));
 	resolved.net = dev_net(dev);
 	resolved.bound_dev_if = dev->if_index;
 
 	init_completion(&waiter.comp);
 	waiter.addr = &resolved;
 
 	error = rdma_resolve_ip(&self, &src_sa._sockaddr, &dst_sa._sockaddr,
 	    &resolved, 1000, resolve_cb, &waiter);
 	if (error != 0)
 		return (error);
 
 	/* resolve_cb() fills in waiter.status before completing */
 	wait_for_completion(&waiter.comp);
 	if (waiter.status != 0)
 		return (waiter.status);
 
 	memcpy(dmac, resolved.dst_dev_addr, ETH_ALEN);
 	if (hoplimit != NULL)
 		*hoplimit = resolved.hoplimit;
 
 	return (0);
 }
 EXPORT_SYMBOL(rdma_addr_find_l2_eth_by_grh);
 
 /*
  * Set up the address resolution service: allocate the "ib_addr"
  * workqueue and register the module's own client ("self").
  *
  * Returns 0 on success or -ENOMEM when the workqueue could not be
  * allocated.
  */
 int addr_init(void)
 {
 	addr_wq = alloc_workqueue("ib_addr", WQ_MEM_RECLAIM, 0);
 	if (addr_wq == NULL)
 		return (-ENOMEM);
 
 	rdma_addr_register_client(&self);
 	return (0);
 }
 
 /*
  * Tear down what addr_init() set up: unregister the module's own
  * client first, then destroy the workqueue.
  */
 void addr_cleanup(void)
 {
 	rdma_addr_unregister_client(&self);
 	destroy_workqueue(addr_wq);
 }