diff --git a/sys/dev/iicbus/if_ic.c b/sys/dev/iicbus/if_ic.c index 4ca8f3960298..52ab5afb9c4e 100644 --- a/sys/dev/iicbus/if_ic.c +++ b/sys/dev/iicbus/if_ic.c @@ -1,435 +1,435 @@ /*- * SPDX-License-Identifier: BSD-2-Clause * * Copyright (c) 1998, 2001 Nicolas Souchu * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include /* * I2C bus IP driver */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "iicbus_if.h" #define PCF_MASTER_ADDRESS 0xaa #define ICHDRLEN sizeof(u_int32_t) #define ICMTU 1500 /* default mtu */ struct ic_softc { if_t ic_ifp; device_t ic_dev; u_char ic_addr; /* peer I2C address */ int ic_flags; char *ic_obuf; char *ic_ifbuf; char *ic_cp; int ic_xfercnt; int ic_iferrs; struct mtx ic_lock; }; #define IC_SENDING 0x0001 #define IC_OBUF_BUSY 0x0002 #define IC_IFBUF_BUSY 0x0004 #define IC_BUFFERS_BUSY (IC_OBUF_BUSY | IC_IFBUF_BUSY) #define IC_BUFFER_WAITER 0x0004 static int icprobe(device_t); static int icattach(device_t); static int icioctl(if_t, u_long, caddr_t); static int icoutput(if_t, struct mbuf *, const struct sockaddr *, struct route *); static int icintr(device_t, int, char *); static device_method_t ic_methods[] = { /* device interface */ DEVMETHOD(device_probe, icprobe), DEVMETHOD(device_attach, icattach), /* iicbus interface */ DEVMETHOD(iicbus_intr, icintr), { 0, 0 } }; static driver_t ic_driver = { "ic", ic_methods, sizeof(struct ic_softc), }; static void ic_alloc_buffers(struct ic_softc *sc, int mtu) { char *obuf, *ifbuf; obuf = malloc(mtu + ICHDRLEN, M_DEVBUF, M_WAITOK); ifbuf = malloc(mtu + ICHDRLEN, M_DEVBUF, M_WAITOK); mtx_lock(&sc->ic_lock); while (sc->ic_flags & IC_BUFFERS_BUSY) { sc->ic_flags |= IC_BUFFER_WAITER; mtx_sleep(sc, &sc->ic_lock, 0, "icalloc", 0); sc->ic_flags &= ~IC_BUFFER_WAITER; } free(sc->ic_obuf, M_DEVBUF); free(sc->ic_ifbuf, M_DEVBUF); sc->ic_obuf = obuf; sc->ic_ifbuf = ifbuf; if_setmtu(sc->ic_ifp, mtu); mtx_unlock(&sc->ic_lock); } /* * icprobe() */ static int icprobe(device_t dev) { return (BUS_PROBE_NOWILDCARD); } /* * icattach() */ static int icattach(device_t dev) { struct ic_softc *sc = (struct ic_softc *)device_get_softc(dev); if_t 
ifp; ifp = sc->ic_ifp = if_alloc(IFT_PARA); if (ifp == NULL) return (ENOSPC); mtx_init(&sc->ic_lock, device_get_nameunit(dev), MTX_NETWORK_LOCK, MTX_DEF); sc->ic_addr = PCF_MASTER_ADDRESS; /* XXX only PCF masters */ sc->ic_dev = dev; if_setsoftc(ifp, sc); if_initname(ifp, device_get_name(dev), device_get_unit(dev)); if_setflags(ifp, IFF_SIMPLEX | IFF_POINTOPOINT | IFF_MULTICAST); if_setioctlfn(ifp, icioctl); if_setoutputfn(ifp, icoutput); if_setifheaderlen(ifp, 0); if_setsendqlen(ifp, ifqmaxlen); ic_alloc_buffers(sc, ICMTU); if_attach(ifp); bpfattach(ifp, DLT_NULL, ICHDRLEN); return (0); } /* * iciotcl() */ static int icioctl(if_t ifp, u_long cmd, caddr_t data) { struct ic_softc *sc = if_getsoftc(ifp); device_t icdev = sc->ic_dev; device_t parent = device_get_parent(icdev); struct ifaddr *ifa = (struct ifaddr *)data; struct ifreq *ifr = (struct ifreq *)data; int error; switch (cmd) { case SIOCAIFADDR: case SIOCSIFADDR: if (ifa->ifa_addr->sa_family != AF_INET) return (EAFNOSUPPORT); mtx_lock(&sc->ic_lock); if_setflagbits(ifp, IFF_UP, 0); goto locked; case SIOCSIFFLAGS: mtx_lock(&sc->ic_lock); locked: if ((!(if_getflags(ifp) & IFF_UP)) && (if_getdrvflags(ifp) & IFF_DRV_RUNNING)) { /* XXX disable PCF */ if_setdrvflagbits(ifp, 0, IFF_DRV_RUNNING); mtx_unlock(&sc->ic_lock); /* IFF_UP is not set, try to release the bus anyway */ iicbus_release_bus(parent, icdev); break; } if (((if_getflags(ifp) & IFF_UP)) && (!(if_getdrvflags(ifp) & IFF_DRV_RUNNING))) { mtx_unlock(&sc->ic_lock); if ((error = iicbus_request_bus(parent, icdev, IIC_WAIT | IIC_INTR))) return (error); mtx_lock(&sc->ic_lock); iicbus_reset(parent, IIC_FASTEST, 0, NULL); if_setdrvflagbits(ifp, IFF_DRV_RUNNING, 0); } mtx_unlock(&sc->ic_lock); break; case SIOCSIFMTU: ic_alloc_buffers(sc, ifr->ifr_mtu); break; case SIOCGIFMTU: mtx_lock(&sc->ic_lock); ifr->ifr_mtu = if_getmtu(sc->ic_ifp); mtx_unlock(&sc->ic_lock); break; case SIOCADDMULTI: case SIOCDELMULTI: if (ifr == NULL) return (EAFNOSUPPORT); /* XXX */ switch (ifr->ifr_addr.sa_family) { case AF_INET: break; default: return (EAFNOSUPPORT); } break; default: return (EINVAL); } return (0); } /* * icintr() */ static int icintr(device_t dev, int event, char *ptr) { struct ic_softc *sc = (struct ic_softc *)device_get_softc(dev); struct mbuf *top; int len; mtx_lock(&sc->ic_lock); switch (event) { case INTR_GENERAL: case INTR_START: sc->ic_cp = sc->ic_ifbuf; sc->ic_xfercnt = 0; sc->ic_flags |= IC_IFBUF_BUSY; break; case INTR_STOP: /* if any error occurred during transfert, * drop the packet */ sc->ic_flags &= ~IC_IFBUF_BUSY; if ((sc->ic_flags & (IC_BUFFERS_BUSY | IC_BUFFER_WAITER)) == IC_BUFFER_WAITER) wakeup(&sc); if (sc->ic_iferrs) goto err; if ((len = sc->ic_xfercnt) == 0) break; /* ignore */ if (len <= ICHDRLEN) goto err; len -= ICHDRLEN; if_inc_counter(sc->ic_ifp, IFCOUNTER_IPACKETS, 1); if_inc_counter(sc->ic_ifp, IFCOUNTER_IBYTES, len); BPF_TAP(sc->ic_ifp, sc->ic_ifbuf, len + ICHDRLEN); top = m_devget(sc->ic_ifbuf + ICHDRLEN, len, 0, sc->ic_ifp, 0); if (top) { struct epoch_tracker et; mtx_unlock(&sc->ic_lock); M_SETFIB(top, if_getfib(sc->ic_ifp)); NET_EPOCH_ENTER(et); netisr_dispatch(NETISR_IP, top); NET_EPOCH_EXIT(et); mtx_lock(&sc->ic_lock); } break; err: if_printf(sc->ic_ifp, "errors (%d)!\n", sc->ic_iferrs); sc->ic_iferrs = 0; /* reset error count */ if_inc_counter(sc->ic_ifp, IFCOUNTER_IERRORS, 1); break; case INTR_RECEIVE: if (sc->ic_xfercnt >= if_getmtu(sc->ic_ifp) + ICHDRLEN) { sc->ic_iferrs++; } else { *sc->ic_cp++ = *ptr; sc->ic_xfercnt++; } break; case INTR_NOACK: /* xfer 
terminated by master */ break; case INTR_TRANSMIT: *ptr = 0xff; /* XXX */ break; case INTR_ERROR: sc->ic_iferrs++; break; default: panic("%s: unknown event (%d)!", __func__, event); } mtx_unlock(&sc->ic_lock); return (0); } /* * icoutput() */ static int icoutput(if_t ifp, struct mbuf *m, const struct sockaddr *dst, struct route *ro) { struct ic_softc *sc = if_getsoftc(ifp); device_t icdev = sc->ic_dev; device_t parent = device_get_parent(icdev); int len, sent; struct mbuf *mm; u_char *cp; u_int32_t hdr; - /* BPF writes need to be handled specially. */ - if (dst->sa_family == AF_UNSPEC) + /* BPF writes need to be handled specially. */ + if (dst->sa_family == AF_UNSPEC || dst->sa_family == pseudo_AF_HDRCMPLT) bcopy(dst->sa_data, &hdr, sizeof(hdr)); else hdr = RO_GET_FAMILY(ro, dst); mtx_lock(&sc->ic_lock); if_setdrvflagbits(ifp, IFF_DRV_RUNNING, 0); /* already sending? */ if (sc->ic_flags & IC_SENDING) { if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); goto error; } /* insert header */ bcopy ((char *)&hdr, sc->ic_obuf, ICHDRLEN); cp = sc->ic_obuf + ICHDRLEN; len = 0; mm = m; do { if (len + mm->m_len > if_getmtu(sc->ic_ifp)) { /* packet too large */ if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); goto error; } bcopy(mtod(mm,char *), cp, mm->m_len); cp += mm->m_len; len += mm->m_len; } while ((mm = mm->m_next)); BPF_MTAP2(ifp, &hdr, sizeof(hdr), m); sc->ic_flags |= (IC_SENDING | IC_OBUF_BUSY); m_freem(m); mtx_unlock(&sc->ic_lock); /* send the packet */ if (iicbus_block_write(parent, sc->ic_addr, sc->ic_obuf, len + ICHDRLEN, &sent)) if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); else { if_inc_counter(ifp, IFCOUNTER_OPACKETS, 1); if_inc_counter(ifp, IFCOUNTER_OBYTES, len); } mtx_lock(&sc->ic_lock); sc->ic_flags &= ~(IC_SENDING | IC_OBUF_BUSY); if ((sc->ic_flags & (IC_BUFFERS_BUSY | IC_BUFFER_WAITER)) == IC_BUFFER_WAITER) wakeup(&sc); mtx_unlock(&sc->ic_lock); return (0); error: m_freem(m); mtx_unlock(&sc->ic_lock); return(0); } DRIVER_MODULE(ic, iicbus, ic_driver, 0, 0); MODULE_DEPEND(ic, iicbus, IICBUS_MINVER, IICBUS_PREFVER, IICBUS_MAXVER); MODULE_VERSION(ic, 1); diff --git a/sys/dev/wg/if_wg.c b/sys/dev/wg/if_wg.c index 2c867956912a..30429c3725cd 100644 --- a/sys/dev/wg/if_wg.c +++ b/sys/dev/wg/if_wg.c @@ -1,3056 +1,3057 @@ /* SPDX-License-Identifier: ISC * * Copyright (C) 2015-2021 Jason A. Donenfeld . All Rights Reserved. * Copyright (C) 2019-2021 Matt Dunwoodie * Copyright (c) 2019-2020 Rubicon Communications, LLC (Netgate) * Copyright (c) 2021 Kyle Evans * Copyright (c) 2022 The FreeBSD Foundation */ #include "opt_inet.h" #include "opt_inet6.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "wg_noise.h" #include "wg_cookie.h" #include "version.h" #include "if_wg.h" #define DEFAULT_MTU (ETHERMTU - 80) #define MAX_MTU (IF_MAXMTU - 80) #define MAX_STAGED_PKT 128 #define MAX_QUEUED_PKT 1024 #define MAX_QUEUED_PKT_MASK (MAX_QUEUED_PKT - 1) #define MAX_QUEUED_HANDSHAKES 4096 #define REKEY_TIMEOUT_JITTER 334 /* 1/3 sec, round for arc4random_uniform */ #define MAX_TIMER_HANDSHAKES (90 / REKEY_TIMEOUT) #define NEW_HANDSHAKE_TIMEOUT (REKEY_TIMEOUT + KEEPALIVE_TIMEOUT) #define UNDERLOAD_TIMEOUT 1 #define DPRINTF(sc, ...) 
if (if_getflags(sc->sc_ifp) & IFF_DEBUG) if_printf(sc->sc_ifp, ##__VA_ARGS__) /* First byte indicating packet type on the wire */ #define WG_PKT_INITIATION htole32(1) #define WG_PKT_RESPONSE htole32(2) #define WG_PKT_COOKIE htole32(3) #define WG_PKT_DATA htole32(4) #define WG_PKT_PADDING 16 #define WG_KEY_SIZE 32 struct wg_pkt_initiation { uint32_t t; uint32_t s_idx; uint8_t ue[NOISE_PUBLIC_KEY_LEN]; uint8_t es[NOISE_PUBLIC_KEY_LEN + NOISE_AUTHTAG_LEN]; uint8_t ets[NOISE_TIMESTAMP_LEN + NOISE_AUTHTAG_LEN]; struct cookie_macs m; }; struct wg_pkt_response { uint32_t t; uint32_t s_idx; uint32_t r_idx; uint8_t ue[NOISE_PUBLIC_KEY_LEN]; uint8_t en[0 + NOISE_AUTHTAG_LEN]; struct cookie_macs m; }; struct wg_pkt_cookie { uint32_t t; uint32_t r_idx; uint8_t nonce[COOKIE_NONCE_SIZE]; uint8_t ec[COOKIE_ENCRYPTED_SIZE]; }; struct wg_pkt_data { uint32_t t; uint32_t r_idx; uint64_t nonce; uint8_t buf[]; }; struct wg_endpoint { union { struct sockaddr r_sa; struct sockaddr_in r_sin; #ifdef INET6 struct sockaddr_in6 r_sin6; #endif } e_remote; union { struct in_addr l_in; #ifdef INET6 struct in6_pktinfo l_pktinfo6; #define l_in6 l_pktinfo6.ipi6_addr #endif } e_local; }; struct aip_addr { uint8_t length; union { uint8_t bytes[16]; uint32_t ip; uint32_t ip6[4]; struct in_addr in; struct in6_addr in6; }; }; struct wg_aip { struct radix_node a_nodes[2]; LIST_ENTRY(wg_aip) a_entry; struct aip_addr a_addr; struct aip_addr a_mask; struct wg_peer *a_peer; sa_family_t a_af; }; struct wg_packet { STAILQ_ENTRY(wg_packet) p_serial; STAILQ_ENTRY(wg_packet) p_parallel; struct wg_endpoint p_endpoint; struct noise_keypair *p_keypair; uint64_t p_nonce; struct mbuf *p_mbuf; int p_mtu; sa_family_t p_af; enum wg_ring_state { WG_PACKET_UNCRYPTED, WG_PACKET_CRYPTED, WG_PACKET_DEAD, } p_state; }; STAILQ_HEAD(wg_packet_list, wg_packet); struct wg_queue { struct mtx q_mtx; struct wg_packet_list q_queue; size_t q_len; }; struct wg_peer { TAILQ_ENTRY(wg_peer) p_entry; uint64_t p_id; struct wg_softc *p_sc; struct noise_remote *p_remote; struct cookie_maker p_cookie; struct rwlock p_endpoint_lock; struct wg_endpoint p_endpoint; struct wg_queue p_stage_queue; struct wg_queue p_encrypt_serial; struct wg_queue p_decrypt_serial; bool p_enabled; bool p_need_another_keepalive; uint16_t p_persistent_keepalive_interval; struct callout p_new_handshake; struct callout p_send_keepalive; struct callout p_retry_handshake; struct callout p_zero_key_material; struct callout p_persistent_keepalive; struct mtx p_handshake_mtx; struct timespec p_handshake_complete; /* nanotime */ int p_handshake_retries; struct grouptask p_send; struct grouptask p_recv; counter_u64_t p_tx_bytes; counter_u64_t p_rx_bytes; LIST_HEAD(, wg_aip) p_aips; size_t p_aips_num; }; struct wg_socket { struct socket *so_so4; struct socket *so_so6; uint32_t so_user_cookie; int so_fibnum; in_port_t so_port; }; struct wg_softc { LIST_ENTRY(wg_softc) sc_entry; if_t sc_ifp; int sc_flags; struct ucred *sc_ucred; struct wg_socket sc_socket; TAILQ_HEAD(,wg_peer) sc_peers; size_t sc_peers_num; struct noise_local *sc_local; struct cookie_checker sc_cookie; struct radix_node_head *sc_aip4; struct radix_node_head *sc_aip6; struct grouptask sc_handshake; struct wg_queue sc_handshake_queue; struct grouptask *sc_encrypt; struct grouptask *sc_decrypt; struct wg_queue sc_encrypt_parallel; struct wg_queue sc_decrypt_parallel; u_int sc_encrypt_last_cpu; u_int sc_decrypt_last_cpu; struct sx sc_lock; }; #define WGF_DYING 0x0001 #define MAX_LOOPS 8 #define MTAG_WGLOOP 0x77676c70 /* wglp */ #define 
GROUPTASK_DRAIN(gtask) \ gtaskqueue_drain((gtask)->gt_taskqueue, &(gtask)->gt_task) #define BPF_MTAP2_AF(ifp, m, af) do { \ uint32_t __bpf_tap_af = (af); \ BPF_MTAP2(ifp, &__bpf_tap_af, sizeof(__bpf_tap_af), m); \ } while (0) static int clone_count; static uma_zone_t wg_packet_zone; static volatile unsigned long peer_counter = 0; static const char wgname[] = "wg"; static unsigned wg_osd_jail_slot; static struct sx wg_sx; SX_SYSINIT(wg_sx, &wg_sx, "wg_sx"); static LIST_HEAD(, wg_softc) wg_list = LIST_HEAD_INITIALIZER(wg_list); static TASKQGROUP_DEFINE(wg_tqg, mp_ncpus, 1); MALLOC_DEFINE(M_WG, "WG", "wireguard"); VNET_DEFINE_STATIC(struct if_clone *, wg_cloner); #define V_wg_cloner VNET(wg_cloner) #define WG_CAPS IFCAP_LINKSTATE struct wg_timespec64 { uint64_t tv_sec; uint64_t tv_nsec; }; static int wg_socket_init(struct wg_softc *, in_port_t); static int wg_socket_bind(struct socket **, struct socket **, in_port_t *); static void wg_socket_set(struct wg_softc *, struct socket *, struct socket *); static void wg_socket_uninit(struct wg_softc *); static int wg_socket_set_sockopt(struct socket *, struct socket *, int, void *, size_t); static int wg_socket_set_cookie(struct wg_softc *, uint32_t); static int wg_socket_set_fibnum(struct wg_softc *, int); static int wg_send(struct wg_softc *, struct wg_endpoint *, struct mbuf *); static void wg_timers_enable(struct wg_peer *); static void wg_timers_disable(struct wg_peer *); static void wg_timers_set_persistent_keepalive(struct wg_peer *, uint16_t); static void wg_timers_get_last_handshake(struct wg_peer *, struct wg_timespec64 *); static void wg_timers_event_data_sent(struct wg_peer *); static void wg_timers_event_data_received(struct wg_peer *); static void wg_timers_event_any_authenticated_packet_sent(struct wg_peer *); static void wg_timers_event_any_authenticated_packet_received(struct wg_peer *); static void wg_timers_event_any_authenticated_packet_traversal(struct wg_peer *); static void wg_timers_event_handshake_initiated(struct wg_peer *); static void wg_timers_event_handshake_complete(struct wg_peer *); static void wg_timers_event_session_derived(struct wg_peer *); static void wg_timers_event_want_initiation(struct wg_peer *); static void wg_timers_run_send_initiation(struct wg_peer *, bool); static void wg_timers_run_retry_handshake(void *); static void wg_timers_run_send_keepalive(void *); static void wg_timers_run_new_handshake(void *); static void wg_timers_run_zero_key_material(void *); static void wg_timers_run_persistent_keepalive(void *); static int wg_aip_add(struct wg_softc *, struct wg_peer *, sa_family_t, const void *, uint8_t); static struct wg_peer *wg_aip_lookup(struct wg_softc *, sa_family_t, void *); static void wg_aip_remove_all(struct wg_softc *, struct wg_peer *); static struct wg_peer *wg_peer_alloc(struct wg_softc *, const uint8_t [WG_KEY_SIZE]); static void wg_peer_free_deferred(struct noise_remote *); static void wg_peer_destroy(struct wg_peer *); static void wg_peer_destroy_all(struct wg_softc *); static void wg_peer_send_buf(struct wg_peer *, uint8_t *, size_t); static void wg_send_initiation(struct wg_peer *); static void wg_send_response(struct wg_peer *); static void wg_send_cookie(struct wg_softc *, struct cookie_macs *, uint32_t, struct wg_endpoint *); static void wg_peer_set_endpoint(struct wg_peer *, struct wg_endpoint *); static void wg_peer_clear_src(struct wg_peer *); static void wg_peer_get_endpoint(struct wg_peer *, struct wg_endpoint *); static void wg_send_buf(struct wg_softc *, struct wg_endpoint 
*, uint8_t *, size_t); static void wg_send_keepalive(struct wg_peer *); static void wg_handshake(struct wg_softc *, struct wg_packet *); static void wg_encrypt(struct wg_softc *, struct wg_packet *); static void wg_decrypt(struct wg_softc *, struct wg_packet *); static void wg_softc_handshake_receive(struct wg_softc *); static void wg_softc_decrypt(struct wg_softc *); static void wg_softc_encrypt(struct wg_softc *); static void wg_encrypt_dispatch(struct wg_softc *); static void wg_decrypt_dispatch(struct wg_softc *); static void wg_deliver_out(struct wg_peer *); static void wg_deliver_in(struct wg_peer *); static struct wg_packet *wg_packet_alloc(struct mbuf *); static void wg_packet_free(struct wg_packet *); static void wg_queue_init(struct wg_queue *, const char *); static void wg_queue_deinit(struct wg_queue *); static size_t wg_queue_len(struct wg_queue *); static int wg_queue_enqueue_handshake(struct wg_queue *, struct wg_packet *); static struct wg_packet *wg_queue_dequeue_handshake(struct wg_queue *); static void wg_queue_push_staged(struct wg_queue *, struct wg_packet *); static void wg_queue_enlist_staged(struct wg_queue *, struct wg_packet_list *); static void wg_queue_delist_staged(struct wg_queue *, struct wg_packet_list *); static void wg_queue_purge(struct wg_queue *); static int wg_queue_both(struct wg_queue *, struct wg_queue *, struct wg_packet *); static struct wg_packet *wg_queue_dequeue_serial(struct wg_queue *); static struct wg_packet *wg_queue_dequeue_parallel(struct wg_queue *); static bool wg_input(struct mbuf *, int, struct inpcb *, const struct sockaddr *, void *); static void wg_peer_send_staged(struct wg_peer *); static int wg_clone_create(struct if_clone *ifc, char *name, size_t len, struct ifc_data *ifd, if_t *ifpp); static void wg_qflush(if_t); static inline int determine_af_and_pullup(struct mbuf **m, sa_family_t *af); static int wg_xmit(if_t, struct mbuf *, sa_family_t, uint32_t); static int wg_transmit(if_t, struct mbuf *); static int wg_output(if_t, struct mbuf *, const struct sockaddr *, struct route *); static int wg_clone_destroy(struct if_clone *ifc, if_t ifp, uint32_t flags); static bool wgc_privileged(struct wg_softc *); static int wgc_get(struct wg_softc *, struct wg_data_io *); static int wgc_set(struct wg_softc *, struct wg_data_io *); static int wg_up(struct wg_softc *); static void wg_down(struct wg_softc *); static void wg_reassign(if_t, struct vnet *, char *unused); static void wg_init(void *); static int wg_ioctl(if_t, u_long, caddr_t); static void vnet_wg_init(const void *); static void vnet_wg_uninit(const void *); static int wg_module_init(void); static void wg_module_deinit(void); /* TODO Peer */ static struct wg_peer * wg_peer_alloc(struct wg_softc *sc, const uint8_t pub_key[WG_KEY_SIZE]) { struct wg_peer *peer; sx_assert(&sc->sc_lock, SX_XLOCKED); peer = malloc(sizeof(*peer), M_WG, M_WAITOK | M_ZERO); peer->p_remote = noise_remote_alloc(sc->sc_local, peer, pub_key); peer->p_tx_bytes = counter_u64_alloc(M_WAITOK); peer->p_rx_bytes = counter_u64_alloc(M_WAITOK); peer->p_id = peer_counter++; peer->p_sc = sc; cookie_maker_init(&peer->p_cookie, pub_key); rw_init(&peer->p_endpoint_lock, "wg_peer_endpoint"); wg_queue_init(&peer->p_stage_queue, "stageq"); wg_queue_init(&peer->p_encrypt_serial, "txq"); wg_queue_init(&peer->p_decrypt_serial, "rxq"); peer->p_enabled = false; peer->p_need_another_keepalive = false; peer->p_persistent_keepalive_interval = 0; callout_init(&peer->p_new_handshake, true); callout_init(&peer->p_send_keepalive, true); 
callout_init(&peer->p_retry_handshake, true); callout_init(&peer->p_persistent_keepalive, true); callout_init(&peer->p_zero_key_material, true); mtx_init(&peer->p_handshake_mtx, "peer handshake", NULL, MTX_DEF); bzero(&peer->p_handshake_complete, sizeof(peer->p_handshake_complete)); peer->p_handshake_retries = 0; GROUPTASK_INIT(&peer->p_send, 0, (gtask_fn_t *)wg_deliver_out, peer); taskqgroup_attach(qgroup_wg_tqg, &peer->p_send, peer, NULL, NULL, "wg send"); GROUPTASK_INIT(&peer->p_recv, 0, (gtask_fn_t *)wg_deliver_in, peer); taskqgroup_attach(qgroup_wg_tqg, &peer->p_recv, peer, NULL, NULL, "wg recv"); LIST_INIT(&peer->p_aips); peer->p_aips_num = 0; return (peer); } static void wg_peer_free_deferred(struct noise_remote *r) { struct wg_peer *peer = noise_remote_arg(r); /* While there are no references remaining, we may still have * p_{send,recv} executing (think empty queue, but wg_deliver_{in,out} * needs to check the queue. We should wait for them and then free. */ GROUPTASK_DRAIN(&peer->p_recv); GROUPTASK_DRAIN(&peer->p_send); taskqgroup_detach(qgroup_wg_tqg, &peer->p_recv); taskqgroup_detach(qgroup_wg_tqg, &peer->p_send); wg_queue_deinit(&peer->p_decrypt_serial); wg_queue_deinit(&peer->p_encrypt_serial); wg_queue_deinit(&peer->p_stage_queue); counter_u64_free(peer->p_tx_bytes); counter_u64_free(peer->p_rx_bytes); rw_destroy(&peer->p_endpoint_lock); mtx_destroy(&peer->p_handshake_mtx); cookie_maker_free(&peer->p_cookie); free(peer, M_WG); } static void wg_peer_destroy(struct wg_peer *peer) { struct wg_softc *sc = peer->p_sc; sx_assert(&sc->sc_lock, SX_XLOCKED); /* Disable remote and timers. This will prevent any new handshakes * occuring. */ noise_remote_disable(peer->p_remote); wg_timers_disable(peer); /* Now we can remove all allowed IPs so no more packets will be routed * to the peer. */ wg_aip_remove_all(sc, peer); /* Remove peer from the interface, then free. Some references may still * exist to p_remote, so noise_remote_free will wait until they're all * put to call wg_peer_free_deferred. 
*/ sc->sc_peers_num--; TAILQ_REMOVE(&sc->sc_peers, peer, p_entry); DPRINTF(sc, "Peer %" PRIu64 " destroyed\n", peer->p_id); noise_remote_free(peer->p_remote, wg_peer_free_deferred); } static void wg_peer_destroy_all(struct wg_softc *sc) { struct wg_peer *peer, *tpeer; TAILQ_FOREACH_SAFE(peer, &sc->sc_peers, p_entry, tpeer) wg_peer_destroy(peer); } static void wg_peer_set_endpoint(struct wg_peer *peer, struct wg_endpoint *e) { MPASS(e->e_remote.r_sa.sa_family != 0); if (memcmp(e, &peer->p_endpoint, sizeof(*e)) == 0) return; rw_wlock(&peer->p_endpoint_lock); peer->p_endpoint = *e; rw_wunlock(&peer->p_endpoint_lock); } static void wg_peer_clear_src(struct wg_peer *peer) { rw_wlock(&peer->p_endpoint_lock); bzero(&peer->p_endpoint.e_local, sizeof(peer->p_endpoint.e_local)); rw_wunlock(&peer->p_endpoint_lock); } static void wg_peer_get_endpoint(struct wg_peer *peer, struct wg_endpoint *e) { rw_rlock(&peer->p_endpoint_lock); *e = peer->p_endpoint; rw_runlock(&peer->p_endpoint_lock); } /* Allowed IP */ static int wg_aip_add(struct wg_softc *sc, struct wg_peer *peer, sa_family_t af, const void *addr, uint8_t cidr) { struct radix_node_head *root; struct radix_node *node; struct wg_aip *aip; int ret = 0; aip = malloc(sizeof(*aip), M_WG, M_WAITOK | M_ZERO); aip->a_peer = peer; aip->a_af = af; switch (af) { #ifdef INET case AF_INET: if (cidr > 32) cidr = 32; root = sc->sc_aip4; aip->a_addr.in = *(const struct in_addr *)addr; aip->a_mask.ip = htonl(~((1LL << (32 - cidr)) - 1) & 0xffffffff); aip->a_addr.ip &= aip->a_mask.ip; aip->a_addr.length = aip->a_mask.length = offsetof(struct aip_addr, in) + sizeof(struct in_addr); break; #endif #ifdef INET6 case AF_INET6: if (cidr > 128) cidr = 128; root = sc->sc_aip6; aip->a_addr.in6 = *(const struct in6_addr *)addr; in6_prefixlen2mask(&aip->a_mask.in6, cidr); for (int i = 0; i < 4; i++) aip->a_addr.ip6[i] &= aip->a_mask.ip6[i]; aip->a_addr.length = aip->a_mask.length = offsetof(struct aip_addr, in6) + sizeof(struct in6_addr); break; #endif default: free(aip, M_WG); return (EAFNOSUPPORT); } RADIX_NODE_HEAD_LOCK(root); node = root->rnh_addaddr(&aip->a_addr, &aip->a_mask, &root->rh, aip->a_nodes); if (node == aip->a_nodes) { LIST_INSERT_HEAD(&peer->p_aips, aip, a_entry); peer->p_aips_num++; } else if (!node) node = root->rnh_lookup(&aip->a_addr, &aip->a_mask, &root->rh); if (!node) { free(aip, M_WG); ret = ENOMEM; } else if (node != aip->a_nodes) { free(aip, M_WG); aip = (struct wg_aip *)node; if (aip->a_peer != peer) { LIST_REMOVE(aip, a_entry); aip->a_peer->p_aips_num--; aip->a_peer = peer; LIST_INSERT_HEAD(&peer->p_aips, aip, a_entry); aip->a_peer->p_aips_num++; } } RADIX_NODE_HEAD_UNLOCK(root); return (ret); } static struct wg_peer * wg_aip_lookup(struct wg_softc *sc, sa_family_t af, void *a) { struct radix_node_head *root; struct radix_node *node; struct wg_peer *peer; struct aip_addr addr; RADIX_NODE_HEAD_RLOCK_TRACKER; switch (af) { case AF_INET: root = sc->sc_aip4; memcpy(&addr.in, a, sizeof(addr.in)); addr.length = offsetof(struct aip_addr, in) + sizeof(struct in_addr); break; case AF_INET6: root = sc->sc_aip6; memcpy(&addr.in6, a, sizeof(addr.in6)); addr.length = offsetof(struct aip_addr, in6) + sizeof(struct in6_addr); break; default: return NULL; } RADIX_NODE_HEAD_RLOCK(root); node = root->rnh_matchaddr(&addr, &root->rh); if (node != NULL) { peer = ((struct wg_aip *)node)->a_peer; noise_remote_ref(peer->p_remote); } else { peer = NULL; } RADIX_NODE_HEAD_RUNLOCK(root); return (peer); } static void wg_aip_remove_all(struct wg_softc *sc, struct wg_peer 
*peer) { struct wg_aip *aip, *taip; RADIX_NODE_HEAD_LOCK(sc->sc_aip4); LIST_FOREACH_SAFE(aip, &peer->p_aips, a_entry, taip) { if (aip->a_af == AF_INET) { if (sc->sc_aip4->rnh_deladdr(&aip->a_addr, &aip->a_mask, &sc->sc_aip4->rh) == NULL) panic("failed to delete aip %p", aip); LIST_REMOVE(aip, a_entry); peer->p_aips_num--; free(aip, M_WG); } } RADIX_NODE_HEAD_UNLOCK(sc->sc_aip4); RADIX_NODE_HEAD_LOCK(sc->sc_aip6); LIST_FOREACH_SAFE(aip, &peer->p_aips, a_entry, taip) { if (aip->a_af == AF_INET6) { if (sc->sc_aip6->rnh_deladdr(&aip->a_addr, &aip->a_mask, &sc->sc_aip6->rh) == NULL) panic("failed to delete aip %p", aip); LIST_REMOVE(aip, a_entry); peer->p_aips_num--; free(aip, M_WG); } } RADIX_NODE_HEAD_UNLOCK(sc->sc_aip6); if (!LIST_EMPTY(&peer->p_aips) || peer->p_aips_num != 0) panic("wg_aip_remove_all could not delete all %p", peer); } static int wg_socket_init(struct wg_softc *sc, in_port_t port) { struct ucred *cred = sc->sc_ucred; struct socket *so4 = NULL, *so6 = NULL; int rc; sx_assert(&sc->sc_lock, SX_XLOCKED); if (!cred) return (EBUSY); /* * For socket creation, we use the creds of the thread that created the * tunnel rather than the current thread to maintain the semantics that * WireGuard has on Linux with network namespaces -- that the sockets * are created in their home vnet so that they can be configured and * functionally attached to a foreign vnet as the jail's only interface * to the network. */ #ifdef INET rc = socreate(AF_INET, &so4, SOCK_DGRAM, IPPROTO_UDP, cred, curthread); if (rc) goto out; rc = udp_set_kernel_tunneling(so4, wg_input, NULL, sc); /* * udp_set_kernel_tunneling can only fail if there is already a tunneling function set. * This should never happen with a new socket. */ MPASS(rc == 0); #endif #ifdef INET6 rc = socreate(AF_INET6, &so6, SOCK_DGRAM, IPPROTO_UDP, cred, curthread); if (rc) goto out; rc = udp_set_kernel_tunneling(so6, wg_input, NULL, sc); MPASS(rc == 0); #endif if (sc->sc_socket.so_user_cookie) { rc = wg_socket_set_sockopt(so4, so6, SO_USER_COOKIE, &sc->sc_socket.so_user_cookie, sizeof(sc->sc_socket.so_user_cookie)); if (rc) goto out; } rc = wg_socket_set_sockopt(so4, so6, SO_SETFIB, &sc->sc_socket.so_fibnum, sizeof(sc->sc_socket.so_fibnum)); if (rc) goto out; rc = wg_socket_bind(&so4, &so6, &port); if (!rc) { sc->sc_socket.so_port = port; wg_socket_set(sc, so4, so6); } out: if (rc) { if (so4 != NULL) soclose(so4); if (so6 != NULL) soclose(so6); } return (rc); } static int wg_socket_set_sockopt(struct socket *so4, struct socket *so6, int name, void *val, size_t len) { int ret4 = 0, ret6 = 0; struct sockopt sopt = { .sopt_dir = SOPT_SET, .sopt_level = SOL_SOCKET, .sopt_name = name, .sopt_val = val, .sopt_valsize = len }; if (so4) ret4 = sosetopt(so4, &sopt); if (so6) ret6 = sosetopt(so6, &sopt); return (ret4 ?: ret6); } static int wg_socket_set_cookie(struct wg_softc *sc, uint32_t user_cookie) { struct wg_socket *so = &sc->sc_socket; int ret; sx_assert(&sc->sc_lock, SX_XLOCKED); ret = wg_socket_set_sockopt(so->so_so4, so->so_so6, SO_USER_COOKIE, &user_cookie, sizeof(user_cookie)); if (!ret) so->so_user_cookie = user_cookie; return (ret); } static int wg_socket_set_fibnum(struct wg_softc *sc, int fibnum) { struct wg_socket *so = &sc->sc_socket; int ret; sx_assert(&sc->sc_lock, SX_XLOCKED); ret = wg_socket_set_sockopt(so->so_so4, so->so_so6, SO_SETFIB, &fibnum, sizeof(fibnum)); if (!ret) so->so_fibnum = fibnum; return (ret); } static void wg_socket_uninit(struct wg_softc *sc) { wg_socket_set(sc, NULL, NULL); } static void wg_socket_set(struct wg_softc 
*sc, struct socket *new_so4, struct socket *new_so6) { struct wg_socket *so = &sc->sc_socket; struct socket *so4, *so6; sx_assert(&sc->sc_lock, SX_XLOCKED); so4 = atomic_load_ptr(&so->so_so4); so6 = atomic_load_ptr(&so->so_so6); atomic_store_ptr(&so->so_so4, new_so4); atomic_store_ptr(&so->so_so6, new_so6); if (!so4 && !so6) return; NET_EPOCH_WAIT(); if (so4) soclose(so4); if (so6) soclose(so6); } static int wg_socket_bind(struct socket **in_so4, struct socket **in_so6, in_port_t *requested_port) { struct socket *so4 = *in_so4, *so6 = *in_so6; int ret4 = 0, ret6 = 0; in_port_t port = *requested_port; struct sockaddr_in sin = { .sin_len = sizeof(struct sockaddr_in), .sin_family = AF_INET, .sin_port = htons(port) }; struct sockaddr_in6 sin6 = { .sin6_len = sizeof(struct sockaddr_in6), .sin6_family = AF_INET6, .sin6_port = htons(port) }; if (so4) { ret4 = sobind(so4, (struct sockaddr *)&sin, curthread); if (ret4 && ret4 != EADDRNOTAVAIL) return (ret4); if (!ret4 && !sin.sin_port) { struct sockaddr_in *bound_sin; int ret = so4->so_proto->pr_sockaddr(so4, (struct sockaddr **)&bound_sin); if (ret) return (ret); port = ntohs(bound_sin->sin_port); sin6.sin6_port = bound_sin->sin_port; free(bound_sin, M_SONAME); } } if (so6) { ret6 = sobind(so6, (struct sockaddr *)&sin6, curthread); if (ret6 && ret6 != EADDRNOTAVAIL) return (ret6); if (!ret6 && !sin6.sin6_port) { struct sockaddr_in6 *bound_sin6; int ret = so6->so_proto->pr_sockaddr(so6, (struct sockaddr **)&bound_sin6); if (ret) return (ret); port = ntohs(bound_sin6->sin6_port); free(bound_sin6, M_SONAME); } } if (ret4 && ret6) return (ret4); *requested_port = port; if (ret4 && !ret6 && so4) { soclose(so4); *in_so4 = NULL; } else if (ret6 && !ret4 && so6) { soclose(so6); *in_so6 = NULL; } return (0); } static int wg_send(struct wg_softc *sc, struct wg_endpoint *e, struct mbuf *m) { struct epoch_tracker et; struct sockaddr *sa; struct wg_socket *so = &sc->sc_socket; struct socket *so4, *so6; struct mbuf *control = NULL; int ret = 0; size_t len = m->m_pkthdr.len; /* Get local control address before locking */ if (e->e_remote.r_sa.sa_family == AF_INET) { if (e->e_local.l_in.s_addr != INADDR_ANY) control = sbcreatecontrol((caddr_t)&e->e_local.l_in, sizeof(struct in_addr), IP_SENDSRCADDR, IPPROTO_IP, M_NOWAIT); #ifdef INET6 } else if (e->e_remote.r_sa.sa_family == AF_INET6) { if (!IN6_IS_ADDR_UNSPECIFIED(&e->e_local.l_in6)) control = sbcreatecontrol((caddr_t)&e->e_local.l_pktinfo6, sizeof(struct in6_pktinfo), IPV6_PKTINFO, IPPROTO_IPV6, M_NOWAIT); #endif } else { m_freem(m); return (EAFNOSUPPORT); } /* Get remote address */ sa = &e->e_remote.r_sa; NET_EPOCH_ENTER(et); so4 = atomic_load_ptr(&so->so_so4); so6 = atomic_load_ptr(&so->so_so6); if (e->e_remote.r_sa.sa_family == AF_INET && so4 != NULL) ret = sosend(so4, sa, NULL, m, control, 0, curthread); else if (e->e_remote.r_sa.sa_family == AF_INET6 && so6 != NULL) ret = sosend(so6, sa, NULL, m, control, 0, curthread); else { ret = ENOTCONN; m_freem(control); m_freem(m); } NET_EPOCH_EXIT(et); if (ret == 0) { if_inc_counter(sc->sc_ifp, IFCOUNTER_OPACKETS, 1); if_inc_counter(sc->sc_ifp, IFCOUNTER_OBYTES, len); } return (ret); } static void wg_send_buf(struct wg_softc *sc, struct wg_endpoint *e, uint8_t *buf, size_t len) { struct mbuf *m; int ret = 0; bool retried = false; retry: m = m_get2(len, M_NOWAIT, MT_DATA, M_PKTHDR); if (!m) { ret = ENOMEM; goto out; } m_copyback(m, 0, len, buf); if (ret == 0) { ret = wg_send(sc, e, m); /* Retry if we couldn't bind to e->e_local */ if (ret == EADDRNOTAVAIL && 
!retried) { bzero(&e->e_local, sizeof(e->e_local)); retried = true; goto retry; } } else { ret = wg_send(sc, e, m); } out: if (ret) DPRINTF(sc, "Unable to send packet: %d\n", ret); } /* Timers */ static void wg_timers_enable(struct wg_peer *peer) { atomic_store_bool(&peer->p_enabled, true); wg_timers_run_persistent_keepalive(peer); } static void wg_timers_disable(struct wg_peer *peer) { /* By setting p_enabled = false, then calling NET_EPOCH_WAIT, we can be * sure no new handshakes are created after the wait. This is because * all callout_resets (scheduling the callout) are guarded by * p_enabled. We can be sure all sections that read p_enabled and then * optionally call callout_reset are finished as they are surrounded by * NET_EPOCH_{ENTER,EXIT}. * * However, as new callouts may be scheduled during NET_EPOCH_WAIT (but * not after), we stop all callouts leaving no callouts active. * * We should also pull NET_EPOCH_WAIT out of the FOREACH(peer) loops, but the * performance impact is acceptable for the time being. */ atomic_store_bool(&peer->p_enabled, false); NET_EPOCH_WAIT(); atomic_store_bool(&peer->p_need_another_keepalive, false); callout_stop(&peer->p_new_handshake); callout_stop(&peer->p_send_keepalive); callout_stop(&peer->p_retry_handshake); callout_stop(&peer->p_persistent_keepalive); callout_stop(&peer->p_zero_key_material); } static void wg_timers_set_persistent_keepalive(struct wg_peer *peer, uint16_t interval) { struct epoch_tracker et; if (interval != peer->p_persistent_keepalive_interval) { atomic_store_16(&peer->p_persistent_keepalive_interval, interval); NET_EPOCH_ENTER(et); if (atomic_load_bool(&peer->p_enabled)) wg_timers_run_persistent_keepalive(peer); NET_EPOCH_EXIT(et); } } static void wg_timers_get_last_handshake(struct wg_peer *peer, struct wg_timespec64 *time) { mtx_lock(&peer->p_handshake_mtx); time->tv_sec = peer->p_handshake_complete.tv_sec; time->tv_nsec = peer->p_handshake_complete.tv_nsec; mtx_unlock(&peer->p_handshake_mtx); } static void wg_timers_event_data_sent(struct wg_peer *peer) { struct epoch_tracker et; NET_EPOCH_ENTER(et); if (atomic_load_bool(&peer->p_enabled) && !callout_pending(&peer->p_new_handshake)) callout_reset(&peer->p_new_handshake, MSEC_2_TICKS( NEW_HANDSHAKE_TIMEOUT * 1000 + arc4random_uniform(REKEY_TIMEOUT_JITTER)), wg_timers_run_new_handshake, peer); NET_EPOCH_EXIT(et); } static void wg_timers_event_data_received(struct wg_peer *peer) { struct epoch_tracker et; NET_EPOCH_ENTER(et); if (atomic_load_bool(&peer->p_enabled)) { if (!callout_pending(&peer->p_send_keepalive)) callout_reset(&peer->p_send_keepalive, MSEC_2_TICKS(KEEPALIVE_TIMEOUT * 1000), wg_timers_run_send_keepalive, peer); else atomic_store_bool(&peer->p_need_another_keepalive, true); } NET_EPOCH_EXIT(et); } static void wg_timers_event_any_authenticated_packet_sent(struct wg_peer *peer) { callout_stop(&peer->p_send_keepalive); } static void wg_timers_event_any_authenticated_packet_received(struct wg_peer *peer) { callout_stop(&peer->p_new_handshake); } static void wg_timers_event_any_authenticated_packet_traversal(struct wg_peer *peer) { struct epoch_tracker et; uint16_t interval; NET_EPOCH_ENTER(et); interval = atomic_load_16(&peer->p_persistent_keepalive_interval); if (atomic_load_bool(&peer->p_enabled) && interval > 0) callout_reset(&peer->p_persistent_keepalive, MSEC_2_TICKS(interval * 1000), wg_timers_run_persistent_keepalive, peer); NET_EPOCH_EXIT(et); } static void wg_timers_event_handshake_initiated(struct wg_peer *peer) { struct epoch_tracker et; NET_EPOCH_ENTER(et); 
if (atomic_load_bool(&peer->p_enabled)) callout_reset(&peer->p_retry_handshake, MSEC_2_TICKS( REKEY_TIMEOUT * 1000 + arc4random_uniform(REKEY_TIMEOUT_JITTER)), wg_timers_run_retry_handshake, peer); NET_EPOCH_EXIT(et); } static void wg_timers_event_handshake_complete(struct wg_peer *peer) { struct epoch_tracker et; NET_EPOCH_ENTER(et); if (atomic_load_bool(&peer->p_enabled)) { mtx_lock(&peer->p_handshake_mtx); callout_stop(&peer->p_retry_handshake); peer->p_handshake_retries = 0; getnanotime(&peer->p_handshake_complete); mtx_unlock(&peer->p_handshake_mtx); wg_timers_run_send_keepalive(peer); } NET_EPOCH_EXIT(et); } static void wg_timers_event_session_derived(struct wg_peer *peer) { struct epoch_tracker et; NET_EPOCH_ENTER(et); if (atomic_load_bool(&peer->p_enabled)) callout_reset(&peer->p_zero_key_material, MSEC_2_TICKS(REJECT_AFTER_TIME * 3 * 1000), wg_timers_run_zero_key_material, peer); NET_EPOCH_EXIT(et); } static void wg_timers_event_want_initiation(struct wg_peer *peer) { struct epoch_tracker et; NET_EPOCH_ENTER(et); if (atomic_load_bool(&peer->p_enabled)) wg_timers_run_send_initiation(peer, false); NET_EPOCH_EXIT(et); } static void wg_timers_run_send_initiation(struct wg_peer *peer, bool is_retry) { if (!is_retry) peer->p_handshake_retries = 0; if (noise_remote_initiation_expired(peer->p_remote) == ETIMEDOUT) wg_send_initiation(peer); } static void wg_timers_run_retry_handshake(void *_peer) { struct epoch_tracker et; struct wg_peer *peer = _peer; mtx_lock(&peer->p_handshake_mtx); if (peer->p_handshake_retries <= MAX_TIMER_HANDSHAKES) { peer->p_handshake_retries++; mtx_unlock(&peer->p_handshake_mtx); DPRINTF(peer->p_sc, "Handshake for peer %" PRIu64 " did not complete " "after %d seconds, retrying (try %d)\n", peer->p_id, REKEY_TIMEOUT, peer->p_handshake_retries + 1); wg_peer_clear_src(peer); wg_timers_run_send_initiation(peer, true); } else { mtx_unlock(&peer->p_handshake_mtx); DPRINTF(peer->p_sc, "Handshake for peer %" PRIu64 " did not complete " "after %d retries, giving up\n", peer->p_id, MAX_TIMER_HANDSHAKES + 2); callout_stop(&peer->p_send_keepalive); wg_queue_purge(&peer->p_stage_queue); NET_EPOCH_ENTER(et); if (atomic_load_bool(&peer->p_enabled) && !callout_pending(&peer->p_zero_key_material)) callout_reset(&peer->p_zero_key_material, MSEC_2_TICKS(REJECT_AFTER_TIME * 3 * 1000), wg_timers_run_zero_key_material, peer); NET_EPOCH_EXIT(et); } } static void wg_timers_run_send_keepalive(void *_peer) { struct epoch_tracker et; struct wg_peer *peer = _peer; wg_send_keepalive(peer); NET_EPOCH_ENTER(et); if (atomic_load_bool(&peer->p_enabled) && atomic_load_bool(&peer->p_need_another_keepalive)) { atomic_store_bool(&peer->p_need_another_keepalive, false); callout_reset(&peer->p_send_keepalive, MSEC_2_TICKS(KEEPALIVE_TIMEOUT * 1000), wg_timers_run_send_keepalive, peer); } NET_EPOCH_EXIT(et); } static void wg_timers_run_new_handshake(void *_peer) { struct wg_peer *peer = _peer; DPRINTF(peer->p_sc, "Retrying handshake with peer %" PRIu64 " because we " "stopped hearing back after %d seconds\n", peer->p_id, NEW_HANDSHAKE_TIMEOUT); wg_peer_clear_src(peer); wg_timers_run_send_initiation(peer, false); } static void wg_timers_run_zero_key_material(void *_peer) { struct wg_peer *peer = _peer; DPRINTF(peer->p_sc, "Zeroing out keys for peer %" PRIu64 ", since we " "haven't received a new one in %d seconds\n", peer->p_id, REJECT_AFTER_TIME * 3); noise_remote_keypairs_clear(peer->p_remote); } static void wg_timers_run_persistent_keepalive(void *_peer) { struct wg_peer *peer = _peer; if 
(atomic_load_16(&peer->p_persistent_keepalive_interval) > 0) wg_send_keepalive(peer); } /* TODO Handshake */ static void wg_peer_send_buf(struct wg_peer *peer, uint8_t *buf, size_t len) { struct wg_endpoint endpoint; counter_u64_add(peer->p_tx_bytes, len); wg_timers_event_any_authenticated_packet_traversal(peer); wg_timers_event_any_authenticated_packet_sent(peer); wg_peer_get_endpoint(peer, &endpoint); wg_send_buf(peer->p_sc, &endpoint, buf, len); } static void wg_send_initiation(struct wg_peer *peer) { struct wg_pkt_initiation pkt; if (noise_create_initiation(peer->p_remote, &pkt.s_idx, pkt.ue, pkt.es, pkt.ets) != 0) return; DPRINTF(peer->p_sc, "Sending handshake initiation to peer %" PRIu64 "\n", peer->p_id); pkt.t = WG_PKT_INITIATION; cookie_maker_mac(&peer->p_cookie, &pkt.m, &pkt, sizeof(pkt) - sizeof(pkt.m)); wg_peer_send_buf(peer, (uint8_t *)&pkt, sizeof(pkt)); wg_timers_event_handshake_initiated(peer); } static void wg_send_response(struct wg_peer *peer) { struct wg_pkt_response pkt; if (noise_create_response(peer->p_remote, &pkt.s_idx, &pkt.r_idx, pkt.ue, pkt.en) != 0) return; DPRINTF(peer->p_sc, "Sending handshake response to peer %" PRIu64 "\n", peer->p_id); wg_timers_event_session_derived(peer); pkt.t = WG_PKT_RESPONSE; cookie_maker_mac(&peer->p_cookie, &pkt.m, &pkt, sizeof(pkt)-sizeof(pkt.m)); wg_peer_send_buf(peer, (uint8_t*)&pkt, sizeof(pkt)); } static void wg_send_cookie(struct wg_softc *sc, struct cookie_macs *cm, uint32_t idx, struct wg_endpoint *e) { struct wg_pkt_cookie pkt; DPRINTF(sc, "Sending cookie response for denied handshake message\n"); pkt.t = WG_PKT_COOKIE; pkt.r_idx = idx; cookie_checker_create_payload(&sc->sc_cookie, cm, pkt.nonce, pkt.ec, &e->e_remote.r_sa); wg_send_buf(sc, e, (uint8_t *)&pkt, sizeof(pkt)); } static void wg_send_keepalive(struct wg_peer *peer) { struct wg_packet *pkt; struct mbuf *m; if (wg_queue_len(&peer->p_stage_queue) > 0) goto send; if ((m = m_gethdr(M_NOWAIT, MT_DATA)) == NULL) return; if ((pkt = wg_packet_alloc(m)) == NULL) { m_freem(m); return; } wg_queue_push_staged(&peer->p_stage_queue, pkt); DPRINTF(peer->p_sc, "Sending keepalive packet to peer %" PRIu64 "\n", peer->p_id); send: wg_peer_send_staged(peer); } static void wg_handshake(struct wg_softc *sc, struct wg_packet *pkt) { struct wg_pkt_initiation *init; struct wg_pkt_response *resp; struct wg_pkt_cookie *cook; struct wg_endpoint *e; struct wg_peer *peer; struct mbuf *m; struct noise_remote *remote = NULL; int res; bool underload = false; static sbintime_t wg_last_underload; /* sbinuptime */ underload = wg_queue_len(&sc->sc_handshake_queue) >= MAX_QUEUED_HANDSHAKES / 8; if (underload) { wg_last_underload = getsbinuptime(); } else if (wg_last_underload) { underload = wg_last_underload + UNDERLOAD_TIMEOUT * SBT_1S > getsbinuptime(); if (!underload) wg_last_underload = 0; } m = pkt->p_mbuf; e = &pkt->p_endpoint; if ((pkt->p_mbuf = m = m_pullup(m, m->m_pkthdr.len)) == NULL) goto error; switch (*mtod(m, uint32_t *)) { case WG_PKT_INITIATION: init = mtod(m, struct wg_pkt_initiation *); res = cookie_checker_validate_macs(&sc->sc_cookie, &init->m, init, sizeof(*init) - sizeof(init->m), underload, &e->e_remote.r_sa, if_getvnet(sc->sc_ifp)); if (res == EINVAL) { DPRINTF(sc, "Invalid initiation MAC\n"); goto error; } else if (res == ECONNREFUSED) { DPRINTF(sc, "Handshake ratelimited\n"); goto error; } else if (res == EAGAIN) { wg_send_cookie(sc, &init->m, init->s_idx, e); goto error; } else if (res != 0) { panic("unexpected response: %d\n", res); } if 
(noise_consume_initiation(sc->sc_local, &remote, init->s_idx, init->ue, init->es, init->ets) != 0) { DPRINTF(sc, "Invalid handshake initiation\n"); goto error; } peer = noise_remote_arg(remote); DPRINTF(sc, "Receiving handshake initiation from peer %" PRIu64 "\n", peer->p_id); wg_peer_set_endpoint(peer, e); wg_send_response(peer); break; case WG_PKT_RESPONSE: resp = mtod(m, struct wg_pkt_response *); res = cookie_checker_validate_macs(&sc->sc_cookie, &resp->m, resp, sizeof(*resp) - sizeof(resp->m), underload, &e->e_remote.r_sa, if_getvnet(sc->sc_ifp)); if (res == EINVAL) { DPRINTF(sc, "Invalid response MAC\n"); goto error; } else if (res == ECONNREFUSED) { DPRINTF(sc, "Handshake ratelimited\n"); goto error; } else if (res == EAGAIN) { wg_send_cookie(sc, &resp->m, resp->s_idx, e); goto error; } else if (res != 0) { panic("unexpected response: %d\n", res); } if (noise_consume_response(sc->sc_local, &remote, resp->s_idx, resp->r_idx, resp->ue, resp->en) != 0) { DPRINTF(sc, "Invalid handshake response\n"); goto error; } peer = noise_remote_arg(remote); DPRINTF(sc, "Receiving handshake response from peer %" PRIu64 "\n", peer->p_id); wg_peer_set_endpoint(peer, e); wg_timers_event_session_derived(peer); wg_timers_event_handshake_complete(peer); break; case WG_PKT_COOKIE: cook = mtod(m, struct wg_pkt_cookie *); if ((remote = noise_remote_index(sc->sc_local, cook->r_idx)) == NULL) { DPRINTF(sc, "Unknown cookie index\n"); goto error; } peer = noise_remote_arg(remote); if (cookie_maker_consume_payload(&peer->p_cookie, cook->nonce, cook->ec) == 0) { DPRINTF(sc, "Receiving cookie response\n"); } else { DPRINTF(sc, "Could not decrypt cookie response\n"); goto error; } goto not_authenticated; default: panic("invalid packet in handshake queue"); } wg_timers_event_any_authenticated_packet_received(peer); wg_timers_event_any_authenticated_packet_traversal(peer); not_authenticated: counter_u64_add(peer->p_rx_bytes, m->m_pkthdr.len); if_inc_counter(sc->sc_ifp, IFCOUNTER_IPACKETS, 1); if_inc_counter(sc->sc_ifp, IFCOUNTER_IBYTES, m->m_pkthdr.len); error: if (remote != NULL) noise_remote_put(remote); wg_packet_free(pkt); } static void wg_softc_handshake_receive(struct wg_softc *sc) { struct wg_packet *pkt; while ((pkt = wg_queue_dequeue_handshake(&sc->sc_handshake_queue)) != NULL) wg_handshake(sc, pkt); } static void wg_mbuf_reset(struct mbuf *m) { struct m_tag *t, *tmp; /* * We want to reset the mbuf to a newly allocated state, containing * just the packet contents. Unfortunately FreeBSD doesn't seem to * offer this anywhere, so we have to make it up as we go. If we can * get this in kern/kern_mbuf.c, that would be best. * * Notice: this may break things unexpectedly but it is better to fail * closed in the extreme case than leak informtion in every * case. * * With that said, all this attempts to do is remove any extraneous * information that could be present. 
*/ M_ASSERTPKTHDR(m); m->m_flags &= ~(M_BCAST|M_MCAST|M_VLANTAG|M_PROMISC|M_PROTOFLAGS); M_HASHTYPE_CLEAR(m); #ifdef NUMA m->m_pkthdr.numa_domain = M_NODOM; #endif SLIST_FOREACH_SAFE(t, &m->m_pkthdr.tags, m_tag_link, tmp) { if ((t->m_tag_id != 0 || t->m_tag_cookie != MTAG_WGLOOP) && t->m_tag_id != PACKET_TAG_MACLABEL) m_tag_delete(m, t); } KASSERT((m->m_pkthdr.csum_flags & CSUM_SND_TAG) == 0, ("%s: mbuf %p has a send tag", __func__, m)); m->m_pkthdr.csum_flags = 0; m->m_pkthdr.PH_per.sixtyfour[0] = 0; m->m_pkthdr.PH_loc.sixtyfour[0] = 0; } static inline unsigned int calculate_padding(struct wg_packet *pkt) { unsigned int padded_size, last_unit = pkt->p_mbuf->m_pkthdr.len; /* Keepalive packets don't set p_mtu, but also have a length of zero. */ if (__predict_false(pkt->p_mtu == 0)) { padded_size = (last_unit + (WG_PKT_PADDING - 1)) & ~(WG_PKT_PADDING - 1); return (padded_size - last_unit); } if (__predict_false(last_unit > pkt->p_mtu)) last_unit %= pkt->p_mtu; padded_size = (last_unit + (WG_PKT_PADDING - 1)) & ~(WG_PKT_PADDING - 1); if (pkt->p_mtu < padded_size) padded_size = pkt->p_mtu; return (padded_size - last_unit); } static void wg_encrypt(struct wg_softc *sc, struct wg_packet *pkt) { static const uint8_t padding[WG_PKT_PADDING] = { 0 }; struct wg_pkt_data *data; struct wg_peer *peer; struct noise_remote *remote; struct mbuf *m; uint32_t idx; unsigned int padlen; enum wg_ring_state state = WG_PACKET_DEAD; remote = noise_keypair_remote(pkt->p_keypair); peer = noise_remote_arg(remote); m = pkt->p_mbuf; /* Pad the packet */ padlen = calculate_padding(pkt); if (padlen != 0 && !m_append(m, padlen, padding)) goto out; /* Do encryption */ if (noise_keypair_encrypt(pkt->p_keypair, &idx, pkt->p_nonce, m) != 0) goto out; /* Put header into packet */ M_PREPEND(m, sizeof(struct wg_pkt_data), M_NOWAIT); if (m == NULL) goto out; data = mtod(m, struct wg_pkt_data *); data->t = WG_PKT_DATA; data->r_idx = idx; data->nonce = htole64(pkt->p_nonce); wg_mbuf_reset(m); state = WG_PACKET_CRYPTED; out: pkt->p_mbuf = m; atomic_store_rel_int(&pkt->p_state, state); GROUPTASK_ENQUEUE(&peer->p_send); noise_remote_put(remote); } static void wg_decrypt(struct wg_softc *sc, struct wg_packet *pkt) { struct wg_peer *peer, *allowed_peer; struct noise_remote *remote; struct mbuf *m; int len; enum wg_ring_state state = WG_PACKET_DEAD; remote = noise_keypair_remote(pkt->p_keypair); peer = noise_remote_arg(remote); m = pkt->p_mbuf; /* Read nonce and then adjust to remove the header. */ pkt->p_nonce = le64toh(mtod(m, struct wg_pkt_data *)->nonce); m_adj(m, sizeof(struct wg_pkt_data)); if (noise_keypair_decrypt(pkt->p_keypair, pkt->p_nonce, m) != 0) goto out; /* A packet with length 0 is a keepalive packet */ if (__predict_false(m->m_pkthdr.len == 0)) { DPRINTF(sc, "Receiving keepalive packet from peer " "%" PRIu64 "\n", peer->p_id); state = WG_PACKET_CRYPTED; goto out; } /* * We can let the network stack handle the intricate validation of the * IP header, we just worry about the sizeof and the version, so we can * read the source address in wg_aip_lookup. 
*/ if (determine_af_and_pullup(&m, &pkt->p_af) == 0) { if (pkt->p_af == AF_INET) { struct ip *ip = mtod(m, struct ip *); allowed_peer = wg_aip_lookup(sc, AF_INET, &ip->ip_src); len = ntohs(ip->ip_len); if (len >= sizeof(struct ip) && len < m->m_pkthdr.len) m_adj(m, len - m->m_pkthdr.len); } else if (pkt->p_af == AF_INET6) { struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *); allowed_peer = wg_aip_lookup(sc, AF_INET6, &ip6->ip6_src); len = ntohs(ip6->ip6_plen) + sizeof(struct ip6_hdr); if (len < m->m_pkthdr.len) m_adj(m, len - m->m_pkthdr.len); } else panic("determine_af_and_pullup returned unexpected value"); } else { DPRINTF(sc, "Packet is neither ipv4 nor ipv6 from peer %" PRIu64 "\n", peer->p_id); goto out; } /* We only want to compare the address, not dereference, so drop the ref. */ if (allowed_peer != NULL) noise_remote_put(allowed_peer->p_remote); if (__predict_false(peer != allowed_peer)) { DPRINTF(sc, "Packet has unallowed src IP from peer %" PRIu64 "\n", peer->p_id); goto out; } wg_mbuf_reset(m); state = WG_PACKET_CRYPTED; out: pkt->p_mbuf = m; atomic_store_rel_int(&pkt->p_state, state); GROUPTASK_ENQUEUE(&peer->p_recv); noise_remote_put(remote); } static void wg_softc_decrypt(struct wg_softc *sc) { struct wg_packet *pkt; while ((pkt = wg_queue_dequeue_parallel(&sc->sc_decrypt_parallel)) != NULL) wg_decrypt(sc, pkt); } static void wg_softc_encrypt(struct wg_softc *sc) { struct wg_packet *pkt; while ((pkt = wg_queue_dequeue_parallel(&sc->sc_encrypt_parallel)) != NULL) wg_encrypt(sc, pkt); } static void wg_encrypt_dispatch(struct wg_softc *sc) { /* * The update to encrypt_last_cpu is racey such that we may * reschedule the task for the same CPU multiple times, but * the race doesn't really matter. */ u_int cpu = (sc->sc_encrypt_last_cpu + 1) % mp_ncpus; sc->sc_encrypt_last_cpu = cpu; GROUPTASK_ENQUEUE(&sc->sc_encrypt[cpu]); } static void wg_decrypt_dispatch(struct wg_softc *sc) { u_int cpu = (sc->sc_decrypt_last_cpu + 1) % mp_ncpus; sc->sc_decrypt_last_cpu = cpu; GROUPTASK_ENQUEUE(&sc->sc_decrypt[cpu]); } static void wg_deliver_out(struct wg_peer *peer) { struct wg_endpoint endpoint; struct wg_softc *sc = peer->p_sc; struct wg_packet *pkt; struct mbuf *m; int rc, len; wg_peer_get_endpoint(peer, &endpoint); while ((pkt = wg_queue_dequeue_serial(&peer->p_encrypt_serial)) != NULL) { if (atomic_load_acq_int(&pkt->p_state) != WG_PACKET_CRYPTED) goto error; m = pkt->p_mbuf; pkt->p_mbuf = NULL; len = m->m_pkthdr.len; wg_timers_event_any_authenticated_packet_traversal(peer); wg_timers_event_any_authenticated_packet_sent(peer); rc = wg_send(sc, &endpoint, m); if (rc == 0) { if (len > (sizeof(struct wg_pkt_data) + NOISE_AUTHTAG_LEN)) wg_timers_event_data_sent(peer); counter_u64_add(peer->p_tx_bytes, len); } else if (rc == EADDRNOTAVAIL) { wg_peer_clear_src(peer); wg_peer_get_endpoint(peer, &endpoint); goto error; } else { goto error; } wg_packet_free(pkt); if (noise_keep_key_fresh_send(peer->p_remote)) wg_timers_event_want_initiation(peer); continue; error: if_inc_counter(sc->sc_ifp, IFCOUNTER_OERRORS, 1); wg_packet_free(pkt); } } static void wg_deliver_in(struct wg_peer *peer) { struct wg_softc *sc = peer->p_sc; if_t ifp = sc->sc_ifp; struct wg_packet *pkt; struct mbuf *m; struct epoch_tracker et; while ((pkt = wg_queue_dequeue_serial(&peer->p_decrypt_serial)) != NULL) { if (atomic_load_acq_int(&pkt->p_state) != WG_PACKET_CRYPTED) goto error; m = pkt->p_mbuf; if (noise_keypair_nonce_check(pkt->p_keypair, pkt->p_nonce) != 0) goto error; if (noise_keypair_received_with(pkt->p_keypair) == 
ECONNRESET) wg_timers_event_handshake_complete(peer); wg_timers_event_any_authenticated_packet_received(peer); wg_timers_event_any_authenticated_packet_traversal(peer); wg_peer_set_endpoint(peer, &pkt->p_endpoint); counter_u64_add(peer->p_rx_bytes, m->m_pkthdr.len + sizeof(struct wg_pkt_data) + NOISE_AUTHTAG_LEN); if_inc_counter(sc->sc_ifp, IFCOUNTER_IPACKETS, 1); if_inc_counter(sc->sc_ifp, IFCOUNTER_IBYTES, m->m_pkthdr.len + sizeof(struct wg_pkt_data) + NOISE_AUTHTAG_LEN); if (m->m_pkthdr.len == 0) goto done; MPASS(pkt->p_af == AF_INET || pkt->p_af == AF_INET6); pkt->p_mbuf = NULL; m->m_pkthdr.rcvif = ifp; NET_EPOCH_ENTER(et); BPF_MTAP2_AF(ifp, m, pkt->p_af); CURVNET_SET(if_getvnet(ifp)); M_SETFIB(m, if_getfib(ifp)); if (pkt->p_af == AF_INET) netisr_dispatch(NETISR_IP, m); if (pkt->p_af == AF_INET6) netisr_dispatch(NETISR_IPV6, m); CURVNET_RESTORE(); NET_EPOCH_EXIT(et); wg_timers_event_data_received(peer); done: if (noise_keep_key_fresh_recv(peer->p_remote)) wg_timers_event_want_initiation(peer); wg_packet_free(pkt); continue; error: if_inc_counter(ifp, IFCOUNTER_IERRORS, 1); wg_packet_free(pkt); } } static struct wg_packet * wg_packet_alloc(struct mbuf *m) { struct wg_packet *pkt; if ((pkt = uma_zalloc(wg_packet_zone, M_NOWAIT | M_ZERO)) == NULL) return (NULL); pkt->p_mbuf = m; return (pkt); } static void wg_packet_free(struct wg_packet *pkt) { if (pkt->p_keypair != NULL) noise_keypair_put(pkt->p_keypair); if (pkt->p_mbuf != NULL) m_freem(pkt->p_mbuf); uma_zfree(wg_packet_zone, pkt); } static void wg_queue_init(struct wg_queue *queue, const char *name) { mtx_init(&queue->q_mtx, name, NULL, MTX_DEF); STAILQ_INIT(&queue->q_queue); queue->q_len = 0; } static void wg_queue_deinit(struct wg_queue *queue) { wg_queue_purge(queue); mtx_destroy(&queue->q_mtx); } static size_t wg_queue_len(struct wg_queue *queue) { return (queue->q_len); } static int wg_queue_enqueue_handshake(struct wg_queue *hs, struct wg_packet *pkt) { int ret = 0; mtx_lock(&hs->q_mtx); if (hs->q_len < MAX_QUEUED_HANDSHAKES) { STAILQ_INSERT_TAIL(&hs->q_queue, pkt, p_parallel); hs->q_len++; } else { ret = ENOBUFS; } mtx_unlock(&hs->q_mtx); if (ret != 0) wg_packet_free(pkt); return (ret); } static struct wg_packet * wg_queue_dequeue_handshake(struct wg_queue *hs) { struct wg_packet *pkt; mtx_lock(&hs->q_mtx); if ((pkt = STAILQ_FIRST(&hs->q_queue)) != NULL) { STAILQ_REMOVE_HEAD(&hs->q_queue, p_parallel); hs->q_len--; } mtx_unlock(&hs->q_mtx); return (pkt); } static void wg_queue_push_staged(struct wg_queue *staged, struct wg_packet *pkt) { struct wg_packet *old = NULL; mtx_lock(&staged->q_mtx); if (staged->q_len >= MAX_STAGED_PKT) { old = STAILQ_FIRST(&staged->q_queue); STAILQ_REMOVE_HEAD(&staged->q_queue, p_parallel); staged->q_len--; } STAILQ_INSERT_TAIL(&staged->q_queue, pkt, p_parallel); staged->q_len++; mtx_unlock(&staged->q_mtx); if (old != NULL) wg_packet_free(old); } static void wg_queue_enlist_staged(struct wg_queue *staged, struct wg_packet_list *list) { struct wg_packet *pkt, *tpkt; STAILQ_FOREACH_SAFE(pkt, list, p_parallel, tpkt) wg_queue_push_staged(staged, pkt); } static void wg_queue_delist_staged(struct wg_queue *staged, struct wg_packet_list *list) { STAILQ_INIT(list); mtx_lock(&staged->q_mtx); STAILQ_CONCAT(list, &staged->q_queue); staged->q_len = 0; mtx_unlock(&staged->q_mtx); } static void wg_queue_purge(struct wg_queue *staged) { struct wg_packet_list list; struct wg_packet *pkt, *tpkt; wg_queue_delist_staged(staged, &list); STAILQ_FOREACH_SAFE(pkt, &list, p_parallel, tpkt) wg_packet_free(pkt); } static int 
wg_queue_both(struct wg_queue *parallel, struct wg_queue *serial, struct wg_packet *pkt) { pkt->p_state = WG_PACKET_UNCRYPTED; mtx_lock(&serial->q_mtx); if (serial->q_len < MAX_QUEUED_PKT) { serial->q_len++; STAILQ_INSERT_TAIL(&serial->q_queue, pkt, p_serial); } else { mtx_unlock(&serial->q_mtx); wg_packet_free(pkt); return (ENOBUFS); } mtx_unlock(&serial->q_mtx); mtx_lock(&parallel->q_mtx); if (parallel->q_len < MAX_QUEUED_PKT) { parallel->q_len++; STAILQ_INSERT_TAIL(&parallel->q_queue, pkt, p_parallel); } else { mtx_unlock(&parallel->q_mtx); pkt->p_state = WG_PACKET_DEAD; return (ENOBUFS); } mtx_unlock(&parallel->q_mtx); return (0); } static struct wg_packet * wg_queue_dequeue_serial(struct wg_queue *serial) { struct wg_packet *pkt = NULL; mtx_lock(&serial->q_mtx); if (serial->q_len > 0 && STAILQ_FIRST(&serial->q_queue)->p_state != WG_PACKET_UNCRYPTED) { serial->q_len--; pkt = STAILQ_FIRST(&serial->q_queue); STAILQ_REMOVE_HEAD(&serial->q_queue, p_serial); } mtx_unlock(&serial->q_mtx); return (pkt); } static struct wg_packet * wg_queue_dequeue_parallel(struct wg_queue *parallel) { struct wg_packet *pkt = NULL; mtx_lock(&parallel->q_mtx); if (parallel->q_len > 0) { parallel->q_len--; pkt = STAILQ_FIRST(&parallel->q_queue); STAILQ_REMOVE_HEAD(&parallel->q_queue, p_parallel); } mtx_unlock(&parallel->q_mtx); return (pkt); } static bool wg_input(struct mbuf *m, int offset, struct inpcb *inpcb, const struct sockaddr *sa, void *_sc) { #ifdef INET const struct sockaddr_in *sin; #endif #ifdef INET6 const struct sockaddr_in6 *sin6; #endif struct noise_remote *remote; struct wg_pkt_data *data; struct wg_packet *pkt; struct wg_peer *peer; struct wg_softc *sc = _sc; struct mbuf *defragged; defragged = m_defrag(m, M_NOWAIT); if (defragged) m = defragged; m = m_unshare(m, M_NOWAIT); if (!m) { if_inc_counter(sc->sc_ifp, IFCOUNTER_IQDROPS, 1); return true; } /* Caller provided us with `sa`, no need for this header. */ m_adj(m, offset + sizeof(struct udphdr)); /* Pullup enough to read packet type */ if ((m = m_pullup(m, sizeof(uint32_t))) == NULL) { if_inc_counter(sc->sc_ifp, IFCOUNTER_IQDROPS, 1); return true; } if ((pkt = wg_packet_alloc(m)) == NULL) { if_inc_counter(sc->sc_ifp, IFCOUNTER_IQDROPS, 1); m_freem(m); return true; } /* Save send/recv address and port for later. */ switch (sa->sa_family) { #ifdef INET case AF_INET: sin = (const struct sockaddr_in *)sa; pkt->p_endpoint.e_remote.r_sin = sin[0]; pkt->p_endpoint.e_local.l_in = sin[1].sin_addr; break; #endif #ifdef INET6 case AF_INET6: sin6 = (const struct sockaddr_in6 *)sa; pkt->p_endpoint.e_remote.r_sin6 = sin6[0]; pkt->p_endpoint.e_local.l_in6 = sin6[1].sin6_addr; break; #endif default: goto error; } if ((m->m_pkthdr.len == sizeof(struct wg_pkt_initiation) && *mtod(m, uint32_t *) == WG_PKT_INITIATION) || (m->m_pkthdr.len == sizeof(struct wg_pkt_response) && *mtod(m, uint32_t *) == WG_PKT_RESPONSE) || (m->m_pkthdr.len == sizeof(struct wg_pkt_cookie) && *mtod(m, uint32_t *) == WG_PKT_COOKIE)) { if (wg_queue_enqueue_handshake(&sc->sc_handshake_queue, pkt) != 0) { if_inc_counter(sc->sc_ifp, IFCOUNTER_IQDROPS, 1); DPRINTF(sc, "Dropping handshake packet\n"); } GROUPTASK_ENQUEUE(&sc->sc_handshake); } else if (m->m_pkthdr.len >= sizeof(struct wg_pkt_data) + NOISE_AUTHTAG_LEN && *mtod(m, uint32_t *) == WG_PKT_DATA) { /* Pullup whole header to read r_idx below. 
*/ if ((pkt->p_mbuf = m_pullup(m, sizeof(struct wg_pkt_data))) == NULL) goto error; data = mtod(pkt->p_mbuf, struct wg_pkt_data *); if ((pkt->p_keypair = noise_keypair_lookup(sc->sc_local, data->r_idx)) == NULL) goto error; remote = noise_keypair_remote(pkt->p_keypair); peer = noise_remote_arg(remote); if (wg_queue_both(&sc->sc_decrypt_parallel, &peer->p_decrypt_serial, pkt) != 0) if_inc_counter(sc->sc_ifp, IFCOUNTER_IQDROPS, 1); wg_decrypt_dispatch(sc); noise_remote_put(remote); } else { goto error; } return true; error: if_inc_counter(sc->sc_ifp, IFCOUNTER_IERRORS, 1); wg_packet_free(pkt); return true; } static void wg_peer_send_staged(struct wg_peer *peer) { struct wg_packet_list list; struct noise_keypair *keypair; struct wg_packet *pkt, *tpkt; struct wg_softc *sc = peer->p_sc; wg_queue_delist_staged(&peer->p_stage_queue, &list); if (STAILQ_EMPTY(&list)) return; if ((keypair = noise_keypair_current(peer->p_remote)) == NULL) goto error; STAILQ_FOREACH(pkt, &list, p_parallel) { if (noise_keypair_nonce_next(keypair, &pkt->p_nonce) != 0) goto error_keypair; } STAILQ_FOREACH_SAFE(pkt, &list, p_parallel, tpkt) { pkt->p_keypair = noise_keypair_ref(keypair); if (wg_queue_both(&sc->sc_encrypt_parallel, &peer->p_encrypt_serial, pkt) != 0) if_inc_counter(sc->sc_ifp, IFCOUNTER_OQDROPS, 1); } wg_encrypt_dispatch(sc); noise_keypair_put(keypair); return; error_keypair: noise_keypair_put(keypair); error: wg_queue_enlist_staged(&peer->p_stage_queue, &list); wg_timers_event_want_initiation(peer); } static inline void xmit_err(if_t ifp, struct mbuf *m, struct wg_packet *pkt, sa_family_t af) { if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); switch (af) { #ifdef INET case AF_INET: icmp_error(m, ICMP_UNREACH, ICMP_UNREACH_HOST, 0, 0); if (pkt) pkt->p_mbuf = NULL; m = NULL; break; #endif #ifdef INET6 case AF_INET6: icmp6_error(m, ICMP6_DST_UNREACH, 0, 0); if (pkt) pkt->p_mbuf = NULL; m = NULL; break; #endif } if (pkt) wg_packet_free(pkt); else if (m) m_freem(m); } static int wg_xmit(if_t ifp, struct mbuf *m, sa_family_t af, uint32_t mtu) { struct wg_packet *pkt = NULL; struct wg_softc *sc = if_getsoftc(ifp); struct wg_peer *peer; int rc = 0; sa_family_t peer_af; /* Work around lifetime issue in the ipv6 mld code. 
*/ if (__predict_false((if_getflags(ifp) & IFF_DYING) || !sc)) { rc = ENXIO; goto err_xmit; } if ((pkt = wg_packet_alloc(m)) == NULL) { rc = ENOBUFS; goto err_xmit; } pkt->p_mtu = mtu; pkt->p_af = af; if (af == AF_INET) { peer = wg_aip_lookup(sc, AF_INET, &mtod(m, struct ip *)->ip_dst); } else if (af == AF_INET6) { peer = wg_aip_lookup(sc, AF_INET6, &mtod(m, struct ip6_hdr *)->ip6_dst); } else { rc = EAFNOSUPPORT; goto err_xmit; } BPF_MTAP2_AF(ifp, m, pkt->p_af); if (__predict_false(peer == NULL)) { rc = ENETUNREACH; goto err_xmit; } if (__predict_false(if_tunnel_check_nesting(ifp, m, MTAG_WGLOOP, MAX_LOOPS))) { DPRINTF(sc, "Packet looped"); rc = ELOOP; goto err_peer; } peer_af = peer->p_endpoint.e_remote.r_sa.sa_family; if (__predict_false(peer_af != AF_INET && peer_af != AF_INET6)) { DPRINTF(sc, "No valid endpoint has been configured or " "discovered for peer %" PRIu64 "\n", peer->p_id); rc = EHOSTUNREACH; goto err_peer; } wg_queue_push_staged(&peer->p_stage_queue, pkt); wg_peer_send_staged(peer); noise_remote_put(peer->p_remote); return (0); err_peer: noise_remote_put(peer->p_remote); err_xmit: xmit_err(ifp, m, pkt, af); return (rc); } static inline int determine_af_and_pullup(struct mbuf **m, sa_family_t *af) { u_char ipv; if ((*m)->m_pkthdr.len >= sizeof(struct ip6_hdr)) *m = m_pullup(*m, sizeof(struct ip6_hdr)); else if ((*m)->m_pkthdr.len >= sizeof(struct ip)) *m = m_pullup(*m, sizeof(struct ip)); else return (EAFNOSUPPORT); if (*m == NULL) return (ENOBUFS); ipv = mtod(*m, struct ip *)->ip_v; if (ipv == 4) *af = AF_INET; else if (ipv == 6 && (*m)->m_pkthdr.len >= sizeof(struct ip6_hdr)) *af = AF_INET6; else return (EAFNOSUPPORT); return (0); } static int wg_transmit(if_t ifp, struct mbuf *m) { sa_family_t af; int ret; struct mbuf *defragged; defragged = m_defrag(m, M_NOWAIT); if (defragged) m = defragged; m = m_unshare(m, M_NOWAIT); if (!m) { xmit_err(ifp, m, NULL, AF_UNSPEC); return (ENOBUFS); } ret = determine_af_and_pullup(&m, &af); if (ret) { xmit_err(ifp, m, NULL, AF_UNSPEC); return (ret); } return (wg_xmit(ifp, m, af, if_getmtu(ifp))); } static int wg_output(if_t ifp, struct mbuf *m, const struct sockaddr *dst, struct route *ro) { sa_family_t parsed_af; uint32_t af, mtu; int ret; struct mbuf *defragged; - if (dst->sa_family == AF_UNSPEC) + /* BPF writes need to be handled specially. */ + if (dst->sa_family == AF_UNSPEC || dst->sa_family == pseudo_AF_HDRCMPLT) memcpy(&af, dst->sa_data, sizeof(af)); else af = dst->sa_family; if (af == AF_UNSPEC) { xmit_err(ifp, m, NULL, af); return (EAFNOSUPPORT); } defragged = m_defrag(m, M_NOWAIT); if (defragged) m = defragged; m = m_unshare(m, M_NOWAIT); if (!m) { xmit_err(ifp, m, NULL, AF_UNSPEC); return (ENOBUFS); } ret = determine_af_and_pullup(&m, &parsed_af); if (ret) { xmit_err(ifp, m, NULL, AF_UNSPEC); return (ret); } if (parsed_af != af) { xmit_err(ifp, m, NULL, AF_UNSPEC); return (EAFNOSUPPORT); } mtu = (ro != NULL && ro->ro_mtu > 0) ? 
ro->ro_mtu : if_getmtu(ifp); return (wg_xmit(ifp, m, parsed_af, mtu)); } static int wg_peer_add(struct wg_softc *sc, const nvlist_t *nvl) { uint8_t public[WG_KEY_SIZE]; const void *pub_key, *preshared_key = NULL; const struct sockaddr *endpoint; int err; size_t size; struct noise_remote *remote; struct wg_peer *peer = NULL; bool need_insert = false; sx_assert(&sc->sc_lock, SX_XLOCKED); if (!nvlist_exists_binary(nvl, "public-key")) { return (EINVAL); } pub_key = nvlist_get_binary(nvl, "public-key", &size); if (size != WG_KEY_SIZE) { return (EINVAL); } if (noise_local_keys(sc->sc_local, public, NULL) == 0 && bcmp(public, pub_key, WG_KEY_SIZE) == 0) { return (0); // Silently ignored; not actually a failure. } if ((remote = noise_remote_lookup(sc->sc_local, pub_key)) != NULL) peer = noise_remote_arg(remote); if (nvlist_exists_bool(nvl, "remove") && nvlist_get_bool(nvl, "remove")) { if (remote != NULL) { wg_peer_destroy(peer); noise_remote_put(remote); } return (0); } if (nvlist_exists_bool(nvl, "replace-allowedips") && nvlist_get_bool(nvl, "replace-allowedips") && peer != NULL) { wg_aip_remove_all(sc, peer); } if (peer == NULL) { peer = wg_peer_alloc(sc, pub_key); need_insert = true; } if (nvlist_exists_binary(nvl, "endpoint")) { endpoint = nvlist_get_binary(nvl, "endpoint", &size); if (size > sizeof(peer->p_endpoint.e_remote)) { err = EINVAL; goto out; } memcpy(&peer->p_endpoint.e_remote, endpoint, size); } if (nvlist_exists_binary(nvl, "preshared-key")) { preshared_key = nvlist_get_binary(nvl, "preshared-key", &size); if (size != WG_KEY_SIZE) { err = EINVAL; goto out; } noise_remote_set_psk(peer->p_remote, preshared_key); } if (nvlist_exists_number(nvl, "persistent-keepalive-interval")) { uint64_t pki = nvlist_get_number(nvl, "persistent-keepalive-interval"); if (pki > UINT16_MAX) { err = EINVAL; goto out; } wg_timers_set_persistent_keepalive(peer, pki); } if (nvlist_exists_nvlist_array(nvl, "allowed-ips")) { const void *addr; uint64_t cidr; const nvlist_t * const * aipl; size_t allowedip_count; aipl = nvlist_get_nvlist_array(nvl, "allowed-ips", &allowedip_count); for (size_t idx = 0; idx < allowedip_count; idx++) { if (!nvlist_exists_number(aipl[idx], "cidr")) continue; cidr = nvlist_get_number(aipl[idx], "cidr"); if (nvlist_exists_binary(aipl[idx], "ipv4")) { addr = nvlist_get_binary(aipl[idx], "ipv4", &size); if (addr == NULL || cidr > 32 || size != sizeof(struct in_addr)) { err = EINVAL; goto out; } if ((err = wg_aip_add(sc, peer, AF_INET, addr, cidr)) != 0) goto out; } else if (nvlist_exists_binary(aipl[idx], "ipv6")) { addr = nvlist_get_binary(aipl[idx], "ipv6", &size); if (addr == NULL || cidr > 128 || size != sizeof(struct in6_addr)) { err = EINVAL; goto out; } if ((err = wg_aip_add(sc, peer, AF_INET6, addr, cidr)) != 0) goto out; } else { continue; } } } if (need_insert) { if ((err = noise_remote_enable(peer->p_remote)) != 0) goto out; TAILQ_INSERT_TAIL(&sc->sc_peers, peer, p_entry); sc->sc_peers_num++; if (if_getlinkstate(sc->sc_ifp) == LINK_STATE_UP) wg_timers_enable(peer); } if (remote != NULL) noise_remote_put(remote); return (0); out: if (need_insert) /* If we fail, only destroy if it was new. */ wg_peer_destroy(peer); if (remote != NULL) noise_remote_put(remote); return (err); } static int wgc_set(struct wg_softc *sc, struct wg_data_io *wgd) { uint8_t public[WG_KEY_SIZE], private[WG_KEY_SIZE]; if_t ifp; void *nvlpacked; nvlist_t *nvl; ssize_t size; int err; ifp = sc->sc_ifp; if (wgd->wgd_size == 0 || wgd->wgd_data == NULL) return (EFAULT); /* Can nvlists be streamed in? 
It's not nice to impose arbitrary limits like that but * there needs to be _some_ limitation. */ if (wgd->wgd_size >= UINT32_MAX / 2) return (E2BIG); nvlpacked = malloc(wgd->wgd_size, M_TEMP, M_WAITOK | M_ZERO); err = copyin(wgd->wgd_data, nvlpacked, wgd->wgd_size); if (err) goto out; nvl = nvlist_unpack(nvlpacked, wgd->wgd_size, 0); if (nvl == NULL) { err = EBADMSG; goto out; } sx_xlock(&sc->sc_lock); if (nvlist_exists_bool(nvl, "replace-peers") && nvlist_get_bool(nvl, "replace-peers")) wg_peer_destroy_all(sc); if (nvlist_exists_number(nvl, "listen-port")) { uint64_t new_port = nvlist_get_number(nvl, "listen-port"); if (new_port > UINT16_MAX) { err = EINVAL; goto out_locked; } if (new_port != sc->sc_socket.so_port) { if ((if_getdrvflags(ifp) & IFF_DRV_RUNNING) != 0) { if ((err = wg_socket_init(sc, new_port)) != 0) goto out_locked; } else sc->sc_socket.so_port = new_port; } } if (nvlist_exists_binary(nvl, "private-key")) { const void *key = nvlist_get_binary(nvl, "private-key", &size); if (size != WG_KEY_SIZE) { err = EINVAL; goto out_locked; } if (noise_local_keys(sc->sc_local, NULL, private) != 0 || timingsafe_bcmp(private, key, WG_KEY_SIZE) != 0) { struct wg_peer *peer; if (curve25519_generate_public(public, key)) { /* Peer conflict: remove conflicting peer. */ struct noise_remote *remote; if ((remote = noise_remote_lookup(sc->sc_local, public)) != NULL) { peer = noise_remote_arg(remote); wg_peer_destroy(peer); noise_remote_put(remote); } } /* * Set the private key and invalidate all existing * handshakes. */ /* Note: we might be removing the private key. */ noise_local_private(sc->sc_local, key); if (noise_local_keys(sc->sc_local, NULL, NULL) == 0) cookie_checker_update(&sc->sc_cookie, public); else cookie_checker_update(&sc->sc_cookie, NULL); } } if (nvlist_exists_number(nvl, "user-cookie")) { uint64_t user_cookie = nvlist_get_number(nvl, "user-cookie"); if (user_cookie > UINT32_MAX) { err = EINVAL; goto out_locked; } err = wg_socket_set_cookie(sc, user_cookie); if (err) goto out_locked; } if (nvlist_exists_nvlist_array(nvl, "peers")) { size_t peercount; const nvlist_t * const*nvl_peers; nvl_peers = nvlist_get_nvlist_array(nvl, "peers", &peercount); for (int i = 0; i < peercount; i++) { err = wg_peer_add(sc, nvl_peers[i]); if (err != 0) goto out_locked; } } out_locked: sx_xunlock(&sc->sc_lock); nvlist_destroy(nvl); out: zfree(nvlpacked, M_TEMP); return (err); } static int wgc_get(struct wg_softc *sc, struct wg_data_io *wgd) { uint8_t public_key[WG_KEY_SIZE] = { 0 }; uint8_t private_key[WG_KEY_SIZE] = { 0 }; uint8_t preshared_key[NOISE_SYMMETRIC_KEY_LEN] = { 0 }; nvlist_t *nvl, *nvl_peer, *nvl_aip, **nvl_peers, **nvl_aips; size_t size, peer_count, aip_count, i, j; struct wg_timespec64 ts64; struct wg_peer *peer; struct wg_aip *aip; void *packed; int err = 0; nvl = nvlist_create(0); if (!nvl) return (ENOMEM); sx_slock(&sc->sc_lock); if (sc->sc_socket.so_port != 0) nvlist_add_number(nvl, "listen-port", sc->sc_socket.so_port); if (sc->sc_socket.so_user_cookie != 0) nvlist_add_number(nvl, "user-cookie", sc->sc_socket.so_user_cookie); if (noise_local_keys(sc->sc_local, public_key, private_key) == 0) { nvlist_add_binary(nvl, "public-key", public_key, WG_KEY_SIZE); if (wgc_privileged(sc)) nvlist_add_binary(nvl, "private-key", private_key, WG_KEY_SIZE); explicit_bzero(private_key, sizeof(private_key)); } peer_count = sc->sc_peers_num; if (peer_count) { nvl_peers = mallocarray(peer_count, sizeof(void *), M_NVLIST, M_WAITOK | M_ZERO); i = 0; TAILQ_FOREACH(peer, &sc->sc_peers, p_entry) { if (i >= 
peer_count) panic("peers changed from under us"); nvl_peers[i++] = nvl_peer = nvlist_create(0); if (!nvl_peer) { err = ENOMEM; goto err_peer; } (void)noise_remote_keys(peer->p_remote, public_key, preshared_key); nvlist_add_binary(nvl_peer, "public-key", public_key, sizeof(public_key)); if (wgc_privileged(sc)) nvlist_add_binary(nvl_peer, "preshared-key", preshared_key, sizeof(preshared_key)); explicit_bzero(preshared_key, sizeof(preshared_key)); if (peer->p_endpoint.e_remote.r_sa.sa_family == AF_INET) nvlist_add_binary(nvl_peer, "endpoint", &peer->p_endpoint.e_remote, sizeof(struct sockaddr_in)); else if (peer->p_endpoint.e_remote.r_sa.sa_family == AF_INET6) nvlist_add_binary(nvl_peer, "endpoint", &peer->p_endpoint.e_remote, sizeof(struct sockaddr_in6)); wg_timers_get_last_handshake(peer, &ts64); nvlist_add_binary(nvl_peer, "last-handshake-time", &ts64, sizeof(ts64)); nvlist_add_number(nvl_peer, "persistent-keepalive-interval", peer->p_persistent_keepalive_interval); nvlist_add_number(nvl_peer, "rx-bytes", counter_u64_fetch(peer->p_rx_bytes)); nvlist_add_number(nvl_peer, "tx-bytes", counter_u64_fetch(peer->p_tx_bytes)); aip_count = peer->p_aips_num; if (aip_count) { nvl_aips = mallocarray(aip_count, sizeof(void *), M_NVLIST, M_WAITOK | M_ZERO); j = 0; LIST_FOREACH(aip, &peer->p_aips, a_entry) { if (j >= aip_count) panic("aips changed from under us"); nvl_aips[j++] = nvl_aip = nvlist_create(0); if (!nvl_aip) { err = ENOMEM; goto err_aip; } if (aip->a_af == AF_INET) { nvlist_add_binary(nvl_aip, "ipv4", &aip->a_addr.in, sizeof(aip->a_addr.in)); nvlist_add_number(nvl_aip, "cidr", bitcount32(aip->a_mask.ip)); } #ifdef INET6 else if (aip->a_af == AF_INET6) { nvlist_add_binary(nvl_aip, "ipv6", &aip->a_addr.in6, sizeof(aip->a_addr.in6)); nvlist_add_number(nvl_aip, "cidr", in6_mask2len(&aip->a_mask.in6, NULL)); } #endif } nvlist_add_nvlist_array(nvl_peer, "allowed-ips", (const nvlist_t *const *)nvl_aips, aip_count); err_aip: for (j = 0; j < aip_count; ++j) nvlist_destroy(nvl_aips[j]); free(nvl_aips, M_NVLIST); if (err) goto err_peer; } } nvlist_add_nvlist_array(nvl, "peers", (const nvlist_t * const *)nvl_peers, peer_count); err_peer: for (i = 0; i < peer_count; ++i) nvlist_destroy(nvl_peers[i]); free(nvl_peers, M_NVLIST); if (err) { sx_sunlock(&sc->sc_lock); goto err; } } sx_sunlock(&sc->sc_lock); packed = nvlist_pack(nvl, &size); if (!packed) { err = ENOMEM; goto err; } if (!wgd->wgd_size) { wgd->wgd_size = size; goto out; } if (wgd->wgd_size < size) { err = ENOSPC; goto out; } err = copyout(packed, wgd->wgd_data, size); wgd->wgd_size = size; out: zfree(packed, M_NVLIST); err: nvlist_destroy(nvl); return (err); } static int wg_ioctl(if_t ifp, u_long cmd, caddr_t data) { struct wg_data_io *wgd = (struct wg_data_io *)data; struct ifreq *ifr = (struct ifreq *)data; struct wg_softc *sc; int ret = 0; sx_slock(&wg_sx); sc = if_getsoftc(ifp); if (!sc) { ret = ENXIO; goto out; } switch (cmd) { case SIOCSWG: ret = priv_check(curthread, PRIV_NET_WG); if (ret == 0) ret = wgc_set(sc, wgd); break; case SIOCGWG: ret = wgc_get(sc, wgd); break; /* Interface IOCTLs */ case SIOCSIFADDR: /* * This differs from *BSD norms, but is more uniform with how * WireGuard behaves elsewhere. 
*/ break; case SIOCSIFFLAGS: if (if_getflags(ifp) & IFF_UP) ret = wg_up(sc); else wg_down(sc); break; case SIOCSIFMTU: if (ifr->ifr_mtu <= 0 || ifr->ifr_mtu > MAX_MTU) ret = EINVAL; else if_setmtu(ifp, ifr->ifr_mtu); break; case SIOCADDMULTI: case SIOCDELMULTI: break; case SIOCGTUNFIB: ifr->ifr_fib = sc->sc_socket.so_fibnum; break; case SIOCSTUNFIB: ret = priv_check(curthread, PRIV_NET_WG); if (ret) break; ret = priv_check(curthread, PRIV_NET_SETIFFIB); if (ret) break; sx_xlock(&sc->sc_lock); ret = wg_socket_set_fibnum(sc, ifr->ifr_fib); sx_xunlock(&sc->sc_lock); break; default: ret = ENOTTY; } out: sx_sunlock(&wg_sx); return (ret); } static int wg_up(struct wg_softc *sc) { if_t ifp = sc->sc_ifp; struct wg_peer *peer; int rc = EBUSY; sx_xlock(&sc->sc_lock); /* Jail's being removed, no more wg_up(). */ if ((sc->sc_flags & WGF_DYING) != 0) goto out; /* Silent success if we're already running. */ rc = 0; if (if_getdrvflags(ifp) & IFF_DRV_RUNNING) goto out; if_setdrvflagbits(ifp, IFF_DRV_RUNNING, 0); rc = wg_socket_init(sc, sc->sc_socket.so_port); if (rc == 0) { TAILQ_FOREACH(peer, &sc->sc_peers, p_entry) wg_timers_enable(peer); if_link_state_change(sc->sc_ifp, LINK_STATE_UP); } else { if_setdrvflagbits(ifp, 0, IFF_DRV_RUNNING); DPRINTF(sc, "Unable to initialize sockets: %d\n", rc); } out: sx_xunlock(&sc->sc_lock); return (rc); } static void wg_down(struct wg_softc *sc) { if_t ifp = sc->sc_ifp; struct wg_peer *peer; sx_xlock(&sc->sc_lock); if (!(if_getdrvflags(ifp) & IFF_DRV_RUNNING)) { sx_xunlock(&sc->sc_lock); return; } if_setdrvflagbits(ifp, 0, IFF_DRV_RUNNING); TAILQ_FOREACH(peer, &sc->sc_peers, p_entry) { wg_queue_purge(&peer->p_stage_queue); wg_timers_disable(peer); } wg_queue_purge(&sc->sc_handshake_queue); TAILQ_FOREACH(peer, &sc->sc_peers, p_entry) { noise_remote_handshake_clear(peer->p_remote); noise_remote_keypairs_clear(peer->p_remote); } if_link_state_change(sc->sc_ifp, LINK_STATE_DOWN); wg_socket_uninit(sc); sx_xunlock(&sc->sc_lock); } static int wg_clone_create(struct if_clone *ifc, char *name, size_t len, struct ifc_data *ifd, struct ifnet **ifpp) { struct wg_softc *sc; if_t ifp; sc = malloc(sizeof(*sc), M_WG, M_WAITOK | M_ZERO); sc->sc_local = noise_local_alloc(sc); sc->sc_encrypt = mallocarray(sizeof(struct grouptask), mp_ncpus, M_WG, M_WAITOK | M_ZERO); sc->sc_decrypt = mallocarray(sizeof(struct grouptask), mp_ncpus, M_WG, M_WAITOK | M_ZERO); if (!rn_inithead((void **)&sc->sc_aip4, offsetof(struct aip_addr, in) * NBBY)) goto free_decrypt; if (!rn_inithead((void **)&sc->sc_aip6, offsetof(struct aip_addr, in6) * NBBY)) goto free_aip4; atomic_add_int(&clone_count, 1); ifp = sc->sc_ifp = if_alloc(IFT_WIREGUARD); sc->sc_ucred = crhold(curthread->td_ucred); sc->sc_socket.so_fibnum = curthread->td_proc->p_fibnum; sc->sc_socket.so_port = 0; TAILQ_INIT(&sc->sc_peers); sc->sc_peers_num = 0; cookie_checker_init(&sc->sc_cookie); RADIX_NODE_HEAD_LOCK_INIT(sc->sc_aip4); RADIX_NODE_HEAD_LOCK_INIT(sc->sc_aip6); GROUPTASK_INIT(&sc->sc_handshake, 0, (gtask_fn_t *)wg_softc_handshake_receive, sc); taskqgroup_attach(qgroup_wg_tqg, &sc->sc_handshake, sc, NULL, NULL, "wg tx initiation"); wg_queue_init(&sc->sc_handshake_queue, "hsq"); for (int i = 0; i < mp_ncpus; i++) { GROUPTASK_INIT(&sc->sc_encrypt[i], 0, (gtask_fn_t *)wg_softc_encrypt, sc); taskqgroup_attach_cpu(qgroup_wg_tqg, &sc->sc_encrypt[i], sc, i, NULL, NULL, "wg encrypt"); GROUPTASK_INIT(&sc->sc_decrypt[i], 0, (gtask_fn_t *)wg_softc_decrypt, sc); taskqgroup_attach_cpu(qgroup_wg_tqg, &sc->sc_decrypt[i], sc, i, NULL, NULL, "wg decrypt"); } 
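/* Each data packet is queued twice by wg_queue_both(): on one of the per-interface parallel queues initialized below, which the per-CPU encrypt/decrypt tasks created above drain in parallel, and on a per-peer serial queue from which wg_deliver_out()/wg_deliver_in() dequeue only packets whose crypto has completed, preserving per-peer ordering. */ 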
wg_queue_init(&sc->sc_encrypt_parallel, "encp"); wg_queue_init(&sc->sc_decrypt_parallel, "decp"); sx_init(&sc->sc_lock, "wg softc lock"); if_setsoftc(ifp, sc); if_setcapabilities(ifp, WG_CAPS); if_setcapenable(ifp, WG_CAPS); if_initname(ifp, wgname, ifd->unit); if_setmtu(ifp, DEFAULT_MTU); if_setflags(ifp, IFF_NOARP | IFF_MULTICAST); if_setinitfn(ifp, wg_init); if_setreassignfn(ifp, wg_reassign); if_setqflushfn(ifp, wg_qflush); if_settransmitfn(ifp, wg_transmit); if_setoutputfn(ifp, wg_output); if_setioctlfn(ifp, wg_ioctl); if_attach(ifp); bpfattach(ifp, DLT_NULL, sizeof(uint32_t)); #ifdef INET6 ND_IFINFO(ifp)->flags &= ~ND6_IFF_AUTO_LINKLOCAL; ND_IFINFO(ifp)->flags |= ND6_IFF_NO_DAD; #endif sx_xlock(&wg_sx); LIST_INSERT_HEAD(&wg_list, sc, sc_entry); sx_xunlock(&wg_sx); *ifpp = ifp; return (0); free_aip4: RADIX_NODE_HEAD_DESTROY(sc->sc_aip4); free(sc->sc_aip4, M_RTABLE); free_decrypt: free(sc->sc_decrypt, M_WG); free(sc->sc_encrypt, M_WG); noise_local_free(sc->sc_local, NULL); free(sc, M_WG); return (ENOMEM); } static void wg_clone_deferred_free(struct noise_local *l) { struct wg_softc *sc = noise_local_arg(l); free(sc, M_WG); atomic_add_int(&clone_count, -1); } static int wg_clone_destroy(struct if_clone *ifc, if_t ifp, uint32_t flags) { struct wg_softc *sc = if_getsoftc(ifp); struct ucred *cred; sx_xlock(&wg_sx); if_setsoftc(ifp, NULL); sx_xlock(&sc->sc_lock); sc->sc_flags |= WGF_DYING; cred = sc->sc_ucred; sc->sc_ucred = NULL; sx_xunlock(&sc->sc_lock); LIST_REMOVE(sc, sc_entry); sx_xunlock(&wg_sx); if_link_state_change(sc->sc_ifp, LINK_STATE_DOWN); CURVNET_SET(if_getvnet(sc->sc_ifp)); if_purgeaddrs(sc->sc_ifp); CURVNET_RESTORE(); sx_xlock(&sc->sc_lock); wg_socket_uninit(sc); sx_xunlock(&sc->sc_lock); /* * No guarantees that all traffic have passed until the epoch has * elapsed with the socket closed. */ NET_EPOCH_WAIT(); taskqgroup_drain_all(qgroup_wg_tqg); sx_xlock(&sc->sc_lock); wg_peer_destroy_all(sc); NET_EPOCH_DRAIN_CALLBACKS(); sx_xunlock(&sc->sc_lock); sx_destroy(&sc->sc_lock); taskqgroup_detach(qgroup_wg_tqg, &sc->sc_handshake); for (int i = 0; i < mp_ncpus; i++) { taskqgroup_detach(qgroup_wg_tqg, &sc->sc_encrypt[i]); taskqgroup_detach(qgroup_wg_tqg, &sc->sc_decrypt[i]); } free(sc->sc_encrypt, M_WG); free(sc->sc_decrypt, M_WG); wg_queue_deinit(&sc->sc_handshake_queue); wg_queue_deinit(&sc->sc_encrypt_parallel); wg_queue_deinit(&sc->sc_decrypt_parallel); RADIX_NODE_HEAD_DESTROY(sc->sc_aip4); RADIX_NODE_HEAD_DESTROY(sc->sc_aip6); rn_detachhead((void **)&sc->sc_aip4); rn_detachhead((void **)&sc->sc_aip6); cookie_checker_free(&sc->sc_cookie); if (cred != NULL) crfree(cred); bpfdetach(sc->sc_ifp); if_detach(sc->sc_ifp); if_free(sc->sc_ifp); noise_local_free(sc->sc_local, wg_clone_deferred_free); return (0); } static void wg_qflush(if_t ifp __unused) { } /* * Privileged information (private-key, preshared-key) are only exported for * root and jailed root by default. 
*/ static bool wgc_privileged(struct wg_softc *sc) { struct thread *td; td = curthread; return (priv_check(td, PRIV_NET_WG) == 0); } static void wg_reassign(if_t ifp, struct vnet *new_vnet __unused, char *unused __unused) { struct wg_softc *sc; sc = if_getsoftc(ifp); wg_down(sc); } static void wg_init(void *xsc) { struct wg_softc *sc; sc = xsc; wg_up(sc); } static void vnet_wg_init(const void *unused __unused) { struct if_clone_addreq req = { .create_f = wg_clone_create, .destroy_f = wg_clone_destroy, .flags = IFC_F_AUTOUNIT, }; V_wg_cloner = ifc_attach_cloner(wgname, &req); } VNET_SYSINIT(vnet_wg_init, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_ANY, vnet_wg_init, NULL); static void vnet_wg_uninit(const void *unused __unused) { if (V_wg_cloner) ifc_detach_cloner(V_wg_cloner); } VNET_SYSUNINIT(vnet_wg_uninit, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_ANY, vnet_wg_uninit, NULL); static int wg_prison_remove(void *obj, void *data __unused) { const struct prison *pr = obj; struct wg_softc *sc; /* * Do a pass through all if_wg interfaces and release creds on any from * the jail that are supposed to be going away. This will, in turn, let * the jail die so that we don't end up with Schrödinger's jail. */ sx_slock(&wg_sx); LIST_FOREACH(sc, &wg_list, sc_entry) { sx_xlock(&sc->sc_lock); if (!(sc->sc_flags & WGF_DYING) && sc->sc_ucred && sc->sc_ucred->cr_prison == pr) { struct ucred *cred = sc->sc_ucred; DPRINTF(sc, "Creating jail exiting\n"); if_link_state_change(sc->sc_ifp, LINK_STATE_DOWN); wg_socket_uninit(sc); sc->sc_ucred = NULL; crfree(cred); sc->sc_flags |= WGF_DYING; } sx_xunlock(&sc->sc_lock); } sx_sunlock(&wg_sx); return (0); } #ifdef SELFTESTS #include "selftest/allowedips.c" static bool wg_run_selftests(void) { bool ret = true; ret &= wg_allowedips_selftest(); ret &= noise_counter_selftest(); ret &= cookie_selftest(); return ret; } #else static inline bool wg_run_selftests(void) { return true; } #endif static int wg_module_init(void) { int ret; osd_method_t methods[PR_MAXMETHOD] = { [PR_METHOD_REMOVE] = wg_prison_remove, }; wg_packet_zone = uma_zcreate("wg packet", sizeof(struct wg_packet), NULL, NULL, NULL, NULL, 0, 0); ret = crypto_init(); if (ret != 0) return (ret); ret = cookie_init(); if (ret != 0) return (ret); wg_osd_jail_slot = osd_jail_register(NULL, methods); if (!wg_run_selftests()) return (ENOTRECOVERABLE); return (0); } static void wg_module_deinit(void) { VNET_ITERATOR_DECL(vnet_iter); VNET_LIST_RLOCK(); VNET_FOREACH(vnet_iter) { struct if_clone *clone = VNET_VNET(vnet_iter, wg_cloner); if (clone) { ifc_detach_cloner(clone); VNET_VNET(vnet_iter, wg_cloner) = NULL; } } VNET_LIST_RUNLOCK(); NET_EPOCH_WAIT(); MPASS(LIST_EMPTY(&wg_list)); if (wg_osd_jail_slot != 0) osd_jail_deregister(wg_osd_jail_slot); cookie_deinit(); crypto_deinit(); if (wg_packet_zone != NULL) uma_zdestroy(wg_packet_zone); } static int wg_module_event_handler(module_t mod, int what, void *arg) { switch (what) { case MOD_LOAD: return wg_module_init(); case MOD_UNLOAD: wg_module_deinit(); break; default: return (EOPNOTSUPP); } return (0); } static moduledata_t wg_moduledata = { "if_wg", wg_module_event_handler, NULL }; DECLARE_MODULE(if_wg, wg_moduledata, SI_SUB_PSEUDO, SI_ORDER_ANY); MODULE_VERSION(if_wg, WIREGUARD_VERSION); MODULE_DEPEND(if_wg, crypto, 1, 1, 1); diff --git a/sys/net/if_disc.c b/sys/net/if_disc.c index 02f3bbbfdaf1..193bb31d138f 100644 --- a/sys/net/if_disc.c +++ b/sys/net/if_disc.c @@ -1,246 +1,246 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 1982, 1986, 1993 * The Regents of the 
University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * From: @(#)if_loop.c 8.1 (Berkeley) 6/10/93 */ /* * Discard interface driver for protocol testing and timing. * (Based on the loopback.) */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "opt_inet.h" #include "opt_inet6.h" #ifdef TINY_DSMTU #define DSMTU (1024+512) #else #define DSMTU 65532 #endif struct disc_softc { struct ifnet *sc_ifp; }; static int discoutput(struct ifnet *, struct mbuf *, const struct sockaddr *, struct route *); static int discioctl(struct ifnet *, u_long, caddr_t); static int disc_clone_create(struct if_clone *, int, caddr_t); static void disc_clone_destroy(struct ifnet *); static const char discname[] = "disc"; static MALLOC_DEFINE(M_DISC, discname, "Discard interface"); VNET_DEFINE_STATIC(struct if_clone *, disc_cloner); #define V_disc_cloner VNET(disc_cloner) static int disc_clone_create(struct if_clone *ifc, int unit, caddr_t params) { struct ifnet *ifp; struct disc_softc *sc; sc = malloc(sizeof(struct disc_softc), M_DISC, M_WAITOK | M_ZERO); ifp = sc->sc_ifp = if_alloc(IFT_LOOP); if (ifp == NULL) { free(sc, M_DISC); return (ENOSPC); } ifp->if_softc = sc; if_initname(ifp, discname, unit); ifp->if_mtu = DSMTU; /* * IFF_LOOPBACK should not be removed from disc's flags because * it controls what PF-specific routes are magically added when * a network address is assigned to the interface. Things just * won't work as intended w/o such routes because the output * interface selection for a packet is totally route-driven. * A valid alternative to IFF_LOOPBACK can be IFF_BROADCAST or * IFF_POINTOPOINT, but it would result in different properties * of the interface. 
*/ ifp->if_flags = IFF_LOOPBACK | IFF_MULTICAST; ifp->if_drv_flags = IFF_DRV_RUNNING; ifp->if_ioctl = discioctl; ifp->if_output = discoutput; ifp->if_hdrlen = 0; ifp->if_addrlen = 0; ifp->if_snd.ifq_maxlen = 20; if_attach(ifp); bpfattach(ifp, DLT_NULL, sizeof(u_int32_t)); return (0); } static void disc_clone_destroy(struct ifnet *ifp) { struct disc_softc *sc; sc = ifp->if_softc; bpfdetach(ifp); if_detach(ifp); if_free(ifp); free(sc, M_DISC); } static void vnet_disc_init(const void *unused __unused) { V_disc_cloner = if_clone_simple(discname, disc_clone_create, disc_clone_destroy, 0); } VNET_SYSINIT(vnet_disc_init, SI_SUB_PSEUDO, SI_ORDER_ANY, vnet_disc_init, NULL); static void vnet_disc_uninit(const void *unused __unused) { if_clone_detach(V_disc_cloner); } VNET_SYSUNINIT(vnet_disc_uninit, SI_SUB_INIT_IF, SI_ORDER_ANY, vnet_disc_uninit, NULL); static int disc_modevent(module_t mod, int type, void *data) { switch (type) { case MOD_LOAD: case MOD_UNLOAD: break; default: return (EOPNOTSUPP); } return (0); } static moduledata_t disc_mod = { "if_disc", disc_modevent, NULL }; DECLARE_MODULE(if_disc, disc_mod, SI_SUB_PSEUDO, SI_ORDER_ANY); static int discoutput(struct ifnet *ifp, struct mbuf *m, const struct sockaddr *dst, struct route *ro) { u_int32_t af; M_ASSERTPKTHDR(m); /* BPF writes need to be handled specially. */ - if (dst->sa_family == AF_UNSPEC) + if (dst->sa_family == AF_UNSPEC || dst->sa_family == pseudo_AF_HDRCMPLT) bcopy(dst->sa_data, &af, sizeof(af)); else af = RO_GET_FAMILY(ro, dst); if (bpf_peers_present(ifp->if_bpf)) bpf_mtap2(ifp->if_bpf, &af, sizeof(af), m); m->m_pkthdr.rcvif = ifp; if_inc_counter(ifp, IFCOUNTER_OPACKETS, 1); if_inc_counter(ifp, IFCOUNTER_OBYTES, m->m_pkthdr.len); m_freem(m); return (0); } /* * Process an ioctl request. */ static int discioctl(struct ifnet *ifp, u_long cmd, caddr_t data) { struct ifreq *ifr = (struct ifreq *)data; int error = 0; switch (cmd) { case SIOCSIFADDR: ifp->if_flags |= IFF_UP; /* * Everything else is done at a higher level. */ break; case SIOCADDMULTI: case SIOCDELMULTI: if (ifr == NULL) { error = EAFNOSUPPORT; /* XXX */ break; } switch (ifr->ifr_addr.sa_family) { #ifdef INET case AF_INET: break; #endif #ifdef INET6 case AF_INET6: break; #endif default: error = EAFNOSUPPORT; break; } break; case SIOCSIFMTU: ifp->if_mtu = ifr->ifr_mtu; break; default: error = EINVAL; } return (error); } diff --git a/sys/net/if_gif.c b/sys/net/if_gif.c index e5065889d732..ef64c15074ed 100644 --- a/sys/net/if_gif.c +++ b/sys/net/if_gif.c @@ -1,725 +1,726 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project. * Copyright (c) 2018 Andrey V. Elsukov * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the project nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. 
* * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $KAME: if_gif.c,v 1.87 2001/10/19 08:50:27 itojun Exp $ */ #include #include "opt_inet.h" #include "opt_inet6.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef INET #include #include #endif /* INET */ #ifdef INET6 #ifndef INET #include #endif #include #include #include #include #endif /* INET6 */ #include #include #include #include #include static const char gifname[] = "gif"; MALLOC_DEFINE(M_GIF, "gif", "Generic Tunnel Interface"); static struct sx gif_ioctl_sx; SX_SYSINIT(gif_ioctl_sx, &gif_ioctl_sx, "gif_ioctl"); void (*ng_gif_input_p)(struct ifnet *ifp, struct mbuf **mp, int af); void (*ng_gif_input_orphan_p)(struct ifnet *ifp, struct mbuf *m, int af); void (*ng_gif_attach_p)(struct ifnet *ifp); void (*ng_gif_detach_p)(struct ifnet *ifp); #ifdef VIMAGE static void gif_reassign(struct ifnet *, struct vnet *, char *); #endif static void gif_delete_tunnel(struct gif_softc *); static int gif_ioctl(struct ifnet *, u_long, caddr_t); static int gif_transmit(struct ifnet *, struct mbuf *); static void gif_qflush(struct ifnet *); static int gif_clone_create(struct if_clone *, int, caddr_t); static void gif_clone_destroy(struct ifnet *); VNET_DEFINE_STATIC(struct if_clone *, gif_cloner); #define V_gif_cloner VNET(gif_cloner) SYSCTL_DECL(_net_link); static SYSCTL_NODE(_net_link, IFT_GIF, gif, CTLFLAG_RW | CTLFLAG_MPSAFE, 0, "Generic Tunnel Interface"); #ifndef MAX_GIF_NEST /* * This macro controls the default upper limitation on nesting of gif tunnels. * Since, setting a large value to this macro with a careless configuration * may introduce system crash, we don't allow any nestings by default. * If you need to configure nested gif tunnels, you can define this macro * in your kernel configuration file. However, if you do so, please be * careful to configure the tunnels so that it won't make a loop. 
*/ #define MAX_GIF_NEST 1 #endif VNET_DEFINE_STATIC(int, max_gif_nesting) = MAX_GIF_NEST; #define V_max_gif_nesting VNET(max_gif_nesting) SYSCTL_INT(_net_link_gif, OID_AUTO, max_nesting, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(max_gif_nesting), 0, "Max nested tunnels"); static int gif_clone_create(struct if_clone *ifc, int unit, caddr_t params) { struct gif_softc *sc; sc = malloc(sizeof(struct gif_softc), M_GIF, M_WAITOK | M_ZERO); sc->gif_fibnum = curthread->td_proc->p_fibnum; GIF2IFP(sc) = if_alloc(IFT_GIF); GIF2IFP(sc)->if_softc = sc; if_initname(GIF2IFP(sc), gifname, unit); GIF2IFP(sc)->if_addrlen = 0; GIF2IFP(sc)->if_mtu = GIF_MTU; GIF2IFP(sc)->if_flags = IFF_POINTOPOINT | IFF_MULTICAST; GIF2IFP(sc)->if_ioctl = gif_ioctl; GIF2IFP(sc)->if_transmit = gif_transmit; GIF2IFP(sc)->if_qflush = gif_qflush; GIF2IFP(sc)->if_output = gif_output; #ifdef VIMAGE GIF2IFP(sc)->if_reassign = gif_reassign; #endif GIF2IFP(sc)->if_capabilities |= IFCAP_LINKSTATE; GIF2IFP(sc)->if_capenable |= IFCAP_LINKSTATE; if_attach(GIF2IFP(sc)); bpfattach(GIF2IFP(sc), DLT_NULL, sizeof(u_int32_t)); if (ng_gif_attach_p != NULL) (*ng_gif_attach_p)(GIF2IFP(sc)); return (0); } #ifdef VIMAGE static void gif_reassign(struct ifnet *ifp, struct vnet *new_vnet __unused, char *unused __unused) { struct gif_softc *sc; sx_xlock(&gif_ioctl_sx); sc = ifp->if_softc; if (sc != NULL) gif_delete_tunnel(sc); sx_xunlock(&gif_ioctl_sx); } #endif /* VIMAGE */ static void gif_clone_destroy(struct ifnet *ifp) { struct gif_softc *sc; sx_xlock(&gif_ioctl_sx); sc = ifp->if_softc; gif_delete_tunnel(sc); if (ng_gif_detach_p != NULL) (*ng_gif_detach_p)(ifp); bpfdetach(ifp); if_detach(ifp); ifp->if_softc = NULL; sx_xunlock(&gif_ioctl_sx); GIF_WAIT(); if_free(ifp); free(sc, M_GIF); } static void vnet_gif_init(const void *unused __unused) { V_gif_cloner = if_clone_simple(gifname, gif_clone_create, gif_clone_destroy, 0); #ifdef INET in_gif_init(); #endif #ifdef INET6 in6_gif_init(); #endif } VNET_SYSINIT(vnet_gif_init, SI_SUB_PSEUDO, SI_ORDER_ANY, vnet_gif_init, NULL); static void vnet_gif_uninit(const void *unused __unused) { if_clone_detach(V_gif_cloner); #ifdef INET in_gif_uninit(); #endif #ifdef INET6 in6_gif_uninit(); #endif } VNET_SYSUNINIT(vnet_gif_uninit, SI_SUB_PSEUDO, SI_ORDER_ANY, vnet_gif_uninit, NULL); static int gifmodevent(module_t mod, int type, void *data) { switch (type) { case MOD_LOAD: case MOD_UNLOAD: break; default: return (EOPNOTSUPP); } return (0); } static moduledata_t gif_mod = { "if_gif", gifmodevent, 0 }; DECLARE_MODULE(if_gif, gif_mod, SI_SUB_PSEUDO, SI_ORDER_ANY); MODULE_VERSION(if_gif, 1); struct gif_list * gif_hashinit(void) { struct gif_list *hash; int i; hash = malloc(sizeof(struct gif_list) * GIF_HASH_SIZE, M_GIF, M_WAITOK); for (i = 0; i < GIF_HASH_SIZE; i++) CK_LIST_INIT(&hash[i]); return (hash); } void gif_hashdestroy(struct gif_list *hash) { free(hash, M_GIF); } #define MTAG_GIF 1080679712 static int gif_transmit(struct ifnet *ifp, struct mbuf *m) { struct gif_softc *sc; struct etherip_header *eth; #ifdef INET struct ip *ip; #endif #ifdef INET6 struct ip6_hdr *ip6; uint32_t t; #endif uint32_t af; uint8_t proto, ecn; int error; NET_EPOCH_ASSERT(); #ifdef MAC error = mac_ifnet_check_transmit(ifp, m); if (error) { m_freem(m); goto err; } #endif error = ENETDOWN; sc = ifp->if_softc; if ((ifp->if_flags & IFF_MONITOR) != 0 || (ifp->if_flags & IFF_UP) == 0 || (ifp->if_drv_flags & IFF_DRV_RUNNING) == 0 || sc->gif_family == 0 || (error = if_tunnel_check_nesting(ifp, m, MTAG_GIF, V_max_gif_nesting)) != 0) { m_freem(m); goto 
err; } /* Now pull back the af that we stashed in the csum_data. */ if (ifp->if_bridge) af = AF_LINK; else af = m->m_pkthdr.csum_data; m->m_flags &= ~(M_BCAST|M_MCAST); M_SETFIB(m, sc->gif_fibnum); BPF_MTAP2(ifp, &af, sizeof(af), m); if_inc_counter(ifp, IFCOUNTER_OPACKETS, 1); if_inc_counter(ifp, IFCOUNTER_OBYTES, m->m_pkthdr.len); /* inner AF-specific encapsulation */ ecn = 0; switch (af) { #ifdef INET case AF_INET: proto = IPPROTO_IPV4; if (m->m_len < sizeof(struct ip)) m = m_pullup(m, sizeof(struct ip)); if (m == NULL) { error = ENOBUFS; goto err; } ip = mtod(m, struct ip *); ip_ecn_ingress((ifp->if_flags & IFF_LINK1) ? ECN_ALLOWED: ECN_NOCARE, &ecn, &ip->ip_tos); break; #endif #ifdef INET6 case AF_INET6: proto = IPPROTO_IPV6; if (m->m_len < sizeof(struct ip6_hdr)) m = m_pullup(m, sizeof(struct ip6_hdr)); if (m == NULL) { error = ENOBUFS; goto err; } t = 0; ip6 = mtod(m, struct ip6_hdr *); ip6_ecn_ingress((ifp->if_flags & IFF_LINK1) ? ECN_ALLOWED: ECN_NOCARE, &t, &ip6->ip6_flow); ecn = (ntohl(t) >> 20) & 0xff; break; #endif case AF_LINK: proto = IPPROTO_ETHERIP; M_PREPEND(m, sizeof(struct etherip_header), M_NOWAIT); if (m == NULL) { error = ENOBUFS; goto err; } eth = mtod(m, struct etherip_header *); eth->eip_resvh = 0; eth->eip_ver = ETHERIP_VERSION; eth->eip_resvl = 0; break; default: error = EAFNOSUPPORT; m_freem(m); goto err; } /* XXX should we check if our outer source is legal? */ /* dispatch to output logic based on outer AF */ switch (sc->gif_family) { #ifdef INET case AF_INET: error = in_gif_output(ifp, m, proto, ecn); break; #endif #ifdef INET6 case AF_INET6: error = in6_gif_output(ifp, m, proto, ecn); break; #endif default: m_freem(m); } err: if (error) if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); return (error); } static void gif_qflush(struct ifnet *ifp __unused) { } int gif_output(struct ifnet *ifp, struct mbuf *m, const struct sockaddr *dst, struct route *ro) { uint32_t af; KASSERT(ifp->if_bridge == NULL, ("%s: unexpectedly called with bridge attached", __func__)); - if (dst->sa_family == AF_UNSPEC) + /* BPF writes need to be handled specially. */ + if (dst->sa_family == AF_UNSPEC || dst->sa_family == pseudo_AF_HDRCMPLT) memcpy(&af, dst->sa_data, sizeof(af)); else af = RO_GET_FAMILY(ro, dst); /* * Now save the af in the inbound pkt csum data, this is a cheat since * we are using the inbound csum_data field to carry the af over to * the gif_transmit() routine, avoiding using yet another mtag. */ m->m_pkthdr.csum_data = af; return (ifp->if_transmit(ifp, m)); } void gif_input(struct mbuf *m, struct ifnet *ifp, int proto, uint8_t ecn) { struct etherip_header *eip; #ifdef INET struct ip *ip; #endif #ifdef INET6 struct ip6_hdr *ip6; uint32_t t; #endif struct ether_header *eh; struct ifnet *oldifp; int isr, n, af; NET_EPOCH_ASSERT(); if (ifp == NULL) { /* just in case */ m_freem(m); return; } m->m_pkthdr.rcvif = ifp; m_clrprotoflags(m); switch (proto) { #ifdef INET case IPPROTO_IPV4: af = AF_INET; if (m->m_len < sizeof(struct ip)) m = m_pullup(m, sizeof(struct ip)); if (m == NULL) goto drop; ip = mtod(m, struct ip *); if (ip_ecn_egress((ifp->if_flags & IFF_LINK1) ? ECN_ALLOWED: ECN_NOCARE, &ecn, &ip->ip_tos) == 0) { m_freem(m); goto drop; } break; #endif #ifdef INET6 case IPPROTO_IPV6: af = AF_INET6; if (m->m_len < sizeof(struct ip6_hdr)) m = m_pullup(m, sizeof(struct ip6_hdr)); if (m == NULL) goto drop; t = htonl((uint32_t)ecn << 20); ip6 = mtod(m, struct ip6_hdr *); if (ip6_ecn_egress((ifp->if_flags & IFF_LINK1) ? 
ECN_ALLOWED: ECN_NOCARE, &t, &ip6->ip6_flow) == 0) { m_freem(m); goto drop; } break; #endif case IPPROTO_ETHERIP: af = AF_LINK; break; default: m_freem(m); goto drop; } #ifdef MAC mac_ifnet_create_mbuf(ifp, m); #endif if (bpf_peers_present(ifp->if_bpf)) { uint32_t af1 = af; bpf_mtap2(ifp->if_bpf, &af1, sizeof(af1), m); } if ((ifp->if_flags & IFF_MONITOR) != 0) { if_inc_counter(ifp, IFCOUNTER_IPACKETS, 1); if_inc_counter(ifp, IFCOUNTER_IBYTES, m->m_pkthdr.len); m_freem(m); return; } if (ng_gif_input_p != NULL) { (*ng_gif_input_p)(ifp, &m, af); if (m == NULL) goto drop; } /* * Put the packet to the network layer input queue according to the * specified address family. * Note: older versions of gif_input directly called network layer * input functions, e.g. ip6_input, here. We changed the policy to * prevent too many recursive calls of such input functions, which * might cause kernel panic. But the change may introduce another * problem; if the input queue is full, packets are discarded. * The kernel stack overflow really happened, and we believed * queue-full rarely occurs, so we changed the policy. */ switch (af) { #ifdef INET case AF_INET: isr = NETISR_IP; break; #endif #ifdef INET6 case AF_INET6: isr = NETISR_IPV6; break; #endif case AF_LINK: n = sizeof(struct etherip_header) + sizeof(struct ether_header); if (n > m->m_len) m = m_pullup(m, n); if (m == NULL) goto drop; eip = mtod(m, struct etherip_header *); if (eip->eip_ver != ETHERIP_VERSION) { /* discard unknown versions */ m_freem(m); goto drop; } m_adj_decap(m, sizeof(struct etherip_header)); m->m_flags &= ~(M_BCAST|M_MCAST); m->m_pkthdr.rcvif = ifp; if (ifp->if_bridge) { oldifp = ifp; eh = mtod(m, struct ether_header *); if (ETHER_IS_MULTICAST(eh->ether_dhost)) { if (ETHER_IS_BROADCAST(eh->ether_dhost)) m->m_flags |= M_BCAST; else m->m_flags |= M_MCAST; if_inc_counter(ifp, IFCOUNTER_IMCASTS, 1); } BRIDGE_INPUT(ifp, m); if (m != NULL && ifp != oldifp) { /* * The bridge gave us back itself or one of the * members for which the frame is addressed. 
*/ ether_demux(ifp, m); return; } } if (m != NULL) m_freem(m); return; default: if (ng_gif_input_orphan_p != NULL) (*ng_gif_input_orphan_p)(ifp, m, af); else m_freem(m); return; } if_inc_counter(ifp, IFCOUNTER_IPACKETS, 1); if_inc_counter(ifp, IFCOUNTER_IBYTES, m->m_pkthdr.len); M_SETFIB(m, ifp->if_fib); netisr_dispatch(isr, m); return; drop: if_inc_counter(ifp, IFCOUNTER_IERRORS, 1); } static int gif_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data) { struct ifreq *ifr = (struct ifreq*)data; struct gif_softc *sc; u_int options; int error; switch (cmd) { case SIOCSIFADDR: ifp->if_flags |= IFF_UP; case SIOCADDMULTI: case SIOCDELMULTI: case SIOCGIFMTU: case SIOCSIFFLAGS: return (0); case SIOCSIFMTU: if (ifr->ifr_mtu < GIF_MTU_MIN || ifr->ifr_mtu > GIF_MTU_MAX) return (EINVAL); else ifp->if_mtu = ifr->ifr_mtu; return (0); } sx_xlock(&gif_ioctl_sx); sc = ifp->if_softc; if (sc == NULL) { error = ENXIO; goto bad; } error = 0; switch (cmd) { case SIOCDIFPHYADDR: if (sc->gif_family == 0) break; gif_delete_tunnel(sc); break; #ifdef INET case SIOCSIFPHYADDR: case SIOCGIFPSRCADDR: case SIOCGIFPDSTADDR: error = in_gif_ioctl(sc, cmd, data); break; #endif #ifdef INET6 case SIOCSIFPHYADDR_IN6: case SIOCGIFPSRCADDR_IN6: case SIOCGIFPDSTADDR_IN6: error = in6_gif_ioctl(sc, cmd, data); break; #endif case SIOCGTUNFIB: ifr->ifr_fib = sc->gif_fibnum; break; case SIOCSTUNFIB: if ((error = priv_check(curthread, PRIV_NET_GIF)) != 0) break; if (ifr->ifr_fib >= rt_numfibs) error = EINVAL; else sc->gif_fibnum = ifr->ifr_fib; break; case GIFGOPTS: options = sc->gif_options; error = copyout(&options, ifr_data_get_ptr(ifr), sizeof(options)); break; case GIFSOPTS: if ((error = priv_check(curthread, PRIV_NET_GIF)) != 0) break; error = copyin(ifr_data_get_ptr(ifr), &options, sizeof(options)); if (error) break; if (options & ~GIF_OPTMASK) { error = EINVAL; break; } if (sc->gif_options != options) { switch (sc->gif_family) { #ifdef INET case AF_INET: error = in_gif_setopts(sc, options); break; #endif #ifdef INET6 case AF_INET6: error = in6_gif_setopts(sc, options); break; #endif default: /* No need to invoke AF-handler */ sc->gif_options = options; } } break; default: error = EINVAL; break; } if (error == 0 && sc->gif_family != 0) { if ( #ifdef INET cmd == SIOCSIFPHYADDR || #endif #ifdef INET6 cmd == SIOCSIFPHYADDR_IN6 || #endif 0) { if_link_state_change(ifp, LINK_STATE_UP); } } bad: sx_xunlock(&gif_ioctl_sx); return (error); } static void gif_delete_tunnel(struct gif_softc *sc) { sx_assert(&gif_ioctl_sx, SA_XLOCKED); if (sc->gif_family != 0) { CK_LIST_REMOVE(sc, srchash); CK_LIST_REMOVE(sc, chain); /* Wait until it become safe to free gif_hdr */ GIF_WAIT(); free(sc->gif_hdr, M_GIF); } sc->gif_family = 0; GIF2IFP(sc)->if_drv_flags &= ~IFF_DRV_RUNNING; if_link_state_change(GIF2IFP(sc), LINK_STATE_DOWN); } diff --git a/sys/net/if_gre.c b/sys/net/if_gre.c index 55163416f807..ca9c4835daf6 100644 --- a/sys/net/if_gre.c +++ b/sys/net/if_gre.c @@ -1,831 +1,832 @@ /*- * SPDX-License-Identifier: BSD-2-Clause * * Copyright (c) 1998 The NetBSD Foundation, Inc. * Copyright (c) 2014, 2018 Andrey V. Elsukov * All rights reserved. * * This code is derived from software contributed to The NetBSD Foundation * by Heiko W.Rupp * * IPv6-over-GRE contributed by Gert Doering * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. 
Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. * * $NetBSD: if_gre.c,v 1.49 2003/12/11 00:22:29 itojun Exp $ */ #include #include "opt_inet.h" #include "opt_inet6.h" #include "opt_rss.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef INET #include #include #include #ifdef RSS #include #endif #endif #ifdef INET6 #include #include #include #ifdef RSS #include #endif #endif #include #include #include #include #include #include #define GREMTU 1476 static const char grename[] = "gre"; MALLOC_DEFINE(M_GRE, grename, "Generic Routing Encapsulation"); static struct sx gre_ioctl_sx; SX_SYSINIT(gre_ioctl_sx, &gre_ioctl_sx, "gre_ioctl"); static int gre_clone_create(struct if_clone *, int, caddr_t); static void gre_clone_destroy(struct ifnet *); VNET_DEFINE_STATIC(struct if_clone *, gre_cloner); #define V_gre_cloner VNET(gre_cloner) #ifdef VIMAGE static void gre_reassign(struct ifnet *, struct vnet *, char *); #endif static void gre_qflush(struct ifnet *); static int gre_transmit(struct ifnet *, struct mbuf *); static int gre_ioctl(struct ifnet *, u_long, caddr_t); static int gre_output(struct ifnet *, struct mbuf *, const struct sockaddr *, struct route *); static void gre_delete_tunnel(struct gre_softc *); SYSCTL_DECL(_net_link); static SYSCTL_NODE(_net_link, IFT_TUNNEL, gre, CTLFLAG_RW | CTLFLAG_MPSAFE, 0, "Generic Routing Encapsulation"); #ifndef MAX_GRE_NEST /* * This macro controls the default upper limitation on nesting of gre tunnels. * Since, setting a large value to this macro with a careless configuration * may introduce system crash, we don't allow any nestings by default. * If you need to configure nested gre tunnels, you can define this macro * in your kernel configuration file. However, if you do so, please be * careful to configure the tunnels so that it won't make a loop. 
*/ #define MAX_GRE_NEST 1 #endif VNET_DEFINE_STATIC(int, max_gre_nesting) = MAX_GRE_NEST; #define V_max_gre_nesting VNET(max_gre_nesting) SYSCTL_INT(_net_link_gre, OID_AUTO, max_nesting, CTLFLAG_RW | CTLFLAG_VNET, &VNET_NAME(max_gre_nesting), 0, "Max nested tunnels"); static void vnet_gre_init(const void *unused __unused) { V_gre_cloner = if_clone_simple(grename, gre_clone_create, gre_clone_destroy, 0); #ifdef INET in_gre_init(); #endif #ifdef INET6 in6_gre_init(); #endif } VNET_SYSINIT(vnet_gre_init, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_ANY, vnet_gre_init, NULL); static void vnet_gre_uninit(const void *unused __unused) { if_clone_detach(V_gre_cloner); #ifdef INET in_gre_uninit(); #endif #ifdef INET6 in6_gre_uninit(); #endif /* XXX: epoch_call drain */ } VNET_SYSUNINIT(vnet_gre_uninit, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_ANY, vnet_gre_uninit, NULL); static int gre_clone_create(struct if_clone *ifc, int unit, caddr_t params) { struct gre_softc *sc; sc = malloc(sizeof(struct gre_softc), M_GRE, M_WAITOK | M_ZERO); sc->gre_fibnum = curthread->td_proc->p_fibnum; GRE2IFP(sc) = if_alloc(IFT_TUNNEL); GRE2IFP(sc)->if_softc = sc; if_initname(GRE2IFP(sc), grename, unit); GRE2IFP(sc)->if_mtu = GREMTU; GRE2IFP(sc)->if_flags = IFF_POINTOPOINT|IFF_MULTICAST; GRE2IFP(sc)->if_output = gre_output; GRE2IFP(sc)->if_ioctl = gre_ioctl; GRE2IFP(sc)->if_transmit = gre_transmit; GRE2IFP(sc)->if_qflush = gre_qflush; #ifdef VIMAGE GRE2IFP(sc)->if_reassign = gre_reassign; #endif GRE2IFP(sc)->if_capabilities |= IFCAP_LINKSTATE; GRE2IFP(sc)->if_capenable |= IFCAP_LINKSTATE; if_attach(GRE2IFP(sc)); bpfattach(GRE2IFP(sc), DLT_NULL, sizeof(u_int32_t)); return (0); } #ifdef VIMAGE static void gre_reassign(struct ifnet *ifp, struct vnet *new_vnet __unused, char *unused __unused) { struct gre_softc *sc; sx_xlock(&gre_ioctl_sx); sc = ifp->if_softc; if (sc != NULL) gre_delete_tunnel(sc); sx_xunlock(&gre_ioctl_sx); } #endif /* VIMAGE */ static void gre_clone_destroy(struct ifnet *ifp) { struct gre_softc *sc; sx_xlock(&gre_ioctl_sx); sc = ifp->if_softc; gre_delete_tunnel(sc); bpfdetach(ifp); if_detach(ifp); ifp->if_softc = NULL; sx_xunlock(&gre_ioctl_sx); GRE_WAIT(); if_free(ifp); free(sc, M_GRE); } static int gre_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data) { struct ifreq *ifr = (struct ifreq *)data; struct gre_softc *sc; uint32_t opt; int error; switch (cmd) { case SIOCSIFMTU: /* XXX: */ if (ifr->ifr_mtu < 576) return (EINVAL); ifp->if_mtu = ifr->ifr_mtu; return (0); case SIOCSIFADDR: ifp->if_flags |= IFF_UP; case SIOCSIFFLAGS: case SIOCADDMULTI: case SIOCDELMULTI: return (0); case GRESADDRS: case GRESADDRD: case GREGADDRS: case GREGADDRD: case GRESPROTO: case GREGPROTO: return (EOPNOTSUPP); } sx_xlock(&gre_ioctl_sx); sc = ifp->if_softc; if (sc == NULL) { error = ENXIO; goto end; } error = 0; switch (cmd) { case SIOCDIFPHYADDR: if (sc->gre_family == 0) break; gre_delete_tunnel(sc); break; #ifdef INET case SIOCSIFPHYADDR: case SIOCGIFPSRCADDR: case SIOCGIFPDSTADDR: error = in_gre_ioctl(sc, cmd, data); break; #endif #ifdef INET6 case SIOCSIFPHYADDR_IN6: case SIOCGIFPSRCADDR_IN6: case SIOCGIFPDSTADDR_IN6: error = in6_gre_ioctl(sc, cmd, data); break; #endif case SIOCGTUNFIB: ifr->ifr_fib = sc->gre_fibnum; break; case SIOCSTUNFIB: if ((error = priv_check(curthread, PRIV_NET_GRE)) != 0) break; if (ifr->ifr_fib >= rt_numfibs) error = EINVAL; else sc->gre_fibnum = ifr->ifr_fib; break; case GRESKEY: case GRESOPTS: case GRESPORT: if ((error = priv_check(curthread, PRIV_NET_GRE)) != 0) break; if ((error = 
copyin(ifr_data_get_ptr(ifr), &opt, sizeof(opt))) != 0) break; if (cmd == GRESKEY) { if (sc->gre_key == opt) break; } else if (cmd == GRESOPTS) { if (opt & ~GRE_OPTMASK) { error = EINVAL; break; } if (sc->gre_options == opt) break; } else if (cmd == GRESPORT) { if (opt != 0 && (opt < V_ipport_hifirstauto || opt > V_ipport_hilastauto)) { error = EINVAL; break; } if (sc->gre_port == opt) break; if ((sc->gre_options & GRE_UDPENCAP) == 0) { /* * UDP encapsulation is not enabled, thus * there is no need to reattach softc. */ sc->gre_port = opt; break; } } switch (sc->gre_family) { #ifdef INET case AF_INET: error = in_gre_setopts(sc, cmd, opt); break; #endif #ifdef INET6 case AF_INET6: error = in6_gre_setopts(sc, cmd, opt); break; #endif default: /* * Tunnel is not yet configured. * We can just change any parameters. */ if (cmd == GRESKEY) sc->gre_key = opt; if (cmd == GRESOPTS) sc->gre_options = opt; if (cmd == GRESPORT) sc->gre_port = opt; break; } /* * XXX: Do we need to initiate change of interface * state here? */ break; case GREGKEY: error = copyout(&sc->gre_key, ifr_data_get_ptr(ifr), sizeof(sc->gre_key)); break; case GREGOPTS: error = copyout(&sc->gre_options, ifr_data_get_ptr(ifr), sizeof(sc->gre_options)); break; case GREGPORT: error = copyout(&sc->gre_port, ifr_data_get_ptr(ifr), sizeof(sc->gre_port)); break; default: error = EINVAL; break; } if (error == 0 && sc->gre_family != 0) { if ( #ifdef INET cmd == SIOCSIFPHYADDR || #endif #ifdef INET6 cmd == SIOCSIFPHYADDR_IN6 || #endif 0) { if_link_state_change(ifp, LINK_STATE_UP); } } end: sx_xunlock(&gre_ioctl_sx); return (error); } static void gre_delete_tunnel(struct gre_softc *sc) { struct gre_socket *gs; sx_assert(&gre_ioctl_sx, SA_XLOCKED); if (sc->gre_family != 0) { CK_LIST_REMOVE(sc, chain); CK_LIST_REMOVE(sc, srchash); GRE_WAIT(); free(sc->gre_hdr, M_GRE); sc->gre_family = 0; } /* * If this Tunnel was the last one that could use UDP socket, * we should unlink socket from hash table and close it. 
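 * The gre_socket structure itself is reclaimed via NET_EPOCH_CALL(gre_sofree, ...)
 * below, so readers still running inside the network epoch never see it freed
 * underneath them.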
*/ if ((gs = sc->gre_so) != NULL && CK_LIST_EMPTY(&gs->list)) { CK_LIST_REMOVE(gs, chain); soclose(gs->so); NET_EPOCH_CALL(gre_sofree, &gs->epoch_ctx); sc->gre_so = NULL; } GRE2IFP(sc)->if_drv_flags &= ~IFF_DRV_RUNNING; if_link_state_change(GRE2IFP(sc), LINK_STATE_DOWN); } struct gre_list * gre_hashinit(void) { struct gre_list *hash; int i; hash = malloc(sizeof(struct gre_list) * GRE_HASH_SIZE, M_GRE, M_WAITOK); for (i = 0; i < GRE_HASH_SIZE; i++) CK_LIST_INIT(&hash[i]); return (hash); } void gre_hashdestroy(struct gre_list *hash) { free(hash, M_GRE); } void gre_sofree(epoch_context_t ctx) { struct gre_socket *gs; gs = __containerof(ctx, struct gre_socket, epoch_ctx); free(gs, M_GRE); } static __inline uint16_t gre_cksum_add(uint16_t sum, uint16_t a) { uint16_t res; res = sum + a; return (res + (res < a)); } void gre_update_udphdr(struct gre_softc *sc, struct udphdr *udp, uint16_t csum) { sx_assert(&gre_ioctl_sx, SA_XLOCKED); MPASS(sc->gre_options & GRE_UDPENCAP); udp->uh_dport = htons(GRE_UDPPORT); udp->uh_sport = htons(sc->gre_port); udp->uh_sum = csum; udp->uh_ulen = 0; } void gre_update_hdr(struct gre_softc *sc, struct grehdr *gh) { uint32_t *opts; uint16_t flags; sx_assert(&gre_ioctl_sx, SA_XLOCKED); flags = 0; opts = gh->gre_opts; if (sc->gre_options & GRE_ENABLE_CSUM) { flags |= GRE_FLAGS_CP; sc->gre_hlen += 2 * sizeof(uint16_t); *opts++ = 0; } if (sc->gre_key != 0) { flags |= GRE_FLAGS_KP; sc->gre_hlen += sizeof(uint32_t); *opts++ = htonl(sc->gre_key); } if (sc->gre_options & GRE_ENABLE_SEQ) { flags |= GRE_FLAGS_SP; sc->gre_hlen += sizeof(uint32_t); *opts++ = 0; } else sc->gre_oseq = 0; gh->gre_flags = htons(flags); } int gre_input(struct mbuf *m, int off, int proto, void *arg) { struct gre_softc *sc = arg; struct grehdr *gh; struct ifnet *ifp; uint32_t *opts; #ifdef notyet uint32_t key; #endif uint16_t flags; int hlen, isr, af; ifp = GRE2IFP(sc); hlen = off + sizeof(struct grehdr) + 4 * sizeof(uint32_t); if (m->m_pkthdr.len < hlen) goto drop; if (m->m_len < hlen) { m = m_pullup(m, hlen); if (m == NULL) goto drop; } gh = (struct grehdr *)mtodo(m, off); flags = ntohs(gh->gre_flags); if (flags & ~GRE_FLAGS_MASK) goto drop; opts = gh->gre_opts; hlen = 2 * sizeof(uint16_t); if (flags & GRE_FLAGS_CP) { /* reserved1 field must be zero */ if (((uint16_t *)opts)[1] != 0) goto drop; if (in_cksum_skip(m, m->m_pkthdr.len, off) != 0) goto drop; hlen += 2 * sizeof(uint16_t); opts++; } if (flags & GRE_FLAGS_KP) { #ifdef notyet /* * XXX: The current implementation uses the key only for outgoing * packets. But we can check the key value here, or even in the * encapcheck function. */ key = ntohl(*opts); #endif hlen += sizeof(uint32_t); opts++; } #ifdef notyet } else key = 0; if (sc->gre_key != 0 && (key != sc->gre_key || key != 0)) goto drop; #endif if (flags & GRE_FLAGS_SP) { #ifdef notyet seq = ntohl(*opts); #endif hlen += sizeof(uint32_t); } switch (ntohs(gh->gre_proto)) { case ETHERTYPE_WCCP: /* * For WCCP skip an additional 4 bytes if after GRE header * doesn't follow an IP header. 
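 * (WCCP redirection may place an extra 4-byte header between the GRE header and
 * the encapsulated IP packet; the test below peeks at the first nibble following
 * the GRE header and skips those 4 bytes whenever it is not the IPv4 version
 * nibble, 0x4.)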
*/ if (flags == 0 && (*(uint8_t *)gh->gre_opts & 0xF0) != 0x40) hlen += sizeof(uint32_t); /* FALLTHROUGH */ case ETHERTYPE_IP: isr = NETISR_IP; af = AF_INET; break; case ETHERTYPE_IPV6: isr = NETISR_IPV6; af = AF_INET6; break; default: goto drop; } m_adj(m, off + hlen); m_clrprotoflags(m); m->m_pkthdr.rcvif = ifp; M_SETFIB(m, ifp->if_fib); #ifdef MAC mac_ifnet_create_mbuf(ifp, m); #endif BPF_MTAP2(ifp, &af, sizeof(af), m); if_inc_counter(ifp, IFCOUNTER_IPACKETS, 1); if_inc_counter(ifp, IFCOUNTER_IBYTES, m->m_pkthdr.len); if ((ifp->if_flags & IFF_MONITOR) != 0) m_freem(m); else netisr_dispatch(isr, m); return (IPPROTO_DONE); drop: if_inc_counter(ifp, IFCOUNTER_IERRORS, 1); m_freem(m); return (IPPROTO_DONE); } static int gre_output(struct ifnet *ifp, struct mbuf *m, const struct sockaddr *dst, struct route *ro) { uint32_t af; - if (dst->sa_family == AF_UNSPEC) + /* BPF writes need to be handled specially. */ + if (dst->sa_family == AF_UNSPEC || dst->sa_family == pseudo_AF_HDRCMPLT) bcopy(dst->sa_data, &af, sizeof(af)); else af = RO_GET_FAMILY(ro, dst); /* * Now save the af in the inbound pkt csum data, this is a cheat since * we are using the inbound csum_data field to carry the af over to * the gre_transmit() routine, avoiding using yet another mtag. */ m->m_pkthdr.csum_data = af; return (ifp->if_transmit(ifp, m)); } static void gre_setseqn(struct grehdr *gh, uint32_t seq) { uint32_t *opts; uint16_t flags; opts = gh->gre_opts; flags = ntohs(gh->gre_flags); KASSERT((flags & GRE_FLAGS_SP) != 0, ("gre_setseqn called, but GRE_FLAGS_SP isn't set ")); if (flags & GRE_FLAGS_CP) opts++; if (flags & GRE_FLAGS_KP) opts++; *opts = htonl(seq); } static uint32_t gre_flowid(struct gre_softc *sc, struct mbuf *m, uint32_t af) { uint32_t flowid = 0; if ((sc->gre_options & GRE_UDPENCAP) == 0 || sc->gre_port != 0) return (flowid); switch (af) { #ifdef INET case AF_INET: #ifdef RSS flowid = rss_hash_ip4_2tuple(mtod(m, struct ip *)->ip_src, mtod(m, struct ip *)->ip_dst); break; #endif flowid = mtod(m, struct ip *)->ip_src.s_addr ^ mtod(m, struct ip *)->ip_dst.s_addr; break; #endif #ifdef INET6 case AF_INET6: #ifdef RSS flowid = rss_hash_ip6_2tuple( &mtod(m, struct ip6_hdr *)->ip6_src, &mtod(m, struct ip6_hdr *)->ip6_dst); break; #endif flowid = mtod(m, struct ip6_hdr *)->ip6_src.s6_addr32[3] ^ mtod(m, struct ip6_hdr *)->ip6_dst.s6_addr32[3]; break; #endif default: break; } return (flowid); } #define MTAG_GRE 1307983903 static int gre_transmit(struct ifnet *ifp, struct mbuf *m) { GRE_RLOCK_TRACKER; struct gre_softc *sc; struct grehdr *gh; struct udphdr *uh; uint32_t af, flowid; int error, len; uint16_t proto; len = 0; GRE_RLOCK(); #ifdef MAC error = mac_ifnet_check_transmit(ifp, m); if (error) { m_freem(m); goto drop; } #endif error = ENETDOWN; sc = ifp->if_softc; if ((ifp->if_flags & IFF_MONITOR) != 0 || (ifp->if_flags & IFF_UP) == 0 || (ifp->if_drv_flags & IFF_DRV_RUNNING) == 0 || sc->gre_family == 0 || (error = if_tunnel_check_nesting(ifp, m, MTAG_GRE, V_max_gre_nesting)) != 0) { m_freem(m); goto drop; } af = m->m_pkthdr.csum_data; BPF_MTAP2(ifp, &af, sizeof(af), m); m->m_flags &= ~(M_BCAST|M_MCAST); flowid = gre_flowid(sc, m, af); M_SETFIB(m, sc->gre_fibnum); M_PREPEND(m, sc->gre_hlen, M_NOWAIT); if (m == NULL) { error = ENOBUFS; goto drop; } bcopy(sc->gre_hdr, mtod(m, void *), sc->gre_hlen); /* Determine GRE proto */ switch (af) { #ifdef INET case AF_INET: proto = htons(ETHERTYPE_IP); break; #endif #ifdef INET6 case AF_INET6: proto = htons(ETHERTYPE_IPV6); break; #endif default: m_freem(m); error = 
ENETDOWN; goto drop; } /* Determine offset of GRE header */ switch (sc->gre_family) { #ifdef INET case AF_INET: len = sizeof(struct ip); break; #endif #ifdef INET6 case AF_INET6: len = sizeof(struct ip6_hdr); break; #endif default: m_freem(m); error = ENETDOWN; goto drop; } if (sc->gre_options & GRE_UDPENCAP) { uh = (struct udphdr *)mtodo(m, len); uh->uh_sport |= htons(V_ipport_hifirstauto) | (flowid >> 16) | (flowid & 0xFFFF); uh->uh_sport = htons(ntohs(uh->uh_sport) % V_ipport_hilastauto); uh->uh_ulen = htons(m->m_pkthdr.len - len); uh->uh_sum = gre_cksum_add(uh->uh_sum, htons(m->m_pkthdr.len - len + IPPROTO_UDP)); m->m_pkthdr.csum_flags = sc->gre_csumflags; m->m_pkthdr.csum_data = offsetof(struct udphdr, uh_sum); len += sizeof(struct udphdr); } gh = (struct grehdr *)mtodo(m, len); gh->gre_proto = proto; if (sc->gre_options & GRE_ENABLE_SEQ) gre_setseqn(gh, sc->gre_oseq++); if (sc->gre_options & GRE_ENABLE_CSUM) { *(uint16_t *)gh->gre_opts = in_cksum_skip(m, m->m_pkthdr.len, len); } len = m->m_pkthdr.len - len; switch (sc->gre_family) { #ifdef INET case AF_INET: error = in_gre_output(m, af, sc->gre_hlen); break; #endif #ifdef INET6 case AF_INET6: error = in6_gre_output(m, af, sc->gre_hlen, flowid); break; #endif default: m_freem(m); error = ENETDOWN; } drop: if (error) if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); else { if_inc_counter(ifp, IFCOUNTER_OPACKETS, 1); if_inc_counter(ifp, IFCOUNTER_OBYTES, len); } GRE_RUNLOCK(); return (error); } static void gre_qflush(struct ifnet *ifp __unused) { } static int gremodevent(module_t mod, int type, void *data) { switch (type) { case MOD_LOAD: case MOD_UNLOAD: break; default: return (EOPNOTSUPP); } return (0); } static moduledata_t gre_mod = { "if_gre", gremodevent, 0 }; DECLARE_MODULE(if_gre, gre_mod, SI_SUB_PSEUDO, SI_ORDER_ANY); MODULE_VERSION(if_gre, 1); diff --git a/sys/net/if_me.c b/sys/net/if_me.c index e9bcd345b5c5..80c2816b808a 100644 --- a/sys/net/if_me.c +++ b/sys/net/if_me.c @@ -1,688 +1,689 @@ /*- * Copyright (c) 2014, 2018 Andrey V. Elsukov * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #define MEMTU (1500 - sizeof(struct mobhdr)) static const char mename[] = "me"; static MALLOC_DEFINE(M_IFME, mename, "Minimal Encapsulation for IP"); /* Minimal forwarding header RFC 2004 */ struct mobhdr { uint8_t mob_proto; /* protocol */ uint8_t mob_flags; /* flags */ #define MOB_FLAGS_SP 0x80 /* source present */ uint16_t mob_csum; /* header checksum */ struct in_addr mob_dst; /* original destination address */ struct in_addr mob_src; /* original source addr (optional) */ } __packed; struct me_softc { struct ifnet *me_ifp; u_int me_fibnum; struct in_addr me_src; struct in_addr me_dst; CK_LIST_ENTRY(me_softc) chain; CK_LIST_ENTRY(me_softc) srchash; }; CK_LIST_HEAD(me_list, me_softc); #define ME2IFP(sc) ((sc)->me_ifp) #define ME_READY(sc) ((sc)->me_src.s_addr != 0) #define ME_RLOCK_TRACKER struct epoch_tracker me_et #define ME_RLOCK() epoch_enter_preempt(net_epoch_preempt, &me_et) #define ME_RUNLOCK() epoch_exit_preempt(net_epoch_preempt, &me_et) #define ME_WAIT() epoch_wait_preempt(net_epoch_preempt) #ifndef ME_HASH_SIZE #define ME_HASH_SIZE (1 << 4) #endif VNET_DEFINE_STATIC(struct me_list *, me_hashtbl) = NULL; VNET_DEFINE_STATIC(struct me_list *, me_srchashtbl) = NULL; #define V_me_hashtbl VNET(me_hashtbl) #define V_me_srchashtbl VNET(me_srchashtbl) #define ME_HASH(src, dst) (V_me_hashtbl[\ me_hashval((src), (dst)) & (ME_HASH_SIZE - 1)]) #define ME_SRCHASH(src) (V_me_srchashtbl[\ fnv_32_buf(&(src), sizeof(src), FNV1_32_INIT) & (ME_HASH_SIZE - 1)]) static struct sx me_ioctl_sx; SX_SYSINIT(me_ioctl_sx, &me_ioctl_sx, "me_ioctl"); static int me_clone_create(struct if_clone *, int, caddr_t); static void me_clone_destroy(struct ifnet *); VNET_DEFINE_STATIC(struct if_clone *, me_cloner); #define V_me_cloner VNET(me_cloner) #ifdef VIMAGE static void me_reassign(struct ifnet *, struct vnet *, char *); #endif static void me_qflush(struct ifnet *); static int me_transmit(struct ifnet *, struct mbuf *); static int me_ioctl(struct ifnet *, u_long, caddr_t); static int me_output(struct ifnet *, struct mbuf *, const struct sockaddr *, struct route *); static int me_input(struct mbuf *, int, int, void *); static int me_set_tunnel(struct me_softc *, in_addr_t, in_addr_t); static void me_delete_tunnel(struct me_softc *); SYSCTL_DECL(_net_link); static SYSCTL_NODE(_net_link, IFT_TUNNEL, me, CTLFLAG_RW | CTLFLAG_MPSAFE, 0, "Minimal Encapsulation for IP (RFC 2004)"); #ifndef MAX_ME_NEST #define MAX_ME_NEST 1 #endif VNET_DEFINE_STATIC(int, max_me_nesting) = MAX_ME_NEST; #define V_max_me_nesting VNET(max_me_nesting) SYSCTL_INT(_net_link_me, OID_AUTO, max_nesting, CTLFLAG_RW | CTLFLAG_VNET, &VNET_NAME(max_me_nesting), 0, "Max nested tunnels"); static uint32_t me_hashval(in_addr_t src, in_addr_t dst) { uint32_t ret; ret = fnv_32_buf(&src, sizeof(src), FNV1_32_INIT); return (fnv_32_buf(&dst, sizeof(dst), ret)); } static struct me_list * me_hashinit(void) { struct me_list *hash; int i; hash = malloc(sizeof(struct me_list) * ME_HASH_SIZE, M_IFME, M_WAITOK); for (i = 0; i < ME_HASH_SIZE; i++) CK_LIST_INIT(&hash[i]); return (hash); } static void vnet_me_init(const void *unused __unused) { V_me_cloner = if_clone_simple(mename, me_clone_create, me_clone_destroy, 0); } VNET_SYSINIT(vnet_me_init, 
SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_ANY, vnet_me_init, NULL); static void vnet_me_uninit(const void *unused __unused) { if (V_me_hashtbl != NULL) { free(V_me_hashtbl, M_IFME); V_me_hashtbl = NULL; ME_WAIT(); free(V_me_srchashtbl, M_IFME); } if_clone_detach(V_me_cloner); } VNET_SYSUNINIT(vnet_me_uninit, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_ANY, vnet_me_uninit, NULL); static int me_clone_create(struct if_clone *ifc, int unit, caddr_t params) { struct me_softc *sc; sc = malloc(sizeof(struct me_softc), M_IFME, M_WAITOK | M_ZERO); sc->me_fibnum = curthread->td_proc->p_fibnum; ME2IFP(sc) = if_alloc(IFT_TUNNEL); ME2IFP(sc)->if_softc = sc; if_initname(ME2IFP(sc), mename, unit); ME2IFP(sc)->if_mtu = MEMTU; ME2IFP(sc)->if_flags = IFF_POINTOPOINT|IFF_MULTICAST; ME2IFP(sc)->if_output = me_output; ME2IFP(sc)->if_ioctl = me_ioctl; ME2IFP(sc)->if_transmit = me_transmit; ME2IFP(sc)->if_qflush = me_qflush; #ifdef VIMAGE ME2IFP(sc)->if_reassign = me_reassign; #endif ME2IFP(sc)->if_capabilities |= IFCAP_LINKSTATE; ME2IFP(sc)->if_capenable |= IFCAP_LINKSTATE; if_attach(ME2IFP(sc)); bpfattach(ME2IFP(sc), DLT_NULL, sizeof(u_int32_t)); return (0); } #ifdef VIMAGE static void me_reassign(struct ifnet *ifp, struct vnet *new_vnet __unused, char *unused __unused) { struct me_softc *sc; sx_xlock(&me_ioctl_sx); sc = ifp->if_softc; if (sc != NULL) me_delete_tunnel(sc); sx_xunlock(&me_ioctl_sx); } #endif /* VIMAGE */ static void me_clone_destroy(struct ifnet *ifp) { struct me_softc *sc; sx_xlock(&me_ioctl_sx); sc = ifp->if_softc; me_delete_tunnel(sc); bpfdetach(ifp); if_detach(ifp); ifp->if_softc = NULL; sx_xunlock(&me_ioctl_sx); ME_WAIT(); if_free(ifp); free(sc, M_IFME); } static int me_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data) { struct ifreq *ifr = (struct ifreq *)data; struct sockaddr_in *src, *dst; struct me_softc *sc; int error; switch (cmd) { case SIOCSIFMTU: if (ifr->ifr_mtu < 576) return (EINVAL); ifp->if_mtu = ifr->ifr_mtu; return (0); case SIOCSIFADDR: ifp->if_flags |= IFF_UP; case SIOCSIFFLAGS: case SIOCADDMULTI: case SIOCDELMULTI: return (0); } sx_xlock(&me_ioctl_sx); sc = ifp->if_softc; if (sc == NULL) { error = ENXIO; goto end; } error = 0; switch (cmd) { case SIOCSIFPHYADDR: src = &((struct in_aliasreq *)data)->ifra_addr; dst = &((struct in_aliasreq *)data)->ifra_dstaddr; if (src->sin_family != dst->sin_family || src->sin_family != AF_INET || src->sin_len != dst->sin_len || src->sin_len != sizeof(struct sockaddr_in)) { error = EINVAL; break; } if (src->sin_addr.s_addr == INADDR_ANY || dst->sin_addr.s_addr == INADDR_ANY) { error = EADDRNOTAVAIL; break; } error = me_set_tunnel(sc, src->sin_addr.s_addr, dst->sin_addr.s_addr); break; case SIOCDIFPHYADDR: me_delete_tunnel(sc); break; case SIOCGIFPSRCADDR: case SIOCGIFPDSTADDR: if (!ME_READY(sc)) { error = EADDRNOTAVAIL; break; } src = (struct sockaddr_in *)&ifr->ifr_addr; memset(src, 0, sizeof(*src)); src->sin_family = AF_INET; src->sin_len = sizeof(*src); switch (cmd) { case SIOCGIFPSRCADDR: src->sin_addr = sc->me_src; break; case SIOCGIFPDSTADDR: src->sin_addr = sc->me_dst; break; } error = prison_if(curthread->td_ucred, sintosa(src)); if (error != 0) memset(src, 0, sizeof(*src)); break; case SIOCGTUNFIB: ifr->ifr_fib = sc->me_fibnum; break; case SIOCSTUNFIB: if ((error = priv_check(curthread, PRIV_NET_ME)) != 0) break; if (ifr->ifr_fib >= rt_numfibs) error = EINVAL; else sc->me_fibnum = ifr->ifr_fib; break; default: error = EINVAL; break; } end: sx_xunlock(&me_ioctl_sx); return (error); } static int me_lookup(const struct mbuf *m, int off, int 
proto, void **arg) { const struct ip *ip; struct me_softc *sc; if (V_me_hashtbl == NULL) return (0); NET_EPOCH_ASSERT(); ip = mtod(m, const struct ip *); CK_LIST_FOREACH(sc, &ME_HASH(ip->ip_dst.s_addr, ip->ip_src.s_addr), chain) { if (sc->me_src.s_addr == ip->ip_dst.s_addr && sc->me_dst.s_addr == ip->ip_src.s_addr) { if ((ME2IFP(sc)->if_flags & IFF_UP) == 0) return (0); *arg = sc; return (ENCAP_DRV_LOOKUP); } } return (0); } /* * Check that ingress address belongs to local host. */ static void me_set_running(struct me_softc *sc) { if (in_localip(sc->me_src)) ME2IFP(sc)->if_drv_flags |= IFF_DRV_RUNNING; else ME2IFP(sc)->if_drv_flags &= ~IFF_DRV_RUNNING; } /* * ifaddr_event handler. * Clear IFF_DRV_RUNNING flag when ingress address disappears to prevent * source address spoofing. */ static void me_srcaddr(void *arg __unused, const struct sockaddr *sa, int event __unused) { const struct sockaddr_in *sin; struct me_softc *sc; /* Check that VNET is ready */ if (V_me_hashtbl == NULL) return; NET_EPOCH_ASSERT(); sin = (const struct sockaddr_in *)sa; CK_LIST_FOREACH(sc, &ME_SRCHASH(sin->sin_addr.s_addr), srchash) { if (sc->me_src.s_addr != sin->sin_addr.s_addr) continue; me_set_running(sc); } } static int me_set_tunnel(struct me_softc *sc, in_addr_t src, in_addr_t dst) { struct epoch_tracker et; struct me_softc *tmp; sx_assert(&me_ioctl_sx, SA_XLOCKED); if (V_me_hashtbl == NULL) { V_me_hashtbl = me_hashinit(); V_me_srchashtbl = me_hashinit(); } if (sc->me_src.s_addr == src && sc->me_dst.s_addr == dst) return (0); CK_LIST_FOREACH(tmp, &ME_HASH(src, dst), chain) { if (tmp == sc) continue; if (tmp->me_src.s_addr == src && tmp->me_dst.s_addr == dst) return (EADDRNOTAVAIL); } me_delete_tunnel(sc); sc->me_dst.s_addr = dst; sc->me_src.s_addr = src; CK_LIST_INSERT_HEAD(&ME_HASH(src, dst), sc, chain); CK_LIST_INSERT_HEAD(&ME_SRCHASH(src), sc, srchash); NET_EPOCH_ENTER(et); me_set_running(sc); NET_EPOCH_EXIT(et); if_link_state_change(ME2IFP(sc), LINK_STATE_UP); return (0); } static void me_delete_tunnel(struct me_softc *sc) { sx_assert(&me_ioctl_sx, SA_XLOCKED); if (ME_READY(sc)) { CK_LIST_REMOVE(sc, chain); CK_LIST_REMOVE(sc, srchash); ME_WAIT(); sc->me_src.s_addr = 0; sc->me_dst.s_addr = 0; ME2IFP(sc)->if_drv_flags &= ~IFF_DRV_RUNNING; if_link_state_change(ME2IFP(sc), LINK_STATE_DOWN); } } static uint16_t me_in_cksum(uint16_t *p, int nwords) { uint32_t sum = 0; while (nwords-- > 0) sum += *p++; sum = (sum >> 16) + (sum & 0xffff); sum += (sum >> 16); return (~sum); } static int me_input(struct mbuf *m, int off, int proto, void *arg) { struct me_softc *sc = arg; struct mobhdr *mh; struct ifnet *ifp; struct ip *ip; int hlen; NET_EPOCH_ASSERT(); ifp = ME2IFP(sc); /* checks for short packets */ hlen = sizeof(struct mobhdr); if (m->m_pkthdr.len < sizeof(struct ip) + hlen) hlen -= sizeof(struct in_addr); if (m->m_len < sizeof(struct ip) + hlen) m = m_pullup(m, sizeof(struct ip) + hlen); if (m == NULL) goto drop; mh = (struct mobhdr *)mtodo(m, sizeof(struct ip)); /* check for wrong flags */ if (mh->mob_flags & (~MOB_FLAGS_SP)) { m_freem(m); goto drop; } if (mh->mob_flags) { if (hlen != sizeof(struct mobhdr)) { m_freem(m); goto drop; } } else hlen = sizeof(struct mobhdr) - sizeof(struct in_addr); /* check mobile header checksum */ if (me_in_cksum((uint16_t *)mh, hlen / sizeof(uint16_t)) != 0) { m_freem(m); goto drop; } #ifdef MAC mac_ifnet_create_mbuf(ifp, m); #endif ip = mtod(m, struct ip *); ip->ip_dst = mh->mob_dst; ip->ip_p = mh->mob_proto; ip->ip_sum = 0; ip->ip_len = htons(m->m_pkthdr.len - hlen); if 
(mh->mob_flags) ip->ip_src = mh->mob_src; memmove(mtodo(m, hlen), ip, sizeof(struct ip)); m_adj(m, hlen); m_clrprotoflags(m); m->m_pkthdr.rcvif = ifp; m->m_pkthdr.csum_flags |= (CSUM_IP_CHECKED | CSUM_IP_VALID); M_SETFIB(m, ifp->if_fib); hlen = AF_INET; BPF_MTAP2(ifp, &hlen, sizeof(hlen), m); if_inc_counter(ifp, IFCOUNTER_IPACKETS, 1); if_inc_counter(ifp, IFCOUNTER_IBYTES, m->m_pkthdr.len); if ((ifp->if_flags & IFF_MONITOR) != 0) m_freem(m); else netisr_dispatch(NETISR_IP, m); return (IPPROTO_DONE); drop: if_inc_counter(ifp, IFCOUNTER_IERRORS, 1); return (IPPROTO_DONE); } static int me_output(struct ifnet *ifp, struct mbuf *m, const struct sockaddr *dst, struct route *ro) { uint32_t af; - if (dst->sa_family == AF_UNSPEC) + /* BPF writes need to be handled specially. */ + if (dst->sa_family == AF_UNSPEC || dst->sa_family == pseudo_AF_HDRCMPLT) bcopy(dst->sa_data, &af, sizeof(af)); else af = RO_GET_FAMILY(ro, dst); m->m_pkthdr.csum_data = af; return (ifp->if_transmit(ifp, m)); } #define MTAG_ME 1414491977 static int me_transmit(struct ifnet *ifp, struct mbuf *m) { ME_RLOCK_TRACKER; struct mobhdr mh; struct me_softc *sc; struct ip *ip; uint32_t af; int error, hlen, plen; ME_RLOCK(); #ifdef MAC error = mac_ifnet_check_transmit(ifp, m); if (error != 0) goto drop; #endif error = ENETDOWN; sc = ifp->if_softc; if (sc == NULL || !ME_READY(sc) || (ifp->if_flags & IFF_MONITOR) != 0 || (ifp->if_flags & IFF_UP) == 0 || (ifp->if_drv_flags & IFF_DRV_RUNNING) == 0 || (error = if_tunnel_check_nesting(ifp, m, MTAG_ME, V_max_me_nesting)) != 0) { m_freem(m); goto drop; } af = m->m_pkthdr.csum_data; if (af != AF_INET) { error = EAFNOSUPPORT; m_freem(m); goto drop; } if (m->m_len < sizeof(struct ip)) m = m_pullup(m, sizeof(struct ip)); if (m == NULL) { error = ENOBUFS; goto drop; } ip = mtod(m, struct ip *); /* Fragmented datagramms shouldn't be encapsulated */ if (ip->ip_off & htons(IP_MF | IP_OFFMASK)) { error = EINVAL; m_freem(m); goto drop; } mh.mob_proto = ip->ip_p; mh.mob_src = ip->ip_src; mh.mob_dst = ip->ip_dst; if (in_hosteq(sc->me_src, ip->ip_src)) { hlen = sizeof(struct mobhdr) - sizeof(struct in_addr); mh.mob_flags = 0; } else { hlen = sizeof(struct mobhdr); mh.mob_flags = MOB_FLAGS_SP; } BPF_MTAP2(ifp, &af, sizeof(af), m); plen = m->m_pkthdr.len; ip->ip_src = sc->me_src; ip->ip_dst = sc->me_dst; m->m_flags &= ~(M_BCAST|M_MCAST); M_SETFIB(m, sc->me_fibnum); M_PREPEND(m, hlen, M_NOWAIT); if (m == NULL) { error = ENOBUFS; goto drop; } if (m->m_len < sizeof(struct ip) + hlen) m = m_pullup(m, sizeof(struct ip) + hlen); if (m == NULL) { error = ENOBUFS; goto drop; } memmove(mtod(m, void *), mtodo(m, hlen), sizeof(struct ip)); ip = mtod(m, struct ip *); ip->ip_len = htons(m->m_pkthdr.len); ip->ip_p = IPPROTO_MOBILE; ip->ip_sum = 0; mh.mob_csum = 0; mh.mob_csum = me_in_cksum((uint16_t *)&mh, hlen / sizeof(uint16_t)); bcopy(&mh, mtodo(m, sizeof(struct ip)), hlen); error = ip_output(m, NULL, NULL, IP_FORWARDING, NULL, NULL); drop: if (error) if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); else { if_inc_counter(ifp, IFCOUNTER_OPACKETS, 1); if_inc_counter(ifp, IFCOUNTER_OBYTES, plen); } ME_RUNLOCK(); return (error); } static void me_qflush(struct ifnet *ifp __unused) { } static const struct srcaddrtab *me_srcaddrtab = NULL; static const struct encaptab *ecookie = NULL; static const struct encap_config me_encap_cfg = { .proto = IPPROTO_MOBILE, .min_length = sizeof(struct ip) + sizeof(struct mobhdr) - sizeof(in_addr_t), .exact_match = ENCAP_DRV_LOOKUP, .lookup = me_lookup, .input = me_input }; static int 
memodevent(module_t mod, int type, void *data) { switch (type) { case MOD_LOAD: me_srcaddrtab = ip_encap_register_srcaddr(me_srcaddr, NULL, M_WAITOK); ecookie = ip_encap_attach(&me_encap_cfg, NULL, M_WAITOK); break; case MOD_UNLOAD: ip_encap_detach(ecookie); ip_encap_unregister_srcaddr(me_srcaddrtab); break; default: return (EOPNOTSUPP); } return (0); } static moduledata_t me_mod = { "if_me", memodevent, 0 }; DECLARE_MODULE(if_me, me_mod, SI_SUB_PSEUDO, SI_ORDER_ANY); MODULE_VERSION(if_me, 1); diff --git a/sys/net/if_tuntap.c b/sys/net/if_tuntap.c index 4cb219dc92b6..5d37879e87b9 100644 --- a/sys/net/if_tuntap.c +++ b/sys/net/if_tuntap.c @@ -1,2083 +1,2083 @@ /* $NetBSD: if_tun.c,v 1.14 1994/06/29 06:36:25 cgd Exp $ */ /*- * SPDX-License-Identifier: BSD-2-Clause * * Copyright (C) 1999-2000 by Maksim Yevmenkin * All rights reserved. * Copyright (c) 2019 Kyle Evans * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * BASED ON: * ------------------------------------------------------------------------- * * Copyright (c) 1988, Julian Onions * Nottingham University 1987. * * This source may be freely distributed, however I would be interested * in any changes that are made. * * This driver takes packets off the IP i/f and hands them up to a * user process to have its wicked way with. This driver has it's * roots in a similar driver written by Phil Cockcroft (formerly) at * UCL. This driver is based much more on read/write/poll mode of * operation though. */ #include "opt_inet.h" #include "opt_inet6.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef INET #include #endif #ifdef INET6 #include #include #endif #include #include #include #include #include #include #include #include #include #include struct tuntap_driver; /* * tun_list is protected by global tunmtx. Other mutable fields are * protected by tun->tun_mtx, or by their owning subsystem. tun_dev is * static for the duration of a tunnel interface. 
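 * The tun_busy()/tun_unbusy() reference count together with the tun_cv condition
 * variable (see below) keeps tun_destroy() from tearing the softc down while a
 * device operation is still in flight.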
*/ struct tuntap_softc { TAILQ_ENTRY(tuntap_softc) tun_list; struct cdev *tun_alias; struct cdev *tun_dev; u_short tun_flags; /* misc flags */ #define TUN_OPEN 0x0001 #define TUN_INITED 0x0002 #define TUN_UNUSED1 0x0008 #define TUN_UNUSED2 0x0010 #define TUN_LMODE 0x0020 #define TUN_RWAIT 0x0040 #define TUN_ASYNC 0x0080 #define TUN_IFHEAD 0x0100 #define TUN_DYING 0x0200 #define TUN_L2 0x0400 #define TUN_VMNET 0x0800 #define TUN_DRIVER_IDENT_MASK (TUN_L2 | TUN_VMNET) #define TUN_READY (TUN_OPEN | TUN_INITED) pid_t tun_pid; /* owning pid */ struct ifnet *tun_ifp; /* the interface */ struct sigio *tun_sigio; /* async I/O info */ struct tuntap_driver *tun_drv; /* appropriate driver */ struct selinfo tun_rsel; /* read select */ struct mtx tun_mtx; /* softc field mutex */ struct cv tun_cv; /* for ref'd dev destroy */ struct ether_addr tun_ether; /* remote address */ int tun_busy; /* busy count */ int tun_vhdrlen; /* virtio-net header length */ struct lro_ctrl tun_lro; /* for TCP LRO */ bool tun_lro_ready; /* TCP LRO initialized */ }; #define TUN2IFP(sc) ((sc)->tun_ifp) #define TUNDEBUG if (tundebug) if_printf #define TUN_LOCK(tp) mtx_lock(&(tp)->tun_mtx) #define TUN_UNLOCK(tp) mtx_unlock(&(tp)->tun_mtx) #define TUN_LOCK_ASSERT(tp) mtx_assert(&(tp)->tun_mtx, MA_OWNED); #define TUN_VMIO_FLAG_MASK 0x0fff /* * Interface capabilities of a tap device that supports the virtio-net * header. */ #define TAP_VNET_HDR_CAPS (IFCAP_HWCSUM | IFCAP_HWCSUM_IPV6 \ | IFCAP_VLAN_HWCSUM \ | IFCAP_TSO | IFCAP_LRO \ | IFCAP_VLAN_HWTSO) #define TAP_ALL_OFFLOAD (CSUM_TSO | CSUM_TCP | CSUM_UDP |\ CSUM_TCP_IPV6 | CSUM_UDP_IPV6) /* * All mutable global variables in if_tun are locked using tunmtx, with * the exception of tundebug, which is used unlocked, and the drivers' *clones, * which are static after setup. 
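 * In particular, the global tunhead list and each driver's unit-number allocator
 * (created with new_unrhdr(..., &tunmtx)) are covered by that mutex.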
*/ static struct mtx tunmtx; static eventhandler_tag arrival_tag; static eventhandler_tag clone_tag; static const char tunname[] = "tun"; static const char tapname[] = "tap"; static const char vmnetname[] = "vmnet"; static MALLOC_DEFINE(M_TUN, tunname, "Tunnel Interface"); static int tundebug = 0; static int tundclone = 1; static int tap_allow_uopen = 0; /* allow user devfs cloning */ static int tapuponopen = 0; /* IFF_UP on open() */ static int tapdclone = 1; /* enable devfs cloning */ static TAILQ_HEAD(,tuntap_softc) tunhead = TAILQ_HEAD_INITIALIZER(tunhead); SYSCTL_INT(_debug, OID_AUTO, if_tun_debug, CTLFLAG_RW, &tundebug, 0, ""); static struct sx tun_ioctl_sx; SX_SYSINIT(tun_ioctl_sx, &tun_ioctl_sx, "tun_ioctl"); SYSCTL_DECL(_net_link); /* tun */ static SYSCTL_NODE(_net_link, OID_AUTO, tun, CTLFLAG_RW | CTLFLAG_MPSAFE, 0, "IP tunnel software network interface"); SYSCTL_INT(_net_link_tun, OID_AUTO, devfs_cloning, CTLFLAG_RWTUN, &tundclone, 0, "Enable legacy devfs interface creation"); /* tap */ static SYSCTL_NODE(_net_link, OID_AUTO, tap, CTLFLAG_RW | CTLFLAG_MPSAFE, 0, "Ethernet tunnel software network interface"); SYSCTL_INT(_net_link_tap, OID_AUTO, user_open, CTLFLAG_RW, &tap_allow_uopen, 0, "Enable legacy devfs interface creation for all users"); SYSCTL_INT(_net_link_tap, OID_AUTO, up_on_open, CTLFLAG_RW, &tapuponopen, 0, "Bring interface up when /dev/tap is opened"); SYSCTL_INT(_net_link_tap, OID_AUTO, devfs_cloning, CTLFLAG_RWTUN, &tapdclone, 0, "Enable legacy devfs interface creation"); SYSCTL_INT(_net_link_tap, OID_AUTO, debug, CTLFLAG_RW, &tundebug, 0, ""); static int tun_create_device(struct tuntap_driver *drv, int unit, struct ucred *cr, struct cdev **dev, const char *name); static int tun_busy_locked(struct tuntap_softc *tp); static void tun_unbusy_locked(struct tuntap_softc *tp); static int tun_busy(struct tuntap_softc *tp); static void tun_unbusy(struct tuntap_softc *tp); static int tuntap_name2info(const char *name, int *unit, int *flags); static void tunclone(void *arg, struct ucred *cred, char *name, int namelen, struct cdev **dev); static void tuncreate(struct cdev *dev); static void tundtor(void *data); static void tunrename(void *arg, struct ifnet *ifp); static int tunifioctl(struct ifnet *, u_long, caddr_t); static void tuninit(struct ifnet *); static void tunifinit(void *xtp); static int tuntapmodevent(module_t, int, void *); static int tunoutput(struct ifnet *, struct mbuf *, const struct sockaddr *, struct route *ro); static void tunstart(struct ifnet *); static void tunstart_l2(struct ifnet *); static int tun_clone_match(struct if_clone *ifc, const char *name); static int tap_clone_match(struct if_clone *ifc, const char *name); static int vmnet_clone_match(struct if_clone *ifc, const char *name); static int tun_clone_create(struct if_clone *, char *, size_t, struct ifc_data *, struct ifnet **); static int tun_clone_destroy(struct if_clone *, struct ifnet *, uint32_t); static void tun_vnethdr_set(struct ifnet *ifp, int vhdrlen); static d_open_t tunopen; static d_read_t tunread; static d_write_t tunwrite; static d_ioctl_t tunioctl; static d_poll_t tunpoll; static d_kqfilter_t tunkqfilter; static int tunkqread(struct knote *, long); static int tunkqwrite(struct knote *, long); static void tunkqdetach(struct knote *); static struct filterops tun_read_filterops = { .f_isfd = 1, .f_attach = NULL, .f_detach = tunkqdetach, .f_event = tunkqread, }; static struct filterops tun_write_filterops = { .f_isfd = 1, .f_attach = NULL, .f_detach = tunkqdetach, .f_event = 
tunkqwrite, }; static struct tuntap_driver { struct cdevsw cdevsw; int ident_flags; struct unrhdr *unrhdr; struct clonedevs *clones; ifc_match_f *clone_match_fn; ifc_create_f *clone_create_fn; ifc_destroy_f *clone_destroy_fn; } tuntap_drivers[] = { { .ident_flags = 0, .cdevsw = { .d_version = D_VERSION, .d_flags = D_NEEDMINOR, .d_open = tunopen, .d_read = tunread, .d_write = tunwrite, .d_ioctl = tunioctl, .d_poll = tunpoll, .d_kqfilter = tunkqfilter, .d_name = tunname, }, .clone_match_fn = tun_clone_match, .clone_create_fn = tun_clone_create, .clone_destroy_fn = tun_clone_destroy, }, { .ident_flags = TUN_L2, .cdevsw = { .d_version = D_VERSION, .d_flags = D_NEEDMINOR, .d_open = tunopen, .d_read = tunread, .d_write = tunwrite, .d_ioctl = tunioctl, .d_poll = tunpoll, .d_kqfilter = tunkqfilter, .d_name = tapname, }, .clone_match_fn = tap_clone_match, .clone_create_fn = tun_clone_create, .clone_destroy_fn = tun_clone_destroy, }, { .ident_flags = TUN_L2 | TUN_VMNET, .cdevsw = { .d_version = D_VERSION, .d_flags = D_NEEDMINOR, .d_open = tunopen, .d_read = tunread, .d_write = tunwrite, .d_ioctl = tunioctl, .d_poll = tunpoll, .d_kqfilter = tunkqfilter, .d_name = vmnetname, }, .clone_match_fn = vmnet_clone_match, .clone_create_fn = tun_clone_create, .clone_destroy_fn = tun_clone_destroy, }, }; struct tuntap_driver_cloner { SLIST_ENTRY(tuntap_driver_cloner) link; struct tuntap_driver *drv; struct if_clone *cloner; }; VNET_DEFINE_STATIC(SLIST_HEAD(, tuntap_driver_cloner), tuntap_driver_cloners) = SLIST_HEAD_INITIALIZER(tuntap_driver_cloners); #define V_tuntap_driver_cloners VNET(tuntap_driver_cloners) /* * Mechanism for marking a tunnel device as busy so that we can safely do some * orthogonal operations (such as operations on devices) without racing against * tun_destroy. tun_destroy will wait on the condvar if we're at all busy or * open, to be woken up when the condition is alleviated. */ static int tun_busy_locked(struct tuntap_softc *tp) { TUN_LOCK_ASSERT(tp); if ((tp->tun_flags & TUN_DYING) != 0) { /* * Perhaps unintuitive, but the device is busy going away. * Other interpretations of EBUSY from tun_busy make little * sense, since making a busy device even more busy doesn't * sound like a problem. */ return (EBUSY); } ++tp->tun_busy; return (0); } static void tun_unbusy_locked(struct tuntap_softc *tp) { TUN_LOCK_ASSERT(tp); KASSERT(tp->tun_busy != 0, ("tun_unbusy: called for non-busy tunnel")); --tp->tun_busy; /* Wake up anything that may be waiting on our busy tunnel. */ if (tp->tun_busy == 0) cv_broadcast(&tp->tun_cv); } static int tun_busy(struct tuntap_softc *tp) { int ret; TUN_LOCK(tp); ret = tun_busy_locked(tp); TUN_UNLOCK(tp); return (ret); } static void tun_unbusy(struct tuntap_softc *tp) { TUN_LOCK(tp); tun_unbusy_locked(tp); TUN_UNLOCK(tp); } /* * Sets unit and/or flags given the device name. Must be called with correct * vnet context. */ static int tuntap_name2info(const char *name, int *outunit, int *outflags) { struct tuntap_driver *drv; struct tuntap_driver_cloner *drvc; char *dname; int flags, unit; bool found; if (name == NULL) return (EINVAL); /* * Needed for dev_stdclone, but dev_stdclone will not modify, it just * wants to be able to pass back a char * through the second param. We * will always set that as NULL here, so we'll fake it. 
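 * __DECONST() below only strips the const qualifier to satisfy the dev_stdclone()
 * prototype; since the second argument is always NULL here, dev_stdclone() never
 * modifies the name string.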
*/ dname = __DECONST(char *, name); found = false; KASSERT(!SLIST_EMPTY(&V_tuntap_driver_cloners), ("tuntap_driver_cloners failed to initialize")); SLIST_FOREACH(drvc, &V_tuntap_driver_cloners, link) { KASSERT(drvc->drv != NULL, ("tuntap_driver_cloners entry not properly initialized")); drv = drvc->drv; if (strcmp(name, drv->cdevsw.d_name) == 0) { found = true; unit = -1; flags = drv->ident_flags; break; } if (dev_stdclone(dname, NULL, drv->cdevsw.d_name, &unit) == 1) { found = true; flags = drv->ident_flags; break; } } if (!found) return (ENXIO); if (outunit != NULL) *outunit = unit; if (outflags != NULL) *outflags = flags; return (0); } /* * Get driver information from a set of flags specified. Masks the identifying * part of the flags and compares it against all of the available * tuntap_drivers. Must be called with correct vnet context. */ static struct tuntap_driver * tuntap_driver_from_flags(int tun_flags) { struct tuntap_driver *drv; struct tuntap_driver_cloner *drvc; KASSERT(!SLIST_EMPTY(&V_tuntap_driver_cloners), ("tuntap_driver_cloners failed to initialize")); SLIST_FOREACH(drvc, &V_tuntap_driver_cloners, link) { KASSERT(drvc->drv != NULL, ("tuntap_driver_cloners entry not properly initialized")); drv = drvc->drv; if ((tun_flags & TUN_DRIVER_IDENT_MASK) == drv->ident_flags) return (drv); } return (NULL); } static int tun_clone_match(struct if_clone *ifc, const char *name) { int tunflags; if (tuntap_name2info(name, NULL, &tunflags) == 0) { if ((tunflags & TUN_L2) == 0) return (1); } return (0); } static int tap_clone_match(struct if_clone *ifc, const char *name) { int tunflags; if (tuntap_name2info(name, NULL, &tunflags) == 0) { if ((tunflags & (TUN_L2 | TUN_VMNET)) == TUN_L2) return (1); } return (0); } static int vmnet_clone_match(struct if_clone *ifc, const char *name) { int tunflags; if (tuntap_name2info(name, NULL, &tunflags) == 0) { if ((tunflags & TUN_VMNET) != 0) return (1); } return (0); } static int tun_clone_create(struct if_clone *ifc, char *name, size_t len, struct ifc_data *ifd, struct ifnet **ifpp) { struct tuntap_driver *drv; struct cdev *dev; int err, i, tunflags, unit; tunflags = 0; /* The name here tells us exactly what we're creating */ err = tuntap_name2info(name, &unit, &tunflags); if (err != 0) return (err); drv = tuntap_driver_from_flags(tunflags); if (drv == NULL) return (ENXIO); if (unit != -1) { /* If this unit number is still available that's okay. 
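 * A request such as `ifconfig tun3 create' arrives here with unit == 3;
 * alloc_unr_specific() reserves exactly that unit or fails, in which case we
 * report EEXIST rather than silently picking a different unit.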
*/ if (alloc_unr_specific(drv->unrhdr, unit) == -1) return (EEXIST); } else { unit = alloc_unr(drv->unrhdr); } snprintf(name, IFNAMSIZ, "%s%d", drv->cdevsw.d_name, unit); /* find any existing device, or allocate new unit number */ dev = NULL; i = clone_create(&drv->clones, &drv->cdevsw, &unit, &dev, 0); /* No preexisting struct cdev *, create one */ if (i != 0) i = tun_create_device(drv, unit, NULL, &dev, name); if (i == 0) { dev_ref(dev); tuncreate(dev); struct tuntap_softc *tp = dev->si_drv1; *ifpp = tp->tun_ifp; } return (i); } static void tunclone(void *arg, struct ucred *cred, char *name, int namelen, struct cdev **dev) { char devname[SPECNAMELEN + 1]; struct tuntap_driver *drv; int append_unit, i, u, tunflags; bool mayclone; if (*dev != NULL) return; tunflags = 0; CURVNET_SET(CRED_TO_VNET(cred)); if (tuntap_name2info(name, &u, &tunflags) != 0) goto out; /* Not recognized */ if (u != -1 && u > IF_MAXUNIT) goto out; /* Unit number too high */ mayclone = priv_check_cred(cred, PRIV_NET_IFCREATE) == 0; if ((tunflags & TUN_L2) != 0) { /* tap/vmnet allow user open with a sysctl */ mayclone = (mayclone || tap_allow_uopen) && tapdclone; } else { mayclone = mayclone && tundclone; } /* * If tun cloning is enabled, only the superuser can create an * interface. */ if (!mayclone) goto out; if (u == -1) append_unit = 1; else append_unit = 0; drv = tuntap_driver_from_flags(tunflags); if (drv == NULL) goto out; /* find any existing device, or allocate new unit number */ i = clone_create(&drv->clones, &drv->cdevsw, &u, dev, 0); if (i) { if (append_unit) { namelen = snprintf(devname, sizeof(devname), "%s%d", name, u); name = devname; } i = tun_create_device(drv, u, cred, dev, name); } if (i == 0) { dev_ref(*dev); if_clone_create(name, namelen, NULL); } out: CURVNET_RESTORE(); } static void tun_destroy(struct tuntap_softc *tp) { TUN_LOCK(tp); tp->tun_flags |= TUN_DYING; if (tp->tun_busy != 0) cv_wait_unlock(&tp->tun_cv, &tp->tun_mtx); else TUN_UNLOCK(tp); CURVNET_SET(TUN2IFP(tp)->if_vnet); /* destroy_dev will take care of any alias. 
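 * The alias cdev created by tunrename() depends on tun_dev, so destroying tun_dev
 * removes the alias as well and tun_alias needs no separate teardown here.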
*/ destroy_dev(tp->tun_dev); seldrain(&tp->tun_rsel); knlist_clear(&tp->tun_rsel.si_note, 0); knlist_destroy(&tp->tun_rsel.si_note); if ((tp->tun_flags & TUN_L2) != 0) { ether_ifdetach(TUN2IFP(tp)); } else { bpfdetach(TUN2IFP(tp)); if_detach(TUN2IFP(tp)); } sx_xlock(&tun_ioctl_sx); TUN2IFP(tp)->if_softc = NULL; sx_xunlock(&tun_ioctl_sx); free_unr(tp->tun_drv->unrhdr, TUN2IFP(tp)->if_dunit); if_free(TUN2IFP(tp)); mtx_destroy(&tp->tun_mtx); cv_destroy(&tp->tun_cv); free(tp, M_TUN); CURVNET_RESTORE(); } static int tun_clone_destroy(struct if_clone *ifc __unused, struct ifnet *ifp, uint32_t flags) { struct tuntap_softc *tp = ifp->if_softc; mtx_lock(&tunmtx); TAILQ_REMOVE(&tunhead, tp, tun_list); mtx_unlock(&tunmtx); tun_destroy(tp); return (0); } static void vnet_tun_init(const void *unused __unused) { struct tuntap_driver *drv; struct tuntap_driver_cloner *drvc; int i; for (i = 0; i < nitems(tuntap_drivers); ++i) { drv = &tuntap_drivers[i]; drvc = malloc(sizeof(*drvc), M_TUN, M_WAITOK | M_ZERO); drvc->drv = drv; struct if_clone_addreq req = { .match_f = drv->clone_match_fn, .create_f = drv->clone_create_fn, .destroy_f = drv->clone_destroy_fn, }; drvc->cloner = ifc_attach_cloner(drv->cdevsw.d_name, &req); SLIST_INSERT_HEAD(&V_tuntap_driver_cloners, drvc, link); }; } VNET_SYSINIT(vnet_tun_init, SI_SUB_PROTO_IF, SI_ORDER_ANY, vnet_tun_init, NULL); static void vnet_tun_uninit(const void *unused __unused) { struct tuntap_driver_cloner *drvc; while (!SLIST_EMPTY(&V_tuntap_driver_cloners)) { drvc = SLIST_FIRST(&V_tuntap_driver_cloners); SLIST_REMOVE_HEAD(&V_tuntap_driver_cloners, link); if_clone_detach(drvc->cloner); free(drvc, M_TUN); } } VNET_SYSUNINIT(vnet_tun_uninit, SI_SUB_PROTO_IF, SI_ORDER_ANY, vnet_tun_uninit, NULL); static void tun_uninit(const void *unused __unused) { struct tuntap_driver *drv; struct tuntap_softc *tp; int i; EVENTHANDLER_DEREGISTER(ifnet_arrival_event, arrival_tag); EVENTHANDLER_DEREGISTER(dev_clone, clone_tag); mtx_lock(&tunmtx); while ((tp = TAILQ_FIRST(&tunhead)) != NULL) { TAILQ_REMOVE(&tunhead, tp, tun_list); mtx_unlock(&tunmtx); tun_destroy(tp); mtx_lock(&tunmtx); } mtx_unlock(&tunmtx); for (i = 0; i < nitems(tuntap_drivers); ++i) { drv = &tuntap_drivers[i]; delete_unrhdr(drv->unrhdr); clone_cleanup(&drv->clones); } mtx_destroy(&tunmtx); } SYSUNINIT(tun_uninit, SI_SUB_PROTO_IF, SI_ORDER_ANY, tun_uninit, NULL); static struct tuntap_driver * tuntap_driver_from_ifnet(const struct ifnet *ifp) { struct tuntap_driver *drv; int i; if (ifp == NULL) return (NULL); for (i = 0; i < nitems(tuntap_drivers); ++i) { drv = &tuntap_drivers[i]; if (strcmp(ifp->if_dname, drv->cdevsw.d_name) == 0) return (drv); } return (NULL); } static int tuntapmodevent(module_t mod, int type, void *data) { struct tuntap_driver *drv; int i; switch (type) { case MOD_LOAD: mtx_init(&tunmtx, "tunmtx", NULL, MTX_DEF); for (i = 0; i < nitems(tuntap_drivers); ++i) { drv = &tuntap_drivers[i]; clone_setup(&drv->clones); drv->unrhdr = new_unrhdr(0, IF_MAXUNIT, &tunmtx); } arrival_tag = EVENTHANDLER_REGISTER(ifnet_arrival_event, tunrename, 0, 1000); if (arrival_tag == NULL) return (ENOMEM); clone_tag = EVENTHANDLER_REGISTER(dev_clone, tunclone, 0, 1000); if (clone_tag == NULL) return (ENOMEM); break; case MOD_UNLOAD: /* See tun_uninit, so it's done after the vnet_sysuninit() */ break; default: return EOPNOTSUPP; } return 0; } static moduledata_t tuntap_mod = { "if_tuntap", tuntapmodevent, 0 }; /* We'll only ever have these two, so no need for a macro. 
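 * The empty if_tun and if_tap moduledata entries below only publish MODULE_VERSION
 * stubs, so existing references to those historical module names keep resolving.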
*/ static moduledata_t tun_mod = { "if_tun", NULL, 0 }; static moduledata_t tap_mod = { "if_tap", NULL, 0 }; DECLARE_MODULE(if_tuntap, tuntap_mod, SI_SUB_PSEUDO, SI_ORDER_ANY); MODULE_VERSION(if_tuntap, 1); DECLARE_MODULE(if_tun, tun_mod, SI_SUB_PSEUDO, SI_ORDER_ANY); MODULE_VERSION(if_tun, 1); DECLARE_MODULE(if_tap, tap_mod, SI_SUB_PSEUDO, SI_ORDER_ANY); MODULE_VERSION(if_tap, 1); static int tun_create_device(struct tuntap_driver *drv, int unit, struct ucred *cr, struct cdev **dev, const char *name) { struct make_dev_args args; struct tuntap_softc *tp; int error; tp = malloc(sizeof(*tp), M_TUN, M_WAITOK | M_ZERO); mtx_init(&tp->tun_mtx, "tun_mtx", NULL, MTX_DEF); cv_init(&tp->tun_cv, "tun_condvar"); tp->tun_flags = drv->ident_flags; tp->tun_drv = drv; make_dev_args_init(&args); if (cr != NULL) args.mda_flags = MAKEDEV_REF | MAKEDEV_CHECKNAME; args.mda_devsw = &drv->cdevsw; args.mda_cr = cr; args.mda_uid = UID_UUCP; args.mda_gid = GID_DIALER; args.mda_mode = 0600; args.mda_unit = unit; args.mda_si_drv1 = tp; error = make_dev_s(&args, dev, "%s", name); if (error != 0) { free(tp, M_TUN); return (error); } KASSERT((*dev)->si_drv1 != NULL, ("Failed to set si_drv1 at %s creation", name)); tp->tun_dev = *dev; knlist_init_mtx(&tp->tun_rsel.si_note, &tp->tun_mtx); mtx_lock(&tunmtx); TAILQ_INSERT_TAIL(&tunhead, tp, tun_list); mtx_unlock(&tunmtx); return (0); } static void tunstart(struct ifnet *ifp) { struct tuntap_softc *tp = ifp->if_softc; struct mbuf *m; TUNDEBUG(ifp, "starting\n"); if (ALTQ_IS_ENABLED(&ifp->if_snd)) { IFQ_LOCK(&ifp->if_snd); IFQ_POLL_NOLOCK(&ifp->if_snd, m); if (m == NULL) { IFQ_UNLOCK(&ifp->if_snd); return; } IFQ_UNLOCK(&ifp->if_snd); } TUN_LOCK(tp); if (tp->tun_flags & TUN_RWAIT) { tp->tun_flags &= ~TUN_RWAIT; wakeup(tp); } selwakeuppri(&tp->tun_rsel, PZERO + 1); KNOTE_LOCKED(&tp->tun_rsel.si_note, 0); if (tp->tun_flags & TUN_ASYNC && tp->tun_sigio) { TUN_UNLOCK(tp); pgsigio(&tp->tun_sigio, SIGIO, 0); } else TUN_UNLOCK(tp); } /* * tunstart_l2 * * queue packets from higher level ready to put out */ static void tunstart_l2(struct ifnet *ifp) { struct tuntap_softc *tp = ifp->if_softc; TUNDEBUG(ifp, "starting\n"); /* * do not junk pending output if we are in VMnet mode. * XXX: can this do any harm because of queue overflow? */ TUN_LOCK(tp); if (((tp->tun_flags & TUN_VMNET) == 0) && ((tp->tun_flags & TUN_READY) != TUN_READY)) { struct mbuf *m; /* Unlocked read. */ TUNDEBUG(ifp, "not ready, tun_flags = 0x%x\n", tp->tun_flags); for (;;) { IF_DEQUEUE(&ifp->if_snd, m); if (m != NULL) { m_freem(m); if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); } else break; } TUN_UNLOCK(tp); return; } ifp->if_drv_flags |= IFF_DRV_OACTIVE; if (!IFQ_IS_EMPTY(&ifp->if_snd)) { if (tp->tun_flags & TUN_RWAIT) { tp->tun_flags &= ~TUN_RWAIT; wakeup(tp); } if ((tp->tun_flags & TUN_ASYNC) && (tp->tun_sigio != NULL)) { TUN_UNLOCK(tp); pgsigio(&tp->tun_sigio, SIGIO, 0); TUN_LOCK(tp); } selwakeuppri(&tp->tun_rsel, PZERO+1); KNOTE_LOCKED(&tp->tun_rsel.si_note, 0); if_inc_counter(ifp, IFCOUNTER_OPACKETS, 1); /* obytes are counted in ether_output */ } ifp->if_drv_flags &= ~IFF_DRV_OACTIVE; TUN_UNLOCK(tp); } /* tunstart_l2 */ static int tap_transmit(struct ifnet *ifp, struct mbuf *m) { int error; BPF_MTAP(ifp, m); IFQ_HANDOFF(ifp, m, error); return (error); } /* XXX: should return an error code so it can fail. 
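 * As it stands, tuncreate() panics if if_alloc() fails; returning an error instead
 * would let the clone and devfs open paths fail gracefully.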
*/ static void tuncreate(struct cdev *dev) { struct tuntap_driver *drv; struct tuntap_softc *tp; struct ifnet *ifp; struct ether_addr eaddr; int iflags; u_char type; tp = dev->si_drv1; KASSERT(tp != NULL, ("si_drv1 should have been initialized at creation")); drv = tp->tun_drv; iflags = IFF_MULTICAST; if ((tp->tun_flags & TUN_L2) != 0) { type = IFT_ETHER; iflags |= IFF_BROADCAST | IFF_SIMPLEX; } else { type = IFT_PPP; iflags |= IFF_POINTOPOINT; } ifp = tp->tun_ifp = if_alloc(type); if (ifp == NULL) panic("%s%d: failed to if_alloc() interface.\n", drv->cdevsw.d_name, dev2unit(dev)); ifp->if_softc = tp; if_initname(ifp, drv->cdevsw.d_name, dev2unit(dev)); ifp->if_ioctl = tunifioctl; ifp->if_flags = iflags; IFQ_SET_MAXLEN(&ifp->if_snd, ifqmaxlen); ifp->if_capabilities |= IFCAP_LINKSTATE; if ((tp->tun_flags & TUN_L2) != 0) ifp->if_capabilities |= IFCAP_RXCSUM | IFCAP_RXCSUM_IPV6 | IFCAP_LRO; ifp->if_capenable |= IFCAP_LINKSTATE; if ((tp->tun_flags & TUN_L2) != 0) { ifp->if_init = tunifinit; ifp->if_start = tunstart_l2; ifp->if_transmit = tap_transmit; ifp->if_qflush = if_qflush; ether_gen_addr(ifp, &eaddr); ether_ifattach(ifp, eaddr.octet); } else { ifp->if_mtu = TUNMTU; ifp->if_start = tunstart; ifp->if_output = tunoutput; ifp->if_snd.ifq_drv_maxlen = 0; IFQ_SET_READY(&ifp->if_snd); if_attach(ifp); bpfattach(ifp, DLT_NULL, sizeof(u_int32_t)); } TUN_LOCK(tp); tp->tun_flags |= TUN_INITED; TUN_UNLOCK(tp); TUNDEBUG(ifp, "interface %s is created, minor = %#x\n", ifp->if_xname, dev2unit(dev)); } static void tunrename(void *arg __unused, struct ifnet *ifp) { struct tuntap_softc *tp; int error; if ((ifp->if_flags & IFF_RENAMING) == 0) return; if (tuntap_driver_from_ifnet(ifp) == NULL) return; /* * We need to grab the ioctl sx long enough to make sure the softc is * still there. If it is, we can safely try to busy the tun device. * The busy may fail if the device is currently dying, in which case * we do nothing. If it doesn't fail, the busy count stops the device * from dying until we've created the alias (that will then be * subsequently destroyed). */ sx_xlock(&tun_ioctl_sx); tp = ifp->if_softc; if (tp == NULL) { sx_xunlock(&tun_ioctl_sx); return; } error = tun_busy(tp); sx_xunlock(&tun_ioctl_sx); if (error != 0) return; if (tp->tun_alias != NULL) { destroy_dev(tp->tun_alias); tp->tun_alias = NULL; } if (strcmp(ifp->if_xname, tp->tun_dev->si_name) == 0) goto out; /* * Failure's ok, aliases are created on a best effort basis. If a * tun user/consumer decides to rename the interface to conflict with * another device (non-ifnet) on the system, we will assume they know * what they are doing. make_dev_alias_p won't touch tun_alias on * failure, so we use it but ignore the return value. 
*/ make_dev_alias_p(MAKEDEV_CHECKNAME, &tp->tun_alias, tp->tun_dev, "%s", ifp->if_xname); out: tun_unbusy(tp); } static int tunopen(struct cdev *dev, int flag, int mode, struct thread *td) { struct ifnet *ifp; struct tuntap_softc *tp; int error __diagused, tunflags; tunflags = 0; CURVNET_SET(TD_TO_VNET(td)); error = tuntap_name2info(dev->si_name, NULL, &tunflags); if (error != 0) { CURVNET_RESTORE(); return (error); /* Shouldn't happen */ } tp = dev->si_drv1; KASSERT(tp != NULL, ("si_drv1 should have been initialized at creation")); TUN_LOCK(tp); if ((tp->tun_flags & TUN_INITED) == 0) { TUN_UNLOCK(tp); CURVNET_RESTORE(); return (ENXIO); } if ((tp->tun_flags & (TUN_OPEN | TUN_DYING)) != 0) { TUN_UNLOCK(tp); CURVNET_RESTORE(); return (EBUSY); } error = tun_busy_locked(tp); KASSERT(error == 0, ("Must be able to busy an unopen tunnel")); ifp = TUN2IFP(tp); if ((tp->tun_flags & TUN_L2) != 0) { bcopy(IF_LLADDR(ifp), tp->tun_ether.octet, sizeof(tp->tun_ether.octet)); ifp->if_drv_flags |= IFF_DRV_RUNNING; ifp->if_drv_flags &= ~IFF_DRV_OACTIVE; if (tapuponopen) ifp->if_flags |= IFF_UP; } tp->tun_pid = td->td_proc->p_pid; tp->tun_flags |= TUN_OPEN; if_link_state_change(ifp, LINK_STATE_UP); TUNDEBUG(ifp, "open\n"); TUN_UNLOCK(tp); /* * This can fail with either ENOENT or EBUSY. This is in the middle of * d_open, so ENOENT should not be possible. EBUSY is possible, but * the only cdevpriv dtor being set will be tundtor and the softc being * passed is constant for a given cdev. We ignore the possible error * because of this as either "unlikely" or "not actually a problem." */ (void)devfs_set_cdevpriv(tp, tundtor); CURVNET_RESTORE(); return (0); } /* * tundtor - tear down the device - mark i/f down & delete * routing info */ static void tundtor(void *data) { struct proc *p; struct tuntap_softc *tp; struct ifnet *ifp; bool l2tun; tp = data; p = curproc; ifp = TUN2IFP(tp); TUN_LOCK(tp); /* * Realistically, we can't be obstinate here. This only means that the * tuntap device was closed out of order, and the last closer wasn't the * controller. These are still good to know about, though, as software * should avoid multiple processes with a tuntap device open and * ill-defined transfer of control (e.g., handoff, TUNSIFPID, close in * parent). */ if (p->p_pid != tp->tun_pid) { log(LOG_INFO, "pid %d (%s), %s: tun/tap protocol violation, non-controlling process closed last.\n", p->p_pid, p->p_comm, tp->tun_dev->si_name); } /* * junk all pending output */ CURVNET_SET(ifp->if_vnet); l2tun = false; if ((tp->tun_flags & TUN_L2) != 0) { l2tun = true; IF_DRAIN(&ifp->if_snd); } else { IFQ_PURGE(&ifp->if_snd); } /* For vmnet, we won't do most of the address/route bits */ if ((tp->tun_flags & TUN_VMNET) != 0 || (l2tun && (ifp->if_flags & IFF_LINK0) != 0)) goto out; #if defined(INET) || defined(INET6) if (l2tun && tp->tun_lro_ready) { TUNDEBUG (ifp, "LRO disabled\n"); tcp_lro_free(&tp->tun_lro); tp->tun_lro_ready = false; } #endif if (ifp->if_flags & IFF_UP) { TUN_UNLOCK(tp); if_down(ifp); TUN_LOCK(tp); } /* Delete all addresses and routes which reference this interface. 
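 * TUN_LOCK is dropped around if_purgeaddrs() below, presumably because purging the
 * addresses can require other subsystem locks.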
*/ if (ifp->if_drv_flags & IFF_DRV_RUNNING) { ifp->if_drv_flags &= ~IFF_DRV_RUNNING; TUN_UNLOCK(tp); if_purgeaddrs(ifp); TUN_LOCK(tp); } out: if_link_state_change(ifp, LINK_STATE_DOWN); CURVNET_RESTORE(); funsetown(&tp->tun_sigio); selwakeuppri(&tp->tun_rsel, PZERO + 1); KNOTE_LOCKED(&tp->tun_rsel.si_note, 0); TUNDEBUG (ifp, "closed\n"); tp->tun_flags &= ~TUN_OPEN; tp->tun_pid = 0; tun_vnethdr_set(ifp, 0); tun_unbusy_locked(tp); TUN_UNLOCK(tp); } static void tuninit(struct ifnet *ifp) { struct tuntap_softc *tp = ifp->if_softc; TUNDEBUG(ifp, "tuninit\n"); TUN_LOCK(tp); ifp->if_drv_flags |= IFF_DRV_RUNNING; if ((tp->tun_flags & TUN_L2) == 0) { ifp->if_flags |= IFF_UP; getmicrotime(&ifp->if_lastchange); TUN_UNLOCK(tp); } else { #if defined(INET) || defined(INET6) if (tcp_lro_init(&tp->tun_lro) == 0) { TUNDEBUG(ifp, "LRO enabled\n"); tp->tun_lro.ifp = ifp; tp->tun_lro_ready = true; } else { TUNDEBUG(ifp, "Could not enable LRO\n"); tp->tun_lro_ready = false; } #endif ifp->if_drv_flags &= ~IFF_DRV_OACTIVE; TUN_UNLOCK(tp); /* attempt to start output */ tunstart_l2(ifp); } } /* * Used only for l2 tunnel. */ static void tunifinit(void *xtp) { struct tuntap_softc *tp; tp = (struct tuntap_softc *)xtp; tuninit(tp->tun_ifp); } /* * To be called under TUN_LOCK. Update ifp->if_hwassist according to the * current value of ifp->if_capenable. */ static void tun_caps_changed(struct ifnet *ifp) { uint64_t hwassist = 0; TUN_LOCK_ASSERT((struct tuntap_softc *)ifp->if_softc); if (ifp->if_capenable & IFCAP_TXCSUM) hwassist |= CSUM_TCP | CSUM_UDP; if (ifp->if_capenable & IFCAP_TXCSUM_IPV6) hwassist |= CSUM_TCP_IPV6 | CSUM_UDP_IPV6; if (ifp->if_capenable & IFCAP_TSO4) hwassist |= CSUM_IP_TSO; if (ifp->if_capenable & IFCAP_TSO6) hwassist |= CSUM_IP6_TSO; ifp->if_hwassist = hwassist; } /* * To be called under TUN_LOCK. Update tp->tun_vhdrlen and adjust * if_capabilities and if_capenable as needed. */ static void tun_vnethdr_set(struct ifnet *ifp, int vhdrlen) { struct tuntap_softc *tp = ifp->if_softc; TUN_LOCK_ASSERT(tp); if (tp->tun_vhdrlen == vhdrlen) return; /* * Update if_capabilities to reflect the * functionalities offered by the virtio-net * header. */ if (vhdrlen != 0) ifp->if_capabilities |= TAP_VNET_HDR_CAPS; else ifp->if_capabilities &= ~TAP_VNET_HDR_CAPS; /* * Disable any capabilities that we don't * support anymore. */ ifp->if_capenable &= ifp->if_capabilities; tun_caps_changed(ifp); tp->tun_vhdrlen = vhdrlen; TUNDEBUG(ifp, "vnet_hdr_len=%d, if_capabilities=%x\n", vhdrlen, ifp->if_capabilities); } /* * Process an ioctl request. 
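 * tun_ioctl_sx serializes these handlers against interface destruction:
 * tun_destroy() clears if_softc under the same lock, which is why the handler
 * begins by re-checking the softc pointer.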
*/ static int tunifioctl(struct ifnet *ifp, u_long cmd, caddr_t data) { struct ifreq *ifr = (struct ifreq *)data; struct tuntap_softc *tp; struct ifstat *ifs; struct ifmediareq *ifmr; int dummy, error = 0; bool l2tun; ifmr = NULL; sx_xlock(&tun_ioctl_sx); tp = ifp->if_softc; if (tp == NULL) { error = ENXIO; goto bad; } l2tun = (tp->tun_flags & TUN_L2) != 0; switch(cmd) { case SIOCGIFSTATUS: ifs = (struct ifstat *)data; TUN_LOCK(tp); if (tp->tun_pid) snprintf(ifs->ascii, sizeof(ifs->ascii), "\tOpened by PID %d\n", tp->tun_pid); else ifs->ascii[0] = '\0'; TUN_UNLOCK(tp); break; case SIOCSIFADDR: if (l2tun) error = ether_ioctl(ifp, cmd, data); else tuninit(ifp); if (error == 0) TUNDEBUG(ifp, "address set\n"); break; case SIOCSIFMTU: ifp->if_mtu = ifr->ifr_mtu; TUNDEBUG(ifp, "mtu set\n"); break; case SIOCSIFFLAGS: case SIOCADDMULTI: case SIOCDELMULTI: break; case SIOCGIFMEDIA: if (!l2tun) { error = EINVAL; break; } ifmr = (struct ifmediareq *)data; dummy = ifmr->ifm_count; ifmr->ifm_count = 1; ifmr->ifm_status = IFM_AVALID; ifmr->ifm_active = IFM_ETHER | IFM_FDX | IFM_1000_T; if (tp->tun_flags & TUN_OPEN) ifmr->ifm_status |= IFM_ACTIVE; ifmr->ifm_current = ifmr->ifm_active; if (dummy >= 1) { int media = IFM_ETHER; error = copyout(&media, ifmr->ifm_ulist, sizeof(int)); } break; case SIOCSIFCAP: TUN_LOCK(tp); ifp->if_capenable = ifr->ifr_reqcap; tun_caps_changed(ifp); TUN_UNLOCK(tp); VLAN_CAPABILITIES(ifp); break; default: if (l2tun) { error = ether_ioctl(ifp, cmd, data); } else { error = EINVAL; } } bad: sx_xunlock(&tun_ioctl_sx); return (error); } /* * tunoutput - queue packets from higher level ready to put out. */ static int tunoutput(struct ifnet *ifp, struct mbuf *m0, const struct sockaddr *dst, struct route *ro) { struct tuntap_softc *tp = ifp->if_softc; u_short cached_tun_flags; int error; u_int32_t af; TUNDEBUG (ifp, "tunoutput\n"); #ifdef MAC error = mac_ifnet_check_transmit(ifp, m0); if (error) { m_freem(m0); return (error); } #endif /* Could be unlocked read? */ TUN_LOCK(tp); cached_tun_flags = tp->tun_flags; TUN_UNLOCK(tp); if ((cached_tun_flags & TUN_READY) != TUN_READY) { TUNDEBUG (ifp, "not ready 0%o\n", tp->tun_flags); m_freem (m0); return (EHOSTDOWN); } if ((ifp->if_flags & IFF_UP) != IFF_UP) { m_freem (m0); return (EHOSTDOWN); } /* BPF writes need to be handled specially. */ - if (dst->sa_family == AF_UNSPEC) + if (dst->sa_family == AF_UNSPEC || dst->sa_family == pseudo_AF_HDRCMPLT) bcopy(dst->sa_data, &af, sizeof(af)); else af = RO_GET_FAMILY(ro, dst); BPF_MTAP2(ifp, &af, sizeof(af), m0); /* prepend sockaddr? this may abort if the mbuf allocation fails */ if (cached_tun_flags & TUN_LMODE) { /* allocate space for sockaddr */ M_PREPEND(m0, dst->sa_len, M_NOWAIT); /* if allocation failed drop packet */ if (m0 == NULL) { if_inc_counter(ifp, IFCOUNTER_IQDROPS, 1); if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); return (ENOBUFS); } else { bcopy(dst, m0->m_data, dst->sa_len); } } if (cached_tun_flags & TUN_IFHEAD) { /* Prepend the address family */ M_PREPEND(m0, 4, M_NOWAIT); /* if allocation failed drop packet */ if (m0 == NULL) { if_inc_counter(ifp, IFCOUNTER_IQDROPS, 1); if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); return (ENOBUFS); } else *(u_int32_t *)m0->m_data = htonl(af); } else { #ifdef INET if (af != AF_INET) #endif { m_freem(m0); return (EAFNOSUPPORT); } } error = (ifp->if_transmit)(ifp, m0); if (error) return (ENOBUFS); if_inc_counter(ifp, IFCOUNTER_OPACKETS, 1); return (0); } /* * the cdevsw interface is now pretty minimal. 
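The one-line change to tunoutput() above matters because bpf(4) writes arrive here with dst->sa_family set to AF_UNSPEC, or to pseudo_AF_HDRCMPLT when the descriptor has header-complete mode enabled; in both cases the DLT_NULL pseudo header (a 4-byte address family) is carried in sa_data rather than being derivable from the route. A hedged userland sketch that exercises the new branch follows; it assumes tun0 exists, is up, and is held open by a reader, and the zeroed IPv4 header is only a placeholder.

#include <sys/types.h>
#include <sys/ioctl.h>
#include <sys/socket.h>
#include <net/bpf.h>
#include <net/if.h>
#include <fcntl.h>
#include <string.h>
#include <unistd.h>

int
main(void)
{
	struct ifreq ifr;
	u_int hdrcmplt = 1;
	u_int32_t af = AF_INET;		/* DLT_NULL header, host byte order */
	char pkt[4 + 20];		/* AF header + minimal IPv4 header */
	int fd;

	fd = open("/dev/bpf", O_WRONLY);
	if (fd == -1)
		return (1);
	memset(&ifr, 0, sizeof(ifr));
	strlcpy(ifr.ifr_name, "tun0", sizeof(ifr.ifr_name));
	if (ioctl(fd, BIOCSETIF, &ifr) == -1 ||
	    ioctl(fd, BIOCSHDRCMPLT, &hdrcmplt) == -1)
		return (1);

	memset(pkt, 0, sizeof(pkt));
	memcpy(pkt, &af, sizeof(af));
	/* A real test would fill in a valid IPv4 header here. */
	(void)write(fd, pkt, sizeof(pkt));
	close(fd);
	return (0);
}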
*/ static int tunioctl(struct cdev *dev, u_long cmd, caddr_t data, int flag, struct thread *td) { struct ifreq ifr, *ifrp; struct tuntap_softc *tp = dev->si_drv1; struct ifnet *ifp = TUN2IFP(tp); struct tuninfo *tunp; int error, iflags, ival; bool l2tun; l2tun = (tp->tun_flags & TUN_L2) != 0; if (l2tun) { /* tap specific ioctls */ switch(cmd) { /* VMware/VMnet port ioctl's */ #if defined(COMPAT_FREEBSD6) || defined(COMPAT_FREEBSD5) || \ defined(COMPAT_FREEBSD4) case _IO('V', 0): ival = IOCPARM_IVAL(data); data = (caddr_t)&ival; /* FALLTHROUGH */ #endif case VMIO_SIOCSIFFLAGS: /* VMware/VMnet SIOCSIFFLAGS */ iflags = *(int *)data; iflags &= TUN_VMIO_FLAG_MASK; iflags &= ~IFF_CANTCHANGE; iflags |= IFF_UP; TUN_LOCK(tp); ifp->if_flags = iflags | (ifp->if_flags & IFF_CANTCHANGE); TUN_UNLOCK(tp); return (0); case SIOCGIFADDR: /* get MAC address of the remote side */ TUN_LOCK(tp); bcopy(&tp->tun_ether.octet, data, sizeof(tp->tun_ether.octet)); TUN_UNLOCK(tp); return (0); case SIOCSIFADDR: /* set MAC address of the remote side */ TUN_LOCK(tp); bcopy(data, &tp->tun_ether.octet, sizeof(tp->tun_ether.octet)); TUN_UNLOCK(tp); return (0); case TAPSVNETHDR: ival = *(int *)data; if (ival != 0 && ival != sizeof(struct virtio_net_hdr) && ival != sizeof(struct virtio_net_hdr_mrg_rxbuf)) { return (EINVAL); } TUN_LOCK(tp); tun_vnethdr_set(ifp, ival); TUN_UNLOCK(tp); return (0); case TAPGVNETHDR: TUN_LOCK(tp); *(int *)data = tp->tun_vhdrlen; TUN_UNLOCK(tp); return (0); } /* Fall through to the common ioctls if unhandled */ } else { switch (cmd) { case TUNSLMODE: TUN_LOCK(tp); if (*(int *)data) { tp->tun_flags |= TUN_LMODE; tp->tun_flags &= ~TUN_IFHEAD; } else tp->tun_flags &= ~TUN_LMODE; TUN_UNLOCK(tp); return (0); case TUNSIFHEAD: TUN_LOCK(tp); if (*(int *)data) { tp->tun_flags |= TUN_IFHEAD; tp->tun_flags &= ~TUN_LMODE; } else tp->tun_flags &= ~TUN_IFHEAD; TUN_UNLOCK(tp); return (0); case TUNGIFHEAD: TUN_LOCK(tp); *(int *)data = (tp->tun_flags & TUN_IFHEAD) ? 
1 : 0; TUN_UNLOCK(tp); return (0); case TUNSIFMODE: /* deny this if UP */ if (TUN2IFP(tp)->if_flags & IFF_UP) return (EBUSY); switch (*(int *)data & ~IFF_MULTICAST) { case IFF_POINTOPOINT: case IFF_BROADCAST: TUN_LOCK(tp); TUN2IFP(tp)->if_flags &= ~(IFF_BROADCAST|IFF_POINTOPOINT|IFF_MULTICAST); TUN2IFP(tp)->if_flags |= *(int *)data; TUN_UNLOCK(tp); break; default: return (EINVAL); } return (0); case TUNSIFPID: TUN_LOCK(tp); tp->tun_pid = curthread->td_proc->p_pid; TUN_UNLOCK(tp); return (0); } /* Fall through to the common ioctls if unhandled */ } switch (cmd) { case TUNGIFNAME: ifrp = (struct ifreq *)data; strlcpy(ifrp->ifr_name, TUN2IFP(tp)->if_xname, IFNAMSIZ); return (0); case TUNSIFINFO: tunp = (struct tuninfo *)data; if (TUN2IFP(tp)->if_type != tunp->type) return (EPROTOTYPE); TUN_LOCK(tp); if (TUN2IFP(tp)->if_mtu != tunp->mtu) { strlcpy(ifr.ifr_name, if_name(TUN2IFP(tp)), IFNAMSIZ); ifr.ifr_mtu = tunp->mtu; CURVNET_SET(TUN2IFP(tp)->if_vnet); error = ifhwioctl(SIOCSIFMTU, TUN2IFP(tp), (caddr_t)&ifr, td); CURVNET_RESTORE(); if (error) { TUN_UNLOCK(tp); return (error); } } TUN2IFP(tp)->if_baudrate = tunp->baudrate; TUN_UNLOCK(tp); break; case TUNGIFINFO: tunp = (struct tuninfo *)data; TUN_LOCK(tp); tunp->mtu = TUN2IFP(tp)->if_mtu; tunp->type = TUN2IFP(tp)->if_type; tunp->baudrate = TUN2IFP(tp)->if_baudrate; TUN_UNLOCK(tp); break; case TUNSDEBUG: tundebug = *(int *)data; break; case TUNGDEBUG: *(int *)data = tundebug; break; case FIONBIO: break; case FIOASYNC: TUN_LOCK(tp); if (*(int *)data) tp->tun_flags |= TUN_ASYNC; else tp->tun_flags &= ~TUN_ASYNC; TUN_UNLOCK(tp); break; case FIONREAD: if (!IFQ_IS_EMPTY(&TUN2IFP(tp)->if_snd)) { struct mbuf *mb; IFQ_LOCK(&TUN2IFP(tp)->if_snd); IFQ_POLL_NOLOCK(&TUN2IFP(tp)->if_snd, mb); for (*(int *)data = 0; mb != NULL; mb = mb->m_next) *(int *)data += mb->m_len; IFQ_UNLOCK(&TUN2IFP(tp)->if_snd); } else *(int *)data = 0; break; case FIOSETOWN: return (fsetown(*(int *)data, &tp->tun_sigio)); case FIOGETOWN: *(int *)data = fgetown(&tp->tun_sigio); return (0); /* This is deprecated, FIOSETOWN should be used instead. */ case TIOCSPGRP: return (fsetown(-(*(int *)data), &tp->tun_sigio)); /* This is deprecated, FIOGETOWN should be used instead. */ case TIOCGPGRP: *(int *)data = -fgetown(&tp->tun_sigio); return (0); default: return (ENOTTY); } return (0); } /* * The cdevsw read interface - reads a packet at a time, or at * least as much of a packet as can be read. 
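For reference, the common cdev ioctls handled above are reachable from ordinary userland code with the definitions in net/if_tun.h. A small sketch, assuming an existing tun0 held open by the caller:

#include <sys/types.h>
#include <sys/ioctl.h>
#include <net/if.h>
#include <net/if_tun.h>
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int
main(void)
{
	struct ifreq ifr;
	int fd, on = 1, queued = 0;

	fd = open("/dev/tun0", O_RDWR);
	if (fd == -1)
		return (1);

	/* TUNGIFNAME: which interface sits behind this descriptor. */
	if (ioctl(fd, TUNGIFNAME, &ifr) == 0)
		printf("attached to %s\n", ifr.ifr_name);

	/* TUNSIFHEAD: prefix packets with a 4-byte address family
	 * (the handler above clears TUN_LMODE at the same time). */
	ioctl(fd, TUNSIFHEAD, &on);

	/* FIONREAD: byte count of the next queued packet, 0 if none. */
	ioctl(fd, FIONREAD, &queued);
	printf("%d bytes queued\n", queued);

	close(fd);
	return (0);
}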
*/ static int tunread(struct cdev *dev, struct uio *uio, int flag) { struct tuntap_softc *tp = dev->si_drv1; struct ifnet *ifp = TUN2IFP(tp); struct mbuf *m; size_t len; int error = 0; TUNDEBUG (ifp, "read\n"); TUN_LOCK(tp); if ((tp->tun_flags & TUN_READY) != TUN_READY) { TUN_UNLOCK(tp); TUNDEBUG (ifp, "not ready 0%o\n", tp->tun_flags); return (EHOSTDOWN); } tp->tun_flags &= ~TUN_RWAIT; for (;;) { IFQ_DEQUEUE(&ifp->if_snd, m); if (m != NULL) break; if (flag & O_NONBLOCK) { TUN_UNLOCK(tp); return (EWOULDBLOCK); } tp->tun_flags |= TUN_RWAIT; error = mtx_sleep(tp, &tp->tun_mtx, PCATCH | (PZERO + 1), "tunread", 0); if (error != 0) { TUN_UNLOCK(tp); return (error); } } TUN_UNLOCK(tp); len = min(tp->tun_vhdrlen, uio->uio_resid); if (len > 0) { struct virtio_net_hdr_mrg_rxbuf vhdr; bzero(&vhdr, sizeof(vhdr)); if (m->m_pkthdr.csum_flags & TAP_ALL_OFFLOAD) { m = virtio_net_tx_offload(ifp, m, false, &vhdr.hdr); } TUNDEBUG(ifp, "txvhdr: f %u, gt %u, hl %u, " "gs %u, cs %u, co %u\n", vhdr.hdr.flags, vhdr.hdr.gso_type, vhdr.hdr.hdr_len, vhdr.hdr.gso_size, vhdr.hdr.csum_start, vhdr.hdr.csum_offset); error = uiomove(&vhdr, len, uio); } while (m && uio->uio_resid > 0 && error == 0) { len = min(uio->uio_resid, m->m_len); if (len != 0) error = uiomove(mtod(m, void *), len, uio); m = m_free(m); } if (m) { TUNDEBUG(ifp, "Dropping mbuf\n"); m_freem(m); } return (error); } static int tunwrite_l2(struct tuntap_softc *tp, struct mbuf *m, struct virtio_net_hdr_mrg_rxbuf *vhdr) { struct epoch_tracker et; struct ether_header *eh; struct ifnet *ifp; ifp = TUN2IFP(tp); /* * Only pass a unicast frame to ether_input(), if it would * actually have been received by non-virtual hardware. */ if (m->m_len < sizeof(struct ether_header)) { m_freem(m); return (0); } eh = mtod(m, struct ether_header *); if ((ifp->if_flags & IFF_PROMISC) == 0 && !ETHER_IS_MULTICAST(eh->ether_dhost) && bcmp(eh->ether_dhost, IF_LLADDR(ifp), ETHER_ADDR_LEN) != 0) { m_freem(m); return (0); } if (vhdr != NULL) { if (virtio_net_rx_csum(m, &vhdr->hdr)) { m_freem(m); return (0); } } else { switch (ntohs(eh->ether_type)) { #ifdef INET case ETHERTYPE_IP: if (ifp->if_capenable & IFCAP_RXCSUM) { m->m_pkthdr.csum_flags |= CSUM_IP_CHECKED | CSUM_IP_VALID | CSUM_DATA_VALID | CSUM_SCTP_VALID | CSUM_PSEUDO_HDR; m->m_pkthdr.csum_data = 0xffff; } break; #endif #ifdef INET6 case ETHERTYPE_IPV6: if (ifp->if_capenable & IFCAP_RXCSUM_IPV6) { m->m_pkthdr.csum_flags |= CSUM_DATA_VALID_IPV6 | CSUM_SCTP_VALID | CSUM_PSEUDO_HDR; m->m_pkthdr.csum_data = 0xffff; } break; #endif } } /* Pass packet up to parent. */ CURVNET_SET(ifp->if_vnet); NET_EPOCH_ENTER(et); #if defined(INET) || defined(INET6) if (tp->tun_lro_ready && ifp->if_capenable & IFCAP_LRO && tcp_lro_rx(&tp->tun_lro, m, 0) == 0) tcp_lro_flush_all(&tp->tun_lro); else #endif (*ifp->if_input)(ifp, m); NET_EPOCH_EXIT(et); CURVNET_RESTORE(); /* ibytes are counted in parent */ if_inc_counter(ifp, IFCOUNTER_IPACKETS, 1); return (0); } static int tunwrite_l3(struct tuntap_softc *tp, struct mbuf *m) { struct epoch_tracker et; struct ifnet *ifp; int family, isr; ifp = TUN2IFP(tp); /* Could be unlocked read? 
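A read-side sketch matching tunread() above: each read() returns at most one packet (or as much of it as fits in the buffer, the remainder being dropped), and when TUNSIFHEAD is enabled the payload is preceded by the 4-byte address family that tunoutput() prepended in network byte order. Hypothetical helper, not part of this change.

#include <sys/types.h>
#include <netinet/in.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

static void
tun_drain(int fd)
{
	char buf[2048];
	u_int32_t af;
	ssize_t n;

	for (;;) {
		/* Blocks unless the descriptor is O_NONBLOCK. */
		n = read(fd, buf, sizeof(buf));
		if (n <= 0)
			break;
		if (n >= (ssize_t)sizeof(af)) {
			memcpy(&af, buf, sizeof(af));
			printf("packet: %zd bytes, af %u\n", n, ntohl(af));
		}
	}
}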
*/ TUN_LOCK(tp); if (tp->tun_flags & TUN_IFHEAD) { TUN_UNLOCK(tp); if (m->m_len < sizeof(family) && (m = m_pullup(m, sizeof(family))) == NULL) return (ENOBUFS); family = ntohl(*mtod(m, u_int32_t *)); m_adj(m, sizeof(family)); } else { TUN_UNLOCK(tp); family = AF_INET; } BPF_MTAP2(ifp, &family, sizeof(family), m); switch (family) { #ifdef INET case AF_INET: isr = NETISR_IP; break; #endif #ifdef INET6 case AF_INET6: isr = NETISR_IPV6; break; #endif default: m_freem(m); return (EAFNOSUPPORT); } random_harvest_queue(m, sizeof(*m), RANDOM_NET_TUN); if_inc_counter(ifp, IFCOUNTER_IBYTES, m->m_pkthdr.len); if_inc_counter(ifp, IFCOUNTER_IPACKETS, 1); CURVNET_SET(ifp->if_vnet); M_SETFIB(m, ifp->if_fib); NET_EPOCH_ENTER(et); netisr_dispatch(isr, m); NET_EPOCH_EXIT(et); CURVNET_RESTORE(); return (0); } /* * the cdevsw write interface - an atomic write is a packet - or else! */ static int tunwrite(struct cdev *dev, struct uio *uio, int flag) { struct virtio_net_hdr_mrg_rxbuf vhdr; struct tuntap_softc *tp; struct ifnet *ifp; struct mbuf *m; uint32_t mru; int align, vhdrlen, error; bool l2tun; tp = dev->si_drv1; ifp = TUN2IFP(tp); TUNDEBUG(ifp, "tunwrite\n"); if ((ifp->if_flags & IFF_UP) != IFF_UP) /* ignore silently */ return (0); if (uio->uio_resid == 0) return (0); l2tun = (tp->tun_flags & TUN_L2) != 0; mru = l2tun ? TAPMRU : TUNMRU; vhdrlen = tp->tun_vhdrlen; align = 0; if (l2tun) { align = ETHER_ALIGN; mru += vhdrlen; } else if ((tp->tun_flags & TUN_IFHEAD) != 0) mru += sizeof(uint32_t); /* family */ if (uio->uio_resid < 0 || uio->uio_resid > mru) { TUNDEBUG(ifp, "len=%zd!\n", uio->uio_resid); return (EIO); } if (vhdrlen > 0) { error = uiomove(&vhdr, vhdrlen, uio); if (error != 0) return (error); TUNDEBUG(ifp, "txvhdr: f %u, gt %u, hl %u, " "gs %u, cs %u, co %u\n", vhdr.hdr.flags, vhdr.hdr.gso_type, vhdr.hdr.hdr_len, vhdr.hdr.gso_size, vhdr.hdr.csum_start, vhdr.hdr.csum_offset); } if ((m = m_uiotombuf(uio, M_NOWAIT, 0, align, M_PKTHDR)) == NULL) { if_inc_counter(ifp, IFCOUNTER_IERRORS, 1); return (ENOBUFS); } m->m_pkthdr.rcvif = ifp; #ifdef MAC mac_ifnet_create_mbuf(ifp, m); #endif if (l2tun) return (tunwrite_l2(tp, m, vhdrlen > 0 ? &vhdr : NULL)); return (tunwrite_l3(tp, m)); } /* * tunpoll - the poll interface, this is only useful on reads * really. The write detect always returns true, write never blocks * anyway, it either accepts the packet or drops it. */ static int tunpoll(struct cdev *dev, int events, struct thread *td) { struct tuntap_softc *tp = dev->si_drv1; struct ifnet *ifp = TUN2IFP(tp); int revents = 0; TUNDEBUG(ifp, "tunpoll\n"); if (events & (POLLIN | POLLRDNORM)) { IFQ_LOCK(&ifp->if_snd); if (!IFQ_IS_EMPTY(&ifp->if_snd)) { TUNDEBUG(ifp, "tunpoll q=%d\n", ifp->if_snd.ifq_len); revents |= events & (POLLIN | POLLRDNORM); } else { TUNDEBUG(ifp, "tunpoll waiting\n"); selrecord(td, &tp->tun_rsel); } IFQ_UNLOCK(&ifp->if_snd); } revents |= events & (POLLOUT | POLLWRNORM); return (revents); } /* * tunkqfilter - support for the kevent() system call. 
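The corresponding write-side sketch for tunwrite()/tunwrite_l3() above: one write() (or writev()) is one packet, an oversized or zero-length write is rejected or ignored, and with TUNSIFHEAD enabled the first 4 bytes must carry the address family, which tunwrite_l3() converts with ntohl(). Hypothetical helper:

#include <sys/types.h>
#include <sys/uio.h>
#include <netinet/in.h>
#include <unistd.h>

static ssize_t
tun_write_pkt(int fd, void *pkt, size_t len, int af)
{
	struct iovec iov[2];
	u_int32_t hdr = htonl((u_int32_t)af);

	iov[0].iov_base = &hdr;		/* 4-byte AF header (TUNSIFHEAD) */
	iov[0].iov_len = sizeof(hdr);
	iov[1].iov_base = pkt;
	iov[1].iov_len = len;
	return (writev(fd, iov, 2));
}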
*/ static int tunkqfilter(struct cdev *dev, struct knote *kn) { struct tuntap_softc *tp = dev->si_drv1; struct ifnet *ifp = TUN2IFP(tp); switch(kn->kn_filter) { case EVFILT_READ: TUNDEBUG(ifp, "%s kqfilter: EVFILT_READ, minor = %#x\n", ifp->if_xname, dev2unit(dev)); kn->kn_fop = &tun_read_filterops; break; case EVFILT_WRITE: TUNDEBUG(ifp, "%s kqfilter: EVFILT_WRITE, minor = %#x\n", ifp->if_xname, dev2unit(dev)); kn->kn_fop = &tun_write_filterops; break; default: TUNDEBUG(ifp, "%s kqfilter: invalid filter, minor = %#x\n", ifp->if_xname, dev2unit(dev)); return(EINVAL); } kn->kn_hook = tp; knlist_add(&tp->tun_rsel.si_note, kn, 0); return (0); } /* * Return true of there is data in the interface queue. */ static int tunkqread(struct knote *kn, long hint) { int ret; struct tuntap_softc *tp = kn->kn_hook; struct cdev *dev = tp->tun_dev; struct ifnet *ifp = TUN2IFP(tp); if ((kn->kn_data = ifp->if_snd.ifq_len) > 0) { TUNDEBUG(ifp, "%s have data in the queue. Len = %d, minor = %#x\n", ifp->if_xname, ifp->if_snd.ifq_len, dev2unit(dev)); ret = 1; } else { TUNDEBUG(ifp, "%s waiting for data, minor = %#x\n", ifp->if_xname, dev2unit(dev)); ret = 0; } return (ret); } /* * Always can write, always return MTU in kn->data. */ static int tunkqwrite(struct knote *kn, long hint) { struct tuntap_softc *tp = kn->kn_hook; struct ifnet *ifp = TUN2IFP(tp); kn->kn_data = ifp->if_mtu; return (1); } static void tunkqdetach(struct knote *kn) { struct tuntap_softc *tp = kn->kn_hook; knlist_remove(&tp->tun_rsel.si_note, kn, 0); } diff --git a/sys/netgraph/ng_iface.c b/sys/netgraph/ng_iface.c index 8ae4707b7abd..e9f97ff0fdec 100644 --- a/sys/netgraph/ng_iface.c +++ b/sys/netgraph/ng_iface.c @@ -1,817 +1,817 @@ /* * ng_iface.c */ /*- * Copyright (c) 1996-1999 Whistle Communications, Inc. * All rights reserved. * * Subject to the following obligations and disclaimer of warranty, use and * redistribution of this software, in source or object code forms, with or * without modifications are expressly permitted by Whistle Communications; * provided, however, that: * 1. Any and all reproductions of the source or object code must include the * copyright notice above and the following disclaimer of warranties; and * 2. No rights are granted, in any manner or form, to use Whistle * Communications, Inc. trademarks, including the mark "WHISTLE * COMMUNICATIONS" on advertising, endorsements, or otherwise except as * such appears in the above copyright notice or in the software. * * THIS SOFTWARE IS BEING PROVIDED BY WHISTLE COMMUNICATIONS "AS IS", AND * TO THE MAXIMUM EXTENT PERMITTED BY LAW, WHISTLE COMMUNICATIONS MAKES NO * REPRESENTATIONS OR WARRANTIES, EXPRESS OR IMPLIED, REGARDING THIS SOFTWARE, * INCLUDING WITHOUT LIMITATION, ANY AND ALL IMPLIED WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, OR NON-INFRINGEMENT. * WHISTLE COMMUNICATIONS DOES NOT WARRANT, GUARANTEE, OR MAKE ANY * REPRESENTATIONS REGARDING THE USE OF, OR THE RESULTS OF THE USE OF THIS * SOFTWARE IN TERMS OF ITS CORRECTNESS, ACCURACY, RELIABILITY OR OTHERWISE. 
* IN NO EVENT SHALL WHISTLE COMMUNICATIONS BE LIABLE FOR ANY DAMAGES * RESULTING FROM OR ARISING OUT OF ANY USE OF THIS SOFTWARE, INCLUDING * WITHOUT LIMITATION, ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, * PUNITIVE, OR CONSEQUENTIAL DAMAGES, PROCUREMENT OF SUBSTITUTE GOODS OR * SERVICES, LOSS OF USE, DATA OR PROFITS, HOWEVER CAUSED AND UNDER ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF WHISTLE COMMUNICATIONS IS ADVISED OF THE POSSIBILITY * OF SUCH DAMAGE. * * Author: Archie Cobbs * $Whistle: ng_iface.c,v 1.33 1999/11/01 09:24:51 julian Exp $ */ /* * This node is also a system networking interface. It has * a hook for each protocol (IP, AppleTalk, etc). Packets * are simply relayed between the interface and the hooks. * * Interfaces are named ng0, ng1, etc. New nodes take the * first available interface name. * * This node also includes Berkeley packet filter support. */ #include "opt_inet.h" #include "opt_inet6.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef NG_SEPARATE_MALLOC static MALLOC_DEFINE(M_NETGRAPH_IFACE, "netgraph_iface", "netgraph iface node"); #else #define M_NETGRAPH_IFACE M_NETGRAPH #endif static SYSCTL_NODE(_net_graph, OID_AUTO, iface, CTLFLAG_RW | CTLFLAG_MPSAFE, 0, "Point to point netgraph interface"); VNET_DEFINE_STATIC(int, ng_iface_max_nest) = 2; #define V_ng_iface_max_nest VNET(ng_iface_max_nest) SYSCTL_INT(_net_graph_iface, OID_AUTO, max_nesting, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(ng_iface_max_nest), 0, "Max nested tunnels"); /* This struct describes one address family */ struct iffam { sa_family_t family; /* Address family */ const char *hookname; /* Name for hook */ }; typedef const struct iffam *iffam_p; /* List of address families supported by our interface */ const static struct iffam gFamilies[] = { { AF_INET, NG_IFACE_HOOK_INET }, { AF_INET6, NG_IFACE_HOOK_INET6 }, }; #define NUM_FAMILIES nitems(gFamilies) /* Node private data */ struct ng_iface_private { struct ifnet *ifp; /* Our interface */ int unit; /* Interface unit number */ node_p node; /* Our netgraph node */ hook_p hooks[NUM_FAMILIES]; /* Hook for each address family */ struct rmlock lock; /* Protect private data changes */ }; typedef struct ng_iface_private *priv_p; #define PRIV_RLOCK(priv, t) rm_rlock(&priv->lock, t) #define PRIV_RUNLOCK(priv, t) rm_runlock(&priv->lock, t) #define PRIV_WLOCK(priv) rm_wlock(&priv->lock) #define PRIV_WUNLOCK(priv) rm_wunlock(&priv->lock) /* Interface methods */ static void ng_iface_start(struct ifnet *ifp); static int ng_iface_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data); static int ng_iface_output(struct ifnet *ifp, struct mbuf *m0, const struct sockaddr *dst, struct route *ro); static void ng_iface_bpftap(struct ifnet *ifp, struct mbuf *m, sa_family_t family); static int ng_iface_send(struct ifnet *ifp, struct mbuf *m, sa_family_t sa); #ifdef DEBUG static void ng_iface_print_ioctl(struct ifnet *ifp, int cmd, caddr_t data); #endif /* Netgraph methods */ static int ng_iface_mod_event(module_t, int, void *); static ng_constructor_t ng_iface_constructor; static ng_rcvmsg_t ng_iface_rcvmsg; static ng_shutdown_t ng_iface_shutdown; static ng_newhook_t ng_iface_newhook; static ng_rcvdata_t ng_iface_rcvdata; 
static ng_disconnect_t ng_iface_disconnect; /* Helper stuff */ static iffam_p get_iffam_from_af(sa_family_t family); static iffam_p get_iffam_from_hook(priv_p priv, hook_p hook); static iffam_p get_iffam_from_name(const char *name); static hook_p *get_hook_from_iffam(priv_p priv, iffam_p iffam); /* List of commands and how to convert arguments to/from ASCII */ static const struct ng_cmdlist ng_iface_cmds[] = { { NGM_IFACE_COOKIE, NGM_IFACE_GET_IFNAME, "getifname", NULL, &ng_parse_string_type }, { NGM_IFACE_COOKIE, NGM_IFACE_POINT2POINT, "point2point", NULL, NULL }, { NGM_IFACE_COOKIE, NGM_IFACE_BROADCAST, "broadcast", NULL, NULL }, { NGM_IFACE_COOKIE, NGM_IFACE_GET_IFINDEX, "getifindex", NULL, &ng_parse_uint32_type }, { 0 } }; /* Node type descriptor */ static struct ng_type typestruct = { .version = NG_ABI_VERSION, .name = NG_IFACE_NODE_TYPE, .mod_event = ng_iface_mod_event, .constructor = ng_iface_constructor, .rcvmsg = ng_iface_rcvmsg, .shutdown = ng_iface_shutdown, .newhook = ng_iface_newhook, .rcvdata = ng_iface_rcvdata, .disconnect = ng_iface_disconnect, .cmdlist = ng_iface_cmds, }; NETGRAPH_INIT(iface, &typestruct); VNET_DEFINE_STATIC(struct unrhdr *, ng_iface_unit); #define V_ng_iface_unit VNET(ng_iface_unit) /************************************************************************ HELPER STUFF ************************************************************************/ /* * Get the family descriptor from the family ID */ static __inline iffam_p get_iffam_from_af(sa_family_t family) { iffam_p iffam; int k; for (k = 0; k < NUM_FAMILIES; k++) { iffam = &gFamilies[k]; if (iffam->family == family) return (iffam); } return (NULL); } /* * Get the family descriptor from the hook */ static __inline iffam_p get_iffam_from_hook(priv_p priv, hook_p hook) { int k; for (k = 0; k < NUM_FAMILIES; k++) if (priv->hooks[k] == hook) return (&gFamilies[k]); return (NULL); } /* * Get the hook from the iffam descriptor */ static __inline hook_p * get_hook_from_iffam(priv_p priv, iffam_p iffam) { return (&priv->hooks[iffam - gFamilies]); } /* * Get the iffam descriptor from the name */ static __inline iffam_p get_iffam_from_name(const char *name) { iffam_p iffam; int k; for (k = 0; k < NUM_FAMILIES; k++) { iffam = &gFamilies[k]; if (!strcmp(iffam->hookname, name)) return (iffam); } return (NULL); } /************************************************************************ INTERFACE STUFF ************************************************************************/ /* * Process an ioctl for the virtual interface */ static int ng_iface_ioctl(struct ifnet *ifp, u_long command, caddr_t data) { struct ifreq *const ifr = (struct ifreq *) data; int error = 0; #ifdef DEBUG ng_iface_print_ioctl(ifp, command, data); #endif switch (command) { /* These two are mostly handled at a higher layer */ case SIOCSIFADDR: ifp->if_flags |= IFF_UP; ifp->if_drv_flags |= IFF_DRV_RUNNING; ifp->if_drv_flags &= ~(IFF_DRV_OACTIVE); break; case SIOCGIFADDR: break; /* Set flags */ case SIOCSIFFLAGS: /* * If the interface is marked up and stopped, then start it. * If it is marked down and running, then stop it. 
*/ if (ifr->ifr_flags & IFF_UP) { if (!(ifp->if_drv_flags & IFF_DRV_RUNNING)) { ifp->if_drv_flags &= ~(IFF_DRV_OACTIVE); ifp->if_drv_flags |= IFF_DRV_RUNNING; } } else { if (ifp->if_drv_flags & IFF_DRV_RUNNING) ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE); } break; /* Set the interface MTU */ case SIOCSIFMTU: if (ifr->ifr_mtu > NG_IFACE_MTU_MAX || ifr->ifr_mtu < NG_IFACE_MTU_MIN) error = EINVAL; else ifp->if_mtu = ifr->ifr_mtu; break; /* Stuff that's not supported */ case SIOCADDMULTI: case SIOCDELMULTI: error = 0; break; case SIOCSIFPHYS: error = EOPNOTSUPP; break; default: error = EINVAL; break; } return (error); } /* * This routine is called to deliver a packet out the interface. * We simply look at the address family and relay the packet to * the corresponding hook, if it exists and is connected. */ static int ng_iface_output(struct ifnet *ifp, struct mbuf *m, const struct sockaddr *dst, struct route *ro) { uint32_t af; int error; /* Check interface flags */ if (!((ifp->if_flags & IFF_UP) && (ifp->if_drv_flags & IFF_DRV_RUNNING))) { m_freem(m); return (ENETDOWN); } /* Protect from deadly infinite recursion. */ error = if_tunnel_check_nesting(ifp, m, NGM_IFACE_COOKIE, V_ng_iface_max_nest); if (error) { m_freem(m); return (error); } /* BPF writes need to be handled specially. */ - if (dst->sa_family == AF_UNSPEC) + if (dst->sa_family == AF_UNSPEC || dst->sa_family == pseudo_AF_HDRCMPLT) bcopy(dst->sa_data, &af, sizeof(af)); else af = RO_GET_FAMILY(ro, dst); /* Berkeley packet filter */ ng_iface_bpftap(ifp, m, af); if (ALTQ_IS_ENABLED(&ifp->if_snd)) { M_PREPEND(m, sizeof(sa_family_t), M_NOWAIT); if (m == NULL) { if_inc_counter(ifp, IFCOUNTER_OQDROPS, 1); return (ENOBUFS); } *(sa_family_t *)m->m_data = af; error = (ifp->if_transmit)(ifp, m); } else error = ng_iface_send(ifp, m, af); return (error); } /* * Start method is used only when ALTQ is enabled. */ static void ng_iface_start(struct ifnet *ifp) { struct mbuf *m; sa_family_t sa; KASSERT(ALTQ_IS_ENABLED(&ifp->if_snd), ("%s without ALTQ", __func__)); for(;;) { IFQ_DRV_DEQUEUE(&ifp->if_snd, m); if (m == NULL) break; sa = *mtod(m, sa_family_t *); m_adj(m, sizeof(sa_family_t)); ng_iface_send(ifp, m, sa); } } /* * Flash a packet by the BPF (requires prepending 4 byte AF header) * Note the phoney mbuf; this is OK because BPF treats it read-only. */ static void ng_iface_bpftap(struct ifnet *ifp, struct mbuf *m, sa_family_t family) { KASSERT(family != AF_UNSPEC, ("%s: family=AF_UNSPEC", __func__)); if (bpf_peers_present(ifp->if_bpf)) { int32_t family4 = (int32_t)family; bpf_mtap2(ifp->if_bpf, &family4, sizeof(family4), m); } } /* * This routine does actual delivery of the packet into the * netgraph(4). It is called from ng_iface_start() and * ng_iface_output(). */ static int ng_iface_send(struct ifnet *ifp, struct mbuf *m, sa_family_t sa) { struct rm_priotracker priv_tracker; const priv_p priv = (priv_p) ifp->if_softc; const iffam_p iffam = get_iffam_from_af(sa); hook_p hook; int error; int len; /* Check address family to determine hook (if known) */ if (iffam == NULL) { m_freem(m); log(LOG_WARNING, "%s: can't handle af%d\n", ifp->if_xname, sa); return (EAFNOSUPPORT); } /* Copy length before the mbuf gets invalidated. 
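The nesting guard in ng_iface_output() above is bounded by the net.graph.iface.max_nesting sysctl declared near the top of this file (default 2). A hedged sketch of inspecting and raising it from a program with sysctlbyname(3):

#include <sys/types.h>
#include <sys/sysctl.h>
#include <stdio.h>

int
main(void)
{
	int nest = 0, newnest = 4;
	size_t len = sizeof(nest);

	sysctlbyname("net.graph.iface.max_nesting", &nest, &len, NULL, 0);
	printf("max_nesting is %d\n", nest);

	/* Raising it needs privilege; the in-tree default is 2. */
	sysctlbyname("net.graph.iface.max_nesting", NULL, NULL,
	    &newnest, sizeof(newnest));
	return (0);
}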
*/ len = m->m_pkthdr.len; PRIV_RLOCK(priv, &priv_tracker); hook = *get_hook_from_iffam(priv, iffam); if (hook == NULL) { NG_FREE_M(m); PRIV_RUNLOCK(priv, &priv_tracker); return ENETDOWN; } NG_HOOK_REF(hook); PRIV_RUNLOCK(priv, &priv_tracker); NG_OUTBOUND_THREAD_REF(); NG_SEND_DATA_ONLY(error, hook, m); NG_OUTBOUND_THREAD_UNREF(); NG_HOOK_UNREF(hook); /* Update stats. */ if (error == 0) { if_inc_counter(ifp, IFCOUNTER_OBYTES, len); if_inc_counter(ifp, IFCOUNTER_OPACKETS, 1); } return (error); } #ifdef DEBUG /* * Display an ioctl to the virtual interface */ static void ng_iface_print_ioctl(struct ifnet *ifp, int command, caddr_t data) { char *str; switch (command & IOC_DIRMASK) { case IOC_VOID: str = "IO"; break; case IOC_OUT: str = "IOR"; break; case IOC_IN: str = "IOW"; break; case IOC_INOUT: str = "IORW"; break; default: str = "IO??"; } log(LOG_DEBUG, "%s: %s('%c', %d, char[%d])\n", ifp->if_xname, str, IOCGROUP(command), command & 0xff, IOCPARM_LEN(command)); } #endif /* DEBUG */ /************************************************************************ NETGRAPH NODE STUFF ************************************************************************/ /* * Constructor for a node */ static int ng_iface_constructor(node_p node) { struct ifnet *ifp; priv_p priv; /* Allocate node and interface private structures */ priv = malloc(sizeof(*priv), M_NETGRAPH_IFACE, M_WAITOK | M_ZERO); ifp = if_alloc(IFT_PROPVIRTUAL); if (ifp == NULL) { free(priv, M_NETGRAPH_IFACE); return (ENOMEM); } rm_init(&priv->lock, "ng_iface private rmlock"); /* Link them together */ ifp->if_softc = priv; priv->ifp = ifp; /* Get an interface unit number */ priv->unit = alloc_unr(V_ng_iface_unit); /* Link together node and private info */ NG_NODE_SET_PRIVATE(node, priv); priv->node = node; /* Initialize interface structure */ if_initname(ifp, NG_IFACE_IFACE_NAME, priv->unit); ifp->if_output = ng_iface_output; ifp->if_start = ng_iface_start; ifp->if_ioctl = ng_iface_ioctl; ifp->if_mtu = NG_IFACE_MTU_DEFAULT; ifp->if_flags = (IFF_SIMPLEX|IFF_POINTOPOINT|IFF_NOARP|IFF_MULTICAST); ifp->if_type = IFT_PROPVIRTUAL; /* XXX */ ifp->if_addrlen = 0; /* XXX */ ifp->if_hdrlen = 0; /* XXX */ ifp->if_baudrate = 64000; /* XXX */ IFQ_SET_MAXLEN(&ifp->if_snd, ifqmaxlen); ifp->if_snd.ifq_drv_maxlen = ifqmaxlen; IFQ_SET_READY(&ifp->if_snd); /* Give this node the same name as the interface (if possible) */ if (ng_name_node(node, ifp->if_xname) != 0) log(LOG_WARNING, "%s: can't acquire netgraph name\n", ifp->if_xname); /* Attach the interface */ if_attach(ifp); bpfattach(ifp, DLT_NULL, sizeof(u_int32_t)); /* Done */ return (0); } /* * Give our ok for a hook to be added */ static int ng_iface_newhook(node_p node, hook_p hook, const char *name) { const iffam_p iffam = get_iffam_from_name(name); const priv_p priv = NG_NODE_PRIVATE(node); hook_p *hookptr; if (iffam == NULL) return (EPFNOSUPPORT); PRIV_WLOCK(priv); hookptr = get_hook_from_iffam(priv, iffam); if (*hookptr != NULL) { PRIV_WUNLOCK(priv); return (EISCONN); } *hookptr = hook; NG_HOOK_HI_STACK(hook); NG_HOOK_SET_TO_INBOUND(hook); PRIV_WUNLOCK(priv); return (0); } /* * Receive a control message */ static int ng_iface_rcvmsg(node_p node, item_p item, hook_p lasthook) { const priv_p priv = NG_NODE_PRIVATE(node); struct ifnet *const ifp = priv->ifp; struct ng_mesg *resp = NULL; int error = 0; struct ng_mesg *msg; NGI_GET_MSG(item, msg); switch (msg->header.typecookie) { case NGM_IFACE_COOKIE: switch (msg->header.cmd) { case NGM_IFACE_GET_IFNAME: NG_MKRESPONSE(resp, msg, IFNAMSIZ, M_NOWAIT); if (resp 
== NULL) { error = ENOMEM; break; } strlcpy(resp->data, ifp->if_xname, IFNAMSIZ); break; case NGM_IFACE_POINT2POINT: case NGM_IFACE_BROADCAST: { /* Deny request if interface is UP */ if ((ifp->if_flags & IFF_UP) != 0) return (EBUSY); /* Change flags */ switch (msg->header.cmd) { case NGM_IFACE_POINT2POINT: ifp->if_flags |= IFF_POINTOPOINT; ifp->if_flags &= ~IFF_BROADCAST; break; case NGM_IFACE_BROADCAST: ifp->if_flags &= ~IFF_POINTOPOINT; ifp->if_flags |= IFF_BROADCAST; break; } break; } case NGM_IFACE_GET_IFINDEX: NG_MKRESPONSE(resp, msg, sizeof(uint32_t), M_NOWAIT); if (resp == NULL) { error = ENOMEM; break; } *((uint32_t *)resp->data) = priv->ifp->if_index; break; default: error = EINVAL; break; } break; case NGM_FLOW_COOKIE: switch (msg->header.cmd) { case NGM_LINK_IS_UP: if_link_state_change(ifp, LINK_STATE_UP); break; case NGM_LINK_IS_DOWN: if_link_state_change(ifp, LINK_STATE_DOWN); break; default: break; } break; default: error = EINVAL; break; } NG_RESPOND_MSG(error, node, item, resp); NG_FREE_MSG(msg); return (error); } /* * Recive data from a hook. Pass the packet to the correct input routine. */ static int ng_iface_rcvdata(hook_p hook, item_p item) { const priv_p priv = NG_NODE_PRIVATE(NG_HOOK_NODE(hook)); const iffam_p iffam = get_iffam_from_hook(priv, hook); struct ifnet *const ifp = priv->ifp; struct epoch_tracker et; struct mbuf *m; int isr; NGI_GET_M(item, m); NG_FREE_ITEM(item); /* Sanity checks */ KASSERT(iffam != NULL, ("%s: iffam", __func__)); M_ASSERTPKTHDR(m); if ((ifp->if_flags & IFF_UP) == 0) { NG_FREE_M(m); return (ENETDOWN); } /* Update interface stats */ if_inc_counter(ifp, IFCOUNTER_IPACKETS, 1); if_inc_counter(ifp, IFCOUNTER_IBYTES, m->m_pkthdr.len); /* Note receiving interface */ m->m_pkthdr.rcvif = ifp; /* Berkeley packet filter */ ng_iface_bpftap(ifp, m, iffam->family); /* Send packet */ switch (iffam->family) { #ifdef INET case AF_INET: isr = NETISR_IP; break; #endif #ifdef INET6 case AF_INET6: isr = NETISR_IPV6; break; #endif default: m_freem(m); return (EAFNOSUPPORT); } random_harvest_queue(m, sizeof(*m), RANDOM_NET_NG); M_SETFIB(m, ifp->if_fib); CURVNET_SET(ifp->if_vnet); NET_EPOCH_ENTER(et); netisr_dispatch(isr, m); NET_EPOCH_EXIT(et); CURVNET_RESTORE(); return (0); } /* * Shutdown and remove the node and its associated interface. */ static int ng_iface_shutdown(node_p node) { const priv_p priv = NG_NODE_PRIVATE(node); /* * The ifnet may be in a different vnet than the netgraph node, * hence we have to change the current vnet context here. */ CURVNET_SET_QUIET(priv->ifp->if_vnet); bpfdetach(priv->ifp); if_detach(priv->ifp); if_free(priv->ifp); CURVNET_RESTORE(); priv->ifp = NULL; free_unr(V_ng_iface_unit, priv->unit); rm_destroy(&priv->lock); free(priv, M_NETGRAPH_IFACE); NG_NODE_SET_PRIVATE(node, NULL); NG_NODE_UNREF(node); return (0); } /* * Hook disconnection. Note that we do *not* shutdown when all * hooks have been disconnected. */ static int ng_iface_disconnect(hook_p hook) { const priv_p priv = NG_NODE_PRIVATE(NG_HOOK_NODE(hook)); const iffam_p iffam = get_iffam_from_hook(priv, hook); if (iffam == NULL) panic("%s", __func__); PRIV_WLOCK(priv); *get_hook_from_iffam(priv, iffam) = NULL; PRIV_WUNLOCK(priv); return (0); } /* * Handle loading and unloading for this node type. 
 */
static int
ng_iface_mod_event(module_t mod, int event, void *data)
{
	int error = 0;

	switch (event) {
	case MOD_LOAD:
	case MOD_UNLOAD:
		break;
	default:
		error = EOPNOTSUPP;
		break;
	}
	return (error);
}

static void
vnet_ng_iface_init(const void *unused)
{

	V_ng_iface_unit = new_unrhdr(0, 0xffff, NULL);
}
VNET_SYSINIT(vnet_ng_iface_init, SI_SUB_PSEUDO, SI_ORDER_ANY,
    vnet_ng_iface_init, NULL);

static void
vnet_ng_iface_uninit(const void *unused)
{

	delete_unrhdr(V_ng_iface_unit);
}
VNET_SYSUNINIT(vnet_ng_iface_uninit, SI_SUB_INIT_IF, SI_ORDER_ANY,
    vnet_ng_iface_uninit, NULL);
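To round out the netgraph side, here is a hedged libnetgraph(3) sketch that creates an ng_iface instance and exercises the NGM_IFACE_GET_IFNAME handler shown in ng_iface_rcvmsg() above. The hook is named NG_IFACE_HOOK_INET so that ng_iface_newhook() accepts it; the anonymous socket node and the relative path ".:inet" are conventional libnetgraph usage and should be treated as assumptions of the sketch, not as part of this change. Link with -lnetgraph.

#include <sys/types.h>
#include <stdio.h>
#include <string.h>
#include <netgraph.h>
#include <netgraph/ng_message.h>
#include <netgraph/ng_iface.h>

int
main(void)
{
	struct ngm_mkpeer mkp;
	u_char rbuf[256];
	struct ng_mesg *resp = (struct ng_mesg *)rbuf;
	int cs;

	/* Anonymous control socket node. */
	if (NgMkSockNode(NULL, &cs, NULL) == -1)
		return (1);

	/* Create an ng_iface peer; its hook must be "inet" or "inet6". */
	memset(&mkp, 0, sizeof(mkp));
	strlcpy(mkp.type, NG_IFACE_NODE_TYPE, sizeof(mkp.type));
	strlcpy(mkp.ourhook, NG_IFACE_HOOK_INET, sizeof(mkp.ourhook));
	strlcpy(mkp.peerhook, NG_IFACE_HOOK_INET, sizeof(mkp.peerhook));
	if (NgSendMsg(cs, ".", NGM_GENERIC_COOKIE, NGM_MKPEER,
	    &mkp, sizeof(mkp)) == -1)
		return (1);

	/* Ask the new node which ngN interface it attached. */
	if (NgSendMsg(cs, ".:inet", NGM_IFACE_COOKIE,
	    NGM_IFACE_GET_IFNAME, NULL, 0) == -1 ||
	    NgRecvMsg(cs, resp, sizeof(rbuf), NULL) == -1)
		return (1);
	printf("created %s\n", (char *)resp->data);
	return (0);
}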